1 Star 1 Fork 2

kangyu1122 / 获取国家统计局五级行政区域

Create your Gitee Account
Explore and code with more than 6 million developers,Free private repositories !:)
Sign up
This repository doesn't specify license. Without author's permission, this code is only for learning and cannot be used for other purposes.
Clone or download
Program.cs 16.22 KB
Copy Edit Web IDE Raw Blame History
kangyu1122 authored 2020-09-29 14:51 . 第一次提交
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.OleDb;
using System.Data.OracleClient;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
namespace GetCityApp
{
class Program
{
static string MainUrl = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/";
static DataTable dt = new DataTable();
static DataTable Getcitytr_dt = new DataTable();
static DataTable Getcountytr_dt = new DataTable();
static DataTable Gettowntr_dt = new DataTable();
static DataTable Getvillagetr_dt = new DataTable();
static int count = 0;
static CookieContainer cc = new CookieContainer();
static List<string> ports = new List<string>();
static string html = string.Empty;
static void Main(string[] args)
{
DoCommand("DELETE FROM kangyu_all_city_list");
ports.Add("59.38.62.109,9797");
ports.Add("112.253.11.113,8000");
ports.Add("222.189.191.206,9999");
ports.Add("118.212.104.138,9999");
ports.Add("121.232.194.25,9000");
ports.Add("115.218.2.139,9000");
ports.Add("49.89.84.107,9999");
ports.Add("171.12.220.36,9999");
ports.Add("175.44.108.155,9999");
ports.Add("182.32.163.128,9999");
ports.Add("112.84.98.235,9999");
ports.Add("1.197.203.59,9999");
ports.Add("175.42.122.46,9999");
ports.Add("163.204.240.134,9999");
ports.Add("19.254.94.114,34422");
string page = "index.html";
GetPageHtml(MainUrl + page);
Dictionary<string, string> MainCity = GetMainCity(html);
//DataTable dt = new DataTable();
dt.Columns.Add("ID");
dt.Columns.Add("P_ID");
dt.Columns.Add("NAME");
dt.Columns.Add("URL");
dt.Columns.Add("LEVEL_ID");
Getcitytr_dt.Columns.Add("ID");
Getcitytr_dt.Columns.Add("P_ID");
Getcitytr_dt.Columns.Add("NAME");
Getcitytr_dt.Columns.Add("URL");
Getcitytr_dt.Columns.Add("LEVEL_ID");
Getcountytr_dt.Columns.Add("ID");
Getcountytr_dt.Columns.Add("P_ID");
Getcountytr_dt.Columns.Add("NAME");
Getcountytr_dt.Columns.Add("URL");
Getcountytr_dt.Columns.Add("LEVEL_ID");
Gettowntr_dt.Columns.Add("ID");
Gettowntr_dt.Columns.Add("P_ID");
Gettowntr_dt.Columns.Add("NAME");
Gettowntr_dt.Columns.Add("URL");
Gettowntr_dt.Columns.Add("LEVEL_ID");
Getvillagetr_dt.Columns.Add("ID");
Getvillagetr_dt.Columns.Add("P_ID");
Getvillagetr_dt.Columns.Add("NAME");
Getvillagetr_dt.Columns.Add("URL");
Getvillagetr_dt.Columns.Add("LEVEL_ID");
foreach (KeyValuePair<string, string> item in MainCity)
{
TableAddRow(dt, item.Key, "", item.Key, MainUrl + item.Value, 1);
//Console.WriteLine("ADD:" + item.Key + "——" + item.Value);
}
foreach (DataRow dr in dt.Rows)
{
Getcitytr(dr["url"].ToString(), dr["id"].ToString());
//Thread.Sleep(1000);
}
foreach (DataRow dr in Getcitytr_dt.Rows)
{
Getcountytr(dr["url"].ToString(), dr["id"].ToString());
//Thread.Sleep(1000);
}
foreach (DataRow dr in Getcountytr_dt.Rows)
{
if (dr["url"].ToString() != "")
{
Gettowntr(dr["url"].ToString(), dr["id"].ToString());
}
//Thread.Sleep(1000);
}
foreach (DataRow dr in Gettowntr_dt.Rows)
{
if (dr["url"].ToString() != "")
{
Getvillagetr(dr["url"].ToString(), dr["id"].ToString());
}
//Thread.Sleep(1000);
}
}
private static void GetPageHtml(string Url)
{
string ip = string.Empty;
int port = 0;
//while (!CanUse(ip, port))
//{
// Random ra = new Random();
// int i = ra.Next(0, ports.Count);
// ip = ports[i].ToString().Split(',')[0];
// port = Int32.Parse(ports[i].ToString().Split(',')[1]);
//}
string strs = string.Empty;
try
{
HttpWebRequest req = (HttpWebRequest)WebRequest.Create(Url);
req.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
//req.Referer = "http://132.228.219.45:8080/BigDataSqlSever/index.action";
int r = new Random().Next(1000);
req.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36" + "." + r.ToString() + Guid.NewGuid().ToString();
//req.Host = "132.228.28.213:8081";
req.ContentType = "application/x-www-form-urlencoded";
req.Method = "GET";
//System.Net.WebProxy proxy = new WebProxy(ip, port);
//req.Proxy = proxy;
//cc.Add(new Uri("http://132.228.28.213:8081"), new Cookie("JSESSIONID", "FE51A1530582C7C52DC92167FC93596C"));
//req.CookieContainer = cc;
req.Headers.Add("DNT", "1");
req.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
req.KeepAlive = true;
req.CookieContainer = cc;
HttpWebResponse wrp = (HttpWebResponse)req.GetResponse();
html = new StreamReader(wrp.GetResponseStream(), Encoding.GetEncoding("gb2312")).ReadToEnd();
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
Console.WriteLine("10秒后继续执行");
Thread.Sleep(10000);
GetPageHtml(Url);
}
}
/// <summary>
/// 第一级
/// </summary>
/// <param name="Html"></param>
/// <returns></returns>
private static Dictionary<string, string> GetMainCity(string Html)
{
Dictionary<string, string> dic = new Dictionary<string, string>();
string html = Html.Substring(Html.IndexOf("<tr class='provincetr'>"));
string keyValue = "<td><a href='";
while (html.Contains(keyValue))
{
html = html.Substring(html.IndexOf(keyValue) + keyValue.Length);
string url = html.Substring(0, html.IndexOf("'"));
string name = html.Substring(html.IndexOf(url) + url.Length + 2);
html = name;
name = name.Substring(0, name.IndexOf("<br/>"));
dic.Add(name, url);
}
return dic;
}
/// <summary>
/// 第二级
/// </summary>
/// <param name="url"></param>
private static void Getcitytr(string url, string p_id)
{
GetPageHtml(url);
string key = "<tr class='citytr'>";
List<string> cityList = new List<string>();
html = html.Substring(html.IndexOf(key));
html = html.Substring(0, html.IndexOf("</table"));
string p_url = url.Substring(0, url.LastIndexOf("/") + 1);
while (html.Contains(key))
{
html = html.Substring(html.IndexOf(key));
string cityname = html.Substring(0, html.IndexOf("</tr>"));
cityList.Add(cityname);
html = html.Substring(html.IndexOf(cityname) + cityname.Length);
}
foreach (string city in cityList)
{
string cityurl = city.Substring(city.IndexOf("<a href='") + 9);
cityurl = cityurl.Substring(0, cityurl.IndexOf("'"));
string id = city.Substring(city.IndexOf(cityurl) + cityurl.Length + 2);
id = id.Substring(0, id.IndexOf("<"));
string cityname = city.Substring(city.IndexOf("<a href='") + 9);
cityname = city.Substring(city.LastIndexOf("<td>"));
cityname = cityname.Replace("<td>", "").Replace("</td>", "").Replace("<a href='" + cityurl + "'>", "").Replace("</a>", "");
if (cityname == "县")
{
}
//cityname = cityname.Substring(cityname.IndexOf(cityurl) + cityurl.Length + 2);
cityurl = p_url + cityurl;
TableAddRow(Getcitytr_dt, id, p_id, cityname, cityurl, 2);
}
}
/// <summary>
/// 第三级
/// </summary>
/// <param name="url"></param>
private static void Getcountytr(string url, string p_id)
{
GetPageHtml(url);
string key = "<tr class='countytr'>";
List<string> cityList = new List<string>();
if (html.IndexOf(key) == -1)
{
key= "<tr class='towntr'>";
}
html = html.Substring(html.IndexOf(key));
html = html.Substring(0, html.IndexOf("</table"));
string p_url = url.Substring(0, url.LastIndexOf("/") + 1);
while (html.Contains(key))
{
html = html.Substring(html.IndexOf(key));
string cityname = html.Substring(0, html.IndexOf("</tr>"));
cityList.Add(cityname);
html = html.Substring(html.IndexOf(cityname) + cityname.Length);
}
foreach (string city in cityList)
{
string cityurl = city.Substring(city.IndexOf("<a href='") + 9);
cityurl = cityurl.Substring(0, cityurl.IndexOf("'"));
string id = string.Empty;
if (city.Contains("市辖区"))
{
}
id = city.Substring(city.IndexOf(cityurl) + cityurl.Length + 2);
id = id.Substring(0, id.IndexOf("<"));
if (!city.Contains("<a"))
{
cityurl = "";
id = city.Substring(city.IndexOf("<td>") + 4);
id = id.Substring(0, id.IndexOf("</td>"));
}
string cityname = city.Substring(city.IndexOf("<a href='") + 9);
cityname = city.Substring(city.LastIndexOf("<td>"));
cityname = cityname.Replace("<td>", "").Replace("</td>", "").Replace("<a href='" + cityurl + "'>", "").Replace("</a>", "");
//cityname = cityname.Substring(cityname.IndexOf(cityurl) + cityurl.Length + 2);
cityurl = cityurl.Contains(".html") ? p_url + cityurl : "";
TableAddRow(Getcountytr_dt, id, p_id, cityname, cityurl, 3);
}
}
/// <summary>
/// 第四级
/// </summary>
/// <param name="url"></param>
/// <param name="p_id"></param>
private static void Gettowntr(string url, string p_id)
{
GetPageHtml(url);
string key = "<tr class='towntr'>";
List<string> cityList = new List<string>();
html = html.Substring(html.IndexOf(key));
html = html.Substring(0, html.IndexOf("</table"));
string p_url = url.Substring(0, url.LastIndexOf("/") + 1);
while (html.Contains(key))
{
html = html.Substring(html.IndexOf(key));
string cityname = html.Substring(0, html.IndexOf("</tr>"));
cityList.Add(cityname);
html = html.Substring(html.IndexOf(cityname) + cityname.Length);
}
foreach (string city in cityList)
{
string cityurl = city.Substring(city.IndexOf("<a href='") + 9);
cityurl = cityurl.Substring(0, cityurl.IndexOf("'"));
string id = string.Empty;
id = city.Substring(city.IndexOf(cityurl) + cityurl.Length + 2);
id = id.Substring(0, id.IndexOf("<"));
if (!city.Contains("<a"))
{
cityurl = "";
id = city.Substring(city.IndexOf("<td>") + 4);
id = id.Substring(0, id.IndexOf("</td>"));
}
string cityname = city.Substring(city.IndexOf("<a href='") + 9);
cityname = city.Substring(city.LastIndexOf("<td>"));
cityname = cityname.Replace("<td>", "").Replace("</td>", "").Replace("<a href='" + cityurl + "'>", "").Replace("</a>", "");
//cityname = cityname.Substring(cityname.IndexOf(cityurl) + cityurl.Length + 2);
cityurl = cityurl.Contains(".html") ? p_url + cityurl : "";
TableAddRow(Gettowntr_dt, id, p_id, cityname, cityurl, 4);
}
}
/// <summary>
/// 第五级
/// </summary>
/// <param name="url"></param>
/// <param name="p_id"></param>
private static void Getvillagetr(string url, string p_id)
{
GetPageHtml(url);
string key = "<tr class='villagetr'>";
List<string> cityList = new List<string>();
html = html.Substring(html.IndexOf(key));
html = html.Substring(0, html.IndexOf("</table"));
string p_url = url.Substring(0, url.LastIndexOf("/") + 1);
while (html.Contains(key))
{
html = html.Substring(html.IndexOf(key));
string cityname = html.Substring(0, html.IndexOf("</tr>"));
cityList.Add(cityname);
html = html.Substring(html.IndexOf(cityname) + cityname.Length);
}
foreach (string city in cityList)
{
//city = "<tr class=\"villagetr\"><td>441900003001</td><td>111</td><td>岗贝社区居民委员会</td></tr>";
string cityurl = "";
string id = string.Empty;
id = city.Substring(city.IndexOf("<td>") + 4);
id = id.Substring(0, id.IndexOf("</td>"));
string cityname = city.Substring(city.LastIndexOf("<td>") + 4);
cityname = city.Substring(city.LastIndexOf("</td>"));
//cityname = cityname.Replace("<td>", "").Replace("</td>", "").Replace("<a href='" + cityurl + "'>", "").Replace("</a>", "");
//cityname = cityname.Substring(cityname.IndexOf(cityurl) + cityurl.Length + 2);
//cityurl = p_url + cityurl;
TableAddRow(Getvillagetr_dt, id, p_id, cityname, cityurl, 5);
}
}
private static void TableAddRow(DataTable DT, string id, string p_id, string name, string url, int level_id)
{
count++;
DataRow dr = DT.NewRow();
dr["ID"] = id;
dr["P_ID"] = p_id;
dr["NAME"] = name;
dr["URL"] = url;
dr["LEVEL_ID"] = level_id.ToString();
DT.Rows.Add(dr);
InsetData(id, p_id, name, level_id);
Console.WriteLine(count.ToString()+" 第" + level_id.ToString() + "级 " + id + "------" + name);
}
private static void InsetData(string id, string p_id, string name, int level_id)
{
DoCommand("INSERT INTO kangyu_all_city_list VALUES('" + id + "','" + p_id + "','" + name + "'," + level_id + ")");
}
public static void DoCommand(string ConnString)
{
OleDbConnection conn = new OleDbConnection("Provider=Microsoft.Jet.OLEDB.4.0;Data Source=city_db.mdb"); //Jet OLEDB:Database Password=
try
{
conn.Open();
OleDbCommand cmd = conn.CreateCommand();
cmd.CommandText = ConnString;
cmd.ExecuteNonQuery();
}
catch(Exception EX) { }
finally
{
conn.Close();
conn.Dispose();
}
}
}
}

Comment ( 0 )

Sign in for post a comment

C#
1
https://gitee.com/kangyu1122/GetCityApp.git
git@gitee.com:kangyu1122/GetCityApp.git
kangyu1122
GetCityApp
获取国家统计局五级行政区域
master

Search