C# 抓取网页内容并保存TXT代码

/// <summary>
/// Writes one entry to a text file, creating the file and its directory when they do not exist.
/// </summary>
/// <remarks>
/// When the file is created by this call, <paramref name="textContent"/> is written as a plain line.
/// When the file already exists, the entry is appended as
/// "<c>DateTime.Now.ToString()</c> + space + text + "\n"", followed by the writer's own line terminator.
/// </remarks>
/// <param name="Path">Path of the file to write to.</param>
/// <param name="textContent">Text of the entry.</param>
public static void WriteLog(string Path, string textContent)
{
    // The parameter is called Path, so the System.IO.Path class has to be fully qualified here.
    string directory = System.IO.Path.GetDirectoryName(Path);
    if (!string.IsNullOrEmpty(directory))
    {
        // Without this, File.CreateText fails with DirectoryNotFoundException
        // on the first write into a folder that has not been created yet.
        Directory.CreateDirectory(directory);
    }

    if (!File.Exists(Path))
    {
        // New file: the first entry is stored without a timestamp.
        using (StreamWriter writer = File.CreateText(Path))
        {
            writer.WriteLine(textContent);
        }
        return;
    }

    // Existing file: append a timestamped entry; disposal by `using` flushes and closes the writer.
    using (StreamWriter writer = File.AppendText(Path))
    {
        writer.WriteLine(DateTime.Now.ToString() + " " + textContent + "\n");
    }
}

/// <summary>
/// Processes house-list pages 1 through 168 and appends the extracted text of each page
/// to <c>C://CNSI//house.txt</c> via <see cref="WriteLog"/>.
/// </summary>
/// <remarks>
/// For every page the handler reads the saved copy <c>C:\house\{page}.htm</c> (GB2312),
/// requests the corresponding list URL, runs the extraction pattern over the saved copy,
/// concatenates all matches except the first one, strips a fixed list of fragments and
/// writes the result. A message box with "ok" is shown when all pages are done.
/// </remarks>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button3_Click(object sender, EventArgs e)
{
    const int lastPage = 168;

    // NOTE(review): apart from [\s\S]*? this pattern contains only line breaks and spaces.
    // Presumably the HTML tags that delimit the wanted fragment were lost from this copy of
    // the code — restore them from the target page's markup before relying on the output.
    string rule = @"

    [\s\S]*?
      ";
    Regex extractor = new Regex(rule, RegexOptions.IgnoreCase | RegexOptions.Multiline);

    // Fragments removed from the extracted text, applied in exactly this order.
    // NOTE(review): these literals hold only whitespace and bullet characters; presumably
    // they were HTML tags originally — confirm and restore. A tenth replacement whose search
    // text was the empty string has been dropped: String.Replace throws ArgumentException
    // for an empty search string, which aborted the handler before anything was written.
    string[] fragmentsToStrip =
    {
        @"
            ",
        @"
              ",
        @"
            • ",
        @"
            • ",
        @"
            • ",
        @"
            • ",
        @"
            • ",
        @"
            ",
        @"
              ",
    };

    for (int page = 1; page <= lastPage; page++)
    {
        // Saved copy of the page; this is the text the pattern is applied to.
        string content = File.ReadAllText(@"C:\house\" + page.ToString() + @".htm", Encoding.GetEncoding("GB2312"));

        // Request the live page as well.
        // NOTE(review): the downloaded text is not used by the extraction below, and the host
        // part of this URL looks like a placeholder — confirm whether the request is still needed.
        string webhttp = "https://www.360docs.net/doc/125678138.html,/newhouse/newhouse/HouseList.aspx?__EVENTARGUMENT=" + page.ToString() + "&__EVENTTARGET=AspNetPager1";
        System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(webhttp);
        request.Method = "GET";
        request.KeepAlive = false; // do not keep a persistent connection to the server

        // `using` releases the response, stream and reader even when reading fails.
        using (System.Net.WebResponse response = request.GetResponse())
        using (System.IO.Stream resStream = response.GetResponseStream())
        using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, System.Text.Encoding.Default))
        {
            sr.ReadToEnd();
        }

        MatchCollection mc = extractor.Matches(content);

        // Index 0 is skipped on purpose; presumably the first match is the table header — TODO confirm.
        System.Text.StringBuilder extracted = new System.Text.StringBuilder();
        for (int i = 1; i < mc.Count; i++)
        {
            extracted.Append(mc[i].Value);
        }

        string contentxml = extracted.ToString();
        foreach (string fragment in fragmentsToStrip)
        {
            contentxml = contentxml.Replace(fragment, string.Empty);
        }

        WriteLog("C://CNSI//house.txt", contentxml);
    }

    MessageBox.Show("ok");
}

              //------------------------------------------------
              //保存文本为Word文件
              //----------------------------------------------
              /// <summary>
              /// Saves the text of <c>textBox4</c> as a Word document in the <c>C:/CNSI</c> directory.
              /// </summary>
              /// <remarks>
              /// The file name is made of up to five characters of the text starting at index 2, plus ".doc".
              /// Nothing is saved when the text has no characters from index 2 onward.
              /// Success and failure are both reported through a message box.
              /// </remarks>
              /// <param name="sender">Event source (not used).</param>
              /// <param name="e">Event data (not used).</param>
              private void button4_Click(object sender, System.EventArgs e)
              {
                  string text = this.textBox4.Text;

                  // Substring(2, 5) throws ArgumentOutOfRangeException for texts shorter than
                  // seven characters, so clamp the length to what is actually available.
                  int nameLength = Math.Min(5, text.Length - 2);
                  if (nameLength < 1)
                  {
                      return;
                  }

                  string FileName = text.Substring(2, nameLength) + ".doc";

                  try
                  {
                      // Make sure the output directory exists.
                      Directory.CreateDirectory("C:/CNSI");

                      object nothing = System.Reflection.Missing.Value;
                      Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
                      try
                      {
                          Microsoft.Office.Interop.Word.Document doc = word.Documents.Add(ref nothing, ref nothing, ref nothing, ref nothing);
                          doc.Paragraphs.Last.Range.Text = text;

                          // Full path of the document to write.
                          object myfileName = "C://CNSI//" + FileName;

                          // Save the document content as a .doc file, then close the document.
                          doc.SaveAs(ref myfileName, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing, ref nothing);
                          doc.Close(ref nothing, ref nothing, ref nothing);
                      }
                      finally
                      {
                          // Always shut Word down, otherwise a failed save leaves the process running.
                          // wdDoNotSaveChanges keeps Quit from asking about a document that is still
                          // open after a failure; after a successful Close it has nothing to discard.
                          object doNotSave = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;
                          word.Quit(ref doNotSave, ref nothing, ref nothing);
                      }

                      MessageBox.Show("Word文件保存成功", "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                  }
                  catch (System.Exception ex)
                  {
                      MessageBox.Show(this, ex.Message.ToString(), "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                  }
              }

相关文档
最新文档