一、窗体应用程序界面:
二、上源码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace WebCatchTest0911
{
    /// <summary>
    /// Demo form: downloads the HTML source of the URL typed into <c>textBox1</c>
    /// when the start button is clicked.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Cookies received with the most recent successful response. They are
        /// attached to the next request issued by <see cref="GetWebPageSource"/>.
        /// </summary>
        public static CookieCollection CC = new CookieCollection();

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler of the start button: fetches the page whose address is in <c>textBox1</c>.
        /// </summary>
        private void btn_Start_Click(object sender, EventArgs e)
        {
            // The downloaded source is not consumed here yet; the fetch runs
            // synchronously on the calling (UI) thread.
            string str = GetWebPageSource(textBox1.Text.Trim());
        }

        /// <summary>
        /// Downloads the page at <paramref name="Url"/> with an HTTP GET request and
        /// returns its body decoded as GBK text.
        /// </summary>
        /// <param name="Url">
        /// Address of the page. A leading "about" is rewritten to "http" before the
        /// request is made.
        /// </param>
        /// <returns>
        /// The response body, or an empty string when <paramref name="Url"/> is null or
        /// blank or when the request fails for any reason.
        /// </returns>
        public static string GetWebPageSource(string Url)
        {
            // Treat a missing address like any other failure instead of throwing.
            if (string.IsNullOrWhiteSpace(Url))
            {
                return "";
            }

            // Rewrite only a leading "about" (presumably an "about:" scheme prefix --
            // TODO confirm). Replacing every occurrence would corrupt URLs that merely
            // contain the word, e.g. "http://example.com/about-us".
            const string aboutPrefix = "about";
            if (Url.StartsWith(aboutPrefix, StringComparison.Ordinal))
            {
                Url = "http" + Url.Substring(aboutPrefix.Length);
            }

            try
            {
                // Sample address left by the original author: http://brand.tmall.com/brandMap.htm
                var request = (HttpWebRequest)WebRequest.Create(Url);
                request.Method = "GET";

                // "GBK" is a character set, not a content coding, so it is advertised
                // through Accept-Charset rather than Accept-Encoding.
                request.Headers.Add("Accept-Charset", "GBK");
                request.Headers.Add("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
                request.Headers.Add("Cache-Control", "max-age=0");
                request.KeepAlive = true;

                // The Host header is left to be derived from the request URL; forcing a
                // fixed host name would send the wrong Host for every other site.
                request.ProtocolVersion = HttpVersion.Version11;
                request.ContentType = "text/html; charset=GBK";
                request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36";
                request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
                request.AllowAutoRedirect = true;

                // Replay the cookies captured from the previous response.
                request.CookieContainer = new CookieContainer();
                request.CookieContainer.Add(CC);

                // Dispose the response and the reader even if reading throws.
                using (var response = (HttpWebResponse)request.GetResponse())
                using (var reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GBK")))
                {
                    string text = reader.ReadToEnd();
                    CC = response.Cookies;
                    return text;
                }
            }
            catch (Exception ex)
            {
                // Callers rely on "" as the failure result; keep that contract but
                // leave a trace of what went wrong instead of hiding it completely.
                System.Diagnostics.Debug.WriteLine(ex);
                return "";
            }
        }
    }
}
三、总结
1)、HttpWebRequest 的各项请求头参数(如 User-Agent、Accept、Accept-Language、Cookie 等)可以在浏览器开发者工具(F12)的"网络"面板中查看,并照此设置;
2)、注意释放资源:HttpWebResponse、StreamReader 等实现了 IDisposable 的对象用完后要及时 Dispose(推荐使用 using 语句);
四、下章实现提取网页内容