三种asp.net 抓取网页源码实现

写法1(比较推荐)
    /// <summary>
    /// 用HttpWebRequest取得网页源码
    /// 对于带BOM的网页很有效,不管是什么编码都能正确识别
    /// </summary>
    /// <param name="url">网页地址</param>
    /// <returns>返回网页源文件</returns>
    public static string GetHtmlSource2(string  url)
    {
    //处理内容
    string html = "";
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Accept = "*/*"; //接受任意文件
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)"; // 模拟使用IE在浏览 http://www.52mvc.com
    request.AllowAutoRedirect = true;//是否允许302
    //request.CookieContainer = new CookieContainer();//cookie容器,
    request.Referer = url; //当前页面的引用
    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
    Stream stream = response.GetResponseStream();
    StreamReader reader = new StreamReader(stream, Encoding.Default);
    html = reader.ReadToEnd();
    stream.Close();
    return html;
    }
    写法2
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.IO;
    using System.Text;
    using System.Net;
    namespace MySql
    {
    public class GetHttpData
    {
    public static string GetHttpData2(string Url)
    {
    string sException = null;
    string sRslt = null;
    WebResponse oWebRps = null;
    WebRequest oWebRqst = WebRequest.Create(Url);
    oWebRqst.Timeout = 50000;
    try
    {
    oWebRps = oWebRqst.GetResponse();
    }
    catch (WebException e)
    {
    sException = e.Message.ToString();
    }
    catch (Exception e)
    {
    sException = e.ToString();
    }
    finally
    {
    if (oWebRps != null)
    {
    StreamReader oStreamRd = new StreamReader(oWebRps.GetResponseStream(), Encoding.GetEncoding("utf-8"));
    sRslt = oStreamRd.ReadToEnd();
    oStreamRd.Close();
    oWebRps.Close();
    }
    }
    return sRslt;
    }
    }
    }

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注

Grow your business fast with

Suku