三种 ASP.NET 抓取网页源码的实现方法

// Approach 1 (recommended)
    /// <summary>
    /// Downloads the page at <paramref name="url"/> with an <see cref="HttpWebRequest"/>
    /// and returns its source text.
    /// Pages that start with a byte-order mark are decoded with the encoding the BOM
    /// announces; pages without one fall back to <see cref="Encoding.Default"/>.
    /// </summary>
    /// <param name="url">Address of the page to download (must be an http/https URL).</param>
    /// <returns>The source text of the page.</returns>
    /// <exception cref="WebException">The request failed or timed out.</exception>
    public static string GetHtmlSource2(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Accept = "*/*"; // accept any content type
        // Present ourselves as Internet Explorer; some sites reject unknown clients.
        // The parentheses must be plain ASCII: the original snippet had them mangled
        // into fullwidth characters, which corrupts the header value.
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
        request.AllowAutoRedirect = true; // follow 302 redirects
        // To send/keep cookies, assign a CookieContainer to request.CookieContainer here.
        request.Referer = url; // report the page itself as the referrer

        // Dispose the response, stream and reader even when reading throws, so the
        // underlying connection is always returned to the pool.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        // This StreamReader overload sniffs a leading BOM and only uses the supplied
        // encoding when none is present.
        using (StreamReader reader = new StreamReader(stream, Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }
    写法2
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.IO;
    using System.Text;
    using System.Net;
    namespace MySql
    {
        /// <summary>
        /// Helper for downloading the text behind a URL.
        /// </summary>
        public class GetHttpData
        {
            /// <summary>
            /// Requests <paramref name="Url"/> and returns the response body decoded as UTF-8.
            /// </summary>
            /// <param name="Url">Address of the resource to download.</param>
            /// <returns>
            /// The response body, or <c>null</c> when the request itself fails
            /// (for example a network error, an error status or a timeout).
            /// </returns>
            public static string GetHttpData2(string Url)
            {
                WebRequest request = WebRequest.Create(Url);
                request.Timeout = 50000; // milliseconds

                WebResponse response;
                try
                {
                    response = request.GetResponse();
                }
                catch (Exception)
                {
                    // Best-effort contract kept from the original implementation:
                    // any failure to obtain a response is reported to the caller as null.
                    return null;
                }

                // Dispose the response, its stream and the reader even if reading throws.
                using (response)
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }

柠檬铺

三种asp.net 抓取网页源码实现

发表回复取消回复

Grow your business fast with

Suku

三种asp.net 抓取网页源码实现

发表回复 取消回复

Grow your business fast with

Suku

发表回复取消回复