700字范文,内容丰富有趣,生活中的好帮手!
700字范文 > 用html制作搜狗搜索 详解基于搜狗搜索开发的微信公众号的爬虫系统(C#)

用html制作搜狗搜索 详解基于搜狗搜索开发的微信公众号的爬虫系统(C#)

时间:2023-11-14 20:11:49

相关推荐

用html制作搜狗搜索 详解基于搜狗搜索开发的微信公众号的爬虫系统(C#)

/// <summary>
/// Performs an HTTP GET request against <paramref name="url"/> and returns the response body.
/// </summary>
/// <param name="headers">
/// Request headers. Only User-Agent, Referer, Host, Content-Type (also accepted as
/// "contenttype") and Accept are applied to the request; every other key is ignored.
/// May be null, in which case only the defaults set below are used.
/// </param>
/// <param name="url">URL to request.</param>
/// <param name="responseEncoding">
/// Name of the encoding used to decode a non-image body: "UTF-8", "unicode" or "ascii"
/// (case-insensitive). Any other value, including null, selects <see cref="Encoding.Default"/>.
/// </param>
/// <param name="isUseCookie">
/// When true, the cookies returned by Tools.LoadCookieFromCache() are sent with the request and
/// any cookies on the response are written to the WechatCache under the key "cookieCollection".
/// </param>
/// <returns>
/// The Base64-encoded JPEG when the response Content-Type is image/jpeg or image/jpg; otherwise
/// the decoded text body. Failures are logged rather than rethrown: the result is then an empty
/// string, or the text of the anti-crawler verification page when that page was detected
/// (in which case _vcode_url is set to <paramref name="url"/>).
/// </returns>
public string Get(WebHeaderCollection headers, string url ,string responseEncoding="UTF-8",bool isUseCookie = false)
{
    string responseText = "";

    try
    {
        var request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = "GET";

        if (headers != null)
        {
            foreach (string key in headers.Keys)
            {
                // Header names are protocol tokens, so lower-case them culture-invariantly.
                switch (key.ToLowerInvariant())
                {
                    case "user-agent":
                        request.UserAgent = headers[key];
                        break;
                    case "referer":
                        request.Referer = headers[key];
                        break;
                    case "host":
                        request.Host = headers[key];
                        break;
                    case "content-type": // the real header name; the original only matched "contenttype"
                    case "contenttype":
                        request.ContentType = headers[key];
                        break;
                    case "accept":
                        request.Accept = headers[key];
                        break;
                    default:
                        // Any other header is intentionally not forwarded.
                        break;
                }
            }
        }

        if (string.IsNullOrEmpty(request.Referer))
        {
            // NOTE(review): "/" is not an absolute URL - confirm the intended default referer.
            request.Referer = "/";
        }

        if (string.IsNullOrEmpty(request.Host))
        {
            // NOTE(review): HttpWebRequest.Host normally falls back to the host of the request
            // URL, so this branch is probably never taken; the empty literal looks like a lost
            // placeholder - confirm the intended default host.
            request.Host = "";
        }

        if (string.IsNullOrEmpty(request.UserAgent))
        {
            var agents = WechatSogouBasic._agent;
            if (agents.Count > 0)
            {
                // Random.Next(max) excludes max, so pass Count (not Count - 1);
                // otherwise the last user agent could never be selected.
                Random random = new Random();
                request.UserAgent = agents[random.Next(agents.Count)];
            }
        }

        if (isUseCookie)
        {
            CookieCollection cachedCookies = Tools.LoadCookieFromCache();
            request.CookieContainer = new CookieContainer();
            // CookieContainer.Add rejects null, so tolerate a missing cookie cache.
            if (cachedCookies != null)
            {
                request.CookieContainer.Add(cachedCookies);
            }
        }

        // The using blocks release the connection, stream and reader even when one of the
        // exceptions below is thrown; the original only closed them on the success path.
        using (var response = (HttpWebResponse)request.GetResponse())
        {
            if (isUseCookie && response.Cookies.Count > 0)
            {
                var cookieCollection = response.Cookies;
                WechatCache cache = new WechatCache(Config.CacheDir, 3000);
                // Add returns false when the entry could not be added; fall back to Update.
                if (!cache.Add("cookieCollection", cookieCollection, 3000))
                {
                    cache.Update("cookieCollection", cookieCollection, 3000);
                }
            }

            // Compare only the media type, ignoring case and parameters such as "; charset=...".
            string contentType = response.Headers.Get("Content-Type") ?? "";
            int parameterStart = contentType.IndexOf(';');
            string mediaType = (parameterStart >= 0 ? contentType.Substring(0, parameterStart) : contentType).Trim();
            bool isJpeg = mediaType.Equals("image/jpeg", StringComparison.OrdinalIgnoreCase)
                || mediaType.Equals("image/jpg", StringComparison.OrdinalIgnoreCase);

            using (Stream dataStream = response.GetResponseStream())
            {
                if (isJpeg)
                {
                    // Image response: re-encode as JPEG and return the bytes as Base64.
                    using (Image img = Image.FromStream(dataStream, true))
                    using (MemoryStream ms = new MemoryStream())
                    {
                        img.Save(ms, System.Drawing.Imaging.ImageFormat.Jpeg);
                        responseText = Convert.ToBase64String(ms.ToArray());
                    }
                }
                else
                {
                    // Text response: decode the body with the requested encoding.
                    Encoding encoding;
                    switch ((responseEncoding ?? "").ToLowerInvariant())
                    {
                        case "utf-8":
                            encoding = Encoding.UTF8;
                            break;
                        case "unicode":
                            encoding = Encoding.Unicode;
                            break;
                        case "ascii":
                            encoding = Encoding.ASCII;
                            break;
                        default:
                            encoding = Encoding.Default;
                            break;
                    }

                    using (StreamReader reader = new StreamReader(dataStream, encoding))
                    {
                        if (response.StatusCode == HttpStatusCode.OK)
                        {
                            responseText = reader.ReadToEnd();
                            if (responseText.Contains("用户您好,您的访问过于频繁,为确认本次访问为正常用户行为,需要您协助验证"))
                            {
                                // Remember which URL triggered the verification page. The exception
                                // is logged by the catch below and the page text is still returned.
                                _vcode_url = url;
                                throw new Exception(" verification code");
                            }
                        }
                        else
                        {
                            logger.Error("requests status_code error" + response.StatusCode);
                            throw new Exception("requests status_code error");
                        }
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        // Best-effort contract: failures are logged and the caller receives whatever
        // responseText holds at this point (usually the empty string).
        logger.Error(e);
    }

    return responseText;
}

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。