需求:在百度搜索,然后将搜索结果保存到文件bd_python66.html
百度搜索的url:https://www.baidu.com/s?wd=搜索词
使用params参数进行url传参,代码如下:
# -*- coding: utf-8 -*-
import requests
import re
def get_html(url, key_value, retry=2):
    """Fetch *url* with query parameters and return the response body as text.

    Args:
        url: Address to request.
        key_value: Mapping passed to ``requests.get`` as ``params`` (the
            query string).
        retry: Number of additional attempts to make after a failed request.

    Returns:
        The response text decoded as UTF-8, or ``None`` when the request and
        every retry raised an exception.

    The request headers come from the module-level ``headers`` name, which the
    ``__main__`` section assigns before calling this function.
    """
    try:
        r = requests.get(url=url, headers=headers, params=key_value, timeout=5)
    except Exception as e:
        print(e)
        if retry > 0:
            # Forward key_value (the original call dropped it, so the retry
            # counter was sent as the query params) and return the retried
            # result so a successful retry reaches the caller.
            return get_html(url, key_value, retry - 1)
        return None
    else:
        # Force UTF-8 decoding instead of relying on the encoding guessed
        # from the response headers.
        r.encoding = 'utf-8'
        return r.text
if __name__ == "__main__":
    # Custom request headers; get_html() reads this module-level name.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    }
    # NOTE(review): the listing only had '/s?' (no scheme/host), which requests
    # rejects. Restored to the Baidu search endpoint described in the
    # accompanying text -- confirm.
    url = 'https://www.baidu.com/s?'  # note this url: the query is sent via params
    # NOTE(review): the search keyword is empty in this listing; fill in the
    # term to search for.
    kw = {'wd': ''}
    html = get_html(url, kw)
    # Extract the page title. The pattern had lost its <title> tags and was
    # split across two lines, which is a syntax error.
    title = re.search('<title>(.*?)</title>', html) if html else None
    if title is not None:
        print(title.group(1))
    else:
        # get_html() returned nothing or the page has no <title> element.
        print('未获取到网页title')
D:\python3\install\python.exe D:/python/py3script/test.py
_百度搜索
Process finished with exit code 0
url重定向演示,Github 将所有的 HTTP 请求重定向到 HTTPS。案例代码如下:
# -*- coding: utf-8 -*-
import requests
def get_html(url, retry=2):
    """Request *url* and print its redirect history, final URL and status code.

    A failed request prints the exception and is attempted again while
    ``retry`` is greater than zero. Nothing is returned. The request headers
    are read from the module-level ``headers`` name.
    """
    attempts_left = retry
    while True:
        try:
            response = requests.get(url=url, headers=headers, timeout=5)
        except Exception as err:
            print(err)
            if attempts_left > 0:
                attempts_left = attempts_left - 1
                continue
            return
        # Success: report how the request was redirected.
        print('重定向', response.history)
        print('重定向后的请求url', response.url)
        print(response.status_code)
        return
if __name__ == "__main__":
    # Custom request headers; get_html() reads this module-level name.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    }
    # NOTE(review): the listing only had '/' (no scheme/host), which requests
    # rejects. Restored to the plain-HTTP GitHub address that the accompanying
    # text uses for the redirect demo -- confirm.
    url = 'http://github.com/'
    get_html(url)
D:\python3\install\python.exe D:/python/py3script/test.py
重定向 [<Response [301]>]
重定向后的请求url https://github.com/
200
Process finished with exit code 0
通过 allow_redirects 参数禁用重定向处理:
# -*- coding: utf-8 -*-
import requests
def get_html(url, retry=2):
    """Request *url* without following redirects and print the outcome.

    Prints the redirect history, the request URL and the status code. A failed
    request prints the exception and is attempted again while ``retry`` is
    greater than zero. Nothing is returned. The request headers are read from
    the module-level ``headers`` name.
    """
    try:
        response = requests.get(
            url=url,
            headers=headers,
            allow_redirects=False,  # keep the 3xx response itself
            timeout=5,
        )
    except Exception as err:
        print(err)
        if retry > 0:
            get_html(url, retry - 1)
        return

    print('重定向', response.history)
    print('请求url', response.url)
    print(response.status_code)
if __name__ == "__main__":
    # Custom request headers; get_html() reads this module-level name.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    }
    # NOTE(review): the listing only had '/' (no scheme/host), which requests
    # rejects. Restored to the plain-HTTP GitHub address so the un-followed
    # 301 response can be observed -- confirm.
    url = 'http://github.com/'
    get_html(url)
D:\python3\install\python.exe D:/python/py3script/test.py
重定向 []
请求url http://github.com/
301
Process finished with exit code 0
cookie自动登录:如果不登录人人网,是无法访问个人主页的。我们登录人人网后,通过浏览器抓包找到cookie,然后把cookie加到自己构造的请求头里面,再访问个人主页的url,一样可以获取到正常信息(推荐阅读:cookie是什么),代码如下:
# -*- coding: utf-8 -*-
import requests
import re
def get_html(url, retry=2):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request.
        retry: Number of additional attempts to make after a failed request.

    Returns:
        The response text, or ``None`` when the request and every retry
        raised an exception.

    The request headers (including the login cookie) come from the
    module-level ``headers`` name, which the ``__main__`` section assigns
    before calling this function.
    """
    try:
        r = requests.get(url=url, headers=headers, timeout=5)
    except Exception as e:
        print(e)
        if retry > 0:
            # Return the retried result; the original discarded it, so the
            # caller received None even when a retry succeeded.
            return get_html(url, retry - 1)
        return None
    else:
        return r.text
if __name__ == "__main__":
    # Custom request headers; get_html() reads this module-level name.
    # The Cookie value is a logged-in session captured from the browser.
    # NOTE(review): the '[emailprotected]' segment looks garbled in this
    # listing; replace the whole value with a cookie from your own session,
    # and avoid committing real session cookies.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'Cookie': 'anonymid=jy80yf87nu48vb; depovince=BJ; jebecookies=b398e24f-6670-48af-a58e-e6fd6456bcd6|||||; _r01_=1; JSESSIONID=abcrmwfsBwR_ufXciZcWw; ick_login=8a9f224f-1671-41ed-ab25-74e0a42ac995; _de=96965DC06F71F402340E4CEC836F3769696BF75400CE19CC; p=cae405d7c6e785f089ca39606c9d88695; first_login_flag=1; [emailprotected]; ln_hurl=/photos/hdn521/1208/1350/h_main_sadA_14a1000031012f76.jpg; t=940ac4b9ace0423b80a81dee055637955; societyguester=940ac4b9ace0423b80a81dee055637955; id=347908095; xnsid=fd3328ae; ver=7.0; loginfrom=null; jebe_key=8cc12fc2-9a64-4553-85ea-671b395d345b%7Cef398f6216b3a86b3d29665bee53e231%7C1563415076620%7C1%7C1563415078837; jebe_key=8cc12fc2-9a64-4553-85ea-671b395d345b%7Cef398f6216b3a86b3d29665bee53e231%7C1563415076620%7C1%7C1563415078840; wp_fold=0',
    }
    # NOTE(review): the listing only had the path (no scheme/host), which
    # requests rejects. Restored to the Renren host that the accompanying
    # text refers to -- confirm.
    url = 'http://www.renren.com/347908095/profile'
    html = get_html(url)
    # Extract the page title. The pattern had lost its <title> tags and was
    # split across two lines, which is a syntax error.
    title = re.search('<title>(.*?)</title>', html) if html else None
    if title is not None:
        print(title.group(1))
    else:
        # get_html() returned nothing or the page has no <title> element.
        print('未获取到网页title')
D:\python3\install\python.exe D:/python/py3script/test.py
人人网 - 老董
Process finished with exit code 0