入门级
import requests
import re
import os
from urllib import error
def main(
    dir_path=r"E:\python\yirenzhixia-images",
    word="一人之下壁纸",
    max_offset=50,
):
    """Download thumbnail images from image-search result pages for *word*.

    Result pages are requested at increasing ``pn`` offsets, starting at 1,
    for as long as the offset stays below *max_offset*. Every ``thumbURL``
    ending in ``.jpg`` that is found in a page is downloaded and written to
    *dir_path* as ``girl_image_<n>.jpg``, where ``<n>`` counts the images
    saved so far, starting at 0.

    Args:
        dir_path: Directory the images are written to; created if missing.
        word: Search keyword sent as the ``word`` query parameter.
        max_offset: Exclusive upper bound for the ``pn`` page offset.
    """
    # NOTE(review): the pasted source only contained the path
    # "/search/index"; requests needs an absolute URL, so the Baidu
    # image-search host is assumed here (inferred from tn=baiduimage) --
    # confirm against the original script.
    search_url = "https://image.baidu.com/search/index"
    # The dot before "jpg" is escaped so only real ".jpg" suffixes match.
    thumb_pattern = re.compile(r'"thumbURL":"(.*?\.jpg)"', re.S)

    # open() does not create missing parent directories.
    os.makedirs(dir_path, exist_ok=True)

    offset = 1
    saved = 0
    while offset < max_offset:
        # Build the query fresh for every page; appending the offset to one
        # ever-growing URL string would corrupt the "pn" value.
        query = {"word": word, "ie": "utf-8", "tn": "baiduimage", "pn": offset}
        try:
            page = requests.get(search_url, params=query, timeout=10)
        except requests.RequestException:
            # Best effort: skip this offset and try the next one.
            offset += 1
            continue

        thumb_urls = thumb_pattern.findall(page.text)
        if not thumb_urls:
            offset += 1
            continue
        # Advance past all results that this page returned.
        offset += len(thumb_urls)

        for thumb_url in thumb_urls:
            print(thumb_url)
            try:
                image = requests.get(thumb_url, timeout=7)
                # Do not store an HTTP error page as if it were a picture.
                image.raise_for_status()
            except requests.RequestException:
                print("当前图片无法下载")
                continue
            file_path = os.path.join(
                dir_path, "girl_image_" + str(saved) + ".jpg"
            )
            with open(file_path, "wb") as image_file:
                image_file.write(image.content)
            saved += 1
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
效果:
标签:__,python,text,image,list,爬虫,爬取,url,import
来源: /m0_37738114/article/details/90486678