class SciencedirectspiderSpider(scrapy.Spider):
    """Spider that builds its search URL from command-line spider arguments.

    The ``search`` and ``year`` values arrive through Scrapy's ``-a`` option,
    for example::

        scrapy crawl sciencedirectspider -a "search=kidney stone" -a "year="

    NOTE(review): the ``page`` callback used by ``start_requests`` is not part
    of this excerpt; it is expected to be defined on this class elsewhere.
    """

    name = 'sciencedirectspider'
    allowed_domains = ['']
    start_urls = ['/search?qs=kidney%20stone']

    def __init__(self, year='', search='', **kwargs):
        """Store the spider arguments, build the search URL and open a browser.

        Args:
            year: Value placed in the ``years`` query parameter.
            search: Value placed in the ``qs`` query parameter.
            **kwargs: Any further spider arguments; forwarded to
                ``scrapy.Spider`` so they are not silently discarded.
        """
        # Initialise the base class first and hand it the remaining arguments;
        # the attributes assigned below then take precedence over any
        # same-named extra argument.
        super().__init__(**kwargs)
        self.year = year
        self.search = search
        self.urls = '/search?qs=' + search + '&years=' + year + '&sortBy=date'
        # `chorme_options` is defined outside this excerpt.
        # NOTE(review): `chrome_options=` is the legacy keyword; newer Selenium
        # releases expect `options=` -- confirm against the installed version.
        self.browser = webdriver.Chrome(chrome_options=chorme_options)

    def start_requests(self):
        """Yield the single initial request for the URL built in ``__init__``.

        The URL is also passed along in ``meta['url']`` for the callback.
        """
        # Presumably the XPath of the pagination text on the result page,
        # kept for reference: //*[@id="srp-pagination"]/li[1]/text()[4]
        yield scrapy.Request(self.urls, callback=self.page, meta={'url': self.urls})
执行命令:scrapy crawl sciencedirectspider --nolog -a "search=kidney stone" -a "year="
注意:一个 -a 只能传递一个参数,传递多个参数时需要重复使用 -a。
在 main 脚本中对应的执行语句:
from scrapy.cmdline import execute

# Launch the crawl from Python instead of a shell. The argument list mirrors:
#   scrapy crawl sciencedirectspider --nolog -a "search=kidney stone" -a "year="
# Without spider arguments the call would simply be:
#   execute(['scrapy', 'crawl', 'sciencedirectspider', '--nolog'])
execute([
    'scrapy', 'crawl', 'sciencedirectspider',
    '--nolog',                    # do not print the log
    '-a', 'search=kidney stone',  # every spider argument gets its own -a flag
    '-a', 'year=',
])
发送带 JSON payload 参数的 POST 请求:
# NOTE: the original snippet opened with a promotional blurb written as a bare
# string literal; it had no runtime effect and is omitted here.
class IeeexplorespiderSpider(scrapy.Spider):
    """Spider that issues one POST request carrying a JSON payload.

    ``start_requests`` serialises a search payload with ``json.dumps`` and
    sends it as the request body; ``parse`` prints the response text.
    """

    name = 'ieeexplorespider'
    allowed_domains = ['']
    start_urls = ['/']

    # Sent with the POST request; the Content-Type announces the JSON body.
    headers = {
        "Content-Type": "application/json",
        "Host": "",
        "Origin": "",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
    }

    def start_requests(self):
        """Yield the POST request whose body is the JSON-encoded search payload."""
        # Earlier URL kept for reference:
        #   /search/searchresult.jsp?newsearch=true&queryText=Security%20Analytics
        endpoint = "/rest/search"
        payload = {
            "highlight": True,
            "matchPubs": True,
            "newsearch": True,
            "pageNumber": "1",
            "queryText": "Security Analytics",
            "returnFacets": ["ALL"],
            "returnType": "SEARCH",
        }
        yield scrapy.Request(
            url=endpoint,
            method='POST',
            body=json.dumps(payload),
            headers=self.headers,
            callback=self.parse,
        )

    def parse(self, response):
        """Print a marker line followed by the raw response text."""
        print(123)
        print(response.text)