700字范文,内容丰富有趣,生活中的好帮手!
700字范文 > python爬虫图书信息并存入数据库 以及安装工具库

python爬虫图书信息并存入数据库 以及安装工具库

时间:2023-10-27 23:02:07

相关推荐

python爬虫图书信息并存入数据库 以及安装工具库

1、安装工具库,一定要给软件管理员权限,不然往往会出错

安装parsel库

pip install parsel;

csv库(无需安装)

csv 是 Python 自带的标准库,不需要也无法通过 pip 安装(执行 pip install csv 会报错),代码中直接 import csv 即可。

安装pymysql库

pip install pymysql;

time库(无需安装)

time 同样是 Python 自带的标准库,不需要也无法通过 pip 安装(执行 pip install time 会报错),代码中直接 import time 即可。

安装requests库

pip install requests;

2、爬虫代码,记得更改数据库连接

"""Scrape exam-prep book listings from JD and Dangdang into a MySQL table.

For every search keyword the script walks the result pages of one shop,
opens each book's detail page to read its ISBN and stores one row per book
in the ``best`` table.  Remember to adjust ``DB_SETTINGS`` for your database.
"""
import csv
import os
import time

import parsel
import pymysql
import requests

# Kept from the original listing; nothing in this script uses it.
session_request = requests.session()

# NOTE(review): the published listing had the scheme and host stripped from
# every URL (e.g. '/Search?keyword=...'), which makes requests raise
# MissingSchema.  The hosts below are presumed from the page structure the
# CSS selectors target -- confirm them before running.
JD_SEARCH_URL = 'https://search.jd.com/Search?keyword={0}&page={1}'
JD_ITEM_URL = 'https://item.jd.com/{0}.html'
DANGDANG_SEARCH_URL = 'http://search.dangdang.com/?key={0}&act=input&page_index={1}'
DANGDANG_ITEM_URL = 'http://product.dangdang.com/{0}.html'

# Connection parameters handed to pymysql.connect(); change them to match
# your own MySQL instance.
DB_SETTINGS = {
    'host': 'localhost',
    'user': 'root',
    'password': '1111',
    'db': 'hhh',
    'port': 3306,
    'charset': 'utf8',
}

# Books ranked beyond this position within one keyword are ignored.
MAX_RANK = 240
# Pause between two books so the shops are not hammered with requests.
# NOTE(review): indentation was lost in the published listing, so whether
# the original slept per book or per page cannot be told; per book is the
# more conservative reading.
REQUEST_DELAY_SECONDS = 3

# Category -> search keywords that belong to it.  Keywords that appear in
# no group are used as their own category (see _category_for).
_CATEGORY_GROUPS = {
    '心理学': ('普通心理学', '发展心理学', '社会心理学', '实验心理学',
            '现代心理与教育统计', '心理与教育测量', '心理学考研'),
    '教育学': ('外国教育史', '中国教育史', '教育心理学', '当代教育心理学',
            '教育科学研究方法', '教育研究方法导论', '教育学基础', '教育学原理',
            '教育管理学', '教育学考研'),
    '历史学': ('中国古代史', '中国近代史', '中国现代史', '中华人民共和国国史',
            '世界史', '历史学考研'),
    '农学': ('有机化学', '普通化学', '定量分析简明教程', '植物生理学', '生物化学',
           '基础生物化学', '动物生理学', '无机及分析化学', '农学考研'),
    '医学': ('西医综合', '中医综合'),
    '计算机': ('计算机考研', '考研数据结构', '考研操作系统', '考研计算机网络',
            '考研计算机组成原理'),
    '数学': ('考研数学一', '考研数学二', '考研高等数学', '考研线性代数',
           '考研离散数学'),
    '英语': ('考研英语一', '考研英语二'),
    '法硕': ('法硕宪法', '法硕刑法', '法硕法理学', '法硕法制史'),
}
_CATEGORY_BY_KEYWORD = {
    keyword: category
    for category, keywords in _CATEGORY_GROUPS.items()
    for keyword in keywords
}

# Every keyword the original listing mentioned (most were commented out).
ALL_KEYWORDS = (
    '考研英语一', '考研政治', '考研英语二', '考研数学一', '考研数学二',
    '考研线性代数', '考研离散数学', '考研高等数学', '考研计算机', '考研数据结构',
    '考研计算机组成原理', '考研操作系统', '考研计算机网络',
    '普通心理学', '发展心理学', '社会心理学', '实验心理学', '现代心理与教育统计',
    '心理与教育测量',
    '外国教育史', '教育心理学', '当代教育心理学', '教育科学研究方法',
    '教育研究方法导论', '教育学基础', '教育学原理', '教育管理学', '中国教育史',
    '中国古代史', '中国近代史', '中国现代史', '中华人民共和国国史', '世界史',
    '有机化学', '普通化学', '定量分析简明教程', '植物生理学', '生物化学',
    '基础生物化学', '动物生理学', '无机及分析化学',
    '西医综合', '管理类联考', '经济类联考', '法硕宪法', '法硕刑法', '法硕法理学',
    '法硕法制史',
    '教育学考研', '历史学考研', '农学考研', '医学考研', '心理学考研', '中医综合',
)


class DataManager():
    """Thin wrapper around the MySQL table ``best`` that stores the books.

    Every public method opens its own connection, runs its statements,
    commits and closes the connection again.
    """

    def _connect(self):
        """Open a connection and cursor and keep them on the instance."""
        self.db = pymysql.connect(**DB_SETTINGS)
        self.cursor = self.db.cursor()

    def _execute_all(self, statements):
        """Run ``(sql, params)`` pairs in one connection and commit them.

        The connection is closed even when a statement raises, so a failed
        insert no longer leaks a connection.
        """
        self._connect()
        try:
            for sql, params in statements:
                self.cursor.execute(sql, params)
            self.db.commit()
        finally:
            self.db.close()

    def create_data(self):
        """Drop the ``best`` table if it exists and create it from scratch."""
        create_sql = """create table best(
            rank01 int,
            name varchar(255),
            author varchar(255),
            publish varchar(255),
            price varchar(255),
            href varchar(255),
            ISBN varchar(255),
            type varchar(2555),
            type2 varchar(255),
            type1 varchar(255),
            pic varchar(500),
            primary key(ISBN))"""
        self._execute_all([
            ("drop table if exists best", None),
            (create_sql, None),
        ])

    def save_data(self, data):
        """Insert one book row; rows whose ISBN already exists are ignored.

        ``data`` is an 11-item sequence in the column order of the insert
        statement below.
        """
        sql = ("insert ignore into best"
               "(rank01,name,author,publish,price,href,ISBN,type,type2,type1,pic) "
               "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")
        self._execute_all([(sql, data)])

    def update_data(self):
        """Clean up stored rows: strip '¥' from names, complete image URLs.

        Only protocol-relative image URLs ('//host/...') receive the 'http:'
        prefix.  The original prefixed every row on every call, so repeated
        runs produced 'http:http:...' values.
        """
        strip_currency = "UPDATE best SET `name` = replace(`name`,'¥','')"
        # No parameters are passed, so pymysql leaves the literal '%' alone.
        complete_pic = ("UPDATE best SET `pic` = concat('http:',pic) "
                        "WHERE `pic` LIKE '//%'")
        self._execute_all([(strip_currency, None), (complete_pic, None)])

    def saved(self):
        """Create '../try.csv' containing only the header row."""
        fieldnames = ['排名', '标题', '作者', '出版社', '价格', '二级链接',
                      'ISBN', '类型', '子类', '大类', '图片']
        # The context manager closes the file; the original left it open.
        with open('../try.csv', mode='w', encoding='utf-8', newline='') as f:
            csv.DictWriter(f, fieldnames=fieldnames).writeheader()


conn = DataManager()
# Run conn.create_data() once beforehand to (re)create the ``best`` table.


def _category_for(keyword):
    """Return the category of *keyword*, or the keyword itself if unknown."""
    return _CATEGORY_BY_KEYWORD.get(keyword, keyword)


def _fetch_html(url, headers, timeout, **options):
    """Return the body of *url* as text, or None when the request fails.

    Network errors are reported and swallowed so that one bad page does not
    stop the endless scraping loop.
    """
    try:
        response = requests.get(url=url, headers=headers, timeout=timeout,
                                **options)
    except requests.RequestException as exc:
        print(f'request failed for {url}: {exc}')
        return None
    return response.text


def _record_book(rank, title, author, publish, price, link, isbn,
                 site, keyword, category, pic):
    """Print one scraped book and store it through ``conn``."""
    dit = {
        '排名': rank,
        '标题': title,
        '作者': author,
        '出版社': publish,
        '价格': price,
        '二级链接': link,
        'ISBN': isbn,
        '大类': site,
        '子类': keyword,
        '类型': category,
        '图片': pic,
    }
    print(dit)
    # Positional order is kept from the original: column `type` receives the
    # shop name and column `type1` the category.
    conn.save_data((rank, title, author, publish, price, link, isbn,
                    site, keyword, category, pic))


def jingdong(key):
    """Scrape JD search results for every keyword in *key*.

    Walks result pages 1-8 of each keyword, reads the ISBN from each book's
    detail page, stores the rows and finally runs ``conn.update_data()``.
    """
    search_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:95.0) Gecko/0101 Firefox/95.0',
        'Connection': 'close',
    }
    detail_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/0101 Firefox/96.0',
        'Accept': 'vif,image/webp,*/*',
        'Connection': 'close',
    }
    for keyword in key:
        category = _category_for(keyword)
        for page in range(1, 9):
            print(f'---------------------------正在爬取第{page}页的数据------------------------------')
            # NOTE(review): verify=False (kept from the original) disables
            # TLS certificate checks -- drop it unless it is really needed.
            html = _fetch_html(JD_SEARCH_URL.format(keyword, page),
                               search_headers, (100, 100), verify=False)
            if html is None:
                continue
            rank = (page - 1) * 30
            for li in parsel.Selector(html).css('ul.gl-warp li'):
                rank += 1
                if rank > MAX_RANK:
                    break
                lj = li.css('.p-price i::attr(data-price)').get()
                if not lj:
                    # Without the id no detail URL can be built.
                    continue
                title = ''.join(li.xpath('.//em//text()').extract())
                author = li.css('.p-bi-name a::attr(title)').get()
                publish = li.css('.p-bi-store a::attr(title)').get()
                price = li.css('.p-price i::text').get()
                pic = li.css('.p-img img::attr(data-lazy-img)').get()

                isbn = None
                detail = _fetch_html(JD_ITEM_URL.format(lj), detail_headers,
                                     (100, 100))
                if detail is not None:
                    params = parsel.Selector(detail).css('div.p-parameter ul')
                    isbn = params.css('li:nth-child(2)::attr(title)').get()
                    # Fall back to the third entry when the second is missing
                    # or not purely numeric (the original crashed on None).
                    if not (isbn and isbn.isdigit()):
                        isbn = params.css('li:nth-child(3)::attr(title)').get()

                _record_book(rank, title, author, publish, price, lj, isbn,
                             "京东", keyword, category, pic)
                time.sleep(REQUEST_DELAY_SECONDS)
    conn.update_data()


def dangdang(key1):
    """Scrape Dangdang search results for every keyword in *key1*.

    Walks result pages 1-4 of each keyword, reads the ISBN text from each
    book's detail page and stores the rows.
    """
    search_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:95.0) Gecko/0101 Firefox/95.0',
        'Connection': 'close',
    }
    # The empty 'Host' header of the listing is omitted: requests derives the
    # correct value from the URL.
    detail_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/0101 Firefox/97.0',
        'X-Requested-With': 'XMLHttpRequest',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Connection': 'keep-alive',
        'Referer': DANGDANG_ITEM_URL.format('29318534'),
    }
    # The listing hard-coded a logged-in session cookie (with personal data).
    # Supply your own through the DANGDANG_COOKIE environment variable.
    cookie = os.environ.get('DANGDANG_COOKIE', '')
    if cookie:
        detail_headers['Cookie'] = cookie

    for keyword in key1:
        category = _category_for(keyword)
        for page in range(1, 5):
            print(f'---------------------------正在爬取第{page}页的数据------------------------------')
            html = _fetch_html(DANGDANG_SEARCH_URL.format(keyword, page),
                               search_headers, (20, 20))
            if html is None:
                continue
            rank = (page - 1) * 60
            for li in parsel.Selector(html).css('ul.bigimg li'):
                rank += 1
                if rank > MAX_RANK:
                    break
                href = li.css('li::attr(id)').get()
                if not href:
                    # Without the element id no detail URL can be built.
                    continue
                product_id = href[1:]  # drop the one-character id prefix
                # The original ended these assignments with a comma, which
                # silently turned every value into a 1-tuple.
                title = li.css('.pic::attr(title)').get()
                author = li.css('.search_book_author a::attr(title)').get()
                publish = li.css('.search_book_author span:nth-child(3) a::attr(title)').get()
                search_now_price = li.css('.price .search_now_price::text').get()
                pic = (li.css('.pic img::attr(data-original)').get()
                       or li.css('.pic img::attr(src)').get())

                isbn = None
                detail = _fetch_html(DANGDANG_ITEM_URL.format(product_id),
                                     detail_headers, (20, 20),
                                     allow_redirects=False)
                if detail is not None:
                    isbn = parsel.Selector(detail).css(
                        'div.pro_content ul').css('li:nth-child(5)::text').get()

                _record_book(rank, title, author, publish, search_now_price,
                             product_id, isbn, "当当", keyword, category, pic)
                time.sleep(REQUEST_DELAY_SECONDS)


def main():
    """Run one scraping pass over the currently enabled keywords."""
    # Keywords enabled in the original listing; see ALL_KEYWORDS for the
    # complete set.  NOTE(review): line breaks were lost, so whether
    # '中医综合' was enabled is a best guess.
    key1 = (
        '考研英语一', '考研英语二', '考研数学一', '考研数学二', '考研线性代数',
        '考研离散数学', '考研高等数学', '考研计算机', '考研数据结构',
        '考研计算机组成原理', '考研操作系统', '中医综合',
    )
    # The listing called jingdong(key), but `key` was only defined inside a
    # comment, which raises NameError.  Call dangdang(key1) to scrape
    # Dangdang instead.
    jingdong(key1)


def jiehe_data(self):
    """Merge the rankings of two tables into ``best`` (unfinished).

    NOTE(review): nothing calls this function.  It expects an object with
    ``cursor`` and ``db`` attributes, and the SQL still contains two
    unfilled ``%s`` table placeholders and a malformed-looking 'best*', so
    it presumably fails if executed as written.
    """
    sql = """insert into best* select Ceiling((a.rank01+b.rank01)/2) from %s a inner join %s b on a.ISBN=SUBSTRING(b.ISBN,12); """
    self.cursor.execute(sql)
    self.db.commit()
    self.db.close()


if __name__ == '__main__':
    # Re-scrape everything every ten minutes until interrupted.
    while True:
        main()
        time.sleep(600)

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。