从 /shenzhen.html 等城市页面抓取北京、深圳、上海、广州、成都的 PM2.5 指数,并按照空气质量从优到差排序,保存在 txt 文档里。
代码如下:
# coding=utf-8
from time import sleep

from selenium import webdriver
class PM:
    """Scrape PM2.5 / AQI readings for five cities and export them to a text file.

    Constructing an instance opens a Chrome WebDriver session and scrapes
    every city page immediately. The result is stored in ``pm25_info`` as a
    list of ``(city_name, pm25, aqi, quality)`` string tuples sorted by
    ascending AQI. Call ``quit()`` when finished to close the browser.
    """

    def __init__(self, base_url='', load_wait=3):
        """Start Chrome and scrape all configured cities.

        Args:
            base_url: Prefix placed in front of ``/<city>.html``. The default
                ``''`` reproduces the site-relative URL found in the original
                code. NOTE(review): the host seems to be missing from the
                source, so pass the real site root here.
            load_wait: Seconds to pause after each navigation before reading
                the page.
        """
        self.base_url = base_url
        self.load_wait = load_wait
        self.dr = webdriver.Chrome()
        try:
            self.pm25_info = self.get_pm25_info()
        except Exception:
            # Do not leave an orphaned browser behind when scraping fails.
            self.dr.quit()
            raise

    def _read_text(self, selector):
        """Return the text of the first element matching a CSS selector."""
        # The locator strategy is passed as the plain string 'css selector'
        # because the find_element_by_* helpers were removed in Selenium 4.3;
        # this form works on both older and newer releases.
        return self.dr.find_element('css selector', selector).text

    def get_pm25_info(self):
        """Visit each city page and collect its air-quality readings.

        Returns:
            A list of ``(city_name, pm25, aqi, quality)`` tuples of strings,
            sorted by AQI from lowest to highest.

        Raises:
            ValueError: If a scraped AQI value cannot be parsed as a number.
        """
        city_list = ['beijing', 'shenzhen', 'shanghai', 'guangzhou', 'chengdu']
        pm_info_list = []
        for city in city_list:
            self.dr.get(self.base_url + '/' + city + '.html')
            # Fixed pause to give the page time to render before reading it.
            sleep(self.load_wait)
            city_name = self._read_text('.bi_loaction_city')  # city name
            pm_exp = self._read_text('.pm25_span')  # PM2.5 reading
            air_num = self._read_text('.bi_aqiarea_num')  # AQI value
            air_quality = self._read_text('.bi_aqiarea_rightspan')  # quality label
            pm_info_list.append((city_name, pm_exp, air_num, air_quality))
        # The AQI value decides the ranking: a lower AQI sorts first.
        pm_info_list.sort(key=lambda x: float(x[2]))
        return pm_info_list

    def get_pm_info_file(self):
        """Write the ranked readings to ``<file_title>.txt`` as UTF-8.

        Each entry is preceded by a ``TOP<n>`` separator line. After the call
        ``self.file`` refers to the (closed) file object and ``self.floor`` is
        one past the last rank written.
        """
        self.file_title = '北上广深成五地PM2.5数据信息'
        self.floor = 1
        # The context manager closes the file even if a write fails midway.
        with open(self.file_title + '.txt', 'wb') as out:
            self.file = out
            for item in self.pm25_info:
                separate_line = '**********' + 'TOP' + str(self.floor) + '**********\n'
                entry = (
                    '城市:' + item[0] + '\n'
                    + 'PM2.5指数:' + item[1] + '微克/立方米' + '\n'
                    + 'AQI指数:' + item[2] + '\n'
                    + '空气质量:' + item[3] + '\n'
                )
                out.write((separate_line + entry).encode('utf-8'))
                self.floor += 1

    def quit(self):
        """Close the browser and end the WebDriver session."""
        self.dr.quit()
if __name__ == '__main__':
    # Scraping happens during construction; export the report afterwards and
    # always close the browser, even if writing the file fails.
    pm_info = PM()
    try:
        pm_info.get_pm_info_file()
    finally:
        pm_info.quit()
网页如下:
生成txt如下: