一、实现目标
本文利用股城网(/gpdmylb.html)提供的股票代码列表,再经由同花顺网(/000001/)逐个检索每只股票在交易结束后的当日开盘价格与收盘价格。
其中,股城网的网页数据(目标:股票代码、股票名称)通过BeautifulSoup抓取;同花顺网的目标数据(交易结束后的当日开盘价格、收盘价格)先转化成JSON字典再提取,最后写入Excel文件。
二、使用步骤
1.引入库
import requests
import json
import bs4
from bs4 import BeautifulSoup
import traceback
import re
import openpyxl
from datetime import datetime
2.读入数据
def getHTMLText(url):
    """Download *url* and return its decoded text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as text, or an empty string when the request
        fails (connection error, timeout, invalid URL, or HTTP error status).
    """
    headers = {'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort download: callers treat "" as "no page available".
        return ""
    # Let requests guess the charset from the body instead of trusting the
    # HTTP header.
    r.encoding = r.apparent_encoding
    print(r.status_code)
    return r.text


def getStockList(infoDict, stock_code_url):
    """Fill *infoDict* with ``{stock code: stock name}`` scraped from a page.

    Every ``<a>`` inside ``<section class="stockTable">`` is inspected; the
    first run of six digits in the link text is used as the code.

    Args:
        infoDict: Dictionary updated in place.
        stock_code_url: Address of the page that lists the stocks.
    """
    html = getHTMLText(stock_code_url)
    soup = BeautifulSoup(html, 'html.parser')
    section = soup.find('section', class_='stockTable')
    if section is None:
        # Download failed or the page layout changed; nothing to collect.
        print('Stock table not found in ' + stock_code_url)
        return

    code_pattern = re.compile(r'\d{6}')
    for link in section.find_all('a'):
        content = link.text
        match = code_pattern.search(content)
        if match is None:
            print('There is something wrong.')
            continue
        # NOTE(review): the name is the link text minus its last 8
        # characters -- presumably a "(NNNNNN)" suffix; verify against the
        # live page.
        infoDict[match.group(0)] = content[:-8]


def _strip_jsonp(text):
    """Return the JSON payload wrapped in a ``callback(...)`` JSONP response."""
    body = text.strip()
    if body[:1] in ('{', '['):
        # Already plain JSON, no wrapper to remove.
        return body
    start = body.find('(')
    end = body.rfind(')')
    if start == -1 or end <= start:
        return body
    return body[start + 1:end]


def getStockInfo(infoDict, stock_price_url, fpath):
    """Query the quote endpoint for every stock and save the result as xlsx.

    The sheet is named ``stock_newday``. Its header row holds the code, the
    name and two columns prefixed with today's date (``YYYYMMDD``); fields
    ``'7'`` and ``'10'`` of each quote are written under those two columns.
    A stock whose request or payload is unusable is reported and skipped, so
    the rows already collected are still saved.

    Args:
        infoDict: Mapping of stock code to stock name.
        stock_price_url: Quote URL prefix; the stock code is appended to it.
        fpath: Destination path of the workbook.
    """
    day = datetime.today().strftime('%Y%m%d')

    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = 'stock_newday'
    sheet['A1'] = '股票代码'
    sheet['B1'] = '股票名称'
    sheet['C1'] = day + '开盘价格'
    sheet['D1'] = day + '收盘价格'

    # Loop-invariant, so built once.
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}

    for stock in sorted(infoDict):
        name = infoDict[stock]
        url = stock_price_url + stock
        try:
            r = requests.get(url, headers=headers, timeout=30)
            r.raise_for_status()
            # The endpoint answers with JSONP: callback({...}).
            dicts = json.loads(_strip_jsonp(r.text))
        except (requests.RequestException, ValueError) as err:
            # ValueError covers json.JSONDecodeError.
            print('Skipping ' + stock + ': ' + str(err))
            continue

        if not isinstance(dicts, dict):
            continue
        try:
            quote = dicts['data'][stock]
            topen_price = quote['7']
            over_price = quote['10']
        except (KeyError, TypeError):
            print('Skipping ' + stock + ': quote fields missing')
            continue
        sheet.append([stock, name, topen_price, over_price])

    wb.save(fpath)


def main():
    """Collect the stock list, fetch today's prices and write the workbook."""
    # NOTE(review): both URLs are written without scheme or host; requests
    # needs absolute URLs -- confirm the base addresses before running.
    stock_code_url = '/gpdmylb.html'
    stock_price_url = '/quote.php?cate=real&type=stock&callback=showStockDate&return=json&code='
    output_file = r'C:\Users\Administrator\Desktop\***.xlsx'  # change this to your own output path
    infoDict = {}
    getStockList(infoDict, stock_code_url)
    getStockInfo(infoDict, stock_price_url, output_file)


if __name__ == '__main__':
    main()
总结
以上就是今天记录的内容。
新的改变
待续。