700字范文,内容丰富有趣,生活中的好帮手!
700字范文 > python抓取股票数据_Python获取股票历史数据和收盘数据的代码实现

python抓取股票数据_Python获取股票历史数据和收盘数据的代码实现

时间:2022-03-19 10:48:27

相关推荐

python抓取股票数据_Python获取股票历史数据和收盘数据的代码实现

各种股票软件,例如通达信、同花顺、大智慧,都可以实时查看股票价格和走势,做一些简单的选股和定量分析,但是如果你想做更复杂的分析,例如回归分析、关联分析等就有点捉襟见肘,所以最好能够获取股票历史及实时数据并存储到数据库,然后再通过其他工具,例如SPSS、SAS、EXCEL或者其他高级编程语言连接数据库获取股票数据进行定量分析,这样就能实现更多目的了。

为此,首先需要找到可以获取股票数据的接口,新浪、雅虎、腾讯等都有接口可以实时获取股票数据,历史数据选择了雅虎接口,收盘数据选择了腾讯接口。

(1)项目结构

(2)数据库连接池

connectionpool.py

#-*- coding: UTF-8 -*-

'''

create a connection pool

'''

from DBUtils import PooledDB

import MySQLdb

import string

# Pool sizing values read by createConnectionPool().
maxconn = 30 # maximum number of connections

mincached = 10 # minimum number of idle connections

maxcached = 20 # maximum number of idle connections

maxshared = 30 # maximum number of shared connections

# Connection settings packed as "user#password#host#port#database#charset".
connstring="root#root#127.0.0.1#3307#pystock#utf8" # database address

dbtype = "mysql" # use MySQL as the storage database

def createConnectionPool(connstring, dbtype):
    """Build a DBUtils connection pool from a '#'-separated connection string.

    Args:
        connstring: Settings in the form
            "user#password#host#port#database#charset".
        dbtype: Backend name; only 'mysql' is handled.

    Returns:
        A PooledDB pool when dbtype is 'mysql', otherwise None.

    Raises:
        Exception: If the connection string is malformed or the pool cannot
            be created; the message names the host and the underlying cause.
    """
    if dbtype != 'mysql':
        return None

    db_conn = connstring.split("#")
    try:
        # Unpacking fails loudly (ValueError) when a field is missing.
        user, passwd, host, port, db, charset = db_conn[:6]
        return PooledDB.PooledDB(
            MySQLdb,
            user=user,
            passwd=passwd,
            host=host,
            port=int(port),  # int() replaces the deprecated string.atoi()
            db=db,
            charset=charset,
            mincached=mincached,
            maxcached=maxcached,
            maxshared=maxshared,
            maxconnections=maxconn)
    except Exception as e:
        # Fall back to the raw string so the error path cannot itself fail
        # with IndexError on a short connection string.
        host = db_conn[2] if len(db_conn) > 2 else connstring
        raise Exception('conn datasource Excepts,%s!!!(%s).' % (host, str(e)))

# Module-level pool, created once when this module is imported.
pool = createConnectionPool(connstring, dbtype)

(3)数据库操作

DBOperator.py

#-*- coding: UTF-8 -*-

'''

Created on -3-13

@author: Casey

'''

import MySQLdb

from stockmining.stocks.setting import LoggerFactory

import connectionpool

class DBOperator(object):
    """Small helper around one pooled MySQL connection.

    Call connDB() before running statements and closeDB() when finished.
    Database errors are logged instead of raised; a failing call returns None.
    Table and column names are interpolated into the SQL text as-is, so they
    must come from trusted code; row values are bound as parameters.
    """

    # Class-level default so every method is safe before connDB() is called.
    conn = None

    def __init__(self):
        self.logger = LoggerFactory.getLogger('DBOperator')
        self.conn = None

    def connDB(self):
        """Take a connection from the shared pool, remember and return it."""
        self.conn = connectionpool.pool.connection()
        return self.conn

    def closeDB(self):
        """Close the current connection, if there is one."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _cursor(self):
        """Return a new cursor, or None (after logging) when not connected."""
        if self.conn is None:
            self.logger.error("Mysql Error: No connection")
            return None
        return self.conn.cursor()

    def _logError(self, e):
        """Log a database error without assuming it carries (code, message)."""
        if len(e.args) >= 2:
            self.logger.error("Mysql Error %s: %s" % (e.args[0], e.args[1]))
        else:
            self.logger.error("Mysql Error: %s" % (e,))

    @staticmethod
    def _buildInsertSql(table, row):
        """Return (sql, params) for a parameterized single-row INSERT."""
        columns = list(row)
        placeholders = ",".join(["%s"] * len(columns))
        sql = "insert into %s(%s) values(%s)" % (
            table, ",".join(columns), placeholders)
        return sql, tuple(row[column] for column in columns)

    @staticmethod
    def _buildSelectSql(table, fields, wheres):
        """Return (sql, params) for a SELECT with AND-ed equality conditions."""
        wheres = list(wheres)
        sql = "select " + ",".join(fields) + " from " + table
        if wheres:
            # Omit the WHERE clause entirely when there are no conditions.
            sql += " where " + " and ".join(w.key + "=%s" for w in wheres)
        return sql, tuple(w.value for w in wheres)

    def insertIntoDB(self, table, dict):
        """Insert one row into `table` and commit.

        Args:
            table: Target table name (trusted, interpolated into the SQL).
            dict: Mapping of column name to value; values are bound as
                query parameters.

        Returns:
            None. Errors are logged and the transaction is rolled back.
        """
        if not dict:
            self.logger.error("Mysql Error: nothing to insert into %s" % table)
            return
        cursor = None
        try:
            cursor = self._cursor()
            if cursor is None:
                return
            sql, param = self._buildInsertSql(table, dict)
            self.logger.debug("%s params=%r", sql, param)
            cursor.execute(sql, param)
            self.conn.commit()
        except MySQLdb.Error as e:
            self._logError(e)
            self.conn.rollback()
        finally:
            if cursor is not None:
                cursor.close()

    def execute(self, sql):
        """Run a raw SQL statement without committing.

        Args:
            sql: Complete SQL text; the caller is responsible for escaping.

        Returns:
            Whatever cursor.execute() returns (used by callers as a row
            count), or None when not connected or on a database error.
        """
        cursor = None
        try:
            cursor = self._cursor()
            if cursor is None:
                return None
            return cursor.execute(sql)
        except MySQLdb.Error as e:
            self._logError(e)
            return None
        finally:
            if cursor is not None:
                cursor.close()

    def findBySQL(self, sql):
        """Run a raw query and return all fetched rows (None on error).

        Args:
            sql: Complete SQL text; the caller is responsible for escaping.
        """
        cursor = None
        try:
            cursor = self._cursor()
            if cursor is None:
                return None
            cursor.execute(sql)
            return cursor.fetchall()
        except MySQLdb.Error as e:
            self._logError(e)
            return None
        finally:
            if cursor is not None:
                cursor.close()

    def findByCondition(self, table, fields, wheres):
        """Select `fields` from `table` where every condition matches.

        Args:
            table: Table name (trusted, interpolated into the SQL).
            fields: Iterable of column names to select.
            wheres: Iterable of objects exposing `.key` (column name) and
                `.value`; conditions are equality tests joined with AND and
                the values are bound as query parameters.

        Returns:
            The fetched rows, or None when not connected or on an error.
        """
        cursor = None
        try:
            cursor = self._cursor()
            if cursor is None:
                return None
            sql, param = self._buildSelectSql(table, fields, wheres)
            self.logger.debug(sql)
            cursor.execute(sql, param)
            return cursor.fetchall()
        except MySQLdb.Error as e:
            self._logError(e)
            return None
        finally:
            if cursor is not None:
                cursor.close()

(4)日志

LoggerFactory.py

#-*- coding: UTF-8 -*-

'''

Created on -3-11

@author: Casey

'''

import logging

import time

def getLogger(name, filename="d:\\stocks\\stock.log"):
    """Return the logger called `name`, writing to the log file and console.

    Args:
        name: Logger name; it is shown in every record.
        filename: Path of the log file handed to logging.basicConfig().
            Defaults to the path the module has always used.

    Returns:
        The logging.Logger registered under `name`.
    """
    # basicConfig() only has an effect the first time it is called, so the
    # format must use record fields: a timestamp or name concatenated into
    # the format string would be frozen at that first call.
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s : %(name)s LINE %(lineno)-4d %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        filename=filename,
        filemode='w')

    logger = logging.getLogger(name)
    # Attach the console handler once per logger; adding one on every call
    # would print each record several times.
    if not logger.handlers:
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        console.setFormatter(logging.Formatter(
            '%(name)s: LINE %(lineno)-4d : %(levelname)-8s %(message)s'))
        logger.addHandler(console)
    return logger

if __name__ == '__main__':
    # Smoke test: emit a single debug record through a configured logger.
    demo = getLogger("www")
    demo.debug("www")

(5)获取股票历史数据

参 数:s — 股票名称

a — 起始时间,月

b — 起始时间,日

c — 起始时间,年

d — 结束时间,月

e — 结束时间,日

f — 结束时间,年

g— 时间周期。

(一定注意月份参数,其值比真实数据-1。如需要9月数据,则写为08。)

示例:查询浦发银行 .09.25 – .10.8 之间的日线数据(年份在原文中缺失)

返回:

Date,Open,High,Low,Close,Volume,Adj Close

-09-30,12.37,12.99,12.32,12.95,76420500,12.95

-09-29,12.20,12.69,12.12,12.48,79916400,12.48

-09-28,12.92,12.92,12.57,12.58,63988100,12.58

-09-27,13.00,13.02,12.89,12.94,43203600,12.94

因为数据量比较大,需要跑很久,所以也可以考虑用多线程模式来获取相关数据。单线程模式如下:

#-*- coding: UTF-8 -*-

'''

Created on -3-1

@author: Casey

'''

import urllib

import re

import sys

from setting import params

import urllib2

from db import *

# Module-wide database helper; main() opens and closes its connection.
dbOperator = DBOperator()

# Table that receives the downloaded daily quotes.
table = "stock_quote_yahoo"

def isStockExitsInDate(table, stock, date):
    """Tell whether `table` already has a row for `stock` on `date`.

    Args:
        table: Table name, interpolated into the SQL text as-is.
        stock: Numeric stock code (formatted with %d).
        date: Date string compared against the `date` column.

    Returns:
        True when the query reports at least one matching row, else False.
    """
    sql = "select * from " + table + " where code = '%d' and date='%s'" % (stock, date)
    n = dbOperator.execute(sql)
    # execute() may hand back None (presumably when the query fails); treat
    # that as "not found" instead of comparing None with an int.
    return n is not None and n >= 1

def parseHistoryQuotes(code, csvText):
    """Convert quote CSV text into a list of row dicts ready for insertion.

    Only lines with exactly seven comma-separated fields are kept, and the
    header line (first field 'Date') is skipped.
    """
    rows = []
    # splitlines() also strips the '\r' of CRLF line endings.
    for line in csvText.splitlines():
        stockInfo = line.split(",")
        if len(stockInfo) != 7 or stockInfo[0] == 'Date':
            continue
        rows.append({
            "date": stockInfo[0],
            "open": stockInfo[1],        # opening price
            "high": stockInfo[2],        # highest price
            "low": stockInfo[3],         # lowest price
            "close": stockInfo[4],       # closing price
            "volume": stockInfo[5],      # trading volume
            "adj_close": stockInfo[6],   # adjusted closing price
            "code": code,                # stock code
        })
    return rows


def getHistoryStockData(code, dataurl):
    """Download the history of one stock and store the rows not yet saved.

    Args:
        code: Numeric stock code stored with every row.
        dataurl: URL of the CSV resource to download.

    Returns:
        The downloaded text with double quotes removed, or None when the
        download or the processing fails (the error is printed).
    """
    try:
        r = urllib2.Request(dataurl)
        try:
            stdout = urllib2.urlopen(r, data=None, timeout=3)
        except Exception as e:
            print(">>>>>> Exception: " + str(e))
            return None

        try:
            stdoutInfo = stdout.read().decode(params.codingtype).encode('utf-8')
        finally:
            stdout.close()
        tempData = stdoutInfo.replace('"', '')

        # The former "'404' in payload" test was inverted (rows were parsed
        # only when "404" appeared) and also matched ordinary numbers.
        # urlopen() already raises for an HTTP error status, and non-CSV
        # lines are dropped by the seven-field filter in the parser.
        for stockDetail in parseHistoryQuotes(code, tempData):
            if not isStockExitsInDate(table, code, stockDetail["date"]):
                dbOperator.insertIntoDB(table, stockDetail)
    except Exception as err:
        print(">>>>>> Exception: " + str(dataurl) + " " + str(err))
        return None
    return tempData

def get_stock_history():
    """Download and print the history of every code, one market at a time.

    NOTE(review): the URL templates have no scheme/host and their year
    fields (c=, f=) are empty - presumably lost from the original source;
    confirm and restore them before running.
    """
    # (URL template, first code, stop code) - the stop code is exclusive.
    segments = (
        ("/table.csv?s=%d.SS&a=01&b=01&c=&d=01&e=01&f=&g=d", 601999, 602100),   # Shanghai
        ("/table.csv?s=%06d.SZ&a=01&b=01&c=&d=01&e=01&f=&g=d", 1, 1999),        # Shenzhen
        ("/table.csv?s=%06d.SZ&a=01&b=01&c=&d=01&e=01&f=&g=d", 2001, 2999),     # SME board
        ("/table.csv?s=%d.SZ&a=01&b=01&c=&d=01&e=01&f=&g=d", 300001, 300400),   # ChiNext
    )
    for template, first, stop in segments:
        code = first
        while code < stop:
            print(getHistoryStockData(code, template % code))
            code += 1

def main():
    """Connect to the database, download all history, then disconnect."""
    dbOperator.connDB()
    try:
        get_stock_history()
    finally:
        # Release the connection even when the download loop raises.
        dbOperator.closeDB()


if __name__ == '__main__':
    main()

(6)获取实时价格和现金流数据

A:实时价格数据采用腾讯的接口:沪市:/q=sh,深市:/q=sz

v_sz000001="51~平安银行~000001~11.27~11.27~11.30~316703~151512~165192~11.27~93~11.26~

4352~11.25~4996~11.24~1037~11.23~1801~11.28~1181~11.29~2108~11.30~1075~11.31~1592~11.32~

1118~15:00:24/11.27/3146/S/3545407/17948|14:56:59/11.26/15/S/16890/17787|

14:56:56/11.25/404/S/454693/17783|14:56:54/11.26/173/B/194674/17780|14:56:51

/11.26/306/B/344526/17777|14:56:47/11.26/16/B/18016/17773~

1029150142~0.00~0.00~11.36~11.25~

11.26/313557/354285045~

316703~35783~0.27~7.38~~11.36~11.25~0.98~1330.32~1612.59~1.03~12.40~10.14~";

数据比较多,比较有用的是:1-名称;2-代码;3-价格;4-昨日收盘;5-今日开盘;6-交易量(手);7-外盘;8-内盘;9-买一;10-买一量;11-买二;12-买二量;13-买三;14-买三量;15-买四;16-买四量;17-买五;18-买五量;19-卖一;20-卖一量;21-卖二;22-卖二量;23-卖三;24-卖三量;25-卖四;26-卖四量;27-卖五;28-卖五量;30-时间;31-涨跌;32-涨跌率;33-最高价;34-最低价;35-成交量(万);38-换手率;39-市盈率;42-振幅;43-流通市值;44-总市值;45-市净率

例如平安银行的现金流数据/q=ff_sz000001:v_ff_sz000001="sz000001~21162.20~24136.40~-2974.20~-8.31~14620.87~11646.65~2974.22~

8.31~35783.07~261502.0~261158.3~平安银行~1029~1028^37054.20^39358.20~

1027^39713.50^42230.70~1026^82000.80^83689.90~1023^81571.30^71743.10";

比较重要的:1-主力流入;2-主力流出;3-主力净流量;4-主力流入/主力总资金;5-散户流入;6-散户流出;7-散户净流量;8-散户流入/散户总资金;9-总资金流量;12-名字;13-日期

采用多线程、数据库连接池实现股票实时价格和现金流数据的获取:

#-*- coding: UTF-8 -*-

'''

Created on 3月2日

@author: Casey

'''

import time

import threading

'''

上证编码:'600001' .. '602100'

深圳编码:'000001' .. '001999'

中小板:'002001' .. '002999'

创业板:'300001' .. '300400'

'''

import urllib2

from datetime import date

from db import *

from setting import *

class StockTencent(object):
    """Collect per-stock cash-flow and quotation data and store it in MySQL.

    NOTE(review): the request URLs built here ("/q=...") carry no scheme or
    host - presumably lost from the original source; restore before running.
    NOTE(review): both worker threads use the same DBOperator (and thus the
    same connection); confirm the driver allows that or give each its own.
    """

    # Destination tables.
    __stockTables = {'cash': 'stock_cash_tencent', 'quotation': 'stock_quotation_tencent'}

    # (symbol template, first code, last code) per market. Bounds are
    # inclusive: the former range() calls skipped the last code of each span
    # given in the comments ('300001' .. '300400').
    __markets = (
        ("sh%d", 600001, 602100),    # Shanghai
        ("sz%06d", 1, 1999),         # Shenzhen
        ("sz%06d", 2001, 2999),      # SME board
        ("sz%d", 300001, 300400),    # ChiNext
    )

    # Cash-flow columns and the response field each one is read from.
    __cashFields = (
        ('main_in_cash', 1), ('main_out_cash', 2), ('main_net_cash', 3),
        ('main_net_rate', 4), ('private_in_cash', 5), ('private_out_cash', 6),
        ('private_net_cash', 7), ('private_net_rate', 8), ('total_cash', 9),
        ('date', 13),
    )

    # Quotation columns and the response field each one is read from.
    # Column names (including their spelling) must match the table.
    # NOTE(review): in the sample response quoted in the accompanying text,
    # fields 42-45 look shifted by one relative to the last four columns
    # (42 repeats the low price); verify against the live feed.
    __quotationFields = (
        ('code', 2), ('price', 3), ('yesterday_close', 4), ('today_open', 5),
        ('volume', 6), ('outer_sell', 7), ('inner_buy', 8),
        ('buy_one', 9), ('buy_one_volume', 10),
        ('buy_two', 11), ('buy_two_volume', 12),
        ('buy_three', 13), ('buy_three_volume', 14),
        ('buy_four', 15), ('buy_four_volume', 16),
        ('buy_five', 17), ('buy_five_volume', 18),
        ('sell_one', 19), ('sell_one_volume', 20),
        ('sell_two', 21),            # was 22, duplicating sell_two_volume
        ('sell_two_volume', 22),
        ('sell_three', 23), ('sell_three_volume', 24),
        ('sell_four', 25), ('sell_four_volume', 26),
        ('sell_five', 27), ('sell_five_volume', 28),
        ('datetime', 30), ('updown', 31), ('updown_rate', 32),
        ('heighest_price', 33), ('lowest_price', 34),
        ('turnover_rate', 38), ('pe_rate', 39), ('viberation_rate', 42),
        ('circulated_stock', 43), ('total_stock', 44), ('pb_rate', 45),
    )

    def __init__(self):
        """Create the logger and the database helper."""
        self.__logger = LoggerFactory.getLogger('StockTencent')
        self.__dbOperator = DBOperator()

    def main(self):
        """Run both collectors in parallel, then close the connection."""
        self.__dbOperator.connDB()
        workers = [threading.Thread(target=self.getStockCash),
                   threading.Thread(target=self.getStockQuotation)]
        try:
            for worker in workers:
                worker.start()
            # Wait for the workers: closing right after start() would take
            # the connection away while they are still using it.
            for worker in workers:
                worker.join()
        finally:
            self.__dbOperator.closeDB()

    def __isStockExitsInDate(self, table, stock, date):
        """Return True when `table` already has a row for `stock` on `date`.

        NOTE(review): the quotation rows are inserted with a `datetime` key
        while this query filters on a `date` column; confirm the schema.
        """
        sql = "select * from " + table + " where code = '%s' and date='%s'" % (stock, date)
        n = self.__dbOperator.execute(sql)
        # A None result (presumably a failed query) counts as "not found".
        return n is not None and n >= 1

    @staticmethod
    def _toText(value):
        """Decode UTF-8 byte strings; return text values unchanged."""
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    @staticmethod
    def _isZeroPrice(text):
        """Return True when `text` is a number equal to zero.

        A substring test for '0.00' would also match 10.00 or 100.00.
        """
        try:
            return float(text) == 0
        except ValueError:
            return False

    @staticmethod
    def _parseCash(stockInfo):
        """Map the '~'-split cash-flow fields to a row dict.

        Returns None when the response should be skipped (too few fields,
        a 'pv_none' reply, or a first field without '=').
        """
        # Field 13 (date) is read, so at least 14 fields are required.
        if len(stockInfo) < 14 or stockInfo[0].find('pv_none') != -1:
            return None
        head = stockInfo[0].split('=')
        if len(head) < 2:
            return None
        stockCash = {'code': head[1][2:]}  # drop the first two characters
        for column, index in StockTencent.__cashFields:
            stockCash[column] = stockInfo[index]
        stockCash['name'] = StockTencent._toText(stockInfo[12])
        return stockCash

    @staticmethod
    def _parseQuotation(stockInfo):
        """Map the '~'-split quotation fields to a row dict.

        Returns None when the response should be skipped (too few fields,
        a 'pv_none' reply, or a price of zero).
        """
        # Field 45 is read, so at least 46 fields are required.
        if len(stockInfo) < 46 or stockInfo[0].find('pv_none') != -1:
            return None
        if StockTencent._isZeroPrice(stockInfo[3]):
            return None
        stockQuotation = {}
        for column, index in StockTencent.__quotationFields:
            stockQuotation[column] = stockInfo[index]
        stockQuotation['name'] = StockTencent._toText(stockInfo[1])
        # Field 35 holds three '/'-separated values; the third is stored.
        parts = stockInfo[35].split('/')
        stockQuotation['volume_amout'] = parts[2] if len(parts) > 2 else None
        return stockQuotation

    def __fetchWithRetry(self, dataUrl):
        """Fetch `dataUrl`, retrying once after 10 seconds on failure."""
        tempData = self.__getDataFromUrl(dataUrl)
        if tempData is None:
            time.sleep(10)
            tempData = self.__getDataFromUrl(dataUrl)
        return tempData

    def __getStockCashDetail(self, dataUrl):
        """Fetch one stock's cash-flow record and insert it if it is new.

        Returns False when the data could not be fetched, otherwise None.
        """
        tempData = self.__fetchWithRetry(dataUrl)
        if tempData is None:
            return False
        stockCash = self._parseCash(tempData.split('~'))
        if stockCash is None:
            return
        table = self.__stockTables['cash']
        if not self.__isStockExitsInDate(table, stockCash['code'], stockCash['date']):
            self.__dbOperator.insertIntoDB(table, stockCash)

    def getStockQuotationDetail(self, dataUrl):
        """Fetch one stock's quotation record and insert it if it is new.

        Returns False when the data could not be fetched, otherwise None.
        """
        tempData = self.__fetchWithRetry(dataUrl)
        if tempData is None:
            return False
        stockQuotation = self._parseQuotation(tempData.split('~'))
        if stockQuotation is None:
            return
        table = self.__stockTables['quotation']
        if not self.__isStockExitsInDate(table, stockQuotation['code'], stockQuotation['datetime']):
            self.__dbOperator.insertIntoDB(table, stockQuotation)

    def __getDataFromUrl(self, dataUrl):
        """Download `dataUrl`; return the text with double quotes removed.

        Returns None (after logging) when the request or the read fails.
        """
        r = urllib2.Request(dataUrl)
        try:
            stdout = urllib2.urlopen(r, data=None, timeout=3)
            try:
                raw = stdout.read()
            finally:
                stdout.close()
        except Exception as e:
            self.__logger.error(">>>>>> Exception: " + str(e))
            return None
        stdoutInfo = raw.decode(params.codingtype).encode('utf-8')
        tempData = stdoutInfo.replace('"', '')
        self.__logger.debug(tempData)
        return tempData

    def __crawl(self, urlPrefix, handler):
        """Call `handler` with the URL of every code in every market.

        A failure is logged for that code only, so one bad response no
        longer aborts the rest of the run.
        """
        for symbolTemplate, first, last in self.__markets:
            for code in range(first, last + 1):
                try:
                    handler(urlPrefix + symbolTemplate % code)
                except Exception as err:
                    self.__logger.error(">>>>>> Exception: " + str(code) + " " + str(err))

    def getStockCash(self):
        """Collect the cash-flow data of every stock."""
        self.__logger.debug("开始:收集股票现金流信息")
        self.__crawl("/q=ff_", self.__getStockCashDetail)
        self.__logger.debug("结束:股票现金流收集")

    def getStockQuotation(self):
        """Collect the quotation data of every stock."""
        self.__logger.debug("开始:收集股票交易行情数据")
        self.__crawl("/q=", self.getStockQuotationDetail)
        self.__logger.debug("结束:收集股票交易行情数据")

if __name__ == '__main__':
    # Script entry point: build the collector and run both crawls.
    collector = StockTencent()
    collector.main()

(7)加入到系统任务计划中收集盘后数据

(8)收集后的数据可以用以分析了,例如:

求取10月28日主力净流入最大的股票:select * from stock_cash_tencent where date = '1028' and main_net_cash = (select max(main_net_cash) from stock_cash_tencent where date = '1028')

原来是“兴蓉环境”,当日放量上涨,次日收跌,连续多日有主力资金流入。

excel中做分析:

平安银行的资金流量分析

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。