def DownloadData(local, st, q):
    '''
    Download the daily history of one stock and store it in an HDF5 file.

    local is the HDF5 key, expected as '<prefix>/<kind>/<code>'. <kind> is
        passed to tushare as ``autype`` ('cq' is mapped to None) and <code>
        is the stock code in string type.
    st is time to market in string 'YYYY-MM-DD' type; values of 8 characters
        or fewer are ignored and tushare's default start is used.
    q is a queue holding the HDF5 file name. The name is taken before
        writing and is always put back afterwards, even when the write
        fails (presumably the queue serialises writers -- confirm with the
        caller).

    Returns [True, code] on success (including an empty result) and
    [False, code] on any failure.
    '''
    t = local.split('/')
    kind = t[1]
    code = t[2]
    if kind == 'cq':
        kind = None
    try:
        if len(st) > 8:
            df = ts.get_h_data(code, start=st, autype=kind)
        else:
            df = ts.get_h_data(code, autype=kind)
        if len(df) > 0:
            df = df.sort_index()
            df = df.sort_index(axis=1)
            filename = q.get()
            try:
                try:
                    df.to_hdf(filename, local)
                except Exception:
                    # Fall back to an explicit store opened in append mode.
                    store = pd.HDFStore(filename, mode='a')
                    try:
                        store[local] = df
                        store.flush()
                    finally:
                        store.close()
            finally:
                # Hand the file name back no matter what happened, otherwise
                # every other consumer of the queue blocks forever.
                q.put(filename)
        return [True, code]
    except Exception:
        return [False, code]
def plotstock(name,realname=u'代码'):
    """Plot a stock's close price against the 'sh' series from tushare.

    The top panel shows the whole period returned since getstartdate(), with
    the 'sh' closes rescaled so that both series share the same mean.  The
    bottom panel repeats the comparison for the last 10 rows only.

    name     -- stock code passed to ts.get_h_data
    realname -- label shown in the legend in front of the code
    """
    start = getstartdate()

    stock = ts.get_h_data(name, start=start)
    stock.index = stock.index.astype('datetime64[D]')
    bench = ts.get_h_data('sh', start=start)
    bench.index = bench.index.astype('datetime64[D]')
    stock.sort_index(inplace=True)
    bench.sort_index(inplace=True)

    stock_close = stock["close"]
    bench_close = bench["close"]
    stock_mean = sum(stock_close) / len(stock_close)
    bench_mean = sum(bench_close) / len(bench_close)
    scale = bench_mean / stock_mean

    plt.figure()

    # Full period, benchmark rescaled onto the stock's price level.
    plt.subplot(211)
    stock["close"].plot()
    bench["close"] = bench["close"] / scale
    bench["close"].plot()
    plt.legend([realname+':'+name,u'上证综指'])

    # Last 10 rows, rescaled again using the means of that short window.
    plt.subplot(212)
    recent_stock = stock["close"][-10:]
    recent_stock.plot()
    recent_bench = bench["close"][-10:]
    recent_stock_mean = sum(recent_stock) / len(recent_stock)
    recent_bench_mean = sum(recent_bench) / len(recent_bench)
    recent_bench = recent_bench / recent_bench_mean * recent_stock_mean
    recent_bench.plot()

    plt.show()
def load_data_from_tushare(stock_number, start_date, end_date):
    """Fetch unadjusted and adjusted daily prices and merge them in one frame.

    The unadjusted series (autype=None) supplies open/high/low/close, volume
    and amount; a second download with tushare's default adjustment supplies
    the "fuquan" open and close.  Both are sorted by date ascending.

    Returns a DataFrame with the columns "open price", "high price",
    "low price", "close price", "vol", "amount", "fuquan close price" and
    "fuquan open price".  On any failure an empty DataFrame with the six
    unadjusted columns is returned instead of raising.
    """
    try:
        print("stock number for tushare {}".format(stock_number))
        raw_data = ts.get_h_data(stock_number, start=start_date,
                                 end=end_date, autype=None)
        # DataFrame.sort() no longer exists in pandas; sort_index() is the
        # equivalent of the old argument-less call.
        raw_data = raw_data.sort_index()
        fuquan_data = ts.get_h_data(stock_number, start=start_date,
                                    end=end_date)
        fuquan_data = fuquan_data.sort_index()
        data = {"open price": raw_data["open"],
                "high price": raw_data["high"],
                "low price": raw_data["low"],
                "close price": raw_data["close"],
                "vol": raw_data["volume"],
                "amount": raw_data["amount"],
                "fuquan close price": fuquan_data["close"],
                "fuquan open price": fuquan_data["open"]}
        return pd.DataFrame(data)
    except Exception:
        data = {"open price": [], "high price": [], "low price": [],
                "close price": [], "vol": [], "amount": []}
        return pd.DataFrame(data)
def initData(code,Index=False):
    """
    Return the daily data set for a security, caching it in the database.

    code  -- security code
    Index -- True when code is an index code

    Index data is always downloaded and written to table 'zs<code>'.
    For stocks, readydata('qfq<code>') decides what happens:
      1 -> the cached table 'qfq<code>' is read back ordered by date;
      0 -> the data is downloaded, stored, stamped in tb_stamp, and then
           discarded (None) when the stock has no row for the latest date
           found in table zs159915;
      anything else -> None.

    Returns the data set, or None when no usable data is available
    (for example nothing was downloaded or the stock is suspended).
    """
    b2.log('获取%s数据..........'%code)
    print('获取%s数据..........'%code)

    if Index:
        ds = ts.get_h_data(code, index=Index)
        if ds is not None:
            ds.to_sql('zs'+code, engine, if_exists='replace')
        return ds

    tbstatus = readydata('qfq%s'%code)
    if tbstatus == 1:
        b2.log('qfq%s......Data Ready'%code)
        sqlcmd = 'select * from qfq%s order by date'%code
        return sql.read_sql(sqlcmd, engine, index_col='date')
    if tbstatus != 0:
        return None

    ds = ts.get_h_data(code)
    if ds is None:
        # tushare returns None when there is nothing to download.
        return None
    # DataFrame.sort() was removed from pandas; sort_index() is equivalent.
    ds = ds.sort_index()
    ds.to_sql('qfq'+code, engine, if_exists='replace')
    engine.execute('''insert into tb_stamp values ('qfq%s',curdate())'''%code)
    sqlcmd = '''select count(*) from qfq%s where date=(select max(date) from zs159915)'''%(code)
    result = engine.execute(sqlcmd)
    if list(result)[0][0] == 0:
        ds = None
    return ds
def download_stock_hist_price(self, code, start, end=None):
    """Download the historical prices of one stock from tushare.

    code  -- stock code (string)
    start -- start date passed positionally to ts.get_h_data
    end   -- optional end date; when omitted tushare's default is used

    Returns whatever ts.get_h_data returns.
    """
    print("downloading " + code)
    call_args = (code, start) if end is None else (code, start, end)
    return ts.get_h_data(*call_args)
def get_adj_price_data(stocks_index, timeToMarket):
    '''
    Download the full 'hfq' adjusted price history of a stock in 2-year steps.

    stocks_index :: a string of the stock number like '002292'
    timeToMarket :: a string of the IPO date like '20120229'
    return :: a DataFrame containing the adj. price data, newest window
              first (the order the windows were stacked in before), or
              None when tushare returned no data for any window
    '''
    import pandas as pd  # local import: only needed for the final concat

    day_format = '%Y-%m-%d'
    window = datetime.timedelta(days=730)  # getting data of 2 years each time
    today = datetime.datetime.today()
    d0 = datetime.datetime(int(timeToMarket[0:4]),
                           int(timeToMarket[4:6]),
                           int(timeToMarket[6:8]))

    chunks = []
    while True:
        d1 = d0 + window
        # The window that reaches today is the last one and is cut at today.
        is_last = (today - d1).days <= 0
        end = today if is_last else d1
        chunk = ts.get_h_data(stocks_index, autype='hfq',
                              start=d0.strftime(day_format),
                              end=end.strftime(day_format))
        # A window without data must not discard the other windows.
        if chunk is not None:
            chunks.append(chunk)
        if is_last:
            break
        d0 = d1 + datetime.timedelta(days=1)

    if not chunks:
        return None
    chunks.reverse()
    return pd.concat(chunks)
def getdata(code):
    """Download the daily history of ``code`` and append it to table h_data.

    The code is first requested as a stock starting at
    stock_timetomarket[code]; when that yields nothing it is requested again
    as an index.  When both attempts return no data nothing is written.
    """
    dayhistory = ts.get_h_data(code, stock_timetomarket[code])
    # Truth-testing a DataFrame raises ValueError, so test explicitly.
    if dayhistory is None or dayhistory.empty:
        dayhistory = ts.get_h_data(code, stock_timetomarket[code], index=True)
    if dayhistory is None or dayhistory.empty:
        print('\n code %s no data, skipped\n' % code)
        return
    dayhistory['code'] = code
    dayhistory.index = dayhistory.index.date
    print('\n code %s insert......\n' % code)
    dayhistory.to_sql('h_data', engine, if_exists='append')
def __init__(self, industry, benchmark, datatype, dic, start,end):
    """Build the benchmark and industry return series.

    industry  -- industry label used to select rows of dic['stock']
    benchmark -- benchmark code passed to ts.get_hist_data
    datatype  -- 'cap' weights each stock's return by its share of
                 totalAssets; 'beta' weights it by 1/beta against the
                 benchmark return
    dic       -- mapping with the keys 'stock' and 'interest'
    start,end -- date strings; start is expected as 'YYYY-MM-DD' (the
                 dashes are stripped for the trade-calendar lookup)

    Raises KeyError when datatype is neither 'cap' nor 'beta'.
    """
    global ms
    ms = ts.Master()
    self.stock = dic['stock']
    self.interest = dic['interest']
    self.industry = industry
    self.date = pd.date_range(start, end)
    self.days = len(self.date)
    self.beta, self.alpha, self.ir = None, None, None

    # Previous trading day before `start`, used as the first price point.
    compact_start = start.replace("-", "")
    first_prev = ms.TradeCal(exchangeCD='XSHG',
                             beginDate=compact_start,
                             endDate=compact_start,
                             field='calendarDate,prevTradeDate').prevTradeDate.iloc[0]

    # Build Benchmark
    self.benchmark = ts.get_hist_data(benchmark, first_prev, end)
    self.benchreturn = returns(
        self.benchmark[self.benchmark.index >= start].close,
        self.benchmark.close[:-1])
    self.benchreturn.index = pd.DatetimeIndex(
        [pd.to_datetime(i) for i in self.benchreturn.keys()])

    # Datatype Effect
    if datatype == 'cap':
        weight = self.stock.totalAssets[self.stock.industry == industry]
        weight = 1.0 * weight / weight.sum()  # a series of cap weights
        d = 0
        for i in weight.index:
            a = ts.get_h_data(i, first_prev, end)
            a_close = a.close
            if len(a.index) != len(self.benchmark.index):
                a_close = equal_len(a.close, self.benchmark)
            return_of_i = returns(
                a_close[a_close.index >= start],
                a_close[a_close.index < end]) * weight.loc[i]
            # Cap-weighted return of the industry (index is date).
            d = d + return_of_i
    elif datatype == 'beta':
        index_list = self.stock.index[self.stock.industry == industry]
        total_beta, d = 0, 0
        for i in index_list:
            a = ts.get_h_data(i, first_prev, end)
            return_of_i = returns(a[a.index >= start].close, a.close[:-1])
            beta_i = 1.0 / beta(return_of_i, self.benchreturn)
            d += return_of_i * beta_i
            # Was `beta_is`, an undefined name that made this branch fail.
            total_beta += beta_i
        d = 1.0 * d / total_beta
    else:
        raise KeyError('datatype can only be cap or beta')
    self.returns = d
def request_dayk(table, code, engine, start_date = '1990-01-01', end_date = '2050-01-01'):
    """Download daily bars for ``code`` and append them to ``table``.

    The unadjusted series is downloaded first; the 'hfq' adjusted
    open/high/low/close are added as the columns open_hfq, high_hfq, low_hfq
    and close_hfq, plus a 'code' column.

    Failures are logged together with their traceback and never raised.
    """
    try:
        dayK_bfq = ts.get_h_data(code, start_date, end_date, None, retry_count=500)
        dayK_hfq = ts.get_h_data(code, start_date, end_date, 'hfq', retry_count=500)
        for field in ('open', 'high', 'low', 'close'):
            dayK_bfq[field + '_hfq'] = dayK_hfq[field]
        dayK_bfq['code'] = code
        dayK_bfq.to_sql(table, engine, if_exists='append', dtype={'date': Date})
        logging.info(str(code) + ', request_dayk success')
    except Exception:
        # logging.exception keeps the reason for the failure in the log.
        logging.exception(str(code) + ' request_dayk failed on ' + str(threading.current_thread()))
def DownloadCqAll(code,st):
    '''
    Download the unadjusted daily history of one stock.

    code is stockcode in string type
    st is time to market in string 'YYYY-MM-DD' type; strings of 2
       characters or fewer mean "no start date"

    Returns [code, df] with df sorted by index and with sorted columns.
    '''
    has_start = len(st) > 2
    options = {'autype': None, 'retry_count': 5, 'pause': 1}
    if has_start:
        options['start'] = st
    df = ts.get_h_data(code, **options)
    if has_start:
        df = df.sort_index(ascending=1)
    df = df.sort_index(axis=0)
    df = df.sort_index(axis=1)
    return [code, df]
def load_data_from_tushare_real_time(stock_number, start_date):
    """Fetch history from tushare and append the latest quote from get_sina_data.

    The unadjusted history (autype=None) and the default-adjusted history are
    downloaded and sorted by date.  One extra row built from
    get_sina_data(stock_number) is appended; the adjusted open/close of that
    row reuse the raw open/close.

    Returns a DataFrame with the columns "open price", "high price",
    "low price", "close price", "vol", "amount", "date",
    "fuquan close price" and "fuquan open price", all numeric values as
    float.  On any failure an empty DataFrame with the six unadjusted
    columns is returned instead of raising.
    """
    try:
        print("stock number for tushare {}".format(stock_number))
        raw_data = ts.get_h_data(stock_number, start=start_date, autype=None)
        # DataFrame.sort() was removed from pandas; sort_index() is equivalent.
        raw_data = raw_data.sort_index()
        fuquan_data = ts.get_h_data(stock_number, start=start_date)
        fuquan_data = fuquan_data.sort_index()

        o, h, l, c, v, a, d = get_sina_data(stock_number)

        def with_latest(values, latest):
            # Real lists are built so the result is the same on Python 2 and
            # 3 (map() is lazy on Python 3 and has no append()).
            return [float(x) for x in values] + [float(latest)]

        data = {"open price": with_latest(raw_data["open"].values, o),
                "high price": with_latest(raw_data["high"].values, h),
                "low price": with_latest(raw_data["low"].values, l),
                "close price": with_latest(raw_data["close"].values, c),
                "vol": with_latest(raw_data["volume"].values, v),
                "amount": with_latest(raw_data["amount"].values, a),
                # Keep only the date part of each timestamp.
                "date": [str(x).split(" ")[0] for x in raw_data.index] + [d],
                "fuquan close price": with_latest(fuquan_data["close"].values, c),
                "fuquan open price": with_latest(fuquan_data["open"].values, o)}
        return pd.DataFrame(data)
    except Exception:
        data = {"open price": [], "high price": [], "low price": [],
                "close price": [], "vol": [], "amount": []}
        return pd.DataFrame(data)
def get_data():
    """Refresh the global DatasrcMap with the most recent daily bars.

    Downloads gl.STCode between the module-level startDate and endDate,
    keeps the last 70 rows (date ascending) and stores them as
    DatasrcMap[i] = [date, open, high, low, close, volume, amount] with
    i counting from 0.

    Returns 1 on success and -1 when the download failed or returned None.
    """
    global DatasrcMap
    DatasrcMap.clear()
    try:
        dataframe = ts.get_h_data(gl.STCode, start=startDate, end=endDate)
    except Exception as e:
        print(e)
        print('sleep。。。。。。。。。。。。。。。。。')
        time.sleep(1)  # short back-off after a network error
        return -1
    if dataframe is None:
        print('\nNone。。。。。。。。。。。。。。。。。。。。')
        return -1
    print('\n0:tushare获取成功')
    dataframe.sort_index(inplace=True)   # ascending by date
    dataframe = dataframe.tail(10+60)    # keep the most recent 70 rows

    fields = ('open', 'high', 'low', 'close', 'volume', 'amount')
    for day, (stamp, row) in enumerate(dataframe.iterrows()):
        # Reading scalars from the row avoids float() on a one-row slice.
        values = [float(row[field]) for field in fields]
        DatasrcMap[day] = [stamp.strftime('%Y-%m-%d')] + values
    return 1
def down_dk_all(code, i):
    """Download 'hfq' daily data for ``code`` and append it to table 'a<code>'.

    Only the close and amount columns are kept (amount divided by 10000).
    The download starts at the latest date already stored in the table, or
    at 2013-01-01 when the table does not exist; the last row of the result
    is dropped before writing.  The number of rows kept is recorded in
    G_CODE.iat[i, 4].

    A ValueError during the download is printed and the download retried
    until it succeeds.
    """
    global G_CODE
    import pandas as pd  # local import: used for the isinstance check only

    t_name = 'a' + code
    if me.IsTableExist(t_name, G_DBengine) == False:
        s_date = '2013-01-01'
    else:
        s_date = me.GetLatestDateFromTable(t_name, G_DBengine)

    is_succ = False
    while not is_succ:
        try:
            df = ts.get_h_data(code, autype='hfq', start=s_date)
            # An empty frame has no last row to drop, so it is skipped too.
            if isinstance(df, pd.DataFrame) and df.index.size > 0:
                print('%s %s' % (s_date, df.index.size))
                df = df.drop(['open', 'high', 'low', 'volume'], axis=1)
                df['amount'] = df['amount'] / 10000
                df = df.drop(df.index.values[df.index.size - 1])
                G_CODE.iat[i, 4] = df.index.size
                if df.index.size != 0:
                    df.to_sql(t_name, G_DBengine, if_exists='append')
            is_succ = True
        except ValueError as e:
            print('ValueError: %s' % e)
def download_kline_source_select(code, date_start, date_end):
    """Download daily k-line data, choosing the tushare API by code length.

    Six-character codes are fetched with ts.get_h_data (default adjustment);
    any other code is fetched with ts.get_hist_data.

    Returns a DataFrame restricted to the columns KEY_CODE, KEY_DATE,
    KEY_OPEN, KEY_HIGH, KEY_CLOSE, KEY_LOW and KEY_VOLUME, or None when no
    data came back or an error occurred (the error text is printed).
    """
    try:
        if len(code) == 6:
            df_qfq = ts.get_h_data(str(code), start=date_start, end=date_end)
        else:
            df_qfq = ts.get_hist_data(str(code), start=date_start, end=date_end)
        # tushare returns None when there is no data at all.
        if df_qfq is None or len(df_qfq) == 0:
            return None
        df_qfq[KEY_CODE] = code
        df_qfq[KEY_DATE] = df_qfq.index
        columns = [KEY_CODE, KEY_DATE, KEY_OPEN, KEY_HIGH, KEY_CLOSE,
                   KEY_LOW, KEY_VOLUME]
        df_qfq = df_qfq[columns]
        print(df_qfq.head())
        return df_qfq
    except Exception as e:
        print(str(e))
        return None
def get_stock_data(stock_list, start_date):
    """Download the history of every code in ``stock_list``.

    Returns a list of dicts, one per code and in the same order, of the form
    {'code': <code>, 'data': <DataFrame sorted by date ascending>}.
    """
    return [
        {
            'code': stockcode,
            # DataFrame.sort() was removed from pandas; sort_index() sorts
            # by the index just like the old argument-less call did.
            'data': ts.get_h_data(code=stockcode,
                                  start=start_date).sort_index(ascending=True),
        }
        for stockcode in stock_list
    ]
def __get_data_tushare(code, start, end, look_back):
    """Fetch open/high/low/close/volume with extra history before ``start``.

    look_back is meant as a number of trading days, but the window is
    computed in calendar days, so ADJUST extra days are added to make sure
    enough rows are retrieved.
    FIXME: look_back_pos may still not equal (start - look_back trading days).

    Windows starting three or more calendar years back use ts.get_h_data
    (called with index=True); more recent ones use ts.get_hist_data.

    Returns a DataFrame with the columns open, high, low, close and volume;
    it is empty when nothing was fetched.
    """
    ADJUST = 210
    look_back_pos = parse(start) - timedelta(look_back + ADJUST)
    look_back_pos = str(look_back_pos.date())
    if parse(look_back_pos).year <= datetime.now().year - 3:
        hist_data = ts.get_h_data(
            code=code, start=look_back_pos, end=end, index=True,
            pause=1, retry_count=5)
    else:
        hist_data = ts.get_hist_data(
            code=code, start=look_back_pos, end=end)

    # we only need selected columns
    columns = ['open', 'high', 'low', 'close', 'volume']
    # Test for None first: len(None) would raise before the None check.
    if hist_data is None or len(hist_data) == 0:
        print('fetched data returned 0 row, something was wrong')
        hist_data = pd.DataFrame(columns=columns)
    if len(hist_data) < look_back:
        print('did not retrieve enough data')
    return pd.DataFrame(hist_data[columns])
def _getIndexDaysFromTuShare(self, code, startDate, endDate, fields, name=None):
    """
    Fetch the daily bars of an index from TuShare.

    code is passed to TuShare without its last three characters.  The call
    is preceded by a sleep of self.tuShareDaysSleepTimeConst.

    Returns a DataFrame holding 'datetime' plus the requested ``fields``
    (Wind-style column names), or None when the download raised.
    """
    symbol = code[:-3]

    sleep(self.tuShareDaysSleepTimeConst)

    try:
        df = ts.get_h_data(symbol, startDate, endDate, index=True)
        if df is not None and not df.empty:
            df = df.sort_index()
        else:
            # If no data, TuShare return None
            df = pd.DataFrame(columns=['open', 'high', 'close', 'low', 'volume', 'amount'])
    except Exception as ex:
        self._info.print("从TuShare获取{}({})日线数据[{}, {}]失败: {}".format(code, name, startDate, endDate, ex), DyLogData.error)
        return None

    # no turn and factor for index
    df['turnover'] = 0
    df['factor'] = 1

    # change to Wind's indicators: move the time index into a column
    df.index.name = None
    df.reset_index(inplace=True)
    windNames = {'index': 'datetime', 'amount': 'amt', 'turnover': 'turn', 'factor': 'adjfactor'}
    df.rename(columns=windNames, inplace=True)

    # normalise the time part of every date to 00:00:00
    dayTexts = df['datetime'].map(lambda x: x.strftime('%Y-%m-%d'))
    df['datetime'] = dayTexts
    df['datetime'] = pd.to_datetime(df['datetime'], format='%Y-%m-%d')

    # select according @fields
    return df[['datetime'] + fields]
def main():
    """Demo entry point: run StockBox.base_function and print price history."""
    stock_id = '300333'

    sb = StockBox()
    sb.base_function(stock_id)

    # History as returned by get_hist_data (roughly the last three years)
    history = ts.get_hist_data(stock_id)
    print(u"历史3年的数据")
    print(history.head(10))

    # Full history.  The start date used to be '20015101', which is not a
    # valid date; get_h_data expects 'YYYY-MM-DD'.
    # NOTE(review): '2001-01-01' is the assumed intent -- confirm.
    history_all = ts.get_h_data(stock_id, '2001-01-01', '2016-01-01')
    print(u'所有的历史数据')
    print(history_all)
def getHistoryData(datapath, sid, start=None, end=None):
    '''
    Download the daily history of ``sid`` and save it as one CSV file.

    The range is fetched in windows of at most three years, newest first,
    and the pieces are concatenated.  The file is written to
    <datapath>/<sid>_<start>_<end>.csv (lower-cased).

    :param datapath: directory the CSV file is written to
    :param sid: stock code
    :param start: first day 'YYYY-MM-DD'; defaults to three years before end
    :param end: last day 'YYYY-MM-DD'; defaults to today
    :return: None
    :raises ValueError: when no data at all was returned for the range
    '''
    DAYFORMAT = '%Y-%m-%d'
    MAXINTERVAL = 365 * 3

    if end is None:
        end = datetime.strftime(datetime.today(), DAYFORMAT)
    last_day = datetime.strptime(end, DAYFORMAT)
    if start is None:
        start = datetime.strftime(last_day - timedelta(days=MAXINTERVAL), DAYFORMAT)
    first_day = datetime.strptime(start, DAYFORMAT)

    totalData = []
    window_end = last_day
    while window_end >= first_day:
        # Clamp the window to `start` so the oldest, shorter piece is
        # fetched too instead of being skipped.
        window_start = max(first_day, window_end - timedelta(days=MAXINTERVAL))
        temp = tushare.get_h_data(
            sid,
            start=datetime.strftime(window_start, DAYFORMAT),
            end=datetime.strftime(window_end, DAYFORMAT))
        if temp is not None:
            totalData.append(temp)
        # Step one day past the window so boundary days are not fetched twice.
        window_end = window_start - timedelta(days=1)

    if not totalData:
        raise ValueError('no history data returned for %s between %s and %s'
                         % (sid, start, end))

    AllData = pandas.concat(totalData)
    filename = os.path.join(datapath, '_'.join([sid, start, end])) + '.csv'
    AllData.to_csv(filename.lower())
def append_days(self,stock, start, end):
    '''
    Append the data of ``stock`` within [start, end] to table 'day_<stock>'.

    Nothing is written when tushare returns no rows for the range.
    '''
    data = ts.get_h_data(stock, start=start, end=end)
    # tushare returns None when the range contains no data.
    if data is None or len(data) == 0:
        return
    data = data.sort_index(ascending=True)
    data.to_sql('day_'+stock, self.engine, if_exists='append')
def getDayLine(from_num, to_num,startDay = 0, endDay = None):
    """Download day-line data for a slice of df_base into table qfq_day.

    Rows of df_base are walked in order while the global control_num counts
    them: rows before from_num are skipped, the walk stops when control_num
    reaches to_num.

    startDay -- 'YYYY-MM-DD', or 0 to start each stock at its own
                timeToMarket date
    endDay   -- 'YYYY-MM-DD'; defaults to today, evaluated at call time

    A stock whose download or insert fails is appended to the file
    'qfq_err<endDay>'; control_num is not advanced for it.
    """
    global df_base, control_num, engine
    if endDay is None:
        endDay = time.strftime('%Y-%m-%d', time.localtime())

    for stocknum, row in df_base.iterrows():
        try:
            if control_num < from_num:
                pass
            elif control_num == to_num:
                break
            else:
                if startDay == 0:
                    # download the day line data from the beginning; kept in
                    # a per-stock local so the next stock gets its own date
                    listed = str(row['timeToMarket'])
                    stock_start = listed[:4] + '-' + listed[4:6] + '-' + listed[6:8]
                else:
                    stock_start = startDay
                qfq_history = ts.get_h_data(stocknum, start=stock_start,
                                            end=endDay, retry_count=10)
                qfq_history.insert(0, 'stocknum', stocknum)
                qfq_history.to_sql('qfq_day', engine, if_exists='append')
            control_num += 1
        except Exception:
            with open('qfq_err' + endDay, 'a') as f:
                f.write(stocknum + '\n')
def recoveDayline(startDay, endDay):
    """Retry the downloads recorded in the error file 'qfq_err<endDay>'.

    Each line of the file starts with a 6-character stock code.  Every code
    is downloaded again and appended to table qfq_day.  Afterwards the file
    holds only the codes that failed again; it is removed when none are left.

    startDay -- 'YYYY-MM-DD', or 0 to start each stock at its own
                timeToMarket date from df_base
    endDay   -- 'YYYY-MM-DD', also the suffix of the error file name
    """
    global df_base, control_num, engine
    err_path = 'qfq_err' + endDay
    if not os.path.exists(err_path):
        return

    with open(err_path) as f:
        lines = f.readlines()

    still_failing = []
    for lineNo, line in enumerate(lines):
        if not line.strip():
            continue  # blank line, nothing to recover
        stocknum = line[:6]
        try:
            if startDay == 0:
                # Per-stock local, so one stock's IPO date is never reused
                # for the following stocks.
                listed = str(df_base.loc[stocknum]['timeToMarket'])
                stock_start = listed[:4] + '-' + listed[4:6] + '-' + listed[6:8]
            else:
                stock_start = startDay
            qfq_history = ts.get_h_data(stocknum, start=stock_start,
                                        end=endDay, retry_count=10)
            qfq_history.insert(0, 'stocknum', stocknum)
            qfq_history.to_sql('qfq_day', engine, if_exists='append')
            print(lineNo)
        except Exception:
            # Keep the code for the next run and carry on with the rest.
            still_failing.append(line)

    if still_failing:
        with open(err_path, 'w') as f:
            f.writelines(still_failing)
    else:
        os.remove(err_path)
def get_url_data_(self):
    """Store the current stock code list and every stock's history in testdb.

    The code list is written to table 'code_list_'; each stock's history
    (already adjusted, as returned by tushare.get_h_data) is written to a
    table named after its code.  Every table gets a running integer column
    'index', which is then declared the primary key.  Existing tables are
    replaced.  Stocks for which tushare returns no data are skipped.
    """
    def add_primary_key(table):
        # Make the 'index' column the primary key, closing the connection.
        connection = self.engine_.connect()
        try:
            connection.execute('alter table testdb.' + table + ' add primary key(`index`)')
        finally:
            connection.close()

    # Code list from tushare, sorted by code.  The frame's own 0..n-1 index
    # is what gets written as the 'index' column.
    code_list_ = pandas.DataFrame((tushare.get_today_all())['code'])
    code_list_ = code_list_.sort_values(by='code', ascending=True).reset_index(drop=True)
    code_list_.to_sql('code_list_', self.engine_, if_exists='replace', index=True, index_label='index')
    add_primary_key('code_list_')

    # Fetch every stock of the list in turn.
    for code in code_list_['code']:
        stock_data_ = tushare.get_h_data(code)
        if stock_data_ is None:
            continue
        # The date index cannot be written as the table index, so it is
        # copied into a regular column first.
        stock_data_['date'] = stock_data_.index
        stock_data_ = stock_data_.sort_values(by='date', ascending=True).reset_index(drop=True)
        stock_data_.to_sql(code, self.engine_, if_exists='replace', index=True, index_label='index')
        add_primary_key(code)
def getLowestGrowth(startDate, endDate,stockList):
    """Compute the max/min close and their relative spread for each stock.

    stockList is consumed as a work stack: codes are popped from its end as
    they are processed, so the caller's list is empty afterwards.  Stocks
    whose realtime price is not above 0 (not trading today) are skipped.
    A URLError leaves the code on the stack so it is retried; any other
    error drops the code.

    Returns a dict {code: {'maxPxAll', 'minPxAll', 'maxGrowthRate'}} where
    maxGrowthRate = (max - min) / min of the close price in the range.
    """
    import pandas as pd  # local import: needed for to_numeric only

    result = {}
    while len(stockList) > 0:
        stockCode = stockList[-1]
        try:
            print('%s is started' % stockCode)
            # only stocks that trade today
            price = float(ts.get_realtime_quotes(stockCode).price.iloc[0])
            if price > 0:
                df_tran = ts.get_h_data(stockCode, start=startDate, end=endDate)
                # convert the close price to numbers
                closes = pd.to_numeric(df_tran['close'], errors='coerce')
                stock = {}
                stock['maxPxAll'] = closes.max()
                stock['minPxAll'] = closes.min()
                stock['maxGrowthRate'] = (stock['maxPxAll'] - stock['minPxAll']) / stock['minPxAll']
                result[stockCode] = stock
                print('%s is finished' % stockCode)
            stockList.pop()
        except URLError as e:
            print('Error %s %s' % (stockCode, str(e)))
            continue
        except Exception as e:
            print('Error %s %s' % (stockCode, str(e)))
            stockList.pop()
            continue
    # The collected figures used to be thrown away; hand them to the caller.
    return result
def inital_stock_data(stock_code):
    '''
    Load (fully or incrementally) the daily data of one stock into MySQL.

    The stock's table is created first if it does not exist yet.  The
    download starts one day after the date reported by
    last_date_table(stock_code) and ends today; the rows are appended to
    table 'hdata_<stock_code>'.

    Returns 1 when the load worked or when there was simply no trading data
    in the range, and 0 when anything went wrong.
    '''
    one_day = 3600*24
    try:
        # create the table on first use
        create_stock_info_table(stock_code)

        # the start date is the last loaded date plus one day
        last_loaded = last_date_table(stock_code)
        start_seconds = int(dt.mktime(last_loaded.timetuple())) + one_day
        startdate = dt.strftime("%Y-%m-%d", dt.localtime(start_seconds))
        enddate = dt.strftime("%Y-%m-%d", dt.localtime())
        print("startdate is :",startdate," end date is:",enddate, " stock code is:",stock_code)

        rs = ts.get_h_data(stock_code, start=startdate, end=enddate)
        if rs is not None:
            pd.DataFrame.to_sql(rs, "hdata_"+stock_code, con=conn, flavor='mysql', if_exists='append',index=True)
            print("提取股票%s数据正确"%stock_code)
        else:
            # nothing returned, e.g. the stock was suspended in this range
            print("股票%s在这段日期内没有交易数据\n"%stock_code)
        return 1
    except:
        print("提取股票%s数据出错"%stock_code)
        return 0
def get_his_data(code, start='2012-01-01', end=None, ma=None, period='day', column_name='close',index=False,if_ma=True):
    """Fetch price history and optionally add moving averages.

    code        -- stock or index code
    start       -- first day of the returned data, 'YYYY-MM-DD'
    end         -- last day, 'YYYY-MM-DD'; defaults to today (call time)
    ma          -- moving-average window lengths; defaults to
                   [5, 12, 13, 18, 20, 30, 60, 120]
    period, column_name -- passed through to MA_CALCULATOR.get_ma
    index       -- passed to ts.get_h_data as ``index``
    if_ma       -- compute the moving averages only when True

    Data is requested from 2 * max(ma) days before ``start`` so the averages
    have history to work with, then trimmed back to ``start``.  When
    ts.get_h_data fails, ts.get_hist_data is used instead.

    Raises ValueError when the trimmed result still contains NaN values.
    """
    if end is None:
        end = str(datetime.datetime.today())[0:10]
    if ma is None:
        ma = [5, 12, 13, 18, 20, 30, 60, 120]

    lookback = datetime.timedelta(max(ma) * 2)
    _start = str(datetime.datetime.strptime(start, '%Y-%m-%d') - lookback)[:10]

    try:
        df = ts.get_h_data(code, start=_start, end=end, index=index)
    except Exception as e:
        print(e)
        print('Using ts.get_hist_data instead,only achieve 3 year data')
        df = ts.get_hist_data(code, start=_start, end=end)

    try:
        # np.str was only an alias of the builtin and no longer exists.
        df.index = df.index.astype(str)
        print(df.index)
    except Exception as e:
        print(e)
        print('Probabaily because of no data entry')

    if if_ma == True:
        df = MA_CALCULATOR(df)
        df = df.get_ma(ll=ma, period=period, column_name=column_name)

    df = df[start:]
    if df.isnull().any().any():
        print(df)
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise ValueError('Need more date info for calculating MA before last NaN')
    return df
def fetchStockData(code, output_csv=None):
    """Download a stock's history and add 'ema' and 'rise' columns.

    With rows sorted by date ascending and n the 0-based row number:
      ema[0]  = close[0]
      ema[n]  = ((n - 1) * ema[n-1] + 2 * close[n]) / (n + 1)
      rise[0] = 0
      rise[n] = (close[n] - close[n-1]) / close[n-1]

    The frame is also written to ``output_csv`` when a path is given.
    Returns the DataFrame.
    """
    StockDf = ts.get_h_data(code)
    StockDf = StockDf.sort_index(axis=0, ascending=True)

    closes = StockDf['close'].values
    ema_values = []
    rise_values = []
    for n, close in enumerate(closes):
        if n == 0:
            ema = close
            rise = 0
        else:
            # Uses the previous row's EMA.  The code used to read row
            # n - 11, which wraps around to rows not computed yet.
            # NOTE(review): n - 1 is the assumed intent -- confirm.
            ema = ((n - 1) * ema_values[-1] + 2 * close) / (n + 1)
            prev_close = closes[n - 1]
            rise = (close - prev_close) / prev_close
        ema_values.append(ema)
        rise_values.append(rise)

    # adding EMA feature
    StockDf['ema'] = ema_values
    StockDf['rise'] = rise_values

    if output_csv is not None:
        StockDf.to_csv(output_csv)
    return StockDf
def get_security_all_history(code_list):
    '''
    Download the history of every code in code_list and save each as CSV.

    tushare.get_h_data() can query the whole history of a stock; it returns
    7 columns: date, open, high, close, low, volume, amount.
    tushare.get_hist_data() only covers about 3 years but returns 14
    columns: date, open, high, close, low, volume, price_change, p_change,
    ma5, ma10, ma20, v_ma5, v_ma10, v_ma20, turnover.

    The range 2000-01-01 .. 2016-08-01 is requested for every code and
    written to '../data/origin/tushare/sh/<code>.csv'.  A code whose
    download fails or returns nothing is logged and skipped.

    Parameters
    ------
        code_list: iterable of stock codes
    return
    -------
        None
    '''
    for code in code_list:
        try:
            tmp_data_h = tushare.get_h_data(code, start='2000-01-01', end='2016-08-01')
        except Exception as exception:
            stl_logger.data_manager_logger(__file__).error('tushare.get_h_data(%s) excpetion, args: %s' % (code, str(exception.args)))
            # Without this the loop would go on with an undefined frame, or
            # with the previous code's frame, and save it under this code.
            continue

        if tmp_data_h is None:
            stl_logger.data_manager_logger(__file__).warning('tushare.get_h_data(%s) return none' % code)
            continue

        data_str_h = tmp_data_h.to_csv()
        with open('../data/origin/tushare/sh/%s.csv' % code, 'wt') as fout:
            fout.write(data_str_h)
def get_stock_his_day_Data(code, startDay, endDay):
    """Yield the history of ``code`` one calendar year at a time.

    startDay / endDay may be given as 'YYYY-MM-DD', 'YYYYMMDD' or an integer
    YYYYMMDD.  The range is clamped to the stock's timeToMarket date (from
    ts.get_stock_basics) and to today.  One ts.get_h_data result is yielded
    per calendar year; querying by year is done for reliability.
    """
    df = ts.get_stock_basics()
    tmDate = df.loc[code]['timeToMarket']

    # Normalise both bounds to integers of the form YYYYMMDD.
    startDay = np.int64(str(startDay).replace('-', ''))
    endDay = np.int64(str(endDay).replace('-', ''))
    if startDay < tmDate:
        startDay = tmDate
    today = np.int64(str(datetime.date.today()).replace('-', ''))
    if endDay > today:
        endDay = today

    # Floor division keeps the years integral on Python 2 and 3 alike.
    first_year = int(startDay // 10000)
    last_year = int(endDay // 10000)
    sstartDay, sendDay = str(startDay), str(endDay)

    for nyear in range(first_year, last_year + 1):
        if nyear == first_year:
            tmpStart = sstartDay[0:4] + '-' + sstartDay[4:6] + '-' + sstartDay[6:8]
        else:
            tmpStart = str(nyear) + '-01-01'
        if nyear == last_year:
            tmpEnd = sendDay[0:4] + '-' + sendDay[4:6] + '-' + sendDay[6:8]
        else:
            tmpEnd = str(nyear) + '-12-31'
        logging.debug("get code:%s history data from %s to %s" % (code, tmpStart, tmpEnd))
        yield ts.get_h_data(code, start=tmpStart, end=tmpEnd)
def load_data(self, pcontract, dt_start=None, dt_end=None):
    """Load the history of ``pcontract`` from tushare.

    dt_start and dt_end are normalised with _process_dt; a missing start
    falls back to _VERY_EARLY_START.  The rows returned by tushare are
    reversed before being handed to _process_tushare_data, whose result is
    returned.
    """
    begin = _process_dt(dt_start) or _VERY_EARLY_START
    finish = _process_dt(dt_end)
    raw = ts.get_h_data(pcontract.contract.code, start=begin, end=finish)
    reversed_rows = raw.iloc[::-1]
    return _process_tushare_data(reversed_rows)
def get_history_index_data_by_date(self, code, start_date_str, end_date_str, frequency):
    """Return index history from get_h_data with an added 'pct_chg' column.

    code must be 9 characters long, otherwise None is returned; only its
    first 6 characters are sent to tushare.  'pct_chg' is taken from the
    'p_change' column of ts.get_hist_data (requested with ktype=frequency)
    and matched to the get_h_data rows by date.
    """
    if len(code) != 9:
        return None
    symbol = code[0:6]

    recent = ts.get_hist_data(symbol, start=start_date_str,
                              end=end_date_str, ktype=frequency)
    # Re-key p_change by real datetimes so it lines up with the index of
    # the get_h_data frame on assignment.
    pct_change = recent['p_change'].copy()
    pct_change.index = [datetime.strptime(day, '%Y-%m-%d') for day in recent.index]

    df = ts.get_h_data(symbol, start=start_date_str,
                       end=end_date_str, index=True)
    df['pct_chg'] = pct_change
    return df
def getByDate(self, mongo, func, code, date):
    """Fetch one day of data for ``code`` and store it in MongoDB.

    func selects the tushare call and also names the target collection
    mongo.trading[func]: "tick_data", "h_data", "hist_data" or "sina_dd".
    Every stored record gets the extra keys 'code' and 'date'.

    Raises ValueError for any other func.  Nothing is inserted when tushare
    returns no rows.
    """
    if func == "tick_data":
        df = ts.get_tick_data(code, date=date)
    elif func == "h_data":
        df = ts.get_h_data(code, start=date, end=date)
    elif func == "hist_data":
        print(date)
        print(type(date))
        # get_hist_data has no `date` argument; one day is start == end.
        df = ts.get_hist_data(code, start=date, end=date)
    elif func == "sina_dd":
        df = ts.get_sina_dd(code, date=date)
    else:
        raise ValueError('unsupported func: %r' % (func,))

    if df is None:
        return

    tmpJson = json.loads(df.to_json(orient='records'))
    for record in tmpJson:
        record[u'code'] = code
        record[u'date'] = date
        print(record)

    if tmpJson:
        coll = mongo.trading[func]
        coll.insert(tmpJson)
def download_history_data_fq(autype='qfq', startTime=None):
    '''
    Download adjusted daily k-line history for every stock code.

    autype    -- adjustment type passed to tushare; also the suffix of the
                 target table 'history_data_<autype>'
    startTime -- start date; defaults to utils.today_last_year(6)

    Each stock's rows get a leading 'code' column and are appended to the
    table.  A failure for one stock is logged and the loop continues.
    '''
    conn = db.get_history_data_db('D')
    start = utils.today_last_year(6) if startTime is None else startTime
    for code in get_all_stock_code():
        # autype is passed on so the data really matches the table it is
        # stored in (it used to be ignored).
        df = ts.get_h_data(code, start=start, autype=autype, drop_factor=False)
        if df is None:
            continue
        try:
            df.insert(0, 'code', code)
            sql.to_sql(df, name='history_data_%s' % autype, con=conn, index=True, if_exists='append')
            log.info('%s,%s history %s data download ok.' % (code, start, autype))
        except Exception as e:
            log.error('error:code:%s,start:%s,%s' % (code, start, e))
def update_everyday(self):
    '''
    Update the stored daily data of every stock.

    For each code of the stock basics list the newest stored date is read
    from table 'day_<code>' and the days after it, up to today, are
    appended.  When that date cannot be read (for example the table does
    not exist yet) the whole history is loaded instead.
    '''
    basics = TS.memchaced_data(ts.get_stock_basics, 'get_stock_basics')
    for stock in basics.index:
        table = 'day_' + stock

        # Step 1: find the newest stored date.
        try:
            search_sql = "select * from {0} order by date desc limit 1".format(table)
            origin = SQL.read_sql(search_sql, self.engine)
            date_64 = (origin.tail(1))['date'].values[0]
        except Exception as err:
            print("更新股票数据失败:{0} {1}".format(stock, err))
            # Nothing readable is stored: try to load the full history.
            try:
                history = ts.get_h_data(stock)
                if history is not None:
                    history = history.sort_index(ascending=True)
                    history.to_sql(table, self.engine, if_exists='append')
                    print("尝试加载该新股票成功")
            except Exception as load_err:
                print("更新股票数据失败:{0} {1}".format(stock, load_err))
            continue

        # Step 2: append only the missing days.  A failure here must not
        # trigger a full reload, which would duplicate the stored rows.
        try:
            next_day = pd.to_datetime(str(date_64)) + timedelta(1)
            start = next_day.strftime("%Y-%m-%d")
            end = datetime.datetime.now().strftime("%Y-%m-%d")
            self.append_days(stock, start=start, end=end)
        except Exception as err:
            print("更新股票数据失败:{0} {1}".format(stock, err))
def get_h_data(self, symbol, expire=60*6):
    """
    Return the full history of one stock, cached in a local file.

    The cache is the tab-separated file CT.HIS_DIR + symbol.  It is
    downloaded again (unadjusted, from CT.START until today, factor column
    kept) when date_time.check_file_expired reports it as expired for
    ``expire`` or when it does not exist.

    Returns the cached data read back with pandas, or None when the
    download returned nothing or no cache file exists.
    """
    if not os.path.exists(CT.HIS_DIR):
        os.makedirs(CT.HIS_DIR)

    file_path = CT.HIS_DIR + symbol
    expired = date_time.check_file_expired(file_path, expire)
    needs_refresh = expired or not os.path.exists(file_path)

    if needs_refresh:
        today = date_time.get_today_str()
        fresh = ts.get_h_data(symbol, autype=None, start=CT.START, end=today, drop_factor=False)
        if fresh is None:
            return fresh
        fresh.to_csv(CT.HIS_DIR + symbol, sep='\t')

    if os.path.exists(file_path):
        return pd.read_csv(file_path, sep='\t', index_col=0)
    return None
def __init__(self):
    """Load the HS300 history, creating or refreshing the local CSV file.

    * No file yet: self.download_hs300() is called.
    * File whose first row is dated today: it is used as is.
    * Otherwise the rows from the file's newest date onwards are downloaded
      again, put in front of the older cached rows and saved.  When tushare
      has nothing new (for example on a non-trading day) the cached file is
      used unchanged.
    """
    hs300File = Path(FILE_LOCATION)
    if not hs300File.is_file():
        print("HS300 file created!\n")
        self.download_hs300()
        return

    hs300T = pd.read_csv(FILE_LOCATION)
    currentDate = time.strftime("%Y-%m-%d")
    latestDate = hs300T.loc[0, 'date']
    if currentDate == latestDate:
        print("HS300 file already exists!\n")
        self.__hs300 = hs300T
        return

    print("Updating HS300 file!\n")
    hs300P = ts.get_h_data(HS300_INDEX, index=True, start=latestDate)
    if hs300P is None or hs300P.empty:
        # Nothing newer is available; keep what is cached.
        self.__hs300 = hs300T
        return

    hs300P = hs300P.reset_index()
    hs300P['date'] = hs300P['date'].apply(
        lambda x: pd.to_datetime(x).date().isoformat())
    # Row 0 of the cache (latestDate) is part of the fresh download, so it
    # is dropped.  ignore_index keeps row labels unique in memory.
    self.__hs300 = pd.concat([hs300P, hs300T[1:]], ignore_index=True)
    self.__hs300.to_csv(FILE_LOCATION, encoding='utf-8', index=False)
def _getCodeDaysFromTuShare(self, code, startDate, endDate, fields, name=None):
    """
    Fetch the daily bars of a single stock via TuShare.

    Turnover comes from self._getDaysFrom163; prices, volume, amount and the
    adjust factor come from ts.get_h_data (unadjusted, factor kept), called
    with ``code`` minus its last three characters.

    Returns a DataFrame holding 'datetime' plus the requested ``fields``
    (Wind-style column names), or None when a download raised.
    """
    symbol = code[:-3]
    priceColumns = ['open', 'high', 'close', 'low', 'volume', 'amount', 'factor']

    try:
        # turnover rate, from NetEase
        netEasyDf = self._getDaysFrom163(code, startDate, endDate).sort_index()

        # Adjust factor, from Sina.  Original author's note: volume is in
        # shares, Sina's data is backward-adjusted, and the unadjusted
        # prices are derived by tushare from the factor.
        sinaDf = ts.get_h_data(symbol, startDate, endDate, autype=None, drop_factor=False)
        if sinaDf is not None:
            sinaDf = sinaDf.sort_index()
        else:
            # If no data, TuShare return None
            sinaDf = pd.DataFrame(columns=['open', 'high', 'close', 'low', 'volume', 'amount', 'factor'])
    except Exception as ex:
        self._info.print("从TuShare获取{}({})日线数据[{}, {}]失败: {}".format(code, name, startDate, endDate, ex), DyLogData.error)
        return None

    # construct new DF
    pieces = [sinaDf[priceColumns], netEasyDf['turnover']]
    df = pd.concat(pieces, axis=1)

    # change to Wind's indicators: move the time index into a column
    df.index.name = None
    df.reset_index(inplace=True)
    windNames = {'index': 'datetime', 'amount': 'amt', 'turnover': 'turn', 'factor': 'adjfactor'}
    df.rename(columns=windNames, inplace=True)

    # normalise the time part of every date to 00:00:00
    dayTexts = df['datetime'].map(lambda x: x.strftime('%Y-%m-%d'))
    df['datetime'] = dayTexts
    df['datetime'] = pd.to_datetime(df['datetime'], format='%Y-%m-%d')

    # select according @fields
    return df[['datetime'] + fields]
def get_all_hist_data(code, stock_basics, year_interval=3, max_retries=3):
    """Download the whole history of ``code`` in windows of several years.

    code          -- stock code, also the row label in stock_basics
    stock_basics  -- frame with a 'timeToMarket' column (YYYYMMDD); when the
                     value is empty or 0 the download starts at 2000-01-01
    year_interval -- each window ends on 1 January, this many years after
                     the year it starts in
    max_retries   -- attempts per window before the last error is re-raised

    Returns the concatenated windows, or an empty DataFrame when no window
    returned data.
    """
    def format_date(d):
        return d.strftime('%Y-%m-%d')

    proxies = ts.get_proxies(count=10)
    now_date = datetime.now().date()
    # date of listing
    listed = stock_basics.loc[code]['timeToMarket']
    if listed:
        start_date = datetime.strptime(str(listed), '%Y%m%d').date()
    else:
        start_date = date(2000, 1, 1)

    attempts = max(1, max_retries)
    data_frames = []
    while now_date >= start_date:
        end_date = date(start_date.year + year_interval, 1, 1)
        for attempt in range(attempts):
            try:
                batch_df = ts.get_h_data(code,
                                         start=format_date(start_date),
                                         end=format_date(end_date),
                                         proxies=proxies)
                break
            except Exception:
                # Give up after the last attempt instead of looping forever.
                if attempt == attempts - 1:
                    raise
        if batch_df is not None:
            data_frames.append(batch_df)
        start_date = end_date

    if not data_frames:
        return pd.DataFrame()
    return pd.concat(data_frames)
def import_h_data(code, start, end):
    """Import index history between ``start`` and ``end`` into the database.

    start and end are date-like objects that support the '{:%Y-%m-%d}'
    format.  One HData row (code, date, open, close, high, volume, amount)
    is added to the session per returned day and the session is committed.
    """
    day_format = '{:%Y-%m-%d}'
    print('=== import h data ===')
    print('start: {:%Y-%m-%d} end: {:%Y-%m-%d}'.format(start, end))

    df: pd.DataFrame = tushare.get_h_data(code=code,
                                          start=day_format.format(start),
                                          end=day_format.format(end),
                                          index=True)
    if df is not None:
        for day, bar in df.iterrows():
            session.add(HData(code=code, date=day, open=bar['open'],
                              close=bar['close'], high=bar['high'],
                              volume=bar['volume'], amount=bar['amount']))
        session.commit()
    print('done')
def _update(stock, conn):
    """Append the missing daily bars of ``stock`` to its table.

    The table named after the stock is read ordered by date.  When its last
    date differs from the latest expected day (today, or Friday when run on
    a weekend) the days after the last stored one are downloaded and their
    open/high/close/low/volume columns appended.

    Any error is printed and the stock is added to the global errorlist.
    """
    try:
        print("update ----- : %s" % stock)
        query = "select * from '%s' order by date" % stock
        df = pd.read_sql(query, conn)
        df = df.set_index('date')

        # On Saturday / Sunday the latest possible trading day is Friday.
        now = dt.now()
        days_back = {5: 1, 6: 2}.get(now.weekday(), 0)
        today = str(pd.Timestamp(now) - pd.Timedelta(days=days_back))[:10]

        last_stored = str(df.index[-1])[:10]
        if today != last_stored:
            # Start the day after the last stored one; starting on it would
            # append that day a second time.
            next_day = str(pd.Timestamp(last_stored) + pd.Timedelta(days=1))[:10]
            fresh = ts.get_h_data(stock, start=next_day, retry_count=5, pause=1)
            if fresh is not None and len(fresh) > 0:
                fresh[['open', 'high', 'close', 'low', 'volume']].to_sql(stock, conn, if_exists='append')
    except Exception as arg:
        print("exceptionu: %s %s" % (stock, arg))
        errorlist.append(stock)
def saveHistoricalDailyTrade_qfq(_symbols, _start_dte, _end_dte):
    """Download forward-adjusted daily bars and append them to the database.

    _symbols   -- the code to download; also stored in the 'symbol' column
    _start_dte -- start date 'YYYY-MM-DD'
    _end_dte   -- end date 'YYYY-MM-DD'

    Rows are appended to table AStocks_Trade_Daily_qfq.  Errors are written
    to the log through dlog.writeLog and not raised.  The DB engine is
    disposed at the end whenever it was created.
    """
    # Bound before the try block so the finally clause can always test it.
    engine = None
    try:
        print('----Start processing historical data')
        # get DB connection
        engine = APIs.getDBConn()

        # config parameters
        _autype = 'qfq'  # qfq = forward adjusted, hfq = backward adjusted, None = unadjusted
        _index = False   # True makes tushare treat the code as an index code

        df = ts.get_h_data(code=_symbols, start=_start_dte, end=_end_dte,
                           autype=_autype, index=_index,
                           retry_count=3, pause=0.5)
        # tushare returns None when the range holds no data.
        if df is not None:
            df['symbol'] = _symbols
            df.to_sql('AStocks_Trade_Daily_qfq', con=engine, if_exists='append')
        print('----Finish processing historical data')
    except Exception as e:
        print('----Failed at {}'.format(sys._getframe().f_code.co_name))
        dlog.writeLog(
            time.strftime('%Y%m%d %H:%M:%S', time.localtime()),
            sys._getframe().f_code.co_name,
            'symbol = {}, start = {}, end = {}'.format(
                _symbols,
                _start_dte,
                _end_dte,
            ),
            e)
    finally:
        if engine is not None:
            engine.dispose()
def get_high_test(): df = ts.get_h_data('300141', start=day30, end=day0) # 这个函数可以获取所有的历史数据 # print(df) # current= df[:1] # current=df.iloc[0] print(df) current = df.ix['2016-07-15'] print(current) current_high = current['high'].values[0] print(current_high) highest = df['high'] lowest = df['low'] price_30_max = highest.max() price_30_min = lowest.min() print(df[df.high >= price_30_max]) # 得出出现最大值的那一天 print(df[df.low <= price_30_min]) # 得出出现最小值的那一天 print(price_30_max) print(price_30_min) # oneData= df.ix['2016-07-11'] # print(oneData.iloc[0,1]) # print(type(oneData)) # for i in highest.len: # print(i) # print(type(t)) if current_high >= price_30_max: print(stock_info.ix['300141']['name'].decode('utf-8'))
import tushare as ts

# Back-adjusted ('hfq') daily history for 600606, 2015-2017.
df = ts.get_h_data('600606', autype='hfq', start='2015-01-01', end='2017-12-31')

# Written once: the original wrote the full frame to this path and then
# immediately overwrote it with the column-restricted version below.
df.to_csv('D:/day/600606.csv',
          columns=['open', 'high', 'close', 'low', 'volume', 'amount'])
import numpy as np
import tushare as ts

# load stock codes (one integer code per entry; zero-padded to 6 digits below)
# atleast_1d keeps the loop working when the file holds a single code.
tt = np.atleast_1d(np.loadtxt('stock_codes_2017-06-04.txt', dtype=int))
codes = np.array([str(x).zfill(6) for x in tt])

# get and save qian-fu-quan (forward-adjusted) data
for i, c in enumerate(codes):
    print('\n\n', i, ':', c)
    # Yearly files qfq_<code>_2010.csv ... qfq_<code>_2016.csv (data up to
    # 2016-06-08) were downloaded by earlier runs of this script; that code
    # was disabled and is no longer kept here.
    #
    # NOTE(review): start ('2016-06-09') is later than end ('2016-06-08'), so
    # this range is empty; the file suffix '16to17' suggests the end year was
    # meant to be 2017 -- confirm the intended end date.
    df = ts.get_h_data(c, start='2016-06-09', end='2016-06-08')
    # get_h_data returns None when there is nothing for the range.
    if df is not None:
        df.to_csv('qfq_' + c + '_16to17.csv')
import tushare as ts
import os
import numpy

filename = 'c:/bigfile.csv'
# NOTE(review): the API token is hard-coded in source; consider loading it
# from the environment or a config file that is not committed.
ts.set_token("3e3ed9ba576210c210d0aa08959fdd3b32de36515af178850f05e151")
pro = ts.pro_api()
data = pro.query('stock_basic',
                 exchange='',
                 list_status='L',
                 fields='ts_code,symbol,name,area,industry,list_date')
allcode = data['symbol'].values.tolist()

# Print the population standard deviation of the closing price per stock.
for a in allcode:
    b = ts.get_h_data(a, start='2018-08-01', end='2019-03-26')
    # get_h_data returns None when no data is available for the range.
    if b is None or b.empty:
        continue
    narray = numpy.array(b['close'].values.tolist())
    # Same quantity as sqrt(E[x^2] - E[x]^2).  The original printed
    # `var ^ 0.5`, which is bitwise XOR and raises TypeError on floats.
    print(narray.std())
#-*- coding: utf-8 -*- """ Created on 2017/3/27. """ import pandas import tushare from matplotlib import pyplot as plt import matplotlib matplotlib.style.use("ggplot") codes = tushare.get_stock_basics() market = tushare.get_h_data('000001', index=True, start="2006-01-01").sort_index() market_sz = tushare.get_h_data('399004', index=True, start="2006-01-01").sort_index() market_hs = tushare.get_h_data('000300', index=True, start="2006-01-01").sort_index() market_cy = tushare.get_h_data('399606', index=True, start="2010-01-01").sort_index() def try_para(market, p): market = market.sort_index() base = market["amount"].resample("w").mean().dropna().rolling(p).mean() delta = ((market.close.resample("w").last() - market.open.resample("w").first()) / market.open.resample("w").first()).dropna()
缺点:?? '''
# Step 1: fetch the price history and derive rolling statistics.
print '#####1. 数据获取和处理'
CODE = '002398'
BODY_SIZE = 0.03  # hammer candle real-body size as a fraction of price; must not exceed 3%
HEAD_SIZE = 0.5  # hammer upper shadow; must not exceed 0.5x the lower shadow length
TAIL_SIZE = 2  # hammer lower shadow; must be more than 2x the real-body length
LENGTH = 10  # observation window (number of rows used for the rolling stats)
STOP_LOST_TRIGGER = 1  # stop-loss multiple: stop out when price is this many standard deviations from the moving average, i.e. mean - (1 * sigma)
data = ts.get_h_data(CODE, '2012-01-01', '2017-01-01')
data.sort_index(ascending=True, inplace=True)
data.reset_index(inplace=True)  # reset the index so rows get default integer labels instead of dates
data['pct_change'] = data['close'].pct_change()
data['ma'] = data['close'].rolling(LENGTH).mean()
data['std'] = data['close'].rolling(LENGTH).std()
del data['volume']
del data['amount']
data['yesterday_ma'] = data['ma'].shift(1)  # previous row's moving average
data['yesterday_std'] = data['std'].shift(1)  # previous row's standard deviation
# Step 2: measure the candle parts used to recognise the hammer pattern.
print '#####2. 识别锤子形态和特征'
data['body'] = abs(data['close'] - data['open'])
data['head'] = data['high'] - data[['close', 'open']].max(axis=1)
#df=ts.get_hist_data('600848',ktype='M') #获取月k线数据 #df=ts.get_hist_data('600848',ktype='5') #获取5分钟k线数据 #ts.get_hist_data('600848',ktype='15') #获取15分钟k线数据 #ts.get_hist_data('600848',ktype='30') #获取30分钟k线数据 #df=ts.get_hist_data('000425',ktype='60') #获取60分钟k线数据 #df=ts.get_hist_data('sh') #获取上证指数k线数据,其它参数与个股一致,下同 #df=ts.get_hist_data('sz') #获取深圳成指k线数据 #df=ts.get_hist_data('hs300') #获取沪深300指数k线数据 #ts.get_hist_data('sz50') #获取上证50指数k线数据 #df=ts.get_hist_data('zxb') #获取中小板指数k线数据 #ts.get_hist_data('cyb') #获取创业板指数k线数据 #df = ts.get_stock_basics() #date = df.ix['600848']['timeToMarket'] #上市日期YYYYMMDD # df=ts.get_h_data('002292',start='2000-11-12',end='2018-11-12') #前复权 df[['open','close']].plot(kind='line') #ts.get_h_data('002337',autype='hfq') #后复权 #ts.get_h_data('002337',autype=None) #不复权 #ts.get_h_data('002337',start='2015-01-01',end='2015-03-16') #两个日期之间的前复权数据 # #df=ts.get_h_data('000425', index=True) #深圳综合指数 #df=ts.get_today_all() #实时行情 # #df = ts.get_tick_data('600848',date='2014-01-09') #df.head(10) #历史分笔 #df = ts.get_today_ticks('601333') #df.head(10) #当日历史分笔 #df = ts.get_realtime_quotes('000581') #Single stock symbol #df[['code','name','price','bid','ask','volume','amount','time']] #实时分笔
__author__ = 'xierui774'
import tushare as ts
import pandas as pd
import numpy as np
import os
import datetime
import matplotlib.pyplot as plt

# Overlay the index close around two market peaks: the 2007-2009 window is
# the base series and the 2014-2017 window is shifted so both maxima fall on
# the same row.
df_07 = ts.get_h_data('000001', start='2007-10-01', end='2009-10-01', index=True)
df_15 = ts.get_h_data('000001', start='2014-10-01', end='2017-04-27', index=True)
df = df_07['close'].sort_index()
df2 = df_15['close'].sort_index()

# Row positions of the highest close in each window.
idx = np.argmax(df.values)
idx2 = np.argmax(df2.values)

# Work out where the second series starts and where it lands in the first.
# The original sliced with `idx2 - idx` directly, which silently takes the
# tail of the array when that difference is negative.
shift = idx2 - idx
if shift >= 0:
    src_start, dst_start = shift, 0
else:
    src_start, dst_start = 0, -shift
df_new = df2.values[src_start:]

df = pd.DataFrame(df)
df['new_close'] = np.nan
# Clip to the rows that actually exist; `.ix` is gone from current pandas,
# so the positional assignment uses `.iloc`.
n = min(len(df) - dst_start, len(df_new))
df.iloc[dst_start:dst_start + n, df.columns.get_loc('new_close')] = df_new[:n]
df.plot()
plt.show()
__author__ = 'Administrator'
import tushare as ts
import pandas as pd

# A tour of the tushare history APIs; most results are only fetched, a few
# are printed.
data = ts.get_hist_data('000016')
print(data)
data2a = ts.get_h_data('000300')  # forward-adjusted (default)
print(data2a)
data2 = ts.get_h_data('002337', start='2010-01-15', end='2019-01-01')  # forward-adjusted data between two dates
#df = ts.get_today_ticks('601333')
#print(df)
data3 = ts.get_hist_data('sh')  # Shanghai Composite K-line data; other parameters as for single stocks (same below)
data4 = ts.get_hist_data('sz')  # Shenzhen Component index K-line data
data5 = ts.get_hist_data('hs300')  # CSI 300 index K-line data
data6 = ts.get_hist_data('sz50')  # SSE 50 index K-line data
data7 = ts.get_hist_data('zxb')  # SME board index K-line data
data8 = ts.get_hist_data('cyb')  # ChiNext index K-line data
#print(data5)
pd.set_option('display.width', 1000)
df = ts.get_stock_basics()
# Label-based lookup; `.ix` is not available in current pandas releases.
date = df.loc['002337', 'timeToMarket']  # listing date, YYYYMMDD
#print(df)
#print(date)
df = ts.get_index()
print(data5)
print(df)
def _getDaysFromTuShareOld(self, code, startDate, endDate, fields, name=None, verify=False):
    """
        Fetch daily stock bars through tushare.

        Kept in line with the Wind interface. Net inflow volume and amount
        cannot be obtained online, so those two fields are absent; from a
        strategy point of view they are of little use anyway.

        @verify: True - identical fields from the different web sources are
                 checked against each other.
        @return: df['datetime', indicators]
                 None - errors
                 [] - no data
    """
    sinaColumns = ['open', 'high', 'close', 'low', 'volume', 'amount', 'factor']
    code = code[:-3]

    try:
        # Turnover comes from ifeng; its volume is in lots and is not
        # rounded (e.g. 2004.67 lots).
        ifengDf = ts.get_hist_data(code, startDate, endDate).sort_index()

        # Unadjusted OHLCV from Tencent; volume is in lots (rounded).
        if verify:
            tcentDf = ts.get_k_data(code, startDate, endDate, autype=None).sort_index()

        # Adjust factor from Sina; volume is in shares. Sina data is
        # back-adjusted; tushare derives the unadjusted form from the factor.
        sinaDf = ts.get_h_data(code, startDate, endDate, autype=None, drop_factor=False)
        if sinaDf is not None:
            sinaDf = sinaDf.sort_index()
        else:
            # TuShare returns None when there is no data.
            sinaDf = pd.DataFrame(columns=sinaColumns)
    except Exception as ex:
        self._info.print(
            "从TuShare获取{}({})日线数据[{}, {}]失败: {}".format(
                code, name, startDate, endDate, ex), DyLogData.error)
        return None

    # Cross-check the sources against each other.
    if verify:
        # OHLC: Tencent vs Sina, first by length and then element-wise.
        for indicator in ['open', 'high', 'close', 'low']:
            tcentValues = tcentDf[indicator].values
            sinaValues = sinaDf[indicator].values
            if len(tcentValues) != len(sinaValues) or (tcentValues != sinaValues).sum() > 0:
                self._info.print(
                    "{}({})日线数据OHLC[{}, {}]: 腾讯和新浪不相同".format(
                        code, name, startDate, endDate), DyLogData.error)
                return None

        # volume: ifeng (lots) vs Sina (shares), compared after rounding.
        ifengVolume = ifengDf['volume'].values
        sinaVolume = sinaDf['volume'].values
        if len(ifengVolume) != len(sinaVolume) or \
                (np.round(ifengVolume * 100) != np.round(sinaVolume)).sum() > 0:
            self._info.print(
                "{}({})日线数据Volume[{}, {}]: 凤凰网和新浪不相同".format(
                    code, name, startDate, endDate), DyLogData.error)
            return None

    # construct new DF
    df = pd.concat([sinaDf[sinaColumns], ifengDf['turnover']], axis=1)
    df.index.name = None

    # change to Wind's indicators
    df = df.reset_index()  # turn the time index into a column
    df = df.rename(columns={
        'index': 'datetime',
        'amount': 'amt',
        'turnover': 'turn',
        'factor': 'adjfactor'
    })

    # Force the HH:MM:SS part of every date to 00:00:00.
    dayText = df['datetime'].map(lambda x: x.strftime('%Y-%m-%d'))
    df['datetime'] = pd.to_datetime(dayText, format='%Y-%m-%d')

    # select according @fields
    return df[['datetime'] + fields]
import tushare as ts

# Download 2006-2017 history for 'sz50' and dump it to a CSV file.
history_range = {'start': '2006-01-01', 'end': '2017-12-31'}
df = ts.get_h_data('sz50', **history_range)
df.to_csv('C:/Users/jacky/Desktop/XMU/HMM/sz501.csv')
import tushare as ts
import sys

# Usage: <script> OUTPUT_CSV STOCK_CODE AUTYPE
# Fetches 2005-2018 history for STOCK_CODE with the given adjustment type
# and writes it to OUTPUT_CSV as UTF-8.
stock_code = sys.argv[2]
adjust_type = sys.argv[3]
df = ts.get_h_data(stock_code,
                   start='2005-01-01',
                   end='2018-01-01',
                   autype=adjust_type)
path = sys.argv[1]
df.to_csv(path, encoding="utf8")
def Get_hist_data(self, code):
    """Fetch ``ts.get_h_data(code)`` with default arguments and hand it to ``Mysql``.

    The frame is passed to ``Mysql().SaveMySqlTWO`` together with the
    literal ``'Stock_Basics_Info'`` and the name ``code + 'stock_basics'``.
    """
    history = ts.get_h_data(code)
    saver = Mysql()
    saver.SaveMySqlTWO(history, 'Stock_Basics_Info', code + 'stock_basics')
#coding=UTF-8
from sqlalchemy import create_engine
import tushare as ts
import time
import MySQLdb as mariadb

# For every listed stock, create a `hist<code>` table holding its
# 2008-02-10 .. 2018-02-14 history, skipping tables that already exist.
mariadb_connection = mariadb.connect('localhost', 'xxxxxxx', 'xxxxxx', 'xxxxxx')
cursor = mariadb_connection.cursor()
# One engine is enough for the whole run; it used to be rebuilt per stock.
engine = create_engine(
    'mysql://*****:*****@127.0.0.1/xxxxx?charset=utf8')

df_basics = ts.get_stock_basics()
for code in df_basics.index:
    print(code)
    tb_name = 'hist' + code
    # Let the driver quote the pattern instead of formatting it into the SQL.
    cursor.execute("show tables like %s", (tb_name,))
    if cursor.fetchall():
        continue

    df = ts.get_h_data(code, start='2008-02-10', end='2018-02-14', pause=5)
    # get_h_data returns None when there is no data for the range.
    if df is None:
        continue
    df.to_sql(tb_name, engine)
    print(" ")
    # time.sleep(2)
def M1_notification(bot):
    '''
    Push notification: combined market turnover as a share of M0 / M1.

    Loops forever, waking every 10 minutes.  After 15:30 on a day that is
    neither a weekend nor in the 2018 holiday list, and only if no row for
    today exists in the history workbook yet, it fetches today's bars for
    the two index codes, reads M0/M1 from the money-supply workbook, stores
    a new history row and sends the summary to two friends through ``bot``.

    :param bot: chat bot; ``bot.friends().search(name)[0].send(msg)`` is
                used to deliver the message
    :return: null
    '''
    print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + ' - Start M1_notification thread ')
    path = sys.path[0] + '/notification_monitoring_files/'
    history_columns = ['date', 'M1', 'index_volume_total', 'M1_percentage']

    ## 2018 holiday calendar (loop-invariant, so built once)
    holiday_calendar = ['2018-04-05', '2018-04-06', '2018-04-30',
                        '2018-05-01', '2018-06-18', '2018-09-24',
                        '2018-10-01', '2018-10-02', '2018-10-03', '2018-10-04', '2018-10-05']

    while True:
        now = strftime("%H:%M", localtime())
        today_ISO = datetime.today().date().isoformat()
        holiday = datetime.today().weekday() >= 5 or (today_ISO in holiday_calendar)

        if (now > '15:30') and (not holiday):
            # check if today's notification has sent?
            filename = '成交量M1占比'
            try:
                # Select by column name: older files also contain the saved
                # index as an unnamed first column, which the previous
                # positional check (iloc[0, 0]) mistook for the date and so
                # re-sent the message on every wake-up.
                M1_last = pd.read_excel(path + filename + '.xlsx')
                M1_last = M1_last.reindex(columns=history_columns)
                M1_last = M1_last.dropna(subset=['date'])
            except Exception:
                # Missing or unreadable history: start from an empty one.
                M1_last = pd.DataFrame(columns=history_columns)
            already_sent = M1_last['date'].astype(str).str[:10].eq(today_ISO).any()

            # push the message
            if not already_sent:
                try:
                    index_sh = ts.get_h_data('000001', index=True, start=today_ISO, end=today_ISO)
                    index_sz = ts.get_h_data('399001', index=True, start=today_ISO, end=today_ISO)
                    if (not index_sh.empty) and (not index_sz.empty):
                        # M0/M1 share analysis; values are scaled to units of 1e8
                        M1_index_amount = (index_sh.iloc[0, 5] + index_sz.iloc[0, 5]) / 100000000
                        M1_index_volume = (index_sh.iloc[0, 4] + index_sz.iloc[0, 4]) / 100000000

                        filename = '货币供应量_宏观数据_新浪财经'
                        M1_sina = pd.read_excel(path + filename + '.xlsx')
                        ##################################
                        M0 = M1_sina.iloc[0, 5]
                        M0_percentage = 100 * M1_index_amount / M0
                        M1 = M1_sina.iloc[0, 3]
                        M1_percentage = 100 * M1_index_amount / M1

                        M1_row = pd.DataFrame(
                            [[today_ISO, M1, M1_index_amount, M1_percentage]],
                            columns=history_columns)
                        # DataFrame.append is gone from current pandas.
                        if M1_last.empty:
                            M1_last = M1_row
                        else:
                            M1_last = pd.concat([M1_last, M1_row], ignore_index=True)
                        M1_last.sort_values(by='date', ascending=False, inplace=True)

                        filename = '成交量M1占比'
                        # index=False keeps the workbook free of a stray index
                        # column; the old encoding argument had no effect on
                        # .xlsx output.
                        M1_last.to_excel(path + filename + '.xlsx', index=False)

                        msg = '==========================' + '\n' + \
                              today_ISO + ' - \n' + \
                              '==========================' + '\n' + \
                              '两市总成交额:' + str(round(M1_index_amount, 3)) + '(亿)\n' + \
                              '两市总成交量:' + str(round(M1_index_volume, 3)) + '(亿)\n' + \
                              'M0:' + str(M0) + '(亿)\n' + \
                              'M1:' + str(M1) + '(亿)\n' + \
                              '两市总成交额占M0:' + str(round(M0_percentage, 3)) + '% \n' + \
                              '两市总成交额占M1:' + str(round(M1_percentage, 3)) + '% \n' + \
                              '=========================='
                        # send the message
                        bot.friends().search('Yang Hui')[0].send(msg)
                        bot.friends().search('欣')[0].send(msg)
                except Exception as ex:
                    # Best effort: report and retry on the next wake-up.
                    # A bare `except:` would also swallow KeyboardInterrupt.
                    print(strftime("%Y-%m-%d %H:%M:%S", localtime()) +
                          ' - M1_notification failed: ' + str(ex))
        time.sleep(600)

    print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + ' - End M1_notification thread ')
    return
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 26 11:57:59 2016

@author: Richard

Change the stock code and the output file name to get the prices of a
different stock.
"""
import tushare as ts

if __name__ == "__main__":
    # Change the stock code here. Prices are forward-adjusted (the
    # get_h_data default) and cover one year.
    price_request = dict(start='2015-10-01', end='2016-10-01')
    myData = ts.get_h_data('600036', **price_request)
    # Change the output file name here.
    myData.to_csv(
        r'E:\study\master of TJU\0Subject research\data\core\price_600036.csv'
    )
def _getCodeDaysFromTuShare(self, code, startDate, endDate, fields, name=None):
    """
        Fetch daily bars for a single stock through TuShare.

        OHLC, volume, amount and the adjust factor come from
        ``ts.get_h_data``; turnover comes from ``self._getDaysFrom163``.
        Returns a frame with a ``datetime`` column followed by ``fields``,
        or None when fetching fails or the two sources do not line up.
    """
    print("{}, {} ~ {}".format(code, startDate, endDate))

    sina_columns = ['open', 'high', 'close', 'low', 'volume', 'amount', 'factor']
    tuShareCode = code[:-3]

    try:
        # Turnover from NetEase (163).
        netease_df = self._getDaysFrom163(code, startDate, endDate).sort_index()
        netease_df = netease_df[netease_df['volume'] > 0]  # drop suspended-trading days
        netease_df.index = pd.to_datetime(netease_df.index, format='%Y-%m-%d')

        # Adjust factor from Sina; volume is in shares. Sina data is
        # back-adjusted; tushare derives the unadjusted form from the factor.
        pause_seconds = self.tuShareDaysSleepTimeConst + self.tuShareDaysSleepTime
        try:
            sina_df = ts.get_h_data(tuShareCode,
                                    startDate,
                                    endDate,
                                    autype=None,
                                    drop_factor=False,
                                    pause=pause_seconds)
        except IOError:
            # We think Sina is anti-crawling
            self.tuShareDaysSleepTime += self.tuShareDaysSleepTimeStep
            print(
                "Sina is anti-crawling, setting additional sleep time to {}s for each request"
                .format(self.tuShareDaysSleepTime))
            raise

        # A successful request relaxes the extra sleep time again.
        if self.tuShareDaysSleepTime > 0:
            self.tuShareDaysSleepTime -= self.tuShareDaysSleepTimeStep

        if sina_df is not None and not sina_df.empty:
            sina_df = sina_df.sort_index()
        else:
            # If no data, TuShare return None
            sina_df = pd.DataFrame(columns=sina_columns)
    except Exception as ex:
        self._info.print(
            "从TuShare获取{}({})日线数据[{}, {}]失败: {}".format(
                code, name, startDate, endDate, ex), DyLogData.warning)
        return None

    # construct new DF
    try:
        df = pd.concat([sina_df[sina_columns], netease_df['turnover']], axis=1)
        df.index.name = None
    except Exception as ex:
        print("netEasyDf")
        print(netease_df)
        print("sinaDf")
        print(sina_df)
        self._info.print(
            "从TuShare获取的{}({})日线数据[{}, {}]格式错误: {}".format(
                code, name, startDate, endDate, ex), DyLogData.warning)
        return None

    # Any NaN after the join means the two sources cover different days.
    if df.isnull().values.any():
        self._info.print(
            "{}({})新浪日线和网易日线数据不一致[{}, {}]".format(code, name, startDate,
                                                  endDate),
            DyLogData.warning)
        return None

    # change to Wind's indicators
    df = df.reset_index()  # turn the time index into a column
    df = df.rename(columns={
        'index': 'datetime',
        'amount': 'amt',
        'turnover': 'turn',
        'factor': 'adjfactor'
    })

    # Force the HH:MM:SS part of every date to 00:00:00.
    day_text = df['datetime'].map(lambda x: x.strftime('%Y-%m-%d'))
    df['datetime'] = pd.to_datetime(day_text, format='%Y-%m-%d')

    # select according @fields
    return df[['datetime'] + fields]
# -*- coding: utf-8 -*- # <nbformat>3.0</nbformat> # <codecell> import tushare as ts import pandas as pd import matplotlib.pyplot as plt %matplotlib inline import matplotlib matplotlib.style.use('ggplot') # <codecell> id = '300220' df = ts.get_h_data(id, autype='qfq', start='2013-06-10') # <codecell> df = pd.DataFrame(df.query('date > "2015-07-01"')) # <codecell> df['m1'] = pd.rolling_mean(df['close'], window=15, min_periods=1, center=True) df['m2'] = pd.rolling_mean(df['close'], window=30, min_periods=1, center=True) df['m4'] = pd.rolling_mean(df['close'], window=45, min_periods=1, center=True) # <codecell> print df.plot(y=['close', 'm1', 'm2', 'm4', 'volume'], title=id, secondary_y='volume', grid=True, legend=True, figsize=(16, 10))
def down_stk_cn010(qx, startTime):
    #def down_stk_cn010(qx):
    '''
    China A-share data download subroutine.

    Downloads daily history for ``qx.code`` with ``ts.get_h_data`` and keeps
    it in ``<qx.rDay>/<code>.csv`` (gbk encoded, newest row first, values
    rounded to 3 decimals).  When the CSV already holds rows, the download
    starts at its newest date and the new rows are merged in, keeping the
    freshly downloaded row for any date present in both.

    [Input]
        qx (zwDatX):
            xtyp (str): data type; 9 / Day9 = simplified stock data that can
                be downloaded back to 2001, everything else is extended data
                that only covers the last 3 years.
                D = daily K-line, W = weekly, M = monthly; default D
            :ivar xcod (int): stock code
            :ivar fss (str): name of the file the data is saved to
        startTime: start date used when no usable CSV exists yet

    Returns the saved frame; ``None`` when tushare has no data; ``[]`` when
    the download raised IOError.
    '''
    xcod, rss, = qx.code, qx.rDay
    if not os.path.exists(rss):
        os.makedirs(rss)

    tim0 = startTime
    fss = os.path.join(rss, xcod + '.csv')
    #-------------------
    #warning Tom change 6/24
    xfg = os.path.exists(fss)
    xd0 = []
    xd = []
    if xfg:
        xd0 = pd.read_csv(fss, index_col=0, parse_dates=[0], encoding='gbk')
        xd0 = xd0.sort_index(ascending=False)
        # Resume from the newest stored date; a CSV without data rows keeps
        # the caller-supplied start instead of failing on index[0].
        if len(xd0) > 0:
            tim0 = str(xd0.index[0]).split(" ")[0]

    print('\n', xfg, fss, ",", tim0)
    #-----------
    try:
        xd = ts.get_h_data(xcod, start=tim0, end=None, retry_count=5, pause=1)  #Day9
        #-------------
        if xd is not None:
            if len(xd0) > 0:
                # DataFrame.append is gone from current pandas.
                xd2 = pd.concat([xd0, xd])
                # flt.dup: for a date present in both, keep the new row
                xd = xd2[~xd2.index.duplicated(keep='last')]
            xd = xd.sort_index(ascending=False)
            xd = np.round(xd, 3)
            xd.to_csv(fss, encoding='gbk')
    except IOError:
        pass  #skip,error

    return xd