def _ols_design(columns):
    """Join *columns* side by side and build the OLS design matrix.

    Returns ``(rets, X)``: ``rets`` is the column-wise concatenation with
    every incomplete row dropped, and ``X`` is all columns except the first,
    converted to an array with a constant column added by
    ``sm.add_constant``.
    """
    rets = pd.concat(columns, axis=1)
    rets = rets.dropna(how='any')
    X = sm.add_constant(np.array(rets.iloc[:, 1:]))
    return rets, X


def OLSV2A(code):
    """Fit an OLS model of a stock's daily percentage close change.

    The dependent variable is ``cpct`` (percentage change of ``close``).
    The regressors are:

    * ``lpct``   - ``cpct`` shifted by one row,
    * ``llpct``  - ``lpct`` squared,
    * ``sina``   - ``sin(lpct)``,
    * ``indpct`` - percentage change of the benchmark index close,
    * ``vpct``   - percentage change of ``volume``,
    * ``turnover`` - included when the design matrix can be built with it.

    The benchmark is ``'000001'`` for codes starting with 6 or 9 and
    ``'399001'`` for codes starting with 0, 2 or 3.  Descriptive statistics,
    the regression summary, the parameters and their standard errors are
    printed.

    Args:
        code: stock code string; its first character selects the benchmark.

    Returns:
        The fitted results object returned by ``sm.OLS(...).fit()``.

    Raises:
        ValueError: if the first character of ``code`` matches no known
            benchmark (previously this surfaced as an ``UnboundLocalError``).
    """
    # Validate the prefix before any data is fetched.
    if code[0] in ['6', '9']:
        ticker = '000001'
    elif code[0] in ['0', '2', '3']:
        ticker = '399001'
    else:
        raise ValueError('Unsupported stock code prefix: %r' % (code,))

    df = wt.get_h_data(code, start='2001-01-01')
    df = df.sort_index()
    df.index = pd.to_datetime(df.index)
    # NOTE(review): keep=False drops *every* row that has an identical twin,
    # not just the extra copies - confirm that this is intended.
    df = df.drop_duplicates(keep=False)
    print('\n,Caculating the analysing 1/4 statistics:\n')
    print(df.describe(), '\n')

    dfi = wt.get_h_data(ticker, index=True, start='2001-01-01')
    dfi = dfi.sort_index()
    dfi.index = pd.to_datetime(dfi.index)

    df['cpct'] = df['close'].pct_change() * 100
    df['lpct'] = df['cpct'].shift(1)
    df['llpct'] = df['lpct'] ** 2
    df['sina'] = np.sin(df['lpct'])
    df['vpct'] = df['volume'].pct_change() * 100
    dfi['indpct'] = dfi['close'].pct_change() * 100

    base = [df['cpct'], df['lpct'], df['llpct'], df['sina'],
            dfi['indpct'], df['vpct']]
    try:
        # Preferred model: also use turnover when the column is present and
        # the resulting matrix can be built.
        rets, X = _ols_design(base + [df['turnover']])
    except Exception:
        # Best-effort fallback kept from the original: on any failure, fit
        # the model without turnover.  Unlike a bare ``except`` this no
        # longer swallows KeyboardInterrupt / SystemExit.
        rets, X = _ols_design(base)

    Y = np.array(rets.iloc[:, 0])
    model = sm.OLS(Y, X)
    results = model.fit()
    print(results.summary())
    print("The params for the model:", results.params)
    print("The std for the model:", results.bse)
    return results
def _get_index_data(code):
    """Create or refresh the local CSV cache of daily data for an index.

    Per the original note, ``code`` is expected to be ``'000001'`` or
    ``'399001'``.  A code starting with ``'0'`` is cached at
    ``./Quandl/YAHOO/SS_<code>.csv``; any other code at
    ``./Quandl/YAHOO/SZ_<code>.csv``.

    * If the file does not exist, history from 2000-01-01 up to the
      module-level ``today1`` is downloaded, sorted ascending, its columns
      renamed to capitalised names, and written with a header.
    * Otherwise the ``Date`` value of the last row is compared with the
      module-level ``bftoday``; when it is older, rows from the following day
      (moved forward two days if that day is a Saturday) up to ``today`` are
      downloaded and appended to the file without a header.

    Args:
        code: index code string.

    Returns:
        None.  The function only maintains the CSV file.
    """
    if code[0] == '0':
        ticker = "YAHOO/SS_" + code
    else:
        ticker = 'YAHOO/SZ_' + code
    fn = './Quandl/' + ticker + '.csv'

    if not os.path.exists(fn):
        print('\nDownloading the data %s:' % code)
        df = wt.get_h_data(code, index=True, start='2000-01-01', end=today1)
        if df is None:
            # Other callers in this file treat None as "no data"; there is
            # nothing to cache in that case.
            return
        df.sort_index(ascending=True, inplace=True)
        df.rename(columns={
            'open': 'Open',
            'close': 'Close',
            'low': 'Low',
            'high': 'High',
            'volume': 'Volume',
            'amount': 'Amount',
            'date': 'Date'
        }, inplace=True)
        df.to_csv(fn)
        return

    cached = pd.read_csv(fn)
    last_date = cached.iloc[-1]['Date']
    if last_date >= bftoday:
        # Cache already reaches the reference day; nothing to do.
        return

    print('\nUpdating data from %s for %s:' % (last_date, code))
    start_day = (datetime.datetime.strptime(last_date, "%Y-%m-%d")
                 + datetime.timedelta(days=1))
    if start_day.weekday() == 5:
        # The day after the last cached row is a Saturday: move to Monday.
        start_day = start_day + datetime.timedelta(days=2)
        print('It is Sat')
    bday = start_day.strftime('%Y-%m-%d')

    # index=True added: this function maintains *index* data and the initial
    # download above passes it; without the flag the update would request the
    # stock that shares the same code.
    new_rows = wt.get_h_data(code, index=True, autype=None,
                             start=bday, end=today)
    if new_rows is None or new_rows.empty:
        return
    print(new_rows.head(1))
    new_rows = new_rows.sort_index(ascending=True)
    # Appended without a header, so column order must match the existing file.
    new_rows.to_csv(fn, header=None, mode='a')
    return
def get_h_hdf5(code):
    """Return price history for ``code``, cached in ``./testh5df/stockdata.h5``.

    The data lives under the key ``'M/sz<code>'`` for codes starting with
    0, 2 or 3 and ``'M/ss<code>'`` for codes starting with 6 or 9.

    * When the key is missing (or the stored frame is empty) the history is
      downloaded with ``wt.get_h_data(code)`` and appended to the store.
      Per the original note (translated), a download without a date range
      returns roughly the most recent year of adjusted data, forward-adjusted
      by default - TODO confirm against ``wt``.
    * When the last stored date differs from the module-level ``today`` and
      the current day is Monday-Friday, rows from the day after the last
      stored date up to ``today`` are downloaded and appended.

    Fixes relative to the previous version:

    * a cache that is already current (or a weekend call) now returns the
      cached frame instead of failing on an unbound ``df``;
    * only the newly downloaded rows are appended to the table - appending
      the merged frame duplicated every stored row;
    * the store is always closed;
    * ``DataFrame.append`` (removed in pandas 2.0) is replaced by
      ``pd.concat``.

    Args:
        code: stock code string.

    Returns:
        DataFrame with a ``DatetimeIndex``, or ``None`` when nothing is
        cached and the download returns no data.

    Raises:
        ValueError: if the first character of ``code`` is not one of
            0, 2, 3, 6, 9.
    """
    if code[0] in ('0', '2', '3'):
        label = 'M/sz' + code
    elif code[0] in ('6', '9'):
        label = 'M/ss' + code
    else:
        raise ValueError('Unsupported stock code prefix: %r' % (code,))

    h5path = './testh5df/stockdata.h5'
    mode = 'a' if os.path.exists(h5path) else 'w'
    with pd.HDFStore(h5path, mode=mode, complevel=4, complib='blosc') as h5:
        try:
            stored = h5[label]
            last_day = str(stored.index[-1])[0:10]
        except (KeyError, IndexError):
            # Key absent, or present but empty: perform the initial download.
            df = wt.get_h_data(code)
            if df is None:
                return None
            h5.append(label, df, data_columns=df.columns)
        else:
            df = stored
            is_weekday = datetime.datetime.today().isoweekday() in [1, 2, 3, 4, 5]
            if last_day != today and is_weekday:
                next_day = (datetime.datetime.strptime(last_day, '%Y-%m-%d')
                            + datetime.timedelta(days=1))
                bday = next_day.strftime('%Y-%m-%d')
                fresh = wt.get_h_data(code, start=bday, end=today)
                if fresh is not None and not fresh.empty:
                    # Append only the new rows to the on-disk table.
                    h5.append(label, fresh, data_columns=fresh.columns)
                    df = pd.concat([stored, fresh])

    df.index = pd.to_datetime(df.index)
    return df
def get_history_data_mp(code):
    """Return unadjusted history for ``code`` from an HDF5 cache.

    The cache file is ``./testh5df/stockdata_history.h5`` and the store key
    is ``code`` itself.

    * If the key exists and its last date differs from the module-level
      ``bftoday``, rows from the following day up to ``today`` are downloaded
      (``autype=None``); when any arrive they are concatenated to the cached
      frame and the key is rewritten.
    * If anything in that path fails, the error is printed and the full
      history from 2000-01-01 is downloaded and stored instead.

    Fixes relative to the previous version:

    * an up-to-date cache (or an update that yields no rows) now returns the
      cached frame; previously ``df`` was unbound there, which triggered the
      error handler and a full re-download;
    * the store is always closed;
    * ``DataFrame.append`` (removed in pandas 2.0) is replaced by
      ``pd.concat``;
    * a ``None`` download result is no longer written to the store.

    Args:
        code: stock code string, also used as the HDF5 key.

    Returns:
        The cached/updated DataFrame, or ``None`` if the fallback download
        returns no data.
    """
    h5path = './testh5df/stockdata_history.h5'
    mode = 'a' if os.path.exists(h5path) else 'w'
    with pd.HDFStore(h5path, mode=mode, complevel=4, complib='blosc') as h5:
        try:
            df = h5[code]
            last_day = str(df.index[-1])[0:10]
            if last_day != bftoday:
                print('\nUpdating data from %s for %s:' % (last_day, code))
                next_day = (datetime.datetime.strptime(last_day, "%Y-%m-%d")
                            + datetime.timedelta(days=1))
                bday = next_day.strftime('%Y-%m-%d')
                fresh = wt.get_h_data(code, autype=None, start=bday, end=today)
                if fresh is not None and not fresh.empty:
                    df = pd.concat([df, fresh])
                    h5[code] = df
        except Exception as e:
            # Missing key, unreadable entry or failed update: rebuild the
            # cache entry from scratch (best-effort behaviour kept as-is).
            print(e)
            print('\nDownloading the data %s:' % code)
            df = wt.get_h_data(code, autype=None, start='2000-01-01', end=today)
            if df is not None:
                h5[code] = df
    return df
def get_open_h_hdf5(code, h5):
    """Get the full trading history of one stock through an open HDF5 store.

    The key is ``'M/sz<code>'`` for codes starting with 0, 2 or 3 and
    ``'M/ss<code>'`` for codes starting with 6 or 9.

    * If the key exists and its last date is earlier than the module-level
      ``today``, rows from the following day up to ``today`` are downloaded;
      when any arrive they are concatenated to the stored frame and the key
      is rewritten.
    * If that path fails for any reason (typically a missing key), the
      ``timeToMarket`` value for ``code`` is read from
      ``wf.get_stock_basics()`` and the history from that date up to
      ``today`` is downloaded and stored.

    The store is owned by the caller and is not closed here.

    Changes relative to the previous version: ``DataFrame.append`` (removed
    in pandas 2.0) is replaced by ``pd.concat``; the bare ``except`` is
    narrowed to ``Exception`` so KeyboardInterrupt / SystemExit propagate;
    unknown code prefixes raise a clear error; the store is rewritten only
    when new rows were actually fetched.

    Args:
        code: stock code string.
        h5: an open store supporting ``h5[key]`` reads and writes
            (presumably a ``pandas.HDFStore`` - confirm with callers).

    Returns:
        The DataFrame for ``code``, or ``None`` if the fallback download
        returns no data.

    Raises:
        ValueError: if the first character of ``code`` is not one of
            0, 2, 3, 6, 9.
    """
    if code[0] in ('0', '2', '3'):
        label = 'M/sz' + code
    elif code[0] in ('6', '9'):
        label = 'M/ss' + code
    else:
        raise ValueError('Unsupported stock code prefix: %r' % (code,))

    try:
        df = h5[label]
        last_day = str(df.index[-1])[0:10]
        if last_day < today:
            next_day = (datetime.datetime.strptime(last_day, '%Y-%m-%d')
                        + datetime.timedelta(days=1))
            bday = next_day.strftime('%Y-%m-%d')
            fresh = wt.get_h_data(code, start=bday, end=today)
            if fresh is not None and not fresh.empty:
                df = pd.concat([df, fresh])
                h5[label] = df
    except Exception:
        # No usable cached entry: download everything since the listing date.
        basics = wf.get_stock_basics()
        listed = basics.loc[code]['timeToMarket']
        startt = time.strftime('%Y-%m-%d', time.strptime(str(listed), '%Y%m%d'))
        df = wt.get_h_data(code, start=startt, end=today)
        if df is not None:
            h5[label] = df
    return df
def get_h_csv(code, index=False, autype='qfq'):
    """Return price history for ``code``, cached as CSV under
    ``./stockdata/data/history/``.

    Index data (``index=True``) is stored as ``SS_<code>.csv`` when the code
    starts with ``'0'`` and as ``SZ_<code>.csv`` otherwise; stock data is
    stored as ``<code>.csv``.

    * Missing file: the history is downloaded (from 1995-01-01 for an index,
      from the ``timeToMarket`` date reported by ``wf.get_stock_basics()``
      for a stock), sorted ascending and written with a header.
    * Existing file: the last row's date is compared with the module-level
      ``bftoday`` (index) or ``today`` (stock); when it is older, rows from
      the following day up to ``today`` are downloaded, appended to the file
      without a header and concatenated to the returned frame.  For an
      index, a start day that falls on a Saturday is moved forward two days.

    Changes relative to the previous version: ``DataFrame.append`` (removed
    in pandas 2.0) is replaced by ``pd.concat``; the first stock download is
    sorted ascending before saving, as the index branch already did, because
    the update path reads the last CSV row as the newest date; a ``None``
    first download returns ``None`` instead of failing on an attribute
    access.

    Args:
        code: stock or index code string.
        index: False to fetch non-index (stock) data, True to fetch index
            data (translated from the original note).
        autype: price-adjustment type passed to the downloader for stocks;
            per the original note ``None`` means unadjusted, ``'qfq'``
            forward-adjusted and ``'hfq'`` backward-adjusted.

    Returns:
        DataFrame whose index has been converted with ``pd.to_datetime``, or
        ``None`` when no file exists and the download returns no data.
    """
    if index:
        prefix = 'SS_' if code[0] == '0' else 'SZ_'
        fn = './stockdata/data/history/' + prefix + code + '.csv'
        if not os.path.exists(fn):
            df = wt.get_h_data(code, index=True, start='1995-01-01', end=today)
            if df is None:
                return None
            df = df.sort_index(ascending=True)
            df.to_csv(fn)
        else:
            df = pd.read_csv(fn, index_col='date')
            last_day = str(df.index[-1])[0:10]
            if last_day < bftoday:
                print('\nUpdating data from %s for %s:' % (last_day, code))
                next_day = (datetime.datetime.strptime(last_day, "%Y-%m-%d")
                            + datetime.timedelta(days=1))
                if next_day.weekday() == 5:
                    # Start day is a Saturday: move to the following Monday.
                    next_day = next_day + datetime.timedelta(days=2)
                bday = next_day.strftime('%Y-%m-%d')
                fresh = wt.get_h_data(code, index=True, start=bday, end=today)
                if fresh is not None:
                    fresh = fresh.sort_index(ascending=True)
                    fresh.to_csv(fn, header=None, mode='a')
                    df = pd.concat([df, fresh])
    else:
        csv_path = './stockdata/data/history/' + code + '.csv'
        if not os.path.exists(csv_path):
            basics = wf.get_stock_basics()
            listed = basics.loc[code]['timeToMarket']
            startt = time.strftime('%Y-%m-%d',
                                   time.strptime(str(listed), '%Y%m%d'))
            df = wt.get_h_data(code, autype=autype, start=startt, end=today)
            if df is None:
                return None
            # Keep the file oldest-first so that its last row is the newest
            # date, which is what the update path below relies on.
            df = df.sort_index(ascending=True)
            df.to_csv(csv_path)
        else:
            df = pd.read_csv(csv_path, index_col='date')
            last_day = str(df.index[-1])[0:10]
            if last_day < today:
                next_day = (datetime.datetime.strptime(last_day, '%Y-%m-%d')
                            + datetime.timedelta(days=1))
                bday = next_day.strftime('%Y-%m-%d')
                fresh = wt.get_h_data(code, autype=autype, start=bday, end=today)
                if fresh is not None:
                    fresh = fresh.sort_index(ascending=True)
                    df = pd.concat([df, fresh])
                    fresh.to_csv(csv_path, mode='a', header=None)

    # Rows read back from CSV carry string dates; normalise the whole index.
    df.index = pd.to_datetime(df.index)
    return df