def getPAIRdata_memory(code1, code2, dict):
    """Assemble an aligned close-price table for two codes (memory-backed).

    ``dict`` acts as a cache: it is read for the keys ``'startdate'`` and
    ``'enddate'`` and for one entry per code. A code whose entry is missing
    (or ``None``) is fetched with ``dbQueryTools.queryMySQL`` and stored back
    into ``dict`` under that code.

    Args:
        code1: Code whose ``close`` column becomes column ``"X"``.
        code2: Code whose ``close`` column becomes column ``"Y"``; its index
            defines the rows of the result.
        dict: Cache mapping described above; updated in place.

    Returns:
        The cached frame of whichever code has no rows (``code1`` is checked
        first). Otherwise a DataFrame with columns ``"X"`` and ``"Y"``,
        sorted by index ascending, with every row containing NaN removed.
    """
    date_from = dict.get('startdate')
    date_to = dict.get('enddate')

    # Lazily load any code that has not been cached yet.
    for code in (code1, code2):
        if dict.get(code) is None:
            dict[code] = dbQueryTools.queryMySQL(code, date_from, date_to)

    frame_x = dict.get(code1)
    frame_y = dict.get(code2)

    # An empty side is handed back unchanged.
    if frame_x.shape[0] < 1:
        return frame_x
    if frame_y.shape[0] < 1:
        return frame_y

    # Rows follow the code2 index; column assignment aligns on index labels,
    # so labels missing from code1 show up as NaN and are dropped below.
    pair = pd.DataFrame(index=frame_y.index)
    pair["X"] = frame_x["close"]
    pair["Y"] = frame_y["close"]

    return pair.sort_index(axis=0, ascending=True).dropna(axis=0, how='any')
def getPAIRdata_database(code1, code2):
    """Assemble an aligned close-price table for two codes (database-backed).

    Both codes are fetched with ``dbQueryTools.queryMySQL`` for the range
    given by the module-level names ``startdate`` and ``enddate``, which must
    be defined before this function is called.

    This function used to be defined twice in a row with identical logic (the
    second definition silently replaced the first); it is now defined once.

    Args:
        code1: Code whose ``close`` column becomes column ``"X"``.
        code2: Code whose ``close`` column becomes column ``"Y"``; its index
            defines the rows of the result.

    Returns:
        A DataFrame with columns ``"X"`` and ``"Y"``, indexed by the ``code2``
        index (named ``"date2"``), sorted ascending, with every row that
        contains NaN removed.
    """
    frame_x = dbQueryTools.queryMySQL(code1, startdate, enddate)
    frame_y = dbQueryTools.queryMySQL(code2, startdate, enddate)

    # Rows follow the code2 index. The index keeps the name "date2" that the
    # result has always carried, but the query results themselves are no
    # longer modified (no helper "date1"/"date2" columns are added to them).
    pair = pd.DataFrame(index=frame_y.index.rename("date2"))

    # Column assignment aligns on index labels, so labels missing from code1
    # become NaN and are dropped below.
    pair["X"] = frame_x["close"]
    pair["Y"] = frame_y["close"]

    # NOTE: this changes a process-wide pandas display option (print all
    # rows). It is kept because callers may rely on it when printing frames.
    pd.set_option('display.max_rows', None)

    return pair.sort_index(axis=0, ascending=True).dropna(axis=0, how='any')