def GetProprocessedData(ts_code):
    """Load the preprocessed data file for a single stock.

    Args:
        ts_code: tushare stock code (e.g. '600050.SH').

    Returns:
        DataFrame with the stock's preprocessed rows, or an empty
        DataFrame when the file does not exist. (The other loader of
        this name in this project already returns pd.DataFrame() on a
        miss; returning [] here made the return type unstable.)
    """
    stock_pp_file_name = tushare_data.FileNameStockPreprocessedData(ts_code)
    if os.path.exists(stock_pp_file_name):
        return pd.read_csv(stock_pp_file_name)
    # Keep the diagnostic, but return an empty DataFrame instead of a bare
    # list so callers get one type regardless of the branch taken; len()
    # checks behave identically for both.
    print("File not exist: %s" % stock_pp_file_name)
    return pd.DataFrame()
def GetProprocessedData(ts_code):
    """Return the preprocessed rows for *ts_code* on or after the wave
    kernel start date.

    Args:
        ts_code: tushare stock code.

    Returns:
        DataFrame filtered to trade_date >= wave_kernel.wave_kernel_start_date
        with a fresh 0..n-1 index, or an empty DataFrame when no
        preprocessed file exists for the code.
    """
    pp_file = tushare_data.FileNameStockPreprocessedData(ts_code)
    if not os.path.exists(pp_file):
        return pd.DataFrame()
    frame = pd.read_csv(pp_file)
    in_range = frame['trade_date'] >= wave_kernel.wave_kernel_start_date
    return frame[in_range].copy().reset_index(drop=True)
def main(argv):
    """Entry point: run a trade test on the stock selected by --c.

    Loads that stock's preprocessed CSV and hands it to TradeTest with
    the --avg parameter; terminates if the file is missing.
    """
    del argv  # unused, required by the app.run() signature
    pp_file = tushare_data.FileNameStockPreprocessedData(FLAGS.c)
    if not os.path.exists(pp_file):
        print("File not exist: %s" % pp_file)
        exit()
    TradeTest(pd.read_csv(pp_file), FLAGS.avg, 0.1, True)
def CreatePPMergeDataOriginal():
    """Merge per-stock preprocessed CSVs into one original-data file.

    For every code in the module-level code_list, reads its preprocessed
    CSV (if present), keeps stocks with at least 200 rows (truncated to
    the first 400), and writes the concatenation to the merge file.
    Does nothing if the merge file already exists.
    """
    merge_file_name = tushare_data.FileNameMergePPDataOriginal()
    print(merge_file_name)
    if os.path.exists(merge_file_name):
        return
    # Collect frames in a list and concat once: DataFrame.append was
    # deprecated in pandas 1.4 and removed in 2.0, and appending in a
    # loop is O(n^2) in total copied rows.
    frames = []
    for code_index, stock_code in enumerate(code_list):
        file_name = tushare_data.FileNameStockPreprocessedData(stock_code)
        if not os.path.exists(file_name):
            continue
        stock_pp_data = pd.read_csv(file_name)
        if len(stock_pp_data) >= 200:
            frames.append(stock_pp_data[:400].copy())
        print("%-4d : %s 100%% merged" % (code_index, stock_code))
    # pd.concat([]) raises; fall back to an empty frame so to_csv still
    # produces a (header-only) file as the original would.
    merge_pp_data = pd.concat(frames) if frames else pd.DataFrame()
    merge_pp_data.to_csv(merge_file_name)
def GetPPMergeDataOriginalSimplify():
    """Return the simplified merged preprocessed data, building it on demand.

    On a cache hit, reads the existing merge file. Otherwise merges the
    trade_date/close/close_100_avg columns of every stock in code_list
    that has at least 200 preprocessed rows, writes the result to the
    merge file, and returns it.

    Returns:
        DataFrame with columns trade_date, close, close_100_avg
        (empty if no stock qualified).
    """
    merge_file_name = tushare_data.FileNameMergePPDataOriginalSimplify()
    if os.path.exists(merge_file_name):
        return pd.read_csv(merge_file_name)
    retained_cols = ["trade_date", "close", "close_100_avg"]
    # Build a list and concat once instead of DataFrame.append in a loop
    # (deprecated in pandas 1.4, removed in 2.0, quadratic copying).
    frames = []
    for code_index, stock_code in enumerate(code_list):
        file_name = tushare_data.FileNameStockPreprocessedData(stock_code)
        if not os.path.exists(file_name):
            continue
        stock_pp_data = pd.read_csv(file_name)
        if len(stock_pp_data) >= 200:
            frames.append(stock_pp_data[retained_cols].copy())
        print("%-4d : %s 100%% merged" % (code_index, stock_code))
    merge_pp_data = pd.concat(frames) if frames else pd.DataFrame()
    merge_pp_data.to_csv(merge_file_name)
    return merge_pp_data
def CreateDataSetSplit():
    """Build one per-stock dataset file aligned on a shared trade-date axis.

    For each sampled stock code, creates an array of shape
    (len(date_list), 1, feature_size + acture_unit_size) where row i holds
    the stock's one-day data unit for trade date date_list[i] (rows stay
    zero for dates the stock did not trade), then saves it to the split
    file. Stocks whose split file already exists are skipped, so the run
    is resumable.
    """
    start_date = dataset_start_date
    end_date = tushare_data.train_test_date
    date_list = tushare_data.TradeDateListRange(start_date, end_date).tolist()
    code_list = tushare_data.StockCodes(dataset_stock_sample_step)
    # Maps trade date -> row index in the fixed date axis.
    date_index_map = ListToIndexMap(date_list, True)
    data_unit_date_index = ACTURE_DATA_INDEX_DATE()
    valid_data_unit_num = 0  # NOTE(review): never updated or read below
    for code_index in range(0, len(code_list)):
        stock_code = code_list[code_index]
        dataset_split_file_name = FileNameDataSetSplit(stock_code)
        if not os.path.exists(dataset_split_file_name):
            stock_pp_file_name = tushare_data.FileNameStockPreprocessedData(
                stock_code)
            if os.path.exists(stock_pp_file_name):
                pp_data = pd.read_csv(stock_pp_file_name)
            else:
                pp_data = []
            if len(pp_data) == 0:
                continue
            # One slot per trade date; untouched slots remain all zeros.
            dataset = np.zeros(
                (len(date_list), 1,
                 feature.feature_size + feature.acture_unit_size))
            for day_loop in range(0, len(pp_data)):
                data_unit = feature.GetDataUnit1Day(pp_data, day_loop)
                if len(data_unit) == 0:
                    continue
                temp_date = int(data_unit[data_unit_date_index])
                # NOTE(review): assumes start_date/end_date compare
                # correctly against an int date — confirm their types.
                if temp_date < start_date or temp_date > end_date:
                    continue
                dateset_index1 = date_index_map[pp_data['trade_date']
                                                [day_loop]]
                dataset[dateset_index1][0] = data_unit
            # NOTE(review): split_data_date is computed but never used.
            split_data_date = dataset[:, :, ACTURE_DATA_INDEX_DATE()]
            np.save(dataset_split_file_name, dataset)
            print("%-4d : %s 100%%" % (code_index, stock_code))
def UpdateFixDataSet(is_daily_data, save_unfinished_record, dataset_merge=True):
    """Build the fixed training dataset from preprocessed stock data.

    Args:
        is_daily_data: when True, pull each stock's rows out of the
            daily-update merged preprocessed data instead of per-stock
            CSV files.
        save_unfinished_record: when True, also keep the newest records
            whose label window is not complete yet (start_index = 0).
        dataset_merge: when True, stack all stocks into one array saved
            as a single file; otherwise save one .npy file per stock.

    NOTE(review): with dataset_merge=False and is_daily_data=True,
    pp_merge_data is never assigned before its use below — verify this
    combination is never called.
    NOTE(review): with dataset_merge=True, if no stock produces data,
    data_set stays unbound and the final print/np.save raises NameError.
    """
    if dataset_merge:
        if is_daily_data:
            dataset_file_name = FileNameFixDataSetDaily()
            pp_merge_data = pp_daily_update.GetPreprocessedMergeData()
        else:
            dataset_file_name = FileNameFixDataSet()
        # Merged dataset is built once; bail out if it already exists.
        if os.path.exists(dataset_file_name):
            print('dataset already exist: %s' % dataset_file_name)
            return
    else:
        # Per-stock output mode: make sure the target directory exists.
        path_name = FileNameFixDataSetPath()
        if not os.path.exists(path_name):
            os.makedirs(path_name)
    code_list = tushare_data.StockCodes()
    init_flag = True  # True until the first stock's array seeds data_set
    for code_index in range(0, len(code_list)):
        stock_code = code_list[code_index]
        if not dataset_merge:
            # Per-stock mode is resumable: skip stocks already saved.
            stock_dataset_file_name = FileNameFixDataSetStock(stock_code)
            if os.path.exists(stock_dataset_file_name):
                print("%-4d : %s 100%%" % (code_index, stock_code))
                continue
        if is_daily_data:
            pp_data = pp_daily_update.GetPreprocessedData(
                pp_merge_data, stock_code)
        else:
            stock_pp_file_name = tushare_data.FileNameStockPreprocessedData(
                stock_code)
            if os.path.exists(stock_pp_file_name):
                pp_data = pd.read_csv(stock_pp_file_name)
            else:
                pp_data = []
        if len(pp_data) > 0:
            # Drop rows before the configured dataset start date.
            pp_data = pp_data[pp_data['trade_date'] >= int(
                dataset_start_date)].copy().reset_index(drop=True)
        data_list = []
        if save_unfinished_record:
            # Include records whose label window is still open.
            valid_data_num = len(pp_data) - feature.feature_days
            start_index = 0
        else:
            # Only records with a full label window after them.
            valid_data_num = len(
                pp_data) - feature.feature_days - feature.label_days
            start_index = feature.label_days
        if valid_data_num > 0:
            for day_loop in range(start_index, start_index + valid_data_num):
                data_unit = feature.GetDataUnit(pp_data, day_loop)
                if len(data_unit) > 0:
                    data_list.append(data_unit)
            temp_np_data = np.array(data_list)
            if dataset_merge:
                if init_flag:
                    data_set = temp_np_data
                    init_flag = False
                else:
                    data_set = np.vstack((data_set, temp_np_data))
            else:
                np.save(stock_dataset_file_name, temp_np_data)
        print("%-4d : %s 100%%" % (code_index, stock_code))
        # print("train_data: {}".format(train_data.shape))
        # print(train_data)
        # if (code_index > 0) and ((code_index % 100) == 0):
        #     print("dataset: {}".format(data_set.shape))
        #     np.save(dataset_file_name, data_set)
    if dataset_merge:
        print("dataset: {}".format(data_set.shape))
        np.save(dataset_file_name, data_set)
# if use_turnover_rate_f: # src_df.loc[day_loop, 'turnover_rate_f_5'] = trf_5_sum # src_df.loc[day_loop, 'vol_5'] = vol_5_sum loop_count += 1 temp_pre_close = 0.0 for day_loop in range(0, len(src_df)): temp_pre_close = src_df.loc[day_loop,'pre_close'] if temp_pre_close == 0.0: print('Error: pre_close == %f, trade_date: %s' % (src_df.loc[day_loop,'pre_close'], src_df.loc[day_loop,'trade_date'])) return src_df[0:0] src_df.loc[day_loop,'open_increase'] = ((src_df.loc[day_loop,'open'] / temp_pre_close) - 1.0) * 100.0 src_df.loc[day_loop,'close_increase'] = ((src_df.loc[day_loop,'close'] / temp_pre_close) - 1.0) * 100.0 src_df.loc[day_loop,'high_increase'] = ((src_df.loc[day_loop,'high'] / temp_pre_close) - 1.0) * 100.0 src_df.loc[day_loop,'low_increase'] = ((src_df.loc[day_loop,'low'] / temp_pre_close) - 1.0) * 100.0 StockDataPreProcess_AddSuspendBorder(src_df) StockDataPreProcess_AddAdjFlag(src_df) return src_df[:len(src_df)-preprocess_ref_days] if __name__ == "__main__": stock_pp_file_name = tushare_data.FileNameStockPreprocessedData('600050.SH') if os.path.exists(stock_pp_file_name): src_df = pd.read_csv(stock_pp_file_name) print(src_df) print(src_df[src_df['adj_flag'] > 0])