예제 #1
0
def GetProprocessedData(ts_code):
    """Load the cached preprocessed CSV for stock *ts_code*.

    Args:
        ts_code: tushare stock code used to locate the preprocessed file.

    Returns:
        pandas.DataFrame with the file contents, or an empty DataFrame when
        the file does not exist (a warning is printed in that case).
    """
    stock_pp_file_name = tushare_data.FileNameStockPreprocessedData(ts_code)
    if os.path.exists(stock_pp_file_name):
        return pd.read_csv(stock_pp_file_name)
    print("File not exist: %s" % stock_pp_file_name)
    # Return an empty DataFrame instead of [] so the function always yields
    # one type (matches the other GetProprocessedData variant in this file);
    # callers' len(...) == 0 checks work for both.
    return pd.DataFrame()
예제 #2
0
def GetProprocessedData(ts_code):
    """Load preprocessed data for *ts_code*, trimmed to the wave-kernel window.

    Returns an empty DataFrame when no preprocessed file exists; otherwise
    returns only rows with trade_date >= wave_kernel.wave_kernel_start_date,
    re-indexed from zero.
    """
    pp_file = tushare_data.FileNameStockPreprocessedData(ts_code)
    if not os.path.exists(pp_file):
        return pd.DataFrame()
    frame = pd.read_csv(pp_file)
    in_window = frame['trade_date'] >= wave_kernel.wave_kernel_start_date
    return frame[in_window].copy().reset_index(drop=True)
예제 #3
0
def main(argv):
    """Entry point: run TradeTest on FLAGS.c's preprocessed data if present."""
    del argv  # unused; kept for the app-runner signature

    pp_file = tushare_data.FileNameStockPreprocessedData(FLAGS.c)
    if not os.path.exists(pp_file):
        print("File not exist: %s" % pp_file)
    else:
        TradeTest(pd.read_csv(pp_file), FLAGS.avg, 0.1, True)

    exit()
예제 #4
0
def CreatePPMergeDataOriginal():
    """Merge per-stock preprocessed data into one CSV, if not already built.

    For every code in the module-level ``code_list`` whose preprocessed file
    exists and has at least 200 rows, the first 400 rows are collected and all
    pieces are written to the merge file. Does nothing when the merge file is
    already present.
    """
    merge_file_name = tushare_data.FileNameMergePPDataOriginal()
    print(merge_file_name)
    if os.path.exists(merge_file_name):
        return
    frames = []
    for code_index, stock_code in enumerate(code_list):
        file_name = tushare_data.FileNameStockPreprocessedData(stock_code)
        if not os.path.exists(file_name):
            continue
        stock_pp_data = pd.read_csv(file_name)
        # Require enough history; cap each stock at its first 400 rows.
        if len(stock_pp_data) >= 200:
            frames.append(stock_pp_data[:400].copy())
            print("%-4d : %s 100%% merged" % (code_index, stock_code))
    # Concatenate once at the end: DataFrame.append in a loop was O(n^2)
    # and the method is removed in pandas >= 2.0.
    merge_pp_data = pd.concat(frames) if frames else pd.DataFrame()
    merge_pp_data.to_csv(merge_file_name)
예제 #5
0
def GetPPMergeDataOriginalSimplify():
    """Return a merged, column-reduced view of all preprocessed stock data.

    On a cache hit the previously written merge file is read back. Otherwise
    each stock in the module-level ``code_list`` with >= 200 preprocessed rows
    contributes its trade_date/close/close_100_avg columns; the result is
    written to the merge file and returned.

    Returns:
        pandas.DataFrame (possibly empty when no stock qualified).
    """
    merge_file_name = tushare_data.FileNameMergePPDataOriginalSimplify()
    if os.path.exists(merge_file_name):
        return pd.read_csv(merge_file_name)
    retained_cols = ["trade_date", "close", "close_100_avg"]
    frames = []
    for code_index, stock_code in enumerate(code_list):
        file_name = tushare_data.FileNameStockPreprocessedData(stock_code)
        if not os.path.exists(file_name):
            continue
        stock_pp_data = pd.read_csv(file_name)
        if len(stock_pp_data) >= 200:
            frames.append(stock_pp_data[retained_cols].copy())
            print("%-4d : %s 100%% merged" % (code_index, stock_code))
    # Single pd.concat instead of DataFrame.append per stock: append was
    # quadratic in the loop and is removed in pandas >= 2.0.
    merge_pp_data = pd.concat(frames) if frames else pd.DataFrame()
    merge_pp_data.to_csv(merge_file_name)
    return merge_pp_data
예제 #6
0
def CreateDataSetSplit():
    """Build and save a date-aligned feature dataset per stock.

    For each sampled stock code, allocates a (num_dates, 1, feature+acture)
    zero array, fills the row matching each trading day's date index with the
    day's data unit, and saves it as a .npy split file. Stocks whose split
    file already exists, or whose preprocessed data is missing/empty, are
    skipped.
    """
    start_date = dataset_start_date
    end_date = tushare_data.train_test_date
    date_list = tushare_data.TradeDateListRange(start_date, end_date).tolist()
    code_list = tushare_data.StockCodes(dataset_stock_sample_step)
    date_index_map = ListToIndexMap(date_list, True)

    data_unit_date_index = ACTURE_DATA_INDEX_DATE()
    for code_index, stock_code in enumerate(code_list):
        dataset_split_file_name = FileNameDataSetSplit(stock_code)
        if not os.path.exists(dataset_split_file_name):
            stock_pp_file_name = tushare_data.FileNameStockPreprocessedData(
                stock_code)
            if not os.path.exists(stock_pp_file_name):
                # No preprocessed data: skip (also skips the progress print,
                # matching the original flow).
                continue
            pp_data = pd.read_csv(stock_pp_file_name)
            if len(pp_data) == 0:
                continue

            dataset = np.zeros(
                (len(date_list), 1,
                 feature.feature_size + feature.acture_unit_size))
            for day_loop in range(len(pp_data)):
                data_unit = feature.GetDataUnit1Day(pp_data, day_loop)
                if len(data_unit) == 0:
                    continue
                temp_date = int(data_unit[data_unit_date_index])
                # NOTE(review): start_date/end_date types are not visible
                # here — assumes they compare correctly against int dates.
                if temp_date < start_date or temp_date > end_date:
                    continue
                date_index = date_index_map[pp_data['trade_date'][day_loop]]
                dataset[date_index][0] = data_unit

            np.save(dataset_split_file_name, dataset)
        print("%-4d : %s 100%%" % (code_index, stock_code))
예제 #7
0
def UpdateFixDataSet(is_daily_data,
                     save_unfinished_record,
                     dataset_merge=True):
    """Build the fixed-window feature dataset, merged or per stock.

    Args:
        is_daily_data: when True, read preprocessed data from the daily-update
            merge source instead of per-stock CSV files.
        save_unfinished_record: when True, include the most recent days whose
            labels are not yet available (start index 0, no label margin).
        dataset_merge: when True (default), stack every stock into one array
            and save it to a single file; otherwise save one .npy per stock.
    """
    if dataset_merge:
        if is_daily_data:
            dataset_file_name = FileNameFixDataSetDaily()
        else:
            dataset_file_name = FileNameFixDataSet()
        if os.path.exists(dataset_file_name):
            print('dataset already exist: %s' % dataset_file_name)
            return
    else:
        path_name = FileNameFixDataSetPath()
        if not os.path.exists(path_name):
            os.makedirs(path_name)
    if is_daily_data:
        # Bug fix: the original loaded this only when dataset_merge was True,
        # so the per-stock path (dataset_merge=False) hit a NameError below.
        # Loading after the early-return also avoids a wasted load when the
        # merged dataset already exists.
        pp_merge_data = pp_daily_update.GetPreprocessedMergeData()
    code_list = tushare_data.StockCodes()
    init_flag = True
    for code_index in range(0, len(code_list)):
        stock_code = code_list[code_index]
        if not dataset_merge:
            stock_dataset_file_name = FileNameFixDataSetStock(stock_code)
            if os.path.exists(stock_dataset_file_name):
                print("%-4d : %s 100%%" % (code_index, stock_code))
                continue
        if is_daily_data:
            pp_data = pp_daily_update.GetPreprocessedData(
                pp_merge_data, stock_code)
        else:
            stock_pp_file_name = tushare_data.FileNameStockPreprocessedData(
                stock_code)
            if os.path.exists(stock_pp_file_name):
                pp_data = pd.read_csv(stock_pp_file_name)
            else:
                pp_data = []
        if len(pp_data) > 0:
            pp_data = pp_data[pp_data['trade_date'] >= int(
                dataset_start_date)].copy().reset_index(drop=True)
            data_list = []
            if save_unfinished_record:
                valid_data_num = len(pp_data) - feature.feature_days
                start_index = 0
            else:
                # Reserve label_days at the front so every record has labels.
                valid_data_num = len(
                    pp_data) - feature.feature_days - feature.label_days
                start_index = feature.label_days
            if valid_data_num > 0:
                for day_loop in range(start_index,
                                      start_index + valid_data_num):
                    data_unit = feature.GetDataUnit(pp_data, day_loop)
                    if len(data_unit) > 0:
                        data_list.append(data_unit)
                temp_np_data = np.array(data_list)
                if dataset_merge:
                    if init_flag:
                        data_set = temp_np_data
                        init_flag = False
                    else:
                        data_set = np.vstack((data_set, temp_np_data))
                else:
                    np.save(stock_dataset_file_name, temp_np_data)
            print("%-4d : %s 100%%" % (code_index, stock_code))
    if dataset_merge:
        if init_flag:
            # Bug fix: the original referenced data_set unconditionally and
            # raised NameError when no stock produced any data.
            print('dataset empty, nothing to save')
            return
        print("dataset: {}".format(data_set.shape))
        np.save(dataset_file_name, data_set)
예제 #8
0
            # if use_turnover_rate_f:
            #     src_df.loc[day_loop, 'turnover_rate_f_5'] = trf_5_sum
            # src_df.loc[day_loop, 'vol_5'] = vol_5_sum

        loop_count += 1
            
    temp_pre_close = 0.0
    for day_loop in range(0, len(src_df)):
        temp_pre_close = src_df.loc[day_loop,'pre_close']
        if temp_pre_close == 0.0:
            print('Error: pre_close == %f, trade_date: %s' % (src_df.loc[day_loop,'pre_close'], src_df.loc[day_loop,'trade_date']))
            return src_df[0:0]
        src_df.loc[day_loop,'open_increase'] = ((src_df.loc[day_loop,'open'] / temp_pre_close) - 1.0) * 100.0
        src_df.loc[day_loop,'close_increase'] = ((src_df.loc[day_loop,'close'] / temp_pre_close) - 1.0) * 100.0
        src_df.loc[day_loop,'high_increase'] = ((src_df.loc[day_loop,'high'] / temp_pre_close) - 1.0) * 100.0
        src_df.loc[day_loop,'low_increase'] = ((src_df.loc[day_loop,'low'] / temp_pre_close) - 1.0) * 100.0
    StockDataPreProcess_AddSuspendBorder(src_df)
    StockDataPreProcess_AddAdjFlag(src_df)
    return src_df[:len(src_df)-preprocess_ref_days]



if __name__ == "__main__":
    # Quick manual check: dump the preprocessed data for one stock and the
    # rows flagged by adj_flag.
    pp_path = tushare_data.FileNameStockPreprocessedData('600050.SH')
    if os.path.exists(pp_path):
        src_df = pd.read_csv(pp_path)

        print(src_df)
        print(src_df[src_df['adj_flag'] > 0])