Exemplo n.º 1
0
def GetTrainData(max_samples=2000000):
    """Load the cached training split, shuffle it and return features/labels.

    ``GetTrainDataTempSave()`` is called first, then the two temporary
    ``.npy`` files are loaded. A single random permutation of the row indices
    is drawn and applied to both arrays, so features and labels stay paired.

    The rows are indexed directly in the loaded arrays instead of being
    concatenated and re-sliced. This avoids several full copies of the data
    and no longer relies on ``feature.COL_ACTIVE_LABEL()`` matching the
    position of the label column inside the concatenated array.

    Args:
        max_samples: Upper bound on the number of rows returned. Defaults to
            2000000, the limit that used to be hard-coded. ``None`` keeps
            every row.

    Returns:
        Tuple ``(train_features, train_labels)`` of two arrays with the same
        number of rows.

    Raises:
        ValueError: If the feature and label files have different row counts.
    """
    GetTrainDataTempSave()
    features = np.load('./data/dataset/fix_dataset_temp_features.npy')
    labels = np.load('./data/dataset/fix_dataset_temp_labels.npy')
    if len(features) != len(labels):
        raise ValueError(
            "feature/label row count mismatch: %d != %d" %
            (len(features), len(labels)))
    # Same text the concatenated array's shape used to produce.
    print("train_data: {}".format(
        (features.shape[0], features.shape[1] + labels.shape[1])))

    print("reorder...")
    # One permutation, truncated before indexing, so only the kept rows are
    # ever copied.
    order = np.random.permutation(len(features))[:max_samples]

    print("get feature ...")
    train_features = features[order]  # fancy indexing already returns a copy

    print("get label...")
    train_labels = labels[order]

    print("train_features: {}".format(train_features.shape))
    print("train_labels: {}".format(train_labels.shape))
    return train_features, train_labels
Exemplo n.º 2
0
    def Train(self):
        """Run ``T`` training passes over ``self.train_dataset``.

        Each pass walks the date axis from the highest index down to 0. While
        the status is OFF a random code index is drawn for every date; while
        it is ON the previously drawn code is kept. Samples whose date column
        is 0.0 are skipped. For every visited sample the status becomes ON
        when ``GetTrainActionQ`` returns a positive value and OFF otherwise.
        The reward is the percentage change between the open prices at the
        next two valid date indices, and the transition is passed to
        ``PerceiveAndTrain``. A pass stops early as soon as one of those two
        indices is negative.

        After each pass one summary row is printed (using ``TestTop1``), the
        loss statistics are cleared and the model is saved.
        """
        date_count = self.train_dataset.shape[0]
        stock_count = self.train_dataset.shape[1]
        print("date_num: %u" % date_count)
        print("code_num: %u" % stock_count)
        header = ('T', 'step', 'loss', 'increase', 'trade_count', 'max_Q_mean')
        print('\n%-8s%-8s%-8s%-12s%-12s%-12s' % header)
        print('-' * 60)

        for epoch in range(T):
            status = STATUS_OFF
            # Walk the dates from the last index down to the first.
            for date_idx in range(date_count - 1, -1, -1):
                if status == STATUS_OFF:
                    # Not holding anything: pick a new random code.
                    stock_idx = random.randint(0, stock_count - 1)
                sample = self.train_dataset[date_idx][stock_idx]
                if sample[date_col_index] == 0.0:
                    continue

                state = sample[0:feature.FEATURE_SIZE()]
                q_value = self.GetTrainActionQ(state)
                status = STATUS_ON if q_value > 0 else STATUS_OFF

                entry_idx = self.NextValidDateIndex(self.train_dataset,
                                                    stock_idx, date_idx)
                if entry_idx < 0:
                    break
                exit_idx = self.NextValidDateIndex(self.train_dataset,
                                                   stock_idx, entry_idx)
                if exit_idx < 0:
                    break

                entry_sample = self.train_dataset[entry_idx][stock_idx]
                next_state = entry_sample[0:feature.FEATURE_SIZE()]
                exit_open = self.train_dataset[exit_idx][stock_idx][
                    open_col_index]
                entry_open = entry_sample[open_col_index]
                # Open-to-open change between the two indices, in percent.
                reward = (exit_open / entry_open - 1.0) * 100.0
                self.PerceiveAndTrain(state, next_state, reward)

            increase, trade_count, max_Q_mean = self.TestTop1()
            summary = (epoch, self.step_num, self.LossMean(), increase,
                       trade_count, max_Q_mean)
            print('%-8u%-8u%-8.5f%-12.3f%-12u%-12.6f' % summary)
            self.LossClean()
            self.SaveModel_(epoch)
Exemplo n.º 3
0
def GetTrainTestDataSplitByDate():
    """Split the fix dataset into train/test parts at the split date.

    Rows whose ``feature.COL_TRADE_DATE(0)`` column is strictly less than
    ``dataset_train_test_split_date`` form the training part; all other rows
    form the test part.

    Returns:
        Tuple ``(train_features, train_labels, test_features, test_labels,
        test_data)``. Features are the first ``feature.FEATURE_SIZE()``
        columns, labels are the single column at
        ``feature.COL_ACTIVE_LABEL()`` (kept 2-D), and ``test_data`` holds
        the complete test rows.
    """
    dataset = np.load(FileNameFixDataSet())
    print("dataset: {}".format(dataset.shape))

    trade_dates = dataset[:, feature.COL_TRADE_DATE(0)]
    before_split = trade_dates < dataset_train_test_split_date
    train_data, test_data = dataset[before_split], dataset[~before_split]
    print("train: {}".format(train_data.shape))
    print("test: {}".format(test_data.shape))

    # Column selections shared by both parts.
    feature_cols = slice(0, feature.FEATURE_SIZE())
    label_col = feature.COL_ACTIVE_LABEL()
    label_cols = slice(label_col, label_col + 1)

    return (train_data[:, feature_cols], train_data[:, label_cols],
            test_data[:, feature_cols], test_data[:, label_cols], test_data)
Exemplo n.º 4
0
def GetTrainTestDataSampleByDate(test_ratio):
    """Split the fix dataset into train/test parts by sampling trade dates.

    ``sample_num`` is ``1 / test_ratio`` truncated to an integer. A row goes
    to the test part when the last two decimal digits of its
    ``feature.COL_TRADE_DATE(0)`` value are a multiple of ``sample_num``.
    Every other row goes to the training part.

    Args:
        test_ratio: Requested share of test rows; must satisfy
            ``0 < test_ratio <= 1``. The realised share depends on how the
            date values are distributed and is only approximately this value.

    Returns:
        Tuple ``(train_features, train_labels, test_features, test_labels,
        test_data)``. Features are the first ``feature.FEATURE_SIZE()``
        columns, labels are the single column at
        ``feature.COL_ACTIVE_LABEL()`` (kept 2-D), and ``test_data`` holds
        the complete test rows.

    Raises:
        ValueError: If ``test_ratio`` is outside ``(0, 1]``. Without this
            check 0 caused a bare ZeroDivisionError and values above 1 led to
            a modulo by zero.
    """
    if not 0.0 < test_ratio <= 1.0:
        raise ValueError(
            "test_ratio must be in (0, 1], got %r" % (test_ratio,))
    # The small epsilon keeps e.g. 1 / 0.1 from truncating to 9.
    sample_num = int(1.0 / test_ratio + 0.0001)
    dataset = np.load(FileNameFixDataSet())
    print("dataset: {}".format(dataset.shape))
    pos = ((dataset[:, feature.COL_TRADE_DATE(0)].astype(int) % 100) %
           sample_num) == 0
    test_data = dataset[pos]
    train_data = dataset[~pos]
    print("train: {}".format(train_data.shape))
    print("test: {}".format(test_data.shape))

    feature_size = feature.FEATURE_SIZE()
    label_col = feature.COL_ACTIVE_LABEL()

    train_features = train_data[:, 0:feature_size]
    train_labels = train_data[:, label_col:label_col + 1]

    test_features = test_data[:, 0:feature_size]
    test_labels = test_data[:, label_col:label_col + 1]

    return train_features, train_labels, test_features, test_labels, test_data
Exemplo n.º 5
0
def GetTrainTestDataRandom(test_ratio):
    """Split the fix dataset into train/test parts by random row sampling.

    ``sample_num`` is ``1 / test_ratio`` truncated to an integer. Every row
    independently draws a random integer in ``[0, sample_num)`` and goes to
    the test part when the draw is 0. Every other row goes to the training
    part.

    Args:
        test_ratio: Requested share of test rows; must satisfy
            ``0 < test_ratio <= 1``.

    Returns:
        Tuple ``(train_features, train_labels, test_features, test_labels,
        test_data)``. Features are the first ``feature.FEATURE_SIZE()``
        columns, labels are the single column at
        ``feature.COL_ACTIVE_LABEL()`` (kept 2-D), and ``test_data`` holds
        the complete test rows.

    Raises:
        ValueError: If ``test_ratio`` is outside ``(0, 1]``. Without this
            check 0 caused a bare ZeroDivisionError and values above 1 made
            ``sample_num`` zero.
    """
    if not 0.0 < test_ratio <= 1.0:
        raise ValueError(
            "test_ratio must be in (0, 1], got %r" % (test_ratio,))
    # The small epsilon keeps e.g. 1 / 0.1 from truncating to 9.
    sample_num = int(1.0 / test_ratio + 0.0001)
    dataset = np.load(FileNameFixDataSet())
    print("dataset: {}".format(dataset.shape))
    print('sample_num:%u' % sample_num)
    # Draw one integer in 0..(sample_num - 1) per row; pos marks the rows
    # whose draw is 0.
    pos = (np.random.randint(0, sample_num, size=len(dataset)) == 0)
    test_data = dataset[pos]
    train_data = dataset[~pos]
    print("train: {}".format(train_data.shape))
    print("test: {}".format(test_data.shape))

    feature_size = feature.FEATURE_SIZE()
    label_col = feature.COL_ACTIVE_LABEL()

    train_features = train_data[:, 0:feature_size]
    train_labels = train_data[:, label_col:label_col + 1]

    test_features = test_data[:, 0:feature_size]
    test_labels = test_data[:, label_col:label_col + 1]

    return train_features, train_labels, test_features, test_labels, test_data
Exemplo n.º 6
0
def GetTrainDataTempSave():
    """Write the training part of the fix dataset to two temporary files.

    Rows whose ``feature.COL_TRADE_DATE(0)`` column is strictly less than
    ``dataset_train_test_split_date`` are selected. Their first
    ``feature.FEATURE_SIZE()`` columns and their
    ``feature.COL_ACTIVE_LABEL()`` column (kept 2-D) are saved as separate
    ``.npy`` files under ``./data/dataset/``.
    """
    dataset = np.load(FileNameFixDataSet())
    print("dataset: {}".format(dataset.shape))

    trade_dates = dataset[:, feature.COL_TRADE_DATE(0)]
    train_rows = dataset[trade_dates < dataset_train_test_split_date]
    print("train_data: {}".format(train_rows.shape))

    feature_count = feature.FEATURE_SIZE()
    label_col = feature.COL_ACTIVE_LABEL()
    outputs = (
        ('./data/dataset/fix_dataset_temp_features.npy',
         train_rows[:, 0:feature_count]),
        ('./data/dataset/fix_dataset_temp_labels.npy',
         train_rows[:, label_col:label_col + 1]),
    )
    for path, columns in outputs:
        np.save(path, columns)
Exemplo n.º 7
0
def SortWaveDataset(dataset):
    """Return ``dataset`` with its rows sorted by pre-on date, then ts code.

    The array is wrapped in a DataFrame whose columns are named ``f_0`` ..
    ``f_<FEATURE_SIZE-1>`` followed by the seven metadata columns listed
    below. It is sorted ascending on ``pre_on_date`` and ``ts_code`` and
    converted back to an array.

    Args:
        dataset: 2-D array with ``feature.FEATURE_SIZE() + 7`` columns.

    Returns:
        The sorted rows as returned by ``DataFrame.values``.
    """
    feature_names = ['f_%u' % col for col in range(feature.FEATURE_SIZE())]
    meta_names = [
        'label',
        'ts_code',
        'pre_on_date',
        'on_date',
        'pre_off_date',
        'off_date',
        'holding_days',
    ]
    frame = pd.DataFrame(dataset, columns=feature_names + meta_names)
    ordered = frame.sort_values(by=['pre_on_date', 'ts_code'],
                                ascending=(True, True))
    return ordered.values
Exemplo n.º 8
0
def Predict(test_data, model, mean, std):
    """Run ``model`` on ``test_data`` and join predictions with actual data.

    The feature columns start at ``feature.COL_FEATURE_OFFSET()`` and span
    ``feature.FEATURE_SIZE()`` columns. They are passed through
    ``train_rnn.FeaturesPretreat`` with ``mean`` and ``std`` before
    ``model.predict`` is called.

    Args:
        test_data: 2-D array holding feature columns and actual-data units.
        model: Object exposing ``predict(features)``.
        mean: Forwarded to ``train_rnn.FeaturesPretreat``.
        std: Forwarded to ``train_rnn.FeaturesPretreat``.

    Returns:
        DataFrame with a ``pred`` column followed by the ``T0_*`` columns
        (actual-data unit 0) and the ``Td_*`` columns (the unit at
        ``feature.ACTIVE_LABEL_DAY()``), joined on the row index.
    """
    first_feature_col = feature.COL_FEATURE_OFFSET()
    model_input = test_data[:, first_feature_col:first_feature_col +
                            feature.FEATURE_SIZE()]
    model_input = train_rnn.FeaturesPretreat(model_input, mean, std)
    predictions = model.predict(model_input)
    print("Predict out")
    predictions_df = pd.DataFrame(predictions, columns=['pred'])

    t0_columns = [
        'T0_open_increse',
        'T0_low_increase',
        'T0_open',
        'T0_low',
        'T0_close',
        'stock_code',
        'T0_trade_date',
    ]
    td_columns = [
        'Td_open_increse',
        'Td_low_increase',
        'Td_open',
        'Td_low',
        'Td_close',
        'Td_stock_code',
        'Td_trade_date',
    ]

    # Actual-data unit for day 0.
    t0_start = feature.COL_ACTURE_OFFSET(0)
    unit_size = feature.ACTURE_UNIT_SIZE()
    t0_df = pd.DataFrame(test_data[:, t0_start:t0_start + unit_size],
                         columns=t0_columns)

    # Actual-data unit for the active label day.
    td_start = feature.COL_ACTURE_OFFSET(feature.ACTIVE_LABEL_DAY())
    td_df = pd.DataFrame(test_data[:, td_start:td_start + unit_size],
                         columns=td_columns)

    acture_data_df = pd.merge(t0_df,
                              td_df,
                              left_index=True,
                              right_index=True)
    return pd.merge(predictions_df,
                    acture_data_df,
                    left_index=True,
                    right_index=True)
Exemplo n.º 9
0
    def Test(self):
        """Simulate trading on ``self.test_dataset`` with a random code pick.

        Dates are walked from the highest index down to 0. While the status
        is OFF a random code index is drawn for every date; while it is ON
        the previously drawn code is kept. Samples whose date column is 0.0
        are skipped. A Q value above 0.01 from ``GetTestActionQ`` requests
        ON, anything else requests OFF. An OFF->ON switch records the open
        price at the next valid date index as the entry price. An ON->OFF
        switch records the open price at the next valid date index as the
        exit price and books the relative change.

        The walk stops when ``NextValidDateIndex`` returns a negative index.
        Previously that value was used directly, so a negative result was
        silently read as "count from the end" and priced the trade with an
        unrelated row.
        NOTE(review): assumes a negative return means "no further valid
        date" - confirm against ``NextValidDateIndex``.

        Returns:
            Sum of the relative price changes of all closed trades.
        """
        date_num = self.test_dataset.shape[0]
        code_num = self.test_dataset.shape[1]
        curren_status = STATUS_OFF
        trade_count = 0
        increase_sum = 0.0
        # Iterate over the dataset's dates, last index first.
        for dloop in reversed(range(date_num)):
            if curren_status == STATUS_OFF:
                code_index = random.randint(0, code_num - 1)
            if self.test_dataset[dloop][code_index][date_col_index] == 0.0:
                continue

            current_feature = self.test_dataset[dloop][code_index][
                0:feature.FEATURE_SIZE()]
            Q = self.GetTestActionQ(current_feature)
            next_status = STATUS_ON if Q > 0.01 else STATUS_OFF

            if curren_status == STATUS_OFF:
                if next_status == STATUS_ON:
                    curren_status = STATUS_ON
                    t1_date_index = self.NextValidDateIndex(
                        self.test_dataset, code_index, dloop)
                    if t1_date_index < 0:
                        # No later valid date to enter on.
                        break
                    in_price = self.test_dataset[t1_date_index][code_index][
                        open_col_index]
            else:
                if next_status == STATUS_OFF:
                    curren_status = STATUS_OFF
                    t1_date_index = self.NextValidDateIndex(
                        self.test_dataset, code_index, dloop)
                    if t1_date_index < 0:
                        # No later valid date to exit on; the open position
                        # is left uncounted.
                        break
                    out_price = self.test_dataset[t1_date_index][code_index][
                        open_col_index]
                    increase = out_price / in_price - 1.0
                    increase_sum += increase
                    trade_count += 1
        print("Test increase_sum:%.3f, trade_count:%u" %
              (increase_sum, trade_count))
        return increase_sum
Exemplo n.º 10
0
 def __init__(self):
     """Set up the replay buffer, the datasets and the model.

     When ``ModelExist()`` is true the model and its normalisation
     statistics come from ``LoadModel()`` and epsilon starts at
     ``FINAL_EPSILON``. Otherwise a new model is built, epsilon starts at
     ``INITIAL_EPSILON``, and ``mean``/``std`` are estimated per feature
     column from up to 10000 randomly chosen (date, code) rows of the
     training dataset.
     """
     self.replay_buffer = deque()
     self.train_dataset, self.test_dataset = dqn_dataset.GetDataSet()
     # NOTE(review): the test split returned above is replaced by the
     # training data here, so every test runs on the training set. Confirm
     # this is intended.
     self.test_dataset = self.train_dataset
     self.step_num = 0
     self.LossClean()

     if ModelExist():
         self.epsilon = FINAL_EPSILON
         self.model, self.mean, self.std = LoadModel()
         return

     self.epsilon = INITIAL_EPSILON
     self.model = BuildModel()

     # Flatten (date, code, column) into (date * code, column) rows.
     dims = self.train_dataset.shape
     flat_rows = self.train_dataset.reshape((dims[0] * dims[1], dims[2]))

     # Shuffle the row indices and keep the first 10000 as the sample.
     row_ids = np.arange(flat_rows.shape[0])
     np.random.shuffle(row_ids)
     sampled_rows = flat_rows[row_ids[0:10000]]

     sampled_features = sampled_rows[:, 0:feature.FEATURE_SIZE()]
     self.mean = sampled_features.mean(axis=0)
     self.std = sampled_features.std(axis=0)
Exemplo n.º 11
0
def GetTrainTestDataSampleByDate(test_ratio):
    """Split the merged wave dataset into train/test parts by sampling dates.

    ``sample_num`` is ``1 / test_ratio`` truncated to an integer. A row goes
    to the test part when the last two decimal digits of its
    ``wave_kernel.COL_ON_PRETRADE_DATE()`` value are a multiple of
    ``sample_num``. Every other row goes to the training part.

    Args:
        test_ratio: Requested share of test rows; must satisfy
            ``0 < test_ratio <= 1``. The realised share depends on how the
            date values are distributed and is only approximately this value.

    Returns:
        Tuple ``(train_features, train_labels, test_features, test_labels,
        test_data)``. Features are the first ``feature.FEATURE_SIZE()``
        columns, labels are the single column right after them (kept 2-D),
        and ``test_data`` holds the complete test rows.

    Raises:
        ValueError: If ``test_ratio`` is outside ``(0, 1]``. Without this
            check 0 caused a bare ZeroDivisionError and values above 1 led to
            a modulo by zero.
    """
    if not 0.0 < test_ratio <= 1.0:
        raise ValueError(
            "test_ratio must be in (0, 1], got %r" % (test_ratio,))
    # The small epsilon keeps e.g. 1 / 0.1 from truncating to 9.
    sample_num = int(1.0 / test_ratio + 0.0001)
    dataset = GetTrainTestDataMerge()
    print("dataset: {}".format(dataset.shape))
    pos = ((dataset[:, wave_kernel.COL_ON_PRETRADE_DATE()].astype(int) % 100) %
           sample_num) == 0
    test_data = dataset[pos]
    train_data = dataset[~pos]
    print("train: {}".format(train_data.shape))
    print("test: {}".format(test_data.shape))

    feature_size = feature.FEATURE_SIZE()

    train_features = train_data[:, 0:feature_size]
    train_labels = train_data[:, feature_size:feature_size + 1]

    test_features = test_data[:, 0:feature_size]
    test_labels = test_data[:, feature_size:feature_size + 1]

    return train_features, train_labels, test_features, test_labels, test_data
Exemplo n.º 12
0
def GetTrainTestDataSplitByDate():
    """Split the merged wave dataset into train/test parts at the split date.

    Rows whose ``wave_kernel.COL_ON_PRETRADE_DATE()`` column is strictly less
    than ``dataset_train_test_split_date`` form the training part. The
    remaining rows form the test part, which is passed through
    ``SortWaveDataset`` before use.

    Returns:
        Tuple ``(train_features, train_labels, test_features, test_labels,
        test_data)``. Features are the first ``feature.FEATURE_SIZE()``
        columns, labels are the single column right after them (kept 2-D),
        and ``test_data`` holds the complete sorted test rows.
    """
    dataset = GetTrainTestDataMerge()
    print("dataset: {}".format(dataset.shape))

    on_pretrade_dates = dataset[:, wave_kernel.COL_ON_PRETRADE_DATE()]
    before_split = on_pretrade_dates < dataset_train_test_split_date
    train_data = dataset[before_split]
    test_data = SortWaveDataset(dataset[~before_split])

    print("train: {}".format(train_data.shape))
    print("test: {}".format(test_data.shape))

    # Column selections shared by both parts.
    feature_count = feature.FEATURE_SIZE()
    feature_cols = slice(0, feature_count)
    label_cols = slice(feature_count, feature_count + 1)

    return (train_data[:, feature_cols], train_data[:, label_cols],
            test_data[:, feature_cols], test_data[:, label_cols], test_data)
Exemplo n.º 13
0
def SettingName():
    """Build the name string that identifies the current settings.

    Returns:
        The values ``dqn_dataset.SettingName()``,
        ``tushare_data.train_test_date``,
        ``dqn_dataset.dataset_train_test_split_date``, ``label_days`` and
        ``decay_ratio`` joined with underscores, using the format
        ``%s_%s_%u_%u_%f``.
    """
    parts = (
        dqn_dataset.SettingName(),
        tushare_data.train_test_date,
        dqn_dataset.dataset_train_test_split_date,
        label_days,
        decay_ratio,
    )
    return '%s_%s_%u_%u_%f' % parts

# Resolve the module-level settings once, when this module is loaded.
# NOTE(review): import_tushare_data / import_dqn_dataset presumably say
# whether those modules are available here - confirm where they are set.
if import_tushare_data and import_dqn_dataset:
    # Both flags set: take every value from dqn_dataset / feature.
    setting_name_ = SettingName()
    dqn_dataset_file_name_ = dqn_dataset.FileNameDataSet(False)
    dqn_dataset_ACTURE_DATA_INDEX_DATE_ = dqn_dataset.ACTURE_DATA_INDEX_DATE()
    dqn_dataset_ACTURE_DATA_INDEX_OPEN_ = dqn_dataset.ACTURE_DATA_INDEX_OPEN()
    dqn_dataset_ACTURE_DATA_INDEX_TSCODE_ = dqn_dataset.ACTURE_DATA_INDEX_TSCODE()
    feature_FEATURE_SIZE_ = feature.FEATURE_SIZE()
    dqn_dataset_dataset_train_test_split_date_ = dqn_dataset.dataset_train_test_split_date
else:
    # Fallback: hard-coded column indices, feature size, split date and file
    # name. NOTE(review): these presumably mirror one specific dataset
    # configuration - confirm they match the .npy file named below.
    dqn_dataset_ACTURE_DATA_INDEX_DATE_ = 156
    dqn_dataset_ACTURE_DATA_INDEX_OPEN_ = 152
    dqn_dataset_ACTURE_DATA_INDEX_TSCODE_ = 155
    feature_FEATURE_SIZE_ = 150
    dqn_dataset_dataset_train_test_split_date_ = 20170101
    # Fixed prefix followed by split date, label_days and decay_ratio.
    setting_name_ = '0_30_1_10_0_0___20000101_10_20020101_20190414_%u_%u_%.6f' % (
        dqn_dataset_dataset_train_test_split_date_, 
        label_days, 
        decay_ratio)
    dqn_dataset_file_name_ = './data/dataset/dqn_0_30_1_10_0_0___20000101_10_20020101_20190414.npy'


def FileNameDataSet():
Exemplo n.º 14
0
    def TestTop1(self, print_trade_detail=False):
        """Simulate trading on ``self.test_dataset``, buying the top-Q code.

        ``GetTestActionQ`` is evaluated once on the feature columns of the
        whole test dataset. For each date the code with the largest
        prediction along axis 1 is the candidate. Dates are then walked from
        the highest index down to 0:

        * While OFF, the candidate of the current date is selected. If its Q
          is positive, the open price at the next valid date index becomes
          the entry price and the walk jumps to that index.
        * While ON, the held code is kept. Once its Q is no longer positive,
          the open price at the next valid date index becomes the exit
          price, the trade is booked and the walk jumps to that index.
        * Samples whose date column is 0.0 are skipped.

        The walk stops early when ``NextValidDateIndex`` returns a negative
        index.

        Args:
            print_trade_detail: When true, print a header, one row per
                closed trade and a final sum row.

        Returns:
            Tuple ``(increase_sum, trade_count, max_Q_mean)``: the sum of the
            relative price changes of all closed trades, the number of
            closed trades, and the mean of the per-date maximum predictions.
        """
        test_features = self.test_dataset[:, :, 0:feature.FEATURE_SIZE()]
        predictions = self.GetTestActionQ(test_features)
        # print("predictions:{}".format(predictions.shape))
        # Index of the best-scoring code for every date.
        max_Q_codes_index = np.argmax(predictions, axis=1)
        # print("max_Q_codes_index:{}".format(max_Q_codes_index.shape))
        max_Q_codes_value = np.amax(predictions, axis=1)
        max_Q_mean = np.mean(max_Q_codes_value)
        # print("max_Q_mean:{}".format(max_Q_mean))

        date_num = self.test_dataset.shape[0]
        code_num = self.test_dataset.shape[1]
        curren_status = STATUS_OFF
        trade_count = 0
        increase_sum = 0.0
        hold_days_sum = 0
        if print_trade_detail:
            print('%-8s%-10s%-10s%-10s%-10s%-10s%-10s%-10s%-10s' %
                  ('index', 'in_date', 'out_date', 'ts_code', 'pred', 'in',
                   'out', 'increase', 'hold_days'))
            print('-' * 80)
        dloop = date_num - 1
        while dloop >= 0:  # iterate over the dataset's dates, last index first
            if curren_status == STATUS_OFF:
                # Not holding: consider this date's best-scoring code.
                code_index = max_Q_codes_index[dloop]
            if self.test_dataset[dloop][code_index][date_col_index] == 0.0:
                # Date column is 0.0 for this (date, code): skip the sample.
                dloop -= 1
                continue

            Q = predictions[dloop][code_index]
            if Q > 0:
                # print(Q)
                next_status = STATUS_ON
            else:
                next_status = STATUS_OFF

            if curren_status == STATUS_OFF:
                if next_status == STATUS_ON:
                    # Open a position at the next valid date's open price.
                    curren_status = STATUS_ON
                    t1_date_index = self.NextValidDateIndex(
                        self.test_dataset, code_index, dloop)
                    if t1_date_index < 0:
                        break
                    in_price = self.test_dataset[t1_date_index][code_index][
                        open_col_index]
                    in_pred = Q
                    # Continue the walk from the entry date.
                    dloop = t1_date_index
                else:
                    dloop -= 1
            else:
                if next_status == STATUS_OFF:
                    # Close the position at the next valid date's open price.
                    curren_status = STATUS_OFF
                    t2_date_index = self.NextValidDateIndex(
                        self.test_dataset, code_index, dloop)
                    if t2_date_index < 0:
                        break
                    out_price = self.test_dataset[t2_date_index][code_index][
                        open_col_index]
                    increase = out_price / in_price - 1.0
                    # Holding length measured in date-index steps.
                    hold_days = t1_date_index - t2_date_index
                    if print_trade_detail:
                        print(
                            '%-8u%-10.0f%-10.0f%-10s%-10.2f%-10.2f%-10.2f%-10.4f%-10u'
                            % (trade_count, self.test_dataset[t1_date_index]
                               [code_index][date_col_index],
                               self.test_dataset[t2_date_index][code_index]
                               [date_col_index],
                               '%06u' % self.test_dataset[t1_date_index]
                               [code_index][tscode_col_index], in_pred,
                               in_price, out_price, increase, hold_days))
                    increase_sum += increase
                    trade_count += 1
                    hold_days_sum += hold_days
                    # Continue the walk from the exit date.
                    dloop = t2_date_index
                else:
                    dloop -= 1
        if print_trade_detail:
            print('%-8s%-10s%-10s%-10s%-10s%-10s%-10s%-10.4f%-10u' %
                  ('sum', '--', '--', '--', '--', '--', '--', increase_sum,
                   hold_days_sum))
        return increase_sum, trade_count, max_Q_mean