示例#1
0
def GetTrainData(max_samples=2000000):
    """Return a shuffled training subset as ``(train_features, train_labels)``.

    Calls ``GetTrainDataTempSave()`` to (re)write the temporary feature and
    label files, loads both back, shuffles the rows with one shared random
    order and keeps at most ``max_samples`` rows.

    The feature and label arrays are indexed directly with the shuffled
    order rather than being concatenated and sliced apart again.  In a
    concatenated ``features + labels`` array the label sits right after the
    last feature column, so slicing it at ``feature.COL_ACTIVE_LABEL()`` is
    only correct when that index happens to equal the feature width.
    Indexing the two arrays separately has no such dependency and avoids
    several full-size copies of the data.

    Args:
        max_samples: Maximum number of rows to return.  Defaults to
            2000000; ``None`` keeps every row.

    Returns:
        A tuple ``(train_features, train_labels)`` of freshly allocated
        arrays whose rows are in the same shuffled order.

    Raises:
        ValueError: If the two temporary files hold a different number of
            rows.
    """
    GetTrainDataTempSave()
    features = np.load('./data/dataset/fix_dataset_temp_features.npy')
    labels = np.load('./data/dataset/fix_dataset_temp_labels.npy')
    if len(features) != len(labels):
        raise ValueError(
            "features and labels row counts differ: {} vs {}".format(
                len(features), len(labels)))
    # Shape the combined (features + labels) table would have.
    print("train_data: {}".format(
        (features.shape[0], features.shape[1] + labels.shape[1])))

    print("reorder...")
    # Same random draw as before (one uniform sample per row, then argsort),
    # so a seeded run still yields the same row order.
    order = np.argsort(np.random.random(len(features)))
    # Truncate the index array first so only the kept rows are ever copied.
    order = order[:max_samples]

    print("get feature ...")
    train_features = features[order]

    print("get label...")
    train_labels = labels[order]

    print("train_features: {}".format(train_features.shape))
    print("train_labels: {}".format(train_labels.shape))
    return train_features, train_labels
示例#2
0
def GetTrainDataTempSave():
    """Write the training rows' features and labels to temporary .npy files.

    Loads the array stored at ``FileNameFixDataSet()``, keeps the rows whose
    ``feature.COL_TRADE_DATE(0)`` column is strictly less than
    ``dataset_train_test_split_date``, and saves two slices of them under
    ``./data/dataset/``: the first ``feature.FEATURE_SIZE()`` columns and the
    single column at ``feature.COL_ACTIVE_LABEL()``.
    """
    full_set = np.load(FileNameFixDataSet())
    print("dataset: {}".format(full_set.shape))

    # Boolean row mask: True for rows dated before the split date.
    trade_dates = full_set[:, feature.COL_TRADE_DATE(0)]
    before_split = trade_dates < dataset_train_test_split_date
    train_rows = full_set[before_split]
    print("train_data: {}".format(train_rows.shape))

    n_features = feature.FEATURE_SIZE()
    label_col = feature.COL_ACTIVE_LABEL()

    # A one-column slice (not a plain index) keeps the labels two-dimensional.
    np.save('./data/dataset/fix_dataset_temp_features.npy',
            train_rows[:, :n_features])
    np.save('./data/dataset/fix_dataset_temp_labels.npy',
            train_rows[:, label_col:label_col + 1])
示例#3
0
def GetTrainTestDataSplitByDate():
    """Split the fixed dataset into train and test parts at the split date.

    Rows whose ``feature.COL_TRADE_DATE(0)`` column is strictly less than
    ``dataset_train_test_split_date`` go to the training part; every other
    row goes to the test part.

    Returns:
        A tuple ``(train_features, train_labels, test_features,
        test_labels, test_data)``.  The feature arrays hold the first
        ``feature.FEATURE_SIZE()`` columns, the label arrays hold the single
        column at ``feature.COL_ACTIVE_LABEL()`` (kept two-dimensional), and
        ``test_data`` is the test rows with all of their columns.
    """
    full_set = np.load(FileNameFixDataSet())
    print("dataset: {}".format(full_set.shape))

    trade_dates = full_set[:, feature.COL_TRADE_DATE(0)]
    is_train = trade_dates < dataset_train_test_split_date
    train_data = full_set[is_train]
    test_data = full_set[~is_train]
    print("train: {}".format(train_data.shape))
    print("test: {}".format(test_data.shape))

    # Column ranges shared by both parts.
    feature_cols = slice(0, feature.FEATURE_SIZE())
    label_start = feature.COL_ACTIVE_LABEL()
    label_cols = slice(label_start, label_start + 1)

    return (
        train_data[:, feature_cols],
        train_data[:, label_cols],
        test_data[:, feature_cols],
        test_data[:, label_cols],
        test_data,
    )
示例#4
0
def GetTrainTestDataSampleByDate(test_ratio):
    """Split the fixed dataset into train/test parts by sampling on the date.

    ``sample_num`` is ``1 / test_ratio`` truncated to an integer.  A row goes
    to the test part when the last two decimal digits of its
    ``feature.COL_TRADE_DATE(0)`` value are divisible by ``sample_num``;
    every other row goes to the training part.
    NOTE(review): the ``% 100`` presumably extracts the day of month from a
    YYYYMMDD-style date -- confirm against the dataset builder.

    Args:
        test_ratio: Desired test fraction, in ``(0, 1]``.  Only its truncated
            reciprocal is used, so the achieved fraction is approximate.

    Returns:
        A tuple ``(train_features, train_labels, test_features,
        test_labels, test_data)``.  The feature arrays hold the first
        ``feature.FEATURE_SIZE()`` columns, the label arrays hold the single
        column at ``feature.COL_ACTIVE_LABEL()`` (kept two-dimensional), and
        ``test_data`` is the test rows with all of their columns.

    Raises:
        ValueError: If ``test_ratio`` is not in ``(0, 1]``.
    """
    # Reject bad ratios before loading anything: 0 would divide by zero, and
    # values above 1 give sample_num == 0, which makes the modulo below
    # silently send every row to the test part.
    if not 0 < test_ratio <= 1:
        raise ValueError(
            "test_ratio must be in (0, 1], got {!r}".format(test_ratio))

    # The small epsilon keeps a quotient that is marginally below an integer
    # from being truncated down to the previous one.
    sample_num = int(1.0 / test_ratio + 0.0001)
    dataset = np.load(FileNameFixDataSet())
    print("dataset: {}".format(dataset.shape))
    pos = ((dataset[:, feature.COL_TRADE_DATE(0)].astype(int) % 100) %
           sample_num) == 0
    test_data = dataset[pos]
    train_data = dataset[~pos]
    print("train: {}".format(train_data.shape))
    print("test: {}".format(test_data.shape))

    feature_size = feature.FEATURE_SIZE()
    label_col = feature.COL_ACTIVE_LABEL()

    train_features = train_data[:, 0:feature_size]
    train_labels = train_data[:, label_col:label_col + 1]

    test_features = test_data[:, 0:feature_size]
    test_labels = test_data[:, label_col:label_col + 1]

    return train_features, train_labels, test_features, test_labels, test_data
示例#5
0
def GetTrainTestDataRandom(test_ratio):
    """Split the fixed dataset into train/test parts by random row sampling.

    ``sample_num`` is ``1 / test_ratio`` truncated to an integer.  Each row
    independently draws a random integer in ``[0, sample_num)`` and goes to
    the test part when the draw is 0; every other row goes to the training
    part.

    Args:
        test_ratio: Desired test fraction, in ``(0, 1]``.  Only its truncated
            reciprocal is used, so each row lands in the test part with
            probability ``1 / sample_num``.

    Returns:
        A tuple ``(train_features, train_labels, test_features,
        test_labels, test_data)``.  The feature arrays hold the first
        ``feature.FEATURE_SIZE()`` columns, the label arrays hold the single
        column at ``feature.COL_ACTIVE_LABEL()`` (kept two-dimensional), and
        ``test_data`` is the test rows with all of their columns.

    Raises:
        ValueError: If ``test_ratio`` is not in ``(0, 1]``.
    """
    # Reject bad ratios before loading anything: 0 would divide by zero, and
    # values above 1 give sample_num == 0, which randint rejects with an
    # unrelated-looking error.
    if not 0 < test_ratio <= 1:
        raise ValueError(
            "test_ratio must be in (0, 1], got {!r}".format(test_ratio))

    # The small epsilon keeps a quotient that is marginally below an integer
    # from being truncated down to the previous one.
    sample_num = int(1.0 / test_ratio + 0.0001)
    dataset = np.load(FileNameFixDataSet())
    print("dataset: {}".format(dataset.shape))
    print('sample_num:%u' % sample_num)
    # Draw one random integer in 0..(sample_num - 1) per row; pos marks the
    # rows whose draw is 0.
    pos = (np.random.randint(0, sample_num, size=len(dataset)) == 0)
    test_data = dataset[pos]
    train_data = dataset[~pos]
    print("train: {}".format(train_data.shape))
    print("test: {}".format(test_data.shape))

    feature_size = feature.FEATURE_SIZE()
    label_col = feature.COL_ACTIVE_LABEL()

    train_features = train_data[:, 0:feature_size]
    train_labels = train_data[:, label_col:label_col + 1]

    test_features = test_data[:, 0:feature_size]
    test_labels = test_data[:, label_col:label_col + 1]

    return train_features, train_labels, test_features, test_labels, test_data