Exemplo n.º 1
0
def test_fetch_index_day_online():
    """Online index-day fetch returns a non-empty, all-float32 frame."""
    loader = DataLoaderStock('601398')
    frame = loader._DataLoaderStock__fetch_index_day_online()
    logging.info(frame.columns)
    logging.info(frame.head())
    # Fetched columns must match the loader's declared index columns.
    assert set(frame.columns) == set(loader._index_columns)
    assert not frame.empty
    # Every column is expected to carry float32 data.
    assert all(frame[name].dtype == np.float32 for name in frame.columns)
Exemplo n.º 2
0
def test_fetch_stock_day():
    """Stock-day fetch returns a non-empty, all-float32 frame."""
    loader = DataLoaderStock('601398')
    frame = loader._DataLoaderStock__fetch_stock_day(loader.stock_code,
                                                     loader.start,
                                                     loader.end)
    logging.info(frame.columns)
    logging.info(frame.head())
    # Fetched columns must match the loader's declared stock columns.
    assert set(frame.columns) == set(loader._stock_columns)
    assert not frame.empty
    # Every column is expected to carry float32 data.
    assert all(frame[name].dtype == np.float32 for name in frame.columns)
Exemplo n.º 3
0
def test_load():
    """Exercise DataLoaderStock.load() with no wrapper, an inline custom
    wrapper, and the stock Wrapper_fillna — the latter two must agree."""
    dl = DataLoaderStock('601398')
    df = dl.load()
    logging.info(df.columns)
    logging.info(df.head())
    assert not df.empty

    class wrapper1(Wrapper):
        # Forward-fill gaps, then drop rows that are still incomplete.
        def build(self, df: pd.DataFrame) -> pd.DataFrame:
            result = df.copy()
            # `.ffill()` replaces the deprecated `fillna(method='ffill')`;
            # the behavior is identical.
            result = result.ffill()
            return result.dropna()

    dl = DataLoaderStock('601398', wrapper=wrapper1())
    df = dl.load()
    # The wrapper drops rows, so the wrapped frame differs from the raw one.
    assert len(df.index) != len(dl.data_raw.index)
    logging.info(df.head())
    logging.info(dl.data_raw.head())
    assert not df.empty

    dl_fillna = DataLoaderStock('601398', wrapper=Wrapper_fillna())
    df_fillna = dl_fillna.load()
    assert df.equals(df_fillna)
    logging.info(df_fillna.shape)
    for col in df.columns:
        # BUG FIX: the original computed np.array_equal but discarded the
        # result, so the per-column comparison never actually checked anything.
        assert np.array_equal(df[col].values, df_fillna[col].values)
Exemplo n.º 4
0
def test_append_codes():
    """Loading with appended block codes still yields a non-empty frame."""
    # Keep only the block codes that pass both project filters.
    codes = [c for c in get_block_code('000002')
             if _test_dt(c) and _test_code(c)]
    print(codes)
    assert len(codes) > 0
    loader = DataLoaderStock('000002', wrapper=Wrapper_default(),
                             appends=codes)
    frame = loader.load()
    assert not frame.empty
    print(frame)
    print(len(frame.columns))
Exemplo n.º 5
0
 def __init__(self, path, code, window, days):
     """Load a previously trained model from *path* plus the full data
     set for *code*.

     Args:
         path: directory containing the saved ``.h5`` model file.
         code: stock code the model was trained on.
         window: number of input days per sample.
         days: number of target days per sample.
     """
     self.code = code
     self.window = window
     self.days = days
     # One sample spans `window` input days plus `days` target days.
     self.batch_size = window + days
     filename = 'model_{2}_{0:02d}_{1:02d}.h5'.format(window, days,
                                                      code)
     model = SequentialModel()
     # The model was compiled with a custom loss, so it must be
     # re-registered via custom_objects when loading.
     model.load(os.path.join(path, filename),
                custom_objects={
                    'root_mean_squared_error': root_mean_squared_error})
     self.model = model
     self.data = DataLoaderStock(code, wrapper=Wrapper_default()).load()
Exemplo n.º 6
0
def test_init():
    """Each positional argument of DataLoaderStock maps onto the expected
    attribute, with the documented defaults filling in the rest."""
    # (extra positional args, expected (benchmark, start, end, fq, online))
    cases = [
        ((),
         ('399300', '1990-01-01', DataLoader.today(), 'qfq', False)),
        (('000300',),
         ('000300', '1990-01-01', DataLoader.today(), 'qfq', False)),
        (('000300', 'bfq'),
         ('000300', '1990-01-01', DataLoader.today(), 'bfq', False)),
        (('000300', 'bfq', True),
         ('000300', '1990-01-01', DataLoader.today(), 'bfq', True)),
        (('000300', 'bfq', True, '2000-01-01'),
         ('000300', '2000-01-01', DataLoader.today(), 'bfq', True)),
        (('000300', 'bfq', True, '2000-01-01', '2000-12-31'),
         ('000300', '2000-01-01', '2000-12-31', 'bfq', True)),
    ]
    for extra, (benchmark, start, end, fq, online) in cases:
        dl = DataLoaderStock('601398', *extra)
        assert '601398' == dl.stock_code
        assert benchmark == dl.benchmark_code
        assert start == dl.start
        assert end == dl.end
        assert fq == dl.fq
        assert online == dl.online
Exemplo n.º 7
0
def do(code,
       window=3,
       days=1,
       wrapper=Wrapper_default(),
       norm=Normalize(),
       *args,
       **kwargs):
    """Train a model for *code*, save it, plot test-set predictions per day
    and record the training run.

    Args:
        code: stock code to train on.
        window: number of input days per sample.
        days: number of target days per sample.
        wrapper: data wrapper applied by DataLoaderStock.
        norm: normalizer forwarded to the split function.

    Recognized kwargs (all popped): appends, split_func, batch_size,
    train_verbose, layers, compile, validation_split, cbs,
    train_train_epochs, shuffle, show_summary, clear.
    """
    # NOTE(review): `wrapper`/`norm` defaults are evaluated once at function
    # definition and shared across calls — confirm those objects are stateless.
    start = datetime.now()
    dl = DataLoaderStock(
        code, wrapper=wrapper, appends=kwargs.pop('appends', []))
    df = dl.load()
    train, test = DataHelper.train_test_split(df,
                                              train_size=0.95,
                                              batch_size=window + days)
    split_func = kwargs.pop('split_func', DataHelper.xy_split_3)
    X_train, Y_train = split_func(train, window, days, norm=norm)
    X_test, Y_test = split_func(test, window, days, norm=norm)

    batch_size = kwargs.pop('batch_size', 512)
    verbose = kwargs.pop('train_verbose', 0)

    # Extract the underlying ndarrays (comprehensions replace append loops).
    X_train_arr = [x.values for x in X_train]
    Y_train_arr = [y.values for y in Y_train]
    X_test_arr = [x.values for x in X_test]
    Y_test_arr = [y.values for y in Y_test]

    model = SequentialModel()
    optimizer = 'rmsprop'
    ls = kwargs.pop("layers", [])
    c = kwargs.pop('compile', {'loss': root_mean_squared_error,
                               'optimizer': optimizer,
                               'metrics': ["mae", "acc"]})
    first_units = 64
    validation_split = kwargs.pop('validation_split', 0.15)
    if not ls:
        # Default topology: one LSTM layer followed by a dense output layer.
        ls.append({'type': 'lstm', 'units': first_units})
        ls.append({'type': 'dense'})
    ls[0]['input_shape'] = X_train_arr[0].shape
    # xy_split_3 predicts a single value; other splitters predict `days`.
    ls[-1]['units'] = 1 if split_func == DataHelper.xy_split_3 else days
    model.build_model(ls, c)
    model.train(np.array(X_train_arr),
                np.array(Y_train_arr),
                callbacks=kwargs.pop('cbs', None),
                train={
                    'epochs': kwargs.pop('train_train_epochs', 500),
                    'shuffle': kwargs.pop('shuffle', False),
                    'verbose': verbose,
                    'batch_size': batch_size,
                    'validation_split': validation_split})

    if kwargs.pop('show_summary', False):
        model.model.summary()

    save_path = save_model(model.model, stockcode=code, window=window,
                           days=days)
    logging.info('model saved:' + save_path)  # fixed typo: 'savmodeed'

    pred = model.predict(np.array(X_test_arr))
    score = model.evaluate(np.array(X_test_arr), np.array(Y_test_arr))
    for day in range(days):
        if split_func == DataHelper.xy_split_3:
            # Single-value prediction: always use the last (only) column.
            day = -1
        df_result = pd.DataFrame(
            {'pred': pred[:, day], 'real': np.array(Y_test_arr)[:, day]})

        plt.figure(figsize=(15, 8))
        # Simplified: the original `days if split_func != ... else days`
        # ternaries evaluated to `days` on both branches.
        day_label = day + 1 if split_func != DataHelper.xy_split_3 else days
        save_path = os.path.join(os.path.join(nb_dir, '.train_result'),
                                 'pred_{2}_{0:02d}_{1:02d}_{3:02d}.svg'.format(
                                     window, days, code, day_label))
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        plt.title(
            '{0} Window:{1},Days:{5}/{2},BatchSize:{3},Optimizer:{4}'.format(
                code, window, days, batch_size, optimizer, day_label))
        plt.plot(df_result['pred'])
        plt.plot(df_result['real'])
        plt.xticks(np.arange(0, len(df_result.index), 1))
        plt.savefig(save_path, format="svg")

        if split_func == DataHelper.xy_split_3:
            break
    # matplotlib retains figures until explicitly closed; close them all to
    # avoid the "More than 20 figures have been opened" memory warning.
    plt.close('all')
    logging.info('-' * 50)
    if kwargs.pop('clear', True):
        clear_session()
    end = datetime.now()
    v = {'optimizer': optimizer,
         'first_units': first_units,
         'batch_size': batch_size,
         'validation_split': validation_split,
         'spend_time': str(end - start)}
    # Pair each metric name with its evaluated score.
    for name, value in zip(model.model.metrics_names, score):
        v[name] = value
    save_train_record(code=code, window=window, days=days, values=v)
Exemplo n.º 8
0
def do(code='000002',
       window=3,
       days=1,
       wrapper=Wrapper_default(),
       norm=Normalize(),
       *args,
       **kwargs):
    """Train an LSTM model on *code* and return the model with its data.

    Args:
        code: stock code to train on.
        window: number of input days per sample.
        days: number of target days per sample.
        wrapper: data wrapper applied by DataLoaderStock.
        norm: normalizer forwarded to DataHelper.xy_split_2.

    Kwargs (all popped): appends, train_size, batch_size, verbose, layers,
        compile, cbs, epochs, shuffle, validation_split, summary.
        layers [dict]: layer definitions for training; defaults to LSTM.
        The first layer's `input_shape` and the last layer's `units` are
        filled in automatically.

    Returns:
        dict with timing, the raw data, train/test splits, the trained
        Keras model and its training history.
    """
    # NOTE(review): `wrapper`/`norm` defaults are evaluated once at function
    # definition and shared across calls — confirm those objects are stateless.
    dl = DataLoaderStock(code,
                         wrapper=wrapper,
                         appends=kwargs.pop('appends', []))
    df = dl.load()
    train, test = DataHelper.train_test_split(df,
                                              batch_size=window + days,
                                              train_size=kwargs.pop(
                                                  'train_size', 0.85))
    X_train, Y_train = DataHelper.xy_split_2(train, window, days, norm=norm)
    X_test, Y_test = DataHelper.xy_split_2(test, window, days, norm=norm)
    batch_size = kwargs.pop('batch_size', 128)
    verbose = kwargs.pop('verbose', 0)

    # Extract the underlying ndarrays (comprehensions replace append loops).
    X_train_arr = [x.values for x in X_train]
    Y_train_arr = [y.values for y in Y_train]
    X_test_arr = [x.values for x in X_test]
    Y_test_arr = [y.values for y in Y_test]

    clear_session()
    model = SequentialModel()
    # RMSE is used as the loss: compared to the similar Mean Absolute Error
    # it amplifies and severely punishes large errors, making it a good
    # general-purpose metric for numerical predictions. See:
    # https://www.researchgate.net/publication/327967988_Predicting_Stock_Prices_Using_LSTM
    ls = kwargs.pop("layers", [])
    c = kwargs.pop(
        'compile', {
            'loss': root_mean_squared_error,
            'optimizer': 'rmsprop',
            'metrics': ["mae", "acc"]
        })
    if not ls:
        # Default topology: one LSTM layer followed by a dense output layer.
        ls.append({'type': 'lstm', 'units': 128})
        ls.append({'type': 'dense'})
    ls[0]['input_shape'] = X_train_arr[0].shape
    ls[-1]['units'] = days

    start = time.time()
    model.build_model(ls, c)

    model.train(np.array(X_train_arr),
                np.array(Y_train_arr),
                callbacks=kwargs.pop('cbs', None),
                train={
                    'epochs': kwargs.pop('epochs', 500),
                    'shuffle': kwargs.pop('shuffle', False),
                    'verbose': verbose,
                    'batch_size': batch_size,
                    'validation_split': kwargs.pop('validation_split', 0.15)
                })

    if kwargs.pop('summary', True):
        model.model.summary()
    end = time.time()
    return {
        'start': start,
        'end': end,
        'X_test_arr': X_test_arr,
        'Y_test_arr': Y_test_arr,
        'model': model.model,
        'code': code,
        'window': window,
        'days': days,
        'batch_size': batch_size,
        'history': model.history,
        'data': df,
        'X_train': X_train,
        'Y_train': Y_train,
        'X_test': X_test,
        'Y_test': Y_test
    }