Exemplo n.º 1
0
 def transform(self, X):
     """Turn ``X`` into reshaped model inputs, plus targets outside production.

     Features are built by ``make_features`` from ``X`` using
     ``self.target_col`` and ``self.moving_averages``.

     Returns:
         In production mode, the reshaped prediction matrix only.
         Otherwise a ``(reshaped_inputs, targets)`` tuple.
     """
     features = make_features(X, self.target_col, self.moving_averages)

     # Training/evaluation path: inputs and targets are produced together.
     if not self.production:
         inputs, targets = data_to_supervised(features, self.Tx, self.Ty)
         return self._reshape(inputs), targets

     # Production path: there are no targets, only the input matrix.
     predict_matrix = series_to_predict_matrix(
         features, n_in=self.Tx, dropnan=True)
     return self._reshape(predict_matrix)
Exemplo n.º 2
0
def main():
    """Featurize the raw coin CSVs and save unshuffled train/test arrays.

    For every coin symbol, ``data/raw/<SYM>.csv`` (under the project path) is
    read, passed through ``make_features`` and ``data_to_supervised``,
    reshaped with ``make_3d`` and split with ``shuffle=False``. The four
    resulting arrays are saved under ``data/processed``.
    """
    print('Making features from raw data...')

    raw_dir = join(get_project_path(), 'data', 'raw')
    processed_dir = join(get_project_path(), 'data', 'processed')
    makedirs(processed_dir, exist_ok=True)

    target_col = 'close'
    window = 72
    horizon = 1
    test_fraction = 0.05

    # File-name templates, listed in the same order train_test_split returns
    # its pieces: X_train, X_test, y_train, y_test.
    name_templates = ('X_train_{}', 'X_test_{}', 'y_train_{}', 'y_test_{}')

    for symbol in ['BTC', 'ETH']:
        csv_path = join(raw_dir, symbol + '.csv')
        print('Featurizing raw {} data from {}...'.format(symbol, csv_path))

        features = make_features(
            pd.read_csv(csv_path, index_col=0),
            target_col=target_col,
            keep_cols=['close', 'high', 'low', 'volumeto', 'volumefrom'],
            ma_lags=[6, 12, 24, 48],
            ma_cols=['close', 'volumefrom', 'volumeto'])

        X, y = data_to_supervised(
            features, target_ix=-1, Tx=window, Ty=horizon)

        # The channel count is the flattened width divided by the window.
        channels = int(X.shape[1] / window)
        X = make_3d(X, tx=window, num_channels=channels)

        pieces = train_test_split(
            X, y, test_size=test_fraction, shuffle=False)

        for template, array in zip(name_templates, pieces):
            np.save(arr=array,
                    file=join(processed_dir, template.format(symbol)))
Exemplo n.º 3
0
def predict(coin=None):
    """Predict upcoming ``close`` values for ``coin`` from recent market data.

    Args:
        coin: Coin symbol, ``'BTC'`` or ``'ETH'``. A falsy value selects
            ``'BTC'``.

    Returns:
        dict with two lists of equal length:
        ``prediction`` - each predicted percent change applied to the last
        observed ``close`` value;
        ``time`` - timestamps one hour apart, starting one hour after the
        last observed ``timestamp``.

    An unsupported coin triggers ``abort(404)``.
    """
    coin = coin or 'BTC'
    Tx = 72
    target = 'close'
    ma_lags = [6, 12, 24, 48]

    # Resolve the model first so an unsupported coin is rejected before any
    # data is fetched or featurized.
    if coin == 'ETH':
        model = eth_model
    elif coin == 'BTC':
        model = btc_model
    else:
        abort(404)  # FIXME: More descriptive error

    # History length: the Tx-step window, one extra row, and the longest
    # moving-average lag (presumably the extra row feeds the percent-change
    # computation -- confirm against make_features).
    cryptocompare_data = retrieve_all_data(coin, Tx + 1 + max(ma_lags))
    preprocessed_data = make_features(
        cryptocompare_data,
        target_col=target,
        keep_cols=['close', 'high', 'low', 'volumeto', 'volumefrom'],
        ma_lags=ma_lags,
        ma_cols=['close', 'volumefrom', 'volumeto'])
    time_series_data = series_to_predict_matrix(preprocessed_data, Tx)
    n_features = int(time_series_data.shape[1] / Tx)
    model_input_data = make_3d(
        arr=time_series_data, tx=Tx, num_channels=n_features)

    with model.graph.as_default():
        prediction = model.estimator.predict(model_input_data)

    def parse_keras_prediction(keras_prediction):
        """Handles multi-output Keras models"""
        # NOTE(review): a length of 2 is taken to mean "two model outputs";
        # a single-output prediction with exactly two rows would match as
        # well -- confirm this cannot happen for the input built above.
        if len(keras_prediction) == 2:
            return keras_prediction[1][0]
        else:
            return keras_prediction[0]
    parsed_prediction = parse_keras_prediction(prediction)

    last_value = cryptocompare_data[target].iloc[-1]
    last_time = cryptocompare_data['timestamp'].iloc[-1]

    # Model outputs are treated as percent changes relative to the last close.
    prediction_val = [last_value + pred/100*last_value for pred in parsed_prediction]
    time_val = [last_time + pd.Timedelta(hours=ix + 1) for ix in range(len(parsed_prediction))]
    return dict(prediction=prediction_val, time=time_val)
Exemplo n.º 4
0
"""
# Split `data` into train and test parts without shuffling, so the original
# row order is preserved inside each part.
data_train, data_test = train_test_split(data,
                                         test_size=TEST_SIZE,
                                         shuffle=False)
# Drop rows containing NaN from both parts; the test part additionally loses
# its final row.
data_train = data_train.dropna()
data_test = data_test.dropna().iloc[:-1]

# `p` is defined outside this excerpt -- presumably a print helper; confirm.
p(data_train.shape, data_test.shape)
# Bare expression: only produces output when run as a notebook cell.
data_test.head()

# In[3]:
"""
Get features.
"""
# The train and test parts are featurized independently with identical
# arguments.
feature_data_train = make_features(input_df=data_train,
                                   target_col='close',
                                   ma_cols=['volumeto', 'volumefrom'],
                                   ma_lags=[3, 6, 12])
feature_data_test = make_features(input_df=data_test,
                                  target_col='close',
                                  ma_cols=['volumeto', 'volumefrom'],
                                  ma_lags=[3, 6, 12])

# Remove, in place, any rows of the feature frames that contain NaN
# (presumably the leading rows of the lagged moving averages -- confirm
# against make_features).
feature_data_train.dropna(inplace=True)
feature_data_test.dropna(inplace=True)

# Bare expression: only produces output when run as a notebook cell.
feature_data_test.head()

# In[4]:
"""
Apply DWT Smooth.
"""
Exemplo n.º 5
0
# Notebook configuration. SYM selects which raw CSV is loaded below; Tx and Ty
# are forwarded to data_to_supervised (presumably the input window length and
# the forecast horizon -- confirm against that helper).
# TARGET and TEST_SIZE are not referenced by the cells visible here;
# make_features below is passed the literal 'close' rather than TARGET.
SYM = 'BTC'
TARGET = 'close'
Tx = 72
Ty = 1
TEST_SIZE = 0.05

# Load <project path>/data/raw/<SYM>.csv, using the first column as the index.
data_path = os.path.join(get_project_path(), 'data', 'raw', SYM + '.csv')
data = pd.read_csv(data_path, index_col=0)
# Bare expression: only produces output when run as a notebook cell.
data.head()

# In[3]:
"""
Get percent change feature and target data.
"""
# No moving-average lags are requested here (empty list).
df = make_features(input_df=data, target_col='close', moving_average_lags=[])
X, y = data_to_supervised(input_df=df, Tx=Tx, Ty=Ty)
# `p` is defined outside this excerpt -- presumably a print helper; confirm.
p(X.shape, y.shape)
# Bare expression: only produces output when run as a notebook cell.
X.head()

# In[4]:
"""
Confirm data reshape and target/feature creation was done correctly.
"""
# Compare every target except the last against the last column of X shifted
# by one row: the target at row t is expected to reappear as the most recent
# input value at row t + 1.
y_values_except_last = np.squeeze(y.iloc[:-1].values)
t_minus_1_x_values_except_first = X.iloc[1:, -1].values

# Element-wise comparison of the two arrays. `ndarray.all()` only reports
# whether every element is truthy, so comparing two `.all()` results would
# not compare the arrays themselves; `np.array_equal` checks shape and values.
np.array_equal(y_values_except_last, t_minus_1_x_values_except_first)

# In[5]:
"""