def test_num_created_batches_is_0(self, len_array, input_seq_length,
                                  output_seq_length, batch_size):
    """Tests that a ValueError is raised if num_created_batches == 0."""
    with pytest.raises(ValueError):
        make_tf_dataset(
            np.arange(len_array).reshape(-1, 1),
            input_seq_length,
            output_seq_length,
            batch_size,
        )
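The fixture-style arguments in the signature suggest these values are injected by pytest.mark.parametrize on the enclosing test class. A minimal sketch of that scaffolding, assuming a hypothetical import path and class name, with illustrative values chosen so the array cannot fill a single batch:

import numpy as np
import pytest

from dataset_utils import make_tf_dataset  # hypothetical import path

class TestMakeTfDataset:  # assumed class name
    @pytest.mark.parametrize(
        "len_array, input_seq_length, output_seq_length, batch_size",
        [
            (10, 8, 8, 4),  # input+output window (16) longer than the array
            (5, 5, 5, 1),   # array shorter than one input+output window
        ],
    )
    def test_num_created_batches_is_0(self, len_array, input_seq_length,
                                      output_seq_length, batch_size):
        with pytest.raises(ValueError):
            make_tf_dataset(
                np.arange(len_array).reshape(-1, 1),
                input_seq_length,
                output_seq_length,
                batch_size,
            )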
Example #2
def make_predictions(input_model_path):
    """Predict on the most recent days of hourly Bitcoin data using the model
    and scaler stored under input_model_path. Note: the returned predictions
    are in scaled log space; no inverse transform is applied."""
    check_all_needed_files_exist(input_model_path)

    num_days = 9
    bitcoin = get_last_num_days_hourly_bitcoin_data(num_days)
    bitcoin = bitcoin.price.values.reshape(-1, 1)

    # Log transform to match the training-time preprocessing
    bitcoin = np.log(bitcoin)

    with open(f"{input_model_path}_scaler.pkl", "rb") as f:
        min_max = pickle.load(f)

    bitcoin_preprocessed = min_max.transform(bitcoin)

    bitcoin_ds = make_tf_dataset(
        bitcoin_preprocessed,
        input_seq_length=INPUT_SEQ_LENGTH,
        # No targets are needed at inference time
        output_seq_length=0,
        # Feed single-sample batches for inference flexibility
        batch_size=1,
    )

    # Rebuild the architecture with batch size 1, then load the trained weights
    inference_model = build_LSTM_training(batch_size=1,
                                          timesteps=INPUT_SEQ_LENGTH)
    inference_model.load_weights(f"{input_model_path}_weights.h5")

    preds = inference_model.predict(bitcoin_ds)
    # Flatten the model output into a 1D Series
    preds = pd.Series(preds.reshape(-1), name="preds")

    return preds
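A hedged usage sketch: because make_predictions applies np.log and the stored MinMaxScaler but never inverts them, a caller that wants actual prices must undo both steps. The model prefix "models/lstm_v1" is a placeholder for a prefix produced by train_model below:

import pickle

import numpy as np

# "models/lstm_v1" is illustrative; weights and pickled scaler share the prefix
preds_scaled = make_predictions("models/lstm_v1")

with open("models/lstm_v1_scaler.pkl", "rb") as f:
    min_max = pickle.load(f)

# Undo the MinMax scaling, then the log transform, to recover prices
preds_price = np.exp(
    min_max.inverse_transform(preds_scaled.values.reshape(-1, 1))
).reshape(-1)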
def test_num_created_batches_is_not_0(self, len_array, input_seq_length,
                                      output_seq_length, batch_size):
    ds = make_tf_dataset(
        np.arange(len_array).reshape(-1, 1),
        input_seq_length,
        output_seq_length,
        batch_size,
    )
    num_created_batches = len(list(ds.as_numpy_iterator()))
    assert num_created_batches > 0
def compare_feature_and_target_shapes(self, array, input_seq_length,
                                      output_seq_length, batch_size):
    expected_feature_shape = (batch_size, input_seq_length, 1)
    expected_target_shape = (batch_size, output_seq_length, 1)

    ds = make_tf_dataset(array, input_seq_length, output_seq_length,
                         batch_size)

    for x in ds.take(1):
        feature, target = x

    assert expected_feature_shape == feature.shape
    assert expected_target_shape == target.shape
def test_array_less_than_2D(self):
    array_1D = np.arange(DATASET_SIZE)

    with pytest.raises(ValueError):
        make_tf_dataset(array_1D, 5, 5, 5)
def test_array_greater_than_2D(self):
    array_3D = np.arange(DATASET_SIZE).reshape(-1, 10, 5)

    with pytest.raises(ValueError):
        make_tf_dataset(array_3D, 5, 5, 5)
def test_input_is_numpy(self, input_array_2D):
    not_a_numpy_array = list(input_array_2D)

    with pytest.raises(TypeError):
        make_tf_dataset(not_a_numpy_array, 5, 5, 5)
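Taken together, the tests above pin down make_tf_dataset's contract: a TypeError for non-NumPy input, a ValueError for anything but a 2D array, a ValueError when no full batch can be built, and batches shaped (batch_size, seq_length, 1). A minimal sliding-window sketch consistent with that contract (not the project's actual implementation; the stride of 1 and the float32 cast are assumptions):

import numpy as np
import tensorflow as tf

def make_tf_dataset(array, input_seq_length, output_seq_length, batch_size):
    # Validate the contract the tests above rely on
    if not isinstance(array, np.ndarray):
        raise TypeError(f"array must be a numpy array, got {type(array)}")
    if array.ndim != 2:
        raise ValueError(f"array must be 2D, got {array.ndim}D")

    total_seq_length = input_seq_length + output_seq_length
    num_windows = len(array) - total_seq_length + 1
    if num_windows // batch_size <= 0:
        raise ValueError("Not enough rows to create a single full batch; "
                         "use a longer array or smaller windows/batch size.")

    ds = tf.data.Dataset.from_tensor_slices(array.astype(np.float32))
    # Sliding windows covering input + output steps, stride 1
    ds = ds.window(total_seq_length, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(total_seq_length))
    if output_seq_length > 0:
        # Split each window into a (features, targets) pair
        ds = ds.map(lambda w: (w[:input_seq_length], w[input_seq_length:]))
    return ds.batch(batch_size, drop_remainder=True)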
Example #8
def train_model(output_model_name):
    """Train the LSTM on log-scaled hourly Bitcoin prices and save the model,
    its weights, and the fitted scaler under models/."""
    if not output_model_name.endswith(".h5"):
        raise ValueError(
            f"You must pass a model name with a .h5 extension. "
            f"Received: {output_model_name}")

    output_dir = Path("models")
    if not output_dir.exists():
        output_dir.mkdir()

    # Remove .h5 at the end
    output_model_name = output_model_name[:-3]
    bitcoin = load_raw_bitcoin_df()

    # In total we have ~70% training, ~20% val, ~10% test:
    # 0.9 * 0.77 ≈ 0.69 train, 0.9 * 0.23 ≈ 0.21 val, 0.1 test
    train, test = temporal_train_test_split(bitcoin, train_size=0.9)
    train, val = temporal_train_test_split(train, train_size=0.77)

    train = np.log(train)
    val = np.log(val)
    test = np.log(test)

    # Fit the scaler on the training split only, to avoid lookahead leakage
    min_max = MinMaxScaler()

    train = min_max.fit_transform(train)
    val = min_max.transform(val)
    test = min_max.transform(test)

    with open(output_dir / f"{output_model_name}_scaler.pkl", "wb") as f:
        pickle.dump(min_max, f)

    train_ds = make_tf_dataset(
        train,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=BATCH_SIZE_TRAINING,
    )
    val_ds = make_tf_dataset(
        val,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=BATCH_SIZE_TRAINING,
    )
    # Note: test_ds is built here but not consumed anywhere in this function
    test_ds = make_tf_dataset(
        test,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=1,
    )

    model = build_LSTM_training(batch_size=BATCH_SIZE_TRAINING,
                                timesteps=INPUT_SEQ_LENGTH)

    early_stop_cb = EarlyStopping(patience=10,
                                  restore_best_weights=True,
                                  baseline=None)

    callbacks = [early_stop_cb]

    history = model.fit(
        train_ds,
        epochs=10,
        # batch_size must not be passed alongside a tf.data.Dataset (the
        # dataset is already batched); shuffle=False preserves temporal order
        shuffle=False,
        validation_data=val_ds,
        callbacks=callbacks,
    )

    model.save(output_dir / output_model_name)
    model.save_weights(output_dir / f"{output_model_name}_weights.h5")
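build_LSTM_training is not shown in these examples, but its signature (batch_size, timesteps) and the fact that inference rebuilds the model with batch_size=1 before loading the saved weights point to a network with a fixed batch_input_shape, such as a stateful LSTM (which would also explain shuffle=False during training). A speculative sketch; the layer sizes, the num_features default, and the OUTPUT_SEQ_LENGTH value are placeholders:

import tensorflow as tf

OUTPUT_SEQ_LENGTH = 1  # placeholder; the real module defines this constant

def build_LSTM_training(batch_size, timesteps, num_features=1):
    # A fixed batch_input_shape bakes the batch size into the graph, which is
    # why inference rebuilds with batch_size=1 and reloads only the weights.
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(
            64,
            stateful=True,
            batch_input_shape=(batch_size, timesteps, num_features),
        ),
        tf.keras.layers.Dense(OUTPUT_SEQ_LENGTH),
        # Match the (batch, output_seq_length, 1) target shape of the dataset
        tf.keras.layers.Reshape((OUTPUT_SEQ_LENGTH, 1)),
    ])
    model.compile(optimizer="adam", loss="mse")
    return model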