def test_num_created_batches_is_0(self, len_array, input_seq_length, output_seq_length, batch_size):
    """Check that make_tf_dataset raises ValueError when no batch can be created.

    The input is a single-column array holding ``len_array`` consecutive
    integers; the remaining arguments are forwarded unchanged.
    """
    with pytest.raises(ValueError):
        single_column = np.arange(len_array).reshape(-1, 1)
        make_tf_dataset(
            single_column,
            input_seq_length=input_seq_length,
            output_seq_length=output_seq_length,
            batch_size=batch_size,
        )
def make_predictions(input_model_path):
    """Run the saved model on the most recent hourly bitcoin prices.

    The prices of the last 9 days are log-transformed, scaled with the
    pickled scaler and fed to the model one batch at a time.

    Args:
        input_model_path: Path prefix of the saved artifacts. The scaler is
            read from ``<prefix>_scaler.pkl`` and the weights from
            ``<prefix>_weights.h5``.

    Returns:
        A pandas Series named ``"preds"`` with the flattened model output.
        No inverse scaling or exponentiation is applied in this function.
    """
    check_all_needed_files_exist(input_model_path)

    lookback_days = 9
    recent_prices = get_last_num_days_hourly_bitcoin_data(lookback_days)
    log_prices = np.log(recent_prices.price.values.reshape(-1, 1))

    # NOTE: pickle.load executes arbitrary code from the file; only point
    # this at scaler files you produced yourself.
    with open(f"{input_model_path}_scaler.pkl", "rb") as scaler_file:
        scaler = pickle.load(scaler_file)
    scaled_prices = scaler.transform(log_prices)

    inference_ds = make_tf_dataset(
        scaled_prices,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=0,
        # Only feed in single batches for inference for flexibility
        batch_size=1,
    )

    model = build_LSTM_training(batch_size=1, timesteps=INPUT_SEQ_LENGTH)
    model.load_weights(f"{input_model_path}_weights.h5")

    raw_preds = model.predict(inference_ds)
    return pd.Series(raw_preds.reshape(-1), name="preds")
def test_num_created_batches_is_not_0(self, len_array, input_seq_length, output_seq_length, batch_size):
    """Check that make_tf_dataset yields at least one batch for the given sizes."""
    single_column = np.arange(len_array).reshape(-1, 1)
    dataset = make_tf_dataset(
        single_column,
        input_seq_length=input_seq_length,
        output_seq_length=output_seq_length,
        batch_size=batch_size,
    )
    # Walk the whole dataset and count the batches it produces.
    batch_count = sum(1 for _ in dataset.as_numpy_iterator())
    assert batch_count > 0
def compare_feature_and_target_shapes(self, array, input_seq_length, output_seq_length, batch_size):
    """Assert the shapes of the first (feature, target) batch built from ``array``.

    The feature is expected to have shape
    ``(batch_size, input_seq_length, 1)`` and the target
    ``(batch_size, output_seq_length, 1)``.
    """
    dataset = make_tf_dataset(array, input_seq_length, output_seq_length, batch_size)
    want_feature_shape = (batch_size, input_seq_length, 1)
    want_target_shape = (batch_size, output_seq_length, 1)
    # Only the first batch is inspected.
    for feature, target in dataset.take(1):
        assert want_feature_shape == feature.shape
        assert want_target_shape == target.shape
def test_array_less_than_2D(self):
    """Check that make_tf_dataset raises ValueError for a 1-D input array."""
    array_1D = np.arange(DATASET_SIZE)
    # The exception details are not inspected, so the result of
    # pytest.raises is not bound to a name.
    with pytest.raises(ValueError):
        make_tf_dataset(array_1D, 5, 5, 5)
def test_array_greater_than_2D(self):
    """Check that make_tf_dataset raises ValueError for a 3-D input array."""
    array_3D = np.arange(DATASET_SIZE).reshape(-1, 10, 5)
    # The exception details are not inspected, so the result of
    # pytest.raises is not bound to a name.
    with pytest.raises(ValueError):
        make_tf_dataset(array_3D, 5, 5, 5)
def test_input_is_numpy(self, input_array_2D):
    """Check that make_tf_dataset raises TypeError for a non-NumPy input.

    The 2-D fixture array is converted to a plain list before being passed
    in.
    """
    not_a_numpy_array = list(input_array_2D)
    with pytest.raises(TypeError):
        # Supply the sequence-length and batch-size arguments as the sibling
        # tests do. If those parameters are required, calling with the input
        # alone would raise TypeError for the missing arguments, and the test
        # would pass without ever reaching the input type check.
        make_tf_dataset(not_a_numpy_array, 5, 5, 5)
def train_model(output_model_name):
    """Train the LSTM on the raw bitcoin data and save the resulting artifacts.

    The data is split into train/validation/test parts with
    ``temporal_train_test_split``, log-transformed and min-max scaled (the
    scaler is fitted on the training part only). The following files are
    written to the ``models`` directory, where ``<name>`` is
    ``output_model_name`` without its ``.h5`` suffix:

    * ``<name>_scaler.pkl`` - the pickled, fitted ``MinMaxScaler``
    * ``<name>`` - the model, via ``model.save``
    * ``<name>_weights.h5`` - the weights, via ``model.save_weights``

    Args:
        output_model_name: Name for the saved model; must end with ``.h5``.

    Raises:
        ValueError: If ``output_model_name`` does not end with ``.h5``.
    """
    suffix = ".h5"
    if not output_model_name.endswith(suffix):
        raise ValueError(
            "You must pass a model name with a .h5 extension at the end. "
            f"Received: {output_model_name}")

    output_dir = Path("models")
    output_dir.mkdir(exist_ok=True)

    # Remove .h5 at the end
    output_model_name = output_model_name[:-len(suffix)]

    bitcoin = load_raw_bitcoin_df()

    # In total we have: ~70% training, 20% val, 10% test.
    # First hold out the last 10% as test, then split the remaining 90%
    # 77/23: 0.9 * 0.77 ~= 0.69 train and 0.9 * 0.23 ~= 0.21 val.
    train, test = temporal_train_test_split(bitcoin, train_size=0.9)
    train, val = temporal_train_test_split(train, train_size=0.77)

    train = np.log(train)
    val = np.log(val)
    test = np.log(test)

    # Fit the scaler on the training part only and reuse it for val/test.
    min_max = MinMaxScaler()
    train = min_max.fit_transform(train)
    val = min_max.transform(val)
    test = min_max.transform(test)

    with open(output_dir / f"{output_model_name}_scaler.pkl", "wb") as f:
        pickle.dump(min_max, f)

    train_ds = make_tf_dataset(
        train,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=BATCH_SIZE_TRAINING,
    )
    val_ds = make_tf_dataset(
        val,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=BATCH_SIZE_TRAINING,
    )
    # Built but not consumed by the training below; kept so that whatever
    # validation make_tf_dataset performs on the test split still runs.
    test_ds = make_tf_dataset(
        test,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=1,
    )

    model = build_LSTM_training(batch_size=BATCH_SIZE_TRAINING, timesteps=INPUT_SEQ_LENGTH)

    # NOTE(review): with epochs=10 and patience=10 the early-stopping
    # condition cannot be reached - confirm these values are intended.
    early_stop_cb = EarlyStopping(patience=10, restore_best_weights=True, baseline=None)
    callbacks = [early_stop_cb]

    # batch_size is deliberately not passed: the datasets are already built
    # with BATCH_SIZE_TRAINING, and Keras documents that batch_size must not
    # be specified for dataset inputs.
    model.fit(
        train_ds,
        epochs=10,
        shuffle=False,
        validation_data=val_ds,
        callbacks=callbacks,
    )

    model.save(output_dir / f"{output_model_name}")
    model.save_weights(output_dir / f"{output_model_name}_weights.h5")