def fitness_func(chromosome: Chromosome) -> float:
    """Train a model with the chromosome's hyperparameters and return its fitness.

    Relies on module-level train_x/train_y, validation_x/validation_y and
    preprocessor being prepared before the GA runs.
    """
    fitness = 0.0
    create_tf_session()

    params = chromosome.values
    print("Current Chromosome Params:")
    print(params)

    model = Model(
        train_x, train_y, validation_x, validation_y,
        preprocessor.get_seq_info_str(),
        architecture=Architecture.GRU.value,
        is_bidirectional=False,
        batch_size=round(params["batch_size"]),
        hidden_layers=round(params["hidden_layers"]),
        neurons_per_layer=round(params["neurons_per_layer"]),
        dropout=params["dropout"],
        initial_learn_rate=params["initial_learn_rate"],
        is_classification=IS_CLASSIFICATION,
    )
    model.train()

    if IS_CLASSIFICATION:
        # Lower crossentropy is better, so negate it for a maximising GA.
        fitness = -model.score["sparse_categorical_crossentropy"]
        chromosome.other["sparse_categorical_crossentropy"] = model.score["sparse_categorical_crossentropy"]
        chromosome.other["accuracy"] = model.score["accuracy"]
    else:
        fitness = model.score["RSquaredMetric"]
        chromosome.other["mae"] = model.score["mae"]

    ## Cleanup
    del model
    K.clear_session()
    return fitness
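# The sketch below is not part of the original module; it is a hypothetical smoke test
# showing what fitness_func expects from a chromosome: a `values` dict holding the five
# hyperparameters read above, and an `other` dict it can write extra metrics into.
# _StubChromosome and _smoke_test_fitness are illustrative names only; the real
# Chromosome class comes from the project's GA code, and train_x/train_y,
# validation_x/validation_y and preprocessor must already exist at module level.
def _smoke_test_fitness() -> float:
    from dataclasses import dataclass, field

    @dataclass
    class _StubChromosome:
        values: dict
        other: dict = field(default_factory=dict)

    # Hyperparameter values here mirror the fixed settings used in multi_test.
    candidate = _StubChromosome(values={
        "batch_size": 1024,
        "hidden_layers": 2,
        "neurons_per_layer": 100,
        "dropout": 0.2,
        "initial_learn_rate": 0.001,
    })
    fitness = _fitness = fitness_func(candidate)
    print("Fitness:", fitness, "Extra metrics:", candidate.other)
    return fitness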
def train_model():
    preprocessor = DataPreprocesser(
        f"{os.environ['WORKSPACE']}/data/crypto/{SYMBOL_TO_PREDICT}.parquet",
        col_names=["open", "high", "low", "close", "volume"],
        forecast_col_name="close",
        sequence_length=100,
        forecast_period=10,
        is_classification=IS_CLASSIFICATION,
    )
    if not preprocessor.has_loaded and SHOULD_USE_INDICATORS:
        indicator_df = get_select_indicator_values(preprocessor.df_original)
        preprocessor.change_data(indicator_df)
    preprocessor.print_df()
    preprocessor.print_df_no_std()
    preprocessor.preprocess()

    train_x, train_y = preprocessor.get_train()
    validation_x, validation_y = preprocessor.get_validation()

    # Best hyperparams from GA:
    # batch_size=1534,
    # hidden_layers=2,
    # neurons_per_layer=60,
    # dropout=0.4714171367290059,
    # initial_learn_rate=0.003725545984696872,
    model = Model(
        train_x, train_y, validation_x, validation_y,
        preprocessor.get_seq_info_str(),
        architecture=Architecture.GRU.value,
        is_bidirectional=False,
        batch_size=1534,
        hidden_layers=2,
        neurons_per_layer=60,
        dropout=0.4164892976819227,
        initial_learn_rate=0.004587161725770879,
        early_stop_patience=10,
        max_epochs=100,
        is_classification=IS_CLASSIFICATION,
    )
    preprocessor.print_dataset_totals()
    del preprocessor  # Save memory
    model.train()
    model.save_model()
def multi_test(SYMBOL_TO_PREDICT: str):
    params = {
        "architecture": Architecture.GRU.value,
        "is_bidirectional": False,
        "indicators": False,
        "sequence_length": 200,
        "forecast_period": 10,
    }
    tests = [
        # Format:
        # (num_repetitions, {"param to change": new_value})
        (5, {"architecture": Architecture.LSTM.value, "is_bidirectional": False}),
        (5, {"architecture": Architecture.GRU.value, "is_bidirectional": False}),
        (5, {"architecture": Architecture.LSTM.value, "is_bidirectional": True}),
        (5, {"architecture": Architecture.GRU.value, "is_bidirectional": True}),
        (5, {"indicators": False}),
        (5, {"indicators": True}),
        (1, {"sequence_length": 50}),
        (1, {"sequence_length": 100}),
        (1, {"sequence_length": 150}),
        (1, {"sequence_length": 200}),
        (1, {"sequence_length": 250}),
        (1, {"sequence_length": 300}),
        (1, {"sequence_length": 350}),
        (1, {"sequence_length": 400}),
        (1, {"forecast_period": 1}),
        (1, {"forecast_period": 5}),
        (1, {"forecast_period": 10}),
        (1, {"forecast_period": 20}),
        (1, {"forecast_period": 30}),
        (1, {"forecast_period": 50}),
    ]

    preprocessor = DataPreprocesser(
        f"{os.environ['WORKSPACE']}/data/crypto/{SYMBOL_TO_PREDICT}.parquet",
        col_names=["open", "high", "low", "close", "volume"],
        forecast_col_name="close",
        forecast_period=params["forecast_period"],
        sequence_length=params["sequence_length"],
        should_ask_load=False,
        is_classification=IS_CLASSIFICATION,
    )
    preprocessor.preprocess()

    folder = f"{os.environ['WORKSPACE']}/results/tests"
    for test_num, test in enumerate(tests):
        num_repeats, additional_params = test
        new_params = {**params, **additional_params}

        # Reuse the base preprocessor unless this test changes how the data is prepared.
        pre = None
        try:
            pre = preprocessor
        except NameError:
            pass
        if ("sequence_length" in additional_params
                or "forecast_period" in additional_params
                or "indicators" in additional_params):
            try:
                del preprocessor
                del pre
            except NameError:
                pass
            pre = DataPreprocesser(
                f"{os.environ['WORKSPACE']}/data/crypto/{SYMBOL_TO_PREDICT}.parquet",
                col_names=["open", "high", "low", "close", "volume"],
                forecast_col_name="close",
                forecast_period=new_params["forecast_period"],
                sequence_length=new_params["sequence_length"],
                should_ask_load=False,
                is_classification=IS_CLASSIFICATION,
            )
            pre.preprocess()
            if new_params["indicators"]:
                indicator_df = get_select_indicator_values(pre.df_original)
                pre.change_data(indicator_df)
                pre.print_df()
                pre.print_df_no_std()

        for i in range(num_repeats):
            create_tf_session()
            print(f"Test {test_num} repeat {i}")
            print("Params")
            print(new_params)

            train_x, train_y = pre.get_train()
            validation_x, validation_y = pre.get_validation()
            model = Model(
                train_x, train_y, validation_x, validation_y,
                pre.get_seq_info_str(),
                architecture=new_params["architecture"],
                is_bidirectional=new_params["is_bidirectional"],
                batch_size=1024,
                hidden_layers=2,
                neurons_per_layer=100,
                dropout=0.2,
                initial_learn_rate=0.001,
                is_classification=IS_CLASSIFICATION,
            )
            model.train()
            train_time = float(model.train_time) / 60.0  # In minutes

            # Write a header row the first time this test's CSV is created.
            file_path = f"{folder}/test{test_num}.csv"
            if not os.path.exists(file_path):
                with open(file_path, 'a') as file:
                    if IS_CLASSIFICATION:
                        file.write("Accuracy,Sparse Categorical Crossentropy,Train Time\n")
                    else:
                        file.write("R Square,MAE,Train Time\n")
            with open(file_path, 'a') as file:
                if IS_CLASSIFICATION:
                    file.write(f"{model.score['accuracy']},{model.score['sparse_categorical_crossentropy']},{train_time}\n")
                else:
                    file.write(f"{model.score['RSquaredMetric']},{model.score['mae']},{train_time}\n")

            ## Cleanup
            del model
            K.clear_session()
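# Hypothetical helper, not in the original file: once multi_test has written the
# results/tests/test<N>.csv files (columns "R Square,MAE,Train Time" for regression or
# "Accuracy,Sparse Categorical Crossentropy,Train Time" for classification), the repeated
# runs for each test can be summarised. The function name and the use of pandas here are
# assumptions for illustration only.
def summarise_test_results() -> None:
    import glob
    import pandas as pd

    folder = f"{os.environ['WORKSPACE']}/results/tests"
    for file_path in sorted(glob.glob(f"{folder}/test*.csv")):
        df = pd.read_csv(file_path)
        print(f"\n{os.path.basename(file_path)} ({len(df)} repetitions)")
        # Mean and standard deviation of every recorded metric for this test.
        print(df.agg(["mean", "std"]))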