def get_split_data_paths():
    """Return absolute paths to the simple-regression train/test CSV files."""
    root = str(project_root())
    train_rel = 'test/data/simple_regression_train.csv'
    test_rel = 'test/data/simple_regression_test.csv'
    return os.path.join(root, train_rel), os.path.join(root, test_rel)
def get_cancer_case_data_paths() -> Tuple[str, str]:
    """Return absolute paths to the cancer benchmark train/test CSV files."""
    root = str(project_root())
    train_rel = os.path.join('cases', 'data', 'benchmark', 'cancer_train.csv')
    test_rel = os.path.join('cases', 'data', 'benchmark', 'cancer_test.csv')
    full_train_file_path = os.path.join(root, train_rel)
    full_test_file_path = os.path.join(root, test_rel)
    return full_train_file_path, full_test_file_path
def get_scoring_case_data_paths() -> Tuple[str, str]:
    """Return absolute paths to the scoring train/test CSV files."""
    root = str(project_root())
    train_rel = os.path.join('cases', 'data', 'scoring', 'scoring_train.csv')
    test_rel = os.path.join('cases', 'data', 'scoring', 'scoring_test.csv')
    full_train_file_path = os.path.join(root, train_rel)
    full_test_file_path = os.path.join(root, test_rel)
    return full_train_file_path, full_test_file_path
def test_metocean_forecasting_problem():
    """Smoke test: the metocean forecasting case should reach RMSE below 50."""
    # The first join already yields an absolute path; the original joined it
    # with the project root a second time, which os.path.join silently
    # ignores for absolute components — the redundant join is removed.
    file_path_train = os.path.join(str(project_root()),
                                   'test/data/simple_time_series.csv')
    # The same file serves as both train and validation data in this test
    file_path_test = file_path_train

    rmse = run_metocean_forecasting_problem(file_path_train, file_path_test,
                                            forecast_length=1,
                                            max_window_size=1)
    assert rmse < 50
def test_credit_scoring_problem():
    """Smoke test: the credit scoring case should beat random AUC (0.5)."""
    # The first join already yields an absolute path; the original joined it
    # with the project root a second time, which os.path.join silently
    # ignores for absolute components — the redundant join is removed.
    file_path_train = os.path.join(str(project_root()),
                                   'test/data/simple_classification.csv')
    # The same file serves as both train and validation data in this test
    file_path_test = file_path_train

    roc_auc_test = run_credit_scoring_problem(
        file_path_train, file_path_test,
        max_lead_time=timedelta(minutes=0.1))
    assert roc_auc_test > 0.5
def get_scoring_data():
    """Load the scoring train/test CSV files as classification InputData."""
    task = Task(TaskTypesEnum.classification)
    root = str(project_root())

    train = InputData.from_csv(
        join(root, 'cases/data/scoring/scoring_train.csv'), task=task)
    # a dataset for a final validation of the composed model
    test = InputData.from_csv(
        join(root, 'cases/data/scoring/scoring_test.csv'), task=task)

    return train, test
def get_scoring_data():
    """Return absolute paths to the scoring train/test CSV files.

    The dataset was obtained from https://www.kaggle.com/c/GiveMeSomeCredit
    """
    root = str(project_root())
    # a dataset that will be used as a train and test set during composition
    full_path_train = os.path.join(root, 'cases/data/scoring/scoring_train.csv')
    # a dataset for a final validation of the composed model
    full_path_test = os.path.join(root, 'cases/data/scoring/scoring_test.csv')
    return full_path_train, full_path_test
def test_tpot_vs_fedot_example():
    """Smoke test: the TPOT-vs-FEDOT example should beat random AUC (0.5)."""
    data_path = os.path.join(str(project_root()),
                             'test/data/simple_classification.csv')
    # The same file is used for both training and testing here
    auc = run_tpot_vs_fedot_example(data_path, data_path)
    assert auc > 0.5
def run_gapfilling_case(file_path):
    """
    The function runs an example of filling in gaps in a time series with
    air temperature. Real data case.

    :param file_path: path to the file
    :return: pandas dataframe with columns 'date','with_gap','ridge',
        'composite','temperature'
    """
    # Load the dataframe and parse timestamps
    dataframe = pd.read_csv(os.path.join(str(project_root()), file_path))
    dataframe['date'] = pd.to_datetime(dataframe['date'])
    gap_array = np.array(dataframe['with_gap'])

    # Filling in gaps based on inverted ridge regression model
    ridge_filler = ModelGapFiller(gap_value=-100.0, chain=get_simple_chain())
    dataframe['ridge'] = ridge_filler.forward_inverse_filling(gap_array)

    # Filling in gaps based on a chain of 5 models
    composite_filler = ModelGapFiller(gap_value=-100.0,
                                      chain=get_composite_chain())
    dataframe['composite'] = composite_filler.forward_filling(gap_array)

    return dataframe
def create_multi_clf_examples_from_excel(file_path: str, return_df: bool = False):
    """ Return dataframe from excel file or path to the csv file

    :param file_path: path to the source .xlsx file
    :param return_df: if True, save the full dataframe as csv and return
        (dataframe, csv path); otherwise save the train/test splits and
        return their paths
    """
    df = pd.read_excel(file_path, engine='openpyxl')
    train, test = split_data(df)
    file_dir_name = file_path.replace('.', '/').split('/')[-2]
    file_csv_name = f'{file_dir_name}.csv'
    directory_names = ['examples', 'data', file_dir_name]

    # Check does obtained directory exist or not
    ensure_directory_exists(directory_names)
    if return_df:
        # Need to return dataframe and path to the file in csv format
        path = os.path.join(directory_names[0], directory_names[1],
                            directory_names[2], file_csv_name)
        full_file_path = os.path.join(str(project_root()), path)
        save_file_to_csv(df, full_file_path)
        return df, full_file_path
    else:
        # Need to return only paths to the files with train and test data
        full_train_file_path, full_test_file_path = get_split_data_paths(
            directory_names)
        save_file_to_csv(train, full_train_file_path)
        # Bug fix: the test split must be saved to the test path
        # (the original saved the train split to both files)
        save_file_to_csv(test, full_test_file_path)
        return full_train_file_path, full_test_file_path
def test_multiclass_example():
    """Smoke test: the multiclass example should produce a non-None chain."""
    data_path = os.path.join(str(project_root()),
                             'test/data/multiclass_classification.csv')
    chain = get_model(data_path, cur_lead_time=timedelta(seconds=1))
    assert chain is not None
def test_exogenous_ts_example():
    """Smoke test for the exogenous time series forecasting example."""
    data_path = os.path.join(str(project_root()),
                             'test/data/simple_sea_level.csv')
    run_exogenous_experiment(path_to_file=data_path,
                             len_forecast=50,
                             with_exog=True,
                             with_visualisation=False)
def test_lagged_with_invalid_params_fit_correctly():
    """ The function define a chain with incorrect parameters in the lagged
    transformation. During the training of the chain, the parameter
    'window_size' is corrected
    """
    window_size = 600
    len_forecast = 50

    # The length of the time series is 500 elements
    file_path = os.path.join(str(project_root()),
                             'test/data/short_time_series.csv')
    time_series = np.array(pd.read_csv(file_path)['sea_height'])

    forecasting_task = Task(TaskTypesEnum.ts_forecasting,
                            TsForecastingParams(forecast_length=len_forecast))
    train_input = InputData(idx=np.arange(0, len(time_series)),
                            features=time_series,
                            target=time_series,
                            task=forecasting_task,
                            data_type=DataTypesEnum.ts)

    # Get chain with lagged transformation in it and fit it; the invalid
    # window_size must not break the training
    chain = get_ts_chain(window_size)
    chain.fit(train_input)

    fit_completed = True
    assert fit_completed
def get_kc2_data():
    """Load the kc2 dataset and split it into train and test parts."""
    full_path = join(str(project_root()), 'cases/data/kc2/kc2.csv')
    data = InputData.from_csv(full_path,
                              task=Task(TaskTypesEnum.classification))
    train, test = train_test_data_setup(data)
    return train, test
def get_cholesterol_data():
    """Load the cholesterol dataset and split it into train and test parts."""
    full_path = join(str(project_root()),
                     'cases/data/cholesterol/cholesterol.csv')
    data = InputData.from_csv(full_path,
                              task=Task(TaskTypesEnum.regression))
    train, test = train_test_data_setup(data)
    return train, test
def test_spam_detection_problem():
    """ Simple launch of spam detection case """
    meta_file_path = os.path.join(str(project_root()),
                                  'test/data/spam_detection.csv')
    # Classification task based on text data
    run_text_problem_from_saved_meta_file(meta_file_path)
def test_evaluate_individuals():
    """Evaluation of a population must respect the composition time limit:
    a near-zero budget evaluates only one individual, a generous budget all.
    """
    # The original joined the (already absolute) train path with the project
    # root a second time; os.path.join discards earlier components when a
    # later one is absolute, so the duplicate join was a no-op and is removed.
    full_path_train = os.path.join(str(project_root()),
                                   'test/data/simple_classification.csv')
    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types)

    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)
    composer = builder.build()

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ration_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics,
                                        train_data, test_data)

    # With a near-zero time budget only the first individual is evaluated
    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    max_lead_time = datetime.timedelta(minutes=0.001)
    with CompositionTimer(max_lead_time=max_lead_time) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 1
    assert population[0].fitness is not None

    # With a generous budget the whole population gets evaluated
    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    max_lead_time = datetime.timedelta(minutes=5)
    with CompositionTimer(max_lead_time=max_lead_time) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 4
    assert all([ind.fitness is not None for ind in population])
def prepare_input_data(train_file_path, test_file_path, forecast_length):
    """ Function for preparing InputData for train and test algorithm

    :param train_file_path: path to the csv file for training
    :param test_file_path: path to the csv file for validation
    :param forecast_length: forecast length for prediction

    :return dataset_to_train: InputData for train
    :return dataset_to_validate: InputData for validation
    """
    # specify the task to solve
    task_to_solve = Task(TaskTypesEnum.ts_forecasting,
                         TsForecastingParams(
                             forecast_length=forecast_length))

    # Load train and test dataframes
    root = str(project_root())
    df_train = pd.read_csv(os.path.join(root, train_file_path))
    df_test = pd.read_csv(os.path.join(root, test_file_path))

    # Get idx for train and series for train
    train_feature_ts = np.ravel(np.array(df_train['wind_speed']))
    train_target_ts = np.ravel(np.array(df_train['sea_height']))
    idx_train = np.arange(0, len(train_feature_ts))
    dataset_to_train = InputData(idx=idx_train,
                                 features=train_feature_ts,
                                 target=train_target_ts,
                                 task=task_to_solve,
                                 data_type=DataTypesEnum.ts)

    # Validation indices start right after the training range
    start_forecast = len(idx_train)
    idx_test = np.arange(start_forecast, start_forecast + forecast_length)
    test_target_ts = np.ravel(np.array(df_test['sea_height']))[:forecast_length]
    dataset_to_validate = InputData(idx=idx_test,
                                    features=train_feature_ts,
                                    target=test_target_ts,
                                    task=task_to_solve,
                                    data_type=DataTypesEnum.ts)

    return dataset_to_train, dataset_to_validate
def test_chain_from_automl_example():
    """Smoke test: composing a chain with AutoML models should beat AUC 0.5."""
    data_path = os.path.join(str(project_root()),
                             'test/data/simple_classification.csv')
    experimental_repo_file = os.path.join('model_repository_with_automl.json')

    # Temporarily switch the repository to the experimental AutoML one
    with OperationTypesRepository(experimental_repo_file) as _:
        # The same file is used for both training and testing
        auc = run_chain_from_automl(data_path, data_path,
                                    max_run_time=timedelta(seconds=1))
        assert auc > 0.5
def test_multistep_example():
    """Smoke test for the multistep forecasting example."""
    data_path = os.path.join(str(project_root()),
                             'test/data/simple_sea_level.csv')
    time_series = np.array(pd.read_csv(data_path)['Level'])
    run_multistep_example(time_series,
                          len_forecast=20,
                          future_steps=40,
                          vis=False)
def run_metocean_forecasting_problem(train_file_path, test_file_path,
                                     forecast_length=1, max_window_size=32,
                                     is_visualise=False):
    """Fit a simple linear chain and a composite LSTM chain, print both
    validation RMSE values and return the RMSE of the simple chain.

    :param train_file_path: training csv path, relative to the project root
    :param test_file_path: validation csv path, relative to the project root
    :param forecast_length: number of steps to forecast
    :param max_window_size: maximal lag window for the forecasting task
    :param is_visualise: whether to plot the validation results
    :return: RMSE of the simple (linear) chain on the validation set
    """
    # specify the task to solve
    task_to_solve = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size))

    root = str(project_root())
    dataset_to_train = InputData.from_csv(
        os.path.join(root, train_file_path),
        task=task_to_solve, data_type=DataTypesEnum.ts)
    # a dataset for a final validation of the composed model
    dataset_to_validate = InputData.from_csv(
        os.path.join(root, test_file_path),
        task=task_to_solve, data_type=DataTypesEnum.ts)

    # Baseline: a single linear model
    chain_simple = TsForecastingChain(PrimaryNode('linear'))
    chain_simple.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_simple = calculate_validation_metric(
        chain_simple.predict(dataset_to_validate), dataset_to_validate,
        f'full-simple_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE simple: {rmse_on_valid_simple}')

    # Composite chain containing an LSTM model
    chain_composite_lstm = get_composite_chain()
    chain_composite_lstm.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_lstm_only = calculate_validation_metric(
        chain_composite_lstm.predict(dataset_to_validate), dataset_to_validate,
        f'full-lstm-only_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE LSTM composite: {rmse_on_valid_lstm_only}')

    return rmse_on_valid_simple
def test_forecasting_model_composing_example():
    """Smoke test: the forecasting composition example yields a positive RMSE."""
    root = str(project_root())
    train_path = os.path.join(root, 'test/data/simple_time_series.csv')
    test_path = os.path.join(root, 'test/data/simple_time_series_test.csv')

    rmse = run_metocean_forecasting_problem(train_path, test_path,
                                            max_window_size=1,
                                            forecast_length=4,
                                            with_visualisation=False)
    assert rmse > 0
def create_multi_clf_examples_from_excel(file_path: str, return_df: bool = False):
    """Convert an excel dataset to csv files.

    :param file_path: path to the source excel file
    :param return_df: if True, save the full dataframe as csv and return
        (dataframe, csv path); otherwise save the train/test splits and
        return their paths
    """
    df = pd.read_excel(file_path)
    train, test = split_data(df)
    file_dir_name = file_path.replace('.', '/').split('/')[-2]
    file_csv_name = f'{file_dir_name}.csv'
    directory_names = ['examples', 'data', file_dir_name]
    ensure_directory_exists(directory_names)
    if return_df:
        path = os.path.join(directory_names[0], directory_names[1],
                            directory_names[2], file_csv_name)
        full_file_path = os.path.join(str(project_root()), path)
        save_file_to_csv(df, full_file_path)
        return df, full_file_path
    else:
        full_train_file_path, full_test_file_path = get_split_data_paths(
            directory_names)
        save_file_to_csv(train, full_train_file_path)
        # Bug fix: the test split must be saved to the test path
        # (the original saved the train split to both files)
        save_file_to_csv(test, full_test_file_path)
        return full_train_file_path, full_test_file_path
def test_river_levels_problem():
    """Smoke test: run one tuning iteration of the river levels case."""
    # Initialise chain for river levels prediction:
    # encoder -> scaling -> (ridge | lasso) -> random forest regressor
    node_encoder = PrimaryNode('one_hot_encoding')
    node_scaling = SecondaryNode('scaling', nodes_from=[node_encoder])
    node_ridge = SecondaryNode('ridge', nodes_from=[node_scaling])
    node_lasso = SecondaryNode('lasso', nodes_from=[node_scaling])
    node_final = SecondaryNode('rfr', nodes_from=[node_ridge, node_lasso])
    init_chain = Chain(node_final)

    data_path = os.path.join(str(project_root()),
                             'test/data/station_levels.csv')
    run_river_experiment(file_path=data_path,
                         chain=init_chain,
                         iterations=1,
                         tuner=ChainTuner,
                         tuner_iterations=10)

    experiment_finished = True
    assert experiment_finished
def run_metocean_forecasting_problem(train_file_path, test_file_path,
                                     forecast_length=1, max_window_size=64,
                                     with_visualisation=True):
    """Benchmark hand-crafted and composer-built forecasting chains.

    Fits, in order: every single-model chain, a fixed multiscale chain,
    a fixed ensemble chain, a GP-composed ensemble chain and a GP-composed
    multiscale chain; each is validated via calculate_validation_metric.

    :param train_file_path: training csv path, relative to the project root
    :param test_file_path: validation csv path, relative to the project root
    :param forecast_length: number of steps to forecast
    :param max_window_size: maximal lag window; 1 switches to a tiny
        unit-test configuration (fewer models, near-zero time limit)
    :param with_visualisation: whether to plot chains and validation results
    :return: validation RMSE of the final composed multiscale chain
    """
    # specify the task to solve
    task_to_solve = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size,
                            return_all_steps=False))

    full_path_train = os.path.join(str(project_root()), train_file_path)
    dataset_to_train = InputData.from_csv(full_path_train,
                                          task=task_to_solve,
                                          data_type=DataTypesEnum.ts)

    # a dataset for a final validation of the composed model
    full_path_test = os.path.join(str(project_root()), test_file_path)
    dataset_to_validate = InputData.from_csv(full_path_test,
                                             task=task_to_solve,
                                             data_type=DataTypesEnum.ts)

    metric_function = MetricsRepository().metric_by_id(
        RegressionMetricsEnum.RMSE)

    time_limit_min = 10
    available_model_types = [
        'linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'
    ]

    if max_window_size == 1:
        # unit test model
        available_model_types = ['linear', 'ridge']
        time_limit_min = 0.001

    # each possible single-model chain
    for model in available_model_types:
        chain = TsForecastingChain(PrimaryNode(model))
        chain.fit(input_data=dataset_to_train, verbose=False)
        calculate_validation_metric(chain.predict(dataset_to_validate),
                                    dataset_to_validate,
                                    is_visualise=with_visualisation,
                                    label=model)

    # static multiscale chain
    multiscale_chain = get_composite_multiscale_chain()
    multiscale_chain.fit(input_data=dataset_to_train, verbose=False)
    calculate_validation_metric(multiscale_chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Fixed multiscale')

    # static all-in-one ensemble chain
    ens_chain = get_ensemble_chain()
    ens_chain.fit(input_data=dataset_to_train, verbose=False)
    calculate_validation_metric(ens_chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Ensemble composite')

    # optimized ensemble chain
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types, max_arity=5,
        max_depth=2, pop_size=10, num_of_generations=10,
        crossover_prob=0.8, mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=time_limit_min),
        add_single_model_chains=False)

    builder = GPComposerBuilder(task=task_to_solve).with_requirements(
        composer_requirements).with_metrics(metric_function)
    composer = builder.build()

    chain = composer.compose_chain(data=dataset_to_train, is_visualise=False)
    # re-fit the best found chain on the full training data
    chain.fit_from_scratch(input_data=dataset_to_train, verbose=False)

    if with_visualisation:
        ComposerVisualiser.visualise(chain)

    calculate_validation_metric(chain.predict(dataset_to_validate),
                                dataset_to_validate,
                                is_visualise=with_visualisation,
                                label='Automated ensemble')

    # optimized multiscale chain: special data models in the primary layer,
    # regression models in the secondary layer
    available_model_types_primary = ['trend_data_model', 'residual_data_model']
    available_model_types_secondary = [
        'linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'
    ]
    available_model_types_all = available_model_types_primary + available_model_types_secondary
    composer_requirements = GPComposerRequirements(
        primary=available_model_types_all,
        secondary=available_model_types_secondary, max_arity=5,
        max_depth=2, pop_size=10, num_of_generations=30,
        crossover_prob=0.8, mutation_prob=0.8,
        max_lead_time=datetime.timedelta(minutes=time_limit_min))

    # the fixed multiscale chain seeds the second composition run
    builder = GPComposerBuilder(task=task_to_solve).with_requirements(
        composer_requirements).with_metrics(
        metric_function).with_initial_chain(multiscale_chain)
    composer = builder.build()

    chain = composer.compose_chain(data=dataset_to_train, is_visualise=False)
    chain.fit_from_scratch(input_data=dataset_to_train, verbose=False)

    if with_visualisation:
        visualiser = ChainVisualiser()
        visualiser.visualise(chain)

    rmse_on_valid = calculate_validation_metric(
        chain.predict(dataset_to_validate), dataset_to_validate,
        is_visualise=with_visualisation,
        label='Automated multiscale')

    return rmse_on_valid
class ComposerVisualiser:
    """Draws chains and composition history as images and assembles them
    into an animated GIF showing the composer's progress."""

    # Temporary folder used for intermediate .png frames and the final .gif;
    # placed in 'tmp/' next to the parent of the project root
    root_parent_path = os.path.join('../', str(project_root()))
    root_parent_path_dirname = os.path.dirname(root_parent_path)
    temp_path = os.path.join(root_parent_path_dirname, 'tmp/')
    # NOTE(review): creating the directory here runs at import time —
    # a module-level side effect
    if 'tmp' not in os.listdir(root_parent_path_dirname):
        os.mkdir(temp_path)
    # filename prefix marking frames that belong to the final GIF
    gif_prefix = 'for_gif_'

    @staticmethod
    def visualise(chain: Chain):
        """Draw a single chain as a labelled graph; failures are only printed."""
        try:
            chain.sort_nodes()
            graph, node_labels = as_nx_graph(chain=chain)
            pos = node_positions(graph.to_undirected())
            plt.figure(figsize=(10, 16))
            nx.draw(graph, pos=pos,
                    with_labels=True, labels=node_labels,
                    font_size=12, font_family='calibri', font_weight='bold',
                    node_size=7000, width=2.0,
                    node_color=colors_by_node_labels(node_labels), cmap='Set3')
            plt.show()
        except Exception as ex:
            print(f'Visualisation failed with {ex}')

    @staticmethod
    def _visualise_chains(chains, fitnesses):
        """Save two frames per chain: the current chain ('ch_<i>.png') and the
        best chain found so far ('best_ch_<i>.png')."""
        fitnesses = deepcopy(fitnesses)
        last_best_chain = chains[0]
        prev_fit = fitnesses[0]
        for ch_id, chain in enumerate(chains):
            graph, node_labels = as_nx_graph(chain=chain)
            pos = node_positions(graph.to_undirected())
            plt.rcParams['axes.titlesize'] = 20
            plt.rcParams['axes.labelsize'] = 20
            plt.rcParams['figure.figsize'] = [10, 10]
            plt.title('Current chain')
            nx.draw(graph, pos=pos,
                    with_labels=True, labels=node_labels,
                    font_size=12, font_family='calibri', font_weight='bold',
                    node_size=scaled_node_size(chain.length), width=2.0,
                    node_color=colors_by_node_labels(node_labels), cmap='Set3')
            path = f'{ComposerVisualiser.temp_path}ch_{ch_id}.png'
            plt.savefig(path, bbox_inches='tight')
            plt.cla()
            plt.clf()
            plt.close('all')
            path_best = f'{ComposerVisualiser.temp_path}best_ch_{ch_id}.png'
            # Keep the running best: if the current fitness is worse
            # (greater), carry the previous best value forward
            if fitnesses[ch_id] > prev_fit:
                fitnesses[ch_id] = prev_fit
            else:
                last_best_chain = chain
            prev_fit = fitnesses[ch_id]
            best_graph, best_node_labels = as_nx_graph(chain=last_best_chain)
            pos = node_positions(best_graph.to_undirected())
            plt.rcParams['axes.titlesize'] = 20
            plt.rcParams['axes.labelsize'] = 20
            plt.rcParams['figure.figsize'] = [10, 10]
            plt.title(f'Best chain after {round(ch_id)} evals')
            nx.draw(best_graph, pos=pos,
                    with_labels=True, labels=best_node_labels,
                    font_size=12, font_family='calibri', font_weight='bold',
                    node_size=scaled_node_size(chain.length), width=2.0,
                    node_color=colors_by_node_labels(best_node_labels),
                    cmap='Set3')
            plt.savefig(path_best, bbox_inches='tight')
            plt.cla()
            plt.clf()
            plt.close('all')

    @staticmethod
    def _visualise_convergence(fitness_history):
        """Save one convergence-plot frame ('<i>.png') per evaluation."""
        fitness_history = deepcopy(fitness_history)
        prev_fit = fitness_history[0]
        # Turn the raw history into a monotone best-so-far sequence
        for fit_id, fit in enumerate(fitness_history):
            if fit > prev_fit:
                fitness_history[fit_id] = prev_fit
            prev_fit = fitness_history[fit_id]
        ts_set = list(range(len(fitness_history)))
        # Fitness values are negated for plotting (axis labelled ROC AUC)
        df = pd.DataFrame({
            'ts': ts_set,
            'fitness': [-f for f in fitness_history]
        })
        ind = 0
        for ts in ts_set:
            plt.rcParams['axes.titlesize'] = 20
            plt.rcParams['axes.labelsize'] = 20
            plt.rcParams['figure.figsize'] = [10, 10]
            ind = ind + 1
            plt.plot(df['ts'], df['fitness'], label='Composer')
            plt.xlabel('Evaluation', fontsize=18)
            plt.ylabel('Best ROC AUC', fontsize=18)
            # vertical marker at the current evaluation
            plt.axvline(x=ts, color='black')
            plt.legend(loc='upper left')
            path = f'{ComposerVisualiser.temp_path}{ind}.png'
            plt.savefig(path, bbox_inches='tight')
            plt.cla()
            plt.clf()
            plt.close('all')

    @staticmethod
    def visualise_history(chains, fitnesses):
        """Render the full composition history into a GIF: draw frames,
        merge them side by side, combine into a GIF, then clean up."""
        print('START VISUALISATION')
        try:
            ComposerVisualiser._clean(with_gif=True)
            ComposerVisualiser._visualise_chains(chains, fitnesses)
            ComposerVisualiser._visualise_convergence(fitnesses)
            ComposerVisualiser._merge_images(len(chains))
            ComposerVisualiser._combine_gifs()
            ComposerVisualiser._clean()
        except Exception as ex:
            print(f'Visualisation failed with {ex}')

    @staticmethod
    def _merge_images(num_images):
        """Concatenate the chain, best-chain and convergence frames for each
        index horizontally into one 'for_gif_<i>.png' image."""
        for img_idx in (range(1, num_images)):
            images = list(
                map(Image.open, [
                    f'{ComposerVisualiser.temp_path}ch_{img_idx}.png',
                    f'{ComposerVisualiser.temp_path}best_ch_{img_idx}.png',
                    f'{ComposerVisualiser.temp_path}{img_idx}.png'
                ]))
            widths, heights = zip(*(i.size for i in images))
            total_width = sum(widths)
            max_height = max(heights)
            new_im = Image.new('RGB', (total_width, max_height))
            x_offset = 0
            # paste the three frames left to right
            for im in images:
                new_im.paste(im, (x_offset, 0))
                x_offset += im.size[0]
            new_im.save(
                f'{ComposerVisualiser.temp_path}{ComposerVisualiser.gif_prefix}{img_idx}.png'
            )

    @staticmethod
    def _combine_gifs():
        """Combine the merged frames, sorted by index, into a timestamped GIF."""
        files = [
            file_name for file_name in iglob(
                f'{ComposerVisualiser.temp_path}{ComposerVisualiser.gif_prefix}*.png'
            )
        ]
        # numeric index extracted from each file name, used as the sort key
        files_idx = [
            int(file_name[len(
                f'{ComposerVisualiser.temp_path}{ComposerVisualiser.gif_prefix}'
            ):(len(file_name) - len('.png'))]) for file_name in iglob(
                f'{ComposerVisualiser.temp_path}{ComposerVisualiser.gif_prefix}*.png'
            )
        ]
        files = [file for _, file in sorted(zip(files_idx, files))]
        with get_writer(
                f'{ComposerVisualiser.temp_path}final_{str(time())}.gif',
                mode='I', duration=0.5) as writer:
            for filename in files:
                image = imread(filename)
                writer.append_data(image)

    @staticmethod
    def _clean(with_gif=False):
        """Remove temporary .png files (and .gif files when with_gif=True);
        errors are only printed."""
        try:
            files = glob(f'{ComposerVisualiser.temp_path}*.png')
            if with_gif:
                files += glob(f'{ComposerVisualiser.temp_path}*.gif')
            for file in files:
                remove(file)
        except Exception as ex:
            print(ex)
print(f'RMSE simple: {rmse_on_valid_simple}') chain_composite_lstm = get_composite_chain() chain_composite_lstm.fit(input_data=dataset_to_train, verbose=False) rmse_on_valid_lstm_only = calculate_validation_metric( chain_composite_lstm.predict(dataset_to_validate), dataset_to_validate, f'full-lstm-only_{forecast_length}', is_visualise=is_visualise) print(f'RMSE LSTM composite: {rmse_on_valid_lstm_only}') return rmse_on_valid_simple if __name__ == '__main__': # the dataset was obtained from NEMO model simulation for sea surface height # a dataset that will be used as a train and test set during composition file_path_train = 'cases/data/metocean/metocean_data_train.csv' full_path_train = os.path.join(str(project_root()), file_path_train) # a dataset for a final validation of the composed model file_path_test = 'cases/data/metocean/metocean_data_test.csv' full_path_test = os.path.join(str(project_root()), file_path_test) run_metocean_forecasting_problem(full_path_train, full_path_test, forecast_length=72, max_window_size=72, is_visualise=True)