def _plot_ita(self, first_half: np.ndarray, second_half: np.ndarray, time_series_min, time_series_max):
    """
    Plot the Innovative Trend Analysis results and save them to disk.

    The first half of the series is scattered against the second half and
    the (1:1) "no trend" line is drawn on top. Both axes are limited to
    ``[time_series_min, time_series_max]``. The figure is written to
    ``<self.save_path>/<self.save_name>_<file_id>.<self.save_format>``,
    where ``file_id`` is the execution timestamp, prefixed with
    ``self.file_id`` when that attribute is not None. If ``self.plot`` is
    truthy the figure is also shown on screen.

    The figure is always closed before returning, including when saving or
    showing raises, so failed runs do not leave figures open in pyplot.

    :param first_half: first half of the time series values
    :param second_half: second half of the time series values
    :param time_series_min: minimum value of the time series
    :param time_series_max: maximum value of the time series
    """
    figure = plt.figure()
    try:
        plt.scatter(first_half, second_half, label='data', color='red', s=2)
        plt.title('Innovative Trend Analysis')
        plt.xlabel('First half of the series')
        plt.xlim(time_series_min, time_series_max)
        plt.ylabel('Second half of the series')
        plt.ylim(time_series_min, time_series_max)

        # No trend line: the identity y = x drawn across the whole axis range.
        x_no_trend = np.array([time_series_min, time_series_max])
        plt.plot(x_no_trend, x_no_trend, label='(1:1) No trend line',
                 color='black', linewidth=0.75, linestyle='-')
        plt.legend()

        # Save file with timestamp of the execution
        timestamp = generate_timestamp()
        if self.file_id is not None:
            file_id = f"{self.file_id}_{timestamp}"
        else:
            file_id = timestamp
        plt.savefig(f"{self.save_path}/{self.save_name}_{file_id}.{self.save_format}")

        if self.plot:
            plt.show()
    finally:
        # Close this specific figure even if saving/showing failed.
        plt.close(figure)
def trend_detection_comparison(methods_list: List[Method], file_prefix: str, folder: str = GENERATED_DIR) -> pd.DataFrame:
    """
    Run every detection method on every matching file and tabulate the output.

    All paths matching ``<folder>/<file_prefix>*`` are processed in sorted
    order. Each file is loaded with ``read_generated_csv`` and passed to
    ``detect_trend`` of every method in `methods_list`. The resulting table
    is indexed by method name and has one column per file name (as returned
    by ``get_name_from_path``). It is written to
    ``<RESULTS_DIR>/trend_detection_<file_prefix>_<timestamp>.csv`` and also
    returned.

    :param methods_list: list with the methods that will perform the detection
    :param file_prefix: prefix of the files that will be used
    :param folder: folder that contains the files. Default: `GENERATED_DIR`
    :return: table with the detection result of every method for every file
    """
    method_names = [method.name for method in methods_list]
    timestamp = generate_timestamp()

    # Process the files in file-name order.
    matching_files = sorted(glob.glob(f'{folder}/{file_prefix}*'))

    results = {}
    for file_path in matching_files:
        name = get_name_from_path(file_path)
        # Only the x/y series are needed for detection.
        x, y, _trend, _seasonality, _noise = read_generated_csv(file_path)
        results[name] = [method.detect_trend(x, y) for method in methods_list]

    table = pd.DataFrame(data=results, index=method_names)
    table.to_csv(
        f'{RESULTS_DIR}/trend_detection_{file_prefix}_{timestamp}.csv')
    return table
def main(cfg: Config) -> None:
    """Train and test the segmentation model, then save the submission.

    Steps, in order: create a timestamped run directory under
    ``cfg.output_dir``; seed everything when ``cfg.seed`` is set; build the
    data module, the loss, the model, the W&B logger and the trainer from
    `cfg`; fit; test; and dump ``model.submission`` as JSON to
    ``<run_dir>/submission.json``.

    :param cfg: run configuration providing the attributes read below
    """
    # Every run gets its own sub-directory named with a timestamp.
    run_dir = Path(to_absolute_path(cfg.output_dir)) / generate_timestamp()
    run_dir.mkdir(parents=True)

    # Set all seeds for reproducibility
    if cfg.seed is not None:
        pl.seed_everything(seed=cfg.seed)

    # ------------------------
    # DATAMODULE
    # ------------------------
    data_dir = Path(to_absolute_path(cfg.data_dir))

    # Optional selections are forwarded by name; None is passed through as-is.
    if cfg.teams is None:
        teams = None
    else:
        teams = [team.name for team in cfg.teams]  # type: ignore

    if cfg.test_teams is None:
        test_teams = None
    else:
        test_teams = [team.name for team in cfg.test_teams]  # type: ignore

    if cfg.crop is None:
        crop = None
    else:
        crop = cfg.crop.name  # type: ignore

    dm = AcreCascadeDataModule(
        data_dir=data_dir,
        train_batch_size=cfg.train_batch_size,
        val_batch_size=cfg.val_batch_size,
        val_pcnt=cfg.val_pcnt,
        num_workers=cfg.num_workers,
        download=cfg.download,
        teams=teams,
        test_teams=test_teams,
        crop=crop,
    )

    # ------------------------
    # LIGHTNING MODEL
    # ------------------------
    # Each loss term is mapped to its configured weight.
    weighted_losses = {
        CrossEntropyLoss(): cfg.xent_weight,
        DiceLoss(): cfg.dice_weight,
    }
    loss_fn = MultiLoss(weighted_losses)

    model = UNetSegModel(
        num_classes=dm.num_classes,
        num_layers=cfg.num_layers,
        features_start=cfg.features_start,
        lr=cfg.lr,
        bilinear=cfg.bilinear,
        loss_fn=loss_fn,
        T_max=cfg.T_max,
    )

    # ------------------------
    # LOGGER
    # ------------------------
    logged_config = OmegaConf.to_container(cfg, resolve=True, enum_to_str=True)
    logger = WandbLogger(config=logged_config, offline=cfg.log_offline)

    # ------------------------
    # TRAINER
    # ------------------------
    precision = 16 if cfg.use_amp else 32
    trainer = pl.Trainer(
        gpus=cfg.gpus,
        logger=logger,
        max_epochs=cfg.epochs,
        precision=precision,
        log_every_n_steps=1,
    )

    # ------------------------
    # TRAINING AND TESTING
    # ------------------------
    trainer.fit(model=model, datamodule=dm)
    trainer.test(model=model, datamodule=dm)

    # ------------------------
    # SAVE THE SUBMISSION
    # ------------------------
    submission_fp = run_dir / "submission.json"
    with submission_fp.open("w") as f:
        json.dump(model.submission, f)

    LOGGER.info(f"Submission saved to {submission_fp.resolve()}")
def experiment(
    data_dir: Path = typer.Option("data", "--data-dir", "-d"),
    output_dir: Path = typer.Option("output", "--output", "-o"),
    train_batch_size: int = typer.Option(16, "--train-batch-size"),
    val_batch_size: int = typer.Option(32, "--val-batch-size"),
    val_pcnt: float = typer.Option(0.2, "--val-pcnt"),
    num_workers: int = typer.Option(4, "--num-workers"),
    lr: float = typer.Option(1.0e-3, "--learning-rate", "-lr"),
    # The first positional argument of typer.Option is the *default*, not the
    # flag name. `...` marks these two options as required instead of giving
    # them the non-numeric string default the flag name would become.
    num_layers: int = typer.Option(..., "--num-layers"),
    features_start: int = typer.Option(..., "--features-start"),
    bilinear: bool = typer.Option(False, "--bilinear"),
    log_to_wandb: bool = typer.Option(False, "--log-to-wandb"),
    gpus: int = typer.Option(0, "--gpus"),
    epochs: int = typer.Option(100, "--epochs"),
    use_amp: bool = typer.Option(False, "--use-amp"),
    seed: Optional[int] = typer.Option(47, "--seed"),
    download: bool = typer.Option(False, "--download", "-dl"),
) -> None:
    """Train and test the segmentation model, then save the submission.

    Creates a timestamped run directory under `output_dir`, seeds everything
    when `seed` is not None, builds the data module (fixed to team "Roseau"
    and crop "Haricot"), the model and the trainer, runs fit and test, and
    writes ``model.submission`` as JSON to ``<run_dir>/submission.json``.

    ``--num-layers`` and ``--features-start`` have no default and must be
    supplied on the command line.
    """
    # Create a subdir within the output dir named with a timestamp
    run_dir = output_dir / generate_timestamp()
    run_dir.mkdir(parents=True)

    # Set all seeds for reproducibility
    if seed is not None:
        pl.seed_everything(seed=seed)

    # ------------------------
    # 1 INIT DATAMODULE
    # ------------------------
    dm = AcreCascadeDataModule(
        data_dir=data_dir,
        train_batch_size=train_batch_size,
        val_batch_size=val_batch_size,
        val_pcnt=val_pcnt,
        num_workers=num_workers,
        download=download,
        teams=["Roseau"],
        crop="Haricot",
    )

    # ------------------------
    # 2 INIT LIGHTNING MODEL
    # ------------------------
    model = UNetSegModel(
        num_classes=dm.num_classes,
        num_layers=num_layers,
        features_start=features_start,
        lr=lr,
        bilinear=bilinear,
    )

    # ------------------------
    # 3 SET LOGGER
    # ------------------------
    # `False` is handed to the trainer unless W&B logging was requested.
    logger: Union[bool, WandbLogger] = False
    if log_to_wandb:
        logger = WandbLogger()
        # optional: log model topology
        logger.watch(model.net)

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = pl.Trainer(
        gpus=gpus,
        logger=logger,
        max_epochs=epochs,
        precision=16 if use_amp else 32,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model=model, datamodule=dm)

    # ------------------------
    # 6 START TESTING
    # ------------------------
    trainer.test(model=model, datamodule=dm)

    # ------------------------
    # 7 SAVE THE SUBMISSION
    # ------------------------
    submission_fp = run_dir / "submission.json"
    with open(submission_fp, "w") as f:
        json.dump(model.submission, f)

    typer.echo(f"Submission saved to {submission_fp.resolve()}")
def trend_estimation_comparison(methods_list: List[Method], file_prefix: str, folder: str = GENERATED_DIR) -> pd.DataFrame:
    """
    Compare the trend estimated by every method against the ground truth.

    All paths matching ``<folder>/<file_prefix>*`` are processed in sorted
    order. Each file is loaded with ``read_generated_csv`` and, for every
    method in `methods_list`, the distance between ``estimate_trend(x, y)``
    and the file's true trend is computed with ``np.linalg.norm``. Each
    method is executed once per file. A progress fraction is printed after
    each file is started.

    The resulting table is indexed by method name and has one column per
    file name (as returned by ``get_name_from_path``). It is written to
    ``<RESULTS_DIR>/trend_estimation_<file_prefix>_<timestamp>.csv`` and
    also returned.

    :param methods_list: list with the methods that will perform the estimation
    :param file_prefix: prefix of the files that will be used
    :param folder: folder that contains the files. Default: `GENERATED_DIR`
    :return: table with the distance to the true trend of every method for
        every file
    """
    method_names = [method.name for method in methods_list]
    timestamp = generate_timestamp()

    files = sorted(glob.glob(f'{folder}/{file_prefix}*'))  # sort by file name

    results = dict()
    for position, file in enumerate(files, start=1):
        print('progress: ', position / len(files))
        name = get_name_from_path(file)
        # Seasonality and noise components are not needed for the comparison.
        x, y, trend, _seasonality, _noise = read_generated_csv(file)
        results[name] = [
            np.linalg.norm(method.estimate_trend(x, y) - trend)
            for method in methods_list
        ]

    table = pd.DataFrame(results, index=method_names)
    table.to_csv(
        f'{RESULTS_DIR}/trend_estimation_{file_prefix}_{timestamp}.csv')
    return table