def get_data_loaders(conf, train_part='filterbank'):
    train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                            sample_rate=conf['data']['sample_rate'],
                            nondefault_nsrc=conf['data']['nondefault_nsrc'],
                            normalize_audio=True)
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'],
                          normalize_audio=True)
    if train_part not in ['filterbank', 'separator']:
        raise ValueError('Part to train: {} is not available.'.format(train_part))
    train_loader = DataLoader(train_set, shuffle=True, drop_last=True,
                              batch_size=conf[train_part + '_training'][train_part[0] + '_batch_size'],
                              num_workers=conf[train_part + '_training'][train_part[0] + '_num_workers'])
    # Validation data is not shuffled.
    val_loader = DataLoader(val_set, shuffle=False, drop_last=True,
                            batch_size=conf[train_part + '_training'][train_part[0] + '_batch_size'],
                            num_workers=conf[train_part + '_training'][train_part[0] + '_num_workers'])
    # Update number of source values (It depends on the task)
    conf['masknet'].update({'n_src': train_set.n_src})
    return train_loader, val_loader

def main(conf):
    # from asteroid.data.toy_data import WavSet
    # train_set = WavSet(n_ex=1000, n_src=2, ex_len=32000)
    # val_set = WavSet(n_ex=1000, n_src=2, ex_len=32000)

    # Define data pipeline
    train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                            sample_rate=conf['data']['sample_rate'],
                            nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['training']['batch_size'],
                              num_workers=conf['training']['num_workers'])
    val_loader = DataLoader(val_set, shuffle=False,
                            batch_size=conf['training']['batch_size'],
                            num_workers=conf['training']['num_workers'])
    conf['masknet'].update({'n_src': train_set.n_src})

    # Define model and optimizer in a local function (defined in the recipe).
    # Two advantages to this : re-instantiating the model and optimizer
    # for retraining and evaluating is straight-forward.
    model, optimizer = make_model_and_optimizer(conf)

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf['main_args']['exp_dir']
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, 'conf.yml')
    with open(conf_path, 'w') as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    # loss_class = PITLossContainer(pairwise_neg_sisdr, n_src=train_set.n_src)

    # Checkpointing callback can monitor any quantity which is returned by
    # validation step, defaults to val_loss here (see System).
    checkpoint_dir = os.path.join(exp_dir, 'checkpoints/')
    checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss',
                                 mode='min', save_best_only=False)
    # New PL version will come the 7th of december / will have save_top_k

    system = System(model=model, loss_func=loss_func, optimizer=optimizer,
                    train_loader=train_loader, val_loader=val_loader,
                    config=conf)
    trainer = pl.Trainer(max_nb_epochs=conf['training']['epochs'],
                         checkpoint_callback=checkpoint,
                         default_save_path=exp_dir,
                         gpus=conf['main_args']['gpus'],
                         distributed_backend='dp')
    trainer.fit(system)

def main(conf):
    # Define data pipeline with datasets and loaders
    train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                            sample_rate=conf['data']['sample_rate'],
                            nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['data']['batch_size'],
                              num_workers=conf['data']['num_workers'])
    val_loader = DataLoader(val_set, shuffle=True,
                            batch_size=conf['data']['batch_size'],
                            num_workers=conf['data']['num_workers'])
    loaders = {'train_loader': train_loader, 'val_loader': val_loader}

    # Define model
    # First define the encoder and the decoder.
    # This can either be done by passing a string and the config
    # dictionary (with number of filters, filter size and stride, see conf.yml)
    # to fb.make_enc_dec:
    enc, dec = fb.make_enc_dec('free', **conf['filterbank'])
    # or by instantiating the filterbanks and passing them to the
    # Encoder and Decoder classes, as follows:
    # enc = fb.Encoder(fb.FreeFB(**conf['filterbank']))
    # dec = fb.Decoder(fb.FreeFB(**conf['filterbank']))

    # Define the mask network with input and output dimensions dictated by
    # the encoder (also passing a dictionary defined in conf.yml).
    masker = TDConvNet(in_chan=enc.filterbank.n_feats_out,
                       out_chan=enc.filterbank.n_feats_out,
                       n_src=train_set.n_src, **conf['masknet'])
    # Pass the encoder, masker and decoder to the container class, which
    # handles the forward pass for such architectures.
    model = nn.DataParallel(Container(enc, masker, dec))
    if conf['main_args']['use_cuda']:
        model.cuda()

    # Define Loss function
    loss_class = PITLossContainer(pairwise_neg_sisdr, n_src=train_set.n_src)
    # Define optimizer
    optimizer = make_optimizer(model.parameters(), **conf['optim'])

    # Pass everything to the solver with a training dictionary defined in
    # the conf.yml file. Finally, call .train() and that's it.
    solver = Solver(loaders, model, loss_class, optimizer,
                    model_path=conf['main_args']['model_path'],
                    **conf['training'])
    solver.train()

def main(conf):
    model = get_model(conf)
    test_set = WhamDataset(conf['test_dir'], conf['task'],
                           sample_rate=conf['sample_rate'],
                           nondefault_nsrc=conf['nondefault_nsrc'],
                           segment=None)
    loss_func = PITLossWrapper(pairwise_neg_sisdr, mode='pairwise')
    model_device = next(model.parameters()).device
    for idx in range(len(test_set)):
        mix, sources, _ = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix)
        loss, reordered_sources = loss_func(sources, est_sources,
                                            return_est=True)
        mix_np = mix.data.numpy()[0]
        sources_np = sources.data.numpy()[0]
        est_sources_np = reordered_sources.data.numpy()[0]
        # Waiting for pb_bss support to compute subset of metrics.
        # We will probably want SI-SDR, + add option for mir_eval SDR,
        # stoi, pesq
        input_metrics = InputMetrics(observation=mix_np,
                                     speech_source=sources_np,
                                     enable_si_sdr=True,
                                     sample_rate=conf["sample_rate"])
        output_metrics = OutputMetrics(speech_prediction=est_sources_np,
                                       speech_source=sources_np,
                                       enable_si_sdr=True,
                                       sample_rate=conf["sample_rate"])

def main(conf):
    # Define data pipeline
    train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                            sample_rate=conf['data']['sample_rate'],
                            nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['data']['batch_size'],
                              num_workers=conf['data']['num_workers'])
    val_loader = DataLoader(val_set, shuffle=True,
                            batch_size=conf['data']['batch_size'],
                            num_workers=conf['data']['num_workers'])
    loaders = {'train_loader': train_loader, 'val_loader': val_loader}

    # Define model
    # The encoder and decoder can directly be made from the dictionary.
    encoder, decoder = filterbanks.make_enc_dec(**conf['filterbank'])
    # The input post-processing changes the dimensions of input features to
    # the mask network. Different types of masks impose different output
    # dimensions on the mask network's output. We correct for these here.
    nn_in = int(encoder.n_feats_out * encoder.in_chan_mul)
    nn_out = int(encoder.n_feats_out * encoder.out_chan_mul)
    masker = TDConvNet(in_chan=nn_in, out_chan=nn_out,
                       n_src=train_set.n_src, **conf['masknet'])
    # The model is defined in Container, which is passed to DataParallel.
    model = nn.DataParallel(Container(encoder, masker, decoder))
    if conf['main_args']['use_cuda']:
        model.cuda()

    # Define Loss function : Here we use time domain SI-SDR.
    loss_class = PITLossContainer(pairwise_neg_sisdr, n_src=train_set.n_src)
    # Define optimizer : can be instantiated from a dictionary as well.
    optimizer = make_optimizer(model.parameters(), **conf['optim'])

    # Pass everything to the solver and train
    solver = Solver(loaders, model, loss_class, optimizer,
                    model_path=conf['main_args']['model_path'],
                    **conf['training'])
    # solver.train()
    solver.run_one_epoch(0, validation=True)

def test_upload():
    # Make dirs
    os.makedirs("tmp/publish_dir", exist_ok=True)
    populate_wham_dir("tmp/wham")
    # Dataset and NN
    train_set = WhamDataset("tmp/wham", task="sep_clean")
    model = ConvTasNet(n_src=2, n_repeats=2, n_blocks=2, bn_chan=16,
                       hid_chan=4, skip_chan=8, n_filters=32)
    # Save publishable
    model_conf = model.serialize()
    model_conf.update(train_set.get_infos())
    save_publishable("tmp/publish_dir", model_conf, metrics={}, train_conf={})
    # Upload
    token = os.getenv("ACCESS_TOKEN")
    if token:  # ACCESS_TOKEN is not available on forks.
        zen, current = upload_publishable(
            "tmp/publish_dir",
            uploader="Manuel Pariente",
            affiliation="INRIA",
            use_sandbox=True,
            unit_test=True,  # Remove this argument and monkeypatch `input()`
            git_username="******",
        )
        # Assert metadata is correct
        meta = current.json()["metadata"]
        assert meta["creators"][0]["name"] == "Manuel Pariente"
        assert meta["creators"][0]["affiliation"] == "INRIA"
        assert "asteroid-models" in [d["identifier"] for d in meta["communities"]]
        # Clean up
        zen.remove_deposition(current.json()["id"])
    shutil.rmtree("tmp/wham")

def get_data_loaders(conf, train_part="filterbank"):
    train_set = WhamDataset(
        conf["data"]["train_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
        normalize_audio=True,
    )
    val_set = WhamDataset(
        conf["data"]["valid_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
        normalize_audio=True,
    )
    if train_part not in ["filterbank", "separator"]:
        raise ValueError("Part to train: {} is not available.".format(train_part))
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        drop_last=True,
        batch_size=conf[train_part + "_training"][train_part[0] + "_batch_size"],
        num_workers=conf[train_part + "_training"][train_part[0] + "_num_workers"],
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        drop_last=True,
        batch_size=conf[train_part + "_training"][train_part[0] + "_batch_size"],
        num_workers=conf[train_part + "_training"][train_part[0] + "_num_workers"],
    )
    # Update number of source values (It depends on the task)
    conf["masknet"].update({"n_src": train_set.n_src})
    return train_loader, val_loader

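# The batch-size and num-workers lookups above assume a two-stage config layout,
# with one training block per trainable part and keys prefixed by the part's first
# letter. The excerpt below is a hypothetical conf that would satisfy those lookups
# (key names are inferred from the indexing above, values are purely illustrative).
example_conf = {
    "data": {
        "train_dir": "data/wav8k/min/tr",
        "valid_dir": "data/wav8k/min/cv",
        "task": "sep_clean",
        "sample_rate": 8000,
        "nondefault_nsrc": None,
    },
    "filterbank_training": {"f_batch_size": 8, "f_num_workers": 4},
    "separator_training": {"s_batch_size": 4, "s_num_workers": 4},
    "masknet": {},  # n_src is filled in by get_data_loaders
}
# train_loader, val_loader = get_data_loaders(example_conf, train_part="separator")
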
def test_upload():
    # Make dirs
    os.makedirs('tmp/publish_dir', exist_ok=True)
    populate_wham_dir('tmp/wham')
    # Dataset and NN
    train_set = WhamDataset('tmp/wham', task='sep_clean')
    model = ConvTasNet(n_src=2, n_repeats=2, n_blocks=2, bn_chan=16,
                       hid_chan=4, skip_chan=8, n_filters=32)
    # Save publishable
    model_conf = model.serialize()
    model_conf.update(train_set.get_infos())
    save_publishable('tmp/publish_dir', model_conf, metrics={}, train_conf={})
    # if False:
    # Upload
    zen, current = upload_publishable(
        'tmp/publish_dir',
        uploader="Manuel Pariente",
        affiliation="INRIA",
        use_sandbox=True,
        unit_test=True,  # Remove this argument and monkeypatch `input()`
    )
    # Assert metadata is correct
    meta = current.json()['metadata']
    assert meta['creators'][0]['name'] == "Manuel Pariente"
    assert meta['creators'][0]['affiliation'] == "INRIA"
    assert 'asteroid-models' in [d['identifier'] for d in meta['communities']]
    # Clean up
    zen.remove_deposition(current.json()['id'])
    shutil.rmtree('tmp/wham')

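# The `unit_test=True` comment above suggests that the interactive path goes through
# `input()`. The sketch below is a hypothetical pytest variant of that call, assuming
# upload_publishable() asks for confirmation via input() when unit_test is not passed;
# the prompt behaviour is an assumption, not something this snippet confirms.
def test_upload_interactive(monkeypatch):
    # Answer "y" to any confirmation prompt instead of passing unit_test=True.
    monkeypatch.setattr("builtins.input", lambda *args: "y")
    zen, current = upload_publishable(
        'tmp/publish_dir',
        uploader="Manuel Pariente",
        affiliation="INRIA",
        use_sandbox=True,
    )
    zen.remove_deposition(current.json()['id'])
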
def main(conf):
    train_set = WhamDataset(
        conf["data"]["train_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        segment=conf["data"]["segment"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
    )
    val_set = WhamDataset(
        conf["data"]["valid_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
    )
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    # Update number of source values (It depends on the task)
    conf["masknet"].update({"n_src": train_set.n_src})
    model = DPTNet(**conf["filterbank"], **conf["masknet"])
    optimizer = make_optimizer(model.parameters(), **conf["optim"])

    from asteroid.engine.schedulers import DPTNetScheduler

    schedulers = {
        "scheduler": DPTNetScheduler(
            optimizer, len(train_loader) // conf["training"]["batch_size"], 64
        ),
        "interval": "step",
    }

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    system = System(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        scheduler=schedulers,
        train_loader=train_loader,
        val_loader=val_loader,
        config=conf,
    )

    # Define callbacks
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    early_stopping = False
    if conf["training"]["early_stop"]:
        early_stopping = EarlyStopping(monitor="val_loss", patience=30, verbose=True)

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stopping,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend="ddp",
        gradient_clip_val=conf["training"]["gradient_clipping"],
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    state_dict = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=state_dict["state_dict"])
    system.cpu()
    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))

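# The `schedulers` dict above follows PyTorch Lightning's scheduler-configuration
# convention, where "interval": "step" asks Lightning to call scheduler.step() every
# batch rather than every epoch. Asteroid's System consumes this dict internally; the
# class below is only an illustrative sketch of the underlying Lightning convention,
# not asteroid code.
import pytorch_lightning as pl


class SchedulerSketch(pl.LightningModule):
    def __init__(self, model, optimizer, scheduler):
        super().__init__()
        self.model, self.optimizer, self.scheduler = model, optimizer, scheduler

    def configure_optimizers(self):
        # Returning (optimizers, scheduler dicts) with "interval": "step"
        # makes Lightning step the scheduler once per training batch.
        return [self.optimizer], [{"scheduler": self.scheduler, "interval": "step"}]
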
def main(conf):
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=None,
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        _, indxs = torch.sort(
            torch.sqrt(torch.mean(est_sources ** 2, dim=-1)), descending=True
        )
        indxs = indxs[:, :2]
        # we know a-priori that there are 2 sources in WHAM-clean (WSJ0-2mix clean)
        # so we sort the estimated signals and take only the two with highest energy.
        est_sources = est_sources.gather(
            1, indxs.unsqueeze(-1).repeat(1, 1, est_sources.shape[-1])
        )
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        utt_metrics["mix_path"] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                         conf["sample_rate"])
            for src_idx, est_src in enumerate(est_sources_np):
                est_src *= np.max(np.abs(mix_np)) / np.max(np.abs(est_src))
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    publishable = save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )

def main(conf):
    train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                            sample_rate=conf['data']['sample_rate'],
                            nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['training']['batch_size'],
                              num_workers=conf['training']['num_workers'],
                              drop_last=True)
    val_loader = DataLoader(val_set, shuffle=False,
                            batch_size=conf['training']['batch_size'],
                            num_workers=conf['training']['num_workers'],
                            drop_last=True)
    # Update number of source values (It depends on the task)
    conf['masknet'].update({'n_src': train_set.n_src})

    # Define model and optimizer
    model = ConvTasNet(**conf['filterbank'], **conf['masknet'])
    optimizer = make_optimizer(model.parameters(), **conf['optim'])
    # Define scheduler
    scheduler = None
    if conf['training']['half_lr']:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf['main_args']['exp_dir']
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, 'conf.yml')
    with open(conf_path, 'w') as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    system = System(model=model, loss_func=loss_func, optimizer=optimizer,
                    train_loader=train_loader, val_loader=val_loader,
                    scheduler=scheduler, config=conf)

    # Define callbacks
    checkpoint_dir = os.path.join(exp_dir, 'checkpoints/')
    checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss',
                                 mode='min', save_top_k=5, verbose=1)
    early_stopping = False
    if conf['training']['early_stop']:
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=conf['training']['epochs'],
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stopping,
        default_save_path=exp_dir,
        gpus=gpus,
        distributed_backend='dp',
        train_percent_check=1.0,  # Useful for fast experiment
        gradient_clip_val=5.)
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    # Save best model (next PL version will make this easier)
    best_path = [b for b, v in best_k.items() if v == min(best_k.values())][0]
    state_dict = torch.load(best_path)
    system.load_state_dict(state_dict=state_dict['state_dict'])
    system.cpu()
    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, 'best_model.pth'))

def main(conf):
    if conf["data"]["data_augmentation"]:
        from local.augmented_wham import AugmentedWhamDataset
        train_set = AugmentedWhamDataset(task=conf['data']['task'],
                                         segment=conf['data']['segment'],
                                         json_dir=conf["data"]["train_dir"],
                                         sample_rate=conf['data']['sample_rate'],
                                         nondefault_nsrc=conf['data']['nondefault_nsrc'],
                                         **conf["augmentation"])
    else:
        train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                                sample_rate=conf['data']['sample_rate'],
                                segment=conf['data']['segment'],
                                nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['training']['batch_size'],
                              num_workers=conf['training']['num_workers'],
                              drop_last=True)
    val_loader = DataLoader(val_set, shuffle=False,
                            batch_size=conf['training']['batch_size'],
                            num_workers=conf['training']['num_workers'],
                            drop_last=True)
    # Update number of source values (It depends on the task)
    conf['masknet'].update({'n_src': train_set.n_src})

    # Define model and optimizer in a local function (defined in the recipe).
    # Two advantages to this : re-instantiating the model and optimizer
    # for retraining and evaluating is straight-forward.
    model, optimizer = make_model_and_optimizer(conf)
    # Define scheduler
    scheduler = None
    if conf['training']['half_lr']:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf['main_args']['exp_dir']
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, 'conf.yml')
    with open(conf_path, 'w') as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')
    system = System(model=model, loss_func=loss_func, optimizer=optimizer,
                    train_loader=train_loader, val_loader=val_loader,
                    scheduler=scheduler, config=conf)

    # Define callbacks
    checkpoint_dir = os.path.join(exp_dir, 'checkpoints/')
    checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss',
                                 mode='min', save_top_k=5, verbose=1)
    early_stopping = False
    if conf['training']['early_stop']:
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    # Don't ask GPU if they are not available.
    if not torch.cuda.is_available():
        print('No available GPU were found, set gpus to None')
        conf['main_args']['gpus'] = None
    trainer = pl.Trainer(max_nb_epochs=conf['training']['epochs'],
                         checkpoint_callback=checkpoint,
                         early_stop_callback=early_stopping,
                         default_save_path=exp_dir,
                         gpus=conf['main_args']['gpus'],
                         distributed_backend='dp',
                         gradient_clip_val=conf['training']["gradient_clipping"])
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

def main(conf):
    model_path = os.path.join(conf['exp_dir'], 'best_model.pth')
    model = DPRNNTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(conf['test_dir'], conf['task'],
                           sample_rate=conf['sample_rate'],
                           nondefault_nsrc=model.masker.n_src,
                           segment=None)  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf['n_save_ex'])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        loss, reordered_sources = loss_func(est_sources, sources[None],
                                            return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.squeeze().cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze().cpu().data.numpy()
        utt_metrics = get_metrics(mix_np, sources_np, est_sources_np,
                                  sample_rate=conf['sample_rate'])
        utt_metrics['mix_path'] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, 'ex_{}/'.format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0],
                     conf['sample_rate'])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src,
                         conf['sample_rate'])
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                         est_src, conf['sample_rate'])
            # Write local metrics to the example folder.
            with open(local_save_dir + 'metrics.json', 'w') as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)

    model_dict = torch.load(model_path, map_location='cpu')
    publishable = save_publishable(
        os.path.join(conf['exp_dir'], 'publish_dir'), model_dict,
        metrics=final_results, train_conf=train_conf)

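# The bare torch.no_grad().__enter__() call above keeps the whole evaluation loop
# gradient-free without indenting it. An equivalent, more conventional formulation
# wraps the loop in the context manager or decorates a helper function; the sketch
# below elides the loop body, which would be the forward pass and metric code above.
import torch


@torch.no_grad()  # disables autograd for everything inside the function
def run_eval_loop(model, test_set):
    for idx in range(len(test_set)):
        ...  # forward pass, metric computation and example saving as above

# or, equivalently, inside main():
# with torch.no_grad():
#     for idx in tqdm(range(len(test_set))):
#         ...
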
def main(conf):
    assert (
        conf["training"]["batch_size"] % 2 == 0
    ), "Batch size must be divisible by two to run this recipe"
    train_set = WhamDataset(
        conf["data"]["train_dir"],
        "sep_clean",
        sample_rate=conf["data"]["sample_rate"],
        segment=conf["data"]["segment"],
        nondefault_nsrc=None,
    )
    val_set = WhamDataset(
        conf["data"]["valid_dir"],
        "sep_clean",
        sample_rate=conf["data"]["sample_rate"],
        nondefault_nsrc=None,
    )
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    model = DPRNNTasNet(
        **conf["filterbank"], **conf["masknet"], sample_rate=conf["data"]["sample_rate"]
    )
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = None
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = {
        "pit": PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx"),
        "mixit": MixITLossWrapper(pairwise_neg_sisdr, generalized=True),
    }

    system = MixITSystem(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    callbacks = []
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    callbacks.append(checkpoint)
    if conf["training"]["early_stop"]:
        callbacks.append(
            EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True)
        )

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    distributed_backend = "ddp" if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        callbacks=callbacks,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend=distributed_backend,
        gradient_clip_val=conf["training"]["gradient_clipping"],
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    state_dict = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=state_dict["state_dict"])
    system.cpu()
    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))

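# The batch-size assertion at the top of this recipe reflects how MixIT builds its
# training targets: pairs of mixtures from a batch are summed into "mixtures of
# mixtures", and MixITLossWrapper matches the estimated sources back to the two
# original mixtures. The pairing sketch below is illustrative only (it is not the
# actual MixITSystem code) and assumes a (batch, time) mixture tensor.
import torch


def make_mixtures_of_mixtures(mix):
    # mix: (batch, time), with an even batch size as enforced by the assert above.
    moms = mix[0::2] + mix[1::2]                          # (batch / 2, time)
    targets = torch.stack([mix[0::2], mix[1::2]], dim=1)  # (batch / 2, 2, time)
    return moms, targets

# Hypothetical use inside a training step:
# moms, targets = make_mixtures_of_mixtures(mix)
# est_sources = model(moms)                      # (batch / 2, n_src, time)
# loss = loss_func["mixit"](est_sources, targets)
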
def main(conf):
    train_set = WhamDataset(
        conf["data"]["train_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        segment=conf["data"]["segment"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
    )
    val_set = WhamDataset(
        conf["data"]["valid_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
    )
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    # Update number of source values (It depends on the task)
    conf["masknet"].update({"n_src": train_set.n_src})

    model = DPRNNTasNet(**conf["filterbank"], **conf["masknet"])
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = None
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    system = System(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    checkpoint_dir = os.path.join(exp_dir, 'checkpoints/')
    checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss',
                                 verbose=True, mode='min', save_top_k=5)
    early_stopping = False
    if conf["training"]["early_stop"]:
        early_stopping = EarlyStopping(monitor="val_loss", patience=30, verbose=1)

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=conf['training']['epochs'],
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stopping,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend='ddp',
        gradient_clip_val=conf['training']["gradient_clipping"])
    trainer.fit(system)

    state_dict = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=state_dict['state_dict'])
    system.cpu()
    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))

def main(conf):
    if conf["data"]["data_augmentation"]:
        from local.augmented_wham import AugmentedWhamDataset

        train_set = AugmentedWhamDataset(
            task=conf["data"]["task"],
            segment=conf["data"]["segment"],
            json_dir=conf["data"]["train_dir"],
            sample_rate=conf["data"]["sample_rate"],
            nondefault_nsrc=conf["data"]["nondefault_nsrc"],
            **conf["augmentation"],
        )
    else:
        train_set = WhamDataset(
            conf["data"]["train_dir"],
            conf["data"]["task"],
            sample_rate=conf["data"]["sample_rate"],
            segment=conf["data"]["segment"],
            nondefault_nsrc=conf["data"]["nondefault_nsrc"],
        )
    val_set = WhamDataset(
        conf["data"]["valid_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
    )
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    # Update number of source values (It depends on the task)
    conf["masknet"].update({"n_src": train_set.n_src})

    # Define model and optimizer in a local function (defined in the recipe).
    # Two advantages to this : re-instantiating the model and optimizer
    # for retraining and evaluating is straight-forward.
    model, optimizer = make_model_and_optimizer(conf)
    # Define scheduler
    scheduler = None
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    system = System(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    early_stopping = False
    if conf["training"]["early_stop"]:
        early_stopping = EarlyStopping(monitor="val_loss", patience=30, verbose=True)

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stopping,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend="dp",
        gradient_clip_val=conf["training"]["gradient_clipping"],
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

def main(conf):
    # FIXME : Make a function to return loaders, which takes conf['data'] as input.
    # Where is the mode, min or max?
    train_set = WhamDataset(conf['data']['train_dir'], conf['data']['task'],
                            sample_rate=conf['data']['sample_rate'],
                            nondefault_nsrc=conf['data']['nondefault_nsrc'])
    val_set = WhamDataset(conf['data']['valid_dir'], conf['data']['task'],
                          sample_rate=conf['data']['sample_rate'],
                          nondefault_nsrc=conf['data']['nondefault_nsrc'])
    train_loader = DataLoader(train_set, shuffle=True,
                              batch_size=conf['data']['batch_size'],
                              num_workers=conf['data']['num_workers'],
                              drop_last=True)
    val_loader = DataLoader(val_set, shuffle=True,
                            batch_size=conf['data']['batch_size'],
                            num_workers=conf['data']['num_workers'],
                            drop_last=True)
    # Update number of source values (It depends on the task)
    conf['masknet'].update({'n_src': train_set.n_src})

    # Define model and optimizer in a local function (defined in the recipe).
    # Two advantages to this : re-instantiating the model and optimizer
    # for retraining and evaluating is straight-forward.
    model, optimizer = make_model_and_optimizer(conf)

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf['main_args']['exp_dir']
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, 'conf.yml')
    with open(conf_path, 'w') as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, mode='pairwise')

    # Checkpointing callback can monitor any quantity which is returned by
    # validation step, defaults to val_loss here (see System).
    checkpoint_dir = os.path.join(exp_dir, 'checkpoints/')
    checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_loss',
                                 mode='min', save_best_only=False)
    # New PL version will come the 7th of december / will have save_top_k
    system = System(model=model, loss_func=loss_func, optimizer=optimizer,
                    train_loader=train_loader, val_loader=val_loader,
                    config=conf)

    # Don't ask GPU if they are not available.
    if not torch.cuda.is_available():
        print('No available GPU were found, set gpus to None')
        conf['main_args']['gpus'] = None
    trainer = pl.Trainer(
        max_nb_epochs=conf['training']['epochs'],
        checkpoint_callback=checkpoint,
        default_save_path=exp_dir,
        gpus=conf['main_args']['gpus'],
        distributed_backend='dp',
        train_percent_check=1.0  # Useful for fast experiment
    )
    trainer.fit(system)

def main(conf):
    # from asteroid.data.toy_data import WavSet
    # train_set = WavSet(n_ex=1000, n_src=2, ex_len=32000)
    # val_set = WavSet(n_ex=1000, n_src=2, ex_len=32000)

    # Define data pipeline
    train_set = WhamDataset(
        conf["data"]["train_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
    )
    val_set = WhamDataset(
        conf["data"]["valid_dir"],
        conf["data"]["task"],
        sample_rate=conf["data"]["sample_rate"],
        nondefault_nsrc=conf["data"]["nondefault_nsrc"],
    )
    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
    )
    conf["masknet"].update({"n_src": train_set.n_src})

    # Define model and optimizer in a local function (defined in the recipe).
    # Two advantages to this : re-instantiating the model and optimizer
    # for retraining and evaluating is straight-forward.
    model, optimizer = make_model_and_optimizer(conf)

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    # loss_class = PITLossContainer(pairwise_neg_sisdr, n_src=train_set.n_src)

    # Checkpointing callback can monitor any quantity which is returned by
    # validation step, defaults to val_loss here (see System).
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir, monitor="val_loss", mode="min", save_best_only=False
    )
    # New PL version will come the 7th of december / will have save_top_k
    system = System(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        config=conf,
    )

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_nb_epochs=conf["training"]["epochs"],
        checkpoint_callback=checkpoint,
        default_save_path=exp_dir,
        gpus=gpus,
        distributed_backend="dp",
    )
    trainer.fit(system)

def main(conf):
    model = load_best_model(conf["train_conf"], conf["exp_dir"])
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(
        conf["test_dir"],
        conf["task"],
        sample_rate=conf["sample_rate"],
        nondefault_nsrc=model.masker.n_src,
        segment=None,
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    save_idx = random.sample(range(len(test_set)), conf["n_save_ex"])
    series_list = []
    torch.no_grad().__enter__()
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources = tensors_to_device(test_set[idx], device=model_device)
        est_sources = model(mix[None, None])
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=conf["sample_rate"],
            metrics_list=compute_metrics,
        )
        utt_metrics["mix_path"] = test_set.mix[idx][0]
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx in save_idx:
            local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
            os.makedirs(local_save_dir, exist_ok=True)
            sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["sample_rate"])
            # Loop over the sources and estimates
            for src_idx, src in enumerate(sources_np):
                sf.write(
                    local_save_dir + "s{}.wav".format(src_idx + 1), src,
                    conf["sample_rate"],
                )
            for src_idx, est_src in enumerate(est_sources_np):
                sf.write(
                    local_save_dir + "s{}_estimate.wav".format(src_idx + 1),
                    est_src,
                    conf["sample_rate"],
                )
            # Write local metrics to the example folder.
            with open(local_save_dir + "metrics.json", "w") as f:
                json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)