def test_upload():
    """Build a publishable ConvTasNet bundle from a toy WHAM directory.

    The upload step is currently disabled (``if False``); only the
    ``save_publishable`` path is exercised. The temporary WHAM directory is
    removed even if an earlier step fails.
    """
    # Make dirs
    os.makedirs('tmp/publish_dir', exist_ok=True)
    try:
        populate_wham_dir('tmp/wham')

        # Dataset and NN
        train_set = WhamDataset('tmp/wham', task='sep_clean')
        model = ConvTasNet(n_src=2, n_repeats=2, n_blocks=2, bn_chan=16,
                           hid_chan=4, skip_chan=8, n_filters=32)

        # Save publishable
        model_conf = model.serialize()
        model_conf.update(train_set.get_infos())
        save_publishable('tmp/publish_dir', model_conf, metrics={},
                         train_conf={})

        if False:  # Upload is switched off in this version of the test.
            zen, current = upload_publishable(
                'tmp/publish_dir',
                uploader="Manuel Pariente",
                affiliation="INRIA",
                use_sandbox=True,
                unit_test=True,  # Remove this argument and monkeypatch `input()`
            )
            try:
                # Assert metadata is correct
                meta = current.json()['metadata']
                assert meta['creators'][0]['name'] == "Manuel Pariente"
                assert meta['creators'][0]['affiliation'] == "INRIA"
                assert 'asteroid-models' in [
                    d['identifier'] for d in meta['communities']
                ]
            finally:
                # Remove the deposition even when an assertion fails.
                zen.remove_deposition(current.json()['id'])
    finally:
        # Clean up the generated dataset whatever happened above.
        shutil.rmtree('tmp/wham', ignore_errors=True)
def test_save_and_load_convtasnet(fb):
    """Round-trip a small ConvTasNet through ``serialize``/``from_pretrained``.

    Args:
        fb: Filterbank name forwarded to ``ConvTasNet`` as ``fb_name``.
    """
    arch = dict(
        n_src=2,
        n_repeats=2,
        n_blocks=2,
        bn_chan=16,
        hid_chan=4,
        skip_chan=8,
        n_filters=32,
        fb_name=fb,
    )
    model1 = ConvTasNet(**arch)
    test_input = torch.randn(1, 800)

    # Rebuild a second model from the serialized configuration of the first.
    reconstructed_model = ConvTasNet.from_pretrained(model1.serialize())

    expected = model1.separate(test_input)
    actual = reconstructed_model(test_input)
    assert_allclose(expected, actual)
def test_convtasnet_sep():
    """Check that ``separate`` returns the same container type it was given."""
    nnet = ConvTasNet(n_src=2, n_repeats=2, n_blocks=3, bn_chan=16,
                      hid_chan=4, skip_chan=8, n_filters=32)

    # (input factory, expected output type), run in order: torch, then numpy.
    cases = (
        (lambda: torch.rand(1, 800), torch.Tensor),
        (lambda: np.random.randn(1, 800).astype('float32'), np.ndarray),
    )
    for make_wav, expected_type in cases:
        separated = nnet.separate(make_wav())
        assert isinstance(separated, expected_type)
def test_upload():
    """Save a publishable ConvTasNet and, when a token exists, upload it.

    The upload part only runs when the ``ACCESS_TOKEN`` environment variable
    is set. Cleanup (removing the created deposition and the temporary WHAM
    directory) is done in ``finally`` blocks so that a failing assertion does
    not leave artefacts behind.
    """
    # Make dirs
    os.makedirs("tmp/publish_dir", exist_ok=True)
    try:
        populate_wham_dir("tmp/wham")

        # Dataset and NN
        train_set = WhamDataset("tmp/wham", task="sep_clean")
        model = ConvTasNet(n_src=2, n_repeats=2, n_blocks=2, bn_chan=16,
                           hid_chan=4, skip_chan=8, n_filters=32)

        # Save publishable
        model_conf = model.serialize()
        model_conf.update(train_set.get_infos())
        save_publishable("tmp/publish_dir", model_conf, metrics={},
                         train_conf={})

        # Upload
        token = os.getenv("ACCESS_TOKEN")
        if token:  # ACCESS_TOKEN is not available on forks.
            zen, current = upload_publishable(
                "tmp/publish_dir",
                uploader="Manuel Pariente",
                affiliation="INRIA",
                use_sandbox=True,
                unit_test=True,  # Remove this argument and monkeypatch `input()`
                git_username="******",
            )
            try:
                # Assert metadata is correct
                meta = current.json()["metadata"]
                assert meta["creators"][0]["name"] == "Manuel Pariente"
                assert meta["creators"][0]["affiliation"] == "INRIA"
                assert "asteroid-models" in [
                    d["identifier"] for d in meta["communities"]
                ]
            finally:
                # Remove the deposition even when an assertion fails.
                zen.remove_deposition(current.json()["id"])
    finally:
        # Clean up the generated dataset whatever happened above.
        shutil.rmtree("tmp/wham", ignore_errors=True)
def test_save_and_load_convtasnet(fb):
    """Run the shared ``_default_test_model`` check on a small ConvTasNet.

    Args:
        fb: Filterbank name forwarded to ``ConvTasNet`` as ``fb_name``.
    """
    arch = {
        "n_src": 2,
        "n_repeats": 2,
        "n_blocks": 2,
        "bn_chan": 16,
        "hid_chan": 4,
        "skip_chan": 8,
        "n_filters": 32,
        "fb_name": fb,
    }
    small_model = ConvTasNet(**arch)
    _default_test_model(small_model)
def test_separate():
    """Smoke-test ``separate`` on a ConvTasNet wrapped in ``LambdaOverlapAdd``."""
    arch = {
        "n_src": 2,
        "n_repeats": 2,
        "n_blocks": 3,
        "bn_chan": 16,
        "hid_chan": 4,
        "skip_chan": 8,
        "n_filters": 32,
    }
    nnet = ConvTasNet(**arch)

    # Test torch input
    mixture = torch.rand(1, 1, 8000)
    windowed = LambdaOverlapAdd(nnet, None, window_size=1000)
    # Only checks that the call completes; the result is not inspected.
    separate(windowed, mixture)
def main(conf):
    """Separate one hard-coded SATB quartet mixture and report BSS-eval scores.

    Loads ``best_model.pth`` from ``conf["exp_dir"]``, runs the model on the
    mixture, prints SDR/SIR/SAR against the four reference stems and writes
    the four estimates to the current directory.

    Args:
        conf: Dict providing ``exp_dir`` (str) and ``use_gpu`` (truthy to run
            the model on CUDA).
    """
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = ConvTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    # Reference stems. Their sample rates are ignored; only the mixture's is used.
    alto, _ = sf.read(
        '/home/pc2752/share//Darius/data/satb_dst/test_dcs/raw_audio/DCS_TPQuartetA/DCS_TPQuartetA_alto_1.wav'
    )
    bass, _ = sf.read(
        '/home/pc2752/share//Darius/data/satb_dst/test_dcs/raw_audio/DCS_TPQuartetA/DCS_TPQuartetA_bass_1.wav'
    )
    soprano, _ = sf.read(
        '/home/pc2752/share//Darius/data/satb_dst/test_dcs/raw_audio/DCS_TPQuartetA/DCS_TPQuartetA_soprano_1.wav'
    )
    tenor, _ = sf.read(
        '/home/pc2752/share//Darius/data/satb_dst/test_dcs/raw_audio/DCS_TPQuartetA/DCS_TPQuartetA_tenor_1.wav'
    )
    mix, fs = sf.read(
        '/home/pc2752/share//Darius/Wave-U-Net/test_set_mixes/dcs/DCS_TPQuartetA_mix.wav'
    )

    # The input must live on the same device as the model weights.
    mix = torch.from_numpy(mix).type(torch.FloatTensor).to(model_device)
    # Scoped no_grad: grad mode is restored once inference is done.
    with torch.no_grad():
        outputs = model.float()(mix)
    # Bring estimates back to host memory before handing them to numpy/soundfile.
    outputs = outputs.detach().cpu().numpy()

    # Estimates are cropped to the soprano reference length for scoring only.
    sdr, sir, sar, _ = separation.bss_eval_sources(
        np.array([soprano, alto, tenor, bass]),
        outputs[:, :soprano.shape[0]],
    )
    print("SDR: {}\nSIR: {}\nSAR: {}".format(sdr, sir, sar))

    sf.write('./DCS_TPQuartetA_Soprano.wav', outputs[0], fs)
    sf.write('./DCS_TPQuartetA_Alto.wav', outputs[1], fs)
    sf.write('./DCS_TPQuartetA_Tenor.wav', outputs[2], fs)
    sf.write('./DCS_TPQuartetA_Bass.wav', outputs[3], fs)
def convtasnet(conf):
    """Run the pretrained ConvTasNet use-case model on one audio file.

    Args:
        conf: Dict providing ``use_gpu`` (truthy to run on CUDA) and
            ``input_path`` (path of the audio file to separate).

    Returns:
        The model output tensor for the file, left on the model's device.
    """
    # Make the local asteroid checkout importable, without growing sys.path
    # on every call.
    if './asteroid' not in sys.path:
        sys.path.append('./asteroid')
    from asteroid.models import ConvTasNet

    model_path = './models/convtasnet_usecase2.pth'
    model = ConvTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device

    mix, _ = sf.read(conf["input_path"])
    # The input must live on the same device as the model weights.
    mix = torch.from_numpy(mix).type(torch.FloatTensor).to(model_device)
    # Scoped no_grad: grad mode is restored for the caller afterwards.
    with torch.no_grad():
        outputs = model.float()(mix)
    return outputs
def test_convtasnet_sep():
    """Exercise ``ConvTasNet.separate`` with tensor, array and file-path inputs."""
    arch = dict(
        n_src=2,
        n_repeats=2,
        n_blocks=3,
        bn_chan=16,
        hid_chan=4,
        skip_chan=8,
        n_filters=32,
    )
    nnet = ConvTasNet(**arch)
    wav_path = "tmp.wav"

    # Test torch input
    torch_wav = torch.rand(1, 800)
    assert isinstance(nnet.separate(torch_wav), torch.Tensor)

    # Test numpy input
    numpy_wav = np.random.randn(1, 800).astype("float32")
    assert isinstance(nnet.separate(numpy_wav), np.ndarray)

    # Test str input
    sf.write(wav_path, numpy_wav[0], 8000)
    nnet.separate(wav_path)

    # Warning when overwriting
    with pytest.warns(UserWarning):
        nnet.separate(wav_path)

    # Test with bad samplerate
    sf.write(wav_path, numpy_wav[0], 16000)
    # Raises
    with pytest.raises(RuntimeError):
        nnet.separate(wav_path, force_overwrite=True)
    # Resamples
    nnet.separate(wav_path, force_overwrite=True, resample=True)
def main(conf):
    """Train a ConvTasNet on ``SATBDataset`` and export the best checkpoint.

    Args:
        conf: Nested configuration dict. Reads the ``data``, ``training``,
            ``filterbank``, ``masknet``, ``optim`` and ``main_args`` sections.
            ``conf["masknet"]["n_src"]`` is overwritten in place before the
            configuration is dumped to ``conf.yml``.
    """
    data_cfg = conf["data"]
    train_cfg = conf["training"]

    # Both partitions are read from ``train_dir``; only ``partition`` differs.
    shared_ds_args = dict(
        sample_rate=data_cfg["sample_rate"],
        nondefault_nsrc=data_cfg["nondefault_nsrc"],
    )
    train_set = SATBDataset(data_cfg["train_dir"], **shared_ds_args)
    val_set = SATBDataset(data_cfg["train_dir"], partition="valid",
                          **shared_ds_args)

    shared_loader_args = dict(
        batch_size=train_cfg["batch_size"],
        num_workers=train_cfg["num_workers"],
        drop_last=True,
    )
    train_loader = DataLoader(train_set, shuffle=True, **shared_loader_args)
    val_loader = DataLoader(val_set, shuffle=False, **shared_loader_args)

    # Update number of source values (It depends on the task)
    conf["masknet"]["n_src"] = train_set.n_src

    # Define model and optimizer
    model = ConvTasNet(**conf["filterbank"], **conf["masknet"])
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = (
        ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
        if train_cfg["half_lr"]
        else None
    )

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    with open(os.path.join(exp_dir, "conf.yml"), "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    system = System(
        model=model,
        loss_func=PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx"),
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    checkpoint = ModelCheckpoint(
        os.path.join(exp_dir, "checkpoints/"),
        monitor="val_loss",
        mode="min",
        save_top_k=5,
        verbose=True,
    )
    early_stopping = (
        EarlyStopping(monitor="val_loss", patience=30, verbose=True)
        if train_cfg["early_stop"]
        else False
    )

    trainer = pl.Trainer(
        max_epochs=train_cfg["epochs"],
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stopping,
        default_root_dir=exp_dir,
        # Don't ask GPU if they are not available.
        gpus=-1 if torch.cuda.is_available() else None,
        distributed_backend="dp",
        train_percent_check=1.0,  # Useful for fast experiment
        gradient_clip_val=5.0,
    )
    trainer.fit(system.float())

    # Record the validation score of each retained checkpoint.
    best_k = {
        ckpt_path: score.item()
        for ckpt_path, score in checkpoint.best_k_models.items()
    }
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    # Reload the best weights and export the bare model with dataset infos.
    best_ckpt = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=best_ckpt["state_dict"])
    system.cpu()

    export = system.model.serialize()
    export.update(train_set.get_infos())
    torch.save(export, os.path.join(exp_dir, "best_model.pth"))
def main(conf):
    """Train a ConvTasNet on ``SourceFolderDataset`` and export the best model.

    Args:
        conf: Nested configuration dict. Reads the ``data``, ``training``,
            ``filterbank``, ``masknet``, ``optim`` and ``main_args`` sections.
            ``conf["masknet"]["n_src"]`` is overwritten in place with
            ``n_inst * n_poly`` before the configuration is dumped to
            ``conf.yml``.
    """
    exp_dir = conf["main_args"]["exp_dir"]

    # Define Dataloader
    # NOTE(review): `train_dir` and `val_dir` are not defined in this function;
    # presumably module-level globals. Confirm before relying on this.
    train_set = SourceFolderDataset(
        train_dir,
        train_dir,
        conf["data"]["n_poly"],
        conf["data"]["sample_rate"],
        conf["training"]["batch_size"],
    )
    val_set = SourceFolderDataset(
        val_dir,
        val_dir,
        conf["data"]["n_poly"],
        conf["data"]["sample_rate"],
        conf["training"]["batch_size"],
    )

    # NOTE(review): the training loader does not shuffle; confirm that the
    # dataset handles ordering itself.
    train_loader = data.DataLoader(
        train_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = data.DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )

    # One output source per (instrument, polyphony voice) pair.
    conf["masknet"].update(
        {"n_src": conf["data"]["n_inst"] * conf["data"]["n_poly"]})

    # Define model and optimizer
    model = ConvTasNet(**conf["filterbank"], **conf["masknet"],
                       sample_rate=conf["data"]["sample_rate"])
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = None
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5,
                                      patience=3)

    # Just after instantiating, save the args. Easy loading in the future.
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
    system = System(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    callbacks = []
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(checkpoint_dir, monitor="val_loss",
                                 mode="min", save_top_k=5, verbose=True)
    callbacks.append(checkpoint)
    if conf["training"]["early_stop"]:
        callbacks.append(
            EarlyStopping(monitor="val_loss", mode="min", patience=30,
                          verbose=True))

    # Don't ask GPU if they are not available.
    gpus = -1 if torch.cuda.is_available() else None
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        callbacks=callbacks,
        default_root_dir=exp_dir,
        gpus=gpus,
        distributed_backend="ddp",
        limit_train_batches=1.0,  # Useful for fast experiment
        gradient_clip_val=5.0,
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    # Save best model (next PL version will make this easier).
    # The checkpoint path with the lowest recorded value wins; ties go to the
    # first entry in dict order.
    best_path = min(best_k, key=best_k.get)
    state_dict = torch.load(best_path)
    system.load_state_dict(state_dict=state_dict["state_dict"])
    system.cpu()

    to_save = system.model.serialize()
    to_save.update(train_set.get_infos())
    torch.save(to_save, os.path.join(exp_dir, "best_model.pth"))
def main(conf):
    """Evaluate a trained ConvTasNet on a ``SourceFolderDataset`` test set.

    For every test item the model is run, estimates are reordered with the
    PIT loss wrapper and metrics are computed with ``get_metrics``. A random
    subset of examples is written to ``<exp_dir>/examples/``; per-utterance
    and summary metrics are written to ``<exp_dir>``, and a publishable
    bundle is saved to ``<exp_dir>/publish_dir``.

    Args:
        conf: Dict providing ``exp_dir``, ``use_gpu``, ``wav_dir``, ``n_src``,
            ``sample_rate``, ``batch_size`` and ``n_save_ex``. A value of
            ``-1`` for ``n_save_ex`` is replaced in place by the test-set size.
    """
    model_path = os.path.join(conf["exp_dir"], "best_model.pth")
    model = ConvTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf["use_gpu"]:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = SourceFolderDataset(
        os.path.join(conf["exp_dir"], "json/"),
        conf["wav_dir"],
        conf["n_src"],
        conf["sample_rate"],
        conf["batch_size"],
    )  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf["exp_dir"], "examples/")
    if conf["n_save_ex"] == -1:
        conf["n_save_ex"] = len(test_set)
    # A set gives O(1) membership tests inside the evaluation loop.
    save_idx = set(random.sample(range(len(test_set)), conf["n_save_ex"]))
    series_list = []

    # Scoped no_grad: grad mode is restored once the evaluation loop ends.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx],
                                             device=model_device)
            mix = mix.unsqueeze(0)
            sources = sources.unsqueeze(0)
            est_sources = model(mix)
            _, reordered_sources = loss_func(est_sources, sources,
                                             return_est=True)
            mix_np = mix.cpu().data.numpy()
            sources_np = sources.squeeze(0).cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["sample_rate"],
                metrics_list=compute_metrics,
            )
            utt_metrics["mix_path"] = test_set.mix[idx][0]
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir,
                                              "ex_{}/".format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav",
                         np.swapaxes(mix_np, 0, 1), conf["sample_rate"])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx + 1),
                             src, conf["sample_rate"])
                mix_peak = np.max(np.abs(mix_np))
                for src_idx, est_src in enumerate(est_sources_np):
                    # Rescale the estimate to the mixture peak. An all-zero
                    # estimate is left alone to avoid dividing by zero.
                    est_peak = np.max(np.abs(est_src))
                    if est_peak > 0:
                        est_src *= mix_peak / est_peak
                    sf.write(
                        local_save_dir
                        + "s{}_estimate.wav".format(src_idx + 1),
                        est_src,
                        conf["sample_rate"],
                    )
                # Write local metrics to the example folder.
                with open(local_save_dir + "metrics.json", "w") as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf["exp_dir"], "all_metrics.csv"))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = "input_" + metric_name
        # Improvement of the estimate over the unprocessed input.
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + "_imp"] = ldf.mean()
    print("Overall metrics :")
    pprint(final_results)
    with open(os.path.join(conf["exp_dir"], "final_metrics.json"), "w") as f:
        json.dump(final_results, f, indent=0)

    # `train_conf` is not defined in this function; it is read from the
    # enclosing module scope.
    model_dict = torch.load(model_path, map_location="cpu")
    os.makedirs(os.path.join(conf["exp_dir"], "publish_dir"), exist_ok=True)
    save_publishable(
        os.path.join(conf["exp_dir"], "publish_dir"),
        model_dict,
        metrics=final_results,
        train_conf=train_conf,
    )
def main(conf):
    """Train a ConvTasNet on ``DAMPVSEPSinglesDataset`` with an L1 loss.

    Args:
        conf: Nested configuration dict. Reads the ``data``, ``training``,
            ``filterbank``, ``masknet``, ``optim`` and ``main_args`` sections.
    """
    data_cfg = conf["data"]
    train_cfg = conf["training"]
    n_workers = train_cfg["num_workers"]

    # Define dataloader using ORIGINAL mixture.
    dataset_kwargs = dict(
        root_path=Path(data_cfg["root_path"]),
        sample_rate=data_cfg["sample_rate"],
        num_workers=n_workers,
        mixture=data_cfg["mixture"],
        task=data_cfg["task"],
    )
    train_set = DAMPVSEPSinglesDataset(
        split=f"train_{data_cfg['train_set']}",
        random_segments=True,
        segment=data_cfg["segment"],
        ex_per_track=data_cfg["ex_per_track"],
        **dataset_kwargs,
    )
    val_set = DAMPVSEPSinglesDataset(split="valid", **dataset_kwargs)

    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=train_cfg["batch_size"],
        num_workers=n_workers,
        drop_last=True,
    )
    # Validation runs one item per batch and keeps the last batch.
    val_loader = DataLoader(val_set, shuffle=False, batch_size=1,
                            num_workers=n_workers)

    model = ConvTasNet(**conf["filterbank"], **conf["masknet"])
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = (
        ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
        if train_cfg["half_lr"]
        else None
    )

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    with open(os.path.join(exp_dir, "conf.yml"), "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    # Combine_Loss is not complete. Needs improvement
    # loss_func = Combine_Loss(alpha=conf['training']['loss_alpha'],
    #                          sample_rate=conf['data']['sample_rate'])
    system = System(
        model=model,
        loss_func=torch.nn.L1Loss(),
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    checkpoint = ModelCheckpoint(
        os.path.join(exp_dir, "checkpoints/"),
        monitor="val_loss",
        mode="min",
        save_top_k=10,
        verbose=True,
    )
    early_stopping = (
        EarlyStopping(monitor="val_loss", patience=20, verbose=True)
        if train_cfg["early_stop"]
        else False
    )

    trainer = pl.Trainer(
        max_epochs=train_cfg["epochs"],
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stopping,
        default_root_dir=exp_dir,
        # Don't ask GPU if they are not available.
        gpus=-1 if torch.cuda.is_available() else None,
        distributed_backend="ddp",
        train_percent_check=1.0,  # Useful for fast experiment
        gradient_clip_val=5.0,
    )
    trainer.fit(system)

    # Record the validation score of each retained checkpoint.
    best_k = {
        ckpt_path: score.item()
        for ckpt_path, score in checkpoint.best_k_models.items()
    }
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    # Reload the best weights and export the bare model with dataset infos.
    best_ckpt = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=best_ckpt["state_dict"])
    system.cpu()

    export = system.model.serialize()
    export.update(train_set.get_infos())
    torch.save(export, os.path.join(exp_dir, "best_model.pth"))
def main(conf):
    """Evaluate a trained ConvTasNet on a ``WhamDataset`` test set.

    For every test item the model is run, estimates are reordered with the
    PIT loss wrapper and metrics are computed with ``get_metrics``. A random
    subset of examples is written to ``<exp_dir>/examples/``; per-utterance
    and summary metrics are written to ``<exp_dir>``, and a publishable
    bundle is saved to ``<exp_dir>/publish_dir``.

    Args:
        conf: Dict providing ``exp_dir``, ``use_gpu``, ``test_dir``, ``task``,
            ``sample_rate`` and ``n_save_ex``. A value of ``-1`` for
            ``n_save_ex`` is replaced in place by the test-set size.
    """
    model_path = os.path.join(conf['exp_dir'], 'best_model.pth')
    model = ConvTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device
    test_set = WhamDataset(conf['test_dir'], conf['task'],
                           sample_rate=conf['sample_rate'],
                           nondefault_nsrc=model.masker.n_src,
                           segment=None)  # Uses all segment length
    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    # A set gives O(1) membership tests inside the evaluation loop.
    save_idx = set(random.sample(range(len(test_set)), conf['n_save_ex']))
    series_list = []

    # Scoped no_grad: grad mode is restored once the evaluation loop ends.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx],
                                             device=model_device)
            est_sources = model(mix[None, None])
            _, reordered_sources = loss_func(est_sources, sources[None],
                                             return_est=True)
            mix_np = mix[None].cpu().data.numpy()
            sources_np = sources.cpu().data.numpy()
            est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
            utt_metrics = get_metrics(mix_np, sources_np, est_sources_np,
                                      sample_rate=conf['sample_rate'],
                                      metrics_list=compute_metrics)
            utt_metrics['mix_path'] = test_set.mix[idx][0]
            series_list.append(pd.Series(utt_metrics))

            # Save some examples in a folder. Wav files and metrics as text.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir,
                                              'ex_{}/'.format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                sf.write(local_save_dir + "mixture.wav", mix_np[0],
                         conf['sample_rate'])
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    sf.write(local_save_dir + "s{}.wav".format(src_idx + 1),
                             src, conf['sample_rate'])
                mix_peak = np.max(np.abs(mix_np))
                for src_idx, est_src in enumerate(est_sources_np):
                    # Rescale the estimate to the mixture peak. An all-zero
                    # estimate is left alone to avoid dividing by zero.
                    est_peak = np.max(np.abs(est_src))
                    if est_peak > 0:
                        est_src *= mix_peak / est_peak
                    sf.write(
                        local_save_dir
                        + "s{}_estimate.wav".format(src_idx + 1),
                        est_src, conf['sample_rate'])
                # Write local metrics to the example folder.
                with open(local_save_dir + 'metrics.json', 'w') as f:
                    json.dump(utt_metrics, f, indent=0)

    # Save all metrics to the experiment folder.
    all_metrics_df = pd.DataFrame(series_list)
    all_metrics_df.to_csv(os.path.join(conf['exp_dir'], 'all_metrics.csv'))

    # Print and save summary metrics
    final_results = {}
    for metric_name in compute_metrics:
        input_metric_name = 'input_' + metric_name
        # Improvement of the estimate over the unprocessed input.
        ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
        final_results[metric_name] = all_metrics_df[metric_name].mean()
        final_results[metric_name + '_imp'] = ldf.mean()
    print('Overall metrics :')
    pprint(final_results)
    with open(os.path.join(conf['exp_dir'], 'final_metrics.json'), 'w') as f:
        json.dump(final_results, f, indent=0)

    # `train_conf` is not defined in this function; it is read from the
    # enclosing module scope.
    model_dict = torch.load(model_path, map_location='cpu')
    os.makedirs(os.path.join(conf['exp_dir'], 'publish_dir'), exist_ok=True)
    save_publishable(os.path.join(conf['exp_dir'], 'publish_dir'),
                     model_dict, metrics=final_results,
                     train_conf=train_conf)
def main(conf):
    """Run a trained ConvTasNet on ``SeparationDataset`` and save spectrograms.

    The dataset is split with ``random_split`` and the *training* part is
    used as the evaluation set. For a random subset of items, spectrogram
    images of the mixture, the references and the reordered estimates are
    saved under ``<exp_dir>/examples/ex_<idx>/``.

    Args:
        conf: Dict providing ``exp_dir``, ``use_gpu``, ``n_save_ex`` and
            ``train_conf`` (whose ``data`` section holds
            ``fraction_of_examples_to_use_for_validation``). A value of
            ``-1`` for ``n_save_ex`` is replaced in place by the set size.
    """
    model_path = os.path.join(conf['exp_dir'], 'best_model.pth')
    model = ConvTasNet.from_pretrained(model_path)
    # Handle device placement
    if conf['use_gpu']:
        model.cuda()
    model_device = next(model.parameters()).device

    # get data for evaluation - this should change in the future to work on
    # real test data that was not used for training
    dataset = SeparationDataset(combination_list_path=os.path.join(
        conf['exp_dir'], 'combination_list.pkl'))
    n_val = int(
        len(dataset) * conf['train_conf']['data']
        ['fraction_of_examples_to_use_for_validation'])
    # The validation part of the split is not used here.
    train_set, _ = random_split(dataset, [len(dataset) - n_val, n_val])
    test_set = train_set

    # Used to reorder sources only
    loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from='pw_mtx')

    # Randomly choose the indexes of sentences to save.
    ex_save_dir = os.path.join(conf['exp_dir'], 'examples/')
    if conf['n_save_ex'] == -1:
        conf['n_save_ex'] = len(test_set)
    # A set gives O(1) membership tests inside the evaluation loop.
    save_idx = set(random.sample(range(len(test_set)), conf['n_save_ex']))

    # Scoped no_grad: grad mode is restored once the evaluation loop ends.
    with torch.no_grad():
        for idx in tqdm(range(len(test_set))):
            # Forward the network on the mixture.
            mix, sources = tensors_to_device(test_set[idx],
                                             device=model_device)
            est_sources = model(mix[None, None])
            _, reordered_sources = loss_func(est_sources, sources[None],
                                             return_est=True)
            mix_np = to_complex(mix[None].cpu().data.numpy())
            sources_np = to_complex(sources.cpu().data.numpy())
            est_sources_np = to_complex(
                reordered_sources.squeeze(0).cpu().data.numpy())

            # Save some examples in a folder as spectrogram images.
            if idx in save_idx:
                local_save_dir = os.path.join(ex_save_dir,
                                              'ex_{}/'.format(idx))
                os.makedirs(local_save_dir, exist_ok=True)
                # NOTE(review): figures returned by plot_spectogram are never
                # closed here; check whether that helper reuses or leaks them.
                ax = plot_spectogram(mix_np[0], scale=False, show_plot=False)
                ax.figure.savefig(local_save_dir + 'mixture.png')
                # Loop over the sources and estimates
                for src_idx, src in enumerate(sources_np):
                    ax = plot_spectogram(src, scale=False, show_plot=False)
                    ax.figure.savefig(local_save_dir +
                                      "s{}.png".format(src_idx + 1))
                for src_idx, est_src in enumerate(est_sources_np):
                    # Estimates are reshaped to (32, 128) and transposed
                    # before plotting.
                    iq_data = np.reshape(est_src, (32, 128)).T
                    ax = plot_spectogram(iq_data, scale=False,
                                         show_plot=False)
                    ax.figure.savefig(local_save_dir +
                                      "s{}_estimate.png".format(src_idx + 1))
def main(conf):
    """Train a ConvTasNet on ``WhamDataset`` and export the best checkpoint.

    Args:
        conf: Nested configuration dict. Reads the ``data``, ``training``,
            ``filterbank``, ``masknet``, ``optim`` and ``main_args`` sections.
            ``conf["masknet"]["n_src"]`` is overwritten in place before the
            configuration is dumped to ``conf.yml``.
    """
    data_cfg = conf["data"]
    train_cfg = conf["training"]

    shared_ds_args = dict(
        sample_rate=data_cfg["sample_rate"],
        nondefault_nsrc=data_cfg["nondefault_nsrc"],
    )
    train_set = WhamDataset(data_cfg["train_dir"], data_cfg["task"],
                            **shared_ds_args)
    val_set = WhamDataset(data_cfg["valid_dir"], data_cfg["task"],
                          **shared_ds_args)

    shared_loader_args = dict(
        batch_size=train_cfg["batch_size"],
        num_workers=train_cfg["num_workers"],
        drop_last=True,
    )
    train_loader = DataLoader(train_set, shuffle=True, **shared_loader_args)
    val_loader = DataLoader(val_set, shuffle=False, **shared_loader_args)

    # Update number of source values (It depends on the task)
    conf["masknet"]["n_src"] = train_set.n_src

    # Define model and optimizer
    model = ConvTasNet(**conf["filterbank"], **conf["masknet"])
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    scheduler = (
        ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
        if train_cfg["half_lr"]
        else None
    )

    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    with open(os.path.join(exp_dir, "conf.yml"), "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    system = System(
        model=model,
        loss_func=PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx"),
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    checkpoint = ModelCheckpoint(
        os.path.join(exp_dir, "checkpoints/"),
        monitor="val_loss",
        mode="min",
        save_top_k=5,
        verbose=1,
    )
    early_stopping = (
        EarlyStopping(monitor="val_loss", patience=10, verbose=1)
        if train_cfg["early_stop"]
        else False
    )

    trainer = pl.Trainer(
        max_epochs=train_cfg["epochs"],
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stopping,
        default_save_path=exp_dir,
        # Don't ask GPU if they are not available.
        gpus=-1 if torch.cuda.is_available() else None,
        distributed_backend="dp",
        train_percent_check=1.0,  # Useful for fast experiment
        gradient_clip_val=5.0,
    )
    trainer.fit(system)

    # Record the validation score of each retained checkpoint.
    best_k = {
        ckpt_path: score.item()
        for ckpt_path, score in checkpoint.best_k_models.items()
    }
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    # Save best model (next PL version will make this easier)
    # First checkpoint path whose recorded value equals the minimum.
    best_path = [
        ckpt_path
        for ckpt_path, score in best_k.items()
        if score == min(best_k.values())
    ][0]
    best_ckpt = torch.load(best_path)
    system.load_state_dict(state_dict=best_ckpt["state_dict"])
    system.cpu()

    export = system.model.serialize()
    export.update(train_set.get_infos())
    torch.save(export, os.path.join(exp_dir, "best_model.pth"))
dataset=noise_ds, batch_size=args.batch_size, shuffle=False, collate_fn=lambda x: data_processing(x, args.n_frames, "noise"), **kwargs) snr_ranges_all = [-5, 0, 5, 10] loss_fn = nn.MSELoss() for seed in range(args.start_seed, 40): args.seed = seed print("Running for seed ", args.seed) # Load baseline original student model if args.is_g_ctn: from asteroid.models import ConvTasNet Orig_G_model = ConvTasNet(n_src=1) ctn_dir = "new_models_results/Gs/expr04221819_SE_G3x1024_lr5e-04_bs20_ctnTruesm-1_nfrms16000_GPU1/" Orig_G_model.load_state_dict( torch.load("{}/Dmodel_best.pt".format(ctn_dir))) else: Orig_G_model = SpeechEnhancementModel(args.G_hidden_size, args.G_num_layers, args.stft_features) load_model(Orig_G_model, args.load_SEmodel) Orig_G_model = Orig_G_model.to(args.device) for snr_ranges in snr_ranges_all: args.snr_ranges = [snr_ranges] output_directory = setup_gan_expr(args) tot_s, tot_n = init_pers_set(args)