def simple_model(simple_sine_data):
    """Build a small embedding network, briefly train it on sine data, and
    return ``(trained_model, untrained_copy, item, tensor_data)``.
    """
    item, tensor_data = simple_sine_data

    num_features = 129          # number of frequencies in the STFT
    embedding_size = 20         # size of the learned embedding per TF bin
    activation = ['sigmoid', 'unit_norm']  # activation(s) applied to the embedding
    num_audio_channels = 1

    embedding_args = {
        'num_features': num_features,
        'hidden_size': num_features,
        'embedding_size': embedding_size,
        'activation': activation,
        'num_audio_channels': num_audio_channels,
        # embed the frequency dimension (2) for all audio channels (3)
        'dim_to_embed': [2, 3],
    }

    config = {
        'modules': {
            'mix_magnitude': {},
            'log_spectrogram': {'class': 'AmplitudeToDB'},
            'normalization': {'class': 'BatchNorm'},
            'embedding': {'class': 'Embedding', 'args': embedding_args},
        },
        'connections': [
            ['log_spectrogram', ['mix_magnitude']],
            ['normalization', ['log_spectrogram']],
            ['embedding', ['normalization']],
        ],
        'output': ['embedding'],
    }

    # Two models from the same config: one gets trained, one stays untouched
    # so tests can compare against an untrained baseline.
    model = ml.SeparationModel(config).to(DEVICE)
    untrained = ml.SeparationModel(config).to(DEVICE)

    loss_dictionary = {'EmbeddingLoss': {'class': 'WhitenedKMeansLoss'}}
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    closure = ml.train.closures.TrainClosure(loss_dictionary, optimizer, model)

    # A handful of optimization steps is enough for the tests that use this.
    for _ in range(10):
        closure(None, tensor_data)

    return model, untrained, item, tensor_data
def test_ml_alias():
    """An ``Alias`` module should pass its input through untouched, so the
    aliased output must equal the tensor it points at (``split:0``)."""
    config = {
        'modules': {
            'split': {
                'class': 'Split',
                'args': {'split_sizes': (3, 7), 'dim': -1},
            },
            'split_zero': {'class': 'Alias'},
        },
        'connections': [
            ('split', ('data', )),
            ('split_zero', ('split:0', )),
        ],
        'output': ['split:0', 'split_zero'],
    }
    model = ml.SeparationModel(config)

    result = model({'data': torch.randn(100, 10)})

    assert 'split_zero' in result
    assert torch.allclose(result['split:0'], result['split_zero'])
def overfit_model(scaper_folder):
    """Fixture: overfit a recurrent Chimera model on a single Scaper item.

    Yields ``(path_to_best_checkpoint, processed_item)``. Relies on the
    module-level ``fix_dir``, ``DEVICE`` and ``EPOCH_LENGTH`` — presumably
    test fixtures/constants defined elsewhere in this file.
    """
    nussl.utils.seed(0)  # deterministic weights/data ordering
    tfms = datasets.transforms.Compose([
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.MagnitudeWeights(),
        datasets.transforms.ToSeparationModel(),
        datasets.transforms.GetExcerpt(100)
    ])
    dataset = datasets.Scaper(scaper_folder, transform=tfms)
    # Restrict the dataset to a single item so the model can overfit quickly.
    dataset.items = [dataset.items[5]]
    dataloader = torch.utils.data.DataLoader(dataset)
    # Number of frequency features, from the first item's magnitude spectrogram.
    n_features = dataset[0]['mix_magnitude'].shape[1]
    config = ml.networks.builders.build_recurrent_chimera(
        n_features, 50, 1, True, 0.3, 20, 'sigmoid', 2, 'sigmoid',
        normalization_class='InstanceNorm')
    model = ml.SeparationModel(config)
    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    # Chimera training: deep clustering + permutation-invariant L1, weighted.
    loss_dictionary = {
        'DeepClusteringLoss': {
            'weight': 0.2
        },
        'PermutationInvariantLoss': {
            'args': ['L1Loss'],
            'weight': 0.8
        }
    }
    train_closure = ml.train.closures.TrainClosure(
        loss_dictionary, optimizer, model)
    trainer, _ = ml.train.create_train_and_validation_engines(
        train_closure, device=DEVICE)
    with tempfile.TemporaryDirectory() as tmpdir:
        # Prefer the fixed fixture directory when configured, else a temp dir.
        _dir = fix_dir if fix_dir else tmpdir
        ml.train.add_stdout_handler(trainer)
        ml.train.add_validate_and_checkpoint(_dir, model, optimizer,
                                             dataset, trainer)
        trainer.run(dataloader, max_epochs=1, epoch_length=EPOCH_LENGTH)
        model_path = os.path.join(trainer.state.output_folder,
                                  'checkpoints', 'best.model.pth')
        # Yield (not return) so the temp dir stays alive while tests use it.
        yield model_path, dataset.process_item(dataset.items[0])
def overfit_audio_model(scaper_folder):
    """Fixture: overfit an end-to-end (waveform in/out) model on one item.

    Yields ``(path_to_best_checkpoint, processed_item)``. Uses module-level
    ``fix_dir``, ``DEVICE`` and ``EPOCH_LENGTH`` defined elsewhere in this
    file.
    """
    nussl.utils.seed(0)  # deterministic weights/data ordering
    tfms = datasets.transforms.Compose([
        datasets.transforms.GetAudio(),
        datasets.transforms.ToSeparationModel(),
        # Trim/pad the raw audio keys to a fixed 32000-sample excerpt.
        datasets.transforms.GetExcerpt(
            32000, time_dim=1, tf_keys=['mix_audio', 'source_audio'])
    ])
    dataset = datasets.Scaper(
        scaper_folder, transform=tfms)
    # Single item so the model can overfit quickly.
    dataset.items = [dataset.items[5]]
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=1)
    config = ml.networks.builders.build_recurrent_end_to_end(
        256, 256, 64, 'sqrt_hann', 50, 2, True, 0.3, 2, 'sigmoid',
        num_audio_channels=1, mask_complex=False, rnn_type='lstm',
        mix_key='mix_audio')
    model = ml.SeparationModel(config)
    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    # Time-domain training objective: permutation-invariant SI-SDR on audio.
    loss_dictionary = {
        'PermutationInvariantLoss': {
            'args': ['SISDRLoss'],
            'weight': 1.0,
            'keys': {'audio': 'estimates', 'source_audio': 'targets'}
        }
    }
    train_closure = ml.train.closures.TrainClosure(
        loss_dictionary, optimizer, model)
    trainer, _ = ml.train.create_train_and_validation_engines(
        train_closure, device=DEVICE
    )
    with tempfile.TemporaryDirectory() as tmpdir:
        # Prefer a 'dae' subfolder of the fixed fixture dir when configured.
        _dir = os.path.join(fix_dir, 'dae') if fix_dir else tmpdir
        ml.train.add_stdout_handler(trainer)
        ml.train.add_validate_and_checkpoint(
            _dir, model, optimizer, dataset, trainer)
        ml.train.add_progress_bar_handler(trainer)
        trainer.run(dataloader, max_epochs=1, epoch_length=EPOCH_LENGTH)
        model_path = os.path.join(
            trainer.state.output_folder, 'checkpoints', 'best.model.pth')
        # Yield (not return) so the temp dir stays alive while tests use it.
        yield model_path, dataset.process_item(dataset.items[0])
# Script-level training setup. val_dataset, val_sampler, dataset, and the
# NUM_WORKERS / BATCH_SIZE / LEARNING_RATE / PATIENCE / DEVICE constants are
# defined elsewhere in this file — TODO(review): confirm when viewing the
# whole script.
val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                             num_workers=NUM_WORKERS,
                                             batch_size=BATCH_SIZE,
                                             sampler=val_sampler)
# Number of frequency features, from the first item's magnitude spectrogram.
n_features = dataset[0]['mix_magnitude'].shape[1]

# builds a baseline model with 4 recurrent layers, 600 hidden units, bidirectional
config = ml.networks.builders.build_recurrent_mask_inference(
    n_features, 600, 4, True, 0.3, 2, ['sigmoid'],
    normalization_class='BatchNorm')

model = ml.SeparationModel(config).to(DEVICE)
logging.info(model)  # log the full architecture for the record

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Halve the learning rate when validation loss plateaus for PATIENCE epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5,
                                                 patience=PATIENCE)

# set up the loss function: permutation-invariant L1 on the mask estimates
loss_dictionary = {
    'PermutationInvariantLoss': {
        'args': ['L1Loss'],
        'weight': 1.0
    }
}
def test_overfit_a(mix_source_folder):
    """Overfit a small mask-inference model and plot its loss history.

    Smoke-tests the full train loop: caching transforms, closures, engines,
    checkpointing, and reading the epoch history back out of the saved
    checkpoint. Uses module-level ``fix_dir`` / ``plt`` / ``np`` from
    elsewhere in this file.
    """
    tfms = datasets.transforms.Compose([
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.ToSeparationModel(),
        # Cache processed items to disk; overwrite any stale cache.
        datasets.transforms.Cache('~/.nussl/tests/cache', overwrite=True),
        datasets.transforms.GetExcerpt(400)
    ])
    dataset = datasets.MixSourceFolder(mix_source_folder, transform=tfms)
    # Populate the cache up front, then read from it during training.
    ml.train.cache_dataset(dataset)
    dataset.cache_populated = True
    dataloader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=len(dataset), num_workers=2)

    # create the model, based on the first item in the dataset
    # second bit of the shape is the number of features
    n_features = dataset[0]['mix_magnitude'].shape[1]
    mi_config = ml.networks.builders.build_recurrent_mask_inference(
        n_features, 50, 1, False, 0.0, 2, 'sigmoid',
    )
    model = ml.SeparationModel(mi_config)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Longer epochs on GPU where it's cheap; keep CPU runs fast.
    if device == 'cuda':
        epoch_length = 100
    else:
        epoch_length = 10
    model = model.to(device)

    # create optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    loss_dictionary = {'L1Loss': {'weight': 1.0}}
    train_closure = ml.train.closures.TrainClosure(
        loss_dictionary, optimizer, model)
    val_closure = ml.train.closures.ValidationClosure(loss_dictionary, model)

    with tempfile.TemporaryDirectory() as tmpdir:
        _dir = fix_dir if fix_dir else tmpdir
        os.makedirs(os.path.join(_dir, 'plots'), exist_ok=True)
        trainer, validator = ml.train.create_train_and_validation_engines(
            train_closure, val_closure, device=device)

        # add handlers to engine
        ml.train.add_stdout_handler(trainer, validator)
        ml.train.add_validate_and_checkpoint(
            _dir, model, optimizer, dataset, trainer,
            val_data=dataloader, validator=validator)
        ml.train.add_tensorboard_handler(_dir, trainer)

        # run engine
        trainer.run(dataloader, max_epochs=5, epoch_length=epoch_length)

        model_path = os.path.join(
            trainer.state.output_folder, 'checkpoints', 'best.model.pth')
        # Load onto CPU regardless of where training ran.
        state_dict = torch.load(
            model_path, map_location=lambda storage, loc: storage)
        model.load_state_dict(state_dict['state_dict'])

        # Plot every tracked metric's per-epoch history to the plots folder.
        history = state_dict['metadata']['trainer.state.epoch_history']
        for key in history:
            plt.figure(figsize=(10, 4))
            plt.title(f"epoch:{key}")
            plt.plot(np.array(history[key]).reshape(-1, ))
            # '/' in metric names would otherwise create subdirectories.
            plt.savefig(
                os.path.join(trainer.state.output_folder, 'plots',
                             f"epoch:{key.replace('/', ':')}.png"))
def test_create_engine(mix_source_folder):
    """End-to-end test of engine creation, checkpointing, and resuming.

    Runs a dummy training function through the trainer/validator engines,
    checks every expected checkpoint file exists, then resumes training from
    the saved state dicts with a fresh pair of engines.
    """
    # load dataset with transforms
    tfms = datasets.transforms.Compose([
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.ToSeparationModel(),
        datasets.transforms.Cache(os.path.join(fix_dir, 'cache'))
    ])
    dataset = datasets.MixSourceFolder(mix_source_folder, transform=tfms)

    # create the model, based on the first item in the dataset
    # second bit of the shape is the number of features
    n_features = dataset[0]['mix_magnitude'].shape[1]
    mi_config = ml.networks.builders.build_recurrent_mask_inference(
        n_features, 50, 2, True, 0.3, 2, 'softmax',
    )
    model = ml.SeparationModel(mi_config)

    # create optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # dummy function for processing a batch through the model
    def train_batch(engine, data):
        # Monotonically decreasing "loss" so 'best' checkpoints keep updating.
        loss = -engine.state.iteration
        return {'loss': loss}

    # building the training and validation engines and running them
    # the validation engine runs within the training engine run
    with tempfile.TemporaryDirectory() as tmpdir:
        _dir = fix_dir if fix_dir else tmpdir
        # _dir = tmpdir
        trainer, validator = ml.train.create_train_and_validation_engines(
            train_batch, train_batch)

        # add handlers to engine
        ml.train.add_stdout_handler(trainer, validator)
        ml.train.add_validate_and_checkpoint(
            _dir, model, optimizer, dataset, trainer, dataset, validator,
            save_by_epoch=1)
        ml.train.add_tensorboard_handler(_dir, trainer, every_iteration=True)
        ml.train.add_progress_bar_handler(trainer)

        # run engine
        trainer.run(dataset, max_epochs=3)

        # The trainer should have written latest/best model+optimizer files.
        assert os.path.exists(trainer.state.output_folder)
        assert os.path.exists(
            os.path.join(trainer.state.output_folder,
                         'checkpoints', 'latest.model.pth'))
        assert os.path.exists(
            os.path.join(trainer.state.output_folder,
                         'checkpoints', 'best.model.pth'))
        assert os.path.exists(
            os.path.join(trainer.state.output_folder,
                         'checkpoints', 'latest.optimizer.pth'))
        assert os.path.exists(
            os.path.join(trainer.state.output_folder,
                         'checkpoints', 'best.optimizer.pth'))
        # save_by_epoch=1 means one checkpoint per epoch (epochs 1..3).
        for i in range(1, 4):
            assert os.path.exists(
                os.path.join(trainer.state.output_folder,
                             'checkpoints', f'epoch{i}.model.pth'))
        assert len(trainer.state.epoch_history['train/loss']) == 3
        assert len(trainer.state.iter_history['loss']) == 10

        # try resuming
        model_path = os.path.join(
            trainer.state.output_folder, 'checkpoints', 'latest.model.pth')
        optimizer_path = os.path.join(
            trainer.state.output_folder, 'checkpoints', 'latest.optimizer.pth')
        opt_state_dict = torch.load(
            optimizer_path, map_location=lambda storage, loc: storage)
        state_dict = torch.load(
            model_path, map_location=lambda storage, loc: storage)
        optimizer.load_state_dict(opt_state_dict)
        model.load_state_dict(state_dict['state_dict'])

        # make sure the cache got removed in saved transforms bc it's not a portable
        # transform
        for t in state_dict['metadata']['train_dataset'][
                'transforms'].transforms:
            assert not isinstance(t, datasets.transforms.Cache)

        new_trainer, new_validator = (
            ml.train.create_train_and_validation_engines(train_batch))

        # add handlers to engine
        ml.train.add_stdout_handler(new_trainer)
        ml.train.add_validate_and_checkpoint(
            trainer.state.output_folder, model, optimizer, dataset,
            new_trainer)
        ml.train.add_tensorboard_handler(
            trainer.state.output_folder, new_trainer)

        # Restoring the trainer state should put us back at the saved epoch.
        new_trainer.load_state_dict(
            state_dict['metadata']['trainer.state_dict'])
        assert new_trainer.state.epoch == trainer.state.epoch
        new_trainer.run(dataset, max_epochs=3)
def test_trainer_data_parallel(mix_source_folder):
    """Run a dummy training loop with a ``DataParallel``-wrapped model and
    check that checkpoints and loss histories are produced as expected."""
    transform = datasets.transforms.Compose([
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.ToSeparationModel()
    ])
    dataset = datasets.MixSourceFolder(mix_source_folder, transform=transform)

    # Build the model from the feature count of the first item
    # (second dim of the magnitude spectrogram).
    num_features = dataset[0]['mix_magnitude'].shape[1]
    config = ml.networks.builders.build_recurrent_mask_inference(
        num_features, 50, 2, True, 0.3, 2, 'softmax',
    )
    model = torch.nn.DataParallel(ml.SeparationModel(config))

    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    def train_batch(engine, data):
        # Stand-in for a real closure: just emit a random loss value.
        return {'loss': np.random.rand()}

    # Build the training/validation engines and run them; the validation
    # engine runs inside the training engine's run.
    with tempfile.TemporaryDirectory() as tmpdir:
        out_dir = fix_dir if fix_dir else tmpdir
        trainer, validator = ml.train.create_train_and_validation_engines(
            train_batch, train_batch)

        # Attach logging, checkpointing, and tensorboard handlers.
        ml.train.add_stdout_handler(trainer, validator)
        ml.train.add_validate_and_checkpoint(
            out_dir, model, optimizer, dataset, trainer, dataset, validator)
        ml.train.add_tensorboard_handler(out_dir, trainer)

        trainer.run(dataset, max_epochs=3)

        assert os.path.exists(trainer.state.output_folder)
        checkpoint_dir = os.path.join(
            trainer.state.output_folder, 'checkpoints')
        for filename in ('latest.model.pth', 'best.model.pth',
                         'latest.optimizer.pth', 'best.optimizer.pth'):
            assert os.path.exists(os.path.join(checkpoint_dir, filename))

        assert len(trainer.state.epoch_history['train/loss']) == 3
        assert len(trainer.state.iter_history['loss']) == 10
def test_gradients(mix_source_folder):
    """Check batched vs. per-item gradient equivalence for several models.

    For each builder config, runs one batched forward/backward pass and one
    accumulated per-item pass (loss divided by dataset length), then asserts
    outputs and mean gradients match — except for models containing batch
    norm, whose statistics legitimately differ between the two modes.
    Gradient-flow plots are saved under ``tests/local/``.
    """
    os.makedirs('tests/local/', exist_ok=True)
    utils.seed(0)  # deterministic dataset transforms
    tfms = datasets.transforms.Compose([
        datasets.transforms.GetAudio(),
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.MagnitudeWeights(),
        datasets.transforms.ToSeparationModel(),
        datasets.transforms.GetExcerpt(50),
        # Matching time-domain excerpt for the audio keys.
        datasets.transforms.GetExcerpt(
            3136, time_dim=1, tf_keys=['mix_audio', 'source_audio'])
    ])
    dataset = datasets.MixSourceFolder(mix_source_folder, transform=tfms)

    # create the model, based on the first item in the dataset
    # second bit of the shape is the number of features
    n_features = dataset[0]['mix_magnitude'].shape[1]

    # make some configs
    names = [
        'dpcl', 'mask_inference_l1', 'mask_inference_mse_loss',
        'chimera', 'open_unmix', 'end_to_end', 'dual_path'
    ]
    # These contain batch norm, so per-item vs. batched outputs won't match.
    config_has_batch_norm = ['open_unmix', 'dual_path']
    configs = [
        ml.networks.builders.build_recurrent_dpcl(
            n_features, 50, 1, True, 0.0, 20, ['sigmoid'],
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_recurrent_mask_inference(
            n_features, 50, 1, True, 0.0, 2, ['softmax'],
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_recurrent_mask_inference(
            n_features, 50, 1, True, 0.0, 2, ['softmax'],
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_recurrent_chimera(
            n_features, 50, 1, True, 0.0, 20, ['sigmoid'], 2, ['softmax'],
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_open_unmix_like(
            n_features, 50, 1, True, .4, 2, 1, add_embedding=True,
            embedding_size=20,
            embedding_activation=['sigmoid', 'unit_norm'],
        ),
        ml.networks.builders.build_recurrent_end_to_end(
            256, 256, 64, 'sqrt_hann', 50, 2, True, 0.0, 2, 'sigmoid',
            num_audio_channels=1, mask_complex=False, rnn_type='lstm',
            mix_key='mix_audio', normalization_class='InstanceNorm'),
        ml.networks.builders.build_dual_path_recurrent_end_to_end(
            64, 16, 8, 60, 30, 50, 2, True, 25, 2, 'sigmoid',
        )
    ]
    # One loss dictionary per config above, in the same order as `names`.
    loss_dictionaries = [
        {
            'DeepClusteringLoss': {
                'weight': 1.0
            }
        },
        {
            'L1Loss': {
                'weight': 1.0
            }
        },
        {
            'MSELoss': {
                'weight': 1.0
            }
        },
        {
            'DeepClusteringLoss': {
                'weight': 0.2
            },
            'PermutationInvariantLoss': {
                'args': ['L1Loss'],
                'weight': 0.8
            }
        },
        {
            'DeepClusteringLoss': {
                'weight': 0.2
            },
            'PermutationInvariantLoss': {
                'args': ['L1Loss'],
                'weight': 0.8
            }
        },
        {
            'SISDRLoss': {
                'weight': 1.0,
                'keys': {
                    'audio': 'estimates',
                    'source_audio': 'references'
                }
            }
        },
        {
            'SISDRLoss': {
                'weight': 1.0,
                'keys': {
                    'audio': 'estimates',
                    'source_audio': 'references'
                }
            }
        },
    ]

    def append_keys_to_model(name, model):
        # The waveform models need extra output keys exposed so the loss
        # closures can find intermediate tensors.
        if name == 'end_to_end':
            model.output_keys.extend(
                ['audio', 'recurrent_stack', 'mask', 'estimates'])
        elif name == 'dual_path':
            model.output_keys.extend(
                ['audio', 'mixture_weights', 'dual_path', 'mask',
                 'estimates'])

    for name, config, loss_dictionary in zip(
            names, configs, loss_dictionaries):
        loss_closure = ml.train.closures.Closure(loss_dictionary)

        # Seed identically before each model build so model_grad and
        # model_acc below start from identical weights.
        utils.seed(0, set_cudnn=True)
        model_grad = ml.SeparationModel(config, verbose=True).to(DEVICE)
        append_keys_to_model(name, model_grad)

        # Concatenate every item into one batch along dim 0.
        all_data = {}
        for data in dataset:
            for key in data:
                if torch.is_tensor(data[key]):
                    data[key] = data[key].float().unsqueeze(0).contiguous().to(
                        DEVICE)
                    if key not in all_data:
                        all_data[key] = data[key]
                    else:
                        all_data[key] = torch.cat(
                            [all_data[key], data[key]], dim=0)

        # do a forward pass in batched mode
        output_grad = model_grad(all_data)
        _loss = loss_closure.compute_loss(output_grad, all_data)
        # do a backward pass in batched mode
        _loss['loss'].backward()
        plt.figure(figsize=(10, 10))
        utils.visualize_gradient_flow(model_grad.named_parameters())
        plt.tight_layout()
        plt.savefig(f'tests/local/{name}:batch_gradient.png')

        # Re-seed and rebuild so the accumulated model starts from the same
        # initial weights as the batched one.
        utils.seed(0, set_cudnn=True)
        model_acc = ml.SeparationModel(config).to(DEVICE)
        append_keys_to_model(name, model_acc)

        for i, data in enumerate(dataset):
            for key in data:
                if torch.is_tensor(data[key]):
                    data[key] = data[key].float().unsqueeze(0).contiguous().to(
                        DEVICE)
            # do a forward pass on each item individually
            output_acc = model_acc(data)
            for key in output_acc:
                # make sure the forward pass in batch and forward pass individually match
                # if they don't, then items in a minibatch are talking to each other
                # somehow...
                _data_a = output_acc[key]
                _data_b = output_grad[key][i].unsqueeze(0)
                if name not in config_has_batch_norm:
                    assert torch.allclose(_data_a, _data_b, atol=1e-3)
            _loss = loss_closure.compute_loss(output_acc, data)
            # do a backward pass on each item individually
            # (divide by dataset length so gradients sum to the batched ones)
            _loss['loss'] = _loss['loss'] / len(dataset)
            _loss['loss'].backward()

        plt.figure(figsize=(10, 10))
        utils.visualize_gradient_flow(model_acc.named_parameters())
        plt.tight_layout()
        plt.savefig(f'tests/local/{name}:accumulated_gradient.png')

        # make sure the gradients match between batched and accumulated gradients
        # if they don't, then the items in a batch are talking to each other in the loss
        for param1, param2 in zip(model_grad.parameters(),
                                  model_acc.parameters()):
            assert torch.allclose(param1, param2)
            if name not in config_has_batch_norm:
                if param1.requires_grad and param2.requires_grad:
                    assert torch.allclose(
                        param1.grad.mean(), param2.grad.mean(), atol=1e-3)