def test_split_return_values_torch(self):
    seq_length_output = numpy.array([10, 5])
    output = torch.ones(seq_length_output.max(), 2, 4)

    with unittest.mock.patch.object(ModularTrainer.logger, "error") as mock_logger:
        with self.assertRaises(TypeError):
            ModularTrainer._split_return_values(output, seq_length_output,
                                                None, False)
        mock_logger.assert_called_with(
            "No best model exists yet. Continue with the current one.")
def create_hparams(hparams_string=None, verbose=False): """ Create model hyper parameter container. Parse non default from given string. """ hparams = ModularTrainer.create_hparams(hparams_string, verbose=False) hparams.add_hparams( num_questions=None, question_file=None, # Used to add labels in plot. num_coded_sps=60, num_baps=1, load_sp=True, load_lf0=True, load_vuv=True, load_bap=True, sp_type="mcep", add_deltas=True, synth_load_org_sp=False, synth_load_org_lf0=False, synth_load_org_vuv=False, synth_load_org_bap=False, # More available metrics in the Metrics class. metrics=[ Metrics.MCD, Metrics.F0_RMSE, Metrics.VDE, Metrics.BAP_distortion ]) if verbose: logging.info(hparams.get_debug_string()) return hparams
def test_legacy_string_conversion(self):
    hparams = ModularTrainer.create_hparams()
    num_emb = 3
    emb_dim = 12
    in_dim = 43  # Includes embedding index.
    out_dim = 12
    hparams.add_hparam("f_get_emb_index", [self._f_get_emb_index])
    hparams.model_type = "RNNDYN-{}x{}_EMB_(0, 3, 5)-2_RELU_128-1_Batch" \
                         "Norm1dConv1d_18_3-1_BiLSTM_32-1_RNNTANH_8-1_FC_{}".format(
                             num_emb, emb_dim, out_dim)

    model = rnn_dyn.convert_legacy_to_config(in_dim=(in_dim,),
                                             hparams=hparams).create_model()

    self.assertEqual(torch.Size([128, 42 + emb_dim]),
                     model[0][0].weight.shape)
    self.assertEqual(torch.Size([128, 128]), model[0][2].weight.shape)
    self.assertEqual(nn.BatchNorm1d, type(model[2][0]))
    self.assertEqual(torch.Size([4 * 32, 18 + emb_dim]),
                     model[3].module.weight_ih_l0.shape)
    self.assertEqual('RNN_TANH', model[4].module.mode)
    self.assertEqual(torch.Size([12, 8 + emb_dim]),
                     model[5][0].weight.shape)

    seq_length = torch.tensor((100, 75), dtype=torch.long)
    batch_size = 2
    test_input = torch.ones([seq_length[0], batch_size, in_dim])
    model.init_hidden(batch_size)
    output = model(test_input, seq_lengths_input=seq_length,
                   max_length_inputs=seq_length[0])

    self.assertEqual(torch.Size([seq_length[0], batch_size, out_dim]),
                     output[0].shape)
def test_embeddings_everywhere(self):
    hparams = ModularTrainer.create_hparams()
    num_emb = 3
    emb_dim = 12
    in_dim = 43
    out_dim = 12
    hparams.add_hparam("f_get_emb_index", [self._f_get_emb_index])
    hparams.model_type = "RNNDYN-{}x{}_EMB_(-1)-3_RELU_128-2_BiLSTM_32-1_FC_12".format(
        num_emb, emb_dim)

    model = rnn_dyn.convert_legacy_to_config(in_dim=(in_dim,),
                                             hparams=hparams).create_model()

    self.assertEqual(1, len(model.emb_groups))
    self.assertEqual(torch.Size([num_emb, emb_dim]),
                     model.emb_groups["0"].weight.shape)

    self.assertEqual(torch.Size([128, in_dim - 1 + emb_dim]),
                     model[0][0].weight.shape)
    self.assertEqual(torch.Size([12, 64 + emb_dim]),
                     model[2][0].weight.shape)

    self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]),
                     model[1].weight_ih_l0.shape)
    self.assertEqual(torch.Size([32 * 4, 32 * 2]),
                     model[1].weight_ih_l1_reverse.shape)
def test_input_to_str_list(self):
    # Tuple input but elements are not strings.
    out = ModularTrainer._input_to_str_list((121, 122))
    self.assertEqual(["121", "122"], out)

    # Valid path to file id list.
    out = ModularTrainer._input_to_str_list(
        os.path.join("integration", "fixtures", "file_id_list.txt"))
    self.assertEqual(TestModularTrainer._get_id_list(), out)

    # Single input id.
    out = ModularTrainer._input_to_str_list("121")
    self.assertEqual(["121"], out)

    # Wrong input.
    with self.assertRaises(ValueError):
        ModularTrainer._input_to_str_list(numpy.array([1, 2]))
def create_hparams(hparams_string: os.PathLike = None, verbose: bool = False):
    hparams = ModularTrainer.create_hparams(hparams_string=hparams_string,
                                            verbose=verbose)
    hparams.add_hparams(class_pred_name="class_pred",
                        class_true_name="class_true",
                        num_classes=-1,
                        class_names=None)
    return hparams
def test_nonlins(self):
    hparams = ModularTrainer.create_hparams()
    in_dim = 42
    out_dim = 12
    # hparams.model_type = "RNNDYN-1_FC_16-1_LIN_18-1_linear_20-1_RELU_22-1_TANH_24-1_FC_{}".format(out_dim)
    model_config = rnn_dyn.Config(
        in_dim=in_dim,
        batch_first=True,
        layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=16),
            rnn_dyn.Config.LayerConfig(layer_type="LIN", out_dim=18),
            rnn_dyn.Config.LayerConfig(layer_type="linear", out_dim=20),
            rnn_dyn.Config.LayerConfig(layer_type="Linear", num_layers=2,
                                       out_dim=22, nonlin="ReLU"),
            rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=22),
            rnn_dyn.Config.LayerConfig(layer_type="SELU", inplace=True),
            rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=out_dim),
            rnn_dyn.Config.LayerConfig(layer_type="Conv1d", kernel_size=5,
                                       nonlin="ReLU", out_dim=out_dim)
        ],
        hparams=hparams)
    model = model_config.create_model()
    # print(list(model.modules()))
    # model = ModelFactory.create(hparams.model_type, (in_dim,), out_dim, hparams)

    for layer_idx in range(3):
        num_sublayers = len(model[layer_idx].module)
        if num_sublayers > 1:
            self.assertEqual(
                1, num_sublayers,
                "Layer {} should not have a non-linearity but has {}.".format(
                    layer_idx, type(model[layer_idx].module[1])))

    seq_layer = model[3].module
    self.assertEqual(
        torch.nn.ReLU, type(seq_layer[1]),
        "Layer {} should have a non-linearity {} but has {}.".format(
            3, torch.nn.ReLU, type(seq_layer[1])))
    self.assertEqual(
        torch.nn.ReLU, type(seq_layer[3]),
        "Layer {} should have a non-linearity {} but has {}.".format(
            3, torch.nn.ReLU, type(seq_layer[3])))

    layer = model[5].module[0]
    self.assertEqual(
        torch.nn.SELU, type(layer),
        "Layer {} should be {} but is {}.".format(5, torch.nn.SELU,
                                                  type(layer)))

    seq_layer = model[7].module
    self.assertEqual(
        torch.nn.ReLU, type(seq_layer[1]),
        "Layer {} should have a non-linearity {} but has {}.".format(
            7, torch.nn.ReLU, type(seq_layer[1])))
def test_conv1d(self):
    hparams = ModularTrainer.create_hparams()
    in_dim = 40
    out_dim = 12
    hparams.model_type = "RNNDYN-" + "-".join(
        ["1_BatchNorm1dConv1d_128_5"] * 2) + "-1_BiLSTM_8-1_FC_12"
    model = rnn_dyn.convert_legacy_to_config(in_dim=in_dim,
                                             hparams=hparams).create_model()
    # ModelFactory.create(hparams.model_type, (in_dim,), out_dim, hparams)

    self.assertEqual(in_dim, model[0][0].in_channels)
    self.assertEqual(128, model[0][0].out_channels)
    self.assertEqual((5,), model[0][0].kernel_size)
    for idx in range(1, 4, 2):  # Test for batch norm after each layer.
        self.assertEqual(torch.nn.BatchNorm1d, type(model[idx][0]))

    seq_length = torch.tensor((100, 75), dtype=torch.long)
    batch_size = 2
    test_input = torch.ones([seq_length[0], batch_size, in_dim])
    model.init_hidden(batch_size)
    output = model(test_input, seq_lengths_input=seq_length,
                   max_length_inputs=seq_length[0])
    self.assertEqual(torch.Size([seq_length[0], batch_size, out_dim]),
                     output[0].shape)

    hparams.model_type = "RNNDYN-2_Conv1d_128_5x1-1_FC_12"
    model = rnn_dyn.convert_legacy_to_config(in_dim=in_dim,
                                             hparams=hparams).create_model()
    self.assertEqual((5, 1), model[0][0].kernel_size)

    hparams.model_type = "RNNDYN-2_Conv1d_128_5x1_s2_p5_d3_g4-1_FC_12"
    model = rnn_dyn.convert_legacy_to_config(in_dim=in_dim,
                                             hparams=hparams).create_model()
    self.assertEqual((2,), model[0][0].stride)
    self.assertEqual((5,), model[0][0].padding)
    self.assertEqual((3,), model[0][0].dilation)
    self.assertEqual(4, model[0][0].groups)

    hparams.model_type = "RNNDYN-2_Conv1d_64_3_p0_s2"
    model = rnn_dyn.convert_legacy_to_config(in_dim=in_dim,
                                             hparams=hparams).create_model()
    model.init_hidden(batch_size)
    output, kwargs = model(test_input, seq_lengths_input=seq_length,
                           max_length_inputs=seq_length[0])

    def new_lengths(x):
        return (x - 3) // 2 + 1

    expected_seq_lengths = new_lengths(new_lengths(seq_length))
    expected_max_length = new_lengths(new_lengths(seq_length.max()))
    self.assertTrue(
        (expected_seq_lengths == kwargs["seq_lengths_input"]).all())
    self.assertTrue(
        (expected_max_length == kwargs["max_length_inputs"]).all())
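# Illustrative sketch, not part of the test suite: the new_lengths helper above follows the
# standard Conv1d output-length formula L_out = floor((L_in + 2p - d*(k-1) - 1) / s) + 1, which
# for kernel_size=3, stride=2, padding=0, dilation=1 reduces to (L_in - 3) // 2 + 1. The helper
# name below is an assumption made for this sketch.
def _conv1d_out_length(length, kernel_size=3, stride=2, padding=0, dilation=1):
    """Output length of a single Conv1d layer for the given hyper-parameters."""
    return (length + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1

# For input lengths (100, 75) and the two stacked Conv1d layers tested above:
# layer 1 -> (49, 37), layer 2 -> (24, 18), matching new_lengths(new_lengths(...)).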
def create_hparams(hparams_string=None, verbose=False):
    hparams = ModularTrainer.create_hparams(hparams_string, verbose=False)

    hparams.add_hparams(
        # exclude_begin_and_end_silence=False,
        # htk_min_phoneme_length=50000,
        # phoneme_label_type="HTK full",  # Specifies the format in which the .lab files are
        #                                 # stored. Refer to PhonemeLabelGen.load_sample for
        #                                 # a list of types.
        metrics=[Metrics.Dur_RMSE, Metrics.Dur_pearson])

    if verbose:
        logging.info(hparams.get_debug_string())

    return hparams
def plot_mgc(plotter: DataPlotter,
             plotter_config: DataPlotter.Config,
             grid_indices: List[int],
             id_name: str,
             features: np.ndarray,
             synth_fs: int,
             spec_slice: slice = None,
             labels: Tuple[str, str] = (None, None),
             xlim: Union[str, Tuple[float, float]] = (None, None),
             ylim: Union[str, Tuple[float, float]] = (None, None),
             *args,
             **kwargs):
    import librosa

    amp_sp = np.absolute(AudioProcessing.mcep_to_amp_sp(features, synth_fs))
    amp_sp_db = librosa.amplitude_to_db(amp_sp, top_db=None)

    ModularTrainer.plot_specshow(plotter, plotter_config, grid_indices,
                                 id_name, amp_sp_db, spec_slice, labels,
                                 xlim, ylim, *args, **kwargs)
def test_save_load_equality(self):
    hparams = ModularTrainer.create_hparams()
    hparams.optimiser_type = "Adam"
    hparams.optimiser_args["lr"] = 0.1
    # Add function name to path.
    out_dir = os.path.join(self.out_dir, "test_save_load_equality")
    model_path = os.path.join(out_dir, "test_model")

    # Create a new model, run the optimiser once to obtain a state, and save everything.
    in_dim, out_dim = 10, 4
    total_epochs = 10
    model_handler = ModularModelHandlerPyTorch()
    model_handler.model = rnn_dyn.Config(
        in_dim=in_dim,
        layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=out_dim)
        ]).create_model()
    model_handler.set_optimiser(hparams)

    seq_length = torch.tensor((10, 7), dtype=torch.long)
    batch_size = 2
    test_input = torch.ones([seq_length[0], batch_size, in_dim])
    model_handler.model.init_hidden(batch_size)
    output = model_handler.model(test_input, seq_lengths_input=seq_length,
                                 max_length_inputs=seq_length.max())[0]
    output.mean().backward()
    model_handler.optimiser.step()
    model_handler.save_checkpoint(epoch=total_epochs, model_path=model_path)

    # Create a new model handler and test load save.
    model_handler_copy = ModularModelHandlerPyTorch()
    model_handler_copy.load_checkpoint(hparams, model_path=model_path,
                                       load_optimiser=True,
                                       epoch=total_epochs, verbose=False)

    zip_params = zip(model_handler.model.parameters(),
                     model_handler_copy.model.parameters())
    self.assertTrue(all([(x == x_copy).all() for x, x_copy in zip_params]),
                    "Loaded and saved models are not the same.")

    current_opt_state = model_handler.optimiser.state_dict()["state"]
    copy_opt_state = model_handler_copy.optimiser.state_dict()["state"]
    self.assertTrue(equal_iterable(current_opt_state, copy_opt_state),
                    "Loaded and saved optimisers are not the same.")

    shutil.rmtree(out_dir)
def test_split_return_values(self):
    seq_length_output = numpy.array([10, 6, 8])
    batch_size = 3
    feature_dim = 50

    output = numpy.empty((seq_length_output.max(), batch_size, feature_dim))
    hidden1 = numpy.empty((seq_length_output.max(), batch_size, 2))
    hidden2 = numpy.empty((seq_length_output.max(), batch_size, 4))
    for idx in range(batch_size):
        output[:, idx] = idx
        hidden1[:, idx] = idx * 10
        hidden2[:, idx] = idx * 100
    hidden = (hidden1, hidden2)
    batch = (output, hidden)

    split_batch = ModularTrainer._split_return_values(batch,
                                                      seq_length_output,
                                                      None, False)

    for idx in range(batch_size):
        b = split_batch[idx]
        out = b[0]
        h = b[1]
        h1 = h[0]
        h2 = h[1]

        self.assertTrue(
            (out == idx).all(),
            msg="Output of batch {} is wrong, expected all values to "
                "be {}.".format(idx, idx))
        self.assertTrue(
            (h1 == idx * 10).all(),
            msg="Hidden1 of batch {} is wrong, expected all values to "
                "be {}.".format(idx, idx * 10))
        self.assertTrue(
            (h2 == idx * 100).all(),
            msg="Hidden2 of batch {} is wrong, expected all values to "
                "be {}.".format(idx, idx * 100))
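# Minimal sketch of the splitting behaviour this test asserts. This is an assumption for
# illustration only, not the actual ModularTrainer._split_return_values implementation:
# nested tuples are split recursively along the batch axis and each sample is trimmed to
# its own sequence length.
def _split_return_values_sketch(values, seq_lengths):
    def take(value, idx):
        if isinstance(value, tuple):
            return tuple(take(v, idx) for v in value)
        return value[:seq_lengths[idx], idx]  # Trim to sequence length, select batch entry.

    return [take(values, idx) for idx in range(len(seq_lengths))]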
def create_hparams(hparams_string=None, verbose=False): """Create model hyper-parameters. Parse non-default from given string.""" hparams = ModularTrainer.create_hparams(hparams_string, verbose=False) hparams.synth_vocoder = "raw" hparams.add_hparams( batch_first=True, frame_rate_output_Hz=16000, bit_depth=16, silence_threshold_quantized= None, # Beginning and end of audio below the threshold are trimmed. teacher_forcing_in_test=True, ema_decay=0.9999, mu=255, # Model parameters. input_type=WaveNetWrapper.Config.INPUT_TYPE_MULAW, # hinge_regularizer=True, # Only used in MoL prediction (input_type="raw"). # log_scale_min=float(np.log(1e-14)), # Only used for mixture of logistic distributions. # quantize_channels=256 ) # 256 for input type mulaw-quantize, otherwise 65536 # if hparams.input_type == "mulaw-quantize": # hparams.add_hparam("out_channels", hparams.quantize_channels) # else: # hparams.add_hparam("out_channels", 10 * 3) # num_mixtures * 3 (pi, mean, log_scale) hparams.add_hparams( # layers=24, # 20 # stacks=4, # 2 # residual_channels=512, # gate_channels=512, # skip_out_channels=256, # dropout=1 - 0.95, # kernel_size=3, # weight_normalization=True, use_cond=True, # Determines if conditioning is used. # cin_channels=63, # upsample_conditional_features=False, # upsample_scales=[ # 5, # 4, # 2 # ] ) if hparams.has_value("upsample_conditional_features"): hparams.len_in_out_multiplier = reduce(mul, hparams.upsample_scales, 1) else: hparams.len_in_out_multiplier = 1 hparams.add_hparams( # freq_axis_kernel_size=3, # gin_channels=-1, # n_speakers=1, # use_speaker_embedding=False, sp_type="mfbanks", load_sp=True, load_lf0=False, load_vuv=False, load_bap=False) if verbose: logging.info(hparams.get_debug_string()) return hparams
def test_embeddings(self):
    hparams = ModularTrainer.create_hparams()
    num_emb = 3
    emb_dim = 12
    in_dim = 42  # Contains the embedding index.
    out_dim = 12
    model_config = rnn_dyn.Config(
        in_dim=in_dim,
        layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=128,
                                       num_layers=2, nonlin="relu"),
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=128,
                                       num_layers=3, nonlin="tanh"),
            rnn_dyn.Config.LayerConfig(layer_type="LSTM", out_dim=32,
                                       num_layers=3, bidirectional=True),
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=out_dim)
        ],
        emb_configs=[
            rnn_dyn.Config.EmbeddingConfig(
                embedding_dim=emb_dim,
                name="emb1",
                num_embedding=num_emb,
                affected_layer_group_indices=(0, 2, 3))
        ])
    model = model_config.create_model()
    hparams.add_hparam("f_get_emb_index", [self._f_get_emb_index])

    self.assertEqual(1, len(model.emb_groups))
    self.assertEqual(torch.Size([num_emb, emb_dim]),
                     model.emb_groups["emb1"].weight.shape)

    self.assertEqual(torch.Size([128, in_dim + emb_dim]),
                     model[0][0].weight.shape)
    self.assertEqual(torch.Size([128, 128]), model[0][2].weight.shape)
    self.assertEqual(torch.Size([128, 128]), model[1][0].weight.shape)
    self.assertEqual(torch.nn.Tanh, type(model[1][1]))
    self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]),
                     model[2].weight_ih_l0.shape)
    self.assertEqual(torch.Size([32 * 4, 32 * 2]),
                     model[2].weight_ih_l2_reverse.shape)

    seq_length = torch.tensor((100, 75), dtype=torch.long)
    batch_size = 2
    test_input = torch.ones([batch_size, seq_length[0], in_dim])
    test_input_emb = torch.ones([batch_size, seq_length[0], 1])
    model.init_hidden(batch_size)
    output = model(test_input, test_input_emb,
                   seq_lengths_input=seq_length,
                   max_length_inputs=seq_length[0])

    self.assertEqual(torch.Size([batch_size, seq_length[0], out_dim]),
                     output[0].shape)

    seq_length = torch.tensor((100,), dtype=torch.long)
    batch_size = 1
    test_input = torch.ones([batch_size, seq_length[0], in_dim])
    test_input_emb = torch.ones([batch_size, seq_length[0], 1])
    model.init_hidden(batch_size)
    output = model(test_input, test_input_emb,
                   seq_lengths_input=seq_length,
                   max_length_inputs=seq_length[0])

    self.assertEqual(torch.Size([batch_size, seq_length[0], out_dim]),
                     output[0].shape)