def get_embeddings(self):
    """Build embeddings from the stored per-speaker model for train+test data.

    Loads the MFCC train/test pickles named by ``get_validation_data_name()``,
    loads the model persisted under ``self.name``, scores both splits with
    ``generate_outputs`` and turns the outputs into embeddings.

    Returns:
        (checkpoints, set_of_embeddings, set_of_speakers,
         set_of_num_embeddings, set_of_times) — each a one-element list,
        matching the multi-checkpoint return shape used elsewhere.
    """
    base_name = self.get_validation_data_name()
    X_train, y_train, speaker_train_names = load(
        get_speaker_pickle(base_name + '_train_mfcc'))
    X_test, y_test, speaker_test_names = load(
        get_speaker_pickle(base_name + '_test_mfcc'))

    # Model previously persisted by train_network()
    model = load(get_experiment_nets(self.name))

    # Score both splits with the stored model
    outputs_train = self.generate_outputs(X_train, model)
    outputs_test = self.generate_outputs(X_test, model)

    # No per-utterance timing is computed here; report zeros of matching length
    set_of_times = [np.zeros((len(y_test) + len(y_train)), dtype=int)]

    outputs, y_list, s_list = create_data_lists(
        False, outputs_train, outputs_test, y_train, y_test)

    # len(model) presumably equals the output dimensionality (one entry per
    # speaker model) — TODO confirm against generate_embeddings' contract.
    embeddings, speakers, number_embeddings = generate_embeddings(
        outputs, y_list, len(model))

    return ([self.network_file], [embeddings], [speakers],
            [number_embeddings], set_of_times)
def train_network(self):
    """Fit one diagonal-covariance GMM per speaker on MFCC features and save.

    Reads the mixture count from the ``gmm`` config section, loads the
    training pickle, fits a ``GaussianMixture`` per entry in ``X``, and
    persists the resulting list of ``{'mfccs', 'gmm'}`` dicts under
    ``self.name``.
    """
    n_mixtures = self.config.getint('gmm', 'mixturecount')
    X, y, speaker_names = load(
        get_speaker_pickle(self.config.get('train', 'pickle') + '_mfcc'))

    model = []
    for features in X:
        gmm = mixture.GaussianMixture(
            n_components=n_mixtures, covariance_type='diag', n_init=1)
        # Transpose so rows are samples, as sklearn expects — assumes the
        # pickle stores features feature-major; TODO confirm.
        gmm.fit(features.transpose())
        model.append({'mfccs': features, 'gmm': gmm})

    save(model, get_experiment_nets(self.name))
def create_embeddings(config, checkpoints, x_list, y_list, out_layer=7, seg_size=100):
    """For each checkpoint, load the trained net and extract embeddings.

    A partial model is built whose output is layer ``out_layer`` of the full
    network; its predictions over ``x_list`` become the embedding vectors.

    Args:
        config: experiment configuration (used for loss / custom objects).
        checkpoints: iterable of checkpoint names resolvable via
            ``get_experiment_nets``.
        x_list, y_list: parallel lists of input batches and their labels.
        out_layer: index of the layer whose output is the embedding.
        seg_size: segment size used for per-utterance time calculation.

    Returns:
        (checkpoints, set_of_embeddings, set_of_speakers,
         set_of_num_embeddings, set_of_total_times) — one entry per checkpoint.
    """
    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []
    set_of_total_times = []

    # Loop-invariant objects needed to compile each loaded network
    loss = get_loss(config)
    custom_objects = get_custom_objects(config)

    for checkpoint in checkpoints:
        logger.info('Run checkpoint: ' + checkpoint)

        # Load and compile the trained network for this checkpoint
        full_model = load_model(get_experiment_nets(checkpoint),
                                custom_objects=custom_objects)
        full_model.compile(loss=loss, optimizer='adadelta',
                           metrics=['accuracy'])

        # Truncate the network at the embedding layer and predict with it
        embedding_model = Model(inputs=full_model.input,
                                outputs=full_model.layers[out_layer].output)
        x_clusters = [np.asarray(embedding_model.predict(x)) for x in x_list]
        y_clusters = [y for _, y in zip(x_list, y_list)]

        embeddings, speakers, num_embeddings = generate_embeddings(
            x_clusters, y_clusters, x_clusters[0].shape[1])

        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)

        # Per-utterance time accounting for this checkpoint
        set_of_total_times.append(
            TimeCalculator.calc_time_all_utterances(y_clusters, seg_size))

    return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_total_times
def train_network(self):
    """Read pairwise_kldiv hyperparameters from config and launch training.

    Builds the network via ``create_network_n_speakers`` and delegates to the
    module-level ``train_network`` function (the unqualified call resolves to
    the global, not this method), writing the result to the first checkpoint.
    """
    cfg = self.config
    epochs = cfg.getint('pairwise_kldiv', 'n_epochs')
    batch = cfg.getint('pairwise_kldiv', 'batch_size')
    batches_per_epoch = cfg.getint('pairwise_kldiv', 'epoch_batches')
    pickle_name = cfg.get('train', 'pickle')
    speaker_count = cfg.getint('train', 'n_speakers')

    # Build the network and resolve the input/output file paths
    net = create_network_n_speakers(speaker_count, cfg)
    train_network(network=net,
                  train_file=get_speaker_pickle(pickle_name),
                  network_file_out=get_experiment_nets(self.checkpoints[0]),
                  data_generator=self.dg,
                  num_epochs=epochs,
                  batch_size=batch,
                  epoch_batches=batches_per_epoch)