def get_embeddings(self):
    # Load the MFCC train/test pickles and the trained network
    X_train, y_train, speaker_train_names = load(
        get_speaker_pickle(self.get_validation_data_name() + '_train_mfcc'))
    X_test, y_test, speaker_test_names = load(
        get_speaker_pickle(self.get_validation_data_name() + '_test_mfcc'))
    model = load(get_experiment_nets(self.name))

    # Prepare return values
    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []

    train_outputs = self.generate_outputs(X_train, model)
    test_outputs = self.generate_outputs(X_test, model)

    # No per-utterance timing is computed here, so the times are zero-filled
    set_of_times = [np.zeros(len(y_test) + len(y_train), dtype=int)]

    outputs, y_list, s_list = create_data_lists(False, train_outputs, test_outputs,
                                                y_train, y_test)

    embeddings, speakers, number_embeddings = generate_embeddings(
        outputs, y_list, len(model))

    set_of_embeddings.append(embeddings)
    set_of_speakers.append(speakers)
    set_of_num_embeddings.append(number_embeddings)

    checkpoints = [self.network_file]

    return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_times
def create_embeddings(self, X_train, y_train, X_test, y_test):
    short_utterance = self.config.getboolean('validation', 'short_utterances')

    x_list, y_list, _ = create_data_lists(short_utterance, X_train, X_test, y_train, y_test)

    x_cluster_list = []
    y_cluster_list = []
    for x_data, y_data in zip(x_list, y_list):
        x_cluster, y_cluster = self._generate_cluster_data(x_data, y_data)
        x_cluster_list.append(x_cluster)
        y_cluster_list.append(y_cluster)

    # Load the network and add a BatchIterator
    net = load(self.net_path)
    net.batch_iterator_test = BatchIterator(batch_size=128)

    # Predict the output for each data list
    outputs = [None] * len(x_cluster_list)
    for i, x_cluster in enumerate(x_cluster_list):
        outputs[i] = net.predict_proba(x_cluster)

    embeddings, speakers, number_embeddings = \
        generate_embeddings(outputs, y_cluster_list, outputs[0].shape[1])

    # Calculate the time per utterance
    time = TimeCalculator.calc_time_all_utterances(
        y_cluster_list, self.config.getint('luvo', 'seg_size'))

    return embeddings, speakers, number_embeddings, time
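# Note on the variant above: the class probabilities returned by net.predict_proba()
# serve directly as embedding vectors, so the embedding dimension passed to
# generate_embeddings() is the number of network outputs (outputs[0].shape[1]).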
def get_embeddings(self, cluster_count):
    logger = get_logger('kldiv', logging.INFO)
    logger.info('Run pairwise_kldiv')

    checkpoints = self.checkpoints
    train_data_file = self.get_validation_train_data()
    test_data_file = self.get_validation_test_data()

    # Prepare return values
    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []

    for checkpoint in checkpoints:
        logger.info('Run checkpoint: ' + checkpoint)
        network_file = get_experiment_nets(checkpoint)

        X_train, y_train, X_test, y_test = run_analysis_network(
            network_file, train_data_file, test_data_file)

        embeddings, speakers, num_embeddings = generate_embeddings(
            X_train, X_test, y_train, y_test, X_train.shape[1])

        # Fill return values
        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)

    return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings
def get_embeddings(self, out_layer, seg_size, vec_size):
    logger = get_logger('lstm', logging.INFO)
    logger.info('Run pairwise_lstm test')
    # Note: the configured self.out_layer / self.seg_size / self.vec_size are used below,
    # not the arguments of the same name
    logger.info('out_layer -> ' + str(self.out_layer))
    logger.info('seg_size -> ' + str(self.seg_size))
    logger.info('vec_size -> ' + str(self.vec_size))

    # Load and prepare train/test data
    x_test, speakers_test = load_and_prepare_data(self.get_validation_test_data(), self.seg_size)
    x_train, speakers_train = load_and_prepare_data(self.get_validation_train_data(), self.seg_size)

    # Prepare return values
    set_of_embeddings = []
    set_of_speakers = []
    speaker_numbers = []
    checkpoints = list_all_files(get_experiment_nets(), "*pairwise_lstm*.h5")

    # Values outside the loop
    metrics = ['accuracy', 'categorical_accuracy']
    loss = pairwise_kl_divergence
    custom_objects = {'pairwise_kl_divergence': pairwise_kl_divergence}
    optimizer = 'rmsprop'
    vector_size = self.vec_size

    # Fill return values
    for checkpoint in checkpoints:
        logger.info('Running checkpoint: ' + checkpoint)

        # Load and compile the trained network
        network_file = get_experiment_nets(checkpoint)
        model_full = load_model(network_file, custom_objects=custom_objects)
        model_full.compile(loss=loss, optimizer=optimizer, metrics=metrics)

        # Get a Model with the embedding layer as output and predict
        model_partial = Model(inputs=model_full.input,
                              outputs=model_full.layers[self.out_layer].output)
        test_output = np.asarray(model_partial.predict(x_test))
        train_output = np.asarray(model_partial.predict(x_train))
        logger.info('test_output shape -> ' + str(test_output.shape))
        logger.info('train_output shape -> ' + str(train_output.shape))

        embeddings, speakers, num_embeddings = generate_embeddings(
            train_output, test_output, speakers_train, speakers_test, vector_size)

        # Fill the embeddings and speakers into the arrays
        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        speaker_numbers.append(num_embeddings)

    logger.info('Pairwise_lstm test done.')
    return checkpoints, set_of_embeddings, set_of_speakers, speaker_numbers
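# Note: the return values of the pairwise_lstm variant above are parallel lists with one
# entry per checkpoint, i.e. set_of_embeddings[i], set_of_speakers[i] and speaker_numbers[i]
# all belong to checkpoints[i].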
def get_embeddings(self):
    short_utterance = self.config.getboolean('validation', 'short_utterances')
    logger = get_logger('kldiv', logging.INFO)
    logger.info('Run pairwise_kldiv')

    checkpoints = self.checkpoints

    X_train, y_train, s_list_train = load_test_data(self.get_validation_train_data())
    X_test, y_test, s_list_test = load_test_data(self.get_validation_test_data())

    x_list, y_list, s_list = create_data_lists(short_utterance, X_train, X_test,
                                               y_train, y_test, s_list_train, s_list_test)

    # Prepare return values
    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []
    set_of_total_times = []

    for checkpoint in checkpoints:
        logger.info('Run checkpoint: ' + checkpoint)
        network_file = get_experiment_nets(checkpoint)

        x_cluster_list = []
        y_cluster_list = []
        for x, y, s in zip(x_list, y_list, s_list):
            x_cluster, y_cluster = run_analysis_network(network_file, x, y, s)
            x_cluster_list.append(x_cluster)
            y_cluster_list.append(y_cluster)

        embeddings, speakers, num_embeddings = \
            generate_embeddings(x_cluster_list, y_cluster_list, x_cluster_list[0].shape[1])

        # Fill return values
        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)

        # Calculate the time per utterance
        time = TimeCalculator.calc_time_all_utterances(
            y_cluster_list, self.config.getint('pairwise_kldiv', 'seg_size'))
        set_of_total_times.append(time)

    return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_total_times
def create_embeddings(self, train_data, test_data):
    x_train_cluster, y_train_cluster = load_and_prepare_data(train_data)
    x_test_cluster, y_test_cluster = load_and_prepare_data(test_data)

    # Load the network and add a BatchIterator
    net = load(self.net_path)
    net.batch_iterator_test = BatchIterator(batch_size=128)

    # Predict the output
    # predict = prepare_predict(net)
    # output_train = predict(x_train_cluster)
    # output_test = predict(x_test_cluster)
    output_train = net.predict_proba(x_train_cluster)
    output_test = net.predict_proba(x_test_cluster)

    return generate_embeddings(output_train, output_test, y_train_cluster,
                               y_test_cluster, output_train.shape[1])
def create_embeddings(config, checkpoints, x_list, y_list, out_layer=7, seg_size=100):
    # Prepare return values
    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []
    set_of_total_times = []

    # Values outside the loop
    metrics = ['accuracy']
    loss = get_loss(config)
    custom_objects = get_custom_objects(config)
    optimizer = 'adadelta'

    for checkpoint in checkpoints:
        # 'logger' is expected to be configured at module level
        logger.info('Run checkpoint: ' + checkpoint)

        # Load and compile the trained network
        network_file = get_experiment_nets(checkpoint)
        model_full = load_model(network_file, custom_objects=custom_objects)
        model_full.compile(loss=loss, optimizer=optimizer, metrics=metrics)

        # Get a Model with the embedding layer as output and predict
        model_partial = Model(inputs=model_full.input,
                              outputs=model_full.layers[out_layer].output)

        x_cluster_list = []
        y_cluster_list = []
        for x, y in zip(x_list, y_list):
            x_cluster = np.asarray(model_partial.predict(x))
            x_cluster_list.append(x_cluster)
            y_cluster_list.append(y)

        embeddings, speakers, num_embeddings = \
            generate_embeddings(x_cluster_list, y_cluster_list, x_cluster_list[0].shape[1])

        # Fill return values
        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)

        # Calculate the time per utterance
        time = TimeCalculator.calc_time_all_utterances(y_cluster_list, seg_size)
        set_of_total_times.append(time)

    return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_total_times
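# Illustrative usage sketch for the standalone create_embeddings() above. The argument
# values below are assumptions for illustration, not taken from the original code; it
# also relies on a module-level `logger` being configured.
#
#     checkpoints, embeddings, speakers, num_embeddings, times = create_embeddings(
#         config,                           # parsed experiment config
#         ['pairwise_lstm_100_00999.h5'],   # checkpoint file names under get_experiment_nets()
#         x_list, y_list,                   # prepared feature/label lists
#         out_layer=7, seg_size=100)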
def create_embeddings(self, X_train, y_train, X_test, y_test):
    seg_size = self.config.getint('luvo', 'seg_size')
    short_utterance = self.config.getboolean('validation', 'short_utterances')

    x_train, speakers_train = prepare_data(X_train, y_train, seg_size)
    x_test, speakers_test = prepare_data(X_test, y_test, seg_size)

    x_list, y_list, _ = create_data_lists(short_utterance, x_train, x_test,
                                          speakers_train, speakers_test)

    # Load and compile the trained network
    network_file = get_experiment_nets(self.network_name + ".h5")
    model_full = load_model(network_file)
    model_full.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

    # Get a Model with the embedding layer as output and predict
    model_partial = Model(inputs=model_full.input,
                          outputs=model_full.layers[self.config.getint('luvo', 'out_layer')].output)

    x_cluster_list = []
    y_cluster_list = []
    for x_data, y_data in zip(x_list, y_list):
        print(x_data.shape)
        x_cluster = np.asarray(model_partial.predict(x_data))
        x_cluster_list.append(x_cluster)
        y_cluster_list.append(y_data)

    embeddings, speakers, num_embeddings = generate_embeddings(
        x_cluster_list, y_cluster_list, x_cluster_list[0].shape[1])

    # Calculate the time per utterance
    time = TimeCalculator.calc_time_all_utterances(y_cluster_list, seg_size)

    return embeddings, speakers, num_embeddings, time
def get_embeddings(self):
    '''Extract i-vector embeddings for the validation data.'''
    speaker_list = self.get_validation_data_name()
    distrib_nb = self.config.getint('i_vector', 'distrib_nb')
    nbThread = self.config.getint('i_vector', 'nbThread')
    vector_size = self.config.getint('i_vector', 'vector_size')
    feature_extension = 'h5'

    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []
    set_of_times = []
    checkpoints = ["/TV_{}".format(self.network_file)]

    # Load data: UBM, trial lists and the total variability matrix
    ubm = sidekit.Mixture()
    ubm.read(get_experiment_nets() + '/ubm_{}.h5'.format(self.network_file))

    ubm_list, test_list_long = self.load_data(
        speaker_list,
        os.path.splitext(os.path.split(self.get_validation_train_data())[1])[0])
    ubm_list, test_list_short = self.load_data(
        speaker_list,
        os.path.splitext(os.path.split(self.get_validation_test_data())[1])[0])

    tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(
        get_experiment_nets() + "/TV_{}".format(self.network_file))

    fs = sidekit.FeaturesServer(
        feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=get_training('i_vector'),
                speaker_list=speaker_list,
                ext=feature_extension)),
        dataset_list=["energy", "cep", "vad"],
        mask="[0-12]",
        feat_norm="cmvn",
        keep_all_features=True,
        delta=True,
        double_delta=True,
        rasta=True,
        context=None)

    # Extract i-vectors
    test_stat_long = sidekit.StatServer(test_list_long, ubm=ubm, distrib_nb=distrib_nb,
                                        feature_size=0, index=None)
    test_stat_long.accumulate_stat(ubm=ubm, feature_server=fs,
                                   seg_indices=range(test_stat_long.segset.shape[0]),
                                   num_thread=nbThread)

    test_stat_short = sidekit.StatServer(test_list_short, ubm=ubm, distrib_nb=distrib_nb,
                                         feature_size=0, index=None)
    test_stat_short.accumulate_stat(ubm=ubm, feature_server=fs,
                                    seg_indices=range(test_stat_short.segset.shape[0]),
                                    num_thread=nbThread)

    test_iv_long = test_stat_long.estimate_hidden(tv_mean, tv_sigma, V=tv,
                                                  batch_size=100, num_thread=nbThread)[0]
    test_iv_short = test_stat_short.estimate_hidden(tv_mean, tv_sigma, V=tv,
                                                    batch_size=100, num_thread=nbThread)[0]

    iv_list, y_list, s_list = create_data_lists(
        False, test_iv_long.stat1, test_iv_short.stat1,
        test_list_long.leftids.astype(int), test_list_short.leftids.astype(int))

    # Generate embeddings
    embeddings, speakers, num_embeddings = generate_embeddings(iv_list, y_list, vector_size)

    set_of_embeddings.append(embeddings)
    set_of_speakers.append(speakers)
    set_of_num_embeddings.append(num_embeddings)

    # No per-utterance timing is computed for i-vectors, so the times are zero-filled
    set_of_times = [np.zeros((len(test_list_long.leftids) + len(test_list_short.leftids),),
                             dtype=int)]

    return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_times
def get_embeddings(self, cluster_count, vector_size, j):
    logger = get_logger('lstm', logging.INFO)
    logger.info('Run pairwise_lstm test')
    logger.info('Cluster count: ' + str(cluster_count))

    # Load and prepare train/test data
    x_test, speakers_test = load_and_prepare_data(
        self.get_validation_test_data(), cluster_count=cluster_count * 2)
    x_train, speakers_train = load_and_prepare_data(
        self.get_validation_train_data(), cluster_count=cluster_count * 8)

    # Prepare return values
    set_of_embeddings = []
    set_of_speakers = []
    speaker_numbers = []

    # Fill return values
    for checkpoint in self.checkpoints:
        logger.info('Running checkpoint: ' + checkpoint)

        # The networks are preloaded into self.model_dict (keyed by checkpoint name),
        # so no load_model/compile step is needed per checkpoint here
        model_partial = self.model_dict[checkpoint]
        test_output = np.asarray(model_partial.predict(x_test))
        train_output = np.asarray(model_partial.predict(x_train))

        embeddings, speakers, num_embeddings = generate_embeddings(
            train_output, test_output, speakers_train, speakers_test, vector_size, j)

        logger.info('Number of speakers: ' + str(len(speakers)))

        # Fill the embeddings and speakers into the arrays
        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        speaker_numbers.append(num_embeddings)

    logger.info('Pairwise_lstm test done.')
    return self.checkpoints, set_of_embeddings, set_of_speakers, speaker_numbers
def get_embeddings(self):
    short_utterance = self.config.getboolean('validation', 'short_utterances')
    out_layer = self.config.getint('pairwise_lstm', 'out_layer')
    seg_size = self.config.getint('pairwise_lstm', 'seg_size')
    vec_size = self.config.getint('pairwise_lstm', 'vec_size')

    logger = get_logger('lstm', logging.INFO)
    logger.info('Run pairwise_lstm test')
    logger.info('out_layer -> ' + str(out_layer))
    logger.info('seg_size -> ' + str(seg_size))
    logger.info('vec_size -> ' + str(vec_size))

    # Load and prepare train/test data
    x_train, speakers_train, s_list_train = load_test_data(self.get_validation_train_data())
    x_test, speakers_test, s_list_test = load_test_data(self.get_validation_test_data())

    x_train, speakers_train = prepare_data(x_train, speakers_train, seg_size)
    x_test, speakers_test = prepare_data(x_test, speakers_test, seg_size)

    x_list, y_list, s_list = create_data_lists(short_utterance, x_train, x_test,
                                               speakers_train, speakers_test,
                                               s_list_train, s_list_test)

    # Prepare return values
    set_of_embeddings = []
    set_of_speakers = []
    speaker_numbers = []
    set_of_total_times = []
    checkpoints = list_all_files(get_experiment_nets(), r"^pairwise_lstm.*\.h5")

    # Values outside the loop
    metrics = ['accuracy', 'categorical_accuracy']
    loss = pairwise_kl_divergence
    custom_objects = {'pairwise_kl_divergence': pairwise_kl_divergence}
    optimizer = 'rmsprop'
    vector_size = vec_size

    # Fill return values
    for checkpoint in checkpoints:
        logger.info('Running checkpoint: ' + checkpoint)

        # Load and compile the trained network
        network_file = get_experiment_nets(checkpoint)
        model_full = load_model(network_file, custom_objects=custom_objects)
        model_full.compile(loss=loss, optimizer=optimizer, metrics=metrics)

        # Get a Model with the embedding layer as output and predict
        model_partial = Model(inputs=model_full.input,
                              outputs=model_full.layers[out_layer].output)

        x_cluster_list = []
        y_cluster_list = []
        for x, y, s in zip(x_list, y_list, s_list):
            x_cluster = np.asarray(model_partial.predict(x))
            x_cluster_list.append(x_cluster)
            y_cluster_list.append(y)

        embeddings, speakers, num_embeddings = generate_embeddings(
            x_cluster_list, y_cluster_list, vector_size)

        # Fill the embeddings and speakers into the arrays
        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        speaker_numbers.append(num_embeddings)

        # Calculate the time per utterance
        time = TimeCalculator.calc_time_all_utterances(y_cluster_list, seg_size)
        set_of_total_times.append(time)

    logger.info('Pairwise_lstm test done.')
    return checkpoints, set_of_embeddings, set_of_speakers, speaker_numbers, set_of_total_times