def main():
    """Compute frame-level features plus deltas for the UBM audio and pickle them.

    The wav files found in ``dir_wav_ubm`` are passed to ``compute_mfccs_psf``;
    ``psf_deltas_proc.compute_deltas`` is then called with orders 1 and 2
    (first and second derivative) and the three results are merged with
    ``psf_deltas_proc.concatenate_list_of_deltas2`` before being saved with
    ``util.save_pickle``.  All paths are hard-coded in the body.
    """
    work_dir = '/home/jose/PycharmProjects/the_speech'
    audio_dir = 'C:/Users/Win10/Documents/'  # defined but not used below

    # --- Inputs ---
    dir_wav_ubm = '../audio/wav-bea-diktafon'
    dir_anon_75 = '../audio/wav_anon_75_225'
    ubm_wavs = util.read_files_from_dir(dir_wav_ubm)  # BEA files
    # Augmented dementia files: the listing is read but not consumed below.
    augmented_dementia_wavs = util.read_files_from_dir(dir_anon_75)

    # --- Output ---
    num_mfccs = 20
    observation = '2del_aug-ubm'
    ubm_suffix = '/data/hc/alzheimer/mfccs_ubm_dem_{}_{}'.format(
        num_mfccs, observation)
    file_mfccs_ubm = work_dir + ubm_suffix

    # --- Features for the UBM (BEA diktafon) ---
    static_feats = compute_mfccs_psf(dir_wav_ubm, ubm_wavs)
    # Orders 1 and 2, both derived from the static features.
    first_deriv, second_deriv = [
        psf_deltas_proc.compute_deltas(static_feats, order)
        for order in (1, 2)
    ]
    stacked = psf_deltas_proc.concatenate_list_of_deltas2(
        static_feats, first_deriv, second_deriv)
    util.save_pickle(file_mfccs_ubm, stacked)
def compute_fishers_pretr_ubm_2(list_mfcc_files, out_dir, list_files_ubm,
                                recipe, folder_name, feats_info):
    """Encode frame-level features as Fisher vectors, once per UBM file.

    For every file in ``list_files_ubm`` the diagonal-GMM parameters are
    obtained from ``get_diag_gmm_params``; every file in ``list_mfcc_files``
    is then loaded with ``np.load(..., allow_pickle=True)``, each of its
    entries is encoded with ``vlf.fisher.fisher`` and the resulting list is
    pickled with ``util.save_pickle``.

    :param list_mfcc_files: paths of the pickled feature files to encode.
    :param out_dir: output root; concatenated directly with ``'UBMs/'`` and
        with ``recipe``, so it is expected to end with a path separator.
    :param list_files_ubm: paths of the UBM files to load.
    :param recipe: sub-directory name placed right after ``out_dir``.
    :param folder_name: sub-directory name; also the last part of the
        output file name.
    :param feats_info: indexable; items 0, 2 and 1 (in that order) are
        inserted into the output file name.
    :return: None
    """
    for ubm_path in list_files_ubm:
        print("File for UBM:", os.path.basename(ubm_path))

        # NOTE(review): the grandparent directory is always taken from the
        # FIRST UBM file, not from the one being processed - confirm intent.
        first_ubm_grandparent = os.path.dirname(
            os.path.dirname(list_files_ubm[0]))
        parent_dir_ubm = os.path.basename(first_ubm_grandparent)
        gmm_out_dir = out_dir + 'UBMs/' + parent_dir_ubm + '/GMM_fishers/'

        variances, means, weights, g = get_diag_gmm_params(
            file_diag=ubm_path, out_dir=gmm_out_dir)
        print("Fisher-vecs will be extracted using {} number of Gaussians!".
              format(g))

        for mfcc_path in list_mfcc_files:
            # One entry per wav inside each feature file.
            per_wav_feats = np.load(mfcc_path, allow_pickle=True)
            fishers = [
                vlf.fisher.fisher(wav_feats.transpose(),
                                  means.transpose(),
                                  variances.transpose(),
                                  weights,
                                  improved=True)
                for wav_feats in per_wav_feats
            ]

            # NOTE(review): the name does not depend on ``mfcc_path``, so
            # several input files end up in the same output file - confirm.
            fisher_name = '/fisher/fisher-{}{}-{}del-{}g-{}.fisher'.format(
                feats_info[0], feats_info[2], feats_info[1], g, folder_name)
            file_fishers = out_dir + recipe + '/' + folder_name + fisher_name
            util.save_pickle(file_fishers, fishers)
def compute_fishers_pretr_ubm(list_mfcc_files, out_dir, file_ubm, recipe,
                              folder_name, num_dims=60):
    """Encode frame-level features as Fisher vectors using one pretrained UBM.

    The diagonal-GMM parameters are read once through
    ``get_diag_gmm_params``.  Every file in ``list_mfcc_files`` is loaded with
    ``np.load(..., allow_pickle=True)``, each entry is encoded with
    ``vlf.fisher.fisher`` and the list of encodings is pickled with
    ``util.save_pickle``.  The output name is built from the numbers that the
    module-level ``regex`` finds in the input file name.

    :param list_mfcc_files: paths of the pickled feature files to encode.
    :param out_dir: output root; concatenated directly with ``recipe``, so it
        is expected to end with a path separator.
    :param file_ubm: path of the UBM file handed to ``get_diag_gmm_params``.
    :param recipe: sub-directory name placed right after ``out_dir``.
    :param folder_name: sub-directory name; also the last part of the
        output file name.
    :param num_dims: how many leading columns (second axis) of the UBM means
        and variances are passed to the encoder.  Defaults to 60, the value
        that used to be hard-coded; ``None`` keeps every column.
    :return: None
    """
    print("File for UBM:", file_ubm)
    variances, means, weights, g = get_diag_gmm_params(file_diag=file_ubm,
                                                       out_dir=out_dir)
    print("Fisher-vecs will be extracted using {} number of Gaussians!".format(
        g))

    # The truncated, transposed GMM parameters do not change between files,
    # so they are prepared once instead of once per utterance.
    means_t = means[:, :num_dims].transpose()
    variances_t = variances[:, :num_dims].transpose()

    for file_name in list_mfcc_files:
        # One entry per wav inside each feature file.
        list_feat = np.load(file_name, allow_pickle=True)
        list_fishers = [
            vlf.fisher.fisher(feat.transpose(), means_t, variances_t,
                              weights, improved=True)
            for feat in list_feat
        ]

        # Output file (fishers): first match = number of features,
        # second match = number of deltas.
        # NOTE(review): the pattern runs on the full path, so digits in
        # directory names are matched too - confirm against `regex`.
        info_num_feats = regex.findall(file_name)
        obs = '{}del'.format(int(info_num_feats[1]))
        file_fishers = out_dir + recipe + '/' + folder_name + '/fisher/fisher-{}-{}-{}g-{}.fisher'.format(
            int(info_num_feats[0]), obs, g, folder_name)
        util.save_pickle(file_fishers, list_fishers)  # save as pickle

        # An input file without utterances yields an empty list; report
        # None instead of failing on list_fishers[0] after the save.
        first_shape = list_fishers[0].shape if list_fishers else None
        print("{} fishers saved to:".format(len(list_fishers)), file_fishers,
              "with (1st ele.) shape:", first_shape, "\n")
def do_dimension_reduction():
    """Fit a PCA per delta setting and apply it to every feature file of every set.

    Relies on the module-level names ``work_dir``, ``recipe`` and
    ``list_sets``.  For each delta in 0, 1, 2 a PCA is fitted with
    ``pca_trainer`` on the training-set file under ``<work_dir>/data/mask/train``
    and then applied with ``pca_transformer`` to every matching file found by
    ``util.traverse_dir_2`` in ``<work_dir>/data/<recipe>/<set>``.

    Each reduced file is written next to its input, with ``<obs>_pca``
    inserted before the extension.  The previous output path reused the glob
    pattern, so it contained a literal ``*``, lacked a separator after the set
    name and was the same for every input file (results overwrote each
    other).

    :return: None
    """
    import os  # local import: only needed to split the input file name

    print("=======dimension reduction phase========")
    feature_dir = work_dir + '/data/{}/'.format(recipe)
    # Observations of the features' config (if there is such), e.g. '_hires'
    # when the mfccs were extracted using 'hires' params.
    obs = '_hires'

    for delta in [0, 1, 2]:
        # Info of the frame-level features: n_features/dimension, deltas,
        # cepstral type ('mfcc' or 'plp').
        feats_info = [40, delta, 'mfcc']
        list_files_ubm = [
            work_dir + '/data/mask/train/{}_mask_{}_train_{}del{}.{}'.format(
                feats_info[2], feats_info[0], delta, obs, feats_info[2])
        ]
        # Train PCA using the training set.
        pca = pca_trainer(list_files_ubm[0], n_components=0.97)

        for folder_name in list_sets:
            print("\nReading dir:", feature_dir + folder_name)
            list_mfcc_file = util.traverse_dir_2(
                feature_dir + folder_name,
                '*{}_{}_{}del.{}'.format(feats_info[0], folder_name,
                                         feats_info[1], feats_info[2]))
            for item in list_mfcc_file:  # transform each dataset
                reduced_data = pca_transformer(pca, item)
                # assumes `item` is the path of the input file - TODO confirm
                # against util.traverse_dir_2.
                root, ext = os.path.splitext(item)
                util.save_pickle(root + obs + '_pca' + ext, reduced_data)
def train_cnn_encoder(constants):
    """Run the pretrained CNN encoder over the train and val archives and pickle the results.

    Despite the name nothing is trained here: the encoder returned by
    ``cnn_encoder_builder`` is applied with ``apply_model`` to
    ``train2014.zip`` and ``val2014.zip`` under ``DATA_DIR``, and the
    embeddings and file names of each archive are stored with ``save_pickle``.

    Timing notes from the original author - extraction is slow on CPU:
        * about 16 minutes on GPU;
        * InceptionV3 on CPU is 25x slower (about 7 hours);
        * MobileNet on CPU is 10x slower (about 3 hours).

    :param constants: mapping; only ``constants['img_size']`` is read and used
        for both entries of ``input_shape``.
    :return: None
    """
    side = constants['img_size']
    target_shape = (side, side)

    # Reset the backend session before loading the pretrained model.
    K.clear_session()
    encoder, preprocess_for_model = cnn_encoder_builder()

    # (archive, embeddings pickle, file-names pickle), all relative to DATA_DIR.
    extraction_jobs = (
        ('image_captioning/train2014.zip',
         'image_captioning/train_img_embeds.pickle',
         'image_captioning/train_img_fns.pickle'),
        ('image_captioning/val2014.zip',
         'image_captioning/val_img_embed.pickle',
         'image_captioning/val_img_fns.pickle'),
    )
    for archive, embeds_pickle, names_pickle in extraction_jobs:
        embeds, filenames = apply_model(DATA_DIR + archive,
                                        encoder,
                                        preprocess_for_model,
                                        input_shape=target_shape)
        save_pickle(embeds, DATA_DIR + embeds_pickle)
        save_pickle(filenames, DATA_DIR + names_pickle)
def compute_flevel_feats(list_wavs, out_dir, obs, num_feats, num_deltas,
                         recipe, folder_name, cepstral_type="mfcc",
                         raw_energy=True, num_mel_bins=23, low_freq=20,
                         high_freq=0):
    """Extract frame-level cepstral features for a list of wavs and pickle them.

    Every wav is passed to ``cepstral_bkaldi`` and the per-wav results are
    saved as one list to
    ``<out_dir><recipe>/<folder_name>/flevel/<type>_<recipe>_<num_feats>_<folder_name>_<num_deltas>del<obs>.<type>``.
    The target directory, including ``flevel`` and any missing parents, is
    created first; previously only the two parent levels were created, so
    saving depended on ``flevel`` already existing.

    :param list_wavs: wav paths to process.
    :param out_dir: output root; concatenated directly with ``recipe``, so it
        is expected to end with a path separator.
    :param obs: free-form suffix appended to the delta tag in the file name.
    :param num_feats: forwarded to ``cepstral_bkaldi``; also in the file name.
    :param num_deltas: forwarded to ``cepstral_bkaldi``; also in the file name.
    :param recipe: sub-directory name; also part of the file name.
    :param folder_name: sub-directory name; also part of the file name.
    :param cepstral_type: forwarded to ``cepstral_bkaldi``; used as file-name
        prefix and extension.
    :param raw_energy: forwarded unchanged to ``cepstral_bkaldi``.
    :param num_mel_bins: forwarded unchanged to ``cepstral_bkaldi``.
    :param low_freq: forwarded unchanged to ``cepstral_bkaldi``.
    :param high_freq: forwarded unchanged to ``cepstral_bkaldi``.
    :return: None
    """
    print("Extracting {} for {} wavs in: {}".format(cepstral_type,
                                                    len(list_wavs),
                                                    folder_name))

    # Output details
    observation = '{}del{}'.format(num_deltas, obs)
    file_mfccs = out_dir + recipe + '/' + folder_name + '/flevel/{}_{}_{}_{}_{}.{}'.format(
        cepstral_type, recipe, num_feats, folder_name, observation,
        cepstral_type)
    # Create the whole target directory up front so a path problem shows up
    # before the (slow) extraction; exist_ok avoids the isdir/mkdir race.
    os.makedirs(os.path.dirname(file_mfccs), exist_ok=True)

    # ---Calculating and saving the features---
    list_mfccs = [
        cepstral_bkaldi(wav, num_feats, num_deltas,
                        cepstral_type=cepstral_type,
                        raw_energy=raw_energy,
                        num_mel_bins=num_mel_bins,
                        low_freq=low_freq,
                        high_freq=high_freq)
        for wav in list_wavs
    ]

    print("Extracted {} {} from {} utterances".format(len(list_mfccs),
                                                      cepstral_type,
                                                      len(list_wavs)))
    util.save_pickle(file_mfccs, list_mfccs)
# Script fragment: scale two pickled fbank feature sets, reduce them with PCA
# and save the reduced BEA set.  The enclosing scope is not visible here.
# `scale_min_max`, `best_components` and `run_pca` are helpers defined
# elsewhere; what is said about them below is inferred from their names and
# call sites only - TODO confirm.

# Input files
file_fbanks = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_dem_40'
file_fbanks_bea = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_ubm_dem_40'
list_fbanks = util.read_pickle(file_fbanks)
# The BEA pickle is stacked vertically into a single array.
fbanks_bea = np.vstack(util.read_pickle(file_fbanks_bea))

# Output files
file_pca_fbanks = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_dem_40_PCA'
file_pca_bea = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_ubm_dem_40_PCA_SP'

# Scaling and selecting best number of components
bea_scaled, list_dem_scaled = scale_min_max(fbanks_bea, list_fbanks)
# `c` is chosen from the scaled BEA data only and passed on to run_pca.
c = best_components(bea_scaled)

# Reducing dimensions PCA
fbanks_reduced, bea_reduced = run_pca(bea_scaled, list_dem_scaled, c)

# Computing deltas (disabled)
# list_fbanks_deltas = compute_deltas(list_fbanks, 1)
#bea_deltas = python_speech_features.base.delta(feat=fbanks_bea, N=1)
# 2nd deltas (disabled)
#list_fbanks_deltas2 = compute_deltas(list_fbanks, 2)
# bea_deltas2 = python_speech_features.base.delta(feat=fbanks_bea, N=2)
# Concatenating deltas (disabled)
#fbanks_deltas_conc = concatenate_list_of_deltas(list_fbanks, list_fbanks_deltas)
#bea_deltas_conc = np.concatenate((fbanks_bea, bea_deltas))
# NOTE(review): every delta step above is commented out, yet this message is
# still printed - no deltas are computed or concatenated in this fragment.
print('deltas concatenated!')

# Saving data
# Only the reduced BEA set is written; `fbanks_reduced` and `file_pca_fbanks`
# are not used any further in this fragment.
# util.save_pickle(file_pca_fbanks, fbanks_reduced)
util.save_pickle(file_pca_bea, bea_reduced)
# Script fragment: list the audio sets, load the original dementia recordings
# with `load_audio_file` and pickle the result.  The enclosing scope is not
# visible here; `audio_dir`, `dir_wav_ubm`, `work_dir` and `input_length` are
# defined outside this fragment.

# Input files
dir_anon_75 = audio_dir + '/audio/wav_anon_75_225/'
audio_list_original_dem = util.just_original_75(
)  # Reading Original dementia files (not used again in this fragment)
audio_list_ubm = util.read_files_from_dir(dir_wav_ubm)  # Reading BEA files
audio_list_augmented = util.read_files_from_dir(
    dir_anon_75)  # Reading augmented files

# Output files
observation = ''
n_mels = '256'  # kept as a string; only inserted into the file names below
# The two melspec paths below are only referenced by the disabled code at the
# end of this fragment.
file_melspec_dem = work_dir + '/data/melspecs/melspec_dem_{}{}'.format(
    n_mels, observation)
#file_melspec_ubm = work_dir + '/data/melspecs/melspec_ubm_dem_{}{}'.format(n_mels, observation)
file_melspec_augmented = work_dir + '/data/melspecs/melspec_ubm_dem_{}_aug{}'.format(
    n_mels, observation)

# The only active processing step: util.just_original_75() is called a second
# time here instead of reusing `audio_list_original_dem`.
lista = load_audio_file(dir_anon_75, util.just_original_75(), input_length)
util.save_pickle(work_dir + '/data/melnmffc_dem', lista)

# ---Calculating and saving Mel-Specs--- (all disabled)
# for original audios
#specs_dem = compute_mspect_librosa(dir_anon_75, util.just_original_75())
#util.pickle_dump_big(specs_dem, file_melspec_dem)
# for augmented audios
#specs_augmented = compute_mspect_librosa(dir_anon_75, audio_list_augmented)
#util.pickle_dump_big(specs_augmented, file_melspec_augmented)
# for BEA-diktafon (UBM) audios
#specs_ubm = compute_mspect_librosa(dir_wav_ubm, audio_list_ubm)
#util.save_pickle(file_melspec_ubm, specs_ubm)