def adaptation(args):
    if args.feat_type == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'
    if args.feat_type == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'

    # Create a feature server for loading features from disk
    feature_server = sidekit.FeaturesServer(features_extractor=None,
                                            feature_filename_structure=features_folder + "/{}.h5",
                                            sources=None,
                                            dataset_list=datasetlist,
                                            mask=mask,
                                            feat_norm="cmvn",
                                            global_cmvn=None,
                                            dct_pca=False,
                                            dct_pca_config=None,
                                            sdc=False,
                                            sdc_config=None,
                                            delta=bool(args.delta),
                                            double_delta=bool(args.delta),
                                            delta_filter=None,
                                            context=None,
                                            traps_dct_nb=None,
                                            rasta=True,
                                            keep_all_features=False)

    # Load the enrollment id-map, the trial definitions and the UBM
    enroll_idmap = sidekit.IdMap(os.getcwd() + '/task/idmap.h5')
    ndx = sidekit.Ndx(os.getcwd() + '/task/dev_ndx.h5')
    ubm = sidekit.Mixture()
    ubm.read(os.getcwd() + '/model/ubm.h5')

    # Accumulate zero- and first-order statistics of the enrollment data
    enroll_stat = sidekit.StatServer(enroll_idmap,
                                     distrib_nb=ubm.distrib_nb(),
                                     feature_size=ubm.dim())
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=feature_server,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                num_thread=args.num_thread)
    enroll_stat.write(os.getcwd() + '/task/enroll_stat.h5')

    # MAP-adapt the UBM means to get one GMM per enrolled speaker
    print('MAP adaptation', end='')
    regulation_factor = 16
    enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
    enroll_sv.write(os.getcwd() + '/task/enroll_sv.h5')
    print('\rMAP adaptation done')

    # Score every trial of the ndx against the adapted models
    print('Compute scores', end='')
    score = sidekit.gmm_scoring(ubm,
                                enroll_sv,
                                ndx,
                                feature_server,
                                num_thread=args.num_thread)
    score.write(os.getcwd() + '/task/dev_score.h5')
    print('\rCompute scores done')
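
# Hedged usage sketch for adaptation(): the CLI flags below (--feat_type,
# --delta, --num_thread) are assumptions read off the attribute accesses in
# the function body, not a documented interface.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='MAP adaptation and GMM scoring')
    parser.add_argument('--feat_type', choices=['mfcc', 'fb'], default='mfcc')
    parser.add_argument('--delta', action='store_true',
                        help='append delta and double-delta coefficients')
    parser.add_argument('--num_thread', type=int, default=4)
    adaptation(parser.parse_args())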
def iv_clustering(input_diar, model, features_server, w_threshold):
    # w_threshold: HAC stopping threshold (sign-flipped below for hac_iv)
    idmap_in = input_diar.id_map()

    # Extract i-vectors on the current document
    local_ivectors = model.train(features_server, idmap_in, normalization=False)
    tmp_ivectors = copy.deepcopy(local_ivectors)  # deep copy (possibly redundant) so local_ivectors stays untouched
    tmp_ivectors.spectral_norm_stat1(model.norm_mean[:1], model.norm_cov[:1])

    # Score every segment against every other segment with PLDA
    ndx = sidekit.Ndx(models=tmp_ivectors.modelset, testsegs=tmp_ivectors.modelset)
    scores = fast_PLDA_scoring(tmp_ivectors, tmp_ivectors, ndx,
                               model.plda_mean, model.plda_f, model.plda_sigma,
                               p_known=0.0, scaling_factor=1.0, check_missing=False)
    scores.scoremat = 0.5 * (scores.scoremat + scores.scoremat.transpose())  # symmetrize

    # Do the clustering within-show
    output_diar, _, __ = hac_iv(input_diar, scores, threshold=-w_threshold)
    return output_diar
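
# Minimal usage sketch for iv_clustering(): 'first_pass_diar', 'iv_model' and
# 'fs' are illustrative names for an initial segmentation (s4d Diar), a trained
# i-vector/PLDA model and a FeaturesServer; the threshold value is arbitrary.
clustered_diar = iv_clustering(first_pass_diar, iv_model, fs, w_threshold=60.0)
print('{} clusters after HAC'.format(len(clustered_diar.unique('cluster'))))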
distribNb = 4  # number of Gaussian distributions for each GMM
rsr2015Path = '/info/home/larcher/RSR2015_v1/'  # Default for RSR2015
audioDir = os.path.join(rsr2015Path, 'sph/male')

# Automatically set the number of parallel processes to run:
# the number of cores available on the machine minus one,
# or 1 if the machine has a single core.
nbThread = max(multiprocessing.cpu_count() - 1, 1)

# Load IdMap, Ndx, Key from HDF5 files and the UBM list
print('Load task definition')
enroll_idmap = sidekit.IdMap('/info/home/larcher/task/3sesspwd_eval_m_trn.h5')
test_ndx = sidekit.Ndx('/info/home/larcher/task/3sess-pwd_eval_m_ndx.h5')
key = sidekit.Key('/info/home/larcher/task/3sess-pwd_eval_m_key.h5')
with open('/info/home/larcher/task/ubm_list.txt') as inputFile:
    ubmList = inputFile.read().split('\n')

# Process the audio to save MFCC on disk
logging.info("Initialize FeaturesExtractor")
extractor = sidekit.FeaturesExtractor(audio_filename_structure=audioDir + "/{}.wav",
                                      feature_filename_structure="./features/{}.h5",
                                      sampling_frequency=16000,
                                      lower_frequency=133.3333,
                                      higher_frequency=6955.4976,
                                      filter_bank="log",
                                      filter_bank_size=40,
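
# Once the extractor is fully configured, a possible next step (a sketch,
# assuming the standard FeaturesExtractor.save_list API) is to extract and
# save the MFCC of every UBM training session in parallel; the sph files
# here are single-channel, hence the zero channel list.
import numpy as np

ubmList = [show for show in ubmList if show]  # drop empty lines from the list file
extractor.save_list(show_list=ubmList,
                    channel_list=np.zeros(len(ubmList), dtype=int),
                    num_thread=nbThread)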
           'cosine', 'mahalanobis', '2cov', 'plda']
# list of scorings to run on the task: 'cosine', 'mahalanobis', '2cov' or 'plda'

# Automatically set the number of parallel processes to run:
# the number of cores available on the machine minus one,
# or 1 if the machine has a single core.
nbThread = max(multiprocessing.cpu_count() - 1, 1)

#################################################################
# Load IdMap, Ndx, Key from HDF5 files and the UBM list
#################################################################
print('Load task definition')
enroll_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_trn.h5', 'hdf5')
nap_idmap = sidekit.IdMap('task/sre04050608_m_training.h5', 'hdf5')
back_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_back.h5', 'hdf5')
test_ndx = sidekit.Ndx('task/sre10_coreX-coreX_m_ndx.h5', 'hdf5')
test_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_test.h5', 'hdf5')
keysX = []
for cond in range(9):
    keysX.append(sidekit.Key('task/sre10_coreX-coreX_det{}_key.h5'.format(cond + 1)))
with open('task/ubm_list.txt', 'r') as inputFile:
    ubmList = inputFile.read().split('\n')

if train:
    # %%
    #################################################################
    # Process the audio to generate MFCC
    #################################################################
    print('Create the feature server to extract MFCC features')
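
# Once trial scores exist, each of the nine SRE10 det conditions loaded into
# keysX above can be evaluated. A hedged sketch using the BOSARIS-style tools
# bundled with SIDEKIT; 'scores_plda' is an illustrative Scores object, not
# defined in this excerpt.
prior = sidekit.logit_effective_prior(0.001, 1, 1)
dp = sidekit.DetPlot(window_style='sre10', plot_title='SRE10 coreX-coreX male, cond 1')
dp.set_system_from_scores(scores_plda, keysX[0], sys_name='PLDA')
dp.create_figure()
dp.plot_rocch_det(0)
dp.plot_mindcf_point(prior, idx=0)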
        # inside search_files(): record every matching file found under corpora_dir
        name = os.path.splitext(name)[0]
        file_dict[corpus + '/' + name.lower()] = os.path.join(path, name)
        corpusList.append(corpus)
        completeFileList.append(os.path.join(path, name))
        fileList.append((corpus + '/' + name).lower())
    return corpusList, completeFileList, fileList, file_dict

extension = '*.sph'
corpusList, completeFileList, sphList, file_dict = search_files(corpora_dir, extension)
with open('nist_existing_sph_files.p', "wb") as f:
    pickle.dump((corpusList, completeFileList, sphList), f)
print("After listing, {} files found\n".format(len(completeFileList)))

trn_male = sidekit.IdMap('task/original_sre10_coreX-coreX_m_trn.h5')
ndx_male = sidekit.Ndx('task/original_sre10_coreX-coreX_m_ndx.h5')
# rightids and segset are 1-D arrays: concatenate them along axis 0
sre10_male_sessions = np.unique(np.concatenate((trn_male.rightids, ndx_male.segset), axis=0))

# Load dataframe
i4u_df = pd.read_csv('Sph_MetaData/I4U.key', low_memory=False)

# Create keys corresponding to NIST info
i4u_df.database.replace(corpora.keys(), corpora.values(), inplace=True)
i4u_df["filename"] = np.nan
i4u_df["nistkey"] = i4u_df.database + '/' + i4u_df.session
i4u_df.channel.replace(['a', 'b', 'x'], ['_a', '_b', ''], inplace=True)
i4u_df["sessionKey"] = i4u_df.nistkey + i4u_df.channel
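
# Hedged sketch of how each sessionKey can be resolved to a file on disk
# through file_dict built above; the exact key normalization (lower-casing,
# channel-suffix handling) is an assumption about this excerpt's conventions.
i4u_df["filename"] = i4u_df.sessionKey.str.lower().map(file_dict)
print("{} I4U sessions have no matching sph file".format(i4u_df.filename.isna().sum()))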
# logging.basicConfig(filename='log/JVPD_ubm-gmm.log', level=logging.DEBUG)
distribNb = 512  # number of Gaussian distributions for each GMM
JVPD_Path = r'C:\Users\yokoo takaya\Desktop\JVPD'
audioDir = os.path.join(JVPD_Path, 'JVPD_ALLsound')

# Automatically set the number of parallel processes to run:
# the number of cores available on the machine minus one,
# or 1 if the machine has a single core.
nbThread = max(multiprocessing.cpu_count() - 1, 1)

# Load task definition (IdMap, Ndx, Key) and the UBM training list
print('Load task definition')
enroll_idmap = sidekit.IdMap('idmap_JVPD.h5')
test_ndx = sidekit.Ndx('ndx_JVPD.h5')
key = sidekit.Key('key_JVPD.h5')
with open('JVPD_filename_all.txt') as inputFile:
    ubmList = inputFile.read().split('\n')

logging.info("Initialize FeaturesExtractor")
extractor = sidekit.FeaturesExtractor(audio_filename_structure=audioDir + "/{}.wav",
                                      feature_filename_structure="./features_PLP/{}.h5",
                                      sampling_frequency=16000,
                                      lower_frequency=133.3333,
                                      higher_frequency=6955.4976,
                                      filter_bank="lin",
                                      filter_bank_size=40,
                                      window_size=0.025,
                                      shift=0.01,
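
# A hedged sketch of the step that usually follows: train the 512-distribution
# UBM on the saved features with EM and binary splitting. It assumes a
# FeaturesServer named 'server' has been built on top of the extractor above;
# 'server' and the output filename are illustrative, not part of this excerpt.
ubm = sidekit.Mixture()
ubm.EM_split(features_server=server,
             feature_list=[show for show in ubmList if show],  # drop empty lines
             distrib_nb=distribNb,
             num_thread=nbThread,
             save_partial=False)
ubm.write('ubm_jvpd_{}.h5'.format(distribNb))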