Example #1
import os

import sidekit


def adaptation(args):
    if args.feat_type == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"  # keep cepstral coefficients 0-12 (the first 13)
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'
    elif args.feat_type == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'

    # create feature server for loading feature from disk
    feature_server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=bool(args.delta),
        double_delta=bool(args.delta),
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)

    enroll_idmap = sidekit.IdMap(os.getcwd() + '/task/idmap.h5')
    ndx = sidekit.Ndx(os.getcwd() + '/task/dev_ndx.h5')

    ubm = sidekit.Mixture()
    ubm.read(os.getcwd() + '/model/ubm.h5')
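    # Accumulate zero- and first-order (Baum-Welch) statistics of the
    # enrollment segments against the UBM; the MAP adaptation below uses them.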
    enroll_stat = sidekit.StatServer(enroll_idmap,
                                     distrib_nb=ubm.distrib_nb(),
                                     feature_size=ubm.dim())
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=feature_server,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                num_thread=args.num_thread)
    enroll_stat.write(os.getcwd() + '/task/enroll_stat.h5')

    print('MAP adaptation', end='')
    regulation_factor = 16  # MAP relevance factor; larger values keep adapted means closer to the UBM
    enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
    enroll_sv.write(os.getcwd() + '/task/enroll_sv.h5')
    print('\rMAP adaptation done')

    print('Compute scores', end='')
    score = sidekit.gmm_scoring(ubm,
                                enroll_sv,
                                ndx,
                                feature_server,
                                num_thread=args.num_thread)
    score.write(os.getcwd() + '/task/dev_score.h5')
    print('\rCompute scores done')
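
A minimal driver for this function might look like the sketch below. The argument names (feat_type, delta, num_thread) come from the code above; the defaults and the description string are assumptions.

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='GMM-UBM MAP adaptation and scoring')
    parser.add_argument('--feat_type', choices=['mfcc', 'fb'], default='mfcc')
    parser.add_argument('--delta', action='store_true',
                        help='append delta and double-delta coefficients')
    parser.add_argument('--num_thread', type=int, default=1)
    adaptation(parser.parse_args())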
Example #2
import copy

import sidekit
from sidekit.iv_scoring import fast_PLDA_scoring  # assumed import path
from s4d.clustering.hac_iv import hac_iv          # assumed import path


def iv_clustering(input_diar, model, features_server, w_threshold=0.0):
    # w_threshold: stopping threshold for the hierarchical clustering
    idmap_in = input_diar.id_map()
    # extract i-vectors on the current document
    local_ivectors = model.train(features_server, idmap_in, normalization=False)

    tmp_ivectors = copy.deepcopy(local_ivectors)  # not sure this line is useful

    tmp_ivectors.spectral_norm_stat1(model.norm_mean[:1], model.norm_cov[:1])  # one spectral-norm iteration
    ndx = sidekit.Ndx(models=tmp_ivectors.modelset, testsegs=tmp_ivectors.modelset)
    scores = fast_PLDA_scoring(tmp_ivectors, tmp_ivectors, ndx,
                               model.plda_mean,
                               model.plda_f,
                               model.plda_sigma,
                               p_known=0.0,
                               scaling_factor=1.0,
                               check_missing=False)
    scores.scoremat = 0.5 * (scores.scoremat + scores.scoremat.transpose())

    # Do the clustering within-show
    output_diar, _, __ = hac_iv(input_diar, scores, threshold=-w_threshold)

    return output_diar
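
The symmetrization step above matters because PLDA scores are not exactly symmetric when the same i-vectors act as both models and test segments; averaging the matrix with its transpose is the standard fix before clustering. A toy illustration in plain numpy (the values are made up):

import numpy as np

S = np.array([[0.0, 1.2],
              [0.8, 0.0]])  # slightly asymmetric pairwise scores (made up)
S_sym = 0.5 * (S + S.T)     # off-diagonals become 0.5 * (1.2 + 0.8) = 1.0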
Example #3
import logging
import multiprocessing
import os

import sidekit

distribNb = 4  # number of Gaussian distributions for each GMM
rsr2015Path = '/info/home/larcher/RSR2015_v1/'

# Default for RSR2015
audioDir = os.path.join(rsr2015Path, 'sph/male')

# Automatically set the number of parallel process to run.
# The number of threads to run is set equal to the number of cores available
# on the machine minus one or to 1 if the machine has a single core.
nbThread = max(multiprocessing.cpu_count() - 1, 1)

# Load IdMap, Ndx, Key from HDF5 files and ubm_list

print('Load task definition')
enroll_idmap = sidekit.IdMap('/info/home/larcher/task/3sesspwd_eval_m_trn.h5')
test_ndx = sidekit.Ndx('/info/home/larcher/task/3sess-pwd_eval_m_ndx.h5')
key = sidekit.Key('/info/home/larcher/task/3sess-pwd_eval_m_key.h5')
with open('/info/home/larcher/task/ubm_list.txt') as inputFile:
    ubmList = inputFile.read().splitlines()  # splitlines() drops the trailing empty entry

# Process the audio to save MFCC on disk

logging.info("Initialize FeaturesExtractor")
extractor = sidekit.FeaturesExtractor(
    audio_filename_structure=audioDir + "/{}.wav",
    feature_filename_structure="./features/{}.h5",
    sampling_frequency=16000,
    lower_frequency=133.3333,
    higher_frequency=6955.4976,
    filter_bank="log",
    filter_bank_size=40,
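
The FeaturesExtractor call is cut off here. A sketch of how such a call is typically finished and run, based on common SIDEKIT tutorial settings; every value below is an assumption, not the original code:

#     ... window_size=0.025, shift=0.01, ceps_number=13,  # assumed
#     vad="snr", snr=40, pre_emphasis=0.97,               # assumed
#     save_param=["vad", "energy", "cep", "fb"],          # assumed
#     keep_all_features=True)                             # assumed

# Extract and save features for every UBM session (channel 0 = mono).
extractor.save_list(show_list=ubmList,
                    channel_list=[0] * len(ubmList),
                    num_thread=nbThread)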
Example #4
    'cosine', 'mahalanobis', '2cov', 'plda'
]  # scoring back-ends to run on the task

# Automatically set the number of parallel process to run.
# The number of threads to run is set equal to the number of cores available
# on the machine minus one or to 1 if the machine has a single core.
nbThread = max(multiprocessing.cpu_count() - 1, 1)

#################################################################
# Load IdMap, Ndx, Key from HDF5 files and ubm_list
#################################################################
print('Load task definition')
enroll_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_trn.h5', 'hdf5')
nap_idmap = sidekit.IdMap('task/sre04050608_m_training.h5', 'hdf5')
back_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_back.h5', 'hdf5')
test_ndx = sidekit.Ndx('task/sre10_coreX-coreX_m_ndx.h5', 'hdf5')
test_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_test.h5', 'hdf5')
keysX = []
for cond in range(9):
    keysX.append(
        sidekit.Key('task/sre10_coreX-coreX_det{}_key.h5'.format(cond + 1)))

with open('task/ubm_list.txt', 'r') as inputFile:
    ubmList = inputFile.read().splitlines()

if train:
    # %%
    #################################################################
    # Process the audio to generate MFCC
    #################################################################
    print('Create the feature server to extract MFCC features')
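
The snippet stops right after this print. A minimal sketch of the feature server it announces, patterned on Example #1; the dataset list, mask, and normalization settings are assumptions:

feature_server = sidekit.FeaturesServer(
    feature_filename_structure="./features/{}.h5",
    dataset_list=["energy", "cep", "vad"],  # assumed, as in Example #1
    mask="[0-12]",                          # assumed: keep the first 13 cepstra
    feat_norm="cmvn",
    delta=True,
    double_delta=True,
    rasta=True,
    keep_all_features=False)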
Example #5
                name = os.path.splitext(name)[0]
                file_dict[corpus + '/' + name.lower()] = os.path.join(path, name)
                corpusList.append(corpus)
                completeFileList.append(os.path.join(path, name))
                fileList.append((corpus + '/' + name).lower())
    return corpusList, completeFileList, fileList, file_dict

extension = '*.sph'
corpusList, completeFileList, sphList, file_dict = search_files(corpora_dir, extension)
with open('nist_existing_sph_files.p', 'wb') as f:
    pickle.dump((corpusList, completeFileList, sphList), f)
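
Caching the listing pays off on re-runs; reloading it is the mirror image (a short sketch):

with open('nist_existing_sph_files.p', 'rb') as f:
    corpusList, completeFileList, sphList = pickle.load(f)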

print("After listing, {} files found\n".format(len(completeFileList)))

trn_male = sidekit.IdMap('task/original_sre10_coreX-coreX_m_trn.h5')
ndx_male = sidekit.Ndx('task/original_sre10_coreX-coreX_m_ndx.h5')
sre10_male_sessions = np.unique(np.concatenate((trn_male.rightids, ndx_male.segset), axis=0))  # 1-D arrays, so axis=0 (axis=1 would raise)

# Load dataframe
i4u_df = pd.read_csv('Sph_MetaData/I4U.key', low_memory=False)

# Create keys corresponding to NIST info
i4u_df.database.replace(list(corpora.keys()), list(corpora.values()), inplace=True)  # wrap dict views in lists for replace()
i4u_df["filename"] = np.nan
i4u_df["nistkey"] = i4u_df.database + '/' + i4u_df.session

i4u_df.channel.replace(['a', 'b', 'x'], ['_a', '_b', ''], inplace=True)
i4u_df["sessionKey"] = i4u_df.nistkey + i4u_df.channel

# logging.basicConfig(filename='log/JVPD_ubm-gmm.log',level=logging.DEBUG)

distribNb = 512  # number of Gaussian distributions for each GMM
JVPD_Path = r'C:\Users\yokoo takaya\Desktop\JVPD'

# Audio location for JVPD
audioDir = os.path.join(JVPD_Path, 'JVPD_ALLsound')

# Automatically set the number of parallel process to run.
# The number of threads to run is set equal to the number of cores available
# on the machine minus one or to 1 if the machine has a single core.
nbThread = max(multiprocessing.cpu_count() - 1, 1)

print('Load task definition')
enroll_idmap = sidekit.IdMap('idmap_JVPD.h5')
test_ndx = sidekit.Ndx('ndx_JVPD.h5')
key = sidekit.Key('key_JVPD.h5')
with open('JVPD_filename_all.txt') as inputFile:
    ubmList = inputFile.read().splitlines()

logging.info("Initialize FeaturesExtractor")
extractor = sidekit.FeaturesExtractor(audio_filename_structure=audioDir+"/{}.wav",
                                      feature_filename_structure="./features_PLP/{}.h5",
                                      sampling_frequency=16000,
                                      lower_frequency=133.3333,
                                      higher_frequency=6955.4976,
                                      filter_bank="lin",
                                      filter_bank_size=40,
                                      window_size=0.025,
                                      shift=0.01,
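
The extractor definition is cut off here. In SIDEKIT UBM-GMM scripts the saved features are then typically wrapped in a FeaturesServer and used to train the UBM with split-EM; a sketch under those assumptions (the server settings and output name are guesses, distribNb, ubmList, and nbThread come from the code above):

server = sidekit.FeaturesServer(feature_filename_structure="./features_PLP/{}.h5",
                                dataset_list=["energy", "cep", "vad"],  # assumed
                                feat_norm="cmvn",                       # assumed
                                keep_all_features=False)

ubm = sidekit.Mixture()
# EM with binary splitting of the Gaussians up to distribNb components.
ubm.EM_split(server, ubmList, distribNb, num_thread=nbThread, save_partial=True)
ubm.write('ubm_JVPD.h5')  # hypothetical output name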