示例#1
0
def dump_pseudo_label_mfcc(km_path, task, sample_rate, nj):
    """Compute k-means pseudo-labels from MFCC features for one task.

    Every ``(utt_id, path)`` entry produced by ``get_path_iterator`` for
    ``{task}/wav.scp`` is run through ``MfccFeatureReader.get_feats`` and the
    resulting features are passed to an ``ApplyKmeans`` instance.

    Args:
        km_path: Forwarded to ``ApplyKmeans`` (presumably the location of a
            saved k-means model -- confirm against ``ApplyKmeans``).
        task: Directory that contains the ``wav.scp`` listing.
        sample_rate: Forwarded to ``MfccFeatureReader``.
        nj: Number of joblib workers. Any value ``<= 1`` runs serially in
            the calling process.

    Returns:
        A tuple ``(utt_ids, p_labs)`` of two index-aligned lists. Each
        element of ``p_labs`` is the ``.tolist()`` form of what
        ``apply_kmeans`` returned, regardless of ``nj``.
    """
    apply_kmeans = ApplyKmeans(km_path)
    reader = MfccFeatureReader(sample_rate)
    # NOTE(review): the 1.0 argument looks like a sampling fraction meaning
    # "use every entry" -- confirm against get_path_iterator.
    generator, num = get_path_iterator(f"{task}/wav.scp", 1.0)

    if nj > 1:
        # Materialize the listing once so that ids and paths come from the
        # same pass; this avoids re-running the generator and guarantees
        # that utt_ids stays aligned with the computed labels.
        entries = list(generator())
        utt_ids = [utt_id for utt_id, _ in entries]

        feats = joblib.Parallel(n_jobs=nj)(
            joblib.delayed(reader.get_feats)(path)
            for _, path in tqdm.tqdm(entries, total=num)
        )
        raw_labs = joblib.Parallel(n_jobs=nj)(
            joblib.delayed(apply_kmeans)(feat)
            for feat in tqdm.tqdm(feats, total=num)
        )
        # Convert exactly like the serial branch so callers get the same
        # element type whichever path was taken.
        p_labs = [lab.tolist() for lab in raw_labs]
    else:
        utt_ids, p_labs = [], []
        for utt_id, path in tqdm.tqdm(generator(), total=num):
            feat = reader.get_feats(path)
            p_labs.append(apply_kmeans(feat).tolist())
            utt_ids.append(utt_id)
    return utt_ids, p_labs
示例#2
0
def dump_pseudo_label_hubert(km_path, task, sample_rate, url, dir, layer):
    """Compute k-means pseudo-labels from HuBERT features for one task.

    Walks the ``(utt_id, path)`` entries produced by ``get_path_iterator``
    for ``{task}/wav.scp``, extracts features with ``HubertFeatureReader``
    and labels them with ``ApplyKmeans``, one utterance at a time.

    Args:
        km_path: Forwarded to ``ApplyKmeans``.
        task: Directory that contains the ``wav.scp`` listing.
        sample_rate: Forwarded to ``HubertFeatureReader``.
        url: Forwarded to ``HubertFeatureReader``.
        dir: Forwarded to ``HubertFeatureReader``. The name shadows the
            builtin but is kept because it is part of the signature.
        layer: Forwarded to ``HubertFeatureReader``.

    Returns:
        A tuple ``(utt_ids, p_labs)`` of two index-aligned lists; each
        element of ``p_labs`` is the ``.tolist()`` form of the k-means
        output for that utterance.
    """
    kmeans = ApplyKmeans(km_path)
    feat_reader = HubertFeatureReader(sample_rate, url, dir, layer)
    make_entries, total = get_path_iterator(f"{task}/wav.scp", 1.0)

    utt_ids = []
    p_labs = []
    progress = tqdm.tqdm(make_entries(), total=total)
    for utt_id, wav_path in progress:
        labels = kmeans(feat_reader.get_feats(wav_path))
        p_labs.append(labels.tolist())
        utt_ids.append(utt_id)
    return utt_ids, p_labs