Exemplo n.º 1
0
def main():
    """Train one model per phone from scored feature vectors and pickle them.

    Feature vectors are read from ``args.feature_scp``. The first element of
    each vector is used as the integer phone id and the remaining elements as
    the model input. Entries without a human score, or whose phone symbol
    disagrees with the human annotation, are reported on stdout and skipped.
    The resulting ``{phone id: model}`` dict is pickled to ``args.model``.
    """
    args = get_args()

    # Phone symbol table, then human expert scores.
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)
    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Group (score, features) samples by phone id.
    samples_by_phone = {}
    for ph_key, vec in kaldi_io.read_vec_flt_scp(args.feature_scp):
        if ph_key not in score_of:
            print(f'Warning: no human score for {ph_key}')
            continue
        ph = int(vec[0])
        if (phone_int2sym is not None
                and phone_int2sym[ph] != phone_of[ph_key]):
            print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[ph_key]} ')
            continue
        sample = (score_of[ph_key], vec[1:])
        if ph not in samples_by_phone:
            samples_by_phone[ph] = []
        samples_by_phone[ph].append(sample)

    # Make the dataset more balanced.
    samples_by_phone = add_more_negative_data(samples_by_phone)

    # Submit one training job per phone, then collect the results.
    with ProcessPoolExecutor(args.nj) as pool:
        pending = []
        for phone_id, pairs in samples_by_phone.items():
            job = pool.submit(train_model_for_phone, pairs)
            pending.append((phone_id, job))
        model_of = {}
        for phone_id, job in pending:
            model_of[phone_id] = job.result()

    # Serialize the trained models.
    with open(args.model, 'wb') as out:
        pickle.dump(model_of, out)
Exemplo n.º 2
0
def main():
    """Compare predicted phone scores against human scores and print metrics.

    Each line of ``args.predicted`` is split on tabs into a key, a predicted
    score and a phone id. Lines with no human score, or whose phone symbol
    does not match the human annotation, are reported on stdout and skipped.
    Every kept line is echoed to ``args.write`` as
    ``key<TAB>phone<TAB>human score<TAB>predicted score``. MSE, correlation
    and a classification report are printed at the end.
    """
    args = get_args()

    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)

    y_true, y_pred = [], []
    with open(args.predicted, 'rt') as f, open(args.write, 'wt') as fw:
        for line in f:
            key, score_text, ph_text = line.strip('\n').split('\t')
            # Parse both numeric fields before any lookup, so a malformed
            # line fails even when its key would have been skipped.
            score = float(score_text)
            ph = int(ph_text)

            if key not in score_of:
                print(f'Warning: no human score for {key}')
                continue
            if phone_int2sym is not None:
                if phone_int2sym[ph] != phone_of[key]:
                    print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[key]} ')
                    continue

            y_true.append(score_of[key])
            y_pred.append(score)
            fw.write(f'{key}\t{ph}\t{score_of[key]:.1f}\t{score:.1f}\n')

    # Summary metrics over all kept lines.
    print(f'MSE: {metrics.mean_squared_error(y_true, y_pred):.2f}')
    print(f'Corr: {np.corrcoef(y_true, y_pred)[0][1]:.2f}')
    print(metrics.classification_report(y_true, y_pred))
Exemplo n.º 3
0
def main():
    """Train one model per phone from GOP values and pickle the result.

    ``args.gop_scp`` is read as posteriors. Every entry of an utterance is
    unpacked as a single ``(phone id, gop)`` pair and keyed as
    ``"<utterance key>.<index>"``. Entries without a human score, or whose
    phone symbol disagrees with the human annotation, are reported on stdout
    and skipped. The ``{phone id: model}`` dict is pickled to ``args.model``.
    """
    args = get_args()

    # Phone symbol table, then human expert scores.
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)
    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Group (score, gop) samples by phone id.
    samples_by_phone = {}
    for key, utt_gops in kaldi_io.read_post_scp(args.gop_scp):
        for i, entry in enumerate(utt_gops):
            # Each entry must hold exactly one (phone, gop) pair.
            [(ph, gop)] = entry
            ph_key = f'{key}.{i}'
            if ph_key not in score_of:
                print(f'Warning: no human score for {ph_key}')
                continue
            if phone_int2sym is not None:
                if phone_int2sym[ph] != phone_of[ph_key]:
                    print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[ph_key]} ')
                    continue
            sample = (score_of[ph_key], gop)
            if ph not in samples_by_phone:
                samples_by_phone[ph] = []
            samples_by_phone[ph].append(sample)

    # Submit one training job per phone, then collect the results.
    with ProcessPoolExecutor(args.nj) as pool:
        pending = []
        for phone_id, pairs in samples_by_phone.items():
            pending.append((phone_id, pool.submit(train_model_for_phone, pairs)))
        model_of = {}
        for phone_id, job in pending:
            model_of[phone_id] = job.result()

    # Serialize the trained models.
    with open(args.model, 'wb') as out:
        pickle.dump(model_of, out)
Exemplo n.º 4
0
def main():
    """Fit a degree-2 polynomial regression per phone on GOP values.

    ``args.gop_scp`` is read as posteriors. Every entry of an utterance is
    unpacked as a single ``(phone id, gop)`` pair and keyed as
    ``"<utterance key>.<index>"``. Entries without a human score, or whose
    phone symbol disagrees with the human annotation, are reported on stdout
    and skipped. For each phone the fitted ``(coef_, intercept_)`` pair is
    stored, and the whole dict is pickled to ``args.model``.
    """
    args = get_args()

    # Phone symbol table, then human expert scores.
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)
    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Group (score, gop) samples by phone id.
    train_data_of = {}
    for key, utt_gops in kaldi_io.read_post_scp(args.gop_scp):
        for i, entry in enumerate(utt_gops):
            # Each entry must hold exactly one (phone, gop) pair.
            [(ph, gop)] = entry
            ph_key = f'{key}.{i}'
            if ph_key not in score_of:
                print(f'Warning: no human score for {ph_key}')
                continue
            if phone_int2sym is not None:
                if phone_int2sym[ph] != phone_of[ph_key]:
                    print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[ph_key]} ')
                    continue
            train_data_of.setdefault(ph, []).append((score_of[ph_key], gop))

    # Train polynomial regression: one linear model per phone on the
    # degree-2 expansion of the GOP value.
    poly = PolynomialFeatures(2)
    model_of = {}
    for phone_id, pairs in train_data_of.items():
        targets = np.array([label for label, _ in pairs]).reshape(-1, 1)
        inputs = np.array([gop for _, gop in pairs]).reshape(-1, 1)
        inputs = poly.fit_transform(inputs)
        inputs, targets = balanced_sampling(inputs, targets)

        regressor = LinearRegression()
        regressor.fit(inputs, targets)
        model_of[phone_id] = (regressor.coef_, regressor.intercept_)

    # Serialize the fitted parameters.
    with open(args.model, 'wb') as out:
        pickle.dump(model_of, out)
Exemplo n.º 5
0
def main():
    """Train a random-forest regressor per phone and pickle the models.

    Feature vectors are read from ``args.feature_scp``. The first element of
    each vector is used as the integer phone id and the remaining elements as
    the model input. Entries without a human score, or whose phone symbol
    disagrees with the human annotation, are reported on stdout and skipped.
    The resulting ``{phone id: model}`` dict is pickled to ``args.model``.
    """
    args = get_args()

    # Phone symbol table
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)

    # Human expert scores
    score_of, phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Prepare training data
    train_data_of = {}
    for ph_key, feat in kaldi_io.read_vec_flt_scp(args.feature_scp):
        if ph_key not in score_of:
            print(f'Warning: no human score for {ph_key}')
            continue
        # Read the phone id unconditionally. It used to be assigned only when
        # a symbol table was available, which left `ph` unbound (or stale from
        # a previous sample) when phone_int2sym was None.
        ph = int(feat[0])
        if phone_int2sym is not None and phone_int2sym[ph] != phone_of[ph_key]:
            print(f'Unmatch: {phone_int2sym[ph]} <--> {phone_of[ph_key]} ')
            continue
        score = score_of[ph_key]
        train_data_of.setdefault(ph, []).append((score, feat))

    # Train models
    model_of = {}
    for ph, pairs in train_data_of.items():
        model = RandomForestRegressor()
        labels = [label for label, _ in pairs]
        # Drop the leading phone id; only the remaining values are inputs.
        feats = [feat[1:] for _, feat in pairs]
        labels = np.array(labels).reshape(-1, 1)
        feats = np.array(feats).reshape(-1, len(feats[0]))
        feats, labels = balanced_sampling(feats, labels)
        # Flatten the column of labels back to 1-D before fitting.
        labels = labels.ravel()
        model.fit(feats, labels)
        model_of[ph] = model
        print(f'Model of phone {ph} trained.')

    # Write to file
    with open(args.model, 'wb') as f:
        pickle.dump(model_of, f)
def main():
    """Plot a 2-D t-SNE scatter of phone features coloured by phone and score.

    Feature vectors are read from ``args.feature_scp``; the first element of
    each vector is used as the integer phone id. Only phones with an id in
    ``[args.min_phone_idx, args.max_phone_idx]`` and a human score are kept.
    Each kept sample is labelled ``"<phone>-<score>"``, at most
    ``args.samples`` of them are drawn at random, and the figure is saved to
    ``args.output``.
    """
    args = get_args()

    # Phone symbol table, then human expert scores.
    _, phone_int2sym = load_phone_symbol_table(args.phone_symbol_table)
    score_of, _phone_of = load_human_scores(args.human_scoring_json, floor=1)

    # Gather the features and their "<phone>-<score>" labels.
    point_labels = []
    point_vectors = []
    for key, feat in kaldi_io.read_vec_flt_scp(args.feature_scp):
        if key not in score_of:
            print(f'Warning: no human score for {key}')
            continue
        ph = int(feat[0])
        if ph not in range(args.min_phone_idx, args.max_phone_idx + 1):
            continue
        # Use the phone symbol when the table has one for this id.
        if phone_int2sym is not None and ph in phone_int2sym:
            ph = phone_int2sym[ph]
        point_labels.append(f'{ph}-{score_of[key]}')
        point_vectors.append(feat[1:])

    # Sampling: draw at most args.samples (features, label) pairs.
    candidates = list(zip(point_vectors, point_labels))
    sample_size = min(args.samples, len(point_labels))
    chosen = random.sample(candidates, sample_size)
    point_vectors, point_labels = list(zip(*chosen))

    # Draw scatters, one colour per distinct label.
    distinct_labels = Counter(point_labels)
    palette = sns.color_palette("colorblind", len(distinct_labels))
    embedded = TSNE(n_components=2).fit_transform(point_vectors)
    scatter = sns.scatterplot(x=embedded[:, 0],
                              y=embedded[:, 1],
                              hue=point_labels,
                              legend='full',
                              palette=palette)
    figure = scatter.get_figure()
    figure.savefig(args.output)