Example #1
def main():
    check_dir('eval')
    test = wav16khz2mfcc('eval')
    P_t = 0.5
    P_nt = 1 - P_t
    fname = 'GMM_model.pkl'
    if len(sys.argv) > 1:
        fname = sys.argv[1]

    # Load the chosen model (target and non-target GMM parameters)
    with open(fname, 'rb') as f:
        Ws_t, MUs_t, COVs_t, Ws_nt, MUs_nt, COVs_nt = pickle.load(f)

    for tst in sorted(test.keys()):
        ll_t = logpdf_gmm(test[tst], Ws_t, MUs_t, COVs_t)
        ll_nt = logpdf_gmm(test[tst], Ws_nt, MUs_nt, COVs_nt)
        scr = (sum(ll_t) + np.log(P_t)) - (sum(ll_nt) + np.log(P_nt))
        tst = tst.split("/")[-1].split(".")[0]
        if scr >= 0:
            print(tst, scr, 1)
        else:
            print(tst, scr, 0)
def classification(evalData, trainClasses, weights, meanValues, covarMatrices):
    print("STEP5: Classification started.")

    file = open("audio_GMM.txt", "w")

    # For every person to evaluate, calculate the sum of LLs for evaluation data
    for evalPerson in evalData:
        llVals = {}
        name = evalPerson.split('/')[3]
        name = name.split('.')[0]
        print("Classifing person ", name)
        file.write(name)
        file.write(' ')

        for trainPerson in trainClasses:
            llVals[trainPerson] = sum(
                logpdf_gmm(evalData[evalPerson], weights[trainPerson],
                           meanValues[trainPerson],
                           covarMatrices[trainPerson]))

        llNonTarget = llVals["non-target"]
        llTarget = llVals["target"]

        softScore = (llTarget + np.log(0.5)) - (llNonTarget + np.log(0.5))
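        # With equal priors the log-prior terms cancel, so softScore is just
        # the target vs. non-target log-likelihood ratio.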

        file.write(str(softScore))

        file.write(' ')
        # Hard decision
        if softScore > 500:
            file.write('1')
        else:
            file.write('0')

        file.write('\n')

    file.close()
    print("STEP5 Done: Classification ended.\n")

    print("Classification is finished. Check file 'audio_GMM.txt' for the results.")
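All of these snippets call logpdf_gmm() from a shared helper library. A minimal
sketch of what it computes, assuming diagonal covariances passed as variance
vectors (logpdf_gmm_sketch is our illustrative name, not the library function):

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

def logpdf_gmm_sketch(x, ws, mus, covs):
    # Per-frame GMM log-likelihood log sum_c w_c * N(x; mu_c, cov_c),
    # accumulated stably in the log domain via logsumexp.
    comp = [np.log(w) + multivariate_normal.logpdf(x, m, np.diag(c))
            for w, m, c in zip(ws, mus, covs)]
    return logsumexp(comp, axis=0)  # shape (num_frames,)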
Example #3
ws2 = np.ones(m2) / m2

#fig = plt.figure()
#ims = []

# Run 30 iterations of EM algorithm to train the two GMM models
for i in range(30):
    plt.plot(x1[:,0], x1[:,1], 'r.', x2[:,0], x2[:,1], 'b.')
    for w, m, c in zip(ws1, mus1, covs1):
        gellipse(m, c, 100, 'r', lw=round(w * 10))
    for w, m, c in zip(ws2, mus2, covs2):
        gellipse(m, c, 100, 'b', lw=round(w * 10))
    ws1, mus1, covs1, ttl1 = train_gmm(x1, ws1, mus1, covs1)
    ws2, mus2, covs2, ttl2 = train_gmm(x2, ws2, mus2, covs2)
    print('Total log-likelihood: %s for class X1; %s for class X2' % (ttl1, ttl2))
    plt.show()

hard_decision = lambda x: logpdf_gmm(x, ws1, mus1, covs1) + np.log(p1) > logpdf_gmm(x, ws2, mus2, covs2) + np.log(p2)
plot2dfun(hard_decision, ax, 500)
plt.plot(x1[:,0], x1[:,1], 'r.')
plt.plot(x2[:,0], x2[:,1], 'b.')
for w, m, c in zip(ws1, mus1, covs1):
    gellipse(m, c, 100, 'r', lw=round(w * 10))
for w, m, c in zip(ws2, mus2, covs2):
    gellipse(m, c, 100, 'b', lw=round(w * 10))

plt.figure()
x1_posterior  = lambda x: logistic_sigmoid(logpdf_gmm(x, ws1, mus1, covs1) + np.log(p1) - logpdf_gmm(x, ws2, mus2, covs2) - np.log(p2))
plot2dfun(x1_posterior, ax, 500)
plt.plot(x1[:,0], x1[:,1], 'r.')
plt.plot(x2[:,0], x2[:,1], 'b.')
for w, m, c in zip(ws1, mus1, covs1):
    gellipse(m, c, 100, 'r', lw=round(w * 10))
for w, m, c in zip(ws2, mus2, covs2):
    gellipse(m, c, 100, 'b', lw=round(w * 10))
plt.show()
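# Note: with these definitions the hard decision above is just the posterior
# thresholded at 0.5, i.e. hard_decision(x) == (x1_posterior(x) > 0.5).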
# Weights of the male model (its means MUs_m and covariances COVs_m were
# initialized above, analogously to the female model below)
Ws_m = np.ones(M_m) / M_m

# Initialize parameters of the female model
M_f = 5
MUs_f = train_f[randint(1, len(train_f), M_f)]
COVs_f = [np.var(train_f, axis=0)] * M_f
Ws_f = np.ones(M_f) / M_f

# Run 30 iterations of EM algorithm to train the two GMMs from males and females
for jj in range(30):
    [Ws_m, MUs_m, COVs_m, TTL_m] = train_gmm(train_m, Ws_m, MUs_m, COVs_m)
    [Ws_f, MUs_f, COVs_f, TTL_f] = train_gmm(train_f, Ws_f, MUs_f, COVs_f)
    print('Iteration:', jj, ' Total log-likelihood:', TTL_m, 'for males;',
          TTL_f, 'for females')

# Now run recognition for all male test utterances; the same loop is repeated
# for the female test utterances below
score = []
for tst in test_m:
    ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
    ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
    score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))
print(score)

score = []
for tst in test_f:
    ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
    ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
    score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))
print(score)
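train_gmm() above appears to perform one EM iteration, returning the updated
parameters and the total log-likelihood. A minimal sketch of such an iteration,
assuming diagonal covariances stored as variance vectors (as in the
initializations above); train_gmm_sketch is our illustrative name, not the
library function:

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

def train_gmm_sketch(x, ws, mus, covs):
    # E-step: per-component, per-frame log of weighted component likelihoods
    log_p = np.array([np.log(w) + multivariate_normal.logpdf(x, m, np.diag(c))
                      for w, m, c in zip(ws, mus, covs)])   # shape (C, N)
    log_evidence = logsumexp(log_p, axis=0)                 # shape (N,)
    ttl = log_evidence.sum()                                # total log-likelihood
    r = np.exp(log_p - log_evidence)                        # responsibilities
    # M-step: re-estimate weights, means and diagonal covariances
    nk = r.sum(axis=1)
    ws_new = nk / nk.sum()
    mus_new = r.dot(x) / nk[:, np.newaxis]
    covs_new = [r[c].dot((x - mus_new[c]) ** 2) / nk[c] for c in range(len(ws))]
    return ws_new, mus_new, covs_new, ttl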
Example #5
def main():
    check_dir(os.path.dirname(negative_test_path))
    check_dir(os.path.dirname(negative_train_path))
    check_dir(os.path.dirname(positive_test_path))
    check_dir(os.path.dirname(positive_train_path))

    train_m = list(wav16khz2mfcc(positive_train_path).values())
    train_f = list(wav16khz2mfcc(negative_train_path).values())
    test_m = list(wav16khz2mfcc(positive_test_path).values())
    test_f = list(wav16khz2mfcc(negative_test_path).values())

    train_m = np.vstack(train_m)
    train_f = np.vstack(train_f)
    dim = train_m.shape[1]

    cov_tot = np.cov(np.vstack([train_m, train_f]).T, bias=True)
    d, e = scipy.linalg.eigh(cov_tot, eigvals=(dim - 2, dim - 1))

    train_m_pca = train_m.dot(e)
    train_f_pca = train_f.dot(e)
    # Classes are not well separated in 2D PCA subspace

    n_m = len(train_m)
    n_f = len(train_f)
    cov_wc = (n_m * np.cov(train_m.T, bias=True) +
              n_f * np.cov(train_f.T, bias=True)) / (n_m + n_f)
    cov_ac = cov_tot - cov_wc
    d, e = scipy.linalg.eigh(cov_ac, cov_wc, eigvals=(dim - 1, dim - 1))
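    # The generalized eigenvector of (cov_ac, cov_wc) with the largest
    # eigenvalue is the LDA direction: it maximizes across-class variance
    # relative to within-class variance; 'e' is used as the 1-D projection
    # further below.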

    # Let's define uniform a priori probabilities of the classes:
    P_m = 0.5
    P_f = 1 - P_m

    ll_m = logpdf_gauss(test_m[0], np.mean(train_m, axis=0),
                        np.var(train_m, axis=0))
    ll_f = logpdf_gauss(test_m[0], np.mean(train_f, axis=0),
                        np.var(train_f, axis=0))

    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m +
                                        np.exp(ll_f) * P_f)
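    # Note: exponentiating log-likelihoods like this can easily under- or
    # overflow; a safer variant would evaluate the posterior in the log
    # domain (e.g. with scipy.special.logsumexp).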

    ll_m = logpdf_gauss(test_m[0], *train_gauss(train_m))
    ll_f = logpdf_gauss(test_m[0], *train_gauss(train_f))
    # The '*' before 'train_gauss' passes both return values (mean and cov) as parameters of 'logpdf_gauss'
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m +
                                        np.exp(ll_f) * P_f)
    plt.figure()
    plt.plot(posterior_m, 'b')
    plt.plot(1 - posterior_m, 'r')
    plt.figure()
    plt.plot(ll_m, 'b')
    plt.plot(ll_f, 'r')

    # Again Gaussian models with full covariance matrices, now testing a female utterance

    ll_m = logpdf_gauss(test_f[1], *train_gauss(train_m))
    ll_f = logpdf_gauss(test_f[1], *train_gauss(train_f))
    # The '*' before 'train_gauss' passes both return values (mean and cov) as parameters of 'logpdf_gauss'
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m +
                                        np.exp(ll_f) * P_f)
    plt.figure()
    plt.plot(posterior_m, 'b')
    plt.plot(1 - posterior_m, 'r')
    plt.figure()
    plt.plot(ll_m, 'b')
    plt.plot(ll_f, 'r')

    score = []
    mean_m, cov_m = train_gauss(train_m)
    mean_f, cov_f = train_gauss(train_f)
    for tst in test_m:
        ll_m = logpdf_gauss(tst, mean_m, cov_m)
        ll_f = logpdf_gauss(tst, mean_f, cov_f)
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))

    # Run recognition with 1-dimensional LDA projected data
    score = []
    mean_m, cov_m = train_gauss(train_m.dot(e))
    mean_f, cov_f = train_gauss(train_f.dot(e))
    for tst in test_m:
        ll_m = logpdf_gauss(tst.dot(e), mean_m, np.atleast_2d(cov_m))
        ll_f = logpdf_gauss(tst.dot(e), mean_f, np.atleast_2d(cov_f))
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))
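    # np.atleast_2d wraps the scalar variance of the 1-D LDA projection so it
    # has the matrix shape that logpdf_gauss presumably expects.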

    M_m = 12

    MUs_m = train_m[randint(1, len(train_m), M_m)]
    COVs_m = [np.var(train_m, axis=0)] * M_m
    Ws_m = np.ones(M_m) / M_m

    M_f = 7
    MUs_f = train_f[randint(1, len(train_f), M_f)]
    COVs_f = [np.var(train_f, axis=0)] * M_f
    Ws_f = np.ones(M_f) / M_f

    for jj in range(100):
        [Ws_m, MUs_m, COVs_m, TTL_m] = train_gmm(train_m, Ws_m, MUs_m, COVs_m)
        [Ws_f, MUs_f, COVs_f, TTL_f] = train_gmm(train_f, Ws_f, MUs_f, COVs_f)

    score = []
    testok = 0
    testnok = 0
    for tst in test_m:
        ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
        ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
        scr = (sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f))
        score.append(scr)
        if scr >= 0:
            testok += 1
        else:
            testnok += 1
    print("target is " + str(testok / (testok + testnok)))

    score = []
    testok = 0
    testnok = 0
    for tst in test_f:
        ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
        ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
        scr = (sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f))
        score.append(scr)
        if scr < 0:
            testok += 1
        else:
            testnok += 1
    print("non target is " + str(testok / (testok + testnok)))
    print('Saved as "GMM_model.pkl"')
    with open('GMM_model.pkl', 'wb') as f:
        pickle.dump([Ws_m, MUs_m, COVs_m, Ws_f, MUs_f, COVs_f], f)
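(Example #1 above shows how this "GMM_model.pkl" file is loaded back and used
for scoring.)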
Example #6
    covs[index] = [cov[index]] * m
    ws[index] = np.ones(m) / m

ttl = [None] * len(train_sample)
# Run 110 iterations of EM algorithm to train GMM models
for i in range(110):
    for index in range(len(train_sample)):
        ws[index], mus[index], covs[index], ttl[index] = train_gmm(
            train_sample[index], ws[index], mus[index], covs[index])

ll_c = [None] * 2
ll_c_new = [None] * 2

f = open("gmm_speech.txt", "w")

for test in range(len(real_test_sample)):

    for index in range(len(train_sample)):
        ll_c[index] = sum(
            logpdf_gmm(real_test_sample[test], ws[index], mus[index],
                       covs[index])) + np.log(P_c[index])

    ll_index_max = np.argmax(ll_c)

    prob_1 = 1 - (ll_c[0] / (ll_c[0] + ll_c[1]))
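    # Caveat: ll_c holds log-likelihoods (typically large negative numbers),
    # so this ratio is only a rough score, not a true posterior; a proper
    # posterior would be np.exp(ll_c - logsumexp(ll_c)).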

    real_test_name[test] = real_test_name[test].replace(".wav", "")
    f.write(real_test_name[test].replace("data/eval/", "") + ' ' +
            str(prob_1) + ' ' + ("1" if ll_index_max == 0 else "0") + "\n")

f.close()
Example #7
        n = 15
        for iteration in range(n):
            [Ws[i], MUs[i], COVs[i], TTL] = train_gmm(train, Ws[i], MUs[i], COVs[i])
            print("Training iteration: {}/{}, total log-likelihood: {}".format(iteration + 1, n, TTL))

    errors = 0
    trials = 0
    for i in range(NUM_CLASSES):
        class_id = i + 1
        test = list(wav16khz2mfcc("dev/{}".format(class_id)).values())

        for j, test_data in enumerate(test):
            log_lh = []
            for ii in range(NUM_CLASSES):
                log_lh.append(sum(logpdf_gmm(test_data, Ws[ii], MUs[ii], COVs[ii])))
            winning_class_ind = np.argmax(log_lh)
            print("Correct class {} | Winning class {} with value {}".format(i + 1, winning_class_ind + 1, log_lh[winning_class_ind]))
            if winning_class_ind != i:
                errors += 1
            trials += 1

    print("------------------------------------------")
    print("False predictions: {} out of {}.".format(errors, trials))
    print("Error ratio: {}".format(errors/trials))
    print("------------------------------------------")

    print("Model evaluation...")
    with open("GMM_audio_results", "w") as f:
        eval_data = wav16khz2mfcc("eval/")
        eval_names = [x.split("\\")[1].split(".")[0] for x in eval_data.keys()]
        eval_vals = list(eval_data.values())
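        # The snippet is cut off here; a minimal sketch of how such an
        # evaluation loop could finish, mirroring Examples #6 and #9
        # (assumed output format, not the original author's code):
        for name, data in zip(eval_names, eval_vals):
            log_lh = [sum(logpdf_gmm(data, Ws[ii], MUs[ii], COVs[ii]))
                      for ii in range(NUM_CLASSES)]
            winner = np.argmax(log_lh)
            f.write("{} {} {}\n".format(name, winner + 1,
                                        " ".join(map(str, log_lh))))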
Example #8
        avg_eng = summ / len(test[j])  # average energy coefficient over the utterance

        #plt.plot(train[i][0][:,0])
        #plt.show()

        # Subtract the utterance-average energy from the first (energy)
        # coefficient of every frame
        for k in range(0, len(test[j])):
            test[j][k][0] = test[j][k][0] - avg_eng

    cnt = 0

    for tst in test:

        ll = []
        for j in range(1, 32):
            ll.append(sum(logpdf_gmm(tst, Ws[j], MUs[j], COVs[j])))

        final += str(f[cnt])
        final += ' '
        final += str(np.argmax(ll) + 1)
        final += ' '
        for z in range(1, 32):
            final += str(ll[z - 1])
            final += ' '

        final += '\n'

        cnt += 1
        #score.append(i == (np.argmax(ll) + 1))

output = 'output_voice'
Example #9
P_t = 0.5
M_t = 64
MUs_t = t_train_sound[numpy.random.randint(1, len(t_train_sound), M_t)]
COVs_t = [numpy.var(t_train_sound, axis=0)] * M_t
Ws_t = numpy.ones(M_t) / M_t

P_nt = 1 - P_t
M_nt = M_t
MUs_nt = nt_train_sound[numpy.random.randint(1, len(nt_train_sound), M_nt)]
COVs_nt = [numpy.var(nt_train_sound, axis=0)] * M_nt
Ws_nt = numpy.ones(M_nt) / M_nt

n = 32
for i in range(n):
    [Ws_t, MUs_t, COVs_t, TTL_t] = train_gmm(
        t_train_sound, Ws_t, MUs_t, COVs_t)
    [Ws_nt, MUs_nt, COVs_nt, TTL_nt] = train_gmm(
        nt_train_sound, Ws_nt, MUs_nt, COVs_nt)

    print("Training iteration: " + str(i + 1) + "/" + str(n))

with open("GMM_speech_results", "w") as f:
    for i, utt in enumerate(eval_sound):
        ll_t = logpdf_gmm(utt, Ws_t, MUs_t, COVs_t)
        ll_nt = logpdf_gmm(utt, Ws_nt, MUs_nt, COVs_nt)
        val = (sum(ll_t) + numpy.log(P_t)) - (sum(ll_nt) + numpy.log(P_nt))

        f.write(eval_sound_names[i] + " " + str(val) +
                " " + ("1" if val > 0 else "0") + "\n")