    except IOError:
        print "cache not found. Recomputing"
        c = fun(*args)
        np.save(fname, c)
        return c
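# (The try-branch of cache_or_compute is not shown in this excerpt; presumably it
#  attempts `return np.load(fname)`, so the IOError handler above only runs on a
#  cache miss. Keyword arguments such as debug=False are assumed to be consumed
#  by the wrapper itself rather than forwarded to fun.)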


pi, A, pi_v, theta_v, word_freq = cache_or_compute("cache/bigram.npy", compute_bigram, debug=False)
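# (pi and A above are presumably the initial-letter distribution and the
#  letter-transition matrix of the bigram model, with pi_v, theta_v and
#  word_freq as additional statistics -- an assumption from the names, since
#  compute_bigram is not shown in this excerpt.)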

###################################################################
# Segmentation
###################################################################

starts, ends, chunks = cache_or_compute(
    "cache/chunks.npy",
    lambda arg: get_chunk_starts(arg),
    data[int(file_range[0] * len(data)) : int(file_range[1] * len(data))],
    debug=False,
)
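# The cached segmentation may come back from np.load as float ndarrays rather
# than Python lists, so the cast below restores integer start/end indices
# before they are used for slicing (an assumption about why the cast is needed).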

if not isinstance(starts, list):
    starts = starts.astype(int)
    ends = ends.astype(int)

if "Segmentation" in PRINT_SET:
    print "num_chunks", len(chunks)

spaces = cache_or_compute("cache/spaces.npy", find_spaces, data, starts, ends)
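# find_spaces presumably marks which inter-keystroke gaps correspond to the
# space bar, giving word boundaries for the language model (an assumption based
# on the name; its exact return format is not shown in this excerpt).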

N, M = 0, 1000000
ii1 = np.zeros(len(data))
Example #2
Example: ./aggregate 7 sound7.*.wav
will aggregate the sound files
sound7.1.wav, sound7.2.wav, etc.
with their corresponding text files
text7.1.txt, text7.2.txt
The aggregated output is guaranteed to have perfect segmentation.
'''
import sys
from mlalgs import (load_data, get_chunk_starts)
import scipy.io.wavfile
import numpy as np

tot = []
tot_txt = []
tot_chars = 0
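# For each recording, derive the matching transcript name, segment the audio
# into keystroke chunks, and keep the pair only if the number of detected
# chunks equals the number of characters in the transcript -- this check is
# what guarantees "perfect segmentation" in the aggregated output.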
for f in sys.argv[2:]:
    # Derive the transcript name from the wav name: 'soundX.Y.wav' -> 'textX.Y.txt'.
    text_file = 'text' + f[5:-3] + 'txt'
    rate, data, text = load_data(f, text_file)
    starts, _, _ = get_chunk_starts(data)
    if len(starts) != len(text):
        print '%s rejected: %d != %d' % (f, len(starts), len(text))
        continue
    tot_chars += len(text)
    tot.append(data)
    tot_txt.append(text)

print 'Created data file with %d characters' % tot_chars
scipy.io.wavfile.write('sound%s.wav' % sys.argv[1], rate, np.concatenate(tot))
with open('text%s.txt' % sys.argv[1], "w") as f:
    f.write(''.join(tot_txt))
Example #3
        real.append(c)
        if minj == letters.index(c):
            score += 1
    print ''.join(pred)
    print ''.join(real)
    return means, stds, score/float(len(test))

if __name__ == '__main__':
    if len(sys.argv) != 4:
        print 'Usage: %s training|test soundf textf' % sys.argv[0]
        sys.exit(1)

    soundf = sys.argv[2]
    textf = sys.argv[3]

    rate, data, text = load_data(soundf, textf)
    starts, ends, chunks = get_chunk_starts(data)
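    # Build one feature vector per keystroke chunk from its FFT and cepstral
    # coefficients (per the include_fft / include_cepstrum flags).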
    f = get_features(data, starts, ends, include_fft=True, include_cepstrum=True)

    if sys.argv[1] == 'training':
        means, stds, score = naive_bayes(text, f)
        print 'Naive Bayes', score

        logreg_score, logreg = logistic_test(text, f)
        svm_score, svm = svm_test(text, f)
        joblib.dump(logreg, 'cache/logistic.pkl')
        print 'Logistic test', logreg_score
        print 'SVM test', svm_score
    else:
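        # Test mode: reuse the logistic-regression model pickled during a
        # previous training run.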
        try:
            logreg = joblib.load('cache/logistic.pkl')
        except: