import tflearn

import utils

# --- Hyperparameters --------------------------------------------------------
learning_rate = 0.01
training_iters = 300000  # training steps
batch_size = 50

width = 20   # MFCC features per frame
height = 80  # (max) length of an utterance, in frames
# classes = 10  # digits

# Speakers to discriminate between; one output class per speaker.
speakers = ['adrian', 'zhanet']
number_classes = len(speakers)

# Generator yielding (features, one-hot labels) batches for the speaker set.
batch = utils.mfcc_batch_generator(speakers, batch_size=batch_size,
                                   utterance_len=height)

# --- Network definition -----------------------------------------------------
# Input is (batch, time, features); the commented line below is the earlier
# (feature-major) orientation kept for reference.
# net = tflearn.input_data([None, width, height])
net = tflearn.input_data([None, height, width])
net = tflearn.lstm(net, 800, dropout=0.5)
net = tflearn.fully_connected(net, number_classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=learning_rate,
                         loss='categorical_crossentropy')

model = tflearn.DNN(net, tensorboard_verbose=3)

# Previously trained checkpoint (uncomment to restore):
# model.load('models/lstm.model_80_800.0.1')
# --- Checkpoint history (recorded accuracy per saved model) -----------------
# model.load('models/tflearn.lstm.model_48_128_4') # 86.32%
# model.load('models/tflearn.lstm.model_48_128') # 81.79%
# model.load('models/tflearn.lstm.model_80_800') # 81.18%
# ZIPPED MFCC NETS
# model.load('models/t2_lstm.model_2_80_128_3') # 31.25
# model.load('models/t2_lstm.model_2_48_128') # 27.27%
# model.load('models/t2_lstm.model_2_48_128_4') # 36.36
# model.load('models/tflearn.lstm.model_2_48_128_4') # 49.95%
# model.load('models/tflearn.lstm.model_2_48_128') # 31.81%
# model.load('models/t2_lstm.model_2_80_128_3') # 50.04%

# --- Evaluation -------------------------------------------------------------
# FIX: the original called bare `mfcc_batch_generator(...)` and
# `one_hot_to_item(...)` (NameError — only the `utils` module is imported)
# and passed an undefined variable `path`.  Both calls are now qualified with
# `utils.`, and the `path` keyword is dropped so the call matches the
# generator invocation used for training above.
batch = utils.mfcc_batch_generator(speakers, batch_size=batch_size,
                                   utterance_len=height)

count = 0    # total utterances scored
correct = 0  # utterances where the predicted speaker matched the true label
for feats, labels in batch:
    # One softmax vector (over speakers) per utterance in the batch.
    _y = model.predict(feats)
    for i, val in enumerate(_y):
        real_label = utils.one_hot_to_item(labels[i], speakers)
        label = utils.one_hot_to_item(val, speakers)
        if real_label == label:
            correct += 1
        count += 1
    # NOTE(review): the batch generator may be endless — report a running
    # accuracy after every batch so progress is visible either way.
    # The `count` guard avoids division by zero on an empty batch.
    if count:
        print('accuracy: %.2f%% (%d/%d)' % (100.0 * correct / count,
                                            correct, count))