def predict():
    """Flask view: run speech recognition on an uploaded wav file.

    Expects a POST with an ``audio`` file part.  The file is saved to a
    timestamped temp wav, converted to MFCC features, decoded through the
    module-level TF session (``sess``/``x``/``y``), and removed afterwards.

    Returns a JSON response::

        {"success": bool, "predictions": [str, ...]}
    """
    # data dictionary that will be returned from the view
    result = {"success": False}

    # ensure an audio file was properly uploaded to our endpoint
    if flask.request.method == "POST":
        f = flask.request.files["audio"]

        # Bind the *secured* name once so that save, load, and remove all
        # refer to the same path (the original saved the secured name but
        # loaded/removed the raw one).
        filename = secure_filename(
            datetime.now().strftime("%Y%m%d-%H%M%S") + ".wav")
        f.save(filename)
        try:
            # load wave file at the model's expected sample rate
            wav, _ = librosa.load(filename, mono=True, sr=16000)

            # get mfcc feature, shaped [batch, time, coeff] for the network
            mfcc = np.transpose(
                np.expand_dims(librosa.feature.mfcc(wav, 16000), axis=0),
                [0, 2, 1])

            # run session
            label = sess.run(y, feed_dict={x: mfcc})

            result["predictions"] = []
            # side effect only: log the raw index sequences
            data.print_index(label)
            for index_list in label:
                result["predictions"].append(data.index2str(index_list))

            # indicate that the request was a success
            result["success"] = True
        finally:
            # always clean up the temp file, even if decoding failed
            os.remove(filename)

    # return the data dictionary as a JSON response
    return flask.jsonify(result)
# CTC loss loss = logit.sg_ctc(target=y, seq_len=seq_len) decoded_sequence, _ = tf.nn.ctc_beam_search_decoder( logit.sg_transpose(perm=[1, 0, 2]), seq_len, merge_repeated=False) y = tf.sparse_to_dense(decoded_sequence[0].indices, decoded_sequence[0].dense_shape, decoded_sequence[0].values) + 1 # # train #0.0001 with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: # init variables tf.sg_init(sess) # restore parameters saver = tf.train.Saver() saver.restore(sess, tf.train.latest_checkpoint('asset/train')) #epoch[41182]-step[205919] #tf.sg_train(lr=0, loss=get_loss(input=inputs, target=labels, seq_len=seq_len), # ep_size=data.num_batch, max_ep=41182+5, sess=sess, max_keep=0, keep_interval=0, save_interval=0) with tf.sg_queue_context(): for _ in xrange(5): out = sess.run(y) print_index(out)
# NOTE(review): the next line is a truncated continuation of a
# tf.sparse_to_dense(decoded[0].indices, ..., decoded[0].values) call whose
# opening lines precede this chunk — this view is incomplete here.
decoded[0].values) + 1

# recognize audio file

# command-line argument definition for the audio file path to recognize
tf.sg_arg_def(file=('', 'speech wave file to recognize.'))

# load audio file (NOTE: `file` shadows the builtin; path actually comes
# from sys.argv[1] rather than the sg_arg_def flag above)
file = sys.argv[1]
wav, sr = librosa.load(file, mono=True, sr=16000)

# get mfcc feature, shaped [batch, time, coeff] for the network input
mfcc = np.transpose(np.expand_dims(librosa.feature.mfcc(wav, 16000), axis=0),
                    [0, 2, 1])

# run network
with tf.Session() as sess:
    # init variables
    tf.sg_init(sess)

    # restore parameters from the latest training checkpoint
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('asset/train'))

    # run session: feed the MFCCs and fetch the decoded label indices
    label = sess.run(y, feed_dict={x: mfcc})

    # print decoded label as text
    data.print_index(label)
# restore parameters saver = tf.train.Saver(vars_to_train) saver.restore(sess, tf.train.latest_checkpoint('asset/train')) # run session for i in xrange(10000): new_loss, _, noise_out = sess.run([loss, optimizer, noise], feed_dict={ x: mfccs[index], targ: new_target.reshape((1, -1)) }) if i % 10 == 0: print "iteration ", i #targ:corpus.daniter_label[index] print new_loss if i % 100 == 0: label = sess.run(pred, feed_dict={x: mfccs[index]}) print index2str(label[0]) if index2str(label[0]) == fool: break label = sess.run(pred, feed_dict={x: mfccs[index]}) # print label print_index(label) print noise_out np.save(fool + ".npy", noise_out + mfccs[0]) #TODO: find easier examples