Example #1
def predict():
    # initialize the data dictionary that will be returned from the
    # view
    result = {"success": False}
    # ensure an image was properly uploaded to our endpoint
    if flask.request.method == "POST":
        # load wave file
        f = flask.request.files["audio"]
        #wav, _ = sf.read(io.BytesIO(file))
        filename = datetime.now().strftime("%Y%m%d-%H%M%S") + ".wav"
        # file = "./audioSamples/salli.wav"

        f.save(secure_filename(filename))
        wav, _ = librosa.load(filename, mono=True, sr=16000)
        # get mfcc feature
        mfcc = np.transpose(
            np.expand_dims(librosa.feature.mfcc(y=wav, sr=16000), axis=0),
            [0, 2, 1])
        # run session
        label = sess.run(y, feed_dict={x: mfcc})
        result["predictions"] = []
        # print label
        data.print_index(label)
        for index_list in label:
            preds = data.index2str(index_list)
            result["predictions"].append(preds)
        # indicate that the request was a success
        result["success"] = True
        os.remove(filename)
    # return the data dictionary as a JSON response
    return flask.jsonify(result)
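A minimal client-side sketch for exercising this view, assuming it is registered under a route such as /predict and served on localhost:5000 (neither the route nor the host is shown in the snippet above):

import requests

# Hypothetical URL; the snippet above does not show the route decorator or host.
url = "http://localhost:5000/predict"

# POST a WAV file under the "audio" form field that the view reads.
with open("sample.wav", "rb") as f:
    resp = requests.post(url, files={"audio": f})

print(resp.json())  # e.g. {"success": true, "predictions": ["..."]}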
Example #2
# CTC loss
loss = logit.sg_ctc(target=y, seq_len=seq_len)

decoded_sequence, _ = tf.nn.ctc_beam_search_decoder(
    logit.sg_transpose(perm=[1, 0, 2]), seq_len, merge_repeated=False)

y = tf.sparse_to_dense(decoded_sequence[0].indices,
                       decoded_sequence[0].dense_shape,
                       decoded_sequence[0].values) + 1

#
# train
#
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    # init variables
    tf.sg_init(sess)

    # restore parameters
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('asset/train'))

    #epoch[41182]-step[205919]

    #tf.sg_train(lr=0, loss=get_loss(input=inputs, target=labels, seq_len=seq_len),
    #        ep_size=data.num_batch, max_ep=41182+5, sess=sess, max_keep=0, keep_interval=0, save_interval=0)
    with tf.sg_queue_context():
        for _ in xrange(5):
            out = sess.run(y)
            print_index(out)
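A minimal sketch of what an index-to-string decoder such as data.index2str / print_index typically does, assuming a hypothetical vocabulary list voca in which index 0 marks padding (the real mapping lives in the project's data module and is not shown here):

import numpy as np

# Hypothetical vocabulary; the real one is defined in the project's data module.
voca = ['<pad>', ' ', 'a', 'b', 'c', 'd', 'e']

def indices_to_text(batch):
    # Convert a dense [batch, time] index matrix to strings, skipping padding (0).
    return [''.join(voca[i] for i in row if i != 0) for i in batch]

print(indices_to_text(np.array([[2, 3, 0, 0], [6, 1, 4, 0]])))  # -> ['ab', 'e c']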
Example #3
y = tf.sparse_to_dense(decoded[0].indices,
                       decoded[0].dense_shape,
                       decoded[0].values) + 1

# recognize audio file

# define the command-line argument for the audio file path
tf.sg_arg_def(file=('', 'speech wave file to recognize.'))

# load audio file
file = sys.argv[1]
wav, sr = librosa.load(file, mono=True, sr=16000)

# get mfcc feature
mfcc = np.transpose(np.expand_dims(librosa.feature.mfcc(y=wav, sr=16000), axis=0),
                    [0, 2, 1])

# run network
with tf.Session() as sess:

    # init variables
    tf.sg_init(sess)

    # restore parameters
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('asset/train'))

    # run session
    label = sess.run(y, feed_dict={x: mfcc})

    # print label
    data.print_index(label)
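Assuming the snippet above is saved as recognize.py next to a trained checkpoint in asset/train, it would be invoked along the lines of python recognize.py path/to/audio.wav: the file is resampled to 16 kHz mono, converted to MFCC features, and the decoded transcription is printed via data.print_index.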
Example #4
    # restore parameters
    saver = tf.train.Saver(vars_to_train)
    saver.restore(sess, tf.train.latest_checkpoint('asset/train'))
    # run session
    for i in xrange(10000):

        new_loss, _, noise_out = sess.run([loss, optimizer, noise],
                                          feed_dict={
                                              x: mfccs[index],
                                              targ: new_target.reshape((1, -1))
                                          })
        if i % 10 == 0:
            print "iteration ", i  #targ:corpus.daniter_label[index]
            print new_loss

        if i % 100 == 0:
            label = sess.run(pred, feed_dict={x: mfccs[index]})
            print index2str(label[0])
            if index2str(label[0]) == fool:
                break

    label = sess.run(pred, feed_dict={x: mfccs[index]})

    # print label
    print_index(label)
    print noise_out
    np.save(fool + ".npy", noise_out + mfccs[0])

    #TODO: find easier examples
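A minimal follow-up sketch, assuming the adversarial MFCC array saved above is reloaded inside the same session to confirm it still decodes to the target string fool (sess, pred, x, and index2str are the objects defined earlier in this example):

# Reload the perturbed MFCC features written by np.save above (hypothetical check).
adv_mfcc = np.load(fool + ".npy")

# Run the frozen network on the adversarial input and decode the prediction.
adv_label = sess.run(pred, feed_dict={x: adv_mfcc})
print(index2str(adv_label[0]))  # expected to match the target string fool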