Exemplo n.º 1
0
def main(ms_file_name, line_freq, ouptut_file):
    """Recognise the symbols of a music score and write them to a text file.

    The score is split into line images with ``split_score``; each line is
    written to ``./samples/sample<idx>.png``, run through the restored
    semantic model with greedy CTC decoding, and the decoded symbols are
    appended to the output file.

    Args:
        ms_file_name: Score file, passed through to ``split_score``.
        line_freq: Passed through unchanged to ``split_score``.
        ouptut_file: Path of the text file to (over)write with the result.
    """
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    # An InteractiveSession installs itself as the default session and holds
    # its resources until closed, so make sure it is released on every exit.
    try:
        # load vocabulary
        int2word = read_vocab("models/vocabulary_semantic.txt")

        # Restore weights; the checkpoint prefix is the meta path without
        # its ".meta" suffix.
        model = "models/semantic_model.meta"
        saver = tf.train.import_meta_graph(model)
        saver.restore(sess, model[:-5])

        graph = tf.get_default_graph()

        model_input = graph.get_tensor_by_name("model_input:0")
        seq_len = graph.get_tensor_by_name("seq_lengths:0")
        rnn_keep_prob = graph.get_tensor_by_name("keep_prob:0")
        height_tensor = graph.get_tensor_by_name("input_height:0")
        width_reduction_tensor = graph.get_tensor_by_name("width_reduction:0")
        logits = tf.get_collection("logits")[0]

        # Constants that are saved inside the model itself
        WIDTH_REDUCTION, HEIGHT = sess.run(
            [width_reduction_tensor, height_tensor])

        decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)

        # split the music score into lines
        print(f"Process {ms_file_name}\n")
        lines = split_score(ms_file_name, line_freq)

        # The context manager closes the file even if a line fails midway.
        with open(ouptut_file, "w") as output:
            for idx, line in enumerate(lines):
                # write the line image to the sample directory for sampling
                print(f"./samples/sample{idx}.png\n")
                cv2.imwrite(f"./samples/sample{idx}.png", line)

                gray = cv2.cvtColor(line, cv2.COLOR_BGR2GRAY)
                image = ctc_utils.resize(gray, HEIGHT)
                image = ctc_utils.normalize(image)
                # Add leading and trailing axes of size 1 around the image.
                image = np.asarray(image).reshape(1, image.shape[0], -1, 1)

                # Floor division keeps the sequence length an integer, which
                # is what the CTC decoder's sequence_length input expects.
                seq_lengths = [image.shape[2] // WIDTH_REDUCTION]

                prediction = sess.run(decoded,
                                      feed_dict={
                                          model_input: image,
                                          seq_len: seq_lengths,
                                          rnn_keep_prob: 1.0,
                                      })

                str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)

                for w in str_predictions[0]:
                    description = int2word[w]
                    notation, v1, v2 = parse_description(description)
                    # Symbols whose first value is "tie" are not written.
                    if v1 == "tie":
                        continue
                    if notation == "barline":
                        output.write("### ----------------\n")
                    elif notation in ("note", "gracenote"):
                        output.write(f'- ["{notation}", "{v1}", "{v2}"]\n')
                    elif notation == "rest":
                        output.write(f'- ["rest", "{v1}"]\n')
    finally:
        sess.close()
Exemplo n.º 2
0
        image = cv2.imread(imgpath, 0)
        image = ctc_utils.resize(image, HEIGHT)
        image = ctc_utils.normalize(image)
        image = np.asarray(image).reshape(1, image.shape[0], image.shape[1], 1)

        seq_lengths = [image.shape[2] / WIDTH_REDUCTION]

        prediction = sess.run(decoded,
                              feed_dict={
                                  input: image,
                                  seq_len: seq_lengths,
                                  rnn_keep_prob: 1.0,
                              })

        str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)
        output = ""
        for w in str_predictions[0]:
            output += str(int2word[w])
            output += str('\t')

        output.rstrip()

        f = open(f'{corpus}/{x_in}/{x_in}.txt', "r")
        inputt = f.read()
        f.close

        res2 = " "
        for x in inputt:
            res2 += x.strip()
Exemplo n.º 3
0
def predict(image):
    """Decode the notes and rests found in a staff image.

    Restores the model from ``./Models/model.hdf5-69000.meta``, runs greedy
    CTC decoding on the image and converts the decoded vocabulary entries
    into ``(length, pitch)`` tuples.

    Args:
        image: Path of the image file, read in grayscale with ``cv2.imread``.

    Returns:
        A list of tuples: ``(length, notes_dict[note])`` for each decoded
        note and ``(length, 'rest')`` for each decoded rest. Any other
        decoded symbol is skipped.

    Raises:
        ValueError: If the image cannot be read.
        KeyError: If a decoded note name is missing from ``notes_dict``.
    """
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    # Each call opens a new InteractiveSession; close it on every exit so
    # repeated calls do not pile up open sessions.
    try:
        voc_file = 'vocabulary_agnostic.txt'
        model = './Models/model.hdf5-69000.meta'

        # Read the dictionary: the line number (from 0) is the word index.
        with open(voc_file, "r") as dict_file:
            int2word = dict(enumerate(dict_file.read().splitlines()))

        # Restore weights; the checkpoint prefix is the meta path without
        # its ".meta" suffix.
        saver = tf.train.import_meta_graph(model)
        saver.restore(sess, model[:-5])

        graph = tf.get_default_graph()

        # Named model_input rather than input to avoid shadowing the builtin.
        model_input = graph.get_tensor_by_name("model_input:0")
        seq_len = graph.get_tensor_by_name("seq_lengths:0")
        rnn_keep_prob = graph.get_tensor_by_name("keep_prob:0")
        height_tensor = graph.get_tensor_by_name("input_height:0")
        width_reduction_tensor = graph.get_tensor_by_name("width_reduction:0")
        logits = tf.get_collection("logits")[0]

        # Constants that are saved inside the model itself
        WIDTH_REDUCTION, HEIGHT = sess.run(
            [width_reduction_tensor, height_tensor])

        decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)

        # Flag False (== 0) asks OpenCV for a grayscale image.
        pixels = cv2.imread(image, False)
        if pixels is None:
            # cv2.imread reports failure by returning None, not by raising.
            raise ValueError(f"Could not read image: {image!r}")
        pixels = ctc_utils.resize(pixels, HEIGHT)
        pixels = ctc_utils.normalize(pixels)
        # Add leading and trailing axes of size 1 around the image.
        pixels = np.asarray(pixels).reshape(
            1, pixels.shape[0], pixels.shape[1], 1)

        # Floor division keeps the sequence length an integer, which is what
        # the CTC decoder's sequence_length input expects.
        seq_lengths = [pixels.shape[2] // WIDTH_REDUCTION]

        prediction = sess.run(
            decoded,
            feed_dict={
                model_input: pixels,
                seq_len: seq_lengths,
                rnn_keep_prob: 1.0,
            },
        )

        str_predictions = ctc_utils.sparse_tensor_to_strs(prediction)
        notes = []
        for w in str_predictions[0]:
            temp = int2word[w].split('.')
            print(temp)
            # Only entries of the form "<symbol>.<description>" are handled.
            if len(temp) != 2:
                continue
            symbol, des = temp
            if symbol == 'note':
                length, note = des.split('-', 1)
                if 'beamed' in length:
                    # NOTE(review): spelling 'eigth' kept as-is; callers may
                    # match on this exact string - confirm before renaming.
                    length = 'eigth'
                notes.append((length, notes_dict[note]))
            elif symbol == 'rest':
                length, _ = des.split('-', 1)
                notes.append((length, 'rest'))

        return notes
    finally:
        sess.close()