The examples below are excerpted from larger modules, so each snippet assumes imports along these lines (utils and model_builder are project-local modules; the code is Python 2, hence cPickle and print statements):

import os
import sys

import cPickle
import h5py
import numpy
import numpy as np

import model_builder
import utils

Example #1
def write_punctuations(net, text_has_pause_duration_tags, unpunctuated_text,
                       output_file_path, punctuation_reverse_map,
                       write_readable_text):
    stream = unpunctuated_text.split()  # + ["<END>"]

    word = None
    pause = 0.

    # "-" selects stdout; note that the with-statement closes the stream
    # on exit, including sys.stdout when output_file_path is "-".
    with (open(output_file_path, 'w') if
          (output_file_path != "-") else sys.stdout) as output_file:

        if text_has_pause_duration_tags:
            for token in stream:

                if token.startswith("<sil="):

                    previous_pause = pause
                    pause = float(token.replace("<sil=", "").replace(">", ""))

                    word_index = utils.input_word_index(
                        net.in_vocabulary, word)
                    punctuation_index = net.predict_punctuation(
                        [word_index], np.array([previous_pause]))[0]

                    punctuation = punctuation_reverse_map[punctuation_index]

                    if punctuation == " ":
                        output_file.write("%s%s" % (punctuation, word))
                    else:
                        if write_readable_text:
                            output_file.write("%s %s" %
                                              (punctuation[:1], word))
                        else:
                            output_file.write(" %s %s" % (punctuation, word))

                else:
                    word = token
        else:
            for word in stream:

                word_index = utils.input_word_index(net.in_vocabulary, word)
                punctuation_index = net.predict_punctuation([word_index],
                                                            np.array([0.0]))[0]

                punctuation = punctuation_reverse_map[punctuation_index]

                if punctuation == " ":
                    output_file.write("%s%s" % (punctuation, word))
                else:
                    if write_readable_text:
                        output_file.write("%s %s" % (punctuation[:1], word))

                    else:
                        output_file.write(" %s %s" % (punctuation, word))
Example #2
def convert_files(file_paths, vocabulary, punctuations, batch_size, use_pauses, output_path):
    inputs = []
    outputs = []
    punctuation = " "
    pause = 0.
    
    if use_pauses:
        pauses = []

    for file_path in file_paths:
        with open(file_path, 'r') as corpus:
            for line in corpus:
                for token in line.split():
                    if token in punctuations:
                        punctuation = token
                        continue
                    elif token.startswith("<sil="):
                        pause = float(token.replace("<sil=","").replace(">",""))
                        continue
                    else:
                        inputs.append(utils.input_word_index(vocabulary, token))
                        outputs.append(utils.punctuation_index(punctuations, punctuation))
                        if use_pauses:
                            pauses.append(pause)
                        punctuation = " "
                        pause = 0.

    inputs.append(utils.input_word_index(vocabulary, "<END>"))
    outputs.append(utils.punctuation_index(punctuations, punctuation))
    if use_pauses:
        pauses.append(pause)

    assert len(inputs) == len(outputs)
    num_batches = len(inputs) // batch_size  # integer count of full batches

    dtype = np.int32 if len(vocabulary) > 32767 else np.int16

    inputs = np.array(inputs, dtype=dtype)[:batch_size*num_batches].reshape((batch_size, num_batches)).T
    outputs = np.array(outputs, dtype=np.int16)[:batch_size*num_batches].reshape((batch_size, num_batches)).T
    if use_pauses:
        pauses = np.array(pauses, dtype=np.float32)[:batch_size*num_batches].reshape((batch_size, num_batches)).T

    total_size = batch_size*num_batches

    data = {"inputs": inputs, "outputs": outputs,
            "vocabulary": vocabulary, "punctuations": punctuations,
            "batch_size": batch_size, "total_size": total_size}
    
    if use_pauses:
        data["pauses"] = pauses

    with open(output_path, 'wb') as output_file:
        cPickle.dump(data, output_file, protocol=cPickle.HIGHEST_PROTOCOL)
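utils.punctuation_index is likewise not shown. A plausible one-liner, assuming punctuations maps each punctuation token (with " " meaning "no punctuation") to a class id:

def punctuation_index(punctuations, punctuation):
    # Map a punctuation token to its class id; default to "no punctuation".
    return punctuations.get(punctuation, punctuations[" "])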
Example #3
def convert_files(file_paths, vocabulary, punctuations, output_path):
    inputs = []
    outputs = []
    punctuation = " "
    
    for file_path in file_paths:
        with open(file_path, 'r') as corpus:
            for line in corpus:
                array = np.zeros(shape=(1, len(vocabulary)), dtype=np.int8)
                array[0,utils.input_word_index(vocabulary, "<START>")] = 1
                inputs.append(array)
                
                array = np.zeros(shape=(1, len(punctuations)), dtype=np.int8)
                array[0,utils.punctuation_index(punctuations, " ")] = 1
                outputs.append(array)

                for token in line.split():
                    if token in punctuations:
                        punctuation = token
                        continue
                    else:
                        array = np.zeros(shape=(1, len(vocabulary)), dtype=np.int8)
                        array[0,utils.input_word_index(vocabulary, token)] = 1
                        inputs.append(array)
                        array = np.zeros(shape=(1, len(punctuations)), dtype=np.int8)
                        array[0,utils.punctuation_index(punctuations, punctuation)] = 1
                        outputs.append(array)
                        punctuation = " "

                array = np.zeros(shape=(1, len(vocabulary)), dtype=np.int8)
                array[0,utils.input_word_index(vocabulary, "<END>")] = 1
                inputs.append(array)
                
                array = np.zeros(shape=(1, len(punctuations)), dtype=np.int8)
                array[0,utils.punctuation_index(punctuations, punctuation)] = 1
                outputs.append(array)

    assert len(inputs) == len(outputs)

    inputs = np.array(inputs, dtype=np.int8).reshape((len(inputs), 1, len(vocabulary)))
    outputs = np.array(outputs, dtype=np.int16).reshape((len(outputs), len(punctuations)))

    # Write the one-hot matrices to HDF5 and close the file.
    f = h5py.File(output_path + '.h5', "w")
    f.create_dataset('inputs', data=inputs, dtype='i8')
    f.create_dataset('outputs', data=outputs, dtype='i8')
    f.close()

    data = {"vocabulary": vocabulary, "punctuations": punctuations, 
           "total_size": len(inputs)}
    
    with open(output_path + '.pkl', 'wb') as output_file:
        cPickle.dump(data, output_file, protocol=cPickle.HIGHEST_PROTOCOL)
Example #4
def write_punctuations(net, unpunctuated_text, output_file_path,
                       punctuation_reverse_map, write_readable_text):
    stream = unpunctuated_text.split()  # + ["<END>"]

    word = None
    pause = 0.

    with open(output_file_path, 'w') as output_file:
        for token in stream:

            if token.startswith("<sil="):
                
                previous_pause = pause
                pause = float(token.replace("<sil=","").replace(">",""))                

                word_index = utils.input_word_index(net.in_vocabulary, word)
                punctuation_index = net.predict_punctuation([word_index], np.array([previous_pause]))[0]

                punctuation = punctuation_reverse_map[punctuation_index]

                if punctuation == " ":
                    output_file.write("%s%s" % (punctuation, word))
                else:
                    if write_readable_text:
                        output_file.write("%s %s" % (punctuation[:1], word))
                    else:
                        output_file.write(" %s %s" % (punctuation, word))

            else:
                word = token
Example #5
def get_top_predictions(inputs, target, model, raw_word_list, n=5):
    """ Returns the top predicted filler for a target role, given a set of input roles + fillers
        
    Keyword arguments:
    inputs -- A dictionary of inputs with the role as the key and the filler as the value.
    target -- A singleton dictionary containing the target role as the key and target filler as the value.
    model -- The loaded model with which to make predictions
    raw_word_list -- A dictionary of vocabulary
    n -- The number of top predictions that should be retrieved
    """
    raw_word_list.update(inputs)

    assert len(raw_word_list) == len(model.role_vocabulary)

    t_r = [
        model.role_vocabulary.get(r, model.unk_role_id) for r in target.keys()
    ]
    t_w = [model.unk_word_id]

    input_roles_words = {}
    for r, w in raw_word_list.items():
        input_roles_words[model.role_vocabulary[r]] = utils.input_word_index(
            model.word_vocabulary, w, model.unk_word_id, warn_unk=False)

    input_roles_words.pop(t_r[0])

    x_w_i = numpy.asarray([input_roles_words.values()], dtype=numpy.int64)
    x_r_i = numpy.asarray([input_roles_words.keys()], dtype=numpy.int64)
    y_w_i = numpy.asarray(t_w, dtype=numpy.int64)
    y_r_i = numpy.asarray(t_r, dtype=numpy.int64)

    predicted_word_indices = model.top_words(x_w_i, x_r_i, y_w_i, y_r_i, n)
    results = []
    reverse_vocabulary = utils.get_reverse_map(model.word_vocabulary)

    for t_w_i in predicted_word_indices:
        y_w_i = numpy.asarray([t_w_i], dtype=numpy.int64)
        p = model.p_words(x_w_i, x_r_i, y_w_i, y_r_i, batch_size=1,
                          verbose=0)[0]
        lemma = reverse_vocabulary[int(t_w_i)]
        results.append((lemma, p))

    return results
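A hypothetical call, with the role labels, the loaded model object, and the "<NOTHING>" placeholder all assumed from the other examples on this page rather than taken from this one:

# Top five A1 fillers for the context "man buy ...".
base = dict((role, "<NOTHING>") for role in model.role_vocabulary)
for lemma, p in get_top_predictions({"A0": "man", "V": "buy"},
                                    {"A1": "car"}, model, base, n=5):
    print("%s %.6f" % (lemma, p))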
Example #6
def write_punctuations(net, punctuation_reverse_map, document):
    inputs = document.split()
    inputs = fix_missing_pauses(inputs) + ["<END>"]

    word = None
    pause = 0.
    tags = []

    first_word = True

    for token in inputs:

        if is_pause(token):

            previous_pause = pause
            pause = float(token.replace("<sil=", "").replace(">", ""))

            word_index = utils.input_word_index(net.in_vocabulary, word)
            punctuation_index = net.predict_punctuation(
                [word_index], np.array([previous_pause]))[0]

            if first_word:
                punctuation = ""
            else:
                punctuation = punctuation_reverse_map[punctuation_index]

            tagstring = " ".join(tags) + " " if tags else ""
            tags = []

            if punctuation.strip() == "":
                sys.stdout.write("%s%s%s" % (punctuation, tagstring, word))
            else:
                sys.stdout.write("%s %s%s" %
                                 (punctuation[:1], tagstring, word))

            first_word = False

        else:
            if is_word(token):
                word = token
            else:
                tags.append(token)

    sys.stdout.write("\n")
    sys.stdout.flush()
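This example relies on three helpers that are not shown on this page: is_pause, is_word, and fix_missing_pauses. Hypothetical sketches, inferred from how the tokens are consumed above (the exact tag conventions are assumptions):

def is_pause(token):
    # Pause tokens carry a duration, e.g. "<sil=0.32>".
    return token.startswith("<sil=")

def is_word(token):
    # Anything that is not an angle-bracketed tag counts as a word.
    return not (token.startswith("<") and token.endswith(">"))

def fix_missing_pauses(tokens):
    # Insert a zero-length pause after any word that is not already
    # followed by a pause token, so every word gets a prediction.
    fixed = []
    for token, following in zip(tokens, tokens[1:] + ["<END>"]):
        fixed.append(token)
        if is_word(token) and not is_pause(following):
            fixed.append("<sil=0.0>")
    return fixed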
Example #7
def get_filler_prob(inputs, target, model, raw_word_list):
    """ Returns the probability of a target filler for a role, given a set of input roles + fillers
        
    Keyword arguments:
    inputs -- A dictionary of inputs with the role as the key and the filler as the value.
    target -- A singleton dictionary containing the target role as the key and target filler as the value.
    model -- The loaded model with which to make predictions
    raw_word_list -- A dictionary of vocabulary
    """
    raw_word_list.update(inputs)

    assert len(raw_word_list) == len(model.role_vocabulary)

    t_r = [
        model.role_vocabulary.get(r, model.unk_role_id) for r in target.keys()
    ]
    t_w = [
        model.word_vocabulary.get(w, model.unk_word_id)
        for w in target.values()
    ]
    #print("Target role", t_r)
    #print("Target word", t_w)

    if t_w[0] == model.unk_word_id:
        return None

    input_roles_words = {}
    for r, w in raw_word_list.items():
        input_roles_words[model.role_vocabulary[r]] = utils.input_word_index(
            model.word_vocabulary, w, model.unk_word_id, warn_unk=False)

    input_roles_words.pop(t_r[0])

    x_w_i = numpy.asarray([input_roles_words.values()], dtype=numpy.int64)
    x_r_i = numpy.asarray([input_roles_words.keys()], dtype=numpy.int64)
    y_w_i = numpy.asarray(t_w, dtype=numpy.int64)
    y_r_i = numpy.asarray(t_r, dtype=numpy.int64)

    return model.p_words(x_w_i, x_r_i, y_w_i, y_r_i)[0]
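A hypothetical call, mirroring the sketch after Example #5 (the role labels and the placeholder filler are assumptions):

# P(A1 = "car" | A0 = "man", V = "buy"), or None if "car" is out of vocabulary.
base = dict((role, "<NOTHING>") for role in model.role_vocabulary)
p = get_filler_prob({"A0": "man", "V": "buy"}, {"A1": "car"}, model, base)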
Example #8
def convert_file(file_path, vocab_file, punct_file, output_path):
    punctuations = {" ":0, ".":1, ",":2}
    punctuations = utils.load_punctuations(punct_file)
    vocabulary = utils.load_vocabulary(vocab_file)
    punctuation = " "
    time_steps = 1 #to be used in future experiments
    
    filename = 'database' # output file name
    f = h5py.File(os.path.join(output_path, filename+'.h5'), "w")
    input_dset = f.create_dataset('inputs', (100, time_steps,len(vocabulary)), dtype='i8', maxshape=(None, time_steps, len(vocabulary)))
    output_dset = f.create_dataset('outputs', (100, len(punctuations)), dtype='i8', maxshape=(None, len(punctuations)))
    data_counter = 0
    with open(file_path, 'r') as corpus:
        for line in corpus:
            array = np.zeros(shape=(1, len(vocabulary)), dtype=np.int8)
            array[0,utils.input_word_index(vocabulary, "<START>")] = 1
            input_dset[data_counter] = array

            array = np.zeros(shape=(1, len(punctuations)), dtype=np.int8)
            array[0,utils.punctuation_index(punctuations, " ")] = 1
            output_dset[data_counter] = array
            data_counter += 1
            if data_counter == input_dset.shape[0]:
                input_dset.resize(input_dset.shape[0]+1000, axis=0)
                output_dset.resize(output_dset.shape[0]+1000, axis=0)

            for token in line.split():
                if token in punctuations:
                    punctuation = token
                    continue
                else:
                    array = np.zeros(shape=(1, len(vocabulary)), dtype=np.int8)
                    array[0,utils.input_word_index(vocabulary, token)] = 1
                    input_dset[data_counter] = array

                    array = np.zeros(shape=(1, len(punctuations)), dtype=np.int8)
                    array[0,utils.punctuation_index(punctuations, punctuation)] = 1
                    output_dset[data_counter] = array

                    punctuation = " "
                    data_counter += 1
                    if data_counter == input_dset.shape[0]:
                        input_dset.resize(input_dset.shape[0]+1000, axis=0)
                        output_dset.resize(output_dset.shape[0]+1000, axis=0)

            array = np.zeros(shape=(1, len(vocabulary)), dtype=np.int8)
            array[0,utils.input_word_index(vocabulary, "<END>")] = 1
            input_dset[data_counter] = array
         
            array = np.zeros(shape=(1, len(punctuations)), dtype=np.int8)
            array[0,utils.punctuation_index(punctuations, punctuation)] = 1
            output_dset[data_counter] = array

            data_counter += 1
            if data_counter == input_dset.shape[0]:
                input_dset.resize(input_dset.shape[0]+1000, axis=0)
                output_dset.resize(output_dset.shape[0]+1000, axis=0)


    input_dset.resize(data_counter, axis=0)
    output_dset.resize(data_counter, axis=0)
    f.close()

    data = {"vocabulary": vocabulary, "punctuations": punctuations, 
           "total_size": data_counter}
    
    with open(os.path.join(output_path, filename+'.pkl'), 'wb') as output_file:
        cPickle.dump(data, output_file, protocol=cPickle.HIGHEST_PROTOCOL)

    print("Done!")
Example #9
def eval_bicknell_switch(model_name,
                         experiment_name,
                         evaluation,
                         model=None,
                         print_result=True,
                         switch_test=False):
    MODEL_NAME = experiment_name

    if model:
        net = model
    else:
        description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
        net = model_builder.build_model(model_name, description)
        net.load(MODEL_PATH, MODEL_NAME, description)

    bias = net.set_0_bias()

    if print_result:
        print net.role_vocabulary

    eval_data_file = os.path.join(RF_EVAL_PATH, evaluation + '.txt')

    result_file = os.path.join(MODEL_PATH,
                               MODEL_NAME + '_' + evaluation + '.txt')

    probs = []
    baseline = []
    oov_count = 0

    if print_result:
        print eval_data_file
        print "=" * 60

    dataset = numpy.genfromtxt(eval_data_file,
                               dtype=str,
                               delimiter='\t',
                               usecols=[0, 1, 2, 3, 4])

    samples = []
    i = 0

    while True:
        d = dataset[i]
        d2 = dataset[i + 1]

        A0 = d[0][:-2]
        V = d[1][:-2]
        assert d2[0][:-2] == A0
        assert d2[1][:-2] == V

        if d[3] == 'yes':
            assert d2[3] == 'no'
            A1_correct = d[2][:-2]
            A1_incorrect = d2[2][:-2]
            b_correct = d[4]
            b_incorrect = d2[4]
        else:
            assert d[3] == 'no'
            A1_correct = d2[2][:-2]
            A1_incorrect = d[2][:-2]
            b_correct = d2[4]
            b_incorrect = d[4]

        if A1_correct not in net.word_vocabulary or A1_incorrect not in net.word_vocabulary:
            if A1_correct not in net.word_vocabulary and print_result:
                print "%s MISSING FROM VOCABULARY. SKIPPING..." % A1_correct
            if A1_incorrect not in net.word_vocabulary and print_result:
                print "%s MISSING FROM VOCABULARY. SKIPPING..." % A1_incorrect
        else:
            roles = net.role_vocabulary.values()
            del roles[net.unk_role_id]

            input_roles_words = dict((r, net.missing_word_id) for r in (roles))

            input_roles_words[
                net.role_vocabulary["A0"]] = utils.input_word_index(
                    net.word_vocabulary, A0, net.unk_word_id, warn_unk=True)
            input_roles_words[
                net.role_vocabulary["V"]] = utils.input_word_index(
                    net.word_vocabulary, V, net.unk_word_id, warn_unk=True)

            sample = (
                numpy.asarray(
                    [input_roles_words.values(),
                     input_roles_words.values()],
                    dtype=numpy.int64),  # x_w_i
                numpy.asarray(
                    [input_roles_words.keys(),
                     input_roles_words.keys()],
                    dtype=numpy.int64),  # x_r_i
                numpy.asarray([
                    net.word_vocabulary[A1_correct],
                    net.word_vocabulary[A1_incorrect]
                ],
                              dtype=numpy.int64
                              ),  # y_w_i (1st is correct, 2nd is incorrect)
                numpy.asarray(
                    [net.role_vocabulary["A1"], net.role_vocabulary["A1"]],
                    dtype=numpy.int64),  # y_r_i
                [b_correct, b_incorrect],  # bicknell scores
                "\"" + A0 + " " + V + "\"",  # context
                [A1_correct, A1_incorrect])

            samples.append(sample)

        i += 2
        if i > len(dataset) - 2:
            break

    num_samples = len(samples)
    num_correct = 0
    num_total = 0

    if print_result:
        print "context", "correct", "incorrect", "P(correct)", "P(incorrect)", "bicnell_correct", "bicnell_incorrect"

    result_list = []

    for x_w_i, x_r_i, y_w_i, y_r_i, bicknell, context, a1 in samples:

        p = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)

        p_correct = p[0]
        p_incorrect = p[1]

        if print_result:
            print context, a1[0], a1[1], p_correct, p_incorrect, bicknell[
                0], bicknell[1]

        if p_correct > p_incorrect:
            result_list.append(1)
        else:
            result_list.append(0)

        num_correct += p_correct > p_incorrect
        num_total += 1

    assert num_total == num_samples

    accuracy = float(num_correct) / float(num_samples)

    if print_result:
        print "Number of lines %d" % num_samples
        print "Baseline Lenci11 is 43/64=0.671875"
        print "Final score of theano model is %d/%d=%.6f" % (
            num_correct, num_samples, accuracy)

    if print_result:
        print result_list

    if switch_test and print_result:
        print "\nSwitch A0/A1 TEST"

        input_words = []
        input_roles = []
        for i in range(1):
            roles = net.role_vocabulary.values()
            print net.unk_role_id
            roles.remove(net.unk_role_id)

            input_role_word_pairs = dict(
                (r, net.missing_word_id) for r in roles)
            input_role_word_pairs[
                net.role_vocabulary["V"]] = utils.input_word_index(
                    net.word_vocabulary, "buy", net.unk_word_id, warn_unk=True)

            input_words.append(input_role_word_pairs.values())
            input_roles.append(input_role_word_pairs.keys())

        man = utils.input_word_index(net.word_vocabulary,
                                     "man",
                                     net.unk_word_id,
                                     warn_unk=True)
        car = utils.input_word_index(net.word_vocabulary,
                                     "car",
                                     net.unk_word_id,
                                     warn_unk=True)
        a1 = net.role_vocabulary["A1"]
        a0 = net.role_vocabulary["A0"]

        a0_test = (
            numpy.asarray(input_words, dtype=numpy.int64),
            numpy.asarray(input_roles, dtype=numpy.int64),
            numpy.asarray([man, car], dtype=numpy.int64),
            numpy.asarray([a0], dtype=numpy.int64),
        )
        x_w_i, x_r_i, y_w_i, y_r_i = a0_test
        p0 = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)
        print p0

        a1_test = (
            numpy.asarray(input_words, dtype=numpy.int64),
            numpy.asarray(input_roles, dtype=numpy.int64),
            numpy.asarray([man, car], dtype=numpy.int64),
            numpy.asarray([a1], dtype=numpy.int64),
        )
        x_w_i, x_r_i, y_w_i, y_r_i = a1_test
        p1 = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)
        print p1

        print "man buy", p0[0]
        print "buy man", p1[0]
        print "car buy", p0[1]
        print "buy car", p1[1]

    net.set_bias(bias)

    return num_correct, num_samples, accuracy
Example #10
def query(model_name, experiment_name, inputs, target):
    MODEL_NAME = experiment_name
    description = model_builder.load_description(MODEL_PATH, MODEL_NAME)

    net = model_builder.build_model(model_name, description)
    net.load(MODEL_PATH, MODEL_NAME, description)

    print net.role_vocabulary
    print("unk_word_id", net.unk_word_id)
    print("missing_word_id", net.missing_word_id)
    # net.set_0_bias()

    net.model.summary()

    propbank_map = {
        "subj"  :   "A0",
        "obj"   :   "A1",
        "ARG0"  :   "A0",
        "ARG1"  :   "A1",
        "ARG2"  :   "A2",
    }

    # tr_map = {
    #     "A0": numpy.asarray([[net.role_vocabulary["A0"]]], dtype=numpy.int64),
    #     "A1": numpy.asarray([[net.role_vocabulary["A1"]]], dtype=numpy.int64),
    #     "A2": numpy.asarray([[net.role_vocabulary["<UNKNOWN>"]]], dtype=numpy.int64)
    # }

    # net.word_vocabulary["<NOTHING>"] = net.missing_word_id
    # net.role_vocabulary["<UNKNOWN>"] = net.unk_role_id    

    reverse_vocabulary = utils.get_reverse_map(net.word_vocabulary)
    reverse_role_vocabulary = utils.get_reverse_map(net.role_vocabulary)    

    print reverse_role_vocabulary

    raw_words = dict((reverse_role_vocabulary[r], reverse_vocabulary[net.missing_word_id]) for r in net.role_vocabulary.values())

    # print raw_words

    raw_words.update(inputs)
    
    # print raw_words
    # print len(raw_words)
    assert len(raw_words) == len(net.role_vocabulary)
    # print repr(raw_words)

    # n = int(sys.argv[3])    
    t_r = [net.role_vocabulary.get(r, net.unk_role_id) for r in target.keys()]
    t_w = [net.word_vocabulary.get(w, net.unk_word_id) for w in target.values()]

    input_roles_words = {}
    for r, w in raw_words.items():
        input_roles_words[net.role_vocabulary[r]] = utils.input_word_index(net.word_vocabulary, w, net.unk_word_id, warn_unk=True)

    print input_roles_words, t_r
    input_roles_words.pop(t_r[0])

    # default_roles_words = dict((r, net.missing_word_id) for r in (net.role_vocabulary.values()))
    # default_roles_words.update(input_roles_words)
    # input_roles_words = default_roles_words
        
    x_w_i = numpy.asarray([input_roles_words.values()], dtype=numpy.int64)
    x_r_i = numpy.asarray([input_roles_words.keys()], dtype=numpy.int64)
    y_w_i = numpy.asarray(t_w, dtype=numpy.int64)
    y_r_i = numpy.asarray(t_r, dtype=numpy.int64)

    topN = 20
    predicted_word_indices = net.top_words(x_w_i, x_r_i, y_w_i, y_r_i, topN)
    # print predicted_word_indices
    # print len(predicted_word_indices)

    print(x_w_i, x_r_i, y_w_i, y_r_i)

    p_w = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i, batch_size=1, verbose=0)[0]
    print('p_t_w: ', p_w)

    resultlist = predicted_word_indices
    # print resultlist

    for i, t_w_i in enumerate(resultlist):
        t_w = net.word_vocabulary.get(t_w_i, net.unk_word_id)
        y_w_i = numpy.asarray([t_w_i], dtype=numpy.int64)
        p = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i, batch_size=1, verbose=0)[0]
        n = numpy.round(p / 0.005)
        fb = numpy.floor(n)
        hb = n % 2
        print u"{:<5} {:7.6f} {:<20} ".format(i+1, float(p), reverse_vocabulary[int(t_w_i)]) + u"\u2588" * int(fb) + u"\u258C" * int(hb)