Пример #1
0
def doAll():
    """Collect labelled word vectors for every language tag and return rows.

    For each key of ``language_tags`` (in iteration order) this calls
    ``functions.generate_dictionary``, ``functions.convert_dic_to_vector``
    and ``functions.create_output_vector`` and appends their results to the
    module-level lists ``master_dic``, ``word_data`` and ``language_data``.
    Those lists are not cleared here, so anything they already hold is kept
    and also shows up in the returned rows.

    Returns:
        A list with one row per element of ``word_data``. Row ``i`` is
        ``master_dic[i]`` followed by the values of ``language_data[i]``
        and then the values of ``word_data[i]``, each converted to float.
    """
    for position, tag in enumerate(language_tags.keys()):
        print('generating dictionary for ' + tag)
        words = functions.generate_dictionary(tag, max_letters)
        master_dic.extend(words)

        encoded = functions.convert_dic_to_vector(words, max_letters)
        word_data.extend(encoded)

        # Every vector produced for this tag shares one label vector object;
        # `position` is the tag's index in iteration order.
        label = functions.create_output_vector(position, len(language_tags))
        language_data.extend([label] * len(encoded))

    rows = []
    for idx, vector in enumerate(word_data):
        # Indexing (rather than zipping) keeps the IndexError if the three
        # lists ever get out of step.
        row = [master_dic[idx]]
        row.extend(float(digit) for digit in language_data[idx])
        row.extend(float(digit) for digit in vector)
        rows.append(row)
    return rows
Пример #2
0
import pandas as pd
import config

word_data = []
language_data = []
master_dic = []
count = 0

# Build the labelled data set: for every language tag in config, generate its
# word collection, convert the words to vectors and pair each vector with the
# tag's output (label) vector.
# The original author's note says this covers German and English and ends in
# a CSV file; neither the tag values nor the CSV write are visible in this
# part of the file.
# NOTE(review): `functions` and `max_letters` are not imported/defined in the
# visible part of this file - confirm they are in scope before this runs.
for tag in config.language_tags.keys():
    print('generating dictionary for ' + tag)
    dic = functions.create_df(tag, max_letters)
    master_dic.extend(dic)
    vct = functions.convert_dic_to_vector(dic, max_letters)
    word_data.extend(vct)
    # Size the label vector from the same mapping the loop iterates over, so
    # its width always matches the number of tags being processed.
    output_vct = functions.create_output_vector(count, len(config.language_tags))
    # One (shared) label vector per word vector of this language.
    language_data.extend([output_vct] * len(vct))
    count += 1

# Flatten everything into rows of the form [word, *label floats, *vector floats].
arr = []
for i in range(len(word_data)):
    entry = [master_dic[i]]
    entry.extend(float(digit) for digit in language_data[i])
    entry.extend(float(digit) for digit in word_data[i])
    # Store the finished row; without this `arr` would stay empty.
    arr.append(entry)
testString = ""
spacing = 3

# Blank out every unwanted character/substring before splitting into words.
# str.replace returns a new string (strings are immutable), so the result has
# to be assigned back - otherwise the loop has no effect on `text`.
for rchr in removechar:
    text = text.replace(rchr, " ")

# Splitting on a single space keeps empty strings where spaces were adjacent.
replaceString = text.split(" ")
i = 0
dic = []

print(replaceString)
formulae = []
for remove in replaceString:
    dic = []
    dic.append(remove)
    vct_str = convert_dic_to_vector(dic, max_letters)
    vct = np.zeros((1, 128 * max_letters - 1))
    count = 0
    for digit in vct_str[0]:
        if count == 128 * (max_letters - 1):
            break
        vct[0, count] = int(digit)
        count += 1
    prediction_vct = network.predict(vct)

    langs = list(language_tags.keys())
    for i in range(len(language_tags)):
        lang = langs[i]
        score = prediction_vct[0][i]
        print(remove + " " + lang + ': ' + str(round(100 * score, 2)) + '%')
        if (lang == "en"):