def doAll():
    """Build one labelled row per word for every language in ``language_tags``.

    For each tag, the words returned by ``functions.generate_dictionary`` are
    added to ``master_dic``, the vectors returned by
    ``functions.convert_dic_to_vector`` are added to ``word_data``, and one
    reference to the tag's output vector per word vector is added to
    ``language_data``. Those three lists are neither created nor cleared
    here, so the rows cover everything they hold when the function finishes.

    Returns:
        A list of rows, each ``[word, *label_floats, *vector_floats]``.
    """
    for tag_index, tag in enumerate(language_tags.keys()):
        print('generating dictionary for ' + tag)
        words = functions.generate_dictionary(tag, max_letters)
        master_dic.extend(words)

        vectors = functions.convert_dic_to_vector(words, max_letters)
        word_data.extend(vectors)

        # Every vector of this language shares the same label object.
        label = functions.create_output_vector(tag_index, len(language_tags))
        language_data.extend([label] * len(vectors))

    # Index the parallel lists by position rather than zipping them, so a
    # length mismatch still raises IndexError instead of dropping rows.
    return [
        [
            master_dic[idx],
            *(float(digit) for digit in language_data[idx]),
            *(float(digit) for digit in vector),
        ]
        for idx, vector in enumerate(word_data)
    ]
import pandas as pd

import config

word_data = []
language_data = []
master_dic = []
count = 0

# Generate the word dictionary for every language in config.language_tags
# (German and English, per the original note) and assemble one row per word:
# the word itself, then its language label vector, then its word vector, with
# both vectors converted to floats.
# NOTE(review): the original note says this produces a final CSV, but no write
# step is visible here -- confirm where the rows in `arr` are saved.
# NOTE(review): `functions` and `max_letters` are not bound by the imports
# above -- confirm they are in scope before this runs.
for tag in config.language_tags.keys():
    print('generating dictionary for ' + tag)
    dic = functions.create_df(tag, max_letters)
    master_dic.extend(dic)

    vct = functions.convert_dic_to_vector(dic, max_letters)
    word_data.extend(vct)

    # Size the label vector from the same mapping the loop iterates; the bare
    # name `language_tags` is not defined by anything visible in this script.
    output_vct = functions.create_output_vector(count, len(config.language_tags))
    # One label per word vector; all entries reference the same label object.
    language_data.extend([output_vct] * len(vct))
    count += 1

arr = []
# Index the parallel lists by position so a length mismatch raises IndexError
# instead of silently dropping rows.
for i, vector in enumerate(word_data):
    entry = [master_dic[i]]
    entry.extend(float(digit) for digit in language_data[i])
    entry.extend(float(digit) for digit in vector)
    # Keep the finished row; without this `arr` stays empty and every
    # `entry` is discarded.
    arr.append(entry)
testString = "" spacing = 3 for rchr in removechar: text.replace(rchr, " ") replaceString = text.split(" ") i = 0 dic = [] print(replaceString) formulae = [] for remove in replaceString: dic = [] dic.append(remove) vct_str = convert_dic_to_vector(dic, max_letters) vct = np.zeros((1, 128 * max_letters - 1)) count = 0 for digit in vct_str[0]: if count == 128 * (max_letters - 1): break vct[0, count] = int(digit) count += 1 prediction_vct = network.predict(vct) langs = list(language_tags.keys()) for i in range(len(language_tags)): lang = langs[i] score = prediction_vct[0][i] print(remove + " " + lang + ': ' + str(round(100 * score, 2)) + '%') if (lang == "en"):