def insert_link_dictionary(table_name):
    """Insert the link dictionary into ``table_name`` of ``NNP.db``.

    For every ``word -> link`` entry returned by
    ``make_dictionary.make_dictionary()`` one row ``(word, link, 0)`` is
    inserted, with underscores in the link replaced by spaces. Entries whose
    link contains a double quote are skipped. The running entry count is
    printed every 100 entries, and all rows are committed in one transaction
    at the end.

    Args:
        table_name: Name of the target table. SQLite cannot bind identifiers
            as parameters, so the name is spliced into the statement verbatim
            and must come from trusted code, never from user input.
    """
    con = sqlite3.connect("NNP.db")
    try:
        cursor = con.cursor()
        dic = make_dictionary.make_dictionary()

        # Values are bound as parameters rather than concatenated into the
        # SQL text, so quotes inside a word can no longer break or alter the
        # statement, and the insert does not rely on SQLite's legacy
        # treatment of double-quoted tokens as string literals.
        insert_sql = 'INSERT INTO ' + table_name + ' VALUES(?, ?, 0)'

        for i, (word, link) in enumerate(dic.items()):
            if i % 100 == 0:
                print(i)  # coarse progress indicator
            # Kept from the original implementation: links containing a
            # double quote are left out of the table.
            if '"' in link:
                continue
            cursor.execute(insert_sql, (str(word), str(link.replace("_", " "))))

        con.commit()
    finally:
        # Release the database handle even if an insert fails.
        con.close()
    print("[insert] fill with link dictionary")
def run(start, end, max_items=3333):
    """Run ``dictionary_to_article_check`` on a random sample of pairs.

    The indices in ``range(start, end)`` are shuffled and at most
    ``max_items`` of them are processed. Each index selects one entry of the
    list returned by ``make_index_kor_eng_list()``, which is passed to
    ``dictionary_to_article_check`` together with the open ``NNP.db``
    connection and the dictionary from ``make_dictionary.make_dictionary()``.
    An index whose check raises is recorded in ``log.txt`` and the run
    continues with the next one. The database is committed once, after all
    indices have been handled.

    Args:
        start: First index (inclusive) of the candidate range.
        end: Last index (exclusive) of the candidate range.
        max_items: Upper bound on how many shuffled indices are processed.
            Defaults to 3333, the count that used to be hard-coded; ``None``
            processes the whole range. Should not be negative.
    """
    shuffled_indices = list(range(start, end))
    random.shuffle(shuffled_indices)

    with open("log.txt", 'w', encoding='utf8') as log:
        con = sqlite3.connect("NNP.db")
        try:
            pair_list = make_index_kor_eng_list()
            dic = make_dictionary.make_dictionary()

            # Slicing caps the sample without indexing past the end of the
            # shuffled list, so ranges shorter than ``max_items`` no longer
            # raise IndexError from inside the error handler.
            for pair_index in shuffled_indices[:max_items]:
                try:
                    dictionary_to_article_check(con, dic, pair_list[pair_index])
                except Exception:
                    # Best effort: note the failing index and keep going.
                    # KeyboardInterrupt/SystemExit are deliberately not
                    # caught so the run can still be aborted.
                    log.write("index : {idx}\n".format(idx=pair_index))

            con.commit()
        finally:
            con.close()
import preprocess_SWy from cosine_similarity import cosine_sim from make_dictionary import make_dictionary from find_loc import findloc import sys import nltk nltk.download('punkt') from change_from_gumtree import changeFromGumTree #Vaadin "Javadoc is missing actual explanation of what \""responsive layout capabilities\"" means. " [JavadocType] server/src/com/vaadin/server/Responsive.java+refs-changes-63-2863-1 23 reload(sys) sys.setdefaultencoding('utf8') #check_family_map.txt threshold_dict = make_dictionary('sim_vals_correct.txt') threshold_arg = float(sys.argv[1]) for x, y in threshold_dict.iteritems(): threshold_dict[x] = threshold_arg num_keywords = 15 num_close_matches = 1 missing_files = file_opener.myopen('missing_files.txt', 'w+') check_family_dict = {} def make_check_family_dict(): check_fam_file = file_opener.myopen("check_family_map.txt", 'r') for line in check_fam_file: check = line.split('\t')[0].rstrip()
comm = preprocess_SWy.preprocess(comm) if warn not in dict_warn_comments: dict_warn_comments[warn] = comm else: dict_warn_comments[ warn] = dict_warn_comments[warn] + ' | ' + comm + ' ' list_warn_bow = [] for x, y in dict_warn_comments.iteritems(): comment_bow = dictionary.doc2bow(y.split()) # print(x,tfidf[comment_bow]) list_warn_bow.append((x, tfidf[comment_bow])) # for x in list_warn_bow: # print(x) check_family_map = make_dictionary('check_family_map.txt') # print(check_family_map) count = 0 mat = [] for i in range(len(list_warn_bow)): count += 1 # print("checking warn ",list_warn_bow[i][0]) sim_list = [] max_sim = 0 sim_warn = "" row = [] for j in range(len(list_warn_bow)): sim = cosine_sim(list_warn_bow[i][1], list_warn_bow[j][1]) if sim > max_sim and list_warn_bow[i][0] != list_warn_bow[j][0]:
import LCS import en_Num import ko_Num import wiki import ngram import lcslib import make_dictionary #count = herald_word_text.herald_word_text(0,6) #print(count) #div_eng.div_eng(0,400) #div_kor.div_kor(0,400) #en_Num.en_Num(0,400) #ko_Num.ko_Num(0,400) dic = make_dictionary.make_dictionary() start_idx = 0 end_idx = 400 root = ngram.getRoot("../dictionary.csv") errorLogFile = open("log.txt", 'w', encoding='utf8') idx = start_idx while 1: if idx > end_idx: break if (idx % 10 != 0): idx = idx + 1 continue print(str(idx) + ".txt") kLink, eLink, percent = wiki.check_all_pair(dic, idx)
def symbols(lines):
    """Run the symbol pipeline over ``lines`` and return its result.

    Steps, in order: ``make_symbol_dict(lines)`` builds an initial
    dictionary, ``make_dictionary(lines, ...)`` completes it,
    ``remove_symbols(lines)`` produces the stripped input, and
    ``replace_symbols`` combines the stripped input with the completed
    dictionary. The value returned by ``replace_symbols`` is returned as is.
    """
    symbol_table = make_dictionary(lines, make_symbol_dict(lines))
    return replace_symbols(remove_symbols(lines), symbol_table)