Пример #1
0
def insert_link_dictionary(table_name):
    """Insert every entry of the link dictionary into ``table_name`` in NNP.db.

    One row ``(word, value, 0)`` is written per dictionary entry, where
    ``value`` is the entry's text with underscores replaced by spaces.
    Entries whose value contains a double quote are skipped, exactly as
    before. A progress counter is printed every 100 entries.

    All rows are written in a single transaction: they are committed
    together after the loop, and the connection is always closed, even if an
    insert fails.

    Args:
        table_name: Name of the destination table. SQLite cannot bind
            identifiers as parameters, so the name is spliced into the SQL
            text; it must come from trusted code, never from user input.
    """
    # Only the table name is interpolated; both values are bound with "?"
    # placeholders so quotes or SQL fragments inside the dictionary data
    # cannot alter the statement.
    insert_sql = 'INSERT INTO ' + table_name + ' VALUES(?, ?, 0)'

    con = sqlite3.connect("NNP.db")
    try:
        cursor = con.cursor()
        dic = make_dictionary.make_dictionary()
        for i, (word, value) in enumerate(dic.items()):
            if i % 100 == 0:
                print(i)
            # Historical filter: values containing a double quote were never
            # inserted. Kept so the resulting table contents do not change.
            if '"' in value:
                continue
            cursor.execute(
                insert_sql,
                (str(word), str(value.replace("_", " "))),
            )
        # One commit for the whole batch instead of one per row.
        con.commit()
    finally:
        con.close()
    print("[insert] fill with link dictionary")
Пример #2
0
def run(start, end, limit=3333):
    """Check a random sample of Korean/English pairs against the dictionary.

    The indices ``start`` .. ``end - 1`` are shuffled and the first ``limit``
    of them are passed, one pair at a time, to
    ``dictionary_to_article_check``. An index whose check raises is written
    to ``log.txt`` as ``index : N`` and processing continues with the next
    one. The database changes are committed once after the loop.

    Args:
        start: First pair index (inclusive).
        end: Last pair index (exclusive).
        limit: Maximum number of shuffled indices to process. Defaults to
            3333, the count that used to be hard-coded. If fewer indices
            exist, all of them are processed. ``None`` processes every index.
    """
    randomList = list(range(start, end))
    random.shuffle(randomList)

    with open("log.txt", 'w', encoding='utf8') as log:
        con = sqlite3.connect("NNP.db")
        try:
            pair_list = make_index_kor_eng_list()
            dic = make_dictionary.make_dictionary()
            # Slicing (rather than indexing up to a fixed count) keeps the
            # loop inside the list when the range is shorter than ``limit``.
            for pair_index in randomList[:limit]:
                try:
                    dictionary_to_article_check(
                        con, dic, pair_list[pair_index])
                except Exception:
                    # Best effort: record the failing index and keep going.
                    # KeyboardInterrupt/SystemExit are deliberately not
                    # caught so the run can still be aborted.
                    log.write("index : {idx}\n".format(idx=pair_index))
            con.commit()
        finally:
            con.close()
Пример #3
0
import preprocess_SWy
from cosine_similarity import cosine_sim
from make_dictionary import make_dictionary
from find_loc import findloc
import sys
import nltk
nltk.download('punkt')

from change_from_gumtree import changeFromGumTree
#Vaadin     "Javadoc is missing actual explanation of what \""responsive layout capabilities\"" means. "    [JavadocType]   server/src/com/vaadin/server/Responsive.java+refs-changes-63-2863-1 23

# Python 2 only: reloading sys makes sys.setdefaultencoding() callable again,
# which is then used to switch the process-wide default encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Load the mapping stored in sim_vals_correct.txt, then overwrite every value
# with the single threshold passed as the first command-line argument. Only
# the keys of the loaded file survive; its values are discarded.
threshold_dict = make_dictionary('sim_vals_correct.txt')
threshold_arg = float(sys.argv[1])  # raises if the argument is missing or not a number
for x, y in threshold_dict.iteritems():
    # Re-assigning an existing key does not resize the dict, so this is safe
    # while iterating; ``y`` (the old value) is intentionally unused.
    threshold_dict[x] = threshold_arg

# Tuning constants; the code that consumes them is outside this excerpt.
num_keywords = 15
num_close_matches = 1
# Opened in 'w+' mode through the project's file_opener helper.
# NOTE(review): no matching close is visible here -- confirm it is closed later.
missing_files = file_opener.myopen('missing_files.txt', 'w+')

# Starts empty; presumably filled by make_check_family_dict() from
# check_family_map.txt -- confirm against the rest of that function.
check_family_dict = {}


def make_check_family_dict():
    check_fam_file = file_opener.myopen("check_family_map.txt", 'r')
    for line in check_fam_file:
        check = line.split('\t')[0].rstrip()
Пример #4
0
        comm = preprocess_SWy.preprocess(comm)
        if warn not in dict_warn_comments:
            dict_warn_comments[warn] = comm
        else:
            dict_warn_comments[
                warn] = dict_warn_comments[warn] + ' | ' + comm + ' '
# Build one (key, weighted bag-of-words) pair per entry of dict_warn_comments.
# Each value is split on whitespace, converted with dictionary.doc2bow and
# then passed through tfidf.
# NOTE(review): `dictionary` and `tfidf` look like a gensim Dictionary and
# TfidfModel -- confirm where they are created.
list_warn_bow = []
for x, y in dict_warn_comments.iteritems():
    comment_bow = dictionary.doc2bow(y.split())
    # print(x,tfidf[comment_bow])
    list_warn_bow.append((x, tfidf[comment_bow]))

# for x in list_warn_bow:
# 	print(x)

# Mapping loaded from check_family_map.txt by the project's make_dictionary
# helper; its use is outside this excerpt.
check_family_map = make_dictionary('check_family_map.txt')
# print(check_family_map)

# Initial state for the pairwise-similarity loop that follows: `count` is
# incremented once per outer iteration there; `mat` starts as an empty list.
count = 0
mat = []
for i in range(len(list_warn_bow)):
    count += 1
    # print("checking warn ",list_warn_bow[i][0])
    sim_list = []
    max_sim = 0
    sim_warn = ""
    row = []
    for j in range(len(list_warn_bow)):

        sim = cosine_sim(list_warn_bow[i][1], list_warn_bow[j][1])
        if sim > max_sim and list_warn_bow[i][0] != list_warn_bow[j][0]:
Пример #5
0
import LCS
import en_Num
import ko_Num
import wiki
import ngram
import lcslib
import make_dictionary

#count = herald_word_text.herald_word_text(0,6)
#print(count)
#div_eng.div_eng(0,400)
#div_kor.div_kor(0,400)
#en_Num.en_Num(0,400)
#ko_Num.ko_Num(0,400)
# Dictionary handed to wiki.check_all_pair() for every processed index.
dic = make_dictionary.make_dictionary()
# Inclusive index bounds for the loop below, which stops once idx > end_idx
# and skips every index that is not a multiple of 10.
start_idx = 0
end_idx = 400
# Structure built by ngram.getRoot from ../dictionary.csv.
# NOTE(review): presumably the root of an n-gram tree -- confirm in ngram.
root = ngram.getRoot("../dictionary.csv")
# Opening with 'w' truncates any existing log.txt. Text is written as UTF-8.
# NOTE(review): no close() is visible in this excerpt -- confirm it is closed.
errorLogFile = open("log.txt", 'w', encoding='utf8')
# Loop cursor; starts at the lower bound.
idx = start_idx
while 1:

    if idx > end_idx:
        break

    if (idx % 10 != 0):
        idx = idx + 1
        continue

    print(str(idx) + ".txt")
    kLink, eLink, percent = wiki.check_all_pair(dic, idx)
Пример #6
0
def symbols(lines):
    """Run the symbol-processing pipeline over ``lines`` and return its result.

    The steps run in this fixed order:

    1. ``make_symbol_dict(lines)`` builds an initial table.
    2. ``make_dictionary(lines, table)`` produces the completed table.
    3. ``remove_symbols(lines)`` produces the stripped lines.
    4. ``replace_symbols(stripped, completed)`` produces the return value.
    """
    # The order of the calls is kept exactly as-is: the helpers receive the
    # same ``lines`` object and may depend on what earlier steps did to it.
    symbol_table = make_symbol_dict(lines)
    completed_table = make_dictionary(lines, symbol_table)
    stripped_lines = remove_symbols(lines)
    return replace_symbols(stripped_lines, completed_table)