Пример #1
0
def process(file_path):
    """
    Index one document file into the database.

    Reads the whole file, inserts a ``wiki_doc`` row holding the length of
    the text and the file path, then iterates over the result of
    ``docs_to_vector`` and, for every word that is not in ``stop_word``,
    makes sure a term entry exists, calls ``add_term_frequency`` for it and
    inserts a document/term record.

    :param file_path: path of the file to process
    :return: None; the function also returns early, before touching the
        database, when the file cannot be opened or read
    """
    try:
        # ``with`` guarantees the handle is closed even if read() fails.
        with open(file_path, "r") as doc_file:
            docs = doc_file.read()
    except (IOError, OSError, ValueError):
        # Best effort: an unreadable file (missing, no permission, not
        # decodable, invalid path) is skipped silently, as before.
        # IOError is listed alongside OSError so this also holds on
        # interpreters where the two are distinct classes.
        return

    doc_id = DBHelper().insert_record_with_var(
        "insert into wiki_doc(`doc_len`,`doc_path`) VALUES (%s,%s)",
        (len(docs), file_path))

    # presumably a mapping of word -> weight/count for this document;
    # only iteration and indexing by word are relied on here -- TODO confirm
    doc_vector = docs_to_vector(docs)
    terms = term()
    doc_terms = doc_term()
    for word in doc_vector:
        if word in stop_word:
            continue
        # Reuse the id of a known term, otherwise create the term first.
        if terms.check_term_exist(word):
            term_id = terms.get_term_id(word)
        else:
            term_id = terms.insert_term(word, 0)
        terms.add_term_frequency(word)
        doc_terms.insert_record(term_id, doc_id, doc_vector[word])