Пример #1
0
def keywords_save():
    """Write every non-empty ``keyword_paper`` value to ``keywords.txt``.

    Rows are fetched from the ``doclda`` table through ``dbs.getTuples``.
    The first column of each row is written to ``keywords.txt`` (UTF-8,
    overwritten on every call), followed by a comma. Rows whose first
    column is falsy (``None`` or an empty string) are skipped.
    """
    sql = """select keyword_paper from doclda"""
    # Query before opening the file so a failed query does not truncate it.
    rows = dbs.getTuples(sql)
    # The context manager flushes and closes the file even if a write
    # raises; previously the handle was never closed.
    with open('keywords.txt', encoding='utf-8', mode='w') as keywords:
        for row in rows:
            if row[0]:
                keywords.write(row[0] + ',')
Пример #2
0
from algorithm.base import dbs


def strToMap(s):
    """Parse a ``weight*"term" + weight*"term"`` string into a dict.

    The input is split on ``' + '`` into pieces; each piece is split on the
    first ``'*'`` into a weight and a delimited term. The first and last
    characters of the term part (the surrounding delimiters, e.g. quotes)
    are dropped, and the weight is converted with ``float``.

    Args:
        s: The string to parse. An empty string yields an empty dict.

    Returns:
        A dict mapping each term to its float weight. If a term occurs
        more than once, the last occurrence wins.

    Raises:
        IndexError: If a piece contains no ``'*'`` separator.
        ValueError: If a weight cannot be converted to ``float``.
    """
    weights = {}
    if not s:
        # Nothing to parse; without this guard the loop would see a single
        # empty piece and fail with IndexError.
        return weights
    for piece in s.split(' + '):
        # Split only on the first '*' so that terms which themselves
        # contain '*' are kept intact.
        parts = piece.split('*', 1)
        term = parts[1][1:-1]
        weights[term] = float(parts[0])

    return weights


# Fetch every paper; per the column list each row is
# (id, name, abstract, keyword).
sql = 'select id,name,abstract,keyword from paper'
institution_paper_list = dbs.getTuples(sql)
# Load the stop-word list, one entry per line with surrounding whitespace
# stripped. The context manager closes the file, which the previous bare
# open() call never did.
with open('stopwords.txt', encoding='utf-8') as stopwords_file:
    stopwords = [line.strip() for line in stopwords_file]
# NOTE(review): presumably the part-of-speech tags to keep -- confirm against
# the code that reads `fill`. 'vf ' and 'vx ' carry a trailing space, so an
# exact comparison against 'vf' / 'vx' would never match; verify whether
# that is intended before changing the literals.
fill = [
    'v', 'vd', 'vn', 'vf ', 'vx ', 'vi', 'vl', 'vg', 'n', 'nr', 'nr1', 'nr2',
    'nrj', 'nrf', 'ns', 'nsf', 'nt', 'nz', 'nl', 'ng'
]
# Progress message: "dictionary update".
print('词典更新')
jieba.load_userdict('userdict.txt')
# Progress message: "word segmentation", followed by the current local time.
print('分词')
# strftime formats the current local time when no time tuple is passed.
print(time.strftime('%Y-%m-%d %H:%M:%S'))
DocWord = []
for paper in institution_paper_list:
    line = paper[1].strip('\n').strip('\t') + ' ' + paper[2].strip('\n').strip(