def keywords_save():
    """Write every per-document keyword string to ``keywords.txt``.

    Reads the ``keyword_paper`` column of the ``doclda`` table via the
    project's ``dbs`` helper and appends each non-empty value to the file,
    separated by commas (a single comma-joined line, with a trailing comma
    — preserved from the original behavior).

    Side effects: overwrites ``keywords.txt`` in the working directory.
    """
    sql = """select keyword_paper from doclda"""
    rows = dbs.getTuples(sql)
    # ``with`` guarantees the handle is closed; the original leaked it.
    with open('keywords.txt', encoding='utf-8', mode='w') as keywords:
        for row in rows:
            # Skip NULL / empty keyword fields.
            if row[0]:
                keywords.write(row[0] + ',')
from algorithm.base import dbs def strToMap(s): dic = {} list = s.split(' + ') for l in list: v = l.split('*') key = v[1][1:-1] dic[key] = float(v[0]) return dic sql = 'select id,name,abstract,keyword from paper' institution_paper_list = dbs.getTuples(sql) stopwords = [ line.strip() for line in open('stopwords.txt', encoding='utf-8').readlines() ] fill = [ 'v', 'vd', 'vn', 'vf ', 'vx ', 'vi', 'vl', 'vg', 'n', 'nr', 'nr1', 'nr2', 'nrj', 'nrf', 'ns', 'nsf', 'nt', 'nz', 'nl', 'ng' ] print('词典更新') jieba.load_userdict('userdict.txt') print('分词') print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))) DocWord = [] for paper in institution_paper_list: line = paper[1].strip('\n').strip('\t') + ' ' + paper[2].strip('\n').strip(