def compute_vector_for_DOI(DOI):
    """Compute the normalized RAKE keyword vector for the paper with the
    given DOI and pickle it to vector_dir.

    Looks the paper up in the TinyDB at db_loc by its 'ownDOI' field,
    extracts keywords from the matching text file in text_dir, normalizes
    the vector, and writes it to '<vector_dir><filename>.pkl'.

    NOTE(review): assumes exactly one DB entry matches the DOI — an
    unmatched DOI raises IndexError on the [0] lookup; confirm callers
    guarantee this.
    """
    db = TinyDB(db_loc)
    paper = Query()
    # returns entry as dictionary
    this_paper_dict = db.search(paper.ownDOI == DOI)[0]
    name = this_paper_dict['filename']
    # here we call marcellos code
    vector = keyExtract.getRakeKeywords(text_dir + name + '.txt')
    # BUG FIX: original assigned the normalized result to a typo'd name
    # ('vectore') and then pickled the RAW vector, silently discarding
    # the normalization. Rebind so the normalized vector is what gets saved.
    vector = normVectorGen(vector)
    out_path = vector_dir + name + '.pkl'
    # Context manager ensures the file handle is closed even on error
    # (original opened the file and never closed it).
    with open(out_path, 'wb') as output:
        pickle.dump(vector, output)
import sys import os.path import keyExtract from pprint import pprint #import RAKE test = "05629128.txt" #test = "merkel.txt" print keyExtract.keyWords(test) #print keyExtract.smartWords(test) pprint(keyExtract.smartWords(test)) #pprint(keyExtract.getRakeKeywords(test)) rakeList = keyExtract.getRakeKeywords(test) normSum = 0 for i in rakeList: normSum = normSum + i[1] newList = [] for i in rakeList: newList.append((i[0],i[1] / normSum)) #print i[1] nSum = 0 for i in newList: nSum = nSum + i[1]