def suggestauthors(topic, num):
    """Suggest up to ``num`` authors related to ``topic``.

    The author table is loaded from ``authordict.csv`` and
    ``Analyze.idauthor`` is asked to resolve ``topic`` against it.

    * If it returns the empty string, ``topic`` is treated as free text:
      it is tokenized into words plus stopword-free two-word phrases, and
      every author is ranked by how many of those terms occur in the
      author's ``'Keywords'`` field (most matches first).
    * Otherwise every author is ranked by
      ``Analyze.similarauthors(identified_author, other_author)``, lowest
      score first, and the identified author's keywords are printed.
      NOTE(review): this presumably means similarauthors is distance-like
      (smaller == more similar) -- confirm against Analyze.

    Args:
        topic: Text passed to ``Analyze.idauthor`` and, when that returns
            the empty string, tokenized for keyword matching.
        num: Maximum number of suggestions to return.

    Returns:
        A list of ``[author_key, keywords]`` pairs in rank order. It holds
        fewer than ``num`` entries when the author table is smaller than
        ``num``, and is empty when ``num`` is not positive.
    """
    authordict = getauthordict('authordict.csv')
    author = Analyze.idauthor(authordict, topic)
    ranking = []

    if author == '':
        # Stopwords are only used to discard uninformative bigrams; single
        # word tokens are kept unfiltered, as before.
        stops = set(nltk.corpus.stopwords.words('english'))
        stops.update(['we', ',', '.', '(', ')', 'using', 'new', 'propose',
                      'investigate'])
        stops.update(['-', 'show', 'infer', 'novel', 'method'])

        words = nltk.word_tokenize(topic)
        # nltk.bigrams yields tuples; join them so they are comparable with
        # the multi-word keyword phrases, which are plain strings.
        phrases = [' '.join(pair) for pair in nltk.bigrams(words)
                   if pair[0] not in stops and pair[1] not in stops]
        topic_terms = words + phrases

        for auth in authordict:
            keywords = authordict[auth]['Keywords']
            # Comma split gives whole phrases, the second split gives single
            # words. Strip so ", machine learning" matches "machine learning".
            keyword_terms = {
                term.strip()
                for term in re.split(r',', keywords) + re.split(r'[ ,]', keywords)
            }
            keyword_terms.discard('')
            # Negated so that the min-heap pops the best match first.
            score = -sum(1 for term in topic_terms if term in keyword_terms)
            heappush(ranking, [score, auth])
    else:
        # NOTE(review): the identified author is ranked against itself too,
        # so it may appear among the suggestions -- confirm this is intended.
        for auth in authordict:
            score = Analyze.similarauthors(authordict[author], authordict[auth])
            heappush(ranking, [score, auth])

    similars = []
    # Stop when the heap runs dry instead of raising IndexError when num
    # exceeds the number of known authors.
    while ranking and len(similars) < num:
        _, best = heappop(ranking)
        similars.append([best, authordict[best]['Keywords']])

    if author != '':
        print(authordict[author]['Keywords'])
    return similars