Пример #1
0
def _pop_top_authors(ranking, authordict, num):
  """Pop up to ``num`` entries off the ``ranking`` heap, lowest score first.

  ``ranking`` is a heap of ``[score, author]`` lists. Returns a list of
  ``[author, keywords]`` pairs. Stops early when the heap is exhausted so
  that asking for more authors than exist does not raise ``IndexError``.
  """
  similars = []
  while ranking and len(similars) < num:
    _, auth = heappop(ranking)
    similars.append([auth, authordict[auth]['Keywords']])
  return similars


def suggestauthors(topic,num):
  """Suggest up to ``num`` authors related to ``topic``.

  The author table is loaded from 'authordict.csv' via ``getauthordict``.
  ``Analyze.idauthor`` is asked to resolve ``topic`` to an author key:

  * If it returns '' (presumably: topic is not a known author -- confirm
    against ``Analyze.idauthor``), ``topic`` is treated as free text. It is
    tokenized into words plus stopword-free two-word phrases, and every
    author is scored by how many of those tokens occur among the author's
    'Keywords' (whole comma-separated phrases and individual words).
  * Otherwise every author is scored against the identified author with
    ``Analyze.similarauthors`` and the identified author's keywords are
    printed. The lowest scores are returned first; NOTE(review): this
    assumes ``similarauthors`` returns smaller-is-closer values -- confirm.

  Args:
    topic: free-text topic or an author label.
    num: maximum number of suggestions to return.

  Returns:
    A list of ``[author, keywords]`` pairs, best match first. It holds fewer
    than ``num`` entries when the author table is smaller than ``num``.
  """
  authordict = getauthordict('authordict.csv')
  ranking = []

  author = Analyze.idauthor(authordict,topic)
  if author == '':
    # Stopwords are only used to discard uninformative two-word phrases.
    stops = set(nltk.corpus.stopwords.words('english'))
    stops.update(['we',',','.','(',')','using','new','propose','investigate'])
    stops.update(['-','show','infer','novel','method'])

    words = nltk.word_tokenize(topic)
    # Join each bigram into a "w1 w2" string: the keyword side holds strings,
    # so tuple bigrams could never match a keyword phrase.
    phrases = [' '.join(bg) for bg in nltk.bigrams(words)
               if bg[0] not in stops and bg[1] not in stops]
    tokens1 = words + phrases

    for auth in authordict:
      keyw2 = authordict[auth]['Keywords']
      # Whole comma-separated phrases (stripped, so "a, b c" yields "b c")
      # plus the individual words; empty strings are dropped.
      keyphrases = [kw.strip() for kw in keyw2.split(',')]
      tokens2 = set(filter(None, keyphrases + re.split(r'[ ,]', keyw2)))

      # Negated so that the min-heap pops the highest match count first.
      score = -sum(1 for token in tokens1 if token in tokens2)
      heappush(ranking,[score,auth])
    return _pop_top_authors(ranking, authordict, num)

  for auth in authordict:
    score = Analyze.similarauthors(authordict[author],authordict[auth])
    heappush(ranking,[score,auth])
  similars = _pop_top_authors(ranking, authordict, num)
  print(authordict[author]['Keywords'])
  return similars