Exemplo n.º 1
0
 # Build one output record for the current `jmap` entry and append it to
 # `result`. `jmap`, `counter`, `Document` and `result` come from the
 # enclosing scope, which is not visible in this fragment.
 url = jmap['url']
 print(counter, url)  # progress trace: running counter plus the page URL
 keyword_list, text_content, parse_title = (
     jmap['keyword_list'],
     jmap['text_content'],
     jmap['parse_title'],
 )

 doc = Document(text_content, parse_title)
 # NOTE(review): the return value is not used below; the call is kept in
 # case sentrank_entity() relies on state it sets up -- TODO confirm.
 sentrank = doc.sentrank()

 # sentrank_entity() returns a pair; only the first element of each item in
 # the second part is kept as a keyword.
 sentrank_entities, sentrank_keywords_noenti = doc.sentrank_entity()
 sentrank_keywords = [entry[0] for entry in sentrank_keywords_noenti]

 print('named entities:', sentrank_entities)
 print('entrank_keywords:', sentrank_keywords)

 # Keys are listed in the order the record is expected to carry them.
 obj = {
     "url": url,
     "title": parse_title,
     'sentrank_keywords': sentrank_keywords,
     'sentrank_entities': sentrank_entities,
     'keyword_list': keyword_list,
 }
 result.append(obj)