# Build one output record for the current `jmap` entry and append it to
# `result`. `jmap`, `counter`, `result` and `Document` are supplied by the
# surrounding code, which is outside this fragment.
url = jmap['url']
print(counter, url)
keyword_list = jmap['keyword_list']
text_content = jmap['text_content']
parse_title = jmap['parse_title']

# Disabled experiments that previously lived here: a progress print every 100
# records, and top-10 tf_idf / bm25 / textrank outputs stored on the record.
doc = Document(text_content, parse_title)

# NOTE(review): the value bound to `sentrank` is not used in the visible code;
# the call is kept in case it has side effects or is read later -- confirm.
sentrank = doc.sentrank()

# sentrank_entity() is unpacked as a pair; only the first element of each
# entry in the second part is kept as the keyword.
# NOTE(review): presumably entries are (keyword, score) pairs -- confirm.
sentrank_entities, sentrank_keywords_noenti = doc.sentrank_entity()
sentrank_keywords = [entry[0] for entry in sentrank_keywords_noenti]

print('named entities:', sentrank_entities)
# NOTE(review): the label below reads 'entrank_keywords' -- likely a typo for
# 'sentrank_keywords'; left unchanged so the printed output stays the same.
print('entrank_keywords:', sentrank_keywords)

# Key order matches the original assignment order.
obj = {
    "url": url,
    "title": parse_title,
    'sentrank_keywords': sentrank_keywords,
    'sentrank_entities': sentrank_entities,
    'keyword_list': keyword_list,
}
result.append(obj)