def classify_passage_boson_url():
    """Classify the passage behind a submitted URL and extract its keywords.

    Reads the ``url`` field of the current request's form data, hands it to
    ``get_content`` (defined elsewhere; presumably it downloads and extracts
    the page text -- confirm), and then collects:

    * the category index returned by the BosonNLP ``/analysis/category``
      endpoint, mapped to a Chinese label through ``class_dict``;
    * the decoded JSON reply of the BosonNLP ``/analysis/key`` endpoint,
      requested with ``top_k=100``;
    * the top 15 jieba TextRank keywords and the top 15 jieba
      ``extract_tags`` keywords restricted to the POS tags n/vn/ns/v;
    * the result of ``TaggingUtils.passage_second_level_classify``.

    Returns:
        The object produced by ``make_response`` with status 200, whose body
        is a JSON document with the keys ``code``, ``class``, ``keyword``,
        ``jieba_textrank``, ``jieba_keywords`` and ``topic_list``.

    Raises:
        IndexError: if the category reply contains no run of digits.
        KeyError: if the category index is not a key of ``class_dict``.
    """
    # NOTE(review): the URL is caller-supplied; confirm that get_content
    # limits which hosts/schemes may be fetched.
    url = request.form['url']
    content = get_content(url)

    payload = {'data': content}
    keyword_top_k = 100
    classify_result = requests.post(
        'http://bosonnlp.com/analysis/category', payload).content
    keyword_result = json.loads(
        requests.post(
            'http://bosonnlp.com/analysis/key?top_k=%s' % keyword_top_k,
            payload).content)

    # Every label is a unicode literal.  Entry 8 used to be a plain byte
    # string; with ensure_ascii=False the encoder would then have to join
    # non-ASCII bytes with unicode text, which fails on Python 2.
    class_dict = {
        0: u'体育',
        1: u'教育',
        2: u'财经',
        3: u'社会',
        4: u'娱乐',
        5: u'军事',
        6: u'国内',
        7: u'科技',
        8: u'互联网',
        9: u'房产',
        10: u'国际',
        11: u'女人',
        12: u'汽车',
        13: u'游戏',
    }

    # Debug output of the raw category reply.
    print(classify_result)
    # The first run of digits in the reply is taken as the category index.
    class_index = int(re.findall(r'\d+', classify_result)[0])

    jieba_textrank = jieba.analyse.textrank(content, topK=15)
    jieba_keywords = jieba.analyse.extract_tags(
        content, allowPOS=['n', 'vn', 'ns', 'v'], topK=15)
    topic_list = TaggingUtils.passage_second_level_classify(content)

    body = json.dumps(
        {
            'code': 0,
            'class': class_dict[class_index],
            'keyword': keyword_result,
            'jieba_textrank': jieba_textrank,
            'jieba_keywords': jieba_keywords,
            'topic_list': topic_list,
        },
        # Keep Chinese text readable instead of \uXXXX escapes.
        ensure_ascii=False)
    return make_response(body, 200)
def show_content_topic_prob(text_path):
    """Print and return the second-level topics found for a text file.

    Reads the whole file at ``text_path``, passes its contents to
    ``TaggingUtils.passage_second_level_classify`` and prints one line per
    returned item containing its ``topic_tag`` and ``topic_prob`` entries
    separated by a space.

    Args:
        text_path: path of the file to read (opened in mode ``'r'``).

    Returns:
        The value returned by ``TaggingUtils.passage_second_level_classify``,
        unchanged.
    """
    # The context manager closes the file handle as soon as it has been read.
    with open(text_path, 'r') as text_file:
        test_text = text_file.read()

    topics = TaggingUtils.passage_second_level_classify(test_text)
    for topic in topics:
        # Formatted as a single string so the same "tag prob" line is emitted
        # whether print is a statement or a function.
        print('%s %s' % (topic['topic_tag'], topic['topic_prob']))
    return topics