예제 #1
0
def classify_passage_boson_url():
    """Classify the page behind the submitted URL and return the analysis as JSON.

    Reads the ``url`` field of the current request's form data, obtains the
    page text through ``get_content`` and then:

    * posts the text to the BosonNLP ``/analysis/category`` endpoint and maps
      the first integer found in the reply to a category label,
    * posts the text to the BosonNLP ``/analysis/key`` endpoint (``top_k=100``)
      and decodes the JSON reply,
    * extracts keywords locally with jieba (TextRank and ``extract_tags``,
      15 each),
    * asks ``TaggingUtils.passage_second_level_classify`` for a topic list.

    Returns:
        The object produced by ``make_response`` with status 200 and a JSON
        body holding the keys ``code`` (always 0), ``class``, ``keyword``,
        ``jieba_textrank``, ``jieba_keywords`` and ``topic_list``.

    Raises:
        IndexError: if the category reply contains no digits.
        KeyError: if the category index is not one of the known labels.
    """
    url = request.form['url']
    content = get_content(url)
    classify_result = requests.post('http://bosonnlp.com/analysis/category', {'data': content}).content
    keyword_result = json.loads(
        requests.post('http://bosonnlp.com/analysis/key?top_k=%s' % 100, {'data': content}).content)
    # Every label is a unicode literal: a plain byte-string label mixed with
    # the other unicode values can make json.dumps(ensure_ascii=False) fail
    # on Python 2 when that category is selected.
    class_dict = {0: u'体育', 1: u'教育', 2: u'财经', 3: u'社会',
                  4: u'娱乐', 5: u'军事', 6: u'国内',
                  7: u'科技', 8: u'互联网', 9: u'房产', 10: u'国际',
                  11: u'女人', 12: u'汽车', 13: u'游戏'}
    # Debug output of the raw category reply.
    print(classify_result)
    # The category index is the first run of digits in the reply.
    classify_result = int(re.findall(r'\d+', classify_result)[0])
    jieba_textrank = jieba.analyse.textrank(content, topK=15)
    jieba_keywords = jieba.analyse.extract_tags(content, allowPOS=['n', 'vn', 'ns', 'v'], topK=15)
    topic_list = TaggingUtils.passage_second_level_classify(content)
    payload = {'code': 0, 'class': class_dict[classify_result], 'keyword': keyword_result,
               'jieba_textrank': jieba_textrank, 'jieba_keywords': jieba_keywords,
               'topic_list': topic_list}
    # ensure_ascii=False keeps the Chinese text readable instead of \u escapes.
    return make_response(json.dumps(payload, ensure_ascii=False), 200)
예제 #2
0
def show_content_topic_prob(text_path):
    """Print and return the second-level topics of the text stored in a file.

    Args:
        text_path: path of the text file to read.

    Returns:
        The value returned by ``TaggingUtils.passage_second_level_classify``
        for the file's content; each element is indexed with ``'topic_tag'``
        and ``'topic_prob'`` when it is printed.
    """
    # The context manager closes the file even if reading fails.
    with open(text_path, 'r') as text_file:
        test_text = text_file.read()
    topics = TaggingUtils.passage_second_level_classify(test_text)
    for topic in topics:
        # One formatted string yields "tag prob" on both Python 2 and 3.
        print('%s %s' % (topic['topic_tag'], topic['topic_prob']))
    return topics
예제 #3
0
def show_content_topic_prob(text_path):
    """Print and return the second-level topics of the text stored in a file.

    Args:
        text_path: path of the text file to read.

    Returns:
        The value returned by ``TaggingUtils.passage_second_level_classify``
        for the file's content; each element is indexed with ``'topic_tag'``
        and ``'topic_prob'`` when it is printed.
    """
    # The context manager closes the file even if reading fails.
    with open(text_path, 'r') as text_file:
        test_text = text_file.read()
    topics = TaggingUtils.passage_second_level_classify(test_text)
    for topic in topics:
        # One formatted string yields "tag prob" on both Python 2 and 3.
        print('%s %s' % (topic['topic_tag'], topic['topic_prob']))
    return topics