示例#1
0
def classify(weibo, flag):
    """Label every weibo as spam, news or comment.

    Args:
        weibo: list of records shaped like ``[mid, text, ...]``.
        flag: marker value handed to ``start_ad`` unchanged.

    Returns:
        The mapping produced by ``start_ad`` (looked up here by
        ``str(mid)``), updated in place. Per the original documentation:
        1 = spam text, 0 = news text, -1 = comment text.
    """
    label_data = start_ad(weibo, flag)  # spam classification

    # Only records that the spam stage labelled 0 go on to the next stage.
    news_weibo = [item for item in weibo if label_data[str(item[0])] == 0]

    rule_labels = cut_weibo(news_weibo)  # news vs. non-news classification
    for idx, tag in enumerate(rule_labels):
        if tag == 0:
            # A 0 from cut_weibo relabels the record as -1.
            label_data[str(news_weibo[idx][0])] = -1

    return label_data
示例#2
0
def classify(weibo, flag):
    """Label every weibo and print how long each stage takes.

    Args:
        weibo: list of records shaped like ``[mid, text, ...]``.
        flag: marker value handed to ``start_ad`` unchanged.

    Returns:
        The mapping produced by ``start_ad`` (looked up here by
        ``str(mid)``), updated in place. Per the original documentation:
        1 = spam text, 0 = news text, 2 = neutral text, -1 = polarised
        text. Records judged neutral receive whatever ``cut_mid_weibo``
        returns for their text.
    """
    began = time.time()
    label_data = start_ad(weibo, flag)  # spam classification
    elapsed = time.time() - began
    print(elapsed)

    # Only records that the spam stage labelled 0 go on to the next stages.
    news_weibo = [item for item in weibo if label_data[str(item[0])] == 0]

    began = time.time()
    rule_labels = cut_weibo(news_weibo)  # rule-based classification
    elapsed = time.time() - began
    print('cutting weibo by rules takes %s' % elapsed)

    began = time.time()
    for idx, tag in enumerate(rule_labels):
        if tag == 0:
            record = news_weibo[idx]
            mid = record[0]
            text = record[1]
            # Ask the neutral-sentiment classifier about this text.
            sentiment = triple_classifier(text)
            if sentiment == 0:
                label_data[str(mid)] = cut_mid_weibo(text)
            else:
                label_data[str(mid)] = -1
    elapsed = time.time() - began
    print('classifying weibo takes %s' % elapsed)

    return label_data
示例#3
0
def classify(weibo, flag):
    """Run the spam, rule and sentiment stages over a batch of weibo.

    Timing for each stage is printed to standard output.

    Args:
        weibo: list of records shaped like ``[mid, text, ...]``.
        flag: marker value handed to ``start_ad`` unchanged.

    Returns:
        The mapping produced by ``start_ad`` (looked up here by
        ``str(mid)``), updated in place. Per the original documentation:
        1 = spam text, 0 = news text, 2 = neutral text, -1 = polarised
        text. Records judged neutral receive whatever ``cut_mid_weibo``
        returns for their text.
    """
    t_start = time.time()
    label_data = start_ad(weibo, flag)  # spam classification
    t_stop = time.time()
    print(t_stop - t_start)

    # Collect the records that the spam stage labelled 0.
    news_weibo = []
    for entry in weibo:
        if label_data[str(entry[0])] == 0:
            news_weibo.append(entry)

    t_start = time.time()
    label = cut_weibo(news_weibo)  # rule-based classification
    t_stop = time.time()
    print('cutting weibo by rules takes %s' % (t_stop - t_start))

    t_start = time.time()
    for pos, rule_tag in enumerate(label):
        if rule_tag == 0:
            mid, text = news_weibo[pos][0], news_weibo[pos][1]
            # Call the neutral-sentiment classifier; 0 selects the
            # cut_mid_weibo label, anything else becomes -1.
            sentiment = triple_classifier(text)
            label_data[str(mid)] = cut_mid_weibo(text) if sentiment == 0 else -1
    t_stop = time.time()
    print('classifying weibo takes %s' % (t_stop - t_start))

    return label_data
示例#4
0
#-*-coding=utf-8-*-

import time
from sta_ad import start_ad

# Single sample record that is replicated to build the input for start_ad.
weibos = [{'_id': 1, 'text': 'Python的标准库中的os模块包含普遍的操作系统功能。如果你希望你的程序能够与平台无关的话,这个模块是尤为重要的。'}]

start_ts = time.time()
results = []
# Append the sample list 100 times; ``count`` finishes at 100.
for count in range(1, 101):
    results.extend(weibos)

start_ad(results)
# Elapsed seconds since start_ts: covers building the list and the start_ad call.
print(time.time() - start_ts)