def classify(weibo, flag):
    '''
    Main classification entry point.

    Input:
        weibo: list of records, e.g. [[mid, text, ...], [mid, text, ...], ...]
        flag: marker variable forwarded to start_ad (may be set to anything)
    Output:
        label_data: dict keyed by str(mid), e.g. {'mid': label, ...}
            1 = spam text, 0 = news text, -1 = comment text
    '''
    # Stage 1: spam classification.
    label_data = start_ad(weibo, flag)

    # Only records whose first-stage label is 0 go on to the second stage.
    news_weibo = [item for item in weibo if label_data[str(item[0])] == 0]

    # Stage 2: news vs. non-news classification.
    label = cut_weibo(news_weibo)
    for idx in range(len(label)):
        if label[idx] != 0:
            continue
        # A 0 from cut_weibo re-labels the record as -1.
        label_data[str(news_weibo[idx][0])] = -1

    return label_data
def classify(weibo, flag):
    '''
    Main classification entry point (prints the time taken by each stage).

    Input:
        weibo: list of records, e.g. [[mid, text, ...], [mid, text, ...], ...]
        flag: marker variable forwarded to start_ad (may be set to anything)
    Output:
        label_data: dict keyed by str(mid), e.g. {'mid': label, ...}
            1 = spam text, 0 = news text, 2 = neutral text,
            -1 = text with sentiment polarity.
            NOTE(review): for neutral texts the stored label is whatever
            cut_mid_weibo returns; presumably that includes 2 -- confirm.
    '''
    # Stage 1: spam classification.
    start = time.time()
    label_data = start_ad(weibo, flag)
    end = time.time()
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(end - start)

    # Only records whose first-stage label is 0 go on to the later stages.
    news_weibo = [item for item in weibo if label_data[str(item[0])] == 0]

    # Stage 2: rule-based classification.
    start = time.time()
    label = cut_weibo(news_weibo)
    end = time.time()
    print('cutting weibo by rules takes %s' % (end - start))

    # Stage 3: sentiment classification of the records the rules labelled 0.
    start = time.time()
    for i in range(len(label)):
        if label[i] != 0:
            continue
        mid = news_weibo[i][0]
        text = news_weibo[i][1]
        sentiment = triple_classifier(text)  # neutral-sentiment classifier
        if sentiment == 0:
            # Neutral text: the final label is delegated to cut_mid_weibo.
            label_data[str(mid)] = cut_mid_weibo(text)
        else:
            label_data[str(mid)] = -1
    end = time.time()
    print('classifying weibo takes %s' % (end - start))

    return label_data
def classify(weibo, flag):
    '''
    Main classification entry point; reports the duration of each stage.

    Input:
        weibo: list of records, e.g. [[mid, text, ...], [mid, text, ...], ...]
        flag: marker variable passed through to start_ad (may be set to
            anything)
    Output:
        label_data: dict keyed by str(mid), e.g. {'mid': label, ...}
            1 = spam text, 0 = news text, 2 = neutral text,
            -1 = text with sentiment polarity.
            NOTE(review): neutral texts receive the return value of
            cut_mid_weibo; presumably that includes 2 -- confirm.
    '''
    # Spam classification.
    start = time.time()
    label_data = start_ad(weibo, flag)
    end = time.time()
    # Parenthesised single-argument print: same output on Python 2,
    # and valid syntax on Python 3.
    print(end - start)

    # Keep only the records the spam stage labelled 0.
    news_weibo = [record for record in weibo
                  if label_data[str(record[0])] == 0]

    # Rule-based classification.
    start = time.time()
    label = cut_weibo(news_weibo)
    end = time.time()
    print('cutting weibo by rules takes %s' % (end - start))

    # Sentiment classification for every record the rules labelled 0.
    start = time.time()
    for i in range(len(label)):
        if label[i] == 0:
            mid = news_weibo[i][0]
            text = news_weibo[i][1]
            sentiment = triple_classifier(text)  # neutral-sentiment classifier
            if sentiment == 0:
                # Neutral: let cut_mid_weibo decide the final label.
                label_data[str(mid)] = cut_mid_weibo(text)
            else:
                label_data[str(mid)] = -1
    end = time.time()
    print('classifying weibo takes %s' % (end - start))

    return label_data
#-*-coding=utf-8-*-
# Timing script: builds a 100-record input from one sample record, passes it
# to start_ad and prints the elapsed wall-clock seconds.
import time

from sta_ad import start_ad

# One sample record; replicated below to form the input.
weibos = [{'_id': 1, 'text': 'Python的标准库中的os模块包含普遍的操作系统功能。如果你希望你的程序能够与平台无关的话,这个模块是尤为重要的。'}]

start_ts = time.time()

# Append the sample list 100 times; `count` ends at 100, as before.
results = []
for count in range(1, 101):
    results.extend(weibos)

start_ad(results)

# The measured interval includes building `results` as well as the call.
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(time.time() - start_ts)