def domain_classfiy(uid_weibo):
    '''
    Main entry point for user domain classification.

    Input:
        uid_weibo: dict mapping a uid to that user's weibo list,
            {uid1: [weibo1, weibo2, weibo3, ...]}; each weibo is read
            through its 'text' key.

    Output (a 2-tuple):
        domain: label dict,
            {uid1: [label1, label2, label3], uid2: [...], ...}
            label1 comes from the follower structure (or from the seed-user
            table when the uid is a seed user), label2 from the verification
            type and label3 from the user's text.
        r_domain: recommended-label dict, {uid1: label, uid2: label, ...}

    Keys of both returned dicts are str(uid).
    '''
    # Nothing to classify: return empty results without querying anything.
    if not uid_weibo:
        return {}, {}

    weibo_text = {}
    uidlist = []
    for uid, weibos in uid_weibo.items():
        # Every cleaned text is prefixed with ',' (so a non-empty result
        # starts with a comma); this matches the historical format that is
        # handed to the text classifier.
        weibo_text[uid] = ''.join(',' + re_cut(weibo['text'])
                                  for weibo in weibos)
        uidlist.append(uid)

    users = get_user(uidlist)
    # Debug output; written so it prints the same text on Python 2 and 3.
    print('len(users): %s' % len(users))
    print(len(uidlist))

    domain = {}
    r_domain = {}
    for uid in users:
        labels = []

        # label1: seed users carry a known field; everybody else is
        # classified from the follower list.
        field1 = getFieldFromProtou(uid, protou_dict=train_users)
        if field1 == 'Null':
            friends = get_friends([uid])[str(uid)]
            if friends:
                field1, _ = user_domain_classifier_v1(
                    friends, fields_value=txt_labels, protou_dict=proto_users)
            else:
                field1 = 'other'
        labels.append(field1)

        # label2: derived from the user record; the literal 'other' is the
        # marker the lookup returns when there is no usable record.
        profile = read_by_xapian(xs, uid)
        if profile == 'other':
            labels.append('other')
        else:
            labels.append(user_domain_classifier_v2(profile))

        # label3: derived from the user's concatenated weibo text.
        field_dict, _ = domain_classfiy_by_text({uid: weibo_text[uid]})
        labels.append(field_dict[uid])

        key = str(uid)
        domain[key] = labels

        # Without a record there is no verified type, so 'other' is used.
        verified_type = 'other' if profile == 'other' else profile['verified_type']
        r_domain[key] = get_recommend_result(verified_type, labels)

    # Return the whole recommendation dict, not just the last user's label.
    return domain, r_domain
def domain_classfiy(uid_list, uid_weibo):
    '''
    Main entry point for user domain classification.

    Input:
        uid_list: list of uids, [uid1, uid2, uid3, ...]
        uid_weibo: word-frequency dict produced by word segmentation,
            {uid1: {'key1': f1, 'key2': f2, ...}, ...}

    Output (a 2-tuple):
        domain: label dict,
            {uid1: [label1, label2, label3], uid2: [...], ...}
            label1 comes from the follower structure (or from the seed-user
            table when the uid is a seed user), label2 from the verification
            type and label3 from the user's text.
        r_domain: recommended-label dict, {uid1: label, uid2: label, ...}

    Keys of both returned dicts are str(uid).
    '''
    # Nothing to classify: return empty results without querying anything.
    if not uid_list and not uid_weibo:
        return {}, {}

    users = get_user(uid_list)
    friends = get_friends(uid_list)

    domain = {}
    r_domain = {}
    for uid, profile in users.items():
        labels = []

        # label1: seed users carry a known field; everybody else is
        # classified from the follower list.
        field1 = getFieldFromProtou(uid, protou_dict=train_users)
        if field1 == 'Null':
            followers = friends[uid]
            if followers:
                field1, _ = user_domain_classifier_v1(
                    followers, fields_value=txt_labels,
                    protou_dict=proto_users)
            else:
                field1 = 'other'
        labels.append(field1)

        # label2: the literal 'other' marks a user without a usable record.
        if profile == 'other':
            labels.append('other')
        else:
            labels.append(user_domain_classifier_v2(profile))

        # label3: classify by text only when the user actually has words;
        # a missing or empty word-frequency dict falls back to 'other'.
        words = uid_weibo.get(uid)
        if words:
            field_dict, _ = domain_classfiy_by_text({uid: words})
            labels.append(field_dict[uid])
        else:
            labels.append('other')

        key = str(uid)
        domain[key] = labels

        # Without a record there is no verified type, so 'other' is used.
        verified_type = 'other' if profile == 'other' else profile['verified_type']
        r_domain[key] = get_recommend_result(verified_type, labels)

    return domain, r_domain
def domain_classfiy(uid_list, uid_weibo):
    '''
    Main entry point for user domain classification.

    Input:
        uid_list: list of uids, [uid1, uid2, uid3, ...]
        uid_weibo: word-frequency dict produced by word segmentation,
            {uid1: {'key1': f1, 'key2': f2, ...}, ...}

    Output (a 2-tuple):
        domain: label dict,
            {uid1: [label1, label2, label3], uid2: [...], ...}
            label1 comes from the follower structure (or from the seed-user
            table when the uid is a seed user), label2 from the verification
            type and label3 from the user's text.
        r_domain: recommended-label dict, {uid1: label, uid2: label, ...}

    Special cases:
        * uid_weibo empty: no lookup is made; every uid in uid_list maps to
          ['other'] in both dicts (both dicts are empty when uid_list is
          empty too). Keys are the uids exactly as given.
        * uid_list empty, uid_weibo not: the keys of uid_weibo are used as
          the uid list.

    In the normal path the keys of both returned dicts are str(uid).
    '''
    if not uid_weibo:
        # NOTE(review): this shortcut stores one-element lists under the raw
        # uid, whereas the normal path stores three labels / a single
        # recommended label under str(uid). Kept as-is for backward
        # compatibility -- confirm what callers expect.
        domain = {uid: ['other'] for uid in uid_list}
        r_domain = {uid: ['other'] for uid in uid_list}
        return domain, r_domain

    if not uid_list:
        uid_list = list(uid_weibo)

    users = get_user(uid_list)
    friends = get_friends(uid_list)

    domain = {}
    r_domain = {}
    for uid, profile in users.items():
        labels = []

        # label1: seed users carry a known field; everybody else is
        # classified from the follower list.
        field1 = getFieldFromProtou(uid, protou_dict=train_users)
        if field1 == 'Null':
            followers = friends[uid]
            if followers:
                field1, _ = user_domain_classifier_v1(
                    followers, fields_value=txt_labels,
                    protou_dict=proto_users)
            else:
                field1 = 'other'
        labels.append(field1)

        # label2: the literal 'other' marks a user without a usable record.
        if profile == 'other':
            labels.append('other')
        else:
            labels.append(user_domain_classifier_v2(profile))

        # label3: classify by text only when the user actually has words;
        # a missing or empty word-frequency dict falls back to 'other'.
        words = uid_weibo.get(uid)
        if words:
            field_dict, _ = domain_classfiy_by_text({uid: words})
            labels.append(field_dict[uid])
        else:
            labels.append('other')

        key = str(uid)
        domain[key] = labels

        # Without a record there is no verified type, so 'other' is used.
        verified_type = 'other' if profile == 'other' else profile['verified_type']
        r_domain[key] = get_recommend_result(verified_type, labels)

    return domain, r_domain