# Features: for every text in li1, build the n-gram input and one numeric
# feature row, then assemble a feature set per emotion label via tz().
#
# NOTE(review): this section was reconstructed from a whitespace-collapsed
# line. The nesting (what belongs to the else branch, and that the tz()
# calls run once after the loop) is inferred -- confirm against the
# intended layout.

# Patterns are compiled once, outside the loop.
_MENTION_RE = re.compile(r'@[\s\S]*?:')   # "@...:" prefix, non-greedy
_SCHEME_RE = re.compile('http://')        # only the URL scheme is removed
_NON_TEXT_RE = re.compile("[^a-zA-Z\u4e00-\u9fa5]")  # keep ASCII letters and U+4E00..U+9FA5

for i in li1:
    data1 = _MENTION_RE.sub('', i)
    data1 = _SCHEME_RE.sub('', data1)
    data_1 = _NON_TEXT_RE.sub('', data1)

    jishu += 1
    if not data_1:
        # Nothing left after cleaning. The original assigned `wordList` here
        # but read `wordList_2` below, which raised NameError on a leading
        # empty text or reused the previous text's tokens otherwise.
        wordList_2 = []
        # NOTE(review): no entry is appended to list_for_cv for an empty
        # text, while list_num_tezheng still receives a row below, so the
        # two lists can differ in length -- verify tz() tolerates this.
    else:
        wordList_2 = ltp(data_1)  # presumably word segmentation -- confirm
        list_for_cv.append(n_gram(wordList_2))
    print(jishu)  # progress counter

    # Punctuation feature uses the raw text `i`, because the cleaning above
    # strips every non-letter character from data_1.
    t_bd_p = biaodian(i)
    # Presumably a negation-word feature -- confirm against fdc().
    t_fdc_p = fdc(data_1)
    # One zzqgc() value per lexicon list, in the original order.
    lexicon_features = [
        zzqgc(lexicon, data_1)
        for lexicon in (list_ai, list_e, list_hao, list_jing,
                        list_ju, list_le, list_nu)
    ]

    # Row layout: 7 lexicon values, token count, fdc value, biaodian value.
    numtezheng = lexicon_features + [len(wordList_2), t_fdc_p, t_bd_p]
    list_num_tezheng.append(numtezheng)

tezheng_sadness = tz('sadness', list_for_cv, list_num_tezheng)
tezheng_like = tz('like', list_for_cv, list_num_tezheng)
tezheng_disgust = tz('disgust', list_for_cv, list_num_tezheng)
tezheng_surprise = tz('surprise', list_for_cv, list_num_tezheng)
jishu+=1 else: jishu+=1 wordList = ltp(data) list_ngram.append(n_gram(wordList)) print(jishu) # 否定词特征 t_fdc = fdc(data) # 标点特征 t_bd = biaodian(node.firstChild.data) # 情感词典0&1特征 tmp1 = zzqgc(list_ai,data) tmp2 = zzqgc(list_e,data) tmp3 = zzqgc(list_hao,data) tmp4= zzqgc(list_jing,data) tmp5 = zzqgc(list_ju,data) tmp6= zzqgc(list_le,data) tmp7 = zzqgc(list_nu,data) numtezheng = [tmp1,tmp2,tmp3,tmp4,tmp5,tmp6,tmp7,len(wordList),t_fdc,t_bd] # 基本特征 list_for_coo.append(numtezheng)