Python cut_sentence示例，textprocessing.cut_sentence Python示例

示例#1

0

显示文件

文件： weibo_dict_senti_analysis.py 项目： carrielsx/weibo_sentiment_analysis

def single_review_sentiment_score(weibo_sent):
    """Return the overall sentiment score of one weibo post.

    The post is split into clauses with ``tp.cut_sentence``; each clause is
    segmented, stripped of stop words and scanned word by word:

    * a word found in ``posdict`` / ``negdict`` adds 1 to the clause's
      positive / negative score, after which every word between the previous
      sentiment word and this one is passed through ``match`` (defined
      elsewhere; presumably applies degree-adverb and negation weights --
      confirm against its definition);
    * an exclamation mark adds 2 to the polarity of the sentiment word
      nearest the end of the clause.

    Each clause's ``(positive, negative)`` pair is passed through
    ``transform_to_positive_num`` (defined elsewhere), the pairs are summed
    over all clauses, and ``positive - negative`` rounded to one decimal
    place is returned.
    """
    # Full-width and ASCII exclamation marks.  Written as unicode escapes so
    # the comparison behaves the same on Python 2 and Python 3 (calling
    # ``.decode`` on a str literal raises AttributeError on Python 3), and
    # built once instead of being decoded again for every word.
    exclamation_marks = (u"\uff01", u"!")

    clause_scores = []
    for sent in tp.cut_sentence(weibo_sent):  # analyse each clause on its own
        seg_sent = tp.del_stopwords(tp.segmentation(sent))
        window_start = 0  # index just past the previous sentiment word
        poscount = 0  # positive score of this clause
        negcount = 0  # negative score of this clause

        for i, word in enumerate(seg_sent):
            if word in posdict:
                poscount += 1
                # Words since the previous sentiment word modify this one.
                for w in seg_sent[window_start:i]:
                    poscount = match(w, poscount)
                window_start = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[window_start:i]:
                    negcount = match(w, negcount)
                window_start = i + 1
            elif word in exclamation_marks:
                # Walk backwards and boost the first sentiment word found.
                # NOTE(review): this scans the whole clause, not only the
                # tokens before the mark; the two are the same when the mark
                # is the last token of the clause.
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break

        clause_scores.append(transform_to_positive_num(poscount, negcount))

    # Accumulate the per-clause pairs into post-level totals.
    pos_result, neg_result = 0, 0
    for clause_pos, clause_neg in clause_scores:
        pos_result += clause_pos
        neg_result += clause_neg

    return round(pos_result - neg_result, 1)

示例#2

0

显示文件

def mysentiment_score_list(oneblog):
    """Count positive and negative dictionary words per clause of a post.

    ``oneblog`` is split into clauses with ``tp.cut_sentence`` and each
    clause is segmented with ``tp.segmentation``.  A word found in
    ``posdict`` counts as positive; otherwise a word found in ``negdict``
    counts as negative.

    Returns a list with one ``[positive_count, negative_count]`` entry per
    clause, in clause order.
    """
    # Materialise all clauses up front, before any segmentation is done.
    clauses = list(tp.cut_sentence(oneblog))

    per_clause = []
    for clause in clauses:
        tally = [0, 0]  # [positive hits, negative hits]
        for token in tp.segmentation(clause):
            if token in posdict:
                tally[0] += 1
            elif token in negdict:
                tally[1] += 1
        per_clause.append(tally)
    return per_clause

示例#3

0

显示文件

文件： sentiAnalysis.py 项目： coolspiderghy/sina_weibo_crawler

def mysentiment_score_list(oneblog):
    """Return per-clause counts of positive and negative dictionary words.

    The post is cut into clauses by ``tp.cut_sentence``; every clause is
    tokenised by ``tp.segmentation``.  Each token is checked against
    ``posdict`` first and against ``negdict`` only when it is not positive.

    The result is a list of ``[positive_count, negative_count]`` lists, one
    per clause and in the same order as the clauses.
    """
    # Collect every clause first, then score them one by one.
    clauses = list(tp.cut_sentence(oneblog))

    scores = []
    for clause in clauses:
        positives = 0
        negatives = 0
        for token in tp.segmentation(clause):
            if token in posdict:
                positives += 1
                continue
            if token in negdict:
                negatives += 1
        scores.append([positives, negatives])
    return scores

示例#4

0

显示文件

# Full-width and ASCII exclamation marks.  Written as unicode escapes so the
# comparison works on both Python 2 and Python 3 (``str.decode`` does not
# exist on Python 3) and is not rebuilt for every word.
_EXCLAMATION_MARKS = (u"\uff01", u"!")


def _degree_weighted_score(preceding_words):
    """Return the signed score of one sentiment word.

    The score starts at 1 and is scaled by every degree word found in
    ``preceding_words`` (the words between the previous sentiment word and
    this one).  If ``judgeodd`` reports an odd number of words from
    ``inversedict``, the sign is flipped.
    """
    score = 1
    negations = 0
    for w in preceding_words:
        if w in mostdict:
            score *= 4.0
        elif w in verydict:
            score *= 3.0
        elif w in moredict:
            score *= 2.0
        elif w in ishdict:
            score /= 2.0
        elif w in insufficientdict:
            score /= 4.0
        elif w in inversedict:
            negations += 1
    if judgeodd(negations) == 'odd':
        score *= -1.0
    return score


def _non_negative_pair(pos_score, neg_score):
    """Fold a negative score onto the opposite polarity.

    Returns ``[positive, negative]`` with neither value below zero.  The
    comparisons use ``>= 0`` so that a negated word in a clause with no
    opposite-polarity hit (e.g. positive == -1, negative == 0) is folded as
    well instead of leaking a negative value.
    """
    if pos_score < 0 and neg_score >= 0:
        return [0, neg_score - pos_score]
    if neg_score < 0 and pos_score >= 0:
        return [pos_score - neg_score, 0]
    if pos_score < 0 and neg_score < 0:
        return [-neg_score, -pos_score]
    return [pos_score, neg_score]


def sentiment_score_list(oneblog):
    """Score every clause of a post with degree and negation weighting.

    ``oneblog`` is split into clauses with ``tp.cut_sentence`` and each
    clause is segmented with ``tp.segmentation``.  For every clause:

    * each word in ``posdict`` / ``negdict`` contributes a score of 1, scaled
      by the degree words and negations that precede it since the previous
      sentiment word;
    * an exclamation mark adds 2 to the polarity of the sentiment word
      nearest the end of the clause;
    * negative totals are folded onto the opposite polarity.

    Returns a one-element list wrapping the list of per-clause
    ``[positive, negative]`` pairs, i.e. ``[[[pos, neg], ...]]``.
    """
    clause_scores = []
    for sent in tp.cut_sentence(oneblog):
        segtmp = tp.segmentation(sent)  # list of words in this clause
        window_start = 0  # index just past the previous sentiment word
        pos_total = 0
        neg_total = 0

        for i, word in enumerate(segtmp):
            if word in posdict:
                pos_total += _degree_weighted_score(segtmp[window_start:i])
                window_start = i + 1
            elif word in negdict:
                neg_total += _degree_weighted_score(segtmp[window_start:i])
                window_start = i + 1
            elif word in _EXCLAMATION_MARKS:
                # Walk backwards and boost only the polarity of the first
                # sentiment word found.  The membership tests are separate on
                # purpose: ``w2 in posdict or negdict`` is true for any word
                # whenever ``negdict`` is non-empty.
                for w2 in segtmp[::-1]:
                    if w2 in posdict:
                        pos_total += 2
                        break
                    elif w2 in negdict:
                        neg_total += 2
                        break

        clause_scores.append(_non_negative_pair(pos_total, neg_total))

    # Callers receive the clause list wrapped in an outer list.
    return [clause_scores]

示例#5

0

显示文件

文件： sentiAnalysis.py 项目： coolspiderghy/sina_weibo_crawler

# Full-width and ASCII exclamation marks.  Written as unicode escapes so the
# comparison works on both Python 2 and Python 3 (``str.decode`` does not
# exist on Python 3) and is not rebuilt for every word.
_EXCLAMATION_MARKS = (u"\uff01", u"!")


def _degree_weighted_score(preceding_words):
    """Return the signed score of one sentiment word.

    The score starts at 1 and is scaled by every degree word found in
    ``preceding_words`` (the words between the previous sentiment word and
    this one).  If ``judgeodd`` reports an odd number of words from
    ``inversedict``, the sign is flipped.
    """
    score = 1
    negations = 0
    for w in preceding_words:
        if w in mostdict:
            score *= 4.0
        elif w in verydict:
            score *= 3.0
        elif w in moredict:
            score *= 2.0
        elif w in ishdict:
            score /= 2.0
        elif w in insufficientdict:
            score /= 4.0
        elif w in inversedict:
            negations += 1
    if judgeodd(negations) == 'odd':
        score *= -1.0
    return score


def _non_negative_pair(pos_score, neg_score):
    """Fold a negative score onto the opposite polarity.

    Returns ``[positive, negative]`` with neither value below zero.  The
    comparisons use ``>= 0`` so that a negated word in a clause with no
    opposite-polarity hit (e.g. positive == -1, negative == 0) is folded as
    well instead of leaking a negative value.
    """
    if pos_score < 0 and neg_score >= 0:
        return [0, neg_score - pos_score]
    if neg_score < 0 and pos_score >= 0:
        return [pos_score - neg_score, 0]
    if pos_score < 0 and neg_score < 0:
        return [-neg_score, -pos_score]
    return [pos_score, neg_score]


def sentiment_score_list(oneblog):
    """Score every clause of a post with degree and negation weighting.

    ``oneblog`` is split into clauses with ``tp.cut_sentence`` and each
    clause is segmented with ``tp.segmentation``.  For every clause:

    * each word in ``posdict`` / ``negdict`` contributes a score of 1, scaled
      by the degree words and negations that precede it since the previous
      sentiment word;
    * an exclamation mark adds 2 to the polarity of the sentiment word
      nearest the end of the clause;
    * negative totals are folded onto the opposite polarity.

    Returns a one-element list wrapping the list of per-clause
    ``[positive, negative]`` pairs, i.e. ``[[[pos, neg], ...]]``.
    """
    clause_scores = []
    for sent in tp.cut_sentence(oneblog):
        segtmp = tp.segmentation(sent)  # list of words in this clause
        window_start = 0  # index just past the previous sentiment word
        pos_total = 0
        neg_total = 0

        for i, word in enumerate(segtmp):
            if word in posdict:
                pos_total += _degree_weighted_score(segtmp[window_start:i])
                window_start = i + 1
            elif word in negdict:
                neg_total += _degree_weighted_score(segtmp[window_start:i])
                window_start = i + 1
            elif word in _EXCLAMATION_MARKS:
                # Walk backwards and boost only the polarity of the first
                # sentiment word found.  The membership tests are separate on
                # purpose: ``w2 in posdict or negdict`` is true for any word
                # whenever ``negdict`` is non-empty.
                for w2 in segtmp[::-1]:
                    if w2 in posdict:
                        pos_total += 2
                        break
                    elif w2 in negdict:
                        neg_total += 2
                        break

        clause_scores.append(_non_negative_pair(pos_total, neg_total))

    # Callers receive the clause list wrapped in an outer list.
    return [clause_scores]