Exemplo n.º 1
0
 def cut_sentences_words(self, review):
     """Split a review into sentences and tokenize each sentence.

     Args:
         review: Review text; handed unchanged to ``tp.cut_sentence_2``.

     Returns:
         A list with one entry per sentence, in sentence order.  Each entry
         is the value returned by ``tp.segmentation(sentence, 'list')``.
     """
     # NOTE(review): stop-word filtering via self.stopWordFilter was
     # commented out in the original and is still not applied here.
     return [
         tp.segmentation(sentence, 'list')
         for sentence in tp.cut_sentence_2(review)
     ]
def word_sent_count(dataset):
    """Compute word count, sentence count and words-per-sentence per review.

    Args:
        dataset: Iterable of review texts.

    Returns:
        A list holding one ``[word_num, sent_num, sent_word]`` entry per
        review, where ``word_num`` is the length of
        ``tp.segmentation(review, 'list')``, ``sent_num`` is the length of
        ``tp.cut_sentence_2(review)`` and ``sent_word`` is their ratio as a
        float.  ``sent_word`` is ``0.0`` when the review has no sentences.
    """
    counts = []  # renamed so the local no longer shadows the function name
    for review in dataset:
        sents = tp.cut_sentence_2(review)  # sentences of this review
        words = tp.segmentation(review, 'list')  # tokens of this review
        sent_num = len(sents)
        word_num = len(words)
        # Review length = number of words / number of sentences.  A review
        # that yields no sentences used to raise ZeroDivisionError here.
        if sent_num:
            sent_word = float(word_num) / float(sent_num)
        else:
            sent_word = 0.0
        counts.append([word_num, sent_num, sent_word])
    return counts
def word_sent_count(dataset):
    """Return per-review length statistics.

    Args:
        dataset: Iterable of review texts.

    Returns:
        list: One ``[word_num, sent_num, sent_word]`` triple per review.
        ``word_num`` and ``sent_num`` are the lengths of the results of
        ``tp.segmentation(review, 'list')`` and ``tp.cut_sentence_2(review)``;
        ``sent_word`` is ``word_num / sent_num`` as a float, or ``0.0`` for a
        review with no sentences.
    """
    stats = []
    for review in dataset:
        sent_num = len(tp.cut_sentence_2(review))
        word_num = len(tp.segmentation(review, 'list'))
        # review length = word number / sentence number; guard the empty
        # review, which previously crashed with ZeroDivisionError.
        sent_word = float(word_num) / float(sent_num) if sent_num else 0.0
        stats.append([word_num, sent_num, sent_word])
    return stats
def word_sent_count(dataset):
    """Measure each review in words, sentences and words per sentence.

    Args:
        dataset: Iterable of review texts.

    Returns:
        A list of ``[word_num, sent_num, sent_word]`` lists, one per review
        and in input order.  The counts are the lengths of what
        ``tp.segmentation(review, "list")`` and ``tp.cut_sentence_2(review)``
        return.  ``sent_word`` is the float ratio of the two, or ``0.0`` when
        ``sent_num`` is zero.
    """
    rows = []
    for review in dataset:
        sents = tp.cut_sentence_2(review)
        words = tp.segmentation(review, "list")
        word_num, sent_num = len(words), len(sents)
        if sent_num == 0:
            # No sentences: report a zero ratio instead of dividing by zero.
            sent_word = 0.0
        else:
            # review length = word number / sentence number
            sent_word = float(word_num) / float(sent_num)
        rows.append([word_num, sent_num, sent_word])
    return rows
def sentence_sentiment_score(dataset):
    """Score every sentence of every review for positive/negative sentiment.

    Each review is split with ``tp.cut_sentence_2`` and each sentence is
    tokenized with ``tp.segmentation(sent, "list")``.  For every token:

    * a token in ``posdict`` / ``negdict`` adds 1 to the matching count; the
      tokens since the previous sentiment word are then each passed to
      ``match`` and its return value replaces that count;
    * an exclamation mark (ASCII or full-width, U+FF01) scans the sentence
      from its end and adds 2 to the count matching the first sentiment
      word it finds;
    * any other token is tagged with ``tp.postagger`` and written, UTF-8
      encoded and newline-terminated, to the module-level ``fo`` handle once
      for every tag entry whose tag is ``a``, ``d``, ``v`` or ``n``.

    Args:
        dataset: Iterable of review texts.

    Returns:
        A list with one inner list per review; each inner list holds the
        ``transform_to_positive_num(poscount, negcount)`` result of every
        sentence of that review, in order.
    """
    # The original compared against the ASCII "!" twice; the second operand
    # was evidently meant to be the full-width mark used in Chinese text.
    exclamation_marks = (u"!", u"\uff01")
    logged_tags = ("a", "d", "v", "n")

    cuted_review = [tp.cut_sentence_2(cell) for cell in dataset]

    all_review_count = []
    for review in cuted_review:
        single_review_count = []
        for sent in review:
            seg_sent = tp.segmentation(sent, "list")  # tokenize the sentence
            a = 0  # index just after the previous sentiment word
            poscount = 0
            negcount = 0
            for i, word in enumerate(seg_sent):
                if word in posdict:
                    poscount += 1
                    for w in seg_sent[a:i]:
                        poscount = match(w, poscount)
                    a = i + 1
                elif word in negdict:
                    negcount += 1
                    for w in seg_sent[a:i]:
                        negcount = match(w, negcount)
                    a = i + 1
                elif word in exclamation_marks:
                    for w2 in seg_sent[::-1]:
                        if w2 in posdict:
                            poscount += 2
                            break
                        elif w2 in negdict:
                            negcount += 2
                            break
                else:
                    # Log non-sentiment tokens carrying one of the tags above.
                    for k in tp.postagger(word, "list"):
                        if k[1] in logged_tags:
                            fo.write(word.encode("utf8") + "\n")

            # Positive/negative weight of this sentence.
            single_review_count.append(
                transform_to_positive_num(poscount, negcount))
        # [[[s11_score], [s12_score], ...], [[s21_score], ...], ...]
        all_review_count.append(single_review_count)

    return all_review_count
Exemplo n.º 6
0
def sentence_sentiment_score(dataset):
    """Return per-sentence sentiment counts for a slice of the dataset.

    Args:
        dataset: Sliceable sequence of review texts.

    Returns:
        A list with one entry per processed review, each being the value of
        ``get_single_sent_count`` applied to that review's sentences (as
        produced by ``tp.cut_sentence_2``).
    """
    # NOTE(review): only items 1..9 of the dataset are processed; this looks
    # like a debugging leftover -- confirm before relying on full coverage.
    split_reviews = [tp.cut_sentence_2(cell) for cell in dataset[1:10]]
    return [get_single_sent_count(sentences) for sentences in split_reviews]
def sentence_sentiment_score(dataset):
    """Compute positive/negative scores for each sentence of each review.

    Reviews are split with ``tp.cut_sentence_2`` and sentences tokenized with
    ``tp.segmentation(sent, 'list')``.  A token found in ``posdict`` or
    ``negdict`` adds 1 to the matching count, after which every token since
    the previous sentiment word is passed to ``match`` and the returned value
    becomes the new count.  An exclamation mark (ASCII or full-width,
    U+FF01) adds 2 to the count of the first sentiment word found when
    scanning the sentence backwards.

    Args:
        dataset: Iterable of review texts.

    Returns:
        A list of lists: for each review, the
        ``transform_to_positive_num(poscount, negcount)`` result of each of
        its sentences.
    """
    # Both operands of the original comparison were the ASCII "!"; the
    # full-width variant is restored so Chinese punctuation is recognised.
    exclamation_marks = (u'!', u'\uff01')

    cuted_review = [tp.cut_sentence_2(cell) for cell in dataset]

    all_review_count = []
    for review in cuted_review:
        single_review_count = []  # [[s1_score], [s2_score], ...]
        for sent in review:
            seg_sent = tp.segmentation(sent, 'list')
            a = 0  # index just after the previous sentiment word
            poscount = 0
            negcount = 0
            for i, word in enumerate(seg_sent):
                if word in posdict:
                    poscount += 1
                    for w in seg_sent[a:i]:
                        poscount = match(w, poscount)
                    a = i + 1
                elif word in negdict:
                    negcount += 1
                    for w in seg_sent[a:i]:
                        negcount = match(w, negcount)
                    a = i + 1
                elif word in exclamation_marks:
                    for w2 in seg_sent[::-1]:
                        if w2 in posdict:
                            poscount += 2
                            break
                        elif w2 in negdict:
                            negcount += 2
                            break

            single_review_count.append(
                transform_to_positive_num(poscount, negcount))
        # [[[s11_score], [s12_score], ...], [[s21_score], ...], ...]
        all_review_count.append(single_review_count)

    return all_review_count
def sentence_sentiment_score(dataset):
    """Return sentence-level sentiment scores for every review.

    For each sentence (``tp.cut_sentence_2`` then
    ``tp.segmentation(sent, "list")``) two running counts are kept:

    * hitting a ``posdict`` / ``negdict`` token increments the matching
      count, then each token between the previous sentiment word and this
      one is fed to ``match`` and its return value replaces the count;
    * hitting an exclamation mark (``!`` or full-width U+FF01) walks the
      sentence from the end and gives +2 to the polarity of the first
      sentiment word encountered.

    Args:
        dataset: Iterable of review texts.

    Returns:
        list: One list per review containing, per sentence, whatever
        ``transform_to_positive_num(poscount, negcount)`` returns.
    """
    # The original tested the ASCII "!" twice, so full-width exclamation
    # marks were never matched; both forms are accepted here.
    bang_tokens = (u"!", u"\uff01")

    cuted_review = [tp.cut_sentence_2(cell) for cell in dataset]

    all_review_count = []
    for review in cuted_review:
        sentence_scores = []
        for sent in review:
            seg_sent = tp.segmentation(sent, "list")
            window_start = 0  # first token not yet consumed by a sentiment word
            poscount = 0
            negcount = 0
            for position, word in enumerate(seg_sent):
                if word in posdict:
                    poscount += 1
                    for modifier in seg_sent[window_start:position]:
                        poscount = match(modifier, poscount)
                    window_start = position + 1
                elif word in negdict:
                    negcount += 1
                    for modifier in seg_sent[window_start:position]:
                        negcount = match(modifier, negcount)
                    window_start = position + 1
                elif word in bang_tokens:
                    for candidate in seg_sent[::-1]:
                        if candidate in posdict:
                            poscount += 2
                            break
                        elif candidate in negdict:
                            negcount += 2
                            break

            # [[s1_score], [s2_score], ...]
            sentence_scores.append(transform_to_positive_num(poscount, negcount))
        # [[[s11_score], [s12_score], ...], [[s21_score], ...], ...]
        all_review_count.append(sentence_scores)

    return all_review_count
def single_review_sentiment_score(review):
    """Compute the positive/negative sentiment score of a single review.

    The review is split into sentences (``tp.cut_sentence_2``) and each
    sentence into tokens (``tp.segmentation(sent, "list")``).  Per sentence:

    * a ``posdict`` / ``negdict`` token adds 1 to the matching count, then
      each token since the previous sentiment word is passed to ``match``
      and the returned value replaces that count;
    * an exclamation mark (ASCII or full-width, U+FF01) scans the sentence
      from its end and adds 2 to the count of the first sentiment word found;
    * every other token is written to the module-level ``fo`` handle.

    Args:
        review: Review text.

    Returns:
        The result of ``sumup_sentence_sentiment_score`` applied to the list
        of per-sentence ``transform_to_positive_num(poscount, negcount)``
        values.
    """
    # Chinese and English exclamation marks.  The original compared against
    # the ASCII "!" twice, so the full-width form never matched.
    exclamation_marks = (u"!", u"\uff01")
    single_review_senti_score = []

    for sent in tp.cut_sentence_2(review):  # cut sentence
        seg_sent = tp.segmentation(sent, "list")  # cut word
        s = 0  # index just after the previous sentiment word
        poscount = 0  # positive weight of this sentence
        negcount = 0  # negative weight of this sentence

        for i, word in enumerate(seg_sent):
            if word in posdict:
                poscount += 1
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                # The original assigned to an unused name ``a`` here, so ``s``
                # stayed 0 and earlier modifiers were re-applied to every
                # later sentiment word.
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # Every "!" has a weight of +2.
            elif word in exclamation_marks:
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            else:
                fo.write(word)

        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))

    return sumup_sentence_sentiment_score(single_review_senti_score)
def single_review_sentiment_score(review):
    """Return the aggregated sentiment score of one review.

    Sentences come from ``tp.cut_sentence_2(review)`` and tokens from
    ``tp.segmentation(sent, 'list')``.  Within a sentence, a token in
    ``posdict`` / ``negdict`` adds 1 to the matching count and the tokens
    since the previous sentiment word are each passed to ``match``, whose
    return value replaces the count.  An exclamation mark (ASCII or
    full-width, U+FF01) adds 2 to the count of the first sentiment word
    found scanning the sentence backwards.

    Args:
        review: Review text.

    Returns:
        ``sumup_sentence_sentiment_score`` applied to the list of
        per-sentence ``transform_to_positive_num(poscount, negcount)`` values.
    """
    # The original checked the ASCII "!" twice; accept the full-width form too.
    exclamation_marks = (u'!', u'\uff01')
    single_review_senti_score = []

    for sent in tp.cut_sentence_2(review):
        seg_sent = tp.segmentation(sent, 'list')
        s = 0  # index just after the previous sentiment word
        poscount = 0  # count a positive word
        negcount = 0  # count a negative word

        for i, word in enumerate(seg_sent):
            if word in posdict:
                poscount += 1
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                # Was ``a = i + 1``: ``s`` never advanced, so modifiers that
                # belonged to earlier sentiment words were applied again.
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # Match "!" in the review, every "!" has a weight of +2
            elif word in exclamation_marks:
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break

        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))

    # Summed once after the loop.  The original recomputed it per sentence
    # and left the name unbound (UnboundLocalError) for a review with no
    # sentences.
    return sumup_sentence_sentiment_score(single_review_senti_score)
def single_review_sentiment_score(review):
    """Score one review by summing up its per-sentence sentiment counts.

    Each sentence from ``tp.cut_sentence_2(review)`` is tokenized with
    ``tp.segmentation(sent, 'list')`` and scanned token by token:

    * ``posdict`` / ``negdict`` hit: add 1 to that polarity, then pass every
      token since the previous sentiment word to ``match`` and keep the
      returned value as the new count;
    * exclamation mark (``!`` or full-width U+FF01): scan the sentence from
      the end and add 2 to the polarity of the first sentiment word found.

    Args:
        review: Review text.

    Returns:
        The value of ``sumup_sentence_sentiment_score`` for the list of
        ``transform_to_positive_num(poscount, negcount)`` results, one per
        sentence.
    """
    # Indentation normalised to spaces; the original mixed tabs and spaces.
    # The duplicated ASCII "!" comparison is replaced by both mark variants.
    bang_tokens = (u'!', u'\uff01')
    sentence_scores = []

    for sent in tp.cut_sentence_2(review):
        seg_sent = tp.segmentation(sent, 'list')
        s = 0  # index just after the previous sentiment word
        poscount = 0  # count a positive word
        negcount = 0  # count a negative word

        for i, word in enumerate(seg_sent):
            if word in posdict:
                poscount += 1
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                # Advance the window start (the original wrote to an unused
                # ``a``, leaving ``s`` at 0 for the whole sentence).
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # Match "!" in the review, every "!" has a weight of +2
            elif word in bang_tokens:
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break

        sentence_scores.append(transform_to_positive_num(poscount, negcount))

    # Aggregate once, after all sentences; doing it inside the loop left the
    # result unbound when the review produced no sentences.
    return sumup_sentence_sentiment_score(sentence_scores)
Exemplo n.º 12
0
def sentiment_score_list(dataset):
    """Compute ``[pos_count, neg_count]`` for every sentence of every review.

    Each review is split with ``tp.cut_sentence_2`` and each sentence is
    tokenized with ``tp.segmentation(sent, 'list')``.  For each token:

    * a ``posdict`` / ``negdict`` token starts a score of 1 that is scaled
      by the degree words seen since the previous sentiment word
      (``mostdict`` x4, ``verydict`` x3, ``moredict`` x2, ``ishdict`` /2,
      ``insufficientdict`` /4).  The score is negated when
      ``judgeodd(<number of inversedict words>)`` returns ``'odd'`` and is
      then added to that polarity's sentence total;
    * a ``'|'`` or ``'!'`` token scans the sentence from its end and, at the
      first sentiment word found, adds 2 to both totals.

    Negative totals are finally folded into the opposite polarity so both
    reported numbers are non-negative.

    Args:
        dataset: Iterable of review texts.

    Returns:
        A list with one inner list per review, each containing one
        ``[pos_count, neg_count]`` pair per sentence.
    """

    def _apply_modifiers(score, context):
        """Scale ``score`` by degree words in ``context``; count inversions."""
        inversions = 0
        for w in context:
            if w in mostdict:
                score *= 4.0
            elif w in verydict:
                score *= 3.0
            elif w in moredict:
                score *= 2.0
            elif w in ishdict:
                score /= 2.0
            elif w in insufficientdict:
                score /= 4.0
            elif w in inversedict:
                inversions += 1
        return score, inversions

    cuted_data = [tp.cut_sentence_2(cell) for cell in dataset]

    count2 = []
    for sents in cuted_data:  # every review
        count1 = []
        for sent in sents:  # every sentence of the review
            segtmp = tp.segmentation(sent, 'list')  # tokens as a list
            a = 0  # index just after the previous sentiment word
            poscount3 = 0  # positive total for this sentence
            negcount3 = 0  # negative total for this sentence
            for i, word in enumerate(segtmp):
                if word in posdict:
                    poscount, c = _apply_modifiers(1, segtmp[a:i])
                    if judgeodd(c) == 'odd':
                        poscount *= -1
                    poscount3 += poscount
                    a = i + 1
                elif word in negdict:
                    negcount, d = _apply_modifiers(1, segtmp[a:i])
                    if judgeodd(d) == 'odd':
                        negcount *= -1.0
                    negcount3 += negcount
                    a = i + 1
                # NOTE(review): '|' looks like a mangled exclamation mark
                # (possibly the full-width one) -- confirm against the
                # upstream source before changing it.
                elif word == '|'.decode('utf-8') or word == '!'.decode('utf8'):
                    for w2 in segtmp[::-1]:
                        # Was ``w2 in posdict or negdict``, which is true for
                        # any token whenever negdict is non-empty.
                        if w2 in posdict or w2 in negdict:
                            # NOTE(review): both totals are bumped regardless
                            # of the word's polarity, as in the original.
                            poscount3 += 2
                            negcount3 += 2
                            break

            # Fold negative totals into the opposite polarity.
            if poscount3 < 0 and negcount3 > 0:
                pos_count = 0
                neg_count = negcount3 - poscount3
            elif negcount3 < 0 and poscount3 > 0:
                pos_count = poscount3 - negcount3
                neg_count = 0
            elif negcount3 < 0 and poscount3 < 0:
                neg_count = -poscount3
                pos_count = -negcount3
            else:
                pos_count = poscount3
                neg_count = negcount3
            count1.append([pos_count, neg_count])
        count2.append(count1)
    return count2
Exemplo n.º 13
0
def single_review_sentiment_score(review):
    """Return the aggregated sentiment score of a single review.

    Args:
        review: Review text; split into sentences by ``tp.cut_sentence_2``.

    Returns:
        The value of ``sumup_sentence_sentiment_score`` applied to what
        ``get_single_sent_count`` returns for the review's sentences.
    """
    sentence_scores = get_single_sent_count(tp.cut_sentence_2(review))
    return sumup_sentence_sentiment_score(sentence_scores)