# Example No. 1 (score: 0)
    def NLPIRCutWithPos(self, isClearText=False):
        """Segment the stored text with NLPIR, keeping part-of-speech tags.

        :param isClearText: when True, segment the cleaned text returned by
            ``self.clearText()`` instead of the raw ``self.__text``.
        :return: list of ``(word, pos)`` results as produced by ``nlpir.seg``.
        """
        # Idiom fix: test truthiness instead of comparing against False (PEP 8).
        if isClearText:
            self.__strWithPos = list(nlpir.seg(self.clearText()))
        else:
            self.__strWithPos = list(nlpir.seg(self.__text))

        return self.__strWithPos
 def parse_sentence(self):
     """Tally sentiment counts for the sentence and collect feature words.

     Segments ``self.sentence``, accumulates the positive/zero/negative
     counts of every word already present in ``self.mdictionary``, and
     stores up to three "feature" words (selected by POS attribute) in
     ``self.word_list``, padding from the start of the sentence when
     fewer than three are found.
     """
     feature_attr_list = ["a", "v", "z", "d", "e"]
     feature_word_count = 0
     feature_word_num = 3
     token_list = nlp.seg(self.sentence)
     for token in token_list:
         # Accumulate the positive / zero / negative counts of each word
         # that is already in the sentiment dictionary (keyed by the word
         # string, token[0]).
         if token[0] in self.mdictionary:
             mword = self.mdictionary.get(token[0])
             self.positive_word_count += mword.positive_count
             # NOTE(review): "netative_count" looks like a typo for
             # "negative_count", but it must match the attribute name on
             # the dictionary's word objects -- confirm before renaming.
             self.negative_word_count += mword.netative_count
             self.zero_word_count += mword.zero_count
             # Words whose first POS attribute is a feature attribute
             # become feature words, capped at feature_word_num.
             if mword.attr[0] in feature_attr_list and feature_word_count < feature_word_num:
                 self.word_list.append(mword)
                 feature_word_count += 1
     # Fewer than three feature words: pad from the sentence head.
     if feature_word_count < feature_word_num:
         if len(token_list) > 2:
             for i in range(feature_word_num - len(self.word_list)):
                 # BUG FIX: tokens are (word, pos) tuples, but the
                 # dictionary is keyed by the word itself (token[0]) --
                 # indexing with the whole tuple always raised KeyError.
                 self.word_list.append(self.mdictionary[token_list[i][0]])
         else:
             for i in range(feature_word_num - len(self.word_list)):
                 # Same key fix for the short-sentence branch, which
                 # repeats the first token to fill the quota.
                 self.word_list.append(self.mdictionary[token_list[0][0]])
# Example No. 3 (score: 0)
def ChineseWordsSegmentationByNLPIR2016(text):
    txt = nlpir.seg(text)
    seg_list = []

    for t in txt:
        seg_list.append(t[0].encode('utf-8'))

    return seg_list
# Example No. 4 (score: 0)
    def NLPIRCutText(self, isAddWord=False):
        """Segment ``self.__text`` with NLPIR and return the words joined by spaces.

        :param isAddWord: when True, also register ``self.__newWords`` with
            NLPIR's user dictionary before segmenting.
        :return: the segmented words (UTF-8 encoded) joined by single spaces.
        """
        # Idiom fix: test truthiness instead of "== True" (PEP 8).
        if isAddWord:
            for word in self.__newWords:
                nlpir.AddUserWord(word)

        # User-dictionary words are always registered.
        for word in self.__userWords:
            nlpir.AddUserWord(word)

        # nlpir.seg yields (word, pos) pairs; keep only the encoded word.
        self.__seg_list = [t[0].encode('utf-8') for t in nlpir.seg(self.__text)]

        return ' '.join(self.__seg_list)
 def parse_file(self):
     """Tokenize the first line of the training file and label each word.

     Reads one line from ``self.file_path``, extracts its sentiment label
     and text, segments the text, and updates ``self.mdict``: known words
     get the counter matching the label incremented, unknown words are
     added as new ``MyWord`` entries.
     """
     # BUG FIX: the file handle was opened but never closed (resource
     # leak); "with" guarantees it is closed even on error.
     with open(self.file_path, "r") as f:
         # NOTE(review): only the first line is processed -- confirm the
         # training file really holds a single labelled sentence.
         line = f.readline()
     label = self.get_label(line)
     text = self.get_text(line)
     for token in nlp.seg(text):
         # Skip tokens whose POS tag is filtered out.
         if self.word_filter(token[1]):
             continue
         # Idiom fix: direct membership test instead of ".keys()".
         if token[0] in self.mdict:
             mword = self.mdict.get(token[0])
             # Dispatch on the label string to the matching counter method.
             inc_operator = {
                 "+1": mword.inc_positive_count,
                 "0": mword.inc_zero_count,
                 "-1": mword.inc_negative_count,
             }
             inc_operator.get(label)()
         else:
             mword = MyWord(token[0], token[1])
             self.mdict[token[0]] = mword
# Example No. 6 (score: 0)
# Demo script (Python 2): compare several NLPIR segmentation entry points.
# NOTE(review): cutstrpos, filestr, filestr2, nltk and nlpir are defined
# elsewhere in the original file -- this chunk is not self-contained.
posstr = cutstrpos(filestr2)

print type(posstr)

# print filestr

print '**** show is end ****'

print ' '
print 'This is posster'
print posstr

# Parse "word/TAG" items produced by the POS cutter into (word, tag) tuples.
strtag = [nltk.tag.str2tuple(word) for word in posstr.split()]
# for item in strtag:
#     print item
# Run the same inputs through the raw NLPIR APIs for comparison.
strsBySeg = nlpir.seg(filestr)
strsBySeg2 = nlpir.seg(filestr2)
strsByParagraphProcess = nlpir.ParagraphProcess(filestr, 1)
strsByParagraphProcessA = nlpir.ParagraphProcessA(
    filestr,
    ChineseWordsSegmentationByNLPIR2016(filestr)[0], 1)

print ' '
print ' '
print '**** strtag ****'

# Dump the (word, tag) pairs on one line, separated by "|".
for word, tag in strtag:
    print word, "/", tag, "|",

print ' '
print ' '