Exemplo n.º 1
0
def dict_mining():
    """Dependency-parse two sample sentences and print the extraction result.

    The sentences are sent to BosonNLP's ``depparser``; the parse is handed
    to ``extract_noun`` and whatever that returns is printed.
    """
    sample_sentences = [
        '整流级逆变级滤波器负载三相检测abcdq双SVM控制dqabcADRCADRCaubucu*du*quotωotωinvTrecTuqud图3基于ADRC的TSMC闭环控制系统框图Fig.3Closed-loopcontroldiagramofTSMCbasedonADRCADRC采用图1结构。',
        '但励磁绕组时间常数较大,闭环控制系统的截止频率较低,影响发电机输出端电压的响应速度。',
    ]

    # NOTE(review): the API token is hard-coded here; consider loading it
    # from configuration instead of keeping it in source.
    client = BosonNLP('6cfIzKI1.27567.fLaZOvRXwl8f')
    parsed = client.depparser(sample_sentences)
    print(extract_noun(parsed))
Exemplo n.º 2
0
class CNSegment:
    """Wrapper around the BosonNLP API for Chinese word segmentation.

    Offers raw tagging, noise removal by part-of-speech prefix and stop
    words, and dependency parsing.
    """

    # Stop-word list consulted by denoisingOne when useStopWord is true.
    # Defined on the class, so it is shared by all instances unless an
    # instance assigns its own ``stopwords`` attribute.
    stopwords = []

    def __init__(self):
        self.nlp = BosonNLP(bosonkey)

    def get_tags(self, sentences):
        """Segment and tag text through BosonNLP.

        :param sentences: a sentence, or a list of sentences, to segment
        :return: whatever ``self.nlp.tag(sentences)`` returns
        """
        return self.nlp.tag(sentences)

    def denoisingOne(self, tagdict, uTag=None, useStopWord=False):
        """Drop noise words from one tagged sentence.

        A word is removed when the first character of its tag is listed in
        the useless tags, or (only when ``useStopWord`` is true) when the
        word itself is in ``self.stopwords``.

        :param tagdict: mapping with parallel sequences under the keys
            ``'word'`` and ``'tag'``
        :param uTag: tag prefixes to filter out; any falsy value (``None``,
            empty tuple) selects the default ``('w', 'o', 'y', 'u')``
        :param useStopWord: also filter words found in ``self.stopwords``
        :return: list of the words that survive filtering, in order
        """
        useless_tags = uTag or ('w', 'o', 'y', 'u')
        return [
            word
            for word, tag in zip(tagdict['word'], tagdict['tag'])
            if tag[0] not in useless_tags
            and not (useStopWord and word in self.stopwords)
        ]

    def cut(self, sentences):
        """Segment text and strip noise words with the default settings.

        :param sentences: the corpus to segment (passed to ``get_tags``)
        :return: one denoised word list per item returned by ``get_tags``
        """
        return [self.denoisingOne(tagged) for tagged in self.get_tags(sentences)]

    def depenPars(self, sentences):
        """Return ``self.nlp.depparser(sentences)`` unchanged."""
        return self.nlp.depparser(sentences)
Exemplo n.º 3
0
 def get_BosDepT(self, text):
     """Return the BosonNLP dependency parse of ``text``.

     A fresh client is built from ``bosonkey`` on every call.
     """
     return BosonNLP(bosonkey).depparser(text)
Exemplo n.º 4
0
class BosonNlpp:
    """Convenience wrapper exposing several BosonNLP endpoints as methods."""

    # NOTE(review): this token was hard-coded in the original source. It is
    # kept as the default for backward compatibility; prefer passing
    # ``api_key`` explicitly and keeping credentials out of the code base.
    _DEFAULT_API_KEY = 'IKBIoANy.14545.A7GCYBnT9jIB'

    # Maps the integer returned by the classify endpoint to its label.
    _CATEGORIES = {
        0: '体育',
        1: '教育',
        2: '财经',
        3: '社会',
        4: '娱乐',
        5: '军事',
        6: '国内',
        7: '科技',
        8: '互联网',
        9: '房产',
        10: '国际',
        11: '女人',
        12: '汽车',
        13: '游戏',
    }

    def __init__(self, api_key=None):
        """Create the underlying client.

        :param api_key: BosonNLP API token; the built-in default is used
            when omitted or falsy
        """
        self.bonlp = BosonNLP(api_key or self._DEFAULT_API_KEY)

    # Sentiment analysis
    def testSentiment(self, s):
        """Return ``self.bonlp.sentiment(s)`` unchanged."""
        return self.bonlp.sentiment(s)

    # Named-entity recognition
    def lexicalAnalysis(self, s):
        """Return the first element of ``self.bonlp.ner(s)``."""
        return self.bonlp.ner(s)[0]

    # Dependency parsing
    def textDependency(self, s):
        """Return ``self.bonlp.depparser(s)`` unchanged."""
        return self.bonlp.depparser(s)

    # Keyword extraction
    def testKeywords(self, s):
        """Return ``self.bonlp.extract_keywords(s, top_k=10)``."""
        return self.bonlp.extract_keywords(s, top_k=10)

    # News classification
    def textClassify(self, s):
        """Classify ``s`` and return the label of the first result.

        :raises KeyError: if the first returned id is not in the 0-13 table
        """
        category_ids = self.bonlp.classify(s)
        return self._CATEGORIES[category_ids[0]]

    # Semantic suggestion
    def lexicalSynonym(self, term):
        """Return ``self.bonlp.suggest(term, top_k=10)``."""
        return self.bonlp.suggest(term, top_k=10)

    # Word segmentation and part-of-speech tagging
    def fenci(self, s):
        """Return ``self.bonlp.tag(s)`` unchanged."""
        return self.bonlp.tag(s)

    def newssubstract(self, s):
        """Summarize ``s`` via ``self.bonlp.summary('', s)``.

        :param s: the text to summarize, as text or UTF-8 encoded bytes
        :return: whatever the summary endpoint returns
        """
        # Only byte strings need decoding; calling .decode() on a Python 3
        # ``str`` raises AttributeError, which the original did
        # unconditionally.
        if isinstance(s, bytes):
            s = s.decode('utf-8')
        # First argument is passed as an empty string — presumably the
        # title; confirm against the SDK.
        return self.bonlp.summary('', s)