Exemplo n.º 1
0
def classify(contents):
    """
    Classify text with BosonNLP and translate each result into a topic id.

    Parameters
    ----------
    contents: value forwarded unchanged to ``BosonNLP.classify``
        (presumably a text or list of texts -- confirm against the SDK).

    Returns
    -------
    list containing ``topic_to_id[label]`` for every label in the
    classification result, in the order the service returned them.
    """
    # `boson_token` and `topic_to_id` are module-level names defined
    # outside this function.
    client = BosonNLP(boson_token)
    labels = client.classify(contents)
    return [topic_to_id[label] for label in labels]
Exemplo n.º 2
0
def Class_ification(sentence):  # perform text classification
    """
    Classify ``sentence`` with BosonNLP and show the category as a graph node.

    The first classification result is translated into its Chinese category
    label and drawn as the only node of a directed graph in a new figure.

    Parameters
    ----------
    sentence: value forwarded unchanged to ``BosonNLP.classify``

    Returns
    -------
    None; the result is only displayed via ``plot.show()``.

    Raises
    ------
    KeyError if the first classification result is not a known category id.
    """
    # Category labels keyed by the integer id returned by the classifier.
    # Id 10 is "国际" (international); it was previously a duplicate of
    # id 7 ("科技"), so international news was shown with the wrong label.
    info = {
        0: "体育",
        1: "教育",
        2: "财经",
        3: "社会",
        4: "娱乐",
        5: "军事",
        6: "国内",
        7: "科技",
        8: "互联网",
        9: "房产",
        10: "国际",
        11: "女人",
        12: "汽车",
        13: "游戏",
    }

    # Select a font that can render the Chinese labels, and keep the minus
    # sign displayable with that font.
    plot.rcParams['font.sans-serif'] = ['SimHei']
    plot.rcParams['axes.unicode_minus'] = False

    # NOTE(review): hard-coded API token; consider loading it from
    # configuration or the environment instead of source control.
    nlp = BosonNLP('TPDuivpZ.27572.rVuPCI9-kUlN')
    result = nlp.classify(sentence)

    DG = nx.DiGraph()
    plot.figure(figsize=(3, 3))
    plot.subplot(1, 1, 1)
    plot.title('文本分类', color='red', fontsize=15)
    # Only the first classification result is displayed.
    DG.add_node(info[result[0]])
    nx.draw(DG, with_labels=True, node_size=6000, node_color='lightblue')
    plot.show()
Exemplo n.º 3
0
class _BosonNLPWrapper(object):
    """
    NLP object using the BosonNLP API Python SDK.
    """

    # English category names; the list index is the integer category id
    # returned by the classifier.
    news_categories = [
        'physical education', 'education', 'finance', 'society',
        'entertainment', 'military', 'domestic', 'science and technology',
        'the internet', 'real estate', 'international', 'women', 'car', 'game'
    ]

    def __init__(self, api_token=None):
        """
        Creates the underlying BosonNLP client.

        Parameters
        ----------
        api_token (string): BosonNLP API token (required)

        Raises
        ------
        AssertionError if no token is provided
        """
        # Raise explicitly instead of using `assert`, which is stripped when
        # Python runs with -O. AssertionError is kept so that existing
        # callers catching it continue to work.
        if api_token is None:
            raise AssertionError("Please provide an API token")

        self.token = api_token
        self.nlp = BosonNLP(self.token)

    def get_sentiment(self, text):
        """
        Performs sentiment analysis on a text passage.
        See: http://docs.bosonnlp.com/sentiment.html

        Parameters
        ----------
        text (string): text passage to be analyzed for sentiment

        Returns
        -------
        dictionary with 'positive' and 'negative' as keys with their respective weights as values
        """
        # Only the first result returned by the service is used.
        pos, neg = self.nlp.sentiment(text)[0]

        return {'positive': pos, 'negative': neg}

    def classify_news(self, text):
        """
        Classifies news text into one of the categories in news_categories.
        See: http://docs.bosonnlp.com/classify.html

        Parameters
        ----------
        text (string): text passage to classify

        Returns
        -------
        the entry of news_categories that the text was classified into

        Raises
        ------
        KeyError if the service returns an unknown category id
        """
        cats_dict = dict(enumerate(_BosonNLPWrapper.news_categories))

        # Only the first result returned by the service is used.
        clsfy_num = self.nlp.classify(text)[0]

        return cats_dict[clsfy_num]

    def extract_keywords(self, text, top_k=3):
        """
        Extracts the top k keywords and the weight of each word in the text.
        See: http://docs.bosonnlp.com/keywords.html

        Parameters
        ----------
        text (string): text passage from which to extract keywords
        top_k (integer): number of keywords to return

        Returns
        -------
        list of key-value pairs {word: weight}
        """
        result = self.nlp.extract_keywords(
            text, top_k)  # outputs in sorted order of weight

        # Each result item is a (weight, word) pair.
        return [{word: weight} for weight, word in result]

    def segment_words_and_tag(self, text):
        """
        Splits up text into segments of "words" and tags them with their respective part of speech.
        See: http://docs.bosonnlp.com/tag.html

        Parameters
        ----------
        text (string): text passage to segment into separate "words" and tags them with parts of speech

        Returns
        -------
        list of key-value pairs {word: part-of-speech-tag}
        """
        # Only the first result returned by the service is used.
        result = self.nlp.tag(text)[0]

        return [{word: tag} for word, tag in zip(result['word'], result['tag'])]

    def get_summary(self, content, title='', pct_limit=0.2):
        """
        Extracts a news digest (summary) of the content.
        See: http://docs.bosonnlp.com/summary.html

        Parameters
        ----------
        content (string): text passage to summarize
        title (string): title of the passage (optional, may provide more accurate results)
        pct_limit (float): max length of the summary in terms of percentage of the original word count

        Returns
        -------
        string containing the summary of the passage
        """
        # The SDK call takes the title first, then the content.
        return self.nlp.summary(title, content, pct_limit)
Exemplo n.º 4
0
class BosonNlpp:
    """Convenience wrapper exposing several BosonNLP SDK calls as methods."""

    # Chinese category labels keyed by the integer id returned by the
    # classifier; built once instead of on every textClassify call.
    _CATEGORY_LABELS = {
        0: '体育',
        1: '教育',
        2: '财经',
        3: '社会',
        4: '娱乐',
        5: '军事',
        6: '国内',
        7: '科技',
        8: '互联网',
        9: '房产',
        10: '国际',
        11: '女人',
        12: '汽车',
        13: '游戏',
    }

    def __init__(self):
        """Create the underlying BosonNLP client."""
        # NOTE(review): hard-coded API token; consider loading it from
        # configuration or the environment instead of source control.
        self.bonlp = BosonNLP('IKBIoANy.14545.A7GCYBnT9jIB')

    # Sentiment analysis
    def testSentiment(self, s):
        """Return the raw result of the SDK's ``sentiment`` call for ``s``."""
        return self.bonlp.sentiment(s)

    # Named entity recognition
    def lexicalAnalysis(self, s):
        """Return the first item of the SDK's ``ner`` result for ``s``."""
        return self.bonlp.ner(s)[0]

    # Dependency grammar parsing
    def textDependency(self, s):
        """Return the raw result of the SDK's ``depparser`` call for ``s``."""
        return self.bonlp.depparser(s)

    # Keyword extraction
    def testKeywords(self, s):
        """Return the SDK's ``extract_keywords`` result for ``s`` (top 10)."""
        return self.bonlp.extract_keywords(s, top_k=10)

    # News classification
    def textClassify(self, s):
        """
        Classify ``s`` and return the Chinese label of the first result.

        Raises KeyError if the service returns an unknown category id.
        """
        resultlist = self.bonlp.classify(s)
        return self._CATEGORY_LABELS[resultlist[0]]

    # Semantic association
    def lexicalSynonym(self, term):
        """Return the SDK's ``suggest`` result for ``term`` (top 10)."""
        return self.bonlp.suggest(term, top_k=10)

    # Word segmentation and part-of-speech tagging
    def fenci(self, s):
        """Return the raw result of the SDK's ``tag`` call for ``s``."""
        return self.bonlp.tag(s)

    def newssubstract(self, s):
        """
        Return the SDK's ``summary`` result for ``s``, using an empty title.

        ``s`` may be text or UTF-8 encoded bytes; bytes are decoded first.
        """
        # Decode only byte strings: text has no .decode() on Python 3, so an
        # unconditional decode raised AttributeError for every str input.
        if isinstance(s, bytes):
            s = s.decode('utf-8')
        return self.bonlp.summary('', s)
Exemplo n.º 5
0
class _BosonNLPWrapper(object):
    """
    NLP object using the BosonNLP API Python SDK.
    """

    # English category names; the list index is the integer category id
    # returned by the classifier.
    news_categories = ['physical education', 'education', 'finance', 'society', 'entertainment', 'military',
                       'domestic', 'science and technology', 'the internet', 'real estate', 'international',
                       'women', 'car', 'game']

    def __init__(self, api_token=None):
        """
        Creates the underlying BosonNLP client.

        Parameters
        ----------
        api_token (string): BosonNLP API token (required)

        Raises
        ------
        AssertionError if no token is provided
        """
        # Raise explicitly instead of using `assert`, which is stripped when
        # Python runs with -O. AssertionError is kept so that existing
        # callers catching it continue to work.
        if api_token is None:
            raise AssertionError("Please provide an API token")

        self.token = api_token
        self.nlp = BosonNLP(self.token)


    def get_sentiment(self, text):
        """
        Performs sentiment analysis on a text passage (works for Chinese text).
        See: http://docs.bosonnlp.com/sentiment.html

        Parameters
        ----------
        text (string): text passage to be analyzed for sentiment


        Returns
        -------
        dictionary with 'positive' and 'negative' as keys with their respective weights as values

        Example (requires a valid API token and network access):

        >>> nlp = _BosonNLPWrapper('<api token>')  # doctest: +SKIP
        >>> nlp.get_sentiment('不要打擾我')  # doctest: +SKIP
        {'positive': 0.3704911989140307, 'negative': 0.6295088010859693}
        >>> nlp.get_sentiment('我很高興跟你見面')  # doctest: +SKIP
        {'positive': 0.856280735624867, 'negative': 0.14371926437513308}
        """
        # Only the first result returned by the service is used.
        pos, neg = self.nlp.sentiment(text)[0]

        return {'positive': pos, 'negative': neg}


    def classify_news(self, text):
        """
        Classifies news text into 14 different categories.
        See: http://docs.bosonnlp.com/classify.html

        Parameters
        ----------
        text (string): text passage to classify into news categories defined in news_categories

        Returns
        -------
        one of the 14 categories in news_categories that the text was classified into

        Raises
        ------
        KeyError if the service returns an unknown category id
        """
        cats_dict = dict(enumerate(_BosonNLPWrapper.news_categories))

        # Only the first result returned by the service is used.
        clsfy_num = self.nlp.classify(text)[0]

        return cats_dict[clsfy_num]


    def extract_keywords(self, text, top_k=3):
        """
        Extracts the top k keywords and the weight of each word in the text.
        See: http://docs.bosonnlp.com/keywords.html

        Parameters
        ----------
        text (string): text passage from which to extract keywords
        top_k (integer): number of keywords to return

        Returns
        -------
        list of key-value pairs {word: weight}

        Example (requires a valid API token and network access):

        >>> nlp = _BosonNLPWrapper('<api token>')  # doctest: +SKIP
        >>> nlp.extract_keywords('我最愛老虎堂,奶茶香醇,波霸彈Q 好香的黑糖味')  # doctest: +SKIP
        [{'波霸彈': 0.5980681967308248}, {'黑糖': 0.4699792421671365}, {'香醇': 0.4497614275300947}]
        """
        result = self.nlp.extract_keywords(text, top_k)  # outputs in sorted order of weight

        # Each result item is a (weight, word) pair.
        return [{word: weight} for weight, word in result]


    def segment_words_and_tag(self, text):
        """
        Splits up text into segments of "words" and tags them with their respective part of speech.
        See: http://docs.bosonnlp.com/tag.html

        Parameters
        ----------
        text (string): text passage to segment into separate "words" and tags them with parts of speech

        Returns
        -------
        list of key-value pairs {word: part-of-speech-tag}
        """
        # Only the first result returned by the service is used.
        result = self.nlp.tag(text)[0]

        return [{word: tag} for word, tag in zip(result['word'], result['tag'])]


    def get_summary(self, content, title='', pct_limit=0.2):
        """
        Extracts a news digest (summary) of the content.
        See: http://docs.bosonnlp.com/summary.html

        Parameters
        ----------
        content (string): text passage to summarize
        title (string): title of the passage (optional, may provide more accurate results)
        pct_limit (float): max length of the summary in terms of percentage of the original word count

        Returns
        -------
        string containing the summary of the passage
        """
        # The SDK call takes the title first, then the content.
        return self.nlp.summary(title, content, pct_limit)