Exemplo n.º 1
0
def sentiment(contents):
    """Run BosonNLP sentiment analysis on *contents* under every configured model.

    Returns a dict mapping each model's display name (via ``model_to_name``)
    to the raw sentiment result returned by the API for that model.
    """
    nlp = BosonNLP(boson_token)
    return {model_to_name[m]: nlp.sentiment(contents, model=m) for m in models}
Exemplo n.º 2
0
def maintain():
    """Clean up the news ``data`` table.

    Deletes rows whose context failed to download ('error'), then retries
    BosonNLP summarization for rows whose abstract is 'error'; rows that
    still fail to summarize are deleted.
    """
    apitoken = "ZUKLt9tO.24611.KI1wUPXknGRP"
    nlp = BosonNLP(apitoken)
    conn = pymysql.connect(host='123.206.68.192',
                           port=3306,
                           user='******',
                           passwd='',
                           db='news',
                           charset='utf8')
    cur = conn.cursor()
    # Rows whose content could not be fetched are useless: drop them first.
    cur.execute("DELETE FROM `data` WHERE context = 'error'")
    conn.commit()

    cur.execute("SELECT * FROM `data` WHERE	abstract = 'error'")
    data = cur.fetchall()
    for entry in data:
        result = nlp.summary('', entry[3], 50).replace('\n', '')
        if result == 'error':
            # BUG FIX: the original Py2 ``print '...' + entry`` concatenated
            # a str with a tuple and raised TypeError before ever printing.
            print('[Deleted]wrong entry: %s' % (entry,))
            # Pass the parameter as a tuple for portability across drivers.
            cur.execute("DELETE FROM `data` WHERE ID = %s", (entry[0],))
        else:
            cur.execute("UPDATE `data` SET abstract = %s WHERE ID = %s",
                        (result, entry[0]))

    cur.close()
    conn.commit()
    conn.close()
Exemplo n.º 3
0
def classify(contents):
    """Classify *contents* with BosonNLP and map each topic to its id.

    Returns a list of ids looked up through ``topic_to_id``.
    """
    nlp = BosonNLP(boson_token)
    return [topic_to_id[topic] for topic in nlp.classify(contents)]
Exemplo n.º 4
0
def getAnswerKeys(text_set, api_key):
    """Extract the top-30 BosonNLP keywords from *text_set*.

    Returns a list of ``(word, weight)`` pairs where the API weight is
    scaled by 10 and truncated to an int.
    """
    nlp = BosonNLP(api_key)
    return [(word, int(weight * 10))
            for weight, word in nlp.extract_keywords(text_set, top_k=30)]
Exemplo n.º 5
0
def ConfirmMuseum(text, museum, textid):
    """Check whether *museum* appears as an organization or location
    entity in *text* (only the first 1000 characters are analysed).

    Prints 'Confirm!' and returns 1 when found, otherwise prints 'Not!'
    and returns 0.  On a missing key in the NER response the error is
    printed alongside *textid* and the function returns None.
    """
    # nlp = BosonNLP('SeJUopMY.24669.6kCKU4ruI3ss')
    # nlp = BosonNLP('lMdMTyuV.24544.0VHv6klp6Pk6')
    nlp = BosonNLP('sjWBhf9i.24699.rQmsCad9c3Jv')
    try:
        found = False
        snippet = text[0:1000]
        ner = nlp.ner(snippet)[0]
        tokens = ner['word']
        for ent in ner['entity']:
            # Only organization names and locations are of interest.
            if ent[2] in ('org_name', 'location'):
                surface = ''.join(tokens[ent[0]:ent[1]])
                if museum in surface:
                    found = True
                    break
        if found:
            print('Confirm!')
            return 1
        print('Not!')
        return 0
    except KeyError as e:
        print('exit in %s' % textid)
        print(e)
Exemplo n.º 6
0
def Class_ification(sentence):  # text classification
    """Classify *sentence* with BosonNLP and display the resulting category
    as a single-node graph (matplotlib configured for Chinese fonts)."""
    plot.rcParams['font.sans-serif'] = ['SimHei']
    plot.rcParams['axes.unicode_minus'] = False
    nlp = BosonNLP('TPDuivpZ.27572.rVuPCI9-kUlN')
    category = nlp.classify(sentence)[0]
    # BosonNLP category id -> Chinese label.
    info = {
        0: "体育",
        1: "教育",
        2: "财经",
        3: "社会",
        4: "娱乐",
        5: "军事",
        6: "国内",
        7: "科技",
        8: "互联网",
        9: "房产",
        10: "科技",
        11: "女人",
        12: "汽车",
        13: "游戏",
    }
    graph = nx.DiGraph()
    plot.figure(figsize=(3, 3))
    plot.subplot(1, 1, 1)
    plot.title('文本分类', color='red', fontsize=15)
    graph.add_node(info[category])
    nx.draw(graph, with_labels=True, node_size=6000, node_color='lightblue')
    plot.show()
def execute(news, vectors, statements):
    """Compute the pairwise distance matrix between news articles.

    For each article: builds a coordinate vector over *statements*
    (summing rows of *vectors* for statements the article contains),
    normalizes it, appends a scaled BosonNLP sentiment score as one extra
    dimension, then returns ``generate_distance_matrix(news)``.

    NOTE(review): ``new.Coordinate`` is sized ``len(statements)`` but the
    inner loop indexes ``j`` over ``range(len(vectors))`` -- confirm both
    lengths are always equal, otherwise this raises IndexError or drops
    vector components.
    """
    nlp = BosonNLP('3KJW0U-I.24870.1PdhvJB30HgY')
    # print("\n情感分析")
    # Dead loop left over from debugging: ``combine`` is built but unused
    # because the print below is commented out.
    for i in range(len(news)):
        combine = ""
        for s in news[i].Sentences:
            combine += s
#        print (news[i].PressName,"\n",combine,"\n",nlp.sentiment(combine)[0][0])
#    print("")
    for new in news:
        new.Coordinate = np.zeros(len(statements))
        for i in range(len(statements)):
            if statements[i] in new.Sentences:
                for j in range(len(vectors)):
                    new.Coordinate[j] += vectors[i][j]
        ## normalize the coordinate vector
        nomalization(new.Coordinate)
        ## append the sentiment score as an extra coordinate dimension
        combine = ""
        for s in new.Sentences:
            combine += s
        sentiment = nlp.sentiment(combine)
        new.Sentiment = sentiment[0][0]
        # print (new.PressName,"\n",combine,"\n",sentiment[0][0])
        # new.Coordinate.append(sentiment[0][0])
        new.Coordinate = np.append(new.Coordinate,
                                   sentiment[0][0] * len(vectors) / 2)

        print("%s %s" % (new.PressName, new.Coordinate))
    distance_matrix = generate_distance_matrix(news)
    print("\n新闻距离矩阵\n", distance_matrix)
    return distance_matrix
Exemplo n.º 8
0
    def __init__(self, api_token=None):
        """Store *api_token* and build the BosonNLP client.

        Raises:
            AssertionError: if no token is provided.
        """
        # BUG FIX: the original wrapped ``assert`` in a try/except whose
        # only action was re-raising -- a no-op -- and ``assert`` is
        # stripped under ``python -O``.  An explicit raise of the same
        # exception type keeps existing callers' handlers working.
        if api_token is None:
            raise AssertionError("Please provide an API token")

        self.token = api_token
        self.nlp = BosonNLP(self.token)
Exemplo n.º 9
0
def dict_mining():
    """Run BosonNLP dependency parsing on two sample sentences and print
    the nouns extracted from the parse trees."""
    nlp = BosonNLP('6cfIzKI1.27567.fLaZOvRXwl8f')

    samples = ['整流级逆变级滤波器负载三相检测abcdq双SVM控制dqabcADRCADRCaubucu*du*quotωotωinvTrecTuqud图3基于ADRC的TSMC闭环控制系统框图Fig.3Closed-loopcontroldiagramofTSMCbasedonADRCADRC采用图1结构。',
               '但励磁绕组时间常数较大,闭环控制系统的截止频率较低,影响发电机输出端电压的响应速度。']
    parses = nlp.depparser(samples)
    print(extract_noun(parses))
def bosonnlp_segmentation(str_list):
    """Segment *str_list* with BosonNLP and print each document's tokens
    separated by '|'.

    NOTE: uses Python 2 ``print`` statements -- not Python 3 compatible.
    """
    nlp = BosonNLP('NBSC61pl.10304.Fnwc_rUz9fyw')
    result = nlp.tag(str_list)
    for tag_map in result:
        word_tokens = tag_map['word']
        for word in word_tokens:
            # Trailing comma keeps all tokens on one line (Py2 syntax).
            print word.encode("utf-8") + "|",
        print "\n"
Exemplo n.º 11
0
def getKeyWords(command):
    """Return the top-3 BosonNLP keywords of *command*, UTF-8 encoded."""
    nlp = BosonNLP("IrtCRUKX.4360.giOuq49VR3V-")
    keywords = nlp.extract_keywords(command, top_k=3)
    # Each result item is (weight, word); keep only the encoded word.
    return [word.encode('utf8') for _weight, word in keywords]
Exemplo n.º 12
0
def Entity_extraction(text):
    """Run BosonNLP NER on *text* and print each entity span with its type."""
    nlp = BosonNLP("x-gOGutn.27554.G6_6QvdJafES")
    doc = nlp.ner(text)[0]
    print(doc)
    words = doc['word']
    entities = doc['entity']
    tags = doc['tag']  # fetched for parity with the original; unused below
    for ent in entities:
        # ent = (start_token, end_token, entity_type)
        print(" ".join(words[ent[0]:ent[1]]), ent[2])
Exemplo n.º 13
0
def extract_keywords(text, top_num=10):
    """Extract keywords via BosonNLP.

    Returns a dict mapping keyword -> weight for the *top_num* strongest
    keywords of *text*.
    """
    # NOTE: replace with your own API token when testing.
    nlp = BosonNLP('')
    weighted = nlp.extract_keywords(text, top_k=top_num)

    result_dict = {}
    for weight, word in weighted:
        result_dict[word] = weight

    return result_dict
Exemplo n.º 14
0
def test_custom_requests_session():
    """BosonNLP must reuse a caller-supplied requests session."""
    import requests

    session = requests.session()
    nlp = BosonNLP(os.environ['BOSON_API_TOKEN'], session=session)
    assert nlp.session == session
    scores = nlp.sentiment(['再也不来了', '美好的世界'])
    # First text is negative, second positive.
    assert scores[0][1] > scores[0][0]
    assert scores[1][0] > scores[1][1]
Exemplo n.º 15
0
    def senti_by_sdk(self):
        """Run sentiment analysis on ``self.texts`` via the BosonNLP SDK,
        print the results and the remaining API quota, and return the
        raw results."""
        client = BosonNLP(self.__TOKEN)
        results = client.sentiment(self.texts, model=self.model)
        print(results, flush=True)

        # Check how many API calls remain on this token.
        remaining = self.senti_limits_remaining()
        print("BosonNLP 剩余调用次数:{}".format(remaining), flush=True)

        return results
Exemplo n.º 16
0
def getAnswerNounKeys(text_set, api_key):
    """Concatenate every noun found in *text_set* (BosonNLP POS tags) and
    run keyword extraction over the combined noun string."""
    nlp = BosonNLP(api_key)
    tagged = nlp.tag(text_set)
    nouns = ''.join(word
                    for doc in tagged
                    for word, tag in zip(doc['word'], doc['tag'])
                    if tag == 'n')
    return getAnswerKeys(nouns, api_key)
Exemplo n.º 17
0
def segment_tag(text):
    """Segment and POS-tag *text*; returns (words, tags) of the first doc."""
    nlp = BosonNLP('2DgGSC-8.33497.8yeNchBP6L9n')
    first = nlp.tag(text)[0]

    words, tags = first['word'], first['tag']

    # Sanity check: tokens and tags must align one-to-one.
    assert len(words) == len(tags)

    return words, tags
Exemplo n.º 18
0
    def __init__(self, opt):
        """Set up the translation pipeline: GPU device, BPE codec,
        ONMT translator and a BosonNLP client.

        Args:
            opt: parsed options; must provide ``seprator``, ``cuda``,
                ``gpu`` and ``bpe_codes``.
        """
        self.opt = opt
        self.sep = opt.seprator + " "
        if opt.cuda:
            # Select the GPU before the translator allocates tensors.
            torch.cuda.set_device(opt.gpu)
        # BPE codes file drives subword segmentation.
        # NOTE(review): this codecs file handle is never closed.
        self.bpe = BPE(codecs.open(self.opt.bpe_codes, 'r', encoding="UTF-8"),
                       self.opt.seprator, None, None)

        self.translator = onmt.Translator(opt)

        self.nlp = BosonNLP("NGhNiav2.16134.DvyEDmGzYd2S")
Exemplo n.º 19
0
def getAbstract(allContext):
    """Summarize each text in *allContext* with BosonNLP (50-word limit).

    Returns a list the same length as *allContext*; entries whose
    summarization failed contain the string 'error' (best-effort, errors
    are logged rather than raised).
    """
    nlp = BosonNLP(apitoken)
    ret = []
    for i, text in enumerate(allContext):
        try:
            print("handling %dth abstract from buaa" % (i + 1))
            result = nlp.summary('', text, 50)
            ret.append(result.replace('\n', ''))
        # BUG FIX: the bare ``except:`` also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception instead.
        except Exception:
            print("error when handling %dth abstract from buaa" % (i + 1))
            ret.append('error')
            # BUG FIX: ``print(traceback.print_exc())`` printed a spurious
            # "None" line -- print_exc() already writes the traceback.
            traceback.print_exc()
    return ret
Exemplo n.º 20
0
 def getAbstract(self, allContext):
     """Summarize each text in *allContext* with BosonNLP (50-word limit).

     Returns a list the same length as *allContext*; entries whose
     summarization failed contain the string 'error' (best-effort,
     errors are logged rather than raised).
     """
     apitoken = "XB2l3mQj.14588.GJCICyNoqghJ"
     nlp = BosonNLP(apitoken)
     ret = []
     for i, text in enumerate(allContext):
         try:
             print("handling %dth abstract from %s" % (i + 1, self._school))
             result = nlp.summary('', text, 50)
             ret.append(result.replace('\n', ''))
         # BUG FIX: the bare ``except:`` also swallowed SystemExit and
         # KeyboardInterrupt; catch Exception instead.
         except Exception:
             print("error when handling %dth abstract from %s" %
                   (i + 1, self._school))
             ret.append('error')
             # BUG FIX: print_exc() returns None; printing its return
             # value added a spurious "None" line.
             traceback.print_exc()
     return ret
Exemplo n.º 21
0
    def __init__(self):
        """Restore the RSJ model, load the feature map file, and create
        the NLP clients (Baidu AipNlp and BosonNLP)."""
        args = Arguments('RSJ')
        self.rsj = RSJ(args)
        self.rsj.restore_model()
        APP_ID = '14465679'
        API_KEY = 'DDNA68lRaVxKCUHP13t79acC'
        SECRET_KEY = 'RisCmApExjn5hcSH0KHul71Uldza8vDe'
        # Maps the first space-separated field of each line to the whole
        # record.  NOTE(review): lines keep their trailing newline in the
        # last field -- confirm downstream code expects that.
        self.feature_maps = {}
        with open('../data/feature_maps.txt',encoding='utf8') as r:
            for line in r:
                features = line.split(' ')
                self.feature_maps[features[0]] = features

        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        self.boson = BosonNLP('m9YSebz-.27886.-Jh0KNhk__Q2')
Exemplo n.º 22
0
def bosonnlpNER(news):
    """Run BosonNLP NER over *news*.

    Returns a 3-tuple ``(N, entity_start, words)``:
      * N -- list of [entity_text, entity_type] for org/person entities,
      * entity_start -- maps an entity's start token index to its end index,
      * words -- the full token list.
    """
    from bosonnlp import BosonNLP
    nlp = BosonNLP('cKWUytiR.34676.f5F2YbS_EyX2')
    ner = nlp.ner(news)[0]
    print(ner)
    words = ner['word']
    wanted = {'org_name', 'person_name'}
    found = []
    # Records each matched entity's span: start index -> end index.
    entity_start = {}
    for ent in ner['entity']:
        if ent[2] in wanted:
            entity_start[ent[0]] = ent[1]
            found.append([''.join(words[ent[0]:ent[1]]), ent[2]])
    return found, entity_start, words
Exemplo n.º 23
0
    def _boson_seg(self, text):
        """Segment *text* (a str or a list of str) with BosonNLP.

        Requests are batched 100 documents at a time.  Returns the list
        of per-document token lists; POS tags are collected but not
        returned, matching the original contract.
        """
        nlp = BosonNLP('g8lQg9Mv.25818.fAbbwt6TYhh8')
        # IDIOM FIX: isinstance instead of ``type(text) == str``.
        if isinstance(text, str):
            text = [text]

        corpus_len = len(text)
        word, tag = [], []
        # BUG FIX: the old ``range(corpus_len // 100 + 1)`` issued one
        # extra request with an empty slice whenever corpus_len was a
        # multiple of 100 (including an empty corpus).
        for start in range(0, corpus_len, 100):
            result = nlp.tag(text[start:start + 100])
            for seg in result:
                word.append(seg['word'])
                tag.append(seg['tag'])

        return word
Exemplo n.º 24
0
def ScentenceSimilar(str1, str2):
    """Return the cosine similarity between *str1* and *str2*.

    Tokenization is done with BosonNLP, so network access is required.
    """
    nlp = BosonNLP('wx3Ua05Y.21658.Ch876jBfuqIH')

    # Segment both lower-cased strings and build term-frequency dicts
    # from the denoised first document of each result.
    seg1 = nlp.tag(str1.lower())
    seg2 = nlp.tag(str2.lower())
    freq1 = getTFdict(Denoising(seg1[0]))
    freq2 = getTFdict(Denoising(seg2[0]))

    return getSimilar_by_cos(freq1, freq2)
Exemplo n.º 25
0
def Text_Segmentation_5_1():
    """Segment 方滨兴_互动百科.txt line by line with BosonNLP and append the
    space-joined tokens to the ``_split_unattributed`` output file."""
    # Some source files are GBK/ANSI encoded; adjust ``encoding`` if the
    # read fails.
    with open('static/files/方滨兴_互动百科.txt', 'r', encoding='utf-8') as input_txt:
        lines = input_txt.readlines()

    # BUG FIX: the client was rebuilt and the output file reopened/closed
    # once per line; create the client once and hold the file open with
    # ``with`` so it is closed even on error.
    nlp = BosonNLP('QhCMB7FS.33943.0OYvhfw0JCx8')
    with open('static/files/方滨兴_互动百科_split_unattributed.txt',
              mode='a',
              encoding='utf-8') as output_txt:
        for line in lines:
            result = nlp.tag(line)[0]['word']
            output_txt.write('{}\n'.format(' '.join(result)))  # plain text
Exemplo n.º 26
0
def words_cut(txt_lines, isJieba=True):
    """Tokenize each line of *txt_lines*.

    Returns a list of strings, each being the space-joined tokens of one
    line followed by a newline.  With ``isJieba`` true the local jieba
    ``cut`` is used; otherwise each line is sent to the BosonNLP tagging
    API.
    """
    segmented = []
    if isJieba:
        for raw in txt_lines:
            tokens = cut(raw.strip())  # generator -- single pass only
            segmented.append(" ".join(tokens) + "\n")
        return segmented

    nlp = BosonNLP('QhCMB7FS.33943.0OYvhfw0JCx8')
    for raw in txt_lines:
        # nlp.tag returns [{'word': [...], ...}]; take the token list.
        tokens = nlp.tag(raw)[0]['word']
        segmented.append(" ".join(tokens) + '\n')
    return segmented
Exemplo n.º 27
0
    def __init__(self, nername, phrase_list, groupid=0):
        """Worker subprocess that runs BosonNLP NER over *phrase_list*.

        Args:
            nername: label for this worker (used in log output).
            phrase_list: phrases this worker will tag.
            groupid: batch id; also used for the output file name.
        """
        multiprocessing.Process.__init__(self)
        self.nername = nername
        self.phrase_list = phrase_list
        self.numofphrase = len(phrase_list)

        # batch ID, and will be used for file name
        self.group_id = str(groupid)

        # load NER modules
        self.boson_ner = BosonNLP("bJ0hvqpK.21947.dpf19nyJfNHp")
        #self.conn = self.boson_ner.connect()
        #self.ltp_ner = LTPNer()

        # Accumulated NER results, keyed per phrase.
        self.jsonData = {}

        # NOTE(review): Python 2 print statement -- not Py3 compatible.
        print "creating subprocess : " + self.nername + ":" + self.group_id + ", number of phrase: " + str(
            self.numofphrase)
Exemplo n.º 28
0
def judges(config, text):
    """Vote on the sentiment of *text* across three NLP platforms
    (Baidu AI, BosonNLP and Tencent Wenzhi).

    :param config: dict
        {'baidu': [APP_ID, API_KEY, SECRET_KEY],
         'boson': [API_TOKEN],
         'tencent': [SECRET_ID, SECRET_KEY]}
    :param text: string
    :return: 'P' when the positive vote carries, otherwise 'N'
    """
    default_timeout = 10
    w = Wenzhi(config['tencent'][0], config['tencent'][1], 'gz', 'POST')
    cli = AipNlp(config['baidu'][0], config['baidu'][1], config['baidu'][2])
    bo = BosonNLP(config['boson'][0])
    # BUG FIX: the executor was never shut down; the context manager
    # releases its worker threads once all three futures resolve.
    with ThreadPoolExecutor(max_workers=3) as pool:
        candidate1 = pool.submit(w.text_sentiment, {'content': text})
        candidate2 = pool.submit(cli.sentimentClassify, text)
        candidate3 = pool.submit(bo.sentiment, text)
        responses = [
            candidate1.result(default_timeout),
            candidate2.result(default_timeout),
            candidate3.result(default_timeout)
        ]
    post_votes = [
        responses[0]['positive'], responses[1]['items'][0]['positive_prob'],
        responses[2][0][0]
    ]
    nega_votes = [
        responses[0]['negative'], responses[1]['items'][0]['negative_prob'],
        responses[2][0][1]
    ]
    # A platform "votes" when its probability exceeds 0.5; more than a
    # third of the panel must agree.
    pv = [p for p in post_votes if p > 0.5]
    nv = [p for p in nega_votes if p > 0.5]
    if len(pv) / len(post_votes) > 1 / 3:
        return 'P'
    if len(nv) / len(nega_votes) > 1 / 3:
        return 'N'
    # No clear vote either way: default to negative (original behavior).
    return 'N'
Exemplo n.º 29
0
def getConnection():
    """Return a BosonNLP client whose token still has NER quota.

    Queries the rate-limit endpoint for the current token; when its NER
    quota is exhausted, advances ``tokenNumber`` to the next token and
    recurses.  When no tokens remain, re-initialises state, sets
    ``connectionTag`` and returns None.

    NOTE(review): when the HTTP status is not 200 the function falls
    through and implicitly returns None WITHOUT setting connectionTag --
    confirm callers distinguish that from the exhausted-token case.
    """
    global tokenLength, tokenNumber, connectionTag
    count = 0
    token = getToken()
    headers = {'X-Token': token}
    RATE_LIMIT_URL = 'http://api.bosonnlp.com/application/rate_limit_status.json'
    result = requests.get(RATE_LIMIT_URL, headers=headers).json()
    tokenLength = getTokenArrayLen()
    if result['status'] == 200:
        count = result['limits']['ner']['count-limit-remaining']
        if not count:
            if tokenLength > tokenNumber:
                # Quota gone: rotate to the next token and retry.
                tokenNumber += 1
                return getConnection()
            else:
                # All tokens exhausted: reset and signal the caller.
                init()
                connectionTag = 1
                return None
        return BosonNLP(token)
Exemplo n.º 30
0
def AnalyzePositive(cfg):
    """Sentiment-score every row of the ``news`` table and persist it.

    For each row, runs BosonNLP sentiment on column 0 and writes
    ``positive`` = 1 (positive prob > negative prob) or 0 back using the
    id found in column 7.
    """
    # nlp = BosonNLP('SeJUopMY.24669.6kCKU4ruI3ss')
    # nlp = BosonNLP('lMdMTyuV.24544.0VHv6klp6Pk6')
    nlp = BosonNLP('sjWBhf9i.24699.rQmsCad9c3Jv')

    # Open the database connection.
    # BUG FIX: ``'******' % cfg.get('db', 'user')`` raised TypeError
    # ("not all arguments converted") because the format string had no
    # placeholder; the ``'%s' %`` wrappers were no-ops anyway -- pass the
    # config values directly.
    db = pymysql.connect(host=cfg.get('db', 'host'),
                         user=cfg.get('db', 'user'),
                         passwd=cfg.get('db', 'passwd'),
                         db=cfg.get('db', 'db'),
                         port=int(cfg.get('db', 'port')),
                         charset=cfg.get('db', 'charset'))

    # Acquire a cursor for the read/update cycle.
    cur = db.cursor()

    sql1 = "select * from news"
    flag = 0
    content = []
    try:
        cur.execute(sql1)
        results = cur.fetchall()
        for row in results:
            flag = 0
            content.clear()
            content.append(row[0])
            positive = nlp.sentiment(content)
            print(positive)
            # positive[0] is [positive_prob, negative_prob].
            if positive[0][0] > positive[0][1]:
                flag = 1
            # SECURITY FIX: parameterized query instead of %-interpolated
            # SQL (the log line now shows the template, not the values).
            sql2 = "UPDATE `news` SET `positive` = %s WHERE `news`.`id` = %s"
            cur.execute(sql2, (flag, row[7]))
            db.commit()
            print(sql2)
    except Exception as e:
        # Re-raise with the original traceback intact.
        raise
    finally:
        db.commit()
        db.close()