Example No. 1
import matplotlib.pyplot as plot  # this snippet uses 'plot' as the pyplot alias
import networkx as nx
from bosonnlp import BosonNLP

def Class_ification(sentence):  # classify a piece of text
    plot.rcParams['font.sans-serif'] = ['SimHei']  # a font that can render the Chinese labels
    plot.rcParams['axes.unicode_minus'] = False
    nlp = BosonNLP('TPDuivpZ.27572.rVuPCI9-kUlN')
    result = nlp.classify(sentence)  # returns one numeric category id per input text
    info = {
        0: "体育",
        1: "教育",
        2: "财经",
        3: "社会",
        4: "娱乐",
        5: "军事",
        6: "国内",
        7: "科技",
        8: "互联网",
        9: "房产",
        10: "国际",  # was a duplicate "科技"; index 10 is 国际 in BosonNLP's category list
        11: "女人",
        12: "汽车",
        13: "游戏",
    }
    DG = nx.DiGraph()
    plot.figure(figsize=(3, 3))
    plot.subplot(1, 1, 1)
    plot.title('文本分类', color='red', fontsize=15)
    DG.add_node(info[result[0]])
    nx.draw(DG, with_labels=True, node_size=6000, node_color='lightblue')
    plot.show()
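A minimal usage sketch for the function above (the headline is a placeholder; any Chinese text works, and the token must still be valid):

Class_ification('皇马在欧冠半决赛中获胜')  # draws one node labelled with the predicted category, e.g. "体育"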
Example No. 2
def getAnswerKeys(text_set, api_key):
    keys = []
    nlp = BosonNLP(api_key)
    result = nlp.extract_keywords(text_set, top_k=30)
    for weight, word in result:
        keys.append((word, int(weight * 10)))
    return keys
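For reference, extract_keywords returns (weight, word) pairs sorted by descending weight, so the helper above yields word/int pairs roughly like this (illustrative values, not real API output):

# getAnswerKeys(['病毒式传播的新闻迅速蔓延'], api_key)
# => [('病毒', 9), ('蔓延', 7), ('新闻', 5), ...]   # word first, weight scaled by 10 and truncated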
Example No. 3
def sentiment(contents):
    sentiments = {}
    nlp = BosonNLP(boson_token)  # boson_token, models and model_to_name are module-level config
    for model in models:
        result = nlp.sentiment(contents, model=model)
        sentiments[model_to_name[model]] = result
    return sentiments
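Each per-text result from the sentiment endpoint is a [positive, negative] probability pair; the test in Example No. 17 below relies on exactly that ordering. A self-contained sketch (YOUR_API_TOKEN is a placeholder):

from bosonnlp import BosonNLP

nlp = BosonNLP('YOUR_API_TOKEN')       # placeholder token
scores = nlp.sentiment(['美好的世界'])  # e.g. [[0.97, 0.03]]
assert scores[0][0] > scores[0][1]     # the positive probability should dominate here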
Example No. 4
def maintain():
    apitoken = "ZUKLt9tO.24611.KI1wUPXknGRP"
    nlp = BosonNLP(apitoken)
    conn = pymysql.connect(host='123.206.68.192',
                           port=3306,
                           user='******',
                           passwd='',
                           db='news',
                           charset='utf8')
    cur = conn.cursor()
    cur.execute("DELETE FROM `data` WHERE context = 'error'")
    conn.commit()

    cur.execute("SELECT * FROM `data` WHERE	abstract = 'error'")
    data = cur.fetchall()
    for entry in data:
        result = nlp.summary('', entry[3], 50).replace('\n', '')
        if result == 'error':
            print('[Deleted] wrong entry: %s' % (entry,))
            cur.execute("DELETE FROM `data` WHERE ID = %s", (entry[0],))
        else:
            cur.execute("UPDATE `data` SET abstract = %s WHERE ID = %s",
                        (result, entry[0]))

    cur.close()
    conn.commit()
    conn.close()
Example No. 6
def classify(contents):
    nlp = BosonNLP(boson_token)
    result = nlp.classify(contents)
    topics = []
    for topic in result:
        # boson_token and topic_to_id are module-level config; topic_to_id maps
        # BosonNLP's numeric category label to this project's topic id
        topics.append(topic_to_id[topic])
    return topics
Example No. 7
def execute(news, vectors, statements):

    nlp = BosonNLP('3KJW0U-I.24870.1PdhvJB30HgY')
    # print("\n情感分析")
    for i in range(len(news)):
        combine = ""
        for s in news[i].Sentences:
            combine += s
#        print (news[i].PressName,"\n",combine,"\n",nlp.sentiment(combine)[0][0])
#    print("")
    for new in news:
        new.Coordinate = np.zeros(len(statements))
        for i in range(len(statements)):
            if statements[i] in new.Sentences:
                for j in range(len(vectors)):
                    new.Coordinate[j] += vectors[i][j]
        # normalize the coordinates (nomalization is the project's helper; name kept as in the source)
        nomalization(new.Coordinate)
        # append a sentiment dimension to the news coordinates
        combine = ""
        for s in new.Sentences:
            combine += s
        sentiment = nlp.sentiment(combine)
        new.Sentiment = sentiment[0][0]
        # print (new.PressName,"\n",combine,"\n",sentiment[0][0])
        # new.Coordinate.append(sentiment[0][0])
        new.Coordinate = np.append(new.Coordinate,
                                   sentiment[0][0] * len(vectors) / 2)

        print("%s %s" % (new.PressName, new.Coordinate))
    distance_matrix = generate_distance_matrix(news)
    print("\n新闻距离矩阵\n", distance_matrix)
    return distance_matrix
Example No. 8
def ConfirmMuseum(text, museum, textid):
    # nlp = BosonNLP('SeJUopMY.24669.6kCKU4ruI3ss')
    # nlp = BosonNLP('lMdMTyuV.24544.0VHv6klp6Pk6')
    nlp = BosonNLP('sjWBhf9i.24699.rQmsCad9c3Jv')
    try:
        flag = 0
        text = text[0:1000]
        result = nlp.ner(text)[0]
        words = result['word']
        entities = result['entity']
        for entity in entities:
            if entity[2] == 'org_name':
                org_name = ''.join(words[entity[0]:entity[1]])
                if museum in org_name:
                    flag = 1
                    break
            elif entity[2] == 'location':
                location = ''.join(words[entity[0]:entity[1]])
                if museum in location:
                    flag = 1
                    break
        if flag:
            print('Confirm!')
            return 1
        else:
            print('Not!')
            return 0
    except KeyError as e:
        print('exit in %s' % textid)
        print(e)
Example No. 9
def getKeyWords(command):
    nlp = BosonNLP("IrtCRUKX.4360.giOuq49VR3V-")
    r = nlp.extract_keywords(command, top_k=3)
    l = []
    for k, v in r:
        v = v.encode('utf8')
        l.append(v)
    return l
Example No. 10
def dict_mining():
    nlp = BosonNLP('6cfIzKI1.27567.fLaZOvRXwl8f')

    s = ['整流级逆变级滤波器负载三相检测abcdq双SVM控制dqabcADRCADRCaubucu*du*quotωotωinvTrecTuqud图3基于ADRC的TSMC闭环控制系统框图Fig.3Closed-loopcontroldiagramofTSMCbasedonADRCADRC采用图1结构。',
         '但励磁绕组时间常数较大,闭环控制系统的截止频率较低,影响发电机输出端电压的响应速度。']
    data = nlp.depparser(s)
    nouns = extract_noun(data)
    print(nouns)
Example No. 11
    def __init__(self, api_token=None):
        assert api_token is not None, "Please provide an API token"

        self.token = api_token
        self.nlp = BosonNLP(self.token)
Example No. 12
def bosonnlp_segmentation(str_list):
    nlp = BosonNLP('NBSC61pl.10304.Fnwc_rUz9fyw')
    result = nlp.tag(str_list)
    for tag_map in result:
        word_tokens = tag_map['word']
        for word in word_tokens:
            print(word, end="|")
        print()
Example No. 14
File: views.py Project: mmlfs/Tutu
def getKeyWords(command):
	nlp = BosonNLP("ofW2OZMI.4712.UzT0VvLGGkdi")
	r = nlp.extract_keywords(command, top_k=3)
	l = []
	for k,v in r:
		v = v.encode('utf8')
		l.append(v)
	return l
Example No. 15
def extract_keywords(text, top_num=10):
    """Extract Keywords."""
    # Note: replace this with your own API token when testing
    nlp = BosonNLP('')
    result = nlp.extract_keywords(text, top_k=top_num)

    result_dict = {k: v for (v, k) in result}

    return result_dict
Example No. 16
def Entity_extraction(text):
    nlp = BosonNLP("x-gOGutn.27554.G6_6QvdJafES")
    rest = nlp.ner(text)[0]
    print(rest)
    words = rest['word']
    entities = rest['entity']
    tags = rest['tag']
    for entity in entities:
        print(" ".join(words[entity[0]:entity[1]]), entity[2])
Example No. 17
def test_custom_requests_session():
    import requests

    session = requests.session()
    nlp = BosonNLP(os.environ['BOSON_API_TOKEN'], session=session)
    assert nlp.session == session
    result = nlp.sentiment(['再也不来了', '美好的世界'])
    assert result[0][1] > result[0][0]
    assert result[1][0] > result[1][1]
Example No. 20
File: nlp.py Project: polarbird/ai
class YoNLP:
    def __init__(self, boson_api_token):
        self._nlp = BosonNLP(boson_api_token)

    def sentiment(self, contents):
        return self._nlp.sentiment(contents)

    def tag(self, contents):
        return self._nlp.tag(contents)
Example No. 21
class CNSegment:
    """
    Wraps a word-segmentation tool.
    Uses the API provided by bosonnlp.
    """

    # stop-word list
    stopwords = []

    def __init__(self):
        self.nlp = BosonNLP(bosonkey)

    def get_tags(self, sentences):
        """
        Run word segmentation.
        :param sentences: a sentence, or a list of sentences, to segment
        :return: list of segmentation results
        """
        result = self.nlp.tag(sentences)
        return result

    def denoisingOne(self, tagdict, uTag=None, useStopWord=False):
        """Remove noise using POS tags and stop words.

            :param tagdict: one segmentation result (a dict with 'word' and 'tag' lists)
            :param uTag: POS tags to filter out; defaults to ('w', 'o', 'y', 'u')
            :return: list of remaining words
            """
        if uTag:
            uselessTag = uTag
        else:
            uselessTag = ('w', 'o', 'y', 'u')
        word_list = []
        for index, it in enumerate(tagdict['tag']):
            if it[0] not in uselessTag:
                if not useStopWord:
                    word_list.append(tagdict['word'][index])
                elif tagdict['word'][index] not in self.stopwords:
                    word_list.append(tagdict['word'][index])
        return word_list

    def cut(self, sentences):
        """
        Segment a corpus.
        :param sentences: the corpus to segment
        :return: list of de-noised word lists
        """
        tags = self.get_tags(sentences)
        cutedSentences = []
        for sentence in tags:
            cutedSentences.append(self.denoisingOne(sentence))
        return cutedSentences

    def depenPars(self, sentences):
        return self.nlp.depparser(sentences)
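A hedged usage sketch for the class above; bosonkey is assumed to be a module-level variable holding a valid API token:

seg = CNSegment()
print(seg.cut(['今天天气真好。']))  # e.g. [['今天', '天气', '真好']] after POS-based de-noising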
Example No. 22
def segment_tag(text):
    nlp = BosonNLP('2DgGSC-8.33497.8yeNchBP6L9n')
    result = nlp.tag(text)

    words = result[0]['word']
    tags = result[0]['tag']

    assert len(words) == len(tags)

    return words, tags
Example No. 23
def getAnswerNounKeys(text_set, api_key):
    nlp = BosonNLP(api_key)
    result = nlp.tag(text_set)
    words = ''
    for d in result:
        for it in zip(d['word'], d['tag']):
            if it[1] == 'n':
                words += it[0]
            # print(' '.join([ '%s/%s' % it]))
    return getAnswerKeys(words, api_key)
Example No. 25
    def senti_by_sdk(self):
        nlp_obj = BosonNLP(self.__TOKEN)
        senti_results = nlp_obj.sentiment(self.texts, model=self.model)
        print(senti_results, flush=True)

        # check the remaining API quota
        limit_remain = self.senti_limits_remaining()
        print("BosonNLP 剩余调用次数:{}".format(limit_remain), flush=True)

        return senti_results
Example No. 26
    def __init__(self, opt):
        self.opt = opt
        self.sep = opt.seprator + " "
        if opt.cuda:
            torch.cuda.set_device(opt.gpu)
        self.bpe = BPE(codecs.open(self.opt.bpe_codes, 'r', encoding="UTF-8"),
                       self.opt.seprator, None, None)

        self.translator = onmt.Translator(opt)

        self.nlp = BosonNLP("NGhNiav2.16134.DvyEDmGzYd2S")
Example No. 27
def getAbstract(allContext):
    nlp = BosonNLP(apitoken)
    ret = []
    for i, text in enumerate(allContext):
        try:
            print("handling %dth abstract from buaa" % (i + 1))
            result = nlp.summary('', text, 50)
            ret.append(result.replace('\n', ''))
        except Exception:
            print("error when handling %dth abstract from buaa" % (i + 1))
            ret.append('error')
            traceback.print_exc()
    return ret
Example No. 28
class QueryParser(object):
    def __init__(self):
        self.nlp = BosonNLP(bosonnlp_token)

    def parse(self, query_string):
        """
        input:
        7月22号 北京到上海的高铁票
        output:
        [{'entity': [[0, 3, 'time'], [3, 4, 'location'], [5, 6, 'location']], # entity spans follow patterns that need context to interpret
        'tag': ['t', 'm', 'q', 'ns', 'p', 'ns', 'ude', 'n', 'n'],
         'word': ['7月', '22', '号', '北京', '到', '上海', '的', '高铁', '票']}]
        """
        result = self.nlp.ner(query_string)[0]
        words = result['word']
        tags = result['tag']
        entities = result['entity']
        return (words, entities, tags)

    def get_entity(self, parsed_words, index_tuple):
        """
        获取已识别的实体
        采用filter
        参考 python cookbook部分

        input:
            entities : 二元组
            parsed_words : 解析好的词组
        """
        return parsed_words[index_tuple[0]:index_tuple[1]]

    def format_entities(self, entities):
        """
        给元组命名
        """
        namedentity = collections.namedtuple('namedentity', 'index_begin index_end entity_name')
        return [namedentity(entity[0], entity[1], entity[2]) for entity in entities]

    def get_format_time(self, time_entity):
        """
        output
        {'timestamp': '2013-02-28 16:30:29', 'type': 'timestamp'}
        """
        basetime = datetime.datetime.today()
        result = self.nlp.convert_time(
            time_entity,
            basetime)
        # print(result)
        timestamp = result["timestamp"]
        return timestamp.split(" ")[0]
Example No. 29
def bosonnlpNER(news):
    from bosonnlp import BosonNLP
    nlp = BosonNLP('cKWUytiR.34676.f5F2YbS_EyX2')
    ner = nlp.ner(news)[0]
    print(ner)
    words = ner['word']
    entity = ner['entity']
    N = []
    # record the entity start and end. k:v = start : end
    entity_start = {}
    for e in entity:
        if e[2] in {'org_name', 'person_name'}:
            entity_start[e[0]] = e[1]
            N.append([''.join(words[e[0]:e[1]]), e[2]])
    return N, entity_start, words
Example No. 30
 def getAbstract(self, allContext):
     apitoken = "XB2l3mQj.14588.GJCICyNoqghJ"
     nlp = BosonNLP(apitoken)
     ret = []
     for i, text in enumerate(allContext):
         try:
             print("handling %dth abstract from %s" % (i + 1, self._school))
             result = nlp.summary('', text, 50)
             ret.append(result.replace('\n', ''))
         except Exception:
             print("error when handling %dth abstract from %s" %
                   (i + 1, self._school))
             ret.append('error')
             traceback.print_exc()
     return ret
Example No. 31
    def _boson_seg(self, text):
        nlp = BosonNLP('g8lQg9Mv.25818.fAbbwt6TYhh8')
        if type(text) == str:
            text = [text]

        corpus_len = len(text)
        word, tag = [], []
        # the tag endpoint caps batch size, so submit the corpus in chunks of 100
        for idx in range(corpus_len // 100 + 1):
            curr_idx = idx * 100
            result = nlp.tag(text[curr_idx:min(curr_idx + 100, corpus_len)])
            for seg in result:
                word.append(seg['word'])
                tag.append(seg['tag'])

        return word
Example No. 32
    def __init__(self):
        args = Arguments('RSJ')
        self.rsj = RSJ(args)
        self.rsj.restore_model()
        APP_ID = '14465679'
        API_KEY = 'DDNA68lRaVxKCUHP13t79acC'
        SECRET_KEY = 'RisCmApExjn5hcSH0KHul71Uldza8vDe'
        self.feature_maps = {}
        with open('../data/feature_maps.txt',encoding='utf8') as r:
            for line in r:
                features = line.split(' ')
                self.feature_maps[features[0]] = features

        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        self.boson = BosonNLP('m9YSebz-.27886.-Jh0KNhk__Q2')
Example No. 34
def ScentenceSimilar(str1, str2):
    """Return the similarity of str1 and str2, computed with cosine similarity.
    Uses bosonnlp for word segmentation; requires network access.

    """

    nlp = BosonNLP('wx3Ua05Y.21658.Ch876jBfuqIH')

    # get the segmentation results
    tags1 = nlp.tag(str1.lower())
    tags2 = nlp.tag(str2.lower())

    tfdict1 = getTFdict(Denoising(tags1[0]))
    tfdict2 = getTFdict(Denoising(tags2[0]))

    return getSimilar_by_cos(tfdict1, tfdict2)
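The helpers getTFdict, Denoising, and getSimilar_by_cos are not shown; as a reference point, here is a minimal stand-in for the cosine step over term-frequency dicts (hypothetical names, not the project's actual code):

import math

def cosine_of_tf_dicts(tf1, tf2):
    # dot product over shared terms, normalized by the two vector norms
    dot = sum(w * tf2.get(t, 0) for t, w in tf1.items())
    n1 = math.sqrt(sum(w * w for w in tf1.values()))
    n2 = math.sqrt(sum(w * w for w in tf2.values()))
    return dot / (n1 * n2) if n1 and n2 else 0.0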
Example No. 35
def words_cut(txt_lines, isJieba=True):  # segment text; returns a list of lines
    text_cut = []
    if isJieba:
        for line in txt_lines:
            line = line.strip()  # strip surrounding whitespace
            seg_line = cut(line)  # jieba's cut() returns a generator, usable only once
            line_str = " ".join(seg_line) + "\n"
            text_cut.append(line_str)
        return text_cut

    nlp = BosonNLP('QhCMB7FS.33943.0OYvhfw0JCx8')
    for line in txt_lines:
        # tag() returns a list of dicts shaped like [{'word': [tokens], 'tag': [...]}]
        line_list = nlp.tag(line)[0]['word']
        line_str = " ".join(line_list) + '\n'  # join the token list into a single line
        text_cut.append(line_str)
    return text_cut
Example No. 36
def Text_Segmentation_5_1():
    input_txt = open('static/files/方滨兴_互动百科.txt', 'r', encoding='utf-8')
    # Some files are GBK-encoded and need an explicit encoding argument when read;
    # ANSI-encoded notepad files, by contrast, raise an error if encoding='utf-8' is passed.

    lines = input_txt.readlines()
    input_txt.close()

    nlp = BosonNLP('QhCMB7FS.33943.0OYvhfw0JCx8')  # create the client once instead of per line
    output_txt = open('static/files/方滨兴_互动百科_split_unattributed.txt',
                      mode='a',
                      encoding='utf-8')
    for line in lines:
        result = nlp.tag(line)[0]['word']
        # output_txt.write('{}\n'.format(result))           # write as a list literal
        output_txt.write('{}\n'.format(' '.join(result)))   # write as plain text
    output_txt.close()
Example No. 37
    def __init__(self, nername, phrase_list, groupid=0):
        multiprocessing.Process.__init__(self)
        self.nername = nername
        self.phrase_list = phrase_list
        self.numofphrase = len(phrase_list)

        # batch ID, and will be used for file name
        self.group_id = str(groupid)

        # load NER modules
        self.boson_ner = BosonNLP("bJ0hvqpK.21947.dpf19nyJfNHp")
        #self.conn = self.boson_ner.connect()
        #self.ltp_ner = LTPNer()

        self.jsonData = {}

        print "creating subprocess : " + self.nername + ":" + self.group_id + ", number of phrase: " + str(
            self.numofphrase)
Example No. 38
def getAnswerEntities(text_set, api_key, level):
	def f(x):
		return {
			'0': 'location',
			'1': 'person_name',
			'2': 'product_name',
			'3': ('org_name', 'company_name'),
			'4': ('product_name', 'org_name', 'company_name'),
		}[str(x)]
	nlp = BosonNLP(api_key)
	result = nlp.ner(text_set)[0]
	words = result['word']
	entities = result['entity']
	ul = []
	for entity in entities:
		if (entity[2] == f(level) or entity[2] in f(level)):
			ul.append(''.join(words[entity[0]:entity[1]]))
	keys = sortList(ul)
	return keys
Example No. 39
 def __init__(self):
     self.nlp = BosonNLP(bosonnlp_token)
Example No. 40
File: views.py Project: mmlfs/Tutu
def getAttitude(comment):
	nlp = BosonNLP('ofW2OZMI.4712.UzT0VvLGGkdi')
	# s = ['他是个傻逼', '美好的世界']
	result = nlp.sentiment(comment)
	print(result)
	return result
Example No. 41
@author: JieJ
"""

from bosonnlp import BosonNLP


if __name__ == '__main__':
    # # look up the usage condition
    # HEADERS = {'X-Token': 'RvfFdvC_.4154.f2IbbrWgZrP8'}
    # RATE_LIMIT_URL = 'http://api.bosonnlp.com/application/rate_limit_status.json'
    # import requests
    # result = requests.get(RATE_LIMIT_URL, headers=HEADERS).json()
    # for key,val in result['limits'].iteritems():
    #     print key,'\t',val

    nlp = BosonNLP('RvfFdvC_.4154.f2IbbrWgZrP8')
    nlp = BosonNLP('vQdBA8k_.4176.hUiXrb6354i2')    #LiYB's token
    nlp = BosonNLP('6pcRO9QY.4254.H0BK-v3mB5Cv')    #WangLY's token

    # s = ['对于该小孩是不是郑尚金的孩子,目前已做亲子鉴定,结果还没出来,'
    # '纪检部门仍在调查之中。成都商报记者 姚永忠']
    # result = nlp.ner(s)
    # print result
    # print ' '.join([x for x in result[0]['word']])

    fname = 'D:\\Github\\Sentiment-Analysis\\data\\nlpcc_emotion\\train\\neg_raw'
    all_texts = [x.strip() for x in open(fname).readlines()]
    for i in range(7000):
        print("handling " + str(i + 1) + "th batch of 100 documents.....")
        start = i * 100
        end = start + 100
Example No. 42
def main():
    global last_update_count, last_extrect_tag_time
    token = json.load(open('./config/token.json','r'))
    if DEBUG:
        client = EvernoteClient(token=token['en_token'],sandbox=True)
    else:
        client = EvernoteClient(token=token['en_token'])
        client.service_host = 'app.yinxiang.com'

    print('现在服务器是:', client._get_endpoint())
    # bosonNlp
    nlp = BosonNLP(token['boson_nlp_token'])
    note_store = client.get_note_store()

    # load the last sync state
    if os.path.exists(data_file('sync_state')):
        last_sync_state = json.load(open(data_file('sync_state'),'r'))
        last_update_count = last_sync_state['updateCount']
        last_extrect_tag_time = last_sync_state['currentTime']

    # get the current sync state
    current_sync_state = get_current_sync_state(note_store)

    if current_sync_state.updateCount > last_update_count:
        new_updated_count = current_sync_state.updateCount - last_update_count
        print(current_sync_state.__dict__)
        new_note_filter = NoteStore.NoteFilter()
        new_note_filter.order = Types.NoteSortOrder.CREATED
        new_notes = note_store.findNotes(new_note_filter,0,new_updated_count)
        print('totalNumber:%d\tNoteListNum:%d' % (new_notes.totalNotes, len(new_notes.notes)))
    else:
        print('没有新增更新...')
        exit(1)

    # fetch all of the user's tags
    tags = Tags(note_store=note_store)
    alltags = tags.tags
    print('标签云:\n')
    print('\t'.join(alltags.keys()))

    # process each new note
    for note in new_notes.notes:
        # skip notes created before the last sync
        if note.created <= last_extrect_tag_time:
            continue
        print('\n' + '*' * 120)
        content = note_store.getNoteContent(note.guid)
        print("guid:%s\ntitle:%s\ncreated:%s\n作者:%s" % (note.guid, note.title, note.created, note.attributes.author))

        print('author:%s\nsource:%s\nsourceURL:%s\nsourceApplication:%s' % (note.attributes.author, note.attributes.source, note.attributes.sourceURL, note.attributes.sourceApplication))

        if not note.attributes.sourceURL:
            continue
        print "现有标签(tags):%s" %(",".join(note_store.getNoteTagNames(note.guid)))
        print '-'*120
        #print "内容(%d):created:%s,\n%s" %(note.contentLength,note.created,content)

        # parse the note XML and extract all of its text
        try:
            parser = ET.XMLParser()
            parser.entity['nbsp'] = ''
            parser.entity['ldquo'] = ''
            parser.entity['rdquo'] = ''
            parser.entity['hellip'] = ''
            tree = ET.parse(StringIO(content),parser=parser)
        except Exception as data:
            print('ElementTree parser error')
            print(content)
            print('errorData:')
            print(data)
            exit(1)
        en_note = tree.findall('.')[0]

        content_string = ''.join(en_note.itertext())

        # write the extracted text to a file
        with codecs.open(note_file(note.guid),'w+',encoding='utf-8') as f:
            f.write(content_string)
        # get the article's named entities via BosonNLP
        ner_tag_guid_list = []
        ner_tag_name_list = []
        ner = Ner(content_string).process(nlp)
        entites = ner.collect_type_entity(count=1)
        for entity in entites:
            tag = tags.add(entity)
            ner_tag_guid_list.append(tag.guid)
            ner_tag_name_list.append(tag.name)
        # get the article's keywords via BosonNLP
        extract_keywords = nlp.extract_keywords(content_string, top_k=20)
        keywords = [item[1].upper() for item in extract_keywords]
        print('通过 BosonNLP extract_keywords 拿到文章的前20个关键字:')
        for keyword in extract_keywords:
            print('%s \t %s' % (keyword[1], keyword[0]))
        print('-' * 120)
        # intersect the keywords with the user's existing tags to collect tag guids
        keywords_tag_guid_list = []
        newKeyWords = []
        for keyword in keywords:
            if tags.exist(keyword):
                existTag = tags.get(keyword)
                keywords_tag_guid_list.append(existTag.guid)
                newKeyWords.append(existTag.name)
        print('\nextract_keywords与自己所有tag的交集:')
        print('\t'.join(newKeyWords))

        # append the new tags to the note
        new_tag_guid_list = list(set(keywords_tag_guid_list).union(set(ner_tag_guid_list)))
        print('extract_keywords+ner的tag:')
        newKeyWords.extend(ner_tag_name_list)
        print('\t'.join(newKeyWords))

        if note.tagGuids:
            note.tagGuids.extend(new_tag_guid_list)
        else:
            note.tagGuids = new_tag_guid_list

        note_store.updateNote(note)
Example No. 43
def bosonNer(text, sensitivity):
    nlp = BosonNLP('qJWJc-f3.4334.MamzfHZ-9wUL')
    return nlp.ner(text, sensitivity)
Example No. 44
File: boo.py Project: wgliang/poavp
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals
from bosonnlp import BosonNLP

nlp = BosonNLP("WXH_K9If.5750.ZIsluNnG8bpA")
print(nlp.sentiment("大众深陷断轴门事件"))
Example No. 45
def test_invalid_token_raises_HTTPError():
    nlp = BosonNLP('invalid token')
    pytest.raises(HTTPError, lambda: nlp.sentiment('美好的世界'))
Example No. 47
# -*- coding: utf-8 -*-
import os
import sys
import datetime
import time
from bosonnlp import BosonNLP

myApiToken = "X0njNWj2.5612.pYnhvqV02Kgn"
nlp = BosonNLP(myApiToken)
for eachLine in open("simple.txt"):
    # print(eachLine, type(eachLine))
    # break
    # print(nlp.extract_keywords(eachLine))
    result = nlp.tag(eachLine)
    print(result)
# print(nlp.sentiment("这家味道还不错"))
# print(nlp.extract_keywords("instructor.txt"))
Example No. 48
# -*- encoding: utf-8 -*-
from __future__ import print_function, unicode_literals

from bosonnlp import BosonNLP

# Note: replace this with your own API token when testing
nlp = BosonNLP('VaUKhf7X.7870.xbHiGWB_gx49')
s = ['中新网周口9月15日电(刘鹏) 15日,针对媒体报道的河南省太康县女子在当地一家KTV遭3名协警暴力殴打一事,太康县警方向记者回复称,3名打人者中两名为协警身份,其中一名协警未参与打架,但目前两名协警均被辞退。而当晚一同前往KTV娱乐的一名正式女民警被关禁闭。  据之前媒体报道,今年9月4日晚11时左右,太康县一家KTV内,一名姜姓女士在送走一位朋友后正返回KTV时,在门口碰到正从里面出来的三名男子。其中一名男子对姜女士动手动脚,另一男子则说姜女士为“小姐”。  受到羞辱的姜女士要求对方赔礼道歉。没想到竟遭到了三名男子拳脚相加。据姜女士反映,事发当晚黑衣男子对她一番推搡致其头部撞到门上;绿衣男子则直接拽着她的头发将其摁倒在地,随后又遭到了拳头打脸、脚踹并拉着衣服将其头往门上撞。姜女士试图报警,结果三名男子将其手机夺走摔到地上。为了阻止围观群众报警,白衣男子直接拿出“警官证”,称自己是刑警队人员,若是报警,不把录像删了,就把KTV店给砸了。  15日上午,太康县公安局发布对此事件的调查处理通报。通报称,9月4日晚,葛某(太康县人,无业)、师某(协警)等人到盛世年华夜总会唱歌,当晚23时结束后,师某、葛某与姜某发生争执吵骂,并引起厮打,致使姜某轻微伤。目前双方已达成调解协议,姜某对师某、葛某达成谅解。  太康县公安局负责处理此事的王姓警官透露,事发当晚,和打人者葛某、师某一同前往KTV娱乐的还有该局一名刚入职不久的女民警李某某及协警司某等人,但他们并未参与打架。后经太康县公安局党委研究决定,对违规进入娱乐场所的民警李某某先行禁闭,待调查结束后再做处理;对违规进入娱乐场所的协警师某、司某予以辞退。'
     '纪检部门仍在调查之中。成都商报记者 姚永']  # adjacent literals concatenate: s is a single-element list
result = nlp.ner(s)[0]
words = result['word']
entities = result['entity']


for entity in entities:
    print(''.join(words[entity[0]:entity[1]]), entity[2])

print(s)
result = nlp.sentiment(s)
print(result)
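Each entry of result['entity'] is a [start, end, type] triple indexing into the token list, which is why the loop above joins words[entity[0]:entity[1]]. The same decoding on made-up data, with no API call:

fake = {'word': ['太康县', '公安局', '发布', '通报'],
        'entity': [[0, 2, 'org_name']]}
for start, end, etype in fake['entity']:
    print(''.join(fake['word'][start:end]), etype)  # -> 太康县公安局 org_name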
Example No. 49
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals
from bosonnlp import BosonNLP
import json

file_path = r"C:\workspace\Taikor_NLP_service\Thirdparty_NLP_WebAPI\Bosson\corpos\msr_test.txt"
with open(file_path, "r", encoding="utf8") as f:
    s = f.read()

nlp = BosonNLP("2ZmFSLeL.3212.Y6W7eOViuyZZ")
pos = nlp.tag(s)

dump = json.dumps(pos)

with open("pos", "w") as f:
    f.write(dump)
Example No. 50
def emotion_analysis(context):
    nlp = BosonNLP('GrrNaeVG.6417.dsK_xHt0qE6-')
    return nlp.sentiment(context, model='weibo')
Example No. 51
def bosonNer(text, sensitivity):
    nlp = BosonNLP('O8M_j1Nd.4200.wIlhsL46w9-C')
    return nlp.ner(text, sensitivity)