示例#1
0
def CkipReturn(in_text):  # in_text is a string
    """Segment ``in_text`` with the CKIP service and return the raw result.

    The text is converted with ``unicode`` before it is handed to
    ``CKIPSegmenter.process``.  If that attempt raises, the fixed placeholder
    text ``'got an error'`` is segmented instead, so the caller still gets a
    segmenter result rather than an exception from the first attempt.

    Args:
        in_text: The text to segment.

    Returns:
        Whatever ``segmenter.process`` returns for the text, or for the
        placeholder text when the first attempt failed.

    Raises:
        Any exception raised by the fallback request itself; it is not
        guarded.
    """
    # NOTE(review): the account credentials are hard-coded here; consider
    # loading them from configuration instead of keeping them in source.
    segmenter = CKIPSegmenter('changcheng.tu', 'a10206606')
    try:
        return segmenter.process(unicode(in_text))
    except Exception:
        # Catch ``Exception`` rather than everything so that
        # KeyboardInterrupt and SystemExit still propagate to the caller.
        return segmenter.process(unicode('got an error'))
示例#2
0
def to_ckip(inp):
    """Segment ``inp`` with CKIP and collect the terms and their POS tags.

    Args:
        inp: Text passed unchanged to ``CKIPSegmenter.process``.

    Returns:
        ``[terms, tags, combined]`` where ``terms`` lists every ``term``
        value in order, ``tags`` lists the matching ``pos`` values, and
        ``combined`` is the space-joined ``term(pos)`` string.  Returns
        ``False`` (after printing the input) when requesting or reading the
        result raises an exception.
    """
    segmenter = CKIPSegmenter('Bolin', 'Bolin')  # CKIP connection account
    try:
        result = segmenter.process(inp)  # segmentation result
        if result['status_code'] != '0':  # segmentation failed
            # Only reported; processing still continues with whatever the
            # response contains, and a missing key falls into the handler
            # below.
            print('Process Failure: ' + result['status'])

        # Flatten every sentence into one ordered list of (term, pos) pairs.
        pairs = [
            (term['term'], term['pos'])
            for sentence in result['result']
            for term in sentence
        ]
        SaveList = [word for word, _ in pairs]  # term array
        SaveSeg = [pos for _, pos in pairs]  # part-of-speech array
        combine = ' '.join(word + '(' + pos + ')' for word, pos in pairs)

        return [SaveList, SaveSeg, combine]
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        print('error :', inp)
        return False
示例#3
0
    # Fallback path that does not use CKIP: tag ``test_sent`` with ``pseg``
    # and collect the distinct noun-like and verb-like words.
    # NOTE(review): ``pseg`` is presumably jieba.posseg -- confirm against the
    # file's imports, which are outside this fragment.
    noun_terms_arr = []
    verb_terms_arr = []
    words_use = pseg.cut(test_sent)
    for word_use in words_use:
        # ``re.match`` anchors at the start, so this accepts any flag that
        # begins with "n"; words already collected are skipped.
        if re.match("n", word_use.flag) != None and word_use.word not in  noun_terms_arr:
            noun_terms_arr.append(word_use.word)
        # Same rule for flags that begin with "v".
        if re.match("v", word_use.flag) != None and word_use.word not in  verb_terms_arr:
            verb_terms_arr.append(word_use.word)
    # ``result_arr`` is defined before this fragment; nouns are appended
    # first, then verbs.
    result_arr.extend(noun_terms_arr)
    result_arr.extend(verb_terms_arr)
    print("no ckip")
    print(result_arr)
    # Returns the combined list together with the separate noun and verb lists.
    return (result_arr, noun_terms_arr, verb_terms_arr)


# CKIP clients used by the script below.
# NOTE(review): the account credentials are hard-coded in source.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
parser = CKIPParser('gcsn', 'rb303147258')

# MongoDB on localhost: database ``councilor``, collection ``ntp_bills``.
client = MongoClient('mongodb://localhost:27017/')
db = client['councilor']
collection = db['ntp_bills']
# collection_save = db['test']
# Materialize everything ``find()`` returns into a list before iterating.
bills = list(collection.find())

for bill in bills:
    # Per-bill accumulators, reset on every iteration.
    description_verb = []
    description_nonu = []
    description_term = []
    try:
        # Segment the bill's ``description`` field; a ``status_code`` other
        # than '0' is treated as a failed request.
        result = segmenter.process(bill["description"])
        if result['status_code'] != '0':
示例#4
0
        # Drop everything up to and including the delimiter found at ``pos``,
        # then look for the next full stop; if there is none, fall back to
        # the next question mark.
        sentence = sentence[pos + 1:]
        pos = sentence.find(u"。")
        if pos == -1:
            pos = sentence.find(u"?")

    # Nothing was split off: keep the sentence as a single part and count it.
    if len(part_sentence) == 0:
        part_sentence.append(u"".join(sentence))
        num_part_sentence += 1

    # ``content`` collects one list of parts per processed sentence.
    content.append(part_sentence)

# Read the CKIP credentials from disk: the first line is passed as the first
# CKIPSegmenter argument and the second line as the second argument.
# ``with`` makes sure the file handle is closed once the lines are read.
with codecs.open("ckip_account.txt", 'r') as f_account:
    account_info = f_account.readlines()

# Strip only the line terminator.  Slicing with [:-1] would cut a real
# character when the last line has no trailing newline, and would leave a
# stray '\r' behind for files with CRLF line endings.
segmenter = CKIPSegmenter(account_info[0].rstrip('\r\n'),
                          account_info[1].rstrip('\r\n'))

# ``j`` counts every part visited (including skipped ones) and is only used
# for the progress output below.
j = 0
words = []
for sentence in content:

    # Each entry of ``content`` is a list of sentence parts.
    for line in sentence:
        j += 1
        # Parts that consist of a lone newline are not sent to the segmenter.
        if line == u"\n":
            continue

        # Progress indicator "<current>/<total parts>", followed by the text.
        print(str(j) + "/" + str(num_part_sentence))
        print(line)

        # A ``status_code`` other than '0' is treated as a failed request.
        result = segmenter.process(line)
        if result['status_code'] != '0':
示例#5
0
 def log_in(self):
     """Create ``self.segmenter`` from the credentials in ``ckip_account.txt``.

     The first line of the file is passed as the first ``CKIPSegmenter``
     argument and the second line as the second argument.

     Raises:
         IndexError: If the file contains fewer than two lines.
     """
     # ``with`` closes the file handle as soon as the lines are read.
     with codecs.open("ckip_account.txt", 'r') as f_account:
         account_info = f_account.readlines()
     # Strip only the line terminator.  Slicing with [:-1] would cut a real
     # character when the last line has no trailing newline, and would leave
     # a stray '\r' behind for files with CRLF line endings.
     self.segmenter = CKIPSegmenter(account_info[0].rstrip('\r\n'),
                                    account_info[1].rstrip('\r\n'))
示例#6
0
from ckip import CKIPSegmenter, CKIPParser


def traverse(root):
    """Yield every leaf beneath ``root`` in left-to-right, depth-first order.

    A node that contains a ``'child'`` key is treated as internal and its
    ``'child'`` entries are visited in order; any node without that key is a
    leaf and is yielded unchanged.
    """
    pending = [root]
    while pending:
        node = pending.pop()
        if 'child' not in node:
            yield node
            continue
        # Push the children reversed so the leftmost one is popped first.
        pending.extend(reversed(list(node['child'])))


# Usage example of the CKIPSegmenter class.
# Replace the placeholder username/password with real account credentials.
segmenter = CKIPSegmenter('YOUR USERNAME', 'YOUR PASSWORD')
result = segmenter.process('這是一隻可愛的小花貓')
# A ``status_code`` other than '0' is reported as a failure; the script only
# prints the status and keeps going.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# ``result['result']`` is iterated as sentences, each a sequence of entries
# carrying a ``term`` and its ``pos`` tag.
for sentence in result['result']:
    for term in sentence:
        print(term['term'], term['pos'])


# Usage example of the CKIPParser class.
# Same call pattern and failure check as the segmenter above; note that
# ``result`` is rebound to the parser's response here.
parser = CKIPParser('YOUR USERNAME', 'YOUR PASSWORD')
result = parser.process('這是一隻可愛的小花貓')
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])