def CkipReturn(in_text):
    """Segment ``in_text`` with the CKIP service and return the raw result.

    ``in_text`` is a string; it is converted with ``unicode()`` before being
    handed to ``segmenter.process``.  If that first attempt raises an
    ordinary exception, the literal text ``'got an error'`` is segmented
    instead and that result is returned.
    """
    # NOTE(review): the account name and password are hard-coded in source;
    # consider loading them from configuration instead.
    segmenter = CKIPSegmenter('changcheng.tu', 'a10206606')
    try:
        segmented_in_text_result = segmenter.process(unicode(in_text))
    except Exception:
        # Previously a bare ``except:``.  Narrowed so KeyboardInterrupt and
        # SystemExit propagate, while the best-effort fallback is kept for
        # every ordinary failure.
        segmented_in_text_result = segmenter.process(unicode('got an error'))
    return segmented_in_text_result
def to_ckip(inp):
    """Segment ``inp`` with CKIP and return words, POS tags and a summary.

    Returns a three-element list ``[words, tags, combined]``:

    * ``words`` -- every ``term['term']`` in result order,
    * ``tags`` -- the matching ``term['pos']`` values,
    * ``combined`` -- the ``word(pos)`` pairs joined by single spaces.

    Returns ``False`` (after printing ``'error :'`` and the input) when
    processing the response raises an exception.
    """
    # NOTE(review): the account name and password are hard-coded in source;
    # consider loading them from configuration instead.
    segmenter = CKIPSegmenter('Bolin', 'Bolin')  # CKIP connection account
    try:
        result = segmenter.process(inp)  # segmentation result
        if result['status_code'] != '0':  # segmentation failed
            # Only reported: whatever ``result['result']`` holds is still
            # processed below, exactly as before.
            print('Process Failure: ' + result['status'])

        words = []     # word list
        tags = []      # part-of-speech list
        labelled = []  # "word(pos)" strings
        # Single pass: the former second loop zipped the two lists that
        # were just built, so the pairs can be formatted right here.
        for sentence in result['result']:
            for term in sentence:
                word = term['term']
                pos = term['pos']
                words.append(word)
                tags.append(pos)
                labelled.append(word + '(' + pos + ')')
        return [words, tags, ' '.join(labelled)]
    except Exception:
        # Previously a bare ``except:``; narrowed so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        print('error :', inp)
        return False
noun_terms_arr = [] verb_terms_arr = [] words_use = pseg.cut(test_sent) for word_use in words_use: if re.match("n", word_use.flag) != None and word_use.word not in noun_terms_arr: noun_terms_arr.append(word_use.word) if re.match("v", word_use.flag) != None and word_use.word not in verb_terms_arr: verb_terms_arr.append(word_use.word) result_arr.extend(noun_terms_arr) result_arr.extend(verb_terms_arr) print("no ckip") print(result_arr) return (result_arr, noun_terms_arr, verb_terms_arr) segmenter = CKIPSegmenter('gcsn', 'rb303147258') parser = CKIPParser('gcsn', 'rb303147258') client = MongoClient('mongodb://localhost:27017/') db = client['councilor'] collection = db['ntp_bills'] # collection_save = db['test'] bills = list(collection.find()) for bill in bills: description_verb = [] description_nonu = [] description_term = [] try: result = segmenter.process(bill["description"]) if result['status_code'] != '0':
sentence = sentence[pos + 1:] pos = sentence.find(u"。") if pos == -1: pos = sentence.find(u"?") if len(part_sentence) == 0: part_sentence.append(u"".join(sentence)) num_part_sentence += 1 content.append(part_sentence) f_account = codecs.open("ckip_account.txt", 'r') account_info = f_account.readlines() segmenter = CKIPSegmenter(account_info[0][:-1], account_info[1][:-1]) j = 0 words = [] for sentence in content: for line in sentence: j += 1 if line == u"\n": continue print(str(j) + "/" + str(num_part_sentence)) print(line) result = segmenter.process(line) if result['status_code'] != '0':
def log_in(self):
    """Create ``self.segmenter`` from the credentials in ``ckip_account.txt``.

    The file is read from the current working directory.  Its first line is
    passed to ``CKIPSegmenter`` as the first argument and its second line as
    the second argument, each with the trailing line terminator removed.

    Raises:
        IOError/OSError: if the file cannot be opened.
        IndexError: if the file has fewer than two lines.
    """
    # ``with`` guarantees the handle is closed; it used to be left open.
    with codecs.open("ckip_account.txt", 'r') as f_account:
        account_info = f_account.readlines()

    # Strip only the line terminator.  The old ``[:-1]`` slice removed a real
    # character when the last line had no trailing newline and left a stray
    # '\r' behind on CRLF files.
    first_field = account_info[0].rstrip('\r\n')
    second_field = account_info[1].rstrip('\r\n')
    self.segmenter = CKIPSegmenter(first_field, second_field)
from ckip import CKIPSegmenter, CKIPParser


def traverse(root):
    """Yield every leaf node reachable from the tree node ``root``.

    A node without a ``'child'`` key is a leaf and is yielded itself;
    otherwise the leaves of each entry in ``root['child']`` are yielded in
    order.
    """
    if 'child' not in root:
        # Leaf: nothing below it to descend into.
        yield root
        return

    for child in root['child']:
        for leaf in traverse(child):
            yield leaf


# --- CKIPSegmenter usage example -------------------------------------------
segmenter = CKIPSegmenter('YOUR USERNAME', 'YOUR PASSWORD')
result = segmenter.process('這是一隻可愛的小花貓')

if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# Print each term together with its part-of-speech tag.
for sentence in result['result']:
    for term in sentence:
        print(term['term'], term['pos'])


# --- CKIPParser usage example ----------------------------------------------
parser = CKIPParser('YOUR USERNAME', 'YOUR PASSWORD')
result = parser.process('這是一隻可愛的小花貓')

if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])