Exemplos de Cuttor.add_stage em Python

Linguagem de programação: Python

Namespace / nome do pacote: yaha

Classe / Tipo: Cuttor

Método / Função: add_stage

Exemplos em hotexamples.com: 4

Cuttor.add_stage em Python - 4 exemplos encontrados. Estes são os exemplos reais mais bem avaliados de yaha.Cuttor.add_stage em Python, extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir / Ocultar

Cuttor(8)

set_stage1_regex(7)

cut_to_sentence(4)

cut(3)

add_stage(2)

cut_all(1)

exist(1)

tokenize(1)

word_type(1)

Métodos Frequentes

Cuttor (8)

set_stage1_regex (7)

cut_to_sentence (4)

cut (3)

add_stage (2)

cut_all (1)

exist (1)

tokenize (1)

word_type (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: analyzer.py Projeto: zmjm4/yaha

for t, v in get_dict(DICTS.EXT_STOPWORD).iteritems(): stop_words.append(t) for t, v in get_dict(DICTS.STOPWORD).iteritems(): stop_words.append(t) for t, v in get_dict(DICTS.STOP_SENTENCE).iteritems(): stop_words.append(t) STOP_WORDS = frozenset(stop_words) __init_stop_words() accepted_chars = re.compile(ur"[\u4E00-\u9FA5]+") _cuttor = Cuttor() _cuttor.set_stage1_regex(re.compile('(\d+)|([a-zA-Z]+)', re.I | re.U)) _cuttor.add_stage(SurnameCutting()) _cuttor.add_stage(SuffixCutting()) class ChineseTokenizer(Tokenizer): def __call__(self, text, **kargs): words = _cuttor.tokenize(text, search=True) token = Token() for (w, start_pos, stop_pos) in words: if not accepted_chars.match(w): if len(w) > 1: pass else: continue token.original = token.text = w token.pos = start_pos

Exemplo n.º 2

0

Exibir arquivo

Arquivo: analyzer.py Projeto: ZoeyYoung/Bookmarks_Cloud

global STOP_WORDS stop_words = [] for t,v in get_dict(DICTS.EXT_STOPWORD).iteritems(): stop_words.append(t) for t,v in get_dict(DICTS.STOPWORD).iteritems(): stop_words.append(t) for t,v in get_dict(DICTS.STOP_SENTENCE).iteritems(): stop_words.append(t) STOP_WORDS = frozenset(stop_words) __init_stop_words() accepted_chars = re.compile(ur"[\u4E00-\u9FA5]+") _cuttor = Cuttor() _cuttor.set_stage1_regex(re.compile('(\d+)|([a-zA-Z]+)', re.I|re.U)) _cuttor.add_stage(SurnameCutting()) _cuttor.add_stage(SuffixCutting()) class ChineseTokenizer(Tokenizer): def __call__(self,text,**kargs): words = _cuttor.tokenize(text, search=True) token = Token() for (w,start_pos,stop_pos) in words: if not accepted_chars.match(w): if len(w)>1: pass else: continue token.original = token.text = w token.pos = start_pos token.startchar = start_pos

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_cuttor.py Projeto: ouyanghuangzheng/yaha

# Register the cutting stages on `cuttor` (created earlier, outside this
# excerpt). Commented-out lines are alternative configurations kept for
# reference.
#cuttor.set_topk(3)

# Use stage 1 to cut english and number
# (raw string so that \d reaches the regex engine instead of being parsed
# as an invalid string escape)
cuttor.set_stage1_regex(re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

# Or use stage 2 to cut english and number
#cuttor.add_stage(RegexCutting(re.compile(r'\d+', re.I|re.U)))
#cuttor.add_stage(RegexCutting(re.compile(r'[a-zA-Z]+', re.I|re.U)))

# Use stage 3 to cut chinese name
#surname = SurnameCutting()
#cuttor.add_stage(surname)

# Or use stage 4 to cut chinese name
surname = SurnameCutting2()
cuttor.add_stage(surname)

# Use stage 4 to cut chinese address or english name
suffix = SuffixCutting()
cuttor.add_stage(suffix)

#seglist = cuttor.cut(str)
#print '\nCut with name \n%s\n' % ','.join(list(seglist))

#seglist = cuttor.cut_topk(str, 3)
#for seg in seglist:
#    print ','.join(seg)

#for s in cuttor.cut_to_sentence(str):
#    print s

Exemplo n.º 4

0

Exibir arquivo

# Register the cutting stages on `cuttor` (created earlier, outside this
# excerpt). Commented-out lines are alternative configurations kept for
# reference.
#cuttor.set_topk(3)

# Use stage 1 to cut english and number
# (raw string so that \d reaches the regex engine instead of being parsed
# as an invalid string escape)
cuttor.set_stage1_regex(re.compile(r'(\d+)|([a-zA-Z]+)', re.I | re.U))

# Or use stage 2 to cut english and number
#cuttor.add_stage(RegexCutting(re.compile(r'\d+', re.I|re.U)))
#cuttor.add_stage(RegexCutting(re.compile(r'[a-zA-Z]+', re.I|re.U)))

# Use stage 3 to cut chinese name
#surname = SurnameCutting()
#cuttor.add_stage(surname)

# Or use stage 4 to cut chinese name
surname = SurnameCutting2()
cuttor.add_stage(surname)

# Use stage 4 to cut chinese address or english name
suffix = SuffixCutting()
cuttor.add_stage(suffix)

#seglist = cuttor.cut(str)
#print '\nCut with name \n%s\n' % ','.join(list(seglist))

#seglist = cuttor.cut_topk(str, 3)
#for seg in seglist:
#    print ','.join(seg)

#for s in cuttor.cut_to_sentence(str):
#    print s