def run(self, key, ctx:Context):
    """Check the lemma (or chunk pieces) selected by ``key`` against ``self.target``.

    The comparison is chosen by ``self.match_method``:

    * ``'equals'`` -- the lemma equals ``self.target``.
    * ``'in'`` -- the lemma is contained in ``self.target``.
    * ``'chunk'`` -- ``self.target`` (or, when it is a list, any of its
      items) is contained in ``ctx.chunk_pieces(key, lowercase=True)``.
    * ``'glob'`` -- ``self.target`` is translated with ``fnmatch.translate``
      and matched against the lemma.
    * ``'regex'`` -- ``self.target`` is compiled as a regular expression and
      matched from the start of the lemma.

    Args:
        key: Either a key of ``ctx.lemmas`` or a string formatted like
            ``'word/lemma'``, in which case the part after the last ``/``
            is used as the lemma.
        ctx: Context providing ``lemmas`` and ``chunk_pieces``.

    Returns:
        ``True`` when the selected comparison succeeds, otherwise ``False``.

    Raises:
        ValueError: If ``self.match_method`` is none of the methods above.
    """
    import fnmatch
    import re

    # A key formatted like 'word/lemma' carries its own lemma after the slash.
    lemma = key.split('/')[-1] if '/' in key else ctx.lemmas[key]

    if self.match_method == 'equals':
        return lemma == self.target

    if self.match_method == 'in':
        return lemma in self.target

    if self.match_method == 'chunk':
        if not isinstance(self.target, list):
            return self.target in ctx.chunk_pieces(key, lowercase=True)
        # The pieces are requested anew for every candidate, and the scan
        # stops at the first candidate that is found.
        return any(
            candidate in ctx.chunk_pieces(key, lowercase=True)
            for candidate in self.target
        )

    if self.match_method == 'glob':
        pattern = re.compile(fnmatch.translate(self.target))
        return pattern.match(lemma) is not None

    if self.match_method == 'regex':
        pattern = re.compile(self.target)
        return pattern.match(lemma) is not None

    raise ValueError(f"Cannot support match method {self.match_method}")
def expand(dispathcer: DispatcherIntf, data, keys, specific_domains):
    """Feed every chunk piece of every key in ``keys`` to the dispatcher.

    Domains and meta are requested for ``data`` through an
    ``InspectorFixture``, wrapped in a ``Context``, and each piece returned
    by ``Context.chunk_pieces`` is passed to ``dispathcer.execute``.

    Args:
        dispathcer: Object whose ``execute`` method receives each chunk piece.
        data: Input handed to ``InspectorFixture.request_domains``.
        keys: Iterable of keys whose chunk pieces are dispatched, in order.
        specific_domains: Accepted but not used by this function.
    """
    domains, meta = InspectorFixture().request_domains(data)
    context = Context(meta, domains)

    # Lazy flattening: pieces are still produced key by key, and each one is
    # executed before the next is requested.
    pieces = (piece for key in keys for piece in context.chunk_pieces(key))
    for piece in pieces:
        dispathcer.execute(piece)
def run(self, key, ctx:Context):
    """Call the configured provider and report whether any call was truthy.

    When ``self.entire`` is set, the provider is called once with ``key``
    itself and the literal ``'sents'`` as its last argument. Otherwise it is
    called once per piece of ``ctx.chunk_pieces(key)``, with ``key`` as its
    last argument.

    Args:
        key: Key handed to the provider or used to look up chunk pieces.
        ctx: Context providing ``meta['lang']`` and ``chunk_pieces``.

    Returns:
        ``True`` if at least one provider call returned a truthy value.
    """
    lang = ctx.meta['lang']

    if self.entire:
        outcomes = [self.providers[self.provider](key, lang, ctx, 'sents')]
    else:
        # Built eagerly on purpose: the provider runs for every piece, even
        # after an earlier piece has already produced a truthy outcome.
        outcomes = [
            self.providers[self.provider](piece, lang, ctx, key)
            for piece in ctx.chunk_pieces(key)
        ]

    return any(outcomes)
def run(self, key, ctx:Context):
    """Report whether any chunk piece of ``key`` yields the entity ``self.dim``.

    For each piece of ``ctx.chunk_pieces(key)`` a request of the form
    ``{'lang': ..., 'sents': piece}`` is sent. Language ``'ru'`` goes through
    ``query_entities_by_url`` with the ``'ner_ru'`` setting; every other
    language goes through ``query_entities``. Responses whose ``'result'`` is
    not ``'success'`` are ignored.

    Args:
        key: Key whose chunk pieces are examined.
        ctx: Context providing ``meta['lang']`` and ``chunk_pieces``.

    Returns:
        ``True`` if ``self.dim`` appeared among the ``'entity'`` values of at
        least one successful response, otherwise ``False``.
    """
    lang = ctx.meta['lang']

    # Languages with a dedicated requestor; all others use query_entities.
    requestors = {
        'ru': lambda rc: query_entities_by_url(cf.ensure('ner_ru'), rc),
    }

    matched = False
    # Every piece is queried; the loop does not stop at the first hit.
    for piece in ctx.chunk_pieces(key):
        payload = {'lang': lang, 'sents': piece}
        resp = requestors[lang](payload) if lang in requestors else query_entities(payload)
        if resp['result'] != 'success':
            continue

        dims = [item['entity'] for item in resp['data']]
        logger.info('entities -> %s, self.dim -> %s', ', '.join(dims), self.dim)
        if self.dim in dims:
            print('\t%s ∈' % piece, self.dim)
            matched = True

    return matched
def run(self, key, ctx: Context):
    """Run every extractor named in ``self.comp_as`` and store the outcomes.

    Outcomes are stored in ``self.results`` as a list of ``(comp, value)``
    tuples:

    * under ``'_'`` when ``self.pickup`` is ``'_'`` or
      ``is_full_domain_path(self.pickup)`` is true -- each extractor is
      called once;
    * otherwise under the resolved key (``self.pickup`` when set, else the
      incoming ``key``) -- each extractor is called once per piece of
      ``ctx.chunk_pieces(key)`` and the outcomes of all pieces accumulate
      in the same list.

    The target list is reset once per call, before any extractor runs, so
    the per-chunk branch keeps the outcomes of every piece rather than only
    those of the last one.

    Args:
        key: Incoming key; used as the value itself when ``self.pickup`` is
            ``'_'``, and as the lookup key when ``self.pickup`` is empty.
        ctx: Context handed to every extractor and used for ``chunk_pieces``.

    Returns:
        Always ``True``: this step only extracts and takes no part in the
        verdict.

    Raises:
        KeyError: If an entry of ``self.comp_as`` is not a known extractor.
    """
    # When pickup is '_', the incoming key itself is the value.
    comp_val = key if self.pickup == '_' else ''
    key = self.pickup or key

    ex_map = {
        'date_search': lambda cnt, comp: ex_date_search(key, cnt, comp, ctx),
        # example: extract_for('plain+date_search+date_parse', '時間')
        'date_parse': lambda cnt, comp: ex_date_parse(key, cnt, comp, ctx),
        'plain': lambda cnt, comp: ex_plain(key, cnt, comp, ctx),
        'word': lambda cnt, comp: ex_word(key, cnt, comp, ctx),
        # example: extract_for('plain+translit', 'obj')
        'translit': lambda cnt, comp: ex_translit(key, cnt, comp, ctx),
        'email': lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'email'),
        # example: extract_for('number', 'obl')
        'number': lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'number'),
        # example: extract_for('time', 'advmod')
        'time': lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'time'),
        # example: extract_for('plain+temperature', 'ニ')
        'temperature': lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'temperature'),
        # example: extract_for('rasa', '_')
        'rasa': lambda cnt, comp: ex_rasa(key, cnt, comp, ctx),
        # example: extract_for('chunk', 'verb:xcomp/obj')
        'chunk': lambda cnt, comp: ex_chunk(key, cnt, comp, ctx,
                                            lambda w: (w.text, w.upos.lower())),
        # example: extract_for('chunk_text', 'verb:xcomp/obj')
        'chunk_text': lambda cnt, comp: ex_chunk(key, cnt, comp, ctx, lambda w: w.text),
        'chunk_feats': lambda cnt, comp: ex_chunk(key, cnt, comp, ctx, lambda w: w.feats),
        # example: extract_for('feats', 'verb:_'), extract_for('feats', 'verb:obj')
        'feats': lambda cnt, comp: ex_feats(key, cnt, comp, ctx),
        # example: extract_for('ner', '_'), extract_for('ner', 'xcomp')
        'ner': lambda cnt, comp: ex_ner(key, cnt, comp, ctx),
    }

    if self.pickup == '_' or is_full_domain_path(self.pickup):
        self.results['_'] = []
        for comp in self.comp_as:
            self.results['_'].append((comp, ex_map[comp](comp_val, comp)))
    else:
        # Reset once, before the loop. Resetting inside the loop would let
        # each chunk piece wipe the outcomes of the pieces before it.
        self.results[key] = []
        for cnt in ctx.chunk_pieces(key):
            for comp in self.comp_as:
                self.results[key].append((comp, ex_map[comp](cnt, comp)))

    # Only extracts; takes no part in the verdict, so always returns True.
    return True