示例#1
0
    def run(self, key, ctx:Context):
        """Test the lemma (or chunk pieces) for ``key`` against ``self.target``.

        The lemma is the text after the last ``'/'`` when ``key`` contains one
        (keys formatted like ``'word/lemma'``); otherwise it is looked up in
        ``ctx.lemmas[key]``.

        ``self.match_method`` selects the comparison:

        * ``'equals'`` -- ``lemma == self.target``
        * ``'in'``     -- ``lemma in self.target``
        * ``'chunk'``  -- the target (or any item of a target list) is found
          in ``ctx.chunk_pieces(key, lowercase=True)``
        * ``'glob'``   -- the lemma matches the target as an ``fnmatch``
          pattern
        * ``'regex'``  -- the target regex matches at the start of the lemma

        Raises:
            ValueError: if ``self.match_method`` is none of the above.
        """
        import fnmatch
        import re

        method = self.match_method
        lemma = key.rpartition('/')[2] if '/' in key else ctx.lemmas[key]

        if method == 'equals':
            return lemma == self.target
        if method == 'in':
            return lemma in self.target
        if method == 'chunk':
            # A single target is treated as a one-item list of candidates.
            candidates = self.target if isinstance(self.target, list) else [self.target]
            # The chunk pieces are requested afresh for every candidate and
            # the search stops at the first hit.
            return any(item in ctx.chunk_pieces(key, lowercase=True)
                       for item in candidates)
        if method in ('glob', 'regex'):
            # A glob is translated to its regex form; both are then matched
            # from the start of the lemma.
            pattern = fnmatch.translate(self.target) if method == 'glob' else self.target
            return re.match(pattern, lemma) is not None
        raise ValueError(f"Cannot support match method {self.match_method}")
示例#2
0
def expand(dispathcer: DispatcherIntf, data, keys, specific_domains):
    """Feed every chunk piece of the given keys to the dispatcher.

    Domains and metadata are requested for ``data`` through an
    ``InspectorFixture`` and wrapped in a ``Context``; each piece returned by
    ``Context.chunk_pieces`` for each key is then passed to
    ``dispathcer.execute`` in order.

    ``specific_domains`` is accepted but not used by this function.
    """
    domains, meta = InspectorFixture().request_domains(data)
    context = Context(meta, domains)
    # Lazily flatten keys -> pieces so execution stays interleaved per key.
    pieces = (piece for key in keys for piece in context.chunk_pieces(key))
    for piece in pieces:
        dispathcer.execute(piece)
示例#3
0
    def run(self, key, ctx:Context):
        """Return whether the selected provider accepts ``key``.

        The provider is ``self.providers[self.provider]`` and is called with
        the language taken from ``ctx.meta['lang']``. When ``self.entire`` is
        truthy it is called once as ``(key, lang, ctx, 'sents')``; otherwise
        it is called as ``(piece, lang, ctx, key)`` for every piece from
        ``ctx.chunk_pieces(key)``.

        All provider calls are made before the outcomes are combined, so no
        call is skipped after an earlier success.

        Returns:
            ``True`` if any provider call returned a truthy value.
        """
        lang = ctx.meta['lang']

        if self.entire:
            outcomes = [self.providers[self.provider](key, lang, ctx, 'sents')]
        else:
            outcomes = [self.providers[self.provider](piece, lang, ctx, key)
                        for piece in ctx.chunk_pieces(key)]
        return any(outcomes)
示例#4
0
 def run(self, key, ctx:Context):
     """Report whether any chunk piece of ``key`` contains entity ``self.dim``.

     Each piece from ``ctx.chunk_pieces(key)`` is sent, together with the
     language from ``ctx.meta['lang']``, to an entity query. Language ``'ru'``
     goes through ``query_entities_by_url`` with the ``'ner_ru'`` endpoint;
     every other language goes through ``query_entities``.

     Responses whose ``'result'`` is not ``'success'`` are ignored. Every
     piece is queried even after a match has been found.

     Returns:
         ``True`` if ``self.dim`` appeared among the entities of at least one
         successful response, else ``False``.
     """
     lang = ctx.meta['lang']
     # Per-language overrides of the default entity query.
     requestors={'ru':lambda rc: query_entities_by_url(cf.ensure('ner_ru'), rc),
                 }
     override = requestors.get(lang)

     found = False
     for piece in ctx.chunk_pieces(key):
         payload = {'lang': lang, 'sents': piece}
         resp = override(payload) if override is not None else query_entities(payload)
         if resp['result'] != 'success':
             continue
         entities = [item['entity'] for item in resp['data']]
         logger.info('entities -> %s, self.dim -> %s', ', '.join(entities), self.dim)
         if self.dim in entities:
             print('\t%s ∈' % piece, self.dim)
             found = True
     return found
示例#5
0
    def run(self, key, ctx: Context):
        """Run every extractor named in ``self.comp_as`` and record the outputs.

        Outputs are stored in ``self.results`` as lists of
        ``(extractor_name, output)`` tuples:

        * under ``'_'`` when ``self.pickup`` is ``'_'`` or a full domain path
          (one tuple per extractor);
        * otherwise under the effective key (``self.pickup`` if set, else
          ``key``), one tuple per extractor per piece from
          ``ctx.chunk_pieces``. If there are no pieces, no entry is written.

        Args:
            key: Key supplied by the caller. When ``self.pickup`` is ``'_'``
                this key is itself the value handed to the extractors.
            ctx: Context that supplies ``chunk_pieces`` and is passed through
                to every extractor.

        Returns:
            Always ``True``: this step only extracts and does not take part
            in the match decision.

        Raises:
            KeyError: if ``self.comp_as`` names an extractor that is not in
                the table below.
        """
        # When pickup is '_', the incoming key is itself the value to extract from.
        comp_val = key if self.pickup == '_' else ''
        key = self.pickup or key

        def dims(dim):
            # Build an extractor that calls ex_dims for one dimension name.
            return lambda cnt, comp: ex_dims(key, cnt, comp, ctx, dim)

        def chunk(project):
            # Build an extractor that calls ex_chunk with `project` as its
            # last argument.
            return lambda cnt, comp: ex_chunk(key, cnt, comp, ctx, project)

        ex_map = {
            'date_search': lambda cnt, comp: ex_date_search(key, cnt, comp, ctx),
            # example: extract_for('plain+date_search+date_parse', '時間')
            'date_parse': lambda cnt, comp: ex_date_parse(key, cnt, comp, ctx),
            'plain': lambda cnt, comp: ex_plain(key, cnt, comp, ctx),
            'word': lambda cnt, comp: ex_word(key, cnt, comp, ctx),
            # example: extract_for('plain+translit', 'obj')
            'translit': lambda cnt, comp: ex_translit(key, cnt, comp, ctx),
            'email': dims('email'),
            # example: extract_for('number', 'obl')
            'number': dims('number'),
            # example: extract_for('time', 'advmod')
            'time': dims('time'),
            # example: extract_for('plain+temperature', 'ニ')
            'temperature': dims('temperature'),
            # example: extract_for('rasa', '_')
            'rasa': lambda cnt, comp: ex_rasa(key, cnt, comp, ctx),
            # example: extract_for('chunk', 'verb:xcomp/obj')
            'chunk': chunk(lambda w: (w.text, w.upos.lower())),
            # example: extract_for('chunk_text', 'verb:xcomp/obj')
            'chunk_text': chunk(lambda w: w.text),
            'chunk_feats': chunk(lambda w: w.feats),
            # example: extract_for('feats', 'verb:_'),
            #          extract_for('feats', 'verb:obj')
            'feats': lambda cnt, comp: ex_feats(key, cnt, comp, ctx),
            # example: extract_for('ner', '_'), extract_for('ner', 'xcomp')
            'ner': lambda cnt, comp: ex_ner(key, cnt, comp, ctx),
        }

        if self.pickup == '_' or is_full_domain_path(self.pickup):
            collected = self.results['_'] = []
            for comp in self.comp_as:
                collected.append((comp, ex_map[comp](comp_val, comp)))
        else:
            # The result list is created once, on the first piece, and then
            # shared by all pieces. Re-creating it for every piece would
            # discard the extractions of all but the last piece.
            collected = None
            for cnt in ctx.chunk_pieces(key):
                if collected is None:
                    collected = self.results[key] = []
                for comp in self.comp_as:
                    collected.append((comp, ex_map[comp](cnt, comp)))

        # Extraction only; it never vetoes the match, so always report success.
        return True