Example #1
def load_from_meta_file(save_dir: str,
                        meta_filename='meta.json',
                        transform_only=False,
                        load_kwargs=None,
                        **kwargs) -> Component:
    identifier = save_dir
    load_path = save_dir
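    # resolve the identifier (a local path or a downloadable URL) to a local directory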
    save_dir = get_resource(save_dir)
    if save_dir.endswith('.json'):
        meta_filename = os.path.basename(save_dir)
        save_dir = os.path.dirname(save_dir)
    metapath = os.path.join(save_dir, meta_filename)
    if not os.path.isfile(metapath):
        tips = ''
        if save_dir.isupper():
            from difflib import SequenceMatcher
            similar_keys = sorted(pretrained.ALL.keys(),
                                  key=lambda k: SequenceMatcher(
                                      None, save_dir, k).ratio(),
                                  reverse=True)[:5]
            tips = f'Check its spelling based on the available keys:\n' + \
                   f'{sorted(pretrained.ALL.keys())}\n' + \
                   f'Tips: it might be one of {similar_keys}'
        raise FileNotFoundError(
            f'The identifier {save_dir} resolves to a nonexistent meta file {metapath}. {tips}'
        )
    meta: dict = load_json(metapath)
    cls = meta.get('class_path', None)
    assert cls, f'{meta_filename} doesn\'t contain class_path field'
    try:
        obj: Component = object_from_class_path(cls, **kwargs)
        if hasattr(obj, 'load'):
            if transform_only:
                # noinspection PyUnresolvedReferences
                obj.load_transform(save_dir)
            else:
                if load_kwargs is None:
                    load_kwargs = {}
                if os.path.isfile(os.path.join(save_dir, 'config.json')):
                    obj.load(save_dir, **load_kwargs)
                else:
                    obj.load(metapath, **load_kwargs)
            obj.meta['load_path'] = load_path
        return obj
    except Exception as e:
        eprint(f'Failed to load {identifier}. See stack trace below')
        traceback.print_exc()
        model_version = meta.get("hanlp_version", "unknown")
        cur_version = version.__version__
        if model_version != cur_version:
            eprint(
                f'{identifier} was created with hanlp-{model_version}, while you are running {cur_version}. '
                f'Try to upgrade hanlp with\n'
                f'pip install --upgrade hanlp\n'
                f'If the problem persists, please submit an issue to https://github.com/hankcs/HanLP/issues .'
            )
        exit(1)
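A minimal usage sketch for this loader; the import path and the directory below are assumptions for illustration, and the directory is expected to contain a component saved earlier together with its meta.json:

from hanlp.utils.component_util import load_from_meta_file  # assumed import path

component = load_from_meta_file('/path/to/save_dir')  # or a path ending in .json
print(component.meta['load_path'])                    # records the original identifier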
Example #2
 def file_to_inputs(self, filepath: str, gold=True):
     data = load_json(filepath)
     for d in data:
         tokens = list(d['token'])
         ss, se = d['subj_start'], d['subj_end']
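         # 'os'/'oe' are the object span start/end; avoid confusing the local name 'os' with the os module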
         os, oe = d['obj_start'], d['obj_end']
         pos = d['stanford_pos']
         ner = d['stanford_ner']
         deprel = d['stanford_deprel']
         head = [int(x) for x in d['stanford_head']]
         assert any([x == 0 for x in head])
         relation = d['relation']
         yield (tokens, pos, ner, head, deprel, ss, se, os, oe), relation
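A sketch of the record layout this loader expects, using only the field names read above; the tokens, tags and the relation label are made up for illustration:

record = {
    'token': ['He', 'works', 'for', 'Acme', 'Inc', '.'],
    'subj_start': 0, 'subj_end': 0,            # subject span: "He"
    'obj_start': 3, 'obj_end': 4,              # object span: "Acme Inc"
    'stanford_pos': ['PRP', 'VBZ', 'IN', 'NNP', 'NNP', '.'],
    'stanford_ner': ['O', 'O', 'O', 'ORGANIZATION', 'ORGANIZATION', 'O'],
    'stanford_deprel': ['nsubj', 'root', 'case', 'compound', 'obl', 'punct'],
    'stanford_head': [2, 0, 5, 5, 2, 2],       # 1-based heads; 0 marks the root
    'relation': 'per:employee_of',
}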
Example #3
def merge_long_sent(file, lang=None):
    if not lang:
        lang = os.path.basename(file).split('.')[0]
    long_sent: dict = load_json(
        f'data/iwpt2020/test-udpipe/{lang}.fixed.long.json')
    long_sent = dict((int(x), y) for x, y in long_sent.items())
    idx = 0
    fout = file.replace('.short', '')
    with open(fout, 'w') as out:
        for sent in load_conll(file):
            long = long_sent.get(idx, None)
            if long:
                out.write(f'{long}\n\n')
                idx += 1
            out.write(f'{sent}\n\n')
            idx += 1
    return fout
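The key conversion right after load_json is needed because JSON object keys are always strings; a self-contained illustration of the same pattern:

import json

long_sent = json.loads('{"3": "a very long sentence"}')  # keys come back as strings
long_sent = {int(k): v for k, v in long_sent.items()}    # restore integer indices
assert long_sent[3] == 'a very long sentence'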
Example #4
def eval_sdp_and_ensemble(parser,
                          devfile,
                          dep_dev_output,
                          save_dir,
                          lang,
                          logger,
                          do_eval=True):
    long_sent: dict = load_json(devfile.replace('.short.conllu', '.long.json'))
    long_sent = dict((int(x), y) for x, y in long_sent.items())
    sdp_dev_output = f'{save_dir}/{os.path.basename(devfile.replace(".conllu", ".sdp.pred.conllu"))}'
    sdp_dev_output = sdp_dev_output.replace('.short', '')
    if not os.path.isfile(sdp_dev_output) or do_eval:
        if not parser.model:
            parser.load(save_dir)
        scores = parser.evaluate(devfile,
                                 save_dir,
                                 warm_up=False,
                                 ret_scores=True,
                                 logger=logger,
                                 batch_size=256 if lang == 'cs' else None)[-1]
        sdp_to_dag(parser, scores, sdp_dev_output, long_sent)
    score = evaluate(devfile.replace('.short', ''), sdp_dev_output)
    final_sdp_dev_output = sdp_dev_output.replace('.conllu', '.fixed.conllu')
    sdp_elas = score["ELAS"].f1
    sdp_clas = score["CLAS"].f1
    logger.info(f'SDP score for {lang}:')
    logger.info(f'ELAS: {sdp_elas * 100:.2f} - CLAS:{sdp_clas * 100:.2f}')
    print(f'Model saved in {save_dir}')
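    # ensemble pass: sdp_to_dag additionally receives the dependency parser output (dep_dev_output)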
    ensemble_output = f'{save_dir}/{os.path.basename(devfile.replace(".conllu", ".ensemble.pred.conllu"))}'
    if not os.path.isfile(sdp_dev_output) or do_eval:
        sdp_to_dag(parser, scores, ensemble_output, long_sent, dep_dev_output)
    score = evaluate(devfile.replace('.short', ''), ensemble_output)
    final_ensemble_output = ensemble_output.replace('.conllu', '.fixed.conllu')
    logger.info(f'Ensemble score for {lang}:')
    ensemble_elas = score["ELAS"].f1
    logger.info(
        f'ELAS: {ensemble_elas * 100:.2f} - CLAS:{score["CLAS"].f1 * 100:.2f}')
    return (sdp_elas, final_sdp_dev_output), (ensemble_elas,
                                              final_ensemble_output)
Example #5
 def from_meta(meta: Union[dict, str], **kwargs):
     if isinstance(meta, str):
         meta = load_json(meta)
     return Pipeline(*[load_from_meta(pipe) for pipe in meta['pipes']])
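A usage sketch; the path below is a placeholder for a meta.json written by a previously saved Pipeline:

pipeline = Pipeline.from_meta('/path/to/pipeline/meta.json')  # also accepts an already parsed dict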
Example #6
 def load(self, filepath):
     meta = load_json(filepath)
     self.clear()
     self.extend(Pipeline.from_meta(meta))
Example #7
 def load_json(self, path):
     item = load_json(path)
     return self.copy_from(item)
Example #8
 def load_meta(self, save_dir, filename='meta.json'):
     save_dir = get_resource(save_dir)
     metapath = os.path.join(save_dir, filename)
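     # merge any saved metadata into self.meta; a missing meta.json is silently ignored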
     if os.path.isfile(metapath):
         self.meta.update(load_json(metapath))