Example #1
def test_iterations(config, model_type):
    filename = "test_files/models/%s_%s_iterations" % (FORMATS[0], model_type)
    remove_existing(filename)
    config.update(dict(classifier=model_type))
    passages = list(map(load_passage, passage_files(FORMATS[0])))
    last = 0
    iterations = []
    # Plain ints grow a schedule of Iterations specs; (epochs, True) tuples
    # pass the bare epoch count straight to train() instead
    for i in 2, 5, 9, (11, True), (4, True):
        if isinstance(i, tuple):
            i, simple = i
        else:
            simple = False
            iterations.append(Iterations("%d --word-dim=%d" % (i, i)))
        scores = list(Parser(model_files=filename, config=config).train(
            passages, dev=passages, iterations=i if simple else iterations))
        assert max(0, i - last) == len(scores)  # one score per newly trained epoch
        last = i
    # Number of epochs must be strictly increasing, so these schedules are rejected
    for iterations in ((3, 2), (4, 4)):
        with pytest.raises(ValueError):
            list(Parser(model_files=filename, config=config).train(
                passages, dev=passages, iterations=iterations))
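This test exercises both forms that train() accepts for its iterations argument: a bare epoch count, or a list of Iterations specs whose epoch targets must strictly increase (hence the ValueError check). A minimal sketch of the two call shapes, assuming config and passages prepared as in the test; the import location of Iterations, the model path, and the dimensions are assumptions:

from tupa.parse import Parser
from tupa.config import Iterations  # assumed import location

parser = Parser(model_files="test_files/models/sketch", config=config)

# Form 1: a plain int -- train up to that many epochs
list(parser.train(passages, dev=passages, iterations=5))

# Form 2: a staged schedule; each Iterations spec can override hyperparameters
# for its stage, and the epoch targets must be strictly increasing
list(parser.train(passages, dev=passages,
                  iterations=[Iterations("5 --word-dim=5"),
                              Iterations("8 --word-dim=8")]))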
Example #2
def test_empty_features(empty_features_config, model_type):
    filename = "test_files/models/%s_%s_empty_features" % (FORMATS[0], model_type)
    remove_existing(filename)
    empty_features_config.update(dict(classifier=model_type))
    passages = list(map(load_passage, passage_files(FORMATS[0])))
    p = Parser(model_files=filename, config=empty_features_config)
    list(p.train(passages, dev=passages, test=True, iterations=2))
    list(p.parse(passages, evaluate=True))
Example #3
def test_ensemble(config, model_type):
    config.update(dict(classifier=model_type, lstm_layers=0))
    filenames = ["test_files/models/%s_%s_ensemble%d" % (FORMATS[0], model_type, i) for i in range(1, 3)]
    passages = list(map(load_passage, passage_files(FORMATS[0])))
    for i, filename in enumerate(filenames, start=1):
        config.update(dict(seed=i))
        remove_existing(filename)
        list(Parser(model_files=filename, config=config).train(passages, dev=passages, iterations=2))
    list(Parser(model_files=filenames, config=config).parse(passages, evaluate=True))
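The ensemble works because Parser also accepts a list of model files, loading all of them and combining their predictions at parse time. A minimal sketch, with placeholder paths and the config and passages from the test:

ensemble = Parser(model_files=["models/run1", "models/run2"], config=config)
results = list(ensemble.parse(passages, evaluate=True))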
Example #4
def test_copy_shared(config, model_type):
    filename = "test_files/models/%s_%s_copy_shared" % ("_".join(FORMATS), model_type)
    remove_existing(filename)
    config.update(dict(classifier=model_type, lstm_layers=0, copy_shared=[FORMATS[0]]))
    for formats in ((FORMATS[0],), FORMATS):
        p = Parser(model_files=filename, config=config)
        passages = list(map(load_passage, passage_files(*formats)))
        list(p.train(passages, dev=passages, test=True, iterations=2))
        config.update_hyperparams(ucca={"lstm_layers": 1})
Example #5
def test_extra_classifiers(config, model_type, default_setting):
    filename = "test_files/models/%s_%s%s" % (FORMATS[0], model_type, default_setting.suffix())
    remove_existing(filename)
    config.update(default_setting.dict())
    passages = list(map(load_passage, passage_files(FORMATS[0])))
    for mode in "train", "load":
        print("-- %sing %s" % (mode, model_type))
        config.update(dict(classifier=model_type, copy_shared=None))
        p = Parser(model_files=filename, config=config)
        list(p.train(passages if mode == "train" else None, dev=passages, test=True, iterations=2))
        assert p.model.is_finalized, "Model should be finalized after %sing" % mode
        assert not getattr(p.model.feature_extractor, "node_dropout", 0), p.model.feature_extractor.node_dropout
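Note the train/load trick used here (and again in Example #12 below): passing passages to train() trains and saves the model, while constructing a second Parser over the same filename and passing None merely loads and finalizes the saved model. A sketch with a placeholder path:

p = Parser(model_files="test_files/models/saved", config=config)
list(p.train(None, dev=passages, test=True, iterations=2))  # loads the saved model instead of training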
Example #6
def get_parser():
    if not UCCA_PARSER_PATH.parent.exists():
        download_ucca_model()
    update_ucca_path()
    with mock_sys_argv(['']):
        # Need to mock sys.argv, otherwise the parser will try to use the real arguments and throw an exception
        return Parser(str(UCCA_PARSER_PATH))
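A hedged usage sketch for this helper: Parser.parse consumes UCCA passages, which ucca.convert.from_text can build from plain text (the sentence and passage ID here are placeholders):

from ucca import convert

parser = get_parser()
passages = list(convert.from_text("The fox gazed at the little prince.", "p1"))
for parsed in parser.parse(passages):
    print(parsed)  # each result is a parsed UCCA passage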
Example #7
def get_parser(model_path):
    global PARSER
    global PARSER_PATH
    # Compare paths by value: "is not" checks identity and is unreliable for strings
    if PARSER_PATH != model_path or PARSER is None:
        PARSER_PATH = model_path
        PARSER = Parser(model_path)
    return PARSER
Example #8
def test_train_empty(config, model_type, default_setting):
    config.update(default_setting.dict())
    config.update(dict(classifier=model_type))
    filename = "empty"
    remove_existing(filename)
    with pytest.raises(ParserException):
        list(Parser(model_files=filename, config=config).train(load_passage("nonexistent file")))
Example #9
def get_parser():
    if app.parser is None:
        print("Initializing parser...")
        print("PARSER_MODEL=" + PARSER_MODEL)
        print("PARSER_TYPE=" + PARSER_TYPE)
        app.parser = Parser(PARSER_MODEL, PARSER_TYPE)
    return app.parser
Example #10
def get_parser():
    global PARSER
    if PARSER is None:
        # Constructing the parser at the top of the file would break any other
        # code that uses argparse, so it is created lazily here
        model_path = "/cs/labs/oabend/borgr/tupa/models/bilstm"
        PARSER = Parser(model_path, "bilstm")
    return PARSER
Example #11
    def train_test(self, model_type, compare=True):
        scores = []
        p = None
        for mode in "train", "load":
            print("-- %sing %s" % (mode, model_type))
            p = Parser(model_file="test_files/models/%s" % model_type, model_type=model_type)
            p.train(self.load_passages() if mode == "train" else None, iterations=200)
            score = evaluation.Scores.aggregate([s for _, s in p.parse(self.load_passages(), evaluate=True)])
            scores.append(score.average_f1())
            print()
        print("-- average labeled f1: %.3f, %.3f\n" % tuple(scores))
        if compare:
            self.assertAlmostEqual(*scores)
        p.parse(convert.to_text(self.load_passages()[0]))
        self.assertFalse(list(p.parse(())))  # parsing nothing returns nothing
Example #12
def test_parser(config, model_type, formats, default_setting, text=True):
    filename = "test_files/models/%s_%s%s" % ("_".join(formats), model_type, default_setting.suffix())
    remove_existing(filename)
    config.update(default_setting.dict())
    scores = []
    params = []
    passages = list(map(load_passage, passage_files(*formats)))
    evaluate = ("amr" not in formats)
    for mode in "train", "load":
        print("-- %sing %s" % (mode, model_type))
        config.update(dict(classifier=model_type, copy_shared=None))
        p = Parser(model_files=filename, config=config)
        p.save_init = True
        list(p.train(passages if mode == "train" else None, dev=passages, test=True, iterations=2))
        assert p.model.is_finalized, "Model should be finalized after %sing" % mode
        assert not getattr(p.model.feature_extractor, "node_dropout", 0), p.model.feature_extractor.node_dropout
        all_params = p.model.all_params()
        params.append(all_params)
        param1, param2 = [d.get("W") for d in (all_params, p.model.feature_extractor.params)]
        if param1 is not None and param2 and param2.init is not None and not config.args.update_word_vectors:
            # Word embeddings should stay at their initial values (up to weight decay)
            assert_allclose(param1, weight_decay(p.model) * param2.init, rtol=1e-6)
        text_results = results = list(p.parse(passages, evaluate=evaluate))
        if text:
            print("Converting to text and parsing...")
            text_results = list(p.parse(
                [p3 for p1 in passages
                 for p2 in convert.to_text(p1, sentences=False)
                 for p3 in convert.from_text(p2, p1.ID, extra_format=p1.extra.get("format"))]))
            assert len(results) == len(text_results)
        if evaluate:
            scores.append(Scores(tuple(zip(*results))[1]).average_f1())
            if text:
                for t, (r, s) in zip(text_results, results):
                    print("  %s F1=%.3f" % (r.ID, s.average_f1()))
        assert not list(p.parse(()))  # parsing nothing returns nothing
        print()
    assert_all_params_equal(*params)
    if evaluate:
        print("-- average f1: %.3f, %.3f\n" % tuple(scores))
        assert scores[0] == pytest.approx(scores[1], 0.1)
Example #13
    def __init__(self, model_prefix):
        """
        Parameters
        ----------
        model_prefix : str
            Path prefix of the trained TUPA model files to load.
        """
        # The sys.argv assignment is necessary: without it, tupa.parse.Parser will throw exceptions
        remember_argv = sys.argv
        sys.argv = ['-m', model_prefix]

        parser = Parser(model_files=model_prefix)
        parser.models[0].load()
        parser.trained = True

        self.__parser = parser

        # Since 'parse_sentence' calls 'annotate_all', which lazily instantiates a spaCy pipeline,
        # and since we want all the initialization to occur in the __init__ method, we simply call
        # 'parse_sentence' with dummy input
        self.parse_sentence('Hello dummy world')

        # undo the hack's side effect
        sys.argv = remember_argv

def get_ucca_parser():
    ucca_dir = RESOURCES_DIR / 'ucca'
    os.chdir(str(ucca_dir))
    model_path = ucca_dir / 'models/ucca-bilstm'
    vocab_path = ucca_dir / 'vocab'
    argv = ['script_name', '-m', str(model_path), '--vocab', str(vocab_path)]
    with unittest.mock.patch('sys.argv', argv):
        Config.reload()
        args = Config().args
    model_files = [base + '' + ext
                   for base, ext in map(os.path.splitext, args.models or (args.classifier,))]
    return Parser(model_files=model_files, config=Config(), beam=1)
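As with the other helpers, the returned Parser consumes UCCA passages; a hedged sketch of driving it, with placeholder input text:

from ucca import convert

parser = get_ucca_parser()
passage = next(convert.from_text("Hello dummy world", "dummy"))
parsed = list(parser.parse([passage]))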