Пример #1
0
def evaluate(schema_name, use_gold_edu=False):
    """Parse every item of the CDTB test split and print evaluation reports.

    A pipeline is created with ``schemas.create_pipeline(schema_name)`` and the
    corpus is loaded with ``dataset.load_cdtb_by_config()``. Each item of
    ``cdtb.test`` is parsed, the parses are scored against the same test items
    with ``CDTBMetrics``, and the reports are printed to stdout.

    Args:
        schema_name: Identifier handed to ``schemas.create_pipeline``.
        use_gold_edu: When truthy, each item is passed through ``gold.strip()``
            and then ``pipeline.annotate`` (presumably keeping the gold EDU
            segmentation -- confirm against the pipeline implementation), and
            the segmenter report is not printed. When falsy, the pipeline is
            called directly with the item's label, text, span bounds and info.

    Returns:
        None. All results are printed.
    """
    pipeline = schemas.create_pipeline(schema_name)
    cdtb = dataset.load_cdtb_by_config()

    # Choose the per-item parsing strategy once, together with its status message.
    if use_gold_edu:
        print("parsing with gold EDU")

        def parse_item(gold):
            return pipeline.annotate(gold.strip())
    else:
        print("parsing with auto discourse segmenter")

        def parse_item(gold):
            return pipeline(gold.label, gold.text, gold.span[0],
                            gold.span[1], gold.info)

    parses = [
        parse_item(gold)
        for gold in tqdm(cdtb.test, desc="parsing for evaluation")
    ]
    scores = CDTBMetrics(golds=cdtb.test, parses=parses)

    # The segmenter report is only printed when segmentation was done by the pipeline.
    if not use_gold_edu:
        print(scores.segmenter_report())
    print(scores.parser_report())
    print(scores.nuclear_report())
    print(scores.relation_report())
Пример #2
0
def main():
    """Train a model on the train and validate splits, save it, and evaluate it.

    The corpus comes from ``dataset.load_cdtb_by_config()``. The trained model
    is saved to the location returned by
    ``config.get("segmenter.svm", "model_dir")`` and then evaluated on the
    test split.
    """
    corpus = dataset.load_cdtb_by_config()

    # Training uses the concatenation of the train and validate splits.
    training_data = corpus.train + corpus.validate
    trained_model = train(training_data)

    output_dir = config.get("segmenter.svm", "model_dir")
    save(trained_model, output_dir)

    evaluate(corpus.test, trained_model)
Пример #3
0
def main():
    """Load the corpus via ``dataset.load_cdtb_by_config()`` and pass it to ``train``.

    Whatever ``train`` returns is discarded.
    """
    corpus = dataset.load_cdtb_by_config()
    train(corpus)