import dataset
import schemas
from tqdm import tqdm
# Assumed import path; adjust to wherever CDTBMetrics is defined in this project.
from util.eval import CDTBMetrics


def evaluate(schema_name, use_gold_edu=False):
    # Build the parsing pipeline for the requested schema and load the CDTB corpus.
    pipeline = schemas.create_pipeline(schema_name)
    cdtb = dataset.load_cdtb_by_config()

    parses = []
    if use_gold_edu:
        print("parsing with gold EDU")
    else:
        print("parsing with auto discourse segmenter")

    for gold in tqdm(cdtb.test, desc="parsing for evaluation"):
        if use_gold_edu:
            # Parse starting from the gold EDU segmentation: gold.strip()
            # presumably drops the gold structure while keeping the EDUs,
            # and annotate() adds structure, nuclearity and relations.
            discourse = pipeline.annotate(gold.strip())
        else:
            # Run the full pipeline, including automatic discourse
            # segmentation, from the raw text.
            discourse = pipeline(gold.label, gold.text, gold.span[0], gold.span[1], gold.info)
        parses.append(discourse)

    metrics = CDTBMetrics(golds=cdtb.test, parses=parses)
    if not use_gold_edu:
        # Segmentation scores are only meaningful when the auto segmenter was used.
        print(metrics.segmenter_report())
    print(metrics.parser_report())
    print(metrics.nuclear_report())
    print(metrics.relation_report())
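

# A minimal command-line entry point sketch for evaluate(); the argparse flags
# and their names below are assumptions for illustration, not part of the
# original script.
if __name__ == "__main__":
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("schema_name",
                            help="name of a pipeline schema known to schemas.create_pipeline")
    arg_parser.add_argument("--use_gold_edu", action="store_true",
                            help="evaluate on gold EDU segmentation instead of the auto segmenter")
    args = arg_parser.parse_args()
    evaluate(args.schema_name, use_gold_edu=args.use_gold_edu)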


# Entry point of the SVM segmenter training script; the train, save and
# evaluate helpers and the config object are assumed to be defined or
# imported elsewhere in that script.
def main():
    cdtb = dataset.load_cdtb_by_config()
    # Train the EDU segmenter on the training and validation splits together.
    model = train(cdtb.train + cdtb.validate)
    # Persist the trained model to the directory configured for the SVM segmenter.
    model_dir = config.get("segmenter.svm", "model_dir")
    save(model, model_dir)
    # Evaluate the freshly trained segmenter on the held-out test split.
    evaluate(cdtb.test, model)
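

# The model_dir lookup above matches an INI-style config.get(section, option)
# call. A sketch of the entry main() expects; the path value is illustrative
# only:
#
#   [segmenter.svm]
#   model_dir = data/models/segmenter_svm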


# Entry point of a separate training script: load the CDTB corpus and hand the
# whole corpus object to train(), which presumably selects its own splits.
def main():
    cdtb = dataset.load_cdtb_by_config()
    train(cdtb)