예제 #1
0
파일: convert.py 프로젝트: nopper/wikibench
def reshape_dataset(annotator, src_file_name, output):
    """
    Load a dataset, reshape it with ``annotator`` when needed, and save it.

    A source whose name ends in ".pkl" is loaded with ``Dataset.load`` and
    is not reshaped (presumably such files were already reshaped by an
    earlier run -- TODO confirm against callers). Any other source is read
    with ``AIDADataset.read`` and handed to ``annotator.reshape``.

    The output format is picked from the extension of ``output``:
    ".pkl" -> ``Dataset.save``, ".xml" -> ``Dataset.save_xml``, anything
    else -> ``Dataset.save_tsv``.

    Progress messages are printed to standard output.

    :param annotator: object exposing ``reshape(dataset)``
    :param src_file_name: path of the dataset to read
    :param output: path of the file to write
    """
    # NOTE: single-argument print(...) prints the same text whether it is
    # parsed as a Python 2 print statement or as the Python 3 function.
    print("Reading dataset from %s" % src_file_name)

    needs_reshape = not src_file_name.endswith(".pkl")

    if needs_reshape:
        dataset = AIDADataset.read(src_file_name)
    else:
        dataset = Dataset.load(src_file_name)

    print("Dataset loaded: %s" % dataset)

    if needs_reshape:
        print("Reshaping dataset using %s" % annotator)
        # The return value is ignored; reshape() is assumed to modify the
        # dataset in place -- TODO confirm.
        annotator.reshape(dataset)

    if output.endswith(".pkl"):
        Dataset.save(dataset, output)
    elif output.endswith(".xml"):
        Dataset.save_xml(dataset, output)
    else:
        # Any extension other than .pkl/.xml falls back to TSV.
        Dataset.save_tsv(dataset, output)

    print("Dataset successfully saved in %s" % output)
예제 #2
0
        micro_msg = "[micro P: %.3f R: %.3f F1: %.3f]" % (
            self.metrics.precision(), self.metrics.recall(), self.metrics.f1()
        )

        if self.metrics.has_macro():
            macro_msg = " [macro P: %.3f R: %.3f F1: %.3f]" % (
                self.metrics.macro_precision(),
                self.metrics.macro_recall(),
                self.metrics.macro_f1()
            )
        else:
            macro_msg = ""

        self.log.info("%s %s%s" % (count_msg, micro_msg, macro_msg))

if __name__ == "__main__":
    # Usage: <script> BENCHMARK ANNOTATOR DATASET [benchmark options...]
    # The first three arguments are required; everything after them is
    # forwarded untouched to the benchmark's own argument parser.
    import sys
    from wikibench.dataset import Dataset
    from wikibench.utils import create_annotator, create_benchmark

    cli_args = sys.argv[1:]
    benchmark_name, annotator_name, dataset_path = cli_args[:3]
    benchmark_options = cli_args[3:]

    # Build the three collaborators in the same order as before: dataset
    # first, then the annotator, then the benchmark.
    dataset = Dataset.load(dataset_path)
    annotator = create_annotator(annotator_name)
    benchmark = create_benchmark(benchmark_name)

    benchmark.parse_arguments(benchmark_options)
    benchmark.run(dataset, annotator)
    benchmark.summary()