示例#1
0
    def __init__(self, conf_file='configurations.json'):
        """
        Populate the annotator, dataset and experiment registries from a
        JSON configuration file.

        conf_file is the path of the JSON document. It is expected to have
        the "annotators", "datasets" and "experiments" sections. Each
        registry is an OrderedDict, so entries are kept in the order in
        which the file lists them.
        """
        self.annotators = OrderedDict()
        self.datasets = OrderedDict()
        self.experiments = OrderedDict()

        with open(conf_file, 'r') as handle:
            settings = json.load(handle)

            # Annotators are built from their module name and registered
            # under their alias
            for entry in settings["annotators"]:
                alias = entry["alias"]
                instance = create_annotator(str(entry["name"]))
                instance.set_configuration(entry["configuration"])
                self.annotators[alias] = instance

            # Datasets are read from TSV files and registered by name
            for entry in settings["datasets"]:
                loaded = Dataset.load_tsv(entry["file"])
                self.datasets[entry["name"]] = loaded

            # Experiments receive their whole configuration entry
            for entry in settings["experiments"]:
                experiment = create_experiment(str(entry["name"]), entry)
                self.experiments[entry["name"]] = experiment
示例#2
0
def reshape_dataset(annotator, src_file_name, output):
    """
    Load a dataset, reshape it with `annotator` when needed, and save it.

    A source whose name ends with ".pkl" is loaded with Dataset.load and is
    written out without calling the annotator. Any other source is read with
    AIDADataset.read and then passed to annotator.reshape.

    The output format is chosen from the extension of `output`: ".pkl" uses
    Dataset.save, ".xml" uses Dataset.save_xml and anything else uses
    Dataset.save_tsv.
    """
    # print(...) with a single argument behaves the same on Python 2 and
    # Python 3, whereas the print statement is a SyntaxError on Python 3.
    print("Reading dataset from %s" % src_file_name)

    # NOTE(review): ".pkl" sources skip the reshape step, presumably because
    # they were already reshaped before being pickled -- confirm.
    needs_reshape = not src_file_name.endswith(".pkl")

    if needs_reshape:
        dataset = AIDADataset.read(src_file_name)
    else:
        dataset = Dataset.load(src_file_name)

    print("Dataset loaded: %s" % dataset)

    if needs_reshape:
        print("Reshaping dataset using %s" % annotator)
        annotator.reshape(dataset)

    if output.endswith(".pkl"):
        Dataset.save(dataset, output)
    elif output.endswith(".xml"):
        Dataset.save_xml(dataset, output)
    else:
        Dataset.save_tsv(dataset, output)

    print("Dataset successfully saved in %s" % output)
示例#3
0
        micro_msg = "[micro P: %.3f R: %.3f F1: %.3f]" % (
            self.metrics.precision(), self.metrics.recall(), self.metrics.f1()
        )

        if self.metrics.has_macro():
            macro_msg = " [macro P: %.3f R: %.3f F1: %.3f]" % (
                self.metrics.macro_precision(),
                self.metrics.macro_recall(),
                self.metrics.macro_f1()
            )
        else:
            macro_msg = ""

        self.log.info("%s %s%s" % (count_msg, micro_msg, macro_msg))

if __name__ == "__main__":
    # Command-line entry point:
    #   <script> BENCHMARK ANNOTATOR DATASET [benchmark options...]
    # Everything after the third argument is handed to the benchmark's own
    # argument parser.
    import sys
    from wikibench.dataset import Dataset
    from wikibench.utils import create_annotator, create_benchmark

    # Fail with a usage message instead of an unpacking ValueError when
    # fewer than three positional arguments are given.
    if len(sys.argv) < 4:
        sys.exit(
            "Usage: %s BENCHMARK ANNOTATOR DATASET [options...]" % sys.argv[0]
        )

    benchmark_name, annotator_name, dataset_file = sys.argv[1:4]

    dataset = Dataset.load(dataset_file)
    annotator = create_annotator(annotator_name)
    benchmark = create_benchmark(benchmark_name)

    benchmark.parse_arguments(sys.argv[4:])
    benchmark.run(dataset, annotator)
    benchmark.summary()
示例#4
0
    def __init__(self):
        """
        Parse the command line, load the configuration and print the reports.

        Recognized options:
          -c/--conf FILE   JSON configuration file ("configurations.json")
          -s/--strong      use strong mention match
          -b/--best VALUE  stored in self.best and shown in the "Attr" column
          --threshold N    float threshold; reset to 0.0 when given
                           without --best
          --optimize ATTR  target attribute to optimize ("macro_f1")
          --tablefmt FMT   table format handed to tabulate ("simple")
          --recap          report metrics for each document
          --detailed       print a detailed report for every document

        One table is printed per configured experiment. Each table has one
        row per (dataset, annotator) pair, built from the results stored
        under <experiment.file>/<dataset name>/<annotator name>. When a best
        attribute or a threshold is active, the "Attr" and "Threshold"
        columns are added right after the "Annotator" column.
        """
        parser = OptionParser()
        parser.add_option("-c", "--conf", dest="configuration",
                          help="Configuration file in json format",
                          metavar="FILE", default="configurations.json")
        parser.add_option("-s", "--strong", dest="strong_match", default=False,
                          action="store_true", help="Use strong mention match")
        parser.add_option("-b", "--best", dest="best", default=None,
                          help="Optimize metrics looking for best threshold")
        parser.add_option("--threshold", dest="threshold", default=0.0,
                          type="float", help="Threshold instances")
        parser.add_option("--optimize", dest="optimize",
                          default="macro_f1",
                          help="Target attribute to optimize")
        parser.add_option("--tablefmt", dest="tablefmt",
                          default="simple",
                          help="Format for the tables")
        parser.add_option("--recap", dest="recap", default=False,
                          action="store_true",
                          help="Report metrics for each document")
        parser.add_option("--detailed", dest="detailed", default=False,
                          action="store_true",
                          help="Print a detailed report for every document")

        # Positional arguments are not used by this command
        options = parser.parse_args()[0]

        self.print_recap = options.recap
        self.print_report = options.detailed
        self.use_strong_match = options.strong_match
        self.best = options.best
        self.threshold = options.threshold
        self.optimize = options.optimize
        self.tablefmt = options.tablefmt
        self.conf = conf = Configurations(options.configuration)

        # A threshold given without --best is discarded.
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3, whereas the print statement is a SyntaxError on Python 3.
        if self.threshold > 0.0 and self.best is None:
            print("You did specify a threshold without best.")
            self.threshold = 0.0

        for experiment_name, experiment in conf.experiments.items():
            header = [
                u"Dataset",
                u"Annotator",
                u"Total",
                u"TP",
                u"TN",
                u"FP",
                u"FN",
                u"μP",
                u"μR",
                u"μF1",
                u"P",
                u"R",
                u"F1",
            ]

            # Optional columns go right after "Annotator"
            if self.best or self.threshold:
                header[2:2] = ["Attr", "Threshold"]

            rows = [header]

            for dataset_name, dataset in conf.datasets.items():
                for annotator_name, annotator in conf.annotators.items():
                    result_directory = os.path.join(
                        experiment.file, dataset_name, annotator_name
                    )
                    results = Dataset.load_results(result_directory)

                    # t fills the "Threshold" column, m provides the metrics
                    t, m = self.report_for(experiment,
                                           results, dataset.instances)

                    row = [
                        dataset_name,
                        annotator_name,
                        m.tp + m.fn,  # "Total"
                        m.tp,
                        m.tn,
                        m.fp,
                        m.fn,
                        m.precision(),
                        m.recall(),
                        m.f1(),
                        m.macro_precision(),
                        m.macro_recall(),
                        m.macro_f1(),
                    ]

                    # Same position and order as the optional header columns
                    if self.best or self.threshold:
                        row[2:2] = [self.best, t]

                    rows.append(row)

            print(tabulate(rows, headers="firstrow",
                           floatfmt=".3f", tablefmt=self.tablefmt))