def test_every_evidence_without_label_is_a_question(self):
    c = ActiveLearningCore(self.relation, self.lbl_evs([None]*3))
    self.assertEqual(len(c.questions), 3)
    c = ActiveLearningCore(self.relation, self.lbl_evs([False, True, None]))
    self.assertEqual(len(c.questions), 1)
    c = ActiveLearningCore(self.relation, self.lbl_evs([None, True, None]))
    self.assertEqual(len(c.questions), 2)
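For orientation: lbl_evs is a fixture helper from the test mixin that pairs the mixin's pre-built candidate evidences with the given labels. Its real definition is not part of this listing, so the sketch below is an assumption.

# Hypothetical shape of the lbl_evs() helper used in these tests; the real
# one lives in the test mixin and may differ.
def lbl_evs(self, labels):
    # Map each pre-built candidate evidence (ev1, ev2, ev3) to its label.
    return dict(zip([self.ev1, self.ev2, self.ev3], labels))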
Example #2
def _load_extractor(opts, relation, labeled_evidences):
    extractor_path = opts.get('--trained-extractor')
    try:
        iextractor = ActiveLearningCore.load(
            extractor_path, labeled_evidences=labeled_evidences)
    except ValueError:
        print("Error: unable to load extractor, invalid file")
        exit(1)

    if iextractor.relation != relation:
        print('The loaded extractor is not for the requested relation'
              ' but for relation {} instead'.format(iextractor.relation))
        exit(1)
    print('Extractor successfully loaded')
    return iextractor
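A hedged sketch of calling _load_extractor; the pickle path is illustrative, and relation / labeled_evidences are assumed to come from the usual pipeline shown in the later examples.

# Hypothetical call site for _load_extractor.
opts = {'--trained-extractor': 'path/to/extractor.pickle'}  # illustrative path
iextractor = _load_extractor(opts, relation, labeled_evidences)
predictions = iextractor.predict()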
Example #3
def _construct_extractor(opts, relation, labeled_evidences, tuning_mode):
    config_filepath = opts.get("--extractor-config")
    if not config_filepath:
        config_filepath = os.path.join(INSTANCE_PATH, "extractor_config.json")

    if not os.path.exists(config_filepath):
        print("Error: extractor config does not exists, please create the "
              "file extractor_config.json or use the --extractor-config")
        exit(1)

    with open(config_filepath) as filehandler:
        try:
            extractor_config = json.load(filehandler)
        except Exception as error:
            print("Error: unable to load extractor config: {}".format(error))
            exit(1)

    iextractor = ActiveLearningCore(
        relation, labeled_evidences, extractor_config, tradeoff=tuning_mode
    )
    return iextractor
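For reference, a minimal way to produce the extractor_config.json this function expects; only the "classifier_args" / "class_weight" shape is taken from Example #4 below, and every other detail of the real schema is an assumption.

import json

# Illustrative skeleton config; the real schema is defined by IEPY's
# relation extraction classifier and is richer than this.
config = {
    "classifier_args": {
        "class_weight": {"true": 1, "false": 1},
    },
}
with open("extractor_config.json", "w") as filehandler:
    json.dump(config, filehandler, indent=4)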
Example #4
    def __call__(self, config):
        if u"class_weight" in config[u"classifier_args"]:
            d = config[u"classifier_args"][u"class_weight"]
            assert "true" in d and "false" in d and len(d) == 2
            config[u"classifier_args"][u"class_weight"] = {
                True: d["true"],
                False: d["false"]
            }

        # Prepare data
        if self.data is None or self.relname != config["relation"]:
            relation = iepy.data.models.Relation.objects.get(
                name=config["relation"])
            c_evidences = CEM.candidates_for_relation(relation)
            self.data = CEM.labels_for(relation, c_evidences,
                                       CEM.conflict_resolution_newest_wins)
            self.data = [(x, label) for x, label in self.data.items()
                         if label is not None]
            self.relname = config["relation"]
        data = self.data
        testset = {x: label for x, label in data}
        candidate_evidences = {x: None for x, _ in data}
        if not data:
            raise NotEnoughData("There is no labeled data for training")
        oracle_answers = config["oracle_answers"]
        N = len(data)
        M = N - oracle_answers  # test set size
        if M / N < 0.1:  # less than 10% of the data left for testing
            raise NotEnoughData("There is not enough data for evaluation")

        result = {
            "train_size": oracle_answers,
            "test_size": M,
            "dataset_size": N,
            "start_time": time.time(),
        }

        # Interact with oracle
        alcore = ActiveLearningCore(config["relation"],
                                    candidate_evidences,
                                    extractor_config=config,
                                    performance_tradeoff=config["tradeoff"])
        alcore.start()
        # ^ Is against greenhouse emissions
        for _ in range(oracle_answers):
            q = alcore.questions[0]
            alcore.add_answer(q, testset[q])
            del testset[q]  # Once given for training cannot be part of testset
            alcore.process()

        extractor = alcore.relation_classifier

        # Evaluate prediction
        predicted_dict = alcore.predict()
        test_evidences = list(testset)
        test_labels = [testset[x] for x in test_evidences]
        predicted_labels = [predicted_dict[x] for x in test_evidences]
        result.update(
            result_dict_from_predictions(test_evidences, test_labels,
                                         predicted_labels))

        # Evaluate ranking
        predicted_scores = extractor.decision_function(test_evidences)
        auroc = roc_auc_score(test_labels, predicted_scores)
        avgprec = average_precision_score(test_labels, predicted_scores)

        result.update({
            "auROC": auroc,
            "average_precision": avgprec,
        })
        return result
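A hedged sketch of driving the experiment above; the class that owns __call__ is not shown in this listing, so Experiment is an assumed name, while the config keys are exactly the ones the method reads.

# Hypothetical invocation; the relation name and oracle_answers are illustrative.
experiment = Experiment()          # assumed owner of the __call__ above
result = experiment({
    "relation": "was born in",     # must name an existing iepy Relation
    "oracle_answers": 50,          # size of the simulated-oracle training set
    "tradeoff": None,              # forwarded as performance_tradeoff
    "classifier_args": {},         # optional class_weight is rewritten above
})
print(result["auROC"], result["average_precision"])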
Example #5
    try:
        relation = Relation.objects.get(name=relation)
    except Relation.DoesNotExist:
        print("Relation {!r} non existent".format(relation))
        print_all_relations()
        exit(1)

    extractor_config = opts.get("--extractor-config")
    if extractor_config:
        with open(extractor_config) as filehandler:
            extractor_config = json.load(filehandler)

    candidates = CandidateEvidenceManager.candidates_for_relation(relation)
    labeled_evidences = load_labeled_evidences(relation, candidates)
    iextractor = ActiveLearningCore(relation, labeled_evidences, extractor_config,
                                    performance_tradeoff=tuning_mode)
    iextractor.start()

    STOP = u'STOP'
    term = TerminalAdministration(relation,
                                  extra_options=[(STOP, u'Stop execution')])
    was_ever_trained = False
    while iextractor.questions:
        questions = list(iextractor.questions)  # copying the list
        term.update_candidate_evidences_to_label(questions)
        result = term()
        i = 0
        for c, label_value in load_labeled_evidences(relation, questions).items():
            if label_value is not None:
                iextractor.add_answer(c, label_value)
                i += 1
Example #6
    try:
        relation = Relation.objects.get(name=relation)
    except Relation.DoesNotExist:
        print("Relation {!r} non existent".format(relation))
        print_all_relations()
        exit(1)

    extractor_config = opts.get("--extractor-config")
    if extractor_config:
        with open(extractor_config) as filehandler:
            extractor_config = json.load(filehandler)

    candidates = CandidateEvidenceManager.candidates_for_relation(relation)
    labeled_evidences = load_labeled_evidences(relation, candidates)
    iextractor = ActiveLearningCore(relation, labeled_evidences,
                                    extractor_config)
    iextractor.start()

    STOP = u'STOP'
    term = TerminalAdministration(relation,
                                  extra_options=[(STOP, u'Stop execution ASAP')])

    while iextractor.questions:
        questions = list(iextractor.questions)  # copying the list
        term.update_candidate_evidences_to_label(questions)
        result = term()
        if result == STOP:
            break

        i = 0
Example #7
def run_from_command_line():
    opts = docopt(__doc__, version=iepy.__version__)
    relation = opts['<relation_name>']
    classifier_path = opts.get('--classifier')

    logging.basicConfig(level=logging.INFO, format='%(message)s')
    logging.getLogger("featureforge").setLevel(logging.WARN)

    if opts['--tune-for'] == 'high-prec':
        tuning_mode = HIPREC
    elif opts['--tune-for'] == 'high-recall':
        tuning_mode = HIREC
    else:
        print('Invalid tuning mode')
        print(__doc__)
        exit(1)

    try:
        relation = Relation.objects.get(name=relation)
    except Relation.DoesNotExist:
        print("Relation {!r} non existent".format(relation))
        print_all_relations()
        exit(1)

    candidates = CandidateEvidenceManager.candidates_for_relation(relation)
    labeled_evidences = load_labeled_evidences(relation, candidates)

    if classifier_path:
        try:
            loaded_classifier = output.load_classifier(classifier_path)
        except ValueError:
            print("Error: unable to load classifier, invalid file")
            exit(1)

        iextractor = ActiveLearningCore(
            relation, labeled_evidences, performance_tradeoff=tuning_mode,
            classifier=loaded_classifier
        )
        was_ever_trained = True
    else:
        extractor_config = opts.get("--extractor-config")
        if extractor_config:
            with open(extractor_config) as filehandler:
                extractor_config = json.load(filehandler)

        iextractor = ActiveLearningCore(
            relation, labeled_evidences, extractor_config,
            performance_tradeoff=tuning_mode
        )
        iextractor.start()
        was_ever_trained = False

    if not opts.get("--no-questions", False):
        questions_loop(iextractor, relation, was_ever_trained)

    # Predict and store output
    predictions = iextractor.predict()
    if predictions:
        output.dump_output_loop(predictions)
        output.dump_classifier_loop(iextractor)
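The options read above imply a docopt usage string along these lines; the real one ships with the IEPY runner script, so the exact wording below is an assumption.

# Hypothetical docopt help text consistent with the options read above.
"""
Usage:
    iepy_runner.py [options] <relation_name>

Options:
  --classifier=<path>        Reuse an already trained classifier
  --extractor-config=<path>  JSON config for building a new extractor
  --tune-for=<mode>          Either 'high-prec' or 'high-recall'
  --no-questions             Skip the interactive labeling loop
"""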
Example #8
def setUp(self):
    super().setUp()
    self.c = ActiveLearningCore(self.relation, self.lbl_evs([None]*3))
    patcher = mock.patch.object(self.c, 'train_relation_classifier')
    self.mock_train_classifier = patcher.start()
    self.addCleanup(patcher.stop)
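The patcher.start() / addCleanup(patcher.stop) pairing above is the standard way to keep a mock.patch active for a whole test without decorating every method; a minimal self-contained illustration (os.getcwd is just a convenient target):

import os
import unittest
from unittest import mock

class PatcherDemo(unittest.TestCase):
    def setUp(self):
        # Start the patch manually and register the stop as a cleanup,
        # so it is undone after every test, pass or fail.
        patcher = mock.patch('os.getcwd')
        self.mock_getcwd = patcher.start()
        self.addCleanup(patcher.stop)

    def test_patched(self):
        self.mock_getcwd.return_value = '/tmp'
        self.assertEqual(os.getcwd(), '/tmp')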
Example #9
class TestProcess(ActiveLearningTestMixin, ManagerTestCase):

    def setUp(self):
        super().setUp()
        self.c = ActiveLearningCore(self.relation, self.lbl_evs([None]*3))
        patcher = mock.patch.object(self.c, 'train_relation_classifier')
        self.mock_train_classifier = patcher.start()
        self.addCleanup(patcher.stop)

    def test_process_with_no_available_labels_does_nothing(self):
        self.c.process()
        self.assertFalse(self.mock_train_classifier.called)

    def test_process_with_not_both_labels_does_nothing(self):
        # by "both", we mean True and False
        self.c.add_answer(self.ev1, True)
        self.c.process()
        self.assertFalse(self.mock_train_classifier.called)
        self.c.add_answer(self.ev2, True)
        self.c.process()
        self.assertFalse(self.mock_train_classifier.called)
        self.c.add_answer(self.ev3, False)
        self.c.process()
        self.assertTrue(self.mock_train_classifier.called)

    def test_more_than_binary_labels_is_raise(self):
        self.c.add_answer(self.ev1, True)
        self.c.add_answer(self.ev2, False)
        self.c.add_answer(self.ev3, False)
        self.c.labeled_evidence[self.ev3] = 'weird thing'
        self.assertRaises(ValueError, self.c.process)
        self.assertFalse(self.mock_train_classifier.called)
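Condensed, the behaviour these tests document: process() trains only once both a True and a False answer exist, and any label outside True/False/None makes it raise ValueError. A sketch reusing the (assumed) mixin fixtures:

# Behaviour summary; relation, ev1 and ev2 are the assumed test fixtures.
c = ActiveLearningCore(relation, {ev1: None, ev2: None})
c.add_answer(ev1, True)
c.process()                # no-op: only one class answered so far
c.add_answer(ev2, False)
c.process()                # both classes present -> trains the classifier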
Example #10
def test_every_question_answered_is_not_a_question_any_more(self):
    c = ActiveLearningCore(self.relation, self.lbl_evs([None]*3))
    c.add_answer(self.ev1, False)
    self.assertEqual(len(c.questions), 2)
    self.assertNotIn(self.ev1, c.questions)
Example #14
def run_from_command_line():
    opts = docopt(__doc__, version=iepy.__version__)
    relation = opts['<relation_name>']
    classifier_path = opts.get('--classifier')

    logging.basicConfig(level=logging.INFO, format='%(message)s')
    logging.getLogger("featureforge").setLevel(logging.WARN)

    if opts['--tune-for'] == 'high-prec':
        tuning_mode = HIPREC
    elif opts['--tune-for'] == 'high-recall':
        tuning_mode = HIREC
    else:
        print('Invalid tuning mode')
        print(__doc__)
        exit(1)

    try:
        relation = Relation.objects.get(name=relation)
    except Relation.DoesNotExist:
        print("Relation {!r} non existent".format(relation))
        print_all_relations()
        exit(1)

    candidates = CandidateEvidenceManager.candidates_for_relation(relation)
    labeled_evidences = load_labeled_evidences(relation, candidates)

    if classifier_path:
        try:
            loaded_classifier = output.load_classifier(classifier_path)
        except ValueError:
            print("Error: unable to load classifier, invalid file")
            exit(1)

        iextractor = ActiveLearningCore(relation,
                                        labeled_evidences,
                                        performance_tradeoff=tuning_mode,
                                        classifier=loaded_classifier)
        was_ever_trained = True
    else:
        config_filepath = opts.get("--extractor-config")
        if not config_filepath:
            config_filepath = os.path.join(INSTANCE_PATH,
                                           "extractor_config.json")

        if not os.path.exists(config_filepath):
            print("Error: extractor config does not exists, please create the "
                  "file extractor_config.json or use the --extractor-config")
            exit(1)

        with open(config_filepath) as filehandler:
            try:
                extractor_config = json.load(filehandler)
            except Exception as error:
                print(
                    "Error: unable to load extractor config: {}".format(error))
                exit(1)

        iextractor = ActiveLearningCore(relation,
                                        labeled_evidences,
                                        extractor_config,
                                        performance_tradeoff=tuning_mode)
        iextractor.start()
        was_ever_trained = False

    if not opts.get("--no-questions", False):
        questions_loop(iextractor, relation, was_ever_trained)

    # Predict and store output
    predictions = iextractor.predict()
    if predictions:
        output.dump_output_loop(predictions)
        output.dump_classifier_loop(iextractor)
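predict() returns a mapping from candidate evidence to a predicted label, which the output helpers above then persist interactively; a minimal sketch of consuming it directly instead (assuming boolean labels):

# Hypothetical direct use of predict(), bypassing output.dump_output_loop.
for evidence, label in iextractor.predict().items():
    if label:                  # keep only evidences predicted positive
        print("predicted fact:", evidence)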