示例#1
0
    def extract_labels(self, example_collection: ExampleCollection):
        """Assign a label to every example in the collection and report stats.

        For each clause-db example wrapper:

        * if it carries a ``classification_term``, the label is that term's
          argument at position ``self.index_of_label_var``;
        * otherwise ``self.predicate_to_query`` is queried on the example's
          logic program through ``self.engine`` and the label is read from
          position ``self.index_of_label_var`` of each answer. When several
          answers come back, every one is counted in the label distribution
          and the last one is what stays on the example.

        Every label found is added to ``self.labels``. The label of the
        clause-db wrapper is then copied to the simple-program wrapper at the
        same index (the two lists are assumed to be index-aligned).

        Both ``are_*_examples_labeled`` flags on the collection are set to
        ``True`` afterwards, even if some examples could not be labeled, and
        a short summary is printed to stdout.

        :param example_collection: collection whose example wrappers are
            labeled in place.
        :return: set with the keys (``get_key()``) of the examples for which
            the query produced no answer.
        """
        example_wrappers_sp = example_collection.get_example_wrappers_sp()
        example_wrappers_clausedb = (
            example_collection.get_example_wrappers_clausedb())

        keys_of_unlabeled_examples = set()
        label_distribution = {}

        for ex_index, clause_db_ex in enumerate(
                example_wrappers_clausedb
        ):  # type: Tuple[int, ClauseDBExampleWrapper]
            if clause_db_ex.classification_term is not None:
                label = clause_db_ex.classification_term.args[
                    self.index_of_label_var]
                self.labels.add(label)
                clause_db_ex.label = label
                label_distribution[label] = (
                    label_distribution.get(label, 0) + 1)
            else:
                # TODO: update this to the probabilistic way of querying
                query_results = self.engine.query(clause_db_ex.logic_program,
                                                  self.predicate_to_query)
                # Compare by value: `is 0` only worked through CPython's
                # small-int caching and emits a SyntaxWarning on Python 3.8+.
                if len(query_results) == 0:
                    # No answer: dump the example for inspection and record
                    # its key instead of aborting the whole extraction.
                    for ex_statement in clause_db_ex:
                        print(ex_statement)
                    print("--")
                    keys_of_unlabeled_examples.add(clause_db_ex.get_key())
                for answer in query_results:
                    label = answer[self.index_of_label_var]
                    self.labels.add(label)
                    clause_db_ex.label = label
                    label_distribution[label] = (
                        label_distribution.get(label, 0) + 1)
            # Mirror the label onto the simple-program wrapper at this index.
            example_wrappers_sp[ex_index].label = clause_db_ex.label

        # Set flags.
        example_collection.are_sp_examples_labeled = True
        example_collection.are_clausedb_examples_labeled = True

        nb_examples = len(example_wrappers_clausedb)
        print("nb of examples: " + str(nb_examples))
        nb_unlabeled_examples = len(keys_of_unlabeled_examples)
        print("nb of unlabeled examples: " + str(nb_unlabeled_examples))
        print("nb of labeled examples: " +
              str(nb_examples - nb_unlabeled_examples))
        print("indexes of unlabeled examples:")
        print(keys_of_unlabeled_examples)
        print("label distribution:")
        print(label_distribution)

        return keys_of_unlabeled_examples
示例#2
0
 def extract_labels(self, example_collection: ExampleCollection):
     """Label every simple-program example wrapper of the collection.

     Calls ``self.extract_label`` on each wrapper returned by
     ``get_example_wrappers_sp()`` and then marks the collection's
     simple-program examples as labeled.
     """
     sp_wrappers = example_collection.get_example_wrappers_sp()
     for wrapper in sp_wrappers:
         self.extract_label(wrapper)

     # Only reached when every wrapper was processed without an exception.
     example_collection.are_sp_examples_labeled = True