def extract_labels(self, example_collection: ExampleCollection):
    """Assign a label to every example in *example_collection*.

    For each clause-DB example wrapper the label is taken from its
    ``classification_term`` when one is present; otherwise the example's
    logic program is queried with ``self.predicate_to_query`` and the label
    is read from each answer at position ``self.index_of_label_var``.
    Every label found is added to ``self.labels`` and stored on the
    clause-DB wrapper; the wrapper's ``label`` is then copied to the
    simple-program wrapper at the same index.

    Examples whose query yields no answers are printed, and their keys are
    collected instead of raising, so the caller can decide how to handle
    them. Summary statistics are printed at the end.

    :param example_collection: collection providing the simple-program and
        clause-DB example wrappers; its ``are_sp_examples_labeled`` and
        ``are_clausedb_examples_labeled`` flags are set to ``True``.
    :return: set with the keys (``get_key()``) of the examples for which
        no label could be found.
    """
    example_wrappers_sp = example_collection.get_example_wrappers_sp()
    example_wrappers_clausedb = example_collection.get_example_wrappers_clausedb()

    keys_of_unlabeled_examples = set()
    label_distribution = {}

    def register_label(example, label):
        # Record the label globally, on the example, and in the histogram.
        self.labels.add(label)
        example.label = label
        label_distribution[label] = label_distribution.get(label, 0) + 1

    # Each item is an (index, clause-DB example wrapper) pair.
    # NOTE(review): assumes both wrapper lists are index-aligned - confirm.
    for ex_index, clause_db_ex in enumerate(example_wrappers_clausedb):
        if clause_db_ex.classification_term is not None:
            register_label(
                clause_db_ex,
                clause_db_ex.classification_term.args[self.index_of_label_var],
            )
        else:
            # TODO: update this to the probabilistic way of querying
            query_results = self.engine.query(
                clause_db_ex.logic_program, self.predicate_to_query
            )
            # Compare by value: `is 0` only works by accident of CPython's
            # small-integer caching and triggers a SyntaxWarning on 3.8+.
            if len(query_results) == 0:
                for ex_statement in clause_db_ex:
                    print(ex_statement)
                print("--")
                # Deliberately not an error: the example is reported back
                # to the caller through the returned key set.
                keys_of_unlabeled_examples.add(clause_db_ex.get_key())
            # Every answer is counted; the last one determines the label
            # that remains on the example.
            for answer in query_results:
                register_label(clause_db_ex, answer[self.index_of_label_var])

        # Mirror the label onto the simple-program wrapper of this example.
        example_wrappers_sp[ex_index].label = clause_db_ex.label

    # set flags
    example_collection.are_sp_examples_labeled = True
    example_collection.are_clausedb_examples_labeled = True

    nb_examples = len(example_wrappers_clausedb)
    print("nb of examples: " + str(nb_examples))
    nb_unlabeled_examples = len(keys_of_unlabeled_examples)
    print("nb of unlabeled examples: " + str(nb_unlabeled_examples))
    print("nb of labeled examples: " + str(nb_examples - nb_unlabeled_examples))
    print("indexes of unlabeled examples:")
    print(keys_of_unlabeled_examples)
    print("label distribution:")
    print(label_distribution)

    return keys_of_unlabeled_examples
def extract_labels(self, example_collection: ExampleCollection):
    """Label every simple-program example in *example_collection*.

    Calls ``self.extract_label`` on each wrapper returned by
    ``example_collection.get_example_wrappers_sp()`` and afterwards sets
    the collection's ``are_sp_examples_labeled`` flag to ``True``.

    :param example_collection: collection whose simple-program example
        wrappers are passed to ``self.extract_label`` one by one.
    """
    sp_wrappers = example_collection.get_example_wrappers_sp()
    for sp_wrapper in sp_wrappers:
        self.extract_label(sp_wrapper)

    # Flag the collection once all wrappers have been processed.
    example_collection.are_sp_examples_labeled = True