def test_match_run_on_every_rule(self):
    """Check that starting the pipeline invokes every supplied rule.

    Ten separate mock rules are registered.  After ``pipeline.start()`` each
    of them must have been called with two positional arguments, the first
    of which is a ``Pattern``.
    """
    # Build ten independent mocks.  ``[mocked_rule] * 10`` would repeat one
    # single object ten times, so the loop below would inspect the very same
    # mock on each iteration and could not notice a rule that never ran.
    mocked_rules = [
        rule(True)(mock.MagicMock(return_value=Token("asd")))
        for _ in range(10)
    ]
    pipeline = RuleBasedCore(self.person_date_relation, self._candidates,
                             mocked_rules)
    pipeline.start()
    for mock_rule in mocked_rules:
        self.assertTrue(mock_rule.called)
        # The unpacking also checks that exactly two positionals were given.
        Subject, Object = mock_rule.call_args[0]
        self.assertIsInstance(Subject, Pattern)
def test_rule_that_not_matches(self):
    """Run one ``rule(True)`` rule and expect a falsy fact.

    The rule's pattern is ``Subject + Object + Token("something here")``;
    the fact reported for the first candidate must be falsy.
    """

    @rule(True)
    def test_rule(Subject, Object):
        trailing_token = Token("something here")
        return Subject + Object + trailing_token

    core = RuleBasedCore(
        self.person_date_relation,
        self._candidates,
        [test_rule],
    )
    core.start()
    known = core.known_facts()
    first_candidate = self._candidates[0]
    self.assertFalse(known[first_candidate])
def test_rule_with_negative_answer(self):
    """Run one ``rule(False)`` rule and expect a falsy fact.

    The fact reported by ``known_facts()`` for the first candidate must be
    falsy.
    """

    @rule(False)
    def test_rule(Subject, Object):
        return (Subject + Token("(") + Object + Token("-")
                + Question(Star(Any())))

    core = RuleBasedCore(
        self.person_date_relation,
        self._candidates,
        [test_rule],
    )
    core.start()
    known = core.known_facts()
    first_candidate = self._candidates[0]
    self.assertFalse(known[first_candidate])
def test_rule_that_matches(self):
    """Run one ``rule(True)`` rule and expect a truthy fact.

    The fact reported by ``known_facts()`` for the first candidate must be
    truthy.
    """

    @rule(True)
    def test_rule(Subject, Object):
        return (Subject + Token("(") + Object + Token("-")
                + Question(Star(Any())))

    core = RuleBasedCore(
        self.person_date_relation,
        self._candidates,
        [test_rule],
    )
    core.start()
    known = core.known_facts()
    first_candidate = self._candidates[0]
    self.assertTrue(known[first_candidate])
def __call__(self, config):
    """Run the rule-based pipeline described by ``config`` and evaluate it.

    Labeled evidence for ``config["relation"]`` is (re)loaded when nothing
    is cached yet or when the relation name differs from the cached one.
    ``config["rules"]`` is either the string ``"<all>"`` or a collection of
    rule names that are looked up in ``self.rules``.

    Returns:
        A dict with ``dataset_size``, ``start_time`` and every entry that
        ``result_dict_from_predictions`` contributes.

    Raises:
        NotEnoughLabeledData: ``self.data`` is empty.
        RuleNotFound: a requested rule name is not in ``self.rules``.
    """
    # Prepare data (only when the cache is empty or belongs to another
    # relation).
    relation_name = config["relation"]
    if self.data is None or self.relname != relation_name:
        self.relname = relation_name
        self.relation = iepy.data.models.Relation.objects.get(
            name=relation_name)
        candidates = CEM.candidates_for_relation(self.relation)
        self.data = CEM.labels_for(self.relation, candidates,
                                   CEM.conflict_resolution_newest_wins)
        # Keep only the evidences that actually carry a label.
        labeled_pairs = [
            (evidence, label)
            for evidence, label in self.data.items()
            if label is not None
        ]
        self.evidences = [evidence for evidence, _ in labeled_pairs]
        self.labels = [label for _, label in labeled_pairs]

    if not self.data:
        raise NotEnoughLabeledData(
            "There is no labeled data for training!")

    result = {
        "dataset_size": len(self.data),
        "start_time": time.time(),
    }

    # Resolve the rules requested by the config.
    requested = config["rules"]
    if requested == "<all>":
        rules = self.rules.values()
    else:
        for rule_name in requested:
            if rule_name not in self.rules:
                raise RuleNotFound(rule_name)
        rules = [
            selected
            for name, selected in self.rules.items()
            if name in requested
        ]

    # Run the rule based pipeline.
    pipeline = RuleBasedCore(self.relation, self.evidences, rules)
    pipeline.start()
    matched = pipeline.known_facts()
    predicted_labels = [evidence in matched for evidence in self.evidences]

    # Evaluate the prediction.
    evaluation = result_dict_from_predictions(
        self.evidences, self.labels, predicted_labels)
    result.update(evaluation)
    return result
def test_rule_priority(self):
    """Check that only the priority-1 rule's regex reaches ``refo.match``.

    Both rules must be called by ``pipeline.start()``.  During
    ``predict()`` the patched ``refo.match`` must be called once per
    candidate, each time with the regex returned by the priority-1 rule.
    """
    def matcher(*args):
        return True

    def not_matcher(*args):
        return None

    def call_regex(regex, evidence):
        # Stand-in for refo.match: the "regex" decides the outcome itself.
        return regex()

    high_priority = rule(True, priority=1)
    low_priority = rule(True, priority=0)
    rule_should_run = high_priority(mock.MagicMock(return_value=matcher))
    rule_should_not_run = low_priority(
        mock.MagicMock(return_value=not_matcher))

    pipeline = RuleBasedCore(
        self.person_date_relation,
        self._candidates,
        [rule_should_not_run, rule_should_run],
    )
    pipeline.start()

    # All rules are compiled on start
    for compiled_rule in (rule_should_run, rule_should_not_run):
        self.assertTrue(compiled_rule.called)

    pipeline.process()

    import refo
    with mock.patch.object(refo, 'match') as fake_refo_match:
        fake_refo_match.side_effect = call_regex
        pipeline.predict()
        self.assertEqual(fake_refo_match.call_count,
                         len(self._candidates))
        # Every recorded call must have used the priority-1 regex.
        for recorded_call in fake_refo_match.call_args_list:
            positional, _keywords = recorded_call
            self.assertEqual(positional[0], matcher)
def test_rule_priority(self):
    """Check that the priority-1 rule runs and the priority-0 one does not.

    Both mocks build the same pattern.  After ``pipeline.start()`` the
    priority-1 mock must have been called and the priority-0 mock must not.
    """
    def rule_match(Subject, Object):
        return (Subject + Token("(") + Object + Token("-")
                + Question(Star(Any())))

    high_priority = rule(True, priority=1)
    low_priority = rule(True, priority=0)
    rule_should_run = high_priority(mock.MagicMock(side_effect=rule_match))
    rule_should_not_run = low_priority(
        mock.MagicMock(side_effect=rule_match))

    core = RuleBasedCore(
        self.person_date_relation,
        self._candidates,
        [rule_should_not_run, rule_should_run],
    )
    core.start()

    self.assertTrue(rule_should_run.called)
    self.assertFalse(rule_should_not_run.called)
def test_rule_priority(self):
    """Check that only the priority-1 rule's regex reaches ``refo.match``.

    Both rules must be called by ``pipeline.start()``.  During
    ``predict(self._candidates)`` the patched ``refo.match`` must be called
    once per candidate, each time with the regex returned by the priority-1
    rule.
    """
    def matcher(*args):
        return True

    def not_matcher(*args):
        return None

    def call_regex(regex, evidence):
        # Stand-in for refo.match: the "regex" decides the outcome itself.
        return regex()

    high_priority = rule(True, priority=1)
    low_priority = rule(True, priority=0)
    rule_should_run = high_priority(mock.MagicMock(return_value=matcher))
    rule_should_not_run = low_priority(
        mock.MagicMock(return_value=not_matcher))

    pipeline = RuleBasedCore(
        self.person_date_relation,
        [rule_should_not_run, rule_should_run],
    )
    pipeline.start()

    # All rules are compiled on start
    for compiled_rule in (rule_should_run, rule_should_not_run):
        self.assertTrue(compiled_rule.called)

    pipeline.process()

    import refo
    with mock.patch.object(refo, 'match') as fake_refo_match:
        fake_refo_match.side_effect = call_regex
        pipeline.predict(self._candidates)
        self.assertEqual(fake_refo_match.call_count,
                         len(self._candidates))
        # Every recorded call must have used the priority-1 regex.
        for recorded_call in fake_refo_match.call_args_list:
            positional, _keywords = recorded_call
            self.assertEqual(positional[0], matcher)
def test_empty_rules(self):
    """With an empty rule list, ``predict()`` reports no truthy fact."""
    core = RuleBasedCore(self.person_date_relation, self._candidates, [])
    core.start()
    core.process()
    predictions = core.predict()
    truthy_keys = [key for key in predictions if predictions[key]]
    self.assertEqual(len(truthy_keys), 0)
def run_from_command_line():
    """Run the rule-based extractor and dump its predictions.

    The relation name is read from ``iepy.instance.rules.RELATION``.  An
    error is logged and the process exits with status 1 when that attribute
    is missing or when the relation lookup raises ``ObjectDoesNotExist``.
    Otherwise the predictions are passed to ``output.dump_output_loop``.
    """
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    try:
        relation_name = iepy.instance.rules.RELATION
    except AttributeError:
        logging.error("RELATION not defined in rules file")
        sys.exit(1)

    relation_manager = models.Relation.objects
    try:
        relation = relation_manager.get(name=relation_name)
    except ObjectDoesNotExist:
        logging.error("Relation {!r} not found".format(relation_name))
        sys.exit(1)

    # Gather the pipeline inputs: rules first, then candidate evidences.
    loaded_rules = load_rules()
    candidate_evidences = CandidateEvidenceManager.candidates_for_relation(
        relation)

    # Run the pipeline
    extractor = RuleBasedCore(relation, candidate_evidences, loaded_rules)
    extractor.start()
    extractor.process()
    output.dump_output_loop(extractor.predict())
def run_from_command_line():
    """Run the rule-based extractor and dump its predictions.

    The relation name is read from ``iepy.instance.rules.RELATION``.  An
    error is logged and the process exits with status 1 when that attribute
    is missing or when the relation lookup raises ``ObjectDoesNotExist``.
    Otherwise the candidate evidences are given to ``predict`` and the
    predictions are passed to ``output.dump_output_loop``.
    """
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    try:
        relation_name = iepy.instance.rules.RELATION
    except AttributeError:
        logging.error("RELATION not defined in rules file")
        sys.exit(1)

    relation_manager = models.Relation.objects
    try:
        relation = relation_manager.get(name=relation_name)
    except ObjectDoesNotExist:
        logging.error("Relation {!r} not found".format(relation_name))
        sys.exit(1)

    # Gather the pipeline inputs: rules first, then candidate evidences.
    loaded_rules = load_rules()
    candidate_evidences = CandidateEvidenceManager.candidates_for_relation(
        relation)

    # Run the pipeline
    extractor = RuleBasedCore(relation, loaded_rules)
    extractor.start()
    extractor.process()
    output.dump_output_loop(extractor.predict(candidate_evidences))
def test_match_run_on_every_rule(self):
    """Check that running the pipeline invokes every supplied rule.

    Ten separate mock rules are registered.  After ``start()``,
    ``process()`` and ``predict(self._candidates)`` each of them must have
    been called with two positional arguments, the first of which is a
    ``Pattern``.
    """
    # Build ten independent mocks.  ``[mocked_rule] * 10`` would repeat one
    # single object ten times, so the loop below would inspect the very same
    # mock on each iteration and could not notice a rule that never ran.
    mocked_rules = [
        rule(True)(mock.MagicMock(return_value=Token("asd")))
        for _ in range(10)
    ]
    pipeline = RuleBasedCore(self.person_date_relation, mocked_rules)
    pipeline.start()
    pipeline.process()
    pipeline.predict(self._candidates)
    for mock_rule in mocked_rules:
        self.assertTrue(mock_rule.called)
        # The unpacking also checks that exactly two positionals were given.
        Subject, Object = mock_rule.call_args[0]
        self.assertIsInstance(Subject, Pattern)
def test_rule_with_negative_answer(self):
    """Run one ``rule(False)`` rule and expect a falsy prediction.

    The value ``predict(self._candidates)`` reports for the first candidate
    must be falsy.
    """

    @rule(False)
    def test_rule(Subject, Object):
        return (Subject + Token("(") + Object + Token("-")
                + Question(Star(Any())))

    core = RuleBasedCore(self.person_date_relation, [test_rule])
    core.start()
    core.process()
    predictions = core.predict(self._candidates)
    first_candidate = self._candidates[0]
    self.assertFalse(predictions[first_candidate])
def test_rule_that_not_matches(self):
    """Run one ``rule(True)`` rule and expect a falsy prediction.

    The rule's pattern is ``Subject + Object + Token("something here")``;
    the value ``predict(self._candidates)`` reports for the first candidate
    must be falsy.
    """

    @rule(True)
    def test_rule(Subject, Object):
        trailing_token = Token("something here")
        return Subject + Object + trailing_token

    core = RuleBasedCore(self.person_date_relation, [test_rule])
    core.start()
    core.process()
    predictions = core.predict(self._candidates)
    first_candidate = self._candidates[0]
    self.assertFalse(predictions[first_candidate])
def test_rule_priority(self):
    """Check that the priority-1 rule runs and the priority-0 one does not.

    Both mocks build the same pattern.  After ``start()``, ``process()``
    and ``predict()`` the priority-1 mock must have been called and the
    priority-0 mock must not.
    """
    def rule_match(Subject, Object):
        return (Subject + Token("(") + Object + Token("-")
                + Question(Star(Any())))

    high_priority = rule(True, priority=1)
    low_priority = rule(True, priority=0)
    rule_should_run = high_priority(mock.MagicMock(side_effect=rule_match))
    rule_should_not_run = low_priority(
        mock.MagicMock(side_effect=rule_match))

    core = RuleBasedCore(
        self.person_date_relation,
        self._candidates,
        [rule_should_not_run, rule_should_run],
    )
    core.start()
    core.process()
    core.predict()

    self.assertTrue(rule_should_run.called)
    self.assertFalse(rule_should_not_run.called)
def test_rule_that_matches(self):
    """Run one ``rule(True)`` rule and expect a truthy prediction.

    The value ``predict(self._candidates)`` reports for the first candidate
    must be truthy.
    """

    @rule(True)
    def test_rule(Subject, Object):
        return (Subject + Token("(") + Object + Token("-")
                + Question(Star(Any())))

    core = RuleBasedCore(self.person_date_relation, [test_rule])
    core.start()
    core.process()
    predictions = core.predict(self._candidates)
    first_candidate = self._candidates[0]
    self.assertTrue(predictions[first_candidate])
if __name__ == u'__main__':
    # Script entry point: read the relation named by the ``rules`` module,
    # collect the rule callables that module defines, run them over the
    # relation's candidate evidences and dump the predictions.
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    opts = docopt(__doc__, version=iepy.__version__)

    try:
        relation_name = rules.RELATION
    except AttributeError:
        logging.error("RELATION not defined in rules file")
        sys.exit(1)

    relation = models.Relation.objects.get(name=relation_name)

    # Load rules
    # The collected callables go into their own list.  Binding the list to
    # the name ``rules`` would replace the module, and the scan below would
    # then inspect an empty list and never find a single rule.
    loaded_rules = []
    for attr_name in dir(rules):
        attr = getattr(rules, attr_name)
        if hasattr(attr, '__call__'):  # is callable
            if hasattr(attr, "is_rule") and attr.is_rule:
                loaded_rules.append(attr)

    # Load evidences
    evidences = CandidateEvidenceManager.candidates_for_relation(relation)

    # Run the pipeline
    iextractor = RuleBasedCore(relation, evidences, loaded_rules)
    iextractor.start()
    iextractor.process()
    predictions = iextractor.predict()
    output.dump_output_loop(predictions)
def test_empty_rules(self):
    """With an empty rule list, ``known_facts()`` has length zero."""
    core = RuleBasedCore(
        self.person_date_relation,
        self._candidates,
        [],
    )
    core.start()
    known = core.known_facts()
    self.assertEqual(len(known), 0)
from iepy.extraction.rules_core import RuleBasedCore
from iepy.data import models
from iepy.data.db import CandidateEvidenceManager
import rules


if __name__ == u'__main__':
    # Script entry point: collect the rule callables defined in the
    # ``rules`` module, run them over the candidate evidences of the
    # relation named by ``rules.RELATION`` and print the known facts.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    relation = models.Relation.objects.get(name=rules.RELATION)

    # Load rules
    # The collected callables go into their own list.  Binding the list to
    # the name ``rules`` would replace the imported module, and the scan
    # below would then inspect an empty list and never find a single rule.
    loaded_rules = []
    for attr_name in dir(rules):
        attr = getattr(rules, attr_name)
        if hasattr(attr, '__call__'):  # is callable
            if hasattr(attr, "is_rule") and attr.is_rule:
                loaded_rules.append(attr)

    # Load evidences
    evidences = CandidateEvidenceManager.candidates_for_relation(relation)

    # Run the pipeline
    iextractor = RuleBasedCore(relation, evidences, loaded_rules)
    iextractor.start()
    facts = iextractor.known_facts()
    print(facts)