def generate_wrappers(self, url):
    """Train and persist wrappers for every example set obtained for `url`.

    Examples are fetched through an ExampleGateway configured from this
    object's limits. For each returned set a WrapperTrainer is built with
    rulers chosen by the set name, the resulting wrappers are pruned with
    `self._prune_wrappers` and stored through a WrapperGateway.

    A failure while training, pruning or persisting one set is logged and
    does not stop the remaining sets from being processed.

    url -- passed to the example gateway and used when persisting wrappers.
    """
    wrapper_manager = WrapperGateway()
    example_manager = ExampleGateway(
        max_examples=self.max_examples,
        max_examples_from_db=self.max_examples_from_db,
        seconds_between_requests=self.secs_between_reqs)
    example_sets = example_manager.get_examples(self.wrapper_gen_examples,
                                                url,
                                                self.min_validity)

    # `set_name` instead of `set` so the builtin is not shadowed.
    for set_name in example_sets:
        log.info('Starting wrapper training for set "%s"' % set_name) #@UndefinedVariable

        if set_name in ('author', 'editor'):
            # These two sets get the multi-value / person ruler chain.
            rulers = [MultiValuePathRuler(),
                      SeparatorsRegexRuler(),
                      ElementsRegexRuler(),
                      PersonRuler()]
        else:
            # Fall back to a match-anything guide when none is configured.
            try:
                value_guide = self.value_guides[set_name]
            except KeyError:
                value_guide = '.*'
            rulers = [PathRuler(value_guide), RegexRuler()]

        trainer = WrapperTrainer(rulers, self.wrapper_gen_examples)
        try:
            wrappers = trainer.train(example_sets[set_name])
            wrappers = self._prune_wrappers(wrappers)
            wrapper_manager.persist_wrappers(url, set_name, wrappers)
            log.info('Trainer generated %d wrappers' % len(wrappers)) #@UndefinedVariable
        # Deliberately broad: one failing set must not abort the others.
        # `as` form is valid on Python 2.6+ and Python 3.
        except Exception as e:
            log.error('Error training wrapper for set "%s": %s' % (set_name, e)) #@UndefinedVariable
def setUp(self):
    """Create the trainer and the example fixtures used by the tests."""
    self.wt = WrapperTrainer([MockRuler()], num_examples=3)
    self.nsets = 5

    mock_manager = MockExampleManager()
    self.example_sets = mock_manager._get_examples(self.nsets, 5, 10)

    # Small hand-written set of three examples.
    example_args = (('v01', 'c01'), ('v02', 'c02'), ('v03', 'c03'))
    self.example_set = [Example(*args) for args in example_args]
def setUp(self):
    """Prepare a WrapperTrainer plus mock example data for each test."""
    trainer = WrapperTrainer([MockRuler()], num_examples=3)
    self.wt = trainer
    self.nsets = 5

    manager = MockExampleManager()
    self.example_sets = manager._get_examples(self.nsets, 5, 10)

    # Three fixed examples, built in order.
    fixed_examples = []
    for first, second in (('v01', 'c01'), ('v02', 'c02'), ('v03', 'c03')):
        fixed_examples.append(Example(first, second))
    self.example_set = fixed_examples
def generate_wrappers(self, url):
    """Train and persist wrappers for every example set obtained for `url`.

    Examples are fetched through an ExampleGateway configured from this
    object's limits. For each returned set a WrapperTrainer is built with
    rulers chosen by the set name, the resulting wrappers are pruned with
    `self._prune_wrappers` and stored through a WrapperGateway.

    A failure while training, pruning or persisting one set is logged and
    does not stop the remaining sets from being processed.

    url -- passed to the example gateway and used when persisting wrappers.
    """
    wrapper_manager = WrapperGateway()
    example_manager = ExampleGateway(
        max_examples=self.max_examples,
        max_examples_from_db=self.max_examples_from_db,
        seconds_between_requests=self.secs_between_reqs)
    example_sets = example_manager.get_examples(self.wrapper_gen_examples,
                                                url,
                                                self.min_validity)

    # `set_name` instead of `set` so the builtin is not shadowed.
    for set_name in example_sets:
        log.info('Starting wrapper training for set "%s"' % set_name) #@UndefinedVariable

        if set_name in ('author', 'editor'):
            # These two sets get the multi-value / person ruler chain.
            rulers = [MultiValuePathRuler(),
                      SeparatorsRegexRuler(),
                      ElementsRegexRuler(),
                      PersonRuler()]
        else:
            # Fall back to a match-anything guide when none is configured.
            try:
                value_guide = self.value_guides[set_name]
            except KeyError:
                value_guide = '.*'
            rulers = [PathRuler(value_guide), RegexRuler()]

        trainer = WrapperTrainer(rulers, self.wrapper_gen_examples)
        try:
            wrappers = trainer.train(example_sets[set_name])
            wrappers = self._prune_wrappers(wrappers)
            wrapper_manager.persist_wrappers(url, set_name, wrappers)
            log.info('Trainer generated %d wrappers' % len(wrappers)) #@UndefinedVariable
        # Deliberately broad: one failing set must not abort the others.
        # `as` form is valid on Python 2.6+ and Python 3.
        except Exception as e:
            log.error('Error training wrapper for set "%s": %s' % (set_name, e)) #@UndefinedVariable
class TestWrapperTrainer(unittest.TestCase):
    """Tests for WrapperTrainer driven by mock rulers, rules and examples.

    The deprecated `failUnless` alias is replaced by `assertEqual` /
    `assertTrue`, which exist on both Python 2 and Python 3 and report the
    compared values on failure.
    """

    def setUp(self):
        """Create a trainer with one mock ruler and the example fixtures."""
        self.wt = WrapperTrainer([MockRuler()], num_examples=3)
        self.nsets = 5
        self.example_sets = MockExampleManager()._get_examples(
            self.nsets, 5, 10)
        self.example_set = [
            Example('v01', 'c01'),
            Example('v02', 'c02'),
            Example('v03', 'c03'),
        ]

    def test_train_too_few_examples(self):
        """Training with a higher example requirement yields no wrappers."""
        self.wt.set_num_examples(20)
        # NOTE(review): the whole `example_sets` container is passed here,
        # whereas test_train passes a single set (`example_sets['a']`).
        # Confirm this is intentional.
        result = self.wt.train(self.example_sets)
        self.assertEqual(len(result), 0)

    def test_train(self):
        """Two mock rulers yield four wrappers with two rules each."""
        self.wt.rulers = [MockRuler(), MockRuler()]
        wrappers = self.wt.train(self.example_sets['a'])
        self.assertEqual(len(wrappers), 4)
        self.assertEqual(len(wrappers[0].rules), 2)
        self.assertTrue(wrappers[0].downvotes > 0)

    def test_get_new_example_set(self):
        """Applying a rule keeps the set size and rewrites the content."""
        example_set = self.wt._get_new_example_set(MockRule('xx'),
                                                   self.example_set)
        self.assertEqual(len(example_set), 3)
        self.assertEqual(example_set[0].content, 'c01_rxx')

    def test_get_rule_sets(self):
        """Two mock rulers yield four rule sets, each of length two."""
        rules = self.wt._get_rule_sets([MockRuler(), MockRuler()],
                                       self.example_set)
        self.assertEqual(len(rules), 4)
        # all() avoids relying on map() returning a list (Python 2 only).
        self.assertTrue(all(len(rule_set) == 2 for rule_set in rules))

    def test_evaluate_single_value_wrapper(self):
        """The (info, value) pair is accepted one way round and rejected
        when swapped."""
        info = 'In Germany'
        value = 'Germany'
        result = self.wt._evaluate_single_value_wrapper(info, value)
        # Equality with True/False (not mere truthiness) keeps the
        # strictness of the original `result == True` checks.
        self.assertEqual(result, True)

        info = 'Germany'
        value = 'In Germany'
        result = self.wt._evaluate_single_value_wrapper(info, value)
        self.assertEqual(result, False)

    def test_evaluate_multi_value_wrapper(self):
        """Three names are accepted against the three patterns; a single
        truncated name is rejected."""
        info_list = ['Alberto Del Angel', 'Pierre Geurts', 'Damien Ernst']
        values = [
            '(Alberto.*Angel|Angel.*Alberto)',
            '(Geurts.*Pierre|Pierre.*Geurts)',
            '(Damien.*Ernst|Ernst.*Damien)',
        ]
        result = self.wt._evaluate_multi_value_wrapper(info_list, values)
        self.assertEqual(result, True)

        info_list = ['Alberto Del Ange']
        result = self.wt._evaluate_multi_value_wrapper(info_list, values)
        self.assertEqual(result, False)

    def test_evaluate_wrapper(self):
        """Evaluating three examples records two upvotes and one downvote."""
        self.example_set = [
            Example('c01_r22_r33', 'c01'),
            Example('c02_r22_r33', 'c02'),
            Example('incorrect_value', 'c03'),
        ]
        wrapper = Wrapper(rules=[MockRule(22), MockRule(33)])
        self.wt._evaluate_wrapper(wrapper, self.example_set)
        self.assertEqual(wrapper.upvotes, 2)
        self.assertEqual(wrapper.downvotes, 1)
class TestWrapperTrainer(unittest.TestCase):
    """Tests for WrapperTrainer driven by mock rulers, rules and examples.

    The deprecated `failUnless` alias is replaced by `assertEqual` /
    `assertTrue`, which exist on both Python 2 and Python 3 and report the
    compared values on failure.
    """

    def setUp(self):
        """Create a trainer with one mock ruler and the example fixtures."""
        self.wt = WrapperTrainer([MockRuler()], num_examples=3)
        self.nsets = 5
        self.example_sets = MockExampleManager()._get_examples(self.nsets,
                                                               5, 10)
        self.example_set = [Example('v01', 'c01'),
                            Example('v02', 'c02'),
                            Example('v03', 'c03')]

    def test_train_too_few_examples(self):
        """Training with a higher example requirement yields no wrappers."""
        self.wt.set_num_examples(20)
        # NOTE(review): the whole `example_sets` container is passed here,
        # whereas test_train passes a single set (`example_sets['a']`).
        # Confirm this is intentional.
        result = self.wt.train(self.example_sets)
        self.assertEqual(len(result), 0)

    def test_train(self):
        """Two mock rulers yield four wrappers with two rules each."""
        self.wt.rulers = [MockRuler(), MockRuler()]
        wrappers = self.wt.train(self.example_sets['a'])
        self.assertEqual(len(wrappers), 4)
        self.assertEqual(len(wrappers[0].rules), 2)
        self.assertTrue(wrappers[0].downvotes > 0)

    def test_get_new_example_set(self):
        """Applying a rule keeps the set size and rewrites the content."""
        example_set = self.wt._get_new_example_set(MockRule('xx'),
                                                   self.example_set)
        self.assertEqual(len(example_set), 3)
        self.assertEqual(example_set[0].content, 'c01_rxx')

    def test_get_rule_sets(self):
        """Two mock rulers yield four rule sets, each of length two."""
        rules = self.wt._get_rule_sets([MockRuler(), MockRuler()],
                                       self.example_set)
        self.assertEqual(len(rules), 4)
        # all() avoids relying on map() returning a list (Python 2 only).
        self.assertTrue(all(len(rule_set) == 2 for rule_set in rules))

    def test_evaluate_single_value_wrapper(self):
        """The (info, value) pair is accepted one way round and rejected
        when swapped."""
        info = 'In Germany'
        value = 'Germany'
        result = self.wt._evaluate_single_value_wrapper(info, value)
        # Equality with True/False (not mere truthiness) keeps the
        # strictness of the original `result == True` checks.
        self.assertEqual(result, True)

        info = 'Germany'
        value = 'In Germany'
        result = self.wt._evaluate_single_value_wrapper(info, value)
        self.assertEqual(result, False)

    def test_evaluate_multi_value_wrapper(self):
        """Three names are accepted against the three patterns; a single
        truncated name is rejected."""
        info_list = ['Alberto Del Angel', 'Pierre Geurts', 'Damien Ernst']
        values = ['(Alberto.*Angel|Angel.*Alberto)',
                  '(Geurts.*Pierre|Pierre.*Geurts)',
                  '(Damien.*Ernst|Ernst.*Damien)']
        result = self.wt._evaluate_multi_value_wrapper(info_list, values)
        self.assertEqual(result, True)

        info_list = ['Alberto Del Ange']
        result = self.wt._evaluate_multi_value_wrapper(info_list, values)
        self.assertEqual(result, False)

    def test_evaluate_wrapper(self):
        """Evaluating three examples records two upvotes and one downvote."""
        self.example_set = [Example('c01_r22_r33', 'c01'),
                            Example('c02_r22_r33', 'c02'),
                            Example('incorrect_value', 'c03')]
        wrapper = Wrapper(rules=[MockRule(22), MockRule(33)])
        self.wt._evaluate_wrapper(wrapper, self.example_set)
        self.assertEqual(wrapper.upvotes, 2)
        self.assertEqual(wrapper.downvotes, 1)