def test_candidates(article):
    """Check the expected candidate is present, kept, and ranked first."""
    wanted_hash = '04e46055'

    # Keep the last candidate whose hash matches, or False when none does.
    matches = [
        candidate
        for candidate in article.candidates.values()
        if candidate.hash_id == wanted_hash
    ]
    found = matches[-1] if matches else False
    assert found

    # The node exists at this point; if it is missing later on it must have
    # been deleted, so confirm it is not queued in the drop list.
    for dropped in article._should_drop:
        assert dropped != found.node

    # Rank every candidate by content score, best first.
    ranked = list(article.candidates.values())
    ranked.sort(key=attrgetter('content_score'), reverse=True)
    winner = ranked[0]
    assert winner.node == found.node

    # No assertion below: this only exercises the sibling check and the
    # article preparation on the winning candidate.
    updated_winner = check_siblings(winner, article.candidates)
    updated_winner.node = prep_article(updated_winner.node)
def test_candidates(self):
    """Check the expected candidate is present, kept, and ranked first."""
    parsed = Article(self.article)
    wanted_hash = '1f9378ed'

    # Debug logging helpers (uncomment when investigating a failure):
    # from breadability.logconfig import LNODE
    # from breadability.logconfig import set_logging_level
    # set_logging_level('DEBUG')
    # LNODE.activate()

    # Keep the last candidate whose hash matches, or False when none does.
    matches = [
        candidate
        for candidate in parsed.candidates.values()
        if candidate.hash_id == wanted_hash
    ]
    found = matches[-1] if matches else False
    self.assertTrue(found)

    # The node exists at this point; if it is missing later on it must have
    # been deleted, so confirm it is not queued in the drop list.
    for dropped in parsed._should_drop:
        self.assertFalse(dropped == found.node)

    # Rank every candidate by content score, best first.
    ranked = list(parsed.candidates.values())
    ranked.sort(key=attrgetter('content_score'), reverse=True)
    winner = ranked[0]
    self.assertTrue(winner.node == found.node)

    # No assertion below: this only exercises the sibling check and the
    # article preparation on the winning candidate.
    updated_winner = check_siblings(winner, parsed.candidates)
    updated_winner.node = prep_article(updated_winner.node)
def test_candidates(article):
    """Ensure the wanted candidate exists, survives, and scores highest."""
    wanted_hash = '04e46055'

    # Walk all candidates; the last one with a matching hash is kept and
    # ``found`` stays False if nothing matches.
    found = False
    for candidate in article.candidates.values():
        found = candidate if candidate.hash_id == wanted_hash else found
    assert found

    # If the node is gone by the time it is needed, something deleted it.
    # Rule out the drop list as the cause.
    for dropped in article._should_drop:
        assert dropped != found.node

    # Order the candidates from highest to lowest content score.
    ranked = list(article.candidates.values())
    ranked.sort(key=attrgetter('content_score'), reverse=True)
    top = ranked[0]
    assert top.node == found.node

    # Exercise the sibling check and article preparation on the winner;
    # nothing is asserted about their results.
    updated_winner = check_siblings(top, article.candidates)
    updated_winner.node = prep_article(updated_winner.node)
def test_candidates(self):
    """Ensure the wanted candidate exists, survives, and scores highest."""
    parsed = Article(self.article)
    wanted_hash = '04e46055'

    # Debug logging helpers (uncomment when investigating a failure):
    # from breadability.logconfig import LNODE
    # from breadability.logconfig import set_logging_level
    # set_logging_level('DEBUG')
    # LNODE.activate()

    # Walk all candidates; the last one with a matching hash is kept and
    # ``found`` stays False if nothing matches.
    found = False
    for candidate in parsed.candidates.values():
        found = candidate if candidate.hash_id == wanted_hash else found
    self.assertTrue(found)

    # If the node is gone by the time it is needed, something deleted it.
    # Rule out the drop list as the cause.
    for dropped in parsed._should_drop:
        self.assertFalse(dropped == found.node)

    # Order the candidates from highest to lowest content score.
    ranked = list(parsed.candidates.values())
    ranked.sort(key=attrgetter('content_score'), reverse=True)
    top = ranked[0]
    self.assertTrue(top.node == found.node)

    # Exercise the sibling check and article preparation on the winner;
    # nothing is asserted about their results.
    updated_winner = check_siblings(top, parsed.candidates)
    updated_winner.node = prep_article(updated_winner.node)
def slice(self, before=1, reverse=True):
    """Yield the nodes of the leading candidates after a sibling check.

    Candidates are ordered by ``content_score`` and the first ``before``
    of them are passed through ``check_siblings``; each resulting node
    that is not ``None`` is yielded. Nothing is yielded when there are no
    candidates.

    :param before: how many candidates to take from the front of the ranking.
    :param reverse: handed to the sort; ``True`` puts the highest score first.
    """
    if not self.candidates:
        return

    # Clean up the nodes flagged for dropping, skipping any that no longer
    # have a parent.
    for doomed in self._should_drop:
        if doomed.getparent() is not None:
            doomed.drop_tree()

    # Rank the candidates by content score.
    ranked = list(self.candidates.values())
    ranked.sort(key=attrgetter('content_score'), reverse=reverse)

    # Several candidates may exist, so check each selected one's siblings
    # for extra content.
    for candidate in ranked[:before]:
        winner = check_siblings(candidate, self.candidates)
        # prep_article is not applied to the winner's node here.
        if winner.node is not None:
            yield winner.node
def test_candidates(self):
    """Confirm the wanted candidate is found, not dropped, and wins."""
    parsed = Article(self.article)
    wanted_hash = "04e46055"

    # Keep the last candidate whose hash matches, or False when none does.
    matches = [
        candidate
        for candidate in parsed.candidates.values()
        if candidate.hash_id == wanted_hash
    ]
    found = matches[-1] if matches else False
    self.assertTrue(found)

    # The node exists at this point; if it is missing later on it must have
    # been deleted, so confirm it is not queued in the drop list.
    for dropped in parsed._should_drop:
        self.assertFalse(dropped == found.node)

    # Rank every candidate by content score, best first.
    ranked = list(parsed.candidates.values())
    ranked.sort(key=attrgetter("content_score"), reverse=True)
    winner = ranked[0]
    self.assertTrue(winner.node == found.node)

    # No assertion below: this only exercises the sibling check and the
    # article preparation on the winning candidate.
    updated_winner = check_siblings(winner, parsed.candidates)
    updated_winner.node = prep_article(updated_winner.node)