def get_body(self):
    """Return the serialized article body with unwanted sub-trees dropped.

    The element returned by ``self.get_element_body()`` is passed, in
    order, to ``try_drop_tree`` for each selector below, then to
    ``clean_relativ_urls`` with the site base URL, and finally serialized
    with ``self.parser.tostring``.
    """
    body = self.get_element_body()
    # Order is kept as-is: social buttons, related-articles aside, share
    # counter. ``try_drop_tree`` is presumably tolerant of a selector that
    # matches nothing (judging by its name) -- confirm against the helper.
    for selector in (
        "li.button-social",
        "aside.articlerelated",
        "div.sharecount",
    ):
        try_drop_tree(self.parser, body, selector)
    clean_relativ_urls(body, "http://presseurop.eu")
    return self.parser.tostring(body)
def get_body(self):
    """Return the serialized article body after stripping site chrome.

    Steps, in order: fetch the body element, run
    ``remove_from_selector_list`` and ``try_remove_from_selector_list``
    over their selector lists, drop ``script`` via ``try_drop_tree``,
    call ``clean_relativ_urls`` with the site base URL, then serialize
    with ``self.parser.tostring``.
    """
    body = self.get_element_body()

    # First pass uses the non-"try" helper.
    remove_from_selector_list(self.parser, body, ["p.auteur", "h4"])

    # NOTE(review): "h4" is listed again here although it was already
    # handled by the call above -- possibly redundant; kept unchanged.
    try_remove_from_selector_list(
        self.parser,
        body,
        ["p.tag", "div.alire", self.element_title_selector, "h4"],
    )

    try_drop_tree(self.parser, body, "script")
    clean_relativ_urls(body, "http://ecrans.fr")

    serialized = self.parser.tostring(body)
    return serialized
def get_body(self):
    """Build the cleaned, serialized body of the article.

    The body element from ``self.get_element_body()`` is modified in
    sequence by ``remove_from_selector_list``,
    ``try_remove_from_selector_list``, ``try_drop_tree`` (for ``script``)
    and ``clean_relativ_urls``; the result of ``self.parser.tostring`` on
    that element is returned.
    """
    content = self.get_element_body()

    required_selectors = ["p.auteur", "h4"]
    remove_from_selector_list(self.parser, content, required_selectors)

    # Built only after the call above so attribute access order matches
    # the original. NOTE(review): "h4" repeats the earlier list -- looks
    # redundant, left untouched.
    optional_selectors = [
        "p.tag",
        "div.alire",
        self.element_title_selector,
        "h4",
    ]
    try_remove_from_selector_list(self.parser, content, optional_selectors)

    try_drop_tree(self.parser, content, "script")
    clean_relativ_urls(content, "http://ecrans.fr")
    return self.parser.tostring(content)
def get_body(self):
    """Return the serialized ``div.sectbody`` element of the document.

    The element is looked up from the document root, has ``div.anchor``
    dropped via ``try_drop_tree``, is passed to ``clean_relativ_urls``
    with the site base URL, and is then serialized with
    ``self.parser.tostring``.
    """
    root = self.document.getroot()
    # NOTE(review): if ``find`` yields no match, the value passed on to the
    # helpers below would not be an element -- behaviour in that case is
    # not visible here; confirm against the helpers.
    section_body = root.find('.//div[@class="sectbody"]')
    try_drop_tree(self.parser, section_body, "div.anchor")
    clean_relativ_urls(section_body, "http://taz.de")
    return self.parser.tostring(section_body)