def get_section_header_elements(self, extra_xpaths=None):
    """Return the elements matched by the section-header XPaths.

    The built-in XPaths select ``<b>`` and ``<strong>`` descendants of
    ``/html/body`` and ``<span>`` elements at ``/html/body/div/p/span/span``
    whose ``style`` attribute is exactly ``font-family:"Courier New"``.

    Args:
        extra_xpaths: Optional list of additional XPath strings, appended
            after the built-in ones. ``None`` (the default) adds nothing.

    Returns:
        The result of ``hrs_xml.get_elements_by_xpaths`` for ``self.tree``
        and the combined XPath list.
    """
    # A None sentinel replaces the former ``[]`` default so that no list
    # object is shared between calls.
    if extra_xpaths is None:
        extra_xpaths = []
    section_header_xpaths = [
        '/html/body//b',
        '/html/body/div/p//b',
        '/html/body//strong',
        '/html/body/div/p/span/span[@style=\'font-family:"Courier New"\']',
    ]
    # Concatenation builds a new list; the caller's list is never modified.
    xpaths = section_header_xpaths + extra_xpaths
    return hrs_xml.get_elements_by_xpaths(self.tree, xpaths)
def get_elements(self):
    """Return the elements matched by ``/html/body/div/p`` in ``self.tree``.

    The lookup is delegated to ``hrs_xml.get_elements_by_xpaths`` and its
    result is returned unchanged.
    """
    paragraph_xpaths = ["/html/body/div/p"]
    return hrs_xml.get_elements_by_xpaths(self.tree, paragraph_xpaths)
def get_paragraphs(self):
    """Return the ``text`` attribute of every ``/html/body/div/p`` element.

    Elements are looked up in ``self.tree`` through
    ``hrs_xml.get_elements_by_xpaths``; the returned list holds one entry per
    matched element, in the order the helper yields them.
    """
    paragraph_xpaths = ["/html/body/div/p"]
    matched = hrs_xml.get_elements_by_xpaths(self.tree, paragraph_xpaths)
    texts = []
    for element in matched:
        texts.append(element.text)
    return texts