def __init__(self, filepath, sections_table):
    """Parse the file at ``filepath`` and populate this instance.

    Attributes are assigned in this order: ``filepath``, ``tree``,
    ``header``, ``section``, ``hrs_element``, ``catchline``, ``refs`` and
    ``sections``.  The steps run strictly in sequence; later helper calls
    may rely on attributes set by earlier ones.

    Args:
        filepath: Location of the file to parse.  Its ``name`` attribute
            is read to select a per-file fix-up (presumably a
            ``pathlib.Path`` -- confirm against callers).
        sections_table: Forwarded unchanged to ``get_hrs_element`` and
            ``check_for_section_and_catchline_errors``.
    """
    self.filepath = filepath
    self.tree = hrs_xml.parse_html_to_xml_tree(filepath)

    # Per-file fix-ups keyed by file name; applied right after the tree is
    # built and before any of the parsing steps below.
    hacks = {
        'HRS_0708-8300.htm': self.hack1
    }
    fixup = hacks.get(filepath.name)
    if fixup is not None:
        fixup()

    # Header / section identification.
    self.header = self.parse_for_section_header()
    self.section = self.get_section()
    self.hrs_element = self.get_hrs_element(sections_table)
    self.catchline = self.get_catchline()
    self.check_for_section_and_catchline_errors(sections_table)

    # Text pipeline: each stage consumes the previous stage's result.
    paragraphs = self.get_paragraphs_after_catchline()
    cleaned = self.cleanup_lines(paragraphs)
    body = self.maybe_separate_effective(cleaned)

    self.refs = self.parse_refs(body)
    marked_up = self.markup_text_with_refs(body, self.refs)
    self.sections = self.split_text_into_sections(marked_up)

    # NOTE(review): ``statute_text`` and ``indents`` are not used in the
    # code visible here, and ``parse_subsections`` is given the full
    # marked-up text rather than ``statute_text`` -- confirm this is
    # intended.  The lookups and calls are kept because they can raise or
    # have side effects.
    statute_text = self.sections["STATUTE"][1:]
    parsed_subsections = self.parse_subsections(marked_up)
    indents = self.convert_parsed_subsections_to_indents(parsed_subsections)
def __init__(self, filepath, common_element=None):
    """Parse ``filepath`` into ``self.tree`` and attach an element object.

    Args:
        filepath: Passed to ``hrs_xml.parse_html_to_xml_tree`` and to the
            element (via its constructor or ``set_filepath``).
        common_element: Optional existing element to reuse.  When omitted
            (``None``), a new ``HRSElement`` is created from ``filepath``.
    """
    self.tree = hrs_xml.parse_html_to_xml_tree(filepath)

    if common_element is not None:
        # Reuse the caller-supplied element and point it at this file.
        # NOTE(review): the placement of ``set_filepath`` inside this
        # branch was inferred from the statement order -- confirm it is
        # not meant to run for a freshly built ``HRSElement`` as well.
        self.elem = common_element
        self.elem.set_filepath(filepath)
    else:
        self.elem = HRSElement(filepath)