def test_effective_date_for(self):
    """The effective date is expected to come from <DATE> when that
    element is present, and from <ORIGINALDATE> when it is the only date
    element in the document"""
    both_dates = (
        " <ROOT> <P>CONTENT</P> <DATE>1999-02-03</DATE>"
        " <ORIGINALDATE>1988-06-07</ORIGINALDATE> </ROOT>")
    original_only = (
        " <ROOT> <P>CONTENT</P>"
        " <ORIGINALDATE>1988-06-07</ORIGINALDATE> </ROOT>")

    # Each case pairs a source document with the date it should yield
    cases = [
        (both_dates, '1999-02-03'),
        (original_only, '1988-06-07'),
    ]
    for markup, expected in cases:
        root = etree.fromstring(markup)
        self.assertEqual(fake.effective_date_for(root), expected)
def tree_and_builder(filename, title, checkpoint_path=None, doc_number=None):
    """Parse the regulation XML stored at `filename` and return a
    `(reg_tree, builder)` pair: the parsed tree plus a Builder configured
    for it.

    When `doc_number` is omitted it is derived from the XML via
    Builder.determine_doc_number and no fake notice is attached to the
    Builder. When it is supplied, a fake notice is built from it.
    Intermediate results are routed through a Checkpointer rooted at
    `checkpoint_path`, or a NullCheckpointer if no path is given.

    Raises ValueError if the file content does not begin with '<' or if no
    document number could be determined."""
    # Must be captured before doc_number is (possibly) filled in below
    wants_fake_notice = doc_number is not None

    checkpointer = (NullCheckpointer() if checkpoint_path is None
                    else Checkpointer(checkpoint_path))

    with codecs.open(filename, 'r', 'utf-8') as source_file:
        raw_text = source_file.read()
    # The digest keys the checkpoints to this exact file content
    file_digest = hashlib.sha256(raw_text.encode('utf-8')).hexdigest()

    if raw_text[:1] != '<':
        raise ValueError("Building from text input is no longer supported")
    reg_xml = etree.fromstring(raw_text)

    # Preprocessors modify the XML in place before the tree is built
    for preprocessor_cls in class_paths_to_classes(settings.PREPROCESSORS):
        preprocessor_cls().transform(reg_xml)

    def parse_tree():
        return xml_parser.reg_text.build_tree(reg_xml)

    reg_tree = checkpointer.checkpoint("init-tree-" + file_digest, parse_tree)
    title_part = reg_tree.label_id()

    if doc_number is None:
        def find_doc_number():
            return Builder.determine_doc_number(reg_xml, title, title_part)

        doc_number = checkpointer.checkpoint(
            "doc-number-" + file_digest, find_doc_number)
    if not doc_number:
        raise ValueError("Could not determine document number")

    suffix_fields = ("", title_part, str(title), doc_number)
    checkpointer.suffix = ":".join(suffix_fields)

    fake_notice = None
    if wants_fake_notice:
        fake_notice = notice_fake.build(
            doc_number, notice_fake.effective_date_for(reg_xml),
            title, title_part)

    builder = Builder(
        cfr_title=title,
        cfr_part=title_part,
        doc_number=doc_number,
        checkpointer=checkpointer,
        fake_notice=fake_notice,
    )
    return reg_tree, builder