def test_build_tree_with_subjgrp(self):
    """SUBJGRPs sitting where SUBPARTs normally appear must parse cleanly"""
    with XMLBuilder("ROOT") as ctx:
        with ctx.PART():
            ctx.EAR("Pt. 123")
            ctx.HD(u"PART 123—SOME STUFF", SOURCE="HED")
            # SUBPARTs and SUBJGRPs alternate as siblings directly under PART
            with ctx.SUBPART():
                ctx.HD(u"Subpart A—First subpart")
            with ctx.SUBJGRP():
                ctx.HD(u"Changes of Ownership")
            with ctx.SUBPART():
                ctx.HD(u"Subpart B—First subpart")
            with ctx.SUBJGRP():
                ctx.HD(u"Another Top Level")

    root = reg_text.build_tree(ctx.xml)

    self.assertEqual(root.label, ['123'])
    self.assertEqual(4, len(root.children))
    # Children are unpacked in document order
    first_subpart, first_group, second_subpart, second_group = root.children
    self.assertEqual(first_subpart.label, ['123', 'Subpart', 'A'])
    self.assertEqual(second_subpart.label, ['123', 'Subpart', 'B'])
    self.assertEqual(first_group.label, ['123', 'Subjgrp', 'CoO'])
    self.assertEqual(second_group.label, ['123', 'Subjgrp', 'ATL'])
def test_build_tree_with_subjgrp(self):
    """SUBJGRPs sitting where SUBPARTs normally appear must parse cleanly"""
    with XMLBuilder("ROOT") as ctx:
        with ctx.PART():
            ctx.EAR("Pt. 123")
            ctx.HD(u"PART 123—SOME STUFF", SOURCE="HED")
            # SUBPARTs and SUBJGRPs alternate as siblings directly under PART
            with ctx.SUBPART():
                ctx.HD(u"Subpart A—First subpart")
            with ctx.SUBJGRP():
                ctx.HD(u"Changes of Ownership")
            with ctx.SUBPART():
                ctx.HD(u"Subpart B—First subpart")
            with ctx.SUBJGRP():
                ctx.HD(u"Another Top Level")

    root = reg_text.build_tree(ctx.xml)

    self.assertEqual(root.label, ['123'])
    self.assertEqual(4, len(root.children))
    # Children are unpacked in document order
    first_subpart, first_group, second_subpart, second_group = root.children
    self.assertEqual(first_subpart.label, ['123', 'Subpart', 'A'])
    self.assertEqual(second_subpart.label, ['123', 'Subpart', 'B'])
    self.assertEqual(first_group.label, ['123', 'Subjgrp', 'CoO'])
    self.assertEqual(second_group.label, ['123', 'Subjgrp', 'ATL'])
def reg_tree(reg_str):
    """Build a regulation tree from an XML string.

    Input whose first character is '<' is treated as XML and handed to
    reg_text.build_tree. Any other input (including an empty string)
    raises ValueError."""
    looks_like_xml = reg_str[:1] == '<'
    if not looks_like_xml:
        raise ValueError("Building from text input is no longer "
                         "supported")
    return reg_text.build_tree(reg_str)
def reg_tree(reg_str):
    """Build a regulation tree from an XML string.

    Input whose first character is '<' is treated as XML and handed to
    reg_text.build_tree. Any other input (including an empty string)
    raises ValueError."""
    looks_like_xml = reg_str[:1] == '<'
    if not looks_like_xml:
        raise ValueError("Building from text input is no longer "
                         "supported")
    return reg_text.build_tree(reg_str)
from regparser.tree.struct import NodeEncoder
from regparser.tree.xml_parser import reg_text

# Script: parse one regulation XML file and print the encoded tree.

# Hard-coded path of the regulation XML to parse.
reg_xml_file = '/vagrant/data/regulations/regulation/rege-2011-31725.xml'

# Use a context manager so the file handle is closed deterministically
# instead of being left for the garbage collector.
with open(reg_xml_file, 'r') as xml_file:
    reg_xml = xml_file.read()

tree = reg_text.build_tree(reg_xml)

# Single-argument parenthesised form prints the same text whether `print`
# is a statement or a function.
print(NodeEncoder().encode(tree))
def reg_tree(reg_str):
    """Build a regulation tree from either XML or plain text.

    A leading '<' marks the input as XML, which is handed to
    reg_text.build_tree. Everything else goes to build_whole_regtree."""
    if reg_str[:1] != '<':
        # Not XML
        return build_whole_regtree(reg_str)
    return reg_text.build_tree(reg_str)
def process_amendments(notice, notice_xml):
    """Process the changes to the regulation that are expressed in the
    notice.

    Every AMDPAR element found in `notice_xml` is grouped by its parent
    element, parsed into amendment labels, and turned into entries on a
    changes.NoticeChanges accumulator. `notice` is mutated in place and
    nothing is returned:

    * if any amendments were found, notice['amendments'] and
      notice['changes'] are set;
    * otherwise, if notice['document_number'] is in settings.REISSUANCES,
      notice['changes'] is set to a single 'PUT' of the tree built from
      the whole notice XML, keyed by the current default CFR part."""
    amends = []
    notice_changes = changes.NoticeChanges()

    # Group AMDPARs that share a parent element, keeping first-seen order.
    amdpars_by_parent = []
    for par in notice_xml.xpath('//AMDPAR'):
        parent = par.getparent()
        # NOTE(review): truthiness/indexing below needs filter() to return
        # a list (Python 2 behaviour).
        exists = filter(lambda aXp: aXp.parent == parent, amdpars_by_parent)
        if exists:
            exists[0].append(par)
        else:
            amdpars_by_parent.append(AmdparByParent(parent, par))

    default_cfr_part = notice['cfr_parts'][0]
    for aXp in amdpars_by_parent:
        amended_labels = []
        designate_labels, other_labels = [], []
        # Start from the parent's PART attribute, falling back to the
        # current default; parse_amdpar hands back an updated context that
        # is fed into the next AMDPAR of the same group.
        context = [aXp.parent.get('PART') or default_cfr_part]
        for par in aXp.amdpars:
            als, context = parse_amdpar(par, context)
            amended_labels.extend(als)

        # Split labels into subpart designations/additions and everything
        # else; the latter are also bucketed by the first label component.
        labels_by_part = defaultdict(list)
        for al in amended_labels:
            if isinstance(al, DesignateAmendment):
                subpart_changes = process_designate_subpart(al)
                if subpart_changes:
                    notice_changes.update(subpart_changes)
                designate_labels.append(al)
            elif new_subpart_added(al):
                # NOTE(review): `par` is whatever the loop over aXp.amdpars
                # left bound, i.e. the last AMDPAR of this group -- confirm
                # that is intended.
                notice_changes.update(process_new_subpart(notice, al, par))
                designate_labels.append(al)
            else:
                other_labels.append(al)
                labels_by_part[al.label[0]].append(al)

        create_xmlless_changes(other_labels, notice_changes)

        for cfr_part, rel_labels in labels_by_part.iteritems():
            # Same leaked `par` as above: the last AMDPAR of this group.
            section_xml = find_section(par)
            if section_xml is not None:
                subparts = aXp.parent.xpath('.//SUBPART/HD')
                if subparts:
                    # Single character at index 8 of the first heading;
                    # presumably the letter after "Subpart " -- TODO confirm.
                    subpart_label = [cfr_part, 'Subpart',
                                     subparts[0].text[8:9]]
                else:
                    subpart_label = None

                for section in reg_text.build_from_section(cfr_part,
                                                           section_xml):
                    create_xml_changes(rel_labels, section, notice_changes,
                                       subpart_label)

            for appendix in parse_appendix_changes(rel_labels, cfr_part,
                                                   aXp.parent):
                create_xml_changes(rel_labels, appendix, notice_changes)

            interp = parse_interp_changes(rel_labels, cfr_part, aXp.parent)
            if interp:
                create_xml_changes(rel_labels, interp, notice_changes)

        amends.extend(designate_labels)
        amends.extend(other_labels)

        if other_labels:    # Carry cfr_part through amendments
            default_cfr_part = other_labels[-1].label[0]

    if amends:
        notice['amendments'] = amends
        notice['changes'] = notice_changes.changes
    elif notice['document_number'] in settings.REISSUANCES:
        # No amendments parsed: replace the whole tree for this CFR part.
        notice['changes'] = {
            default_cfr_part: [{
                'action': 'PUT',
                'node': reg_text.build_tree(notice_xml)
            }]
        }
def process_amendments(notice, notice_xml):
    """Process the changes to the regulation that are expressed in the
    notice.

    Every AMDPAR element found in `notice_xml` is grouped by its parent
    element, parsed into amendment labels, and turned into entries on a
    changes.NoticeChanges accumulator. The CFR part is taken from
    notice['cfr_part'] and stays fixed for the whole notice; only labels
    whose first component equals that part get XML-backed changes.

    `notice` is mutated in place and nothing is returned:

    * if any amendments were found, notice['amendments'] and
      notice['changes'] are set;
    * otherwise, if notice['document_number'] is in settings.REISSUANCES,
      notice['changes'] is set to a single 'PUT' of the tree built from
      the whole notice XML, keyed by the notice's CFR part."""
    amends = []
    notice_changes = changes.NoticeChanges()

    # Group AMDPARs that share a parent element, keeping first-seen order.
    amdpars_by_parent = []
    for par in notice_xml.xpath('//AMDPAR'):
        parent = par.getparent()
        # A list comprehension (rather than filter()) guarantees a real
        # list, so the truthiness test and [0] below are always valid.
        exists = [group for group in amdpars_by_parent
                  if group.parent == parent]
        if exists:
            exists[0].append(par)
        else:
            amdpars_by_parent.append(AmdparByParent(parent, par))

    default_cfr_part = notice['cfr_part']
    for aXp in amdpars_by_parent:
        amended_labels = []
        designate_labels, other_labels = [], []
        # parse_amdpar hands back an updated context that is fed into the
        # next AMDPAR of the same group.
        context = [default_cfr_part]
        for par in aXp.amdpars:
            als, context = parse_amdpar(par, context)
            amended_labels.extend(als)

        # Split labels into subpart designations/additions and everything
        # else; the latter are also bucketed by the first label component.
        labels_by_part = defaultdict(list)
        for al in amended_labels:
            if isinstance(al, DesignateAmendment):
                subpart_changes = process_designate_subpart(al)
                if subpart_changes:
                    notice_changes.update(subpart_changes)
                designate_labels.append(al)
            elif new_subpart_added(al):
                # NOTE(review): `par` is whatever the loop over aXp.amdpars
                # left bound, i.e. the last AMDPAR of this group -- confirm
                # that is intended.
                notice_changes.update(process_new_subpart(notice, al, par))
                designate_labels.append(al)
            else:
                other_labels.append(al)
                labels_by_part[al.label[0]].append(al)

        create_xmlless_changes(other_labels, notice_changes)

        # Keep only the labels that belong to the notice's own CFR part.
        labels_for_part = {
            part: labels
            for part, labels in labels_by_part.iteritems()
            if part == default_cfr_part
        }
        for cfr_part, rel_labels in labels_for_part.iteritems():
            # Same leaked `par` as above: the last AMDPAR of this group.
            section_xml = find_section(par)
            if section_xml is not None:
                subparts = aXp.parent.xpath('.//SUBPART/HD')
                if subparts:
                    # Single character at index 8 of the first heading;
                    # presumably the letter after "Subpart " -- TODO confirm.
                    subpart_label = [
                        cfr_part, 'Subpart', subparts[0].text[8:9]
                    ]
                else:
                    subpart_label = None

                for section in reg_text.build_from_section(
                        cfr_part, section_xml):
                    create_xml_changes(rel_labels, section, notice_changes,
                                       subpart_label)

            for appendix in parse_appendix_changes(rel_labels, cfr_part,
                                                   aXp.parent):
                create_xml_changes(rel_labels, appendix, notice_changes)

            interp = parse_interp_changes(rel_labels, cfr_part, aXp.parent)
            if interp:
                create_xml_changes(rel_labels, interp,
                                   notice_changes)

        amends.extend(designate_labels)
        amends.extend(other_labels)

    if amends:
        notice['amendments'] = amends
        notice['changes'] = notice_changes.changes
    elif notice['document_number'] in settings.REISSUANCES:
        # No amendments parsed: replace the whole tree for this CFR part.
        notice['changes'] = {
            default_cfr_part: [{
                'action': 'PUT',
                'node': reg_text.build_tree(notice_xml)
            }]
        }
def reg_tree(reg_str):
    """Build a regulation tree from either XML or plain text.

    A leading '<' marks the input as XML, which is handed to
    reg_text.build_tree. Everything else goes to build_whole_regtree."""
    if reg_str[:1] != '<':
        # Not XML
        return build_whole_regtree(reg_str)
    return reg_text.build_tree(reg_str)