示例#1
0
def generate_diffs(reg_tree, act_title_and_section, builder, layer_cache):
    """ Write out every revision of the regulation, then a diff for each
        ordered pair of versions (a version paired with itself included).
        Broken out into a separate function to assist with profiling so it's
        easier to determine which parts of the parser take the most time.

        reg_tree is the starting tree; it is frozen and stored under
        builder.doc_number. builder.doc_number is then reassigned to the
        document number of each revision as the revisions are walked.
        act_title_and_section is handed to builder.gen_and_write_layers
        unchanged. Nothing is returned; all output goes through builder
        and builder.writer. """
    doc_number, checkpointer = builder.doc_number, builder.checkpointer
    all_versions = {doc_number: FrozenNode.from_node(reg_tree)}

    for last_notice, old, new_tree, notices in builder.revision_generator(
            reg_tree):
        version = last_notice['document_number']
        logger.info("Version %s", version)
        all_versions[version] = FrozenNode.from_node(new_tree)
        builder.doc_number = version
        builder.write_regulation(new_tree)
        layer_cache.invalidate_by_notice(last_notice)
        builder.gen_and_write_layers(new_tree, act_title_and_section,
                                     layer_cache, notices)
        layer_cache.replace_using(new_tree)
        del last_notice, old, new_tree, notices     # free some memory

    # Grab what the diff phase needs before dropping the big objects
    label_id = reg_tree.label_id()
    writer = builder.writer
    del reg_tree, layer_cache, builder  # free some memory

    # dict.iteritems() only exists on Python 2; items() works on both 2 and
    # 3. The pairs are materialised once so the inner loop does not rebuild
    # them on every outer iteration.
    version_pairs = list(all_versions.items())

    # now build diffs - include "empty" diffs comparing a version to itself
    for lhs_version, lhs_tree in version_pairs:
        for rhs_version, rhs_tree in version_pairs:
            # NOTE(review): the lambda closes over the loop variables, which
            # is only safe if checkpoint() calls it before the next
            # iteration - confirm against the checkpointer implementation.
            changes = checkpointer.checkpoint(
                "-".join(["diff", lhs_version, rhs_version]),
                lambda: dict(changes_between(lhs_tree, rhs_tree)))
            writer.diff(label_id, lhs_version, rhs_version).write(changes)
示例#2
0
    def write_notice(self, doc_number, old_tree=None, reg_tree=None,
                     layers=None, last_version=''):
        """ Write out the notice whose document number is doc_number. The
            XMLWriter needs the reg_tree that goes with the notice, so it is
            passed along to the writer together with any computed changes
            and last_version (as left_doc_number). """
        # Find the first notice with a matching document number; stays None
        # when nothing matches
        notice = None
        for candidate in self.notices:
            if candidate['document_number'] == doc_number:
                notice = candidate
                break

        # We can optionally write out the diffs with the notice, but only
        # when both the old and the new tree were supplied.
        if old_tree is None or reg_tree is None:
            changes = {}
        else:
            # FrozenNode and Node are not API-compatible, so both trees are
            # frozen before they are compared.
            frozen_old = FrozenNode.from_node(old_tree)
            frozen_new = FrozenNode.from_node(reg_tree)
            changes = dict(changes_between(frozen_old, frozen_new))

        # Write the notice
        notice_writer = self.writer.notice(
            self.cfr_part, self.doc_number,
            notices=self.notices, layers=layers)
        notice_writer.write(
            notice, changes=changes, reg_tree=reg_tree,
            left_doc_number=last_version)
示例#3
0
def generate_diffs(reg_tree, act_title_and_section, builder, layer_cache):
    """ Generate all the diffs for the given regulation: every revision is
        written out first, then one diff per ordered pair of versions.
        Broken out into a separate function to assist with profiling so it's
        easier to determine which parts of the parser take the most time.

        The starting reg_tree is frozen and stored under builder.doc_number;
        builder.doc_number is overwritten with each revision's document
        number while the revisions are processed. layer_cache is invalidated
        for each notice and refreshed from each new tree. Nothing is
        returned. """
    doc_number, checkpointer = builder.doc_number, builder.checkpointer
    all_versions = {doc_number: FrozenNode.from_node(reg_tree)}

    for last_notice, old, new_tree, notices in builder.revision_generator(
            reg_tree):
        version = last_notice['document_number']
        logger.info("Version %s", version)
        all_versions[version] = FrozenNode.from_node(new_tree)
        builder.doc_number = version
        builder.write_regulation(new_tree)
        layer_cache.invalidate_by_notice(last_notice)
        builder.gen_and_write_layers(new_tree, act_title_and_section,
                                     layer_cache, notices)
        layer_cache.replace_using(new_tree)
        del last_notice, old, new_tree, notices  # free some memory

    # Keep only what the diff phase needs, then release the rest
    label_id = reg_tree.label_id()
    writer = builder.writer
    del reg_tree, layer_cache, builder  # free some memory

    # items() instead of the Python-2-only iteritems(); snapshot the pairs
    # once so both loops walk the same list.
    frozen_versions = list(all_versions.items())

    # now build diffs - include "empty" diffs comparing a version to itself
    for lhs_version, lhs_tree in frozen_versions:
        for rhs_version, rhs_tree in frozen_versions:
            # NOTE(review): the lambda reads lhs_tree/rhs_tree when it is
            # called, not when it is created - fine as long as checkpoint()
            # runs it immediately; confirm.
            changes = checkpointer.checkpoint(
                "-".join(["diff", lhs_version, rhs_version]),
                lambda: dict(changes_between(lhs_tree, rhs_tree)))
            writer.diff(label_id, lhs_version, rhs_version).write(changes)
    def write_notice(self,
                     doc_number,
                     old_tree=None,
                     reg_tree=None,
                     layers=None,
                     last_version=''):
        """ Write a single notice, looked up in self.notices by its document
            number. The reg_tree is forwarded because the XMLWriter needs it
            alongside the notice. """
        # Locate the notice by doc number (None if there is no match)
        selected = None
        for entry in self.notices:
            if entry['document_number'] == doc_number:
                selected = entry
                break

        # Diffs are optional: they are only computed when both the old tree
        # and the new tree are given.
        diff = {}
        both_trees_given = not (old_tree is None or reg_tree is None)
        if both_trees_given:
            # FrozenNode and Node are not API-compatible, hence the
            # conversion before comparing.
            before = FrozenNode.from_node(old_tree)
            after = FrozenNode.from_node(reg_tree)
            diff = dict(changes_between(before, after))

        # Write the notice
        target = self.writer.notice(self.cfr_part,
                                    self.doc_number,
                                    notices=self.notices,
                                    layers=layers)
        target.write(selected,
                     changes=diff,
                     reg_tree=reg_tree,
                     left_doc_number=last_version)
def generate_diff(left_xml, right_xml):
    """ Build a reg tree from each of the two full RegML trees and return
        the changes between them as a dictionary, in the style of
        regulations-parser.
        This wraps regulations-parser's changes_between() function. """
    # Build both trees first, then freeze them, left before right each time
    trees = [build_reg_tree(xml) for xml in (left_xml, right_xml)]
    frozen = [FrozenNode.from_node(tree) for tree in trees]
    return dict(changes_between(*frozen))
示例#6
0
def generate_diff(left_xml, right_xml):
    """ Compare two full RegML trees and return a dictionary of the changes
        between them, in the style of regulations-parser.
        This wraps regulations-parser's changes_between() function. """
    before = build_reg_tree(left_xml)
    after = build_reg_tree(right_xml)

    # changes_between() is given frozen copies of both trees
    frozen_before = FrozenNode.from_node(before)
    frozen_after = FrozenNode.from_node(after)
    return dict(changes_between(frozen_before, frozen_after))
    def test_subparts(self):
        """ Create a tree with no subparts, then add subparts. The diff is
            expected to add both new subparts, report '204-Subpart' as
            deleted and leave the two sections out of the result. """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 First Section"
        sect1 = u"(a) I believe this is (b) the best section "
        sect2_title = u"§ 204.2 Second Section"
        sect2 = u"Some sections \ndon't have \ndepth at all."

        old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
        older = reg_text.build_reg_text_tree(old_text, 204)

        # Same title and sections as above, with a subpart heading placed in
        # front of each section
        ntitle = u"Regulation Title"
        nsubpart_a = u"Subpart A—First subpart"
        nsect1_title = u"§ 204.1 First Section"
        nsect1 = u"(a) I believe this is (b) the best section "
        nsubpart_b = u"Subpart B—Second subpart"
        nsect2_title = u"§ 204.2 Second Section"
        nsect2 = u"Some sections \ndon't have \ndepth at all."

        new_text = "\n".join([
            ntitle, nsubpart_a, nsect1_title, nsect1, nsubpart_b, nsect2_title,
            nsect2
        ])
        newer = reg_text.build_reg_text_tree(new_text, 204)

        result = dict(
            difftree.changes_between(FrozenNode.from_node(older),
                                     FrozenNode.from_node(newer)))

        # assertEqual rather than assertEquals: the latter is a deprecated
        # alias that newer unittest versions no longer provide
        self.assertEqual(
            result['204-Subpart-A'], {
                "node": {
                    "text": u"",
                    "node_type": u"subpart",
                    "tagged_text": None,
                    "label": ("204", "Subpart", "A"),
                    "child_labels": ("204-1", ),
                    "title": u"First subpart"
                },
                "op": "added"
            })
        # assertIn/assertNotIn show the container contents on failure
        self.assertIn('204-Subpart-B', result)
        self.assertEqual(result['204-Subpart'], {"op": "deleted"})
        # Sections shouldn't have changed, though
        self.assertNotIn('204-1', result)
        self.assertNotIn('204-2', result)
    def test_subparts(self):
        """ Create a tree with no subparts, then add subparts. The diff is
            expected to add both new subparts, report "204-Subpart" as
            deleted and leave the two sections out of the result. """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 First Section"
        sect1 = u"(a) I believe this is (b) the best section "
        sect2_title = u"§ 204.2 Second Section"
        sect2 = u"Some sections \ndon't have \ndepth at all."

        old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
        older = reg_text.build_reg_text_tree(old_text, 204)

        # Same title and sections as above, with a subpart heading placed in
        # front of each section
        ntitle = u"Regulation Title"
        nsubpart_a = u"Subpart A—First subpart"
        nsect1_title = u"§ 204.1 First Section"
        nsect1 = u"(a) I believe this is (b) the best section "
        nsubpart_b = u"Subpart B—Second subpart"
        nsect2_title = u"§ 204.2 Second Section"
        nsect2 = u"Some sections \ndon't have \ndepth at all."

        new_text = "\n".join([
            ntitle, nsubpart_a, nsect1_title, nsect1,
            nsubpart_b, nsect2_title, nsect2,
        ])
        newer = reg_text.build_reg_text_tree(new_text, 204)

        result = dict(
            difftree.changes_between(
                FrozenNode.from_node(older), FrozenNode.from_node(newer)))

        # assertEqual rather than assertEquals: the latter is a deprecated
        # alias that newer unittest versions no longer provide
        self.assertEqual(
            result["204-Subpart-A"],
            {
                "node": {
                    "text": u"",
                    "node_type": u"subpart",
                    "tagged_text": None,
                    "label": ("204", "Subpart", "A"),
                    "child_labels": ("204-1",),
                    "title": u"First subpart",
                },
                "op": "added",
            },
        )
        # assertIn/assertNotIn show the container contents on failure
        self.assertIn("204-Subpart-B", result)
        self.assertEqual(result["204-Subpart"], {"op": "deleted"})
        # Sections shouldn't have changed, though
        self.assertNotIn("204-1", result)
        self.assertNotIn("204-2", result)
def generate_xml(filename,
                 title,
                 act_title,
                 act_section,
                 notice_doc_numbers,
                 doc_number=None,
                 checkpoint=None):
    """ Build the regulation tree with an 'XML' writer, write the initial
        version with its layers, then write each later revision together
        with its notice.

        filename, title and checkpoint are forwarded to tree_and_builder().
        act_title and act_section are combined into the list handed to
        builder.generate_layers(). doc_number is the key the initial frozen
        tree is stored under in all_versions. notice_doc_numbers is not
        read by the code shown here. """
    act_title_and_section = [act_title, act_section]

    # First, the regulation tree
    reg_tree, builder = tree_and_builder(filename,
                                         title,
                                         checkpoint,
                                         writer_type='XML')
    layer_cache = LayerCacheAggregator()
    layers = builder.generate_layers(reg_tree, act_title_and_section,
                                     layer_cache)

    # Always do at least the first reg.
    # The format string needs a %s placeholder: passing an argument without
    # one makes logging report a formatting error instead of the version.
    logger.info("Version %s", builder.doc_number)
    builder.write_regulation(reg_tree, layers=layers)
    # NOTE(review): keyed by the doc_number argument (default None) rather
    # than builder.doc_number - confirm this is intended.
    all_versions = {doc_number: FrozenNode.from_node(reg_tree)}

    for last_notice, old, new_tree, notices in builder.revision_generator(
            reg_tree):
        version = last_notice['document_number']
        logger.info("Version %s", version)
        all_versions[version] = FrozenNode.from_node(new_tree)
        builder.doc_number = version
        layers = builder.generate_layers(new_tree, act_title_and_section,
                                         layer_cache, notices)
        builder.write_regulation(new_tree, layers=layers)
        builder.write_notice(version,
                             old_tree=old,
                             reg_tree=new_tree,
                             layers=layers)
        # NOTE(review): the cache is invalidated after the layers for this
        # version were generated - verify that this order is deliberate.
        layer_cache.invalidate_by_notice(last_notice)
        layer_cache.replace_using(new_tree)
        del last_notice, old, new_tree, notices  # free some memory
示例#10
0
    # this used to assume implicitly that if gen-diffs was not specified it was
    # True; changed it to explicit check
    if args.generate_diffs:
        # Maps document number -> tree, starting with the initial version
        all_versions = {doc_number: reg_tree}

        # Write each revision and its layers, remembering its tree by the
        # document number of the last notice
        for last_notice, old, new_tree, notices in builder.revision_generator(
                reg_tree):
            version = last_notice['document_number']
            logger.info("Version %s", version)
            all_versions[version] = new_tree
            builder.doc_number = version
            builder.write_regulation(new_tree)
            layer_cache.invalidate_by_notice(last_notice)
            builder.gen_and_write_layers(new_tree, act_title_and_section,
                                         layer_cache, notices)
            layer_cache.replace_using(new_tree)

        # convert to frozen trees
        # (values are replaced in place; the set of keys does not change)
        for doc in all_versions:
            all_versions[doc] = FrozenNode.from_node(all_versions[doc])

        # now build diffs - include "empty" diffs comparing a version to itself
        # NOTE(review): dict.iteritems() exists only on Python 2
        for lhs_version, lhs_tree in all_versions.iteritems():
            for rhs_version, rhs_tree in all_versions.iteritems():
                # The checkpoint is keyed "diff-<lhs>-<rhs>"; the lambda
                # computes the changes for this pair of frozen trees
                changes = checkpointer.checkpoint(
                    "-".join(["diff", lhs_version, rhs_version]),
                    lambda: dict(changes_between(lhs_tree, rhs_tree)))
                builder.writer.diff(
                    reg_tree.label_id(), lhs_version, rhs_version
                ).write(changes)