示例#1
0
def ecfr_all(title, file, act_title, act_section,
         with_all_versions=False, without_versions=False,
         without_notices=False, only_notice=None):
    """ Parse eCFR into RegML """

    # Get the tree and layers
    reg_tree, builder = tree_and_builder(
        file, title, writer_type='XML')
    layer_cache = LayerCacheAggregator()
    layers = builder.generate_layers(reg_tree,
                                     [act_title, act_section],
                                     layer_cache)

    # Do the first version
    last_version = builder.doc_number
    print("Version {}".format(last_version))
    if (only_notice is not None and builder.doc_number == only_notice) \
            or only_notice is None:
        if not without_versions:
            builder.write_regulation(reg_tree, layers=layers)

    for last_notice, old, new_tree, notices in builder.revision_generator(
            reg_tree):
        version = last_notice['document_number']
        print("Version {}".format(version))
        builder.doc_number = version
        layers = builder.generate_layers(new_tree,
                                         [act_title, act_section],
                                         layer_cache,
                                         notices)
        if (only_notice is not None and version == only_notice) or \
                only_notice is None:
            if with_all_versions:
                builder.write_regulation(new_tree, layers=layers)
            if not without_notices:
                builder.write_notice(version,
                                     old_tree=old,
                                     reg_tree=new_tree,
                                     layers=layers,
                                     last_version=last_version)
        layer_cache.invalidate_by_notice(last_notice)
        layer_cache.replace_using(new_tree)
        last_version = version
        del last_notice, old, new_tree, notices     # free some memory
def generate_xml(filename,
                 title,
                 act_title,
                 act_section,
                 notice_doc_numbers,
                 doc_number=None,
                 checkpoint=None):

    act_title_and_section = [act_title, act_section]
    #   First, the regulation tree

    reg_tree, builder = tree_and_builder(filename,
                                         title,
                                         checkpoint,
                                         writer_type='XML')
    layer_cache = LayerCacheAggregator()
    layers = builder.generate_layers(reg_tree, act_title_and_section,
                                     layer_cache)

    # Always do at least the first reg
    logger.info("Version", builder.doc_number)
    builder.write_regulation(reg_tree, layers=layers)
    all_versions = {doc_number: FrozenNode.from_node(reg_tree)}

    for last_notice, old, new_tree, notices in builder.revision_generator(
            reg_tree):
        version = last_notice['document_number']
        logger.info("Version %s", version)
        all_versions[version] = FrozenNode.from_node(new_tree)
        builder.doc_number = version
        layers = builder.generate_layers(new_tree, act_title_and_section,
                                         layer_cache, notices)
        builder.write_regulation(new_tree, layers=layers)
        builder.write_notice(version,
                             old_tree=old,
                             reg_tree=new_tree,
                             layers=layers)
        layer_cache.invalidate_by_notice(last_notice)
        layer_cache.replace_using(new_tree)
        del last_notice, old, new_tree, notices  # free some memory
示例#3
0
    def test_layer_cache(self, init):
        """Integration test for layer caching"""
        init.return_value = None
        cache = LayerCacheAggregator()
        b = Builder()  # Don't need parameters as init's been mocked out
        b.cfr_title, b.cfr_part, b.doc_number = 15, '111', '111-222'
        b.writer = Mock()
        b.checkpointer = NullCheckpointer()
        write = b.writer.layer.return_value.write
        tree = Node(label=["1234"],
                    children=[
                        Node(label=["1234", "1"],
                             children=[
                                 Node("See paragraph (b)",
                                      label=["1234", "1", "a"]),
                                 Node("This is b", label=["1234", "1", "b"])
                             ])
                    ])
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a'], arg.keys())
        cache.replace_using(tree)

        write.reset_mock()
        tree.children[0].children[1].text = "References paragraph (a)"
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a'], arg.keys())

        write.reset_mock()
        tree.children[0].children[0].text = "Contains no references"
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a'], arg.keys())

        write.reset_mock()
        notice = {'document_number': '111-222'}
        cache.invalidate_by_notice(notice)
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a'], arg.keys())

        write.reset_mock()
        notice['changes'] = {'1234-1-b': 'some change'}
        cache.invalidate_by_notice(notice)
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a', '1234-1-b'], list(sorted(arg.keys())))

        write.reset_mock()
        notice['changes'] = {'1234-Subpart-A': 'some change'}
        cache.invalidate_by_notice(notice)
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-b'], list(sorted(arg.keys())))
    def test_layer_cache(self, init):
        """Integration test for layer caching"""
        init.return_value = None
        cache = LayerCacheAggregator()
        b = Builder()   # Don't need parameters as init's been mocked out
        b.cfr_title, b.cfr_part, b.doc_number = 15, '111', '111-222'
        b.writer = Mock()
        b.checkpointer = NullCheckpointer()
        write = b.writer.layer.return_value.write
        tree = Node(label=["1234"], children=[
            Node(label=["1234", "1"], children=[
                Node("See paragraph (b)", label=["1234", "1", "a"]),
                Node("This is b", label=["1234", "1", "b"])])])
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a'], arg.keys())
        cache.replace_using(tree)

        write.reset_mock()
        tree.children[0].children[1].text = "References paragraph (a)"
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a'], arg.keys())

        write.reset_mock()
        tree.children[0].children[0].text = "Contains no references"
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a'], arg.keys())

        write.reset_mock()
        notice = {'document_number': '111-222'}
        cache.invalidate_by_notice(notice)
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a'], arg.keys())

        write.reset_mock()
        notice['changes'] = {'1234-1-b': 'some change'}
        cache.invalidate_by_notice(notice)
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-a', '1234-1-b'], list(sorted(arg.keys())))

        write.reset_mock()
        notice['changes'] = {'1234-Subpart-A': 'some change'}
        cache.invalidate_by_notice(notice)
        b.gen_and_write_layers(tree, [], cache, [])
        arg = write.call_args_list[3][0][0]
        self.assertEqual(['1234-1-b'], list(sorted(arg.keys())))
    builder.write_notices()

    #   Always do at least the first reg
    logger.info("Version %s", doc_number)
    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()
    builder.gen_and_write_layers(reg_tree, sys.argv[4:6], layer_cache)
    layer_cache.replace_using(reg_tree)
    if len(sys.argv) < 7 or sys.argv[6].lower() == 'true':
        all_versions = {doc_number: reg_tree}
        for last_notice, old, new_tree, notices in builder.revision_generator(
                reg_tree):
            version = last_notice['document_number']
            logger.info("Version %s", version)
            all_versions[version] = new_tree
            builder.doc_number = version
            builder.write_regulation(new_tree)
            layer_cache.invalidate_by_notice(last_notice)
            builder.gen_and_write_layers(new_tree, sys.argv[4:6], layer_cache,
                                         notices)
            layer_cache.replace_using(new_tree)

        # now build diffs - include "empty" diffs comparing a version to itself
        for lhs_version, lhs_tree in all_versions.iteritems():
            for rhs_version, rhs_tree in all_versions.iteritems():
                comparer = treediff.Compare(lhs_tree, rhs_tree)
                comparer.compare()
                builder.writer.diff(reg_tree.label_id(), lhs_version,
                                    rhs_version).write(comparer.changes)
示例#6
0
    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    # this used to assume implicitly that if gen-diffs was not specified it was
    # True; changed it to explicit check
    if args.generate_diffs:
        all_versions = {doc_number: reg_tree}

        for last_notice, old, new_tree, notices in builder.revision_generator(
                reg_tree):
            version = last_notice['document_number']
            logger.info("Version %s", version)
            all_versions[version] = new_tree
            builder.doc_number = version
            builder.write_regulation(new_tree)
            layer_cache.invalidate_by_notice(last_notice)
            builder.gen_and_write_layers(new_tree, act_title_and_section,
                                         layer_cache, notices)
            layer_cache.replace_using(new_tree)

        # convert to frozen trees
        for doc in all_versions:
            all_versions[doc] = FrozenNode.from_node(all_versions[doc])

        # now build diffs - include "empty" diffs comparing a version to itself
        for lhs_version, lhs_tree in all_versions.iteritems():
            for rhs_version, rhs_tree in all_versions.iteritems():
                changes = checkpointer.checkpoint(
                    "-".join(["diff", lhs_version, rhs_version]),
                    lambda: dict(changes_between(lhs_tree, rhs_tree)))
                builder.writer.diff(