Exemplo n.º 1
0
    def test_from_filepath(self, neb_assembled_data):
        """Load ``m46882.xhtml`` via ``Document.from_filepath`` and check
        its id, metadata, parsed content, resources and references.
        """
        # Hit the target
        document = Document.from_filepath(neb_assembled_data / 'm46882.xhtml')

        # Identifier and metadata must match the known module data.
        assert document.id == 'm46882'
        wanted_metadata = copy(M46882_METADATA)
        # The metadata produced here is mutable, so the sequence values
        # compare as lists rather than tuples.
        for key in ('keywords', 'subjects'):
            wanted_metadata[key] = list(wanted_metadata[key])
        assert document.metadata == wanted_metadata

        # No metadata element may remain directly under the body ...
        metadata_nodes = document._xml.xpath(
            "/xhtml:body/*[@data-type='metadata']",
            namespaces=HTML_DOCUMENT_NAMESPACES,
        )
        assert metadata_nodes == []
        # ... while a known content element is present exactly once.
        content_nodes = document._xml.xpath(
            "//*[@id='fs-idm20141232']",
            namespaces=HTML_DOCUMENT_NAMESPACES,
        )
        assert len(content_nodes) == 1

        # No resources are expected on a document loaded this way.
        assert sorted(r.filename for r in document.resources) == []

        # The image reference must carry the reference marker prefix.
        marked_ref = '{}/CNX_Stats_C01_M10_003.jpg'.format(
            REFERENCE_MARKER).encode()
        assert marked_ref in document.content
Exemplo n.º 2
0
    def test_from_git_filepath(self, git_assembled_data):
        """Load ``m46882.xhtml`` from the git fixture data via
        ``Document.from_filepath`` and check its id, metadata and content.
        """
        # Hit the target
        document = Document.from_filepath(git_assembled_data / 'm46882.xhtml')

        assert document.id == 'm46882'

        # Build the expected metadata from the shared git fixture constant.
        wanted_metadata = copy(M46882_GIT_METADATA)
        # Metadata here is mutable, so sequences compare as lists, not tuples.
        for key in ('keywords', 'subjects'):
            wanted_metadata[key] = list(wanted_metadata[key])
        # from_index_cnxml parses metadata with cnxml and converts it, while
        # from_filepath goes through cnx-epub; the results differ slightly:
        # there is no 'uuid' entry and 'license_text' is None.
        wanted_metadata.pop('uuid')
        wanted_metadata['license_text'] = None
        assert document.metadata == wanted_metadata

        # No metadata element may remain directly under the body ...
        metadata_nodes = document._xml.xpath(
            "/xhtml:body/*[@data-type='metadata']",
            namespaces=HTML_DOCUMENT_NAMESPACES,
        )
        assert metadata_nodes == []
        # ... while a known content element is present exactly once.
        content_nodes = document._xml.xpath(
            "//*[@id='fs-idm20141232']",
            namespaces=HTML_DOCUMENT_NAMESPACES,
        )
        assert len(content_nodes) == 1
Exemplo n.º 3
0
    def test_sanatize_content(self, request, assembled):
        """Check ``Document._sanatize_content`` against a hand-built tree.

        ``assembled`` is the name of a fixture, resolved through ``request``,
        that provides the directory containing ``m46913.xhtml``.
        """
        source_path = request.getfixturevalue(assembled) / 'm46913.xhtml'
        with source_path.open('rb') as stream:
            tree_under_test = etree.parse(stream)
            # Rewind and parse an independent copy to build the expectation.
            stream.seek(0)
            reference_tree = etree.parse(stream)

        # Hit the target
        actual = Document._sanatize_content(tree_under_test)

        # Construct the expectation: drop the metadata div from the body and
        # strip the body's itemtype/itemscope attributes.
        ns = HTML_DOCUMENT_NAMESPACES
        body = reference_tree.xpath("//xhtml:body", namespaces=ns)[0]
        metadata_div = body.xpath(
            "//xhtml:div[@data-type='metadata']",
            namespaces=ns,
        )[0]
        body.remove(metadata_div)
        for attr_name in ('itemtype', 'itemscope'):
            body.attrib.pop(attr_name)

        assert actual == etree.tostring(reference_tree)
Exemplo n.º 4
0
    def test_find_resources(self, request, neb_collection_data):
        """Check that ``Document._find_resources`` finds the resource files
        in the ``m46909`` module directory, by both id and filename.
        """
        # Hit the target
        found = Document._find_resources(neb_collection_data / 'm46909')

        # Both the ids and the filenames of the discovered resources must
        # equal this (already sorted) list of names.
        wanted_names = [
            'Prev_m16020_DotPlot.png',
            'fig-ch01_02_01n.png',
            'm16020_DotPlot_description.html',
            'm16020_DotPlot_download.pdf',
        ]
        resource_ids = sorted(res.id for res in found)
        assert resource_ids == wanted_names
        resource_filenames = sorted(res.filename for res in found)
        assert resource_filenames == wanted_names
Exemplo n.º 5
0
    def test_from_git_index_cnxml(self, git_collection_data):
        """Build a Document from the git fixture's ``m46882/index.cnxml``
        and check its id, metadata and content.
        """
        cnxml_path = git_collection_data / 'm46882' / 'index.cnxml'

        # Hit the target
        document = Document.from_index_cnxml(
            cnxml_path, mock_reference_resolver)

        # The metadata must equal the git fixture constant unchanged.
        assert document.id == 'm46882'
        assert document.metadata == copy(M46882_GIT_METADATA)

        # No metadata element may remain directly under the body ...
        metadata_nodes = document._xml.xpath(
            "/xhtml:body/*[@data-type='metadata']",
            namespaces=HTML_DOCUMENT_NAMESPACES,
        )
        assert metadata_nodes == []
        # ... while a known content element is present exactly once.
        content_nodes = document._xml.xpath(
            "//*[@id='fs-idm20141232']",
            namespaces=HTML_DOCUMENT_NAMESPACES,
        )
        assert len(content_nodes) == 1
Exemplo n.º 6
0
    def test_from_index_cnxml(self, neb_collection_data):
        """Build a Document from ``m46882/index.cnxml`` and check its id,
        metadata, content, attached resources and rewritten references.
        """
        cnxml_path = neb_collection_data / 'm46882' / 'index.cnxml'

        # Hit the target
        document = Document.from_index_cnxml(
            cnxml_path, mock_reference_resolver)

        assert document.id == 'm46882'
        wanted_metadata = copy(M46882_METADATA)
        # Parsing from index.cnxml goes through the cnxml metadata parser,
        # which yields a uuid of None.
        wanted_metadata['uuid'] = None
        assert document.metadata == wanted_metadata

        # No metadata element may remain directly under the body ...
        metadata_nodes = document._xml.xpath(
            "/xhtml:body/*[@data-type='metadata']",
            namespaces=HTML_DOCUMENT_NAMESPACES,
        )
        assert metadata_nodes == []
        # ... while a known content element is present exactly once.
        content_nodes = document._xml.xpath(
            "//*[@id='fs-idm20141232']",
            namespaces=HTML_DOCUMENT_NAMESPACES,
        )
        assert len(content_nodes) == 1

        # The module's images must be attached as resources.
        attached = sorted(res.filename for res in document.resources)
        assert attached == [
            'CNX_Stats_C01_M10_001.jpg',
            'CNX_Stats_C01_M10_002.jpg',
            'CNX_Stats_C01_M10_003.jpg',
        ]

        # References to attached resources carry the reference marker.
        marked_ref = '{}/CNX_Stats_C01_M10_003.jpg'.format(
            REFERENCE_MARKER).encode()
        body_bytes = document.content
        assert marked_ref in body_bytes
        # External and non-existent resource references are left in place.
        assert b'src="foobar.png"' in body_bytes
        # NOTE(review): the "[email protected]" text below looks like an
        # e-mail-obfuscation artifact rather than a real module reference --
        # confirm the intended value against the upstream test.
        assert b'ef="/[email protected]"' in body_bytes  # rewritten in cnxml->html
        assert b'ef="http://en.wikibooks.org/"' in body_bytes
Exemplo n.º 7
0
def _unlink_if_present(path):
    """Remove the file or symlink at ``path``; do nothing if it is missing.

    Only the missing-path case is tolerated. Any other failure (for
    example, ``path`` being a real directory) still raises.
    """
    try:
        path.unlink()
    except FileNotFoundError:
        pass


def main():
    """Regenerate the assembled output from the cnxml sources.

    For each module, convert ``index.cnxml`` to ``<module>.xhtml`` in the
    output directory and (re)create a symlink from the output directory back
    to the module's source directory. Then write the single-page html for the
    whole collection. Existing outputs are replaced; missing ones are simply
    created, so the script also works on an empty output directory.
    """
    # Transform the modules from cnxml to html
    for module_id in ('m46882', 'm46909', 'm46913'):
        source_dir = input_dir / module_id
        doc = Document.from_index_cnxml(
            source_dir / 'index.cnxml', mock_reference_resolver)

        # Write the html to file, replacing any previous output.
        html_filepath = output_dir / '{}.xhtml'.format(module_id)
        _unlink_if_present(html_filepath)
        with html_filepath.open('wb') as fb:
            fb.write(bytes(formatters.HTMLFormatter(doc)))

        # Create a symbolic link back to the module's source directory.
        # The old link must go first because symlink_to refuses to overwrite.
        link_to_source_dir = output_dir / module_id
        _unlink_if_present(link_to_source_dir)
        link_to_source_dir.symlink_to(relative_path(source_dir, output_dir))

    # Create the single-page-html
    binder = Binder.from_collection_xml(input_dir / 'collection.xml')
    with (output_dir / 'collection.assembled.xhtml').open('wb') as fb:
        fb.write(bytes(formatters.SingleHTMLFormatter(binder)))