Пример #1
0
def _test_resource(class_, basename, check, **kwargs):
    """Exercise every supported way of handing a resource to ``class_``.

    The resource named ``basename`` is passed to ``class_`` as an absolute
    filename, a ``Path``, a URL, an open binary file object, a relative
    filename, and as raw/decoded string content. Every object that gets
    built is handed to ``check``. Extra ``kwargs`` are forwarded to each
    ``class_`` call.

    Note that the working directory of the process is changed to the
    directory containing this file and is not restored afterwards.
    """
    def build(source, keyword=None):
        # Pass ``source`` positionally, or under the given keyword name.
        if keyword is None:
            return class_(source, **kwargs)
        return class_(**{keyword: source}, **kwargs)

    absolute_filename = resource_filename(basename)
    absolute_path = Path(absolute_filename)

    # Absolute location as str and as Path: positional, guess=, filename=.
    for keyword in (None, 'guess', 'filename'):
        for source in (absolute_filename, absolute_path):
            check(build(source, keyword))

    # URL built from the text filename.
    text_url = path2url(absolute_filename)
    for keyword in (None, 'guess'):
        check(build(text_url, keyword))

    # URL built from the UTF-8 encoded (bytes) filename.
    bytes_url = path2url(absolute_filename.encode('utf-8'))
    check(build(bytes_url, 'url'))

    # A freshly opened file object for each variant, so each starts at 0.
    for keyword in (None, 'guess', 'file_obj'):
        with open(absolute_filename, 'rb') as fd:
            check(build(fd, keyword))

    with open(absolute_filename, 'rb') as fd:
        content = fd.read()

    # Relative locations are resolved against this file's directory.
    py.path.local(os.path.dirname(__file__)).chdir()
    relative_filename = os.path.join('resources', basename)
    for source in (relative_filename, Path(relative_filename)):
        check(build(source))

    # From here on base_url is supplied explicitly, so drop any caller value.
    kwargs.pop('base_url', None)
    check(class_(string=content, base_url=relative_filename, **kwargs))
    encoding = kwargs.get('encoding') or 'utf8'
    decoded = content.decode(encoding)  # unicode
    check(class_(string=decoded, base_url=relative_filename, **kwargs))

    # Two sources given at once must be rejected.
    with pytest.raises(TypeError):
        class_(filename='foo', url='bar')
Пример #2
0
 def add_css(self, font_config):
     """Replace the document head with a title and a link to the design CSS.

     The stylesheet location is ``self.design`` converted with
     ``urls.path2url``. ``font_config`` is accepted but not used here.
     """
     stylesheet_url = urls.path2url(self.design)
     self.html.head.clear()
     # Adjacent literals reproduce the original single string, including
     # the run of spaces between the two attributes.
     head_template = (
         '<title>{}</title><link rel="stylesheet" '
         '         href="{}" type="text/css">')
     head_markup = head_template.format(self.title, stylesheet_url)
     parsed_head = BeautifulSoup(head_markup, 'html5lib')
     self.html.head.insert(0, parsed_head)
Пример #3
0
def test_embedded_files_attachments(tmpdir):
    """Check that attachments from links and from ``write_pdf`` reach the PDF.

    Two files are written to ``tmpdir``; one is linked by absolute URL and
    the other by a name relative to ``base_url``. Further attachments are
    given through the ``attachments`` argument. The resulting PDF bytes are
    searched for MD5 digests, names and descriptions.
    """
    def digest(payload):
        # Hex MD5 of ``payload`` as ASCII bytes, the form searched in the PDF.
        return hashlib.md5(payload).hexdigest().encode('ascii')

    absolute_data = b'12345678'
    absolute_file = tmpdir.join('some_file.txt').strpath
    with open(absolute_file, 'wb') as handle:
        handle.write(absolute_data)
    absolute_url = path2url(absolute_file)
    assert absolute_url.startswith('file://')

    relative_data = b'abcdefgh'
    relative_file = tmpdir.join('äöü.txt').strpath
    with open(relative_file, 'wb') as handle:
        handle.write(relative_data)

    markup = '''
          <title>Test document</title>
          <meta charset="utf-8">
          <link
            rel="attachment"
            title="some file attachment äöü"
            href="data:,hi%20there">
          <link rel="attachment" href="{0}">
          <link rel="attachment" href="{1}">
          <h1>Heading 1</h1>
          <h2>Heading 2</h2>
        '''.format(absolute_url, os.path.basename(relative_file))
    document = FakeHTML(string=markup, base_url=tmpdir.strpath)
    extra_attachments = [
        Attachment('data:,oob attachment', description='Hello'),
        'data:,raw URL',
        io.BytesIO(b'file like obj'),
    ]
    pdf = document.write_pdf(attachments=extra_attachments)

    # Non-ASCII description is expected as a BOM-prefixed UTF-16-BE hex string.
    description = BOM_UTF16_BE + 'some file attachment äöü'.encode('utf-16-be')
    description_entry = b'/Desc <' + description.hex().encode('ascii') + b'>'

    # Attachment from the data: URL link.
    assert b'<' + digest(b'hi there') + b'>' in pdf
    assert b'/F ()' in pdf
    assert b'/UF (attachment.bin)' in pdf
    assert description_entry in pdf

    # Attachment linked by absolute file URL.
    assert digest(absolute_data) in pdf
    assert os.path.basename(absolute_file).encode('ascii') in pdf

    # Attachment linked by relative name.
    assert digest(relative_data) in pdf
    # NOTE(review): same description check as above, kept as in the original.
    assert description_entry in pdf

    # Attachments passed to write_pdf().
    assert digest(b'oob attachment') in pdf
    assert b'/Desc (Hello)' in pdf
    assert digest(b'raw URL') in pdf
    assert digest(b'file like obj') in pdf

    assert b'/EmbeddedFiles' in pdf
    assert b'/Outlines' in pdf
Пример #4
0
 def on_post_page(self, output_content, page, config, **kwargs):
     """Register the rendered page with the generator and link its PDF.

     Returns ``output_content``, passed through ``modify_html`` when
     ``self.generator.add_article`` returns a truthy PDF URL.
     """
     try:
         destination = page.file.abs_dest_path
         source = page.file.src_path
     except AttributeError:
         # Page object without a ``file`` attribute: use the flat attributes.
         destination = page.abs_output_path
         source = page.input_path

     output_dir = os.path.dirname(destination)
     os.makedirs(output_dir, exist_ok=True)

     # Base URL: the output directory joined with the extension-less source name.
     stem, _extension = os.path.splitext(os.path.basename(source))
     base_url = urls.path2url(os.path.join(output_dir, stem))

     pdf_url = self.generator.add_article(output_content, page, base_url)
     if not pdf_url:
         return output_content
     return modify_html(output_content, pdf_url)
Пример #5
0
 def add_head(self):
     """Rebuild the document ``<head>`` from the title, metadata and CSS.

     The head receives a ``<title>``, optional ``author`` and
     ``description`` ``<meta>`` tags (the author comes from
     ``self.config['author']``, falling back to
     ``self.mkdconfig['site_author']``), and one stylesheet ``<link>`` for
     each of ``self.design`` and ``self.design_extra`` that is set.

     Title and metadata values are HTML-escaped before being interpolated,
     so a value containing ``"``, ``<`` or ``&`` can no longer terminate
     the attribute or element early and corrupt the generated head.
     """
     # Local import keeps this method self-contained.
     from html import escape

     # Element content only needs ``<``, ``>`` and ``&`` escaped.
     lines = ['<title>{}</title>'.format(escape(str(self.title), quote=False))]
     for key, val in (
         ("author", self.config['author'] or self.mkdconfig['site_author']),
         ("description", self.mkdconfig['site_description']),
     ):
         if val:
             # Attribute values additionally need their quotes escaped.
             lines.append('<meta name="{}" content="{}">'.format(
                 key, escape(str(val))))
     for css in (self.design, self.design_extra):
         if css:
             css_tmpl = '<link rel="stylesheet" href="{}" type="text/css">'
             lines.append(css_tmpl.format(urls.path2url(css)))
     head = BeautifulSoup('\n'.join(lines), 'html5lib')
     self.html.head.clear()
     self.html.head.insert(0, head)
Пример #6
0
    def _soup_from_content(self, content: str, page):
        """Parse ``content`` and prepare it for the combined document.

        The page's output directory is created if missing. The base URL is
        that directory joined with the source file name without extension.
        Returns whatever ``prep_combined`` returns.
        """
        soup = BeautifulSoup(content, 'html.parser')

        try:
            destination = page.file.abs_dest_path
            source = page.file.src_path
        except AttributeError:
            # Page object without a ``file`` attribute: use flat attributes.
            destination = page.abs_output_path
            source = page.input_path

        output_dir = os.path.dirname(destination)
        os.makedirs(output_dir, exist_ok=True)

        stem, _extension = os.path.splitext(os.path.basename(source))
        base_url = urls.path2url(os.path.join(output_dir, stem))

        # NOTE(review): ``page.file.url`` is read unconditionally, so a page
        # that took the fallback branch above would raise AttributeError
        # here. Confirm whether the fallback is still reachable.
        return prep_combined(soup, base_url, page.file.url)
Пример #7
0
def test_image_image():
    """Render an SVG ``<image>`` pointing at pattern.png and compare pixels."""
    image_url = path2url(resource_filename('pattern.png'))
    expected_pixels = '''
        rBBB
        BBBB
        BBBB
        BBBB
    '''
    markup = '''
      <style>
        @page { size: 4px 4px }
        svg { display: block }
      </style>
      <svg width="4px" height="4px" xmlns="http://www.w3.org/2000/svg">
        <image xlink:href="%s" />
      </svg>
    ''' % image_url
    assert_pixels('test_image_image', 4, 4, expected_pixels, markup)
    def on_post_page(self, output_content, page, config):
        """Render the page to PDF and return the HTML with a PDF link added.

        When the plugin is disabled the content is returned untouched.
        Otherwise the page is either added to the combined document or
        written to its own PDF next to the HTML output, depending on
        ``self.combined``. Conversion errors are reported on stderr and
        counted in ``self.num_errors``; the content is then returned without
        a link. File count and elapsed time are accumulated on ``self``.
        """
        if not self.enabled:
            return output_content

        started = timer()
        self.num_files += 1

        try:
            destination = page.file.abs_dest_path
            source = page.file.src_path
        except AttributeError:
            # Support for mkdocs <1.0
            destination = page.abs_output_path
            source = page.input_path

        output_dir = os.path.dirname(destination)
        os.makedirs(output_dir, exist_ok=True)

        stem, _extension = os.path.splitext(os.path.basename(source))

        from weasyprint import urls
        base_url = urls.path2url(os.path.join(output_dir, stem))
        pdf_name = stem + '.pdf'

        try:
            if not self.combined:
                # One PDF per page, written beside the HTML output.
                self.renderer.write_pdf(
                    output_content, base_url,
                    os.path.join(output_dir, pdf_name))
                link_target = pdf_name
            else:
                self.renderer.add_doc(output_content, base_url, page.file.url)
                link_target = self.get_path_to_pdf_from(page.file.dest_path)
            output_content = self.renderer.add_link(
                output_content, link_target)
        except Exception as e:
            message = 'Error converting {} to PDF: {}'.format(source, e)
            print(message, file=sys.stderr)
            self.num_errors += 1

        finished = timer()
        self.total_time += (finished - started)

        return output_content
Пример #9
0
def test_command_line_render(tmpdir):
    """Compare PDFs produced through ``_run`` with reference renderings.

    Reference PDFs are rendered with ``FakeHTML`` from the resources
    directory, then the same documents are written to ``tmpdir`` and
    rendered through ``_run`` with various argument combinations. The
    working directory is changed several times and is not restored.
    """
    css = b'''
        @page { margin: 2px; size: 8px; background: #fff }
        @media screen { img { transform: rotate(-90deg) } }
        body { margin: 0; font-size: 0 }
    '''
    html = b'<body><img src=pattern.png>'
    # Same document with the stylesheet inlined or linked.
    combined = b'<style>' + css + b'</style>' + html
    linked = b'<link rel=stylesheet href=style.css>' + html

    # Render the references from the resources directory, where pattern.png
    # can be found relative to the dummy base URL.
    py.path.local(resource_filename('')).chdir()
    # Reference
    html_obj = FakeHTML(string=combined, base_url='dummy.html')
    pdf_bytes = html_obj.write_pdf()
    rotated_pdf_bytes = FakeHTML(
        string=combined, base_url='dummy.html',
        media_type='screen').write_pdf()

    # Populate tmpdir with the input files used by the runs below.
    tmpdir.chdir()
    with open(resource_filename('pattern.png'), 'rb') as pattern_fd:
        pattern_bytes = pattern_fd.read()
    tmpdir.join('pattern.png').write_binary(pattern_bytes)
    tmpdir.join('no_css.html').write_binary(html)
    tmpdir.join('combined.html').write_binary(combined)
    tmpdir.join('combined-UTF-16BE.html').write_binary(
        combined.decode('ascii').encode('UTF-16BE'))
    tmpdir.join('linked.html').write_binary(linked)
    tmpdir.join('style.css').write_binary(css)

    # Relative input filename.
    _run('combined.html out2.pdf')
    assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes

    # Explicit input encoding.
    _run('combined-UTF-16BE.html out3.pdf --encoding UTF-16BE')
    assert tmpdir.join('out3.pdf').read_binary() == pdf_bytes

    # Absolute input filename.
    _run(tmpdir.join('combined.html').strpath + ' out4.pdf')
    assert tmpdir.join('out4.pdf').read_binary() == pdf_bytes

    # Input given as a URL.
    _run(path2url(tmpdir.join('combined.html').strpath) + ' out5.pdf')
    assert tmpdir.join('out5.pdf').read_binary() == pdf_bytes

    _run('linked.html --debug out6.pdf')  # test relative URLs
    assert tmpdir.join('out6.pdf').read_binary() == pdf_bytes

    # Output names without a .pdf extension still receive the same bytes.
    _run('combined.html --verbose out7')
    _run('combined.html --quiet out8')
    assert tmpdir.join('out7').read_binary() == pdf_bytes
    assert tmpdir.join('out8').read_binary() == pdf_bytes

    # Without CSS the output differs; adding it with -s matches the reference.
    _run('no_css.html out9.pdf')
    _run('no_css.html out10.pdf -s style.css')
    assert tmpdir.join('out9.pdf').read_binary() != pdf_bytes
    assert tmpdir.join('out10.pdf').read_binary() == pdf_bytes

    # '-' as output: the bytes come back as the return value of _run.
    stdout = _run('combined.html -')
    assert stdout == pdf_bytes

    # '-' as input: the document is supplied through stdin.
    _run('- out11.pdf', stdin=combined)
    assert tmpdir.join('out11.pdf').read_binary() == pdf_bytes

    stdout = _run('- -', stdin=combined)
    assert stdout == pdf_bytes

    # Long and short media type options match the 'screen' reference.
    _run('combined.html out13.pdf --media-type screen')
    _run('combined.html out12.pdf -m screen')
    _run('linked.html out14.pdf -m screen')
    assert tmpdir.join('out12.pdf').read_binary() == rotated_pdf_bytes
    assert tmpdir.join('out13.pdf').read_binary() == rotated_pdf_bytes
    assert tmpdir.join('out14.pdf').read_binary() == rotated_pdf_bytes

    # Count occurrences of b'attachment' for zero, one and two -a options.
    # NOTE(review): the zero-attachment check appears twice in a row.
    stdout = _run('combined.html -')
    assert stdout.count(b'attachment') == 0
    stdout = _run('combined.html -')
    assert stdout.count(b'attachment') == 0
    stdout = _run('-a pattern.png combined.html -')
    assert stdout.count(b'attachment') == 1
    stdout = _run('-a style.css -a pattern.png combined.html -')
    assert stdout.count(b'attachment') == 2

    # From a subdirectory, pattern.png is no longer next to the stdin input:
    # one error is logged but a PDF is still produced.
    os.mkdir('subdirectory')
    py.path.local('subdirectory').chdir()
    with capture_logs() as logs:
        stdout = _run('- -', stdin=combined)
    assert len(logs) == 1
    assert logs[0].startswith('ERROR: Failed to load image')
    assert stdout.startswith(b'%PDF')

    # Empty base URL: the relative image reference is reported as an error.
    with capture_logs() as logs:
        stdout = _run('--base-url= - -', stdin=combined)
    assert len(logs) == 1
    assert logs[0].startswith(
        'ERROR: Relative URI reference without a base URI')
    assert stdout.startswith(b'%PDF')

    # Pointing the base URL at the parent directory matches the reference.
    stdout = _run('--base-url .. - -', stdin=combined)
    assert stdout == pdf_bytes

    # --info and --version exit instead of rendering.
    with pytest.raises(SystemExit):
        _run('--info')

    with pytest.raises(SystemExit):
        _run('--version')
Пример #10
0
        (1, 'I', (0, 150), 'open'),
    ]], False),
    ('<h1>é', [
        [(1, 'é', (0, 0), 'open')]
    ], False),
    ('''
        <h1 style="transform: translateX(50px)">!
    ''', [
        [(1, '!', (50, 0), 'open')]
    ], False),
    ('''
        <style>
          img { display: block; bookmark-label: attr(alt); bookmark-level: 1 }
        </style>
        <img src="%s" alt="Chocolate" />
    ''' % path2url(resource_filename('pattern.png')),
     [[(1, 'Chocolate', (0, 0), 'open')]], False),
    ('''
        <h1 style="transform-origin: 0 0;
                   transform: rotate(90deg) translateX(50px)">!
    ''', [[(1, '!', (0, 50), 'open')]], True),
    ('''
        <body style="transform-origin: 0 0; transform: rotate(90deg)">
        <h1 style="transform: translateX(50px)">!
    ''', [[(1, '!', (0, 50), 'open')]], True),
))
@assert_no_logs
def test_assert_bookmarks(html, expected_by_page, round_):
    document = FakeHTML(string=html).render()
    if round_:
        _round_meta(document.pages)
Пример #11
0
    }.items())


class FakeHTML(HTML):
    """``HTML`` subclass that uses the lighter test user-agent stylesheet."""
    def _ua_stylesheets(self):
        # Only the test stylesheet is returned, in a fresh list on each call.
        stylesheets = [TEST_UA_STYLESHEET]
        return stylesheets


def resource_filename(basename):
    """Return the path of ``basename`` inside the ``resources`` directory.

    The ``resources`` directory is the one located next to this file.
    """
    here = os.path.dirname(__file__)
    return os.path.join(here, 'resources', basename)


# URL of a dummy '<test>' file name placed in the resources directory; only
# the directory part is meaningful.
BASE_URL = path2url(resource_filename('<test>'))


class CallbackHandler(logging.Handler):
    """Logging handler that passes every emitted record to a callable."""
    def __init__(self, callback):
        super().__init__()
        # The instance attribute shadows the ``emit`` method, so the
        # callback receives whatever would have been passed to ``emit``.
        self.emit = callback


@contextlib.contextmanager
def capture_logs():
    """Return a context manager that captures all logged messages."""
    logger = LOGGER
    messages = []
Пример #12
0
def test_annotate_document():
    """Check computed styles for the elements of ``doc1.html``.

    The document is styled with ``mini_ua.css`` as user-agent stylesheet
    and ``user.css`` as user stylesheet, then selected computed values are
    compared with expected ones.
    """
    document = FakeHTML(resource_filename('doc1.html'))
    # Replace the user-agent stylesheets of this instance only.
    document._ua_stylesheets = lambda: [CSS(resource_filename('mini_ua.css'))]
    style_for = get_all_computed_styles(
        document, user_stylesheets=[CSS(resource_filename('user.css'))])

    # Element objects behave as lists of their children
    _head, body = document.etree_element
    h1, p, ul, div = body
    li_0, _li_1 = ul
    a, = li_0
    span1, = div
    span2, = span1

    # From here on each name refers to the element's computed style instead
    # of the element. The ``after`` pseudo-element style must be fetched
    # before ``a`` is rebound.
    h1 = style_for(h1)
    p = style_for(p)
    ul = style_for(ul)
    li_0 = style_for(li_0)
    div = style_for(div)
    after = style_for(a, 'after')
    a = style_for(a)
    span1 = style_for(span1)
    span2 = style_for(span2)

    # The image URL is expected in absolute form.
    assert h1['background_image'] == ((
        'url', path2url(resource_filename('logo_small.png'))), )

    assert h1['font_weight'] == 700
    assert h1['font_size'] == 40  # 2em

    # x-large * initial = 3/2 * 16 = 24
    assert p['margin_top'] == (24, 'px')
    assert p['margin_right'] == (0, 'px')
    assert p['margin_bottom'] == (24, 'px')
    assert p['margin_left'] == (0, 'px')
    assert p['background_color'] == 'currentColor'

    # 2em * 1.25ex = 2 * 20 * 1.25 * 0.8 = 40
    # 2.5ex * 1.25ex = 2.5 * 0.8 * 20 * 1.25 * 0.8 = 40
    # TODO: ex unit doesn't work with @font-face fonts, see computed_values.py
    # assert ul['margin_top'] == (40, 'px')
    # assert ul['margin_right'] == (40, 'px')
    # assert ul['margin_bottom'] == (40, 'px')
    # assert ul['margin_left'] == (40, 'px')

    assert ul['font_weight'] == 400
    # thick = 5px, 0.25 inches = 96*.25 = 24px
    assert ul['border_top_width'] == 0
    assert ul['border_right_width'] == 5
    assert ul['border_bottom_width'] == 0
    assert ul['border_left_width'] == 24

    assert li_0['font_weight'] == 700
    assert li_0['font_size'] == 8  # 6pt
    assert li_0['margin_top'] == (16, 'px')  # 2em
    assert li_0['margin_right'] == (0, 'px')
    assert li_0['margin_bottom'] == (16, 'px')
    assert li_0['margin_left'] == (32, 'px')  # 4em

    assert a['text_decoration_line'] == {'underline'}
    assert a['font_weight'] == 900
    assert a['font_size'] == 24  # 300% of 8px
    assert a['padding_top'] == (1, 'px')
    assert a['padding_right'] == (2, 'px')
    assert a['padding_bottom'] == (3, 'px')
    assert a['padding_left'] == (4, 'px')
    assert a['border_top_width'] == 42
    assert a['border_bottom_width'] == 42

    assert a['color'] == (1, 0, 0, 1)
    assert a['border_top_color'] == 'currentColor'

    assert div['font_size'] == 40  # 2 * 20px
    assert span1['width'] == (160, 'px')  # 10 * 16px (root default is 16px)
    assert span1['height'] == (400, 'px')  # 10 * (2 * 20px)
    assert span2['font_size'] == 32

    # The href attr should be as in the source, not made absolute.
    assert after['content'] == (('string', ' ['), ('string', 'home.html'),
                                ('string', ']'))
    assert after['background_color'] == (1, 0, 0, 1)
    assert after['border_top_width'] == 42
    assert after['border_bottom_width'] == 3
Пример #13
0
def test_command_line_render(tmpdir):
    """Compare PDFs produced through ``_run`` with reference renderings.

    Input files are written to ``tmpdir``, reference PDFs are rendered with
    ``FakeHTML``, and the same documents are then rendered through ``_run``
    with various argument combinations, including the ``-O`` options. The
    working directory is changed and is not restored.
    """
    css = b'''
        @page { margin: 2px; size: 8px; background: #fff }
        @media screen { img { transform: rotate(-90deg) } }
        body { margin: 0; font-size: 0 }
    '''
    html = b'<body><img src=pattern.png>'
    # Same document with the stylesheet inlined or linked.
    combined = b'<style>' + css + b'</style>' + html
    linked = b'<link rel=stylesheet href=style.css>' + html
    not_optimized = b'<body>a<img src="not-optimized.jpg">'

    # Copy the images into tmpdir so relative references resolve there.
    tmpdir.chdir()
    for name in ('pattern.png', 'not-optimized.jpg'):
        pattern_bytes = Path(resource_filename(name)).read_bytes()
        tmpdir.join(name).write_binary(pattern_bytes)

    # Reference
    html_obj = FakeHTML(string=combined, base_url='dummy.html')
    pdf_bytes = html_obj.write_pdf()
    rotated_pdf_bytes = FakeHTML(string=combined,
                                 base_url='dummy.html',
                                 media_type='screen').write_pdf()

    # Input files used by the runs below.
    tmpdir.join('no_css.html').write_binary(html)
    tmpdir.join('combined.html').write_binary(combined)
    tmpdir.join('combined-UTF-16BE.html').write_binary(
        combined.decode('ascii').encode('UTF-16BE'))
    tmpdir.join('linked.html').write_binary(linked)
    tmpdir.join('not_optimized.html').write_binary(not_optimized)
    tmpdir.join('style.css').write_binary(css)

    # Relative input filename.
    _run('combined.html out2.pdf')
    assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes

    # Explicit input encoding.
    _run('combined-UTF-16BE.html out3.pdf --encoding UTF-16BE')
    assert tmpdir.join('out3.pdf').read_binary() == pdf_bytes

    # Absolute input filename.
    _run(tmpdir.join('combined.html').strpath + ' out4.pdf')
    assert tmpdir.join('out4.pdf').read_binary() == pdf_bytes

    # Input given as a URL.
    _run(path2url(tmpdir.join('combined.html').strpath) + ' out5.pdf')
    assert tmpdir.join('out5.pdf').read_binary() == pdf_bytes

    _run('linked.html --debug out6.pdf')  # test relative URLs
    assert tmpdir.join('out6.pdf').read_binary() == pdf_bytes

    # Output names without a .pdf extension still receive the same bytes.
    _run('combined.html --verbose out7')
    _run('combined.html --quiet out8')
    assert tmpdir.join('out7').read_binary() == pdf_bytes
    assert tmpdir.join('out8').read_binary() == pdf_bytes

    # Without CSS the output differs; adding it with -s matches the reference.
    _run('no_css.html out9.pdf')
    _run('no_css.html out10.pdf -s style.css')
    assert tmpdir.join('out9.pdf').read_binary() != pdf_bytes
    assert tmpdir.join('out10.pdf').read_binary() == pdf_bytes

    # '-' as output: the bytes come back as the return value of _run.
    stdout = _run('combined.html -')
    assert stdout == pdf_bytes

    # '-' as input: the document is supplied through stdin.
    _run('- out11.pdf', stdin=combined)
    assert tmpdir.join('out11.pdf').read_binary() == pdf_bytes

    stdout = _run('- -', stdin=combined)
    assert stdout == pdf_bytes

    # Long and short media type options match the 'screen' reference.
    _run('combined.html out13.pdf --media-type screen')
    _run('combined.html out12.pdf -m screen')
    _run('linked.html out14.pdf -m screen')
    assert tmpdir.join('out12.pdf').read_binary() == rotated_pdf_bytes
    assert tmpdir.join('out13.pdf').read_binary() == rotated_pdf_bytes
    assert tmpdir.join('out14.pdf').read_binary() == rotated_pdf_bytes

    # Run every -O combination; only three of the outputs are compared below.
    _run('not_optimized.html out15.pdf')
    _run('not_optimized.html out16.pdf -O images')
    _run('not_optimized.html out17.pdf -O fonts')
    _run('not_optimized.html out18.pdf -O fonts -O images')
    _run('not_optimized.html out19.pdf -O all')
    _run('not_optimized.html out20.pdf -O none')
    _run('not_optimized.html out21.pdf -O none -O all')
    _run('not_optimized.html out22.pdf -O all -O none')
    # TODO: test that equivalent CLI options give equivalent PDF sizes,
    # unfortunately font optimization makes PDF generation not reproducible
    # Expected size order: '-O images' < no option < '-O none'.
    assert (len(tmpdir.join('out16.pdf').read_binary()) < len(
        tmpdir.join('out15.pdf').read_binary()) < len(
            tmpdir.join('out20.pdf').read_binary()))

    # Count occurrences of b'attachment' for zero, one and two -a options.
    # NOTE(review): the zero-attachment check appears twice in a row.
    stdout = _run('combined.html -')
    assert stdout.count(b'attachment') == 0
    stdout = _run('combined.html -')
    assert stdout.count(b'attachment') == 0
    stdout = _run('-a pattern.png combined.html -')
    assert stdout.count(b'attachment') == 1
    stdout = _run('-a style.css -a pattern.png combined.html -')
    assert stdout.count(b'attachment') == 2

    # From a subdirectory, pattern.png is no longer next to the stdin input:
    # one error is logged but a PDF is still produced.
    os.mkdir('subdirectory')
    py.path.local('subdirectory').chdir()
    with capture_logs() as logs:
        stdout = _run('- -', stdin=combined)
    assert len(logs) == 1
    assert logs[0].startswith('ERROR: Failed to load image')
    assert stdout.startswith(b'%PDF')

    # Empty base URL: the relative image reference is reported as an error.
    with capture_logs() as logs:
        stdout = _run('--base-url= - -', stdin=combined)
    assert len(logs) == 1
    assert logs[0].startswith(
        'ERROR: Relative URI reference without a base URI')
    assert stdout.startswith(b'%PDF')

    # Pointing the base URL at the parent directory matches the reference.
    stdout = _run('--base-url .. - -', stdin=combined)
    assert stdout == pdf_bytes

    # --info and --version exit instead of rendering.
    with pytest.raises(SystemExit):
        _run('--info')

    with pytest.raises(SystemExit):
        _run('--version')