示例#1
0
def fix_image_tables(root):
    """
    Tag short, non-infobox tables that contain image links as image tables.

    Every matching table loses its "margin" style and receives the
    "image-table" class. The widest image (in px) at each cell position is
    collected over all rows and summed up:

    * if the sum, converted with config.px2pt, exceeds config.page_width_pt
      the table also gets the "wide-image-table" class and each cell is given
      a percentage width proportional to its widest image
    * otherwise, if any width was found, every image in the table is rescaled
      with _resize_image_node_width_to_pt

    :param root: tree node that is searched for tables
    """
    selector = (
        '//table[contains(@class, "short-table") and not(contains(@class, "infobox")) and .//a[contains(@class, "image")]]'
    )
    for image_table in root.xpath(selector):
        utils.remove_node_styles(image_table, "margin")
        utils.append_class(image_table, "image-table")

        # widest image seen at each cell index, across all rows of the table
        widest = {}
        for tr in image_table.xpath(".//tr"):
            for idx, cell in enumerate(tr.xpath(".//td")):
                for image in cell.xpath(".//img"):
                    image_px = utils.get_node_width(image, target_unit="px")
                    widest[idx] = max(image_px, widest.get(idx, 0))
        width_sum = sum(widest.values())

        if width_sum * config.px2pt > config.page_width_pt:
            # too wide for the page: switch to relative cell widths
            utils.append_class(image_table, "wide-image-table")
            for tr in image_table.xpath(".//tr"):
                for idx, cell in enumerate(tr.xpath(".//td")):
                    _remove_inner_image_node_width(cell, "image")
                    utils.remove_node_styles(cell, ["padding-left", "padding", "margin"])
                    percent = widest.get(idx, 0) / width_sum * 100
                    utils.add_node_style(cell, "width", "{}%".format(percent))
            continue

        if width_sum > 0:
            # table fits on the page: only rescale the images themselves
            for image in image_table.xpath(".//img"):
                _resize_image_node_width_to_pt(image)
示例#2
0
def remove_img_style_size(root):
    """
    Replace explicit image sizes in thumb containers with a col-* class.

    Handles div nodes whose class contains "thumb" but none of "tmulti",
    "thumbinner", "thumbcaption" or "thumbimage". Containers whose class
    contains "map" are left untouched. Containers without an <img> only get
    their thumbinner size styles removed (a debug message is logged).

    The col-* class is derived from the width of the first image in pt,
    measured in units of four columns and capped at three units
    (i.e. at most col-12).

    :param root: tree node that is searched for thumb containers
    """
    container_filter = " ".join(
        (
            'contains(@class,"thumb") ',
            'and not(contains(@class, "tmulti"))',
            'and not(contains(@class, "thumbinner"))',
            'and not(contains(@class, "thumbcaption"))',
            'and not(contains(@class, "thumbimage"))',
        )
    )
    for container in root.xpath("//div[{}]".format(container_filter)):
        css_class = container.attrib.get("class", "")
        if "map" in css_class:
            continue

        for inner in container.xpath('.//*[contains(@class,"thumbinner")]'):
            utils.remove_node_styles(inner, ["width", "height", "max-width"])

        images = container.xpath(".//img")
        if not images:
            log.debug("No <img> found in {}".format(etree.tostring(container)))
            continue

        first_img = images[0]
        img_width_pt = utils.get_node_width(first_img, target_unit="pt")
        utils.remove_node_styles(first_img, ["width", "height"])

        # number of four-column units the image spans, never more than three
        units = min(int(round(img_width_pt / (column_width_pt * 4))), 3)
        utils.append_class(container, "col-{}".format(units * 4))

        utils.remove_node_width(container)
        utils.remove_node_width(first_img)
示例#3
0
def remove_style_sizes(root):
    """
    Strip explicit sizes from every table that carries a style attribute.

    The "width" and "height" styles and the node width are removed. A table
    with a non-empty "border" attribute loses that attribute and gets the
    "pp_border_table" class instead.

    :param root: tree node that is searched for tables
    """
    styled_tables = root.xpath("//table[@style]")
    for styled_table in styled_tables:
        utils.remove_node_styles(styled_table, ["width", "height"])
        utils.remove_node_width(styled_table)
        if not styled_table.attrib.get("border"):
            continue
        # swap the border attribute for a class
        del styled_table.attrib["border"]
        utils.append_class(styled_table, "pp_border_table")
示例#4
0
def clean_infobox_padding(root):
    """
    Remove padding styles from div, td and th nodes inside infoboxes.

    Only styled nodes whose style attribute text contains "padding" are
    touched.

    :param root: tree node that is searched for infoboxes
    """
    selector = (
        '//*[contains(@class, "infobox")]//*[(self::div or self::td or self::th) and @style]'
    )
    for styled in root.xpath(selector):
        if "padding" not in styled.attrib["style"]:
            continue
        utils.remove_node_styles(
            styled,
            ["padding", "padding-left", "padding-right", "padding-top", "padding-bottom"],
        )
示例#5
0
def _resize_image_node_width_to_pt(node):
    """
    Shrink an <img> node by the px -> pt factor (96px -> 72pt, i.e. 75%).

    The existing width/height information is dropped and a new "width" style
    holding the px width multiplied by config.px2pt is written (still with a
    "px" unit). The factor is somewhat arbitrary but gives decent results on
    sample pages. Nodes that are not <img> elements are ignored.

    :param node: the node to resize
    """
    if node.tag == "img":
        px_width = utils.get_node_width(node, target_unit="px")
        utils.remove_node_styles(node, ["width", "height"])
        utils.remove_node_width(node)
        scaled_width = px_width * config.px2pt
        utils.add_node_style(node, "width", "{}px".format(scaled_width))
示例#6
0
def fix_galleries(root):
    """
    Normalize gallery lists (ul nodes whose class contains "gallery").

    All descendants lose their width, height and "margin" style. Each
    "gallerybox" list item gets the "col-4" class; its image is tagged with
    "thumbimage" and the image source is set as "background-image" style on
    the third-level wrapper around it.

    The image is located purely by position (first child, five levels deep),
    so a gallery box with a different structure raises an IndexError.

    :param root: tree node that is searched for galleries
    """
    for gallery in root.xpath('.//ul[contains(@class, "gallery")]'):
        for descendant in gallery.xpath(".//*"):
            utils.remove_node_width(descendant)
            utils.remove_node_height(descendant)
            utils.remove_node_styles(descendant, "margin")

        for box in gallery.xpath('.//li[contains(@class, "gallerybox")]'):
            utils.append_class(box, "col-4")
            # box > * > * > frame > * > image
            frame = box[0][0][0]
            image = frame[0][0]
            utils.append_class(image, "thumbimage")
            source = image.attrib.get("src")
            utils.add_node_style(frame, "background-image", "url({})".format(source))
示例#7
0
def fix_img_style_size_tmulti(root):
    """
    Replace explicit widths of multi-image thumbs with col-* classes and
    percentages.

    Handles div nodes whose class contains "thumb" and "tmulti" but none of
    "thumbinner", "thumbcaption" or "thumbimage". The "max-width" of the
    first thumbinner descendant (in pt) is taken as the total width, handed
    to resize_node_width_to_columns for the container, and used to express
    the width of every "tsingle" node as a percentage.

    A container without a thumbinner descendant raises an IndexError.

    :param root: tree node that is searched for tmulti containers
    """
    container_filter = " ".join(
        (
            'contains(@class,"thumb") ',
            'and contains(@class, "tmulti")',
            'and not(contains(@class, "thumbinner"))',
            'and not(contains(@class, "thumbcaption"))',
            'and not(contains(@class, "thumbimage"))',
        )
    )
    for container in root.xpath("//div[{}]".format(container_filter)):
        inner = container.xpath('.//*[contains(@class, "thumbinner")]')[0]
        full_width = utils.get_node_size(inner, attr="max-width", target_unit="pt")
        utils.remove_node_styles(inner, "max-width")
        resize_node_width_to_columns(container, full_width)

        for single in inner.xpath('.//*[contains(@class, "tsingle")]'):
            image_width = _remove_inner_image_node_width(single, inner_class="thumbimage")
            share = image_width / full_width * 100
            utils.add_node_style(single, "width", "{}%".format(share))
示例#8
0
def remove_styles(root):
    """
    Remove a fixed set of unwanted inline styles from all nodes below root.

    Candidate nodes are those whose style attribute text contains one of the
    style names; the actual removal is delegated to utils.remove_node_styles.

    :param root: tree node that is searched for styled nodes
    """
    styles = [
        "-moz-column-count",  # https://de.wikipedia.org/wiki/Decatur_County_%28Indiana%29
        "column-count",  # https://de.wikipedia.org/wiki/Decatur_County_%28Indiana%29
        "font",
        "font-size",
        "padding",  # https://en.wikipedia.org/wiki/A%26M_Records,_Inc._v._Napster,_Inc.
    ]

    predicate = " or ".join('contains(@style, "{}")'.format(style) for style in styles)

    # Explicit loop instead of map(): on Python 3 map() is lazy, so an
    # unconsumed map() call would never remove anything.
    for node in root.xpath("//*[{}]".format(predicate)):
        utils.remove_node_styles(node, styles)
示例#9
0
def _remove_inner_image_node_width(node, inner_class="thumbinner"):
    """
    Strip explicit sizes from an image container and its first image.

    The "width", "height" and "max-width" styles are removed from the node
    itself and from every descendant whose class contains inner_class.
    Side effect: a node without any <img> descendant is removed from the tree!

    :param node: the image container
    :param inner_class: "thumbinner" or "thumbimage"
    :return: original width of the first image in pt, 0 if there is no image
    """
    utils.remove_node_styles(node, ["width", "height", "max-width"])
    for wrapper in node.xpath('.//*[contains(@class,"{}")]'.format(inner_class)):
        utils.remove_node_styles(wrapper, ["width", "height", "max-width"])

    images = node.xpath(".//img")
    if images:
        image = images[0]
        original_width = utils.get_node_width(image, target_unit="pt")
        utils.remove_node_styles(image, ["width", "height"])
        utils.remove_node_width(image)
        return original_width

    log.debug("No <img> found in {}. Removing node.".format(etree.tostring(node)))
    utils.remove_node(node)
    return 0
示例#10
0
def clean_infobox_inner_width(root):
    """
    Remove the "width" style from div nodes inside infoboxes.

    The XPath only checks that the style text contains "width" (which also
    matches e.g. "max-width"), so the style reported by utils.get_node_style
    is checked again before removing anything.

    :param root: tree node that is searched for infoboxes
    """
    candidates = root.xpath('//*[contains(@class, "infobox")]//div[contains(@style, "width")]')
    for div in candidates:
        if "width" not in utils.get_node_style(div):
            continue
        utils.remove_node_styles(div, "width")
示例#11
0
def clean_infobox_background_color(root):
    """
    Remove background styles from header cells (th) inside infoboxes.

    :param root: tree node that is searched for infoboxes
    """
    header_cells = root.xpath(
        '//*[contains(@class, "infobox")]//th[contains(@style, "background")]'
    )
    for header_cell in header_cells:
        utils.remove_node_styles(header_cell, ["background-color", "background"])
示例#12
0
def optimize_maps(root):
    """
    Remove the "border" style from div nodes nested inside map containers.

    A map container is any div whose class contains "map".

    :param root: tree node that is searched for map containers
    """
    map_containers = root.xpath('//div[contains(@class, "map")]')
    for map_container in map_containers:
        bordered = map_container.xpath('.//div[contains(@style, "border")]')
        for bordered_div in bordered:
            utils.remove_node_styles(bordered_div, "border")
示例#13
0
def remove_p_padding(root):
    """
    Remove the "padding" style from paragraphs that declare one.

    :param root: tree node that is searched for paragraphs
    """
    padded_paragraphs = root.xpath('//p[contains(@style, "padding")]')
    for paragraph in padded_paragraphs:
        utils.remove_node_styles(paragraph, "padding")
示例#14
0
def remove_styles(root):
    """
    Remove the "margin-left" and "text-align" styles from every table.

    :param root: tree node that is searched for tables
    """
    all_tables = root.xpath("//table")
    for current_table in all_tables:
        utils.remove_node_styles(current_table, ["margin-left", "text-align"])
示例#15
0
def remove_pullquote_margin_styles(root):
    """
    Remove the "margin" style from tables whose class contains "pullquote".

    :param root: tree node that is searched for pullquote tables
    """
    pullquotes = root.xpath('//table[contains(@class, "pullquote")]')
    for pullquote in pullquotes:
        utils.remove_node_styles(pullquote, "margin")