def remove_img_style_size(root):
    """
    Replace explicit image sizes inside thumb containers with a col-* class.

    Selects every <div> whose class contains "thumb" but none of "tmulti",
    "thumbinner", "thumbcaption" or "thumbimage". Containers whose class
    contains "map" are left untouched. For each remaining container:

    * width/height/max-width styles are stripped from all "thumbinner"
      descendants,
    * the width of the first <img> is read in pt, its width/height styles
      are removed,
    * the container gets a "col-N" class where N is the image width expressed
      in units of four columns, capped at 3 units and multiplied by 4,
    * explicit widths are removed from both the container and the image.

    Containers without an <img> are only logged and otherwise skipped.
    """
    container_predicate = " ".join(
        [
            'contains(@class,"thumb") ',
            'and not(contains(@class, "tmulti"))',
            'and not(contains(@class, "thumbinner"))',
            'and not(contains(@class, "thumbcaption"))',
            'and not(contains(@class, "thumbimage"))',
        ]
    )
    for container in root.xpath("//div[{}]".format(container_predicate)):
        if "map" in container.attrib.get("class", ""):
            continue

        for inner in container.xpath('.//*[contains(@class,"thumbinner")]'):
            utils.remove_node_styles(inner, ["width", "height", "max-width"])

        images = container.xpath(".//img")
        if not images:
            log.debug("No <img> found in {}".format(etree.tostring(container)))
            continue

        first_img = images[0]
        width_pt = utils.get_node_width(first_img, target_unit="pt")
        utils.remove_node_styles(first_img, ["width", "height"])

        # Number of four-column units the image spans, never more than three.
        units = min(int(round(width_pt / (column_width_pt * 4))), 3)
        utils.append_class(container, "col-{}".format(units * 4))

        utils.remove_node_width(container)
        utils.remove_node_width(first_img)
def fix_thumbs(root):
    """
    Remove the explicit width from every "thumbinner" div below root.

    Otherwise correct image sizing/positioning is not guaranteed.
    """
    thumbinner_divs = root.xpath('.//div[contains(@class, "thumbinner")]')
    for thumbinner in thumbinner_divs:
        utils.remove_node_width(thumbinner)
def remove_style_sizes(root):
    """
    Strip explicit sizes from all tables that carry a style attribute.

    Width/height styles and the explicit width are removed from every
    <table style=...>. If the table has a non-empty "border" attribute, that
    attribute is deleted and the class "pp_border_table" is added instead.
    """
    styled_tables = root.xpath("//table[@style]")
    for styled_table in styled_tables:
        utils.remove_node_styles(styled_table, ["width", "height"])
        utils.remove_node_width(styled_table)

        if not styled_table.attrib.get("border"):
            continue
        # The border is expressed through a class rather than the attribute.
        del styled_table.attrib["border"]
        utils.append_class(styled_table, "pp_border_table")
def _resize_image_node_width_to_pt(node):
    """
    Shrink an <img> node by the px -> pt factor (96px -> 72pt = 75%).

    The scale factor is more or less deliberate but looks decent in sample
    pages. Nodes that are not <img> elements are ignored. The new width is
    written as a "width" style in px, after the existing width/height styles
    and the explicit width have been removed.
    """
    if node.tag == "img":
        original_width_px = utils.get_node_width(node, target_unit="px")
        utils.remove_node_styles(node, ["width", "height"])
        utils.remove_node_width(node)
        scaled_width = original_width_px * config.px2pt
        utils.add_node_style(node, "width", "{}px".format(scaled_width))
def fix_galleries(root):
    """
    Normalise gallery markup so that each gallery box spans four columns.

    For every <ul> whose class contains "gallery":

    * explicit width, height and the "margin" style are removed from all
      descendants,
    * every <li class="...gallerybox..."> gets the class "col-4", its image
      gets the class "thumbimage", and the image URL is set as
      "background-image" style on the image's third-level wrapper.

    :param root: lxml element to search below
    """
    for gallery in root.xpath('.//ul[contains(@class, "gallery")]'):
        for descendant in gallery.xpath(".//*"):
            utils.remove_node_width(descendant)
            utils.remove_node_height(descendant)
            # Pass a list like every other remove_node_styles call site; a
            # bare string could be treated as a sequence of characters.
            utils.remove_node_styles(descendant, ["margin"])

        for box in gallery.xpath('.//li[contains(@class, "gallerybox")]'):
            utils.append_class(box, "col-4")
            # The image is addressed positionally: the wrapper is the first
            # child three levels below the box, the image two levels below
            # the wrapper. NOTE(review): this relies on the gallery markup
            # having exactly this nesting - confirm against the HTML source.
            image_wrapper = box[0][0][0]
            img = image_wrapper[0][0]
            utils.append_class(img, "thumbimage")

            url = img.attrib.get("src")
            if url is None:
                # Without a src there is nothing to show; avoid emitting the
                # literal style "background-image: url(None)".
                continue
            utils.add_node_style(
                image_wrapper, "background-image", "url({})".format(url)
            )
def add_class_to_infobox_wide_images(root):
    """
    Mark wide infobox images and rescale narrow ones.

    Every <img> below an element whose class contains "infobox" is inspected:

    * images with a width attribute greater than 100 get the class
      `infobox-img-wide`, lose their explicit width and height, and every
      ancestor <td> gets the class `contains-img-wide`;
    * images with a width attribute of at most 100 get that width divided by
      config.px2pt.

    Images without a width attribute, or with a width that is not a plain
    integer (e.g. "50%"), are left unchanged.

    :param root: lxml element tree to process
    """
    for node in root.xpath('//*[contains(@class, "infobox")]//img'):
        raw_width = node.attrib.get("width")
        if raw_width is None:
            continue
        try:
            # Parse once instead of re-parsing the attribute in every branch.
            width = int(raw_width)
        except ValueError:
            # Non-integer widths cannot be classified; skip the image rather
            # than aborting the whole pass.
            continue

        if width > 100:
            utils.append_class(node, "infobox-img-wide")
            utils.remove_node_width(node)
            utils.remove_node_height(node)
            for td in node.xpath("./ancestor::td"):
                utils.append_class(td, "contains-img-wide")
        else:
            node.attrib["width"] = str(width / config.px2pt)
def resize_node_width_to_columns(node, width_in_pt, use_thirds_only=True):
    """
    Resize a given node to columns by adding a col-* class.

    The explicit width of the node is removed first. The node is then given
    the class of the first entry in config.columns whose width is larger than
    width_in_pt. If no column is wide enough, the node is instead either
    wrapped in a <div class="over-wide-wrapper"> and marked "over-wide" (when
    width_in_pt does not exceed config.tolerated_over_width) or marked
    "rotated-table".

    :param node: element to resize
    :param width_in_pt: width the node needs, in pt
    :param use_thirds_only: round the column count up to the next multiple
        of 4 before building the class name
    """
    utils.remove_node_width(node)

    # Materialise the values once: dict.values() is a view without .index()
    # on Python 3, while a list works on both Python 2 and 3.
    column_widths = list(config.columns.values())
    target_col_width = next(
        (width for width in column_widths if width > width_in_pt), 0
    )

    if target_col_width == 0:
        # No column can hold the node.
        if width_in_pt <= config.tolerated_over_width:
            utils.wrap_node(node, "div", {"class": "over-wide-wrapper"})
            utils.append_class(node, "over-wide")
        else:
            utils.append_class(node, "rotated-table")
        return

    # Column classes are 1-based.
    cols = column_widths.index(target_col_width) + 1
    if use_thirds_only:
        cols = int(4 * ceil(float(cols) / 4))
    utils.append_class(node, "col-{}".format(cols))
def _remove_inner_image_node_width(node, inner_class="thumbinner"):
    """
    Strip explicit sizes from an image node and report the image width.

    width/height/max-width styles are removed from the node itself and from
    every descendant whose class contains inner_class. The first <img> below
    the node then loses its width/height styles and its explicit width.

    Side effect: removes the node if it doesn't contain an image!

    :param node: element wrapping the image
    :param inner_class: "thumbinner" or "thumbimage"
    :return: original width of the image in pt, or 0 if the node was removed
    """
    utils.remove_node_styles(node, ["width", "height", "max-width"])

    wrapper_query = './/*[contains(@class,"{}")]'.format(inner_class)
    for wrapper in node.xpath(wrapper_query):
        utils.remove_node_styles(wrapper, ["width", "height", "max-width"])

    images = node.xpath(".//img")
    if images:
        first_img = images[0]
        original_width = utils.get_node_width(first_img, target_unit="pt")
        utils.remove_node_styles(first_img, ["width", "height"])
        utils.remove_node_width(first_img)
        return original_width

    log.debug("No <img> found in {}. Removing node.".format(etree.tostring(node)))
    utils.remove_node(node)
    return 0