def limit_widths(root):
    """Clamp the declared width of every descendant of ``root``.

    For each descendant the width declared on the node (read in px through
    ``utils.get_node_width``) is compared with the width reported by
    ``node_size``. When the declared width is larger, the node's width is
    rewritten to the measured value, formatted with two decimals and a
    ``pt`` suffix.

    :param root: element whose descendants are inspected; must provide
        ``iterdescendants()``
    :return: None, nodes are modified in place
    """
    for node in root.iterdescendants():
        css_width = utils.get_node_width(node, "px")
        width, _height = node_size(node)
        # Diagnostics go through the module logger instead of a bare
        # ``print`` statement, which wrote one line to stdout per node.
        log.debug("{} {} {}".format(node.tag, width, css_width))
        # NOTE(review): the declared width is requested in px while the
        # measured width is written back with a pt suffix -- confirm that
        # node_size() really reports pt.
        # A node without a declared width is skipped explicitly; comparing
        # None with a number is always False on Python 2 and an error on 3.
        if css_width is not None and css_width > width:
            utils.change_node_width(node, "{:.2f}pt".format(width))
def fix_image_tables(root):
    """Normalise the layout of short tables that are used as image grids.

    Affected tables are ``<table>`` elements whose class contains
    ``short-table`` but not ``infobox`` and which contain a link whose class
    contains ``image``. Each such table loses its ``margin`` style and gets
    the class ``image-table``.

    The widest image (in px) is determined per column index and the maxima
    are summed up:

    * If ``total * config.px2pt`` exceeds ``config.page_width_pt`` the table
      additionally gets the class ``wide-image-table``; explicit image sizes
      and cell paddings/margins are removed and every cell receives a
      percentage width proportional to its column's widest image.
    * Otherwise, if any image width was found, every image in the table is
      rescaled with ``_resize_image_node_width_to_pt``.

    :param root: element the XPath queries are evaluated against
    :return: None, the tree is modified in place
    """
    img_tables = root.xpath(
        '//table[contains(@class, "short-table")'
        ' and not(contains(@class, "infobox"))'
        ' and .//a[contains(@class, "image")]]'
    )
    for table in img_tables:
        utils.remove_node_styles(table, "margin")
        utils.append_class(table, "image-table")

        # Widest image per column index, in px.
        max_widths = {}
        for row in table.xpath(".//tr"):
            for n, column in enumerate(row.xpath(".//td")):
                for img in column.xpath(".//img"):
                    # Treat a missing (falsy) width as 0 so that max() never
                    # has to compare None with a number.
                    width = utils.get_node_width(img, target_unit="px") or 0
                    max_widths[n] = max(width, max_widths.get(n, 0))
        total_width = sum(max_widths.values())

        if total_width * config.px2pt > config.page_width_pt:
            utils.append_class(table, "wide-image-table")
            for row in table.xpath(".//tr"):
                for n, column in enumerate(row.xpath(".//td")):
                    # NOTE(review): this helper removes the node it is given
                    # when it contains no <img>, i.e. image-less cells are
                    # dropped from wide tables -- confirm this is intended.
                    _remove_inner_image_node_width(column, "image")
                    utils.remove_node_styles(
                        column, ["padding-left", "padding", "margin"]
                    )
                    # float() forces true division: with integer widths the
                    # Python 2 ``/`` operator would truncate every share to 0.
                    share = float(max_widths.get(n, 0)) / total_width * 100
                    utils.add_node_style(column, "width", "{}%".format(share))
        elif total_width > 0:
            for img in table.xpath(".//img"):
                _resize_image_node_width_to_pt(img)
def remove_img_style_size(root):
    """Replace explicit thumbnail sizes with a ``col-N`` grid class.

    Handles every ``<div>`` whose class contains ``thumb`` but none of
    ``tmulti``, ``thumbinner``, ``thumbcaption`` or ``thumbimage``.
    Containers whose class contains ``map`` are left untouched.

    For each remaining container the ``width``/``height``/``max-width``
    styles of its ``thumbinner`` descendants are removed. The width of the
    first ``<img>`` (in pt) is then turned into a column count of at most 3,
    multiplied by 4 and appended to the container as class ``col-N``.
    Finally the explicit widths of the container and the image are removed.
    Containers without an ``<img>`` are only logged.

    :param root: element the XPath query is evaluated against
    :return: None, the tree is modified in place
    """
    class_filter = " ".join(
        [
            'contains(@class,"thumb") ',
            'and not(contains(@class, "tmulti"))',
            'and not(contains(@class, "thumbinner"))',
            'and not(contains(@class, "thumbcaption"))',
            'and not(contains(@class, "thumbimage"))',
        ]
    )
    for container in root.xpath("//div[{}]".format(class_filter)):
        if "map" in container.attrib.get("class", ""):
            continue

        for inner in container.xpath('.//*[contains(@class,"thumbinner")]'):
            utils.remove_node_styles(inner, ["width", "height", "max-width"])

        images = container.xpath(".//img")
        if not images:
            log.debug("No <img> found in {}".format(etree.tostring(container)))
            continue

        image = images[0]
        # The width has to be read before the size styles are stripped.
        width = utils.get_node_width(image, target_unit="pt")
        utils.remove_node_styles(image, ["width", "height"])

        # Round to whole groups of four columns, capped at three groups.
        span = min(int(round(width / (column_width_pt * 4))), 3) * 4
        utils.append_class(container, "col-{}".format(span))

        utils.remove_node_width(container)
        utils.remove_node_width(image)
def fix_abspos_overlays(root):
    """Convert pixel offsets of absolutely positioned overlays to percentages.

    For every element whose ``style`` attribute contains ``position`` and
    ``relative`` the reference size is taken from the element's own width
    and height in px. If either is missing, the size of the first contained
    ``<img>`` (via ``get_img_size``) is used instead; containers with
    neither are skipped.

    Inside such a container, every descendant whose ``style`` contains
    ``position`` and ``absolute`` gets its ``left`` and ``top`` values
    rewritten as a percentage of the container width and height
    respectively. Values that already end in ``%`` are kept; only values
    ending in ``px`` and bare digit strings are converted.

    :param root: element the XPath queries are evaluated against
    :return: None, the tree is modified in place
    """
    for container in root.xpath(
        '//*[contains(@style, "position") and contains(@style, "relative")]'
    ):
        w = utils.get_node_width(container, target_unit="px")
        h = utils.get_node_height(container, target_unit="px")
        if not (w and h):
            images = container.xpath(".//img")
            if not images:
                continue
            w, h = get_img_size(images[0])

        for node in container.xpath(
            './/*[contains(@style, "position") and contains(@style, "absolute")]'
        ):
            style = utils.get_node_style(node)
            # Both offsets are read up front and paired with the extent they
            # are relative to, instead of being looked up through locals().
            offsets = (
                ("left", style.get("left"), w),
                ("top", style.get("top"), h),
            )
            for attr, val, extent in offsets:
                if not val or val.endswith("%"):
                    continue
                if val.endswith("px"):
                    val = val[:-2]
                elif not val.isdigit():
                    # Other units cannot be converted here.
                    continue
                try:
                    # 100.0 forces true division; with an integer extent the
                    # Python 2 ``/`` operator would truncate the percentage.
                    new_val = 100.0 * int(float(val)) / extent
                except (TypeError, ValueError, ZeroDivisionError):
                    # TypeError covers a non-numeric extent, which is
                    # skipped like any other unconvertible value.
                    continue
                utils.add_node_style(node, attr, "{}%".format(new_val))
def limit_size(root):
    """Scale images (and their sized ancestors) into the allowed box.

    Applies to every ``<img>`` whose class does not contain ``inline``.
    The limits are ``6.03 * config.cm2px`` for the width and
    ``7.5 * config.cm2px`` for the height, or ``5 * config.cm2px`` for the
    height when the image has a ``<table>`` ancestor.

    * Images exceeding a limit are scaled down proportionally.
    * Images that fit, but are wider than 70% of the maximum width, are
      scaled up to the full width as long as the resulting height stays
      below the height limit.

    When a scale factor other than 1 results, the image's ``width`` and
    ``height`` attributes are rewritten and every ancestor with an explicit
    width or height is scaled by the same factor. Images whose size is 0 or
    not numeric (a string) are skipped.

    :param root: element the XPath query is evaluated against
    :return: None, the tree is modified in place
    """
    # The limits do not depend on the image, so compute them once.
    max_height_outside = 7.5 * config.cm2px
    max_height_in_table = 5 * config.cm2px
    max_width = 6.03 * config.cm2px

    for img in root.xpath('//img[not(contains(@class, "inline"))]'):
        w, h = get_img_size(img)
        if w == 0 or h == 0:
            continue
        # Either dimension may be a string; checking only the height would
        # let a string width crash in the divisions below.
        if isinstance(w, (str, unicode)) or isinstance(h, (str, unicode)):
            continue

        in_table = any(node.tag == "table" for node in img.iterancestors())
        max_height = max_height_in_table if in_table else max_height_outside

        # downscale if dimensions too big
        scale_factor = min(1, max_height / h, max_width / w)

        # upscale image to full width, if the image is wider than 70% of
        # max width
        if scale_factor == 1:
            scaled_height = max_width / w * h
            if 0.7 * max_width < w < max_width and scaled_height < max_height:
                scale_factor = max_width / w

        if scale_factor == 1:
            continue

        img.set("width", str(w * scale_factor))
        img.set("height", str(h * scale_factor))
        # Keep explicitly sized wrappers in proportion with the image.
        # Separate names avoid shadowing the image's own w/h.
        for node in list(img.iterancestors()):
            node_w = utils.get_node_width(node, "px")
            node_h = utils.get_node_height(node, "px")
            if node_w:
                utils.change_node_width(node, node_w * scale_factor, unit="px")
            if node_h:
                utils.change_node_height(node, node_h * scale_factor, unit="px")
def get_img_size(node):
    """Return the ``(width, height)`` of ``node``.

    Both values come from ``utils.get_node_width`` / ``utils.get_node_height``
    called without an explicit target unit.
    NOTE(review): the original documentation states the result is in px --
    presumably the helpers' default unit; confirm in ``utils``.

    :param node: node whose size is looked up
    :return: ``(width, height)``, or ``(0, 0)`` if a lookup raises
        ``TypeError``
    """
    try:
        size = (utils.get_node_width(node), utils.get_node_height(node))
    except TypeError:
        size = (0, 0)
    return size
def _resize_image_node_width_to_pt(node):
    """Shrink an ``<img>`` by the px-to-pt factor.

    The image width is read in px, all explicit size information (the
    ``width``/``height`` styles and the node width) is removed, and a new
    ``width`` style of ``width * config.px2pt`` is written -- still with a
    ``px`` suffix. Per the original note this amounts to 96px -> 72pt, i.e.
    shrinking to 75%; the factor is more or less deliberate but looks decent
    in sample pages.

    Nodes other than ``<img>`` are ignored.

    :param node: node to resize
    :return: None
    """
    if node.tag == "img":
        px_width = utils.get_node_width(node, target_unit="px")
        utils.remove_node_styles(node, ["width", "height"])
        utils.remove_node_width(node)
        scaled = "{}px".format(px_width * config.px2pt)
        utils.add_node_style(node, "width", scaled)
def _remove_inner_image_node_width(node, inner_class="thumbinner"):
    """Strip explicit sizes from an image node and report the image width.

    The ``width``/``height``/``max-width`` styles are removed from ``node``
    and from every descendant whose class contains ``inner_class``. The
    first ``<img>`` below ``node`` then loses its ``width``/``height``
    styles and its node width.

    Side effect: if ``node`` contains no ``<img>`` at all, this is logged
    and ``node`` itself is removed.

    :param node: node wrapping the image
    :param inner_class: class name fragment of the inner wrapper nodes,
        e.g. "thumbinner" or "thumbimage"
    :return: width of the image in pt as it was before the sizes were
        removed, or 0 if there was no image
    """
    utils.remove_node_styles(node, ["width", "height", "max-width"])
    for wrapper in node.xpath('.//*[contains(@class,"{}")]'.format(inner_class)):
        utils.remove_node_styles(wrapper, ["width", "height", "max-width"])

    images = node.xpath(".//img")
    if images:
        image = images[0]
        # Read the width first; the calls below delete that information.
        original_width = utils.get_node_width(image, target_unit="pt")
        utils.remove_node_styles(image, ["width", "height"])
        utils.remove_node_width(image)
        return original_width

    log.debug("No <img> found in {}. Removing node.".format(etree.tostring(node)))
    utils.remove_node(node)
    return 0