def wrap_tabbed_set_content(soup: PageElement, logger: Logger = None):
    """Group each tab's radio input with its content and open all <details>.

    For every ``<input>`` found under a ``div.tabbed-set``, the input and the
    sibling nodes that follow it (up to, but not including, the next
    ``<input>`` sibling) are moved into a new
    ``<div class="tabbed-content--wrap">``.  Afterwards every ``<details>``
    element in the document gets an empty ``open`` attribute.

    The tree is modified in place; nothing is returned.  ``logger`` is
    accepted but not used by this function.
    """
    for tabbed_set in soup.select('div.tabbed-set'):
        for tab_input in tabbed_set.select('input'):
            # Collect the trailing siblings first: wrapping the input below
            # relocates it, so the sibling chain must be captured beforehand.
            trailing = []
            for node in tab_input.next_siblings:
                if node.name == 'input':
                    break
                trailing.append(node)

            box = soup.new_tag('div', **{'class': 'tabbed-content--wrap'})
            tab_input.wrap(box)
            for node in trailing:
                box.append(node)

    for details in soup.select('details'):
        details['open'] = ''
def fix_twemoji(soup: PageElement, logger: Logger = None):
    """Replace inline twemoji ``<svg>`` elements with base64 ``<img>`` tags.

    (workaround) Inline SVG emoji are not shown by WeasyPrint 51 since
    material v4.5.0, so each ``.twemoji svg`` is serialised, base64 encoded
    and swapped for an ``<img class="converted-twemoji">`` carrying a
    ``data:`` URI.

    @see https://github.com/squidfunk/mkdocs-material/pull/1330

    The tree is modified in place.  A failure on one icon is logged as a
    warning (when a logger is given) and does not stop the remaining icons
    from being processed.
    """

    def _size_from_viewbox(icon):
        # Derive explicit pixel dimensions from the viewbox extents and
        # force the fill colour to follow the surrounding text.
        box = _parse_viewbox(icon['viewbox'])
        span_x = box[2] - box[0]
        span_y = box[3] - box[1]
        icon['width'] = int(span_x)
        icon['height'] = int(span_y)
        icon['style'] = 'fill: currentColor;'

    if logger:
        logger.debug('Converting emoji SVG to img(workaround).')

    for icon in soup.select('.twemoji svg'):
        try:
            _size_from_viewbox(icon)

            raw_markup = str(icon).encode('utf-8')
            payload = b64encode(raw_markup).decode('ascii')
            uri = "data:image/svg+xml;charset=utf-8;base64," + payload

            replacement = soup.new_tag(
                'img', src=uri, **{'class': 'converted-twemoji'})
            icon.replace_with(replacement)

            if logger:
                logger.debug(f'> svg: {icon}')
                logger.debug(f'< img: {replacement}')
        except Exception as err:
            # Best effort: keep going with the other icons.
            if logger:
                logger.warning(f'Failed to convert SVG: {err}')
def parse_video_block(video_block: PageElement) -> Dict:
    """Extract the title, download link and transcript links of a video block.

    Args:
        video_block: Parsed HTML element that wraps a single video.

    Returns:
        A dict with three keys:

        * ``video_title`` -- text of the first ``<h3>``, or ``None`` when the
          block has no ``<h3>``.
        * ``video_link`` -- ``href`` of the element whose class attribute is
          ``"btn-link video-sources video-download-button"``, or ``None``
          when that element (or its ``href``) is missing.
        * ``transcript_link`` -- list of unique transcript URLs taken from
          ``.wrapper-download-transcripts a``, in document order.  URLs
          lacking a scheme get ``https``; URLs lacking a host get
          ``courses.edx.org``.
    """
    video_object = {}

    title_el = video_block.find("h3")
    if title_el is None:
        video_object["video_title"] = None
    else:
        title = title_el.string
        # ``.string`` is None when the heading is empty or has several child
        # nodes; fall back to the concatenated text instead of returning the
        # literal string "None".
        video_object["video_title"] = (
            str(title) if title is not None else title_el.get_text()
        )

    link_el = video_block.find(
        class_="btn-link video-sources video-download-button")
    # ``.get`` yields None for a button without an href instead of raising.
    video_object["video_link"] = (
        link_el.get("href") if link_el is not None else None
    )

    # De-duplicate while keeping document order so the result is the same on
    # every run (iteration order of a set of strings is not stable).
    seen = set()
    transcript_urls = []
    for anchor in video_block.select(".wrapper-download-transcripts a"):
        href = anchor.get("href")
        if href is None:
            # An anchor without an href carries no transcript URL.
            continue

        parts = urlparse(href)
        if not parts.scheme:
            parts = parts._replace(scheme='https')
        if not parts.netloc:
            parts = parts._replace(netloc='courses.edx.org')
        absolute_url = urlunparse(parts)

        if absolute_url not in seen:
            seen.add(absolute_url)
            transcript_urls.append(absolute_url)

    video_object["transcript_link"] = transcript_urls
    return video_object
def fix_image_alignment(soup: PageElement, logger: Logger = None):
    """Convert ``<img>`` presentation attributes to inline CSS (workaround).

    For every ``<img>`` in ``soup`` that does not carry the ``twemoji`` class:

    * ``align="left"`` / ``align="right"`` becomes ``float`` plus padding on
      the opposite side and at the bottom, and the ``align`` attribute is
      removed.  Other ``align`` values are left untouched.
    * ``width`` and ``height`` attributes are passed through
      ``_convert_dimension`` and moved into the ``style`` attribute.

    Declarations already present in the image's ``style`` attribute are
    parsed with ``_parse_style`` and kept, with the converted values
    overriding same-named properties.

    Args:
        soup: Parsed document; modified in place.
        logger: Optional logger.  When omitted the conversion still runs,
            just without any log output.

    A failure on one image is reported as a warning (when a logger is given)
    and does not stop the remaining images from being processed.
    """
    if logger:
        logger.info('Converting <img> alignment(workaround).')

    # Padding goes on the side facing the text that flows around the image.
    padding_side = {'left': 'padding-right', 'right': 'padding-left'}

    for img in soup.select('img'):
        try:
            if img.has_attr('class') and 'twemoji' in img['class']:
                continue

            # Read the *attribute*: attribute-style access (``img.style``)
            # on a BeautifulSoup tag searches for a child <style> element
            # and would discard the image's existing inline style.
            styles = _parse_style(img.get('style', ''))

            # Guarded like every other log call here; an unguarded call with
            # logger=None raised inside the try and skipped the whole image.
            if logger:
                logger.debug(f' | {img}')

            align = img.get('align')
            if align in padding_side:
                styles['float'] = align
                styles[padding_side[align]] = '1rem'
                styles['padding-bottom'] = '0.5rem'
                img.attrs.pop('align')

            if img.has_attr('width'):
                styles['width'] = _convert_dimension(img['width'])
                img.attrs.pop('width')
            if img.has_attr('height'):
                styles['height'] = _convert_dimension(img['height'])
                img.attrs.pop('height')

            img['style'] = " ".join(f'{k}: {v};' for k, v in styles.items())
        except Exception as e:
            # Best effort: report and carry on with the next image.
            if logger:
                logger.warning(f'Failed to convert img align: {e}')