def add_illustrations(self):
    """Add the ZIM illustrations, the site favicon and the apple-touch-icon.

    The illustration source is ``self.conf.favicon``; when that is not set
    it is replaced (on ``self.conf``) by the site's ``BadgeIconUrl``. The
    image is converted to PNG and registered at 96px and 48px. The site's
    ``IconUrl`` is then stored as ``favicon.ico`` and the ``BadgeIconUrl``
    as ``apple-touch-icon.png``.
    """
    raw_illustration = self.build_dir / "illustration"

    # no custom favicon configured: fall back to the site's badge icon
    if not self.conf.favicon:
        self.conf.favicon = Global.site["BadgeIconUrl"]
    handle_user_provided_file(source=self.conf.favicon, dest=raw_illustration)

    # normalise to PNG (harmless if the source already is one)
    png_illustration = raw_illustration.with_suffix(".png")
    convert_image(raw_illustration, png_illustration)

    # ZIM uses 48x48; the doubled 96x96 variant is for retina displays.
    # Largest first, as the same file is resized in place on each pass.
    for edge in (96, 48):
        resize_image(
            png_illustration, width=edge, height=edge, method="thumbnail"
        )
        Global.creator.add_illustration(edge, png_illustration.read_bytes())

    # the actual favicon (ICO file) is fetched separately
    ico_fpath = self.build_dir / "favicon.ico"
    handle_user_provided_file(source=Global.site["IconUrl"], dest=ico_fpath)
    Global.creator.add_item_for("favicon.ico", fpath=ico_fpath, is_front=False)

    # apple-touch-icon is added straight from its URL
    touch_icon = URLItem(
        url=Global.site["BadgeIconUrl"], path="apple-touch-icon.png"
    )
    Global.creator.add_item(touch_icon)
def add_custom_about_and_css(self):
    """Add the ``about`` page and the ``custom.css`` stylesheet to the ZIM.

    When ``self.about`` is set, the page title is taken from its ``<title>``
    and its content from the first ``body > .container`` element. If either
    is missing from the supplied page, the channel name / no custom content
    is used instead of failing. Without ``self.about`` the title is the
    channel name and no custom content is passed to the template.

    When ``self.css`` is set its content becomes ``custom.css``; otherwise an
    empty stylesheet is added.
    """
    channel_meta = self.db.get_channel_metadata(self.channel_id)

    # defaults, used as-is when no custom about page is supplied
    title = channel_meta["name"]
    content = None

    if self.about:
        # user provided a custom about page: extract title and main container
        about_fpath = handle_user_provided_file(
            source=self.about, in_dir=self.build_dir, nocopy=True
        )
        with open(about_fpath, "r") as fh:
            soup = BeautifulSoup(fh.read(), "lxml")

        title_tag = soup.find("title")
        if title_tag is not None:
            title = title_tag.text
        else:
            logger.warning(
                "Custom about page has no <title>; using channel name"
            )

        # we're only interested in the first matching container
        containers = soup.select("body > .container")
        if containers:
            content = containers[0]
        else:
            logger.warning(
                "Custom about page has no `body > .container` element; "
                "ignoring its content"
            )

    html = self.jinja2_env.get_template("about.html").render(
        title=title, content=content, **channel_meta
    )
    with self.creator_lock:
        self.creator.add_item_for(
            path="about",
            title=title,
            content=html,
            mimetype="text/html",
        )
    # release the rendered page before reading the stylesheet
    del html

    # user-provided CSS file if any, blank stylesheet otherwise
    if self.css:
        css_fpath = handle_user_provided_file(
            source=self.css, in_dir=self.build_dir, nocopy=True
        )
        with open(css_fpath, "r") as fh:
            css_content = fh.read()
    else:
        css_content = ""

    # same lock as for the about page: both write to the shared creator
    with self.creator_lock:
        self.creator.add_item_for(
            "custom.css", content=css_content, mimetype="text/css"
        )
    logger.debug("Added about page and custom CSS")
def add_favicon(self):
    """Add illustrations, ``favicon.png`` and ``favicon.ico`` to the ZIM.

    The source image is, in order of preference: the user-supplied
    ``self.favicon``, the channel thumbnail stored in the database as a
    base64 data URI, or the bundled ``kolibri-logo.png`` when the thumbnail
    cannot be extracted.
    """
    raw_favicon = self.build_dir / "favicon"

    if self.favicon:
        # user provided a custom favicon: retrieve it
        handle_user_provided_file(source=self.favicon, dest=raw_favicon)
    else:
        # otherwise use the channel thumbnail from the database
        try:
            thumbnail = self.db.get_channel_metadata(self.channel_id)[
                "thumbnail"
            ]
            _, encoded = thumbnail.split(";base64,", 1)
            raw_favicon.write_bytes(base64.standard_b64decode(encoded))
        except Exception as exc:
            logger.warning("Unable to extract favicon from DB; using default")
            logger.exception(exc)

            # fall back to the default logo shipped with the templates
            handle_user_provided_file(
                source=self.templates_dir / "kolibri-logo.png",
                dest=raw_favicon,
            )

    # convert to PNG (might already be PNG but it's OK)
    png_fpath = raw_favicon.with_suffix(".png")
    convert_image(raw_favicon, png_fpath)

    # ZIM uses 48x48; the 96x96 variant is the doubled size for retina
    for edge in (96, 48):
        resize_image(png_fpath, width=edge, height=edge, method="thumbnail")
        self.creator.add_illustration(edge, png_fpath.read_bytes())

    # final resize request before deriving the ICO file
    resize_image(png_fpath, width=96, height=96, method="thumbnail")

    # generate the ICO from the PNG, then add both to the ZIM
    ico_fpath = png_fpath.with_suffix(".ico")
    create_favicon(src=png_fpath, dst=ico_fpath)

    self.creator.add_item_for("favicon.png", fpath=png_fpath)
    self.creator.add_item_for("favicon.ico", fpath=ico_fpath)
def check_branding_values(self):
    """Check that user-supplied images and colors are valid (to fail early).

    Images are checked for existence or downloaded, then resized.
    Colors are checked for validity.
    A custom about page, if any, is retrieved into the build directory.

    Returns immediately when none of the branding values were supplied.

    Raises:
        ValueError: if ``main_color`` or ``secondary_color`` is set but is
            not a valid hex color.
    """
    branding_values = (
        self.favicon,
        self.main_logo,
        self.secondary_logo,
        self.main_color,
        self.secondary_color,
        self.about,
    )
    # skip this step if none of the related values were supplied
    if not any(branding_values):
        return

    logger.info("checking your branding files and values")

    # (source, destination, width, height) for every image to validate
    images = [
        (self.favicon, self.build_dir.joinpath("favicon.png"), 48, 48),
        (self.main_logo, self.main_logo_path, 300, 65),
        (self.secondary_logo, self.secondary_logo_path, 300, 65),
    ]
    for src, dest, width, height in images:
        if src:
            handle_user_provided_file(source=src, dest=dest)
            resize_image(dest, width=width, height=height, method="thumbnail")

    if self.main_color and not is_hex_color(self.main_color):
        raise ValueError(
            f"--main-color is not a valid hex color: {self.main_color}"
        )

    if self.secondary_color and not is_hex_color(self.secondary_color):
        raise ValueError(
            f"--secondary-color is not a valid hex color: {self.secondary_color}"
        )

    if self.about:
        handle_user_provided_file(
            source=self.about, dest=self.build_dir / "about.html"
        )
def add_illustration(self, record=None):
    """Add the ZIM illustrations from the favicon option or in-WARC favicon.

    Does nothing when ``self.favicon_url`` is already in
    ``self.indexed_urls``. The favicon payload is taken from ``record`` when
    one is given, and fetched from ``self.favicon_url`` otherwise. It is
    converted to PNG and added at 96px and 48px.

    Both working files (the retrieved favicon and its PNG conversion) are
    written to the current directory and removed once the illustrations
    have been added, or if conversion/resizing fails.

    Args:
        record: optional WARC record whose payload is the favicon.
    """
    if self.favicon_url in self.indexed_urls:
        return

    # add illustration from favicon option or in-warc favicon
    logger.info(
        "Adding illustration from "
        + (self.favicon_url if record is None else "WARC")
    )

    # a URL whose path has no file name (e.g. ends with "/") would otherwise
    # resolve to the current directory itself, which cannot be written to
    favicon_fname = (
        pathlib.Path(urlparse(self.favicon_url).path).name or "favicon"
    )
    src_illus_fpath = pathlib.Path(".").joinpath(favicon_fname)

    if record:
        # reusing payload from WARC record
        with open(src_illus_fpath, "wb") as fh:
            if hasattr(record, "buffered_stream"):
                record.buffered_stream.seek(0)
                fh.write(record.buffered_stream.read())
            else:
                fh.write(record.content_stream().read())
    else:
        # fetching online
        try:
            handle_user_provided_file(
                source=self.favicon_url, dest=src_illus_fpath
            )
        except Exception as exc:
            logger.warning(
                "Unable to retrieve favicon. "
                f"ZIM won't have an illustration: {exc}"
            )
            return

    illus_fpath = src_illus_fpath.with_suffix(".png")
    try:
        # convert to PNG (might already be PNG but it's OK)
        convert_image(src_illus_fpath, illus_fpath)

        # resize to appropriate size (ZIM uses 48x48 so we double for retina)
        for size in (96, 48):
            resize_image(
                illus_fpath, width=size, height=size, method="thumbnail"
            )
            with open(illus_fpath, "rb") as fh:
                self.creator.add_illustration(size, fh.read())
    finally:
        # a set, because both paths are the same file when the favicon is
        # already named *.png
        for tmp_fpath in {src_illus_fpath, illus_fpath}:
            if tmp_fpath.is_file():
                tmp_fpath.unlink()
def sanitize_inputs(self):
    """Sanitize inputs and metadata held on ``self.conf``.

    - retrieves the censor words list into the build dir, if one was given
    - resolves ``fname`` (``{period}`` placeholder) or builds a default one
    - fills missing title/description/author/publisher and strips them
    - builds ``tags`` from ``tag`` plus the Stack Exchange ones, deduplicated

    Raises:
        ValueError: if ``fname`` is a path rather than a bare filename.
    """
    conf = self.conf

    if conf.censor_words_list:
        handle_user_provided_file(
            source=conf.censor_words_list,
            dest=self.build_dir.joinpath("words.list"),
        )

    period = datetime.datetime.now().strftime("%Y-%m")
    if not conf.fname:
        conf.fname = f"{conf.name}_{period}.zim"
    else:
        # make sure we were given a filename and not a path
        conf.fname = pathlib.Path(conf.fname.format(period=period))
        name_only = pathlib.Path(conf.fname.name)
        if name_only != conf.fname:
            raise ValueError(f"filename is not a filename: {conf.fname}")

    # metadata: use the fallback when unset, always strip surrounding blanks
    conf.title = (conf.title or Global.site["LongName"]).strip()
    conf.description = (conf.description or Global.site["Tagline"]).strip()
    conf.author = (conf.author or "Stack Exchange").strip()
    conf.publisher = (conf.publisher or "Openzim").strip()

    # set() removes duplicates between user tags and the built-in ones
    all_tags = conf.tag + ["_category:stack_exchange", "stack_exchange"]
    conf.tags = list(set(all_tags))
def test_remote_dest(tmp_path, valid_http_url):
    """A remote source with an explicit ``dest`` is stored exactly there."""
    target = tmp_path / pathlib.Path(valid_http_url).name
    result = handle_user_provided_file(source=valid_http_url, dest=target)
    assert result is not None
    assert result.exists()
    assert result == target
def test_local_dest(tmp_path, png_image):
    """A local source with an explicit ``dest`` is stored exactly there."""
    target = tmp_path / png_image.name
    result = handle_user_provided_file(source=str(png_image), dest=target)
    assert result is not None
    assert result.exists()
    assert result == target
def test_remote(valid_http_url):
    """A remote source without ``dest`` yields a file keeping the URL's suffix."""
    expected_suffix = pathlib.Path(valid_http_url).suffix
    result = handle_user_provided_file(source=valid_http_url)
    assert result is not None
    assert result.exists()
    assert result.suffix == expected_suffix
def test_local_nocopy(png_image):
    """With ``nocopy=True`` a local source is returned as-is (same path)."""
    source = str(png_image)
    result = handle_user_provided_file(source=source, nocopy=True)
    assert result is not None
    assert result.exists()
    assert str(result) == source
def test_local_copy(png_image):
    """A local source without options yields a file of same suffix and size."""
    result = handle_user_provided_file(source=str(png_image))
    assert result is not None
    assert result.exists()
    assert result.suffix == png_image.suffix
    original_size = png_image.stat().st_size
    assert result.stat().st_size == original_size
def test_missing_local():
    """A local path that does not exist raises ``IOError``."""
    missing = "/some/incorrect/path.txt"
    with pytest.raises(IOError):
        handle_user_provided_file(source=missing)
def test_with_none():
    """A ``None`` source is passed through: nothing to handle, ``None`` back."""
    result = handle_user_provided_file(source=None)
    assert result is None
def __init__(
    self,
    archive,
    collection,
    nb_items_per_page,
    show_description,
    output_dir,
    no_zim,
    fname,
    debug,
    keep_build_dir,
    skip_download,
    language,
    locale_name,
    tags,
    name=None,
    title=None,
    description=None,
    creator=None,
    publisher=None,
    favicon=None,
    main_logo=None,
    secondary_logo=None,
    main_color=None,
    secondary_color=None,
):
    """Record scraper options, ZIM metadata and set up the locale.

    ``collection`` is resolved through ``handle_user_provided_file`` with
    ``nocopy=True``. ``tags`` is a comma-separated string: it is stored
    split and stripped on ``self.tags`` while ``ZimInfo`` receives it
    unchanged. ``output_dir`` is expanded and resolved, and the build
    directory is its ``build`` sub-folder.

    When ``locale_name`` is not given, it is derived from ``language``
    (its ``iso-639-1`` code). If no such locale is available, an error is
    logged and ``en`` is used.
    """
    # options
    self.archive = archive
    self.collection = handle_user_provided_file(source=collection, nocopy=True)
    self.nb_items_per_page = nb_items_per_page
    self.show_author = True
    self.show_description = show_description

    # zim params
    self.fname = fname
    self.name = name
    self.language = language
    self.title = title
    self.description = description
    self.creator = creator
    self.publisher = publisher
    self.tags = [tag.strip() for tag in tags.split(",")]

    # branding
    self.favicon = favicon
    self.main_logo = main_logo
    self.secondary_logo = secondary_logo
    self.main_color = main_color
    self.secondary_color = secondary_color

    # process-related
    resolved_output = Path(output_dir).expanduser().resolve()
    self.output_dir = resolved_output
    self.period = datetime.datetime.now().strftime("%Y-%m")

    # debug/devel options
    self.no_zim = no_zim
    self.debug = debug
    self.keep_build_dir = keep_build_dir
    self.skip_download = skip_download

    self.build_dir = self.output_dir.joinpath("build")

    # store ZIM-related info
    self.zim_info = ZimInfo(
        language=language,
        tags=tags,
        title=title,
        description=description,
        creator=creator,
        publisher=publisher,
        name=name,
        scraper=SCRAPER,
    )

    # set and record locale for translations
    if not locale_name:
        locale_name = get_language_details(self.language)["iso-639-1"]
    try:
        self.locale = setlocale(ROOT_DIR, locale_name)
    except locale.Error:
        logger.error(
            f"No locale for {locale_name}. Use --locale to specify it. defaulting to en_US"
        )
        self.locale = setlocale(ROOT_DIR, "en")
def test_local_indir(tmp_path, png_image):
    """A local source with ``in_dir`` ends up directly inside that folder."""
    result = handle_user_provided_file(source=str(png_image), in_dir=tmp_path)
    assert result is not None
    assert result.exists()
    containing_dir = result.parent
    assert containing_dir == tmp_path
def test_remote_indir(tmp_path, valid_http_url):
    """A remote source with ``in_dir`` ends up directly inside that folder."""
    result = handle_user_provided_file(source=valid_http_url, in_dir=tmp_path)
    assert result is not None
    assert result.exists()
    containing_dir = result.parent
    assert containing_dir == tmp_path
def test_empty_value():
    """A whitespace-only source is treated as no source: ``None`` comes back."""
    result = handle_user_provided_file(source=" ")
    assert result is None