def test_creator_compression(fpath, lipsum_item):
    """Create ZIM files with each member of ``libzim.writer.Compression``.

    Every algorithm is configured twice, once from its name and once from the
    enum member, and both archives must report the same file size. Sizes must
    differ from one algorithm to another, and a creator without an explicit
    compression must produce the same size as ``zstd``.
    """
    sizes_by_algo = {}
    for algo_name in libzim.writer.Compression.__members__:
        # compression given as a string
        by_name_path = fpath.with_name(f"{fpath.name}_{algo_name}_str.zim")
        with Creator(by_name_path).config_compression(algo_name) as creator:
            creator.add_item(lipsum_item)

        # compression given as the enum member
        by_value_path = fpath.with_name(f"{fpath.name}_{algo_name}_val.zim")
        algo_member = getattr(libzim.writer.Compression, algo_name)
        with Creator(by_value_path).config_compression(algo_member) as creator:
            creator.add_item(lipsum_item)

        assert Archive(by_name_path).checksum
        assert (
            Archive(by_name_path).filesize == Archive(by_value_path).filesize
        )
        sizes_by_algo[algo_name] = Archive(by_name_path).filesize

    # each pair of algorithms must yield a different size
    for first, second in itertools.combinations(sizes_by_algo, 2):
        assert sizes_by_algo[first] != sizes_by_algo[second]

    # now don't specify
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)

    # default should be zstd
    assert Archive(fpath).filesize == sizes_by_algo["zstd"]
def test_creator_badfilename(tmpdir):
    """Creator must raise IOError for destinations it cannot write to."""
    # lack of perm
    unwritable_path = "/root/test.zim"
    with pytest.raises(IOError):
        Creator(unwritable_path)

    # forward slash points to non-existing folder
    missing_parent_path = tmpdir / "test/test.zim"
    with pytest.raises(IOError):
        Creator(missing_parent_path)
def test_filename_param_types(tmpdir):
    """Creator takes a path object or a str and exposes a pathlib.Path."""
    path = tmpdir / "test.zim"
    # first the path object itself, then its string form
    for filename in (path, str(path)):
        with Creator(filename, "welcome") as creator:
            assert creator.filename == path
            assert isinstance(creator.filename, pathlib.Path)
def test_redirect_url(tmpdir):
    """A redirect article read back from the ZIM must point at its target."""
    target_url = "A/welcome"
    source_url = "A/home"

    class RedirectArticle(SimpleArticle):
        def is_redirect(self):
            return True

        def get_redirect_url(self):
            return target_url

    zim_path = tmpdir / "test.zim"
    with Creator(zim_path, "welcome") as zim_creator:
        target = SimpleArticle(
            title="Hello", mime_type="text/html", content="", url=target_url
        )
        zim_creator.add_article(target)
        redirect = RedirectArticle(
            content="", title="", mime_type="", url=source_url
        )
        zim_creator.add_article(redirect)

    with File(zim_path) as reader:
        assert reader.get_article(source_url).is_redirect
        resolved = reader.get_article(source_url).get_redirect_article()
        assert resolved.longurl == target_url
def test_fileprovider(fpath, lipsum):
    """A file-backed StaticItem must be stored with the file's exact bytes."""
    source_path = fpath.with_name("lipsum.html")
    with open(source_path, "w") as out:
        for _ in range(10):
            out.write(lipsum)

    item = StaticItem(
        path=HOME_PATH, filepath=source_path, mimetype="text/html"
    )
    assert HOME_PATH in str(item)
    assert item.get_title() in str(item)

    with Creator(fpath) as creator:
        creator.add_item(item)

    archive = Archive(fpath)
    with open(source_path, "rb") as raw:
        stored = bytes(archive.get_entry_by_path(HOME_PATH).get_item().content)
        assert stored == raw.read()

    # test feed streaming
    provider = item.get_contentprovider()
    blob = provider.feed()
    while blob.size():
        assert isinstance(blob, Blob)
        blob = provider.feed()
def test_virtualmethods_int_exc(fpath):
    """A content provider whose get_size() returns a str must be rejected."""

    class AContentProvider:
        def get_size(self):
            # deliberately not an integer
            return ""

        def feed(self):
            return Blob("")

    class AnItem:
        def get_path(self):
            return ""

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

        def get_contentprovider(self):
            return AContentProvider()

    expected_message = "TypeError: an integer is required"
    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match=expected_message):
            creator.add_item(AnItem())
def test_reimpfeed(fpath):
    """An item with a hand-written content provider can be added."""

    class AContentProvider:
        def __init__(self):
            self.called = False

        def get_size(self):
            return 1

        def feed(self):
            # first call yields the one-byte payload, later calls an empty blob
            if not self.called:
                self.called = True
                return Blob("1")
            return Blob("")

    class AnItem:
        def get_path(self):
            return "-"

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

        def get_contentprovider(self):
            return AContentProvider()

    with Creator(fpath) as creator:
        creator.add_item(AnItem())

    # exercise the provider directly as well
    provider = AnItem().get_contentprovider()
    assert provider.get_size() == 1
    assert provider.feed().size() == 1
def test_creator_indexing(fpath, lipsum_item, indexing, language, expected):
    """Check that the archive has a full-text index only when one is requested.

    The ZIM file is written to the path supplied through ``fpath`` instead of
    a hard-coded ``"lolo.zim"``, so no file is created in the current working
    directory or shared between runs of this test.

    ``expected`` is accepted but not used by the assertions below.
    """
    with Creator(fpath).config_indexing(indexing, language) as creator:
        creator.add_item(lipsum_item)

    archive = Archive(fpath)
    assert archive.has_fulltext_index == indexing
def test_compression_from_string(tmpdir, compression):
    """Create a ZIM file with the given ``compression`` argument."""
    zim_path = tmpdir / "test.zim"
    with Creator(zim_path, "home", compression=compression) as zim_creator:
        home_article = SimpleArticle(
            title="Hello", mime_type="text/html", content="", url="A/home"
        )
        zim_creator.add_article(home_article)
def test_creator_metadata_nooverwrite(fpath, lipsum_item):
    """The first value set for a metadata key is the one that is kept."""
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        creator.add_metadata("Key", "first")
        # re-setting a value prints a warning and ignores it
        creator.add_metadata("Key", "second")

    archive = Archive(fpath)
    stored_value = archive.get_metadata("Key").decode("UTF-8")
    assert stored_value == "first"
def test_article_metadata(tmpdir, metadata):
    """update_metadata() must store exactly the metadata it was given."""
    zim_path = str(tmpdir / "test.zim")
    creator_options = {
        "main_page": "welcome",
        "index_language": "eng",
        "min_chunk_size": 2048,
    }
    with Creator(zim_path, **creator_options) as zim_creator:
        zim_creator.update_metadata(**metadata)
        assert zim_creator._metadata == metadata
def test_creator_mainpath(fpath, lipsum_item):
    """The main entry exists only when set_mainpath() was called."""
    main_path = HOME_PATH

    # with a main path configured
    with Creator(fpath).set_mainpath(main_path) as creator:
        creator.add_item(lipsum_item)

    archive = Archive(fpath)
    assert archive.has_main_entry is True
    assert archive.main_entry.path == "mainPage"
    assert archive.main_entry.get_item().path == main_path

    # start over with the same file, this time without a main path
    fpath.unlink()
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)

    archive = Archive(fpath)
    assert archive.has_main_entry is False
    with pytest.raises(RuntimeError):
        assert archive.main_entry
def test_article_overriding_required(tmpdir, monkeypatch, no_method):
    """Ensure we raise properly on not-implemented methods of Article."""
    zim_path = tmpdir / "test.zim"
    home = "welcome"
    expected_error = re.compile(r"NotImplementedError.+must be implemented")

    # remove the method under test so the base implementation is reached
    monkeypatch.delattr(OverridenArticle, no_method)

    with pytest.raises(RuntimeError, match=expected_error):
        with Creator(zim_path, home) as zim_creator:
            incomplete_article = OverridenArticle(no_method)
            zim_creator.add_article(incomplete_article)
def test_creator_faviconpath(fpath, favicon_data):
    """The favicon entry exists only when set_faviconpath() was called."""
    favicon_path = HOME_PATH
    favicon_item = StaticItem(
        mimetype="image/png", path=favicon_path, content=favicon_data
    )

    # with a favicon path configured
    with Creator(fpath).set_faviconpath(favicon_path) as creator:
        creator.add_item(favicon_item)

    archive = Archive(fpath)
    assert archive.has_favicon_entry is True
    assert archive.favicon_entry.path == "favicon"
    assert archive.favicon_entry.get_item().path == favicon_path

    # start over with the same file, this time without a favicon path
    fpath.unlink()
    with Creator(fpath) as creator:
        creator.add_item(favicon_item)

    archive = Archive(fpath)
    assert archive.has_favicon_entry is False
    with pytest.raises(RuntimeError):
        assert archive.favicon_entry
def test_noleftbehind_empty(tmpdir):
    """Assert that a ZIM with no articles doesn't leave files behind (#41)."""
    fname = "test_empty.zim"
    zim_path = tmpdir / fname
    with Creator(
        zim_path,
        main_page="welcome",
        index_language="eng",
        min_chunk_size=2048,
    ) as zim_creator:
        print(zim_creator)

    # only the ZIM file itself may carry the file name as a prefix
    matching = [
        entry for entry in tmpdir.listdir() if entry.basename.startswith(fname)
    ]
    assert len(matching) == 1
def test_missing_contentprovider(fpath):
    """Adding an item that lacks get_contentprovider() must fail."""

    class AnItem:
        # get_contentprovider is intentionally not defined
        def get_path(self):
            return ""

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

    with Creator(fpath) as creator:
        incomplete_item = AnItem()
        with pytest.raises(RuntimeError, match="has no attribute"):
            creator.add_item(incomplete_item)
def test_in_article_exceptions(tmpdir):
    """Make sure we raise RuntimeError from article's virtual methods."""

    class BoolErrorArticle(SimpleArticle):
        def is_redirect(self):
            raise RuntimeError("OUPS Redirect")

    class StringErrorArticle(SimpleArticle):
        def get_url(self):
            raise IOError

    class BlobErrorArticle(SimpleArticle):
        def get_data(self):
            raise IOError

    path = tmpdir / "test.zim"
    main_page = "welcome"
    args = dict(title="Hello", mime_type="text/html", content="", url="welcome")

    # (article factory, text expected in the RuntimeError), in call order
    failing_cases = [
        (lambda: BoolErrorArticle(**args), "OUPS Redirect"),
        (lambda: StringErrorArticle(**args), "in get_url"),
        (lambda: BlobErrorArticle(**args), "IOError"),
        (lambda: Article(), "NotImplementedError"),
    ]

    with Creator(path, main_page) as zim_creator:
        # make sure we can catch exceptions of all types (except int, not used)
        for make_article, expected_text in failing_cases:
            with pytest.raises(RuntimeError, match=expected_text):
                zim_creator.add_article(make_article())

    # make sure we can catch it from outside creator
    with pytest.raises(RuntimeError):
        with Creator(path, main_page) as zim_creator:
            zim_creator.add_article(BlobErrorArticle(**args))
def test_write_article(tmpdir, article):
    """Add one article and a set of metadata to a new ZIM file."""
    zim_path = str(tmpdir / "test.zim")
    metadata = {
        "creator": "python-libzim",
        "description": "Created in python",
        "name": "Hola",
        "publisher": "Monadical",
        "title": "Test Zim",
    }
    with Creator(
        zim_path, main_page="welcome", index_language="eng", min_chunk_size=2048
    ) as zim_creator:
        zim_creator.add_article(article)
        zim_creator.update_metadata(**metadata)
def test_check_mandatory_metadata(tmpdir):
    """mandatory_metadata_ok() turns true once the metadata is provided."""
    zim_path = str(tmpdir / "test.zim")
    metadata = {
        "creator": "python-libzim",
        "description": "Created in python",
        "name": "Hola",
        "publisher": "Monadical",
        "title": "Test Zim",
    }
    with Creator(
        zim_path, main_page="welcome", index_language="eng", min_chunk_size=2048
    ) as zim_creator:
        # nothing has been set yet
        assert not zim_creator.mandatory_metadata_ok()
        zim_creator.update_metadata(**metadata)
        assert zim_creator.mandatory_metadata_ok()
def test_item_contentprovider_none(fpath):
    """An item whose get_contentprovider() returns "" must be rejected."""

    class AnItem:
        def get_path(self):
            return ""

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

        def get_contentprovider(self):
            # an empty string instead of a provider object
            return ""

    with Creator(fpath) as creator:
        bad_item = AnItem()
        with pytest.raises(RuntimeError, match="ContentProvider is None"):
            creator.add_item(bad_item)
def test_filename_article(tmpdir):
    """An article served from a file on disk keeps its size in the ZIM."""

    class FileArticle(Article):
        def __init__(self, fpath, url):
            super().__init__()
            self.fpath = fpath
            self.url = url

        def is_redirect(self):
            return False

        def get_url(self):
            return self.url

        def get_title(self):
            return ""

        def get_mime_type(self):
            return "text/plain"

        def get_filename(self):
            return str(self.fpath)

        def should_compress(self):
            return True

        def should_index(self):
            return True

        def get_size(self):
            return self.fpath.stat().size

    content = b"abc"
    article_url = "A/home"
    article_path = tmpdir / "test.txt"
    zim_path = tmpdir / "test.zim"

    # write content to physical file
    with open(article_path, "wb") as out:
        out.write(content)

    with Creator(zim_path, "home") as zim_creator:
        file_article = FileArticle(article_path, article_url)
        zim_creator.add_article(file_article)

    # ensure size on reader is correct
    with File(zim_path) as reader:
        stored = reader.get_article(article_url).content
        assert stored.nbytes == len(content)
def test_stringprovider(fpath, lipsum):
    """A string-backed StaticItem must be stored as its UTF-8 encoding."""
    item = StaticItem(path=HOME_PATH, content=lipsum, mimetype="text/html")
    assert HOME_PATH in str(item)
    assert item.get_title() in str(item)

    with Creator(fpath) as creator:
        creator.add_item(item)

    archive = Archive(fpath)
    stored = bytes(archive.get_entry_by_path(HOME_PATH).get_item().content)
    assert stored == lipsum.encode("UTF-8")

    # test feed streaming
    provider = item.get_contentprovider()
    blob = provider.feed()
    while blob.size():
        assert isinstance(blob, Blob)
        blob = provider.feed()
def test_creator_params(tmpdir):
    """Check that Creator parameters can be read back from the ZIM file.

    The file name, the main page URL and the index language passed to the
    creator are compared with what the reader reports. The reader is opened
    with a ``with`` block, like the other ``File`` usages in this module, so
    it is closed when the test ends instead of being left open.
    """
    path = tmpdir / "test.zim"
    main_page = "welcome"
    main_page_url = f"A/{main_page}"
    index_language = "eng"

    with Creator(
        path,
        main_page=main_page_url,
        index_language=index_language,
        min_chunk_size=2048,
    ) as zim_creator:
        zim_creator.add_article(
            SimpleArticle(
                title="Welcome",
                mime_type="text/html",
                content="",
                url=main_page_url,
            )
        )

    with File(path) as zim:
        assert zim.filename == path
        assert zim.main_page_url == main_page_url
        # the language is read from the "/M/Language" article
        stored_language = bytes(zim.get_article("/M/Language").content)
        assert stored_language.decode("UTF-8") == index_language
<title>Monadical</title> </head> <h1> ñññ Hello, it works ñññ </h1></html>""" content2 = """<!DOCTYPE html> <html class="client-js"> <head><meta charset="UTF-8"> <title>Monadical 2</title> </head> <h1> ñññ Hello, it works 2 ñññ </h1></html>""" article = TestArticle("Monadical_SAS", "Monadical", content) article2 = TestArticle("Monadical_2", "Monadical 2", content2) rnd_str = str(uuid.uuid1()) zim_file_path = f"kiwix-test-{rnd_str}.zim" with Creator(zim_file_path, main_page="Monadical", index_language="eng", min_chunk_size=2048) as zc: zc.add_article(article) zc.add_article(article2) zc.update_metadata( creator="python-libzim", description="Created in python", name="Hola", publisher="Monadical", title="Test Zim", )
content2 = """<!DOCTYPE html> <html class="client-js"> <head><meta charset="UTF-8"> <title>Monadical 2</title> </head> <h1> ñññ Hello, it works 2 ñññ </h1></html>""" item = TestItem("Monadical_SAS", "Monadical", content) item2 = TestItem("Monadical_2", "Monadical 2", content2) zim_file_path = f"kiwix-test-{uuid.uuid1()}.zim" print(f"Testing writer for {zim_file_path}") with Creator(zim_file_path).config_indexing(True, "eng").config_minclustersize( 512 ) as zc: zc.set_mainpath("Monadical") zc.add_item(item) zc.add_item(item2) for name, value in { "creator": "python-libzim", "description": "Created in python", "name": "Hola", "publisher": "Monadical", "title": "Test Zim", }.items(): zc.add_metadata(name.title(), value.encode("UTF-8"))
def test_creator_config_poststart(fpath, name, args):
    """Calling a ``config_*`` method on a started creator must fail."""
    method_name = f"config_{name}"
    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match="started"):
            configure = getattr(creator, method_name)
            configure(*args)
def test_creator_nocontext(fpath, lipsum_item):
    """Ensure we can use the creator linearly, without a ``with`` block."""
    # exception triple handed to __exit__; all None means no error occurred
    exc_info = (None, None, None)

    creator = Creator(fpath)
    creator.__enter__()
    creator.add_metadata("Name", "name")
    creator.add_item(lipsum_item)
    with pytest.raises(RuntimeError):
        creator.config_verbose(True)
    creator.__exit__(*exc_info)

    # now with an exception
    creator = Creator(fpath)
    creator.__enter__()
    creator.add_item(lipsum_item)
    try:
        creator.add_redirection("A", HOME_PATH)
    except Exception:
        exc_info = sys.exc_info()
        # re-raise the active exception to check its type
        with pytest.raises(TypeError):
            raise
    creator.__exit__(*exc_info)
def test_creator_additem(fpath, lipsum_item):
    """add_item() needs a started creator and a single positional item."""
    # ensure we can't add if not started
    idle_creator = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        idle_creator.add_item(lipsum_item)
    del idle_creator

    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)

        # None is refused
        with pytest.raises(TypeError, match="must not be None"):
            creator.add_item(None)

        # a plain string is not an item
        with pytest.raises(RuntimeError):
            creator.add_item("hello")

        # keyword arguments are refused
        with pytest.raises(TypeError, match="takes no keyword arguments"):
            creator.add_item(mimetype="text/html")
def test_creator_metadata(fpath, lipsum_item):
    """Metadata added through the creator must be readable from the archive.

    "Date" is added as a ``datetime.date`` object while every other entry is
    added as a string; all of them are compared to the string values below.
    """
    metadata = {
        # kiwix-mandatory
        "Name": "wikipedia_fr_football",
        "Title": "English Wikipedia",
        "Creator": "English speaking Wikipedia contributors",
        "Publisher": "Wikipedia user Foobar",
        "Date": "2009-11-21",
        "Description": "All articles (without images) from the english Wikipedia",
        "Language": "eng",
        # optional
        "Longdescription": (
            "This ZIM file contains all articles (without images) "
            "from the english Wikipedia by 2009-11-10."
            " The topics are ..."
        ),
        "Licence": "CC-BY",
        "Tags": "wikipedia;_category:wikipedia;_pictures:no;"
        "_videos:no;_details:yes;_ftindex:yes",
        "Flavour": "nopic",
        "Source": "https://en.wikipedia.org/",
        "Counter": "image/jpeg=5;image/gif=3;image/png=2",
        "Scraper": "sotoki 1.2.3",
    }

    # ensure we can't add if not started
    idle_creator = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        first_key = next(iter(metadata))
        idle_creator.add_metadata(first_key, metadata.get(first_key))
    del idle_creator

    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        for name, value in metadata.items():
            if name != "Date":
                creator.add_metadata(name, value)

        # the date goes in last, as a date object rather than a string
        year, month, day = (
            int(part) for part in metadata.get("Date").split("-")
        )
        creator.add_metadata("Date", datetime.date(year, month, day))

    archive = Archive(fpath)
    for name, value in metadata.items():
        assert archive.get_metadata(name).decode("UTF-8") == value
def test_creator_redirection(fpath, lipsum_item):
    """Redirections must be stored, resolvable and offered as suggestions."""
    # ensure we can't add if not started
    idle_creator = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        idle_creator.add_redirection("home", "hello", HOME_PATH)
    del idle_creator

    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        creator.add_redirection("home", "hello", HOME_PATH)
        creator.add_redirection("accueil", "bonjour", HOME_PATH)

    archive = Archive(fpath)
    # one item plus two redirections
    assert archive.entry_count == 3
    assert archive.has_entry_by_path("home") is True
    assert archive.has_entry_by_path("accueil") is True

    assert archive.get_entry_by_path("home").is_redirect
    redirect_target = archive.get_entry_by_path("home").get_redirect_entry()
    assert redirect_target.path == archive.get_entry_by_path(HOME_PATH).path
    assert archive.get_entry_by_path("accueil").get_item().path == HOME_PATH

    # redirection titles must show up in suggestions
    assert "home" in list(archive.suggest("hello"))
    assert "accueil" in list(archive.suggest("bonjour"))