Exemplo n.º 1
0
def test_creator_compression(fpath, lipsum_item):
    """Create ZIM files with every available compression algorithm.

    Each algorithm is configured twice, once by member name and once by
    member value, and both archives must have the same file size.  The sample
    text is expected to compress differently with each algorithm, so all
    sizes must differ.  A Creator left unconfigured must produce the same
    size as the "zstd" one.
    """

    def build(target, compression):
        # write a single-item ZIM at `target` using the given compression
        with Creator(target).config_compression(compression) as creator:
            creator.add_item(lipsum_item)
        return target

    sizes = {}
    for name in libzim.writer.Compression.__members__:
        by_name = build(fpath.with_name(f"{fpath.name}_{name}_str.zim"), name)
        by_value = build(
            fpath.with_name(f"{fpath.name}_{name}_val.zim"),
            getattr(libzim.writer.Compression, name),
        )

        assert Archive(by_name).checksum
        assert Archive(by_name).filesize == Archive(by_value).filesize
        sizes[name] = Archive(by_name).filesize

    # every pair of algorithms must have produced a different size
    for left, right in itertools.combinations(sizes, 2):
        assert sizes[left] != sizes[right]

    # a Creator with no explicit compression setting
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)

    # the default is expected to be zstd
    assert Archive(fpath).filesize == sizes["zstd"]
Exemplo n.º 2
0
def test_creator_badfilename(tmpdir):
    """Creator must raise IOError for destinations it is expected to reject."""
    bad_targets = (
        # lack of permission
        "/root/test.zim",
        # the forward slash points into a folder that does not exist
        tmpdir / "test/test.zim",
    )
    for target in bad_targets:
        with pytest.raises(IOError):
            Creator(target)
Exemplo n.º 3
0
def test_filename_param_types(tmpdir):
    """Creator.filename equals the target path and is a pathlib.Path,
    whether the target was passed as a path object or as a string."""
    target = tmpdir / "test.zim"
    for given in (target, str(target)):
        with Creator(given, "welcome") as zim_creator:
            assert zim_creator.filename == target
            assert isinstance(zim_creator.filename, pathlib.Path)
Exemplo n.º 4
0
def test_redirect_url(tmpdir):
    """A redirect article must be read back as a redirect to its target."""
    target_url = "A/welcome"
    alias_url = "A/home"

    class RedirectArticle(SimpleArticle):
        """Article that always reports itself as a redirect to target_url."""

        def get_redirect_url(self):
            return target_url

        def is_redirect(self):
            return True

    zim_path = tmpdir / "test.zim"
    with Creator(zim_path, "welcome") as zim_creator:
        target_article = SimpleArticle(
            title="Hello", mime_type="text/html", content="", url=target_url
        )
        zim_creator.add_article(target_article)

        redirect_article = RedirectArticle(
            content="", title="", mime_type="", url=alias_url
        )
        zim_creator.add_article(redirect_article)

    with File(zim_path) as reader:
        assert reader.get_article(alias_url).is_redirect
        resolved = reader.get_article(alias_url).get_redirect_article()
        assert resolved.longurl == target_url
Exemplo n.º 5
0
def test_fileprovider(fpath, lipsum):
    """A file-backed StaticItem is stored as-is and its provider can be fed."""
    source_path = fpath.with_name("lipsum.html")
    with open(source_path, "w") as out:
        for _ in range(10):
            out.write(lipsum)

    file_item = StaticItem(
        path=HOME_PATH, filepath=source_path, mimetype="text/html"
    )
    assert HOME_PATH in str(file_item)
    assert file_item.get_title() in str(file_item)

    with Creator(fpath) as creator:
        creator.add_item(file_item)

    archive = Archive(fpath)
    with open(source_path, "rb") as src:
        stored = bytes(archive.get_entry_by_path(HOME_PATH).get_item().content)
        assert stored == src.read()

    # feed streaming: pull blobs until an empty one is returned
    provider = file_item.get_contentprovider()
    blob = provider.feed()
    while blob.size():
        assert isinstance(blob, Blob)
        blob = provider.feed()
Exemplo n.º 6
0
def test_virtualmethods_int_exc(fpath):
    """A content provider returning a string as its size must make
    add_item fail with a RuntimeError mentioning the TypeError."""

    class BadSizeProvider:
        """Content provider whose get_size() returns a string."""

        def feed(self):
            return Blob("")

        def get_size(self):
            return ""

    class BadSizeItem:
        """Item with empty path/title/mimetype using BadSizeProvider."""

        def get_contentprovider(self):
            return BadSizeProvider()

        def get_mimetype(self):
            return ""

        def get_title(self):
            return ""

        def get_path(self):
            return ""

    expected_message = "TypeError: an integer is required"
    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match=expected_message):
            creator.add_item(BadSizeItem())
Exemplo n.º 7
0
def test_reimpfeed(fpath):
    """An item whose content provider reimplements feed() can be added,
    and the provider reports a one-byte size and a one-byte first blob."""

    class OneShotProvider:
        """Provider that feeds a single one-character blob, then empty ones."""

        def __init__(self):
            self.called = False

        def get_size(self):
            return 1

        def feed(self):
            if not self.called:
                self.called = True
                return Blob("1")
            return Blob("")

    class OneShotItem:
        """Item at path "-" with empty title and mimetype."""

        def get_contentprovider(self):
            return OneShotProvider()

        def get_mimetype(self):
            return ""

        def get_title(self):
            return ""

        def get_path(self):
            return "-"

    with Creator(fpath) as creator:
        creator.add_item(OneShotItem())

    # check a fresh provider directly, independently of the Creator
    provider = OneShotItem().get_contentprovider()
    assert provider.get_size() == 1
    assert provider.feed().size() == 1
Exemplo n.º 8
0
def test_creator_indexing(fpath, lipsum_item, indexing, language, expected):
    """Check that the archive has a full-text index only when requested.

    The Creator is configured with ``config_indexing(indexing, language)``
    and the resulting archive's ``has_fulltext_index`` must equal
    ``indexing``.

    ``indexing``, ``language`` and ``expected`` are presumably supplied by a
    parametrization that is not visible here; ``expected`` is not used by the
    assertions in this function.
    """
    # Write to the ``fpath`` fixture rather than a hard-coded file name, so
    # the test does not create (and leave behind) a file in the current
    # working directory that every run would share.
    with Creator(fpath).config_indexing(indexing, language) as c:
        c.add_item(lipsum_item)

    zim = Archive(fpath)
    assert zim.has_fulltext_index == indexing
Exemplo n.º 9
0
def test_compression_from_string(tmpdir, compression):
    """Creator accepts the given ``compression`` value and can add an article."""
    zim_path = tmpdir / "test.zim"
    with Creator(zim_path, "home", compression=compression) as zim_creator:
        home_article = SimpleArticle(
            title="Hello", mime_type="text/html", content="", url="A/home"
        )
        zim_creator.add_article(home_article)
Exemplo n.º 10
0
def test_creator_metadata_nooverwrite(fpath, lipsum_item):
    """Adding the same metadata key twice must keep the first value."""
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        # setting the key a second time prints a warning and is ignored
        for value in ("first", "second"):
            creator.add_metadata("Key", value)
    stored = Archive(fpath).get_metadata("Key")
    assert stored.decode("UTF-8") == "first"
Exemplo n.º 11
0
def test_article_metadata(tmpdir, metadata):
    """update_metadata() must store exactly the metadata it was given."""
    creator_options = {
        "main_page": "welcome",
        "index_language": "eng",
        "min_chunk_size": 2048,
    }
    zim_path = str(tmpdir / "test.zim")
    with Creator(zim_path, **creator_options) as zim_creator:
        zim_creator.update_metadata(**metadata)
        assert zim_creator._metadata == metadata
Exemplo n.º 12
0
def test_creator_mainpath(fpath, lipsum_item):
    """The main entry exists only when set_mainpath() was called."""
    # first pass: main path configured
    with Creator(fpath).set_mainpath(HOME_PATH) as creator:
        creator.add_item(lipsum_item)

    archive = Archive(fpath)
    assert archive.has_main_entry is True
    assert archive.main_entry.path == "mainPage"
    assert archive.main_entry.get_item().path == HOME_PATH

    # remove the file so the second pass starts from scratch
    fpath.unlink()

    # second pass: no main path configured
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
    archive = Archive(fpath)
    assert archive.has_main_entry is False
    with pytest.raises(RuntimeError):
        assert archive.main_entry
Exemplo n.º 13
0
def test_article_overriding_required(tmpdir, monkeypatch, no_method):
    """Ensure a not-implemented Article method is reported properly."""
    zim_path = tmpdir / "test.zim"
    main_page = "welcome"
    expected_message = re.compile(r"NotImplementedError.+must be implemented")

    # drop the method named by `no_method` from the class for this test
    monkeypatch.delattr(OverridenArticle, no_method)

    with pytest.raises(RuntimeError, match=expected_message):
        with Creator(zim_path, main_page) as zim_creator:
            incomplete_article = OverridenArticle(no_method)
            zim_creator.add_article(incomplete_article)
Exemplo n.º 14
0
def test_creator_faviconpath(fpath, favicon_data):
    """The favicon entry exists only when set_faviconpath() was called."""
    icon_path = HOME_PATH
    icon_item = StaticItem(
        mimetype="image/png", path=icon_path, content=favicon_data
    )

    # first pass: favicon path configured
    with Creator(fpath).set_faviconpath(icon_path) as creator:
        creator.add_item(icon_item)

    archive = Archive(fpath)
    assert archive.has_favicon_entry is True
    assert archive.favicon_entry.path == "favicon"
    assert archive.favicon_entry.get_item().path == icon_path

    # remove the file so the second pass starts from scratch
    fpath.unlink()

    # second pass: same item, no favicon path configured
    with Creator(fpath) as creator:
        creator.add_item(icon_item)
    archive = Archive(fpath)
    assert archive.has_favicon_entry is False
    with pytest.raises(RuntimeError):
        assert archive.favicon_entry
Exemplo n.º 15
0
def test_noleftbehind_empty(tmpdir):
    """Assert that a ZIM with no articles leaves no extra files behind (#41)."""
    fname = "test_empty.zim"
    creator_options = {
        "main_page": "welcome",
        "index_language": "eng",
        "min_chunk_size": 2048,
    }
    with Creator(tmpdir / fname, **creator_options) as zim_creator:
        print(zim_creator)

    # exactly one entry in the folder may start with the ZIM file name
    matching = sum(
        1 for entry in tmpdir.listdir() if entry.basename.startswith(fname)
    )
    assert matching == 1
Exemplo n.º 16
0
def test_missing_contentprovider(fpath):
    """An item without get_contentprovider() must make add_item fail."""

    class ProviderlessItem:
        """Item defining path, title and mimetype but no content provider."""

        def get_mimetype(self):
            return ""

        def get_title(self):
            return ""

        def get_path(self):
            return ""

    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match="has no attribute"):
            creator.add_item(ProviderlessItem())
Exemplo n.º 17
0
def test_in_article_exceptions(tmpdir):
    """Make sure exceptions from an article's virtual methods surface as
    RuntimeError from add_article()."""

    class BoolErrorArticle(SimpleArticle):
        def is_redirect(self):
            raise RuntimeError("OUPS Redirect")

    class StringErrorArticle(SimpleArticle):
        def get_url(self):
            raise IOError

    class BlobErrorArticle(SimpleArticle):
        def get_data(self):
            raise IOError

    zim_path = tmpdir / "test.zim"
    main_page = "welcome"
    article_kwargs = dict(
        title="Hello", mime_type="text/html", content="", url="welcome"
    )

    # (expected message, article class, constructor keyword arguments)
    # one case per kind of virtual method (int-returning ones are not used)
    failing_cases = (
        ("OUPS Redirect", BoolErrorArticle, article_kwargs),
        ("in get_url", StringErrorArticle, article_kwargs),
        ("IOError", BlobErrorArticle, article_kwargs),
        ("NotImplementedError", Article, {}),
    )

    with Creator(zim_path, main_page) as zim_creator:
        for message, article_cls, kwargs in failing_cases:
            with pytest.raises(RuntimeError, match=message):
                zim_creator.add_article(article_cls(**kwargs))

    # the error must also be catchable from outside the creator context
    with pytest.raises(RuntimeError):
        with Creator(zim_path, main_page) as zim_creator:
            zim_creator.add_article(BlobErrorArticle(**article_kwargs))
Exemplo n.º 18
0
def test_write_article(tmpdir, article):
    """An article can be added and metadata updated within one Creator."""
    creator_options = {
        "main_page": "welcome",
        "index_language": "eng",
        "min_chunk_size": 2048,
    }
    zim_metadata = {
        "creator": "python-libzim",
        "description": "Created in python",
        "name": "Hola",
        "publisher": "Monadical",
        "title": "Test Zim",
    }
    zim_path = str(tmpdir / "test.zim")
    with Creator(zim_path, **creator_options) as zim_creator:
        zim_creator.add_article(article)
        zim_creator.update_metadata(**zim_metadata)
Exemplo n.º 19
0
def test_check_mandatory_metadata(tmpdir):
    """mandatory_metadata_ok() is false on a fresh Creator and true once
    creator, description, name, publisher and title have been set."""
    creator_options = {
        "main_page": "welcome",
        "index_language": "eng",
        "min_chunk_size": 2048,
    }
    zim_metadata = {
        "creator": "python-libzim",
        "description": "Created in python",
        "name": "Hola",
        "publisher": "Monadical",
        "title": "Test Zim",
    }
    zim_path = str(tmpdir / "test.zim")
    with Creator(zim_path, **creator_options) as zim_creator:
        assert not zim_creator.mandatory_metadata_ok()
        zim_creator.update_metadata(**zim_metadata)
        assert zim_creator.mandatory_metadata_ok()
Exemplo n.º 20
0
def test_item_contentprovider_none(fpath):
    """An item whose get_contentprovider() returns an empty string must
    make add_item fail with a "ContentProvider is None" RuntimeError."""

    class EmptyProviderItem:
        """Item whose every accessor, provider included, returns ""."""

        def get_contentprovider(self):
            return ""

        def get_mimetype(self):
            return ""

        def get_title(self):
            return ""

        def get_path(self):
            return ""

    expected_message = "ContentProvider is None"
    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match=expected_message):
            creator.add_item(EmptyProviderItem())
Exemplo n.º 21
0
def test_filename_article(tmpdir):
    """An article backed by a file on disk must be read back with the
    same number of bytes as the file content."""

    class FileArticle(Article):
        """Article exposing its content through get_filename()."""

        def __init__(self, fpath, url):
            super().__init__()
            self.url = url
            self.fpath = fpath

        def get_filename(self):
            return str(self.fpath)

        def get_size(self):
            return self.fpath.stat().size

        def get_url(self):
            return self.url

        def get_title(self):
            return ""

        def get_mime_type(self):
            return "text/plain"

        def is_redirect(self):
            return False

        def should_compress(self):
            return True

        def should_index(self):
            return True

    payload = b"abc"
    source_path = tmpdir / "test.txt"
    zim_path = tmpdir / "test.zim"
    article_url = "A/home"

    # put the payload in a physical file for the article to point at
    with open(source_path, "wb") as out:
        out.write(payload)

    with Creator(zim_path, "home") as zim_creator:
        zim_creator.add_article(FileArticle(source_path, article_url))

    # the size seen by the reader must match the payload
    with File(zim_path) as reader:
        stored = reader.get_article(article_url).content
        assert stored.nbytes == len(payload)
Exemplo n.º 22
0
def test_stringprovider(fpath, lipsum):
    """A string-backed StaticItem is stored UTF-8 encoded and its provider
    can be fed."""
    text_item = StaticItem(
        path=HOME_PATH, content=lipsum, mimetype="text/html"
    )
    assert HOME_PATH in str(text_item)
    assert text_item.get_title() in str(text_item)

    with Creator(fpath) as creator:
        creator.add_item(text_item)

    archive = Archive(fpath)
    stored = bytes(archive.get_entry_by_path(HOME_PATH).get_item().content)
    assert stored == lipsum.encode("UTF-8")

    # feed streaming: pull blobs until an empty one is returned
    provider = text_item.get_contentprovider()
    blob = provider.feed()
    while blob.size():
        assert isinstance(blob, Blob)
        blob = provider.feed()
Exemplo n.º 23
0
def test_creator_params(tmpdir):
    """Check that Creator parameters are reflected in the written ZIM.

    A ZIM is created with a main page URL and an index language, then read
    back: the reader's filename and main page URL must match what was given,
    and the "/M/Language" article must contain the index language.
    """
    path = tmpdir / "test.zim"
    main_page = "welcome"
    main_page_url = f"A/{main_page}"
    index_language = "eng"
    with Creator(path,
                 main_page=main_page_url,
                 index_language=index_language,
                 min_chunk_size=2048) as zim_creator:
        zim_creator.add_article(
            SimpleArticle(title="Welcome",
                          mime_type="text/html",
                          content="",
                          url=main_page_url))

    # Open the reader as a context manager, as the other File-based tests do,
    # so it is closed when the assertions are done instead of being left open.
    with File(path) as zim:
        assert zim.filename == path
        assert zim.main_page_url == main_page_url
        assert bytes(zim.get_article("/M/Language").content).decode(
            "UTF-8") == index_language
Exemplo n.º 24
0
                <title>Monadical</title>
                </head>
                <h1> ñññ Hello, it works ñññ </h1></html>"""

# HTML source of the second sample page.
content2 = """<!DOCTYPE html>
                <html class="client-js">
                <head><meta charset="UTF-8">
                <title>Monadical 2</title>
                </head>
                <h1> ñññ Hello, it works 2 ñññ </h1></html>"""

# Two sample articles; `content` and `TestArticle` are defined outside this
# excerpt.
article = TestArticle("Monadical_SAS", "Monadical", content)
article2 = TestArticle("Monadical_2", "Monadical 2", content2)

# Use a uuid1-based suffix so each run gets a fresh file name.
rnd_str = str(uuid.uuid1())
zim_file_path = f"kiwix-test-{rnd_str}.zim"

# Write both articles, then set the ZIM metadata, within one Creator context.
with Creator(zim_file_path,
             main_page="Monadical",
             index_language="eng",
             min_chunk_size=2048) as zc:
    zc.add_article(article)
    zc.add_article(article2)
    zc.update_metadata(
        creator="python-libzim",
        description="Created in python",
        name="Hola",
        publisher="Monadical",
        title="Test Zim",
    )
Exemplo n.º 25
0
# HTML source of the second sample page.
content2 = """<!DOCTYPE html>
<html class="client-js">
<head><meta charset="UTF-8">
<title>Monadical 2</title>
</head>
<h1> ñññ Hello, it works 2 ñññ </h1></html>"""

# Two sample items; `content` and `TestItem` are defined outside this excerpt.
item = TestItem("Monadical_SAS", "Monadical", content)
item2 = TestItem("Monadical_2", "Monadical 2", content2)

# Use a uuid1-based suffix so each run gets a fresh file name.
zim_file_path = f"kiwix-test-{uuid.uuid1()}.zim"

print(f"Testing writer for {zim_file_path}")
# Indexing and minimum cluster size are configured on the Creator before the
# context is entered; the main path, items and metadata are set inside it.
with Creator(zim_file_path).config_indexing(True, "eng").config_minclustersize(
    512
) as zc:
    zc.set_mainpath("Monadical")
    zc.add_item(item)
    zc.add_item(item2)
    for name, value in {
        "creator": "python-libzim",
        "description": "Created in python",
        "name": "Hola",
        "publisher": "Monadical",
        "title": "Test Zim",
    }.items():

        # Keys are title-cased ("creator" -> "Creator") and values are passed
        # as UTF-8 encoded bytes.
        zc.add_metadata(name.title(), value.encode("UTF-8"))

Exemplo n.º 26
0
def test_creator_config_poststart(fpath, name, args):
    """Calling ``config_<name>(*args)`` on an entered Creator must raise a
    RuntimeError whose message mentions "started"."""
    method_name = f"config_{name}"
    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match="started"):
            configure = getattr(creator, method_name)
            configure(*args)
Exemplo n.º 27
0
def test_creator_nocontext(fpath, lipsum_item):
    """Ensure the creator can be used linearly, without a ``with`` block.

    The context-manager protocol is driven by hand: ``__enter__`` and
    ``__exit__`` are called explicitly, first on a run that ends without an
    exception and then on a run where an exception is captured and handed
    to ``__exit__``.
    """
    creator = Creator(fpath)
    # values passed to __exit__ when no exception occurred
    exc_type, exc_val, exc_tb = None, None, None

    creator.__enter__()
    creator.add_metadata("Name", "name")
    creator.add_item(lipsum_item)
    # configuring after __enter__ is expected to raise
    with pytest.raises(RuntimeError):
        creator.config_verbose(True)
    creator.__exit__(exc_type, exc_val, exc_tb)

    # now with an exception
    creator = Creator(fpath)
    creator.__enter__()
    creator.add_item(lipsum_item)
    try:
        # called with two arguments here, whereas test_creator_redirection
        # passes three; the re-raise below expects this to be a TypeError
        creator.add_redirection("A", HOME_PATH)
    except Exception:
        # capture the exception details so they can be passed to __exit__
        exc_type, exc_val, exc_tb = sys.exc_info()
        # re-raise inside pytest.raises to check the exception type
        with pytest.raises(TypeError):
            raise
    # NOTE(review): if add_redirection does not raise, the except branch is
    # skipped and __exit__ receives (None, None, None) without failing the test
    creator.__exit__(exc_type, exc_val, exc_tb)
Exemplo n.º 28
0
def test_creator_additem(fpath, lipsum_item):
    """add_item() needs a started Creator and rejects invalid arguments."""
    # a Creator that has not been started must refuse items
    idle_creator = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        idle_creator.add_item(lipsum_item)
    del idle_creator

    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)

        # None is rejected explicitly
        with pytest.raises(TypeError, match="must not be None"):
            creator.add_item(None)

        # an object that is not an item
        with pytest.raises(RuntimeError):
            creator.add_item("hello")

        # keyword arguments are not accepted
        with pytest.raises(TypeError, match="takes no keyword arguments"):
            creator.add_item(mimetype="text/html")
Exemplo n.º 29
0
def test_creator_metadata(fpath, lipsum_item):
    """Metadata added through the Creator must be read back unchanged.

    Every value is added as a string except "Date", which is added as a
    ``datetime.date``; all of them are compared against the original strings
    once decoded from the archive.
    """
    metadata = {
        # kiwix-mandatory
        "Name": "wikipedia_fr_football",
        "Title": "English Wikipedia",
        "Creator": "English speaking Wikipedia contributors",
        "Publisher": "Wikipedia user Foobar",
        "Date": "2009-11-21",
        "Description": (
            "All articles (without images) from the english Wikipedia"
        ),
        "Language": "eng",
        # optional
        "Longdescription": (
            "This ZIM file contains all articles (without images) "
            "from the english Wikipedia by 2009-11-10."
            " The topics are ..."
        ),
        "Licence": "CC-BY",
        "Tags": (
            "wikipedia;_category:wikipedia;_pictures:no;"
            "_videos:no;_details:yes;_ftindex:yes"
        ),
        "Flavour": "nopic",
        "Source": "https://en.wikipedia.org/",
        "Counter": "image/jpeg=5;image/gif=3;image/png=2",
        "Scraper": "sotoki 1.2.3",
    }

    # a Creator that has not been started must refuse metadata
    idle_creator = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        first_key = next(iter(metadata))
        idle_creator.add_metadata(first_key, metadata[first_key])
    del idle_creator

    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        for name, value in metadata.items():
            if name != "Date":
                creator.add_metadata(name, value)

        # the date goes in as a datetime.date built from the string parts
        year, month, day = (int(part) for part in metadata["Date"].split("-"))
        creator.add_metadata("Date", datetime.date(year, month, day))

    archive = Archive(fpath)
    for name, value in metadata.items():
        assert archive.get_metadata(name).decode("UTF-8") == value
Exemplo n.º 30
0
def test_creator_redirection(fpath, lipsum_item):
    """Redirections need a started Creator and must resolve to their target
    and show up in suggestions."""
    # a Creator that has not been started must refuse redirections
    idle_creator = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        idle_creator.add_redirection("home", "hello", HOME_PATH)
    del idle_creator

    # (path, title) of each redirection pointing at HOME_PATH
    redirections = (("home", "hello"), ("accueil", "bonjour"))
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        for path, title in redirections:
            creator.add_redirection(path, title, HOME_PATH)

    archive = Archive(fpath)
    # one item plus two redirections
    assert archive.entry_count == 3
    assert archive.has_entry_by_path("home") is True
    assert archive.has_entry_by_path("accueil") is True
    assert archive.get_entry_by_path("home").is_redirect

    redirect_target = archive.get_entry_by_path("home").get_redirect_entry()
    assert redirect_target.path == archive.get_entry_by_path(HOME_PATH).path
    assert archive.get_entry_by_path("accueil").get_item().path == HOME_PATH

    assert "home" in list(archive.suggest("hello"))
    assert "accueil" in list(archive.suggest("bonjour"))