示例#1
0
def extract_rdf_files(rdf_tarball, rdf_path):
    if path(rdf_path).exists():
        logger.info("\tRDF-files folder already exists in {}".format(rdf_path))
        return

    logger.info("\tExtracting {} into {}".format(rdf_tarball, rdf_path))

    # create destdir if not exists
    dest = path(rdf_path)
    dest.mkdir_p()

    cmd = "tar -C {dest} --strip-components 2 -x -f {tarb}".format(
        dest=rdf_path, tarb=rdf_tarball)
    exec_cmd(cmd)
    return
示例#2
0
    def optimize_epub(src, dst):
        logger.info("\t\tCreating ePUB at {}".format(dst))
        zipped_files = []
        # create temp directory to extract to
        tmpd = tempfile.mkdtemp(dir=TMP_FOLDER)
        with zipfile.ZipFile(src, 'r') as zf:
            zipped_files = zf.namelist()
            zf.extractall(tmpd)

        remove_cover = False
        for fname in zipped_files:
            fnp = os.path.join(tmpd, fname)
            if path(fname).ext in ('.png', '.jpeg', '.jpg', '.gif'):

                # special case to remove ugly cover
                if fname.endswith('cover.jpg') and is_bad_cover(fnp):
                    zipped_files.remove(fname)
                    remove_cover = True
                else:
                    optimize_image(path_for_cmd(fnp))

            if path(fname).ext in ('.htm', '.html'):
                f = open(fnp, 'r')
                html = update_html_for_static(book=book,
                                              html_content=f.read(),
                                              epub=True)
                f.close()
                with open(fnp, 'w') as f:
                    f.write(html)

            if path(fname).ext == '.ncx':
                pattern = "*** START: FULL LICENSE ***"
                f = open(fnp, 'r')
                ncx = f.read()
                f.close()
                soup = BeautifulSoup(ncx, ["lxml", "xml"])
                for tag in soup.findAll('text'):
                    if pattern in tag.text:
                        s = tag.parent.parent
                        s.decompose()
                        for s in s.next_siblings:
                            s.decompose()
                        s.next_sibling

                with open(fnp, 'w') as f:
                    f.write(soup.encode())

        # delete {id}/cover.jpg if exist and update {id}/content.opf
        if remove_cover:

            # remove cover
            path(os.path.join(tmpd, str(book.id), 'cover.jpg')).unlink_p()

            soup = None
            opff = os.path.join(tmpd, str(book.id), 'content.opf')
            if os.path.exists(opff):
                with open(opff, 'r') as fd:
                    soup = BeautifulSoup(fd.read(), ["lxml", "xml"])

                for elem in soup.findAll():
                    if getattr(elem, 'attrs', {}).get('href') == 'cover.jpg':
                        elem.decompose()

                with (open(opff, 'w')) as fd:
                    fd.write(soup.encode())

        with cd(tmpd):
            exec_cmd('zip -q0X "{dst}" mimetype'.format(dst=path_for_cmd(dst)))
            exec_cmd('zip -qXr9D "{dst}" {files}'.format(
                dst=path_for_cmd(dst),
                files=" ".join(
                    [f for f in zipped_files if not f == 'mimetype'])))

        path(tmpd).rmtree_p()
示例#3
0
 def optimize_jpeg(fpath):
     exec_cmd('jpegoptim --strip-all -m50 "{path}"'.format(path=fpath))
示例#4
0
 def optimize_png(fpath):
     pngquant = 'pngquant --nofs --force --ext=".png" "{path}"'
     advdef = 'advdef -z -4 -i 5 "{path}"'
     exec_cmd(pngquant.format(path=fpath))
     exec_cmd(advdef.format(path=fpath))
示例#5
0
 def optimize_gif(fpath):
     exec_cmd('gifsicle -O3 "{path}" -o "{path}"'.format(path=fpath))
示例#6
0
def build_zimfile(static_folder, zim_path=None,
                  languages=[], formats=[],
                  title=None, description=None,
                  only_books=[]):

    if not languages:
        languages = ['mul']

    languages.sort()
    formats.sort()

    if title is None:
        if len(languages) > 5:
            title = ("Project Gutenberg Library with {formats}"
                     .format(formats=",".join(formats)))
        else:
            title = ("Project Gutenberg Library ({langs}) with {formats}"
                     .format(langs=",".join(languages),
                             formats=",".join(formats)))

    logger.info("\tWritting ZIM for {}".format(title))

    if description is None:
        description = "The first producer of free ebooks"

    if zim_path is None:
        if len(languages) > 1:
            zim_path = "gutenberg_all_{date}.zim".format(
                    date=datetime.datetime.now().strftime('%m_%Y'))
        else:
            zim_path = "gutenberg_{lang}_all_{date}.zim".format(
                    lang=languages[0],
                    date=datetime.datetime.now().strftime('%Y-%m'))

    languages = [ISO_MATRIX.get(lang, lang) for lang in languages]
    languages.sort()

    context = {
        'languages': ','.join(languages),
        'title': title,
        'description': description,
        'creator': 'gutenberg.org',
        'publisher': 'Kiwix',

        'home': 'Home.html',
        'favicon': 'favicon.png',

        'static': static_folder,
        'zim': zim_path
    }

    cmd = ('zimwriterfs --welcome=\\"{home}\\" --favicon=\\"{favicon}\\" '
           '--language=\\"{languages}\\" --title=\\"{title}\\" '
           '--description=\\"{description}\\" '
           '--creator=\\"{creator}\\" --publisher=\\"{publisher}\\" \\"{static}\\" \\"{zim}\\"'
           .format(**context))

    logger.debug("\t\t{}".format(re.sub('\\\\"','"',cmd)))
    if exec_cmd(cmd):
        logger.info("Successfuly created ZIM file at {}".format(zim_path))
    else:
        logger.error("Unable to create ZIM file :(")
示例#7
0
    def optimize_epub(src, dst):
        logger.info("\t\tCreating ePUB at {}".format(dst))
        zipped_files = []
        # create temp directory to extract to
        tmpd = tempfile.mkdtemp(dir=TMP_FOLDER)
        with zipfile.ZipFile(src, 'r') as zf:
            zipped_files = zf.namelist()
            zf.extractall(tmpd)

        remove_cover = False
        for fname in zipped_files:
            fnp = os.path.join(tmpd, fname)
            if path(fname).ext in ('.png', '.jpeg', '.jpg', '.gif'):

                # special case to remove ugly cover
                if fname.endswith('cover.jpg') and is_bad_cover(fnp):
                    zipped_files.remove(fname)
                    remove_cover = True
                else:
                    optimize_image(path_for_cmd(fnp))

            if path(fname).ext in ('.htm', '.html'):
                f = open(fnp, 'r')
                html = update_html_for_static(book=book,
                                              html_content=f.read(),
                                              epub=True)
                f.close()
                with open(fnp, 'w') as f:
                    f.write(html)

            if path(fname).ext == '.ncx':
                pattern = "*** START: FULL LICENSE ***"
                f = open(fnp, 'r')
                ncx = f.read()
                f.close()
                soup = BeautifulSoup(ncx, ["lxml", "xml"])
                for tag in soup.findAll('text'):
                    if pattern in tag.text:
                        s = tag.parent.parent
                        s.decompose()
                        for s in s.next_siblings:
                            s.decompose()
                        s.next_sibling

                with open(fnp, 'w') as f:
                    f.write(soup.encode())

        # delete {id}/cover.jpg if exist and update {id}/content.opf
        if remove_cover:

            # remove cover
            path(os.path.join(tmpd, str(book.id), 'cover.jpg')).unlink_p()

            soup = None
            opff = os.path.join(tmpd, str(book.id), 'content.opf')
            if os.path.exists(opff):
                with open(opff, 'r') as fd:
                    soup = BeautifulSoup(fd.read(), ["lxml", "xml"])

                for elem in soup.findAll():
                    if getattr(elem, 'attrs', {}).get('href') == 'cover.jpg':
                        elem.decompose()

                with(open(opff, 'w')) as fd:
                    fd.write(soup.encode())

        with cd(tmpd):
            exec_cmd('zip -q0X "{dst}" mimetype'.format(dst=path_for_cmd(dst)))
            exec_cmd('zip -qXr9D "{dst}" {files}'
                     .format(dst=path_for_cmd(dst),
                             files=" ".join([f for f in zipped_files
                                             if not f == 'mimetype'])))

        path(tmpd).rmtree_p()
示例#8
0
 def optimize_jpeg(fpath):
     exec_cmd('jpegoptim --strip-all -m50 "{path}"'.format(path=fpath))
示例#9
0
 def optimize_png(fpath):
     pngquant = 'pngquant --nofs --force --ext=".png" "{path}"'
     advdef = 'advdef -z -4 -i 5 "{path}"'
     exec_cmd(pngquant.format(path=fpath))
     exec_cmd(advdef.format(path=fpath))
示例#10
0
 def optimize_gif(fpath):
     exec_cmd('gifsicle -O3 "{path}" -o "{path}"'.format(path=fpath))