示例#1
0
    def open(self, filename: str, html: bool = True) -> None:
        """
        Open an XDXF file: read its header info, then reopen it as binary.

        The document is scanned with lxml's incremental parser, listening
        for "end" events only.  The text of every element whose end tag is
        seen before the end tag of the first <meta_info>, <ar>, <k>, <abr>
        or <dtrn> element is stored as glossary info; the tag name is
        mapped through ``self.infoKeyMap`` (falling back to the tag name
        itself).  Elements without text are reported and skipped.

        Afterwards the file size is recorded in ``self._fileSize``, the file
        is opened in binary mode as ``self._file`` and the glossary's default
        definition format is set to "x".

        :param filename: path of the XDXF file.
        :param html: if true, an XDXF-to-HTML transformer is created and
            kept in ``self._xdxf_to_html``.
        """
        # <!DOCTYPE xdxf SYSTEM "http://xdxf.sourceforge.net/xdxf_lousy.dtd">
        from lxml import etree as ET
        self._filename = filename
        self._html = html
        if html:
            self._xdxf_to_html = xdxf_to_html_transformer()
        context = ET.iterparse(
            filename,
            events=("end", ),
        )
        for _, elem in context:
            if elem.tag in ("meta_info", "ar", "k", "abr", "dtrn"):
                break
            # every other tag before </meta_info> or </ar> is considered info
            if not elem.text:
                # Logger.warn is a deprecated alias; use warning()
                log.warning(f"empty tag <{elem.tag}>")
                continue
            key = self.infoKeyMap.get(elem.tag, elem.tag)
            self._glos.setInfo(key, elem.text)

        # drop the parser before the file is opened again below
        del context
        self._fileSize = os.path.getsize(filename)
        # inside this method body, ``open`` is the builtin, not this method
        self._file = open(self._filename, mode="rb")
        self._glos.setDefaultDefiFormat("x")
示例#2
0
    def write(self) -> "Generator[None, BaseEntry, None]":
        """
        Write Apple Dictionary source files; entries are pushed in by the
        caller.

        This is a generator used as a coroutine: everything up to the first
        ``yield`` (output directory setup, XML header) runs when the
        generator is first advanced.  Each entry is then passed in with
        ``send(entry)``; sending ``None`` ends the entry loop, after which
        the closing tag is written and the supporting files are produced.

        Files written under ``self._dirname`` (``<base>`` is the directory's
        base name with "." replaced by "_"):

        - ``<base>.xml``: one ``<d:entry>`` per non-data entry with a
          non-empty normalized title
        - ``OtherResources/``: data entries, plus copies of the XSL and
          prefs HTML files when those options are set
        - ``<base>.css``, ``Makefile``, ``<base>.plist``

        If ``self._jing`` is true, the Jing check is run on the XML file
        at the end.

        Side effect: the module-level ``BeautifulSoup`` is loaded on demand
        when ``self._cleanHTML`` is true, and reset to ``None`` otherwise.
        """
        global BeautifulSoup

        glos = self._glos
        cleanHTML = self._cleanHTML
        css = self._css
        xsl = self._xsl
        defaultPrefs = self._defaultPrefs
        prefsHTML = self._prefsHTML
        frontBackMatter = self._frontBackMatter
        jing = self._jing
        indexes = self._indexes

        xdxf_to_html = xdxf_to_html_transformer()

        if cleanHTML:
            if BeautifulSoup is None:
                loadBeautifulSoup()
            if BeautifulSoup is None:
                log.warning(
                    "cleanHTML option passed but BeautifulSoup not found. "
                    f"to fix this run "
                    f"`{pip} install lxml beautifulsoup4 html5lib`")
        else:
            BeautifulSoup = None

        dirname = self._dirname
        fileNameBase = basename(dirname).replace(".", "_")
        filePathBase = join(dirname, fileNameBase)
        # before chdir (outside indir block)
        css = abspath_or_None(css)
        xsl = abspath_or_None(xsl)
        prefsHTML = abspath_or_None(prefsHTML)
        frontBackMatter = abspath_or_None(frontBackMatter)

        generate_id = id_generator()
        generate_indexes = indexes_generator(indexes)

        myResDir = join(dirname, "OtherResources")
        # exist_ok avoids the check-then-create race of isdir() + mkdir()
        os.makedirs(myResDir, exist_ok=True)

        with open(filePathBase + ".xml", mode="w", encoding="utf-8") as toFile:
            write_header(glos, toFile, frontBackMatter)
            while True:
                entry = yield
                if entry is None:
                    # the caller signals the end of input by sending None
                    break
                if entry.isData():
                    entry.save(myResDir)
                    continue

                words = entry.l_word
                word, alts = words[0], words[1:]
                defi = entry.defi

                long_title = _normalize.title_long(
                    _normalize.title(word, BeautifulSoup))
                if not long_title:
                    continue

                _id = next(generate_id)
                if BeautifulSoup:
                    # True -> the result is returned as a quoted attribute
                    title_attr = BeautifulSoup.dammit.EntitySubstitution\
                     .substitute_xml(long_title, True)
                else:
                    # Without BeautifulSoup the value must be quoted here,
                    # otherwise d:title=... is emitted bare and the XML is
                    # malformed.  "&" is deliberately left untouched.
                    # NOTE(review): _normalize.title may already
                    # entity-escape "&" -- confirm before escaping it here.
                    title_attr = '"' + (
                        str(long_title)
                        .replace("<", "&lt;")
                        .replace(">", "&gt;")
                        .replace('"', "&quot;")
                    ) + '"'

                content_title = long_title
                if entry.defiFormat == "x":
                    # XDXF definitions are converted to HTML, and no title
                    # is passed to prepare_content for them
                    defi = xdxf_to_html(defi)
                    content_title = None
                content = prepare_content(content_title, defi, BeautifulSoup)

                toFile.write(f'<d:entry id="{_id}" d:title={title_attr}>\n' +
                             generate_indexes(long_title, alts, content,
                                              BeautifulSoup) + content +
                             "\n</d:entry>\n")

            toFile.write("</d:dictionary>\n")

        if xsl:
            shutil.copy(xsl, myResDir)

        if prefsHTML:
            shutil.copy(prefsHTML, myResDir)

        write_css(filePathBase + ".css", css)

        with open(join(dirname, "Makefile"), mode="w",
                  encoding="utf-8") as toFile:
            toFile.write(
                toStr(pkgutil.get_data(
                    __name__,
                    "templates/Makefile",
                )).format(dict_name=fileNameBase))

        copyright = glos.getInfo("copyright")
        if BeautifulSoup:
            # strip html tags
            copyright = str(
                BeautifulSoup.BeautifulSoup(copyright, features="lxml").text)

        # if DCSDictionaryXSL provided but DCSDictionaryDefaultPrefs <dict/> not
        # present in Info.plist, Dictionary.app will crash.
        with open(filePathBase + ".plist", mode="w",
                  encoding="utf-8") as toFile:
            frontMatterReferenceID = (
                "<key>DCSDictionaryFrontMatterReferenceID</key>\n"
                "\t<string>front_back_matter</string>"
                if frontBackMatter else "")
            toFile.write(
                toStr(pkgutil.get_data(
                    __name__,
                    "templates/Info.plist",
                )).format(
                    # identifier must be unique
                    CFBundleIdentifier=fileNameBase.replace(" ", ""),
                    CFBundleDisplayName=glos.getInfo("name"),
                    CFBundleName=fileNameBase,
                    DCSDictionaryCopyright=copyright,
                    DCSDictionaryManufacturerName=glos.getAuthor(),
                    DCSDictionaryXSL=basename(xsl) if xsl else "",
                    DCSDictionaryDefaultPrefs=format_default_prefs(
                        defaultPrefs),
                    DCSDictionaryPrefsHTML=basename(prefsHTML)
                    if prefsHTML else "",
                    DCSDictionaryFrontMatterReferenceID=frontMatterReferenceID,
                ))

        if jing:
            from .jing import run as jing_run
            jing_run(filePathBase + ".xml")
示例#3
0
def write(
        glos: GlossaryType,
        dirname: str,
        cleanHTML: bool = True,
        css: str = "",
        xsl: str = "",
        defaultPrefs: Optional[Dict] = None,
        prefsHTML: str = "",
        frontBackMatter: str = "",
        jing: bool = False,
        indexes: str = "",  # FIXME: rename to indexes_lang?
):
    """
    Write glossary to Apple dictionary .xml and supporting files.

    The output directory ``dirname`` is created if missing.  Inside it
    (``<base>`` is the directory's base name with "." replaced by "_")
    this writes ``<base>.xml``, ``<base>.css``, ``<base>.plist``,
    ``Makefile`` and an ``OtherResources`` directory that receives the
    glossary's data entries and copies of the XSL / prefs HTML files.

    Side effect: the module-level ``BeautifulSoup`` is loaded on demand when
    ``cleanHTML`` is true, and reset to ``None`` otherwise.

    :type glos: pyglossary.glossary.Glossary
    :type dirname: str, directory path, must not have extension

    :type cleanHTML: bool
    :param cleanHTML: pass True to use BeautifulSoup parser.

    :type css: str
    :param css: path to custom .css file

    :type xsl: str
    :param xsl: path to custom XSL transformations file.

    :type defaultPrefs: dict or None
    :param defaultPrefs: Default prefs in python dictionary literal format,
        i.e. {"key1": "value1", "key2": "value2", ...}.  All keys and values
        must be quoted strings; characters that are not allowed (e.g.
        single/double quotes, equal sign "=", semicolon) must be escaped as
        hex code according to python string literal rules.

    :type prefsHTML: str
    :param prefsHTML: path to XHTML file with user interface for dictionary's
        preferences.  Refer to Apple's documentation for details.

    :type frontBackMatter: str
    :param frontBackMatter: path to XML file with top-level tag
        <d:entry id="front_back_matter" d:title="Your Front/Back Matter Title">
            your front/back matter entry content
        </d:entry>

    :type jing: bool
    :param jing: pass True to run Jing check on generated XML.

    # FIXME: rename to indexes_lang?
    :type indexes: str
    :param indexes: Dictionary.app does not know how to perform flexible
        search by default.  We can help it by manually providing additional
        indexes to dictionary entries.
    """
    global BeautifulSoup

    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs(dirname, exist_ok=True)

    xdxf_to_html = xdxf_to_html_transformer()

    if cleanHTML:
        if BeautifulSoup is None:
            loadBeautifulSoup()
        if BeautifulSoup is None:
            log.warning(
                "cleanHTML option passed but BeautifulSoup not found.  " +
                "to fix this run `sudo pip3 install lxml beautifulsoup4 html5lib`"
            )
    else:
        BeautifulSoup = None

    fileNameBase = basename(dirname).replace(".", "_")
    filePathBase = join(dirname, fileNameBase)
    # before chdir (outside indir block)
    css = abspath_or_None(css)
    xsl = abspath_or_None(xsl)
    prefsHTML = abspath_or_None(prefsHTML)
    frontBackMatter = abspath_or_None(frontBackMatter)

    generate_id = id_generator()
    generate_indexes = indexes_generator(indexes)

    glos.setDefaultDefiFormat("h")

    myResDir = join(dirname, "OtherResources")
    os.makedirs(myResDir, exist_ok=True)

    with open(filePathBase + ".xml", "w", encoding="utf-8") as toFile:
        write_header(glos, toFile, frontBackMatter)
        for entry in glos:
            if entry.isData():
                entry.save(myResDir)
                continue

            words = entry.l_word
            word, alts = words[0], words[1:]
            defi = entry.defi

            long_title = _normalize.title_long(
                _normalize.title(word, BeautifulSoup))
            if not long_title:
                continue

            _id = next(generate_id)
            if BeautifulSoup:
                # True -> the result is returned as a quoted attribute
                title_attr = BeautifulSoup.dammit.EntitySubstitution\
                 .substitute_xml(long_title, True)
            else:
                # Without BeautifulSoup the value must be quoted here,
                # otherwise d:title=... is emitted bare and the XML is
                # malformed.  "&" is deliberately left untouched.
                # NOTE(review): _normalize.title may already entity-escape
                # "&" -- confirm before escaping it here.
                title_attr = '"' + (
                    str(long_title)
                    .replace("<", "&lt;")
                    .replace(">", "&gt;")
                    .replace('"', "&quot;")
                ) + '"'

            content_title = long_title
            if entry.defiFormat == "x":
                # XDXF definitions are converted to HTML, and no title is
                # passed to prepare_content for them
                defi = xdxf_to_html(defi)
                content_title = None
            content = prepare_content(content_title, defi, BeautifulSoup)

            toFile.write(
                f'<d:entry id="{_id}" d:title={title_attr}>\n' +
                generate_indexes(long_title, alts, content, BeautifulSoup) +
                content + "\n</d:entry>\n")

        toFile.write("</d:dictionary>\n")

    if xsl:
        shutil.copy(xsl, myResDir)

    if prefsHTML:
        shutil.copy(prefsHTML, myResDir)

    write_css(filePathBase + ".css", css)

    # explicit encoding, like the other generated files, so the output does
    # not depend on the platform's default locale encoding
    with open(join(dirname, "Makefile"), "w", encoding="utf-8") as toFile:
        toFile.write(
            toStr(pkgutil.get_data(
                __name__,
                "templates/Makefile",
            )).format(dict_name=fileNameBase))

    copyright = glos.getInfo("copyright")
    if BeautifulSoup:
        # strip html tags
        copyright = str(
            BeautifulSoup.BeautifulSoup(copyright, features="lxml").text)

    # if DCSDictionaryXSL provided but DCSDictionaryDefaultPrefs <dict/> not
    # present in Info.plist, Dictionary.app will crash.
    with open(filePathBase + ".plist", "w", encoding="utf-8") as toFile:
        frontMatterReferenceID = (
            "<key>DCSDictionaryFrontMatterReferenceID</key>\n"
            "\t<string>front_back_matter</string>" if frontBackMatter else "")
        toFile.write(
            toStr(pkgutil.get_data(
                __name__,
                "templates/Info.plist",
            )).format(
                # identifier must be unique
                CFBundleIdentifier=fileNameBase.replace(" ", ""),
                CFBundleDisplayName=glos.getInfo("name"),
                CFBundleName=fileNameBase,
                DCSDictionaryCopyright=copyright,
                DCSDictionaryManufacturerName=glos.getAuthor(),
                DCSDictionaryXSL=basename(xsl) if xsl else "",
                DCSDictionaryDefaultPrefs=format_default_prefs(defaultPrefs),
                DCSDictionaryPrefsHTML=basename(prefsHTML)
                if prefsHTML else "",
                DCSDictionaryFrontMatterReferenceID=frontMatterReferenceID,
            ))

    if jing:
        from .jing import run as jing_run
        jing_run(filePathBase + ".xml")
示例#4
0
 def xdxf_setup(self):
     """Create an XDXF-to-HTML transformer and keep it in ``self._xdxf_tr``."""
     # function-level import: pyglossary.xdxf_transform is pulled in by this
     # code only when the method is actually called
     from pyglossary import xdxf_transform
     transformer = xdxf_transform.xdxf_to_html_transformer()
     self._xdxf_tr = transformer