예제 #1
0
파일: _dict.py 프로젝트: mozii/pyglossary
def write_entries(glos, f, cleanHTML, indexes):
    """
    Write every glossary entry to ``f`` as a ``<d:entry>`` XML element.

    Entries whose normalized long title is empty are skipped.  Output is
    accumulated in memory and flushed to ``f`` periodically, with a final
    flush after the last entry.

    :param glos: glossary object; it is switched to default definition
        format ``'h'`` and then iterated for entries.
    :param f: writable text file object receiving the generated XML.
    :param cleanHTML: if true, BeautifulSoup is looked up through
        ``get_beautiful_soup()`` and handed to the title/content helpers;
        a warning is logged when it cannot be found.
    :param indexes: str | None, forwarded to ``indexes_generator``.
    """
    if cleanHTML:
        BeautifulSoup = get_beautiful_soup()
        if not BeautifulSoup:
            log.warning(
                'cleanHTML option passed but BeautifulSoup not found.  ' +
                'to fix this run `sudo pip3 install lxml beautifulsoup4 html5lib`'
            )
    else:
        BeautifulSoup = None

    generate_id = id_generator()
    generate_indexes = indexes_generator(indexes)
    # Collect output pieces in a list and join on flush instead of growing
    # one string with "+=".
    pending = []

    xdxf.xdxf_init()

    glos.setDefaultDefiFormat('h')

    for i, entry in enumerate(glos):
        words = entry.getWords()
        word, alts = words[0], words[1:]
        defi = entry.getDefi()
        defi_format = entry.getDefiFormat()

        long_title = _normalize.title_long(_normalize.title(word, BeautifulSoup))
        if not long_title:
            continue

        _id = next(generate_id)
        if BeautifulSoup:
            # Second argument True asks for a ready-quoted attribute value.
            title_attr = BeautifulSoup.dammit.EntitySubstitution.substitute_xml(long_title, True)
        else:
            # Escape the characters that would terminate or break a
            # double-quoted XML attribute value.  "&" is left untouched
            # because the title may already contain entities
            # (NOTE(review): confirm against _normalize.title).
            title_attr = '"%s"' % (
                long_title.replace('"', '&quot;').replace('<', '&lt;')
            )

        begin_entry = '<d:entry id="%(id)s" d:title=%(title)s>\n' % {
            'id': _id,
            'title': title_attr,
        }

        if defi_format == 'x':
            # XDXF definitions are converted to HTML first and cleaned
            # without a title.
            content = format_clean_content(
                None, xdxf.xdxf_to_html(defi), BeautifulSoup)
        else:
            content = format_clean_content(long_title, defi, BeautifulSoup)

        # Separate name so the ``indexes`` parameter is not rebound.
        entry_indexes = generate_indexes(long_title, alts, content, BeautifulSoup)

        pending.extend((begin_entry, entry_indexes, content, '\n</d:entry>\n'))

        if i % 1000 == 0:
            f.write(''.join(pending))
            pending = []
    f.write(''.join(pending))
예제 #2
0
def write(
        glos: GlossaryType,
        dirname: str,
        cleanHTML: bool = True,
        css: str = "",
        xsl: str = "",
        defaultPrefs: Optional[Dict] = None,
        prefsHTML: str = "",
        frontBackMatter: str = "",
        jing: bool = False,
        indexes: str = "",  # FIXME: rename to indexes_lang?
):
    """
    Write glossary to Apple dictionary .xml and supporting files.

    Files are created inside ``dirname``: ``<name>.xml``, ``<name>.css``,
    ``<name>.plist`` and ``Makefile``, plus an ``OtherResources``
    sub-directory that receives data entries and the optional XSL and
    preferences HTML files.

    :type glos: pyglossary.glossary.Glossary
    :type dirname: str, directory path, must not have extension

    :type cleanHTML: bool
    :param cleanHTML: pass True to use BeautifulSoup parser.  If it cannot
        be loaded a warning is logged and writing continues without it.

    :type css: str
    :param css: path to custom .css file

    :type xsl: str
    :param xsl: path to custom XSL transformations file.

    :type defaultPrefs: dict or None
    :param defaultPrefs: Default prefs in python dictionary literal format,
        i.e. {"key1": "value1", "key2": "value2", ...}.  All keys and values
        must be quoted strings; characters that are not allowed (e.g.
        single/double quotes, equal sign "=", semicolon) must be escaped as
        hex codes according to python string literal rules.

    :type prefsHTML: str
    :param prefsHTML: path to XHTML file with user interface for the
        dictionary's preferences.  Refer to Apple's documentation for details.

    :type frontBackMatter: str
    :param frontBackMatter: path to XML file with top-level tag
        <d:entry id="front_back_matter" d:title="Your Front/Back Matter Title">
            your front/back matter entry content
        </d:entry>

    :type jing: bool
    :param jing: pass True to run Jing check on generated XML.

    # FIXME: rename to indexes_lang?
    :type indexes: str
    :param indexes: Dictionary.app does not know how to perform flexible
        search by default.  We can help it by manually providing additional
        indexes for dictionary entries.
    """
    global BeautifulSoup

    if not isdir(dirname):
        os.mkdir(dirname)

    xdxf.xdxf_init()

    if cleanHTML:
        if BeautifulSoup is None:
            loadBeautifulSoup()
        if BeautifulSoup is None:
            log.warning(
                "cleanHTML option passed but BeautifulSoup not found.  " +
                "to fix this run `sudo pip3 install lxml beautifulsoup4 html5lib`"
            )
    else:
        BeautifulSoup = None

    fileNameBase = basename(dirname).replace(".", "_")
    filePathBase = join(dirname, fileNameBase)
    # before chdir (outside indir block)
    css = abspath_or_None(css)
    xsl = abspath_or_None(xsl)
    prefsHTML = abspath_or_None(prefsHTML)
    frontBackMatter = abspath_or_None(frontBackMatter)

    generate_id = id_generator()
    generate_indexes = indexes_generator(indexes)

    glos.setDefaultDefiFormat("h")

    myResDir = join(dirname, "OtherResources")
    if not isdir(myResDir):
        os.mkdir(myResDir)

    with open(filePathBase + ".xml", "w", encoding="utf-8") as toFile:
        write_header(glos, toFile, frontBackMatter)
        for entryI, entry in enumerate(glos):
            if entry.isData():
                # Data (resource) entries are saved as files, not as XML.
                entry.save(myResDir)
                continue

            words = entry.getWords()
            word, alts = words[0], words[1:]
            defi = entry.getDefi()

            long_title = _normalize.title_long(
                _normalize.title(word, BeautifulSoup))
            if not long_title:
                continue

            _id = next(generate_id)
            if BeautifulSoup:
                # Second argument True asks for a ready-quoted attribute
                # value.
                title_attr = BeautifulSoup.dammit.EntitySubstitution\
                    .substitute_xml(long_title, True)
            else:
                # The value is spliced into ``d:title=%s`` below, so it must
                # carry its own quotes; characters that would terminate or
                # break a double-quoted attribute value are escaped.  "&" is
                # left untouched because the title may already contain
                # entities (NOTE(review): confirm against _normalize.title).
                title_attr = '"%s"' % (
                    str(long_title)
                    .replace('"', "&quot;")
                    .replace("<", "&lt;")
                )

            content_title = long_title
            if entry.getDefiFormat() == "x":
                # XDXF definitions are converted to HTML and cleaned
                # without a title.
                defi = xdxf.xdxf_to_html(defi)
                content_title = None
            content = format_clean_content(content_title, defi, BeautifulSoup)

            toFile.write(
                '<d:entry id="%s" d:title=%s>\n' % (_id, title_attr) +
                generate_indexes(long_title, alts, content, BeautifulSoup) +
                content + "\n</d:entry>\n")

        toFile.write("</d:dictionary>\n")

    if xsl:
        shutil.copy(xsl, myResDir)

    if prefsHTML:
        shutil.copy(prefsHTML, myResDir)

    write_css(filePathBase + ".css", css)

    # Explicit UTF-8, matching the .xml and .plist files written by this
    # function; the dictionary name substituted into the template may be
    # non-ASCII.
    with open(join(dirname, "Makefile"), "w", encoding="utf-8") as toFile:
        toFile.write(
            toStr(pkgutil.get_data(
                __name__,
                "templates/Makefile",
            )) % {"dict_name": fileNameBase})

    # Named so that it does not shadow the ``copyright`` builtin.
    copyright_text = glos.getInfo("copyright")
    if BeautifulSoup:
        # strip html tags
        copyright_text = str(
            BeautifulSoup.BeautifulSoup(copyright_text, features="lxml").text)

    # if DCSDictionaryXSL provided but DCSDictionaryDefaultPrefs <dict/> not
    # present in Info.plist, Dictionary.app will crash.
    with open(filePathBase + ".plist", "w", encoding="utf-8") as toFile:
        toFile.write(
            toStr(pkgutil.get_data(
                __name__,
                "templates/Info.plist",
            )) % {
                "CFBundleIdentifier":
                fileNameBase.replace(" ", ""),  # identifier must be unique
                "CFBundleDisplayName":
                glos.getInfo("name"),
                "CFBundleName":
                fileNameBase,
                "DCSDictionaryCopyright":
                copyright_text,
                "DCSDictionaryManufacturerName":
                glos.getInfo("author"),
                "DCSDictionaryXSL":
                basename(xsl) if xsl else "",
                "DCSDictionaryDefaultPrefs":
                format_default_prefs(defaultPrefs),
                "DCSDictionaryPrefsHTML":
                basename(prefsHTML) if prefsHTML else "",
                "DCSDictionaryFrontMatterReferenceID":
                "<key>DCSDictionaryFrontMatterReferenceID</key>\n"
                "\t<string>front_back_matter</string>"
                if frontBackMatter else "",
            })

    if jing:
        # Imported lazily: only needed when validation is requested.
        from .jing import run as jing_run
        jing_run(filePathBase + ".xml")