def write_entries(glos, f, cleanHTML, indexes):
    """
    Write every entry of ``glos`` to ``f`` as a ``<d:entry>`` XML element.

    :param glos: glossary to iterate over; its default definition format
        is set to ``'h'`` before iteration.
    :param f: writable text file object receiving the generated XML.
    :param cleanHTML: if true, BeautifulSoup is requested through
        ``get_beautiful_soup()``; a warning is logged when it is unavailable.
    :param indexes: str | None, passed to ``indexes_generator()``.
    """
    # Local import: only needed for the fallback path without BeautifulSoup.
    from xml.sax.saxutils import quoteattr

    if cleanHTML:
        BeautifulSoup = get_beautiful_soup()
        if not BeautifulSoup:
            log.warning(
                'cleanHTML option passed but BeautifulSoup not found. ' +
                'to fix this run `sudo pip3 install lxml beautifulsoup4 html5lib`'
            )
    else:
        BeautifulSoup = None

    generate_id = id_generator()
    generate_indexes = indexes_generator(indexes)
    xdxf.xdxf_init()
    glos.setDefaultDefiFormat('h')

    # Pieces of output collected here and flushed to ``f`` in batches.
    chunks = []
    for i, entry in enumerate(glos):
        words = entry.getWords()
        word, alts = words[0], words[1:]
        defi = entry.getDefi()
        defi_format = entry.getDefiFormat()

        long_title = _normalize.title_long(
            _normalize.title(word, BeautifulSoup)
        )
        if not long_title:
            # Nothing usable as a title: the entry is skipped entirely.
            continue

        _id = next(generate_id)
        if BeautifulSoup:
            title_attr = BeautifulSoup.dammit.EntitySubstitution.substitute_xml(
                long_title, True,
            )
        else:
            # quoteattr() both quotes and escapes the value, so titles
            # containing '"', '&' or '<' still give a well-formed attribute.
            title_attr = quoteattr(str(long_title))

        begin_entry = '<d:entry id="%(id)s" d:title=%(title)s>\n' % {
            'id': _id,
            'title': title_attr,
        }

        if defi_format == 'x':
            # XDXF definitions are converted to HTML first and cleaned
            # without passing a title.
            content = format_clean_content(
                None, xdxf.xdxf_to_html(defi), BeautifulSoup,
            )
        else:
            content = format_clean_content(long_title, defi, BeautifulSoup)

        entry_indexes = generate_indexes(
            long_title, alts, content, BeautifulSoup,
        )

        chunks.append(begin_entry)
        chunks.append(entry_indexes)
        chunks.append(content)
        chunks.append('\n</d:entry>\n')

        # Flush periodically so the whole output is never held in memory.
        if i % 1000 == 0:
            f.write(''.join(chunks))
            chunks.clear()

    f.write(''.join(chunks))
def write(
    glos: GlossaryType,
    dirname: str,
    cleanHTML: bool = True,
    css: str = "",
    xsl: str = "",
    defaultPrefs: Optional[Dict] = None,
    prefsHTML: str = "",
    frontBackMatter: str = "",
    jing: bool = False,
    indexes: str = "",  # FIXME: rename to indexes_lang?
):
    """
    Write glossary to Apple dictionary .xml and supporting files.

    The following files are created inside ``dirname`` (the base name is
    ``basename(dirname)`` with dots replaced by underscores): ``<base>.xml``,
    ``<base>.css``, ``<base>.plist``, ``Makefile`` and an ``OtherResources``
    directory that receives data entries and the optional XSL / prefs files.

    :type glos: pyglossary.glossary.Glossary
    :type dirname: str, directory path, must not have extension

    :type cleanHTML: bool
    :param cleanHTML: pass True to use BeautifulSoup parser.

    :type css: str
    :param css: path to custom .css file

    :type xsl: str
    :param xsl: path to custom XSL transformations file.

    :type defaultPrefs: dict or None
    :param defaultPrefs: Default prefs in python dictionary literal format,
        i.e. {"key1": "value1", "key2": "value2", ...}. All keys and values
        must be quoted strings; not allowed characters (e.g. single/double
        quotes, equal sign "=", semicolon) must be escaped as hex code
        according to python string literal rules.

    :type prefsHTML: str
    :param prefsHTML: path to XHTML file with user interface for
        dictionary's preferences. Refer to Apple's documentation for details.

    :type frontBackMatter: str
    :param frontBackMatter: path to XML file with top-level tag
        <d:entry id="front_back_matter" d:title="Your Front/Back Matter Title">
            your front/back matter entry content
        </d:entry>

    :type jing: bool
    :param jing: pass True to run Jing check on generated XML.

    :type indexes: str
    :param indexes: Dictionary.app does not perform flexible search by
        default; additional indexes for dictionary entries can be provided
        manually to help it. The value is passed to ``indexes_generator()``.
    """
    global BeautifulSoup
    # Local import: only needed for the fallback path without BeautifulSoup.
    from xml.sax.saxutils import quoteattr

    if not isdir(dirname):
        os.mkdir(dirname)

    xdxf.xdxf_init()

    if cleanHTML:
        if BeautifulSoup is None:
            loadBeautifulSoup()
        if BeautifulSoup is None:
            log.warning(
                "cleanHTML option passed but BeautifulSoup not found. " +
                "to fix this run `sudo pip3 install lxml beautifulsoup4 html5lib`"
            )
    else:
        BeautifulSoup = None

    fileNameBase = basename(dirname).replace(".", "_")
    filePathBase = join(dirname, fileNameBase)

    # before chdir (outside indir block)
    css = abspath_or_None(css)
    xsl = abspath_or_None(xsl)
    prefsHTML = abspath_or_None(prefsHTML)
    frontBackMatter = abspath_or_None(frontBackMatter)

    generate_id = id_generator()
    generate_indexes = indexes_generator(indexes)

    glos.setDefaultDefiFormat("h")

    myResDir = join(dirname, "OtherResources")
    if not isdir(myResDir):
        os.mkdir(myResDir)

    with open(filePathBase + ".xml", "w", encoding="utf-8") as toFile:
        write_header(glos, toFile, frontBackMatter)
        for entryI, entry in enumerate(glos):
            if entry.isData():
                # Data entries are saved as files instead of being
                # rendered as <d:entry> elements.
                entry.save(myResDir)
                continue

            words = entry.getWords()
            word, alts = words[0], words[1:]
            defi = entry.getDefi()

            long_title = _normalize.title_long(
                _normalize.title(word, BeautifulSoup)
            )
            if not long_title:
                continue

            _id = next(generate_id)
            if BeautifulSoup:
                title_attr = (
                    BeautifulSoup.dammit.EntitySubstitution.substitute_xml(
                        long_title, True,
                    )
                )
            else:
                # The template below inserts title_attr verbatim after
                # 'd:title=', so it must arrive quoted and XML-escaped.
                title_attr = quoteattr(str(long_title))

            content_title = long_title
            if entry.getDefiFormat() == "x":
                # XDXF definitions are converted to HTML and cleaned
                # without passing a title.
                defi = xdxf.xdxf_to_html(defi)
                content_title = None
            content = format_clean_content(content_title, defi, BeautifulSoup)

            toFile.write(
                '<d:entry id="%s" d:title=%s>\n' % (_id, title_attr) +
                generate_indexes(long_title, alts, content, BeautifulSoup) +
                content +
                "\n</d:entry>\n"
            )

        toFile.write("</d:dictionary>\n")

    if xsl:
        shutil.copy(xsl, myResDir)
    if prefsHTML:
        shutil.copy(prefsHTML, myResDir)

    write_css(filePathBase + ".css", css)

    # Written as UTF-8 like the other generated files, so that a non-ASCII
    # dictionary name does not depend on the locale's default encoding.
    with open(join(dirname, "Makefile"), "w", encoding="utf-8") as toFile:
        toFile.write(
            toStr(pkgutil.get_data(
                __name__,
                "templates/Makefile",
            )) % {"dict_name": fileNameBase}
        )

    copyright_text = glos.getInfo("copyright")
    if BeautifulSoup:
        # strip html tags
        copyright_text = str(
            BeautifulSoup.BeautifulSoup(copyright_text, features="lxml").text
        )

    # if DCSDictionaryXSL provided but DCSDictionaryDefaultPrefs <dict/> not
    # present in Info.plist, Dictionary.app will crash.
    with open(filePathBase + ".plist", "w", encoding="utf-8") as toFile:
        toFile.write(
            toStr(pkgutil.get_data(
                __name__,
                "templates/Info.plist",
            )) % {
                # identifier must be unique
                "CFBundleIdentifier": fileNameBase.replace(" ", ""),
                "CFBundleDisplayName": glos.getInfo("name"),
                "CFBundleName": fileNameBase,
                "DCSDictionaryCopyright": copyright_text,
                "DCSDictionaryManufacturerName": glos.getInfo("author"),
                "DCSDictionaryXSL": basename(xsl) if xsl else "",
                "DCSDictionaryDefaultPrefs":
                    format_default_prefs(defaultPrefs),
                "DCSDictionaryPrefsHTML":
                    basename(prefsHTML) if prefsHTML else "",
                "DCSDictionaryFrontMatterReferenceID":
                    "<key>DCSDictionaryFrontMatterReferenceID</key>\n"
                    "\t<string>front_back_matter</string>"
                    if frontBackMatter else "",
            }
        )

    if jing:
        from .jing import run as jing_run
        jing_run(filePathBase + ".xml")