示例#1
0
	def saveEntry(self, thisEntry: BaseEntry, thisHash: str, prevHash: str, nextHash: str) -> None:
		"""
		Write a single entry into its own file below ``self._filename``.

		The file is ``<self._filename>/<thisHash[:2]>/<thisHash[2:]>`` and holds
		three parts joined by newlines:

		1. a header line: the path of the next entry (``END`` when nextHash is
		   empty), preceded by the path of the previous entry (``START`` when
		   prevHash is empty) and a space if ``self._havePrevLink`` is set;
		2. ``thisEntry.s_word`` passed through ``escapeNTB(..., bar=False)``;
		3. ``thisEntry.defi``.
		"""
		bucketDir = join(self._filename, thisHash[:2])
		makeDir(bucketDir)
		entryPath = join(bucketDir, thisHash[2:])
		with open(entryPath, "w", encoding=self._encoding) as entryFile:
			# the next link is resolved first, then (optionally) the previous one
			if nextHash:
				nextLink = self.hashToPath(nextHash)
			else:
				nextLink = "END"

			headerFields = [nextLink]
			if self._havePrevLink:
				if prevHash:
					prevLink = self.hashToPath(prevHash)
				else:
					prevLink = "START"
				headerFields.insert(0, prevLink)
			headerLine = " ".join(headerFields)

			parts = (
				headerLine,
				escapeNTB(thisEntry.s_word, bar=False),
				thisEntry.defi,
			)
			entryFile.write("\n".join(parts))
示例#2
0
    def write(self) -> "Generator[None, BaseEntry, None]":
        """
        Write the glossary as a series of HTML pages, fed through ``send()``.

        This is a generator: every entry is received through ``yield`` and
        sending ``None`` ends the loop and finalizes the output. Data entries
        are saved into ``self._resDir`` when ``self._resources`` is set; every
        other entry is rendered as a ``<div id="entryN">`` block and appended
        to the current page. A new page is started with ``self.nextFile()``
        when the next entry would push the page past ``self._max_file_size``
        (minus the size of ``self._tail``).

        Two tab-separated side files are written into the output directory:

        - ``index.txt``: entry index, escaped word, page file name and the
          position of the entry in that page (one line per word).
        - ``links.txt``: escaped link target, page index, hex position and
          hex byte size of every ``href="#..."`` anchor found in an entry.
          It is temporary and removed after ``self.fixLinks()`` has run.

        Raises:
            ValueError: if ``self._max_file_size`` is smaller than the page
                header plus ``self._tail``.
        """
        encoding = self._encoding
        resources = self._resources
        max_file_size = self._max_file_size
        escape_defi = self._escape_defi
        _word_title = self._word_title
        resDir = self._resDir
        glos = self._glos
        filename = self._filename

        wordSep = ' <font color="red">|</font> '

        # pages smaller than this are never split, whatever the entry size
        initFileSizeMax = 100

        # entry_link_sym = "&#182;"
        entry_link_sym = "&#128279;"

        entry_url_fmt = glos.getInfo("entry_url")

        def getEntryWebLink(entry) -> str:
            """Return the external-link anchor of entry, or "" if none."""
            if not entry_url_fmt:
                return ""
            url = entry_url_fmt.format(word=html.escape(entry.l_word[0]))
            return f'{nbsp}<a class="no_ul" href="{url}">&#127759;</a>'

        # from math import log2, ceil
        # maxPosHexLen = int(ceil(log2(max_file_size) / 4))

        # Kept in its own name: pageHeader() reads it for every new page, so
        # it must never be rebound to a per-entry title inside the loop.
        glosTitle = glos.getInfo("name")

        style = ""
        if self._dark:
            style = darkStyle

        if self._css:
            cssLink = '<link rel="stylesheet" href="style.css" />'
        else:
            cssLink = ""

        # {pageTitle} and {customStyle} stay as str.format placeholders
        header = (
            '<!DOCTYPE html>\n'
            '<html><head>'
            f'<title>{{pageTitle}}</title>'
            f'<meta charset="{encoding}">'
            f'<style type="text/css">{style}{{customStyle}}</style>{cssLink}'
            '</meta></head><body>\n')

        def pageHeader(n: int) -> str:
            """Return the HTML header of page number n."""
            return header.format(
                pageTitle=f"Page {n} of {glosTitle}",
                customStyle="",
            )

        def navBar() -> str:
            """Return the previous / next / info navigation bar of a page."""
            links = []
            if len(self._filenameList) > 1:
                links.append(
                    f'<a href="./{self._filenameList[-2]}">&#9664;</a>')
            links.append(f'<a href="./{self.getNextFilename()}">&#9654;</a>')
            # no closing </div> here: the wrapper below closes itself
            links.append('<a href="./info.html">ℹ️</a>')
            return ('<div style="text-align: center; font-size: 2.5em;">' +
                    f'{nbsp}{nbsp}{nbsp}'.join(links) + '</div>')

        tailSize = len(self._tail.encode(encoding))

        # validate before anything is created on disk
        if max_file_size < len(header) + tailSize:
            raise ValueError(f"max_file_size={max_file_size} is too small")

        max_file_size -= tailSize

        # the directory must exist before index.txt / links.txt are opened
        if not isdir(filename):
            os.mkdir(filename)

        re_fixed_link = re.compile(
            r'<a (?:[^<>]*? )?href="#([^<>"]+?)">[^<>]+?</a>',
            re.I,
        )

        linkTargetSet = set()

        def replaceBword(text) -> str:
            """Turn bword:// links into in-page fragment links."""
            return text.replace(
                ' href="bword://',
                ' href="#',
            )

        def addLinks(text: str, pos: int) -> None:
            """
            Record every fragment link of text in links.txt.

            pos is the position in the current page at which text is going
            to be written; link offsets are stored relative to the page.
            """
            for m in re_fixed_link.finditer(text):
                # NOTE: the entry's own permalink carries this (duplicate)
                # class attribute, which is how it is told apart here.
                if ' class="entry_link"' in m.group(0):
                    continue
                if m.group(0).count("href=") != 1:
                    log.error(f"unexpected match: {m.group(0)}")
                target = html.unescape(m.group(1))
                linkTargetSet.add(target)
                start = m.start()
                # offsets are in encoded bytes, not characters
                b_start = len(text[:start].encode(encoding))
                b_size = len(text[start:m.end()].encode(encoding))
                linksTxtFileObj.write(f"{escapeNTB(target)}\t"
                                      f"{len(self._filenameList)-1}\t"
                                      f"{hex(pos+b_start)[2:]}\t"
                                      f"{hex(b_size)[2:]}\n")

        # Both side files are closed when the block ends, also on errors and
        # when the generator is closed early; links.txt in particular must be
        # closed before fixLinks() runs and before it is removed.
        with open(
            join(filename, "index.txt"),
            mode="w",
            encoding="utf-8",
        ) as indexTxtFileObj, open(
            join(filename, "links.txt"),
            mode="w",
            encoding="utf-8",
        ) as linksTxtFileObj:
            fileObj = self.nextFile()
            fileObj.write(pageHeader(0))
            fileObj.write(navBar())

            self.writeInfo(filename, header)

            entryIndex = -1
            while True:
                entryIndex += 1
                entry = yield
                if entry is None:
                    break
                if entry.isData():
                    if resources:
                        entry.save(resDir)
                    continue

                # Checked on the raw definition, before stripFullHtml() gets
                # a chance to change it (presumably it removes the wrapper).
                isFullHtml = entry.defi.startswith('<!DOCTYPE html>')

                entry.detectDefiFormat()
                entry.stripFullHtml()
                defi = entry.defi
                defiFormat = entry.defiFormat

                if isFullHtml and defiFormat != "h":
                    log.error(f"bad defiFormat={defiFormat}")
                    defiFormat = "h"

                if defiFormat == "m":
                    defi = html.escape(defi)
                    if "\n" in defi:
                        # could be markdown or unformatted plaintext
                        # FIXME: this changes the font to a monospace
                        defi = f'<pre>{defi}</pre>'
                elif defiFormat == "h":
                    if escape_defi:
                        defi = html.escape(defi)
                    defi = defi.replace(' src="./', ' src="./res/')

                entryId = f"entry{entryIndex}"

                # computed from scratch for every entry, so that neither the
                # glossary name nor a previous entry's title can leak in
                entryTitle = ""
                if _word_title:
                    words = [html.escape(word) for word in entry.l_word]
                    entryTitle = glos.wordTitleStr(
                        wordSep.join(words),
                        sample=entry.l_word[0],
                        _class="headword",
                    )
                if not entryTitle:
                    entryTitle = f'Entry {entryIndex}'

                text = (f'<div id="{entryId}">{entryTitle}{nbsp}{nbsp}'
                        f'<a class="no_ul" class="entry_link" href="#{entryId}">'
                        f'{entry_link_sym}</a>'
                        f'{getEntryWebLink(entry)}'
                        f"<br>\n{defi}"
                        '</div>\n'
                        '<hr>\n')

                pos = fileObj.tell()
                if (
                    pos > initFileSizeMax
                    and pos > max_file_size - len(text.encode(encoding))
                ):
                    fileObj = self.nextFile()
                    fileObj.write(pageHeader(len(self._filenameList) - 1))
                    fileObj.write(navBar())
                    pos = fileObj.tell()

                pageFilename = escapeNTB(self._filenameList[-1])
                for word in entry.l_word:
                    indexTxtFileObj.write(f"{entryIndex}\t"
                                          f"{escapeNTB(word)}\t"
                                          f"{pageFilename}\t"
                                          f"{pos}\n")

                text = replaceBword(text)
                addLinks(text, pos)
                fileObj.write(text)

            fileObj.close()
            self._fileObj = None

        if linkTargetSet:
            log.info(f"{len(linkTargetSet)} link targets found")
            log.info("Fixing links, please wait...")
            self.fixLinks(linkTargetSet)

        os.remove(join(filename, "links.txt"))
示例#3
0
    def write(self) -> Generator[None, "BaseEntry", None]:
        """
        Write the glossary as a series of HTML pages, fed through ``send()``.

        This is a generator: every entry is received through ``yield`` and
        sending ``None`` ends the loop and finalizes the output. Data entries
        are saved into ``self._resDir`` when ``self._resources`` is set; every
        other entry is rendered as a ``<div id="entryN">`` block and appended
        to the current page. A new page is started with ``self.nextFile()``
        when the next entry would push the page past ``self._max_file_size``
        (minus the size of ``self._tail``).

        Two tab-separated side files are written into the output directory:

        - ``index.txt``: entry index, escaped word, page file name and the
          position of the entry in that page (one line per word).
        - ``links.txt``: escaped link target, page index, hex position and
          hex byte size of every ``href="#..."`` anchor found in an entry.
          It is temporary and removed after ``self.fixLinks()`` has run.

        Raises:
            ValueError: if ``self._max_file_size`` is smaller than the page
                header plus ``self._tail``.
        """
        encoding = self._encoding
        resources = self._resources
        max_file_size = self._max_file_size
        escape_defi = self._escape_defi
        resDir = self._resDir
        glos = self._glos
        filename = self._filename

        wordSep = ' <font color="red">|</font> '

        # pages smaller than this are never split, whatever the entry size
        initFileSizeMax = 100

        entry_url_fmt = glos.getInfo("entry_url")

        def getEntryWebLink(entry) -> str:
            """Return the external-link anchor of entry, or "" if none."""
            if not entry_url_fmt:
                return ""
            url = entry_url_fmt.format(word=html.escape(entry.l_word[0]))
            return f'&nbsp;<a class="no_ul" href="{url}">&#127759;</a>'

        # from math import log2, ceil
        # maxPosHexLen = int(ceil(log2(max_file_size) / 4))

        title = glos.getInfo("name")
        style = ""
        if self._dark:
            style = darkStyle

        # {pageTitle} and {customStyle} stay as str.format placeholders
        header = ('<!DOCTYPE html>\n'
                  '<html><head>'
                  f'<title>{{pageTitle}}</title>'
                  f'<meta charset="{encoding}">'
                  f'<style type="text/css">{style}{{customStyle}}</style>'
                  '</meta></head><body>\n')

        def pageHeader(n: int) -> str:
            """Return the HTML header of page number n."""
            return header.format(
                pageTitle=f"Page {n} of {title}",
                customStyle="",
            )

        def stripEntryFullHtml(entry, defi: str) -> str:
            """
            Return only what is between <body ...> and </body> in defi.

            When one of the markers is missing, an error is logged and
            whatever has been cut so far is returned.
            """
            word = entry.s_word
            i = defi.find('<body')
            if i == -1:
                log.error(f"<body not found: word={word}")
                return defi
            defi = defi[i + 5:]
            i = defi.find('>')
            if i == -1:
                log.error(f"'>' after <body not found: word={word}")
                return defi
            defi = defi[i + 1:]
            i = defi.find('</body')
            if i == -1:
                log.error(f"</body close not found: word={word}")
                return defi
            return defi[:i]

        def navBar() -> str:
            """Return the previous / next / info navigation bar of a page."""
            links = []
            if len(self._filenameList) > 1:
                links.append(
                    f'<a href="./{self._filenameList[-2]}">&#9664;</a>')
            links.append(f'<a href="./{self.getNextFilename()}">&#9654;</a>')
            # no closing </div> here: the wrapper below closes itself
            links.append('<a href="./info.html">ℹ️</a>')
            return ('<div style="text-align: center; font-size: 2.5em;">' +
                    '&nbsp;&nbsp;&nbsp;'.join(links) + '</div>')

        tailSize = len(self._tail.encode(encoding))

        # validate before anything is created on disk
        if max_file_size < len(header) + tailSize:
            raise ValueError(f"max_file_size={max_file_size} is too small")

        max_file_size -= tailSize

        # the directory must exist before index.txt / links.txt are opened
        if not isdir(filename):
            os.mkdir(filename)

        re_fixed_link = re.compile(
            r'<a (?:[^<>]*? )?href="#([^<>"]+?)">[^<>]+?</a>',
            re.I,
        )

        linkTargetSet = set()

        def replaceBword(text) -> str:
            """Turn bword:// links into in-page fragment links."""
            return text.replace(
                ' href="bword://',
                ' href="#',
            )

        def addLinks(text: str, pos: int) -> None:
            """
            Record every fragment link of text in links.txt.

            pos is the position in the current page at which text is going
            to be written; link offsets are stored relative to the page.
            """
            for m in re_fixed_link.finditer(text):
                # NOTE: the entry's own permalink carries this (duplicate)
                # class attribute, which is how it is told apart here.
                if ' class="entry_link"' in m.group(0):
                    continue
                if m.group(0).count("href=") != 1:
                    log.error(f"unexpected match: {m.group(0)}")
                target = html.unescape(m.group(1))
                linkTargetSet.add(target)
                start = m.start()
                # offsets are in encoded bytes, not characters
                b_start = len(text[:start].encode(encoding))
                b_size = len(text[start:m.end()].encode(encoding))
                linksTxtFileObj.write(f"{escapeNTB(target)}\t"
                                      f"{len(self._filenameList)-1}\t"
                                      f"{hex(pos+b_start)[2:]}\t"
                                      f"{hex(b_size)[2:]}\n")

        defiHasHeadwords = glos.getInfo("definition_has_headwords") == "True"

        # Both side files are closed when the block ends, also on errors and
        # when the generator is closed early; links.txt in particular must be
        # closed before fixLinks() runs and before it is removed.
        with open(
            join(filename, "index.txt"),
            mode="w",
            encoding="utf-8",
        ) as indexTxtFileObj, open(
            join(filename, "links.txt"),
            mode="w",
            encoding="utf-8",
        ) as linksTxtFileObj:
            fileObj = self.nextFile()
            fileObj.write(pageHeader(0))
            fileObj.write(navBar())

            self.writeInfo(filename, header)

            entryIndex = -1
            while True:
                entryIndex += 1
                entry = yield
                if entry is None:
                    break
                if entry.isData():
                    if resources:
                        entry.save(resDir)
                    continue

                defi = entry.defi
                defiFormat = entry.defiFormat

                if defi.startswith('<!DOCTYPE html>'):
                    if defiFormat != "h":
                        log.error(f"bad defiFormat={defiFormat}")
                        defiFormat = "h"
                    defi = stripEntryFullHtml(entry, defi)

                # Escape first: the <br> tags added for plaintext below are
                # real markup and must not be escaped themselves.
                if escape_defi:
                    defi = html.escape(defi)

                if defiFormat == "m":
                    defi = defi.replace("\n", "<br>")

                entryId = f"entry{entryIndex}"

                if defiHasHeadwords:
                    headwords = f'Entry {entryIndex}'
                else:
                    # headwords are plain text, so they are escaped for HTML
                    escapedWords = [
                        html.escape(word) for word in entry.l_word
                    ]
                    headwords = (
                        f'<b class="headword">{wordSep.join(escapedWords)}</b>'
                    )

                text = (
                    f'<div id="{entryId}">{headwords}&nbsp;&nbsp;'
                    f'<a class="no_ul" class="entry_link" href="#{entryId}">&#128279;</a>'
                    f'{getEntryWebLink(entry)}'
                    f"<br>\n{defi}"
                    '</div>\n'
                    '<hr>\n')

                pos = fileObj.tell()
                if (
                    pos > initFileSizeMax
                    and pos > max_file_size - len(text.encode(encoding))
                ):
                    fileObj = self.nextFile()
                    fileObj.write(pageHeader(len(self._filenameList) - 1))
                    fileObj.write(navBar())
                    pos = fileObj.tell()

                pageFilename = escapeNTB(self._filenameList[-1])
                for word in entry.l_word:
                    indexTxtFileObj.write(f"{entryIndex}\t"
                                          f"{escapeNTB(word)}\t"
                                          f"{pageFilename}\t"
                                          f"{pos}\n")

                text = replaceBword(text)
                addLinks(text, pos)
                fileObj.write(text)

            fileObj.close()
            self._fileObj = None

        if linkTargetSet:
            log.info(f"\n{len(linkTargetSet)} link targets found")
            log.info("Fixing links, please wait...")
            self.fixLinks(linkTargetSet)

        os.remove(join(filename, "links.txt"))