def __next__(self):
	if not self._nextPath:
		log.error('iterating over a reader which is not open')
		raise StopIteration
	if self._nextPath == 'END':
		if self._pos != self._len:
			log.warning('%s words found, wordCount in info.json was %s' % (self._pos, self._len))
			self._len = self._pos
		raise StopIteration

	self._pos += 1

	with open(join(self._filename, self._nextPath), 'r', encoding=self._encoding) as fp:
		self._nextPath = fp.readline().rstrip()
		word = fp.readline().rstrip()
		defi = fp.read().rstrip()

	if self._glos.getPref('enable_alts', True):
		word = splitByBarUnescapeNTB(word)
		if len(word) == 1:
			word = word[0]
	else:
		word = unescapeNTB(word, bar=True)

	# defi = unescapeNTB(defi)

	return Entry(word, defi)

def nextPair(self) -> "Tuple[str, str]": if not self._file: raise StopIteration line = self.readline() if not line: raise StopIteration line = line.rstrip("\n") if not line: return ### word, tab, defi = line.partition("\t") if not tab: log.error(f"Warning: line starting with {line[:10]!r} has no tab!") return ### if self._glos.getConfig("enable_alts", True): word = splitByBarUnescapeNTB(word) if len(word) == 1: word = word[0] else: word = unescapeNTB(word, bar=False) ### defi = unescapeNTB(defi) ### return word, defi
def nextPair(self):
	if not self._file:
		raise StopIteration
	line = self._file.readline()
	if not line:
		raise StopIteration
	line = line.strip()  # this also removes the trailing newline
	if not line:
		return

	word, tab, defi = line.partition('\t')
	if not tab:
		log.error('Warning: line starting with "%s" has no tab!' % line[:10])
		return

	if self._glos.getPref('enable_alts', True):
		word = splitByBarUnescapeNTB(word)
		if len(word) == 1:
			word = word[0]
	else:
		word = unescapeNTB(word, bar=True)

	defi = unescapeNTB(defi)

	return word, defi

def nextPair(self) -> Tuple[str, str]:
	if not self._file:
		raise StopIteration
	line = self._file.readline()
	if not line:
		raise StopIteration
	line = line.strip()  # this also removes the trailing newline
	if not line:
		return

	word, tab, defi = line.partition("\t")
	if not tab:
		log.error(
			"Warning: line starting with \"%s\" has no tab!" % line[:10]
		)
		return

	if self._glos.getPref("enable_alts", True):
		word = splitByBarUnescapeNTB(word)
		if len(word) == 1:
			word = word[0]
	else:
		word = unescapeNTB(word, bar=True)

	defi = unescapeNTB(defi)

	return word, defi

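# A minimal, self-contained sketch (not the library's own code) of the line
# format the nextPair() variants above parse: one entry per line, headword and
# definition separated by a tab, alternate headwords separated by "|", and
# newline/tab/backslash characters escaped as \n, \t and \\.  The helper names
# sketch_unescape and sketch_split_alts are hypothetical stand-ins for the
# real unescapeNTB / splitByBarUnescapeNTB utilities; unlike the real helpers
# (note the bar= argument above), this sketch ignores escaped "|" characters.

def sketch_unescape(st: str) -> str:
	# undo the \\, \n and \t escapes produced when the entry was written,
	# handling "\\" first so "\\n" is not misread as an escaped newline
	return (
		st.replace("\\\\", "\x00")
		.replace("\\n", "\n")
		.replace("\\t", "\t")
		.replace("\x00", "\\")
	)

def sketch_split_alts(word: str) -> list:
	# split alternate headwords on "|" and unescape each part
	return [sketch_unescape(part) for part in word.split("|")]

# usage example on a single tab-separated line:
line = "test|example\tfirst line\\nsecond line"
word, _tab, defi = line.partition("\t")
print(sketch_split_alts(word))  # ['test', 'example']
print(sketch_unescape(defi))    # definition with a real newline between the two lines
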
def __iter__(self) -> Iterator[BaseEntry]:
	if not self._rootPath:
		log.error("iterating over a reader which is not open")
		raise StopIteration
	wordCount = 0
	nextPath = self._rootPath
	while nextPath != "END":
		wordCount += 1
		# before or after reading word and defi
		# (and skipping empty entry)? FIXME
		with open(
			join(self._filename, nextPath),
			"r",
			encoding=self._encoding,
		) as fromFile:
			header = fromFile.readline().rstrip()
			if self._havePrevLink:
				self._prevPath, nextPath = header.split(" ")
			else:
				nextPath = header
			word = fromFile.readline()
			if not word:
				yield None  # update progressbar
				continue
			defi = fromFile.read()
			if not defi:
				log.warning(
					f"Edlin Reader: no definition for word {word!r}"
					f", skipping",
				)
				yield None  # update progressbar
				continue
			word = word.rstrip()
			defi = defi.rstrip()

		if self._glos.getPref("enable_alts", True):
			word = splitByBarUnescapeNTB(word)
			if len(word) == 1:
				word = word[0]
		else:
			word = unescapeNTB(word, bar=True)

		# defi = unescapeNTB(defi)

		yield self._glos.newEntry(word, defi)

	if wordCount != self._wordCount:
		log.warning(
			f"{wordCount} words found, "
			f"wordCount in info.json was {self._wordCount}",
		)
		self._wordCount = wordCount

	resDir = self._resDir
	for fname in self._resFileNames:
		with open(join(resDir, fname), "rb") as fromFile:
			yield self._glos.newDataEntry(
				fname,
				fromFile.read(),
			)

def _fromFile(self, fpath):
	_, ext = splitext(fpath)
	c_open = compressionOpenFunc(ext.lstrip("."))
	if not c_open:
		log.error(f"invalid extension {ext}")
		c_open = open
	with c_open(fpath, "rt", encoding="utf-8") as _file:
		words = splitByBarUnescapeNTB(_file.readline().rstrip("\n"))
		defi = _file.read()
		return self._glos.newEntry(words, defi)

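# A rough sketch of what an extension-based dispatch like compressionOpenFunc
# might look like; the mapping below is an assumption for illustration, not
# the actual implementation.  The standard-library openers gzip.open, bz2.open
# and lzma.open all accept mode "rt" with an encoding, like builtin open, so
# _fromFile() above can treat them interchangeably.
import bz2
import gzip
import lzma

_SKETCH_OPENERS = {
	"gz": gzip.open,
	"bz2": bz2.open,
	"xz": lzma.open,
	"": open,
	"txt": open,
}

def sketch_compression_open_func(ext: str):
	# return an open()-compatible callable for the given extension,
	# or None if the extension is not recognized
	return _SKETCH_OPENERS.get(ext.lower())
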
def __next__(self):
	if not self._nextPath:
		log.error('iterating over a reader which is not open')
		raise StopIteration
	if self._nextPath == 'END':
		if self._pos != self._len:
			log.warning('%s words found, wordCount in info.json was %s' % (self._pos, self._len))
			self._len = self._pos
		raise StopIteration

	self._pos += 1
	# before or after reading word and defi (and skipping empty entry)? FIXME

	with open(join(self._filename, self._nextPath), 'r', encoding=self._encoding) as fp:
		header = fp.readline().rstrip()
		if self._havePrevLink:
			self._prevPath, self._nextPath = header.split(' ')
		else:
			self._nextPath = header
		word = fp.readline()
		if not word:
			return
		defi = fp.read()
		if not defi:
			log.warning(
				'Edlin Reader: no definition for word "%s", skipping' % word)
			return
		word = word.rstrip()
		defi = defi.rstrip()

	if self._glos.getPref('enable_alts', True):
		word = splitByBarUnescapeNTB(word)
		if len(word) == 1:
			word = word[0]
	else:
		word = unescapeNTB(word, bar=True)

	# defi = unescapeNTB(defi)

	return Entry(word, defi)

def __next__(self):
	if not self._nextPath:
		log.error('iterating over a reader which is not open')
		raise StopIteration
	if self._nextPath == 'END':
		if self._pos != self._len:
			log.warning('%s words found, wordCount in info.json was %s' % (self._pos, self._len))
			self._len = self._pos
		raise StopIteration

	self._pos += 1
	# before or after reading word and defi (and skipping empty entry)? FIXME

	with open(join(self._filename, self._nextPath), 'r', encoding=self._encoding) as fp:
		header = fp.readline().rstrip()
		if self._havePrevLink:
			self._prevPath, self._nextPath = header.split(' ')
		else:
			self._nextPath = header
		word = fp.readline()
		if not word:
			return
		defi = fp.read()
		if not defi:
			log.warning('Edlin Reader: no definition for word "%s", skipping' % word)
			return
		word = word.rstrip()
		defi = defi.rstrip()

	if self._glos.getPref('enable_alts', True):
		word = splitByBarUnescapeNTB(word)
		if len(word) == 1:
			word = word[0]
	else:
		word = unescapeNTB(word, bar=True)

	# defi = unescapeNTB(defi)

	return Entry(word, defi)

def __iter__(self):
	if not self._rootPath:
		log.error("iterating over a reader which is not open")
		raise StopIteration
	wordCount = 0
	nextPath = self._rootPath
	while nextPath != "END":
		wordCount += 1
		# before or after reading word and defi
		# (and skipping empty entry)? FIXME
		with open(
			join(self._filename, nextPath),
			"r",
			encoding=self._encoding,
		) as fromFile:
			header = fromFile.readline().rstrip()
			if self._havePrevLink:
				self._prevPath, nextPath = header.split(" ")
			else:
				nextPath = header
			word = fromFile.readline()
			if not word:
				yield None  # update progressbar
				continue
			defi = fromFile.read()
			if not defi:
				log.warning(
					"Edlin Reader: no definition for word %r" % word
					+ ", skipping"
				)
				yield None  # update progressbar
				continue
			word = word.rstrip()
			defi = defi.rstrip()

		if self._glos.getPref("enable_alts", True):
			word = splitByBarUnescapeNTB(word)
			if len(word) == 1:
				word = word[0]
		else:
			word = unescapeNTB(word, bar=True)

		# defi = unescapeNTB(defi)

		yield self._glos.newEntry(word, defi)

	if wordCount != self._wordCount:
		log.warning(
			"%s words found, " % wordCount
			+ "wordCount in info.json was %s" % self._wordCount
		)
		self._wordCount = wordCount

	resDir = self._resDir
	for fname in self._resFileNames:
		with open(join(resDir, fname), "rb") as fromFile:
			yield self._glos.newDataEntry(
				fname,
				fromFile.read(),
			)
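
# The Edlin readers above imply an on-disk layout where each entry is its own
# text file: the first line is a header holding the next entry's relative path
# (optionally preceded by the previous entry's path when havePrevLink is set),
# the second line is the headword, and the rest of the file is the definition;
# the chain terminates when the next path is the literal string "END".  Below
# is a minimal standalone walker over that layout, written as a sketch for
# illustration rather than the plugin's own reader; sketch_walk_chain and its
# parameters are hypothetical names.
import os

def sketch_walk_chain(dirname: str, rootPath: str, havePrevLink: bool = True):
	# yield (word, defi) pairs by following the linked list of entry files
	nextPath = rootPath
	while nextPath != "END":
		with open(os.path.join(dirname, nextPath), encoding="utf-8") as _file:
			header = _file.readline().rstrip()
			if havePrevLink:
				_prevPath, nextPath = header.split(" ")
			else:
				nextPath = header
			word = _file.readline().rstrip()
			defi = _file.read().rstrip()
		yield word, defi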