Exemplo n.º 1
0
    def writePass1txtFiles(self):
        """Concatenate the sources into auxiliary `single.markdown`/`pass1.txt` files.

        Produces four files and records each generated name (via
        ``self.short_name``) in ``self.log_info``, in this order:

        * ``<xx_aux_dir>/single.markdown`` -- target-language sources joined
          into one file; useful when converting the whole book with PanDoc.
        * ``<xx_aux_dir>/pass1.txt`` -- the same lines, each prefixed with
          a ``ch/lineno:`` marker; mostly a debugging aid, not consumed later.
        * ``<en_aux_dir>/single.markdown`` and ``<en_aux_dir>/pass1.txt`` --
          the same pair built from the English original sources.

        Note: ``newline='\\n'`` is now applied to all four files (the English
        ``pass1.txt`` previously lacked it, producing platform-dependent
        line endings inconsistent with its siblings).
        """
        def write_pair(src_dir, aux_dir):
            # The plain concatenation -- `single.markdown`.
            fname_single = os.path.join(aux_dir, 'single.markdown')
            with open(fname_single, 'w', encoding='utf-8', newline='\n') as fout:
                for fname, lineno, line in gen.sourceFileLines(src_dir):
                    fout.write(line)
            self.log_info.append(self.short_name(fname_single))

            # The same content with chapter/line info -- `pass1.txt`.
            # fname[:2] is the two-character chapter prefix of the relative name.
            fname_pass1 = os.path.join(aux_dir, 'pass1.txt')
            with open(fname_pass1, 'w', encoding='utf-8', newline='\n') as fout:
                for fname, lineno, line in gen.sourceFileLines(src_dir):
                    fout.write('{}/{}:\t{}'.format(fname[:2], lineno, line))
            self.log_info.append(self.short_name(fname_pass1))

        # Target-language sources first, then the English original --
        # preserving the original order of the `log_info` records.
        write_pair(self.xx_src_dir, self.xx_aux_dir)
        write_pair(self.en_src_dir, self.en_aux_dir)
Exemplo n.º 2
0
    def loadDoclineLists(self):
        '''Loads document line objects of the source documents to the lists.

           Fills ``self.xx_doclines`` and ``self.en_doclines`` with
           ``doc.Line`` objects built from the target-language and the
           English sources. As a side effect, the representations of the
           lines are saved into pass1doclines.txt (mostly for debugging),
           and the skipped extra sequences are reported to
           pass1extra_lines.txt.

           Raises ValueError when `extra_lines.txt` defines two sequences
           that start with the same line.'''

        # The target-language sources may contain some extra parts used
        # as translator notes or some other explanations of the English
        # original. When compared with the original, the parts must be
        # skipped. The `definitions/xx/extra_lines.txt` stores the definitions
        # of the skipped parts in the form that can be cut/pasted from
        # other logs (UTF-8). If the extra_lines.txt file does not exist,
        # the empty one is created.
        #
        # The definitions are loaded to the dictionary where the key
        # is the first line of the extra sequence, and the value is
        # the list of lines of the sequence.
        #
        # Note: If it happens and there are two or more sequences
        # with the same first line (say some title of the included sequence),
        # just split the extra sequences to one extra sequence for
        # the first line, and the two or more sequences of the rest lines
        # (without that first line).
        extras_fname = os.path.join(self.lang_definitions_dir,
                                    'extra_lines.txt')

        # Create the empty file if it does not exist.
        if not os.path.isfile(extras_fname):
            with open(extras_fname, 'w', encoding='utf-8'):
                pass

        # Load the content to the `extras` dictionary. A two-state machine:
        # state 0 expects the first line of a sequence (the dict key),
        # state 1 collects the following lines until the `=====` separator.
        extras = {}
        status = 0
        lst = None
        with open(extras_fname, encoding='utf-8') as f:
            for line in f:
                if status == 0:
                    # First line is the key, the list is the value.
                    lst = extras.setdefault(line, [])
                    if lst:
                        # Duplicated first line -- see the note above for
                        # how to fix the definition file. (An explicit raise
                        # instead of `assert`, which would be stripped
                        # under `python -O`.)
                        raise ValueError(
                            'duplicated first line of an extra sequence '
                            'in {}: {!r}'.format(extras_fname, line))
                    lst.append(line)        # first line repeated in the list
                    status = 1

                elif status == 1:
                    # The sequence until the separator.
                    if line.startswith('====='):    # 5 at minimum
                        lst = None
                        status = 0
                    else:
                        lst.append(line)    # next of the sequence

                else:
                    raise NotImplementedError('status = {}\n'.format(status))

        # Capture the info about the input file with the definitions.
        self.log_info.append(self.short_name(extras_fname))

        # Loop through the lines and build the lists of Line objects
        # from the original and from the translation. The extra sequences
        # from the target languages are reported and skipped. They will be
        # deleted from the list.
        self.xx_doclines = []
        for relname, lineno, line in gen.sourceFileLines(self.xx_src_dir):
            docline = doc.Line(relname, lineno, line)
            self.xx_doclines.append(docline)

        # Delete and report the extra lines.
        xx_extra_fname = os.path.join(self.xx_aux_dir, 'pass1extra_lines.txt')
        with open(xx_extra_fname, 'w', encoding='utf-8') as fout:
            index = 0                       # index of the processed element
            while index < len(self.xx_doclines): # do not optimize, the length can change
                docline = self.xx_doclines[index]# current element
                if docline.line in extras:  # is current line recognized as extra?
                    # It could be the start of an extra sequence. Extract the
                    # following source lines in the length of the extras list
                    # and compare them with the definition.
                    extra_lines = extras[docline.line]
                    src_lines = [e.line for e in self.xx_doclines[index:index+len(extra_lines)]]

                    # If the lists have the same content, delete the source elements.
                    if src_lines == extra_lines:
                        # Report the skipped lines.
                        fout.write('{}/{}:\n'.format(docline.fname, docline.lineno))
                        fout.write(''.join(src_lines))
                        fout.write('====================\n\n')

                        # Delete the lines via deleting their elements.
                        del self.xx_doclines[index:index+len(extra_lines)]

                        # Decrement the index -- a correction for the
                        # increment below: the element now at `index`
                        # was not checked yet.
                        index -= 1

                # Jump to the next checked element.
                index += 1

        # Capture the info about the report file.
        self.log_info.append(self.short_name(xx_extra_fname))

        # Report the remaining target-language elements.
        xx_doclines_fname = os.path.join(self.xx_aux_dir, 'pass1doclines.txt')
        with open(xx_doclines_fname, 'w', encoding='utf-8') as fout:
            for docline in self.xx_doclines:
                fout.write('{}/{} {}: {!r}\n'.format(
                           docline.fname[:2], docline.lineno,
                           docline.type, docline.attrib))

        # Capture the info about the report file.
        self.log_info.append(self.short_name(xx_doclines_fname))

        # Report the structure of the English original. Unlike the target
        # language, the English sources contain no extra sequences, so the
        # list is built and reported in a single pass.
        self.en_doclines = []
        en_doclines_fname = os.path.join(self.en_aux_dir, 'pass1doclines.txt')
        with open(en_doclines_fname, 'w', encoding='utf-8') as fout:
            for relname, lineno, line in gen.sourceFileLines(self.en_src_dir):
                docline = doc.Line(relname, lineno, line)
                self.en_doclines.append(docline)
                fout.write('{}/{} {}: {!r}\n'.format(
                           docline.fname[:2], docline.lineno,
                           docline.type, docline.attrib))

        # Capture the info about the report file.
        self.log_info.append(self.short_name(en_doclines_fname))