예제 #1
0
    def __preprocess_files(self,
                           filesToBePreprocessed,
                           extensionStr,
                           preprocessModule,
                           tempFileSeed=None,
                           options=(),
                           verbose=False,
                           parseErrorFiles=None):
        """Preprocess the given files one by one inside this process.

        Each file is read by a FileReader that is started one iteration
        ahead, decoded with an ICU converter, parsed by the preprocessor
        obtained from preprocessModule, and the parse result is written to
        the file named "<source name> + extensionStr" after that name has
        been mapped through to_filename_in_prepdir().

        Parameters:
          filesToBePreprocessed -- iterable of source file names.
          extensionStr -- suffix appended to a source file name to form the
              name of its preprocessed file.
          preprocessModule -- object providing getpreprocessor().
          tempFileSeed -- not used by this method.
          options -- iterable of (flag, value) pairs. '-c' sets the
              character encoding, '-r' sets the preprocessor options and
              '-n' adds a prep directory; other flags are ignored.
          verbose -- when true, the progress reporter is sized to the number
              of files, otherwise to 0.
          parseErrorFiles -- if not None, the names of files that fail to
              parse are appended to this list and processing goes on; if
              None, a parse failure is reported on stderr and re-raised.

        Raises:
          TypeError -- the content of a file could not be decoded.
          ValueError -- a file could not be parsed and parseErrorFiles is
              None.
          EnvironmentError -- a preprocessed file could not be written.
        """
        filesToBePreprocessed = list(filesToBePreprocessed)
        fileCount = len(filesToBePreprocessed)

        encodingName = None
        preprocessorOptions = None
        prepDirs = list()
        for k, v in options:
            if k == '-c':
                encodingName = v
            elif k == '-r':
                preprocessorOptions = v
            elif k == '-n':
                prepDirs.append(v)

        cnv = easytorq.ICUConverter()
        if encodingName:
            cnv.setencoding(encodingName)

        prep = preprocessModule.getpreprocessor()
        if preprocessorOptions:
            prep.setoptions(preprocessorOptions)

        if verbose:
            progressBar = utility.ProgressReporter(fileCount)
        else:
            progressBar = utility.ProgressReporter(0)

        prefetch = None
        if fileCount > 0:
            prefetch = FileReader(filesToBePreprocessed[0])
            prefetch.start()

        for i in xrange(fileCount):
            # Start reading the next file before working on the current one.
            if i + 1 < fileCount:
                nextPrefetch = FileReader(filesToBePreprocessed[i + 1])
                nextPrefetch.start()
            else:
                nextPrefetch = None
            prefetch.join()

            # Hand over to the next reader right away; without this step the
            # loop would keep processing the first file on every iteration.
            current, prefetch = prefetch, nextPrefetch

            preprocessedFname = to_filename_in_prepdir(
                current.filename + extensionStr, prepDirs)

            if current.error is not None:
                print >> sys.stderr, "warning: not found file '%s'" % current.filename
            else:
                try:
                    strUtf8 = cnv.decode(current.content)
                except TypeError:
                    print >> sys.stderr, "error: invalid string (wrong character encoding?) in file '%s'" % current.filename
                    raise

                parseResult = None
                try:
                    parseResult = prep.parse(strUtf8)
                except ValueError:
                    if parseErrorFiles is None:
                        print >> sys.stderr, "error: failure to parse file '%s'" % current.filename
                        raise  # bare raise keeps the original traceback
                    parseErrorFiles.append(current.filename)

                if parseResult:
                    self.__write_preprocessed_file(preprocessedFname,
                                                   parseResult)

            # proceed() receives the absolute position, so it is reported
            # for unreadable files as well.
            progressBar.proceed(i + 1)

        progressBar.done()

    def __write_preprocessed_file(self, preprocessedFname, content):
        """Write content to preprocessedFname through a "-temp" sibling file.

        The data is written to "<preprocessedFname>-temp" first and then
        renamed, so a partially written result never carries the final name.
        If the temporary file cannot be opened, the directory part of
        preprocessedFname is created and the open is retried once.

        Raises EnvironmentError when writing or renaming fails; the
        temporary file is removed (best effort) before re-raising.
        """
        tempFname = preprocessedFname + "-temp"
        try:
            f = fopen(tempFname, "wb")
        except IOError:
            d = os_path_split(preprocessedFname, self.__syscnv)[0]
            try:
                os.makedirs(d)
            except OSError:
                # Rarely, another process makes the directory while this
                # process is trying to make it; makedirs() then fails
                # although the directory exists, which is fine.
                if not os.path.exists(d):
                    raise  # the directory really could not be created
            f = fopen(tempFname, "wb")

        try:
            try:
                f.write(content)
            finally:
                # Close even when write() fails so that the handle does not
                # leak and the temporary file can be removed below.
                f.close()
            rename_file(tempFname, preprocessedFname)  # rarely causes an error, the reason is unknown
        except EnvironmentError:
            print >> sys.stderr, "debug info: preorocessedFnameTemp=%s preprocessedFname=%s" % (
                tempFname, preprocessedFname)
            remove_file_neglecting_error(tempFname)
            raise
예제 #2
0
    def __preprocess_files_by_workers(self,
                                      maxWorkerThreads,
                                      filesToBePreprocessed,
                                      extensionStr,
                                      preprocessModule,
                                      tempFileSeed=None,
                                      options=(),
                                      verbose=False,
                                      parseErrorFiles=None):
        """Preprocess the given files by running this script in subprocesses.

        The file list is split into chunks. For every chunk the file names
        are written, one per line, to a temporary list file and a command
        line "<python> <this file> <module name> <options...> -i <list>" is
        built. The commands are run through
        threadingutil.multithreading_iter() with maxWorkerThreads workers.

        Parameters:
          maxWorkerThreads -- number of workers; must be at least 2.
          filesToBePreprocessed -- iterable of source file names.
          extensionStr -- not used by this method.
          preprocessModule -- object providing getname().
          tempFileSeed -- passed to make_temp_filename().
          options -- iterable of (flag, value) pairs forwarded verbatim to
              every subprocess command line.
          verbose -- when true, the progress reporter is sized to the number
              of commands, otherwise to 0.
          parseErrorFiles -- if not None, every subprocess is given a
              "--parseerrors=<temp file>" argument and the lines of those
              files are appended to this list afterwards.

        Raises:
          RuntimeError -- a subprocess returned a non-zero result.

        All temporary files are removed before returning, also on failure.
        """
        assert maxWorkerThreads >= 2

        filesToBePreprocessed = list(filesToBePreprocessed)
        fileCount = len(filesToBePreprocessed)

        # About 1/64 of the input per chunk, clamped to [200, 2000].
        chunkSize = min(max(200, fileCount // 64), 2000)

        commands = list()
        tempFiles = list()
        # Temporary files receiving parse errors. They are tracked apart
        # from parseErrorFiles so that the caller's list only ever receives
        # the reported entries, not temporary file names.
        errorListFiles = list()
        try:
            for fiStart in xrange(0, fileCount, chunkSize):
                fiEnd = min(fiStart + chunkSize, fileCount)

                cmd = [sys.executable, __file__, preprocessModule.getname()]
                for k, v in options:
                    cmd.append(k)
                    cmd.append(v)

                fn = make_temp_filename(tempFileSeed, self.__syscnv)
                tempFiles.append(fn)
                f = fopen(fn, "wb")
                try:
                    for i in xrange(fiStart, fiEnd):
                        f.write(filesToBePreprocessed[i])
                        f.write('\n')
                finally:
                    f.close()
                cmd.append('-i')
                cmd.append(fn)

                if parseErrorFiles is not None:
                    en = make_temp_filename(tempFileSeed, self.__syscnv)
                    tempFiles.append(en)
                    errorListFiles.append(en)
                    cmd.append("--parseerrors=%s" % en)

                # Store the command only once it is complete.
                commands.append(cmd)

            if verbose:
                progressBar = utility.ProgressReporter(len(commands))
            else:
                progressBar = utility.ProgressReporter(0)

            doneCount = 0
            for index, result in threadingutil.multithreading_iter(
                    invoke_subprocess, commands, maxWorkerThreads):
                if result != 0:
                    raise RuntimeError("error in invocation of subprocess")
                doneCount += 1
                # NOTE(review): per-command progress reporting was disabled
                # in the original code; kept disabled here.
                #progressBar.proceed(doneCount)

            # Iterate over the local list of temp files, never over
            # parseErrorFiles itself: appending to a list while looping over
            # it makes the loop visit the appended items as well.
            for en in errorListFiles:
                f = fopen(en, "r")
                if not f:
                    print >> sys.stderr, "error: can't open a temporary file '%s'" % en
                    sys.exit(2)
                try:
                    lines = f.readlines()
                finally:
                    f.close()
                # Assumes the subprocess writes one file name per line, like
                # the '-i' list written above -- TODO confirm against the
                # "--parseerrors" writer.
                for line in lines:
                    name = line.rstrip('\r\n')
                    if name:
                        parseErrorFiles.append(name)
        finally:
            for fn in tempFiles:
                remove_file_neglecting_error(fn)

        progressBar.done()