def __preprocess_files_by_workers(self, maxWorkerThreads, filesToBePreprocessed, extensionStr, preprocessModule,
        tempFileSeed=None, options=list(), verbose=False, parseErrorFiles=None):
    """Preprocess files by running this script as worker subprocesses.

    The input files are split into chunks.  For every chunk a temporary
    list file (one file name per line) is written and a command line
    [sys.executable, __file__, <module name>, <options...>, '-i', <list file>]
    is built.  The commands are then executed through
    threadingutil.multithreading_iter() with invoke_subprocess.

    Parameters:
      maxWorkerThreads -- forwarded to multithreading_iter(); must be >= 2.
      filesToBePreprocessed -- iterable of file names (copied into a list).
      extensionStr -- not used by this method.
      preprocessModule -- object whose getname() value becomes the first
          argument of every worker command.
      tempFileSeed -- forwarded to make_temp_filename().
      options -- iterable of (key, value) pairs appended to every command
          line.  Only iterated here, never mutated.
      verbose -- when true the ProgressReporter is created with the number
          of commands, otherwise with 0.
      parseErrorFiles -- None, or a list.  When a list is given, every
          worker gets a "--parseerrors=<temp file>" argument and, after all
          workers finished, the lines of each such temp file are appended
          to this list (one list of lines per chunk).

    Raises:
      RuntimeError -- when a worker subprocess reports a non-zero result.

    Calls sys.exit(2) when a parse-error temp file cannot be opened.
    All temporary files are removed on every exit path.
    """
    assert maxWorkerThreads >= 2

    filesToBePreprocessed = list(filesToBePreprocessed)
    totalFiles = len(filesToBePreprocessed)

    # At least 200 files per chunk, growing to 1/64 of the input, capped
    # at 2000.  '//' keeps the integer division explicit.
    chunkSize = min(max(200, totalFiles // 64), 2000)

    tempFiles = list()
    # Names of the per-chunk parse-error files.  They are kept apart from
    # the caller's parseErrorFiles list: the original code stored them in
    # that list and appended the results to it while iterating over it, so
    # the loop eventually tried to open its own results as file names.
    errorTempFiles = list()
    try:
        commands = list()
        for fiStart in range(0, totalFiles, chunkSize):
            cmd = [sys.executable, __file__, preprocessModule.getname()]
            for k, v in options:
                cmd.append(k)
                cmd.append(v)

            fn = make_temp_filename(tempFileSeed, self.__syscnv)
            tempFiles.append(fn)
            f = fopen(fn, "wb")
            try:
                for name in filesToBePreprocessed[fiStart:fiStart + chunkSize]:
                    f.write(name)
                    f.write('\n')
            finally:
                f.close()
            cmd.append('-i')
            cmd.append(fn)
            commands.append(cmd)

            if parseErrorFiles is not None:
                en = make_temp_filename(tempFileSeed, self.__syscnv)
                errorTempFiles.append(en)
                tempFiles.append(en)
                cmd.append("--parseerrors=%s" % en)

        if verbose:
            progressBar = utility.ProgressReporter(len(commands))
        else:
            progressBar = utility.ProgressReporter(0)

        # NOTE: per-command progress is not reported; only done() is called
        # on the reporter once everything has finished.
        for _index, result in threadingutil.multithreading_iter(
                invoke_subprocess, commands, maxWorkerThreads):
            if result != 0:
                raise RuntimeError("error in invocation of subprocess")

        if parseErrorFiles is not None:
            for en in errorTempFiles:
                f = fopen(en, "r")
                if not f:
                    sys.stderr.write("error: can't open a temporary file '%s'\n" % en)
                    sys.exit(2)
                try:
                    parseErrorFiles.append(f.readlines())
                finally:
                    f.close()
    finally:
        # Runs on success, on RuntimeError and on sys.exit(), so no
        # temporary file is left behind.
        for fn in tempFiles:
            remove_file_neglecting_error(fn)

    progressBar.done()
usage = "Usage: testthreadingutil.py [NUMWORKER [INPUTSIZE]]" numWorker = 4 inputSize = 30 if len(sys.argv) >= 2: if sys.argv[1] == "-h": print usage sys.exit(0) numWorker = int(sys.argv[1]) if len(sys.argv) >= 3: inputSize = int(sys.argv[2]) if len(sys.argv) >= 4: print usage sys.exit(1) def genargslist(size): for v in xrange(size): yield (v, ) t1 = time.time() #for index, result in threadingutil.multithreading_iter(f, [ args for args in genargslist(inputSize) ], numWorker): for index, result in threadingutil.multithreading_iter( f, genargslist(inputSize), numWorker): print "index = ", index, ", result = ", result print print "NUMWORKER = %d, INPUTSIZE = %d" % (numWorker, inputSize) print "elapsed time: %g" % (time.time() - t1)
def __preprocess_files_by_workers(
    self,
    maxWorkerThreads,
    filesToBePreprocessed,
    extensionStr,
    preprocessModule,
    tempFileSeed=None,
    options=list(),
    verbose=False,
    parseErrorFiles=None,
):
    """Preprocess files by running this script as worker subprocesses.

    The input files are split into chunks.  For every chunk a temporary
    list file (one file name per line) is written and a command line
    [sys.executable, __file__, <module name>, <options...>, "-i", <list file>]
    is built.  The commands are then executed through
    threadingutil.multithreading_iter() with invoke_subprocess.

    Parameters:
      maxWorkerThreads -- forwarded to multithreading_iter(); must be >= 2.
      filesToBePreprocessed -- iterable of file names (copied into a list).
      extensionStr -- not used by this method.
      preprocessModule -- object whose getname() value becomes the first
          argument of every worker command.
      tempFileSeed -- forwarded to make_temp_filename().
      options -- iterable of (key, value) pairs appended to every command
          line.  Only iterated here, never mutated.
      verbose -- when true the ProgressReporter is created with the number
          of commands, otherwise with 0.
      parseErrorFiles -- None, or a list.  When a list is given, every
          worker gets a "--parseerrors=<temp file>" argument and, after all
          workers finished, the lines of each such temp file are appended
          to this list (one list of lines per chunk).

    Raises:
      RuntimeError -- when a worker subprocess reports a non-zero result.

    Calls sys.exit(2) when a parse-error temp file cannot be opened.
    All temporary files are removed on every exit path.
    """
    assert maxWorkerThreads >= 2

    filesToBePreprocessed = list(filesToBePreprocessed)
    totalFiles = len(filesToBePreprocessed)

    # At least 200 files per chunk, growing to 1/64 of the input, capped
    # at 2000.  '//' keeps the integer division explicit.
    chunkSize = min(max(200, totalFiles // 64), 2000)

    tempFiles = list()
    # Names of the per-chunk parse-error files.  They are kept apart from
    # the caller's parseErrorFiles list: the original code stored them in
    # that list and appended the results to it while iterating over it, so
    # the loop eventually tried to open its own results as file names.
    errorTempFiles = list()
    try:
        commands = list()
        for fiStart in range(0, totalFiles, chunkSize):
            cmd = [sys.executable, __file__, preprocessModule.getname()]
            for k, v in options:
                cmd.append(k)
                cmd.append(v)

            fn = make_temp_filename(tempFileSeed, self.__syscnv)
            tempFiles.append(fn)
            f = fopen(fn, "wb")
            try:
                for name in filesToBePreprocessed[fiStart : fiStart + chunkSize]:
                    f.write(name)
                    f.write("\n")
            finally:
                f.close()
            cmd.append("-i")
            cmd.append(fn)
            commands.append(cmd)

            if parseErrorFiles is not None:
                en = make_temp_filename(tempFileSeed, self.__syscnv)
                errorTempFiles.append(en)
                tempFiles.append(en)
                cmd.append("--parseerrors=%s" % en)

        if verbose:
            progressBar = utility.ProgressReporter(len(commands))
        else:
            progressBar = utility.ProgressReporter(0)

        # NOTE: per-command progress is not reported; only done() is called
        # on the reporter once everything has finished.
        for _index, result in threadingutil.multithreading_iter(invoke_subprocess, commands, maxWorkerThreads):
            if result != 0:
                raise RuntimeError("error in invocation of subprocess")

        if parseErrorFiles is not None:
            for en in errorTempFiles:
                f = fopen(en, "r")
                if not f:
                    sys.stderr.write("error: can't open a temporary file '%s'\n" % en)
                    sys.exit(2)
                try:
                    parseErrorFiles.append(f.readlines())
                finally:
                    f.close()
    finally:
        # Runs on success, on RuntimeError and on sys.exit(), so no
        # temporary file is left behind.
        for fn in tempFiles:
            remove_file_neglecting_error(fn)

    progressBar.done()
if __name__ == '__main__': usage = "Usage: testthreadingutil.py [NUMWORKER [INPUTSIZE]]" numWorker = 4 inputSize = 30 if len(sys.argv) >= 2: if sys.argv[1] == "-h": print usage sys.exit(0) numWorker = int(sys.argv[1]) if len(sys.argv) >= 3: inputSize = int(sys.argv[2]) if len(sys.argv) >= 4: print usage sys.exit(1) def genargslist(size): for v in xrange(size): yield ( v, ) t1 = time.time() #for index, result in threadingutil.multithreading_iter(f, [ args for args in genargslist(inputSize) ], numWorker): for index, result in threadingutil.multithreading_iter(f, genargslist(inputSize), numWorker): print "index = ", index, ", result = ", result print print "NUMWORKER = %d, INPUTSIZE = %d" % ( numWorker, inputSize ) print "elapsed time: %g" % (time.time() - t1)