def main():
    sniffer_count = min([cpu_count(), len(argv[1:])])
    sniffer_queue = JoinableQueue()
    db_queue = JoinableQueue()
    for filename in argv[1:]:
        if os.path.isfile(filename):
            print('Will be loading from file', filename)
        else:
            print('Will sniff from interface', filename)
        sniffer_queue.put(filename)
    if argv[1:] == []:
        sniffer_queue.put('*')
    if sniffer_count == 0:
        sniffer_count = 1
    sniffers = []
    for _ in range(sniffer_count):
        p = Process(target=sniffer, args=(sniffer_queue, db_queue))
        p.start()
        sniffers.append(p)
    sniffer_queue.put(None)
    db_proc = Process(target=db_worker, args=(sniffer_count, db_queue))
    db_proc.start()
    interfaces = []
    sniffer_queue.close()
    db_proc.join()
    for proc in sniffers:
        proc.join()
class multiproc_calculator():
    def __init__(self, num_processes, shape=(4, 4)):
        self.matrix_list = []
        self.task_queue = JoinableQueue()
        self.result_queue = Queue()
        self.result = np.zeros(shape)
        self.processes = [
            matmul_process(self.task_queue, self.result_queue)
            for i in range(num_processes)
        ]

    def get_result(self):
        self.task_queue.join()
        self.task_queue.close()
        while not self.result_queue.empty():
            np.add(self.result, self.result_queue.get(), out=self.result)
        return self.result

    def start_proc(self):
        for proc in self.processes:
            proc.daemon = True
            proc.start()

    def add_new_matrix(self, matrix):
        # generate all possible ordered pairs with the matrices seen so far
        for a in self.matrix_list:
            self.task_queue.put((a, matrix))
            self.task_queue.put((matrix, a))
        self.matrix_list.append(matrix)
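# A minimal usage sketch for the class above (assumption: matmul_process is a
# Process subclass defined elsewhere that takes (a, b) pairs off task_queue,
# puts the product on result_queue, and calls task_done() for every pair).
if __name__ == '__main__':
    calc = multiproc_calculator(num_processes=2, shape=(4, 4))
    calc.start_proc()
    for _ in range(3):
        calc.add_new_matrix(np.random.rand(4, 4))
    total = calc.get_result()  # accumulated sum of all pairwise products
    print(total)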
def annotate_gtf_parallel(input_gtf_file, output_gtf_file, gtf_sample_attr,
                          num_processors, tmp_dir):
    # create queue
    input_queue = JoinableQueue(maxsize=num_processors * 3)
    # start worker processes
    procs = []
    worker_gtf_files = []
    for i in xrange(num_processors):
        worker_gtf_file = os.path.join(tmp_dir, "annotate_worker%03d.gtf" % (i))
        worker_gtf_files.append(worker_gtf_file)
        args = (input_queue, worker_gtf_file, gtf_sample_attr)
        p = Process(target=annotate_gtf_worker, args=args)
        p.daemon = True
        p.start()
        procs.append(p)
    for lines in parse_loci(open(input_gtf_file)):
        input_queue.put(lines)
    # stop workers
    for p in procs:
        input_queue.put([])
    # close queue
    input_queue.join()
    input_queue.close()
    # join worker processes
    for p in procs:
        p.join()
    # merge/sort worker gtf files
    logging.debug("Merging %d worker GTF file(s)" % (num_processors))
    merge_sort_gtf_files(worker_gtf_files, output_gtf_file, tmp_dir=tmp_dir)
    # remove worker gtf files
    for filename in worker_gtf_files:
        if os.path.exists(filename):
            os.remove(filename)
def main(argv):
    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 * (options.quiet - options.verbose))
    log = logging.getLogger(os.path.basename(sys.argv[0]))
    FORMAT = '%(asctime)s|%(levelname)s|%(process)d|%(module)s.py|%(funcName)s|%(lineno)d| %(message)s'
    if 1:
        handler = logging.StreamHandler(sys.stdout)
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter(FORMAT, datefmt="%Y-%m-%d %H:%M:%S")
        handler.setFormatter(formatter)
        #pprint(dir(handler))
        log.addHandler(handler)
        #log.info('test')
        #e()
    file_object_cache = FileObjectCache()
    #key_name, value_kwargs = args
    #value = Value(file_object_cache, content=None, filename=None, md5=None,
    #              offset=None, path=None, size=None, bucket_name=None)
    start = time.time()
    if 1:
        put_queue = JoinableQueue(1024 * options.processes)
        stat_queue = JoinableQueue()
        walk = {'filesystem': walk_filesystem}[options.walk]
        args = [
            '/auto/fina-data/share/FARepository/prod/CIGActgS11/position/processing/Priority_2/PositionSide/122654_DESK_CDRG183872PositionSide.bcp.SSrvr'
        ]
        walker_process = Process(target=walker, args=(walk, put_queue, args, options))
        walker_process.start()
    if 1:
        put = {'update': put_update}[options.put]
        #print put
        #e()
        putter_processes = list(
            islice(
                repeatedly(Process, target=putter,
                           args=(put, put_queue, stat_queue, options)),
                options.processes))
        for putter_process in putter_processes:
            #print putter_process
            putter_process.start()
    walker_process.join()
    if 1:
        statter_process = Process(target=statter, args=(stat_queue, start, options))
        statter_process.start()
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
def readCEFFile(afile, pygtail):
    if exists(afile):  # sometimes files can move/archive while we iterate the list
        try:
            # start a process to post our stuff.
            logcache = JoinableQueue()
            postingProcess = Process(target=postLogs, args=(logcache,),
                                     name="cef2mozdefHTTPPost")
            postingProcess.start()
            # have pygtail feed us lines
            for line in pygtail:
                pygtail._update_offset_file()
                cefDict = parseCEF(line)
                #logger.debug(json.dumps(cefDict))
                # append json to the list for posting
                if cefDict is not None:
                    logcache.put(json.dumps(cefDict))
            logger.info('{0} done'.format(afile))
            logger.info('waiting for posting to finish')
            logcache.put(None)
            logcache.close()
            #logger.info('posting done')
        except KeyboardInterrupt:
            sys.exit(1)
        except ValueError as e:
            logger.fatal('Exception while handling CEF message: %r' % e)
            sys.exit(1)
class GHDDIMultiProcessPool:
    def __init__(self, target, database=None):
        self._inputQueue = Queue()
        self._outputQueue = Queue()
        jobs = []
        for i in range(0, os.cpu_count()):
            jobs.append(GHDDIProcess(target, database, self._inputQueue, self._outputQueue))
        self._jobs = jobs

    def __del__(self):
        print('processPool del')
        self._inputQueue.join()
        self._outputQueue.join()
        self._inputQueue.close()
        self._outputQueue.close()
        for p in self._jobs:
            p.terminate()
            p.close()

    def startAll(self):
        for p in self._jobs:
            p.start()

    def finishAll(self):
        pass

    def putTask(self, taskArgs, block=True, timeout=None):
        self._inputQueue.put(taskArgs, block=block, timeout=timeout)

    def getTaskRet(self, block=True, timeout=None):
        return self._outputQueue.get(block=block, timeout=timeout)
def search8(q, path):
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()
    job_count = 0
    for f in os.scandir('data'):
        jobs.put(f.path)
        job_count = job_count + 1
    [
        Process(target=work, args=(i, q, jobs, result)).start()
        for i in range(NUMBER_OF_PROCESSES)
    ]
    matches = []
    for t in range(job_count):
        r = result.get()
        result.task_done()
        if r:
            matches.append(r)
    matches.sort()
    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)
    result.join()
    jobs.close()
    result.close()
    return matches
def main():
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()
    tasks = ["1", "2", "3", "4", "5"]
    for w in tasks:
        jobs.put(w)
    [
        Process(target=work, args=(i, jobs, result)).start()
        for i in range(NUMBER_OF_PROCESSES)
    ]
    print('starting workers')
    for t in range(len(tasks)):
        r = result.get()
        time.sleep(0.5)
        print(r)
        result.task_done()
    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)
    result.join()
    jobs.close()
    result.close()
class Queue:
    def __init__(self):
        self._queue = JoinableQueue()

    def put(self, element):
        if self._queue is not None:
            self._queue.put(element)

    def get(self):
        if self._queue is not None:
            try:
                return self._queue.get()
            except:
                return None

    def join(self):
        if self._queue is not None:
            self._queue.join()

    def task_done(self):
        if self._queue is not None:
            self._queue.task_done()

    def unblock_gets(self):
        if self._queue is not None:
            self._queue.close()
            self._queue = JoinableQueue()
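# A minimal usage sketch of the wrapper above (assumption: the consumer below is
# hypothetical; it only illustrates pairing every get() with task_done() and
# stopping on a None sentinel).
from multiprocessing import Process

def consumer(q):
    while True:
        item = q.get()
        if item is None:
            q.task_done()
            break
        print('got', item)
        q.task_done()

if __name__ == '__main__':
    q = Queue()
    worker = Process(target=consumer, args=(q,))
    worker.start()
    for item in range(5):
        q.put(item)
    q.put(None)   # sentinel so the consumer exits
    q.join()      # blocks until every put() has been acknowledged
    worker.join()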
def queueManager(numProc, myList, function, *args):
    '''queueManager(numProc, myList, function, *args):
    generic function used to start worker processes via the multiprocessing Queue object

    numProc - number of processors to use
    myList - a list of objects to be iterated over
    function - target function
    *args - additional arguments to pass to function

    Return - an unordered list of the results from myList
    '''
    qIn = Queue()
    qOut = JoinableQueue()
    if args:
        arguments = (qIn, qOut,) + args
    else:
        arguments = (qIn, qOut,)
    results = []
    # reduce processor count if proc count > files
    i = 0
    for l in myList:
        qIn.put((i, l))
        i += 1
    for _ in range(numProc):
        p = Process(target=function, args=arguments).start()
    sys.stdout.write("Progress: {:>3}%".format(0))
    curProgress = 0
    lastProgress = 0
    while qOut.qsize() < len(myList):
        #sys.stdout.write("\b\b\b\b{:>3}%".format(int(ceil(100*qOut.qsize()/len(myList)))))
        curProgress = int(ceil(100 * qOut.qsize() / len(myList)))
        if curProgress - lastProgress > 10:
            lastProgress += 10
            sys.stdout.write("\nProgress: {:>3}%".format(lastProgress))
            sys.stdout.flush()
    sys.stdout.write("\nProgress: {:>3}%".format(100))
    #sys.stdout.write("\b\b\b\b{:>3}%".format(100))
    sys.stdout.write("\n")
    for _ in range(len(myList)):
        # indicate done results processing
        results.append(qOut.get())
        qOut.task_done()
    # tell child processes to stop
    for _ in range(numProc):
        qIn.put('STOP')
    orderedRes = [None] * len(results)
    for i, res in results:
        orderedRes[i] = res
    qOut.join()
    qIn.close()
    qOut.close()
    return orderedRes
def main():
    print("______POPULATE FEATURE NAMES START__")
    populateFeatureNames(trainFile)
    print("___POPULATE FEATURE NAMES ENDS__")
    ################
    print("building Queue start")
    q = JoinableQueue(20)
    q_feats = Queue()
    print("building Queue end")
    print("building pool start")
    pool = Pool(16, populateFeatures, ((q, q_feats),))
    print("building pool ends")
    returnedList = []
    print("onlyfiles start")
    onlyfiles = [f for f in os.listdir(path) if ".asm" in f]
    print("onlyfiles")
    print(onlyfiles)
    print("onlyfiles ends")
    print("___FEATURE EXTRACTION STARTS FOR PATH__")
    for ffile in onlyfiles:
        q.put((ffile, path))
    start = time.asctime(time.localtime(time.time()))
    print("Start Time : " + start)
    q.close()
    print("Q closed")
    start = time.asctime(time.localtime(time.time()))
    print("Start Time : " + start)
    # time.sleep(100)
    q.join()
    print("Q joined")
    start = time.asctime(time.localtime(time.time()))
    print("Start Time : " + start)
    # time.sleep(100)
    pool.close()
    print("Pool closed")
    start = time.asctime(time.localtime(time.time()))
    print("Start Time : " + start)
    for i in range(q_feats.qsize()):
        returnedList.append(q_feats.get())
    # returnedList = p.map(functools.partial(populateFeatures, filePath=path), onlyfiles)
    # time.sleep(10)
    # p.close()
    # time.sleep(100)
    # p.join()
    # time.sleep(10)
    print("___ PROCESSING OUTPUT OF MAP FUNCTION FOR FEATURE_EXTRACTION STARTS___")
    # except:
    #     print("Something went wrong")
    generateHeader()
    generateFeatures(returnedList)
    print("_____ PROCESSING OUTPUT OF MAP FUNCTION FOR FEATURE_EXTRACTION ENDS____")
    print("_____FEATURE EXTRACTION ENDS____")
def tcount(samtools, samples, chromosomes, num_workers, q, verbose=False):
    # Define a Lock and a shared value for log printing through ProgressBar
    err_lock = Lock()
    counter = Value('i', 0)
    progress_bar = pb.ProgressBar(total=len(samples) * len(chromosomes), length=40,
                                  lock=err_lock, counter=counter, verbose=verbose)
    # Establish communication queues
    tasks = JoinableQueue()
    results = Queue()
    # Enqueue jobs
    jobs_count = 0
    for bam in samples:
        for chro in chromosomes:
            tasks.put((bam[0], bam[1], chro))
            jobs_count += 1
    # Setting up the workers
    workers = [
        TotalCounter(tasks, results, progress_bar, samtools, q, verbose)
        for i in range(min(num_workers, jobs_count))
    ]
    # Add a poison pill for each worker
    for i in range(len(workers)):
        tasks.put(None)
    # Start the workers
    for w in workers:
        w.start()
    # Wait for all of the tasks to finish
    tasks.join()
    # Get the results
    sorted_results = {}
    for i in range(jobs_count):
        res = results.get()
        sorted_results[res[0], res[1]] = res[2]
    # Close Queues
    tasks.close()
    results.close()
    # Ensure each worker terminates
    for w in workers:
        w.terminate()
        w.join()
    return sorted_results
def parallel(self):
    from multiprocessing import Process, Queue, JoinableQueue
    self.ntrajs = []
    for i in range(self.cpus):
        self.ntrajs.append(min(int(floor(float(self.ntraj) / self.cpus)),
                               self.ntraj - sum(self.ntrajs)))
    cnt = sum(self.ntrajs)
    while cnt < self.ntraj:
        for i in range(self.cpus):
            self.ntrajs[i] += 1
            cnt += 1
            if (cnt >= self.ntraj):
                break
    self.ntrajs = np.array(self.ntrajs)
    self.ntrajs = self.ntrajs[np.where(self.ntrajs > 0)]
    self.nprocs = len(self.ntrajs)
    sols = []
    processes = []
    resq = JoinableQueue()
    print "Number of cpus:", self.cpus
    print "Trying to start", self.nprocs, "process(es)."
    print "Number of trajectories for each process:"
    print self.ntrajs
    for i in range(self.nprocs):
        p = Process(target=self.evolve_serial,
                    args=((resq, self.ntrajs[i], i, self.seed * (i + 1)),))
        p.start()
        processes.append(p)
    resq.join()
    cnt = 0
    while True:
        try:
            sols.append(resq.get())
            resq.task_done()
            cnt += 1
            if (cnt >= self.nprocs):
                break
        except KeyboardInterrupt:
            break
        except:
            pass
    resq.join()
    for proc in processes:
        try:
            proc.join()
        except KeyboardInterrupt:
            print("Cancel thread on keyboard interrupt")
            proc.terminate()
            proc.join()
    resq.close()
    return sols
class KnowledgeBase(Daemon):
    def __init__(self, config):
        set_logging(config)
        self.config = config
        self.pidfile = os.path.abspath(config['pidfile'])
        self.time_lock = Lock()
        self.teller_queue = JoinableQueue()
        self.session_factory = get_sasession(self.config)
        session = self.session_factory()

    def run(self):
        if int(self.config['instant_duration']):
            self.clock = Ticker(self.config, self.session_factory(),
                                self.time_lock, self.teller_queue)
            self.clock.start()
        host = self.config['kb_host']
        port = int(self.config['kb_port'])
        nproc = int(self.config['teller_processes'])
        for n in range(nproc):
            teller = Teller(self.config, self.session_factory, self.teller_queue)
            teller.daemon = True
            teller.start()
        self.socket = Listener((host, port))
        while True:
            try:
                client = self.socket.accept()
            except InterruptedError:
                return
            self.time_lock.acquire()
            self.teller_queue.put(client)
            self.time_lock.release()

    def cleanup(self, signum, frame):
        """cleanup tasks"""
        nproc = int(self.config['teller_processes'])
        for n in range(nproc):
            self.teller_queue.put(None)
        self.teller_queue.close()
        try:
            self.clock.ticking = False
        except AttributeError:
            pass
        self.teller_queue.join()
        try:
            self.clock.join()
        except AttributeError:
            pass
        logger.warn('bye from {n}, received signal {p}'.format(
            n=mp.current_process().name, p=str(signum)))
def __iter__(self):
    queue = JoinableQueue(maxsize=self.max_queue_size)
    n_batches, job_queue = self._start_producers(queue)
    # Run as consumer (read items from queue, in current thread)
    for x in xrange(n_batches):
        item = queue.get()
        #print queue.qsize(), "GET"
        yield item  # Yield the item to the consumer (user)
        queue.task_done()
    queue.close()
    job_queue.close()
class Multiplexer(object):
    def __init__(self, worker, writer, threads=4):
        self.worker = worker
        self.writer = writer
        self.q = JoinableQueue()
        self.done = Value(c_bool, False)
        self.consumer = Process(target=self.consume)
        self.pool = Pool(threads, init_opener)

    def start(self):
        self.done.value = False
        self.consumer.start()

    def addjob(self, url, data=None):
        params = [url]
        if data:
            params.append(data)
        try:
            return self.pool.apply_async(self.worker, params, callback=self.q.put)
        except:
            logger.error('[!] failed to scrape ' + url)
            logger.error(traceback.format_exc())
            raise

    def finish(self):
        self.pool.close()
        logger.info('closed pool')
        self.pool.join()
        logger.info('joined pool')
        self.done.value = True
        self.q.close()
        logger.info('closed q')
        self.consumer.join()
        logger.info('joined consumer')
        #self.q.join()
        #logger.info('joined q')

    def consume(self):
        param = [0, 0]
        while True:
            job = None
            try:
                job = self.q.get(True, timeout=1)
            except Empty:
                if self.done.value == True:
                    break
            if job:
                param = self.writer(job, param)
                self.q.task_done()
        logger.info('added/updated: %s' % param)
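# A minimal usage sketch for the Multiplexer above (assumption: init_opener and
# logger exist in this module; fetch/persist and the URLs are hypothetical
# placeholders for the real scraping worker and writer).
def fetch(url):            # runs in the pool workers
    return {'url': url, 'status': 'ok'}

def persist(job, param):   # runs in the consumer process
    param[0] += 1
    return param

if __name__ == '__main__':
    m = Multiplexer(worker=fetch, writer=persist, threads=4)
    m.start()
    for url in ('http://example.com/a', 'http://example.com/b'):
        m.addjob(url)
    m.finish()             # drain the pool, then let the consumer exit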
class Mothership(object):
    """ Monitor of producer and consumers """

    def __init__(self, producer, consumers, graceful=False):
        self._queue = JoinableQueue()
        self._producer_proxy = ProducerProxy(self._queue, producer)
        self._consumer_pool = list(ConsumerProxy(self._queue, cons) for cons in consumers)
        self._graceful = graceful

    def start(self):
        """ Start working """
        try:
            logger.info('Starting Producers'.center(20, '='))
            self._producer_proxy.start()
            time.sleep(0.1)
            logger.info('Starting Consumers'.center(20, '='))
            for consumer in self._consumer_pool:
                consumer.start()
            self._producer_proxy.join()
            self._queue.join()
            for consumer in self._consumer_pool:
                consumer.join()
            self._queue.close()
        except KeyboardInterrupt:
            self._producer_proxy.stop()
            self._producer_proxy.join()
            if self._graceful:
                logger.info('Shutting Down gracefully...')
                self._queue.join()
            for consumer in self._consumer_pool:
                consumer.stop()
                consumer.join()
            self._queue.close()

    def __enter__(self):
        return self

    def __exit__(self, types, value, tb):
        return
def __iter__(self):
    queue = JoinableQueue(maxsize=params.N_PRODUCERS * 2)
    n_batches, job_queue = self.start_producers(queue)
    # Run as consumer (read items from queue, in current thread)
    for x in xrange(n_batches):
        item = queue.get()
        #print len(item[0]), queue.qsize(), "GET"
        yield item
        queue.task_done()
    #queue.join()  # Lock until queue is fully done
    queue.close()
    job_queue.close()
def main():
    workers = cpu_count()
    line_queue = JoinableQueue(workers * 2)  # Keep at most 2*workers lines in flight
    input_file = open(sys.argv[1], 'rU')
    output_file = open(sys.argv[2], 'w')
    output_queue = Queue(workers * 3)
    processes = []
    for i in xrange(workers):
        this_process = Process(target=process_queue,
                               args=(line_queue, output_queue, LINES_AT_ONCE))
        this_process.start()
        processes.append(this_process)
    # Start the output processor
    output_processor = Process(target=retrieve_output,
                               args=(output_queue, output_file, LINES_AT_ONCE))
    output_processor.start()
    small_queue = []
    block_number = 0
    for l in input_file:
        small_queue.append(l)
        if len(small_queue) >= LINES_AT_ONCE:
            line_queue.put((block_number, small_queue))
            block_number += 1
            small_queue = []
    if len(small_queue) > 0:
        line_queue.put((block_number, small_queue))
    for i in xrange(workers):
        line_queue.put('STOP')
    print "Waiting for all tasks to end."
    line_queue.close()
    line_queue.join()
    for p in processes:
        p.join()
    print "All tasks ended. Dumping the final output."
    output_queue.put(None)
    output_queue.close()
    output_processor.join()
    print "Done. Exiting."
    output_file.close()
    return
class TaskControl:
    def __init__(self, cls_worker, count, *args, **kwargs):
        self.queue = JoinableQueue()
        self.stopped = Event()
        self.count_processed = Value('i', 0)
        self.processes = [cls_worker(self, *args) for _ in range(count)]
        map(Process.start, self.processes)

    def is_active(self):
        return not self.stopped.is_set()

    def is_alive(self):
        alive = filter(bool, map(Process.is_alive, self.processes))
        print '---- %d child processes are still alive' % len(alive)
        return alive

    def stop(self):
        self.stopped.set()
        self.queue.close()
        print '-- waiting for processes to finish'
        map(Process.join, self.processes)
        self.queue.cancel_join_thread()

    def send_chunk(self, items):
        map(self.queue.put, items)
        print '--- waiting for queue to complete'
        while self.get_stats()[1] and self.is_alive():
            time.sleep(1)

    def get(self):
        while self.is_active():
            try:
                yield self.queue.get(timeout=1)
            except Queue.Empty:
                pass

    def tick(self):
        self.queue.task_done()
        self.count_processed.value += 1
        if not self.count_processed.value % 20:
            print '%d items processed' % self.count_processed.value
            time.sleep(0.5)

    def get_stats(self):
        stats = self.count_processed.value, self.queue.qsize()
        print '--- %d items processed, %d queued' % stats
        return stats
def qwork(command_file, nproc):
    """Queue up commands to run in parallel."""
    print("Queuing work using %d processes...\n" % nproc)
    queue = JoinableQueue()
    for command in command_file:
        queue.put(command.decode('utf8').rstrip('\n'))
    for ii in range(nproc):
        Runner(queue)
    queue.join()
    queue.close()
    print("\n...done!")
def main():
    processes = cpu_count() * 2
    queue = JoinableQueue()
    get_links(queue)
    create_folder()
    for i in range(processes):
        # .start() - Not sure what that actually returns....
        p = Process(target=save_image, args=(queue,))
        p.start()
    for i in range(processes):
        queue.put(None)  # Tell the processes to end
    queue.join()
    queue.close()
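# A minimal sketch of what the save_image worker used above could look like
# (assumption: the real implementation downloads each link into the created
# folder; this version only shows the get()/task_done() pairing and the None
# sentinel that main() relies on).
def save_image(queue):
    while True:
        link = queue.get()
        if link is None:      # sentinel from main(): stop consuming
            queue.task_done()
            break
        # ... download `link` and write the image to the target folder ...
        queue.task_done()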
def insert_files(self, out, cfg, producer, return_dict, skip_header=0, rec_delim=os.linesep):
    self.opt.skip_header = skip_header
    self.opt.rec_delim = rec_delim
    log = logging.getLogger('cli')
    self.scfg, self.tcfg = cfg
    file_object_cache = FileObjectCache()
    start = time.time()
    stat_queue = JoinableQueue()
    if 1:
        put_queue = JoinableQueue(1024 * self.opt.processes)
    if 1:
        put = {'update': self.put_update}[self.opt.put]
        putter_processes = list(islice(repeatedly(Process, target=self.putter,
                                                  args=(put, put_queue, stat_queue, return_dict)),
                                       self.opt.processes))
        for putter_process in putter_processes:
            putter_process.start()
    if 1:
        statter_process = Process(target=self.statter, args=(stat_queue, start))
        statter_process.start()
    out_names = []
    #walk = {'filesystem': self.walk_filesystem}[self.opt.walk]
    for file in producer[0](*producer[1]):
        out_names.append(file)
        put_queue.put(file)
        #time.sleep(3)
    out.dump_files = out_names
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
    print 77777, counter.value()
    print 77777, self.total_ins
    print 7777, (return_dict.values())
def clear_area_around_eye(size=256, image_dir='I:/AI_for_an_eyes/test/test/',
                          target_dir='I:/AI_for_an_eyes/test/test_zonder_meuk_256/'):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    util.update_progress(0)
    tasks = glob.glob(image_dir + '*.jpeg')
    job_total = len(tasks)
    print 'Processing images matching ' + image_dir + '*.jpeg'
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count() * 2
    for im_name in tasks:
        jobs.put(im_name)
    for i in xrange(NUMBER_OF_PROCESSES):
        p = Thread(target=worker, args=(i, jobs, result, target_dir, size))
        p.daemon = True
        p.start()
    print 'Starting workers (', NUMBER_OF_PROCESSES, ')!'
    n_complete = 0
    for t in xrange(len(tasks)):
        r = result.get()
        n_complete += 1
        util.update_progress(n_complete / job_total)
        result.task_done()
        #print t, 'done'
    for w in xrange(NUMBER_OF_PROCESSES):
        jobs.put(None)
    util.update_progress(1)
    print 'Done!'
    time.sleep(1)
    result.join()
    jobs.close()
    result.close()
def process_task(num_workers):
    logging.info("Started")
    task_queue = JoinableQueue()
    done_queue = Queue()

    def worker(name):
        """ represents an 'expensive' task """
        logging.info("Started process : %s" % name)
        for task in iter(task_queue.get, 'Stop'):
            done_queue.put(task)
            time.sleep(1)
            task_queue.task_done()
        # This is for the poison pill task
        task_queue.task_done()
        logging.info("Done process : %s" % name)

    # First we start the workers, and give them a list that we can look at after
    for i in range(num_workers):
        Process(target=worker, args=("P-%s" % (i + 1),)).start()
    # Now the main thread populates the Queue
    num_tasks = num_workers * 5
    for i in range(num_tasks):
        task_queue.put(i)
    # Now, administer the poison pill which tells processes that we are done populating the Q
    for i in range(num_workers):
        task_queue.put('Stop')
    # Now wait for workers to finish their work
    task_queue.close()
    task_queue.join()
    logging.info("Workers are done")
    # Now verify that all tasks are done by seeing them in the done queue
    done_queue.put('Stop')
    done_tasks = [task for task in iter(done_queue.get, 'Stop')]
    assert len(done_tasks) == num_tasks
    logging.info("Verified work - done!")
def __iter__(self):
    queue = JoinableQueue(maxsize=params.N_PRODUCERS * 8)
    n_batches, job_queue = self.start_producers(queue)
    # Run as consumer (read items from queue, in current thread)
    for x in xrange(n_batches):
        item = queue.get()
        yield item
        queue.task_done()
    queue.close()
    job_queue.close()
    if self.shuffle:
        shuffled_idx = np.random.permutation(len(self.X))
        X_new = []
        for i in range(len(self.X)):
            X_new += [self.X[shuffled_idx[i]]]
        self.X = X_new
def _process_test_q(self, source: JoinableQueue, local_test_q: JoinableQueue):
    count = 0
    while count < self._num_processes:
        test_batch = source.get()
        source.task_done()
        if test_batch is not None \
                and len(test_batch.test_ids) > 1 \
                and test_batch.restriction == TestExecutionConstraint.SINGLE_NODE:
            for test_id in test_batch.test_ids:
                local_test_q.put(TestBatch([test_id]))
            local_test_q.join()
        else:
            local_test_q.put(test_batch)
            local_test_q.join()
        if test_batch is None:
            count += 1
    local_test_q.close()
def insert_files(self, file_names, out, cfg, skip_header=0, rec_delim=os.linesep):
    self.opt.skip_header = skip_header
    self.opt.rec_delim = rec_delim
    log = logging.getLogger('cli')
    self.scfg, self.tcfg = cfg
    file_object_cache = FileObjectCache()
    start = time.time()
    if 1:
        put_queue = JoinableQueue(1024 * self.opt.processes)
        stat_queue = JoinableQueue()
        #walk = {'filesystem': self.walk_filesystem}[self.opt.walk]
        for file in file_names.file_names:
            put_queue.put(file)
    if 1:
        put = {'update': self.put_update}[self.opt.put]
        putter_processes = list(islice(repeatedly(Process, target=self.putter,
                                                  args=(put, put_queue, stat_queue)),
                                       self.opt.processes))
        for putter_process in putter_processes:
            putter_process.start()
    if 1:
        statter_process = Process(target=self.statter, args=(stat_queue, start))
        statter_process.start()
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
    #print(3334, file_names.file_names)
    #e()
    out.file_names = ['%s.gz' % os.path.basename(x[0]) for x in file_names.file_names]
    #pp(out.file_names)
    #e()
    out.file_keys = ['%s.gz' % x[0] for x in file_names.file_names]
    out.file_location = os.path.dirname(file_names.file_names[0][0])
def readCEFFile(afile):
    if exists(afile):  # sometimes files can move/archive while we iterate the list
        try:
            # start a process to post our stuff.
            logcache = JoinableQueue()
            postingProcess = Process(target=postLogs, args=(logcache,),
                                     name="cef2mozdefHTTPPost")
            postingProcess.start()
            # tail a file to feed us lines
            # yielding a line on newline, buffering input in between
            fh = os.open(afile, os.O_RDONLY | os.O_NONBLOCK)
            os.lseek(fh, 0, os.SEEK_END)
            bufa = Buffer()
            bufb = Buffer()
            while True:
                time.sleep(0.001)  # Wait a little
                bufa.append(nonBlockRead(fh))
                if '\n' in ''.join(bufa):
                    # new line/end of log is found
                    for line in ''.join(bufa).splitlines(True):
                        if '\n' in line:
                            cefDict = parseCEF(line.strip())
                            #logger.debug(json.dumps(cefDict))
                            # append json to the list for posting
                            if cefDict is not None:
                                logcache.put(json.dumps(cefDict))
                        else:
                            bufb.append(line)
                    bufa.clear()
                    bufa.append(''.join(bufb))
                    bufb.clear()
            logger.info('{0} done'.format(afile))
            logger.info('waiting for posting to finish')
            logcache.put(None)
            logcache.close()
            #logger.info('posting done')
        except KeyboardInterrupt:
            sys.exit(1)
        except ValueError as e:
            logger.fatal('Exception while handling CEF message: %r' % e)
            sys.exit(1)
def hist_eq(image_dir='test_hist/', target_dir='test_result_hist/', method='CLAHE'):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    tasks = glob.glob(image_dir + '*.jpeg')
    job_total = len(tasks)
    print 'Processing images matching ' + image_dir + '*.jpeg'
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count() * 2
    for im_name in tasks:
        jobs.put(im_name)
    for i in xrange(NUMBER_OF_PROCESSES):
        p = Thread(target=worker, args=(i, jobs, result, target_dir, method))
        p.daemon = True
        p.start()
    print 'Starting workers (', NUMBER_OF_PROCESSES, ')!'
    n_complete = 0
    for t in xrange(len(tasks)):
        r = result.get()
        n_complete += 1
        util.update_progress(n_complete / job_total)
        result.task_done()
        #print t, 'done'
    for w in xrange(NUMBER_OF_PROCESSES):
        jobs.put(None)
    print 'Done!'
    result.join()
    jobs.close()
    result.close()
def __run_chm_test_procs(mems, model, regions, ntasks, nthreads):
    """Starts ntasks processes running __run_chm_test_proc then calls __run_chm_test_parallel."""
    from multiprocessing import JoinableQueue, Process
    from time import sleep
    print("Running CHM test with %d task%s and %d thread%s per task" %
          (ntasks, 's' if ntasks > 1 else '', nthreads, 's' if nthreads > 1 else ''))
    nthreads_full = ntasks * nthreads

    # Start the child processes
    q = JoinableQueue()
    args = (mems, model, nthreads, q)
    processes = [
        Process(target=__run_chm_test_proc, name="CHM-test-%d" % p, args=args)
        for p in xrange(ntasks)
    ]
    for p in processes:
        p.daemon = True
        p.start()
    sleep(0)

    # Run the CHM-test in parallel
    try:
        out = __run_chm_test_parallel(mems, model, regions, q, processes, nthreads_full)
    except:
        __clear_queue(q)
        __kill_processes(processes)
        raise

    # Tell all processes we are done and make sure they all actually terminate
    for _ in xrange(ntasks):
        q.put_nowait(None)
    q.close()
    q.join()
    q.join_thread()
    for p in processes:
        p.join()

    # Done! Return the output image
    return out
def parallel_for(a, cls, args=[], kwargs={}, num_processes=None):
    from multiprocessing import Process, JoinableQueue, cpu_count, Pipe
    if num_processes is None:
        num_processes = cpu_count()
    # Note that JoinableQueue uses an integer for tracking locations in the queue.
    # Because it's using shared memory it's not terribly flexible and gives annoyingly
    # unclear errors if you go over the limit. We'd like the queue to be as large as
    # possible so that we can avoid contention, but without allocating a max possible
    # size queue unless we need it, thus the calculation below. 32767 is a hard limit.
    q = JoinableQueue(maxsize=min(len(a) + num_processes, 2**15 - 1))
    output_pipes = [Pipe(duplex=False) for _ in range(num_processes)]
    send_pipes = [p for _, p in output_pipes]
    recv_pipes = [p for p, _ in output_pipes]
    pool = [Process(target=_parallel_for, args=(q, cls, pipe) + tuple(args), kwargs=kwargs)
            for pipe in send_pipes]
    output_watcher = MultiPipeWatcher(recv_pipes)
    try:
        for p in pool:
            p.start()
        output_watcher.start()
        for x in a:
            q.put(x)
        for _ in range(num_processes):
            q.put(None)  # End markers
        q.close()
        q.join_thread()
        q.join()
        for p in pool:
            p.join()
        output_watcher.flush()
        output_watcher.join()
        combined_output = output_watcher.merged
        return combined_output
    except KeyboardInterrupt:
        print("Interrupted -- terminating worker processes")
        for p in pool:
            p.terminate()
        for p in pool:
            p.join()
        raise
def crunch(file_name, ext_type, handler, pool_size=4, queue_size=40, limit=None):
    print('Crunching file: %s, limit: %s' % (file_name, limit))
    q = JoinableQueue(queue_size)
    q_feats = Queue()
    pool = Pool(pool_size, wrap_handler(handler), ((q, q_feats),))
    with file_reader(file_name) as reader:
        idx = 0
        for entry in reader:
            if (entry.pathname.find(ext_type) != -1):
                text = [b for b in entry.get_blocks()]
                key = entry.pathname.split('/')[-1].split('.')[0]
                q.put((key, text), True)
                idx += 1
                print('Processing:', entry.pathname, idx)
                if limit and idx >= limit:
                    print('Reached the limit')
                    break
    q.close()
    q.join()
    pool.close()
    result = []
    for i in range(q_feats.qsize()):
        result.append(q_feats.get())
    return result
def __iter__(self):
    queue = JoinableQueue(maxsize=params.N_PRODUCERS * 8)
    n_batches, job_queue = self.start_producers(queue)
    # Run as consumer (read items from queue, in current thread)
    for x in xrange(n_batches):
        item = queue.get()
        yield item
        queue.task_done()
    queue.close()
    job_queue.close()
    if self.shuffle:
        shuffled_idx = np.random.permutation(len(self.X))
        X_new = []
        y_new = []
        for i in range(len(self.X)):
            X_new += [self.X[shuffled_idx[i]]]
            y_new += [self.y[shuffled_idx[i]]]
        self.X = X_new
        self.y = y_new
def main():
    print("______POPULATE FEATURE NAMES START__")
    populateFeatureNames(trainFile)
    print("___POPULATE FEATURE NAMES ENDS__")
    ################
    q = JoinableQueue(20)
    q_feats = Queue()
    pool = Pool(6, populateFeatures, ((q, q_feats),))
    returnedList = []
    print("__onlyfile population starts__")
    onlyfiles = [f for f in os.listdir(path) if ".asm" in f]
    print("___FEATURE EXTRACTION STARTS FOR PATH__")
    for ffile in onlyfiles:
        q.put((ffile, path), True)
    q.close()
    #time.sleep(100)
    q.join()
    #time.sleep(100)
    pool.close()
    for i in range(q_feats.qsize()):
        returnedList.append(q_feats.get())
    #returnedList = p.map(functools.partial(populateFeatures, filePath=path), onlyfiles)
    #time.sleep(10)
    #p.close()
    #time.sleep(100)
    #p.join()
    #time.sleep(10)
    print("___ PROCESSING OUTPUT OF MAP FUNCTION FOR FEATURE_EXTRACTION STARTS___")
    #except:
    #    print("Something went wrong")
    generateHeader()
    generateFeatures(returnedList)
    print("_____ PROCESSING OUTPUT OF MAP FUNCTION FOR FEATURE_EXTRACTION ENDS____")
    print("_____FEATURE EXTRACTION ENDS____")
def parexec(signal, out, num_consumers, iterator):
    t = time.time()
    tasks = JoinableQueue()
    results = Queue()
    print 'starting consumers'
    consumers = [Consumer(tasks, results, [signal]) for _ in range(num_consumers)]
    for w in consumers:
        w.start()
    print 'adding tasks'
    for i in iterator:
        tasks.put(Task(i, signal))
    for i in range(num_consumers):
        tasks.put(None)
    print 'collecting'
    for _ in range(len(iterator)):
        out.append(results.get())
        if _ % 100000 == 0:
            print _
    tasks.close()
    tasks.join_thread()
    print 'closing'
    for w in consumers:
        w.join()
    print time.time() - t
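# A minimal sketch of the Consumer/Task pair that parexec() above expects
# (assumption: the real classes do signal-specific work; here Task just wraps a
# value and Consumer drains the JoinableQueue until it sees the None poison pill).
from multiprocessing import Process

class Task(object):
    def __init__(self, item, signal):
        self.item = item
        self.signal = signal

    def __call__(self):
        return self.item  # placeholder for the real per-item computation

class Consumer(Process):
    def __init__(self, task_queue, result_queue, signals):
        Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.signals = signals

    def run(self):
        while True:
            task = self.task_queue.get()
            if task is None:  # poison pill means shutdown
                self.task_queue.task_done()
                break
            self.result_queue.put(task())
            self.task_queue.task_done()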
def main():
    """Main loop"""
    start_time = time.time()
    motd()
    args = get_args()
    print 'Started: ', time.strftime("%a %b %d, %Y %H:%M:%S", time.localtime(start_time))
    # build our configuration object w/ input params
    conf = ConfigParser.ConfigParser()
    conf.read(args.config)
    params = Parameters(conf)
    # create the db and tables, returning connection
    # and cursor
    if params.db.create:
        conn, cur = db.create_db_and_new_tables(params.db.name)
    # alert user we're dropping reads
    print "[WARN] Dropping all demultiplexed reads ≤ {0} bp long".format(params.quality.drop_len)
    # get num reads and split up work
    print "Splitting reads into work units..."
    num_reads, work = get_work(params, args.job_size)
    print "There are {:,d} reads".format(num_reads)
    # give some indication of progress for longer runs
    if num_reads > 999:
        sys.stdout.write('Running...\n')
    #pdb.set_trace()
    #r1out = open('r1-reads.fasta', 'w', 1)
    #r2out = open('r2-reads.fasta', 'w', 1)
    # MULTICORE
    if params.parallelism.cores > 1:
        jobs = Queue()
        results = JoinableQueue()
        # We're stacking groups of jobs on the work
        # Queue, conceivably to save the overhead of
        # placing them on there one-by-one.
        print "Adding jobs to work queue..."
        for unit in work:
            jobs.put(unit)
        print "There are {} jobs...".format(num_reads / args.job_size)
        # setup the processes for the jobs
        print "Starting {} workers...".format(params.parallelism.cores)
        # start the worker processes
        [Process(target=multiproc, args=(jobs, results, params)).start()
         for i in xrange(params.parallelism.cores)]
        # we're putting single results on the results Queue so
        # that the db can (in theory) consume them at
        # a rather consistent rate rather than in spurts
        #for unit in xrange(num_reads):
        count = 0
        for unit in xrange(num_reads):
            dmux = results.get()
            rowid = db.insert_record_to_db(cur, dmux)
            write_results_out(cur, rowid, params, dmux)
            results.task_done()
            progress(rowid, 10000, 100000)
            count += 1
        # make sure we put None at end of Queue
        # in an amount equiv. to num_procs
        for unit in xrange(params.parallelism.cores):
            jobs.put(None)
        # join the results, so that they can finish
        results.join()
        # close up our queues
        jobs.close()
        results.close()
    # SINGLECORE
    else:
        # fake a multiprocessing queue, so stacking and accessing results
        # is identical.
        fake_queue = ListQueue()
        results = singleproc(work, fake_queue, params)
        count = 0
        for dmux in results:
            rowid = db.insert_record_to_db(cur, dmux)
            write_results_out(cur, rowid, params, dmux)
            progress(rowid, 10000, 100000)
            count += 1
    params.storage.close()
    conn.commit()
    cur.close()
    conn.close()
    end_time = time.time()
    pretty_end_time = time.strftime("%a %b %d, %Y %H:%M:%S", time.localtime(end_time))
    print "\nEnded: {} (run time {} minutes)".format(pretty_end_time,
                                                     round((end_time - start_time) / 60, 3))
def run_parallel(config):
    """
    runs assembly in parallel and merges output from child processes

    config: RunConfig object
    """
    # create temp directory
    tmp_dir = os.path.join(config.output_dir, "tmp")
    if not os.path.exists(tmp_dir):
        logging.debug("Creating tmp directory '%s'" % (tmp_dir))
        os.makedirs(tmp_dir)
    # create queue
    input_queue = JoinableQueue(maxsize=config.num_processors * 3)
    # shared memory values
    locus_id_value_obj = LockValue(1)
    gene_id_value_obj = LockValue(1)
    tss_id_value_obj = LockValue(1)
    t_id_value_obj = LockValue(1)
    # start worker processes
    procs = []
    worker_prefixes = []
    for i in xrange(config.num_processors):
        worker_prefix = os.path.join(tmp_dir, "worker%03d" % (i))
        worker_prefixes.append(worker_prefix)
        args = (input_queue, locus_id_value_obj, gene_id_value_obj,
                tss_id_value_obj, t_id_value_obj, worker_prefix, config)
        p = Process(target=assembly_worker, args=args)
        p.daemon = True
        p.start()
        procs.append(p)
    # parse gtf file
    for lines in parse_loci(open(config.gtf_input_file)):
        input_queue.put(lines)
    # stop workers
    for p in procs:
        input_queue.put([])
    # close queue
    input_queue.join()
    input_queue.close()
    # join worker processes
    for p in procs:
        p.join()
    # merge gtf files
    if config.create_gtf:
        logging.info("Merging %d worker GTF files" % (config.num_processors))
        worker_gtf_files = [prefix + ".gtf" for prefix in worker_prefixes]
        output_gtf_file = os.path.join(config.output_dir, "assembly.gtf")
        merge_sort_gtf_files(worker_gtf_files, output_gtf_file, tmp_dir=tmp_dir)
        # remove worker gtf files
        for filename in worker_gtf_files:
            if os.path.exists(filename):
                os.remove(filename)
    # merge bed files
    if config.create_bed:
        logging.info("Merging %d worker BED files" % (config.num_processors))
        worker_bed_files = [p + ".bed" for p in worker_prefixes]
        output_bed_file = os.path.join(config.output_dir, "assembly.bed")
        merge_sort_files(worker_bed_files, output_bed_file, sort_func=sort_bed, tmp_dir=tmp_dir)
        # write bed file track description line
        track_name = os.path.basename(config.output_dir)
        track_line = ' '.join(['track name="%s"' % (track_name),
                               'description="%s"' % (track_name),
                               'visibility=pack',
                               'useScore=1'])
        track_file = os.path.join(config.output_dir, "assembly.bed.ucsc_track")
        fileh = open(track_file, "w")
        print >>fileh, track_line
        fileh.close()
    # merge bedgraph files
    if config.create_bedgraph:
        logging.info("Merging %d worker bedGraph files" % (config.num_processors))
        for strand in xrange(0, 3):
            strand_name = STRAND_NAMES[strand]
            bgfiles = ['%s_%s.bedgraph' % (p, strand_name) for p in worker_prefixes]
            output_file = os.path.join(config.output_dir, "assembly_%s.bedgraph" % strand_name)
            merge_sort_files(bgfiles, output_file, sort_func=sort_bed, tmp_dir=tmp_dir)
            track_name = '%s_%s' % (os.path.basename(config.output_dir), strand_name)
            track_line = ' '.join(['track type=bedGraph',
                                   'name="%s"' % (track_name),
                                   'description="%s"' % (track_name),
                                   'visibility=full',
                                   'color=%s' % (STRAND_COLORS[strand]),
                                   'autoScale=on',
                                   'alwaysZero=on',
                                   'maxHeightPixels=64:64:11'])
            track_file = os.path.join(config.output_dir,
                                      "assembly_%s.bedgraph.ucsc_track" % strand_name)
            fileh = open(track_file, "w")
            print >>fileh, track_line
            fileh.close()
    # cleanup
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    logging.info("Done")
    return 0
def assemble_parallel(args, results, num_samples):
    '''
    args: object containing parameters to configure the assembly process
    results: Results object containing input and output filenames
    num_samples: number of samples in assembly

    Args
    ====
    - guided_strand
    - guided_ends
    - guided_assembly
    - change_point
    - change_point_pvalue
    - change_point_fold_change
    - change_point_trim
    - path_graph_kmax
    - path_frac
    - max_paths
    - isoform_frac
    - max_isoforms
    - assemble_unstranded

    Results
    =======
    Input file attributes:
    - locus_index_file
    - transfrags_bed_file
    Output file attributes:
    - bedgraph_files
    - splice_bed_file
    - splice_graph_gtf_file
    - change_point_gtf_file
    - path_graph_stats_file
    - assembly_gtf_file
    - assembly_bed_file
    '''
    logging.info('Assembling in parallel using %d processes' % (args.num_processes))
    # create queue
    input_queue = JoinableQueue(maxsize=args.num_processes * 2)
    bed_file = results.transfrags_bed_file
    global_ids = GlobalIds()
    # start worker processes
    procs = []
    worker_results = []
    for i in xrange(args.num_processes):
        worker_id = 'worker%03d' % i
        worker_dir = os.path.join(results.tmp_dir, worker_id)
        if not os.path.exists(worker_dir):
            logging.debug("\tcreating worker directory '%s'" % (worker_dir))
            os.makedirs(worker_dir)
        worker_results.append(Results(worker_dir))
        worker_state = WorkerState(bed_file, input_queue, global_ids, args,
                                   num_samples, worker_dir)
        p = Process(target=assemble_worker, args=(worker_state,))
        p.start()
        procs.append(p)
    # parse locus file
    for locus in parse_locus_index(results.locus_index_file):
        input_queue.put(locus)
    for p in procs:
        input_queue.put(None)
    # close input queue
    input_queue.join()
    input_queue.close()
    # join worker processes
    for p in procs:
        p.join()

    # merge output files
    def merge(input_files, output_file, key, header=None):
        fhs = [open(f, 'rb', 64 * 1024) for f in input_files]
        with open(output_file, 'wb', 64 * 1024) as output:
            if header is not None:
                output.write(header)
            iterator = batch_merge(key, *fhs)
            output.writelines(iterator)
        for fh in fhs:
            fh.close()

    logging.info('Merging output files')
    logging.debug('\tmerging bedgraph files')
    for i, output_file in enumerate(results.bedgraph_files):
        input_files = [r.bedgraph_files[i] for r in worker_results]
        merge(input_files, output_file, sort_key_bed)
    logging.debug('\tmerging splice bed file')
    header = ('track name=junctions description="Splice Junctions" '
              'graphType=junctions\n')
    merge(input_files=[r.splice_bed_file for r in worker_results],
          output_file=results.splice_bed_file,
          key=sort_key_bed, header=header)
    logging.debug('\tmerging splice graph gtf file')
    merge(input_files=[r.splice_graph_gtf_file for r in worker_results],
          output_file=results.splice_graph_gtf_file,
          key=sort_key_gtf)
    logging.debug('\tmerging change point gtf file')
    merge(input_files=[r.change_point_gtf_file for r in worker_results],
          output_file=results.change_point_gtf_file,
          key=sort_key_gtf)
    logging.debug('\tmerging path graph stats file')
    header = ['chrom', 'start', 'end', 'strand', 'k', 'kmax', 'transfrags',
              'short_transfrags', 'short_expr', 'lost_short', 'lost_short_expr',
              'kmers', 'lost_kmers', 'tot_expr', 'graph_expr', 'expr_frac',
              'valid', 'opt', 'is_opt\n']
    header = '\t'.join(header)
    merge(input_files=[r.path_graph_stats_file for r in worker_results],
          output_file=results.path_graph_stats_file,
          key=sort_key_bed, header=header)
    logging.debug('\tmerging assembly bed file')
    merge(input_files=[r.assembly_bed_file for r in worker_results],
          output_file=results.assembly_bed_file,
          key=sort_key_bed)
    logging.debug('\tmerging assembly gtf file')
    merge(input_files=[r.assembly_gtf_file for r in worker_results],
          output_file=results.assembly_gtf_file,
          key=sort_key_gtf)

    # cleanup worker data
    logging.info('Removing temporary files')

    def shutil_error_callback(func, path, excinfo):
        logging.error('Error removing tmp files path=%s message=%s' % (path, excinfo))

    for r in worker_results:
        shutil.rmtree(r.output_dir, onerror=shutil_error_callback)
    logging.info('Done')
    return 0
    counts = count_bytes(lines)
    q_out.put([name, counts])
    q_in.task_done()

q = JoinableQueue(20)
q_feats = Queue()
pool = Pool(6, get_features, ((q, q_feats),))

with libarchive.public.file_reader(TRAIN_PATH) as archive:
    for entry in archive:
        # Use only .bytes
        if (entry.pathname.find('.bytes') != -1):
            text = []
            for b in entry.get_blocks():
                text.append(b)
            q.put((entry.pathname, text), True)

q.close()
q.join()
pool.close()

# Now you can get a list of features like that
feats = []
for i in range(q_feats.qsize()):
    feats.append(q_feats.get())
def parallelbuild(self, numworkers):
    class Worker(Process):
        def __init__(self, task_queue, results, chromosomes, reference, lengths):
            Process.__init__(self)
            self.task_queue = task_queue
            self.result_queue = results
            self.chromosomes = chromosomes
            self.reference = reference
            self.lengths = lengths

        def run(self):
            while True:
                next_task = self.task_queue.get()
                if next_task is None:
                    # Poison pill means shutdown
                    self.task_queue.task_done()
                    break
                self.buildHaplotype(next_task)
                self.task_queue.task_done()
                self.result_queue.put(next_task[-1])
            return

        def buildChromosome(self, sequence, haplotype, reference):
            referencesequence = {bi: sequence[bi[0]:bi[1]] for bi in reference}
            result = ''
            for bi in haplotype:
                result += ''.join(referencesequence[bi])
            return result

        def buildHaplotype(self, task):
            originalfa, haplotypes, haplengths, output = task
            with open(originalfa, 'r') as fa:
                with open(output, 'w') as out:
                    name = ''
                    sequence = []
                    for line in fa:
                        if line != '':
                            if line[0] == '>':
                                if name != '' and name in self.chromosomes and haplengths[name] > 0:
                                    out.write(">{}\n".format(name))
                                    assert (len(sequence) == self.lengths[name])
                                    out.write("{}\n".format(
                                        self.buildChromosome(sequence, haplotypes[name],
                                                             self.reference[name])))
                                name = line.strip()[1:]
                                sequence = []
                            else:
                                if name in self.chromosomes and haplengths[name] > 0:
                                    sequence += list(line.strip())
                    if len(sequence) > 0:
                        if name != '' and name in self.chromosomes:
                            out.write(">{}\n".format(name))
                            assert (len(sequence) == self.lengths[name])
                            out.write("{}\n".format(
                                self.buildChromosome(sequence, haplotypes[name],
                                                     self.reference[name])))

    # Establish communication queues
    tasks = JoinableQueue()
    results = Queue()
    # Enqueue jobs
    c = copy.deepcopy
    jobs_count = 0
    for clone in self.tumor.clones:
        originalfa = c(clone.humanGenome.maternalfa)
        haplotypes = {c(chro): c(clone.genome[chro].maternalHaplotype)
                      for chro in clone.humanGenome.chromosomes}
        haplengths = {c(chro): c(clone.genome[chro].maternalHaplotypeLength)
                      for chro in clone.humanGenome.chromosomes}
        output = os.path.join(self.xdir, '{}.maternal.fa'.format(c(clone.label)))
        tasks.put((originalfa, haplotypes, haplengths, output))
        jobs_count += 1
        originalfa = c(clone.humanGenome.paternalfa)
        haplotypes = {c(chro): c(clone.genome[chro].paternalHaplotype)
                      for chro in clone.humanGenome.chromosomes}
        haplengths = {c(chro): c(clone.genome[chro].paternalHaplotypeLength)
                      for chro in clone.humanGenome.chromosomes}
        output = os.path.join(self.xdir, '{}.paternal.fa'.format(c(clone.label)))
        tasks.put((originalfa, haplotypes, haplengths, output))
        jobs_count += 1
    # Setting up the workers
    workers = [
        Worker(task_queue=tasks,
               results=results,
               chromosomes=c(self.tumor.human.chromosomes),
               reference={c(chro): c(self.tumor.root.genome[chro].reference)
                          for chro in self.tumor.human.chromosomes},
               lengths={c(chro): c(self.tumor.root.genome[chro].length)
                        for chro in self.tumor.human.chromosomes})
        for i in range(min(numworkers, jobs_count))
    ]
    # Add a poison pill for each worker
    for i in range(len(workers)):
        tasks.put(None)
    # Start the workers
    for w in workers:
        w.start()
    # Wait for all of the tasks to finish
    tasks.join()
    # Collect results
    collect = []
    for i in range(jobs_count):
        collect.append(results.get())
    # Close Queues
    tasks.close()
    results.close()
    # Ensure each worker terminates
    for w in workers:
        w.terminate()
        w.join()
    return collect
def aggregate_parallel(samples, args, results):
    '''
    Process and aggregate GTF input files

    samples: list of Sample objects
    args: from Argparse module. command-line arguments to configure the
          assembly process
    results: Results object containing input and output filenames
    '''
    logging.info('Aggregating in parallel using %d processes' % (args.num_processes))

    if args.filter_splice_juncs and args.ref_genome_fasta_file:
        # test opening FastaFile
        logging.info('Indexing reference genome fasta file (if necessary)')
        fasta_fh = FastaFile(args.ref_genome_fasta_file)
        fasta_fh.close()

    # create queue
    input_queue = JoinableQueue(maxsize=args.num_processes * 2)
    # start worker processes
    procs = []
    worker_results = []
    for i in xrange(args.num_processes):
        worker_id = 'aggregate_worker%03d' % i
        worker_dir = os.path.join(results.tmp_dir, worker_id)
        if not os.path.exists(worker_dir):
            os.makedirs(worker_dir)
        worker_results.append(Results(worker_dir))
        p = Process(target=aggregate_worker, args=(input_queue, args, worker_dir))
        p.start()
        procs.append(p)

    # reference gtf
    if args.ref_gtf_file is not None:
        logging.debug('Reference: %s' % args.ref_gtf_file)
        input_queue.put(Sample(args.ref_gtf_file, Sample.REF_ID))
    # parse samples
    for sample in samples:
        input_queue.put(sample)
    for p in procs:
        input_queue.put(None)
    # close input queue
    input_queue.join()
    input_queue.close()
    # join worker processes
    for p in procs:
        p.join()

    # merge output files
    logging.info('Merging aggregated files')
    logging.debug('\tmerging bed files')
    retcode = merge_bed(input_files=[r.transfrags_bed_file for r in worker_results],
                        output_file=results.transfrags_bed_file,
                        num_processes=args.num_processes,
                        tmp_dir=results.tmp_dir)
    if retcode != 0:
        raise TacoError('Error running linux merge')

    logging.debug('\tmerging filtered bed files')
    retcode = merge_bed(input_files=[r.transfrags_filtered_bed_file for r in worker_results],
                        output_file=results.transfrags_filtered_bed_file,
                        num_processes=args.num_processes,
                        tmp_dir=results.tmp_dir)
    if retcode != 0:
        raise TacoError('Error running linux merge')

    logging.debug('\tmerging sample stats')

    def sort_key_field0(line):
        fields = line.split('\t', 1)
        return fields[0]

    stats_header = ['sample_id', 'num_transfrags', 'filtered_length',
                    'filtered_expr', 'filtered_splice\n']
    stats_header = '\t'.join(stats_header)
    merge_files(input_files=[r.sample_stats_file for r in worker_results],
                output_file=results.sample_stats_file,
                key=sort_key_field0,
                header=stats_header)

    # cleanup worker data
    logging.info('Removing temporary files')

    def shutil_error_callback(func, path, excinfo):
        logging.error('Error removing tmp files path=%s message=%s' % (path, excinfo))

    for r in worker_results:
        shutil.rmtree(r.output_dir, onerror=shutil_error_callback)

    logging.info('Aggregate done')
    return 0
def main(argv):
    parser = OptionParser()

    group = OptionGroup(parser, 'S3 options')
    group.add_option('--bucket', metavar='BUCKET', help='set bucket')
    group.add_option('--insecure', action='store_false', dest='secure',
                     help='use insecure connection')
    group.add_option('--secure', action='store_true', default=True, dest='secure',
                     help='use secure connection')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Source options')
    group.add_option('--walk', choices=('filesystem', 'tar'), default='filesystem',
                     metavar='MODE', help='set walk mode (filesystem or tar)')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Put options')
    group.add_option('--content-type', metavar='CONTENT-TYPE', help='set content type')
    group.add_option('--gzip', action='store_true', help='gzip values and set content encoding')
    group.add_option('--put', choices=('add', 'stupid', 'update'), default='update',
                     metavar='MODE', help='set put mode (add, stupid, or update)')
    group.add_option('--prefix', default='', metavar='PREFIX', help='set key prefix')
    group.add_option('--resume', action='append', default=[], metavar='FILENAME',
                     help='resume from log file')
    group.add_option('--grant', metavar='GRANT', default=None, choices=CannedACLStrings,
                     help='A canned ACL policy to be applied to each file uploaded.\n'
                          'Choices: %s' % ', '.join(CannedACLStrings))
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Logging options')
    group.add_option('--log-filename', metavar='FILENAME', help='set log filename')
    group.add_option('--quiet', '-q', action='count', default=0, help='less output')
    group.add_option('--verbose', '-v', action='count', default=0, help='more output')
    parser.add_option_group(group)

    group = OptionGroup(parser, 'Debug and performance tuning options')
    group.add_option('--dry-run', action='store_true', help="don't write to S3")
    group.add_option('--limit', metavar='N', type=int, help='set maximum number of keys to put')
    group.add_option('--processes', default=8, metavar='PROCESSES', type=int,
                     help='set number of putter processes')
    parser.add_option_group(group)

    options, args = parser.parse_args(argv[1:])

    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 * (options.quiet - options.verbose))
    logger = logging.getLogger(os.path.basename(sys.argv[0]))

    if len(args) < 1:
        logger.error('missing source operand')
        return 1
    if not options.bucket:
        logger.error('missing bucket')
        return 1

    connection = S3Connection(is_secure=options.secure)
    bucket = connection.get_bucket(options.bucket)
    del bucket
    del connection

    start = time.time()

    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()

    walk = {'filesystem': walk_filesystem, 'tar': walk_tar}[options.walk]
    walker_process = Process(target=walker, args=(walk, put_queue, args, options))
    walker_process.start()

    put = {'add': put_add, 'stupid': put_stupid, 'update': put_update}[options.put]
    putter_processes = list(islice(repeatedly(Process, target=putter,
                                              args=(put, put_queue, stat_queue, options)),
                                   options.processes))
    for putter_process in putter_processes:
        putter_process.start()

    statter_process = Process(target=statter, args=(stat_queue, start, options))
    statter_process.start()

    walker_process.join()
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
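The walker/putter/statter layout above is a three-stage pipeline; the JoinableQueue-specific part is the shutdown order (one None sentinel per consumer, close(), join the consumers, then join_thread()). A reduced sketch of that same shutdown sequence with trivial stages follows; the stage names and payloads here are illustrative only.

from multiprocessing import JoinableQueue, Process

def producer(q, items):
    for item in items:
        q.put(item)

def consumer(q, out_q):
    while True:
        item = q.get()
        q.task_done()
        if item is None:
            break
        out_q.put(item.upper())          # placeholder transformation

def collector(out_q):
    while True:
        item = out_q.get()
        out_q.task_done()
        if item is None:
            break
        print(item)

if __name__ == '__main__':
    work_q, out_q = JoinableQueue(), JoinableQueue()
    consumers = [Process(target=consumer, args=(work_q, out_q)) for _ in range(2)]
    coll = Process(target=collector, args=(out_q,))
    prod = Process(target=producer, args=(work_q, ['a', 'b', 'c']))
    for p in consumers + [coll, prod]:
        p.start()
    prod.join()                          # all work has been enqueued
    for _ in consumers:
        work_q.put(None)                 # one sentinel per consumer
    work_q.close()
    for p in consumers:
        p.join()
    out_q.put(None)                      # single collector, single sentinel
    out_q.close()
    coll.join()
    work_q.join_thread()                 # wait for feeder threads after close()
    out_q.join_thread()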
class PartonicRunner2:
    def __init__(self, m2, q2, Delta, nlf, f, fp, Neta, nProcesses=cpu_count()):
        # parameters
        self.m2 = m2
        self.q2 = q2
        self.Delta = Delta
        self.nlf = nlf
        self.f = f
        self.fp = fp
        self.Neta = Neta
        self.nProcesses = nProcesses
        # vars
        self.__qIn = JoinableQueue()
        self.__qOut = Queue()
        self.__js = []
        self.__etas = []
        self.__ks = []
        self.__q2s = []
        self.__data = {}
        self.__processes = []

    # setup default grid
    def _getGrid(self):
        self.__js = range(self.Neta)
        self.__etas = [10.0 ** (-3.0 + 6.0 / (self.Neta - 1) * j) for j in self.__js]
        g = []
        for proj in ["G", "L", "P"]:
            for j in self.__js:
                g.append({"proj": proj, "j": j, "eta": self.__etas[j],
                          "f": self.f, "res": np.nan})
        return g

    # setup Marcos grid
    def _getGridMarco(self):
        self.__etas = [j1 / 2.0 * 10.0 ** (j2)
                       for j1 in xrange(2, 19) for j2 in [-3, -2, -1, 0, 1, 2]]
        self.__etas.append(1e3)
        self.__etas.sort()
        self.__js = range(len(self.__etas))
        self.__q2s = [-1e3]  # [-1e-2,-1e0,-1e1,-1e2,-1e3]
        self.__ks = range(len(self.__q2s))
        g = []
        for proj in ["G", "L"]:
            for k in self.__ks:
                for j in self.__js:
                    g.append({
                        "proj": proj,
                        "j": j,
                        "eta": self.__etas[j],
                        "k": k,
                        "q2": self.__q2s[k],
                        "f": self.f,
                        "res": np.nan,
                    })
        return g

    # start processes
    def _compute(self, g):
        # start processes
        oArgs = {
            "G": (self.m2, self.q2, self.Delta, ElProduction.projT.G, self.nlf),
            "L": (self.m2, self.q2, self.Delta, ElProduction.projT.L, self.nlf),
            "P": (self.m2, self.q2, self.Delta, ElProduction.projT.P, self.nlf),
        }
        lenParams = len(g)
        processes = []
        for j in xrange(self.nProcesses):
            processes.append(Process(target=_threadWorker,
                                     args=(self.__qIn, self.__qOut, oArgs, lenParams)))
        [p.start() for p in processes]
        # fill
        for e in g:
            self.__qIn.put(e)
        # add EOF
        for n in xrange(self.nProcesses):
            self.__qIn.put(None)
        # run
        try:
            self.__qIn.join()
        except KeyboardInterrupt:
            [p.terminate() for p in processes]
        self.__qIn.close()
        sys.stdout.write("\n")

    # reorder
    def _reorder(self):
        self.__data = {}
        self.__data["G"] = [np.nan for j in self.__js]
        self.__data["L"] = [np.nan for j in self.__js]
        self.__data["P"] = [np.nan for j in self.__js]
        while not self.__qOut.empty():
            p = self.__qOut.get()
            self.__data[p["proj"]][p["j"]] = p["res"]

    # reorder Marcos data
    def _reorderMarco(self):
        self.__data = {}
        self.__data["G"] = [[np.nan for j in self.__js] for k in self.__ks]
        self.__data["L"] = [[np.nan for j in self.__js] for k in self.__ks]
        while not self.__qOut.empty():
            p = self.__qOut.get()
            self.__data[p["proj"]][p["k"]][p["j"]] = p["res"]

    # write data
    def _write(self):
        with open(self.fp, "w") as f:
            for j in self.__js:
                dataT = self.__data["G"][j] + self.__data["L"][j] / 2.0
                f.write("%e\t%e\t%e\t%e\n" % (self.__etas[j], dataT,
                                              self.__data["L"][j], self.__data["P"][j]))

    # write Marcos data
    def _writeMarco(self):
        with open(self.fp, "w") as f:
            for k in self.__ks:
                for j in self.__js:
                    vs = []
                    vs.append(self.__etas[j])
                    vs.append(-self.__q2s[k])
                    vs.append(self.__data["L"][k][j])
                    dataT = self.__data["G"][k][j] + self.__data["L"][k][j] / 2.0
                    vs.append(dataT)
                    # vs.append(self.__data["G"][k][j])
                    f.write(("\t").join("%e" % v for v in vs) + "\n")

    # run program
    def run(self):
        self._compute(self._getGrid())
        self._reorder()
        self._write()

    # run program to compare to Marco
    def runMarco(self):
        self._compute(self._getGridMarco())
        self._reorderMarco()
        self._writeMarco()
class Factory:
    """Like a Pool, but workers must work, not swim!
    """

    def __init__(self, size=None, autostart=True, max_queue_size=None):
        self.size = size or cpu_count()
        if max_queue_size is None:
            max_queue_size = self.size * 3
        self.max_queue_size = max_queue_size
        self._task_id_counter = _counter()
        if autostart:
            self.start()

    def start(self):
        """Start the factory, making it possible to run tasks.
        """
        if getattr(self, '_running', False):
            return
        self._running = True
        logger.info('Starting factory')
        self.queue = JoinableQueue(self.max_queue_size)
        self.workers = []
        for x in range(self.size):
            proc = Process(target=self._worker_process, args=(x, self.queue), daemon=True)
            self.workers.append(proc)
            proc.start()

    def _worker_process(self, idx, queue):
        # SIGINT is handled by controller process
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        logger.info('[Worker %s] Entering main loop', idx)
        while True:
            if queue.empty():
                logger.info('[Worker %s] idling', idx)
            task = queue.get(block=True)
            logger.info('[Worker %s] Accepted new task: %s', idx, task)
            logger.debug('Queue size is now %s', queue.qsize())
            try:
                task.function(*task.args, **task.kwargs)
            except:
                logger.exception('[Worker %s] Exception raised while running task', idx)
            else:
                logger.info('[Worker %s] Task complete: %s', idx, str(task))
            finally:
                queue.task_done()

    def run(self, func, *args, **kwargs):
        """Runs a function, asynchronously in the pool

        Args:
            func: the function name
            *args: arguments to the function to be called
            **kwargs: keyword arguments to called function

        Return:
            int: the task id
        """
        task_id = self._get_next_task_id()
        task = Task(task_id, func, args, kwargs)
        logger.info('Scheduling task: %s', str(task))
        self.queue.put(task)
        return task_id

    def shutdown(self):
        """Shutdown the factory.

        Will wait until all the queued tasks have been completed, then
        shuts down the worker processes.
        """
        logger.info('Shutting down (waiting for tasks to complete)')
        self.queue.close()
        self.queue.join()
        logger.info('Processing complete. Shutting down workers')
        self.terminate()

    def terminate(self):
        """Immediately terminate the factory.

        Will send a SIGTERM to all worker processes; running tasks will
        be interrupted, queued ones will be lost.
        """
        for idx, proc in enumerate(self.workers):
            logger.info('Terminating worker %s (pid %s)', idx, proc.pid)
            proc.terminate()
            proc.join()
        self._running = False

    def _get_next_task_id(self):
        return next(self._task_id_counter)
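The Factory above shuts down by waiting on queue.join() and then terminating its daemon workers, so every worker must call task_done() even when a task raises; otherwise join() never returns. A minimal sketch of that detail, with a deliberately failing placeholder task (might_fail and the queue size are assumptions for illustration):

from multiprocessing import JoinableQueue, Process
import time

def worker(q):
    while True:
        func, arg = q.get()
        try:
            func(arg)                    # the task itself may raise
        except Exception:
            pass                         # swallow so the worker survives
        finally:
            q.task_done()                # always acknowledge, or join() would hang

def might_fail(x):
    if x % 3 == 0:
        raise ValueError(x)
    time.sleep(0.01)

if __name__ == '__main__':
    q = JoinableQueue(8)
    workers = [Process(target=worker, args=(q,), daemon=True) for _ in range(2)]
    for w in workers:
        w.start()
    for i in range(20):
        q.put((might_fail, i))
    q.join()   # returns because task_done() runs even for failed tasks
    # daemon workers are killed automatically when the main process exits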
def MCMC(n, theta_0, priors_dict, beta, rho, chains, burn_rate=0.1,
         down_sample=1, max_attempts=6, pflag=True, cpu=None, randomize=True):
    # Check input parameters
    mcmcChecks(n, theta_0, beta, rho, chains, burn_rate, down_sample, max_attempts)
    print("Performing MCMC Analysis")

    # Selecting optimal temperature
    hyper_theta, beta = hyperparameter_fitting(theta_0, priors_dict, beta, rho, max_attempts)
    if pflag == True:
        check_proposals(hyper_theta, 50)

    # Overdisperse chains
    if randomize == True:
        print("Dispersing chains")
        if chains > 1:
            chains_list = disperse_chains(hyper_theta, priors_dict, chains)
        else:
            chains_list = [hyper_theta]
    else:
        chains_list = [hyper_theta for i in range(chains)]

    # Sample using MCMC
    print("Sampling from posterior distribution")
    if chains >= cpu_count():
        NUMBER_OF_PROCESSES = cpu_count() - 1
    else:
        NUMBER_OF_PROCESSES = chains
    if cpu != None:
        NUMBER_OF_PROCESSES = cpu  # Manual override of core number selection
    print("Using {} threads".format(NUMBER_OF_PROCESSES))

    # clear previous progress report
    with open(results_dir + 'progress.txt', 'w') as f:
        f.write('')

    jobs = Queue()  # put jobs on queue
    result = JoinableQueue()
    countQ = JoinableQueue()
    if NUMBER_OF_PROCESSES == 1:
        jobs.put([chains_list[0], beta, rho, n, priors_dict])
        mh(0, jobs, result, countQ)
    else:
        for m in range(chains):
            jobs.put([chains_list[m], beta, rho, n, priors_dict])
        [Process(target=mh, args=(i, jobs, result, countQ)).start()
         for i in range(NUMBER_OF_PROCESSES)]

    # pull in the results from each thread
    pool_results = []
    chain_attempts = []
    for m in range(chains):
        r = result.get()
        pool_results.append(r)
        result.task_done()
        a = countQ.get()
        chain_attempts.append(a)

    # tell the workers there are no more jobs
    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)

    # close all extra threads
    result.join()
    jobs.close()
    result.close()
    countQ.close()

    # Perform data analysis
    average_acceptance = np.mean([el[1] for el in chain_attempts])
    print("Average acceptance rate was {:.1f}%".format(average_acceptance))
    samples = get_parameter_distributions(pool_results, burn_rate, down_sample)
    plot_parameter_autocorrelations(samples.drop('gamma', axis=1))
    get_summary_statistics(samples.drop('gamma', axis=1))

    with open(results_dir + 'simulation_summary.txt', 'w') as f:
        f.write('Temperature used was {}\n'.format(beta))
        f.write('Number of chains = {}\n'.format(chains))
        f.write("Average acceptance rate was {:.1f}%\n".format(average_acceptance))
        f.write("Initial conditions were\n")
        for i in chains_list:
            f.write(str(i))
            f.write("\n")
def continue_sampling(n, n_old, priors_dict, rho, burn_rate, down_sample, cpu=None):
    variableNames = []
    stdDevs = []
    type_of_dists = []
    extra_bounds = []
    with open(results_dir + 'simulation_summary.txt', 'r') as f:
        beta = float(f.readline()[21:-1])
        numChains = int(f.readline()[19:-1])
        f.readline()
        f.readline()
        distribution_descriptions = f.readline()[2:-3]
        distribution_descriptions = distribution_descriptions.split('], [')
        example_theta = []
        for variable in distribution_descriptions:
            var = variable.replace('\'', '').split(', ')
            example_theta.append(var)
        variableNames = [el[0] for el in example_theta]
        stdDevs = [float(el[2]) for el in example_theta]
        type_of_dists = [el[3] for el in example_theta]
        extra_bounds = [el[4] if len(el) == 5 else [] for el in example_theta]

    with open(results_dir + 'chain_lengths.txt', 'r') as f:
        chainLengths = list(map(int, f.readline().split(', ')[:]))

    chains = pd.read_csv(results_dir + 'complete_samples.csv', index_col=0)
    chainList = []
    end_points = []
    hyper_theta_valList = []
    readChainLengths = [0] + chainLengths + [-1]
    for i in range(numChains):
        chainList.append(chains.iloc[readChainLengths[i]:readChainLengths[i] +
                                     readChainLengths[i + 1] - 1])
        end_points.append(chainList[-1].iloc[-1])
        hyper_theta_valList.append(chainList[-1].iloc[0])

    hyper_theta = [[[variableNames[i], hyper_theta_valList[c][variableNames[i]],
                     stdDevs[i], type_of_dists[i]] + extra_bounds[i]
                    for i in range(len(variableNames))]
                   for c in range(numChains)]
    restarting_points = [[[variableNames[i], end_points[c][variableNames[i]],
                           stdDevs[i], type_of_dists[i]] + extra_bounds[i]
                          for i in range(len(variableNames))]
                         for c in range(numChains)]

    pre_pool_results = [[] for i in range(numChains)]
    for q in range(numChains):
        pre_pool_results[q] = [[[variableNames[i], chainList[q].iloc[j][variableNames[i]],
                                 stdDevs[i], type_of_dists[i]] + extra_bounds[i]
                                for i in range(len(variableNames))]
                               for j in range(len(chainList[q]))]

    print("Continuing MCMC Analysis")
    print("Sampling from posterior distribution")
    if numChains >= cpu_count():
        NUMBER_OF_PROCESSES = cpu_count() - 1
    else:
        NUMBER_OF_PROCESSES = numChains
    if cpu != None:
        NUMBER_OF_PROCESSES = cpu  # Manual override of core number selection
    print("Using {} threads".format(NUMBER_OF_PROCESSES))

    # clear previous progress report
    f = open('progress.txt', 'w')
    f.close()

    jobs = Queue()
    result = JoinableQueue()
    for m in range(numChains):
        jobs.put([restarting_points[m], beta, rho, n, priors_dict])
    [Process(target=mh, args=(i, jobs, result)).start()
     for i in range(NUMBER_OF_PROCESSES)]

    # pull in the results from each thread
    pool_results = []
    for m in range(numChains):
        r = result.get()
        pool_results.append(r)
        result.task_done()

    # tell the workers there are no more jobs
    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)

    # close all extra threads
    result.join()
    jobs.close()
    result.close()

    # Combine old results with new results
    for j in range(len(pool_results)):
        pool_results[j] = pre_pool_results[j] + pool_results[j]

    # Perform data analysis
    total_samples = sum([len(i) for i in pool_results])
    print("Average acceptance rate was {:.1f}%".format(
        total_samples * 100 / ((n + n_old) * numChains)))
    samples = get_parameter_distributions(pool_results, burn_rate, down_sample)
    plot_parameter_autocorrelations(samples)
    get_summary_statistics(samples)

    with open(results_dir + 'simulation_summary.txt', 'w') as f:
        f.write('Temperature used was {}\n'.format(beta))
        f.write('Number of chains = {}\n'.format(numChains))
        f.write("Average acceptance rate was {:.1f}%\n".format(
            total_samples * 100 / ((n + n_old) * numChains)))
        f.write("Initial conditions were\n")
        for i in hyper_theta:
            f.write(str(i))
            f.write("\n")
def main(course_file='courses.txt', clear_db=True):
    """Main method/entrypoint
    """
    # Courses
    work_queue = JoinableQueue()
    skipped_queue = Queue(0)
    with open(course_file, "r") as f:
        for line in f:
            work_queue.put(line.strip())

    # For holding the database info
    db_queue = Queue()
    db_lock = Lock()

    # Create the worker processes
    process_list = []
    for i in range(multiprocessing.cpu_count()):
        p = multiprocessing.Process(target=process_data,
                                    args=(work_queue, skipped_queue, db_queue, db_lock))
        process_list.append(p)
        p.start()

    work_queue.join()
    work_queue.close()

    db_lock.acquire()
    print('Done work. Got {0} courses, skipped {1}'.format(db_queue.qsize(),
                                                           skipped_queue.qsize()))
    db_lock.release()
    print()

    # Announce skipped courses
    with open('skippedCourses.txt', 'w') as f:
        if not skipped_queue.empty():
            print('These courses were skipped: ')
            while not skipped_queue.empty():
                skipped_course = skipped_queue.get()
                print('  {0}'.format(skipped_course))
                to_file = skipped_course.split(',', 1)[0]
                f.write(u'{0}\n'.format(to_file).encode('utf8'))
            print()

    db_courses = Queue(0)
    db_sections = Queue(0)
    db_activities = Queue(0)
    while not db_queue.empty():
        course = db_queue.get()
        # course name
        db_courses.put(course[0])
        # sections
        for section in course[1]:
            db_sections.put(section)
        # activities
        for activity in course[2]:
            db_activities.put(activity)

    # Print total count of all items
    print('Courses: {0}'.format(db_courses.qsize()))
    print('Sections: {0}'.format(db_sections.qsize()))
    print('Activities: {0}'.format(db_activities.qsize()))

    # Write courses to files
    with open('db_courses.csv', 'w' if clear_db else 'a') as f:
        while not db_courses.empty():
            f.write(u'{0}\n'.format(db_courses.get()).encode('utf8'))

    # Write sections to files
    with open('db_sections.csv', 'w' if clear_db else 'a') as f:
        while not db_sections.empty():
            f.write(u'{0}\n'.format(db_sections.get()).encode('utf8'))

    # Write activities to files
    with open('db_activities.csv', 'w' if clear_db else 'a') as f:
        while not db_activities.empty():
            f.write(u'{0}\n'.format(db_activities.get()).encode('utf8'))
def parallel(self):
    from multiprocessing import Process, Queue, JoinableQueue

    if debug:
        print(inspect.stack()[0][3])

    # split self.ntraj trajectories as evenly as possible across the cpus
    self.ntrajs = []
    for i in range(self.cpus):
        self.ntrajs.append(min(int(np.floor(float(self.ntraj) / self.cpus)),
                               self.ntraj - sum(self.ntrajs)))
    cnt = sum(self.ntrajs)
    while cnt < self.ntraj:
        for i in range(self.cpus):
            self.ntrajs[i] += 1
            cnt += 1
            if (cnt >= self.ntraj):
                break
    self.ntrajs = np.array(self.ntrajs)
    self.ntrajs = self.ntrajs[np.where(self.ntrajs > 0)]
    self.nprocs = len(self.ntrajs)

    sols = []
    processes = []
    resq = JoinableQueue()
    resq.join()

    if debug:
        print("Number of cpus: " + str(self.cpus))
        print("Trying to start " + str(self.nprocs) + " process(es).")
        print("Number of trajectories for each process: " + str(self.ntrajs))

    for i in range(self.nprocs):
        p = Process(target=self.evolve_serial,
                    args=((resq, self.ntrajs[i], i, self.seed * (i + 1)),))
        p.start()
        processes.append(p)

    cnt = 0
    while True:
        try:
            sols.append(resq.get())
            resq.task_done()
            cnt += 1
            if (cnt >= self.nprocs):
                break
        except KeyboardInterrupt:
            break
        except:
            pass

    resq.join()
    for proc in processes:
        try:
            proc.join()
        except KeyboardInterrupt:
            if debug:
                print("Cancel thread on keyboard interrupt")
            proc.terminate()
            proc.join()
    resq.close()
    return sols
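In the method above the JoinableQueue carries results back to the parent, which acknowledges each one with task_done() before the final resq.join(). A compact sketch of that result-acknowledgement pattern, with a trivial placeholder in place of the real trajectory solver (simulate and its arguments are illustrative):

from multiprocessing import JoinableQueue, Process

def simulate(resq, ntraj, idx, seed):
    # placeholder for a real per-process solver
    resq.put({'proc': idx, 'ntraj': ntraj, 'seed': seed})

if __name__ == '__main__':
    resq = JoinableQueue()
    nprocs = 4
    procs = [Process(target=simulate, args=(resq, 25, i, 1234 * (i + 1)))
             for i in range(nprocs)]
    for p in procs:
        p.start()

    sols = []
    for _ in range(nprocs):
        sols.append(resq.get())
        resq.task_done()      # acknowledge each result as it is consumed

    resq.join()               # no outstanding results remain
    for p in procs:
        p.join()
    resq.close()
    print(sols)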