Example #1
class emailSubsystem(object):
    def __init__(self):
        ### will move to Celery eventually; with Celery, the app would be able to periodically
        # wake up and check on replyQueue to see which emails were sent, which were not, and
        # what to do ...

        self.emailQueue = JoinableQueue()
        self.replyQueue = JoinableQueue()

        self.worker = Process(target=sendEmailWorker, args=(self.emailQueue, self.replyQueue))

    def start(self):
        # temporarily comment out starting a new process as it seems to leave zombies
        # and causes the app not to start once the max process limit is reached.
        #self.worker.start()
        return

    def shutdown(self):
        # post poison pill
        # wait on the queue to be done; ie join on emailQueue
        # wait on the worker process to die; ie join on worker

        self.emailQueue.put(None)
        self.emailQueue.join()
        self.worker.join()
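The worker target sendEmailWorker is not shown above. A minimal sketch of what such a worker might look like, assuming each queue item is a ready-to-send message object and that delivery status is reported back on replyQueue; every get() has to be matched with a task_done() call (including for the poison pill) so that the join() calls in shutdown() can return:

def sendEmailWorker(emailQueue, replyQueue):
    # Consume messages until the poison pill (None) arrives.
    while True:
        message = emailQueue.get()
        if message is None:
            emailQueue.task_done()  # acknowledge the pill so emailQueue.join() unblocks
            break
        try:
            # actual SMTP delivery would go here
            replyQueue.put(('sent', message))
        except Exception as exc:
            replyQueue.put(('failed', message, exc))
        finally:
            emailQueue.task_done()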
Example #2
def init(opts):

    tasks = JoinableQueue()
    manager = Manager()
    stats = manager.dict()
    stats['processed'] = 0
    stats['missing'] = 0
    
    # start up our worker threads
    log.info("Creating %d workers" % opts.threads)
    
    workers = [ Worker(tasks, opts, stats) for i in xrange(opts.threads)]
    for w in workers:
        w.start()
        
    # queue up the bibcodes
    for doc in get_docs(opts):
        tasks.put(doc)
    
    # add some poison pills to the end of the queue
    log.info("poisoning our task threads")
    for i in xrange(opts.threads):
        tasks.put(None)
    
    # join the task queue. this should
    # block until all tasks in the task queue are completed
    log.info("Joining the task queue")
    tasks.join()
    
    log.info("Joining the task threads")
    for w in workers:
        w.join()
        
    log.info("processed: %d" % stats['processed'])
    log.info("records with no existing extract dir: %d" % stats['missing'])        
Example #3
def main(multiplier):
    # Establish communication queues
    tasks = JoinableQueue()
    results = Queue()

    # Start consumers
    num_consumers = cpu_count() * multiplier
    print 'Creating %d consumers' % num_consumers
    consumers = [Consumer(tasks, results) for i in xrange(num_consumers)]
    for w in consumers:
        w.start()
    
    fout = open(os.path.join(settings.PERSIST_DIR, 'doc_matrix_comparison.csv'), 'w', 0)
    rw = ResultWriter(results, csv.writer(fout))
    rw.start()

    #num_docs = 801781
    num_docs = 25
    for i in xrange(num_docs):
        tasks.put(Task(i))


    # Add a poison pill for each consumer
    for i in xrange(num_consumers):
        tasks.put(None)

    # Wait for all of the tasks to finish
    tasks.join()
    results.put('STOP')
Example #4
def upload(args=None, authdata=None):
    """
    Initialize the containers and pseudo-directories for what is to be
    uploaded.  Separates jobs into sub-jobs based on container.
    Up to 100 containers per second.
    """
    #initialize the containers in parallel
    containers = []
    for obj in os.listdir(args['dir']):
        if args['container']:
            containers.append(args['container'])
            break
        #if os.path.isdir(os.path.abspath(args['dir']+'/'+obj)):
        if os.path.isdir(os.path.join(args['dir'], obj)):
            containers.append(obj)
    if containers:
        #set container job count to the less of args['cc'] or container count
        if args['cc'] < len(containers):
            args['cc'] = len(containers)
        #create queue and jobs
        container_queue = JoinableQueue()
        for container_worker in range(args['cc']):
            job = Process(target=container_consumer,
                          args=(args, authdata, container_queue,))
            job.daemon=False
            job.start()
        for container in containers:
            container_queue.put(container)
        #tail the queue with a None marker so the workers shut down nicely.
        for container in range(args['cc']):
            container_queue.put(None)
        container_queue.join()
Example #5
def crunch(file_name, ext_type, handler, pool_size=4, queue_size=40,
           limit=None):

    print 'Crunching file: %s, limit: %s' % (file_name, limit)

    q = JoinableQueue(queue_size)
    q_feats = Queue()

    pool = Pool(pool_size, wrap_handler(handler), ((q, q_feats),))

    with file_reader(file_name) as reader:
        idx = 0
        for entry in reader:

            if (entry.pathname.find(ext_type) != -1):
                text = [b for b in entry.get_blocks()]
                key = entry.pathname.split('/')[-1].split('.')[0]

                q.put((key, text), True)
                idx += 1

                print 'Processing:', entry.pathname, idx

                if limit and idx >= limit:
                    print 'Reached the limit'
                    break

        q.close()
        q.join()
        pool.close()

    result = []
    for i in range(q_feats.qsize()):
        result.append(q_feats.get())
    return result
Example #6
 def getdata_multiprocess(self,task_funcsiter=None,task_funcsiterparas={},
                         task_funcsconst=None,task_funcsconstparas={},processnum=None,
                         threadnum=2):
     def _start_processes(taskqueue,resultqueue,taskqueue_lk,task_funcsconst,
                          task_funcsconstparas,processnum,threadnum):
         for i in range(processnum):
             p = Process(target=self.multiprocess_task, args=(taskqueue,resultqueue,
                                      taskqueue_lk,threadnum,
                                      task_funcsconst,task_funcsconstparas
                                      ),name='P'+str(i))
             p.daemon=True
             p.start()
             
     processnum=processnum if processnum else multiprocessing.cpu_count()
     # queue for passing tasks to the workers
     taskqueue=JoinableQueue()
     # lock/condition for writing tasks and waking up the workers
     taskqueue_lk = multiprocessing.Condition(multiprocessing.Lock())
     # queue for passing results back
     resultqueue=Queue()
     
     _start_processes(taskqueue,resultqueue,taskqueue_lk,task_funcsconst,
                         task_funcsconstparas,processnum,threadnum)
     # put in the tasks and wake up the worker processes
     if task_funcsconst is None:
         self._put_tasks(zip(task_funcsiter,task_funcsiterparas),taskqueue,taskqueue_lk)
     else:
         self._put_tasks(task_funcsiterparas,taskqueue,taskqueue_lk)
     logger.info('main join!')
     taskqueue.join()
     logger.info('main end!')
     return self._get_results(resultqueue)
Example #7
def solve(iterations, proc_count):

    queue = JoinableQueue()
    partition = get_iterations_partition(iterations, proc_count)
    for iteration in partition:
        queue.put(iteration)
    for i in range(proc_count):
        queue.put(None)

    manager = Manager()
    result = manager.list()
    processes = []

    cur_time = time.time()
    for i in range(proc_count):
        proc = Process(target=worker, args=(queue, result,))
        proc.start()
        processes.append(proc)

    queue.join()
    for proc in processes:
        proc.join()

    cur_time = time.time() - cur_time
    print_results(cur_time, result, iterations)
Example #8
def setup_queue(options):	
	probe_servers = Queue()
	progress_queue = Queue()

	run = Probe.ProbeRun.objects.get(id = options.run_id)

	summary_top = Results.ResultSummaryList.objects.get(part_of_run=run)
	summary_top.setup()

	connection.close()
	
	threads = [] 
	for i in range(options.threads):
		new_thread = Process(target=SetupQueueThread, args=(i,run, probe_servers, progress_queue))
		new_thread.daemon = True
		new_thread.start()
		threads.append(new_thread)
		
	progress_thread = threading.Thread(target=__ProgressCounter, args=(run,  progress_queue, threads,options))
	progress_thread.daemon = True
	progress_thread.start()

	i = 0;
	if options.input_filename and (not options.count or i < options.count):
		for hostname_line in fileinput.input(options.input_filename, openhook=fileinput.hook_compressed):
			probe_servers.put(hostname_line)
			i+=1
			if options.count and i >= options.count:
				break;

	probe_servers.join()
	progress_queue.join()
	
	return run
Example #9
def aggress(map):
    global startMap
    startMap = map

    #print "Regressing..."
    state = State()

    jobs = []

    longestSolution = Value('d', 20)
    highestScore = Value('d', 0)

    queue = JoinableQueue()

    manager = Manager()

    d = manager.dict()
    d.clear()

    l = RLock()

    if multiProc:
        queue.put((state, map, 1))

        for i in range(numProcs):
           p = Process(target = multiMain, args=(startMap, l, d, queue,highestScore))
           p.start()

        queue.join()
    else:
        a(l, highestScore, d, None, state, map, 1)
Example #10
    def run(self):

        # Changes the process name shown by ps for instance
        setProcTitle ("agentcluster master [version: %s] [monitoring: %d seconds]" % (__version__,self.monitoring_period) );

        try:
            logger.info ( 'Agent cluster server starting' );

            logger.info ( 'Configurations will be scanned in directories:' );
            for directory in confdir.data:
                logger.info ( '  o %s', os.path.abspath(directory) );

            self.watchdog = Watchdog(self.monitoring_period)
            self.watchdog.start()

            # Generates a deadlock to enter sleep mode
            # Only an external signal can break this deadlock
            logger.info ( 'Agent cluster server started' );
            queue = JoinableQueue()
            queue.put(object());
            queue.join();

        except KeyboardInterrupt:
            logger.info ( 'Agent cluster server interrupted' );
        except Exception:
            logger.error ( 'Exception caught in main process: %s', sys.exc_info()[1] );
            logger.debug ( "", exc_info=True );
        finally:
            # First stop the monitoring to avoid restarting killed agents
            if self.watchdog is not None:
                self.watchdog.shutdown = True
                self.watchdog.join()
            logger.info ( 'Agent cluster server end' );
            logging.shutdown()
Example #11
def main(workers=10):
    """
    Executes main function of mini-framework's Control thread.
    :param workers: Integer detailing number of worker FIFO threads to employ
    """
    start_logging()
    log_info("New multiprocessing session with {} workers".format(workers))
    
    # Input JoinableQueue and Output Queue
    inq = JoinableQueue(maxsize=int(workers*1.5))
    outq = Queue(maxsize=int(workers*1.5))
    
    ot = OutThread(workers, outq)
    ot.start()
    
    for _ in range(workers):
        w = WorkerThread(inq, outq)
        w.start()
    
    # Create a sequence of 1,000 random alphabetic characters
    random_chars = (ascii_letters[randint(0, 51)] for _ in range(1000))
    
    # Keep input queue loaded for as long as possible
    # Feed the process pool with work units
    for work in enumerate(random_chars):
        inq.put(work)
    
    # Fill the input queue with Nones to shut the worker threads down
    # which terminates the process pool
    for _ in range(workers):
        inq.put(None)
        
    inq.join()
    print("Control process terminating")
Example #12
class FlightProducer(Process):

    def __init__(self, options={}, date_group=[]):
        super(FlightProducer, self).__init__()
        self.options    = options
        self.date_group = date_group
        self.date_queue = JoinableQueue()

    def start(self):
        consumers_list = []
        consumers_num  = cpu_count() * 2

        # Consumers
        for i in xrange(consumers_num):
            consumers_list.append(FlightConsumer(self.options, self.date_queue))

        for consumer in consumers_list:
            consumer.start()

        # Put each date group to queue
        for date_item in self.date_group:
            self.date_queue.put(date_item)

        # Tell the consumers they can exit
        for i in xrange(consumers_num):
            self.date_queue.put(None)

        # Wait for all of the consumers to finish
        self.date_queue.join()

        print('Done')
Example #13
class ProcessPool(object):

    def __init__(self, size=1):
        self.size = size
        self.jobs = Queue()
        self.results = Queue()
        self.processes = []

    def start(self):
        '''start all processes'''

        for i in range(self.size):
            self.processes.append(ProcessWorker(self))

        for process in self.processes:
            process.start()

    def append_job(self, job, *args, **kwargs):
        self.jobs.put((job, args, kwargs))

    def join(self):
        '''wait until all jobs are done'''
        self.jobs.join()

    def stop(self):
        '''kill all processes'''
        for process in self.processes:
            process.stop()

        for process in self.processes:  # wait for each process to finish
            if process.is_alive():
                process.join()

        del self.processes[:]  # reset processes to empty
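ProcessWorker is not included in this snippet. A minimal usage sketch, under the assumptions that Queue here is multiprocessing.JoinableQueue (join() is called on self.jobs) and that each ProcessWorker pulls (job, args, kwargs) tuples from pool.jobs, runs them, and acknowledges each one with task_done():

def fetch(url):
    # hypothetical job function
    print('fetching %s' % url)

pool = ProcessPool(size=4)
pool.start()
for url in ('http://example.com/a', 'http://example.com/b'):
    pool.append_job(fetch, url)
pool.join()   # blocks until every queued job has been marked done
pool.stop()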
Example #14
def main(opts, files):
    
    if opts.threads == 1:
        log.info("running synchronously")
        run_syncronous(opts, files)
    else:
        Q = JoinableQueue()
        workers = [Worker(Q, opts) for i in xrange(opts.threads)]
        
        log.info("initializing %d threads" % opts.threads)
        for w in workers:
            w.start()
            
        # push log events onto the queue
        events_iter = events(files, opts)
        if opts.limit:
            events_iter = itertools.islice(events_iter, opts.limit)
            
        for event in events_iter:
            Q.put(event)
        
        # add poison pills 
        for i in xrange(opts.threads):
            Q.put(None)
            
        Q.join()
        log.info("work complete. shutting down threads.")
        for w in workers:
            w.join()   
Example #15
class JavaMultipleParserExecutor:
    def __init__(self, output_dir, repo_path, processes=None):
        self.target_blobs = JoinableQueue()

        self.num_consumers = processes if processes else cpu_count()
        self.consumers = [JavaConsumer(self.target_blobs, repo_path, output_dir)
                          for i in range(self.num_consumers)]

        for consumer in self.consumers:
            consumer.start()

        self.closed = False

    def parse_blob(self, blob):
        if self.closed:
            return
        self.target_blobs.put(blob.hexsha)

    def join(self):
        if self.closed:
            return
        for i in range(self.num_consumers):
            self.target_blobs.put(None)

        self.target_blobs.join()
        self.closed = True
Example #16
def launch_mesos_tf(marathon_url_str, tsknom_str, cpu_float, mem_float, ntasks_int, uri_str, marathon_usr, marathon_usrpwd, localhost_str, mxattempts=10):
   toret_nodes = dict()

   docker = False
   if uri_str.find('docker') > -1:
      uri_str = uri_str.replace('docker://', '')
      docker = True
 
   uri_str = uri_str.rstrip('/')
   marathon_url_str = marathon_url_str.rstrip('/') 

   counter = 0
   tq = JoinableQueue()
   q = Queue()
   plist = list()

   consumers = [ Consumer(tq, q) for i in xrange(ntasks_int) ]
   for c in consumers:
      c.start()

   for i in xrange(ntasks_int):
      tq.put(Task(post_marathon_tasks, (marathon_url_str, tsknom_str, cpu_float, mem_float, i+1, ntasks_int, uri_str, marathon_usr, marathon_usrpwd, localhost_str, mxattempts, docker)))

   for i in xrange(ntasks_int):
      tq.put(None)

   tq.join()

   for i in xrange(1, ntasks_int+1):
      toret_nodes[i] = q.get()

   return toret_nodes
Example #17
def main():
    jobs = JoinableQueue()
    result = JoinableQueue()


    numToProcess = -1
    scores = pd.DataFrame(columns=['query','fmeasure','precision','recall',
                                   'size','maxDistance','topHits',"contextSteps"])

    print len(datasets)

    for key in datasets:
        jobs.put(key)

    processed_count = Counter()
        
    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result, processed_count))
        p.daemon = True
        p.start()

    #work(1, jobs, result, processed_count)

    automated_annotations = {}
    distances = {}

    jobs.join()

    dataset_index = collections.defaultdict(set)
    annotated_datasets = set()
    while not result.empty():
        dataset, classes = result.get()
        if len(classes) == 0:
            annotated_datasets.add(dataset)
        for c in classes.keys():
            dataset_index[c].add(dataset)
            owl_class = Class(c, graph=graph)
            for parent in owl_class.parents:
                dataset_index[parent.identifier].add(dataset)
        result.task_done()

    print '\n'
    
    for query, c in queries.items():
        manual = ground_truth[query]
        automated = dataset_index[c]
        hits = manual & automated
        misses = manual - automated
        precision = np.nan if len(automated) == 0 else float(len(hits)) / len(automated)
        recall = np.nan if len(manual) == 0 else float(len(hits)) / len(manual)
        if precision != 0 or recall != 0:
            fmeasure = 0 if np.isnan(precision) or np.isnan(recall) else 2 * (precision * recall) / (precision + recall)
        else:
            fmeasure = 0
        scores = scores.append(dict(query=query, size=len(manual), precision=precision, recall=recall, fmeasure=fmeasure,topHits=topHits, maxDistance=maxDistance, contextSteps = context_steps),
                        ignore_index=True)
        print "Hits for", query, c
        print '\n'.join(sorted(hits))
    print scores
    print "Annotated", len(annotated_datasets), "datasets."
Example #18
 def convert(self):
     tmp_unpack_path = self._create_tmp_unpack_folder(self.output_filepath)
     
     header, files_attrs = self._get_archive_metadata()
     file_list = [attrs["Path"] for attrs in files_attrs if attrs["Attributes"] == "....A"]
     
     files_input_queue = JoinableQueue(200)
     files_processed_queue = JoinableQueue(200)
     
     extractor_processes = Pool(self._threads_num, self._extract_files, (files_input_queue, files_processed_queue, tmp_unpack_path))
     archiver_process = Process(target=self._archive_files, args = (files_processed_queue, tmp_unpack_path))
     archiver_process.start()
     
     for archived_file in file_list:
         files_input_queue.put(archived_file)
     
     
     files_input_queue.join()
     for i in xrange(self._threads_num):
         files_input_queue.put(None)
     extractor_processes.close()
     extractor_processes.join()
     
     files_processed_queue.join()
     files_processed_queue.put(None)
     archiver_process.join()
 
     shutil.rmtree(tmp_unpack_path, True)
Example #19
def get_citations(**args):
    """
    Method to prepare the actual citation dictionary creation
    """
    # create the queues
    tasks = JoinableQueue()
    results = JoinableQueue()
    # how many threads are there to be used
    if 'threads' in args:
        threads = args['threads']
    else:
        threads = cpu_count()
    # initialize the "harvesters" (each harvester get the citations for a bibcode)
    harvesters = [ CitationHarvester(tasks, results) for i in range(threads)]
    # start the harvesters
    for b in harvesters:
        b.start()
    # put the bibcodes in the tasks queue
    for bib in args['bibcodes']:
        tasks.put(bib)
    # add some 'None' values at the end of the tasks list, to facilitate proper closure
    for i in range(threads):
        tasks.put(None)

    tasks.join()
    for b in harvesters:
        b.join()

    return [item for sublist in cit_dict.values() for item in sublist]
Example #20
def main(max):
	rand_word = ''
	filename_tmp = "file_tmp.txt"
	final_file = "dataset.txt"
	q = JoinableQueue()
	_worker = Process(target=tuan, args=(q, filename_tmp,))
	_worker.start()
	alphanum = printable[0:62]
	alpha = printable[10:62]
	with open(final_file, "w") as f:
		print("Writing in {}".format(final_file))
		for x in range(max):
			rand_word = random_word(64, alpha)
			if randint(0, 100) <= 50:
				q.put(rand_word)
			else:
				q.put(random_word(64, alphanum))
			print(rand_word, random_word(64, alphanum),
					sep='\n', file=f)
		else:
			q.put(None)
			q.join()
			_worker.join()
			print(file=f)
			with open(filename_tmp, "r") as f2:
				copyfileobj(f2, f)
			print("Removing {}".format(filename_tmp))
			remove(filename_tmp)
	print("Work is done.")
Example #21
class Mothership(object):

    """ Monitor of producer and consumers """

    def __init__(self, producer, consumers):
        self._queue = JoinableQueue()

        self._producer_proxy = ProducerProxy(self._queue, producer)
        self._consumer_pool = list(ConsumerProxy(self._queue, consumer) \
                                   for consumer in consumers)

    def start(self):
        """ Start working """
        logger.info('Starting Producers'.center(20, '='))
        self._producer_proxy.start()

        logger.info('Starting Consumers'.center(20, '='))
        for consumer in self._consumer_pool:
            consumer.start()

        self._producer_proxy.join()
        self._queue.join()

    def __enter__(self):
        return self

    def __exit__(self, types, value, tb):
        return
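ProducerProxy and ConsumerProxy are not shown. A sketch of what the consumer side of this pattern could look like, assuming the producer proxy fills self._queue and that each wrapped consumer exposes a consume() method; every item is acknowledged with task_done() so that the queue.join() in start() can return:

from multiprocessing import Process

class ConsumerProxy(Process):
    def __init__(self, queue, consumer):
        super(ConsumerProxy, self).__init__()
        self.daemon = True          # do not keep the program alive after the queue is joined
        self._queue = queue
        self._consumer = consumer

    def run(self):
        while True:
            item = self._queue.get()
            try:
                self._consumer.consume(item)   # assumed consumer interface
            finally:
                self._queue.task_done()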
Example #22
def scheduler(db,category):
    task=JoinableQueue()
    for i in range(cpu_count()):
        pid=os.fork()
        if pid==0:
            consumer(category,task)
            os._exit(0) # prevent the child process from running the code below
            # print('this line is never reached')
        elif pid<0:
            logging.error('failed to create child process')

    with ThreadPoolExecutor() as executor:
        cursor = db['image_match_result_{}'.format(category)].find(
            {'$or': [{'robot_processed': False}, {'robot_processed': {'$exists': False}}]}, 
            {'_id': 1, 'b_image_url': 1, 'c_image_url': 1}
        )
        for item in cursor:
            item['mark']=True # mark as wrong
            executor.submit(producer, item, task)
        cursor = db['item_match_result_{}'.format(category)].find(
            {'$or': [{'robot_processed': False}, {'robot_processed': {'$exists': False}}]}, 
            {'_id': 1, 'b_image_url': 1, 'c_image_url': 1}
        )
        for item in cursor:
            item['mark']=False # mark as correct
            executor.submit(producer, item, task)
    task.join()
    os.kill(0,signal.SIGKILL)
Example #23
def evaluate(points,meshToBasis,kernel,quadRule,coeffs,nprocs=None):
    """Evaluate a kernel using the given coefficients"""


    if nprocs==None: nprocs=cpu_count()

    inputQueue=JoinableQueue()

    nelements=meshToBasis.nelements

    for elem in meshToBasis: inputQueue.put(elem)

    buf=sharedctypes.RawArray('b',len(points[0])*numpy.dtype(numpy.complex128).itemsize)
    result=numpy.frombuffer(buf,dtype=numpy.complex128)
    result[:]=numpy.zeros(1,dtype=numpy.complex128)

    time.sleep(.5)
    workers=[]

    for id in range(nprocs):
        worker=EvaluationWorker(points,kernel,quadRule,coeffs,inputQueue,result)
        worker.start()
        workers.append(worker)


    inputQueue.join()
    for worker in workers: worker.join()

    return result.copy()
Example #24
class QuickReader():
	def __init__(self, writer_name,handle_raw_assertion,add_lines_to_queue, isTest = False, num_threads = 5):
		self.writer_name = writer_name
		self.num_threads = num_threads
		self.handle_raw_assertion = handle_raw_assertion
		self.add_lines_to_queue = add_lines_to_queue
		self.queue = JoinableQueue()
		self.isTest = isTest

	def start(self):
		print "begin writing " + self.writer_name
		self.create_processes()
		self.add_lines_to_queue(self.queue)
		self.queue.join()
		print "finished writing " + self.writer_name

	def pull_lines(self,q,writer):
	    while 1:
	        raw_assertion = q.get()
	        edges = self.handle_raw_assertion(raw_assertion)
	        for edge in edges:
	            writer.write(edge)
	        q.task_done()

	def create_processes(self):
	    for i in range(self.num_threads):
	        writer = MultiWriter(self.writer_name + "_" + str(i),self.isTest)
	        p = Process(target = self.pull_lines, args = (self.queue, writer))
	        #p.daemon=True
	        p.start()
Example #25
def main():
    from multiprocessing import JoinableQueue
    from genmod.vcf import vcf_header
    from genmod.utils import annotation_parser
    parser = argparse.ArgumentParser(description="Parse different kind of pedigree files.")
    parser.add_argument('variant_file', type=str, nargs=1 , help='A file with variant information.')
    parser.add_argument('annotation_file', type=str, nargs=1 , help='A file with feature annotations.')
    parser.add_argument('-phased', '--phased', action="store_true", help='If variant file is phased.')    
    parser.add_argument('-v', '--verbose', action="store_true", help='Increase output verbosity.')
    
    args = parser.parse_args()
    infile = args.variant_file[0]
    if args.verbose:
        print('Parsing annotationfile...')
        start_time_annotation = datetime.now()
    my_anno_parser = annotation_parser.AnnotationParser(args.annotation_file[0], 'ref_gene')
    
    if args.verbose:
        print('annotation parsed. Time to parse annotation: %s\n' % str(datetime.now() - start_time_annotation))
    
    my_head_parser = vcf_header.VCFParser(infile)
    my_head_parser.parse()
    print(my_head_parser.__dict__)
    variant_queue = JoinableQueue()
    start_time = datetime.now()        
    
    my_parser = VariantFileParser(infile, variant_queue, my_head_parser, my_anno_parser, args)
    nr_of_batches = my_parser.parse()
    print(nr_of_batches)
    for i in range(nr_of_batches):
        variant_queue.get()
        variant_queue.task_done()
    
    variant_queue.join()
    print('Time to parse variants: %s ' % str(datetime.now()-start_time))
Example #26
def build(opts):
    tasks = JoinableQueue()
    results = JoinableQueue()
    
    if opts.remove:
        log.info("Removing existing docs collection")
        session = utils.get_session(config)
        session.docs.drop()
        
    # start up our builder threads
    log.info("Creating %d Builder processes" % opts.threads)
    builders = [ Builder(tasks, results) for i in xrange(opts.threads)]
    for b in builders:
        b.start()
        
    # queue up the bibcodes
    for bib in get_bibcodes(opts):
        tasks.put(bib)
    
    # add some poison pills to the end of the queue
    log.info("poisoning our task threads")
    for i in xrange(opts.threads):
        tasks.put(None)
    
    # join the task queue. this should
    # block until all tasks in the task queue are completed
    log.info("Joining the task queue")
    tasks.join()
    log.info("Joining the task threads")
    for b in builders:
        b.join()
    
    log.info("All work complete")
Example #27
def main():

    fetch_queue = JoinableQueue()
    reject_queue = JoinableQueue(maxsize = 1000)

    log_processor = Process(target=job_creator, args=(fetch_queue, './search_log_valid_2010_06_17'), name='log-processor')
    
    writers = [ ]
    write_queues = []

    for num in DATA_SETS:
        queue, writer = create_writer(reject_queue, num) 
        writers.append(writer)
        write_queues.append(queue)

    fetchers = [ create_fetcher(fetch_queue, write_queues, reject_queue, num) for num in xrange(NUM_FETCHERS) ]
    reject_writer = Process(target=reject, args=(reject_queue, './rejected-lines'), name='related-search-reject-writer')

    log_processor.start()
    reject_writer.start()
    start_processes(writers)
    start_processes(fetchers)

    log_processor.join()
    print 'DONE? '
    fetch_queue.join()
    for queue in write_queues:
        queue.join()
    reject_writer.join()
Example #28
def generate_cache(path_mapping,rep_func, attributes,num_procs, call_back, stop_check):

    all_files = set()
    for pm in path_mapping:
        if stop_check is not None and stop_check():
            return
        all_files.update(pm)
    all_files = sorted(all_files)
    stopped = Stopped()
    job_queue = JoinableQueue(100)
    file_ind = 0
    while True:
        if file_ind == len(all_files):
            break
        try:
            job_queue.put(all_files[file_ind],False)
        except Full:
            break
        file_ind += 1
    manager = Manager()
    return_dict = manager.dict()
    procs = []

    counter = Counter()
    #if call_back is not None:
    #    call_back('Generating representations...')
    #    cb = Process(target = call_back_worker,
    #                args = (call_back, counter, len(all_files), stop_check))
    #    procs.append(cb)
    for i in range(num_procs):
        p = RepWorker(job_queue,
                      return_dict,rep_func,attributes, counter, stopped)
        procs.append(p)
        p.start()
    time.sleep(2)
    if call_back is not None:
        call_back('Generating representations...')
        prev = 0
    val = 0
    while True:
        if file_ind == len(all_files):
            break
        if stop_check is not None and stop_check():
            stopped.stop()
            time.sleep(1)
            break
        job_queue.put(all_files[file_ind])

        if call_back is not None:
            value = counter.value()
            call_back(value)
        file_ind += 1
    job_queue.join()
    time.sleep(2)

    for p in procs:
        p.join()

    return return_dict
Example #29
def setup_queue(options):	
	probe_servers = Queue()
	progress_queue = Queue()
	
	if options.queue_id:
		queue_list = Probe.PreparedQueueList.objects.get(id=options.queue_id)
	else:
		queue_name=options.queue_name.strip('"').strip()

		queue_list,created = Probe.PreparedQueueList.objects.get_or_create( 
									list_name=queue_name, 
									defaults = dict(list_description=options.description.strip('"').strip()),
									)
		
	if options.run_id:
		run = Probe.ProbeRun.objects.get(id=options.run_id)
		
		cursor = connection.cursor()
		
		cursor.execute("""INSERT INTO probedata2_preparedqueueitem (part_of_queue_id, server_id) 
				SELECT %s AS part_of_queue_id, server_id FROM probedata2_probequeue
				WHERE part_of_run_id = %s""", [str(queue_list.id),str(run.id)]
			)
		transaction.commit_unless_managed()
		return queue_list

	connection.close()
	
	threads = [] 
	for i in range(options.threads):
		new_thread = Process(target=SetupQueueThread, args=(i,queue_list, probe_servers, progress_queue))
		new_thread.daemon = True
		new_thread.start()
		threads.append(new_thread)
		
	progress_thread = threading.Thread(target=__ProgressCounter, args=(progress_queue, threads,options))
	progress_thread.daemon = True
	progress_thread.start()

	i = 0;
	if not options.file_list_only:
		for host in Probe.Server.objects.filter(enabled = True).values_list("id",flat=True):
			probe_servers.put(host)
			i+=1
			if options.count and i >= options.count:
				break;

	if options.input_filename and (not options.count or i < options.count):
		for hostname_line in fileinput.input(options.input_filename, openhook=fileinput.hook_compressed):
			probe_servers.put(hostname_line)
			i+=1
			if options.count and i >= options.count:
				break;

	probe_servers.join()
	progress_queue.join()
	
	return queue_list
Example #30
def queueManager(numProc, myList, function, *args):
	'''queueManager(numProc, myList, function, *args):
	generic function used to start worker processes via the multiprocessing Queue object
	numProc - number of processors to use
	myList - a list of objects to be iterated over
	function - target function
	*args - additional arguments to pass to function

	Return - a list of results ordered to match myList
	'''
	qIn = Queue()
	qOut = JoinableQueue()
	if args:
		arguments = (qIn, qOut,) + args
	else:
		arguments = (qIn, qOut,)
	results = []
	
	# reduce processor count if proc count > files
	
	i = 0
	for l in myList:
		qIn.put((i,l))
		i += 1

	for _ in range(numProc):
		Process(target = function, args = arguments).start()
	sys.stdout.write("Progress: {:>3}%".format(0))
	curProgress = 0
	lastProgress = 0
	while qOut.qsize() < len(myList):
		#sys.stdout.write("\b\b\b\b{:>3}%".format(int(ceil(100*qOut.qsize()/len(myList)))))
		curProgress = int(ceil(100*qOut.qsize()/len(myList)))
		if curProgress - lastProgress > 10:
			lastProgress += 10
			sys.stdout.write("\nProgress: {:>3}%".format(lastProgress))
			sys.stdout.flush()
	sys.stdout.write("\nProgress: {:>3}%".format(100))
	#sys.stdout.write("\b\b\b\b{:>3}%".format(100))
	sys.stdout.write("\n")
	for _ in range(len(myList)):
		# retrieve each result and mark it as processed
		results.append(qOut.get())
		qOut.task_done()
	#tell child processes to stop
	for _ in range(numProc):
		qIn.put('STOP')

	orderedRes = [None]*len(results)
	for i, res in results:
		orderedRes[i] = res

	qOut.join()

	qIn.close()
	qOut.close()
	return orderedRes
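A minimal usage sketch for queueManager, with a hypothetical worker that follows the protocol above: read (index, item) pairs from qIn until the 'STOP' sentinel arrives and put (index, result) pairs on qOut:

def square_worker(qIn, qOut):
    # consume (index, item) pairs until the 'STOP' sentinel
    for index, item in iter(qIn.get, 'STOP'):
        qOut.put((index, item * item))

if __name__ == '__main__':
    squares = queueManager(4, list(range(20)), square_worker)
    print(squares)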
Example #31
        return results


def thread():
    while True:
        worker = userQ.get()
        portscan(worker)
        userQ.task_done()


# This sets how many threads you want to run and starts them
for x in range(100):
    t = threading.Thread(target=thread)
    t.daemon = True
    t.start()

smartList = [
    0, 21, 22, 23, 25, 53, 79, 80, 110, 113, 119, 135, 137, 138, 139, 143, 389,
    443, 445, 555, 631, 666, 902, 912, 1001, 1002, 1024, 1025, 1026, 1027,
    1028, 1029, 1030, 1243, 1433, 1434, 1720, 1900, 2000, 4380, 4381, 5000,
    5040, 5088, 5354, 5432, 6463, 6667, 6670, 6711, 6776, 6969, 7000, 7680,
    8080, 8733, 12345, 12346, 13148, 15292, 15393, 21554, 22222, 27015, 27017,
    27275, 27374, 29559, 31337, 31338, 49664, 49665, 49666, 49668, 49684,
    49731, 49765, 49774, 50698, 50760, 51229, 54860, 54870, 57621
]

for worker in smartList:
    userQ.put(worker)

userQ.join()
Example #32
evaluation_queue = JoinableQueue()
result_queue = Queue()
for i in range(3):
    worker = Process(target=worker_func, args=(evaluation_queue, result_queue))
    worker.start()
    print("work %d start!" % i)
num = 100
for i in range(num):

    evaluation_queue.put(i)

for i in range(3):
    evaluation_queue.put(None)

evaluation_queue.join()
for i in range(num):
    data = result_queue.get()
    print(data)

import tensorflow as tf
from dkrl_model import DKRL
from transe_model import TransE
import pickle as pkl
from dkrl_data_loader import DataLoader

import numpy as np
from multiprocessing import JoinableQueue, Queue, Process

import os
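worker_func is not defined in this snippet. A sketch that matches the queue protocol above (consume items until the None pill, acknowledge every get() with task_done(), and emit one result per real item) might look like:

def worker_func(evaluation_queue, result_queue):
    while True:
        item = evaluation_queue.get()
        if item is None:
            evaluation_queue.task_done()   # acknowledge the pill so join() can return
            break
        result_queue.put(item * item)      # stand-in for the real evaluation
        evaluation_queue.task_done()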
Example #33
        if wt > 0.9:
            in_queue.put(None)
            print('stop producer')
            break
def consumer(in_queue, out_queue):
    while 1:
        task = in_queue.get()
        if task is None:
            break
        func, arg = task
        result = func(arg)
        in_queue.task_done()
        out_queue.put(result)
processes = []
p = Process(target=producer, args=(tasks_queue,))
p.start()
processes.append(p)
p = Process(target=consumer, args=(tasks_queue, results_queue))
p.start()
processes.append(p)

tasks_queue.join()

for p in processes:
    p.join()
while 1:
    if results_queue.empty():
        break
    result = results_queue.get()
    print('Result:', result)
Example #34
class WorkerMapper(Mapper):
    """Work mapper implementation using multiple worker processes and task
    queue.

    Uses the python multiprocessing module to spawn multiple worker
    processes which watch a task queue of walker segments.
    """
    def __init__(self,
                 num_workers=None,
                 worker_type=None,
                 worker_attributes=None,
                 **kwargs):
        """Constructor for WorkerMapper.

        kwargs are ignored.


        Parameters
        ----------
        num_workers : int
            The number of worker processes to spawn.

        worker_type : callable, optional
            Callable that generates an object implementing the Worker
            interface, typically a type from a Worker class.

        worker_attributes : dictionary
            A dictionary of values that are passed to the worker
            constructor as key-word arguments.

        """

        if worker_attributes is not None:
            self._worker_attributes = worker_attributes
        else:
            self._worker_attributes = {}

        self._num_workers = num_workers
        self._worker_segment_times = {i: [] for i in range(self.num_workers)}

        # choose the type of the worker
        if worker_type is None:
            self._worker_type = Worker
            warn("worker_type not given using the default base class")
            logging.warn("worker_type not given using the default base class")
        else:
            self._worker_type = worker_type

    @property
    def num_workers(self):
        """The number of worker processes."""
        return self._num_workers

    # TODO remove after testing
    # @num_workers.setter
    # def num_workers(self, num_workers):
    #     """Setter for the number of workers

    #     Parameters
    #     ----------
    #     num_workers : int

    #     """
    #     self._num_workers = num_workers

    @property
    def worker_type(self):
        """The callable that generates a worker object.

        Typically this is just the type from the class definition of
        the Worker where the constructor is called.

        """
        return self._worker_type

    # TODO remove after testing
    # @worker_type.setter
    # def worker_type(self, worker_type):
    #     """

    #     Parameters
    #     ----------
    #     worker_type :

    #     Returns
    #     -------

    #     """
    #     self._worker_type = worker_type

    def init(self, num_workers=None, **kwargs):
        """Runtime initialization and setting of function to map over walkers.

        Parameters
        ----------
        num_workers : int
            The number of worker processes to spawn

        segment_func : callable implementing the Runner.run_segment interface

        """

        super().init(**kwargs)

        # the number of workers must be given here or set as an object attribute
        if num_workers is None and self.num_workers is None:
            raise ValueError(
                "The number of workers must be given, received {}".format(
                    num_workers))

        # if the number of workers was given for this init() call use
        # that, otherwise we use the default that was specified when
        # the object was created
        elif num_workers is None and self.num_workers is not None:
            num_workers = self.num_workers

        # Establish communication queues
        self._task_queue = JoinableQueue()
        self._result_queue = Queue()

        # Start workers, giving them all the queues
        self._workers = []
        for i in range(num_workers):
            worker = self.worker_type(i, self._task_queue, self._result_queue,
                                      **self._worker_attributes)
            self._workers.append(worker)

        # start the worker processes
        for worker in self._workers:
            worker.start()

            logging.info("Worker process started as name: {}; PID: {}".format(
                worker.name, worker.pid))

    def cleanup(self, **kwargs):
        """Runtime post-simulation tasks.

        This is run either at the end of a successful simulation or
        upon an error in the main process of the simulation manager
        call to `run_cycle`.

        The Mapper class performs no actions here and all arguments
        are ignored.

        """

        # send poison pills (Stop signals) to the queues to stop them in a nice way
        # and let them finish up
        for i in range(self.num_workers):
            self._task_queue.put((None, None))

        # delete the queues and workers
        self._task_queue = None
        self._result_queue = None
        self._workers = None

    def _make_task(self, *args, **kwargs):
        """Generate a task from 'segment_func' attribute.

        Similar to partial evaluation (or currying).

        Args will be eventually used as the arguments to the call of
        'segment_func' by the worker processes when they receive the
        task from the queue.

        Returns
        -------
        task : Task object

        """
        return Task(self._func, *args, **kwargs)

    def map(self, *args):
        # docstring in superclass

        map_process = mp.current_process()
        logging.info("Mapping from process {}; PID {}".format(
            map_process.name, map_process.pid))

        # make tuples for the arguments to each function call
        task_args = zip(*args)

        num_tasks = len(args[0])
        # Enqueue the jobs
        for task_idx, task_arg in enumerate(task_args):

            # a task will be the actual task and its task idx so we can
            # sort them later
            self._task_queue.put((task_idx, self._make_task(*task_arg)))

        logging.info("Waiting for tasks to be run")

        # Wait for all of the tasks to finish
        self._task_queue.join()

        # workers_done = [worker.done for worker in self._workers]

        # if all(workers_done):

        # get the results out in an unordered way. We rely on the
        # number of tasks we know we put out because if you just try
        # to get from the queue until it is empty it will just wait
        # forever, since nothing is there. Alternatively it is risky
        # to implement a wait timeout or no wait in case there is a
        # small wait time.
        logging.info("Retrieving results")

        n_results = num_tasks
        results = []
        while n_results > 0:

            logging.info("trying to retrieve result: {}".format(n_results))

            result = self._result_queue.get()
            results.append(result)

            logging.info("Retrieved result {}: {}".format(n_results, result))

            n_results -= 1

        logging.info("No more results")

        logging.info("Retrieved results")

        # sort the results according to their task_idx
        results.sort()

        # save the task run times, so they can be accessed if desired,
        # after clearing the task times from the last mapping
        self._worker_segment_times = {i: [] for i in range(self.num_workers)}
        for task_idx, worker_idx, task_time, result in results:
            self._worker_segment_times[worker_idx].append(task_time)

        # then just return the values of the function
        return [result for task_idx, worker_idx, task_time, result in results]
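The Worker class used by this mapper is not included here. A sketch of the run loop such a worker would need in order to satisfy the mapper's contract: pull (task_idx, task) pairs from the task queue, stop on the (None, None) pill sent by cleanup(), time each task, push (task_idx, worker_idx, task_time, result) onto the result queue, and acknowledge every item with task_done() so that map() can join the queue (Task objects are assumed to be callable):

import time
from multiprocessing import Process

class Worker(Process):
    def __init__(self, worker_idx, task_queue, result_queue, **attributes):
        super().__init__()
        self.worker_idx = worker_idx
        self._task_queue = task_queue
        self._result_queue = result_queue

    def run(self):
        while True:
            task_idx, task = self._task_queue.get()
            if task is None:                  # (None, None) poison pill from cleanup()
                self._task_queue.task_done()
                break
            start = time.time()
            result = task()                   # assumed: Task implements __call__
            self._result_queue.put(
                (task_idx, self.worker_idx, time.time() - start, result))
            self._task_queue.task_done()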
Example #35
class TwitterTracker(object):
    def __init__(self, l_query, timeout, fileName_partialData):
        self.timeout = timeout
        self.l_query = l_query
        self.collection_name = os.environ[
            "COLLECTION_NAME"]  #str(uuid.uuid4())
        self.file_data = fileName_partialData

        self.status = STOPPED

        self.q_tweet = JoinableQueue(maxsize=0)
        self.collection = self.__DBConnection()
        self.__startTracker()

    def __DBConnection(self):
        client = MongoClient()
        self.db = client[C.TWITTER_DB]
        return self.db[self.collection_name]

    def __startTracker(self):

        process_tweetProcessor = Process(target=self.__runTweetProcessor)
        process_listener = Process(target=self.__runListener)

        process_listener.start()
        process_tweetProcessor.start()

        self.q_tweet.join()

        process_tweetProcessor.join()
        process_listener.join()

        self.status = FINISHED

    def __runTweetProcessor(self):
        tweetProcessor = TweetProcessor(self.q_tweet, self.collection,
                                        self.l_query, self.file_data)
        tweetProcessor.run()

    def __runListener(self):
        auth = tweepy.OAuthHandler(C.CONSUMER_KEY, C.CONSUMER_SECRET)
        auth.set_access_token(C.ACCESS_TOKEN, C.ACCESS_TOKEN_SECRET)
        listener = Listener(self.q_tweet)
        stream = tweepy.Stream(auth, listener)
        self.status = INITIATED
        start_time = time.time()
        try:
            stream.filter(languages=['en'], track=self.l_query, async=True)

            pass_time = time.time() - start_time
            while (pass_time < self.timeout):
                time.sleep(self.timeout - pass_time)
                pass_time = time.time() - start_time
                #print "SLEEPING" + str(pass_time)

            self.q_tweet.put(C.TOKEN_LAST_TWEET)
            stream.disconnect()

        except Exception as e:
            s = str(e)
            self.status = ERROR
            stream.disconnect()

    def getStatus(self):
        return self.status

    def getTweets(self):
        if self.status == FINISHED:
            return [t for t in self.collection.find()]
        else:
            return None

    def getTweets_ByTrackKey(self, key):
        if self.status == FINISHED:
            result = self.db.command('text', self.collection_name, search=key)
            return [t['obj'] for t in result['results']]
        else:
            return None
Example #36
class Servo:
    def __init__(self,
                 channel,
                 pulse_width_start,
                 pulse_width_stop,
                 init_angle,
                 turnoff_timeout=0):
        """Define a new software controllable servo with adjustable speed control

        Keyword arguments:
        pulse_width_start -- The minimum pulse width defining the lowest angle
        pulse_width_stop -- The maximum pulse width defining the biggest angle
        init_angle -- Initial angle that the servo should take when it is powered on. Range is 0 to 180deg
        turnoff_timeout -- number of seconds after which the servo is turned off if no command is received. 0 = never turns off
        """

        self.pulse_width_start = pulse_width_start
        self.pulse_width_stop = pulse_width_stop
        self.turnoff_timeout = turnoff_timeout

        self.current_pulse_width = init_angle * (
            self.pulse_width_stop -
            self.pulse_width_start) / 180.0 + self.pulse_width_start
        self.last_pulse_width = self.current_pulse_width

        self.queue = JoinableQueue(1000)
        self.lastCommandTime = 0

        self.t = Thread(target=self._wait_for_event)
        self.t.daemon = True
        self.running = True
        self.t.start()

        self.pwm = PWM_pin(channel, 100, self.current_pulse_width)

        # Set up the Shift register for enabling this servo
        if channel == "P9_14":
            shiftreg_nr = 3
        elif channel == "P9_16":
            shiftreg_nr = 2
        else:
            logging.warning(
                "Tried to assign servo to an unknown channel/pin: " +
                str(channel))
            return

        ShiftRegister.make()
        self.shift_reg = ShiftRegister.registers[shiftreg_nr]
        self.set_enabled()

    def set_enabled(self, is_enabled=True):
        if is_enabled:
            self.shift_reg.add_state(0x01)
        else:
            self.shift_reg.remove_state(0x01)

    def set_angle(self, angle, speed=60, asynchronous=True):
        ''' Set the servo angle to the given value, in degrees, with the given speed in deg/sec '''
        pulse_width = angle * (self.pulse_width_stop - self.pulse_width_start
                               ) / 180.0 + self.pulse_width_start
        last_angle = (self.last_pulse_width - self.pulse_width_start) / float(
            self.pulse_width_stop - self.pulse_width_start) * 180.0

        t = (math.fabs(angle - last_angle) / speed) / math.fabs(angle -
                                                                last_angle)

        for w in xrange(int(self.last_pulse_width * 1000),
                        int(pulse_width * 1000),
                        (1 if pulse_width >= self.last_pulse_width else -1)):
            self.queue.put((w / 1000.0, t))

        self.last_pulse_width = pulse_width

        if not asynchronous:
            self.queue.join()

    def turn_off(self):
        self.pwm.set_enabled(False)

    def stop(self):
        self.running = False
        self.t.join()
        self.turn_off()

    def _wait_for_event(self):
        while self.running:
            try:
                ev = self.queue.get(block=True, timeout=1)
            except Queue.Empty:
                if self.turnoff_timeout > 0 and self.lastCommandTime > 0 and time.time(
                ) - self.lastCommandTime > self.turnoff_timeout:
                    self.lastCommandTime = 0
                    self.turn_off()
                continue
            except Exception:
                # Avoid printing the exception to the output
                pass

            self.current_pulse_width = ev[0]
            self.pwm.set_value(self.current_pulse_width)
            self.lastCommandTime = time.time()
            time.sleep(ev[1])

            self.queue.task_done()
Example #37
def main():
    parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
    parser.add_argument('--model',
                        required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument(
        '--alphabet',
        required=True,
        help=
        'Path to the configuration file specifying the alphabet used by the network'
    )
    parser.add_argument('--lm',
                        required=True,
                        help='Path to the language model binary file')
    parser.add_argument(
        '--trie',
        required=True,
        help=
        'Path to the language model trie file created with native_client/generate_trie'
    )
    parser.add_argument('--csv',
                        required=True,
                        help='Path to the CSV source file')
    parser.add_argument(
        '--proc',
        required=False,
        default=cpu_count(),
        type=int,
        help='Number of processes to spawn, defaulting to number of CPUs')
    args = parser.parse_args()

    work_todo = JoinableQueue()
    work_done = Queue()

    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker,
                                 args=(args.model, args.alphabet, args.lm,
                                       args.trie, work_todo, work_done, i),
                                 daemon=True,
                                 name='tflite_process_{}'.format(i))
        worker_process.start()
        processes.append(worker_process)

    print([x.name for x in processes])

    ground_truths = []
    predictions = []
    losses = []

    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        for row in csvreader:
            work_todo.put({
                'filename': row['filename'],
                'transcript': row['transcript']
            })
    work_todo.join()

    while (not work_done.empty()):
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances,
                                         losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
Example #38
def main():
    # For py2exe builds
    freeze_support()

    # Handle SIGINT to terminate processes
    signal.signal(signal.SIGINT, sigint_handler)

    start_time = time()
    #--PLUGINS INITIALIZATION--
    sslyze_plugins = PluginsFinder()
    available_plugins = sslyze_plugins.get_plugins()
    available_commands = sslyze_plugins.get_commands()
    # Create the command line parser and the list of available options
    sslyze_parser = CommandLineParser(available_plugins, PROJECT_VERSION)

    try:  # Parse the command line
        (command_list, target_list,
         shared_settings) = sslyze_parser.parse_command_line()
    except CommandLineParsingError as e:
        print e.get_error_msg()
        return

    if not shared_settings['quiet'] and shared_settings['xml_file'] != '-':
        print '\n\n\n' + _format_title('Available plugins')
        print ''
        for plugin in available_plugins:
            print '  ' + plugin.__name__
        print '\n\n'

    #--PROCESSES INITIALIZATION--
    # Three processes per target from MIN_PROCESSES up to MAX_PROCESSES
    nb_processes = max(MIN_PROCESSES, min(MAX_PROCESSES, len(target_list) * 3))
    if command_list.https_tunnel:
        nb_processes = 1  # Let's not kill the proxy

    task_queue = JoinableQueue()  # Processes get tasks from task_queue and
    result_queue = JoinableQueue(
    )  # put the result of each task in result_queue

    # Spawn a pool of processes, and pass them the queues
    for _ in xrange(nb_processes):
        priority_queue = JoinableQueue()  # Each process gets a priority queue
        p = WorkerProcess(priority_queue, task_queue, result_queue, available_commands, \
                          shared_settings)
        p.start()
        process_list.append(
            (p,
             priority_queue))  # Keep track of each process and priority_queue

    #--TESTING SECTION--
    # Figure out which hosts are up and fill the task queue with work to do
    if not shared_settings['quiet'] and shared_settings['xml_file'] != '-':
        print _format_title('Checking host(s) availability')

    targets_OK = []
    targets_ERR = []

    # Each server gets assigned a priority queue for aggressive commands
    # so that they're never run in parallel against this single server
    cycle_priority_queues = cycle(process_list)
    target_results = ServersConnectivityTester.test_server_list(
        target_list, shared_settings)
    for target in target_results:
        if target is None:
            break  # None is a sentinel here

        # Send tasks to worker processes
        targets_OK.append(target)
        (_, current_priority_queue) = cycle_priority_queues.next()

        for command in available_commands:
            if getattr(command_list, command):
                args = command_list.__dict__[command]

                if command in sslyze_plugins.get_aggressive_commands():
                    # Aggressive commands should not be run in parallel against
                    # a given server so we use the priority queues to prevent this
                    current_priority_queue.put((target, command, args))
                else:
                    # Normal commands get put in the standard/shared queue
                    task_queue.put((target, command, args))

    for exception in target_results:
        targets_ERR.append(exception)

    if not shared_settings['quiet'] and shared_settings['xml_file'] != '-':
        print ServersConnectivityTester.get_printable_result(
            targets_OK, targets_ERR)
        print '\n\n'

    # Put a 'None' sentinel in the queue to let each process know when every
    # task has been completed
    for (proc, priority_queue) in process_list:
        task_queue.put(None)  # One sentinel in the task_queue per proc
        priority_queue.put(None)  # One sentinel in each priority_queue

    # Keep track of how many tasks have to be performed for each target
    task_num = 0
    for command in available_commands:
        if getattr(command_list, command):
            task_num += 1

    # --REPORTING SECTION--
    processes_running = nb_processes

    # XML output
    xml_output_list = []

    # Each host has a list of results
    result_dict = {}
    for target in targets_OK:
        result_dict[target] = []

    # If all processes have stopped, all the work is done
    while processes_running:
        result = result_queue.get()

        if result is None:  # Getting None means that one process was done
            processes_running -= 1

        else:  # Getting an actual result
            (target, command, plugin_result) = result
            result_dict[target].append((command, plugin_result))

            if len(result_dict[target]) == task_num:  # Done with this target
                # Print the results and update the xml doc
                if shared_settings['xml_file']:
                    xml_output_list.append(
                        _format_xml_target_result(target, result_dict[target]))
                    if not shared_settings[
                            'quiet'] and shared_settings['xml_file'] != '-':
                        print _format_txt_target_result(
                            target, result_dict[target])
                else:
                    print _format_txt_target_result(target,
                                                    result_dict[target])

        result_queue.task_done()

    # --TERMINATE--

    # Make sure all the processes had time to terminate
    task_queue.join()
    result_queue.join()
    #[process.join() for process in process_list] # Causes interpreter shutdown errors
    exec_time = time() - start_time

    # Output XML doc to a file if needed
    if shared_settings['xml_file']:
        result_xml_attr = {
            'httpsTunnel': str(shared_settings['https_tunnel_host']),
            'totalScanTime': str(exec_time),
            'defaultTimeout': str(shared_settings['timeout']),
            'startTLS': str(shared_settings['starttls'])
        }

        result_xml = Element('results', attrib=result_xml_attr)

        # Sort results in alphabetical order to make the XML files (somewhat) diff-able
        xml_output_list.sort(key=lambda xml_elem: xml_elem.attrib['host'])
        for xml_element in xml_output_list:
            result_xml.append(xml_element)

        xml_final_doc = Element('document',
                                title="SSLyze Scan Results",
                                SSLyzeVersion=PROJECT_VERSION,
                                SSLyzeWeb=PROJECT_URL)
        # Add the list of invalid targets
        xml_final_doc.append(
            ServersConnectivityTester.get_xml_result(targets_ERR))
        # Add the output of the plugins
        xml_final_doc.append(result_xml)

        # Remove characters that are illegal for XML
        # https://lsimons.wordpress.com/2011/03/17/stripping-illegal-characters-out-of-xml-in-python/
        xml_final_string = tostring(xml_final_doc, encoding='UTF-8')
        illegal_xml_chars_RE = re.compile(
            u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]')
        xml_sanitized_final_string = illegal_xml_chars_RE.sub(
            '', xml_final_string)

        # Hack: Prettify the XML file so it's (somewhat) diff-able
        xml_final_pretty = minidom.parseString(
            xml_sanitized_final_string).toprettyxml(indent="  ",
                                                    encoding="utf-8")

        if shared_settings['xml_file'] == '-':
            # Print XML output to the console if needed
            print xml_final_pretty
        else:
            # Otherwise save the XML output to the specified file
            with open(shared_settings['xml_file'], 'w') as xml_file:
                xml_file.write(xml_final_pretty)

    if not shared_settings['quiet'] and shared_settings['xml_file'] != '-':
        print _format_title('Scan Completed in {0:.2f} s'.format(exec_time))
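
# --- Hedged sketch (not from the original snippet) ---
# The worker processes themselves are not shown above. The loop below is only a
# minimal illustration of the queue protocol the main code relies on: each
# process drains its private priority_queue (aggressive commands) and the
# shared task_queue, treats None as the poison pill, calls task_done() for
# every item so the join() calls can return, and finally posts None on
# result_queue so the reporting loop can count finished processes.
def worker_sketch(priority_queue, task_queue, result_queue):
    for queue_in in (priority_queue, task_queue):
        while True:
            item = queue_in.get()
            if item is None:  # poison pill: no more work on this queue
                queue_in.task_done()
                break
            target, command, args = item
            plugin_result = 'result for %s' % command  # placeholder for the real plugin call
            result_queue.put((target, command, plugin_result))
            queue_in.task_done()
    result_queue.put(None)  # tells the reporting loop that this process is done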
Пример #39
0
def main(_):
    print_flags()
    initialize_folders()

    env = gym.make(FLAGS.env_name)

    if FLAGS.is_train and FLAGS.is_monitor:

        def monitor_frequency_func(iteration):
            return (iteration +
                    FLAGS.monitor_frequency) % FLAGS.monitor_frequency == 0

        env = wrappers.Monitor(env,
                               FLAGS.log_dir + "/" + FLAGS.scope,
                               video_callable=monitor_frequency_func,
                               resume=FLAGS.is_load)

    job_queue = JoinableQueue()
    result_queue = Queue()
    e = 0

    if FLAGS.n_processes == 1 or not FLAGS.is_train:
        reacher = TRPOAgent(FLAGS.env_name, FLAGS.scope, FLAGS.max_kl,
                            job_queue, result_queue)
        reacher.start()
    else:
        # PARALLEL TRAINING OFFERS ALMOST LINEAR IMPROVEMENT ON 2 processors
        proll = ParallelRollout(FLAGS.env_name, FLAGS.traj_len,
                                FLAGS.n_processes, FLAGS.max_kl)
        parallel_reacher = ParallelTRPOAgent(FLAGS.env_name, FLAGS.scope,
                                             FLAGS.max_kl, job_queue,
                                             result_queue, proll)
        parallel_reacher.start()

    if FLAGS.is_load:
        job_queue.put(('load', (FLAGS.checkpoint_dir + '/' + FLAGS.scope, )))
        job_queue.join()

    try:
        while True:

            e += 1
            if FLAGS.is_train:
                print("EPISODE =", e)
                start = time.time()
                job_queue.put(
                    ('learn', (FLAGS.gamma, FLAGS.n_trajs, FLAGS.traj_len)))
                job_queue.join()
                end = time.time()
                print("ROLLOUT TAKES", end - start)

            obs = env.reset()
            for i in range(FLAGS.traj_len):
                job_queue.put(('act', (obs, )))
                job_queue.join()
                obs, _, done, _ = env.step(result_queue.get())
                if not FLAGS.is_train:
                    env.render()
                if done: break

            if e % FLAGS.checkpoint_freq == 0 and FLAGS.is_train:
                job_queue.put(
                    ('save', (FLAGS.checkpoint_dir + '/' + FLAGS.scope, )))
                job_queue.join()
                job_queue.put(('log', (
                    FLAGS.log_dir + '/' + FLAGS.scope,
                    'my_log.json',
                )))
                job_queue.join()

    except KeyboardInterrupt:
        print('You pressed Ctrl+C!')
        if FLAGS.is_train and FLAGS.is_monitor:
            env.close()
        # proll and parallel_reacher only exist in the multi-process branch
        if FLAGS.is_train and FLAGS.n_processes > 1:
            proll.end()
            parallel_reacher.join()
        sys.exit(0)
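
# --- Hedged sketch (not from the original snippet) ---
# TRPOAgent/ParallelTRPOAgent are not shown. The main loop above talks to the
# agent process through (command, args) tuples on job_queue and reads answers
# from result_queue, so the agent side plausibly looks like the dispatch loop
# below; the handlers dict and its callables are hypothetical placeholders.
def agent_loop_sketch(job_queue, result_queue, handlers):
    """handlers: dict mapping a command name ('learn', 'act', ...) to a callable."""
    while True:
        command, args = job_queue.get()
        try:
            result = handlers[command](*args)
            if command == 'act':
                result_queue.put(result)  # only 'act' replies with a value
        finally:
            job_queue.task_done()  # lets job_queue.join() in main() return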
Пример #40
0
    for x in outs:
        print(x.name)
    for x in blobs:
        print(x.name)
    blobs = list(
        filter(
            lambda x: os.path.basename(x.name).split(".")[0] not in list(
                map(lambda x: "_".join(x.name.split("_")[1:]).split(".")[0],
                    outs)), blobs))
    print(len(blobs))
    list(map(lambda x: downloads.put(x), blobs))

    downloaders = map(
        lambda x: Process(target=utils.consume,
                          args=(downloads, download_event,
                                ((IN_ACC, IN_KEY, IN_CONTAINER), files
                                 ), download_blob)), range(0, 2))
    list(map(lambda proc: proc.start(), downloaders))

    processors = map(
        lambda x: Process(target=utils.consume,
                          args=(files, process_event,
                                (RUNNER,
                                 (OUT_ACC, OUT_KEY, OUT_CONTAINER)), process)),
        range(0, 1))
    list(map(lambda proc: proc.start(), processors))

    downloads.join()
    files.join()
    download_event.set()
    process_event.set()
Пример #41
0
    #upload to cloud
    print('import to Cloud')
    queue = JoinableQueue()
    pr = 1
    pro = []

    for z in ZPGFile.objects.filter(is_movie=0, on_cloud=0).order_by("file_date"):
        queue.put(z)

    for i in range(pr):
        p = Process(target=uploadF, args=(cli,))
        p.daemon = True
        p.start()
        pro.append(p)

    queue.join()

    for p in pro:
        p.join()


    # back up the database file
    dbfile_path = settings.DATABASES['default']['NAME']
    dbfile_name = settings.DATABASES['default']['NAME'].split('/')[-1]


    cli.move_folder_or_file(dbfile_name, dbfile_name + "_old")
    # remove_folder_or_file(dbfile_name)
    cli.upload_file(dbfile_path, dbfile_name)

    print ("DB file uploaded")
Пример #42
0
def files_to_map(
    files: List[str],
    out_dir: str = ".",
    min_zoom: int = 0,
    title: str = "FitsMap",
    task_procs: int = 0,
    procs_per_task: int = 0,
    catalog_delim: str = ",",
    cat_wcs_fits_file: str = None,
    max_catalog_zoom: int = -1,
    tile_size: Tuple[int, int] = [256, 256],
    image_engine: str = IMG_ENGINE_MPL,
    norm_kwargs: dict = {},
    rows_per_column: int = np.inf,
    prefer_xy: bool = False,
) -> None:
    """Converts a list of files into a LeafletJS map.

    Args:
        files (List[str]): List of files to convert into a map, can include image
                           files (.fits, .png, .jpg) and catalog files (.cat)
        out_dir (str): Directory to place the generated web page and associated
                       subdirectories
        min_zoom (int): The minimum zoom to create tiles for. The default value
                        is 0, but it can be helpful to set it to a value
                        greater than zero if you're running out of memory, as the
                        lowest zoom images can be the most memory intensive.
        title (str): The title to be placed on the webpage
        task_procs (int): The number of tasks to run in parallel
        procs_per_task (int): The number of tiles to process in parallel
        catalog_delim (str): The delimiter for catalog (.cat) files. Default is
                             a comma.
        cat_wcs_fits_file (str): A fits file that has the WCS that will be used
                                 to map ra and dec coordinates from the catalog
                                 files to x and y coordinates in the map
        max_catalog_zoom (int): The zoom level to stop clustering on, the
                                default is the max zoom level of the image. For
                                images with a high source density, setting this
                                higher than the max zoom will help with
                                performance.
        tile_size (Tuple[int, int]): The tile size for the leaflet map. Currently
                                     only [256, 256] is supported.
        image_engine (str): The method that converts the image data into image
                            tiles. The default is convert.IMG_ENGINE_MPL
                            (matplotlib) the other option is
                            convert.IMG_ENGINE_PIL (pillow). Pillow can render
                            FITS files but doesn't do any scaling. Pillow may
                            be more performant for only PNG images.
        norm_kwargs (dict): Optional normalization keyword arguments passed to
                            `astropy.visualization.simple_norm`. The default is
                            linear scaling using min/max values. See documentation
                            for more information: https://docs.astropy.org/en/stable/api/astropy.visualization.mpl_normalize.simple_norm.html
        rows_per_column (int): If converting a catalog, the number of items to
                               have in each column of the marker popup.
                               By default produces all values in a single
                               column. Setting this value can make it easier to
                               work with catalogs that have a lot of values for
                               each object.
    Returns:
        None
    """

    if len(files) == 0:
        raise ValueError("No files provided `files` is an empty list")

    unlocatable_files = list(filterfalse(os.path.exists, files))
    if len(unlocatable_files) > 0:
        raise FileNotFoundError(unlocatable_files)

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    if "js" not in os.listdir(out_dir):
        os.mkdir(os.path.join(out_dir, "js"))

    if "css" not in os.listdir(out_dir):
        os.mkdir(os.path.join(out_dir, "css"))

    img_f_kwargs = dict(
        tile_size=tile_size,
        min_zoom=min_zoom,
        image_engine=image_engine,
        out_dir=out_dir,
        mp_procs=procs_per_task,
        norm_kwargs=norm_kwargs,
    )

    img_files = filter_on_extension(files, IMG_FORMATS)
    img_layer_names = list(map(get_map_layer_name, img_files))
    img_job_f = partial(tile_img, **img_f_kwargs)

    cat_files = filter_on_extension(files, CAT_FORMAT)
    cat_layer_names = list(map(get_map_layer_name, cat_files))

    max_x, max_y = utils.peek_image_info(img_files)
    max_dim = max(max_x, max_y)
    if len(cat_files) > 0:
        # get highlevel image info for catalogging function
        max_zoom = int(np.log2(2**np.ceil(np.log2(max_dim)) / tile_size[0]))
        max_dim = 2**max_zoom * tile_size[0]
        if max_catalog_zoom == -1:
            max_zoom = int(np.log2(2**np.ceil(np.log2(max_dim)) /
                                   tile_size[0]))
        else:
            max_zoom = max_catalog_zoom

        cat_job_f = partial(
            tile_markers,
            cat_wcs_fits_file,
            out_dir,
            catalog_delim,
            procs_per_task,
            prefer_xy,
            min_zoom,
            max_zoom,
            tile_size[0],
            max_dim,
            max_dim,
        )
    else:
        cat_job_f = None

    pbar_locations = count(0)

    img_tasks = zip(repeat(img_job_f), zip(img_files, pbar_locations))
    cat_tasks = zip(repeat(cat_job_f), zip(cat_files, pbar_locations))
    tasks = chain(img_tasks, cat_tasks)

    if task_procs:
        q = JoinableQueue()
        any(map(lambda t: q.put(t), tasks))

        workers = [
            Process(target=async_worker, args=[q]) for _ in range(task_procs)
        ]
        [w.start() for w in workers]  # can use any-map if this returns None

        q.join()
    else:
        any(map(lambda func_args: func_args[0](*func_args[1]), tasks))

    ns = "\n" * (next(pbar_locations) - 1)
    print(ns + "Building index.html")

    if cat_wcs_fits_file is not None:
        cat_wcs = WCS(cat_wcs_fits_file)
    else:
        cat_wcs = None

    cartographer.chart(
        out_dir,
        title,
        img_layer_names,
        cat_layer_names,
        cat_wcs,
        rows_per_column,
        (max_x, max_y),
    )
    print("Done.")
Пример #43
0
class DeepZoomStaticTiler(object):
    """Handles generation of tiles and metadata for all images in a slide."""

    def __init__(self, slidepath, basename, format, tile_size, overlap,
                limit_bounds, quality, workers, with_viewer):
        if with_viewer:
            # Check extra dependency before doing a bunch of work
            import jinja2
        self._slide = open_slide(slidepath)
        self._basename = basename
        self._format = format
        self._tile_size = tile_size
        self._overlap = overlap
        self._limit_bounds = limit_bounds
        self._queue = JoinableQueue(2 * workers)
        self._workers = workers
        self._with_viewer = with_viewer
        self._dzi_data = {}
        for _i in range(workers):
            TileWorker(self._queue, slidepath, tile_size, overlap,
                        limit_bounds, quality).start()

    def run(self):
        self._run_image()
        if self._with_viewer:
            for name in self._slide.associated_images:
                self._run_image(name)
            self._write_html()
            self._write_static()
        self._shutdown()

    def _run_image(self, associated=None):
        """Run a single image from self._slide."""
        if associated is None:
            image = self._slide
            if self._with_viewer:
                basename = os.path.join(self._basename, VIEWER_SLIDE_NAME)
            else:
                basename = self._basename
        else:
            image = ImageSlide(self._slide.associated_images[associated])
            basename = os.path.join(self._basename, self._slugify(associated))
        dz = DeepZoomGenerator(image, self._tile_size, self._overlap,
                    limit_bounds=self._limit_bounds)
        tiler = DeepZoomImageTiler(dz, basename, self._format, associated,
                    self._queue)
        tiler.run()
        self._dzi_data[self._url_for(associated)] = tiler.get_dzi()

    def _url_for(self, associated):
        if associated is None:
            base = VIEWER_SLIDE_NAME
        else:
            base = self._slugify(associated)
        return '%s.dzi' % base

    def _write_html(self):
        import jinja2
        env = jinja2.Environment(loader=jinja2.PackageLoader(__name__),
                    autoescape=True)
        template = env.get_template('slide-multipane.html')
        associated_urls = dict((n, self._url_for(n))
                    for n in self._slide.associated_images)
        try:
            mpp_x = self._slide.properties[openslide.PROPERTY_NAME_MPP_X]
            mpp_y = self._slide.properties[openslide.PROPERTY_NAME_MPP_Y]
            mpp = (float(mpp_x) + float(mpp_y)) / 2
        except (KeyError, ValueError):
            mpp = 0
        # Embed the dzi metadata in the HTML to work around Chrome's
        # refusal to allow XmlHttpRequest from file:///, even when
        # the originating page is also a file:///
        data = template.render(slide_url=self._url_for(None),
                    slide_mpp=mpp,
                    associated=associated_urls,
                    properties=self._slide.properties,
                    dzi_data=json.dumps(self._dzi_data))
        with open(os.path.join(self._basename, 'index.html'), 'w') as fh:
            fh.write(data)

    def _write_static(self):
        basesrc = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                    'static')
        basedst = os.path.join(self._basename, 'static')
        self._copydir(basesrc, basedst)
        self._copydir(os.path.join(basesrc, 'images'),
                    os.path.join(basedst, 'images'))

    def _copydir(self, src, dest):
        if not os.path.exists(dest):
            os.makedirs(dest)
        for name in os.listdir(src):
            srcpath = os.path.join(src, name)
            if os.path.isfile(srcpath):
                shutil.copy(srcpath, os.path.join(dest, name))

    @classmethod
    def _slugify(cls, text):
        text = normalize('NFKD', text.lower()).encode('ascii', 'ignore').decode()
        return re.sub('[^a-z0-9]+', '_', text)

    def _shutdown(self):
        for _i in range(self._workers):
            self._queue.put(None)
        self._queue.join()
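
# --- Hedged usage sketch (not from the original snippet) ---
# Shows how the constructor arguments above fit together; the slide path and
# tiling parameters are made-up values for illustration only.
if __name__ == '__main__':
    tiler = DeepZoomStaticTiler(
        slidepath='example.svs',    # hypothetical slide file
        basename='example_tiles',   # output directory
        format='jpeg',
        tile_size=254,
        overlap=1,
        limit_bounds=True,
        quality=90,
        workers=4,
        with_viewer=True,           # also writes the HTML viewer
    )
    tiler.run()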
Пример #44
0
    p.daemon = True
    p.start()
    span_procs[span] = p

if CONFIG["report_csv"]:
    prepare_csv()

while len(spans_to_process) > 0 or len(span_procs) > 0:
    s = span_done.get()
    span = s["span"]
    span_procs[s["span"]].join()
    log_messages.put("%s done" %s['span'])
    del span_procs[s["span"]]

    # Create a new process if needed
    print("still %s spans to process" %len(spans_to_process))
    if len(spans_to_process) > 0:
        next_span = spans_to_process.pop()
        span_procs[next_span] = Process(target = process_span, args = (next_span, span_done, log_messages))
        span_procs[next_span].daemon = True
        span_procs[next_span].start()
        print("new process on %s" %next_span)

    if CONFIG["report_csv"]:
        csv_writing(s)
    # mark this span as processed regardless of CSV reporting so that
    # span_done.join() below can return
    span_done.task_done()

span_done.join()
log_messages.join()
logger.terminate()
Пример #45
0
        writer.writerow([
            'hospitalName', 'importDate', 'device', 'project', 'markPurpose',
            'collectPath', 'source'
        ])

    hospitalName = "CS898001"
    importDate = str(datetime.datetime.now()).split(' ')[0]
    device = "CT"
    project = "CT_Chest"
    source = "-1"
    lists = [hospitalName, importDate, device, project, source]
    q = JoinableQueue()
    workers = []  # avoid shadowing the multiprocessing module
    for i in range(cpu_count() - 2):
        p = Process(target=Worker, args=(q, ))
        p.daemon = True
        p.start()
        workers.append(p)
    for filename in os.listdir(src_path):
        if filename.endswith('.csv'):
            continue
        dcm_dir = os.path.join(src_path, filename)
        q.put([dcm_dir, csv_path, lists])
    q.join()
    for i in range(0, cpu_count() - 2):
        q.put(None)
    for p in workers:
        p.join()

    print('Elapsed time: ', float(time.time() - start))
Пример #46
0
    def parallel(self):
        from multiprocessing import Process, Queue, JoinableQueue

        if debug:
            print(inspect.stack()[0][3])

        self.ntrajs = []
        for i in range(self.cpus):
            self.ntrajs.append(
                min(int(np.floor(float(self.ntraj) / self.cpus)),
                    self.ntraj - sum(self.ntrajs)))
        cnt = sum(self.ntrajs)
        while cnt < self.ntraj:
            for i in range(self.cpus):
                self.ntrajs[i] += 1
                cnt += 1
                if (cnt >= self.ntraj):
                    break
        self.ntrajs = np.array(self.ntrajs)
        self.ntrajs = self.ntrajs[np.where(self.ntrajs > 0)]
        self.nprocs = len(self.ntrajs)
        sols = []
        processes = []
        resq = JoinableQueue()
        resq.join()

        if debug:
            print("Number of cpus: " + str(self.cpus))
            print("Trying to start " + str(self.nprocs) + " process(es).")
            print("Number of trajectories for each process: " +
                  str(self.ntrajs))

        for i in range(self.nprocs):
            p = Process(target=self.evolve_serial,
                        args=((resq, self.ntrajs[i], i,
                               self.seed * (i + 1)), ))
            p.start()
            processes.append(p)
        cnt = 0

        while True:
            try:
                sols.append(resq.get())
                resq.task_done()
                cnt += 1
                if (cnt >= self.nprocs):
                    break
            except KeyboardInterrupt:
                break
            except:
                pass

        resq.join()
        for proc in processes:
            try:
                proc.join()
            except KeyboardInterrupt:
                if debug:
                    print("Cancel thread on keyboard interrupt")
                proc.terminate()
                proc.join()
        resq.close()
        return sols
Пример #47
0
class DAQLogger():
    def __init__(self, options):

        self.process = None
        self.listener = None

        #self.log_queue = BufferedReadQueue()
        self.log_queue = JoinableQueue()
        self.raw_queue = JoinableQueue()

        settings = options.settings

        self.settings = {}

        if os.name == 'nt':
            self.od = "%s\\" % settings.GetStrElemVal("LogDir", 'log')
        elif os.name == 'posix':
            self.od = "%s/" % settings.GetStrElemVal("LogDir", 'log')
        if not os.path.exists(self.od):
            os.makedirs(self.od)

        self.settings['od'] = self.od

        self.settings['name'] = settings.GetStrElemVal("station_name",
                                                       'station_name')
        print "Station Name: %s" % self.settings['name']

        self.settings['email'] = settings.GetIntElemVal("ErrorEmail", 1)
        print "Error email: ON" if self.settings[
            'email'] == 1 else "Error email: OFF"
        if self.settings['email'] == 1:
            self.settings['ErrorEmailFrom'] = settings.GetStrElemVal(
                "ErrorEmailFrom", "ErrorEmailFrom")
            self.settings['ErrorEmailTo'] = settings.GetStrElemVal(
                "ErrorEmailTo", "ErrorEmailTo")
            self.settings['ErrorEmailPw'] = settings.GetStrElemVal(
                "ErrorEmailPw", "ErrorEmailPw")

            print "Sending log emails from: %s" % self.settings[
                'ErrorEmailFrom']
            print "Sending log emails to: %s" % self.settings["ErrorEmailTo"]
        self.settings['post'] = settings.GetIntElemVal("ErrorPost", 1)
        print "Error POST: ON" if self.settings[
            'post'] == 1 else "Error POST: OFF"

        # this is main logger level
        self.settings['loggerlevel'] = self._process_log_level(
            "DEBUG")  # Hard code to debug to allow max flexibility

        #self.settings['consolelevel'] = self._process_log_level(settings.GetStrElemVal("ConsoleLevel", "TIMESTAMP"))

        if options.debug_on:
            #print "Setting console display level to 'DEBUG'"
            #self.settings['consolelevel'] = self._process_log_level("DEBUG")
            self.settings['consolelevel'] = self._process_log_level("DEBUG")
        else:
            self.settings['consolelevel'] = self._process_log_level(
                settings.GetStrElemVal("ConsoleLevel", "TIMESTAMP"))
            #print "Setting console display level to 'TIMESTAMP'"
            #self.settings['consolelevel'] = self._process_log_level("TIMESTAMP")
            #self.settings['consolelevel'] = TIMESTAMP
        print "Setting console display level: %s" % self._process_log_level(
            self.settings['consolelevel'])

        self.settings['logfilelevel'] = self._process_log_level(
            settings.GetStrElemVal("LogFileLevel", "WARNING"))
        print "Setting logfile level: %s" % self._process_log_level(
            self.settings['logfilelevel'])

        self.settings['postfilelevel'] = self._process_log_level(
            settings.GetStrElemVal("PostLevel", "WARNING"))
        print "Setting POST level: %s" % self._process_log_level(
            self.settings['postfilelevel'])

        self.settings['posturl'] = settings.GetStrElemVal(
            "LogPostUrl", "/field_sites_logs/logging.php")
        self.settings['postserver'] = settings.GetStrElemVal(
            "LogPostServer", "vlf-engineering.stanford.edu:80")

        # Setup basic logger. Will overwrite
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.DEBUG)

        #print "Logger level set."

        self.listener = Process(target=_log_listener,
                                args=(self.log_queue, self.raw_queue))
        self.listener.daemon = True
        self.listener.start()
        self.lpid = self.listener.pid
        self._log("Starting Logger Listener: %d" % self.lpid)

    def start(self):
        self.process = Process(target=_log_processing,
                               args=(self.settings, self.raw_queue))
        self.process.daemon = True
        self.process.start()
        self.pid = self.process.pid
        self._log("Starting Logging Thread: %d" % self.pid)

    def stop(self):
        self._log("Closing down logging queue.")
        self.log_queue.put(None)

        self.raw_queue.join()
        self.log_queue.join()

        self.process.terminate()
        logging.shutdown()

        #print "Logger finished."

    def _process_log_level(self, level):
        """
        if level == "DEBUG":
            return logging.DEBUG
        elif level == "INFO":
            return logging.INFO
        elif level == "WARNING":
            return logging.WARNING
        elif level == "CRITICAL":
            return logging.CRITICAL
        else:
            return logging.INFO
        """
        return getLevelName(level)

    def _log(self, message):
        # Log messages within logger
        #print "got into log"
        #Add timestamp

        log_entry = LogRecord("LOG", logging.INFO, "", 0, message, (), None,
                              None)

        self.log_queue.put(log_entry)
Пример #48
0
def main(_):
    parser = argparse.ArgumentParser(description='ProjE.')
    parser.add_argument('--data',
                        dest='data_dir',
                        type=str,
                        help="Data folder",
                        default='./data/FB15k/')
    parser.add_argument('--lr',
                        dest='lr',
                        type=float,
                        help="Learning rate",
                        default=0.01)
    parser.add_argument("--dim",
                        dest='dim',
                        type=int,
                        help="Embedding dimension",
                        default=200)
    parser.add_argument("--batch",
                        dest='batch',
                        type=int,
                        help="Batch size",
                        default=200)
    parser.add_argument("--comb",
                        dest="combination_method",
                        type=str,
                        help="Combination method",
                        default='simple')
    parser.add_argument("--worker",
                        dest='n_worker',
                        type=int,
                        help="Evaluation worker",
                        default=3)
    parser.add_argument("--generator",
                        dest='n_generator',
                        type=int,
                        help="Data generator",
                        default=10)
    parser.add_argument("--eval_batch",
                        dest="eval_batch",
                        type=int,
                        help="Evaluation batch size",
                        default=500)
    parser.add_argument("--save_dir",
                        dest='save_dir',
                        type=str,
                        help="Model path",
                        default='./ProjE_ckpt/')
    parser.add_argument("--load_model",
                        dest='load_model',
                        type=str,
                        help="Model file",
                        default="")
    parser.add_argument("--save_per",
                        dest='save_per',
                        type=int,
                        help="Save per x iteration",
                        default=10)
    parser.add_argument("--eval_per",
                        dest='eval_per',
                        type=int,
                        help="Evaluate every x iteration",
                        default=1)
    parser.add_argument("--max_iter",
                        dest='max_iter',
                        type=int,
                        help="Max iteration",
                        default=100)
    parser.add_argument("--summary_dir",
                        dest='summary_dir',
                        type=str,
                        help="summary directory",
                        default='./ProjE_summary/')
    parser.add_argument("--keep",
                        dest='drop_out',
                        type=float,
                        help="Keep prob (1.0 keep all, 0. drop all)",
                        default=0.5)
    parser.add_argument("--optimizer",
                        dest='optimizer',
                        type=str,
                        help="Optimizer",
                        default='adam')
    parser.add_argument("--prefix",
                        dest='prefix',
                        type=str,
                        help="model_prefix",
                        default='DEFAULT')
    parser.add_argument("--loss_weight",
                        dest='loss_weight',
                        type=float,
                        help="Weight on parameter loss",
                        default=1e-5)
    parser.add_argument("--neg_weight",
                        dest='neg_weight',
                        type=float,
                        help="Sampling weight on negative examples",
                        default=0.5)

    args = parser.parse_args()

    print(args)

    model = ProjE(args.data_dir,
                  embed_dim=args.dim,
                  combination_method=args.combination_method,
                  dropout=args.drop_out,
                  neg_weight=args.neg_weight)

    train_hrt_input, train_hrt_weight, train_trh_input, train_trh_weight, \
    train_loss, train_op = train_ops(model, learning_rate=args.lr,
                                     optimizer_str=args.optimizer,
                                     regularizer_weight=args.loss_weight)
    test_input, test_head, test_tail = test_ops(model)

    with tf.Session() as session:
        # tf.initialize_all_variables().run()
        tf.global_variables_initializer().run()

        saver = tf.train.Saver()

        iter_offset = 0

        if args.load_model is not None and os.path.exists(args.load_model):
            saver.restore(session, args.load_model)
            iter_offset = int(
                args.load_model.split('.')[-2].split('_')[-1]) + 1
            print("Load model from %s, iteration %d restored." %
                  (args.load_model, iter_offset))

        total_inst = model.n_train

        # training data generator
        raw_training_data_queue = Queue()
        training_data_queue = Queue()
        data_generators = list()
        for i in range(args.n_generator):
            data_generators.append(
                Process(target=data_generator_func,
                        args=(raw_training_data_queue, training_data_queue,
                              model.tr_h, model.hr_t, model.n_entity,
                              args.neg_weight)))
            data_generators[-1].start()

        evaluation_queue = JoinableQueue()
        result_queue = Queue()
        for i in range(args.n_worker):
            worker = Process(target=worker_func,
                             args=(evaluation_queue, result_queue, model.hr_t,
                                   model.tr_h))
            worker.start()

        for data_func, test_type in zip(
            [model.validation_data, model.testing_data], ['VALID', 'TEST']):
            accu_mean_rank_h = list()
            accu_mean_rank_t = list()
            accu_filtered_mean_rank_h = list()
            accu_filtered_mean_rank_t = list()

            evaluation_count = 0

            for testing_data in data_func(batch_size=args.eval_batch):
                head_pred, tail_pred = session.run([test_head, test_tail],
                                                   {test_input: testing_data})

                evaluation_queue.put((testing_data, head_pred, tail_pred))
                evaluation_count += 1

            for i in range(args.n_worker):
                evaluation_queue.put(None)

            print("waiting for worker finishes their work")
            evaluation_queue.join()
            print("all worker stopped.")
            while evaluation_count > 0:
                evaluation_count -= 1

                (mrh, fmrh), (mrt, fmrt) = result_queue.get()
                accu_mean_rank_h += mrh
                accu_mean_rank_t += mrt
                accu_filtered_mean_rank_h += fmrh
                accu_filtered_mean_rank_t += fmrt

            print(
                "[%s] INITIALIZATION [HEAD PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f"
                %
                (test_type, np.mean(accu_mean_rank_h),
                 np.mean(accu_filtered_mean_rank_h),
                 np.mean(np.asarray(accu_mean_rank_h, dtype=np.int32) < 10),
                 np.mean(
                     np.asarray(accu_filtered_mean_rank_h, dtype=np.int32) < 10
                 )))

            print(
                "[%s] INITIALIZATION [TAIL PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f"
                %
                (test_type, np.mean(accu_mean_rank_t),
                 np.mean(accu_filtered_mean_rank_t),
                 np.mean(np.asarray(accu_mean_rank_t, dtype=np.int32) < 10),
                 np.mean(
                     np.asarray(accu_filtered_mean_rank_t, dtype=np.int32) < 10
                 )))

        for n_iter in range(iter_offset, args.max_iter):
            start_time = timeit.default_timer()
            accu_loss = 0.
            accu_re_loss = 0.
            ninst = 0

            print("initializing raw training data...")
            nbatches_count = 0
            for dat in model.raw_training_data(batch_size=args.batch):
                raw_training_data_queue.put(dat)
                nbatches_count += 1
            print("raw training data initialized.")

            while nbatches_count > 0:
                nbatches_count -= 1

                hr_tlist, hr_tweight, tr_hlist, tr_hweight = training_data_queue.get(
                )

                l, rl, _ = session.run(
                    [train_loss, model.regularizer_loss, train_op], {
                        train_hrt_input: hr_tlist,
                        train_hrt_weight: hr_tweight,
                        train_trh_input: tr_hlist,
                        train_trh_weight: tr_hweight
                    })

                accu_loss += l
                accu_re_loss += rl
                ninst += len(hr_tlist) + len(tr_hlist)

                # progress line for every batch, overwritten in place via end='\r'
                print(
                    '[%d sec](%d/%d) : %.2f -- loss : %.5f rloss: %.5f ' %
                    (timeit.default_timer() - start_time, ninst,
                     total_inst, float(ninst) / total_inst, l /
                     (len(hr_tlist) + len(tr_hlist)), args.loss_weight *
                     (rl / (len(hr_tlist) + len(tr_hlist)))),
                    end='\r')
            print("")
            print("iter %d avg loss %.5f, time %.3f" %
                  (n_iter, accu_loss / ninst,
                   timeit.default_timer() - start_time))

            if n_iter % args.save_per == 0 or n_iter == args.max_iter - 1:
                save_path = saver.save(
                    session,
                    os.path.join(
                        args.save_dir, "ProjE_" + str(args.prefix) + "_" +
                        str(n_iter) + ".ckpt"))
                print("Model saved at %s" % save_path)

            if n_iter % args.eval_per == 0 or n_iter == args.max_iter - 1:

                for data_func, test_type in zip(
                    [model.validation_data, model.testing_data],
                    ['VALID', 'TEST']):
                    accu_mean_rank_h = list()
                    accu_mean_rank_t = list()
                    accu_filtered_mean_rank_h = list()
                    accu_filtered_mean_rank_t = list()

                    evaluation_count = 0

                    for testing_data in data_func(batch_size=args.eval_batch):
                        head_pred, tail_pred = session.run(
                            [test_head, test_tail], {test_input: testing_data})

                        evaluation_queue.put(
                            (testing_data, head_pred, tail_pred))
                        evaluation_count += 1

                    for i in range(args.n_worker):
                        evaluation_queue.put(None)

                    print("waiting for worker finishes their work")
                    evaluation_queue.join()
                    print("all worker stopped.")
                    while evaluation_count > 0:
                        evaluation_count -= 1

                        (mrh, fmrh), (mrt, fmrt) = result_queue.get()
                        accu_mean_rank_h += mrh
                        accu_mean_rank_t += mrt
                        accu_filtered_mean_rank_h += fmrh
                        accu_filtered_mean_rank_t += fmrt

                    print(
                        "[%s] ITER %d [HEAD PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f"
                        %
                        (test_type, n_iter, np.mean(accu_mean_rank_h),
                         np.mean(accu_filtered_mean_rank_h),
                         np.mean(
                             np.asarray(accu_mean_rank_h, dtype=np.int32) < 10
                         ),
                         np.mean(
                             np.asarray(accu_filtered_mean_rank_h,
                                        dtype=np.int32) < 10)))

                    print(
                        "[%s] ITER %d [TAIL PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f"
                        %
                        (test_type, n_iter, np.mean(accu_mean_rank_t),
                         np.mean(accu_filtered_mean_rank_t),
                         np.mean(
                             np.asarray(accu_mean_rank_t, dtype=np.int32) < 10
                         ),
                         np.mean(
                             np.asarray(accu_filtered_mean_rank_t,
                                        dtype=np.int32) < 10)))
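
# --- Hedged sketch (not from the original snippet) ---
# worker_func() is not shown. From the way it is used above, each worker reads
# (testing_data, head_pred, tail_pred) tuples from evaluation_queue, computes
# the rank of each gold head/tail entity, and puts ((head_ranks,
# filtered_head_ranks), (tail_ranks, filtered_tail_ranks)) onto result_queue,
# exiting on the None poison pill. The sketch assumes numpy is available as np,
# that head_pred/tail_pred are per-triple score vectors and that each row of
# testing_data is (head, tail, relation); real filtered ranks would also use
# hr_t/tr_h to discount known triples, which is skipped here.
def worker_func_sketch(evaluation_queue, result_queue, hr_t, tr_h):
    while True:
        item = evaluation_queue.get()
        if item is None:
            evaluation_queue.task_done()
            break
        testing_data, head_pred, tail_pred = item
        mrh, mrt = [], []
        for (h, t, r), h_scores, t_scores in zip(testing_data, head_pred, tail_pred):
            mrh.append(int(np.sum(h_scores > h_scores[h])))  # entities scored above the gold head
            mrt.append(int(np.sum(t_scores > t_scores[t])))  # entities scored above the gold tail
        result_queue.put(((mrh, list(mrh)), (mrt, list(mrt))))  # filtered == raw in this sketch
        evaluation_queue.task_done()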
Пример #49
0
class Shredder(object):
    def __init__(self,
                 work_generator,
                 work_fn,
                 aggregator,
                 num_processes=0,
                 log_level='info'):

        if log_level.lower() not in ['warn', 'info', 'debug']:
            raise Exception('invalid log level')

        logging.basicConfig(level=getattr(logging, log_level.upper()))
        self.logger = logging.getLogger('shredder')

        if not num_processes:
            self.num_processes = cpu_count()
            self.logger.info(
                "num_processes not set, "
                "defaulting to cpu_count %d", self.num_processes)
        else:
            self.num_processes = num_processes

        self.work_generator = work_generator
        self.work_fn = work_fn
        self.aggregator = aggregator
        self.queue = JoinableQueue()
        self.workers = Workers()

    def signal_handler(self, signum, stack_handler):
        self.logger.info("shutting down")
        signal.setitimer(signal.ITIMER_REAL, 0, 0)  # clear
        self.workers.shutdown()
        sys.exit(0)

    def aggregate_results(self, signum, stack_handler):
        msgs = self.workers.read()
        for msg in msgs:
            self.aggregator(msg)

    def launch_workers(self):
        for i in range(0, self.num_processes):
            self.logger.info("launching worker-%d", i)
            worker = self.launch(i)
            self.workers.add(worker)

    def shred(self):
        count = 0

        for chunk in self.work_generator():
            if chunk is None:
                self.logger.warn("Got None from generator...ignoring")
                continue

            count += 1
            self.queue.put(copy.deepcopy(chunk))

            while self.queue.qsize() > self.num_processes:
                self.logger.debug(
                    "Queue size exceeds process count, sleeping..")
                sleep(5)

        self.logger.debug("sent %d messages to the queue", count)

    def start(self):
        self.launch_workers()

        signal.signal(signal.SIGINT, self.signal_handler)

        signal.signal(signal.SIGALRM, self.aggregate_results)
        signal.setitimer(signal.ITIMER_REAL, 5, 5)

        self.shred()

        self.logger.info("Shredded; workers will shutdown when queue empties")

        self.workers.send_poison_pill(self.queue)
        self.queue.join()

        # make sure there aren't any results to aggregate before exiting
        self.aggregate_results(None, None)
        self.workers.cleanup()

        self.logger.info("Done")

    def launch(self, name):
        """ Start a new Worker process that will consume work from the queue.
        """
        parent_pipe, child_pipe = Pipe()

        process = Process(target=Worker.start,
                          args=(name, self.queue, child_pipe, self.work_fn))
        process.start()

        worker = WorkerContext(name, process, parent_pipe)
        return worker
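
# --- Hedged usage sketch (not from the original snippet) ---
# Shows how the three callables Shredder expects fit together; the generator,
# work function and aggregator below are made-up examples.
def example_generator():
    for start in range(0, 100, 10):
        yield list(range(start, start + 10))  # one chunk of work per item

def example_work_fn(chunk):
    return sum(chunk)  # runs inside a Worker process

totals = []

def example_aggregator(msg):
    totals.append(msg)  # called in the parent on the SIGALRM timer

if __name__ == '__main__':
    Shredder(example_generator, example_work_fn, example_aggregator,
             num_processes=2, log_level='info').start()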
Пример #50
0
def cleanup(days, project, concurrency, silent, model, router, timed):
    """Delete a portion of trailing data based on creation date.

    All data that is older than `--days` will be deleted.  The default for
    this is 30 days.  In the default setting all projects will be truncated
    but if you have a specific project you want to limit this to this can be
    done with the `--project` flag which accepts a project ID or a string
    with the form `org/project` where both are slugs.
    """
    if concurrency < 1:
        click.echo('Error: Minimum concurrency is 1', err=True)
        raise click.Abort()

    # Make sure we fork off multiprocessing pool
    # before we import or configure the app
    from multiprocessing import Process, JoinableQueue as Queue

    pool = []
    task_queue = Queue(1000)
    for _ in xrange(concurrency):
        p = Process(target=multiprocess_worker, args=(task_queue,))
        p.daemon = True
        p.start()
        pool.append(p)

    from sentry.runner import configure
    configure()

    from django.db import router as db_router
    from sentry.app import nodestore
    from sentry.db.deletion import BulkDeleteQuery
    from sentry import models

    if timed:
        import time
        from sentry.utils import metrics
        start_time = time.time()

    # list of models which this query is restricted to
    model_list = {m.lower() for m in model}

    def is_filtered(model):
        if router is not None and db_router.db_for_write(model) != router:
            return True
        if not model_list:
            return False
        return model.__name__.lower() not in model_list

    # Deletions that use `BulkDeleteQuery` (and don't need to worry about child relations)
    # (model, datetime_field, order_by)
    BULK_QUERY_DELETES = [
        (models.EventMapping, 'date_added', '-date_added'),
        (models.EventAttachment, 'date_added', None),
        (models.UserReport, 'date_added', None),
        (models.GroupEmailThread, 'date', None),
        (models.GroupRuleStatus, 'date_added', None),
    ] + EXTRA_BULK_QUERY_DELETES

    # Deletions that use the `deletions` code path (which handles their child relations)
    # (model, datetime_field, order_by)
    DELETES = (
        (models.Event, 'datetime', 'datetime'),
        (models.Group, 'last_seen', 'last_seen'),
    )

    if not silent:
        click.echo('Removing expired values for LostPasswordHash')

    if is_filtered(models.LostPasswordHash):
        if not silent:
            click.echo('>> Skipping LostPasswordHash')
    else:
        models.LostPasswordHash.objects.filter(
            date_added__lte=timezone.now() - timedelta(hours=48)
        ).delete()

    for model in [models.ApiGrant, models.ApiToken]:
        if not silent:
            click.echo(u'Removing expired values for {}'.format(model.__name__))

        if is_filtered(model):
            if not silent:
                click.echo(u'>> Skipping {}'.format(model.__name__))
        else:
            model.objects.filter(expires_at__lt=timezone.now()).delete()

    project_id = None
    if project:
        click.echo(
            "Bulk NodeStore deletion not available for project selection", err=True)
        project_id = get_project(project)
        if project_id is None:
            click.echo('Error: Project not found', err=True)
            raise click.Abort()
    else:
        if not silent:
            click.echo("Removing old NodeStore values")

        cutoff = timezone.now() - timedelta(days=days)
        try:
            nodestore.cleanup(cutoff)
        except NotImplementedError:
            click.echo(
                "NodeStore backend does not support cleanup operation", err=True)

    for bqd in BULK_QUERY_DELETES:
        if len(bqd) == 4:
            model, dtfield, order_by, chunk_size = bqd
        else:
            chunk_size = 10000
            model, dtfield, order_by = bqd

        if not silent:
            click.echo(
                u"Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                )
            )
        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
                order_by=order_by,
            ).execute(chunk_size=chunk_size)

    for model, dtfield, order_by in DELETES:
        if not silent:
            click.echo(
                u"Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                )
            )

        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            imp = '.'.join((model.__module__, model.__name__))

            q = BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
                order_by=order_by,
            )

            for chunk in q.iterator(chunk_size=100):
                task_queue.put((imp, chunk))

            task_queue.join()

    # Clean up FileBlob instances which are no longer used and aren't super
    # recent (as there could be a race between blob creation and reference)
    if not silent:
        click.echo("Cleaning up unused FileBlob references")
    if is_filtered(models.FileBlob):
        if not silent:
            click.echo('>> Skipping FileBlob')
    else:
        cleanup_unused_files(silent)

    # Shut down our pool
    for _ in pool:
        task_queue.put(_STOP_WORKER)

    # And wait for it to drain
    for p in pool:
        p.join()

    if timed:
        duration = int(time.time() - start_time)
        metrics.timing('cleanup.duration', duration, instance=router)
        click.echo("Clean up took %s second(s)." % duration)
Пример #51
0
        print('%s ate %s' % (name, food))
        q.task_done()  # tell the queue that this item has been fully processed


if __name__ == '__main__':
    # q = Queue()
    q = JoinableQueue()
    p1 = Process(target=producer, args=("chef egon", "baozi", q))
    p2 = Process(target=producer, args=('tank', 'mantou', q))
    c1 = Process(target=consumer, args=("Chun", q))
    c2 = Process(target=consumer, args=("Niu", q))

    p1.start()
    p2.start()
    # daemon processes: they end once the queue is drained and the internal counter reaches 0
    c1.daemon = True
    c2.daemon = True
    c1.start()
    c2.start()

    p1.join()
    p2.join()
    # after the producers finish, a special end marker could be put in the queue
    # q.put(None)
    q.join()  # wait until every item in the queue has been consumed before moving on
    """
    With a JoinableQueue, every put() increments an internal counter,
    and every task_done() decrements it.
    q.join() only lets execution continue once that counter reaches 0.
    """
Пример #52
0
    in_queue = JoinableQueue()
    out_queue = Queue()
    for _ in range(num_workers):
        p = Process(target=worker, args=(in_queue, out_queue,))
        p.daemon = True
        p.start()

    p_consume = Process(target=consumer, args=(out_queue,))
    p_consume.daemon = True
    p_consume.start()

    lags_to_try = [i for i in range(1, 9, 2)]
    num_series = train_data.shape[0]
    print("Setting input queue...")
    print("data enqued: 0.00%", flush=True)
    for index in range(num_series):
        series = train_data.iloc[index, :]
        in_queue.put((lags_to_try, series))
        if (index+1) % 10000 == 0:
            print("data enqued: {:.2f}%".format(100.0*(index+1)/num_series),
                  flush=True)
    print("data enqued: 100.00%", flush=True)
    in_queue.close()
    in_queue.join()
    print("Input queue joined", flush=True)
    out_queue.put(None)
    p_consume.join()
    print("consumer joined", flush=True)

    print("All tasks done")
    sys.exit(0)
Пример #53
0
def score_mp(iterable,
             function,
             num_procs,
             call_back,
             stop_check,
             debug=False,
             chunk_size=500):
    job_queue = JoinableQueue(100)
    scored_queue = Queue()
    stopped = Stopped()
    done = False
    while not done:
        chunk = []
        while len(chunk) < chunk_size:
            try:
                item = next(iterable)
            except StopIteration:
                done = True
                break
            chunk.append(item)
        try:
            job_queue.put(chunk, False)
        except Full:
            break
    procs = []

    counter = Counter()
    for i in range(num_procs):
        p = ScoreWorker(job_queue, scored_queue, function, counter, stopped)
        procs.append(p)
        p.start()
    val = 0
    done = False
    while not done:
        if stop_check is None:
            break
        if stop_check is not None and stop_check():
            stopped.stop()
            time.sleep(1)
            break
        chunk = []
        while len(chunk) < chunk_size:
            try:
                item = next(iterable)
            except StopIteration:
                done = True
                break
            chunk.append(item)
        job_queue.put(chunk)

        if call_back is not None:
            value = counter.value()
            call_back(value)
    job_queue.join()
    time.sleep(2)
    if debug:
        print('queueadder joined!')
    return_list = list()
    error = None
    while True:
        try:
            l = scored_queue.get(timeout=2)
        except Empty:
            break
        if isinstance(l, Exception):
            error = l
        else:
            return_list.extend(l)
    if debug:
        print('emptied result queue')
    for p in procs:
        p.join()
    if error is not None:
        raise error
    if debug:
        print('joined')
        print(len(return_list))
    return return_list
Пример #54
0
def IFN_2Dscan(modelfile, param1, param2, t_list, spec, custom_params=False,
                  cpu=None, doseNorm=1, suppress=False, verbose=1):
    # initialization
    jobs = Queue()
    result = JoinableQueue()
    if cpu is None or cpu >= cpu_count():
        NUMBER_OF_PROCESSES = cpu_count()-1
    else:
        NUMBER_OF_PROCESSES = cpu
    if verbose != 0: print("Using {} threads".format(NUMBER_OF_PROCESSES))
    # build task list
    params=[]
    if verbose != 0: print("Building tasks")
    if isinstance(custom_params, list):
        for val1 in param1[1]:
            for val2 in param2[1]:
                params.append([[param1[0],val1],[param2[0],val2]]+[c for c in custom_params])        
    else:
        for val1 in param1[1]:
            for val2 in param2[1]:
                params.append([[param1[0],val1],[param2[0],val2]])        

    # Write modelfile
    imported_model = __import__(modelfile,fromlist=['ifnmodels'])
    py_output = export(imported_model.model, 'python')
    with open('ODE_system.py','w') as f:
        f.write(py_output)
				
    tasks = [[modelfile, t_list, spec, p] for p in params]
    # put jobs on the queue
    if verbose != 0: print("There are {} tasks to compute".format(len(params)))
    if verbose != 0: print("Putting tasks on the queue")
	
    for w in tasks:
        jobs.put(w)
		
    if verbose != 0: print("Computing scan")
	
    # start up the workers
    [Process(target=IFN_2Dscan_helper, args=(i, jobs, result)).start()
            for i in range(NUMBER_OF_PROCESSES)]
    
    # pull in the results from each worker
    pool_results=[]
    for t in range(len(tasks)):
        r = result.get()
        pool_results.append(r)
        result.task_done()
    # tell the workers there are no more jobs
    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)
    # wait for the result queue to drain, then close both queues
    result.join()
    jobs.close()
    result.close()
    if verbose != 0: print("Done scan")
    response_image = image_builder(pool_results, doseNorm, (len(param1[1]),len(param2[1])))
    # plot heatmap if suppress==False
    if not suppress:
        IFN_heatmap(response_image, "response image - {}".format(param1[0]), param2[0])
    #return the scan 
    return response_image
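
# --- Hedged sketch (not from the original snippet) ---
# IFN_2Dscan_helper() is not shown. Each worker pulls [modelfile, t_list, spec,
# params] tasks from `jobs`, runs one simulation, and puts something
# image_builder() can consume onto `result`; None is the poison pill. The
# simulate_model() call is a hypothetical placeholder for running the exported
# ODE_system model.
def IFN_2Dscan_helper_sketch(worker_id, jobs, result):
    while True:
        task = jobs.get()
        if task is None:  # no more scan points for this worker
            break
        modelfile, t_list, spec, params = task
        response = simulate_model(modelfile, t_list, spec, params)  # hypothetical
        result.put((params, response))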
Пример #55
0
def test_model():

    flags.entTotal = len(data_loader.ent_dict)
    flags.relTotal = len(data_loader.rel_dict)
    test = Test(test_data_size=len(data_loader.test_data_list),
                loader=data_loader)
    evaluation_queue = JoinableQueue()
    result_queue = Queue()
    for i in range(flags.num_worker):
        worker = Process(target=worker_func,
                         args=(evaluation_queue, result_queue, test))
        worker.start()
        print("work %d start!" % i)

    with tf.Graph().as_default():
        with tf.Session() as sess:

            if flags.model == "dkrl":
                model = DKRL(flags,
                             data_loader.lengths,
                             data_loader.vocab2id,
                             is_training=False,
                             desciption_data=data_loader.get_all_description(),
                             lengths=data_loader.get_all_content_len())
            elif flags.model == "transE":
                model = TransE(flags)

            saver = tf.train.Saver(max_to_keep=flags.num_checkpoint)
            # saver.restore(sess,"./res-dkrl/400-model.tf")
            saver.restore(sess, "./res-transe/500-model.tf")
            if flags.model == "dkrl":
                sess.run([model.get_ent_cnn_embedding()])

            l_raw_mean_rank = float(0.0)
            l_filter_mean_rank = float(0.0)
            l_raw_hit_10 = float(0.0)
            l_filter_hit_10 = float(0.0)

            r_raw_mean_rank = 0.0
            r_filter_mean_rank = 0.0
            r_raw_hit_10 = 0.0
            r_filter_hit_10 = 0.0

            def test_step(h, r, t, test_words, content_len):
                feedDict = {
                    model.test_h: h,
                    model.test_r: r,
                    model.test_t: t
                    # model.test_h_words:test_words[0],
                    # model.test_t_words:test_words[1],
                    # model.test_h_content_len: content_len[0],
                    # model.test_t_content_len: content_len[1]
                }
                res = sess.run([model.predict], feed_dict=feedDict)
                return res

            evaluation_count = 0
            for data in data_loader.get_predict_instance():

                if test.index == len(data_loader.test_data_list):
                    break
                temp_h, temp_t, item, head_words, tail_words, head_content_len, tail_content_len = data

                predict_h = test_step(temp_h[:, 0], temp_h[:, 2], temp_h[:, 1],
                                      head_words, head_content_len)
                predict_t = test_step(temp_t[:, 0], temp_t[:, 2], temp_t[:, 1],
                                      tail_words, tail_content_len)

                evaluation_queue.put(
                    (predict_h[0], temp_h, predict_t[0], temp_t, item))
                evaluation_count += 1
                # test.test_head(predict_h[0], temp_h, item)
                # test.test_tail(predict_t[0], temp_t, item)
                # test.index += 1
                # test.Print()
            for i in range(flags.num_worker):
                evaluation_queue.put(None)

            print("waiting for worker finishes their work")
            evaluation_queue.join()
            print("all worker stopped.")

            index = evaluation_count
            while evaluation_count > 0:

                evaluation_count = evaluation_count - 1
                l, r = result_queue.get()
                l_r_rank, l_f_rank, l_r_hit_10, l_f_hit_10 = l
                r_r_rank, r_f_rank, r_r_hit_10, r_f_hit_10 = r

                l_raw_mean_rank += l_r_rank
                r_raw_mean_rank += r_r_rank

                l_filter_mean_rank += l_f_rank
                r_filter_mean_rank += r_f_rank

                l_raw_hit_10 += l_r_hit_10
                r_raw_hit_10 += r_r_hit_10

                l_filter_hit_10 += l_f_hit_10
                r_filter_hit_10 += r_f_hit_10

            print(
                "l_raw_mean_rank {:.3f} l_filter_mean_rank {:.3f} l_raw_hit_10 {:.5f} l_filter_hit_10 {:.5f}"
                .format(l_raw_mean_rank / index, l_filter_mean_rank / index,
                        l_raw_hit_10 / index, l_filter_hit_10 / index))

            print(
                "r_raw_mean_rank {:.3f} r_filter_mean_rank {:.3f} r_raw_hit_10 {:.5f} r_filter_hit_10 {:.5f}"
                .format(r_raw_mean_rank / index, r_filter_mean_rank / index,
                        r_raw_hit_10 / index, r_filter_hit_10 / index))
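worker_func is not shown in this snippet. A hedged guess at its shape: a consumer that pulls (predict_h, temp_h, predict_t, temp_t, item) tuples until the None sentinel and pushes per-triple rank statistics onto result_queue. The ranking logic below is a placeholder, not the project's real Test implementation:

import numpy as np
from multiprocessing import Process, JoinableQueue, Queue

def worker_func(evaluation_queue, result_queue, test=None):
    # consume prediction tuples until the None sentinel arrives
    while True:
        job = evaluation_queue.get()
        if job is None:
            evaluation_queue.task_done()
            break
        predict_h, temp_h, predict_t, temp_t, item = job
        # placeholder ranking: assume index 0 is the gold candidate
        l_rank = 1 + int(np.sum(predict_h < predict_h[0]))
        r_rank = 1 + int(np.sum(predict_t < predict_t[0]))
        l_stats = (l_rank, l_rank, float(l_rank <= 10), float(l_rank <= 10))
        r_stats = (r_rank, r_rank, float(r_rank <= 10), float(r_rank <= 10))
        result_queue.put((l_stats, r_stats))
        evaluation_queue.task_done()

if __name__ == '__main__':
    evaluation_queue, result_queue = JoinableQueue(), Queue()
    worker = Process(target=worker_func, args=(evaluation_queue, result_queue))
    worker.start()
    scores = np.random.rand(100)
    evaluation_queue.put((scores, None, scores, None, None))
    evaluation_queue.put(None)
    evaluation_queue.join()
    print(result_queue.get())
    worker.join()
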
Пример #56
0
class ParallelSampler():
    """
        Generate rollouts by N parallel Sampler Processes
        1. Sampler process runs async, generates rollouts by exec policy
        on each own random seeded Env
        2. Sampler writes rollouts to Queue to communicate with Agent process
        3. Sampler reads policy weights from Queue updated by Agent Process
    """

    def __init__(self,
                 n_sampler,
                 env_name,
                 policy,
                 max_step=1000,
                 batch_size=10000,
                 animate=False):
        """
            n_sampler: number of sampler processes used to generate rollouts
            env_name: OpenAI Gym environment name, e.g. 'HalfCheetah-v2'
            policy: policy object with ops
            max_step: maximum number of steps per episode
            batch_size: number of steps in a training batch
            animate: boolean, True uses the env.render() method to animate episodes
        """
        # Queue and Event
        self.tasks = JoinableQueue()
        self.results = Queue()
        self.weights_ready_event = Event()

        self.policy = policy
        self.n_sampler = n_sampler
        self.batch_size = batch_size
        self.clear_rollouts()

        self.samplers = []
        #TODO, add monitor in one process
        for sid in range(self.n_sampler):
            self.samplers.append(
                Sampler(sid, self.tasks, self.results,
                        self.weights_ready_event, env_name, policy,
                        max_step, batch_size, animate))
        for sampler in self.samplers:
            # each sampler start running async
            sampler.start()

    def set_policy_weights(self, weights):
        """
            save policy weights to tasks Queue,
            signal each sampler process to assign the weights
        """
        self.weights_ready_event.clear()
        for i in range(self.n_sampler):
            self.tasks.put(weights)
        self.tasks.join()
        self.weights_ready_event.set()

    def gen_rollouts(self):
        """
            N Sampler Processes to generate rollouts in parallel,
            for efficiency, each Sampler collects one rollout (episode)
        """
        start = time.time()
        for i in range(self.n_sampler):
            # task to collect experience for each sampler
            self.tasks.put(1)
        # wait for experience collection tasks finishing
        self.tasks.join()

        self.clear_rollouts()
        print('reading result')
        total_steps = 0
        for i in range(self.n_sampler):
            res = self.results.get()
            total_steps += self.add_rollouts(res)
        #print('running {} min to collect total steps {}'.format(
            #(time.time() - start) / 60.0, total_steps))
        return self.rollouts

    def clear_rollouts(self):
        self.rollouts = []

    def add_rollouts(self, to_add):
        """
            add rollouts: self.rollouts += to_add
            self.rollouts: list of rollout (map)
            rollout: map of {'observers' : NumPy array of states from episode
                             'actions' : NumPy array of actions from episode
                             'rewards' : NumPy array of (un-discounted) rewards from episode
                             'unscaled_obs' : NumPy array of (un-discounted) rewards from episode
                             }
            input:
                to_add: list of rollout
            output:
                n_steps in to_add rollouts
        """
        n_steps = 0
        for rollout in to_add:
            self.rollouts.append(rollout)
            n_steps += rollout['observes'].shape[0]
        return n_steps

    def exit(self):
        """
            task: sampler finish
        """
        for i in range(self.n_sampler):
            self.tasks.put(2)
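The Sampler class itself is not shown. Below is a stripped-down, runnable sketch of the JoinableQueue/Event handshake that set_policy_weights and gen_rollouts rely on, with hypothetical string tokens ('rollout', 'exit') standing in for the integer codes used above:

from multiprocessing import Process, JoinableQueue, Queue, Event

def sampler_proc(tasks, results, weights_ready):
    weights = None
    while True:
        task = tasks.get()
        if task == 'exit':
            tasks.task_done()
            break
        if isinstance(task, dict):           # new weights arrived
            weights = task
            tasks.task_done()
            weights_ready.wait()              # wait until every sampler has them
        else:                                 # task == 'rollout'
            results.put({'observes': [weights['scale']] * 3})
            tasks.task_done()

if __name__ == '__main__':
    n_sampler = 2
    tasks, results, ready = JoinableQueue(), Queue(), Event()
    procs = [Process(target=sampler_proc, args=(tasks, results, ready))
             for _ in range(n_sampler)]
    for p in procs:
        p.start()
    # broadcast weights: one copy per sampler, then signal readiness
    ready.clear()
    for _ in range(n_sampler):
        tasks.put({'scale': 1.0})
    tasks.join()
    ready.set()
    # ask each sampler for one rollout and wait for completion
    for _ in range(n_sampler):
        tasks.put('rollout')
    tasks.join()
    rollouts = [results.get() for _ in range(n_sampler)]
    print(len(rollouts), 'rollouts')
    for _ in range(n_sampler):
        tasks.put('exit')
    for p in procs:
        p.join()
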
Пример #57
0
from multiprocessing import Process, JoinableQueue
import time

queue = JoinableQueue()


def put():
    for i in range(10):
        print('putting item: {}'.format(i))
        queue.put(i)


def get():
    while True:
        time.sleep(0.1)
        print('got item: {}'.format(queue.get()))
        queue.task_done()


p1 = Process(target=put)
p1.daemon = True
p1.start()

p2 = Process(target=get)
p2.daemon = True
p2.start()

time.sleep(0.5)  # processes start slowly, so without a short wait the main process would reach this point before the child tasks have run and the program would exit early; workaround: wait a little
queue.join()  # make the main process wait until every queued item has been marked done
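A variant of the same demo that avoids the timing-dependent sleep by passing the queue explicitly and joining the producer process before joining the queue (a sketch, not a drop-in replacement for the code above):

from multiprocessing import Process, JoinableQueue
import time

def put(queue):
    for i in range(10):
        print('putting item: {}'.format(i))
        queue.put(i)

def get(queue):
    while True:
        time.sleep(0.1)
        print('got item: {}'.format(queue.get()))
        queue.task_done()

if __name__ == '__main__':
    queue = JoinableQueue()
    p1 = Process(target=put, args=(queue,))
    p1.start()

    p2 = Process(target=get, args=(queue,))
    p2.daemon = True
    p2.start()

    p1.join()      # wait until the producer has queued every item
    queue.join()   # wait until the consumer has marked every item done
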
Пример #58
0
            # Push output df to write queue
            write_queue.put((os.path.join(output_dir, level + "_quantiles.csv"), q_df))

    pool = Pool(processes=args.nprocs)
    for _ in tqdm.tqdm(
        pool.imap_unordered(_process_date, dates),
        total=len(dates),
        desc="Postprocessing dates",
        dynamic_ncols=True,
    ):
        pass
    pool.close()
    pool.join()  # wait until everything is done

    to_write.join()  # wait until queue is empty
    to_write.put(None)  # send signal to term loop
    to_write.join()  # wait until write_thread handles it
    write_thread.join()  # join the write_thread

    # sort output csvs
    if not args.no_sort:
        for a_level in args.levels:
            filename = os.path.join(output_dir, a_level + "_quantiles.csv")
            logging.info("Sorting output file " + filename + "...")
            out_df = pd.read_csv(filename)

            # TODO we can avoid having to set index here once readable_column names is complete
            # set index and sort them
            out_df = out_df.set_index([a_level, "date", "quantile"]).sort_index()
            # sort columns alphabetically
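The to_write queue, write_thread and _process_date used above are defined earlier in that script and are not shown here. A self-contained sketch of the same writer-thread-plus-None-sentinel idea, with made-up file names and payloads:

import os
import tempfile
import threading
from multiprocessing import JoinableQueue

def writer_loop(write_queue):
    # consume (path, text) tuples; a None payload is the termination signal
    while True:
        item = write_queue.get()
        if item is None:
            write_queue.task_done()
            break
        path, text = item
        with open(path, 'a') as f:
            f.write(text + '\n')
        write_queue.task_done()

if __name__ == '__main__':
    to_write = JoinableQueue()
    write_thread = threading.Thread(target=writer_loop, args=(to_write,))
    write_thread.start()

    out_dir = tempfile.mkdtemp()
    for i in range(5):
        to_write.put((os.path.join(out_dir, 'example_quantiles.csv'),
                      'row {}'.format(i)))

    to_write.join()        # wait until the queue is empty
    to_write.put(None)     # send signal to terminate the loop
    to_write.join()        # wait until write_thread handles it
    write_thread.join()    # join the write_thread
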
Пример #59
0
class MCTSParallelPlayer(Agent):
    def __init__(self,
                 env,
                 sim_count=16,
                 trade_off=3,
                 neighbor_range=1,
                 process_num=0):
        self.sim_count = sim_count
        self.neighborhood = lambda loc: itertools.product(
            range(loc[0] - neighbor_range, loc[0] + neighbor_range + 1),
            range(loc[1] - neighbor_range, loc[1] + neighbor_range + 1))
        self.env = env
        self.trade_off = trade_off
        self.process_num = process_num if process_num else cpu_count()
        self.parallel()
        self.reset()

    def parallel(self):
        self.task_queue = JoinableQueue(1)
        self.result_queue = JoinableQueue(self.process_num)
        self.workers = [
            Process(target=self.rollout_worker,
                    args=(self.env, self.task_queue, self.result_queue))
            for _ in range(self.process_num)
        ]
        for worker in self.workers:
            worker.start()
        self.update_thread = Thread(target=self.update_statics)
        self.update_thread.start()

    def reset(self):
        self.count = KeyHashDefaultDict(int)
        self.uob = KeyHashDefaultDict(int)
        self.win = KeyHashDefaultDict(int)
        self.child_info = KeyHashDefaultDict(list)
        self.father_info = KeyHashDefaultDict()

    def get_winner(self, state):
        return self.env.get_winner(state)

    def hash_convert(self, state):
        board, player = state
        return (strfboard(board), player)

    def simulation(self, state):
        leaf_state = self.select(state)
        if self.get_winner(leaf_state) != None:
            self.back_propagate(leaf_state, self.get_winner(leaf_state))
        else:
            if self.count[leaf_state] + self.uob[leaf_state] == 0:
                self.rollout(leaf_state)
            else:
                self.expand(leaf_state)
                next_state = random.choice(self.child_info[leaf_state])
                if self.get_winner(next_state) != None:
                    self.back_propagate(next_state,
                                        self.get_winner(next_state))
                else:
                    self.rollout(next_state)

    def select(self, state):
        cur_state = state
        while cur_state in self.child_info:
            self.uob[cur_state] += 1
            childs = self.child_info[cur_state]
            unexplored_childs = [
                child for child in childs
                if self.count[child] + self.uob[child] == 0
            ]
            if unexplored_childs:
                cur_state = random.choice(unexplored_childs)
            else:
                ucbs = [
                    self.win[next_state] / (self.count[next_state] + epision) +
                    np.sqrt(self.trade_off * np.log(self.count[cur_state] +
                                                    self.uob[cur_state]) /
                            (self.count[next_state] + self.uob[next_state]))
                    for next_state in childs
                ]
                max_ucb = max(ucbs)
                best_childs = [
                    child for child, ucb in zip(childs, ucbs) if ucb == max_ucb
                ]
                cur_state = random.choice(best_childs)
        return cur_state

    def expand(self, state):
        board, player = state
        cur_state = state
        self.uob[cur_state] += 1
        if np.any(board != EMPTY):
            neighbor_valid_actions = reduce(operator.concat, [[
                act for act in self.neighborhood(action)
                if 0 <= act[0] < board.shape[0]
                and 0 <= act[1] < board.shape[1] and board[act] == EMPTY
            ] for action in np.argwhere(board != EMPTY)])
        else:
            neighbor_valid_actions = [
                (np.array(board.shape) / 2).astype(np.int8)
            ]
        for action in neighbor_valid_actions:
            next_state, winner, done, _ = self.env.next_step(cur_state, action)
            if done:
                self.child_info[cur_state] = [next_state]
                self.father_info[next_state] = cur_state
                break
            self.child_info[cur_state].append(next_state)
            self.father_info[next_state] = cur_state

    def expand_worker(self, state):
        pass

    def rollout(self, state):
        self.uob[state] += 1
        pre_actions = []
        cur_state = state
        while self.father_info[cur_state]:
            pre_actions.append(
                np.argwhere(
                    self.father_info[cur_state][0] - cur_state[0] != EMPTY)[0])
            cur_state = self.father_info[cur_state]
        self.task_queue.put(
            {
                "mode": "rollout",
                "node": state,
                "pre_actions": pre_actions
            },
            block=True)

    @staticmethod
    def rollout_worker(env, task_q, result_q):
        trans = Transfer()
        while True:
            data = task_q.get(True)
            state = data["node"]
            pre_actions = data["pre_actions"]
            board, player = state
            while True:
                valid_actions = list(zip(*np.where(board == 0)))
                if valid_actions:
                    # random_action = random.choice(vaild_actions)
                    action = trans.decide(pre_actions)
                    (board, player), winner, done, info = env.next_step(
                        (board, player), action)
                else:
                    result_q.put((state, random.choice([WHITE, BLACK])),
                                 block=True)
                    task_q.task_done()
                    break
                if done:
                    result_q.put((state, winner), block=True)
                    task_q.task_done()
                    break

    def back_propagate(self, state, winner):
        cur_state = state
        while cur_state != None:
            self.uob[cur_state] -= 1
            self.count[cur_state] += 1
            if winner == cur_state[1]: self.win[cur_state] += 1
            cur_state = self.father_info[cur_state]

    def update_statics(self):
        while True:
            data = self.result_queue.get(True)
            self.back_propagate(*data)
            self.result_queue.task_done()

    def closeout(self):
        self.task_queue.join()
        self.result_queue.join()

    def decide(self, state):
        board, player = state
        self.father_info[state] = None
        # while self.count[state] < self.sim_count:
        for i in range(self.sim_count):
            self.simulation(state)
        self.closeout()
        childs = self.child_info[state]
        win_rates = [
            self.win[next_state] / (self.count[next_state] + epision)
            for next_state in childs
        ]
        max_win_rate = max(win_rates)
        best_childs = [
            child for child, win_rate in zip(childs, win_rates)
            if win_rate == max_win_rate
        ]
        best_child = random.choice(best_childs)
        return np.argwhere(best_child[0] - board)[0]
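The bounded JoinableQueue(1) hand-off plus the background update thread is the core synchronization structure of this player. A minimal, runnable sketch of just that structure, with placeholder rollout work instead of the game logic:

import threading
from multiprocessing import Process, JoinableQueue

def rollout_worker(task_q, result_q):
    # simulate a rollout: echo the task id with a fake winner flag
    while True:
        data = task_q.get(True)
        result_q.put((data['node'], data['node'] % 2), block=True)
        task_q.task_done()

class ParallelDemo:
    def __init__(self, process_num=2):
        self.task_queue = JoinableQueue(1)               # bounded hand-off
        self.result_queue = JoinableQueue(process_num)
        self.wins = 0
        self.workers = [Process(target=rollout_worker,
                                args=(self.task_queue, self.result_queue),
                                daemon=True)
                        for _ in range(process_num)]
        for w in self.workers:
            w.start()
        self.update_thread = threading.Thread(target=self.update_statics,
                                              daemon=True)
        self.update_thread.start()

    def update_statics(self):
        while True:
            node, winner = self.result_queue.get(True)
            self.wins += winner                          # stand-in for back_propagate
            self.result_queue.task_done()

    def closeout(self):
        self.task_queue.join()
        self.result_queue.join()

if __name__ == '__main__':
    demo = ParallelDemo()
    for i in range(8):
        demo.task_queue.put({'node': i}, block=True)     # blocks while the single slot is full
    demo.closeout()
    print('wins recorded:', demo.wins)
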
Пример #60
0
    def _parallelly_make_dataset(self):
        import multiprocessing
        from multiprocessing import Process
        from multiprocessing import JoinableQueue as Queue

        name_file = '{}/video_list.npy'.format(self.loc)
        len_file = '{}/video_lengths.npy'.format(self.loc)

        if isfile(name_file):
            video_list = np.load(name_file)
            video_lengths = np.load(len_file)
            return video_list, video_lengths

        q = Queue()
        qvideo_list = Queue()

        fnames_list = []
        for root, _, fnames in tqdm(os.walk(self.root)):
            for fname in sorted(fnames):
                fnames_list.append(os.path.join(root, fname))

        # note: this nested worker relies on the 'fork' start method, which lets
        # the child processes inherit q and qvideo_list
        def parallel_worker(fnames_chunk):
            item = q.get()  # take one start token off the task queue; its value is unused
            for fname in tqdm(fnames_chunk):
                if has_file_allowed_extension(fname, VIDEO_EXTENSION):
                    video_path = fname
                    vc = cv2.VideoCapture(video_path)
                    length = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))
                    if length > 0 and vc.isOpened():
                        qvideo_list.put((video_path, length))
                        # mark the item done right away so the later
                        # qvideo_list.join() does not block waiting for a consumer
                        qvideo_list.task_done()
                    vc.release()
            q.task_done()

        processes = 32
        n = len(fnames_list)
        chunk = int(n / processes)
        if chunk == 0:
            chunk = 1
        fnames_chunks = [fnames_list[i*chunk:(i+1)*chunk] \
                        for i in range((n + chunk - 1) // chunk)]
        # spawn one worker per chunk; the chunk count can differ from
        # `processes` when n is small or not evenly divisible
        for i, fnames_chunk in enumerate(fnames_chunks):
            q.put(i)
            multiprocessing.Process(target=parallel_worker,
                                    args=(fnames_chunk, )).start()

        q.join()
        qvideo_list.join()

        video_list = []
        video_lengths = []

        # note: qsize() is unreliable and raises NotImplementedError on some
        # platforms (e.g. macOS)
        while qvideo_list.qsize() != 0:
            video, length = qvideo_list.get()
            video_list.append(video)
            video_lengths.append(length)

        np.save(name_file, video_list)
        np.save(len_file, video_lengths)

        return video_list, video_lengths
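Two fragile spots above are the chunk/process mismatch (a fixed range(processes) loop can skip the last chunk or index past fnames_chunks) and the qsize()-based drain. A hedged sketch of a more defensive variant of the same pattern, with a placeholder probe instead of the OpenCV call and a count-based drain:

from multiprocessing import Process, JoinableQueue

def probe_worker(task_q, result_q):
    # placeholder for the cv2 probing: report a fake "frame count" per name
    while True:
        chunk = task_q.get()
        if chunk is None:
            task_q.task_done()
            break
        for name in chunk:
            result_q.put((name, len(name)))
        task_q.task_done()

if __name__ == '__main__':
    names = ['clip_{:02d}.mp4'.format(i) for i in range(10)]
    processes = 3
    # striped split: always exactly `processes` chunks, never an IndexError
    chunks = [names[i::processes] for i in range(processes)]

    task_q, result_q = JoinableQueue(), JoinableQueue()
    workers = [Process(target=probe_worker, args=(task_q, result_q))
               for _ in range(processes)]
    for w in workers:
        w.start()
    for chunk in chunks:
        task_q.put(chunk)
    for _ in range(processes):
        task_q.put(None)      # one poison pill per worker

    # drain by count (one result per name) instead of relying on qsize()
    video_list, video_lengths = [], []
    for _ in names:
        video, length = result_q.get()
        video_list.append(video)
        video_lengths.append(length)

    task_q.join()
    for w in workers:
        w.join()
    print(len(video_list), 'entries')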