Example #1
def main():
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()

    tasks = ["1", "2", "3", "4", "5"]

    for w in tasks:
        jobs.put(w)

    for i in range(NUMBER_OF_PROCESSES):
        Process(target=work, args=(i, jobs, result)).start()

    print('starting workers')

    for t in range(len(tasks)):
        r = result.get()
        time.sleep(0.5)
        print(r)
        result.task_done()

    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)

    result.join()
    jobs.close()
    result.close()
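The main() in Example #1 starts its processes with target=work, but the work function itself is not shown. A minimal sketch of a compatible worker (the body is an assumption; only the (i, jobs, result) signature and the None sentinel come from the example):

def work(i, jobs, result):
    # Pull tasks until the None sentinel arrives and emit one result per task.
    for task in iter(jobs.get, None):
        result.put('worker {0} processed task {1}'.format(i, task))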
Example #2
def worker(node: str, worker_name: str, queue_calls: JoinableQueue,
           queue_error: Queue, queue_success: Queue):
    """ Consumes some data from queue_calls and works on it."""
    connection = NodeCall(node)
    counter = 0
    start = time.perf_counter()
    print("Started worker {0}!".format(worker_name))
    for args in iter(queue_calls.get, sentinel):
        queue_calls.task_done()
        counter += 1
        if not counter % 100:
            print(f"Worker {worker_name} done {counter} jobs.")


#        print("Worker {0} got {1} args.".format(worker_name, args))
        result = connection.call_wrapper(*args)
        stop = time.perf_counter()
        if isinstance(result, dict) and result.get('error', ''):
            queue_error.put(result)
        else:
            queue_success.put({args[1]: stop - start})
        start = stop

    print(f"Worker {worker_name} done {counter} jobs.")
    queue_calls.task_done()
    print(f"{queue_calls.qsize()} the approximate size of the queue.")
Example #3
def consumer(in_queue: JoinableQueue, out_queue: JoinableQueue):
    while True:
        item = in_queue.get()
        sleep(0.5)
        s = str(item)
        out_queue.put(s)
        in_queue.task_done()
Example #4
def producer(in_queue: JoinableQueue):
    while True:
        item = in_queue.get()
        sleep(0.5)
        n = int(item)
        print(n)
        in_queue.task_done()
Example #5
class QueueTask:
    def __init__(self):
        self.queue = JoinableQueue()
        self.event = Event()
        atexit.register( self.queue.join )

        process = Process(target=self.work)
        process.daemon = True
        process.start()


    def work(self):
        while True:
            func, args, wait_for = self.queue.get()

            for evt in wait_for: 
                evt.wait()
            func(*args)
            self.event.set()

            self.queue.task_done()


    def enqueue(self, func, args=[], wait_for=[]):
        self.event.clear()
        self.queue.put( (func, args, wait_for) )

        return self.event 
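A possible usage sketch for the QueueTask helper above (the printed message is just an illustration); enqueue() returns the internal Event, so the caller can block until the job has actually run:

if __name__ == '__main__':
    tasks = QueueTask()
    done = tasks.enqueue(print, args=('running in the background',))
    done.wait()  # returns once the worker process has executed the call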
Example #6
class AlarmExecutor:
    def __init__(self):
        self.queue = JoinableQueue(10)
        self.running = False
        self.t = Thread(target=self._run, name="AlarmExecutor")

    def _run(self):
        while self.running:
            try:
                alarm = self.queue.get(block=True, timeout=1)
                alarm.execute() 
                logging.debug("Alarm executed")
                self.queue.task_done()       
            except Queue.Empty:
                continue
            
    def start(self):
        logging.debug("Starting alarm executor")
        self.running = True
        self.t.start()

    def stop(self):
        if self.running:
            logging.debug("Stoppping alarm executor")
            self.running = False
            self.t.join()
        else:
            logging.debug("Attempted to stop alarm executor when it is not running")
Example #7
    def _drain_and_join_queue(q: mp.JoinableQueue, join: bool = True) -> None:
        """
        Drains a queue completely, such that it is joinable

        :param q: Queue to join
        :param join: Whether to join the queue or not
        """
        # Do nothing when it's not set
        if q is None:
            return

        # Call task done up to the point where we get a ValueError. We need to do this when child processes already
        # started processing on some tasks and got terminated half-way.
        n = 0
        try:
            while True:
                q.task_done()
                n += 1
        except ValueError:
            pass

        try:
            while not q.empty() or n != 0:
                q.get(block=True, timeout=1.0)
                n -= 1
        except (queue.Empty, EOFError):
            pass

        # Join
        if join:
            q.join()
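A small usage sketch under assumed conditions (three leftover items whose matching task_done() calls never happened, e.g. because workers were terminated), treating the helper above as a free function: it first settles the unfinished-task counter, then empties the queue, then joins it.

import multiprocessing as mp

q = mp.JoinableQueue()
for item in ('a', 'b', 'c'):
    q.put(item)
# ... imagine worker processes were terminated half-way here ...
_drain_and_join_queue(q)  # afterwards the queue is empty and join() has returned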
Example #8
def worker(q: JoinableQueue, i: int, output, print_lock: Lock,
           FLAGS: Tuple[Any]) -> None:
    """Retrieves files from the queue and annotates them."""
    if FLAGS.in_memory:
        with open(FLAGS.alias_db, 'rb') as f:
            alias_db = pickle.load(f)
        with open(FLAGS.relation_db, 'rb') as f:
            relation_db = pickle.load(f)
        with open(FLAGS.wiki_db, 'rb') as f:
            wiki_db = pickle.load(f)
    else:
        alias_db = SqliteDict(FLAGS.alias_db, flag='r')
        relation_db = SqliteDict(FLAGS.relation_db, flag='r')
        wiki_db = SqliteDict(FLAGS.wiki_db, flag='r')

    annotator = Annotator(alias_db,
                          relation_db,
                          wiki_db,
                          distance_cutoff=FLAGS.cutoff,
                          match_aliases=FLAGS.match_aliases,
                          unmatch=FLAGS.unmatch,
                          prune_clusters=FLAGS.prune_clusters)
    while True:
        logger.debug('Worker %i taking a task from the queue', i)
        json_data = q.get()
        if json_data is None:
            break
        annotation = annotator.annotate(json_data)
        print_lock.acquire()
        output.write(json.dumps(annotation) + '\n')
        print_lock.release()
        q.task_done()
        logger.debug('Worker %i finished a task', i)
Example #9
def calculate_set(num_processes):
    todo_queue = JoinableQueue()
    results_queue = JoinableQueue()

    # setup and launch workers
    # we'll make them daemon processes so they shut down automatically when this process exits, but
    # we'll also shut them down ourselves when we finish
    workers = [
        Process(target=worker, args=(todo_queue, results_queue))
        for i in xrange(num_processes)
    ]
    for individual in workers:
        individual.daemon = True
        individual.start()

    result = numpy.zeros([ny, nx])
    for i in xrange(ny):
        y = i * dy + ylo
        for j in xrange(nx):
            x = j * dx + xlo
            todo_queue.put((x, y, i, j))
    todo_queue.join()

    while not results_queue.empty():
        i, j, val = results_queue.get()
        result[i, j] = val
        results_queue.task_done()

    # shutdown the compute processes
    for individual in workers:
        individual.terminate()

    return result
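calculate_set relies on a worker function that is not shown here. A sketch of what it could look like, based on the (x, y, i, j) tasks and (i, j, value) results used above (the computation itself is a placeholder):

def worker(todo_queue, results_queue):
    while True:
        x, y, i, j = todo_queue.get()
        value = x * x + y * y  # placeholder for the real per-point computation
        results_queue.put((i, j, value))
        todo_queue.task_done()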
Example #10
class AlarmExecutor:
    def __init__(self):
        self.queue = JoinableQueue(10)
        self.running = False
        self.t = Thread(target=self._run, name="AlarmExecutor")

    def _run(self):
        while self.running:
            try:
                alarm = self.queue.get(block=True, timeout=1)
                alarm.execute()
                logging.debug("Alarm executed")
                self.queue.task_done()
            except queue.Empty:
                continue

    def start(self):
        logging.debug("Starting alarm executor")
        self.running = True
        self.t.start()

    def stop(self):
        if self.running:
            logging.debug("Stopping alarm executor")
            self.running = False
            self.t.join()
        else:
            msg = "Attempted to stop alarm executor when it is not running"
            logging.debug(msg)
Example #11
def threaded_db_input(pipe: mp.JoinableQueue, len_seeds: int) -> NoReturn:
    """Runs DB operation in a separate process

    Args:
        :param pipe: connection with the parent.
        :param len_seeds: total number of seeds.

    Returns:
    Executes the queries from the queue.
    """
    con, dbname = get_db_con(len_seeds)
    stmt = pipe.get(timeout=3600)
    pid = None
    while stmt is not None:
        try:
            pid.join()
        except Exception as e:
            if pid:
                print(e)
        # try:
        # con = con = lite.connect(dbname, timeout=3000, check_same_thread=False, isolation_level=None)
        # con.commit()
        pid = mp.Process(target=stmt[0], args=(con, ) + stmt[1])
        pid.start()
        # except Exception as e:
        #     print('Found exception in db input:')
        #     print(e)
        #     print('Arguments that caused exception: ')
        #     print(stmt)
        # finally:
        pipe.task_done()
        stmt = pipe.get()
    print('DB thread exiting...')
    con.close()
Example #12
def search8(q, path):
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()

    job_count = 0
    for f in os.scandir('data'):
        jobs.put(f.path)
        job_count = job_count + 1

    for i in range(NUMBER_OF_PROCESSES):
        Process(target=work, args=(i, q, jobs, result)).start()

    matches = []
    for t in range(job_count):
        r = result.get()
        result.task_done()
        if r:
            matches.append(r)

    matches.sort()

    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)

    result.join()
    jobs.close()
    result.close()

    return matches
Example #13
class Queue:
    def __init__(self):
        self._queue = JoinableQueue()

    def put(self, element):
        if self._queue is not None:
            self._queue.put(element)

    def get(self):
        if self._queue is not None:
            try:
                return self._queue.get()
            except:
                return None

    def join(self):
        if self._queue is not None:
            self._queue.join()

    def task_done(self):
        if self._queue is not None:
            self._queue.task_done()

    def unblock_gets(self):
        if self._queue is not None:
            self._queue.close()
            self._queue = JoinableQueue()
Example #14
class Renderer:
    queue = None

    def __init__(self, nb_workers=2):
        self.queue = JoinableQueue()
        self.processes = [Process(target=self.upload) for i in range(nb_workers)]
        for p in self.processes:
            p.start()

    def render(self, item):
        self.queue.put(item)

    def upload(self):
        while True:
            item = self.queue.get()
            if item is None:
                break

            # process your item here

            self.queue.task_done()

    def terminate(self):
        """ wait until queue is empty and terminate processes """
        self.queue.join()
        for p in self.processes:
            p.terminate()
Example #15
def main():
    from multiprocessing import JoinableQueue
    from genmod.vcf import vcf_header
    from genmod.utils import annotation_parser
    parser = argparse.ArgumentParser(description="Parse different kind of pedigree files.")
    parser.add_argument('variant_file', type=str, nargs=1 , help='A file with variant information.')
    parser.add_argument('annotation_file', type=str, nargs=1 , help='A file with feature annotations.')
    parser.add_argument('-phased', '--phased', action="store_true", help='If variant file is phased.')    
    parser.add_argument('-v', '--verbose', action="store_true", help='Increase output verbosity.')
    
    args = parser.parse_args()
    infile = args.variant_file[0]
    if args.verbose:
        print('Parsing annotationfile...')
        start_time_annotation = datetime.now()
    my_anno_parser = annotation_parser.AnnotationParser(args.annotation_file[0], 'ref_gene')
    
    if args.verbose:
        print('annotation parsed. Time to parse annotation: %s\n' % str(datetime.now() - start_time_annotation))
    
    my_head_parser = vcf_header.VCFParser(infile)
    my_head_parser.parse()
    print(my_head_parser.__dict__)
    variant_queue = JoinableQueue()
    start_time = datetime.now()        
    
    my_parser = VariantFileParser(infile, variant_queue, my_head_parser, my_anno_parser, args)
    nr_of_batches = my_parser.parse()
    print(nr_of_batches)
    for i in range(nr_of_batches):
        variant_queue.get()
        variant_queue.task_done()
    
    variant_queue.join()
    print('Time to parse variants: %s ' % str(datetime.now()-start_time))
Example #16
class QueueTask:
    def __init__(self):
        self.queue = JoinableQueue()
        self.event = Event()
        atexit.register(self.queue.join)

        process = Process(target=self.work)
        process.daemon = True
        process.start()

    def work(self):
        while True:
            func, args, wait_for = self.queue.get()

            for evt in wait_for:
                evt.wait()
            func(*args)
            self.event.set()

            self.queue.task_done()

    def enqueue(self, func, args=[], wait_for=[]):
        self.event.clear()
        self.queue.put((func, args, wait_for))

        return self.event
Example #17
def generator(test_q: JoinableQueue) -> Iterator[TestBatch]:
    test = test_q.get()
    while test:
        test_q.task_done()
        yield test
        test = test_q.get()
    test_q.task_done()
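A sketch of how the generator above can be driven (the batches are plain strings here; any falsy value works as the end-of-stream marker because of the while test check):

test_q = JoinableQueue()
for batch in ('batch-1', 'batch-2'):
    test_q.put(batch)
test_q.put(None)                 # falsy sentinel stops the generator
for batch in generator(test_q):
    print('running', batch)
test_q.join()                    # every get() above was matched by a task_done()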
Example #18
def InternalSet(Achild:Queue, Bchild:Queue, outqueue:Queue):
    """Take the output of two LeafSet's and take the union."""
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    AminusB = set()
    BminusA = set()
    morestuff = True
    while morestuff:
        a = Achild.get()
        b = Bchild.get()
        logger.info("Internal:%s:%s" % (a, b))
        if a in BminusA:
            BminusA.remove(a)
        elif a not in AminusB:
            AminusB.add(a)
            outqueue.put(a)
        if b in AminusB:
            AminusB.remove(b)
        elif b not in BminusA:
            BminusA.add(b)
            outqueue.put(b)
        Achild.task_done()
        Bchild.task_done()
        if (a == SIGOBJ) or (b == SIGOBJ):
            outqueue.put(SIGOBJ)
            morestuff = False
    logger.info("internal done")
Example #19
class ScheduleContainer(object):
    def __init__(self):
        self.schedule_container = JoinableQueue(maxsize=0)
        self.scheduler = schedule
        self._run = True
        worker = Thread(target=self.work)
        worker.start()

    def append(self, request_form):
        self.schedule_container.put(request_form)

    @staticmethod
    def task(temp):
        def inner():
            t.change_temp(temp)

        return inner

    def work(self):
        lock = Lock()

        while self._run:
            lock.acquire()
            if not self.schedule_container.empty():
                schedule_obj = self.schedule_container.get()
                job = self.scheduler.every()
                job.start_day = str(schedule_obj.day)
                job.unit = 'weeks'
                job.at(str(schedule_obj.time)).do(self.task(schedule_obj.temp))
                print('schedule made into job')
                schedule_obj.save()
                self.schedule_container.task_done()
            lock.release()
            schedule.run_pending()
            time.sleep(1)
Example #20
class Requester(object):
    def __init__(self, num_workers=2):
        self.queue = JoinableQueue()
        self.processes = [
            Process(target=self.request) for _ in range(num_workers)
        ]

    def add_url(self, url):
        self.queue.put(url)

    def request(self):
        url = self.queue.get()
        while url is not None:

            # TODO - actually send a request here

            self.queue.task_done()

            url = self.queue.get()

    def terminate(self):

        # send the terminate command
        for _ in self.processes:
            self.queue.put(None)

        # wait for processing to finish
        for p in self.processes:
            p.join()
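A possible way to drive the Requester class above (the URLs are placeholders). Note that __init__ creates the worker processes but never starts them, so the caller has to do that; depending on the platform's process start method, passing a bound method as the target may need adjustment:

if __name__ == '__main__':
    requester = Requester(num_workers=2)
    for p in requester.processes:  # the class itself never calls start()
        p.start()
    requester.add_url('https://example.com/a')
    requester.add_url('https://example.com/b')
    requester.terminate()          # enqueues one None per worker, then joins them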
Example #21
def main():
    jobs = JoinableQueue()
    result = JoinableQueue()


    numToProcess = -1
    scores = pd.DataFrame(columns=['query','fmeasure','precision','recall',
                                   'size','maxDistance','topHits',"contextSteps"])

    print len(datasets)

    for key in datasets:
        jobs.put(key)

    processed_count = Counter()
        
    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result, processed_count))
        p.daemon = True
        p.start()

    #work(1, jobs, result, processed_count)

    automated_annotations = {}
    distances = {}

    jobs.join()

    dataset_index = collections.defaultdict(set)
    annotated_datasets = set()
    while not result.empty():
        dataset, classes = result.get()
        if len(classes) == 0:
            annotated_datasets.add(dataset)
        for c in classes.keys():
            dataset_index[c].add(dataset)
            owl_class = Class(c, graph=graph)
            for parent in owl_class.parents:
                dataset_index[parent.identifier].add(dataset)
        result.task_done()

    print '\n'
    
    for query, c in queries.items():
        manual = ground_truth[query]
        automated = dataset_index[c]
        hits = manual & automated
        misses = manual - automated
        precision = np.nan if len(automated) == 0 else float(len(hits)) / len(automated)
        recall = np.nan if len(manual) == 0 else float(len(hits)) / len(manual)
        if precision != 0 or recall != 0:
            fmeasure = 0 if np.isnan(precision) or np.isnan(recall) else 2 * (precision * recall) / (precision + recall)
        else:
            fmeasure = 0
        scores = scores.append(dict(query=query, size=len(manual), precision=precision, recall=recall, fmeasure=fmeasure,topHits=topHits, maxDistance=maxDistance, contextSteps = context_steps),
                        ignore_index=True)
        print "Hits for", query, c
        print '\n'.join(sorted(hits))
    print scores
    print "Annotated", len(annotated_datasets), "datasets."
Example #22
def Cvpointgray(imgs: multiprocessing.JoinableQueue, conn, l: multiprocessing.Lock):
    while True:
        try:
            img, d = imgs.get()
        except Empty:
            break
        x = []
        mean = np.mean(img)
        for j in range(img.shape[0]):
            x0 = 0
            y0 = 0
            ym = []
            for i in range(1, img.shape[1]):
                ym.append(img[j, i])
                x0 = x0 + int(img[j, i]) ** 4
                y0 = y0 + int(img[j, i]) ** 4 * i
            if x0 == 0 or np.mean(ym) < mean:
                y = 0
            else:
                y = y0 / x0
            y = round(y)
            x.append(y)
        print(d)
        l.acquire()
        conn.send([d, x])
        l.release()
        imgs.task_done()
Example #23
class Thread_Pool_Manager(object):
    def __init__(self, thread_num=cpu_count()):
        self.thread_num = thread_num
        print(thread_num)
        self.work_queue = JoinableQueue()
        self.work_num = Semaphore(0)
        self.mutex = Lock()

    def start_threads(self):
        for i in range(self.thread_num):
            thread = Process(target=self.do_job)
            thread.daemon = True  # set thread as daemon
            thread.start()

    def do_job(self):
        global Numbers
        while True:
            # print(1)
            self.work_num.acquire()
            with self.mutex:
                print(1, self.work_queue.qsize())
                thread_job = self.work_queue.get()
                print(0, self.work_queue.qsize())
            thread_job.do_job(self.work_queue, self.work_num)
            print(self.work_queue.qsize())
            self.work_queue.task_done()

    def join(self):
        self.work_queue.join()

    def add_job(self, job):
        self.work_queue.put(job)
        self.work_num.release()
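Thread_Pool_Manager only requires that queued jobs expose a do_job(queue, semaphore) method, as seen in its do_job loop. A hypothetical job type and usage sketch:

class PrintJob:
    def __init__(self, n):
        self.n = n

    def do_job(self, work_queue, work_num):
        print('processing job', self.n)

if __name__ == '__main__':
    pool = Thread_Pool_Manager(thread_num=2)
    pool.start_threads()
    for n in range(5):
        pool.add_job(PrintJob(n))
    pool.join()  # blocks until every job has been marked task_done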
Example #24
class StoreWorker(Process):
    def __init__(self, cache=10):
        super(StoreWorker, self).__init__()
        self.store_q = JoinableQueue()
        self.cache = cache

    def run(self):
        while True:
            template = self.store_q.get()

            if template is None:
                print('Store received None')
                self.store_q.task_done()
                break

            # Set up the environment before storing the objects.
            self.res = self._handle(template)
            '''
            if template.objects:
                # Call to the functions in this class
                func = getattr(self, template.func, None)
                if func:
                    result = func(template.objects, **template.kws)

                    if not result:
                        print('Failed with template', template.name)
                        print(template)
            '''
            self.store_q.task_done()
        print('stopping store')

    def create(self, objects, *args, **kwargs):
        '''
        Writes a list of objects to the database specified in the template.
        This wraps around functions of the database wrappers.
        '''
        raise NotImplementedError

    def read(self, *args, **kwargs):
        '''
        Read an entry from the database.
        '''
        raise NotImplementedError

    def update(self, *args, **kwargs):
        '''
        Performs an update to the database based on the key specified.
        If no key is specified, the url of object is used.
        By default creates an object in the database, if none exists.
        '''
        raise NotImplementedError

    def delete(self, *args, **kwargs):
        '''
        Deletes an entry from the database.
        Not implemented yet in any of the DatabaseAdapters.
        '''
        raise NotImplementedError
Example #25
def queueManager(numProc, myList, function, *args):
	'''queueManager(numProc, myList, function, *args):
	generic function used to start worker processes via the multiprocessing Queue object
	numProc - number of processors to use
	myList - a list of objects to be iterated over
	function - target function
	*args - additional arguments to pass to function

	Return - an unordered list of the results from myList
	'''
	qIn = Queue()
	qOut = JoinableQueue()
	if args:
		arguments = (qIn, qOut,) + args
	else:
		arguments = (qIn, qOut,)
	results = []
	
	# reduce processor count if proc count > files
	
	i = 0
	for l in myList:
		qIn.put((i,l))
		i += 1

	for _ in range(numProc):
		p = Process(target = function, args = arguments).start()
	sys.stdout.write("Progress: {:>3}%".format(0)
)
	curProgress = 0
	lastProgress = 0
	while qOut.qsize() < len(myList):
		#sys.stdout.write("\b\b\b\b{:>3}%".format(int(ceil(100*qOut.qsize()/len(myList)))))
		curProgress = int(ceil(100*qOut.qsize()/len(myList)))
		if curProgress - lastProgress > 10:
			lastProgress += 10
			sys.stdout.write("\nProgress: {:>3}%".format(lastProgress))
			sys.stdout.flush()
	sys.stdout.write("\nProgress: {:>3}%".format(100))
	#sys.stdout.write("\b\b\b\b{:>3}%".format(100))
	sys.stdout.write("\n")
	for _ in range(len(myList)):
		# indicate done results processing
		results.append(qOut.get())
		qOut.task_done()
	#tell child processes to stop
	for _ in range(numProc):
		qIn.put('STOP')

	orderedRes = [None]*len(results)
	for i, res in results:
		orderedRes[i] = res

	qOut.join()

	qIn.close()
	qOut.close()
	return orderedRes
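The target function passed to queueManager has to read (index, item) pairs from qIn until the 'STOP' sentinel and put (index, result) pairs on qOut. A hypothetical worker and call (square_worker and the input list are made up for illustration):

def square_worker(qIn, qOut):
    # Read (index, item) pairs until the 'STOP' sentinel and report (index, item**2).
    for i, item in iter(qIn.get, 'STOP'):
        qOut.put((i, item * item))

if __name__ == '__main__':
    print(queueManager(2, [1, 2, 3, 4], square_worker))  # -> [1, 4, 9, 16]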
Example #26
def worker_func(in_queue: JoinableQueue, out_queue: Queue, hr_t, tr_h):
    while True:
        dat = in_queue.get()
        if dat is None:
            in_queue.task_done()
            continue
        testing_data, head_pred, tail_pred = dat
        out_queue.put(test_evaluation(testing_data, head_pred, tail_pred, hr_t, tr_h))
        in_queue.task_done()
Example #27
def process_pcap(pcap, base):
    """
    Process a pcap file
    :param pcap: pcap file as read by pyshark module
    :param base: base name of the file being processed
    :return: None
    """
    # Establish communication queues
    tasks = {}
    packets_to_process = {}
    results = JoinableQueue()

    for pkt in pcap:

        try:

            # Obtain VLAN id
            vlan_id = pkt.layers[1].id

            # Create queues for the corresponding VLAN if they do not exist
            if vlan_id not in packets_to_process.keys():
                packets_to_process[vlan_id] = JoinableQueue()

            # Create and start tasks for the corresponding VLAN if they do not exist
            if vlan_id not in tasks.keys():
                print 'Creating Analyzer for VLAN id {0}'.format(vlan_id)
                # results[vlan_id] = m.dict()
                tasks[vlan_id] = Analyzer(vlan_id, packets_to_process[vlan_id],
                                          results)
                tasks[vlan_id].start()

            # # Add package to queue for the corresponding VLAN
            # if gtp.version == '1':
            #     packets_to_process[vlan_id].put(Packet(int(gtp.version), int(gtp.message)))
            # elif gtp.version == '2':
            #     packets_to_process[vlan_id].put(Packet(int(gtp.version), int(gtp.message_type)))

            packets_to_process[vlan_id].put(Packet(pkt))

        except AttributeError as e:
            print 'AttributeError: {0}'.format(e)

        except KeyError as e:
            print 'KeyError: {0}'.format(e)

    # Add a poison pill for each consumer
    for i in tasks.keys():
        packets_to_process[i].put(None)

    # Wait for all of the tasks to finish
    for i in tasks.keys():
        packets_to_process[i].join()

    results.task_done()

    # Print results
    print_results(results, base)
Example #28
def main(model_to_load):
    pass
    df = pd.read_csv(f_path + 'test.csv')
    # shuffle to get faster processing time
    df = df.sample(frac=1).reset_index(drop=True)
    img_name_array, image_category = get_img_and_landmark(df)
    data_len = image_category.shape[0]
    batch_size_o = 20
    image_queue = JoinableQueue(3)
    predict_queue = JoinableQueue(16)
    landmark_queue = JoinableQueue(8)
    model = load_model(model_to_load, {'custom_loss': custom_loss})
    print("model read completed")

    p_i = Process(target=img_reader,
                  args=(image_queue, img_name_array, data_len, batch_size_o))
    p_i.daemon = True
    p_i.start()

    c_pool = []
    for _i in range(8):
        c = Process(target=get_landmarker,
                    args=(predict_queue, landmark_queue))
        c.daemon = True
        c.start()
        c_pool.append(c)

    p1 = Process(target=predictor,
                 args=(landmark_queue, t0, image_category, img_name_array))
    p1.daemon = True

    p1.start()

    for i in range(math.ceil(data_len / batch_size_o)):
        img_array = image_queue.get(timeout=1800)
        batch_size = img_array["img_array"].shape[0]
        tmp_out_1 = model.predict(img_array["img_array"],
                                  batch_size=batch_size)
        tmp_out_flip = model.predict(img_array["img_flip_array"],
                                     batch_size=batch_size)
        image_queue.task_done()
        predict_queue.put(
            {
                'id': [
                    i for i in range(i * batch_size_o, i * batch_size_o +
                                     batch_size)
                ],
                'heatmap':
                tmp_out_1[0],
                'heatmap_flip':
                tmp_out_flip[0],
                'image_category':
                image_category[i * batch_size_o:i * batch_size_o + batch_size]
            },
            timeout=1800)

    p1.join()
Example #29
class TaskManager:
    # noinspection PyPep8Naming
    def __init__(self,
                 jobs_queue_capacity: int,
                 workers_num: int,
                 WorkerClass: Worker.__class__ = Worker):
        # empty job queue
        self._queue = JoinableQueue(maxsize=jobs_queue_capacity)
        logger.info(
            f'Queue size set to accept at most {jobs_queue_capacity} before pausing job assignment.'
        )
        self.WorkerClass = WorkerClass
        self.workers_num = max_number_of_workers(workers_num)

    _workers = []

    def wake_up_workers(self):
        self._workers: List[Worker] = [
            self.WorkerClass(self._queue) for _ in range(self.workers_num)
        ]
        for worker in self._workers:
            worker.start()

    def assign_task(self, job: Task):
        self._queue.put(job)

    def stop_workers(self):
        logger.info('waiting all workers to finish')
        # usual termination condition is to put None on the queue. Queues are FIFO but from Python 3.8 docs:
        # https://docs.python.org/3.8/library/multiprocessing.html#pipes-and-queues
        # "If multiple processes are enqueuing objects, it is possible for the objects to be received at the other
        # end out-of-order. However, objects enqueued by the same process will always be in the expected order
        # with respect to each other.". So, when there's a single producer, that's not an issue; when there are many
        # producers it may happen that even if Nones are enqueued at the end of the queue, consumers pick 'em
        # before other items in the queue (breaking the FIFO assumption). In this case the workers would leave
        # before the queue is empty. To avoid this, before sending Nones, it's better to wait for the queue to be
        # consumed.

        while not self._queue.empty(
        ):  # not bullet-proof as empty() and qsize() return approx. values, but it helps
            print(f"jobs waiting to be assigned: {self._queue.qsize()}")
            sleep(1)
        for _ in self._workers:
            self._queue.put(None, block=True, timeout=None)
        self._queue.join()
        logger.info('all processes_finished')

    def discard_waiting_tasks(self):
        while not self._queue.empty():
            try:
                self._queue.get(False)
            except Empty:
                continue
            self._queue.task_done()

    def number_of_waiting_tasks(self):
        return self._queue.qsize()
Example #30
def worker_func(in_queue: JoinableQueue, out_queue: Queue,tr_h,hr_t,ht_r):
    while True:
        dat=in_queue.get()
        if dat is None:
            in_queue.task_done()
            continue
        testing_data,candidates,candidate_tail_pred = dat
        out_queue.put(candidate_evaluation(testing_data,candidates,candidate_tail_pred,tr_h,hr_t,ht_r))
        in_queue.task_done()
Example #31
class WorkerQueue(object):

    def __init__(self, num_workers = 20):
        self.queue = Queue()
        self.pool = []
        self._setup_workers(num_workers)

    def _setup_workers(self, num_workers):
        """ Sets up the worker threads
              NOTE: undefined behaviour if you call this again.
        """
        self.pool = []

        for _ in range(num_workers):
            self.pool.append(Thread(target=self.threadloop))

        for a_thread in self.pool:
            a_thread.setDaemon(True)
            a_thread.start()


    def do(self, f, *args, **kwArgs):
        """ puts a function on a queue for running later.
        """
        self.queue.put((f, args, kwArgs))


    def stop(self):
        """ Stops the WorkerQueue, waits for all of the threads to finish up.
        """
        self.queue.put(STOP)
        for thread in self.pool:
            thread.join()


    def threadloop(self): #, finish = False):
        """ Loops until all of the tasks are finished.
        """
        while True:
            args = self.queue.get()
            if args is STOP:
                self.queue.put(STOP)
                self.queue.task_done()
                break
            else:
                try:
                    args[0](*args[1], **args[2])
                finally:
                    # clean up the queue, raise the exception.
                    self.queue.task_done()
                    #raise


    def wait(self):
        """ waits until all tasks are complete.
        """
        self.queue.join()
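A possible way to use the WorkerQueue above (assuming the STOP sentinel it references is defined in the same module):

wq = WorkerQueue(num_workers=4)
for n in range(10):
    wq.do(print, 'task', n)  # queue ten print calls
wq.wait()                    # block until every queued call has run
wq.stop()                    # tell the worker threads to exit and join them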
Example #32
def worker_func(in_queue: JoinableQueue, out_queue: Queue, hr_t, tr_h):
    while True:
        dat = in_queue.get()
        if dat is None:
            in_queue.task_done()
            continue
        testing_data, head_pred, tail_pred = dat
        out_queue.put(test_evaluation(testing_data, head_pred, tail_pred, hr_t, tr_h))
        in_queue.task_done()
Example #33
    def apply_mt(self, xs, parallelism, **kwargs):
        """Run the UDF multi-threaded using python multiprocessing"""
        if snorkel_conn_string.startswith('sqlite'):
            raise ValueError('Multiprocessing with SQLite is not supported. Please use a different database backend,'
                             ' such as PostgreSQL.')

        # Fill a JoinableQueue with input objects
        in_queue = JoinableQueue()
        for x in xs:
            in_queue.put(x)

        # If the UDF has a reduce step, we collect the output of apply in a
        # Queue. This is also used to track progress via the UDF sentinel
        out_queue = JoinableQueue()

        # Keep track of progress counts
        total_count = in_queue.qsize()
        count = 0

        # Start UDF Processes
        for i in range(parallelism):
            udf = self.udf_class(in_queue=in_queue, out_queue=out_queue,
                add_to_session=(self.reducer is None), **self.udf_init_kwargs)
            udf.apply_kwargs = kwargs
            self.udfs.append(udf)

        # Start the UDF processes, and then join on their completion
        for udf in self.udfs:
            udf.start()

        while any([udf.is_alive() for udf in self.udfs]) and count < total_count:
            y = out_queue.get()

            # Update progress whenever an item was processed
            if y == UDF.TASK_DONE_SENTINEL:
                count += 1
                if self.pb is not None:
                    self.pb.update(1)

            # If there is a reduce step, do now on this thread
            elif self.reducer is not None: 
                self.reducer.reduce(y, **kwargs)
                out_queue.task_done()

            else:
                raise ValueError("Got non-sentinel output without reducer.")

        if self.reducer is None:
            for udf in self.udfs:
                udf.join()
        else:
            self.reducer.session.commit()
            self.reducer.session.close()

        # Flush the processes
        self.udfs = []
Example #34
    def apply_mt(self, xs, parallelism, **kwargs):
        """Run the UDF multi-threaded using python multiprocessing"""
        if Asterisk_conn_string.startswith('sqlite'):
            raise ValueError('Multiprocessing with SQLite is not supported. Please use a different database backend,'
                             ' such as PostgreSQL.')

        # Fill a JoinableQueue with input objects
        in_queue = JoinableQueue()
        for x in xs:
            in_queue.put(x)

        # If the UDF has a reduce step, we collect the output of apply in a
        # Queue. This is also used to track progress via the UDF sentinel
        out_queue = JoinableQueue()

        # Keep track of progress counts
        total_count = in_queue.qsize()
        count = 0

        # Start UDF Processes
        for i in range(parallelism):
            udf = self.udf_class(in_queue=in_queue, out_queue=out_queue,
                add_to_session=(self.reducer is None), **self.udf_init_kwargs)
            udf.apply_kwargs = kwargs
            self.udfs.append(udf)

        # Start the UDF processes, and then join on their completion
        for udf in self.udfs:
            udf.start()

        while any([udf.is_alive() for udf in self.udfs]) and count < total_count:
            y = out_queue.get()

            # Update progress whenever an item was processed
            if y == UDF.TASK_DONE_SENTINEL:
                count += 1
                if self.pb is not None:
                    self.pb.update(1)

            # If there is a reduce step, do now on this thread
            elif self.reducer is not None: 
                self.reducer.reduce(y, **kwargs)
                out_queue.task_done()

            else:
                raise ValueError("Got non-sentinel output without reducer.")

        if self.reducer is None:
            for udf in self.udfs:
                udf.join()
        else:
            self.reducer.session.commit()
            self.reducer.session.close()

        # Flush the processes
        self.udfs = []
Example #35
def assemble(inp: mp.JoinableQueue, outp: mp.JoinableQueue):
    conn = make_connection()
    print('assembler started, looping')
    journals = dict(
        conn.execute('select JournalId, DisplayName from Journals'))
    conference_series = dict(
        conn.execute(
            'select ConferenceSeriesId, DisplayName from ConferenceSeries'))
    conference_instances = dict(
        conn.execute(
            'select ConferenceInstanceId, DisplayName from ConferenceInstances'
        ))
    cited_by_gen = generate_citation_from_gzip()
    paper_citation_id, paper_citations = next(cited_by_gen)
    for paper in iter(inp.get, 'STOP'):
        paperid = paper['id']
        # print(paperid, paper_citation_id)
        paper['author'], paper['affiliations'] = generate_author_affiliations(
            conn, paperid)
        paper['urls'] = list(generate_urls(conn, paperid))
        paper['references'] = list(generate_references(conn, paperid))
        paper['references_count'] = len(paper['references'])
        # paper['resources'] = list(generate_resources(paperid))
        cii = paper.pop('ConferenceInstanceId')
        if type(cii) is int:
            paper['conferenceinstance'] = conference_instances[cii]
        csi = paper.pop('ConferenceSeriesId')
        if type(csi) is int:
            paper['conferenceseries'] = conference_series[csi]
        ji = paper.pop('JournalId')
        if type(ji) is int:
            paper['journal'] = journals[ji]
        if paper_citation_id == paperid:
            print('found citation information')
            paper['cited_by'] = list(paper_citations)
            paper['cited_by_count'] = len(paper_citations)
            try:
                paper_citation_id, paper_citations = next(cited_by_gen)
            except StopIteration:
                print('citations were finished')
                paper_citation_id = 0
                paper_citations = []
        # elif paper_citation_id < paperid:
        #     print(f'onoz, we skipped something? pid {paperid} > {paper_citation_id}')
        #     break
        # else:
        #     print(f'seems to be in order: cited_id {paper_citation_id}, paper_id {paperid}')
        strip_empty_fields(paper)
        jsonl = json.dumps(paper, ensure_ascii=False) + '\n'
        outp.put(jsonl)
        inp.task_done()
    print('DONE, joining inp')
    inp.task_done()
    inp.join()
    outp.put('STOP')
    print('REALLY DONE')
Example #36
def longest(queue: multiprocessing.JoinableQueue, nextqueue: multiprocessing.JoinableQueue):
    longest_word = ("", 0, False)
    while True:
        word_count = queue.get()
        if word_count[1] >= longest_word[1]:
            longest_word = (word_count[0], word_count[1], True)
            nextqueue.put(longest_word)
        else:
            nextqueue.put((word_count[0], word_count[1], False))
        queue.task_done()
Example #37
class ImageCrawler:

    NUM_PER_FETCH = 100
    NUM_PROCESSES = 10

    def __init__(self, database_config_path):
        self.queue = JoinableQueue()
        self.logger = Logger("image_crawler")
        self.adapter = ImageStoreAdapter(database_config_path, self.logger)

    def produce(self):
        while True:
            if self.queue.empty():
                for image_id, link in self.adapter.load_undownloaded_images(
                        self.NUM_PER_FETCH):
                    self.logger.log("Producer: add new image to crawl:" +
                                    image_id + " " + link)
                    self.queue.put((image_id, link))
            time.sleep(10)

    def consume(self, process_id):
        while True:
            self.logger.log("Consumer process:" + str(process_id) +
                            " fetch new image from queue")
            if not self.queue.empty():
                image_id, link = self.queue.get()
                self.logger.log("Consumer process:" + str(process_id) +
                                " start crawling " + str(link))
                image = common_utils.page_crawl(link)
                if image != None:
                    self.logger.log(link + "crawled successfully")
                    self.adapter.store_image(image_id, image)
                else:
                    self.logger.log(link + " failed at crawling")
                    self.adapter.update_image_status(
                        image_id, ImageIndexStatus.DOWNLOAD_FAILED)
                self.queue.task_done()
                time.sleep(1)
            else:
                self.logger.log("Queue empty")
                time.sleep(10)

    def run(self):
        producer = Process(target=self.produce)
        producer.start()
        consumers = []
        for i in range(self.NUM_PROCESSES):
            consumer = Process(target=self.consume, args=(i, ))
            consumers.append(consumer)
            consumer.start()

        for consumer in consumers:
            consumer.join()
        producer.join()
        self.queue.join()
            def get_data_for_df_test(
                start_date,
                end_date,
                period,
                test,
            ):
                """
                """
                process_id = 0
                process_cnt = 0
                velo_instances = []
                velo_instances_ret = []
                queue = JoinableQueue()
                date_format = "%m/%d/%Y"
                for i in range(3):
                    end_date_o = datetime.strptime(
                        end_date[i],
                        date_format,
                    ).date()
                    start_date_o = datetime.strptime(
                        start_date[i],
                        date_format,
                    ).date()

                    process_name = "process_{:03d}".format(process_id)
                    velo_inst = Velo(
                        process_id=process_id,
                        process_name=process_name,
                        queue=queue,
                        date_id=date,
                    )

                    process = Process(target=velo_inst.run)

                    process_id += 1
                    process_cnt += 1
                    velo_instances.append(velo_inst)
                    Multiprocess.processes.append(process)

                for i in range(process_cnt):
                    Multiprocess.processes[i].start()

                for i in range(process_cnt):
                    msg_process_id = None
                    msg_from_queue = ""
                    while True:
                        msg_from_queue = queue.get()
                        msg_process_id = msg_from_queue[0]
                        velo_instances_ret.append(msg_from_queue[1])
                        queue.task_done()
                        break

                    Multiprocess.processes[msg_process_id].join()

                return velo_instances_ret
Example #39
def main():
    jobs = JoinableQueue()
    result = JoinableQueue()

    print len(datasets)
    numToProcess = 10
    scores = pd.DataFrame(columns=[
        'precision', 'recall', 'fmeasure', 'numResult', 'minScore', 'topHits',
        'contentWeight', 'relationWeight'
    ])
    manual_annotations = get_manual_annotations(numToProcess)
    manual_tuples = get_ir_tuples(manual_annotations)

    for key in manual_annotations.keys():
        jobs.put(key)

    processed_count = Counter()

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result, processed_count))
        p.daemon = True
        p.start()

    #work(1, jobs, result, processed_count)

    automated_annotations = {}

    jobs.join()

    while not result.empty():
        dataset, classes = result.get()
        automated_annotations[dataset] = classes
        result.task_done()

    automated_tuples = get_ir_tuples(automated_annotations)
    hits = manual_tuples & automated_tuples
    misses = manual_tuples - automated_tuples

    precision = float(len(hits)) / len(automated_tuples)
    recall = float(len(hits)) / len(manual_tuples)
    fmeasure = 2 * (precision * recall) / (precision + recall)
    # print '\t'.join([str(x) for x in [precision, recall, fmeasure,
    #                              numResult, minScore, topHits]])
    scores = scores.append(dict(precision=precision,
                                recall=recall,
                                fmeasure=fmeasure,
                                numResult=numResult,
                                minScore=minScore,
                                topHits=topHits,
                                contentWeight=contentWeight,
                                relationWeight=relationWeight),
                           ignore_index=True)

    print scores
Example #40
class Analyzer(object):
	def __init__(self, data_root, working_dir, tpr, index=True, index_output='index.h5'):
		# list of analysis objects
		self.__analyses = []
		self.__working_dir = working_dir
		self.__fs = file_system.SH3FileSystem(data_root, index=True, index_output=index_output)
		self.__loader = loader.Loader(working_dir)
		self.__task_queue = JoinableQueue(8)
		self.__tpr = tpr

	def run(self):
		# start a queue of size max 8, block if no empty slots
		# populate the task queue with (analysis, xtc) items 
		for i in range(0, 8):
			p = Process(target=self.__worker)
			p.start()

		for batch in self.__fs.xtc_files():
			print "batch", batch
			for xtc in batch:
				for analysis in self.__analyses:
					print "queuing", analysis.name(), "and", xtc.name()
					self.__task_queue.put([analysis, xtc], True, None)

			print "waiting for these tasks to finish"
			self.__task_queue.join()
			print "tasks have finished"

			print "PID", os.getpid(), "loading analysis"
			for xtc in batch:
				for a in self.__analyses:
					self.__loader.load(a, xtc)	

	def add(self, analysis):
		self.__analyses.append(analysis)
	
	def remove(self, analysis):
		self.__analyses.append(analysis)

	def __worker(self):
		# TODO: use pool because it looks like the processes sometimes don't die if it fails
		# get one item from queue
		# block if queue is empty
		while True:
			try:
				# timeout after 30 seconds
				analysis,xtc = self.__task_queue.get(True, 30)
			except Empty:
				break
			else:
				analysis.run(xtc, self.__tpr)
				self.__task_queue.task_done()
Example #41
 def parallel(self):
     from multiprocessing import Process, Queue, JoinableQueue
     self.ntrajs = []
     for i in range(self.cpus):
         self.ntrajs.append(min(int(floor(float(self.ntraj)
             /self.cpus)),
             self.ntraj-sum(self.ntrajs)))
     cnt = sum(self.ntrajs)
     while cnt<self.ntraj:
         for i in range(self.cpus):
             self.ntrajs[i] += 1
             cnt+=1
             if (cnt>=self.ntraj):
                 break
     self.ntrajs = np.array(self.ntrajs)
     self.ntrajs = self.ntrajs[np.where(self.ntrajs>0)]
     self.nprocs = len(self.ntrajs)
     sols = []
     processes = []
     resq = JoinableQueue()
     print "Number of cpus:", self.cpus
     print "Trying to start", self.nprocs, "process(es)."
     print "Number of trajectories for each process:"
     print self.ntrajs
     for i in range(self.nprocs):
         p = Process(target=self.evolve_serial,
                 args=((resq,self.ntrajs[i],i,self.seed*(i+1)),))
         p.start()
         processes.append(p)
     resq.join()
     cnt = 0
     while True:
         try:
             sols.append(resq.get())
             resq.task_done()
             cnt += 1
             if (cnt >= self.nprocs): break
         except KeyboardInterrupt:
             break
         except:
             pass
     resq.join()
     for proc in processes:
         try:
             proc.join()
         except KeyboardInterrupt:
             print("Cancel thread on keyboard interrupt")
             proc.terminate()
             proc.join()
     resq.close()
     return sols
Example #42
	def __iter__(self):
		queue = JoinableQueue(maxsize=self.max_queue_size)

		n_batches, job_queue = self._start_producers(queue)

		# Run as consumer (read items from queue, in current thread)
		for x in xrange(n_batches):
			item = queue.get()
			#print queue.qsize(), "GET"
			yield item # Yield the item to the consumer (user)
			queue.task_done()

		queue.close()
		job_queue.close()
Example #43
def worker(input_q: JoinableQueue, output: Queue):
    from django import db
    db.connection.close()
    while True:
        task = input_q.get()
        if task == "end":
            break
        html = urlOpen.get_html(task.url)
        if html:
            text = textParser.get_text_from_html(html)
        input_q.task_done()
        # info()
        output.put(task.url)
    print("exit")
Example #44
def main():
    jobs = JoinableQueue()
    result = JoinableQueue()


    print len(datasets)
    numToProcess = 10
    scores = pd.DataFrame(columns=['precision','recall','fmeasure',
                                   'numResult','minScore','topHits',
                                   'contentWeight','relationWeight'])
    manual_annotations = get_manual_annotations(numToProcess)
    manual_tuples = get_ir_tuples(manual_annotations)

    for key in manual_annotations.keys():
        jobs.put(key)

    processed_count = Counter()
        
    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result, processed_count))
        p.daemon = True
        p.start()

    #work(1, jobs, result, processed_count)

    automated_annotations = {}

    jobs.join()

    while not result.empty():
        dataset, classes = result.get()
        automated_annotations[dataset] = classes
        result.task_done()

    automated_tuples = get_ir_tuples(automated_annotations)
    hits = manual_tuples & automated_tuples
    misses = manual_tuples - automated_tuples
    
    precision = float(len(hits)) / len(automated_tuples)
    recall = float(len(hits)) / len(manual_tuples)
    fmeasure = 2 * (precision * recall) / (precision + recall)
    # print '\t'.join([str(x) for x in [precision, recall, fmeasure,
    #                              numResult, minScore, topHits]])
    scores = scores.append(dict(precision=precision, recall=recall, fmeasure=fmeasure,
                                numResult=numResult, minScore=minScore, topHits=topHits,
                                contentWeight=contentWeight, relationWeight=relationWeight),
                        ignore_index=True)

    print scores
Example #45
class ImageCrawler:
    
    NUM_PER_FETCH = 100
    NUM_PROCESSES = 10
    def __init__(self, database_config_path):
        self.queue = JoinableQueue()
        self.logger = Logger("image_crawler")
        self.adapter = ImageStoreAdapter(database_config_path, self.logger)
        
    def produce(self):
        while True:
            if self.queue.empty():
                for image_id, link in self.adapter.load_undownloaded_images(self.NUM_PER_FETCH):
                    self.logger.log("Producer: add new image to crawl:" + image_id + " " + link)
                    self.queue.put((image_id, link))
            time.sleep(10)
            
    def consume(self, process_id):
        while True:
            self.logger.log("Consumer process:" + str(process_id) + " fetch new image from queue")
            if not self.queue.empty():
                image_id, link = self.queue.get()
                self.logger.log("Consumer process:"+ str(process_id) + " start crawling " + str(link))
                image = common_utils.page_crawl(link)
                if image != None:
                    self.logger.log(link + "crawled successfully")
                    self.adapter.store_image(image_id, image)
                else:
                    self.logger.log(link + " failed at crawling")
                    self.adapter.update_image_status(image_id, ImageIndexStatus.DOWNLOAD_FAILED)
                self.queue.task_done()
                time.sleep(1)
            else:
                self.logger.log("Queue empty")
                time.sleep(10)
    
    def run(self):
        producer = Process(target=self.produce)
        producer.start()
        consumers = []
        for i in range(self.NUM_PROCESSES):
            consumer = Process(target=self.consume, args=(i,))
            consumers.append(consumer)
            consumer.start()
        
        for consumer in consumers:
            consumer.join()
        producer.join()
        self.queue.join()
Example #46
	def __iter__(self):
		queue = JoinableQueue(maxsize=params.N_PRODUCERS*2)

		n_batches, job_queue = self.start_producers(queue)

		# Run as consumer (read items from queue, in current thread)
		for x in xrange(n_batches):
			item = queue.get()
			#print len(item[0]), queue.qsize(), "GET"
			yield item
			queue.task_done()

		#queue.join() #Lock until queue is fully done
		queue.close()
		job_queue.close()
Example #47
    def apply_mt(self, xs, parallelism, **kwargs):
        """Run the UDF multi-threaded using python multiprocessing"""
        if snorkel_conn_string.startswith('sqlite'):
            raise ValueError('Multiprocessing with SQLite is not supported. Please use a different database backend,'
                             ' such as PostgreSQL.')

        # Fill a JoinableQueue with input objects
        in_queue = JoinableQueue()
        for x in xs:
            in_queue.put(x)

        # If the UDF has a reduce step, we collect the output of apply in a Queue
        out_queue = None
        if hasattr(self.udf_class, 'reduce'):
            out_queue = JoinableQueue()

        # Start UDF Processes
        for i in range(parallelism):
            udf              = self.udf_class(in_queue=in_queue, out_queue=out_queue, **self.udf_init_kwargs)
            udf.apply_kwargs = kwargs
            self.udfs.append(udf)

        # Start the UDF processes, and then join on their completion
        for udf in self.udfs:
            udf.start()

        # If there is a reduce step, do now on this thread
        if hasattr(self.udf_class, 'reduce'):
            while any([udf.is_alive() for udf in self.udfs]):
                while True:
                    try:
                        y = out_queue.get(True, QUEUE_TIMEOUT)
                        self.reducer.reduce(y, **kwargs)
                        out_queue.task_done()
                    except Empty:
                        break
                self.reducer.session.commit()
            self.reducer.session.close()

        # Otherwise just join on the UDF.apply actions
        else:
            for i, udf in enumerate(self.udfs):
                udf.join()

        # Terminate and flush the processes
        for udf in self.udfs:
            udf.terminate()
        self.udfs = []
Example #48
class Multiplexer(object):
    def __init__(self, worker, writer, threads=4):
        self.worker=worker
        self.writer=writer
        self.q=JoinableQueue()
        self.done = Value(c_bool,False)
        self.consumer=Process(target=self.consume)
        self.pool = Pool(threads, init_opener)

    def start(self):
        self.done.value=False
        self.consumer.start()

    def addjob(self, url, data=None):
        params=[url]
        if data: params.append(data)
        try:
           return self.pool.apply_async(self.worker,params,callback=self.q.put)
        except:
            logger.error('[!] failed to scrape '+ url)
            logger.error(traceback.format_exc())
            raise

    def finish(self):
        self.pool.close()
        logger.info('closed pool')
        self.pool.join()
        logger.info('joined pool')
        self.done.value=True
        self.q.close()
        logger.info('closed q')
        self.consumer.join()
        logger.info('joined consumer')
        #self.q.join()
        #logger.info('joined q')

    def consume(self):
        param=[0,0]
        while True:
            job=None
            try:
                job=self.q.get(True, timeout=1)
            except Empty:
                if self.done.value==True: break
            if job:
                param = self.writer(job, param)
                self.q.task_done()
        logger.info('added/updated: %s' % param)
Example #49
def LeafSet(inchan:Queue, outchan:Queue):
    """Report the distinct elements of inchan on outchan."""
    sf = set()
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    morestuff = True
    while morestuff:
        x = inchan.get()
        logger.info("Leaf:%s" % x)
        if x not in sf:
            sf.add(x)
            outchan.put(x)
        inchan.task_done()
        if x == SIGOBJ:
            morestuff = False
    logger.info("leafdone")
Example #50
def printer(chan:Queue, fmt:str, destination:file):
    """A data sink that prints the contents of chan as strings to destination
    using a format string"""
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    logger.info("sink started")
    i = 0
    morestuff = True
    while morestuff:
        elt = chan.get()
        logger.info(fmt % (i, elt))
        i += 1
        chan.task_done()
        if elt == SIGOBJ:
            morestuff = False
    logger.info('printer done')
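
A hedged sketch of how these two stages might be wired into a pipeline, assuming LeafSet, printer, and the SIGOBJ sentinel are all defined in the same module. The excerpt does not show SIGOBJ's value, and printer never actually writes to its destination argument, so None is passed for it here.

from multiprocessing import JoinableQueue, Process

if __name__ == "__main__":
    raw = JoinableQueue()       # source -> LeafSet
    distinct = JoinableQueue()  # LeafSet -> printer

    dedup = Process(target=LeafSet, args=(raw, distinct))
    sink = Process(target=printer, args=(distinct, "item %d: %s", None))
    dedup.start()
    sink.start()

    for x in [1, 2, 2, 3, 1]:
        raw.put(x)
    raw.put(SIGOBJ)   # LeafSet forwards the sentinel, which also stops printer

    raw.join()        # every put has been acknowledged with task_done()
    dedup.join()
    sink.join()
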
def clear_area_around_eye(size = 256, image_dir = 'I:/AI_for_an_eyes/test/test/', target_dir = 'I:/AI_for_an_eyes/test/test_zonder_meuk_256/'):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    util.update_progress(0)


    tasks = glob.glob(image_dir+'*.jpeg')
    job_total = len(tasks)

    print 'Processing images matching ' + image_dir + '*.jpeg'

    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()*2

    for im_name in tasks:
        jobs.put(im_name)

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Thread(target=worker, args=(i, jobs, result, target_dir, size))
        p.daemon = True
        p.start()

    print 'Starting workers (', NUMBER_OF_PROCESSES, ')!'

    n_complete = 0
    for t in xrange(len(tasks)):
        r = result.get()
        n_complete += 1
        util.update_progress(n_complete / float(job_total))  # float division so progress is not truncated to 0 under Python 2
        result.task_done()
        #print t, 'done'

    for w in xrange(NUMBER_OF_PROCESSES):
        jobs.put(None)

    util.update_progress(1)

    print 'Done!'
    time.sleep(1)
    result.join()
    jobs.close()
    result.close()
def hist_eq(image_dir = 'test_hist/', target_dir = 'test_result_hist/', method = 'CLAHE'):

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)


    tasks = glob.glob(image_dir+'*.jpeg')
    job_total = len(tasks)

    print 'Processing images matching ' + image_dir + '*.jpeg'

    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()*2

    for im_name in tasks:
        jobs.put(im_name)

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Thread(target=worker, args=(i, jobs, result, target_dir, method))
        p.daemon = True
        p.start()

    print 'Starting workers (', NUMBER_OF_PROCESSES, ')!'

    n_complete = 0
    for t in xrange(len(tasks)):
        r = result.get()
        n_complete += 1
        util.update_progress(n_complete / float(job_total))  # float division so progress is not truncated to 0 under Python 2
        result.task_done()
        #print t, 'done'

    for w in xrange(NUMBER_OF_PROCESSES):
        jobs.put(None)

    print 'Done!'
    result.join()
    jobs.close()
    result.close()
Example #53
class MMapPool(object):
    def __init__(self, n, mmap_size):
        self.n = n
        self.mmap_size = mmap_size
        self.pool = [mmap.mmap(-1, mmap_size) for _ in range(n)]
        self.free_mmaps = set(range(n))
        self.free_queue = JoinableQueue()

    def new(self):
        if not self.free_mmaps:
            self.free_mmaps.add(self.free_queue.get())
            self.free_queue.task_done()
        while True:
            try:
                self.free_mmaps.add(self.free_queue.get_nowait())
                self.free_queue.task_done()
            except Empty:
                break
        mmap_idx = self.free_mmaps.pop()
        return mmap_idx, self.pool[mmap_idx]

    def join(self):
        while len(self.free_mmaps) < self.n:
            self.free_mmaps.add(self.free_queue.get())
            self.free_queue.task_done()

    def get(self, idx):
        return self.pool[idx]

    def free(self, idx):
        self.free_queue.put(idx)
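
A hedged usage sketch for the MMapPool above, assuming the class is available as written: new() hands out a free buffer and blocks on free_queue once all of them are taken, free() returns a buffer from any process, and join() blocks until every buffer is back in the free set.

pool = MMapPool(n=4, mmap_size=1024 * 1024)

idx, buf = pool.new()          # acquire a free mmap buffer
buf.seek(0)
buf.write(b"some payload")     # producer fills the buffer

# Typically idx would be handed to another process, which reads pool.get(idx)
# and then releases the slot when it is finished with it:
pool.free(idx)

pool.join()                    # block until all n buffers have been returned
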
Example #54
class FileReader(Process):
    def __init__(self, filename, buffer_size=1000):
        super(FileReader, self).__init__()
        self.filename = filename
        self.que = JoinableQueue(buffer_size)
        self.event = Event()
        self.event.set()
        self.started = Event()
        self.started.clear()

    # It's crucial to call task_done on the queue after each item has been processed
    def get_queue(self):
        return self.que

    def get_event(self):
        return self.event

    def is_done(self):
        return not self.event.is_set() and self.que.empty()

    def run(self):
        self.started.set()
        self.proc()
        self.event.clear()

    def proc(self):
        with open_gz(self.filename, encoding='utf-8') as file:
            for line in file:
                self.que.put(line)

    def __iter__(self):
        self.start()
        self.started.wait()
        while not self.is_done():
            try:
                text = self.que.get(timeout=0.1)
                yield text
                self.que.task_done()
            except Empty:
                pass
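
A hedged usage sketch for FileReader, assuming the class and its open_gz helper are available; "corpus.txt.gz" is a hypothetical input file. Because __iter__ calls task_done() after yielding each line, simply looping over the reader keeps its internal JoinableQueue balanced.

if __name__ == "__main__":
    reader = FileReader("corpus.txt.gz", buffer_size=500)
    for line in reader:    # __iter__ starts the reader process and streams lines
        print(line.rstrip())
    reader.join()          # the process exits on its own once the file is drained
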
Example #55
    def __iter__(self):
        queue = JoinableQueue(maxsize=params.N_PRODUCERS * 8)

        n_batches, job_queue = self.start_producers(queue)

        # Run as consumer (read items from queue, in current thread)
        for x in xrange(n_batches):
            item = queue.get()
            yield item
            queue.task_done()

        queue.close()
        job_queue.close()
        if self.shuffle:
            shuffled_idx = np.random.permutation(len(self.X))
            X_new = []
            y_new = []
            for i in range(len(self.X)):
                X_new += [self.X[shuffled_idx[i]]]
                y_new += [self.y[shuffled_idx[i]]]
            self.X = X_new
            self.y = y_new
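
A minimal self-contained sketch of the bounded producer/consumer iteration used above: producer processes fill a size-limited JoinableQueue, and the consuming thread pulls a known number of items, acknowledging each with task_done(). The way work is chunked across producers here is an illustrative choice.

from multiprocessing import JoinableQueue, Process


def producer(queue, items):
    for item in items:
        queue.put(item)            # blocks when the queue is full (backpressure)


def iterate_items(all_items, n_producers=2):
    queue = JoinableQueue(maxsize=n_producers * 8)
    chunks = [all_items[i::n_producers] for i in range(n_producers)]
    producers = [Process(target=producer, args=(queue, chunk)) for chunk in chunks]
    for p in producers:
        p.start()

    for _ in range(len(all_items)):   # consume exactly as many items as were queued
        item = queue.get()
        yield item
        queue.task_done()

    for p in producers:
        p.join()
    queue.close()


if __name__ == "__main__":
    for item in iterate_items(list(range(10))):
        print(item)
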
Example #56
def main(file_list, outputFile):
    jobs = JoinableQueue(10000)
    result = JoinableQueue()

    loader_processes = 3
    NUMBER_OF_PROCESSES = cpu_count() - loader_processes
    files = [ [] for x in range(loader_processes)]
    for i, f in enumerate(open(file_list)):
        files[i%loader_processes].append(f)

    for i in range(loader_processes):
        Process(target=read, args=(jobs, files[i])).start()

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result))
        p.daemon = True
        p.start()

    o = csv.writer(open(outputFile, 'w'), delimiter=',')
    o.writerow(stat_cols)
    while not submitted.is_set() and not processed.is_set():
        row = result.get()
        o.writerow(row)
        result.task_done()
class Processor:
     
    queue = None
                 
    def __init__(self, 
                 shared_array,
                 data_shape,
                 number_of_workers=1):
              
        # create a joinable queue
        self.queue = JoinableQueue()
        
        # keep reference to shared memory array
        self.shared_array = shared_array
        self.data_shape = data_shape
        
        # create the processes
        self.processes = [Process(target=self.compute_indicator) for _ in range(number_of_workers)]
        for p in self.processes:
            p.start()
                 
    def add_work_item(self, item):
         
        # add the parameters list to the parameters queue
        self.queue.put(item)
 
    def compute_indicator(self):
         
        while True:
              
            # get a list of arguments from the queue
            arguments = self.queue.get()
              
            # a None sentinel means there is no more work for this worker
            if arguments is None:
                self.queue.task_done()  # acknowledge the sentinel so a pending queue.join() can return
                break
  
            # process the arguments here
            index = arguments[0] 
            month_scale = arguments[1] 
            valid_min = arguments[2] 
            valid_max = arguments[3] 
                 
            # turn the shared array into a numpy array     
            data = np.ctypeslib.as_array(self.shared_array)
            data = data.reshape(self.data_shape)
                
            # only process non-empty grid cells, i.e. data array contains at least some non-NaN values
            if (isinstance(data[:, index], np.ma.MaskedArray) and data[:, index].mask.all()) \
                or np.isnan(data[:, index]).all() or (data[:, index] < 0).all():
             
                pass         
                  
            else:  # we have some valid values to work with
             
                logger.info('Processing latitude: {}'.format(index))
             
                # perform a fitting to gamma     
                fitted_values = indices.spi_gamma(data[:, index],
                                                  month_scale, 
                                                  valid_min, 
                                                  valid_max)
 
                # update the shared array
                data[:, index] = fitted_values
                
            # indicate that the task has completed
            self.queue.task_done()
 
    def terminate(self):
 
        # terminate all processes
        for p in self.processes:
            p.terminate()

    def wait_on_all(self):
 
        # wait until every queued item has been acknowledged with task_done()
        self.queue.join()
Example #58
def main():

    #--PLUGINS INITIALIZATION--
    start_time = time()
    print '\n\n\n' + _format_title('Registering available plugins')
    sslyze_plugins = PluginsFinder()
    available_plugins = sslyze_plugins.get_plugins()
    available_commands = sslyze_plugins.get_commands()
    print ''
    for plugin in available_plugins:
        print '  ' + plugin.__name__
    print '\n\n'

    # Create the command line parser and the list of available options
    sslyze_parser = CommandLineParser(available_plugins, PROJECT_VERSION)

    try: # Parse the command line
        (command_list, target_list, shared_settings) = sslyze_parser.parse_command_line()
    except CommandLineParsingError as e:
        print e.get_error_msg()
        return
    print command_list    

    #--PROCESSES INITIALIZATION--
    nb_processes = command_list.nb_processes
    if command_list.https_tunnel:
        nb_processes = 1 # Let's not kill the proxy
        
    task_queue = JoinableQueue() # Processes get tasks from task_queue and
    result_queue = JoinableQueue() # put the result of each task in result_queue

    # Spawn a pool of processes, and pass them the queues
    process_list = []
    for _ in xrange(nb_processes):
        p = WorkerProcess(task_queue, result_queue, available_commands, \
                            shared_settings)
        p.start()
        process_list.append(p) # Keep track of the processes that were started


    #--TESTING SECTION--
    # Figure out which hosts are up and fill the task queue with work to do
    print _format_title('Checking host(s) availability')


    targets_OK = []
    targets_ERR = []
    target_results = ServersConnectivityTester.test_server_list(target_list, 
                                                                shared_settings)
    for target in target_results:
        if target is None:
            break # None is a sentinel here
        
        # Send tasks to worker processes
        targets_OK.append(target)
        for command in available_commands:
            if getattr(command_list, command):
                args = command_list.__dict__[command]
                task_queue.put( (target, command, args) )
    
    for exception in target_results:
        targets_ERR.append(exception)
        
    print ServersConnectivityTester.get_printable_result(targets_OK, targets_ERR)
    print '\n\n'

    # Put a 'None' sentinel in the queue to let each process know when every
    # task has been completed
    [task_queue.put(None) for _ in process_list]

    # Keep track of how many tasks have to be performed for each target
    task_num=0
    for command in available_commands:
        if getattr(command_list, command):
            task_num+=1


    # --REPORTING SECTION--
    processes_running = nb_processes
    
    # XML output
    if shared_settings['xml_file']:
        xml_output_list = []

    # Each host has a list of results
    result_dict = {}
    for target in targets_OK:
        result_dict[target] = []

    # If all processes have stopped, all the work is done
    while processes_running:
        result = result_queue.get()

        if result is None: # Getting None means that one process was done
            processes_running -= 1

        else: # Getting an actual result
            (target, command, plugin_result) = result
            result_dict[target].append((command, plugin_result))

            if len(result_dict[target]) == task_num: # Done with this target
                # Print the results and update the xml doc
                print _format_txt_target_result(target, result_dict[target])
                if shared_settings['xml_file']:
                    xml_output_list.append(_format_xml_target_result(target, result_dict[target]))
                           
        result_queue.task_done()


    # --TERMINATE--
    
    # Make sure all the processes had time to terminate
    task_queue.join()
    result_queue.join()
    #[process.join() for process in process_list] # Causes interpreter shutdown errors
    exec_time = time()-start_time
    
    # Output XML doc to a file if needed
    if shared_settings['xml_file']:
        result_xml_attr = {'httpsTunnel':str(shared_settings['https_tunnel_host']),
                           'totalScanTime' : str(exec_time), 
                           'defaultTimeout' : str(shared_settings['timeout']), 
                           'startTLS' : str(shared_settings['starttls'])}
        
        result_xml = Element('results', attrib = result_xml_attr)
        
        # Sort results in alphabetical order to make the XML files (somewhat) diff-able
        xml_output_list.sort(key=lambda xml_elem: xml_elem.attrib['host'])
        for xml_element in xml_output_list:
            result_xml.append(xml_element)
            
        xml_final_doc = Element('document', title = "SSLyze Scan Results",
                                SSLyzeVersion = PROJECT_VERSION, 
                                SSLyzeWeb = PROJECT_URL)
        # Add the list of invalid targets
        xml_final_doc.append(ServersConnectivityTester.get_xml_result(targets_ERR))
        # Add the output of the plugins
        xml_final_doc.append(result_xml)

        # Hack: Prettify the XML file so it's (somewhat) diff-able
        xml_final_pretty = minidom.parseString(tostring(xml_final_doc, encoding='UTF-8'))
        with open(shared_settings['xml_file'],'w') as xml_file:
            xml_file.write(xml_final_pretty.toprettyxml(indent="  ", encoding="utf-8" ))
            

    print _format_title('Scan Completed in {0:.2f} s'.format(exec_time))
Example #59
    tasks = [Task(q, out_queue) for i in range(NUM_WORKERS)]
    for w in tasks:
        w.start()

    logging.info("Items left in queue: {0}".format(q.qsize()))
    logging.debug("Joining q")
    # q.join()
    # qf.join()

    if False:
        processes_active = True
        while processes_active:
            for w in tasks:
                processes_active = False or w.is_alive()
                logging.debug(w.is_alive())
            sleep(0.2)

    for y in tasks:
        y.join()

    logging.info("Elapsed time with {0} threads and {1} as maximum number: {2}".format(NUM_WORKERS,
                                                                                       MAX_PRIME_NUMBER,
                                                                                       datetime.now()-start_time))

    count = 0
    while not out_queue.empty():
        out_queue.get()
        out_queue.task_done()
        count += 1
    logging.info("Total primes found: {0}".format(count))
Example #60
def main():
    # For py2exe builds
    freeze_support()

    # Handle SIGINT to terminate processes
    signal.signal(signal.SIGINT, sigint_handler)

    # --PLUGINS INITIALIZATION--
    start_time = time()
    print "\n\n\n" + _format_title("Registering available plugins")
    sslyze_plugins = PluginsFinder()
    available_plugins = sslyze_plugins.get_plugins()
    available_commands = sslyze_plugins.get_commands()
    print ""
    for plugin in available_plugins:
        print "  " + plugin.__name__
    print "\n\n"

    # Create the command line parser and the list of available options
    sslyze_parser = CommandLineParser(available_plugins, PROJECT_VERSION)

    try:  # Parse the command line
        (command_list, target_list, shared_settings) = sslyze_parser.parse_command_line()
    except CommandLineParsingError as e:
        print e.get_error_msg()
        return

    # --PROCESSES INITIALIZATION--
    # Three processes per target from MIN_PROCESSES up to MAX_PROCESSES
    nb_processes = max(MIN_PROCESSES, min(MAX_PROCESSES, len(target_list) * 3))
    if command_list.https_tunnel:
        nb_processes = 1  # Let's not kill the proxy

    task_queue = JoinableQueue()  # Processes get tasks from task_queue and
    result_queue = JoinableQueue()  # put the result of each task in result_queue

    # Spawn a pool of processes, and pass them the queues
    for _ in xrange(nb_processes):
        priority_queue = JoinableQueue()  # Each process gets a priority queue
        p = WorkerProcess(priority_queue, task_queue, result_queue, available_commands, shared_settings)
        p.start()
        process_list.append((p, priority_queue))  # Keep track of each process and priority_queue

    # --TESTING SECTION--
    # Figure out which hosts are up and fill the task queue with work to do
    print _format_title("Checking host(s) availability")

    targets_OK = []
    targets_ERR = []

    # Each server gets assigned a priority queue for aggressive commands
    # so that they're never run in parallel against this single server
    cycle_priority_queues = cycle(process_list)
    target_results = ServersConnectivityTester.test_server_list(target_list, shared_settings)
    for target in target_results:
        if target is None:
            break  # None is a sentinel here

        # Send tasks to worker processes
        targets_OK.append(target)
        (_, current_priority_queue) = cycle_priority_queues.next()

        for command in available_commands:
            if getattr(command_list, command):
                args = command_list.__dict__[command]

                if command in sslyze_plugins.get_aggressive_commands():
                    # Aggressive commands should not be run in parallel against
                    # a given server so we use the priority queues to prevent this
                    current_priority_queue.put((target, command, args))
                else:
                    # Normal commands get put in the standard/shared queue
                    task_queue.put((target, command, args))

    for exception in target_results:
        targets_ERR.append(exception)

    print ServersConnectivityTester.get_printable_result(targets_OK, targets_ERR)
    print "\n\n"

    # Put a 'None' sentinel in the queue to let each process know when every
    # task has been completed
    for (proc, priority_queue) in process_list:
        task_queue.put(None)  # One sentinel in the task_queue per proc
        priority_queue.put(None)  # One sentinel in each priority_queue

    # Keep track of how many tasks have to be performed for each target
    task_num = 0
    for command in available_commands:
        if getattr(command_list, command):
            task_num += 1

    # --REPORTING SECTION--
    processes_running = nb_processes

    # XML output
    xml_output_list = []

    # Each host has a list of results
    result_dict = {}
    for target in targets_OK:
        result_dict[target] = []

    # If all processes have stopped, all the work is done
    while processes_running:
        result = result_queue.get()

        if result is None:  # Getting None means that one process was done
            processes_running -= 1

        else:  # Getting an actual result
            (target, command, plugin_result) = result
            result_dict[target].append((command, plugin_result))

            if len(result_dict[target]) == task_num:  # Done with this target
                # Print the results and update the xml doc
                print _format_txt_target_result(target, result_dict[target])
                if shared_settings["xml_file"]:
                    xml_output_list.append(_format_xml_target_result(target, result_dict[target]))

        result_queue.task_done()

    # --TERMINATE--

    # Make sure all the processes had time to terminate
    task_queue.join()
    result_queue.join()
    # [process.join() for process in process_list] # Causes interpreter shutdown errors
    exec_time = time() - start_time

    # Output XML doc to a file if needed
    if shared_settings["xml_file"]:
        result_xml_attr = {
            "httpsTunnel": str(shared_settings["https_tunnel_host"]),
            "totalScanTime": str(exec_time),
            "defaultTimeout": str(shared_settings["timeout"]),
            "startTLS": str(shared_settings["starttls"]),
        }

        result_xml = Element("results", attrib=result_xml_attr)

        # Sort results in alphabetical order to make the XML files (somewhat) diff-able
        xml_output_list.sort(key=lambda xml_elem: xml_elem.attrib["host"])
        for xml_element in xml_output_list:
            result_xml.append(xml_element)

        xml_final_doc = Element(
            "document", title="SSLyze Scan Results", SSLyzeVersion=PROJECT_VERSION, SSLyzeWeb=PROJECT_URL
        )
        # Add the list of invalid targets
        xml_final_doc.append(ServersConnectivityTester.get_xml_result(targets_ERR))
        # Add the output of the plugins
        xml_final_doc.append(result_xml)

        # Hack: Prettify the XML file so it's (somewhat) diff-able
        xml_final_pretty = minidom.parseString(tostring(xml_final_doc, encoding="UTF-8"))
        with open(shared_settings["xml_file"], "w") as xml_file:
            xml_file.write(xml_final_pretty.toprettyxml(indent="  ", encoding="utf-8"))

    print _format_title("Scan Completed in {0:.2f} s".format(exec_time))