Example #1
def progdev_all(boffile, gain):
    """ Initialize all roach boards with boffile and gain settings """
    roachlist = ['rofl%i'%i for i in range(1,16+1)]
    n_roach = len(roachlist)
    
    print "Programming all roaches with %s"%boffile
    print "Gain value: %ix"%gain
    print "Please wait..."
    # Create worker processes and a message queue
    procs = []
    q     = JoinableQueue()
    for i in range(n_roach):
        p = Process(target=progdev_adc16, args=(roachlist[i], q, boffile, gain))
        procs.append(p)
    # Start processes
    for p in procs:
        p.start()
    # Join processes
    for p in procs:
        p.join()
    
    # Print messages
    while not q.empty():
        print q.get()
    print "OK"
Example #2
def test_add_sentinels():
    queue = JoinableQueue()
    run.add_sentinels(queue, 2)

    queue.get()

    assert queue.get() is None
Example #3
def InternalSet(Achild:Queue, Bchild:Queue, outqueue:Queue):
    """Take the output of two LeafSet's and take the union."""
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    AminusB = set()
    BminusA = set()
    morestuff = True
    while morestuff:
        a = Achild.get()
        b = Bchild.get()
        logger.info("Internal:%s:%s" % (a, b))
        if a in BminusA:
            BminusA.remove(a)
        elif a not in AminusB:
            AminusB.add(a)
            outqueue.put(a)
        if b in AminusB:
            AminusB.remove(b)
        elif b not in BminusA:
            BminusA.add(b)
            outqueue.put(b)
        Achild.task_done()
        Bchild.task_done()
        if (a == SIGOBJ) or (b == SIGOBJ):
            outqueue.put(SIGOBJ)
            morestuff = False
    logger.info("internal done")
Example #4
    def _drain_and_join_queue(q: mp.JoinableQueue, join: bool = True) -> None:
        """
        Drains a queue completely, such that it is joinable

        :param q: Queue to join
        :param join: Whether to join the queue or not
        """
        # Do nothing when it's not set
        if q is None:
            return

        # Call task done up to the point where we get a ValueError. We need to do this when child processes already
        # started processing on some tasks and got terminated half-way.
        n = 0
        try:
            while True:
                q.task_done()
                n += 1
        except ValueError:
            pass

        try:
            while not q.empty() or n != 0:
                q.get(block=True, timeout=1.0)
                n -= 1
        except (queue.Empty, EOFError):
            pass

        # Join
        if join:
            q.join()
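A hedged shutdown sketch for the helper above, assuming it is importable as a plain function (in its original context it looks like a static helper on a worker-pool class). It reproduces the situation the comments describe: a worker is terminated mid-task, so the queue has to be drained before it can be joined.

import multiprocessing as mp

def stubborn_worker(q: mp.JoinableQueue) -> None:
    # Takes items but never calls task_done(), mimicking a worker killed mid-task.
    while True:
        q.get()

if __name__ == '__main__':
    q = mp.JoinableQueue()
    for i in range(10):
        q.put(i)
    w = mp.Process(target=stubborn_worker, args=(q,))
    w.start()
    w.terminate()
    w.join()
    # q.join() on its own could now block forever; drain first, then join.
    _drain_and_join_queue(q, join=True)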
Example #5
class Requester(object):
    def __init__(self, num_workers=2):
        self.queue = JoinableQueue()
        self.processes = [
            Process(target=self.request) for _ in range(num_workers)
        ]

    def add_url(self, url):
        self.queue.put(url)

    def request(self):
        url = self.queue.get()
        while url is not None:

            # TODO - actually send a request here

            self.queue.task_done()

            url = self.queue.get()

    def terminate(self):

        # send the terminate command
        for _ in self.processes:
            self.queue.put(None)

        # wait for processing to finish
        for p in self.processes:
            p.join()
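A short usage sketch for the class above. Note that __init__ only creates the worker processes, so they are started explicitly here; the request body is still the TODO placeholder, so this only exercises the queue wiring, and the URLs are placeholders.

if __name__ == '__main__':
    requester = Requester(num_workers=2)
    for p in requester.processes:
        p.start()
    for url in ('https://example.com/a', 'https://example.com/b'):
        requester.add_url(url)
    requester.queue.join()   # wait until every url has been marked task_done
    requester.terminate()    # one None sentinel per worker, then join them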
Example #6
def threaded_db_input(pipe: mp.JoinableQueue, len_seeds: int) -> NoReturn:
    """Runs DB operation in a separate process

    Args:
        :param pipe: connection with the parent.
        :param len_seeds: total number of seeds.

    Returns:
    Executes the queries from the queue.
    """
    con, dbname = get_db_con(len_seeds)
    stmt = pipe.get(timeout=3600)
    pid = None
    while stmt is not None:
        try:
            pid.join()
        except Exception as e:
            if pid:
                print(e)
        # try:
        # con = con = lite.connect(dbname, timeout=3000, check_same_thread=False, isolation_level=None)
        # con.commit()
        pid = mp.Process(target=stmt[0], args=(con, ) + stmt[1])
        pid.start()
        # except Exception as e:
        #     print('Found exception in db input:')
        #     print(e)
        #     print('Arguments that caused exception: ')
        #     print(stmt)
        # finally:
        pipe.task_done()
        stmt = pipe.get()
    print('DB thread exiting...')
    con.close()
Example #7
class MMapPool(object):
    def __init__(self, n, mmap_size):
        self.n = n
        self.mmap_size = mmap_size
        self.pool = [mmap.mmap(-1, mmap_size) for _ in range(n)]
        self.free_mmaps = set(range(n))
        self.free_queue = JoinableQueue()

    def new(self):
        if not self.free_mmaps:
            self.free_mmaps.add(self.free_queue.get())
            self.free_queue.task_done()
        while True:
            try:
                self.free_mmaps.add(self.free_queue.get_nowait())
                self.free_queue.task_done()
            except Empty:
                break
        mmap_idx = self.free_mmaps.pop()
        return mmap_idx, self.pool[mmap_idx]

    def join(self):
        while len(self.free_mmaps) < self.n:
            self.free_mmaps.add(self.free_queue.get())
            self.free_queue.task_done()

    def get(self, idx):
        return self.pool[idx]

    def free(self, idx):
        self.free_queue.put(idx)
Example #8
 def generator(test_q: JoinableQueue) -> Iterator[TestBatch]:
     test = test_q.get()
     while test:
         test_q.task_done()
         yield test
         test = test_q.get()
     test_q.task_done()
Example #9
def main():
    from multiprocessing import JoinableQueue
    from genmod.vcf import vcf_header
    from genmod.utils import annotation_parser
    parser = argparse.ArgumentParser(description="Parse different kind of pedigree files.")
    parser.add_argument('variant_file', type=str, nargs=1 , help='A file with variant information.')
    parser.add_argument('annotation_file', type=str, nargs=1 , help='A file with feature annotations.')
    parser.add_argument('-phased', '--phased', action="store_true", help='If variant file is phased.')    
    parser.add_argument('-v', '--verbose', action="store_true", help='Increase output verbosity.')
    
    args = parser.parse_args()
    infile = args.variant_file[0]
    if args.verbose:
        print('Parsing annotationfile...')
        start_time_annotation = datetime.now()
    my_anno_parser = annotation_parser.AnnotationParser(args.annotation_file[0], 'ref_gene')
    
    if args.verbose:
        print('annotation parsed. Time to parse annotation: %s\n' % str(datetime.now() - start_time_annotation))
    
    my_head_parser = vcf_header.VCFParser(infile)
    my_head_parser.parse()
    print(my_head_parser.__dict__)
    variant_queue = JoinableQueue()
    start_time = datetime.now()        
    
    my_parser = VariantFileParser(infile, variant_queue, my_head_parser, my_anno_parser, args)
    nr_of_batches = my_parser.parse()
    print(nr_of_batches)
    for i in range(nr_of_batches):
        variant_queue.get()
        variant_queue.task_done()
    
    variant_queue.join()
    print('Time to parse variants: %s ' % str(datetime.now()-start_time))
Example #11
def run_joinable_queue_task_done():
    """Test if I need to mark task done on a joinable queue that has a timeout."""
    import time
    from queue import Empty
    from multiprocessing import JoinableQueue
    que = JoinableQueue()

    que.put(1)
    one = que.get(timeout=1)
    que.task_done()

    try:
        two_fail = que.get(timeout=1)
    except Empty:
        pass

    que.put(2)
    two = que.get(timeout=1)
    que.task_done()

    que.put(3)
    three = que.get(timeout=1)
    que.task_done()

    que.join()
    print('end')
Example #12
class TaskManager:
    # noinspection PyPep8Naming
    def __init__(self,
                 jobs_queue_capacity: int,
                 workers_num: int,
                 WorkerClass: Worker.__class__ = Worker):
        # empty job queue
        self._queue = JoinableQueue(maxsize=jobs_queue_capacity)
        logger.info(
            f'Queue size set to accept at most {jobs_queue_capacity} before pausing job assignment.'
        )
        self.WorkerClass = WorkerClass
        self.workers_num = max_number_of_workers(workers_num)

    _workers = []

    def wake_up_workers(self):
        self._workers: List[Worker] = [
            self.WorkerClass(self._queue) for _ in range(self.workers_num)
        ]
        for worker in self._workers:
            worker.start()

    def assign_task(self, job: Task):
        self._queue.put(job)

    def stop_workers(self):
        logger.info('waiting all workers to finish')
        # usual termination condition is to put None on the queue. Queues are FIFO but from Python 3.8 docs:
        # https://docs.python.org/3.8/library/multiprocessing.html#pipes-and-queues
        # "If multiple processes are enqueuing objects, it is possible for the objects to be received at the other
        # end out-of-order. However, objects enqueued by the same process will always be in the expected order
        # with respect to each other.". So, when there's a single producer, that's not an issue; when there are many
        # producers it may happen that even if Nones are enqueued at the end of the queue, consumers pick 'em
        # before other items in the queue (breaking the FIFO assumption). In this case the workers would leave
        # before the queue is empty. To avoid this, before sending Nones, it's better to wait for the queue to be
        # consumed.

        # not bullet-proof, as empty() and qsize() return approximate values, but it helps
        while not self._queue.empty():
            print(f"jobs waiting to be assigned: {self._queue.qsize()}")
            sleep(1)
        for _ in self._workers:
            self._queue.put(None, block=True, timeout=None)
        self._queue.join()
        logger.info('all processes_finished')

    def discard_waiting_tasks(self):
        while not self._queue.empty():
            try:
                self._queue.get(False)
            except Empty:
                continue
            self._queue.task_done()

    def number_of_waiting_tasks(self):
        return self._queue.qsize()
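A hedged lifecycle sketch for the manager above; Worker, Task and produce_tasks() stand in for abstractions the snippet imports but does not show.

manager = TaskManager(jobs_queue_capacity=100, workers_num=4)
manager.wake_up_workers()        # start one WorkerClass process per worker
for job in produce_tasks():      # hypothetical source of Task objects
    manager.assign_task(job)     # blocks once 100 jobs are already pending
manager.stop_workers()           # wait for the queue to drain, then send one None per worker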
Example #13
def drain_queue(q: mp.JoinableQueue) -> List:
    """Keeps taking items until we get a `None`."""
    items = []
    item = q.get()
    q.task_done()
    while item is not None:
        items.append(item)
        item = q.get()
        q.task_done()

    return items
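Because drain_queue stops at the first None, the producing side has to enqueue exactly one sentinel after the real items; a minimal sketch of that pairing, assuming the function above is importable.

import multiprocessing as mp

if __name__ == '__main__':
    q = mp.JoinableQueue()
    for item in ('a', 'b', 'c'):
        q.put(item)
    q.put(None)      # sentinel: tells drain_queue to stop
    assert drain_queue(q) == ['a', 'b', 'c']
    q.join()         # every put, including the sentinel, was marked task_done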
Example #14
def main():
    from multiprocessing import JoinableQueue
    from genmod.vcf import vcf_header
    from genmod.utils import annotation_parser
    parser = argparse.ArgumentParser(
        description="Parse different kind of pedigree files.")
    parser.add_argument('variant_file',
                        type=str,
                        nargs=1,
                        help='A file with variant information.')
    parser.add_argument('annotation_file',
                        type=str,
                        nargs=1,
                        help='A file with feature annotations.')
    parser.add_argument('-phased',
                        '--phased',
                        action="store_true",
                        help='If variant file is phased.')
    parser.add_argument('-v',
                        '--verbose',
                        action="store_true",
                        help='Increase output verbosity.')

    args = parser.parse_args()
    infile = args.variant_file[0]
    if args.verbose:
        print('Parsing annotationfile...')
        start_time_annotation = datetime.now()
    my_anno_parser = annotation_parser.AnnotationParser(
        args.annotation_file[0], 'ref_gene')

    if args.verbose:
        print('annotation parsed. Time to parse annotation: %s\n' %
              str(datetime.now() - start_time_annotation))

    my_head_parser = vcf_header.VCFParser(infile)
    my_head_parser.parse()
    print(my_head_parser.__dict__)
    variant_queue = JoinableQueue()
    start_time = datetime.now()

    my_parser = VariantFileParser(infile, variant_queue, my_head_parser,
                                  my_anno_parser, args)
    nr_of_batches = my_parser.parse()
    print(nr_of_batches)
    for i in range(nr_of_batches):
        variant_queue.get()
        variant_queue.task_done()

    variant_queue.join()
    print('Time to parse variants: %s ' % str(datetime.now() - start_time))
Example #15
def threaded_copy(pipe: mp.JoinableQueue) -> NoReturn:
    """Recieves filenames (A, B) from the pipe and tries to copy A into B

    Args:
        :param pipe: connection with the parent

    Returns:
    Copies files in the background.
    """
    stmt = pipe.get(timeout=3600)
    while stmt is not None:
        # with COPY_LOCK:
        cp2(stmt[0], stmt[1])
        pipe.task_done()
        stmt = pipe.get(timeout=1800)
Example #16
def calculate_set(num_processes):
    todo_queue = JoinableQueue()
    results_queue = JoinableQueue()

    # setup and launch workers
    # we'll make them daemon processes so they shut down automatically when this process exits, but
    # we'll also shut them down ourselves when we finish
    workers = [
        Process(target=worker, args=(todo_queue, results_queue))
        for i in xrange(num_processes)
    ]
    for individual in workers:
        individual.daemon = True
        individual.start()

    result = numpy.zeros([ny, nx])
    for i in xrange(ny):
        y = i * dy + ylo
        for j in xrange(nx):
            x = j * dx + xlo
            todo_queue.put((x, y, i, j))
    todo_queue.join()

    while not results_queue.empty():
        i, j, val = results_queue.get()
        result[i, j] = val
        results_queue.task_done()

    # shutdown the compute processes
    for individual in workers:
        individual.terminate()

    return result
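The worker target is not part of the snippet; a hedged sketch of what it presumably looks like, given that calculate_set fills todo_queue with (x, y, i, j) tuples and reads (i, j, value) triples back. compute_point is a stand-in for the real per-point computation.

def worker(todo_queue, results_queue):
    # Hypothetical worker: pull (x, y, i, j) tasks forever; the parent terminates
    # the daemon processes once todo_queue.join() has returned.
    while True:
        x, y, i, j = todo_queue.get()
        results_queue.put((i, j, compute_point(x, y)))
        todo_queue.task_done()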
Example #17
def data_generator_func(in_queue: JoinableQueue, out_queue: Queue, tr_h, hr_t, n_entity, neg_weight):
    while True:
        dat = in_queue.get()
        if dat is None:
            break
        # [head(tail), relation, #of_total_positive_candidates, positive_instances..., negative_instances...]
        hr_tlist = list()
        hr_tweight = list()
        tr_hlist = list()
        tr_hweight = list()

        htr = dat

        for idx in range(htr.shape[0]):
            if np.random.uniform(-1, 1) > 0:  # t r predict h
                tr_hweight.append(
                    [1. if x in tr_h[htr[idx, 1]][htr[idx, 2]] else y for
                     x, y in enumerate(np.random.choice([0., -1.], size=n_entity, p=[1 - neg_weight, neg_weight]))])
                tr_hlist.append([htr[idx, 1], htr[idx, 2]])
            else:  # h r predict t
                hr_tweight.append(
                    [1. if x in hr_t[htr[idx, 0]][htr[idx, 2]] else y for
                     x, y in enumerate(np.random.choice([0., -1.], size=n_entity, p=[1 - neg_weight, neg_weight]))])

                hr_tlist.append([htr[idx, 0], htr[idx, 2]])

        out_queue.put((np.asarray(hr_tlist, dtype=np.int32), np.asarray(hr_tweight, dtype=np.float32),
                       np.asarray(tr_hlist, dtype=np.int32), np.asarray(tr_hweight, dtype=np.float32)))
Example #19
    def _parallel(self, reps, keys, treatments, num_threads):
        jobs = JoinableQueue()
        results = JoinableQueue()

        for t in treatments:
            kwargs = dict(zip(keys, t)) if keys else {}
            for r in range(reps):
                jobs.put((str(t), kwargs))

        def op(jobs, results):
            while True:
                name, kwargs = jobs.get()
                res = self.sim.run(self.builder(**kwargs))
                results.put((name, res))
                jobs.task_done()

        for th in range(num_threads):
            process = Process(target=op, name=str(th), args=[jobs, results])
            process.start()

        jobs.join()

        formatted_results = {}
        while not results.empty():
            n, r = results.get()
            if n in formatted_results:
                formatted_results[n].append(r)
            else:
                formatted_results[n] = [r]
        if len(formatted_results) == 1:
            return formatted_results[list(formatted_results.keys())[0]]
        return formatted_results
Example #20
def producer(in_queue: JoinableQueue):
    while True:
        item = in_queue.get()
        sleep(0.5)
        n = int(item)
        print(n)
        in_queue.task_done()
Example #21
def consumer(in_queue: JoinableQueue, out_queue: JoinableQueue):
    while True:
        item = in_queue.get()
        sleep(0.5)
        s = str(item)
        out_queue.put(s)
        in_queue.task_done()
Example #22
def reduce_sum(array):
    results = JoinableQueue()
    result_size = len(array)

    n_consumers = cpu_count()

    for item in array:
        results.put(item)

    while result_size > 1:
        tasks = results
        results = JoinableQueue()

        consumers = [
            ReductionConsumer(tasks, results) for i in range(n_consumers)
        ]
        for consumer in consumers:
            consumer.start()

        for i in range(n_consumers):
            tasks.put(None)

        tasks.join()
        result_size = result_size // 2 + (result_size % 2)
        print('-' * 40)

    return results.get()
Example #23
def main():
    jobs = JoinableQueue()
    result = JoinableQueue()


    numToProcess = -1
    scores = pd.DataFrame(columns=['query','fmeasure','precision','recall',
                                   'size','maxDistance','topHits',"contextSteps"])

    print len(datasets)

    for key in datasets:
        jobs.put(key)

    processed_count = Counter()
        
    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result, processed_count))
        p.daemon = True
        p.start()

    #work(1, jobs, result, processed_count)

    automated_annotations = {}
    distances = {}

    jobs.join()

    dataset_index = collections.defaultdict(set)
    annotated_datasets = set()
    while not result.empty():
        dataset, classes = result.get()
        if len(classes) == 0:
            annotated_datasets.add(dataset)
        for c in classes.keys():
            dataset_index[c].add(dataset)
            owl_class = Class(c, graph=graph)
            for parent in owl_class.parents:
                dataset_index[parent.identifier].add(dataset)
        result.task_done()

    print '\n'
    
    for query, c in queries.items():
        manual = ground_truth[query]
        automated = dataset_index[c]
        hits = manual & automated
        misses = manual - automated
        precision = np.nan if len(automated) == 0 else float(len(hits)) / len(automated)
        recall = np.nan if len(manual) == 0 else float(len(hits)) / len(manual)
        if precision != 0 or recall != 0:
            fmeasure = 0 if np.isnan(precision) or np.isnan(recall) else 2 * (precision * recall) / (precision + recall)
        else:
            fmeasure = 0
        scores = scores.append(dict(query=query, size=len(manual), precision=precision, recall=recall, fmeasure=fmeasure,topHits=topHits, maxDistance=maxDistance, contextSteps = context_steps),
                        ignore_index=True)
        print "Hits for", query, c
        print '\n'.join(sorted(hits))
    print scores
    print "Annotated", len(annotated_datasets), "datasets."
Example #24
def Cvpointgray(imgs: multiprocessing.JoinableQueue, conn, l: multiprocessing.Lock):
    while True:
        try:
            img, d = imgs.get()
        except Empty:
            break
        x = []
        mean = np.mean(img)
        for j in range(img.shape[0]):
            x0 = 0
            y0 = 0
            ym = []
            for i in range(1, img.shape[1]):
                ym.append(img[j, i])
                x0 = x0 + int(img[j, i]) ** 4
                y0 = y0 + int(img[j, i]) ** 4 * i
            if x0 == 0 or np.mean(ym) < mean:
                y = 0
            else:
                y = y0 / x0
            y = round(y)
            x.append(y)
        print(d)
        l.acquire()
        conn.send([d, x])
        l.release()
        imgs.task_done()
Example #25
class Thread_Pool_Manager(object):
    def __init__(self, thread_num=cpu_count()):
        self.thread_num = thread_num
        print(thread_num)
        self.work_queue = JoinableQueue()
        self.work_num = Semaphore(0)
        self.mutex = Lock()

    def start_threads(self):
        for i in range(self.thread_num):
            thread = Process(target=self.do_job)
            thread.daemon = True  # set thread as daemon
            thread.start()

    def do_job(self):
        global Numbers
        while True:
            # print(1)
            self.work_num.acquire()
            with self.mutex:
                print(1, self.work_queue.qsize())
                thread_job = self.work_queue.get()
                print(0, self.work_queue.qsize())
            thread_job.do_job(self.work_queue, self.work_num)
            print(self.work_queue.qsize())
            self.work_queue.task_done()

    def join(self):
        self.work_queue.join()

    def add_job(self, job):
        self.work_queue.put(job)
        self.work_num.release()
Example #26
    def _run_on_all(self, fn_to_run, *args, **kwargs):
        q = JoinableQueue()
        for s in self.snap_boards:
            s_name = s.host
            try:
                method = getattr(s, fn_to_run)
            except AttributeError:
                try:
                    method = getattr(s.adc, fn_to_run)
                except AttributeError:
                    raise RuntimeError("Cannot find method %s" % fn_to_run)

            # Setup arguments and keyword args
            all_args = [q, s_name, method]
            if kwargs is None:
                kwargs = {}
            if args is not None:
                for aa in args:
                    all_args.append(aa)
            t = Thread(target=self._run,
                       name=s_name,
                       args=all_args,
                       kwargs=kwargs)
            t.daemon = True
            t.start()
        q.join()

        # Iterate through the queue and collect each board's output into a dict
        outdict = {}
        for ii in range(0, len(self.snap_boards)):
            d_key, d_out = q.get()
            outdict[d_key] = d_out
        return outdict
Example #27
class GHDDIMultiProcessPool:

    def __init__(self, target, database=None):
        self._inputQueue = Queue()
        self._outputQueue = Queue()
        jobs = []
        for i in range(0, os.cpu_count()):
            jobs.append(GHDDIProcess(target, database, self._inputQueue, self._outputQueue))
        self._jobs = jobs

    def __del__(self):
        print('processPool del')
        self._inputQueue.join()
        self._outputQueue.join()

        self._inputQueue.close()
        self._outputQueue.close()
        for p in self._jobs:
            p.terminate()
            p.close()

    def startAll(self):
        for p in self._jobs:
            p.start()

    def finishAll(self):
        pass

    def putTask(self, taskArgs, block=True, timeout=None):
        self._inputQueue.put(taskArgs, block=block, timeout=timeout)

    def getTaskRet(self, block=True, timeout=None):
        return self._outputQueue.get(block=block, timeout=timeout)
Example #28
class Renderer:
    queue = None

    def __init__(self, nb_workers=2):
        self.queue = JoinableQueue()
        self.processes = [Process(target=self.upload) for i in range(nb_workers)]
        for p in self.processes:
            p.start()

    def render(self, item):
        self.queue.put(item)

    def upload(self):
        while True:
            item = self.queue.get()
            if item is None:
                break

            # process your item here

            self.queue.task_done()

    def terminate(self):
        """ wait until queue is empty and terminate processes """
        self.queue.join()
        for p in self.processes:
            p.terminate()
Example #29
def main():

    num_page = 6000
    num_processes = 60
    num_works = num_page / num_processes
    q = JoinableQueue()
    pool = list()
    final_set = set()
    
    for index in xrange(1,num_processes+1):
        p =  Process(target=fetch_feature,args=(q,index,num_works))
        p.start()
    
    for index in xrange(1,num_processes+1):    
        final_set = final_set.union(q.get())
    
        #p.join()
    #    pool.append(p)
        
    #for p in pool:
    #   p.join()
    result_file = open('result.out','w');

    for feature in final_set:
        print feature
        result_file.write(feature+'\n')
   
    result_file.close()    
    print len(final_set)
Example #30
def data_generator_func(in_queue: JoinableQueue,out_queue: Queue,right_num,left_num,tr_h,hr_t,ht_r,n_entity,n_relation):
    while True:
        dat = in_queue.get()
        if dat is None:
            break
        pos_triple_batch = []
        neg_triple_batch = []
        
        neg_rel_triple_batch=dat.copy()
        htr=dat.copy()


        for i in range(1):
            tmp_pos_triple_batch= list(dat.copy())
            tmp_neg_entity_triple_batch = list(dat.copy())
            #construct negative-triple
            for idx in range(htr.shape[0]):
                h=htr[idx,0]
                t=htr[idx,1]
                r=htr[idx,2]
                tmp_t=np.random.randint(0,n_entity-1)
                while tmp_t in hr_t[h][r]:
                    tmp_t=np.random.randint(0,n_entity-1)
                tmp_neg_entity_triple_batch[idx][1]=tmp_t
            pos_triple_batch += tmp_pos_triple_batch
            neg_triple_batch += tmp_neg_entity_triple_batch
        out_queue.put((np.asarray(pos_triple_batch),np.asarray(neg_triple_batch)))
Example #31
    def test_hyperband_executor_basic(self):
        # Create executor
        inputs_queue = JoinableQueue()
        results_queue = JoinableQueue()
        executor = executors.HyperbandExecutor.HyperbandExecutor(
            inputs_queue, results_queue, hyperband_epochs_budget=100)
        executor.get_data_loading_pipelines = get_data_loading_pipelines_override

        # Load sample data
        data_uri = utils.utils.get_git_root(
            os.path.dirname(
                os.path.abspath(__file__))) + "/test_data/185_baseball"
        assert (os.path.exists(data_uri))
        problem_doc, dataset = utils.utils.load_data_from_dir(data_uri,
                                                              mode="train")

        # Process item
        inputs_queue.put((problem_doc, dataset))
        executor.start()
        inputs_queue.join()

        # Gather results
        results = []
        while not results_queue.empty():
            print("Gathering...")
            results.append(
                results_queue.get(True, executors.Executor.QUEUE_TIMEOUT))

        executor.terminate()
Example #32
class QueueTask:
    def __init__(self):
        self.queue = JoinableQueue()
        self.event = Event()
        atexit.register( self.queue.join )

        process = Process(target=self.work)
        process.daemon = True
        process.start()


    def work(self):
        while True:
            func, args, wait_for = self.queue.get()

            for evt in wait_for: 
                evt.wait()
            func(*args)
            self.event.set()

            self.queue.task_done()


    def enqueue(self, func, args=[], wait_for=[]):
        self.event.clear()
        self.queue.put( (func, args, wait_for) )

        return self.event 
Example #33
class AlarmExecutor:
    def __init__(self):
        self.queue = JoinableQueue(10)
        self.running = False
        self.t = Thread(target=self._run, name="AlarmExecutor")

    def _run(self):
        while self.running:
            try:
                alarm = self.queue.get(block=True, timeout=1)
                alarm.execute()
                logging.debug("Alarm executed")
                self.queue.task_done()
            except queue.Empty:
                continue

    def start(self):
        logging.debug("Starting alarm executor")
        self.running = True
        self.t.start()

    def stop(self):
        if self.running:
            logging.debug("Stopping alarm executor")
            self.running = False
            self.t.join()
        else:
            msg = "Attempted to stop alarm executor when it is not running"
            logging.debug(msg)
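A hedged usage sketch: PrintAlarm is a stand-in with a bare execute() method, since the real alarm class is not part of the snippet.

class PrintAlarm:
    def execute(self):
        print("ring!")

executor = AlarmExecutor()
executor.start()
executor.queue.put(PrintAlarm())   # picked up by _run within its 1-second poll
executor.queue.join()              # returns once task_done was called for the alarm
executor.stop()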
Example #34
def main():
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()

    tasks = ["1", "2", "3", "4", "5"]

    for w in tasks:
        jobs.put(w)

    [
        Process(target=work, args=(i, jobs, result)).start()
        for i in range(NUMBER_OF_PROCESSES)
    ]

    print('starting workers')

    for t in range(len(tasks)):
        r = result.get()
        time.sleep(0.5)
        print(r)
        result.task_done()

    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)

    result.join()
    jobs.close()
    result.close()
Example #35
class ScheduleContainer(object):
    def __init__(self):
        self.schedule_container = JoinableQueue(maxsize=0)
        self.scheduler = schedule
        self._run = True
        worker = Thread(target=self.work)
        worker.start()

    def append(self, request_form):
        self.schedule_container.put(request_form)

    @staticmethod
    def task(temp):
        def inner():
            t.change_temp(temp)

        return inner

    def work(self):
        lock = Lock()

        while self._run:
            lock.acquire()
            if not self.schedule_container.empty():
                schedule_obj = self.schedule_container.get()
                job = self.scheduler.every()
                job.start_day = str(schedule_obj.day)
                job.unit = 'weeks'
                job.at(str(schedule_obj.time)).do(self.task(schedule_obj.temp))
                print('schedule made into job')
                schedule_obj.save()
                self.schedule_container.task_done()
            lock.release()
            schedule.run_pending()
            time.sleep(1)
Example #36
def worker(q: JoinableQueue, i: int, output, print_lock: Lock,
           FLAGS: Tuple[Any]) -> None:
    """Retrieves files from the queue and annotates them."""
    if FLAGS.in_memory:
        with open(FLAGS.alias_db, 'rb') as f:
            alias_db = pickle.load(f)
        with open(FLAGS.relation_db, 'rb') as f:
            relation_db = pickle.load(f)
        with open(FLAGS.wiki_db, 'rb') as f:
            wiki_db = pickle.load(f)
    else:
        alias_db = SqliteDict(FLAGS.alias_db, flag='r')
        relation_db = SqliteDict(FLAGS.relation_db, flag='r')
        wiki_db = SqliteDict(FLAGS.wiki_db, flag='r')

    annotator = Annotator(alias_db,
                          relation_db,
                          wiki_db,
                          distance_cutoff=FLAGS.cutoff,
                          match_aliases=FLAGS.match_aliases,
                          unmatch=FLAGS.unmatch,
                          prune_clusters=FLAGS.prune_clusters)
    while True:
        logger.debug('Worker %i taking a task from the queue', i)
        json_data = q.get()
        if json_data is None:
            break
        annotation = annotator.annotate(json_data)
        print_lock.acquire()
        output.write(json.dumps(annotation) + '\n')
        print_lock.release()
        q.task_done()
        logger.debug('Worker %i finished a task', i)
Example #37
def search8(q, path):
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()

    job_count = 0
    for f in os.scandir('data'):
        jobs.put(f.path)
        job_count = job_count + 1

    [
        Process(target=work, args=(i, q, jobs, result)).start()
        for i in range(NUMBER_OF_PROCESSES)
    ]

    matches = []
    for t in range(job_count):
        r = result.get()
        result.task_done()
        if r:
            matches.append(r)

    matches.sort()

    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)

    result.join()
    jobs.close()
    result.close()

    return matches
Example #38
class AlarmExecutor:
    def __init__(self):
        self.queue = JoinableQueue(10)
        self.running = False
        self.t = Thread(target=self._run, name="AlarmExecutor")

    def _run(self):
        while self.running:
            try:
                alarm = self.queue.get(block=True, timeout=1)
                alarm.execute() 
                logging.debug("Alarm executed")
                self.queue.task_done()       
            except Queue.Empty:
                continue
            
    def start(self):
        logging.debug("Starting alarm executor")
        self.running = True
        self.t.start()

    def stop(self):
        if self.running:
            logging.debug("Stoppping alarm executor")
            self.running = False
            self.t.join()
        else:
            logging.debug("Attempted to stop alarm executor when it is not running")
Example #39
class Queue:
    def __init__(self):
        self._queue = JoinableQueue()

    def put(self, element):
        if self._queue is not None:
            self._queue.put(element)

    def get(self):
        if self._queue is not None:
            try:
                return self._queue.get()
            except:
                return None

    def join(self):
        if self._queue is not None:
            self._queue.join()

    def task_done(self):
        if self._queue is not None:
            self._queue.task_done()

    def unblock_gets(self):
        if self._queue is not None:
            self._queue.close()
            self._queue = JoinableQueue()
Example #40
def queueManager(numProc, myList, function, *args):
	'''queueManager(numProc, myList, function, *args):
	generic function used to start worker processes via the multiprocessing Queue object
	numProc - number of processors to use
	myList - a list of objects to be iterated over
	function - target function
	*args - additional arguments to pass to function

	Return - an unordered list of the results from myList
	'''
	qIn = Queue()
	qOut = JoinableQueue()
	if args:
		arguments = (qIn, qOut,) + args
	else:
		arguments = (qIn, qOut,)
	results = []
	
	# reduce processer count if proc count > files
	
	i = 0
	for l in myList:
		qIn.put((i,l))
		i += 1

	for _ in range(numProc):
		Process(target=function, args=arguments).start()
	sys.stdout.write("Progress: {:>3}%".format(0))
	curProgress = 0
	lastProgress = 0
	while qOut.qsize() < len(myList):
		#sys.stdout.write("\b\b\b\b{:>3}%".format(int(ceil(100*qOut.qsize()/len(myList)))))
		curProgress = int(ceil(100*qOut.qsize()/len(myList)))
		if curProgress - lastProgress > 10:
			lastProgress += 10
			sys.stdout.write("\nProgress: {:>3}%".format(lastProgress))
			sys.stdout.flush()
	sys.stdout.write("\nProgress: {:>3}%".format(100))
	#sys.stdout.write("\b\b\b\b{:>3}%".format(100))
	sys.stdout.write("\n")
	for _ in range(len(myList)):
		# collect each result and mark it done
		results.append(qOut.get())
		qOut.task_done()
	#tell child processes to stop
	for _ in range(numProc):
		qIn.put('STOP')

	orderedRes = [None]*len(results)
	for i, res in results:
		orderedRes[i] = res

	qOut.join()

	qIn.close()
	qOut.close()
	return orderedRes
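A hedged sketch of a target function compatible with the dispatcher above: it reads (index, item) pairs from qIn until the 'STOP' sentinel and puts (index, result) pairs on qOut, which is what the final reordering loop expects. squareWorker is a made-up example target; it would be launched as, e.g., queueManager(4, list(range(100)), squareWorker).

def squareWorker(qIn, qOut, *extraArgs):
    # Hypothetical worker for queueManager: square every queued number.
    for i, item in iter(qIn.get, 'STOP'):
        qOut.put((i, item * item))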
Example #41
def worker_func(in_queue: JoinableQueue, out_queue: Queue, hr_t, tr_h):
    while True:
        dat = in_queue.get()
        if dat is None:
            in_queue.task_done()
            continue
        testing_data, head_pred, tail_pred = dat
        out_queue.put(test_evaluation(testing_data, head_pred, tail_pred, hr_t, tr_h))
        in_queue.task_done()
Example #42
class WorkerQueue(object):

    def __init__(self, num_workers = 20):
        self.queue = Queue()
        self.pool = []
        self._setup_workers(num_workers)

    def _setup_workers(self, num_workers):
        """ Sets up the worker threads
              NOTE: undefined behaviour if you call this again.
        """
        self.pool = []

        for _ in range(num_workers):
            self.pool.append(Thread(target=self.threadloop))

        for a_thread in self.pool:
            a_thread.setDaemon(True)
            a_thread.start()


    def do(self, f, *args, **kwArgs):
        """ puts a function on a queue for running later.
        """
        self.queue.put((f, args, kwArgs))


    def stop(self):
        """ Stops the WorkerQueue, waits for all of the threads to finish up.
        """
        self.queue.put(STOP)
        for thread in self.pool:
            thread.join()


    def threadloop(self): #, finish = False):
        """ Loops until all of the tasks are finished.
        """
        while True:
            args = self.queue.get()
            if args is STOP:
                self.queue.put(STOP)
                self.queue.task_done()
                break
            else:
                try:
                    args[0](*args[1], **args[2])
                finally:
                    # clean up the queue, raise the exception.
                    self.queue.task_done()
                    #raise


    def wait(self):
        """ waits until all tasks are complete.
        """
        self.queue.join()
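A short usage sketch of the thread pool above; it relies on the module-level STOP sentinel the class already assumes, and fetch is a stand-in task.

def fetch(url):
    print('fetching', url)    # stand-in for real work

wq = WorkerQueue(num_workers=4)
for u in ('a', 'b', 'c'):
    wq.do(fetch, u)
wq.wait()    # block until every queued call has run
wq.stop()    # broadcast STOP and join the worker threads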
Example #43
def mpqueue():
    queue = JoinableQueue()
    #parent_conn, child_conn = os.pipe()
    p = Process(target=mpqueue_f, args=(queue,))
    p.start()
    data = queue.get()
    print(data.shape)
    queue.task_done()  # mark the item processed so queue.join() can return
    queue.join()
    p.join()
Example #44
    def apply_mt(self, xs, parallelism, **kwargs):
        """Run the UDF multi-threaded using python multiprocessing"""
        if snorkel_conn_string.startswith('sqlite'):
            raise ValueError('Multiprocessing with SQLite is not supported. Please use a different database backend,'
                             ' such as PostgreSQL.')

        # Fill a JoinableQueue with input objects
        in_queue = JoinableQueue()
        for x in xs:
            in_queue.put(x)

        # If the UDF has a reduce step, we collect the output of apply in a
        # Queue. This is also used to track progress via the UDF sentinel
        out_queue = JoinableQueue()

        # Keep track of progress counts
        total_count = in_queue.qsize()
        count = 0

        # Start UDF Processes
        for i in range(parallelism):
            udf = self.udf_class(in_queue=in_queue, out_queue=out_queue,
                add_to_session=(self.reducer is None), **self.udf_init_kwargs)
            udf.apply_kwargs = kwargs
            self.udfs.append(udf)

        # Start the UDF processes, and then join on their completion
        for udf in self.udfs:
            udf.start()

        while any([udf.is_alive() for udf in self.udfs]) and count < total_count:
            y = out_queue.get()

            # Update progress whenever an item was processed
            if y == UDF.TASK_DONE_SENTINEL:
                count += 1
                if self.pb is not None:
                    self.pb.update(1)

            # If there is a reduce step, do it now on this thread
            elif self.reducer is not None: 
                self.reducer.reduce(y, **kwargs)
                out_queue.task_done()

            else:
                raise ValueError("Got non-sentinel output without reducer.")

        if self.reducer is None:
            for udf in self.udfs:
                udf.join()
        else:
            self.reducer.session.commit()
            self.reducer.session.close()

        # Flush the processes
        self.udfs = []
Example #45
class Analyzer(object):
	def __init__(self, data_root, working_dir, tpr, index=True, index_output='index.h5'):
		# list of analysis objects
		self.__analyses = []
		self.__working_dir = working_dir
		self.__fs = file_system.SH3FileSystem(data_root, index=True, index_output=index_output)
		self.__loader = loader.Loader(working_dir)
		self.__task_queue = JoinableQueue(8)
		self.__tpr = tpr

	def run(self):
		# start a queue of size max 8, block if no empty slots
		# populate the task queue with (analysis, xtc) items 
		for i in range(0, 8):
			p = Process(target=self.__worker)
			p.start()

		for batch in self.__fs.xtc_files():
			print "batch", batch
			for xtc in batch:
				for analysis in self.__analyses:
					print "queuing", analysis.name(), "and", xtc.name()
					self.__task_queue.put([analysis, xtc], True, None)

			print "waiting for these tasks to finish"
			self.__task_queue.join()
			print "tasks have finished"

			print "PID", os.getpid(), "loading analysis"
			for xtc in batch:
				for a in self.__analyses:
					self.__loader.load(a, xtc)	

	def add(self, analysis):
		self.__analyses.append(analysis)
	
	def remove(self, analysis):
		self.__analyses.remove(analysis)

	def __worker(self):
		# TODO: use pool because it looks like the processes sometimes don't die if it fails
		# get one item from queue
		# block if queue is empty
		while True:
			try:
				# timeout after 30 seconds
				analysis,xtc = self.__task_queue.get(True, 30)
			except Empty:
				break
			else:
				analysis.run(xtc, self.__tpr)
				self.__task_queue.task_done()
Example #46
 def parallel(self):
     from multiprocessing import Process, Queue, JoinableQueue
     self.ntrajs = []
     for i in range(self.cpus):
         self.ntrajs.append(min(int(floor(float(self.ntraj)
             /self.cpus)),
             self.ntraj-sum(self.ntrajs)))
     cnt = sum(self.ntrajs)
     while cnt<self.ntraj:
         for i in range(self.cpus):
             self.ntrajs[i] += 1
             cnt+=1
             if (cnt>=self.ntraj):
                 break
     self.ntrajs = np.array(self.ntrajs)
     self.ntrajs = self.ntrajs[np.where(self.ntrajs>0)]
     self.nprocs = len(self.ntrajs)
     sols = []
     processes = []
     resq = JoinableQueue()
     print "Number of cpus:", self.cpus
     print "Trying to start", self.nprocs, "process(es)."
     print "Number of trajectories for each process:"
     print self.ntrajs
     for i in range(self.nprocs):
         p = Process(target=self.evolve_serial,
                 args=((resq,self.ntrajs[i],i,self.seed*(i+1)),))
         p.start()
         processes.append(p)
     resq.join()
     cnt = 0
     while True:
         try:
             sols.append(resq.get())
             resq.task_done()
             cnt += 1
             if (cnt >= self.nprocs): break
         except KeyboardInterrupt:
             break
         except:
             pass
     resq.join()
     for proc in processes:
         try:
             proc.join()
         except KeyboardInterrupt:
             print("Cancel thread on keyboard interrupt")
             proc.terminate()
             proc.join()
     resq.close()
     return sols
Example #47
	def __iter__(self):
		queue = JoinableQueue(maxsize=self.max_queue_size)

		n_batches, job_queue = self._start_producers(queue)

		# Run as consumer (read items from queue, in current thread)
		for x in xrange(n_batches):
			item = queue.get()
			#print queue.qsize(), "GET"
			yield item # Yield the item to the consumer (user)
			queue.task_done()

		queue.close()
		job_queue.close()
Example #48
def worker(input_q: JoinableQueue, output: Queue):
    from django import db
    db.connection.close()
    while True:
        task = input_q.get()
        if task == "end":
            break
        html = urlOpen.get_html(task.url)
        if html:
            text = textParser.get_text_from_html(html)
        input_q.task_done()
        # info()
        output.put(task.url)
    print("exit")
Example #49
def main():
    jobs = JoinableQueue()
    result = JoinableQueue()


    print len(datasets)
    numToProcess = 10
    scores = pd.DataFrame(columns=['precision','recall','fmeasure',
                                   'numResult','minScore','topHits',
                                   'contentWeight','relationWeight'])
    manual_annotations = get_manual_annotations(numToProcess)
    manual_tuples = get_ir_tuples(manual_annotations)

    for key in manual_annotations.keys():
        jobs.put(key)

    processed_count = Counter()
        
    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result, processed_count))
        p.daemon = True
        p.start()

    #work(1, jobs, result, processed_count)

    automated_annotations = {}

    jobs.join()

    while not result.empty():
        dataset, classes = result.get()
        automated_annotations[dataset] = classes
        result.task_done()

    automated_tuples = get_ir_tuples(automated_annotations)
    hits = manual_tuples & automated_tuples
    misses = manual_tuples - automated_tuples
    
    precision = float(len(hits)) / len(automated_tuples)
    recall = float(len(hits)) / len(manual_tuples)
    fmeasure = 2 * (precision * recall) / (precision + recall)
    # print '\t'.join([str(x) for x in [precision, recall, fmeasure,
    #                              numResult, minScore, topHits]])
    scores = scores.append(dict(precision=precision, recall=recall, fmeasure=fmeasure,
                                numResult=numResult, minScore=minScore, topHits=topHits,
                                contentWeight=contentWeight, relationWeight=relationWeight),
                        ignore_index=True)

    print scores
Example #50
class ImageCrawler:
    
    NUM_PER_FETCH = 100
    NUM_PROCESSES = 10
    def __init__(self, database_config_path):
        self.queue = JoinableQueue()
        self.logger = Logger("image_crawler")
        self.adapter = ImageStoreAdapter(database_config_path, self.logger)
        
    def produce(self):
        while True:
            if self.queue.empty():
                for image_id, link in self.adapter.load_undownloaded_images(self.NUM_PER_FETCH):
                    self.logger.log("Producer: add new image to crawl:" + image_id + " " + link)
                    self.queue.put((image_id, link))
            time.sleep(10)
            
    def consume(self, process_id):
        while True:
            self.logger.log("Consumer process:" + str(process_id) + " fetch new image from queue")
            if not self.queue.empty():
                image_id, link = self.queue.get()
                self.logger.log("Consumer process:"+ str(process_id) + " start crawling " + str(link))
                image = common_utils.page_crawl(link)
                if image is not None:
                    self.logger.log(link + "crawled successfully")
                    self.adapter.store_image(image_id, image)
                else:
                    self.logger.log(link + " failed at crawling")
                    self.adapter.update_image_status(image_id, ImageIndexStatus.DOWNLOAD_FAILED)
                self.queue.task_done()
                time.sleep(1)
            else:
                self.logger.log("Queue empty")
                time.sleep(10)
    
    def run(self):
        producer = Process(target=self.produce)
        producer.start()
        consumers = []
        for i in range(self.NUM_PROCESSES):
            consumer = Process(target=self.consume, args=(i,))
            consumers.append(consumer)
            consumer.start()
        
        for consumer in consumers:
            consumer.join()
        producer.join()
        self.queue.join()
Example #51
def test_basic():
    in_queue = JoinableQueue()

    mysql_reader = Mysqlio('localhost','3600','test','root','') 
    mysql_reader.scan_and_queue(in_queue,"SELECT * FROM swallow")

    assert in_queue.qsize() == 3

    res = []
    while not in_queue.empty():
        res.append(in_queue.get())

    expected_res = [{'id':1,'libelle':'test'},{'id':2,'libelle':'john'},{'id':3,'libelle':'woo'}]

    assert res == expected_res
Example #52
	def __iter__(self):
		queue = JoinableQueue(maxsize=params.N_PRODUCERS*2)

		n_batches, job_queue = self.start_producers(queue)

		# Run as consumer (read items from queue, in current thread)
		for x in xrange(n_batches):
			item = queue.get()
			#print len(item[0]), queue.qsize(), "GET"
			yield item
			queue.task_done()

		#queue.join() #Lock until queue is fully done
		queue.close()
		job_queue.close()
Example #53
def main():
	try:
		result = {}
		sellers ={}
		seller_list = list()
		q = JoinableQueue()
		for x in range(len(sys.argv)-1):
			p = Process(target=fetch_feature,args=(q,sys.argv[x+1]))
			p.start()
		
		for x in range(len(sys.argv)-1):
			result.update(q.get())

		if(len(result)==0):
			print "0 Results";
			return
		total_count = len(result)
		total_price = 0
		for url in result:
			(product_name,price,seller) = result[url]
			total_price = price+total_price
			if seller not in sellers:
				sellers[seller] = list()
			sellers[seller].append((product_name,price,url))
			
		for key, value in sellers.iteritems():
		    temp = [key,value]
		    seller_list.append(temp)	

		seller_list = sorted(seller_list,key=lambda x: -len(x[1]))


		avg_price = total_price / total_count
		
		#print total_price, total_count ,avg_price
		print "<table>"
		print "<tr><th>Number of Matching Product</th><td>"+str(total_count)+"</td></tr>"
		print "<tr><th>Average Price</th><td>"+str(avg_price)+"</td> </tr>";
		print "<tr><th>Number of Matching Seller</th><td>"+str(len(sellers))+"</td></tr>"
		print "</table>"
		#print sellers
		print "<table>"
		print "<tr><th>Seller ID</th><th>Number of Matching Products</th></tr>"
		for x in seller_list:
			print "<tr><td>"+x[0].encode('utf-8')+"</td><td>"+str(len(x[1]))+"</td></tr>"
		print "</table>"
	except Exception, e:
		print e
Example #54
class Multiplexer(object):
    def __init__(self, worker, writer, threads=4):
        self.worker=worker
        self.writer=writer
        self.q=JoinableQueue()
        self.done = Value(c_bool,False)
        self.consumer=Process(target=self.consume)
        self.pool = Pool(threads, init_opener)

    def start(self):
        self.done.value=False
        self.consumer.start()

    def addjob(self, url, data=None):
        params=[url]
        if data: params.append(data)
        try:
           return self.pool.apply_async(self.worker,params,callback=self.q.put)
        except:
            logger.error('[!] failed to scrape '+ url)
            logger.error(traceback.format_exc())
            raise

    def finish(self):
        self.pool.close()
        logger.info('closed pool')
        self.pool.join()
        logger.info('joined pool')
        self.done.value=True
        self.q.close()
        logger.info('closed q')
        self.consumer.join()
        logger.info('joined consumer')
        #self.q.join()
        #logger.info('joined q')

    def consume(self):
        param=[0,0]
        while True:
            job=None
            try:
                job=self.q.get(True, timeout=1)
            except Empty:
                if self.done.value==True: break
            if job:
                param = self.writer(job, param)
                self.q.task_done()
        logger.info('added/updated: %s' % param)
Example #55
    def apply_mt(self, xs, parallelism, **kwargs):
        """Run the UDF multi-threaded using python multiprocessing"""
        if snorkel_conn_string.startswith('sqlite'):
            raise ValueError('Multiprocessing with SQLite is not supported. Please use a different database backend,'
                             ' such as PostgreSQL.')

        # Fill a JoinableQueue with input objects
        in_queue = JoinableQueue()
        for x in xs:
            in_queue.put(x)

        # If the UDF has a reduce step, we collect the output of apply in a Queue
        out_queue = None
        if hasattr(self.udf_class, 'reduce'):
            out_queue = JoinableQueue()

        # Start UDF Processes
        for i in range(parallelism):
            udf              = self.udf_class(in_queue=in_queue, out_queue=out_queue, **self.udf_init_kwargs)
            udf.apply_kwargs = kwargs
            self.udfs.append(udf)

        # Start the UDF processes, and then join on their completion
        for udf in self.udfs:
            udf.start()

        # If there is a reduce step, do it now on this thread
        if hasattr(self.udf_class, 'reduce'):
            while any([udf.is_alive() for udf in self.udfs]):
                while True:
                    try:
                        y = out_queue.get(True, QUEUE_TIMEOUT)
                        self.reducer.reduce(y, **kwargs)
                        out_queue.task_done()
                    except Empty:
                        break
                self.reducer.session.commit()
            self.reducer.session.close()

        # Otherwise just join on the UDF.apply actions
        else:
            for i, udf in enumerate(self.udfs):
                udf.join()

        # Terminate and flush the processes
        for udf in self.udfs:
            udf.terminate()
        self.udfs = []
Example #56
def LeafSet(inchan:Queue, outchan:Queue):
    """Report the distinct elements of inchan on outchan."""
    sf = set()
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    morestuff = True
    while morestuff:
        x = inchan.get()
        logger.info("Leaf:%s" % x)
        if x not in sf:
            sf.add(x)
            outchan.put(x)
        inchan.task_done()
        if x == SIGOBJ:
            morestuff = False
    logger.info("leafdone")
Example #57
def printer(chan:Queue, fmt:str, destination:file):
    """A data sink that prints the contents of chan as strings to destination
    using a format string"""
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    logger.info("sink started")
    i = 0
    morestuff = True
    while morestuff:
        elt = chan.get()
        logger.info(fmt % (i, elt))
        i += 1
        chan.task_done()
        if elt == SIGOBJ:
            morestuff = False
    logger.info('printer done')
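Examples #3, #56 and #57 fit together as a small distinct-union pipeline; a hedged wiring sketch, assuming LeafSet, InternalSet, printer and the shared SIGOBJ sentinel are importable from the same module.

from multiprocessing import JoinableQueue, Process

if __name__ == '__main__':
    a_in, b_in = JoinableQueue(), JoinableQueue()
    a_out, b_out = JoinableQueue(), JoinableQueue()
    union_out = JoinableQueue()

    Process(target=LeafSet, args=(a_in, a_out)).start()
    Process(target=LeafSet, args=(b_in, b_out)).start()
    Process(target=InternalSet, args=(a_out, b_out, union_out)).start()
    Process(target=printer, args=(union_out, "item %d: %s", None)).start()

    for x in (1, 2, 2, 3, SIGOBJ):
        a_in.put(x)
    for y in (3, 4, 4, 5, SIGOBJ):
        b_in.put(y)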
Example #58
def coordinate_workers(num_workers, key_file, server_ip, file_list_filename):
  q1 = JoinableQueue()
  q2 = Queue()
  workers = []
  print "starting workers"
  for worker_idx in range(num_workers):
    q1.put((key_file, file_list_filename),)
    p = Process(target=run_worker, args=(worker_idx, server_ip, q1, q2))
    workers.append(p)
    p.start()
  print "waiting for workers to finish initialization"
  q1.join()
  print "sending start tokens"
  for worker_idx in range(num_workers): q2.put(worker_idx)
  print "waiting for results"
  results = []
  total_size = 0
  num_files = None
  total_errors = 0
  for i in range(num_workers):
    (worker_num, result, worker_total_size, worker_errors) = q1.get()
    print "Result for worker %d:\n  %s\n  %s\n  %d errors" % \
          (worker_num, result,
           _format_avg_size(worker_total_size, result.num_events,
                            result.total_duration),
           worker_errors)
    total_size += worker_total_size
    total_errors += worker_errors
    if num_files:
      assert result.num_events==num_files, \
             "Worker %d saw %d files, which does not agree with previous count of %d"%\
             (worker_num, result.num_events, num_files)
    else:
      num_files = result.num_events
    results.append(result)
  print "got all results"
  all = PerfCounter.average_counters(results)
  print all
  print _format_avg_size(float(total_size)/float(num_workers), num_files,
                         all.total_duration)
  bw = (float(total_size)/1000000.0)/all.total_duration
  print "Total Bandwidth: %.2f mb/s" % bw
  print "Total errors: %d" % total_errors
  return 0
Example #59
def clear_area_around_eye(size = 256, image_dir = 'I:/AI_for_an_eyes/test/test/', target_dir = 'I:/AI_for_an_eyes/test/test_zonder_meuk_256/'):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    util.update_progress(0)


    tasks = glob.glob(image_dir+'*.jpeg')
    job_total = len(tasks)

    print 'Processing images matching ' + image_dir+ '*.jpeg'

    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()*2

    for im_name in tasks:
        jobs.put(im_name)

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Thread(target=worker, args=(i, jobs, result, target_dir, size))
        p.daemon = True
        p.start()

    print 'Starting workers (', NUMBER_OF_PROCESSES, ')!'

    n_complete = 0
    for t in xrange(len(tasks)):
        r = result.get()
        n_complete += 1
        util.update_progress(n_complete/job_total)
        result.task_done()
        #print t, 'done'

    for w in xrange(NUMBER_OF_PROCESSES):
        jobs.put(None)

    util.update_progress(1)

    print 'Done!'
    time.sleep(1)
    result.join()
    jobs.close()
    result.close()
Example #60
class FileReader(Process):
    def __init__(self, filename, buffer_size=1000):
        super(FileReader, self).__init__()
        self.filename = filename
        self.que = JoinableQueue(buffer_size)
        self.event = Event()
        self.event.set()
        self.started = Event()
        self.started.clear()

    # It's crucial to call task_done on the queue after the item was processed
    def get_queue(self):
        return self.que

    def get_event(self):
        return self.event

    def is_done(self):
        return not self.event.is_set() and self.que.empty()

    def run(self):
        self.started.set()
        self.proc()
        self.event.clear()

    def proc(self):
        with open_gz(self.filename, encoding='utf-8') as file:
            for line in file:
                self.que.put(line)

    def __iter__(self):
        self.start()
        self.started.wait()
        while not self.is_done():
            try:
                text = self.que.get(timeout=0.1)
                yield text
                self.que.task_done()
            except Empty:
                pass
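A hedged usage sketch for the reader above; open_gz is the snippet's own helper for opening plain or gzipped text files and is not shown, and corpus.txt.gz is just an example path.

if __name__ == '__main__':
    reader = FileReader('corpus.txt.gz', buffer_size=1000)
    for line in reader:    # __iter__ starts the process and calls task_done per line
        print(line.rstrip())
    reader.join()          # reap the reader process once iteration has finished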