Exemplo n.º 1
0
def run_with_multiprocessing(nprocesses, ntasks, niterations):
    """Run `ntasks` tasks across `nprocesses` worker processes with atpbar bars.

    Parameters
    ----------
    nprocesses : int
        Number of worker processes to start.
    ntasks : int
        Number of tasks to enqueue.
    niterations : sequence of int
        Iteration count per task; must have at least `ntasks` elements.

    Returns
    -------
    None
    """
    def task(n, name):
        # Simulated work: `n` short sleeps shown on a progress bar.
        for i in atpbar(range(n), name=name):
            time.sleep(0.0001)

    def worker(reporter, task, queue):
        # Register the shared reporter so this process's progress bars
        # are forwarded to the main process.
        register_reporter(reporter)
        while True:
            args = queue.get()
            if args is None:  # shutdown sentinel
                queue.task_done()
                break
            task(*args)
            queue.task_done()

    reporter = find_reporter()
    queue = multiprocessing.JoinableQueue()
    processes = []
    for i in range(nprocesses):
        p = multiprocessing.Process(target=worker,
                                    args=(reporter, task, queue))
        p.start()
        processes.append(p)
    for i in range(ntasks):
        name = 'task {}'.format(i)
        n = niterations[i]
        queue.put((n, name))
    # One sentinel per worker. Join ONCE after all sentinels are queued:
    # the original called queue.join() inside this loop, which needlessly
    # serialized worker shutdown (one join round-trip per sentinel).
    for i in range(nprocesses):
        queue.put(None)
    queue.join()
    # Reap the worker processes so none are left as zombies.
    for p in processes:
        p.join()
    flush()
Exemplo n.º 2
0
    def close(self):
        """Shut down the drop box: stop workers, stop logging, close queues.

        Returns
        -------
        None
        """

        # Send one stop sentinel per worker, then wait for the task
        # queue to drain completely before forgetting the workers.
        if self.workers:
            for _ in self.workers:
                self.task_queue.put(None)
            self.task_queue.join()
            self.workers = []

        # Ask the logging listener to stop. The queue may already be
        # closed, in which case `put` raises:
        #   AssertionError on Python 3.7, ValueError on Python 3.8+.
        try:
            self.logging_queue.put(None)
        except (AssertionError, ValueError):
            pass
        self.loggingListener.join()

        for q in (self.task_queue, self.result_queue, self.logging_queue):
            q.close()

        if self.progressbar:
            atpbar.flush()
Exemplo n.º 3
0
def process(n_threads, possibilities, doh_server):
    """Split `possibilities` into `n_threads` slices and scan them in parallel.

    Parameters
    ----------
    n_threads : int
        Number of worker threads to launch.
    possibilities : sequence
        Candidate items; each thread handles one contiguous slice.
    doh_server : str
        DNS-over-HTTPS server handed through to `worker`.

    Returns
    -------
    dict
        Results accumulated by the workers (shared `result` mapping).
    """
    section = len(possibilities) // n_threads
    result = {}
    threads = []

    # Per-thread [start, end) slice boundaries.
    bounds = [(i * section, (i + 1) * section) for i in range(n_threads)]
    if bounds:
        # BUG FIX: integer division drops the remainder, so the last
        # `len(possibilities) % n_threads` items were never scanned.
        # Extend the final slice to cover the full input.
        last_start, _ = bounds[-1]
        bounds[-1] = (last_start, len(possibilities))

    for i, (start, end) in enumerate(bounds):
        t = threading.Thread(target=worker,
                             args=(
                                 possibilities,
                                 start,
                                 end,
                                 i,
                                 result,
                                 doh_server,
                             ))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()

    flush()

    return result
    def _updateExistingGenomes(self):
        """Update existing genome records using a pool of worker processes.

        Workers consume `self.list_checkm_records` from a queue and push
        progress events to `writerQueue` (consumed by `self.__progress`),
        report lines to `reportlist`, SQL batches to `tasklist`, and
        summary items to `shortcheckmlist`.  SQL batches are then run in
        threads, the report is written, and the summary items returned.

        Returns
        -------
        list
            The collected items from `shortcheckmlist`.
        """
        workerQueue = mp.Queue()
        writerQueue = mp.JoinableQueue()
        # One manager is enough for all shared lists (the original
        # spawned three separate manager processes).
        manager = mp.Manager()
        reportlist = manager.list()
        shortcheckmlist = manager.list()
        tasklist = manager.list()

        for record in self.list_checkm_records:
            workerQueue.put(record)

        # One sentinel per worker so each one sees end-of-input.
        for _ in range(self.cpus):
            workerQueue.put(None)

        try:
            workerProc = [
                mp.Process(target=self.worker_updateExistingGenomes,
                           args=(workerQueue, i, tasklist, reportlist,
                                 shortcheckmlist, writerQueue))
                for i in range(self.cpus)
            ]
            writeProc = mp.Process(target=self.__progress,
                                   args=(len(self.list_checkm_records),
                                         writerQueue))

            writeProc.start()
            for p in workerProc:
                p.start()

            for p in workerProc:
                p.join()

            # All workers done: stop the progress writer.
            writerQueue.put(None)
            writeProc.join()

            # Execute the accumulated SQL batches concurrently.
            taskProc = []
            for i, list_sql in enumerate(tasklist):
                subproc = threading.Thread(target=self.task_sql_command,
                                           args=(list_sql, i))
                subproc.start()
                taskProc.append(subproc)
            for tp in taskProc:
                tp.join()
            flush()
        except Exception:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit) and logged instead of
            # silently ignored; report writing below still proceeds
            # best-effort, matching the original fall-through.
            self.logger.exception('Worker pool failed; terminating processes')
            for p in workerProc:
                p.terminate()
            # BUG FIX: the original wrote `writeProc.terminate` without
            # parentheses — a no-op attribute access that left the
            # progress process running.
            writeProc.terminate()

        self.logger.info('We write a report')
        for reportlist_item in reportlist:
            self.report_database_update.write(reportlist_item)
        return list(shortcheckmlist)
Exemplo n.º 5
0
def kraft_download_via_manifest(ctx, workdir=None, manifest=None,
                                equality=None, version=None, use_git=False,
                                skip_verify=False):
    """Download the component described by `manifest` in a worker thread,
    propagating and logging any error raised during the download.
    """
    threads = list()

    def kraft_download_component_thread(localdir=None, manifest=None,
                                        equality=ManifestVersionEquality.EQ,
                                        version=None, use_git=False,
                                        skip_verify=False,
                                        override_existing=False):
        # Re-enter the caller's context so the thread sees the same
        # configuration state.
        with ctx:
            kraft_download_component(
                localdir=localdir,
                manifest=manifest,
                equality=equality,
                version=version,
                use_git=use_git,
                skip_verify=skip_verify,
                override_existing=override_existing
            )

    # Work out where the component should land on disk.
    if workdir is None:
        localdir = manifest.localdir
    elif manifest.type == ComponentType.CORE:
        localdir = os.path.join(workdir, manifest.type.workdir)
    else:
        localdir = os.path.join(workdir, manifest.type.workdir, manifest.name)

    thread = ErrorPropagatingThread(
        target=kraft_download_component_thread,
        kwargs=dict(
            localdir=localdir,
            manifest=manifest,
            equality=equality,
            version=version,
            use_git=use_git,
            skip_verify=skip_verify,
        )
    )
    threads.append((manifest, thread))
    thread.start()

    # ErrorPropagatingThread re-raises the worker's exception on join().
    for _manifest, worker_thread in threads:
        try:
            worker_thread.join()
        except Exception as e:
            logger.error("Error pulling manifest: %s " % e)

            if ctx.obj.verbose:
                import traceback
                logger.error(traceback.format_exc())

    # Only flush progress bars when attached to a real terminal.
    if sys.stdout.isatty():
        flush()
    def _checkPathorRemoveRecord(self):
        """Check each record in `self.dict_existing_records` in parallel.

        Workers validate record paths (or flag records for removal),
        appending SQL batches to `tasklist` and report lines to
        `reportlist`; `self.__progress` consumes `writerQueue` to show
        progress.  SQL batches are then run in threads and the report
        is written.

        Returns
        -------
        None
        """
        workerQueue = mp.Queue()
        writerQueue = mp.Queue()
        # One manager is enough for both shared lists.
        manager = mp.Manager()
        reportlist = manager.list()
        tasklist = manager.list()

        for record in self.dict_existing_records:
            workerQueue.put(record)

        # One sentinel per worker so each one sees end-of-input.
        for _ in range(self.cpus):
            workerQueue.put(None)

        workerProc = [
            mp.Process(target=self.worker_checkPathorRemoveRecord,
                       args=(workerQueue, i, tasklist, reportlist,
                             writerQueue)) for i in range(self.cpus)
        ]
        writeProc = mp.Process(target=self.__progress,
                               args=(len(self.dict_existing_records),
                                     writerQueue))

        try:
            writeProc.start()

            for p in workerProc:
                p.start()

            for p in workerProc:
                p.join()

            # All workers done: stop the progress writer.
            writerQueue.put(None)
            writeProc.join()

            # Execute the accumulated SQL batches concurrently.
            taskProc = []
            for i, list_sql in enumerate(tasklist):
                subproc = threading.Thread(target=self.task_sql_command,
                                           args=(list_sql, i))
                subproc.start()
                taskProc.append(subproc)
            for tp in taskProc:
                tp.join()
            flush()
        except Exception:
            # The original left this cleanup commented out, leaking the
            # child processes on failure. Terminate them and re-raise so
            # the caller still sees the original exception.
            for p in workerProc:
                p.terminate()
            writeProc.terminate()
            raise

        self.logger.info('We write a report')
        for reportlist_item in reportlist:
            self.report_database_update.write(reportlist_item)
Exemplo n.º 7
0
def run_with_threading(nthreads=3, niterations=(5, 5, 5)):
    """Run `nthreads` threads, each showing an `atpbar` progress bar.

    Parameters
    ----------
    nthreads : int
        Number of threads to launch.
    niterations : sequence of int
        Iteration count for each thread; must have at least `nthreads`
        elements.  (Tuple default fixes the original's mutable default
        argument `[5, 5, 5]`.)

    Returns
    -------
    None
    """
    def task(n, name):
        # Simulated work: `n` short sleeps shown on a progress bar.
        for i in atpbar(range(n), name=name):
            time.sleep(0.0001)

    threads = []
    for i in range(nthreads):
        name = 'thread {}'.format(i)
        n = niterations[i]
        t = threading.Thread(target=task, args=(n, name))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    flush()
Exemplo n.º 8
0
    def close(self):
        """Close the drop box: stop every worker and flush progress bars.

        Returns
        -------
        None
        """

        # Send one stop sentinel per worker, then wait until the task
        # queue has fully drained before forgetting the workers.
        if self.workers:
            for _ in self.workers:
                self.task_queue.put(None)
            self.task_queue.join()
            self.workers = []

        atpbar.flush()
Exemplo n.º 9
0
    def receive(self):
        """Return pairs of task indices and results.

        This method waits until all tasks finish.
        """

        collected = []  # accumulates (task_idx, result) pairs
        while self.n_ongoing_tasks >= 1:
            collected.extend(self._receive_finished())
            time.sleep(0.0001)  # brief pause so the poll doesn't busy-spin

        # Order results by the index of the task that produced them.
        collected.sort(key=itemgetter(0))

        atpbar.flush()

        return collected
Exemplo n.º 10
0
    def run_with_threading(nthreads=3,
                           niterations=(5, 5, 5),
                           time_starting_task=0):
        """Launch threads whose `atpbar` bars overlap the main loop's bar.

        Parameters
        ----------
        nthreads : int
            Number of threads to launch.
        niterations : sequence of int
            Iteration count for each thread; needs at least `nthreads`
            elements.  (Tuple default fixes the original's mutable
            default argument `[5, 5, 5]`.)
        time_starting_task : float
            Seconds each task sleeps before starting its loop.
        """
        def task(n, name, time_starting):
            # When the starting time is long, the loop in the main
            # thread might already end by the time the loop in this
            # task starts.
            time.sleep(time_starting)
            for i in atpbar(range(n), name=name):
                time.sleep(0.0001)

        threads = []

        # `atpbar` is used here while `atpbar` is also used in threads
        # being launched in this loop. If none of the `atpbar`s in
        # threads has started by the end of this loop, the `atpbar` for
        # this loop waits until the progress bar for this loop finishes
        # updating. Otherwise, progress bars from threads are being
        # updated together with the progress bar for this loop and the
        # `atpbar` will not wait.
        for i in atpbar(range(nthreads)):
            name = 'thread {}'.format(i)
            n = niterations[i]
            t = threading.Thread(target=task,
                                 args=(n, name, time_starting_task))
            t.start()
            threads.append(t)

            # Sleep some time so this loop doesn't end too quickly.
            # Without this sleep, this loop could end before an `atpbar`
            # in any of the threads starts, even if `time_starting_task`
            # is zero.
            time.sleep(0.01)

        for t in threads:
            t.join()

        flush()
Exemplo n.º 11
0
            f.write(item + '\n')

            end_time = time.time()
            elapsed_time = end_time - start_time

            if elapsed_time < seconds_to_wait_between_requests:
                time.sleep(seconds_to_wait_between_requests - elapsed_time)


    return filename

# Shared atpbar progress reporter; each pool worker registers it so its
# progress bars are forwarded to this (main) process.
reporter = find_reporter()

# Fan the ID batches out to worker processes. Each `job` call presumably
# writes its fetched items to a part file and returns its filename —
# TODO confirm against the `job` definition above.
with Pool(processes = num_processes, initializer = register_reporter, initargs = [reporter]) as pool:
    item_part_files = pool.map(job, split_ids_to_fetch)
    flush()

# Concatenate all per-worker part files into one dated JSON file.
# NOTE(review): shell=True with a glob is fine for this trusted local
# path, but would be unsafe with untrusted input.
formatted_date = datetime.utcnow().date().strftime("%Y_%m_%d")
subprocess.run(f'cat data/* > data/all_items_{formatted_date}.json', shell = True, check = True)

all_items_full_path = f'{os.getcwd()}/data/all_items_{formatted_date}.json'

try:
    # cleanup any previously staged files
    cur.execute('remove @load_db.hackernews.%items;')

    # the table stage is an implicit stage created for every table so no need to create it
    # snowflake put will auto_compress by default into gz
    cur.execute(f'put file://{all_items_full_path} @load_db.hackernews.%items;')

    cur.execute('truncate load_db.hackernews.items;')