Exemplo n.º 1
0
 def _jobs_to_queue(self, jobs):
     queue = mp.JoinableQueue()
     for idx, job in enumerate(jobs):
         queue.put((idx, job))
     for _ in range(self.workers):
         queue.put(None)
     return queue
Exemplo n.º 2
0
    def __init__(self, branches, domain, n_iters, executables, research_path,
                 update_config, update_domain, n_updates):
        """ Initialize job-generation state over a configuration domain.

        Parameters
        ----------
        branches : int or list
            number of branches, or an explicit list of branch configs
        domain : Domain
            domain of configurations to iterate over (must expose `size`
            and `set_update`)
        n_iters : int or None
            iterations per job
        executables : sequence
            executable units attached to each job
        research_path : str
            path to the research folder
        update_config : dict or None
            config with 'function' and 'cache' keys; when 'cache' > 0 the
            function is memoized with an LRU cache of that size
        update_domain : dict or None
            kwargs forwarded to `domain.set_update`
        n_updates : int
            number of domain updates to perform
        """
        self.branches = branches
        self.n_iters = n_iters
        self.executables = executables
        self.research_path = research_path

        # Memoize the update function when caching is requested.
        if update_config is not None and update_config['cache'] > 0:
            update_config['function'] = lru_cache(
                maxsize=update_config['cache'])(update_config['function'])

        self.update_config = update_config

        self.n_branches = branches if isinstance(branches,
                                                 int) else len(branches)

        # Fix: the original assigned `self.domain = domain` twice (before
        # and after the update-config handling); a single assignment suffices.
        self.domain = domain
        self.update_domain = update_domain
        self.n_updates = n_updates
        self.update_idx = 0

        self._domain_size = self.domain.size
        if self.update_domain is not None:
            self.domain.set_update(**self.update_domain)

        self.generator = self._generate_config(self.domain)

        self._queue = mp.JoinableQueue()

        self.generated_jobs = 0
Exemplo n.º 3
0
 def start(self):
     ''' Create tasks and results queues, and start consumers. '''
     mp.freeze_support()
     self.tasks = mp.JoinableQueue()
     self.results = mp.Queue()
     # Spawn one Consumer per configured slot and launch each one.
     self.consumers = []
     for _ in range(self.getNConsumers()):
         self.consumers.append(Consumer(self.tasks, self.results))
     for consumer in self.consumers:
         consumer.start()
Exemplo n.º 4
0
 def __init__(self, measureFlowchart, numberProc=1):
     """
     Object for parallel processing and preprocessing of image frames
     """
     self.measureFlowchart = measureFlowchart
     # Bounded input queue (capacity 1) applies backpressure to the producer.
     self.input_queue = multiprocessing.JoinableQueue(1)
     self.output_queue = multiprocessing.Queue()
     self.numberProc = numberProc
     # One ProcessQueue worker per requested process, all sharing the queues.
     self.processes = []
     for _ in range(self.numberProc):
         self.processes.append(
             ProcessQueue(self.input_queue, self.output_queue))
Exemplo n.º 5
0
    def __init__(self, env_id, make_env, reward_predictor, num_workers, max_timesteps_per_episode, seed):
        """ Spawn `num_workers` Actor processes wired to shared task/result queues. """
        self.num_workers = num_workers
        self.predictor = reward_predictor

        self.tasks_q = multiprocess.JoinableQueue()
        self.results_q = multiprocess.Queue()

        # Give each actor a uniquely seeded env.
        self.actors = [
            Actor(self.tasks_q, self.results_q, env_id, make_env,
                  seed * 1000 + worker_idx, max_timesteps_per_episode)
            for worker_idx in range(self.num_workers)
        ]

        for actor in self.actors:
            actor.start()

        # we will start by running 20,000 / 1000 = 20 episodes for the first iteration  TODO OLD
        self.average_timesteps_in_episode = 1000
Exemplo n.º 6
0
    def run(self, jobs_queue, bar=False):
        """ Run distributor and workers.

        Spawns one worker process per entry of `self.workers`, feeds them
        from `jobs_queue`, and tracks progress via the `self.results`
        feedback queue until the domain is exhausted.

        Parameters
        ----------
        jobs_queue : DynamicQueue of tasks

        bar : bool or callable
            `True` -> tqdm progress bar, `False` -> silent stub; a
            callable is used as the bar factory directly.
        """
        self.jobs_queue = jobs_queue

        if isinstance(bar, bool):
            bar = tqdm if bar else _DummyBar()

        self.logger.info('Distributor [id:{}] is preparing workers'.format(
            os.getpid()))

        # `self.workers` is either a count of identical workers or an
        # iterable of per-worker configs.
        if isinstance(self.workers, int):
            workers = [
                self.worker_class(devices=self.devices[i],
                                  worker_name=i,
                                  timeout=self.timeout,
                                  trials=self.trials,
                                  logger=self.logger)
                for i in range(self.workers)
            ]
        else:
            workers = [
                self.worker_class(devices=self.devices[i],
                                  worker_name=i,
                                  timeout=self.timeout,
                                  trials=self.trials,
                                  logger=self.logger,
                                  worker_config=worker_config)
                for i, worker_config in enumerate(self.workers)
            ]
        try:
            self.logger.info('Create queue of jobs')
            # Feedback queue: workers put Signal objects here.
            self.results = mp.JoinableQueue()
        except Exception as exception:  #pylint:disable=broad-except
            self.logger.error(exception)
        else:
            if len(workers) > 1:
                msg = 'Run {} workers'
            else:
                msg = 'Run {} worker'
            self.logger.info(msg.format(len(workers)))
            for worker in workers:
                try:
                    # Each worker consumes jobs_queue and reports on results.
                    mp.Process(target=worker,
                               args=(self.jobs_queue, self.results)).start()
                except Exception as exception:  #pylint:disable=broad-except
                    self.logger.error(exception)
            previous_domain_jobs = 0
            n_updates = 0
            # job id -> last reported iteration (for progress accounting).
            finished_iterations = dict()
            with bar(total=None) as progress:
                while True:
                    # Pre-load one job per worker plus one spare.
                    n_jobs = self.jobs_queue.next_jobs(len(workers) + 1)
                    jobs_in_queue = n_jobs
                    finished_jobs = 0
                    rest_of_generator = 0
                    while finished_jobs != jobs_in_queue:
                        progress.set_description('Domain updated: ' +
                                                 str(n_updates))

                        # Re-estimate the bar total; the domain size may be
                        # unknown (None) for generator-backed domains.
                        estimated_size = self.jobs_queue.total
                        if estimated_size is not None:
                            total = rest_of_generator + previous_domain_jobs + estimated_size
                            if self.n_iters is not None:
                                total *= self.n_iters
                            progress.total = total
                        signal = self.results.get()
                        if self.n_iters is not None:
                            finished_iterations[signal.job] = signal.iteration
                        if signal.done:
                            finished_jobs += 1
                            finished_iterations[signal.job] = self.n_iters
                            # Optionally update the domain every `each`
                            # finished jobs.
                            each = self.jobs_queue.domain.update_each
                            if isinstance(each,
                                          int) and finished_jobs % each == 0:
                                was_updated = self.jobs_queue.update()
                                if was_updated:
                                    rest_of_generator = jobs_in_queue
                                n_updates += was_updated
                            if n_jobs > 0:
                                # Keep the queue topped up one job at a time.
                                n_jobs = self.jobs_queue.next_jobs(1)
                                jobs_in_queue += n_jobs
                        if self.n_iters is not None:
                            progress.n = sum(finished_iterations.values())
                        else:
                            progress.n += signal.done
                        progress.refresh()
                    if self.jobs_queue.domain.update_each == 'last':
                        # Update only after the whole domain is exhausted;
                        # stop once an update produces nothing new.
                        was_updated = self.jobs_queue.update()
                        n_updates += 1
                        if not was_updated:
                            break
                    else:
                        # No further updates: send stop sentinels and wait.
                        self.jobs_queue.stop_workers(len(workers))
                        self.jobs_queue.join()
                        break
                    previous_domain_jobs += finished_jobs
        self.logger.info('All workers have finished the work')
        logging.shutdown()
Exemplo n.º 7
0
    def __call__(self, queue, results):
        """ Run worker.

        Consumes jobs from `queue` until a `None` sentinel is received.
        Each job is retried up to `self.trials` times in a fresh subprocess;
        progress and failures are reported as Signal objects on `results`.

        Parameters
        ----------
        queue : multiprocessing.Queue
            queue of jobs for worker
        results : multiprocessing.Queue
            queue for feedback
        """
        _devices = [item['device'] for item in self.devices]
        self.logger.info('Start {} [id:{}] (devices: {})'.format(
            self.worker_name, os.getpid(), _devices))

        try:
            job = queue.get()
        except Exception as exception:  #pylint:disable=broad-except
            self.logger.error(exception)
        else:
            # `None` is the stop sentinel.
            while job is not None:
                try:
                    finished = False
                    self.logger.info(self.worker_name +
                                     ' is creating process for Job ' +
                                     str(job[0]))
                    for trial in range(self.trials):
                        # Single-job queue handed to the task subprocess.
                        one_job_queue = mp.JoinableQueue()
                        one_job_queue.put(job)
                        feedback_queue = mp.JoinableQueue()
                        # Shared timestamp the task refreshes; drives the
                        # timeout watchdog below.
                        last_update_time = mp.Value('d', time.time())

                        task = mp.Process(target=self._run_task,
                                          args=(one_job_queue, feedback_queue,
                                                trial, last_update_time))
                        task.start()
                        # First message from the task is its PID.
                        pid = feedback_queue.get()
                        final_signal = Signal(worker=self.worker_name,
                                              job=job[0],
                                              iteration=0,
                                              n_iters=job[1].n_iters,
                                              trial=trial,
                                              done=False,
                                              exception=None)

                        while True:
                            try:
                                signal = feedback_queue.get(timeout=1)
                            except EmptyException:
                                signal = None
                            if signal is None:
                                # No feedback this second: check how long the
                                # task has been silent (in minutes).
                                execution_time = (time.time() -
                                                  last_update_time.value) / 60
                                if self.timeout is not None and execution_time > self.timeout:
                                    # Kill the stalled task and report timeout.
                                    p = psutil.Process(pid)
                                    p.terminate()
                                    message = f'Job {job[0]} [{pid}] failed in {self.worker_name} because of timeout'
                                    self.logger.info(message)
                                    final_signal.exception = TimeoutError(
                                        message)
                                    results.put(copy(final_signal))
                                    break
                            if signal is not None and signal.done:
                                finished = True
                                final_signal = signal
                                break
                            if signal is not None:
                                # Intermediate progress: forward and continue.
                                final_signal = signal
                                results.put(copy(final_signal))
                        if finished:
                            break
                except Exception as exception:  #pylint:disable=broad-except
                    self.logger.error(exception)
                    final_signal.exception = exception
                    results.put(copy(final_signal))
                if final_signal.done:
                    results.put(copy(final_signal))
                else:
                    # All trials exhausted without success: report terminal
                    # failure so the distributor can stop counting this job.
                    final_signal.exception = RuntimeError(
                        'Job {} [{}] failed {} times in {}'.format(
                            job[0], pid, self.trials, self.worker_name))
                    final_signal.done = True
                    results.put(copy(final_signal))
                queue.task_done()
                job = queue.get()
            # Acknowledge the `None` sentinel.
            queue.task_done()
Exemplo n.º 8
0
    def run(self,
            jobs,
            dirname,
            n_jobs,
            n_iters,
            logfile=None,
            errorfile=None,
            progress_bar=False,
            *args,
            **kwargs):
        """ Run distributor and workers.

        Parameters
        ----------
        jobs : iterable

        dirname : str
            directory that receives the log files

        n_jobs : int
            total number of jobs, used for progress accounting

        n_iters : int or None

        logfile : str (default: 'research.log')

        errorfile : str (default: 'errors.log')

        progress_bar : bool

        args, kwargs
            will be used in worker
        """
        self.logfile = logfile or 'research.log'
        self.errorfile = errorfile or 'errors.log'

        # Log paths are resolved relative to the research directory.
        self.logfile = os.path.join(dirname, self.logfile)
        self.errorfile = os.path.join(dirname, self.errorfile)

        # Workers receive the log locations through kwargs.
        kwargs['logfile'] = self.logfile
        kwargs['errorfile'] = self.errorfile

        self.log_info('Distributor [id:{}] is preparing workers'.format(
            os.getpid()),
                      filename=self.logfile)

        # `self.workers` is either a count of identical workers or an
        # iterable of per-worker configs.
        if isinstance(self.workers, int):
            workers = [
                self.worker_class(gpu=self._get_worker_gpu(self.workers, i),
                                  worker_name=i,
                                  timeout=self.timeout,
                                  trials=self.trials,
                                  *args,
                                  **kwargs) for i in range(self.workers)
            ]
        else:
            workers = [
                self.worker_class(gpu=self._get_worker_gpu(
                    len(self.workers), i),
                                  worker_name=i,
                                  config=config,
                                  timeout=self.timeout,
                                  trials=self.trials,
                                  *args,
                                  **kwargs)
                for i, config in enumerate(self.workers)
            ]
        try:
            self.log_info('Create queue of jobs', filename=self.logfile)
            self.queue = self._jobs_to_queue(jobs)
            self.results = mp.JoinableQueue()
        except Exception as exception:  #pylint:disable=broad-except
            logging.error(exception, exc_info=True)
        else:
            if len(workers) > 1:
                msg = 'Run {} workers'
            else:
                msg = 'Run {} worker'
            self.log_info(msg.format(len(workers)), filename=self.logfile)
            for worker in workers:
                # Hand the distributor's loggers to each worker.
                worker.log_info = self.log_info
                worker.log_error = self.log_error
                try:
                    mp.Process(target=worker,
                               args=(self.queue, self.results)).start()
                except Exception as exception:  #pylint:disable=broad-except
                    logging.error(exception, exc_info=True)

            self.answers = [0 for _ in range(n_jobs)]
            self.finished_jobs = []

            if progress_bar:
                if n_iters is not None:
                    print(
                        "Distributor has {} jobs with {} iterations. Totally: {}"
                        .format(n_jobs, n_iters, n_jobs * n_iters))
                    with tqdm(total=n_jobs * n_iters) as progress:
                        # NOTE(review): this loop polls _get_position without
                        # visibly blocking — confirm _get_position blocks on
                        # self.results internally, otherwise this busy-waits.
                        while True:
                            position = self._get_position()
                            progress.n = position
                            progress.refresh()
                            if len(self.finished_jobs) == n_jobs:
                                break
                else:
                    print("Distributor has {} jobs".format(n_jobs))
                    with tqdm(total=n_jobs) as progress:
                        while True:
                            # Blocks until a worker reports a Signal.
                            answer = self.results.get()
                            if answer.done:
                                self.finished_jobs.append(answer.job)
                            position = len(self.finished_jobs)
                            progress.n = position
                            progress.refresh()
                            if len(self.finished_jobs) == n_jobs:
                                break
            else:
                # Without a bar, just wait until every job is acknowledged.
                self.queue.join()
        self.log_info('All workers have finished the work',
                      filename=self.logfile)
        logging.shutdown()
Exemplo n.º 9
0
    def __call__(self, queue, results):
        """ Run worker.

        Consumes jobs from `queue` until a `None` sentinel arrives; each
        job is retried up to `self.trials` times in a subprocess, with
        progress reported as Signal objects on `results`.

        Parameters
        ----------
        queue : multiprocessing.Queue
            queue of jobs for worker
        results : multiprocessing.Queue
            queue for feedback
        """
        _gpu = 'default' if len(self.gpu) == 0 else self.gpu
        self.log_info('Start {} [id:{}] (gpu: {})'.format(
            self.name, os.getpid(), _gpu),
                      filename=self.logfile)

        # Restrict this worker (and its children) to its assigned GPUs.
        if len(self.gpu) > 0:
            os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
                [str(gpu) for gpu in self.gpu])

        try:
            job = queue.get()
        except Exception as exception:  #pylint:disable=broad-except
            self.log_error(exception, filename=self.errorfile)
        else:
            # `None` is the stop sentinel.
            while job is not None:
                try:
                    finished = False
                    self.log_info(self.name + ' is creating process for Job ' +
                                  str(job[0]),
                                  filename=self.logfile)
                    for trial in range(self.trials):
                        # Single-job queue handed to the subprocess.
                        sub_queue = mp.JoinableQueue()
                        sub_queue.put(job)
                        feedback_queue = mp.JoinableQueue()

                        worker = mp.Process(target=self._run_job,
                                            args=(sub_queue, feedback_queue,
                                                  self.name, trial))
                        worker.start()
                        # First message from the subprocess is its PID.
                        pid = feedback_queue.get()
                        # Seconds elapsed with no feedback from the job.
                        silence = 0
                        default_signal = Signal(self.name, job[0], 0,
                                                job[1].n_iters, trial, False,
                                                None)

                        while True:
                            try:
                                signal = feedback_queue.get(timeout=1)
                            except Empty:
                                signal = None
                                silence += 1
                            # NOTE(review): assumes self.timeout is a number
                            # (minutes) — this comparison raises TypeError if
                            # timeout is None; confirm callers always set it.
                            if signal is None and silence / 60 > self.timeout:
                                # Kill the stalled subprocess, report timeout.
                                p = psutil.Process(pid)
                                p.terminate()
                                message = 'Job {} [{}] failed in {}'.format(
                                    job[0], pid, self.name)
                                self.log_info(message, filename=self.logfile)
                                default_signal.exception = TimeoutError(
                                    message)
                                results.put(default_signal)
                                break
                            elif signal is not None and signal.done:
                                finished = True
                                default_signal = signal
                                break
                            elif signal is not None:
                                # Intermediate progress: forward it and reset
                                # the silence counter.
                                default_signal = signal
                                results.put(default_signal)
                                silence = 0
                        if finished:
                            break
                except Exception as exception:  #pylint:disable=broad-except
                    self.log_error(exception, filename=self.errorfile)
                    default_signal.exception = exception
                    results.put(default_signal)
                if default_signal.done:
                    results.put(default_signal)
                else:
                    # All trials exhausted without success.
                    # NOTE(review): unlike a sibling implementation, `done` is
                    # left False here — confirm consumers treat this signal as
                    # terminal for the job.
                    default_signal.exception = RuntimeError(
                        'Job {} [{}] failed {} times in {}'.format(
                            job[0], pid, self.trials, self.name))
                    results.put(default_signal)
                queue.task_done()
                job = queue.get()
            # Acknowledge the sentinel.
            queue.task_done()
Exemplo n.º 10
0
def fetchseq(ids,
             species,
             write=False,
             output_name='',
             delim='\t',
             id_type='brute',
             server=None,
             source="SQL",
             database="bioseqdb",
             database_path=None,
             host='localhost',
             driver='psycopg2',
             version='1.0',
             user='******',
             passwd='',
             email='',
             batch_size=50,
             output_type="fasta",
             verbose=1,
             n_threads=1,
             n_subthreads=1,
             add_length=(0, 0),
             indent=0):
    """ Fetch sequences for every record in `ids` using parallel FetchSeqMP
    worker processes.

    `ids` may be a generator, a list, or an object with an `open` method
    (e.g. a Path) pointing at a file with one ID record per line.  Each
    record must have exactly 12 fields: chr, (start, end), id, score,
    strand, thickStart, thickEnd, rgb, blockcount, blockspans,
    blockstarts, query_span.

    When `write` is truthy, sequences are written via SeqIO and None is
    returned; otherwise returns (output_list, missing_items_list).

    NOTE(review): `print(..., indent=...)` implies a project-local print
    wrapper is in scope here, not the builtin — verify imports.
    """
    if isgenerator(ids):
        if verbose > 1:
            print('Received generator!', indent=indent)
        # Fix: materialize the generator — it is iterated twice below and
        # len(ids) is taken, both of which fail on a raw generator.
        ids = list(ids)
    elif isinstance(ids, list):
        if verbose > 1:
            print('Received list!', indent=indent)
    else:
        if verbose > 1:
            print('Reading ID File... ', indent=indent)
        # Fix: open for reading — mode 'w' truncated the ID file and
        # produced zero lines.
        with ids.open('r') as in_handle:
            id_prelist = [line.strip() for line in in_handle
                          ]  # list of each line in the file
            print('Done!', indent=indent)
        ids = [id_item for id_item in filter(None, id_prelist) if id_item]
        if not id_prelist:
            if verbose:
                print('id_prelist is empty!', indent=indent)
            return 'None'
    # Validate the 12-field record shape before spawning any workers.
    for id_item in ids:
        assert len(id_item) == 12, (
            "Item {0} in id_list has {1} items, not 12!\n"
            "Format should be: "
            "chr, (start,end), id, score, strand, thickStart, thickEnd, rgb, blockcount,"
            " blockspans, blockstarts, query_span"
            "!").format(
                " ".join((" ".join(item) if not isinstance(item, str) else item
                          for item in id_item)), len(id_item))
    if verbose > 1:
        print('Readied ids!', indent=indent)

    # Work queue of FetchSeq tasks and queue for fetched sequences.
    id_list = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    if 'sql' in source.lower():
        if server is None:
            try:
                if verbose > 1:
                    print('No server received, opening server...',
                          indent=indent)
                server = BioSeqDatabase.open_database(driver=driver,
                                                      user=user,
                                                      passwd=passwd,
                                                      host=host,
                                                      database=database)
                if verbose > 1:
                    print('Done!', indent=indent)
            except Exception as err:
                if verbose > 1:
                    print('Failed to open server!', indent=indent)
                    print(str(type(err)), err, sep=' ', indent=indent)
                raise
        else:
            if verbose > 1:
                print('Received server handle:', indent=indent)
                print(server, indent=indent)
            if verbose > 2:
                print('Please note the sub_databases of server:\n\t',
                      [str(i) for i in server.keys()],
                      indent=indent)
    elif source.lower() in ['fasta', '2bit', 'twobit']:
        print('Search type: ', source, indent=indent)
    else:
        raise SearchEngineNotImplementedError(
            'Search using source {} has not yet been implemented!'.format(
                source))
    if verbose > 1:
        print('Creating FecSeq Processes...', indent=indent)
    fs_instances = [
        FetchSeqMP(id_queue=id_list,
                   seq_out_queue=results,
                   delim=delim,
                   id_type=id_type,
                   server=server,
                   species=species,
                   source=source,
                   database=database,
                   database_path=database_path,
                   host=host,
                   driver=driver,
                   version=version,
                   user=user,
                   passwd=passwd,
                   email=email,
                   output_type=output_type,
                   batch_size=batch_size,
                   verbose=verbose,
                   n_subthreads=n_subthreads,
                   add_length=add_length,
                   indent=indent + 1) for _ in range(n_threads)
    ]
    if verbose > 1:
        print('Done! Starting processes...', indent=indent)
    for fs in fs_instances:
        fs.start()
    if verbose > 1:
        print('Done!', indent=indent)
        print('Assigning FetchSeq records to queue... ', indent=indent)
    # Remember submission order so results can be re-sorted at the end.
    id_order = []
    for i, id_rec in enumerate(ids):
        try:
            id_order.append("{0}:{1}-{2}".format(id_rec[0], id_rec[1][0],
                                                 id_rec[1][1]))
        except IndexError:
            id_order.append("{0}".format(id_rec[0]))
        try:
            id_list.put(FetchSeq(id_rec=id_rec))
        except AssertionError as err:
            print(i, type(err), err, sep=' ')
            break
    # One None sentinel per worker process.
    for _ in fs_instances:
        id_list.put(None)
    if verbose > 1:
        print('Done!', indent=indent)
    output_dict = dict()
    missing_items_list = list()
    if verbose > 1:
        print('Getting sequences from processes... ', indent=indent)
    # Collect exactly one (seq, missing) pair per input record.
    n_jobs = len(ids)
    while n_jobs:
        seq, missing = results.get()
        output_dict[seq[0]] = seq[1]
        missing_items_list.append(missing)
        n_jobs -= 1
    if verbose > 1:
        print('Done! Finished fetching sequences!', indent=indent)
        print('Closing processes!', indent=indent)
    for fs in fs_instances:
        if fs.is_alive():
            fs.join()
    # Restore the caller's ordering; drop anything never fetched.
    output_list = [output_dict[i] for i in id_order if i in output_dict]
    if write:
        SeqIO.write(output_list, output_name, output_type)
        return
    else:
        if missing_items_list == [None]:
            missing_items_list = None
        return output_list, missing_items_list
Exemplo n.º 11
0
def biosql_get_record(id_list,
                      sub_db_name,
                      passwd='',
                      id_type='accession',
                      driver="psycopg2",
                      indent=0,
                      user="******",
                      host="localhost",
                      database="bioseqdb",
                      num_proc=2,
                      verbose=True,
                      server=None):
    """ Fetch BioSQL records for `id_list` using `num_proc` GetSeqMP workers.

    Returns a tuple `(seqdict, itemsnotfound)` where `seqdict` maps each
    identifier to its fetched record and `itemsnotfound` lists identifiers
    that produced no result.

    :param sub_db_name:
    :param passwd:
    :param id_list: a single identifier or a list of identifiers
    :param id_type:
    :param driver:
    :param indent:
    :param user:
    :param host:
    :param database:
    :param num_proc: number of worker processes to spawn
    :param verbose:
    :param server: optional already-open BioSQL server handle
    :return:
    if __name__ == '__main__':
        biosql_get_record(sub_db_name='MyoLuc2.0', passwd='',
                             id_list=['NW_005871148', 'NW_005871300', 'NW_005871148'], id_type='accession',
                             driver="psycopg2", user="******",
                             host="localhost", database="bioseqdb", verbose=True)
    """
    # Work queue of lookup tasks and queue of fetched results.
    idents = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # num = multiprocessing.cpu_count() * 2
    if verbose > 2:
        print('\tStarting biosql_get_record_mp', indent=indent)
    # Accept a single identifier as well as a list.
    id_list = id_list if isinstance(id_list, list) else [id_list]
    num_jobs = len(id_list)
    seqdict = dict()
    getseqs = [
        GetSeqMP(idents,
                 results,
                 database=database,
                 host=host,
                 driver=driver,
                 user=user,
                 passwd=passwd,
                 sub_db_name=sub_db_name,
                 verbose=verbose,
                 server=server) for _ in range(num_proc)
    ]
    for gs in getseqs:
        gs.start()

    for item in id_list:
        idents.put(
            BioSeqLookupCascade(id_type=id_type,
                                identifier=item,
                                verbose=verbose,
                                indent=indent))

    # One None sentinel per worker so each GetSeqMP process stops.
    for i in range(num_proc):
        idents.put(None)

    # Collect exactly one (identifier, record) pair per submitted lookup.
    while num_jobs:
        temp = results.get()
        print(temp, indent=indent)
        temp[1].name = temp[0]
        seqdict[temp[0]] = temp[1]
        num_jobs -= 1
    if verbose:
        print('Done with biosql_get_record_mp!', indent=indent)
        print('Closing processes!', indent=indent)
    for gs in getseqs:
        if gs.is_alive():
            gs.join()

    # Identifiers that never produced a record.
    itemsnotfound = [i for i in id_list if i not in seqdict.keys()]

    return seqdict, itemsnotfound