Exemplo n.º 1
0
    def __init__(self,
                 hosts=None,
                 channel_type="mpi",
                 preamble=None,
                 retry_jobs=True,
                 no_wait=True,
                 verbose=True,
                 max_retries=2,
                 use_threading=False):
        """Initialise the bookkeeping state and hand ``hosts`` to ``add_hosts``.

        Args:
            hosts: Hosts forwarded to ``add_hosts``. ``None`` (the default)
                is treated as an empty list.
            channel_type: Forwarded unchanged to ``add_hosts``.
            preamble: Stored as ``self.preamble``.
            retry_jobs: Stored as ``self.retry_jobs``.
            no_wait: Stored as ``self.no_wait``.
            verbose: Stored as ``self.verbose``; when true a launch banner
                is printed.
            max_retries: Stored as ``self.max_retries``.
            use_threading: Stored as ``self.use_threading``.
        """
        # Use a fresh list per call: a literal ``[]`` default is a single
        # object shared by every instance created without ``hosts``.
        if hosts is None:
            hosts = []

        self.hosts = []
        self.job_list = deque()
        self.idle_codes = []
        self.retry_jobs = retry_jobs
        self.max_retries = max_retries
        self._finished_jobs = deque()
        self.preamble = preamble
        self.pool = AsyncRequestsPool()
        self.number_available_codes = 0
        self.number_starting_codes = 0
        self.no_wait = no_wait
        self.last_finished_job = None
        self.use_threading = use_threading
        self.verbose = verbose
        if self.verbose:
            print("AMUSE JobServer launching")

        # Must come last: every attribute above has to exist before hosts
        # are added.
        self.add_hosts(hosts=hosts, channel_type=channel_type)
Exemplo n.º 2
0
    def test22(self):
        """Two pooled sleep requests are delivered one per ``pool.wait()``.

        The requests sleep for different durations; after each ``wait()``
        exactly one more callback must have run and the pool must have
        shrunk by one.
        """
        pool = AsyncRequestsPool()

        x = self.ForTestingInterface()
        y = self.ForTestingInterface()
        # try/finally so the two codes are stopped even when an assertion
        # fails part-way through.
        try:
            request1 = x.sleep.asynchronous(0.5)
            request2 = y.sleep.asynchronous(1.5)
            finished_requests = []

            def handle_result(request, index):
                # Callback invoked by the pool with the extra args given
                # to add_request.
                self.assertTrue(request.is_result_available())
                finished_requests.append(index)

            pool.add_request(request1, handle_result, [1])
            pool.add_request(request2, handle_result, [2])

            pool.wait()
            self.assertEqual(len(finished_requests), 1)
            self.assertEqual(len(pool), 1)

            pool.wait()
            self.assertEqual(len(finished_requests), 2)
            self.assertEqual(len(pool), 0)

            self.assertTrue(request1.is_result_available())
            self.assertTrue(request2.is_result_available())

            self.assertEqual(request1.result(), 0)
            self.assertEqual(request2.result(), 0)
        finally:
            y.stop()
            x.stop()
Exemplo n.º 3
0
def step_les_models(model_time, work_queue, offset=les_spinup):
    """Step every model in ``les_models`` using the configured strategy.

    The strategy is chosen from module-level settings:

    * ``les_queue_threads >= len(les_models)``: all models are stepped
      concurrently, through asynchronous ``evolve_model`` calls when
      ``async_evolve`` is set, otherwise with one Python thread per model
      running ``step_les``.
    * ``les_queue_threads > 1``: each model is put on ``work_queue`` and the
      function blocks on ``work_queue.join()``.
    * otherwise: ``step_les`` is called for each model in turn.

    In the concurrent and queued modes ``spio.sync_root()`` is called while
    the models are working.

    Args:
        model_time: Target time. In the asynchronous branch the models are
            evolved to ``model_time + (offset | units.s)``.
        work_queue: Queue that receives ``(les, model_time)`` tuples in the
            queued mode.
        offset: Offset passed on to ``step_les`` or added (as seconds) in
            the asynchronous branch. The default is the value ``les_spinup``
            had when this function was defined.

    Returns:
        list: In the asynchronous branch, the results of the ``evolve_model``
        requests converted to seconds. An empty list in every other branch,
        and also when gathering the asynchronous results raised.

    Raises:
        SystemExit: In the queued mode, when ``errorFlag`` is set after the
            queue has been drained.
    """
    global errorFlag
    les_wall_times = []
    if not any(les_models):
        return les_wall_times
    if les_queue_threads >= len(
            les_models):  # Step all dales models in parallel
        if async_evolve:  # evolve all dales models with asynchronous Amuse calls
            reqs = []
            pool = AsyncRequestsPool()
            for les in les_models:
                req = les.evolve_model.asynchronous(model_time +
                                                    (offset | units.s),
                                                    exactEnd=True)
                reqs.append(req)
                pool.add_request(req)
            # now while the dales threads are working, sync the netcdf to disk
            spio.sync_root()
            # wait for all threads
            pool.waitall()
            try:
                les_wall_times = [r.result().value_in(units.s) for r in reqs]
                log.info("async step_les_models() done. Elapsed times:" +
                         str(['%5.1f' % t for t in les_wall_times]))
            except Exception as e:
                # Format the exception itself: Python 3 exceptions have no
                # ``.message`` attribute, so reading it here would raise
                # AttributeError and hide the original error.
                log.error("Exception caught while gathering results: %s" %
                          e)

        else:  # evolve all dales models using python threads
            threads = []
            for les in les_models:
                t = threading.Thread(target=step_les,
                                     args=(les, model_time, offset),
                                     name=str(les.grid_index))
                threads.append(t)
                t.start()
            # now while the dales threads are working, sync the netcdf to disk
            spio.sync_root()
            # wait for all threads
            for t in threads:
                t.join()
    elif les_queue_threads > 1:
        # NOTE(review): unlike the other branches, ``offset`` is not
        # forwarded here; presumably the queue workers apply their own
        # offset. Confirm against the worker implementation.
        for les in les_models:
            work_queue.put((les, model_time))  # enqueue all dales instances
        # now while the dales threads are working, sync the netcdf to disk
        spio.sync_root()
        work_queue.join()  # wait for all dales work to be completed
        if errorFlag:
            log.info("One thread failed - exiting ...")
            # Worker threads are not stopped explicitly here; the original
            # comment says that is handled by an atexit function.
            finalize()
            sys.exit(1)
    else:  # sequential version
        for les in les_models:
            step_les(les, model_time, offset)
    return les_wall_times
Exemplo n.º 4
0
    def test29(self):
        """A request sequence and a plain request share one pool.

        ``request1`` is an ``ASyncRequestSequence`` fed by ``next_request``,
        which issues four 0.5 s sleeps on ``x``. ``request2`` is a single
        1.0 s sleep on ``y``. The plain request must be reported first, the
        sequence second, and the sequence result is the list of all four
        return values.
        """
        pool = AsyncRequestsPool()

        x = self.ForTestingInterface()
        y = self.ForTestingInterface()
        # try/finally so the two codes are stopped even when an assertion
        # fails part-way through.
        try:
            sequenced_requests_indices = []

            def next_request(index):
                # Supplies the sequence with its next request; returning
                # None ends the sequence after four steps.
                if index < 4:
                    sequenced_requests_indices.append(index)
                    return x.sleep.asynchronous(0.5)
                else:
                    return None

            request1 = ASyncRequestSequence(next_request)
            request2 = y.sleep.asynchronous(1.0)
            finished_requests = []

            def handle_result(request, index):
                self.assertTrue(request.is_result_available())
                # NOTE(review): if ``is_finished`` is a method rather than a
                # property this assertion is always true - confirm.
                self.assertTrue(request.is_finished)
                finished_requests.append(index)

            pool.add_request(request1, handle_result, [1])
            pool.add_request(request2, handle_result, [2])

            pool.wait()
            self.assertEqual(len(finished_requests), 1)
            self.assertEqual(len(pool), 1)
            self.assertEqual(finished_requests, [2])
            self.assertTrue(len(sequenced_requests_indices) > 0)

            pool.wait()
            self.assertEqual(len(finished_requests), 2)
            self.assertEqual(len(pool), 0)
            x.sleep(0.1)
            self.assertEqual(sequenced_requests_indices, [0, 1, 2, 3])

            self.assertTrue(request1.is_result_available())
            self.assertTrue(request2.is_result_available())

            self.assertEqual(request1.result(), [0, 0, 0, 0])
            self.assertEqual(request2.result(), 0)
        finally:
            y.stop()
            x.stop()
Exemplo n.º 5
0
    def test23(self):
        """Pooled requests that finished before ``wait()`` are still reported.

        Both 0.2 s sleeps are issued over the 'sockets' channel and the test
        sleeps 1.0 s before the first ``pool.wait()``. Two waits must drain
        the pool, and a further wait on the empty pool must leave it empty.
        """
        pool = AsyncRequestsPool()

        x = ForTestingInterface(channel_type='sockets')
        y = ForTestingInterface(channel_type='sockets')
        # try/finally so the two codes are stopped even when an assertion
        # fails part-way through.
        try:
            request1 = x.sleep.asynchronous(0.2)
            request2 = y.sleep.asynchronous(0.2)
            finished_requests = []

            def handle_result(request, index):
                self.assertTrue(request.is_result_available())
                finished_requests.append(index)

            pool.add_request(request1, handle_result, [1])
            pool.add_request(request2, handle_result, [2])

            # Sleep longer than both requests before asking the pool.
            time.sleep(1.0)

            pool.wait()
            pool.wait()

            self.assertEqual(len(finished_requests), 2)
            self.assertEqual(len(pool), 0)

            self.assertTrue(request1.is_result_available())
            self.assertTrue(request2.is_result_available())

            self.assertEqual(request1.result(), 0)
            self.assertEqual(request2.result(), 0)

            # Waiting on an already empty pool must return and stay empty.
            pool.wait()
            self.assertEqual(len(pool), 0)
        finally:
            y.stop()
            x.stop()
Exemplo n.º 6
0
    def test25(self):
        """Pool test: two requests issued against one and the same code."""
        from amuse.rfi.async_request import AsyncRequestsPool

        code = ForTesting(self.exefile)

        sleep_request = code.do_sleep(1, return_request=True)
        echo_request = code.echo_int(2, return_request=True)
        both_requests = (sleep_request, echo_request)

        request_pool = AsyncRequestsPool()
        # Each request is waited on individually first, and only then
        # handed to the pool (an alternative using r1.join(r2) was left
        # disabled by the author).
        for pending in both_requests:
            pending.wait()
        for pending in both_requests:
            request_pool.add_request(pending)

        request_pool.waitall()

        self.assertEqual(echo_request.result(), 2)

        code.stop()
Exemplo n.º 7
0
class JobServer(object):
    """Dispatch ``Job`` objects to remote codes and collect the results.

    One ``RemoteCodeInterface`` is started per host. Submitted jobs are
    queued in ``job_list`` and handed to an idle code as soon as one is
    available. Running requests live in an ``AsyncRequestsPool``, and
    completed jobs are appended to an internal deque that is read through
    ``wait()``, ``waitall()`` and ``finished_jobs``.
    """

    def __init__(self,
                 hosts=None,
                 channel_type="mpi",
                 preamble=None,
                 retry_jobs=True,
                 no_wait=True,
                 verbose=True,
                 max_retries=2,
                 use_threading=False):
        """Initialise the bookkeeping state and connect to ``hosts``.

        Args:
            hosts: Hosts to start a code on; ``None`` means no hosts yet.
            channel_type: Channel type passed to each code that is started.
            preamble: If not ``None``, executed on every code right after
                it has started.
            retry_jobs: Whether a job that failed with something other
                than a ``RemoteCodeException`` is queued again.
            no_wait: With ``use_threading``, return once the first host is
                available instead of joining all start-up threads.
            verbose: Print progress messages.
            max_retries: Upper bound on the retries of a single job.
            use_threading: Start the codes in background threads.
        """
        self.hosts = []
        self.job_list = deque()
        self.idle_codes = []
        self.retry_jobs = retry_jobs
        self.max_retries = max_retries
        self._finished_jobs = deque()
        self.preamble = preamble
        self.pool = AsyncRequestsPool()
        self.number_available_codes = 0
        self.number_starting_codes = 0
        self.no_wait = no_wait
        self.last_finished_job = None
        self.use_threading = use_threading
        self.verbose = verbose
        if self.verbose:
            print("AMUSE JobServer launching")

        self.add_hosts(hosts=hosts, channel_type=channel_type)

    def no_hosts(self):
        """Return True when no code is available and none is starting."""
        return (self.number_available_codes == 0
                and self.number_starting_codes == 0)

    def add_hosts(self, hosts=None, channel_type="mpi"):
        """Start one remote code per entry of ``hosts``.

        Without threading the codes are started one after another. With
        threading each start-up runs in a daemon thread; the call then
        either joins all threads (``no_wait`` false) or polls until the
        first code is available or no start-up is pending.

        Args:
            hosts: Sized iterable of host names; ``None`` means none.
            channel_type: Channel type passed to each code.
        """
        # A None sentinel avoids sharing one default list between calls.
        if hosts is None:
            hosts = []
        # NOTE(review): this appends the whole list as a single element, so
        # ``self.hosts`` becomes a list of lists. ``extend`` may have been
        # intended; left unchanged because the attribute is public.
        self.hosts.append(hosts)
        if self.verbose:
            print("JobServer: connecting %i hosts" % len(hosts))
        if not self.use_threading:
            for host in hosts:
                self.number_starting_codes += 1
                self._startup(channel_type=channel_type,
                              hostname=host,
                              label=host,
                              copy_worker_code=True,
                              redirection="none")
        else:
            threads = []
            for host in hosts:
                kwargs = dict(channel_type=channel_type,
                              hostname=host,
                              label=host,
                              copy_worker_code=True,
                              redirection="none")
                threads.append(
                    threading.Thread(target=self._startup, kwargs=kwargs))
            for thread in threads:
                self.number_starting_codes += 1
                thread.daemon = True
                thread.start()
            if not self.no_wait:
                if self.verbose:
                    print("... waiting")
                for thread in threads:
                    thread.join()
            else:
                if self.verbose:
                    print("... waiting for first available host")
                while self.number_available_codes == 0 and self.number_starting_codes > 0:
                    sleep(0.1)
        if self.no_wait:
            if self.verbose:
                print("JobServer: launched")
        else:
            if self.verbose:
                print("JobServer: launched with", len(self.idle_codes),
                      "hosts")

    def _startup(self, *args, **kwargs):
        """Start one remote code and register it as idle or give it a job.

        A failed start-up is reported on stdout and only decrements the
        pending counter. On success the preamble (if any) is executed, the
        counters are updated, and the code either takes the oldest queued
        job or joins ``idle_codes``.
        """
        try:
            code = RemoteCodeInterface(*args, **kwargs)
        except Exception as ex:
            self.number_starting_codes -= 1
            print("JobServer: startup failed on", kwargs['hostname']
                  or "default")
            print(ex)
        else:
            if self.preamble is not None:
                code.execute(self.preamble)

            self.number_available_codes += 1
            self.number_starting_codes -= 1
            if self.no_wait:
                # n & (n - 1) == 0 holds when n is a power of two, so this
                # message is printed at 1, 2, 4, 8, ... available hosts.
                if self.number_available_codes & (self.number_available_codes -
                                                  1) == 0:
                    if self.verbose:
                        print("JobServer: hosts now available:",
                              self.number_available_codes)
                if self.number_starting_codes == 0:
                    if self.verbose:
                        print("JobServer: hosts in total:",
                              self.number_available_codes)
            if self.job_list:
                self._add_job(self.job_list.popleft(), code)
            else:
                self.idle_codes.append(code)

    def exec_(self, arg):
        """Execute ``arg`` on every idle code.

        Blocks until no start-up is pending and all jobs have finished.
        """
        while self.number_starting_codes > 0:
            sleep(0.1)
        self.waitall()
        for code in self.idle_codes:
            code.execute(arg)

    def submit_job(self, f, args=(), kwargs=None):
        """Queue ``f(*args, **kwargs)`` as a job and return the ``Job``.

        The oldest queued job is dispatched immediately when a code is idle.

        Args:
            f: Callable to run remotely.
            args: Positional arguments for ``f``.
            kwargs: Keyword arguments for ``f``; ``None`` means none.
        """
        # A None sentinel avoids sharing one default dict between all jobs.
        if kwargs is None:
            kwargs = {}
        if len(self.pool) == 0 and not self.job_list:
            if self.verbose:
                print("JobServer: submitting first job on queue")
        job = Job(f, args, kwargs)
        self.job_list.append(job)
        if self.idle_codes:
            self._add_job(self.job_list.popleft(), self.idle_codes.pop())
        return job

    def _wait_until_job_running(self):
        """Poll while jobs are queued but nothing is running in the pool.

        Raises:
            Exception: If codes are counted as available in that state, or
                if no code is available and none is still starting.
        """
        while len(self.pool) == 0 and self.job_list:
            if self.number_available_codes > 0:
                raise Exception("JobServer: this should not happen")
            if self.number_starting_codes == 0:
                raise Exception("JobServer: no codes available")
            # Yield to the start-up threads rather than spinning at full
            # speed; same interval as the other polling loops in this class.
            sleep(0.1)

    def wait(self):
        """Wait for the next finished job.

        Returns:
            bool: True when a job was stored in ``last_finished_job``,
            False when nothing is queued or running.
        """
        if self._finished_jobs:
            self.last_finished_job = self._finished_jobs.popleft()
            return True
        elif len(self.pool) == 0 and not self.job_list:
            if self.verbose:
                print("JobServer: no more jobs on queue or running")
            return False
        else:
            self._wait_until_job_running()
            self.pool.wait()
            self.last_finished_job = self._finished_jobs.popleft()
            return True

    def waitall(self):
        """Block until no job is queued or running.

        Finished jobs stay in the internal deque; ``last_finished_job``
        refers to the most recently finished one.
        """
        while len(self.pool) > 0 or self.job_list:
            # Checked on every pass, not only before the first: if every
            # code is lost while jobs remain queued, this raises instead
            # of waiting on an empty pool again and again.
            self._wait_until_job_running()
            self.pool.wait()
            self.last_finished_job = self._finished_jobs[-1]

    @property
    def finished_jobs(self):
        """Generator that pops and yields finished jobs, oldest first."""
        while self._finished_jobs:
            yield self._finished_jobs.popleft()

    def _finalize_job(self, request, job, code):
        """Pool callback: record the outcome of ``job`` and reuse ``code``.

        A failure that is not a ``RemoteCodeException`` drops the code and,
        if retries are enabled and not exhausted, queues a new ``Job`` with
        an incremented retry count. Otherwise the code returns to the idle
        list. Either way the next queued job is dispatched if possible and
        ``job`` is appended to the finished deque.
        """
        try:
            job.result = request.result()
            job.err = None
        except Exception as ex:
            job.result = None
            job.err = ex
        if job.err and not isinstance(job.err, RemoteCodeException):
            # Only removes the local name; the code is simply not returned
            # to the idle list.
            del code
            self.number_available_codes -= 1
            if self.retry_jobs and job.retries < self.max_retries:
                retry = Job(job.f, job.args, job.kwargs, job.retries + 1)
                self.job_list.append(retry)
        else:
            self.idle_codes.append(code)
        if self.job_list and self.idle_codes:
            self._add_job(self.job_list.popleft(), self.idle_codes.pop())
            if not self.job_list:
                if self.verbose:
                    print("JobServer: last job dispatched")
        self._finished_jobs.append(job)

    def _add_job(self, job, code):
        """Start ``job`` on ``code`` and register its request with the pool."""
        job.request = code.async_func(job.f, *job.args, **job.kwargs)
        self.pool.add_request(job.request, self._finalize_job, [job, code])

    def __del__(self):
        """Finish outstanding jobs, stop idle codes and warn about leftovers."""
        if not self.no_hosts():
            self.waitall()
        if self.job_list:
            warnings.warn(
                "JobServer: Warning: shutting down with unfinished jobs")
        for code in self.idle_codes:
            code.stop()
        if self.number_starting_codes > 0:
            warnings.warn(
                "JobServer: Warning: some hosts startup threads possibly blocking"
            )
Exemplo n.º 8
0
           channel_type='sockets',
           number_of_workers=1,
           case='bomex')

# Explicitly initialize both codes up front; otherwise this would happen
# implicitly on the first evolve_model call.
d1.commit_parameters()
d2.commit_parameters()

# add parameter redirection='none' to see DALES diagnostics output

target_time = 120 | units.s  # target time

# Create a pool for managing asynchronous requests; `t` times how long it
# takes to issue the requests.
t = time.time()
pool = AsyncRequestsPool()

# Issue one asynchronous evolve_model request per code and register each
# with the pool.
request1 = d1.evolve_model.asynchronous(target_time, exactEnd=True)
pool.add_request(request1)

request2 = d2.evolve_model.asynchronous(target_time, exactEnd=True)
pool.add_request(request2)

print('Generating asynchronous requests  %f s' % (time.time() - t))

# Wait for the requests to finish; `t` is reset so the second message
# reports only the time spent inside pool.waitall().
print('Calling pool.waitall()')
t = time.time()
pool.waitall()
print('pool.waitall() returned %f s' % (time.time() - t))