Example #1
class IPClusterEnsembleJPL2(SurveyEnsemble):
    """Parallelized survey ensemble based on IPython parallel (ipcluster)
    
    """
    def __init__(self, ensemble_controller=None, ensemble_mode='', **specs):

        SurveyEnsemble.__init__(self, **specs)

        # allow bail-out
        if 'init-only' in ensemble_mode:
            self.vprint("SurveyEnsemble: initialize-only mode")
            return
        if 'standalone' in ensemble_mode:
            self.vprint("SurveyEnsemble: standalone mode: no ipyparallel")
            self.standalone = True
            return
        self.standalone = False

        self.verb = specs.get('verbose', True)

        # specify the cluster
        if ensemble_controller:
            if '.json' in ensemble_controller:
                arglist = dict(url_file=ensemble_controller)
            else:
                arglist = dict(profile=ensemble_controller)
        else:
            arglist = dict()
        # access the cluster
        self.rc = Client(**arglist)
        self.dview = self.rc[:]
        self.dview.block = True
        # these are the imports needed by the run_one()
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module, EXOSIMS_local, \
                time, os, os.path, random, numpy, cPickle, gzip, traceback
        if 'logger' in specs:
            specs.pop('logger')
        # pop the seed from the specs to force re-seeding
        if 'seed' in specs:
            specs.pop('seed')
        # push the specs to the engines
        self.dview.push(dict(specs=specs))
        # instantiate a SurveySimulation in the global workspace on each engine
        res = self.dview.execute(
            "SS = EXOSIMS.util.get_module.get_module_from_specs" +
            "(specs, 'SurveySimulation')(**specs)")
        self.vprint("Created SurveySimulation objects on %d engines." %
                    len(self.rc.ids))
        # pull the seeds from each engine
        seeds = self.dview.pull('SS.seed', block=True)
        # print stdout/stderr of each engine's activity - this is likely to be captured
        # in the invoking function.  Note, we don't have access to the parent SS.seed here.
        if True:
            for row, erow, id, seed in zip(res.stdout, res.stderr,
                                           res.engine_id, seeds):
                print('==== Engine = %d, Seed = %d ====' % (id, seed))
                if erow:
                    msg = ''.join([
                        '[#%d] Error: %s\n' % (id, line)
                        for line in erow.split('\n') if line
                    ])
                    print(msg)
                    sys.stderr.write(msg)
                print(''.join(
                    ['[#%d] %s\n' % (id, line) for line in row.split('\n')]))
        # we will use the load-balanced view for cluster Exosims runs
        self.lview = self.rc.load_balanced_view()

    def run_ensemble(self,
                     sim,
                     nb_run_sim,
                     run_one=None,
                     genNewPlanets=True,
                     rewindPlanets=True,
                     kwargs={}):

        if self.standalone:
            return self.run_ensemble_stand(sim, nb_run_sim, run_one,
                                           genNewPlanets, rewindPlanets,
                                           kwargs)
        else:
            return self.run_ensemble_ipp(sim, nb_run_sim, run_one,
                                         genNewPlanets, rewindPlanets, kwargs)

    def run_ensemble_stand(self,
                           sim,
                           nb_run_sim,
                           run_one,
                           genNewPlanets=True,
                           rewindPlanets=True,
                           kwargs={}):
        r'''Stand-alone simulation runner.'''
        t1 = time.time()
        res = []
        for j in range(nb_run_sim):
            if nb_run_sim > 1:
                print('Survey simulation: %s/%s' % (j + 1, int(nb_run_sim)))
            seed = sim.seed
            fn = os.path.join(kwargs['outpath'], 'log',
                              'log-%d.out' % (seed, ))
            with RedirectStdStreams(stdout=open(fn, 'w')):
                ar = run_one(genNewPlanets=genNewPlanets,
                             rewindPlanets=rewindPlanets,
                             **kwargs)
            res.append(ar)
        t2 = time.time()
        self.vprint("Completed %s simulation(s) in %d sec" %
                    (int(nb_run_sim), t2 - t1))
        return res

    def run_ensemble_ipp(self,
                         sim,
                         nb_run_sim,
                         run_one=None,
                         genNewPlanets=True,
                         rewindPlanets=True,
                         kwargs={}):

        if not run_one:
            raise ValueError('Require a run_one function to be provided')
        t1 = time.time()
        async_res = []
        for j in range(nb_run_sim):
            ar = self.lview.apply_async(run_one,
                                        genNewPlanets=genNewPlanets,
                                        rewindPlanets=rewindPlanets,
                                        **kwargs)
            async_res.append(ar)
        print("Submitted %d tasks." % len(async_res))
        ar = self.rc._asyncresult_from_jobs(async_res)
        # ad hoc status-reporting
        progress = 0
        while not ar.ready():
            ar.wait(10.)
            clear_output(wait=True)
            if ar.progress == 0:
                forecast = 'not yet able to forecast time remaining.'
            elif ar.progress > progress:
                # update forecast right after we learn more about job-completion rate,
                # otherwise, the accuracy of the rate is diminished
                progress = ar.progress
                timeleft = ar.elapsed / ar.progress * (nb_run_sim -
                                                       ar.progress)
                if timeleft > 3600.:
                    timeleftstr = "%2.2f hours" % (timeleft / 3600.)
                elif timeleft > 60.:
                    timeleftstr = "%2.2f minutes" % (timeleft / 60.)
                else:
                    timeleftstr = "%2.2f seconds" % timeleft
                forecast = 'about ' + timeleftstr + ' to go.'

            print("%4i/%i tasks finished after %4i s -- %s" %
                  (ar.progress, nb_run_sim, ar.elapsed, forecast),
                  end="")
            sys.stdout.flush()
        #self.rc.wait(async_res)
        #self.rc.wait_interactive(async_res)
        t2 = time.time()
        print("\nCompleted in %d sec" % (t2 - t1))
        # output the ipp engine stdout's to log-files
        for j, ar1 in enumerate(async_res):
            # retrieve result - just the seed, actually
            seed1 = ar1.get()
            fn = os.path.join(kwargs['outpath'], 'log', 'log-%s.out' % seed1)
            with open(fn, 'w') as fp:
                for line in ar1.stdout:
                    fp.write(line)
            if ar1.stderr:
                fn = os.path.join(kwargs['outpath'], 'log',
                                  'log-%s.err' % seed1)
                with open(fn, 'w') as fp:
                    for line in ar1.stderr:
                        fp.write(line)
        # return the list of seeds
        return [ar.get() for ar in async_res]
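
Note: the standalone branch above relies on a RedirectStdStreams helper that is not defined in this snippet (it comes from EXOSIMS_local). A minimal sketch of such a context manager, assuming all it needs to do is swap sys.stdout/sys.stderr for the duration of the block:

import sys

class RedirectStdStreams(object):
    """Temporarily redirect stdout/stderr.

    Hypothetical stand-in for the EXOSIMS_local helper used above; the
    real implementation may differ.
    """
    def __init__(self, stdout=None, stderr=None):
        self._stdout = stdout or sys.stdout
        self._stderr = stderr or sys.stderr

    def __enter__(self):
        self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
        sys.stdout, sys.stderr = self._stdout, self._stderr

    def __exit__(self, exc_type, exc_value, tb):
        # flush and restore the original streams even if the block raised
        sys.stdout.flush()
        sys.stderr.flush()
        sys.stdout, sys.stderr = self.old_stdout, self.old_stderr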
Example #2
class IPClusterEnsemble(SurveyEnsemble):
    """Parallelized suvey ensemble based on IPython parallel (ipcluster)
    
    """

    def __init__(self, **specs):
        
        SurveyEnsemble.__init__(self, **specs)

        self.verb = specs.get('verbose', True)
        
        # access the cluster
        self.rc = Client()
        self.dview = self.rc[:]
        self.dview.block = True
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module, \
                os, os.path, time, random, pickle, traceback, numpy
        if 'logger' in specs:
            specs.pop('logger')
        if 'seed' in specs:
            specs.pop('seed')
        self.dview.push(dict(specs=specs))
        self.vprint("Building SurveySimulation object on all workers.")
        res = self.dview.execute("SS = EXOSIMS.util.get_module.get_module(specs['modules'] \
                ['SurveySimulation'], 'SurveySimulation')(**specs)")
        
        res2 = self.dview.execute("SS.reset_sim()")

        self.vprint("Created SurveySimulation objects on %d engines."%len(self.rc.ids))
        #for row in res.stdout:
        #    self.vprint(row)

        self.lview = self.rc.load_balanced_view()

        self.maxNumEngines = len(self.rc.ids)

    def run_ensemble(self, sim, nb_run_sim, run_one=None, genNewPlanets=True,
        rewindPlanets=True, kwargs={}):
        """
        Args:
            sim:

        """
        hangingRunsOccurred = False  # keeps track of whether hanging runs have occurred
        t1 = time.time()
        async_res = []
        for j in range(nb_run_sim):
            ar = self.lview.apply_async(run_one, genNewPlanets=genNewPlanets,
                    rewindPlanets=rewindPlanets, **kwargs)
            async_res.append(ar)
        
        print("Submitted %d tasks."%len(async_res))
        
        engine_pids = self.rc[:].apply(os.getpid).get_dict()
        #ar2 = self.lview.apply_async(os.getpid)
        #pids = ar2.get_dict()
        print('engine_pids')
        print(engine_pids)

        runStartTime = time.time()  # job batch start time
        avg_time_per_run = 0.
        tmplenoutstandingset = nb_run_sim
        tLastRunFinished = time.time()
        ar = self.rc._asyncresult_from_jobs(async_res)
        while not ar.ready():
            ar.wait(10.)
            clear_output(wait=True)
            if ar.progress > 0:
                timeleft = ar.elapsed/ar.progress * (nb_run_sim - ar.progress)
                if timeleft > 3600.:
                    timeleftstr = "%2.2f hours"%(timeleft/3600.)
                elif timeleft > 60.:
                    timeleftstr = "%2.2f minutes"%(timeleft/60.)
                else:
                    timeleftstr = "%2.2f seconds"%timeleft
            else:
                timeleftstr = "who knows"

            # Terminate hanging runs
            outstandingset = self.rc.outstanding  # msg_ids submitted but whose results have not been received
            if len(outstandingset) > 0 and len(outstandingset) < nb_run_sim:  # at least 1 run still going and we are past the start
                avg_time_per_run = (time.time() - runStartTime)/float(nb_run_sim - len(outstandingset))  # average time per finished run
                if len(outstandingset) < tmplenoutstandingset:  # the scheduler has finished a run
                    tmplenoutstandingset = len(outstandingset)  # should decrease by ~1, or by the number of cores
                    tLastRunFinished = time.time()  # record when the last simulation finished (right now)
                    #self.vprint("tmplenoutstandingset %d, tLastRunFinished %0.6f"%(tmplenoutstandingset,tLastRunFinished))
                if time.time() - tLastRunFinished > avg_time_per_run*(1. + self.maxNumEngines*2.)*4.:
                    self.vprint('Aborting %d outstanding jobs' % len(self.rc.outstanding))
                    self.vprint('queue_status')
                    self.vprint(str(self.rc.queue_status()))
                    self.rc.abort()
                    ar.wait(20)
                    # SIGTERM any engine that still reports queued tasks
                    for engineInd in [eind for eind in np.arange(self.maxNumEngines) if self.rc.queue_status()[eind]['tasks'] > 0]:
                        os.kill(engine_pids[engineInd], 15)
                        time.sleep(20)
                    # stop the cluster twice, sleeping rather than waiting for ipcluster to fully terminate
                    stopIPClusterCommand = subprocess.Popen(['ipcluster', 'stop'])
                    stopIPClusterCommand.wait()
                    time.sleep(60)
                    stopIPClusterCommand = subprocess.Popen(['ipcluster', 'stop'])
                    stopIPClusterCommand.wait()
                    time.sleep(60)
                    hangingRunsOccurred = True
                    break

            print("%4i/%i tasks finished after %4i s. About %s to go." % (ar.progress, nb_run_sim, ar.elapsed, timeleftstr), end="")
            sys.stdout.flush()

        t2 = time.time()
        print("\nCompleted in %d sec" % (t2 - t1))
        
        if hangingRunsOccurred:  # return a sentinel instead of per-run results
            res = [1]
        else:
            res = [ar.get() for ar in async_res]
        
        return res
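
The abort rule above fires once no task has finished for longer than avg_time_per_run * (1 + 2*maxNumEngines) * 4 seconds. Factored out as a standalone helper (the function name and interface are ours, for illustration only):

def hang_timeout(elapsed, n_total, n_outstanding, n_engines):
    """Seconds of silence after which outstanding jobs are presumed hung.

    elapsed       -- wall-clock seconds since the batch was submitted
    n_total       -- total number of submitted simulations
    n_outstanding -- simulations not yet finished
    n_engines     -- number of ipyparallel engines
    """
    n_done = n_total - n_outstanding
    if n_done == 0:
        return float('inf')  # no completion-rate information yet
    avg_time_per_run = elapsed / float(n_done)
    # generous allowance: one average run, plus two rounds across all
    # engines, padded by an extra factor of 4
    return avg_time_per_run * (1. + 2. * n_engines) * 4.

# e.g. 20 of 100 runs done after 600 s on 8 engines:
# avg = 30 s, so abort after 30 * (1 + 16) * 4 = 2040 s without a finish
print(hang_timeout(600., 100, 80, 8))  # 2040.0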
Example #3
class IPClusterEnsemble(SurveyEnsemble):
    """Parallelized suvey ensemble based on IPython parallel (ipcluster)
    
    """
    def __init__(self, **specs):

        SurveyEnsemble.__init__(self, **specs)

        self.verb = specs.get('verbose', True)

        # access the cluster
        self.rc = Client()
        self.dview = self.rc[:]
        self.dview.block = True
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module, \
                os, os.path, time, random, cPickle, traceback
        if 'logger' in specs:
            specs.pop('logger')
        if 'seed' in specs:
            specs.pop('seed')
        self.dview.push(dict(specs=specs))
        res = self.dview.execute(
            "SS = EXOSIMS.util.get_module.get_module("
            "specs['modules']['SurveySimulation'], 'SurveySimulation')(**specs)")

        self.vprint("Created SurveySimulation objects on %d engines." %
                    len(self.rc.ids))
        #for row in res.stdout:
        #    self.vprint(row)

        self.lview = self.rc.load_balanced_view()

    def run_ensemble(self,
                     sim,
                     nb_run_sim,
                     run_one=None,
                     genNewPlanets=True,
                     rewindPlanets=True,
                     kwargs={}):

        t1 = time.time()
        async_res = []
        for j in range(nb_run_sim):
            ar = self.lview.apply_async(run_one,
                                        genNewPlanets=genNewPlanets,
                                        rewindPlanets=rewindPlanets,
                                        **kwargs)
            async_res.append(ar)

        print("Submitted %d tasks." % len(async_res))

        ar = self.rc._asyncresult_from_jobs(async_res)
        while not ar.ready():
            ar.wait(10.)
            clear_output(wait=True)
            if ar.progress > 0:
                timeleft = ar.elapsed / ar.progress * (nb_run_sim -
                                                       ar.progress)
                if timeleft > 3600.:
                    timeleftstr = "%2.2f hours" % (timeleft / 3600.)
                elif timeleft > 60.:
                    timeleftstr = "%2.2f minutes" % (timeleft / 60.)
                else:
                    timeleftstr = "%2.2f seconds" % timeleft
            else:
                timeleftstr = "who knows"

            print("%4i/%i tasks finished after %4i s. About %s to go." %
                  (ar.progress, nb_run_sim, ar.elapsed, timeleftstr),
                  end="")
            sys.stdout.flush()

        #self.rc.wait(async_res)
        #self.rc.wait_interactive(async_res)
        t2 = time.time()
        print("\nCompleted in %d sec" % (t2 - t1))

        res = [ar.get() for ar in async_res]

        return res
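
A driver for one of these ensembles might look like the sketch below. The body of run_one executes on the engines, where SS was created in each engine's global workspace during __init__; the exact body and the script path are illustrative rather than taken verbatim from EXOSIMS.

def run_one(genNewPlanets=True, rewindPlanets=True, **kwargs):
    # Runs on an engine: 'SS' lives in the engine's global namespace,
    # created when the ensemble was constructed.
    SS.run_sim()
    res = SS.DRM[:]  # copy the mission results accumulated by this run
    # reset so the next task scheduled on this engine starts clean
    SS.reset_sim(genNewPlanets=genNewPlanets, rewindPlanets=rewindPlanets)
    return res

# illustrative driver:
# sim = EXOSIMS.MissionSim.MissionSim('sampleScript.json')
# res = sim.SurveyEnsemble.run_ensemble(sim, nb_run_sim=100, run_one=run_one)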
Example #4
class IPClusterEnsembleJPL(SurveyEnsemble):
    """Parallelized survey ensemble based on IPython parallel (ipcluster)
    
    """
    def __init__(self, ensemble_controller=None, ensemble_mode=None, **specs):

        SurveyEnsemble.__init__(self, **specs)

        # allow bail-out
        if ensemble_mode and 'init-only' in ensemble_mode:
            self.vprint("SurveyEnsemble: initialize-only mode")
            return

        self.verb = specs.get('verbose', True)

        # specify the cluster
        if ensemble_controller:
            if '.json' in ensemble_controller:
                arglist = dict(url_file=ensemble_controller)
            else:
                arglist = dict(profile=ensemble_controller)
        else:
            arglist = dict()
        # access the cluster
        self.rc = Client(**arglist)
        self.dview = self.rc[:]
        self.dview.block = True
        # these are the imports needed by the run_one()
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module, \
                time, os, os.path, random, cPickle, gzip, traceback
        if 'logger' in specs:
            specs.pop('logger')
        if 'seed' in specs:
            specs.pop('seed')
        self.dview.push(dict(specs=specs))
        res = self.dview.execute(
            "SS = EXOSIMS.util.get_module.get_module_from_specs" +
            "(specs, 'SurveySimulation')(**specs)")

        self.vprint("Created SurveySimulation objects on %d engines." %
                    len(self.rc.ids))
        # optionally print stdout of each engine's activity
        if False:
            for row, id in zip(res.stdout, res.engine_id):
                print(''.join(
                    ['[#%d] %s\n' % (id, line) for line in row.split('\n')]))

        self.lview = self.rc.load_balanced_view()

    def run_ensemble(self,
                     sim,
                     nb_run_sim,
                     run_one=None,
                     genNewPlanets=True,
                     rewindPlanets=True,
                     kwargs={}):

        if not run_one:
            raise ValueError('Require a run_one function to be provided')
        t1 = time.time()
        async_res = []
        for j in range(nb_run_sim):
            ar = self.lview.apply_async(run_one,
                                        genNewPlanets=genNewPlanets,
                                        rewindPlanets=rewindPlanets,
                                        **kwargs)
            async_res.append(ar)

        print("Submitted %d tasks." % len(async_res))

        ar = self.rc._asyncresult_from_jobs(async_res)
        while not ar.ready():
            ar.wait(10.)
            clear_output(wait=True)
            if ar.progress > 0:
                timeleft = ar.elapsed / ar.progress * (nb_run_sim -
                                                       ar.progress)
                if timeleft > 3600.:
                    timeleftstr = "%2.2f hours" % (timeleft / 3600.)
                elif timeleft > 60.:
                    timeleftstr = "%2.2f minutes" % (timeleft / 60.)
                else:
                    timeleftstr = "%2.2f seconds" % timeleft
                forecast = 'about ' + timeleftstr + ' to go.'
            else:
                forecast = 'not yet able to forecast time remaining.'

            print("%4i/%i tasks finished after %4i s -- %s" %
                  (ar.progress, nb_run_sim, ar.elapsed, forecast),
                  end="")
            sys.stdout.flush()

        #self.rc.wait(async_res)
        #self.rc.wait_interactive(async_res)
        t2 = time.time()
        print("\nCompleted in %d sec" % (t2 - t1))

        res = [ar.get() for ar in async_res]

        return res
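
The ensemble_controller argument selects how the Client locates the controller: a value containing '.json' is treated as a connection file, anything else as an IPython profile name. In plain ipyparallel terms (the path and profile name below are illustrative):

from ipyparallel import Client

# explicit connection file written by the controller (hypothetical path)
rc = Client(url_file='/home/user/.ipython/profile_default/security/ipcontroller-client.json')

# or a named IPython profile (hypothetical name)
rc = Client(profile='cluster')

# or, with no arguments, the default profile
rc = Client()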
Example #5
def power_of_test(data1,
                  data2,
                  rvs_func='rvs_pairs',
                  tests=['chi2_2samp'],
                  rvs_key={},
                  test_key={},
                  parallel=None,
                  sync=True):
    """Compute the corresponding p-values for each histrogram pairs from the random variates of the given 2 samples/frequencies for size_times.
    
    Parameters
    ----------
    data1, data2 : sequence of 1-D ndarrays
        Input data. Observed samples or frequencies.
    rvs_func : [callable|str], optional, default : "rvs_pairs"
        The random variates function. The rvs_func can be either a callable or one of the following strings::

            String                     Description
            "rvs_pairs"                Compute the histogram pairs from the random
                                       variates of the given 2 samples/frequencies
                                       for size_times. 

    tests : ([callable|str],...), optional, default : ["chi2_2samp"]
        A list of *test* statistical functions. The *test* can be either a callable or one of the following strings::

            String                     Description
            "chi2_2samp"               Read TS.chi2_2samp for further information.
            "BDM_2samp"                Read TS.BDM_2samp for further information.
            "likelihoodratio_ksamp"    Read TS.likelihoodratio_ksamp for further information.
            "likelihoodvalue_ksamp"    Read TS.likelihoodvalue_ksamp for further information.
            "ks_2samp"                 Read TS.ks_2samp for further information.
            "anderson_ksamp"           Read TS.anderson_ksamp for further information.
            "CVM_2samp"                Read TS.CVM_2samp for further information.

    rvs_key : dict, optional, default : {}
        Keyword arguments for the rvs function, rvs_func.
    test_key : dict, optional
        Keyword arguments for the test statistical function, test.
    parallel : bool, optional, default : None
        If True, use the IPyParallel package to do the computation in parallel. If parallel is None,
        the global variable PARALLEL will be used instead.
    sync : bool, optional, default : True
        When sync is False, an IPyParallel AsyncResult object will be returned instead. Only has
        an effect when parallel is True.
    
    Returns
    -------
    [p1, p2, ...] : 1-D array
        The corresponding p-values for each histogram pair.
    """
    if parallel is None:
        parallel = PARALLEL
    if parallel:
        try:
            global client
            client = Client(**ipp_profile)
            size = rvs_key['size']
            N = len(client)
            jobs = []
            for i in range(N):
                rvs_key['size'] = (size // N +
                                   1) if (i < size % N) else size // N
                jobs.append(client[client.ids[i]].apply_async(
                    power_of_test, data1, data2, rvs_func, tests, rvs_key,
                    test_key, False))
            ars = client._asyncresult_from_jobs(jobs)
            if sync:
                ars.wait_interactive()
                ret = {}
                for key, val in ars.get():
                    ret.setdefault(key, []).extend(val)
            else:
                return ars
        finally:
            client.close()
        return ret
    if isinstance(rvs_func, str):
        rvs_func = globals()[rvs_func]
    if not isinstance(tests, (list, tuple)):
        tests = [tests]
    tests = [(t, getattr(TS, t)) if isinstance(t, str) else (str(t), t)
             for t in tests]
    ret = {}
    for rvs1, rvs2 in rvs_func(data1, data2, **rvs_key):
        for tname, test in tests:
            ret.setdefault(tname, []).append(
                test(rvs1, rvs2, binned=True, **test_key).pvalue)
    return ret
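
The parallel branch splits the requested size across the N engines so the chunk sizes differ by at most one; the arithmetic, pulled out into a helper (the name is ours, for illustration):

def split_size(size, N):
    """Per-engine chunk sizes used by the parallel branch above."""
    return [(size // N + 1) if (i < size % N) else size // N
            for i in range(N)]

print(split_size(10, 4))       # [3, 3, 2, 2]
print(sum(split_size(10, 4)))  # 10 -- the chunks always sum to size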
Example #6
class IPClusterEnsemble(SurveyEnsemble):
    """Parallelized suvey ensemble based on IPython parallel (ipcluster)
    
    """
    def __init__(self, **specs):

        SurveyEnsemble.__init__(self, **specs)

        self.verb = specs.get('verbose', True)

        # access the cluster
        self.rc = Client()
        self.dview = self.rc[:]
        self.dview.block = True
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module, \
                os, os.path, time, random, cPickle, traceback
        if 'logger' in specs:
            specs.pop('logger')
        if 'seed' in specs:
            specs.pop('seed')
        self.dview.push(dict(specs=specs))
        res = self.dview.execute(
            "SS = EXOSIMS.util.get_module.get_module("
            "specs['modules']['SurveySimulation'], 'SurveySimulation')(**specs)")

        res2 = self.dview.execute("SS.reset_sim()")

        self.vprint("Created SurveySimulation objects on %d engines." %
                    len(self.rc.ids))
        #for row in res.stdout:
        #    self.vprint(row)

        self.lview = self.rc.load_balanced_view()

        self.maxNumEngines = len(self.rc.ids)

    def run_ensemble(self,
                     sim,
                     nb_run_sim,
                     run_one=None,
                     genNewPlanets=True,
                     rewindPlanets=True,
                     kwargs={}):
        """
        Args:
            sim:

        """

        t1 = time.time()
        async_res = []
        for j in range(nb_run_sim):
            ar = self.lview.apply_async(run_one,
                                        genNewPlanets=genNewPlanets,
                                        rewindPlanets=rewindPlanets,
                                        **kwargs)
            async_res.append(ar)

        print("Submitted %d tasks." % len(async_res))

        runStartTime = time.time()  # job batch start time
        avg_time_per_run = 0.
        tmplenoutstandingset = nb_run_sim
        tLastRunFinished = time.time()
        ar = self.rc._asyncresult_from_jobs(async_res)
        while not ar.ready():
            ar.wait(10.)
            clear_output(wait=True)
            if ar.progress > 0:
                timeleft = ar.elapsed / ar.progress * (nb_run_sim -
                                                       ar.progress)
                if timeleft > 3600.:
                    timeleftstr = "%2.2f hours" % (timeleft / 3600.)
                elif timeleft > 60.:
                    timeleftstr = "%2.2f minutes" % (timeleft / 60.)
                else:
                    timeleftstr = "%2.2f seconds" % timeleft
            else:
                timeleftstr = "who knows"

            # Terminate hanging runs
            outstandingset = self.rc.outstanding  # msg_ids submitted but whose results have not been received
            if len(outstandingset) > 0 and len(outstandingset) < nb_run_sim:  # at least 1 run still going and we are past the start
                avg_time_per_run = (time.time() - runStartTime) / float(
                    nb_run_sim - len(outstandingset))  # average time per finished run
                if len(outstandingset) < tmplenoutstandingset:  # the scheduler has finished a run
                    tmplenoutstandingset = len(outstandingset)  # should decrease by ~1, or by the number of cores
                    tLastRunFinished = time.time()  # record when the last simulation finished (right now)
                    #self.vprint("tmplenoutstandingset %d, tLastRunFinished %0.6f"%(tmplenoutstandingset,tLastRunFinished))
                if time.time() - tLastRunFinished > avg_time_per_run * (
                        1 + self.maxNumEngines * 2):
                    self.vprint('Aborting %d outstanding jobs' % len(self.rc.outstanding))
                    # abort() should cancel all outstanding jobs, but it may
                    # not stop tasks that are already running on an engine
                    self.rc.abort()

            print("%4i/%i tasks finished after %4i s. About %s to go." %
                  (ar.progress, nb_run_sim, ar.elapsed, timeleftstr),
                  end="")
            sys.stdout.flush()

        t2 = time.time()
        print("\nCompleted in %d sec" % (t2 - t1))

        res = [ar.get() for ar in async_res]

        return res