Example #1
# Imports needed by this snippet; the module paths follow the import style of
# Example #34 below.
import os
import time
from os.path import expanduser

from independent_jobs.engines.BatchClusterParameters import BatchClusterParameters
from independent_jobs.engines.SerialComputationEngine import SerialComputationEngine
from independent_jobs.engines.SlurmComputationEngine import SlurmComputationEngine
from independent_jobs.tools.Log import Log
from independent_jobs.tools.Log import logger


def prepare_engine(submit_type='local',
                   duration_job_min=60*4):
    # ---------------------
    Log.set_loglevel(20)
    logger.info("Start")

    foldername = expanduser("~")+'/slurm_jobs'
    if not os.path.exists(foldername):
        os.makedirs(foldername)

    logger.info("Setting engine folder to %s" % foldername)
    logger.info("Creating batch parameter instance")
    johns_slurm_hack = "#SBATCH --partition=intel-ivy,wrkstn,compute"
    timestr = time.strftime("%Y%m%d-%H%M%S")
    batch_parameters = BatchClusterParameters(max_walltime=duration_job_min,
        foldername=foldername,
        job_name_base="sim_"+timestr+"_",
        parameter_prefix=johns_slurm_hack)

    if submit_type == 'slurm':
        logger.info("Creating slurm engine instance")
        engine = SlurmComputationEngine(batch_parameters)
    elif submit_type == 'local':
        logger.info("Creating serial engine instance")
        engine = SerialComputationEngine()
    else:
        # avoid returning an undefined engine for unexpected submit types
        raise ValueError("Unknown submit_type: %s" % submit_type)
    # ---------------------

    return engine
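The engine returned by prepare_engine() is used with the submit/wait/collect pattern that the later examples show. A minimal usage sketch, assuming the MyJob and ScalarResultAggregator classes that appear in Examples #14 and #34:

    # build a local engine and submit a few jobs; each submission returns an
    # aggregator handle
    engine = prepare_engine(submit_type='local')
    aggregators = []
    for i in range(3):
        aggregators.append(engine.submit_job(MyJob(ScalarResultAggregator())))

    # block until every submitted job has finished
    engine.wait_for_all()

    # finalize each aggregator and pull out the scalar results
    for agg in aggregators:
        agg.finalize()
        print(agg.get_final_result().result)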
Example #2
    def compute(self):

        p = self.p
        data_source = self.data_source
        r = self.rep
        n = self.n
        job_func = self.job_func
        data = data_source.sample(n, seed=r)
        with util.ContextTimer() as t:
            tr, te = data.split_tr_te(tr_proportion=tr_proportion, seed=r + 21)
            prob_label = self.prob_label
            logger.info("computing. %s. prob=%s, r=%d,\
                    n=%d" % (job_func.__name__, prob_label, r, n))

            job_result = job_func(p, data_source, tr, te, r)

            # create SingleResult instance
            result = SingleResult(job_result)
            # submit the result to my own aggregator
            self.aggregator.submit_result(result)
            func_name = job_func.__name__
        logger.info("done. ex2: %s, prob=%s, r=%d, n=%d. Took: %.3g s " %
                    (func_name, prob_label, r, n, t.secs))

        # save result
        fname = "%s-%s-n%d_r%d_a%.3f_trp%.2f.p" % (
            prob_label,
            func_name,
            n,
            r,
            alpha,
            tr_proportion,
        )
        glo.ex_save_result(ex, job_result, prob_label, fname)
    def compute(self):
        
        sample_source = self.sample_source 
        r = self.rep
        ni = self.ni 
        n = self.n
        job_func = self.job_func
        logger.info("computing. %s. r=%d, n=%d"%(job_func.__name__, r, n))

        tst_data = sample_source.sample(n, seed=r)
        tr, te = tst_data.split_tr_te(tr_proportion=tr_proportion, seed=r+20 )
        prob_label = self.prob_label
        test_result = job_func(prob_label, tr, te, r, ni, n)

        # create SingleResult instance
        result = SingleResult(test_result)
        # submit the result to my own aggregator
        self.aggregator.submit_result(result)
        logger.info("done. ex1: %s, r=%d, n=%d,  "%(job_func.__name__, r, n))

        # save result
        func_name = job_func.__name__
        fname = '%s-%s-J%d_r%d_n%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, n, alpha, tr_proportion)
        glo.ex_save_result(ex, test_result, prob_label, fname)
    def compute(self):
        
        sample_source = self.sample_source 
        r = self.rep
        d = sample_source.dim()
        job_func = self.job_func
        logger.info("computing. %s. r=%d, d=%d"%(job_func.__name__, r, d))

        # sample_size is a global variable
        tst_data = sample_source.sample(sample_size, seed=r)
        tr, te = tst_data.split_tr_te(tr_proportion=tr_proportion, seed=r+20 )
        prob_label = self.prob_label
        test_result = job_func(sample_source, tr, te, r, self.n_locs)

        # create SingleResult instance
        result = SingleResult(test_result)
        # submit the result to my own aggregator
        self.aggregator.submit_result(result)
        logger.info("done. ex2: %s, r=%d, d=%d,  "%(job_func.__name__, r, d))

        # save result
        func_name = job_func.__name__
        J = self.n_locs
        fname = '%s-%s-J%d_n%d_r%d_a%.3f_trp%.2f.p' \
            %(prob_label, func_name, J, sample_size, r, alpha, tr_proportion)
        glo.ex_save_result(ex, test_result, prob_label, fname)
Example #5
    def compute(self):

        # randomly wait a few seconds so that multiple processes accessing the same
        # Theano function do not cause a lock problem. I do not know why.
        # I do not know if this does anything useful.
        # Sleep in seconds.
        time.sleep(np.random.rand() * 3)

        paired_source = self.paired_source
        r = self.rep
        n = self.n
        job_func = self.job_func

        pdata = paired_source.sample(n, seed=r)
        with util.ContextTimer() as t:
            logger.info("computing. %s. prob=%s, r=%d, n=%d" %
                        (job_func.__name__, pdata.label, r, n))
            tr, te = pdata.split_tr_te(tr_proportion=tr_proportion,
                                       seed=r + 21)
            prob_label = self.prob_label

            job_result = job_func(paired_source, tr, te, r)

            # create SingleResult instance
            result = SingleResult(job_result)
            # submit the result to my own aggregator
            self.aggregator.submit_result(result)
            func_name = job_func.__name__
        logger.info("done. ex1: %s, prob=%s, r=%d, n=%d. Took: %.3g s " %
                    (func_name, pdata.label, r, n, t.secs))

        # save result
        fname = '%s-%s-r%d_n%d_a%.3f_trp%.2f.p' \
            %(prob_label, func_name,  r, n, alpha, tr_proportion)
        glo.ex_save_result(ex, job_result, prob_label, fname)
Example #6
    def compute(self):

        p = self.p
        data_source = self.data_source
        r = self.rep
        prob_param = self.prob_param
        job_func = self.job_func
        # sample_size is a global variable
        data = data_source.sample(sample_size, seed=r)
        with util.ContextTimer() as t:
            tr, te = data.split_tr_te(tr_proportion=tr_proportion, seed=r + 21)
            prob_label = self.prob_label
            logger.info("computing. %s. prob=%s, r=%d,\
                    param=%.3g" %
                        (job_func.__name__, prob_label, r, prob_param))

            job_result = job_func(p, data_source, tr, te, r)

            # create SingleResult instance
            result = SingleResult(job_result)
            # submit the result to my own aggregator
            self.aggregator.submit_result(result)
            func_name = job_func.__name__
        logger.info("done. ex2: %s, prob=%s, r=%d, param=%.3g. Took: %.3g s " %
                    (func_name, prob_label, r, prob_param, t.secs))

        # save result
        fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, sample_size, r, prob_param, alpha,
                        tr_proportion)
        glo.ex_save_result(ex, job_result, prob_label, fname)
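This compute() method belongs to Ex2Job, which Example #36 below constructs as Ex2Job(SingleResultAggregator(), p, data_sources[pi], prob_label, r, f, param). A sketch of the constructor that this implies; the base-class call and its signature are assumptions:

    def __init__(self, aggregator, p, data_source, prob_label, rep, job_func,
                 prob_param):
        # hand the aggregator to the (assumed) base-class initializer and store
        # everything that compute() later reads back from self
        super(Ex2Job, self).__init__(aggregator)
        self.p = p
        self.data_source = data_source
        self.prob_label = prob_label
        self.rep = rep
        self.job_func = job_func
        self.prob_param = prob_param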
Example #7
    def compute(self):

        P = self.P
        Q = self.Q
        data_source = self.data_source
        r = self.rep
        n = self.n
        met_func = self.met_func
        prob_label = self.prob_label

        logger.info("computing. %s. prob=%s, r=%d,\
                n=%d" % (met_func.__name__, prob_label, r, n))
        with util.ContextTimer() as t:
            job_result = met_func(P, Q, data_source, n, r)

            # create SingleResult instance
            result = SingleResult(job_result)
            # submit the result to my own aggregator
            self.aggregator.submit_result(result)
            func_name = met_func.__name__

        logger.info("done. ex2: %s, prob=%s, r=%d, n=%d. Took: %.3g s " %
                    (func_name, prob_label, r, n, t.secs))

        # save result
        fname = '%s-%s-n%d_r%d_a%.3f.p' \
                %(prob_label, func_name, n, r, alpha )
        glo.ex_save_result(ex, job_result, prob_label, fname)
    def submit_job(self, job):
        # first step: check how many jobs there are in the (internal, not
        # cluster) queue, and whether we should delay submission until this has
        # dropped below a certain value
        if self.max_jobs_in_queue > 0 and \
           self._get_num_unfinished_jobs() >= self.max_jobs_in_queue and \
           not isinstance(job, FireAndForgetJob): # never block for fire and forget jobs
            logger.info(
                "Reached maximum number of %d unfinished jobs in queue." %
                self.max_jobs_in_queue)
            self._wait_until_n_unfinished(self.max_jobs_in_queue)

        # save myself every few submissions (also done once wait_for_all is called)
        if len(self.all_jobs) % 100 == 0:
            self.save_all_job_list()

        # replace job's wrapped_aggregator by PBS wrapped_aggregator to allow
        # FS based communication

        # use a unique job name, but check that this folder doesn't yet exist
        job_name = self.create_job_name()

        aggregator_filename = self.get_aggregator_filename(job_name)
        job.aggregator = ResultAggregatorWrapper(job.aggregator,
                                                 aggregator_filename, job_name,
                                                 self.do_clean_up,
                                                 self.store_fire_and_forget)

        self.submit_wrapped_pbs_job(job, job_name)

        return job.aggregator
Example #9
 def submit_job(self, job):
     # first step: check how many jobs there are in the (internal, not
     # cluster) queue, and whether we should delay submission until this has
     # dropped below a certain value
     if self.max_jobs_in_queue > 0 and \
        self._get_num_unfinished_jobs() >= self.max_jobs_in_queue:
         logger.info("Reached maximum number of %d unfinished jobs in queue." % 
                     self.max_jobs_in_queue)
         self._wait_until_n_unfinished(self.max_jobs_in_queue)
     
     # save myself every few submissions (also done once wait_for_all is called)
     if len(self.all_jobs) % 100 == 0:
         self.save_all_job_list()
     
     # replace job's wrapped_aggregator by PBS wrapped_aggregator to allow
     # FS based communication
     
     # use a unique job name, but check that this folder doesn't yet exist
     job_name = self.create_job_name()
     
     aggregator_filename = self.get_aggregator_filename(job_name)
     job.aggregator = PBSResultAggregatorWrapper(job.aggregator,
                                                 aggregator_filename,
                                                 job_name,
                                                 self.do_clean_up,
                                                 self.store_fire_and_forget)
     
     self.submit_wrapped_pbs_job(job, job_name)
     
     return job.aggregator
Example #10
    def compute(self):

        sample_source = self.sample_source
        r = self.rep
        ni = self.ni
        n = self.n
        job_func = self.job_func
        logger.info("computing. %s. r=%d, n=%d" % (job_func.__name__, r, n))

        tst_data = sample_source.sample(n, seed=r)
        tr, te = tst_data.split_tr_te(tr_proportion=tr_proportion, seed=r + 20)
        prob_label = self.prob_label
        test_result = job_func(prob_label, tr, te, r, ni, n)

        # create SingleResult instance
        result = SingleResult(test_result)
        # submit the result to my own aggregator
        self.aggregator.submit_result(result)
        logger.info("done. ex1: %s, r=%d, n=%d,  " % (job_func.__name__, r, n))

        # save result
        func_name = job_func.__name__
        fname = '%s-%s-J%d_r%d_n%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, n, alpha, tr_proportion)
        glo.ex_save_result(ex, test_result, prob_label, fname)
Example #11
    def compute(self):
        
        sample_source = self.sample_source 
        r = self.rep
        d = sample_source.dim()
        job_func = self.job_func
        logger.info("computing. %s. r=%d, d=%d"%(job_func.__name__, r, d))

        # sample_size is a global variable
        tst_data = sample_source.sample(sample_size, seed=r)
        tr, te = tst_data.split_tr_te(tr_proportion=tr_proportion, seed=r+20 )
        prob_label = self.prob_label
        test_result = job_func(sample_source, tr, te, r)

        # create SingleResult instance
        result = SingleResult(test_result)
        # submit the result to my own aggregator
        self.aggregator.submit_result(result)
        logger.info("done. ex2: %s, r=%d, d=%d,  "%(job_func.__name__, r, d))

        # save result
        func_name = job_func.__name__
        fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
        glo.ex_save_result(ex, test_result, prob_label, fname)
 def store_results(self, result, runtime):
     logger.info("Storing results in %s" % self.db_fname)
     submit_dict = {}
     for k, v in self.param_dict.items():
         submit_dict[k] = v
     submit_dict[self.result_name] = result
     submit_dict["_runtime"] = runtime
     submit_dict["_seed"] = self.seed
     store_results(self.db_fname, **submit_dict)
 def store_results(self, result, runtime):
     logger.info("Storing results in %s" % self.db_fname)
     submit_dict = {}
     for k, v in list(self.param_dict.items()):
         submit_dict[k] = v
     submit_dict[self.result_name] = result
     submit_dict["_runtime"] = runtime
     submit_dict["_seed"] = self.seed
     store_results(self.db_fname, **submit_dict)
Example #14
 def compute(self):
     logger.info("computing")
     
     sleep_time = np.random.randint(10)
     logger.info("sleeping for %d seconds" % sleep_time)
     sleep(sleep_time)
     
     # the compute method submits a result object to the aggregator
     result = ScalarResult(sleep_time)
     self.aggregator.submit_result(result)
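Example #14's compute() is a method of a job class. A minimal sketch of how such a class is typically assembled; the module paths are inferred from the import style in Example #34 and the base-class __init__ signature is an assumption:

from time import sleep

import numpy as np

from independent_jobs.jobs.IndependentJob import IndependentJob
from independent_jobs.results.ScalarResult import ScalarResult


class MyJob(IndependentJob):
    def __init__(self, aggregator):
        # pass the aggregator to the base class so the engine can wrap it
        IndependentJob.__init__(self, aggregator)

    def compute(self):
        # sleep for a random time and report it back as a ScalarResult
        sleep_time = np.random.randint(10)
        sleep(sleep_time)
        self.aggregator.submit_result(ScalarResult(sleep_time))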
Example #15
    def compute_result(self):
        """
        Note that this method directly computes and returns the result itself.
        There are no aggregators and no result instances being passed around at
        this point.
        """
        sleep_time = np.random.randint(3)
        logger.info("sleeping for %d seconds" % sleep_time)
        sleep(sleep_time)

        return self.x**2 + self.y**2 + np.random.randn()*0.1
Example #16
 def compute(self):
     result = ScalarResult(self.sleep_time)
     
     if self.sleep_time >= 0:
         sleep_time = self.sleep_time
     else:
         sleep_time = np.random.randint(10)
         
     logger.info("Sleeping for %d" % sleep_time)
     sleep(sleep_time)
         
     self.aggregator.submit_result(result)
    def submit_wrapped_pbs_job(self, wrapped_job, job_name):
        job_folder = self.get_job_foldername(job_name)

        # create the job folder if it does not yet exist
        job_filename = self.get_job_filename(job_name)
        logger.info("Creating job with file %s" % job_filename)
        try:
            makedirs(job_folder)
        except OSError:
            pass

        Serialization.serialize_object(wrapped_job, job_filename)

        # allow the queue to process things
        time.sleep(self.submission_delay)

        dispatcher_string = self._get_dispatcher_string(job_filename)

        # get computing resource constraints from the job
        walltime, memory, nodes = wrapped_job.get_walltime_mem_nodes()
        job_string = self.create_batch_script(job_name, dispatcher_string,
                                              walltime, memory, nodes)

        # prepend the custom parameter string, if any, but not as the first
        # line, to avoid clobbering the #!/bin/bash shebang
        if self.batch_parameters.parameter_prefix != "":
            lines = job_string.split(os.linesep)
            job_string = os.linesep.join(
                [lines[0], self.batch_parameters.parameter_prefix] + lines[1:])

        f = open(
            job_folder + os.sep +
            BatchClusterComputationEngine.batch_script_filename, "w")
        f.write(job_string)
        f.close()

        job_id = self.submit_to_batch_system(job_string)

        if job_id == "":
            raise RuntimeError(
                "Could not parse job_id. Something went wrong with the job submission"
            )

        f = open(
            job_folder + os.sep +
            BatchClusterComputationEngine.job_id_filename, 'w')
        f.write(job_id + os.linesep)
        f.close()

        if not isinstance(wrapped_job, FireAndForgetJob):
            # track submitted (and unfinished) jobs and their start time
            self._insert_job_time_sorted(job_name, job_id)
Example #18
def run_dataset(prob_label):
    """Run the experiment"""
    sample_source, n = get_sample_source(prob_label)

    # ///////  submit jobs //////////
    # create folder name string
    home = os.path.expanduser("~")
    foldername = os.path.join(home, "freqopttest_slurm", 'e%d'%ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_"%ex, parameter_prefix="")
    def compute(self):
        param_string = ", ".join(["%s=%s" % (str(k), str(v)) for k, v in self.param_dict.items()])

        logger.info("Setting numpy random seed to %d" % self.seed)
        np.random.seed(self.seed)

        logger.info("Computing result for %s" % param_string)
        start_time = time.time()
        result = self.compute_result()
        end_time = time.time()
        runtime = end_time - start_time
        
        self.store_results(result, runtime)
        self.aggregator.submit_result(result)
        
        # the engine will not call this, as it "forgets"
        self.aggregator.clean_up()
 def submit_wrapped_pbs_job(self, wrapped_job, job_name):
     job_folder = self.get_job_foldername(job_name)
     
     # create the job folder if it does not yet exist
     job_filename = self.get_job_filename(job_name)
     logger.info("Creating job with file %s" % job_filename)
     try:
         makedirs(job_folder)
     except OSError:
         pass
     
     Serialization.serialize_object(wrapped_job, job_filename)
     
     # allow the queue to process things        
     time.sleep(self.submission_delay)
     
     dispatcher_string = self._get_dispatcher_string(job_filename)
     
     # get computing resource constraints from the job
     walltime, memory, nodes = wrapped_job.get_walltime_mem_nodes()
     job_string = self.create_batch_script(job_name, dispatcher_string, walltime, memory, nodes)
     
     # prepend the custom parameter string, if any, but not as the first
     # line, to avoid clobbering the #!/bin/bash shebang
     if self.batch_parameters.parameter_prefix != "":
         lines = job_string.split(os.linesep)
         job_string = os.linesep.join([lines[0],
                                              self.batch_parameters.parameter_prefix] + lines[1:])
     
     f = open(job_folder + os.sep + BatchClusterComputationEngine.batch_script_filename, "w")
     f.write(job_string)
     f.close()
     
     job_id = self.submit_to_batch_system(job_string)
     
     if job_id == "":
         raise RuntimeError("Could not parse job_id. Something went wrong with the job submission")
     
     f = open(job_folder + os.sep + BatchClusterComputationEngine.job_id_filename, 'w')
     f.write(job_id + os.linesep)
     f.close()
     
     if not isinstance(wrapped_job, FireAndForgetJob):
         # track submitted (and unfinished) jobs and their start time
         self._insert_job_time_sorted(job_name, job_id)
Example #21
    def compute(self):
        param_string = ", ".join(
            ["%s=%s" % (str(k), str(v)) for k, v in self.param_dict.items()])

        logger.info("Setting numpy random seed to %d" % self.seed)
        np.random.seed(self.seed)

        logger.info("Computing result for %s" % param_string)
        start_time = time.time()
        result = self.compute_result()
        end_time = time.time()
        runtime = end_time - start_time

        self.store_results(result, runtime)
        self.aggregator.submit_result(result)

        # the engine will not call this, as it "forgets"
        self.aggregator.clean_up()
Example #22
    def compute(self):

        # randomly wait a few seconds so that multiple processes accessing the same
        # Theano function do not cause a lock problem. I do not know why.
        # I do not know if this does anything useful.
        # Sleep in seconds.
        time.sleep(np.random.rand() * 2)

        # load the data and construct a PairedSource here
        # The data can be big. We have to load it in this job function i.e.,
        # each computing node loads by itself (no data passing).
        folder_path = self.folder_path
        prob_label = self.prob_label
        paired_source, _, is_h0 = exglo.get_problem_pickle(
            folder_path, prob_label + '.n0')

        n = self.n
        r = self.rep
        job_func = self.job_func

        pdata = paired_source.sample(n, seed=r)
        with util.ContextTimer() as t:
            logger.info("computing. %s. prob=%s, r=%d, n=%d" %
                        (job_func.__name__, pdata.label, r, n))
            tr, te = pdata.split_tr_te(tr_proportion=tr_proportion,
                                       seed=r + 21)
            prob_label = self.prob_label

            job_result = job_func(paired_source, tr, te, r)

            # create SingleResult instance
            result = SingleResult(job_result)
            # submit the result to my own aggregator
            self.aggregator.submit_result(result)
            func_name = job_func.__name__
        logger.info("done. ex1: %s, prob=%s, r=%d, n=%d. Took: %.3g s " %
                    (func_name, pdata.label, r, n, t.secs))

        # save result
        fname = '%s-%s-r%d_n%d_a%.3f_trp%.2f.p' \
            %(prob_label, func_name, r, n, alpha, tr_proportion)
        glo.ex_save_result(ex, job_result, prob_label, fname)
    def _resubmit(self, job_name):
        new_job_name = self.create_job_name()
        logger.info("Re-submitting under name %s" % new_job_name)

        # remove from unfinished jobs list
        for i in range(len(self.submitted_jobs)):
            if self.submitted_jobs[i][0] == job_name:
                del self.submitted_jobs[i]
                break

        # remove from all jobs list
        for i in range(len(self.all_jobs)):
            if self.all_jobs[i] == job_name:
                del self.all_jobs[i]
                break

        # load job from disc and re-submit under new name
        job_filename = self.get_job_filename(job_name)
        wrapped_job = Serialization.deserialize_object(job_filename)
        self.submit_wrapped_pbs_job(wrapped_job, new_job_name)
Example #24
 def _resubmit(self, job_name):
     new_job_name = self.create_job_name()
     logger.info("Re-submitting under name %s" % new_job_name)
     
     # remove from unfinished jobs list
     for i in range(len(self.submitted_jobs)):
         if self.submitted_jobs[i][0] == job_name:
             del self.submitted_jobs[i]
             break
     
     # remove from all jobs list
     for i in range(len(self.all_jobs)):
         if self.all_jobs[i] == job_name:
             del self.all_jobs[i]
             break
     
     # load job from disc and re-submit under new name
     job_filename = self.get_job_filename(job_name)
     wrapped_job = Serialization.deserialize_object(job_filename)
     self.submit_wrapped_pbs_job(wrapped_job, new_job_name)
Example #25
    def compute(self, data, mod_prm):
        logger.info("computing")

        # build the model from the model parameters
        mu_g = mod_prm['mu_g']
        s_g = mod_prm['s_g']
        h = mod_prm['h']
        s_s = mod_prm['s_s']
        model = Model(mu_g, s_g, h, s_s)

        # unpack the data
        F1 = data['F1']
        F2 = data['F2']
        Y = data['Y']

        # evaluate the model's llh (n_samp is assumed to be a module-level global)
        x = model.llh(F1, F2, Y, n_samp=n_samp)

        result = SingleResult([x])
        self.aggregator.submit_result(result)
        logger.info("done computing")

        # pickle the inputs together with the computed value
        mypath = self.save_dir + '/' + self.name + ".p"
        logger.info("saving: " + mypath)

        d = {'data': self.data, 'mod_prm': self.mod_prm, 'llh': x}
        with open(mypath, "wb") as f:
            pickle.dump(d, f)
    def store_results(self, result, runtime):
        logger.info("Storing results in %s" % self.db_fname)
        if '_array' in result:
            N_samples = result['N_samples']
            submit_dict = {}
            del result['N_samples']
            del result['_array']
            submit_dict = result
            for k, v in self.param_dict.items():
                submit_dict[k] = str(v)

            #submit_dict[self.result_name] = result
            submit_dict["_runtime"] = runtime
            submit_dict["_seed"] = self.seed
            submit_dict["_job_ID"] = self.job_ID

            current_time = time.strftime("%Y-%m-%d_%H:%M:%S", time.gmtime())
            submit_dict["current_time"] = current_time
            df = pd.DataFrame(submit_dict)
        else:
            submit_dict = result
            for k, v in self.param_dict.items():
                submit_dict[k] = v

            #submit_dict[self.result_name] = result
            submit_dict["_runtime"] = runtime
            submit_dict["_seed"] = self.seed
            submit_dict["_job_ID"] = self.job_ID

            current_time = time.strftime("%Y-%m-%d_%H:%M:%S", time.gmtime())
            columns = list(submit_dict.keys())
            df = pd.DataFrame([[submit_dict[k] for k in columns]],
                              index=[current_time],
                              columns=columns)

        store_results(self.db_fname, df)
Example #27
    def compute(self):
        
        r = self.rep
        sample_source, nmax = get_sample_source(self.prob_label)
        d = sample_source.dim()
        job_func = self.job_func
        logger.info("computing. %s. r=%d "%(job_func.__name__, r ))

        tst_data = sample_source.sample(self.n, seed=r)
        tr, te = tst_data.split_tr_te(tr_proportion=tr_proportion, seed=r+20 )
        prob_label = self.prob_label
        job_result = job_func(sample_source, tr, te, r)

        # create SingleResult instance
        result = SingleResult(job_result)
        # submit the result to my own aggregator
        self.aggregator.submit_result(result)
        logger.info("done. ex2: %s, r=%d "%(job_func.__name__, r))

        # save result
        func_name = job_func.__name__
        fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
        glo.ex_save_result(ex, job_result, prob_label, fname)
Example #28
 def compute(self):
     logger.info("computing")
     # job is to sleep for some time and return this time as an instance
     # of ScalarResult, which is a provided sub-class of JobResult
     sleep_time = randint(10)
     
     logger.info("sleeping for %d seconds" % sleep_time)
     sleep(sleep_time)
     
     # create ScalarResult instance
     result = ScalarResult(sleep_time)
     
     # submit the result to my own aggregator
     self.aggregator.submit_result(result)
     logger.info("done computing")
    def _wait_until_n_unfinished(self, desired_num_unfinished):
        """
        Iteratively checks all non-finished jobs and updates whether they are
        finished. Blocks until there are at most desired_num_unfinished
        unfinished jobs in the queue. Logs a "waiting for" info message
        for the oldest job in the queue.
        """

        # save all job list to file for reconstructing results later
        self.save_all_job_list()

        last_printed = self._get_oldest_job_in_queue()
        logger.info("Waiting for %s and %d other jobs" %
                    (last_printed, self._get_num_unfinished_jobs() - 1))
        while self._get_num_unfinished_jobs() > desired_num_unfinished:

            oldest = self._get_oldest_job_in_queue()
            if oldest != last_printed:
                last_printed = oldest
                logger.info(
                    "Waiting for %s and %d other jobs" %
                    (last_printed, self._get_num_unfinished_jobs() - 1))

            # delete all finished jobs from internal list
            i = 0
            while i < len(self.submitted_jobs):
                job_name = self.submitted_jobs[i][0]
                if self._check_job_done(job_name):
                    del self.submitted_jobs[i]
                    # don't change i, as it is now the index of the next element
                else:
                    i += 1

            # check for re-submissions
            if self.batch_parameters.resubmit_on_timeout:
                for job_name in self._get_max_wait_time_exceed_jobs():
                    # load the job to read its resource constraints
                    job_filename = self.get_job_filename(job_name)
                    job = Serialization.deserialize_object(job_filename)
                    logger.info("%s exceeded maximum waiting time of %dh" %
                                (job_name, job.walltime))
                    self._resubmit(job_name)

            time.sleep(self.check_interval)
Example #30
 def _wait_until_n_unfinished(self, desired_num_unfinished):
     """
     Iteratively checks all non-finished jobs and updates whether they are
     finished. Blocks until there are at most desired_num_unfinished
     unfinished jobs in the queue. Logs a "waiting for" info message
     for the oldest job in the queue.
     """
     
     # save all job list to file for reconstructing results later
     self.save_all_job_list()
     
     last_printed = self._get_oldest_job_in_queue()
     logger.info("Waiting for %s and %d other jobs" % (last_printed,
                                                       self._get_num_unfinished_jobs() - 1))
     while self._get_num_unfinished_jobs() > desired_num_unfinished:
         
         oldest = self._get_oldest_job_in_queue()
         if oldest != last_printed:
             last_printed = oldest
             logger.info("Waiting for %s and %d other jobs" % (last_printed,
                                                               self._get_num_unfinished_jobs() - 1))
             
         
         # delete all finished jobs from internal list
         i = 0
         while i < len(self.submitted_jobs):
             job_name = self.submitted_jobs[i][0]
             if self._check_job_done(job_name):
                 del self.submitted_jobs[i]
                 # don't change i, as it is now the index of the next element
             else:
                 i += 1
                     
         # check for re-submissions
         if self.batch_parameters.resubmit_on_timeout:
             for job_name in self._get_max_wait_time_exceed_jobs():
                 # load the job to read its resource constraints
                 job_filename = self.get_job_filename(job_name)
                 job = Serialization.deserialize_object(job_filename)
                 logger.info("%s exceeded maximum waiting time of %dh" 
                             % (job_name, job.walltime))
                 self._resubmit(job_name)
                 
         time.sleep(self.check_interval)
Example #31
    # the serial one runs everything locally
    engine = SerialComputationEngine()
#     engine = SGEComputationEngine(batch_parameters)
#     engine = SlurmComputationEngine(batch_parameters)

    # On submission, the engine returns aggregators that can be
    # used to retrieve results after potentially doing postprocessing
    returned_aggregators = []
    
    for i in range(3):
        job = MyJob(ScalarResultAggregator())
        agg = engine.submit_job(job)
        returned_aggregators.append(agg)
        
    # This call blocks until all jobs are finished (magic happens here)
    logger.info("Waiting for all jobs to be completed.")
    engine.wait_for_all()
    
    # now that everything is done, we can collect the results
    # and/or do postprocessing
    logger.info("Collecting results")
    results = np.zeros(len(returned_aggregators))
    for i, agg in enumerate(returned_aggregators):
        # the aggregator might implement postprocessing
        agg.finalize()
        
        # aggregators[i].get_final_result() here returns a ScalarResult instance,
        # which we need to extract the number from
        results[i] = agg.get_final_result().result
    
    print "Results", results
Example #32
 def wait_for_all(self):
     self._wait_until_n_unfinished(0)
     logger.info("All jobs finished.")
Example #33
def run_dataset(prob_label):
    """Run the experiment"""
    sample_source, n = get_sample_source(prob_label)

    # ///////  submit jobs //////////
    # create folder name string
    home = os.path.expanduser("~")
    foldername = os.path.join(home, "freqopttest_slurm", 'e%d'%ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_"%ex, parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters, do_clean_up=True)
    n_methods = len(method_job_funcs)
    # repetitions x  #methods
    aggregators = np.empty((reps, n_methods ), dtype=object)
    d = sample_source.dim()
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            # name used to save the result
            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                logger.info('%s exists. Load and return.'%fname)
                test_result = glo.ex_load_result(ex, prob_label, fname)
                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(test_result))

                aggregators[r, mi] = sra
            else:
                # result does not exist, or a rerun was requested
                job = Ex5Job(SingleResultAggregator(), prob_label, r, n, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    test_results = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d)" % (f.__name__, r ))
            # let the aggregator finalize things
            aggregators[r, mi].finalize()

            # aggregators[i].get_final_result() returns a SingleResult instance,
            # which we need to extract the actual result
            test_result = aggregators[r, mi].get_final_result().result
            test_results[r, mi] = test_result

            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            glo.ex_save_result(ex, test_result, prob_label, fname)

    func_names = [f.__name__ for f in method_job_funcs]
    func2labels = exglobal.get_func2label_map()
    method_labels = [func2labels[f] for f in func_names if f in func2labels]
    # save results 
    results = {'results': test_results, 'n': n, 'data_fname':label2fname[prob_label],
            'alpha': alpha, 'J': J, 'sample_source': sample_source, 
            'tr_proportion': 0.5, 'method_job_funcs': method_job_funcs, 
            'prob_label': prob_label, 'method_labels': method_labels}
    
    # class name 
    fname = 'ex%d-%s-me%d_J%d_rs%d_nma%d_d%d_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, J, reps, n, d, alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s'%fname)
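Experiment scripts such as run_dataset() are typically launched with the problem label given on the command line. A hypothetical entry point; the argument handling shown here is an assumption, not part of the original script:

import sys

def main():
    # expect exactly one argument: the problem label to run
    if len(sys.argv) != 2:
        print('Usage: %s <prob_label>' % sys.argv[0])
        sys.exit(1)
    run_dataset(sys.argv[1])

if __name__ == '__main__':
    main()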
Example #34
import os
from os.path import expanduser

from independent_jobs.aggregators.ScalarResultAggregator import ScalarResultAggregator
from independent_jobs.engines.BatchClusterParameters import BatchClusterParameters
from independent_jobs.engines.SGEComputationEngine import SGEComputationEngine
from independent_jobs.engines.SerialComputationEngine import SerialComputationEngine
from independent_jobs.examples.MyJob import MyJob
from independent_jobs.tools.Log import Log
from independent_jobs.tools.Log import logger
import numpy as np


# See other file for implementation of MyJob
# Since we are using ScalarResult, we can use the already implemented aggregator
# ScalarResultAggregator
if __name__ == '__main__':
    Log.set_loglevel(20)  # 20 == logging.INFO, as in Example #1
    logger.info("Start")
    # create an instance of the SGE engine, with certain parameters
    
    # create folder name string
    home = expanduser("~")
    foldername = os.sep.join([home, "minimal_example"])
    logger.info("Setting engine folder to %s" % foldername)
    
    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername)
    
    # possibly create an SGE engine instance, which can be used to submit jobs
    # to; there are more engines available.
#     logger.info("creating SGE engine instance")
#     engine = SGEComputationEngine(batch_parameters, check_interval=1)
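The snippet stops just before an engine is constructed. A hedged sketch of how this minimal example typically continues, following the submit/wait/collect pattern of Examples #31 and #39:

    # run everything locally; swap in the SGE/Slurm engines above for a cluster
    engine = SerialComputationEngine()

    # submit a few jobs; each submission returns an aggregator
    returned_aggregators = []
    for i in range(3):
        job = MyJob(ScalarResultAggregator())
        returned_aggregators.append(engine.submit_job(job))

    # block until all jobs are finished
    logger.info("Waiting for all jobs to be completed.")
    engine.wait_for_all()

    # collect the results from the aggregators
    logger.info("Collecting results")
    results = np.zeros(len(returned_aggregators))
    for i, agg in enumerate(returned_aggregators):
        agg.finalize()
        results[i] = agg.get_final_result().result
    print("Results", results)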
Example #35
    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters)
    n_methods = len(method_job_funcs)
    # repetitions x  #methods
    aggregators = np.empty((reps, n_methods ), dtype=object)
    d = sample_source.dim()
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            # name used to save the result
            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                logger.info('%s exists. Load and return.'%fname)
                test_result = glo.ex_load_result(ex, prob_label, fname)

                sra = SingleResultAggregator()
                if isinstance(test_result, SingleResult):
                    sra.submit_result(test_result)
                else:
                    sra.submit_result(SingleResult(test_result))

                aggregators[r, mi] = sra
            else:
                # result does not exist, or a rerun was requested
                job = Ex4Job(SingleResultAggregator(), prob_label, r, n, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg
Example #36
def run_problem(prob_label):
    """Run the experiment"""
    L = get_pqsource_list(prob_label)
    prob_params, ps, data_sources = zip(*L)
    # make them lists
    prob_params = list(prob_params)
    ps = list(ps)
    data_sources = list(data_sources)

    # ///////  submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    from kgof.config import expr_configs
    tmp_dir = expr_configs['scratch_path']
    foldername = os.path.join(tmp_dir, 'kgof_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters)
    #engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    n_methods = len(method_job_funcs)
    # repetitions x len(prob_params) x #methods
    aggregators = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' \
                    %(prob_label, func_name, sample_size, r, param, alpha,
                            tr_proportion)
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info('%s exists. Load and return.' % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, pi, mi] = sra
                else:
                    # result does not exist, or a rerun was requested

                    # p: an UnnormalizedDensity object
                    p = ps[pi]
                    job = Ex2Job(SingleResultAggregator(), p, data_sources[pi],
                                 prob_label, r, f, param)
                    agg = engine.submit_job(job)
                    aggregators[r, pi, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                logger.info("Collecting result (%s, r=%d, param=%.3g)" %
                            (f.__name__, r, param))
                # let the aggregator finalize things
                aggregators[r, pi, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, pi, mi].get_final_result().result
                job_results[r, pi, mi] = job_result

    #func_names = [f.__name__ for f in method_job_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {
        'job_results': job_results,
        'prob_params': prob_params,
        'alpha': alpha,
        'repeats': reps,
        'ps': ps,
        'list_data_source': data_sources,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
        'sample_size': sample_size,
    }

    # class name
    fname = 'ex%d-%s-me%d_n%d_rs%d_pmi%g_pma%g_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, sample_size, reps, min(prob_params),
                max(prob_params), alpha, tr_proportion)

    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
 def wait_for_all(self):
     self._wait_until_n_unfinished(0)
     logger.info("All jobs finished.")
Example #38
def run_problem(prob_label):
    """Run the experiment"""
    # ///////  submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    from kmod.config import expr_configs
    tmp_dir = expr_configs['scratch_path']
    foldername = os.path.join(tmp_dir, 'kmod_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    partitions = expr_configs['slurm_partitions']
    if partitions is None:
        engine = SlurmComputationEngine(batch_parameters)
    else:
        engine = SlurmComputationEngine(batch_parameters, partition=partitions)
    n_methods = len(method_funcs)

    # problem setting
    ns, P, Q, ds = get_ns_pqrsource(prob_label)

    # repetitions x len(ns) x #methods
    aggregators = np.empty((reps, len(ns), n_methods), dtype=object)

    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = '%s-%s-n%d_r%d_a%.3f.p' \
                        %(prob_label, func_name, n, r, alpha,)
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info('%s exists. Load and return.' % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, ni, mi] = sra
                else:
                    # result does not exist, or a rerun was requested
                    job = Ex1Job(SingleResultAggregator(), P, Q, ds,
                                 prob_label, r, f, n)

                    agg = engine.submit_job(job)
                    aggregators[r, ni, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_funcs):
                logger.info("Collecting result (%s, r=%d, n=%d)" %
                            (f.__name__, r, n))
                # let the aggregator finalize things
                aggregators[r, ni, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, ni, mi].get_final_result().result
                job_results[r, ni, mi] = job_result

    #func_names = [f.__name__ for f in method_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {
        'job_results': job_results,
        'P': P,
        'Q': Q,
        'data_source': ds,
        'alpha': alpha,
        'repeats': reps,
        'ns': ns,
        'method_funcs': method_funcs,
        'prob_label': prob_label,
    }

    # class name
    fname = 'ex%d-%s-me%d_rs%d_nmi%d_nma%d_a%.3f.p' \
        %(ex, prob_label, n_methods, reps, min(ns), max(ns), alpha,)

    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
Example #39
    # the serial one runs everything locally
    engine = SerialComputationEngine()
    #     engine = SGEComputationEngine(batch_parameters)
    #     engine = SlurmComputationEngine(batch_parameters)

    # On submission, the engine returns aggregators that can be
    # used to retrieve results after potentially doing postprocessing
    returned_aggregators = []

    for i in range(3):
        job = MyJob(ScalarResultAggregator())
        agg = engine.submit_job(job)
        returned_aggregators.append(agg)

    # This call blocks until all jobs are finished (magic happens here)
    logger.info("Waiting for all jobs to be completed.")
    engine.wait_for_all()

    # now that everything is done, we can collect the results
    # and/or do postprocessing
    logger.info("Collecting results")
    results = np.zeros(len(returned_aggregators))
    for i, agg in enumerate(returned_aggregators):
        # the aggregator might implement postprocessing
        agg.finalize()

        # aggregators[i].get_final_result() here returns a ScalarResult instance,
        # which we need to extract the number from
        results[i] = agg.get_final_result().result

    print "Results", results
Example #40
def run_problem(folder_path, prob_label):
    """Run the experiment"""

    pl = exglo.parse_prob_label(prob_label)
    is_h0 = pl['is_h0']
    n = pl['n']
    # ///////  submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    #tmp_dir = tempfile.gettempdir()
    from fsic.config import expr_configs
    tmp_dir = expr_configs['scratch_dir']
    foldername = os.path.join(tmp_dir, 'wj_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters)
    n_methods = len(method_job_funcs)
    # repetitions x #methods
    aggregators = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            # name used to save the result
            func_name = f.__name__
            fname = '%s-%s-r%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, r, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                logger.info('%s exists. Load and return.' % fname)
                job_result = glo.ex_load_result(ex, prob_label, fname)

                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(job_result))
                aggregators[r, mi] = sra
            else:
                # result does not exist, or a rerun was requested
                job = Ex4Job(SingleResultAggregator(), folder_path, prob_label,
                             r, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d, n=%d)" %
                        (f.__name__, r, n))
            # let the aggregator finalize things
            aggregators[r, mi].finalize()

            # aggregators[i].get_final_result() returns a SingleResult instance,
            # which we need to extract the actual result
            job_result = aggregators[r, mi].get_final_result().result
            job_results[r, mi] = job_result

    #func_names = [f.__name__ for f in method_job_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    # - Do not store PairedSource because it can be very big.
    results = {
        'job_results': job_results,
        'n': n,
        'is_h0': is_h0,
        'alpha': alpha,
        'repeats': reps,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
    }

    # class name
    fname = 'ex%d-%s-me%d_rs%d_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, reps, alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
Example #41
def run_dataset(prob_label):
    """Run the experiment"""
    sample_source, n = get_sample_source(prob_label)

    # ///////  submit jobs //////////
    # create folder name string
    home = os.path.expanduser("~")
    foldername = os.path.join(home, "freqopttest_slurm", 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters, do_clean_up=True)
    n_methods = len(method_job_funcs)
    # repetitions x  #methods
    aggregators = np.empty((reps, n_methods), dtype=object)
    d = sample_source.dim()
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            # name used to save the result
            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                logger.info('%s exists. Load and return.' % fname)
                test_result = glo.ex_load_result(ex, prob_label, fname)
                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(test_result))

                aggregators[r, mi] = sra
            else:
                # result does not exist, or a rerun was requested
                job = Ex5Job(SingleResultAggregator(), prob_label, r, n, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    test_results = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d)" % (f.__name__, r))
            # let the aggregator finalize things
            aggregators[r, mi].finalize()

            # aggregators[i].get_final_result() returns a SingleResult instance,
            # which we need to extract the actual result
            test_result = aggregators[r, mi].get_final_result().result
            test_results[r, mi] = test_result

            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            glo.ex_save_result(ex, test_result, prob_label, fname)

    func_names = [f.__name__ for f in method_job_funcs]
    func2labels = exglobal.get_func2label_map()
    method_labels = [func2labels[f] for f in func_names if f in func2labels]
    # save results
    results = {
        'results': test_results,
        'n': n,
        'data_fname': label2fname[prob_label],
        'alpha': alpha,
        'J': J,
        'sample_source': sample_source,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
        'method_labels': method_labels
    }

    # class name
    fname = 'ex%d-%s-me%d_J%d_rs%d_nma%d_d%d_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, J, reps, n, d, alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
Example #42
def run_problem(prob_label):
    """Run the experiment"""
    ns, p, ds = get_ns_pqsource(prob_label)
    # ///////  submit jobs //////////
    # create folder name string
    # result_folder = glo.result_folder()
    from sbibm.third_party.kgof.config import expr_configs

    tmp_dir = expr_configs["scratch_path"]
    foldername = os.path.join(tmp_dir, "kgof_slurm", "e%d" % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    # engine = SerialComputationEngine()
    # engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    engine = SlurmComputationEngine(batch_parameters)
    n_methods = len(method_job_funcs)
    # repetitions x len(ns) x #methods
    aggregators = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = "%s-%s-n%d_r%d_a%.3f_trp%.2f.p" % (
                    prob_label,
                    func_name,
                    n,
                    r,
                    alpha,
                    tr_proportion,
                )
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info("%s exists. Load and return." % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, ni, mi] = sra
                else:
                    # result does not exist, or a rerun was requested

                    # p: an UnnormalizedDensity object
                    job = Ex1Job(SingleResultAggregator(), p, ds, prob_label,
                                 r, f, n)
                    agg = engine.submit_job(job)
                    aggregators[r, ni, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                logger.info("Collecting result (%s, r=%d, n=%rd)" %
                            (f.__name__, r, n))
                # let the aggregator finalize things
                aggregators[r, ni, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, ni, mi].get_final_result().result
                job_results[r, ni, mi] = job_result

    # func_names = [f.__name__ for f in method_job_funcs]
    # func2labels = exglobal.get_func2label_map()
    # method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {
        "job_results": job_results,
        "data_source": ds,
        "alpha": alpha,
        "repeats": reps,
        "ns": ns,
        "p": p,
        "tr_proportion": tr_proportion,
        "method_job_funcs": method_job_funcs,
        "prob_label": prob_label,
    }

    # class name
    fname = "ex%d-%s-me%d_rs%d_nmi%d_nma%d_a%.3f_trp%.2f.p" % (
        ex,
        prob_label,
        n_methods,
        reps,
        min(ns),
        max(ns),
        alpha,
        tr_proportion,
    )

    glo.ex_save_result(ex, results, fname)
    logger.info("Saved aggregated results to %s" % fname)