def compute(self):
    """Run this job's method on one sampled dataset, then report and save it.

    Draws ``self.n`` samples from ``self.data_source`` (seeded with the
    repetition index ``self.rep``), splits them into two parts, calls
    ``self.job_func`` on them, submits the wrapped result to
    ``self.aggregator`` and finally saves the raw job result through
    ``glo.ex_save_result``.

    Besides the instance attributes, this reads the module-level names
    ``tr_proportion``, ``alpha`` and ``ex``.
    """
    p = self.p
    data_source = self.data_source
    r = self.rep
    n = self.n
    job_func = self.job_func
    prob_label = self.prob_label
    func_name = job_func.__name__

    data = data_source.sample(n, seed=r)
    with util.ContextTimer() as t:
        # The split is seeded with an offset so that it does not reuse the
        # seed that was used for sampling.
        tr, te = data.split_tr_te(tr_proportion=tr_proportion, seed=r + 21)
        logger.info(
            "computing. %s. prob=%s, r=%d, n=%d" % (func_name, prob_label, r, n)
        )

        job_result = job_func(p, data_source, tr, te, r)

        # Wrap the raw result in a SingleResult and hand it to this job's
        # own aggregator.
        result = SingleResult(job_result)
        self.aggregator.submit_result(result)

    # t.secs is read only after the timed block has exited
    # (presumably ContextTimer fills it in on exit -- confirm in util).
    logger.info(
        "done. ex%d: %s, prob=%s, r=%d, n=%d. Took: %.3g s "
        % (ex, func_name, prob_label, r, n, t.secs)
    )

    # Save the raw result under a name encoding every setting of this run.
    fname = "%s-%s-n%d_r%d_a%.3f_trp%.2f.p" % (
        prob_label,
        func_name,
        n,
        r,
        alpha,
        tr_proportion,
    )
    glo.ex_save_result(ex, job_result, prob_label, fname)
def run_problem(prob_label):
    """Run the experiment for one problem and save the aggregated results.

    For every combination of repetition, sample size and method, either an
    already saved result is loaded or a new ``Ex1Job`` is submitted to the
    computation engine. Once the engine has finished, all results are
    collected into an object array of shape
    ``(reps, len(ns), len(method_job_funcs))`` and saved, together with the
    experiment settings, through ``glo.ex_save_result``.

    Args:
        prob_label: Label of the problem. It is passed to
            ``get_ns_pqsource`` and used in the names of result files.

    Besides ``prob_label``, this reads the module-level names ``ex``,
    ``reps``, ``alpha``, ``tr_proportion``, ``method_job_funcs`` and
    ``is_rerun``.
    """
    ns, p, ds = get_ns_pqsource(prob_label)

    # /////// submit jobs //////////
    from sbibm.third_party.kgof.config import expr_configs

    tmp_dir = expr_configs["scratch_path"]
    foldername = os.path.join(tmp_dir, "kgof_slurm", "e%d" % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_" % ex, parameter_prefix=""
    )

    # Use the following line if Slurm queue is not used.
    # engine = SerialComputationEngine()
    # engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    engine = SlurmComputationEngine(batch_parameters)

    n_methods = len(method_job_funcs)
    # repetitions x len(ns) x #methods
    result_shape = (reps, len(ns), n_methods)
    aggregators = np.empty(result_shape, dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = "%s-%s-n%d_r%d_a%.3f_trp%.2f.p" % (
                    prob_label,
                    func_name,
                    n,
                    r,
                    alpha,
                    tr_proportion,
                )
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    # Reuse the saved result: wrap it in an aggregator so the
                    # collection loop below can treat every cell uniformly.
                    logger.info("%s exists. Load and return." % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, ni, mi] = sra
                else:
                    # result does not exist yet, or a rerun was requested
                    # p: an UnnormalizedDensity object
                    job = Ex1Job(SingleResultAggregator(), p, ds, prob_label, r, f, n)
                    aggregators[r, ni, mi] = engine.submit_job(job)

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty(result_shape, dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                logger.info(
                    "Collecting result (%s, r=%d, n=%d)" % (f.__name__, r, n)
                )
                aggregator = aggregators[r, ni, mi]
                # let the aggregator finalize things
                aggregator.finalize()

                # get_final_result() is expected to return a SingleResult;
                # its .result attribute holds the actual job result.
                job_results[r, ni, mi] = aggregator.get_final_result().result

    # save results
    results = {
        "job_results": job_results,
        "data_source": ds,
        "alpha": alpha,
        "repeats": reps,
        "ns": ns,
        "p": p,
        "tr_proportion": tr_proportion,
        "method_job_funcs": method_job_funcs,
        "prob_label": prob_label,
    }

    # name of the file holding the aggregated results
    fname = "ex%d-%s-me%d_rs%d_nmi%d_nma%d_a%.3f_trp%.2f.p" % (
        ex,
        prob_label,
        n_methods,
        reps,
        min(ns),
        max(ns),
        alpha,
        tr_proportion,
    )
    glo.ex_save_result(ex, results, fname)
    logger.info("Saved aggregated results to %s" % fname)