def compute(self):
    p = self.p
    data_source = self.data_source
    r = self.rep
    prob_param = self.prob_param
    job_func = self.job_func
    # sample_size (like tr_proportion, alpha, and ex below) is a module-level
    # global variable
    data = data_source.sample(sample_size, seed=r)
    with util.ContextTimer() as t:
        tr, te = data.split_tr_te(tr_proportion=tr_proportion, seed=r + 21)
        prob_label = self.prob_label
        logger.info("computing. %s. prob=%s, r=%d, param=%.3g" %
                    (job_func.__name__, prob_label, r, prob_param))

        job_result = job_func(p, data_source, tr, te, r)

        # create a SingleResult instance
        result = SingleResult(job_result)
        # submit the result to my own aggregator
        self.aggregator.submit_result(result)
        func_name = job_func.__name__
    logger.info("done. ex2: %s, prob=%s, r=%d, param=%.3g. Took: %.3g s" %
                (func_name, prob_label, r, prob_param, t.secs))

    # save the result of this single job
    fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' % \
        (prob_label, func_name, sample_size, r, prob_param, alpha,
         tr_proportion)
    glo.ex_save_result(ex, job_result, prob_label, fname)
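# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): compute() above reads
# self.p, self.data_source, self.rep, self.prob_param, self.job_func, and
# self.prob_label, so the enclosing Ex2Job presumably stores these in its
# constructor. A minimal sketch, assuming Ex2Job subclasses IndependentJob
# from the independent_jobs package; kept commented out so it does not shadow
# the real class definition.
#
#   from independent_jobs.jobs.IndependentJob import IndependentJob
#
#   class Ex2Job(IndependentJob):
#       def __init__(self, aggregator, p, data_source, prob_label, rep,
#                    job_func, prob_param):
#           # hand the aggregator to the engine so it can collect the result
#           IndependentJob.__init__(self, aggregator)
#           self.p = p
#           self.data_source = data_source
#           self.prob_label = prob_label
#           self.rep = rep
#           self.job_func = job_func
#           self.prob_param = prob_param
# --------------------------------------------------------------------------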
def run_problem(prob_label):
    """Run the experiment"""
    L = get_pqsource_list(prob_label)
    prob_params, ps, data_sources = zip(*L)
    # make them lists
    prob_params = list(prob_params)
    ps = list(ps)
    data_sources = list(data_sources)

    # /////// submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    from kgof.config import expr_configs
    tmp_dir = expr_configs['scratch_path']
    foldername = os.path.join(tmp_dir, 'kgof_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_" % ex, parameter_prefix="")

    # Use the following line if the Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters)
    #engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    n_methods = len(method_job_funcs)
    # repetitions x len(prob_params) x #methods
    aggregators = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' % \
                    (prob_label, func_name, sample_size, r, param, alpha,
                     tr_proportion)
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info('%s exists. Load and return.' % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, pi, mi] = sra
                else:
                    # the result does not exist, or a rerun is requested
                    # p: an UnnormalizedDensity object
                    p = ps[pi]
                    job = Ex2Job(SingleResultAggregator(), p, data_sources[pi],
                                 prob_label, r, f, param)
                    agg = engine.submit_job(job)
                    aggregators[r, pi, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all calls in the engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                logger.info("Collecting result (%s, r=%d, param=%.3g)" %
                            (f.__name__, r, param))
                # let the aggregator finalize things
                aggregators[r, pi, mi].finalize()

                # aggregators[r, pi, mi].get_final_result() returns a
                # SingleResult instance, from which we extract the actual result
                job_result = aggregators[r, pi, mi].get_final_result().result
                job_results[r, pi, mi] = job_result

    #func_names = [f.__name__ for f in method_job_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save aggregated results
    results = {
        'job_results': job_results,
        'prob_params': prob_params,
        'alpha': alpha,
        'repeats': reps,
        'ps': ps,
        'list_data_source': data_sources,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
        'sample_size': sample_size,
    }
    # the filename encodes the experiment settings
    fname = 'ex%d-%s-me%d_n%d_rs%d_pmi%g_pma%g_a%.3f_trp%.2f.p' % \
        (ex, prob_label, n_methods, sample_size, reps, min(prob_params),
         max(prob_params), alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
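# --------------------------------------------------------------------------
# Illustrative entry point (a sketch, not part of the original file): it
# assumes this module is executed as a script and that the command-line
# argument is one of the problem labels understood by get_pqsource_list().
# --------------------------------------------------------------------------
def main():
    import sys
    if len(sys.argv) != 2:
        # print a short usage message and exit with a nonzero status
        print('Usage: %s prob_label' % sys.argv[0])
        sys.exit(1)
    prob_label = sys.argv[1]
    run_problem(prob_label)

if __name__ == '__main__':
    main()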