def __init__(self, batch_parameters, submission_cmd,
             check_interval=10, do_clean_up=False,
             submission_delay=0.5, max_jobs_in_queue=0):
    IndependentComputationEngine.__init__(self)

    self.batch_parameters = batch_parameters
    self.check_interval = check_interval
    self.do_clean_up = do_clean_up
    self.submission_cmd = submission_cmd
    self.submission_delay = submission_delay
    self.max_jobs_in_queue = max_jobs_in_queue

    # make sure submission command executable is in path
    if not FileSystem.cmd_exists(submission_cmd):
        raise ValueError("Submission command executable \"%s\" not found"
                         % submission_cmd)

    # list of tuples of (job_name, submission_time), which is kept in sorted
    # order by the time, only unfinished jobs
    self.submitted_jobs = []

    # list of all jobs ever submitted
    self.all_jobs = []

    # whether to also store all aggregators in current working dir
    self.store_fire_and_forget = False
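# Usage sketch for the constructor above (hypothetical folder name; the engine
# and parameter classes are assumed importable from the surrounding library,
# as in the other snippets in this collection). Construction fails early with
# a ValueError if the submission command is not on the PATH.
import os
from os.path import expanduser

folder = os.sep.join([expanduser("~"), "engine_demo"])  # hypothetical
batch_parameters = BatchClusterParameters(foldername=folder)
engine = SlurmComputationEngine(batch_parameters, check_interval=10,
                                do_clean_up=False)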
def main():
    Log.set_loglevel(logging.DEBUG)

    modulename = "sample_ozone_posterior_average_slurm"

    if not FileSystem.cmd_exists("sbatch"):
        engine = SerialComputationEngine()
    else:
        # restrict jobs to particular cluster partitions
        johns_slurm_hack = "#SBATCH --partition=intel-ivy,compute"
        folder = os.sep + os.sep.join(["nfs", "data3", "ucabhst", modulename])
        batch_parameters = BatchClusterParameters(foldername=folder,
                                                  max_walltime=24 * 60 * 60,
                                                  resubmit_on_timeout=False,
                                                  memory=3,
                                                  parameter_prefix=johns_slurm_hack)
        engine = SlurmComputationEngine(batch_parameters, check_interval=1,
                                        do_clean_up=True)

    prior = Gaussian(Sigma=eye(2) * 100)
    num_estimates = 100

    posterior = OzonePosteriorAverageEngine(computation_engine=engine,
                                            num_estimates=num_estimates,
                                            prior=prior)
    posterior.logdet_method = "shogun_estimate"

    proposal_cov = diag([4.000000000000000e-05, 1.072091680000000e+02])
    mcmc_sampler = StandardMetropolis(posterior, scale=1.0, cov=proposal_cov)

    start = asarray([-11.35, -13.1])
    mcmc_params = MCMCParams(start=start, num_iterations=2000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(StatisticsOutput(print_from=1, lag=1))

    home = expanduser("~")
    folder = os.sep.join([home, modulename])
    store_chain_output = StoreChainOutput(folder)
    chain.append_mcmc_output(store_chain_output)

    loaded = store_chain_output.load_last_stored_chain()
    if loaded is None:
        logging.info("Running chain from scratch")
    else:
        logging.info("Running chain from iteration %d" % loaded.iteration)
        chain = loaded

    chain.run()

    # pickle the final chain; binary mode is required for pickle.dump
    with open(folder + os.sep + "final_chain", "wb") as f:
        dump(chain, f)
def test_slurm_engine_max_waiting_time(self):
    if not FileSystem.cmd_exists("sbatch"):
        raise SkipTest

    home = expanduser("~")
    folder = os.sep.join([home, "unit_test_dummy_slurm_result_max_wait"])
    try:
        shutil.rmtree(folder)
    except OSError:
        pass

    batch_parameters = BatchClusterParameters(foldername=folder)
    engine = SlurmComputationEngine(batch_parameters, check_interval=1)
    sleep_times = [2, -1]

    self.engine_helper(engine, sleep_times)
def test_slurm_engine_no_clean_up(self):
    if not FileSystem.cmd_exists("sbatch"):
        raise SkipTest

    home = expanduser("~")
    folder = os.sep.join([home, "unit_test_slurm_dummy_result"])
    try:
        shutil.rmtree(folder)
    except OSError:
        pass

    batch_parameters = BatchClusterParameters(foldername=folder)
    engine = SlurmComputationEngine(batch_parameters, check_interval=1,
                                    do_clean_up=False)
    num_submissions = 3
    sleep_times = randint(0, 3, num_submissions)

    self.engine_helper(engine, sleep_times)
def test_sge_engine_no_clean_up(self):
    if not FileSystem.cmd_exists("qsub"):
        raise SkipTest

    home = expanduser("~")
    folder = os.sep.join([home, "unit_test_sge_dummy_result"])
    try:
        shutil.rmtree(folder)
    except OSError:
        pass

    batch_parameters = BatchClusterParameters(foldername=folder)
    engine = SGEComputationEngine(batch_parameters, check_interval=1,
                                  do_clean_up=False)
    num_submissions = 3
    sleep_times = randint(0, 3, num_submissions)

    self.engine_helper(engine, sleep_times)
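# Hypothetical sketch of the engine_helper the three tests above call: submit
# one dummy job per sleep time, block until all finish, and check the results.
# DummySleepJob is an assumed name; the real test suite's job class may
# differ. The submit/wait/finalize/clean_up calls mirror the compute()
# snippet at the end of this collection.
def engine_helper(self, engine, sleep_times):
    aggregators = [engine.submit_job(DummySleepJob(t)) for t in sleep_times]
    engine.wait_for_all()
    for agg in aggregators:
        agg.finalize()
        self.assertIsNotNone(agg.get_final_result())
        agg.clean_up()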
def is_available(self):
    return FileSystem.cmd_exists(self.submission_cmd)
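# Usage sketch: pick the batch engine when its submission command is on the
# PATH, otherwise fall back to the serial engine. This is the same pattern as
# the main() snippet above, which tests FileSystem.cmd_exists("sbatch")
# directly; batch_parameters is assumed constructed as shown earlier.
engine = SlurmComputationEngine(batch_parameters, check_interval=1)
if not engine.is_available():
    engine = SerialComputationEngine()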
    return job

if __name__ == "__main__":
    logger.setLevel(10)
    num_repetitions = 10

    # plain MCMC parameters, plan is to use every 200th sample
    thin_step = 1
    num_iterations = 5200
    num_warmup = 200

    compute_local = False

    if not FileSystem.cmd_exists("sbatch") or compute_local:
        engine = SerialComputationEngine()
    else:
        johns_slurm_hack = "#SBATCH --partition=intel-ivy,wrkstn,compute"
        # modulename is assumed to be defined earlier in this script
        folder = os.sep + os.sep.join(["nfs", "data3", "ucabhst", modulename])
        batch_parameters = BatchClusterParameters(foldername=folder,
                                                  resubmit_on_timeout=False,
                                                  parameter_prefix=johns_slurm_hack)
        engine = SlurmComputationEngine(batch_parameters, check_interval=1,
                                        do_clean_up=True)
        engine.max_jobs_in_queue = 1000
        engine.store_fire_and_forget = True
def test_cmd_exists_true(self):
    cmd = "ls"
    self.assertTrue(FileSystem.cmd_exists(cmd))
def test_cmd_exists_false(self):
    cmd = "assdjglksdjsdf"
    self.assertFalse(FileSystem.cmd_exists(cmd))
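# Minimal sketch of the behaviour the two tests above exercise (an
# assumption, not the library's actual implementation): a command "exists"
# if an executable with that name is found on the PATH.
import shutil

def cmd_exists_sketch(cmd):
    # shutil.which returns None when no executable is found (Python 3.3+)
    return shutil.which(cmd) is not None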
def compute(fname_base, job_generator, Ds, Ns, num_repetitions, num_steps,
            step_size, max_steps=None, compute_local=False):
    if not FileSystem.cmd_exists("sbatch") or compute_local:
        engine = SerialComputationEngine()
    else:
        johns_slurm_hack = "#SBATCH --partition=intel-ivy,wrkstn,compute"
        folder = os.sep + os.sep.join(["nfs", "data3", "ucabhst", fname_base])
        batch_parameters = BatchClusterParameters(foldername=folder,
                                                  resubmit_on_timeout=False,
                                                  parameter_prefix=johns_slurm_hack)
        engine = SlurmComputationEngine(batch_parameters, check_interval=1,
                                        do_clean_up=True)
        engine.max_jobs_in_queue = 1000
        engine.store_fire_and_forget = True

    # fixed order of aggregators
    aggregators = []
    for D in Ds:
        for N in Ns:
            for j in range(num_repetitions):
                logger.info("%s trajectory, D=%d/%d, N=%d/%d repetition %d/%d" %
                            (str(job_generator), D, np.max(Ds), N, np.max(Ns),
                             j + 1, num_repetitions))
                job = job_generator(D, N, N)
                aggregators += [engine.submit_job(job)]
                time.sleep(0.1)

    # block until all done
    engine.wait_for_all()

    avg_accept = np.zeros((num_repetitions, len(Ds), len(Ns)))
    avg_accept_est = np.zeros((num_repetitions, len(Ds), len(Ns)))
    log_dets = np.zeros((num_repetitions, len(Ds), len(Ns)))
    log_dets_est = np.zeros((num_repetitions, len(Ds), len(Ns)))
    avg_steps_taken = np.zeros((num_repetitions, len(Ds), len(Ns)))

    agg_counter = 0
    for i in range(len(Ds)):
        for k in range(len(Ns)):
            for j in range(num_repetitions):
                agg = aggregators[agg_counter]
                agg_counter += 1
                agg.finalize()
                result = agg.get_final_result()
                agg.clean_up()

                avg_accept[j, i, k] = result.acc_mean
                avg_accept_est[j, i, k] = result.acc_est_mean
                log_dets[j, i, k] = result.vol
                log_dets_est[j, i, k] = result.vol_est
                avg_steps_taken[j, i, k] = result.steps_taken

                with open(fname_base + ".csv", 'a+') as f:
                    line = np.array([Ds[i], Ns[k],
                                     avg_accept[j, i, k],
                                     avg_accept_est[j, i, k],
                                     log_dets[j, i, k],
                                     log_dets_est[j, i, k],
                                     avg_steps_taken[j, i, k],
                                     ])
                    f.write(" ".join(map(str, line)) + os.linesep)
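# Follow-up sketch: reading the space-separated results file back for
# analysis. The column order matches what compute() appends above; the file
# name stands in for whatever fname_base was passed (hypothetical here).
import numpy as np

results = np.loadtxt("my_experiment.csv")  # hypothetical fname_base
D_col, N_col = results[:, 0], results[:, 1]
avg_accept_col, avg_accept_est_col = results[:, 2], results[:, 3]
log_dets_col, log_dets_est_col = results[:, 4], results[:, 5]
avg_steps_taken_col = results[:, 6]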