def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    """Run a single dispatch cycle of the optimization loop.

    Reloads the experiment and its grid, logs the current best result,
    returns crashed/lost pending jobs to the candidate pool, records the
    trace and best-job files, and — capacity permitting — asks the chooser
    for one new candidate and submits it through ``driver``.

    Returns:
        False when the caller's loop should stop (max finished jobs
        reached, or no candidates remain); True otherwise, including when
        the concurrency cap merely prevents a submission this cycle.
    """
    log("\n" + "-" * 40)
    expt = load_experiment(expt_config)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        log("Current best: %f (job %d)" % (best_val, best_job))
    else:
        log("Current best: No results returned yet.")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()

    n_candidates = candidates.shape[0]
    n_pending = pending.shape[0]
    n_complete = complete.shape[0]
    log("%d candidates %d pending %d complete"
        % (n_candidates, n_pending, n_complete))

    # Verify that pending jobs are actually running, and add them back to the
    # candidate set if they have crashed or gotten lost.
    for job_id in pending:
        proc_id = expt_grid.get_proc_id(job_id)
        if not driver.is_proc_alive(job_id, proc_id):
            log("Set job %d back to pending status." % (job_id))
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    write_trace(expt_dir, best_val, best_job, n_candidates, n_pending,
                n_complete)

    # Print out the best job results
    write_best_job(expt_dir, best_val, best_job, expt_grid)

    if n_complete >= options.max_finished_jobs:
        # BUGFIX: the adjacent literals used to concatenate to
        # "reached.Exiting"; add the missing separator.
        log("Maximum number of finished jobs (%d) reached. Exiting"
            % options.max_finished_jobs)
        return False

    if n_candidates == 0:
        log("There are no candidates left. Exiting.")
        return False

    if n_pending >= options.max_concurrent:
        log("Maximum number of jobs (%d) pending." % (options.max_concurrent))
        return True

    # Ask the chooser to pick the next candidate.
    log("Choosing next candidate... ")
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a brand-new point;
    # add it to the grid so it gets a real job id.
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    log("selected job %d from the grid." % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id = job_id
    job.expt_dir = expt_dir
    job.name = expt.name
    job.language = expt.language
    job.status = 'submitted'
    job.submit_t = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    save_job(job)
    pid = driver.submit_job(job)
    if pid is not None:  # BUGFIX: was "pid != None" (identity vs equality)
        log("submitted - pid = %d" % (pid))
        expt_grid.set_submitted(job_id, pid)
    else:
        log("Failed to submit job!")
        log("Deleting job file.")
        os.unlink(job_file_for(job))

    return True
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Plot the chooser's GP over the experiment grid instead of dispatching.

    Loads the experiment, locates the best completed job, then renders 1D
    marginal slices (and, for each dimension pair, 2D slices) of the GP
    mean / variance / expected improvement through that best point.
    Images are written under <plot_dir>/1D and <plot_dir>/2D, CSV dumps
    under <plot_dir>/CSV/{1D,2D}.  Exits the process when no evaluation
    has completed yet.

    NOTE(review): despite the shared name, this variant never submits
    jobs; ``work_dir`` appears unused here.  Indentation below was
    reconstructed from a whitespace-mangled source -- verify the block
    nesting (especially the savefig / no_plot scopes) against the
    original.
    """
    #import drmaa
    sys.stderr.write("\n")
    expt_file = os.path.join(expt_dir, options.config_file)
    expt = load_expt(expt_file)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()
    sys.stderr.write("%d candidates %d pending %d complete\n" %
                     (candidates.shape[0], pending.shape[0],
                      complete.shape[0]))

    ################# START ### ANDREI ########################
    # Create the plot output tree on first use.
    plot_dir = os.path.join(expt_dir, options.plot_dir)
    if not os.path.exists(plot_dir):
        sys.stderr.write("Creating plot directories '%s'.\n" % (plot_dir))
        mkdirp(plot_dir)
        mkdirp(os.path.join(plot_dir, '1D'))
        mkdirp(os.path.join(plot_dir, '2D'))
        mkdirp(os.path.join(plot_dir, 'CSV'))
        mkdirp(os.path.join(plot_dir, 'CSV', '1D'))
        mkdirp(os.path.join(plot_dir, 'CSV', '2D'))

    gmap = expt_grid.vmap
    if np.isnan(best_job):
        # TODO: deal with plotting the prior GP with no evaluated points
        sys.stderr.write("Need at least one complete evaluation to plot\n")
        sys.exit(-1)

    # Best completed point, reshaped to a single row of the full
    # (1, cardinality) parameter vector; all slices pass through it.
    best_complete = grid[best_job, :].reshape((1,gmap.cardinality))
    #print('Best complete is ' + str(best_complete))
    #print('best_complete.shape is ' + str(best_complete.shape))

    # Avoid MCMC if not needed
    if options.no_mcmc:
        chooser.mcmc_iters = 0

    # Fit the hyperparameters only once for the whole plotting
    chooser.prepare_evaluation(grid, values, complete)

    # Loop on first dimension.  grid_i is the flat column index into the
    # grid; multi-size variables contribute several consecutive columns.
    grid_i = 0
    for v1 in gmap.variables:
        v1_dim = v1['size']
        for i in range(0,v1_dim):
            v1_name = str(v1['name'])
            if v1_dim > 1:
                v1_name = v1_name + "_" + str(i+1)
            print('PLOT1D:',v1_name, 'Min:', v1['min'], 'Max:', v1['max'])

            # Evaluate on the marginal slice containing the best fit
            print('slicing along dim ' + str(grid_i))
            x, candidates = slice_1d(best_complete, grid_i,
                                     options.grid_size)
            mean, variance, ei = evaluate_gp(chooser, candidates,
                                             grid[complete, :],
                                             values[complete],
                                             durations[complete])

            if not options.no_plot:
                plot_1d(x, v1['min'], v1['max'], mean, variance, ei,
                        best_complete, v1_name)
                if options.plot_max < float("+inf"):
                    pplt.ylim(ymax=options.plot_max)
                if options.plot_min > float("-inf"):
                    pplt.ylim(ymin=options.plot_min)

                # If the space is entirely 1D, plot the evaluation points
                if gmap.cardinality == 1:
                    ylim = pplt.ylim()
                    pplt.scatter(np.asarray(complete).squeeze(),
                                 np.asarray(values).squeeze(),
                                 c='lime', marker='o', s=dot_size)
                    pplt.ylim(ylim)
                pplt.savefig(os.path.join(plot_dir, '1D',
                                          v1_name + '.png'))

            if not options.no_csv:
                out_file = os.path.join(plot_dir, 'CSV', '1D',
                                        v1_name + '.csv')
                save_to_csv(out_file, gmap, candidates, mean, variance, ei)

            # Loop on second dimension
            grid_j = 0
            for v2 in gmap.variables:
                v2_dim = v2['size']
                for j in range(0,v2_dim):
                    # Sub-diagonal is skipped: only plot each unordered
                    # pair once (grid_j > grid_i).
                    if grid_j <= grid_i:
                        grid_j = grid_j + 1
                        continue
                    v2_name = str(v2['name'])
                    if v2_dim > 1:
                        v2_name = v2_name + "_" + str(j+1)
                    print('PLOT2D:',v1_name, ' vs ', v2_name)

                    # Now let's evaluate the GP on a grid
                    x, y, candidates = slice_2d(best_complete, v1['min'],
                                                v1['max'], v2['min'],
                                                v2['max'], grid_i, grid_j,
                                                options.grid_size)
                    mean, variance, ei = evaluate_gp(chooser, candidates,
                                                     grid[complete, :],
                                                     values[complete],
                                                     durations[complete])

                    if not options.no_plot:
                        h, h_mean, h_var, h_ei = plot_2d(x, y, mean,
                                                         variance, ei,
                                                         best_complete,
                                                         v1_name, v2_name)
                        # Clamp the color scales of all three panels to the
                        # user-requested range.
                        if options.plot_max < float("+inf"):
                            h_mean.set_clim(vmax=options.plot_max)
                            h_var.set_clim(vmax=options.plot_max)
                            h_ei.set_clim(vmax=options.plot_max)
                        if options.plot_min > float("-inf"):
                            h_mean.set_clim(vmin=options.plot_min)
                            h_var.set_clim(vmin=options.plot_min)
                            h_ei.set_clim(vmin=options.plot_min)

                        # If the space is entirely 2D, plot the evaluation points
                        # NOTE(review): this inner loop reuses the outer
                        # loop variable `i`; harmless for a Python `for`
                        # over range, but worth renaming.
                        if gmap.cardinality == 2:
                            for i in (131,132,133):
                                pplt.subplot(i)
                                xlim = pplt.xlim()
                                ylim = pplt.ylim()
                                pplt.scatter(
                                    np.asarray(complete[:,0]).squeeze(),
                                    np.asarray(complete[:,1]).squeeze(),
                                    c='lime', marker='o', s=dot_size)
                                pplt.xlim(xlim)
                                pplt.ylim(ylim)
                        pplt.savefig(os.path.join(plot_dir, '2D',
                                                  v1_name + "_" + v2_name
                                                  + ".png"))

                    if not options.no_csv:
                        out_file = os.path.join(plot_dir, 'CSV', '2D',
                                                v1_name + "_" + v2_name
                                                + ".csv")
                        save_to_csv(out_file, gmap, candidates, mean,
                                    variance, ei)

                    grid_j = grid_j + 1
            grid_i = grid_i + 1
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Run one dispatch cycle using plain OS processes.

    Pending jobs are probed with ``os.kill(pid, 0)``; jobs whose process
    has disappeared are returned to the candidate pool.  The trace file is
    appended, the best-job file rewritten, stop conditions terminate the
    process with ``sys.exit(0)``, and at most one new candidate is chosen
    and handed to ``job_submit``.
    """
    sys.stderr.write("\n")
    expt_file = os.path.join(expt_dir, options.config_file)
    expt = load_expt(expt_file)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))
    else:
        sys.stderr.write("Current best: No results returned yet.\n")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()
    sys.stderr.write("%d candidates %d pending %d complete\n"
                     % (candidates.shape[0], pending.shape[0],
                        complete.shape[0]))

    # Verify that pending jobs are actually running.
    for job_id in pending:
        sgeid = expt_grid.get_sgeid(job_id)
        try:
            # Signal 0 only tests for process existence / permission
            # (note this could kill it in windows).
            os.kill(sgeid, 0)
        except OSError:
            # Job is no longer running but still marked pending.  Assume it
            # crashed out and make it a candidate again.
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization (one CSV row per cycle).
    # 'with' guarantees the handle is closed even on a write error.
    with open(os.path.join(expt_dir, 'trace.csv'), 'a') as trace_fh:
        trace_fh.write("%d,%f,%d,%d,%d,%d\n" % (time.time(), best_val,
                                                best_job,
                                                candidates.shape[0],
                                                pending.shape[0],
                                                complete.shape[0]))

    # Print out the best job results (rewritten from scratch each cycle).
    with open(os.path.join(expt_dir, 'best_job_and_result.txt'),
              'w') as best_job_fh:
        best_job_fh.write("Best result: %f\nJob-id: %d\nParameters: \n"
                          % (best_val, best_job))
        for best_params in expt_grid.get_params(best_job):
            best_job_fh.write(str(best_params) + '\n')

    if complete.shape[0] >= options.max_finished_jobs:
        # BUGFIX: add the missing separator (the adjacent literals used to
        # concatenate to "reached.Exiting").
        sys.stderr.write("Maximum number of finished jobs (%d) reached. "
                         "Exiting\n" % options.max_finished_jobs)
        sys.exit(0)

    if candidates.shape[0] == 0:
        sys.stderr.write("There are no candidates left. Exiting.\n")
        sys.exit(0)

    if pending.shape[0] >= options.max_concurrent:
        sys.stderr.write("Maximum number of jobs (%d) pending.\n"
                         % (options.max_concurrent))
        return

    # Ask the chooser to actually pick one.
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a new job.
    # We have to add this to our grid
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    sys.stderr.write("Selected job %d from the grid.\n" % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id = job_id
    job.expt_dir = expt_dir
    job.name = expt.name
    job.language = expt.language
    job.status = 'submitted'
    job.submit_t = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    # Make sure we have a job subdirectory.
    job_subdir = os.path.join(expt_dir, 'jobs')
    if not os.path.exists(job_subdir):
        os.mkdir(job_subdir)

    # Name this job file.
    job_file = os.path.join(job_subdir, '%08d.pb' % (job_id))

    # Store the job file.
    save_job(job_file, job)

    # Make sure there is a directory for output.
    output_subdir = os.path.join(expt_dir, 'output')
    if not os.path.exists(output_subdir):
        os.mkdir(output_subdir)
    output_file = os.path.join(output_subdir, '%08d.out' % (job_id))

    process = job_submit("%s-%08d" % (expt_name, job_id), output_file,
                         job_file, work_dir)
    process.poll()
    if process.returncode is not None and process.returncode < 0:
        sys.stderr.write("Failed to submit job or job crashed "
                         "with return code %d !\n" % process.returncode)
        sys.stderr.write("Deleting job file.\n")
        os.unlink(job_file)
        return
    else:
        sys.stderr.write("Submitted job as process: %d\n" % process.pid)

        # Now, update the experiment status to submitted.
        expt_grid.set_submitted(job_id, process.pid)

    return
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Run one dispatch cycle against a Sun Grid Engine cluster via DRMAA.

    Pending jobs are interrogated through a DRMAA session; jobs that are
    held, suspended, failed, in an undetermined state, or unknown to the
    scheduler are terminated and returned to the candidate pool.  The
    trace and best-job files are appended, stop conditions terminate with
    ``sys.exit(0)``, and at most one new candidate is submitted with
    ``sge_submit``.
    """
    import drmaa
    sys.stderr.write("\n")
    expt_file = os.path.join(expt_dir, options.config_file)
    expt = load_expt(expt_file)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()
    sys.stderr.write("%d candidates %d pending %d complete\n"
                     % (candidates.shape[0], pending.shape[0],
                        complete.shape[0]))

    # Verify that pending jobs are actually running.
    s = drmaa.Session()
    s.initialize()
    try:
        for job_id in pending:
            sgeid = expt_grid.get_sgeid(job_id)
            reset_job = False
            try:
                status = s.jobStatus(str(sgeid))
            except Exception:
                # BUGFIX: was a bare "except:", which also swallowed
                # KeyboardInterrupt / SystemExit.
                sys.stderr.write("EXC: %s\n" % (str(sys.exc_info()[0])))
                sys.stderr.write("Could not find SGE id for job %d (%d)\n"
                                 % (job_id, sgeid))
                status = -1
                reset_job = True

            if status == drmaa.JobState.UNDETERMINED:
                sys.stderr.write("Job %d (%d) in undetermined state.\n"
                                 % (job_id, sgeid))
                reset_job = True
            elif status in [drmaa.JobState.QUEUED_ACTIVE,
                            drmaa.JobState.RUNNING]:
                pass  # Good shape.
            elif status in [drmaa.JobState.SYSTEM_ON_HOLD,
                            drmaa.JobState.USER_ON_HOLD,
                            drmaa.JobState.USER_SYSTEM_ON_HOLD,
                            drmaa.JobState.SYSTEM_SUSPENDED,
                            drmaa.JobState.USER_SUSPENDED]:
                sys.stderr.write("Job %d (%d) is held or suspended.\n"
                                 % (job_id, sgeid))
                reset_job = True
            elif status == drmaa.JobState.DONE:
                # Finished on the cluster; the grid just hasn't been
                # updated yet, so leave it alone.
                sys.stderr.write("Job %d (%d) complete but not yet "
                                 "updated.\n" % (job_id, sgeid))
            elif status == drmaa.JobState.FAILED:
                sys.stderr.write("Job %d (%d) failed.\n" % (job_id, sgeid))
                reset_job = True

            if reset_job:
                try:
                    # Kill the job.
                    s.control(str(sgeid), drmaa.JobControlAction.TERMINATE)
                    sys.stderr.write("Killed SGE job %d.\n" % (sgeid))
                except Exception:
                    # BUGFIX: was a bare "except:" — narrowed.
                    sys.stderr.write("Failed to kill SGE job %d.\n"
                                     % (sgeid))

                # Set back to being a candidate state.
                expt_grid.set_candidate(job_id)
                sys.stderr.write("Set job %d back to pending status.\n"
                                 % (job_id))
    finally:
        # BUGFIX: guarantee the DRMAA session is released even if the
        # status loop raises; it used to leak on any uncaught exception.
        s.exit()

    # Track the time series of optimization.
    with open(os.path.join(expt_dir, 'trace.csv'), 'a') as trace_fh:
        trace_fh.write("%d,%f,%d,%d,%d,%d\n" % (time.time(), best_val,
                                                best_job,
                                                candidates.shape[0],
                                                pending.shape[0],
                                                complete.shape[0]))

    # Print out the best job results
    with open(os.path.join(expt_dir, 'best_job_and_result.txt'),
              'a') as best_job_fh:
        best_job_fh.write("Best result: %f\n Job-id: %d\n Parameters: %s\n"
                          % (best_val, best_job,
                             expt_grid.get_params(best_job)))

    if complete.shape[0] >= options.max_finished_jobs:
        sys.stderr.write("Maximum number of finished jobs (%d) reached. "
                         "Exiting\n" % options.max_finished_jobs)
        sys.exit(0)

    if candidates.shape[0] == 0:
        sys.stderr.write("There are no candidates left. Exiting.\n")
        sys.exit(0)

    if pending.shape[0] >= options.max_concurrent:
        sys.stderr.write("Maximum number of jobs (%d) pending.\n"
                         % (options.max_concurrent))
        return

    # Ask the chooser to actually pick one.
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a new job.
    # We have to add this to our grid
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    sys.stderr.write("Selected job %d from the grid.\n" % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id = job_id
    job.expt_dir = expt_dir
    job.name = expt.name
    job.language = expt.language
    job.status = 'submitted'
    job.submit_t = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    # Make sure we have a job subdirectory.
    job_subdir = os.path.join(expt_dir, 'jobs')
    if not os.path.exists(job_subdir):
        os.mkdir(job_subdir)

    # Name this job file.
    job_file = os.path.join(job_subdir, '%08d.pb' % (job_id))

    # Store the job file.
    save_job(job_file, job)

    # Make sure there is a directory for output.
    output_subdir = os.path.join(expt_dir, 'output')
    if not os.path.exists(output_subdir):
        os.mkdir(output_subdir)
    output_file = os.path.join(output_subdir, '%08d.out' % (job_id))

    queue_id, msg = sge_submit("%s-%08d" % (expt_name, job_id), output_file,
                               DEFAULT_MODULES, job_file, work_dir)
    if queue_id is None:
        sys.stderr.write("Failed to submit job: %s" % (msg))
        sys.stderr.write("Deleting job file.\n")
        os.unlink(job_file)
        return
    else:
        sys.stderr.write("Submitted as job %d\n" % (queue_id))

        # Now, update the experiment status to submitted.
        expt_grid.set_submitted(job_id, queue_id)

    return
def attempt_dispatch(expt_config, expt_dir, chooser, driver, options):
    """Run one dispatch cycle with EI-based early stopping and a cache of
    previously executed parameter sets.

    Like the plain driver-based dispatcher, but additionally: tracks an
    "executed" job set, stops the loop when expected improvement drops
    below ``config.EI`` after ``config.MIN_ACCEPTED_RUNS`` runs, and
    copies cached status/value/duration instead of re-running a job whose
    parameters were already executed.

    NOTE(review): indentation below was reconstructed from a
    whitespace-mangled source; uses Python 2 print statements.

    Returns False to stop the optimization loop, True to keep going.
    """
    log("\n" + "-" * 40)
    expt = load_experiment(expt_config)
    print(options)

    # Build the experiment grid.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        log("Current best: %f (job %d)" % (best_val, best_job))
    else:
        log("Current best: No results returned yet.")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()
    executed = expt_grid.get_executed()

    n_candidates = candidates.shape[0]
    n_pending = pending.shape[0]
    n_complete = complete.shape[0]
    n_executed = executed.shape[0]
    log("%d candidates %d pending %d complete %d executed"
        % (n_candidates, n_pending, n_complete, n_executed))

    # Verify that pending jobs are actually running, and add them back to the
    # candidate set if they have crashed or gotten lost.
    for job_id in pending:
        proc_id = expt_grid.get_proc_id(job_id)
        if not driver.is_proc_alive(job_id, proc_id):
            log("Set job %d back to pending status." % (job_id))
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization.
    write_trace(expt_dir, best_val, best_job, n_candidates, n_pending,
                n_complete, n_executed)

    # Print out the best job results
    write_best_job(expt_dir, best_val, best_job, expt_grid)

    if n_complete >= options.max_finished_jobs:
        # NOTE(review): the adjacent literals concatenate to
        # "reached.Exiting" (missing separator) -- message only,
        # the stop logic itself is correct.
        log("Maximum number of finished jobs (%d) reached."
            "Exiting" % options.max_finished_jobs)
        return False

    if n_candidates == 0:
        log("There are no candidates left. Exiting.")
        return False

    if n_pending >= options.max_concurrent:
        log("Maximum number of jobs (%d) pending."
            % (options.max_concurrent))
        return True

    else:
        # start a bunch of candidate jobs if possible
        #to_start = min(options.max_concurrent - n_pending, n_candidates)
        #log("Trying to start %d jobs" % (to_start))
        #for i in xrange(to_start):

        # Ask the chooser to pick the next candidate
        log("Choosing next candidate... ")
        job_id, ei = chooser.next(grid, values, durations, candidates,
                                  pending, complete)
        log("Expected improvement: %.6f" % ei)
        print ">>>>>>>", n_executed, ei

        # Early stopping: once enough runs are accepted, a low expected
        # improvement counts as a strike.
        if ei < config.EI and n_executed >= config.MIN_ACCEPTED_RUNS:
            config.strikes += 1
            # NOTE(review): strikes is incremented and then immediately
            # compared with > 0, so the very first strike already stops
            # the loop; the counter is redundant unless a larger
            # threshold was intended -- confirm.
            if config.strikes > 0:
                return False
        else:
            config.strikes = 0

        # If the job_id is a tuple, then the chooser picked a new job.
        # We have to add this to our grid
        if isinstance(job_id, tuple):
            (job_id, candidate) = job_id
            job_id = expt_grid.add_to_grid(candidate)

        log("selected job %d from the grid." % (job_id))

        # Convert this back into an interpretable job and add metadata.
        job = Job()
        job.id = job_id
        job.expt_dir = expt_dir
        job.name = expt.name
        job.language = expt.language
        job.status = 'submitted'
        job.submit_t = int(time.time())
        job.param.extend(expt_grid.get_params(job_id))

        #TODO: (@omid) check if the job has been previously completed; if so
        # mark the job as completed and use the cached value
        # Build a hashable key from the job's parameters: ndarrays become
        # lists, lists become frozensets, then the whole dict becomes a
        # frozenset of items so it can index jobs_executed.
        params = job_params(job)
        for key, val in params.items():
            if isinstance(val, np.ndarray):
                val = val.tolist()
            if isinstance(val, list):
                val = frozenset(val)
            params[key] = val
        params = frozenset(params.items())

        if params in jobs_executed:
            # Same parameters were already run: copy the cached status,
            # value and duration instead of re-executing the job.
            jid = jobs_executed[params]
            print ">>>> Bypassing job execution."
            for stat in ['status', 'values', 'durs']:
                dic = getattr(expt_grid, stat)
                dic[job_id] = dic[jid]
            expt_grid._save_jobs()
            return True

        jobs_executed[params] = job_id

        save_job(job)
        pid = driver.submit_job(job)
        if pid != None:
            log("submitted - pid = %d" % (pid))
            expt_grid.set_submitted(job_id, pid)
        else:
            log("Failed to submit job!")
            log("Deleting job file.")
            os.unlink(job_file_for(job))

    return True
def attempt_dispatch(expt_name, expt_dir, work_dir, chooser, options):
    """Run one dispatch cycle using local processes tracked via psutil.

    Pending jobs whose stored pid no longer appears in the process table
    are returned to the candidate pool.  Trace and best-job files are
    updated, stop/wait conditions are checked (including a cap on
    pending + complete versus max_finished_jobs), and at most one new
    candidate is chosen and handed to ``job_submit``.
    """
    sys.stderr.write("\n")
    expt_file = os.path.join(expt_dir, options.config_file)
    expt = load_expt(expt_file)

    # Build the experiment grid with locking enabled.
    expt_grid = ExperimentGrid(expt_dir, expt.variable, options.grid_size,
                               options.grid_seed, locking=True)

    # Print out the current best function value.
    best_val, best_job = expt_grid.get_best()
    if best_job >= 0:
        sys.stderr.write("Current best: %f (job %d)\n" % (best_val, best_job))
    else:
        sys.stderr.write("Current best: No results returned yet.\n")

    # Gets you everything - NaN for unknown values & durations.
    grid, values, durations = expt_grid.get_grid()

    # Returns lists of indices.
    candidates = expt_grid.get_candidates()
    pending = expt_grid.get_pending()
    complete = expt_grid.get_complete()
    sys.stderr.write(
        "%d candidates %d pending %d complete\n" %
        (candidates.shape[0], pending.shape[0], complete.shape[0]))

    # Verify that pending jobs are actually running.
    # PERF: snapshot the process table once instead of calling
    # psutil.pids() (a full scan) for every pending job.
    live_pids = set(psutil.pids())
    for job_id in pending:
        sgeid = expt_grid.get_sgeid(job_id)
        if sgeid not in live_pids:  # idiom fix: was "not sgeid in"
            # Job is no longer running but still marked pending.
            # Assume it crashed out and make it a candidate again.
            expt_grid.set_candidate(job_id)

    # Track the time series of optimization (one CSV row per cycle).
    with open(os.path.join(expt_dir, 'trace.csv'), 'a') as trace_fh:
        trace_fh.write("%d,%f,%d,%d,%d,%d\n" % (time.time(), best_val,
                                                best_job,
                                                candidates.shape[0],
                                                pending.shape[0],
                                                complete.shape[0]))

    # Print out the best job results (rewritten each cycle).
    with open(os.path.join(expt_dir, 'best_job_and_result.txt'),
              'w') as best_job_fh:
        best_job_fh.write("Best result: %f\nJob-id: %d\nParameters: \n"
                          % (best_val, best_job))
        for best_params in expt_grid.get_params(best_job):
            best_job_fh.write(str(best_params) + '\n')

    if complete.shape[0] >= options.max_finished_jobs:
        sys.stderr.write("Maximum number of finished jobs (%d) reached.\n"
                         "Exiting\n" % options.max_finished_jobs)
        sys.exit(0)

    if candidates.shape[0] == 0 and pending.shape[0] > 0:
        sys.stderr.write(
            "There are no candidates left. Waiting for job completion.\n")
        return

    if candidates.shape[0] == 0 and pending.shape[0] == 0:
        sys.stderr.write("There are no candidates left. Exiting.\n")
        sys.exit(0)

    if pending.shape[0] >= options.max_concurrent:
        sys.stderr.write("Maximum number of jobs (%d) pending.\n"
                         % (options.max_concurrent))
        return

    # Dont submit if pending + finished > max_finished_jobs.
    if pending.shape[0] + complete.shape[0] >= options.max_finished_jobs:
        sys.stderr.write("Full number of jobs (%d) submitted. Waiting for "
                         "completion.\n" % (options.max_finished_jobs))
        return

    # Ask the chooser to actually pick one.
    job_id = chooser.next(grid, values, durations, candidates, pending,
                          complete)

    # If the job_id is a tuple, then the chooser picked a new job.
    # We have to add this to our grid
    if isinstance(job_id, tuple):
        (job_id, candidate) = job_id
        job_id = expt_grid.add_to_grid(candidate)

    sys.stderr.write("Selected job %d from the grid.\n" % (job_id))

    # Convert this back into an interpretable job and add metadata.
    job = Job()
    job.id = job_id
    job.expt_dir = expt_dir
    job.name = expt.name
    job.language = expt.language
    job.status = 'submitted'
    job.submit_t = int(time.time())
    job.param.extend(expt_grid.get_params(job_id))

    # Make sure we have a job subdirectory.
    job_subdir = os.path.join(expt_dir, 'jobs')
    if not os.path.exists(job_subdir):
        os.mkdir(job_subdir)

    # Name this job file.
    job_file = os.path.join(job_subdir, '%08d.pb' % (job_id))

    # Store the job file.
    save_job(job_file, job)

    # Make sure there is a directory for output.
    output_subdir = os.path.join(expt_dir, 'output')
    if not os.path.exists(output_subdir):
        os.mkdir(output_subdir)
    output_file = os.path.join(output_subdir, '%08d.out' % (job_id))

    process = job_submit("%s-%08d" % (expt_name, job_id), output_file,
                         job_file, work_dir)
    process.poll()
    if process.returncode is not None and process.returncode < 0:
        sys.stderr.write("Failed to submit job or job crashed "
                         "with return code %d !\n" % process.returncode)
        sys.stderr.write("Deleting job file.\n")
        os.unlink(job_file)
        return
    else:
        sys.stderr.write("Submitted job as process: %d\n" % process.pid)

        # Now, update the experiment status to submitted.
        expt_grid.set_submitted(job_id, process.pid)

    return