def main():
    """
    Main script.

    Reads the ``jobid`` and ``information`` command line options, looks the
    job up through PBSQuery and prints the transformed job information, one
    entry per line. Exits with status 1 when no jobid was given.
    """
    # Each option spec is a 4-tuple whose first element is the help text.
    jobid_spec = ("The PBS_JOBID of the job for which we want information", None, "store", None)
    information_spec = (
        "Comma-separated list of the job info to print. "
        "Entries of the format input_key:output_key",
        None,
        "store",
        None,
    )
    opts = simple_option({"jobid": jobid_spec, "information": information_spec})

    # Without a job id there is nothing to query.
    if not opts.options.jobid:
        logger.error("jobid is a required option. Bailing.")
        sys.exit(1)

    job = PBSQuery().getjob(opts.options.jobid)
    lines = transform_info(job, opts.options.information)
    print("\n".join(lines))
class PBSScheduler(AbstractClusterScheduler):
    """Cluster scheduler that submits jobs with ``qsub`` and polls them via PBSQuery."""

    def __init__(self, *args, **kwargs):
        """Create the PBSQuery handle, then delegate to the base class constructor."""
        self.pbsquery = PBSQuery()
        super(PBSScheduler, self).__init__(*args, **kwargs)

    def output_regexp(self):
        """Return a regexp that captures a run of digits at the start of the text.

        NOTE(review): presumably applied to the qsub output to extract the
        job id -- confirm against the base class.
        """
        return r'(^\d+)'

    def submit_command(self, output_file, job_name):
        """Return the ``qsub`` command line used to submit a job.

        Arguments:
        - output_file : path used for both the -o and -e options; -j oe is
                        passed as well
        - job_name    : value passed to -N
        """
        # Note for posterity: ssh bint01 "source BLAH && qsub BLAH" doesn't work
        # Earlier variants wrapped the command in `ssh bint01 "..."`, used the
        # absolute path /usr/syscom/nsg/opt/torque/4.2.6/bin/qsub and passed
        # `-S /bin/bash`.
        return """qsub -N %s -e %s -o %s -j oe """ % (job_name, output_file, output_file)

    def alive(self, process_id):
        """Report whether the job is queued or running; delete it otherwise.

        Returns True when the job state is 'Q' or 'R'. For any other state
        (including 'H'/'S', or when the job cannot be queried) a deletion of
        the job is attempted and False is returned.

        Arguments:
        - process_id : job id; it is passed through str() for the PBS calls
                       and %s-formatted in messages, so ints and strings
                       both work
        """
        is_alive = False
        try:
            status = self.pbsquery.getjob(str(process_id))['job_state'][0]
        except Exception:
            # job not found (or the query failed); -1 matches no known state
            status = -1
            sys.stderr.write("EXC: %s\n" % str(sys.exc_info()[0]))
            sys.stderr.write("Could not find job for process id %s\n" % (process_id,))

        if status == 'Q':
            sys.stderr.write("Job %s waiting in queue.\n" % (process_id,))
            is_alive = True
        elif status == 'R':
            sys.stderr.write("Job %s is running.\n" % (process_id,))
            is_alive = True
        elif status in ['H', 'S']:
            # Held/suspended jobs are treated as dead and deleted below.
            sys.stderr.write("Job %s is held or suspended.\n" % (process_id,))
            is_alive = False

        if is_alive:
            return True

        try:
            # Kill the job.
            c = pbs.pbs_connect(pbs.pbs_default())
            # NOTE(review): the return value of pbs_deljob is not inspected;
            # presumably non-zero signals failure -- confirm and report it.
            pbs.pbs_deljob(c, str(process_id))
            sys.stderr.write("Killed job %s.\n" % (process_id,))
        except Exception:
            sys.stderr.write("Failed to kill job %s.\n" % (process_id,))
        return False
class PBSScheduler(AbstractClusterScheduler):
    """Cluster scheduler that submits jobs with ``qsub`` over ssh and polls them via PBSQuery."""

    def __init__(self, *args, **kwargs):
        """Create the PBSQuery handle, then delegate to the base class constructor."""
        self.pbsquery = PBSQuery()
        super(PBSScheduler, self).__init__(*args, **kwargs)

    def output_regexp(self):
        """Return a regexp that captures a run of digits at the start of the text.

        NOTE(review): presumably applied to the qsub output to extract the
        job id -- confirm against the base class.
        """
        return r'(^\d+)'

    def submit_command(self, output_file, job_name):
        """Return the command line that runs ``qsub`` on host bint01 through ssh.

        Arguments:
        - output_file : path used for both the -o and -e options; -j oe is
                        passed as well
        - job_name    : value passed to -N
        """
        # Note for posterity: ssh bint01 "source BLAH && qsub BLAH" doesn't work
        return """ssh bint01 "/usr/syscom/nsg/opt/torque/4.2.6/bin/qsub -S /bin/bash -N %s -e %s -o %s -j oe" """ % \
            (job_name, output_file, output_file)

    def alive(self, process_id):
        """Report whether the job is queued or running; delete it otherwise.

        Returns True when the job state is 'Q' or 'R'. For any other state
        (including 'H'/'S', or when the job cannot be queried) a deletion of
        the job is attempted and False is returned.

        Arguments:
        - process_id : job id; it is passed through str() for the PBS calls
                       and %s-formatted in messages, so ints and strings
                       both work
        """
        is_alive = False
        try:
            status = self.pbsquery.getjob(str(process_id))['job_state'][0]
        except Exception:
            # job not found (or the query failed); -1 matches no known state
            status = -1
            sys.stderr.write("EXC: %s\n" % str(sys.exc_info()[0]))
            sys.stderr.write("Could not find job for process id %s\n" % (process_id,))

        if status == 'Q':
            sys.stderr.write("Job %s waiting in queue.\n" % (process_id,))
            is_alive = True
        elif status == 'R':
            sys.stderr.write("Job %s is running.\n" % (process_id,))
            is_alive = True
        elif status in ['H', 'S']:
            # Held/suspended jobs are treated as dead and deleted below.
            sys.stderr.write("Job %s is held or suspended.\n" % (process_id,))
            is_alive = False

        if is_alive:
            return True

        try:
            # Kill the job.
            c = pbs.pbs_connect(pbs.pbs_default())
            # NOTE(review): the return value of pbs_deljob is not inspected;
            # presumably non-zero signals failure -- confirm and report it.
            pbs.pbs_deljob(c, str(process_id))
            sys.stderr.write("Killed job %s.\n" % (process_id,))
        except Exception:
            sys.stderr.write("Failed to kill job %s.\n" % (process_id,))
        return False
def main():
    """
    Main script.

    Parses the ``jobid`` and ``information`` options, queries PBS for the
    given job and prints the transformed information, one entry per line.
    Logs an error and exits with status 1 if the jobid option is missing.
    """
    option_specs = {}
    option_specs["jobid"] = (
        "The PBS_JOBID of the job for which we want information",
        None,
        "store",
        None,
    )
    option_specs["information"] = (
        "Comma-separated list of the job info to print. "
        "Entries of the format input_key:output_key",
        None,
        "store",
        None,
    )
    opts = simple_option(option_specs)

    requested_jobid = opts.options.jobid
    if not requested_jobid:
        logger.error("jobid is a required option. Bailing.")
        sys.exit(1)

    query = PBSQuery()
    job_record = query.getjob(opts.options.jobid)
    output_lines = transform_info(job_record, opts.options.information)
    print("\n".join(output_lines))
class QstatViewer:
    """
    Presents a nicer (?) interface to PBSQuery

    The main member objects are:
    * jobs   -- a dictionary with job ID (as str) as the key, and the
                corresponding Job object as the value
    * nodes  -- a dictionary with node name as the key, and the
                corresponding Node object as the value
    * queues -- a dictionary with queue name as the key, and the
                corresponding Queue object as the value

    In addition, every attribute reported for the server is stored as an
    instance attribute of the same name (see __make_server).
    """

    def __init__(self, pbs_server=None, debug_p=False):
        """Creates a QstatViewer object. Arguments:
           - pbs_server : FQDN of the TORQUE server to query (string)
           - debug_p    : when true, debug output is printed and the flag is
                          passed on to the Job and Node objects"""
        self.debug_p = debug_p
        self.nodes = {}
        self.jobs = {}
        self.queues = {}

        self.pbsquery = PBSQuery(pbs_server)
        self.servername = self.pbsquery.get_server_name()

        self.__make_server()
        self.__make_queues()
        self.__make_jobs()
        self.__make_nodes()

    def __make_nodes(self):
        """Make dict with node names as keys, and Node objects as values"""
        # An earlier approach (removed) built per-node lists of job ids from
        # each job's exec_host entries.
        rawnodes = self.pbsquery.getnodes()
        for n, s in rawnodes.items():
            self.nodes[n] = Node(name=n, pbsnodes_dict=dict(s), debug_p=self.debug_p)

    def __make_jobs(self):
        """Make dict with job IDs as keys, and Job objects as values"""
        rawjobs = self.pbsquery.getjobs()
        for j, p in rawjobs.items():
            self.jobs[j] = Job(id=j, pbsjobs_dict=dict(p), debug_p=self.debug_p)

    def __make_queues(self):
        """Make dict with queue names as keys, and Queue objects as values"""
        rawqueues = self.pbsquery.getqueues()
        for q, p in rawqueues.items():
            self.queues[q] = Queue(name=q, pbsqueue_dict=p)

    def __make_server(self):
        """Copy the server attributes onto this object, converting known ones.

        Conversions:
        - state_count                    : dict of state name -> int
        - resources_default              : mem/pmem -> Memory,
                                           cput/walltime -> timedelta
        - resources_assigned             : mem/vmem -> Memory,
                                           ncpus/nodect -> int
        - scheduling, query_other_jobs   : 'True'/'False' -> bool
        - scheduler_iteration            : datetime.timedelta (seconds)
        - next_job_number, node_check_rate, tcp_timeout, total_jobs : int
        - anything else                  : the single element when the value
                                           has length 1, else the raw value
        Resource keys that are absent are left alone.
        """
        self.__serverinfo = self.pbsquery.get_serverinfo()[self.servername]
        if self.debug_p:
            print('FOOBAR: self.serverinfo = %s' % (self.__serverinfo,))

        int_keys = ('next_job_number', 'node_check_rate', 'tcp_timeout', 'total_jobs')
        bool_keys = ('scheduling', 'query_other_jobs')

        for k, v in self.__serverinfo.items():
            self.__dict__[k] = None
            if k == 'state_count':
                # Example of state_count:
                #   Transit:0 Queued:-6458 Held:6383 Waiting:0 Running:964 Exiting:0
                counts = {}
                # split() without arguments tolerates repeated whitespace
                for state in v[0].split():
                    fields = state.split(':')
                    counts[fields[0]] = int(fields[1])
                self.__dict__[k] = counts
            elif k == 'resources_default':
                for mem_key in ('mem', 'pmem'):
                    if mem_key in v:
                        v[mem_key] = Memory(v[mem_key][0])
                for time_key in ('cput', 'walltime'):
                    if time_key in v:
                        v[time_key] = pbstimestr_to_timedelta(v[time_key][0])
                self.__dict__[k] = v
            elif k == 'resources_assigned':
                for mem_key in ('mem', 'vmem'):
                    if mem_key in v:
                        v[mem_key] = Memory(v[mem_key][0])
                for int_key in ('ncpus', 'nodect'):
                    if int_key in v:
                        v[int_key] = int(v[int_key][0])
                self.__dict__[k] = v
            elif k in bool_keys:
                # The raw value is converted in place; other strings are kept.
                if v[0] == 'True':
                    v[0] = True
                elif v[0] == 'False':
                    v[0] = False
                self.__dict__[k] = v[0]
            elif k == 'scheduler_iteration':
                self.__dict__[k] = datetime.timedelta(seconds=int(v[0]))
            elif k in int_keys:
                self.__dict__[k] = int(v[0])
            elif len(v) == 1:
                self.__dict__[k] = v[0]
            else:
                self.__dict__[k] = v

    def get_job(self, jobid):
        """Queries the queue for jobid

        Returns a Job built from the query result, or None when the result
        carries no 'data' attribute (e.g. the query returned a plain dict)."""
        j = self.pbsquery.getjob(jobid)
        # Plain dicts and None have no __dict__; treat them as "not found"
        # instead of raising.
        attrs = getattr(j, '__dict__', {})
        if 'data' not in attrs:
            return None

        if self.debug_p:
            print('ALOHA:  %s' % (attrs['data'],))

        return Job(id=jobid, pbsjobs_dict=dict(j), debug_p=self.debug_p)

    def jobs_by_user(self, username=None):
        """Returns a dict of jobs (keyed by jobid) belonging to username

        Returns None when no username is given."""
        if not username:
            return None

        retval = {}
        for jobid, job in self.jobs.items():
            if job.owner == username:
                retval[jobid] = job
        return retval

    def nodes_with_property(self, prop):
        """Returns a dict of nodes (keyed by nodename) having the given property string

        When prop is empty, the full nodes dict itself is returned."""
        if not prop:
            return self.nodes

        retval = {}
        for nodename, node in self.nodes.items():
            if prop in node.properties:
                retval[nodename] = node
        return retval

    def nodes_in_clan(self, clan):
        """Returns a dict of nodes (keyed by nodename) belonging to the given clan

        When clan is empty, the full nodes dict itself is returned."""
        if not clan:
            return self.nodes

        retval = {}
        for nodename, node in self.nodes.items():
            if clan == node.clan:
                retval[nodename] = node
        return retval

    def __unicode__(self):
        """Return the string form of the list of str() of every job."""
        if self.debug_p:
            print('FOOBAR: type(self.jobs) = %s' % (type(self.jobs),))
            print('FOOBAR: self.jobs = %s' % (self.jobs,))

        job_dict_list = [str(v) for v in self.jobs.values()]
        return str(job_dict_list)

    def __str__(self):
        return self.__unicode__()
job_attrs[1].name = pbs.ATTR_e job_attrs[1].value = efile # get a handle conn = pbs.pbs_connect(pbs_server) # queue it if os.access(job_file, os.R_OK): log.debug("submitting file %s with output %s and error %s" % (job_file, ofile, efile) ) log.debug("command is: %s" % command_line) job_id = pbs.pbs_submit(conn, job_attrs, job_file, None, None) # monitor if job_id: p = PBSQuery() job_data = p.getjob(job_id) old_state = job_data[job_id]["job_state"] log.debug("initial state is %s" % old_state) running = False while True: job_data = p.getjob(job_id) if not job_data: break state = job_data[job_id]["job_state"] if state != old_state: log.debug("job state changed from %s to %s" % (old_state, state) ) if state == "R" and not running: running = True for data in out_data.values(): data.state = data.states.RUNNING data.blurb = "running"
# get a handle conn = pbs.pbs_connect(pbs_server) # queue it if os.access(job_file, os.R_OK): log.debug( "submitting file %s with output %s and error %s" % (job_file, ofile, efile)) log.debug("command is: %s" % command_line) job_id = pbs.pbs_submit(conn, job_attrs, job_file, None, None) # monitor if job_id: p = PBSQuery() job_data = p.getjob(job_id) old_state = job_data[job_id]["job_state"] log.debug("initial state is %s" % old_state) running = False while True: job_data = p.getjob(job_id) if not job_data: break state = job_data[job_id]["job_state"] if state != old_state: log.debug("job state changed from %s to %s" % (old_state, state)) if state == "R" and not running: running = True for data in out_data.values(): data.state = data.states.RUNNING
def pp_predict_motifs(fastafile, outfile, analysis="small", organism="hg18", single=False, background="", tools=None, job_server="", ncpus=8, logger=None, max_time=None, fg_file=None, bg_file=None): if tools is None: tools = {} config = MotifConfig() if not tools: tools = dict([(x,1) for x in config.get_default_params["tools"].split(",")]) #logger = logging.getLogger('prediction.pp_predict_motifs') wmin = 5 step = 1 if analysis in ["large","xl"]: step = 2 wmin = 6 analysis_max = {"xs":5,"small":8, "medium":10,"large":14, "xl":20} wmax = analysis_max[analysis] if analysis == "xs": sys.stderr.write("Setting analysis xs to small") analysis = "small" jobs = {} result = PredictionResult(outfile, logger=logger, fg_file=fg_file, bg_file=bg_file) # Dynamically load all tools toolio = [x[1]() for x in inspect.getmembers( tool_classes, lambda x: inspect.isclass(x) and issubclass(x, tool_classes.MotifProgram) ) if x[0] != 'MotifProgram'] # TODO: # Add warnings for running time: Weeder GADEM # Prepare PBS submission server = pbs.pbs_default() c = pbs.pbs_connect(server) q = PBSQuery() attropl = pbs.new_attropl(6) # Name attropl[0].name = pbs.ATTR_N # Restartable attropl[1].name = pbs.ATTR_r attropl[1].value = 'y' # Walltime attropl[2].name = pbs.ATTR_l attropl[2].resource = 'walltime' attropl[2].value = '600' # Node requirements attropl[3].name = pbs.ATTR_l attropl[3].resource = 'nodes' attropl[3].value = '1:ppn=1' # attropl[4].name = pbs.ATTR_o attropl[5].name = pbs.ATTR_e rundir = os.path.join(os.path.split(os.path.abspath(fastafile))[0], "torque") if not os.path.exists(rundir): os.mkdir(rundir) params = { 'analysis': analysis, 'background':background, "single":single, "organism":organism } jobs = {} for t in toolio: if tools.has_key(t.name) and tools[t.name]: if t.use_width: for i in range(wmin, wmax + 1, step): logger.info("Starting %s job, width %s" % (t.name, i)) params['width'] = i sh = write_shell_script(t.name, fastafile, rundir=rundir, params=params) job_name = 
os.path.basename(os.path.splitext(sh)[0]) # submit attropl[0].value = job_name attropl[4].value = "{0}/{1}.stdout".format(rundir, job_name) attropl[5].value = "{0}/{1}.stderr".format(rundir, job_name) job_id = pbs.pbs_submit(c, attropl, sh, "batchq", 'NULL') e, e_txt = pbs.error() if e: logger.error("Failed: {0}".format(e_txt)) else: jobs[job_id] = job_name else: logger.debug("Starting %s job" % t.name) sh = write_shell_script(t.name, fastafile, rundir=rundir, params=params) job_name = os.path.basename(os.path.splitext(sh)[0]) # submit attropl[0].value = job_name attropl[4].value = "{0}/{1}.stdout".format(rundir, job_name) attropl[5].value = "{0}/{1}.stderr".format(rundir, job_name) job_id = pbs.pbs_submit(c, attropl, sh, "batchq", 'NULL') e, e_txt = pbs.error() if e: logger.error("Failed submission: {0}".format(e_txt)) else: jobs[job_id] = job_name else: logger.debug("Skipping %s" % t.name) ### Wait until all jobs are finished or the time runs out ### start_time = time() try: # Run until all jobs are finished while len(jobs) > 0 and not(max_time) or time() - start_time < max_time: for job_id,job_name in jobs.items(): job = q.getjob(job_id) if not job: # or not job.is_running(): motifs = [] if job: name = job['Job_Name'] # Some error checking here! else: pwmfile = os.path.join(rundir, "{0}.pwm".format(job_name)) if os.path.exists(pwmfile): motifs = read_motifs(open(pwmfile), fmt="pwm") else: logger.error("Job {0} finished, but couldn find {1}!".format(job_name, pwmfile)) stdout = open(os.path.join(rundir, "{0}.stdout".format(job_name))).read() stderr = open(os.path.join(rundir, "{0}.stderr".format(job_name))).read() result.add_motifs(job_id, (motifs, stdout, stderr)) #for fname in glob.glob("{0}*".format(job_name)): # logger.debug("Deleting {0}".format(fname)) # #os.unlink(fname) del jobs[job_id] sleep(5) ### Or the user gets impatient... ### except KeyboardInterrupt, e: # Destroy all running jobs logger.info("Caught interrupt, destroying all running jobs")