Example #1
# Imports for the names used below (PBSQuery from pbs_python,
# simple_option and fancylogger from vsc-base).
import sys

from PBSQuery import PBSQuery
from vsc.utils import fancylogger
from vsc.utils.generaloption import simple_option

logger = fancylogger.getLogger()


def main():
    """
    Main script.
    """

    options = {
        "jobid": ("The PBS_JOBID of the job for which we want information", None, "store", None),
        "information": (
            "Comma-separated list of the job info to print. " "Entries of the format input_key:output_key",
            None,
            "store",
            None,
        ),
    }
    opts = simple_option(options)

    if not opts.options.jobid:
        logger.error("jobid is a required option. Bailing.")
        sys.exit(1)

    pquery = PBSQuery()
    current_job = pquery.getjob(opts.options.jobid)

    s = transform_info(current_job, opts.options.information)

    print "\n".join(s)
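
The script relies on a transform_info helper defined elsewhere in the same file. As a rough illustration only, here is a minimal sketch of what such a helper might look like, given the "input_key:output_key" format described in the option help; the body below is an assumption, not the original code. Note that depending on the PBSQuery version, getjob() returns either the job's attribute dict directly (as Examples #2 and #3 assume) or a dict keyed by job id (as Examples #6 and #7 assume); this sketch assumes the former.

def transform_info(job, information):
    """Hypothetical sketch: turn selected job attributes into printable lines.

    job is the dict returned by PBSQuery.getjob(); information is the
    comma-separated "input_key:output_key" string from the command line.
    """
    lines = []
    for entry in information.split(","):
        input_key, output_key = entry.split(":")
        # PBSQuery attribute values are typically lists of strings.
        value = job.get(input_key, ["UNKNOWN"])[0]
        lines.append("%s: %s" % (output_key, value))
    return lines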
Example #2
import sys

import pbs
from PBSQuery import PBSQuery

# AbstractClusterScheduler is provided by the surrounding project and is
# not shown in this example.


class PBSScheduler(AbstractClusterScheduler):
    def __init__(self, *args, **kwargs):
        self.pbsquery = PBSQuery()
        super(PBSScheduler, self).__init__(*args, **kwargs)

    def output_regexp(self):
        return r'(^\d+)'

    def submit_command(self, output_file, job_name):
        # Note for posterity: ssh bint01 "source BLAH && qsub BLAH" doesn't work
        #return """ssh bint01 "/usr/syscom/nsg/opt/torque/4.2.6/bin/qsub -S /bin/bash -N %s -e %s -o %s -j oe" """ % \
        #(job_name, output_file, output_file)
        #return """qsub -S /bin/bash -N %s -e %s -o %s -j oe" """ % (job_name, output_file, output_file)
        return """qsub -N %s -e %s -o %s -j oe """ % (job_name, output_file, output_file)
        # ' '.join(['ssh', 'bint01', '"qsub', '-S', '/bin/bash',
        #            '-N', "%s" % (job_name),
        #            '-e', output_file,
        #            '-o', output_file,
        #            '-j', 'oe"',
        #        ])

    def alive(self, process_id):
        alive = False
        try:
            status = self.pbsquery.getjob(str(process_id))['job_state'][0]
        except Exception:
            # job not found
            status = -1
            sys.stderr.write("EXC: %s\n" % str(sys.exc_info()[0]))
            sys.stderr.write("Could not find job for process id %d\n" % process_id)

        if status == 'Q':
            sys.stderr.write("Job %d waiting in queue.\n" % (process_id))
            alive = True
        elif status == 'R':
            sys.stderr.write("Job %d is running.\n" % (process_id))
            alive = True
        elif status in ['H','S']:
            sys.stderr.write("Job %d is held or suspended.\n" % (process_id))
            alive = False

        if not alive:
            try:
                # Kill the job.
                c = pbs.pbs_connect(pbs.pbs_default())
                result = pbs.pbs_deljob(c, str(process_id))
                sys.stderr.write("Killed job %d.\n" % (process_id))
            except Exception:
                sys.stderr.write("Failed to kill job %d.\n" % (process_id))

            return False
        else:
            return True
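
A short usage sketch of the polling pattern alive() supports. The no-argument construction below is an assumption: PBSScheduler forwards its arguments to AbstractClusterScheduler, whose constructor is not shown in this example.

from time import sleep

scheduler = PBSScheduler()  # assumes the base class needs no arguments
process_id = 12345          # placeholder job id
while scheduler.alive(process_id):
    sleep(30)  # avoid hammering pbs_server
print "Job %d is gone or was killed." % process_id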
Example #3
import sys

import pbs
from PBSQuery import PBSQuery

# AbstractClusterScheduler is provided by the surrounding project and is
# not shown in this example.


class PBSScheduler(AbstractClusterScheduler):
    def __init__(self, *args, **kwargs):
        self.pbsquery = PBSQuery()
        super(PBSScheduler, self).__init__(*args, **kwargs)

    def output_regexp(self):
        return r'(^\d+)'

    def submit_command(self, output_file, job_name):
        # Note for posterity: ssh bint01 "source BLAH && qsub BLAH" doesn't work
        return """ssh bint01 "/usr/syscom/nsg/opt/torque/4.2.6/bin/qsub -S /bin/bash -N %s -e %s -o %s -j oe" """ % \
               (job_name, output_file, output_file)
        # ' '.join(['ssh', 'bint01', '"qsub', '-S', '/bin/bash',
        #            '-N', "%s" % (job_name),
        #            '-e', output_file,
        #            '-o', output_file,
        #            '-j', 'oe"',
        #        ])

    def alive(self, process_id):
        alive = False
        try:
            status = self.pbsquery.getjob(str(process_id))['job_state'][0]
        except Exception:
            # job not found
            status = -1
            sys.stderr.write("EXC: %s\n" % str(sys.exc_info()[0]))
            sys.stderr.write("Could not find job for process id %d\n" %
                             process_id)

        if status == 'Q':
            sys.stderr.write("Job %d waiting in queue.\n" % (process_id))
            alive = True
        elif status == 'R':
            sys.stderr.write("Job %d is running.\n" % (process_id))
            alive = True
        elif status in ['H', 'S']:
            sys.stderr.write("Job %d is held or suspended.\n" % (process_id))
            alive = False

        if not alive:
            try:
                # Kill the job.
                c = pbs.pbs_connect(pbs.pbs_default())
                result = pbs.pbs_deljob(c, str(process_id))
                sys.stderr.write("Killed job %d.\n" % (process_id))
            except Exception:
                sys.stderr.write("Failed to kill job %d.\n" % (process_id))

            return False
        else:
            return True
Example #4
# Imports for the names used below (PBSQuery from pbs_python,
# simple_option and fancylogger from vsc-base).
import sys

from PBSQuery import PBSQuery
from vsc.utils import fancylogger
from vsc.utils.generaloption import simple_option

logger = fancylogger.getLogger()


def main():
    """
    Main script.
    """

    options = {
        "jobid": ("The PBS_JOBID of the job for which we want information",
                  None, "store", None),
        "information":
        ("Comma-separated list of the job info to print. "
         "Entries of the format input_key:output_key", None, "store", None),
    }
    opts = simple_option(options)

    if not opts.options.jobid:
        logger.error("jobid is a required option. Bailing.")
        sys.exit(1)

    pquery = PBSQuery()
    current_job = pquery.getjob(opts.options.jobid)

    s = transform_info(current_job, opts.options.information)

    print "\n".join(s)
Example #5
import datetime

from PBSQuery import PBSQuery

# Node, Job, Queue, Memory, and pbstimestr_to_timedelta are helpers from
# the surrounding project and are not shown in this example.


class QstatViewer:
    """
    Presents a nicer (?) interface to PBSQuery
    The two main member objects are:
    * jobs -- a dictionary with job ID (as str) as the key, 
              and the corresponding Job object as the value
    * nodes -- a dictionary with node name as the key, 
               and a set of corresponding job IDs (of jobs 
               running on node)
    """
    def __init__(self, pbs_server=None, debug_p=False):
        """Creates a QstatViewer object. Arguments:
           - pbs_server : FQDN of the TORQUE server to query (string)"""
        self.debug_p = debug_p

        self.nodes = {}
        self.jobs = {}
        self.queues = {}

        self.pbsquery = PBSQuery(pbs_server)

        self.servername = self.pbsquery.get_server_name()

        self.__make_server()
        self.__make_queues()
        self.__make_jobs()
        self.__make_nodes()


    def __make_nodes(self):
        """Make dict with node names as keys, and list of job objects as values"""

        # make list of jobids running on the node
        #node_jobs = {}
        #for jobid,job in self.jobs.iteritems():
        #    if job.exec_host:
        #        for node_cpu in job.exec_host:
        #            node = node_cpu.split('/')[0]
        #            if node not in node_jobs:
        #                node_jobs[node] = []
        #            else:
        #                node_jobs[node].append(jobid)

        rawnodes = self.pbsquery.getnodes()
        for n,s in rawnodes.iteritems():
            self.nodes[n] = Node(name=n, pbsnodes_dict=dict(s), debug_p=self.debug_p)


    def __make_jobs(self):
        """Make dict with job IDs as keys, and job properties as values"""
        rawjobs = self.pbsquery.getjobs()
        for j,p in rawjobs.iteritems():
            self.jobs[j] = Job(id=j, pbsjobs_dict=dict(p), debug_p=self.debug_p)

    def __make_queues(self):
        """make dict with queue names as keys, and queue properties as values"""
        rawqueues = self.pbsquery.getqueues()
        for q,p in rawqueues.iteritems():
            self.queues[q] = Queue(name=q, pbsqueue_dict=p)

    def __make_server(self):
        self.__serverinfo = self.pbsquery.get_serverinfo()[self.servername]
        if self.debug_p:
            print 'FOOBAR: self.serverinfo =', self.__serverinfo
        for k,v in self.__serverinfo.iteritems():
            self.__dict__[k] = None
            if k == 'state_count':
                # Example of state_count: Transit:0 Queued:-6458 Held:6383 Waiting:0 Running:964 Exiting:0
                self.__dict__[k] = {}
                vals = v[0].strip().split(' ')
                for state in vals:
                    statename = state.split(':')[0]
                    stateval  = int(state.split(':')[1])
                    self.__dict__[k][statename] = stateval
            elif k == 'resources_default':
                v['mem'] = Memory(v['mem'][0])
                v['pmem'] = Memory(v['pmem'][0])
                v['cput'] = pbstimestr_to_timedelta(v['cput'][0])
                v['walltime'] = pbstimestr_to_timedelta(v['walltime'][0])
                self.__dict__[k] = v
            elif k == 'resources_assigned':
                if 'mem' in v:
                    v['mem'] = Memory(v['mem'][0])

                if 'vmem' in v:
                    v['vmem'] = Memory(v['vmem'][0])

                if 'ncpus' in v:
                    v['ncpus'] = int(v['ncpus'][0])
                
                if 'nodect' in v:
                    v['nodect'] = int(v['nodect'][0])

                self.__dict__[k] = v
            elif k == 'scheduling' or k == 'query_other_jobs':
                if v[0] == 'True':
                    v[0] = True
                elif v[0] == 'False':
                    v[0] = False
                self.__dict__[k] = v[0]
            elif k == 'scheduler_iteration':
                self.__dict__[k] = datetime.timedelta(seconds=int(v[0]))
            elif k in ('next_job_number', 'node_check_rate', 'tcp_timeout', 'total_jobs'):
                self.__dict__[k] = int(v[0])
            elif len(v) == 1:
                self.__dict__[k] = v[0]
            else:
                self.__dict__[k] = v



    def get_job(self, jobid):
        """Queries the queue for jobid"""
        j = self.pbsquery.getjob(jobid)
        if self.debug_p:
            print 'ALOHA: ',
            print j.__dict__['data']

        if 'data' in j.__dict__:
            return Job(id=jobid, pbsjobs_dict=dict(j), debug_p=self.debug_p)
        else:
            return None

    def jobs_by_user(self, username=None):
        """Returns a dict of jobs (keyed by jobid) belonging to username"""
        retval = {}
        if not username:
            retval = None
        else:
            for jobid,job in self.jobs.iteritems():
                if job.owner == username:
                    retval[jobid] = job
        return retval

    def nodes_with_property(self, prop):
        """Returns a dict of nodes (keyed by nodename) having the given property string"""
        retval = {}
        if prop:
            for nodename,node in self.nodes.iteritems():
                if prop in node.properties:
                    retval[nodename] = node
        else:
            retval = self.nodes
        return retval

    def nodes_in_clan(self, clan):
        """Returns a dict of nodes (keyed by nodename) belonging to the given clan"""
        retval = {}
        if clan:
            for nodename,node in self.nodes.iteritems():
                if clan == node.clan:
                    retval[nodename] = node
        else:
            retval = self.nodes
        return retval

    def __unicode__(self):
        if self.debug_p:
            print 'FOOBAR: type(self.jobs) =', type(self.jobs)
            print 'FOOBAR: self.jobs =', self.jobs
        job_dict_list = []
        for k,v in self.jobs.iteritems():
            job_dict_list.append(str(v))
        return str(job_dict_list)

    def __str__(self):
        return self.__unicode__()
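
A brief usage sketch of the interface described in the QstatViewer docstring; the server name and username below are placeholders.

# Hypothetical usage sketch.
qv = QstatViewer(pbs_server="pbs.example.org")

# Jobs owned by one user, keyed by job id.
for jobid, job in qv.jobs_by_user("alice").iteritems():
    print jobid, job

# Nodes advertising a given property string.
print "bigmem nodes:", sorted(qv.nodes_with_property("bigmem").keys())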
Example #6
                job_attrs[1].name = pbs.ATTR_e
                job_attrs[1].value = efile

                # get a handle
                conn = pbs.pbs_connect(pbs_server)

                # queue it
                if os.access(job_file, os.R_OK):
                    log.debug("submitting file %s with output %s and error %s" % (job_file, ofile, efile) )
                    log.debug("command is: %s" % command_line)
                    job_id = pbs.pbs_submit(conn, job_attrs, job_file, None, None)

                    # monitor
                    if job_id:
                        p = PBSQuery()
                        job_data = p.getjob(job_id)
                        old_state = job_data[job_id]["job_state"]
                        log.debug("initial state is %s" % old_state)
                        running = False
                        while True:
                            job_data = p.getjob(job_id)
                            if not job_data:
                                break
                            state = job_data[job_id]["job_state"]
                            if state != old_state:
                                log.debug("job state changed from %s to %s" % (old_state, state) )
                            if state == "R" and not running:
                                running = True
                                for data in out_data.values():
                                    data.state = data.states.RUNNING
                                    data.blurb = "running"
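
The monitoring logic above (continued in Example #7) reduces to a simple poll loop: fetch the job, report state changes, and stop once PBSQuery no longer returns it. A condensed standalone sketch follows; the sleep is an addition here to avoid a busy loop and is not part of the fragment shown.

from time import sleep

from PBSQuery import PBSQuery


def wait_for_job(job_id):
    """Hypothetical sketch of the poll loop from Examples #6 and #7."""
    p = PBSQuery()
    old_state = None
    while True:
        job_data = p.getjob(job_id)
        if not job_data:
            return  # job has left the queue
        state = job_data[job_id]["job_state"]
        if state != old_state:
            print "job state changed from %s to %s" % (old_state, state)
            old_state = state
        sleep(10)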
Example #7
                # get a handle
                conn = pbs.pbs_connect(pbs_server)

                # queue it
                if os.access(job_file, os.R_OK):
                    log.debug(
                        "submitting file %s with output %s and error %s" %
                        (job_file, ofile, efile))
                    log.debug("command is: %s" % command_line)
                    job_id = pbs.pbs_submit(conn, job_attrs, job_file, None,
                                            None)

                    # monitor
                    if job_id:
                        p = PBSQuery()
                        job_data = p.getjob(job_id)
                        old_state = job_data[job_id]["job_state"]
                        log.debug("initial state is %s" % old_state)
                        running = False
                        while True:
                            job_data = p.getjob(job_id)
                            if not job_data:
                                break
                            state = job_data[job_id]["job_state"]
                            if state != old_state:
                                log.debug("job state changed from %s to %s" %
                                          (old_state, state))
                            if state == "R" and not running:
                                running = True
                                for data in out_data.values():
                                    data.state = data.states.RUNNING
def pp_predict_motifs(fastafile, outfile, analysis="small", organism="hg18",
                      single=False, background="", tools=None, job_server="",
                      ncpus=8, logger=None, max_time=None, fg_file=None,
                      bg_file=None):
    if tools is None:
        tools = {}

    config = MotifConfig()

    if not tools:
        tools = dict([(x, 1) for x in config.get_default_params()["tools"].split(",")])
    
    #logger = logging.getLogger('prediction.pp_predict_motifs')

    wmin = 5 
    step = 1
    if analysis in ["large","xl"]:
        step = 2
        wmin = 6
    
    analysis_max = {"xs": 5, "small": 8, "medium": 10, "large": 14, "xl": 20}
    wmax = analysis_max[analysis]

    if analysis == "xs":
        sys.stderr.write("Setting analysis xs to small\n")
        analysis = "small"

    jobs = {}
    
    result = PredictionResult(outfile, logger=logger, fg_file=fg_file, bg_file=bg_file)
    
    # Dynamically load all tools
    toolio = [x[1]() for x in inspect.getmembers(
        tool_classes,
        lambda x: inspect.isclass(x) and issubclass(x, tool_classes.MotifProgram)
    ) if x[0] != 'MotifProgram']
    
    # TODO:
    # Add warnings for running time: Weeder GADEM
        
    # Prepare PBS submission
    server = pbs.pbs_default()
    c = pbs.pbs_connect(server)
    q = PBSQuery()
    attropl = pbs.new_attropl(6)
    # Name
    attropl[0].name  = pbs.ATTR_N
    # Restartable
    attropl[1].name  = pbs.ATTR_r
    attropl[1].value = 'y'
    # Walltime
    attropl[2].name  = pbs.ATTR_l
    attropl[2].resource = 'walltime'
    attropl[2].value = '600'
    # Node requirements
    attropl[3].name  = pbs.ATTR_l
    attropl[3].resource = 'nodes'
    attropl[3].value = '1:ppn=1'
    attropl[4].name  = pbs.ATTR_o
    attropl[5].name  = pbs.ATTR_e
   
    rundir = os.path.join(os.path.split(os.path.abspath(fastafile))[0], "torque")
    if not os.path.exists(rundir):
        os.mkdir(rundir)

    params = {
              'analysis': analysis, 
              'background':background, 
              "single":single, 
              "organism":organism
              }
    
    jobs = {}
    for t in toolio:
        if t.name in tools and tools[t.name]:
            if t.use_width:
                for i in range(wmin, wmax + 1, step):
                    logger.info("Starting %s job, width %s" % (t.name, i))
                    params['width'] = i
                    sh = write_shell_script(t.name, fastafile, rundir=rundir, params=params)
                    job_name = os.path.basename(os.path.splitext(sh)[0]) 
                    # submit
                    attropl[0].value = job_name
                    attropl[4].value = "{0}/{1}.stdout".format(rundir, job_name)
                    attropl[5].value = "{0}/{1}.stderr".format(rundir, job_name)
                    job_id = pbs.pbs_submit(c, attropl, sh, "batchq", 'NULL')
                    e, e_txt = pbs.error()
                    if e:
                        logger.error("Failed: {0}".format(e_txt))
                    else:
                        jobs[job_id] = job_name
            else:
                logger.debug("Starting %s job" % t.name)
                sh = write_shell_script(t.name, fastafile, rundir=rundir, params=params)
                job_name = os.path.basename(os.path.splitext(sh)[0]) 
                # submit
                attropl[0].value = job_name
                attropl[4].value = "{0}/{1}.stdout".format(rundir, job_name)
                attropl[5].value = "{0}/{1}.stderr".format(rundir, job_name)
                job_id = pbs.pbs_submit(c, attropl, sh, "batchq", 'NULL')
                e, e_txt = pbs.error()
                if e:
                    logger.error("Failed submission: {0}".format(e_txt))
                else:
                    jobs[job_id] = job_name
        else:
            logger.debug("Skipping %s" % t.name)
    
    ### Wait until all jobs are finished or the time runs out ###
    start_time = time()  
    try:
        # Run until all jobs are finished
        while len(jobs) > 0 and (not max_time or time() - start_time < max_time):
            for job_id,job_name in jobs.items():
                job = q.getjob(job_id)
                
                if not job: # or not job.is_running():
                    motifs = []
                    if job:
                        name = job['Job_Name']
                        # Some error checking here!
                    else:
                        pwmfile = os.path.join(rundir, "{0}.pwm".format(job_name))
                        if os.path.exists(pwmfile):
                            motifs = read_motifs(open(pwmfile), fmt="pwm")
                        else:
                            logger.error("Job {0} finished, but couldn find {1}!".format(job_name, pwmfile))
                    stdout = open(os.path.join(rundir, "{0}.stdout".format(job_name))).read()
                    stderr = open(os.path.join(rundir, "{0}.stderr".format(job_name))).read()
                    
                    result.add_motifs(job_id, (motifs, stdout, stderr))
                    #for fname in glob.glob("{0}*".format(job_name)):
                    #    logger.debug("Deleting {0}".format(fname))
                    #    #os.unlink(fname)
                    
                    del jobs[job_id]
            sleep(5)

    ### Or the user gets impatient... ###
    except KeyboardInterrupt, e:
        # Destroy all running jobs
        logger.info("Caught interrupt, destroying all running jobs")