Example #1
    def submit_with_retry(pbs_attrs, script_path, queue, pbs_server=None):
        # connect to pbs server
        connection = _connect_to_server(pbs_server)

        # submit job
        retry = 0
        job_id = pbs.pbs_submit(connection, pbs_attrs, script_path, queue,
                                None)

        # if pbs.pbs_submit failed, try again
        while not job_id and retry < _MAX_RETRY:
            retry += 1
            print("Retrying connection...", file=sys.stderr)
            time.sleep(retry**2)
            job_id = pbs.pbs_submit(connection, pbs_attrs, script_path, queue,
                                    None)

        pbs.pbs_disconnect(connection)

        # check whether the job was submitted successfully
        if not job_id:
            e, _ = pbs.error()
            # the batch system returned an error; raise an exception
            raise Exception("Error submitting job.  "
                            "Torque error {0}: '{1}'".format(
                                e, torque_strerror(e)))

        return job_id
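
Example #1 relies on two module-level helpers that the listing does not show. A minimal sketch of what they might look like (the names _connect_to_server and _MAX_RETRY come from the snippet; the bodies and the retry cap here are assumptions):

    import pbs

    _MAX_RETRY = 5  # assumed retry cap; the real module defines its own

    def _connect_to_server(pbs_server=None):
        # Fall back to the default server configured for this host.
        server = pbs_server or pbs.pbs_default()
        connection = pbs.pbs_connect(server)
        if connection <= 0:
            # pbs_connect signals failure with a non-positive handle.
            e, e_msg = pbs.error()
            raise Exception("Cannot connect to %s: %s (%s)" % (server, e_msg, e))
        return connection
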
def submit_intrepid_job(job):
    # This is how we are passing the fasta and job id to the script
    server_name = pbs.pbs_default()
    c = pbs.pbs_connect(server_name)
    attropl = pbs.new_attropl(6)

    attropl[0].name = pbs.ATTR_N
    attropl[0].value = "INTREPID Job: %s" % job.id

    attropl[1].name = pbs.ATTR_l
    attropl[1].resource = 'nodes'
    attropl[1].value = '1:ppn=8'

    attropl[2].name = pbs.ATTR_o
    attropl[2].value = JOB_LOG_FILE

    attropl[3].name = pbs.ATTR_e
    attropl[3].value = JOB_LOG_FILE

    attropl[4].name = pbs.ATTR_v
    attropl[4].value = "job_id=%s" % (job.id)

    attropl[5].name = pbs.ATTR_l
    attropl[5].resource = 'walltime'
    attropl[5].value = '48:00:00'

    if job.development_job:
        job_id = pbs.pbs_submit(
            c, attropl,
            "/clusterfs/ohana/software/intrepid/scripts/intrepid_development_pipeline.py",
            'web', 'NULL')
    else:
        job_id = pbs.pbs_submit(
            c, attropl,
            "/clusterfs/ohana/software/intrepid/scripts/intrepid_pipeline.py",
            'web', 'NULL')
    logger.info("Submitting %s to the grid with id %s" % (job.id, job_id))

    if job_id:
        job.pbs_job_id = job_id
        job.status_id = JOB_SUBMITTED
        job.save()

    pbs.pbs_disconnect(c)

    return job_id
def submit_get_subfamilies_job(job):
    # This is how we are passing the fasta and job id to the script
    server_name = pbs.pbs_default()
    c = pbs.pbs_connect(server_name)
    attropl = pbs.new_attropl(5)

    attropl[0].name  = pbs.ATTR_N
    attropl[0].value = "FAT-CAT Get Sub-Families: %s" % job.id

    attropl[1].name  = pbs.ATTR_l
    attropl[1].resource = 'nodes'
    attropl[1].value = '1:ppn=1'

    attropl[2].name  = pbs.ATTR_o
    attropl[2].value = JOB_LOG_FILE

    attropl[3].name  = pbs.ATTR_e
    attropl[3].value = JOB_LOG_FILE

    attropl[4].name  = pbs.ATTR_v
    attropl[4].value = "job_id=%s" % (job.id)

    job.status_id = 5
    job.save()

    job_id = pbs.pbs_submit(c, attropl, "/clusterfs/ohana/software/fatcat/scripts/get_best_nodes.py", 'web', 'NULL')
    logger.info("Submitting %s to the grid to get best nodes with id %s" % (job.id, job_id))

    if job_id: 
        job.get_best_nodes_pbs_job_id = job_id
        job.save()

    pbs.pbs_disconnect(c)

    return job_id
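
submit_intrepid_job and submit_get_subfamilies_job build nearly identical attribute lists. A hypothetical helper (not part of the original code) that factors out the shared attropl pattern:

    import pbs

    def build_attropl(job_name, log_file, job_id, nodes='1:ppn=1', walltime=None):
        # Mirrors the attribute pattern of the functions above; defaults are assumptions.
        attrs = [
            (pbs.ATTR_N, None, job_name),
            (pbs.ATTR_l, 'nodes', nodes),
            (pbs.ATTR_o, None, log_file),
            (pbs.ATTR_e, None, log_file),
            (pbs.ATTR_v, None, "job_id=%s" % job_id),
        ]
        if walltime:
            attrs.append((pbs.ATTR_l, 'walltime', walltime))
        attropl = pbs.new_attropl(len(attrs))
        for i, (name, resource, value) in enumerate(attrs):
            attropl[i].name = name
            if resource:
                attropl[i].resource = resource
            attropl[i].value = value
        return attropl
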
Example #4
 def run(self):
     script_name = 'pylight_script'
     with open(script_name, 'w') as f:
         f.write(self.script)
     self.jobid = pbs.pbs_submit(self.connect(), self.attropl, script_name, 'batch', "NULL") 
     log.info("PBS submits a job %s" % self.jobid) 
     os.remove(script_name)
     e, text = pbs.error()
     if e:
         log.warning("Failed to submit a job: %s", text)
         self.status = main.job.JOB_STATUS.FAIL
     self.disconnect()
    def run_cluster(self, pbs_server, job_script, settings):

        import pbs

        self.settings = copy.deepcopy(settings)
        # Launch script, wait for output to come back, return when it does

        # Create the job options struct
        attropl = pbs.new_attropl(4)

        # Set the name of the job
        #
        attropl[0].name = pbs.ATTR_N
        attropl[0].value = "inferno_" + self.name

        # Job is Rerunable
        #
        attropl[1].name = pbs.ATTR_r
        attropl[1].value = "y"

        # Walltime
        #
        attropl[2].name = pbs.ATTR_l
        attropl[2].resource = "walltime"
        attropl[2].value = "400"

        # Nodes
        #
        attropl[3].name = pbs.ATTR_l
        attropl[3].resource = "nodes"
        attropl[3].value = "1:ppn=4"

        # Run the job: connect to the server, then submit
        if pbs_server is None:
            pbs_server = pbs.pbs_default()
        conn = pbs.pbs_connect(pbs_server)
        job_id = pbs.pbs_submit(conn, attropl, job_script, "NULL", "NULL")

        e, e_txt = pbs.error()
        if e:
            print(e, e_txt)

        # Save the job ID for later so we can check on the status
        self.job_id = job_id

        # TODO: Change this
        # Now loop, checking every 5 seconds or so if the job is done by
        # polling the pbs_server about the jobid.
        running = True
        while running:
            job_info = pbs.pbs_statjob(conn, self.job_id, "NULL", "NULL")
            print(job_info)
            time.sleep(5)
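
The loop above never flips running, as its TODO notes. A terminating poll, under the assumption that pbs_statjob returns an empty batch status once the server no longer knows the job:

    import time
    import pbs

    def wait_for_job(conn, job_id, poll=5):
        # Poll until the job disappears from the server's records
        # (assumption: an empty result means the job has finished).
        while True:
            batch_info = pbs.pbs_statjob(conn, job_id, "NULL", "NULL")
            if not batch_info:
                return
            time.sleep(poll)
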
def submit_fxn_site_prediction_job(job):
    # This is how we are passing the fasta and job id to the script
    server_name = pbs.pbs_default()
    c = pbs.pbs_connect(server_name)

    print(server_name)
    print(c)

    attropl = pbs.new_attropl(7)

    attropl[0].name = pbs.ATTR_N
    attropl[0].value = "Functional Site Prediction Job: %s" % job.id

    attropl[1].name = pbs.ATTR_l
    attropl[1].resource = 'nodes'
    attropl[1].value = '1:ppn=1'

    attropl[2].name = pbs.ATTR_o
    attropl[2].value = JOB_LOG_FILE

    attropl[3].name = pbs.ATTR_e
    attropl[3].value = JOB_LOG_FILE

    attropl[4].name = pbs.ATTR_v
    attropl[4].value = "job_id=%s" % (job.id)

    attropl[5].name = pbs.ATTR_r
    attropl[5].value = 'y'

    attropl[6].name = pbs.ATTR_l
    attropl[6].resource = 'walltime'
    attropl[6].value = '1000'

    job.status_id = 2
    job.save()

    job_id = pbs.pbs_submit(
        c, attropl,
        "/home/cyrus_afrasiabi/ohana_repository/bpg/fxn_site_prediction.py",
        'web', 'NULL')
    logger.info(
        "Submitting %s to the grid to get functional site predictions with id %s"
        % (job.id, job_id))

    if job_id:
        job.pbs_job_id = job_id
        job.save()

    pbs.pbs_disconnect(c)

    return job_id
Example #7
    def submitScript(script):
        result = {}
        try:
            pbs_connection = pbs.pbs_connect(pbs.pbs_default())
            #    queues = pbs.pbs_statque(pbs_connection, "batch", "NULL", "NULL")

            attropl = pbs.new_attropl(4)

            # Set the name of the job
            #
            attropl[0].name = pbs.ATTR_N
            attropl[0].value = str(script['jobName']) if script['jobName'] else "new_job"

            # Job is Rerunable
            #
            attropl[1].name = pbs.ATTR_r
            attropl[1].value = 'y'

            # Walltime
            #
            attropl[2].name = pbs.ATTR_l
            attropl[2].resource = 'walltime'
            attropl[2].value = str(script['maxTime']) if script['maxTime'] else '01:00:00'

            # Nodes
            #
            attropl[3].name = pbs.ATTR_l
            attropl[3].resource = 'nodes'
            attropl[3].value = '1:ppn=' + (str(script['cpuNumber']) if script['cpuNumber'] else '1')


            # A1.tsk is the job script filename
            #
            job_id = pbs.pbs_submit(pbs_connection, attropl, str(script['scriptName']), str(script['queue']), 'NULL')

            e, e_txt = pbs.error()
            if e:
                result['Result'] = 'ERROR'
                result['Message'] = str(e) + ' : ' + e_txt
            else:
                result['Result'] = 'OK'
                result['Message'] = job_id
        except Exception as exc:
            result['Result'] = 'ERROR'
            result['Message'] = str(exc)

        return result
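
An illustrative call of submitScript; the dictionary keys are the ones the function reads above, the values are made up:

    script = {
        'jobName': 'demo_job',
        'maxTime': '02:00:00',
        'cpuNumber': 4,
        'scriptName': '/home/user/run.sh',
        'queue': 'batch',
    }
    result = submitScript(script)
    print(result['Result'], result['Message'])
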
Example #8
 def submit_jobs_pbs(self, jobs):
     for job in jobs:
         tool = job.tool
         command = job.create_command()            
         attropl = self.get_pbs_attr(job.db_job.id, tool.config)
         script = PBS_SCRIPT % (job.tool.directory, command)
         log.info(script)
         script_name = 'pylight_script'
         with open(script_name, 'w') as f:
             f.write(script)
         job_id = pbs.pbs_submit(self.c, attropl, script_name, 'batch', "NULL") 
         os.remove(script_name)
         e, text = pbs.error()
         if e:
             log.warning("Failed to submit a job: %s", text)
             # what about the jobs that follow this one?
             continue
         log.info("PBS submits a job %s as %s" % (job, job_id)) 
         self.submit_list[job_id] = job
Example #9
    def create_job(self, username, Job_Name, queue, nodes, walltime, file):
        c = pbs.pbs_connect(str(pbs.pbs_default()))

        attrl = pbs.new_attropl(3)

        attrl[0].name = pbs.ATTR_N
        attrl[0].value = str(Job_Name)

        attrl[1].name = pbs.ATTR_l
        attrl[1].resource = 'nodes'
        attrl[1].value = str(nodes)

        attrl[2].name = pbs.ATTR_l
        attrl[2].resource = 'walltime'
        attrl[2].value = str(walltime)

        queue = str(queue)
        task_id = pbs.pbs_submit(c, attrl,
                                 str("media/" + username + "/" + file), queue,
                                 'NULL')
        return pbs.pbs_geterrmsg(c)
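
Note that create_job returns the server's error message from pbs_geterrmsg (empty on success) rather than the task id, and never disconnects. A hypothetical call, with an invented owner object since the enclosing class is not shown:

    scheduler = Scheduler()  # assumed owner of create_job; not in the listing
    err = scheduler.create_job('alice', 'demo', 'batch', '1:ppn=2',
                               '01:00:00', 'run.sh')
    if err:
        print("submission failed:", err)
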
Example #10
                ofile = "%s/database/pbs/%s.o" % (os.getcwd(), job_name)
                efile = "%s/database/pbs/%s.e" % (os.getcwd(), job_name)
                job_attrs = pbs.new_attropl(2)
                job_attrs[0].name = pbs.ATTR_o
                job_attrs[0].value = ofile
                job_attrs[1].name = pbs.ATTR_e
                job_attrs[1].value = efile

                # get a handle
                conn = pbs.pbs_connect(pbs_server)

                # queue it
                if os.access(job_file, os.R_OK):
                    log.debug("submitting file %s with output %s and error %s" % (job_file, ofile, efile) )
                    log.debug("command is: %s" % command_line)
                    job_id = pbs.pbs_submit(conn, job_attrs, job_file, None, None)

                    # monitor
                    if job_id:
                        p = PBSQuery()
                        job_data = p.getjob(job_id)
                        old_state = job_data[job_id]["job_state"]
                        log.debug("initial state is %s" % old_state)
                        running = False
                        while True:
                            job_data = p.getjob(job_id)
                            if not job_data:
                                break
                            state = job_data[job_id]["job_state"]
                            if state != old_state:
                                log.debug("job state changed from %s to %s" % (old_state, state) )
Example #11
    def queue_job(self, job_wrapper):
        """Create PBS script for a job and submit it to the PBS queue"""
        # prepare the job
        if not self.prepare_job(
                job_wrapper,
                include_metadata=not (self.app.config.pbs_stage_path)):
            return

        job_destination = job_wrapper.job_destination

        # Determine the job's PBS destination (server/queue) and options from the job destination definition
        pbs_queue_name = None
        pbs_server_name = self.default_pbs_server
        pbs_options = []
        if '-q' in job_destination.params and 'destination' not in job_destination.params:
            job_destination.params['destination'] = job_destination.params.pop(
                '-q')
        if 'destination' in job_destination.params:
            if '@' in job_destination.params['destination']:
                # Destination includes a server
                pbs_queue_name, pbs_server_name = job_destination.params[
                    'destination'].split('@')
                if pbs_queue_name == '':
                    # e.g. `qsub -q @server`
                    pbs_queue_name = None
            else:
                # Destination is just a queue
                pbs_queue_name = job_destination.params['destination']
            job_destination.params.pop('destination')

        # Parse PBS params
        pbs_options = self.parse_destination_params(job_destination.params)

        # Explicitly set the determined PBS destination in the persisted job destination for recovery
        job_destination.params['destination'] = '%s@%s' % (pbs_queue_name or
                                                           '', pbs_server_name)

        c = pbs.pbs_connect(util.smart_str(pbs_server_name))
        if c <= 0:
            errno, text = pbs.error()
            job_wrapper.fail(
                "Unable to queue job for execution.  Resubmitting the job may succeed."
            )
            log.error("Connection to PBS server for submit failed: %s: %s" %
                      (errno, text))
            return

        # define job attributes
        ofile = "%s/%s.o" % (self.app.config.cluster_files_directory,
                             job_wrapper.job_id)
        efile = "%s/%s.e" % (self.app.config.cluster_files_directory,
                             job_wrapper.job_id)
        ecfile = "%s/%s.ec" % (self.app.config.cluster_files_directory,
                               job_wrapper.job_id)

        output_fnames = job_wrapper.get_output_fnames()

        # If an application server is set, we're staging
        if self.app.config.pbs_application_server:
            pbs_ofile = self.app.config.pbs_application_server + ':' + ofile
            pbs_efile = self.app.config.pbs_application_server + ':' + efile
            output_files = [str(o) for o in output_fnames]
            output_files.append(ecfile)
            stagein = self.get_stage_in_out(job_wrapper.get_input_fnames() +
                                            output_files,
                                            symlink=True)
            stageout = self.get_stage_in_out(output_files)
            attrs = [
                dict(name=pbs.ATTR_o, value=pbs_ofile),
                dict(name=pbs.ATTR_e, value=pbs_efile),
                dict(name=pbs.ATTR_stagein, value=stagein),
                dict(name=pbs.ATTR_stageout, value=stageout),
            ]
        # If not, we're using NFS
        else:
            attrs = [
                dict(name=pbs.ATTR_o, value=ofile),
                dict(name=pbs.ATTR_e, value=efile),
            ]

        # define PBS job options
        attrs.append(
            dict(name=pbs.ATTR_N,
                 value=str("%s_%s_%s" %
                           (job_wrapper.job_id, job_wrapper.tool.id,
                            job_wrapper.user))))
        job_attrs = pbs.new_attropl(len(attrs) + len(pbs_options))
        for i, attr in enumerate(attrs + pbs_options):
            job_attrs[i].name = attr['name']
            job_attrs[i].value = attr['value']
            if 'resource' in attr:
                job_attrs[i].resource = attr['resource']
        exec_dir = os.path.abspath(job_wrapper.working_directory)

        # write the job script
        if self.app.config.pbs_stage_path != '':
            # touch the ecfile so that it gets staged
            with open(ecfile, 'a'):
                os.utime(ecfile, None)

            stage_commands = pbs_symlink_template % (
                " ".join(job_wrapper.get_input_fnames() + output_files),
                self.app.config.pbs_stage_path,
                exec_dir,
            )
        else:
            stage_commands = ''

        env_setup_commands = [stage_commands]
        script = self.get_job_file(job_wrapper,
                                   exit_code_path=ecfile,
                                   env_setup_commands=env_setup_commands)
        job_file = "%s/%s.sh" % (self.app.config.cluster_files_directory,
                                 job_wrapper.job_id)
        self.write_executable_script(job_file, script)
        # job was deleted while we were preparing it
        if job_wrapper.get_state() == model.Job.states.DELETED:
            log.debug(
                "Job %s deleted by user before it entered the PBS queue" %
                job_wrapper.job_id)
            pbs.pbs_disconnect(c)
            if self.app.config.cleanup_job in ("always", "onsuccess"):
                self.cleanup((ofile, efile, ecfile, job_file))
                job_wrapper.cleanup()
            return

        # submit
        # The job tag includes the job and the task identifier
        # (if a TaskWrapper was passed in):
        galaxy_job_id = job_wrapper.get_id_tag()
        log.debug("(%s) submitting file %s" % (galaxy_job_id, job_file))

        tries = 0
        while tries < 5:
            job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name,
                                    None)
            tries += 1
            if job_id:
                pbs.pbs_disconnect(c)
                break
            errno, text = pbs.error()
            log.warning("(%s) pbs_submit failed (try %d/5), PBS error %d: %s" %
                        (galaxy_job_id, tries, errno, text))
            time.sleep(2)
        else:
            log.error("(%s) All attempts to submit job failed" % galaxy_job_id)
            job_wrapper.fail(
                "Unable to run this job due to a cluster error, please retry it later"
            )
            return

        if pbs_queue_name is None:
            log.debug("(%s) queued in default queue as %s" %
                      (galaxy_job_id, job_id))
        else:
            log.debug("(%s) queued in %s queue as %s" %
                      (galaxy_job_id, pbs_queue_name, job_id))

        # persist destination
        job_wrapper.set_job_destination(job_destination, job_id)

        # Store PBS related state information for job
        job_state = AsynchronousJobState()
        job_state.job_wrapper = job_wrapper
        job_state.job_id = job_id
        job_state.job_file = job_file
        job_state.output_file = ofile
        job_state.error_file = efile
        job_state.exit_code_file = ecfile
        job_state.old_state = 'N'
        job_state.running = False
        job_state.job_destination = job_destination

        # Add to our 'queue' of jobs to monitor
        self.monitor_queue.put(job_state)
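
The destination parsing above accepts three forms. A standalone mirror of that logic (a hypothetical helper, extracted for clarity) with the cases spelled out:

    def parse_destination(destination, default_server):
        # Accepts 'queue', 'queue@server', or '@server' (i.e. qsub -q @server).
        if '@' in destination:
            queue, server = destination.split('@')
            return (queue or None), server
        return destination, default_server

    assert parse_destination('long', 'srv') == ('long', 'srv')
    assert parse_destination('long@cluster1', 'srv') == ('long', 'cluster1')
    assert parse_destination('@cluster1', 'srv') == (None, 'cluster1')
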
Example #13
    def queue_job( self, job_wrapper ):
        """Create PBS script for a job and submit it to the PBS queue"""
        # prepare the job
        if not self.prepare_job( job_wrapper, include_metadata=not( self.app.config.pbs_stage_path ) ):
            return

        job_destination = job_wrapper.job_destination

        # Determine the job's PBS destination (server/queue) and options from the job destination definition
        pbs_queue_name = None
        pbs_server_name = self.default_pbs_server
        pbs_options = []
        if '-q' in job_destination.params and 'destination' not in job_destination.params:
            job_destination.params['destination'] = job_destination.params.pop('-q')
        if 'destination' in job_destination.params:
            if '@' in job_destination.params['destination']:
                # Destination includes a server
                pbs_queue_name, pbs_server_name = job_destination.params['destination'].split('@')
                if pbs_queue_name == '':
                    # e.g. `qsub -q @server`
                    pbs_queue_name = None
            else:
                # Destination is just a queue
                pbs_queue_name = job_destination.params['destination']
            job_destination.params.pop('destination')

        # Parse PBS params
        pbs_options = self.parse_destination_params(job_destination.params)

        # Explicitly set the determined PBS destination in the persisted job destination for recovery
        job_destination.params['destination'] = '%s@%s' % (pbs_queue_name or '', pbs_server_name)

        c = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
        if c <= 0:
            errno, text = pbs.error()
            job_wrapper.fail( "Unable to queue job for execution.  Resubmitting the job may succeed." )
            log.error( "Connection to PBS server for submit failed: %s: %s" % ( errno, text ) )
            return

        # define job attributes
        ofile = "%s/%s.o" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
        efile = "%s/%s.e" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
        ecfile = "%s/%s.ec" % (self.app.config.cluster_files_directory, job_wrapper.job_id)

        output_fnames = job_wrapper.get_output_fnames()

        # If an application server is set, we're staging
        if self.app.config.pbs_application_server:
            pbs_ofile = self.app.config.pbs_application_server + ':' + ofile
            pbs_efile = self.app.config.pbs_application_server + ':' + efile
            output_files = [ str( o ) for o in output_fnames ]
            output_files.append(ecfile)
            stagein = self.get_stage_in_out( job_wrapper.get_input_fnames() + output_files, symlink=True )
            stageout = self.get_stage_in_out( output_files )
            attrs = [
                dict( name=pbs.ATTR_o, value=pbs_ofile ),
                dict( name=pbs.ATTR_e, value=pbs_efile ),
                dict( name=pbs.ATTR_stagein, value=stagein ),
                dict( name=pbs.ATTR_stageout, value=stageout ),
            ]
        # If not, we're using NFS
        else:
            attrs = [
                dict( name=pbs.ATTR_o, value=ofile ),
                dict( name=pbs.ATTR_e, value=efile ),
            ]

        # define PBS job options
        attrs.append( dict( name=pbs.ATTR_N, value=str( "%s_%s_%s" % ( job_wrapper.job_id, job_wrapper.tool.id, job_wrapper.user ) ) ) )
        job_attrs = pbs.new_attropl( len( attrs ) + len( pbs_options ) )
        for i, attr in enumerate( attrs + pbs_options ):
            job_attrs[i].name = attr['name']
            job_attrs[i].value = attr['value']
            if 'resource' in attr:
                job_attrs[i].resource = attr['resource']
        exec_dir = os.path.abspath( job_wrapper.working_directory )

        # write the job script
        if self.app.config.pbs_stage_path != '':
            # touch the ecfile so that it gets staged
            with open(ecfile, 'a'):
                os.utime(ecfile, None)

            stage_commands = pbs_symlink_template % (
                " ".join( job_wrapper.get_input_fnames() + output_files ),
                self.app.config.pbs_stage_path,
                exec_dir,
            )
        else:
            stage_commands = ''

        env_setup_commands = [ stage_commands ]
        script = self.get_job_file(job_wrapper, exit_code_path=ecfile, env_setup_commands=env_setup_commands)
        job_file = "%s/%s.sh" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
        self.write_executable_script( job_file, script )
        # job was deleted while we were preparing it
        if job_wrapper.get_state() == model.Job.states.DELETED:
            log.debug( "Job %s deleted by user before it entered the PBS queue" % job_wrapper.job_id )
            pbs.pbs_disconnect(c)
            if job_wrapper.cleanup_job in ( "always", "onsuccess" ):
                self.cleanup( ( ofile, efile, ecfile, job_file ) )
                job_wrapper.cleanup()
            return

        # submit
        # The job tag includes the job and the task identifier
        # (if a TaskWrapper was passed in):
        galaxy_job_id = job_wrapper.get_id_tag()
        log.debug("(%s) submitting file %s" % ( galaxy_job_id, job_file ) )

        tries = 0
        while tries < 5:
            job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name, None)
            tries += 1
            if job_id:
                pbs.pbs_disconnect(c)
                break
            errno, text = pbs.error()
            log.warning( "(%s) pbs_submit failed (try %d/5), PBS error %d: %s" % (galaxy_job_id, tries, errno, text) )
            time.sleep(2)
        else:
            log.error( "(%s) All attempts to submit job failed" % galaxy_job_id )
            job_wrapper.fail( "Unable to run this job due to a cluster error, please retry it later" )
            return

        if pbs_queue_name is None:
            log.debug("(%s) queued in default queue as %s" % (galaxy_job_id, job_id) )
        else:
            log.debug("(%s) queued in %s queue as %s" % (galaxy_job_id, pbs_queue_name, job_id) )

        # persist destination
        job_wrapper.set_job_destination( job_destination, job_id )

        # Store PBS related state information for job
        job_state = AsynchronousJobState()
        job_state.job_wrapper = job_wrapper
        job_state.job_id = job_id
        job_state.job_file = job_file
        job_state.output_file = ofile
        job_state.error_file = efile
        job_state.exit_code_file = ecfile
        job_state.old_state = 'N'
        job_state.running = False
        job_state.job_destination = job_destination

        # Add to our 'queue' of jobs to monitor
        self.monitor_queue.put( job_state )
Example #14
    def submit(self, txt):
        """Submit the jobscript txt, set self.jobid"""
        self.log.debug("Going to submit script %s", txt)

        attropl = pbs.new_attropl(2)  # jobparams
        attropl[0].name = 'Job_Name'
        attropl[0].value = self.options.get('name', 'python_pbs_job')
        attropl[1].name = 'Rerunable'
        attropl[1].value = 'y'

        for arg in self.args.keys():
            tmp = self.args[arg]
            tmpattropl = pbs.new_attropl(len(tmp))  # jobparams
            if arg in ('resources',):
                idx = 0
                for k, v in tmp.items():
                    tmpattropl[idx].name = 'Resource_List'  # resources
                    tmpattropl[idx].resource = k
                    tmpattropl[idx].value = v
                    idx += 1
            elif arg in ('mail',):
                tmpattropl[0].name = 'Mail_Points'
                tmpattropl[0].value = tmp['send']
                if len(tmp) > 1:
                    tmpattropl[1].name = 'Mail_Users'
                    tmpattropl[1].value = tmp['others']
            elif arg in ('queue',):
                # # use destination field of pbs_submit
                pass
            elif arg in ('account',):
                tmpattropl = pbs.new_attropl(1)
                tmpattropl[0].name = pbs.ATTR_A
                tmpattropl[0].value = tmp
            else:
                self.log.error('Unknown arg %s', arg)
                tmpattropl = pbs.new_attropl(0)

            attropl.extend(tmpattropl)


        # add a bunch of variables (added by qsub)
        # also set PBS_O_WORKDIR to os.getcwd()
        os.environ.setdefault('WORKDIR', os.getcwd())

        defvars = ['MAIL', 'HOME', 'PATH', 'SHELL', 'WORKDIR']

        tmpattropl = pbs.new_attropl(1)
        tmpattropl[0].name = 'Variable_List'
        tmpattropl[0].value = ",".join(["PBS_O_%s=%s" % (
            x, os.environ.get(x, 'NOTFOUND_%s' % x)) for x in defvars])
        attropl.extend(tmpattropl)

        fh, scriptfn = tempfile.mkstemp()
        f = os.fdopen(fh, 'w')
        self.log.debug("Writing temp jobscript to %s" % scriptfn)
        f.write(txt)
        f.close()

        queue = self.args.get(
            'queue', self.options.get('queue', ''))  # do not set with attropl
        if queue:
            self.log.debug("Going to submit to queue %s", queue)
        else:
            self.log.debug("No queue specified. Will submit to default destination.")

        extend = 'NULL'  # always

        jobid = pbs.pbs_submit(self.pbsconn, attropl, scriptfn, queue, extend)

        is_error, errormsg = pbs.error()
        if is_error:
            self.log.error("Failed to submit job script %s: error %s",
                           scriptfn, errormsg)
        else:
            self.log.debug("Succesful jobsubmission returned jobid %s", jobid)
            self.jobid = jobid
            os.remove(scriptfn)
Example #15
def pp_predict_motifs(fastafile, outfile, analysis="small", organism="hg18", single=False, background="", tools=None, job_server="", ncpus=8, logger=None, max_time=None, fg_file=None, bg_file=None):
    if tools is None:
        tools = {}

    config = MotifConfig()

    if not tools:
        tools = dict([(x, 1) for x in config.get_default_params()["tools"].split(",")])
    
    #logger = logging.getLogger('prediction.pp_predict_motifs')

    wmin = 5 
    step = 1
    if analysis in ["large","xl"]:
        step = 2
        wmin = 6
    
    analysis_max = {"xs":5,"small":8, "medium":10,"large":14, "xl":20}
    wmax = analysis_max[analysis]

    if analysis == "xs":
        sys.stderr.write("Setting analysis xs to small")
        analysis = "small"

    jobs = {}
    
    result = PredictionResult(outfile, logger=logger, fg_file=fg_file, bg_file=bg_file)
    
    # Dynamically load all tools
    toolio = [x[1]() for x in inspect.getmembers(
                                                tool_classes, 
                                                lambda x: 
                                                        inspect.isclass(x) and 
                                                        issubclass(x, tool_classes.MotifProgram)
                                                ) if x[0] != 'MotifProgram']
    
    # TODO:
    # Add warnings for running time: Weeder GADEM
        
    # Prepare PBS submission
    server = pbs.pbs_default()
    c = pbs.pbs_connect(server)
    q = PBSQuery()
    attropl = pbs.new_attropl(6)
    # Name
    attropl[0].name  = pbs.ATTR_N
    # Restartable
    attropl[1].name  = pbs.ATTR_r
    attropl[1].value = 'y'
    # Walltime
    attropl[2].name  = pbs.ATTR_l
    attropl[2].resource = 'walltime'
    attropl[2].value = '600'
    # Node requirements
    attropl[3].name  = pbs.ATTR_l
    attropl[3].resource = 'nodes'
    attropl[3].value = '1:ppn=1'
    attropl[4].name  = pbs.ATTR_o
    attropl[5].name  = pbs.ATTR_e
   
    rundir = os.path.join(os.path.split(os.path.abspath(fastafile))[0], "torque")
    if not os.path.exists(rundir):
        os.mkdir(rundir)

    params = {
              'analysis': analysis, 
              'background':background, 
              "single":single, 
              "organism":organism
              }
    
    jobs = {}
    for t in toolio:
        if t.name in tools and tools[t.name]:
            if t.use_width:
                for i in range(wmin, wmax + 1, step):
                    logger.info("Starting %s job, width %s" % (t.name, i))
                    params['width'] = i
                    sh = write_shell_script(t.name, fastafile, rundir=rundir, params=params)
                    job_name = os.path.basename(os.path.splitext(sh)[0]) 
                    # submit
                    attropl[0].value = job_name
                    attropl[4].value = "{0}/{1}.stdout".format(rundir, job_name)
                    attropl[5].value = "{0}/{1}.stderr".format(rundir, job_name)
                    job_id = pbs.pbs_submit(c, attropl, sh, "batchq", 'NULL')
                    e, e_txt = pbs.error()
                    if e:
                        logger.error("Failed: {0}".format(e_txt))
                    else:
                        jobs[job_id] = job_name
            else:
                logger.debug("Starting %s job" % t.name)
                sh = write_shell_script(t.name, fastafile, rundir=rundir, params=params)
                job_name = os.path.basename(os.path.splitext(sh)[0]) 
                # submit
                attropl[0].value = job_name
                attropl[4].value = "{0}/{1}.stdout".format(rundir, job_name)
                attropl[5].value = "{0}/{1}.stderr".format(rundir, job_name)
                job_id = pbs.pbs_submit(c, attropl, sh, "batchq", 'NULL')
                e, e_txt = pbs.error()
                if e:
                    logger.error("Failed submission: {0}".format(e_txt))
                else:
                    jobs[job_id] = job_name
        else:
            logger.debug("Skipping %s" % t.name)
    
    ### Wait until all jobs are finished or the time runs out ###
    start_time = time()  
    try:
        # Run until all jobs are finished
        while len(jobs) > 0 and (not max_time or time() - start_time < max_time):
            for job_id, job_name in list(jobs.items()):
                job = q.getjob(job_id)
                
                if not job: # or not job.is_running():
                    motifs = []
                    if job:
                        name = job['Job_Name']
                        # Some error checking here!
                    else:
                        pwmfile = os.path.join(rundir, "{0}.pwm".format(job_name))
                        if os.path.exists(pwmfile):
                            motifs = read_motifs(open(pwmfile), fmt="pwm")
                        else:
                            logger.error("Job {0} finished, but couldn find {1}!".format(job_name, pwmfile))
                    stdout = open(os.path.join(rundir, "{0}.stdout".format(job_name))).read()
                    stderr = open(os.path.join(rundir, "{0}.stderr".format(job_name))).read()
                    
                    result.add_motifs(job_id, (motifs, stdout, stderr))
                    #for fname in glob.glob("{0}*".format(job_name)):
                    #    logger.debug("Deleting {0}".format(fname))
                    #    #os.unlink(fname)
                    
                    del jobs[job_id]
            sleep(5)

    ### Or the user gets impatient... ###
    except KeyboardInterrupt:
        # Destroy all running jobs
        logger.info("Caught interrupt, destroying all running jobs")
Example #16
    def submit(self):
 
 
        attropl = pbs.new_attropl(self.attribute_count + 1)
        attropl_idx = 0
 
        attropl[attropl_idx].name  = pbs.ATTR_v
        attropl[attropl_idx].value = self.generate_env()
        attropl_idx += 1
 
        if self.name:
            attropl[attropl_idx].name   = pbs.ATTR_N
            attropl[attropl_idx].value  = self.name
            attropl_idx += 1
           
        if self.walltime:
            attropl[attropl_idx].name     = pbs.ATTR_l
            attropl[attropl_idx].resource = 'walltime'
            attropl[attropl_idx].value    = self.walltime
            attropl_idx += 1
        
        if self.nodes:
            attropl[attropl_idx].name     = pbs.ATTR_l
            attropl[attropl_idx].resource = 'nodes'
            attropl[attropl_idx].value    = self.nodes
            attropl_idx += 1
           
        if self.stdout_path:
            attropl[attropl_idx].name  = pbs.ATTR_o
            attropl[attropl_idx].value = self.stdout_path
            attropl_idx += 1

        if self.stderr_path:
            attropl[attropl_idx].name  = pbs.ATTR_e
            attropl[attropl_idx].value = self.stderr_path
            attropl_idx += 1
           
        if self.dependency_list:
            attropl[attropl_idx].name = pbs.ATTR_depend
            attropl[attropl_idx].value = self.dependency_list
            attropl_idx += 1
           
        if self.mail_options:
            attropl[attropl_idx].name = pbs.ATTR_m
            attropl[attropl_idx].value = self.mail_options
            attropl_idx += 1
           
        if self.mem:
            attropl[attropl_idx].name     = pbs.ATTR_l
            attropl[attropl_idx].resource = 'mem'
            attropl[attropl_idx].value    = self.mem
            attropl_idx += 1
            
        if self.vmem:
            attropl[attropl_idx].name     = pbs.ATTR_l
            attropl[attropl_idx].resource = 'vmem'
            attropl[attropl_idx].value    = self.vmem
            attropl_idx += 1
            
        connection = pbs.pbs_connect(pbs.pbs_default())
        
        self.job_id = pbs.pbs_submit(connection, attropl, self.job_script, None, None)
       
        pbs.pbs_disconnect(connection)
        
        e, e_msg = pbs.error()
        
        # the batch system returned an error, throw exception 
        if e:
            message = "%d: %s" % (e, e_msg)
            raise Exception(message)
            
        return self.job_id
Example #17
import pbs

pbs_server = pbs.pbs_default()
pbsconn = pbs.pbs_connect(pbs_server)

print(pbsconn)

attrl = pbs.new_attropl(1)
attrl[0].name = pbs.ATTR_N
attrl[0].value = "test"

task_id = pbs.pbs_submit(pbsconn, attrl, "A1.tsk", 'NULL', 'NULL')

e, e_txt = pbs.error()
if e:
    print e, e_txt

print(task_id)
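
pbs_connect signals failure with a non-positive handle, which the script above does not check. A defensive variant of the same submission (same A1.tsk job file):

    import pbs

    pbs_server = pbs.pbs_default()
    pbsconn = pbs.pbs_connect(pbs_server)
    if pbsconn <= 0:
        e, e_txt = pbs.error()
        raise SystemExit("connection to %s failed: %s (%s)" % (pbs_server, e_txt, e))

    attrl = pbs.new_attropl(1)
    attrl[0].name = pbs.ATTR_N
    attrl[0].value = "test"

    task_id = pbs.pbs_submit(pbsconn, attrl, "A1.tsk", 'NULL', 'NULL')
    pbs.pbs_disconnect(pbsconn)
    print(task_id)
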
Example #18
    def submit_pbs(self, name, taskfile, lastid=None):
        targets = self.targets
        target = targets[name]
        subenv = target["env"].asdict()

        target["attrs"].setdefault(pbs.ATTR_N, name)

        # Just include all variables by default
        varlist = ",".join("%s=%s" % (k, v) for k, v in subenv.iteritems())
        target["attrs"].setdefault(pbs.ATTR_v, varlist)

        # Track job dependencies
        dependencies = []
        dep_type = name.partition("::")[-1] or "afterok"
        for dep in target["components"]:
            dependencies.append("%s:%s" % (dep_type, targets[dep]["torqueid"]))
        if lastid:
            dependencies.append("%s:%s" % (dep_type, lastid))
        if dependencies:
            target["attrs"][pbs.ATTR_depend] = ",".join(dependencies)

        # /bin/sh as a default shell will generally do the right thing.
        # It honors #! syntax at the beginning of the file and it
        # interprets basic commands without a #! at the beginning of
        # the file. Obscure users can opt for other shells
        # (eg: bash,csh,ksh,python,...) via the standard #! syntax
        #   -- This default ensures users with non-standard shells
        #      can still use pbsmake files from other users.
        target["attrs"].setdefault(pbs.ATTR_S, "/bin/sh")

        # We need to handle ATTR_l specially. Each resource needs its own
        # attropl with the name pbs.ATTR_l:
        attr_l = []
        if pbs.ATTR_l in target["attrs"]:
            attr_l = target["attrs"][pbs.ATTR_l].split(",")
            del (target["attrs"][pbs.ATTR_l])

        # Attach attributes to job as the pbs module expects it
        attropl = pbs.new_attropl(len(target["attrs"]) + len(attr_l))
        i = 0
        for n in target["attrs"]:
            attropl[i].name = n
            attropl[i].value = target["env"].interp(target["attrs"][n], defer=False)
            i += 1
        for n in attr_l:
            attropl[i].name = pbs.ATTR_l
            res, val = n.split("=", 1)
            attropl[i].resource = res
            attropl[i].value = target["env"].interp(val, defer=False)
            i += 1
        try:
            destination = target["attrs"]["queue"]
        except KeyError:
            destination = ""

        # attempt to submit job
        lastid = pbs.pbs_submit(self.conn, attropl, taskfile.name, destination, "")
        if lastid:
            target["torqueid"] = lastid
        else:
            print "Error submitting job: %s\n\tAttributes:" % name
            for attr, val in target["attrs"].items():
                print "\t\t%s: %s" % (attr, val)
            raise Exception(pbs.error())

        return lastid
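
For a target with two prerequisites, the dependency loop above yields a comma-joined ATTR_depend value, one type:id entry per dependency. A minimal sketch with illustrative job ids:

    attropl = pbs.new_attropl(1)
    attropl[0].name = pbs.ATTR_depend
    attropl[0].value = "afterok:1001.server,afterok:1002.server"
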
Example #19
    def submit(self):
        """Submit the jobscript txt, set self.jobid"""
        txt = self.script
        self.log.debug("Going to submit script %s" % txt)


        # Build default pbs_attributes list
        pbs_attributes = pbs.new_attropl(1)
        pbs_attributes[0].name = 'Job_Name'
        pbs_attributes[0].value = self.name


        # set resource requirements
        resource_attributes = pbs.new_attropl(len(self.resources))
        idx = 0
        for k, v in self.resources.items():
            resource_attributes[idx].name = 'Resource_List'
            resource_attributes[idx].resource = k
            resource_attributes[idx].value = v
            idx += 1
        pbs_attributes.extend(resource_attributes)

        # add job dependencies to attributes
        if self.deps:
            deps_attributes = pbs.new_attropl(1)
            deps_attributes[0].name = pbs.ATTR_depend
            deps_attributes[0].value = ",".join(["afterany:%s" % dep for dep in self.deps])
            pbs_attributes.extend(deps_attributes)
            self.log.debug("Job deps attributes: %s" % deps_attributes[0].value)

        # add a bunch of variables (added by qsub)
        # also set PBS_O_WORKDIR to os.getcwd()
        os.environ.setdefault('WORKDIR', os.getcwd())

        defvars = ['MAIL', 'HOME', 'PATH', 'SHELL', 'WORKDIR']
        pbsvars = ["PBS_O_%s=%s" % (x, os.environ.get(x, 'NOTFOUND_%s' % x)) for x in defvars]
        # extend PBS variables with specified variables
        pbsvars.extend(["%s=%s" % (name, value) for (name, value) in self.env_vars.items()])
        variable_attributes = pbs.new_attropl(1)
        variable_attributes[0].name = 'Variable_List'
        variable_attributes[0].value = ",".join(pbsvars)

        pbs_attributes.extend(variable_attributes)
        self.log.debug("Job variable attributes: %s" % variable_attributes[0].value)

        # mail settings
        mail_attributes = pbs.new_attropl(1)
        mail_attributes[0].name = 'Mail_Points'
        mail_attributes[0].value = 'n'  # disable all mail
        pbs_attributes.extend(mail_attributes)
        self.log.debug("Job mail attributes: %s" % mail_attributes[0].value)

        import tempfile
        fh, scriptfn = tempfile.mkstemp()
        f = os.fdopen(fh, 'w')
        self.log.debug("Writing temporary job script to %s" % scriptfn)
        f.write(txt)
        f.close()

        self.log.debug("Going to submit to queue %s" % self.queue)

        # the extend parameter must be 'NULL'; the python api requires it
        extend = 'NULL'
        # job submission sometimes fails without producing an error, e.g. when one of the dependency jobs has already finished
        # when that occurs, None will be returned by pbs_submit as job id
        jobid = pbs.pbs_submit(self.pbsconn, pbs_attributes, scriptfn, self.queue, extend)
        is_error, errormsg = pbs.error()
        if is_error or jobid is None:
            self.log.error("Failed to submit job script %s (job id: %s, error %s)" % (scriptfn, jobid, errormsg))
        else:
            self.log.debug("Succesful job submission returned jobid %s" % jobid)
            self.jobid = jobid
            os.remove(scriptfn)
Example #20
                    attropl[index].name = getattr(pbs, 'ATTR_' + attr)
                    attropl[index].resource = resource
                    attropl[index].value = value
                    index += 1
            else:
                attropl[index].name = getattr(pbs, 'ATTR_' + attr)
                attropl[index].value = value
                index += 1
        return attropl


if __name__ == '__main__':
    jp = JobParser()

    try:
        jp.read(sys.argv[1])
    except IndexError:
        print("Usage: %s <jobscript>" % (sys.argv[0]))
        sys.exit(1)

    server_name = pbs.pbs_default()
    con = pbs.pbs_connect(server_name)
    job_id = pbs.pbs_submit(con, jp.get_attropl(), sys.argv[1], 'batch',
                            'NULL')

    e, e_txt = pbs.error()
    if e:
        print(e, e_txt)
    else:
        print(job_id)
Example #21
 def submit(self, job):
     attr = self._job2AttrOp(job)
     jobid = pbs.pbs_submit(self._connection_id, attr,
         job.scriptpath, job.queue, "NULL")
     job.id = jobid
Example #22
    def _submit(self):
        """Submit the jobscript txt, set self.jobid"""
        txt = self.script
        self.log.debug("Going to submit script %s" % txt)

        # Build default pbs_attributes list
        pbs_attributes = pbs.new_attropl(3)
        pbs_attributes[0].name = pbs.ATTR_N  # Job_Name
        pbs_attributes[0].value = self.name

        output_dir = build_option('job_output_dir')
        pbs_attributes[1].name = pbs.ATTR_o
        pbs_attributes[1].value = os.path.join(output_dir,
                                               '%s.o$PBS_JOBID' % self.name)

        pbs_attributes[2].name = pbs.ATTR_e
        pbs_attributes[2].value = os.path.join(output_dir,
                                               '%s.e$PBS_JOBID' % self.name)

        # set resource requirements
        resource_attributes = pbs.new_attropl(len(self.resources))
        idx = 0
        for k, v in self.resources.items():
            resource_attributes[idx].name = pbs.ATTR_l  # Resource_List
            resource_attributes[idx].resource = k
            resource_attributes[idx].value = v
            idx += 1
        pbs_attributes.extend(resource_attributes)

        # add job dependencies to attributes
        if self.deps:
            deps_attributes = pbs.new_attropl(1)
            deps_attributes[0].name = pbs.ATTR_depend
            deps_attributes[0].value = ",".join(
                ["afterany:%s" % dep.jobid for dep in self.deps])
            pbs_attributes.extend(deps_attributes)
            self.log.debug("Job deps attributes: %s" %
                           deps_attributes[0].value)

        # submit job with (user) hold
        hold_attributes = pbs.new_attropl(1)
        hold_attributes[0].name = pbs.ATTR_h
        hold_attributes[0].value = pbs.USER_HOLD
        pbs_attributes.extend(hold_attributes)
        self.holds.append(pbs.USER_HOLD)
        self.log.debug("Job hold attributes: %s" % hold_attributes[0].value)

        # add a bunch of variables (added by qsub)
        # also set PBS_O_WORKDIR to os.getcwd()
        os.environ.setdefault('WORKDIR', os.getcwd())

        defvars = ['MAIL', 'HOME', 'PATH', 'SHELL', 'WORKDIR']
        pbsvars = [
            "PBS_O_%s=%s" % (x, os.environ.get(x, 'NOTFOUND_%s' % x))
            for x in defvars
        ]
        # extend PBS variables with specified variables
        pbsvars.extend([
            "%s=%s" % (name, value) for (name, value) in self.env_vars.items()
        ])
        variable_attributes = pbs.new_attropl(1)
        variable_attributes[0].name = pbs.ATTR_v  # Variable_List
        variable_attributes[0].value = ",".join(pbsvars)

        pbs_attributes.extend(variable_attributes)
        self.log.debug("Job variable attributes: %s" %
                       variable_attributes[0].value)

        # mail settings
        mail_attributes = pbs.new_attropl(1)
        mail_attributes[0].name = pbs.ATTR_m  # Mail_Points
        mail_attributes[0].value = 'n'  # disable all mail
        pbs_attributes.extend(mail_attributes)
        self.log.debug("Job mail attributes: %s" % mail_attributes[0].value)

        fh, scriptfn = tempfile.mkstemp()
        f = os.fdopen(fh, 'w')
        self.log.debug("Writing temporary job script to %s" % scriptfn)
        f.write(txt)
        f.close()

        self.log.debug("Going to submit to queue %s" % self.queue)

        # job submission sometimes fails without producing an error, e.g. when one of the dependency jobs has already finished
        # when that occurs, None will be returned by pbs_submit as job id
        jobid = pbs.pbs_submit(self.pbsconn, pbs_attributes, scriptfn,
                               self.queue, 'NULL')
        is_error, errormsg = pbs.error()
        if is_error or jobid is None:
            raise EasyBuildError(
                "Failed to submit job script %s (job id: %s, error %s)",
                scriptfn, jobid, errormsg)
        else:
            self.log.debug("Succesful job submission returned jobid %s" %
                           jobid)
            self.jobid = jobid
            os.remove(scriptfn)
Example #23
    def submitJob(self, conn, job, task=None, requirements=''):
        """ Need to copy the inputsandbox to WN before submitting a job"""

        # Write a temporary submit script
        # NB: we assume an env var PBS_JOBCOOKIE points to the exec dir on the batch host

        ifiles = task['globalSandbox'].split(',')

        f = tempfile.NamedTemporaryFile('w')
        s = []
        s.append('#!/bin/sh')
        if self.workerNodeWorkDir:
            s.append('cd ' + self.workerNodeWorkDir)
        s.append(
            'if [ ! -d $PBS_JOBCOOKIE ] ; then mkdir -p $PBS_JOBCOOKIE ; fi')
        s.append('cd $PBS_JOBCOOKIE')
        for ifile in task['globalSandbox'].split(','):
            s.append('cp ' + ifile + ' .')
        s.append(self.jobScriptDir + job['executable']+' '+ job['arguments'] +\
                 ' >' + job['standardOutput'] + ' 2>' + job['standardError'])
        if self.workerNodeWorkDir:
            s.append('cd ' + self.workerNodeWorkDir)

        # this fails if the job is aborted, which leaks disc space. Adding an epilogue to make
        # sure it's gone for good - AMM 18/07/2011
        s.append('rm -fr $PBS_JOBCOOKIE')
        f.write('\n'.join(s))
        f.flush()

        epilogue = tempfile.NamedTemporaryFile('w')
        s = []
        s.append('#!/bin/sh')
        if self.workerNodeWorkDir:
            s.append('cd ' + self.workerNodeWorkDir)
        s.append('rm -fr $PBS_JOBCOOKIE')
        s.append('touch $HOME/done.$1')
        epilogue.write('\n'.join(s))
        epilogue.flush()
        os.chmod(epilogue.name, 0o700)
        attr_dict = {
            'Job_Name': 'CRAB_PBS',
            'Variable_List': self.pbs_env,
            'Output_Path':
            self.jobResDir + 'wrapper_' + str(job['standardOutput']),
            'Error_Path':
            self.jobResDir + 'wrapper_' + str(job['standardError'])
        }

        attropl = pbs.new_attropl(len(attr_dict) + len(self.res_dict) + 1)
        i_attr = 0
        for k in attr_dict.keys():
            self.logging.debug("adding k %s" % k)
            attropl[i_attr].name = k
            attropl[i_attr].value = attr_dict[k]
            i_attr += 1
        for k in self.res_dict.keys():
            attropl[i_attr].name = 'Resource_List'
            attropl[i_attr].resource = k
            attropl[i_attr].value = self.res_dict[k]
            i_attr += 1

        attropl[i_attr].name = 'Resource_List'
        attropl[i_attr].resource = 'epilogue'
        attropl[i_attr].value = epilogue.name
        self.logging.debug("adding epilogue: %s" % epilogue.name)
        i_attr += 1

        jobid = pbs.pbs_submit(conn, attropl, f.name, self.queue, 'NULL')
        f.close()

        if not jobid:
            err, err_text = pbs.error()
            self.logging.error('Error in job submission')
            self.logging.error('PBS error code ' + str(err) + ': ' + err_text)
            self.pbs_disconn(conn)
            raise SchedulerError('PBS error', str(err) + ': ' + err_text)

        return {job['name']: jobid}, None, None
Example #25
    def queue_job(self, batch_job):
        """
          queue a BatchJob.
          
          :param batch_job: description of the job to queue
        """

        # batch job names should be unique for civet pipelines because the
        # job name is used to name log files and other output
        # Civet generates unique names for each step, so this is just checking
        # for a programming error
        assert batch_job.name not in self._job_names

        if self.execution_log_dir:
            log_dir = self.execution_log_dir
        else:
            log_dir = self.log_dir

        # set batch_job.stderr_path and batch_job.stdout_path if they aren't already set
        if not batch_job.stdout_path:
            batch_job.stdout_path = os.path.join(log_dir,
                                                 batch_job.name + ".o")
        if not batch_job.stderr_path:
            batch_job.stderr_path = os.path.join(log_dir,
                                                 batch_job.name + ".e")

        # write batch script
        filename = self.write_script(batch_job)

        if self.submit:
            # build up our torque job attributes and resources
            job_attributes = {}
            job_resources = {
                'nodes': "{0}:ppn={1}".format(batch_job.nodes, batch_job.ppn),
                'walltime': batch_job.walltime,
                'epilogue': self.epilogue_filename
            }

            if batch_job.mem:
                job_resources['mem'] = batch_job.mem

            job_attributes[pbs.ATTR_v] = self.generate_env(batch_job.workdir)

            if batch_job.name:
                job_attributes[pbs.ATTR_N] = batch_job.name

            job_attributes[pbs.ATTR_o] = batch_job.stdout_path

            # XXX workaround for a TORQUE bug where local copies of stderr &
            # stdout files to /dev/null don't work correctly but remote
            # copies (to the submit host) do
            if job_attributes[pbs.ATTR_o] == "/dev/null":
                job_attributes[pbs.ATTR_o] = socket.gethostname() + ":/dev/null"

            job_attributes[pbs.ATTR_e] = batch_job.stderr_path

            # XXX same TORQUE /dev/null workaround as above, for stderr
            if job_attributes[pbs.ATTR_e] == "/dev/null":
                job_attributes[pbs.ATTR_e] = socket.gethostname() + ":/dev/null"

            if batch_job.depends_on:
                job_attributes[pbs.ATTR_depend] = self._dependency_string(
                    batch_job)
            elif self.submit_with_hold:
                job_attributes[pbs.ATTR_h] = 'u'

            if batch_job.mail_option:
                job_attributes[pbs.ATTR_m] = batch_job.mail_option

            if batch_job.email_list:
                job_attributes[pbs.ATTR_M] = batch_job.email_list

            if batch_job.date_time:
                job_attributes[pbs.ATTR_a] = str(
                    int(time.mktime(batch_job.date_time.timetuple())))

            pbs_attrs = pbs.new_attropl(
                len(job_attributes) + len(job_resources))

            # populate pbs_attrs
            attr_idx = 0
            for resource, val in job_resources.items():
                pbs_attrs[attr_idx].name = pbs.ATTR_l
                pbs_attrs[attr_idx].resource = resource
                pbs_attrs[attr_idx].value = val
                attr_idx += 1

            for attribute, val in job_attributes.items():
                pbs_attrs[attr_idx].name = attribute
                pbs_attrs[attr_idx].value = val
                attr_idx += 1

            # we've initialized pbs_attrs with all the attributes we need to set
            # now we can connect to the server and submit the job
            connection = _connect_to_server(self._server)

            # connected to pbs_server

            # submit job
            retry = 0
            job_id = pbs.pbs_submit(connection, pbs_attrs, filename,
                                    self.queue, None)

            # if pbs.pbs_submit failed, try again
            while not job_id and retry < _MAX_RETRY:
                retry += 1
                print("Retrying connection...", file=sys.stderr)
                time.sleep(retry**2)
                job_id = pbs.pbs_submit(connection, pbs_attrs, filename,
                                        self.queue, None)

            pbs.pbs_disconnect(connection)

            # check to see if the job was submitted successfully.
            if not job_id:
                e, e_msg = pbs.error()
                # the batch system returned an error, throw exception
                raise Exception("Error submitting job.  "
                                "Torque error {0}: '{1}'".format(
                                    e, torque_strerror(e)))

            if self.submit_with_hold and not batch_job.depends_on:
                self.held_jobs.append(job_id)

        else:
            # self.submit is False; fake a job ID
            job_id = "{0}.civet".format(self._id_seq)
            self._id_seq += 1

        self._job_names.append(batch_job.name)

        self._id_log.write(
            job_id + '\t' + batch_job.name + '\t' +
            str(self._printable_dependencies(batch_job.depends_on)) + '\n')
        self._id_log.flush()
        return job_id
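
queue_job above collects held job ids so that a whole pipeline can be released at once after every step has been queued. A hedged sketch of that release step, assuming these bindings expose pbs_rlsjob with the C API's (connection, job_id, hold_type, extend) signature; the helper name is hypothetical:

    import pbs

    def release_held_jobs(connection, held_jobs):
        # drop the user hold ('u') that queue_job set through ATTR_h
        for job_id in held_jobs:
            rc = pbs.pbs_rlsjob(connection, job_id, 'u', 'NULL')
            if rc != 0:
                err, err_msg = pbs.error()
                raise Exception("Error releasing job {0}: '{1}'".format(job_id, err_msg))
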
Exemplo n.º 26
0
    def submit(self, with_hold=False):
        """Submit the jobscript txt, set self.jobid"""
        txt = self.script
        self.log.debug("Going to submit script %s" % txt)

        # Build default pbs_attributes list
        pbs_attributes = pbs.new_attropl(1)
        pbs_attributes[0].name = pbs.ATTR_N  # Job_Name
        pbs_attributes[0].value = self.name

        # set resource requirements
        resource_attributes = pbs.new_attropl(len(self.resources))
        idx = 0
        for k, v in self.resources.items():
            resource_attributes[idx].name = pbs.ATTR_l  # Resource_List
            resource_attributes[idx].resource = k
            resource_attributes[idx].value = v
            idx += 1
        pbs_attributes.extend(resource_attributes)

        # add job dependencies to attributes
        if self.deps:
            deps_attributes = pbs.new_attropl(1)
            deps_attributes[0].name = pbs.ATTR_depend
            deps_attributes[0].value = ",".join(["afterany:%s" % dep for dep in self.deps])
            pbs_attributes.extend(deps_attributes)
            self.log.debug("Job deps attributes: %s" % deps_attributes[0].value)

        # submit job with (user) hold if requested
        if with_hold:
            hold_attributes = pbs.new_attropl(1)
            hold_attributes[0].name = pbs.ATTR_h
            hold_attributes[0].value = pbs.USER_HOLD
            pbs_attributes.extend(hold_attributes)
            self.holds.append(pbs.USER_HOLD)
            self.log.debug("Job hold attributes: %s" % hold_attributes[0].value)

        # add a bunch of variables (added by qsub)
        # also set PBS_O_WORKDIR to os.getcwd()
        os.environ.setdefault('WORKDIR', os.getcwd())

        defvars = ['MAIL', 'HOME', 'PATH', 'SHELL', 'WORKDIR']
        pbsvars = ["PBS_O_%s=%s" % (x, os.environ.get(x, 'NOTFOUND_%s' % x)) for x in defvars]
        # extend PBS variables with specified variables
        pbsvars.extend(["%s=%s" % (name, value) for (name, value) in self.env_vars.items()])
        variable_attributes = pbs.new_attropl(1)
        variable_attributes[0].name = pbs.ATTR_v  # Variable_List
        variable_attributes[0].value = ",".join(pbsvars)

        pbs_attributes.extend(variable_attributes)
        self.log.debug("Job variable attributes: %s" % variable_attributes[0].value)

        # mail settings
        mail_attributes = pbs.new_attropl(1)
        mail_attributes[0].name = pbs.ATTR_m  # Mail_Points
        mail_attributes[0].value = 'n'  # disable all mail
        pbs_attributes.extend(mail_attributes)
        self.log.debug("Job mail attributes: %s" % mail_attributes[0].value)

        fh, scriptfn = tempfile.mkstemp()
        f = os.fdopen(fh, 'w')
        self.log.debug("Writing temporary job script to %s" % scriptfn)
        f.write(txt)
        f.close()

        self.log.debug("Going to submit to queue %s" % self.queue)

        # job submission sometimes fails without producing an error, e.g. when one of the dependency jobs has already finished
        # when that occurs, None will be returned by pbs_submit as job id
        jobid = pbs.pbs_submit(self.pbsconn, pbs_attributes, scriptfn, self.queue, 'NULL')
        is_error, errormsg = pbs.error()
        if is_error or jobid is None:
            self.log.error("Failed to submit job script %s (job id: %s, error %s)" % (scriptfn, jobid, errormsg))
        else:
            self.log.debug("Succesful job submission returned jobid %s" % jobid)
            self.jobid = jobid
            os.remove(scriptfn)
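
This submit variant (and the one in the next example) assumes self.pbsconn is an already-open server connection. A minimal hedged sketch of obtaining and closing one, using only calls that appear elsewhere in these examples:

    import pbs

    server_name = pbs.pbs_default()        # default server from the local Torque config
    connection = pbs.pbs_connect(server_name)
    if connection <= 0:
        # pbs_connect hands back a non-positive value when the connect fails
        err, err_msg = pbs.error()
        raise Exception("Cannot connect to '%s': %s" % (server_name, err_msg))

    # ... submit jobs over `connection` here ...

    pbs.pbs_disconnect(connection)
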
Exemplo n.º 27
0
    def submit(self):
        """Submit the jobscript txt, set self.jobid"""
        txt = self.script
        self.log.debug("Going to submit script %s" % txt)


        # Build default pbs_attributes list
        pbs_attributes = pbs.new_attropl(1)
        pbs_attributes[0].name = 'Job_Name'
        pbs_attributes[0].value = self.name


        # set resource requirements
        resource_attributes = pbs.new_attropl(len(self.resources))
        idx = 0
        for k, v in self.resources.items():
            resource_attributes[idx].name = 'Resource_List'
            resource_attributes[idx].resource = k
            resource_attributes[idx].value = v
            idx += 1
        pbs_attributes.extend(resource_attributes)

        # add job dependencies to attributes
        if self.deps:
            deps_attributes = pbs.new_attropl(1)
            deps_attributes[0].name = pbs.ATTR_depend
            deps_attributes[0].value = ",".join(["afterany:%s" % dep for dep in self.deps])
            pbs_attributes.extend(deps_attributes)
            self.log.debug("Job deps attributes: %s" % deps_attributes[0].value)

        # add a bunch of variables (added by qsub)
        # also set PBS_O_WORKDIR to os.getcwd()
        os.environ.setdefault('WORKDIR', os.getcwd())

        defvars = ['MAIL', 'HOME', 'PATH', 'SHELL', 'WORKDIR']
        pbsvars = ["PBS_O_%s=%s" % (x, os.environ.get(x, 'NOTFOUND_%s' % x)) for x in defvars]
        # extend PBS variables with specified variables
        pbsvars.extend(["%s=%s" % (name, value) for (name, value) in self.env_vars.items()])
        variable_attributes = pbs.new_attropl(1)
        variable_attributes[0].name = 'Variable_List'
        variable_attributes[0].value = ",".join(pbsvars)

        pbs_attributes.extend(variable_attributes)
        self.log.debug("Job variable attributes: %s" % variable_attributes[0].value)

        # mail settings
        mail_attributes = pbs.new_attropl(1)
        mail_attributes[0].name = 'Mail_Points'
        mail_attributes[0].value = 'n'  # disable all mail
        pbs_attributes.extend(mail_attributes)
        self.log.debug("Job mail attributes: %s" % mail_attributes[0].value)

        import tempfile
        fh, scriptfn = tempfile.mkstemp()
        f = os.fdopen(fh, 'w')
        self.log.debug("Writing temporary job script to %s" % scriptfn)
        f.write(txt)
        f.close()

        self.log.debug("Going to submit to queue %s" % self.queue)

        # the extend parameter must be the string 'NULL'; the Python API requires it
        extend = 'NULL'
        jobid = pbs.pbs_submit(self.pbsconn, pbs_attributes, scriptfn, self.queue, extend)

        is_error, errormsg = pbs.error()
        if is_error:
            self.log.error("Failed to submit job script %s: error %s" % (scriptfn, errormsg))
        else:
            self.log.debug("Succesful job submission returned jobid %s" % jobid)
            self.jobid = jobid
            os.remove(scriptfn)
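
A compact hedged helper distilling the submit-and-verify pattern shared by the last two examples; the function name is hypothetical, and only calls already used above appear:

    import pbs

    def submit_or_raise(connection, attropl, script_path, queue):
        # submission can fail without producing an error (e.g. when a dependency
        # job has already finished), leaving job_id as None, so check both
        job_id = pbs.pbs_submit(connection, attropl, script_path, queue, 'NULL')
        err, err_msg = pbs.error()
        if err or job_id is None:
            raise Exception("Submission failed (Torque error {0}: '{1}')".format(err, err_msg))
        return job_id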