Example #1
    def __translate_cu_sj_description(self, compute_unit_description):
        jd = description()
        if compute_unit_description.has_key("executable"):
            jd.executable = compute_unit_description["executable"]
        if compute_unit_description.has_key("spmd_variation"):
            jd.spmd_variation = compute_unit_description["spmd_variation"]
        else:
            jd.spmd_variation = "single"
        if compute_unit_description.has_key("arguments"):
            jd.arguments = compute_unit_description["arguments"]
        if compute_unit_description.has_key("environment"):
            jd.environment = compute_unit_description["environment"]

        # handling number of processes
        if compute_unit_description.has_key("number_of_processes"):
            jd.number_of_processes = int(compute_unit_description["number_of_processes"])
        elif compute_unit_description.has_key("total_cpu_count"):
            jd.number_of_processes = int(compute_unit_description["total_cpu_count"])
        else:
            jd.number_of_processes = 1

        if compute_unit_description.has_key("working_directory"):
            jd.working_directory = compute_unit_description["working_directory"]
        if compute_unit_description.has_key("output"):
            jd.output = compute_unit_description["output"]
        if compute_unit_description.has_key("error"):
            jd.error = compute_unit_description["error"]
        if compute_unit_description.has_key("file_transfer"):
            jd.file_transfer = compute_unit_description["file_transfer"]
        if compute_unit_description.has_key("input_data"):
            jd.input_data = compute_unit_description["input_data"]
        if compute_unit_description.has_key("output_data"):
            jd.output_data = compute_unit_description["output_data"]
        return jd
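For context, a hypothetical compute unit description of the kind this method (and its variants in Examples #7, #8 and #10) translates into a subjob description; the keys mirror the ones checked above, and the values are purely illustrative.

# Illustrative input for __translate_cu_sj_description (hypothetical values).
compute_unit_description = {
    "executable": "/bin/echo",
    "arguments": ["Hello from BigJob"],
    "environment": ["GREETING=hello"],
    "total_cpu_count": 2,               # used when "number_of_processes" is absent
    "working_directory": "/tmp",
    "output": "stdout.txt",
    "error": "stderr.txt",
}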
Example #2
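The main()-style scripts in this and the following examples omit their module preamble (imports and the coordination URL). A minimal sketch of what they assume is shown below; the Redis URL is a placeholder, and the module path for many_job_service (used by Examples #21/#22) may differ between BigJob releases.

# Assumed preamble for the BigJob example scripts (Python 2).
import os
import sys
import time
import traceback

# Core BigJob Pilot-Job API: pilot (bigjob), sub-job and job description.
from bigjob import bigjob, subjob, description

# Placeholder coordination service; BigJob typically uses a Redis instance.
COORDINATION_URL = "redis://localhost:6379"

# Examples #21/#22 additionally use many_job_service; its module path
# (e.g. bigjob_dynamic.many_job) varies between BigJob releases.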
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue=None # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=1
    number_of_processes = 1
    workingdirectory="." # working directory for agent
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

        
    #lrms_url = "ec2+ssh://localhost" # resource url to run on GCE
    lrms_url = "gce+ssh://locahost"
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/echo"
    #jd.executable = "$HOME/hello.sh"
    jd.number_of_processes = "1"
    jd.arguments = ["$HELLOWORLD"]
    jd.environment = ['HELLOWORLD=hello_world']
    jd.input_data = ["hi", "ho"]
    
    # specify an optional working directory if the sub-job should be executed outside of the bigjob sandbox
    #jd.working_directory = "/tmp" 
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"
    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #3
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue = None  # if None default queue is used
    project = None  # if None default allocation is used
    walltime = 10
    processes_per_node = 8
    number_nodes = 24
    workingdirectory = os.getcwd()  # working directory for agent
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "pbs://localhost"  # resource url to run the jobs on localhost

    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job(lrms_url, number_nodes, queue, project,
                       workingdirectory, userproxy, walltime,
                       processes_per_node)

    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(
        bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/hostname"
    jd.number_of_processes = "2"
    jd.spmd_variation = "single"
    jd.arguments = [""]
    #jd.working_directory = "/tmp"
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"

    for i in range(0, 12):
        sj = subjob()
        sj.submit_job(bj.pilot_url, jd)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.wait()
    bj.cancel()
Example #4
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue = None  # if None default queue is used
    project = None  # if None default allocation is used
    walltime = 10
    processes_per_node = 1
    number_of_processes = 1
    workingdirectory = "."  # working directory for agent
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)

    #lrms_url = "ec2+ssh://localhost" # resource url to run on GCE
    lrms_url = "gce+ssh://locahost"
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job(lrms_url, number_of_processes, queue, project,
                       workingdirectory, userproxy, walltime,
                       processes_per_node)

    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(
        bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/echo"
    #jd.executable = "$HOME/hello.sh"
    jd.number_of_processes = "1"
    jd.arguments = ["$HELLOWORLD"]
    jd.environment = ['HELLOWORLD=hello_world']
    jd.input_data = ["hi", "ho"]

    # specify an optional working directory if the sub-job should be executed outside of the bigjob sandbox
    #jd.working_directory = "/tmp"
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"
    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)

    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if (state == "Failed" or state == "Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #5
    def get_job_description(self, replica_id):
        jd = description()
        jd.executable = self.working_directory + "sync_agent_16/" + str(replica_id) + "/namd2"
        jd.number_of_processes = "4"
        jd.spmd_variation = "single"
        jd.arguments = ["NPT.conf"]
        jd.working_directory = self.working_directory + "sync_agent_16/" + str(replica_id) + "/"
        jd.output = "stdout-" + str(replica_id) + ".txt"
        jd.error = "stderr-" + str(replica_id) + ".txt"

        return jd
Example #6
    def get_job_description(self, replica_id):
        jd = description()
        jd.executable = self.working_directory + "async_agent_4/" + str(replica_id) + "/namd2"
        jd.number_of_processes = "2"
        jd.spmd_variation = "single"
        jd.arguments = ["NPT.conf"]
        jd.working_directory = self.working_directory + "async_agent_4/" + str(replica_id) + "/"
        jd.output = "stdout-" + str(replica_id) + ".txt"
        jd.error = "stderr-" + str(replica_id) + ".txt"

        return jd
Example #7
    def __translate_cu_sj_description(self, compute_unit_description):
        jd = description()
        if compute_unit_description.has_key("executable"):
            jd.executable = compute_unit_description["executable"]
        if compute_unit_description.has_key("spmd_variation"):
            jd.spmd_variation = compute_unit_description["spmd_variation"]
        else:
            jd.spmd_variation = "single"
        if compute_unit_description.has_key("arguments"):
            jd.arguments = compute_unit_description["arguments"]

        if compute_unit_description.has_key("environment"):

            env = compute_unit_description["environment"]
            if type(env) == dict:
                # convert to 'old-style' environment list
                env_list = list()
                for (key, val) in env.iteritems():
                    env_list.append("%s=%s" % (key, val))
                jd.environment = env_list
            else:
                jd.environment = env

        # handling number of processes
        if compute_unit_description.has_key("number_of_processes"):
            jd.number_of_processes = int(
                compute_unit_description["number_of_processes"])
        elif compute_unit_description.has_key("total_cpu_count"):
            jd.number_of_processes = int(
                compute_unit_description["total_cpu_count"])
        else:
            jd.number_of_processes = 1

        if compute_unit_description.has_key("working_directory"):
            jd.working_directory = compute_unit_description[
                "working_directory"]
        if compute_unit_description.has_key("output"):
            jd.output = compute_unit_description["output"]
        if compute_unit_description.has_key("error"):
            jd.error = compute_unit_description["error"]
        if compute_unit_description.has_key("file_transfer"):
            jd.file_transfer = compute_unit_description["file_transfer"]
        if compute_unit_description.has_key("input_data"):
            jd.input_data = compute_unit_description["input_data"]
        if compute_unit_description.has_key("output_data"):
            jd.output_data = compute_unit_description["output_data"]
        return jd
Example #8
    def __translate_cu_sj_description(self, compute_unit_description):
        jd = description()
        if compute_unit_description.has_key("executable"): 
            jd.executable = compute_unit_description["executable"]
        if compute_unit_description.has_key("spmd_variation"):
            jd.spmd_variation = compute_unit_description["spmd_variation"]
        else:
            jd.spmd_variation = "single"
        if compute_unit_description.has_key("arguments"): 
            jd.arguments = compute_unit_description["arguments"]

        if compute_unit_description.has_key("environment"):

            env = compute_unit_description["environment"]
            if type(env) == dict:
                # convert to 'old-style' environment list
                env_list = list()
                for (key, val) in env.iteritems():
                    env_list.append("%s=%s" % (key, val))
                jd.environment = env_list
            else:
                jd.environment = env
        
        # handling number of processes
        if compute_unit_description.has_key("number_of_processes"):
            jd.number_of_processes = int(compute_unit_description["number_of_processes"])
        elif compute_unit_description.has_key("total_cpu_count"):
            jd.number_of_processes = int(compute_unit_description["total_cpu_count"])
        else:
            jd.number_of_processes = 1

        if compute_unit_description.has_key("working_directory"):
            jd.working_directory = compute_unit_description["working_directory"]
        if compute_unit_description.has_key("output"):
            jd.output = compute_unit_description["output"]
        if compute_unit_description.has_key("error"):
            jd.error = compute_unit_description["error"]
        if compute_unit_description.has_key("file_transfer"):
            jd.file_transfer = compute_unit_description["file_transfer"]
        if compute_unit_description.has_key("input_data"):
            jd.input_data = compute_unit_description["input_data"]
        if compute_unit_description.has_key("output_data"):
            jd.output_data = compute_unit_description["output_data"]
        return jd
Example #9
    def submit_wu(self, wu):
        jd = description()
        jd.executable = wu["executable"]
        jd.number_of_processes = "1"
        # wu["number_of_processes"]
        jd.spmd_variation = wu["spmd_variation"]
        jd.arguments = [wu["arguments"]]
        jd.environment = wu["environment"].split(",")
        jd.working_directory = wu["working_directory"]
        jd.output = wu["output"]
        jd.error = wu["error"]

        subjob = self.mjs[int(wu["resource"])].create_job(jd)
        subjob.run()

        print "Submited sub-job " + "."
        self.jobs.append(subjob)
        self.job_start_times[subjob] = time.time()
        self.job_states[subjob] = subjob.get_state()
        self.logger.info("jd.number_of_processes " + str(jd.number_of_processes))
        self.logger.info("jd exec " + jd.executable)
Example #10
    def __translate_cu_sj_description(self, compute_unit_description):
        jd = description()
        if compute_unit_description.has_key("executable"):
            jd.executable = compute_unit_description["executable"]
        if compute_unit_description.has_key("spmd_variation"):
            jd.spmd_variation = compute_unit_description["spmd_variation"]
        else:
            jd.spmd_variation = "single"
        if compute_unit_description.has_key("arguments"):
            jd.arguments = compute_unit_description["arguments"]
        if compute_unit_description.has_key("environment"):
            jd.environment = compute_unit_description["environment"]

        # handling number of processes
        if compute_unit_description.has_key("number_of_processes"):
            jd.number_of_processes = int(
                compute_unit_description["number_of_processes"])
        elif compute_unit_description.has_key("total_cpu_count"):
            jd.number_of_processes = int(
                compute_unit_description["total_cpu_count"])
        else:
            jd.number_of_processes = 1

        if compute_unit_description.has_key("working_directory"):
            jd.working_directory = compute_unit_description[
                "working_directory"]
        if compute_unit_description.has_key("output"):
            jd.output = compute_unit_description["output"]
        if compute_unit_description.has_key("error"):
            jd.error = compute_unit_description["error"]
        if compute_unit_description.has_key("file_transfer"):
            jd.file_transfer = compute_unit_description["file_transfer"]
        if compute_unit_description.has_key("input_data"):
            jd.input_data = compute_unit_description["input_data"]
        if compute_unit_description.has_key("output_data"):
            jd.output_data = compute_unit_description["output_data"]
        return jd
Example #11
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue = None  # if None default queue is used
    project = None  # if None default allocation is used
    walltime = 10
    processes_per_node = 4
    number_of_processes = 8
    workingdirectory = os.path.join(os.getcwd(),
                                    "agent")  # working directory for agent
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    #lrms_url = "fork://localhost" # resource url to run the jobs on localhost
    lrms_url = "condorg://brgw1.renci.org:2119/jobmanager-pbs"

    #lrms_url = "ssh://[email protected]"
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job(lrms_url, None, number_of_processes, queue, project,
                       workingdirectory, userproxy, walltime,
                       processes_per_node)

    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(
        bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()

    jd.executable = "/bin/date"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    #jd.arguments = ["match -f  bgr1.fa -A 0  -r reads_1.fastq -n 4 -T /tmp/ > bfast.matches.file.bgr.1.bmf"]
    jd.arguments = [""]
    #jd.working_directory = ""
    jd.output = "bfast-stdout.txt"
    jd.error = "bfast-stderr.txt"

    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)

    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if (state == "Failed" or state == "Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #12
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue = None  # if None default queue is used
    project = None  # if None default allocation is used
    walltime = 10
    processes_per_node = 4
    number_of_processes = 8
    workingdirectory = os.path.join(os.getcwd(), "agent")
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "condor://localhost"

    ##########################################################################################

    input_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..",
                              "test.txt")
    bj_filetransfers = [input_file + " > test.txt"]

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job(lrms_url, None, number_of_processes, queue, project,
                       workingdirectory, userproxy, walltime,
                       processes_per_node, bj_filetransfers)

    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(
        bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()

    jd.executable = "/bin/cat"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    jd.arguments = ["test.txt"]
    #jd.working_directory = ""
    jd.output = "sj-stdout.txt"
    jd.error = "sj-stderr.txt"

    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)

    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        bj_state = bj.get_state()
        print "bj state: " + str(bj_state) + " state: " + state
        if (state == "Failed" or state == "Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #13
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue="normal" # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=4
    number_of_processes = 8
    #workingdirectory=os.path.join(os.getcwd(), "agent")  # working directory for agent
    workingdirectory="agent"
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

    
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "fork://localhost" # resource url to run the jobs on localhost
    #lrms_url = "sge://localhost" # resource url to run the jobs on localhost
    #lrms_url = "ssh://localhost" # resource url to run the jobs on localhost
   
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/echo"
    #jd.executable = "$HOME/hello.sh"
    jd.number_of_processes = "1"
    jd.arguments = ["$HELLOWORLD"]
    jd.environment = ['HELLOWORLD=hello_world']
    #jd.spmd_variation = "mpi"
    
    # specify an optional working directory if the sub-job should be executed outside of the bigjob sandbox
    #jd.working_directory = "/tmp" 
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"
    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #14
                        walltime,
                        processes_per_node)

    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    jobs = []
    job_start_times = {}
    job_states = {}

    # Submit Jobs through BigJob
    # Here you can add any arguments to each SubJob, change the output and error filenames, and so on
    # change this to your heart's content, but be careful

    for i in range(0, NUMBER_JOBS):
        jd = description()
        jd.executable = "/bin/echo"
        jd.number_of_processes = "4"
        jd.spmd_variation = "mpi" # for serial codes jd.spmd_variation="single"
        jd.arguments = ["$INFRASTRUCTURE"]
        jd.environment = ["INFRASTRUCTURE=FutureGrid"]
        jd.output = "sj-stdout-"+str(i)+".txt"
        jd.error = "sj-stderr-"+str(i)+".txt"
        sj = subjob()
        jobs.append(sj)
        sj.submit_job(bj.pilot_url, jd)
        job_start_times[sj]=time.time()
        job_states[sj] = sj.get_state()

    # busy wait for completion
    while 1:
Example #15
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node)

    print "Pilot Job/BigJob URL: " + bj.get_url() + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jobs = []
    job_start_times = {}
    job_states = {}
    for i in range(0, NUMBER_JOBS):
        jd = description()
        jd.executable = "/bin/date"
        jd.number_of_processes = "1"
        jd.spmd_variation = "single"
        jd.arguments = [""]
        jd.output = "sj-stdout-"+str(i)+".txt"
        jd.error = "sj-stderr-"+str(i)+".txt"

        sj = subjob()
        sj.submit_job(bj.pilot_url, jd)
        jobs.append(sj)
        job_start_times[sj]=time.time()
        job_states[sj] = sj.get_state()

    print "Terminating application. You can reconnect to BJ via the following URL: %s"%bj.get_url()
Example #16
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue=None # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=4
    number_of_processes = 8
    workingdirectory= os.path.join(os.getcwd(), "agent")
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

    
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "condor://localhost"

    ##########################################################################################


    input_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "test.txt")
    bj_filetransfers = [input_file +" > test.txt"]
  
    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        None,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node,
                        bj_filetransfers)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()

    jd.executable = "/bin/cat"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    jd.arguments = ["test.txt"]
    #jd.working_directory = "" 
    jd.output = "sj-stdout.txt"
    jd.error = "sj-stderr.txt"    

    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        bj_state = bj.get_state()
        print "bj state: " + str(bj_state) + " state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #17
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue = "normal"  # if None default queue is used
    project = None  # if None default allocation is used
    walltime = 10
    processes_per_node = 4
    number_of_processes = 8
    #workingdirectory=os.path.join(os.getcwd(), "agent")  # working directory for agent
    workingdirectory = "agent"
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:

    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.

    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "ssh://boskop"
    #lrms_url = "sge://localhost"
    #lrms_url = "fork://localhost"

    ##########################################################################################

    # for i in range(99999):
    #     js = saga.job.Service (lrms_url)
    #     j  = js.run_job ("/bin/sleep 1000")
    #     print "%4d: %s" % (i, j.state)

    for i in range(99999):
        print i

        print "Start Pilot Job/BigJob at: " + lrms_url
        bj = bigjob(COORDINATION_URL)
        bj.start_pilot_job(lrms_url, number_of_processes, queue, project,
                           workingdirectory, userproxy, walltime,
                           processes_per_node)

        print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(
            bj.get_state())

        ##########################################################################################
        # Submit SubJob through BigJob
        jd = description()
        jd.executable = "/bin/echo"
        #jd.executable = "$HOME/hello.sh"
        jd.number_of_processes = "1"
        jd.arguments = ["$HELLOWORLD"]
        jd.environment = ['HELLOWORLD=hello_world']
        #jd.spmd_variation = "mpi"

        # specify an optional working directory if the sub-job should be executed outside of the bigjob sandbox
        #jd.working_directory = "/tmp"
        jd.output = "stdout.txt"
        jd.error = "stderr.txt"

        sj = subjob()
        sj.submit_job(bj.pilot_url, jd)

        #########################################
        # busy wait for completion
        while 1:
            state = str(sj.get_state())
            print "state: " + state
            if (state == "Failed" or state == "Done"):
                break
            time.sleep(2)

        ##########################################################################################
        # Cleanup - stop BigJob
        bj.cancel()
Example #18
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue=None # if None default queue is used
    project=None # if None default allocation is used 
    walltime=600
    processes_per_node=12
    number_of_processes=24
    workingdirectory="/lustre/scratch/aluckow/agent"  # working directory for agent
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

    
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "xt5torque://localhost" # resource url to run the jobs on localhost
   
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/hostname"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    jd.arguments = [""]
    #jd.working_directory = "/tmp" 
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"

    sjs = []
    for i in range(0,24):
        sj = subjob()
        sj.submit_job(bj.pilot_url, jd)
        sjs.append(sj)

    
    
    
    #########################################
    # busy wait for completion
    while 1:
        states = [str(sj.get_state()) for sj in sjs]
        for idx, state in enumerate(states):
            print "sj: %d state: %s" % (idx, state)
        # break only after every sub-job reached a terminal state
        if all(state == "Failed" or state == "Done" for state in states):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #19
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue=None # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=4
    number_of_processes = 8
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)
    
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "sge-ssh://lonestar.tacc.teragrid.org"
    
    """
        To use Globus Online the working directory must be specified using the following conventions
    """ 
    workingdirectory="go://"+GLOBUS_ONLINE_USER+":"+GLOBUS_ONLINE_PASSWORD+"@globusonline.org?ep=xsede#lonestar4&path=~/bigjob/"
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    
    bj_filetransfers = ["go://"+GLOBUS_ONLINE_USER+":"+GLOBUS_ONLINE_PASSWORD+"@globusonline.org?ep=drelu#MacBook&path=" + os.path.dirname(os.path.abspath(__file__)) 
                        + "/test.txt > BIGJOB_WORK_DIR"]
    
    
    bj.start_pilot_job( lrms_url,
                        None,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node,
                        bj_filetransfers)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/cat"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    jd.arguments = ["test.txt"]
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"
    jd.file_transfer = ["go://"+GLOBUS_ONLINE_USER+":"+GLOBUS_ONLINE_PASSWORD+"@globusonline.org?ep=drelu#MacBook&path=" + os.path.dirname(os.path.abspath(__file__)) 
                       + "/test.txt > SUBJOB_WORK_DIR"]
    
    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #20
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue=None # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=4
    number_of_processes = 8
    workingdirectory=os.path.join(os.getcwd(), "agent")  # working directory for agent
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

    
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    #lrms_url = "fork://localhost" # resource url to run the jobs on localhost
    lrms_url = "condorg://brgw1.renci.org:2119/jobmanager-pbs"

    #lrms_url = "ssh://[email protected]" 
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        None,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()

    jd.executable = "/bin/date"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    #jd.arguments = ["match -f  bgr1.fa -A 0  -r reads_1.fastq -n 4 -T /tmp/ > bfast.matches.file.bgr.1.bmf"]
    jd.arguments = [""]
    #jd.working_directory = "" 
    jd.output = "bfast-stdout.txt"
    jd.error = "bfast-stderr.txt"    

    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #21
def main():
    try:
        print "ManyJob load test with " + str(NUMBER_JOBS) + " jobs."
        starttime=time.time()

        """ submit via mj abstraction
         
         resource_list.append( {"resource_url" : "gram://eric1.loni.org/jobmanager-pbs", "processes_per_node":"4",
                               "number_of_processes" : "4", "allocation" : None, "queue" : "workq", 
                               "working_directory": (os.getcwd() + "/agent"), "walltime":10 })

        """
        resource_list = []
        resource_dictionary = {"resource_url" : "fork://localhost/", "number_of_processes" : "32", 
                               "processes_per_node":"1", "allocation" : None, "queue" : None, 
                               "working_directory": (os.getcwd() + "/agent"), "walltime":3600 }
        resource_list.append(resource_dictionary)
        
        
        #Flags for controlling dynamic BigJob
        add_additional_resources=True
        remove_additional_resources=False
        

        print "Create Dynamic BigJob Service "
        mjs = many_job_service(resource_list, COORDINATION_URL)
        
        jobs = []
        job_start_times = {}
        job_states = {}
        cwd = os.getcwd()
        for i in range(0, NUMBER_JOBS):
            # create job description
            jd = description()
            jd.executable = "/bin/date"
            jd.number_of_processes = "1"
            jd.spmd_variation = "single"
            jd.arguments = [""]
            jd.working_directory = os.getcwd()
            jd.output =  "stdout-" + str(i) + ".txt"
            jd.error = "stderr-" + str(i) + ".txt"
            subjob = mjs.create_job(jd)
            subjob.run()
            print "Submited sub-job " + "%d"%i + "."
            jobs.append(subjob)
            job_start_times[subjob]=time.time()
            job_states[subjob] = subjob.get_state()
        print "************************ All Jobs submitted ************************"
        while 1: 
            finish_counter=0
            result_map = {}
            for i in range(0, NUMBER_JOBS):
                old_state = job_states[jobs[i]]
                state = jobs[i].get_state()
                if result_map.has_key(state) == False:
                    result_map[state]=0
                result_map[state] = result_map[state]+1
                #print "counter: " + str(i) + " job: " + str(jobs[i]) + " state: " + state
                if old_state != state:
                    print "Job " + str(jobs[i]) + " changed from: " + old_state + " to " + state
                if old_state != state and has_finished(state)==True:
                    print "Job: " + str(jobs[i]) + " Runtime: " + str(time.time()-job_start_times[jobs[i]]) + " s."
                if has_finished(state)==True:
                    finish_counter = finish_counter + 1
                job_states[jobs[i]]=state
                
            # Dynamic BigJob add resources at runtime
            # if more than 30 s - add additional resource
            if time.time()-starttime > 10 and add_additional_resources==True:
                print "***add additional resources***"
                mjs.add_resource(resource_dictionary)
                add_additional_resources=False  
                
            # remove resources from dynamic bigjob
            if (time.time()-starttime > 15 and remove_additional_resources==True):
                bj_list = mjs.get_resources()
                if len(bj_list)>0:
                    print "***remove resources: " + str(bj_list[0])
                    mjs.remove_resource(bj_list[0])
                remove_additional_resources=False
                
            print "Current states: " + str(result_map) 
            time.sleep(5)
            if finish_counter == NUMBER_JOBS:
                break

        mjs.cancel()
        runtime = time.time()-starttime
        print "Runtime: " + str(runtime) + " s; Runtime per Job: " + str(runtime/NUMBER_JOBS)
    except:
        traceback.print_exc(file=sys.stdout)
        try:
            mjs.cancel()
        except:
            pass
Example #22
def main():
    try:
        print "ManyJob load test with " + str(NUMBER_JOBS) + " jobs."
        starttime = time.time()
        """ submit via mj abstraction
         
         resource_list.append( {"resource_url" : "gram://eric1.loni.org/jobmanager-pbs", "processes_per_node":"4",
                               "number_of_processes" : "4", "allocation" : None, "queue" : "workq", 
                               "working_directory": (os.getcwd() + "/agent"), "walltime":10 })

        """
        resource_list = []
        resource_dictionary = {
            "resource_url": "fork://localhost/",
            "number_of_processes": "32",
            "processes_per_node": "1",
            "allocation": None,
            "queue": None,
            "working_directory": (os.getcwd() + "/agent"),
            "walltime": 3600
        }
        resource_list.append(resource_dictionary)

        #Flags for controlling dynamic BigJob
        add_additional_resources = True
        remove_additional_resources = False

        print "Create Dynamic BigJob Service "
        mjs = many_job_service(resource_list, COORDINATION_URL)

        jobs = []
        job_start_times = {}
        job_states = {}
        cwd = os.getcwd()
        for i in range(0, NUMBER_JOBS):
            # create job description
            jd = description()
            jd.executable = "/bin/date"
            jd.number_of_processes = "1"
            jd.spmd_variation = "single"
            jd.arguments = [""]
            jd.working_directory = os.getcwd()
            jd.output = "stdout-" + str(i) + ".txt"
            jd.error = "stderr-" + str(i) + ".txt"
            subjob = mjs.create_job(jd)
            subjob.run()
            print "Submited sub-job " + "%d" % i + "."
            jobs.append(subjob)
            job_start_times[subjob] = time.time()
            job_states[subjob] = subjob.get_state()
        print "************************ All Jobs submitted ************************"
        while 1:
            finish_counter = 0
            result_map = {}
            for i in range(0, NUMBER_JOBS):
                old_state = job_states[jobs[i]]
                state = jobs[i].get_state()
                if result_map.has_key(state) == False:
                    result_map[state] = 0
                result_map[state] = result_map[state] + 1
                #print "counter: " + str(i) + " job: " + str(jobs[i]) + " state: " + state
                if old_state != state:
                    print "Job " + str(
                        jobs[i]
                    ) + " changed from: " + old_state + " to " + state
                if old_state != state and has_finished(state) == True:
                    print "Job: " + str(jobs[i]) + " Runtime: " + str(
                        time.time() - job_start_times[jobs[i]]) + " s."
                if has_finished(state) == True:
                    finish_counter = finish_counter + 1
                job_states[jobs[i]] = state

            # Dynamic BigJob add resources at runtime
            # if more than 30 s - add additional resource
            if time.time() - starttime > 10 and add_additional_resources == True:
                print "***add additional resources***"
                mjs.add_resource(resource_dictionary)
                add_additional_resources = False

            # remove resources from dynamic bigjob
            if (time.time() - starttime > 15
                    and remove_additional_resources == True):
                bj_list = mjs.get_resources()
                if len(bj_list) > 0:
                    print "***remove resources: " + str(bj_list[0])
                    mjs.remove_resource(bj_list[0])
                remove_additional_resources = False

            print "Current states: " + str(result_map)
            time.sleep(5)
            if finish_counter == NUMBER_JOBS:
                break

        mjs.cancel()
        runtime = time.time() - starttime
        print "Runtime: " + str(runtime) + " s; Runtime per Job: " + str(
            runtime / NUMBER_JOBS)
    except:
        traceback.print_exc(file=sys.stdout)
        try:
            mjs.cancel()
        except:
            pass
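The ManyJob examples (#14, #15, #21 and #22) also reference a NUMBER_JOBS constant and, in #21/#22, a has_finished helper, neither of which appears in these excerpts. A minimal sketch consistent with the state names used above ("Done", "Failed"):

# Hypothetical definitions assumed by the ManyJob examples.
NUMBER_JOBS = 8  # number of sub-jobs to submit


def has_finished(state):
    # Treat the terminal BigJob states as finished (assumed state names).
    state = str(state).lower()
    return state in ("done", "failed", "canceled")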