def main():
    """List a Globus Online directory using an X.509 proxy credential.

    A session carrying an ``x509`` context is created and used to open
    ``go://xsede#stampede/~/tmp/go/``.  Each entry is printed with a
    one-character type marker (``d``, ``l`` or ``-``) and its size; entries
    that are none of directory, link or file are reported separately.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    try:
        ctx = saga.Context("x509")
        ctx.user_proxy = '/Users/mark/proj/myproxy/xsede.x509'

        session = saga.Session()
        session.add_context(ctx)

        # Hand the session to the directory handle; otherwise the session
        # (and the proxy context registered on it) is built but never used.
        remote_dir = saga.filesystem.Directory(
            'go://xsede#stampede/~/tmp/go/', session=session)

        for entry in remote_dir.list():
            if remote_dir.is_dir(entry):
                print("d %12s %s" % (remote_dir.get_size(entry), entry))
            elif remote_dir.is_link(entry):
                print("l %12s %s" % (remote_dir.get_size(entry), entry))
            elif remote_dir.is_file(entry):
                print("- %12s %s" % (remote_dir.get_size(entry), entry))
            else:
                print('Other taste ....: %s' % entry)
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def main():
    """Copy ``hosts`` from a remote ``/etc/`` to local ``/tmp/`` and list it.

    The remote directory is opened over sftp with an ssh context; afterwards
    every entry in the local ``/tmp/`` matching ``h*`` is printed.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    remote_url = 'sftp://stampede.tacc.xsede.org/etc/'
    local_url = 'file://localhost/tmp/'
    try:
        # The ssh identity used on the remote machine.  Set ``user_id`` on
        # the context if the remote user name differs from the local one.
        ssh_identity = saga.Context("ssh")
        ssh_session = saga.Session()
        ssh_session.add_context(ssh_identity)

        # Open /etc/ on the remote machine and fetch its 'hosts' file.
        etc_dir = saga.filesystem.Directory(remote_url, session=ssh_session)
        etc_dir.copy('hosts', local_url)

        # Show everything starting with 'h' in the local /tmp/ directory.
        tmp_dir = saga.filesystem.Directory(local_url)
        for name in tmp_dir.list(pattern='h*'):
            print(name)
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def __init__(self, i_arch, i_project, i_user, i_serviceUrl, i_env, i_maxJobs):
    """Store the job settings and open a SAGA job service over ssh.

    Args:
        i_arch: stored as ``m_architecture``.
        i_project: stored as ``m_project``.
        i_user: user id set on the ssh context.
        i_serviceUrl: URL handed to ``saga.job.Service``.
        i_env: stored as ``m_environment``.
        i_maxJobs: stored as ``m_maxJobs``.

    Any ``saga.SagaException`` raised along the way is forwarded to
    ``self.sagaEx``.
    """
    try:
        # plain settings
        self.m_architecture = i_arch
        self.m_project = i_project
        self.m_environment = i_env
        self.m_maxJobs = i_maxJobs

        # ssh context for the given user
        context = self.m_context = saga.Context('ssh')
        context.user_id = i_user

        # session holding that context
        session = self.m_session = saga.Session()
        session.add_context(context)

        # job service bound to the session
        self.m_service = saga.job.Service(i_serviceUrl, session=session)

    except saga.SagaException as i_ex:
        self.sagaEx(i_ex)
def start(self):
    """Start the process

    The command line is ``self.cmd`` followed by ``self.get_args()``.  It is
    submitted as a SAGA job to ``pbs+ssh://gordon.sdsc.edu`` and the running
    job handle is kept in ``self.job``.
    """
    cmd = list(self.cmd)
    cmd.extend(self.get_args())
    self.log.info("Spawning %r", cmd)

    ssh_context = saga.Context("ssh")
    ssh_session = saga.Session()
    ssh_session.add_context(ssh_context)
    service = saga.job.Service("pbs+ssh://gordon.sdsc.edu",
                               session=ssh_session)

    # First element is the executable, the remainder its arguments.
    description = saga.job.Description()
    description.environment = {'MYOUTPUT': '"Hello from SAGA"'}
    description.executable = cmd[0]
    description.arguments = cmd[1:]
    description.output = "mysagajob.stdout"
    description.error = "mysagajob.stderr"
    description.wall_time_limit = 5

    self.job = service.create_job(description)
    self.job.run()
def main():
    """List ``/opt/`` on a remote host over sftp.

    Directories are printed with a ``d`` marker, everything else with ``-``,
    each followed by the entry size and name.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    try:
        # ssh identity on the remote machine: the local login name.
        identity = saga.Context("ssh")
        identity.user_id = getpass.getuser()

        sftp_session = saga.Session()
        sftp_session.add_context(identity)

        opt_dir = saga.filesystem.Directory(
            'sftp://hotel.futuregrid.org/opt/', session=sftp_session)

        for name in opt_dir.list():
            if opt_dir.is_dir(name):
                line_format = "d %12s %s"
            else:
                line_format = "- %12s %s"
            print(line_format % (opt_dir.get_size(name), name))
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def process_dependencies(dependencies, machine_parameters):
    """Copy each group of dependency files to its target machine via sftp.

    ``dependencies`` and ``machine_parameters`` are walked in lockstep by
    position.  The first element of every group is not copied; reaching it
    only derives the target host from ``machine_parameters[index][1]`` by
    dropping everything up to and including the character after the first
    ``'/'``.  Every following element is copied from the current working
    directory to ``sftp://<host><machine_parameters[index][2]><name>``.
    """
    for index, group in enumerate(dependencies):
        ctx = saga.Context("ssh")
        ctx.user_id = "vshah505"
        session = saga.Session()
        session.add_context(ctx)

        for position, name in enumerate(group):
            if position == 0:
                # Leading element: only work out where this group goes.
                endpoint = machine_parameters[index][1]
                service = endpoint[(endpoint.find('/') + 2):]
                continue

            local_file = saga.filesystem.File(
                "file://localhost" + os.getcwd() + "/" + name,
                session=session)
            local_file.copy(
                "sftp://" + service + machine_parameters[index][2] + name)
            print("Successful copy")
def main():
    """Run ``/bin/echo`` as a SAGA job on ``ssh://localhost``.

    The job id and state are printed before and after the job is started,
    then the state and exit code once it has finished.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    def show_id_and_state(job):
        # Same two status lines are needed before and after starting.
        print("Job ID : %s" % (job.id))
        print("Job State : %s" % (job.state))

    try:
        # ssh identity on the target machine; set user_id / user_key on the
        # context if the defaults are not suitable.
        identity = saga.Context("ssh")
        job_session = saga.Session()
        job_session.add_context(identity)

        # 'localhost' may be replaced by any machine reachable through ssh.
        service = saga.job.Service("ssh://localhost", session=job_session)

        # Describe the job; the full attribute set is in the API docs.
        description = saga.job.Description()
        description.environment = {'MYOUTPUT': '"Hello from SAGA"'}
        description.executable = '/bin/echo'
        description.arguments = ['$MYOUTPUT']
        description.output = "mysagajob.stdout"
        description.error = "mysagajob.stderr"

        # A freshly created job starts out in state 'New'.
        myjob = service.create_job(description)
        show_id_and_state(myjob)

        print("\n...starting job...\n")
        myjob.run()
        show_id_and_state(myjob)

        print("\n...waiting for job...\n")
        # Block until the job has either finished or failed.
        myjob.wait()

        print("Job State : %s" % (myjob.state))
        print("Exitcode : %s" % (myjob.exit_code))
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def _get_session():
    """Return the module-level SAGA session, creating it on first use.

    The session is created without default contexts and receives a single
    ssh context whose user id is ``USER``.
    """
    global session
    if session is not None:
        return session

    ssh_context = saga.Context("ssh")
    ssh_context.user_id = USER

    session = saga.Session(default=False)
    session.add_context(ssh_context)
    return session
def connect(self, machine):
    """Establish a connection context for a remote cluster.

    The server entry for ``machine`` is read from ``self.config``; its
    protocol selects the SAGA context type and its user becomes the context's
    user id.  The context is then added to a new session.

    Args:
        machine: key passed to ``self.config.get_server``.

    A ``saga.SagaException`` is logged as an error and not re-raised.
    """
    try:
        server = self.config.get_server(machine)

        ctx = saga.Context(server.get_protocol())
        ctx.user_id = server.get_user()

        # NOTE(review): the session is neither stored nor returned, so the
        # caller cannot reuse it -- confirm whether that is intended.
        session = saga.Session()
        session.add_context(ctx)
    except saga.SagaException as ex:
        # The message needs a placeholder: with none, logging cannot merge
        # the argument and the error text never reaches the log.
        logging.error("Job: %s", ex)
def create_session():
    """Build and return a new SAGA session.

    The session carries one ``UserPass`` context filled from the module-level
    ``USER`` and ``PASSWORD`` values.
    """
    credentials = saga.Context("UserPass")
    credentials.user_id = USER
    credentials.user_pass = PASSWORD

    new_session = saga.Session()
    new_session.add_context(credentials)
    return new_session
def get_proxy():
    """Regenerate the VOMS proxy on pandawms and copy it to this machine.

    A remote job runs ``voms-proxy-init`` on ``pandawms.jinr.ru``; the
    resulting proxy file is then copied over sftp to
    ``/tmp/x509up_u<euid>``.  Progress is written to the
    ``periodic_tasks_logger`` logger.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    logger = logging.getLogger('periodic_tasks_logger')
    getRotatingFileHandler(logger, 'celery.get_proxy.log')

    proxy_local = '/tmp/x509up_u%s' % os.geteuid()
    proxy_user_id = settings.PROXY_USER_ID
    proxy_password = settings.PROXY_PASSWORD

    try:
        credentials = saga.Context("UserPass")
        credentials.user_id = proxy_user_id      # remote login name
        credentials.user_pass = proxy_password   # password

        # Report the age of the proxy currently on disk, if there is one.
        if os.path.isfile(proxy_local):
            current_mtime = os.stat(proxy_local).st_mtime
            logger.info("Current proxy: %s" % time.ctime(current_mtime))

        logger.info('connect to pandawms')
        session = saga.Session()
        session.add_context(credentials)
        service = saga.job.Service("ssh://pandawms.jinr.ru", session=session)

        description = saga.job.Description()
        description.executable = "voms-proxy-init -voms vo.compass.cern.ch:/vo.compass.cern.ch/Role=production --valid 96:00 -q -old --out /home/virthead/x509up_u500 -pwstdin < proxy/gp"
        # full paths to the remote stdout / stderr files
        description.output = "/home/virthead/proxy/GetProxy.stdout"
        description.error = "/home/virthead/proxy/GetProxy.stderr"

        renewal_job = service.create_job(description)
        renewal_job.run()
        renewal_job.wait()

        # The copy below is repeated until the local file's mtime exceeds
        # this baseline.
        baseline_mtime = 0.0
        outfilesource = 'sftp://pandawms.jinr.ru/home/virthead/x509up_u500'
        outfiletarget = 'file://localhost%s' % proxy_local

        logger.info('start loading proxy')
        while True:
            remote_proxy = saga.filesystem.File(outfilesource,
                                                session=session)
            remote_proxy.copy(outfiletarget)
            if os.stat(proxy_local).st_mtime > baseline_mtime:
                break
        logger.info('proxy loaded')

        refreshed_mtime = os.stat(proxy_local).st_mtime
        logger.info("New proxy: %s" % time.ctime(refreshed_mtime))
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        logger.exception("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        logger.exception(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def get_osg_task_status(task_id):
    """Return the state of the job ``task_id`` known to ``CONDOR_URL``.

    The job service is contacted with an ssh context for the local login
    name, and the job is looked up by its id.
    """
    # ssh identity on the remote machine: the local login name.
    identity = saga.Context("ssh")
    identity.user_id = getpass.getuser()

    osg_session = saga.Session()
    osg_session.add_context(identity)

    service = saga.job.Service(CONDOR_URL, session=osg_session)
    return service.get_job(task_id).state
def start_pilot(cr=None):
    """Submit a pilot to a compute resource reachable over ssh.

    A resource description for the compute resource is created by cloning
    the ``local.localhost`` configuration under the name ``ec2.vm`` and
    pointing its job-manager and filesystem endpoints at ``cr.access``.

    Args:
        cr: object exposing an ``access`` URL.  When falsy, a stand-in
            pointing at ``ssh://remote.host.net:1234/`` is used.

    Returns:
        Whatever ``PilotManager.submit_pilots`` returns for the pilot.

    Raises:
        KeyError: if ``EC2_KEYPAIR`` is not set in the environment.
    """
    if not cr:
        class _CR(object):
            def __init__(self):
                self.access = 'ssh://remote.host.net:1234/'
        cr = _CR()

    # get the local resource config
    session = rp.Session()
    cfg = session.get_resource_config('local.localhost')

    # create a new config based on the local one, and add it back
    new_cfg = rp.ResourceConfig('ec2.vm', cfg)
    new_cfg.schemas = ['ssh']
    new_cfg['ssh']['job_manager_endpoint'] = cr.access
    new_cfg['ssh']['filesystem_endpoint'] = cr.access

    # the new config needs to make sure we can bootstrap on the VM
    new_cfg['pre_bootstrap_1'] = [
        'sudo apt-get update',
        'sudo apt-get install -y python-virtualenv python-dev dnsutils bc',
    ]
    session.add_resource_config(new_cfg)

    # use the *same* ssh key for ssh access to the VM
    ssh_ctx = rs.Context('SSH')
    ssh_ctx.user_id = 'admin'
    ssh_ctx.user_key = os.environ['EC2_KEYPAIR']
    session.contexts.append(ssh_ctx)

    # submit a pilot to it.
    pd = rp.ComputePilotDescription()
    pd.resource = 'ec2.vm'
    pd.runtime = 10
    pd.cores = 1
    # A plain boolean: a trailing comma here would assign the tuple (True,).
    pd.exit_on_error = True

    pmgr = rp.PilotManager(session=session)
    return pmgr.submit_pilots(pd)
def start_run_osg_task(task_id):
    """Submit a small ``/bin/cat`` test job to ``CONDOR_URL`` and start it.

    The job id and state are printed before and after the job is started.
    ``task_id`` is accepted but not used by this function, and nothing is
    returned.
    """
    # ssh identity on the remote machine: the local login name.
    identity = saga.Context("ssh")
    identity.user_id = getpass.getuser()

    osg_session = saga.Session()
    osg_session.add_context(identity)
    service = saga.job.Service(CONDOR_URL, session=osg_session)

    description = saga.job.Description()
    description.name = 'testjob'
    description.project = 'TG-MCB090174'
    description.environment = {'RUNTIME': '/etc/passwd'}
    description.wall_time_limit = 2  # minutes
    description.executable = '/bin/cat'
    description.arguments = ["$RUNTIME"]
    description.output = "saga_condorjob.stdout"
    description.error = "saga_condorjob.stderr"

    # A freshly created job starts out in state 'New'.
    sleepjob = service.create_job(description)

    for banner in (None, "\n...starting job...\n"):
        if banner is not None:
            print(banner)
            sleepjob.run()
        print("Job ID : %s" % (sleepjob.id))
        print("Job State : %s" % (sleepjob.state))
def main():
    """Run ``/bin/true`` on a remote login node through ssh.

    The session is created without default contexts and receives one ssh
    context with an explicit user id and ``user_cert`` path.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    try:
        c = saga.Context('ssh')
        c.user_id = 'tg12736'
        c.user_cert = '/home/user/ssh/id_rsa_xsede'  # private key derived from cert

        s = saga.Session(default=False)  # create session with no contexts
        s.add_context(c)

        js = saga.job.Service('ssh://login1.stampede.tacc.utexas.edu',
                              session=s)
        js.run_job("/bin/true")

        # Report success explicitly so both outcomes yield an exit status.
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def main():
    """Copy one file between two Globus Online endpoints and verify it.

    ``my_file`` is copied from the source endpoint to the destination
    endpoint.  The destination is then listed for names sharing the file's
    first letter, and the copied file is checked to be a regular file before
    its size is printed.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.

    Raises:
        AssertionError: if the copied entry is not a plain file.
    """
    try:
        ctx = saga.Context("x509")
        ctx.user_proxy = '/Users/mark/proj/myproxy/xsede.x509'

        session = saga.Session()
        session.add_context(ctx)

        source = "go://marksant#netbook/Users/mark/tmp/go/"
        destination = "go://oasis-dm.sdsc.xsede.org/~/tmp/"
        filename = "my_file"

        # Every handle gets the session; otherwise the session (and the
        # proxy context registered on it) is built but never used.
        source_dir = saga.filesystem.Directory(source, session=session)

        # copy the file from the source endpoint to the destination endpoint
        source_dir.copy(filename, destination)

        # list entries at the destination starting with the file's first letter
        dest_dir = saga.filesystem.Directory(destination, session=session)
        for entry in dest_dir.list(pattern='%s*' % filename[0]):
            print(entry)

        # Build the URL by plain concatenation (destination ends in '/');
        # os.path.join would use the platform's path separator.
        dest_file = saga.filesystem.File(destination + filename,
                                         session=session)
        assert dest_file.is_file()
        assert not dest_file.is_link()
        assert not dest_file.is_dir()
        print('Size: %d' % dest_file.get_size())
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def __init__(self, cfg_file):
    """Load the ``saga.tests`` configuration and prepare a test session.

    Args:
        cfg_file: configuration file handed to ``rut.TestConfig``.

    The session is created without default contexts.  When
    ``self.context_type`` is set, one context of that type is filled from
    the ``context_user_*`` settings and added to the session.
    """
    # Only the 'saga.tests' category of the config file is used.
    rut.TestConfig.__init__(self, cfg_file, 'saga.tests')

    # Tests get their own session, free of default contexts.
    self.session = saga.Session(default=False)

    if not self.context_type:
        return

    # Build the context described by the test configuration ...
    test_context = saga.Context(self.context_type)
    test_context.user_id = self.context_user_id
    test_context.user_pass = self.context_user_pass
    test_context.user_cert = self.context_user_cert
    test_context.user_proxy = self.context_user_proxy

    # ... and register it.
    self.session.add_context(test_context)
def __init__(self, platform_config):
    '''
    Constructor

    Creates the libcloud EC2 driver for the configured region and prepares
    the ssh security context used to reach job nodes.

    platform_config -- handed to the superclass; read back here through
                       self.platform_config for the access key, secret key,
                       service region, user id and user key file.
    '''
    super(JobDeploymentEC2, self).__init__(platform_config)

    # Prepare the necessary config information from the job config object.
    access_key = self.platform_config.access_key
    secret_key = self.platform_config.secret_key
    region = self.platform_config.service_region

    # NOTE(review): a local ``VERIFY_SSL_CERT = False`` used to be assigned
    # here; it was never read and had no effect, so it has been dropped.

    # Unknown regions fall back to EU West.  The fallback is only looked up
    # when it is actually needed.
    if region in self.REGION_MAPPINGS:
        provider = self.REGION_MAPPINGS[region]
    else:
        provider = Provider.EC2_EU_WEST
    EC2 = get_driver(provider)

    self.driver = EC2(access_key, secret_key, secure=True)

    # SAGA Session is pre-created by superclass
    # Prepare the job security context and store it - this will allow
    # access to the node(s) for running the job.
    # We add this to the session if/when required.
    self.job_ctx = saga.Context("ssh")
    self.job_ctx.user_id = self.platform_config.user_id
    self.job_ctx.user_key = self.platform_config.user_key_file

    self.admin_ctx = None

    LOG.debug('Set up security context for job account...')
def main():
    """Submit a ``/bin/touch`` job to a local LSF scheduler and wait for it.

    A state-change callback is registered on the job.  Its id and state are
    printed around submission, and its final state, exit code, execution
    hosts and timestamps once it has completed.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    try:
        # ssh identity on the target machine.  Set ``user_id`` on the
        # context if the user name there differs from the local one.
        identity = saga.Context("ssh")
        lsf_session = saga.Session()
        lsf_session.add_context(identity)

        # The 'lsf' URL scheme selects the scheduler adaptor.
        service = saga.job.Service("lsf://localhost", session=lsf_session)

        # Describe the job; the full attribute set is in the API docs.
        description = saga.job.Description()
        description.environment = {'FILENAME': 'testfile'}
        description.wall_time_limit = 1  # minutes
        description.executable = '/bin/touch'
        description.arguments = ['$FILENAME']
        description.total_cpu_count = 16
        description.queue = "regular"
        description.project = "URTG0003"
        description.working_directory = "$HOME/A/B/C"
        description.output = "examplejob.out"
        description.error = "examplejob.err"

        # A freshly created job starts out in state 'New'.
        touchjob = service.create_job(description)

        # Have the callback fire on every job state change.
        touchjob.add_callback(saga.STATE, job_state_change_cb)

        print("Job ID : %s" % (touchjob.id))
        print("Job State : %s" % (touchjob.state))

        print("\n...starting job...\n")
        touchjob.run()
        print("Job ID : %s" % (touchjob.id))

        print("\n...waiting for job...\n")
        touchjob.wait()

        print("Job State : %s" % (touchjob.state))
        print("Exitcode : %s" % (touchjob.exit_code))
        print("Exec. hosts : %s" % (touchjob.execution_hosts))
        print("Create time : %s" % (touchjob.created))
        print("Start time : %s" % (touchjob.started))
        print("End time : %s" % (touchjob.finished))

        service.close()
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occurred: (%s) %s " % (ex.type, (str(ex))))
        # The full backtrace can be helpful for debugging the problem.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def start_cr():
    """
    Use SAGA to obtain a running VM (a "compute resource", cr).

    When a resource id is given as the first command-line argument the
    function reconnects to that VM; otherwise a new one is acquired from
    EC2.  In both cases it waits until the VM is ACTIVE, prints its details,
    sleeps for 60 seconds and returns the compute resource handle.

    EC2 credentials and the ssh keypair are read from the environment
    (EC2_ACCESS_KEY, EC2_SECRET_KEY, EC2_KEYPAIR_ID, EC2_KEYPAIR).  Any
    exception raised while talking to the VM is printed and re-raised.
    """
    # EC2 id and key used to talk to EC2 itself.
    ec2_ctx = rs.Context('EC2')
    ec2_ctx.user_id = os.environ['EC2_ACCESS_KEY']
    ec2_ctx.user_key = os.environ['EC2_SECRET_KEY']

    # Keypair the VM is configured with at startup, i.e. the key the VM will
    # accept.  If it is not yet registered on EC2, saga registers it.
    ec2keypair_ctx = rs.Context('EC2_KEYPAIR')
    ec2keypair_ctx.token = os.environ['EC2_KEYPAIR_ID']
    ec2keypair_ctx.user_key = os.environ['EC2_KEYPAIR']
    ec2keypair_ctx.user_id = 'admin'  # the user id on the target VM

    # The *same* key is used for ssh access to the VM afterwards.
    ssh_ctx = rs.Context('SSH')
    ssh_ctx.user_id = 'admin'
    ssh_ctx.user_key = os.environ['EC2_KEYPAIR']

    session = rs.Session(False)  # False: don't use other (default) contexts
    for context in (ec2_ctx, ec2keypair_ctx, ssh_ctx):
        session.contexts.append(context)

    try:
        if len(sys.argv) > 1:
            # Reconnect to the VM whose id was given on the command line.
            rid = sys.argv[1]
            print('reconnecting to %s' % rid)
            cr = rs.resource.Compute(id=rid, session=session)
            print('reconnected to %s' % rid)
            print(" state : %s (%s)" % (cr.state, cr.state_detail))
        else:
            # Start a new VM through the EC2 resource manager.
            rm = rs.resource.Manager("ec2://aws.amazon.com/", session=session)

            # Resource description: an image plus an instance template.
            cd = rs.resource.ComputeDescription()
            cd.image = 'ami-e6eeaa8e'
            cd.template = 'Small Instance'

            cr = rm.acquire(cd)
            print("\nWaiting for VM to become active...")

        # Wait for the VM to 'boot up', i.e. become ACTIVE.
        cr.wait(rs.resource.ACTIVE)

        print("Created VM: %s" % cr.id)
        print(" state : %s (%s)" % (cr.state, cr.state_detail))
        print(" access : %s" % cr.access)

        # Give the VM time to start up completely, otherwise a subsequent
        # job submission might fail.
        time.sleep(60)
        return cr

    except Exception as e:
        print("An Exception occured: %s " % e)
        raise
__author__ = "Andre Merzky" __copyright__ = "Copyright 2012-2013, The SAGA Project" __license__ = "MIT" import time import saga def my_cb(a, b, c): print " ----- callback: [%s, %s, %s]" % (a, b, c) return True try: c = saga.Context('ssh') c.user_key = '/home/merzky/.ssh/id_rsa_test' c.user_id = 'tester' c.user_pass = '******' s = saga.Session(default=True) # s.add_context (c) # js = saga.job.Service ('gsissh://gsissh.kraken.nics.xsede.org', session=s) # js = saga.job.Service ('ssh://localhost/', session=s) js = saga.job.Service('ssh://india.futuregrid.org/', session=s) jd = saga.job.Description() jd.executable = '/bin/echo' jd.arguments = ['hello world; date ; sleep 3'] # jd.output = "/tmp/out" # jd.error = "/tmp/err"
def __init__(self, addr):
    """Remember ``addr`` and prepare an ssh session for user ``dilawar``.

    The context is kept in ``self.ctx`` and the session holding it in
    ``self.session``.
    """
    self.addr = addr

    context = self.ctx = saga.Context("ssh")
    context.user_id = "dilawar"

    session = self.session = saga.Session()
    session.add_context(context)
def main():
    """Run ``/bin/echo`` on ``REMOTE_HOST`` and stage its stdout back.

    The job writes to ``/tmp/mysagajob-<user>.stdout`` on the remote host;
    after it has finished, that file is copied into the current working
    directory.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    try:
        # ssh identity on the remote machine
        ctx = saga.Context("ssh")
        ctx.user_id = "oweidner"

        # NOTE(review): the context above is not added to the session (the
        # call below is disabled) -- confirm that this is intentional.
        session = saga.Session()
        # session.add_context(ctx)

        # The 'ssh' URL scheme runs the job on the named remote machine.
        service = saga.job.Service("ssh://%s" % REMOTE_HOST, session=session)

        # The output file names carry the local login name.
        login = getpass.getuser()

        # Describe the job; the full attribute set is in the API docs.
        description = saga.job.Description()
        description.environment = {'MYOUTPUT':'"Hello from SAGA"'}
        description.executable = '/bin/echo'
        description.arguments = ['$MYOUTPUT']
        description.output = "/tmp/mysagajob-%s.stdout" % login
        description.error = "/tmp/mysagajob-%s.stderr" % login

        # A freshly created job starts out in state 'New'.
        myjob = service.create_job(description)
        print("Job ID : %s" % (myjob.id))
        print("Job State : %s" % (myjob.state))

        print("\n...starting job...\n")
        myjob.run()
        print("Job ID : %s" % (myjob.id))
        print("Job State : %s" % (myjob.state))

        print("\n...waiting for job...\n")
        # Block until the job has either finished or failed.
        myjob.wait()
        print("Job State : %s" % (myjob.state))
        print("Exitcode : %s" % (myjob.exit_code))

        # Stage the remote stdout file into the current directory.
        outfilesource = 'sftp://%s/tmp/mysagajob-%s.stdout' % (REMOTE_HOST, login)
        outfiletarget = "file://%s/" % os.getcwd()
        staged = saga.filesystem.File(outfilesource, session=session)
        staged.copy(outfiletarget)

        print("Staged out %s to %s (size: %s bytes)" % (outfilesource, outfiletarget, staged.get_size()))
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
REMOTE_FILE_ENDPOINT = "sftp://" + REMOTE_HOST + "/" + REMOTE_DIR # the dimension (in pixel) of the whole fractal imgx = 2048 imgy = 2048 # the number of tiles in X and Y direction tilesx = 2 tilesy = 2 #----------------------------------------------------------------------------- # if __name__ == "__main__": try: # Your ssh identity on the remote machine ctx = saga.Context("ssh") #ctx.user_id = "" session = saga.Session() session.add_context(ctx) # list that holds the jobs jobs = [] # create a working directory in /scratch dirname = '%s/mbrot/' % (REMOTE_FILE_ENDPOINT) workdir = saga.filesystem.Directory(dirname, saga.filesystem.CREATE, session=session) # copy the executable and warpper script to the remote host
def main():
    """Run ``/bin/echo`` through a remote LoadLeveler cluster.

    The job is submitted via ``loadl+ssh://`` to ``REMOTE_HOST``.  Once it
    has finished, its stdout file is copied from the remote ``/tmp/`` to the
    local ``/tmp/``.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    try:
        # Your ssh identity on the remote machine
        ctx = saga.Context("ssh")
        ctx.user_id = "your_username"

        session = saga.Session()
        session.add_context(ctx)

        # The keyword 'loadl' in the url scheme triggers the LoadLeveler
        # adaptors and '+ssh' enables remote access via SSH.  The 'cluster'
        # URL query specifies the LoadLeveler cluster name ("llq -X cluster").
        js = saga.job.Service(
            "loadl+ssh://%s?cluster=your_cluster_name" % REMOTE_HOST,
            session=session)

        # Describe the job; the full attribute set is in the API docs.
        jd = saga.job.Description()
        jd.environment = {'MYOUTPUT': '"Hello LoadLevler Adaptor from SAGA"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']
        jd.output = "/tmp/mysagajob.stdout"
        jd.error = "/tmp/mysagajob.stderr"

        # Create a new job from the job description. The initial state of
        # the job is 'New'.
        myjob = js.create_job(jd)

        # Check our job's id and state
        print("Job ID : %s" % (myjob.id))
        print("Job State : %s" % (myjob.state))

        print("\n...starting job...\n")
        myjob.run()

        print("Job ID : %s" % (myjob.id))
        print("Job State : %s" % (myjob.state))

        print("\n...waiting for job...\n")
        # Wait once for the job to either finish or fail; a repeated
        # status/wait section would only print the same lines again.
        myjob.wait()

        print("Job State : %s" % (myjob.state))
        print("Exitcode : %s" % (myjob.exit_code))

        # Stage the remote stdout file to the local /tmp/ directory.
        outfilesource = 'sftp://%s/tmp/mysagajob.stdout' % REMOTE_HOST
        outfiletarget = 'file://localhost/tmp/'
        out = saga.filesystem.File(outfilesource, session=session)
        out.copy(outfiletarget)

        print("Staged out %s to %s (size: %s bytes)\n" % (
            outfilesource, outfiletarget, out.get_size()))
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The backtrace can be helpful for debugging.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def run(self):
    """Start a GCE instance and launch the pilot agent on it via ssh.

    An instance is inserted into ``GCE_PROJECT_ID`` using this object's
    location, machine type, id and image URL.  Once it is running, its NAT
    IP is stored in ``self.network_ip`` and ``self.job_description`` is
    submitted to ``ssh://<ip>`` with the ssh user and key file taken from
    the pilot compute description.

    Connecting and submitting is attempted up to 15 times, 10 seconds apart.

    Raises:
        Exception: if every submission attempt failed.
    """
    request_dict = {
        "kind": "compute#instance",
        "disks": [{
            "kind": "compute#instanceDisk",
            "type": "PERSISTENT",
            "mode": "READ",
            "deviceName": "reference-genome",
            "source": "https://www.googleapis.com/compute/v1beta12/projects/bigjob-pilot/disks/reference-genome"
        }],
        "networkInterfaces": [{
            "kind": "compute#instanceNetworkInterface",
            "accessConfigs": [{
                "name": "External NAT",
                "type": "ONE_TO_ONE_NAT"
            }],
            "network": "https://www.googleapis.com/compute/v1beta12/projects/bigjob-pilot/networks/default"
        }],
        "serviceAccounts": [{
            "kind": "compute#serviceAccount",
            "email": "default",
            "scopes": [
                "https://www.googleapis.com/auth/userinfo.email",
                "https://www.googleapis.com/auth/compute",
                "https://www.googleapis.com/auth/devstorage.full_control"
            ]
        }],
        "zone": self.location,
        "machineType": self.machine_type,
        "name": self.id,
        "image": self.image_url
    }

    http = httplib2.Http()
    http = self.credentials.authorize(http)
    gce = build("compute", "v1beta12", http=http)
    gce.instances().insert(project=GCE_PROJECT_ID,
                           body=request_dict).execute()

    time.sleep(15)  # wait for startup
    # wait for compute instance to become active
    self.wait_for_running()

    # spawn BJ agent via SSH
    compute_instance_details = self.__get_instance_resource()
    logger.debug("Compute Instance Details: " + str(compute_instance_details))
    self.network_ip = compute_instance_details["networkInterfaces"][0][
        "accessConfigs"][0]['natIP']
    url = "ssh://" + str(self.network_ip)
    logger.debug("Connect to: %s" % (url))

    # Submit job
    ctx = saga.Context("SSH")
    ctx.user_id = self.pilot_compute_description["vm_ssh_username"]
    ctx.user_key = self.pilot_compute_description["vm_ssh_keyfile"]
    session = saga.Session()
    session.add_context(ctx)

    print("Submit pilot job to: " + str(url))

    TRIAL_MAX = 15
    for trials in range(TRIAL_MAX):
        try:
            # Connect inside the retry loop: a freshly booted VM may not
            # accept ssh connections yet, which should be retried as well.
            js = saga.job.Service(url, session=session)
            job = js.create_job(self.job_description)
            logger.debug("Attempt: %d, submit pilot job to: %s " % (trials, str(url)))
            job.run()
            break
        except Exception as exc:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can still be stopped.
            logger.warning("Submission failed: " + str(exc))
            time.sleep(10)
    else:
        # The loop ran out of attempts without a successful submission.
        raise Exception("Submission of agent failed.")

    logger.debug("Job State : %s" % (job.get_state()))
    print("Job State : %s" % (job.get_state()))
def main():
    """Submit a ``/bin/touch`` job to a remote SLURM cluster and wait for it.

    After submission all jobs known to the service are listed, the job is
    re-acquired by id through ``get_job`` and that handle is used to wait
    for completion and to print the final state and exit code.

    Returns:
        0 on success, -1 if a ``saga.SagaException`` was raised.
    """
    try:
        # ssh identity on the remote machine.  Set ``user_id`` on the
        # context if the user name there differs from the local one.
        identity = saga.Context("ssh")
        slurm_session = saga.Session()
        slurm_session.add_context(identity)

        # 'slurm' in the URL scheme selects the scheduler adaptor; '+ssh'
        # enables remote access via SSH.
        service = saga.job.Service(
            "slurm+ssh://login1.stampede.tacc.utexas.edu",
            session=slurm_session)

        # Describe the job; the full attribute set is in the API docs.
        description = saga.job.Description()
        description.environment = {'FILENAME': 'testfile'}
        description.wall_time_limit = 1  # minutes
        description.executable = '/bin/touch'
        description.arguments = ['$FILENAME']
        description.queue = "development"
        description.project = "TG-MCB090174"
        description.working_directory = "$SCRATCH/A/B/C"
        description.output = "examplejob.out"
        description.error = "examplejob.err"

        # A freshly created job starts out in state 'New'.
        touchjob = service.create_job(description)
        print("Job ID : %s" % (touchjob.id))
        print("Job State : %s" % (touchjob.state))

        print("\n...starting job...\n")
        touchjob.run()
        print("Job ID : %s" % (touchjob.id))
        print("Job State : %s" % (touchjob.state))

        # Everything the adaptor knows about, which should include our job.
        print("\nListing active jobs: ")
        for known_job in service.list():
            print(" * %s" % known_job)

        # Re-acquire the job by its id.  Not strictly needed here, but this
        # is how one reconnects to a long-running job.
        touchjob_clone = service.get_job(touchjob.id)

        print("\n...waiting for job...\n")
        touchjob_clone.wait()

        print("Job State : %s" % (touchjob_clone.state))
        print("Exitcode : %s" % (touchjob_clone.exit_code))

        service.close()
        return 0

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        # The full backtrace can be helpful for debugging the problem.
        print(" \n*** Backtrace:\n %s" % ex.traceback)
        return -1
def run(self):
    """Start VM and start BJ agent via SSH on VM.

    Map fields of Pilot description to EC2 API::

        {
            "vm_id":"ami-d7f742be",
            "vm_ssh_username":"******",
            "vm_ssh_keyname":"MyKey",
            "vm_ssh_keyfile":"<path>",
            "vm_type":"t1.micro",
            "access_key_id":"xxx",
            "secret_access_key":"xxx"
        }

    Of these, this method reads "vm_id", "vm_ssh_keyname", "vm_type",
    "vm_ssh_username" and "vm_ssh_keyfile".

    Raises:
        Exception: if the agent job could not be submitted within
            TRIAL_MAX attempts.
    """
    description = self.pilot_compute_description

    # Launch the instance and remember its handle and id.
    reservation = self.ec2_conn.run_instances(
        description["vm_id"],
        key_name=description["vm_ssh_keyname"],
        instance_type=description["vm_type"],
        security_groups=[SECURITY_GROUP])
    self.instance = reservation.instances[0]
    self.instance_id = self.instance.id
    logger.debug("Started EC2/Eucalyptus/Nova instance: %s" % self.instance_id)
    time.sleep(5)
    self.wait_for_running()

    # The instance is only tagged when the scheme is neither euca+ssh nor
    # nova+ssh.
    if self.resource_url.scheme not in ("euca+ssh", "nova+ssh"):
        self.ec2_conn.create_tags([self.instance_id], {"Name": self.id})

    self.network_ip = self.instance.ip_address
    url = "ssh://" + str(self.network_ip)
    logger.debug("Connect to: %s" % (url))

    # Submit job
    ctx = saga.Context("SSH")
    #ctx.type = saga.Context.SSH
    ctx.user_id = description["vm_ssh_username"]
    ctx.user_key = description["vm_ssh_keyfile"]
    session = saga.Session()
    session.add_context(ctx)

    TRIAL_MAX = 30
    trials = 0
    while trials < TRIAL_MAX:
        try:
            js = saga.job.Service(url, session=session)
            logger.debug("Job Description Type: " + str(type(self.job_description)))
            job = js.create_job(self.job_description)
            logger.debug("Attempt: %d, submit pilot job to: %s " % (trials, str(url)))
            job.run()
            if job.get_state() != saga.job.FAILED:
                break
            logger.warning("Submission failed.")
        except Exception as exc:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the retry loop can be aborted.
            logger.warning("Submission failed: " + str(exc))
            #self.__print_traceback()
        # Reached after a FAILED state or an exception: count the attempt
        # and back off before trying again.
        trials = trials + 1
        time.sleep(30)

    if trials == TRIAL_MAX:
        raise Exception("Submission of agent failed.")

    logger.debug("Job State : %s" % (job.get_state()))
#
# All EC2 settings are taken from the environment. Each variable that is
# missing is reported through usage(), in the order listed here.
for env_name in ('EC2_URL',
                 'EC2_ACCESS_KEY',
                 'EC2_SECRET_KEY',
                 'EC2_KEYPAIR_ID',
                 'EC2_KEYPAIR'):
    if env_name not in os.environ:
        usage("no %s in environment" % env_name)

server = saga.Url(os.environ['EC2_URL'])

# in order to connect to EC2, we need an EC2 ID and KEY
c1 = saga.Context('ec2')
c1.user_id = os.environ['EC2_ACCESS_KEY']
c1.user_key = os.environ['EC2_SECRET_KEY']
c1.server = server

# in order to access a created VM, we additionally need to point to the ssh
# key which is used for EC2 VM contextualization, i.e. as EC2 'keypair'.
# If the keypair is not yet registered on EC2, it will be registered by SAGA
# -- but then a user_key *must* be specified (only the public key is ever
# transferred to EC2).
# NOTE(review): the text above speaks of 'user_key', while the code below
# sets 'user_cert' -- confirm which attribute the adaptor expects.
c2 = saga.Context('ec2_keypair')
c2.token = os.environ['EC2_KEYPAIR_ID']
c2.user_cert = os.environ['EC2_KEYPAIR']
c2.user_id = 'ubuntu'  # the user id on the target VM
c2.server = server
def main():
    """Acquire (or reconnect to) an EC2 VM and run a short job on it.

    If a resource id is given as first command line argument, the example
    reconnects to that VM; otherwise it acquires a new one. It then waits
    for the VM to become ACTIVE and runs '/bin/sleep 30' on it through a
    job service created from the VM's access URL.

    Raises:
        saga.SagaException: printed and then re-raised.
        KeyError: if one of the EC2_* environment variables read below is
            not set.
    """
    # In order to connect to EC2, we need an EC2 ID and KEY. We read those
    # from the environment.
    ec2_ctx = saga.Context('EC2')
    ec2_ctx.user_id = os.environ['EC2_ACCESS_KEY']
    ec2_ctx.user_key = os.environ['EC2_SECRET_KEY']

    # The SSH keypair we want to use to access the EC2 VM. If the keypair is
    # not yet registered on EC2 saga will register it automatically. This
    # context specifies the key for VM startup, i.e. the VM will be
    # configured to accept this key.
    ec2keypair_ctx = saga.Context('EC2_KEYPAIR')
    ec2keypair_ctx.token = os.environ['EC2_KEYPAIR_ID']
    ec2keypair_ctx.user_key = os.environ['EC2_KEYPAIR']
    ec2keypair_ctx.user_id = 'root'  # the user id on the target VM

    # We specify the *same* ssh key for ssh access to the VM. That now should
    # work if the VM got configured correctly per the 'EC2_KEYPAIR' context
    # above.
    ssh_ctx = saga.Context('SSH')
    ssh_ctx.user_id = 'root'
    ssh_ctx.user_key = os.environ['EC2_KEYPAIR']

    session = saga.Session(False)  # FALSE: don't use other (default) contexts
    session.contexts.append(ec2_ctx)
    session.contexts.append(ec2keypair_ctx)
    session.contexts.append(ssh_ctx)

    cr = None   # compute resource handle
    rid = None  # compute resource ID

    try:
        # ----------------------------------------------------------------------
        #
        # reconnect to VM (ID given in ARGV[1])
        #
        if len(sys.argv) > 1:
            rid = sys.argv[1]

            # reconnect to the given resource
            print('reconnecting to %s' % rid)
            cr = saga.resource.Compute(id=rid, session=session)
            print('reconnected to %s' % rid)
            print(" state : %s (%s)" % (cr.state, cr.state_detail))

        # ----------------------------------------------------------------------
        #
        # start a new VM
        #
        else:
            # start a VM if needed
            # in our session, connect to the EC2 resource manager
            rm = saga.resource.Manager("ec2://aws.amazon.com/", session=session)

            # Create a resource description with an image and an OS template.
            # We pick a small VM and a plain Ubuntu image...
            cd = saga.resource.ComputeDescription()
            cd.image = 'ami-0256b16b'  # plain ubuntu
            cd.template = 'Small Instance'

            # Create a VM instance from that description.
            cr = rm.acquire(cd)
            rid = cr.id

            print("\nWaiting for VM to become active...")

        # ----------------------------------------------------------------------
        #
        # use the VM
        #
        # Wait for the VM to 'boot up', i.e., become 'ACTIVE'
        cr.wait(saga.resource.ACTIVE)

        # Query some information about the newly created VM
        print("Created VM: %s" % cr.id)
        print(" state : %s (%s)" % (cr.state, cr.state_detail))
        print(" access : %s" % cr.access)

        # give the VM some time to start up completely, otherwise the
        # subsequent job submission might end up failing...
        time.sleep(60)

        # create a job service which uses the VM's access URL (cr.access)
        js = saga.job.Service(cr.access, session=session)
        try:
            jd = saga.job.Description()
            jd.executable = '/bin/sleep'
            jd.arguments = ['30']

            job = js.create_job(jd)
            job.run()

            print("\nRunning Job: %s" % job.id)
            print(" state : %s" % job.state)
            job.wait()
            print(" state : %s" % job.state)
        finally:
            # release the job service even if submission or waiting fails
            js.close()

    except saga.SagaException as ex:
        # Catch all saga exceptions
        print("An exception occured: (%s) %s " % (ex.type, (str(ex))))
        raise