def post(self):
    # Request handler excerpt: `json`, `logging`, and the project-specific
    # names (backendservice, CloudTracker, AgentTypes) are assumed to be
    # imported at module level.
    self.response.headers['Content-Type'] = 'application/json'
    req_type = self.request.get('req_type')
    job_type = self.request.get('job_type')
    id = self.request.get('id')
    instance_type = self.request.get('instance_type')

    if req_type == 'analyze':
        logging.info('Analyzing the cost...')
        logging.info('Rerunning cost analysis on ' + instance_type)

        credentials = self.user_data.getCredentials()
        access_key = credentials['EC2_ACCESS_KEY']
        secret_key = credentials['EC2_SECRET_KEY']

        backend_services = backendservice.backendservices(self.user_data)
        if not self.user_data.valid_credentials \
                or not backend_services.isOneOrMoreComputeNodesRunning(instance_type):
            msg = ('You must have at least one active ' + instance_type +
                   ' compute node to run in the cloud.')
            logging.info(msg)
            self.response.write(json.dumps({'status': False, 'msg': msg}))
            return

        result = {}
        try:
            uuid, _ = self.get_uuid_name(id, job_type)
            logging.info('Starting rerun of job {0} for cost analysis'.format(str(uuid)))

            # Set up CloudTracker with the user credentials and the specified
            # UUID to rerun the job.
            ct = CloudTracker(access_key, secret_key, str(uuid),
                              self.user_data.getBucketName())
            has_prov = not ct.if_tracking()
            # If there is no provenance data for this job, report an error to the user.
            if not has_prov:
                result = {'status': False,
                          'msg': 'The job with this ID does not exist or cannot be reproduced.'}
                self.response.write(json.dumps(result))
                return

            params = ct.get_input()
            params['cost_analysis_uuid'] = uuid

            cloud_result = backend_services.submit_cloud_task(
                params, agent_type=AgentTypes.EC2,
                instance_type=instance_type, cost_replay=True)
            if not cloud_result['success']:
                e = cloud_result['exception']
                result = {'status': False, 'msg': 'Cloud execution failed: ' + str(e)}
                self.response.write(json.dumps(result))
                return

            result = {'status': True, 'msg': 'Cost analysis submitted successfully.'}
        except Exception as e:
            result = {'status': False, 'msg': 'Cloud execution failed: ' + str(e)}

        self.response.write(json.dumps(result))
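# A minimal client-side sketch of invoking the analysis handler above, using
# only the Python 2 standard library. The '/cost_analysis' path and host are
# assumptions (route registration is not part of this excerpt); the form
# fields mirror exactly what the handler reads via self.request.get().
def _example_analyze_request(host='http://localhost:8080'):
    import urllib
    import urllib2
    data = urllib.urlencode({'req_type': 'analyze',
                             'job_type': 'stochkit',
                             'id': '<job id>',  # hypothetical job ID
                             'instance_type': 't1.micro'})
    response = urllib2.urlopen(host + '/cost_analysis', data)
    return json.loads(response.read())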
def task(taskid, params, agent, database, storage_agent, access_key,
         secret_key, task_prefix=""):
    '''
    This is the actual work done by a task worker.
    params should contain at least 'bucketname'.
    '''
    uuidstr = taskid
    res = {}
    try:
        bucketname = params['bucketname']
        if_tracking = False
        ct = None
        if agent == AgentTypes.EC2:
            logging.info('Trying to track via CloudTracker...')
            # Initialize CloudTracker with the task's UUID.
            ct = CloudTracker(access_key, secret_key, uuidstr, bucketname)
            if_tracking = ct.if_tracking()
            logging.info('Is this the first execution of the job? {0}'.format(if_tracking))
            if if_tracking:
                ct.track_input(params)

        logging.info('Task to be executed at remote location')
        print 'inside celery task method'

        data = {'status': 'active', 'message': 'Task Executing in cloud'}
        # The task ID carries a prefix for cost-analysis jobs.
        taskid = '{0}{1}'.format(task_prefix, taskid)
        database.updateEntry(taskid=taskid, data=data, tablename=params["db_table"])

        paramstr = params['paramstring']
        job_type = params['job_type']

        if job_type == 'spatial':
            os.system("mkdir -p output/%s/results" % uuidstr)
        os.system("mkdir -p output/%s/result" % uuidstr)

        return_code_file = "output/{0}/return_code".format(uuidstr)

        filename = os.path.join('output', uuidstr, '{0}.xml'.format(uuidstr))
        with open(filename, 'w') as f:
            f.write(params['document'])
        xmlfilepath = filename

        stdout = os.path.join('output', uuidstr, 'stdout.log')
        stderr = os.path.join('output', uuidstr, 'stderr.log')

        exec_str = ''
        logging.info('job_type = {}'.format(job_type))
        if job_type == 'stochkit':
            # The execution string is of the form:
            #   ~/StochKit/ssa -m ~/output/<uuid>/dimer_decay.xml -t 20 -i 10 -r 1000
            exec_str = "{0}/{1} -m {2} --force --out-dir output/{3}/result 2>{4} > {5}".format(
                TaskConfig.STOCHKIT_DIR, paramstr, xmlfilepath, uuidstr, stderr, stdout)
        elif job_type == 'stochkit_ode' or job_type == 'sensitivity':
            logging.info('sensitivity job')
            exec_str = "{0}/{1} -m {2} --force --out-dir output/{3}/result 2>{4} > {5}".format(
                TaskConfig.ODE_DIR, paramstr, xmlfilepath, uuidstr, stderr, stdout)
        elif job_type == 'spatial':
            cmd = "chown -R {username} output/{uuidstr}".format(
                username=TaskConfig.USERNAME, uuidstr=uuidstr)
            print cmd
            os.system(cmd)
            exec_str = (
                "sudo -E -u {username} {pyurdme_wrapper} {model_xml_file} {output_dir} "
                "{sim_algorithm} {sim_realizations} {sim_seed} 2>{stderr} > {stdout}").format(
                    username=TaskConfig.USERNAME,
                    pyurdme_wrapper=TaskConfig.PYURDME_WRAPPER_PATH,
                    model_xml_file=xmlfilepath,
                    output_dir=os.path.join('output', uuidstr, 'results'),
                    sim_algorithm=params['simulation_algorithm'],
                    sim_realizations=params['simulation_realizations'],
                    sim_seed=params['simulation_seed'],
                    stderr=stderr, stdout=stdout)

        print "======================="
        print " Command to be executed:"
        print "{0}".format(exec_str)
        print "======================="
        print "To verify the command string, copy the line above and run it in a terminal."

        timestarted = datetime.now()
        # os.system(exec_str)
        execute_task(exec_str, return_code_file=return_code_file)
        timeended = datetime.now()

        results = os.listdir("output/{0}/result".format(uuidstr))
        if 'stats' in results and os.listdir(
                "output/{0}/result/stats".format(uuidstr)) == ['.parallel']:
            raise Exception("The compute node cannot handle a job of this size.")

        filepath = os.path.join('output', uuidstr)
        absolute_file_path = os.path.abspath(filepath)
        if agent == AgentTypes.EC2:
            try:
                if if_tracking:
                    ct.track_output(absolute_file_path)
            except Exception as e:
                print "CloudTracker Error: track_output"
                print e

        data = {'status': 'active', 'message': 'Task finished. Generating output.'}
        database.updateEntry(taskid=taskid, data=data, tablename=params["db_table"])

        diff = timeended - timestarted
        # A non-empty task prefix marks a cost replay, which only reports its
        # status and timing; a normal run also packages and uploads the output.
        if task_prefix != "":
            res['status'] = "finished"
            res['time_taken'] = "{0} seconds".format(diff.total_seconds())
        else:
            logging.info('Generating output tar file...')
            output_dir = os.path.join("output", uuidstr)
            output_tar_filename = os.path.join("output", "{}.tar".format(uuidstr))
            # Note: -z gzips the archive even though the filename keeps a
            # plain .tar extension.
            create_output_tar_command = \
                "tar -zcvf {output_tar_filename} {output_dir}".format(
                    output_tar_filename=output_tar_filename, output_dir=output_dir)
            logging.info(create_output_tar_command)
            os.system(create_output_tar_command)

            output_url = storage_agent.upload_file(filename=output_tar_filename)
            logging.info('output_url = {}'.format(output_url))

            logging.info('Removing xml file...')
            os.system("rm {0}".format(xmlfilepath))

            logging.info('Removing output...')
            remove_output_command = \
                "rm -r {output_tar_filename} {output_dir}".format(
                    output_tar_filename=output_tar_filename, output_dir=output_dir)
            os.system(remove_output_command)

            res['status'] = "finished"
            res['pid'] = uuidstr
            res['output'] = output_url
            res['time_taken'] = "{0} seconds".format(diff.total_seconds())

        database.updateEntry(taskid=taskid, data=res, tablename=params["db_table"])

        if agent == AgentTypes.EC2:
            # 'cost_analysis_table' is absent from params for a cost-analysis
            # task, but should be present for normal and rerun tasks.
            logging.info('Updating cost analysis table for normal/rerun task...')
            if "cost_analysis_table" in params:
                if "INSTANCE_TYPE" not in os.environ:
                    logging.error("Error: INSTANCE_TYPE is not set in the environment.")
                    res['message'] = "Error: INSTANCE_TYPE is not set in the environment."
                    return res

                result = database.getEntry('taskid', taskid, params["db_table"])
                data = None
                for one in result:
                    data = one
                    logging.info("{0} data in stochss table: {1}".format(taskid, one))
                    break
                if data is None:
                    logging.error("Error: there is no data in the stochss table for {0}.".format(taskid))
                    res['message'] = "Error: there is no data in the stochss table for {0}.".format(taskid)
                    return res

                instance_type = os.environ["INSTANCE_TYPE"]
                taskid_prefix = '{0}_{1}_'.format(agent, instance_type)
                taskid = taskid_prefix + taskid
                cost_analysis_data = {
                    'agent': agent,
                    'instance_type': instance_type,
                    'message': data['message'],
                    'start_time': data['start_time'],
                    'status': data['status'],
                    'time_taken': data['time_taken'],
                    'uuid': data['uuid']
                }
                database.updateEntry(taskid=taskid, data=cost_analysis_data,
                                     tablename=params["cost_analysis_table"])
    # The excerpt ends here; a minimal except clause completes the try block
    # so the function parses. The production error handling may differ.
    except Exception as e:
        logging.exception('Task execution failed: {0}'.format(e))
        res['status'] = 'failed'
        res['message'] = str(e)
    return res
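# `execute_task` is invoked above in place of the commented-out
# os.system(exec_str) call, but its definition is not part of this excerpt.
# A minimal sketch consistent with the call site, assuming it runs the shell
# command and persists the exit status in `return_code_file`; the real
# implementation may differ.
import subprocess

def execute_task(exec_str, return_code_file):
    # exec_str already contains the stdout/stderr redirections, so a shell
    # is required to interpret them.
    return_code = subprocess.call(exec_str, shell=True)
    with open(return_code_file, 'w') as f:
        f.write(str(return_code))
    return return_code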
def post(self):
    self.response.content_type = 'application/json'
    req_type = self.request.get('req_type')

    credentials = self.user_data.getCredentials()
    os.environ["AWS_ACCESS_KEY_ID"] = credentials['EC2_ACCESS_KEY']
    os.environ["AWS_SECRET_ACCESS_KEY"] = credentials['EC2_SECRET_KEY']
    access_key = credentials['EC2_ACCESS_KEY']
    secret_key = credentials['EC2_SECRET_KEY']

    if req_type == 'delOutput':
        uuid = self.request.get('uuid')
        logging.debug('delOutput: uuid={0}'.format(uuid))
        try:
            job_type = self.request.get('job_type')
            if job_type == 'stochkit':
                job = db.GqlQuery(
                    "SELECT * FROM StochKitJobWrapper WHERE user_id = :1 AND cloudDatabaseID = :2",
                    self.user.user_id(), uuid).get()
                job.output_stored = 'False'
                job.put()
            elif job_type == 'sensitivity':
                job = sensitivity.SensitivityJobWrapper.all().filter(
                    'user_id =', self.user.user_id()).filter('cloudDatabaseID =', uuid).get()
                job.output_stored = 'False'
                job.outData = None
                job.put()
            elif job_type == 'spatial':
                job = spatial.SpatialJobWrapper.all().filter(
                    'user_id =', self.user.user_id()).filter('cloudDatabaseID =', uuid).get()
                job.output_stored = 'False'
                job.outData = None
                job.put()

            service = backendservices(self.user_data)
            service.deleteTaskOutput(job)

            # Delete the local output, if any.
            output_path = os.path.join(os.path.dirname(__file__), '../output/')
            if os.path.exists(str(output_path) + uuid):
                shutil.rmtree(str(output_path) + uuid)

            result = {'status': True, 'msg': 'Output deleted successfully.'}
        except Exception as e:
            logging.error(e)
            result = {'status': False,
                      'msg': 'Failed to delete output in the cloud: ' + str(e)}

        self.response.write(json.dumps(result))
        return
    elif req_type == 'rerun':
        service = backendservices(self.user_data)
        job_type = self.request.get('job_type')
        uuid = self.request.get('uuid')
        logging.debug('rerun: uuid={0}'.format(uuid))
        logging.info('job uuid: {0}'.format(uuid))

        if not self.user_data.valid_credentials \
                or not service.isOneOrMoreComputeNodesRunning():
            self.response.write(json.dumps({
                'status': False,
                'msg': 'There is no compute node running. *Launch one node? '
            }))
            return

        if job_type == 'stochkit':
            job = db.GqlQuery(
                "SELECT * FROM StochKitJobWrapper WHERE user_id = :1 AND cloudDatabaseID = :2",
                self.user.user_id(), uuid).get()
            try:
                logging.info('Starting rerun of job {0}'.format(str(uuid)))
                # Set up CloudTracker with the user credentials and the
                # specified UUID to rerun the job.
                ct = CloudTracker(access_key, secret_key, str(uuid),
                                  self.user_data.getBucketName())
                has_prov = not ct.if_tracking()
                # If there is no provenance data for this job, report an error to the user.
                if not has_prov:
                    result = {'status': False,
                              'msg': 'The job with this ID does not exist or cannot be reproduced.'}
                    self.response.write(json.dumps(result))
                    return

                params = ct.get_input()
                logging.info("OUTPUT SIZE: {0}".format(params['output_size']))

                time = datetime.datetime.now()
                params['rerun_uuid'] = uuid
                cloud_result = service.submit_cloud_task(params=params)
                if not cloud_result["success"]:
                    e = cloud_result["exception"]
                    result = {'status': False,
                              'msg': 'Cloud execution failed: ' + str(e)}
                    self.response.write(json.dumps(result))
                    return

                # The celery_pid is the Celery task ID.
                job.celeryPID = cloud_result["celery_pid"]
                job.status = 'Running'
                job.outData = None
                job.output_stored = 'True'
                job.startDate = time.strftime("%Y-%m-%d-%H-%M-%S")
                job.put()

                result = {'status': True, 'msg': 'Job rerun submitted successfully.'}
            except Exception as e:
                result = {'status': False, 'msg': 'Cloud execution failed: ' + str(e)}

            self.response.write(json.dumps(result))
            return
        elif job_type == 'sensitivity':
            job = sensitivity.SensitivityJobWrapper.all().filter(
                'user_id =', self.user.user_id()).filter('cloudDatabaseID =', uuid).get()
            try:
                ct = CloudTracker(access_key, secret_key, str(uuid),
                                  self.user_data.getBucketName())
                has_prov = not ct.if_tracking()
                # If there is no provenance data for this job, report an error to the user.
                if not has_prov:
                    result = {'status': False,
                              'msg': 'The job with this ID does not exist or cannot be reproduced.'}
                    self.response.write(json.dumps(result))
                    return

                params = ct.get_input()
                time = datetime.datetime.now()

                # Execute the task in the cloud.
                params['rerun_uuid'] = uuid
                cloud_result = service.submit_cloud_task(params=params)
                if not cloud_result["success"]:
                    e = cloud_result["exception"]
                    result = {'status': False,
                              'msg': 'Cloud execution failed: ' + str(e)}
                    self.response.write(json.dumps(result))
                    return

                # The celery_pid is the Celery task ID.
                job.status = "Running"
                job.celeryPID = cloud_result["celery_pid"]
                job.startTime = time.strftime("%Y-%m-%d-%H-%M-%S")
                job.output_stored = 'True'
                job.put()

                result = {'status': True, 'msg': 'Job rerun submitted successfully.'}
            except Exception as e:
                result = {'status': False, 'msg': 'Cloud execution failed: ' + str(e)}

            self.response.write(json.dumps(result))
            return
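# The provenance lookup above (construct a CloudTracker, check if_tracking(),
# then fetch the recorded input) is repeated for the cost-analysis, stochkit,
# and sensitivity paths. A small hypothetical helper, not part of the
# original module, that captures the shared pattern:
def load_provenance(access_key, secret_key, uuid, bucket_name):
    # Returns (ct, params); params is None when no provenance was recorded,
    # i.e. when the tracker reports it would still be tracking this job.
    ct = CloudTracker(access_key, secret_key, str(uuid), bucket_name)
    if ct.if_tracking():
        return ct, None
    return ct, ct.get_input()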
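# How the pieces fit together: a cost replay invokes `task` with a non-empty
# task_prefix, and `task` keys results for normal/rerun jobs in the
# cost-analysis table under '<agent>_<instance_type>_<taskid>'. A hypothetical
# lookup built on the same `database` interface used above (getEntry returns
# an iterable of matching rows):
def lookup_cost_entry(database, agent, instance_type, taskid, table):
    prefixed_taskid = '{0}_{1}_{2}'.format(agent, instance_type, taskid)
    for entry in database.getEntry('taskid', prefixed_taskid, table):
        return entry
    return None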