def process(self, run_settings):
    """Destroy stage entry point.

    Restores the iteration id and the created/scheduled/bootstrapped node
    lists from run_settings, then validates that the computation platform
    declares a platform_type before teardown proceeds.
    """
    # Iteration id; falls back to 0 when absent or malformed.
    try:
        self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.id = 0
    # Node lists are persisted as string-encoded Python literals; an
    # unreadable/missing entry means "no nodes of that kind".
    try:
        self.created_nodes = ast.literal_eval(
            getval(run_settings,
                   '%s/stages/create/created_nodes' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.created_nodes = []
    try:
        self.scheduled_nodes = ast.literal_eval(
            getval(run_settings,
                   '%s/stages/schedule/scheduled_nodes' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.scheduled_nodes = []
    try:
        self.bootstrapped_nodes = ast.literal_eval(
            getval(run_settings,
                   '%s/stages/bootstrap/bootstrapped_nodes' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.bootstrapped_nodes = []
    messages.info(run_settings, "%d: destroy" % self.id)
    comp_pltf_settings = self.get_platform_settings(
        run_settings, 'http://rmit.edu.au/schemas/platform/computation')
    try:
        platform_type = comp_pltf_settings['platform_type']
    except KeyError as e:
        # Fixed py2-only "except KeyError, e" syntax to the "as" form
        # already used elsewhere in this file (also valid on py2.6+).
        logger.error(e)
        messages.error(run_settings, e)
        return
def process(self, run_settings):
    """Destroy stage entry point (duplicate variant).

    Loads the iteration id and node bookkeeping lists from run_settings,
    then checks the computation platform's platform_type.
    """
    try:
        # Iteration id; 0 when the setting is absent or malformed.
        self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.id = 0
    # Each node list is stored as a string-encoded Python literal.
    try:
        self.created_nodes = ast.literal_eval(getval(
            run_settings, '%s/stages/create/created_nodes' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.created_nodes = []
    try:
        self.scheduled_nodes = ast.literal_eval(getval(
            run_settings,
            '%s/stages/schedule/scheduled_nodes' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.scheduled_nodes = []
    try:
        self.bootstrapped_nodes = ast.literal_eval(getval(
            run_settings,
            '%s/stages/bootstrap/bootstrapped_nodes' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.bootstrapped_nodes = []
    messages.info(run_settings, "%d: destroy" % self.id)
    comp_pltf_settings = self.get_platform_settings(
        run_settings, 'http://rmit.edu.au/schemas/platform/computation')
    try:
        platform_type = comp_pltf_settings['platform_type']
    except KeyError as e:
        # Fixed py2-only "except KeyError, e" syntax (consistent with
        # the "as" form used elsewhere in this file).
        logger.error(e)
        messages.error(run_settings, e)
        return
def create_resource(self, local_settings):
    """Spin up the VMs for this iteration.

    Returns a (group_id, created_nodes) pair, where created_nodes is a
    list of [id, ip, region, state] records for the running VMs.  On
    insufficient capacity the failure is handed to FTManager and
    group_id becomes 'UNKNOWN'.
    """
    iteration_id = self.get_iteration_id(local_settings)
    messages.info(local_settings, "%s: Creating VMs" % iteration_id)
    created_nodes = []
    group_id, vms_detail_list = create_vms(local_settings)
    logger.debug("group_id=%s vms_detail_list=%s"
                 % (group_id, vms_detail_list))
    try:
        enough = vms_detail_list and \
            len(vms_detail_list) >= local_settings['min_count']
        if not enough:
            raise InsufficientVMError
        print_vms(local_settings, all_vms=vms_detail_list)
        # Fall back to the private address when no public IP was given.
        for node in vms_detail_list:
            if not node.ip_address:
                node.ip_address = node.private_ip_address
        created_nodes = [
            [node.id, node.ip_address, unicode(node.region), 'running']
            for node in vms_detail_list]
        messages.info_context(
            int(local_settings['contextid']),
            "%s: Creating VMs (%s created)" % (iteration_id,
                                               len(vms_detail_list)))
    except InsufficientVMError as e:
        group_id = 'UNKNOWN'
        messages.error_context(
            int(local_settings['contextid']),
            "error: sufficient VMs cannot be created")
        ftmanager = FTManager()
        ftmanager.manage_failure(
            e, settings=local_settings, created_vms=vms_detail_list)
    return group_id, created_nodes
def create_resource(self, local_settings):
    """Create VMs for this iteration and return (group_id, created_nodes).

    created_nodes holds one [id, ip, region, state] record per running
    VM; on insufficient capacity FTManager handles the failure and
    group_id is reported as 'UNKNOWN'.
    """
    iteration_id = self.get_iteration_id(local_settings)
    messages.info(local_settings, "%s: Creating VMs" % iteration_id)
    created_nodes = []
    group_id, vms_detail_list = create_vms(local_settings)
    logger.debug("group_id=%s vms_detail_list=%s"
                 % (group_id, vms_detail_list))
    try:
        too_few = (not vms_detail_list
                   or len(vms_detail_list) < local_settings['min_count'])
        if too_few:
            raise InsufficientVMError
        print_vms(local_settings, all_vms=vms_detail_list)
        for instance in vms_detail_list:
            # Use the private address when no public IP was assigned.
            if not instance.ip_address:
                instance.ip_address = instance.private_ip_address
        created_nodes = [[instance.id, instance.ip_address,
                          unicode(instance.region), 'running']
                         for instance in vms_detail_list]
        messages.info_context(
            int(local_settings['contextid']),
            "%s: Creating VMs (%s created)"
            % (iteration_id, len(vms_detail_list)))
    except InsufficientVMError as e:
        group_id = 'UNKNOWN'
        messages.error_context(int(local_settings['contextid']),
                               "error: sufficient VMs cannot be created")
        ftmanager = FTManager()
        ftmanager.manage_failure(e,
                                 settings=local_settings,
                                 created_vms=vms_detail_list)
    return group_id, created_nodes
def process(self, run_settings):
    """Wait stage: poll the remote job and collect output on completion.

    Reads the outstanding-run counter from run_settings, checks whether
    the remote job directory reports finished, and if so pulls the output
    back and decrements the counter.
    """
    self.experiment_id = 0
    local_settings = setup_settings(run_settings)
    self.experiment_id = local_settings['experiment_id']
    messages.info(run_settings, "1: waiting for completion")
    logger.debug("settings=%s" % local_settings)
    # runs_left is stored as a string-encoded literal; fall back to []
    # when missing/unreadable.  NOTE(review): the success path below does
    # "runs_left -= 1" and compares "<= 0", which assumes an int --
    # presumably the stored value is an int and [] only occurs when there
    # is nothing to wait for; confirm against the make stage.
    try:
        self.runs_left = ast.literal_eval(getval(run_settings,
            '%s/stages/make/runs_left' % RMIT_SCHEMA))
    except (ValueError, SettingNotFoundException):
        self.runs_left = []
    # if self._exists(run_settings,
    #         'http://rmit.edu.au/schemas/stages/make',
    #         u'runs_left'):
    #     self.runs_left = ast.literal_eval(
    #         run_settings['http://rmit.edu.au/schemas/stages/make'][u'runs_left'])
    # else:
    #     self.runs_left = []

    def _get_dest_bdp_url(local_settings):
        # Remote destination is payload_destination/<contextid> under
        # the hard-coded "nci" user.
        return "%s@%s" % (
                "nci",
                os.path.join(local_settings['payload_destination'],
                             str(local_settings['contextid'])))

    dest_url = _get_dest_bdp_url(local_settings)
    computation_platform_url = local_settings['comp_platform_url']
    bdp_username = local_settings['bdp_username']
    comp_pltf_settings = manage.get_platform_settings(
        computation_platform_url,
        bdp_username)
    local_settings.update(comp_pltf_settings)
    encoded_d_url = storage.get_url_with_credentials(
        local_settings,
        dest_url,
        is_relative_path=True,
        ip_address=local_settings['host'])
    (scheme, host, mypath, location, query_settings) = \
        storage.parse_bdpurl(encoded_d_url)
    if self.runs_left:
        job_finished = self._job_finished(
            settings=local_settings,
            remote_path=dest_url)
        if not job_finished:
            # Job still running; try again on the next poll.
            return
        self._get_output(local_settings, dest_url)
        self.runs_left -= 1
    if self.runs_left <= 0:
        messages.success(run_settings, "%s: finished" % (1))
        logger.debug("processing finished")
def start_multi_bootstrap_task(self, settings, relative_path_suffix):
    """Kick off bootstrapping of all VMs when a payload source is set.

    Does nothing when 'payload_source' is missing or empty.
    """
    try:
        payload_source = settings['payload_source']
    except KeyError:
        # Fixed: a dict lookup raises KeyError (the original caught
        # IndexError), and payload_source was left unbound on failure,
        # causing a NameError below.  A missing source means "skip".
        payload_source = None
    if payload_source:
        messages.info(settings, "%s: Bootstrapping VMs started"
                      % self.get_iteration_id(settings))
        bootstrap.start_multi_bootstrap_task(settings,
                                             relative_path_suffix)
def complete_bootstrap(self, bootstrap_class, local_settings):
    """Continue/complete bootstrapping when a payload source is set.

    Does nothing when 'payload_source' is missing or empty.
    """
    try:
        payload_source = local_settings['payload_source']
    except KeyError:
        # Fixed: a dict lookup raises KeyError (the original caught
        # IndexError), and payload_source was left unbound on failure,
        # causing a NameError below.  A missing source means "skip".
        payload_source = None
    if payload_source:
        messages.info(local_settings, "%s: Bootstrapping in progress"
                      % self.get_iteration_id(local_settings))
        bootstrap.complete_bootstrap(bootstrap_class, local_settings,
                                     self.get_iteration_id(local_settings))
def process(self, run_settings):
    """Wait stage (duplicate variant): poll the remote job for completion.

    Loads the outstanding-run counter, asks the remote side whether the
    job finished, and on completion fetches the output and decrements
    the counter.
    """
    self.experiment_id = 0
    local_settings = setup_settings(run_settings)
    self.experiment_id = local_settings['experiment_id']
    messages.info(run_settings, "1: waiting for completion")
    logger.debug("settings=%s" % local_settings)
    # runs_left is a string-encoded literal; [] when missing/unreadable.
    # NOTE(review): "runs_left -= 1" and "<= 0" below assume an int --
    # presumably [] only occurs when there is nothing to wait for;
    # confirm against the make stage.
    try:
        self.runs_left = ast.literal_eval(
            getval(run_settings,
                   '%s/stages/make/runs_left' % RMIT_SCHEMA))
    except (ValueError, SettingNotFoundException):
        self.runs_left = []
    # if self._exists(run_settings,
    #         'http://rmit.edu.au/schemas/stages/make',
    #         u'runs_left'):
    #     self.runs_left = ast.literal_eval(
    #         run_settings['http://rmit.edu.au/schemas/stages/make'][u'runs_left'])
    # else:
    #     self.runs_left = []

    def _get_dest_bdp_url(local_settings):
        # Remote destination: payload_destination/<contextid> under the
        # hard-coded "nci" user.
        return "%s@%s" % ("nci",
                          os.path.join(
                              local_settings['payload_destination'],
                              str(local_settings['contextid'])))

    dest_url = _get_dest_bdp_url(local_settings)
    computation_platform_url = local_settings['comp_platform_url']
    bdp_username = local_settings['bdp_username']
    comp_pltf_settings = manage.get_platform_settings(
        computation_platform_url,
        bdp_username)
    local_settings.update(comp_pltf_settings)
    encoded_d_url = storage.get_url_with_credentials(
        local_settings,
        dest_url,
        is_relative_path=True,
        ip_address=local_settings['host'])
    (scheme, host, mypath, location, query_settings) = \
        storage.parse_bdpurl(encoded_d_url)
    if self.runs_left:
        job_finished = self._job_finished(settings=local_settings,
                                          remote_path=dest_url)
        if not job_finished:
            # Job still running; retry on the next poll.
            return
        self._get_output(local_settings, dest_url)
        self.runs_left -= 1
    if self.runs_left <= 0:
        messages.success(run_settings, "%s: finished" % (1))
        logger.debug("processing finished")
def process(self, run_settings):
    """Bootstrap stage entry point: verify the computation platform.

    Fetches the computation platform settings and checks that a
    platform_type is declared; reports an error and stops otherwise.
    """
    messages.info(run_settings, "bootstrapping nodes")
    comp_pltf_settings = self.get_platform_settings(
        run_settings, 'http://rmit.edu.au/schemas/platform/computation')
    try:
        platform_type = comp_pltf_settings['platform_type']
    except KeyError as e:
        # Fixed py2-only "except KeyError, e" syntax (consistent with
        # the "as" form used elsewhere in this file).
        logger.error(e)
        messages.error(run_settings, e)
        return
def process(self, run_settings):
    """Create stage entry point: verify the computation platform.

    Fetches the computation platform settings and checks that a
    platform_type is declared; reports an error and stops otherwise.
    """
    messages.info(run_settings, "1: create")
    comp_pltf_settings = self.get_platform_settings(
        run_settings, 'http://rmit.edu.au/schemas/platform/computation')
    try:
        platform_type = comp_pltf_settings['platform_type']
    except KeyError as e:
        # Fixed py2-only "except KeyError, e" syntax (consistent with
        # the "as" form used elsewhere in this file).
        logger.error(e)
        messages.error(run_settings, e)
        return
def complete_bootstrap(self, bootstrap_class, local_settings):
    """Continue/complete bootstrapping when a payload source is set.

    No-op when 'payload_source' is missing or empty.
    """
    try:
        payload_source = local_settings['payload_source']
    except KeyError:
        # Fixed: dict lookups raise KeyError (the original caught
        # IndexError) and payload_source was unbound on failure, which
        # raised NameError below.  Missing source means "skip".
        payload_source = None
    if payload_source:
        messages.info(
            local_settings, "%s: Bootstrapping in progress"
            % self.get_iteration_id(local_settings))
        bootstrap.complete_bootstrap(bootstrap_class, local_settings,
                                     self.get_iteration_id(local_settings))
def start_multi_bootstrap_task(self, settings, relative_path_suffix):
    """Kick off bootstrapping of all VMs when a payload source is set.

    No-op when 'payload_source' is missing or empty.
    """
    try:
        payload_source = settings['payload_source']
    except KeyError:
        # Fixed: dict lookups raise KeyError (the original caught
        # IndexError) and payload_source was unbound on failure, which
        # raised NameError below.  Missing source means "skip".
        payload_source = None
    if payload_source:
        messages.info(
            settings, "%s: Bootstrapping VMs started"
            % self.get_iteration_id(settings))
        bootstrap.start_multi_bootstrap_task(settings,
                                             relative_path_suffix)
def destroy_resource(self, destroy_class, run_settings, local_settings):
    """Terminate the created VMs and mark all tracked nodes terminated.

    Every record in the created/scheduled/bootstrapped lists whose state
    field (index 3) is 'running' is flipped to 'terminated'.
    """
    messages.info(run_settings,
                  "%s: Deleting VMs" % self.get_iteration_id(run_settings))
    destroy_vms(local_settings, node_types=['created_nodes'],
                registered_vms=[])
    logger.debug('-all vms terminated')
    all_tracked = (destroy_class.created_nodes
                   + destroy_class.scheduled_nodes
                   + destroy_class.bootstrapped_nodes)
    for node in all_tracked:
        if node[3] == 'running':
            node[3] = 'terminated'
def schedule_task(schedule_class, run_settings, local_settings):
    """Schedule (or reschedule) processes onto the bootstrapped VMs."""
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')
    # Retry budget for reliability handling; default 0 when unset.
    try:
        retries = getval(run_settings,
                         '%s/input/reliability/maximum_retry' % RMIT_SCHEMA)
    except SettingNotFoundException:
        retries = 0
    local_settings['maximum_retry'] = retries
    # Iteration id used only for progress messages.
    try:
        iteration = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        iteration = 0
    if schedule_class.procs_2b_rescheduled:
        messages.info(run_settings,
                      '%d: rescheduling failed processes' % (iteration))
        start_reschedule(schedule_class, run_settings, local_settings)
    else:
        messages.info(run_settings, '%d: scheduling processes' % iteration)
        start_schedule(schedule_class, run_settings, local_settings)
def schedule_task(schedule_class, run_settings, local_settings):
    """Schedule (or reschedule) processes onto the bootstrapped VMs."""
    # schedule_class.nodes = get_registered_vms(local_settings, node_type='bootstrapped_nodes')
    schedule_class.nodes = schedule_class.bootstrapped_nodes
    prefix = django_settings.SCHEMA_PREFIX
    # Retry budget for reliability handling; default 0 when unset.
    try:
        retries = getval(run_settings,
                         '%s/input/reliability/maximum_retry' % prefix)
    except SettingNotFoundException:
        retries = 0
    local_settings['maximum_retry'] = retries
    # Iteration id used only for progress messages.
    try:
        iteration = int(getval(run_settings, '%s/system/id' % prefix))
    except (SettingNotFoundException, ValueError):
        iteration = 0
    if schedule_class.procs_2b_rescheduled:
        messages.info(run_settings,
                      '%d: rescheduling failed processes' % int(iteration))
        start_reschedule(schedule_class, run_settings, local_settings)
    else:
        messages.info(run_settings,
                      '%d: Scheduling processes' % int(iteration))
        start_schedule(schedule_class, run_settings, local_settings)
def process(self, run_settings):
    """Upload stage: push variation inputs and the payload to storage.

    Resolves the input storage platform from run_settings, builds the
    initial map location, loads the values map, then uploads the
    variation inputs and the payload.
    """
    bdp_username = getval(run_settings,
                          '%s/bdp_userprofile/username' % RMIT_SCHEMA)
    logger.debug("bdp_username=%s" % bdp_username)
    storage_url = getval(
        run_settings,
        '%s/platform/storage/input/platform_url' % RMIT_SCHEMA)
    logger.debug("input_storage_url=%s" % storage_url)
    storage_settings = manage.get_platform_settings(storage_url,
                                                    bdp_username)
    logger.debug("input_storage_settings=%s" % pformat(storage_settings))
    offset = getval(run_settings,
                    '%s/platform/storage/input/offset' % RMIT_SCHEMA)
    logger.debug("input_offset=%s" % pformat(offset))
    # scheme://type@ prefix for the input storage platform.
    prefix = '%s://%s@' % (storage_settings['scheme'],
                           storage_settings['type'])
    initial_location = "%s/%s/initial" % (prefix, offset)
    logger.debug("map_initial_location=%s" % initial_location)
    settings = setup_settings(run_settings)
    logger.debug("local_settings=%s" % settings)
    values = _load_values_map(settings, initial_location)
    logger.debug("values_map=%s" % values)
    _upload_variations_inputs(settings, initial_location, values)
    _upload_payload(settings, settings['payload_source'], values)
    messages.info(run_settings, "1: upload done")
def process(self, run_settings):
    """Configure stage: set up output/input/computation and scratch space.

    Optionally creates a MyTardis experiment when data curation is
    enabled in the run settings.
    """
    logger.debug('run_settings=%s' % run_settings)
    self.setup_output(run_settings)
    self.setup_input(run_settings)
    self.setup_computation(run_settings)
    messages.info(run_settings, "0: Setting up computation")
    local_settings = getvals(run_settings,
                             models.UserProfile.PROFILE_SCHEMA_NS)
    # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
    logger.debug("settings=%s" % pformat(run_settings))
    local_settings['bdp_username'] = getval(run_settings,
        '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
    # local_settings['bdp_username'] = run_settings[
    #     django_settings.SCHEMA_PREFIX + '/bdp_userprofile']['username']
    logger.debug('local_settings=%s' % local_settings)
    self.setup_scratchspace(run_settings)
    # Offset into the output location computed by setup_scratchspace.
    output_location = self.output_loc_offset
    # run_settings[django_settings.SCHEMA_PREFIX + '/input/system'][u'output_location']
    # NOTE(review): sibling stages catch SettingNotFoundException around
    # getval; here only KeyError/ValueError are caught -- confirm that a
    # missing experiment_id setting is actually covered by KeyError.
    try:
        self.experiment_id = int(getval(run_settings,
            '%s/input/mytardis/experiment_id'
            % django_settings.SCHEMA_PREFIX))
    except KeyError:
        self.experiment_id = 0
    except ValueError:
        self.experiment_id = 0
    try:
        curate_data = getval(run_settings,
            '%s/input/mytardis/curate_data'
            % django_settings.SCHEMA_PREFIX)
    except SettingNotFoundException:
        curate_data = False
    if curate_data:
        # Dynamically load the MyTardis hook; failure is logged, not fatal.
        try:
            mytardis_platform = jobs.safe_import(
                'chiminey.platform.mytardis.MyTardisPlatform', [], {})
            self.experiment_id = mytardis_platform.create_experiment(
                run_settings, output_location, self.experiment_id)
        except ImproperlyConfigured as e:
            logger.error("Cannot load mytardis platform hook %s" % e)
def process(self, run_settings):
    """Execute stage: run `make startrun` in the remote job directory.

    Builds the destination URL from the payload destination and context
    id, merges the computation platform settings, opens an SSH
    connection, and invokes the make target remotely.
    """
    settings = setup_settings(run_settings)
    messages.info(run_settings, "1: execute starting")

    def _get_dest_bdp_url(settings):
        # Destination is payload_destination/<contextid> under the
        # hard-coded "nci" user.
        return "%s@%s" % (
            "nci", os.path.join(settings['payload_destination'],
                                str(settings['contextid'])))

    dest_url = _get_dest_bdp_url(settings)
    computation_platform_url = settings['comp_platform_url']
    bdp_username = settings['bdp_username']
    comp_pltf_settings = manage.get_platform_settings(
        computation_platform_url, bdp_username)
    logger.debug("comp_pltf_settings=%s" % pformat(comp_pltf_settings))
    settings.update(comp_pltf_settings)
    encoded_d_url = storage.get_url_with_credentials(
        settings, dest_url, is_relative_path=True,
        ip_address=settings['host'])
    (scheme, host, mypath, location, query_settings) = \
        storage.parse_bdpurl(encoded_d_url)
    stderr = ''
    try:
        ssh = open_connection(ip_address=settings['host'],
                              settings=settings)
        (command_out, stderr) = compute.run_make(
            ssh, (os.path.join(query_settings['root_path'], mypath)),
            'startrun')
    except Exception as e:
        # Fixed py2-only "except Exception, e" syntax (matches the "as"
        # form used elsewhere in this file); log, then re-raise so the
        # stage machinery sees the failure.
        logger.error(e)
        raise
def process(self, run_settings):
    """Transform stage: derive next iteration's input from the output.

    Resolves per-iteration directories (output_<N>, input_<N>,
    input_<N+1>), processes the outputs, and optionally curates an
    intermediate dataset into MyTardis.
    """
    # Iteration counter; 0 when absent or malformed.  (Renamed from
    # "id" to avoid shadowing the builtin.)
    try:
        iteration = int(
            getval(run_settings,
                   '%s/system/id' % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        iteration = 0
    messages.info(run_settings, '%d: transforming' % (iteration + 1))
    # self.contextid = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
    bdp_username = getval(
        run_settings,
        '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
    output_storage_url = getval(
        run_settings,
        '%s/platform/storage/output/platform_url'
        % django_settings.SCHEMA_PREFIX)
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(
        run_settings,
        '%s/platform/storage/output/offset'
        % django_settings.SCHEMA_PREFIX)
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    try:
        self.id = int(
            getval(run_settings,
                   '%s/system/id' % django_settings.SCHEMA_PREFIX))
        self.output_dir = os.path.join(self.job_dir,
                                       "output_%s" % self.id)
        self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
        self.new_input_dir = os.path.join(self.job_dir,
                                          "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        # FIXME: Not clear that this a valid path through stages
        self.output_dir = os.path.join(self.job_dir, "output")
        # Fixed: this branch previously reassigned self.output_dir a
        # second time and never set self.input_dir (unlike the try
        # branch above), leaving input_dir undefined on this path.
        self.input_dir = os.path.join(self.job_dir, "input")
        self.new_input_dir = os.path.join(self.job_dir, "input_1")
    logger.debug('self.output_dir=%s' % self.output_dir)
    try:
        self.experiment_id = int(
            getval(run_settings,
                   '%s/input/mytardis/experiment_id'
                   % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0
    output_url = get_url_with_credentials(
        output_storage_settings, output_prefix + self.output_dir,
        is_relative_path=False)
    outputs = self.process_outputs(run_settings, self.job_dir,
                                   output_url, output_storage_settings,
                                   offset)
    try:
        curate_data = getval(
            run_settings,
            '%s/input/mytardis/curate_data'
            % django_settings.SCHEMA_PREFIX)
    except SettingNotFoundException:
        curate_data = 0
    if curate_data:
        mytardis_url = getval(
            run_settings,
            '%s/input/mytardis/mytardis_platform'
            % django_settings.SCHEMA_PREFIX)
        mytardis_settings = manage.get_platform_settings(
            mytardis_url, bdp_username)
        all_settings = dict(mytardis_settings)
        all_settings.update(output_storage_settings)
        all_settings['contextid'] = getval(
            run_settings,
            '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        # Dynamically load the MyTardis hook; failure is logged, not fatal.
        try:
            mytardis_platform = jobs.safe_import(
                'chiminey.platform.mytardis.MyTardisPlatform', [], {})
            logger.debug('self_outpus=%s' % outputs)
            self.experiment_id = \
                mytardis_platform.create_dataset_for_intermediate_output(
                    run_settings, self.experiment_id, self.job_dir,
                    output_url, all_settings, outputs=outputs)
        except ImproperlyConfigured as e:
            logger.error("Cannot load mytardis platform hook %s" % e)
    else:
        logger.warn('Data curation is off')
def process(self, run_settings):
    """Transform stage (HRMC variant): prepare next iteration's input.

    Resolves per-iteration directories (output_<N>, input_<N>,
    input_<N+1>), processes outputs, and optionally curates the dataset
    into MyTardis via self.curate_dataset.
    """
    # Iteration counter; 0 when absent or malformed.  (Renamed from
    # "id" to avoid shadowing the builtin.)
    try:
        iteration = int(getval(run_settings,
                               '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        iteration = 0
    messages.info(run_settings, '%d: transforming' % (iteration + 1))
    # self.contextid = getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA)
    bdp_username = getval(run_settings,
                          '%s/bdp_userprofile/username' % RMIT_SCHEMA)
    output_storage_url = getval(
        run_settings,
        '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(run_settings,
                    '%s/platform/storage/output/offset' % RMIT_SCHEMA)
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    try:
        self.id = int(getval(run_settings,
                             '%s/system/id' % RMIT_SCHEMA))
        self.output_dir = os.path.join(self.job_dir,
                                       "output_%s" % self.id)
        self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
        self.new_input_dir = os.path.join(self.job_dir,
                                          "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        # FIXME: Not clear that this a valid path through stages
        self.output_dir = os.path.join(self.job_dir, "output")
        # Fixed: this branch previously reassigned self.output_dir a
        # second time and never set self.input_dir (unlike the try
        # branch above), leaving input_dir undefined on this path.
        self.input_dir = os.path.join(self.job_dir, "input")
        self.new_input_dir = os.path.join(self.job_dir, "input_1")
    logger.debug('self.output_dir=%s' % self.output_dir)
    try:
        self.experiment_id = int(
            getval(run_settings,
                   '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0
    output_url = get_url_with_credentials(
        output_storage_settings, output_prefix + self.output_dir,
        is_relative_path=False)
    # NOTE: a large block of commented-out legacy HRMC output-selection
    # code (PSD criterion computation, picking best runs, copying
    # templates and xyz files, audit trail) was removed here; recover it
    # from version control if that behaviour is ever needed again.
    self.process_outputs(run_settings, self.job_dir, output_url,
                         output_storage_settings, offset)
    # curate dataset into mytardis
    try:
        curate_data = getval(run_settings,
                             '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
    except SettingNotFoundException:
        curate_data = 0
    if curate_data:
        mytardis_url = getval(
            run_settings,
            '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
        mytardis_settings = manage.get_platform_settings(
            mytardis_url, bdp_username)
        all_settings = dict(mytardis_settings)
        all_settings.update(output_storage_settings)
        all_settings['contextid'] = getval(
            run_settings, '%s/system/contextid' % RMIT_SCHEMA)
        self.experiment_id = self.curate_dataset(
            run_settings, self.experiment_id, self.job_dir,
            output_url, all_settings)
    else:
        logger.warn('Data curation is off')
def process(self, run_settings):
    """Transform stage (duplicate variant): prepare next iteration input.

    Resolves per-iteration directories (output_<N>, input_<N>,
    input_<N+1>), processes outputs, and optionally curates an
    intermediate dataset into MyTardis.
    """
    # Iteration counter; 0 when absent or malformed.  (Renamed from
    # "id" to avoid shadowing the builtin.)
    try:
        iteration = int(
            getval(run_settings,
                   '%s/system/id' % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        iteration = 0
    messages.info(run_settings, '%d: transforming' % (iteration + 1))
    # self.contextid = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
    bdp_username = getval(
        run_settings,
        '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
    output_storage_url = getval(
        run_settings,
        '%s/platform/storage/output/platform_url'
        % django_settings.SCHEMA_PREFIX)
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(
        run_settings,
        '%s/platform/storage/output/offset'
        % django_settings.SCHEMA_PREFIX)
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    try:
        self.id = int(
            getval(run_settings,
                   '%s/system/id' % django_settings.SCHEMA_PREFIX))
        self.output_dir = os.path.join(self.job_dir,
                                       "output_%s" % self.id)
        self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
        self.new_input_dir = os.path.join(self.job_dir,
                                          "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        # FIXME: Not clear that this a valid path through stages
        self.output_dir = os.path.join(self.job_dir, "output")
        # Fixed: this branch previously reassigned self.output_dir a
        # second time and never set self.input_dir (unlike the try
        # branch above), leaving input_dir undefined on this path.
        self.input_dir = os.path.join(self.job_dir, "input")
        self.new_input_dir = os.path.join(self.job_dir, "input_1")
    logger.debug('self.output_dir=%s' % self.output_dir)
    try:
        self.experiment_id = int(
            getval(run_settings,
                   '%s/input/mytardis/experiment_id'
                   % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0
    output_url = get_url_with_credentials(
        output_storage_settings, output_prefix + self.output_dir,
        is_relative_path=False)
    outputs = self.process_outputs(run_settings, self.job_dir,
                                   output_url, output_storage_settings,
                                   offset)
    try:
        curate_data = getval(
            run_settings,
            '%s/input/mytardis/curate_data'
            % django_settings.SCHEMA_PREFIX)
    except SettingNotFoundException:
        curate_data = 0
    if curate_data:
        mytardis_url = getval(
            run_settings,
            '%s/input/mytardis/mytardis_platform'
            % django_settings.SCHEMA_PREFIX)
        mytardis_settings = manage.get_platform_settings(
            mytardis_url, bdp_username)
        all_settings = dict(mytardis_settings)
        all_settings.update(output_storage_settings)
        all_settings['contextid'] = getval(
            run_settings,
            '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        # Dynamically load the MyTardis hook; failure is logged, not fatal.
        try:
            mytardis_platform = jobs.safe_import(
                'chiminey.platform.mytardis.MyTardisPlatform', [], {})
            logger.debug('self_outpus=%s' % outputs)
            self.experiment_id = \
                mytardis_platform.create_dataset_for_intermediate_output(
                    run_settings, self.experiment_id, self.job_dir,
                    output_url, all_settings, outputs=outputs)
        except ImproperlyConfigured as e:
            logger.error("Cannot load mytardis platform hook %s" % e)
    else:
        logger.warn('Data curation is off')
def process(self, run_settings):
    """Converge stage: decide whether the iterative workflow has finished
    and, if so, copy the final iteration's output to the job-level
    `output` directory and optionally curate it to MyTardis.
    """
    # Iteration counter for the progress message only; 0 when absent/bad.
    try:
        id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        id = 0
    messages.info(run_settings, '%d: converging' % (id+1))

    def retrieve_local_settings(run_settings, local_settings):
        # Copy selected run_settings keys into local_settings.
        # NOTE(review): every schema key below is commented out, so this
        # update() call currently copies nothing — confirm intended.
        update(local_settings, run_settings
            # '%s/stages/setup/payload_source' % RMIT_SCHEMA,
            # '%s/stages/setup/payload_destination' % RMIT_SCHEMA,
            # '%s/system/platform' % RMIT_SCHEMA,
            # # '%s/stages/create/custom_prompt' % RMIT_SCHEMA,
            # # '%s/stages/create/cloud_sleep_interval' % RMIT_SCHEMA,
            # # '%s/stages/create/created_nodes' % RMIT_SCHEMA,
            # '%s/stages/run/payload_cloud_dirname' % RMIT_SCHEMA,
            # '%s/system/max_seed_int' % RMIT_SCHEMA,
            # '%s/stages/run/compile_file' % RMIT_SCHEMA,
            # '%s/stages/run/retry_attempts' % RMIT_SCHEMA,
            # '%s/input/system/cloud/number_vm_instances' % RMIT_SCHEMA,
            # '%s/input/hrmc/iseed' % RMIT_SCHEMA,
            # '%s/input/hrmc/optimisation_scheme' % RMIT_SCHEMA,
            # '%s/input/hrmc/threshold' % RMIT_SCHEMA,
            )
        local_settings['bdp_username'] = getval(run_settings,
            '%s/bdp_userprofile/username' % RMIT_SCHEMA)

    local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
    retrieve_local_settings(run_settings, local_settings)
    bdp_username = local_settings['bdp_username']

    # get output
    output_storage_url = getval(run_settings,
        '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
    output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
    job_dir = manage.get_job_dir(output_storage_settings, offset)

    # get mytardis
    #mytardis_url = getval(run_settings, '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
    #mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)

    # setup new paths
    try:
        self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        self.output_dir = os.path.join(job_dir, "output_%d" % self.id)
        self.iter_inputdir = os.path.join(job_dir, "input_%d" % (self.id + 1))
        #self.new_iter_inputdir = "input_%d" % (self.id + 1)
    except (SettingNotFoundException, ValueError):
        self.output_dir = os.path.join(job_dir, "output")
        self.iter_inputdir = os.path.join(job_dir, "input")
        self.id = 0
    logger.debug('output_dir=%s iter_inputdir=%s' % (self.output_dir, self.iter_inputdir))
    # Missing or malformed experiment id both mean "no experiment yet".
    try:
        self.experiment_id = int(getval(run_settings,
            '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
    except SettingNotFoundException:
        self.experiment_id = 0
    except ValueError:
        self.experiment_id = 0
    inputdir_url = get_url_with_credentials(output_storage_settings,
        output_prefix + self.iter_inputdir, is_relative_path=False)
    logger.debug('input_dir_url=%s' % inputdir_url)
    # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(inputdir_url)
    # fsys = storage.get_filesystem(inputdir_url)
    # logger.debug('mypath=%s' % mypath)
    # input_dirs, _ = fsys.listdir(mypath)
    # logger.debug('input_dirs=%s' % input_dirs)

    # Convergence decision is delegated to process_outputs.
    (self.done_iterating, self.criterion) = self.process_outputs(
        run_settings, job_dir, inputdir_url, output_storage_settings)

    if self.done_iterating:
        logger.debug("Total Iterations: %d" % self.id)

        # output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
        #                             output_storage_settings['type'])
        # new_output_dir = os.path.join(base_dir, 'output')
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])

        # get source url
        iter_output_dir = os.path.join(os.path.join(job_dir, "output_%s" % self.id))
        source_url = "%s%s" % (output_prefix, iter_output_dir)
        # get dest url
        new_output_dir = os.path.join(job_dir, 'output')
        dest_url = "%s%s" % (output_prefix, new_output_dir)

        # NOTE(review): the two plain-URL assignments above are immediately
        # overwritten with credentialed URLs below; only the credentialed
        # forms are actually used.
        source_url = get_url_with_credentials(output_storage_settings,
            output_prefix + os.path.join(iter_output_dir), is_relative_path=False)
        dest_url = get_url_with_credentials(output_storage_settings,
            output_prefix + os.path.join(new_output_dir), is_relative_path=False)

        # Copy final iteration's output into the job-level output dir.
        storage.copy_directories(source_url, dest_url)

        # curate
        try:
            curate_data = getval(run_settings, '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
        except SettingNotFoundException:
            curate_data = 0
        if curate_data:
            mytardis_url = getval(run_settings, '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
            mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)
            all_settings = dict(mytardis_settings)
            all_settings.update(output_storage_settings)
            logger.debug("source_url=%s" % source_url)
            logger.debug("dest_url=%s" % dest_url)
            logger.debug("job_dir=%s" % job_dir)
            self.experiment_id = self.curate_dataset(run_settings, self.experiment_id,
                                                     job_dir, dest_url, all_settings)
        else:
            logger.warn('Data curation is off')
output_storage_url, local_settings['bdp_username']) offset = getval( run_settings, '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX) self.job_dir = manage.get_job_dir(output_storage_settings, offset) # TODO: we assume initial input is in "%s/input_0" % self.job_dir # in configure stage we could copy initial data in 'input_location' # into this location try: self.id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX)) self.iter_inputdir = os.path.join( self.job_dir, "input_%s" % self.id) except (SettingNotFoundException, ValueError): self.id = 0 self.iter_inputdir = os.path.join(self.job_dir, "input_location") messages.info(run_settings, "%s: Executing" % (self.id + 1)) logger.debug("id = %s" % self.id) try: self.initial_numbfile = int( getval(run_settings, '%s/stages/run/initial_numbfile' % django_settings.SCHEMA_PREFIX)) except (SettingNotFoundException, ValueError): logger.warn("setting initial_numbfile for first iteration") self.initial_numbfile = 1 try: self.experiment_id = int( getval(run_settings, '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX)) except SettingNotFoundException: self.experiment_id = 0 except ValueError: self.experiment_id = 0
# run_settings['http://rmit.edu.au/schemas/system'][u'contextid'] output_storage_url = getval(run_settings, '%s/platform/storage/output/platform_url' % RMIT_SCHEMA) output_storage_settings = manage.get_platform_settings(output_storage_url, local_settings['bdp_username']) offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA) # offset = run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset'] self.job_dir = manage.get_job_dir(output_storage_settings, offset) # TODO: we assume initial input is in "%s/input_0" % self.job_dir # in configure stage we could copy initial data in 'input_location' into this location try: self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA)) self.iter_inputdir = os.path.join(self.job_dir, "input_%s" % self.id) except (SettingNotFoundException, ValueError): self.id = 0 self.iter_inputdir = os.path.join(self.job_dir, "input_location") messages.info(run_settings, "%s: execute" % (self.id + 1)) logger.debug("id = %s" % self.id) try: self.initial_numbfile = int(getval(run_settings, '%s/stages/run/initial_numbfile' % RMIT_SCHEMA)) except (SettingNotFoundException, ValueError): logger.warn("setting initial_numbfile for first iteration") self.initial_numbfile = 1 try: self.experiment_id = int(getval(run_settings, '%s/input/mytardis/experiment_id' % RMIT_SCHEMA)) # self.experiment_id = int(smartconnectorscheduler.get_existing_key(run_settings, # 'http://rmit.edu.au/schemas/input/mytardis/experiment_id')) except SettingNotFoundException: self.experiment_id = 0 except ValueError: self.experiment_id = 0
run_settings, '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX) self.job_dir = manage.get_job_dir(output_storage_settings, offset) # TODO: we assume initial input is in "%s/input_0" % self.job_dir # in configure stage we could copy initial data in 'input_location' # into this location try: self.id = int( getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX)) self.iter_inputdir = os.path.join(self.job_dir, "input_%s" % self.id) except (SettingNotFoundException, ValueError): self.id = 0 self.iter_inputdir = os.path.join(self.job_dir, "input_location") messages.info(run_settings, "%s: Executing" % (self.id + 1)) logger.debug("id = %s" % self.id) try: self.initial_numbfile = int( getval( run_settings, '%s/stages/run/initial_numbfile' % django_settings.SCHEMA_PREFIX)) except (SettingNotFoundException, ValueError): logger.warn("setting initial_numbfile for first iteration") self.initial_numbfile = 1 try: self.experiment_id = int( getval( run_settings, '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX))
def process(self, run_settings):
    """ Check all registered nodes to find whether they are running, stopped or in error_nodes """
    local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
    # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
    retrieve_local_settings(run_settings, local_settings)
    logger.debug("local_settings=%s" % local_settings)

    self.contextid = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
    output_storage_url = getval(run_settings,
        '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
    output_storage_settings = get_platform_settings(output_storage_url,
        local_settings['bdp_username'])
    # FIXME: Need to be consistent with how we handle settings here. Prob combine all into
    # single local_settings for simplicity.
    output_storage_settings['bdp_username'] = local_settings['bdp_username']
    offset = getval(run_settings, '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
    self.job_dir = get_job_dir(output_storage_settings, offset)

    # finished_nodes is stored as a string repr of a list; parsed below.
    try:
        self.finished_nodes = getval(run_settings,
            '%s/stages/run/finished_nodes' % django_settings.SCHEMA_PREFIX)
    except SettingNotFoundException:
        self.finished_nodes = '[]'

    try:
        self.id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
        self.output_dir = "output_%s" % self.id
    except (SettingNotFoundException, ValueError):
        self.id = 0
        self.output_dir = "output"

    logger.debug("output_dir=%s" % self.output_dir)
    logger.debug("run_settings=%s" % run_settings)
    logger.debug("Wait stage process began")

    #processes = self.executed_procs
    processes = [x for x in self.current_processes if x['status'] == 'running']
    self.error_nodes = []
    # TODO: parse finished_nodes input
    logger.debug('self.finished_nodes=%s' % self.finished_nodes)
    self.finished_nodes = ast.literal_eval(self.finished_nodes)

    computation_platform_url = getval(run_settings,
        '%s/platform/computation/platform_url' % django_settings.SCHEMA_PREFIX)
    comp_pltf_settings = get_platform_settings(computation_platform_url,
        local_settings['bdp_username'])
    local_settings.update(comp_pltf_settings)
    comp_pltf_settings['bdp_username'] = local_settings['bdp_username']

    # A configured payload_source switches waiting to the asynchronous
    # strategy; otherwise wait synchronously.
    wait_strategy = strategies.SynchronousWaitStrategy()
    try:
        payload_source = getval(run_settings,
            '%s/stages/setup/payload_source' % django_settings.SCHEMA_PREFIX)
        if payload_source:
            wait_strategy = strategies.AsynchronousWaitStrategy()
    except SettingNotFoundException:
        pass

    for process in processes:
        #instance_id = node.id
        ip_address = process['ip_address']
        process_id = process['id']
        retry_left = process['retry_left']
        #ip = botocloudconnector.get_instance_ip(instance_id, self.boto_settings)
        #ssh = open_connection(ip_address=ip, settings=self.boto_settings)
        #if not botocloudconnector.is_vm_running(node):
            # An unlikely situation where the node crashed after is was
            # detected as registered.
            #FIXME: should error nodes be counted as finished?
        #    logging.error('Instance %s not running' % instance_id)
        #    self.error_nodes.append(node)
        #    continue
        relative_path_suffix = self.get_relative_output_path(local_settings)
        fin = wait_strategy.is_job_finished(
            self, ip_address, process_id, retry_left,
            local_settings, relative_path_suffix)
        logger.debug("fin=%s" % fin)
        if fin:
            logger.debug("done. output is available")
            logger.debug("node=%s" % str(process))
            logger.debug("finished_nodes=%s" % self.finished_nodes)
            #FIXME: for multiple nodes, if one finishes before the other then
            #its output will be retrieved, but it may again when the other node fails, because
            #we cannot tell whether we have prevous retrieved this output before and finished_nodes
            # is not maintained between triggerings...
            # Only fetch output once per process id.
            if not (int(process_id) in [int(x['id'])
                    for x in self.finished_nodes
                    if int(process_id) == int(x['id'])]):
                self.get_output(ip_address, process_id, self.output_dir,
                                local_settings, comp_pltf_settings,
                                output_storage_settings, run_settings)
                # Remove the remote audit marker after retrieval.
                audit_url = get_url_with_credentials(
                    comp_pltf_settings,
                    os.path.join(self.output_dir, process_id, "audit.txt"),
                    is_relative_path=True)
                fsys = storage.get_filesystem(audit_url)
                logger.debug("Audit file url %s" % audit_url)
                if fsys.exists(audit_url):
                    fsys.delete(audit_url)
                self.finished_nodes.append(process)
                logger.debug('finished_processes=%s' % self.finished_nodes)
                # Mark this process completed in every tracking list.
                for iterator, p in enumerate(self.all_processes):
                    if int(p['id']) == int(process_id) and p['status'] == 'running':
                        self.all_processes[iterator]['status'] = 'completed'
                for iterator, p in enumerate(self.executed_procs):
                    if int(p['id']) == int(process_id) and p['status'] == 'running':
                        self.executed_procs[iterator]['status'] = 'completed'
                for iterator, p in enumerate(self.current_processes):
                    if int(p['id']) == int(process_id) and p['status'] == 'running':
                        self.current_processes[iterator]['status'] = 'completed'
            else:
                logger.warn("We have already " +
                    "processed output of %s on node %s" % (process_id, ip_address))
        else:
            print "job %s at %s not completed" % (process_id, ip_address)

    failed_processes = [x for x in self.current_processes if x['status'] == 'failed']
    logger.debug('failed_processes=%s' % failed_processes)
    logger.debug('failed_processes=%d' % len(failed_processes))
    messages.info(run_settings, "%d: Waiting %d processes (%d completed, %d failed) " % (
        self.id + 1, len(self.current_processes),
        len(self.finished_nodes), len(failed_processes)))
def process(self, run_settings):
    """Sweep stage: resolve computation/storage platforms for the sweep,
    derive the scratch platform path, and record the MyTardis experiment
    id back into run_settings.
    """
    logger.debug('run_settings=%s' % run_settings)
    messages.info(run_settings, "0: sweep started")

    # Need to make copy because we pass on run_settings to sub connector
    # so any changes we make here to run_settings WILL be inherited
    def make_local_settings(run_settings):
        from copy import deepcopy
        local_settings = deepcopy(
            getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS))
        update(
            local_settings, run_settings,
            django_settings.SCHEMA_PREFIX + '/system/platform',
            # django_settings.SCHEMA_PREFIX + '/input/mytardis/experiment_id',
            # django_settings.SCHEMA_PREFIX + '/system/random_numbers',
        )
        local_settings['bdp_username'] = getval(
            run_settings,
            '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
        return local_settings

    local_settings = make_local_settings(run_settings)
    logger.debug('local_settings=%s' % local_settings)

    # Find the (single expected) compplatform input namespace and copy its
    # computation_platform into the canonical platform_url slot.
    compplatform = [
        k for k, v in run_settings.iteritems()
        if k.startswith('%s/input/system/compplatform'
                        % django_settings.SCHEMA_PREFIX)
    ]
    setval(
        run_settings,
        '%s/platform/computation/platform_url' % django_settings.SCHEMA_PREFIX,
        getval(run_settings, '%s/computation_platform' % compplatform[0]))

    def _parse_output_location(run_settings, location):
        # "name/rest/of/path" -> (name, "rest/of/path")
        loc_list = location.split('/')
        name = loc_list[0]
        offset = ''
        if len(loc_list) > 1:
            offset = os.path.join(*loc_list[1:])
        logger.debug('offset=%s' % offset)
        return name, offset

    contextid = int(
        getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX))
    logger.debug("contextid=%s" % contextid)
    sweep_name = self._get_sweep_name(run_settings)
    logger.debug("sweep_name=%s" % sweep_name)

    output_loc = self.output_exists(run_settings)
    location = ""
    if output_loc:
        location = getval(run_settings, output_loc)
        output_storage_name, output_storage_offset = \
            _parse_output_location(run_settings, location)
        setval(
            run_settings,
            '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX,
            output_storage_name)
        setval(
            run_settings,
            '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX,
            os.path.join(output_storage_offset, '%s%s' % (sweep_name, contextid)))

    def _parse_input_location(run_settings, location):
        # Same split as _parse_output_location, for the input URL.
        loc_list = location.split('/')
        name = loc_list[0]
        offset = ''
        if len(loc_list) > 1:
            offset = os.path.join(*loc_list[1:])
        logger.debug('offset=%s' % offset)
        return (name, offset)

    input_loc = self.input_exists(run_settings)
    logger.debug('special_input_loc=%s' % input_loc)
    if input_loc:
        location = getval(run_settings, input_loc)
        input_storage_name, input_storage_offset = \
            _parse_input_location(run_settings, location)
        setval(
            run_settings,
            '%s/platform/storage/input/platform_url' % django_settings.SCHEMA_PREFIX,
            input_storage_name)
        # store offsets
        setval(
            run_settings,
            '%s/platform/storage/input/offset' % django_settings.SCHEMA_PREFIX,
            input_storage_offset)

    # TODO: replace with scratch space computation platform space
    self.scratch_platform = '%s%s%s' % (manage.get_scratch_platform(),
                                        sweep_name, contextid)

    # mytardis
    if output_loc:
        try:
            self.experiment_id = int(
                getval(
                    run_settings,
                    '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX))
        except (KeyError, ValueError):
            # BUG FIX: original read "except KeyError, ValueError:", which in
            # Python 2 catches only KeyError and binds it to the *name*
            # ValueError; a real ValueError would have propagated.
            self.experiment_id = 0
        try:
            curate_data = getval(
                run_settings,
                '%s/input/mytardis/curate_data' % django_settings.SCHEMA_PREFIX)
        except SettingNotFoundException:
            curate_data = False
        curate_data = False  # TODO remove
        if curate_data:
            logger.debug('location=%s' % location)
            location = "%s%s" % (sweep_name, contextid)
            self.experiment_id = self.curate_data(run_settings, location,
                                                  self.experiment_id)
        setval(
            run_settings,
            '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX,
            str(self.experiment_id))
def process(self, run_settings):
    """ Check all registered nodes to find whether they are running, stopped or in error_nodes """
    local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
    # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
    retrieve_local_settings(run_settings, local_settings)
    logger.debug("local_settings=%s" % local_settings)

    self.contextid = getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA)
    output_storage_url = getval(run_settings,
        '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
    output_storage_settings = get_platform_settings(output_storage_url,
        local_settings['bdp_username'])
    # FIXME: Need to be consistent with how we handle settings here. Prob combine all into
    # single local_settings for simplicity.
    output_storage_settings['bdp_username'] = local_settings['bdp_username']
    offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
    self.job_dir = get_job_dir(output_storage_settings, offset)

    # finished_nodes is stored as a string repr of a list; parsed below.
    try:
        self.finished_nodes = getval(run_settings,
            '%s/stages/run/finished_nodes' % RMIT_SCHEMA)
        # self.finished_nodes = smartconnectorscheduler.get_existing_key(run_settings,
        #     'http://rmit.edu.au/schemas/stages/run/finished_nodes')
    except SettingNotFoundException:
        self.finished_nodes = '[]'

    try:
        self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        # self.id = int(smartconnectorscheduler.get_existing_key(run_settings,
        #     'http://rmit.edu.au/schemas/system/id'))
        self.output_dir = "output_%s" % self.id
    except (SettingNotFoundException, ValueError):
        self.id = 0
        self.output_dir = "output"

    logger.debug("output_dir=%s" % self.output_dir)
    logger.debug("run_settings=%s" % run_settings)
    logger.debug("Wait stage process began")

    #processes = self.executed_procs
    processes = [x for x in self.current_processes if x['status'] == 'running']
    self.error_nodes = []
    # TODO: parse finished_nodes input
    logger.debug('self.finished_nodes=%s' % self.finished_nodes)
    self.finished_nodes = ast.literal_eval(self.finished_nodes)

    computation_platform_url = getval(run_settings,
        '%s/platform/computation/platform_url' % RMIT_SCHEMA)
    comp_pltf_settings = get_platform_settings(computation_platform_url,
        local_settings['bdp_username'])
    local_settings.update(comp_pltf_settings)
    comp_pltf_settings['bdp_username'] = local_settings['bdp_username']

    # The wait/synchronous flag selects the waiting strategy; a falsy
    # value switches to asynchronous waiting.
    wait_strategy = strategies.SynchronousWaitStrategy()
    try:
        synchronous_wait = getval(run_settings,
            '%s/stages/wait/synchronous' % RMIT_SCHEMA)
        if not synchronous_wait:
            wait_strategy = strategies.AsynchronousWaitStrategy()
    except SettingNotFoundException:
        pass

    for process in processes:
        #instance_id = node.id
        ip_address = process['ip_address']
        process_id = process['id']
        retry_left = process['retry_left']
        #ip = botocloudconnector.get_instance_ip(instance_id, self.boto_settings)
        #ssh = open_connection(ip_address=ip, settings=self.boto_settings)
        #if not botocloudconnector.is_vm_running(node):
            # An unlikely situation where the node crashed after is was
            # detected as registered.
            #FIXME: should error nodes be counted as finished?
        #    logging.error('Instance %s not running' % instance_id)
        #    self.error_nodes.append(node)
        #    continue
        relative_path_suffix = self.get_relative_output_path(local_settings)
        fin = wait_strategy.is_job_finished(
            self, ip_address, process_id, retry_left,
            local_settings, relative_path_suffix)
        logger.debug("fin=%s" % fin)
        if fin:
            logger.debug("done. output is available")
            logger.debug("node=%s" % str(process))
            logger.debug("finished_nodes=%s" % self.finished_nodes)
            #FIXME: for multiple nodes, if one finishes before the other then
            #its output will be retrieved, but it may again when the other node fails, because
            #we cannot tell whether we have prevous retrieved this output before and finished_nodes
            # is not maintained between triggerings...
            # Only fetch output once per process id.
            if not (int(process_id) in [int(x['id'])
                    for x in self.finished_nodes
                    if int(process_id) == int(x['id'])]):
                self.get_output(ip_address, process_id, self.output_dir,
                                local_settings, comp_pltf_settings,
                                output_storage_settings, run_settings)
                # Remove the remote audit marker after retrieval.
                audit_url = get_url_with_credentials(
                    comp_pltf_settings,
                    os.path.join(self.output_dir, process_id, "audit.txt"),
                    is_relative_path=True)
                fsys = storage.get_filesystem(audit_url)
                logger.debug("Audit file url %s" % audit_url)
                if fsys.exists(audit_url):
                    fsys.delete(audit_url)
                self.finished_nodes.append(process)
                logger.debug('finished_processes=%s' % self.finished_nodes)
                # Mark this process completed in every tracking list.
                for iterator, p in enumerate(self.all_processes):
                    if int(p['id']) == int(process_id) and p['status'] == 'running':
                        self.all_processes[iterator]['status'] = 'completed'
                for iterator, p in enumerate(self.executed_procs):
                    if int(p['id']) == int(process_id) and p['status'] == 'running':
                        self.executed_procs[iterator]['status'] = 'completed'
                for iterator, p in enumerate(self.current_processes):
                    if int(p['id']) == int(process_id) and p['status'] == 'running':
                        self.current_processes[iterator]['status'] = 'completed'
            else:
                logger.warn("We have already " +
                    "processed output of %s on node %s" % (process_id, ip_address))
        else:
            print "job %s at %s not completed" % (process_id, ip_address)

    failed_processes = [x for x in self.current_processes if x['status'] == 'failed']
    logger.debug('failed_processes=%s' % failed_processes)
    logger.debug('failed_processes=%d' % len(failed_processes))
    messages.info(run_settings, "%d: waiting %d processes (%d completed, %d failed) " % (
        self.id + 1, len(self.current_processes),
        len(self.finished_nodes), len(failed_processes)))
def process(self, run_settings):
    """Configure stage: normalise the output/input/computation platform
    settings in run_settings, resolve the results directory offset, and
    stage initial input into scratch space when present.
    """
    logger.debug('run_settings=%s' % run_settings)
    self.output_platform_name = ''
    self.output_platform_offset = ''
    self.input_platform_name = ''
    self.input_platform_offset = ''
    self.compute_platform_name = ''
    self.compute_platform_offset = ''

    # Populate platform/storage/output from the raw output_location BDP
    # URL only if not already present (KeyError probe).
    if self.output_exists(run_settings):
        try:
            run_settings['http://rmit.edu.au/schemas/platform/storage/output']
        except KeyError:
            logger.debug('bdp_url settings ...')
            try:
                bdp_url = getval(run_settings, RMIT_SCHEMA + '/input/system/output_location')
                logger.debug('bdp_url=%s' % bdp_url)
            except SettingNotFoundException:
                bdp_url = getval(run_settings, RMIT_SCHEMA + '/input/location/output/output_location')
                logger.debug('bdp_url=%s' % bdp_url)
            self.output_platform_name, self.output_platform_offset = self.break_bdp_url(bdp_url)
            run_settings[RMIT_SCHEMA + '/platform/storage/output'] = {}
            run_settings[RMIT_SCHEMA + '/platform/storage/output'][
                'platform_url'] = self.output_platform_name
            run_settings[RMIT_SCHEMA + '/platform/storage/output']['offset'] = self.output_platform_offset

    # Same normalisation for platform/storage/input.
    if self.input_exists(run_settings):
        try:
            run_settings['http://rmit.edu.au/schemas/platform/storage/input']
        except KeyError:
            try:
                bdp_url = getval(run_settings, RMIT_SCHEMA + '/input/system/input_location')
            except SettingNotFoundException:
                bdp_url = getval(run_settings, RMIT_SCHEMA + '/input/location/input/input_location')
            self.input_platform_name, self.input_platform_offset = self.break_bdp_url(bdp_url)
            run_settings[RMIT_SCHEMA + '/platform/storage/input'] = {}
            run_settings[RMIT_SCHEMA + '/platform/storage/input'][
                'platform_url'] = self.input_platform_name
            run_settings[RMIT_SCHEMA + '/platform/storage/input']['offset'] = self.input_platform_offset

    # And for platform/computation, read from the compplatform input.
    try:
        run_settings['http://rmit.edu.au/schemas/platform/computation']
    except KeyError:
        bdp_url = run_settings[RMIT_SCHEMA + '/input/system/compplatform']['computation_platform']
        logger.debug('tbdp_url=%s' % bdp_url)
        self.compute_platform_name, self.compute_platform_offset = self.break_bdp_url(bdp_url)
        run_settings[RMIT_SCHEMA + '/platform/computation'] = {}
        run_settings[RMIT_SCHEMA + '/platform/computation']['platform_url'] = self.compute_platform_name
        run_settings[RMIT_SCHEMA + '/platform/computation']['offset'] = self.compute_platform_offset

    messages.info(run_settings, "1: configure")
    local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
    # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
    logger.debug("settings=%s" % pformat(run_settings))
    local_settings['bdp_username'] = getval(run_settings,
        '%s/bdp_userprofile/username' % RMIT_SCHEMA)
    # local_settings['bdp_username'] = run_settings[
    #     RMIT_SCHEMA + '/bdp_userprofile']['username']
    logger.debug('local_settings=%s' % local_settings)

    #input_location = getval(run_settings, "%s/input/system/input_location" % RMIT_SCHEMA)
    # input_location = run_settings[
    #     RMIT_SCHEMA + '/input/system']['input_location']
    #logger.debug("input_location=%s" % input_location)
    bdp_username = local_settings['bdp_username']
    output_storage_url = getval(run_settings,
        '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
    # output_storage_url = run_settings['http://rmit.edu.au/schemas/platform/storage/output']['platform_url']
    #output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
    #input_storage_url = getval(run_settings, '%s/platform/storage/input/platform_url' % RMIT_SCHEMA)
    #input_storage_settings = manage.get_platform_settings(
    #    input_storage_url,
    #    bdp_username)
    #input_offset = getval(run_settings, '%s/platform/storage/input/offset' % RMIT_SCHEMA)
    #input_prefix = '%s://%s@' % (input_storage_settings['scheme'],
    #                            input_storage_settings['type'])
    #map_initial_location = "%s/%s/initial" % (input_prefix, input_offset)
    #logger.debug("map_initial_location=%s" % map_initial_location)

    self.contextid = getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA)
    logger.debug("self.contextid=%s" % self.contextid)
    self.output_loc_offset = str(self.contextid)
    '''
    self.output_loc_offset = str(self.contextid)
    logger.debug("suffix=%s" % self.output_loc_offset)
    try:
        #fixme, hrmc should be variable..so configure can be used in any connector
        off = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
        self.output_loc_offset = os.path.join(off, 'hrmc' + self.output_loc_offset)
    except SettingNotFoundException:
        pass
    '''
    self.output_loc_offset = self.get_results_dirname(run_settings)
    logger.debug('self.output_loc_offset=%s' % self.output_loc_offset)

    if self.input_exists(run_settings):
        self.copy_to_scratch_space(run_settings, local_settings)
    '''
    run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset'] = self.output_loc_offset
    offset = run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset']
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    iter_inputdir = os.path.join(self.job_dir, "input_0")
    logger.debug("iter_inputdir=%s" % iter_inputdir)
    #todo: input location will evenatually be replaced by the scratch space that was used by the sweep
    #todo: the sweep will indicate the location of the scratch space in the run_settings
    #todo: add scheme (ssh) to inputlocation
    source_url = get_url_with_credentials(local_settings, input_location)
    logger.debug("source_url=%s" % source_url)
    destination_url = get_url_with_credentials(
        output_storage_settings,
        '%s://%s@%s' % (output_storage_settings['scheme'],
                        output_storage_settings['type'],
                        iter_inputdir),
        is_relative_path=False)
    logger.debug("destination_url=%s" % destination_url)
    storage.copy_directories(source_url, destination_url)
    '''
    output_location = self.output_loc_offset
    # run_settings[RMIT_SCHEMA + '/input/system'][u'output_location']

    # NOTE(review): getval raises SettingNotFoundException elsewhere in this
    # file, so this KeyError arm may never fire — confirm against getval.
    try:
        self.experiment_id = int(getval(run_settings,
            '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
    except KeyError:
        self.experiment_id = 0
    except ValueError:
        self.experiment_id = 0
    try:
        curate_data = getval(run_settings,
            '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
    except SettingNotFoundException:
        curate_data = False
    if curate_data:
        self.experiment_id = self.curate_data(run_settings, output_location,
                                              self.experiment_id)
    # NOTE(review): the trailing triple-quote below opens a commented-out
    # region that continues beyond this chunk of the file.
    '''
# NOTE(review): this try/finally is the tail of an execute/run-stage method
# whose `def` line lies outside this chunk; presumably `settings`,
# `query_settings`, `mypath` are locals of that method — confirm in full file.
try:
    # Run `make startrun` on the remote host over SSH.
    ssh = open_connection(
        ip_address=settings['host'],
        settings=settings)
    (command_out, stderr) = compute.run_make(
        ssh,
        (os.path.join(query_settings['root_path'], mypath)),
        'startrun')
except Exception, e:
    logger.error(e)
    raise
finally:
    # Always release the SSH connection, even on failure.
    if ssh:
        ssh.close()
# Empty stderr is treated as success.
self.program_success = int(not stderr)
logger.debug("program_success =%s" % self.program_success)
messages.info(run_settings, "1: execute started")

def output(self, run_settings):
    """Record make-stage results (program_success, running flag) back
    into run_settings for the following stages."""
    # TODO: should only set runnning if program_success is true?
    setvals(run_settings, {
        '%s/stages/make/program_success' % RMIT_SCHEMA:
            self.program_success,
        '%s/stages/make/running' % RMIT_SCHEMA: 1
    })
    # run_settings.setdefault(
    #     'http://rmit.edu.au/schemas/stages/make',
    #     {})[u'program_success'] = self.program_success
    # run_settings.setdefault(
    #     'http://rmit.edu.au/schemas/stages/make',
    #     {})[u'running'] = 1