class PluginInstanceManager(object):
    """
    Manage the lifecycle of a single plugin instance's remote execution: submit
    the job to a remote pfcon service, poll its status, fetch and register its
    output files in swift storage, and persist final status to the DB.
    """

    def __init__(self, plugin_instance):
        self.c_plugin_inst = plugin_instance
        self.l_plugin_inst_param_instances = self.c_plugin_inst.get_parameter_instances()
        # job id is prefixed so that multiple CUBE instances can share a pfcon
        self.str_job_id = ChrisInstance.load().job_id_prefix + str(plugin_instance.id)
        self.pfcon_client = pfcon.Client(plugin_instance.compute_resource.compute_url)
        self.swift_manager = SwiftManager(settings.SWIFT_CONTAINER_NAME,
                                          settings.SWIFT_CONNECTION_PARAMS)

    def run_plugin_instance_app(self):
        """
        Run the plugin instance's app via a call to a remote pfcon service.
        """
        if self.c_plugin_inst.status == 'cancelled':
            return
        plugin = self.c_plugin_inst.plugin
        plugin_type = plugin.meta.type
        inputdirs = []
        try:
            if plugin_type == 'ds':
                inputdirs.append(self.get_previous_output_path())
            else:
                inputdirs.append(self.manage_plugin_instance_app_empty_inputdir())
        except Exception:
            self.c_plugin_inst.status = 'cancelled'  # giving up
            self.save_plugin_instance_final_status()
            return
        d_unextpath_params, d_path_params = self.get_plugin_instance_path_parameters()
        for path_param_value in d_path_params.values():
            # the value of each parameter of type 'path' is a string
            # representing a comma-separated list of paths in obj storage
            inputdirs = inputdirs + path_param_value.split(',')
        # create data file to transmit
        try:
            zip_file = self.create_zip_file(inputdirs)
        except Exception:
            self.c_plugin_inst.status = 'cancelled'  # giving up
            self.save_plugin_instance_final_status()
            return
        # create job description dictionary
        cmd_args = self.get_plugin_instance_app_cmd_args()
        cmd_path_flags = list(d_unextpath_params.keys()) + list(d_path_params.keys())
        job_descriptors = {
            'cmd_args': ' '.join(cmd_args),
            'cmd_path_flags': ','.join(cmd_path_flags),
            'auid': self.c_plugin_inst.owner.username,
            'number_of_workers': str(self.c_plugin_inst.number_of_workers),
            'cpu_limit': str(self.c_plugin_inst.cpu_limit),
            'memory_limit': str(self.c_plugin_inst.memory_limit),
            'gpu_limit': str(self.c_plugin_inst.gpu_limit),
            'image': plugin.dock_image,
            'selfexec': plugin.selfexec,
            'selfpath': plugin.selfpath,
            'execshell': plugin.execshell,
            'type': plugin_type
        }
        pfcon_url = self.pfcon_client.url
        job_id = self.str_job_id
        logger.info(f'Submitting job {job_id} to pfcon url -->{pfcon_url}<--, '
                    f'description: {json.dumps(job_descriptors, indent=4)}')
        try:
            d_resp = self.pfcon_client.submit_job(job_id, job_descriptors,
                                                  zip_file.getvalue(), timeout=9000)
        except PfconRequestException as e:
            logger.error(f'[CODE01,{job_id}]: Error submitting job to pfcon url '
                         f'-->{pfcon_url}<--, detail: {str(e)}')
            self.c_plugin_inst.error_code = 'CODE01'
            self.c_plugin_inst.status = 'cancelled'  # giving up
            self.save_plugin_instance_final_status()
        else:
            logger.info(f'Successfully submitted job {job_id} to pfcon url '
                        f'-->{pfcon_url}<--, response: {json.dumps(d_resp, indent=4)}')
            # update the job status and summary
            self.c_plugin_inst.status = 'started'
            self.c_plugin_inst.summary = self.get_job_status_summary()  # initial status
            self.c_plugin_inst.raw = json_zip2str(d_resp)
            self.c_plugin_inst.save()

    def check_plugin_instance_app_exec_status(self):
        """
        Check a plugin instance's app execution status. It connects to the remote
        pfcon service to determine job status and if finished without error then
        downloads and unpacks job's zip file and registers output files with the
        DB. Finally it sends a request to delete the job's data from the remote
        environment.
        """
        if self.c_plugin_inst.status == 'started':
            pfcon_url = self.pfcon_client.url
            job_id = self.str_job_id
            logger.info(f'Sending job status request to pfcon url -->{pfcon_url}<-- for '
                        f'job {job_id}')
            try:
                d_resp = self.pfcon_client.get_job_status(job_id, timeout=200)
            except PfconRequestException as e:
                logger.error(f'[CODE02,{job_id}]: Error getting job status at pfcon '
                             f'url -->{pfcon_url}<--, detail: {str(e)}')
                return self.c_plugin_inst.status  # return, CUBE will retry later
            logger.info(f'Successful job status response from pfcon url -->{pfcon_url}<--'
                        f' for job {job_id}: {json.dumps(d_resp, indent=4)}')
            status = d_resp['compute']['status']
            logger.info(f'Current job {job_id} remote status = {status}')
            logger.info(f'Current job {job_id} DB status = {self.c_plugin_inst.status}')
            summary = self.get_job_status_summary(d_resp)
            self.c_plugin_inst.summary = summary
            raw = json_zip2str(d_resp)
            self.c_plugin_inst.raw = raw
            # only update (atomically) if status='started' to avoid concurrency problems
            PluginInstance.objects.filter(
                id=self.c_plugin_inst.id,
                status='started').update(summary=summary, raw=raw)
            if status in ('finishedSuccessfully', 'finishedWithError', 'undefined'):
                if status == 'finishedSuccessfully':
                    self._handle_finished_successfully_status()
                elif status == 'finishedWithError':
                    self._handle_finished_with_error_status()
                else:
                    self._handle_undefined_status()
                logger.info(f'Deleting job {job_id} data from pfcon at url '
                            f'-->{pfcon_url}<--')
                try:
                    self.pfcon_client.delete_job_data(job_id, timeout=500)
                except PfconRequestException as e:
                    # deletion failure is logged but not fatal for the job outcome
                    logger.error(f'[CODE12,{job_id}]: Error deleting job data from '
                                 f'pfcon at url -->{pfcon_url}<--, detail: {str(e)}')
                else:
                    logger.info(f'Successfully deleted job {job_id} data from pfcon at '
                                f'url -->{pfcon_url}<--')
        return self.c_plugin_inst.status

    def cancel_plugin_instance_app_exec(self):
        """
        Cancel a plugin instance's app execution. It connects to the remote
        service to cancel job.
        """
        pass

    def get_previous_output_path(self):
        """
        Get the previous plugin instance output directory. Make sure to deal with
        the eventual consistency.
        """
        job_id = self.str_job_id
        previous = self.c_plugin_inst.previous
        output_path = previous.get_output_path()
        fnames = [f.fname.name for f in previous.files.all()]
        for i in range(20):  # loop to deal with eventual consistency
            try:
                l_ls = self.swift_manager.ls(output_path)
            except ClientException as e:
                logger.error(f'[CODE06,{job_id}]: Error while listing swift '
                             f'storage files in {output_path}, detail: {str(e)}')
            else:
                # all the previously registered files must be visible in swift
                if all(obj in l_ls for obj in fnames):
                    return output_path
            time.sleep(3)
        logger.error(f'[CODE11,{job_id}]: Error while listing swift storage files in '
                     f'{output_path}, detail: Presumable eventual consistency problem')
        self.c_plugin_inst.error_code = 'CODE11'
        raise NameError('Presumable eventual consistency problem.')

    def get_plugin_instance_app_cmd_args(self):
        """
        Get the list of the plugin instance app's cmd arguments.
        """
        # append flags to save input meta data (passed options) and
        # output meta data (output description)
        app_args = ['--saveinputmeta', '--saveoutputmeta']
        # append the parameters to app's argument list
        for param_inst in self.l_plugin_inst_param_instances:
            param = param_inst.plugin_param
            value = param_inst.value
            if param.action == 'store':
                app_args.append(param.flag)
                if param.type == 'string' and not value:
                    value = "''"  # handle empty string as a valid value for a flag
                app_args.append(str(value))  # convert all argument values to string
            elif param.action == 'store_true' and value:
                app_args.append(param.flag)
            elif param.action == 'store_false' and not value:
                app_args.append(param.flag)
        return app_args

    def get_plugin_instance_path_parameters(self):
        """
        Get the unextpath and path parameters dictionaries in a tuple. The keys
        and values in these dictionaries are parameters' flag and value
        respectively.
        """
        path_parameters_dict = {}
        unextpath_parameters_dict = {}
        for param_inst in self.l_plugin_inst_param_instances:
            param = param_inst.plugin_param
            value = param_inst.value
            if param.type == 'unextpath':
                unextpath_parameters_dict[param.flag] = value
            if param.type == 'path':
                path_parameters_dict[param.flag] = value
        return unextpath_parameters_dict, path_parameters_dict

    def get_ts_plugin_instance_input_objs(self):
        """
        Get a tuple whose first element is a dictionary with keys that are the
        ids of each input plugin instance to this 'ts' plugin instance. The
        values of this dictionary are also dictionaries containing the output
        dir of the plugin instances and the list of all the objects under the
        output dir that match a regular expression. The second element of the
        tuple indicates the value of the 'groupByInstance' flag for this 'ts'
        plugin instance.
        """
        job_id = self.str_job_id
        # extract the 'ts' plugin's special parameters from the DB.
        # NOTE: assign the two lists separately; a chained `a = b = []` would
        # alias both names to the same mutable list object.
        plg_inst_ids = []
        regexs = []
        group_by_instance = False
        if self.c_plugin_inst.plugin.meta.type == 'ts':
            for param_inst in self.l_plugin_inst_param_instances:
                if param_inst.plugin_param.name == 'plugininstances':
                    # string param that represents a comma-separated list of ids
                    plg_inst_ids = param_inst.value.split(',') if param_inst.value else []
                elif param_inst.plugin_param.name == 'filter':
                    # string param that represents a comma-separated list of regular expr
                    regexs = param_inst.value.split(',') if param_inst.value else []
                elif param_inst.plugin_param.name == 'groupByInstance':
                    group_by_instance = param_inst.value
        d_objs = {}
        for i, inst_id in enumerate(plg_inst_ids):
            try:
                plg_inst = PluginInstance.objects.get(pk=int(inst_id))
            except PluginInstance.DoesNotExist:
                logger.error(f"[CODE05,{job_id}]: Couldn't find any plugin instance with "
                             f"id {inst_id} while processing input instances to 'ts' "
                             f"plugin instance with id {self.c_plugin_inst.id}")
                self.c_plugin_inst.error_code = 'CODE05'
                raise
            output_path = plg_inst.get_output_path()
            try:
                l_ls = self.swift_manager.ls(output_path)
            except ClientException as e:
                logger.error(f'[CODE06,{job_id}]: Error while listing swift '
                             f'storage files in {output_path}, detail: {str(e)}')
                self.c_plugin_inst.error_code = 'CODE06'
                raise
            # the i-th regex (if any) filters the i-th instance's objects
            if (i < len(regexs)) and regexs[i]:
                r = re.compile(regexs[i])
                d_objs[plg_inst.id] = {'output_path': output_path,
                                       'objs': [obj for obj in l_ls if r.search(obj)]}
            else:
                d_objs[plg_inst.id] = {'output_path': output_path, 'objs': l_ls}
        return d_objs, group_by_instance

    def manage_plugin_instance_app_empty_inputdir(self):
        """
        This method is responsible for managing the 'inputdir' in the case of
        FS and TS plugins. FS and TS plugins do not have an inputdir spec, since
        this is only a requirement for DS plugins. Nonetheless, the remote
        services do require some non-zero inputdir spec in order to operate
        correctly.

        The hack here is to store data somewhere in swift and accessing it as a
        "pseudo" inputdir for FS and TS plugins. We create a "dummy" inputdir
        with a small dummy text file in swift storage. This is then transmitted
        as an 'inputdir' to the compute environment and can be completely
        ignored by the plugin.
        """
        job_id = self.str_job_id
        data_dir = os.path.join(os.path.expanduser("~"), 'data')
        str_inputdir = os.path.join(data_dir, 'squashEmptyDir').lstrip('/')
        str_squashFile = os.path.join(str_inputdir, 'squashEmptyDir.txt')
        str_squashMsg = 'Empty input dir.'
        try:
            # upload the dummy file only once; subsequent jobs reuse it
            if not self.swift_manager.obj_exists(str_squashFile):
                with io.StringIO(str_squashMsg) as f:
                    self.swift_manager.upload_obj(str_squashFile, f.read(),
                                                  content_type='text/plain')
        except ClientException as e:
            logger.error(f'[CODE07,{job_id}]: Error while uploading file '
                         f'{str_squashFile} to swift storage, detail: {str(e)}')
            self.c_plugin_inst.error_code = 'CODE07'
            raise
        return str_inputdir

    def create_zip_file(self, swift_paths):
        """
        Create job zip file ready for transmission to the remote from a list of
        swift storage paths (prefixes).
        """
        job_id = self.str_job_id
        memory_zip_file = io.BytesIO()
        with zipfile.ZipFile(memory_zip_file, 'w', zipfile.ZIP_DEFLATED) as job_data_zip:
            for swift_path in swift_paths:
                try:
                    l_ls = self.swift_manager.ls(swift_path)
                except ClientException as e:
                    logger.error(f'[CODE06,{job_id}]: Error while listing swift '
                                 f'storage files in {swift_path}, detail: {str(e)}')
                    self.c_plugin_inst.error_code = 'CODE06'
                    raise
                for obj_path in l_ls:
                    try:
                        contents = self.swift_manager.download_obj(obj_path)
                    except ClientException as e:
                        logger.error(f'[CODE08,{job_id}]: Error while downloading file '
                                     f'{obj_path} from swift storage, detail: {str(e)}')
                        self.c_plugin_inst.error_code = 'CODE08'
                        raise
                    # store the object relative to its swift prefix inside the zip
                    zip_path = obj_path.replace(swift_path, '', 1).lstrip('/')
                    job_data_zip.writestr(zip_path, contents)
        memory_zip_file.seek(0)
        return memory_zip_file

    def unpack_zip_file(self, zip_file_content):
        """
        Unpack job zip file from the remote into swift storage and register the
        extracted files with the DB.
        """
        job_id = self.str_job_id
        swift_filenames = []
        try:
            memory_zip_file = io.BytesIO(zip_file_content)
            with zipfile.ZipFile(memory_zip_file, 'r', zipfile.ZIP_DEFLATED) as job_zip:
                filenames = job_zip.namelist()
                logger.info(f'{len(filenames)} files to decompress for job {job_id}')
                output_path = self.c_plugin_inst.get_output_path() + '/'
                for fname in filenames:
                    content = job_zip.read(fname)
                    swift_fname = output_path + fname.lstrip('/')
                    try:
                        self.swift_manager.upload_obj(swift_fname, content)
                    except ClientException as e:
                        logger.error(f'[CODE07,{job_id}]: Error while uploading file '
                                     f'{swift_fname} to swift storage, detail: {str(e)}')
                        self.c_plugin_inst.error_code = 'CODE07'
                        raise
                    swift_filenames.append(swift_fname)
        except ClientException:
            raise  # already logged above with its own error code
        except Exception as e:
            # any other failure means the received zip data itself was bad
            logger.error(f'[CODE04,{job_id}]: Received bad zip file from remote, '
                         f'detail: {str(e)}')
            self.c_plugin_inst.error_code = 'CODE04'
            raise
        self._register_output_files(swift_filenames)

    def save_plugin_instance_final_status(self):
        """
        Save to the DB and log the final status of the plugin instance.
        """
        job_id = self.str_job_id
        logger.info(f"Saving job {job_id} DB status as '{self.c_plugin_inst.status}'")
        self.c_plugin_inst.end_date = timezone.now()
        logger.info(f"Saving job {job_id} DB end_date as "
                    f"'{self.c_plugin_inst.end_date}'")
        self.c_plugin_inst.save()

    def _handle_app_unextpath_parameters(self, unextpath_parameters_dict):
        """
        Internal method to handle parameters of type 'unextpath' passed to the
        plugin instance app.
        """
        job_id = self.str_job_id
        outputdir = self.c_plugin_inst.get_output_path()
        obj_output_path_list = []
        for param_flag in unextpath_parameters_dict:
            # each parameter value is a string of one or more paths separated by comma
            path_list = unextpath_parameters_dict[param_flag].split(',')
            for path in path_list:
                try:
                    obj_list = self.swift_manager.ls(path)
                except ClientException as e:
                    logger.error(f'[CODE06,{job_id}]: Error while listing swift '
                                 f'storage files in {path}, detail: {str(e)}')
                    self.c_plugin_inst.error_code = 'CODE06'
                    raise
                for obj in obj_list:
                    obj_output_path = obj.replace(path.rstrip('/'), outputdir, 1)
                    # fall back to a flat copy when the replace didn't land
                    # inside this instance's output dir
                    if not obj_output_path.startswith(outputdir + '/'):
                        obj_output_path = outputdir + '/' + obj.split('/')[-1]
                    try:
                        if not self.swift_manager.obj_exists(obj_output_path):
                            self.swift_manager.copy_obj(obj, obj_output_path)
                    except ClientException as e:
                        logger.error(f'[CODE09,{job_id}]: Error while copying file '
                                     f'from {obj} to {obj_output_path} in swift storage, '
                                     f'detail: {str(e)}')
                        self.c_plugin_inst.error_code = 'CODE09'
                        raise
                    else:
                        obj_output_path_list.append(obj_output_path)
        logger.info('Registering output files not extracted from swift with job %s',
                    self.str_job_id)
        self._register_output_files(obj_output_path_list)

    def _handle_app_ts_unextracted_input_objs(self, d_ts_input_objs, group_by_instance):
        """
        Internal method to handle a 'ts' plugin's input instances' filtered
        objects (which are not extracted from object storage).
        """
        job_id = self.str_job_id
        outputdir = self.c_plugin_inst.get_output_path()
        obj_output_path_list = []
        for plg_inst_id in d_ts_input_objs:
            plg_inst_output_path = d_ts_input_objs[plg_inst_id]['output_path']
            obj_list = d_ts_input_objs[plg_inst_id]['objs']
            plg_inst_outputdir = outputdir
            if group_by_instance:
                # group each input instance's objects under its own subdir
                plg_inst_outputdir = os.path.join(outputdir, str(plg_inst_id))
            for obj in obj_list:
                obj_output_path = obj.replace(plg_inst_output_path,
                                              plg_inst_outputdir, 1)
                try:
                    if not self.swift_manager.obj_exists(obj_output_path):
                        self.swift_manager.copy_obj(obj, obj_output_path)
                except ClientException as e:
                    logger.error(f'[CODE09,{job_id}]: Error while copying file '
                                 f'from {obj} to {obj_output_path} in swift storage, '
                                 f'detail: {str(e)}')
                    self.c_plugin_inst.error_code = 'CODE09'
                    raise
                obj_output_path_list.append(obj_output_path)
        logger.info("Registering 'ts' plugin's output files not extracted from swift with"
                    " job %s", self.str_job_id)
        self._register_output_files(obj_output_path_list)

    def _handle_finished_successfully_status(self):
        """
        Internal method to handle the 'finishedSuccessfully' status returned by
        the remote compute.
        """
        plg_inst_lock = PluginInstanceLock(plugin_inst=self.c_plugin_inst)
        try:
            plg_inst_lock.save()
        except IntegrityError:
            pass  # another async task has already entered here
        else:
            # only one concurrent async task should get here
            pfcon_url = self.pfcon_client.url
            job_id = self.str_job_id
            logger.info(f'Sending zip file request to pfcon url -->{pfcon_url}<-- '
                        f'for job {job_id}')
            try:
                zip_content = self.pfcon_client.get_job_zip_data(job_id, timeout=9000)
            except PfconRequestException as e:
                logger.error(f'[CODE03,{job_id}]: Error fetching zip from pfcon url '
                             f'-->{pfcon_url}<--, detail: {str(e)}')
                self.c_plugin_inst.error_code = 'CODE03'
                self.c_plugin_inst.status = 'cancelled'  # giving up
            else:
                # data successfully downloaded so update summary
                d_jobStatusSummary = json.loads(self.c_plugin_inst.summary)
                d_jobStatusSummary['pullPath']['status'] = True
                self.c_plugin_inst.summary = json.dumps(d_jobStatusSummary)
                logger.info('Registering output files from remote with job %s', job_id)
                self.c_plugin_inst.status = 'registeringFiles'
                self.c_plugin_inst.save()  # inform FE about status change
                try:
                    self.unpack_zip_file(zip_content)  # register files from remote
                    # register files from unextracted path parameters
                    d_unextpath_params, _ = self.get_plugin_instance_path_parameters()
                    if d_unextpath_params:
                        self._handle_app_unextpath_parameters(d_unextpath_params)
                    # register files from filtered input instance paths ('ts' plugins)
                    if self.c_plugin_inst.plugin.meta.type == 'ts':
                        d_ts_input_objs, tf = self.get_ts_plugin_instance_input_objs()
                        self._handle_app_ts_unextracted_input_objs(d_ts_input_objs, tf)
                except Exception:
                    self.c_plugin_inst.status = 'cancelled'  # giving up
                else:
                    self.c_plugin_inst.status = 'finishedSuccessfully'
            self.save_plugin_instance_final_status()

    def _handle_finished_with_error_status(self):
        """
        Internal method to handle the 'finishedWithError' status returned by the
        remote compute.
        """
        self.c_plugin_inst.status = 'finishedWithError'
        self.save_plugin_instance_final_status()

    def _handle_undefined_status(self):
        """
        Internal method to handle the 'undefined' status returned by the remote
        compute.
        """
        job_id = self.str_job_id
        logger.error(f'[CODE10,{job_id}]: Got undefined status from remote')
        self.c_plugin_inst.error_code = 'CODE10'
        self.c_plugin_inst.status = 'cancelled'
        self.save_plugin_instance_final_status()

    def _register_output_files(self, filenames):
        """
        Internal method to register files generated by the plugin instance
        object with the REST API. The 'filenames' arg is a list of obj names
        in object storage.
        """
        job_id = self.str_job_id
        for obj_name in filenames:
            logger.info(f'Registering file -->{obj_name}<-- for job {job_id}')
            plg_inst_file = PluginInstanceFile(plugin_inst=self.c_plugin_inst)
            plg_inst_file.fname.name = obj_name
            try:
                plg_inst_file.save()
            except IntegrityError:  # avoid re-register a file already registered
                logger.info(f'File -->{obj_name}<-- already registered for job {job_id}')

    @staticmethod
    def get_job_status_summary(d_response=None):
        """
        Get a job status summary JSON string from pfcon response.
        """
        # Still WIP about what is best summary...
        d_jobStatusSummary = {
            'pushPath': {
                'status': True
            },
            'pullPath': {
                'status': False
            },
            'compute': {
                'submit': {
                    'status': True
                },
                'return': {
                    'status': False,
                    'job_status': '',
                    'job_logs': ''
                }
            },
        }
        if d_response is not None:
            d_c = d_response['compute']
            if d_c['status'] in ('undefined', 'finishedSuccessfully',
                                 'finishedWithError'):
                d_jobStatusSummary['compute']['return']['status'] = True
                d_jobStatusSummary['compute']['return']['job_status'] = d_c['status']
                logs = d_c['logs']
                # keep only the tail of very long logs
                if len(logs) > 3000:
                    logs = logs[-3000:]
                d_jobStatusSummary['compute']['return']['job_logs'] = logs
        return json.dumps(d_jobStatusSummary)
class PluginInstanceManager(object):
    """
    Legacy manager for a plugin instance's remote execution through the
    pfurl/pfcon 'coordinate' protocol: builds the full JSON job message,
    submits it, polls status and registers output files in swift storage.
    """

    def __init__(self, plugin_instance):
        self.c_plugin_inst = plugin_instance
        # hardcode mounting points for the input and outputdir in the app's container!
        self.str_app_container_inputdir = '/share/incoming'
        self.str_app_container_outputdir = '/share/outgoing'
        # some schedulers require a minimum job ID string length
        self.str_job_id = 'chris-jid-' + str(plugin_instance.id)
        # local data dir to store zip files before transmitting to the remote
        self.data_dir = os.path.join(os.path.expanduser("~"), 'data')
        self.swift_manager = SwiftManager(settings.SWIFT_CONTAINER_NAME,
                                          settings.SWIFT_CONNECTION_PARAMS)

    def run_plugin_instance_app(self):
        """
        Run a plugin instance's app via a call to a remote service provider.
        """
        if self.c_plugin_inst.status == 'cancelled':
            return
        plugin = self.c_plugin_inst.plugin
        app_args = []
        # append app's container input dir to app's argument list (only for ds plugins)
        if plugin.meta.type == 'ds':
            app_args.append(self.str_app_container_inputdir)
        # append app's container output dir to app's argument list
        app_args.append(self.str_app_container_outputdir)
        # append flag to save input meta data (passed options)
        app_args.append("--saveinputmeta")
        # append flag to save output meta data (output description)
        app_args.append("--saveoutputmeta")
        # append the parameters to app's argument list and identify
        # parameters of type 'unextpath' and 'path'
        path_parameters_dict = {}
        unextpath_parameters_dict = {}
        param_instances = self.c_plugin_inst.get_parameter_instances()
        for param_inst in param_instances:
            param = param_inst.plugin_param
            value = param_inst.value
            if param.action == 'store':
                app_args.append(param.flag)
                # swift paths are replaced by the container's input dir; the
                # original values are kept aside for special handling below
                if param.type == 'unextpath':
                    unextpath_parameters_dict[param.name] = value
                    value = self.str_app_container_inputdir
                if param.type == 'path':
                    path_parameters_dict[param.name] = value
                    value = self.str_app_container_inputdir
                app_args.append(value)
            if param.action == 'store_true' and value:
                app_args.append(param.flag)
            if param.action == 'store_false' and not value:
                app_args.append(param.flag)
        str_outputdir = self.c_plugin_inst.get_output_path()
        # handle parameters of type 'unextpath'
        self.handle_app_unextpath_parameters(unextpath_parameters_dict)
        if self.c_plugin_inst.previous:
            # WARNING: 'ds' plugins can also have 'path' parameters!
            str_inputdir = self.c_plugin_inst.previous.get_output_path()
        elif len(path_parameters_dict):
            # WARNING: Inputdir assumed to only be one of the 'path' parameters!
            path_list = next(iter(path_parameters_dict.values())).split(',')
            str_inputdir = path_list[0].strip('/')
        else:
            # No parameter of type 'path' was submitted, input dir is empty
            str_inputdir = self.manage_app_service_fsplugin_empty_inputdir()
        str_exec = os.path.join(plugin.selfpath, plugin.selfexec)
        l_appArgs = [str(s) for s in app_args]  # convert all arguments to string
        str_allCmdLineArgs = ' '.join(l_appArgs)
        str_cmd = '%s %s' % (str_exec, str_allCmdLineArgs)
        logger.info('cmd = %s', str_cmd)
        # logger.debug('d_pluginInst = %s', vars(self.c_plugin_inst))
        str_IOPhost = self.c_plugin_inst.compute_resource.name
        d_msg = {
            "action": "coordinate",
            "threadAction": True,
            "meta-store": {
                "meta": "meta-compute",
                "key": "jid"
            },
            "meta-data": {
                "remote": {
                    "key": "%meta-store"
                },
                "localSource": {
                    "path": str_inputdir,
                    "storageType": "swift"
                },
                "localTarget": {
                    "path": str_outputdir,
                    "createDir": True
                },
                "specialHandling": {
                    "op": "plugin",
                    "cleanup": True
                },
                "transport": {
                    "mechanism": "compress",
                    "compress": {
                        "archive": "zip",
                        "unpack": True,
                        "cleanup": True
                    }
                },
                "service": str_IOPhost
            },
            "meta-compute": {
                'cmd': "%s %s" % (plugin.execshell, str_cmd),
                'threaded': True,
                'auid': self.c_plugin_inst.owner.username,
                'jid': self.str_job_id,
                'number_of_workers': str(self.c_plugin_inst.number_of_workers),
                'cpu_limit': str(self.c_plugin_inst.cpu_limit),
                'memory_limit': str(self.c_plugin_inst.memory_limit),
                'gpu_limit': str(self.c_plugin_inst.gpu_limit),
                "container": {
                    "target": {
                        "image": plugin.dock_image,
                        "cmdParse": False,
                        "selfexec": plugin.selfexec,
                        "selfpath": plugin.selfpath,
                        "execshell": plugin.execshell
                    },
                    "manager": {
                        "image": "fnndsc/swarm",
                        "app": "swarm.py",
                        "env": {
                            "meta-store": "key",
                            "serviceType": "docker",
                            "shareDir": "%shareDir",
                            "serviceName": self.str_job_id
                        }
                    }
                },
                "service": str_IOPhost
            }
        }
        self.call_app_service(d_msg)
        self.c_plugin_inst.status = 'started'
        self.c_plugin_inst.save()

    def handle_app_unextpath_parameters(self, unextpath_parameters_dict):
        """
        Handle parameters of type 'unextpath' passed to the plugin instance app.
        """
        outputdir = self.c_plugin_inst.get_output_path()
        nobjects = 0
        for param_name in unextpath_parameters_dict:
            # each parameter value is a string of one or more paths separated by comma
            path_list = unextpath_parameters_dict[param_name].split(',')
            for path in path_list:
                obj_list = []
                try:
                    obj_list = self.swift_manager.ls(path)
                except ClientException as e:
                    # best-effort: log and continue with whatever was listed
                    logger.error('Swift storage error, detail: %s' % str(e))
                for obj in obj_list:
                    obj_output_path = obj.replace(path.rstrip('/'), outputdir, 1)
                    # fall back to a flat copy when the replace didn't land
                    # inside this instance's output dir
                    if not obj_output_path.startswith(outputdir + '/'):
                        obj_output_path = outputdir + '/' + obj.split('/')[-1]
                    try:
                        self.swift_manager.copy_obj(obj, obj_output_path)
                    except ClientException as e:
                        logger.error('Swift storage error, detail: %s' % str(e))
                    else:
                        nobjects += 1
        swiftState = {'d_swiftstore': {'filesPushed': nobjects}}
        self.c_plugin_inst.register_output_files(swiftState=swiftState)

    def check_plugin_instance_app_exec_status(self):
        """
        Check a plugin instance's app execution status. It connects to the
        remote service to determine job status and if just finished without
        error, register output files.
        """
        if self.c_plugin_inst.status == 'cancelled':
            return self.c_plugin_inst.status
        d_msg = {
            "action": "status",
            "meta": {
                "remote": {
                    "key": self.str_job_id
                }
            }
        }
        d_response = self.call_app_service(d_msg)
        l_status = d_response['jobOperationSummary']['compute']['return'][
            'l_status']
        logger.info('Current job remote status = %s', l_status)
        str_DBstatus = self.c_plugin_inst.status
        logger.info('Current job DB status = %s', str_DBstatus)
        str_responseStatus = self.serialize_app_response_status(d_response)
        if str_DBstatus == 'started' and 'swiftPut:True' in str_responseStatus:
            # register output files
            d_swiftState = d_response['jobOperation']['info']['swiftPut']
            self.c_plugin_inst.register_output_files(swiftState=d_swiftState)
            if 'finishedSuccessfully' in l_status:
                self.c_plugin_inst.status = 'finishedSuccessfully'
                logger.info("Saving job DB status as '%s'",
                            self.c_plugin_inst.status)
                self.c_plugin_inst.end_date = timezone.now()
                logger.info("Saving job DB end_date as '%s'",
                            self.c_plugin_inst.end_date)
                self.c_plugin_inst.save()
        # Some possible error handling...
        if 'finishedWithError' in l_status:
            self.c_plugin_inst.status = 'finishedWithError'
            logger.info("Saving job DB status as '%s'", self.c_plugin_inst.status)
            self.c_plugin_inst.end_date = timezone.now()
            logger.info("Saving job DB end_date as '%s'",
                        self.c_plugin_inst.end_date)
            self.c_plugin_inst.save()
            self.handle_app_remote_error()
        return self.c_plugin_inst.status

    def cancel_plugin_instance_app_exec(self):
        """
        Cancel a plugin instance's app execution. It connects to the remote
        service to cancel job.
        """
        pass

    def call_app_service(self, d_msg):
        """
        This method sends the JSON 'msg' argument to the remote service.
        """
        remote_url = self.c_plugin_inst.compute_resource.compute_url
        serviceCall = pfurl.Pfurl(
            msg=json.dumps(d_msg),
            http=remote_url,
            verb='POST',
            # contentType = 'application/json',
            b_raw=True,
            b_quiet=True,
            b_httpResponseBodyParse=True,
            jsonwrapper='payload',
        )
        logger.info('comms sent to pfcon service at -->%s<--', remote_url)
        logger.info('message sent: %s', json.dumps(d_msg, indent=4))
        # speak to the service...
        d_response = json.loads(serviceCall())
        if isinstance(d_response, dict):
            logger.info(
                'looks like we got a successful response from pfcon service')
            logger.info('response from pfurl(): %s', json.dumps(d_response, indent=4))
        else:
            logger.info(
                'looks like we got an UNSUCCESSFUL response from pfcon service'
            )
            logger.info('response from pfurl(): -->%s<--', d_response)
        if "Connection refused" in d_response:
            # use the module-level logger, not the root logger
            logger.error('fatal error in talking to pfcon service')
        return d_response

    def manage_app_service_fsplugin_empty_inputdir(self):
        """
        This method is responsible for managing the 'inputdir' in the case of
        FS plugins. An FS plugin does not have an inputdir spec, since this is
        only a requirement for DS plugins. Nonetheless, the underlying
        management system (pfcon/pfurl) does require some non-zero inputdir
        spec in order to operate correctly.

        The hack here is to store data somewhere in swift and accessing it as a
        "pseudo" inputdir for FS plugins. For example, if an FS plugin has no
        arguments of type 'path', then we create a "dummy" inputdir with a
        small dummy text file in swift storage. This is then transmitted as an
        'inputdir' to the compute environment, and can be completely ignored by
        the plugin.

        Importantly, one major exception to the normal FS processing scheme
        exists: an FS plugin that collects data from object storage. This
        storage location is not an 'inputdir' in the traditional sense, and is
        thus specified in the FS plugin argument list as argument of type
        'path' (i.e. there is no positional argument for inputdir as in DS
        plugins. Thus, if a type 'path' argument is specified, this 'path' is
        assumed to denote a location in object storage.
        """
        str_inputdir = os.path.join(self.data_dir, 'squashEmptyDir').lstrip('/')
        str_squashFile = os.path.join(str_inputdir, 'squashEmptyDir.txt')
        str_squashMsg = 'Empty input dir.'
        try:
            # upload the dummy file only once; subsequent jobs reuse it
            if not self.swift_manager.obj_exists(str_squashFile):
                with io.StringIO(str_squashMsg) as f:
                    self.swift_manager.upload_obj(str_squashFile, f.read(),
                                                  content_type='text/plain')
        except ClientException as e:
            logger.error('Swift storage error, detail: %s' % str(e))
        return str_inputdir

    def serialize_app_response_status(self, d_response):
        """
        Serialize and save the 'jobOperation' and 'jobOperationSummary'.
        """
        str_summary = json.dumps(d_response['jobOperationSummary'])
        # logger.debug("str_summary = '%s'", str_summary)
        str_raw = self.json_zipToStr(d_response['jobOperation'])
        # Still WIP about what is best summary...
        # a couple of options / ideas linger
        try:
            str_containerLogs = d_response['jobOperation'] \
                                          ['info'] \
                                          ['compute'] \
                                          ['return'] \
                                          ['d_ret'] \
                                          ['l_logs'][0]
        except Exception:
            # narrow from a bare `except:` so KeyboardInterrupt/SystemExit
            # propagate; the logs path is simply not always present
            str_containerLogs = "Container logs not currently available."
        # update plugin instance with status info
        self.c_plugin_inst.summary = str_summary
        self.c_plugin_inst.raw = str_raw
        self.c_plugin_inst.save()
        str_responseStatus = ""
        for str_action in ['pushPath', 'compute', 'pullPath', 'swiftPut']:
            if str_action == 'compute':
                for str_part in ['submit', 'return']:
                    str_actionStatus = str(d_response['jobOperationSummary']
                                           [str_action]
                                           [str_part]
                                           ['status'])
                    str_actionStatus = ''.join(str_actionStatus.split())
                    str_responseStatus += str_action + '.' + str_part + ':' + \
                                          str_actionStatus + ';'
            else:
                str_actionStatus = str(d_response['jobOperationSummary']
                                       [str_action]
                                       ['status'])
                str_actionStatus = ''.join(str_actionStatus.split())
                str_responseStatus += str_action + ':' + str_actionStatus + ';'
        return str_responseStatus

    def handle_app_remote_error(self):
        """
        Collect the 'stderr' from the remote app.
        """
        str_deepVal = ''

        def str_deepnest(d):
            nonlocal str_deepVal
            if d:
                for k, v in d.items():
                    if isinstance(v, dict):
                        str_deepnest(v)
                    else:
                        str_deepVal = '%s' % ("{0} : {1}".format(k, v))

        # Collect the 'stderr' from the app service for this instance
        d_msg = {
            "action": "search",
            "meta": {
                "key": "jid",
                "value": self.str_job_id,
                "job": "0",
                "when": "end",
                "field": "stderr"
            }
        }
        d_response = self.call_app_service(d_msg)
        str_deepnest(d_response)
        logger.error('deepVal = %s', str_deepVal)
        d_msg['meta']['field'] = 'returncode'
        d_response = self.call_app_service(d_msg)
        str_deepnest(d_response)
        logger.error('deepVal = %s', str_deepVal)

    def create_zip_file(self, swift_paths):
        """
        Create job zip file ready for transmission to the remote from a list of
        swift storage paths (prefixes). Returns the path of the created zip
        file.
        """
        if not os.path.exists(self.data_dir):
            try:
                os.makedirs(self.data_dir)  # create data dir
            except OSError as e:
                msg = 'Creation of dir %s failed, detail: %s' % (self.data_dir, str(e))
                logger.error(msg)
        zipfile_path = os.path.join(self.data_dir, self.str_job_id + '.zip')
        with zipfile.ZipFile(zipfile_path, 'w', zipfile.ZIP_DEFLATED) as job_data_zip:
            for swift_path in swift_paths:
                l_ls = []
                try:
                    l_ls = self.swift_manager.ls(swift_path)
                except ClientException as e:
                    msg = 'Listing of swift storage files in %s failed, detail: %s' % (
                        swift_path, str(e))
                    logger.error(msg)
                for obj_path in l_ls:
                    try:
                        contents = self.swift_manager.download_obj(obj_path)
                    except ClientException as e:
                        msg = 'Downloading of file %s from swift storage for %s job ' \
                              'failed, detail: %s' % (obj_path, self.str_job_id, str(e))
                        logger.error(msg)
                        # skip this object; otherwise `contents` would be
                        # unbound (or stale from the previous iteration)
                        continue
                    job_data_zip.writestr(obj_path, contents)
        return zipfile_path

    @staticmethod
    def json_zipToStr(json_data):
        """
        Return a string of compressed JSON data, suitable for transmission back
        to a client.
        """
        return base64.b64encode(
            zlib.compress(
                json.dumps(json_data).encode('utf-8'))).decode('ascii')