def main():
    """Resubmission trigger: if the triggering message reports a FINISHED
    job, re-submit via ``resub`` and exit successfully.

    Reads ``status`` from the actor context (falling back to the raw
    message dict), and calls ``resub(r, ag)`` only when the last action
    finished.

    NOTE(review): this ``def main()`` is shadowed by a second ``def main()``
    later in this file, so this implementation is dead code as written —
    confirm which entry point is intended.
    """
    r = Reactor()
    r.logger.info("Hello this is actor {}".format(r.uid))
    # Fix: the original constructed a second Reactor() here, discarding
    # the first instance for no reason. One instance is sufficient.
    ag = r.client        # Agave client
    context = r.context  # Actor context
    pprint.pprint(context)
    m = context.message_dict

    # Prefer the structured context attribute; fall back to the raw
    # message dict; default to None if neither is usable.
    status = None
    try:
        status = context.status
    except Exception:
        try:
            status = m.get('status')
        except Exception:
            status = None

    r.logger.info("Triggered action for last action of {}".format(status))
    if status == "FINISHED":
        resub(r, ag)
    # Fix: the original called on_success() both inside the FINISHED branch
    # and again unconditionally, emitting a duplicate success notification.
    # A single unconditional call preserves the success outcome on every path.
    r.on_success("successfully ran")
def main():
    """Run the FCS-ETL pipeline for one manifest message.

    Workflow (all steps are sequenced; each failure path calls the
    overridden ``on_failure``, which reports and presumably terminates
    the execution — confirm against the ``on_failure`` helper):

    1. Validate the incoming message and resolve the manifest Agave URI.
    2. Download the manifest, then the plan and instrument-configuration
       JSON documents it references.
    3. Build the ETL parameter files (experimental data, process control,
       color model, analysis parameters) in the working directory.
    4. Upload those files to the configured destination, renaming any
       pre-existing remote copies out of the way.
    5. Template an FCS-ETL job definition from settings and submit it as
       an Agave compute job.
    """
    r = Reactor()
    m = AttrDict(r.context.message_dict)
    # Look up my own name
    actor_name = r.get_attr('name')
    # example:
    # 'bob' 'was unable to call' 'karen' (id: ABCDEX, exec: BCDEG)
    template = "{} {} {} (actor/exec {} {})"
    # override on_failure and on_success
    # Rebind the module-level on_failure/on_success helpers as bound
    # methods on this Reactor instance so r.on_failure(...) below uses
    # the custom implementations.
    funcType = type(r.on_failure)
    r.on_failure = funcType(on_failure, r, Reactor)
    funcType = type(r.on_success)
    r.on_success = funcType(on_success, r, Reactor)
    r.logger.debug("message: {}".format(m))
    # Use JSONschema-based message validator
    # - In theory, this obviates some get() boilerplate
    if not r.validate_message(m):
        r.on_failure(
            template.format(actor_name, 'got an invalid message', m,
                            r.uid, r.execid), None)

    ag = r.client  # Agave client
    # db = AgaveKeyValStore(ag)  # AgaveDB client
    context = r.context  # Actor context
    # NOTE(review): m is rebound here to the plain (non-AttrDict) message
    # dict; subsequent access uses .get() accordingly.
    m = context.message_dict
    r.logger.debug("Message: {}".format(m))

    # Resolve the manifest's Agave URI into (storage system, dir, file).
    agave_uri = m.get('uri')
    (agave_storage_sys, agave_abs_dir, agave_filename) =\
        agaveutils.from_agave_uri(agave_uri)
    manifest_path = os.path.join('/', agave_abs_dir, agave_filename)

    r.logger.debug("fetching manifest {}".format(agave_uri))
    try:
        mani_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=manifest_path,
            systemId=agave_storage_sys,
            localFilename='manifest.json')
        if mani_file is None:
            raise Exception("no error was detected but file appears empty")
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', manifest_path,
                            r.uid, r.execid), e)

    # Load manifest so we can read the plan and config
    # - Use AttrDict so we can use dot.notation
    r.logger.debug("loading manifest into a dict and getting values")
    manifest_dict = {}
    try:
        with open('manifest.json') as json_data:
            manifest_dict = AttrDict(json.load(json_data))
            plan_uri = manifest_dict.plan
            instrument_config_uri = manifest_dict.instrument_configuration
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'was unable to properly parse the',
                            'manifest file', r.uid, r.execid), e)

    # Fetch the experiment plan referenced by the manifest.
    r.logger.debug("fetching plan {}".format(plan_uri))
    plan_abs_path = None
    try:
        (plan_system, plan_dirpath, plan_filename) =\
            agaveutils.from_agave_uri(plan_uri)
        plan_abs_path = os.path.join(plan_dirpath, plan_filename)
        plan_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=plan_abs_path,
            systemId=plan_system,
            localFilename='plan.json')
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', plan_abs_path,
                            r.uid, r.execid), e)

    # Fetch the instrument (cytometer) configuration referenced by the
    # manifest.
    r.logger.debug(
        "fetching instrument config {}".format(instrument_config_uri))
    try:
        (ic_system, ic_dirpath, ic_filename) = \
            agaveutils.from_agave_uri(instrument_config_uri)
        ic_abs_path = os.path.join(ic_dirpath, ic_filename)
        ic_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=ic_abs_path,
            systemId=ic_system,
            localFilename='cytometer_configuration.json')
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', ic_abs_path,
                            r.uid, r.execid), e)

    r.logger.debug(
        "loading dict from instrument config file {}".format(ic_file))
    try:
        # NOTE(review): binary mode with json.load is a Python 2 idiom;
        # on Python 3 json.load accepts it but text mode is conventional.
        cytometer_configuration = json.load(open(ic_file, 'rb'))
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not load dict from JSON document',
                            ic_file, r.uid, r.execid), e)

    r.logger.debug("loading tasbe_cytometer_configuration.channels")
    try:
        channels = cytometer_configuration['tasbe_cytometer_configuration'][
            'channels']
    except Exception as e:
        r.on_failure(
            template.format(
                actor_name, 'was unable to load',
                'tasbe_cytometer_configuration.channels from settings',
                r.uid, r.execid), e)

    r.logger.debug("loading dict from plan JSON file {}".format(plan_file))
    try:
        plan = json.load(open(plan_file, 'rb'))
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not load dict from JSON document',
                            plan_file, r.uid, r.execid), e)

    # Derive the ETL input documents and write them to local storage.
    # NOTE(review): 'wb' + json.dump only works on Python 2 (json.dump
    # writes str, not bytes) — confirm the target runtime.
    r.logger.debug("writing experimental data to local storage")
    experimental_data = extract_experimental_data(manifest_dict, plan)
    with open('experimental_data.json', 'wb') as outfile:
        json.dump(experimental_data, outfile, sort_keys=True, indent=4,
                  separators=(',', ': '))

    r.logger.debug("writing intermediary JSON files to local storage")
    try:
        with open('process_control_data.json', 'wb') as outfile:
            json.dump(build_process_control_data(plan, channels,
                                                 experimental_data,
                                                 instrument_config_uri,
                                                 manifest_dict),
                      outfile, sort_keys=True, indent=4,
                      separators=(',', ': '))
        with open('color_model_parameters.json', 'wb') as outfile:
            json.dump(build_color_model(channels), outfile, sort_keys=True,
                      indent=4, separators=(',', ': '))
        with open('analysis_parameters.json', 'wb') as outfile:
            json.dump(build_analysis_parameters(), outfile, sort_keys=True,
                      indent=4, separators=(',', ': '))
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'could not load write JSON file(s)',
                            plan_file, r.uid, r.execid), e)

    # We will now upload the completed files to:
    # agave://data-sd2e-community/temp/flow_etl/REACTOR_NAME/PLAN_ID
    # - /temp/flow_etl/REACTOR_NAME is set by config.yml/destination.base_path
    #
    # Expectation: these files have been written to pwd() somwhere above
    # Maps Agave job input parameter name -> local filename.
    datafiles = {
        'analysisParameters': 'analysis_parameters.json',
        'colorModelParameters': 'color_model_parameters.json',
        'cytometerConfiguration': 'cytometer_configuration.json',
        'experimentalData': 'experimental_data.json',
        'processControl': 'process_control_data.json'
    }

    # Figure out the plan_id from plan_uri
    # - Get the JSON file
    plan_uri_file = os.path.basename(plan_uri)
    # - Get JSON filename root
    plan_id = os.path.splitext(plan_uri_file)[0]

    # Default upload destination set in config.yml
    # - may want to add override but not essential now
    dest_dir = os.path.join(r.settings.destination.base_path, plan_id)
    dest_sys = r.settings.destination.system_id
    r.logger.debug("ensuring destination {} exists".format(
        agaveutils.to_agave_uri(dest_sys, dest_dir)))
    try:
        agaveutils.agave_mkdir(r.client, plan_id, dest_sys,
                               r.settings.destination.base_path)
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not access or create destination',
                            dest_dir, r.uid, r.execid), e)

    # Upload each generated file, collecting its Agave URI for the job's
    # inputs as we go.
    job_def_inputs = {}
    for agaveparam, fname in datafiles.items():
        r.logger.info("uploading {} to {}".format(fname, dest_dir))
        fpath = os.path.join(PWD, fname)
        # rename the remote if it exists
        try:
            r.logger.debug("renaming remote {}".format(fname))
            remote_abs_path = os.path.join(dest_dir, fname)
            # Timestamp suffix in milliseconds.
            # NOTE(review): strftime("%s") is a platform-specific (glibc)
            # extension, not portable — confirm the deployment platform.
            new_name = os.path.basename(remote_abs_path) + \
                '.' + str(int(datetime.datetime.now().strftime("%s")) * 1000)
            r.client.files.manage(systemId=dest_sys,
                                  body={
                                      'action': 'rename',
                                      'path': new_name
                                  },
                                  filePath=remote_abs_path)
        except Exception:
            # Best-effort rename: a missing remote file is the normal
            # first-run case, so the error is logged and ignored.
            r.logger.debug("{} does not exist or is inaccessible. ({})".format(
                remote_abs_path, 'ignoring error'))
            pass
        # upload the newly-generated file
        try:
            r.logger.debug("now uploading {}".format(fname))
            agaveutils.agave_upload_file(r.client, dest_dir, dest_sys, fpath)
        except Exception as e:
            prefix = '{} failed to upload {}'.format(actor_name, fname)
            r.on_failure(
                template.format(prefix, 'to', dest_dir, r.uid, r.execid), e)
        # Entries in this dict are needed to submit the FCS-ETL job later
        job_def_inputs[agaveparam] = agaveutils.to_agave_uri(
            dest_sys, os.path.join(dest_dir, fname))

    # Base inputPath off path of manifest
    # Cowboy coding - Take grandparent directory sans sanity checking!
    manifest_pathGrandparent = os.path.dirname(os.path.dirname(manifest_path))

    # Build the inputData path from settings (instead of hard-coding vals)
    #
    # Our settings.job_params.data_subdir could be an array
    # should there be a need to pull in other top-level dirs.
    # In such a case inputPath would be constructed as a list
    # of agave URIs. This is challenging to process in the
    # job's runner script but possible and documented.
    inputDataPath = os.path.join(manifest_pathGrandparent,
                                 r.settings.job_params.data_subdir)
    job_def_inputs['inputData'] = agaveutils.to_agave_uri(
        agave_storage_sys, inputDataPath)

    # Submit a job request to the FCS-ETL app based on template + vars
    #
    # The job configuration is templated from settings.job_definition
    # name, inputs are empty. notifications are empty, too,
    # but we aren't implementing for the time being. Use the inputs
    # we built above from the uploaded list and path to the manifest
    # and synthesize a job name from app/actor/execution.
    #
    # By convention, slots we wish to template are left empty. Slots
    # we want to have a default value (that aren't defined by the app
    # itself) are included in the template, but can be over-ridden
    # programmatically with Python dict operations
    job_def = r.settings.job_definition
    app_record = r.settings.linked_reactors.get(AGAVE_APP_ALIAS, {})
    # this allows the appId to be set in the job_definition, but overridden
    # by configuration provided in settings.
    job_def_orig_appId = job_def.get('appId', None)
    job_def.appId = app_record.get('id', job_def_orig_appId)

    # add dynamically-generated callback to log aggregator
    # sends gross amounts of JSON in each POST
    if r.settings.logs.get('token', None) is not None:
        proto = r.settings.get('logger', {}).get('proto', 'http')
        hostname = r.settings.get('logger', {}).get('host', 'localhost')
        port = str(r.settings.get('logger', {}).get('port', 8080))
        client_key = r.settings.get('logger', {}).get('client_key', 'KEY')
        client_secret = r.settings.logs.get('token', 'SECRET')
        # read loggger path from default -> reactor settings -> app settings
        path = r.settings.get('logger', {}).get('path', '/logger')
        path = app_record.get('opts', {}).get('logger', {}).get('path', path)
        # Basic-auth style credentials embedded in the callback URL.
        logger_uri = proto + '://' + client_key + ':' + client_secret + '@' +\
            hostname + ':' + port + path + '/' + job_def.appId
        logger_callback = {'persistent': True, 'event': '*',
                           'url': logger_uri}
        nlist = list(job_def.notifications)
        nlist.append(logger_callback)
        ntuple = tuple(nlist)
        job_def.notifications = ntuple

    job_def.inputs = job_def_inputs
    job_def.name = "{}-{}".format(r.uid, r.execid)
    # set archivePath and archiveSystem based on manifest
    job_def.archiveSystem = agave_storage_sys
    job_def.archivePath = os.path.join(manifest_pathGrandparent,
                                       r.settings.job_params.output_subdir,
                                       job_def.appId,
                                       "{}-{}".format(r.uid, r.execid))

    # Expected outcome:
    #
    # An experimental data collection 'ABCDEF'
    # has (at present) directories of measurements and one or more
    # manifests (allowing for versioning). ETL apps can deposit results
    # under ABCDEF/processed/appid/<unique-directory-name>.
    r.logger.info('submitting FSC-ETL agave compute job')
    job_id = 'mockup'
    try:
        job_id = r.client.jobs.submit(body=job_def)['id']
        r.logger.info("compute job id is {}".format(job_id))
    except Exception as e:
        # Use a print here so we can more easily snag the job def
        # TODO - come back and take this out if we ever add a nonce to
        #        the callback notifications because that should not
        #        show up in the logs. One alternative would be to
        #        register a plaintext log formatter with redaction
        #        support, but that requires extending our logger module
        print(json.dumps(job_def, indent=4))
        r.on_failure(
            template.format(actor_name,
                            'failed when submitting an agave compute job for',
                            job_def.appId, r.uid, r.execid), e)

    # Make a nice human-readable success message for the Slack log
    suffix = '{} and will deposit outputs in {}'.format(
        job_id, job_def.archivePath)
    r.on_success(
        template.format(actor_name, 'submitted job', suffix, r.uid, r.execid))