def main():
    r = Reactor()
    m = r.context.get('message_dict')  # shortcut to message_dict
    p = r.pemagent
    r.logger.debug("Message: {}".format(m))

    # Use JSONschema-based message validator
    # - In theory, this obviates some get() boilerplate
    if not r.validate_message(m):
        r.on_failure("Invalid message: {}".format(m))

    (syst, abspath, fname) = agaveutils.from_agave_uri(m.get('uri'))
    try:
        fullpath = os.path.join(abspath, fname)
        r.logger.debug('fullpath: {}'.format(fullpath))
        p.grant(syst, fullpath, m.get('username'), m.get('permission'))
    except Exception as e:
        r.on_failure(e)
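

# For reference: a minimal sketch of the check the JSONschema-based
# r.validate_message() call above is expected to perform. The schema body
# here is an assumption inferred from the fields the handler reads
# (uri, username, permission); the real schema ships alongside the actor.
from jsonschema import ValidationError, validate

EXAMPLE_MESSAGE_SCHEMA = {
    "type": "object",
    "properties": {
        "uri": {"type": "string"},        # agave:// URI of the target path
        "username": {"type": "string"},   # user receiving the grant
        "permission": {"type": "string"}  # e.g. READ, WRITE, ALL (assumed)
    },
    "required": ["uri", "username", "permission"]
}


def message_is_valid(message):
    """Sketch of validate_message(): True iff message matches the schema."""
    try:
        validate(message, EXAMPLE_MESSAGE_SCHEMA)
        return True
    except ValidationError:
        return False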
def main(): """ Download and validate a JSON manifest at a given TACC S3 path Input message(s): {"uri": "s3://storage-system-bucket-alias/path/to/manifest.json"} Output message(s): {"uri": "s3://storage-system-bucket-alias/path/to/manifest.json"} Linked actors: - 'copy_dir_s3' """ r = Reactor() ag = r.client # Agave client for grabbing the file db = AgaveKeyValStore(ag) m = AttrDict(r.context.message_dict) r.logger.debug("Config: {}".format(Reactor.settings)) r.logger.info("Message: {}".format(m)) s3_uri = m.get('uri') agaveStorageSystem, agaveAbsDir, agaveFileName = \ agaveutils.uri.from_tacc_s3_uri(s3_uri) manifestPath = os.path.join('/', agaveAbsDir, agaveFileName) sourceAgaveStorageSystem = \ r.settings.system_maps.get('source').get(agaveStorageSystem) r.logger.debug("source-uri: {}".format( r.settings.system_maps.get('source').get(agaveStorageSystem))) r.logger.debug("storage-system: {}".format(sourceAgaveStorageSystem)) r.logger.info("validating: {}".format(s3_uri)) try: result = agaveutils.files.agave_download_file( agaveClient=ag, agaveAbsolutePath=manifestPath, systemId=sourceAgaveStorageSystem, localFilename=DOWNLOAD_FILE) except Exception as e: r.on_failure("download-faiedl: {}".format( agaveutils.uri.to_agave_uri(sourceAgaveStorageSystem, manifestPath))) r.logger.info("validating {}".format(result)) try: validate_file_schema(result) r.logger.info("validation succeeded") except Exception as e: r.on_failure("validation failed: {}".format(e)) # Downstream actions # Trigger S3->POSIX copy via manifest_dir_copy # {"uri": "s3://storage-system-bucket-alias/path/to/manifest.json"} if r.local is False: try: r.logger.info("message: copy_dir_s3") mani_dir_copy_id = r.aliases.id_from_alias('copy_dir_s3', db) r.logger.info(" copy_dir_s3 id: {}".format(mani_dir_copy_id)) # Forward original message to the next actor mani_dir_copy_msg = m r.logger.debug(" message: {}".format(m)) mani_id = agaveutils.reactors.message_reactor( ag, mani_dir_copy_id, mani_dir_copy_msg) r.logger.info(" execution: {}".format(mani_id)) except Exception as e: r.logger.error("error initiating copy_dir_s3: {}".format(e)) if r.settings.linked_reactors.get('copy_dir_s3').get('ignore_err'): pass
def main():
    r = Reactor()
    m = AttrDict(r.context.message_dict)

    # Look up my own name
    actor_name = r.get_attr('name')

    # example:
    # 'bob' 'was unable to call' 'karen' (actor/exec ABCDEX BCDEG)
    template = "{} {} {} (actor/exec {} {})"

    # override on_failure and on_success
    funcType = type(r.on_failure)
    r.on_failure = funcType(on_failure, r, Reactor)
    funcType = type(r.on_success)
    r.on_success = funcType(on_success, r, Reactor)

    r.logger.debug("message: {}".format(m))

    # Use JSONschema-based message validator
    # - In theory, this obviates some get() boilerplate
    if not r.validate_message(m):
        r.on_failure(
            template.format(actor_name, 'got an invalid message', m,
                            r.uid, r.execid), None)

    ag = r.client  # Agave client
    # db = AgaveKeyValStore(ag)  # AgaveDB client

    agave_uri = m.get('uri')
    (agave_storage_sys, agave_abs_dir, agave_filename) = \
        agaveutils.from_agave_uri(agave_uri)
    manifest_path = os.path.join('/', agave_abs_dir, agave_filename)

    r.logger.debug("fetching manifest {}".format(agave_uri))
    try:
        mani_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=manifest_path,
            systemId=agave_storage_sys,
            localFilename='manifest.json')
        if mani_file is None:
            raise Exception("no error was detected but file appears empty")
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', manifest_path,
                            r.uid, r.execid), e)

    # Load manifest so we can read the plan and config
    # - Use AttrDict so we can use dot.notation
    r.logger.debug("loading manifest into a dict and getting values")
    manifest_dict = {}
    try:
        with open('manifest.json') as json_data:
            manifest_dict = AttrDict(json.load(json_data))
        plan_uri = manifest_dict.plan
        instrument_config_uri = manifest_dict.instrument_configuration
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'was unable to properly parse the',
                            'manifest file', r.uid, r.execid), e)

    r.logger.debug("fetching plan {}".format(plan_uri))
    plan_abs_path = None
    try:
        (plan_system, plan_dirpath, plan_filename) = \
            agaveutils.from_agave_uri(plan_uri)
        plan_abs_path = os.path.join(plan_dirpath, plan_filename)
        plan_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=plan_abs_path,
            systemId=plan_system,
            localFilename='plan.json')
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', plan_abs_path,
                            r.uid, r.execid), e)

    r.logger.debug(
        "fetching instrument config {}".format(instrument_config_uri))
    ic_abs_path = None
    try:
        (ic_system, ic_dirpath, ic_filename) = \
            agaveutils.from_agave_uri(instrument_config_uri)
        ic_abs_path = os.path.join(ic_dirpath, ic_filename)
        ic_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=ic_abs_path,
            systemId=ic_system,
            localFilename='cytometer_configuration.json')
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', ic_abs_path,
                            r.uid, r.execid), e)

    r.logger.debug(
        "loading dict from instrument config file {}".format(ic_file))
    try:
        cytometer_configuration = json.load(open(ic_file, 'rb'))
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not load dict from JSON document',
                            ic_file, r.uid, r.execid), e)

    r.logger.debug("loading tasbe_cytometer_configuration.channels")
    try:
        channels = cytometer_configuration[
            'tasbe_cytometer_configuration']['channels']
    except Exception as e:
        r.on_failure(
            template.format(
                actor_name, 'was unable to load',
                'tasbe_cytometer_configuration.channels from the config',
                r.uid, r.execid), e)
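
    # For reference, channels is expected to be a list of per-channel
    # records from the TASBE cytometer configuration, along these
    # illustrative (not authoritative) lines:
    #   [{"name": "FITC_A", ...}, {"name": "Pacific_Blue_A", ...}]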
    r.logger.debug("loading dict from plan JSON file {}".format(plan_file))
    try:
        plan = json.load(open(plan_file, 'rb'))
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not load dict from JSON document',
                            plan_file, r.uid, r.execid), e)

    r.logger.debug("writing experimental data to local storage")
    experimental_data = extract_experimental_data(manifest_dict, plan)
    with open('experimental_data.json', 'wb') as outfile:
        json.dump(experimental_data, outfile, sort_keys=True, indent=4,
                  separators=(',', ': '))

    r.logger.debug("writing intermediary JSON files to local storage")
    try:
        with open('process_control_data.json', 'wb') as outfile:
            json.dump(build_process_control_data(plan, channels,
                                                 experimental_data,
                                                 instrument_config_uri,
                                                 manifest_dict),
                      outfile, sort_keys=True, indent=4,
                      separators=(',', ': '))
        with open('color_model_parameters.json', 'wb') as outfile:
            json.dump(build_color_model(channels), outfile,
                      sort_keys=True, indent=4, separators=(',', ': '))
        with open('analysis_parameters.json', 'wb') as outfile:
            json.dump(build_analysis_parameters(), outfile,
                      sort_keys=True, indent=4, separators=(',', ': '))
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'could not write JSON file(s) for',
                            plan_file, r.uid, r.execid), e)

    # We will now upload the completed files to:
    # agave://data-sd2e-community/temp/flow_etl/REACTOR_NAME/PLAN_ID
    # - /temp/flow_etl/REACTOR_NAME is set by config.yml/destination.base_path
    #
    # Expectation: these files have been written to pwd() somewhere above
    datafiles = {
        'analysisParameters': 'analysis_parameters.json',
        'colorModelParameters': 'color_model_parameters.json',
        'cytometerConfiguration': 'cytometer_configuration.json',
        'experimentalData': 'experimental_data.json',
        'processControl': 'process_control_data.json'
    }

    # Figure out the plan_id from plan_uri
    # - Get the JSON file name
    plan_uri_file = os.path.basename(plan_uri)
    # - Get the JSON filename root
    plan_id = os.path.splitext(plan_uri_file)[0]

    # Default upload destination set in config.yml
    # - may want to add an override, but not essential for now
    dest_dir = os.path.join(r.settings.destination.base_path, plan_id)
    dest_sys = r.settings.destination.system_id

    r.logger.debug("ensuring destination {} exists".format(
        agaveutils.to_agave_uri(dest_sys, dest_dir)))
    try:
        agaveutils.agave_mkdir(r.client, plan_id, dest_sys,
                               r.settings.destination.base_path)
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not access or create destination',
                            dest_dir, r.uid, r.execid), e)

    job_def_inputs = {}
    for agaveparam, fname in datafiles.items():
        r.logger.info("uploading {} to {}".format(fname, dest_dir))
        fpath = os.path.join(PWD, fname)
        # rename the remote if it exists
        try:
            r.logger.debug("renaming remote {}".format(fname))
            remote_abs_path = os.path.join(dest_dir, fname)
            new_name = os.path.basename(remote_abs_path) + '.' + \
                str(int(datetime.datetime.now().strftime("%s")) * 1000)
            r.client.files.manage(systemId=dest_sys,
                                  body={'action': 'rename',
                                        'path': new_name},
                                  filePath=remote_abs_path)
        except Exception:
            r.logger.debug("{} does not exist or is inaccessible; "
                           "ignoring the error".format(remote_abs_path))
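
        # The rename above archives any existing remote copy by suffixing
        # its name with an epoch-milliseconds timestamp, producing names
        # like (illustrative): analysis_parameters.json.1514764800000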
({})".format( remote_abs_path, 'ignoring error')) pass # upload the newly-generated file try: r.logger.debug("now uploading {}".format(fname)) agaveutils.agave_upload_file(r.client, dest_dir, dest_sys, fpath) except Exception as e: prefix = '{} failed to upload {}'.format(actor_name, fname) r.on_failure( template.format(prefix, 'to', dest_dir, r.uid, r.execid), e) # Entries in this dict are needed to submit the FCS-ETL job later job_def_inputs[agaveparam] = agaveutils.to_agave_uri( dest_sys, os.path.join(dest_dir, fname)) # Base inputPath off path of manifest # Cowboy coding - Take grandparent directory sans sanity checking! manifest_pathGrandparent = os.path.dirname(os.path.dirname(manifest_path)) # Build the inputData path from settings (instead of hard-coding vals) # # Our settings.job_params.data_subdir could be an array # should there be a need to pull in other top-level dirs. # In such a case inputPath would be constructed as a list # of agave URIs. This is challenging to process in the # job's runner script but possible and documented. inputDataPath = os.path.join(manifest_pathGrandparent, r.settings.job_params.data_subdir) job_def_inputs['inputData'] = agaveutils.to_agave_uri( agave_storage_sys, inputDataPath) # Submit a job request to the FCS-ETL app based on template + vars # # The job configuration is templated from settings.job_definition # name, inputs are empty. notifications are empty, too, # but we aren't implementing for the time being. Use the inputs # we built above from the uploaded list and path to the manifest # and synthesize a job name from app/actor/execution. # # By convention, slots we wish to template are left empty. Slots # we want to have a default value (that aren't defined by the app # itself) are included in the template, but can be over-ridden # programmatically with Python dict operations job_def = r.settings.job_definition app_record = r.settings.linked_reactors.get(AGAVE_APP_ALIAS, {}) # this allows the appId to be set in the job_definition, but overridden # by configuration provided in settings. 
    job_def_orig_appId = job_def.get('appId', None)
    job_def.appId = app_record.get('id', job_def_orig_appId)

    # add a dynamically-generated callback to the log aggregator
    # (sends gross amounts of JSON in each POST)
    if r.settings.logs.get('token', None) is not None:
        proto = r.settings.get('logger', {}).get('proto', 'http')
        hostname = r.settings.get('logger', {}).get('host', 'localhost')
        port = str(r.settings.get('logger', {}).get('port', 8080))
        client_key = r.settings.get('logger', {}).get('client_key', 'KEY')
        client_secret = r.settings.logs.get('token', 'SECRET')
        # read logger path from default -> reactor settings -> app settings
        path = r.settings.get('logger', {}).get('path', '/logger')
        path = app_record.get('opts', {}).get('logger', {}).get('path', path)
        logger_uri = proto + '://' + client_key + ':' + client_secret + '@' + \
            hostname + ':' + port + path + '/' + job_def.appId
        logger_callback = {'persistent': True, 'event': '*',
                           'url': logger_uri}
        nlist = list(job_def.notifications)
        nlist.append(logger_callback)
        job_def.notifications = tuple(nlist)

    job_def.inputs = job_def_inputs
    job_def.name = "{}-{}".format(r.uid, r.execid)

    # set archivePath and archiveSystem based on the manifest
    job_def.archiveSystem = agave_storage_sys
    job_def.archivePath = os.path.join(manifest_pathGrandparent,
                                       r.settings.job_params.output_subdir,
                                       job_def.appId,
                                       "{}-{}".format(r.uid, r.execid))

    # Expected outcome:
    #
    # An experimental data collection 'ABCDEF' has (at present) directories
    # of measurements and one or more manifests (allowing for versioning).
    # ETL apps can deposit results under
    # ABCDEF/processed/appid/<unique-directory-name>.

    r.logger.info('submitting FCS-ETL agave compute job')
    job_id = 'mockup'
    try:
        job_id = r.client.jobs.submit(body=job_def)['id']
        r.logger.info("compute job id is {}".format(job_id))
    except Exception as e:
        # Use a print here so we can more easily snag the job def
        # TODO - come back and take this out if we ever add a nonce to
        #        the callback notifications, because that should not show
        #        up in the logs. One alternative would be to register a
        #        plaintext log formatter with redaction support, but that
        #        requires extending our logger module.
        print(json.dumps(job_def, indent=4))
        r.on_failure(
            template.format(actor_name,
                            'failed when submitting an agave compute job for',
                            job_def.appId, r.uid, r.execid), e)

    # Make a nice human-readable success message for the Slack log
    suffix = '{} and will deposit outputs in {}'.format(
        job_id, job_def.archivePath)
    r.on_success(
        template.format(actor_name, 'submitted job', suffix,
                        r.uid, r.execid))
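

# For orientation: a hypothetical settings.job_definition template as it
# might appear in config.yml before main() fills in the empty slots. The
# appId and defaults below are placeholders, not the real app definition.
EXAMPLE_JOB_DEFINITION = {
    'appId': 'fcs-etl-0.1.0',  # placeholder; overridable via linked_reactors
    'name': '',                # filled with '{uid}-{execid}' at runtime
    'inputs': {},              # filled from job_def_inputs at runtime
    'archiveSystem': '',       # filled from the manifest's storage system
    'archivePath': '',         # grandparent dir + output_subdir + appId
    'notifications': ()        # logger callback appended when a token is set
}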