def get_pipeline_definition(pipeline_name, working_dir):
    """Load the pipeline yaml file and return its parsed contents.

    The file location is resolved by pypyr.moduleloader.get_pipeline_path
    from the pipeline name and the working directory.

    Args:
        pipeline_name: Name of the pipeline. The error message assumes the
            file is called {pipeline_name}.yaml.
        working_dir: Passed to the path lookup as the working directory.

    Returns:
        Whatever yaml.safe_load produces for the pipeline file.

    Raises:
        FileNotFoundError: The resolved pipeline file could not be opened.
            An error is logged before the exception is re-raised.

    """
    logger.debug("starting")

    path_to_pipeline = pypyr.moduleloader.get_pipeline_path(
        pipeline_name=pipeline_name,
        working_directory=working_dir)

    logger.debug(f"Trying to open pipeline at path {path_to_pipeline}")

    try:
        with open(path_to_pipeline) as pipeline_file:
            parsed_pipeline = yaml.safe_load(pipeline_file)
            stage_count = len(parsed_pipeline)
            logger.debug(f"found {stage_count} stages in pipeline.")
    except FileNotFoundError:
        logger.error(
            "The pipeline doesn't exist. Looking for a file here: "
            f"{pipeline_name}.yaml in the /pipelines sub directory.")
        raise

    logger.debug("pipeline definition loaded")
    logger.debug("done")
    return parsed_pipeline
def run_step(context):
    """Log the pypyr version at info level.

    The version text comes from pypyr.version.get_version().

    Args:
        context: Not read by this step; present so the function matches the
            run_step(context) signature.

    """
    logger.debug("started")

    version_text = pypyr.version.get_version()
    logger.info(f"pypyr version is: {version_text}")

    logger.debug("done")
def run_step(context):
    """Execute the python source found in context['pycode'].

    The source string runs through exec() with this module's globals and the
    locals mapping captured from this function, so the executed code can
    refer to `context`. After exec returns, whatever is bound to 'context'
    in that locals mapping is merged back into context with update().

    When you execute the pipeline, it should look something like this:
    pipeline-runner [name here] --context 'pycode=print(1+1)'.

    Args:
        context: dictionary-like. Mandatory. Must contain key 'pycode' with
            a value; this is checked with context.assert_key_has_value.

    Returns:
        None.

    """
    logger.debug("started")
    context.assert_key_has_value(key='pycode', caller=__name__)

    logger.debug(f"Executing python string: {context['pycode']}")
    # Capture locals exactly once, before exec. Only names already bound at
    # this point (i.e. `context`) are in the mapping handed to exec.
    locals_dictionary = locals()
    # SECURITY: exec runs arbitrary code with this process's privileges.
    # Only feed this step pycode from a source you trust.
    exec(context['pycode'], globals(), locals_dictionary)

    # It looks like this dance might be unnecessary in python 3.6
    logger.debug("looking for context update in exec")
    # If the executed code rebound `context`, pick up the new object here and
    # merge it into the pipeline context.
    exec_context = locals_dictionary['context']
    context.update(exec_context)
    logger.debug("exec output context merged with pipeline context")

    logger.debug("done")
def run_step(context):
    """Echo context['echoMe'] to the logger at info level.

    A string value is passed through context.get_formatted first; any other
    type is logged as is.

    When you execute the pipeline, it should look something like this:
    pypyr [name here] --context 'echoMe=test'.

    Args:
        context: dictionary-like. Mandatory, must be truthy and must contain
            the key 'echoMe'.

    Raises:
        AssertionError: context is empty or None.

    """
    logger.debug("started")

    assert context, ("context must be set for echo. Did you set "
                     "--context 'echoMe=text here'?")

    context.assert_key_exists('echoMe', __name__)

    echo_value = context['echoMe']
    if isinstance(echo_value, str):
        # only strings go through formatting
        echo_value = context.get_formatted('echoMe')

    logger.info(echo_value)

    logger.debug("done")
def main(pipeline_name, pipeline_context_input, working_dir, log_level):
    """Entry point for the pypyr pipeline runner.

    Does the one-off initialization (root logger, working directory for
    dynamic module loading) and then hands over to run_pipeline.

    Args:
        pipeline_name: string. Name of pipeline, sans .yaml at end.
        pipeline_context_input: string. Used to initialize the pypyr context.
        working_dir: path. Set as the working directory for the module
            loader and passed on to run_pipeline.
        log_level: int. Passed to pypyr.log.logger.set_root_logger.

    Returns:
        None

    """
    pypyr.log.logger.set_root_logger(log_level)
    logger.debug("starting pypyr")

    # Steps are python modules that load dynamically. Registering the working
    # directory up front means the operator does not have to pip install a
    # package before its steps can be found.
    pypyr.moduleloader.set_working_directory(working_dir)

    pipeline_arguments = {
        'pipeline_name': pipeline_name,
        'pipeline_context_input': pipeline_context_input,
        'working_dir': working_dir,
    }
    run_pipeline(**pipeline_arguments)

    logger.debug("pypyr done")
def get_parsed_context(context_arg):
    """Turn a comma-delimited string into a dictionary of flags.

    Each comma-separated element becomes a key whose value is True.

    Args:
        context_arg: string like 'spam,eggs'. Must be truthy.

    Returns:
        dict mapping every element to True, in input order.

    Raises:
        AssertionError: context_arg is empty or None.

    """
    assert context_arg, (
        "pipeline must be invoked with --context set. For "
        "this commastolist parser you're looking for "
        "something like--context 'spam,eggs' "
        "or --context 'spam'.")
    logger.debug("starting")

    element_names = context_arg.split(',')
    return dict.fromkeys(element_names, True)
def get_parsed_context(context_arg):
    """Parse a 'key1=value1,key2=value2' string into a dictionary.

    Elements are separated by commas. Each element is split on its FIRST
    '=' only, so a value may itself contain '=' (for example a url with a
    query string, or base64 padding).

    Args:
        context_arg: string of comma-delimited key=value pairs. Must be
            truthy.

    Returns:
        dict of str to str.

    Raises:
        AssertionError: context_arg is empty or None.
        ValueError: an element contains no '=' at all.

    """
    assert context_arg, ("pipeline must be invoked with --context set. For "
                         "this keyvaluepairs parser you're looking for "
                         "something like "
                         "--context 'key1=value1,key2=value2'.")
    logger.debug("starting")
    # maxsplit=1: everything after the first '=' belongs to the value. An
    # unbounded split would yield 3+ items for 'k=a=b' and make dict() fail.
    return dict(element.split('=', 1) for element in context_arg.split(','))
def get_parsed_context(context_arg):
    """Deserialize a json string into the initial context.

    Args:
        context_arg: string containing json. Must be truthy.

    Returns:
        The object json.loads produces for context_arg.

    Raises:
        AssertionError: context_arg is empty or None.

    """
    assert context_arg, (
        "pipeline must be invoked with --context set. For "
        "this json parser you're looking for something like "
        "--context '{\"key1\":\"value1\",\"key2\":\"value2\"}'")
    logger.debug("starting")

    parsed = json.loads(context_arg)
    return parsed
def run_step_group(pipeline_definition, step_group_name, context):
    """Look up one step group in the pipeline and run all of its steps.

    Args:
        pipeline_definition: the parsed pipeline, handed to
            get_pipeline_steps.
        step_group_name: name of the step group to run. Must be truthy.
        context: passed to run_pipeline_steps for the steps to work on.

    Raises:
        AssertionError: step_group_name is empty or None.

    """
    logger.debug(f"starting {step_group_name}")
    assert step_group_name

    run_pipeline_steps(
        steps=get_pipeline_steps(pipeline=pipeline_definition,
                                 steps_group=step_group_name),
        context=context)

    logger.debug(f"done {step_group_name}")
def run_step(context):
    """Extract and/or create tar archives, as specified in context.

    Runs tar_extract when context has a list under 'tarExtract' and
    tar_archive when context has a list under 'tarArchive'. When both are
    present the order is always Extract, then Archive.

    Never extract archives from untrusted sources without prior inspection.
    It is possible that files are created outside of path, e.g. members
    that have absolute filenames starting with "/" or filenames with two
    dots "..".

    Args:
        context: dictionary-like. Mandatory, must be truthy. At least one of
            'tarExtract' or 'tarArchive' must exist and be a list.

    Raises:
        AssertionError: context is empty or None.
        Whatever context.assert_keys_type_value raises when neither key is
        usable.

    """
    logger.debug("started")

    assert context, f"context must have value for {__name__}"

    extract_info, archive_info = context.keys_of_type_exist(
        ('tarExtract', list),
        ('tarArchive', list))

    should_extract = (extract_info.key_in_context
                      and extract_info.is_expected_type)
    should_archive = (archive_info.key_in_context
                      and archive_info.is_expected_type)

    if should_extract:
        tar_extract(context)

    if should_archive:
        tar_archive(context)

    if not (should_extract or should_archive):
        # Neither key was usable. This raises on the first item with a
        # problem.
        context.assert_keys_type_value(
            __name__,
            ('This step needs any combination of '
             'tarExtract or tarArchive in context.'),
            extract_info,
            archive_info)

    logger.debug("done")
def run_step(context):
    """Remove every item from the context.

    Args:
        context: dictionary-like. No specific keys are required; it only
            needs to support clear() and len().

    Returns:
        None. The context is emptied in place.

    """
    logger.debug("started")

    context.clear()
    remaining_items = len(context)
    logger.info(f"Context wiped. New context size: {remaining_items}")

    logger.debug("done")
def run_step(context):
    """Copy a file to a new location, replacing strings on the way.

    The find/replace pairs first go through context formatting themselves,
    then iter_replace_strings applies them to the input file and the result
    is written to the output file. Replacements apply in the iteration
    order of fileReplacePairs, so prefer an ordered collection.

    Args:
        context: pypyr.context.Context. Mandatory. Keys read:
            - fileReplaceIn. mandatory. Path to the source file.
            - fileReplaceOut. mandatory. Path to write to. Missing parent
              directories are created.
            - fileReplacePairs. mandatory. The find/replace pairs.

    Returns:
        None.

    Raises:
        FileNotFoundError: the input file does not exist.
        Whatever context.assert_keys_have_values raises when a required key
        is missing or has no value.

    """
    logger.debug("started")

    context.assert_keys_have_values(__name__,
                                    'fileReplaceIn',
                                    'fileReplaceOut',
                                    'fileReplacePairs')

    source_path = context.get_formatted('fileReplaceIn')
    destination_path = context.get_formatted('fileReplaceOut')

    logger.debug("Running subsitutions from context on fileReplacePairs")
    replace_pairs = context.get_formatted_iterable(
        context['fileReplacePairs'])

    logger.debug(f"opening source file: {source_path}")
    with open(source_path) as source_file:
        # The source is opened first, so a missing input fails before any
        # output directory is created.
        logger.debug(
            f"opening destination file for writing: {destination_path}")
        destination_dir = os.path.abspath(os.path.dirname(destination_path))
        os.makedirs(destination_dir, exist_ok=True)

        with open(destination_path, 'w') as destination_file:
            replaced_lines = iter_replace_strings(source_file, replace_pairs)
            destination_file.writelines(replaced_lines)

    logger.info(
        f"Read {source_path}, replaced strings and wrote to "
        f"{destination_path}")
    logger.debug("done")
def env_unset(context):
    """Remove environment variables named in context['envUnset'].

    A variable that does not exist is skipped without error.

    For example, say input context is:
        key1: value1
        envUnset:
            - MYVAR1
            - MYVAR2

    This removes $MYVAR1 and $MYVAR2 from os.environ.

    Args:
        context: dictionary-like. Mandatory. context['envUnset'] must exist
            and be iterable; each item is the name of an $ENV to unset.

    """
    logger.debug("started")

    for variable_name in context['envUnset']:
        logger.debug(f"unsetting ${variable_name}")
        try:
            del os.environ[variable_name]
        except KeyError:
            # The goal is for the variable to be gone. If it was never
            # there, that goal is already met, so don't raise.
            logger.debug(
                f"${variable_name} doesn't exist anyway. As you were.")

    logger.debug("done")
def run_failure_step_group(pipeline, context):
    """Run the pipeline's 'on_failure' step group, never raising.

    Any exception raised while running the failure handler is logged and
    swallowed, so it cannot hide the error that caused the failure handler
    to run in the first place.

    Args:
        pipeline: the parsed pipeline. A falsy value is treated as a failure
            of the handler and is logged, not raised.
        context: passed to run_step_group.

    """
    logger.debug("starting")

    try:
        assert pipeline
        # if no on_failure exists, it'll do nothing.
        run_step_group(pipeline_definition=pipeline,
                       step_group_name='on_failure',
                       context=context)
    except Exception as handler_error:
        # Deliberately broad: the original failure must stay visible.
        logger.error("Failure handler also failed. Swallowing.")
        logger.error(handler_error)

    logger.debug("done")
def get_pipeline_definition(pipeline_name, working_dir):
    """Open the pipeline yaml file and return the parsed definition.

    The path is resolved by pypyr.moduleloader.get_pipeline_path. The file
    is parsed with a yaml.YAML(typ='safe', pure=True) loader.

    Args:
        pipeline_name: string. Name of pipeline. The error message assumes
            the file is called {pipeline_name}.yaml.
        working_dir: path. Passed to the path lookup as the working
            directory.

    Returns:
        The object the yaml loader produces for the pipeline file.

    Raises:
        FileNotFoundError: the resolved pipeline file could not be opened.
            An error is logged before the exception is re-raised.

    """
    logger.debug("starting")

    path_to_pipeline = pypyr.moduleloader.get_pipeline_path(
        pipeline_name=pipeline_name,
        working_directory=working_dir)

    logger.debug(f"Trying to open pipeline at path {path_to_pipeline}")

    try:
        with open(path_to_pipeline) as pipeline_file:
            safe_loader = yaml.YAML(typ='safe', pure=True)
            parsed_pipeline = safe_loader.load(pipeline_file)
            stage_count = len(parsed_pipeline)
            logger.debug(f"found {stage_count} stages in pipeline.")
    except FileNotFoundError:
        logger.error(
            "The pipeline doesn't exist. Looking for a file here: "
            f"{pipeline_name}.yaml in the /pipelines sub directory.")
        raise

    logger.debug("pipeline definition loaded")
    logger.debug("done")
    return parsed_pipeline
def prepare_context(pipeline, context_in_string, context):
    """Merge the parsed input context into the given context instance.

    Args:
        pipeline: dict. Dictionary representing the pipeline.
        context_in_string: string. Argument string used to initialize
            context.
        context: pypyr.context.Context. Updated in place with whatever
            get_parsed_context returns.

    Returns:
        None. The result is the mutated context argument, nothing is passed
        back as a return value.

    """
    logger.debug("starting")

    context.update(
        get_parsed_context(pipeline=pipeline,
                           context_in_string=context_in_string))

    logger.debug("done")
def run_step(context):
    """Run a command as a sub-process, without a shell.

    context['cmd'] is formatted against the context, split into an argument
    list and executed with subprocess.run(shell=False). Because no shell is
    involved, shell features such as pipes, redirects, globbing and $VAR
    expansion are NOT available; they are passed to the program as literal
    arguments.

    On POSIX the command is split with shlex.split, so quoting works the
    way you would expect: 'mything "arg with spaces"' results in two
    arguments, and repeated spaces do not create empty arguments. On other
    platforms the command is split on single spaces.

    Escape curly braces: if you want a literal curly brace, double it like
    {{ or }}.

    When you execute the pipeline, it should look something like this:
    pipeline-runner [name here] --context 'cmd=ls -a'.

    context['cmd'] will interpolate anything in curly braces for values
    found in context. So if your context looks like this:
        key1: value1
        key2: value2
        cmd: mything --arg1 {key1}

    The command that runs will be "mything --arg1 value1"

    Args:
        context: dictionary-like. Mandatory. Must contain key 'cmd' with a
            value.

    Raises:
        subprocess.CalledProcessError: the command exited with a non-zero
            exit code.

    """
    # Function-scope imports keep this block self-contained.
    import os
    import shlex

    logger.debug("started")
    context.assert_key_has_value(key='cmd', caller=__name__)

    logger.debug(f"Processing command string: {context['cmd']}")
    interpolated_string = context.get_formatted('cmd')

    if os.name == 'posix':
        # Honors quotes and collapses runs of whitespace, like a shell
        # would when building argv.
        args = shlex.split(interpolated_string)
    else:
        # shlex's POSIX rules treat backslash as an escape character, which
        # would mangle Windows style paths, so keep the plain split there.
        args = interpolated_string.split(' ')

    # check=True throws CalledProcessError if exit code != 0
    subprocess.run(args, shell=False, check=True)

    logger.debug("done")
def tar_archive(context):
    """Create a tar archive for each item in context['tarArchive'].

    Both the source and destination paths go through
    context.get_formatted_string before use.

    Example:
        tarArchive:
            - in: path/to/dir
              out: path/to/destination.tar.xz
            - in: another/my.file
              out: ./my.tar.xz

        This will archive directory path/to/dir to
        path/to/destination.tar.xz, and also archive file another/my.file
        to ./my.tar.xz

    Args:
        context: dictionary-like. Mandatory. context['tarArchive'] must be
            an iterable of mappings, each with an 'in' key (path to
            archive) and an 'out' key (tar file to write).

    """
    logger.debug("start")

    write_mode = get_file_mode_for_writing(context)

    for archive_spec in context['tarArchive']:
        # 'out' is read before 'in', so a spec missing both keys fails on
        # 'out' first.
        tar_path = context.get_formatted_string(archive_spec['out'])
        source_path = context.get_formatted_string(archive_spec['in'])

        with tarfile.open(tar_path, write_mode) as tar_out:
            logger.debug(f"Archiving '{source_path}' to '{tar_path}'")
            tar_out.add(source_path)
            logger.info(f"Archived '{source_path}' to '{tar_path}'")

    logger.debug("end")
def run_step(context):
    """Copy existing context values to other context keys.

    context['contextSet'] maps a destination key to the source key whose
    value it should receive.

    For example, say input context is:
        key1: value1
        key2: value2
        key3: value3
        contextSet:
            key2: key1
            key4: key3

    This will result in return context:
        key1: value1
        key2: value1
        key3: value3
        key4: value3

    Args:
        context: dictionary-like. Must contain key 'contextSet' with a
            value; it must expose items().

    """
    logger.debug("started")
    context.assert_key_has_value(key='contextSet', caller=__name__)

    for target_key, source_key in context['contextSet'].items():
        logger.debug(
            f"setting context {target_key} to value from "
            f"context {source_key}")
        context[target_key] = context[source_key]

    logger.debug("done")
def tar_extract(context):
    """Extract every member of each tar listed in context['tarExtract'].

    Both the tar path and the destination directory go through
    context.get_formatted_string before use.

    Example:
        tarExtract:
            - in: path/to/my.tar.xz
              out: /path/extract/here
            - in: another/tar.xz
              out: .

        This will extract path/to/my.tar.xz to /path/extract/here, and also
        extract another/tar.xz to $PWD.

    Args:
        context: dictionary-like. Mandatory. context['tarExtract'] must be
            an iterable of mappings, each with an 'in' key (tar to extract)
            and an 'out' key (directory to extract into).

    """
    logger.debug("start")

    read_mode = get_file_mode_for_reading(context)

    for extract_spec in context['tarExtract']:
        # 'in' is the path to the tar to extract.
        tar_path = context.get_formatted_string(extract_spec['in'])
        # 'out' is the directory to extract into.
        target_dir = context.get_formatted_string(extract_spec['out'])

        with tarfile.open(tar_path, read_mode) as tar_in:
            logger.debug(f"Extracting '{tar_path}' to '{target_dir}'")
            # SECURITY: extractall trusts member names. An archive from an
            # untrusted source can write outside target_dir through
            # absolute paths or '..' components. Inspect such archives
            # before extraction.
            tar_in.extractall(target_dir)
            logger.info(f"Extracted '{tar_path}' to '{target_dir}'")

    logger.debug("end")
def get_module(module_abs_import):
    """Import a module by its absolute name and return it.

    Args:
        module_abs_import: string. Absolute name of module to import, as
            accepted by importlib.import_module.

    Returns:
        The imported module object.

    Raises:
        PyModuleNotFoundError: the import raised ModuleNotFoundError. The
            original error is chained as the cause.

    """
    logger.debug("starting")
    logger.debug(f"loading module {module_abs_import}")

    try:
        loaded_module = importlib.import_module(module_abs_import)
    except ModuleNotFoundError as err:
        short_message = (
            "The module doesn't exist. Looking for a file like this: "
            f"{module_abs_import}")

        help_message = (
            f"{module_abs_import}.py should be in your working "
            "dir or it should be installed to the python path."
            "\nIf you have 'package.sub.mod' your current working "
            "dir should contain ./package/sub/mod.py\n"
            "If you specified 'mymodulename', your current "
            "working dir should contain ./mymodulename.py\n"
            "If the module is not in your current working dir, it "
            "must exist in your current python path - so you "
            "should have run pip install or setup.py")

        logger.error(short_message)
        raise PyModuleNotFoundError(help_message) from err
    else:
        logger.debug("done")
        return loaded_module
def env_set(context):
    """Write values to environment variables, per context['envSet'].

    Each key of context['envSet'] is the name of an $ENV. Each value goes
    through context.get_formatted_string and the result is written to
    os.environ under that name.

    For example, say input context is:
        key1: value1
        key2: value2
        key3: value3
        envSet:
            MYVAR1: {key1}
            MYVAR2: before_{key3}_after
            MYVAR3: arbtexthere

    This will result in the following $ENVs:
        $MYVAR1 = value1
        $MYVAR2 = before_value3_after
        $MYVAR3 = arbtexthere

    The values are written to os.environ of the running process; they are
    not persisted system-wide.

    Args:
        context: dictionary-like. Mandatory. context['envSet'] must exist
            and expose items().

    """
    logger.debug("started")

    for variable_name, value_template in context['envSet'].items():
        logger.debug(
            f"setting ${variable_name} to context[{value_template}]")
        formatted_value = context.get_formatted_string(value_template)
        os.environ[variable_name] = formatted_value

    logger.debug("done")
def run_step(context):
    """Remove the keys listed in context['contextClear'] from context.

    Keys that do not exist in context are skipped without error.

    For example, say input context is:
        key1: value1
        key2: value2
        key3: value3
        key4: value4
        contextClear:
            - key2
            - key4
            - contextClear

    This will result in return context:
        key1: value1
        key3: value3

    Args:
        context: dictionary-like. Must contain key 'contextClear' with a
            value; it is iterated for the names of the keys to remove.

    """
    logger.debug("started")
    context.assert_key_has_value(key='contextClear', caller=__name__)

    keys_to_remove = context['contextClear']
    for key_name in keys_to_remove:
        logger.debug(f"removing {key_name} from context")
        # pop with a default does not raise KeyError when the key is
        # already absent.
        context.pop(key_name, None)
        logger.info(f"removed {key_name} from context")

    logger.debug("done")
def env_get(context):
    """Copy environment variables into the context, per context['envGet'].

    Each key of context['envGet'] is the context key to write. Each value
    is the name of the $ENV to read. An existing context key of the same
    name is overwritten.

    For example, say input context is:
        key1: value1
        key2: value2
        pypyrCurrentDir: value3
        envGet:
            pypyrUser: USER
            pypyrCurrentDir: PWD

    This will result in context:
        key1: value1
        key2: value2
        pypyrUser: <<value of $USER here>>
        pypyrCurrentDir: <<value of $PWD here, not value3>>

    Args:
        context: dictionary-like. Mandatory. context['envGet'] must exist
            and expose items().

    Raises:
        KeyError: a named $ENV does not exist.

    """
    logger.debug("start")

    for context_key, variable_name in context['envGet'].items():
        logger.debug(f"setting context {context_key} to $ENV {variable_name}")
        context[context_key] = os.environ[variable_name]

    logger.debug("done")
def get_parsed_context(context_arg):
    """Load a json file from disk to use as the initial context.

    Args:
        context_arg: string. Path to the json file. Must be truthy.

    Returns:
        The object json.load produces for the file.

    Raises:
        AssertionError: context_arg is empty or None.

    """
    assert context_arg, (
        "pipeline must be invoked with --context set. For "
        "this json parser you're looking for something "
        "like --context './myjsonfile.json'")
    logger.debug("starting")

    logger.debug(f"attempting to open file: {context_arg}")
    with open(context_arg) as context_file:
        loaded = json.load(context_file)

    item_count = len(loaded)
    logger.debug(f"json file loaded into context. Count: {item_count}")
    logger.debug("done")
    return loaded
def prepare_context(pipeline, context_in_args, context):
    """Merge the parsed input args into the given context instance.

    Args:
        pipeline (dict): Dictionary representing the pipeline.
        context_in_args (list of str): Args used to initialize context.
        context (pypyr.context.Context): Updated in place with whatever
            get_parsed_context returns.

    Returns:
        None. The result is the mutated context argument, nothing is passed
        back as a return value.

    """
    logger.debug("starting")

    context.update(
        get_parsed_context(pipeline=pipeline,
                           context_in_args=context_in_args))

    logger.debug("done")
def run_step(context):
    """Get, set and/or unset $ENVs, as specified in context.

    Runs env_get when context has a dict under 'envGet', env_set when it
    has a dict under 'envSet' and env_unset when it has a list under
    'envUnset'. Whatever the combination, the order is Get, Set, Unset.

    Args:
        context: dictionary-like. Mandatory, must be truthy. At least one of
            'envGet', 'envSet' or 'envUnset' must exist with the expected
            type.

    Raises:
        AssertionError: context is empty or None, or none of the three keys
            was usable.

    """
    logger.debug("started")

    assert context, f"context must have value for {__name__}"

    get_info, set_info, unset_info = context.keys_of_type_exist(
        ('envGet', dict),
        ('envSet', dict),
        ('envUnset', list))

    should_get = get_info.key_in_context and get_info.is_expected_type
    should_set = set_info.key_in_context and set_info.is_expected_type
    should_unset = unset_info.key_in_context and unset_info.is_expected_type

    if should_get:
        env_get(context)

    if should_set:
        env_set(context)

    if should_unset:
        env_unset(context)

    assert should_get or should_set or should_unset, (
        "context must contain any combination of "
        f"envGet, envSet or envUnset for {__name__}")

    logger.debug("done")
def run_step(context):
    """Read a yaml file, format its contents from context and write it out.

    The whole input file is loaded into memory with the round-trip loader,
    so be aware of big files. The loaded payload goes through
    context.get_formatted_iterable and is dumped with the round-trip dumper.

    Args:
        context: pypyr.context.Context. Mandatory. Keys read:
            - fileFormatYamlIn. mandatory. Path to the source file.
            - fileFormatYamlOut. mandatory. Path to write to. Missing parent
              directories are created.

    Returns:
        None.

    Raises:
        FileNotFoundError: the input file does not exist.
        Whatever context.assert_keys_have_values raises when a required key
        is missing or has no value.

    """
    logger.debug("started")

    context.assert_keys_have_values(__name__,
                                    'fileFormatYamlIn',
                                    'fileFormatYamlOut')

    source_path = context.get_formatted('fileFormatYamlIn')
    destination_path = context.get_formatted('fileFormatYamlOut')

    logger.debug(f"opening yaml source file: {source_path}")
    with open(source_path) as source_file:
        loaded_yaml = yaml.load(source_file, Loader=yaml.RoundTripLoader)

    logger.debug(f"opening destination file for writing: {destination_path}")
    destination_dir = os.path.abspath(os.path.dirname(destination_path))
    os.makedirs(destination_dir, exist_ok=True)

    with open(destination_path, 'w') as destination_file:
        formatted_yaml = context.get_formatted_iterable(loaded_yaml)
        yaml.dump(formatted_yaml,
                  destination_file,
                  Dumper=yaml.RoundTripDumper,
                  allow_unicode=True,
                  width=50)

    logger.info(
        f"Read {source_path} yaml, formatted contents and wrote to "
        f"{destination_path}")
    logger.debug("done")
def run_pipeline_step(step_name, context):
    """Load the step module and run its run_step(context) function.

    Only the lookup of run_step on the module is guarded. An AttributeError
    raised from INSIDE the step while it runs propagates untouched, and is
    not reported as a missing run_step function.

    Args:
        step_name: string. Name of the step module, as understood by
            pypyr.moduleloader.get_module.
        context: passed to the step's run_step function.

    Raises:
        AttributeError: the step module has no run_step attribute. An error
            is logged before the exception is re-raised.
        Anything the step itself raises.

    """
    logger.debug("starting")
    logger.debug(f"running step {step_name}")

    step = pypyr.moduleloader.get_module(step_name)

    try:
        # Keep the try body to the attribute lookup alone, so the message
        # below can only fire when run_step really is missing.
        run_step_function = step.run_step
    except AttributeError:
        logger.error(f"The step {step_name} doesn't have a run_step(context) "
                     "function.")
        raise

    logger.debug(f"running step {step}")
    run_step_function(context)
    logger.debug(f"step {step} done")
def main(pipeline_name,
         pipeline_context_input,
         working_dir,
         groups=None,
         success_group=None,
         failure_group=None):
    """Entry point for the pypyr pipeline runner.

    Call this once per pypyr run, including when you run a pipeline from
    your own code. It does the one-off initialization and then hands over
    to load_and_run_pipeline.

    If you invoke pypyr through this API, setting up and configuring
    logging is your responsibility. To replicate the handlers and
    formatters of the pypyr cli, call pypyr.log.logger.set_root_logger()
    before calling this function.

    Be aware that this function calls
    pypyr.log.logger.set_up_notify_log_level(), which sets up pypyr's
    notify log level.

    Args:
        pipeline_name: string. Name of pipeline, sans .yaml at end.
        pipeline_context_input: string. Used to initialize the pypyr
            context.
        working_dir: path. Set as the working directory for the module
            loader.
        groups: list of str. step-group names to run in pipeline.
        success_group: str. step-group name to run on success completion.
        failure_group: str. step-group name to run on pipeline failure.

    Returns:
        None

    """
    pypyr.log.logger.set_up_notify_log_level()
    logger.debug("starting pypyr")

    # Steps are python modules that load dynamically. Registering the working
    # directory up front means the operator does not have to pip install a
    # package before its steps can be found.
    pypyr.moduleloader.set_working_directory(working_dir)

    pipeline_arguments = {
        'pipeline_name': pipeline_name,
        'pipeline_context_input': pipeline_context_input,
        'groups': groups,
        'success_group': success_group,
        'failure_group': failure_group,
    }

    try:
        load_and_run_pipeline(**pipeline_arguments)
    except Stop:
        # Stop ends the run early; it is not treated as an error here.
        logger.debug("Stop: stopped pypyr")

    logger.debug("pypyr done")