def setup_workspace(self): """Set up the study's main workspace directory.""" try: logger.info("Setting up study workspace in '%s'", self._out_path) create_parentdir(self._out_path) except Exception as e: logger.error(e.args) return False
def setup(self, submission_attempts=1, restart_limit=1):
    """
    Method for executing initial setup of a Study.

    The method is used for going through and actually acquiring each
    dependency, substituting variables, sources and labels. Also sets up
    the folder structure for the study.

    :param submission_attempts: Number of attempted submissions before
        marking a step as failed.
    :param restart_limit: Upper limit on the number of times a step with
        a restart command can be resubmitted before it is considered
        failed.
    :returns: True if the Study is successfully set up, False otherwise.
    """
    # If the study has already been set up, just return.
    if self._issetup:
        logger.info("%s is already set up, returning.", self.name)
        return True

    self._submission_attempts = submission_attempts
    self._restart_limit = restart_limit

    # Set up the directory structure.
    # TODO: fdinatal - As I implement the high level program (manager and
    # launcher in bin), I'm starting to have questions about whether or
    # not the study set up is the place to handle the output path... it
    # feels like the determination of the output path should be at the
    # higher level.
    out_name = "{}_{}".format(
        self.name.replace(" ", "_"),
        time.strftime("%Y%m%d-%H%M%S"))
    self.output.value = os.path.join(self.output.value, out_name)

    # Set up the environment if it hasn't been already.
    if not self.environment.is_set_up:
        logger.info("Setting up the environment.")
        self.environment.acquire_environment()

    try:
        create_parentdir(self.output.value)
    except Exception as e:
        logger.error(str(e))
        return False

    # Apply all environment artifacts and acquire everything.
    for key, node in self.values.items():
        logger.info("Applying to step '%s' of the study '%s'...",
                    key, node)
        if node:
            node.__dict__ = apply_function(
                node.__dict__, self.environment.apply_environment)

    # Flag the study as set up.
    self._issetup = True
    return True
def setup_logging(name, output_path, log_lvl=2, log_path=None,
                  log_stdout=False, log_format=None):
    """
    Set up logging in the Main class.

    :param name: The name of the log file.
    :param output_path: Path to the study output directory; used to derive
        the default 'logs' directory when log_path is not given.
    :param log_lvl: Verbosity level (multiplied by 10 to map onto the
        logging module's levels).
    :param log_path: Optional custom path for log files.
    :param log_stdout: If True, also log to stdout.
    :param log_format: Optional format string for log records; defaults to
        LFORMAT.
    """
    # Check if the user has specified a custom log path.
    if log_path:
        LOGGER.info("Log path overwritten by command line -- %s", log_path)
    else:
        log_path = os.path.join(output_path, "logs")

    if not log_format:
        log_format = LFORMAT

    loglevel = log_lvl * 10

    # Attempt to create the logging directory.
    create_parentdir(log_path)
    formatter = logging.Formatter(log_format)
    ROOTLOGGER.setLevel(loglevel)

    # Set up handlers.
    if log_stdout:
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        ROOTLOGGER.addHandler(handler)

    log_file = os.path.join(log_path, "{}.log".format(name))
    handler = logging.FileHandler(log_file)
    handler.setFormatter(formatter)
    ROOTLOGGER.addHandler(handler)
    ROOTLOGGER.setLevel(loglevel)

    # Print the level of logging.
    LOGGER.info("INFO Logging Level -- Enabled")
    LOGGER.warning("WARNING Logging Level -- Enabled")
    LOGGER.critical("CRITICAL Logging Level -- Enabled")
    LOGGER.debug("DEBUG Logging Level -- Enabled")
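
# Usage sketch (not part of the package): the function above assumes the
# module-level globals LOGGER, ROOTLOGGER, and LFORMAT, which are defined
# elsewhere. The scaffolding and the format string below are assumptions
# used only to illustrate a call.
import logging
import os

LFORMAT = "%(asctime)s - %(name)s:%(funcName)s:%(lineno)s - " \
          "%(levelname)s - %(message)s"
ROOTLOGGER = logging.getLogger()      # Root logger that receives handlers.
LOGGER = logging.getLogger(__name__)  # Module logger that emits records.

# Log to './my_study/logs/my_study.log' and to stdout at INFO (2 * 10 = 20).
setup_logging("my_study", os.path.abspath("./my_study"),
              log_lvl=2, log_stdout=True)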
def setup_logging(args, path, name):
    """
    Set up logging based on the ArgumentParser.

    :param args: A Namespace object created by a parsed ArgumentParser.
    :param path: A default path to be used if a log path is not specified
        by user command line arguments.
    :param name: The name of the log file.
    """
    # If the user has specified a path, use that.
    if args.logpath:
        logpath = args.logpath
    # Otherwise, we should just output to the OUTPUT_PATH.
    else:
        logpath = make_safe_path(path, *["logs"])

    loglevel = args.debug_lvl * 10

    # Create the FileHandler and add it to the logger.
    create_parentdir(logpath)
    formatter = logging.Formatter(LFORMAT)
    ROOTLOGGER.setLevel(loglevel)

    log_path = make_safe_path(logpath, *["{}.log".format(name)])
    fh = logging.FileHandler(log_path)
    fh.setLevel(loglevel)
    fh.setFormatter(formatter)
    ROOTLOGGER.addHandler(fh)

    if args.logstdout:
        # Add the StreamHandler.
        sh = logging.StreamHandler()
        sh.setLevel(loglevel)
        sh.setFormatter(formatter)
        ROOTLOGGER.addHandler(sh)

    # Print the level of logging.
    LOGGER.info("INFO Logging Level -- Enabled")
    LOGGER.warning("WARNING Logging Level -- Enabled")
    LOGGER.critical("CRITICAL Logging Level -- Enabled")
    LOGGER.debug("DEBUG Logging Level -- Enabled")
def setup_logging(args, name):
    """
    Set up logging in the Main class.

    :param args: A Namespace object created by a parsed ArgumentParser.
    :param name: The name of the log file.
    """
    # Check if the user has specified a custom log path.
    if args.logpath:
        logger.info("Log path overwritten by command line -- %s",
                    args.logpath)
        log_path = args.logpath
    else:
        log_path = os.path.join(args.directory, "logs")

    loglevel = args.debug_lvl * 10

    # Attempt to create the logging directory.
    create_parentdir(log_path)
    formatter = logging.Formatter(LFORMAT)
    rootlogger.setLevel(loglevel)

    # Set up handlers.
    if args.logstdout:
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        rootlogger.addHandler(handler)

    log_file = os.path.join(log_path, "{}.log".format(name))
    handler = logging.FileHandler(log_file)
    handler.setFormatter(formatter)
    rootlogger.addHandler(handler)
    rootlogger.setLevel(loglevel)

    # Print the level of logging.
    logger.info("INFO Logging Level -- Enabled")
    logger.warning("WARNING Logging Level -- Enabled")
    logger.critical("CRITICAL Logging Level -- Enabled")
    logger.debug("DEBUG Logging Level -- Enabled")
def _setup_parameterized(self):
    """
    Set up the ExecutionGraph of a parameterized study.

    :returns: The path to the study's global workspace and an expanded
        ExecutionGraph based on the parameters and parameterized workflow
        steps.
    """
    # Construct the ExecutionGraph.
    dag = ExecutionGraph()
    dag.add_description(**self.description)
    # Items to store that should be reset.
    global_workspace = self.output.value  # Highest output directory.

    # Rework begins here:
    # First step, we need to map each workflow step to the parameters that
    # they actually use -- and only the parameters used. This setup will
    # make it so that workflows can be constructed with implicit stages.
    # That's to say that if a step only requires a subset of parameters,
    # we only need to run the set of combinations dictated by that subset.
    # NOTE: We're going to need to make a way for users to access the
    # workspaces of other steps. With this rework we won't be able to
    # assume that every directory has all parameters on it.
    used_params = {}
    workspaces = {}
    for parent, step, node in self.walk_study():
        # Source doesn't matter -- ignore it.
        if step == SOURCE:
            continue

        # Otherwise, we have a valid key.
        # We need to collect used parameters for two things:
        # 1. Collect the used parameters for the current step.
        # 2. Get the used parameters for the parent step.
        # The logic here is that the used parameters are going to be the
        # union of the used parameters for this step and ALL parent steps.
        # If we keep including the step's parent parameters, we will simply
        # carry parent parameters recursively.
        step_params = self.parameters.get_used_parameters(node)
        if parent != SOURCE:
            step_params |= used_params[parent]
        used_params[step] = step_params

    logger.debug("Used Parameters - \n%s", used_params)

    # Secondly, we need to now iterate over all combinations for each step
    # and simply apply the combination. We can then add the name to the
    # expanded map using only the parameters that we discovered above.
    for combo in self.parameters:
        # For each Combination in the parameters...
        logger.info("==================================================")
        logger.info("Expanding study '%s' for combination '%s'",
                    self.name, str(combo))
        logger.info("==================================================")
        # For each step in the Study:
        # Walk the study and construct a subtree based on the combination.
        for parent, step, node in self.walk_study():
            # If we find the source node, we can just add it and continue.
            if step == SOURCE:
                logger.debug("Source node found.")
                dag.add_node(SOURCE, None)
                continue

            logger.debug("Processing step '%s'.", step)
            # Due to the rework, we now can get the parameters used. We no
            # longer have to blindly apply the parameters. In fact, it's
            # better if we don't. We have to see if the name exists in the
            # DAG first. If it does, we can skip the step. Otherwise, apply
            # and add.
            if used_params[step]:
                logger.debug("Used parameters %s", used_params[step])
                # Apply the used parameters to the step.
                modified, step_exp = node.apply_parameters(combo)
                # Name the step based on the parameters used.
                combo_str = combo.get_param_string(used_params[step])
                step_name = "{}_{}".format(step_exp.name, combo_str)
                logger.debug(
                    "Step has been modified. Step '%s' renamed to '%s'",
                    step_exp.name, step_name)
                step_exp.name = step_name
                logger.debug("Resulting step name: %s", step_name)

                # Set the workspace to the parameterized workspace.
                self.output.value = os.path.join(global_workspace,
                                                 combo_str)

                # We now should account for varying workspace locations.
                # Search for the use of workspaces in the command line so
                # that we can go ahead and fill in the appropriate space
                # for this combination.
                cmd = step_exp.run["cmd"]
                used_spaces = re.findall(WSREGEX, cmd)
                for match in used_spaces:
                    logger.debug("Workspace found -- %s", match)
                    # Append the parameters that the step uses matching
                    # the current combo.
                    combo_str = combo.get_param_string(used_params[match])
                    logger.debug("Combo str -- %s", combo_str)
                    if combo_str:
                        _ = "{}_{}".format(match, combo_str)
                    else:
                        _ = match

                    # Replace the workspace tag in the command.
                    workspace_var = "$({}.workspace)".format(match)
                    cmd = cmd.replace(workspace_var, workspaces[_])
                    logger.debug("New cmd -- %s", cmd)
                step_exp.run["cmd"] = cmd
            else:
                # Otherwise, we know that this step is a joining node.
                step_exp = copy.deepcopy(node)
                modified = False
                logger.debug("No parameters found. Resulting name %s",
                             step_exp.name)
                self.output.value = os.path.join(global_workspace)

            # Add the workspace name to the map of workspaces.
            workspaces[step_exp.name] = self.output.value

            # Now we need to make sure we handle the dependencies.
            # We know the parent and the step name (whether it's modified
            # or not and is not _source). So now there are three cases:
            #   1. If the ExecutionGraph contains the parent name as it
            #      exists without parameterization, then we know we have
            #      a hub/joining node.
            #   2. If the ExecutionGraph does not have the parent node,
            #      then our next assumption is that it has a parameterized
            #      version of the parent. We need to check and make sure.
            #   3. Fall back third case... Abort. Something is not right.
            if step_exp.run["restart"]:
                rlimit = self._restart_limit
            else:
                rlimit = 0

            if parent != SOURCE:
                # With the rework, we now need to check the parent's used
                # parameters.
                combo_str = combo.get_param_string(used_params[parent])
                param_name = "{}_{}".format(parent, combo_str)

                # If the parent node is not '_source', check.
                if parent in dag.values:
                    # If the parent is in the dag, add the current step...
                    dag.add_step(step_exp.name, step_exp,
                                 self.output.value, rlimit)
                    # And its associated edge.
                    dag.add_edge(parent, step_exp.name)
                elif param_name in dag.values:
                    # Find the index in the step for the dependency...
                    i = step_exp.run['depends'].index(parent)
                    # Sub it with the parameterized dependency...
                    step_exp.run['depends'][i] = param_name
                    # Add the node and edge.
                    dag.add_step(step_exp.name, step_exp,
                                 self.output.value, rlimit)
                    dag.add_edge(param_name, step_exp.name)
                else:
                    msg = "Neither '{}' nor '{}' found in the " \
                          "ExecutionGraph. Unexpected error occurred." \
                          .format(parent, param_name)
                    logger.error(msg)
                    raise ValueError(msg)
            else:
                # If the parent is source, then we can just execute it
                # from '_source'.
                dag.add_step(step_exp.name, step_exp, self.output.value,
                             rlimit)
                dag.add_edge(SOURCE, step_exp.name)

            # Go ahead and substitute in the output path and create the
            # workspace in the ExecutionGraph.
            create_parentdir(self.output.value)
            step_exp.__dict__ = apply_function(step_exp.__dict__,
                                               self.output.substitute)

            # logging
            logger.debug("---------------- Modified --------------")
            logger.debug("Modified = %s", modified)
            logger.debug("step_exp = %s", step_exp.__dict__)
            logger.debug("----------------------------------------")

        # Reset the output path to the global_workspace.
        self.output.value = global_workspace
        logger.info("==================================================")

    return global_workspace, dag
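
# Illustrative sketch (not part of the package): the command rewriting in
# _setup_parameterized relies on a module-level WSREGEX pattern that is not
# shown here. The pattern, step names, and paths below are hypothetical and
# only demonstrate how "$(<step>.workspace)" monikers are swapped for the
# concrete workspace recorded for the matching parameterized step.
import re

WSREGEX = re.compile(r"\$\((?P<step>[\w-]+)\.workspace\)")  # Assumed pattern.

# Workspace recorded for an already-expanded step in this combination.
workspaces = {"run-sim_SIZE.10": "/studies/demo/SIZE.10/run-sim_SIZE.10"}

cmd = "post-process.py --input $(run-sim.workspace)/out.dat"
for match in WSREGEX.findall(cmd):
    # 'run-sim' used the SIZE parameter in this combination, so the lookup
    # key is the parameterized step name.
    key = "{}_{}".format(match, "SIZE.10")
    cmd = cmd.replace("$({}.workspace)".format(match), workspaces[key])

print(cmd)
# post-process.py --input /studies/demo/SIZE.10/run-sim_SIZE.10/out.dat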
def run_study(args):
    """Run a Maestro study."""
    # Load the Specification.
    try:
        spec = YAMLSpecification.load_specification(args.specification)
    except jsonschema.ValidationError as e:
        LOGGER.error(e.message)
        sys.exit(1)
    environment = spec.get_study_environment()
    steps = spec.get_study_steps()

    # Set up the output directory.
    out_dir = environment.remove("OUTPUT_PATH")
    if args.out:
        # If out is specified in the args, ignore OUTPUT_PATH.
        output_path = os.path.abspath(args.out)

        # If we are automatically launching, just set the input as yes.
        if os.path.exists(output_path):
            if args.autoyes:
                uinput = "y"
            elif args.autono:
                uinput = "n"
            else:
                uinput = six.moves.input(
                    "Output path already exists. Would you like to "
                    "overwrite it? [yn] ")

            if uinput.lower() in ACCEPTED_INPUT:
                print("Cleaning up existing out path...")
                shutil.rmtree(output_path)
            else:
                print("Opting to quit -- not cleaning up old out path.")
                sys.exit(0)
    else:
        if out_dir is None:
            # If we don't find OUTPUT_PATH in the environment, assume pwd.
            out_dir = os.path.abspath("./")
        else:
            # We just take the value from the environment.
            out_dir = os.path.abspath(out_dir.value)

        out_name = "{}_{}".format(
            spec.name.replace(" ", "_"),
            time.strftime("%Y%m%d-%H%M%S"))
        output_path = make_safe_path(out_dir, *[out_name])
    environment.add(Variable("OUTPUT_PATH", output_path))

    # Set up file logging.
    create_parentdir(os.path.join(output_path, "logs"))
    log_path = os.path.join(output_path, "logs",
                            "{}.log".format(spec.name))
    LOG_UTIL.add_file_handler(log_path, LFORMAT, args.debug_lvl)

    # Check for pargs without the matching pgen.
    if args.pargs and not args.pgen:
        msg = "Cannot use the 'pargs' parameter without specifying a " \
              "'pgen'!"
        LOGGER.exception(msg)
        raise ArgumentError(msg)

    # Addition of the $(SPECROOT) to the environment.
    spec_root = os.path.split(args.specification)[0]
    spec_root = Variable("SPECROOT", os.path.abspath(spec_root))
    environment.add(spec_root)

    # Handle loading a custom ParameterGenerator if specified.
    if args.pgen:
        # 'pgen_args' has a default of an empty list, which should
        # translate to an empty dictionary.
        kwargs = create_dictionary(args.pargs)
        # Copy the Python file used to generate parameters.
        shutil.copy(args.pgen, output_path)

        # Add keywords and environment from the spec to pgen args.
        kwargs["OUTPUT_PATH"] = output_path
        kwargs["SPECROOT"] = spec_root

        # Load the parameter generator.
        parameters = load_parameter_generator(args.pgen, environment,
                                              kwargs)
    else:
        parameters = spec.get_parameters()

    # Set up the study.
    study = Study(spec.name, spec.description, studyenv=environment,
                  parameters=parameters, steps=steps,
                  out_path=output_path)

    # Check that the number of submission attempts is greater than 0.
    if args.attempts < 1:
        _msg = "Submission attempts must be greater than 0. " \
               "'{}' provided.".format(args.attempts)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check that the throttle is zero or greater.
    if args.throttle < 0:
        _msg = "Submission throttle must be a value of zero or greater. " \
               "'{}' provided.".format(args.throttle)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Check that the restart limit is zero or greater.
    if args.rlimit < 0:
        _msg = "Restart limit must be a value of zero or greater. " \
               "'{}' provided.".format(args.rlimit)
        LOGGER.error(_msg)
        raise ArgumentError(_msg)

    # Set up the study workspace and configure it for execution.
    study.setup_workspace()
    study.configure_study(
        throttle=args.throttle, submission_attempts=args.attempts,
        restart_limit=args.rlimit, use_tmp=args.usetmp,
        hash_ws=args.hashws, dry_run=args.dry)
    study.setup_environment()

    if args.dry:
        # If performing a dry run, drive sleep time down to generate
        # scripts quickly.
        sleeptime = 1
    else:
        # Otherwise, use args to decide the sleep time.
        sleeptime = args.sleeptime

    batch = {"type": "local"}
    if spec.batch:
        batch = spec.batch
        if "type" not in batch:
            batch["type"] = "local"

    # Copy the spec to the output directory.
    shutil.copy(args.specification, study.output_path)

    # Use the Conductor's classmethods to store the study and batch info.
    Conductor.store_study(study)
    Conductor.store_batch(study.output_path, batch)

    # If we are automatically launching, just set the input as yes.
    if args.autoyes or args.dry:
        uinput = "y"
    elif args.autono:
        uinput = "n"
    else:
        uinput = six.moves.input(
            "Would you like to launch the study? [yn] ")

    if uinput.lower() in ACCEPTED_INPUT:
        if args.fg:
            # Launch in the foreground.
            LOGGER.info("Running Maestro Conductor in the foreground.")
            conductor = Conductor(study)
            conductor.initialize(batch, sleeptime)
            completion_status = conductor.monitor_study()
            conductor.cleanup()
            return completion_status.value
        else:
            # Launch the conductor as a detached process with nohup.
            log_path = make_safe_path(
                study.output_path, *["{}.txt".format(study.name)])
            cmd = [
                "nohup", "conductor",
                "-t", str(sleeptime),
                "-d", str(args.debug_lvl),
                study.output_path,
                ">", log_path, "2>&1"
            ]
            LOGGER.debug(" ".join(cmd))
            start_process(" ".join(cmd))

            print("Study launched successfully.")
    else:
        print("Study launch aborted.")

    return 0
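
# Hypothetical invocation (not part of the package): run_study expects the
# Namespace produced by the CLI's ArgumentParser. The attribute names below
# mirror what run_study reads, but the values and defaults are assumptions,
# not the real parser's.
from argparse import Namespace

args = Namespace(
    specification="study.yaml",   # Path to the YAML study specification.
    out=None,                     # Defer to OUTPUT_PATH / default naming.
    autoyes=True, autono=False,   # Launch without prompting.
    pgen=None, pargs=[],          # No custom ParameterGenerator.
    attempts=1, throttle=0, rlimit=1,
    usetmp=False, hashws=False, dry=False,
    sleeptime=60, debug_lvl=2, fg=False,
)
exit_code = run_study(args)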
def setup_workspace(self): """Initialize the record's workspace.""" create_parentdir(self.workspace.value)
def setup(self, submission_attempts=1, restart_limit=1, throttle=0,
          use_tmp=False):
    """
    Perform initial setup of a study.

    The method is used for going through and actually acquiring each
    dependency, substituting variables, sources and labels. Also sets up
    the folder structure for the study.

    :param submission_attempts: Number of attempted submissions before
        marking a step as failed.
    :param restart_limit: Upper limit on the number of times a step with
        a restart command can be resubmitted before it is considered
        failed.
    :param throttle: The maximum number of in-progress jobs allowed.
        [0 denotes no cap]
    :param use_tmp: Boolean value specifying if the generated
        ExecutionGraph dumps its information into a temporary directory.
    :returns: True if the Study is successfully set up, False otherwise.
    """
    # If the study has already been set up, just return.
    if self._issetup:
        logger.info("%s is already set up, returning.", self.name)
        return True

    self._submission_attempts = submission_attempts
    self._restart_limit = restart_limit
    self._submission_throttle = throttle
    self._use_tmp = use_tmp

    logger.info(
        "\n------------------------------------------\n"
        "Output path = %s\n"
        "Submission attempts = %d\n"
        "Submission restart limit = %d\n"
        "Submission throttle limit = %d\n"
        "Use temporary directory = %s\n"
        "------------------------------------------",
        self._out_path, submission_attempts, restart_limit, throttle,
        use_tmp
    )

    # Set up the environment if it hasn't been already.
    if not self.environment.is_set_up:
        logger.info("Setting up the environment.")
        self.environment.acquire_environment()

    try:
        create_parentdir(self._out_path)
    except Exception as e:
        logger.error(str(e))
        return False

    # Apply all environment artifacts and acquire everything.
    for key, node in self.values.items():
        logger.info("Applying to step '%s' of the study '%s'...",
                    key, node)
        if node:
            node.__dict__ = apply_function(
                node.__dict__, self.environment.apply_environment)

    # Flag the study as set up.
    self._issetup = True
    return True
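
# Usage sketch (not part of the package): 'study' is an already-constructed
# Study instance; the argument values are illustrative only.
if not study.setup(submission_attempts=3, restart_limit=2, throttle=5,
                   use_tmp=False):
    raise RuntimeError("Study setup failed; see the log for details.")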
def store_metadata(self):
    """Store metadata related to the study."""
    # Create the metadata directory.
    create_parentdir(self._meta_path)

    # Store the environment object in order to preserve it.
    path = os.path.join(self._meta_path, "study")
    create_parentdir(path)
    path = os.path.join(path, "env.pkl")
    with open(path, 'wb') as pkl:
        pickle.dump(self, pkl)

    # Construct other metadata related to study construction.
    _workspaces = {}
    for key, value in self.workspaces.items():
        if key == "_source":
            _workspaces[key] = value
        elif key in self.step_combos:
            _workspaces[key] = os.path.split(value)[-1]
        else:
            _workspaces[key] = \
                os.path.sep.join(value.rsplit(os.path.sep)[-2:])

    # Construct relative paths for the combinations and nest them in the
    # same way as the step combinations dictionary.
    _step_combos = {}
    for key, value in self.step_combos.items():
        if key == SOURCE:
            _step_combos[key] = self.workspaces[key]
        elif not self.used_params[key]:
            _ws = self.workspaces[key]
            _step_combos[key] = {key: os.path.split(_ws)[-1]}
        else:
            _step_combos[key] = {}
            for combo in value:
                _ws = self.workspaces[combo]
                _step_combos[key][combo] = \
                    os.path.sep.join(_ws.rsplit(os.path.sep)[-2:])

    metadata = {
        "dependencies": self.depends,
        "hub_dependencies": self.hub_depends,
        "workspaces": _workspaces,
        "used_parameters": self.used_params,
        "step_combinations": _step_combos,
    }
    # Write out the study construction metadata.
    path = os.path.join(self._meta_path, "metadata.yaml")
    with open(path, "wb") as metafile:
        metafile.write(yaml.dump(metadata).encode("utf-8"))

    # Write out parameter metadata.
    metadata = self.parameters.get_metadata()
    path = os.path.join(self._meta_path, "parameters.yaml")
    with open(path, "wb") as metafile:
        metafile.write(yaml.dump(metadata).encode("utf-8"))

    # Write out environment metadata.
    path = os.path.join(self._meta_path, "environment.yaml")
    with open(path, "wb") as metafile:
        metafile.write(yaml.dump(os.environ.copy()).encode("utf-8"))
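
# Hypothetical helper (not part of the package): reads back two of the
# artifacts written by store_metadata. The '<meta_path>/study/env.pkl' and
# 'environment.yaml' locations come from the code above; everything else is
# an assumption for illustration.
import os
import pickle

import yaml


def load_study_metadata(meta_path):
    """Reload the pickled study object and the captured environment."""
    with open(os.path.join(meta_path, "study", "env.pkl"), "rb") as pkl:
        study = pickle.load(pkl)

    # The saved copy of os.environ is plain strings, so safe_load suffices.
    with open(os.path.join(meta_path, "environment.yaml")) as envfile:
        saved_env = yaml.safe_load(envfile)

    return study, saved_env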