def modify_next_group(
        self,
        stack: "StageStack",
) -> List[str]:
    """Compute the grouping resulting from the group stage on top of ``stack``.

    The grouping requested by the directly preceding stack (if any) is
    fetched recursively and extended with the group named by the top stage
    name (the part after ``self.PREFIX``).

    Args:
      stack: Stack whose last stage name is the group statement to apply.

    Returns:
      A new list of group names. ``ALL`` contributes nothing, ``BIN``
      contributes ``"__bin__"``, any other name is appended verbatim.

    Raises:
      YmpStageError: If the top stage name is not matched by this object,
        or if ``ALL`` is requested although a previous group statement
        already produced a non-empty grouping.
    """
    name = stack.stage_names[-1]
    if not self.match(name):
        raise YmpStageError(f"Internal Error: {name} not a group?")

    # Fetch the grouping of the directly preceding stack.
    inherited = None
    if stack.prev_stack is not None:
        inherited = stack.prev_stack.stage.modify_next_group(
            stack.prev_stack)
    # Work on a copy: extending the returned object in place would modify
    # a list that still belongs to the previous stage.
    group = list(inherited or [])

    group_name = name[len(self.PREFIX):]
    if group_name == "ALL":
        if group:
            raise YmpStageError(
                "Regrouping to ALL means previous group statement has no effect"
            )
    elif group_name == "BIN":
        group.append("__bin__")
    else:
        group.append(group_name)
    return group
def __init__(self, path, stage=None):
    """Set up the stage stack described by the dotted ``path``.

    Args:
      path: Stage names joined by ``"."``; the first component is looked
        up in the configured projects.
      stage: Optional stage object to use as top stage. If given, it must
        match the last path component; otherwise the top stage is found
        via ``find_stage``.

    Raises:
      YmpStageError: If no project is found for the first path component
        or if ``stage`` does not match the top stage name.
    """
    self.name = path
    self.stage_names = path.split(".")
    self.stages = [find_stage(name) for name in self.stage_names]

    cfg = ymp.get_config()

    # Determine project. A missing key is reported as a stage error; both
    # lookup error types are caught because the mapping type of
    # ``cfg.projects`` is not visible here.
    try:
        self.project = cfg.projects[self.stage_names[0]]
    except (KeyError, IndexError) as exc:
        raise YmpStageError(
            f"No project for stage stack {path} found") from exc

    # Determine top stage
    remaining = copy.copy(self.stage_names)
    top_stage = remaining.pop()
    if stage:
        if not stage.match(top_stage):
            raise YmpStageError(
                f"Internal error: {top_stage} not matched by {stage}")
    else:
        stage = find_stage(top_stage)
    self.stage = stage

    # Determine grouping: a "group_<name>" entry directly below the top
    # stage overrides the stage's own ``group`` attribute.
    self.group = getattr(stage, "group", None)
    if remaining and remaining[-1].startswith("group_"):
        # Strip only the prefix so names containing "_" are kept whole.
        self.group = [remaining.pop()[len("group_"):]]

    # Collect inputs
    self.prevs = self.resolve_prevs()
    if self.group is None:
        # Ordered, de-duplicated groups of all providing stacks, walking
        # the providers in reverse order.
        groups = list(
            dict.fromkeys(group
                          for prev in reversed(list(self.prevs.values()))
                          for group in prev.group))
        self.group = self.project.minimize_variables(groups)

    log.info("Stage stack %s using column %s", self, self.group)

    # Report which file types are taken from which providing stack.
    prevmap = {}
    for typ, stack in self.prevs.items():
        prevmap.setdefault(str(stack), []).append(typ)
    for stack, types in prevmap.items():
        ftypes = ", ".join(types).replace("/{sample}", "*")
        title = stack.split(".")[-1]
        # Use the short name only if it is unambiguous within this stack.
        if self.stage_names.count(title) != 1:
            title = stack
        log.info(f".. from {title}: {ftypes}")
def resolve_prevs(self):
    """Resolve the providers for all inputs of this stack's stage.

    The stage's inputs are passed to ``_do_resolve_prevs`` together with
    the stage itself and ``exclude_self=True``. If the inputs collection
    is still non-empty afterwards, the remainder is reported as missing.

    Returns:
      The value produced by ``_do_resolve_prevs``.

    Raises:
      YmpStageError: If inputs remain after resolution.
    """
    top = self.stage
    pending = top.get_inputs()
    resolved = self._do_resolve_prevs(top, pending, exclude_self=True)
    if not pending:
        return resolved
    raise YmpStageError(self._format_missing_input_error(pending))
def target(self, args, kwargs):
    """
    Determines the IDs for a given input data type and output ID
    (replaces "{:target:}").

    Args:
      args: Passed through to ``self.prev``.
      kwargs: Must contain ``'wc'`` (an object with a ``target``
        attribute); ``'rule'`` is optional and only used for messages.

    Returns:
      The ids obtained from the providing stack's ``get_ids``.

    Raises:
      YmpStageError: If the providing stack returns an empty list.
    """
    # Find stage stack from which input should be requested.
    # (not sure why the below causes a false positive in pylint)
    prev_stack = self.prev(args, kwargs)  # pylint: disable=not-callable

    # Find name of current output target
    cur_target = kwargs['wc'].target
    rulename = getattr(kwargs.get('rule'), 'name', 'N/A')

    if self.debug:
        log.error("input ids for %s", self)
        log.warning(" rule %s", rulename)
        log.warning(" from stack %s", prev_stack)

    cols = self.group
    vals = cur_target
    # An empty grouping with target 'ALL' means "no filter at all".
    if cols == [] and vals == 'ALL':
        cols = vals = None

    ids = prev_stack.get_ids(prev_stack.group, cols, vals)
    if ids == []:
        # ``cols`` may have been reset to None above; guard the join so
        # the intended error is raised instead of a TypeError.
        raise YmpStageError(
            f"Internal Error: Failed to find inputs\n\n"
            f"Context:\n"
            f" In stack '{self}' rule '{rulename}'\n"
            f" Building '{vals}' (grouped on '{','.join(cols or [])}')\n"
            f" Seeking input from '{prev_stack}' "
            f"(grouped on '{','.join(prev_stack.group)}')"
            f"\n")
    return ids
def minimize_variables(self, groups):
    """Reduce a list of grouping names to at most one entry.

    Steps, each applied only while more than one name is left:
    drop every ``'ALL'`` entry, then let ``self.data.groupby_dedup``
    reduce the list. An empty or missing ``groups`` falls back to
    ``[self.idcol]``.

    Returns:
      The reduced list of names.

    Raises:
      YmpStageError: If more than one name remains.
    """
    selected = groups if groups else [self.idcol]

    if len(selected) > 1:
        # FIXME: lowercase?
        selected = [name for name in selected if name != 'ALL']

    if len(selected) > 1:
        selected = self.data.groupby_dedup(selected)

    if len(selected) <= 1:
        return selected
    raise YmpStageError(
        f"multi-idx grouping not implemented (groups={selected})")
def find_stage(name):
    """Find the object responsible for the stage called ``name``.

    Lookup order: ``group_*`` names yield a new ``GroupBy``; ``ref_*``
    names are looked up in the configured references; then configured
    projects, configured pipelines and finally the first registered
    stage whose ``match`` accepts the name.

    Raises:
      YmpStageError: If a ``ref_*`` name refers to an unknown reference
        or if nothing matches ``name``.
    """
    cfg = ymp.get_config()
    registry = Stage.get_registry()

    if name.startswith("group_"):
        return GroupBy(name)

    if name.startswith("ref_"):
        refname = name[4:]
        if refname in cfg.ref:
            return cfg.ref[refname]
        # Report the requested name; indexing cfg.ref here would fail
        # because the reference was just found to be absent.
        raise YmpStageError(f"Unknown reference '{refname}'")

    if name in cfg.projects:
        return cfg.projects[name]
    if name in cfg.pipelines:
        return cfg.pipelines[name]

    for stage in registry.values():
        if stage.match(name):
            return stage
    raise YmpStageError(f"Unknown stage '{name}'")
def bin(self, _args=None, kwargs=None):
    """
    Dynamic ID for splitting stages

    Registers the item, with its ``".{:bin:}"`` part removed, as an
    output via ``register_inout`` and records the resulting suffix under
    the rule's name in ``self.checkpoints``. Never returns normally.

    Raises:
      YmpStageError: If the rule in ``kwargs['rule']`` is not a checkpoint.
      RemoveValue: Always, once the output has been recorded.
    """
    rule = kwargs['rule']
    if not rule.is_checkpoint:
        raise YmpStageError("Only checkpoints may use '{:bin:}'")

    stripped = kwargs['item'].replace(".{:bin:}", "")
    suffix = self.register_inout("this", self._outputs, stripped)
    recorded = self.checkpoints.setdefault(rule.name, set())
    recorded.add(suffix)
    raise RemoveValue()
def get_ids(self, groups, match_groups=None, match_value=None):
    """Determine the ids for ``groups``, optionally restricted by a match.

    * ``groups == ['ALL']`` yields ``'ALL'``.
    * If ``groups`` equals ``match_groups``, ``match_value`` is used.
    * If the first group is a column of ``self.data``, the ids come from
      ``self.data.get`` (when a match other than ``['ALL']`` is given)
      or from ``self.data.column``.

    If none of this yields a truthy result, the single group name itself
    is returned.

    Raises:
      YmpStageError: If no ids were found and ``groups`` does not have
        exactly one element.
    """
    if groups == ['ALL']:
        ids = 'ALL'
    elif groups == match_groups:
        ids = match_value
    elif groups[0] in self.data.columns():
        column = groups[0]
        if match_groups and match_groups != ['ALL']:
            ids = self.data.get(match_groups[0], match_value, column)
        else:
            ids = self.data.column(column)
    else:
        ids = None

    if ids:
        return ids
    if len(groups) != 1:
        raise YmpStageError(
            f"no ids for {groups} {match_groups} {match_value}")
    return groups[0]
def __init__(self, path):
    """Set up the stage stack described by the dotted ``path``.

    Resolves every path component to a stage, links to the stack of the
    preceding components (if any), looks up the project named by the
    first component, resolves the input providers and finally asks the
    top stage for the grouping in effect.

    Raises:
      YmpStageError: If no project is configured for the first component.
    """
    #: Name of stack, i.e. its full path
    self.name = path
    #: Names of stages on stack
    self.stage_names = path.split(".")
    #: Stages on stack
    self.stages = [find_stage(stage_name) for stage_name in self.stage_names]
    #: Top Stage
    self.stage = self.stages[-1]
    #: Top Stage Name
    self.stage_name = self.stage_names[-1]

    has_parent = len(self.stages) > 1
    #: Stage below top stage or None if first in stack
    self.prev_stage = self.stages[-2] if has_parent else None
    #: Instance for all stage names but the last, or None if first in stack
    self.prev_stack = (
        self.instance(".".join(self.stage_names[:-1])) if has_parent else None
    )

    #: Project on which stack operates
    #: This is needed for grouping variables currently.
    self.project = ymp.get_config().projects.get(self.stage_names[0])
    if not self.project:
        raise YmpStageError(f"No project for stage stack {path} found")

    #: Mapping of each input type required by the stage of this stack
    #: to the prefix stack providing it.
    self.prevs = self.resolve_prevs()

    # Gather all previous groups: walk the providers in reverse order and
    # keep the first occurrence of every group name.
    ordered = {}
    for provider in reversed(list(self.prevs.values())):
        for group in provider.group:
            ordered.setdefault(group, None)
    groups = list(ordered)
    project_groups, other_groups = self.project.minimize_variables(groups)

    #: Grouping in effect for this StageStack. An empty list groups into
    #: one pseudo target, 'ALL'.
    self.group: List[str] = self.stage.get_group(
        self, project_groups + other_groups)