def test_str(set_cycling_type):
    """Check the string form of a TaskTrigger with and without an offset."""
    set_cycling_type()

    # Offset flagged as absolute: shown in brackets after the task name.
    absolute = TaskTrigger('name', '1', 'output', offset_is_absolute=True)
    assert str(absolute) == 'name[1]:output'

    # Offset given as '+P1' with default flags: also shown in brackets.
    relative = TaskTrigger('name', '+P1', 'output')
    assert str(relative) == 'name[+P1]:output'

    # No offset: only the task name and the output are shown.
    no_offset = TaskTrigger('name', None, 'output')
    assert str(no_offset) == 'name:output'
def test_str(cycling_mode):
    """Check the string form of a TaskTrigger with and without an offset.

    Args:
        cycling_mode: fixture; called once before any trigger is built.
    """
    cycling_mode()

    # Offset flagged as absolute: shown in brackets after the task name.
    trigger = TaskTrigger('name', '1', 'output', offset_is_absolute=True)
    assert str(trigger) == 'name[1]:output'

    # Offset given as '+P1' with default flags: also shown in brackets.
    trigger = TaskTrigger('name', '+P1', 'output')
    assert str(trigger) == 'name[+P1]:output'

    # No offset: only the task name and the output are shown.
    trigger = TaskTrigger('name', None, 'output')
    assert str(trigger) == 'name:output'
def test_check_for_list_of_lists_exp():
    """Check str(Dependency) when the expression contains a nested list."""
    trigger = TaskTrigger('fake_task_name', None, None, 'fakeOutput')
    nested_expression = ['task', '&', 'another_task']
    dep = Dependency([trigger, '&', nested_expression], [trigger], False)

    # The nested list is expected to appear as its plain list repr.
    assert str(dep) == (
        "( fake_task_name:fakeOutput ) ( & ) "
        "['task', '&', 'another_task']"
    )
def test_check_for_true_suicide():
    """Check str(Dependency) when the suicide flag is True."""
    trigger = TaskTrigger('fake_task_name', None, None, 'fakeOutput')
    dep = Dependency([trigger, '&', trigger], [trigger], True)

    # With the flag set, the expected string starts with '! '.
    assert str(dep) == (
        '! ( fake_task_name:fakeOutput ) ( & ) '
        '( fake_task_name:fakeOutput )'
    )
def test_check_for_false_suicide():
    """Check str(Dependency) when the suicide flag is False."""
    trigger = TaskTrigger('fake_task_name', '1', None, 'fakeOutput')
    dep = Dependency([trigger, '&', trigger], [trigger], False)

    # With the flag unset, the expected string has no leading '! '.
    assert str(dep) == (
        '( fake_task_name[1]:fakeOutput ) ( & ) '
        '( fake_task_name[1]:fakeOutput )'
    )
def parse(self, node):
    """Parse a graph node string, memoising the result in ``self._nodes``.

    Args:
        node (str): the graph node to parse.

    Return:
        tuple:
            (name, offset, output,
            offset_is_from_icp, offset_is_irregular, offset_is_absolute)

    NOTE that offsets from ICP like foo[^] and foo[^+P1] are not
    considered absolute like foo[2] etc.

    Raise:
        GraphParseError: if the node does not match ``self.REC_NODE``.
    """
    # Cache hit: nothing to do.
    if node in self._nodes:
        return self._nodes[node]

    parsed = self.REC_NODE.match(node)
    if not parsed:
        raise GraphParseError('Illegal graph node: %s' % node)
    name, icp_mark, offset, output = parsed.groups()

    from_icp = icp_mark == '^'
    if from_icp and not offset:
        # ICP marker without an explicit offset (e.g. foo[^]).
        offset = self._get_offset()

    is_absolute = False
    is_irregular = False
    if offset:
        is_absolute = bool(is_offset_absolute(offset))
        is_irregular = bool(self.REC_IRREGULAR_OFFSET.search(offset))
        if not is_irregular:
            # Only offsets not matching the irregular pattern are passed
            # back through _get_offset.
            offset = self._get_offset(offset)

    self._nodes[node] = (
        name,
        offset,
        TaskTrigger.standardise_name(output),
        from_icp,
        is_irregular,
        is_absolute,
    )
    return self._nodes[node]
def test_get_parent_point(cycling_mode):
    """Exercise TaskTrigger.get_parent_point for each kind of offset."""
    cycling_mode()
    point_1 = get_point('1')
    point_2 = get_point('2')

    # No offset: the point passed in comes back unchanged.
    no_offset = TaskTrigger('name', None, 'output')
    assert no_offset.get_parent_point(point_1) == point_1

    # Absolute offset: the offset is returned even when the point is None.
    absolute = TaskTrigger(
        'name', point_1, 'output', offset_is_absolute=True)
    assert absolute.get_parent_point(None) == point_1

    # '+P1' offset with default flags: point 1 maps to point 2.
    relative = TaskTrigger('name', '+P1', 'output', initial_point=point_1)
    assert relative.get_parent_point(point_1) == point_2

    # Offset from the initial point: the same answer whichever point is
    # passed in.
    from_icp = TaskTrigger(
        'name', '+P1', 'output',
        offset_is_from_icp=True, initial_point=point_1)
    assert from_icp.get_parent_point(point_2) == point_2
    assert from_icp.get_parent_point(point_1) == point_2
def test_check_exeption():
    """get_trigger_name must raise TriggerExpressionError for this input."""
    bad_expression = "Foo:Elephant"
    with pytest.raises(TriggerExpressionError):
        TaskTrigger.get_trigger_name(bad_expression)
def test_check_with_no_cycle_point_or_offset():
    """Check str(TaskTrigger) when every optional argument is None."""
    trigger = TaskTrigger(
        'fake_task_name', None, 'fakeOutput', None, None, None, None)

    # Nothing but the task name and output is expected in the string.
    assert str(trigger) == 'fake_task_name:fakeOutput'
def test_get_child_point(cycling_mode):
    """Exercise TaskTrigger.get_child_point for each kind of offset."""
    cycling_mode()
    point_0 = get_point('0')
    point_1 = get_point('1')
    point_2 = get_point('2')
    every_cycle = get_sequence('P1', point_1)

    # No offset: the point passed in comes back unchanged.
    no_offset = TaskTrigger('name', None, 'output')
    assert no_offset.get_child_point(point_1, every_cycle) == point_1
    assert no_offset.get_child_point(point_2, every_cycle) == point_2

    # Absolute offset, called with a None point.
    absolute = TaskTrigger('name', '+P1', 'output', offset_is_absolute=True)
    assert absolute.get_child_point(None, every_cycle) == point_1

    # Offset from the initial point, called with a None point.
    from_icp = TaskTrigger('name', '+P1', 'output', offset_is_from_icp=True)
    assert from_icp.get_child_point(None, every_cycle) == point_1

    # Irregular offsets, in both directions.
    irregular_plus = TaskTrigger(
        'name', '+P1', 'output', offset_is_irregular=True)
    assert irregular_plus.get_child_point(point_1, every_cycle) == point_0

    irregular_minus = TaskTrigger(
        'name', '-P1', 'output', offset_is_irregular=True)
    assert irregular_minus.get_child_point(point_1, every_cycle) == point_2

    # Offsets with default flags and no sequence, in both directions.
    plain_plus = TaskTrigger('name', '+P1', 'output')
    assert plain_plus.get_child_point(point_1, None) == point_0

    plain_minus = TaskTrigger('name', '-P1', 'output')
    assert plain_minus.get_child_point(point_1, None) == point_2
def test_check_with_cycle_point():
    """Check str(TaskTrigger) when a cycle point of '1' is supplied."""
    trigger = TaskTrigger('fake_task_name', '1', None, 'fakeOutput')

    # The cycle point is expected in brackets after the task name.
    assert str(trigger) == 'fake_task_name[1]:fakeOutput'
def _compute_triggers(self, orig_expr: str, rights: List[str], expr: str, info: List[Tuple[str, str, str]]) -> None: """Store trigger info from "expr => right". Args: orig_expr: the original associated graph expression rights: list of right-side nodes including qualifiers like :fail? expr: the associated graph expression info: [(name, offset, trigger-name)] for each name in expr. """ trigs = [] for name, offset, trigger in info: # Replace finish triggers (must be done after member substn). if name.startswith(self.__class__.XTRIG): trigs += [name] elif trigger == TASK_OUTPUT_FINISHED: this = f"{name}{offset}:{trigger}" that = "(%s%s:%s%s%s%s:%s)" % ( name, offset, TASK_OUTPUT_SUCCEEDED, self.__class__.OP_OR, name, offset, TASK_OUTPUT_FAILED) expr = expr.replace(this, that) trigs += [ "%s%s:%s" % (name, offset, TASK_OUTPUT_SUCCEEDED), "%s%s:%s" % (name, offset, TASK_OUTPUT_FAILED) ] else: trigs += [f"{name}{offset}:{trigger}"] for right in rights: m = self.__class__.REC_RHS_NODE.match(right) # This will match, bad nodes are detected earlier (type ignore): suicide_char, name, output, opt_char = m.groups() # type: ignore suicide = (suicide_char == self.__class__.SUICIDE) optional = (opt_char == self.__class__.OPTIONAL) if output: output = output.strip(self.__class__.QUALIFIER) if name in self.family_map: fam = True mems = self.family_map[name] if not output: # (Plain family name on RHS). # Make implicit success explicit. output = self.__class__.QUAL_FAM_SUCCEED_ALL elif output.startswith("finish"): if optional: raise GraphParseError( f"Family pseudo-output {name}:{output} can't be" " optional") # But implicit optional for the real succeed/fail outputs. optional = True try: outputs = self.__class__.fam_to_mem_output_map[output] except KeyError: # Illegal family trigger on RHS of a pair. raise GraphParseError( f"Illegal family trigger: {name}:{output}") else: fam = False if not output: # Make implicit success explicit. 
output = TASK_OUTPUT_SUCCEEDED else: # Convert to standard output names if necessary. output = TaskTrigger.standardise_name(output) mems = [name] outputs = [output] for mem in mems: self._set_triggers(mem, suicide, trigs, expr, orig_expr) for output in outputs: self._set_output_opt(mem, output, optional, suicide, fam)
def _proc_dep_pair(self, pair: Tuple[Optional[str], str]) -> None: """Process a single dependency pair 'left => right'. 'left' can be a logical expression of qualified node names. 'left' can be None, when triggering a left-side or lone node. 'left' can be "", if null task name in graph error (a => => b). 'right' can be one or more node names joined by AND. 'right' can't be None or "". A node is an xtrigger, or a task or a family name. A qualified name is NAME([CYCLE-POINT-OFFSET])(:QUALIFIER). Trigger qualifiers, but not cycle offsets, are ignored on the right to allow chaining. """ left, right = pair # Raise error for right-hand-side OR operators. if self.__class__.OP_OR in right: raise GraphParseError(f"Illegal OR on right side: {right}") # Raise error if suicide triggers on the left of the trigger. if left and self.__class__.SUICIDE in left: raise GraphParseError("Suicide markers must be" f" on the right of a trigger: {left}") # Ignore cycle point offsets on the right side. # (Note we can't ban this; all nodes get process as left and right.) if '[' in right: return # Check that parentheses match. if left and left.count("(") != left.count(")"): raise GraphParseError("Mismatched parentheses in: \"" + left + "\"") # Split right side on AND. rights = right.split(self.__class__.OP_AND) if '' in rights or right and not all(rights): raise GraphParseError( f"Null task name in graph: {left} => {right}") if not left or (self.__class__.OP_OR in left or '(' in left): # Treat conditional or bracketed expressions as a single entity. # Can get [None] or [""] here lefts: List[Optional[str]] = [left] else: # Split non-conditional left-side expressions on AND. 
# Can get [""] here too # TODO figure out how to handle this wih mypy: # assign List[str] to List[Optional[str]] lefts = left.split(self.__class__.OP_AND) # type: ignore if '' in lefts or left and not all(lefts): raise GraphParseError( f"Null task name in graph: {left} => {right}") for left in lefts: # Extract information about all nodes on the left. if left: info = self.__class__.REC_NODES.findall(left) expr = left else: # There is no left-hand-side task. info = [] expr = '' n_info: List[Tuple[str, str, str, bool]] = [] for name, offset, trig, opt_char in info: opt = opt_char == self.__class__.OPTIONAL if name.startswith(self.__class__.XTRIG): n_info.append((name, offset, trig, opt)) continue if trig: # Replace with standard trigger name if necessary trig = trig.strip(self.__class__.QUALIFIER) n_trig = TaskTrigger.standardise_name(trig) if n_trig != trig: if offset: this = r'\b%s\b%s:%s(?!:)' % (re.escape(name), re.escape(offset), re.escape(trig)) else: this = r'\b%s:%s\b(?![\[:])' % (re.escape(name), re.escape(trig)) that = f"{name}{offset}:{n_trig}" expr = re.sub(this, that, expr) else: # Make success triggers explicit. n_trig = TASK_OUTPUT_SUCCEEDED if offset: this = r'\b%s\b%s(?!:)' % (re.escape(name), re.escape(offset)) else: this = r'\b%s\b(?![\[:])' % re.escape(name) that = f"{name}{offset}:{n_trig}" expr = re.sub(this, that, expr) n_info.append((name, offset, n_trig, opt)) info = n_info # Determine semantics of all family triggers present. family_trig_map = {} for name, _, trig, _ in info: if name.startswith(self.__class__.XTRIG): # Avoid @xtrigger nodes. continue if name in self.family_map: # Family; deal with members. try: family_trig_map[(name, trig)] = ( self.__class__.fam_to_mem_trigger_map[trig]) except KeyError: # "FAM:bad => foo" in LHS (includes "FAM => bar" too). raise GraphParseError( f"Illegal family trigger in {expr}") else: # Not a family. 
if trig in self.__class__.fam_to_mem_trigger_map: raise GraphParseError( "family trigger on non-family namespace {expr}") # remove '?' from expr (not needed in logical trigger evaluation) expr = re.sub(self.__class__._RE_OPT, '', expr) self._families_all_to_all(expr, rights, info, family_trig_map)
def parse_graph(self, graph_string: str) -> None:
    """Parse the graph string for a single graph section.

    (Assumes any general line-continuation markers have been processed).
       1. Strip comments, whitespace, and blank lines.
          (all whitespace is removed up front so we don't have to consider
          it in regexes and strip it from matched elements)
       2. Join incomplete lines starting or ending with '=>'.
       3. Replicate and expand any parameterized lines.
       4. Split and process by pairs "left-expression => right-node":
          i. Replace families with members (any or all semantics).
         ii. Record parsed dependency information for each right-side node.

    Args:
        graph_string: the text of one graph section.

    Raises:
        GraphParseError: for a leading or dangling continuation sequence,
            consecutive lines that end and start with one, or a
            double-character AND/OR operator. Other errors may come from
            the helpers called here (``_report_invalid_lines``,
            ``_proc_dep_pair``).
    """
    # Strip comments, whitespace, and blank lines.
    non_blank_lines = []
    bad_lines = []
    for line in graph_string.split('\n'):
        modified_line = self.__class__.REC_COMMENT.sub('', line)

        # Ignore empty lines
        if not modified_line or modified_line.isspace():
            continue

        # Catch simple bad lines that would be accepted once
        # spaces are removed, e.g. 'foo bar => baz'
        if self.REC_GRAPH_BAD_SPACES_LINE.search(modified_line):
            bad_lines.append(line)
            continue

        # Apparently this is the fastest way to strip all whitespace!:
        modified_line = "".join(modified_line.split())
        non_blank_lines.append(modified_line)

    # Check if there were problem lines and abort
    if bad_lines:
        self._report_invalid_lines(bad_lines)

    # Join incomplete lines (beginning or ending with an arrow).
    full_lines = []
    part_lines = []
    for i, this_line in enumerate(non_blank_lines):
        if i == 0:
            for seq in self.CONTINUATION_STRS:
                if this_line.startswith(seq):
                    # First line can't start with an arrow.
                    raise GraphParseError(f"Leading {seq}: {this_line}")
        try:
            next_line = non_blank_lines[i + 1]
        except IndexError:
            # This is the last line, so nothing can follow it.
            next_line = ''
            for seq in self.CONTINUATION_STRS:
                if this_line.endswith(seq):
                    # Last line can't end with an arrow, & or |.
                    raise GraphParseError(
                        f"Dangling {seq}:"
                        f"{this_line}"
                    )
        part_lines.append(this_line)

        # Check that a continuation sequence doesn't end this line and
        # begin the next:
        if (
            this_line.endswith(self.CONTINUATION_STRS) and
            next_line.startswith(self.CONTINUATION_STRS)
        ):
            raise GraphParseError(
                'Consecutive lines end and start with continuation '
                'characters:\n'
                f'{this_line}\n'
                f'{next_line}'
            )

        # Check that line ends with a valid continuation sequence:
        # if so, keep accumulating parts until the line is complete.
        if (
            any(
                this_line.endswith(seq) or next_line.startswith(seq)
                for seq in self.CONTINUATION_STRS
            ) and
            not (
                any(
                    this_line.endswith(seq) or next_line.startswith(seq)
                    for seq in self.BAD_STRS
                )
            )
        ):
            continue

        full_line = ''.join(part_lines)

        # Record inter-workflow dependence and remove the marker notation.
        # ("foo<WORKFLOW::TASK:fail> => bar" becomes "foo => bar").
        repl = Replacement('\\1')
        full_line = self.__class__.REC_WORKFLOW_STATE.sub(repl, full_line)
        for item in repl.match_groups:
            l_task, r_all, r_workflow, r_task, r_status = item
            if r_status:
                r_status = r_status.strip(self.__class__.QUALIFIER)
                r_status = TaskTrigger.standardise_name(r_status)
            else:
                r_status = TASK_OUTPUT_SUCCEEDED
            self.workflow_state_polling_tasks[l_task] = (
                r_workflow, r_task, r_status, r_all
            )
        full_lines.append(full_line)
        part_lines = []

    # Check for double-char conditional operators (a common mistake),
    # and bad node syntax (order of qualifiers).
    bad_lines = []
    for line in full_lines:
        if self.__class__.OP_AND_ERR in line:
            raise GraphParseError(
                "The graph AND operator is "
                f"'{self.__class__.OP_AND}': {line}")
        if self.__class__.OP_OR_ERR in line:
            raise GraphParseError(
                "The graph OR operator is "
                f"'{self.__class__.OP_OR}': {line}")
        # Check node syntax. First drop all non-node characters.
        node_str = line
        for spec in [
            self.__class__.ARROW,
            self.__class__.OP_OR,
            self.__class__.OP_AND,
            self.__class__.SUICIDE,
            '(',
            ')',
        ]:
            node_str = node_str.replace(spec, ' ')
        # Drop all valid @xtriggers, longest first to avoid sub-strings.
        nodes = self.__class__.REC_XTRIG.findall(node_str)
        nodes.sort(key=len, reverse=True)
        for node in nodes:
            node_str = node_str.replace(node, '')
        # Then check the remaining nodes. Accumulate across lines rather
        # than re-assigning, so that bad nodes found on an earlier line
        # are not discarded when a later line is checked.
        bad_lines.extend(
            node_str for node in node_str.split()
            if self.__class__.REC_NODE_FULL.sub('', node, 1)
        )
    if bad_lines:
        self._report_invalid_lines(bad_lines)

    # Expand parameterized lines (or detect undefined parameters).
    line_set = set()
    graph_expander = GraphExpander(self.parameters)
    for line in full_lines:
        if not self.__class__.REC_PARAMS.search(line):
            line_set.add(line)
            continue
        for line_ in graph_expander.expand(line):
            line_set.add(line_)

    # Process chains of dependencies as pairs: left => right.
    # Parameterization can duplicate some dependencies, so use a set.
    pairs: Set[Tuple[Optional[str], str]] = set()

    for line in line_set:
        chain = []
        # "foo => bar => baz" becomes [foo, bar, baz]
        # "foo => bar_-32768 => baz" becomes [foo]
        # "foo_-32768 => bar" becomes []
        for node in line.split(self.__class__.ARROW):
            # This can happen, e.g. "foo => => bar" produces
            # "foo, '', bar", so we add so that later it raises
            # an error
            if node == '':
                chain.append(node)
                continue
            node = self.REC_NODE_OUT_OF_RANGE.sub('', node)
            if node == '':
                # For "foo => bar<err> => baz", stop at "bar<err>"
                break
            chain.append(node)

        if not chain:
            continue

        for item in self.__class__.REC_NODES.findall(chain[0]):
            # Auto-trigger lone nodes and initial nodes in a chain.
            if not item[0].startswith(self.__class__.XTRIG):
                pairs.add((None, ''.join(item)))

        # Each adjacent pair of nodes in the chain is one dependency.
        for left_node, right_node in zip(chain, chain[1:]):
            pairs.add((left_node, right_node))

    for pair in pairs:
        self._proc_dep_pair(pair)