Пример #1
0
    def prune_nodes(graph, nodes=None, node_selector=None):
        """ Prune a set of nodes out of the graph, and warn if doing so
            increased the number of weakly-connected components.

            :param graph: the graph to prune.  It is handed to
                `_GraphUtil.prune_node` once per pruned node (presumably
                modified in place there) and is the object returned
            :param nodes: explicit collection of nodes to prune.  When truthy,
                it is used as-is and `node_selector` is ignored
            :param node_selector: predicate applied to each of `graph.nodes`;
                the nodes for which it returns a truthy value are pruned.
                Only consulted when `nodes` is falsy
            :type node_selector: callable

            :returns: the same `graph` object that was passed in
        """
        # Nothing was selected for pruning: hand the graph back untouched.
        if not (nodes or node_selector):
            return graph

        def _weak_components():
            return list(
                nx.algorithms.components.weakly_connected_components(graph))

        before = _weak_components()

        if nodes:
            targets = nodes
        else:
            targets = [
                candidate for candidate in graph.nodes
                if node_selector(candidate)
            ]

        for target in targets:
            _GraphUtil.prune_node(target, graph)

        after = _weak_components()

        # More components after pruning than before means that the pruning
        # split the graph apart somewhere.
        if len(after) > len(before):
            logger.warning('Pruning of nodes %s from graph %s caused components to '
                           'become disconnected! Components before prune: %s; after prune: %s',
                           targets,
                           graph,
                           before,
                           after)

        return graph
Пример #2
0
    def process_arg(self, arg, node, raw_args):
        """ Check that the rendered form of an argument matches the regular
            expression `self.pattern`.  This only validates: whenever the
            method returns, it returns the original `arg` unchanged.

            :param arg: the argument (possibly a template) to validate
            :param node: not used by this method
            :param raw_args: passed through to `self.render_template` along
                with `arg`

            :returns: `arg`, unmodified
            :raises Exception: if `self.pattern` cannot be compiled, or if the
                rendered argument neither matches the pattern nor contains a
                `<<...>>` section
        """
        try:
            pattern = re.compile(self.pattern)
        except Exception:
            raise Exception(
                'Error compiling regex for `{}`: `{}` is an invalid pattern'.
                format(self.type, self.properties['pattern']))

        try:
            rendered = self.render_template(arg, raw_args)
        except jinja2.exceptions.UndefinedError:
            # Without a rendered value there is nothing to check against the
            # pattern, so the argument is let through with a warning.
            logger.warning(
                'Could not render template `%s`; cannot verify that the argument '
                'matches the required pattern `%s`!', arg, pattern.pattern)
            return arg

        if pattern.match(rendered):
            # Validation only: give back the original arg rather than the
            # rendered one.
            return arg

        # A rendered value containing a `<<...>>` section is tolerated with a
        # warning; anything else that failed to match is rejected.
        if not re.search('<<.+>>', rendered):
            raise Exception(
                'Invalid argument `{}`: does not match expected pattern `{}`'.
                format(rendered, pattern.pattern))

        logger.warning(
            'Argument generated from `%s` may not match the required pattern `%s` and fail.',
            rendered, pattern.pattern)
        return arg
Пример #3
0
    def _build_graph(graph_name, nodes):
        """ Build a directed graph from a mapping of node names to their
            downstream node names, and verify that the result is a DAG.

            :param graph_name: name of the graph, used only in log and error
                messages
            :param nodes: mapping of upstream node name to a 2-tuple of
                (iterable of downstream node names, node); the second tuple
                element is not used here
            :type nodes: dict

            :returns: the constructed graph
            :rtype: networkx.DiGraph
            :raises exceptions.CyclicWorkflowException: if the graph is not a
                directed acyclic graph
        """
        digraph = nx.DiGraph()

        logger.debug('Creating graph %s from nodes %s',
                     graph_name, nodes.keys())

        digraph.add_nodes_from(nodes.keys())

        def _edges():
            # One (upstream, downstream) pair per declared downstream name.
            for (upstream, (downstream_names, _node)) in six.iteritems(nodes):
                for downstream in downstream_names:
                    yield (upstream, downstream)

        digraph.add_edges_from(_edges())

        # A graph that splits into several components is only reported, not
        # rejected.
        if not nx.algorithms.components.is_weakly_connected(digraph):
            disconnected = list(
                nx.algorithms.components.weakly_connected_components(digraph))
            logger.warning(
                'Multiple connected components found for graph `%s`: %s',
                graph_name,
                disconnected)

        acyclic = nx.algorithms.dag.is_directed_acyclic_graph(digraph)
        if acyclic:
            logger.debug('Successfully created graph %s with nodes %s',
                         graph_name, digraph.nodes)
            return digraph

        raise exceptions.CyclicWorkflowException(
            'Invalid graph `{}`: not a DAG!'.format(graph_name))
Пример #4
0
    def _flow_control_nodes(self, wf):
        """ Collect the flow-control nodes (start, end, kill, decision) of a
            workflow into a mapping of node name to a 2-tuple of
            (list of downstream node names, node definition).

            The `kill` and `decision` sections are both optional: a workflow
            in which either is missing or empty simply contributes no such
            nodes.

            :param wf: the workflow definition.  Must contain `start` (with a
                `to` entry) and `end`; may contain `kill` and `decision`
            :type wf: dict

            :returns: mapping of node name to (downstream names, node)
            :rtype: dict<string, tuple<list<string>, dict>>
            :raises Exception: if a decision node is present and `self.debug`
                is not set
        """
        result = {
            'start': ([wf['start']['to']], wf['start']),
            wf['end'].get('name', 'end'): ([], wf['end'])
        }

        kill = wf.get('kill')
        if kill:
            result[kill.get('name', 'kill')] = ([], kill)

        # Read `decision` the same tolerant way as `kill`, so that a workflow
        # with no decision nodes does not fail with a KeyError / TypeError.
        for decision in wf.get('decision') or []:
            if not self.debug:
                raise Exception('decision node found: `{}` not supported right '
                                'now!  Set --debug to build without raising this error, but '
                                'only at your own risk!'.format(decision['name']))

            logger.warning(
                'decision node found: `%s`.  Including all downstream '
                'branches because --debug was specified', decision['name'])

            # Every case target plus the default target counts as downstream.
            switch = decision['switch']
            downstream = [case['to'] for case in switch['case']]
            downstream.append(switch['default']['to'])
            result[decision['name']] = (downstream, decision)

        return result
Пример #5
0
    def resolve_properties(self,
                           execution_context,
                           default_task_args=None,
                           base_operator_loader=None,
                           preprocessor_loader=None):
        """ Resolve the properties / arguments of this operator, keeping track
            of the source that supplied each of them.  Properties can come
            from the DAG config file, from the resources available in the
            operator's context, from the task defaults of the primary DAG,
            and from the schema defaults, in that order of precedence.

            The resolved properties are validated against the task's schema
            (which also fills in defaults), run through the operator's
            preprocessors, and stored on the instance.

            :param execution_context: the context in which this node is
                executed, containing the available resources and the node
                that referred to this node, if any
            :type execution_context: boundary_layer.containers.ExecutionContext
            :param default_task_args: the default task args defined in the DAG
            :type default_task_args: dict
            :param base_operator_loader: a method that retrieves typed
                operators, equivalent to a Registry.get method
            :type base_operator_loader: callable
            :param preprocessor_loader: a method that retrieves typed
                preprocessors, equivalent to a Registry.get method
            :type preprocessor_loader: callable

            :returns: the resolved properties: the per-source property
                partition together with the preprocessed values
            :rtype: ResolvedProperties
            :raises Exception: if properties were already resolved for this
                operator and the newly computed values differ from them
        """
        schema = self.get_schema(base_operator_loader)
        schema_property_names = frozenset(
            schema.get('properties', {}).keys())

        self.set_default_task_args(default_task_args)

        sources, raw_values = self._get_property_sources_and_values(
            schema_property_names, execution_context)

        validated = validator.validate_and_fill_defaults(item=raw_values,
                                                         schema=schema)

        # NOTE(review): this records under `sources.schema` the validated keys
        # that ARE present in the values passed to the validator.  Whether
        # that is the intended direction depends on whether
        # validate_and_fill_defaults fills `item` in place -- confirm.
        for name in validated:
            if name in raw_values:
                sources.schema.add(name)

        logger.debug('%s: validated partitioned properties: %s', self.name,
                     sources)

        preprocessors = self._load_preprocessors(base_operator_loader,
                                                 preprocessor_loader)

        # Build the full mapping first, and only then publish it on self.
        imports_by_preprocessor = {}
        for (pp_name, preprocessor) in six.iteritems(preprocessors):
            imports_by_preprocessor[pp_name] = preprocessor.imports()
        self._preprocessor_imports = imports_by_preprocessor

        preprocessed_values = self._apply_preprocessors(
            args=validated, preprocessors=preprocessors)

        previous = self._resolved_properties
        if previous:
            # A repeated call is tolerated only if it reproduces exactly the
            # values that were computed the first time.
            if preprocessed_values != previous.values:
                raise Exception(
                    'resolve_properties() was already called for operator {}, '
                    'and different values were computed this time!  Found: {}, '
                    'expected: {}.  This was probably caused by repeated '
                    'references to a sub-dag or generator using different resource '
                    'contexts.  This is not presently supported!'.format(
                        self, preprocessed_values, previous.values))

            logger.warning(
                'resolve_properties() was already called for operator %s, '
                'but no differences in the computed properties were found.',
                self)

        self._resolved_properties = ResolvedProperties(
            sources=sources, values=preprocessed_values)

        return self._resolved_properties
Пример #6
0
    def validate_and_resolve_properties(spec):
        """ Resolve the properties of every resource and node in the primary
            DAG and, recursively, in every secondary DAG targeted by one of
            its sub-dag / generator nodes, validating resource usage along
            the way.

            :param spec: the workflow specification.  `spec.primary` is the
                primary DAG and `spec.secondary` is an iterable of secondary
                DAGs, which are looked up by their `name`

            :raises ValueError: if a node requires a resource that is not
                available in its context, or if a resource is created but
                never requested by any node
            :raises KeyError: if a sub-dag / generator node targets a DAG
                name that is not present in `spec.secondary`
        """
        secondary_lookup = {dag['name']: dag for dag in spec.secondary}
        default_task_args = spec.primary.get('default_task_args', {})

        # Accumulated over all of the DAGs visited, so that unused resources
        # and unused default task args can be reported at the end.
        all_resources_created = set()
        all_resources_requested = set()
        all_defaults_used = set()

        def validate_dag(dag, execution_context):
            nodes = Workflow.get_all_nodes(dag)

            dag_resources = {
                resource.name: resource
                for resource in dag['resources']
            }

            Workflow.ensure_no_duplicate_names(
                dag['name'],
                [node.name for node in nodes] + list(dag_resources),
                list(execution_context.resources))

            # Resources resolve to a pair of property sets: one for creating
            # the resource and one for destroying it.
            for resource in dag_resources.values():
                (create_properties,
                 destroy_properties) = resource.resolve_properties(
                     execution_context=execution_context,
                     default_task_args=default_task_args,
                     base_operator_loader=plugins.manager.operators,
                     preprocessor_loader=plugins.manager.
                     property_preprocessors,
                 )
                all_defaults_used.update(
                    create_properties.sources.default_task_args)
                all_defaults_used.update(
                    destroy_properties.sources.default_task_args)

            all_resources_created.update(dag_resources)

            for node in nodes:
                all_resources_requested.update(node.requires_resources)

            # Nodes of this DAG may use the resources inherited from the
            # referring context as well as the ones this DAG defines.
            resources_available = execution_context.resources.copy()
            resources_available.update(dag_resources)

            available_names = frozenset(resources_available)
            missing_resources = {}
            for node in nodes:
                missing = frozenset(node.requires_resources) - available_names
                if missing:
                    missing_resources[node.name] = missing

            if missing_resources:
                raise ValueError(
                    'Error in dag {}: Operators require resources '
                    'outside their local contexts: {}'.format(
                        dag['name'], missing_resources))

            node_context = execution_context._replace(
                resources=resources_available)
            for node in nodes:
                properties = node.resolve_properties(
                    execution_context=node_context,
                    default_task_args=default_task_args,
                    base_operator_loader=plugins.manager.operators,
                    preprocessor_loader=plugins.manager.property_preprocessors,
                )

                all_defaults_used.update(properties.sources.default_task_args)

            for referrer in dag['sub_dags'] + dag['generators']:
                subdag = secondary_lookup[referrer.target]

                # The referred-to DAG only sees the resources that the
                # referring node explicitly requires.  The set is built once
                # per referrer rather than once per candidate resource.
                required_names = frozenset(referrer.requires_resources)
                subdag_resources_available = {
                    name: resource
                    for (name, resource) in six.iteritems(resources_available)
                    if name in required_names
                }
                subdag_ctx = ExecutionContext(
                    referrer=referrer, resources=subdag_resources_available)
                validate_dag(subdag, subdag_ctx)

        validate_dag(spec.primary, ExecutionContext(referrer=None,
                                                    resources={}))

        unused_resources = all_resources_created - all_resources_requested

        # Names are sorted so that the messages below are deterministic
        # instead of following set iteration order.
        if unused_resources:
            raise ValueError('Unused resources `{}`'.format(
                '`, `'.join(sorted(unused_resources))))

        unused_defaults = frozenset(default_task_args) - all_defaults_used

        if unused_defaults:
            logger.warning('Unused default task args: `%s`',
                           '`, `'.join(sorted(unused_defaults)))