Exemplo n.º 1
0
    def get_selected(self, include, exclude, resource_types, tags, required):
        """Return the set of selected node names that pass ``_is_match``.

        The linker graph is restricted to the nodes reported by
        ``get_valid_nodes``; ``select_nodes`` is then run on that subgraph
        with the include/exclude specs, and each resulting name is kept only
        if ``self._is_match(name, resource_types, tags, required)`` is truthy.

        ``include``, ``exclude`` and ``tags`` are passed through ``coalesce``
        with the defaults ``['*']``, ``[]`` and ``[]`` (``coalesce`` is
        defined outside this snippet; presumably it substitutes the default
        when the argument is None).
        """
        include = coalesce(include, ['*'])
        exclude = coalesce(exclude, [])
        tags = coalesce(tags, [])

        full_graph = self.linker.graph
        runnable_graph = full_graph.subgraph(self.get_valid_nodes(full_graph))
        candidates = self.select_nodes(runnable_graph, include, exclude)

        return {
            unique_id
            for unique_id in candidates
            if self._is_match(unique_id, resource_types, tags, required)
        }
Exemplo n.º 2
0
    def get_selected(self, include, exclude, resource_types, tags, required):
        """Return the set of selected node names that pass ``_is_match``.

        Works on the subgraph of ``self.linker.graph`` made of the nodes
        returned by ``get_valid_nodes``. ``select_nodes`` picks candidates
        from that subgraph and ``_is_match`` filters them by
        ``resource_types``, ``tags`` and ``required``.

        ``include`` is coalesced with ``['fqn:*', 'source:*']``; ``exclude``
        and ``tags`` are coalesced with empty lists (``coalesce`` lives
        outside this snippet; presumably a None-replacing helper).
        """
        include = coalesce(include, ['fqn:*', 'source:*'])
        exclude = coalesce(exclude, [])
        tags = coalesce(tags, [])

        full_graph = self.linker.graph
        valid_names = self.get_valid_nodes(full_graph)
        candidates = self.select_nodes(
            full_graph.subgraph(valid_names), include, exclude
        )

        return {
            unique_id
            for unique_id in candidates
            if self._is_match(unique_id, resource_types, tags, required)
        }
Exemplo n.º 3
0
    def get_selected(self, include, exclude, resource_types, tags, required):
        """Return the set of selected node names that pass ``_is_match``.

        Nodes of ``self.full_graph`` for which ``_is_graph_member`` is truthy
        form the subgraph handed to ``select_nodes``; the names it returns
        are then filtered with
        ``self._is_match(name, resource_types, tags, required)``.

        ``include`` is coalesced with ``['fqn:*', 'source:*']``; ``exclude``
        and ``tags`` are coalesced with empty lists (``coalesce`` is defined
        outside this snippet; presumably a None-replacing helper).
        """
        include = coalesce(include, ['fqn:*', 'source:*'])
        exclude = coalesce(exclude, [])
        tags = coalesce(tags, [])

        # Keep only the names that count as graph members.
        members = set()
        for candidate in self.full_graph.nodes():
            if self._is_graph_member(candidate):
                members.add(candidate)
        member_graph = self.full_graph.subgraph(members)

        return {
            unique_id
            for unique_id in self.select_nodes(member_graph, include, exclude)
            if self._is_match(unique_id, resource_types, tags, required)
        }
Exemplo n.º 4
0
    def get_selected(self, include, exclude, resource_types, tags):
        """Return the set of selected node names matching type and tags.

        Candidates come from ``select_nodes`` run over the subgraph of
        ``self.linker.graph`` limited to ``get_valid_nodes``. A candidate is
        kept when its manifest entry's ``resource_type`` is in
        ``resource_types`` and either ``tags`` is empty or the entry shares
        at least one tag with ``tags``.

        ``include``, ``exclude`` and ``tags`` are coalesced with ``['*']``,
        ``[]`` and ``[]`` (``coalesce`` is defined outside this snippet;
        presumably a None-replacing helper).
        """
        include = coalesce(include, ['*'])
        exclude = coalesce(exclude, [])
        tags = coalesce(tags, [])

        full_graph = self.linker.graph
        runnable_graph = full_graph.subgraph(self.get_valid_nodes(full_graph))
        candidates = self.select_nodes(runnable_graph, include, exclude)

        chosen = set()
        for unique_id in candidates:
            manifest_node = self.manifest.nodes[unique_id]

            type_ok = manifest_node.resource_type in resource_types
            if len(tags) == 0:
                # No tag filter requested: every node passes.
                tags_ok = True
            else:
                tags_ok = bool(set(manifest_node.tags) & set(tags))

            if not (type_ok and tags_ok):
                continue
            chosen.add(unique_id)

        return chosen
Exemplo n.º 5
0
    def parse_node(self,
                   node,
                   node_path,
                   package_project_config,
                   tags=None,
                   fqn_extra=None,
                   fqn=None,
                   agate_table=None,
                   archive_config=None,
                   column_name=None):
        """Build, render and validate a ParsedNode from an unparsed node.

        Steps: compute the fqn via ``get_fqn`` when none is given, create a
        SourceConfig, build the intermediate dict with
        ``_build_intermediate_node_dict``, construct the ParsedNode, then
        call ``_render_with_context``, ``_update_parsed_node_info`` and
        ``validate()`` on it before returning it.

        agate_table should be set if the node came from a seed file.
        archive_config should be set if the node is an Archive node.
        column_name should be set if the node is a Test node associated with
        a particular column.
        """
        logger.debug("Parsing {}".format(node_path))

        tags = coalesce(tags, [])
        fqn_extra = coalesce(fqn_extra, [])
        if fqn is None:
            fqn = self.get_fqn(node.path, package_project_config, fqn_extra)

        source_config = SourceConfig(
            self.root_project_config,
            package_project_config,
            fqn,
            node.resource_type,
        )

        # NOTE(review): source_config is passed both as the first and as the
        # fourth positional argument, exactly as in the original call --
        # confirm the fourth slot is not meant to be package_project_config.
        intermediate = self._build_intermediate_node_dict(
            source_config,
            node.serialize(),
            node_path,
            source_config,
            tags,
            fqn,
            agate_table,
            archive_config,
            column_name,
        )
        result = ParsedNode(**intermediate)

        self._render_with_context(result, source_config)
        self._update_parsed_node_info(result, source_config)
        result.validate()

        return result
Exemplo n.º 6
0
    def parse_node(self, node, node_path, package_project_config, tags=None,
                   fqn_extra=None, fqn=None, agate_table=None,
                   archive_config=None, column_name=None):
        """Build, render and validate a ParsedNode from an unparsed node.

        The fqn is computed with ``get_fqn`` when not supplied. A
        SourceConfig is created and used to build the intermediate dict, to
        render the node and to update its parsed info. The node is validated
        before being returned.

        agate_table should be set if the node came from a seed file.
        archive_config should be set if the node is an Archive node.
        column_name should be set if the node is a Test node associated with
        a particular column.
        """
        logger.debug("Parsing {}".format(node_path))

        tags = coalesce(tags, [])
        fqn_extra = coalesce(fqn_extra, [])
        if fqn is None:
            fqn = self.get_fqn(node.path, package_project_config, fqn_extra)

        source_config = SourceConfig(self.root_project_config,
                                     package_project_config,
                                     fqn,
                                     node.resource_type)

        # NOTE(review): source_config fills both the first and the fourth
        # positional slot, as in the original call -- confirm the fourth is
        # not meant to be package_project_config.
        builder_args = (
            source_config, node.serialize(), node_path, source_config, tags,
            fqn, agate_table, archive_config, column_name,
        )
        result = ParsedNode(**self._build_intermediate_node_dict(*builder_args))

        self._render_with_context(result, source_config)
        self._update_parsed_node_info(result, source_config)
        result.validate()

        return result
Exemplo n.º 7
0
Arquivo: base.py Projeto: f1fe/dbt
    def _build_intermediate_node_dict(self, config, node_dict, node_path,
                                      package_project_config, tags, fqn,
                                      agate_table, snapshot_config,
                                      column_name):
        """Fill in the unparsed node dict so it can seed a ParsedNode.

        ``node_dict`` is updated in place and returned. Because dicts go in
        and come out, subclasses can override this and adjust the data via
        super() both before and after the call.

        ``package_project_config`` is accepted but not read in this body.
        """
        if agate_table is not None:
            node_dict['agate_table'] = agate_table

        # Provisional config only: config() has not been called from jinja
        # yet, but the Var() call made later needs project-level configs
        # because they might contain refs.
        # TODO: Restructure this?
        # NOTE(review): if coalesce returns snapshot_config itself, the
        # update() below changes the caller's dict in place -- confirm.
        provisional_config = coalesce(snapshot_config, {})
        provisional_config.update(config.config)

        is_empty = 'raw_sql' in node_dict and not node_dict['raw_sql'].strip()

        defaults = {
            'refs': [],
            'sources': [],
            'depends_on': {
                'nodes': [],
                'macros': [],
            },
            'unique_id': node_path,
            'empty': is_empty,
            'fqn': fqn,
            'tags': tags,
            'config': provisional_config,
            # Temporary values so get_rendered() has access to a schema,
            # database, and alias.
            'schema': self.default_schema,
            'database': self.default_database,
            'alias': node_dict.get('name'),
        }
        node_dict.update(defaults)

        # A column name, when given, must end up on the ParsedNode.
        if column_name is not None:
            node_dict['column_name'] = column_name

        return node_dict
Exemplo n.º 8
0
    def _build_intermediate_node_dict(self, config, node_dict, node_path,
                                      package_project_config, tags, fqn,
                                      agate_table, archive_config,
                                      column_name):
        """Fill in the unparsed node dict so it can seed a ParsedNode.

        ``node_dict`` is updated in place and returned. Because dicts go in
        and come out, subclasses can override this and adjust the data via
        super() both before and after the call.

        ``package_project_config`` is accepted but not read in this body.
        """
        if agate_table is not None:
            node_dict['agate_table'] = agate_table

        # Provisional config only: config() has not been called from jinja
        # yet, but the Var() call made later needs project-level configs
        # because they might contain refs.
        # TODO: Restructure this?
        # NOTE(review): if coalesce returns archive_config itself, the
        # update() below changes the caller's dict in place -- confirm.
        provisional_config = coalesce(archive_config, {})
        provisional_config.update(config.config)

        is_empty = 'raw_sql' in node_dict and not node_dict['raw_sql'].strip()

        defaults = {
            'refs': [],
            'sources': [],
            'depends_on': {
                'nodes': [],
                'macros': [],
            },
            'unique_id': node_path,
            'empty': is_empty,
            'fqn': fqn,
            'tags': tags,
            'config': provisional_config,
            # Temporary values so get_rendered() has access to a schema,
            # database, and alias.
            'schema': self.default_schema,
            'database': self.default_database,
            'alias': node_dict.get('name'),
        }
        node_dict.update(defaults)

        # A column name, when given, must end up on the ParsedNode.
        if column_name is not None:
            node_dict['column_name'] = column_name

        return node_dict
Exemplo n.º 9
0
    def get_nodes_from_multiple_specs(
            self,
            graph,
            specs,
            nodes=None,
            check_existence=False,
            exclude=False
    ):
        """Fold the nodes matched by each spec into one set and return it.

        Each raw spec yielded by ``split_specs(specs)`` is resolved with
        ``get_nodes_from_intersection_spec``. The matches are added to the
        accumulated set, or removed from it when ``exclude`` is truthy. With
        ``check_existence`` set, ``alert_non_existence`` is called with each
        spec and its matches.

        The accumulator starts as ``coalesce(nodes, set())`` and is modified
        in place.
        NOTE(review): if coalesce returns ``nodes`` itself, the caller's set
        is mutated -- confirm against the helper.
        """
        selected_nodes: Set[str] = coalesce(nodes, set())

        for raw_spec in split_specs(specs):
            matched = self.get_nodes_from_intersection_spec(graph, raw_spec)

            if check_existence:
                alert_non_existence(raw_spec, matched)

            if exclude:
                set.difference_update(selected_nodes, matched)
            else:
                set.update(selected_nodes, matched)

        return selected_nodes
Exemplo n.º 10
0
    def select_nodes(self, graph, raw_include_specs, raw_exclude_specs):
        """Return the nodes matched by the include specs minus the excludes.

        When ``raw_include_specs`` is falsy, the defaults
        ``['fqn:*', 'source:*']`` are used and the existence check is turned
        off; otherwise the include pass runs with ``check_existence=True``.
        The exclude pass then runs on the include result with
        ``exclude=True``.
        """
        raw_exclude_specs = coalesce(raw_exclude_specs, [])

        # Only user-supplied include specs are checked for existence.
        check_existence = bool(raw_include_specs)
        if not check_existence:
            raw_include_specs = ['fqn:*', 'source:*']

        included = self.get_nodes_from_multiple_specs(
            graph, raw_include_specs, check_existence=check_existence
        )
        return self.get_nodes_from_multiple_specs(
            graph, raw_exclude_specs, nodes=included, exclude=True
        )
Exemplo n.º 11
0
    def parse_node(cls,
                   node,
                   node_path,
                   root_project_config,
                   package_project_config,
                   all_projects,
                   tags=None,
                   fqn_extra=None,
                   fqn=None,
                   macros=None,
                   agate_table=None,
                   archive_config=None):
        """Parse a node, given an UnparsedNode and any other required information.

        The node is serialized and filled with bookkeeping keys (refs,
        depends_on, unique_id, empty, fqn, tags, config, schema, alias). Its
        raw_sql is rendered once with a parser context (capture_macros=True),
        after which schema, alias, config and hooks are finalized and a
        ParsedNode is built from the result.

        agate_table should be set if the node came from a seed file.
        archive_config should be set if the node is an Archive node.

        ``all_projects`` is accepted but not read in this body. The first
        parameter is ``cls``; the decorator is not visible in this snippet.
        """
        logger.debug("Parsing {}".format(node_path))

        # From here on ``node`` is the serialized form, not the input object.
        node = node.serialize()

        if agate_table is not None:
            node['agate_table'] = agate_table
        tags = coalesce(tags, [])
        fqn_extra = coalesce(fqn_extra, [])
        macros = coalesce(macros, {})

        node.update({
            'refs': [],
            'depends_on': {
                'nodes': [],
                'macros': [],
            }
        })

        if fqn is None:
            fqn = cls.get_fqn(node.get('path'), package_project_config,
                              fqn_extra)

        config = dbt.model.SourceConfig(root_project_config,
                                        package_project_config, fqn,
                                        node['resource_type'])

        node['unique_id'] = node_path
        # "empty" is only True when raw_sql is present and blank.
        node['empty'] = ('raw_sql' in node
                         and len(node['raw_sql'].strip()) == 0)
        node['fqn'] = fqn
        node['tags'] = tags
        # Temporary key: deleted again just before the ParsedNode is built.
        node['config_reference'] = config

        # Set this temporarily. Not the full config yet (as config() hasn't
        # been called from jinja yet). But the Var() call below needs info
        # about project level configs b/c they might contain refs.
        # TODO: Restructure this?
        # NOTE(review): if coalesce returns archive_config itself, update()
        # mutates the caller's dict -- confirm.
        config_dict = coalesce(archive_config, {})
        config_dict.update(config.config)
        node['config'] = config_dict

        # Set this temporarily so get_rendered() has access to a schema & alias
        profile = dbt.utils.get_profile_from_project(root_project_config)
        default_schema = profile.get('schema', 'public')
        node['schema'] = default_schema
        default_alias = node.get('name')
        node['alias'] = default_alias

        context = dbt.context.parser.generate(node, root_project_config,
                                              {"macros": macros})

        # The rendered output is discarded; the call is made for its effects
        # (presumably it populates node/config via the context -- confirm).
        dbt.clients.jinja.get_rendered(node.get('raw_sql'),
                                       context,
                                       node,
                                       capture_macros=True)

        # Clean up any open conns opened by adapter functions that hit the db
        db_wrapper = context['adapter']
        adapter = db_wrapper.adapter
        # Rebinds ``profile`` to the wrapper's profile for the release call.
        profile = db_wrapper.profile
        adapter.release_connection(profile, node.get('name'))

        # Special macro defined in the global project
        schema_override = config.config.get('schema')
        # Without a generate_schema_name entry in the context, the default
        # schema is used regardless of the override.
        get_schema = context.get('generate_schema_name',
                                 lambda x: default_schema)
        node['schema'] = get_schema(schema_override)
        node['alias'] = config.config.get('alias', default_alias)

        # Overwrite node config
        config_dict = node.get('config', {})
        config_dict.update(config.config)
        node['config'] = config_dict

        for hook_type in dbt.hooks.ModelHookType.Both:
            node['config'][hook_type] = dbt.hooks.get_hooks(node, hook_type)

        del node['config_reference']

        return ParsedNode(**node)
Exemplo n.º 12
0
    def parse_node(cls,
                   node,
                   node_path,
                   root_project_config,
                   package_project_config,
                   all_projects,
                   tags=None,
                   fqn_extra=None,
                   fqn=None,
                   macros=None,
                   agate_table=None,
                   archive_config=None,
                   column_name=None):
        """Parse a node, given an UnparsedNode and any other required information.

        The node is serialized and filled with bookkeeping keys, and a
        ParsedNode is built from it. The node's raw_sql is rendered once with
        a parser context generated from a macros-only Manifest. Schema,
        alias, config and hooks are then finalized on the ParsedNode, which
        is validated and returned.

        agate_table should be set if the node came from a seed file.
        archive_config should be set if the node is an Archive node.
        column_name should be set if the node is a Test node associated with a
        particular column.

        ``all_projects`` is accepted but not read in this body. The first
        parameter is ``cls``; the decorator is not visible in this snippet.
        """
        logger.debug("Parsing {}".format(node_path))

        # From here on ``node`` is the serialized form, not the input object.
        node = node.serialize()

        if agate_table is not None:
            node['agate_table'] = agate_table
        tags = coalesce(tags, [])
        fqn_extra = coalesce(fqn_extra, [])
        macros = coalesce(macros, {})

        node.update({
            'refs': [],
            'depends_on': {
                'nodes': [],
                'macros': [],
            }
        })

        if fqn is None:
            fqn = cls.get_fqn(node.get('path'), package_project_config,
                              fqn_extra)

        config = dbt.model.SourceConfig(root_project_config,
                                        package_project_config, fqn,
                                        node['resource_type'])

        node['unique_id'] = node_path
        # "empty" is only True when raw_sql is present and blank.
        node['empty'] = ('raw_sql' in node
                         and len(node['raw_sql'].strip()) == 0)
        node['fqn'] = fqn
        node['tags'] = tags

        # Set this temporarily. Not the full config yet (as config() hasn't
        # been called from jinja yet). But the Var() call below needs info
        # about project level configs b/c they might contain refs.
        # TODO: Restructure this?
        # NOTE(review): if coalesce returns archive_config itself, update()
        # mutates the caller's dict -- confirm.
        config_dict = coalesce(archive_config, {})
        config_dict.update(config.config)
        node['config'] = config_dict

        # Set this temporarily so get_rendered() has access to a schema & alias
        default_schema = getattr(root_project_config.credentials, 'schema',
                                 'public')
        node['schema'] = default_schema
        default_alias = node.get('name')
        node['alias'] = default_alias

        # if there's a column, it should end up part of the ParsedNode
        if column_name is not None:
            node['column_name'] = column_name

        # make a manifest with just the macros to get the context
        manifest = Manifest(macros=macros,
                            nodes={},
                            docs={},
                            generated_at=dbt.utils.timestring())

        parsed_node = ParsedNode(**node)
        context = dbt.context.parser.generate(parsed_node, root_project_config,
                                              manifest, config)

        # The rendered output is discarded; the call is made for its effects
        # (presumably it populates config via the context -- confirm).
        dbt.clients.jinja.get_rendered(parsed_node.raw_sql,
                                       context,
                                       parsed_node.to_shallow_dict(),
                                       capture_macros=True)

        # Clean up any open conns opened by adapter functions that hit the db
        db_wrapper = context['adapter']
        adapter = db_wrapper.adapter
        # NOTE(review): runtime_config is not used again in this method.
        runtime_config = db_wrapper.config
        adapter.release_connection(parsed_node.name)

        # Special macro defined in the global project
        schema_override = config.config.get('schema')
        # Without a generate_schema_name entry in the context, the default
        # schema is used regardless of the override.
        get_schema = context.get('generate_schema_name',
                                 lambda x: default_schema)
        parsed_node.schema = get_schema(schema_override)
        parsed_node.alias = config.config.get('alias', default_alias)

        # Overwrite node config
        config_dict = parsed_node.get('config', {})
        config_dict.update(config.config)
        parsed_node.config = config_dict

        for hook_type in dbt.hooks.ModelHookType.Both:
            parsed_node.config[hook_type] = dbt.hooks.get_hooks(
                parsed_node, hook_type)

        parsed_node.validate()

        return parsed_node
Exemplo n.º 13
0
def parse_node(node,
               node_path,
               root_project_config,
               package_project_config,
               all_projects,
               tags=None,
               fqn_extra=None,
               fqn=None,
               macros=None):
    """Parse a node and return an updated deep copy of it.

    The input ``node`` is deep-copied, so the caller's object is not
    modified. The copy is filled with bookkeeping keys (refs, depends_on,
    unique_id, empty, fqn, tags, config, schema), its raw_sql is rendered
    once with a parser context (capture_macros=True), and schema, config and
    hooks are then finalized.

    ``tags`` defaults to an empty set, ``fqn_extra`` to an empty list and
    ``macros`` to an empty dict (via ``coalesce``). ``fqn`` is computed with
    ``get_fqn`` when not supplied. ``all_projects`` is accepted but not read
    in this body.

    NOTE(review): ``node.get('raw_sql').strip()`` assumes the node always has
    a 'raw_sql' value; a missing key would fail on ``None.strip()`` --
    confirm against callers.
    """
    logger.debug("Parsing {}".format(node_path))
    # Work on a private copy so the caller's node is left untouched.
    node = copy.deepcopy(node)

    tags = coalesce(tags, set())
    fqn_extra = coalesce(fqn_extra, [])
    macros = coalesce(macros, {})

    node.update({
        'refs': [],
        'depends_on': {
            'nodes': [],
            'macros': [],
        }
    })

    if fqn is None:
        fqn = get_fqn(node.get('path'), package_project_config, fqn_extra)

    config = dbt.model.SourceConfig(root_project_config,
                                    package_project_config, fqn)

    node['unique_id'] = node_path
    node['empty'] = (len(node.get('raw_sql').strip()) == 0)
    node['fqn'] = fqn
    node['tags'] = tags
    # Temporary key: deleted again just before the node is returned.
    node['config_reference'] = config

    # Set this temporarily. Not the full config yet (as config() hasn't been
    # called from jinja yet). But the Var() call below needs info about project
    # level configs b/c they might contain refs. TODO: Restructure this?
    config_dict = node.get('config', {})
    config_dict.update(config.config)
    node['config'] = config_dict

    # Set this temporarily so get_rendered() below has access to a schema
    profile = dbt.utils.get_profile_from_project(root_project_config)
    default_schema = profile.get('schema', 'public')
    node['schema'] = default_schema

    context = dbt.context.parser.generate(node, root_project_config,
                                          {"macros": macros})

    # The rendered output is discarded; the call is made for its effects
    # (presumably it populates node/config via the context -- confirm).
    dbt.clients.jinja.get_rendered(node.get('raw_sql'),
                                   context,
                                   node,
                                   capture_macros=True)

    # Clean up any open connections opened by adapter functions that hit the db
    db_wrapper = context['adapter']
    adapter = db_wrapper.adapter
    # Rebinds ``profile`` to the wrapper's profile for the release call.
    profile = db_wrapper.profile
    adapter.release_connection(profile, node.get('name'))

    # Special macro defined in the global project
    schema_override = config.config.get('schema')
    # Without a generate_schema_name entry in the context, the default schema
    # is used regardless of the override.
    get_schema = context.get('generate_schema_name', lambda x: default_schema)
    node['schema'] = get_schema(schema_override)

    # Overwrite node config
    config_dict = node.get('config', {})
    config_dict.update(config.config)
    node['config'] = config_dict

    for hook_type in dbt.hooks.ModelHookType.Both:
        node['config'][hook_type] = dbt.hooks.get_hooks(node, hook_type)

    del node['config_reference']

    return node