Пример #1
0
def check_test_cnt(
    paths: Sequence[str], manifest: Dict[str, Any], required_tests: Dict[str, int]
) -> int:
    """Check that changed models have enough tests of every required type.

    Args:
        paths: File paths handed to the hook; ``get_model_sqls`` maps them to
            model SQL files.
        manifest: Manifest dictionary passed on to the helper functions.
        required_tests: Mapping of test type to the minimum number of tests
            of that type each model must have.

    Returns:
        ``1`` if at least one model has fewer tests of some type than
        required (one message is printed per violation), otherwise ``0``.
    """
    status_code = 0
    sqls = get_model_sqls(paths, manifest)
    filenames = set(sqls.keys())

    # get manifest nodes that pre-commit found as changed
    models = get_models(manifest, filenames)

    for model in models:
        childs = get_parent_childs(
            manifest=manifest,
            obj=model,
            manifest_node="child_map",
            node_types=["test"],
        )
        tests = [test for test in childs if isinstance(test, Test)]
        # groupby only merges adjacent items, hence the sort on the same key.
        grouped = groupby(
            sorted(tests, key=lambda x: x.test_type), lambda x: x.test_type
        )
        test_counts = {key: len(list(value)) for key, value in grouped}
        for required_test, required_cnt in required_tests.items():
            test_cnt = test_counts.get(required_test, 0)
            # A plain comparison: a type without any tests is a violation
            # only when at least one test of that type is actually required.
            if test_cnt < required_cnt:
                status_code = 1
                print(
                    f"{model.model_name}: "
                    f"has only {test_cnt} {required_test} tests, but "
                    f"{required_cnt} are required.",
                )
    return status_code
Пример #2
0
def check_test_cnt(paths: Sequence[str], manifest: Dict[str, Any],
                   test_cnt: int) -> int:
    """Check that every changed model has at least ``test_cnt`` tests.

    Args:
        paths: File paths handed to the hook; ``get_model_sqls`` maps them to
            model SQL files.
        manifest: Manifest dictionary passed on to the helper functions.
        test_cnt: Minimum number of tests required per model.

    Returns:
        ``1`` if any model has fewer tests than required (a message is
        printed for each such model), otherwise ``0``.
    """
    sqls = get_model_sqls(paths, manifest)
    changed_files = set(sqls.keys())
    failed = False

    # Only the manifest nodes that pre-commit reported as changed are checked.
    for model in get_models(manifest, changed_files):
        test_children = get_parent_childs(
            manifest=manifest,
            obj=model,
            manifest_node="child_map",
            node_types=["test"],
        )
        model_test_cnt = sum(
            1 for child in list(test_children) if isinstance(child, Test)
        )
        if model_test_cnt >= test_cnt:
            continue
        failed = True
        print(
            f"{model.model_name}: "
            f"has only {model_test_cnt} tests, but {test_cnt} are required.",
        )
    return 1 if failed else 0
def has_description(paths: Sequence[str], manifest: Dict[str, Any]) -> int:
    """Check that every changed model has a description.

    A model counts as described when either its manifest node or its entry
    in one of the given ``.yml``/``.yaml`` files has a non-empty
    ``description``.

    Args:
        paths: File paths handed to the hook.
        manifest: Manifest dictionary passed on to the helper functions.

    Returns:
        ``1`` if at least one model lacks a description (a message is
        printed per model), otherwise ``0``.
    """
    ymls = get_filenames(paths, [".yml", ".yaml"])
    sqls = get_model_sqls(paths, manifest)
    filenames = set(sqls.keys())

    # Manifest nodes that pre-commit reported as changed.
    models = get_models(manifest, filenames)
    # Covers the case where a schema was added but the model was not rerun.
    schemas = get_model_schemas(list(ymls.values()), filenames)

    in_models = set()
    for model in models:
        if model.node.get("description"):
            in_models.add(model.filename)

    in_schemas = set()
    for schema in schemas:
        if schema.schema.get("description"):
            in_schemas.add(schema.model_name)

    missing = filenames.difference(in_models, in_schemas)
    for filename in missing:
        print(
            f"{sqls.get(filename)}: "
            f"does not have defined description or properties file is missing.",
        )
    return 1 if missing else 0
def check_model_columns(paths: Sequence[str], manifest: Dict[str, Any],
                        catalog: Dict[str, Any]) -> int:
    """Compare the columns of each changed model with the catalog.

    For every changed model the columns on its manifest node are compared
    (via ``compare_columns``) with the columns of the matching entry under
    ``catalog["nodes"]``; differences in either direction are printed.

    Args:
        paths: File paths handed to the hook.
        manifest: Manifest dictionary passed on to the helper functions.
        catalog: Catalog dictionary; its ``nodes`` entry is looked up by
            model id.

    Returns:
        ``1`` if any model differs from the catalog or is absent from it,
        otherwise ``0``.
    """
    status_code = 0
    sqls = get_model_sqls(paths, manifest)
    filenames = set(sqls.keys())
    catalog_nodes = catalog.get("nodes", {})

    # Manifest nodes that pre-commit reported as changed.
    for model in get_models(manifest, filenames):
        catalog_node = catalog_nodes.get(model.model_id, {})
        if not catalog_node:
            status_code = 1
            print(
                f"Unable to find model `{model.model_id}` in catalog file. "
                f"Make sure you run `dbt docs generate` before executing this hook."
            )
            continue

        model_only, catalog_only = compare_columns(
            catalog_columns=catalog_node.get("columns", {}),
            model_columns=model.node.get("columns", {}),
        )
        schema_path = model.node.get("patch_path",
                                     "schema")  # pragma: no mutate
        # Each entry pairs a set of offending columns with its message.
        reports = (
            (
                model_only,
                "{file}: columns in {schema_path} but not in db (catalog.json):\n"
                "{columns}",
            ),
            (
                catalog_only,
                "{file}: columns in db (catalog.json) but not in {schema_path}:\n"
                "{columns}",
            ),
        )
        for columns, template in reports:
            if not columns:
                continue
            status_code = 1
            print_cols = ["- name: %s" % (col) for col in columns if col]
            print(
                template.format(
                    file=sqls.get(model.filename),
                    columns="\n".join(print_cols),  # pragma: no mutate
                    schema_path=schema_path,
                ))
    return status_code
def check_column_desc(paths: Sequence[str],
                      manifest: Dict[str, Any]) -> Tuple[int, Dict[str, Any]]:
    """Find columns of changed models that have no description.

    Columns are read from the manifest nodes first and from the given
    ``.yml``/``.yaml`` schemas afterwards. Findings for the same model are
    merged in that order.

    Args:
        paths: File paths handed to the hook.
        manifest: Manifest dictionary passed on to the helper functions.

    Returns:
        A tuple ``(status_code, missing)``. ``status_code`` is ``1`` when
        any model still has undescribed columns, else ``0``. ``missing``
        maps a model name to its set of undescribed column names; it may
        contain empty sets for models whose findings were cleared.
    """
    ymls = get_filenames(paths, [".yml", ".yaml"])
    sqls = get_model_sqls(paths, manifest)
    filenames = set(sqls.keys())

    # Manifest nodes that pre-commit reported as changed.
    models = get_models(manifest, filenames)
    # Covers the case where a schema was added but the model was not rerun.
    schemas = get_model_schemas(list(ymls.values()), filenames)
    missing: Dict[str, Set[str]] = {}

    for item in itertools.chain(models, schemas):
        if isinstance(item, ModelSchema):
            model_name = item.model_name
            undocumented = {
                column.get("name")
                for column in item.schema.get("columns", [])
                if not column.get("description")
            }
        elif isinstance(item, Model):
            model_name = item.filename
            undocumented = {
                name
                for name, column in item.node.get("columns", {}).items()
                if not column.get("description")
            }
        else:
            continue  # pragma: no cover, no mutate

        previous = missing.get(model_name)
        if previous:
            # A later source without gaps clears earlier findings;
            # otherwise the findings of both sources are combined.
            if undocumented:
                missing[model_name] = previous.union(undocumented)
            else:
                missing[model_name] = set()  # pragma: no mutate
        elif undocumented:
            missing[model_name] = undocumented

    status_code = 0
    for model, columns in missing.items():
        if not columns:
            continue
        status_code = 1
        result = "\n- ".join(list(columns))  # pragma: no mutate
        print(
            f"{sqls.get(model)}: "
            f"following columns are missing description:\n- {result}", )
    return status_code, missing
Пример #6
0
def check_child_parent_cnt(
    paths: Sequence[str],
    manifest: Dict[str, Any],
    required_cnt: Sequence[Dict[str, Any]],
) -> int:
    """Check the number of children and parents of each changed model.

    Children are the ``model`` nodes found through the manifest's
    ``child_map``; parents are the ``model`` and ``source`` nodes found
    through its ``parent_map``.

    Args:
        paths: File paths handed to the hook; ``get_model_sqls`` maps them to
            model SQL files.
        manifest: Manifest dictionary passed on to the helper functions.
        required_cnt: Requirement dictionaries. Each is read for ``cnt``
            (the limit; a falsy value disables the rule), ``operator`` (a
            two-argument callable that returns true on violation, default
            ``operator.lt``), ``type`` (label used in the message) and
            ``dep`` (``"childs"`` or ``"parents"``, selecting the count).

    Returns:
        ``1`` if any requirement is violated (one message is printed per
        violation), otherwise ``0``.
    """
    status_code = 0
    sqls = get_model_sqls(paths, manifest)
    filenames = set(sqls.keys())

    # get manifest nodes that pre-commit found as changed
    models = get_models(manifest, filenames)

    for model in models:
        childs = list(
            get_parent_childs(
                manifest=manifest,
                obj=model,
                manifest_node="child_map",
                node_types=["model"],
            ))
        parents = list(
            get_parent_childs(
                manifest=manifest,
                obj=model,
                manifest_node="parent_map",
                node_types=["model", "source"],
            ))
        real_cnt = {"childs": len(childs), "parents": len(parents)}
        for required in required_cnt:
            req_cnt = required.get("cnt")
            req_operator = required.get("operator", operator.lt)
            req_type = required.get("type")  # pragma: no mutate
            req_dep = required.get("dep", "")  # pragma: no mutate
            # NOTE: an unknown `dep` yields None here, which the default
            # operator cannot compare with an integer limit.
            real_value = real_cnt.get(req_dep)
            if req_cnt and req_operator(real_value, req_cnt):
                status_code = 1
                # The trailing space keeps the count apart from "is/are".
                print(
                    f"{model.model_name}: "
                    f"has {real_value} {req_type}, but {req_type} {req_cnt} "
                    f"is/are required.", )

    return status_code
Пример #7
0
def validate_tags(paths: Sequence[str], manifest: Dict[str, Any],
                  tags: Sequence[str]) -> int:
    """Check that changed models only use tags from the allowed list.

    Args:
        paths: File paths handed to the hook; ``get_model_sqls`` maps them to
            model SQL files.
        manifest: Manifest dictionary passed on to the helper functions.
        tags: The allowed tags.

    Returns:
        ``1`` if any model carries a tag outside ``tags`` (the offending
        tags are printed per model), otherwise ``0``.
    """
    status_code = 0
    sqls = get_model_sqls(paths, manifest)
    filenames = set(sqls.keys())
    # The allowed set does not depend on the model, so build it only once.
    valid_tags = set(tags)

    # get manifest nodes that pre-commit found as changed
    models = get_models(manifest, filenames)
    for model in models:
        # tags can be specified only from manifest
        model_tags = set(model.node.get("tags", []))
        if not model_tags.issubset(valid_tags):
            status_code = 1
            list_diff = list(model_tags.difference(valid_tags))
            result = "\n- ".join(list_diff)  # pragma: no mutate
            print(
                f"{model.node.get('original_file_path', model.filename)}: "
                f"has invalid tags:\n- {result}", )
    return status_code
def has_properties_file(paths: Sequence[str],
                        manifest: Dict[str, Any]) -> Tuple[int, Set[str]]:
    """Check that every changed model has properties defined in a yml file.

    A model passes when its manifest node has a non-empty ``patch_path``.

    Args:
        paths: File paths handed to the hook; ``get_model_sqls`` maps them to
            model SQL files.
        manifest: Manifest dictionary passed on to the helper functions.

    Returns:
        A tuple ``(status_code, missing)``: ``status_code`` is ``1`` when at
        least one model fails the check, else ``0``; ``missing`` is the set
        of failing model filenames.
    """
    sqls = get_model_sqls(paths, manifest)
    filenames = set(sqls.keys())

    # Collect the changed manifest nodes that already point to a patch file.
    in_models = set()
    for model in get_models(manifest, filenames):
        if model.node.get("patch_path"):
            in_models.add(model.filename)

    missing = filenames.difference(in_models)
    for filename in missing:
        print(
            f"{sqls.get(filename)}: "
            f"does not have model properties defined in any .yml file.", )
    return (1 if missing else 0), missing
Пример #9
0
def check_test_cnt(
    paths: Sequence[str],
    manifest: Dict[str, Any],
    test_group: Dict[str, int],
    test_cnt: int,
) -> int:
    """Check that changed models have enough distinct tests from a group.

    For each model, the number of test names in ``test_group`` that occur
    among the model's tests is compared with ``test_cnt``.

    Args:
        paths: File paths handed to the hook; ``get_model_sqls`` maps them to
            model SQL files.
        manifest: Manifest dictionary passed on to the helper functions.
        test_group: Test names that count towards the requirement; only the
            keys are read here.
        test_cnt: Minimum number of distinct group tests per model.

    Returns:
        ``1`` if any model has fewer matching tests than ``test_cnt`` (a
        message is printed for each such model), otherwise ``0``.
    """

    def by_test_name(test):
        return test.test_name

    status_code = 0
    sqls = get_model_sqls(paths, manifest)
    filenames = set(sqls.keys())

    # Manifest nodes that pre-commit reported as changed.
    for model in get_models(manifest, filenames):
        test_children = list(
            get_parent_childs(
                manifest=manifest,
                obj=model,
                manifest_node="child_map",
                node_types=["test"],
            )
        )
        tests = [child for child in test_children if isinstance(child, Test)]
        # groupby only merges adjacent items, so order by the same key first.
        tests.sort(key=by_test_name)
        tests_by_name = {
            name: list(group) for name, group in groupby(tests, by_test_name)
        }
        required_test_count = sum(
            1 for name in test_group if tests_by_name.get(name)
        )
        if required_test_count >= test_cnt:
            continue
        print(
            f"{model.model_name}: "
            f"has only {required_test_count} test(s) from {test_group}.",
        )
        status_code = 1
    return status_code