示例#1
0
def test_enrich_columns_with_tagging(tagger: tg.Tagger):
    """Check that column enrichment keeps every column and tags each per TAG_MAP.

    Three columns are submitted; a column whose name has no entry in
    ``TAG_MAP`` is expected to come back with zero tags.
    """
    column_names = ("col_foo", "col_bar", "bad_col")
    columns = [
        ds.DataSourceColumn(
            name=column_name, dataType="", remoteType="", nullable=False
        )
        for column_name in column_names
    ]

    enriched_cols = tagger.enrich_columns_with_tagging(columns)

    # Enrichment must neither drop nor add columns.
    assert len(enriched_cols) == len(columns)
    for col in enriched_cols:
        expected_tags = TAG_MAP.get(col.name, [])
        assert len(col.tags) == len(expected_tags)
def main(config_file: str, search_text: str, dry_run: bool, debug: bool):
    """Tag the data sources matching ``search_text`` and their columns.

    The run configures logging, builds the client and ``Tagger`` from the
    parsed config, calls ``tagger.make_tags``, collects the matching data
    sources through ``Paginator`` and then, for each one, applies its data
    source tags and updates its data dictionary when column enrichment
    produced a change.

    Args:
        config_file: Config file reference handed to ``parse_config``.
        search_text: Forwarded to ``client.get_data_source_list`` as
            ``search_text`` to select the data sources to process.
        dry_run: When true, ``client.tag_data_source`` and
            ``client.update_data_source_dictionary`` are not called.
        debug: When true, log at DEBUG level instead of INFO.
    """
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )
    config = parse_config(config_file=config_file)
    client = get_client(**config)
    tagger = Tagger(config_root=config["config_root"])

    logging.info("Making tags")
    # NOTE(review): make_tags runs even when dry_run is set -- confirm that
    # it performs no remote writes, or gate it on dry_run as well.
    tagger.make_tags(client)

    logging.info("Gathering data sources to tag")
    # Collect id/name pairs up front so the progress bar below iterates over
    # a plain list rather than the paginator.
    data_sources_to_tag = []
    with Paginator(client.get_data_source_list,
                   search_text=search_text) as paginator:
        data_sources_to_tag.extend(
            {"id": data_source["id"], "name": data_source["name"]}
            for data_source in paginator
        )

    progress_iterator = tqdm(data_sources_to_tag)
    for data_source in progress_iterator:
        source_id = data_source["id"]
        source_name = data_source["name"]
        progress_iterator.set_description(
            desc=f"Tagging ID: {source_id}, Name: {source_name} :")

        data_source_tags = tagger.get_tags_for_data_source(name=source_name)
        if data_source_tags:
            logging.debug(f"Adding data source tags to {source_name}.")
            if not dry_run:
                client.tag_data_source(id=source_id,
                                       tag_data=data_source_tags)

        dictionary = client.get_data_source_dictionary(id=source_id)
        enriched_columns = tagger.enrich_columns_with_tagging(
            dictionary.metadata)
        if enriched_columns == dictionary.metadata:
            logging.debug(
                f"No change to column tags for data source: {source_name}. Skipping."
            )
            continue

        # Assign before logging: the debug line below serializes the
        # dictionary, so it must already carry the enriched columns.
        # Previously the pre-enrichment metadata was logged under the
        # "Enriched columns" label.
        dictionary.metadata = enriched_columns
        logging.debug(f"Enriched columns for {source_name}:"
                      f" {dictionary.dict()['metadata']}")
        logging.info(
            f"Change detected to column tags. Updating data source {source_name}'s data dictionary."
        )
        if not dry_run:
            client.update_data_source_dictionary(id=source_id,
                                                 dictionary=dictionary)
    logging.info("FIN.")