def main(config_file: str, search_text: str, dry_run: bool, delete: bool, debug: bool):
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )
    config = parse_config(config_file=config_file)
    client = get_client(**config)

    if delete:
        if search_text is None:
            logging.error(
                "--delete must be invoked with an explicit value for --search-text"
            )
            return False
        return delete_existing_policies(
            client=client, dry_run=dry_run, search_text=search_text, debug=debug
        )
    else:
        return create_or_update_policies(
            client=client,
            config_root=config["config_root"],
            dry_run=dry_run,
            debug=debug,
        )
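# A minimal sketch of how this entrypoint might be invoked as a CLI. The
# original wiring is not shown here; argparse is used as an assumption, with
# flag names taken from the parameters and the --delete/--search-text error
# message above.
if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser(description="Create, update, or delete policies.")
    parser.add_argument("--config-file", required=True)
    parser.add_argument("--search-text", default=None)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--delete", action="store_true")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    ok = main(
        config_file=args.config_file,
        search_text=args.search_text,
        dry_run=args.dry_run,
        delete=args.delete,
        debug=args.debug,
    )
    sys.exit(0 if ok else 1)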
def main(config_file: str, search_text: str, dry_run: bool, debug: bool):
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )
    config = parse_config(config_file=config_file)
    client = get_client(**config)
    tagger = Tagger(config_root=config["config_root"])

    logging.info("Making tags")
    tagger.make_tags(client)

    logging.info("Gathering data sources to tag")
    data_sources_to_tag = []
    with Paginator(client.get_data_source_list, search_text=search_text) as paginator:
        for data_source in paginator:
            data_sources_to_tag.append(
                {"id": data_source["id"], "name": data_source["name"]}
            )

    progress_iterator = tqdm(data_sources_to_tag)
    for data_source in progress_iterator:
        progress_iterator.set_description(
            desc=f"Tagging ID: {data_source['id']}, Name: {data_source['name']} :"
        )
        # Apply data-source-level tags first.
        data_source_tags = tagger.get_tags_for_data_source(name=data_source["name"])
        if data_source_tags:
            logging.debug(f"Adding data source tags to {data_source['name']}.")
            if not dry_run:
                client.tag_data_source(
                    id=data_source["id"], tag_data=data_source_tags
                )

        # Then enrich column-level tags, updating the data dictionary only
        # when the enrichment actually changed something.
        dictionary = client.get_data_source_dictionary(id=data_source["id"])
        enriched_columns = tagger.enrich_columns_with_tagging(dictionary.metadata)
        if enriched_columns == dictionary.metadata:
            logging.debug(
                f"No change to column tags for data source:"
                f" {data_source['name']}. Skipping."
            )
            continue
        logging.debug(
            f"Enriched columns for {data_source['name']}:"
            f" {dictionary.dict()['metadata']}"
        )
        logging.info(
            f"Change detected to column tags. Updating data source"
            f" {data_source['name']}'s data dictionary."
        )
        dictionary.metadata = enriched_columns
        if not dry_run:
            client.update_data_source_dictionary(
                id=data_source["id"], dictionary=dictionary
            )
    logging.info("FIN.")
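# For reference, a minimal shim illustrating the contract the loops above rely
# on from Paginator: a context manager whose entered value can be iterated to
# lazily walk a paged list endpoint. This is a sketch under assumed page
# semantics (offset-based, empty page terminates), not the actual Paginator
# implementation.
class PaginatorSketch:
    def __init__(self, list_fn, **search_kwargs):
        self._list_fn = list_fn
        self._search_kwargs = search_kwargs

    def __enter__(self):
        return self._records()

    def __exit__(self, exc_type, exc, tb):
        return False  # propagate any exception raised in the loop body

    def _records(self):
        offset = 0
        while True:
            # Assumed signature: the list call accepts an offset plus the
            # original search kwargs and returns one page of records.
            page = self._list_fn(offset=offset, **self._search_kwargs)
            if not page:
                return
            yield from page
            offset += len(page)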
def main(
    config_file: str, search_text: str, hard_delete: bool, dry_run: bool, debug: bool
):
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )
    config = parse_config(config_file=config_file)
    client = get_client(**config)

    logging.info("Gathering data sources to delete")
    data_sources_to_delete = []
    with Paginator(client.get_data_source_list, search_text=search_text) as paginator:
        for data_source in paginator:
            data_sources_to_delete.append(
                {"id": data_source["id"], "name": data_source["name"]}
            )

    if dry_run:
        # Preview only: list what would be deleted without touching anything.
        logging.info("bulk-delete dry run")
        for data_source in data_sources_to_delete:
            logging.info(
                f"Data source Id: {data_source['id']}. Name: {data_source['name']}"
            )
    elif hard_delete:
        logging.info(
            f"Hard deleting {len(data_sources_to_delete)} data sources. "
            "The data sources cannot be restored in the future"
        )
        for data_source in tqdm(data_sources_to_delete, desc="Deleting"):
            logging.debug(f"Hard deleting {data_source['name']}")
            client.delete_data_source(data_source["id"])
    else:
        logging.info(
            f"Disabling {len(data_sources_to_delete)} data sources. "
            "The data sources can be restored in the future"
        )
        for data_source in tqdm(data_sources_to_delete, desc="Disabling"):
            logging.debug(f"Disabling {data_source['name']}")
            client.disable_data_source(data_source["id"])
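# Example invocation order, assuming the CLI flags mirror this function's
# parameters (the script name below is hypothetical): preview with --dry-run
# first, prefer the reversible disable, and hard delete only after reviewing
# the dry-run output, since hard deletion cannot be undone.
#
#   python bulk_delete.py --config-file config.yml --search-text staging --dry-run
#   python bulk_delete.py --config-file config.yml --search-text staging
#   python bulk_delete.py --config-file config.yml --search-text staging --hard-delete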
def main(config_file: str, glob_prefix: str, debug: bool, dry_run: bool) -> bool:
    logging.basicConfig(
        format="[%(name)s][%(levelname)s][%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=(logging.DEBUG if debug else logging.INFO),
    )
    no_enrollment_errors = True
    config = parse_config(config_file=config_file)
    client = get_client(**config)
    dataset_spec_filepath = os.path.join(
        config["config_root"], "enrolled_datasets", glob_prefix
    )
    LOGGER.debug(f"Globbing for files in {dataset_spec_filepath}")
    for filepath in glob.glob(dataset_spec_filepath):
        LOGGER.info("Processing file: %s", filepath)
        with open(filepath) as handle:
            dataset_spec = yaml.safe_load(handle)

        # Resolve credentials out-of-band so secrets never live in the spec file.
        credentials = retrieve_credentials(dataset_spec["credentials"])
        dataset_spec["username"] = credentials["username"]
        dataset_spec["password"] = credentials["password"]

        schema_table_mapping = get_tables_in_database(client, dataset_spec)
        failed_tables = set()
        data_sources_to_enroll = [
            (dataset_spec["schemas_to_enroll"], data_sources_enroll_iterator),
            (dataset_spec["schemas_to_bulk_enroll"], data_sources_bulk_enroll_iterator),
        ]
        for schemas, enroll_iter in data_sources_to_enroll:
            if not schemas:
                continue
            for schema_object in schemas:
                for data_source, handler in enroll_iter(  # type: ignore
                    client=client,
                    schema_table_mapping=schema_table_mapping,
                    schema_obj=schema_object,
                    config=dataset_spec,
                ):
                    LOGGER.debug("Data source: %s", data_source.json())
                    if isinstance(handler, list):
                        LOGGER.debug("Handler[0]: %s", handler[0].json())
                    elif isinstance(handler, Handler):
                        LOGGER.debug("Handler: %s", handler.json())
                    else:
                        raise TypeError(
                            f"Unexpected type for handler; Got: {type(handler)}"
                        )
                    if not dry_run:
                        if not create_data_source(
                            client=client, data_source=data_source, handler=handler
                        ):
                            failed_tables.add(data_source.name)
        if failed_tables:
            no_enrollment_errors = False
            LOGGER.warning("Tables that failed creation:")
            for table in failed_tables:
                LOGGER.warning(table)
    LOGGER.info("Finished enrollment")
    return no_enrollment_errors
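# For orientation, the shape of an enrolled-dataset spec this function reads,
# written as the dict yaml.safe_load would produce. Only the top-level keys
# used above are grounded in the code; the nested values and the credential
# reference format are illustrative assumptions.
EXAMPLE_DATASET_SPEC = {
    # Reference resolved by retrieve_credentials(); username/password are
    # injected into the spec at runtime rather than stored in the file.
    "credentials": "secret/data/warehouse",  # hypothetical reference
    # Schemas enrolled one data source at a time.
    "schemas_to_enroll": [{"schema_name": "public"}],  # field names assumed
    # Schemas enrolled through the bulk iterator; may be empty.
    "schemas_to_bulk_enroll": [],
}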