def unmap_dataset(
    project: Project,
    *,
    source_dataset: Dataset,
    remove_dataset_from_project: bool = False,
    skip_if_missing: bool = False,
) -> None:
    """
    Removes every attribute mapping of a source dataset, optionally dropping the dataset too.

    Args:
        source_dataset: the source dataset (a Dataset object, not its name) to unmap
        project: the project from which the dataset's mappings are removed
        remove_dataset_from_project: when True the dataset is also removed from the
            project's input datasets once all of its mappings are gone
        skip_if_missing: when True a dataset that is not an input of the project is
            tolerated (a warning is logged and nothing else happens); when False that
            situation raises a RuntimeError

    Returns:
        None

    Raises:
        RuntimeError: if `source_dataset` is not in `project` and `skip_if_missing`
            is not set to True
    """
    dataset_name = source_dataset.name
    input_dataset_names = {dataset.name for dataset in project.input_datasets()}

    # guard: the dataset has to be an input of the project to be unmapped
    if dataset_name not in input_dataset_names:
        if not skip_if_missing:
            error_message = (
                f"Dataset to unmap {source_dataset.name} not in project "
                f"{project.name} and skip_if_missing not set to True so failing! "
            )
            LOGGER.error(error_message)
            raise RuntimeError(error_message)

        LOGGER.warning(
            f"Dataset to unmap {source_dataset.name} not in project {project.name}! "
            f"However skip_if_missing flag is set so will do nothing"
        )
        return None

    def _remaining_mappings():
        # fresh listing of the mappings that still point at the dataset of interest
        return [
            candidate
            for candidate in project.attribute_mappings().stream()
            if candidate.input_dataset_name == dataset_name
        ]

    # resource ids of attribute mappings change whenever one is deleted, so only a
    # single mapping can be removed per listing; re-list until nothing is left
    remaining = _remaining_mappings()
    while remaining:
        project.attribute_mappings().delete_by_resource_id(remaining[0].resource_id)
        remaining = _remaining_mappings()

    # optionally remove dataset from the project
    if remove_dataset_from_project:
        project.remove_input_dataset(source_dataset)
def unmap_attribute(
    project: Project,
    *,
    source_attribute_name: str,
    source_dataset_name: str,
    unified_attribute_name: str,
) -> None:
    """
    Removes the mapping between one source attribute and one unified attribute.

    If no such mapping exists a warning is logged and nothing is deleted.

    Args:
        source_attribute_name: name of the source attribute whose mapping is removed
        source_dataset_name: name of the source dataset that holds the source attribute
        unified_attribute_name: name of the unified attribute the source attribute is
            currently mapped to
        project: the project holding the mapping

    Returns:
        None
    """
    LOGGER.info(
        f"Trying to remove mapping of source attribute {source_attribute_name} in dataset "
        f"{source_dataset_name} from unified attribute {unified_attribute_name}"
    )

    all_mappings = project.attribute_mappings()

    # lazily yield resource ids of mappings whose source attribute, source dataset
    # and unified attribute all match; only the first hit is ever consumed
    matching_resource_ids = (
        candidate.resource_id
        for candidate in all_mappings.stream()
        if source_attribute_name == candidate.input_attribute_name
        and source_dataset_name == candidate.input_dataset_name
        and unified_attribute_name == candidate.unified_attribute_name
    )
    target_resource_id = next(matching_resource_ids, None)

    # found: delete it and finish
    if target_resource_id is not None:
        all_mappings.delete_by_resource_id(target_resource_id)
        return

    # not found: nothing to delete, just let the caller know via the log
    LOGGER.warning(
        f"Mapping of {source_attribute_name} in dataset {source_dataset_name} to "
        f"unified attribute {unified_attribute_name} not found!"
    )
def map_attribute(
    project: Project,
    *,
    source_attribute_name: str,
    source_dataset_name: str,
    unified_attribute_name: str,
) -> AttributeMapping:
    """
    Maps source_attribute in source_dataset to unified_attribute in unified_dataset.

    If the mapping already exists it will log a warning and return the existing
    AttributeMapping from the project's collection.

    Args:
        source_attribute_name: Source attribute name to map
        source_dataset_name: Source dataset containing the source attribute
        unified_attribute_name: Unified attribute to which to map the source attribute
        project: The project in which to perform the mapping

    Returns:
        The AttributeMapping that was created, or the already existing
        AttributeMapping when the same mapping was present beforehand

    Raises:
        ValueError: if input variables `source_attribute_name` or `source_dataset_name`
            or `unified_attribute_name` are set to empty strings;
            or if the dataset `source_dataset_name` is not found on Tamr;
            or if `source_attribute_name` is missing from the attributes of
            `source_dataset_name`
        JSONDecodeError: if creating the mapping fails with this error and no
            equivalent mapping already exists in the project
    """
    # simple validation, nothing should be empty. Keep the parameter names next to
    # their values so the error message can say WHICH inputs are empty (joining the
    # empty values themselves would produce a blank list).
    named_inputs = (
        ("source_attribute_name", source_attribute_name),
        ("source_dataset_name", source_dataset_name),
        ("unified_attribute_name", unified_attribute_name),
    )
    empty_variable_names = [name for name, value in named_inputs if value == ""]
    if empty_variable_names:
        empty_variable_string = ", ".join(empty_variable_names)
        error_message = (
            f"The following variables are set to empty strings and "
            f"need to be filled in: {empty_variable_string} !"
        )
        LOGGER.error(error_message)
        raise ValueError(error_message)

    # also validate that the dataset exists and has this column
    try:
        source_dataset = project.client.datasets.by_name(source_dataset_name)
    except KeyError:
        error_msg = f"Dataset {source_dataset_name} not found!"
        LOGGER.error(error_msg)
        raise ValueError(error_msg)

    # explicit membership test rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this validation
    if source_attribute_name not in [x.name for x in source_dataset.attributes]:
        error_msg = f"Attribute {source_attribute_name} not found in {source_dataset_name}!"
        LOGGER.error(error_msg)
        raise ValueError(error_msg)

    # generate mapping spec
    mapping_spec = _get_mapping_spec_for_ud(
        source_attr_name=source_attribute_name,
        source_ds_name=source_dataset_name,
        unified_attr_name=unified_attribute_name,
        unified_ds_name=project.unified_dataset().name,
    )

    # add the mapping to the project's collection - this is what does the actual mapping
    try:
        return project.attribute_mappings().create(mapping_spec.to_dict())
    except JSONDecodeError:
        # can get a jsondecode error if the attribute is already mapped.
        # If it is, then log a warning and return the existing mapping;
        # if it is not already mapped break loudly
        m: AttributeMapping
        for m in project.attribute_mappings().stream():
            if (
                m.input_dataset_name == source_dataset_name
                and m.input_attribute_name == source_attribute_name
                and m.unified_attribute_name == unified_attribute_name
            ):
                # mapping exists, log warning and return existing mapping
                LOGGER.warning(
                    f"mapping of attribute {source_attribute_name} in dataset "
                    f"{source_dataset_name} to unified attribute {unified_attribute_name} "
                    f"already exists! Returning existing mapping spec"
                )
                return m
        # no equivalent mapping found: re-raise the original JSONDecodeError untouched
        raise