def test_export_errors():
    """Check that export_artifacts raises ValueError for bad artifact names and bad paths."""
    tamr_client = utils.client.create(**CONFIG["toolbox_test_instance"])
    categorization_project = tamr_client.projects.by_resource_id(
        CONFIG["projects"]["minimal_categorization"]
    )
    valid_exclusions = [
        CategorizationArtifacts.CATEGORIZATION_VERIFIED_LABELS,
        CategorizationArtifacts.CATEGORIZATION_TAXONOMIES,
    ]

    # an exclusion list containing an unknown artifact name is expected to be rejected
    with pytest.raises(ValueError):
        export_artifacts(
            project=categorization_project,
            artifact_directory_path="/home/ubuntu/tamr/projectExports",
            exclude_artifacts=valid_exclusions + ["INCORRECT_ARTIFACT_NAME"],
            asynchronous=False,
        )

    # an incorrect artifact directory path is expected to be rejected as well
    with pytest.raises(ValueError):
        export_artifacts(
            project=categorization_project,
            artifact_directory_path="/an/incorrect/path",
            exclude_artifacts=list(valid_exclusions),
            asynchronous=False,
        )
def test_import_new(
    project_to_export: str,
    new_project_name: str,
    include_additive_artifacts: Optional[List[str]],
    include_destructive_artifacts: Optional[List[str]],
):
    """Export a project, import the artifacts as a new project, run it and clean up.

    Args:
        project_to_export: Resource id of the project to export.
        new_project_name: Name given to the imported project; it must not already
            exist on the test instance.
        include_additive_artifacts: Passed through to ``import_artifacts``.
        include_destructive_artifacts: Passed through to ``import_artifacts``.

    Raises:
        AssertionError: If ``new_project_name`` already exists on the test instance,
            or if the export, the import, or any job run afterwards does not succeed.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    project = client.projects.by_resource_id(project_to_export)

    # export a project
    op = export_artifacts(
        project=project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=None,
        asynchronous=False,
    )
    assert op.succeeded()

    # get artifact path: the last space-separated token of the operation description
    artifact_path = str(op.description).split(" ")[-1]
    assert artifact_path[0] == "/"
    assert os.path.splitext(artifact_path)[1] == ".zip"

    # golden records projects are imported without a new unified dataset name
    if project.type == "GOLDEN_RECORDS":
        new_unified_dataset_name = None
    else:
        new_unified_dataset_name = new_project_name + "_ud"

    # import new project only if it doesn't exist already
    if new_project_name in [p.name for p in client.projects]:
        raise AssertionError(f"{new_project_name} already exists in test instance.")

    op = import_artifacts(
        project_artifact_path=artifact_path,
        tamr_client=client,
        new_project_name=new_project_name,
        new_unified_dataset_name=new_unified_dataset_name,
        include_additive_artifacts=include_additive_artifacts,
        include_destructive_artifacts=include_destructive_artifacts,
        asynchronous=False,
    )
    assert op.succeeded()

    try:
        # run new project
        new_project = client.projects.by_name(new_project_name)
        ops = workflow.jobs.run([new_project], run_apply_feedback=False)
        for op in ops:
            assert op.succeeded()
    finally:
        # clean up: delete project and associated datasets, even when a job failed,
        # so that a failed run does not leave the new project behind and trip the
        # "already exists" check on the next run
        # delete is unstable and can leave orphaned unified datasets + downstream datasets
        responses = _project_clean_up(client, new_project_name, new_unified_dataset_name)
        print(responses)
def test_import_existing(
    project_to_export: str,
    existing_project_name: str,
    include_additive_artifacts: Optional[List[str]],
    include_destructive_artifacts: Optional[List[str]],
):
    """Export one project and import its artifacts over an already existing project.

    Args:
        project_to_export: Resource id of the project to export.
        existing_project_name: Name of the project that receives the import.
        include_additive_artifacts: Passed through to ``import_artifacts``.
        include_destructive_artifacts: Passed through to ``import_artifacts``.
    """
    tamr_client = utils.client.create(**CONFIG["toolbox_test_instance"])
    source_project = tamr_client.projects.by_resource_id(project_to_export)

    # export the source project
    export_op = export_artifacts(
        project=source_project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=None,
        asynchronous=False,
    )
    assert export_op.succeeded()

    # the artifact path is the last space-separated token of the description
    artifact_path = str(export_op.description).rsplit(" ", 1)[-1]
    assert artifact_path[0] == "/"
    _, extension = os.path.splitext(artifact_path)
    assert extension == ".zip"

    # import into the existing project, overwriting what is there
    existing_project = tamr_client.projects.by_name(existing_project_name)
    import_op = import_artifacts(
        tamr_client=existing_project.client,
        project_artifact_path=artifact_path,
        target_project=existing_project,
        include_additive_artifacts=include_additive_artifacts,
        include_destructive_artifacts=include_destructive_artifacts,
        overwrite_existing=True,
        asynchronous=False,
    )
    assert import_op.succeeded()

    # run the target project; every job must succeed
    for job_op in workflow.jobs.run([existing_project], run_apply_feedback=False):
        assert job_op.succeeded()
def test_export():
    """Export the minimal categorization project with three artifacts excluded."""
    tamr_client = utils.client.create(**CONFIG["toolbox_test_instance"])
    project_id = CONFIG["projects"]["minimal_categorization"]
    categorization_project = tamr_client.projects.by_resource_id(project_id)

    export_op = export_artifacts(
        project=categorization_project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=[
            CategorizationArtifacts.CATEGORIZATION_VERIFIED_LABELS,
            CategorizationArtifacts.CATEGORIZATION_FEEDBACK,
            CategorizationArtifacts.CATEGORIZATION_TAXONOMIES,
        ],
        asynchronous=False,
    )

    assert export_op.succeeded()
def test_import_existing_errors():
    """Exercise the error cases of import_artifacts when targeting an existing project."""
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    source_project = client.projects.by_resource_id(CONFIG["projects"]["minimal_categorization"])

    # export a project so there is a real artifact to import from
    export_op = export_artifacts(
        project=source_project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=None,
        asynchronous=False,
    )
    assert export_op.succeeded()

    # the artifact path is the last space-separated token of the description
    artifact_path = str(export_op.description).rsplit(" ", 1)[-1]
    print(artifact_path)
    assert artifact_path[0] == "/"
    _, extension = os.path.splitext(artifact_path)
    assert extension == ".zip"

    existing_project = client.projects.by_name("minimal_incomplete_categorization")

    # setting new_project_name or new_unified_dataset_name on an existing project
    renaming_attempts = (
        {"new_project_name": "new_project_name"},
        {"new_unified_dataset_name": "new_ud_name"},
    )
    for renaming_kwargs in renaming_attempts:
        with pytest.raises(KeyError):
            import_artifacts(
                tamr_client=existing_project.client,
                project_artifact_path=artifact_path,
                target_project=existing_project,
                overwrite_existing=True,
                **renaming_kwargs,
            )

    # overwrite_existing switched off for an existing target project
    with pytest.raises(KeyError):
        import_artifacts(
            tamr_client=existing_project.client,
            project_artifact_path="incorrect/artifact/path",
            target_project=existing_project,
            overwrite_existing=False,
        )

    # incorrect artifact path
    with pytest.raises(ValueError):
        import_artifacts(
            tamr_client=existing_project.client,
            project_artifact_path="incorrect/artifact/path",
            target_project=existing_project,
            overwrite_existing=True,
        )

    # fail_if_not_present with a destructive artifact
    with pytest.raises(ValueError):
        import_artifacts(
            project_artifact_path=artifact_path,
            tamr_client=client,
            target_project=existing_project,
            include_destructive_artifacts=[CategorizationArtifacts.CATEGORIZATION_TAXONOMIES],
            fail_if_not_present=True,
            overwrite_existing=True,
        )

    # incorrect artifact name, first as additive and then as destructive artifact
    for include_keyword in ("include_additive_artifacts", "include_destructive_artifacts"):
        with pytest.raises(ValueError):
            import_artifacts(
                tamr_client=existing_project.client,
                project_artifact_path=artifact_path,
                target_project=existing_project,
                overwrite_existing=True,
                **{include_keyword: ["incorrect_artifact_name"]},
            )

    # incorrect artifact name in exclude_artifacts:
    # a ValueError was expected, but a RuntimeError is what gets raised
    with pytest.raises(RuntimeError):
        import_artifacts(
            tamr_client=existing_project.client,
            project_artifact_path=artifact_path,
            target_project=existing_project,
            exclude_artifacts=["incorrect_artifact_name"],
            overwrite_existing=True,
        )

    # including an additive artifact that is not supported
    # (this is a RuntimeError, not a ValueError)
    with pytest.raises(RuntimeError):
        import_artifacts(
            tamr_client=existing_project.client,
            project_artifact_path=artifact_path,
            target_project=existing_project,
            include_additive_artifacts=[CategorizationArtifacts.CATEGORIZATION_MODEL],
            overwrite_existing=True,
        )
def test_import_new_errors():
    """Exercise the error cases of import_artifacts when creating a new project.

    Raises:
        AssertionError: If the new project name already exists before the test starts.
        RuntimeError: If the new project exists after all the failing imports ran.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    source_project = client.projects.by_resource_id(CONFIG["projects"]["minimal_categorization"])

    # export a project so there is a real artifact to import from
    export_op = export_artifacts(
        project=source_project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=None,
        asynchronous=False,
    )
    assert export_op.succeeded()

    # the artifact path is the last space-separated token of the description
    artifact_path = str(export_op.description).rsplit(" ", 1)[-1]
    assert artifact_path[0] == "/"
    _, extension = os.path.splitext(artifact_path)
    assert extension == ".zip"

    # the error cases only make sense when the new project does not exist yet
    new_project_name = "new_categorization"
    names_before = [p.name for p in client.projects]
    if new_project_name in names_before:
        raise AssertionError(f"{new_project_name} already exists in test instance.")

    # incorrect artifact path
    with pytest.raises(ValueError):
        import_artifacts(
            project_artifact_path="incorrect/artifact/path",
            tamr_client=client,
            new_project_name=new_project_name,
        )

    # fail_if_not_present with a destructive artifact
    with pytest.raises(ValueError):
        import_artifacts(
            project_artifact_path=artifact_path,
            tamr_client=client,
            new_project_name=new_project_name,
            include_destructive_artifacts=[CategorizationArtifacts.UNIFIED_ATTRIBUTES],
            fail_if_not_present=True,
        )

    # incorrect artifact names, once for every artifact list argument
    artifact_list_keywords = (
        "exclude_artifacts",
        "include_additive_artifacts",
        "include_destructive_artifacts",
    )
    for list_keyword in artifact_list_keywords:
        with pytest.raises(ValueError):
            import_artifacts(
                project_artifact_path=artifact_path,
                tamr_client=client,
                new_project_name=new_project_name,
                **{list_keyword: ["incorrect_artifact_name"]},
            )

    # an existing project name is passed as the new project name
    with pytest.raises(ValueError):
        import_artifacts(
            project_artifact_path=artifact_path,
            tamr_client=client,
            new_project_name="minimal_incomplete_categorization",
        )

    # an existing unified dataset name is passed as the new unified dataset name
    with pytest.raises(ValueError):
        import_artifacts(
            project_artifact_path=artifact_path,
            tamr_client=client,
            new_project_name=new_project_name,
            new_unified_dataset_name="minimal_categorization_unified_dataset",
        )

    # none of the failing imports above may have created the project
    names_after = [p.name for p in client.projects]
    if new_project_name in names_after:
        raise RuntimeError(f"{new_project_name} is being unintentionally created during test.")
# Read config, make Tamr Client tamr = tbox.utils.client.create(username="******", password="******", host="localhost") # Get project object project = tamr.projects.by_resource_id("my_project_id") # Set path to export directory (on server containing tamr instance) path_export_dir = "/home/ubuntu/tamr/projectExports" # Make list of categorization artifacts to exclude. # You can spell out the artifact code if known, # or list access via the CategorizationArtifacts dataclass exclude_list = [ CategorizationArtifacts.CATEGORIZATION_VERIFIED_LABELS, "CATEGORIZATION_TAXONOMIES", CategorizationArtifacts.CATEGORIZATION_FEEDBACK, ] # Export project artifacts op = export_artifacts( project=project, artifact_directory_path=path_export_dir, exclude_artifacts=exclude_list, asynchronous=False, ) # Print operation print(op)