Python export_artifacts示例，tamr_toolbox.project.export_artifacts Python示例

示例#1

0

显示文件

def test_export_errors():
    """Assert that ``export_artifacts`` raises ``ValueError`` for bad arguments.

    Two cases are exercised against the ``minimal_categorization`` project:
    an exclusion list containing an unknown artifact name, and an export
    directory path that is expected to be rejected.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    project = client.projects.by_resource_id(CONFIG["projects"]["minimal_categorization"])

    known_exclusions = [
        CategorizationArtifacts.CATEGORIZATION_VERIFIED_LABELS,
        CategorizationArtifacts.CATEGORIZATION_TAXONOMIES,
    ]

    # (artifact directory, exclusion list) pairs that must each be rejected
    failing_cases = (
        # incorrect artifact name
        (
            "/home/ubuntu/tamr/projectExports",
            known_exclusions + ["INCORRECT_ARTIFACT_NAME"],
        ),
        # incorrect artifact directory path
        ("/an/incorrect/path", list(known_exclusions)),
    )

    for directory, exclusions in failing_cases:
        with pytest.raises(ValueError):
            export_artifacts(
                project=project,
                artifact_directory_path=directory,
                exclude_artifacts=exclusions,
                asynchronous=False,
            )

示例#2

0

显示文件

def test_import_new(
    project_to_export: str,
    new_project_name: str,
    include_additive_artifacts: Optional[List[str]],
    include_destructive_artifacts: Optional[List[str]],
):
    """Export a project, import it as a brand-new project, and run the result.

    Args:
        project_to_export: Resource id of the project to export (looked up with
            ``client.projects.by_resource_id``).
        new_project_name: Name for the imported project; must not already exist
            on the test instance.
        include_additive_artifacts: Forwarded unchanged to ``import_artifacts``.
        include_destructive_artifacts: Forwarded unchanged to ``import_artifacts``.

    Raises:
        AssertionError: If any operation does not succeed, or if a project named
            ``new_project_name`` already exists on the test instance.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    project = client.projects.by_resource_id(project_to_export)

    # export a project
    op = export_artifacts(
        project=project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=None,
        asynchronous=False,
    )

    assert op.succeeded()

    # the artifact path is taken from the last space-separated token of the
    # operation description
    artifact_path = str(op.description).split(" ")[-1]

    assert artifact_path[0] == "/"
    assert os.path.splitext(artifact_path)[1] == ".zip"

    # golden records projects are imported without a new unified dataset name
    if project.type == "GOLDEN_RECORDS":
        new_unified_dataset_name = None
    else:
        new_unified_dataset_name = new_project_name + "_ud"

    # refuse to continue if the target name is already taken
    if new_project_name in [p.name for p in client.projects]:
        raise AssertionError(f"{new_project_name} already exists in test instance.")

    op = import_artifacts(
        project_artifact_path=artifact_path,
        tamr_client=client,
        new_project_name=new_project_name,
        new_unified_dataset_name=new_unified_dataset_name,
        include_additive_artifacts=include_additive_artifacts,
        include_destructive_artifacts=include_destructive_artifacts,
        asynchronous=False,
    )
    assert op.succeeded()

    try:
        # run new project
        project = client.projects.by_name(new_project_name)
        # run jobs
        ops = workflow.jobs.run([project], run_apply_feedback=False)
        for op in ops:
            assert op.succeeded()
    finally:
        # Always clean up the imported project and associated datasets, even
        # when a job assertion fails; otherwise the leftover project makes the
        # next run trip the "already exists" check above.
        # delete is unstable and can leave orphaned unified datasets + downstream datasets
        responses = _project_clean_up(client, new_project_name, new_unified_dataset_name)
        print(responses)

示例#3

0

显示文件

def test_import_existing(
    project_to_export: str,
    existing_project_name: str,
    include_additive_artifacts: Optional[List[str]],
    include_destructive_artifacts: Optional[List[str]],
):
    """Export one project and import its artifacts over an existing project.

    Args:
        project_to_export: Resource id of the project whose artifacts are exported.
        existing_project_name: Name of the project that receives the import.
        include_additive_artifacts: Forwarded unchanged to ``import_artifacts``.
        include_destructive_artifacts: Forwarded unchanged to ``import_artifacts``.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])

    # Step 1: export the source project synchronously.
    source_project = client.projects.by_resource_id(project_to_export)
    export_op = export_artifacts(
        project=source_project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=None,
        asynchronous=False,
    )
    assert export_op.succeeded()

    # Step 2: the artifact location is the last space-separated token of the
    # operation description; sanity-check that it looks like an absolute zip path.
    artifact_path = str(export_op.description).split(" ")[-1]
    first_char = artifact_path[0]
    _, extension = os.path.splitext(artifact_path)
    assert first_char == "/"
    assert extension == ".zip"

    # Step 3: import into the already-existing target project.
    existing_project = client.projects.by_name(existing_project_name)
    import_op = import_artifacts(
        tamr_client=existing_project.client,
        project_artifact_path=artifact_path,
        target_project=existing_project,
        include_additive_artifacts=include_additive_artifacts,
        include_destructive_artifacts=include_destructive_artifacts,
        overwrite_existing=True,
        asynchronous=False,
    )
    assert import_op.succeeded()

    # Step 4: run the target project's jobs and require each one to succeed.
    job_ops = workflow.jobs.run([existing_project], run_apply_feedback=False)
    for job_op in job_ops:
        assert job_op.succeeded()

示例#4

0

显示文件

def test_export():
    """Export the ``minimal_categorization`` project while excluding three artifacts.

    The export is run synchronously and the returned operation must succeed.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    project_id = CONFIG["projects"]["minimal_categorization"]
    project = client.projects.by_resource_id(project_id)

    export_op = export_artifacts(
        project=project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        # artifacts deliberately left out of the export
        exclude_artifacts=[
            CategorizationArtifacts.CATEGORIZATION_VERIFIED_LABELS,
            CategorizationArtifacts.CATEGORIZATION_FEEDBACK,
            CategorizationArtifacts.CATEGORIZATION_TAXONOMIES,
        ],
        asynchronous=False,
    )

    assert export_op.succeeded()

示例#5

0

显示文件

def test_import_existing_errors():
    """Assert the exceptions raised by invalid imports into an existing project.

    A fresh export of ``minimal_categorization`` is produced first; each case
    then calls ``import_artifacts`` against ``minimal_incomplete_categorization``
    with one invalid argument combination and checks the exception type.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    source_project = client.projects.by_resource_id(
        CONFIG["projects"]["minimal_categorization"]
    )

    # export a project
    export_op = export_artifacts(
        project=source_project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=None,
        asynchronous=False,
    )
    assert export_op.succeeded()

    # the artifact path is the last space-separated token of the description
    artifact_path = str(export_op.description).split(" ")[-1]
    print(artifact_path)

    assert artifact_path[0] == "/"
    assert os.path.splitext(artifact_path)[1] == ".zip"

    # get existing project
    existing_project = client.projects.by_name("minimal_incomplete_categorization")

    def expect_failure(expected_error, **overrides):
        # Call import_artifacts with the shared baseline arguments, replaced or
        # extended by ``overrides``, and require ``expected_error``.
        call_kwargs = {
            "tamr_client": existing_project.client,
            "project_artifact_path": artifact_path,
            "target_project": existing_project,
            "overwrite_existing": True,
        }
        call_kwargs.update(overrides)
        with pytest.raises(expected_error):
            import_artifacts(**call_kwargs)

    # setting new_project_name on an existing project
    expect_failure(KeyError, new_project_name="new_project_name")

    # setting new_unified_dataset_name on an existing project
    expect_failure(KeyError, new_unified_dataset_name="new_ud_name")

    # overwrite flag switched off
    # NOTE(review): this case also passes an invalid artifact path, as in the
    # original test - confirm whether that is intentional.
    expect_failure(
        KeyError,
        project_artifact_path="incorrect/artifact/path",
        overwrite_existing=False,
    )

    # incorrect artifact path
    expect_failure(ValueError, project_artifact_path="incorrect/artifact/path")

    # fail if not present (this case uses the top-level client)
    expect_failure(
        ValueError,
        tamr_client=client,
        include_destructive_artifacts=[CategorizationArtifacts.CATEGORIZATION_TAXONOMIES],
        fail_if_not_present=True,
    )

    # incorrect artifact names
    expect_failure(ValueError, include_additive_artifacts=["incorrect_artifact_name"])
    expect_failure(ValueError, include_destructive_artifacts=["incorrect_artifact_name"])

    # Expected ValueError but got RuntimeError instead
    expect_failure(RuntimeError, exclude_artifacts=["incorrect_artifact_name"])

    # including an additive artifact that is not supported
    # This is RuntimeError, not ValueError
    expect_failure(
        RuntimeError,
        include_additive_artifacts=[CategorizationArtifacts.CATEGORIZATION_MODEL],
    )

示例#6

0

显示文件

def test_import_new_errors():
    """Assert that invalid new-project imports raise ``ValueError``.

    A fresh export of ``minimal_categorization`` is produced first. Each case
    then calls ``import_artifacts`` for a new project named
    ``new_categorization`` with one invalid argument. Finally the test checks
    that no project with that name was created along the way.
    """
    client = utils.client.create(**CONFIG["toolbox_test_instance"])
    source_project = client.projects.by_resource_id(
        CONFIG["projects"]["minimal_categorization"]
    )

    # export a project
    export_op = export_artifacts(
        project=source_project,
        artifact_directory_path="/home/ubuntu/tamr/projectExports",
        exclude_artifacts=None,
        asynchronous=False,
    )
    assert export_op.succeeded()

    # the artifact path is the last space-separated token of the description
    artifact_path = str(export_op.description).split(" ")[-1]

    assert artifact_path[0] == "/"
    assert os.path.splitext(artifact_path)[1] == ".zip"

    new_project_name = "new_categorization"

    # the cases below only make sense if the name is still free
    if new_project_name in [p.name for p in client.projects]:
        raise AssertionError(f"{new_project_name} already exists in test instance.")

    def expect_value_error(**overrides):
        # Call import_artifacts with the shared baseline arguments, replaced or
        # extended by ``overrides``, and require a ValueError.
        call_kwargs = {
            "project_artifact_path": artifact_path,
            "tamr_client": client,
            "new_project_name": new_project_name,
        }
        call_kwargs.update(overrides)
        with pytest.raises(ValueError):
            import_artifacts(**call_kwargs)

    # incorrect path
    expect_value_error(project_artifact_path="incorrect/artifact/path")

    # fail if not present
    expect_value_error(
        include_destructive_artifacts=[CategorizationArtifacts.UNIFIED_ATTRIBUTES],
        fail_if_not_present=True,
    )

    # incorrect artifact names
    expect_value_error(exclude_artifacts=["incorrect_artifact_name"])
    expect_value_error(include_additive_artifacts=["incorrect_artifact_name"])
    expect_value_error(include_destructive_artifacts=["incorrect_artifact_name"])

    # writing to an existing project name
    expect_value_error(new_project_name="minimal_incomplete_categorization")

    # writing to an existing unified dataset name
    expect_value_error(new_unified_dataset_name="minimal_categorization_unified_dataset")

    # none of the rejected imports may have created the project
    if new_project_name in [p.name for p in client.projects]:
        raise RuntimeError(f"{new_project_name} is being unintentionally created during test.")

示例#7

0

显示文件

文件： export_artifacts.py 项目： skalish/tamr-toolbox

# Directory that receives the export (a path on the server hosting the Tamr instance)
path_export_dir = "/home/ubuntu/tamr/projectExports"

# Categorization artifacts to leave out of the export.
# Entries may be plain artifact-code strings or attributes of the
# CategorizationArtifacts dataclass; both forms are shown here.
exclude_list = [
    CategorizationArtifacts.CATEGORIZATION_VERIFIED_LABELS,
    "CATEGORIZATION_TAXONOMIES",
    CategorizationArtifacts.CATEGORIZATION_FEEDBACK,
]

# Connect to Tamr (credentials are masked in this example)
tamr = tbox.utils.client.create(username="******", password="******", host="localhost")

# Look up the project to export by its resource id
project = tamr.projects.by_resource_id("my_project_id")

# Run the export synchronously and keep the resulting operation
op = export_artifacts(
    project=project,
    artifact_directory_path=path_export_dir,
    exclude_artifacts=exclude_list,
    asynchronous=False,
)

# Show the operation
print(op)