示例#1
0
def _run_custom(
    project: Project,
    *,
    run_update_golden_records: bool = False,
    run_publish_golden_records: bool = False,
) -> List[Operation]:
    """Runs the selected steps of a golden records project, in workflow order.

    Every selected step posts a refresh request, waits on the resulting operation and
    enforces its success before the next step starts.

    Args:
        project: The target golden records project
        run_update_golden_records: Whether refresh should be called on the draft golden records
            dataset
        run_publish_golden_records: Whether refresh should be called on the published golden
            records dataset

    Returns:
        The operations that were run, in the order they were run

    Raises:
        TypeError: if the `project` is not a Golden Record project
    """
    version.enforce_after_or_equal(project.client, compare_version="2020.004.0")

    if ProjectType[project.type] != ProjectType.GOLDEN_RECORDS:
        error_msg = f"Cannot use as a golden records project. Project type: {project.type}"
        LOGGER.error(error_msg)
        raise TypeError(error_msg)

    def _refresh_and_wait(endpoint):
        # Post the refresh, block until the operation resolves, then insist it succeeded
        response = project.client.post(endpoint).successful()
        finished = Operation.from_response(client=project.client, response=response).wait()
        operation.enforce_success(finished)
        return finished

    completed_operations = []

    if run_update_golden_records:
        LOGGER.info(
            f"Updating the draft golden records for project {project.name} (id={project.resource_id})."
        )
        draft_endpoint = f"/api/versioned/v1/projects/{project.resource_id}/goldenRecords:refresh"
        completed_operations.append(_refresh_and_wait(draft_endpoint))

    if run_publish_golden_records:
        LOGGER.info(
            f"Publishing golden records for project {project.name} (id={project.resource_id})."
        )
        publish_endpoint = (
            f"/api/versioned/v1/projects/{project.resource_id}/publishedGoldenRecords:refresh"
            "?validate=true&version=CURRENT"
        )
        completed_operations.append(_refresh_and_wait(publish_endpoint))

    return completed_operations
示例#2
0
def test_from_op_failure():
    """A Tamr operation in the FAILED state maps onto the FAILED plan node status."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])

    # Minimal operation payload; only the "state" entry is meaningful for this check
    failed_status = {
        "state": "FAILED",
        "startTime": "early",
        "endTime": "late",
        "message": "",
    }
    created_stamp = {"username": "", "time": "early", "version": "-1"}
    modified_stamp = {"username": "", "time": "late", "version": "-1"}
    failed_op_json = {
        "id": "-1",
        "type": "NOOP",
        "description": "test",
        "status": failed_status,
        "created": created_stamp,
        "lastModified": modified_stamp,
        "relativeId": "operations/-1",
    }

    failed_op = Operation.from_json(tamr, failed_op_json)
    node_status = PlanNodeStatus.from_tamr_op(failed_op)

    assert node_status == PlanNodeStatus.PlanNodeStatus.FAILED
示例#3
0
def test_operation_from_response(client):
    """An operation built from a GET response exposes its resource id and reports success."""
    responses.add(responses.GET, full_url(client, "operations/1"), json=op_1_json)

    op1 = Operation.from_response(client, client.get("operations/1").successful())

    assert op1.resource_id == "1"
    # `succeeded` must be called: asserting on the bound method itself is always truthy
    assert op1.succeeded()
示例#4
0
def monitor(
    operation: Operation,
    *,
    poll_interval_seconds: float = 1,
    timeout_seconds: float = 300,
) -> Operation:
    """Continuously polls for this operation's server-side state and returns operation
    when there is a state change

    Args:
        operation: Operation to be monitored.
        poll_interval_seconds: Time interval (in seconds) between subsequent polls.
        timeout_seconds: Time (in seconds) to wait for operation to resolve. `None` disables
            the timeout.

    Returns:
        The polled operation, as soon as its state differs from the state it had when
        monitoring started, or as soon as a poll returns an operation without a status.

    Raises:
        TimeoutError: If operation takes longer than `timeout_seconds` to resolve.
    """
    # The state lookup assumes a status is present, so guard it; an operation that starts
    # without a status simply has no known initial state.
    initial_state = None if operation.status is None else OperationState[operation.state]
    started = now()
    while timeout_seconds is None or now() - started < timeout_seconds:
        operation = operation.poll()
        # Check for a missing status *before* translating the state into an OperationState
        if operation.status is None:
            return operation
        if OperationState[operation.state] != initial_state:
            return operation
        sleep(poll_interval_seconds)
    raise TimeoutError(
        f"Waiting for operation took longer than {timeout_seconds} seconds.")
示例#5
0
def wait(
    operation: Operation,
    *,
    poll_interval_seconds: int = 3,
    timeout_seconds: Optional[int] = None,
) -> Operation:
    """Polls an operation until it reaches a resolved state.

    Args:
        operation: Operation to be polled.
        poll_interval_seconds: Time interval (in seconds) between subsequent polls.
        timeout_seconds: Time (in seconds) to wait for operation to resolve. `None` waits
            without a time limit.

    Returns:
        The operation once it has no status or its state is canceled, succeeded or failed.

    Raises:
        TimeoutError: If operation takes longer than `timeout_seconds` to resolve.
    """
    unresolved_states = [OperationState.PENDING, OperationState.RUNNING]
    resolved_states = [
        OperationState.CANCELED,
        OperationState.SUCCEEDED,
        OperationState.FAILED,
    ]

    started = now()
    while timeout_seconds is None or now() - started < timeout_seconds:
        if operation.status is None:
            return operation

        current_state = operation.status["state"]
        if current_state in unresolved_states:
            # Still in progress: pause before asking the server again
            sleep(poll_interval_seconds)
        elif current_state in resolved_states:
            return operation

        operation = operation.poll()

    raise TimeoutError(
        f"Waiting for operation took longer than {timeout_seconds} seconds.")
示例#6
0
def _collect_operation_calls(*,
                             response: Response,
                             poll_interval_seconds: int = 3) -> List[Response]:
    """If the provided response is an Operation, wait for the operation to complete and
    return responses related to that operation.

    Three snapshots of the operation endpoint are taken: one immediately, one after the
    operation has left the PENDING state, and one after the operation has been waited on.

    Args:
        response: A previous Response generated from the same Tamr client
        poll_interval_seconds: Time interval (in seconds) between subsequent polls. Applied
            both while the operation is pending and while waiting for it to complete.

    Returns:
        Responses related to polling the operation

    """

    client = utils.client._from_response(response)
    op = Operation.from_response(client, response)
    operation_endpoint = f"/api/versioned/v1/operations/{op.resource_id}"

    LOGGER.info(f"Waiting for operation to complete: {op}")
    request_while_pending = client.get(endpoint=operation_endpoint)

    while op.state == "PENDING":
        op = op.poll()
        sleep(poll_interval_seconds)
    request_while_running = client.get(endpoint=operation_endpoint)

    # Honor the caller's poll interval here too, rather than falling back to wait()'s default
    op.wait(poll_interval_seconds=poll_interval_seconds)
    request_when_complete = client.get(endpoint=operation_endpoint)

    return [
        request_while_pending, request_while_running, request_when_complete
    ]
示例#7
0
def enforce_success(operation: Operation) -> None:
    """Raises an error if an operation fails

    Args:
        operation: A Tamr operation

    Raises:
        RuntimeError: if `operation.succeeded()` is falsy. The message carries the operation's
            resource id, description and status.
    """
    if not operation.succeeded():
        # Trailing space keeps the "Description" and "Status" sentences from running together
        raise RuntimeError(
            f"Operation {operation.resource_id} failed. Description: {operation.description}. "
            f"Status: {operation.status}")
示例#8
0
def from_resource_id(tamr: Client, *, job_id: Union[int, str]) -> Operation:
    """Look up a Tamr operation by its job id

    Args:
        tamr: A Tamr client
        job_id: A job ID, given either as an integer or as a string

    Returns:
        A Tamr operation
    """
    # The underlying lookup is given the id in string form
    resource_id = str(job_id)
    return Operation.from_resource_id(tamr, resource_id)
示例#9
0
def test_operation_from_json(client):
    """An operation built from JSON mirrors the JSON fields and the supplied API path."""
    alias = "operations/123"
    op1 = Operation.from_json(client, op_1_json, alias)
    assert op1.api_path == alias
    assert op1.relative_id == op_1_json["relativeId"]
    assert op1.resource_id == "1"
    assert op1.type == op_1_json["type"]
    assert op1.description == op_1_json["description"]
    assert op1.status == op_1_json["status"]
    assert op1.state == "SUCCEEDED"
    # `succeeded` must be called: asserting on the bound method itself is always truthy
    assert op1.succeeded()
示例#10
0
    def refresh(self, **options):
        """Refreshes this dataset, bringing it up-to-date if needed.

        Posts to this resource's ``:refresh`` endpoint and wraps the successful response
        in an operation.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The refresh operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        refresh_endpoint = self.api_path + ":refresh"
        refresh_response = self.client.post(refresh_endpoint).successful()
        refresh_op = Operation.from_response(self.client, refresh_response)
        return refresh_op.apply_options(**options)
示例#11
0
    def train(self, **options):
        """Trains on the verified labels.

        Posts to this resource's ``:refresh`` endpoint and builds the operation from the
        JSON body of the successful response.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The resultant operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        train_endpoint = self.api_path + ":refresh"
        train_response = self.client.post(train_endpoint).successful()
        train_op = Operation.from_json(self.client, train_response.json())
        return train_op.apply_options(**options)
示例#12
0
def from_resource_id(tamr: Client, *, job_id: Union[int, str]) -> Operation:
    """Create an operation from a job id

    Args:
        tamr: A Tamr client
        job_id: A job ID

    Returns:
        A Tamr operation

    Raises:
        An error from `successful()` when the server does not answer the lookup with a
        successful response (for example, an unknown job id).
    """
    # Validate the response first so an error payload is never parsed as an operation
    job_response = tamr.get(f"/api/versioned/v1/operations/{job_id}").successful()
    return Operation.from_response(tamr, job_response)
示例#13
0
    def predict(self, **options):
        """Suggests labels for unverified records.

        The refresh is posted to the parent path of this resource, i.e. this resource's
        API path with its last ``/``-separated segment removed.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The resultant operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        # Everything before the final "/" (empty when the path contains no "/")
        dependent_dataset, _, _ = self.api_path.rpartition("/")
        predict_response = self.client.post(dependent_dataset + ":refresh").successful()
        predict_op = Operation.from_json(self.client, predict_response.json())
        return predict_op.apply_options(**options)
示例#14
0
    def create_profile(self, **options):
        """Creates a profile for this dataset.

        When a profile already exists, the existing profile is brought up to date instead.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :return: The operation to create the profile.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        profile_endpoint = self.api_path + "/profile:refresh"
        profile_response = self.client.post(profile_endpoint).successful()
        profile_op = Operation.from_response(self.client, profile_response)
        return profile_op.apply_options(**options)
示例#15
0
    def refresh(self, **options):
        """Refreshes the dataset profile if needed.

        The update happens on the server; call
        :func:`~tamr_unify_client.dataset.resource.Dataset.profile`
        afterwards to retrieve the updated profile.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The refresh operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        refresh_endpoint = self.api_path + ":refresh"
        refresh_response = self.client.post(refresh_endpoint).successful()
        refresh_op = Operation.from_json(self.client, refresh_response.json())
        return refresh_op.apply_options(**options)
    def refresh(self, **options):
        """Refreshes the estimated pair counts if needed.

        The update happens on the server; call
        :func:`~tamr_unify_client.mastering.project.MasteringProject.estimate_pairs`
        afterwards to retrieve the updated estimate.

        :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
            See :func:`~tamr_unify_client.operation.Operation.apply_options` .
        :returns: The refresh operation.
        :rtype: :class:`~tamr_unify_client.operation.Operation`
        """
        refresh_endpoint = self.api_path + ":refresh"
        refresh_response = self.client.post(refresh_endpoint).successful()
        refresh_op = Operation.from_response(self.client, refresh_response)
        return refresh_op.apply_options(**options)
示例#17
0
def get_all(tamr: Client) -> List[Operation]:
    """
    Get a list of all jobs or operations.

    Args:
        tamr: A Tamr client

    Returns:
        A list of Operation objects.

    Raises:
        An error from `successful()` when the server does not answer the listing request
        with a successful response.

    """
    # Validate the response before decoding it, so an error payload is never iterated as
    # if it were the list of operations
    operations_json = (
        tamr.get(
            "/api/versioned/v1/operations", headers={"Accept": "application/json"}, stream=True
        )
        .successful()
        .json()
    )
    return [Operation.from_json(tamr, item) for item in operations_json]
示例#18
0
    def test_refresh(self):
        """Refreshing the estimate yields the operation served by the operations endpoint."""
        responses.add(
            responses.POST,
            f"{self._url_base}/{self._api_path}:refresh",
            json=self._refresh_json,
        )
        # Rebuild the nested "status" dict instead of writing through a shallow copy, which
        # would also change the shared `_refresh_json` fixture.
        updated = {
            **self._refresh_json,
            "status": {**self._refresh_json["status"], "state": "SUCCEEDED"},
        }
        responses.add(responses.GET,
                      f"{self._url_base}/operations/24",
                      json=updated)

        estimate = EstimatedPairCounts.from_json(self.tamr,
                                                 self._estimate_json,
                                                 self._api_path)
        generated = estimate.refresh(poll_interval_seconds=0)

        created = Operation.from_json(self.tamr, updated)
        self.assertEqual(repr(generated), repr(created))
示例#19
0
def test_operation_from_response_noop(client):
    """A 204 response yields a populated, already-succeeded NOOP operation.

    The NOOP operation stays succeeded through `apply_options` and `wait`, while polling it
    surfaces the 404 registered for `operations/-1` as an `HTTPError`.
    """
    responses.add(responses.GET, full_url(client, "operations/2"), status=204)
    responses.add(responses.GET, full_url(client, "operations/-1"), status=404)

    op2 = Operation.from_response(client, client.get("operations/2").successful())

    assert op2.api_path is not None
    assert op2.relative_id is not None
    assert op2.resource_id is not None
    assert op2.type == "NOOP"
    assert op2.description is not None
    assert op2.status is not None
    assert op2.state == "SUCCEEDED"
    # `succeeded` must be called: asserting on the bound method itself is always truthy
    assert op2.succeeded()

    op2a = op2.apply_options(asynchronous=True)
    assert op2a.succeeded()

    op2w = op2a.wait()
    assert op2w.succeeded()

    with pytest.raises(HTTPError):
        op2w.poll()
示例#20
0
def test__collect_operation_calls():
    """Checks the three responses collected while an operation runs to completion.

    The mock client replays a fixed sequence of responses; the order of `side_effect` entries
    matches the order in which `_collect_operation_calls` issues its GET requests.
    """
    # setup mock client
    mock_client = Client(None)

    # setup mock operations
    base_operation_json = {
        "id": "2",
        "type": "SPARK",
        "description": "Profiling [employees_tiny.csv] attributes.",
        "status": {
            "state": "SUCCEEDED",
            "startTime": "2020-07-16T17:57:54.458Z",
            "endTime": "2020-07-16T17:58:22.836Z",
            "message": "",
        },
        "created": {
            "username": "******",
            "time": "2020-07-16T17:57:28.920Z",
            "version": "82"
        },
        "lastModified": {
            "username": "******",
            "time": "2020-07-16T17:58:23.977Z",
            "version": "119",
        },
        "relativeId": "operations/2",
    }

    operation_states = [
        OperationState.SUCCEEDED,
        OperationState.PENDING,
        OperationState.CANCELED,
        OperationState.RUNNING,
        OperationState.FAILED,
    ]
    mocks = {}

    for state in operation_states:
        # Give every state its own "status" dict. Writing the state through a shallow copy
        # would share one status dict between the base JSON and every mock operation, leaving
        # them all with whichever state was written last.
        op_json = {
            **base_operation_json,
            "status": {**base_operation_json["status"], "state": state.value},
        }
        mock_operation = Operation.from_json(mock_client, op_json)

        mock_response = Response()
        mock_response._content = json.dumps(op_json).encode("utf-8")
        mock_response.status_code = 200

        mocks[state] = {"op": mock_operation, "response": mock_response}

    # test succeeded with many pending
    mock_client.get = MagicMock(side_effect=[
        # response while pending
        mocks[OperationState.PENDING]["response"],
        # polling
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.RUNNING]["response"],
        # response while running
        mocks[OperationState.RUNNING]["response"],
        # response while waiting
        mocks[OperationState.SUCCEEDED]["response"],
        # response when complete
        mocks[OperationState.SUCCEEDED]["response"],
    ])

    with patch("tamr_toolbox.utils.client._from_response",
               return_value=mock_client):
        result_success = utils.testing._collect_operation_calls(
            response=mocks[OperationState.PENDING]["response"],
            poll_interval_seconds=0)

    assert len(result_success) == 3
    for resp in result_success:
        assert resp.json()["id"] == "2"
    assert result_success[0].json(
    )["status"]["state"] == OperationState.PENDING.value
    assert result_success[1].json(
    )["status"]["state"] == OperationState.RUNNING.value
    assert result_success[2].json(
    )["status"]["state"] == OperationState.SUCCEEDED.value

    # test failed quickly
    mock_client.get = MagicMock(side_effect=[
        # response while pending
        mocks[OperationState.PENDING]["response"],
        # polling
        mocks[OperationState.FAILED]["response"],
        # response while running
        mocks[OperationState.FAILED]["response"],
        # response while waiting
        mocks[OperationState.FAILED]["response"],
        # response when complete
        mocks[OperationState.FAILED]["response"],
    ])

    with patch("tamr_toolbox.utils.client._from_response",
               return_value=mock_client):
        result_failed = utils.testing._collect_operation_calls(
            response=mocks[OperationState.PENDING]["response"],
            poll_interval_seconds=0)

    assert len(result_failed) == 3
    for resp in result_failed:
        assert resp.json()["id"] == "2"
    assert result_failed[0].json(
    )["status"]["state"] == OperationState.PENDING.value
    assert result_failed[1].json(
    )["status"]["state"] == OperationState.FAILED.value
    assert result_failed[2].json(
    )["status"]["state"] == OperationState.FAILED.value