def _run_custom(
    project: Project,
    *,
    run_update_golden_records: bool = False,
    run_publish_golden_records: bool = False,
) -> List[Operation]:
    """Runs the selected refresh steps of a golden records project, in order.

    Every requested step is submitted, waited on and checked for success before the
    next step starts.

    Args:
        project: The golden records project to operate on
        run_update_golden_records: If True, refresh the draft golden records dataset
        run_publish_golden_records: If True, refresh the published golden records dataset

    Returns:
        The operations that were run, in execution order

    Raises:
        TypeError: if the `project` is not a Golden Record project
    """
    version.enforce_after_or_equal(project.client, compare_version="2020.004.0")

    if ProjectType[project.type] != ProjectType.GOLDEN_RECORDS:
        error_msg = f"Cannot use as a golden records project. Project type: {project.type}"
        LOGGER.error(error_msg)
        raise TypeError(error_msg)

    finished = []

    def _submit_and_wait(endpoint: str) -> None:
        # POST the refresh request, block until the job resolves, verify it, record it.
        response = project.client.post(endpoint).successful()
        job = Operation.from_response(client=project.client, response=response)
        job = job.wait()
        operation.enforce_success(job)
        finished.append(job)

    if run_update_golden_records:
        LOGGER.info(
            f"Updating the draft golden records for project {project.name} "
            f"(id={project.resource_id})."
        )
        _submit_and_wait(
            f"/api/versioned/v1/projects/{project.resource_id}/goldenRecords:refresh"
        )

    if run_publish_golden_records:
        LOGGER.info(
            f"Publishing golden records for project {project.name} (id={project.resource_id})."
        )
        _submit_and_wait(
            f"/api/versioned/v1/projects/{project.resource_id}/publishedGoldenRecords:refresh"
            f"?validate=true&version=CURRENT"
        )

    return finished
def test_from_op_failure():
    """A Tamr operation in the FAILED state maps to the FAILED plan node status."""
    tamr = utils.client.create(**CONFIG["toolbox_test_instance"])

    # Minimal operation payload; only the "FAILED" state matters for this check.
    failed_status = {"state": "FAILED", "startTime": "early", "endTime": "late", "message": ""}
    created_stamp = {"username": "", "time": "early", "version": "-1"}
    modified_stamp = {"username": "", "time": "late", "version": "-1"}
    op_json = {
        "id": "-1",
        "type": "NOOP",
        "description": "test",
        "status": failed_status,
        "created": created_stamp,
        "lastModified": modified_stamp,
        "relativeId": "operations/-1",
    }

    failed_op = Operation.from_json(tamr, op_json)
    node_status = PlanNodeStatus.from_tamr_op(failed_op)

    assert node_status == PlanNodeStatus.PlanNodeStatus.FAILED
def test_operation_from_response(client):
    """An operation built from a successful GET response exposes its id and outcome."""
    responses.add(responses.GET, full_url(client, "operations/1"), json=op_1_json)

    op1 = Operation.from_response(client, client.get("operations/1").successful())

    assert op1.resource_id == "1"
    # `succeeded` is a method: asserting the bare attribute is always truthy, so it must
    # be called for the assertion to check anything.
    assert op1.succeeded()
def monitor(
    operation: Operation,
    *,
    poll_interval_seconds: float = 1,
    timeout_seconds: float = 300,
) -> Operation:
    """Continuously polls for this operation's server-side state and returns the operation
    when there is a state change

    Args:
        operation: Operation to be monitored.
        poll_interval_seconds: Time interval (in seconds) between subsequent polls.
        timeout_seconds: Time (in seconds) to wait for operation to resolve.
            Passing None disables the timeout.

    Returns:
        The most recently polled operation: either its state differs from the state
        observed on entry, or it was returned by the server without a status.

    Raises:
        TimeoutError: If operation takes longer than `timeout_seconds` to resolve.
    """
    status = OperationState[operation.state]
    started = now()
    while timeout_seconds is None or now() - started < timeout_seconds:
        operation = operation.poll()
        # Check for a missing status BEFORE the enum lookup below. Presumably
        # `operation.state` is derived from `operation.status`, so the lookup would raise
        # and this guard would never be reached if it came second -- confirm.
        if operation.status is None:
            return operation
        new_status = OperationState[operation.state]
        if new_status != status:
            return operation
        sleep(poll_interval_seconds)
    raise TimeoutError(
        f"Waiting for operation took longer than {timeout_seconds} seconds.")
def wait(
    operation: Operation,
    *,
    poll_interval_seconds: int = 3,
    timeout_seconds: Optional[int] = None,
) -> Operation:
    """Polls an operation until it reaches a final state, then returns it.

    Args:
        operation: Operation to be polled.
        poll_interval_seconds: Time interval (in seconds) between subsequent polls.
        timeout_seconds: Time (in seconds) to wait for operation to resolve.
            None means wait indefinitely.

    Returns:
        The operation as last polled, once it has no status or its state is
        CANCELED, SUCCEEDED or FAILED.

    Raises:
        TimeoutError: If operation takes longer than `timeout_seconds` to resolve.
    """
    unresolved_states = (OperationState.PENDING, OperationState.RUNNING)
    resolved_states = (
        OperationState.CANCELED,
        OperationState.SUCCEEDED,
        OperationState.FAILED,
    )

    started = now()
    while timeout_seconds is None or now() - started < timeout_seconds:
        current_status = operation.status
        if current_status is None:
            return operation

        current_state = current_status["state"]
        if current_state in unresolved_states:
            # Still in progress: pause before asking the server again.
            sleep(poll_interval_seconds)
        elif current_state in resolved_states:
            return operation

        operation = operation.poll()

    raise TimeoutError(
        f"Waiting for operation took longer than {timeout_seconds} seconds.")
def _collect_operation_calls(*, response: Response,
                             poll_interval_seconds: int = 3) -> List[Response]:
    """If the provided response is an Operation, wait for the operation to complete and
    return responses related to that operation.

    Three GET requests are captured: one before polling starts, one after the operation
    has left the PENDING state, and one after waiting for the operation has returned.

    Args:
        response: A previous Response generated from the same Tamr client
        poll_interval_seconds: Time interval (in seconds) between subsequent polls

    Returns:
        Responses related to polling the operation
    """
    client = utils.client._from_response(response)
    op = Operation.from_response(client, response)
    LOGGER.info(f"Waiting for operation to complete: {op}")

    request_while_pending = client.get(
        endpoint=f"/api/versioned/v1/operations/{op.resource_id}")

    while op.state == "PENDING":
        op = op.poll()
        sleep(poll_interval_seconds)
    request_while_running = client.get(
        endpoint=f"/api/versioned/v1/operations/{op.resource_id}")

    # Forward the caller's interval; a bare `op.wait()` would fall back to the
    # operation's own default interval and ignore `poll_interval_seconds`.
    op.wait(poll_interval_seconds=poll_interval_seconds)
    request_when_complete = client.get(
        endpoint=f"/api/versioned/v1/operations/{op.resource_id}")

    return [
        request_while_pending, request_while_running, request_when_complete
    ]
def enforce_success(operation: Operation) -> None:
    """Raises an error if an operation fails

    Args:
        operation: A Tamr operation

    Raises:
        RuntimeError: if `operation.succeeded()` is falsy. The message carries the
            operation's resource id, description and status.
    """
    if not operation.succeeded():
        # The two message fragments are concatenated as-is, so the separating space
        # after the description's full stop has to be written explicitly.
        raise RuntimeError(
            f"Operation {operation.resource_id} failed. Description: {operation.description}. "
            f"Status: {operation.status}")
def from_resource_id(tamr: Client, *, job_id: Union[int, str]) -> Operation:
    """Look up the Tamr operation that belongs to a job id

    Args:
        tamr: A Tamr client
        job_id: A job ID, given either as an integer or as a string

    Returns:
        A Tamr operation
    """
    # The underlying lookup is handed the id in string form.
    resource_id = str(job_id)
    return Operation.from_resource_id(tamr, resource_id)
def test_operation_from_json(client):
    """An operation built from JSON with an explicit alias exposes the JSON's fields."""
    alias = "operations/123"
    op1 = Operation.from_json(client, op_1_json, alias)

    assert op1.api_path == alias
    assert op1.relative_id == op_1_json["relativeId"]
    assert op1.resource_id == "1"
    assert op1.type == op_1_json["type"]
    assert op1.description == op_1_json["description"]
    assert op1.status == op_1_json["status"]
    assert op1.state == "SUCCEEDED"
    # `succeeded` is a method: asserting the bare attribute is always truthy, so it must
    # be called for the assertion to check anything.
    assert op1.succeeded()
def refresh(self, **options):
    """Brings dataset up-to-date if needed, taking whatever actions are required.

    Sends a POST to this resource's ``:refresh`` endpoint and wraps the reply in an
    operation.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The refresh operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    refresh_path = self.api_path + ":refresh"
    posted = self.client.post(refresh_path)
    refresh_op = Operation.from_response(self.client, posted.successful())
    return refresh_op.apply_options(**options)
def train(self, **options):
    """Learn from verified labels.

    Sends a POST to this resource's ``:refresh`` endpoint and wraps the reply in an
    operation.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The resultant operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    response = self.client.post(self.api_path + ":refresh").successful()
    # Build the operation from the response rather than from `response.json()`:
    # a reply without a body (e.g. 204 No Content) has no JSON to parse.
    op = Operation.from_response(self.client, response)
    return op.apply_options(**options)
def from_resource_id(tamr: Client, *, job_id: Union[int, str]) -> Operation:
    """Create an operation from a job id

    Args:
        tamr: A Tamr client
        job_id: A job ID

    Returns:
        A Tamr operation

    Raises:
        Whatever `.successful()` raises for a failed request (presumably an HTTP
        error -- confirm), e.g. when no operation exists for `job_id`.
    """
    # Validate the HTTP response before parsing it, so a failed lookup surfaces as a
    # request error instead of a confusing failure while building the Operation.
    job_response = tamr.get(f"/api/versioned/v1/operations/{job_id}").successful()
    return Operation.from_response(tamr, job_response)
def predict(self, **options):
    """Suggest labels for unverified records.

    Sends a POST to the ``:refresh`` endpoint of the parent path (this resource's
    ``api_path`` with its last segment removed) and wraps the reply in an operation.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The resultant operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    # Drop the final path segment to address the dataset this resource depends on.
    dependent_dataset = "/".join(self.api_path.split("/")[:-1])
    response = self.client.post(dependent_dataset + ":refresh").successful()
    # Build the operation from the response rather than from `response.json()`:
    # a reply without a body (e.g. 204 No Content) has no JSON to parse.
    op = Operation.from_response(self.client, response)
    return op.apply_options(**options)
def create_profile(self, **options):
    """Create a profile for this dataset.

    If a profile already exists, the existing profile will be brought
    up to date.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :return: The operation to create the profile.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    profile_refresh_path = self.api_path + "/profile:refresh"
    posted = self.client.post(profile_refresh_path)
    profile_op = Operation.from_response(self.client, posted.successful())
    return profile_op.apply_options(**options)
def refresh(self, **options):
    """Updates the dataset profile if needed.

    The dataset profile is updated on the server; you will need to call
    :func:`~tamr_unify_client.dataset.resource.Dataset.profile`
    to retrieve the updated profile.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The refresh operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    response = self.client.post(self.api_path + ":refresh").successful()
    # Build the operation from the response rather than from `response.json()`:
    # a reply without a body (e.g. 204 No Content when nothing needs updating)
    # has no JSON to parse.
    op = Operation.from_response(self.client, response)
    return op.apply_options(**options)
def refresh(self, **options):
    """Updates the estimated pair counts if needed.

    The pair count estimates are updated on the server; you will need to call
    :func:`~tamr_unify_client.mastering.project.MasteringProject.estimate_pairs`
    to retrieve the updated estimate.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The refresh operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    refresh_path = self.api_path + ":refresh"
    posted = self.client.post(refresh_path)
    estimate_op = Operation.from_response(self.client, posted.successful())
    return estimate_op.apply_options(**options)
def get_all(tamr: Client) -> List[Operation]:
    """
    Get a list of all jobs or operations.

    Args:
        tamr: A Tamr client

    Returns:
        A list of Operation objects.

    Raises:
        Whatever `.successful()` raises for a failed request (presumably an HTTP
        error -- confirm).
    """
    # Validate the HTTP response before parsing it; otherwise an error payload would be
    # iterated as if it were the list of operations.
    response = tamr.get(
        "/api/versioned/v1/operations", headers={"Accept": "application/json"}, stream=True
    ).successful()
    return [Operation.from_json(tamr, item) for item in response.json()]
def test_refresh(self):
    """Refreshing the estimate returns the operation as reported once it has SUCCEEDED."""
    responses.add(
        responses.POST,
        f"{self._url_base}/{self._api_path}:refresh",
        json=self._refresh_json,
    )

    # Build the "finished" payload without touching the shared fixture: `dict.copy()` is
    # shallow, so writing into `updated["status"]` would also rewrite the nested dict
    # inside `self._refresh_json`.
    updated = {
        **self._refresh_json,
        "status": {**self._refresh_json["status"], "state": "SUCCEEDED"},
    }
    responses.add(responses.GET, f"{self._url_base}/operations/24", json=updated)

    estimate = EstimatedPairCounts.from_json(self.tamr, self._estimate_json, self._api_path)
    generated = estimate.refresh(poll_interval_seconds=0)

    created = Operation.from_json(self.tamr, updated)
    self.assertEqual(repr(generated), repr(created))
def test_operation_from_response_noop(client):
    """A 204 (no content) response yields a placeholder NOOP operation that has succeeded."""
    responses.add(responses.GET, full_url(client, "operations/2"), status=204)
    responses.add(responses.GET, full_url(client, "operations/-1"), status=404)

    op2 = Operation.from_response(client, client.get("operations/2").successful())

    assert op2.api_path is not None
    assert op2.relative_id is not None
    assert op2.resource_id is not None
    assert op2.type == "NOOP"
    assert op2.description is not None
    assert op2.status is not None
    assert op2.state == "SUCCEEDED"
    # `succeeded` is a method: asserting the bare attribute is always truthy, so it must
    # be called (here and below) for the assertions to check anything.
    assert op2.succeeded()

    op2a = op2.apply_options(asynchronous=True)
    assert op2a.succeeded()

    op2w = op2a.wait()
    assert op2w.succeeded()

    # Polling the placeholder hits the mocked 404 for "operations/-1".
    with pytest.raises(HTTPError):
        op2w.poll()
def test__collect_operation_calls():
    """`_collect_operation_calls` returns three responses for an operation: the one seen
    while pending, the one seen after polling, and the one seen after waiting."""
    # setup mock client
    mock_client = Client(None)

    # setup mock operations
    base_operation_json = {
        "id": "2",
        "type": "SPARK",
        "description": "Profiling [employees_tiny.csv] attributes.",
        "status": {
            "state": "SUCCEEDED",
            "startTime": "2020-07-16T17:57:54.458Z",
            "endTime": "2020-07-16T17:58:22.836Z",
            "message": "",
        },
        "created": {
            "username": "******",
            "time": "2020-07-16T17:57:28.920Z",
            "version": "82"
        },
        "lastModified": {
            "username": "******",
            "time": "2020-07-16T17:58:23.977Z",
            "version": "119",
        },
        "relativeId": "operations/2",
    }
    operation_states = [
        OperationState.SUCCEEDED,
        OperationState.PENDING,
        OperationState.CANCELED,
        OperationState.RUNNING,
        OperationState.FAILED,
    ]
    mocks = {}
    for state in operation_states:
        # Copy the nested "status" dict as well: a shallow `.copy()` would share it between
        # all payloads, leaving every mock operation in the last state written.
        op_json = {
            **base_operation_json,
            "status": {**base_operation_json["status"], "state": state.value},
        }
        mock_operation = Operation.from_json(mock_client, op_json)
        mock_response = Response()
        mock_response._content = json.dumps(op_json).encode("utf-8")
        mock_response.status_code = 200
        mocks[state] = {"op": mock_operation, "response": mock_response}

    # test succeeded with many pending
    mock_client.get = MagicMock(side_effect=[
        # response while pending
        mocks[OperationState.PENDING]["response"],
        # polling
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.PENDING]["response"],
        mocks[OperationState.RUNNING]["response"],
        # response while running
        mocks[OperationState.RUNNING]["response"],
        # response while waiting
        mocks[OperationState.SUCCEEDED]["response"],
        # response when complete
        mocks[OperationState.SUCCEEDED]["response"],
    ])
    with patch("tamr_toolbox.utils.client._from_response",
               return_value=mock_client):
        result_success = utils.testing._collect_operation_calls(
            response=mocks[OperationState.PENDING]["response"],
            poll_interval_seconds=0)
    assert len(result_success) == 3
    for resp in result_success:
        assert resp.json()["id"] == "2"
    assert result_success[0].json(
    )["status"]["state"] == OperationState.PENDING.value
    assert result_success[1].json(
    )["status"]["state"] == OperationState.RUNNING.value
    assert result_success[2].json(
    )["status"]["state"] == OperationState.SUCCEEDED.value

    # test failed quickly
    mock_client.get = MagicMock(side_effect=[
        # response while pending
        mocks[OperationState.PENDING]["response"],
        # polling
        mocks[OperationState.FAILED]["response"],
        # response while running
        mocks[OperationState.FAILED]["response"],
        # response while waiting
        mocks[OperationState.FAILED]["response"],
        # response when complete
        mocks[OperationState.FAILED]["response"],
    ])
    with patch("tamr_toolbox.utils.client._from_response",
               return_value=mock_client):
        result_failed = utils.testing._collect_operation_calls(
            response=mocks[OperationState.PENDING]["response"],
            poll_interval_seconds=0)
    assert len(result_failed) == 3
    for resp in result_failed:
        assert resp.json()["id"] == "2"
    assert result_failed[0].json(
    )["status"]["state"] == OperationState.PENDING.value
    assert result_failed[1].json(
    )["status"]["state"] == OperationState.FAILED.value
    assert result_failed[2].json(
    )["status"]["state"] == OperationState.FAILED.value