def test_from_op_failure():
    """An operation whose JSON state is "FAILED" maps to ``PlanNodeStatus.FAILED``."""
    tamr_client = utils.client.create(**CONFIG["toolbox_test_instance"])

    # Minimal operation payload; only the "FAILED" state matters for this check.
    failed_status = {
        "state": "FAILED",
        "startTime": "early",
        "endTime": "late",
        "message": "",
    }
    created_stamp = {"username": "", "time": "early", "version": "-1"}
    modified_stamp = {"username": "", "time": "late", "version": "-1"}
    failed_op_payload = {
        "id": "-1",
        "type": "NOOP",
        "description": "test",
        "status": failed_status,
        "created": created_stamp,
        "lastModified": modified_stamp,
        "relativeId": "operations/-1",
    }

    failed_op = Operation.from_json(tamr_client, failed_op_payload)
    observed = PlanNodeStatus.from_tamr_op(failed_op)
    assert observed == PlanNodeStatus.PlanNodeStatus.FAILED
def test_operation_from_json(client):
    """Build an ``Operation`` from ``op_1_json`` under an explicit API path and check its fields.

    The alias passed as ``api_path`` must be kept as-is, while the remaining
    attributes are compared against the source JSON.
    """
    alias = "operations/123"
    op1 = Operation.from_json(client, op_1_json, alias)

    assert op1.api_path == alias
    assert op1.relative_id == op_1_json["relativeId"]
    assert op1.resource_id == "1"
    assert op1.type == op_1_json["type"]
    assert op1.description == op_1_json["description"]
    assert op1.status == op_1_json["status"]
    assert op1.state == "SUCCEEDED"
    # ``succeeded`` is a method: asserting the bound method itself is always
    # truthy and can never fail, so it has to be called.
    assert op1.succeeded()
def train(self, **options):
    """Train on the verified labels by posting a refresh for this resource.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The resultant operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    posted = self.client.post(self.api_path + ":refresh")
    # ``successful()`` is chained before the body is parsed as JSON.
    payload = posted.successful().json()
    operation = Operation.from_json(self.client, payload)
    return operation.apply_options(**options)
def predict(self, **options):
    """Suggest labels for unverified records.

    The refresh is posted to the parent of this resource, i.e. ``api_path``
    with its last ``/``-separated segment removed.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The resultant operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    # Everything before the final "/" (empty string when there is no "/").
    parent_path, _, _ = self.api_path.rpartition("/")
    posted = self.client.post(parent_path + ":refresh")
    payload = posted.successful().json()
    return Operation.from_json(self.client, payload).apply_options(**options)
def refresh(self, **options):
    """Updates the dataset profile if needed.

    Only the server-side profile is updated; call
    :func:`~tamr_unify_client.dataset.resource.Dataset.profile`
    afterwards to retrieve the updated profile.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The refresh operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    refresh_response = self.client.post(self.api_path + ":refresh").successful()
    refresh_op = Operation.from_json(self.client, refresh_response.json())
    return refresh_op.apply_options(**options)
def refresh(self, **options):
    """Updates the estimated pair counts if needed.

    Only the server-side estimates are updated; call
    :func:`~tamr_unify_client.mastering.project.MasteringProject.estimate_pairs`
    afterwards to retrieve the updated estimate.

    :param ``**options``: Options passed to underlying :class:`~tamr_unify_client.operation.Operation` .
        See :func:`~tamr_unify_client.operation.Operation.apply_options` .
    :returns: The refresh operation.
    :rtype: :class:`~tamr_unify_client.operation.Operation`
    """
    http_response = self.client.post(self.api_path + ":refresh")
    operation_json = http_response.successful().json()
    return Operation.from_json(self.client, operation_json).apply_options(**options)
def get_all(tamr: Client) -> List[Operation]:
    """
    Get a list of all jobs or operations.

    Issues a single GET against the versioned operations endpoint and wraps
    every item of the JSON response in an Operation.

    Args:
        tamr: A Tamr client

    Returns:
        A list of Operation objects.
    """
    operations_response = tamr.get(
        "/api/versioned/v1/operations",
        headers={"Accept": "application/json"},
        stream=True,
    )
    return [
        Operation.from_json(tamr, operation_json)
        for operation_json in operations_response.json()
    ]
def test_refresh(self):
    """Refreshing the pair-count estimate yields the operation in its polled state.

    The POST to ``<api_path>:refresh`` is mocked with ``_refresh_json`` and the
    GET on ``operations/24`` with a copy whose state is "SUCCEEDED"; the
    operation returned by ``refresh`` must have the same ``repr`` as an
    ``Operation`` built from that copy.
    """
    import copy

    responses.add(
        responses.POST,
        f"{self._url_base}/{self._api_path}:refresh",
        json=self._refresh_json,
    )

    # Deep copy: a shallow ``dict.copy()`` shares the nested "status" dict, so
    # setting the state below would also rewrite the shared ``_refresh_json``
    # fixture.
    updated = copy.deepcopy(self._refresh_json)
    updated["status"]["state"] = "SUCCEEDED"
    responses.add(responses.GET, f"{self._url_base}/operations/24", json=updated)

    estimate = EstimatedPairCounts.from_json(
        self.tamr, self._estimate_json, self._api_path
    )
    # poll_interval_seconds=0 avoids sleeping between polls.
    generated = estimate.refresh(poll_interval_seconds=0)

    created = Operation.from_json(self.tamr, updated)
    self.assertEqual(repr(generated), repr(created))
def test__collect_operation_calls():
    """Check the responses returned by ``utils.testing._collect_operation_calls``.

    Two scenarios are replayed through a mocked ``Client.get``: an operation
    that stays pending for several polls before running and succeeding, and
    one that fails right after being pending. Each scenario is expected to
    yield exactly three responses, all for operation "2".
    """
    from copy import deepcopy

    # setup mock client
    mock_client = Client(None)

    # setup mock operations
    base_operation_json = {
        "id": "2",
        "type": "SPARK",
        "description": "Profiling [employees_tiny.csv] attributes.",
        "status": {
            "state": "SUCCEEDED",
            "startTime": "2020-07-16T17:57:54.458Z",
            "endTime": "2020-07-16T17:58:22.836Z",
            "message": "",
        },
        "created": {
            "username": "******",
            "time": "2020-07-16T17:57:28.920Z",
            "version": "82",
        },
        "lastModified": {
            "username": "******",
            "time": "2020-07-16T17:58:23.977Z",
            "version": "119",
        },
        "relativeId": "operations/2",
    }
    operation_states = [
        OperationState.SUCCEEDED,
        OperationState.PENDING,
        OperationState.CANCELED,
        OperationState.RUNNING,
        OperationState.FAILED,
    ]

    mocks = {}
    for state in operation_states:
        # Deep copy: a shallow ``dict.copy()`` would share the nested "status"
        # dict between all states, so every stored operation would end up
        # reporting the last state written and the base JSON would be mutated.
        op_json = deepcopy(base_operation_json)
        op_json["status"]["state"] = state.value
        mock_operation = Operation.from_json(mock_client, op_json)
        mock_response = Response()
        mock_response._content = json.dumps(op_json).encode("utf-8")
        mock_response.status_code = 200
        mocks[state] = {"op": mock_operation, "response": mock_response}

    # test succeeded with many pending
    mock_client.get = MagicMock(
        side_effect=[
            # response while pending
            mocks[OperationState.PENDING]["response"],
            # polling
            mocks[OperationState.PENDING]["response"],
            mocks[OperationState.PENDING]["response"],
            mocks[OperationState.PENDING]["response"],
            mocks[OperationState.PENDING]["response"],
            mocks[OperationState.PENDING]["response"],
            mocks[OperationState.RUNNING]["response"],
            # response while running
            mocks[OperationState.RUNNING]["response"],
            # response while waiting
            mocks[OperationState.SUCCEEDED]["response"],
            # response when complete
            mocks[OperationState.SUCCEEDED]["response"],
        ]
    )
    with patch("tamr_toolbox.utils.client._from_response", return_value=mock_client):
        result_success = utils.testing._collect_operation_calls(
            response=mocks[OperationState.PENDING]["response"],
            poll_interval_seconds=0,
        )

    assert len(result_success) == 3
    for resp in result_success:
        assert resp.json()["id"] == "2"
    assert result_success[0].json()["status"]["state"] == OperationState.PENDING.value
    assert result_success[1].json()["status"]["state"] == OperationState.RUNNING.value
    assert result_success[2].json()["status"]["state"] == OperationState.SUCCEEDED.value

    # test failed quickly
    mock_client.get = MagicMock(
        side_effect=[
            # response while pending
            mocks[OperationState.PENDING]["response"],
            # polling
            mocks[OperationState.FAILED]["response"],
            # response while running
            mocks[OperationState.FAILED]["response"],
            # response while waiting
            mocks[OperationState.FAILED]["response"],
            # response when complete
            mocks[OperationState.FAILED]["response"],
        ]
    )
    with patch("tamr_toolbox.utils.client._from_response", return_value=mock_client):
        result_failed = utils.testing._collect_operation_calls(
            response=mocks[OperationState.PENDING]["response"],
            poll_interval_seconds=0,
        )

    assert len(result_failed) == 3
    for resp in result_failed:
        assert resp.json()["id"] == "2"
    assert result_failed[0].json()["status"]["state"] == OperationState.PENDING.value
    assert result_failed[1].json()["status"]["state"] == OperationState.FAILED.value
    assert result_failed[2].json()["status"]["state"] == OperationState.FAILED.value