def test_result(self, stub):
    # Build a SUCCEEDED job whose declared result format is brotli-compressed pyarrow.
    fmt = user_format_to_proto({"type": "pyarrow", "compression": "brotli"})
    dest = user_destination_to_proto("download")
    succeeded = job_pb2.Job.State(stage=job_pb2.Job.Stage.SUCCEEDED)
    job = Job._from_proto(
        job_pb2.Job(
            id="foo",
            state=succeeded,
            type=9,
            format=fmt,
            destination=dest,
        )
    )

    expected = 2
    compression = "lz4"
    # Serve the serialized result from the mocked results-bucket endpoint,
    # advertising the codec via the object's metadata header.
    responses.add(
        responses.GET,
        Job.BUCKET_PREFIX.format(job.id),
        body=serialize_pyarrow(expected, compression),
        headers={
            "x-goog-stored-content-encoding": "application/vnd.pyarrow",
            "x-goog-meta-X-Arrow-Codec": compression,
        },
        status=200,
    )

    assert download(job) == expected
def test_wait_success(self, stub):
    # WatchJob streams a single SUCCEEDED update; wait() should absorb it
    # into the job's message and return normally.
    dest = user_destination_to_proto({"type": "download"})
    job = Job._from_proto(job_pb2.Job(id="foo", destination=dest))

    final_state = job_pb2.Job.State(stage=job_pb2.Job.Stage.SUCCEEDED)
    stub.return_value.WatchJob.return_value = [final_state]

    job.wait()

    assert job._message.state.stage == final_state.stage
def create_side_effect(req, **kwargs):
    # Mock CreateJob handler: echo the request's own fields back as a Job
    # message, stamped with the enclosing test's fixtures (id_, job_state,
    # format, destination — all taken from the closure).
    echoed = {
        "parameters": req.parameters,
        "serialized_graft": req.serialized_graft,
        "typespec": req.typespec,
        "type": req.type,
        "channel": req.channel,
    }
    return job_pb2.Job(
        id=id_,
        state=job_state,
        format=user_format_to_proto(format),
        destination=user_destination_to_proto(destination),
        **echoed,
    )
def test_create(self, stub):
    obj = types.Int(1)
    parameters = {"foo": types.Str("bar")}
    typespec = cereal.serialize_typespec(type(obj))

    # The exact CreateJobRequest the Job constructor is expected to send.
    expected_request = job_pb2.CreateJobRequest(
        parameters=json.dumps(parameters_to_grafts(**parameters)),
        serialized_graft=json.dumps(obj.graft),
        typespec=typespec,
        type=types_pb2.ResultType.Value(
            cereal.typespec_to_unmarshal_str(typespec)
        ),
        format=user_format_to_proto({"type": "pyarrow", "compression": "brotli"}),
        destination=user_destination_to_proto({"type": "download"}),
        no_cache=False,
        channel=_channel.__channel__,
    )

    # The stubbed server reflects the request back as the created Job.
    response = job_pb2.Job(
        id="foo",
        parameters=expected_request.parameters,
        serialized_graft=expected_request.serialized_graft,
        typespec=expected_request.typespec,
        type=expected_request.type,
        format=expected_request.format,
        destination=expected_request.destination,
        no_cache=expected_request.no_cache,
        channel=expected_request.channel,
    )
    stub.return_value.CreateJob.return_value = response

    job = Job(
        obj,
        parameters,
        format={"type": "pyarrow", "compression": "brotli"},
        destination="download",
    )

    stub.return_value.CreateJob.assert_called_once_with(
        expected_request,
        timeout=Client.DEFAULT_TIMEOUT,
        metadata=(("x-wf-channel", expected_request.channel),),
    )
    assert job._message is response
def test_wait_terminated(self, stub):
    # A FAILED state carrying ERROR_TERMINATED must surface from wait()
    # as a JobTerminated exception, while still recording the final stage.
    dest = user_destination_to_proto({"type": "download"})
    job = Job._from_proto(job_pb2.Job(id="foo", destination=dest))

    terminal = job_pb2.Job.State(
        stage=job_pb2.Job.Stage.FAILED,
        error=job_pb2.Job.Error(code=errors_pb2.ERROR_TERMINATED),
    )
    stub.return_value.WatchJob.return_value = [terminal]

    with pytest.raises(JobTerminated):
        job.wait()

    assert job._message.state.stage == terminal.stage
def create_side_effect(req, **kwargs):
    # Mock CreateJob handler: reflect the request's fields back as a Job
    # message, combined with the enclosing test's fixtures (id_, job_state,
    # expires_timestamp, format, destination — from the closure).
    echoed = {
        "serialized_graft": req.serialized_graft,
        "typespec": req.typespec,
        "arguments": req.arguments,
        "geoctx_graft": req.geoctx_graft,
        "no_ruster": req.no_ruster,
        "channel": req.channel,
        "no_cache": req.no_cache,
        "trace": req.trace,
        "type": req.type,
    }
    return job_pb2.Job(
        id=id_,
        client_version=__version__,
        expires_timestamp=expires_timestamp,
        state=job_state,
        format=user_format_to_proto(format),
        destination=user_destination_to_proto(destination),
        **echoed,
    )
def test_wait_timeout(self, stub):
    # WatchJob yields one QUEUED update and then the stream deadline
    # expires; wait(timeout=...) should translate that into JobTimeoutError.
    dest = user_destination_to_proto({"type": "download"})
    job = Job._from_proto(job_pb2.Job(id="foo", destination=dest))
    queued = job_pb2.Job.State(stage=job_pb2.Job.Stage.QUEUED)

    def watch(*args, **kwargs):
        yield queued
        raise MockRpcError(grpc.StatusCode.DEADLINE_EXCEEDED)

    stub.return_value.WatchJob.side_effect = watch

    with pytest.raises(JobTimeoutError):
        job.wait(timeout=1)

    stub.return_value.WatchJob.assert_called()
    # The QUEUED update seen before the timeout is still recorded.
    assert job._message.state.stage == queued.stage
def test_result_to_file(self, stub, file_path, tmpdir):
    fmt = user_format_to_proto("json")
    dest = user_destination_to_proto("download")
    dest.download.result_url = (
        "https://storage.googleapis.com/dl-compute-dev-results"
    )
    job = Job._from_proto(
        job_pb2.Job(
            id="foo",
            state=job_pb2.Job.State(stage=job_pb2.Job.Stage.SUCCEEDED),
            format=fmt,
            destination=dest,
        )
    )

    expected = [1, 2, 3, 4]
    # Stream the JSON result body from the mocked download URL.
    responses.add(
        responses.GET,
        job.url,
        body=json.dumps(expected),
        headers={"x-goog-stored-content-encoding": "application/json"},
        status=200,
        stream=True,
    )

    target = tmpdir.join("test.json")
    # Parametrized by `file_path`: pass either a filesystem path string
    # or an already-open binary file object.
    sink = str(target) if file_path else target.open("wb")
    job.result_to_file(sink)

    if not file_path:
        # A caller-provided file object must be left open by result_to_file.
        assert not sink.closed
        sink.flush()

    with open(str(target), "r") as f:
        assert json.load(f) == expected

    if not file_path:
        sink.close()
def __init__(
    self,
    proxy_object,
    parameters,
    format="pyarrow",
    destination="download",
    client=None,
    cache=True,
):
    """
    Creates a new `Job` to compute the provided proxy object with the given parameters.

    Parameters
    ----------
    proxy_object: Proxytype
        Proxy object to compute
    parameters: dict[str, Proxytype]
        Python dictionary of parameter names and values
    format: str or dict, default "pyarrow"
        The serialization format for the result.
    destination: str or dict, default "download"
        The destination for the result.
    client : `.workflows.client.Client`, optional
        Allows you to use a specific client instance with non-default
        auth and parameters
    cache : bool, default True
        Whether to use the cache for this job.

    Returns
    -------
    Job
        The job that's executing.

    Example
    -------
    >>> from descarteslabs.workflows import Job, Int, parameter
    >>> my_int = Int(1) + parameter("other_int", Int)
    >>> job = Job(my_int, {"other_int": 10}) # doctest: +SKIP
    >>> job.stage # doctest: +SKIP
    QUEUED
    """
    if client is None:
        # Fall back to the process-wide default client.
        client = get_global_grpc_client()

    typespec = serialize_typespec(type(proxy_object))
    result_type = typespec_to_unmarshal_str(typespec)
    # ^ this also preemptively checks whether the result type is something we'll know how to unmarshal

    # Normalize the user-friendly format/destination (str or dict) into protos.
    format_proto = user_format_to_proto(format)
    destination_proto = user_destination_to_proto(destination)

    # Convert parameter Proxytypes into grafts so they can be JSON-serialized.
    parameters = parameters_to_grafts(**parameters)

    # Synchronous RPC: the returned message describes the created job.
    message = client.api["CreateJob"](
        job_pb2.CreateJobRequest(
            parameters=json.dumps(parameters),
            serialized_graft=json.dumps(proxy_object.graft),
            typespec=typespec,
            type=types_pb2.ResultType.Value(result_type),
            format=format_proto,
            destination=destination_proto,
            no_cache=not cache,
            channel=_channel.__channel__,
        ),
        timeout=client.DEFAULT_TIMEOUT,
    )

    self._message = message
    self._client = client
    self._object = proxy_object
def __init__(
    self,
    obj,
    geoctx=None,
    format="pyarrow",
    destination="download",
    cache=True,
    _ruster=None,
    _trace=False,
    client=None,
    **arguments,
):
    """
    Creates a new `Job` to compute the provided proxy object with the given arguments.

    Parameters
    ----------
    obj: Proxytype
        Proxy object to compute, or list/tuple of proxy objects.
        If it depends on parameters, ``obj`` is first converted
        to a `.Function` that takes those parameters.
    geoctx: `~.workflows.types.geospatial.GeoContext`, or None
        The GeoContext parameter under which to run the computation.
        Almost all computations will require a `~.workflows.types.geospatial.GeoContext`,
        but for operations that only involve non-geospatial types,
        this parameter is optional.
    format: str or dict, default "pyarrow"
        The serialization format for the result.
    destination: str or dict, default "download"
        The destination for the result.
    cache: bool, default True
        Whether to use the cache for this job.
    client: `.workflows.client.Client`, optional
        Allows you to use a specific client instance with non-default
        auth and parameters
    **arguments: Any
        Values for all parameters that ``obj`` depends on
        (or arguments that ``obj`` takes, if it's a `.Function`).
        Can be given as Proxytypes, or as Python objects like numbers,
        lists, and dicts that can be promoted to them.
        These arguments cannot depend on any parameters.

    Raises
    ------
    TypeError
        If ``geoctx`` cannot be promoted to a
        `~.workflows.types.geospatial.GeoContext`.

    Example
    -------
    >>> from descarteslabs.workflows import Job, Int, parameter
    >>> my_int = Int(1) + parameter("other_int", Int)
    >>> job = Job(my_int, other_int=10) # doctest: +SKIP
    >>> job.stage # doctest: +SKIP
    QUEUED
    """
    if client is None:
        # Fall back to the process-wide default client.
        client = get_global_grpc_client()

    if geoctx is not None:
        try:
            geoctx = GeoContext._promote(geoctx)
        except ProxyTypeError as e:
            # Chain the original promotion failure so the underlying
            # cause stays visible in the traceback.
            raise TypeError(f"Invalid GeoContext {geoctx!r}: {e}") from e

    # Resolve `obj` + arguments into a computable graft, its argument
    # grafts, and the (typespec, result_type) pair describing the result.
    obj, argument_grafts, typespec, result_type = to_computable(
        obj, arguments)

    # Normalize the user-friendly format/destination (str or dict) into protos.
    format_proto = user_format_to_proto(format)
    destination_proto = user_destination_to_proto(destination)

    # Synchronous RPC: the returned message describes the created job.
    message = client.api["CreateJob"](
        job_pb2.CreateJobRequest(
            serialized_graft=json.dumps(obj.graft),
            typespec=typespec,
            arguments={
                name: json.dumps(arg)
                for name, arg in argument_grafts.items()
            },
            # Omit the geoctx graft entirely when no GeoContext was given.
            geoctx_graft=json.dumps(geoctx.graft)
            if geoctx is not None
            else None,
            no_ruster=_ruster is False,
            channel=client._wf_channel,
            client_version=__version__,
            no_cache=not cache,
            trace=_trace,
            type=types_pb2.ResultType.Value(result_type),
            format=format_proto,
            destination=destination_proto,
        ),
        timeout=client.DEFAULT_TIMEOUT,
    )

    self._message = message
    self._client = client
    self._object = obj
    # Lazily computed elsewhere; cleared here on construction.
    self._arguments = None