def test_deserialize_mapped():
    """Round-trip a Mapped state and check what comes back for its children."""
    success = state.Success(message="1", result=1)
    failed = state.Failed(message="2", result=2)
    mapped = state.Mapped(message="message", map_states=[success, failed])

    payload = StateSchema().dump(mapped)
    restored = StateSchema().load(payload)

    assert isinstance(restored, state.Mapped)
    # The child count is preserved, but every slot is expected to be None
    assert len(restored.map_states) == 2
    assert restored.map_states == [None, None]
    assert restored._result == NoResult
def test_serialize_state_with_context_allows_for_diverse_values():
    """A context holding a list, a nested dict and a string survives dump and load."""
    running = state.Running(message="hi")
    running.context = {"tags": ["foo", "bar"], "info": {"x": 42}, "baz": "99"}

    payload = StateSchema().dump(running)
    assert isinstance(payload, dict)
    assert payload["type"] == "Running"
    assert payload["message"] == "hi"
    assert payload["__version__"] == prefect.__version__
    assert payload["context"] == running.context

    restored = StateSchema().load(payload)
    assert restored.is_running()
    assert restored.context == running.context
def test_serialize_scheduled_state_with_context():
    """A Scheduled state built under `task_run_count=42` keeps that context value."""
    expected_context = {"task_run_count": 42}

    # The state is created while the prefect context is active
    with prefect.context(task_run_count=42):
        scheduled = state.Scheduled(message="hi")

    payload = StateSchema().dump(scheduled)
    assert isinstance(payload, dict)
    assert payload["type"] == "Scheduled"
    assert payload["message"] == "hi"
    assert payload["__version__"] == prefect.__version__
    assert payload["context"] == expected_context

    restored = StateSchema().load(payload)
    assert restored.is_scheduled()
    assert restored.context == expected_context
def test_serialize_state_with_context(cls):
    """A state built under `task_tags` exposes those tags in its serialized context."""
    expected_tags = {"foo", "bar"}

    # The state is created while the prefect context is active
    with prefect.context(task_tags={"foo", "bar"}):
        instance = cls(message="hi")

    payload = StateSchema().dump(instance)
    assert isinstance(payload, dict)
    assert payload["type"] == cls.__name__
    assert payload["message"] == "hi"
    assert payload["__version__"] == prefect.__version__
    assert isinstance(payload["context"], dict)
    # Compared as sets, so the order of the serialized tags is irrelevant
    assert set(payload["context"]["tags"]) == expected_tags

    restored = StateSchema().load(payload)
    assert isinstance(restored, cls)
    assert set(restored.context["tags"]) == expected_tags
def test_deserialize_json_without_version():
    """A payload lacking `__version__` still loads into a plain Running state."""
    payload = {"type": "Running", "message": "test"}
    restored = StateSchema().load(payload)

    assert type(restored) is state.Running
    assert restored.is_running()
    assert restored.message == "test"
    assert restored.context == {}
    assert restored._result == NoResult
def test_serialize_state_with_un_handled_result(cls):
    """A raw result value is expected to be dumped with type `NoResultType`."""
    instance = cls(message="message", result=1)
    payload = StateSchema().dump(instance)

    assert isinstance(payload, dict)
    assert payload["type"] == cls.__name__
    assert payload["message"] == "message"
    assert payload["_result"]["type"] == "NoResultType"
    assert payload["__version__"] == prefect.__version__
def test_deserialize_state_from_only_type(cls):
    """Loading a payload that contains only `type` yields an otherwise empty state."""
    payload = {"type": cls.__name__}
    restored = StateSchema().load(payload)

    assert isinstance(restored, cls)
    assert restored.message is None
    assert restored._result == Result()
    assert restored.result is None
def test_new_result_with_no_location_serializes_correctly(self):
    """An S3Result created without a location is dumped with `location` set to None."""
    s3_result = results.S3Result(bucket="foo")
    success = state.Success(message="test", result=s3_result)

    payload = StateSchema().dump(success)
    result_payload = payload["_result"]

    assert payload["message"] == "test"
    assert result_payload["type"] == "S3Result"
    assert result_payload["location"] is None
def test_serialize_state_with_no_result(cls):
    """A state created without a result is dumped with type `NoResultType`."""
    instance = cls(message="message")
    serialized = StateSchema().dump(instance)

    assert isinstance(serialized, dict)
    assert serialized["type"] == cls.__name__
    # Compare by value: `is` against a string literal tests object identity,
    # which only passes when the interpreter happens to intern the string
    assert serialized["message"] == "message"
    assert serialized["_result"]["type"] == "NoResultType"
    assert serialized["__version__"] == prefect.__version__
def test_new_result_with_location_serializes_correctly(self):
    """An S3Result created with a location is dumped with that location intact."""
    s = state.Success(
        message="test",
        result=results.S3Result(bucket="foo", location="dir/place.txt"),
    )
    serialized = StateSchema().dump(s)

    assert serialized["message"] == "test"
    assert serialized["_result"]["type"] == "S3Result"
    # The location is what this test is about, so it has to be checked
    assert serialized["_result"]["location"] == "dir/place.txt"
def test_deserialize_json_with_context():
    """A `context` entry in the payload is restored on the loaded Running state."""
    payload = {"type": "Running", "context": {"boo": ["a", "b", "c"]}}
    restored = StateSchema().load(payload)

    assert type(restored) is state.Running
    assert restored.is_running()
    assert restored.message is None
    assert restored.context == {"boo": ["a", "b", "c"]}
    assert restored._result == NoResult
def test_serialize_mapped_uses_set_n_map_states():
    """An explicitly supplied `n_map_states` appears unchanged in the dumped dict."""
    mapped = state.Mapped(message="message", n_map_states=20)
    payload = StateSchema().dump(mapped)

    assert isinstance(payload, dict)
    assert payload["type"] == "Mapped"
    assert payload["message"] == "message"
    # Neither the result nor the child states are part of the payload
    assert "_result" not in payload
    assert "map_states" not in payload
    assert payload["n_map_states"] == 20
    assert payload["__version__"] == prefect.__version__
def test_serialize_state_with_handled_result(cls):
    """A `Result` carrying a location is dumped with its type and location."""
    handled = Result(value=1, location="src/place")
    instance = cls(message="message", result=handled)

    payload = StateSchema().dump(instance)
    result_payload = payload["_result"]

    assert isinstance(payload, dict)
    assert payload["type"] == cls.__name__
    assert payload["message"] == "message"
    assert result_payload["type"] == "Result"
    assert result_payload["location"] == "src/place"
    assert payload["__version__"] == prefect.__version__
def test_result_raises_error_on_dump_if_not_valid_json():
    """Dumping a SafeResult whose value contains a lambda raises ValidationError."""
    # The innermost value is a function object
    payload = {"x": {"y": {"z": lambda: 1}}}
    unsafe = SafeResult(payload, result_handler=JSONResultHandler())
    success = state.Success(result=unsafe)

    with pytest.raises(marshmallow.exceptions.ValidationError):
        StateSchema().dump(success)
def test_serialize_state_with_safe_result(cls):
    """A `SafeResult` is dumped with both its type and its value."""
    safe = SafeResult(value="1", result_handler=JSONResultHandler())
    instance = cls(message="message", result=safe)

    payload = StateSchema().dump(instance)
    result_payload = payload["_result"]

    assert isinstance(payload, dict)
    assert payload["type"] == cls.__name__
    assert payload["message"] == "message"
    assert result_payload["type"] == "SafeResult"
    assert result_payload["value"] == "1"
    assert payload["__version__"] == prefect.__version__
def test_result_must_be_valid_json():
    """A nested-dict SafeResult value is dumped equal to the state's result."""
    nested = {"x": {"y": {"z": 1}}}
    safe = SafeResult(nested, result_handler=JSONResultHandler())
    success = state.Success(result=safe)

    payload = StateSchema().dump(success)

    assert payload["_result"]["value"] == success.result
def test_deserialize_handles_unknown_fields():
    """A payload carrying a key the schema does not know about still loads.

    This covers payloads written by a newer version of prefect that added fields.
    """
    payload = {
        "type": "Success",
        "success_message_that_definitely_wont_exist_on_a_real_state!": 1,
    }
    restored = StateSchema().load(payload)

    assert restored.is_successful()
def serialize(self) -> dict:
    """
    Dump this state through `StateSchema` and return the resulting dict.

    Returns:
        - dict: a JSON representation of the state
    """
    # Function-scoped import, kept exactly where the original placed it
    from prefect.serialization.state import StateSchema

    return StateSchema().dump(self)
def test_serialize_mapped():
    """A Mapped state is dumped with a child count instead of the child states."""
    success = state.Success(message="1", result=1)
    failed = state.Failed(message="2", result=2)
    mapped = state.Mapped(message="message", map_states=[success, failed])

    payload = StateSchema().dump(mapped)

    assert isinstance(payload, dict)
    assert payload["type"] == "Mapped"
    assert payload["message"] == "message"
    assert "_result" not in payload
    assert "map_states" not in payload
    # Two child states were supplied above
    assert payload["n_map_states"] == 2
    assert payload["__version__"] == prefect.__version__
def deserialize(json_blob: dict) -> "State":
    """
    Rebuild a state from its dict form by loading it through `StateSchema`.

    Args:
        - json_blob (dict): the JSON representing the serialized state

    Returns:
        - State: the object produced by `StateSchema().load`
    """
    # Function-scoped import, kept exactly where the original placed it
    from prefect.serialization.state import StateSchema

    return StateSchema().load(json_blob)
def _mark_flow_as_submitted(self, flow_run: GraphQLResult) -> None:
    """
    After a flow run is grabbed this function sets the state to Submitted so it
    won't be picked up by any other processes

    Only states that are currently `Scheduled` are updated. This applies to the
    flow run itself and to each of its task runs; the previous state is wrapped
    inside the new `Submitted` state.

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object
    """
    # Deserialize once and reuse the object for both the `is_scheduled` check
    # and the `Submitted` wrapper, rather than loading the same payload twice
    flow_run_state = StateSchema().load(flow_run.serialized_state)

    # Set flow run state to `Submitted` if it is currently `Scheduled`
    if flow_run_state.is_scheduled():
        self.logger.debug(
            f"Updating flow run {flow_run.id} state from Scheduled -> Submitted..."
        )
        self.client.set_flow_run_state(
            flow_run_id=flow_run.id,
            version=flow_run.version,
            state=Submitted(
                message="Submitted for execution",
                state=flow_run_state,
            ),
        )

    # Set task run states to `Submitted` if they are currently `Scheduled`
    task_runs_updated = 0
    for task_run in flow_run.task_runs:
        task_run_state = StateSchema().load(task_run.serialized_state)
        if not task_run_state.is_scheduled():
            continue

        task_runs_updated += 1
        self.client.set_task_run_state(
            task_run_id=task_run.id,
            version=task_run.version,
            state=Submitted(
                message="Submitted for execution.",
                state=task_run_state,
            ),
        )

    # Stay quiet when no task run needed an update
    if task_runs_updated:
        self.logger.debug(
            f"Updated {task_runs_updated} task runs states for flow run "
            f"{flow_run.id} from Scheduled -> Submitted"
        )
def test_new_result_with_location_deserializes_correctly(self):
    """An S3Result's bucket and location survive a dump/load round trip."""
    s3_result = results.S3Result(bucket="foo", location="dir/place.txt")
    success = state.Success(message="test", result=s3_result)

    schema = StateSchema()
    payload = schema.dump(success)
    restored = schema.load(payload)
    restored_result = restored._result

    assert restored.is_successful()
    # The result value itself is expected to be None after loading
    assert restored.result is None
    assert restored_result.bucket == "foo"
    assert isinstance(restored_result, results.S3Result)
    assert restored_result.location == "dir/place.txt"
def test_meta_states_dont_nest():
    """Repeatedly wrapping a state in meta states leaves a single wrapping level."""
    current = Queued(state=Pending())

    # Alternate the wrapper: Submitted on even iterations, Queued on odd ones
    for step in range(300):
        wrapper = Queued if step % 2 else Submitted
        current = wrapper(state=current)

    inner = current.state
    assert inner.is_pending()
    assert not inner.is_meta_state()

    restored = StateSchema().load(current.serialize())
    assert restored.is_meta_state()
    assert not restored.state.is_meta_state()
def test_cached_inputs_are_serialized_correctly(self):
    """Locations of a Cached state's `cached_inputs` survive dump and load."""
    inputs = {
        "x": results.PrefectResult(location='"foo"'),
        "y": results.PrefectResult(location='"bar"'),
    }
    cached = state.Cached(
        message="test",
        result=results.PrefectResult(value=1, location="1"),
        cached_inputs=inputs,
    )

    schema = StateSchema()
    payload = schema.dump(cached)
    assert payload["cached_inputs"]["x"]["location"] == '"foo"'
    assert payload["cached_inputs"]["y"]["location"] == '"bar"'

    restored = schema.load(payload)
    assert restored.cached_inputs["x"].location == '"foo"'
    assert restored.cached_inputs["y"].location == '"bar"'
def test_deserialize_state_without_type_fails():
    """Loading an empty payload raises a marshmallow ValidationError."""
    empty_payload = {}
    with pytest.raises(marshmallow.exceptions.ValidationError):
        StateSchema().load(empty_payload)
def _deploy_flow_run(
    self,
    flow_run: "GraphQLResult",
) -> None:
    """
    Deploy a flow run and update Cloud with the resulting deployment info.
    If any errors occur when submitting the flow run, capture the error and log to
    Cloud.

    No exception propagates from the deployment steps: failures are logged and,
    unless the failure was itself a state update error or the flow run has no id,
    the flow run is marked as failed.

    Args:
        - flow_run (GraphQLResult): The specific flow run to deploy
    """
    # Deploy flow run and mark failed if any deployment error
    try:
        # Wait for the flow run's start time. The agent pre-fetches runs that may
        # not need to start until up to 10 seconds later so we need to wait to
        # prevent the flow from starting early
        #
        # `state.start_time` is used instead of `flow_run.scheduled_start_time` for
        # execution; `scheduled_start_time` is only to record the originally scheduled
        # start time of the flow run
        #
        # There are two possible states the flow run could be in at this point
        # - Scheduled - in this case the flow run state will have a start time
        # - Running - in this case the flow run state will not have a start time so we default to now
        flow_run_state = StateSchema().load(flow_run.serialized_state)
        start_time = getattr(flow_run_state, "start_time", pendulum.now())
        # Clamp at zero so a start time in the past never produces a negative sleep
        delay_seconds = max(0, (start_time - pendulum.now()).total_seconds())
        if delay_seconds:
            self.logger.debug(
                f"Waiting {delay_seconds}s to deploy flow run {flow_run.id} on "
                "time..."
            )
            time.sleep(delay_seconds)

        self.logger.info(
            f"Deploying flow run {flow_run.id} to execution environment..."
        )

        self._mark_flow_as_submitted(flow_run)

        # Call the main deployment hook
        deployment_info = self.deploy_flow(flow_run)

        self.logger.info(f"Completed deployment of flow run {flow_run.id}")

        self._safe_write_run_log(
            flow_run,
            message="Submitted for execution: {}".format(deployment_info),
            level="INFO",
        )

    except Exception as exc:
        # On exception, we'll mark this flow as failed

        # if first failure was a state update error, we don't want to try another
        # state update
        if "State update failed" in str(exc):
            self.logger.debug("Updating Flow Run state failed: {}".format(str(exc)))
            return

        # This is to match existing past behavior, I cannot imagine we would reach
        # this point with a flow run that has no id
        #
        # A default is supplied so that a missing attribute reaches this guard
        # instead of raising AttributeError from inside the exception handler
        if not getattr(flow_run, "id", None):
            self.logger.error("Flow run is missing an id.", exc_info=True)
            return

        self.logger.error(
            f"Exception encountered while deploying flow run {flow_run.id}",
            exc_info=True,
        )

        self._safe_write_run_log(
            flow_run,
            message=str(exc),
            level="ERROR",
        )
        self._mark_flow_as_failed(flow_run=flow_run, message=str(exc))

        self.logger.error(f"Deployment of {flow_run.id} aborted!")
def test_can_deserialize_old_no_result(old_json):
    """The supplied `old_json` payload loads into a successful state."""
    restored = StateSchema().load(old_json)
    assert restored.is_successful()
def test_deserialize_state_with_safe_result(cls):
    """A state built with only a message equals itself after dump and load."""
    original = cls(message="message")

    payload = StateSchema().dump(original)
    restored = StateSchema().load(payload)

    assert isinstance(restored, cls)
    assert restored == original
def test_deserialize_state_with_unknown_type_fails():
    """Loading a payload whose `type` is `FakeState` raises ValidationError."""
    bogus_payload = {"type": "FakeState"}
    with pytest.raises(marshmallow.exceptions.ValidationError):
        StateSchema().load(bogus_payload)
def test_complex_state_attributes_are_handled(state):
    """The supplied state compares equal to its dump/load round trip."""
    payload = StateSchema().dump(state)
    restored = StateSchema().load(payload)
    assert state == restored