def test_from_storage_dict():
    """Check that a custom ``value_from_storage_dict`` reads data written under a renamed field."""
    legacy_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=legacy_map)
    class MyThing(NamedTuple):
        orig_name: str

    legacy_payload = _serialize_dagster_namedtuple(MyThing("old"), whitelist_map=legacy_map)

    class CompatSerializer(DefaultNamedTupleSerializer):
        @classmethod
        def value_from_storage_dict(
            cls, storage_dict, klass, args_for_class, whitelist_map, descent_path
        ):
            # simplified demo of field renaming: take the old key if it is
            # present and truthy, otherwise the new key
            field_value = storage_dict.get("orig_name") or storage_dict.get("new_name")
            return klass(field_value)

    current_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=current_map, serializer=CompatSerializer)
    class MyThing(NamedTuple):  # pylint: disable=function-redefined
        new_name: str

    # payload written by the old definition is readable through the new one
    from_legacy = _deserialize_json(legacy_payload, whitelist_map=current_map)
    assert from_legacy.new_name == "old"

    # payload written by the new definition round-trips as well
    current_payload = _serialize_dagster_namedtuple(MyThing("new"), whitelist_map=current_map)
    from_current = _deserialize_json(current_payload, whitelist_map=current_map)
    assert from_current.new_name == "new"
def test_set():
    """Round-trip sets/frozensets and check the serialized form hashes consistently."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class HasSets(namedtuple("_HasSets", "reg_set frozen_set")):
        def __new__(cls, reg_set, frozen_set):
            set_param(reg_set, "reg_set")
            inst_param(frozen_set, "frozen_set", frozenset)
            return super(HasSets, cls).__new__(cls, reg_set, frozen_set)

    original = HasSets({1, 2, 3, "3"}, frozenset([4, 5, 6, "6"]))
    payload = _serialize_dagster_namedtuple(original, whitelist_map=wmap)
    restored = _deserialize_json(payload, whitelist_map=wmap)
    assert original == restored

    # verify that set elements are serialized in a consistent order so that
    # equal objects always have a consistent serialization / snapshot ID
    letters = string.ascii_lowercase
    big = HasSets(set(letters), frozenset(letters))
    snap_id = hash_str(_serialize_dagster_namedtuple(big, whitelist_map=wmap))

    # serialize -> deserialize -> serialize again, then hash the final payload
    first_pass = _serialize_dagster_namedtuple(big, whitelist_map=wmap)
    rebuilt = _deserialize_json(first_pass, whitelist_map=wmap)
    second_pass = _serialize_dagster_namedtuple(rebuilt, whitelist_map=wmap)
    roundtrip_snap_id = hash_str(second_pass)

    assert snap_id == roundtrip_snap_id
def test_skip_when_empty():
    """Check that ``skip_when_empty`` keeps the snapshot hash stable when a new empty field is added."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class SameSnapshotTuple(namedtuple("_Tuple", "foo")):
        def __new__(cls, foo):
            return super(SameSnapshotTuple, cls).__new__(cls, foo)  # pylint: disable=bad-super-call

    baseline_serialized = _serialize_dagster_namedtuple(
        SameSnapshotTuple(foo="A"), whitelist_map=wmap
    )
    baseline_snapshot = hash_str(baseline_serialized)

    # Without setting skip_when_empty, the ID changes
    @_whitelist_for_serdes(whitelist_map=wmap)  # pylint: disable=function-redefined
    class SameSnapshotTuple(namedtuple("_Tuple", "foo bar")):
        def __new__(cls, foo, bar=None):
            return super(SameSnapshotTuple, cls).__new__(  # pylint: disable=bad-super-call
                cls, foo, bar
            )

    plain_snapshot = hash_str(
        _serialize_dagster_namedtuple(SameSnapshotTuple(foo="A"), whitelist_map=wmap)
    )
    assert plain_snapshot != baseline_snapshot

    # By setting a custom serializer and skip_when_empty, the snapshot stays the same
    # as long as the new field is None
    class SkipWhenEmptySerializer(DefaultNamedTupleSerializer):
        @classmethod
        def skip_when_empty(cls) -> Set[str]:
            return {"bar"}

    @_whitelist_for_serdes(  # pylint: disable=function-redefined
        whitelist_map=wmap, serializer=SkipWhenEmptySerializer
    )
    class SameSnapshotTuple(namedtuple("_Tuple", "foo bar")):
        def __new__(cls, foo, bar=None):
            return super(SameSnapshotTuple, cls).__new__(  # pylint: disable=bad-super-call
                cls, foo, bar
            )

    for bar_val in [None, [], {}, set()]:
        candidate = SameSnapshotTuple(foo="A", bar=bar_val)
        candidate_snapshot = hash_str(_serialize_dagster_namedtuple(candidate, whitelist_map=wmap))
        assert baseline_snapshot == candidate_snapshot

        # the payload written before ``bar`` existed still loads, with bar defaulted
        rehydrated = _deserialize_json(baseline_serialized, whitelist_map=wmap)
        assert rehydrated.foo == "A"
        assert rehydrated.bar is None

    populated = SameSnapshotTuple(foo="A", bar="B")
    assert populated.foo == "A"
    assert populated.bar == "B"
def test_forward_compat_serdes_new_field_with_default():
    """Data written before a defaulted field was added deserializes with that field as None."""
    wmap = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", "foo bar")):
        def __new__(cls, foo, bar):
            return super(Quux, cls).__new__(cls, foo, bar)  # pylint: disable=bad-super-call

    assert wmap.has_tuple_entry("Quux")
    registered, _ = wmap.get_tuple_entry("Quux")
    assert registered is Quux

    two_field = Quux("zip", "zow")
    payload = _serialize_dagster_namedtuple(two_field, whitelist_map=wmap)

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", "foo bar baz")):  # pylint: disable=bad-super-call
        def __new__(cls, foo, bar, baz=None):
            return super(Quux, cls).__new__(cls, foo, bar, baz=baz)

    # the map entry now points at the redefined class
    assert wmap.has_tuple_entry("Quux")
    registered, _ = wmap.get_tuple_entry("Quux")
    assert registered is Quux

    three_field = _deserialize_json_to_dagster_namedtuple(payload, whitelist_map=wmap)

    assert three_field != two_field
    assert three_field.foo == two_field.foo
    assert three_field.bar == two_field.bar
    assert three_field.baz is None
def test_to_storage_value():
    """Check that a custom ``value_to_storage_dict`` can substitute another type on write.

    ``DeprecatedAlphabet`` is registered with a serializer that stores its
    values as ``SubstituteAlphabet``; after a round trip both the outer value
    and the nested one must come back as ``SubstituteAlphabet``.
    """
    test_map = WhitelistMap()

    class MySerializer(DefaultNamedTupleSerializer):
        @staticmethod
        def value_to_storage_dict(value, whitelist_map):
            # Forward the map we were handed instead of closing over the
            # enclosing ``test_map``, so the argument is not silently ignored.
            return DefaultNamedTupleSerializer.value_to_storage_dict(
                SubstituteAlphabet(value.a, value.b, value.c), whitelist_map
            )

    @_whitelist_for_serdes(whitelist_map=test_map, serializer=MySerializer)
    class DeprecatedAlphabet(namedtuple("_DeprecatedAlphabet", "a b c")):
        def __new__(cls, a, b, c):
            return super(DeprecatedAlphabet, cls).__new__(cls, a, b, c)

    @_whitelist_for_serdes(whitelist_map=test_map)
    class SubstituteAlphabet(namedtuple("_SubstituteAlphabet", "a b c")):
        def __new__(cls, a, b, c):
            return super(SubstituteAlphabet, cls).__new__(cls, a, b, c)

    # nest a deprecated value inside another to exercise the substitution at depth
    nested = DeprecatedAlphabet(None, None, "_C")
    deprecated = DeprecatedAlphabet("A", "B", nested)
    serialized = _serialize_dagster_namedtuple(deprecated, whitelist_map=test_map)
    alphabet = _deserialize_json_to_dagster_namedtuple(serialized, whitelist_map=test_map)

    assert not isinstance(alphabet, DeprecatedAlphabet)
    assert isinstance(alphabet, SubstituteAlphabet)
    assert not isinstance(alphabet.c, DeprecatedAlphabet)
    assert isinstance(alphabet.c, SubstituteAlphabet)
def test_descent_path():
    """Serialization and deserialization errors are expected to report the descent path."""
    expected_path = re.escape("Descent path: <root:dict>.a.b[2].c")

    def _wrap(leaf):
        # place ``leaf`` at <root>.a.b[2].c
        return {"a": {"b": [{}, {}, {"c": leaf}]}}

    class Foo(NamedTuple):
        bar: int

    # Foo is not registered with any whitelist map, so serializing it must fail
    with pytest.raises(SerializationError, match=expected_path):
        serialize_value(_wrap(Foo(1)))

    populated_map = WhitelistMap.create()
    empty_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=populated_map)
    class Fizz(NamedTuple):
        buzz: int

    payload = _serialize_dagster_namedtuple(_wrap(Fizz(1)), whitelist_map=populated_map)

    # Fizz is absent from the empty map, so reading the payload back must fail
    with pytest.raises(DeserializationError, match=expected_path):
        _deserialize_json(payload, whitelist_map=empty_map)
def test_forward_compat():
    """Data written by a newer definition (with an extra field) is readable by the older one."""
    reader_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=reader_map)
    class Quux(namedtuple("_Quux", "bar baz")):
        def __new__(cls, bar, baz):
            return super().__new__(cls, bar, baz)

    # new version has a new field with a new type
    writer_map = WhitelistMap.create()

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=writer_map)
    class Quux(namedtuple("_Quux", "foo bar baz")):
        def __new__(cls, foo, bar, baz):
            return super().__new__(cls, foo, bar, baz)

    @_whitelist_for_serdes(whitelist_map=writer_map)
    class Foo(namedtuple("_Foo", "wow")):
        def __new__(cls, wow):
            return super().__new__(cls, wow)

    # write from new
    payload = _serialize_dagster_namedtuple(
        Quux(foo=Foo("wow"), bar="bar", baz="baz"),
        whitelist_map=writer_map,
    )

    # read from old, foo ignored
    as_old = _deserialize_json(payload, whitelist_map=reader_map)
    assert as_old.bar == "bar"
    assert as_old.baz == "baz"
def test_backward_compat_serdes():
    """Data written with an extra field loads into a definition that has since dropped it."""
    wmap = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", "foo bar baz")):
        def __new__(cls, foo, bar, baz):
            return super(Quux, cls).__new__(cls, foo, bar, baz)  # pylint: disable=bad-super-call

    three_field = Quux("zip", "zow", "whoopie")
    payload = _serialize_dagster_namedtuple(three_field, whitelist_map=wmap)

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", "foo bar")):  # pylint: disable=bad-super-call
        def __new__(cls, foo, bar):
            return super(Quux, cls).__new__(cls, foo, bar)

    two_field = _deserialize_json_to_dagster_namedtuple(payload, whitelist_map=wmap)

    assert two_field != three_field
    assert two_field.foo == three_field.foo
    assert two_field.bar == three_field.bar
    # the dropped field is not present on the reloaded value
    assert not hasattr(two_field, "baz")
def test_long_int():
    """An integer wider than 64 bits keeps its value across a serialize/deserialize round trip."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class NumHolder(NamedTuple):
        num: int

    holder = NumHolder(98765432109876543210)
    restored = _deserialize_json(_serialize_dagster_namedtuple(holder, wmap), wmap)
    assert holder.num == restored.num
def test_persistent_tuple():
    """A whitelisted namedtuple subclass round-trips to an equal value."""
    wmap = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Alphabet(namedtuple("_Alphabet", "a b c")):
        def __new__(cls, a, b, c):
            return super(Alphabet, cls).__new__(cls, a, b, c)

    original = Alphabet(a="A", b="B", c="C")
    payload = _serialize_dagster_namedtuple(original, whitelist_map=wmap)
    restored = _deserialize_json_to_dagster_namedtuple(payload, whitelist_map=wmap)
    assert original == restored
def test_set():
    """A namedtuple holding a set and a frozenset round-trips to an equal value."""
    wmap = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class HasSets(namedtuple("_HasSets", "reg_set frozen_set")):
        def __new__(cls, reg_set, frozen_set):
            set_param(reg_set, "reg_set")
            inst_param(frozen_set, "frozen_set", frozenset)
            return super(HasSets, cls).__new__(cls, reg_set, frozen_set)

    # mixed int/str members in both containers
    original = HasSets({1, 2, 3, "3"}, frozenset([4, 5, 6, "6"]))
    payload = _serialize_dagster_namedtuple(original, whitelist_map=wmap)
    restored = _deserialize_json_to_dagster_namedtuple(payload, whitelist_map=wmap)
    assert original == restored
def test_namedtuple_name_map():
    """Serialized/deserialized name registrations control the stored ``__class__`` value."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Thing(NamedTuple):
        name: str

    wmap.register_serialized_name("Thing", "SerializedThing")

    original = Thing("foo")
    payload = _serialize_dagster_namedtuple(original, wmap)

    # the payload is written under the registered serialized name
    stored_class = seven.json.loads(payload)["__class__"]
    assert stored_class == "SerializedThing"

    # with no reverse mapping registered yet, reading it back fails
    with pytest.raises(DeserializationError):
        _deserialize_json(payload, wmap)

    wmap.register_deserialized_name("SerializedThing", "Thing")
    restored = _deserialize_json(payload, wmap)
    assert restored == original
def get_id(self):
    """Return ``hash_str`` of this value's serialized form, produced with ``new_map``."""
    return hash_str(_serialize_dagster_namedtuple(self, whitelist_map=new_map))
def test_namedtuple_backcompat():
    """Check that a renamed class/field stays wire-compatible in both directions.

    ``NewThing`` (field ``new_name``) replaces ``OldThing`` (field ``old_name``).
    Its custom serializer writes the old class and field names, so the test
    asserts that serialized payloads and ids are identical between the two and
    that each map can read what the other wrote.
    """
    old_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=old_map)
    class OldThing(NamedTuple):
        old_name: str

        def get_id(self):
            # id is the hash of the serialized representation under old_map
            json_rep = _serialize_dagster_namedtuple(self, whitelist_map=old_map)
            return hash_str(json_rep)

    # create the old things
    old_thing = OldThing("thing")
    old_thing_id = old_thing.get_id()
    old_thing_serialized = _serialize_dagster_namedtuple(old_thing, old_map)

    new_map = WhitelistMap.create()

    class ThingSerializer(DefaultNamedTupleSerializer):
        @classmethod
        def value_from_storage_dict(cls, storage_dict, klass, args_for_class, whitelist_map, descent_path):
            # unpack every stored value first, extending the descent path per key
            raw_dict = {
                key: unpack_inner_value(value, whitelist_map, f"{descent_path}.{key}")
                for key, value in storage_dict.items()
            }
            # typical pattern is to use the same serialization format from an old field and passing
            # it in as a new field
            return klass(
                **{
                    key: value
                    for key, value in raw_dict.items() if key in args_for_class
                },
                new_name=raw_dict.get("old_name"),
            )

        @classmethod
        def value_to_storage_dict(
            cls,
            value,
            whitelist_map,
            descent_path,
        ):
            storage = super().value_to_storage_dict(value, whitelist_map, descent_path)
            # read the name before the "new_name" key is removed below
            name = storage.get("new_name") or storage.get("old_name")
            if "new_name" in storage:
                del storage["new_name"]
            # store the value under the old field name
            storage["old_name"] = name
            storage["__class__"] = "OldThing"  # persist using old class name
            return storage

    @_whitelist_for_serdes(whitelist_map=new_map, serializer=ThingSerializer)
    class NewThing(NamedTuple):
        new_name: str

        def get_id(self):
            # id is the hash of the serialized representation under new_map
            json_rep = _serialize_dagster_namedtuple(self, whitelist_map=new_map)
            return hash_str(json_rep)

    # exercising the old serialization format
    register_serdes_tuple_fallbacks({"OldThing": NewThing}, whitelist_map=new_map)

    new_thing = NewThing("thing")
    new_thing_id = new_thing.get_id()
    new_thing_serialized = _serialize_dagster_namedtuple(new_thing, new_map)

    assert new_thing_id == old_thing_id
    assert new_thing_serialized == old_thing_serialized

    # ensure that the new serializer can correctly interpret old serialized data
    old_thing_deserialized = _deserialize_json(old_thing_serialized, new_map)
    assert isinstance(old_thing_deserialized, NewThing)
    assert old_thing_deserialized.get_id() == new_thing_id

    # ensure that the new things serialized can still be read by old code
    new_thing_deserialized = _deserialize_json(new_thing_serialized, old_map)
    assert isinstance(new_thing_deserialized, OldThing)
    assert new_thing_deserialized.get_id() == old_thing_id