示例#1
0
    def test_simple_dict_hashing(self):
        """
        A flat dict is hashed pair by pair: the debug log shows the
        (key, value) tuple, then the key, then the value, then the
        combined digests
        """
        data = {"d": 4}
        expected_final_hash = "63c3302ff7ac527023a43dd85cbb92e1"

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            # wrapped in a subTest so the log comparison below still runs
            # when the digest does not match
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(data)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'dict'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: ('d', 4)",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: d",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 8277e0910d750195b448797616e091ad, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: 4",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 038835f45126b13749d59afa4382ec30, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 2bbf775e58a10239cb79016c7ae0ec92, <class 'str'>",
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#2
0
    def test_pandas_series_hashing(self):
        """
        A pandas Series hashes to an integer; the debug log shows a single
        input / type / output triple for it
        """
        # (series contents, expected integer hash)
        cases = [
            (range(20), 7008921389990319782),
            (["a"], -4496393130729816112),
            ([1], 6238072747940578789),
        ]

        for d, expected_final_hash in cases:
            with self.subTest(d=d, expected_final_hash=expected_final_hash):
                with self.assertLogs(
                    logger="simpleml.persistables.hashing", level="DEBUG"
                ) as logs:
                    # input/output
                    data = pd.Series(d)
                    with self.subTest():
                        actual_hash = CustomHasherMixin.custom_hasher(data)
                        self.assertEqual(actual_hash, expected_final_hash)

                    # internal behavior
                    # hash series
                    expected_logs = [
                        f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                        "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.series.Series'>",
                        f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'int'>",
                    ]
                    self.assertEqual(logs.output, expected_logs)
示例#3
0
    def test_pandas_list_hashing(self):
        """
        A list holding a Series and a DataFrame: each element hashes to an
        integer and the list as a whole hashes to a hex string
        """
        data = [pd.Series(["a"]), pd.DataFrame([1])]
        expected_final_hash = "58d577105165dfc792672f4e430f2b0a"

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(data)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            # hash list -> hash items in list
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'list'>",
                # data
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data[0]}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.series.Series'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: -4496393130729816112, <class 'int'>",
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data[1]}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.frame.DataFrame'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: -7087755961261762286, <class 'int'>",
                # Final
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#4
0
    def test_pandas_frame_hashing(self):
        """
        A pandas DataFrame hashes to an integer; the debug log shows a single
        input / type / output triple for it
        """
        # (frame contents, expected integer hash)
        cases = [
            ([range(10), range(10)], 6716675364149054294),
            (["a"], 5694802365760992243),
            ([1], -7087755961261762286),
        ]

        for d, expected_final_hash in cases:
            with self.subTest(d=d, expected_final_hash=expected_final_hash):
                with self.assertLogs(
                    logger="simpleml.persistables.hashing", level="DEBUG"
                ) as logs:
                    # input/output
                    data = pd.DataFrame(d)
                    with self.subTest():
                        actual_hash = CustomHasherMixin.custom_hasher(data)
                        self.assertEqual(actual_hash, expected_final_hash)

                    # internal behavior
                    # hash dataframe
                    expected_logs = [
                        f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                        "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.frame.DataFrame'>",
                        f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'int'>",
                    ]
                    self.assertEqual(logs.output, expected_logs)
示例#5
0
    def test_simple_list_hashing(self):
        """
        A flat list of primitives is hashed element by element, then the
        element digests are combined into the final digest
        """
        data = ["b", 3]
        expected_final_hash = "bf00e62763be22f17074498f35a68302"

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(data)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'list'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: b",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 92eb5ffee6ae2fec3ad71c777531578f, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: 3",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: c01ef2d65e504ea354c5bf4f5b6f6329, <class 'str'>",
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#6
0
 def test_float_hash(self):
     """
     md5_hasher returns the pinned digest for each float input
     """
     expected_hashes = {
         0.045:
         "4ec2e10062562fe8ba5183cfee61dc7f",
         0.0981209867893243456787453211253689098765265778484245:
         "491007207b7b93336843ee3de9d64ec5",
     }
     for data, expected_hash in expected_hashes.items():
         # one subTest per input: a failure names the offending value and
         # the remaining inputs are still checked
         with self.subTest(data=data):
             self.assertEqual(CustomHasherMixin.md5_hasher(data),
                              expected_hash)
示例#7
0
 def test_tuple_hash(self):
     """
     A tuple of digests is hashed by md5_hasher into one digest
     (set/tuple/list/dict/mappingproxy reduce to a tuple of hashes)
     """
     expected_hash = "57ef70a19f5ecb8fc70d9f173d4f7740"
     data = (
         "0357109b163771392cc674173d921e4b",
         "76f34d73a1a6753d1243c9ba0afe3457",
         "38b1de0299d81decb1341f9f2bfb4c8b",
         "21065bb299df9d8a902754661f1dcf08",
     )

     actual_hash = CustomHasherMixin.md5_hasher(data)
     self.assertEqual(actual_hash, expected_hash)
示例#8
0
 def test_string_hash(self):
     """
     md5_hasher returns the pinned digest for each string input
     """
     expected_hashes = {
         "abc":
         "900150983cd24fb0d6963f7d28e17f72",
         "ajfoh203949fja..!#@#@$@":
         "1cb83ecc9f13349b4b04eb4b4aa93d26",
         "AFAL;FADFKA;JAFIEFHIA":
         "33b899614e1e16b4a84646d435064f53",
         "extra_long_value_extra_long_value_extra_long_value_extra_long_value_extra_long_value_extra_long_value":
         "e192f15053eb1e6fce7d0a0769280dc5",
     }
     for data, expected_hash in expected_hashes.items():
         # one subTest per input: a failure names the offending value and
         # the remaining inputs are still checked
         with self.subTest(data=data):
             self.assertEqual(CustomHasherMixin.md5_hasher(data),
                              expected_hash)
示例#9
0
    def test_primitive_list_hashing(self):
        """
        A list mixing primitives with a nested list and a nested dict is
        hashed depth-first, in element order, as recorded in the debug log
        """
        data = ["a", 2, ["b", 3], {"d": 4}]
        expected_final_hash = "a0a531f7754274ea2fe57fefce20a55e"

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(data)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            # hash list -> hash items in list
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'list'>",
                # primitives
                "DEBUG:simpleml.persistables.hashing:Hashing input: a",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 0cc175b9c0f1b6a831c399e269772661, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: 2",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 4753b753264f02d409ef7e2fa734d1e5, <class 'str'>",
                # simple containers
                "DEBUG:simpleml.persistables.hashing:Hashing input: ['b', 3]",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'list'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: b",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 92eb5ffee6ae2fec3ad71c777531578f, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: 3",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: c01ef2d65e504ea354c5bf4f5b6f6329, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: bf00e62763be22f17074498f35a68302, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: {'d': 4}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'dict'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: ('d', 4)",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: d",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 8277e0910d750195b448797616e091ad, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: 4",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 038835f45126b13749d59afa4382ec30, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 2bbf775e58a10239cb79016c7ae0ec92, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 63c3302ff7ac527023a43dd85cbb92e1, <class 'str'>",
                # Final
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#10
0
 def test_int_hash(self):
     """
     md5_hasher returns the pinned digest for each int input, including
     negative values and values far beyond 64 bits
     """
     expected_hashes = {
         12:
         "9788cdcdd2f907b2ba4c106e05db77dd",
         12756387463875648597426574256294765284528457465783:
         "4fceac488520aff1b49e0419ff29aef4",
         -76348735275375648597426574256294765284528457465783:
         "a30cb8a201eec96790acd4057d8b5de0",
         122390254752894527495027528529045974590245949201071407541071504574910:
         "bde13a1a7d7af2e3d3af1650e3af5d2e",
         -2898275276148014718015745618934189374190558956134871589634510914871105:
         "8aae8e41e85aa776b435dddefd741b6a",
     }
     for data, expected_hash in expected_hashes.items():
         # one subTest per input: a failure names the offending value and
         # the remaining inputs are still checked
         with self.subTest(data=data):
             self.assertEqual(CustomHasherMixin.md5_hasher(data),
                              expected_hash)
示例#11
0
    def test_uninitialized_class_dict_hashing(self):
        """
        Hashes only the class attributes, passed in as cls.__dict__

        The debug log shows each public attribute and method being hashed
        as a (name, value) pair
        """
        expected_final_hash = "c7317170afd08252742af30eb98fe2d3"

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(_Test123.__dict__)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            # hash class dict -> hash dict
            self.maxDiff = None
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {_Test123.__dict__}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'dict'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: ('random_attribute', 'abc')",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: random_attribute",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 2a0611fe4463747f0ec29cd5ad5664ef, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: abc",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 900150983cd24fb0d6963f7d28e17f72, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: a5cccdfa42200d663c4f62f18fe22af7, <class 'str'>",
                f"DEBUG:simpleml.persistables.hashing:Hashing input: ('fancy_method', {_Test123.fancy_method})",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input: fancy_method",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: 702b2a8795c39644af3dfc8ad728f918, <class 'str'>",
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {_Test123.fancy_method}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'function'>",
                "DEBUG:simpleml.persistables.hashing:Hashing input:     def fancy_method(self):\n        pass\n",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: ee31cc150ab1b82f7cd90ee978eb4970, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: ee31cc150ab1b82f7cd90ee978eb4970, <class 'str'>",
                "DEBUG:simpleml.persistables.hashing:Hashing output: a32e10936ab14ba8a9afa0527229fb7f, <class 'str'>",
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#12
0
    def test_uninitialized_class_hashing(self):
        """
        Hashes an uninitialized class (the class object itself)

        The logs show the class import path being hashed, so the pinned
        digests depend on the name this module was loaded under. Only the
        two known entrypoints have pinned values; any other module name
        fails with an explicit message instead of an unbound-variable error.
        """

        with self.assertLogs(logger="simpleml.persistables.hashing",
                             level="DEBUG") as logs:
            hash_object = _Test123
            self.maxDiff = None

            # results are sensitive to entrypoint (relative path names)
            if __name__ == "simpleml.tests.unit.test_hashing":
                # entry from loader
                # input/output
                expected_final_hash = "efc89d254a441c047df389223d0f14fc"
                expected_logs = [
                    "DEBUG:simpleml.persistables.hashing:Hashing input: <class 'simpleml.tests.unit.test_hashing._Test123'>",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'type'>",
                    "WARNING:simpleml.persistables.hashing:Hashing class import path for <class 'simpleml.tests.unit.test_hashing._Test123'>, if a fully qualified import path is not used, calling again from a different location will yield different results!",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: simpleml.tests.unit.test_hashing._Test123",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                ]

            elif __name__ == "__main__":
                # entry from this file
                # input/output
                expected_final_hash = "1ec00bc22a3c72500ab551cbb2f9d520"
                expected_logs = [
                    "DEBUG:simpleml.persistables.hashing:Hashing input: <class '__main__._Test123'>",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'type'>",
                    "WARNING:simpleml.persistables.hashing:Hashing class import path for <class '__main__._Test123'>, if a fully qualified import path is not used, calling again from a different location will yield different results!",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: __main__._Test123",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                ]

            else:
                # no pinned values exist for this module name; without this
                # branch the assertions below would hit unbound locals
                self.fail(
                    f"No expected hash is pinned for module name {__name__!r}; "
                    "run via 'simpleml.tests.unit.test_hashing' or as '__main__'"
                )

            with self.subTest():
                self.assertEqual(CustomHasherMixin.custom_hasher(hash_object),
                                 expected_final_hash)

            self.assertEqual(logs.output, expected_logs)
示例#13
0
    def test_empty_pandas_dataframe_hashing(self):
        """
        An empty DataFrame hashes to the integer 0
        """
        data = pd.DataFrame()
        expected_final_hash = 0

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(data)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.frame.DataFrame'>",
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'int'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#14
0
    def test_int_hashing(self):
        """
        A bare int hashes to a hex string, logged as one
        input / type / output triple
        """
        data = 2
        expected_final_hash = "4753b753264f02d409ef7e2fa734d1e5"

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(data)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                f"DEBUG:simpleml.persistables.hashing:hash type: {type(data)}",
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#15
0
    def test_string_hashing(self):
        """
        A bare string hashes to a hex string, logged as one
        input / type / output triple
        """
        data = "a"
        expected_final_hash = "0cc175b9c0f1b6a831c399e269772661"

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(data)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                f"DEBUG:simpleml.persistables.hashing:hash type: {type(data)}",
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#16
0
    def test_none_hashing(self):
        """
        None hashes to a fixed integer rather than a hex string
        """
        data = None
        expected_final_hash = -12345678987654321

        with self.assertLogs(
            logger="simpleml.persistables.hashing", level="DEBUG"
        ) as logs:
            # input/output
            with self.subTest():
                actual_hash = CustomHasherMixin.custom_hasher(data)
                self.assertEqual(actual_hash, expected_final_hash)

            # internal behavior
            # hash None
            expected_logs = [
                f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                "DEBUG:simpleml.persistables.hashing:hash type: <class 'NoneType'>",
                f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'int'>",
            ]
            self.assertEqual(logs.output, expected_logs)
示例#17
0
    def test_lambda_hashing(self):
        """
        A locally defined function is hashed through its source text

        NOTE: the expected log below contains the literal source of `data`,
        including its leading whitespace. Re-indenting or reformatting the
        nested function changes that text and therefore the expected hash.
        """
        with self.assertLogs(logger="simpleml.persistables.hashing",
                             level="DEBUG") as logs:
            # input/output
            def data():
                return 0

            expected_final_hash = "edf9b34707b6a63fd5ec95017e690f8f"
            # subTest so the log comparison below still runs on a hash mismatch
            with self.subTest():
                self.assertEqual(CustomHasherMixin.custom_hasher(data),
                                 expected_final_hash)

            # internal behavior
            # function -> its source string -> digest; the same digest is
            # logged twice (once for the string, once for the function)
            self.assertEqual(
                logs.output,
                [
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'function'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input:             def data():\n                return 0\n",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                ],
            )
示例#18
0
 def test_other_dtype_error(self):
     """
     md5_hasher raises ValueError for each of these unsupported inputs
     """
     for dtype in ({}, [], set(), pd.DataFrame, None):
         # one subTest per input: a failure names the offending value and
         # the remaining inputs are still checked
         with self.subTest(dtype=dtype):
             with self.assertRaises(ValueError):
                 CustomHasherMixin.md5_hasher(dtype)
示例#19
0
    def test_complex_list_hashing(self):
        """
        A list mixing primitives, containers, a lambda and pandas objects is
        hashed element by element, in order

        NOTE: the expected log below contains the literal source line of the
        lambda, including its leading whitespace and trailing comma.
        Reformatting the `data` literal changes that text and therefore the
        expected hashes.
        """
        with self.assertLogs(logger="simpleml.persistables.hashing",
                             level="DEBUG") as logs:
            # input/output
            data = [
                "a",
                2,
                ["b", 3],
                {
                    "d": 4
                },
                lambda: 0,
                pd.Series(["a"]),
                pd.DataFrame([1]),
            ]
            expected_final_hash = "2be1e4c1f34ee1614844b6b5130052d0"
            # subTest so the log comparison below still runs on a hash mismatch
            with self.subTest():
                self.assertEqual(CustomHasherMixin.custom_hasher(data),
                                 expected_final_hash)

            # internal behavior
            # hash list -> hash items in list
            self.assertEqual(
                logs.output,
                [
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'list'>",
                    # primitives
                    "DEBUG:simpleml.persistables.hashing:Hashing input: a",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 0cc175b9c0f1b6a831c399e269772661, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: 2",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 4753b753264f02d409ef7e2fa734d1e5, <class 'str'>",
                    # simple containers
                    "DEBUG:simpleml.persistables.hashing:Hashing input: ['b', 3]",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'list'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: b",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 92eb5ffee6ae2fec3ad71c777531578f, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: 3",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: c01ef2d65e504ea354c5bf4f5b6f6329, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: bf00e62763be22f17074498f35a68302, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: {'d': 4}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'dict'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: ('d', 4)",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: d",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 8277e0910d750195b448797616e091ad, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: 4",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 038835f45126b13749d59afa4382ec30, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 2bbf775e58a10239cb79016c7ae0ec92, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 63c3302ff7ac527023a43dd85cbb92e1, <class 'str'>",
                    # functions
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data[4]}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'function'>",
                    # source inspection pulls the line the function is defined on with all whitespace
                    # depending on source, this could be more variables than just the function
                    "DEBUG:simpleml.persistables.hashing:Hashing input:                 lambda: 0,\n",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 7c7ecb893a2bdd05739b7fc600fda7e4, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 7c7ecb893a2bdd05739b7fc600fda7e4, <class 'str'>",
                    # data
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data[5]}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.series.Series'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: -4496393130729816112, <class 'int'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data[6]}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.frame.DataFrame'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: -7087755961261762286, <class 'int'>",
                    # Final
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                ],
            )
示例#20
0
    def test_initialized_class_hashing(self):
        """
        Hashes the initialized object as a (class, __dict__) tuple

        The class part is hashed through its import path, so the pinned
        digests depend on the name this module was loaded under. Only the
        two known entrypoints have pinned values; any other module name
        fails with an explicit message instead of an unbound-variable error.
        """

        with self.assertLogs(logger="simpleml.persistables.hashing",
                             level="DEBUG") as logs:
            hash_object = _Test123()
            self.maxDiff = None

            # results are sensitive to entrypoint (relative path names)
            if __name__ == "simpleml.tests.unit.test_hashing":
                # entry from loader
                # input/output
                expected_final_hash = "8e8c5f11154ccf2eda948ab98f468bf9"
                expected_logs = [
                    "DEBUG:simpleml.persistables.hashing:Hashing input: pretty repr of test class",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'simpleml.tests.unit.test_hashing._Test123'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: (<class 'simpleml.tests.unit.test_hashing._Test123'>, {})",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: <class 'simpleml.tests.unit.test_hashing._Test123'>",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'type'>",
                    "WARNING:simpleml.persistables.hashing:Hashing class import path for <class 'simpleml.tests.unit.test_hashing._Test123'>, if a fully qualified import path is not used, calling again from a different location will yield different results!",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: simpleml.tests.unit.test_hashing._Test123",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: efc89d254a441c047df389223d0f14fc, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: efc89d254a441c047df389223d0f14fc, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: {}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'dict'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: d41d8cd98f00b204e9800998ecf8427e, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                ]

            elif __name__ == "__main__":
                # entry from this file
                # input/output
                expected_final_hash = "0399fcca26cf14ec1b3e31b69ca2397e"
                expected_logs = [
                    "DEBUG:simpleml.persistables.hashing:Hashing input: pretty repr of test class",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class '__main__._Test123'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: (<class '__main__._Test123'>, {})",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: <class '__main__._Test123'>",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'type'>",
                    "WARNING:simpleml.persistables.hashing:Hashing class import path for <class '__main__._Test123'>, if a fully qualified import path is not used, calling again from a different location will yield different results!",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: __main__._Test123",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 1ec00bc22a3c72500ab551cbb2f9d520, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 1ec00bc22a3c72500ab551cbb2f9d520, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: {}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'dict'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: d41d8cd98f00b204e9800998ecf8427e, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                ]

            else:
                # no pinned values exist for this module name; without this
                # branch the assertions below would hit unbound locals
                self.fail(
                    f"No expected hash is pinned for module name {__name__!r}; "
                    "run via 'simpleml.tests.unit.test_hashing' or as '__main__'"
                )

            with self.subTest():
                self.assertEqual(CustomHasherMixin.custom_hasher(hash_object),
                                 expected_final_hash)

            self.assertEqual(logs.output, expected_logs)
示例#21
0
    def test_complex_dict_hashing(self):
        """Check ``custom_hasher`` on a dict mixing several value kinds.

        The input combines a primitive, a list, a nested dict, a lambda, a
        pandas Series and a pandas DataFrame. The test asserts the final hash
        and the exact, ordered list of records emitted on the
        ``simpleml.persistables.hashing`` logger while hashing.
        """
        # Capture everything logged at DEBUG or above on the hashing logger so
        # the traversal order can be compared record by record.
        with self.assertLogs(logger="simpleml.persistables.hashing",
                             level="DEBUG") as logs:
            # input/output
            # NOTE: the expected log below contains the literal source line of
            # the lambda entry, leading whitespace included. Reformatting or
            # re-indenting that entry would invalidate the expected values.
            data = {
                "a": 2,
                "b": ["b", 3],
                "c": {
                    "d": 4
                },
                "d": lambda: 0,
                "e": pd.Series(["a"]),
                "f": pd.DataFrame([1]),
            }

            expected_final_hash = "53c5ed97bbc39773039fee067bbaf154"
            # subTest records a hash mismatch without aborting the method, so
            # the log comparison below still runs and is reported separately.
            with self.subTest():
                self.assertEqual(CustomHasherMixin.custom_hasher(data),
                                 expected_final_hash)

            # internal behavior
            # hash dict -> hash items in dict
            # Each hashed object logs "Hashing input", "hash type" and finally
            # "Hashing output"; a container's output is logged only after all
            # of its members have logged theirs.
            self.assertEqual(
                logs.output,
                [
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'dict'>",
                    # primitives
                    "DEBUG:simpleml.persistables.hashing:Hashing input: ('a', 2)",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: a",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 0cc175b9c0f1b6a831c399e269772661, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: 2",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 4753b753264f02d409ef7e2fa734d1e5, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: ac4e6d00fdf03922ad2785e91a749963, <class 'str'>",
                    # simple containers
                    "DEBUG:simpleml.persistables.hashing:Hashing input: ('b', ['b', 3])",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: b",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 92eb5ffee6ae2fec3ad71c777531578f, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: ['b', 3]",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'list'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: b",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 92eb5ffee6ae2fec3ad71c777531578f, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: 3",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: c01ef2d65e504ea354c5bf4f5b6f6329, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: bf00e62763be22f17074498f35a68302, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 89204e715c58f21a1cb85ee468e932f6, <class 'str'>",
                    # nested dict: the inner {'d': 4} output (63c3...) is the same
                    # value test_simple_dict_hashing expects for that dict
                    "DEBUG:simpleml.persistables.hashing:Hashing input: ('c', {'d': 4})",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: c",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 4a8a08f09d37b73795649038408b5f33, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: {'d': 4}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'dict'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: ('d', 4)",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: d",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 8277e0910d750195b448797616e091ad, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: 4",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'int'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 038835f45126b13749d59afa4382ec30, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 2bbf775e58a10239cb79016c7ae0ec92, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 63c3302ff7ac527023a43dd85cbb92e1, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: bb623855eb965681e3be391b1d01df2d, <class 'str'>",
                    # functions
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: ('d', {data['d']})",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: d",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 8277e0910d750195b448797616e091ad, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data['d']}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'function'>",
                    # source inspection pulls the line the function is defined on with all whitespace
                    # depending on source, this could be more variables than just the function
                    """DEBUG:simpleml.persistables.hashing:Hashing input:                 "d": lambda: 0,\n""",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    # the source-line string's output, then the function's own
                    # output, which repeats the same value
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 8ea258d3c421cfc77ddf3c8665388147, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 8ea258d3c421cfc77ddf3c8665388147, <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 740a5a62af87a72436dab3e4c8e50807, <class 'str'>",
                    # data
                    # pandas objects log an int output, unlike the str outputs
                    # logged for every other entry in this list
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: ('e', {data['e']})",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: e",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: e1671797c52e15f763380b45e841ec32, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data['e']}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.series.Series'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: -4496393130729816112, <class 'int'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 389d9f8d08ee8620fe0eeb9c55228418, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: ('f', {data['f']})",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'tuple'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing input: f",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'str'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: 8fa14cdd754f91cc6554c9e71929cce7, <class 'str'>",
                    f"DEBUG:simpleml.persistables.hashing:Hashing input: {data['f']}",
                    "DEBUG:simpleml.persistables.hashing:hash type: <class 'pandas.core.frame.DataFrame'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: -7087755961261762286, <class 'int'>",
                    "DEBUG:simpleml.persistables.hashing:Hashing output: f91de4d68dee99f575029a1ee9a7a265, <class 'str'>",
                    # Final
                    f"DEBUG:simpleml.persistables.hashing:Hashing output: {expected_final_hash}, <class 'str'>",
                ],
            )