Пример #1
0
    def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET):
        """Build the prepared string for an iterable from the hashes of its items.

        Each item is hashed through ``self._hash`` and the number of times
        every distinct hash occurs is counted. The hashes are sorted before
        joining, so the outcome does not depend on iteration order.

        Args:
            obj: the iterable to process.
            parent: path of ``obj`` in the report; item paths are formed as
                ``parent[index]``.
            parents_ids: ids of the containers currently being processed. An
                item whose id is already in this set is skipped, which stops
                self-referencing structures from recursing forever.

        Returns:
            ``KEY_TO_VAL_STR`` formatted with the type name of ``obj`` and the
            comma-joined item entries. When ``self.ignore_repetition`` is
            false each entry is ``hash|count``; otherwise only the hash.
        """
        result = defaultdict(int)

        for i, item in enumerate(obj):
            # The same indexed path is used for the skip check and for the
            # recursive hashing, so nested items keep their full path.
            item_path = "{}[{}]".format(parent, i)
            if self._skip_this(item, parent=item_path):
                continue

            item_id = get_id(item)
            if parents_ids and item_id in parents_ids:
                continue

            parents_ids_added = add_to_frozen_set(parents_ids, item_id)
            hashed = self._hash(item, parent=item_path, parents_ids=parents_ids_added)
            # counting repetitions
            result[hashed] += 1

        if self.ignore_repetition:
            result = list(result)
        else:
            result = [
                '{}|{}'.format(i, v) for i, v in result.items()
            ]

        # Items are converted to strings and sorted so that join works and
        # the output is order-independent.
        result = sorted(map(str, result))
        result = ','.join(result)
        result = KEY_TO_VAL_STR.format(type(obj).__name__, result)

        return result
Пример #2
0
    def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribute=False, original_type=None):
        """Build the prepared string for a mapping from its hashed keys and values.

        Every key and value goes through ``self._hash``; each pair is
        formatted with ``KEY_TO_VAL_STR`` and the pairs are sorted before
        being joined with ``;``.

        Args:
            obj: mapping whose ``items()`` are processed.
            parent: path of ``obj`` in the report; used to build child paths.
            parents_ids: ids of the containers currently being processed; a
                value whose id is in this set is skipped.
            print_as_attribute: selects the child-path template through
                ``INDEX_VS_ATTRIBUTE`` and, when true, makes the prefix a type
                name instead of the literal ``dict``.
            original_type: type to name in the prefix instead of
                ``type(obj)``; only consulted when ``print_as_attribute`` is
                true.

        Returns:
            A string of the form ``<type>:{<pairs>}``.
        """
        path_template = "%s{}".format(INDEX_VS_ATTRIBUTE[print_as_attribute])
        pairs = []

        for key, value in obj.items():
            if not print_as_attribute and isinstance(key, strings):
                shown_key = "'%s'" % key
            else:
                shown_key = key
            child_path = path_template % (parent, shown_key)

            # The key is hashed first; this happens even if the value is skipped below.
            key_hash = self._hash(key, parent=child_path, parents_ids=parents_ids)

            value_id = get_id(value)
            if parents_ids and value_id in parents_ids:
                continue
            if self._skip_this(value, parent=child_path):
                continue

            value_hash = self._hash(
                value,
                parent=child_path,
                parents_ids=add_to_frozen_set(parents_ids, value_id))
            pairs.append(KEY_TO_VAL_STR.format(key_hash, value_hash))

        joined = ';'.join(sorted(pairs))

        type_str = 'dict'
        if print_as_attribute:
            type_ = original_type or type(obj)
            type_str = type_.__name__
            # A type belonging to an ignore group is reported as the whole group.
            for type_group in self.ignore_type_in_groups:
                if self.type_check_func(type_, type_group):
                    type_str = ','.join(member.__name__ for member in type_group)
                    break

        return "%s:{%s}" % (type_str, joined)
Пример #3
0
 def __contains__(self, obj):
     """Report whether ``obj`` has an entry in this mapping.

     Hashable objects are looked up directly; objects for which ``hash``
     raises ``TypeError`` are looked up by ``get_id(obj)`` instead.
     """
     lookup_key = obj
     try:
         hash(obj)
     except TypeError:
         # Unhashable objects cannot be dict keys, so use their id-based key.
         lookup_key = get_id(obj)
     return super().__contains__(lookup_key)
Пример #4
0
    def __getitem__(self, obj):
        """Return the stored value for ``obj``.

        The object itself is tried as the key first. If that lookup raises
        ``TypeError`` or ``KeyError``, ``get_id(obj)`` is tried instead.

        Raises:
            KeyError: if neither lookup finds an entry.
        """
        try:
            return super().__getitem__(obj)
        except (TypeError, KeyError):
            # Direct lookup failed: retry with the id-based key.
            try:
                return super().__getitem__(get_id(obj))
            except KeyError:
                raise KeyError('{} is not one of the hashed items.'.format(obj)) from None
Пример #5
0
    def __getitem__(self, obj):
        """Look up ``obj`` directly, then by ``get_id(obj)`` as a fallback.

        Raises:
            KeyError: with a descriptive message when both lookups fail.
        """
        try:
            found = super().__getitem__(obj)
        except (TypeError, KeyError):
            id_key = get_id(obj)
            try:
                found = super().__getitem__(id_key)
            except KeyError:
                # "from None" hides the internal lookup failures from the traceback.
                message = '{} is not one of the hashed items.'.format(obj)
                raise KeyError(message) from None
        return found
Пример #6
0
    def _prep_dict(self,
                   obj,
                   parent,
                   parents_ids=EMPTY_FROZENSET,
                   print_as_attribute=False,
                   original_type=None):
        """Build the prepared string for a mapping and count the items visited.

        Args:
            obj: mapping whose ``items()`` are processed.
            parent: path of ``obj`` in the report; used to build child paths.
            parents_ids: ids of the containers currently being processed; a
                value whose id is in this set is skipped.
            print_as_attribute: selects the child-path template through
                ``INDEX_VS_ATTRIBUTE`` and, when true, makes the prefix a type
                name instead of the literal ``dict``.
            original_type: type to name in the prefix instead of
                ``type(obj)``; only consulted when ``print_as_attribute`` is
                true.

        Returns:
            A ``(prepared_string, counts)`` tuple. ``counts`` starts at 1,
            grows by 1 for every key iterated (including keys skipped
            afterwards) and by the count returned from hashing each value.
        """
        path_template = "%s{}".format(INDEX_VS_ATTRIBUTE[print_as_attribute])
        pairs = []
        counts = 1

        for key, value in obj.items():
            counts += 1

            # ignore private variables
            is_private = isinstance(key, str) and key.startswith('__')
            if self.ignore_private_variables and is_private:
                continue

            if not print_as_attribute and isinstance(key, strings):
                shown_key = "'%s'" % key
            else:
                shown_key = key
            child_path = path_template % (parent, shown_key)

            key_hash, _ = self._hash(
                key, parent=child_path, parents_ids=parents_ids)
            if not key_hash:
                continue

            value_id = get_id(value)
            if parents_ids and value_id in parents_ids:
                continue
            if self._skip_this(value, parent=child_path):
                continue

            value_hash, value_count = self._hash(
                value,
                parent=child_path,
                parents_ids=add_to_frozen_set(parents_ids, value_id))
            pairs.append(KEY_TO_VAL_STR.format(key_hash, value_hash))
            counts += value_count

        joined = ';'.join(sorted(pairs))

        type_str = 'dict'
        if print_as_attribute:
            type_ = original_type or type(obj)
            type_str = type_.__name__
            # A type belonging to an ignore group is reported as the whole group.
            for type_group in self.ignore_type_in_groups:
                if self.type_check_func(type_, type_group):
                    type_str = ','.join(member.__name__ for member in type_group)
                    break

        return "{}:{{{}}}".format(type_str, joined), counts
Пример #7
0
 def test_dict1(self):
     """DeepHash with the sha1 hasher yields the expected digest for every key, value and the dict."""
     string1 = "a"
     key1 = "key1"
     obj = {key1: string1, 1: 10, 2: 20}
     sha1 = DeepHash.sha1hex

     # Integers are hashed from their prepared "int:<n>" form.
     expected_result = {
         1: sha1('int:1'),
         2: sha1('int:2'),
         10: sha1('int:10'),
         20: sha1('int:20'),
     }
     expected_result[key1] = '1073ab6cda4b991cd29f9e83a307f34004ae9327'
     expected_result[string1] = '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'
     expected_result[get_id(obj)] = '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895'

     result = DeepHash(obj, ignore_string_type_changes=True, hasher=sha1)
     assert expected_result == result
Пример #8
0
 def do_list_or_tuple(self, func, func_str):
     """Check DeepHashPrep output for a sequence built with ``func``.

     Args:
         func: constructor used to build the sequence from a list. When it
             is ``list`` the sequence is expected under ``get_id(obj)``;
             otherwise under the sequence itself.
         func_str: type-name prefix expected in the prepared string.
     """
     string1 = "a"
     string1_prepped = prep_str(string1)
     obj = func([string1, 10, 20])
     obj_id = get_id(obj) if func is list else obj

     expected_result = {
         string1: string1_prepped,
         10: 'int:10',
         20: 'int:20',
     }
     expected_result[obj_id] = '{}:{},int:10,int:20'.format(func_str, string1_prepped)

     result = DeepHashPrep(obj, ignore_string_type_changes=True)
     assert expected_result == result
Пример #9
0
 def test_dict_hash(self):
     """DeepHashPrep of a dict stores every key, every value and the dict itself.

     The composite string expected for the dict is built from the prepared
     forms of the string key and value, i.e. the same values expected for
     their individual entries.
     """
     string1 = "a"
     string1_prepped = prep_str(string1)
     key1 = "key1"
     key1_prepped = prep_str(key1)
     obj = {key1: string1, 1: 10, 2: 20}
     expected_result = {
         1: 'int:1',
         10: 'int:10',
         2: 'int:2',
         20: 'int:20',
         key1: key1_prepped,
         string1: string1_prepped,
         # Use the prepped strings so this stays consistent with the
         # per-item expectations above whatever prep_str returns.
         get_id(obj): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % (key1_prepped, string1_prepped)
     }
     result = DeepHashPrep(obj, ignore_string_type_changes=True)
     assert expected_result == result
Пример #10
0
    def _prep_dict(self,
                   obj,
                   parent,
                   parents_ids=EMPTY_FROZENSET,
                   print_as_attribute=False,
                   original_type=None):
        """Build the prepared string for a mapping from its hashed keys and values.

        Args:
            obj: mapping whose ``items()`` are processed.
            parent: path of ``obj`` in the report; used to build child paths.
            parents_ids: ids of the containers currently being processed; a
                value whose id is in this set is skipped.
            print_as_attribute: selects the child-path template through
                ``INDEX_VS_ATTRIBUTE`` and, when true, makes the prefix a type
                name instead of the literal ``dict``.
            original_type: type to name in the prefix instead of
                ``type(obj)``; only consulted when ``print_as_attribute`` is
                true.

        Returns:
            A string of the form ``<type>:{<pairs>}`` where the pairs are
            sorted and joined with ``;``.
        """
        path_template = "%s{}".format(INDEX_VS_ATTRIBUTE[print_as_attribute])
        quote_keys = not print_as_attribute
        entries = []

        for key, item in obj.items():
            if quote_keys and isinstance(key, strings):
                key_formatted = "'%s'" % key
            else:
                key_formatted = key
            key_in_report = path_template % (parent, key_formatted)

            # The key is hashed first; this happens even if the item is skipped below.
            key_hash = self._hash(
                key, parent=key_in_report, parents_ids=parents_ids)

            item_id = get_id(item)
            already_visited = parents_ids and item_id in parents_ids
            if already_visited or self._skip_this(item, parent=key_in_report):
                continue

            item_hash = self._hash(
                item,
                parent=key_in_report,
                parents_ids=add_to_frozen_set(parents_ids, item_id))
            entries.append(KEY_TO_VAL_STR.format(key_hash, item_hash))

        entries.sort()
        body = ';'.join(entries)

        if not print_as_attribute:
            type_str = 'dict'
        else:
            type_ = original_type or type(obj)
            # The first ignore group containing the type replaces its name.
            matching_group = next(
                (group for group in self.ignore_type_in_groups if type_ in group),
                None)
            if matching_group is None:
                type_str = type_.__name__
            else:
                type_str = ','.join(member.__name__ for member in matching_group)

        return "%s:{%s}" % (type_str, body)
Пример #11
0
    def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
        """Compute, store and return the hash of ``obj``.

        Booleans are first replaced by the value returned from
        ``self._prep_bool``. If a result for the object is already stored in
        this mapping it is returned immediately. Otherwise the object is
        dispatched by type to the matching ``_prep_*`` helper, the prepared
        value is passed through ``self.hasher`` when ``self.apply_hash`` is
        true, and the outcome is stored under the object (or under
        ``get_id(obj)`` when the object cannot be used as a key).

        Args:
            obj: the object to hash.
            parent: path of ``obj`` in the report; used for skip checks and
                passed on to the container helpers.
            parents_ids: ids of the containers currently being processed.

        Returns:
            The stored result, or ``None`` when ``self._skip_this`` says the
            object must be skipped (nothing is stored in that case).
        """

        if isinstance(obj, bool):
            obj = self._prep_bool(obj)
            result = None
        else:
            result = not_hashed

        try:
            result = self[obj]
        except (TypeError, KeyError):
            pass
        else:
            return result

        if self._skip_this(obj, parent):
            return

        elif obj is None:
            result = 'NONE'

        elif isinstance(obj, strings):
            result = prepare_string_for_hashing(
                obj, ignore_string_type_changes=self.ignore_string_type_changes,
                ignore_string_case=self.ignore_string_case)

        elif isinstance(obj, numbers):
            result = self._prep_number(obj)

        elif isinstance(obj, MutableMapping):
            result = self._prep_dict(obj=obj, parent=parent, parents_ids=parents_ids)

        elif isinstance(obj, tuple):
            result = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids)

        elif isinstance(obj, Iterable):
            result = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)

        # Identity comparison instead of set membership: membership would hash
        # ``obj`` and raise TypeError for unhashable objects, which must fall
        # through to ``_prep_obj`` below.
        elif obj is BoolObj.TRUE or obj is BoolObj.FALSE:
            result = 'bool:true' if obj is BoolObj.TRUE else 'bool:false'
        else:
            result = self._prep_obj(obj=obj, parent=parent, parents_ids=parents_ids)

        if result is not_hashed:  # pragma: no cover
            self[UNPROCESSED].append(obj)

        elif result is unprocessed:
            pass

        elif self.apply_hash:
            if isinstance(obj, strings):
                # Strings were already prepared above; do not prepare them twice.
                result_cleaned = result
            else:
                result_cleaned = prepare_string_for_hashing(
                    result, ignore_string_type_changes=self.ignore_string_type_changes,
                    ignore_string_case=self.ignore_string_case)
            result = self.hasher(result_cleaned)

        # It is important to keep the hash of all objects.
        # The hashes will be later used for comparing the objects.
        try:
            self[obj] = result
        except TypeError:
            # Unhashable objects are stored under their id-based key.
            obj_id = get_id(obj)
            self[obj_id] = result

        return result
Пример #12
0
 def test_prep_iterable_with_excluded_type(self):
     """A list item whose type is in ``exclude_types`` gets no entry in the result."""
     excluded_logger = logging.getLogger("test")
     items = [1, excluded_logger]
     prepped = DeepHashPrep(items, exclude_types={logging.Logger})
     logger_key = get_id(excluded_logger)
     assert logger_key not in prepped
Пример #13
0
 def test_prep_iterable_with_loop(self):
     """A list that contains itself is prepared without counting the self-reference."""
     looped = [1]
     looped.append(looped)
     expected = {1: 'int:1', get_id(looped): 'list:int:1'}
     prepped = DeepHashPrep(looped)
     assert expected == prepped
Пример #14
0
 def test_prep_dic_with_loop(self):
     """A dict that contains itself is prepared without the self-referencing value."""
     looped = {2: 1337}
     looped[1] = looped
     expected = {
         1: 'int:1',
         2: 'int:2',
         1337: 'int:1337',
         get_id(looped): 'dict:{int:2:int:1337}',
     }
     prepped = DeepHashPrep(looped)
     assert expected == prepped
Пример #15
0
 def test_skip_type(self):
     """A dict value whose type is in ``exclude_types`` gets no entry in the result."""
     excluded_logger = logging.getLogger("test")
     mapping = {"log": excluded_logger, 2: 1337}
     prepped = DeepHashPrep(mapping, exclude_types={logging.Logger})
     logger_key = get_id(excluded_logger)
     assert logger_key not in prepped
Пример #16
0
    def test_dictionary(self):
        """Hashing ``{1: 1}`` yields exactly two keys: ``1`` and the id-based key of the dict."""
        obj = {1: 1}
        hashes = DeepHash(obj)
        expected_keys = {1, get_id(obj)}
        assert set(hashes.keys()) == expected_keys
Пример #17
0
    def __init__(self,
                 obj,
                 *,
                 hashes=None,
                 exclude_types=None,
                 exclude_paths=None,
                 exclude_regex_paths=None,
                 hasher=None,
                 ignore_repetition=True,
                 significant_digits=None,
                 number_format_notation="f",
                 apply_hash=True,
                 ignore_type_in_groups=None,
                 ignore_string_type_changes=False,
                 ignore_numeric_type_changes=False,
                 ignore_type_subclasses=False,
                 ignore_string_case=False,
                 number_to_string_func=None,
                 **kwargs):
        """Hash ``obj`` and store the results in this mapping.

        Args:
            obj: the object to hash.
            hashes: optional mapping of existing hashes; copied into this
                instance before hashing starts.
            exclude_types: iterable of types, kept as a tuple in
                ``exclude_types_tuple`` for ``isinstance`` checks.
            exclude_paths: normalised with
                ``convert_item_or_items_into_set_else_none``.
            exclude_regex_paths: normalised with
                ``convert_item_or_items_into_compiled_regexes_else_none``.
            hasher: callable used to hash prepared values. Defaults to
                ``murmur3_128bit`` when ``mmh3`` is truthy, else ``sha256hex``.
            ignore_repetition: stored as ``self.ignore_repetition``.
            significant_digits: resolved through ``get_significant_digits``
                together with ``ignore_numeric_type_changes``.
            number_format_notation: stored as ``self.number_format_notation``.
            apply_hash: when false the prepared values are stored without
                being passed to the hasher; intended for testing the
                per-type preparation functions.
            ignore_type_in_groups, ignore_string_type_changes,
            ignore_numeric_type_changes, ignore_type_subclasses: combined by
                ``get_ignore_types_in_groups`` into
                ``self.ignore_type_in_groups``.
            ignore_string_case: stored as ``self.ignore_string_case``.
            number_to_string_func: replacement for the default
                ``number_to_string`` function.
            **kwargs: not accepted; present only to report unknown names.

        Raises:
            ValueError: if any unexpected keyword argument is passed.
        """
        if kwargs:
            raise ValueError(
                ("The following parameter(s) are not valid: %s\n"
                 "The valid parameters are obj, hashes, exclude_types, significant_digits, "
                 "exclude_paths, exclude_regex_paths, hasher, ignore_repetition, "
                 "number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
                 "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case, "
                 "number_to_string_func") % ', '.join(kwargs.keys()))
        self.obj = obj
        exclude_types = set() if exclude_types is None else set(exclude_types)
        self.exclude_types_tuple = tuple(exclude_types)  # we need tuple for checking isinstance
        self.ignore_repetition = ignore_repetition
        self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths)
        self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
        default_hasher = self.murmur3_128bit if mmh3 else self.sha256hex
        self.hasher = default_hasher if hasher is None else hasher
        hashes = hashes if hashes else {}
        self.update(hashes)
        # Collects objects that could not be hashed; removed again below if it stays empty.
        self[UNPROCESSED] = []

        self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
        self.number_format_notation = number_format_notation
        self.ignore_type_in_groups = self.get_ignore_types_in_groups(
            ignore_type_in_groups=ignore_type_in_groups,
            ignore_string_type_changes=ignore_string_type_changes,
            ignore_numeric_type_changes=ignore_numeric_type_changes,
            ignore_type_subclasses=ignore_type_subclasses)
        self.ignore_string_type_changes = ignore_string_type_changes
        self.ignore_numeric_type_changes = ignore_numeric_type_changes
        self.ignore_string_case = ignore_string_case
        # makes the hash return constant size result if true
        # the only time it should be set to False is when
        # testing the individual hash functions for different types of objects.
        self.apply_hash = apply_hash
        self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group
        self.number_to_string = number_to_string_func or number_to_string

        # The root's own id is seeded as a parent id before hashing starts.
        self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)}))

        if self[UNPROCESSED]:
            logger.warning("Can not hash the following items: {}.".format(self[UNPROCESSED]))
        else:
            del self[UNPROCESSED]
Пример #18
0
    def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
        """Compute, store and return the hash of ``obj``.

        A result already stored in this mapping is returned as is. Otherwise
        the object is dispatched by type to the matching preparation helper,
        the prepared value is passed through ``self.hasher`` when
        ``self.apply_hash`` is true, and the outcome is stored under the
        object (or under ``get_id(obj)`` when the object cannot be a key).

        Args:
            obj: the object to hash.
            parent: path of ``obj`` in the report.
            parents_ids: ids of the containers currently being processed.

        Returns:
            The stored result, or ``None`` when ``self._skip_this`` says the
            object must be skipped (nothing is stored in that case).
        """
        try:
            return self[obj]
        except (TypeError, KeyError):
            pass

        if self._skip_this(obj, parent):
            return None

        if obj is None:
            prepped = 'NONE'
        elif isinstance(obj, strings):
            prepped = prepare_string_for_hashing(
                obj,
                ignore_string_type_changes=self.ignore_string_type_changes,
                ignore_string_case=self.ignore_string_case)
        elif isinstance(obj, numbers):
            prepped = self._prep_number(obj)
        else:
            # The remaining helpers share one call signature; the order of
            # the type checks decides which one handles the object.
            if isinstance(obj, MutableMapping):
                prepare = self._prep_dict
            elif isinstance(obj, tuple):
                prepare = self._prep_tuple
            elif isinstance(obj, Iterable):
                prepare = self._prep_iterable
            else:
                prepare = self._prep_obj
            prepped = prepare(obj=obj, parent=parent, parents_ids=parents_ids)

        if prepped is not_hashed:  # pragma: no cover
            self[UNPROCESSED].append(obj)
        elif prepped is not unprocessed and self.apply_hash:
            if isinstance(obj, strings):
                # Strings were already prepared above.
                text = prepped
            else:
                text = prepare_string_for_hashing(
                    prepped,
                    ignore_string_type_changes=self.ignore_string_type_changes,
                    ignore_string_case=self.ignore_string_case)
            prepped = self.hasher(text)

        # Every object's hash is kept; the hashes are used later when
        # comparing the objects.
        try:
            self[obj] = prepped
        except TypeError:
            self[get_id(obj)] = prepped

        return prepped
Пример #19
0
    def __init__(self,
                 obj,
                 *,
                 hashes=None,
                 exclude_types=None,
                 exclude_paths=None,
                 exclude_regex_paths=None,
                 hasher=None,
                 ignore_repetition=True,
                 significant_digits=None,
                 number_format_notation="f",
                 apply_hash=True,
                 ignore_type_in_groups=None,
                 ignore_string_type_changes=False,
                 ignore_numeric_type_changes=False,
                 ignore_type_subclasses=False,
                 ignore_string_case=False,
                 number_to_string_func=None,
                 **kwargs):
        """Configure the hasher, then hash ``obj`` into this mapping.

        All options are keyword-only. Each is either stored on the instance
        or normalised by a helper (see the assignments below); ``hashes``
        pre-populates the mapping and ``hasher`` defaults to
        ``murmur3_128bit`` when ``mmh3`` is truthy, else ``sha256hex``.

        After hashing, objects that could not be hashed are reported with a
        warning and left under the ``UNPROCESSED`` key; if there are none,
        that key is removed.

        Raises:
            ValueError: if any unexpected keyword argument is passed.
        """
        if kwargs:
            raise ValueError(
                ("The following parameter(s) are not valid: %s\n"
                 "The valid parameters are obj, hashes, exclude_types, significant_digits, "
                 "exclude_paths, exclude_regex_paths, hasher, ignore_repetition, "
                 "number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
                 "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case, "
                 "number_to_string_func") % ', '.join(kwargs.keys()))
        self.obj = obj
        exclude_types = set() if exclude_types is None else set(exclude_types)
        self.exclude_types_tuple = tuple(exclude_types)  # we need tuple for checking isinstance
        self.ignore_repetition = ignore_repetition
        self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths)
        self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
        default_hasher = self.murmur3_128bit if mmh3 else self.sha256hex
        self.hasher = default_hasher if hasher is None else hasher
        hashes = hashes if hashes else {}
        self.update(hashes)
        # Collects objects that could not be hashed while hashing runs.
        self[UNPROCESSED] = []

        self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
        self.number_format_notation = number_format_notation
        self.ignore_type_in_groups = self.get_ignore_types_in_groups(
            ignore_type_in_groups=ignore_type_in_groups,
            ignore_string_type_changes=ignore_string_type_changes,
            ignore_numeric_type_changes=ignore_numeric_type_changes,
            ignore_type_subclasses=ignore_type_subclasses)
        self.ignore_string_type_changes = ignore_string_type_changes
        self.ignore_numeric_type_changes = ignore_numeric_type_changes
        self.ignore_string_case = ignore_string_case
        # makes the hash return constant size result if true
        # the only time it should be set to False is when
        # testing the individual hash functions for different types of objects.
        self.apply_hash = apply_hash
        self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group
        self.number_to_string = number_to_string_func or number_to_string

        # The root's own id is seeded as a parent id before hashing starts.
        self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)}))

        if self[UNPROCESSED]:
            logger.warning("Can not hash the following items: {}.".format(self[UNPROCESSED]))
        else:
            del self[UNPROCESSED]