def test_to_int(value):
    """
    Check that XFastTrie._to_int converts ints and bytes to the expected int

    Ints are expected to come back unchanged. Bytes are left-padded with zero
    bytes to (maxsize.bit_length() + 1) // 8 bytes and decoded as a big-endian
    unsigned 64-bit integer to obtain the expected result. Values of any
    other type are not checked.

    :param value: The int or bytes value to convert
    """
    if isinstance(value, int):
        expected = value
    elif isinstance(value, bytes):
        width = (maxsize.bit_length() + 1) // 8
        padded = value.rjust(width, b'\x00')
        (expected,) = unpack(">Q", padded)
    else:
        return

    assert XFastTrie._to_int(value, max_trie_entry_size) == expected
def successor(self, value: Union[int, bytes]) -> Optional[int]:
    """
    Find the smallest value in the trie strictly greater than the given value,
    if it exists

    :param value: The value to find the successor of
    :return: The successor of the given value, or None if it doesn't exist
    :raises ValueError: If no subtree is found and the trie holds no values
    """
    value = XFastTrie._to_int(value, self._maxlen)
    subtree, rep_node = self._get_value_subtree(value)

    # subtree should be None only if the trie is empty
    if subtree is None and self._count == 0:
        raise ValueError("No values exist in trie")

    # Test for None before comparing: with the operands the other way round
    # an unset maximum would raise TypeError instead of returning None
    if self._max is None or value >= self._max:
        return None

    # Anything below the minimum is succeeded by the minimum itself
    if value < cast(int, self._min):
        return self._min

    subtree = cast(SortedList, subtree)
    rep_node = cast(TrieNode, rep_node)

    # The subtree is sorted, so its last element is its largest; if even that
    # is not greater than the value, look in the succeeding partition instead
    if subtree[-1] <= value:
        subtree = self._subtrees[rep_node.succ.value]

    # bisect_right gives the index of the first element greater than value
    return cast(int, subtree[subtree.bisect_right(value)])
def predecessor(self, value: Union[int, bytes]) -> Optional[int]:
    """
    Find the largest value in the trie strictly less than the given value,
    if it exists

    :param value: The value to find the predecessor of
    :return: The predecessor of the given value, or None if it doesn't exist
    :raises ValueError: If no subtree is found and the trie holds no values
    """
    value = XFastTrie._to_int(value, self._maxlen)
    subtree, rep_node = self._get_value_subtree(value)

    # subtree should be None only if the trie is empty
    if subtree is None and self._count == 0:
        raise ValueError("No values exist in trie")

    # Test for None before comparing: with the operands the other way round
    # an unset minimum would raise TypeError instead of returning None
    if self._min is None or value <= self._min:
        return None

    # Anything above the maximum is preceded by the maximum itself
    if value > cast(int, self._max):
        return self._max

    subtree = cast(SortedList, subtree)
    rep_node = cast(TrieNode, rep_node)

    # The subtree is sorted, so its first element is its smallest; if even
    # that is not less than the value, look in the preceding partition instead
    if subtree[0] >= value:
        subtree = self._subtrees[rep_node.pred.value]

    # bisect_left gives the index of the first element not less than value,
    # so the element just before it is the largest one below value
    return cast(int, subtree[subtree.bisect_left(value) - 1])
def insert(self, value: Union[int, bytes]) -> None:
    """
    Add a value to the trie; a value that is already present is left alone

    :param value: The value to insert into the trie
    """
    new_value = XFastTrie._to_int(value, self._maxlen)
    found_subtree, found_rep = self._get_value_subtree(new_value, True)
    bucket = cast(SortedList, found_subtree)
    rep_node = cast(TrieNode, found_rep)

    # Nothing to do for a value the trie already holds
    if new_value in bucket:
        return

    # Widen the cached bounds when the new value falls outside them
    if self._max is None or new_value > self._max:
        self._max = new_value

    if self._min is None or new_value < self._min:
        self._min = new_value

    bucket.add(new_value)

    overfull = len(bucket) > self._max_subtree_size

    if overfull:
        # Retire the oversized subtree together with its representative...
        del self._subtrees[rep_node.value]
        self._partitions -= rep_node.value

        # ...then register each piece under a representative calculated
        # from that piece's largest value
        for piece in self._split_subtree(bucket, self._maxlen):
            piece_rep = self._calculate_representative(max(piece), self._maxlen)
            self._partitions += piece_rep
            self._subtrees[piece_rep] = piece

    self._count += 1
def test_to_int_exceptions(value):
    """
    Check that XFastTrie._to_int rejects the given value

    Integral and bytes values are expected to raise ValueError; a value of any
    other type is expected to raise TypeError.

    :param value: The value that should be rejected
    """
    if isinstance(value, (Integral, bytes)):
        expected_error = ValueError
    else:
        expected_error = TypeError

    with pytest.raises(expected_error):
        XFastTrie._to_int(value, max_trie_entry_size)
def test_get_closest_ancestor(entries, test_values):
    """
    Check the node returned by XFastTrie._get_closest_ancestor for each query

    For a query that was inserted, the returned node must be the leaf holding
    that value. For any other query, the returned node must be an internal
    node, and neither of its children may have value_bits starting with the
    first (level + 2) bits of the query's zero-padded binary form.

    :param entries: The values to insert into the trie
    :param test_values: The values to look up
    """
    trie = XFastTrie(max_trie_entry_size)

    for entry in entries:
        trie += entry

    stored = [trie._to_int(e, trie._maxlen) for e in entries]

    for val in test_values:
        ancestor, level = trie._get_closest_ancestor(val)

        if val not in stored:
            prefix = f"{val:b}".zfill(trie._maxlen)[:level + 2]
            assert not ancestor.leaf

            for child in (ancestor.left, ancestor.right):
                assert not child.value_bits.startswith(prefix)

            continue

        assert ancestor.leaf
        assert ancestor.value == val
def test_get_closest_leaf(entries, test_values):
    """
    Check the node returned by XFastTrie._get_closest_leaf for each query

    The returned node must always be a leaf. For a query that was inserted it
    must hold exactly that value; otherwise it must be at least as close to
    the query as its pred and succ neighbors, where those exist.

    :param entries: The values to insert into the trie
    :param test_values: The values to look up
    """
    trie = XFastTrie(max_trie_entry_size)

    for entry in entries:
        trie += entry

    stored = [trie._to_int(e, trie._maxlen) for e in entries]

    for val in test_values:
        neighbor = trie._get_closest_leaf(val)
        assert neighbor.leaf

        if val in stored:
            assert neighbor.value == val
            continue

        # Compare against the predecessor first, then the successor
        for adjacent in (neighbor.pred, neighbor.succ):
            if adjacent is None:
                continue

            assert abs(neighbor.value - val) <= abs(adjacent.value - val)
def __lt__(self, value: Union[int, bytes]) -> Optional[int]:
    """
    Look up the predecessor of the given value via the `<` operator

    :param value: The value to find the predecessor of
    :return: Whatever predecessor() returns for the converted value
    """
    return self.predecessor(XFastTrie._to_int(value, self._maxlen))
def __isub__(self, value: Union[int, bytes]) -> "YFastTrie":
    """
    Remove the given value from the trie via the `-=` operator

    :param value: The value to remove from the trie
    :return: This trie, so the augmented assignment rebinds to the same object
    """
    self.remove(XFastTrie._to_int(value, self._maxlen))
    return self
def __iadd__(self, value: Union[int, bytes]) -> "YFastTrie":
    """
    Insert the given value into the trie via the `+=` operator

    :param value: The value to insert into the trie
    :return: This trie, so the augmented assignment rebinds to the same object
    """
    self.insert(XFastTrie._to_int(value, self._maxlen))
    return self
def __contains__(self, value: Union[int, bytes]) -> bool:
    """
    Report whether the given value is stored in the trie

    :param value: The value to look for
    :return: True if a subtree was found for the value and contains it,
             False otherwise
    """
    needle = XFastTrie._to_int(value, self._maxlen)
    bucket, _rep_node = self._get_value_subtree(needle)

    # No subtree for the value means it cannot be present
    if bucket is None:
        return False

    return needle in bucket
def remove(self, value: Union[int, bytes]) -> None:
    """
    Delete the given value from the trie

    The cached minimum and maximum are updated when the removed value was one
    of them. A subtree left empty is dropped along with its representative;
    a subtree left smaller than the minimum subtree size is merged with a
    neighboring subtree when more than one partition exists.

    :param value: The value to remove from the trie
    :raises ValueError: If the value is not present in the trie
    """
    target = XFastTrie._to_int(value, self._maxlen)
    found_subtree, found_rep = self._get_value_subtree(target)

    # There should be no subtree only if the given value is not in the trie
    if found_subtree is None or target not in found_subtree:
        raise ValueError("Value does not exist in trie")

    bucket = cast(SortedList, found_subtree)
    rep_node = cast(TrieNode, found_rep)
    has_siblings = len(bucket) > 1

    # Work out BOTH replacement bounds before assigning either of them:
    # successor() and predecessor() read the current _min and _max.
    # -1 marks "bound unaffected by this removal".
    new_min = -1
    if self._min == target:
        new_min = bucket[1] if has_siblings else self.successor(target)

    new_max = -1
    if self._max == target:
        new_max = bucket[-2] if has_siblings else self.predecessor(target)

    if new_min != -1:
        self._min = new_min

    if new_max != -1:
        self._max = new_max

    bucket.remove(target)
    remaining = len(bucket)

    if remaining == 0:
        # The subtree is now empty; drop it and its representative
        del self._subtrees[rep_node.value]
        self._partitions -= rep_node.value

    elif remaining < self._min_subtree_size and len(self._partitions) > 1:
        # Pair the undersized subtree with its predecessor when there is
        # one, otherwise with its successor
        if rep_node.pred is not None:
            left_rep, right_rep = rep_node.pred, rep_node
        else:
            left_rep, right_rep = rep_node, rep_node.succ

        left_tree = self._subtrees[left_rep.value]
        right_tree = self._subtrees[right_rep.value]

        # Retire both old subtrees and their representatives...
        del self._subtrees[left_rep.value]
        del self._subtrees[right_rep.value]
        self._partitions -= left_rep.value
        self._partitions -= right_rep.value

        # ...then register every non-empty result of the merge under a
        # representative calculated from its largest value
        merged = self._merge_subtrees(left_tree, right_tree, 2 * self._maxlen)
        piece: SortedList

        for piece in filter(None, merged):
            piece_rep = self._calculate_representative(max(piece), self._maxlen)
            self._partitions += piece_rep
            self._subtrees[piece_rep] = piece

    self._count -= 1