Пример #1
0
 def test_popitem_ties(self):
     """Pop N entries that all share the value 0.

     Every key in ``range(N)`` is stored with the value 0; each popped
     value must be 0, and ``check_invariants`` is re-run on the heap
     after every pop.
     """
     heap = HeapDict()
     for key in range(N):
         heap[key] = 0

     for _ in range(N):
         # Only the value matters here; the popped key is ignored.
         _, value = heap.popitem()
         self.assertEqual(value, 0)
         self.check_invariants(heap)
Пример #2
0
    def make_data():
        """Build matching HeapDict / dict fixtures from N random pairs.

        Returns a tuple ``(heap, ranked, mirror)``: a HeapDict and a plain
        dict holding the same key -> value entries, plus the list of
        (key, value) pairs sorted by value in descending order.
        """
        entries = [(random.random(), random.random()) for _ in range(N)]

        heap = HeapDict()
        mirror = {}
        for key, value in entries:
            heap[key] = value
            mirror[key] = value

        # Sort on the value (second element), largest first.
        ranked = sorted(entries, key=lambda entry: entry[1], reverse=True)
        return heap, ranked, mirror
Пример #3
0
    def __init__(self, k, epsilon, delta, seed=None):
        """
        Set up a new count-min sketch with parameters k, epsilon, and delta.

        The parameters epsilon and delta control the accuracy of the
        estimates of the sketch.

        Cormode and Muthukrishnan prove that for an item i with count a_i, the
        estimate from the sketch a_hat_i will satisfy the relation

        a_hat_i <= a_i + epsilon * ||a||_1

        with probability at least 1 - delta, where a is the vector of
        all counts and ||x||_1 is the L1 norm of a vector x.

        Parameters
        ----------
        k : int
            A positive integer that sets the number of top items counted
        epsilon : float
            A value strictly between 0 and 1 that sets the precision of the
            sketch; the table width is ceil(e / epsilon)
        delta : float
            A value strictly between 0 and 1 that bounds the probability of
            an estimate exceeding the error bound; the table depth is
            ceil(ln(1 / delta))
        seed : optional
            Value passed to ``random.seed`` before the hash function
            parameters are generated. When None, 1729 is used. Note that
            this reseeds the module-level ``random`` generator.

        Examples
        --------
        >>> s = FreqSketch(40, 0.005, 10**-7)

        Raises
        ------
        ValueError
            If k is not a positive integer, or epsilon or delta are not
            strictly between 0 and 1.
        """

        # Validate first so an invalid call does not reseed the global RNG.
        if k < 1:
            raise ValueError("k must be a positive integer")
        if epsilon <= 0 or epsilon >= 1:
            raise ValueError("epsilon must be between 0 and 1, exclusive")
        if delta <= 0 or delta >= 1:
            raise ValueError("delta must be between 0 and 1, exclusive")

        # Only None selects the default; an explicit falsy seed such as 0
        # is honored rather than silently replaced.
        if seed is None:
            seed = 1729
        random.seed(seed)

        self.k = k
        self.width = int(math.ceil(math.exp(1) / epsilon))
        self.depth = int(math.ceil(math.log(1 / delta)))
        # One parameter set per row of the sketch.
        self.hash_function_params = [
            _generate_hash_function_params() for _ in range(self.depth)
        ]
        # Flat depth * width table of zero-initialised signed-int counters.
        self.count = array.array('i',
                                 itertools.repeat(0, self.depth * self.width))
        self.heap = HeapDict()
Пример #4
0
    def __init__(self, graph: Graph, start, key=None):
        """
        Create a new DijkstraIterator object.

        Every node reported by ``graph.nodes()`` is placed in the worklist
        with priority ``math.inf``; the start node is then given priority 0.

        :param graph: the graph to iterate over
        :param start: the first node to visit
        :param key: a function of one argument used to extract a comparison
            key to determine which node to visit first in the case of a tie
            (the "smallest" element)
        :raises ValueError: if start is not defined in graph
        """
        super().__init__(graph, start, key=key)

        worklist = self._worklist = HeapDict()
        for node in graph.nodes():
            worklist[node] = math.inf
        # Set the start node's priority last so it overrides the inf default.
        worklist[start] = 0
Пример #5
0
 def test_peekitem_empty(self):
     """Calling peekitem on a freshly created HeapDict must raise KeyError."""
     empty_heap = HeapDict()
     with self.assertRaises(KeyError):
         empty_heap.peekitem()