Exemplo n.º 1
0
 def dispatch(seq):
     """Split tagged values into two buffers.

     ``seq`` yields ``(tag, value)`` pairs.  Values tagged ``1`` are
     collected into the first buffer and values tagged ``2`` into the
     second; pairs carrying any other tag are skipped.

     Returns a 2-tuple of ``ResultIterable`` objects wrapping the first
     and second buffer respectively.
     """
     first_values = []
     second_values = []
     for tag, value in seq:
         if tag == 1:
             target = first_values
         elif tag == 2:
             target = second_values
         else:
             # Unknown tag: nothing to collect for this pair.
             continue
         target.append(value)
     return ResultIterable(first_values), ResultIterable(second_values)
Exemplo n.º 2
0
    def groupByKey(self, numPartitions=None):
        """
        Return a new DStream in which the values for each key of this
        DStream are grouped into a single sequence.
        Hash-partitions the resulting RDDs of the DStream into
        numPartitions partitions.

        Note: If the grouping is only a step towards an aggregation (such
        as a sum or average) over each key, using reduceByKey will provide
        much better performance.

        """
        def start_group(value):
            # The first value seen for a key starts a one-element list.
            return [value]

        def add_value(group, value):
            # Grow the existing list in place and hand the same list back.
            group.append(value)
            return group

        def join_groups(left, right):
            # Fold the right-hand list into the left-hand one in place.
            left.extend(right)
            return left

        grouped = self.combineByKey(
            start_group, add_value, join_groups, numPartitions)
        return grouped.mapValues(lambda values: ResultIterable(values))
Exemplo n.º 3
0
    def groupByKey(self, numPartitions=None):
        """
        Group the values for each key in the RDD into a single sequence.
        Hash-partitions the resulting RDD into numPartitions partitions.

        Each grouped value is wrapped in a ResultIterable.

        >>> x = sc.parallelize([("a", 1), ("b", 1), ("a", 1)])
        >>> sorted((k, list(v)) for k, v in x.groupByKey().collect())
        [('a', [1, 1]), ('b', [1])]
        """

        def createCombiner(x):
            # The first value seen for a key starts a one-element list.
            return [x]

        def mergeValue(xs, x):
            # Append in place and return the same list.
            xs.append(x)
            return xs

        def mergeCombiners(a, b):
            # Extend in place rather than building `a + b`: concatenation
            # copies the accumulated list on every merge.
            a.extend(b)
            return a

        return self.combineByKey(createCombiner, mergeValue, mergeCombiners,
                numPartitions).mapValues(lambda x: ResultIterable(x))
Exemplo n.º 4
0
 def dispatch(seq):
     """Distribute indexed values into one buffer per index.

     ``seq`` yields ``(index, value)`` pairs.  ``rdd_len`` (taken from the
     enclosing scope) empty buffers are created up front, and each value
     is appended to the buffer selected by its index.

     Returns a tuple with one ``ResultIterable`` per buffer, in buffer
     order.
     """
     buckets = []
     for _ in range(rdd_len):
         buckets.append([])
     for index, value in seq:
         bucket = buckets[index]
         bucket.append(value)
     return tuple(map(ResultIterable, buckets))
Exemplo n.º 5
0
 def test_derivative_should_work(self):
     # Feed every fixture metric through the derivative and compare the
     # extracted values against the expected series.
     metrics = ResultIterable(self.all_metrics)
     derived = MonascaDerivativeLDP.derivative(metrics)
     actual = self._values(derived)
     expected = [0.5, 0.25, -0.5, -0.25, 1.0, 1.5]
     self.assertEqual(actual, expected)
Exemplo n.º 6
0
 def test_derivative_should_remove_duplicate(self):
     # Only the first four fixture metrics are used; the derivative is
     # expected to produce exactly two values for them.
     first_four = self.all_metrics[0:4]
     derived = MonascaDerivativeLDP.derivative(ResultIterable(first_four))
     actual = self._values(derived)
     self.assertEqual(actual, [0.5, 0.5])
Exemplo n.º 7
0
 def test_derivative_should_work_on_first_and_last_values(self):
     # With just two metrics the derivative is expected to produce a
     # value for both the first and the last entry.
     first_two = self.all_metrics[0:2]
     derived = MonascaDerivativeLDP.derivative(ResultIterable(first_two))
     actual = self._values(derived)
     self.assertEqual(actual, [0.5, 0.5])
Exemplo n.º 8
0
 def test_derivative_should_do_nothing_with_1_value(self):
     # A single metric is expected to yield an empty result.
     single_metric = ResultIterable(self.all_metrics[0:1])
     outcome = MonascaDerivativeLDP.derivative(single_metric)
     self.assertEqual(outcome, [])