Exemplo n.º 1
0
    def test_conditional2(self):
        """Check a three-branch ``ChainedCondition`` that ends in ``otherwise``.

        Values produced by ``rnd.randint(1, 100)`` are passed through the
        chained condition, then counted per resulting key. The expected counts
        are rebuilt with a plain ``if`` cascade after ``rnd.reset()``.
        """
        rnd = random()
        size = 1000

        def get():
            return rnd.randint(1, 100)

        def expected_key(value):
            # Hand-written equivalent of the chained condition built below.
            if value <= 10:
                return 10
            if value <= 20:
                return 20
            if value <= 30:
                return 30
            return -1

        chain = ChainedCondition()
        chain = chain.if_then(lambda x: x <= 10, lambda x: 10)
        chain = chain.if_then(lambda x: x <= 20, lambda x: 20)
        chain = chain.if_then(lambda x: x <= 30, lambda x: 30)
        conditions = chain.otherwise(lambda x: -1)

        limited = Stream.from_supplier(get).limit(size)
        out = limited.conditional(conditions).collect(
            GroupingBy(identity, Counting()))

        # NOTE(review): reset() is presumed to replay the same sequence of
        # values the stream just consumed - confirm against the random helper.
        rnd.reset()

        out_target = defaultdict(int)
        for _ in range(size):
            out_target[expected_key(get())] += 1

        self.assertDictEqual(out, out_target)
Exemplo n.º 2
0
    def test_4(self):
        """Check ``sort(key=..., reverse=True)`` on objects wrapping floats.

        ``size`` values from ``rnd.random`` are wrapped in ``Data``, sorted in
        descending order by the stream, and compared element by element with
        the result of ``sorted`` applied to the same values (regenerated after
        ``rnd.reset()``).
        """
        size = 10
        eps = 1e-9

        class Data:
            """Float wrapper; two instances are equal when within ``eps``."""

            def __init__(self, e):
                self._e = e

            def e(self):
                return self._e

            def __eq__(self, other) -> bool:
                return abs(self._e - other._e) <= eps

        rnd = random()

        out = (Stream.from_supplier(rnd.random).limit(size).map(Data).sort(
            key=Data.e, reverse=True).collect(ToList()))

        # NOTE(review): reset() is presumed to replay the same sequence -
        # confirm against the random helper.
        rnd.reset()

        out_target = sorted((Data(rnd.random()) for _ in range(size)),
                            key=Data.e,
                            reverse=True)

        # zip() stops at the shorter input, so a truncated (or empty) result
        # would otherwise pass without any element being compared.
        self.assertEqual(len(out), len(out_target))

        for o, t in zip(out, out_target):
            with self.subTest(o=o):
                # Data defines a tolerance-aware __eq__ but no subtraction, so
                # assertAlmostEqual would raise TypeError on a mismatch
                # instead of reporting a failure; assertEqual relies on
                # __eq__ only.
                self.assertEqual(o, t)
Exemplo n.º 3
0
    def test_1(self):
        """Check ``window_function`` with a window of 4 against a sliding mean.

        The expected list holds the mean of every full window
        ``data[i:i + window_size]``, in order of ``i``.
        """
        rnd = random()
        data = rnd.int_range(1, 100, size=1000)
        window_size = 4

        def mean(es):
            return sum(es) / len(es)

        out = Stream(data).window_function(mean, window_size).collect(ToList())

        # One expected value per full window, advancing one element at a time.
        last_start = len(data) - window_size
        out_target = [
            mean(data[begin:begin + window_size])
            for begin in range(last_start + 1)
        ]

        for actual, expected in zip(out, out_target):
            with self.subTest():
                self.assertAlmostEqual(actual, expected, delta=1e-8)
Exemplo n.º 4
0
    def test_1(self):
        """Check that ``get_chunk`` yields consecutive slices of an iterator.

        An iterator over a copy of ``data`` is consumed ``chunk_size`` items
        at a time; every chunk must equal the matching slice of ``data``. The
        last slice is shorter than ``chunk_size`` because ``size`` is not a
        multiple of it.
        """
        rnd = random()
        a, b, size = 0, 10, 10
        chunk_size = 3

        data = rnd.int_range(a, b, size=size)
        itr = iter(data.copy())

        # Offsets walk the data by length. The original looped over
        # range(a, b, ...) - the random *value* bounds - which only worked
        # because a == 0 and b == size.
        for i in range(0, size, chunk_size):
            chunk = get_chunk(itr, chunk_size, list)
            self.assertListEqual(chunk, data[i:i + chunk_size])
Exemplo n.º 5
0
    def test_1a(self):
        """Check element frequencies from ``GroupingBy(identity, Counting())``.

        The result is compared with ``collections.Counter`` over the same data.
        """
        start, end = 1, 100
        size = 1000

        rnd = random()
        data = rnd.int_range(start, end, size=size)

        frequency_collector = GroupingBy(identity, Counting())
        element_frequency = Stream(data).collect(frequency_collector)

        self.assertDictEqual(element_frequency, Counter(data))
Exemplo n.º 6
0
    def test_5(self):
        """Check ``window_function`` with a stateless weighted-average callable.

        Weights are ``alpha**n`` for ``n`` in ``1..window_size``, divided by
        their closed-form sum. Every stream output is compared with the dot
        product of the weights and the matching full window of ``data``.
        """
        ctx = Context(prec=40)

        def dot_product(v1, v2):
            l1, l2 = len(v1), len(v2)

            assert l1 == l2, "dimension mismatch; v1's dim: {} and v2's dim: {}".format(
                l1, l2)
            assert v1 and v2, 'each vector must have at least one element.'

            # Same left-to-right accumulation, starting from integer 0.
            return sum(x * y for x, y in zip(v1, v2))

        class WeightedAverage:
            """Callable returning the dot product of fixed weights and a window."""

            def __init__(self, weight: tuple):
                assert sum(weight) == 1, 'weight array is not normalised'

                self._weight = weight

            def __call__(self, es: tuple):
                return dot_product(self._weight, es)

        # NOTE(review): D is presumably decimal.Decimal - confirm at the
        # import site.
        samples = random().float_range(1, 100, size=1000)
        data = tuple(D(e, ctx) for e in samples)

        alpha = D(0.1, ctx)
        window_size = 10

        # Closed form of alpha + alpha**2 + ... + alpha**window_size; dividing
        # by it is meant to make the weights sum to 1.
        _sum = alpha * (1 - alpha**window_size) / (1 - alpha)
        weight = tuple(alpha**n / _sum for n in range(1, window_size + 1))

        wa = WeightedAverage(weight)

        out = Stream(data).window_function(wa, window_size).collect(ToList())

        # out[n] must match the full window that starts at index n.
        window_count = len(data) - window_size + 1
        for n in range(window_count):
            chunk = data[n:n + window_size]
            self.assertEqual(out[n], dot_product(chunk, weight))
Exemplo n.º 7
0
    def test_3(self):
        """Check that ``MaxBy`` with a negated key agrees with ``MinBy``.

        Elements are bucketed by ``x % 5``. Taking the maximum under the key
        ``-x`` in each bucket must give the same result as ``MinBy()``.
        """
        rnd = random()
        data = rnd.int_range(1, 100, size=10)

        def mod_5(x: int):
            return x % 5

        def negated(x):
            return -x

        # Reversing the comparator turns "max" into "min" inside each bucket.
        reversed_max = MaxBy(comparing(negated))
        bkt_min = Stream(data).collect(GroupingBy(mod_5, reversed_max))

        bkt_min_target = Stream(data).collect(GroupingBy(mod_5, MinBy()))

        self.assertDictEqual(bkt_min, bkt_min_target)
Exemplo n.º 8
0
    def test_1(self):
        """Check element frequencies computed as the length of each group's list.

        Elements are grouped by their own value, each group is collected into
        a list, and ``len`` is applied to that list. ``test_1a`` does the same
        with ``Counting()`` directly.
        """
        start, end = 1, 100
        size = 1000

        rnd = random()
        data = rnd.int_range(start, end, size=size)

        # Collect every group into a list, then reduce the list to its length.
        list_length = CollectAndThen(ToList(), len)
        element_frequency = Stream(data).collect(
            GroupingBy(identity, list_length))

        self.assertDictEqual(element_frequency, Counter(data))
Exemplo n.º 9
0
    def test_2(self):
        """Check ``map`` followed by a descending ``sort`` on random floats.

        ``size`` values from ``rnd.random`` are squared and sorted in
        descending order by the stream, then compared element by element with
        ``sorted(..., reverse=True)`` over the same values (regenerated after
        ``rnd.reset()``).
        """
        size = 1000

        rnd = random()

        # A named function instead of a lambda bound to a name (PEP 8).
        def square(x):
            return x**2

        out = (Stream.from_supplier(rnd.random).limit(size).map(square).sort(
            reverse=True).collect(ToList()))

        # NOTE(review): reset() is presumed to replay the same sequence -
        # confirm against the random helper.
        rnd.reset()

        out_target = sorted((square(rnd.random()) for _ in range(size)),
                            reverse=True)

        # zip() stops at the shorter input, so a truncated (or empty) result
        # would otherwise pass without any element being compared.
        self.assertEqual(len(out), len(out_target))

        for o, t in zip(out, out_target):
            with self.subTest(o=o):
                self.assertAlmostEqual(o, t, delta=1e-9)
Exemplo n.º 10
0
    def test_4(self):
        """Check ``GroupingBy(mod_5, MinBy())`` against per-bucket ``min``.

        The expected mapping wraps the minimum of each ``x % 5`` bucket in
        ``Optional``.
        """
        rnd = random()
        data = rnd.int_range(1, 100, size=10)

        def mod_5(x: int):
            return x % 5

        bkt_min = Stream(data).collect(GroupingBy(mod_5, MinBy()))

        # Rebuild the buckets by hand, keeping elements in encounter order.
        buckets = {}
        for value in data:
            buckets.setdefault(mod_5(value), []).append(value)

        out_target = {
            key: Optional(min(members))
            for key, members in buckets.items()
        }

        self.assertDictEqual(bkt_min, out_target)
Exemplo n.º 11
0
    def test_4(self):
        """Check ``window_function`` with a stateful callable and ``None`` window.

        ``Statistic`` keeps a running mean and running mean of squares, which
        it updates from the last element of whatever it is called with. Each
        output is compared with ``statistics.mean`` / ``statistics.pstdev``
        over the data seen so far.
        """
        data = random().int_range(1, 100, size=200)

        class Statistic:
            """Running mean and population standard deviation accumulator."""

            def __init__(self):
                self._mean = 0
                self._mean_sqr = 0  # mean of squares
                self._n = 0

            @property
            def mean(self):
                return self._mean

            @property
            def std(self):
                # E[x^2] - E[x]^2 cannot be negative mathematically, but float
                # rounding can push it slightly below zero; a negative base
                # raised to 0.5 would yield a complex number, so clamp first.
                variance = max(self._mean_sqr - self.mean**2, 0.0)
                return variance**0.5

            def __call__(self, es: list):
                n = self._n
                e = es[-1]  # only the newest element updates the running state

                self._mean = (self._mean * n + e) / (n + 1)
                self._mean_sqr = (self._mean_sqr * n + e**2) / (n + 1)

                self._n = n + 1
                return dict(mean=self.mean, std=self.std)

        stat = Statistic()

        # NOTE(review): a window size of None is presumed to make the stream
        # call `stat` once per element - confirm against window_function.
        out: List[dict] = Stream(data).window_function(stat,
                                                       None).collect(ToList())

        eps = 1e-9

        data_holder = []

        for o, d in zip(out, data):
            data_holder.append(d)

            with self.subTest():
                mu = mean(data_holder)
                self.assertAlmostEqual(o['mean'], mu, delta=eps)
                self.assertAlmostEqual(o['std'],
                                       pstdev(data_holder, mu=mu),
                                       delta=eps)
Exemplo n.º 12
0
    def test_if_else(self):
        """Check ``if_else`` mapping values below 50 to 0 and all others to 1.

        The mapped values are counted per key and compared with counts rebuilt
        by hand after ``rnd.reset()``.
        """
        rnd = random()
        size = 1000

        def get():
            return rnd.randint(1, 100)

        branched = Stream.from_supplier(get).limit(size).if_else(
            lambda x: x < 50, lambda x: 0, lambda x: 1)
        out = branched.collect(GroupingBy(identity, Counting()))

        # NOTE(review): reset() is presumed to replay the same sequence -
        # confirm against the random helper.
        rnd.reset()

        out_target = defaultdict(int)
        for _ in range(size):
            if get() < 50:
                out_target[0] += 1
            else:
                out_target[1] += 1

        self.assertDictEqual(out, out_target)
Exemplo n.º 13
0
    def test_2(self):
        """Check ``GroupingBy(mod_10, ToSet())`` against hand-built sets.

        Elements are bucketed by ``x % 10``; each bucket must hold exactly the
        distinct elements that fall into it.
        """
        start, end = 1, 100
        size = 1000

        def mod_10(x):
            return x % 10

        rnd = random()
        data = rnd.int_range(start, end, size=size)

        distinct_elements = Stream(data).collect(GroupingBy(mod_10, ToSet()))

        # Reference: one set of distinct members per remainder.
        out_target = defaultdict(set)
        for value in data:
            remainder = mod_10(value)
            out_target[remainder].add(value)

        self.assertDictEqual(distinct_elements, out_target)
Exemplo n.º 14
0
    def test_4(self):
        """Check ``GroupingBy(mod_10, Summing())`` against hand-built sums.

        Elements are bucketed by ``x % 10`` and summed per bucket.
        """
        start, end = 1, 100
        size = 1000

        def mod_10(x):
            return x % 10

        rnd = random()
        data = rnd.int_range(start, end, size=size)

        out = Stream(data).collect(GroupingBy(mod_10, Summing()))

        # Reference: running total per remainder.
        out_target = defaultdict(int)
        for value in data:
            remainder = mod_10(value)
            out_target[remainder] += value

        self.assertDictEqual(out, out_target)
Exemplo n.º 15
0
    def test_6(self):
        """Check the distinct-element count per ``x % 100`` bucket.

        The stream collects each bucket into a set and applies ``len``; the
        reference builds the same sets by hand and measures them.
        """
        start, end = 1, 10000
        size = 1000

        def mod_100(x):
            return x % 100

        rnd = random()
        data = rnd.int_range(start, end, size=size)

        distinct_count = CollectAndThen(ToSet(), len)
        element_distinct_count = Stream(data).collect(
            GroupingBy(mod_100, distinct_count))

        # Reference: distinct members per remainder, then their count.
        members_by_bucket = {}
        for value in data:
            members_by_bucket.setdefault(mod_100(value), set()).add(value)

        out_target = {
            bucket: len(members)
            for bucket, members in members_by_bucket.items()
        }

        self.assertDictEqual(element_distinct_count, out_target)
Exemplo n.º 16
0
    def test_conditional1(self):
        """Check a three-branch ``ChainedCondition`` closed with ``done()``.

        The reference keeps a value as its own key when no branch matches, so
        the stream is expected to leave such values unchanged. Results are
        tallied with ``mapping`` (value 1 per element, duplicates added).
        """
        rnd = random()
        size = 1000

        def get():
            return rnd.randint(1, 100)

        def expected_key(value):
            # Hand-written equivalent of the chained condition built below;
            # values above 30 fall through untouched.
            if value <= 10:
                return 10
            if value <= 20:
                return 20
            if value <= 30:
                return 30
            return value

        chain = ChainedCondition()
        chain = chain.if_then(lambda x: x <= 10, lambda x: 10)
        chain = chain.if_then(lambda x: x <= 20, lambda x: 20)
        chain = chain.if_then(lambda x: x <= 30, lambda x: 30)
        conditions = chain.done()

        routed = Stream.from_supplier(get).limit(size).conditional(conditions)
        out = routed.mapping(identity,
                             lambda x: 1,
                             resolve=lambda x, y: x + y)

        # NOTE(review): reset() is presumed to replay the same sequence -
        # confirm against the random helper.
        rnd.reset()

        out_target = defaultdict(int)
        for _ in range(size):
            out_target[expected_key(get())] += 1

        self.assertDictEqual(out, out_target)
Exemplo n.º 17
0
    def test_8(self):
        """Check three-level nested ``GroupingBy`` after a ``filter``.

        Random ``Person`` records are filtered to ages 20..50 (inclusive) and
        counted by country, then state, then sex. The reference is a nested
        ``defaultdict`` filled with the same predicate.
        """
        rnd = random()
        data_size = 1000
        countries = ('A', 'B', 'C')
        sexes = ('M', 'F')

        class Person:
            """Plain record holding the four attributes used for grouping."""

            def __init__(self, country, state, age, sex):
                self.country = country
                self.state = state
                self.age = age
                self.sex = sex

        def random_person():
            # Draw order (country, state, age, sex) is kept so the random
            # source is consumed exactly as before.
            return Person(country=rnd.choice(countries),
                          state=rnd.randrange(1, 4),
                          age=rnd.randrange(0, 60),
                          sex=rnd.choice(sexes))

        ps = [random_person() for _ in range(data_size)]

        def _filter(x: Person):
            # Keep people aged between 20 and 50, both bounds inclusive.
            return 20 <= x.age <= 50

        # Count by country, then by state, then by sex.
        by_sex = GroupingBy(attrgetter('sex'), Counting())
        by_state = GroupingBy(attrgetter('state'), by_sex)
        collector = GroupingBy(attrgetter('country'), by_state)

        out = Stream(ps).filter(_filter).collect(collector)

        out_target = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        for person in ps:
            if not _filter(person):
                continue
            out_target[person.country][person.state][person.sex] += 1

        self.assertDictEqual(out, out_target)
Exemplo n.º 18
0
    def test_2(self):
        """Check ``GroupingBy(mod_5, MaxBy(...))`` on custom objects.

        ``Data`` wraps an integer and compares equal by that integer. Elements
        are bucketed by ``_num % 5``; the expected mapping wraps the
        ``max(..., key=_num)`` of each bucket in ``Optional``.
        """

        class Data:
            """Integer wrapper with value-based equality and a bracketed repr."""

            def __init__(self, num):
                self._num = num

            def __str__(self) -> str:
                return '[' + str(self._num) + ']'

            def __repr__(self):
                return str(self)

            def __eq__(self, other: 'Data'):
                # Defer to the other operand for foreign types instead of
                # raising AttributeError on the missing `_num` attribute.
                if not isinstance(other, Data):
                    return NotImplemented
                return self._num == other._num

        rnd = random()

        data = [Data(x) for x in rnd.int_range(1, 100, size=10)]

        def mod_5(x: Data):
            return x._num % 5

        comp_key = attrgetter('_num')

        bkt_max = Stream(data).collect(
            GroupingBy(mod_5, MaxBy(comparing(comp_key))))

        # Reference: bucket by hand, then take the maximum of each bucket.
        temp = defaultdict(list)

        for e in data:
            temp[mod_5(e)].append(e)

        out_target = {
            k: Optional(max(v, key=comp_key))
            for k, v in temp.items()
        }

        self.assertDictEqual(bkt_max, out_target)
Exemplo n.º 19
0
    def test_4a(self):
        """Check the sum of distinct elements per ``x % 10`` bucket.

        The stream collects each bucket into a set and applies ``sum``; the
        reference builds the same sets by hand and sums them.
        """
        start, end = 1, 100
        size = 1000

        def mod_10(x):
            return x % 10

        rnd = random()
        data = rnd.int_range(start, end, size=size)

        distinct_sum = CollectAndThen(ToSet(), sum)
        out = Stream(data).collect(GroupingBy(mod_10, distinct_sum))

        # Reference: distinct members per remainder, then their total.
        members_by_bucket = {}
        for value in data:
            members_by_bucket.setdefault(mod_10(value), set()).add(value)

        out_target = {
            bucket: sum(members)
            for bucket, members in members_by_bucket.items()
        }

        self.assertDictEqual(out, out_target)
Exemplo n.º 20
0
    def test_5(self):
        """Check ``GroupingBy(mod_10, Mapping(square, Summing()))``.

        Elements are bucketed by ``x % 10``; each bucket's result must be the
        sum of the squares of its elements.
        """
        start, end = 1, 100
        size = 1000

        def mod_10(x):
            return x % 10

        def square(x):
            return x**2

        rnd = random()
        data = rnd.int_range(start, end, size=size)

        sum_of_squares = Mapping(square, Summing())
        out = Stream(data).collect(GroupingBy(mod_10, sum_of_squares))

        # Reference: running total of squares per remainder.
        out_target = defaultdict(int)
        for value in data:
            remainder = mod_10(value)
            out_target[remainder] += square(value)

        self.assertDictEqual(out, out_target)
Exemplo n.º 21
0
 def setUp(self):
     """Store the object returned by ``random()`` on ``self.rnd`` for the test."""
     # NOTE(review): `random` is called here, so it is not the stdlib module;
     # presumably a project helper - confirm at the import site.
     self.rnd = random()