def test_union_diff_partitions(self):
    """Union two tables whose stores were loaded with different ``total_partitions``.

    One store is loaded with ``total_partitions`` 10 and the other with 5.
    They are unioned in both directions with a value-adding merge function
    and the ``get_value`` result is compared with the expected pairs. The
    check is then repeated with the eleven-pair data set on the right side.
    """
    def add_values(v1, v2):
        return v1 + v2

    eleven_pairs = (('a', 1), ('b', 4), ('d', 6), ('e', 0), ('f', 3), ('g', 12),
                    ('h', 13), ('i', 14), ('j', 15), ('k', 16), ('l', 17))
    five_pairs = (('a', 2), ('c', 4), ('d', 1), ('f', 0), ('g', 1))
    merged = [('a', 3), ('b', 4), ('c', 4), ('d', 7), ('e', 0), ('f', 3),
              ('g', 13), ('h', 13), ('i', 14), ('j', 15), ('k', 16), ('l', 17)]

    options_left = get_default_options()
    options_right = get_default_options()
    options_left['total_partitions'] = 10
    options_right['total_partitions'] = 5

    # Round 1: eleven pairs on the left, five pairs on the right.
    left_store = self.ctx.load("ns1", "testUniontLeft_10p_6", options=options_left)
    left_rp = left_store.put_all(list(eleven_pairs), options={"include_key": True})
    right_store = self.ctx.load("ns1", "testUniontRight_5p_6", options=options_right)
    right_rp = right_store.put_all(list(five_pairs), options={"include_key": True})
    print(f'left:{get_value(left_rp)}, right:{get_value(right_rp)}')
    print('111', get_value(left_rp.union(right_rp, add_values)))
    print('222', get_value(right_rp.union(left_rp, add_values)))
    self.assertEqual(get_value(left_rp.union(right_rp, add_values)), merged)
    self.assertEqual(get_value(right_rp.union(left_rp, add_values)), merged)

    # Round 2: the data sets swap sides; the option dicts stay with their side.
    right_store = self.ctx.load("ns1", "testUniontRight_10p_7", options=options_right)
    right_rp = right_store.put_all(list(eleven_pairs), options={"include_key": True})
    left_store = self.ctx.load("ns1", "testUniontLeft_5p_7", options=options_left)
    left_rp = left_store.put_all(list(five_pairs), options={"include_key": True})
    print(f'left:{get_value(left_rp)}, right:{get_value(right_rp)}')
    print('333', get_value(left_rp.union(right_rp, add_values)))
    self.assertEqual(get_value(left_rp.union(right_rp, add_values)), merged)
def test_sc(self):
    """Print the result of ``subtract_by_key`` for stores with 10 and 5 ``total_partitions``.

    This test makes no assertions; it prints the input tables, the
    subtraction result and the result's ``get_partitions()`` value.
    """
    options_left = get_default_options()
    options_right = get_default_options()
    options_left['total_partitions'] = 10
    options_right['total_partitions'] = 5

    left_store = self.ctx.load(namespace="ns1", name="testSubtractLeft_10p_8", options=options_left)
    left_rp = left_store.put_all(
        [('a', 1), ('b', 4), ('d', 6), ('e', 0), ('f', 3)],
        options={"include_key": True},
    )
    right_store = self.ctx.load(namespace="ns1", name="testSubtractRight_5p_8", options=options_right)
    right_rp = right_store.put_all(
        [('a', 2), ('c', 4), ('d', 1), ('f', 0), ('g', 1)],
        options={"include_key": True},
    )

    print(f'left:{get_value(left_rp)}, right:{get_value(right_rp)}')
    print('111', get_value(left_rp.subtract_by_key(right_rp)))
    print('222', left_rp.subtract_by_key(right_rp).get_partitions())
def test_union(self):
    """Print the union of a value-only table and a key/value table, twice.

    Each round builds a left table from ``[1, 2, 3]`` and a right table from
    ``[(1, 1), (2, 2), (3, 3)]``, prints their union merged by addition and
    destroys both tables. No assertions are made.
    """
    def add_values(v1, v2):
        return v1 + v2

    # Round 1: the left table is written before the options dict is modified;
    # the right put_all is called without an options argument.
    options = get_default_options()
    left_store = self.ctx.load("ns1", "testUnionLeft123", options=options)
    left_rp = left_store.put_all([1, 2, 3], options=options)
    options['include_key'] = True
    options['total_partitions'] = 3
    right_store = self.ctx.load("ns1", "testUnionRight123", options=options)
    right_rp = right_store.put_all([(1, 1), (2, 2), (3, 3)])
    print(list(left_rp.union(right_rp, add_values).get_all()))
    left_rp.destroy()
    right_rp.destroy()

    # Round 2: a different namespace, with total_partitions set before loading.
    options = get_default_options()
    options['total_partitions'] = 3
    left_store = self.ctx.load("namespace20200102", "testUnionLeft123", options=options)
    left_rp = left_store.put_all([1, 2, 3], options=options)
    print("left:", left_rp)
    options['include_key'] = True
    right_store = self.ctx.load("namespace20200102", "testUnionRight123", options=options)
    right_rp = right_store.put_all([(1, 1), (2, 2), (3, 3)], options=options)
    print("right:", right_rp)
    print("left:", list(left_rp.get_all()))
    print("right:", list(right_rp.get_all()))
    print(list(left_rp.union(right_rp, add_values).get_all()))
    left_rp.destroy()
    right_rp.destroy()
def test_first(self):
    """Check ``first()`` with ``keys_only`` switched on and off.

    With ``keys_only`` True the expected result is ``0``; with ``keys_only``
    False it is the pair ``(0, 0)``. Both tables are filled from
    ``range(10)`` with ``include_key`` False.
    """
    keys_opts = get_default_options()
    keys_opts['keys_only'] = True
    keys_opts['include_key'] = False
    keys_store = self.ctx.load('ns1', 'test_take', options=keys_opts)
    table = keys_store.put_all(range(10), options=keys_opts)
    print(table.first(options=keys_opts))
    self.assertEqual(table.first(options=keys_opts), 0)

    pair_opts = get_default_options()
    pair_opts['include_key'] = False
    pair_opts['keys_only'] = False
    pair_store = self.ctx.load('ns12020', 'test_take_kv', options=pair_opts)
    table = pair_store.put_all(range(10), options=pair_opts)
    print(table.first(options=pair_opts))
    self.assertEqual(table.first(options=pair_opts), (0, 0))
def test_take(self):
    """Check ``take(n=3)`` with ``keys_only`` switched on and off.

    With ``keys_only`` True the expected result is ``[0, 1, 2]``; with
    ``keys_only`` False it is the first three ``(k, v)`` pairs.

    NOTE(review): another ``test_take`` appears later in this file; if both
    live in the same class the later definition replaces this one - confirm.
    """
    keys_opts = get_default_options()
    keys_opts['keys_only'] = True
    keys_opts['include_key'] = False
    keys_store = self.ctx.load('ns1', 'test_take', options=keys_opts)
    table = keys_store.put_all(range(10), options=keys_opts)
    print(table.take(n=3, options=keys_opts))
    self.assertEqual(table.take(n=3, options=keys_opts), [0, 1, 2])

    pair_opts = get_default_options()
    pair_opts['keys_only'] = False
    pair_opts['include_key'] = False
    pair_store = self.ctx.load('ns1', 'test_take_kv', options=pair_opts)
    table = pair_store.put_all(range(10), options=pair_opts)
    print(table.take(n=3, options=pair_opts))
    self.assertEqual(table.take(n=3, options=pair_opts), [(0, 0), (1, 1), (2, 2)])
def test_sample(self):
    """Check that ``sample(0.1, 81)`` over 100 values keeps between 6 and 14 items.

    The table is filled from ``range(100)`` with ``include_key`` False. The
    bounds are inclusive, and the failure message reports the actual count.
    """
    options = get_default_options()
    options['include_key'] = False
    rp = self.ctx.load("ns1", "testSample", options=options).put_all(range(100), options=options)

    sampled_count = rp.sample(0.1, 81).count()
    # assertTrue with a message instead of assertEqual(<bool>, True), so a
    # failure shows the offending count rather than "False != True".
    self.assertTrue(
        6 <= sampled_count <= 14,
        f"sample(0.1, 81) returned {sampled_count} items, expected between 6 and 14",
    )
def test_join(self):
    """Join two key/value tables in both directions and compare the results.

    Only keys ``'a'`` and ``'d'`` occur in both inputs; their values are
    merged by addition, so both join directions are expected to yield
    ``[('a', 3), ('d', 7)]``.
    """
    def add_values(v1, v2):
        return v1 + v2

    options = get_default_options()
    left_store = self.ctx.load("ns1", "testJoinLeft", options=options)
    left_rp = left_store.put_all(
        [('a', 1), ('b', 4), ('d', 6), ('e', 0)],
        options={"include_key": True},
    )
    right_store = self.ctx.load("ns1", "testJoinRight", options=options)
    right_rp = right_store.put_all(
        [('a', 2), ('c', 4), ('d', 1), ('f', 0), ('g', 1)],
        options={"include_key": True},
    )

    expected = [('a', 3), ('d', 7)]
    print(list(left_rp.join(right_rp, add_values).get_all()))
    self.assertEqual(get_value(left_rp.join(right_rp, add_values)), expected)
    self.assertEqual(get_value(right_rp.join(left_rp, add_values)), expected)
def test_map_partitions(self):
    """Check ``map_partitions`` on a store loaded with ``total_partitions`` 10.

    Every input pair ``(k, v)`` is expanded into ``("k_v_0", v**2)`` and
    ``("k_v_1", v**3)``. The test checks three single-key lookups and then
    the complete output sorted by key.

    NOTE(review): another ``test_map_partitions`` appears later in this
    file; if both live in the same class the later definition replaces this
    one and this test never runs - confirm.
    """
    options = get_default_options()
    options['total_partitions'] = 10
    data = [(str(i), i) for i in range(10)]
    rp = self.ctx.load("ns1", "test_map_partitions", options=options).put_all(
        data, options={"include_key": True})

    # Parameter renamed from `iter` so it no longer shadows the builtin.
    def func(kv_iter):
        ret = []
        for k, v in kv_iter:
            ret.append((f"{k}_{v}_0", v**2))
            ret.append((f"{k}_{v}_1", v**3))
        return ret

    table = rp.map_partitions(func)
    self.assertEqual(table.get("6_6_0"), 36)
    self.assertEqual(table.get("0_0_1"), 0)
    self.assertEqual(table.get("1_1_0"), 1)
    self.assertEqual(
        sorted(table.get_all(), key=lambda x: x[0]),
        [('0_0_0', 0), ('0_0_1', 0), ('1_1_0', 1), ('1_1_1', 1),
         ('2_2_0', 4), ('2_2_1', 8), ('3_3_0', 9), ('3_3_1', 27),
         ('4_4_0', 16), ('4_4_1', 64), ('5_5_0', 25), ('5_5_1', 125),
         ('6_6_0', 36), ('6_6_1', 216), ('7_7_0', 49), ('7_7_1', 343),
         ('8_8_0', 64), ('8_8_1', 512), ('9_9_0', 81), ('9_9_1', 729)])
def test_glom(self):
    """Check ``glom()`` on a table filled from ``range(5)``.

    The expected ``get_value`` result is a single entry keyed ``4`` whose
    value is the list of all five ``(i, i)`` pairs.
    """
    options = get_default_options()
    store = self.ctx.load("ns1", "test_glom", options=options)
    rp = store.put_all(range(5), options=options)

    print(list(rp.glom().get_all()))
    all_pairs = [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]
    self.assertEqual(get_value(rp.glom()), [(4, all_pairs)])
def test_multi_partition_reduce(self):
    """Reduce a table filled from ``range(20)`` with addition.

    The expected ``get_value`` result is ``[(b'result', 190)]``.
    """
    import operator

    options = get_default_options()
    store = self.ctx.load("ns1", "testMultiPartitionReduce", options=options)
    rp = store.put_all(range(20), options=options)

    print(list(rp.reduce(operator.add).get_all()))
    self.assertEqual(get_value(rp.reduce(operator.add)), [(b'result', 190)])
def test_map_partitions(self):
    """Check ``map_partitions`` on a store loaded with the default options.

    Every input pair ``(k, v)`` is expanded into ``("k_v_0", v**2)`` and
    ``("k_v_1", v**3)``. The ``get_value`` result of the mapped table is
    compared with the full expected list.
    """
    options = get_default_options()
    data = [(str(i), i) for i in range(10)]
    rp = self.ctx.load("ns1", "test_map_partitions", options=options).put_all(
        data, options={"include_key": True})

    # Parameter renamed from `iter` so it no longer shadows the builtin.
    def func(kv_iter):
        ret = []
        for k, v in kv_iter:
            ret.append((f"{k}_{v}_0", v**2))
            ret.append((f"{k}_{v}_1", v**3))
        return ret

    table = rp.map_partitions(func)
    print(list(rp.map_partitions(func).get_all()))
    self.assertEqual(
        get_value(table),
        [('0_0_0', 0), ('0_0_1', 0), ('1_1_0', 1), ('1_1_1', 1),
         ('2_2_0', 4), ('2_2_1', 8), ('3_3_0', 9), ('3_3_1', 27),
         ('4_4_0', 16), ('4_4_1', 64), ('5_5_0', 25), ('5_5_1', 125),
         ('6_6_0', 36), ('6_6_1', 216), ('7_7_0', 49), ('7_7_1', 343),
         ('8_8_0', 64), ('8_8_1', 512), ('9_9_0', 81), ('9_9_1', 729)])
def test_filter(self):
    """Filter a table filled from ``range(5)`` down to the odd values.

    The expected ``get_value`` result is ``[(1, 1), (3, 3)]``.
    """
    def value_is_odd(k, v):
        return v % 2 != 0

    options = get_default_options()
    store = self.ctx.load("ns1", "test_filter", options=options)
    rp = store.put_all(range(5), options=options)

    print(list(rp.filter(value_is_odd).get_all()))
    self.assertEqual(get_value(rp.filter(value_is_odd)), [(1, 1), (3, 3)])
def test_destroy_simple(self):
    """Load the ``test_destroy`` store in ``ns1`` and destroy it at once.

    No assertions are made; the test only checks that neither call raises.
    """
    # NOTE(review): `options` is built here but never passed on; the store is
    # loaded with self.store_opts(include_key=True) instead - confirm intent.
    options = get_default_options()
    options['include_key'] = True

    store_options = self.store_opts(include_key=True)
    table = self.ctx.load('ns1', 'test_destroy', options=store_options)
    table.destroy()
def test_map_values_many(self):
    """Call ``map_values`` with an identity function 100 times on one table.

    The table is filled from ``range(10)``. No assertions are made.
    """
    def identity(v):
        return v

    options = get_default_options()
    options['include_key'] = False
    store = self.ctx.load("ns12020", "test_map_values", options=options)
    rp = store.put_all(range(10), options=options)

    for _ in range(100):
        rp.map_values(identity)
def test_put_all_value(self):
    """Write 100 values without keys via ``put_all`` and check ``count()``.

    The store is destroyed first and then reloaded with ``include_key``
    False before the values are written.
    """
    options = get_default_options()
    self.ctx.load("ns1", "testPutAllValue", options=options).destroy()
    options['include_key'] = False

    cnt = 100
    values = ("s" for _ in range(cnt))
    store = self.ctx.load("ns1", "testPutAllValue", options=options)
    rp = store.put_all(values, options=options)
    self.assertEqual(rp.count(), cnt)
def test_subtract_by_key(self):
    """Subtract a ``range(5)`` table from a ``range(10)`` table by key.

    Both stores use ``total_partitions`` 1 and ``include_key`` False. The
    expected remainder is the pairs ``(5, 5)`` through ``(9, 9)``.
    """
    options = get_default_options()
    options['total_partitions'] = 1
    options['include_key'] = False

    left_store = self.ctx.load("namespace20201", "testSubtractByKeyLeft202013", options=options)
    left_rp = left_store.put_all(range(10), options=options)
    right_store = self.ctx.load("namespace2020131", "testSubtractByKeyRight202013", options=options)
    right_rp = right_store.put_all(range(5), options=options)

    remaining = [(5, 5), (6, 6), (7, 7), (8, 8), (9, 9)]
    self.assertEqual(list(left_rp.subtract_by_key(right_rp).get_all()), remaining)
    print(list(left_rp.subtract_by_key(right_rp).get_all()))
def test_subtract_diff_partitions(self):
    """Run ``subtract_by_key`` on stores loaded with different ``total_partitions``.

    One store is loaded with ``total_partitions`` 10 and the other with 5.
    Subtraction is checked in both directions, then once more with the
    eleven-pair data set on the right side.
    """
    eleven_pairs = (('a', 1), ('b', 4), ('d', 6), ('e', 0), ('f', 3), ('g', 12),
                    ('h', 13), ('i', 14), ('j', 15), ('k', 16), ('l', 17))
    five_pairs = (('a', 2), ('c', 4), ('d', 1), ('f', 0), ('g', 1))

    options_left = get_default_options()
    options_right = get_default_options()
    options_left['total_partitions'] = 10
    options_right['total_partitions'] = 5

    # Round 1: eleven pairs on the left, five pairs on the right.
    left_store = self.ctx.load("ns1", "testSubtractLeft_10p_7", options=options_left)
    left_rp = left_store.put_all(list(eleven_pairs), options={"include_key": True})
    right_store = self.ctx.load("ns1", "testSubtractRight_5p_7", options=options_right)
    right_rp = right_store.put_all(list(five_pairs), options={"include_key": True})
    print(f'left:{get_value(left_rp)}, right:{get_value(right_rp)}')
    print('111', get_value(left_rp.subtract_by_key(right_rp)))
    print('222', get_value(right_rp.subtract_by_key(left_rp)))
    rs = get_value(left_rp.subtract_by_key(right_rp))
    print('rs:', rs)
    self.assertEqual(
        [('b', 4), ('e', 0), ('h', 13), ('i', 14), ('j', 15), ('k', 16), ('l', 17)],
        rs,
    )
    self.assertEqual(get_value(right_rp.subtract_by_key(left_rp)), [('c', 4)])

    # Round 2: the data sets swap sides; the option dicts stay with their side.
    right_store = self.ctx.load("ns1", "testSubtractRight_10p_7", options=options_right)
    right_rp = right_store.put_all(list(eleven_pairs), options={"include_key": True})
    left_store = self.ctx.load("ns1", "testSubtractLeft_5p_7", options=options_left)
    left_rp = left_store.put_all(list(five_pairs), options={"include_key": True})
    print(f'left:{get_value(left_rp)}, right:{get_value(right_rp)}')
    print('333', get_value(left_rp.subtract_by_key(right_rp)))
    self.assertEqual(get_value(left_rp.subtract_by_key(right_rp)), [('c', 4)])
def test_delete(self):
    """Delete key ``"k1"`` from a four-entry table and check what remains.

    The expected ``get_value`` result afterwards is the entries for
    ``k2``, ``k3`` and ``k4``.
    """
    options = get_default_options()
    options['include_key'] = True
    data = [("k1", "v1"), ("k2", "v2"), ("k3", "v3"), ("k4", "v4")]

    store = self.ctx.load('ns1', 'test_delete_one', options=options)
    table = store.put_all(data, options=options)
    print("before delete:{}".format(list(table.get_all())))
    table.delete("k1")
    print("after delete:{}".format(list(table.get_all())))

    survivors = [("k2", "v2"), ("k3", "v3"), ("k4", "v4")]
    self.assertEqual(get_value(table), survivors)
def test_map_values(self):
    """Map every value of a ``range(10)`` table to ``str(v) + 'map_values'``.

    The expected ``get_value`` result pairs each key ``i`` with the string
    ``"<i>map_values"``.
    """
    def tag_value(v):
        return str(v) + 'map_values'

    options = get_default_options()
    options['include_key'] = False
    store = self.ctx.load("ns12020", "test_map_values", options=options)
    rp = store.put_all(range(10), options=options)

    res = rp.map_values(tag_value)
    print(list(res.get_all()))
    expected = [
        (0, '0map_values'), (1, '1map_values'), (2, '2map_values'),
        (3, '3map_values'), (4, '4map_values'), (5, '5map_values'),
        (6, '6map_values'), (7, '7map_values'), (8, '8map_values'),
        (9, '9map_values'),
    ]
    self.assertEqual(get_value(res), expected)
def test_destroy(self):
    """Destroy a loaded table and check that ``count()`` then returns 0.

    The ``put_all`` that would fill the table is currently disabled, so
    ``data`` is defined but never written.
    """
    options = get_default_options()
    options['include_key'] = True
    data = [("k1", "v1"), ("k2", "v2"), ("k3", "v3"), ("k4", "v4")]

    # The `.put_all(data, options=options)` call is intentionally left off here.
    table = self.ctx.load('ns12020020618', 'test_destroy', options=options)
    print("before destroy:{}".format(list(table.get_all())))
    table.destroy()
    # TODO:1: table which has been destroyed cannot get_all, should raise exception
    # print("after destroy:{}".format(list(table.get_all())))
    self.assertEqual(table.count(), 0)
def test_join_self(self):
    """Join a table with itself, merging values by addition.

    For the input ``[('a', 1), ('b', 4)]`` the expected ``get_value``
    result is ``[('a', 2), ('b', 8)]``.
    """
    def add_values(v1, v2):
        return v1 + v2

    options = get_default_options()
    store = self.ctx.load("ns12020", "testJoinLeft2020", options=options)
    left_rp = store.put_all([('a', 1), ('b', 4)], options={"include_key": True})

    print(list(left_rp.join(left_rp, add_values).get_all()))
    self.assertEqual(get_value(left_rp.join(left_rp, add_values)), [('a', 2), ('b', 8)])
def test_take(self):
    """Check ``take(n=3)`` against the head of ``get_all()`` for several key shapes.

    Each sample list is parallelized with ``total_partitions`` 3, first
    with ``keys_only`` True (expecting the first three keys) and then with
    ``keys_only`` False (expecting the first three pairs).
    """
    alist = [("something1", 1), ("something3", 2), ("something2", 2),
             ("something10", 3), ("something125", 4), ("something5", 5),
             ("something16", 6), ("something0", 6), ("something4", 6)]
    blist = [("something1", 1), ("something3", 2), ("something2", 2),
             ("something1.34", 3), ("something1.0", 3), ("something1.25", 4),
             ("something1.105", 5), ("something0.105", 6), ("something0.104", 6)]
    clist = [('1112', '2'), ('35', '5'), ('18', '8'), ('0', '0'), ('23', '3'),
             ('6', '6'), ('9', '9'), ('1', '1'), ('4', '4'), ('7', '7')]
    dlist = [('1112.1', '2'), ('35.2', '5'), ('18.3', '8'), ('0', '0'), ('23.9', '3'),
             ('35.1', '6'), ('18.2', '9'), ('1', '1'), ('4', '4'), ('23.6', '7')]
    elist = [("1something", 1), ("3tomething", 2), ("3something", 2),
             ("2something", 2), ("1.34something", 3), ("1.0something", 3),
             ("1.25something", 4), ("1.105something", 5), ("0.105something", 6)]

    for sample in (alist, blist, clist, dlist, elist):
        # The keys-only pass runs first, then the key/value pass.
        for keys_only in (True, False):
            opts = get_default_options()
            opts['keys_only'] = keys_only
            opts['include_key'] = True
            opts['total_partitions'] = 3
            table = self.ctx.parallelize(sample, options=opts)
            print(f'get_all:{list(table.get_all())}')
            print('start take')
            print(table.take(n=6, options=opts))

            taken = table.take(n=3, options=opts)
            head = list(table.get_all())[:3]
            if keys_only:
                expected = [item[0] for item in head]
            else:
                expected = head
            self.assertEqual(taken, expected)
def test_multi_partition_map(self):
    """Map every key to ``k + 1`` on a 100-value table and check the count.

    The store is loaded with ``total_partitions`` 3 and filled from
    ``range(100)``; the mapped table is expected to hold 100 entries.
    """
    def shift_key(k, v):
        return (k + 1, v)

    options = get_default_options()
    options['total_partitions'] = 3
    options['include_key'] = False
    store = self.ctx.load("ns1", "testMultiPartitionsMap", options=options)
    rp = store.put_all(range(100), options=options)

    result = rp.map(shift_key)
    print(result.count())
    self.assertEqual(result.count(), 100)
def test_subtract_by_key_second(self):
    """Run ``subtract_by_key`` on two stores loaded with ``total_partitions`` 1.

    Keys ``'a'`` and ``'d'`` occur on both sides, so the expected remainder
    of the left table is ``[('c', 4), ('f', 0), ('g', 1)]``.
    """
    options_left = get_default_options()
    options_right = get_default_options()
    options_left['total_partitions'] = 1
    options_right['total_partitions'] = 1

    left_store = self.ctx.load("ns1", "testSubtractLeft_10p_3", options=options_left)
    left_rp = left_store.put_all(
        [('a', 2), ('c', 4), ('d', 1), ('f', 0), ('g', 1)],
        options={"include_key": True},
    )
    right_store = self.ctx.load("ns1", "testSubtractRight_5p_3", options=options_right)
    right_rp = right_store.put_all(
        [('a', 1), ('b', 4), ('d', 6), ('e', 0)],
        options={"include_key": True},
    )

    print(f'left:{get_value(left_rp)}, right:{get_value(right_rp)}')
    print('111', get_value(left_rp.subtract_by_key(right_rp)))
    self.assertEqual(
        [('c', 4), ('f', 0), ('g', 1)],
        get_value(left_rp.subtract_by_key(right_rp)),
    )
def test_get(self):
    """Check ``put`` followed by ``get``, and ``get`` on a missing key.

    Ten pairs ``k{i} -> v{i}`` are written one at a time and read back
    right away. A key that was never written (``"k100"``) must yield
    ``None``. The store is destroyed afterwards even when an assertion fails.
    """
    options = get_default_options()
    options['include_key'] = True
    try:
        for i in range(10):
            self.ctx.load("ns1", "testGet", options=options).put(f"k{i}", f"v{i}")
            print(self.ctx.load("ns1", "testGet").get(f"k{i}"))
            self.assertEqual(
                self.ctx.load("ns1", "testGet").get(f"k{i}"), f"v{i}")
        # assertIsNone replaces the bare `assert (... == None)`, which is
        # stripped under `python -O` and compares to None with `==`.
        self.assertIsNone(self.ctx.load("ns1", "testGet").get("k100"))
    finally:
        # Destroy the store in all cases so a failed run leaves nothing behind.
        self.ctx.load("ns1", "testGet", options=options).destroy()
def test_collapse_partitions(self):
    """Check ``collapse_partitions`` on a table filled from ``range(5)``.

    The collapsing function gathers the pairs it is given into a list. The
    expected ``get_value`` result is a single entry keyed ``4`` whose value
    is the list of all five ``(i, i)`` pairs.
    """
    options = get_default_options()
    options['include_key'] = False
    rp = self.ctx.load("ns1", "test_collapse_partitions", options=options).put_all(range(5), options=options)

    # A comprehension replaces the append loop whose accumulator was named
    # `sum` and shadowed the builtin.
    def f(iterator):
        return [(k, v) for k, v in iterator]

    print(list(rp.collapse_partitions(f).get_all()))
    self.assertEqual(
        get_value(rp.collapse_partitions(f)),
        [(4, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)])])
def test_put_all(self):
    """Write ``kv_list(1000)`` with ``put_all`` and check count and content.

    The content is compared with ``assertUnOrderListEqual``, and the table
    is destroyed at the end.
    """
    options = get_default_options()
    table = self.ctx.load("ns1", "testPutAll", options=options)
    # include_key is switched on only after the store has been loaded.
    options['include_key'] = True
    table.put_all(kv_list(1000), options=options)

    self.assertEqual(table.count(), 1000)
    self.assertUnOrderListEqual(table.get_all(), kv_list(1000))
    table.destroy()
def test_multi_partition_put_all(self):
    """Write ``kv_list(100)`` with ``put_all`` and compare ``get_value`` output.

    The table is destroyed at the end. The ``count()`` check is disabled.
    """
    options = get_default_options()
    options['include_key'] = True
    store = self.ctx.load("ns1", "testMultiPartitionPutAll2020", options=options)
    store.put_all(kv_list(100), options=options)

    # self.assertEqual(store.count(), 100)
    self.assertEqual(get_value(store), kv_list(100))
    store.destroy()
def test_map(self):
    """Map every key to ``k + 1`` on a 1000-value table and check the size.

    The ``testMap2`` store is destroyed and refilled with 1000 copies of
    ``"ssss"`` written without keys. It is then reloaded with the default
    options before mapping.
    """
    def shift_key(k, v):
        return (k + 1, v)

    stale = self.ctx.load("ns1", "testMap2")
    stale.destroy()
    self.ctx.load("ns1", "testMap2").put_all(
        ("s" * 4 for _ in range(1000)),
        options={"include_key": False},
    )

    options = get_default_options()
    rp = self.ctx.load("ns1", "testMap2", options=options)
    print(rp.map(shift_key).count())
    self.assertEqual(len(get_value(rp.map(shift_key))), 1000)
def test_aggregate(self):
    """Print the result of ``aggregate`` with ``add`` as seq_op and ``mul`` as comb_op.

    The store is loaded with ``total_partitions`` 3 and filled from
    ``range(10)``. No assertions are made.
    """
    import operator

    options = get_default_options()
    options['total_partitions'] = 3
    rp = self.ctx.load("ns1", "testMultiPartitionAggregate", options=options)
    rp.put_all(range(10), options=options)

    print(list(rp.get_all()))
    print('count:', rp.count())
    aggregated = rp.aggregate(zero_value=0, seq_op=operator.add, comb_op=operator.mul)
    print(list(aggregated.get_all()))