def test_vector_sketch(self):
    """Exercise sketch summaries on an SArray built from numeric lists.

    Validates the top-level sketch, the element-length summary, the
    flattened element summary and the frequent-items table, then checks
    sub sketches requested with a single key, with a list of keys, and
    with a range of indexes.
    """
    vector_data = [[], [1, 2], [3], [4, 5, 6, 7], [8, 9, 10], None]
    sa = SArray(data=vector_data)

    sketch = sa.sketch_summary()
    self.__validate_sketch_result(sketch, sa)
    self.__validate_sketch_result(
        sketch.element_length_summary(), sa.dropna().item_length()
    )

    # The element summary is compared against every value of the
    # non-missing rows, flattened into a single SArray.
    flattened = list(itertools.chain.from_iterable(list(sa.dropna())))
    self.__validate_sketch_result(sketch.element_summary(), SArray(flattened))

    fi = sketch.frequent_items()
    self.assertEqual(len(fi), 5)
    self.assertEqual((fi['[1 2]']), 1)
    self.assertEqual((fi['[4 5 6 7]']), 1)

    # sub sketch with one key
    s = sa.sketch_summary(sub_sketch_keys=1).element_sub_sketch(1)
    expected = sa.vector_slice(1)
    self.__validate_sketch_result(s, expected)

    # sub sketch with multiple keys
    keys = [1, 3]
    s = sa.sketch_summary(sub_sketch_keys=keys).element_sub_sketch(keys)
    self.assertEqual(len(s), len(keys))
    for key in keys:
        # Membership test instead of dict.has_key(), which Python 3 removed;
        # `in` works on both Python 2 and Python 3 mappings.
        self.assertIn(key, s)
        expected = sa.vector_slice(key)
        self.__validate_sketch_result(s[key], expected)

    # No keys are passed to element_sub_sketch() here; the assertion expects
    # one entry per index that was requested via sub_sketch_keys.
    indexes = range(0, 10)
    s = sa.sketch_summary(sub_sketch_keys=indexes).element_sub_sketch()
    self.assertEqual(len(s), len(indexes))
def test_vector_sketch(self):
    """Exercise sketch summaries on an SArray built from numeric lists.

    Validates the top-level sketch, the element-length summary, the
    flattened element summary and the frequent-items table, then checks
    sub sketches requested with a single key, with a list of keys, and
    with a range of indexes.
    """
    vector_data = [[], [1, 2], [3], [4, 5, 6, 7], [8, 9, 10], None]
    sa = SArray(data=vector_data)

    sketch = sa.sketch_summary()
    self.__validate_sketch_result(sketch, sa)
    self.__validate_sketch_result(
        sketch.element_length_summary(), sa.dropna().item_length()
    )

    # The element summary is compared against every value of the
    # non-missing rows, flattened into a single SArray.
    flattened = list(itertools.chain.from_iterable(list(sa.dropna())))
    self.__validate_sketch_result(sketch.element_summary(), SArray(flattened))

    fi = sketch.frequent_items()
    self.assertEqual(len(fi), 5)
    self.assertEqual((fi['[1 2]']), 1)
    self.assertEqual((fi['[4 5 6 7]']), 1)

    # sub sketch with one key
    s = sa.sketch_summary(sub_sketch_keys=1).element_sub_sketch(1)
    expected = sa.vector_slice(1)
    self.__validate_sketch_result(s, expected)

    # sub sketch with multiple keys
    keys = [1, 3]
    s = sa.sketch_summary(sub_sketch_keys=keys).element_sub_sketch(keys)
    self.assertEqual(len(s), len(keys))
    for key in keys:
        # Membership test instead of dict.has_key(), which Python 3 removed;
        # `in` works on both Python 2 and Python 3 mappings.
        self.assertIn(key, s)
        expected = sa.vector_slice(key)
        self.__validate_sketch_result(s[key], expected)

    # No keys are passed to element_sub_sketch() here; the assertion expects
    # one entry per index that was requested via sub_sketch_keys.
    indexes = range(0, 10)
    s = sa.sketch_summary(sub_sketch_keys=indexes).element_sub_sketch()
    self.assertEqual(len(s), len(indexes))
def test_list_sketch(self):
    """Check the sketch summary of an SArray whose rows are mixed lists.

    Runs the nested-sketch validation helper, then verifies the number of
    unique rows, the element summary (compared against the flattened
    non-missing rows cast to str) and the frequent-items counts.
    """
    rows = [
        [],
        [1, 2],
        [1, 2],
        ['a', 'a', 'a', 'b'],
        [1, 1, 2],
        None,
    ]
    sa = SArray(rows)
    self.__validate_nested_sketch_result(sa)

    summary = sa.sketch_summary()
    self.assertEqual(summary.num_unique(), 4)

    per_element = summary.element_summary()
    # Flatten every non-missing row into one sequence of elements.
    present_rows = list(sa.dropna())
    flat_elements = list(itertools.chain.from_iterable(present_rows))
    reference = SArray(flat_elements, str)
    self.__validate_sketch_result(per_element, reference)

    frequent = summary.frequent_items()
    self.assertEqual(len(frequent), 4)
    # Keys are the string renderings of the rows as asserted by this test.
    expected_counts = (
        ('[1,2]', 2),
        ('["a","a","a","b"]', 1),
    )
    for rendered, count in expected_counts:
        self.assertEqual(frequent[rendered], count)
def test_list_sketch(self):
    """Verify sketch results for an SArray of heterogeneous list rows.

    The nested-sketch helper is run first; afterwards the unique-row
    count, the element summary (checked against the flattened non-missing
    rows converted to str) and two frequent-item counts are asserted.
    """
    source_rows = [[], [1, 2], [1, 2], ['a', 'a', 'a', 'b'], [1, 1, 2], None]
    sa = SArray(source_rows)
    self.__validate_nested_sketch_result(sa)

    list_sketch = sa.sketch_summary()
    unique_rows = list_sketch.num_unique()
    self.assertEqual(unique_rows, 4)

    elem_sketch = list_sketch.element_summary()
    # Chain the non-missing rows together so each element stands alone.
    non_missing = list(sa.dropna())
    chained = itertools.chain.from_iterable(non_missing)
    flattened_as_str = SArray(list(chained), str)
    self.__validate_sketch_result(elem_sketch, flattened_as_str)

    item_counts = list_sketch.frequent_items()
    self.assertEqual(len(item_counts), 4)
    pair_count = item_counts['[1,2]']
    self.assertEqual(pair_count, 2)
    letters_count = item_counts['["a","a","a","b"]']
    self.assertEqual(letters_count, 1)