def test_reblock_compatible(self, tb1: TypeBlocks, tb2: TypeBlocks) -> None:
    '''
    Check that ``reblock_compatible`` gives the same answer in both directions, and a falsy answer when the column counts differ.
    '''
    forward = tb1.reblock_compatible(tb2)
    backward = tb2.reblock_compatible(tb1)

    # the answer must not depend on which operand is the receiver
    self.assertTrue(forward == backward)

    # a differing column count rules out compatibility
    same_width = tb1.shape[1] == tb2.shape[1]
    if not same_width:
        self.assertFalse(forward)
def test_block_compatible(self, tb1: TypeBlocks, tb2: TypeBlocks) -> None:
    '''
    Check that ``block_compatible`` gives the same answer in both directions for each axis argument (None, 0, 1), and a falsy answer for axis None when the shapes differ.
    '''
    for axis in (None, 0, 1):
        forward = tb1.block_compatible(tb2, axis)
        backward = tb2.block_compatible(tb1, axis)

        # the answer must not depend on which operand is the receiver
        self.assertTrue(forward == backward)

        # the shape rule below is only asserted for axis None
        if axis is not None or tb1.shape == tb2.shape:
            continue
        # different shapes cannot be block compatible
        self.assertFalse(forward)
def _to_frame(self, constructor, axis: int = 1):
    '''
    Shared helper for building a two-dimensional container from ``self.values``.

    Args:
        constructor: callable invoked with a ``TypeBlocks`` instance plus ``index``, ``columns``, ``own_data``, ``own_index`` and ``own_columns`` keyword arguments; its result is returned unchanged.
        axis: 1 presents the values as a single column, labelled along the index by ``self._index``; 0 presents the values as a single row, labelled along the columns by ``self._index``. When ``self._name`` is not None it is used as the one label on the other axis; otherwise that axis is given None.

    Raises:
        NotImplementedError: if ``axis`` is neither 0 nor 1.
    '''
    # imported at call time rather than at module level, as in the original
    from static_frame import TypeBlocks

    if axis == 1:
        # column orientation: the values are passed through as they are
        def blocks():
            yield self.values

        index, own_index = self._index, True
        columns, own_columns = (None if self._name is None else (self._name,)), False
    elif axis == 0:
        # row orientation: reshape the values into a single row of shape (1, n)
        def blocks():
            values = self.values
            yield values.reshape((1, values.shape[0]))

        index, own_index = (None if self._name is None else (self._name,)), False
        # the existing index object is handed over as the columns; it is immutable
        columns, own_columns = self._index, True
    else:
        raise NotImplementedError('no handling for axis', axis)

    data = TypeBlocks.from_blocks(blocks())
    return constructor(
            data,
            index=index,
            columns=columns,
            own_data=True,
            own_index=own_index,
            own_columns=own_columns,
            )
def test_reblock_signature(self, tb: TypeBlocks) -> None:
    '''
    Check the entries yielded by ``_reblock_signature``: there are at least as many entries as unique dtypes, and the second element of each entry (a column width) sums to the column count of the shape.
    '''
    post = tuple(tb._reblock_signature())
    unique_dtypes = np.unique(tb.dtypes)

    # the reblock signature must have at least as many entries as types
    self.assertGreaterEqual(len(post), len(unique_dtypes))

    # the sum of column widths is equal to the columns in shape
    # NOTE: this must be assertEqual; assertTrue takes its second argument as the failure message, so the widths would never be compared to the shape
    self.assertEqual(sum(p[1] for p in post), tb.shape[1])
def test_frame_iter_element_c(self) -> None:
    '''
    Check element iteration, and element-wise ``apply``, on a Frame built from three blocks and given hierarchical columns.
    '''
    object_block = np.array([[None, None], [None, 1], [None, 5]], dtype=object)
    bool_block = np.array([True, False, True])
    str_block = np.array([['a'], ['b'], ['c']])

    tb1 = TypeBlocks.from_blocks((str_block, bool_block, object_block))
    row_labels = self.get_letters(None, tb1.shape[0])
    column_labels = IndexHierarchy.from_product(('i', 'ii'), ('a', 'b'))
    f1 = Frame(tb1, index=row_labels, columns=column_labels)

    # elements are expected one row at a time
    expected_elements = [
            'a', True, None, None,
            'b', False, None, 1,
            'c', True, None, 5,
            ]
    self.assertEqual(list(f1.iter_element()), expected_elements)

    def lower_without_e(x):
        return str(x).lower().replace('e', '')

    f2 = f1.iter_element().apply(lower_without_e)

    # the columns of the result are of the same class as the source columns
    self.assertEqual(f1.columns.__class__, f2.columns.__class__)

    expected_pairs = (
            (('i', 'a'), (('a', 'a'), ('b', 'b'), ('c', 'c'))),
            (('i', 'b'), (('a', 'tru'), ('b', 'fals'), ('c', 'tru'))),
            (('ii', 'a'), (('a', 'non'), ('b', 'non'), ('c', 'non'))),
            (('ii', 'b'), (('a', 'non'), ('b', '1'), ('c', '5'))),
            )
    self.assertEqual(f2.to_pairs(0), expected_pairs)
def test_element_items(self, tb: TypeBlocks) -> None:
    '''
    Check that every (key, value) pair from ``element_items`` matches the value selected with ``iloc`` for the same key, and that the number of pairs equals ``size``.
    '''
    # NOTE: this found a flaw in _extract_iloc where we tried to optimize selection with a unified array
    seen = 0
    for key, value in tb.element_items():
        seen += 1
        self.assertEqualWithNaN(value, tb.iloc[key])

    self.assertEqual(seen, tb.size)
def test_drop(self, tb: TypeBlocks) -> None:
    '''
    Check the shape resulting from ``drop`` with a single row, a trailing row slice, a single column, and a trailing column slice.
    '''
    row_count = tb.shape[0]
    col_count = tb.shape[1]

    # dropping any one row leaves one fewer row
    for row in range(row_count):
        self.assertTrue(tb.drop(row).shape[0] == row_count - 1)

    # dropping from `start` to the end leaves `start` rows
    if row_count > 2:
        for start in range(1, row_count):
            remaining = tb.drop(slice(start, None))
            self.assertTrue(remaining.shape[0] == start)

    # dropping any one column leaves one fewer column
    for col in range(col_count):
        self.assertTrue(tb.drop((None, col)).shape[1] == col_count - 1)

    # dropping from `start` to the end leaves `start` columns
    if col_count > 2:
        for start in range(1, col_count):
            remaining = tb.drop((None, slice(start, None)))
            self.assertTrue(remaining.shape[1] == start)
def test_assign_blocks_from_keys(self, tb1: TypeBlocks) -> None:
    '''
    Check ``_assign_from_iloc_by_unit`` when assigning the value 300 by a one-item column key list, a one-item row key list, and a column slice running to the end.
    '''
    shape = tb1.shape
    fill = 300

    # a single value assigned via a list holding one column key
    for col in range(shape[1]):
        blocks = tb1._assign_from_iloc_by_unit(column_key=[col], value=fill)
        tb2 = TypeBlocks.from_blocks(blocks)
        self.assertTrue(shape == tb2.shape)
        # no more than one type should be changed
        changed = (tb1.dtypes != tb2.dtypes).sum()
        self.assertTrue(changed <= 1)

    # a single value assigned via a list holding one row key
    for row in range(shape[0]):
        blocks = tb1._assign_from_iloc_by_unit(row_key=[row], value=fill)
        tb3 = TypeBlocks.from_blocks(blocks)
        self.assertTrue(shape == tb3.shape)
        self.assertTrue(tb3.iloc[row, 0] == 300)

    # column slices to the end
    for col in range(shape[1]):
        blocks = tb1._assign_from_iloc_by_unit(column_key=slice(col, None), value=fill)
        tb4 = TypeBlocks.from_blocks(blocks)
        self.assertTrue(shape == tb4.shape)
        # we have as many or more blocks
        self.assertTrue(len(tb4.shapes) >= len(tb1.shapes))
def test_basic_attributes(self, tb: TypeBlocks) -> None:
    '''
    Check that ``dtypes``, ``shapes``, ``mloc``, ``ndim``, ``unified``, ``size`` and ``nbytes`` agree with ``shape`` and with one another.
    '''
    row_count = tb.shape[0]
    col_count = tb.shape[1]

    self.assertEqual(len(tb.dtypes), col_count)
    self.assertEqual(len(tb.shapes), len(tb.mloc))
    self.assertEqual(tb.copy().shape, tb.shape)
    self.assertEqual(tb.ndim, 2)
    self.assertEqual(tb.unified, len(tb.mloc) <= 1)

    # with a zero-length axis there are no elements and no bytes
    if row_count <= 0 or col_count <= 0:
        self.assertTrue(tb.size == 0)
        self.assertTrue(tb.nbytes == 0)
    else:
        self.assertTrue(tb.size > 0)
        self.assertTrue(tb.nbytes > 0)
def test_from_element_items(self, shapes: tp.List[tp.Tuple[int, int]], labels: tp.Sequence[tp.Hashable]) -> None:
    '''
    Check that ``from_element_items`` returns the requested shape when given (coordinate, label) items.
    '''
    # the given pairs are used as coordinates; the per-axis maximum plus one is the final shape
    shape = tuple(np.array(shapes).max(axis=0) + 1)

    # pair each coordinate with a label, wrapping around the labels as needed
    items = (
            (coord, labels[position % len(labels)])
            for position, coord in enumerate(shapes)
            )

    post = TypeBlocks.from_element_items(items, shape=shape, dtype=object)
    self.assertEqual(post.shape, shape)
def test_axis_values(self, tb: TypeBlocks) -> None:
    '''
    Check the length and dtype of each array from ``axis_values``, for both axes and both iteration directions.
    '''
    # this test found a flaw in axis_values when dealing with axis 1 and unified, 1D type blocks
    for axis in (0, 1):
        for reverse in (True, False):
            arrays = tuple(tb.axis_values(axis=axis, reverse=reverse))

            for position, array in enumerate(arrays):
                self.assertTrue(len(array) == tb.shape[axis])

                if axis != 0:
                    # NOTE: only checking kind because cases were found where byte-order deviates
                    self.assertTrue(array.dtype.kind == tb._row_dtype.kind)
                    continue

                # axis 0: columns; in reverse, positions are counted from the last column
                if reverse:
                    column = tb.shape[1] - 1 - position
                else:
                    column = position
                self.assertTrue(array.dtype == tb.dtypes[column])
def test_frame_iter_group_labels_e(self) -> None:
    '''
    Check ``iter_group_labels_items`` along axis 1 with depth levels (2, 1) of three-level hierarchical columns.
    '''
    level_arrays = (
            np.array(('A', 'A', 'B', 'B', 'B')),
            np.array((4, 2, 1, 0, 4)),
            np.array(('b', 'a', 'c', 'a', 'b')),
            )
    columns = IndexHierarchy._from_type_blocks(TypeBlocks.from_blocks(level_arrays))

    first_record = (True, False, 1, 2, 1)
    second_record = (False, True, 30, 8, 7)
    f = Frame.from_records(
            (first_record, second_record),
            columns=columns,
            index=('p', 'q'),
            )

    group_labels = []
    group_values = []
    for label, group in tuple(f.iter_group_labels_items((2, 1), axis=1)):
        group_labels.append(label)
        group_values.append(group.values.tolist())

    self.assertEqual(
            group_labels,
            [('a', 0), ('a', 2), ('b', 4), ('c', 1)],
            )
    self.assertEqual(
            group_values,
            [
                [[2], [8]],
                [[False], [True]],
                [[True, 1], [False, 7]],
                [[1], [30]],
            ],
            )
def test_display(self, tb: TypeBlocks) -> None:
    '''
    Check that ``display`` returns something of non-zero length.
    '''
    rendered_length = len(tb.display())
    self.assertTrue(rendered_length > 0)
def test_from_zero_size_shape(self, value: int) -> None:
    '''
    Check that ``from_zero_size_shape`` preserves the requested shape when either axis has length zero.
    '''
    zero_rows = (0, value)
    zero_columns = (value, 0)

    for shape in (zero_rows, zero_columns):
        built = TypeBlocks.from_zero_size_shape(shape=shape)
        self.assertEqual(built.shape, shape)
def test_shape_filter(self, shape) -> None:
    '''
    Check that ``TypeBlocks.shape_filter`` returns a result of length two for the given input.
    '''
    # NOTE: this must be assertEqual; assertTrue takes its second argument as the failure message, so the length would only be tested for truthiness and never compared to 2
    self.assertEqual(len(TypeBlocks.shape_filter(shape)), 2)
def test_consolidate(self, tb: TypeBlocks) -> None:
    '''
    Check that ``consolidate`` keeps the shape and the per-column dtypes unchanged.
    '''
    consolidated = tb.consolidate()

    self.assertEqual(consolidated.shape, tb.shape)

    dtypes_match = consolidated.dtypes == tb.dtypes
    self.assertTrue(dtypes_match.all())
def test_reblock(self, tb: TypeBlocks) -> None:
    '''
    Check that a TypeBlocks rebuilt from the blocks of ``_reblock`` keeps the shape and the per-column dtypes unchanged.
    '''
    reblocked = TypeBlocks.from_blocks(tb._reblock())

    self.assertEqual(reblocked.shape, tb.shape)

    dtypes_match = reblocked.dtypes == tb.dtypes
    self.assertTrue(dtypes_match.all())
def test_shape_filter(self, shape: np.ndarray) -> None:
    '''
    Check that ``TypeBlocks.shape_filter`` returns a result of length two for the given array.
    '''
    # NOTE: this must be assertEqual; assertTrue takes its second argument as the failure message, so the length would only be tested for truthiness and never compared to 2
    self.assertEqual(len(TypeBlocks.shape_filter(shape)), 2)