def test_resolve_dtype_iter_a(self) -> None:
    '''
    resolve_dtype_iter collapses identical dtypes to themselves, widens
    purely numeric mixtures to float64, and falls back to object when
    Booleans or strings are mixed with other kinds.
    '''
    ints = np.array([1, 2, 3])
    bools = np.array([False, True, False])
    unicodes = np.array(['b', 'c', 'd'])
    floats64 = np.array([2.3, 3.2])
    byte_strs = np.array(['test', 'test again'], dtype='S')
    floats32 = np.array([2.3, 5.4], dtype='float32')

    # identical dtypes resolve to themselves
    self.assertEqual(resolve_dtype_iter((ints.dtype, ints.dtype)), ints.dtype)
    self.assertEqual(resolve_dtype_iter((bools.dtype, bools.dtype)), bools.dtype)

    # boolean with mixed types
    self.assertEqual(
            resolve_dtype_iter((bools.dtype, bools.dtype, unicodes.dtype)),
            np.object_)
    self.assertEqual(
            resolve_dtype_iter((bools.dtype, bools.dtype, byte_strs.dtype)),
            np.object_)
    self.assertEqual(
            resolve_dtype_iter((bools.dtype, bools.dtype, floats32.dtype)),
            np.object_)

    # numerical types go to float64
    self.assertEqual(
            resolve_dtype_iter((ints.dtype, floats64.dtype, floats32.dtype)),
            np.float64)

    # add in bool or str, goes to object
    self.assertEqual(
            resolve_dtype_iter((ints.dtype, floats64.dtype, floats32.dtype, bools.dtype)),
            np.object_)
    self.assertEqual(
            resolve_dtype_iter((ints.dtype, floats64.dtype, floats32.dtype, byte_strs.dtype)),
            np.object_)

    # mixed strings go to the largest
    self.assertEqual(
            resolve_dtype_iter((unicodes.dtype, byte_strs.dtype)),
            np.dtype('<U10'))
def values(self) -> np.ndarray:
    '''
    Return a single immutable NumPy 2D array of all labels found in this IndexLevels instance. This may coerce types.
    '''
    depth_count = self.depth
    # result shape: one row per leaf label, one column per depth level
    shape = self.__len__(), depth_count
    # need to get a compatible dtype for all dtypes
    dtype = resolve_dtype_iter(self.dtypes_iter())
    labels = np.empty(shape, dtype=dtype)
    row_count = 0

    # breadth-first traversal of levels; each queued entry is
    # (level, depth, partially-filled row of outer labels)
    levels = deque(((self, 0, None), )) # order matters

    while levels:
        level, depth, row_previous = levels.popleft()
        if level.targets is None:
            # leaf level: every label in this index fills a contiguous
            # run of rows, prefixed with the accumulated outer labels
            rows = len(level.index.values)
            row_slice = slice(row_count, row_count + rows)
            labels[row_slice, :] = row_previous
            labels[row_slice, depth] = level.index.values
            row_count += rows
        else: # target is an iterable of np.ndarray
            # interior level: pair each label with its child level and
            # queue the child one depth deeper
            depth_next = depth + 1
            for label, level_target in zip(level.index.values, level.targets):
                if row_previous is None:
                    # shown to be faster to allocate entire row width
                    row = np.empty(depth_count, dtype=dtype)
                else:
                    # copy so sibling branches do not share the row buffer
                    row = row_previous.copy()
                row[depth] = label
                levels.append((level_target, depth_next, row))

    labels.flags.writeable = False
    return labels
def test_concat_resolved_axis_1(self, arrays: tp.List[np.ndarray]) -> None:
    '''
    Concatenation along axis 1 always yields a 2D array whose dtype is
    the resolution of every input array's dtype.
    '''
    combined = util.concat_resolved(arrays, axis=1)
    self.assertEqual(combined.ndim, 2)
    expected_dtype = util.resolve_dtype_iter(a.dtype for a in arrays)
    self.assertEqual(combined.dtype, expected_dtype)
def test_resolve_dtype_iter(self, dtypes: tp.Iterable[np.dtype]) -> None:
    '''resolve_dtype_iter always produces a np.dtype instance.'''
    resolved = util.resolve_dtype_iter(dtypes)
    self.assertTrue(isinstance(resolved, np.dtype))
def dtype_per_depth(self) -> tp.Iterator[np.dtype]:
    '''Yield one resolved dtype per depth level, resolving across all dtypes found at that depth.'''
    for depth in range(self.depth):
        yield resolve_dtype_iter(self.dtypes_at_depth(depth))
def pivot_index_map(*,
        index_src: IndexBase,
        depth_level: DepthLevelSpecifier,
        dtypes_src: tp.Optional[tp.Sequence[np.dtype]],
        ) -> PivotIndexMap:
    '''
    Partition the depths of ``index_src`` into targets (moved to the
    expanded axis) and groups (remaining on the contracted axis), and
    build the lookup structures needed to perform the pivot.

    Args:
        dtypes_src: must be of length equal to axis
    '''
    # We are always moving levels from one axis to another; after application, the expanded axis will always be hierarchical, while the contracted axis may or may not be. From the contract axis, we need to divide the depths into two categories: targets (the depths to be moved and added to expand axis) and groups (unique combinations that remain on the contract axis after removing targets).
    # Unique target labels are added to labels on the expand axis; unique group labels become the new contract axis.

    # Boolean masks over depths: True marks a target depth, the
    # complement marks a group depth.
    target_select = np.full(index_src.depth, False)
    target_select[depth_level] = True
    group_select = ~target_select

    # Split the per-depth label arrays according to the masks.
    group_arrays = []
    target_arrays = []
    for i, v in enumerate(target_select):
        if v:
            target_arrays.append(index_src.values_at_depth(i))
        else:
            group_arrays.append(index_src.values_at_depth(i))

    group_depth = len(group_arrays)
    target_depth = len(target_arrays)
    group_to_dtype: tp.Dict[tp.Optional[tp.Hashable], np.dtype] = {}
    targets_unique: tp.Iterable[tp.Hashable]

    if group_depth == 0:
        # No group depths remain: a single implicit group keyed by None.
        # targets must be a tuple
        group_to_target_map = {
                None: {v: idx for idx, v in enumerate(zip(*target_arrays))}
                }
        targets_unique = [k for k in group_to_target_map[None]]
        if dtypes_src is not None:
            group_to_dtype[None] = resolve_dtype_iter(dtypes_src)
    else:
        group_to_target_map = defaultdict(dict)
        targets_unique = dict() # Store targets in order observed
        # Walk group/target tuples in parallel; dtype iteration falls
        # back to repeat(None) when no dtypes are supplied.
        for axis_idx, (group, target, dtype) in enumerate(zip(
                zip(*group_arrays), # get tuples of len 1 to depth
                zip(*target_arrays),
                (dtypes_src if dtypes_src is not None else repeat(None)),
                )):
            if group_depth == 1:
                # unwrap single-depth groups from their 1-tuple
                group = group[0]
            # targets are transferred labels; groups are the new columns
            group_to_target_map[group][target] = axis_idx
            targets_unique[target] = None #type: ignore
            if dtypes_src is not None:
                # Accumulate a resolved dtype per group across all rows
                # that fall into that group.
                if group in group_to_dtype:
                    group_to_dtype[group] = resolve_dtype(group_to_dtype[group], dtype)
                else:
                    group_to_dtype[group] = dtype

    return PivotIndexMap( #pylint: disable=E1120
            targets_unique=targets_unique,
            target_depth=target_depth,
            target_select=target_select,
            group_to_target_map=group_to_target_map, #type: ignore
            group_depth=group_depth,
            group_select=group_select,
            group_to_dtype=group_to_dtype
            )
def test_concat_resolved_axis_0(self, arrays):
    '''
    Concatenation along axis 0 always yields a 2D array whose dtype is
    the resolution of every input array's dtype.
    '''
    combined = util.concat_resolved(arrays, axis=0)
    self.assertEqual(combined.ndim, 2)
    expected_dtype = util.resolve_dtype_iter(a.dtype for a in arrays)
    self.assertEqual(combined.dtype, expected_dtype)
def test_resolve_dtype_iter(self, dtypes):
    '''resolve_dtype_iter always produces a np.dtype instance.'''
    resolved = util.resolve_dtype_iter(dtypes)
    self.assertTrue(isinstance(resolved, np.dtype))