示例#1
0
    def test_ufunc_unique_b(self):
        '''Check ufunc_unique on a 2D integer array with no axis, axis 0, and axis 1.'''
        source = np.array([[1, 1], [1, 2], [1, 2]])

        # no axis given: unique values over all elements
        flattened = ufunc_unique(source)
        self.assertEqual(flattened.tolist(), [1, 2])

        # axis 0: the duplicated row [1, 2] appears once
        by_axis_0 = ufunc_unique(source, axis=0)
        self.assertEqual(by_axis_0.tolist(), [[1, 1], [1, 2]])

        # axis 1: both columns are distinct, so the values are unchanged
        by_axis_1 = ufunc_unique(source, axis=1)
        self.assertEqual(by_axis_1.tolist(), [[1, 1], [1, 2], [1, 2]])
示例#2
0
    def test_ufunc_unique_c(self):
        '''Check ufunc_unique on a 2D object array holding mixed types; results are compared to sets.'''
        mixed = np.array(
            [[1, 'x', 1], [1, None, 1], [1, 'x', 1]],
            dtype=object,
        )

        # no axis given: the distinct elements
        elements = ufunc_unique(mixed)
        self.assertEqual(elements, {'x', 1, None})

        # axis 0: the distinct rows, as tuples
        rows = ufunc_unique(mixed, axis=0)
        self.assertEqual(rows, {(1, 'x', 1), (1, None, 1)})

        # axis 1: the distinct columns, as tuples
        columns = ufunc_unique(mixed, axis=1)
        self.assertEqual(columns, {(1, 1, 1), ('x', None, 'x')})
示例#3
0
    def test_ufunc_unique_a(self):
        '''Check ufunc_unique on 1D arrays: integer dtype, object dtype, and mixed-type object dtype.'''
        # homogeneous integers with a numeric dtype
        ints = np.array([1, 1, 1, 2, 2])
        unique_ints = ufunc_unique(ints)
        self.assertEqual(unique_ints.tolist(), [1, 2])

        # the same integers stored with an object dtype
        ints_as_objects = np.array([1, 1, 1, 2, 2], dtype=object)
        unique_objects = ufunc_unique(ints_as_objects)
        self.assertEqual(unique_objects.tolist(), [1, 2])

        # mixed types: the result is compared against a set
        mixed = np.array([1, 'x', 1, None, 2], dtype=object)
        unique_mixed = ufunc_unique(mixed)
        self.assertEqual(unique_mixed, {None, 1, 2, 'x'})
示例#4
0
    def test_ufunc_unique_b(self) -> None:
        '''Check ufunc_unique on a 2D integer array with no axis, axis 0, and axis 1.'''
        source = np.array([[1, 1], [1, 2], [1, 2]])

        # each case pairs the keyword arguments with the expected values
        cases = (
            ({}, [1, 2]),
            ({'axis': 0}, [[1, 1], [1, 2]]),
            ({'axis': 1}, [[1, 1], [1, 2], [1, 2]]),
        )
        for kwargs, expected in cases:
            result = ufunc_unique(source, **kwargs)
            # narrow the return type before calling ndarray methods
            assert isinstance(result, np.ndarray)
            self.assertEqual(result.tolist(), expected)
示例#5
0
def pivot_outer_index(
    frame: 'Frame',
    index_fields: tp.Sequence[tp.Hashable],
    index_depth: int,
    index_constructor: IndexConstructor = None,
) -> IndexBase:
    '''Build an index from the unique labels found in the ``index_fields`` columns of ``frame``.

    Args:
        frame: the Frame whose columns supply the labels.
        index_fields: column labels to draw index values from.
        index_depth: 1 selects the single-column path; any other value selects the multi-column path.
        index_constructor: optional constructor used in place of the default; it is partialed with the derived ``name``.

    Returns:
        For a depth of 1, the index built from the unique values of the one selected column, named with that field. Otherwise, the result of calling ``flat()`` on the index built from the unique values (along axis 0) of the selected columns, named with the tuple of fields.
    '''
    # a single field is selected by its label alone, several by the sequence of labels
    if index_depth > 1:
        index_loc = index_fields
    else:
        index_loc = index_fields[0]

    if index_depth == 1:
        column = frame._blocks._extract_array_column(
            frame._columns._loc_to_iloc(index_loc))
        unique_values = ufunc_unique(column, axis=0)

        label = index_fields[0]
        default = partial(Index, name=label)
        explicit = None
        if index_constructor is not None:
            explicit = partial(index_constructor, name=label)

        return index_from_optional_constructor(
            unique_values,
            default_constructor=default,
            explicit_constructor=explicit,
        )

    # depth other than 1
    # NOTE: this might force type an undesirable consolidation
    selected = frame._blocks._extract_array(
        column_key=frame._columns._loc_to_iloc(index_loc))
    unique_values = ufunc_unique(selected, axis=0)

    # NOTE: if index_types need to be provided to an IH here, they must be partialed in the single-argument index_constructor
    label = tuple(index_fields)
    default = partial(IndexHierarchy.from_labels, name=label)
    explicit = None
    if index_constructor is not None:
        explicit = partial(index_constructor, name=label)

    hierarchy = index_from_optional_constructor(  # type: ignore
        unique_values,
        default_constructor=default,
        explicit_constructor=explicit,
    )
    return hierarchy.flat()
示例#6
0
 def test_ufunc_unique(self, array: np.ndarray) -> None:
     '''The length of the axis-0 unique result never exceeds the length of the input's first dimension.'''
     unique_along_0 = util.ufunc_unique(array, axis=0)
     unique_count = len(unique_along_0)
     input_count = array.shape[0]
     self.assertTrue(unique_count <= input_count)
示例#7
0
 def unique(self) -> np.ndarray:
     '''
     Return a NumPy array of unique values.
     '''
     # delegate to ufunc_unique on this container's values
     values = self.values
     return ufunc_unique(values)
示例#8
0
def pivot_core(
    *,
    frame: 'Frame',
    index_fields: tp.List[tp.Hashable],
    columns_fields: tp.List[tp.Hashable],
    data_fields: tp.List[tp.Hashable],
    func_fields: tp.Tuple[tp.Hashable, ...],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    fill_value: object = np.nan,
    index_constructor: IndexConstructor = None,
) -> 'Frame':
    '''Core implementation of Frame.pivot(). The Frame has already been reduced to just relevant columns, and all fields groups are normalized as lists of hashables.

    Args:
        frame: the source Frame.
        index_fields: column labels whose values form the result index.
        columns_fields: column labels whose values are grouped to form the result columns; may be empty, in which case grouping is on ``index_fields`` only.
        data_fields: column labels whose values are aggregated.
        func_fields: function labels combined with ``data_fields`` when building result column labels.
        func_single: a single aggregation function, or None when functions come from ``func_map``.
        func_map: pairs of (label, function).
        fill_value: value given to ``reindex`` for index labels missing from a columns group.
        index_constructor: optional constructor for the result index; it is partialed with the derived index name.

    Returns:
        A new Frame. With no ``columns_fields`` this is built from the grouped items and relabeled with the final columns; otherwise it is an instance of ``frame.__class__`` assembled from the per-group blocks.
    '''
    from static_frame.core.series import Series
    from static_frame.core.frame import Frame

    data_fields_len = len(data_fields)
    index_depth = len(index_fields)

    # all are lists of hashables; get converted to lists of integers
    columns_loc_to_iloc = frame.columns._loc_to_iloc
    index_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(
        index_fields)  #type: ignore
    data_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(
        data_fields)  #type: ignore
    columns_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(
        columns_fields)  #type: ignore

    # For data fields, we add the field name, not the field values, to the columns.
    columns_name = tuple(columns_fields)
    if data_fields_len > 1 or not columns_fields:
        # if no columns_fields, have to add values label
        # NOTE: chain over the list of labels itself; unpacking the list would iterate each label, splitting a multi-character string label into characters and failing on a non-iterable label
        columns_name = tuple(chain(columns_fields, ('values', )))
    if len(func_map) > 1:
        columns_name = columns_name + ('func', )

    columns_depth = len(columns_name)
    if columns_depth == 1:
        columns_name = columns_name[0]  # type: ignore
        columns_constructor = partial(frame._COLUMNS_CONSTRUCTOR,
                                      name=columns_name)
    else:
        columns_constructor = partial(
            frame._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels,
            depth_reference=columns_depth,
            name=columns_name)

    dtype_map = frame.dtypes
    dtypes_per_data_fields = tuple(
        pivot_records_dtypes(
            dtype_map=dtype_map,
            data_fields=data_fields,
            func_single=func_single,
            func_map=func_map,
        ))
    # A single dtype is only derived for one function over one data field; default to None so the single-column paths below never read an unbound local.
    # NOTE(review): assumes dtype=None leaves dtype selection to Series.from_items -- confirm
    dtype_single: tp.Optional[np.dtype] = None
    if func_single and data_fields_len == 1:
        dtype_single = ufunc_dtype_to_dtype(func_single,
                                            dtype_map[data_fields[0]])

    #---------------------------------------------------------------------------
    # first major branch: if we are only grouping by index fields

    if not columns_fields:  # group by is only index_fields
        columns = data_fields if func_single else tuple(
            product(data_fields, func_fields))

        # NOTE: at this time we do not automatically give back an IndexHierarchy when index_depth is == 1, as the order of the resultant values may not be hierarchable.
        name_index = index_fields[0] if index_depth == 1 else tuple(
            index_fields)
        if index_constructor:
            index_constructor = partial(index_constructor, name=name_index)
        else:
            index_constructor = partial(Index, name=name_index)

        if len(columns) == 1:
            # assert len(data_fields) == 1
            # a single result column: build a Series of grouped items and promote it to a Frame
            f = frame.from_series(
                Series.from_items(
                    pivot_items(
                        blocks=frame._blocks,
                        group_fields_iloc=index_fields_iloc,
                        group_depth=index_depth,
                        data_field_iloc=data_fields_iloc[0],
                        func_single=func_single,
                    ),
                    name=columns[0],
                    index_constructor=index_constructor,
                    dtype=dtype_single,
                ),
                columns_constructor=columns_constructor)
        else:
            f = frame.from_records_items(
                pivot_records_items(
                    blocks=frame._blocks,
                    group_fields_iloc=index_fields_iloc,
                    group_depth=index_depth,
                    data_fields_iloc=data_fields_iloc,
                    func_single=func_single,
                    func_map=func_map,
                ),
                columns_constructor=columns_constructor,
                columns=columns,
                index_constructor=index_constructor,
                dtypes=dtypes_per_data_fields,
            )

        # have to rename columns if derived in from_concat
        columns_final = (f.columns.rename(columns_name) if columns_depth == 1
                         else columns_constructor(f.columns))
        return f.relabel(columns=columns_final)  #type: ignore

    #---------------------------------------------------------------------------
    # second major branch: we are grouping by index and columns fields

    # avoid doing a multi-column-style selection if not needed
    if len(columns_fields) == 1:
        # a single-field group label is wrapped in a tuple below before extrapolating column labels
        retuple_group_label = True
    else:
        retuple_group_label = False

    # group by on 1 or more columns fields
    # NOTE: explored doing one group on index and columns that insert into pre-allocated arrays, but that proved slower than this approach
    group_key = columns_fields_iloc if len(
        columns_fields_iloc) > 1 else columns_fields_iloc[0]

    index_outer = pivot_outer_index(
        frame=frame,
        index_fields=index_fields,
        index_depth=index_depth,
        index_constructor=index_constructor,
    )

    # collect subframes based on an index of tuples and columns of tuples (if depth > 1)
    sub_blocks = []
    sub_columns_collected: tp.List[tp.Hashable] = []

    for group, _, sub in frame._blocks.group(axis=0, key=group_key):
        # derive the column fields represented by this group
        sub_columns = extrapolate_column_fields(
            columns_fields, group if not retuple_group_label else (group, ),
            data_fields, func_fields)
        sub_columns_collected.extend(sub_columns)

        # sub is TypeBlocks unique value in columns_group; this may or may not have unique index fields; if not, it needs to be aggregated
        if index_depth == 1:
            sub_index_labels = sub._extract_array_column(index_fields_iloc[0])
            sub_index_labels_unique = ufunc_unique(sub_index_labels)
        else:  # match to an index of tuples; the order might not be the same as IH
            # NOTE: might be able to keep arrays and concat below
            sub_index_labels = tuple(
                zip(*(sub._extract_array_column(columns_loc_to_iloc(f))
                      for f in index_fields)))
            sub_index_labels_unique = set(sub_index_labels)

        sub_frame: tp.Union[Frame, Series]

        # if sub_index_labels are not unique we need to aggregate
        if len(sub_index_labels_unique) != len(sub_index_labels):
            # if sub_columns length is 1, that means that we only need to extract one column out of the sub Frame
            if len(sub_columns) == 1:
                assert len(data_fields) == 1
                # NOTE: grouping on index_fields; can pre-process array_to_groups_and_locations
                sub_frame = Series.from_items(
                    pivot_items(
                        blocks=sub,
                        group_fields_iloc=index_fields_iloc,
                        group_depth=index_depth,
                        data_field_iloc=data_fields_iloc[0],
                        func_single=func_single,
                    ),
                    dtype=dtype_single,
                )
            else:
                sub_frame = Frame.from_records_items(
                    pivot_records_items(blocks=sub,
                                        group_fields_iloc=index_fields_iloc,
                                        group_depth=index_depth,
                                        data_fields_iloc=data_fields_iloc,
                                        func_single=func_single,
                                        func_map=func_map),
                    dtypes=dtypes_per_data_fields,
                )
        else:
            # we have unique values per index item, but may not have a complete index
            if func_single:
                # NOTE: should apply function even with func_single
                if len(data_fields) == 1:
                    sub_frame = Frame(
                        sub._extract_array_column(data_fields_iloc[0]),
                        index=sub_index_labels,
                        index_constructor=index_constructor,
                        own_data=True)
                else:
                    sub_frame = Frame(
                        sub._extract(row_key=None,
                                     column_key=data_fields_iloc),
                        index=sub_index_labels,
                        index_constructor=index_constructor,
                        own_data=True)
            else:

                def blocks() -> tp.Iterator[np.ndarray]:
                    # one copy of each data column per function; the function itself is not applied on this path
                    for field in data_fields_iloc:
                        for _, func in func_map:
                            yield sub._extract_array_column(field)

                sub_frame = Frame(
                    TypeBlocks.from_blocks(blocks()),
                    index=sub_index_labels,
                    own_data=True,
                )

        # align this group to the full outer index, filling labels the group lacks
        sub_frame = sub_frame.reindex(
            index_outer,
            own_index=True,
            fill_value=fill_value,
        )
        if sub_frame.ndim == 1:
            sub_blocks.append(sub_frame.values)
        else:
            sub_blocks.extend(sub_frame._blocks._blocks)  # type: ignore

    tb = TypeBlocks.from_blocks(sub_blocks)
    return frame.__class__(
        tb,
        index=index_outer,
        columns=columns_constructor(sub_columns_collected),
        own_data=True,
        own_index=True,
        own_columns=True,
    )