Пример #1
0
    def test_indexers_to_iloc_c(self) -> None:
        '''Out-of-range indexer coordinates raise KeyError; untouched rows still resolve.'''
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])

        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        # corrupt one coordinate in row 0 (depth 0) and one in row 7 (depth 1)
        corrupted = indexers.copy()
        corrupted[0, 0] = 14
        corrupted[1, 7] = 14
        corrupted = corrupted.T.astype(DTYPE_UINT_DEFAULT)

        # the full array and each corrupted row on its own must fail
        for selection in (slice(None), [0], [7]):
            with self.assertRaises(KeyError):
                _ = hlmap.indexers_to_iloc(corrupted[selection].copy())

        # rows that avoid the corrupted coordinates still map to their positions
        good_rows = [1, 2, 3, 4, 5, 6, 8, 9]
        post = hlmap.indexers_to_iloc(corrupted[good_rows].copy())
        self.assertListEqual(post, good_rows)
Пример #2
0
    def test_deepcopy_a(self) -> None:
        '''A deep copy is value-equal to the source map but shares none of its objects.'''
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])
        source = HierarchicalLocMap(indices=indices, indexers=indexers)
        clone = deepcopy(source)

        # equal values throughout
        self.assertEqual(source.encoding_can_overflow,
                         clone.encoding_can_overflow)
        self.assertListEqual(source.bit_offset_encoders.tolist(),
                             clone.bit_offset_encoders.tolist())
        self.assertEqual(source.encoded_indexer_map,
                         clone.encoded_indexer_map)

        # distinct underlying objects (identity differs even though values match)
        self.assertIsNot(source.bit_offset_encoders,
                         clone.bit_offset_encoders)
        self.assertIsNot(source.encoded_indexer_map,
                         clone.encoded_indexer_map)
Пример #3
0
 def __init__(self,
         labels: IndexInitializer,
         *,
         name: tp.Optional[tp.Hashable] = None
         ):
     '''Initializer.

     Args:
         labels: iterable of labels used to build the index.
         name: optional name to assign to the index.
     '''
     # reduce to arguments relevant for these derived classes:
     # the dtype parameter of Index.__init__ is deliberately not exposed here
     Index.__init__(self, labels=labels, name=name)
Пример #4
0
    def test_init_c(self) -> None:
        '''Duplicate label pairs (last two rows are both (1, 1)) must fail construction.'''
        indexers = np.array([
            [0, 0, 1, 1, 1],
            [0, 1, 0, 1, 1],
        ])
        indices = [Index((0, 1)), Index((0, 1))]

        with self.assertRaises(ErrorInitIndexNonUnique):
            HierarchicalLocMap(indices=indices, indexers=indexers)
Пример #5
0
    def __init__(self,
                 labels: IndexInitializer,
                 *,
                 name: tp.Optional[tp.Hashable] = None):
        '''Initializer.

        {args}
        '''
        # __init__ here leaves out the dtype argument, reducing the signature to arguments relevant for these derived classes
        # all construction is delegated to the base Index
        Index.__init__(self, labels=labels, name=name)
 def test_index_correspondence_a(self) -> None:
     '''Correspondence from a loc_is_iloc index to a larger object index: common but not a subset.'''
     idx0 = Index([0, 1, 2, 3, 4], loc_is_iloc=True)
     idx1 = Index(
         [0, 1, 2, 3, 4, '100185', '100828', '101376', '100312', '101092'],
         dtype=object)
     ic = IndexCorrespondence.from_correspondence(idx0, idx1)

     self.assertFalse(ic.is_subset)
     self.assertTrue(ic.has_common)

     # with loc_is_iloc True, iloc_src comes back as an array
     assert isinstance(ic.iloc_src, np.ndarray)
     expected = [0, 1, 2, 3, 4]
     self.assertEqual(ic.iloc_src.tolist(), expected)
     self.assertEqual(ic.iloc_dst, expected)
Пример #7
0
    def test_indexers_to_iloc_a(self) -> None:
        '''Round-trip: the indexers used to build the map resolve back to positions 0..9.'''
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])
        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        query = indexers.T.astype(DTYPE_UINT_DEFAULT)
        self.assertListEqual(hlmap.indexers_to_iloc(query), list(range(10)))
Пример #8
0
    def test_nbytes_a(self) -> None:
        # Verify that nbytes reflects the size of the map's components; several
        # totals are accepted to tolerate per-platform / automap size variation.
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])

        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        self.assertIn(hlmap.nbytes,
                      (720 + 8 + 8 + 25, 721,
                       705))  # automap + 2 uint64 bit offsets + PyBool
Пример #9
0
    def reindex(self,
                index: tp.Union[Index, tp.Sequence[tp.Any]],
                fill_value=np.nan) -> 'Series':
        '''
        Return a new Series based on the passed index.

        Args:
            index: labels for the resulting Series; an Index/IndexHierarchy or any 1D sequence of labels.
            fill_value: used for labels not found in this Series; may be coerced by the dtype of this Series.
        '''
        # TODO: implement `method` argument with bfill, ffill options

        if isinstance(index, (Index, IndexHierarchy)):
            # always use the Index constructor for safe reuse when possible
            index = index.__class__(index)
        else:  # create the Index if not already an index, assume 1D
            index = Index(index)

        ic = IndexCorrespondence.from_correspondence(self.index, index)

        if ic.is_subset:  # must have some common
            # every requested label exists in self: select directly, no fill needed
            return self.__class__(self.values[ic.iloc_src],
                                  index=index,
                                  own_index=True)

        # start from an array of fill_value; dtype may be widened to hold it
        values = _full_for_fill(self.values.dtype, len(index), fill_value)

        # if some intersection of values
        if ic.has_common:
            values[ic.iloc_dst] = self.values[ic.iloc_src]

        # make immutable so a copy is not made
        values.flags.writeable = False
        return self.__class__(values, index=index, own_index=True)
Пример #10
0
    def test_init_a(self) -> None:
        '''Construction over two depth-5 indices yields the expected bit encodings.'''
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        # depth 0 occupies the low bits; depth 1 starts at bit 3
        self.assertListEqual(hlmap.bit_offset_encoders.tolist(), [0, 3])
        self.assertFalse(hlmap.encoding_can_overflow)
        self.assertListEqual(
            list(hlmap.encoded_indexer_map),
            [35, 19, 8, 1, 28, 0, 27, 18, 2, 32],
        )
Пример #11
0
    def __init__(self,
            labels: IndexInitializer,
            *,
            name: NameType = NAME_DEFAULT,
            loc_is_iloc: bool = False,
            ):
        '''Initializer.

        {args}
        '''
        # NOTE(review): loc_is_iloc appears to exist only for signature compatibility
        # and must be False here; this assert is stripped under -O — confirm a hard
        # raise is not needed.
        assert not loc_is_iloc
        # __init__ here leaves out the dtype argument, reducing the signature to arguments relevant for these derived classes
        Index.__init__(self,
                labels=labels,
                name=name,
                loc_is_iloc=loc_is_iloc,
                )
Пример #12
0
    def test_init_b(self) -> None:
        '''Empty depth-4 input: no encodings, one zero bit offset per depth.'''
        depth = 4
        indices = [Index(()) for _ in range(depth)]
        indexers = [np.array(()) for _ in range(depth)]

        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        self.assertListEqual(list(hlmap.encoded_indexer_map), [])
        self.assertFalse(hlmap.encoding_can_overflow)
        self.assertListEqual(hlmap.bit_offset_encoders.tolist(), [0] * depth)
Пример #13
0
    def test_indexers_to_iloc_b(self) -> None:
        '''Arbitrary row subsets of the original indexers map back to those same positions.'''
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])
        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        rows = indexers.T.astype(DTYPE_UINT_DEFAULT)
        for subset in ([5, 2, 4, 1, 3], [1], [9, 8, 7, 6, 5], [1, 7, 4, 6]):
            self.assertListEqual(hlmap.indexers_to_iloc(rows[subset]), subset)
Пример #14
0
 def test_loc_map_b(self) -> None:
     # a list key selects multiple positions through the index internals
     idx = Index(['a', 'b', 'c', 'd', 'e'])
     post1 = LocMap.loc_to_iloc(
         label_to_pos=idx._map,
         labels=idx._labels,
         positions=idx._positions,
         key=['b', 'd'],
         partial_selection=False,
     )
     self.assertEqual(post1, [1, 3])
    def test_index_correspondence_b(self) -> None:
        '''Self-correspondence of a single-element Index (regression from a hypothesis run).'''
        idx = Index([False], loc_is_iloc=False)
        ic = IndexCorrespondence.from_correspondence(idx, idx)

        self.assertTrue(ic.is_subset)
        self.assertTrue(ic.has_common)
        self.assertEqual(ic.size, 1)
        # in this use case iloc_src is a plain list, iloc_dst an array
        self.assertEqual(ic.iloc_src, [0])
        self.assertEqual(ic.iloc_dst.tolist(), [0]) # type: ignore
Пример #16
0
    def test_loc_to_iloc_b(self) -> None:
        '''Labels absent from the hierarchy raise KeyError in every key form.'''
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])
        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        missing_keys = (
            (5, 'A'),                       # outer label 5 out of range
            (2, ['E']),                     # pairing (2, 'E') never observed
            ([0, 1, 2], ['A', 'B', 'C']),   # combination not present
        )
        for key in missing_keys:
            with self.assertRaises(KeyError):
                hlmap.loc_to_iloc(key, indices)
Пример #17
0
 def loc_to_iloc(self,  # type: ignore
         key: GetItemKeyType,
         offset: tp.Optional[int] = None,
         ) -> GetItemKeyType:
     '''
     Specialized for IndexData indices to convert string data representations into np.datetime64 objects as appropriate.

     Args:
         key: a loc-style key; string datetime representations are translated via key_to_datetime_key.
         offset: optional integer offset forwarded to the base implementation.
     '''
     # not passing self.dtype to key_to_datetime_key so as to allow translation to a foreign datetime; slice comparison will be handled by map_slice_args
     return Index.loc_to_iloc(self,
             key=key,
             offset=offset,
             key_transform=key_to_datetime_key)
Пример #18
0
    def test_bus_to_hierarchy_a(self) -> None:
        # three frames that share both index and columns
        f1 = ff.parse('s(4,4)|v(int,float)|c(I, str)').rename('f1')
        f2 = ff.parse('s(4,4)|v(str)|c(I, str)').rename('f2')
        f3 = ff.parse('s(4,4)|v(bool)|c(I, str)').rename('f3')
        b1 = Bus.from_frames((f1, f2, f3), name='a')

        indices = Index((0, 1, 2, 3))
        columns = Index(('zZbu', 'ztsv', 'zUvW', 'zkuW'))

        # sanity-check that every frame aligns on the expected axes
        for _, frame in b1.items():
            self.assertTrue(indices.equals(frame.index))
            self.assertTrue(columns.equals(frame.columns))


        def test_assertions(axis: int, flag: bool) -> None:
            # hierarchy collects the per-frame axis; opposite is the shared other axis
            hierarchy, opposite = bus_to_hierarchy(b1, axis=axis, deepcopy_from_bus=flag, init_exception_cls=ErrorInitBus)

            if axis == 0:
                expected_tree: tp.Dict[str, Index] = {
                    'f1': indices, 'f2': indices, 'f3': indices
                }
                expected_index = columns
            else:
                expected_index = indices
                expected_tree = {'f1': columns, 'f2': columns, 'f3': columns}

            self.compare_trees(hierarchy.to_tree(), expected_tree)
            self.assertTrue(expected_index.equals(opposite))

        # exercise both axes, with and without deep-copying from the bus
        for axis in (0, 1):
            for flag in (True, False):
                test_assertions(axis, flag)
Пример #19
0
 def _loc_to_iloc(self,  # type: ignore
         key: GetItemKeyType,
         *,
         partial_selection: bool = False,
         ) -> GetItemKeyType:
     '''
     Specialized for IndexData indices to convert string data representations into np.datetime64 objects as appropriate.

     Args:
         key: a loc-style key; string datetime representations are translated via key_to_datetime_key.
         partial_selection: forwarded to the base implementation.
     '''
     # not passing self.dtype to key_to_datetime_key so as to allow translation to a foreign datetime; slice comparison will be handled by map_slice_args
     return Index._loc_to_iloc(self,
             key=key,
             key_transform=key_to_datetime_key,
             partial_selection=partial_selection,
             )
Пример #20
0
    def test_indexers_to_iloc_invalid_input(self) -> None:
        '''Malformed indexer arrays are rejected by assertion.'''
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])
        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        bad_inputs = (
            np.array([0, 1, 2]),                # 1D: must be 2D
            np.array([[0, 1, 2]]),              # shape mismatch: wrong depth
            np.array([[0, 1]]).astype(object),  # invalid dtype
        )
        for arr in bad_inputs:
            with self.assertRaises(AssertionError):
                hlmap.indexers_to_iloc(arr)
Пример #21
0
    def test_loc_to_iloc_a(self) -> None:
        '''Scalar and list key forms resolve to the same positions; label order is preserved.'''
        indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
        ]
        indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
        ])
        hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

        # any list-wrapped component makes the result a list
        for key, expected in (
                ((2, 'A'), 8),
                ((2, ['A']), [8]),
                (([2], 'A'), [8]),
                (([2], ['A']), [8]),
                (([0, 3], 'E'), [9, 0]),
                (([0, 3], ['E']), [9, 0]),
                (([3, 0], 'E'), [0, 9]),
                (([3, 0], ['E']), [0, 9]),
                ):
            self.assertEqual(hlmap.loc_to_iloc(key, indices), expected)

        # an object-dtype array key behaves like a scalar tuple key
        self.assertEqual(
            hlmap.loc_to_iloc(np.array([0, 'E'], dtype=object), indices), 9)
Пример #22
0
    def test_archive_components_npz_write_arrays_g(self) -> None:
        # mixed-dtype blocks written with an index and axis=1 round-trip through NPZ

        a1 = np.arange(12).reshape(3, 4)
        a2 = np.array(['a', 'b', 'c'])
        a3 = np.array([True, False, True])

        with temp_file('.zip') as fp:
            index = Index((10, 20, 30), name='foo')
            NPZ(fp, 'w').from_arrays(blocks=(a1, a2, a3), index=index, name='bar', axis=1)

            f = Frame.from_npz(fp)
            # columns are auto-generated integers; index labels and names survive
            self.assertEqual(f.to_pairs(),
                    ((0, ((10, 0), (20, 4), (30, 8))), (1, ((10, 1), (20, 5), (30, 9))), (2, ((10, 2), (20, 6), (30, 10))), (3, ((10, 3), (20, 7), (30, 11))), (4, ((10, 'a'), (20, 'b'), (30, 'c'))), (5, ((10, True), (20, False), (30, True))))
                    )
            self.assertEqual(f.name, 'bar')
            self.assertEqual(f.index.name, 'foo')
Пример #23
0
    def test_archive_components_npy_write_arrays_h(self) -> None:
        # mixed-dtype blocks written with named columns and axis=1 round-trip through NPY

        a1 = np.arange(12).reshape(3, 4)
        a2 = np.array(['a', 'b', 'c'])
        a3 = np.array([True, False, True])

        with TemporaryDirectory() as fp:
            columns=Index(('a', 'b', 'c', 'd', 'e', 'f'), name='foo')
            NPY(fp, 'w').from_arrays(blocks=(a1, a2, a3), columns=columns, name='bar', axis=1)

            f = Frame.from_npy(fp)
            # index is auto-generated integers; column labels and names survive
            self.assertEqual(f.to_pairs(),
                    (('a', ((0, 0), (1, 4), (2, 8))), ('b', ((0, 1), (1, 5), (2, 9))), ('c', ((0, 2), (1, 6), (2, 10))), ('d', ((0, 3), (1, 7), (2, 11))), ('e', ((0, 'a'), (1, 'b'), (2, 'c'))), ('f', ((0, True), (1, False), (2, True))))
                    )
            self.assertEqual(f.name, 'bar')
            self.assertEqual(f.columns.name, 'foo')
Пример #24
0
    def iloc_searchsorted(self,
            values: tp.Any,
            *,
            side_left: bool = True,
            ) -> tp.Union[tp.Hashable, tp.Iterable[tp.Hashable]]:
        '''
        {doc}

        Args:
            {values}
            {side_left}
        '''
        # permit variable forms of date specification by translating values
        # (e.g. strings) to datetime before delegating to the base Index
        return Index.iloc_searchsorted(self, #type: ignore [no-any-return]
                key_to_datetime_key(values),
                side_left=side_left,
                )
Пример #25
0
    def test_loc_map_a(self) -> None:
        '''A scalar key maps to its position; the null slice passes through unchanged.'''
        idx = Index(['a', 'b', 'c'])

        for key, expected in (('b', 1), (NULL_SLICE, NULL_SLICE)):
            post = LocMap.loc_to_iloc(
                label_to_pos=idx._map,
                labels=idx._labels,
                positions=idx._positions,
                key=key,
                partial_selection=False,
            )
            self.assertEqual(post, expected)
Пример #26
0
    def test_bus_to_hierarchy_b(self) -> None:

        class CustomError(Exception):
            pass

        tree1 = dict(a_I=Index((1,2,3)), a_II=Index((1,2,3)))
        tree2 = dict(b_I=Index((1,2,3)), b_II=Index((1,2,3)))
        tree3 = dict(c_I=Index((1,2,3)), c_II=Index((1,2,3)))
        index1 = IndexHierarchy.from_tree(tree1)
        index2 = IndexHierarchy.from_tree(tree2)
        index3 = IndexHierarchy.from_tree(tree3)
        values = np.arange(36).reshape(6,6)

        # Align all the frames on columns!
        f1 = Frame(values, index=index1, columns=index1, name='f1')
        f2 = Frame(values, index=index2, columns=index1, name='f2')
        f3 = Frame(values, index=index3, columns=index1, name='f3')
        b1 = Bus.from_frames((f1, f2, f3))

        def test_assertions(hierarchy: IndexHierarchy, opposite: Index) -> None:
            # per-frame trees stack under the frame names; the shared axis is index1
            expected_tree = dict(f1=tree1, f2=tree2, f3=tree3)
            self.compare_trees(hierarchy.to_tree(), expected_tree)
            self.assertTrue(index1.equals(opposite))

        # axis=0 collects per-frame indices; columns are shared, so this succeeds
        test_assertions(*bus_to_hierarchy(b1, axis=0, deepcopy_from_bus=False, init_exception_cls=CustomError))
        test_assertions(*bus_to_hierarchy(b1, axis=0, deepcopy_from_bus=True, init_exception_cls=CustomError))

        # Cannot do this since the frames do not share the same index
        with self.assertRaises(CustomError):
            bus_to_hierarchy(b1, axis=1, deepcopy_from_bus=False, init_exception_cls=CustomError)

        with self.assertRaises(CustomError):
            bus_to_hierarchy(b1, axis=1, deepcopy_from_bus=True, init_exception_cls=CustomError)

        # Align all the frames on index!
        f1 = Frame(values, index=index1, columns=index1, name='f1')
        f2 = Frame(values, index=index1, columns=index2, name='f2')
        f3 = Frame(values, index=index1, columns=index3, name='f3')
        b1 = Bus.from_frames((f1, f2, f3))

        # axis=1 collects per-frame columns; the index is shared, so this succeeds
        test_assertions(*bus_to_hierarchy(b1, axis=1, deepcopy_from_bus=False, init_exception_cls=CustomError))
        test_assertions(*bus_to_hierarchy(b1, axis=1, deepcopy_from_bus=True, init_exception_cls=CustomError))

        # Cannot do this since the frames do not share the same columns
        with self.assertRaises(CustomError):
            bus_to_hierarchy(b1, axis=0, deepcopy_from_bus=False, init_exception_cls=CustomError)

        with self.assertRaises(CustomError):
            bus_to_hierarchy(b1, axis=0, deepcopy_from_bus=True, init_exception_cls=CustomError)
Пример #27
0
    def test_pivot_items_to_block_a(self) -> None:
        # pivot a single data field (column 3) grouped by column 0,
        # where column 0 is assigned the unique values 0..5 (one row per group)
        f = ff.parse('s(6,4)|v(int)').assign[0](
                range(6)
                )
        group_fields_iloc = [0]
        index_outer = Index(f[0].values.tolist())

        post = pivot_items_to_block(
                blocks=f._blocks,
                group_fields_iloc=group_fields_iloc,
                group_depth=1,
                data_field_iloc=3,
                func_single=None,  # no aggregation: values taken directly
                dtype=np.dtype(int),
                fill_value=0,
                fill_value_dtype=np.dtype(int),
                index_outer=index_outer,
                kind='mergesort',  # stable sort preserves within-group order
                )
        self.assertEqual(post.tolist(),
                [129017,  35021, 166924, 122246, 197228, 105269]
                )
Пример #28
0
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT,
        container_type: tp.Type[Frame] = Frame,
    ) -> Frame:
        '''
        Read a worksheet into a Frame.

        Args:
            label: Name of sheet to read from XLSX; if None, the first sheet is used and no name is set.
            config: Optional StoreConfig controlling index/columns depths, skip counts, dtypes, and trimming; defaults are used when None.
            store_filter: Optional StoreFilter applied to every cell value as it is read.
            container_type: Type of container to be returned, either Frame or a Frame subclass

        '''
        if config is None:
            config = StoreConfig()  # get default

        index_depth = config.index_depth
        index_name_depth_level = config.index_name_depth_level
        columns_depth = config.columns_depth
        columns_name_depth_level = config.columns_name_depth_level
        trim_nadir = config.trim_nadir

        skip_header = config.skip_header
        skip_footer = config.skip_footer

        wb = self._load_workbook(self._fp)

        if label is None:
            ws = wb[wb.sheetnames[0]]
            name = None  # do not set to default sheet name
        else:
            ws = wb[label]
            name = ws.title

        if ws.max_column <= 1 or ws.max_row <= 1:
            # https://openpyxl.readthedocs.io/en/stable/optimized.html
            # says that some clients might not report correct dimensions
            ws.calculate_dimension()

        max_column = ws.max_column
        max_row = ws.max_row

        # adjust for downward shift for skipping header, then reduce for footer; at this value and beyond we stop
        last_row_count = max_row - skip_header - skip_footer

        index_values: tp.List[tp.Any] = []
        columns_values: tp.List[tp.Any] = []

        data = []  # pre-size with None?
        apex_rows = []

        if trim_nadir:
            # mask marks cells that are None so trailing all-empty rows/columns can be trimmed later
            mask = np.full((last_row_count, max_column), False)

        # start=-skip_header makes skipped header rows come out with negative row_count
        for row_count, row in enumerate(ws.iter_rows(max_row=max_row),
                                        start=-skip_header):
            if row_count < 0:
                continue  # due to skip header; preserves comparison to columns_depth
            if row_count >= last_row_count:
                break

            if trim_nadir:
                # collect values one at a time so the None-mask can be updated per cell
                row_data: tp.Sequence[tp.Any] = []
                for col_count, c in enumerate(row):
                    if store_filter is None:
                        value = c.value
                    else:
                        value = store_filter.to_type_filter_element(c.value)
                    if value is None:  # NOTE: only checking None, not np.nan
                        mask[row_count, col_count] = True
                    row_data.append(value)  # type: ignore
                if not row_data:
                    mask[row_count] = True
            else:
                if store_filter is None:
                    row_data = tuple(c.value for c in row)
                else:  # only need to filter string values, but probably too expensive to pre-check
                    row_data = tuple(
                        store_filter.to_type_filter_element(c.value)
                        for c in row)

            # rows within columns_depth belong to the columns labels (and the apex corner)
            if row_count <= columns_depth - 1:
                apex_rows.append(row_data[:index_depth])
                if columns_depth == 1:
                    columns_values.extend(row_data[index_depth:])
                elif columns_depth > 1:
                    columns_values.append(row_data[index_depth:])
                continue

            # remaining rows split into index labels (if any) and data
            if index_depth == 0:
                data.append(row_data)
            elif index_depth == 1:
                index_values.append(row_data[0])
                data.append(row_data[1:])
            else:
                index_values.append(row_data[:index_depth])
                data.append(row_data[index_depth:])

        wb.close()

        #-----------------------------------------------------------------------
        # Trim all-empty trailing rows created from style formatting GH#146. As the wb is opened in read-only mode, reverse iterating on the wb is not an option, nor is direct row access by integer

        if trim_nadir:
            # NOTE: `mask` is all data, while `data` is post index/columns extraction; this means that if a non-None label is found, the row/column will not be trimmed.
            row_mask = mask.all(axis=1)
            row_trim_start = array1d_to_last_contiguous_to_edge(
                row_mask) - columns_depth
            if row_trim_start < len(row_mask) - columns_depth:
                data = data[:row_trim_start]
                if index_depth > 0:  # this handles depth 1 and greater
                    index_values = index_values[:row_trim_start]

            col_mask = mask.all(axis=0)
            col_trim_start = array1d_to_last_contiguous_to_edge(
                col_mask) - index_depth
            if col_trim_start < len(col_mask) - index_depth:
                data = (r[:col_trim_start] for r in data)  #type: ignore
                if columns_depth == 1:
                    columns_values = columns_values[:col_trim_start]
                if columns_depth > 1:
                    columns_values = (r[:col_trim_start]
                                      for r in columns_values)  #type: ignore

        #-----------------------------------------------------------------------
        # continue with Index and Frame creation
        # the apex (corner above the index) may carry the index name
        index_name = None if columns_depth == 0 else apex_to_name(
            rows=apex_rows,
            depth_level=index_name_depth_level,
            axis=0,
            axis_depth=index_depth)

        index: tp.Optional[IndexBase] = None
        own_index = False
        if index_depth == 1:
            index = Index(index_values, name=index_name)
            own_index = True
        elif index_depth > 1:
            index = IndexHierarchy.from_labels(
                index_values,
                continuation_token=None,
                name=index_name,
            )
            own_index = True

        columns_name = None if index_depth == 0 else apex_to_name(
            rows=apex_rows,
            depth_level=columns_name_depth_level,
            axis=1,
            axis_depth=columns_depth)

        columns: tp.Optional[IndexBase] = None
        own_columns = False
        if columns_depth == 1:
            columns = container_type._COLUMNS_CONSTRUCTOR(columns_values,
                                                          name=columns_name)
            own_columns = True
        elif columns_depth > 1:
            # multi-row columns labels are stored row-wise; zip transposes to per-column tuples
            columns = container_type._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels(
                zip(*columns_values),
                continuation_token=None,
                name=columns_name,
            )
            own_columns = True

        return container_type.from_records(
            data,  #type: ignore
            index=index,
            columns=columns,
            dtypes=config.dtypes,
            own_index=own_index,
            own_columns=own_columns,
            name=name,
            consolidate_blocks=config.consolidate_blocks)
Пример #29
0
 def test_archive_components_npz_write_arrays_c(self) -> None:
     # writing a single block with only an index (no columns) should succeed
     with temp_file('.zip') as fp:
         a1 = np.arange(12).reshape(3, 4)
         index = Index((10, 20, 30))
         NPZ(fp, 'w').from_arrays(blocks=(a1, ), index=index)
Пример #30
0
class Series(metaclass=MetaOperatorDelegate):
    '''
    A one-dimensional ordered, labelled collection, immutable and of fixed size.

    Args:
        values: An iterable of values, or a single object, to be aligned with the supplied (or automatically generated) index. Alternatively, a dictionary of index / value pairs can be provided.
        index: Optional index initializer. If provided, length must be equal to length of values.
        own_index: Flag index as ownable by Series; primarily for internal clients.
    '''

    __slots__ = (
        'values',  # immutable NumPy array of values
        '_index',  # Index (or IndexHierarchy) of labels
    )

    @classmethod
    def from_items(cls,
                   pairs: tp.Iterable[tp.Tuple[tp.Hashable, tp.Any]],
                   dtype: DtypeSpecifier = None) -> 'Series':
        '''Construct a Series from an iterable (or generator) of (label, value) pairs.

        Args:
            pairs: Iterable of pairs of index label, value.
            dtype: dtype or valid dtype specifier.

        Returns:
            :py:class:`static_frame.Series`
        '''
        labels = []

        def value_gen():
            for pair in pairs:
                # appending to labels is a deliberate side effect of iteration
                labels.append(pair[0])
                yield pair[1]

        return cls(value_gen(), index=labels, dtype=dtype)

    #
    # @classmethod
    # def from_record()

    @classmethod
    def from_pandas(cls,
                    value,
                    *,
                    own_data: bool = False,
                    own_index: bool = False) -> 'Series':
        '''Convert a Pandas Series into a Series.

        Args:
            own_data: If True, the underlying NumPy data array will be made immutable and used without a copy.
            own_index: If True, the underlying NumPy index label array will be made immutable and used without a copy.

        Returns:
            :py:class:`static_frame.Series`
        '''
        if not own_data:
            data = immutable_filter(value.values)
        else:
            # claim the array: freeze it in place instead of copying
            data = value.values
            data.flags.writeable = False

        if not own_index:
            index = value.index.values
            index = immutable_filter(index)
        else:
            index = value.index.values
            index.flags.writeable = False

        # index array is already immutable; constructor will wrap it
        return cls(data, index=index)

    def __init__(self,
                 values: SeriesInitializer,
                 *,
                 index: IndexInitializer = None,
                 dtype: DtypeSpecifier = None,
                 own_index: bool = False) -> None:
        '''
        Initialize a Series from values and an optional index.

        Args:
            values: iterable, mapping, NumPy array, or single element.
            index: optional index initializer; if None an integer index is created.
            dtype: optional dtype; must not conflict with an array argument.
            own_index: if True, the passed index is assigned without copying.
        '''
        #-----------------------------------------------------------------------
        # values assignment

        values_constructor = None  # if deferred

        # expose .values directly as it is immutable
        if not isinstance(values, np.ndarray):
            if isinstance(values, dict):
                # not sure if we should sort; not sure what to do if index is provided
                if index is not None:
                    raise Exception(
                        'cannot create a Series from a dictionary when an index is defined'
                    )
                index = []

                def values_gen():
                    for k, v in _dict_to_sorted_items(values):
                        # populate index as side effect of iterating values
                        index.append(k)
                        yield v

                if dtype and dtype != object:
                    # fromiter does not work with object types
                    self.values = np.fromiter(values_gen(),
                                              dtype=dtype,
                                              count=len(values))
                else:
                    self.values = np.array(tuple(values_gen()), dtype=dtype)
                self.values.flags.writeable = False

            # NOTE: not sure if we need to check __iter__ here
            elif (dtype and dtype != object and dtype != str
                  and hasattr(values, '__iter__')
                  and hasattr(values, '__len__')):
                # sized, typed iterable: fromiter avoids an intermediate tuple
                self.values = np.fromiter(values,
                                          dtype=dtype,
                                          count=len(values))
                self.values.flags.writeable = False
            elif hasattr(values, '__len__') and not isinstance(values, str):
                self.values = np.array(values, dtype=dtype)
                self.values.flags.writeable = False
            elif hasattr(values, '__next__'):  # a generator-like
                self.values = np.array(tuple(values), dtype=dtype)
                self.values.flags.writeable = False
            else:  # it must be a single item
                # we cannot create the values until we realize the index, which might be hierarchical and not have final size equal to length
                def values_constructor(shape):
                    self.values = np.full(shape, values, dtype=dtype)
                    self.values.flags.writeable = False
        else:  # is numpy
            if dtype is not None and dtype != values.dtype:
                # what to do here?
                raise Exception(
                    'when supplying values via array, the dtype argument is not necessary; if provided, it must agree with the dtype of the array'
                )
            if values.shape == ():  # handle special case of NP element
                # scalar array: defer creation until the index length is known
                def values_constructor(shape):
                    self.values = np.repeat(values, shape)
                    self.values.flags.writeable = False
            else:
                self.values = immutable_filter(values)

        #-----------------------------------------------------------------------
        # index assignment
        # NOTE: this generally must be done after values assignment, as from_items needs a values generator to be exhausted before looking to values

        if index is None or (hasattr(index, '__len__') and len(index) == 0):
            # create an integer index
            self._index = Index(range(len(self.values)), loc_is_iloc=True)
        elif own_index:
            self._index = index
        elif hasattr(index, 'STATIC'):
            # an Index-like object; only static (immutable) indices can be shared
            if index.STATIC:
                self._index = index
            else:
                raise Exception(
                    'non-static index cannot be assigned to Series')
        else:  # let index handle instantiation
            if isinstance(index, (Index, IndexHierarchy)):
                # call with the class of the passed-in index, in case it is hierarchical
                self._index = index.__class__(index)
            else:
                self._index = Index(index)

        shape = self._index.__len__()

        if values_constructor:
            values_constructor(shape)  # updates self.values

        if len(self.values) != shape:
            raise Exception('values and index do not match length')

    #---------------------------------------------------------------------------
    def __setstate__(self, state):
        '''
        Ensure that reanimated NP arrays are set not writeable.
        '''
        attrs = state[1]
        for key in attrs:
            setattr(self, key, attrs[key])
        # re-freeze the values array after unpickling
        self.values.flags.writeable = False

    #---------------------------------------------------------------------------
    # interfaces

    @property
    def loc(self):
        '''Interface for label-based (loc) selection.'''
        return GetItem(self._extract_loc)

    @property
    def iloc(self):
        '''Interface for position-based (iloc) selection.'''
        return GetItem(self._extract_iloc)

    # NOTE: this could be ExtractInterfacd1D, but are consistent with what is done on the base name space: loc and getitem duplicate each other.

    @property
    def drop(self):
        '''Interface for dropping elements via iloc, loc, or __getitem__ selection.'''
        return InterfaceSelection2D(
                func_getitem=self._drop_loc,
                func_loc=self._drop_loc,
                func_iloc=self._drop_iloc)

    @property
    def mask(self):
        '''Interface producing Boolean masks via iloc, loc, or __getitem__ selection.'''
        return InterfaceSelection2D(
                func_getitem=self._extract_loc_mask,
                func_loc=self._extract_loc_mask,
                func_iloc=self._extract_iloc_mask)

    @property
    def masked_array(self):
        '''Interface producing NumPy MaskedArrays via iloc, loc, or __getitem__ selection.'''
        return InterfaceSelection2D(
                func_getitem=self._extract_loc_masked_array,
                func_loc=self._extract_loc_masked_array,
                func_iloc=self._extract_iloc_masked_array)

    @property
    def assign(self) -> InterfaceSelection2D:
        '''Interface producing SeriesAssign objects via iloc, loc, or __getitem__ selection.'''
        return InterfaceSelection2D(
                func_getitem=self._extract_loc_assign,
                func_loc=self._extract_loc_assign,
                func_iloc=self._extract_iloc_assign)

    @property
    def iter_group(self):
        '''Iterator of group Series, one per unique value.'''
        return IterNode(
                container=self,
                function_values=self._axis_group,
                function_items=self._axis_group_items,
                yield_type=IterNodeType.VALUES)

    @property
    def iter_group_items(self):
        '''Iterator of (group label, group Series) pairs.'''
        return IterNode(
                container=self,
                function_values=self._axis_group,
                function_items=self._axis_group_items,
                yield_type=IterNodeType.ITEMS)

    @property
    def iter_element(self):
        '''Iterator of elements.'''
        return IterNode(
                container=self,
                function_values=self._axis_element,
                function_items=self._axis_element_items,
                yield_type=IterNodeType.VALUES)

    @property
    def iter_element_items(self):
        '''Iterator of (label, element) pairs.'''
        return IterNode(
                container=self,
                function_values=self._axis_element,
                function_items=self._axis_element_items,
                yield_type=IterNodeType.ITEMS)

    #---------------------------------------------------------------------------
    # index manipulation

    def _reindex_other_like_iloc(self,
                                 value: 'Series',
                                 iloc_key: GetItemKeyType,
                                 fill_value=np.nan) -> 'Series':
        '''Reindex ``value`` (a Series) to the labels of this Series selected by ``iloc_key``.
        '''
        target_index = self._index._extract_iloc(iloc_key)
        return value.reindex(target_index, fill_value=fill_value)

    def reindex(self,
                index: tp.Union[Index, tp.Sequence[tp.Any]],
                fill_value=np.nan) -> 'Series':
        '''
        Return a new Series based on the passed index.

        Args:
            fill_value: attempted to be used, but may be coerced by the dtype of this Series.
        '''
        # TODO: implement `method` argument with bfill, ffill options
        if isinstance(index, (Index, IndexHierarchy)):
            # reconstruct with the same class to permit safe reuse (may be hierarchical)
            target = index.__class__(index)
        else:
            # create the Index if not already an index; assume 1D
            target = Index(index)

        ic = IndexCorrespondence.from_correspondence(self.index, target)

        if ic.is_subset:
            # all requested labels are present; a simple take suffices
            return self.__class__(self.values[ic.iloc_src],
                                  index=target,
                                  own_index=True)

        values = _full_for_fill(self.values.dtype, len(target), fill_value)
        if ic.has_common:
            # copy over the intersecting values
            values[ic.iloc_dst] = self.values[ic.iloc_src]

        # make immutable so a copy is not made
        values.flags.writeable = False
        return self.__class__(values, index=target, own_index=True)

    def relabel(self, mapper: CallableOrMapping) -> 'Series':
        '''
        Return a new Series with index labels transformed via a mapping (or callable) from old to new values.
        '''
        relabeled = self._index.relabel(mapper)
        return self.__class__(self.values, index=relabeled, own_index=True)

    def reindex_flat(self):
        '''
        Return a new Series where an ``IndexHierarchy`` (if defined) is replaced by a flat, one-dimensional index of tuples.
        '''
        flat = self._index.flat()
        return self.__class__(self.values, index=flat)

    def reindex_add_level(self, level: tp.Hashable):
        '''
        Return a new Series with ``level`` added as a new root level of an ``IndexHierarchy``.
        '''
        deepened = self._index.add_level(level)
        return self.__class__(self.values, index=deepened)

    def reindex_drop_level(self, count: int = 1):
        '''
        Return a new Series with ``count`` leaf levels dropped from an ``IndexHierarchy``.
        '''
        shallowed = self._index.drop_level(count)
        return self.__class__(self.values, index=shallowed)

    #---------------------------------------------------------------------------
    # na handling

    def isna(self) -> 'Series':
        '''
        Return a same-indexed, Boolean Series indicating which values are NaN or None.
        '''
        # consider returning self if not values.any()?
        sel = _isna(self.values)
        sel.flags.writeable = False
        return self.__class__(sel, index=self._index)

    def notna(self) -> 'Series':
        '''
        Return a same-indexed, Boolean Series indicating which values are not NaN or None.
        '''
        sel = np.logical_not(_isna(self.values))
        sel.flags.writeable = False
        return self.__class__(sel, index=self._index)

    def dropna(self) -> 'Series':
        '''
        Return a new Series after removing values of NaN or None.
        '''
        # sel is True where values are to be KEPT
        sel = np.logical_not(_isna(self.values))
        # NOTE: the prior check was `if not np.any(sel): return self`, which
        # wrongly returned self (all NaN preserved) when every value was NaN;
        # the valid short-circuit is when there is nothing to drop
        if np.all(sel):
            return self  # immutable, so safe to reuse

        values = self.values[sel]
        values.flags.writeable = False
        return self.__class__(values, index=self._index.loc[sel])

    def fillna(self, value) -> 'Series':
        '''Return a new Series after replacing NaN or None values with the supplied value.
        '''
        sel = _isna(self.values)
        if not np.any(sel):
            # nothing to fill; immutable, so reuse
            return self

        if isinstance(value, np.ndarray):
            raise Exception('cannot assign an array to fillna')
        value_dtype = np.array(value).dtype

        # find a dtype that can hold both the fill value and existing values
        assigned_dtype = _resolve_dtype(value_dtype, self.values.dtype)
        if assigned_dtype == self.values.dtype:
            assigned = self.values.copy()
        else:
            assigned = self.values.astype(assigned_dtype)

        assigned[sel] = value
        assigned.flags.writeable = False
        return self.__class__(assigned, index=self._index)

    #---------------------------------------------------------------------------
    # operators

    def _ufunc_unary_operator(self, operator: tp.Callable) -> 'Series':
        '''Apply a unary operator to the values; the index is retained.'''
        result = operator(self.values)
        return self.__class__(result, index=self._index, dtype=self.dtype)

    def _ufunc_binary_operator(self, *, operator: tp.Callable,
                               other) -> 'Series':
        '''Apply a binary operator against ``other`` (Series, 1D array, or scalar), aligning indices via union when both operands are Series.'''
        values = self.values
        index = self._index

        if isinstance(other, Series):
            # if indices are the same, we can simply set other to values and fall back on NP
            if len(self.index) != len(other.index) or (self.index !=
                                                       other.index).any():
                # indices differ: align both operands on the union index
                index = self.index.union(other.index)
                # now need to reindex the Series
                values = self.reindex(index).values
                other = other.reindex(index).values
            else:
                other = other.values

        # if it is an np array, we simply fall back on np behavior
        elif isinstance(other, np.ndarray):
            if other.ndim > 1:
                raise NotImplementedError(
                    'Operator application to greater dimensionalities will result in an array with more than 1 dimension; it is not clear how such an array should be indexed.'
                )
        # permit single value constants; not sure about filtering other types

        # we want the dtype to be the result of applying the operator; this happens by default
        result = operator(values, other)

        if not isinstance(result, np.ndarray):
            # in comparison to Booleans, if values is of length 1 and a character type, we will get a Boolean back, not an array; this issues the following warning: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
            if isinstance(result, _BOOL_TYPES):
                # broadcast the scalar Boolean back to the size of the original Series
                result = np.full(len(values), result)
            else:
                raise Exception(
                    'unexpected branch from non-array result of operator application to array'
                )

        result.flags.writeable = False
        return self.__class__(result, index=index)

    def _ufunc_axis_skipna(self,
                           *,
                           axis,
                           skipna,
                           ufunc,
                           ufunc_skipna,
                           dtype=None):
        '''Reduce the single axis of this Series to a scalar; the Index has no use here.

        Args:
            dtype: not used; part of the signature for a common interface
        '''
        return _ufunc_skipna_1d(array=self.values,
                                ufunc=ufunc,
                                ufunc_skipna=ufunc_skipna,
                                skipna=skipna)

    #---------------------------------------------------------------------------
    def __len__(self) -> int:
        '''Length of values.
        '''
        return len(self.values)

    def display(self, config: DisplayConfig = None) -> Display:
        '''Return a Display of the Series.
        '''
        config = config or DisplayActive.get()
        header = '<' + self.__class__.__name__ + '>'
        # start from the index display, then append the values column
        d = self._index.display(config=config)
        d.append_display(Display.from_values(
                self.values,
                header=header,
                config=config))
        return d

    def __repr__(self):
        '''String representation via the default Display.'''
        return repr(self.display())

    #---------------------------------------------------------------------------
    # common attributes from the numpy array

    @property
    def mloc(self):
        '''Memory location of the underlying NumPy array.'''
        return mloc(self.values)

    @property
    def dtype(self) -> np.dtype:
        '''
        The dtype of the underlying NumPy array.

        Returns:
            :py:class:`numpy.dtype`
        '''
        return self.values.dtype

    @property
    def shape(self) -> tp.Tuple[int]:
        '''
        The shape of the underlying NumPy array, as a tuple.

        Returns:
            :py:class:`tp.Tuple[int]`
        '''
        return self.values.shape

    @property
    def ndim(self) -> int:
        '''
        The number of dimensions, which for a `Series` is always 1.

        Returns:
            :py:class:`int`
        '''
        return self.values.ndim

    @property
    def size(self) -> int:
        '''
        The size of the underlying NumPy array.

        Returns:
            :py:class:`int`
        '''
        return self.values.size

    @property
    def nbytes(self) -> int:
        '''
        The total bytes of the underlying NumPy array.

        Returns:
            :py:class:`int`
        '''
        return self.values.nbytes

    #---------------------------------------------------------------------------
    # extraction

    def _extract_iloc(self, key: GetItemKeyType) -> 'Series':
        '''Integer-position selection; iterable keys are handled by NP (though perhaps not tuples).'''
        return self.__class__(self.values[key],
                              index=self._index.iloc[key])

    def _extract_loc(self, key: GetItemKeyType) -> 'Series':
        '''
        Compatibility:
            Unlike Pandas, iterables of keys not found in the index raise rather than implicitly reindexing; use reindex() for that behavior.
        '''
        iloc_key = self._index.loc_to_iloc(key)
        values = self.values[iloc_key]

        if isinstance(values, np.ndarray):
            return self.__class__(values,
                                  index=self._index.iloc[iloc_key],
                                  own_index=True)
        return values  # a single element was selected

    def __getitem__(self, key: GetItemKeyType) -> 'Series':
        '''A loc selection (by index labels).

        Compatibility:
            Pandas permits both loc and iloc styles through __getitem__ on Series; here only the loc interface is exposed, making the Series dictionary-like (unlike the Index, where __getitem__ is an iloc).
        '''
        return self._extract_loc(key)

    #---------------------------------------------------------------------------
    # utilites for alternate extraction: drop, mask and assignment

    def _drop_iloc(self, key: GetItemKeyType) -> 'Series':
        '''Return a new Series with the positions selected by ``key`` removed.'''
        if isinstance(key, np.ndarray) and key.dtype == bool:
            # np.delete does not accept Boolean arrays; convert to positions
            to_remove = self._index.positions[key]
        else:
            to_remove = key
        values = np.delete(self.values, to_remove)
        values.flags.writeable = False
        return self.__class__(values,
                              index=self._index._drop_iloc(key),
                              own_index=True)

    def _drop_loc(self, key: GetItemKeyType) -> 'Series':
        '''Translate a loc key to iloc positions and delegate to _drop_iloc.'''
        iloc_key = self._index.loc_to_iloc(key)
        return self._drop_iloc(iloc_key)

    #---------------------------------------------------------------------------

    def _extract_iloc_mask(self, key: GetItemKeyType) -> 'Series':
        '''Return a Boolean Series of the same shape, True where ``key`` selects by position.
        '''
        mask = np.zeros(self.values.shape, dtype=bool)
        mask[key] = True
        mask.flags.writeable = False
        # the index is immutable and can be shared
        return self.__class__(mask, index=self._index)

    def _extract_loc_mask(self, key: GetItemKeyType) -> 'Series':
        '''Return a Boolean Series of the same shape, True where ``key`` selects by label.
        '''
        return self._extract_iloc_mask(key=self._index.loc_to_iloc(key))

    #---------------------------------------------------------------------------

    def _extract_iloc_masked_array(self, key: GetItemKeyType) -> MaskedArray:
        '''Return a MaskedArray masking the positions selected by ``key``.
        '''
        mask_series = self._extract_iloc_mask(key=key)
        return MaskedArray(data=self.values, mask=mask_series.values)

    def _extract_loc_masked_array(self, key: GetItemKeyType) -> MaskedArray:
        '''Return a MaskedArray masking the labels selected by ``key``.
        '''
        return self._extract_iloc_masked_array(
                key=self._index.loc_to_iloc(key))

    #---------------------------------------------------------------------------

    def _extract_iloc_assign(self, key: GetItemKeyType) -> 'SeriesAssign':
        '''Return a SeriesAssign targeting positions selected by ``key``.'''
        return SeriesAssign(data=self, iloc_key=key)

    def _extract_loc_assign(self, key: GetItemKeyType) -> 'SeriesAssign':
        '''Return a SeriesAssign targeting labels selected by ``key``.'''
        return SeriesAssign(data=self,
                            iloc_key=self._index.loc_to_iloc(key))

    #---------------------------------------------------------------------------
    # axis functions

    def _axis_group_items(self, *, axis=0):
        '''Generator of (group value, sub-Series) pairs derived from unique values.'''
        groups, locations = _array_to_groups_and_locations(self.values)
        for idx, group in enumerate(groups):
            yield group, self._extract_iloc(locations == idx)

    def _axis_group(self, *, axis=0):
        '''Generator of group sub-Series, without their group values.'''
        for _, sub in self._axis_group_items(axis=axis):
            yield sub

    def _axis_element_items(self, *, axis=0):
        '''Generator of index, value pairs, equivalent to Series.items(). Repeated here to have a common signature with other axis functions.
        '''
        return zip(self._index.values, self.values)

    def _axis_element(self, *, axis=0):
        '''Generator of values only, discarding labels.'''
        for _, v in self._axis_element_items(axis=axis):
            yield v

    #---------------------------------------------------------------------------

    @property
    def index(self):
        '''The Index of this Series.'''
        return self._index

    #---------------------------------------------------------------------------
    # dictionary-like interface

    def keys(self) -> Index:
        '''
        Iterator of index labels.
        '''
        return self._index

    def __iter__(self):
        '''
        Iterator of index labels, same as :py:meth:`Series.keys`.
        '''
        return iter(self._index)

    def __contains__(self, value) -> bool:
        '''
        True if ``value`` is an index label.
        '''
        return value in self._index

    def items(self) -> tp.Generator[tp.Tuple[tp.Any, tp.Any], None, None]:
        '''Iterator of pairs of index label and value.
        '''
        return zip(self._index.values, self.values)

    def get(self, key, default=None):
        '''
        Return the value found at the index key, else the default if the key is not found.
        '''
        if key in self._index:
            return self.__getitem__(key)
        return default

    #---------------------------------------------------------------------------
    # transformations resulting in the same dimensionality

    def sort_index(self,
                   ascending: bool = True,
                   kind: str = _DEFAULT_SORT_KIND) -> 'Series':
        '''
        Return a new Series ordered by the sorted Index.
        '''
        # argsort permits a single sort whose order is reused for both arrays
        order = np.argsort(self._index.values, kind=kind)
        if not ascending:
            order = order[::-1]

        index_values = self._index.values[order]
        values = self.values[order]
        for array in (index_values, values):
            array.flags.writeable = False
        return self.__class__(values, index=index_values)

    def sort_values(self,
                    ascending: bool = True,
                    kind: str = _DEFAULT_SORT_KIND) -> 'Series':
        '''
        Return a new Series ordered by the sorted values.
        '''
        # argsort permits a single sort whose order is reused for both arrays
        order = np.argsort(self.values, kind=kind)
        if not ascending:
            order = order[::-1]

        index_values = self._index.values[order]
        values = self.values[order]
        for array in (index_values, values):
            array.flags.writeable = False
        return self.__class__(values, index=index_values)

    def isin(self, other) -> 'Series':
        '''
        Return a same-sized Boolean Series showing whether each same-positioned element is in the iterable passed to the function.
        '''
        # cannot use assume_unique, as values may not be unique
        v, _ = _iterable_to_array(other)
        # NOTE: could identify an empty iterable and create a False array
        result = np.in1d(self.values, v)
        result.flags.writeable = False
        return self.__class__(result, index=self._index)

    def clip(self, lower=None, upper=None):
        '''Apply a clip operation to the Series.

        Args:
            lower: value or Series to define the inclusive lower bound.
            upper: value or Series to define the inclusive upper bound.
        '''
        args = [lower, upper]
        for idx, arg in enumerate(tuple(args)):
            if isinstance(arg, Series):
                # reindex to this index, fill gaps with the non-binding bound,
                # then strip the index away
                # NOTE: using the bound forces going to a float type; this may not be the best approach
                bound = -np.inf if idx == 0 else np.inf
                args[idx] = arg.reindex(self.index).fillna(bound).values
            elif hasattr(arg, '__iter__'):
                raise Exception(
                    'only Series are supported as iterable lower/upper arguments'
                )
            # single values pass through unchanged

        array = np.clip(self.values, *args)
        array.flags.writeable = False
        return self.__class__(array, index=self._index)

    def transpose(self) -> 'Series':
        '''The transposition of a Series is itself.
        '''
        return self

    @property
    def T(self):
        '''Transpose, which for a Series is a no-op.'''
        return self.transpose()

    def duplicated(self,
                   exclude_first=False,
                   exclude_last=False) -> 'Series':
        '''
        Return a same-sized Boolean Series that shows True for all values that are duplicated.

        Args:
            exclude_first: if True, the first occurrence of a duplicate is not marked.
            exclude_last: if True, the last occurrence of a duplicate is not marked.
        '''
        # TODO: might be able to do this without calling .values and passing in TypeBlocks, but TB needs to support roll
        duplicates = _array_to_duplicated(self.values,
                                          exclude_first=exclude_first,
                                          exclude_last=exclude_last)
        duplicates.flags.writeable = False
        # NOTE: the prior return annotation of np.ndarray was incorrect;
        # a Series is constructed and returned
        return self.__class__(duplicates, index=self._index)

    def drop_duplicated(self, exclude_first=False, exclude_last=False):
        '''
        Return a Series with duplicated values removed.
        '''
        duplicates = _array_to_duplicated(self.values,
                                          exclude_first=exclude_first,
                                          exclude_last=exclude_last)
        selection = ~duplicates
        return self.__class__(self.values[selection],
                              index=self._index[selection])

    def astype(self, dtype: DtypeSpecifier) -> 'Series':
        '''
        Return a Series with type determined by the ``dtype`` argument. Note that for Series this is a simple function, whereas for Frame it is an interface exposing both a callable and a getitem interface.
        '''
        converted = self.values.astype(dtype)
        return self.__class__(converted, index=self._index)

    def roll(self, shift: int, include_index: bool = False) -> 'Series':
        '''Return a Series with values rotated forward and wrapped around the index (with a positive shift) or backward and wrapped around the index (with a negative shift).

        Args:
            shift: Positive or negative integer shift.
            include_index: Determine if the Index is shifted with the underlying data.
        '''
        # guard len == 0: the prior unguarded modulo raised ZeroDivisionError
        # on an empty Series; an empty Series rolls to itself
        if len(self.values) and shift % len(self.values):
            values = array_shift(self.values, shift, axis=0, wrap=True)
            values.flags.writeable = False
        else:
            # a no-op shift (or empty Series): reuse the immutable array
            values = self.values

        if include_index:
            index = self._index.roll(shift=shift)
            own_index = True
        else:
            index = self._index
            own_index = False

        return self.__class__(values, index=index, own_index=own_index)

    def shift(self, shift: int, fill_value=np.nan) -> 'Series':
        '''Return a Series with values shifted forward on the index (with a positive shift) or backward on the index (with a negative shift).

        Args:
            shift: Positive or negative integer shift.
            fill_value: Value to be used to fill data missing after the shift.
        '''
        if not shift:
            # zero shift: the immutable values array can be reused directly
            return self.__class__(self.values, index=self._index)

        values = array_shift(self.values,
                             shift,
                             axis=0,
                             wrap=False,
                             fill_value=fill_value)
        values.flags.writeable = False
        return self.__class__(values, index=self._index)

    #---------------------------------------------------------------------------
    # transformations resulting in reduced dimensionality

    def head(self, count: int = 5) -> 'Series':
        '''Return a Series consisting only of the top elements as specified by ``count``.

        Args:
            count: Number of elements to be returned from the top of the Series.
        '''
        return self.iloc[:count]

    def tail(self, count: int = 5) -> 'Series':
        '''Return a Series consisting only of the bottom elements as specified by ``count``.

        Args:
            count: Number of elements to be returned from the bottom of the Series.
        '''
        return self.iloc[-count:]

    #---------------------------------------------------------------------------
    # utility function to numpy array

    def unique(self) -> np.ndarray:
        '''
        Return a NumPy array of unique values.
        '''
        return np.unique(self.values)

    #---------------------------------------------------------------------------
    # export

    # NOTE: can add to_frame and to_fram_go after Series has name attribute

    def to_pairs(self) -> tp.Iterable[tp.Tuple[tp.Hashable, tp.Any]]:
        '''
        Return a tuple of tuples, where each inner tuple is a pair of index label, value.
        '''
        if isinstance(self._index, IndexHierarchy):
            # hierarchical labels are realized as tuples
            labels = list(_array2d_to_tuples(self._index.values))
        else:
            labels = self._index.values

        return tuple(zip(labels, self.values))

    def to_pandas(self):
        '''
        Return a Pandas Series; both data and index arrays are copied.
        '''
        import pandas
        data = self.values.copy()
        labels = self._index.values.copy()
        return pandas.Series(data, index=labels)