Пример #1
0
    def test_store_hdf5_write_a(self) -> None:
        # Write four named frames (flat and hierarchical labels on either
        # axis) into one HDF5 store, then read each back by label and compare
        # it with the frame it came from.
        f1 = Frame.from_dict(
                {'x': (1, 2, -5, 200), 'y': (3, 4, -5, -3000)},
                index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'a': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        f3 = Frame.from_records(
                (
                    (10, 20, 50, 60),
                    (50.0, 60.4, -50, -60),
                ),
                index=('p', 'q'),
                columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f3',
                )

        # the eight rows of f4 are the same four records repeated twice
        records_f4 = (
                (10, 20, 50, False, 10, 20, 50, False),
                (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                (234, 44452, 0, False, 234, 44452, 0, False),
                (4, -4, 2000, True, 4, -4, 2000, True),
                ) * 2
        f4 = Frame.from_records(
                records_f4,
                index=IndexHierarchy.from_product(
                        ('top', 'bottom'),
                        ('far', 'near'),
                        ('left', 'right'),
                        ),
                columns=IndexHierarchy.from_product(
                        ('I', 'II'),
                        ('a', 'b'),
                        (1, 2),
                        ),
                name='f4',
                )

        frames = (f1, f2, f3, f4)
        config = StoreConfigMap.from_frames(frames)

        with temp_file('.hdf5') as fp:
            store = StoreHDF5(fp)
            store.write(((f.name, f) for f in frames), config=config)

            # this will read from file, not in memory
            labels = tuple(store.labels())
            self.assertEqual(tuple(f.name for f in frames), labels)

            # labels were just asserted to match the frame names in order,
            # so frames and labels can be walked in lockstep
            for f_src, label in zip(frames, labels):
                f_loaded = store.read(label, config=config[f_src.name])
                self.assertEqualFrames(f_src, f_loaded)
Пример #2
0
    def test_bus_interface_b(self) -> None:
        # The interface summary must be reachable from a Bus instance and
        # from the Bus class itself, as a non-empty Frame in both cases.
        f1 = Frame.from_dict(
                dict(a=(1,2), b=(3,4)),
                index=('x', 'y'),
                name='foo')
        f2 = Frame.from_dict(
                dict(a=(1,2,3), b=(4,5,6)),
                index=('x', 'y', 'z'),
                name='bar')

        b1 = Bus.from_frames((f1, f2))

        # NOTE: `assertTrue(shape, (41, 3))` treats the tuple as the failure
        # message and passes for any non-empty shape, so it verified nothing.
        # The row count is presumably tied to the size of the public API and
        # is therefore not pinned here; the three columns are presumably the
        # fields of the minimized interface summary -- confirm if this fails.
        post1 = b1.interface
        self.assertIsInstance(post1, Frame)
        self.assertGreater(post1.shape[0], 0)
        self.assertEqual(post1.shape[1], 3)

        post2 = Bus.interface
        self.assertIsInstance(post2, Frame)
        self.assertGreater(post2.shape[0], 0) #type: ignore
        self.assertEqual(post2.shape[1], 3) #type: ignore
Пример #3
0
    def test_bus_loc_b(self) -> None:
        # An open-ended label slice starting at 'f2' should select that
        # frame and the one after it.
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        f3 = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('p', 'q'),
                name='f3',
                )

        bus = Bus.from_frames((f1, f2, f3))
        selected = bus.loc['f2':] #type: ignore
        self.assertEqual(len(selected), 2)
        self.assertEqual(selected.index.values.tolist(), ['f2', 'f3'])
Пример #4
0
    def test_batch_count_a(self) -> None:
        # Per-frame counts taken through a Batch along each axis; the
        # expected totals leave out the NaN entries.
        f1 = Frame.from_dict(
                {'b': (20, 20, 0), 'a': (20, 20, np.nan)},
                index=('z', 'y', 'x'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'b': (1, np.nan, 1), 'a': (1, 50, 1)},
                index=('y', 'z', 'x'),
                name='f2',
                )

        # axis 0: one count per column, per frame
        by_column = Batch.from_frames((f1, f2)).count(axis=0).to_frame()
        self.assertEqual(
                by_column.to_pairs(0),
                (
                    ('b', (('f1', 3), ('f2', 2))),
                    ('a', (('f1', 2), ('f2', 3))),
                ),
                )

        # axis 1: one count per row label, per frame
        by_row = Batch.from_frames((f1, f2)).count(axis=1).to_frame()
        self.assertEqual(
                by_row.to_pairs(0),
                (
                    ('x', (('f1', 1), ('f2', 2))),
                    ('y', (('f1', 2), ('f2', 2))),
                    ('z', (('f1', 2), ('f2', 1))),
                ),
                )
Пример #5
0
    def test_exceed_rows(self) -> None:
        # The frame holds 1048576 data rows; writing it to XLSX with
        # include_columns=True is expected to raise RuntimeError (presumably
        # because the extra header row goes past the worksheet row limit).
        row_count = 1048576
        f1 = Frame.from_element('x', index=range(row_count), columns='x')

        with temp_file('.xlsx') as fp, self.assertRaises(RuntimeError):
            f1.to_xlsx(fp, include_columns=True)
Пример #6
0
 def _set_container_type(frame: Frame,
                         container_type: tp.Type[Frame]) -> Frame:
     '''
     Helper method to coerce a frame to the expected type, or return it as is
     if the type is already correct
     '''
     if frame.__class__ is not container_type:
         return frame._to_frame(container_type)
     return frame
Пример #7
0
    def test_batch_round_a(self) -> None:
        # Applying the built-in round() with one decimal place to a Batch
        # should round the values of every contained frame.
        f1 = Frame.from_dict(
                {'b': (20, 20.234, 0), 'a': (20.234, 20.234, 50.828)},
                index=('z', 'y', 'x'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'b': (1, 20.234, 1.043), 'a': (1.043, 50.828, 1.043)},
                index=('y', 'z', 'x'),
                name='f2',
                )

        expected = (
                ('b', (
                    (('f1', 'z'), 20.0),
                    (('f1', 'y'), 20.2),
                    (('f1', 'x'), 0.0),
                    (('f2', 'y'), 1.0),
                    (('f2', 'z'), 20.2),
                    (('f2', 'x'), 1.0),
                    )),
                ('a', (
                    (('f1', 'z'), 20.2),
                    (('f1', 'y'), 20.2),
                    (('f1', 'x'), 50.8),
                    (('f2', 'y'), 1.0),
                    (('f2', 'z'), 50.8),
                    (('f2', 'x'), 1.0),
                    )),
                )

        rounded = round(Batch.from_frames((f1, f2)), 1).to_frame()  #type: ignore
        self.assertEqual(rounded.to_pairs(0), expected)
Пример #8
0
    def test_bus_loc_a(self) -> None:
        # Selecting a single label from a Bus should hand back the very same
        # Frame object that was stored, not a copy.
        frame = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )

        bus = Bus.from_frames((frame,))
        self.assertIs(frame, bus.loc['f1'])
Пример #9
0
    def test_batch_f(self) -> None:
        # Group on the 'group' column, keep only column 'b' via a loc
        # selection, and sum within each group.
        data = {
            'a': [1, 2, 3],
            'b': [2, 4, 6],
            'group': ['x', 'z', 'z'],
        }
        f1 = Frame.from_dict(data)

        batch = Batch(f1.iter_group_items('group'))
        totals = batch.loc[:, 'b'].sum().to_frame()
        self.assertEqual(
            totals.to_pairs(0),
            (('b', (('x', 2), ('z', 10))), ),
        )
Пример #10
0
    def test_archive_components_npz_write_arrays_f(self) -> None:
        # Three blocks of 3, 1 and 2 rows (four columns each) written along
        # axis 0 should load back as one frame of six rows.
        blocks = (
            np.arange(12).reshape(3, 4),
            np.array([10, 20, 30, 40]).reshape(1, 4),
            np.arange(8).reshape(2, 4),
        )

        with temp_file('.zip') as fp:
            NPZ(fp, 'w').from_arrays(blocks=blocks, axis=0)
            loaded = Frame.from_npz(fp)
            self.assertEqual(loaded.shape, (6, 4))
Пример #11
0
    def test_archive_components_npz_write_arrays_a(self) -> None:
        # A single 2D block written without labels should load back with the
        # same values, and with `_map` set to None on both axis indices.
        a1 = np.arange(12).reshape(3, 4)

        with temp_file('.zip') as fp:
            NPZ(fp, 'w').from_arrays(blocks=(a1, ))
            loaded = Frame.from_npz(fp)

            self.assertEqual(loaded.values.tolist(), a1.tolist())
            self.assertIsNone(loaded.index._map)
            self.assertIsNone(loaded.columns._map)
Пример #12
0
    def test_archive_components_npz_from_frames_j(self) -> None:
        # Two frames that share only column 'b' are written along axis 0
        # without their index; after loading, missing cells are filled with 0.
        spec = 's(2,2)|v(float)'
        f1 = ff.parse(spec).relabel(columns=('a', 'b'))
        f2 = ff.parse(spec).relabel(columns=('b', 'c'))

        expected = (
            ('a', ((0, 1930.4), (1, -1760.34), (2, 0.0), (3, 0.0))),
            ('b', ((0, -610.8), (1, 3243.94), (2, 1930.4), (3, -1760.34))),
            ('c', ((0, 0.0), (1, 0.0), (2, -610.8), (3, 3243.94))),
        )

        with TemporaryDirectory() as fp:
            NPY(fp, 'w').from_frames(frames=(f1, f2), axis=0, include_index=False)
            loaded = Frame.from_npy(fp).fillna(0)
            self.assertEqual(loaded.to_pairs(), expected)
Пример #13
0
    def build_frame(self, rows: int, cols: int, groups: int, mixed_data: bool) -> Frame:
        '''
        Build a frame of ``cols`` data columns followed by one group column, and return it after passing it through ``BuildTestFrames._shuffle``.

        Args:
            rows: number of rows.
            cols: number of data columns; these are labelled ``'0'`` .. ``str(cols - 1)``.
            groups: forwarded to ``_build_groups`` to produce the group column.
            mixed_data: if True, each data column is built separately with a dtype cycled from ``BuildTestFrames._DTYPES``; if False, a single ``np.arange`` block is used.
        '''
        group_col: np.ndarray = self._build_groups(groups, rows)

        if not mixed_data:
            # one homogeneous block, with the group column stacked on the right
            block = np.arange(rows*cols).reshape(rows, cols)
            block = np.hstack((block, group_col.reshape(rows, 1)))
            labels = [str(i) for i in range(cols)] + [GROUPBY_COL]
            return BuildTestFrames._shuffle(Frame(block, columns=labels))

        # one independently built column per label, cycling through the dtypes
        dtypes = BuildTestFrames._DTYPES
        built_cols: tp.List[tp.Tuple[str, np.ndarray]] = [
            (str(col), self._build_col(rows, dtypes[col % len(dtypes)]))
            for col in range(cols)
        ]
        built_cols.append((GROUPBY_COL, group_col))
        return BuildTestFrames._shuffle(Frame.from_items(built_cols))
Пример #14
0
 def test_unary_operators_numeric(self, f1: Frame) -> None:
     # Each unary operator applied to the frame should agree with the same
     # operator applied to the frame's 2D array of values.
     for op in UFUNC_UNARY_OPERATORS:
         # '__invert__' is invalid on non Boolean data, so it is not run
         if op != '__invert__':
             func = getattr(operator, op)
             values = f1.values
             # must coerce all blocks to same type to compare to what NP does
             from_frame = func(f1.astype(values.dtype)).values
             from_array = func(values)
             self.assertAlmostEqualArray(from_frame, from_array)
Пример #15
0
    def test_bus_equals_c(self) -> None:
        # Two buses holding the same frames under different names compare
        # equal by default, and unequal once compare_name is enabled.
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        frames = (f1, f2)

        bus_foo = Bus.from_frames(frames, name='foo')
        self.assertEqual(bus_foo.name, 'foo')

        bus_bar = Bus.from_frames(frames, name='bar')
        self.assertEqual(bus_bar.name, 'bar')

        self.assertTrue(bus_foo.equals(bus_bar))
        self.assertFalse(bus_foo.equals(bus_bar, compare_name=True))
Пример #16
0
    def test_batch_apply_b(self) -> None:
        # With threads enabled, apply() should still deliver one result per
        # frame, keyed by the frame's name.
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        f3 = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('x', 'q'),
                name='f3',
                )

        batch = Batch.from_frames((f1, f2, f3), use_threads=True, max_workers=8)
        shapes = dict(batch.apply(lambda x: x.shape).items())
        self.assertEqual(shapes, {'f1': (2, 2), 'f2': (3, 2), 'f3': (2, 2)})
Пример #17
0
    def test_batch_b(self) -> None:
        # Group on 'group', select column 'b' from every group, and sum it;
        # the resulting single column is labelled None.
        data = {
            'a': [1, 49, 2, 3],
            'b': [2, 4, 381, 6],
            'group': ['x', 'x', 'z', 'z'],
        }
        f1 = Frame.from_dict(data, index=('r', 's', 't', 'u'))

        batch = Batch(f1.iter_group_items('group'))
        totals = batch['b'].sum().to_frame()
        self.assertEqual(
                totals.to_pairs(0),
                ((None, (('x', 6), ('z', 387))),),
                )
Пример #18
0
    def test_batch_apply_except_d(self) -> None:
        # apply_items_except() with KeyError: only 'f3' is expected in the
        # result (presumably func2 raises KeyError for the other frames,
        # which are then skipped -- func2 is defined outside this method).
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        f3 = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('x', 'q'),
                name='f3',
                )

        batch = Batch.from_frames((f1, f2, f3), max_workers=3)
        post = batch.apply_items_except(func2, KeyError).to_frame()
        self.assertEqual(
                post.to_pairs(),
                (
                    ('d', (('f3', 20),)),
                    ('b', (('f3', 60),)),
                ),
                )
Пример #19
0
    def test_batch_shapes_a(self) -> None:
        # After selecting columns 'a' and 'b' from every group, `shapes`
        # should report (rows, 2) for each group label.
        f1 = Frame.from_dict({'a':[1,2,3], 'b':[2,4,6], 'group': ['x','z','z']})

        b1 = Batch(f1.iter_group_items('group'))[['a', 'b']]

        self.assertEqual(b1.shapes.to_pairs(),
                (('x', (1, 2)), ('z', (2, 2)))
                )
Пример #20
0
    def test_batch_apply_array_a(self) -> None:
        # unique() yields one array per frame; assembling them along axis 1
        # pads the shorter results with the supplied fill value (None).
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        f3 = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('x', 'q'),
                name='f3',
                )

        expected = (
                ('f1', ((0, 1), (1, 2), (2, 3), (3, 4), (4, None), (5, None))),
                ('f2', ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6))),
                ('f3', ((0, 10), (1, 20), (2, 50), (3, 60), (4, None), (5, None))),
                )

        uniques = Batch.from_frames((f1, f2, f3)).unique()
        post = uniques.to_frame(axis=1, fill_value=None)
        self.assertEqual(post.to_pairs(0), expected)
Пример #21
0
    def assertEqualFrames(self,
                          f1: Frame,
                          f2: Frame,
                          check_dtypes: bool = True) -> None:
        '''
        Assert that two frames agree in shape, class, name, and in the depth and class of both axes, then compare their values with ``assertAlmostEqualFramePairs``.

        Args:
            f1: first frame.
            f2: second frame.
            check_dtypes: when True, additionally require the ``dtypes`` values of the two frames to match element-wise.
        '''
        for attr in ('shape', '__class__', 'name'):
            self.assertEqual(getattr(f1, attr), getattr(f2, attr))

        # same two checks for the index and then the columns
        for axis in ('index', 'columns'):
            labels_1 = getattr(f1, axis)
            labels_2 = getattr(f2, axis)
            self.assertEqual(labels_1.depth, labels_2.depth)
            self.assertEqual(labels_1.__class__, labels_2.__class__)

        # NOTE: this will not handle types
        self.assertAlmostEqualFramePairs(f1.to_pairs(0), f2.to_pairs(0))

        if not check_dtypes:
            return
        self.assertTrue((f1.dtypes.values == f2.dtypes.values).all())
Пример #22
0
 def to_frame(
     cls,
     target: tp.Type[ContainerBase],
     *,
     minimized: bool = True,
     max_args: int = MAX_ARGS,
 ) -> Frame:
     '''
     Summarize the records from ``cls.interrogate(target)`` as a Frame indexed by signature.

     Args:
         target: the class to interrogate; its ``__name__`` becomes the name given to the concatenated frame.
         minimized: if True, return only the ``cls_name``, ``group`` and ``doc`` columns.
         max_args: forwarded to ``cls.interrogate``.
     '''
     records = cls.interrogate(target, max_args=max_args)
     raw = Frame.from_records(records)

     # concatenate one selection per group so rows follow INTERFACE_GROUP_ORDER
     per_group = (raw.loc[raw['group'] == g] for g in INTERFACE_GROUP_ORDER)
     ordered = Frame.from_concat(per_group, name=target.__name__)
     indexed = ordered.set_index('signature', drop=True)

     if not minimized:
         return indexed  #type: ignore
     return indexed[['cls_name', 'group', 'doc']]  #type: ignore
Пример #23
0
    def test_bus_iloc_a(self) -> None:
        # Round-trip a Bus through a zip-pickle archive, select the first and
        # third frames by position, and check that the selection's status
        # reports both as loaded.
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        f3 = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('p', 'q'),
                name='f3',
                )

        bus_src = Bus.from_frames((f1, f2, f3))

        with temp_file('.zip') as fp:
            bus_src.to_zip_pickle(fp)
            bus_loaded = Bus.from_zip_pickle(fp)

            selected = bus_loaded.iloc[[0, 2]]
            self.assertEqual(
                    selected.status['loaded'].to_pairs(),
                    (('f1', True), ('f3', True)),
                    )
Пример #24
0
    def test_batch_c3(self) -> None:
        # Select row 'x' from every frame and assemble the results along
        # axis 1; columns absent from a frame take the fill value 0.
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        f3 = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('x', 'q'),
                name='f3',
                )

        expected = (
                ('f1', (('a', 1), ('b', 3), ('c', 0), ('d', 0))),
                ('f2', (('a', 0), ('b', 4), ('c', 1), ('d', 0))),
                ('f3', (('a', 0), ('b', 50), ('c', 0), ('d', 10))),
                )

        batch = Batch.from_frames((f1, f2, f3))
        row_x = batch.loc['x'].to_frame(fill_value=0, axis=1)
        self.assertEqual(row_x.to_pairs(0), expected)
 def test_store_filter_to_delimited_a(self) -> None:
     # None, NaN and both infinities should each be written to CSV as the
     # replacement string configured on the StoreFilter.
     replacements = dict(
         from_nan='*',
         from_none='!',
         from_posinf='&',
         from_neginf='@',
     )
     f = Frame.from_records(((None, np.inf), (np.nan, -np.inf)))
     store_filter = StoreFilter(**replacements)

     buffer = StringIO()
     f.to_csv(buffer, store_filter=store_filter, include_index=False)
     # getvalue() returns the whole buffer regardless of stream position
     self.assertEqual(buffer.getvalue(), '0,1\n!,&\n*,@')
Пример #26
0
    def test_batch_shift_a(self) -> None:
        # Shift every frame by +1 along the index and -1 along the columns,
        # filling vacated cells with 0.
        f1 = Frame.from_dict(
                {'b': (20, 20, 0), 'a': (20, 20, 50)},
                index=('z', 'y', 'x'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'b': (1, 20, 1), 'a': (1, 50, 1)},
                index=('y', 'z', 'x'),
                name='f2',
                )

        expected = (
                ('b', (
                    (('f1', 'z'), 0),
                    (('f1', 'y'), 20),
                    (('f1', 'x'), 20),
                    (('f2', 'y'), 0),
                    (('f2', 'z'), 1),
                    (('f2', 'x'), 50),
                    )),
                ('a', (
                    (('f1', 'z'), 0),
                    (('f1', 'y'), 0),
                    (('f1', 'x'), 0),
                    (('f2', 'y'), 0),
                    (('f2', 'z'), 0),
                    (('f2', 'x'), 0),
                    )),
                )

        batch = Batch.from_frames((f1, f2))
        shifted = batch.shift(index=1, columns=-1, fill_value=0).to_frame()
        self.assertEqual(shifted.to_pairs(0), expected)
Пример #27
0
    def test_store_hdf5_write_b(self) -> None:
        # failure when including objects: a column holding a plain object()
        # is expected to make the HDF5 write raise RuntimeError
        data = {
            'x': (1, 2, -5, object()),
            'y': (3, 4, -5, -3000),
        }
        frames = (Frame.from_dict(data), )

        with temp_file('.hdf5') as fp:
            store = StoreHDF5(fp)
            with self.assertRaises(RuntimeError):
                store.write(((f.name, f) for f in frames))
Пример #28
0
    def assertEqualFrames(self,
            f1: Frame,
            f2: Frame,
            compare_dtype: bool = True
            ) -> None:
        '''
        Fail unless ``f1.equals(f2, compare_dtype=compare_dtype)`` holds.

        When the frames are not equal, the index, columns and ``_blocks`` are compared in turn so that the failure message names the first component that differs; if all three match, the failure is attributed to class or name.
        '''
        if f1.equals(f2, compare_dtype=compare_dtype):
            return

        components = (
                ('index', 'index do not match'),
                ('columns', 'columns do not match'),
                ('_blocks', '_blocks do not match'),
                )
        for attr, message in components:
            left = getattr(f1, attr)
            right = getattr(f2, attr)
            self.assertTrue(left.equals(right, compare_dtype=compare_dtype), message)

        self.fail('class or name do not match')
Пример #29
0
    def test_batch_to_bus_a(self) -> None:
        # Converting a Batch to a Bus should keep every frame; concatenating
        # the bus items (missing columns filled with 0) exposes all values.
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )

        expected = (
                ('a', (
                    (('f1', 'x'), 1),
                    (('f1', 'y'), 2),
                    (('f2', 'x'), 0),
                    (('f2', 'y'), 0),
                    (('f2', 'z'), 0),
                    )),
                ('b', (
                    (('f1', 'x'), 3),
                    (('f1', 'y'), 4),
                    (('f2', 'x'), 4),
                    (('f2', 'y'), 5),
                    (('f2', 'z'), 6),
                    )),
                ('c', (
                    (('f1', 'x'), 0),
                    (('f1', 'y'), 0),
                    (('f2', 'x'), 1),
                    (('f2', 'y'), 2),
                    (('f2', 'z'), 3),
                    )),
                )

        bus = Batch.from_frames((f1, f2)).to_bus()
        combined = Frame.from_concat_items(bus.items(), fill_value=0)
        self.assertEqual(combined.to_pairs(0), expected)
Пример #30
0
    def test_batch_ufunc_shape_a(self) -> None:
        # cumsum() on a Batch keeps the shape of each frame; the expected
        # values are running totals down each column of each frame.
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )

        expected = (
                ('a', (
                    (('f1', 'x'), 1),
                    (('f1', 'y'), 3),
                    (('f2', 'x'), 0),
                    (('f2', 'y'), 0),
                    (('f2', 'z'), 0),
                    )),
                ('b', (
                    (('f1', 'x'), 3),
                    (('f1', 'y'), 7),
                    (('f2', 'x'), 4),
                    (('f2', 'y'), 9),
                    (('f2', 'z'), 15),
                    )),
                ('c', (
                    (('f1', 'x'), 0),
                    (('f1', 'y'), 0),
                    (('f2', 'x'), 1),
                    (('f2', 'y'), 3),
                    (('f2', 'z'), 6),
                    )),
                )

        summed = Batch.from_frames((f1, f2), name='foo').cumsum()
        combined = Frame.from_concat_items(summed.items(), fill_value=0)
        self.assertEqual(combined.to_pairs(0), expected)