def test_store_hdf5_write_a(self) -> None:
    '''
    Write four named frames (flat and hierarchical index/columns) to a single HDF5 store, then read each one back and compare it to its source.
    '''
    frame_a = Frame.from_dict(
            dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
            index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f1',
            )
    frame_b = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2',
            )
    frame_c = Frame.from_records(
            ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
            index=('p', 'q'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f3',
            )
    # eight records of eight values each, matching the 2x2x2 hierarchies below
    records = (
            (10, 20, 50, False, 10, 20, 50, False),
            (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
            (234, 44452, 0, False, 234, 44452, 0, False),
            (4, -4, 2000, True, 4, -4, 2000, True),
            (10, 20, 50, False, 10, 20, 50, False),
            (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
            (234, 44452, 0, False, 234, 44452, 0, False),
            (4, -4, 2000, True, 4, -4, 2000, True),
            )
    frame_d = Frame.from_records(
            records,
            index=IndexHierarchy.from_product(
                    ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'), (1, 2)),
            name='f4',
            )

    sources = (frame_a, frame_b, frame_c, frame_d)
    config_map = StoreConfigMap.from_frames(sources)

    with temp_file('.hdf5') as fp:
        store = StoreHDF5(fp)
        store.write(((f.name, f) for f in sources), config=config_map)

        # this will read from file, not in memory
        stored_labels = tuple(store.labels())
        self.assertEqual(tuple(f.name for f in sources), stored_labels)

        # labels were just shown to line up with the sources, so pair them directly
        for source, label in zip(sources, stored_labels):
            loaded = store.read(label, config=config_map[source.name])
            self.assertEqualFrames(source, loaded)
def test_bus_interface_b(self) -> None:
    '''
    ``interface`` is reachable from both a ``Bus`` instance and the ``Bus`` class, and yields a ``Frame`` either way.
    '''
    f1 = Frame.from_dict(
            dict(a=(1,2), b=(3,4)),
            index=('x', 'y'),
            name='foo')
    f2 = Frame.from_dict(
            dict(a=(1,2,3), b=(4,5,6)),
            index=('x', 'y', 'z'),
            name='bar')

    b1 = Bus.from_frames((f1, f2))

    post1 = b1.interface
    self.assertIsInstance(post1, Frame)

    post2 = Bus.interface
    self.assertIsInstance(post2, Frame)

    # NOTE: this test previously used assertTrue(post.shape, (41, 3)); assertTrue
    # takes its second argument as the failure message, so the shape was never
    # compared. The row count (41) tracks the number of public Bus members and
    # was therefore never verified -- TODO confirm the current count before
    # pinning it. What is asserted here is the summary width of three columns
    # and that instance and class access describe the same interface.
    self.assertEqual(post1.shape[1], 3)
    self.assertGreater(post1.shape[0], 0)
    self.assertEqual(post1.shape, post2.shape) #type: ignore
def test_bus_loc_b(self) -> None:
    '''
    An open-ended ``loc`` slice starting at ``'f2'`` selects ``'f2'`` and ``'f3'`` from a three-frame ``Bus``.
    '''
    members = (
            Frame.from_dict(
                    dict(a=(1, 2), b=(3, 4)),
                    index=('x', 'y'),
                    name='f1'),
            Frame.from_dict(
                    dict(c=(1, 2, 3), b=(4, 5, 6)),
                    index=('x', 'y', 'z'),
                    name='f2'),
            Frame.from_dict(
                    dict(d=(10, 20), b=(50, 60)),
                    index=('p', 'q'),
                    name='f3'),
            )
    bus = Bus.from_frames(members)

    tail = bus.loc['f2':] #type: ignore
    self.assertEqual(len(tail), 2)
    self.assertEqual(tail.index.values.tolist(), ['f2', 'f3'])
def test_batch_count_a(self) -> None:
    '''
    ``Batch.count`` along each axis; the expected tallies leave out the NaN entries.
    '''
    left = Frame.from_dict(
            dict(b=(20, 20, 0), a=(20, 20, np.nan)),
            index=('z', 'y', 'x'),
            name='f1')
    right = Frame.from_dict(
            dict(b=(1, np.nan, 1), a=(1, 50, 1)),
            index=('y', 'z', 'x'),
            name='f2')

    # a fresh Batch is built for each axis
    per_column = Batch.from_frames((left, right)).count(axis=0).to_frame()
    self.assertEqual(
            per_column.to_pairs(0),
            (('b', (('f1', 3), ('f2', 2))),
            ('a', (('f1', 2), ('f2', 3)))),
            )

    per_label = Batch.from_frames((left, right)).count(axis=1).to_frame()
    self.assertEqual(
            per_label.to_pairs(0),
            (('x', (('f1', 1), ('f2', 2))),
            ('y', (('f1', 2), ('f2', 2))),
            ('z', (('f1', 2), ('f2', 1)))),
            )
def test_exceed_rows(self) -> None:
    '''
    Writing a 1048576-row frame to XLSX together with its column labels raises ``RuntimeError``.
    '''
    tall = Frame.from_element('x', index=range(1048576), columns='x')

    # the extra row for the column labels is presumably what pushes the sheet
    # past the row limit
    with temp_file('.xlsx') as fp, self.assertRaises(RuntimeError):
        tall.to_xlsx(fp, include_columns=True)
def _set_container_type(frame: Frame, container_type: tp.Type[Frame]) -> Frame:
    '''
    Return ``frame`` as an instance of ``container_type``.

    The frame is handed back untouched when its class is exactly ``container_type``; otherwise it is converted with ``_to_frame``.
    '''
    if frame.__class__ is container_type:
        return frame
    return frame._to_frame(container_type)
def test_batch_round_a(self) -> None:
    '''
    The built-in ``round`` applied to a ``Batch`` rounds every contained frame to one decimal place.
    '''
    first = Frame.from_dict(
            dict(b=(20, 20.234, 0), a=(20.234, 20.234, 50.828)),
            index=('z', 'y', 'x'),
            name='f1')
    second = Frame.from_dict(
            dict(b=(1, 20.234, 1.043), a=(1.043, 50.828, 1.043)),
            index=('y', 'z', 'x'),
            name='f2')

    rounded = round(Batch.from_frames((first, second)), 1).to_frame() #type: ignore

    expected = (
            ('b', (
                    (('f1', 'z'), 20.0), (('f1', 'y'), 20.2), (('f1', 'x'), 0.0),
                    (('f2', 'y'), 1.0), (('f2', 'z'), 20.2), (('f2', 'x'), 1.0))),
            ('a', (
                    (('f1', 'z'), 20.2), (('f1', 'y'), 20.2), (('f1', 'x'), 50.8),
                    (('f2', 'y'), 1.0), (('f2', 'z'), 50.8), (('f2', 'x'), 1.0))),
            )
    self.assertEqual(rounded.to_pairs(0), expected)
def test_bus_loc_a(self) -> None:
    '''
    Selecting a single label through ``Bus.loc`` returns the very same ``Frame`` object that was stored.
    '''
    stored = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='f1')
    bus = Bus.from_frames((stored,))

    fetched = bus.loc['f1']
    # identity, not just equality
    self.assertIs(stored, fetched)
def test_batch_f(self) -> None:
    '''
    Group a frame by ``'group'``, select column ``'b'`` from each group via ``Batch.loc``, and sum per group.
    '''
    source = Frame.from_dict({
            'a': [1, 2, 3],
            'b': [2, 4, 6],
            'group': ['x', 'z', 'z'],
            })

    batch = Batch(source.iter_group_items('group'))
    totals = batch.loc[:, 'b'].sum().to_frame()

    expected = (('b', (('x', 2), ('z', 10))), )
    self.assertEqual(totals.to_pairs(0), expected)
def test_archive_components_npz_write_arrays_f(self) -> None:
    '''
    Three four-column arrays of 3, 1 and 2 rows written with ``axis=0`` load back as a single (6, 4) frame.
    '''
    blocks = (
            np.arange(12).reshape(3, 4),
            np.array([10, 20, 30, 40]).reshape(1, 4),
            np.arange(8).reshape(2, 4),
            )

    with temp_file('.zip') as fp:
        NPZ(fp, 'w').from_arrays(blocks=blocks, axis=0)
        loaded = Frame.from_npz(fp)
        self.assertEqual(loaded.shape, (6, 4))
def test_archive_components_npz_write_arrays_a(self) -> None:
    '''
    A single array written without labels loads back with the same values and with ``_map`` set to ``None`` on both index and columns.
    '''
    with temp_file('.zip') as fp:
        block = np.arange(12).reshape(3, 4)
        NPZ(fp, 'w').from_arrays(blocks=(block, ))

        loaded = Frame.from_npz(fp)
        self.assertEqual(loaded.values.tolist(), block.tolist())
        self.assertIsNone(loaded.index._map)
        self.assertIsNone(loaded.columns._map)
def test_archive_components_npz_from_frames_j(self) -> None:
    '''
    Two frames with partially overlapping columns are written to an NPY directory along ``axis=0`` without their indices; the result is read back and missing values are filled with 0.
    '''
    upper = ff.parse('s(2,2)|v(float)').relabel(columns=('a', 'b'))
    lower = ff.parse('s(2,2)|v(float)').relabel(columns=('b', 'c'))

    expected = (
            ('a', ((0, 1930.4), (1, -1760.34), (2, 0.0), (3, 0.0))),
            ('b', ((0, -610.8), (1, 3243.94), (2, 1930.4), (3, -1760.34))),
            ('c', ((0, 0.0), (1, 0.0), (2, -610.8), (3, 3243.94))),
            )

    with TemporaryDirectory() as fp:
        NPY(fp, 'w').from_frames(
                frames=(upper, lower),
                axis=0,
                include_index=False,
                )
        combined = Frame.from_npy(fp).fillna(0)
        self.assertEqual(combined.to_pairs(), expected)
def build_frame(self, rows: int, cols: int, groups: int, mixed_data: bool) -> Frame:
    '''
    Build a shuffled test frame of ``rows`` rows and ``cols`` data columns plus one trailing ``GROUPBY_COL`` column.

    Args:
        rows: number of rows.
        cols: number of data columns, labelled ``'0'`` .. ``str(cols - 1)``.
        groups: passed to ``_build_groups`` together with ``rows`` to produce the group column.
        mixed_data: if True, each column is built separately with a dtype cycled from ``BuildTestFrames._DTYPES``; otherwise a single ``np.arange`` block is used.
    '''
    group_col: np.ndarray = self._build_groups(groups, rows)

    if not mixed_data:
        # one homogeneous block, with the group column stacked on the right
        block = np.arange(rows * cols).reshape(rows, cols)
        block = np.hstack((block, group_col.reshape(rows, 1)))
        labels = [str(i) for i in range(cols)] + [GROUPBY_COL]
        return BuildTestFrames._shuffle(Frame(block, columns=labels))

    dtypes = BuildTestFrames._DTYPES
    items: tp.List[tp.Tuple[str, np.ndarray]] = [
            (str(pos), self._build_col(rows, dtypes[pos % len(dtypes)]))
            for pos in range(cols)
            ]
    items.append((GROUPBY_COL, group_col))
    return BuildTestFrames._shuffle(Frame.from_items(items))
def test_unary_operators_numeric(self, f1: Frame) -> None:
    '''
    Each unary operator (except ``__invert__``) applied to the frame gives the same values as applying it to the frame's array.
    '''
    # __invert__ is invalid on non Boolean
    applicable = (name for name in UFUNC_UNARY_OPERATORS if name != '__invert__')

    for name in applicable:
        unary = getattr(operator, name)
        array = f1.values
        # must coerce all blocks to same type to compare to what NP does
        from_frame = unary(f1.astype(array.dtype)).values
        from_array = unary(array)
        self.assertAlmostEqualArray(from_frame, from_array)
def test_bus_equals_c(self) -> None:
    '''
    Two buses holding the same frames but carrying different names are equal by default and unequal once ``compare_name=True``.
    '''
    members = (
            Frame.from_dict(
                    dict(a=(1, 2), b=(3, 4)),
                    index=('x', 'y'),
                    name='f1'),
            Frame.from_dict(
                    dict(c=(1, 2, 3), b=(4, 5, 6)),
                    index=('x', 'y', 'z'),
                    name='f2'),
            )

    bus_foo = Bus.from_frames(members, name='foo')
    self.assertEqual(bus_foo.name, 'foo')

    bus_bar = Bus.from_frames(members, name='bar')
    self.assertEqual(bus_bar.name, 'bar')

    self.assertTrue(bus_foo.equals(bus_bar))
    self.assertFalse(bus_foo.equals(bus_bar, compare_name=True))
def test_batch_apply_b(self) -> None:
    '''
    ``Batch.apply`` with threads enabled maps a function over every frame; here it collects each frame's shape.
    '''
    members = (
            Frame.from_dict(
                    dict(a=(1, 2), b=(3, 4)),
                    index=('x', 'y'),
                    name='f1'),
            Frame.from_dict(
                    dict(c=(1, 2, 3), b=(4, 5, 6)),
                    index=('x', 'y', 'z'),
                    name='f2'),
            Frame.from_dict(
                    dict(d=(10, 20), b=(50, 60)),
                    index=('x', 'q'),
                    name='f3'),
            )

    batch = Batch.from_frames(members, use_threads=True, max_workers=8)
    shapes = batch.apply(lambda x: x.shape)

    expected = {'f1': (2, 2), 'f2': (3, 2), 'f3': (2, 2)}
    self.assertEqual(dict(shapes.items()), expected)
def test_batch_b(self) -> None:
    '''
    Selecting a single column from a grouped ``Batch`` and summing gives one total per group; the resulting column label is ``None``.
    '''
    source = Frame.from_dict(
            {
                'a': [1, 49, 2, 3],
                'b': [2, 4, 381, 6],
                'group': ['x', 'x', 'z', 'z'],
            },
            index=('r', 's', 't', 'u'),
            )
    batch = Batch(source.iter_group_items('group'))

    totals = batch['b'].sum().to_frame()
    expected = ((None, (('x', 6), ('z', 387))),)
    self.assertEqual(totals.to_pairs(0), expected)
def test_batch_apply_except_d(self) -> None:
    '''
    ``apply_items_except`` with ``func2`` and ``KeyError`` over three frames using three workers; only an entry labelled ``'f3'`` is expected in the result.
    '''
    members = (
            Frame.from_dict(
                    dict(a=(1, 2), b=(3, 4)),
                    index=('x', 'y'),
                    name='f1'),
            Frame.from_dict(
                    dict(c=(1, 2, 3), b=(4, 5, 6)),
                    index=('x', 'y', 'z'),
                    name='f2'),
            Frame.from_dict(
                    dict(d=(10, 20), b=(50, 60)),
                    index=('x', 'q'),
                    name='f3'),
            )

    batch = Batch.from_frames(members, max_workers=3)
    survivors = batch.apply_items_except(func2, KeyError).to_frame()

    expected = (('d', (('f3', 20),)), ('b', (('f3', 60),)))
    self.assertEqual(survivors.to_pairs(), expected)
def test_batch_shapes_a(self) -> None:
    '''
    ``Batch.shapes`` reports the (rows, columns) shape of each group after selecting columns ``'a'`` and ``'b'``.
    '''
    # an unused DisplayConfig local was removed here; it did not take part in
    # any assertion
    f1 = Frame.from_dict({'a':[1,2,3], 'b':[2,4,6], 'group': ['x','z','z']})

    b1 = Batch(f1.iter_group_items('group'))[['a', 'b']]

    # group 'x' has one row, group 'z' has two; both keep the two selected columns
    self.assertEqual(b1.shapes.to_pairs(),
            (('x', (1, 2)), ('z', (2, 2)))
            )
def test_batch_apply_array_a(self) -> None:
    '''
    ``Batch.unique`` yields one array per frame; aligned as columns with ``fill_value=None``, shorter results are padded with ``None``.
    '''
    members = (
            Frame.from_dict(
                    dict(a=(1, 2), b=(3, 4)),
                    index=('x', 'y'),
                    name='f1'),
            Frame.from_dict(
                    dict(c=(1, 2, 3), b=(4, 5, 6)),
                    index=('x', 'y', 'z'),
                    name='f2'),
            Frame.from_dict(
                    dict(d=(10, 20), b=(50, 60)),
                    index=('x', 'q'),
                    name='f3'),
            )

    uniques = Batch.from_frames(members).unique()
    table = uniques.to_frame(axis=1, fill_value=None)

    expected = (
            ('f1', ((0, 1), (1, 2), (2, 3), (3, 4), (4, None), (5, None))),
            ('f2', ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6))),
            ('f3', ((0, 10), (1, 20), (2, 50), (3, 60), (4, None), (5, None))),
            )
    self.assertEqual(table.to_pairs(0), expected)
def assertEqualFrames(self,
        f1: Frame,
        f2: Frame,
        check_dtypes: bool = True) -> None:
    '''
    Assert that two frames agree on shape, class, name, index/columns depth and class, and (approximately) on values.

    Args:
        check_dtypes: if True, additionally require the per-column dtypes to be equal.
    '''
    # applied in this order, each lazily, so the first mismatch is the one reported
    accessors = (
            lambda f: f.shape,
            lambda f: f.__class__,
            lambda f: f.name,
            lambda f: f.index.depth,
            lambda f: f.index.__class__,
            lambda f: f.columns.depth,
            lambda f: f.columns.__class__,
            )
    for access in accessors:
        self.assertEqual(access(f1), access(f2))

    # NOTE: this will not handle types
    self.assertAlmostEqualFramePairs(f1.to_pairs(0), f2.to_pairs(0))

    if check_dtypes:
        same_dtypes = f1.dtypes.values == f2.dtypes.values
        self.assertTrue(same_dtypes.all())
def to_frame(cls,
        target: tp.Type[ContainerBase],
        *,
        minimized: bool = True,
        max_args: int = MAX_ARGS,
        ) -> Frame:
    '''
    Summarize the interface of ``target`` as a ``Frame`` indexed by signature.

    Args:
        target: the class to interrogate; its ``__name__`` becomes the name of the returned frame.
        minimized: if True, keep only the ``cls_name``, ``group`` and ``doc`` columns.
        max_args: forwarded to ``interrogate``.
    '''
    records = cls.interrogate(target, max_args=max_args)
    unordered = Frame.from_records(records)

    # order rows by group, following INTERFACE_GROUP_ORDER
    per_group = (
            unordered.loc[unordered['group'] == group]
            for group in INTERFACE_GROUP_ORDER
            )
    summary = Frame.from_concat(per_group, name=target.__name__)
    summary = summary.set_index('signature', drop=True)

    if not minimized:
        return summary #type: ignore
    return summary[['cls_name', 'group', 'doc']] #type: ignore
def test_bus_iloc_a(self) -> None:
    '''
    After a zip-pickle round trip, selecting positions 0 and 2 with ``iloc`` gives a ``Bus`` whose status reports both frames as loaded.
    '''
    members = (
            Frame.from_dict(
                    dict(a=(1, 2), b=(3, 4)),
                    index=('x', 'y'),
                    name='f1'),
            Frame.from_dict(
                    dict(c=(1, 2, 3), b=(4, 5, 6)),
                    index=('x', 'y', 'z'),
                    name='f2'),
            Frame.from_dict(
                    dict(d=(10, 20), b=(50, 60)),
                    index=('p', 'q'),
                    name='f3'),
            )
    written = Bus.from_frames(members)

    with temp_file('.zip') as fp:
        written.to_zip_pickle(fp)
        restored = Bus.from_zip_pickle(fp)

        selection = restored.iloc[[0, 2]]
        self.assertEqual(
                selection.status['loaded'].to_pairs(),
                (('f1', True), ('f3', True)),
                )
def test_batch_c3(self) -> None:
    '''
    ``Batch.loc['x']`` pulls row ``'x'`` from every frame; assembled as columns with ``fill_value=0``, labels absent from a frame become 0.
    '''
    members = (
            Frame.from_dict(
                    dict(a=(1, 2), b=(3, 4)),
                    index=('x', 'y'),
                    name='f1'),
            Frame.from_dict(
                    dict(c=(1, 2, 3), b=(4, 5, 6)),
                    index=('x', 'y', 'z'),
                    name='f2'),
            Frame.from_dict(
                    dict(d=(10, 20), b=(50, 60)),
                    index=('x', 'q'),
                    name='f3'),
            )
    batch = Batch.from_frames(members)

    row_x = batch.loc['x'].to_frame(fill_value=0, axis=1)
    expected = (
            ('f1', (('a', 1), ('b', 3), ('c', 0), ('d', 0))),
            ('f2', (('a', 0), ('b', 4), ('c', 1), ('d', 0))),
            ('f3', (('a', 0), ('b', 50), ('c', 0), ('d', 10))),
            )
    self.assertEqual(row_x.to_pairs(0), expected)
def test_store_filter_to_delimited_a(self) -> None:
    '''
    A ``StoreFilter`` passed to ``to_csv`` substitutes its configured markers for ``None``, NaN and positive/negative infinity.
    '''
    frame = Frame.from_records(((None, np.inf), (np.nan, -np.inf)))
    markers = StoreFilter(
            from_nan='*',
            from_none='!',
            from_posinf='&',
            from_neginf='@',
            )

    buffer = StringIO()
    frame.to_csv(buffer, store_filter=markers, include_index=False)

    # getvalue() returns the whole buffer, so no rewind is needed
    self.assertEqual(buffer.getvalue(), '0,1\n!,&\n*,@')
def test_batch_shift_a(self) -> None:
    '''
    ``Batch.shift`` with ``index=1``, ``columns=-1`` and ``fill_value=0`` is applied to each frame before the results are concatenated.
    '''
    first = Frame.from_dict(
            dict(b=(20, 20, 0), a=(20, 20, 50)),
            index=('z', 'y', 'x'),
            name='f1')
    second = Frame.from_dict(
            dict(b=(1, 20, 1), a=(1, 50, 1)),
            index=('y', 'z', 'x'),
            name='f2')

    batch = Batch.from_frames((first, second))
    shifted = batch.shift(index=1, columns=-1, fill_value=0).to_frame()

    expected = (
            ('b', (
                    (('f1', 'z'), 0), (('f1', 'y'), 20), (('f1', 'x'), 20),
                    (('f2', 'y'), 0), (('f2', 'z'), 1), (('f2', 'x'), 50))),
            ('a', (
                    (('f1', 'z'), 0), (('f1', 'y'), 0), (('f1', 'x'), 0),
                    (('f2', 'y'), 0), (('f2', 'z'), 0), (('f2', 'x'), 0))),
            )
    self.assertEqual(shifted.to_pairs(0), expected)
def test_store_hdf5_write_b(self) -> None:
    '''
    Writing a frame that contains a bare ``object()`` value to an HDF5 store raises ``RuntimeError``.
    '''
    # failure when including objects
    with_object = Frame.from_dict(
            dict(x=(1, 2, -5, object()), y=(3, 4, -5, -3000)),
            )
    sources = (with_object, )

    with temp_file('.hdf5') as fp:
        store = StoreHDF5(fp)
        with self.assertRaises(RuntimeError):
            store.write(((f.name, f) for f in sources))
def assertEqualFrames(self,
        f1: Frame,
        f2: Frame,
        compare_dtype: bool = True,
        ) -> None:
    '''
    Assert that ``f1.equals(f2)`` holds; on failure, narrow down which component differs.

    The index, columns and ``_blocks`` are compared in that order so the failure message names the first mismatching part. If all three match, the difference is attributed to class or name.
    '''
    if f1.equals(f2, compare_dtype=compare_dtype):
        return

    components = (
            ('index', 'index do not match'),
            ('columns', 'columns do not match'),
            ('_blocks', '_blocks do not match'),
            )
    for attr, message in components:
        matches = getattr(f1, attr).equals(
                getattr(f2, attr),
                compare_dtype=compare_dtype,
                )
        self.assertTrue(matches, message)

    self.fail('class or name do not match')
def test_batch_to_bus_a(self) -> None:
    '''
    ``Batch.to_bus`` produces a ``Bus`` whose items, concatenated with ``fill_value=0``, reproduce the source frames.
    '''
    first = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='f1')
    second = Frame.from_dict(
            dict(c=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2')

    bus = Batch.from_frames((first, second)).to_bus()
    combined = Frame.from_concat_items(bus.items(), fill_value=0)

    expected = (
            ('a', (
                    (('f1', 'x'), 1), (('f1', 'y'), 2),
                    (('f2', 'x'), 0), (('f2', 'y'), 0), (('f2', 'z'), 0))),
            ('b', (
                    (('f1', 'x'), 3), (('f1', 'y'), 4),
                    (('f2', 'x'), 4), (('f2', 'y'), 5), (('f2', 'z'), 6))),
            ('c', (
                    (('f1', 'x'), 0), (('f1', 'y'), 0),
                    (('f2', 'x'), 1), (('f2', 'y'), 2), (('f2', 'z'), 3))),
            )
    self.assertEqual(combined.to_pairs(0), expected)
def test_batch_ufunc_shape_a(self) -> None:
    '''
    ``Batch.cumsum`` accumulates within each frame; the per-frame results are concatenated with ``fill_value=0`` for comparison.
    '''
    first = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='f1')
    second = Frame.from_dict(
            dict(c=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2')

    running = Batch.from_frames((first, second), name='foo').cumsum()
    combined = Frame.from_concat_items(running.items(), fill_value=0)

    expected = (
            ('a', (
                    (('f1', 'x'), 1), (('f1', 'y'), 3),
                    (('f2', 'x'), 0), (('f2', 'y'), 0), (('f2', 'z'), 0))),
            ('b', (
                    (('f1', 'x'), 3), (('f1', 'y'), 7),
                    (('f2', 'x'), 4), (('f2', 'y'), 9), (('f2', 'z'), 15))),
            ('c', (
                    (('f1', 'x'), 0), (('f1', 'y'), 0),
                    (('f2', 'x'), 1), (('f2', 'y'), 3), (('f2', 'z'), 6))),
            )
    self.assertEqual(combined.to_pairs(0), expected)