Пример #1
0
    def test_table_info(self):
        """Check the metadata ``table_info`` reports after an initial append.

        ``self.col_data`` is appended to ``/table`` in a uniquely named file,
        then the reported column dtypes, column flavors and row count are
        compared with the expected values.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.append_ctable('/table', self.col_data)

        columns = ('t_int', 't_float', 't_str', 't_obj', 't_bytes', 't_comp')
        dtypes = ('i', 'f', 's5', 'o7', 'o12', 's200')
        expected = {
            'col_dtype': dict(zip(columns, dtypes)),
            'col_flavor': dict.fromkeys(columns, 'python'),
            'num_rows': 1000,
        }

        info = h.table_info('/table')
        for key in ('col_dtype', 'col_flavor'):
            self.assertDictEqual(info[key], expected[key])
        self.assertEqual(info['num_rows'], expected['num_rows'])
Пример #2
0
    def test_delcol(self):
        """Check that ``delete_column`` drops a column from the table metadata.

        A two-column table is written, its full ``table_info`` is verified,
        then ``col1`` is deleted and the info is verified again: only ``col2``
        may remain while the row count and version stay the same.
        """
        unique_filename = str(uuid.uuid4()) + '.h5'
        h = H5ColStore(os.path.join(self.tmp_dir, unique_filename))
        h.append_ctable('/table', {'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']})
        table_info = h.table_info('/table')
        self.assertDictEqual(
            table_info, {
                'col_dtype': {
                    'col1': 'i',
                    'col2': 's1'
                },
                'col_flavor': {
                    'col1': 'python',
                    'col2': 'python'
                },
                'num_rows': 3,
                '_version': VERSION
            })
        h.delete_column('/table', 'col1')
        table_info = h.table_info('/table')
        # assertDictEqual, not assertSequenceEqual: the latter indexes its
        # arguments by integer position, so a mismatching dict would surface
        # as a KeyError instead of a readable assertion failure.
        self.assertDictEqual(
            table_info, {
                'col_dtype': {
                    'col2': 's1'
                },
                'col_flavor': {
                    'col2': 'python'
                },
                'num_rows': 3,
                '_version': VERSION
            })
Пример #3
0
    def test_table_safe_expand(self):
        """Check that appending with ``resize=True`` grows an undersized column.

        A ``t_comp`` column is created with dtype ``'o100'``.  A first append
        must leave that dtype untouched; a second append containing a
        250-character string must still round-trip, and afterwards the dtype
        reported by ``table_info`` must carry the byte-string length found on
        the stored node.
        """
        unique_filename = str(uuid.uuid4()) + '.h5'
        h = H5ColStore(os.path.join(self.tmp_dir, unique_filename))

        dt = {'t_comp': 'o100'}
        h.create_ctable('/table', col_dtypes=dt)

        d1 = [('hello', ), ('world', 0)]
        h.append_ctable('/table', {'t_comp': d1}, resize=True)
        # read data
        b = h.read_ctable('/table')
        self.assertListEqual(b['t_comp'], d1)
        # read info
        info = h.table_info('/table')
        self.assertEqual(info['col_dtype']['t_comp'], 'o100')

        # write data bigger than the size declared when the table was created
        d2 = [('hello' * 50), '1']
        h.append_ctable('/table', {'t_comp': d2}, resize=True)
        b = h.read_ctable('/table')
        self.assertListEqual(b['t_comp'], d1 + d2)

        # ensure datatypes attribute was updated
        with h.open() as h5:
            node = h5.get_node('/table/t_comp')
            m = re.search(r'S(\d+)', str(node.dtype))
            # fail with a clear message instead of an AttributeError on None
            self.assertIsNotNone(
                m, f'unexpected node dtype: {node.dtype}')
            new_len = int(m.group(1))
        info = h.table_info('/table')
        self.assertEqual(info['col_dtype']['t_comp'], f'o{new_len}')
Пример #4
0
    def test_append_without_create_dtypes(self):
        """Append to tables that were never created explicitly.

        ``/table`` is appended to without ``col_dtypes`` and ``/table1`` with
        ``self.dt``; the dtypes reported by ``table_info`` are checked for
        both, and the data read back from ``/table1`` is compared element by
        element with ``self.col_data``.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)

        # no dtypes supplied
        h.append_ctable('/table', self.col_data)
        info = h.table_info('/table')
        expected_default = {
            't_int': 'i',
            't_float': 'f',
            't_str': 's5',
            't_obj': 'o7',
            't_bytes': 'o12',
            't_comp': 's200',
        }
        self.assertDictEqual(info['col_dtype'], expected_default)

        # dtypes supplied explicitly
        h.append_ctable('/table1', self.col_data, col_dtypes=self.dt)
        info = h.table_info('/table1')
        expected_explicit = {
            't_int': 'i8',
            't_float': 'n',
            't_str': 's100',
            't_obj': 'o500',
            't_bytes': 'o40',
            't_comp': 'c100',
        }
        self.assertDictEqual(info['col_dtype'], expected_explicit)

        stored = h.read_ctable('/table1')
        for name, written in self.col_data.items():
            for idx, value in enumerate(stored[name]):
                self.assertEqual(value, written[idx])
Пример #5
0
    def test_select_inds(self):
        """Read rows of a 100-row table by explicit index lists.

        Covers a contiguous range, a range running past the end (must raise
        ``IndexError``), indices given out of order, and an index list
        combined with a column subset.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        tmp_data = {
            'c1': [f'h{i}' for i in range(100)],
            'c2': list(range(100)),
        }
        h.append_ctable('/table', tmp_data)

        # full read returns exactly what was written
        self.assertDictEqual(tmp_data, h.read_ctable('/table'))

        # contiguous selection
        picked = h.read_ctable('/table', inds=list(range(10, 34)))
        for col in ('c1', 'c2'):
            self.assertListEqual(picked[col], tmp_data[col][10:34])

        # read outside range
        with self.assertRaises(IndexError):
            h.read_ctable('/table', inds=list(range(90, 105)))

        # read out of order
        picked = h.read_ctable('/table', inds=[10, 2, 78])
        self.assertDictEqual(
            picked, {'c2': [10, 2, 78], 'c1': ['h10', 'h2', 'h78']})

        # read limited columns
        picked = h.read_ctable('/table', inds=[11, 3, 78], cols=['c2'])
        self.assertDictEqual(picked, {'c2': [11, 3, 78]})
Пример #6
0
    def test_makedir_path(self):
        """A file must exist at a nested path after a table is created in it."""
        nested_parts = ('level1', 'level2', 'level3', 'abc.h5')
        new_file = os.path.join(self.tmp_dir, *nested_parts)

        store = H5ColStore(new_file)
        store.create_ctable('myoobj', col_dtypes={'col1': 'f'})

        self.assertTrue(os.path.exists(new_file))
Пример #7
0
    def test_table_attrs_write(self):
        """Write, overwrite and read back a custom table attribute.

        After each ``_write_attrs`` call the ``col_dtype`` entry must still
        equal ``self.dt`` and the ``hello`` attribute must hold the value
        written last.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.create_ctable('/table', col_dtypes=self.dt)
        self.assertDictEqual(h.table_info('/table')['col_dtype'], self.dt)

        # first write
        with h.open() as h5:
            h._write_attrs(h5, '/table', 'hello', 'world')
        attrs = h.table_info('/table')
        self.assertDictEqual(attrs['col_dtype'], self.dt)
        self.assertEqual(attrs['hello'], 'world')

        # overwrite
        with h.open() as h5:
            h._write_attrs(h5, '/table', 'hello', 'yolo')
        attrs = h.table_info('/table')
        expected_keys = {'col_dtype', 'hello', 'num_rows', '_version'}
        self.assertSetEqual(set(attrs.keys()), expected_keys)
        self.assertDictEqual(attrs['col_dtype'], self.dt)
        self.assertEqual(attrs['hello'], 'yolo')

        # get by name
        fresh = h.table_info('/table')
        self.assertEqual(fresh['hello'], 'yolo')
Пример #8
0
    def test_repack_nonexist(self):
        """``repack`` must raise on a store nothing has been written to."""
        path_name = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        store = H5ColStore(path_name)

        with self.assertRaises(Exception):
            store.repack()
Пример #9
0
    def test_updates_single(self):
        """Update the single row selected by ``col2 == 'e'``.

        The assertions expect that row to sit at index 4.  ``col1`` is
        updated twice (the first value is applied two times in a row), then
        ``col3`` is updated on the same row.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.append_ctable('/table', self.col_data)

        match = [['col2', '==', 'e']]

        h.update_ctable('/table', match, {'col1': [20]})
        stored = h.read_ctable('/table')
        self.assertListEqual(
            stored['col1'], [0, 1, 2, 3, 20, 5, 6, 7, 8, 9])

        # repeat the same update, then apply a different value
        h.update_ctable('/table', match, {'col1': [20]})
        h.update_ctable('/table', match, {'col1': [22]})
        stored = h.read_ctable('/table')
        self.assertListEqual(
            stored['col1'], [0, 1, 2, 3, 22, 5, 6, 7, 8, 9])

        # update a tuple-valued column on the same row
        replacement = ('sweet', 'now')
        check = [('yolo', 'people')] * 10
        check[4] = replacement
        h.update_ctable('/table', match, {'col3': [replacement]})
        stored = h.read_ctable('/table')
        self.assertListEqual(stored['col3'], check)
Пример #10
0
    def test_table_nocomp_fail(self):
        """Appending ``self.dcomp`` to an ``'o100'`` column must raise
        when ``resize=False``."""
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        store = H5ColStore(store_path)
        store.create_ctable('/table', col_dtypes={'t_comp': 'o100'})

        with self.assertRaises(Exception):
            store.append_ctable(
                '/table', {'t_comp': self.dcomp}, resize=False)
Пример #11
0
    def test_path(self):
        """``_path`` joins two parts into one path with a single leading '/'."""
        store = H5ColStore('abc.h5')
        cases = (
            (('/hello', 'world'), '/hello/world'),
            (('hello', 'world'), '/hello/world'),
            (('/', 'world'), '/world'),
            (('/', '/world'), '/world'),
            (('', 'world'), '/world'),
        )
        for parts, expected in cases:
            self.assertEqual(store._path(*parts), expected)
Пример #12
0
    def test_delete_rows_no_match(self):
        """``delete_rows`` with the query ``col1 == 15`` must return ``None``."""
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        store = H5ColStore(store_path)
        store.append_ctable('/table', self.col_data)

        result = store.delete_rows('/table', query=('col1', '==', 15))

        self.assertIsNone(result)
Пример #13
0
 def test_convert_obj_compress(self):
     """``_convert_data`` with dtype ``'c100'`` must return an ndarray whose
     elements are the expected byte strings."""
     store = H5ColStore('a.h5')
     converted = store._convert_data([['a'], ['b']], 'c100')

     self.assertIsInstance(converted, np.ndarray)
     self.assertListEqual(
         converted.tolist(),
         [
             b'\x02\x01\x13\x08\x03\x00\x00\x00\x01\x00\x00\x00\x13\x00\x00\x00\x91\xa1a1',
             b'\x02\x01\x13\x08\x03\x00\x00\x00\x01\x00\x00\x00\x13\x00\x00\x00\x91\xa1b1',
         ])
Пример #14
0
    def test_col_same_length_exception(self):
        """Appending columns of different lengths (10 vs 9 rows) must raise."""
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        store = H5ColStore(store_path)
        mismatched = {
            'col1': list(range(10)),
            'col2': list('aacdefggf'),  # one entry short of col1
        }

        with self.assertRaises(Exception):
            store.append_ctable('/table', mismatched)
Пример #15
0
    def test_matrix_inds(self):
        """Select rows of a matrix-valued column by index.

        A 3x3 matrix is stored as column ``col1`` of ``/m1``; reading rows
        1 and 2 must return exactly the last two rows of the matrix.
        """
        # Unique file name, like the other tests: a fixed name would let a
        # '/m1' table left in tmp_dir by another test be appended to.
        unique_filename = str(uuid.uuid4()) + '.h5'
        new_file = os.path.join(self.tmp_dir, unique_filename)
        h5 = H5ColStore(new_file)
        m = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])

        h5.append_ctable('/m1', col_data={'col1': m}, col_dtypes={'col1': 'f'})

        x = h5.read_ctable('/m1', inds=[1, 2])['col1']
        # Element-wise comparison: a summed difference can be zero while
        # individual entries differ, and it would broadcast over a shape
        # mismatch instead of reporting it.
        np.testing.assert_array_equal(x, m[1:, :])
Пример #16
0
    def test_addcol(self):
        """Add columns to an existing table and check the resulting metadata.

        ``add_column`` must raise before ``/table`` has been written and when
        the new column has 999 rows against the table's 1000.  After every
        column of ``self.col_data`` has been added, ``table_info`` must list
        all of them next to the initial column.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)

        # nothing has been written to '/table' yet
        with self.assertRaises(Exception):
            h.add_column('/table', 'new_col', [1] * 1000)

        h.append_ctable('/table', {'init_col': [1] * 1000})

        # one row short of the table
        with self.assertRaises(Exception):
            h.add_column('/table', 'new_col', [1] * 999)

        expected = {
            'col_dtype': {'init_col': 'i'},
            'col_flavor': {'init_col': 'python'},
            'num_rows': 1000,
            '_version': VERSION,
        }
        self.assertDictEqual(h.table_info('/table'), expected)

        for col_name, col_data in self.col_data.items():
            h.add_column('/table', col_name, col_data)
        info = h.table_info('/table')

        added_dtypes = {
            't_int': 'i',
            't_float': 'f',
            't_str': 's5',
            't_obj': 'o7',
            't_bytes': 'o12',
            't_comp': 's200',
        }
        expected['col_dtype'].update(added_dtypes)
        expected['col_flavor'].update(dict.fromkeys(added_dtypes, 'python'))
        self.assertDictEqual(info, expected)
Пример #17
0
    def test_column_matrix(self):
        """A 10x3 random matrix written with ``_create_column`` must be stored
        as a ``float64`` node of shape ``(10, 3)``."""
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        store = H5ColStore(store_path)
        matrix = np.random.rand(10, 3)

        with store.open(mode='a') as h5:
            store._create_column(h5, '/mytable/testcol', data=matrix)

        with store.open(mode='r') as h5:
            node = h5.get_node('/mytable/testcol')
            self.assertEqual(str(node.dtype), 'float64')
            self.assertTupleEqual(node.shape, (10, 3))
Пример #18
0
    def test_matrix_initappend(self):
        """Append two matrices to the same column and read them back stacked.

        A 2x4 and a 3x4 random matrix are appended to ``col1`` of ``/m1``;
        the column read back must equal the two stacked vertically.
        """
        # Unique file name, like the other tests: a fixed name would let a
        # '/m1' table left in tmp_dir by another test be appended to.
        unique_filename = str(uuid.uuid4()) + '.h5'
        new_file = os.path.join(self.tmp_dir, unique_filename)
        h5 = H5ColStore(new_file)
        a = np.random.rand(2, 4)
        b = np.random.rand(3, 4)

        h5.append_ctable('/m1', col_data={'col1': a}, col_dtypes={'col1': 'f'})
        h5.append_ctable('/m1', col_data={'col1': b}, col_dtypes={'col1': 'f'})
        x = h5.read_ctable('/m1')['col1']
        chk_mat = np.vstack((a, b))
        # Element-wise comparison: a summed difference can cancel out and
        # would not detect a shape mismatch that happens to broadcast.
        np.testing.assert_array_equal(x, chk_mat)
Пример #19
0
    def _run_test(self, data, typecheck, size):
        """Write ``data`` as the ``/data`` column and check what was stored.

        Args:
            data: passed to ``_create_column`` as the column content.
            typecheck: value the stored node's ``dtype`` must equal.
            size: value ``len()`` of the stored node must equal.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        store = H5ColStore(store_path)

        with store.open() as h5:
            store._create_column(h5, '/data', data=data)

        # reopen so the checks run against a freshly opened file
        with store.open() as h5:
            node = h5.get_node('/data')
            self.assertEqual(node.dtype, typecheck)
            self.assertEqual(len(node), size)
Пример #20
0
    def test_delete_single_item(self):
        """Deleting the only row of a table must leave every column empty."""
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.append_ctable('/table', {'col1': ['abc'], 'col2': [1]})

        before = h.read_ctable('/table')
        self.assertDictEqual(before, {'col1': ['abc'], 'col2': [1]})

        h.delete_rows('/table', query=[['col1', '==', 'abc']])

        after = h.read_ctable('/table')
        self.assertDictEqual(after, {'col1': [], 'col2': []})
Пример #21
0
    def test_list_return(self):
        """Data appended from ``self.col_data`` must be flagged with the
        ``'python'`` flavor and come back as lists."""
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.create_ctable('/table', col_dtypes=self.dt)
        h.append_ctable('/table', self.col_data)

        flavors = h.table_info('/table')['col_flavor']
        self.assertSetEqual(set(flavors.values()), {'python'})

        columns = h.read_ctable('/table')
        for values in columns.values():
            self.assertIsInstance(values, list)
Пример #22
0
    def test_remove(self):
        """Delete a table and check how repeated deletes behave.

        After ``delete_ctable`` the table info must be empty.  Deleting again
        must return ``None`` by default and raise ``tb.NoSuchNodeError`` when
        ``raise_exception=True`` is passed.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.append_ctable('/table', self.col_data)
        h.table_info('/table')  # result is not needed; the call must succeed

        h.delete_ctable('/table')
        remaining = h.table_info('/table')
        self.assertEqual(len(remaining), 0)

        # check for delete exceptions
        self.assertIsNone(h.delete_ctable('/table'))
        with self.assertRaises(tb.NoSuchNodeError):
            h.delete_ctable('/table', raise_exception=True)
Пример #23
0
    def test_table_all_vals(self):
        """Read a table straight after creation and again after an append.

        The freshly created table must expose exactly the columns named in
        ``self.dt``; after appending ``self.col_data`` every value read back
        must equal the value written at the same position.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.create_ctable('/table', col_dtypes=self.dt)

        # check read right after create
        fresh = h.read_ctable('/table')
        self.assertSetEqual(set(fresh.keys()), set(self.dt.keys()))

        h.append_ctable('/table', self.col_data)

        stored = h.read_ctable('/table')
        for name, written in self.col_data.items():
            for idx, value in enumerate(stored[name]):
                self.assertEqual(value, written[idx])
Пример #24
0
    def test_ndarray_return(self):
        """Columns appended as ndarrays must come back as ndarrays.

        Columns whose dtype in ``self.dt`` starts with ``o`` or ``c`` are
        skipped.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.create_ctable('/table', col_dtypes=self.dt)

        as_arrays = {
            name: np.array(values)
            for name, values in self.col_data.items()
        }
        h.append_ctable('/table', as_arrays)

        stored = h.read_ctable('/table')
        for name, values in stored.items():
            # ignore packed columns
            if re.match(r'[oc]', self.dt[name]):
                continue
            self.assertIsInstance(values, np.ndarray)
Пример #25
0
    def test_updates_exception(self):
        """``update_ctable`` must raise for both update requests below.

        The first passes three new values for the query ``col2 == 'a'``; the
        second passes two new values for a query combining ``col2 == 'a'``
        and ``col2 == 'g'``.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.append_ctable('/table', self.col_data)

        bad_updates = (
            ([['col2', '==', 'a']], {'col1': [21, 22, 23]}),
            ([[('col2', '==', 'a'), ('col2', '==', 'g')]],
             {'col1': [21, 22]}),
        )
        for match, new_data in bad_updates:
            with self.assertRaises(Exception):
                h.update_ctable('/table', match, new_data)
Пример #26
0
    def test_updates_multi(self):
        """Update the rows selected by ``col2 == 'a'`` (rows 0 and 1 per the
        assertions).

        First one value is applied to both rows, then one value per row is
        written to two columns at once.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.append_ctable('/table', self.col_data)

        match = [['col2', '==', 'a']]
        untouched = [2, 3, 4, 5, 6, 7, 8, 9]

        # a single value lands in every matching row
        h.update_ctable('/table', match, {'col1': [20]})
        stored = h.read_ctable('/table')
        self.assertListEqual(stored['col1'], [20, 20] + untouched)

        # one value per matching row, two columns in one call
        h.update_ctable(
            '/table', match, {'col1': [21, 22], 'col2': ['w', 'z']})
        stored = h.read_ctable('/table')
        self.assertListEqual(stored['col1'], [21, 22] + untouched)
        self.assertListEqual(
            stored['col2'],
            ['w', 'z', 'c', 'd', 'e', 'f', 'g', 'g', 'f', 'c'])
Пример #27
0
    def test_delete_rows1(self):
        """Delete rows by index list, by a single condition and by a
        two-condition query, checking the remaining rows after each step."""
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.append_ctable('/table', self.col_data)

        # by explicit row numbers
        h.delete_rows('/table', rows=[2, 5, 6])
        left = h.read_ctable('/table')
        self.assertListEqual(left['col1'], [0, 1, 7, 3, 4, 8, 9])
        self.assertListEqual(
            left['col2'], ['a', 'a', 'g', 'd', 'e', 'f', 'c'])

        # by a single condition
        h.delete_rows('/table', query=[['col2', '==', 'a']])
        left = h.read_ctable('/table')
        expected = {
            'col1': [8, 9, 7, 3, 4],
            'col2': ['f', 'c', 'g', 'd', 'e'],
        }
        self.assertDictEqual(left, expected)

        # by two conditions grouped in one query entry
        grouped = [[('col2', '==', 'd'), ('col2', '==', 'g')]]
        h.delete_rows('/table', query=grouped)
        left = h.read_ctable('/table')
        self.assertDictEqual(
            left, {'col1': [8, 9, 4], 'col2': ['f', 'c', 'e']})
Пример #28
0
    def test_growth_rand_repack(self):
        """Append 200 single random rows, then check that ``repack`` shrinks
        the file.

        The file must exceed 100000 bytes after the appends and be below
        15000 bytes after ``repack``.
        """
        path_name = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(path_name)

        nloops = 200
        for x in range(nloops):
            digit = f'{np.random.randint(10)}'
            row = {'t_comp': [np.random.rand()], 't_str': [digit]}

            h.append_ctable('/table', col_data=row, resize=True)

            sz = os.stat(path_name).st_size

            # read data
            stored = h.read_ctable('/table')
            rows_so_far = x + 1
            self.assertEqual(len(stored['t_comp']), rows_so_far)
            self.assertEqual(len(stored['t_str']), rows_so_far)

        # size measured after the last append
        self.assertTrue(sz > 100000)
        h.repack()
        sz = os.stat(path_name).st_size
        self.assertTrue(sz < 15000)
Пример #29
0
    def test_write_wrong_data(self):
        """A failed append must leave the existing table untouched.

        Three rows are written, then an append with a list in the string
        column ``col2`` is attempted.  It must raise, and afterwards the row
        count and all three columns must still hold the original data.
        """
        store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(store_path)
        h.append_ctable('/table', {
            'col1': [1, 2, 3],
            'col3': [[1, 2], ['3', '4'], [5, '6']],
            'col2': ['a', 'b', 'c'],
        })
        dtypes = h.table_info('/table')['col_dtype']
        self.assertDictEqual(
            dtypes, {'col1': 'i', 'col2': 's1', 'col3': 'o6'})

        # setup incorrect data type on col2
        new_col_data = {
            'col1': [4.],
            'col2': [[5]],  # write incorrect data to col2
            'col3': [[7]],
        }
        with self.assertRaises(Exception):
            h.append_ctable('/table', new_col_data)

        info = h.table_info('/table')
        data = h.read_ctable('/table')
        self.assertEqual(info['num_rows'], 3)
        self.assertListEqual(data['col1'], [1, 2, 3])
        self.assertListEqual(data['col2'], ['a', 'b', 'c'])
        self.assertListEqual(
            data['col3'], [(1, 2), ('3', '4'), (5, '6')])
Пример #30
0
    def test_growth_size_limit(self):
        """Append the same row 200 times and keep the file below 11000 bytes.

        The size limit is checked after every append and once more after
        ``repack``; the accumulated data is read back on every iteration.
        """
        path_name = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
        h = H5ColStore(path_name)

        nloops = 200
        for x in range(nloops):
            row = {'t_comp': [1], 't_str': ['hello']}
            h.append_ctable('/table', col_data=row, resize=True)

            # with pytables 4.3.2 the following grows to 28000 with nloops=200!!
            sz = os.stat(path_name).st_size
            self.assertTrue(sz < 11000)

            # read data
            stored = h.read_ctable('/table')
            rows_so_far = x + 1
            self.assertListEqual(stored['t_comp'], [1] * rows_so_far)
            self.assertListEqual(stored['t_str'], ['hello'] * rows_so_far)

        h.repack()
        sz = os.stat(path_name).st_size
        self.assertTrue(sz < 11000)