Example #1
0
    def testReadHdf(self):
        '''
        Writes 2..n files (one per arkouda_server locale) for two prefixes that
        hold identical pdarrays, then reads them back through read_hdf with an
        explicit list of file names, verifying the success path plus the two
        server-side error paths (bad dataset name, bad file path).

        :return: None
        :raise: AssertionError if the input and returned datasets don't match
        '''
        base_dir = IOTest.io_test_dir
        for prefix in ('iotest_single_column', 'iotest_single_column_dupe'):
            self._create_file(columns=self.dict_single_column,
                              prefix_path='{}/{}'.format(base_dir, prefix))

        good_files = ['{}/iotest_single_column_LOCALE0'.format(base_dir),
                      '{}/iotest_single_column_dupe_LOCALE0'.format(base_dir)]

        # Happy path: a valid dataset name against valid files returns data.
        dataset = ak.read_hdf(dsetName='int_tens_pdarray', filenames=good_files)
        self.assertIsNotNone(dataset)

        # A misspelled dataset name surfaces a server-side RuntimeError.
        with self.assertRaises(RuntimeError) as cm:
            ak.read_hdf(dsetName='in_tens_pdarray', filenames=good_files)
        self.assertTrue('Error: The dataset in_tens_pdarray does not exist in the file' in
                        cm.exception.args[0])

        # Misspelled file paths surface a file-not-found RuntimeError.
        missing_files = ['{}/iotest_single_colum_LOCALE0'.format(base_dir),
                         '{}/iotest_single_colum_dupe_LOCALE0'.format(base_dir)]
        with self.assertRaises(RuntimeError) as cm:
            ak.read_hdf(dsetName='int_tens_pdarray', filenames=missing_files)
        self.assertTrue('iotest_single_colum_LOCALE0 not found' in cm.exception.args[0])
Example #2
0
    def testSaveStringsDataset(self):
        '''
        Saves a Strings object to hdf5, loads it back, and confirms round-trip
        equality; then exercises partial reads of the saved dataset from a
        single locale file, both with and without server-calculated offsets.
        '''
        # Create, save, and load Strings dataset
        strings_array = ak.array(
            ['testing string{}'.format(num) for num in range(25)])
        file_prefix = '{}/strings-test'.format(IOTest.io_test_dir)
        strings_array.save(file_prefix, dataset='strings')
        r_strings_array = ak.load(file_prefix, dataset='strings')

        # Round-trip check: sort both sides so ordering differences across
        # locales do not cause a spurious mismatch.
        expected = strings_array.to_ndarray()
        expected.sort()
        actual = r_strings_array.to_ndarray()
        actual.sort()
        self.assertTrue((expected == actual).all())

        # Read a part of a saved Strings dataset from one hdf5 file
        locale_file = '{}_LOCALE0000'.format(file_prefix)
        r_strings_subset = ak.read_all(filenames=locale_file)
        self.assertIsNotNone(r_strings_subset)
        self.assertTrue(isinstance(r_strings_subset[0], str))
        self.assertIsNotNone(ak.read_hdf(filenames=locale_file,
                                         dsetName='strings/values'))
        self.assertIsNotNone(ak.read_hdf(filenames=locale_file,
                                         dsetName='strings/segments'))

        # Repeat with calc_string_offsets=True so the server derives the
        # offsets array instead of reading it from the file.
        r_strings_subset = ak.read_all(filenames=locale_file,
                                       calc_string_offsets=True)
        self.assertIsNotNone(r_strings_subset)
        self.assertTrue(isinstance(r_strings_subset[0], str))
        for dset in ('strings/values', 'strings/segments'):
            self.assertIsNotNone(ak.read_hdf(filenames=locale_file,
                                             dsetName=dset,
                                             calc_string_offsets=True))
Example #3
0
    def testReadHdfWithGlob(self):
        '''
        Writes 2..n files (one per arkouda_server locale) for two prefixes that
        hold identical pdarrays, reads them back via a glob pattern passed to
        read_hdf, and confirms the returned data matches the input pdarray.

        :return: None
        :raise: AssertionError if the input and returned datasets don't match
        '''
        base_dir = IOTest.io_test_dir
        for prefix in ('iotest_single_column', 'iotest_single_column_dupe'):
            self._create_file(columns=self.dict_single_column,
                              prefix_path='{}/{}'.format(base_dir, prefix))

        # The trailing '*' matches every locale file written for both prefixes.
        dataset = ak.read_hdf(dsetName='int_tens_pdarray',
                              filenames='{}/iotest_single_column*'.format(base_dir))
        self.assertEqual(self.int_tens_pdarray.all(), dataset.all())
Example #4
0
    def testReadHdf(self):
        '''
        Writes 2..n files (one per arkouda_server locale) for two prefixes that
        hold identical pdarrays, reads them back through read_hdf with an
        explicit list of file names, and confirms a dataset is returned.

        :return: None
        :raise: AssertionError if the input and returned datasets don't match
        '''
        # NOTE(review): this variant passes the helper's target via 'path_prefix'
        # while sibling tests use 'prefix_path' — confirm which keyword
        # _create_file actually accepts.
        for target in ('/tmp/iotest_single_column',
                       '/tmp/iotest_single_column_dupe'):
            self._create_file(columns=self.dict_single_column,
                              path_prefix=target)

        dataset = ak.read_hdf(
            dsetName='int_tens_pdarray',
            filenames=['/tmp/iotest_single_column_LOCALE0',
                       '/tmp/iotest_single_column_dupe_LOCALE0'])
        self.assertIsNotNone(dataset)