def testLoad(self):
    '''
    Creates 1..n files depending upon the number of arkouda_server locales with three columns AKA
    datasets, loads each corresponding dataset and confirms each corresponding pdarray equals
    the input pdarray.

    :return: None
    :raise: AssertionError if the input and returned datasets (pdarrays) don't match
    '''
    # NOTE(review): this method is shadowed by a later definition of testLoad in
    # this class, so Python keeps only the later one and this version never runs.
    # NOTE(review): this version passes path_prefix= to _create_file while the
    # later version passes prefix_path= -- confirm which keyword _create_file
    # actually accepts before reviving this test.
    self._create_file(columns=self.dict_columns,
                      path_prefix='{}/iotest_dict_columns'.format(
                          IOTest.io_test_dir))
    result_array_tens = ak.load(
        path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
        dataset='int_tens_pdarray')
    result_array_hundreds = ak.load(
        path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
        dataset='int_hundreds_pdarray')
    result_array_float = ak.load(
        path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
        dataset='float_pdarray')
    # NOTE(review): pdarray.all() reduces each array to a single boolean, so
    # these assertions compare two booleans rather than element-wise contents;
    # the later testLoad compares sorted ndarrays instead, which is stronger.
    self.assertEqual(self.int_tens_pdarray.all(), result_array_tens.all())
    self.assertEqual(self.int_hundreds_pdarray.all(), result_array_hundreds.all())
    self.assertEqual(self.float_pdarray.all(), result_array_float.all())
def testAppendMixedStringsDataset(self):
    """
    Saves a Strings dataset, appends float and int datasets to the same
    file(s), then reads everything back and verifies that all three
    datasets round-trip with their contents intact.

    :return: None
    :raise: AssertionError if any re-loaded dataset differs from its input
    """
    prefix = f'{IOTest.io_test_dir}/append-multi-type-test'
    strings_array = ak.array([f'string {i}' for i in range(25)])
    strings_array.save(prefix, dataset='m_strings')

    # Append two numeric datasets alongside the Strings dataset
    m_floats = ak.array([i / 10.0 for i in range(10)])
    m_ints = ak.array(list(range(10)))
    ak.save_all({'m_floats': m_floats, 'm_ints': m_ints}, prefix, mode='append')

    r_mixed = ak.load_all(prefix)
    self.assertIsNotNone(r_mixed['m_floats'])
    self.assertIsNotNone(r_mixed['m_ints'])

    # Numeric datasets: sort server-side and compare element-wise
    r_floats = ak.sort(ak.load(prefix, dataset='m_floats'))
    r_ints = ak.sort(ak.load(prefix, dataset='m_ints'))
    self.assertTrue((m_floats == r_floats).all())
    self.assertTrue((m_ints == r_ints).all())

    # Strings dataset: compare as sorted numpy arrays since file order
    # may differ from the original ordering
    expected_strings = strings_array.to_ndarray()
    expected_strings.sort()
    actual_strings = r_mixed['m_strings'].to_ndarray()
    actual_strings.sort()
    self.assertTrue((expected_strings == actual_strings).all())
def testAppendStringsDataset(self):
    """
    Saves a Strings dataset, appends a duplicate dataset to the same
    file(s), and confirms both datasets read back equal.

    :return: None
    :raise: AssertionError if the two re-loaded datasets differ
    """
    prefix = f'{IOTest.io_test_dir}/append-strings-test'
    strings_array = ak.array([f'string {i}' for i in range(25)])
    strings_array.save(prefix, dataset='strings')
    # Write the same data again under a second dataset name via append mode
    strings_array.save(prefix, dataset='strings-dupe', mode='append')

    r_strings = ak.load(prefix, dataset='strings')
    r_strings_dupe = ak.load(prefix, dataset='strings-dupe')
    self.assertTrue((r_strings == r_strings_dupe).all())
def testSmallStringArrayToHDF5(self):
    """
    Round-trips a two-element Strings array and a single-element Strings
    array through HDF5, verifying each loads back identical to its input.

    :return: None
    :raise: AssertionError if a loaded array differs from the saved one
    """
    # Two-element Strings array
    pair = ak.array(["ab", "cd"])
    with tempfile.TemporaryDirectory(dir=IOTest.io_test_dir) as tmp_dirname:
        pair.save(f"{tmp_dirname}/small_string_array", dataset="a1")
        reloaded_pair = ak.load(f"{tmp_dirname}/small_string_array",
                                dataset="a1")
        self.assertEqual(str(pair), str(reloaded_pair))

    # Single-element Strings array
    single = ak.array(["123456789"])
    with tempfile.TemporaryDirectory(dir=IOTest.io_test_dir) as tmp_dirname:
        single.save(f"{tmp_dirname}/single_string", dataset="b1")
        reloaded_single = ak.load(f"{tmp_dirname}/single_string",
                                  dataset="b1")
        self.assertEqual(str(single), str(reloaded_single))
def testSmallArrayToHDF5(self):
    """
    Round-trips a single-element numeric pdarray through HDF5 and
    verifies it loads back identical to the input.

    :return: None
    :raise: AssertionError if the loaded array differs from the saved one
    """
    single_int = ak.array([1])
    with tempfile.TemporaryDirectory(dir=IOTest.io_test_dir) as tmp_dirname:
        single_int.save(f"{tmp_dirname}/small_numeric", dataset="a1")
        reloaded = ak.load(f"{tmp_dirname}/small_numeric", dataset="a1")
        self.assertEqual(str(single_int), str(reloaded))
def testLoad(self):
    '''
    Creates 1..n files depending upon the number of arkouda_server locales with three columns AKA
    datasets, loads each corresponding dataset and confirms each corresponding pdarray equals
    the input pdarray. Also confirms that loading a nonexistent path prefix or a
    non-HDF5 file raises a RuntimeError with the expected message.

    :return: None
    :raise: AssertionError if the input and returned datasets (pdarrays) don't match
    '''
    # NOTE(review): a second testLoad defined earlier in this class is shadowed
    # by this one; consider renaming or removing the earlier duplicate.
    self._create_file(columns=self.dict_columns,
                      prefix_path='{}/iotest_dict_columns'.format(
                          IOTest.io_test_dir))
    result_array_tens = ak.load(
        path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
        dataset='int_tens_pdarray')
    result_array_hundreds = ak.load(
        path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
        dataset='int_hundreds_pdarray')
    result_array_floats = ak.load(
        path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
        dataset='float_pdarray')
    result_array_bools = ak.load(
        path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
        dataset='bool_pdarray')

    # Compare as sorted numpy arrays since file order may differ from the
    # original ordering
    ratens = result_array_tens.to_ndarray()
    ratens.sort()
    rahundreds = result_array_hundreds.to_ndarray()
    rahundreds.sort()
    rafloats = result_array_floats.to_ndarray()
    rafloats.sort()

    self.assertTrue((self.int_tens_ndarray == ratens).all())
    self.assertTrue((self.int_hundreds_ndarray == rahundreds).all())
    self.assertTrue((self.float_ndarray == rafloats).all())
    # Booleans aren't meaningfully sortable for comparison; check length only
    self.assertEqual(len(self.bool_pdarray), len(result_array_bools))

    # Test load with invalid prefix
    with self.assertRaises(RuntimeError) as cm:
        ak.load(path_prefix='{}/iotest_dict_column'.format(
            IOTest.io_test_dir), dataset='int_tens_pdarray')
    self.assertIn(
        'either corresponds to files inaccessible to Arkouda or files of an invalid format',
        cm.exception.args[0].args[0])

    # Test load with invalid file
    with self.assertRaises(RuntimeError) as cm:
        ak.load(path_prefix='{}/not-a-file'.format(IOTest.io_test_dir),
                dataset='int_tens_pdarray')
    # Fixed: removed a bare no-op expression statement (cm.exception.args[0])
    # that evaluated the exception args without using the result
    self.assertIn('is not an HDF5 file', cm.exception.args[0].args[0])
def testInternalVersions(self):
    """
    Test loading internal arkouda hdf5 structuring by loading v0 and v1 files.
    v1 contains _arkouda_metadata group and attributes, v0 does not.
    Files are located under `test/resources` ... where server-side unit tests are located.
    """
    # Note: pytest unit tests are located under "tests/" vs chapel "test/"
    # The test files are located in the Chapel `test/resources` directory
    # Determine where the test was launched by inspecting our path and update it accordingly
    cwd = os.getcwd()
    if cwd.endswith("tests"):
        # IDEs may launch unit tests from "tests"; drop the trailing "s" to
        # land in the sibling Chapel "test" directory
        resource_dir = cwd[:-1] + "/resources"
    else:
        # assume arkouda root dir
        resource_dir = cwd + "/test/resources"

    # Both format versions should load the same 50-element array
    for fixture in ("/array_v0.hdf5", "/array_v1.hdf5"):
        loaded = ak.load(resource_dir + fixture)
        self.assertEqual(50, loaded.size)
def testSaveLongStringsDataset(self):
    """
    Saves a Strings dataset composed of longer strings and verifies it
    reads back with identical contents.

    :return: None
    :raise: AssertionError if the loaded dataset differs from the input
    """
    prefix = f'{IOTest.io_test_dir}/strings-test'
    strings = ak.array(
        [f'testing a longer string{i} to be written, loaded and appended'
         for i in range(26)])
    strings.save(prefix, dataset='strings')

    # Compare as sorted numpy arrays since file order may differ from the
    # original ordering
    expected = strings.to_ndarray()
    expected.sort()
    actual = ak.load(prefix, dataset='strings').to_ndarray()
    actual.sort()
    self.assertTrue((expected == actual).all())
def testUint64ToFromHDF5(self):
    """
    Test our ability to read/write uint64 to HDF5, using values near the
    top of the uint64 range that would overflow int64.

    :return: None
    :raise: AssertionError if the loaded array differs from the input
    """
    source_np = np.array(
        [18446744073709551500, 18446744073709551501, 18446744073709551502],
        dtype=np.uint64)
    source = ak.array(source_np)
    with tempfile.TemporaryDirectory(dir=IOTest.io_test_dir) as tmp_dirname:
        source.save(f"{tmp_dirname}/small_numeric", dataset="pda1")
        loaded = ak.load(f"{tmp_dirname}/small_numeric", dataset="pda1")
        self.assertEqual(str(source), str(loaded))
        self.assertEqual(18446744073709551500, loaded[0])
        self.assertTrue((loaded.to_ndarray() == source_np).all())
def testStringsWithoutOffsets(self):
    """
    This tests both saving & reading a strings array without saving and
    reading the offsets to HDF5. Instead the offsets array will be derived
    from the values/bytes area by looking for null-byte terminator strings.

    :return: None
    :raise: AssertionError if the loaded dataset differs from the input
    """
    prefix = f'{IOTest.io_test_dir}/strings-test'
    strings_array = ak.array([f'testing string{i}' for i in range(25)])
    # Omit the offsets dataset on save; the server recomputes it on load
    strings_array.save(prefix, dataset='strings', save_offsets=False)
    r_strings_array = ak.load(prefix, dataset='strings',
                              calc_string_offsets=True)

    # Compare as sorted numpy arrays since file order may differ from the
    # original ordering
    expected = strings_array.to_ndarray()
    expected.sort()
    actual = r_strings_array.to_ndarray()
    actual.sort()
    self.assertTrue((expected == actual).all())
def testSaveStringsDataset(self):
    """
    Saves and reloads a Strings dataset, then reads a single locale's file
    directly, both with and without server-side offset calculation.

    :return: None
    :raise: AssertionError if any loaded dataset differs from the input
    """
    prefix = f'{IOTest.io_test_dir}/strings-test'
    strings_array = ak.array([f'testing string{i}' for i in range(25)])
    strings_array.save(prefix, dataset='strings')
    r_strings_array = ak.load(prefix, dataset='strings')

    # Compare as sorted numpy arrays since file order may differ from the
    # original ordering
    expected = strings_array.to_ndarray()
    expected.sort()
    actual = r_strings_array.to_ndarray()
    actual.sort()
    self.assertTrue((expected == actual).all())

    # Read a part of a saved Strings dataset from one hdf5 file
    locale_file = f'{prefix}_LOCALE0000'
    r_strings_subset = ak.read_all(filenames=locale_file)
    self.assertIsNotNone(r_strings_subset)
    self.assertTrue(isinstance(r_strings_subset[0], str))
    self.assertIsNotNone(ak.read_hdf(filenames=locale_file,
                                     dsetName='strings/values'))
    self.assertIsNotNone(ak.read_hdf(filenames=locale_file,
                                     dsetName='strings/segments'))

    # Repeat with calc_string_offsets=True so the server derives the
    # offsets array rather than reading it from the file
    r_strings_subset = ak.read_all(filenames=locale_file,
                                   calc_string_offsets=True)
    self.assertIsNotNone(r_strings_subset)
    self.assertTrue(isinstance(r_strings_subset[0], str))
    self.assertIsNotNone(ak.read_hdf(filenames=locale_file,
                                     dsetName='strings/values',
                                     calc_string_offsets=True))
    self.assertIsNotNone(ak.read_hdf(filenames=locale_file,
                                     dsetName='strings/segments',
                                     calc_string_offsets=True))