def testSeededRNG(self):
    """
    Verify that each random generator gives differing results when unseeded
    and identical results when called twice with the same seed.
    """
    N = 100
    seed = 8675309

    # randint is checked once for every numeric dtype
    for dt in (ak.int64, ak.float64, ak.bool):
        first = ak.randint(0, 2**32, N, dtype=dt)
        second = ak.randint(0, 2**32, N, dtype=dt)
        self.assertFalse((first == second).all())

        first = ak.randint(0, 2**32, N, dtype=dt, seed=seed)
        second = ak.randint(0, 2**32, N, dtype=dt, seed=seed)
        self.assertTrue((first == second).all())

    # Remaining generators share one shape of check; each entry forwards
    # the optional seed keyword to the underlying call.
    generators = (
        # Uniform
        lambda **kw: ak.uniform(N, **kw),
        # Standard Normal
        lambda **kw: ak.standard_normal(N, **kw),
        # Strings (uniformly distributed length)
        lambda **kw: ak.random_strings_uniform(1, 10, N, **kw),
        # Strings (log-normally distributed length)
        lambda **kw: ak.random_strings_lognormal(2, 1, N, **kw),
    )
    for draw in generators:
        self.assertFalse((draw() == draw()).all())
        self.assertTrue((draw(seed=seed) == draw(seed=seed)).all())
def test_string_registration_suite(self):
    """
    Exercise the Strings registration life cycle: register, rename,
    survive ak.clear(), unregister, attach by name and unregister by name.

    The symbol table is cleaned before the test and, via try/finally,
    after it even when an assertion fails, so registered objects do not
    leak into subsequent tests.
    """
    cleanup()
    try:
        # Initial registration should set name
        keep = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
        self.assertEqual("keep_me", keep.register("keep_me").name)
        self.assertTrue(keep.is_registered(),
                        "Expected Strings object to be registered")

        # Register a second time to confirm name change
        self.assertEqual("kept", keep.register("kept").name)
        self.assertTrue(keep.is_registered(),
                        "Object should be registered with updated name")

        # Add an item to discard, confirm our registered item remains and
        # discarded item is gone
        discard = ak.random_strings_uniform(1, 10, UNIQUE,
                                            characters='printable')
        ak.clear()
        self.assertEqual("kept", keep.name)
        with self.assertRaises(
                RuntimeError,
                msg="discard was not registered and should be discarded"):
            str(discard)

        # Unregister, should remain usable until we clear
        keep.unregister()
        str(keep)  # Should not cause error
        self.assertFalse(keep.is_registered(),
                         "This item should no longer be registered")
        ak.clear()
        with self.assertRaises(
                RuntimeError,
                msg="keep was unregistered and should be cleared"):
            str(keep)  # should cause RuntimeError

        # Test attach functionality
        s1 = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
        self.assertTrue(s1.register("uut").is_registered(),
                        "uut should be registered")
        s1 = None
        self.assertIsNone(s1, "Reference should be cleared")
        s1 = ak.Strings.attach("uut")
        self.assertTrue(s1.is_registered(),
                        "Should have re-attached to registered object")
        # This will throw an exception if the object doesn't exist server-side
        str(s1)

        # Test the Strings unregister by name using previously registered
        # object
        ak.Strings.unregister_strings_by_name("uut")
        self.assertFalse(s1.is_registered(),
                         "Expected object to be unregistered")
    finally:
        cleanup()
def test_error_handling(self):
    """
    Confirm that Strings methods reject badly typed or out-of-range
    arguments with the expected exception type and message.
    """
    stringsOne = ak.random_strings_uniform(1, 10, UNIQUE,
                                           characters='printable')
    stringsTwo = ak.random_strings_uniform(1, 10, UNIQUE,
                                           characters='printable')

    delimiter_msg = ('type of argument "delimiter" must be one of '
                     '(bytes, str, str_); got int instead')
    substr_msg = ('type of argument "substr" must be one of '
                  '(bytes, str, str_); got int instead')
    other_msg = ('type of argument "other" must be '
                 'arkouda.strings.Strings; got list instead')

    # (call that must raise TypeError, expected first exception argument)
    type_error_cases = (
        (lambda: stringsOne.lstick(stringsTwo, delimiter=1), delimiter_msg),
        (lambda: stringsOne.lstick([1], 1), other_msg),
        (lambda: stringsOne.startswith(1), substr_msg),
        (lambda: stringsOne.endswith(1), substr_msg),
        (lambda: stringsOne.contains(1), substr_msg),
        (lambda: stringsOne.peel(1), delimiter_msg),
    )
    for bad_call, expected in type_error_cases:
        with self.assertRaises(TypeError) as cm:
            bad_call()
        self.assertEqual(expected, cm.exception.args[0])

    with self.assertRaises(ValueError) as cm:
        stringsOne.peel("", -5)
    self.assertEqual('times must be >= 1', cm.exception.args[0])
def test_random_strings_uniform_with_seed(self):
    """
    Pin the exact strings generated for seed=1, for both the default
    character set and characters='printable'.
    """
    # Default character set
    pda = ak.random_strings_uniform(minlen=1, maxlen=5, seed=1, size=10)
    expected = ak.array(
        ['TVKJ', 'EWAB', 'CO', 'HFMD', 'U', 'MMGT', 'N', 'WOQN', 'HZ', 'VSX'])
    matches = expected == pda
    self.assertTrue(matches.all())

    # Printable character set
    pda = ak.random_strings_uniform(minlen=1, maxlen=5, seed=1, size=10,
                                    characters='printable')
    expected = ak.array(
        ['+5"f', '-P]3', '4k', '~HFF', 'F', '`,IE', 'Y', 'jkBa', '9(', '5oZ'])
    matches = expected == pda
    self.assertTrue(matches.all())
def setUp(self):
    """
    Build the fixtures shared by the string tests: random base words,
    a random sample of them (as Strings, ndarray and Categorical), a
    delimiter, and "gremlin" variants that append the awkward values
    '"', ' ' and ''.
    """
    self.maxDiff = None
    ArkoudaTest.setUp(self)

    uniform_words = ak.random_strings_uniform(1, 10, UNIQUE,
                                              characters='printable')
    lognormal_words = ak.random_strings_lognormal(2, 0.25, UNIQUE,
                                                  characters='printable')
    gremlins = np.array(['"', ' ', ''])
    self.gremlins = ak.array(gremlins)

    self.base_words = ak.concatenate((uniform_words, lognormal_words))
    self.np_base_words = np.hstack(
        (uniform_words.to_ndarray(), lognormal_words.to_ndarray()))

    # Random sample (with repetition) of the base words
    choices = ak.randint(0, self.base_words.size, N)
    self.strings = self.base_words[choices]
    self.test_strings = self.strings.to_ndarray()
    self.cat = ak.Categorical(self.strings)

    # Character frequencies over all base words feed the delimiter choice
    char_counts = Counter(''.join(self.base_words.to_ndarray()))
    x, w = zip(*char_counts.items())
    self.delim = self._get_delimiter(x, w, gremlins)
    self.akset = set(ak.unique(self.strings).to_ndarray())

    # Same fixtures with the gremlins appended
    self.gremlins_base_words = ak.concatenate(
        (self.base_words, self.gremlins))
    self.gremlins_strings = ak.concatenate(
        (self.base_words[choices], self.gremlins))
    self.gremlins_test_strings = self.gremlins_strings.to_ndarray()
    self.gremlins_cat = ak.Categorical(self.gremlins_strings)
def testSaveAndLoadCategoricalMulti(self):
    """
    Build a pseudo dataframe holding two Categoricals, a pdarray and a
    Strings object, write it to HDF5 with ak.save_all, read it back with
    ak.load_all and compare the contents.
    """
    c1 = self._getCategorical(prefix="c1", size=51)
    c2 = self._getCategorical(prefix="c2", size=52)
    pda1 = ak.zeros(51)
    strings1 = ak.random_strings_uniform(9, 10, 52)

    with tempfile.TemporaryDirectory(
            dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
        df = {"cat1": c1, "cat2": c2, "pda1": pda1, "strings1": strings1}
        ak.save_all(df, f"{tmp_dirname}/cat-save-test")
        x = ak.load_all(path_prefix=f"{tmp_dirname}/cat-save-test")
        self.assertTrue(len(x.items()) == 4)

        # Note assertCountEqual asserts a and b have the same elements in
        # the same amount regardless of order
        for key, written in (("cat1", c1), ("cat2", c2)):
            loaded_categories = x[key].categories.to_ndarray().tolist()
            written_categories = written.categories.to_ndarray().tolist()
            self.assertCountEqual(loaded_categories, written_categories)

        for key, written in (("pda1", pda1), ("strings1", strings1)):
            loaded_values = x[key].to_ndarray().tolist()
            written_values = written.to_ndarray().tolist()
            self.assertCountEqual(loaded_values, written_values)
def test_error_handling(self):
    """
    Confirm that Strings methods raise the expected exception, with the
    expected message, for invalid arguments.
    """
    stringsOne = ak.random_strings_uniform(1, 10, UNIQUE,
                                           characters='printable')
    stringsTwo = ak.random_strings_uniform(1, 10, UNIQUE,
                                           characters='printable')

    def expect_failure(exc_type, message, method, *args, **kwargs):
        # Invoke the method and compare the first exception argument
        with self.assertRaises(exc_type) as cm:
            method(*args, **kwargs)
        self.assertEqual(message, cm.exception.args[0])

    expect_failure(TypeError, 'Delimiter must be a string, not int',
                   stringsOne.lstick, stringsTwo, delimiter=1)
    expect_failure(TypeError, 'stick: not supported between String and list',
                   stringsOne.lstick, [1], 1)
    expect_failure(TypeError, 'Substring must be a string, not int',
                   stringsOne.startswith, 1)
    expect_failure(TypeError, 'Substring must be a string, not int',
                   stringsOne.endswith, 1)
    expect_failure(TypeError, 'Substring must be a string, not int',
                   stringsOne.contains, 1)
    expect_failure(TypeError, 'Delimiter must be a string, not int',
                   stringsOne.peel, 1)
    expect_failure(ValueError, 'Times must be >= 1',
                   stringsOne.peel, "", -5)
def setUp(self): ArkoudaTest.setUp(self) base_words1 = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable') base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE, characters='printable') base_sas1 = ak.suffix_array(base_words1) base_sas2 = ak.suffix_array(base_words2) '''
def test_string_is_registered(self):
    """
    Check Strings.is_registered() before registration, after
    registration and after unregistration.
    """
    keep = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')

    # Freshly created object is not registered
    registered = keep.is_registered()
    self.assertFalse(registered)

    # Registered after register()
    keep.register('keep_me')
    registered = keep.is_registered()
    self.assertTrue(registered)

    # Not registered again after unregister()
    keep.unregister()
    registered = keep.is_registered()
    self.assertFalse(registered)

    ak.clear()
def test_in_place_info(self):
    """
    Tests the instance-level info method for pdarray, Strings and
    Categorical: no component is reported as registered before
    register() and every component is reported as registered after it.
    """
    cleanup()

    # (registration name, factory creating the object under test)
    subjects = (
        ('my_pda', lambda: ak.ones(10, ak.int64)),
        ('my_str', lambda: ak.random_strings_uniform(
            1, 10, UNIQUE, characters='printable')),
        ('my_cat', lambda: ak.Categorical(
            ak.array([f"my_cat {i}" for i in range(1, 11)]))),
    )

    # Hold every object until the final cleanup so that none of them is
    # released earlier than it would be as a plain local variable.
    alive = []
    for name, build in subjects:
        obj = build()
        alive.append(obj)

        flags = [sym['registered'] for sym in json.loads(obj.info())]
        self.assertFalse(
            any(flags),
            msg=f'no components of {name} should be registered before register call')

        obj.register(name)

        flags = [sym['registered'] for sym in json.loads(obj.info())]
        self.assertTrue(
            all(flags),
            msg=f'all components of {name} should be registered after register call')

    cleanup()
def setUp(self):
    """
    Build the fixtures shared by the string tests.

    Sets:
        base_words     -- concatenation of two randomly generated Strings
        np_base_words  -- the same words stacked as a NumPy array
        strings        -- N entries sampled from base_words via random indices
        test_strings   -- NumPy copy of ``strings``
        cat            -- Categorical built from ``strings``
        delim          -- one character drawn from the base words, weighted
                          by how often it occurs
    """
    ArkoudaTest.setUp(self)
    base_words1 = ak.random_strings_uniform(0, 10, UNIQUE,
                                            characters='printable')
    base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE,
                                              characters='printable')
    self.base_words = ak.concatenate((base_words1, base_words2))
    self.np_base_words = np.hstack(
        (base_words1.to_ndarray(), base_words2.to_ndarray()))

    choices = ak.randint(0, self.base_words.size, N)
    self.strings = self.base_words[choices]
    self.test_strings = self.strings.to_ndarray()
    self.cat = ak.Categorical(self.strings)

    # Pull the words to the client once before joining; iterating the
    # server-side Strings object directly would fetch element by element.
    all_chars = ''.join(self.base_words.to_ndarray())
    x, w = tuple(zip(*Counter(all_chars).items()))
    self.delim = np.random.choice(x, p=(np.array(w) / sum(w)))
def test_string_is_registered(self):
    """
    Check Strings.is_registered() across register/unregister, then
    confirm it raises RegistrationError when one internal piece of a
    registered Strings object has been unregistered separately.
    """
    keep = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')

    registered = keep.is_registered()
    self.assertFalse(registered)

    keep.register('keep_me')
    registered = keep.is_registered()
    self.assertTrue(registered)

    keep.unregister()
    registered = keep.is_registered()
    self.assertFalse(registered)

    # Now mess with one of the internal pieces to test is_registered() logic
    reregistered = keep.register("uut")
    self.assertTrue(reregistered.is_registered(), "Re-register keep as uut")
    ak.unregister_pdarray_by_name("uut.bytes")
    with self.assertRaises(
            RegistrationError,
            msg="Expected RegistrationError on mis-matched pieces"):
        keep.is_registered()

    ak.clear()
def setUp(self):
    """
    Build the fixtures shared by the string tests.

    Sets:
        gremlins              -- Strings holding the awkward values
                                 '"', ' ' and ''
        base_words            -- concatenation of two random Strings
        np_base_words         -- the same words stacked as a NumPy array
        strings, test_strings -- N sampled base words (Strings / ndarray)
        cat                   -- Categorical built from ``strings``
        delim                 -- one character drawn from the base words,
                                 weighted by how often it occurs
        akset                 -- set of the unique sampled strings
        gremlins_*            -- counterparts of the fixtures above with
                                 the gremlins appended

    No diagnostic output is produced here: this method runs before every
    test, so printing the fixtures would cost extra server round trips
    and bury the test report.
    """
    self.maxDiff = None
    ArkoudaTest.setUp(self)
    base_words1 = ak.random_strings_uniform(1, 10, UNIQUE,
                                            characters='printable')
    base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE,
                                              characters='printable')
    gremlins = ak.array(['"', ' ', ''])
    self.gremlins = gremlins
    self.base_words = ak.concatenate((base_words1, base_words2))
    self.np_base_words = np.hstack(
        (base_words1.to_ndarray(), base_words2.to_ndarray()))

    choices = ak.randint(0, self.base_words.size, N)
    self.strings = self.base_words[choices]
    self.test_strings = self.strings.to_ndarray()
    self.cat = ak.Categorical(self.strings)

    x, w = tuple(
        zip(*Counter(''.join(self.base_words.to_ndarray())).items()))
    self.delim = np.random.choice(x, p=(np.array(w) / sum(w)))
    self.akset = set(ak.unique(self.strings).to_ndarray())

    # The gremlins are appended after the base words, so ``choices``
    # (drawn below base_words.size) selects the same words as above.
    self.gremlins_base_words = base_words = ak.concatenate(
        (base_words1, base_words2, gremlins))
    self.gremlins_strings = ak.concatenate((base_words[choices], gremlins))
    self.gremlins_test_strings = self.gremlins_strings.to_ndarray()
    self.gremlins_cat = ak.Categorical(self.gremlins_strings)
def test_random_strings_uniform(self):
    """
    Check ak.random_strings_uniform for both Python int and np.int64
    arguments (type, length, dtype, per-string length bounds and upper
    case), then check the errors raised for invalid arguments.
    """
    valid_argument_sets = (
        dict(minlen=1, maxlen=5, size=100),
        dict(minlen=np.int64(1), maxlen=np.int64(5), size=np.int64(100)),
    )
    for arguments in valid_argument_sets:
        pda = ak.random_strings_uniform(**arguments)
        nda = pda.to_ndarray()

        self.assertIsInstance(pda, ak.Strings)
        self.assertEqual(100, len(pda))
        self.assertEqual(str, pda.dtype)
        for string in nda:
            self.assertTrue(1 <= len(string) <= 5)
            self.assertTrue(string.isupper())

    # (expected exception, invalid arguments, expected message)
    invalid_cases = (
        (ValueError, dict(maxlen=1, minlen=5, size=100),
         "Incompatible arguments: minlen < 0, maxlen < minlen, or size < 0"),
        (ValueError, dict(maxlen=5, minlen=1, size=-1),
         "Incompatible arguments: minlen < 0, maxlen < minlen, or size < 0"),
        (TypeError, dict(minlen='1', maxlen=5, size=10),
         'type of argument "minlen" must be one of (int, int64); got str instead'),
        (TypeError, dict(minlen=1, maxlen='5', size=10),
         'type of argument "maxlen" must be one of (int, int64); got str instead'),
        (TypeError, dict(minlen=1, maxlen=5, size='10'),
         'type of argument "size" must be one of (int, int64); got str instead'),
    )
    for exc_type, arguments, message in invalid_cases:
        with self.assertRaises(exc_type) as cm:
            ak.random_strings_uniform(**arguments)
        self.assertEqual(message, cm.exception.args[0])
if __name__ == '__main__':
    import sys

    # Command line: [<server> [<port>]]. The port is optional; when only
    # the server is given, the client's default port is used instead of
    # failing on a missing argument.
    cli_args = sys.argv[1:]
    if len(cli_args) >= 2:
        ak.connect(server=cli_args[0], port=cli_args[1])
    elif cli_args:
        ak.connect(server=cli_args[0])
    else:
        ak.connect()

    print("Running test from string_test.__main__")
    # with open(__file__, 'r') as f:
    #     base_words = np.array(f.read().split())
    # test_strings = np.random.choice(base_words, N, replace=True)
    # strings = ak.array(test_strings)

    base_words1 = ak.random_strings_uniform(1, 10, UNIQUE,
                                            characters='printable')
    base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE,
                                              characters='printable')
    gremlins = ak.array(['"', ' ', ''])
    base_words = ak.concatenate((base_words1, base_words2))
    np_base_words = np.hstack(
        (base_words1.to_ndarray(), base_words2.to_ndarray()))
    # Server-side concatenation must match the client-side stacking
    assert compare_strings(base_words.to_ndarray(), np_base_words)

    choices = ak.randint(0, base_words.size, N)
    strings = base_words[choices]
    test_strings = strings.to_ndarray()
    cat = ak.Categorical(strings)
    print("strings =", strings)
    print("categorical =", cat)
    print("Generation and concatenate passed")

    # int index
    run_test_index(strings, test_strings, cat, range(-len(gremlins), 0))
def test_random_strings_uniform(self):
    """
    Check the type, length and dtype of ak.random_strings_uniform output,
    then the errors raised for invalid arguments.
    """
    pda = ak.random_strings_uniform(minlen=1, maxlen=10, size=100)
    self.assertIsInstance(pda, ak.Strings)
    self.assertEqual(100, len(pda))
    self.assertEqual(str, pda.dtype)

    def expect_failure(exc_type, message, **arguments):
        # Call with invalid arguments and compare the first exception arg
        with self.assertRaises(exc_type) as cm:
            ak.random_strings_uniform(**arguments)
        self.assertEqual(message, cm.exception.args[0])

    expect_failure(
        ValueError,
        "Incompatible arguments: minlen < 0, maxlen < minlen, or size < 0",
        maxlen=1, minlen=5, size=100)
    expect_failure(
        ValueError,
        "Incompatible arguments: minlen < 0, maxlen < minlen, or size < 0",
        maxlen=5, minlen=1, size=-1)
    expect_failure(
        TypeError,
        'type of argument "minlen" must be int; got str instead',
        minlen='1', maxlen=5, size=10)
    expect_failure(
        TypeError,
        'type of argument "maxlen" must be int; got str instead',
        minlen=1, maxlen='5', size=10)
    expect_failure(
        TypeError,
        'type of argument "size" must be int; got str instead',
        minlen=1, maxlen=5, size='10')