def test_pdarrays_datatypes(self):
    """Verify the reported dtype of a float pdarray and a Strings array."""
    ones = ak.ones(10)
    self.assertEqual(dtypes.dtype('float64'), ones.dtype)
    strings = ak.array(['string {}'.format(i) for i in range(0, 10)])
    self.assertEqual(dtypes.dtype('str'), strings.dtype)
else: ak.connect() # with open(__file__, 'r') as f: # base_words = np.array(f.read().split()) # test_strings = np.random.choice(base_words, N, replace=True) # strings = ak.array(test_strings) print("===============main=============================") base_words1 = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable') print("base_words1=") print(str(base_word1)) base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE, characters='printable') print("base_words2=") print(str(base_word2)) gremlins = ak.array(['"', ' ', '']) base_words = ak.concatenate((base_words1, base_words2)) print("base_words=") print(str(base_word)) np_base_words = np.hstack((base_words1.to_ndarray(), base_words2.to_ndarray())) assert(compare_strings(base_words.to_ndarray(), np_base_words)) choices = ak.randint(0, base_words.size, N) strings = base_words[choices] test_strings = strings.to_ndarray() cat = ak.Categorical(strings) print("strings =", strings) print("categorical =", cat) print("Generation and concatenate passed") # int index print("")
def testArrayCreation(self):
    """ak.array must accept rank-1 iterables and reject everything else."""
    # (input, expected length, expected dtype) for the supported cases.
    rank1_cases = (
        (np.ones(100), 100, float),
        (list(range(0, 100)), 100, int),
        (range(5), 5, int),
        (deque(range(5)), 5, int),
    )
    for source, expected_len, expected_dtype in rank1_cases:
        pda = ak.array(source)
        self.assertIsInstance(pda, ak.pdarray)
        self.assertEqual(expected_len, len(pda))
        self.assertEqual(expected_dtype, pda.dtype)

    # A set, a rank-2 ndarray, and a plain string are all rejected.
    for unsupported in ({range(0, 100)}, np.array([[0, 1], [0, 1]]), 'not an iterable'):
        with self.assertRaises(RuntimeError) as cm:
            ak.array(unsupported)
        self.assertEqual("Only rank-1 pdarrays or ndarrays supported",
                         cm.exception.args[0])

    # list(0) itself raises before ak.array is even entered.
    with self.assertRaises(TypeError) as cm:
        ak.array(list(list(0)))
    self.assertEqual("'int' object is not iterable", cm.exception.args[0])
def test_mulitdimensional_array_creation(self):
    """A nested (rank-2) Python list must be rejected by ak.array."""
    nested = [[0, 0], [0, 1], [1, 1]]
    with self.assertRaises(RuntimeError) as cm:
        ak.array(nested)
    self.assertEqual('Only rank-1 pdarrays or ndarrays supported',
                     cm.exception.args[0])
def _getRandomizedCategorical(self) -> ak.Categorical:
    """Build the fixed, repeating-value Categorical fixture shared by tests."""
    values = [
        'string', 'string1', 'non-string', 'non-string2', 'string',
        'non-string', 'string3', 'non-string2', 'string', 'non-string',
    ]
    return ak.Categorical(ak.array(values))
def testGroup(self):
    """group() must return the expected permutation for the fixed fixture."""
    cat = self._getRandomizedCategorical()
    expected = ak.array([2, 5, 9, 6, 1, 3, 7, 0, 4, 8])
    self.assertTrue((expected == cat.group()).all())
def _getCategorical(self, prefix: str = 'string', size: int = 11) -> ak.Categorical:
    """Build a Categorical of '<prefix> 1' .. '<prefix> <size-1>' labels."""
    labels = ['{} {}'.format(prefix, i) for i in range(1, size)]
    return ak.Categorical(ak.array(labels))
def setUp(self):
    """Create a small arange plus int64 boundary values for each test."""
    ArkoudaTest.setUp(self)
    self.a = ak.arange(10)
    # int64 min, -1, and int64 max exercise the extremes of the dtype.
    int64_min = -(2 ** 63)
    int64_max = 2 ** 63 - 1
    self.edgeCases = ak.array([int64_min, -1, int64_max])
def test_error_handling(self):
    """Each GroupBy entry point must raise TypeError with a precise message
    when handed an unsupported argument or dtype."""
    d = make_arrays()
    akdf = {key: ak.array(vals) for key, vals in d.items()}
    gb = ak.GroupBy([akdf['keys'], akdf['keys2']])

    # Non-int64 keys are rejected at construction time.
    for bad_keys in (self.bvalues, self.fvalues):
        with self.assertRaises(TypeError) as cm:
            ak.GroupBy(bad_keys)
        self.assertEqual('GroupBy only supports pdarrays with a dtype int64',
                         cm.exception.args[0])

    # broadcast requires a pdarray, not a plain list.
    with self.assertRaises(TypeError) as cm:
        gb.broadcast([])
    self.assertEqual('type of argument "values" must be arkouda.pdarrayclass.pdarray; got list instead',
                     cm.exception.args[0])

    # nunique only accepts int64 values.
    for bad_dtype in (bool, float64):
        with self.assertRaises(TypeError) as cm:
            self.igb.nunique(ak.randint(0, 1, 10, dtype=bad_dtype))
        self.assertEqual('the pdarray dtype must be int64', cm.exception.args[0])

    # any/all only accept bool values.
    for op_name in ('any', 'all'):
        for bad_dtype in (float64, int64):
            with self.assertRaises(TypeError) as cm:
                getattr(self.igb, op_name)(ak.randint(0, 1, 10, dtype=bad_dtype))
            self.assertEqual(f'{op_name} is only supported for pdarrays of dtype bool',
                             cm.exception.args[0])

    # min/max/argmin/argmax only accept numeric (float64/int64) values.
    for op_name in ('min', 'max', 'argmin', 'argmax'):
        with self.assertRaises(TypeError) as cm:
            getattr(self.igb, op_name)(ak.randint(0, 1, 10, dtype=bool))
        self.assertEqual(f'{op_name} is only supported for pdarrays of dtype float64 and int64',
                         cm.exception.args[0])
def run_test(levels, verbose=False):
    '''
    The run_test method enables execution of ak.GroupBy and ak.GroupBy.Reductions
    on a randomized set of arrays on the specified number of levels.
    Note: the current set of valid levels is {1,2}

    :param levels: number of GroupBy key levels, must be 1 or 2
    :param verbose: if True, print per-operation progress and mismatches
    :return: number of failed pandas/arkouda comparisons
    :raise: ValueError if levels is not 1 or 2
    '''
    d = make_arrays()
    df = pd.DataFrame(d)
    akdf = {k: ak.array(v) for k, v in d.items()}

    if levels == 1:
        akg = ak.GroupBy(akdf['keys'])
        keyname = 'keys'
    elif levels == 2:
        akg = ak.GroupBy([akdf['keys'], akdf['keys2']])
        keyname = ['keys', 'keys2']
    else:
        # Previously an unsupported value fell through and surfaced as a
        # confusing NameError on akg below; fail fast with a clear message.
        raise ValueError('levels must be 1 or 2, got {}'.format(levels))

    tests = 0
    failures = 0
    not_impl = 0

    if verbose:
        print("Doing .count()")
    tests += 1
    pdkeys, pdvals = groupby_to_arrays(df, keyname, 'int64', 'count', levels)
    akkeys, akvals = akg.count()
    akvals = akvals.to_ndarray()
    failures += compare_keys(pdkeys, akkeys, levels, pdvals, akvals)

    for vname in ('int64', 'float64', 'bool'):
        for op in ak.GroupBy.Reductions:
            if verbose:
                print(f"\nDoing aggregate({vname}, {op})")
            tests += 1
            do_check = True
            try:
                pdkeys, pdvals = groupby_to_arrays(df, keyname, vname, op, levels)
            except Exception:
                # pandas lacks this reduction; still run arkouda below to make
                # sure it does not crash, but skip the value comparison.
                if verbose:
                    print("Pandas does not implement")
                do_check = False
            try:
                akkeys, akvals = akg.aggregate(akdf[vname], op)
                akvals = akvals.to_ndarray()
            except RuntimeError as E:
                if verbose:
                    print("Arkouda error: ", E)
                not_impl += 1
                do_check = False
                continue
            if not do_check:
                continue
            if op.startswith('arg'):
                # argmin/argmax return indices; compare the values they select.
                pdextrema = df[vname][pdvals]
                akextrema = akdf[vname][ak.array(akvals)].to_ndarray()
                if not np.allclose(pdextrema, akextrema):
                    print("Different argmin/argmax: Arkouda failed to find an extremum")
                    print("pd: ", pdextrema)
                    print("ak: ", akextrema)
                    failures += 1
            else:
                failures += compare_keys(pdkeys, akkeys, levels, pdvals, akvals)

    print(f"{tests - failures - not_impl} / {tests - not_impl} passed, "
          f"{failures} errors, {not_impl} not implemented")
    return failures
def testUnique(self):
    """unique() must return exactly the distinct categories of the fixture."""
    cat = self._getRandomizedCategorical()
    expected = ak.Categorical(ak.array(
        ['non-string', 'string3', 'string1', 'non-string2', 'string']))
    self.assertTrue((expected.to_ndarray() == cat.unique().to_ndarray()).all())