Exemplo n.º 1
0
    def testConcatenate(self):
        """Check ak.concatenate element-wise for int, float and bool inputs."""

        def expect(values, parts):
            # The expected array is built first, then compared element-wise
            # with the concatenation of ``parts``.
            self.assertTrue((ak.array(values) == ak.concatenate(parts)).all())

        # Integer inputs produced by arange, in both orders.
        low = ak.arange(1, 4)
        high = ak.arange(4, 7)
        expect([1, 2, 3, 4, 5, 6], [low, high])
        expect([4, 5, 6, 1, 2, 3], [high, low])

        # Inputs produced by linspace, in both orders.
        low = ak.linspace(start=1, stop=3, length=3)
        high = ak.linspace(start=4, stop=6, length=3)
        expect([1, 2, 3, 4, 5, 6], [low, high])
        expect([4, 5, 6, 1, 2, 3], [high, low])

        # Boolean inputs; only one order is checked.
        first = ak.array([True, False, True])
        second = ak.array([False, True, True])
        expect([True, False, True, False, True, True], [first, second])
Exemplo n.º 2
0
    def test_str_repr(self):
        """
        Test 3 different types: int, float, bool with lengths under/over threshold
        Do this for both __str__() and __repr__()

        ak.client.pdarrayIterThresh is lowered to 5 for the duration of the
        test. It is restored to ak.client.pdarrayIterThreshDefVal in a
        ``finally`` clause so that a failing assertion cannot leak the lowered
        threshold into other tests.
        """
        ak.client.pdarrayIterThresh = 5
        try:
            # Test __str__()
            self.assertEqual("[1 2 3]", ak.array([1, 2, 3]).__str__())
            self.assertEqual("[1 2 3 ... 17 18 19]",
                             ak.arange(1, 20).__str__())
            self.assertEqual("[1.100000e+00 2.300000e+00 5.000000e+00]",
                             ak.array([1.1, 2.3, 5]).__str__())
            self.assertEqual(
                "[0.000000e+00 5.263158e-01 1.052632e+00 ... 8.947368e+00 9.473684e+00 1.000000e+01]",
                ak.linspace(0, 10, 20).__str__())
            self.assertEqual("[False False False]",
                             ak.isnan(ak.array([1.1, 2.3, 5])).__str__())
            self.assertEqual("[False False False ... False False False]",
                             ak.isnan(ak.linspace(0, 10, 20)).__str__())

            # Test __repr__()
            self.assertEqual("array([1 2 3])", ak.array([1, 2, 3]).__repr__())
            self.assertEqual("array([1 2 3 ... 17 18 19])",
                             ak.arange(1, 20).__repr__())
            self.assertEqual(
                "array([1.1000000000000001 2.2999999999999998 5])",
                ak.array([1.1, 2.3, 5]).__repr__())
            self.assertEqual(
                "array([0 0.52631578947368418 1.0526315789473684 ... 8.9473684210526319 9.473684210526315 10])",
                ak.linspace(0, 10, 20).__repr__())
            self.assertEqual("array([False False False])",
                             ak.isnan(ak.array([1.1, 2.3, 5])).__repr__())
            self.assertEqual(
                "array([False False False ... False False False])",
                ak.isnan(ak.linspace(0, 10, 20)).__repr__())
        finally:
            # Always set this back for other tests, even if an assertion failed.
            ak.client.pdarrayIterThresh = ak.client.pdarrayIterThreshDefVal
Exemplo n.º 3
0
    def testCast(self):
        """Cast sample arrays to every dtype in ak.DTypes and check the results.

        Each cast must preserve the element count. For the (source, target)
        pairs listed in ``lossless_pairs``, casting back to the source dtype
        must reproduce the original values. A few fixed casts of small
        linspace/arange arrays are checked at the end.
        """
        count = 100
        samples = {
            ak.int64: ak.randint(-(2**48), 2**48, count),
            ak.float64: ak.randint(0, 1, count, dtype=ak.float64),
            ak.bool: ak.randint(0, 2, count, dtype=ak.bool),
        }
        lossless_pairs = {
            (ak.bool, ak.bool),
            (ak.int64, ak.int64),
            (ak.int64, ak.float64),
            (ak.int64, npstr),
            (ak.float64, ak.float64),
            (ak.float64, npstr),
            (ak.uint8, ak.int64),
            (ak.uint8, ak.float64),
            (ak.uint8, npstr),
        }
        for source_type, original in samples.items():
            for candidate in ak.DTypes:
                target_type = ak.dtype(candidate)
                converted = ak.cast(original, target_type)
                self.assertEqual(original.size, converted.size)
                if (source_type, target_type) not in lossless_pairs:
                    continue
                restored = ak.cast(converted, source_type)
                self.assertTrue(
                    (original == restored).all(),
                    f"{source_type}: {original[:5]}, {target_type}: {restored[:5]}")

        as_ints = ak.cast(ak.linspace(1, 5, 5), dt=ak.int64)
        self.assertTrue((ak.array([1, 2, 3, 4, 5]) == as_ints).all())

        as_floats = ak.cast(ak.arange(0, 5), dt=ak.float64)
        self.assertEqual(as_floats.dtype, ak.float64)

        expected_bools = ak.array([False, True, True, True, True])
        as_bools = ak.cast(ak.linspace(0, 4, 5), dt=ak.bool)
        self.assertTrue((expected_bools == as_bools).all())
Exemplo n.º 4
0
    def test_linspace(self):
        """Exercise ak.linspace: result type/length, descending ranges, type errors."""
        samples = ak.linspace(0, 100, 1000)
        self.assertEqual(1000, len(samples))
        self.assertEqual(float, samples.dtype)
        self.assertIsInstance(samples, ak.pdarray)

        # Float endpoints: the result is not inspected, the call only has to succeed.
        ak.linspace(0.0, 100.0, 150)

        # Descending range, first with int endpoints and then with float endpoints.
        for first, last in ((5, 0), (5.0, 0.0)):
            descending = ak.linspace(start=first, stop=last, length=6)
            self.assertEqual(5.0000, descending[0])
            self.assertEqual(0.0000, descending[5])

        def expect_type_error(message, *args):
            # ak.linspace(*args) must raise TypeError carrying exactly ``message``.
            with self.assertRaises(TypeError) as caught:
                ak.linspace(*args)
            self.assertEqual(message, caught.exception.args[0])

        expect_type_error(
            'type of argument "stop" must be one of (float, int); got str instead',
            0, '100', 1000)
        expect_type_error(
            'type of argument "start" must be one of (float, int); got str instead',
            '0', 100, 1000)
        expect_type_error(
            'type of argument "length" must be int; got str instead',
            0, 100, '1000')
Exemplo n.º 5
0
    def testLinspace(self):
        """Check ak.linspace output basics and the TypeError text for str arguments."""
        evenly_spaced = ak.linspace(0, 100, 1000)
        self.assertEqual(1000, len(evenly_spaced))
        self.assertEqual(float, evenly_spaced.dtype)
        self.assertIsInstance(evenly_spaced, ak.pdarray)

        # A descending range keeps its endpoints at the first and last index.
        countdown = ak.linspace(start=5, stop=0, length=6)
        self.assertEqual(5.0000, countdown[0])
        self.assertEqual(0.0000, countdown[5])

        # A str passed as ``stop``.
        with self.assertRaises(TypeError) as stop_error:
            ak.linspace(0, '100', 1000)
        self.assertEqual(
            ("The stop parameter must be an int or a" +
             " scalar that can be parsed to an int, but is a 'str'"),
            stop_error.exception.args[0])

        # A str passed as ``start``.
        with self.assertRaises(TypeError) as start_error:
            ak.linspace('0', 100, 1000)
        self.assertEqual(
            ("The start parameter must be an int or a" +
             " scalar that can be parsed to an int, but is a 'str'"),
            start_error.exception.args[0])

        # A str passed as ``length``.
        with self.assertRaises(TypeError) as length_error:
            ak.linspace(0, 100, '1000')
        self.assertEqual("The length parameter must be an int64",
                         length_error.exception.args[0])
Exemplo n.º 6
0
def compare_strategies(length, ncat, op, dtype):
    """Time two ak.GroupBy configurations and their aggregations on random data.

    Keys are random ints in [0, ncat). Values depend on ``dtype``: random ints
    for 'int64', a mostly-False boolean array for 'bool', otherwise a linspace
    from -1 to 1. A GroupBy is built with second argument False (reported as
    "Global") and with True (reported as "Local"); each is aggregated with
    ``op``. Timings and a comparison of the two results are printed.

    Returns:
        Tuple ``(ggtime, grtime, lgtime, lrtime)`` of elapsed seconds for
        global groupby, global reduce, local groupby and local reduce.
    """
    keys = ak.randint(0, ncat, length)
    if dtype == 'int64':
        vals = ak.randint(0, length//ncat, length)
    elif dtype == 'bool':
        vals = ak.zeros(length, dtype='bool')
        # Flip ncat//2 randomly chosen positions (possibly repeated) to True.
        for position in np.random.randint(0, length, ncat//2):
            vals[position] = True
    else:
        vals = ak.linspace(-1, 1, length)

    def timed(label, action):
        # Print the label, run the action, print and return the elapsed time.
        print(label, end=' ')
        began = time()
        outcome = action()
        elapsed = time() - began
        print(elapsed)
        return outcome, elapsed

    gg, ggtime = timed("Global groupby", lambda: ak.GroupBy(keys, False))
    (gk, gv), grtime = timed("Global reduce", lambda: gg.aggregate(vals, op))
    lg, lgtime = timed("Local groupby", lambda: ak.GroupBy(keys, True))
    (lk, lv), lrtime = timed("Local reduce", lambda: lg.aggregate(vals, op))

    print(f"Keys match? {(gk == lk).all()}")
    print(f"Absolute diff of vals = {ak.abs(gv - lv).sum()}")
    return ggtime, grtime, lgtime, lrtime
Exemplo n.º 7
0
 def test_compare_linspace(self):
     """Check that ak.linspace(10, 20, N) is close to np.linspace(10, 20, N)."""
     reference = np.linspace(10, 20, N)
     candidate = ak.linspace(10, 20, N)
     # The arkouda result is converted to an ndarray before comparing.
     self.assertTrue(np.allclose(reference, candidate.to_ndarray()))
Exemplo n.º 8
0
def check_linspace(N):
    """Compare np.linspace(10, 20, N) with ak.linspace(10, 20, N).

    Returns whatever ``pass_fail`` yields for the np.allclose verdict.
    """
    expected = np.linspace(10, 20, N)
    actual = ak.linspace(10, 20, N).to_ndarray()
    return pass_fail(np.allclose(expected, actual))
Exemplo n.º 9
0
def generate_arrays(length, nkeys, nvals, dtype='int64'):
    """Build a random key array and a value array of the requested kind.

    Keys are random ints in [0, nkeys). Values are random ints in [0, nvals)
    for 'int64', a boolean array with some randomly chosen True entries for
    'bool', and a linspace from -1 to 1 for any other ``dtype``.

    Returns:
        Tuple ``(keys, vals)``.
    """
    keys = ak.randint(0, nkeys, length)
    if dtype == 'int64':
        return keys, ak.randint(0, nvals, length)
    if dtype != 'bool':
        return keys, ak.linspace(-1, 1, length)

    vals = ak.zeros(length, dtype='bool')
    # Set nkeys // 2 randomly drawn positions (repeats possible) to True.
    for position in np.random.randint(0, length, nkeys // 2):
        vals[position] = True
    return keys, vals
Exemplo n.º 10
0
    def testCumSum(self):
        """Check ak.cumsum's result type, length and dtype, and its list rejection."""
        running_total = ak.cumsum(ak.linspace(1, 10, 10))

        self.assertIsInstance(running_total, ak.pdarray)
        self.assertEqual(10, len(running_total))
        self.assertEqual(float, running_total.dtype)

        # A plain Python list must be rejected with this exact message.
        with self.assertRaises(TypeError) as caught:
            ak.cumsum([range(0, 10)])
        self.assertEqual(
            'type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead',
            caught.exception.args[0])
Exemplo n.º 11
0
    def testHash(self):
        """Check that ak.hash follows element order and yields int64 parts."""
        upper, lower = ak.hash(ak.arange(10))
        rev = ak.arange(9, -1, -1)
        upper_rev, lower_rev = ak.hash(rev)
        # Indexing the reversed input's hashes by ``rev`` must give back the
        # hashes of the forward input.
        self.assertTrue((upper == upper_rev[rev]).all())
        self.assertTrue((lower == lower_rev[rev]).all())

        # Same check with full=False, where a single array is returned.
        single = ak.hash(ak.arange(10), full=False)
        single_rev = ak.hash(rev, full=False)
        self.assertTrue((single == single_rev[rev]).all())

        # Hashing float input: both returned parts must be int64.
        parts = ak.hash(ak.linspace(0, 10, 10))
        self.assertTrue(parts[0].dtype == ak.int64)
        self.assertTrue(parts[1].dtype == ak.int64)
Exemplo n.º 12
0
    def testAbs(self):
        """Check ak.abs against np.abs and fixed expectations, plus list rejection."""
        reference = np.linspace(1, 10, 10)
        mirrored = ak.array(reference)

        matches_numpy = np.abs(reference) == ak.abs(mirrored).to_ndarray()
        self.assertTrue(matches_numpy.all())

        negative_ints = ak.abs(ak.arange(-5, -1))
        self.assertTrue((ak.arange(5, 1, -1) == negative_ints).all())

        negative_floats = ak.abs(ak.linspace(-5, -1, 5))
        self.assertTrue((ak.array([5, 4, 3, 2, 1]) == negative_floats).all())

        # A plain Python list must be rejected with this exact message.
        with self.assertRaises(TypeError) as caught:
            ak.abs([range(0, 10)])
        self.assertEqual(
            'type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead',
            caught.exception.args[0])
Exemplo n.º 13
0
    def testValueCounts(self):
        """Check ak.value_counts on constant ints and its float/list error paths."""
        # One hundred ones: a single distinct value with a count of 100.
        counted = ak.value_counts(ak.ones(100, dtype=ak.int64))
        self.assertEqual(ak.array([1]), counted[0])
        self.assertEqual(ak.array([100]), counted[1])

        # float64 input is expected to fail with a RuntimeError.
        floats = ak.linspace(1, 10, 10)
        with self.assertRaises(RuntimeError) as runtime_failure:
            ak.value_counts(floats)
        self.assertEqual('Error: unique: float64 not implemented',
                         runtime_failure.exception.args[0])

        # A plain Python list is rejected with a TypeError.
        with self.assertRaises(TypeError) as type_failure:
            ak.value_counts([0])
        self.assertEqual(
            'type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead',
            type_failure.exception.args[0])
Exemplo n.º 14
0
    def test_error_handling(self):
        """Check the TypeError text when each ak.where argument in turn is a list."""

        def ramp():
            # A fresh valid pdarray argument for every slot that is not under test.
            return ak.linspace(1, 10, 10)

        # List passed as the condition.
        with self.assertRaises(TypeError) as bad_condition:
            ak.where([0], ramp(), ramp())
        self.assertEqual(
            ('type of argument "condition" must be arkouda.pdarrayclass.pdarray;'
             + ' got list instead'),
            bad_condition.exception.args[0])

        # List passed as A.
        with self.assertRaises(TypeError) as bad_a:
            ak.where(ramp(), [0], ramp())
        self.assertEqual(
            'type of argument "A" must be one of (int, float, int64, pdarray); got list instead',
            bad_a.exception.args[0])

        # List passed as B.
        with self.assertRaises(TypeError) as bad_b:
            ak.where(ramp(), ramp(), [0])
        self.assertEqual(
            'both A and B must be an int, np.int64, float, np.float64, or pdarray',
            bad_b.exception.args[0])
Exemplo n.º 15
0
    def test_linspace(self):
        """Exercise ak.linspace with int, float and NumPy scalar arguments.

        Checks the length, dtype and type of the result, the endpoints of
        descending ranges, and the TypeError messages raised when start, stop
        or length is a str.
        """
        pda = ak.linspace(0, 100, 1000)
        self.assertEqual(1000, len(pda))
        self.assertEqual(float, pda.dtype)
        self.assertIsInstance(pda, ak.pdarray)

        # Float endpoints: the result is not inspected, the call only has to succeed.
        pda = ak.linspace(0.0, 100.0, 150)

        pda = ak.linspace(start=5, stop=0, length=6)
        self.assertEqual(5.0000, pda[0])
        self.assertEqual(0.0000, pda[5])

        pda = ak.linspace(start=5.0, stop=0.0, length=6)
        self.assertEqual(5.0000, pda[0])
        self.assertEqual(0.0000, pda[5])

        # NumPy scalar arguments. np.float64 replaces the np.float alias,
        # which was deprecated in NumPy 1.20 and removed in 1.24 (accessing
        # it raises AttributeError there).
        pda = ak.linspace(start=np.float64(5.0),
                          stop=np.float64(0.0),
                          length=np.int64(6))
        self.assertEqual(5.0000, pda[0])
        self.assertEqual(0.0000, pda[5])

        with self.assertRaises(TypeError) as cm:
            ak.linspace(0, '100', 1000)
        self.assertEqual((
            'both start and stop must be an int, np.int64, float, or np.float64'
        ), cm.exception.args[0])

        with self.assertRaises(TypeError) as cm:
            ak.linspace('0', 100, 1000)
        self.assertEqual((
            'both start and stop must be an int, np.int64, float, or np.float64'
        ), cm.exception.args[0])

        with self.assertRaises(TypeError) as cm:
            ak.linspace(0, 100, '1000')
        self.assertEqual(
            'type of argument "length" must be one of (int, int64); got str instead',
            cm.exception.args[0])
Exemplo n.º 16
0
def run_tests(verbose):
    """Compare arkouda and numpy binary operators across dtypes and operand kinds.

    For every (left dtype, right dtype, operator) combination drawn from the
    sample arrays and ak.pdarray.BinOps, and for array/array, array/scalar and
    scalar/array operands, the expression is evaluated with numpy and with
    arkouda. Each expression is filed under exactly one outcome: implemented
    by neither, by only one library, or by both. Expressions implemented by
    both are further flagged for arkouda execution errors, dtype mismatches
    and value mismatches. A summary is printed.

    Side effects: sets numpy's error handling to 'ignore' and (re)binds the
    module globals ``pdarrays``, ``ndarrays`` and ``scalars``.

    Args:
        verbose: when truthy, also print the inputs and each categorized
            expression.

    Returns:
        True when every expression implemented by both libraries matched in
        dtype and value without error, otherwise False.
    """
    # ignore numpy warnings like divide by 0
    np.seterr(all='ignore')
    global pdarrays
    pdarrays = {
        'int64': ak.arange(0, SIZE, 1),
        'float64': ak.linspace(0, 2, SIZE),
        'bool': (ak.arange(0, SIZE, 1) % 2) == 0
    }
    global ndarrays
    ndarrays = {
        'int64': np.arange(0, SIZE, 1),
        'float64': np.linspace(0, 2, SIZE),
        'bool': (np.arange(0, SIZE, 1) % 2) == 0
    }
    global scalars
    scalars = {'int64': 5, 'float64': 3.14159, 'bool': True}
    dtypes = pdarrays.keys()
    if verbose:
        print("Operators: ", ak.pdarray.BinOps)
        print("Dtypes: ", dtypes)
        print("pdarrays: ")
        for k, v in pdarrays.items():
            print(k, ": ", v)
        print("ndarrays: ")
        for k, v in ndarrays.items():
            print(k, ": ", v)
        print("scalars: ")
        for k, v in scalars.items():
            print(k, ": ", v)

    def do_op(lt, rt, ls, rs, isarkouda, oper):
        """Evaluate ``<left> oper <right>`` using the module-level sample data.

        ``ls``/``rs`` select the scalar for that side; otherwise the array
        comes from ``pdarrays`` when ``isarkouda`` is true, else ``ndarrays``.
        """
        arrays_name = ('ndarrays', 'pdarrays')[isarkouda]
        evalstr = ''
        if ls:
            evalstr += 'scalars["{}"]'.format(lt)
        else:
            evalstr += '{}["{}"]'.format(arrays_name, lt)
        evalstr += ' {} '.format(oper)
        if rs:
            evalstr += 'scalars["{}"]'.format(rt)
        else:
            evalstr += '{}["{}"]'.format(arrays_name, rt)
        # NOTE(review): eval runs on a string assembled only from the dtype
        # keys above and ak.pdarray.BinOps, not from external input.
        res = eval(evalstr)
        return res

    results = {
        'neither_implement': [],  # (expression, ak_error)
        'arkouda_minus_numpy': [],  # (expression, ak_result, error_on_exec?)
        'numpy_minus_arkouda': [],  # (expression, ak_result, error_on_exec?)
        'both_implement': []
    }  # (expression, ak_result, error_on_exec?, dtype_mismatch?, value_mismatch?)
    for ltype, rtype, op in product(dtypes, dtypes, ak.pdarray.BinOps):
        for lscalar, rscalar in ((False, False), (False, True), (True, False)):
            expression = "{}({}) {} {}({})".format(ltype, ('array',
                                                           'scalar')[lscalar],
                                                   op, rtype,
                                                   ('array',
                                                    'scalar')[rscalar])
            try:
                npres = do_op(ltype, rtype, lscalar, rscalar, False, op)
            except TypeError:  # numpy doesn't implement operation
                try:
                    akres = do_op(ltype, rtype, lscalar, rscalar, True, op)
                except RuntimeError as e:
                    if 'not implemented' in str(
                            e):  # neither numpy nor arkouda implement
                        results['neither_implement'].append(
                            (expression, str(e)))
                    else:  # arkouda implements with error, np does not implement
                        results['arkouda_minus_numpy'].append(
                            (expression, str(e), True))
                    continue
                # arkouda implements but not numpy
                results['arkouda_minus_numpy'].append(
                    (expression, str(akres), False))
                continue
            try:
                akres = do_op(ltype, rtype, lscalar, rscalar, True, op)
            except RuntimeError as e:
                if 'not implemented' in str(
                        e):  # numpy implements but not arkouda
                    results['numpy_minus_arkouda'].append(
                        (expression, str(e), True))
                else:  # both implement, but arkouda errors
                    results['both_implement'].append(
                        (expression, str(e), True, False, False))
                continue
            # both numpy and arkouda execute without error
            try:
                akrestype = akres.dtype
            except Exception:
                warnings.warn(
                    "Cannot detect return dtype of ak result: {} (np result: {})"
                    .format(akres, npres))
                results['both_implement'].append(
                    (expression, str(akres), False, True, False))
                continue

            if akrestype != npres.dtype:
                restypes = "{}(np) vs. {}(ak)".format(npres.dtype, akrestype)
                results['both_implement'].append(
                    (expression, restypes, False, True, False))
                continue
            try:
                akasnp = akres.to_ndarray()
            except Exception:
                warnings.warn("Could not convert to ndarray: {}".format(akres))
                results['both_implement'].append(
                    (expression, str(akres), True, False, False))
                continue
            if not np.allclose(akasnp, npres, equal_nan=True):
                res = "np: {}\nak: {}".format(npres, akasnp)
                results['both_implement'].append(
                    (expression, res, False, False, True))
                # Skip the "match" entry below: without this the mismatching
                # expression was recorded twice, once as a clean match.
                continue
            # Finally, both numpy and arkouda agree on result
            results['both_implement'].append(
                (expression, "", False, False, False))

    print("# ops not implemented by numpy or arkouda: {}".format(
        len(results['neither_implement'])))
    if verbose:
        for expression, err in results['neither_implement']:
            print(expression)
    print("# ops implemented by numpy but not arkouda: {}".format(
        len(results['numpy_minus_arkouda'])))
    if verbose:
        for expression, err, flag in results['numpy_minus_arkouda']:
            print(expression)
    print("# ops implemented by arkouda but not numpy: {}".format(
        len(results['arkouda_minus_numpy'])))
    if verbose:
        for expression, res, flag in results['arkouda_minus_numpy']:
            print(expression, " -> ", res)
    nboth = len(results['both_implement'])
    print("# ops implemented by both: {}".format(nboth))
    matches = 0
    execerrors = []
    dtypeerrors = []
    valueerrors = []
    for (expression, res, ex, dt, val) in results['both_implement']:
        # An entry counts as a match only when none of its three flags is set.
        matches += not any((ex, dt, val))
        if ex:
            execerrors.append((expression, res))
        if dt:
            dtypeerrors.append((expression, res))
        if val:
            valueerrors.append((expression, res))
    print("  Matching results:         {} / {}".format(matches, nboth))
    print("  Arkouda execution errors: {} / {}".format(len(execerrors), nboth))
    if verbose:
        print('\n'.join(map(': '.join, execerrors)))
    print("  Dtype mismatches:         {} / {}".format(len(dtypeerrors),
                                                       nboth))
    if verbose:
        print('\n'.join(map(': '.join, dtypeerrors)))
    print("  Value mismatches:         {} / {}".format(len(valueerrors),
                                                       nboth))
    if verbose:
        print('\n'.join(map(': '.join, valueerrors)))
    return matches == nboth