Пример #1
0
def time_ak_coargsort(N_per_locale, trials, dtype, seed):
    """Benchmark ``ak.coargsort`` on 1, 2, 8 and 16 arrays of ``dtype``.

    The total element count (``N_per_locale`` times ``numLocales`` from
    ``ak.get_config()``) is divided evenly among the arrays of each
    configuration. Average time and rate are printed per configuration.

    Args:
        N_per_locale: Number of elements per locale.
        trials: Number of timed ``coargsort`` calls to average over.
        dtype: ``'int64'``, ``'float64'`` or ``'str'``.
        seed: Base seed; array ``i`` is generated with ``seed + i``.
            ``None`` leaves every array unseeded.

    Raises:
        ValueError: If ``dtype`` is not one of the supported names.
    """
    # Fail fast: an unknown dtype used to surface as an unbound-local error
    # only after the server had already been queried.
    if dtype not in ('int64', 'float64', 'str'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    print(">>> arkouda {} coargsort".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    for numArrays in (1, 2, 8, 16):
        if seed is None:
            seeds = [None] * numArrays
        else:
            seeds = [seed + i for i in range(numArrays)]
        size = N // numArrays
        if dtype == 'int64':
            arrs = [ak.randint(0, 2**32, size, seed=s) for s in seeds]
        elif dtype == 'float64':
            arrs = [ak.randint(0, 1, size, dtype=ak.float64, seed=s) for s in seeds]
        else:
            arrs = [ak.random_strings_uniform(1, 8, size, seed=s) for s in seeds]

        # String arrays are measured by their underlying byte buffer.
        if dtype == 'str':
            nbytes = sum(arr.bytes.size * arr.bytes.itemsize for arr in arrs)
        else:
            nbytes = sum(arr.size * arr.itemsize for arr in arrs)

        timings = []
        for _ in range(trials):
            start = time.time()
            perm = ak.coargsort(arrs)
            end = time.time()
            timings.append(end - start)
        tavg = sum(timings) / trials

        # The first array is the primary key, so it must come out sorted.
        a = arrs[0][perm]
        if dtype in ('int64', 'float64'):
            assert ak.is_sorted(a)
        print("{}-array Average time = {:.4f} sec".format(numArrays, tavg))
        bytes_per_sec = nbytes / tavg
        print("{}-array Average rate = {:.4f} GiB/sec".format(numArrays, bytes_per_sec/2**30))
Пример #2
0
def time_ak_scatter(isize, vsize, trials, dtype, random):
    """Benchmark scatter assignment ``c[i] = v`` and print time and rate.

    Args:
        isize: Number of indices per locale.
        vsize: Size of the destination array per locale.
        trials: Number of timed assignments to average over.
        dtype: Dtype of the destination and of the scattered values. When
            ``random`` is true only ``'int64'`` and ``'float64'`` are
            supported.
        random: If true, scatter random values; otherwise scatter ones.

    Raises:
        ValueError: If ``random`` is true and ``dtype`` is unsupported.
    """
    # Without this check an unsupported dtype left `v` unbound.
    if random and dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype for random values: {!r}".format(dtype))

    print(">>> arkouda scatter")
    cfg = ak.get_config()
    Ni = isize * cfg["numLocales"]
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {}, num_indices = {:,} ; num_values = {:,}".format(
        cfg["numLocales"], Ni, Nv))
    # Index vector is always random
    i = ak.randint(0, Nv, Ni)
    c = ak.zeros(Nv, dtype=dtype)
    if not random:
        v = ak.ones(Ni, dtype=dtype)
    elif dtype == 'int64':
        v = ak.randint(0, 2**32, Ni)
    else:
        v = ak.randint(0, 1, Ni, dtype=ak.float64)

    timings = []
    for _ in range(trials):
        start = time.time()
        c[i] = v
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    # Rate is reported as three times the size of the index vector.
    bytes_per_sec = (i.size * i.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec / 2**30))
Пример #3
0
def time_ak_argsort(N_per_locale, trials, dtype, scale_by_locales):
    """Benchmark ``ak.argsort`` on a random array and print time and rate.

    Args:
        N_per_locale: Array length; per locale when ``scale_by_locales`` is
            true, otherwise the total length.
        trials: Number of timed ``argsort`` calls to average over.
        dtype: ``'int64'`` or ``'float64'``.
        scale_by_locales: If true, multiply ``N_per_locale`` by the
            ``numLocales`` value from ``ak.get_config()``.

    Raises:
        ValueError: If ``dtype`` is not supported.
    """
    # Without this check an unsupported dtype left `a` unbound.
    if dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    print(">>> arkouda argsort")
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"] if scale_by_locales else N_per_locale
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N)
    else:
        a = ak.randint(0, 1, N, dtype=ak.float64)

    timings = []
    for _ in range(trials):
        start = time.time()
        perm = ak.argsort(a)
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    # Sanity check on the permutation from the last trial.
    assert ak.is_sorted(a[perm])
    print("Average time = {:.4f} sec".format(tavg))
    bytes_per_sec = (a.size * a.itemsize) / tavg
    print("Average rate = {:.4f} GiB/sec".format(bytes_per_sec/2**30))
Пример #4
0
def time_ak_reduce(N_per_locale, trials, dtype, random):
    """Benchmark every reduction named in the module-level ``OPS``.

    Each name in ``OPS`` is looked up as a method on the array and called
    with no arguments. The last result, average time and rate are printed
    per operation.

    Args:
        N_per_locale: Number of elements per locale.
        trials: Number of timed calls per operation to average over.
        dtype: ``'int64'`` or ``'float64'`` when ``random`` is true. When
            ``random`` is false the input is ``ak.arange(0, N, 1)``,
            multiplied by ``1.0`` only for ``'float64'``.
        random: If true, reduce random data instead of a range.

    Raises:
        ValueError: If ``random`` is true and ``dtype`` is unsupported.
    """
    # Without this check an unsupported dtype left `a` unbound.
    if random and dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype for random values: {!r}".format(dtype))

    print(">>> arkouda reduce")
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    if random:
        if dtype == 'int64':
            a = ak.randint(0, 2**32, N)
        else:
            a = ak.randint(0, 1, N, dtype=ak.float64)
    else:
        a = ak.arange(0, N, 1)
        if dtype == 'float64':
            a = 1.0 * a

    timings = {op: [] for op in OPS}
    results = {}
    for _ in range(trials):
        for op in timings:
            fxn = getattr(a, op)
            start = time.time()
            r = fxn()
            end = time.time()
            timings[op].append(end - start)
            results[op] = r
    tavg = {op: sum(t) / trials for op, t in timings.items()}

    for op, t in tavg.items():
        print("{} = {}".format(op, results[op]))
        print("  Average time = {:.4f} sec".format(t))
        bytes_per_sec = (a.size * a.itemsize) / t
        print("  Average rate = {:.2f} GiB/sec".format(bytes_per_sec / 2**30))
Пример #5
0
def time_ak_write_read(N_per_locale, trials, dtype, path, seed):
    """Benchmark ``a.save(path)`` followed by ``ak.load(path)``.

    After every trial the files matching ``path + '_LOCALE*'`` are removed.
    Average write/read times and rates are printed.

    Args:
        N_per_locale: Number of elements per locale.
        trials: Number of save/load round trips to average over.
        dtype: ``'int64'`` or ``'float64'``.
        path: File path prefix passed to ``save`` and ``load``.
        seed: Seed for the random input array, or ``None``.

    Raises:
        ValueError: If ``dtype`` is not supported.
    """
    # Without this check an unsupported dtype left `a` unbound.
    if dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    print(">>> arkouda {} write/read".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
    else:
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)

    writetimes = []
    readtimes = []
    for _ in range(trials):
        start = time.time()
        a.save(path)
        end = time.time()
        writetimes.append(end - start)
        start = time.time()
        b = ak.load(path)
        end = time.time()
        readtimes.append(end - start)
        # Clean up so the next trial writes fresh files.
        for f in glob(path + '_LOCALE*'):
            os.remove(f)
    avgwrite = sum(writetimes) / trials
    avgread = sum(readtimes) / trials

    print("write Average time = {:.4f} sec".format(avgwrite))
    print("read Average time = {:.4f} sec".format(avgread))

    nb = a.size * a.itemsize
    print("write Average rate = {:.2f} GiB/sec".format(nb / 2**30 / avgwrite))
    print("read Average rate = {:.2f} GiB/sec".format(nb / 2**30 / avgread))
Пример #6
0
def time_ak_coargsort(N_per_locale, trials, dtype):
    """Benchmark ``ak.coargsort`` on 1, 2, 8 and 16 unseeded random arrays.

    The total element count (``N_per_locale`` times ``numLocales`` from
    ``ak.get_config()``) is divided evenly among the arrays of each
    configuration. Average time and rate are printed per configuration.

    Args:
        N_per_locale: Number of elements per locale.
        trials: Number of timed ``coargsort`` calls to average over.
        dtype: ``'int64'`` or ``'float64'``.

    Raises:
        ValueError: If ``dtype`` is not supported.
    """
    # Without this check an unsupported dtype left `arrs` unbound.
    if dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    print(">>> arkouda coargsort")
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    for numArrays in (1, 2, 8, 16):
        size = N // numArrays
        if dtype == 'int64':
            arrs = [ak.randint(0, 2**32, size) for _ in range(numArrays)]
        else:
            arrs = [
                ak.randint(0, 1, size, dtype=ak.float64)
                for _ in range(numArrays)
            ]

        timings = []
        for _ in range(trials):
            start = time.time()
            perm = ak.coargsort(arrs)
            end = time.time()
            timings.append(end - start)
        tavg = sum(timings) / trials

        # The first array is the primary key, so it must come out sorted.
        a = arrs[0][perm]
        assert ak.is_sorted(a)
        print("{}-array Average time = {:.4f} sec".format(numArrays, tavg))
        bytes_per_sec = sum(arr.size * arr.itemsize for arr in arrs) / tavg
        print("{}-array Average rate = {:.4f} GiB/sec".format(
            numArrays, bytes_per_sec / 2**30))
Пример #7
0
def time_ak_argsort(N_per_locale, trials, dtype, seed):
    """Benchmark ``ak.argsort`` on a seeded random array of ``dtype``.

    Args:
        N_per_locale: Number of elements per locale.
        trials: Number of timed ``argsort`` calls to average over.
        dtype: ``'int64'``, ``'float64'`` or ``'str'``.
        seed: Seed for the random input, or ``None``.

    Raises:
        ValueError: If ``dtype`` is not supported.
    """
    # Without this check an unsupported dtype left `a` and `nbytes` unbound.
    if dtype not in ('int64', 'float64', 'str'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    print(">>> arkouda {} argsort".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    if dtype == 'str':
        a = ak.random_strings_uniform(1, 16, N, seed=seed)
        nbytes = a.nbytes * a.entry.itemsize
    else:
        if dtype == 'int64':
            a = ak.randint(0, 2**32, N, seed=seed)
        else:
            a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
        nbytes = a.size * a.itemsize

    timings = []
    for _ in range(trials):
        start = time.time()
        perm = ak.argsort(a)
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    # Sortedness is only asserted for the numeric dtypes.
    if dtype in ('int64', 'float64'):
        assert ak.is_sorted(a[perm])
    print("Average time = {:.4f} sec".format(tavg))
    bytes_per_sec = nbytes / tavg
    print("Average rate = {:.4f} GiB/sec".format(bytes_per_sec / 2**30))
Пример #8
0
def time_ak_stream(N_per_locale, trials, alpha, dtype, random):
    """Benchmark the expression ``a + b * alpha`` and print time and rate.

    Args:
        N_per_locale: Number of elements per locale in each operand.
        trials: Number of timed evaluations to average over.
        alpha: Scalar multiplier applied to ``b``.
        dtype: Operand dtype. When ``random`` is true only ``'int64'`` and
            ``'float64'`` are supported.
        random: If true, use random operands; otherwise arrays of ones.

    Raises:
        ValueError: If ``random`` is true and ``dtype`` is unsupported.
    """
    # Without this check an unsupported dtype left `a` and `b` unbound.
    if random and dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype for random values: {!r}".format(dtype))

    print(">>> arkouda stream")
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    if not random:
        a = ak.ones(N, dtype=dtype)
        b = ak.ones(N, dtype=dtype)
    elif dtype == 'int64':
        a = ak.randint(0, 2**32, N)
        b = ak.randint(0, 2**32, N)
    else:
        a = ak.randint(0, 1, N, dtype=ak.float64)
        b = ak.randint(0, 1, N, dtype=ak.float64)

    timings = []
    for _ in range(trials):
        start = time.time()
        c = a + b * alpha
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    # Rate is reported as three times the size of the result array.
    bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec / 2**30))
Пример #9
0
def time_ak_write(N_per_locale, numfiles, trials, dtype, path, seed, parquet):
    """Benchmark writing one array to ``numfiles`` files per trial.

    Files are named ``path`` followed by the zero-padded file index. The
    reported average is the total write time divided by ``trials``, i.e.
    the time to write all ``numfiles`` files once.

    Args:
        N_per_locale: Number of elements per locale.
        numfiles: Number of files written in each trial.
        trials: Number of trials to average over.
        dtype: ``'int64'`` or ``'float64'``.
        path: File path prefix.
        seed: Seed for the random input array, or ``None``.
        parquet: If true, use ``save_parquet``; otherwise ``save``.

    Raises:
        ValueError: If ``dtype`` is not supported.
    """
    # Without this check an unsupported dtype left `a` unbound.
    if dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    print(">>> arkouda {} write".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}, filesPerLoc = {}".format(
        cfg["numLocales"], N, numfiles))
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
    else:
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)

    writetimes = []
    for _ in range(trials):
        for j in range(numfiles):
            # Format the name outside the timed region so only the write
            # itself is measured.
            target = f"{path}{j:04}"
            start = time.time()
            if parquet:
                a.save_parquet(target)
            else:
                a.save(target)
            end = time.time()
            writetimes.append(end - start)
    avgwrite = sum(writetimes) / trials

    print("write Average time = {:.4f} sec".format(avgwrite))

    nb = a.size * a.itemsize * numfiles
    print("write Average rate = {:.2f} GiB/sec".format(nb / 2**30 / avgwrite))
Пример #10
0
def compare_strategies(length, ncat, op, dtype):
    """Time GroupBy construction and aggregation for both strategy flags.

    ``ak.GroupBy`` is built once with ``False`` and once with ``True`` as
    its second argument (labelled "Global" and "Local" in the output), and
    ``aggregate(vals, op)`` is timed on each. The two results are compared
    and the comparison is printed.

    Args:
        length: Number of keys and values.
        ncat: Upper bound (exclusive) for the random keys.
        op: Aggregation name passed to ``aggregate``.
        dtype: ``'int64'`` or ``'bool'``; any other value selects a
            ``linspace(-1, 1, length)`` value array.

    Returns:
        Tuple ``(ggtime, grtime, lgtime, lrtime)`` of elapsed seconds.
    """
    keys = ak.randint(0, ncat, length)
    if dtype == 'int64':
        vals = ak.randint(0, length // ncat, length)
    elif dtype == 'bool':
        vals = ak.zeros(length, dtype='bool')
        # Flip ncat // 2 randomly chosen positions to True.
        for pos in np.random.randint(0, length, ncat // 2):
            vals[pos] = True
    else:
        vals = ak.linspace(-1, 1, length)

    print("Global groupby", end=' ')
    began = time()
    global_groups = ak.GroupBy(keys, False)
    ggtime = time() - began
    print(ggtime)

    print("Global reduce", end=' ')
    began = time()
    global_keys, global_vals = global_groups.aggregate(vals, op)
    grtime = time() - began
    print(grtime)

    print("Local groupby", end=' ')
    began = time()
    local_groups = ak.GroupBy(keys, True)
    lgtime = time() - began
    print(lgtime)

    print("Local reduce", end=' ')
    began = time()
    local_keys, local_vals = local_groups.aggregate(vals, op)
    lrtime = time() - began
    print(lrtime)

    print(f"Keys match? {(global_keys == local_keys).all()}")
    print(f"Absolute diff of vals = {ak.abs(global_vals - local_vals).sum()}")
    return ggtime, grtime, lgtime, lrtime
Пример #11
0
def time_ak_setops(N_per_locale, trials, dtype, seed):
    """Benchmark every two-array set operation named in module-level ``OPS``.

    Each name in ``OPS`` is looked up on ``ak`` and called as ``fxn(a, b)``.

    Args:
        N_per_locale: Number of elements per locale in each operand.
        trials: Number of timed calls per operation to average over.
        dtype: Only ``'int64'`` is supported.
        seed: Seed for the first operand, or ``None``. The second operand
            uses ``seed + 1``.

    Raises:
        ValueError: If ``dtype`` is not ``'int64'``.
    """
    # Without this check any other dtype left `a` and `b` unbound.
    if dtype != 'int64':
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    print(">>> arkouda {} setops".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    a = ak.randint(0, 2**32, N, seed=seed)
    # Use a different seed for the second operand: reusing the same seed
    # would presumably generate the same data for both operands.
    b = ak.randint(0, 2**32, N, seed=None if seed is None else seed + 1)

    timings = {op: [] for op in OPS}
    # Results are retained so that `r` is never the only reference to an
    # earlier result when it is rebound inside the timed region
    # (NOTE(review): presumably releasing an array costs server work).
    results = {}
    for _ in range(trials):
        for op in timings:
            fxn = getattr(ak, op)
            start = time.time()
            r = fxn(a, b)
            end = time.time()
            timings[op].append(end - start)
            results[op] = r
    tavg = {op: sum(t) / trials for op, t in timings.items()}

    for op, t in tavg.items():
        print("  {} Average time = {:.4f} sec".format(op, t))
        bytes_per_sec = (a.size * a.itemsize * 2) / t
        print("  {} Average rate = {:.2f} GiB/sec".format(
            op, bytes_per_sec / 2**30))
Пример #12
0
def generate_arrays(N, seed):
    """Build sorted keys plus integer and boolean value arrays of length N.

    Keys are pre-sorted so that aggregations will not have to permute
    values: the goal is to measure aggregation time, not gather time.

    Args:
        N: Length of every returned array.
        seed: Seed for the keys, or ``None``. The values use ``seed + 1``.

    Returns:
        Tuple ``(keys, intvals, boolvals)``; ``boolvals`` marks the even
        entries of ``intvals``.
    """
    sorted_keys = ak.sort(ak.randint(0, 2**32, N, seed=seed))
    value_seed = seed if seed is None else seed + 1
    intvals = ak.randint(0, 2**16, N, seed=value_seed)
    is_even = (intvals % 2) == 0
    return sorted_keys, intvals, is_even
Пример #13
0
def check_correctness(dtype):
    """Assert that ``ak.argsort`` sorts a random 10**4-element array.

    Args:
        dtype: ``'int64'`` or ``'float64'``.

    Raises:
        ValueError: If ``dtype`` is not supported.
        AssertionError: If the permuted array is not sorted.
    """
    # Without this check an unsupported dtype left `a` unbound.
    if dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    N = 10**4
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N)
    else:
        a = ak.randint(0, 1, N, dtype=ak.float64)

    perm = ak.argsort(a)
    assert ak.is_sorted(a[perm])
Пример #14
0
def check_int_float(N):
    """Assert that coargsort of mixed float/int keys sorts the first key.

    A float64 and an int64 array of length ``N`` are co-sorted in both key
    orders; each time the leading array must come out sorted.
    """
    floats = ak.randint(0, 2**63, N, dtype=ak.float64)
    ints = ak.randint(0, 2**63, N, dtype=ak.int64)

    for primary, secondary in ((floats, ints), (ints, floats)):
        order = ak.coargsort([primary, secondary])
        assert ak.is_sorted(primary[order])
Пример #15
0
def generate_arrays(length, nkeys, nvals, dtype='int64'):
    """Create a random key array and a value array of the requested kind.

    Args:
        length: Length of both arrays.
        nkeys: Upper bound (exclusive) for the random keys.
        nvals: Upper bound (exclusive) for random ``'int64'`` values.
        dtype: ``'int64'`` or ``'bool'``; any other value selects
            ``linspace(-1, 1, length)``.

    Returns:
        Tuple ``(keys, vals)``.
    """
    keys = ak.randint(0, nkeys, length)
    if dtype == 'int64':
        return keys, ak.randint(0, nvals, length)
    if dtype == 'bool':
        flags = ak.zeros(length, dtype='bool')
        # Flip nkeys // 2 randomly chosen positions to True.
        for pos in np.random.randint(0, length, nkeys // 2):
            flags[pos] = True
        return keys, flags
    return keys, ak.linspace(-1, 1, length)
Пример #16
0
def check_correctness(dtype, seed):
    """Run ``ak.argsort`` on a random 10**4-element array and verify it.

    Sortedness is asserted only for the numeric dtypes; for ``'str'`` the
    argsort is merely executed.

    Args:
        dtype: ``'int64'``, ``'float64'`` or ``'str'``.
        seed: Seed for the random input, or ``None``.

    Raises:
        ValueError: If ``dtype`` is not supported.
        AssertionError: If a numeric array is not sorted by the permutation.
    """
    # Without this check an unsupported dtype left `a` unbound.
    if dtype not in ('int64', 'float64', 'str'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    N = 10**4
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
    elif dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
    else:
        a = ak.random_strings_uniform(1, 16, N, seed=seed)

    perm = ak.argsort(a)
    if dtype in ('int64', 'float64'):
        assert ak.is_sorted(a[perm])
Пример #17
0
def check_correctness(dtype, path, seed):
    """Assert that an array survives a ``save``/``ak.load`` round trip.

    The files matching ``path + "_LOCALE*"`` are removed before the
    comparison is asserted.

    Args:
        dtype: ``'int64'`` or ``'float64'``.
        path: File path prefix passed to ``save`` and ``load``.
        seed: Seed for the random input, or ``None``.

    Raises:
        ValueError: If ``dtype`` is not supported.
        AssertionError: If the loaded array differs from the saved one.
    """
    # Without this check an unsupported dtype left `a` unbound.
    if dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    N = 10**4
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
    else:
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)

    a.save(path)
    b = ak.load(path)
    for f in glob(path + "_LOCALE*"):
        os.remove(f)
    assert (a == b).all()
Пример #18
0
def check_correctness(dtype):
    """Assert that coargsort with an all-zero companion key sorts ``a``.

    A random array is co-sorted with a zeros array in both key orders; in
    either case the random array must come out sorted.

    Args:
        dtype: ``'int64'`` or ``'float64'``.

    Raises:
        ValueError: If ``dtype`` is not supported.
        AssertionError: If the permuted array is not sorted.
    """
    # Without this check an unsupported dtype left `a` and `z` unbound.
    if dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    N = 10**4
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N)
    else:
        a = ak.randint(0, 1, N, dtype=ak.float64)
    z = ak.zeros(N, dtype=dtype)

    perm = ak.coargsort([a, z])
    assert ak.is_sorted(a[perm])
    perm = ak.coargsort([z, a])
    assert ak.is_sorted(a[perm])
Пример #19
0
def time_ak_gather(isize, vsize, trials, dtype, random):
    """Benchmark the gather ``v[i]`` with verbose diagnostic output.

    Args:
        isize: Number of indices per locale.
        vsize: Size of the source array per locale.
        trials: Number of timed gathers to average over.
        dtype: Source dtype. ``'str'`` always uses random strings. When
            ``random`` is true the other supported values are ``'int64'``
            and ``'float64'``.
        random: If true, gather from random values; otherwise from ones
            (non-string dtypes only).

    Raises:
        ValueError: If ``random`` is true and ``dtype`` is unsupported.
    """
    # Without this check an unsupported dtype left `v` unbound.
    if random and dtype not in ('int64', 'float64', 'str'):
        raise ValueError("unsupported dtype for random values: {!r}".format(dtype))

    print(">>> arkouda gather")
    cfg = ak.get_config()
    Ni = isize * cfg["numLocales"]
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {}, num_indices = {:,} ; num_values = {:,}".format(cfg["numLocales"], Ni, Nv))
    # Index vector is always random
    i = ak.randint(0, Nv, Ni)
    is_str = dtype == 'str'
    if is_str:
        v = ak.random_strings_uniform(1, 16, Nv)
    elif not random:
        v = ak.ones(Nv, dtype=dtype)
    elif dtype == 'int64':
        v = ak.randint(0, 2**32, Nv)
    else:
        v = ak.randint(0, 1, Nv, dtype=ak.float64)

    # Diagnostics. `offsets` and `bytes` are only read for string arrays,
    # matching the guard on the rate computation below; reading them on a
    # numeric array would presumably fail.
    print("v={}".format(v))
    if is_str:
        print("v.offsets={}".format(v.offsets))
    print("v.nbytes={}".format(v.nbytes))
    print("v[1]={}".format(v[1]))
    print("In Gather size={}".format(v.size))
    print("In Gather nbytes={}".format(v.nbytes))
    print("In Gather ndim={}".format(v.ndim))
    print("In Gather shape={}".format(v.shape))
    if is_str:
        print("In Gather offsets name ={}".format(v.offsets.name))
        print("In Gather offsets size={}".format(v.offsets.size))
        print("In Gather bytes name ={}".format(v.bytes.name))
        print("In Gather bytes size={}".format(v.bytes.size))

    timings = []
    for _ in range(trials):
        print("In Gather loop i={}".format(i))
        # This untimed print performs an extra gather before the timed one.
        print("In Gather v[i]={}".format(v[i]))
        start = time.time()
        c = v[i]
        end = time.time()
        print("In Gather loop c={}".format(c))
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    if is_str:
        offsets_transferred = 3 * c.offsets.size * c.offsets.itemsize
        bytes_transferred = (c.offsets.size * c.offsets.itemsize) + (2 * c.bytes.size)
        bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg
    else:
        bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30))
Пример #20
0
def time_ak_array_transfer(N, trials, dtype, random, seed):
    """Time ``to_ndarray`` and ``ak.array`` round trips for one array.

    ``ak.client.maxTransferBytes`` is set to the array's byte size before
    timing. ``gc.collect()`` runs after every trial.

    Args:
        N: Number of elements in the transferred array.
        trials: Number of round trips to average over.
        dtype: Dtype passed to ``ak.randint``.
        random: Not used by this function.
        seed: Seed for the random array, or ``None``.
    """
    print(">>> arkouda {} array creation".format(dtype))
    config = ak.get_config()
    print("numLocales = {}, N = {:,}".format(config["numLocales"], N))

    a = ak.randint(0, 2**32, N, dtype=dtype, seed=seed)
    nb = a.size * a.itemsize
    ak.client.maxTransferBytes = nb

    to_ndarray_times, to_pdarray_times = [], []
    for _ in range(trials):
        tick = time.time()
        npa = a.to_ndarray()
        tock = time.time()
        to_ndarray_times.append(tock - tick)

        tick = time.time()
        aka = ak.array(npa)
        tock = time.time()
        to_pdarray_times.append(tock - tick)

        gc.collect()
    avgnd = sum(to_ndarray_times) / trials
    avgpd = sum(to_pdarray_times) / trials

    print("to_ndarray Average time = {:.4f} sec".format(avgnd))
    print("ak.array Average time = {:.4f} sec".format(avgpd))

    gib = nb / 2**30
    print("to_ndarray Average rate = {:.4f} GiB/sec".format(gib / avgnd))
    print("ak.array Average rate = {:.4f} GiB/sec".format(gib / avgpd))
Пример #21
0
def check_correctness(dtype, seed):
    """Assert that GroupBy gives the same groups after shuffling its input.

    The arrays come from ``generate_arrays(1000, 2, dtype, seed)``. They
    are grouped as-is and again after applying a random permutation; the
    unique keys and the segments of both groupings must match.
    """
    arrays, _ = generate_arrays(1000, 2, dtype, seed)
    baseline = ak.GroupBy(arrays)
    # Argsort of random values yields a random permutation.
    shuffle = ak.argsort(ak.randint(0, 2**32, arrays[0].size))
    shuffled = ak.GroupBy([arr[shuffle] for arr in arrays])
    key_pairs = zip(baseline.unique_keys, shuffled.unique_keys)
    assert all((left == right).all() for left, right in key_pairs)
    assert (baseline.segments == shuffled.segments).all()
Пример #22
0
def time_ak_scan(N_per_locale, trials, dtype, random, seed):
    """Benchmark every scan operation named in the module-level ``OPS``.

    Each name in ``OPS`` is looked up on ``ak`` and called as ``fxn(a)``.
    The last element of each result, the average time and the rate are
    printed per operation.

    Args:
        N_per_locale: Number of elements per locale.
        trials: Number of timed calls per operation to average over.
        dtype: ``'int64'`` or ``'float64'`` for random input. Otherwise
            the input is ``ak.arange(1, N, 1)``, multiplied by ``1.0``
            only for ``'float64'``.
        random: If true, scan random data.
        seed: Seed for random data; a non-``None`` seed also selects
            random data.

    Raises:
        ValueError: If random data is selected and ``dtype`` is unsupported.
    """
    # Decide from the `seed` parameter; the previous code read the global
    # `args.seed`, which only worked when run from the script's __main__.
    use_random = random or seed is not None
    # Without this check an unsupported dtype left `a` unbound.
    if use_random and dtype not in ('int64', 'float64'):
        raise ValueError("unsupported dtype for random values: {!r}".format(dtype))

    print(">>> arkouda {} scan".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    if use_random:
        if dtype == 'int64':
            a = ak.randint(1, N, N, seed=seed)
        else:
            a = ak.uniform(N, seed=seed) + 0.5
    else:
        a = ak.arange(1, N, 1)
        if dtype == 'float64':
            a = 1.0 * a

    timings = {op: [] for op in OPS}
    final_values = {}
    for _ in range(trials):
        for op in timings:
            fxn = getattr(ak, op)
            start = time.time()
            r = fxn(a)
            end = time.time()
            timings[op].append(end - start)
            final_values[op] = r[r.size-1]
    tavg = {op: sum(t) / trials for op, t in timings.items()}

    for op, t in tavg.items():
        print("{}, final value = {}".format(op, final_values[op]))
        print("  {} Average time = {:.4f} sec".format(op, t))
        bytes_per_sec = (a.size * a.itemsize * 2) / t
        print("  {} Average rate = {:.2f} GiB/sec".format(op, bytes_per_sec/2**30))
Пример #23
0
def create_ak_array(N, op, dtype, seed):
    """Create an array of length ``N`` using the named creation routine.

    Args:
        N: Length of the array.
        op: ``'zeros'``, ``'ones'`` or ``'randint'``.
        dtype: Dtype passed to the creation routine.
        seed: Seed, used only by ``'randint'``.

    Returns:
        The array returned by the selected ``ak`` routine.

    Raises:
        ValueError: If ``op`` is not one of the supported names.
    """
    if op == 'zeros':
        return ak.zeros(N, dtype=dtype)
    if op == 'ones':
        return ak.ones(N, dtype=dtype)
    if op == 'randint':
        return ak.randint(0, 2**32, N, dtype=dtype, seed=seed)
    # An unknown op used to fall through to an unbound-local error.
    raise ValueError("unsupported op: {!r}".format(op))
Пример #24
0
def check_correctness(dtype, path, seed):
    """Assert that an array survives a parquet write/read round trip.

    A random 10**4-element array is written with ``save_parquet(path)``
    and read back from ``path + '*'``. Files matching
    ``path + '_LOCALE*'`` are removed before the comparison is asserted.
    ``dtype`` is not used by this function.
    """
    N = 10**4
    written = ak.randint(0, 2**32, N, seed=seed)

    written.save_parquet(path)
    restored = ak.read_parquet(path+'*')
    for leftover in glob(path + '_LOCALE*'):
        os.remove(leftover)
    assert (written == restored).all()
Пример #25
0
def check_correctness(dtype, seed):
    """Run coargsort with a constant companion key and verify the result.

    A random array is co-sorted with an all-zero array in both key orders.
    Sortedness is asserted only for the numeric dtypes; for ``'str'`` the
    sorts are merely executed.

    Args:
        dtype: ``'int64'``, ``'float64'`` or ``'str'``.
        seed: Seed for the random input, or ``None``.

    Raises:
        ValueError: If ``dtype`` is not supported.
        AssertionError: If a numeric array is not sorted by the permutation.
    """
    # Without this check an unsupported dtype left `a` and `z` unbound.
    if dtype not in ('int64', 'float64', 'str'):
        raise ValueError("unsupported dtype: {!r}".format(dtype))

    N = 10**4
    numeric = dtype != 'str'
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
        z = ak.zeros(N, dtype=dtype)
    elif dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
        z = ak.zeros(N, dtype=dtype)
    else:
        a = ak.random_strings_uniform(1, 16, N, seed=seed)
        z = ak.cast(ak.zeros(N), 'str')

    perm = ak.coargsort([a, z])
    if numeric:
        assert ak.is_sorted(a[perm])
    perm = ak.coargsort([z, a])
    if numeric:
        assert ak.is_sorted(a[perm])
Пример #26
0
def check_float(N):
    """Assert that coargsort orders float64 keys for several key mixes.

    Three float64 arrays of length ``N`` are used: ``a`` and ``n`` from
    ``ak.randint`` with bounds ``(0, 1)`` and ``(-1, 1)``, and ``z`` of
    zeros. For each key list the listed array must come out sorted.
    """
    a = ak.randint(0, 1, N, dtype=ak.float64)
    n = ak.randint(-1, 1, N, dtype=ak.float64)
    z = ak.zeros(N, dtype=ak.float64)

    # (keys passed to coargsort, array expected to be sorted afterwards)
    cases = (
        ([a], a),
        ([a, n], a),
        ([n, a], n),
        ([z, a], a),
        ([z, n], n),
    )
    for key_list, expected_sorted in cases:
        order = ak.coargsort(key_list)
        assert ak.is_sorted(expected_sorted[order])
Пример #27
0
def gen_rmat_edges(lgNv, Ne_per_v, p, perm=False):
    """Generate a random edge list, sorted by both endpoints.

    Args:
        lgNv: Base-2 logarithm of the number of vertices.
        Ne_per_v: Number of edges generated per vertex.
        p: Probability ``a``; the remaining mass is split evenly into
            ``b``, ``c`` and ``d``.
        perm: If true, rename vertices through a random permutation.

    Returns:
        Tuple ``(ii, jj)`` of arrays holding the first and second vertex
        of every edge.
    """
    num_vertices = 2**lgNv
    num_edges = Ne_per_v * num_vertices

    # Probabilities and the derived thresholds used for each bit draw.
    a = p
    b = (1.0 - a) / 3.0
    c = b
    d = b
    ab = a + b
    c_norm = c / (c + d)
    a_norm = a / (a + b)

    # Vertex ids start at one and grow by one power of two per pass.
    ii = ak.ones(num_edges, dtype=ak.int64)
    jj = ak.ones(num_edges, dtype=ak.int64)
    for shift in range(lgNv - 1):
        weight = 2**shift
        ii_bit = ak.randint(0, 1, num_edges, dtype=ak.float64) > ab
        jj_draw = ak.randint(0, 1, num_edges, dtype=ak.float64)
        # The threshold for the second vertex depends on the first bit.
        jj_bit = jj_draw > (c_norm * ii_bit + a_norm * (~ii_bit))
        ii = ii + (weight * ii_bit)
        jj = jj + (weight * jj_bit)

    # Sort the edges on (first vertex, second vertex).
    order = ak.coargsort((ii, jj))
    ii = ii[order]
    jj = jj[order]

    if perm:
        # Argsort of random values gives the new vertex numbering.
        renumber = ak.argsort(ak.randint(0, 1, num_vertices, dtype=ak.float64))
        ii = renumber[ii]
        jj = renumber[jj]

    # Self-loops are not removed.
    return (ii, jj)
Пример #28
0
def refinement(N):
    '''
    Yield one coargsort test case of two ``N // 2``-element arrays.

    The first array is already sorted but has runs of 100 repeated
    values, scaled up by ``2**32 // max``; the second is random.
    '''
    groupsize = 100
    half = N // 2
    a = ak.arange(half) // groupsize
    # Scale in place by the integer ratio of 2**32 to the largest group id.
    a *= 2**32 // a.max()
    b = ak.randint(0, 2**32, half)
    yield 'refinement int64', (a, b)
Пример #29
0
def IP_like(N):
    '''
    Yield one test case shaped like a 90/10 mix of IPv4 and IPv6 addresses.

    ``N // 20`` unique two-word addresses are generated first, then
    ``N // 2`` addresses are sampled from them with replacement.
    '''
    multiplicity = 10
    nunique = N // (2 * multiplicity)

    # Two int64 words per address; both start as zeros.
    u1 = ak.zeros(nunique, dtype=ak.int64)
    u2 = ak.zeros(nunique, dtype=ak.int64)

    # Mark about 90% of the unique addresses as IPv4, the rest as IPv6.
    v4 = ak.uniform(nunique) < 0.9
    v6 = ~v4
    n4 = v4.sum()
    n6 = v4.size - n4

    # IPv4 entries fill only the first word; IPv6 entries fill both.
    u1[v4] = ak.randint(0, 2**32, n4)
    u1[v6] = ak.randint(-2**63, 2**63, n6)
    u2[v6] = ak.randint(-2**63, 2**63, n6)

    sample = ak.randint(0, nunique, N // 2)
    yield 'IP-like 2*int64', (u1[sample], u2[sample])
Пример #30
0
def time_ak_gather(isize, vsize, trials, dtype, random, seed):
    """Benchmark the gather ``v[i]`` and print average time and rate.

    Args:
        isize: Number of indices per locale.
        vsize: Size of the source array per locale.
        trials: Number of timed gathers to average over.
        dtype: Source dtype. For random data one of ``'int64'``,
            ``'float64'``, ``'bool'`` or ``'str'``.
        random: If true, gather from random values.
        seed: Seed for the index vector, or ``None``; the values use
            ``seed + 1``. A non-``None`` seed also selects random values.

    Raises:
        ValueError: If random values are selected and ``dtype`` is
            unsupported.
    """
    use_random = random or seed is not None
    # Without this check an unsupported dtype left `v` unbound.
    if use_random and dtype not in ('int64', 'float64', 'bool', 'str'):
        raise ValueError("unsupported dtype for random values: {!r}".format(dtype))

    print(">>> arkouda {} gather".format(dtype))
    cfg = ak.get_config()
    Ni = isize * cfg["numLocales"]
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {}, num_indices = {:,} ; num_values = {:,}".format(cfg["numLocales"], Ni, Nv))
    # Index vector is always random
    i = ak.randint(0, Nv, Ni, seed=seed)
    # Values get a different seed than the indices.
    if seed is not None:
        seed += 1
    if use_random:
        if dtype == 'int64':
            v = ak.randint(0, 2**32, Nv, seed=seed)
        elif dtype == 'float64':
            v = ak.randint(0, 1, Nv, dtype=ak.float64, seed=seed)
        elif dtype == 'bool':
            v = ak.randint(0, 1, Nv, dtype=ak.bool, seed=seed)
        else:
            v = ak.random_strings_uniform(1, 16, Nv, seed=seed)
    elif dtype == 'str':
        v = ak.cast(ak.arange(Nv), 'str')
    else:
        v = ak.ones(Nv, dtype=dtype)

    timings = []
    for _ in range(trials):
        start = time.time()
        c = v[i]
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    if dtype == 'str':
        offsets_transferred = 3 * c.offsets.size * c.offsets.itemsize
        bytes_transferred = (c.offsets.size * c.offsets.itemsize) + (2 * c.bytes.size)
        bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg
    else:
        bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30))