Пример #1
0
def time_ak_write_read(N_per_locale, trials, dtype, path, seed):
    """Time saving a pdarray to Parquet and reading it back.

    Args:
        N_per_locale: Elements per locale; the array holds
            ``N_per_locale * numLocales`` values.
        trials: Number of write/read rounds to average over.
        dtype: Only used in the banner line; the data is always produced
            by ``ak.randint(0, 2**32, N)``.
        path: Filename prefix handed to ``save_parquet``; files matching
            ``path + '_LOCALE*'`` are deleted after every round.
        seed: Seed forwarded to ``ak.randint`` (``None`` leaves it unseeded).
    """
    print(">>> arkouda {} write/read".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    # Forward the seed so repeated runs operate on the same data; the
    # parameter used to be accepted but ignored.
    a = ak.randint(0, 2**32, N, seed=seed)

    writetimes = []
    readtimes = []
    for _ in range(trials):
        start = time.time()
        a.save_parquet(path)
        end = time.time()
        writetimes.append(end - start)
        start = time.time()
        # The result is kept bound only so it lives as long as it did before.
        b = ak.read_parquet(path + '*')
        end = time.time()
        readtimes.append(end - start)
        # Remove this round's files so the next round starts clean.
        for f in glob(path + '_LOCALE*'):
            os.remove(f)
    avgwrite = sum(writetimes) / trials
    avgread = sum(readtimes) / trials

    print("write Average time = {:.4f} sec".format(avgwrite))
    print("read Average time = {:.4f} sec".format(avgread))

    nb = a.size * a.itemsize
    print("write Average rate = {:.2f} GiB/sec".format(nb/2**30/avgwrite))
    print("read Average rate = {:.2f} GiB/sec".format(nb/2**30/avgread))
Пример #2
0
def time_ak_scatter(isize, vsize, trials, dtype, random):
    """Time the scatter assignment ``c[i] = v`` on arkouda arrays.

    Args:
        isize: Indices per locale (scaled by ``numLocales``).
        vsize: Destination elements per locale (scaled by ``numLocales``).
        trials: Number of timed assignments to average over.
        dtype: Dtype of the destination and of the values being scattered.
        random: When true, scatter ``ak.randint`` values ('int64' or
            'float64' only); otherwise scatter ones.
    """
    print(">>> arkouda scatter")
    num_locales = ak.get_config()["numLocales"]
    Ni = isize * num_locales
    Nv = vsize * num_locales
    print(f"numLocales = {num_locales}, num_indices = {Ni:,} ; num_values = {Nv:,}")

    # The index vector is random regardless of the `random` flag.
    idx = ak.randint(0, Nv, Ni)
    dest = ak.zeros(Nv, dtype=dtype)
    if not random:
        vals = ak.ones(Ni, dtype=dtype)
    elif dtype == 'int64':
        vals = ak.randint(0, 2**32, Ni)
    elif dtype == 'float64':
        vals = ak.randint(0, 1, Ni, dtype=ak.float64)

    elapsed = []
    for _ in range(trials):
        t0 = time.time()
        dest[idx] = vals
        t1 = time.time()
        elapsed.append(t1 - t0)
    tavg = sum(elapsed) / trials

    print(f"Average time = {tavg:.4f} sec")
    # The rate is reported as three times the size of the index vector.
    rate = (idx.size * idx.itemsize * 3) / tavg
    print(f"Average rate = {rate / 2**30:.2f} GiB/sec")
Пример #3
0
def time_ak_reduce(N_per_locale, trials, dtype, random):
    """Time every reduction method named in ``OPS`` on one arkouda array.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of rounds; each round calls every op once.
        dtype: 'int64' or 'float64'.
        random: When true use ``ak.randint`` data, otherwise ``ak.arange``
            (multiplied by 1.0 for 'float64').
    """
    print(">>> arkouda reduce")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")
    if not random:
        a = ak.arange(0, N, 1)
        if dtype == 'float64':
            a = 1.0 * a
    elif dtype == 'int64':
        a = ak.randint(0, 2**32, N)
    elif dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64)

    timings = {op: [] for op in OPS}
    results = {}
    for _ in range(trials):
        for op, samples in timings.items():
            # Look the bound method up outside the timed region.
            reducer = getattr(a, op)
            t0 = time.time()
            value = reducer()
            t1 = time.time()
            samples.append(t1 - t0)
            results[op] = value
    tavg = {op: sum(samples) / trials for op, samples in timings.items()}

    for op in tavg:
        t = tavg[op]
        print(f"{op} = {results[op]}")
        print(f"  Average time = {t:.4f} sec")
        rate = (a.size * a.itemsize) / t
        print(f"  Average rate = {rate / 2**30:.2f} GiB/sec")
Пример #4
0
def time_ak_argsort(N_per_locale, trials, dtype, scale_by_locales):
    """Time ``ak.argsort`` on a random array and check the permutation.

    Args:
        N_per_locale: Problem size; multiplied by ``numLocales`` only when
            ``scale_by_locales`` is true.
        trials: Number of timed sorts to average over.
        dtype: 'int64' or 'float64'.
        scale_by_locales: Whether the size grows with the locale count.
    """
    print(">>> arkouda argsort")
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"] if scale_by_locales else N_per_locale
    print(f"numLocales = {cfg['numLocales']}, N = {N:,}")
    if dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64)
    elif dtype == 'int64':
        a = ak.randint(0, 2**32, N)

    elapsed = []
    for _ in range(trials):
        t0 = time.time()
        perm = ak.argsort(a)
        t1 = time.time()
        elapsed.append(t1 - t0)
    tavg = sum(elapsed) / trials

    # Applying the last permutation must yield a sorted array.
    assert ak.is_sorted(a[perm])
    print(f"Average time = {tavg:.4f} sec")
    rate = (a.size * a.itemsize) / tavg
    print(f"Average rate = {rate/2**30:.4f} GiB/sec")
Пример #5
0
def time_ak_coargsort(N_per_locale, trials, dtype):
    """Time ``ak.coargsort`` on 1, 2, 8 and 16 random key arrays.

    The total element count ``N`` is split evenly (integer division)
    across the arrays of each configuration.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of timed sorts per configuration.
        dtype: 'int64' or 'float64'.
    """
    print(">>> arkouda coargsort")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")
    for numArrays in (1, 2, 8, 16):
        if dtype == 'int64':
            arrs = []
            for _ in range(numArrays):
                arrs.append(ak.randint(0, 2**32, N // numArrays))
        elif dtype == 'float64':
            arrs = []
            for _ in range(numArrays):
                arrs.append(ak.randint(0, 1, N // numArrays, dtype=ak.float64))

        elapsed = []
        for _ in range(trials):
            t0 = time.time()
            perm = ak.coargsort(arrs)
            t1 = time.time()
            elapsed.append(t1 - t0)
        tavg = sum(elapsed) / trials

        # The leading key must be sorted after applying the permutation.
        first_key_sorted = arrs[0][perm]
        assert ak.is_sorted(first_key_sorted)
        print(f"{numArrays}-array Average time = {tavg:.4f} sec")
        total_bytes = sum(arr.size * arr.itemsize for arr in arrs)
        rate = total_bytes / tavg
        print(f"{numArrays}-array Average rate = {rate / 2**30:.4f} GiB/sec")
Пример #6
0
def time_ak_in1d(size, trials):
    """Time ``ak.in1d`` for int64 arrays in the 'Medium' and 'Large' regimes.

    ``a`` is ``ak.arange(N) % LARGE`` and ``b`` is ``ak.arange(bsize)``, so
    the number of elements of ``a`` missing from ``b`` is known and is
    checked after every trial.

    Args:
        size: Elements of ``a`` per locale (scaled by ``numLocales``).
        trials: Number of timed calls per regime.
    """
    print(">>> arkouda int64 in1d")
    num_locales = ak.get_config()["numLocales"]
    N = size * num_locales
    a = ak.arange(N) % LARGE

    for regime, bsize in (('Medium', MEDIUM), ('Large', LARGE)):
        print(f"{regime} regime: numLocales = {num_locales}  a.size = {N:,}  b.size = {bsize:,}")
        b = ak.arange(bsize)
        # Each complete cycle of LARGE values misses (LARGE - bsize) entries;
        # the trailing partial cycle misses whatever extends past bsize.
        full_cycles = a.size // LARGE
        leftover = a.size % LARGE
        expected_misses = (LARGE - bsize) * full_cycles + max(0, leftover - bsize)
        elapsed = []
        for _ in range(trials):
            t0 = time.time()
            c = ak.in1d(a, b)
            t1 = time.time()
            elapsed.append(t1 - t0)
            assert (c.size - c.sum()) == expected_misses, "Incorrect result"
        tavg = sum(elapsed) / trials
        print(f"{regime} average time = {tavg:.4f} sec")
        rate = (a.size * a.itemsize + b.size * b.itemsize) / tavg
        print(f"{regime} average rate = {rate / 2**30:.2f} GiB/sec")
Пример #7
0
def time_ak_array_transfer(N, trials, dtype, random, seed):
    """Time moving an array server -> client (``to_ndarray``) and back (``ak.array``).

    Args:
        N: Number of elements (not scaled by the locale count).
        trials: Number of round trips to average over.
        dtype: Dtype passed to ``ak.randint``.
        random: Accepted but not used by this function.
        seed: Seed passed to ``ak.randint``.
    """
    print(f">>> arkouda {dtype} array creation")
    num_locales = ak.get_config()["numLocales"]
    print(f"numLocales = {num_locales}, N = {N:,}")

    a = ak.randint(0, 2**32, N, dtype=dtype, seed=seed)
    nb = a.size * a.itemsize
    # Set the client's transfer limit to exactly the size of this array.
    ak.client.maxTransferBytes = nb

    to_ndarray_times = []
    to_pdarray_times = []
    for _ in range(trials):
        t0 = time.time()
        npa = a.to_ndarray()
        t1 = time.time()
        to_ndarray_times.append(t1 - t0)

        t0 = time.time()
        aka = ak.array(npa)
        t1 = time.time()
        to_pdarray_times.append(t1 - t0)
        gc.collect()
    avgnd = sum(to_ndarray_times) / trials
    avgpd = sum(to_pdarray_times) / trials

    print(f"to_ndarray Average time = {avgnd:.4f} sec")
    print(f"ak.array Average time = {avgpd:.4f} sec")

    gib = nb / 2**30
    print(f"to_ndarray Average rate = {gib / avgnd:.4f} GiB/sec")
    print(f"ak.array Average rate = {gib / avgpd:.4f} GiB/sec")
Пример #8
0
def time_ak_setops(N_per_locale, trials, dtype, seed):
    """Time every two-array function named in ``OPS`` (looked up on ``ak``).

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of rounds; each round calls every op once.
        dtype: Only 'int64' produces input arrays.
        seed: Seed passed to both ``ak.randint`` calls.
    """
    print(f">>> arkouda {dtype} setops")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
        b = ak.randint(0, 2**32, N, seed=seed)

    timings = {op: [] for op in OPS}
    results = {}
    for _ in range(trials):
        for op, samples in timings.items():
            setop = getattr(ak, op)
            t0 = time.time()
            out = setop(a, b)
            t1 = time.time()
            samples.append(t1 - t0)
            results[op] = out
    tavg = {op: sum(samples) / trials for op, samples in timings.items()}

    for op in tavg:
        t = tavg[op]
        print(f"  {op} Average time = {t:.4f} sec")
        # Both inputs are the same size, hence the factor of two.
        rate = (a.size * a.itemsize * 2) / t
        print(f"  {op} Average rate = {rate / 2**30:.2f} GiB/sec")
Пример #9
0
def time_ak_write(N_per_locale, numfiles, trials, dtype, path, seed, parquet):
    """Time writing one array to ``numfiles`` files per trial.

    Each file is named ``path`` followed by a zero-padded 4-digit index.
    The reported time is the total over all writes divided by ``trials``.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        numfiles: Files written in every trial.
        trials: Number of trials.
        dtype: 'int64' or 'float64'.
        path: Filename prefix.
        seed: Seed passed to ``ak.randint``.
        parquet: Use ``save_parquet`` when true, ``save`` otherwise.
    """
    print(f">>> arkouda {dtype} write")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}, filesPerLoc = {numfiles}")
    if dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
    elif dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)

    writetimes = []
    for _ in range(trials):
        for j in range(numfiles):
            t0 = time.time()
            if parquet:
                a.save_parquet(f"{path}{j:04}")
            else:
                a.save(f"{path}{j:04}")
            t1 = time.time()
            writetimes.append(t1 - t0)
    avgwrite = sum(writetimes) / trials

    print(f"write Average time = {avgwrite:.4f} sec")

    nb = a.size * a.itemsize * numfiles
    print(f"write Average rate = {nb / 2**30 / avgwrite:.2f} GiB/sec")
Пример #10
0
def time_ak_scan(N_per_locale, trials, dtype, random, seed):
    """Time every scan function named in ``OPS`` (looked up on ``ak``).

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of rounds; each round calls every op once.
        dtype: 'int64' or 'float64'.
        random: Use random input data when true.
        seed: Seed for the random generators; a non-``None`` seed also
            selects random input, even when ``random`` is false.
    """
    print(">>> arkouda {} scan".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    # Decide from the `seed` argument, not the script-global `args`, so the
    # function also works when called from outside the CLI script.
    if random or seed is not None:
        if dtype == 'int64':
            a = ak.randint(1, N, N, seed=seed)
        elif dtype == 'float64':
            a = ak.uniform(N, seed=seed) + 0.5
    else:
        a = ak.arange(1, N, 1)
        if dtype == 'float64':
            a = 1.0 * a

    timings = {op: [] for op in OPS}
    final_values = {}
    for _ in range(trials):
        for op in timings:
            fxn = getattr(ak, op)
            start = time.time()
            r = fxn(a)
            end = time.time()
            timings[op].append(end - start)
            # Keep only the last element of the scan for the report.
            final_values[op] = r[r.size-1]
    tavg = {op: sum(t) / trials for op, t in timings.items()}

    for op, t in tavg.items():
        print("{}, final value = {}".format(op, final_values[op]))
        print("  {} Average time = {:.4f} sec".format(op, t))
        # Factor of two: one input array and one output array of equal size.
        bytes_per_sec = (a.size * a.itemsize * 2) / t
        print("  {} Average rate = {:.2f} GiB/sec".format(op, bytes_per_sec/2**30))
Пример #11
0
def time_ak_stream(N_per_locale, trials, alpha, dtype, random):
    """Time the expression ``a + b * alpha`` on arkouda arrays.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of timed evaluations to average over.
        alpha: Scalar multiplied into ``b``.
        dtype: 'int64' or 'float64' when ``random``; any dtype accepted by
            ``ak.ones`` otherwise.
        random: Use ``ak.randint`` inputs when true, arrays of ones otherwise.
    """
    print(">>> arkouda stream")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")
    if not random:
        a = ak.ones(N, dtype=dtype)
        b = ak.ones(N, dtype=dtype)
    elif dtype == 'int64':
        a = ak.randint(0, 2**32, N)
        b = ak.randint(0, 2**32, N)
    elif dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64)
        b = ak.randint(0, 1, N, dtype=ak.float64)

    elapsed = []
    for _ in range(trials):
        t0 = time.time()
        c = a + b * alpha
        t1 = time.time()
        elapsed.append(t1 - t0)
    tavg = sum(elapsed) / trials

    print(f"Average time = {tavg:.4f} sec")
    # Three arrays of the result's size are counted: a, b and c.
    rate = (c.size * c.itemsize * 3) / tavg
    print(f"Average rate = {rate / 2**30:.2f} GiB/sec")
Пример #12
0
def time_ak_argsort(N_per_locale, trials, dtype, seed):
    """Time ``ak.argsort`` on random int64, float64 or string data.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of timed sorts to average over.
        dtype: 'int64', 'float64' or 'str'.
        seed: Seed passed to the random generator.
    """
    print(f">>> arkouda {dtype} argsort")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")
    if dtype == 'str':
        a = ak.random_strings_uniform(1, 16, N, seed=seed)
        nbytes = a.nbytes * a.entry.itemsize
    elif dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
        nbytes = a.size * a.itemsize
    elif dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
        nbytes = a.size * a.itemsize

    elapsed = []
    for _ in range(trials):
        t0 = time.time()
        perm = ak.argsort(a)
        t1 = time.time()
        elapsed.append(t1 - t0)
    tavg = sum(elapsed) / trials

    # The sortedness check is applied to the numeric dtypes only.
    if dtype == 'int64' or dtype == 'float64':
        assert ak.is_sorted(a[perm])
    print(f"Average time = {tavg:.4f} sec")
    rate = nbytes / tavg
    print(f"Average rate = {rate / 2**30:.4f} GiB/sec")
Пример #13
0
def time_ak_sa(vsize, trials, dtype):
    """Time ``ak.suffix_array`` on uniformly random strings.

    Before timing, the first ``vsize`` strings and their suffix arrays are
    printed.

    Args:
        vsize: Strings per locale (scaled by ``numLocales``); also the
            number of strings echoed.
        trials: Number of timed constructions to average over.
        dtype: Selects the throughput formula ('str' or anything else).
    """
    print(">>> arkouda suffix array")
    cfg = ak.get_config()
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {},  num of strings  = {:,}".format(
        cfg["numLocales"], Nv))
    v = ak.random_strings_uniform(1, 16, Nv)
    c = ak.suffix_array(v)
    print("size of suffix array={}".format(c.bytes.size))
    for k in range(vsize):
        print("the {} th random tring ={}".format(k, v[k]))
        print("the {} th suffix array ={}".format(k, c[k]))
        print("")

    timings = []
    for _ in range(trials):
        start = time.time()
        ak.suffix_array(v)
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    if dtype == 'str':
        offsets_transferred = 3 * c.offsets.size * c.offsets.itemsize
        bytes_transferred = (c.offsets.size *
                             c.offsets.itemsize) + (2 * c.bytes.size)
        bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg
    else:
        bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    # The rate used to be computed and then dropped; report it.
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec / 2**30))
Пример #14
0
def time_ak_coargsort(N_per_locale, trials, dtype, seed):
    """Time ``ak.coargsort`` on 1, 2, 8 and 16 random key arrays.

    When ``seed`` is given, array ``k`` of a configuration is seeded with
    ``seed + k``.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``); the
            total is split evenly (integer division) across the arrays.
        trials: Number of timed sorts per configuration.
        dtype: 'int64', 'float64' or 'str'.
        seed: Base seed, or ``None`` for unseeded data.
    """
    print(f">>> arkouda {dtype} coargsort")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")
    for numArrays in (1, 2, 8, 16):
        if seed is not None:
            seeds = [seed + k for k in range(numArrays)]
        else:
            seeds = [None] * numArrays

        if dtype == 'str':
            arrs = [ak.random_strings_uniform(1, 8, N//numArrays, seed=s) for s in seeds]
            nbytes = sum(arr.bytes.size * arr.bytes.itemsize for arr in arrs)
        elif dtype == 'int64':
            arrs = [ak.randint(0, 2**32, N//numArrays, seed=s) for s in seeds]
            nbytes = sum(arr.size * arr.itemsize for arr in arrs)
        elif dtype == 'float64':
            arrs = [ak.randint(0, 1, N//numArrays, dtype=ak.float64, seed=s) for s in seeds]
            nbytes = sum(arr.size * arr.itemsize for arr in arrs)

        elapsed = []
        for _ in range(trials):
            t0 = time.time()
            perm = ak.coargsort(arrs)
            t1 = time.time()
            elapsed.append(t1 - t0)
        tavg = sum(elapsed) / trials

        # The leading key is permuted for every dtype but checked for
        # sortedness only for the numeric ones.
        leading = arrs[0][perm]
        if dtype == 'int64' or dtype == 'float64':
            assert ak.is_sorted(leading)
        print(f"{numArrays}-array Average time = {tavg:.4f} sec")
        rate = nbytes / tavg
        print(f"{numArrays}-array Average rate = {rate/2**30:.4f} GiB/sec")
Пример #15
0
def check_coargsort(N_per_locale):
    """Run the four coargsort checks on ``N_per_locale * numLocales`` elements.

    Calls ``check_int``, ``check_float``, ``check_int_float`` and
    ``check_large`` in that order, each with the scaled size.
    """
    print(">>> arkouda coargsort")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")

    for check in (check_int, check_float, check_int_float, check_large):
        check(N)
Пример #16
0
def time_substring_search(N, trials, seed):
    """Time ``Strings.contains`` for literal, regex-literal and regex-pattern searches.

    Every test string is built as ``<random>1 string 1<random>``, so all
    three searches are asserted to match every element.

    Args:
        N: Number of strings (not scaled by the locale count).
        trials: Number of rounds; each round runs all three searches.
        seed: Seed passed to both ``random_strings_uniform`` calls.
    """
    print(">>> arkouda substring search")
    num_locales = ak.get_config()["numLocales"]
    print(f"numLocales = {num_locales}, N = {N:,}")

    prefix = ak.random_strings_uniform(minlen=1, maxlen=8, size=N, seed=seed)
    suffix = ak.random_strings_uniform(minlen=1, maxlen=8, size=N, seed=seed)

    # Each string contains '1 string 1' with random strings before and after.
    test_substring = prefix.stick(suffix, delimiter='1 string 1')
    nbytes = test_substring.nbytes * test_substring.entry.itemsize

    # (report label, search pattern, extra keyword arguments for contains)
    cases = (
        ("non-regex with literal substring", '1 string 1', {}),
        ("regex with literal substring", '1 string 1', {'regex': True}),
        ("regex with pattern", '\\d string \\d', {'regex': True}),
    )
    samples = [[] for _ in cases]
    matches = [None] * len(cases)
    for _ in range(trials):
        for k, (_label, pattern, options) in enumerate(cases):
            t0 = time.time()
            matches[k] = test_substring.contains(pattern, **options)
            t1 = time.time()
            samples[k].append(t1 - t0)

    averages = [sum(times) / trials for times in samples]

    for found in matches:
        assert found.all()

    for (label, _pattern, _options), avg in zip(cases, averages):
        print(f"{label} Average time = {avg:.4f} sec")

    for (label, _pattern, _options), avg in zip(cases, averages):
        print(f"{label} Average rate = {nbytes / 2**30 / avg:.4f} GiB/sec")
Пример #17
0
def time_ak_gather(isize, vsize, trials, dtype, random):
    """Time the gather ``v[i]`` and print verbose diagnostics along the way.

    Args:
        isize: Indices per locale (scaled by ``numLocales``).
        vsize: Source values per locale (scaled by ``numLocales``).
        trials: Number of timed gathers to average over.
        dtype: 'int64', 'float64' or 'str' when ``random``; otherwise 'str'
            or any dtype accepted by ``ak.ones``.
        random: Use random numeric values when true, ones otherwise
            (string inputs are random in both cases).
    """
    print(">>> arkouda gather")
    cfg = ak.get_config()
    Ni = isize * cfg["numLocales"]
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {}, num_indices = {:,} ; num_values = {:,}".format(cfg["numLocales"], Ni, Nv))
    # Index vector is always random
    i = ak.randint(0, Nv, Ni)
    if random:
        if dtype == 'int64':
            v = ak.randint(0, 2**32, Nv)
        elif dtype == 'float64':
            v = ak.randint(0, 1, Nv, dtype=ak.float64)
        elif dtype == 'str':
            v = ak.random_strings_uniform(1, 16, Nv)
    else:
        if dtype == 'str':
            v = ak.random_strings_uniform(1, 16, Nv)
        else:
            v = ak.ones(Nv, dtype=dtype)

    # offsets / bytes / nbytes diagnostics are printed for string input only;
    # reading `offsets` unconditionally made the numeric dtypes crash here.
    is_strings = dtype == 'str'
    print("v={}".format(v))
    if is_strings:
        print("v.offsets={}".format(v.offsets))
        print("v.nbytes={}".format(v.nbytes))
    print("v[1]={}".format(v[1]))
    print("In Gather size={}".format(v.size))
    if is_strings:
        print("In Gather nbytes={}".format(v.nbytes))
    print("In Gather ndim={}".format(v.ndim))
    print("In Gather shape={}".format(v.shape))
    if is_strings:
        print("In Gather offsets name ={}".format(v.offsets.name))
        print("In Gather offsets size={}".format(v.offsets.size))
        print("In Gather bytes name ={}".format(v.bytes.name))
        print("In Gather bytes size={}".format(v.bytes.size))

    timings = []
    for _ in range(trials):
        print("In Gather loop i={}".format(i))
        # This diagnostic performs an extra, untimed gather.
        print("In Gather v[i]={}".format(v[i]))
        start = time.time()
        c = v[i]
        end = time.time()
        print("In Gather loop c={}".format(c))
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    if is_strings:
        offsets_transferred = 3 * c.offsets.size * c.offsets.itemsize
        bytes_transferred = (c.offsets.size * c.offsets.itemsize) + (2 * c.bytes.size)
        bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg
    else:
        bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30))
Пример #18
0
def time_ak_groupby(N_per_locale, trials, dtype, seed):
    """Time ``ak.GroupBy`` construction on 1, 2, 8 and 16 key arrays.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of timed constructions per configuration.
        dtype: Forwarded to ``generate_arrays``.
        seed: Forwarded to ``generate_arrays``.
    """
    print(">>> arkouda groupby")
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    for numArrays in (1, 2, 8, 16):
        arrays, totalbytes = generate_arrays(N, numArrays, dtype, seed)
        timings = []
        for _ in range(trials):
            start = time.time()
            g = ak.GroupBy(arrays)
            end = time.time()
            timings.append(end - start)
        # Average and report once per configuration, after all trials. Doing
        # this inside the loop divided partial sums by the full trial count
        # and printed a line on every iteration.
        tavg = sum(timings) / trials
        print("{}-array Average time = {:.4f} sec".format(numArrays, tavg))
        bytes_per_sec = totalbytes / tavg
        print("{}-array Average rate = {:.4f} GiB/sec".format(
            numArrays, bytes_per_sec / 2**30))
Пример #19
0
def time_flatten(N, trials):
    """Time ``Strings.flatten`` with literal, regex-literal and regex-pattern delimiters.

    The input joins the string forms of ``arange(0, 3N, 3)``,
    ``arange(1, 3N, 3)`` and ``arange(2, 3N, 3)`` with '_', so every
    flatten result is asserted equal to the string form of ``arange(3N)``.

    Args:
        N: Number of joined strings (not scaled by the locale count).
        trials: Number of rounds; each round runs all three variants.
    """
    print(">>> arkouda flatten")
    num_locales = ak.get_config()["numLocales"]
    print(f"numLocales = {num_locales}, N = {N:,}")

    thirds = [ak.cast(ak.arange(offset, N*3, 3), 'str') for offset in range(3)]
    first_two = thirds[0].stick(thirds[1], delimiter='_')
    thickrange = first_two.stick(thirds[2], delimiter='_')
    nbytes = thickrange.nbytes * thickrange.entry.itemsize

    # (report label, delimiter, extra keyword arguments for flatten)
    cases = (
        ("non-regex flatten with literal delimiter", '_', {}),
        ("regex flatten with literal delimiter", '_', {'regex': True}),
        ("regex flatten with pattern delimiter", '_+', {'regex': True}),
    )
    samples = [[] for _ in cases]
    flattened = [None] * len(cases)
    for _ in range(trials):
        for k, (_label, delim, options) in enumerate(cases):
            t0 = time.time()
            flattened[k] = thickrange.flatten(delim, **options)
            t1 = time.time()
            samples[k].append(t1 - t0)

    averages = [sum(times) / trials for times in samples]

    answer = ak.cast(ak.arange(N*3), 'str')
    for result in flattened:
        assert (result == answer).all()

    for (label, _delim, _options), avg in zip(cases, averages):
        print(f"{label} Average time = {avg:.4f} sec")

    for (label, _delim, _options), avg in zip(cases, averages):
        print(f"{label} Average rate = {nbytes/2**30/avg:.4f} GiB/sec")
Пример #20
0
def time_ak_array_create(N_per_locale, trials, dtype, random, seed):
    """Time ``create_ak_array`` for every creation op named in ``OPS``.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of rounds; each round runs every op once.
        dtype: Forwarded to ``create_ak_array``.
        random: Accepted but not used by this function.
        seed: Forwarded to ``create_ak_array``.
    """
    print(f">>> arkouda {dtype} array creation")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")

    timings = {op: [] for op in OPS}
    for _ in range(trials):
        for op, samples in timings.items():
            t0 = time.time()
            a = create_ak_array(N, op, dtype, seed)
            t1 = time.time()
            samples.append(t1 - t0)
    tavg = {op: sum(samples) / trials for op, samples in timings.items()}

    for op in tavg:
        t = tavg[op]
        print(f"  {op} Average time = {t:.4f} sec")
        # The size comes from the most recently created array.
        rate = (a.size * a.itemsize) / t
        print(f"  {op} Average rate = {rate/2**30:.2f} GiB/sec")
Пример #21
0
def time_ak_gather(isize, vsize, trials, dtype, random, seed):
    """Time the gather ``v[i]`` with a random index vector.

    Args:
        isize: Indices per locale (scaled by ``numLocales``).
        vsize: Source values per locale (scaled by ``numLocales``).
        trials: Number of timed gathers to average over.
        dtype: 'int64', 'float64', 'bool' or 'str' for random values;
            otherwise 'str' or any dtype accepted by ``ak.ones``.
        random: Use random values when true.
        seed: Seed for the index vector; the values use ``seed + 1``. A
            non-``None`` seed also selects random values.
    """
    print(f">>> arkouda {dtype} gather")
    num_locales = ak.get_config()["numLocales"]
    Ni = isize * num_locales
    Nv = vsize * num_locales
    print(f"numLocales = {num_locales}, num_indices = {Ni:,} ; num_values = {Nv:,}")
    # The index vector is random regardless of the `random` flag.
    idx = ak.randint(0, Nv, Ni, seed=seed)
    # Advance the seed so the values are not generated from the same seed.
    if seed is not None:
        seed += 1

    if not random and seed is None:
        if dtype == 'str':
            v = ak.cast(ak.arange(Nv), 'str')
        else:
            v = ak.ones(Nv, dtype=dtype)
    elif dtype == 'int64':
        v = ak.randint(0, 2**32, Nv, seed=seed)
    elif dtype == 'float64':
        v = ak.randint(0, 1, Nv, dtype=ak.float64, seed=seed)
    elif dtype == 'bool':
        v = ak.randint(0, 1, Nv, dtype=ak.bool, seed=seed)
    elif dtype == 'str':
        v = ak.random_strings_uniform(1, 16, Nv, seed=seed)

    elapsed = []
    for _ in range(trials):
        t0 = time.time()
        c = v[idx]
        t1 = time.time()
        elapsed.append(t1 - t0)
    tavg = sum(elapsed) / trials

    print(f"Average time = {tavg:.4f} sec")
    if dtype != 'str':
        bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    else:
        offsets_transferred = 3 * c.offsets.size * c.offsets.itemsize
        bytes_transferred = (c.offsets.size * c.offsets.itemsize) + (2 * c.bytes.size)
        bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg
    print(f"Average rate = {bytes_per_sec/2**30:.2f} GiB/sec")
Пример #22
0
def time_ak_read(N_per_locale, numfiles, trials, dtype, path, seed, parquet):
    """Time reading every file that matches ``path + '*'``.

    Args:
        N_per_locale: Elements per locale; only used in the banner.
        numfiles: Files per locale; only used in the banner.
        trials: Number of timed reads to average over.
        dtype: Only used in the banner.
        path: Filename prefix; ``'*'`` is appended to form the glob.
        seed: Accepted but not used by this function.
        parquet: Read with ``ak.read_parquet`` when true, ``ak.read_all``
            otherwise.
    """
    print(f">>> arkouda {dtype} read")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}, filesPerLoc = {numfiles}")
    a = ak.array([])

    readtimes = []
    for _ in range(trials):
        t0 = time.time()
        if parquet:
            a = ak.read_parquet(path + '*')
        else:
            a = ak.read_all(path + '*')
        t1 = time.time()
        readtimes.append(t1 - t0)
    avgread = sum(readtimes) / trials

    print(f"read Average time = {avgread:.4f} sec")

    # The byte count comes from the array returned by the last read.
    nb = a.size * a.itemsize
    print(f"read Average rate = {nb / 2**30 / avgread:.2f} GiB/sec")
Пример #23
0
def time_ak_write_read(N_per_locale, trials, dtype, path, seed):
    """Time ``pdarray.save`` and ``ak.load`` and print min/max/avg figures.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of write/read rounds.
        dtype: 'int64' or 'float64'.
        path: Filename prefix; files matching ``path + '_LOCALE*'`` are
            deleted after every round.
        seed: Seed passed to ``ak.randint``.
    """
    print(">>> arkouda write/read")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")
    if dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
    elif dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)

    writetimes = []
    readtimes = []
    for _ in range(trials):
        t0 = time.time()
        a.save(path)
        t1 = time.time()
        writetimes.append(t1 - t0)

        t0 = time.time()
        b = ak.load(path)
        t1 = time.time()
        readtimes.append(t1 - t0)

        # Remove this round's files so the next round starts clean.
        for leftover in glob(path + '_LOCALE*'):
            os.remove(leftover)
    avgwrite = sum(writetimes) / trials
    avgread = sum(readtimes) / trials

    wmin, wmax = min(writetimes), max(writetimes)
    rmin, rmax = min(readtimes), max(readtimes)
    print(f"Write times: min = {wmin:.4f} sec, max = {wmax:.4f} sec, avg = {avgwrite:.4f} sec")
    print(f"Read times : min = {rmin:.4f} sec, max = {rmax:.4f} sec, avg = {avgread:.4f} sec")

    nb = a.size * a.itemsize
    gib = nb / 2**30
    # The slowest trial gives the minimum rate and the fastest the maximum.
    print(f"Write rates: min = {gib / wmax:.4f} GiB/sec, max = {gib / wmin:.4f} GiB/sec, avg = {gib / avgwrite:.4f} GiB/sec")
    print(f"Read rates : min = {gib / rmax:.4f} GiB/sec, max = {gib / rmin:.4f} GiB/sec, avg = {gib / avgread:.4f} GiB/sec")
Пример #24
0
def time_ak_in1d(size, trials):
    """Time ``ak.in1d`` for random strings in the 'Medium' and 'Large' regimes.

    Args:
        size: Strings in ``a`` per locale (scaled by ``numLocales``).
        trials: Number of timed calls per regime.
    """
    print(">>> arkouda string in1d")
    num_locales = ak.get_config()["numLocales"]
    N = size * num_locales
    a = ak.random_strings_uniform(1, MAXSTRLEN, N)

    for regime, bsize in (('Medium', MEDIUM), ('Large', LARGE)):
        print(f"{regime} regime: numLocales = {num_locales}  a.size = {N:,}  b.size = {bsize:,}")
        b = ak.random_strings_uniform(1, MAXSTRLEN, bsize)
        elapsed = []
        for _ in range(trials):
            t0 = time.time()
            c = ak.in1d(a, b)
            t1 = time.time()
            elapsed.append(t1 - t0)
        tavg = sum(elapsed) / trials
        print(f"{regime} average time = {tavg:.4f} sec")
        # Per input: 8 bytes for each string plus its nbytes.
        rate = (a.size * 8 + a.nbytes + b.size * 8 + b.nbytes) / tavg
        print(f"{regime} average rate = {rate / 2**30:.2f} GiB/sec")
Пример #25
0
def time_ak_sa(vsize, strlen, trials, dtype):
    """Time ``ak.suffix_array`` on uniformly random strings.

    Only ``dtype == 'str'`` is supported; for any other value the function
    prints "Wrong data type" and returns without benchmarking.

    Args:
        vsize: Strings per locale (scaled by ``numLocales``); also the
            number of strings echoed before timing.
        strlen: Upper length bound passed to ``random_strings_uniform``.
        trials: Number of timed constructions to average over.
        dtype: Must be 'str'.
    """
    print(">>> arkouda suffix array")
    cfg = ak.get_config()
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {},  num of strings  = {:,}".format(
        cfg["numLocales"], Nv))

    if dtype != 'str':
        # Stop here: there is no input to benchmark, and continuing used to
        # fail with a NameError on the unbound `v`.
        print("Wrong data type")
        return

    v = ak.random_strings_uniform(1, strlen, Nv)
    c = ak.suffix_array(v)
    print("All the random strings are as follows")
    for k in range(vsize):
        print("the {} th random tring ={}".format(k, v[k]))
        print("the {} th suffix array ={}".format(k, c[k]))
        print("")

    timings = []
    for _ in range(trials):
        start = time.time()
        c = ak.suffix_array(v)
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    # Same value as the earlier formula, whose other terms were multiplied
    # by zero: one offsets-sized item per byte of the suffix array.
    bytes_transferred = c.bytes.size * c.offsets.itemsize
    bytes_per_sec = bytes_transferred / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec / 2**30))
Пример #26
0
def time_ak_aggregate(N_per_locale, trials, seed):
    """Time ``GroupBy.aggregate`` for every op in ``ak.GroupBy.Reductions``.

    Ops listed in ``BOOLOPS`` are run on the boolean values, all others on
    the integer values.

    Args:
        N_per_locale: Elements per locale (scaled by ``numLocales``).
        trials: Number of timed aggregations per op.
        seed: Forwarded to ``generate_arrays``.
    """
    print(">>> arkouda aggregate")
    num_locales = ak.get_config()["numLocales"]
    N = N_per_locale * num_locales
    print(f"numLocales = {num_locales}, N = {N:,}")
    keys, intvals, boolvals = generate_arrays(N, seed)
    g = ak.GroupBy(keys, assume_sorted=True)
    for op in ak.GroupBy.Reductions:
        v = boolvals if op in BOOLOPS else intvals
        totalbytes = v.size * v.itemsize
        elapsed = []
        for _ in range(trials):
            t0 = time.time()
            res = g.aggregate(v, op)[1]
            t1 = time.time()
            elapsed.append(t1 - t0)
        tavg = sum(elapsed) / trials
        print(f"Aggregate {op} Average time = {tavg:.4f} sec")
        rate = totalbytes / tavg
        print(f"Aggregate {op} Average rate = {rate / 2**30:.4f} GiB/sec")
Пример #27
0
def report_mem(pre=''):
    """Print the server's memory use in TB and as a fraction of capacity.

    Capacity is taken as ``numLocales * physicalMemory`` from the server
    configuration.

    Args:
        pre: Text placed at the start of the printed line.
    """
    cfg = ak.get_config()
    # Query once so the absolute and relative figures describe the same sample.
    mem_used = ak.get_mem_used()
    used = mem_used / (cfg['numLocales'] * cfg['physicalMemory'])
    print(f"{pre} mem use: {mem_used/(1024**4): .2f} TB ({used:.1%})")
Пример #28
0
    # Parse the command line, connect to the arkouda server and load the
    # requested HDF5 fields into a dict of arkouda arrays.
    args = parser.parse_args()

    ak.set_defaults()
    ak.verbose = False
    # Forward only the connection options that were actually supplied, so
    # ak.connect falls back to its own defaults for the rest.
    if args.server is not None:
        if args.port is not None:
            ak.connect(server=args.server, port=args.port)
        else:
            ak.connect(server=args.server)
    else:
        if args.port is not None:
            ak.connect(port=args.port)
        else:
            ak.connect()

    print(ak.get_config())
            
    # NOTE(review): this only prints the usage line; execution continues with
    # an empty file list -- confirm whether an early exit is intended.
    if len(args.hdffiles) == 0:
        print("usage: {} [--server server] [--port port] hdffiles ".format(sys.argv[0]))

    # fields in the files to read and create pdarrays in the dict
    fields = ['srcIP', 'dstIP', 'srcPort', 'dstPort', 'start']

    # read in the files, all data from hdffiles
    # will be concatenated together in the fields/columns
    nfDF = {field: ak.read_hdf(field, args.hdffiles) for field in fields}

    # print out the pdarrays in the dict and their types
    print(nfDF['start'],nfDF['start'].dtype)
    print(nfDF['srcIP'],type(nfDF['srcIP'])) # Strings doesn't have a dtype?
    print(nfDF['dstIP'],type(nfDF['dstIP'])) # Strings doesn't have a dtype?