def rmat(size):
    '''
    RMAT-generated edges (coargsort of two vertex arrays)

    Yields a single ('RMAT int64', (ii, jj)) pair. ii and jj are int64
    arrays of size // 2 entries each, initialised to ones and then built
    up one bit per loop iteration from uniform random draws.

    Note: size < 2 gives N == 0, for which int(np.log2(0.0)) is int(-inf)
    and raises OverflowError when the generator is first advanced.
    '''
    # N = number of edges = number of elements / 2
    N = size // 2
    avgdegree = 10
    # lgNv = log2 of the number of vertices (number of vertices = 2**lgNv)
    lgNv = int(np.log2(N / avgdegree))
    # probabilities
    a = 0.01
    b = (1.0 - a) / 3.0
    c = b
    d = b
    # quantities to use in edge generation loop
    ab = a + b
    c_norm = c / (c + d)
    a_norm = a / ab
    # init edge arrays
    ii = ak.ones(N, dtype=ak.int64)
    jj = ak.ones(N, dtype=ak.int64)
    # generate edges: iteration ib contributes the bit worth 2**(ib - 1)
    for ib in range(1, lgNv):
        bit_value = 2**(ib - 1)
        ii_bit = (ak.uniform(N) > ab)
        # threshold for jj depends on which way the ii bit went
        jj_bit = (ak.uniform(N) > (c_norm * ii_bit + a_norm * (~ii_bit)))
        ii = ii + (bit_value * ii_bit)
        jj = jj + (bit_value * jj_bit)
    yield 'RMAT int64', (ii, jj)
def time_ak_scan(N_per_locale, trials, dtype, random, seed):
    '''
    Time every scan operation named in OPS on one input array and print,
    per operation, the final value, the average time and the average rate.

    N_per_locale : elements per locale; the total N is this value times
                   the "numLocales" entry of ak.get_config()
    trials       : number of timed repetitions of each operation
    dtype        : 'int64' or 'float64'
    random       : if true, time on random data instead of ak.arange
    seed         : seed passed to the random generators; a seed that is
                   not None also selects random data

    Raises ValueError when random data is requested for a dtype other
    than 'int64' or 'float64'.
    '''
    print(">>> arkouda {} scan".format(dtype))
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    # Test the seed parameter, not the script-level args namespace, so the
    # function also works when imported and called directly.
    if random or seed is not None:
        if dtype == 'int64':
            a = ak.randint(1, N, N, seed=seed)
        elif dtype == 'float64':
            a = ak.uniform(N, seed=seed) + 0.5
        else:
            # Previously fell through with `a` unbound and failed later.
            raise ValueError(
                "unsupported dtype for random input: {}".format(dtype))
    else:
        a = ak.arange(1, N, 1)
        if dtype == 'float64':
            a = 1.0 * a

    timings = {op: [] for op in OPS}
    final_values = {}
    for _ in range(trials):
        for op in timings:
            fxn = getattr(ak, op)
            start = time.time()
            r = fxn(a)
            end = time.time()
            timings[op].append(end - start)
            # keep the last element of the result for the report below
            final_values[op] = r[r.size - 1]
    tavg = {op: sum(t) / trials for op, t in timings.items()}

    for op, t in tavg.items():
        print("{}, final value = {}".format(op, final_values[op]))
        print(" {} Average time = {:.4f} sec".format(op, t))
        # factor 2: presumably one read plus one write per element
        bytes_per_sec = (a.size * a.itemsize * 2) / t
        print(" {} Average rate = {:.2f} GiB/sec".format(op, bytes_per_sec/2**30))
def power_law(N):
    '''
    Yield power-law data (exponent -2.5, upper bound 2**32) twice:
    first the real-valued samples labelled 'power-law float64', then the
    same samples cast to int64 and labelled 'power-law int64'.
    '''
    exponent = -2.5  # power law exponent, between -2 and -3
    upper = 2**32  # upper bound
    shifted = exponent + 1
    draws = ak.uniform(N)
    # map the uniform draws onto the power-law range
    scale = upper**shifted - 1
    reals = (scale * draws + 1)**(1 / shifted)
    yield 'power-law float64', reals
    yield 'power-law int64', ak.cast(reals, ak.int64)
def random_uniform(N):
    '''
    Yield (name, data) pairs of uniformly distributed data of length N:
    integers from ak.randint over 16-bit, 32-bit and 64-bit ranges,
    followed by reals from ak.uniform.
    '''
    int_ranges = (
        ('16-bit', 0, 2**16),
        ('32-bit', 0, 2**32),
        ('64-bit', -(2**63), 2**63),
    )
    for label, low, high in int_ranges:
        yield 'uniform int64 {}'.format(label), ak.randint(low, high, N)
    yield 'uniform float64', ak.uniform(N)
def IP_like(N):
    '''
    Data like a 90/10 mix of IPv4 and IPv6 addresses

    Yields one ('IP-like 2*int64', (IP1, IP2)) pair of int64 arrays, each
    of length N // 2, drawn from a pool of N // 20 generated addresses.
    '''
    multiplicity = 10
    pool_size = N // (2 * multiplicity)
    # Build the address pool first; it is sampled with replacement below
    pool_a = ak.zeros(pool_size, dtype=ak.int64)
    pool_b = ak.zeros(pool_size, dtype=ak.int64)
    is_v4 = ak.uniform(pool_size) < 0.9
    is_v6 = ~is_v4
    count_v4 = is_v4.sum()
    count_v6 = is_v4.size - count_v4
    # v4-like entries: a 32-bit value in the first array, second stays zero
    pool_a[is_v4] = ak.randint(0, 2**32, count_v4)
    # v6-like entries: full-range int64 values in both arrays
    pool_a[is_v6] = ak.randint(-(2**63), 2**63, count_v6)
    pool_b[is_v6] = ak.randint(-(2**63), 2**63, count_v6)
    picks = ak.randint(0, pool_size, N // 2)
    yield 'IP-like 2*int64', (pool_a[picks], pool_b[picks])