def test_parallel_2():
    """A search split across two rank groups must match a serial run.

    Runs the same 3-tuple search three ways: serially, then as the first
    and second halves of a 2x-cpu rank range. The concatenated, sorted
    parallel results must equal the sorted serial results.
    """
    filename = "sample_data.csv"
    cpus = cpu_count()
    total_ranks = cpus * 2
    ranks_per = cpus

    # Baseline: one serial search over the whole tuple space.
    serial_search = pld.Search()
    serial_search.load_file_column_major(filename)
    serial_search.tuple_size = 3
    serial = serial_search.start()

    # First half of the rank range (start_rank defaults to 0).
    first_search = pld.Search()
    first_search.load_file_column_major(filename)
    first_search.tuple_size = 3
    first_search.ranks = ranks_per
    first_search.total_ranks = total_ranks
    first_half = first_search.start()

    # Second half of the rank range, offset by start_rank.
    second_search = pld.Search()
    second_search.load_file_column_major(filename)
    second_search.tuple_size = 3
    second_search.ranks = ranks_per
    second_search.total_ranks = total_ranks
    second_search.start_rank = ranks_per
    second_half = second_search.start()

    # Sort both result sets column-wise so row order doesn't matter.
    serial_sorted = np.sort(serial, axis=0)
    combined = np.concatenate([first_half, second_half])
    combined_sorted = np.sort(combined, axis=0)

    assert serial.shape[0] == combined_sorted.shape[0]
    assert serial.shape[1] == combined_sorted.shape[1]
    np.testing.assert_array_equal(serial_sorted, combined_sorted)
def time_mist(q=50, progress=False, rs=[1], ms=[10, 30, 60, 90, 120],
              ns=[10, 20, 30], bs=[2, 3, 4], ds=[2], cs=[False, True],
              algs=['vector', 'bitset']):
    """Benchmark lm.Search timings over a grid of configurations.

    Sweeps variable count ``ns``, sample count ``ms``, alphabet size ``bs``,
    tuple dimension ``ds``, rank count ``rs``, cache setting ``cs`` and
    probability algorithm ``algs``, timing each combination with
    ``run_times(search, q)``.

    NOTE: the mutable default arguments are only iterated, never mutated,
    so the shared-default pitfall does not apply; the signature is kept
    as-is for backward compatibility.

    Returns:
        pd.DataFrame: one row per configuration with columns
        ['n', 'd', 'm', 'b', 'r', 'q', 'cache', 'alg', 'time'].
    """
    search = lm.Search()
    search.measure = "symmetricdelta"
    df = pd.DataFrame(
        columns=['n', 'd', 'm', 'b', 'r', 'q', 'cache', 'alg', 'time'])
    if progress:
        print("%s" % (','.join(df.columns)))
    jobs = len(ns) * len(bs) * len(ms) * len(ds) * len(rs) * len(cs) * len(
        algs)
    i = 1
    for n in ns:
        for m in ms:
            for b in bs:
                data = matrix(n=n, m=m, b=b)
                search.load_ndarray(data)
                for d in ds:
                    ts = lm.TupleSpace()
                    ts.addVariableGroup("all", list(range(0, n)))
                    ts.addVariableGroupTuple([0] * d)
                    search.tuple_space = ts
                    for r in rs:
                        search.ranks = r
                        for alg in algs:
                            search.probability_algorithm = alg
                            for c in cs:
                                search.cache_enabled = c
                                t = run_times(search, q)
                                row = pd.DataFrame({
                                    'n': [n],
                                    'm': [m],
                                    'd': [d],
                                    'b': [b],
                                    'r': [r],
                                    'q': [q],
                                    'alg': [alg],
                                    'cache': [c],
                                    'time': [t]
                                })
                                if progress:
                                    print("[%02d/%02d : %s]\n%s" %
                                          (i, jobs, datetime.datetime.now(),
                                           str(row)))
                                # BUG FIX: DataFrame.append was deprecated in
                                # pandas 1.4 and removed in 2.0; pd.concat is
                                # the supported replacement.
                                df = pd.concat([df, row], ignore_index=True)
                                i = i + 1
    return df
def test_cutoff_mean():
    """Setting cutoff to the mean score must filter all below-mean results."""
    filename = "sample_data.csv"

    # First pass: collect the full, unfiltered result set.
    mist = pld.Search()
    mist.load_file_column_major(filename)
    mist.tuple_size = 3
    full_results = mist.start()

    # Second pass on the same search object, with the mean of the score
    # column (last column) as the cutoff.
    threshold = np.mean(full_results[:, -1])
    mist.cutoff = threshold
    filtered = mist.start()

    # No returned score may fall below the cutoff.
    assert sum(filtered[:, -1] < threshold) == 0
def test_compute_in_file_out_numpy():
    """Multi-threaded 2-tuple search on the sample file yields known results.

    Checks the result shape (45 pairs x 3 columns) and spot-checks the
    first and last rows after a column-wise sort.
    """
    # BUG FIX: every sibling test defines `filename` locally; this one
    # silently relied on an unseen module-level global. Define it here so
    # the test is self-contained and consistent with its siblings.
    filename = "sample_data.csv"
    mist = pld.Search()
    mist.load_file_column_major(filename)
    mist.threads = 4
    mist.tuple_size = 2
    res = mist.start()
    # Sort column-wise so the spot-checks below are order-independent.
    res2 = np.sort(res, axis=0)
    print(res2)
    assert (res.shape[0] == 45)
    assert (res.shape[1] == 3)
    assert (res2[0][0] == 0)
    assert (res2[0][1] == 1)
    assert (res2[0][2] == 0)
    assert (res2[-1][0] == 8)
    assert (res2[-1][1] == 9)
    # `precision` is presumably a module-level float tolerance -- TODO confirm.
    assert (abs(res2[-1][2] - 0.72192809) < precision)
def results_d2_mat(results, N):
    """Pack pairwise result rows (i, j, score) into an N x N matrix.

    Only the upper triangle (j > i) is filled; all other entries stay 0.
    Assumes every (i, j) pair with j > i appears in `results` -- TODO confirm.
    """
    mat = np.zeros((N, N))
    for i in range(N):
        for j in range(i + 1, N):
            hit = np.where((results[:, 0] == i) & (results[:, 1] == j))
            mat[i, j] = results[hit][0][2]
    return mat


N = 90
M = 1000
B = 4
data = make_data(N=N, M=M, B=B)

# Full mutual-information search over every variable pair.
search = lm.Search()
search.load_ndarray(data)
results_mi = search.start()
results_mat = results_d2_mat(results_mi, N)

plt.title("Mutual Information Heatmap")
hm = plt.imshow(results_mat, cmap='Blues', interpolation="nearest")
plt.colorbar(hm)
#plt.show()
plt.savefig("mi.png")
plt.close()

# Restricted search: limit pairs to the first third of the variables.
ts = lm.TupleSpace()
ts.addVariableGroup("lowMI", list(range(0, N // 3)))
ts.addVariableGroupTuple([0, 0])
search = lm.Search()