def benchmark_mocks_threads_all(numpart_frac=[
        0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8,
        0.9, 1.0], nrepeats=3, keys=None, isa=None):
    """Time the mocks pair counters as a function of catalog size.

    For every instruction set in ``isa`` and every fraction in
    ``numpart_frac`` a random subsample of the mock and randoms catalogs is
    drawn, and each requested routine is run ``nrepeats`` times with
    ``max_threads`` threads while its stderr output is captured.

    Parameters
    ----------
    numpart_frac : sequence of float
        Fractions of the full catalogs to subsample. The default list is
        only read, never modified.
    nrepeats : int
        Number of timed runs per (isa, fraction, routine) combination.
    keys : list of str, optional
        Routines to benchmark. Defaults to every entry of ``allkeys``.
    isa : list of str, optional
        Instruction sets to benchmark. Defaults to every entry of
        ``allisa``.

    Returns
    -------
    keys : list of str
        The routines that were benchmarked.
    isa : list of str
        The instruction sets that were benchmarked.
    runtimes : numpy structured array
        One row per timed run with the fields ``repeat``, ``name``, ``isa``,
        ``rmax``, ``ndata``, ``nrand``, ``nthreads``, ``runtime``,
        ``serial_time``, ``pair_time`` and ``api_time``.

    Raises
    ------
    ValueError
        If ``keys`` or ``isa`` contains an entry that is not supported.
    """
    from Corrfunc.mocks import DDrppi_mocks, DDtheta_mocks

    allkeys = [
        # 'DDrppi (DD)',
        # 'DDtheta (DD)',
        'DDrppi (DR)',
        'DDtheta (DR)'
    ]
    allisa = ['avx512f', 'avx', 'sse42', 'fallback']

    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                      " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instructions sets benchmark are: {0}\n"\
                      "Found routine = {1}".format(allisa, i)
                raise ValueError(msg)

    print("Benchmarking mocks routines = {0} with isa = {1}".format(keys,
                                                                    isa))
    mocks_file = pjoin(dirname(abspath(Corrfunc.__file__)),
                       "../mocks/tests/data",
                       "Mr19_mock_northonly.rdcz.ff")
    allra, alldec, allcz = read_catalog(mocks_file)

    rand_file = pjoin(dirname(abspath(Corrfunc.__file__)),
                      "../mocks/tests/data",
                      "Mr19_randoms_northonly.rdcz.ff")
    allrand_ra, allrand_dec, allrand_cz = read_catalog(rand_file)

    cosmology = 1
    rmin = 0.1
    rmax = 84.0
    angmax = 10.0
    nbins = 20
    rbins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)

    # set to rmax for easier handling of
    # scaling with number of particles
    pimax = rmax
    angbins = np.logspace(np.log10(rmin), np.log10(angmax), nbins)

    nthreads = max_threads

    # Builtin int/float replace the np.int/np.float aliases, which were
    # removed in NumPy 1.24; the resulting field dtypes are unchanged.
    dtype = np.dtype([('repeat', int),
                      ('name', 'U16'),
                      ('isa', 'U16'),
                      ('rmax', float),
                      ('ndata', int),
                      ('nrand', int),
                      ('nthreads', int),
                      ('runtime', float),
                      ('serial_time', float),
                      ('pair_time', float),
                      ('api_time', float)])

    totN = len(numpart_frac) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)
    runtimes['nthreads'][:] = nthreads
    runtimes['rmax'][:] = rmax

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for frac in numpart_frac:
            npts = int(frac * len(allra))
            npts_rand = int(frac * len(allrand_ra))
            print("Working with (N, nrand) = {0} {1}".format(npts,
                                                             npts_rand),
                  file=sys.stderr)

            # NOTE(review): every coordinate array is subsampled with its
            # own independent draw, so the (ra, dec, cz) triplets of the
            # parent catalog are not kept together -- confirm this is
            # intended for the benchmark.
            ra = np.random.choice(allra, npts, replace=False)
            dec = np.random.choice(alldec, npts, replace=False)
            cz = np.random.choice(allcz, npts, replace=False)

            rand_ra = np.random.choice(allrand_ra, npts_rand, replace=False)
            rand_dec = np.random.choice(allrand_dec, npts_rand,
                                        replace=False)
            rand_cz = np.random.choice(allrand_cz, npts_rand, replace=False)

            start_thread_index = index

            # (name, recorded nrand, function, positional args, extra
            # keyword args), listed in the order the routines are run.
            # The first positional argument is autocorr (1 = DD, 0 = DR).
            routines = [
                ('DDtheta (DD)', npts, DDtheta_mocks,
                 (1, nthreads, angbins, ra, dec),
                 {}),
                ('DDtheta (DR)', npts_rand, DDtheta_mocks,
                 (0, nthreads, angbins, ra, dec),
                 dict(RA2=rand_ra, DEC2=rand_dec)),
                ('DDrppi (DD)', npts, DDrppi_mocks,
                 (1, cosmology, nthreads, pimax, rbins, ra, dec, cz),
                 {}),
                ('DDrppi (DR)', npts_rand, DDrppi_mocks,
                 (0, cosmology, nthreads, pimax, rbins, ra, dec, cz),
                 dict(RA2=rand_ra, DEC2=rand_dec, CZ2=rand_cz)),
            ]

            for name, nrand, func, args, extra in routines:
                if name not in keys:
                    continue

                kwargs = dict(extra, verbose=True, c_api_timer=True,
                              isa=run_isa)
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = func(*args, **kwargs)
                        t1 = time.time()

                        runtimes['name'][index] = name
                        runtimes['isa'][index] = run_isa
                        runtimes['ndata'][index] = npts
                        runtimes['nrand'][index] = nrand
                        runtimes['nthreads'][index] = nthreads
                        runtimes['runtime'][index] = t1 - t0

                        # The verbose output captured in stderr_filename is
                        # handed to _get_times to obtain the two timings.
                        serial_time, pair_time = _get_times(stderr_filename)
                        runtimes['serial_time'][index] = serial_time
                        runtimes['pair_time'][index] = pair_time
                        runtimes['api_time'][index] = api_time
                        index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    return keys, isa, runtimes
def benchmark_theory_threads_all(numpart_frac=[
        0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8,
        0.9, 1.0], nrepeats=3, keys=None, isa=None):
    """Time the theory pair counters as a function of catalog size.

    For every instruction set in ``isa`` and every fraction in
    ``numpart_frac`` a random subsample of the catalog returned by
    ``read_catalog()`` is drawn, and each requested routine is run
    ``nrepeats`` times with ``max_threads`` threads while its stderr output
    is captured.

    Parameters
    ----------
    numpart_frac : sequence of float
        Fractions of the full catalog to subsample. The default list is
        only read, never modified.
    nrepeats : int
        Number of timed runs per (isa, fraction, routine) combination.
    keys : list of str, optional
        Routines to benchmark. Defaults to every entry of ``allkeys``.
    isa : list of str, optional
        Instruction sets to benchmark. Defaults to every entry of
        ``allisa``.

    Returns
    -------
    keys : list of str
        The routines that were benchmarked.
    isa : list of str
        The instruction sets that were benchmarked.
    runtimes : numpy structured array
        One row per timed run with the fields ``repeat``, ``name``, ``isa``,
        ``rmax``, ``ndata``, ``nrand``, ``nthreads``, ``runtime``,
        ``serial_time``, ``pair_time`` and ``api_time``.

    Raises
    ------
    ValueError
        If ``keys`` or ``isa`` contains an entry that is not supported.
    """
    from Corrfunc.theory import DD, DDrppi, wp, xi

    allkeys = [
        # 'DD',
        'DDrppi',
        'wp',
        'xi'
    ]
    allisa = ['avx512f', 'avx', 'sse42', 'fallback']

    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                      " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instructions sets benchmark are: {0}\n"\
                      "Found routine = {1}".format(allisa, i)
                raise ValueError(msg)

    numpart_frac = np.array(numpart_frac)
    print("Benchmarking theory routines {0} for isa = {1}".format(keys, isa))
    allx, ally, allz = read_catalog()

    rmin = 0.1
    rmax = 84.0
    nbins = 20
    bins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
    pimax = rmax  # Set to rmax for comparisons between wp and xi
    autocorr = 1
    boxsize = 420.0
    nthreads = max_threads

    # Builtin int/float replace the np.int/np.float aliases, which were
    # removed in NumPy 1.24; the resulting field dtypes are unchanged.
    dtype = np.dtype([('repeat', int),
                      ('name', 'U16'),
                      ('isa', 'U16'),
                      ('rmax', float),
                      ('ndata', int),
                      ('nrand', int),
                      ('nthreads', int),
                      ('runtime', float),
                      ('serial_time', float),
                      ('pair_time', float),
                      ('api_time', float)])

    totN = len(numpart_frac) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)
    runtimes['nthreads'][:] = nthreads
    runtimes['rmax'][:] = rmax

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for frac in numpart_frac:
            npts = int(frac * len(allx))
            print("Working with N = {0}".format(npts), file=sys.stderr)

            # NOTE(review): x, y and z are subsampled with independent
            # draws, so the original (x, y, z) triplets are not kept
            # together -- confirm this is intended for the benchmark.
            x = np.random.choice(allx, npts, replace=False)
            y = np.random.choice(ally, npts, replace=False)
            z = np.random.choice(allz, npts, replace=False)

            start_thread_index = index

            # (name, function, positional args) in the order they are run.
            routines = [
                ('DD', DD, (autocorr, nthreads, bins, x, y, z)),
                ('DDrppi', DDrppi,
                 (autocorr, nthreads, pimax, bins, x, y, z)),
                ('wp', wp, (boxsize, pimax, nthreads, bins, x, y, z)),
                ('xi', xi, (boxsize, nthreads, bins, x, y, z)),
            ]

            for name, func, args in routines:
                if name not in keys:
                    continue

                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = func(*args, verbose=True,
                                           c_api_timer=True, isa=run_isa)
                        t1 = time.time()

                        runtimes['name'][index] = name
                        runtimes['isa'][index] = run_isa
                        runtimes['ndata'][index] = npts
                        runtimes['nrand'][index] = npts
                        runtimes['nthreads'][index] = nthreads
                        runtimes['runtime'][index] = t1 - t0

                        # The verbose output captured in stderr_filename is
                        # handed to _get_times to obtain the two timings.
                        serial_time, pair_time = _get_times(stderr_filename)
                        runtimes['serial_time'][index] = serial_time
                        runtimes['pair_time'][index] = pair_time
                        runtimes['api_time'][index] = api_time
                        index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))

    # autocorr is always 1 for theory routines -> 'nrand' == 'ndata'
    runtimes['nrand'][:] = (runtimes['ndata'][:]).copy()
    return keys, isa, runtimes
def main():
    """Generate, or plot, per-cell-pair timings of ``wp``.

    Without command-line arguments, ``run_wp`` is called for every
    instruction set, once with one thread and once with ``max_threads``
    threads, and the results are pickled to ``wp_cell_timers_ozstar.pkl``.

    With one argument (the path of such a pickle file) the single-threaded
    timings are loaded. For every instruction set other than the first
    entry after reversal, speedup statistics relative to the ``'fallback'``
    timings are printed and a figure is saved as PNG and PDF. The per-isa
    speedups are finally written to ``isa_and_speedups.npz``.
    """
    import sys
    if len(sys.argv) == 1:
        print("Running cell timers for wp")
        all_isa = ['avx512f', 'avx2', 'avx', 'sse42', 'fallback']
        x, y, z = read_catalog()

        # Alternative input catalog (disabled):
        # boxsize = 1100.0
        # pimax = 45.0
        # points = np.loadtxt('halos_emulator_1100box_Neff3_00.txt')
        # numpart = int(1.*len(points))
        # assert (points >= 0).all() and (points < 1100.).all()
        # dtype = points.dtype  # float64
        # points = points.reshape(-1).view(dtype=[('x',dtype,3)])
        # subsample = np.random.choice(points, numpart, replace=False)
        # subsample = subsample.view(dtype=dtype).reshape(-1,3)
        # x, y, z = subsample.T

        boxsize = 420.0
        pimax = 40.0

        cell_timings = dict()
        serial_timings = dict()
        for isa in all_isa:
            cell_timings[isa] = dict()
            serial_timings[isa] = dict()

            # First run the serial (single threaded)
            timings = run_wp(boxsize, x, y, z, pimax,
                             nthreads=1, isa=isa)
            (serial_timings[isa])[1] = timings

            # then the one with all threads
            # max_threads is not required but being explicit
            timings = run_wp(boxsize, x, y, z, pimax,
                             nthreads=max_threads, isa=isa)
            (cell_timings[isa])[max_threads] = timings

        with open('wp_cell_timers_ozstar.pkl', 'wb') as outfile:
            pickle.dump([all_isa, cell_timings, serial_timings], outfile,
                        protocol=pickle.HIGHEST_PROTOCOL)
    else:
        import matplotlib
        import matplotlib.pyplot as plt
        import matplotlib.colors as mcolors
        import matplotlib.cm as cm

        timings_file = sys.argv[1]
        print("Loading benchmarks from file = {0}".format(timings_file))
        # SECURITY NOTE: unpickling can execute arbitrary code; only pass
        # timing files that were produced by this script.
        with open(timings_file, 'rb') as pkl_file:
            all_isa, _, serial_timings = pickle.load(pkl_file)

        legend = ['AVX512F', 'AVX2', 'AVX', 'SSE4.2', 'Fallback']
        base_string = 'wp'
        all_speedup = []
        base_timing = (serial_timings['fallback'])[1]['time_in_ns']
        N1_parts = (serial_timings['fallback'])[1]['N1']
        N2_parts = (serial_timings['fallback'])[1]['N2']
        gridsize = 40
        cb_range = [0.0, 100.0]
        contour_nlevels = 4
        xlimits = [0, 1000]
        ylimits = xlimits
        xlabel = 'Number of points in a cell'
        ylabel = xlabel

        cb_diff = (cb_range[1] - cb_range[0])

        # Disabled two-part colormap (kept for reference):
        # positive_Ncolors = int((cb_range[1] - 1.0) / cb_diff * 256)
        # negative_Ncolors = 256 - positive_Ncolors
        # colors1 = cm.OrRd(np.linspace(0.0, 1.0, negative_Ncolors))
        # colors2 = cm.viridis(np.linspace(0.0, 1.0, positive_Ncolors))
        # # combine them and build a new colormap
        # colors = np.vstack((colors1, colors2))
        # mycmap = mcolors.LinearSegmentedColormap.from_list('my_colormap',
        #                                                    colors)
        mycmap = 'viridis'

        matplotlib.style.use('default')
        # Label levels with specially formatted floats
        if plt.rcParams["text.usetex"]:
            cntr_fmt = r'%r\%%'
        else:
            cntr_fmt = '%r%%'

        # Want fallback to appear first
        all_isa.reverse()
        legend.reverse()

        for ii, isa in enumerate(all_isa):
            # The first entry after the reversal is skipped; presumably it
            # is the 'fallback' baseline -- verify against the pickled list.
            if ii == 0:
                continue

            this_timing = (serial_timings[isa])[1]['time_in_ns']
            # Only cells with a positive timing in both runs are compared.
            ind = (np.where((this_timing > 0.0) & (base_timing > 0.0)))[0]
            speedup = base_timing[ind] / this_timing[ind]
            all_speedup.append(speedup)
            print("Min speedup = {0:0.2f}. Max = {1:0.2f} Median = {2:0.2f}".
                  format(min(speedup), max(speedup), np.median(speedup)))

            bad = (np.where(speedup <= 1.0))[0]
            bad_timings_base = np.sum(base_timing[ind[bad]])
            bad_timings = np.sum(this_timing[ind[bad]])
            print("Cells with slowdown {3}({4:4.3f}%): Base takes - {0:8.3f} "
                  "sec while {1} takes {2:8.3f} seconds".format(
                      bad_timings_base / 1e9,
                      legend[ii],
                      bad_timings / 1e9,
                      len(bad),
                      100.0 * len(bad) / len(ind)))

            good = (np.where(speedup > 1.0))[0]
            good_timings_base = np.sum(base_timing[ind[good]])
            good_timings = np.sum(this_timing[ind[good]])
            print(
                "Cells with speedup {3}({4:4.3f}%): Base takes - {0:8.3f} sec "
                "while {1} takes {2:8.3f} seconds".format(
                    good_timings_base / 1e9,
                    legend[ii],
                    good_timings / 1e9,
                    len(good),
                    100.0 * len(good) / len(ind)))

            fig = plt.figure(1, figsize=(8, 8))
            figsize = 0.6
            left = 0.1
            bottom = 0.1
            top_aspect = 0.15

            # Top panel: histogram of N1 for the selected cells.
            hist_area = [left, bottom + figsize,
                         figsize, figsize * top_aspect]
            axhist = plt.axes(hist_area)
            axhist.autoscale(enable=True, axis="y")
            axhist.set_xlim(xlimits)
            plt.setp(axhist.get_xticklabels(), visible=False)
            axhist.axis('off')
            axhist.hist(N1_parts[ind], gridsize, range=xlimits,
                        color='0.5')

            # Right panel: N1 histogram weighted by this isa's timing.
            hist_time_area = [
                left + figsize, bottom, figsize * top_aspect, figsize
            ]
            ax_time = plt.axes(hist_time_area)
            ax_time.autoscale(enable=True, axis="x")
            ax_time.set_ylim(ylimits)
            plt.setp(ax_time.get_yticklabels(), visible=False)
            plt.setp(ax_time.get_xticklabels(), visible=False)
            ax_time.axis('off')
            ax_time.hist(N1_parts[ind], gridsize, weights=this_timing[ind],
                         range=xlimits, orientation="horizontal",
                         color='0.5')

            # Main panel.
            im_area = [left, bottom, figsize, figsize]
            ax = plt.axes(im_area)
            ax.set_autoscale_on(False)
            ax.set_xlim(xlimits)
            ax.set_ylim(ylimits)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)

            xedges = np.linspace(xlimits[0], xlimits[1], gridsize)
            yedges = np.linspace(ylimits[0], ylimits[1], gridsize)
            # The removed 'normed' keyword is no longer passed; the default
            # already returns the un-normalised weighted histogram.
            cell_time, xedges, yedges = np.histogram2d(
                N1_parts, N2_parts, (xedges, yedges),
                weights=base_timing)

            # Percentage of the total baseline time per (N1, N2) bin.
            cell_time /= np.sum(cell_time)
            cell_time *= 100.0
            cell_time_1d = cell_time.flatten()
            sorted_ind = np.argsort(cell_time_1d)
            cum_sorted_time = np.cumsum(cell_time_1d[sorted_ind])
            # Scatter the cumulative sums back to the unsorted bin order.
            correct_order_cum_time = np.empty_like(cum_sorted_time)
            correct_order_cum_time[sorted_ind] = cum_sorted_time
            correct_order_cum_time = correct_order_cum_time.reshape(
                cell_time.shape)

            extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
            xarr, yarr = np.meshgrid(xedges[0:-1], yedges[0:-1])
            contours = ax.contour(xarr, yarr,
                                  correct_order_cum_time, contour_nlevels,
                                  linewidths=3.0,
                                  extent=extent,
                                  cmap=cm.Greys)

            # Recast levels to new class
            # Reverse the levels to show that the contours represent
            # enclosed fraction of time spent
            contours.levels = [nf(val) for val in contours.levels[::-1]]
            ax.clabel(contours, contours.levels, fmt=cntr_fmt,
                      inline=True, fontsize=10)

            # Now plot the image for the speedup
            # NOTE(review): the colours encode the normalised timing of this
            # isa, while the colorbar below is labelled as a speedup --
            # confirm which quantity is intended.
            normalized_this_timing = this_timing / this_timing.sum()
            im = ax.hexbin(
                N1_parts[ind],
                N2_parts[ind],
                # C=speedup[ind],
                C=normalized_this_timing[ind],
                vmin=cb_range[0],
                vmax=cb_range[1],
                cmap=mycmap,
                gridsize=gridsize)

            plt.figtext(left + figsize - 0.03, bottom + figsize - 0.05,
                        '{0}'.format(legend[ii]), fontsize=16, ha='right')

            cbar_offset = 0.08
            cbar_width = 0.03
            cbar_ax = fig.add_axes([
                left + figsize + figsize * top_aspect + cbar_offset,
                bottom, cbar_width, figsize
            ])
            # np.linspace requires an integer number of samples.
            nticks = int(cb_diff + 1.0)
            cb = fig.colorbar(im, extend='both', format="%.1f",
                              ticks=np.linspace(cb_range[0], cb_range[1],
                                                nticks),
                              cax=cbar_ax)
            cb.set_label('Speedup rel. to non-vectorized code')

            # Tag the output with the first machine name found in the path.
            exts = ''
            for machine in ('laptop', 'stampede', 'bender', 'ozstar'):
                if machine in timings_file:
                    exts = machine + '_'
                    break

            plt.savefig('{1}_{2}Speedup_{0}.png'.format(
                legend[ii], base_string, exts), dpi=400)
            plt.savefig('{1}_{2}Speedup_{0}.pdf'.format(
                legend[ii], base_string, exts), dpi=400)

            fig.clear()
            ax.clear()
            axhist.clear()
            ax_time.clear()
            plt.close(fig)

        np.savez('isa_and_speedups.npz', all_isa=all_isa,
                 all_speedup=all_speedup)
def main():
    """Print brute-force and ``DDsmu`` projected pair counts side by side.

    A seeded random subsample of the catalog returned by ``read_catalog()``
    is drawn, the projection vector and tensor are computed with
    ``dd_bruteforce`` and ``dd_bruteforce_numpy`` (each timed and printed),
    and the same quantities are then obtained from ``DDsmu`` and printed
    for manual comparison.
    """
    proj_type = 'tophat'
    ncomponents = 9
    r_edges = np.linspace(10., 100., ncomponents + 1)
    proj_dict = {
        'tophat': {
            'ncomponents': ncomponents,
            'proj_func': tophat_orig,
            'proj_fn': None,
            'args': [r_edges],
            'kwargs': {}
        }
    }
    proj = proj_dict[proj_type]

    frac = 0.001
    seed = 42
    allx, ally, allz = read_catalog()
    # Builtin int replaces np.int, which was removed in NumPy 1.24.
    N = int(frac * len(allx))
    print("N:", N)

    np.random.seed(seed)
    x = np.random.choice(allx, N, replace=False)
    y = np.random.choice(ally, N, replace=False)
    z = np.random.choice(allz, N, replace=False)
    data = np.array([x, y, z]).T
    fmt = '%10.10f'  # only used by the disabled savetxt calls below

    # Brute force test
    s = time.time()
    print('brute force')
    v_dd_correct, T_dd_correct = dd_bruteforce(data, proj['proj_func'],
                                               proj['ncomponents'],
                                               *proj['args'],
                                               **proj['kwargs'])
    e = time.time()
    print(v_dd_correct)
    print(T_dd_correct)
    print("brute force time:", e - s, 's')

    s = time.time()
    print('numpy trick')
    v_dd_correct, T_dd_correct = dd_bruteforce_numpy(data,
                                                     proj['proj_func'],
                                                     proj['ncomponents'],
                                                     *proj['args'],
                                                     **proj['kwargs'])
    e = time.time()
    print(v_dd_correct)
    print(T_dd_correct)
    print("numpy trick brute force time:", e - s, 's')

    #np.save(f'../output/correct_full_{proj_type}.npy', [v_dd_correct, T_dd_correct, proj_type, proj])
    #np.savetxt(f'../output/correct_vdd_{proj_type}.npy', v_dd_correct, fmt=fmt)
    #np.savetxt(f'../output/correct_Tdd_{proj_type}.npy', T_dd_correct, fmt=fmt)
    #print(v_dd_correct)
    #print(T_dd_correct)

    # Corrfunc/suave test
    nthreads = 1
    mumax = 1.0
    nmubins = 1
    _, v_dd, T_dd = DDsmu(1, nthreads, r_edges, mumax, nmubins, x, y, z,
                          proj_type=proj_type,
                          ncomponents=proj['ncomponents'],
                          projfn=proj['proj_fn'], periodic=False)
    # T_dd is reshaped to (ncomponents, ncomponents) here rather than
    # inside DDsmu; TODO decide whether DDsmu should return it in this
    # shape (the flat form may be easier to pass on to compute_amps).
    T_dd = T_dd.reshape((ncomponents, ncomponents))
    print(v_dd)
    print(T_dd)
def main():
    """Run the mocks extension routines on the bundled mock catalog.

    Reads ``Mr19_mock_northonly.rdcz.ff``, then calls the DD(rp,pi),
    DD(s,mu), DD(theta) and VPF extension functions in turn and prints the
    first ``numbins_to_print`` rows of each result, followed by the total
    elapsed time.
    """
    from os.path import dirname, abspath, join as pjoin
    import numpy as np
    import time
    import Corrfunc
    from Corrfunc.io import read_catalog
    from Corrfunc._countpairs_mocks import\
        countpairs_rp_pi_mocks as rp_pi_mocks_extn,\
        countpairs_s_mu_mocks as s_mu_mocks_extn,\
        countpairs_theta_mocks as theta_mocks_extn,\
        countspheres_vpf_mocks as vpf_mocks_extn

    tstart = time.time()
    filename = pjoin(dirname(abspath(Corrfunc.__file__)),
                     "../mocks/tests/data/", "Mr19_mock_northonly.rdcz.ff")

    t0 = time.time()
    ra, dec, cz = read_catalog(filename)
    t1 = time.time()
    print("RA min = {0} max = {1}".format(np.min(ra), np.max(ra)))
    print("DEC min = {0} max = {1}".format(np.min(dec), np.max(dec)))
    print("cz min = {0} max = {1}".format(np.min(cz), np.max(cz)))
    print("Done reading the data - time taken = {0:10.1f} seconds"
          .format(t1 - t0))
    print("Beginning Correlation functions calculations")

    nthreads = 4
    pimax = 40.0
    binfile = pjoin(dirname(abspath(__file__)),
                    "../mocks/tests/", "bins")
    autocorr = 1
    numbins_to_print = 5
    cosmology = 1

    print("\nRunning 2-D correlation function xi(rp,pi)")
    results_DDrppi, _ = rp_pi_mocks_extn(autocorr, cosmology, nthreads,
                                         pimax, binfile,
                                         ra, dec, cz,
                                         weights1=np.ones_like(ra),
                                         weight_type='pair_product',
                                         output_rpavg=True, verbose=True)
    print("\n# ****** DD(rp,pi): first {0} bins ******* "
          .format(numbins_to_print))
    print("# rmin rmax rpavg pi_upper npairs weightavg")
    print("########################################################################")
    for ibin in range(numbins_to_print):
        items = results_DDrppi[ibin]
        print("{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:10.4f}"
              .format(items[0], items[1], items[2], items[3], items[4],
                      items[5]))
    print("------------------------------------------------------------------------")

    nmu_bins = 10
    mu_max = 1.0

    print("\nRunning 2-D correlation function xi(s,mu)")
    results_DDsmu, _ = s_mu_mocks_extn(autocorr, cosmology, nthreads,
                                       mu_max, nmu_bins, binfile,
                                       ra, dec, cz,
                                       weights1=np.ones_like(ra),
                                       output_savg=True, verbose=True,
                                       weight_type='pair_product')
    print("\n# ****** DD(s,mu): first {0} bins ******* "
          .format(numbins_to_print))
    print("# smin smax savg mu_upper npairs weight_avg")
    print("###########################################################################")
    for ibin in range(numbins_to_print):
        items = results_DDsmu[ibin]
        print("{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:12.4f}"
              .format(items[0], items[1], items[2], items[3], items[4],
                      items[5]))
    print("--------------------------------------------------------------------------")

    binfile = pjoin(dirname(abspath(__file__)),
                    "../mocks/tests/", "angular_bins")
    print("\nRunning angular correlation function DD(theta)")
    results_wtheta, _ = theta_mocks_extn(autocorr, nthreads, binfile,
                                         ra, dec, RA2=ra, DEC2=dec,
                                         weights1=np.ones_like(ra),
                                         weights2=np.ones_like(ra),
                                         weight_type='pair_product',
                                         output_thetaavg=True,
                                         fast_acos=True, verbose=1)
    print("\n# ****** DD(theta): first {0} bins ******* "
          .format(numbins_to_print))
    print("# thetamin thetamax thetaavg npairs weightavg")
    print("############################################################################")
    for ibin in range(numbins_to_print):
        items = results_wtheta[ibin]
        print("{0:14.4f} {1:14.4f} {2:14.4f} {3:14d} {4:14.4f}"
              .format(items[0], items[1], items[2], items[3], items[4]))
    print("-----------------------------------------------------------------------")

    print("Beginning the VPF")
    # Max. sphere radius of 10 Mpc
    rmax = 10.0
    # 10 bins..so counts in spheres of radius 1, 2, 3, 4...10 Mpc spheres
    nbin = 10
    num_spheres = 10000
    num_pN = 6
    threshold_neighbors = 1  # does not matter since we have the centers
    centers_file = pjoin(dirname(abspath(__file__)),
                         "../mocks/tests/data/",
                         "Mr19_centers_xyz_forVPF_rmax_10Mpc.txt")
    results_vpf, _ = vpf_mocks_extn(rmax, nbin, num_spheres, num_pN,
                                    threshold_neighbors, centers_file,
                                    cosmology,
                                    ra, dec, cz, ra, dec, cz, verbose=True)

    print("\n# ****** pN: first {0} bins ******* "
          .format(numbins_to_print))
    # Header row and ruler: one column for r, then one per pN.
    print('# r ' + ''.join(' p{0:0d} '.format(ipn)
                           for ipn in range(num_pN)))
    print("###########" + '################' * num_pN)

    for ibin in range(numbins_to_print):
        items = results_vpf[ibin]
        row = '{0:10.2f} '.format(items[0])
        row += ''.join(' {0:15.4e}'.format(items[ipn + 1])
                       for ipn in range(num_pN))
        print(row)
    print("-----------------------------------------------------------")
    print("Done with the VPF.")

    tend = time.time()
    # Implicit string concatenation keeps stray continuation characters
    # out of the printed message.
    print("Done with all the MOCK clustering calculations. Total time "
          "taken = {0:0.2f} seconds.".format(tend - tstart))
def benchmark_theory_threads_all(min_threads=1, max_threads=max_threads,
                                 nrepeats=1, keys=None, isa=None):
    """Time the theory pair counters as a function of thread count.

    For every instruction set in ``isa`` and every thread count from
    ``min_threads`` to ``max_threads`` (inclusive), each requested routine
    is run ``nrepeats`` times on the catalog returned by ``read_catalog()``
    while its stderr output is captured.

    Parameters
    ----------
    min_threads : int
        Smallest number of threads to benchmark.
    max_threads : int
        Largest number of threads to benchmark; the default is the
        module-level ``max_threads`` at function definition time.
    nrepeats : int
        Number of timed runs per (isa, nthreads, routine) combination.
    keys : list of str, optional
        Routines to benchmark. Defaults to every entry of ``allkeys``.
    isa : list of str, optional
        Instruction sets to benchmark. Defaults to every entry of
        ``allisa``.

    Returns
    -------
    keys : list of str
        The routines that were benchmarked.
    isa : list of str
        The instruction sets that were benchmarked.
    runtimes : numpy structured array
        One row per timed run with the fields ``repeat``, ``name``, ``isa``,
        ``nthreads``, ``runtime``, ``serial_time``, ``pair_time`` and
        ``api_time``. ``name`` and ``isa`` are stored as byte strings
        (``'S16'``).

    Raises
    ------
    ValueError
        If ``keys`` or ``isa`` contains an entry that is not supported.
    """
    from Corrfunc.theory import DD, DDrppi, wp, xi

    allkeys = [
        # 'DDrppi',
        'DD',
        'wp',
        'xi'
    ]
    allisa = ['avx', 'sse42', 'fallback']

    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                      " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instructions sets benchmark are: {0}\n"\
                      "Found routine = {1}".format(allisa, i)
                raise ValueError(msg)

    print("Benchmarking theory routines = {0} with isa = {1}".format(
        keys, isa))
    x, y, z = read_catalog()

    rmax = 42.0
    rmin = 0.1
    nbins = 20
    bins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
    autocorr = 1
    pimax = rmax  # Set to rmax for comparisons between wp and xi
    boxsize = 420.0

    # Builtin int/float replace the np.int/np.float aliases, which were
    # removed in NumPy 1.24; the resulting field dtypes are unchanged.
    dtype = np.dtype([('repeat', int),
                      ('name', 'S16'),
                      ('isa', 'S16'),
                      ('nthreads', int),
                      ('runtime', float),
                      ('serial_time', float),
                      ('pair_time', float),
                      ('api_time', float)])

    totN = (max_threads - min_threads + 1) * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for nthreads in range(min_threads, max_threads + 1):
            print("Working on nthreads = {0}".format(nthreads),
                  file=sys.stderr)
            start_thread_index = index

            # (name, function, positional args) in the order they are run.
            routines = [
                ('DD', DD, (autocorr, nthreads, bins, x, y, z)),
                ('DDrppi', DDrppi,
                 (autocorr, nthreads, pimax, bins, x, y, z)),
                ('wp', wp, (boxsize, pimax, nthreads, bins, x, y, z)),
                ('xi', xi, (boxsize, nthreads, bins, x, y, z)),
            ]

            for name, func, args in routines:
                if name not in keys:
                    continue

                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = func(*args, verbose=True,
                                           c_api_timer=True, isa=run_isa)
                        t1 = time.time()

                        runtimes['name'][index] = name
                        runtimes['isa'][index] = run_isa
                        runtimes['nthreads'][index] = nthreads
                        runtimes['runtime'][index] = t1 - t0

                        # The verbose output captured in stderr_filename is
                        # handed to _get_times to obtain the two timings.
                        serial_time, pair_time = _get_times(stderr_filename)
                        runtimes['serial_time'][index] = serial_time
                        runtimes['pair_time'][index] = pair_time
                        runtimes['api_time'][index] = api_time
                        index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    return keys, isa, runtimes
def benchmark_mocks_threads_all(rmax_array=[10.0, 20.0, 40.0, 80.0, 100.0],
                                thetamax_array=[2., 4., 8., 16., 20.],
                                nrepeats=1, keys=None, isa=None):
    """Time the mocks pair counters as a function of maximum separation.

    ``thetamax_array`` and ``rmax_array`` are walked in lockstep. For every
    instruction set in ``isa`` and every (thetamax, rmax) pair, each
    requested routine is run ``nrepeats`` times with ``max_threads``
    threads while its stderr output is captured.

    Parameters
    ----------
    rmax_array : sequence of float
        Maximum separations used for the ``DDrppi`` bins (also used as
        ``pimax``).
    thetamax_array : sequence of float
        Maximum angles used for the ``DDtheta`` bins. If its length differs
        from ``rmax_array``, only the common leading entries are used.
    nrepeats : int
        Number of timed runs per (isa, scale, routine) combination.
    keys : list of str, optional
        Routines to benchmark. Defaults to every entry of ``allkeys``.
    isa : list of str, optional
        Instruction sets to benchmark. Defaults to every entry of
        ``allisa``.

    Returns
    -------
    keys : list of str
        The routines that were benchmarked.
    isa : list of str
        The instruction sets that were benchmarked.
    runtimes : numpy structured array
        One row per timed run with the fields ``repeat``, ``name``, ``isa``,
        ``rmax``, ``nthreads``, ``runtime``, ``serial_time``, ``pair_time``
        and ``api_time``. For ``DDtheta`` rows the ``rmax`` field holds
        thetamax.

    Raises
    ------
    ValueError
        If ``keys`` or ``isa`` contains an entry that is not supported.
    """
    from Corrfunc.mocks import DDrppi_mocks, DDtheta_mocks

    allkeys = [
        # 'DDrppi (DD)',
        # 'DDtheta (DD)',
        'DDrppi (DR)',
        'DDtheta (DR)'
    ]
    allisa = ['avx512f', 'avx', 'sse42', 'fallback']

    if keys is None:
        keys = allkeys
    else:
        for k in keys:
            if k not in allkeys:
                msg = "Valid routines to benchmark are: {0}\nFound routine"\
                      " = {1}".format(allkeys, k)
                raise ValueError(msg)

    if isa is None:
        isa = allisa
    else:
        for i in isa:
            if i not in allisa:
                msg = "Valid instructions sets benchmark are: {0}\n"\
                      "Found routine = {1}".format(allisa, i)
                raise ValueError(msg)

    rmax_array = np.array(rmax_array)
    thetamax_array = np.array(thetamax_array)
    print("Benchmarking mocks routines = {0} with isa = {1}".format(keys,
                                                                    isa))
    mocks_file = pjoin(dirname(abspath(Corrfunc.__file__)),
                       "../mocks/tests/data",
                       "Mr19_mock_northonly.rdcz.ff")
    ra, dec, cz = read_catalog(mocks_file)

    rand_file = pjoin(dirname(abspath(Corrfunc.__file__)),
                      "../mocks/tests/data",
                      "Mr19_randoms_northonly.rdcz.ff")
    rand_ra, rand_dec, rand_cz = read_catalog(rand_file)

    cosmology = 1
    rmin = 0.1
    nbins = 20
    nthreads = max_threads

    # Builtin int/float replace the np.int/np.float aliases, which were
    # removed in NumPy 1.24; the resulting field dtypes are unchanged.
    dtype = np.dtype([('repeat', int),
                      ('name', 'U16'),
                      ('isa', 'U16'),
                      ('rmax', float),
                      ('nthreads', int),
                      ('runtime', float),
                      ('serial_time', float),
                      ('pair_time', float),
                      ('api_time', float)])

    # zip() below stops at the shorter array, so size the output by the
    # number of pairs actually visited; otherwise rows of the np.empty
    # buffer would be returned uninitialised.
    nscales = min(len(rmax_array), len(thetamax_array))
    totN = nscales * len(keys) * len(isa) * nrepeats
    runtimes = np.empty(totN, dtype=dtype)

    index = 0
    stderr_filename = 'stderr.txt'
    for run_isa in isa:
        for thetamax, rmax in zip(thetamax_array, rmax_array):
            rbins = np.logspace(np.log10(rmin), np.log10(rmax), nbins)
            thetabins = np.logspace(np.log10(rmin), np.log10(thetamax),
                                    nbins)
            pimax = rmax  # Set to rmax for comparisons between wp and xi
            print("Working on rmax = {0}".format(rmax), file=sys.stderr)
            start_thread_index = index

            # (name, value stored in 'rmax', function, positional args,
            # extra keyword args), listed in the order they are run. The
            # first positional argument is autocorr (1 = DD, 0 = DR).
            routines = [
                ('DDtheta (DD)', thetamax, DDtheta_mocks,
                 (1, nthreads, thetabins, ra, dec),
                 {}),
                ('DDtheta (DR)', thetamax, DDtheta_mocks,
                 (0, nthreads, thetabins, ra, dec),
                 dict(RA2=rand_ra, DEC2=rand_dec)),
                ('DDrppi (DD)', rmax, DDrppi_mocks,
                 (1, cosmology, nthreads, pimax, rbins, ra, dec, cz),
                 {}),
                ('DDrppi (DR)', rmax, DDrppi_mocks,
                 (0, cosmology, nthreads, pimax, rbins, ra, dec, cz),
                 dict(RA2=rand_ra, DEC2=rand_dec, CZ2=rand_cz)),
            ]

            for name, scale, func, args, extra in routines:
                if name not in keys:
                    continue

                kwargs = dict(extra, verbose=True, c_api_timer=True,
                              isa=run_isa)
                for repeat in range(nrepeats):
                    runtimes['repeat'][index] = repeat
                    with stderr_redirected(to=stderr_filename):
                        t0 = time.time()
                        _, api_time = func(*args, **kwargs)
                        t1 = time.time()

                        runtimes['name'][index] = name
                        runtimes['isa'][index] = run_isa
                        runtimes['rmax'][index] = scale
                        runtimes['nthreads'][index] = nthreads
                        runtimes['runtime'][index] = t1 - t0

                        # The verbose output captured in stderr_filename is
                        # handed to _get_times to obtain the two timings.
                        serial_time, pair_time = _get_times(stderr_filename)
                        runtimes['serial_time'][index] = serial_time
                        runtimes['pair_time'][index] = pair_time
                        runtimes['api_time'][index] = api_time
                        index += 1

            print("{0}".format(runtimes[start_thread_index:index]))
            sys.stdout.flush()

    print("index = {0} totN = {1}".format(index, totN))
    return keys, isa, runtimes
import numpy as np
from os.path import dirname, abspath, join as pjoin

import Corrfunc
from Corrfunc.mocks.DDrppi_mocks import DDrppi_mocks
from Corrfunc.io import read_catalog
from Corrfunc.utils import convert_rp_pi_counts_to_wp


def _subsample_rows(factor, *columns):
    """Randomly keep ``len(columns[0]) // factor`` rows of parallel columns.

    A single set of row indices is drawn (with replacement, which is the
    ``np.random.choice`` default) and applied to every column, so entry ``i``
    of each returned array still comes from the same input row.
    """
    nrows = len(columns[0])
    # Floor division: np.random.choice needs an integer size, and ``/``
    # yields a float under Python 3.
    picked = np.random.choice(nrows, nrows // factor)
    return tuple(np.asarray(col)[picked] for col in columns)


galaxy_catalog = pjoin(dirname(abspath(Corrfunc.__file__)),
                       "../mocks/tests/data",
                       "Mr19_mock_northonly.rdcz.ff")

# Read the supplied galaxy catalog
RA, DEC, CZ = read_catalog(galaxy_catalog)
N = len(RA)

# Keep roughly one object in every ``down``
down = 100
RA, DEC, CZ = _subsample_rows(down, RA, DEC, CZ)
N = len(RA)
print(N)

# Read the supplied randoms catalog
random_catalog = pjoin(dirname(abspath(Corrfunc.__file__)),
                       "../mocks/tests/data",
                       "Mr19_randoms_northonly.rdcz.ff")
rand_RA, rand_DEC, rand_CZ = read_catalog(random_catalog)
rand_N = len(rand_RA)
rand_RA, rand_DEC, rand_CZ = _subsample_rows(down, rand_RA, rand_DEC, rand_CZ)
rand_N = len(rand_RA)
print(rand_N)
def _print_bin_table(title, header, rule_top, rule_bottom, row_format,
                     ncols, results, nrows):
    """Print the first ``nrows`` entries of ``results`` as a text table.

    Parameters
    ----------
    title, header, rule_top, rule_bottom : str
        Lines printed verbatim around the rows, in the order
        title, header, rule_top, <rows>, rule_bottom.
    row_format : str
        ``str.format`` template receiving the first ``ncols`` fields of a
        result entry as positional arguments.
    ncols : int
        Number of leading fields of each entry to print.
    results : sequence
        Indexable collection of entries; each entry is indexed by position.
    nrows : int
        Number of leading entries to print.
    """
    print(title)
    print(header)
    print(rule_top)
    for ibin in range(nrows):
        items = results[ibin]
        # Index explicitly rather than unpacking: only positional indexing
        # of an entry is known to be supported.
        print(row_format.format(*[items[icol] for icol in range(ncols)]))
    print(rule_bottom)


def main():
    """Run each theory correlation-function extension once and print results.

    Reads positions with ``read_catalog()``, then calls ``DD_extn``,
    ``DDrppi_extn``, ``wp_extn``, ``xi_extn`` and ``vpf_extn`` with unit
    weights (where weights are accepted) and prints the first few bins of
    every result, followed by the total and read-in wall-clock times.
    """
    tstart = time.time()
    t0 = tstart
    x, y, z = read_catalog()
    boxsize = 420.0
    t1 = time.time()
    print("Done reading the data - time taken = {0:10.1f} seconds"
          .format(t1 - t0))

    numbins_to_print = 5
    six_column_row = ("{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} "
                      "{4:10d} {5:10.4f}")

    print("Beginning Theory Correlation functions calculations")
    nthreads = 4
    pimax = 40.0
    binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
                    "../theory/tests/", "bins")
    autocorr = 1
    periodic = 1

    print("Running 3-D correlation function DD(r)")
    results_DD, _ = DD_extn(autocorr, nthreads, binfile, x, y, z,
                            weights1=np.ones_like(x),
                            weight_type='pair_product',
                            verbose=True, periodic=periodic,
                            boxsize=boxsize)
    _print_bin_table(
        "\n# **** DD(r): first {0} bins ******* ".format(numbins_to_print),
        "# rmin rmax rpavg npairs weightavg",
        "#############################################################",
        "-------------------------------------------------------------",
        "{0:12.4f} {1:12.4f} {2:10.4f} {3:10d} {4:10.4f}",
        5, results_DD, numbins_to_print)

    print("\nRunning 2-D correlation function DD(rp,pi)")
    results_DDrppi, _ = DDrppi_extn(autocorr, nthreads, pimax, binfile,
                                    x, y, z,
                                    weights1=np.ones_like(x),
                                    weight_type='pair_product',
                                    verbose=True, periodic=periodic,
                                    boxsize=boxsize)
    _print_bin_table(
        "\n# ****** DD(rp,pi): first {0} bins ******* ".format(
            numbins_to_print),
        "# rmin rmax rpavg pi_upper npairs weightavg",
        "########################################################################",
        "------------------------------------------------------------------------",
        six_column_row, 6, results_DDrppi, numbins_to_print)

    print("\nRunning 2-D projected correlation function wp(rp)")
    # wp_extn returns three values; only the first is used here.
    results_wp, _, _ = wp_extn(boxsize, pimax, nthreads, binfile,
                               x, y, z,
                               weights=np.ones_like(x),
                               weight_type='pair_product',
                               verbose=True)
    _print_bin_table(
        "\n# ****** wp: first {0} bins ******* ".format(numbins_to_print),
        "# rmin rmax rpavg wp npairs weightavg",
        "#######################################################################",
        "-----------------------------------------------------------------------",
        six_column_row, 6, results_wp, numbins_to_print)

    print("\nRunning 3-D auto-correlation function xi(r)")
    results_xi, _ = xi_extn(boxsize, nthreads, binfile, x, y, z,
                            weights=np.ones_like(x),
                            weight_type='pair_product',
                            verbose=True)
    _print_bin_table(
        "\n# ****** xi: first {0} bins ******* ".format(numbins_to_print),
        "# rmin rmax rpavg xi npairs weightavg",
        "#######################################################################",
        "-----------------------------------------------------------------------",
        six_column_row, 6, results_xi, numbins_to_print)
    print("Done with all four correlation calculations.")

    print("\nRunning VPF pN(r)")
    rmax = 10.0
    nbin = 10
    nspheres = 10000
    num_pN = 3
    seed = -1
    results_vpf, _ = vpf_extn(rmax, nbin, nspheres, num_pN, seed,
                              x, y, z,
                              verbose=True, periodic=periodic,
                              boxsize=boxsize)
    print("\n# ****** pN: first {0} bins ******* ".format(numbins_to_print))
    # Header line: one "r" column followed by one column per pN.
    print('# r ' + "".join(' p{0:0d} '.format(ipn)
                           for ipn in range(num_pN)))
    print("###########" + '################' * num_pN)
    for ibin in range(numbins_to_print):
        items = results_vpf[ibin]
        # items[0] is printed first; items[1:num_pN + 1] fill the pN columns.
        print('{0:10.2f} '.format(items[0]) +
              "".join(' {0:15.4e}'.format(items[ipn + 1])
                      for ipn in range(num_pN)))
    print("-----------------------------------------------------------")

    tend = time.time()
    # Adjacent literals instead of a backslash inside the string, which left
    # a stray "\ " (invalid escape sequence) in the message.
    print("Done with all functions. Total time taken = {0:10.1f} seconds. "
          "Read-in time = {1:10.1f} seconds.".format(tend - tstart, t1 - t0))