def initial_pool(prior_obj, eps0, N_particles, N_threads=1):
    """ Generate the initial particle pool, in parallel if N_threads > 1. """
    args_list = [[i, prior_obj, eps0, N_particles] for i in xrange(N_particles)]

    if N_threads > 1:
        pool = InterruptiblePool(processes=N_threads)
        mapfn = pool.map
        results = mapfn(initial_pool_sampling, args_list)
        pool.close()
        pool.terminate()
        pool.join()
    else:
        results = []
        for arg in args_list:
            results.append(initial_pool_sampling(arg))

    # each result row is [i, theta_1 ... theta_n, w, rho]
    results = np.array(results).T
    theta_t = results[1:prior_obj.n_params + 1, :]
    w_t = results[prior_obj.n_params + 1, :]
    rhos = results[prior_obj.n_params + 2, :]
    sig_t = covariance(theta_t, w_t)

    return theta_t, w_t, rhos, sig_t
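# For reference, a minimal sketch of the worker this maps over, assuming each
# returned row is laid out as [i, theta_1 ... theta_n, w, rho] so the slicing
# above lines up. `prior_obj.sampler` and `distance` are placeholders for the
# actual prior-draw and distance-to-data calls, not a confirmed API.
def initial_pool_sampling_sketch(args):
    i, prior_obj, eps0, N_particles = args
    rho = eps0 + 1.
    while rho > eps0:                     # rejection-sample until within eps0
        theta = prior_obj.sampler()       # draw a candidate from the prior
        rho = distance(theta)             # distance between simulation and data
    w = 1. / float(N_particles)           # uniform weights for the first pool
    return np.concatenate([[i], theta, [w, rho]])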
def log_multivariate_gaussian_Nthreads(x, mu, V, xcov, Nthreads=1):
    """ Compute log-likelihoods with multiprocessing: split the samples into
    one chunk per thread and evaluate the chunks in parallel.
    """
    n_samples = x.shape[0]

    pool = InterruptiblePool(Nthreads)
    mapfn = pool.map

    # chunk size per thread; the final slice may run past n_samples,
    # which numpy truncates silently
    Nchunk = int(np.ceil(float(n_samples) / Nthreads))

    arglist = [None] * Nthreads
    for i in range(Nthreads):
        s = i * Nchunk
        e = s + Nchunk
        arglist[i] = (x[s:e], mu, V, xcov[s:e])

    result = list(mapfn(lmg, arglist))

    # stack the per-chunk log-likelihoods back into one array
    logls = result[0]
    for i in range(1, Nthreads):
        logls = np.vstack((logls, result[i]))

    pool.close()
    pool.terminate()
    pool.join()
    return logls
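# `lmg` must be a module-level callable so multiprocessing can pickle it; the
# most that can be assumed from this code is that it unpacks one chunk's tuple
# and forwards it to a single-threaded evaluator (name assumed):
def lmg(args):
    x_chunk, mu, V, xcov_chunk = args
    return log_multivariate_gaussian(x_chunk, mu, V, xcov_chunk)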
def func_grad_lnX_Nthreads(self, params):
    """ Use multiprocessing to compute the negative log-likelihood and its
    gradient w.r.t. lnX, plus the terms coming from the regularization.
    """
    n_samples = self.N
    self.lnX = params
    # Nthreads is assumed to be defined at module level; it was previously
    # unpacked here: self.fl, self.f, self.g, self.H, Nthreads = args

    Pool = InterruptiblePool(Nthreads)
    mapfn = Pool.map
    Nchunk = int(np.ceil(float(n_samples) / Nthreads))

    arglist = [None] * Nthreads
    for i in range(Nthreads):
        s = int(i * Nchunk)
        e = int(s + Nchunk)
        arglist[i] = (self.lnX, self.F, self.B, self.fl, self.f,
                      self.g, self.H, s, e)

    result = list(mapfn(fg, arglist))

    # accumulate the per-chunk negative log-likelihoods and gradients
    nll, grad = result[0]
    for i in range(1, Nthreads):
        nll += result[i][0]
        grad += result[i][1]

    Pool.close()
    Pool.terminate()
    Pool.join()

    # regularization term and its derivative w.r.t. lnX
    reg_func, reg_grad = self.reg_func_grad_lnX()

    return nll + reg_func, grad + reg_grad
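# Sketch of the per-chunk worker `fg` this method maps over: it receives the
# current model arrays plus its [s, e) sample range and returns that chunk's
# (negative log-likelihood, gradient w.r.t. lnX) pair, which the caller sums.
# The body below is a placeholder; only the unpacking order and the return
# shape are implied by the code above.
def fg(args):
    lnX, F, B, fl, f, g, H, s, e = args
    nll_chunk = 0.
    grad_chunk = np.zeros_like(lnX)
    # ... accumulate likelihood and gradient terms for samples s..e-1 ...
    return nll_chunk, grad_chunk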
def pmc_abc(N_threads=N_threads):
    # initial pool
    theta_t, w_t, rhos, sig_t = initial_pool()
    t = 0  # iteration number

    plot_thetas(theta_t, w_t, t)
    plt.savefig("/home/mj/public_html/scatter_hod_gaussian_t" + str(t) + ".png")
    plt.close()

    while t < N_iter:
        # tighten the distance threshold to the 75th percentile of the
        # previous iteration's distances
        eps_t = np.percentile(rhos, 75)
        print "New Distance Threshold Eps_t = ", eps_t

        theta_t_1 = theta_t.copy()
        w_t_1 = w_t.copy()
        sig_t_1 = sig_t.copy()

        args_list = [[i, theta_t_1, w_t_1, sig_t_1, eps_t]
                     for i in xrange(N_particles)]
        # serial alternative kept for debugging:
        #results = []
        #for args in args_list:
        #    results.append(importance_pool_sampling(args))
        pool = InterruptiblePool(processes=N_threads)
        mapfn = pool.map
        results = mapfn(importance_pool_sampling, args_list)
        pool.close()
        pool.terminate()
        pool.join()

        results = np.array(results).T
        theta_t = results[1:n_params + 1, :]
        w_t = results[n_params + 1, :]
        rhos = results[n_params + 2, :]
        sig_t = np.cov(theta_t)

        t += 1

        plot_thetas(theta_t, w_t, t)
        plt.savefig("/home/mj/public_html/scatter_hod_gaussian_t" + str(t) + ".png")
        plt.close()
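# For context, a hedged sketch of the importance_pool_sampling step being
# mapped over (the real implementation lives elsewhere): resample a particle
# from the previous pool by weight, perturb it with a Gaussian kernel of
# covariance sig_t_1, repeat until the distance beats eps_t, then weight it by
# prior density over the kernel mixture. `prior_pdf` and `distance` are
# placeholders.
from scipy.stats import multivariate_normal  # used only by this sketch

def importance_pool_sampling_sketch(args):
    i, theta_t_1, w_t_1, sig_t_1, eps_t = args
    n_part = theta_t_1.shape[1]
    rho = eps_t + 1.
    while rho > eps_t:
        j = np.random.choice(n_part, p=w_t_1 / w_t_1.sum())
        theta = np.random.multivariate_normal(theta_t_1[:, j], sig_t_1)
        rho = distance(theta)
    # importance weight: prior density over kernel mixture density
    kernel = np.array([multivariate_normal.pdf(theta, mean=theta_t_1[:, k],
                                               cov=sig_t_1)
                       for k in xrange(n_part)])
    w = prior_pdf(theta) / np.sum(w_t_1 * kernel)
    return np.concatenate([[i], theta, [w, rho]])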
def pmc_abc(prior_dict, N_particles=100, N_iter=30, eps0=20.0, N_threads=1):
    """ Population Monte Carlo ABC: iteratively tighten the distance
    threshold and importance-resample the particle pool.
    """
    prior_obj = Prior(prior_dict)

    # initial pool
    theta_t, w_t, rhos, sig_t = initial_pool(prior_obj, eps0, N_particles,
                                             N_threads=N_threads)
    t = 0  # iteration number
    #plot_thetas(theta_t, w_t, prior_dict, t)

    while t < N_iter:
        eps_t = np.percentile(rhos, 75)
        print 'New Distance Threshold Eps_t = ', eps_t

        theta_t_1 = theta_t.copy()
        w_t_1 = w_t.copy()
        sig_t_1 = sig_t.copy()

        args_list = [[i, prior_obj, theta_t_1, w_t_1, sig_t_1, eps_t]
                     for i in xrange(N_particles)]

        if N_threads > 1:
            pool = InterruptiblePool(processes=N_threads)
            mapfn = pool.map
            results = mapfn(importance_pool_sampling, args_list)
            pool.close()
            pool.terminate()
            pool.join()
        else:
            results = []
            for args in args_list:
                results.append(importance_pool_sampling(args))

        results = np.array(results).T
        theta_t = results[1:prior_obj.n_params + 1, :]
        w_t = results[prior_obj.n_params + 1, :]
        rhos = results[prior_obj.n_params + 2, :]
        sig_t = covariance(theta_t, w_t)

        t += 1
        plot_thetas(theta_t, w_t, prior_dict, t)
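# covariance(theta, w) above is a weighted sample covariance rather than
# np.cov's unweighted default; a minimal sketch of the assumed helper
# (equivalent to np.cov(theta, aweights=w) up to normalization convention):
def covariance(theta, w):
    w = w / np.sum(w)
    mean = np.sum(w * theta, axis=1)[:, None]   # weighted mean per parameter
    diff = theta - mean
    # weighted second moment with the standard 1 - sum(w^2) correction
    return np.dot(w * diff, diff.T) / (1. - np.sum(w ** 2))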
def pmc_abc(N_threads=N_threads):
    # initial pool
    theta_t, w_t, rhos, sig_t = initial_pool()
    w_t = w_t / np.sum(w_t)
    t = 0  # iteration number

    plot_thetas(theta_t, w_t, t)

    while t < N_iter:
        # tighten the threshold aggressively for the first few iterations,
        # then more gently
        if t < 4:
            eps_t = np.percentile(np.atleast_2d(rhos), 20, axis=1)
        else:
            eps_t = np.percentile(np.atleast_2d(rhos), 50, axis=1)
        print 'New Distance Threshold Eps_t = ', eps_t, "t=", t

        theta_t_1 = theta_t.copy()
        w_t_1 = w_t.copy()
        sig_t_1 = sig_t.copy()

        args_list = [[i, theta_t_1, w_t_1, sig_t_1, eps_t]
                     for i in xrange(N_particles)]

        # serial alternative kept for debugging:
        #results = []
        #for args in args_list:
        #    results.append(importance_pool_sampling(args))

        # parallel
        pool = InterruptiblePool(processes=N_threads)
        mapfn = pool.map
        results = mapfn(importance_pool_sampling, args_list)
        pool.close()
        pool.terminate()
        pool.join()

        results = np.array(results).T
        theta_t = results[1:n_params + 1, :]
        w_t = results[n_params + 1, :]
        w_t = w_t / np.sum(w_t)
        rhos = results[n_params + 2:, :]
        #sig_t = knn_sigma(theta_t, k=10)
        sig_t = 2. * covariance(theta_t, w_t)

        t += 1
        plot_thetas(theta_t, w_t, t)
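# Design notes on the variant above: the 2. * covariance(theta_t, w_t) kernel
# is the standard ABC-PMC choice of twice the weighted empirical covariance of
# the previous pool (Beaumont et al. 2009), and np.atleast_2d on rhos lets the
# same percentile call serve one or several distance metrics per particle
# (this version keeps all rows from n_params + 2 onward as distances).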
def initial_pool():
    pool = InterruptiblePool(processes=N_threads)
    mapfn = pool.map
    args_list = [i for i in xrange(N_particles)]
    results = mapfn(initial_pool_sampling, args_list)
    pool.close()
    pool.terminate()
    pool.join()

    results = np.array(results).T
    theta_t = results[1:n_params + 1, :]
    w_t = results[n_params + 1, :]
    rhos = results[n_params + 2, :]
    sig_t = np.cov(theta_t)

    return theta_t, w_t, rhos, sig_t
def initial_pool():
    args_list = np.arange(N_particles)

    # serial alternative kept for debugging:
    #results = []
    #for arg in args_list:
    #    results.append(initial_pool_sampling(arg))

    # parallel
    pool = InterruptiblePool(processes=N_threads)
    mapfn = pool.map
    results = mapfn(initial_pool_sampling, args_list)
    pool.close()
    pool.terminate()
    pool.join()

    results = np.array(results).T
    theta_t = results[1:n_params + 1, :]
    w_t = results[n_params + 1, :]
    w_t = w_t / np.sum(w_t)
    rhos = results[n_params + 2:, :]
    sig_t = covariance(theta_t, w_t)

    return theta_t, w_t, rhos, sig_t
def parallel_bulkfit(path, num_splits=20, ncores=8, start_pt=0):
    '''
    Run bulk fitting in parallel. Results are written out in chunks to make
    restarting easier.
    '''
    spectra = [f for f in os.listdir(path) if f[-4:] == 'fits']

    split_at = len(spectra) // num_splits
    splits = [split_at * i for i in range(1, num_splits)]
    splits.append(len(spectra))
    splits = splits[start_pt:]

    prev_split = 0
    for i, split in enumerate(splits):
        print("On split " + str(i + 1) + " of " + str(len(splits)))
        print(str(datetime.now()))

        split_spectra = spectra[prev_split:split]

        pool = Pool(processes=ncores)
        output = pool.map(do_specfit, split_spectra)
        pool.close()
        pool.join()

        # one column per spectrum; note the first column keeps the full
        # filename while the rest strip the '.fits' extension
        df = DataFrame(output[0], columns=split_spectra[:1])
        for out, spec in zip(output[1:], split_spectra[1:]):
            df[spec[:-5]] = out

        df.to_csv("spectral_fitting_" + str(i + 1) + ".csv")
        prev_split = split
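# Usage sketch (path illustrative): fit every *.fits spectrum in a directory
# on 8 cores; if a run dies after chunk k, restart with start_pt=k to skip the
# chunks whose CSVs were already written.
# parallel_bulkfit("/data/spectra/", num_splits=20, ncores=8, start_pt=0)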
def pmc_abc(self):
    """ Run the PMC-ABC iterations, importance-sampling a new particle pool
    at each step.
    """
    self.rhos = self.initial_pool()

    while self.t < self.T:
        self.eps_t = np.percentile(self.rhos, 75)
        print 'Epsilon t', self.eps_t

        self.theta_t_1 = self.theta_t.copy()
        self.w_t_1 = self.w_t.copy()
        self.sig_t_1 = self.sig_t.copy()

        pool = InterruptiblePool(self.Nthreads)
        mapfn = pool.map
        args_list = [i for i in xrange(self.N)]
        results = mapfn(unwrap_self_importance_sampling,
                        zip([self] * len(args_list), args_list))
        pool.close()
        pool.terminate()
        pool.join()

        pars = np.array(results).T
        self.theta_t = pars[1:self.n_params + 1, :].copy()
        self.w_t = pars[self.n_params + 1, :].copy()
        self.rhos = pars[self.n_params + 2, :].copy()
        self.sig_t = 2.0 * np.cov(self.theta_t)

        self.t += 1
        self.writeout()
        self.plotout()

    return None
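# Bound methods can't be pickled by Python 2's multiprocessing, which is why
# the loop maps a module-level unwrapper over (instance, index) pairs; its
# assumed shape (the method name importance_sampling is an assumption):
def unwrap_self_importance_sampling(arg):
    pmc_instance, i = arg
    return pmc_instance.importance_sampling(i)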
"california_west-250_normed.fits"] fits350 = ["pipeCenterB59-350.fits", "lupusI-350.fits", "aquilaM2-350.fits", "orionB-350.fits", "polaris-350.fits", "chamaeleonI-350.fits", "perseus04-350.fits", "taurusN3-350.fits", "ic5146-350.fits", "orionA-C-350.fits", "orionA-S-350.fits", "california_cntr-350.fits", "california_east-350.fits", "california_west-350.fits"] distances = [145., 150., 260., 400., 150., 170., 235., 140., 460., 400., 400., 450., 450., 450.] # pc offsets = [31.697, 14.437, 85.452, 26.216, 9.330, -879.063, 23.698, 21.273, 20.728, 32.616, 35.219, 9.005, 10.124, 14.678] beamwidth_250 = [18.2] * len(fits250) beamwidth_350 = [24.9] * len(fits350) # Inputs (adjust to desired wavelength) beamwidths = beamwidth_350 # + beamwidth_350 distances = distances # + distances fits_files = fits350 # + fits350 print "Started at " + str(datetime.now()) if not MULTICORE: for i, filename in enumerate(fits_files): wrapper(filename, distances[i], beamwidths[i], offsets[i], verbose=False) else: pool = Pool(processes=NCORES) pool.map(single_input, izip(fits_files, distances, beamwidths, offsets)) pool.close() # pool.join()
def build_multipro(type, catalog_name, corr_name, n_mocks, Nthreads=8,
                   ell=2, Ngrid=360, **kwargs):
    ''' Calculate dLOS for catalogs in parallel using an interruptible pool,
    a multiprocessing pool that allows for interruptions.

    Parameters
    ----------
    catalog_name : name of catalog
    corr_name : name of correction
    n_mocks : number of mock catalogs to calculate
    Nthreads : number of CPUs to use
    '''
    if isinstance(n_mocks, list):
        n_mock_list = n_mocks
    else:
        n_mock_list = range(1, n_mocks + 1)

    corrdict = {}
    if catalog_name == 'nseries':
        if isinstance(corr_name, dict):
            corrdict = corr_name
        else:
            corrdict['name'] = corr_name
            if 'dlospeak' in corr_name:
                # hardcoded values for the best-fit dLOS peak parameters
                corrdict['fit'] = 'gauss'
                corrdict['sigma'] = 3.9
                corrdict['fpeak'] = 0.68
            if 'env' in corr_name:
                # hardcoded values for the galaxy environment parameters
                corrdict['n_NN'] = 5
            if 'photoz' in corr_name:
                corrdict['d_photoz_tail_cut'] = 15
            if corr_name == 'fourier_tophat':
                corrdict['fs'] = 1.0
                corrdict['rc'] = 0.43
                corrdict['k_fit'] = 0.7
                corrdict['k_fixed'] = 0.84

    if type == 'bk':
        arglist = [
            [{'catalog': {'name': catalog_name, 'n_mock': i_mock},
              'spec': {'P0': 20000, 'Lbox': 3600, 'Ngrid': Ngrid}},
             kwargs]
            for i_mock in n_mock_list]
    else:
        arglist = [
            [{'catalog': {'name': catalog_name, 'n_mock': i_mock},
              'correction': corrdict,
              'spec': {'P0': 20000, 'Lbox': 3600, 'Ngrid': Ngrid, 'ell': ell}},
             ell, kwargs]
            for i_mock in n_mock_list]

    if Nthreads > 1:
        # Pewl is assumed to be an import alias for InterruptiblePool
        pool = Pewl(processes=Nthreads)
        mapfn = pool.map
        if type == 'data':
            mapfn(build_corrdata_wrapper, arglist)
        elif type == 'pk':
            mapfn(build_pk_wrapper, arglist)
        elif type == 'bk':
            mapfn(build_bk_wrapper, arglist)
        pool.close()
        pool.terminate()
        pool.join()
    else:
        for arg in arglist:
            if type == 'data':
                build_corrdata_wrapper(arg)
            elif type == 'pk':
                build_pk_wrapper(arg)
            elif type == 'bk':
                build_bk_wrapper(arg)

    return None
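# Example invocation (argument values illustrative only): compute P(k)
# quadrupoles for the first 10 Nseries mocks with the dLOS-peak correction
# on 8 threads.
# build_multipro('pk', 'nseries', 'dlospeak', 10, Nthreads=8, ell=2, Ngrid=360)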