def get_tcc(configuration, tccrawfile, box, rcut=1., criterium="not marked"):
    """Get the connected clusters formed by marked or not-marked particles."""
    xyz = pl.loadtxt(configuration, skiprows=2, usecols=[1, 2, 3])
    cl = pl.loadtxt(tccrawfile, skiprows=3, dtype="S1")
    # 'A'/'B' labels are not-marked particles, 'C'/'D' are marked
    if criterium == "not marked":
        select = xyz[(cl == 'A') + (cl == 'B')]
    elif criterium == "marked":
        select = xyz[(cl == 'C') + (cl == 'D')]
    else:
        raise ValueError("criterium must be 'marked' or 'not marked'")
    T = PeriodicCKDTree(box, select)
    # Find neighbors within a fixed distance of each point
    balls = T.query_ball_point(select, r=rcut)
    visited = pl.zeros(select.shape[0])
    clusters = []

    def addballs(p, cluster):
        # Depth-first flood fill over the neighbor lists
        if visited[p] == 0:
            visited[p] = 1
            cluster.append(p)
            for e in balls[p]:
                addballs(e, cluster)

    for i in xrange(select.shape[0]):
        cluster = []
        addballs(i, cluster)
        if len(cluster) > 0:
            clusters.append(cluster)
    return clusters
def get_marked(xyz, labels, box, marker=True, rcut=1.4, periodic=False):
    """Get the connected clusters formed by particles whose label matches marker."""
    select = xyz[labels == marker]
    if periodic:
        T = PeriodicCKDTree(box, select)
    else:
        T = cKDTree(select)
    # Find neighbors within a fixed distance of each point
    balls = T.query_ball_point(select, r=rcut)
    visited = pl.zeros(select.shape[0])
    clusters = []

    def addballs(p, cluster):
        # Depth-first flood fill over the neighbor lists
        if visited[p] == 0:
            visited[p] = 1
            cluster.append(p)
            for e in balls[p]:
                addballs(e, cluster)

    for i in xrange(select.shape[0]):
        cluster = []
        addballs(i, cluster)
        if len(cluster) > 0:
            clusters.append(cluster)
    return clusters
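# Minimal usage sketch for get_marked() (the same flood-fill clustering that
# get_tcc() uses). The random data below and the periodic_kdtree import are
# assumptions for illustration; pl, cKDTree and PeriodicCKDTree are the
# module-level names the function relies on.
import numpy as np
import pylab as pl
from scipy.spatial import cKDTree
from periodic_kdtree import PeriodicCKDTree

box = np.array([10., 10., 10.])
xyz = np.random.uniform(0, 10, (500, 3))
labels = np.random.rand(500) < 0.3  # mark roughly 30% of the particles
clusters = get_marked(xyz, labels, box, marker=True, rcut=1.4, periodic=True)
print "%d clusters; largest has %d particles" % (
    len(clusters), max(len(c) for c in clusters))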
def test_random_ball_vectorized_compiled():
    n = 20
    m = 5
    bounds = np.ones(m)
    T = PeriodicCKDTree(bounds, np.random.randn(n, m))
    r = T.query_ball_point(np.random.randn(2, 3, m), 1)
    assert_equal(r.shape, (2, 3))
    assert_(isinstance(r[0, 0], list))
def velocity_profile(self):
    radius_array = np.linspace(0, 200, self.N + 1)
    velocity_profile = np.zeros(self.N + 1)
    N_in_velocity = np.zeros(self.N + 1)
    bounds = np.array([self.box_size, self.box_size, self.box_size])
    tree = PeriodicCKDTree(bounds, self.galaxy_cat)
    print "Calculating velocity profile"
    for i in range(len(self.void_cat[:, 0])):
        current_number_of_galaxies = 0
        current_velocity = 0
        for j in range(1, self.N + 1):
            # Galaxies within the sphere of radius radius_array[j] around this void
            neighbor_inds = tree.query_ball_point(self.void_cat[i, :],
                                                  r=radius_array[j])
            r_void = self.void_cat[i]
            galaxies_near_point = self.galaxy_cat[neighbor_inds]
            v_galaxy = self.velocity_cat[neighbor_inds]
            r_vec = r_void - galaxies_near_point
            galaxies_near_point = len(galaxies_near_point[:, 0])
            # Subtract the previous sphere to get the count in the current shell
            galaxies_in_shell = galaxies_near_point - current_number_of_galaxies
            radial_velocity = (v_galaxy * r_vec).sum(axis=1) / np.linalg.norm(r_vec, axis=1)
            radial_velocity = np.sum(radial_velocity) - current_velocity
            velocity_profile[j] += radial_velocity / np.maximum(1.0, galaxies_in_shell)
            N_in_velocity[j] += galaxies_in_shell
            current_velocity += radial_velocity
            current_number_of_galaxies += galaxies_in_shell

    # alternative normalization: velocity_profile / np.maximum(np.ones(self.N+1), N_in_velocity)
    v_final = velocity_profile / len(self.void_cat[:, 0])

    fig, ax = plt.subplots()
    ax.plot(radius_array, v_final)
    ax.set_xlabel("radius [Mpc/h]")
    ax.set_ylabel(r"$v_r(r)$ [km/s]")
    np.save("datafiles/velocity_profiles/velocity_profile" + self.handle, v_final)
    fig.savefig("figures/velocity_profiles/velocity_profile" + self.handle + ".pdf")
Nlist = np.zeros(s, dtype=np.int)

# Boundaries (0 or negative means open boundaries in that dimension)
# changing bounds manually
bounds = np.array([dx, dy, dz])  # xy periodic, open along z
# NB: with the convention above, an open z boundary requires dz <= 0;
# all-positive bounds make the box periodic in every dimension.

# Build kd-tree
T = PeriodicCKDTree(bounds, x)

# Find neighbors within a fixed distance of each point
print "Building Neighborlist..."
neighbors = []
for i in xrange(len(x)):
    # r = cutoff (Angstrom) for making Nlist
    localneigh = T.query_ball_point(x[i], r=2.1)
    # query_ball_point includes the query atom itself; move its index to the front
    localneigh.remove(i)
    localneigh.insert(0, i)
    neighbors.append(localneigh)

print "Neighborlist built! Writing data to file...."
print "***********writing with atom types*****************"
outFile = open('Nlist-types' + '-' + outputfile, 'w')
for i in xrange(s[0]):
    # Slice the atom types using the neighbor indices; have to subtract 1
    # from each index because it was added in the neighborlist build
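# Following the convention noted above (0 or negative bound => open boundary),
# a box that really is periodic in x and y but open along z would be built
# like this (a sketch; dx, dy, dz and x as in the fragment above):
bounds_xy_periodic = np.array([dx, dy, -dz])  # or [dx, dy, 0.]
T_xy = PeriodicCKDTree(bounds_xy_periodic, x)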
# This will take any void and build shells around it up to 2*R_v
# and find the number density per shell using the volume of the shell.
R_shell = np.linspace(0.001, 2 * zone_rad[np.int(zone[arb_ind])], 20)  # shells from ~0 to 2*R_v in units of Mpc/h
V_shell = ((4. * pi) / 3.) * R_shell**3.  # volume enclosed by each shell radius, (Mpc/h)**3
tot_numden = numpart / (Lbox**3.)

# Density-minimum centre of the zone, computed once instead of per query
centre = [x_denmin[np.int(zone[arb_ind])],
          y_denmin[np.int(zone[arb_ind])],
          z_denmin[np.int(zone[arb_ind])]]

count = []
count_void = []
nden = []
for i in R_shell:
    # Number of halos in each concentric sphere of radius given by R_shell
    count_void.append(len(periodic_tree.query_ball_point(centre, i)))

for i in range(0, len(R_shell)):
    # Number density in each shell: difference of the cumulative counts within
    # consecutive radii, divided by the shell volume
    if i == 0:
        count_temp = len(periodic_tree.query_ball_point(centre, R_shell[i]))
        nden_temp = count_temp / V_shell[i]
    else:
        count_temp1 = len(periodic_tree.query_ball_point(centre, R_shell[i]))
        count_temp2 = len(periodic_tree.query_ball_point(centre, R_shell[i - 1]))
        count_temp = count_temp1 - count_temp2
        nden_temp = count_temp / (V_shell[i] - V_shell[i - 1])
    count.append(count_temp)
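# The same per-shell densities can be read off the cumulative counts already
# gathered in count_void above (a sketch; same names as in the fragment):
# np.diff turns cumulative sphere counts into per-shell counts, and the
# innermost sphere (i == 0) is simply count_void[0] / V_shell[0].
counts_per_shell = np.diff(count_void)
nden_per_shell = counts_per_shell / np.diff(V_shell)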
def overdensity_cylinder(gals, coods, R, dc, L, pc_stats=False,
                         cluster_mass_lim=1e4, n=100, verbose=False):
    """
    Find overdensity statistics over the whole simulation box for cylindrical apertures.

    Args:
        gals - dataframe of galaxy properties
        coods - coordinates at which to calculate statistics, typically galaxy or random coordinates
        R - aperture radius, cMpc
        dc - half aperture depth, cMpc
        L - box length, cMpc
        pc_stats - bool, calculate completeness and purity of each region
        cluster_mass_lim - limiting descendant mass above which to classify clusters, z0_central_mcrit200
        n - chunk length

    Returns:
        out_stats - output statistics, numpy array of shape [len(coods), 4]
            0 - overdensity
            1 - completeness
            2 - purity
            3 - descendant mass
    """
    dimensions = np.array([L, L, L])

    if verbose:
        print "Building KDtree..."
    T = PeriodicCKDTree(dimensions, gals[['zn_x', 'zn_y', 'zn_z']])

    avg = float(gals.shape[0]) / L**3  # average number density, cMpc^-3
    out_stats = np.zeros((len(coods), 4))
    vol_avg = np.pi * R**2 * (2 * dc) * avg  # average count in the chosen cylindrical volume

    # can't calculate distances all in one go, so chunk the coordinates
    for j, c in coods.groupby(np.arange(len(coods)) // n):
        if verbose:  # print progress
            if j % 100 == 0:
                print round(float(c.shape[0] * (j + 1)) / coods.shape[0] * 100, 2), '%'
                sys.stdout.flush()

        # find all galaxies within a sphere of radius the max extent of the cylinder
        gal_index = T.query_ball_point(c, r=(R**2 + dc**2)**0.5)

        # filter by cylinder using norm_coods()
        gal_index = [np.array(gal_index[k])[norm_coods(
            gals.iloc[gal_index[k]][['zn_x', 'zn_y', 'zn_z']].values,
            c.ix[k + j * n].values, R=R, half_deltac=dc, L=L)]
            for k in range(len(c))]

        start_index = (j * n)  # save start index

        # calculate dgal
        out_stats[start_index:(start_index + len(c)), 0] = \
            (np.array([len(x) for x in gal_index]) - vol_avg) / vol_avg

        if pc_stats:
            # calculate completeness and purity statistics
            for i in range(len(gal_index)):
                cluster_ids = gals.iloc[gal_index[i]]
                cluster_ids = Counter(
                    cluster_ids[cluster_ids['z0_central_mcrit200'] >
                                cluster_mass_lim]['z0_centralId'])

                if len(cluster_ids) > 0:
                    cstats = np.zeros((len(cluster_ids), 2))
                    for k, (q, no) in enumerate(cluster_ids.items()):
                        cluster_gals = gals.ix[gals['z0_centralId'] == q]
                        cstats[k, 0] = float(no) / len(cluster_gals)   # completeness
                        cstats[k, 1] = float(no) / len(gal_index[i])   # purity

                    # find indices of max completeness and purity in the cstats array
                    max_completeness = np.where(cstats[:, 0] == cstats[:, 0].max())[0]
                    max_purity = np.where(cstats[:, 1] == cstats[:, 1].max())[0]

                    # sometimes multiple clusters have the same completeness or purity
                    # in a single candidate - use the cluster with the highest
                    # complementary completeness/purity
                    if len(max_completeness) > 1:
                        # get matches between completeness and purity
                        matches = [x in max_purity for x in max_completeness]
                        if np.sum(matches) > 0:
                            # just use the first one
                            max_completeness = [np.where(matches)[0][0]]
                            max_purity = [np.where(matches)[0][0]]
                        else:
                            # among tied completeness, pick the highest purity
                            max_completeness = [max_completeness[
                                np.argmax(cstats[max_completeness, 1])]]

                    if len(max_purity) > 1:
                        matches = [x in max_completeness for x in max_purity]
                        if np.sum(matches) > 0:
                            max_completeness = [np.where(matches)[0][0]]
                            max_purity = [np.where(matches)[0][0]]
                        else:
                            # among tied purity, pick the highest completeness
                            max_purity = [max_purity[
                                np.argmax(cstats[max_purity, 0])]]

                    # sometimes the cluster with the highest completeness does not
                    # have the highest purity, or vice versa - use the cluster with
                    # the highest combined purity/completeness added in quadrature
                    if max_completeness[0] != max_purity[0]:
                        max_completeness = [np.argmax(
                            [pow(np.sum(x**2), 0.5) for x in cstats])]
                        max_purity = max_completeness

                    # save completeness and purity values
                    out_stats[start_index + i, 1] = cstats[max_completeness[0], 0]  # completeness
                    out_stats[start_index + i, 2] = cstats[max_purity[0], 1]        # purity

                    # save descendant mass: filter by cluster id, save z0 halo mass.
                    # max_completeness and max_purity are equal by this point
                    out_stats[start_index + i, 3] = gals.loc[
                        gals['z0_centralId'] ==
                        cluster_ids.keys()[max_completeness[0]],
                        'z0_central_mcrit200'].iloc[0]
                else:
                    # if no galaxies in aperture
                    out_stats[start_index + i, 1] = 0.
                    out_stats[start_index + i, 2] = 0.
                    out_stats[start_index + i, 3] = np.nan

    return out_stats
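# Hypothetical usage sketch for overdensity_cylinder(). Everything below is an
# assumption for illustration: the random galaxy table and the stand-in
# norm_coods(), which in the original code is an external helper assumed to
# return a boolean mask of the points falling inside the cylinder after
# periodic wrapping.
import numpy as np
import pandas as pd

def norm_coods(coods, centre, R, half_deltac, L):
    # minimal stand-in: wrap separations into [-L/2, L/2), then test the
    # cylindrical aperture (radius R in the x-y plane, half-depth along z)
    d = (coods - centre + L / 2.) % L - L / 2.
    return (d[:, 0]**2 + d[:, 1]**2 < R**2) & (np.abs(d[:, 2]) < half_deltac)

L = 100.
gals = pd.DataFrame(np.random.uniform(0, L, (5000, 3)),
                    columns=['zn_x', 'zn_y', 'zn_z'])
coods = gals[['zn_x', 'zn_y', 'zn_z']].sample(50).reset_index(drop=True)
stats = overdensity_cylinder(gals, coods, R=5., dc=7., L=L)
print stats[:5, 0]  # overdensity of the first five apertures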
w = T2.query(queries)
print "PeriodicCKDTree %d lookups:\t%g" % (r, time.time() - t)
del w

T3 = PeriodicCKDTree(bounds, data, leafsize=n)
t = time.time()
w = T3.query(queries)
print "flat PeriodicCKDTree %d lookups:\t%g" % (r, time.time() - t)
del w

t = time.time()
w1 = T1.query_ball_point(queries, 0.2)
print "PeriodicKDTree %d ball lookups:\t%g" % (r, time.time() - t)

t = time.time()
w2 = T2.query_ball_point(queries, 0.2)
print "PeriodicCKDTree %d ball lookups:\t%g" % (r, time.time() - t)

t = time.time()
w3 = T3.query_ball_point(queries, 0.2)
print "flat PeriodicCKDTree %d ball lookups:\t%g" % (r, time.time() - t)

all_good = True
for a, b in zip(w1, w2):
    if sorted(a) != sorted(b):
        all_good = False
for a, b in zip(w1, w3):
    if sorted(a) != sorted(b):
        all_good = False
print "Ball lookups agree? %s" % str(all_good)
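# The benchmark fragment above starts mid-script. A minimal setup sketch for
# the names it references (bounds, data, queries, r, n, t, T1, T2) might look
# like this; the sizes are illustrative assumptions, not the original values.
import time
import numpy as np
from periodic_kdtree import PeriodicKDTree, PeriodicCKDTree

m = 3       # dimensions
n = 2000    # number of data points
r = 200     # number of query points
bounds = np.ones(m)  # unit box, periodic in every dimension
data = np.random.uniform(size=(n, m))
queries = np.random.uniform(size=(r, m))

T1 = PeriodicKDTree(bounds, data)   # pure-Python tree
T2 = PeriodicCKDTree(bounds, data)  # compiled (cKDTree-backed) tree
t = time.time()
w = T1.query(queries)
print "PeriodicKDTree %d lookups:\t%g" % (r, time.time() - t)
del w
t = time.time()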
def delta_and_sigma_vz_galaxy(self, array_files=None, dictionary=False):
    """
    Calculates the density profile and velocity dispersion of voids in real
    space. Requires xi_vg_real_func() to be run first, as this gives the upper
    and lower bounds of the radius array, avoiding out-of-bounds errors in the
    splines.
    """
    radius_array = np.linspace(1, 200, self.N + 1)
    if array_files is None:
        bounds = np.array([self.box_size, self.box_size, self.box_size])
        tree = PeriodicCKDTree(bounds, self.galaxy_cat)

        delta = np.zeros(self.N + 1)
        E_vz = np.zeros(self.N + 1)
        E_vz2 = np.zeros(self.N + 1)
        sigma_vz = np.zeros(self.N + 1)
        galaxies_in_shell_arr = np.zeros(self.N + 1)

        print "Starting density profile and velocity dispersion calculation"
        for i in range(len(self.void_cat[:, 0])):
            current_number_of_galaxies = 0
            current_E_vz = 0
            current_E_vz2 = 0
            E_vz_in_shell = 0
            E_vz2_in_shell = 0
            for j in range(1, self.N + 1):
                # Find galaxy positions and velocities within a given radius
                # around the current void
                neighbor_inds = tree.query_ball_point(self.void_cat[i, :],
                                                      r=radius_array[j])
                shell_volume = 4.0 * np.pi * (radius_array[j]**3 -
                                              radius_array[j - 1]**3) / 3.0

                velocity_near_point = self.galaxy_vz[neighbor_inds]
                galaxies_near_point = self.galaxy_cat[neighbor_inds]
                galaxies_near_point = len(galaxies_near_point[:, 0])

                # Subtract the previous sphere to get galaxies in the current shell
                galaxies_in_shell = galaxies_near_point - current_number_of_galaxies

                # Accumulate terms used in the expectation values E[v_z**2] and E[v_z]**2
                if galaxies_near_point > 0:
                    E_vz2_in_shell = sum(velocity_near_point**2) - current_E_vz2
                    E_vz_in_shell = sum(velocity_near_point) - current_E_vz

                galaxies_in_shell_arr[j] += galaxies_in_shell
                E_vz[j] += E_vz_in_shell
                E_vz2[j] += E_vz2_in_shell
                delta[j] += galaxies_in_shell / shell_volume

                current_E_vz += E_vz_in_shell
                current_E_vz2 += E_vz2_in_shell
                current_number_of_galaxies += galaxies_in_shell

        delta /= (len(self.void_cat[:, 0]) * len(self.galaxy_cat[:, 0]) /
                  self.box_size**3)
        delta -= 1

        for j in range(self.N + 1):
            if galaxies_in_shell_arr[j] > 0:
                E_vz[j] /= galaxies_in_shell_arr[j]
                E_vz2[j] /= galaxies_in_shell_arr[j]

        sigma_vz = np.sqrt(E_vz2 - E_vz**2)
        # Replace near-zero values to avoid division by zero later
        sigma_vz[np.where(sigma_vz < 10.0)] = 100.0

        if dictionary:
            # Output for the victor code
            r_dict = np.linspace(2.11, 118.0, 30)
            sigma_vz_spline = interpolate.interp1d(radius_array, sigma_vz)
            delta_spline = interpolate.interp1d(radius_array, delta)
            delta_new = delta_spline(r_dict)
            sigma_vz_new = sigma_vz_spline(r_dict)

            vr_dict = {}
            vr_dict["rvals"] = r_dict
            vr_dict["sigma_v_los"] = sigma_vz_new
            np.save("datafiles/velocity_profiles/sigma_vz_dict" + self.handle,
                    vr_dict)

            delta_dict = {}
            delta_dict["rvals"] = r_dict
            delta_dict["delta"] = delta_new
            np.save("datafiles/density_profiles/delta_dict" + self.handle,
                    delta_dict)

        fig, ax = plt.subplots()
        ax.plot(radius_array, delta)
        fig.savefig("delta_test.png")

        fig, ax = plt.subplots()
        ax.plot(radius_array, sigma_vz)
        fig.savefig("sigmavz_test.png")

        np.save("datafiles/density_profiles/delta" + self.handle, delta)
        np.save("datafiles/velocity_profiles/sigma_vz" + self.handle, sigma_vz)
    else:
        delta = np.load(array_files[0])
        sigma_vz = np.load(array_files[1])

        fig, ax = plt.subplots()
        ax.plot(radius_array, delta)
        fig.savefig("delta_test.png")

        fig, ax = plt.subplots()
        ax.plot(radius_array, sigma_vz)
        fig.savefig("sigmavz_test.png")

    print "Splining density profile"
    print len(radius_array), len(delta)
    self.delta = interpolate.interp1d(radius_array, delta, kind="cubic")
    self.sigma_vz = interpolate.interp1d(radius_array, sigma_vz, kind="cubic")
    return self.delta, self.sigma_vz
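# Minimal usage sketch (hypothetical): assuming `stacker` is an instance of the
# surrounding class with galaxy_cat, galaxy_vz, void_cat, box_size, N and
# handle already set, the returned cubic splines can be evaluated at any
# radius inside the [1, 200] Mpc/h range used above.
delta_spline, sigma_spline = stacker.delta_and_sigma_vz_galaxy()
r = np.linspace(5, 150, 10)
print delta_spline(r)   # density-contrast profile delta(r)
print sigma_spline(r)   # line-of-sight velocity dispersion sigma_vz(r) [km/s]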
def NN_finder_all(initial_config_data, cut_off_distance, box_dim, path_to_test_dir,
                  atom_list=None, save_results=False, re_calc=False):
    """
    A general nearest-neighbor finder that calculates multiple atoms' nearest
    neighbors all at once using the efficient cKDTree algorithm. The atoms of
    interest are listed in the atom_list argument; the default is to calculate
    all atoms inside the initial_config_data file.

    Input arguments:
        initial_config_data: instance of pandas.DataFrame
            configuration data

        cut_off_distance: dict
            dictionary containing the cut-off distance for each atom-type pair.
            Tuples are currently used as keys for immutability; a frozenset
            would allow the key to be accessed without order preference, but it
            merges duplicates (e.g. (1, 1) collapses to a single element). See:
            https://stackoverflow.com/questions/36755714/how-to-ignore-the-order-of-elements-in-a-tuple
            https://www.quora.com/What-advantages-do-tuples-have-over-lists
            For example, {(1,1): 3.7, (1,2): 2.7, (2,2): 3.0} means that the
            cut-off for atom_type 1 and 1 is 3.7, etc.

        box_dim: list
            a list containing the spatial dimensions of the simulation box in x, y, z

        path_to_test_dir: str
            path to the current test result dir; nn_results_dict.pkl is saved under it

        atom_list: list
            list containing the item numbers of the atoms whose nearest
            neighbors are being found

        save_results: boolean, default False
            whether to save the results dictionary into a nn_results_dict.pkl file

    Note: the cKDTree algorithm is efficient when you have many points whose
    neighbors you want to find: you may save substantial amounts of time by
    putting them in a cKDTree and using query_ball_tree.
    For molecular simulation: https://github.com/patvarilly/periodic_kdtree

    Returns:
        nn: dict
            key is the item id of the atom of interest;
            value is the pandas.DataFrame of its nearest neighbors
    """
    path_to_nn_results = path_to_test_dir + "/nn_results_dict.pkl"

    if re_calc is False:
        if os.path.exists(path_to_nn_results):
            print "nn results dictionary already calculated and saved in pkl file, skip calculation"
            return pickle.load(open(path_to_nn_results, 'r'))

    nn = dict()

    # if no atom_list is specified, use all atoms in initial_config_data
    if atom_list is None:
        atom_list = (initial_config_data["item"]).tolist()

    _data = initial_config_data
    groups = Atom.classify_df(_data)

    _interested_data = _data.loc[_data['item'].isin(atom_list)]
    interested_groups = Atom.classify_df(_interested_data)

    # build the efficient nearest-neighbor KDTree
    # (default distance metric: Euclidean norm, p = 2)
    for (i, int_group) in interested_groups.items():
        for (j, atom_group) in groups.items():
            # comparing atom_type_i and atom_type_j
            for pair in [(i, j), (j, i)]:
                if pair in cut_off_distance:
                    curr_cut_off = cut_off_distance[pair]
                    result_tree = PeriodicCKDTree(box_dim,
                                                  atom_group[['x', 'y', 'z']].values)
                    result_groups = result_tree.query_ball_point(
                        int_group[['x', 'y', 'z']].values, curr_cut_off)
                    # iterating over each row seems inefficient
                    k = 0
                    for index, int_atom in int_group.iterrows():
                        # int_NN is a list of NN indices, ordered as in atom_group;
                        # curr_NN is a DataFrame storing the NN found in atom_group
                        int_NN = result_groups[k]
                        curr_NN = atom_group.iloc[int_NN]
                        if int_atom["item"] not in nn:
                            nn[int_atom["item"]] = curr_NN
                        else:
                            nn[int_atom["item"]] = nn[int_atom["item"]].append(curr_NN)
                        k = k + 1
                    # process each unordered pair once; without this, (i, i)
                    # pairs would be processed twice and duplicate the neighbors
                    break

    # it is best practice to save the NN dictionary into a pkl file to prevent
    # reruns; if the file exists, re_calc=False returns it without recalculating
    if save_results is True:
        with open(path_to_nn_results, 'w') as f:
            pickle.dump(nn, f)

    return nn
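# A minimal, self-contained sketch of the core pattern in NN_finder_all():
# one PeriodicCKDTree per atom-type group, queried with a pair-specific
# cut-off. The two-type configuration below is made up for illustration, and
# Atom.classify_df() from the original code is replaced by a plain groupby.
import numpy as np
import pandas as pd
from periodic_kdtree import PeriodicCKDTree

box_dim = [10.0, 10.0, 10.0]
cut_off_distance = {(1, 1): 3.7, (1, 2): 2.7, (2, 2): 3.0}

config = pd.DataFrame({
    'item': np.arange(1, 41),
    'atom_id': [1] * 20 + [2] * 20,
    'x': np.random.uniform(0, 10, 40),
    'y': np.random.uniform(0, 10, 40),
    'z': np.random.uniform(0, 10, 40),
})

groups = {t: g for t, g in config.groupby('atom_id')}
for (i, gi) in groups.items():
    for (j, gj) in groups.items():
        # look the cut-off up under either key orientation
        rcut = cut_off_distance.get((i, j)) or cut_off_distance.get((j, i))
        tree = PeriodicCKDTree(np.array(box_dim), gj[['x', 'y', 'z']].values)
        hits = tree.query_ball_point(gi[['x', 'y', 'z']].values, rcut)
        print "type %d vs type %d: first atom has %d neighbours" % (i, j, len(hits[0]))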