def get_tcc(configuration, tccrawfile, box, rcut=1., criterium="not marked"):
    """Get the connected clusters formed by marked or not-marked particles."""
    xyz = pl.loadtxt(configuration, skiprows=2, usecols=[1, 2, 3])
    cl = pl.loadtxt(tccrawfile, skiprows=3, dtype="S1")
    # 'A'/'B' labels are not-marked particles, 'C'/'D' are marked
    if criterium == "not marked":
        select = xyz[(cl == 'A') + (cl == 'B')]
    elif criterium == "marked":
        select = xyz[(cl == 'C') + (cl == 'D')]
    else:
        raise ValueError("criterium must be 'marked' or 'not marked'")
    T = PeriodicCKDTree(box, select)
    # Find neighbors within a fixed distance of each point
    balls = T.query_ball_point(select, r=rcut)
    visited = pl.zeros(select.shape[0])
    clusters = []

    def addballs(p, cluster):
        # Depth-first flood fill over the neighbor lists
        if visited[p] == 0:
            visited[p] = 1
            cluster.append(p)
            for e in balls[p]:
                addballs(e, cluster)

    for i in xrange(select.shape[0]):
        cluster = []
        addballs(i, cluster)
        if len(cluster) > 0:
            clusters.append(cluster)
    return clusters
def get_marked(xyz, labels, box, marker=True, rcut=1.4, periodic=False):
    """Get the connected clusters formed by particles whose label matches marker."""
    select = xyz[labels == marker]
    if periodic:
        T = PeriodicCKDTree(box, select)
    else:
        T = cKDTree(select)
    # Find neighbors within a fixed distance of each point
    balls = T.query_ball_point(select, r=rcut)
    visited = pl.zeros(select.shape[0])
    clusters = []

    def addballs(p, cluster):
        # Depth-first flood fill over the neighbor lists
        if visited[p] == 0:
            visited[p] = 1
            cluster.append(p)
            for e in balls[p]:
                addballs(e, cluster)

    for i in xrange(select.shape[0]):
        cluster = []
        addballs(i, cluster)
        if len(cluster) > 0:
            clusters.append(cluster)
    return clusters
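# Minimal usage sketch for get_marked() (the same flood-fill clustering that
# get_tcc() uses). The random data below and the periodic_kdtree import are
# assumptions for illustration; pl, cKDTree and PeriodicCKDTree are the
# module-level names the function relies on.
import numpy as np
import pylab as pl
from scipy.spatial import cKDTree
from periodic_kdtree import PeriodicCKDTree

box = np.array([10., 10., 10.])
xyz = np.random.uniform(0, 10, (500, 3))
labels = np.random.rand(500) < 0.3  # mark roughly 30% of the particles
clusters = get_marked(xyz, labels, box, marker=True, rcut=1.4, periodic=True)
print "%d clusters; largest has %d particles" % (
    len(clusters), max(len(c) for c in clusters))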
def test_random_ball_vectorized_compiled():
    n = 20
    m = 5
    bounds = np.ones(m)
    T = PeriodicCKDTree(bounds, np.random.randn(n, m))
    r = T.query_ball_point(np.random.randn(2, 3, m), 1)
    assert_equal(r.shape, (2, 3))
    assert_(isinstance(r[0, 0], list))
def velocity_profile(self):
    radius_array = np.linspace(0, 200, self.N + 1)
    velocity_profile = np.zeros(self.N + 1)
    N_in_velocity = np.zeros(self.N + 1)
    bounds = np.array([self.box_size, self.box_size, self.box_size])
    tree = PeriodicCKDTree(bounds, self.galaxy_cat)
    print "Calculating velocity profile"
    for i in range(len(self.void_cat[:, 0])):
        current_number_of_galaxies = 0
        current_velocity = 0
        for j in range(1, self.N + 1):
            # Galaxies within the sphere of radius radius_array[j] around this void
            neighbor_inds = tree.query_ball_point(self.void_cat[i, :],
                                                  r=radius_array[j])
            r_void = self.void_cat[i]
            galaxies_near_point = self.galaxy_cat[neighbor_inds]
            v_galaxy = self.velocity_cat[neighbor_inds]
            r_vec = r_void - galaxies_near_point
            galaxies_near_point = len(galaxies_near_point[:, 0])
            # Subtract the previous sphere to get the count in the current shell
            galaxies_in_shell = galaxies_near_point - current_number_of_galaxies
            radial_velocity = (v_galaxy * r_vec).sum(axis=1) / np.linalg.norm(r_vec, axis=1)
            radial_velocity = np.sum(radial_velocity) - current_velocity
            velocity_profile[j] += radial_velocity / np.maximum(1.0, galaxies_in_shell)
            N_in_velocity[j] += galaxies_in_shell
            current_velocity += radial_velocity
            current_number_of_galaxies += galaxies_in_shell

    # alternative normalization: velocity_profile / np.maximum(np.ones(self.N+1), N_in_velocity)
    v_final = velocity_profile / len(self.void_cat[:, 0])

    fig, ax = plt.subplots()
    ax.plot(radius_array, v_final)
    ax.set_xlabel("radius [Mpc/h]")
    ax.set_ylabel(r"$v_r(r)$ [km/s]")
    np.save("datafiles/velocity_profiles/velocity_profile" + self.handle, v_final)
    fig.savefig("figures/velocity_profiles/velocity_profile" + self.handle + ".pdf")
Nlist = np.zeros(s, dtype=np.int)

# Boundaries (0 or negative means open boundaries in that dimension)
# changing bounds manually
bounds = np.array([dx, dy, dz])  # xy periodic, open along z
# NB: with the convention above, an open z boundary requires dz <= 0;
# all-positive bounds make the box periodic in every dimension.

# Build kd-tree
T = PeriodicCKDTree(bounds, x)

# Find neighbors within a fixed distance of each point
print "Building Neighborlist..."
neighbors = []
for i in xrange(len(x)):
    # r = cutoff (Angstrom) for making Nlist
    localneigh = T.query_ball_point(x[i], r=2.1)
    # query_ball_point includes the query atom itself; move its index to the front
    localneigh.remove(i)
    localneigh.insert(0, i)
    neighbors.append(localneigh)

print "Neighborlist built! Writing data to file...."
print "***********writing with atom types*****************"
outFile = open('Nlist-types' + '-' + outputfile, 'w')
for i in xrange(s[0]):
    # Slice the atom types using the neighbor indices; have to subtract 1
    # from each index because it was added in the neighborlist build
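# Following the convention noted above (0 or negative bound => open boundary),
# a box that really is periodic in x and y but open along z would be built
# like this (a sketch; dx, dy, dz and x as in the fragment above):
bounds_xy_periodic = np.array([dx, dy, -dz])  # or [dx, dy, 0.]
T_xy = PeriodicCKDTree(bounds_xy_periodic, x)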
# This will take any void and build shells around it up to 2*R_v
# and find the number density per shell using the volume of the shell.
R_shell = np.linspace(0.001, 2 * zone_rad[np.int(zone[arb_ind])], 20)  # shells from ~0 to 2*R_v in units of Mpc/h
V_shell = ((4. * pi) / 3.) * R_shell**3.  # volume enclosed by each shell radius, (Mpc/h)**3
tot_numden = numpart / (Lbox**3.)

# Density-minimum centre of the zone, computed once instead of per query
centre = [x_denmin[np.int(zone[arb_ind])],
          y_denmin[np.int(zone[arb_ind])],
          z_denmin[np.int(zone[arb_ind])]]

count = []
count_void = []
nden = []
for i in R_shell:
    # Number of halos in each concentric sphere of radius given by R_shell
    count_void.append(len(periodic_tree.query_ball_point(centre, i)))

for i in range(0, len(R_shell)):
    # Number density in each shell: difference of the cumulative counts within
    # consecutive radii, divided by the shell volume
    if i == 0:
        count_temp = len(periodic_tree.query_ball_point(centre, R_shell[i]))
        nden_temp = count_temp / V_shell[i]
    else:
        count_temp1 = len(periodic_tree.query_ball_point(centre, R_shell[i]))
        count_temp2 = len(periodic_tree.query_ball_point(centre, R_shell[i - 1]))
        count_temp = count_temp1 - count_temp2
        nden_temp = count_temp / (V_shell[i] - V_shell[i - 1])
    count.append(count_temp)
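# The same per-shell densities can be read off the cumulative counts already
# gathered in count_void above (a sketch; same names as in the fragment):
# np.diff turns cumulative sphere counts into per-shell counts, and the
# innermost sphere (i == 0) is simply count_void[0] / V_shell[0].
counts_per_shell = np.diff(count_void)
nden_per_shell = counts_per_shell / np.diff(V_shell)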
def overdensity_cylinder(gals, coods, R, dc, L, pc_stats=False,
                         cluster_mass_lim=1e4, n=100, verbose=False):
    """
    Find overdensity statistics over the whole simulation box for cylindrical apertures.

    Args:
        gals - dataframe of galaxy properties
        coods - coordinates at which to calculate statistics, typically galaxy or random coordinates
        R - aperture radius, cMpc
        dc - half aperture depth, cMpc
        L - box length, cMpc
        pc_stats - bool, calculate completeness and purity of each region
        cluster_mass_lim - limiting descendant mass above which to classify clusters, z0_central_mcrit200
        n - chunk length

    Returns:
        out_stats - output statistics, numpy array of shape [len(coods), 4]
            0 - overdensity
            1 - completeness
            2 - purity
            3 - descendant mass
    """
    dimensions = np.array([L, L, L])

    if verbose:
        print "Building KDtree..."
    T = PeriodicCKDTree(dimensions, gals[['zn_x', 'zn_y', 'zn_z']])

    avg = float(gals.shape[0]) / L**3  # average number density, cMpc^-3
    out_stats = np.zeros((len(coods), 4))
    vol_avg = np.pi * R**2 * (2 * dc) * avg  # average count in the chosen cylindrical volume

    # can't calculate distances all in one go, so chunk the coordinates
    for j, c in coods.groupby(np.arange(len(coods)) // n):
        if verbose:  # print progress
            if j % 100 == 0:
                print round(float(c.shape[0] * (j + 1)) / coods.shape[0] * 100, 2), '%'
                sys.stdout.flush()

        # find all galaxies within a sphere of radius the max extent of the cylinder
        gal_index = T.query_ball_point(c, r=(R**2 + dc**2)**0.5)

        # filter by cylinder using norm_coods()
        gal_index = [np.array(gal_index[k])[norm_coods(
            gals.iloc[gal_index[k]][['zn_x', 'zn_y', 'zn_z']].values,
            c.ix[k + j * n].values, R=R, half_deltac=dc, L=L)]
            for k in range(len(c))]

        start_index = (j * n)  # save start index

        # calculate dgal
        out_stats[start_index:(start_index + len(c)), 0] = \
            (np.array([len(x) for x in gal_index]) - vol_avg) / vol_avg

        if pc_stats:
            # calculate completeness and purity statistics
            for i in range(len(gal_index)):
                cluster_ids = gals.iloc[gal_index[i]]
                cluster_ids = Counter(
                    cluster_ids[cluster_ids['z0_central_mcrit200'] >
                                cluster_mass_lim]['z0_centralId'])

                if len(cluster_ids) > 0:
                    cstats = np.zeros((len(cluster_ids), 2))
                    for k, (q, no) in enumerate(cluster_ids.items()):
                        cluster_gals = gals.ix[gals['z0_centralId'] == q]
                        cstats[k, 0] = float(no) / len(cluster_gals)   # completeness
                        cstats[k, 1] = float(no) / len(gal_index[i])   # purity

                    # find indices of max completeness and purity in the cstats array
                    max_completeness = np.where(cstats[:, 0] == cstats[:, 0].max())[0]
                    max_purity = np.where(cstats[:, 1] == cstats[:, 1].max())[0]

                    # sometimes multiple clusters have the same completeness or purity
                    # in a single candidate - use the cluster with the highest
                    # complementary completeness/purity
                    if len(max_completeness) > 1:
                        # get matches between completeness and purity
                        matches = [x in max_purity for x in max_completeness]
                        if np.sum(matches) > 0:
                            # just use the first one
                            max_completeness = [np.where(matches)[0][0]]
                            max_purity = [np.where(matches)[0][0]]
                        else:
                            # among tied completeness, pick the highest purity
                            max_completeness = [max_completeness[
                                np.argmax(cstats[max_completeness, 1])]]

                    if len(max_purity) > 1:
                        matches = [x in max_completeness for x in max_purity]
                        if np.sum(matches) > 0:
                            max_completeness = [np.where(matches)[0][0]]
                            max_purity = [np.where(matches)[0][0]]
                        else:
                            # among tied purity, pick the highest completeness
                            max_purity = [max_purity[
                                np.argmax(cstats[max_purity, 0])]]

                    # sometimes the cluster with the highest completeness does not
                    # have the highest purity, or vice versa - use the cluster with
                    # the highest combined purity/completeness added in quadrature
                    if max_completeness[0] != max_purity[0]:
                        max_completeness = [np.argmax(
                            [pow(np.sum(x**2), 0.5) for x in cstats])]
                        max_purity = max_completeness

                    # save completeness and purity values
                    out_stats[start_index + i, 1] = cstats[max_completeness[0], 0]  # completeness
                    out_stats[start_index + i, 2] = cstats[max_purity[0], 1]        # purity

                    # save descendant mass: filter by cluster id, save z0 halo mass.
                    # max_completeness and max_purity are equal by this point
                    out_stats[start_index + i, 3] = gals.loc[
                        gals['z0_centralId'] ==
                        cluster_ids.keys()[max_completeness[0]],
                        'z0_central_mcrit200'].iloc[0]
                else:
                    # if no galaxies in aperture
                    out_stats[start_index + i, 1] = 0.
                    out_stats[start_index + i, 2] = 0.
                    out_stats[start_index + i, 3] = np.nan

    return out_stats
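# Hypothetical usage sketch for overdensity_cylinder(). Everything below is an
# assumption for illustration: the random galaxy table and the stand-in
# norm_coods(), which in the original code is an external helper assumed to
# return a boolean mask of the points falling inside the cylinder after
# periodic wrapping.
import numpy as np
import pandas as pd

def norm_coods(coods, centre, R, half_deltac, L):
    # minimal stand-in: wrap separations into [-L/2, L/2), then test the
    # cylindrical aperture (radius R in the x-y plane, half-depth along z)
    d = (coods - centre + L / 2.) % L - L / 2.
    return (d[:, 0]**2 + d[:, 1]**2 < R**2) & (np.abs(d[:, 2]) < half_deltac)

L = 100.
gals = pd.DataFrame(np.random.uniform(0, L, (5000, 3)),
                    columns=['zn_x', 'zn_y', 'zn_z'])
coods = gals[['zn_x', 'zn_y', 'zn_z']].sample(50).reset_index(drop=True)
stats = overdensity_cylinder(gals, coods, R=5., dc=7., L=L)
print stats[:5, 0]  # overdensity of the first five apertures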
w = T2.query(queries)
print "PeriodicCKDTree %d lookups:\t%g" % (r, time.time() - t)
del w

T3 = PeriodicCKDTree(bounds, data, leafsize=n)
t = time.time()
w = T3.query(queries)
print "flat PeriodicCKDTree %d lookups:\t%g" % (r, time.time() - t)
del w

t = time.time()
w1 = T1.query_ball_point(queries, 0.2)
print "PeriodicKDTree %d ball lookups:\t%g" % (r, time.time() - t)

t = time.time()
w2 = T2.query_ball_point(queries, 0.2)
print "PeriodicCKDTree %d ball lookups:\t%g" % (r, time.time() - t)

t = time.time()
w3 = T3.query_ball_point(queries, 0.2)
print "flat PeriodicCKDTree %d ball lookups:\t%g" % (r, time.time() - t)

all_good = True
for a, b in zip(w1, w2):
    if sorted(a) != sorted(b):
        all_good = False
for a, b in zip(w1, w3):
    if sorted(a) != sorted(b):
        all_good = False
print "Ball lookups agree? %s" % str(all_good)
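# The benchmark fragment above starts mid-script. A minimal setup sketch for
# the names it references (bounds, data, queries, r, n, t, T1, T2) might look
# like this; the sizes are illustrative assumptions, not the original values.
import time
import numpy as np
from periodic_kdtree import PeriodicKDTree, PeriodicCKDTree

m = 3       # dimensions
n = 2000    # number of data points
r = 200     # number of query points
bounds = np.ones(m)  # unit box, periodic in every dimension
data = np.random.uniform(size=(n, m))
queries = np.random.uniform(size=(r, m))

T1 = PeriodicKDTree(bounds, data)   # pure-Python tree
T2 = PeriodicCKDTree(bounds, data)  # compiled (cKDTree-backed) tree
t = time.time()
w = T1.query(queries)
print "PeriodicKDTree %d lookups:\t%g" % (r, time.time() - t)
del w
t = time.time()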
def delta_and_sigma_vz_galaxy(self, array_files=None, dictionary=False):
    """
    Calculates the density profile and velocity dispersion of voids in real
    space. Requires xi_vg_real_func() to be run first, as this gives the upper
    and lower bounds of the radius array, avoiding out-of-bounds errors in the
    splines.
    """
    radius_array = np.linspace(1, 200, self.N + 1)
    if array_files is None:
        bounds = np.array([self.box_size, self.box_size, self.box_size])
        tree = PeriodicCKDTree(bounds, self.galaxy_cat)

        delta = np.zeros(self.N + 1)
        E_vz = np.zeros(self.N + 1)
        E_vz2 = np.zeros(self.N + 1)
        sigma_vz = np.zeros(self.N + 1)
        galaxies_in_shell_arr = np.zeros(self.N + 1)

        print "Starting density profile and velocity dispersion calculation"
        for i in range(len(self.void_cat[:, 0])):
            current_number_of_galaxies = 0
            current_E_vz = 0
            current_E_vz2 = 0
            E_vz_in_shell = 0
            E_vz2_in_shell = 0
            for j in range(1, self.N + 1):
                # Find galaxy positions and velocities within a given radius
                # around the current void
                neighbor_inds = tree.query_ball_point(self.void_cat[i, :],
                                                      r=radius_array[j])
                shell_volume = 4.0 * np.pi * (radius_array[j]**3 -
                                              radius_array[j - 1]**3) / 3.0

                velocity_near_point = self.galaxy_vz[neighbor_inds]
                galaxies_near_point = self.galaxy_cat[neighbor_inds]
                galaxies_near_point = len(galaxies_near_point[:, 0])

                # Subtract the previous sphere to get galaxies in the current shell
                galaxies_in_shell = galaxies_near_point - current_number_of_galaxies

                # Accumulate terms used in the expectation values E[v_z**2] and E[v_z]**2
                if galaxies_near_point > 0:
                    E_vz2_in_shell = sum(velocity_near_point**2) - current_E_vz2
                    E_vz_in_shell = sum(velocity_near_point) - current_E_vz

                galaxies_in_shell_arr[j] += galaxies_in_shell
                E_vz[j] += E_vz_in_shell
                E_vz2[j] += E_vz2_in_shell
                delta[j] += galaxies_in_shell / shell_volume

                current_E_vz += E_vz_in_shell
                current_E_vz2 += E_vz2_in_shell
                current_number_of_galaxies += galaxies_in_shell

        delta /= (len(self.void_cat[:, 0]) * len(self.galaxy_cat[:, 0]) /
                  self.box_size**3)
        delta -= 1

        for j in range(self.N + 1):
            if galaxies_in_shell_arr[j] > 0:
                E_vz[j] /= galaxies_in_shell_arr[j]
                E_vz2[j] /= galaxies_in_shell_arr[j]

        sigma_vz = np.sqrt(E_vz2 - E_vz**2)
        # Replace near-zero values to avoid division by zero later
        sigma_vz[np.where(sigma_vz < 10.0)] = 100.0

        if dictionary:
            # Output for the victor code
            r_dict = np.linspace(2.11, 118.0, 30)
            sigma_vz_spline = interpolate.interp1d(radius_array, sigma_vz)
            delta_spline = interpolate.interp1d(radius_array, delta)
            delta_new = delta_spline(r_dict)
            sigma_vz_new = sigma_vz_spline(r_dict)

            vr_dict = {}
            vr_dict["rvals"] = r_dict
            vr_dict["sigma_v_los"] = sigma_vz_new
            np.save("datafiles/velocity_profiles/sigma_vz_dict" + self.handle,
                    vr_dict)

            delta_dict = {}
            delta_dict["rvals"] = r_dict
            delta_dict["delta"] = delta_new
            np.save("datafiles/density_profiles/delta_dict" + self.handle,
                    delta_dict)

        fig, ax = plt.subplots()
        ax.plot(radius_array, delta)
        fig.savefig("delta_test.png")

        fig, ax = plt.subplots()
        ax.plot(radius_array, sigma_vz)
        fig.savefig("sigmavz_test.png")

        np.save("datafiles/density_profiles/delta" + self.handle, delta)
        np.save("datafiles/velocity_profiles/sigma_vz" + self.handle, sigma_vz)
    else:
        delta = np.load(array_files[0])
        sigma_vz = np.load(array_files[1])

        fig, ax = plt.subplots()
        ax.plot(radius_array, delta)
        fig.savefig("delta_test.png")

        fig, ax = plt.subplots()
        ax.plot(radius_array, sigma_vz)
        fig.savefig("sigmavz_test.png")

    print "Splining density profile"
    print len(radius_array), len(delta)
    self.delta = interpolate.interp1d(radius_array, delta, kind="cubic")
    self.sigma_vz = interpolate.interp1d(radius_array, sigma_vz, kind="cubic")
    return self.delta, self.sigma_vz
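# Minimal usage sketch (hypothetical): assuming `stacker` is an instance of the
# surrounding class with galaxy_cat, galaxy_vz, void_cat, box_size, N and
# handle already set, the returned cubic splines can be evaluated at any
# radius inside the [1, 200] Mpc/h range used above.
delta_spline, sigma_spline = stacker.delta_and_sigma_vz_galaxy()
r = np.linspace(5, 150, 10)
print delta_spline(r)   # density-contrast profile delta(r)
print sigma_spline(r)   # line-of-sight velocity dispersion sigma_vz(r) [km/s]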
def NN_finder_all(initial_config_data, cut_off_distance, box_dim, path_to_test_dir,
                  atom_list=None, save_results=False, re_calc=False):
    """
    A general nearest-neighbor finder that calculates multiple atoms' nearest
    neighbors all at once using the efficient cKDTree algorithm. The atoms of
    interest are listed in the atom_list argument; the default is to calculate
    all atoms inside the initial_config_data file.

    Input arguments:
        initial_config_data: instance of pandas.DataFrame
            configuration data

        cut_off_distance: dict
            dictionary containing the cut-off distance for each atom-type pair.
            Tuples are currently used as keys for immutability; a frozenset
            would allow the key to be accessed without order preference, but it
            merges duplicates (e.g. (1, 1) collapses to a single element). See:
            https://stackoverflow.com/questions/36755714/how-to-ignore-the-order-of-elements-in-a-tuple
            https://www.quora.com/What-advantages-do-tuples-have-over-lists
            For example, {(1,1): 3.7, (1,2): 2.7, (2,2): 3.0} means that the
            cut-off for atom_type 1 and 1 is 3.7, etc.

        box_dim: list
            a list containing the spatial dimensions of the simulation box in x, y, z

        path_to_test_dir: str
            path to the current test result dir; nn_results_dict.pkl is saved under it

        atom_list: list
            list containing the item numbers of the atoms whose nearest
            neighbors are being found

        save_results: boolean, default False
            whether to save the results dictionary into a nn_results_dict.pkl file

    Note: the cKDTree algorithm is efficient when you have many points whose
    neighbors you want to find: you may save substantial amounts of time by
    putting them in a cKDTree and using query_ball_tree.
    For molecular simulation: https://github.com/patvarilly/periodic_kdtree

    Returns:
        nn: dict
            key is the item id of the atom of interest;
            value is the pandas.DataFrame of its nearest neighbors
    """
    path_to_nn_results = path_to_test_dir + "/nn_results_dict.pkl"

    if re_calc is False:
        if os.path.exists(path_to_nn_results):
            print "nn results dictionary already calculated and saved in pkl file, skip calculation"
            return pickle.load(open(path_to_nn_results, 'r'))

    nn = dict()

    # if no atom_list is specified, use all atoms in initial_config_data
    if atom_list is None:
        atom_list = (initial_config_data["item"]).tolist()

    _data = initial_config_data
    groups = Atom.classify_df(_data)

    _interested_data = _data.loc[_data['item'].isin(atom_list)]
    interested_groups = Atom.classify_df(_interested_data)

    # build the efficient nearest-neighbor KDTree
    # (default distance metric: Euclidean norm, p = 2)
    for (i, int_group) in interested_groups.items():
        for (j, atom_group) in groups.items():
            # comparing atom_type_i and atom_type_j
            for pair in [(i, j), (j, i)]:
                if pair in cut_off_distance:
                    curr_cut_off = cut_off_distance[pair]
                    result_tree = PeriodicCKDTree(box_dim,
                                                  atom_group[['x', 'y', 'z']].values)
                    result_groups = result_tree.query_ball_point(
                        int_group[['x', 'y', 'z']].values, curr_cut_off)
                    # iterating over each row seems inefficient
                    k = 0
                    for index, int_atom in int_group.iterrows():
                        # int_NN is a list of NN indices, ordered as in atom_group;
                        # curr_NN is a DataFrame storing the NN found in atom_group
                        int_NN = result_groups[k]
                        curr_NN = atom_group.iloc[int_NN]
                        if int_atom["item"] not in nn:
                            nn[int_atom["item"]] = curr_NN
                        else:
                            nn[int_atom["item"]] = nn[int_atom["item"]].append(curr_NN)
                        k = k + 1
                    # process each unordered pair once; without this, (i, i)
                    # pairs would be processed twice and duplicate the neighbors
                    break

    # it is best practice to save the NN dictionary into a pkl file to prevent
    # reruns; if the file exists, re_calc=False returns it without recalculating
    if save_results is True:
        with open(path_to_nn_results, 'w') as f:
            pickle.dump(nn, f)

    return nn
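# A minimal, self-contained sketch of the core pattern in NN_finder_all():
# one PeriodicCKDTree per atom-type group, queried with a pair-specific
# cut-off. The two-type configuration below is made up for illustration, and
# Atom.classify_df() from the original code is replaced by a plain groupby.
import numpy as np
import pandas as pd
from periodic_kdtree import PeriodicCKDTree

box_dim = [10.0, 10.0, 10.0]
cut_off_distance = {(1, 1): 3.7, (1, 2): 2.7, (2, 2): 3.0}

config = pd.DataFrame({
    'item': np.arange(1, 41),
    'atom_id': [1] * 20 + [2] * 20,
    'x': np.random.uniform(0, 10, 40),
    'y': np.random.uniform(0, 10, 40),
    'z': np.random.uniform(0, 10, 40),
})

groups = {t: g for t, g in config.groupby('atom_id')}
for (i, gi) in groups.items():
    for (j, gj) in groups.items():
        # look the cut-off up under either key orientation
        rcut = cut_off_distance.get((i, j)) or cut_off_distance.get((j, i))
        tree = PeriodicCKDTree(np.array(box_dim), gj[['x', 'y', 'z']].values)
        hits = tree.query_ball_point(gi[['x', 'y', 'z']].values, rcut)
        print "type %d vs type %d: first atom has %d neighbours" % (i, j, len(hits[0]))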