示例#1
0
def get_pde(train, bw):
    """Evaluate a Gaussian kernel density at every (longitude, latitude) point.

    Args:
        train: Table-like object whose ``'longitude'`` and ``'latitude'``
            entries each expose a ``.values`` array.
        bw: Kernel bandwidth, forwarded to ``kernel_density`` as ``h``.

    Returns:
        The output of ``KDTree.kernel_density`` queried at the same points
        the tree was built from.
    """
    lon = train['longitude'].values
    lat = train['latitude'].values
    # Stack as rows, then flip so each row is one (longitude, latitude) point.
    points = np.vstack((lon, lat)).T
    kd_tree = KDTree(points, leaf_size=20)
    return kd_tree.kernel_density(
        points,
        h=bw,
        kernel='gaussian',
        rtol=0.00001,
    )
示例#2
0
def get_pde(df, bw):
    """Compute a per-point parcel density estimate for a 2-D scatter plot.

    The longitude and latitude columns are used as x and y coordinates; the
    returned densities are meant to be used to colour those points.

    Args:
        df: Table-like object whose ``'longitude'`` and ``'latitude'``
            entries each expose a ``.values`` array.
        bw: Bandwidth of the Gaussian kernel (passed as ``h``).

    Returns:
        The ``KDTree.kernel_density`` result evaluated at the input points
        themselves.
    """
    # Shape the two coordinate arrays into one row per point.
    coords = np.vstack((df['longitude'].values, df['latitude'].values)).T
    density_tree = KDTree(coords, leaf_size=20)
    densities = density_tree.kernel_density(
        coords, h=bw, kernel='gaussian', rtol=0.00001)
    return densities
示例#3
0
class TimeSmoothAggregatorKDTree(TimeSmoothAggregatorBase):
    """Aggregator that scores tracks with a KD-tree kernel density estimate."""

    def _initialize(self):
        """Build the KD-tree over ``self.X`` and store it as ``self.aggregator``."""
        self.aggregator = KDTree(self.X)

    def get_infection_likelihood(self, X_track, bandwidth=10.0):
        """Return the product of the kernel densities at the track points.

        Args:
            X_track: Query points handed to ``KDTree.kernel_density``.
            bandwidth: Kernel bandwidth. Defaults to 10.0, the value that was
                previously hard-coded, so existing callers are unaffected.

        Returns:
            ``np.prod`` over the densities returned for ``X_track``.
        """
        likelihoods = self.aggregator.kernel_density(X_track, bandwidth)
        return np.prod(likelihoods)

    def sample_heatmap(self, num_samples):
        """Sample points and their density scores (not implemented yet).

        Raises:
            NotImplementedError: Always.
        """
        # Planned steps:
        # 1) sample points inside the tree somehow (uniformly?)
        # 2) call kernel density on each one to obtain their corresponding scores
        # 3) return the sampled points and their scores
        raise NotImplementedError()
示例#4
0
    intact_volume = sitk.ReadImage(m_string3)
    intact_array = sitk.GetArrayFromImage(intact_volume)
else:
    intact_volume = RIM.dicom_series_reader(m_string3)
    intact_array = itk.GetArrayFromImage(intact_volume)
# intact_volume=RIM.dicom_series_reader(str(unicode('\\\\samba.cs.ucalgary.ca\\fatemeh.yazdanbakhsh\Documents\Data_Sets\Calgary\TBone-2015\TBoneCBCT-2015-10\L2963L','utf-8')))

# Second name bound to the same array object (plain assignment, no copy).
intact_array_original = intact_array
##########################################################################
data = intact_array

# np.where with a single condition returns one index array per axis for the
# elements equal to 255.0.
data = np.where(data == 255.0)
# Transpose so that each row of X is the index coordinate of one such element.
X = np.asarray(data).transpose()
tree = KDTree(X, leaf_size=10)
# Gaussian density at the first 15 points; the result is not stored or printed.
tree.kernel_density(X[0:15], h=0.1, kernel='gaussian')
# print(tree.query_radius(X[:1], r=0.3, count_only=True))
# NOTE(review): X holds integer indices, so with the default metric a radius
# of 0.3 presumably matches only each query point itself — confirm intent.
print(tree.query_radius(X[0:15], r=0.3))
# ax.scatter(data[0], data[1],data[2], c='b', **plot_kwds)
#
# # plt.show()
#
# import matplotlib.pyplot as plt
# import pandas as pd
#
# import numpy as np
# # import scipy.cluster.hierarchy as shc
# #
# # plt.figure(figsize=(10, 7))
# # plt.title("Customer Dendograms")
# # dend = shc.dendrogram(shc.linkage(data, method='ward'))
示例#5
0
文件: kNN.py 项目: Asichurter/ML_Py
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 15 02:04:00 2018

@author: 10904
"""

#k近邻方法
from sklearn.neighbors import KDTree
import numpy as np

#这是一句用于github测试的注释
#这是github客户端同步的测试注释

# Ten sample points in the plane, one (x, y) pair per row.
data = np.array([
    [2, 3], [5, 4], [9, 6], [4, 7], [8, 1],
    [7, 2], [3, 4], [1, 0], [5, 8], [6, 2],
])
tree = KDTree(data, leaf_size=2)

# Three nearest neighbours of the point (2, 3), which is also the first row.
dist, index = tree.query(data[:1], k=3)
print(dist, index, sep='\n')

# Kernel density estimation with a Gaussian kernel.
# How it works: using the tree's distance metric, the distance from every
# stored point to the query point x is computed; each stored point contributes
# a Gaussian centred on itself, and the combination of these independent
# Gaussians is evaluated at x to obtain its density.
# Original author's note: raising the bandwidth helps to tell the points apart
# more clearly.
# The query points are exactly the points the tree was built from.
print(tree.kernel_density(data, h=1.0, kernel='gaussian'))
def get_spore_density(file_df, spore_df, cell_df, mask_base, outdir, look_at_file):
    """Compute spore and cell kernel densities as a function of map distance.

    For one file id, the function builds KD-trees over the spore and cell
    positions, loads a distance map, and for a series of distance bins samples
    up to ``n_samples`` pixels whose distance-map value falls in the bin. The
    Gaussian kernel density of spores and of cells is evaluated at the sampled
    pixels, and the per-bin mean and standard deviation are written to a
    ``.mat`` file.

    Args:
        file_df: Passed through to ``get_cache_path`` and ``get_mask_path``.
        spore_df: Table with ``"global_file_id"``, ``"image_row"`` and
            ``"image_col"`` columns describing spores.
        cell_df: Table with the same columns describing cells.
        mask_base: Base location handed to ``get_mask_path``.
        outdir: Output directory handed to ``get_cache_path``.
        look_at_file: File id used to filter ``spore_df`` and ``cell_df``.

    Returns:
        None. If the output file already exists nothing is computed; otherwise
        the results are saved with ``scipy.io.savemat``.
    """
    savename = get_cache_path(outdir, file_df, look_at_file)
    if os.path.exists(savename):
        # Results for this file are already cached on disk.
        print("skipping ", savename)
        return None

    lspore_df = spore_df[spore_df["global_file_id"] == look_at_file]
    lcell_df = cell_df[cell_df["global_file_id"] == look_at_file]

    spore_rc = lspore_df[["image_row", "image_col"]].values
    cells_rc = lcell_df[["image_row", "image_col"]].values
    spore_kdtree = KDTree(spore_rc)
    cells_kdtree = KDTree(cells_rc)

    # Kernel radius; alternatives tried earlier derived it from an area via
    # np.sqrt(area_in_um2 / np.pi) with areas of 150, 100 and 50.
    rad_um = 4
    print("rad um", rad_um)
    # NOTE(review): presumably converts micrometres to pixels — confirm the
    # meaning of resolutions.PX_TO_UM_LSM700_GIANT.
    pixrad = rad_um / resolutions.PX_TO_UM_LSM700_GIANT
    print("rad pix", pixrad)

    mask_path = get_mask_path(mask_base, file_df, "distmap", look_at_file)
    print("Reading Distance Map ", mask_path)
    tic = time.time()

    distmap = scipy.io.loadmat(mask_path)["distmap_masked"].astype(np.float32)

    # We do not correct for the lack of density at the edges, so ignore
    # 5 times the std of the gaussian there so it will not affect much.
    five_sigma = int(pixrad * 5)
    distmap = ignore_edges(distmap, five_sigma)

    distmap_shape = distmap.shape

    print("flattening, sorting")
    distmap = distmap.flatten()
    # Sort only once: argsort yields the permutation, and indexing with it
    # gives the sorted values (previously np.sort and np.argsort both ran).
    sortind_distmap = np.argsort(distmap)
    sorted_distmap = distmap[sortind_distmap]
    del distmap
    toc = time.time()
    print(toc-tic, "seconds to get the distmap, and sort it")

    distances = np.arange(2.0, 150, 0.5)
    window_half_width = 0.25
    # Each bin is centred on `center` and spans center +- window_half_width.
    centers = distances + window_half_width

    # Bins that receive no sample points keep their initial value of zero.
    kd_daccum_mean = np.zeros_like(distances)
    kd_daccum_std = np.zeros_like(distances)
    kd_cells_mean = np.zeros_like(distances)
    kd_cells_std = np.zeros_like(distances)
    n_samples = 10000

    for d, dist in enumerate(centers):
        print("doing ", d, dist)
        # Find the indices of the distance pixels in the current distance +- window.
        dislice_start = np.searchsorted(sorted_distmap, dist - window_half_width, side="right")
        dislice_end = np.searchsorted(sorted_distmap, dist + window_half_width, side="left")
        indices_of_slice = sortind_distmap[dislice_start:dislice_end]
        print("found ", len(indices_of_slice), "points")
        # Cap the work per bin by subsampling without replacement.
        if n_samples < len(indices_of_slice):
            sample_indices = npr.choice(indices_of_slice, size=n_samples, replace=False)
        else:
            sample_indices = indices_of_slice

        # Convert these flat samples back into 2d rows and cols.
        rr, cc = np.unravel_index(sample_indices, distmap_shape)
        sp = np.vstack([rr, cc]).T

        if sp.shape[0] > 0:
            spore_kd_densities = spore_kdtree.kernel_density(sp, h=pixrad, kernel='gaussian')
            kd_daccum_mean[d] = np.mean(spore_kd_densities)
            kd_daccum_std[d] = np.std(spore_kd_densities)
            kd_cell_densities = cells_kdtree.kernel_density(sp, h=pixrad, kernel='gaussian')
            kd_cells_mean[d] = np.mean(kd_cell_densities)
            kd_cells_std[d] = np.std(kd_cell_densities)

    scipy.io.savemat(savename, {"centers":centers,
                                 "spore_kd_dense_g50": kd_daccum_mean,
                                 "spore_kd_dense_g50_std": kd_daccum_std,
                                 "cell_kd_dense_g50": kd_cells_mean,
                                 "cell_kd_dense_g50_std": kd_cells_std,
                                 })
    gc.collect()