class Kernel(W):
    r"""
    Spatial weights based on kernel functions.

    Parameters
    ----------
    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel. When omitted
                  (or falsy) the bandwidth is derived from the k nearest
                  neighbor distances.
    fixed       : binary
                  If true then :math:`h_i=h \forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations
    ids         : list (optional)
                  identifiers to attach to each observation; positional
                  indices are used when omitted

    Attributes
    ----------
    weights : dict
              Dictionary keyed by id with a list of weights for each neighbor
    neighbors : dict
                of lists of neighbors keyed by observation id
    bandwidth : array
                array of bandwidths

    Raises
    ------
    ValueError
        if ``function`` is not one of the supported kernel names.

    Examples
    --------
    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}

    """

    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False):
        if isKDTree(data):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        # the query also returns the focal point itself, hence k + 1
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps

        # A plain truth test on ``bandwidth`` raises for multi-element
        # arrays, so decide explicitly.  None, an empty sequence and a
        # scalar zero all mean "derive the bandwidth from the data".
        user_supplied = (
            bandwidth is not None
            and np.size(bandwidth) > 0
            and not (np.ndim(bandwidth) == 0 and not bandwidth)
        )
        if user_supplied:
            bandwidth = np.array(bandwidth)
            if bandwidth.ndim == 0:
                # scalar: same bandwidth for every observation
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            else:
                # one bandwidth per observation, stored as a column vector
                bandwidth = bandwidth.reshape(-1, 1)
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                # skip observations that do not list themselves as neighbor
                if i in neighbors[i]:
                    weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        """Convert the evaluated kernel into neighbor and weight dicts.

        Returns
        -------
        (allneighbors, weights) : tuple of dict
            both keyed by observation id; values are aligned lists of
            neighbor ids and kernel weights.
        """
        # ``ids`` may be an array, for which a bare truth test is ambiguous
        if ids is not None and len(ids) > 0:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        allneighbors = {}
        weights = {}
        for i, kernel_i in enumerate(self.kernel):
            neigh_i = self.neigh[i]
            if len(neigh_i) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[neigh_i])
                weights[ids[i]] = kernel_i.tolist()
        return allneighbors, weights

    def _set_bw(self):
        """Derive ``self.bandwidth`` and ``self.neigh`` from a knn query."""
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            self.bandwidth = np.ones((len(dmat), 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = (dmat.max(axis=1) * self.eps).reshape(-1, 1)
        # the knn neighbors come from the same query; no need to repeat it
        self.neigh = neigh

    def _eval_kernel(self):
        """Evaluate the kernel function on scaled neighbor distances.

        Sets ``self.kernel`` to a list with one weight array per
        observation, aligned with ``self.neigh``.
        """
        supported = ('triangular', 'uniform', 'quadratic', 'quartic',
                     'gaussian')
        if self.function not in supported:
            # fail before the (potentially expensive) distance queries
            raise ValueError(
                'Unsupported kernel function: %s' % self.function)

        # get points within bandwidth distance of each point
        if not hasattr(self, 'neigh'):
            ball_query = self.kdt.query_ball_point
            self.neigh = [ball_query(self.data[i], r=bwi[0])
                          for i, bwi in enumerate(self.bandwidth)]

        # get distances for neighbors
        bw = self.bandwidth
        knn_query = self.kdt.query
        zs = []
        for i, nids in enumerate(self.neigh):
            di, ni = knn_query(self.data[i], k=len(nids))
            if not isinstance(di, np.ndarray):
                # a single-neighbor query returns scalars
                di = np.asarray([di] * len(nids))
                ni = np.asarray([ni] * len(nids))
            # build the id -> distance lookup once per observation
            dist_of = dict(zip(ni, di))
            zs.append(np.array([dist_of[nid] for nid in nids]) / bw[i])

        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        else:  # gaussian
            c = (np.pi * 2) ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.) for zi in zs]
def knnW(data, k=2, p=2, ids=None):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------
    data       : object
                 PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects; anything else is handed
                 to ``KDTree`` to build a tree
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
                 Ignored if the KDTree is an ArcKDTree
    ids        : list
                 identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.knnW(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = knnW(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """
    if isKDTree(data):
        kdt = data
        data = kdt.data
    else:
        kdt = KDTree(data)
    # ask for k + 1 hits because each point is its own nearest neighbor
    info = kdt.query(data, k=k + 1, p=p)[1]

    # ``ids`` may be a numpy array, whose bare truth value is ambiguous
    relabel = ids is not None and len(ids) > 0

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
        focal = i
        if relabel:
            row = [ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors, id_order=ids)
class Kernel(W):
    r"""
    Spatial weights based on kernel functions.

    Parameters
    ----------
    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel. When omitted
                  (or falsy) the bandwidth is derived from the k nearest
                  neighbor distances.
    fixed       : binary
                  If true then :math:`h_i=h \forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations
    ids         : list (optional)
                  identifiers to attach to each observation; positional
                  indices are used when omitted

    Attributes
    ----------
    weights : dict
              Dictionary keyed by id with a list of weights for each neighbor
    neighbors : dict
                of lists of neighbors keyed by observation id
    bandwidth : array
                array of bandwidths

    Raises
    ------
    ValueError
        if ``function`` is not one of the supported kernel names.

    Examples
    --------
    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}

    """

    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False):
        if isKDTree(data):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        # the query also returns the focal point itself, hence k + 1
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps

        # A plain truth test on ``bandwidth`` raises for multi-element
        # arrays, so decide explicitly.  None, an empty sequence and a
        # scalar zero all mean "derive the bandwidth from the data".
        user_supplied = (
            bandwidth is not None
            and np.size(bandwidth) > 0
            and not (np.ndim(bandwidth) == 0 and not bandwidth)
        )
        if user_supplied:
            bandwidth = np.array(bandwidth)
            if bandwidth.ndim == 0:
                # scalar: same bandwidth for every observation
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            else:
                # one bandwidth per observation, stored as a column vector
                bandwidth = bandwidth.reshape(-1, 1)
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                # skip observations that do not list themselves as neighbor
                if i in neighbors[i]:
                    weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    @classmethod
    def from_shapefile(cls, filepath, idVariable=None, **kwargs):
        """
        Kernel based weights from shapefile

        Arguments
        ---------
        filepath    : string
                      shapefile name with shp suffix
        idVariable  : string
                      name of column in shapefile's DBF to use for ids
        **kwargs    : keyword arguments
                      forwarded to the :class:`Kernel` constructor

        Returns
        --------
        Kernel Weights Object

        See Also
        ---------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        points = get_points_array_from_shapefile(filepath)
        ids = None if idVariable is None else get_ids(filepath, idVariable)
        return cls.from_array(points, ids=ids, **kwargs)

    @classmethod
    def from_array(cls, array, **kwargs):
        """
        Construct a Kernel weights from an array. Supports all the same
        options as :class:`pysal.weights.Kernel`

        See Also
        --------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        return cls(array, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None, **kwargs):
        """
        Make Kernel weights from a dataframe.

        Parameters
        ----------
        df          :   pandas.dataframe
                        a dataframe with a geometry column that can be used
                        to construct a W object
        geom_col    :   string
                        column name of the geometry stored in df
        ids         :   string or iterable
                        if string, the column name of the indices from the
                        dataframe
                        if iterable, a list of ids to use for the W
                        if None, df.index is used.

        See Also
        --------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, ids=ids, **kwargs)

    def _k_to_W(self, ids=None):
        """Convert the evaluated kernel into neighbor and weight dicts.

        Returns
        -------
        (allneighbors, weights) : tuple of dict
            both keyed by observation id; values are aligned lists of
            neighbor ids and kernel weights.
        """
        # ``ids`` may be an array, for which a bare truth test is ambiguous
        if ids is not None and len(ids) > 0:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        allneighbors = {}
        weights = {}
        for i, kernel_i in enumerate(self.kernel):
            neigh_i = self.neigh[i]
            if len(neigh_i) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[neigh_i])
                weights[ids[i]] = kernel_i.tolist()
        return allneighbors, weights

    def _set_bw(self):
        """Derive ``self.bandwidth`` and ``self.neigh`` from a knn query."""
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            self.bandwidth = np.ones((len(dmat), 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = (dmat.max(axis=1) * self.eps).reshape(-1, 1)
        # the knn neighbors come from the same query; no need to repeat it
        self.neigh = neigh

    def _eval_kernel(self):
        """Evaluate the kernel function on scaled neighbor distances.

        Sets ``self.kernel`` to a list with one weight array per
        observation, aligned with ``self.neigh``.
        """
        supported = ('triangular', 'uniform', 'quadratic', 'quartic',
                     'gaussian')
        if self.function not in supported:
            # fail before the (potentially expensive) distance queries
            raise ValueError(
                'Unsupported kernel function: %s' % self.function)

        # get points within bandwidth distance of each point
        if not hasattr(self, 'neigh'):
            ball_query = self.kdt.query_ball_point
            self.neigh = [ball_query(self.data[i], r=bwi[0])
                          for i, bwi in enumerate(self.bandwidth)]

        # get distances for neighbors
        bw = self.bandwidth
        knn_query = self.kdt.query
        zs = []
        for i, nids in enumerate(self.neigh):
            di, ni = knn_query(self.data[i], k=len(nids))
            if not isinstance(di, np.ndarray):
                # a single-neighbor query returns scalars
                di = np.asarray([di] * len(nids))
                ni = np.asarray([ni] * len(nids))
            # build the id -> distance lookup once per observation
            dist_of = dict(zip(ni, di))
            zs.append(np.array([dist_of[nid] for nid in nids]) / bw[i])

        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        else:  # gaussian
            c = (np.pi * 2) ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.) for zi in zs]
class Kernel(W):
    """
    Spatial weights based on kernel functions, with optional use of a
    multiprocessing pool for the KD-tree queries.

    Parameters
    ----------
    data        : array or scipy.spatial.KDTree
                  observations, or a tree whose ``data`` attribute holds
                  them
    bandwidth   : float or array-like (optional)
                  kernel bandwidth; derived from the k nearest neighbor
                  distances when omitted (or falsy)
    fixed       : binary
                  if true one bandwidth is used for all observations,
                  otherwise the bandwidth is adaptive
    k           : int
                  number of nearest neighbors used to derive the bandwidth
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function applied to distance / bandwidth
    eps         : float
                  multiplier applied to the derived bandwidth
    ids         : list (optional)
                  identifiers to attach to each observation
    diagonal    : boolean
                  if true, the weight of each observation on itself is set
                  to 1.0
    ncores      : int
                  values greater than 1 run the ball-point and distance
                  queries through ``mp.Pool`` with that many processes

    Raises
    ------
    ValueError
        if ``function`` is not one of the supported kernel names.

    Notes
    -----
    Timing information for both query stages is printed to stdout.
    The pool path relies on the module-level helpers ``loadkd``,
    ``bqwrapper`` and ``qwrapper``, which are defined outside this class.
    """

    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False, ncores=1):
        if isinstance(data, scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        # the query also returns the focal point itself, hence k + 1
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        self.ncores = ncores

        # A plain truth test on ``bandwidth`` raises for multi-element
        # arrays, so decide explicitly.  None, an empty sequence and a
        # scalar zero all mean "derive the bandwidth from the data".
        user_supplied = (
            bandwidth is not None
            and np.size(bandwidth) > 0
            and not (np.ndim(bandwidth) == 0 and not bandwidth)
        )
        if user_supplied:
            bandwidth = np.array(bandwidth)
            if bandwidth.ndim == 0:
                # scalar: same bandwidth for every observation
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            else:
                # one bandwidth per observation, stored as a column vector
                bandwidth = bandwidth.reshape(-1, 1)
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                # skip observations that do not list themselves as neighbor
                if i in neighbors[i]:
                    weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        """Convert the evaluated kernel into neighbor and weight dicts."""
        # ``ids`` may be an array, for which a bare truth test is ambiguous
        if ids is not None and len(ids) > 0:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        allneighbors = {}
        weights = {}
        for i, kernel_i in enumerate(self.kernel):
            neigh_i = self.neigh[i]
            if len(neigh_i) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[neigh_i])
                weights[ids[i]] = kernel_i.tolist()
        return allneighbors, weights

    def _set_bw(self):
        """Derive ``self.bandwidth`` and ``self.neigh`` from a knn query."""
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            self.bandwidth = np.ones((len(dmat), 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = (dmat.max(axis=1) * self.eps).reshape(-1, 1)
        # the knn neighbors come from the same query; no need to repeat it
        self.neigh = neigh

    def _eval_kernel(self):
        """Evaluate the kernel function on scaled neighbor distances.

        Sets ``self.kernel`` to a list with one weight array per
        observation, aligned with ``self.neigh``.
        """
        supported = ('triangular', 'uniform', 'quadratic', 'quartic',
                     'gaussian')
        if self.function not in supported:
            # fail before spawning workers or querying the tree
            raise ValueError(
                'Unsupported kernel function: %s' % self.function)

        t1 = time.time()
        kdtbq = self.kdt.query_ball_point
        kdtq = self.kdt.query
        bw = self.bandwidth

        pool = None
        if self.ncores > 1:
            pool = mp.Pool(processes=self.ncores, initializer=loadkd,
                           initargs=(kdtbq, kdtq, bw))
        try:
            # get points within bandwidth distance of each point
            if not hasattr(self, 'neigh'):
                if pool is not None:
                    # integer chunk size of at least one task per chunk
                    chunk = max(1, len(bw) // self.ncores)
                    neighbors = pool.map(bqwrapper, self.data,
                                         chunksize=chunk)
                else:
                    neighbors = [kdtbq(self.data[i], r=bwi[0])
                                 for i, bwi in enumerate(bw)]
                self.neigh = neighbors
            t2 = time.time()
            print("Ball Point Query took {} seconds.".format(t2 - t1))

            # get distances for neighbors
            t1 = time.time()
            if pool is not None:
                iterable = [(i, nids, self.data[i])
                            for i, nids in enumerate(self.neigh)]
                zs = pool.map(qwrapper, iterable)
            else:
                zs = []
                for i, nids in enumerate(self.neigh):
                    di, ni = kdtq(self.data[i], k=len(nids))
                    if not isinstance(di, np.ndarray):
                        # a single-neighbor query returns scalars
                        di = np.asarray([di] * len(nids))
                        ni = np.asarray([ni] * len(nids))
                    # build the id -> distance lookup once per observation
                    dist_of = dict(zip(ni, di))
                    zs.append(
                        np.array([dist_of[nid] for nid in nids]) / bw[i])
            t2 = time.time()
            print("Local query took: {} seconds".format(t2 - t1))
        finally:
            # always release the worker processes
            if pool is not None:
                pool.close()
                pool.join()

        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        else:  # gaussian
            c = (np.pi * 2) ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.) for zi in zs]
def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------
    data       : array
                 (n,k) or KDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    ids        : list
                 identifiers to attach to each observation
    pct_unique : float
                 threshold percentage of unique points in data. Below this
                 threshold tree is built on unique values only

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights; ``None`` (after
                printing a message) when ``data`` is neither a KDTree nor
                an ndarray

    Examples
    --------

    >>> x,y=np.indices((5,5))
    >>> x.shape=(25,1)
    >>> y.shape=(25,1)
    >>> data=np.hstack([x,y])
    >>> wnn2=knnW(data,k=2)
    >>> wnn4=knnW(data,k=4)
    >>> set([1,5,6,2]) == set(wnn4.neighbors[0])
    True
    >>> set([0,6,10,1]) == set(wnn4.neighbors[5])
    True
    >>> set([1,5]) == set(wnn2.neighbors[0])
    True
    >>> set([0,6]) == set(wnn2.neighbors[5])
    True
    >>> "%.2f"%wnn2.pct_nonzero
    '0.08'
    >>> wnn4.pct_nonzero
    0.16
    >>> wnn3e=knnW(data,p=2,k=3)
    >>> set([1,5,6]) == set(wnn3e.neighbors[0])
    True
    >>> wnn3m=knnW(data,p=1,k=3)
    >>> a = set([1,5,2])
    >>> b = set([1,5,6])
    >>> c = set([1,5,10])
    >>> w0n = set(wnn3m.neighbors[0])
    >>> a==w0n or b==w0n or c==w0n
    True

    ids

    >>> wnn2 = knnW(data,2)
    >>> wnn2[0]
    {1: 1.0, 5: 1.0}
    >>> wnn2[1]
    {0: 1.0, 2: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(data,2, ids = range(1,26))
    >>> wnn2[1]
    {2: 1.0, 6: 1.0}
    >>> wnn2[2]
    {1: 1.0, 3: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """
    if isinstance(data, scipy.spatial.KDTree):
        kd = data
        data = kd.data
        info = kd.query(data, k=k + 1, p=p)[1]
    elif isinstance(data, np.ndarray):
        # check if unique points are a small fraction of all points
        order = np.lexsort(data.T)
        sorted_rows = data[order]
        # a sorted row starts a new group when it differs from the row
        # before it; the mask therefore refers to *sorted* positions
        starts_group = np.concatenate(
            ([True], np.any(sorted_rows[1:] != sorted_rows[:-1], axis=1)))
        u = sorted_rows[starts_group]
        pct_u = len(u) * 1. / len(data)
        if pct_u < pct_unique:
            tree = KDTree(u)
            info = tree.query(data, k=k + 1, p=p)[1]
            # lexsort is stable, so the first member of each group is the
            # first occurrence of that point in ``data``
            uid = order[starts_group]
            # translate indices into ``u`` back to indices into ``data``
            info = uid[info]
        else:
            kd = KDTree(data)
            info = kd.query(data, k=k + 1, p=p)[1]
    else:
        print('Unsupported type')
        return None

    # ``ids`` may be a numpy array, whose bare truth value is ambiguous
    relabel = ids is not None and len(ids) > 0

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
        focal = i
        if relabel:
            row = [ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors, id_order=ids)
class KNN(W):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------
    data       : object
                 PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects; anything else is handed
                 to ``KDTree`` to build a tree
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
                 Ignored if the KDTree is an ArcKDTree
    ids        : list
                 identifiers to attach to each observation
    radius     : float
                 forwarded to ``KDTree`` when a tree has to be built
    distance_metric : string
                 forwarded to ``KDTree`` when a tree has to be built

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.KNN(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = KNN(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = KNN(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """

    def __init__(self, data, k=2, p=2, ids=None, radius=None,
                 distance_metric='euclidean'):
        if isKDTree(data):
            self.kdtree = data
            self.data = data.data
        else:
            self.data = data
            self.kdtree = KDTree(data, radius=radius,
                                 distance_metric=distance_metric)
        self.k = k
        self.p = p
        # ask for k + 1 hits because each point is its own nearest neighbor
        to_weight = self.kdtree.query(self.data, k=k + 1, p=p)[1]
        if ids is None:
            ids = list(range(to_weight.shape[0]))

        neighbors = {}
        for i, row in enumerate(to_weight):
            row = row.tolist()
            if i in row:
                row.remove(i)
            else:
                # Coincident points can push ``i`` out of its own result
                # set; drop the farthest hit so exactly k neighbors remain
                # instead of failing on the missing index.
                row = row[:-1]
            neighbors[ids[i]] = [ids[j] for j in row]

        W.__init__(self, neighbors, id_order=ids)

    @classmethod
    def from_shapefile(cls, filepath, *args, **kwargs):
        """
        Nearest neighbor weights from a shapefile.

        Parameters
        ----------

        filepath   : string
                     shapefile containing attribute data.
        k          : int
                     number of nearest neighbors
        p          : float
                     Minkowski p-norm distance metric parameter:
                     1<=p<=infinity
                     2: Euclidean distance
                     1: Manhattan distance
        ids        : list
                     identifiers to attach to each observation
        idVariable : string
                     name of a column in the shapefile's DBF to use for
                     ids; when given it supplies ``ids``
        radius     : float
                     If supplied arc_distances will be calculated
                     based on the given radius. p will be ignored.

        Returns
        -------

        w         : KNN
                    instance; Weights object with binary weights.

        Examples
        --------

        Polygon shapefile

        >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
        >>> "%.4f"%wc.pct_nonzero
        '4.0816'
        >>> set([2,1]) == set(wc.neighbors[0])
        True
        >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
        >>> set(wc3.neighbors[0]) == set([2,1,3])
        True
        >>> set(wc3.neighbors[2]) == set([4,3,0])
        True

        1 offset rather than 0 offset

        >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
        >>> set([4,3,2]) == set(wc3_1.neighbors[1])
        True
        >>> wc3_1.weights[2]
        [1.0, 1.0, 1.0]
        >>> set([4,1,8]) == set(wc3_1.neighbors[2])
        True

        Point shapefile

        >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
        >>> w.pct_nonzero
        1.1904761904761905
        >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
        >>> "%.3f"%w1.pct_nonzero
        '0.595'

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        # resolve ids from the DBF only when the caller asks for it
        id_variable = kwargs.pop('idVariable', None)
        if id_variable is not None:
            kwargs['ids'] = get_ids(filepath, id_variable)
        return cls(get_points_array_from_shapefile(filepath), *args, **kwargs)

    @classmethod
    def from_array(cls, array, *args, **kwargs):
        """
        Creates nearest neighbor weights matrix based on k nearest
        neighbors.

        Parameters
        ----------
        array       : np.ndarray
                      (n, k) array representing n observations on
                      k characteristics used to measure distances
                      between the n objects
        *args, **kwargs : remaining arguments, see :class:`KNN`

        Returns
        -------
        w         : W
                    instance
                    Weights object with binary weights

        Examples
        --------
        >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
        >>> wnn2 = pysal.KNN.from_array(points, 2)
        >>> [1,3] == wnn2.neighbors[0]
        True

        ids

        >>> wnn2 = KNN.from_array(points,2)
        >>> wnn2[0]
        {1: 1.0, 3: 1.0}
        >>> wnn2[1]
        {0: 1.0, 3: 1.0}

        now with 1 rather than 0 offset

        >>> wnn2 = KNN.from_array(points, 2, ids=range(1,7))
        >>> wnn2[1]
        {2: 1.0, 4: 1.0}
        >>> wnn2[2]
        {1: 1.0, 4: 1.0}
        >>> 0 in wnn2.neighbors
        False

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(array, *args, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None, **kwargs):
        """
        Make KNN weights from a dataframe.

        Parameters
        ----------
        df          :   pandas.dataframe
                        a dataframe with a geometry column that can be used
                        to construct a W object
        geom_col    :   string
                        column name of the geometry stored in df
        ids         :   string or iterable
                        if string, the column name of the indices from the
                        dataframe
                        if iterable, a list of ids to use for the W
                        if None, df.index is used.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, ids=ids, **kwargs)

    def reweight(self, k=None, p=None, new_data=None, new_ids=None,
                 inplace=True):
        """
        Redo K-Nearest Neighbor weights construction using given parameters

        Parameters
        ----------
        new_data    : np.ndarray
                      an array containing additional data to use in the KNN
                      weight
        new_ids     : list
                      a list aligned with new_data that provides the ids
                      for each new observation; ignored (with a warning)
                      when no new_data is given
        inplace     : bool
                      a flag denoting whether to modify the KNN object
                      in place or to return a new KNN object
        k           : int
                      number of nearest neighbors
        p           : float
                      Minkowski p-norm distance metric parameter:
                      1<=p<=infinity
                      2: Euclidean distance
                      1: Manhattan distance
                      Ignored if the KDTree is an ArcKDTree

        Returns
        -------
        A copy of the object using the new parameterization, or None if the
        object is reweighted in place.
        """
        if new_data is not None:
            # keep the dimensionality of the stored observations rather
            # than assuming two columns
            existing = np.asarray(self.data)
            n_dims = existing.shape[1]
            new_data = np.asarray(new_data).reshape(-1, n_dims)
            data = np.vstack((existing, new_data)).reshape(-1, n_dims)
            if new_ids is not None:
                ids = copy.deepcopy(self.id_order)
                ids.extend(list(new_ids))
            else:
                ids = list(range(data.shape[0]))
        else:
            if new_ids is not None:
                Warn('Remapping ids must be done using w.remap_ids')
            # no new observations: reuse the existing tree and ids so the
            # rebuild below always has well-defined inputs
            data = self.kdtree
            ids = self.id_order

        if k is None:
            k = self.k
        if p is None:
            p = self.p

        if inplace:
            self._reset()
            self.__init__(data, ids=ids, k=k, p=p)
        else:
            return KNN(data, ids=ids, k=k, p=p)
class KNN(W):
    """
    Creates nearest neighbor weights matrix based on k nearest neighbors.

    Parameters
    ----------
    data            : array (n, k) or KDTree
                      Either a PySAL KDTree/ArcKDTree (whose ``.data`` is an
                      (n, k) array) or the raw (n, k) array of n observations
                      on k characteristics used to measure distances between
                      the n objects. A raw array is wrapped in a ``KDTree``.
    k               : int
                      number of nearest neighbors
    p               : float
                      Minkowski p-norm distance metric parameter:
                      1<=p<=infinity
                      2: Euclidean distance
                      1: Manhattan distance
                      Ignored if the KDTree is an ArcKDTree
    ids             : list
                      identifiers to attach to each observation
    radius          : float
                      forwarded to ``KDTree`` when `data` is a raw array
    distance_metric : string
                      forwarded to ``KDTree`` when `data` is a raw array

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.KNN(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = KNN(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = KNN(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """

    def __init__(self, data, k=2, p=2, ids=None, radius=None,
                 distance_metric='euclidean'):
        if isKDTree(data):
            self.kdtree = data
            self.data = data.data
        else:
            self.data = data
            self.kdtree = KDTree(data, radius=radius,
                                 distance_metric=distance_metric)
        self.k = k
        self.p = p
        # Ask for k + 1 neighbors because each point is its own nearest one.
        this_nnq = self.kdtree.query(self.data, k=k + 1, p=p)

        to_weight = this_nnq[1]
        if ids is None:
            ids = list(range(to_weight.shape[0]))
        else:
            # Materialise so that ranges/iterables can be indexed here and
            # extended later by ``reweight``.
            ids = list(ids)

        neighbors = {}
        for i, row in enumerate(to_weight):
            row = row.tolist()
            if i in row:
                row.remove(i)
            else:
                # With coincident points the query may return other points
                # at distance zero instead of the focal one; drop the
                # farthest hit so exactly k neighbors remain.
                del row[-1]
            neighbors[ids[i]] = [ids[j] for j in row]

        W.__init__(self, neighbors, id_order=ids)

    @classmethod
    def from_shapefile(cls, filepath, *args, **kwargs):
        """
        Nearest neighbor weights from a shapefile.

        Parameters
        ----------

        filepath   : string
                     shapefile containing attribute data.
        k          : int
                     number of nearest neighbors
        p          : float
                     Minkowski p-norm distance metric parameter:
                     1<=p<=infinity
                     2: Euclidean distance
                     1: Manhattan distance
        ids        : list
                     identifiers to attach to each observation
        radius     : float
                     If supplied arc_distances will be calculated
                     based on the given radius. p will be ignored.

        Any further positional or keyword arguments are forwarded to the
        class constructor after the point array read from `filepath`.

        Returns
        -------

        w         : KNN
                    instance; Weights object with binary weights.

        Examples
        --------

        Polygon shapefile

        >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
        >>> "%.4f"%wc.pct_nonzero
        '4.0816'
        >>> set([2,1]) == set(wc.neighbors[0])
        True
        >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
        >>> set(wc3.neighbors[0]) == set([2,1,3])
        True
        >>> set(wc3.neighbors[2]) == set([4,3,0])
        True

        1 offset rather than 0 offset

        >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
        >>> set([4,3,2]) == set(wc3_1.neighbors[1])
        True
        >>> wc3_1.weights[2]
        [1.0, 1.0, 1.0]
        >>> set([4,1,8]) == set(wc3_1.neighbors[2])
        True

        Point shapefile

        >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
        >>> w.pct_nonzero
        1.1904761904761905
        >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
        >>> "%.3f"%w1.pct_nonzero
        '0.595'

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        NOTE(review): the examples still call ``knnW_from_shapefile`` (and
        its ``idVariable`` keyword) rather than this classmethod; confirm
        they are exercised against the intended entry point.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(get_points_array_from_shapefile(filepath), *args, **kwargs)

    @classmethod
    def from_array(cls, array, *args, **kwargs):
        """
        Creates nearest neighbor weights matrix based on k nearest neighbors.

        Parameters
        ----------
        array       : np.ndarray
                      (n, k) array representing n observations on
                      k characteristics used to measure distances
                      between the n objects
        *args       : positional arguments, forwarded to KNN (e.g. ``k``)
        **kwargs    : keyword arguments, see KNN

        Returns
        -------
        w         : W
                    instance
                    Weights object with binary weights

        Examples
        --------
        >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
        >>> wnn2 = pysal.KNN.from_array(points, 2)
        >>> [1,3] == wnn2.neighbors[0]
        True

        ids

        >>> wnn2 = KNN.from_array(points,2)
        >>> wnn2[0]
        {1: 1.0, 3: 1.0}
        >>> wnn2[1]
        {0: 1.0, 3: 1.0}

        now with 1 rather than 0 offset

        >>> wnn2 = KNN.from_array(points, 2, ids=range(1,7))
        >>> wnn2[1]
        {2: 1.0, 4: 1.0}
        >>> wnn2[2]
        {1: 1.0, 4: 1.0}
        >>> 0 in wnn2.neighbors
        False

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        # *args lets callers pass ``k`` positionally, as the examples do.
        return cls(array, *args, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None,
                       *args, **kwargs):
        """
        Make KNN weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.
        *args, **kwargs : forwarded to the class constructor

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, *args, ids=ids, **kwargs)

    def reweight(self, k=None, p=None, new_data=None, new_ids=None,
                 inplace=True):
        """
        Redo K-Nearest Neighbor weights construction using given parameters

        Parameters
        ----------
        new_data    : np.ndarray
                      an array containing additional data to use in the KNN
                      weight; it is reshaped to the column count of the
                      existing data and appended to it
        new_ids     : list
                      a list aligned with new_data that provides the ids for
                      each new observation. If `new_data` is given without
                      `new_ids`, all observations are renumbered 0..n-1.
                      If `new_ids` is given without `new_data` a warning is
                      issued and the current ids are kept.
        inplace     : bool
                      a flag denoting whether to modify the KNN object
                      in place or to return a new KNN object
        k           : int
                      number of nearest neighbors (defaults to the current k)
        p           : float
                      Minkowski p-norm distance metric parameter:
                      1<=p<=infinity
                      2: Euclidean distance
                      1: Manhattan distance
                      Ignored if the KDTree is an ArcKDTree
                      (defaults to the current p)

        Returns
        -------
        A copy of the object using the new parameterization, or None if the
        object is reweighted in place.
        """
        if new_data is not None:
            existing = np.asarray(self.data)
            # Use the dimensionality of the stored data rather than a
            # hard-coded 2 so non-planar inputs are stacked correctly.
            ncols = existing.shape[1]
            new_data = np.asarray(new_data).reshape(-1, ncols)
            data = np.vstack((existing, new_data))
            if new_ids is not None:
                ids = list(copy.deepcopy(self.id_order))
                ids.extend(list(new_ids))
            else:
                ids = list(range(data.shape[0]))
        else:
            if new_ids is not None:
                # Ids alone cannot be swapped here; keep the current ones
                # instead of failing on unbound locals further down.
                Warn('Remapping ids must be done using w.remap_ids')
            # No new observations: the existing kdtree can be reused.
            data = self.kdtree
            ids = self.id_order

        if k is None:
            k = self.k
        if p is None:
            p = self.p

        if inplace:
            self._reset()
            self.__init__(data, ids=ids, k=k, p=p)
        else:
            return KNN(data, ids=ids, k=k, p=p)
def knnW(data, k=2, p=2, ids=None):
    """
    Creates nearest neighbor weights matrix based on k nearest neighbors.

    Parameters
    ----------
    data       : array (n, k) or KDTree
                 PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects; a raw array is wrapped
                 in a ``KDTree``
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
                 Ignored if the KDTree is an ArcKDTree
    ids        : list
                 identifiers to attach to each observation; ``None`` or an
                 empty sequence means positional indices are used

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.knnW(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = knnW(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W
    """
    if isKDTree(data):
        kdt = data
        data = kdt.data
    else:
        kdt = KDTree(data)

    # k + 1 because every point is returned as its own nearest neighbor.
    nnq = kdt.query(data, k=k + 1, p=p)
    info = nnq[1]

    # Explicit test instead of truthiness: ``if ids:`` raises for ndarrays.
    use_ids = ids is not None and len(ids) > 0

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
        else:
            # Coincident points can push the focal point out of the result;
            # drop the farthest hit so exactly k neighbors remain.
            del row[-1]
        focal = i
        if use_ids:
            row = [ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors, id_order=ids)
class Kernel(W):
    """
    Spatial weights based on kernel functions, with optional multiprocessing.

    Parameters
    ----------
    data        : array (n, k) or scipy.spatial.KDTree
                  observations used to measure distances; a raw array is
                  wrapped in a ``KDTree``
    bandwidth   : float or array-like (optional)
                  a scalar is broadcast to every observation, an array-like
                  gives one bandwidth per observation. ``None`` (or an
                  empty/zero value) derives the bandwidth from the k nearest
                  neighbor distances.
    fixed       : bool
                  when the bandwidth is derived: if True a single bandwidth
                  (largest knn distance * eps) is used for all observations,
                  otherwise each observation uses its own largest knn
                  distance * eps
    k           : int
                  number of nearest neighbors used to derive the bandwidth
    function    : string {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel applied to z = distance / bandwidth
    eps         : float
                  multiplier applied to the knn distance when deriving the
                  bandwidth
    ids         : list
                  identifiers to attach to each observation
    diagonal    : bool
                  if True, the weight of each observation with itself is
                  set to 1.0
    ncores      : int
                  when greater than 1, the neighbor and distance queries are
                  mapped over a ``multiprocessing`` pool of that many
                  processes using the module-level helpers ``loadkd``,
                  ``bqwrapper`` and ``qwrapper``

    Raises
    ------
    ValueError
        if `function` is not one of the supported kernel names.
    """

    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False, ncores=1):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        self.ncores = ncores

        user_bw = None if bandwidth is None else np.array(bandwidth)
        # Explicit checks instead of ``if bandwidth:`` (which raises for
        # multi-element ndarrays); empty and scalar-zero values keep
        # falling back to the knn-derived bandwidth as before.
        if (user_bw is None or user_bw.size == 0 or
                (user_bw.ndim == 0 and not user_bw)):
            self._set_bw()
        elif user_bw.ndim == 0:
            self.bandwidth = np.ones((len(data), 1), 'float') * user_bw
        else:
            self.bandwidth = user_bw.reshape(-1, 1)

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        """Turn ``self.neigh``/``self.kernel`` into id-keyed dictionaries."""
        allneighbors = {}
        weights = {}
        # ``ids is not None`` rather than truthiness so ndarray ids work.
        if ids is not None and len(ids) > 0:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, kernel_i in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = kernel_i.tolist()
        return allneighbors, weights

    def _set_bw(self):
        """Derive ``self.bandwidth`` from the k nearest neighbor distances."""
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # The knn of each point are its neighbors; reuse the indices
            # from the query above instead of querying the tree again.
            self.neigh = neigh

    def _scaled_distances(self, i, nids):
        """Return distances from point i to `nids` divided by its bandwidth."""
        if len(nids) == 0:
            return np.zeros(0)
        di, ni = self.kdt.query(self.data[i], k=len(nids))
        # With k == 1 the query returns scalars; make both iterable.
        di = np.atleast_1d(di)
        ni = np.atleast_1d(ni)
        # Build the index -> distance map once, not once per neighbor.
        dist_of = dict(zip(ni, di))
        return np.array([dist_of[nid] for nid in nids]) / self.bandwidth[i]

    def _eval_kernel(self):
        """Find neighbors (if needed) and evaluate the kernel on them."""
        t1 = time.time()
        # get points within bandwidth distance of each point
        kdtbq = self.kdt.query_ball_point
        kdtq = self.kdt.query
        bw = self.bandwidth
        pool = None
        if self.ncores > 1:
            pool = mp.Pool(processes=self.ncores, initializer=loadkd,
                           initargs=(kdtbq, kdtq, bw))
        try:
            if not hasattr(self, 'neigh'):
                if pool is not None:
                    # Integer chunk size of at least one item per task.
                    chunksize = max(1, len(self.bandwidth) // self.ncores)
                    neighbors = pool.map(bqwrapper, self.data,
                                         chunksize=chunksize)
                else:
                    neighbors = [kdtbq(self.data[i], r=bwi[0])
                                 for i, bwi in enumerate(self.bandwidth)]
                self.neigh = neighbors
            t2 = time.time()
            print("Ball Point Query took {} seconds.".format(t2 - t1))

            # get distances for neighbors
            t1 = time.time()
            if pool is not None:
                iterable = [(i, nids, self.data[i])
                            for i, nids in enumerate(self.neigh)]
                zs = pool.map(qwrapper, iterable)
            else:
                zs = [self._scaled_distances(i, nids)
                      for i, nids in enumerate(self.neigh)]
            t2 = time.time()
            print("Local query took: {} seconds".format(t2 - t1))
        finally:
            # Always release the worker processes.
            if pool is not None:
                pool.close()
                pool.join()

        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.) for zi in zs]
        else:
            # Fail here with a clear message; otherwise ``self.kernel`` is
            # never set and construction dies later with AttributeError.
            raise ValueError(
                'Unsupported kernel function {}'.format(self.function))
class Kernel(W):
    r"""Spatial weights based on kernel functions

    Parameters
    ----------

    data        : array (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel. A scalar is
                  applied to every observation. If omitted (or empty/zero)
                  the bandwidth is derived from the k nearest neighbor
                  distances.
    fixed       : binary
                  If true then :math:`h_i=h \forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : string {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations
    ids         : list
                  identifiers to attach to each observation

    Attributes
    ----------
    bandwidth   : array
                  (n, 1) array of bandwidths

    Raises
    ------
    ValueError
        if `function` is not one of the supported kernel names.

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}

    """

    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function="triangular", eps=1.0000001, ids=None,
                 diagonal=False):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps

        user_bw = None if bandwidth is None else np.array(bandwidth)
        # Explicit checks instead of ``if bandwidth:`` (which raises for
        # multi-element ndarrays); empty and scalar-zero values keep
        # falling back to the knn-derived bandwidth as before.
        if (user_bw is None or user_bw.size == 0 or
                (user_bw.ndim == 0 and not user_bw)):
            self._set_bw()
        elif user_bw.ndim == 0:
            self.bandwidth = np.ones((len(data), 1), "float") * user_bw
        else:
            self.bandwidth = user_bw.reshape(-1, 1)

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        """Turn ``self.neigh``/``self.kernel`` into id-keyed dictionaries."""
        allneighbors = {}
        weights = {}
        # ``ids is not None`` rather than truthiness so ndarray ids work.
        if ids is not None and len(ids) > 0:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, kernel_i in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = kernel_i.tolist()
        return allneighbors, weights

    def _set_bw(self):
        """Derive ``self.bandwidth`` from the k nearest neighbor distances."""
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), "float") * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # The knn of each point are its neighbors; reuse the indices
            # from the query above instead of querying the tree again.
            self.neigh = neigh

    def _scaled_distances(self, i, nids):
        """Return distances from point i to `nids` divided by its bandwidth."""
        if len(nids) == 0:
            return np.zeros(0)
        di, ni = self.kdt.query(self.data[i], k=len(nids))
        # With k == 1 the query returns scalars; make both iterable.
        di = np.atleast_1d(di)
        ni = np.atleast_1d(ni)
        # Build the index -> distance map once, not once per neighbor.
        dist_of = dict(zip(ni, di))
        return np.array([dist_of[nid] for nid in nids]) / self.bandwidth[i]

    def _eval_kernel(self):
        """Find neighbors (if needed) and evaluate the kernel on them."""
        # get points within bandwidth distance of each point
        if not hasattr(self, "neigh"):
            ball_query = self.kdt.query_ball_point
            self.neigh = [ball_query(self.data[i], r=bwi[0])
                          for i, bwi in enumerate(self.bandwidth)]

        # get distances for neighbors, scaled by each point's bandwidth
        zs = [self._scaled_distances(i, nids)
              for i, nids in enumerate(self.neigh)]

        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == "triangular":
            self.kernel = [1 - zi for zi in zs]
        elif self.function == "uniform":
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == "quadratic":
            self.kernel = [(3.0 / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == "quartic":
            self.kernel = [(15.0 / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        elif self.function == "gaussian":
            c = np.pi * 2
            c = c ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.0) for zi in zs]
        else:
            # Fail here with a clear message; otherwise ``self.kernel`` is
            # never set and construction dies later with AttributeError.
            raise ValueError(
                "Unsupported kernel function {}".format(self.function))