Python KDTree.query примеры использования

Язык программирования: Python

Пространство имен/Пакет: pysal.common

Класс/Тип: KDTree

Метод/Функция: query

Примеров на hotexamples.com: 11

Python KDTree.query - 11 примеров найдено. Это лучшие примеры Python кода для pysal.common.KDTree.query, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

KDTree(6)

query(5)

sparse_distance_matrix(2)

Пример #1

Показать файл

Файл: Distance.py Проект: shepherdmeng/pysal

class Kernel(W):
    """
    Spatial weights based on kernel functions.

    Parameters
    ----------

    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float
                  or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
    fixed       : binary
                  If true then :math:`h_i=h \\forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations

    Attributes
    ----------
    weights : dict
              Dictionary keyed by id with a list of weights for each neighbor

    neighbors : dict
                of lists of neighbors keyed by observation id

    bandwidth : array
                array of bandwidths

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}

    """
    def __init__(self,
                 data,
                 bandwidth=None,
                 fixed=True,
                 k=2,
                 function='triangular',
                 eps=1.0000001,
                 ids=None,
                 diagonal=False):
        if isKDTree(data):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        # get points within bandwidth distance of each point
        if not hasattr(self, 'neigh'):
            kdtq = self.kdt.query_ball_point
            neighbors = [
                kdtq(self.data[i], r=bwi[0])
                for i, bwi in enumerate(self.bandwidth)
            ]
            self.neigh = neighbors
        # get distances for neighbors
        bw = self.bandwidth

        kdtq = self.kdt.query
        z = []
        for i, nids in enumerate(self.neigh):
            di, ni = kdtq(self.data[i], k=len(nids))
            if not isinstance(di, np.ndarray):
                di = np.asarray([di] * len(nids))
                ni = np.asarray([ni] * len(nids))
            zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
            z.append(zi)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi**2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi**2)**2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c**(-0.5)
            self.kernel = [c * np.exp(-(zi**2) / 2.) for zi in zs]
        else:
            print('Unsupported kernel function', self.function)

Пример #2

Показать файл

Файл: Distance.py Проект: shepherdmeng/pysal

def knnW(data, k=2, p=2, ids=None):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    kdtree      : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------

    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.knnW(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = knnW(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """
    if isKDTree(data):
        kdt = data
        data = kdt.data
    else:
        kdt = KDTree(data)
    nnq = kdt.query(data, k=k + 1, p=p)
    info = nnq[1]

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
            focal = i
        if ids:
            row = [ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors, id_order=ids)

Пример #3

Показать файл

Файл: Distance.py Проект: lanselin/pysal

class Kernel(W):
    """
    Spatial weights based on kernel functions.

    Parameters
    ----------

    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float
                  or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
    fixed       : binary
                  If true then :math:`h_i=h \\forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations

    Attributes
    ----------
    weights : dict
              Dictionary keyed by id with a list of weights for each neighbor

    neighbors : dict
                of lists of neighbors keyed by observation id

    bandwidth : array
                array of bandwidths

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}

    """
    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False):
        if isKDTree(data):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)
    
    @classmethod
    def from_shapefile(cls, filepath, idVariable=None,  **kwargs):
        """
        Kernel based weights from shapefile

        Arguments
        ---------
        shapefile   : string
                      shapefile name with shp suffix
        idVariable  : string
                      name of column in shapefile's DBF to use for ids

        Returns
        --------
        Kernel Weights Object

        See Also
        ---------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        points = get_points_array_from_shapefile(filepath)
        if idVariable is not None:
            ids = get_ids(filepath, idVariable)
        else:
            ids = None
        return cls.from_array(points, ids=ids, **kwargs)
    
    @classmethod
    def from_array(cls, array, **kwargs):
        """
        Construct a Kernel weights from an array. Supports all the same options
        as :class:`pysal.weights.Kernel`

        See Also
        --------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        return cls(array, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None, **kwargs):
        """
        Make Kernel weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.

        See Also
        --------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, ids=ids, **kwargs)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        # get points within bandwidth distance of each point
        if not hasattr(self, 'neigh'):
            kdtq = self.kdt.query_ball_point
            neighbors = [kdtq(self.data[i], r=bwi[0]) for i,
                         bwi in enumerate(self.bandwidth)]
            self.neigh = neighbors
        # get distances for neighbors
        bw = self.bandwidth

        kdtq = self.kdt.query
        z = []
        for i, nids in enumerate(self.neigh):
            di, ni = kdtq(self.data[i], k=len(nids))
            if not isinstance(di, np.ndarray):
                di = np.asarray([di] * len(nids))
                ni = np.asarray([ni] * len(nids))
            zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
            z.append(zi)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.) for zi in zs]
        else:
            print('Unsupported kernel function', self.function)

Пример #4

Показать файл

Файл: kerneltest.py Проект: pysal/pPysal

class Kernel(W):
    def __init__(self,
                 data,
                 bandwidth=None,
                 fixed=True,
                 k=2,
                 function='triangular',
                 eps=1.0000001,
                 ids=None,
                 diagonal=False,
                 ncores=1):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        self.ncores = ncores

        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        t1 = time.time()
        # get points within bandwidth distance of each point
        kdtbq = self.kdt.query_ball_point
        kdtq = self.kdt.query
        bw = self.bandwidth
        if self.ncores > 1:
            pool = mp.Pool(processes=self.ncores,
                           initializer=loadkd,
                           initargs=(kdtbq, kdtq, bw))
        if not hasattr(self, 'neigh'):
            if self.ncores > 1:
                neighbors = pool.map(bqwrapper,
                                     self.data,
                                     chunksize=len(self.bandwidth) /
                                     self.ncores)
            else:
                neighbors = [
                    kdtbq(self.data[i], r=bwi[0])
                    for i, bwi in enumerate(self.bandwidth)
                ]
            self.neigh = neighbors
        t2 = time.time()
        print "Ball Point Query took {} seconds.".format(t2 - t1)
        # get distances for neighbors
        bw = self.bandwidth

        #kdtq = self.kdt.query
        z = []
        t1 = time.time()
        if self.ncores > 1:
            iterable = [(i, nids, self.data[i])
                        for i, nids in enumerate(self.neigh)]
            z = pool.map(qwrapper, iterable)
        else:
            for i, nids in enumerate(self.neigh):
                di, ni = kdtq(self.data[i], k=len(nids))
                zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
                z.append(zi)
        t2 = time.time()
        print "Local query took: {} seconds".format(t2 - t1)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi**2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi**2)**2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c**(-0.5)
            self.kernel = [c * np.exp(-(zi**2) / 2.) for zi in zs]
        else:
            print 'Unsupported kernel function', self.function

Пример #5

Показать файл

Файл: Distance.py Проект: cheneason/pysal

def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    data       : array (n,k) or KDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    ids        : list
                 identifiers to attach to each observation
    pct_unique : float
                 threshold percentage of unique points in data. Below this
                 threshold tree is built on unique values only

    Returns
    -------

    w         : W instance
                Weights object with binary weights

    Examples
    --------

    >>> x,y=np.indices((5,5))
    >>> x.shape=(25,1)
    >>> y.shape=(25,1)
    >>> data=np.hstack([x,y])
    >>> wnn2=knnW(data,k=2)
    >>> wnn4=knnW(data,k=4)
    >>> set([1,5,6,2]) == set(wnn4.neighbors[0])
    True
    >>> set([0,6,10,1]) == set(wnn4.neighbors[5])
    True
    >>> set([1,5]) == set(wnn2.neighbors[0])
    True
    >>> set([0,6]) == set(wnn2.neighbors[5])
    True
    >>> "%.2f"%wnn2.pct_nonzero
    '0.08'
    >>> wnn4.pct_nonzero
    0.16
    >>> wnn3e=knnW(data,p=2,k=3)
    >>> set([1,5,6]) == set(wnn3e.neighbors[0])
    True
    >>> wnn3m=knnW(data,p=1,k=3)
    >>> a = set([1,5,2])
    >>> b = set([1,5,6])
    >>> c = set([1,5,10])
    >>> w0n = set(wnn3m.neighbors[0])
    >>> a==w0n or b==w0n or c==w0n
    True

    ids

    >>> wnn2 = knnW(data,2)
    >>> wnn2[0]
    {1: 1.0, 5: 1.0}
    >>> wnn2[1]
    {0: 1.0, 2: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(data,2, ids = range(1,26))
    >>> wnn2[1]
    {2: 1.0, 6: 1.0}
    >>> wnn2[2]
    {1: 1.0, 3: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """

    if issubclass(type(data), scipy.spatial.KDTree):
        kd = data
        data = kd.data
        nnq = kd.query(data, k=k+1, p=p)
        info = nnq[1]
    elif type(data).__name__ == 'ndarray':
        # check if unique points are a small fraction of all points
        ind =  np.lexsort(data.T)
        u = data[np.concatenate(([True],np.any(data[ind[1:]]!=data[ind[:-1]],axis=1)))]
        pct_u = len(u)*1. / len(data)
        if pct_u < pct_unique:
            tree = KDTree(u)
            nnq = tree.query(data, k=k+1, p=p)
            info = nnq[1]
            uid = [np.where((data == ui).all(axis=1))[0][0] for ui in u]
            new_info = np.zeros((len(data), k + 1), 'int')
            for i, row in enumerate(info):
                new_info[i] = [uid[j] for j in row]
            info = new_info
        else:
            kd = KDTree(data)
            # calculate
            nnq = kd.query(data, k=k + 1, p=p)
            info = nnq[1]
    else:
        print 'Unsupported type'
        return None

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
            focal = i
        if ids:
            row = [ ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors,  id_order=ids)

Пример #6

Показать файл

Файл: Distance.py Проект: lanselin/pysal

class KNN(W):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------
    kdtree      : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.KNN(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = KNN(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = KNN(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    def __init__(self, data, k=2, p=2, ids=None, radius=None, distance_metric='euclidean'):
        if isKDTree(data):
            self.kdtree = data
            self.data = data.data
        else:
            self.data = data
            self.kdtree = KDTree(data, radius=radius, distance_metric=distance_metric)
        self.k = k 
        self.p = p
        this_nnq = self.kdtree.query(self.data, k=k+1, p=p)
        
        to_weight = this_nnq[1]
        if ids is None:
            ids = list(range(to_weight.shape[0]))
        
        neighbors = {}
        for i,row in enumerate(to_weight):
            row = row.tolist()
            row.remove(i)
            row = [ids[j] for j in row]
            focal = ids[i]
            neighbors[focal] = row
        W.__init__(self, neighbors, id_order=ids)
    
    @classmethod
    def from_shapefile(cls, filepath, **kwargs):
        """
        Nearest neighbor weights from a shapefile.

        Parameters
        ----------

        data       : string
                     shapefile containing attribute data.
        k          : int
                     number of nearest neighbors
        p          : float
                     Minkowski p-norm distance metric parameter:
                     1<=p<=infinity
                     2: Euclidean distance
                     1: Manhattan distance
        ids        : list
                     identifiers to attach to each observation
        radius     : float
                     If supplied arc_distances will be calculated
                     based on the given radius. p will be ignored.

        Returns
        -------

        w         : KNN
                    instance; Weights object with binary weights.

        Examples
        --------

        Polygon shapefile

        >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
        >>> "%.4f"%wc.pct_nonzero
        '4.0816'
        >>> set([2,1]) == set(wc.neighbors[0])
        True
        >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
        >>> set(wc3.neighbors[0]) == set([2,1,3])
        True
        >>> set(wc3.neighbors[2]) == set([4,3,0])
        True

        1 offset rather than 0 offset

        >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
        >>> set([4,3,2]) == set(wc3_1.neighbors[1])
        True
        >>> wc3_1.weights[2]
        [1.0, 1.0, 1.0]
        >>> set([4,1,8]) == set(wc3_1.neighbors[2])
        True


        Point shapefile

        >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
        >>> w.pct_nonzero
        1.1904761904761905
        >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
        >>> "%.3f"%w1.pct_nonzero

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(get_points_array_from_shapefile(filepath), **kwargs)
    
    @classmethod
    def from_array(cls, array, **kwargs):
        """
        Creates nearest neighbor weights matrix based on k nearest
        neighbors.

        Parameters
        ----------
        array       : np.ndarray
                      (n, k) array representing n observations on 
                      k characteristics used to measure distances 
                      between the n objects
        **kwargs    : keyword arguments, see Rook

        Returns
        -------
        w         : W
                    instance
                    Weights object with binary weights

        Examples
        --------
        >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
        >>> wnn2 = pysal.KNN.from_array(points, 2)
        >>> [1,3] == wnn2.neighbors[0]
        True

        ids

        >>> wnn2 = KNN.from_array(points,2)
        >>> wnn2[0]
        {1: 1.0, 3: 1.0}
        >>> wnn2[1]
        {0: 1.0, 3: 1.0}

        now with 1 rather than 0 offset

        >>> wnn2 = KNN.from_array(points, 2, ids=range(1,7))
        >>> wnn2[1]
        {2: 1.0, 4: 1.0}
        >>> wnn2[2]
        {1: 1.0, 4: 1.0}
        >>> 0 in wnn2.neighbors
        False

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class: `pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(array, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None, **kwargs):
        """
        Make KNN weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.

        See Also
        --------
        :class: `pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, ids=ids, **kwargs)

    def reweight(self, k=None, p=None, new_data=None, new_ids=None, inplace=True):
        """
        Redo K-Nearest Neighbor weights construction using given parameters

        Parameters
        ----------
        new_data    : np.ndarray
                      an array containing additional data to use in the KNN
                      weight
        new_ids     : list
                      a list aligned with new_data that provides the ids for
                      each new observation
        inplace     : bool
                      a flag denoting whether to modify the KNN object 
                      in place or to return a new KNN object
        k           : int
                      number of nearest neighbors
        p           : float
                      Minkowski p-norm distance metric parameter:
                      1<=p<=infinity
                      2: Euclidean distance
                      1: Manhattan distance
                      Ignored if the KDTree is an ArcKDTree

        Returns
        -------
        A copy of the object using the new parameterization, or None if the
        object is reweighted in place.
        """
        if (new_data is not None):
            new_data = np.asarray(new_data).reshape(-1,2)
            data = np.vstack((self.data, new_data)).reshape(-1,2)
            if new_ids is not None:
                ids = copy.deepcopy(self.id_order)
                ids.extend(list(new_ids))
            else:
                ids = list(range(data.shape[0]))
        elif (new_data is None) and (new_ids is None):
            # If not, we can use the same kdtree we have
            data = self.kdtree
            ids = self.id_order
        elif (new_data is None) and (new_ids is not None):
            Warn('Remapping ids must be done using w.remap_ids')
        if k is None:
            k = self.k
        if p is None:
            p = self.p
        if inplace:
            self._reset()
            self.__init__(data, ids=ids, k=k, p=p)
        else:
            return KNN(data, ids=ids, k=k, p=p)

Пример #7

Показать файл

Файл: Distance.py Проект: jkang1643/GIS

class KNN(W):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------
    kdtree      : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.KNN(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = KNN(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = KNN(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    def __init__(self,
                 data,
                 k=2,
                 p=2,
                 ids=None,
                 radius=None,
                 distance_metric='euclidean'):
        if isKDTree(data):
            self.kdtree = data
            self.data = data.data
        else:
            self.data = data
            self.kdtree = KDTree(data,
                                 radius=radius,
                                 distance_metric=distance_metric)
        self.k = k
        self.p = p
        this_nnq = self.kdtree.query(self.data, k=k + 1, p=p)

        to_weight = this_nnq[1]
        if ids is None:
            ids = list(range(to_weight.shape[0]))

        neighbors = {}
        for i, row in enumerate(to_weight):
            row = row.tolist()
            row.remove(i)
            row = [ids[j] for j in row]
            focal = ids[i]
            neighbors[focal] = row
        W.__init__(self, neighbors, id_order=ids)

    @classmethod
    def from_shapefile(cls, filepath, **kwargs):
        """
        Nearest neighbor weights from a shapefile.

        Parameters
        ----------

        data       : string
                     shapefile containing attribute data.
        k          : int
                     number of nearest neighbors
        p          : float
                     Minkowski p-norm distance metric parameter:
                     1<=p<=infinity
                     2: Euclidean distance
                     1: Manhattan distance
        ids        : list
                     identifiers to attach to each observation
        radius     : float
                     If supplied arc_distances will be calculated
                     based on the given radius. p will be ignored.

        Returns
        -------

        w         : KNN
                    instance; Weights object with binary weights.

        Examples
        --------

        Polygon shapefile

        >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
        >>> "%.4f"%wc.pct_nonzero
        '4.0816'
        >>> set([2,1]) == set(wc.neighbors[0])
        True
        >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
        >>> set(wc3.neighbors[0]) == set([2,1,3])
        True
        >>> set(wc3.neighbors[2]) == set([4,3,0])
        True

        1 offset rather than 0 offset

        >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
        >>> set([4,3,2]) == set(wc3_1.neighbors[1])
        True
        >>> wc3_1.weights[2]
        [1.0, 1.0, 1.0]
        >>> set([4,1,8]) == set(wc3_1.neighbors[2])
        True


        Point shapefile

        >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
        >>> w.pct_nonzero
        1.1904761904761905
        >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
        >>> "%.3f"%w1.pct_nonzero

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(get_points_array_from_shapefile(filepath), **kwargs)

    @classmethod
    def from_array(cls, array, **kwargs):
        """
        Creates nearest neighbor weights matrix based on k nearest
        neighbors.

        Parameters
        ----------
        array       : np.ndarray
                      (n, k) array representing n observations on 
                      k characteristics used to measure distances 
                      between the n objects
        **kwargs    : keyword arguments, see Rook

        Returns
        -------
        w         : W
                    instance
                    Weights object with binary weights

        Examples
        --------
        >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
        >>> wnn2 = pysal.KNN.from_array(points, 2)
        >>> [1,3] == wnn2.neighbors[0]
        True

        ids

        >>> wnn2 = KNN.from_array(points,2)
        >>> wnn2[0]
        {1: 1.0, 3: 1.0}
        >>> wnn2[1]
        {0: 1.0, 3: 1.0}

        now with 1 rather than 0 offset

        >>> wnn2 = KNN.from_array(points, 2, ids=range(1,7))
        >>> wnn2[1]
        {2: 1.0, 4: 1.0}
        >>> wnn2[2]
        {1: 1.0, 4: 1.0}
        >>> 0 in wnn2.neighbors
        False

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class: `pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(array, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None, **kwargs):
        """
        Make KNN weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.

        See Also
        --------
        :class: `pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, ids=ids, **kwargs)

    def reweight(self,
                 k=None,
                 p=None,
                 new_data=None,
                 new_ids=None,
                 inplace=True):
        """
        Redo K-Nearest Neighbor weights construction using given parameters

        Parameters
        ----------
        new_data    : np.ndarray
                      an array containing additional data to use in the KNN
                      weight
        new_ids     : list
                      a list aligned with new_data that provides the ids for
                      each new observation
        inplace     : bool
                      a flag denoting whether to modify the KNN object 
                      in place or to return a new KNN object
        k           : int
                      number of nearest neighbors
        p           : float
                      Minkowski p-norm distance metric parameter:
                      1<=p<=infinity
                      2: Euclidean distance
                      1: Manhattan distance
                      Ignored if the KDTree is an ArcKDTree

        Returns
        -------
        A copy of the object using the new parameterization, or None if the
        object is reweighted in place.
        """
        if (new_data is not None):
            new_data = np.asarray(new_data).reshape(-1, 2)
            data = np.vstack((self.data, new_data)).reshape(-1, 2)
            if new_ids is not None:
                ids = copy.deepcopy(self.id_order)
                ids.extend(list(new_ids))
            else:
                ids = list(range(data.shape[0]))
        elif (new_data is None) and (new_ids is None):
            # If not, we can use the same kdtree we have
            data = self.kdtree
            ids = self.id_order
        elif (new_data is None) and (new_ids is not None):
            Warn('Remapping ids must be done using w.remap_ids')
        if k is None:
            k = self.k
        if p is None:
            p = self.p
        if inplace:
            self._reset()
            self.__init__(data, ids=ids, k=k, p=p)
        else:
            return KNN(data, ids=ids, k=k, p=p)

Пример #8

Показать файл

Файл: Distance.py Проект: denadai2/pysal

def knnW(data, k=2, p=2, ids=None):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    kdtree      : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------

    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.knnW(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = knnW(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """
    if isKDTree(data):
        kdt = data
        data = kdt.data
    else:
        kdt = KDTree(data)
    nnq = kdt.query(data, k=k+1, p=p)
    info = nnq[1]

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
            focal = i
        if ids:
            row = [ ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors,  id_order=ids)

Пример #9

Показать файл

Файл: kerneltest.py Проект: giserh/cybergis-toolkit

class Kernel(W):
    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False, ncores=1):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        self.ncores = ncores

        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        t1 = time.time()
        # get points within bandwidth distance of each point
        kdtbq = self.kdt.query_ball_point
        kdtq = self.kdt.query
        bw = self.bandwidth
        if self.ncores > 1:
            pool = mp.Pool(processes=self.ncores, initializer=loadkd, initargs=(kdtbq,kdtq,bw))
        if not hasattr(self, 'neigh'):
            if self.ncores > 1:
                neighbors = pool.map(bqwrapper,self.data, chunksize = len(self.bandwidth) / self.ncores)
            else:
                neighbors = [kdtbq(self.data[i], r=bwi[0]) for i,
                            bwi in enumerate(self.bandwidth)]
            self.neigh = neighbors
        t2 = time.time()
        print "Ball Point Query took {} seconds.".format(t2 - t1)
        # get distances for neighbors
        bw = self.bandwidth

        #kdtq = self.kdt.query
        z = []
        t1 = time.time()
        if self.ncores > 1:
            iterable = [(i,nids, self.data[i]) for i, nids in enumerate(self.neigh)]
            z = pool.map(qwrapper, iterable)
        else:
            for i, nids in enumerate(self.neigh):
                di, ni = kdtq(self.data[i], k=len(nids))
                zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
                z.append(zi)
        t2 = time.time()
        print "Local query took: {} seconds".format(t2 - t1)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.) for zi in zs]
        else:
            print 'Unsupported kernel function', self.function

Пример #10

Показать файл

def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    data       : array (n,k) or KDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    ids        : list
                 identifiers to attach to each observation
    pct_unique : float
                 threshold percentage of unique points in data. Below this
                 threshold tree is built on unique values only

    Returns
    -------

    w         : W instance
                Weights object with binary weights

    Examples
    --------

    >>> x,y=np.indices((5,5))
    >>> x.shape=(25,1)
    >>> y.shape=(25,1)
    >>> data=np.hstack([x,y])
    >>> wnn2=knnW(data,k=2)
    >>> wnn4=knnW(data,k=4)
    >>> set([1,5,6,2]) == set(wnn4.neighbors[0])
    True
    >>> set([0,6,10,1]) == set(wnn4.neighbors[5])
    True
    >>> set([1,5]) == set(wnn2.neighbors[0])
    True
    >>> set([0,6]) == set(wnn2.neighbors[5])
    True
    >>> "%.2f"%wnn2.pct_nonzero
    '0.08'
    >>> wnn4.pct_nonzero
    0.16
    >>> wnn3e=knnW(data,p=2,k=3)
    >>> set([1,5,6]) == set(wnn3e.neighbors[0])
    True
    >>> wnn3m=knnW(data,p=1,k=3)
    >>> a = set([1,5,2])
    >>> b = set([1,5,6])
    >>> c = set([1,5,10])
    >>> w0n = set(wnn3m.neighbors[0])
    >>> a==w0n or b==w0n or c==w0n
    True

    ids

    >>> wnn2 = knnW(data,2)
    >>> wnn2[0]
    {1: 1.0, 5: 1.0}
    >>> wnn2[1]
    {0: 1.0, 2: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(data,2, ids = range(1,26))
    >>> wnn2[1]
    {2: 1.0, 6: 1.0}
    >>> wnn2[2]
    {1: 1.0, 3: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """

    if issubclass(type(data), scipy.spatial.KDTree):
        kd = data
        data = kd.data
        nnq = kd.query(data, k=k+1, p=p)
        info = nnq[1]
    elif type(data).__name__ == 'ndarray':
        # check if unique points are a small fraction of all points
        ind =  np.lexsort(data.T)
        u = data[np.concatenate(([True],np.any(data[ind[1:]]!=data[ind[:-1]],axis=1)))]
        pct_u = len(u)*1. / len(data)
        if pct_u < pct_unique:
            tree = KDTree(u)
            nnq = tree.query(data, k=k+1, p=p)
            info = nnq[1]
            uid = [np.where((data == ui).all(axis=1))[0][0] for ui in u]
            new_info = np.zeros((len(data), k + 1), 'int')
            for i, row in enumerate(info):
                new_info[i] = [uid[j] for j in row]
            info = new_info
        else:
            kd = KDTree(data)
            # calculate
            nnq = kd.query(data, k=k + 1, p=p)
            info = nnq[1]
    else:
        print 'Unsupported type'
        return None

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
            focal = i
        if ids:
            row = [ ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors,  id_order=ids)

Пример #11

Показать файл

Файл: Distance.py Проект: andyreagan/pysal

class Kernel(W):
    """Spatial weights based on kernel functions

    Parameters
    ----------

    data        : array (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
    fixed       : binary
                  If true then :math:`h_i=h \\forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : string {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}
    """

    def __init__(
        self, data, bandwidth=None, fixed=True, k=2, function="triangular", eps=1.0000001, ids=None, diagonal=False
    ):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), "float") * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), "float") * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        # get points within bandwidth distance of each point
        if not hasattr(self, "neigh"):
            kdtq = self.kdt.query_ball_point
            neighbors = [kdtq(self.data[i], r=bwi[0]) for i, bwi in enumerate(self.bandwidth)]
            self.neigh = neighbors
        # get distances for neighbors
        bw = self.bandwidth

        kdtq = self.kdt.query
        z = []
        for i, nids in enumerate(self.neigh):
            di, ni = kdtq(self.data[i], k=len(nids))
            zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
            z.append(zi)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == "triangular":
            self.kernel = [1 - zi for zi in zs]
        elif self.function == "uniform":
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == "quadratic":
            self.kernel = [(3.0 / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == "quartic":
            self.kernel = [(15.0 / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        elif self.function == "gaussian":
            c = np.pi * 2
            c = c ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.0) for zi in zs]
        else:
            print "Unsupported kernel function", self.function