Exemplo n.º 1
0
def plot_samples_distance(dataset, sortbyattr=None):
    """Plot the euclidean distances between all samples of a dataset.

    Parameters
    ----------
    dataset : Dataset
      Providing the samples.
    sortbyattr : None or str
      If None, the samples distances will be in the same order as their
      appearance in the dataset. Alternatively, the name of a samples
      attribute can be given, which wil then be used to sort/group the
      samples, e.g. to investigate the similarity samples by label or by
      chunks.
    """
    if sortbyattr is not None:
        slicer = []
        for attr in dataset.sa[sortbyattr].unique:
            slicer += \
                get_samples_by_attr(dataset, sortbyattr, attr).tolist()
        samples = dataset.samples[slicer]
    else:
        samples = dataset.samples

    ed = np.sqrt(squared_euclidean_distance(samples))

    pl.imshow(ed, interpolation='nearest')
    pl.colorbar()
Exemplo n.º 2
0
    def test_euclid_dist(self):
        """Euclidean distance kernel testing"""

        # select some block of data from already generated
        data = datasets['uni4large'].samples[:5, :8]

        ed = squared_euclidean_distance(data)

        # XXX not sure if that is right: 'weight' seems to be given by
        # feature (i.e. column), but distance is between samples (i.e. rows)
        # current behavior is:
        true_size = (5, 5)
        self.assertTrue(ed.shape == true_size)

        # slow version to compute distance matrix
        ed_manual = np.zeros(true_size, 'd')
        for i in range(true_size[0]):
            for j in range(true_size[1]):
                #ed_manual[i,j] = np.sqrt(((data[i,:] - data[j,:] )** 2).sum())
                ed_manual[i,j] = ((data[i,:] - data[j,:] )** 2).sum()
        ed_manual[ed_manual < 0] = 0

        self.assertTrue(np.diag(ed_manual).sum() < 0.0000000001)
        self.assertTrue(np.diag(ed).sum() < 0.0000000001)

        # let see whether Kernel does the same
        self.assertTrue((ed - ed_manual).sum() < 0.0000001)
Exemplo n.º 3
0
    def test_euclid_dist(self):
        """Euclidean distance kernel testing"""

        # select some block of data from already generated
        data = datasets['uni4large'].samples[:5, :8]

        ed = squared_euclidean_distance(data)

        # XXX not sure if that is right: 'weight' seems to be given by
        # feature (i.e. column), but distance is between samples (i.e. rows)
        # current behavior is:
        true_size = (5, 5)
        self.assertTrue(ed.shape == true_size)

        # slow version to compute distance matrix
        ed_manual = np.zeros(true_size, 'd')
        for i in range(true_size[0]):
            for j in range(true_size[1]):
                #ed_manual[i,j] = np.sqrt(((data[i,:] - data[j,:] )** 2).sum())
                ed_manual[i, j] = ((data[i, :] - data[j, :])**2).sum()
        ed_manual[ed_manual < 0] = 0

        self.assertTrue(np.diag(ed_manual).sum() < 0.0000000001)
        self.assertTrue(np.diag(ed).sum() < 0.0000000001)

        # let see whether Kernel does the same
        self.assertTrue((ed - ed_manual).sum() < 0.0000001)
Exemplo n.º 4
0
    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          lhs data
        data2 : numpy.ndarray
          rhs data
        """
        tmp = squared_euclidean_distance(
                data1, data2, weight=1.0 / (self.length_scale ** 2))
        self._k = \
            self.sigma_f**2 * (1.0 + tmp / (2.0 * self.alpha)) ** -self.alpha
Exemplo n.º 5
0
    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          data
        data2 : numpy.ndarray
          data
          (Defaults to None)
        """
        # weighted squared euclidean distance matrix:
        self.wdm2 = squared_euclidean_distance(data1, data2, weight=(self.length_scale**-2))
        self._k = self.sigma_f**2 * np.exp(-0.5*self.wdm2)
Exemplo n.º 6
0
    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          lhs data
        data2 : numpy.ndarray
          rhs data
        """
        tmp = squared_euclidean_distance(data1,
                                         data2,
                                         weight=1.0 / (self.length_scale**2))
        self._k = \
            self.sigma_f**2 * (1.0 + tmp / (2.0 * self.alpha)) ** -self.alpha
Exemplo n.º 7
0
    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          data
        data2 : numpy.ndarray
          data
          (Defaults to None)
        """
        # weighted squared euclidean distance matrix:
        self.wdm2 = squared_euclidean_distance(data1,
                                               data2,
                                               weight=(self.length_scale**-2))
        self._k = self.sigma_f**2 * np.exp(-0.5 * self.wdm2)
Exemplo n.º 8
0
    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          lhs data
        data2 : numpy.ndarray
          rhs data
        """
        params = self.params
        # XXX the following computation can be (maybe) made more
        # efficient since length_scale is squared and then
        # square-rooted uselessly.
        # Weighted euclidean distance matrix:
        self.wdm = np.sqrt(squared_euclidean_distance(
            data1, data2, weight=(params.length_scale**-2)))
        self._k = \
            params.sigma_f**2 * np.exp(-self.wdm)
Exemplo n.º 9
0
    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          lhs data
        data2 : numpy.ndarray
          rhs data
        """
        params = self.params
        # XXX the following computation can be (maybe) made more
        # efficient since length_scale is squared and then
        # square-rooted uselessly.
        # Weighted euclidean distance matrix:
        self.wdm = np.sqrt(
            squared_euclidean_distance(data1,
                                       data2,
                                       weight=(params.length_scale**-2)))
        self._k = \
            params.sigma_f**2 * np.exp(-self.wdm)
Exemplo n.º 10
0
    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          lhs data
        data2 : numpy.ndarray
          rhs data
        """
        tmp = squared_euclidean_distance(
                data1, data2, weight=0.5 / (self.length_scale ** 2))
        if self.numerator == 3.0:
            tmp = np.sqrt(tmp)
            self._k = \
                self.sigma_f**2 * (1.0 + np.sqrt(3.0) * tmp) \
                * np.exp(-np.sqrt(3.0) * tmp)
        elif self.numerator == 5.0:
            tmp2 = np.sqrt(tmp)
            self._k = \
                self.sigma_f**2 * (1.0 + np.sqrt(5.0) * tmp2 + 5.0 / 3.0 * tmp) \
                * np.exp(-np.sqrt(5.0) * tmp2)
Exemplo n.º 11
0
    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          lhs data
        data2 : numpy.ndarray
          rhs data
        """
        tmp = squared_euclidean_distance(data1,
                                         data2,
                                         weight=0.5 / (self.length_scale**2))
        if self.numerator == 3.0:
            tmp = np.sqrt(tmp)
            self._k = \
                self.sigma_f**2 * (1.0 + np.sqrt(3.0) * tmp) \
                * np.exp(-np.sqrt(3.0) * tmp)
        elif self.numerator == 5.0:
            tmp2 = np.sqrt(tmp)
            self._k = \
                self.sigma_f**2 * (1.0 + np.sqrt(5.0) * tmp2 + 5.0 / 3.0 * tmp) \
                * np.exp(-np.sqrt(5.0) * tmp2)
Exemplo n.º 12
0
 def _compute(self, d1, d2):
     # Do the Rbf
     self._k = np.exp(-squared_euclidean_distance(d1,d2) / self.params.sigma)
Exemplo n.º 13
0
 def _compute(self, d1, d2):
     # Do the Rbf
     self._k = np.exp(-squared_euclidean_distance(d1, d2) /
                      self.params.sigma)