Exemplo n.º 1
0
    def test_pnorm_w(self):
        # Checks pnorm_w / pnorm_w_python in two stages: calls that have to
        # be rejected with ValueError, then agreement of every
        # pnorm_w_python variant with the default pnorm_w result.
        data0 = datasets['uni4large'].samples.T
        weight = np.abs(data0[11, :60])

        two_cols = data0[:10, :2]
        three_cols = data0[:10, :3]

        # (callable, positional args, keyword args); every entry must raise
        # ValueError (presumably for the unknown heuristic and for
        # weight/data2 widths not matching data1 -- confirm in pnorm_w*).
        rejected_calls = [
            (pnorm_w_python, (two_cols,), {'p': 1.2, 'heuristic': 'buga'}),
            (pnorm_w_python, (two_cols,), {'weight': weight}),
            (pnorm_w_python, (two_cols, three_cols), {'weight': weight}),
            (pnorm_w, (two_cols, three_cols), {'weight': weight}),
            (pnorm_w, (two_cols,), {'weight': weight}),
        ]
        for func, args, opts in rejected_calls:
            self.assertRaises(ValueError, func, *args, **opts)

        # some sanity checks: with/without second data, with/without weight
        first = data0[:2, :60]
        second = data0[3:4, 1:61]
        cases = [
            (first, None, None),
            (first, second, None),
            (first, None, weight),
            (first, second, weight),
        ]

        # extra keyword arguments selecting each pnorm_w_python variant;
        # the position in this list is the "implementation #" reported
        variants = [
            {},
            {'use_sq_euclidean': True},
            {'heuristic': 'auto'},
            {'use_sq_euclidean': False},
        ]
        variants += [{'heuristic': h, 'use_sq_euclidean': False}
                     for h in ('auto', 'samples', 'features')]

        for did, (data1, data2, w) in enumerate(cases):
            # test different norms
            for p in [1, 2, 1.2]:
                kwargs = dict(data1=data1, data2=data2, weight=w, p=p)
                d = pnorm_w(**kwargs)  # default one

                # reference scale: how far the result is from the one
                # obtained against all-zero second data
                kwargs0 = dict(kwargs, data2=np.zeros(data1.shape))
                d0 = pnorm_w(**kwargs0)
                d0norm = np.linalg.norm(d - d0, 'fro')

                # compute every variant first, then compare each of them
                results = [pnorm_w_python(**dict(kwargs, **extra))
                           for extra in variants]
                for iid, d2 in enumerate(results):
                    dnorm = np.linalg.norm(d2 - d, 'fro')
                    self.assertTrue(
                        dnorm / d0norm < 1e-7,
                        msg="Failed comparison of different implementations on "
                        "data #%d, implementation #%d, p=%s. "
                        "Norm of the difference is %g" % (did, iid, p, dnorm))
Exemplo n.º 2
0
    def test_pnorm_w(self):
        # Verifies that invalid argument combinations are rejected and that
        # all pnorm_w_python code paths reproduce the default pnorm_w output.
        data0 = datasets['uni4large'].samples.T
        weight = np.abs(data0[11, :60])

        def must_reject(func, *args, **kwargs):
            # the call has to fail with ValueError
            self.assertRaises(ValueError, func, *args, **kwargs)

        must_reject(pnorm_w_python, data0[:10, :2], p=1.2, heuristic='buga')
        must_reject(pnorm_w_python, data0[:10, :2], weight=weight)
        must_reject(pnorm_w_python, data0[:10, :2], data0[:10, :3],
                    weight=weight)
        must_reject(pnorm_w, data0[:10, :2], data0[:10, :3], weight=weight)
        must_reject(pnorm_w, data0[:10, :2], weight=weight)

        failure_template = (
            "Failed comparison of different implementations on "
            "data #%d, implementation #%d, p=%s. "
            "Norm of the difference is %g")

        # some sanity checks; the ordering is (no data2, data2) without
        # weight followed by (no data2, data2) with weight
        combos = [(data0[:2, :60], other, wgt)
                  for wgt in (None, weight)
                  for other in (None, data0[3:4, 1:61])]

        for did, combo in enumerate(combos):
            data1, data2, w = combo
            # test different norms
            for p in [1, 2, 1.2]:
                kwargs = {'data1': data1, 'data2': data2, 'weight': w, 'p': p}
                d = pnorm_w(**kwargs)  # default one

                # distance to the result against zeros serves as the scale
                # for the relative error below
                kwargs0 = kwargs.copy()
                kwargs0['data2'] = np.zeros(data1.shape)
                d0norm = np.linalg.norm(d - pnorm_w(**kwargs0), 'fro')

                # test different implementations (all evaluated up front)
                candidates = [
                    pnorm_w_python(**kwargs),
                    pnorm_w_python(use_sq_euclidean=True, **kwargs),
                    pnorm_w_python(heuristic='auto', **kwargs),
                    pnorm_w_python(use_sq_euclidean=False, **kwargs),
                ]
                for h in ('auto', 'samples', 'features'):
                    candidates.append(
                        pnorm_w_python(
                            heuristic=h, use_sq_euclidean=False, **kwargs))

                for iid, d2 in enumerate(candidates):
                    dnorm = np.linalg.norm(d2 - d, 'fro')
                    within_tolerance = dnorm / d0norm < 1e-7
                    self.assertTrue(
                        within_tolerance,
                        msg=failure_template % (did, iid, p, dnorm))
Exemplo n.º 3
0
    def _call(self, dataset):
        """Computes featurewise I-RELIEF weights.

        Starting from an initial weight vector (``self.w_guess`` if given,
        otherwise all ones) the weights are re-estimated in a loop until the
        summed absolute change between two consecutive estimates drops below
        ``self.threshold``.

        Parameters
        ----------
        dataset
          Object providing ``samples`` (its first two dimensions are taken
          as number of samples and number of features) and ``targets``
          (handed to ``self.compute_M_H``).

        Returns
        -------
        Dataset
          The final weight vector with a new leading axis.  The vector is
          also stored in ``self.w``.
        """
        samples = dataset.samples
        NS, NF = samples.shape[:2]

        if self.w_guess is None:
            w = np.ones(NF, 'd')
        else:
            # Start from the user-provided guess.  Take a float copy so the
            # in-place normalization below cannot alter self.w_guess.
            w = np.array(self.w_guess, dtype='d')

        # do normalization in all cases to be safe :)
        # NOTE(review): divides by the sum of squares, not by its square
        # root as done for w_new below -- confirm this is intended.
        w /= (w**2).sum()

        # Per-sample index sets; presumably the "miss" (M) and "hit" (H)
        # samples in I-RELIEF terms -- confirm against compute_M_H.
        M, H = self.compute_M_H(dataset.targets)

        while True:
            # kernel applied to the weighted p=1 distances between samples
            d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1))
            ni = np.zeros(NF, 'd')
            for n in range(NS):
                k_M = d_w_k[n, M[n]]
                k_H = d_w_k[n, H[n]]

                # nan_to_num guards against 0/0 when a kernel sum vanishes
                # d_w_k[n, n] could be omitted since == 0.0
                gamma_n = 1.0 - np.nan_to_num(
                    k_M.sum() / (d_w_k[n, :].sum() - d_w_k[n, n]))
                alpha_n = np.nan_to_num(k_M / k_M.sum())
                beta_n = np.nan_to_num(k_H / k_H.sum())

                # kernel-weighted sums of absolute feature differences
                m_n = (np.abs(samples[n, :] - samples[M[n], :])
                       * alpha_n[:, None]).sum(0)
                h_n = (np.abs(samples[n, :] - samples[H[n], :])
                       * beta_n[:, None]).sum(0)
                ni += gamma_n * (m_n - h_n)

            ni = ni / NS

            # set all negative elements to zero
            ni_plus = np.clip(ni, 0.0, np.inf)
            # scale to unit Euclidean norm; an all-zero ni_plus yields zeros
            w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus**2).sum())))
            change = np.abs(w_new - w).sum()
            if __debug__ and 'IRELIEF' in debug.active:
                debug('IRELIEF',
                      "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" \
                      % (change, w_new.max(), w_new.min(), w_new.mean(),
                         w_new.std(), np.isnan(w_new).sum()))

            # update weights:
            w = w_new
            if change < self.threshold:
                break

        self.w = w
        return Dataset(self.w[np.newaxis])
Exemplo n.º 4
0
    def _call(self, dataset):
        """Computes featurewise I-RELIEF weights.

        The weight vector is initialised from ``self.w_guess`` (all ones
        when no guess is set) and then updated repeatedly; iteration stops
        once the summed absolute difference between successive weight
        vectors is smaller than ``self.threshold``.

        Parameters
        ----------
        dataset
          Object providing ``samples`` (first two dimensions are read as
          number of samples and number of features) and ``targets``
          (passed on to ``self.compute_M_H``).

        Returns
        -------
        Dataset
          Resulting weights with a new leading axis; the same weights are
          kept in ``self.w``.
        """
        samples = dataset.samples
        NS, NF = samples.shape[:2]

        # Identity test on purpose: `== None` compares elementwise when the
        # guess is an ndarray and its truth value is then ambiguous.
        if self.w_guess is None:
            w = np.ones(NF, 'd')
        else:
            # Honour the provided initial guess.  A float copy is used so
            # that the in-place division below leaves self.w_guess intact.
            w = np.array(self.w_guess, dtype='d')

        # do normalization in all cases to be safe :)
        # NOTE(review): the divisor is the sum of squares (no square root),
        # unlike the normalization of w_new below -- confirm intent.
        w /= (w ** 2).sum()

        # Index sets per sample; presumably I-RELIEF "misses" (M) and
        # "hits" (H) -- verify against compute_M_H.
        M, H = self.compute_M_H(dataset.targets)

        while True:
            # kernel values of the weighted p=1 distances between samples
            d_w_k = self.k(pnorm_w(data1=samples, weight=w, p=1))
            ni = np.zeros(NF, 'd')
            for n in range(NS):
                miss_idx, hit_idx = M[n], H[n]
                k_miss = d_w_k[n, miss_idx]
                k_hit = d_w_k[n, hit_idx]
                k_miss_total = k_miss.sum()

                # nan_to_num protects against 0/0 for vanishing kernel sums
                # d_w_k[n, n] could be omitted since == 0.0
                gamma_n = 1.0 - np.nan_to_num(
                    k_miss_total / (d_w_k[n, :].sum() - d_w_k[n, n]))
                alpha_n = np.nan_to_num(k_miss / k_miss_total)
                beta_n = np.nan_to_num(k_hit / k_hit.sum())

                # kernel-weighted sums of absolute feature differences
                m_n = (np.abs(samples[n, :] - samples[miss_idx, :])
                       * alpha_n[:, None]).sum(0)
                h_n = (np.abs(samples[n, :] - samples[hit_idx, :])
                       * beta_n[:, None]).sum(0)
                ni += gamma_n * (m_n - h_n)

            ni = ni / NS

            # set all negative elements to zero
            ni_plus = np.clip(ni, 0.0, np.inf)
            # rescale to unit Euclidean norm (zeros stay zeros)
            w_new = np.nan_to_num(ni_plus / (np.sqrt((ni_plus ** 2).sum())))
            change = np.abs(w_new - w).sum()
            if __debug__ and 'IRELIEF' in debug.active:
                debug('IRELIEF',
                      "change=%.4f max=%f min=%.4f mean=%.4f std=%.4f #nan=%d" \
                      % (change, w_new.max(), w_new.min(), w_new.mean(),
                         w_new.std(), np.isnan(w_new).sum()))

            # update weights:
            w = w_new
            if change < self.threshold:
                break

        self.w = w
        return Dataset(self.w[np.newaxis])