Exemplo n.º 1
0
    def fit(self, X, y):
        """Fit the model.

        Parameters
        ----------
        X : 2d array, shape (n_samples, n_features)
            Data array.
        y : 2d array, shape (n_pairs, 3)
            Array containing pairs elements indices and associated label.
            The indices of pairs elements are contained in ``y[:, :2]``,
            while the labels are given by ``y[:, 2]``. Labels are in
            {-1, 1}. The elements of the i-th pair are ``X[y[i, 0]]`` and
            ``X[y[i, 1]]``.

        Returns
        -------
        self
            The estimator itself. After a successful fit ``coefs_`` holds
            the transpose of the optimised matrix, ``threshold_`` the
            optimised threshold and ``n_features`` the number of columns
            of ``X``.

        Notes
        -----
        ``_diff``, ``_labels`` and, when ``self.kernel`` is truthy, ``_U``
        are scratch attributes that only exist for the duration of the
        call (presumably read by ``_compute_obj`` / ``_compute_grad``).
        They are removed before returning, including when the callback or
        the optimiser raises.
        """
        # pylint: disable=E1101
        X = np.asarray(X)
        y = np.asarray(y)
        pairs = y[:, :2].astype("int")
        labels = y[:, 2]

        # Initialize with whitened PCA
        L0 = pca(X, self.n_components)

        # Normalize to have unit average distance
        L0 /= np.sqrt(compute_distances(X, pairs, proj_mat=L0).mean())
        b0 = 1.0

        self._diff = compute_differences(X, pairs)
        self._labels = labels
        # Names of the scratch attributes that currently exist on ``self``;
        # the ``finally`` clause deletes exactly these.
        scratch = ["_diff", "_labels"]

        try:
            n_pairs = len(pairs)
            n_samples, self.n_features = X.shape

            if self.kernel:
                # Row i gets +1 in the column of the pair's first element
                # and -1 in the column of its second element.
                U = np.zeros((n_pairs, n_samples))
                U[range(n_pairs), pairs[:, 0]] = 1.0
                U[range(n_pairs), pairs[:, 1]] = -1.0
                self._U = csr_matrix(U)
                scratch.append("_U")

            x0 = MatThreshold(L0, b0)

            # Report the starting point before the first optimiser step
            if self.callback is not None:
                self.callback(x0)

            # Only the first element of the optimiser's result is used; it
            # unpacks into the learned matrix and the threshold.
            L, self.threshold_ = lgbopt.fmin_gd(self._compute_obj,
                                                self._compute_grad,
                                                x0,
                                                callback=self.callback,
                                                inner=MatThreshold.dot,
                                                **self.opt_args)[0]

            self.coefs_ = L.T
        finally:
            # Always drop the scratch state, even on failure, so a failed
            # fit does not leave large temporaries attached to the
            # estimator.
            for name in scratch:
                delattr(self, name)

        return self
Exemplo n.º 2
0
Arquivo: dml.py Projeto: joelscp/DML
    def fit(self, X, y):
        """Fit the model.

        Parameters
        ----------
        X : 2d array, shape (n_samples, n_features)
            Data array.
        y : 2d array, shape (n_pairs, 3)
            Array containing pairs elements indices and associated label.
            The indices of pairs elements are contained in ``y[:, :2]``,
            while the labels are given by ``y[:, 2]``. Labels are in
            {-1, 1}. The elements of the i-th pair are ``X[y[i, 0]]`` and
            ``X[y[i, 1]]``.

        Returns
        -------
        self
            The estimator itself. After a successful fit ``coefs_`` holds
            the transpose of the optimised matrix, ``threshold_`` the
            optimised threshold and ``n_features`` the number of columns
            of ``X``.

        Notes
        -----
        ``_diff``, ``_labels`` and, when ``self.kernel`` is truthy, ``_U``
        are scratch attributes that only exist for the duration of the
        call (presumably read by ``_compute_obj`` / ``_compute_grad``).
        They are removed before returning, including when the callback or
        the optimiser raises.
        """
        # pylint: disable=E1101
        X = np.asarray(X)
        y = np.asarray(y)
        pairs = y[:, :2].astype("int")
        labels = y[:, 2]

        # Initialize with whitened PCA
        L0 = pca(X, self.n_components)

        # Normalize to have unit average distance
        L0 /= np.sqrt(compute_distances(X, pairs, proj_mat=L0).mean())
        b0 = 1.0

        self._diff = compute_differences(X, pairs)
        self._labels = labels
        # Names of the scratch attributes that currently exist on ``self``;
        # the ``finally`` clause deletes exactly these.
        scratch = ["_diff", "_labels"]

        try:
            n_pairs = len(pairs)
            n_samples, self.n_features = X.shape

            if self.kernel:
                # Row i gets +1 in the column of the pair's first element
                # and -1 in the column of its second element.
                U = np.zeros((n_pairs, n_samples))
                U[range(n_pairs), pairs[:, 0]] = 1.0
                U[range(n_pairs), pairs[:, 1]] = -1.0
                self._U = csr_matrix(U)
                scratch.append("_U")

            x0 = MatThreshold(L0, b0)

            # Report the starting point before the first optimiser step
            if self.callback is not None:
                self.callback(x0)

            # Only the first element of the optimiser's result is used; it
            # unpacks into the learned matrix and the threshold.
            L, self.threshold_ = lgbopt.fmin_gd(
                self._compute_obj, self._compute_grad, x0,
                callback=self.callback, inner=MatThreshold.dot,
                **self.opt_args)[0]

            self.coefs_ = L.T
        finally:
            # Always drop the scratch state, even on failure, so a failed
            # fit does not leave large temporaries attached to the
            # estimator.
            for name in scratch:
                delattr(self, name)

        return self
Exemplo n.º 3
0
    def fit(self, X, y):
        """Fit the two-view model.

        Parameters
        ----------
        X : sequence of two 2d arrays
            ``X[0]`` and ``X[1]`` are the data arrays of the two views.
            They are stacked column-wise with ``np.hstack`` for the
            initialisation, so both need the same number of rows.
        y : 2d array, shape (n_pairs, 3)
            Array containing pairs elements indices and associated label.
            The indices of pairs elements are contained in ``y[:, :2]``,
            while the labels are given by ``y[:, 2]``. Labels are in
            {-1, 1}. Presumably ``y[i, 0]`` indexes a row of ``X[0]`` and
            ``y[i, 1]`` a row of ``X[1]`` -- confirm against
            ``compute_cm_distances``.

        Returns
        -------
        self
            The estimator itself. After a successful fit ``coefs_`` is the
            list ``[A.T, B.T]`` of the two optimised matrices,
            ``threshold_`` the optimised threshold and ``n_features1`` the
            number of columns of ``X[0]``.

        Notes
        -----
        ``_labels``, ``_X1``, ``_X2``, ``_pairs`` and, when ``self.kernel``
        is truthy, ``_U`` and ``_V`` are scratch attributes that only exist
        for the duration of the call (presumably read by ``_compute_obj`` /
        ``_compute_grad``). They are removed before returning, including
        when the callback or the optimiser raises.
        """
        # pylint: disable=E1101
        X1 = np.asarray(X[0])
        X2 = np.asarray(X[1])
        y = np.asarray(y)
        pairs = y[:, :2].astype("int")
        labels = y[:, 2]

        # Initialize with whitened PCA on the concatenated views, then split
        # the columns back into one block per view.
        L0 = pca(np.hstack(X), self.n_components)
        A0 = L0[:, :X1.shape[1]]
        B0 = L0[:, X1.shape[1]:]

        # Normalize to have unit average distance
        dist = np.sqrt(
            compute_cm_distances(X1, X2, pairs, proj_mat1=A0,
                                 proj_mat2=B0).mean())
        b0 = 1.0
        A0 /= dist
        B0 /= dist

        self._labels = labels
        self._X1 = X1
        self._X2 = X2
        self._pairs = pairs
        # Names of the scratch attributes that currently exist on ``self``;
        # the ``finally`` clause deletes exactly these.
        scratch = ["_labels", "_X1", "_X2", "_pairs"]

        try:
            n_pairs = len(pairs)
            n_samples, self.n_features1 = X1.shape

            if self.kernel:
                # Row i of U marks the column given by the pair's first
                # index.
                U = np.zeros((n_pairs, n_samples))
                U[range(n_pairs), pairs[:, 0]] = 1.0
                self._U = csr_matrix(U)
                scratch.append("_U")

                # Row i of V marks the column given by the pair's second
                # index.
                V = np.zeros((n_pairs, n_samples))
                V[range(n_pairs), pairs[:, 1]] = 1.0
                self._V = csr_matrix(V)
                scratch.append("_V")

            x0 = Mat2Threshold(A0, B0, b0)

            # Report the starting point before the first optimiser step
            if self.callback is not None:
                self.callback(x0)

            # Only the first element of the optimiser's result is used; it
            # unpacks into the two learned matrices and the threshold.
            A, B, self.threshold_ = lgbopt.fmin_gd(self._compute_obj,
                                                   self._compute_grad,
                                                   x0,
                                                   callback=self.callback,
                                                   inner=Mat2Threshold.dot,
                                                   **self.opt_args)[0]

            self.coefs_ = [A.T, B.T]
        finally:
            # Always drop the scratch state, even on failure, so a failed
            # fit does not keep references to the input data or large
            # temporaries on the estimator.
            for name in scratch:
                delattr(self, name)

        return self
Exemplo n.º 4
0
Arquivo: dml.py Projeto: joelscp/DML
    def fit(self, X, y):
        """Fit the two-view model.

        Parameters
        ----------
        X : sequence of two 2d arrays
            ``X[0]`` and ``X[1]`` are the data arrays of the two views.
            They are stacked column-wise with ``np.hstack`` for the
            initialisation, so both need the same number of rows.
        y : 2d array, shape (n_pairs, 3)
            Array containing pairs elements indices and associated label.
            The indices of pairs elements are contained in ``y[:, :2]``,
            while the labels are given by ``y[:, 2]``. Labels are in
            {-1, 1}. Presumably ``y[i, 0]`` indexes a row of ``X[0]`` and
            ``y[i, 1]`` a row of ``X[1]`` -- confirm against
            ``compute_cm_distances``.

        Returns
        -------
        self
            The estimator itself. After a successful fit ``coefs_`` is the
            list ``[A.T, B.T]`` of the two optimised matrices,
            ``threshold_`` the optimised threshold and ``n_features1`` the
            number of columns of ``X[0]``.

        Notes
        -----
        ``_labels``, ``_X1``, ``_X2``, ``_pairs`` and, when ``self.kernel``
        is truthy, ``_U`` and ``_V`` are scratch attributes that only exist
        for the duration of the call (presumably read by ``_compute_obj`` /
        ``_compute_grad``). They are removed before returning, including
        when the callback or the optimiser raises.
        """
        # pylint: disable=E1101
        X1 = np.asarray(X[0])
        X2 = np.asarray(X[1])
        y = np.asarray(y)
        pairs = y[:, :2].astype("int")
        labels = y[:, 2]

        # Initialize with whitened PCA on the concatenated views, then split
        # the columns back into one block per view.
        L0 = pca(np.hstack(X), self.n_components)
        A0 = L0[:, :X1.shape[1]]
        B0 = L0[:, X1.shape[1]:]

        # Normalize to have unit average distance
        dist = np.sqrt(compute_cm_distances(X1, X2, pairs,
                                            proj_mat1=A0,
                                            proj_mat2=B0).mean())
        b0 = 1.0
        A0 /= dist
        B0 /= dist

        self._labels = labels
        self._X1 = X1
        self._X2 = X2
        self._pairs = pairs
        # Names of the scratch attributes that currently exist on ``self``;
        # the ``finally`` clause deletes exactly these.
        scratch = ["_labels", "_X1", "_X2", "_pairs"]

        try:
            n_pairs = len(pairs)
            n_samples, self.n_features1 = X1.shape

            if self.kernel:
                # Row i of U marks the column given by the pair's first
                # index.
                U = np.zeros((n_pairs, n_samples))
                U[range(n_pairs), pairs[:, 0]] = 1.0
                self._U = csr_matrix(U)
                scratch.append("_U")

                # Row i of V marks the column given by the pair's second
                # index.
                V = np.zeros((n_pairs, n_samples))
                V[range(n_pairs), pairs[:, 1]] = 1.0
                self._V = csr_matrix(V)
                scratch.append("_V")

            x0 = Mat2Threshold(A0, B0, b0)

            # Report the starting point before the first optimiser step
            if self.callback is not None:
                self.callback(x0)

            # Only the first element of the optimiser's result is used; it
            # unpacks into the two learned matrices and the threshold.
            A, B, self.threshold_ = lgbopt.fmin_gd(
                self._compute_obj, self._compute_grad, x0,
                callback=self.callback, inner=Mat2Threshold.dot,
                **self.opt_args)[0]

            self.coefs_ = [A.T, B.T]
        finally:
            # Always drop the scratch state, even on failure, so a failed
            # fit does not keep references to the input data or large
            # temporaries on the estimator.
            for name in scratch:
                delattr(self, name)

        return self