Example #1
def test_tensor_3d_prod():

    def get_prod_val(t, a, b, c, i1, i2, i3):
        t1, t2, t3 = t.shape
        val = 0.0
        for j1 in xrange(t1):
            for j2 in xrange(t2):
                for j3 in xrange(t3):
                    val += (t[j1, j2, j3] * a[j1, i1] * b[j2, i2] * c[j3, i3])
        return val

    rng = np.random.RandomState(3)

    tensor = rng.rand(3, 4, 5)
    a = rng.rand(3, 6)
    b = rng.rand(4, 7)
    c = rng.rand(5, 8)

    t2 = tensor_3d_prod(tensor, a, b, c)
    assert_equal(6, t2.shape[0])
    assert_equal(7, t2.shape[1])
    assert_equal(8, t2.shape[2])

    for i in xrange(6):
        for j in xrange(7):
            for k in xrange(8):
                val_true = get_prod_val(tensor, a, b, c, i, j, k)
                assert_almost_equal(val_true, t2[i, j, k])
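Note: the brute-force triple loop in get_prod_val is the same multilinear contraction that np.einsum can compute in one call, so it can serve as an independent cross-check. A minimal sketch, reusing the shapes from the test above (not part of the original test):

import numpy as np

rng = np.random.RandomState(3)
tensor = rng.rand(3, 4, 5)
a, b, c = rng.rand(3, 6), rng.rand(4, 7), rng.rand(5, 8)

# sum_{j1,j2,j3} t[j1,j2,j3] * a[j1,i1] * b[j2,i2] * c[j3,i3], for all (i1,i2,i3)
expected = np.einsum('pqr,pi,qj,rk->ijk', tensor, a, b, c)
assert expected.shape == (6, 7, 8)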
Example #2
File: cdbn.py Project: dfdx/cdbn
 def _fit(self, V0):
     Ph0 = np.zeros((self.n_hiddens,) + self.h_shape)
     H0 = np.zeros((self.n_hiddens,) + self.h_shape)
     Grad0 = np.zeros((self.n_hiddens,) + (self.w_size, self.w_size))
     for k in xrange(self.n_hiddens):          
         Ph0[k] = logistic_sigmoid(convolve(V0, self.weights[k])
                                   + self.h_intercepts[k])
         Grad0[k] = convolve(V0, Ph0[k])
         H0[k][self.rng.uniform(size=self.h_shape) < Ph0[k]] = 1
         
     h_convolved = self.v_intercept
     for k in xrange(self.n_hiddens):
         h_convolved += convolve(H0[k], np.flipud(np.fliplr(self.weights[k])))
     V1m = logistic_sigmoid(h_convolved)
     V1 = V0.copy()
     middle_offset = self.w_size - 1
     V1[middle_offset:-middle_offset, middle_offset:-middle_offset] = V1m
     
     Ph1 = np.zeros((self.n_hiddens,) + self.h_shape)        
     Grad1 = np.zeros((self.n_hiddens,) + (self.w_size, self.w_size))
     for k in xrange(self.n_hiddens):
         Ph1[k] = logistic_sigmoid(convolve(V1, self.weights[k])
                                   + self.h_intercepts[k])
         Grad1[k] = convolve(V1, Ph1[k])
         self.weights += self.lr * (Grad0[k] - Grad1[k])
     return self._net_probability(V0)
Example #3
    def transform(self, X):
        '''
        Transforms X according to the linear transformation corresponding to
        shifting the input eigenvalues to all be at least ``self.min_eig``.

        Parameters
        ----------
        X : array, shape [n_test, n]
            The test similarities to training points.

        Returns
        -------
        Xt : array, shape [n_test, n]
            The transformed test similarities to training points. Only different
            from X if X is the training data.
        '''
        n = self.train_.shape[0]
        if X.ndim != 2 or X.shape[1] != n:
            msg = "X should have {} columns, the number of samples at fit time"
            raise TypeError(msg.format(n))

        if self.copy:
            X = X.copy()

        if self.shift_ != 0 and X is self.train_ or (
                X.shape == self.train_.shape and np.allclose(X, self.train_)):
            X[xrange(n), xrange(n)] += self.shift_
        return X
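Note: the fancy-indexed assignment X[xrange(n), xrange(n)] += self.shift_ above touches exactly the diagonal, i.e. it applies K -> K + shift * I to the training kernel. A minimal NumPy-only sketch of that update (the kernel values here are made up, independent of the class above):

import numpy as np

K = np.array([[2.0, 1.0],
              [1.0, 2.0]])        # hypothetical training kernel
shift = 0.5
K_shifted = K.copy()
K_shifted[np.arange(2), np.arange(2)] += shift   # same fancy-indexed diagonal update
assert np.allclose(K_shifted, K + shift * np.eye(2))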
Example #4
File: cdbn.py Project: JunLuo-BIT/cdbn
    def _fit(self, V0):
        Ph0 = np.zeros((self.n_hiddens, ) + self.h_shape)
        H0 = np.zeros((self.n_hiddens, ) + self.h_shape)
        Grad0 = np.zeros((self.n_hiddens, ) + (self.w_size, self.w_size))
        for k in xrange(self.n_hiddens):
            Ph0[k] = logistic_sigmoid(
                convolve(V0, self.weights[k]) + self.h_intercepts[k])
            Grad0[k] = convolve(V0, Ph0[k])
            H0[k][self.rng.uniform(size=self.h_shape) < Ph0[k]] = 1

        h_convolved = self.v_intercept
        for k in xrange(self.n_hiddens):
            h_convolved += convolve(H0[k],
                                    np.flipud(np.fliplr(self.weights[k])))
        V1m = logistic_sigmoid(h_convolved)
        V1 = V0.copy()
        middle_offset = self.w_size - 1
        V1[middle_offset:-middle_offset, middle_offset:-middle_offset] = V1m

        Ph1 = np.zeros((self.n_hiddens, ) + self.h_shape)
        Grad1 = np.zeros((self.n_hiddens, ) + (self.w_size, self.w_size))
        for k in xrange(self.n_hiddens):
            Ph1[k] = logistic_sigmoid(
                convolve(V1, self.weights[k]) + self.h_intercepts[k])
            Grad1[k] = convolve(V1, Ph1[k])
            self.weights += self.lr * (Grad0[k] - Grad1[k])
        return self._net_probability(V0)
Example #5
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(xrange(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabels
    assert_array_equal(
        assert_warns(DeprecationWarning, unique_labels, [(0, 1, 2), (0,), tuple(), (2, 1)]), np.arange(3)
    )
    assert_array_equal(assert_warns(DeprecationWarning, unique_labels, [[0, 1, 2], [0], list(), [2, 1]]), np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])), np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])), np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], xrange(5)), np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))), np.arange(5))

    # Some tests with strings input
    assert_array_equal(unique_labels(["a", "b", "c"], ["d"]), ["a", "b", "c", "d"])

    assert_array_equal(
        assert_warns(DeprecationWarning, unique_labels, [["a", "b"], ["c"]], [["d"]]), ["a", "b", "c", "d"]
    )
Example #6
def _parallel_predict_proba(trees, X, n_classes, n_outputs):
    """Private function used to compute a batch of predictions within a job."""
    n_samples = X.shape[0]

    if n_outputs == 1:
        proba = np.zeros((n_samples, n_classes))

        for tree in trees:
            proba_tree = tree.predict_proba(X)

            if n_classes == tree.n_classes_:
                proba += proba_tree

            else:
                for j, c in enumerate(tree.classes_):
                    proba[:, c] += proba_tree[:, j]

    else:
        proba = []

        for k in xrange(n_outputs):
            proba.append(np.zeros((n_samples, n_classes[k])))

        for tree in trees:
            proba_tree = tree.predict_proba(X)

            for k in xrange(n_outputs):
                if n_classes[k] == tree.n_classes_[k]:
                    proba[k] += proba_tree[k]

                else:
                    for j, c in enumerate(tree.classes_[k]):
                        proba[k][:, c] += proba_tree[k][:, j]

    return proba
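Note: the else branches above remap a tree's probability columns into the full class set when that tree was fitted on only a subset of the classes. A toy sketch of just that remapping (the numbers are illustrative, not from any fitted forest):

import numpy as np

n_classes = 4
tree_classes = np.array([0, 2, 3])         # classes this particular tree has seen
proba_tree = np.array([[0.2, 0.5, 0.3]])   # its probabilities, one column per seen class
proba = np.zeros((1, n_classes))
for j, c in enumerate(tree_classes):
    proba[:, c] += proba_tree[:, j]
# proba is now [[0.2, 0.0, 0.5, 0.3]]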
Example #7
    def transform(self, X):
        '''
        Transforms X according to the linear transformation corresponding to
        shifting the input eigenvalues to all be at least ``self.min_eig``.

        Parameters
        ----------
        X : array, shape [n_test, n]
            The test similarities to training points.

        Returns
        -------
        Xt : array, shape [n_test, n]
            The transformed test similarities to training points. Only different
            from X if X is the training data.
        '''
        n = self.train_.shape[0]
        if X.ndim != 2 or X.shape[1] != n:
            msg = "X should have {} columns, the number of samples at fit time"
            raise TypeError(msg.format(n))

        if self.copy:
            X = X.copy()

        if self.shift_ != 0 and X is self.train_ or np.all(X == self.train_):
            X[xrange(n), xrange(n)] += self.shift_
        return X
Example #8
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(xrange(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabel indicator
    assert_array_equal(
        unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])),
        np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], xrange(5)), np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0, ), (2, 1)), np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))
Example #9
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(xrange(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabel indicator
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [1, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], xrange(5)),
                       np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)),
                       np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))
Example #10
def test_tensor_3d_permute():
    rng = np.random.RandomState(0)

    dim1 = rng.randint(10, 20)
    dim2 = rng.randint(10, 20)
    dim3 = rng.randint(10, 20)

    tensor = rng.rand(dim1, (dim2 * dim3))

    #dim1 = 2
    #dim2 = 3
    #dim3 = 4

    #mtx = np.arange(6).reshape(2, 3)
    #vector = np.array([7, 8, 9, 10])

    #tensor = tensor_3d_from_matrix_vector(mtx, vector)

    # test (2, 3, 1) mode
    permute_2_3_1 = tensor_3d_permute(tensor, (dim1, dim2, dim3), a=2, b=3, c=1)
    assert_equal(dim2, permute_2_3_1.shape[0])
    assert_equal(dim3 * dim1, permute_2_3_1.shape[1])

    #print tensor
    #print permute_2_3_1

    for i1 in xrange(dim2):
        for i2 in xrange(dim3):
            for i3 in xrange(dim1):
                val_permute = permute_2_3_1[i1, (dim3 * i3) + i2]
                val_origin = tensor[i3, (dim2 * i2) + i1]
                assert_equal(val_permute, val_origin)
Example #11
File: lda.py Project: emgong/topicModels
    def _approx_bound(self, X, gamma, sub_sampling):
        """
        Calculate the approximate bound for data X and topic distribution gamma.

        Parameters
        ----------
        X: sparse matrix, [n_docs, n_vocabs]

        gamma: array, shape = [n_docs, n_topics]
            document distribution (can be either normalized or un-normalized)

        sub_sampling: boolean, optional, (default: False)
            Compensate for the subsampling of the population of documents
            set subsampling to `True` for online learning

        Returns
        -------
        score: float, score of gamma
        """
        X = self._to_csr(X)
        n_docs, n_topics = gamma.shape
        score = 0
        Elogtheta = _dirichlet_expectation(gamma)

        X_data = X.data
        X_indices = X.indices
        X_indptr = X.indptr

        # E[log p(docs | theta, beta)]
        for d in xrange(0, n_docs):
            ids = X_indices[X_indptr[d]:X_indptr[d + 1]]
            cnts = X_data[X_indptr[d]:X_indptr[d + 1]]
            phinorm = np.zeros(len(ids))
            for i in xrange(0, len(ids)):
                temp = Elogtheta[d, :] + self.Elogbeta[:, ids[i]]
                tmax = temp.max()
                phinorm[i] = np.log(np.sum(np.exp(temp - tmax))) + tmax
            score += np.sum(cnts * phinorm)

        # E[log p(theta | alpha) - log q(theta | gamma)]
        score += np.sum((self.alpha - gamma) * Elogtheta)
        score += np.sum(gammaln(gamma) - gammaln(self.alpha))
        score += np.sum(
            gammaln(self.alpha * self.n_topics) - gammaln(np.sum(gamma, 1)))

        # Compensate for the subsampling of the population of documents
        # E[log p(beta | eta) - log q (beta | lambda)]
        score += np.sum((self.eta - self.components_) * self.Elogbeta)
        score += np.sum(gammaln(self.components_) - gammaln(self.eta))
        score += np.sum(gammaln(self.eta * self.n_vocabs)
                        - gammaln(np.sum(self.components_, 1)))

        # Compensate for the subsampling of the population of documents
        if sub_sampling:
            doc_ratio = float(self.n_docs) / n_docs
            score *= doc_ratio

        return score
Example #12
    def _approx_bound(self, X, gamma, sub_sampling):
        """
        Calculate the approximate bound for data X and topic distribution gamma.

        Parameters
        ----------
        X: sparse matrix, [n_docs, n_vocabs]

        gamma: array, shape = [n_docs, n_topics]
            document distribution (can be either normalized or un-normalized)

        sub_sampling: boolean, optional, (default: False)
            Compensate for the subsampling of the population of documents
            set subsampling to `True` for online learning

        Returns
        -------
        score: float, score of gamma
        """
        X = self._to_csr(X)
        n_docs, n_topics = gamma.shape
        score = 0
        Elogtheta = _dirichlet_expectation(gamma)

        X_data = X.data
        X_indices = X.indices
        X_indptr = X.indptr

        # E[log p(docs | theta, beta)]
        for d in xrange(0, n_docs):
            ids = X_indices[X_indptr[d]:X_indptr[d + 1]]
            cnts = X_data[X_indptr[d]:X_indptr[d + 1]]
            phinorm = np.zeros(len(ids))
            for i in xrange(0, len(ids)):
                temp = Elogtheta[d, :] + self.Elogbeta[:, ids[i]]
                tmax = max(temp)
                phinorm[i] = np.log(sum(np.exp(temp - tmax))) + tmax
            score += np.sum(cnts * phinorm)

        # E[log p(theta | alpha) - log q(theta | gamma)]
        score += np.sum((self.alpha - gamma) * Elogtheta)
        score += np.sum(gammaln(gamma) - gammaln(self.alpha))
        score += sum(
            gammaln(self.alpha * self.n_topics) - gammaln(np.sum(gamma, 1)))

        # Compensate for the subsampling of the population of documents
        # E[log p(beta | eta) - log q (beta | lambda)]
        score += np.sum((self.eta - self.components_) * self.Elogbeta)
        score += np.sum(gammaln(self.components_) - gammaln(self.eta))
        score += np.sum(gammaln(self.eta * self.n_vocabs)
                        - gammaln(np.sum(self.components_, 1)))

        # Compensate for the subsampling of the population of documents
        if sub_sampling:
            doc_ratio = float(self.n_docs) / n_docs
            score *= doc_ratio

        return score
Example #13
 def get_prod_val(t, a, b, c, i1, i2, i3):
     t1, t2, t3 = t.shape
     val = 0.0
     for j1 in xrange(t1):
         for j2 in xrange(t2):
             for j3 in xrange(t3):
                 val += (t[j1, j2, j3] * a[j1, i1] * b[j2, i2] * c[j3, i3])
     return val
Example #14
    def _derivativenorm(self):
        """Compute the derivative of the norm
        Returns
        -------
        derivative : numpy array, shape (m_parameters,)
        """
        w2 = np.reshape(self.w, (self.n_features, self.d, self.D, self.D))
        derivative = np.zeros((self.n_features, self.d, self.D, self.D))

        tmp = np.zeros((self.n_features, self.D * self.D))
        tmp2 = np.zeros((self.n_features, self.D * self.D))
        tmp[0, :] = np.tensordot(w2[0, :, 0, :],
                                 w2[0, :, 0, :],
                                 axes=([0], [0])).reshape(self.D * self.D)
        for i in xrange(1, self.n_features - 1):
            tmp[i, :] = np.dot(
                tmp[i - 1, :],
                np.tensordot(w2[i, :, :, :], w2[i, :, :, :],
                             axes=([0], [0])).transpose(
                                 (0, 2, 1, 3)).reshape(self.D * self.D,
                                                       self.D * self.D))
        tmp[self.n_features - 1, :] = np.inner(
            tmp[self.n_features - 2, :],
            np.tensordot(w2[self.n_features - 1, :, :, 0],
                         w2[self.n_features - 1, :, :, 0],
                         axes=([0], [0])).reshape(self.D * self.D))

        tmp2[self.n_features - 1, :] = np.tensordot(
            w2[self.n_features - 1, :, :, 0],
            w2[self.n_features - 1, :, :, 0],
            axes=([0], [0])).reshape(self.D * self.D)
        for i in xrange(self.n_features - 2, -1, -1):
            tmp2[i, :] = np.dot(
                np.tensordot(w2[i, :, :, :], w2[i, :, :, :],
                             axes=([0], [0])).transpose(
                                 (0, 2, 1, 3)).reshape(self.D * self.D,
                                                       self.D * self.D),
                tmp2[i + 1, :])
        tmp2[0, :] = np.inner(
            np.tensordot(w2[0, :, 0, :], w2[0, :, 0, :],
                         axes=([0], [0])).reshape(self.D * self.D), tmp2[1, :])

        for j in xrange(self.d):
            derivative[0, j, 0, :] = 2 * np.dot(
                tmp2[1, :].reshape(self.D, self.D), w2[0, j, 0, :])
            derivative[self.n_features - 1, j, :, 0] = 2 * np.dot(
                tmp[self.n_features - 2, :].reshape(self.D, self.D),
                w2[self.n_features - 1, j, :, 0])
        for i in xrange(1, self.n_features - 1):
            temp1 = tmp[i - 1, :].reshape(self.D, self.D)
            temp2 = tmp2[i + 1, :].reshape(self.D, self.D)

            for j in xrange(self.d):
                temp3 = np.dot(np.dot(temp1, w2[i, j, :, :]),
                               temp2.transpose())
                derivative[i, j, :, :] = 2 * np.copy(temp3)

        return derivative.reshape(self.m_parameters)
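Note: the tmp/tmp2 arrays above are left and right partial contractions of the MPS with itself (transfer matrices). A minimal NumPy sketch of the same left-to-right accumulation, computing only the squared norm rather than the derivative (shapes are illustrative, not the class's attributes):

import numpy as np

n_features, d, D = 4, 2, 3
rng = np.random.RandomState(1)
w2 = rng.rand(n_features, d, D, D)

# left-to-right accumulation, mirroring tmp[i, :] above
tmp = np.tensordot(w2[0, :, 0, :], w2[0, :, 0, :], axes=([0], [0])).reshape(D * D)
for i in range(1, n_features - 1):
    T = np.tensordot(w2[i], w2[i], axes=([0], [0])).transpose(0, 2, 1, 3)
    tmp = np.dot(tmp, T.reshape(D * D, D * D))
last = np.tensordot(w2[-1, :, :, 0], w2[-1, :, :, 0], axes=([0], [0])).reshape(D * D)
norm_sq = np.inner(tmp, last)    # scalar squared norm of the MPS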
Example #15
 def get_oob_score(self, X, y):
     """Calculate the Out-Of-Bag Score if bootstraping"""
     # Get a list of the classes
     classes_ = self.classes_
      # Get the count of all the classes
     n_classes_ = self.n_classes_
      # Get the number of samples
     n_samples = y.shape[0]
     # Init the score to zero
     oob_score = 0.0
     # Make a container for all decision function
     oob_decision_function = []
     # Make a container for all output predictions
     predictions = []
     # For each output
     for k in xrange(self.n_outputs_):
         # Make a container for all predictions
         predictions.append(np.zeros((n_samples, n_classes_[k])))
     # For each tree in the forest
     for estimator in self.estimators_:
         # Make a mask
         mask = np.ones(n_samples, dtype=np.bool)
         # Then mask all of the indices that the tree was trained from
         mask[estimator.indices_] = False
         # Then ask the tree to predict from only the novel points it's never seen
         p_estimator = estimator.predict_proba(X[mask, :])
          # In the special case of being trained with only one output
         if self.n_outputs_ == 1:
             # Then set that as the only p_estimator
             p_estimator = [p_estimator]
         # Then for each output
         for k in xrange(self.n_outputs_):
             # Add the predictions for the current output
             # But only for the predictions of the novel points
             predictions[k][mask, :] += p_estimator[k]
     # For each output
     for k in xrange(self.n_outputs_):
          # Normalize the predictions made for each output
         # by the number of predictions made
         decision = (predictions[k] / predictions[k].sum(axis=1)[:, np.newaxis])
          # Then store this oob decision function
         oob_decision_function.append(decision)
          # Use the majority vote to pick the predicted class
         y_pred = classes_[k].take(np.argmax(predictions[k], axis=1), axis=0)
          # Get matches of predictions to real labels
         matches = y[:, k] == y_pred
          # And add the mean accuracy onto the oob score
         oob_score += np.mean(matches)
      # In the special case of being trained with only one output
     if self.n_outputs_ == 1:
          # The decision function is just the first element
         self.oob_decision_function_ = oob_decision_function[0]
     # If we were trained with multiple outputs
     else:
          # The decision functions are all the elements in the container
         self.oob_decision_function_ = oob_decision_function
      # Now normalize this score by the number of outputs used to derive it
     self.oob_score_ = oob_score / self.n_outputs_
Example #16
def _triples_expectation(X):
    # calculate the exact triple-word expectation;
    # this will generate an (n_features, n_features, n_features)
    # array

    n_samples, n_features = X.shape
    X_data = X.data
    X_indices = X.indices
    X_indptr = X.indptr

    ignored_cnt = 0
    e_triples = np.zeros((n_features, n_features, n_features))

    for idx_d in xrange(n_samples):
        # get word_id and count in each document
        ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]]
        cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]]
        unique_ids = len(ids)
        total = cnts.sum()
        # min word count for triples in a doc is 3;
        # ignore shorter documents
        if total < 3:
            ignored_cnt += 1
            continue
        coef = 1. / (total * (total - 1.) * (total - 2.))

        for i in xrange(unique_ids):
            id_i = ids[i]
            cnt_i = cnts[i]
            for j in xrange(unique_ids):
                id_j = ids[j]
                cnt_j = cnts[j]
                for k in xrange(unique_ids):
                    id_k = ids[k]
                    cnt_k = cnts[k]
                    # case_1: i = j = k
                    if i == j and j == k:
                        if cnt_i >= 3:
                            combinations = cnt_i * (cnt_i - 1.) * (cnt_i - 2.)
                        else:
                            combinations = 0.
                    # case_2: i = j, j != k
                    elif i == j and j != k:
                        combinations = cnt_i * (cnt_i - 1.) * cnt_k
                    # case_3: j = k, i != j
                    elif j == k and i != j:
                        combinations = cnt_j * (cnt_j - 1.) * cnt_i
                    # case_4: i = k, j != k
                    elif i == k and j != k:
                        combinations = cnt_i * (cnt_i - 1.) * cnt_j
                    # case_5: i != j, j != k, i != k
                    else:
                        combinations = cnt_i * cnt_j * cnt_k
                    e_triples[id_i, id_j, id_k] += (coef * combinations)
    e_triples /= (n_samples - ignored_cnt)
    return e_triples
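Note: the case analysis above counts ordered triples of distinct word tokens, which is why repeated indices cost an extra factor of (count - 1) or (count - 2). A tiny brute-force check of the "i = j, j != k" case on a toy document (independent of the function above):

from itertools import permutations

tokens = ['a', 'a', 'b']                    # counts: a=2, b=1, total=3
triples = list(permutations(range(3), 3))   # 3*2*1 = 6 ordered token triples
# case "i = j, j != k" with i=a, k=b: cnt_a * (cnt_a - 1) * cnt_b = 2 * 1 * 1 = 2
n_aab = sum(1 for t in triples
            if tokens[t[0]] == 'a' and tokens[t[1]] == 'a' and tokens[t[2]] == 'b')
assert n_aab == 2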
Example #17
    def _derivativenorm(self):
        """Compute the derivative of the norm
        Returns
        -------
        derivative : numpy array, shape (m_parameters,)
        """

        w2 = np.reshape(self.w,
                        (self.n_features, self.d, self.D, self.D, self.mu))
        derivative = np.zeros(
            (self.n_features, self.d, self.D, self.D, self.mu),
            dtype=np.complex128)

        tmp = np.zeros((self.n_features, self.D * self.D), dtype=np.complex128)
        tmp2 = np.zeros((self.n_features, self.D * self.D),
                        dtype=np.complex128)

        tmp[0, :] = np.einsum('ijk,ilk->jl', w2[0, :, 0, :, :],
                              np.conj(w2[0, :,
                                         0, :, :])).reshape(self.D * self.D)
        for i in xrange(1, self.n_features - 1):
            newtmp = np.einsum('pimj,pklj->ikml', w2[i, :, :, :, :],
                               np.conj(w2[i, :, :, :, :])).reshape(
                                   (self.D * self.D, self.D * self.D))
            tmp[i, :] = np.dot(tmp[i - 1, :], newtmp)
        newtmp = np.einsum('ijk,ilk->jl', w2[self.n_features - 1, :, :, 0, :],
                           np.conj(w2[self.n_features - 1, :, :,
                                      0, :])).reshape(self.D * self.D)
        mpscontracted = np.inner(tmp[self.n_features - 2, :], newtmp)
        tmp[self.n_features - 1, :] = mpscontracted

        tmp2[self.n_features - 1, :] = newtmp
        for i in xrange(self.n_features - 2, -1, -1):
            newtmp = np.einsum('pimj,pklj->ikml', w2[i, :, :, :, :],
                               np.conj(w2[i, :, :, :, :])).reshape(
                                   (self.D * self.D, self.D * self.D))
            tmp2[i, :] = np.dot(newtmp, tmp2[i + 1, :])
        newtmp = np.einsum('ijk,ilk->jl', w2[0, :, 0, :, :],
                           np.conj(w2[0, :, 0, :, :])).reshape(self.D * self.D)
        tmp2[0, :] = np.inner(newtmp, tmp2[1, :])

        for j in xrange(self.d):
            derivative[0, j, 0, :, :] = 2 * np.einsum(
                'ij,il->lj', w2[0, j, 0, :, :], tmp2[1, :].reshape(
                    self.D, self.D))
            derivative[self.n_features - 1, j, :, 0, :] = 2 * np.einsum(
                'ij,il->lj', w2[self.n_features - 1, j, :, 0, :],
                tmp[self.n_features - 2, :].reshape(self.D, self.D))
        for i in xrange(1, self.n_features - 1):
            temp1 = tmp[i - 1, :].reshape(self.D, self.D)
            temp2 = tmp2[i + 1, :].reshape(self.D, self.D)
            for j in xrange(self.d):
                derivative[i, j, :, :, :] = 2 * np.einsum(
                    'ikm,ij,kl->jlm', w2[i, j, :, :, :], temp1, temp2)

        return derivative.reshape(self.m_parameters)
Example #18
def kmeans(input_file, n_clusters, Output):
    lvltrace.lvltrace("LVLEntree dans kmeans unsupervised")
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]
    y = data[:,0]
    sample_size, n_features = X.shape
    k_means=cluster.KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    k_means.fit(X)
    reduced_data = k_means.transform(X)
    values = k_means.cluster_centers_.squeeze()
    labels = k_means.labels_
    k_means_cluster_centers = k_means.cluster_centers_
    print "#########################################################################################################\n"
    #print y
    #print labels
    print "K-MEANS\n"
    print('homogeneity_score: %f'%metrics.homogeneity_score(y, labels))
    print('completeness_score: %f'%metrics.completeness_score(y, labels))
    print('v_measure_score: %f'%metrics.v_measure_score(y, labels))
    print('adjusted_rand_score: %f'%metrics.adjusted_rand_score(y, labels))
    print('adjusted_mutual_info_score: %f'%metrics.adjusted_mutual_info_score(y,  labels))
    print('silhouette_score: %f'%metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    print('\n')
    print "#########################################################################################################\n"
    results = Output+"kmeans_scores.txt"
    file = open(results, "w")
    file.write("K-Means Scores\n")
    file.write("Homogeneity Score: %f\n"%metrics.homogeneity_score(y, labels))
    file.write("Completeness Score: %f\n"%metrics.completeness_score(y, labels))
    file.write("V-Measure: %f\n"%metrics.v_measure_score(y, labels))
    file.write("The adjusted Rand index: %f\n"%metrics.adjusted_rand_score(y, labels))
    file.write("Adjusted Mutual Information: %f\n"%metrics.adjusted_mutual_info_score(y,  labels))
    file.write("Silhouette Score: %f\n"%metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    file.write("\n")
    file.write("True Value, Cluster numbers, Iteration\n")
    for n in xrange(len(y)):
        file.write("%f, %f, %i\n"%(y[n],labels[n],(n+1)))
    file.close()
    import pylab as pl
    from itertools import cycle
    # plot the results along with the labels
    k_means_cluster_centers = k_means.cluster_centers_
    fig, ax = plt.subplots()
    im=ax.scatter(X[:, 0], X[:, 1], c=labels, marker='.')
    for k in xrange(n_clusters):
        my_members = labels == k
        cluster_center = k_means_cluster_centers[k]
        ax.plot(cluster_center[0], cluster_center[1], 'w', color='b',
                marker='x', markersize=6)
    fig.colorbar(im)
    plt.title("Number of clusters: %i"%n_clusters)
    save = Output + "kmeans.png"
    plt.savefig(save)
    lvltrace.lvltrace("LVLsortie dans kmeans unsupervised")
Example #19
    def _derivative(self, x):
        """Compute the derivative of P(x)
        Parameters
        ----------
        x : numpy array, shape (n_features,)
            One configuration
        Returns
        -------
        derivative : numpy array, shape (m_parameters,)
        """
        w2=np.reshape(self.w,(self.n_features,self.d,self.D,self.D,self.mu))
        derivative=np.zeros((self.n_features,self.d,self.D,self.D,self.mu),dtype=np.float64)
        
        #Store intermediate tensor contractions for the derivatives: 
        #left to right and right to left
        #tmp stores the contraction of the first i+1 tensors from the left 
        #in tmp[i,:,:], tmp2 the remaining tensors on the right
        #the mps contracted is the remaining contraction tmp[i-1]w[i]tmp2[i+1]
        tmp=np.zeros((self.n_features,self.D*self.D),dtype=np.float64)
        tmp2=np.zeros((self.n_features,self.D*self.D),dtype=np.float64)
        tmp[0,:] = np.einsum('ij,kj->ik',w2[0,x[0],0,:,:],
                        np.conjugate(w2[0,x[0],0,:,:])).reshape(self.D*self.D)
        for i in xrange(1,self.n_features-1):
            newtmp = np.einsum('imj,klj->ikml',w2[i,x[i],:,:,:],
                        np.conjugate(w2[i,x[i],:,:,:])).reshape((self.D*self.D,self.D*self.D))
            tmp[i,:]=np.dot(tmp[i-1,:],newtmp)  
        newtmp = np.einsum('ij,kj->ik',w2[self.n_features-1,
                            x[self.n_features-1],:,0,:],np.conjugate(w2[self.n_features-1,
                            x[self.n_features-1],:,0,:])).reshape(self.D*self.D)
        mpscontracted=np.inner(tmp[self.n_features-2,:],newtmp)
        tmp[self.n_features-1,:]=mpscontracted
        
        
        tmp2[self.n_features-1,:]=newtmp
        for i in xrange(self.n_features-2,-1,-1):
            newtmp = np.einsum('imj,klj->ikml',w2[i,x[i],:,:,:],
                        np.conjugate(w2[i,x[i],:,:,:])).reshape((self.D*self.D,self.D*self.D))
            tmp2[i,:]=np.dot(newtmp,tmp2[i+1,:])
        newtmp=np.einsum('ij,kj->ik',w2[0,x[0],0,:,:],np.conjugate(w2[0,x[0],0,:,:])).reshape(self.D*self.D)
        tmp2[0,:]=np.inner(newtmp,tmp2[1,:])
    
        #Now for each tensor, the derivative is the contraction of the rest of the tensors
        
        derivative[0,x[0],0,:,:]=2*np.einsum('ij,il->lj',
                    w2[0,x[0],0,:,:],tmp2[1,:].reshape(self.D,self.D))
        derivative[self.n_features-1,x[self.n_features-1],:,0,:]=\
            2*np.einsum('ij,il->lj',w2[self.n_features-1,
                    x[self.n_features-1],:,0,:],tmp[self.n_features-2,:].reshape(self.D,self.D))
        for i in xrange(1,self.n_features-1):
            temp1=tmp[i-1,:].reshape(self.D,self.D)
            temp2=tmp2[i+1,:].reshape(self.D,self.D)
            derivative[i,x[i],:,:,:]=2*np.einsum('ikm,ij,kl->jlm',w2[i,x[i],:,:,:],temp1,temp2)

        return derivative.reshape(self.m_parameters)
Example #20
    def _probability(self, x):
        """Unnormalized probability of one configuration P(x)
        Parameters
        ----------
        x : numpy array, shape (n_features,)
            One configuration
        Returns
        -------
        probability : float
        """

        #n_features : the number of tensor cores, i.e. length of input
        #d : physical dimension, i.e. dimension of input
        #D : bond dimension

        ###CONTRACTING THE NETWORK USING TORCH.DOT###
        #take the parameter vector and reshape it as a n_fxdxDxD, then square it
        weights_tensor = torch.reshape(
            self.w, (self.n_features, self.d, self.D, self.D))
        #They square the entries of the weights tensor for their calculations of the derivative later on
        #not sure if we need to square the weights if used autograd in pytorch. If not, simply replace weights_squared with weights_tensor
        #the first tensor in the network is a vector
        weights_squared = torch.square(weights_tensor[0, x[0], 0, :])
        #now contract the network, from left to right to get your probability: perform matrix vector multiplication at each step, contracting the virtual indices
        for i in xrange(1, self.n_features - 1):
            weights_squared = torch.matmul(
                weights_squared,
                torch.square(weights_tensor[i, x[i], :, :]))  #MPS contraction
        #take the inner product between the built up vector (from previous contraction steps) and the end vector
        probability = torch.dot(
            weights_squared,
            torch.square(weights_tensor[self.n_features - 1,
                                        x[self.n_features - 1], :, 0]))

        return probability

        ###CONTRACTING RECURRENT NETWORK USING EINSUM####
        #take one core tensor of shape (d,D,D) and copy it n_features time, where n_features is the length of the sequence
        weights_tensor = self.core[None].repeat(n_features, 0)
        #contract the initial bond dimension using left boundary vector and square the weights in order to ensure positive parameters
        contracting_tensor = torch.square(
            torch.einsum('i, ij -> j', self.left_boundary,
                         weights_tensor[0, x[0], :, :]))
        #contract the network from left to right, performing a series of matrix-vector multiplications
        for i in xrange(1, n_features):
            contracting_tensor = torch.einsum('i, ij -> j', contracting_tensor,
                                              torch.square(weights_tensor[i, x[i], :, :]))
        #contract the final bond dimension using the right boundary vector
        probability = torch.einsum('i, i ->', contracting_tensor,
                                   self.right_boundary)

        return probability
Example #21
 def pool(self, I):
     n_cols, n_rows = I.shape[1:]
     y_stride, x_stride = self.stride
     blocks = np.zeros(I.shape)
     for r in xrange(int(np.ceil(float(n_rows) / y_stride))):
         rows = range(r * y_stride, (r + 1) * y_stride)
         for c in xrange(int(np.ceil(float(n_cols) / x_stride))):
             cols = range(c * x_stride, (c + 1) * x_stride)
             block_val = I[:, rows, cols].sum()                
             block_val = np.swapaxes(np.swapaxes(block_val, 0, 1), 1, 2)
             blocks[:, rows, cols] = block_val
     return blocks
Example #22
 def pool(self, I):
     n_cols, n_rows = I.shape[1:]
     y_stride, x_stride = self.stride
     blocks = np.zeros(I.shape)
     for r in xrange(int(np.ceil(float(n_rows) / y_stride))):
         rows = range(r * y_stride, (r + 1) * y_stride)
         for c in xrange(int(np.ceil(float(n_cols) / x_stride))):
             cols = range(c * x_stride, (c + 1) * x_stride)
             block_val = I[:, rows, cols].sum()
             block_val = np.swapaxes(np.swapaxes(block_val, 0, 1), 1, 2)
             blocks[:, rows, cols] = block_val
     return blocks
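Note: the method above sums each stride-sized block and writes the block total back over every pixel of the block. For strides that divide the image evenly, the same block-sum pooling can be sketched with a plain reshape (toy shapes, independent of the class above):

import numpy as np

I = np.arange(2 * 4 * 6, dtype=float).reshape(2, 4, 6)   # (channels, rows, cols)
y_stride, x_stride = 2, 3
blocks = I.reshape(2, 4 // y_stride, y_stride, 6 // x_stride, x_stride)
block_sums = blocks.sum(axis=(2, 4))                      # one sum per block
# broadcast each block's sum back over its pixels, like blocks[:, rows, cols] = block_val
pooled = np.repeat(np.repeat(block_sums, y_stride, axis=1), x_stride, axis=2)
assert pooled.shape == I.shape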
Example #23
def test_sparse_dot():
    for data in (bin_dense, bin_csr):
        K = linear_kernel(data)
        K2 = np.zeros_like(K)
        ds = get_dataset(data)

        for i in xrange(data.shape[0]):
            for j in xrange(i, data.shape[0]):
                K2[i, j] = sparse_dot(ds, i, j)
                K2[j, i] = K[i, j]

    assert_array_almost_equal(K, K2)
Example #24
def test_sparse_dot():
    for data in (bin_dense, bin_csr):
        K = linear_kernel(data)
        K2 = np.zeros_like(K)
        ds = get_dataset(data)

        for i in xrange(data.shape[0]):
            for j in xrange(i, data.shape[0]):
                K2[i, j] = sparse_dot(ds, i, j)
                K2[j, i] = K[i, j]

    assert_array_almost_equal(K, K2)
Example #25
def test_create_3d_rank_1_tensor_symmetric():
    rng = np.random.RandomState(0)
    dim = rng.randint(20, 25)
    v = rng.rand(dim)
    tensor = rank_1_tensor_3d(v, v, v)

    for i in xrange(dim):
        for j in xrange(i, dim):
            for k in xrange(j, dim):
                true_val = v[i] * v[j] * v[k]
                # check all permutation have same values
                for perm in permutations([i, j, k]):
                    tensor_val = tensor[perm[0], (dim * perm[2]) + perm[1]]
                    assert_almost_equal(true_val, tensor_val)
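Note: the symmetry being tested follows from the tensor being a triple outer product of the same vector. A minimal check with np.einsum (independent of rank_1_tensor_3d and its unfolding convention):

import numpy as np

v = np.array([1.0, 2.0, 3.0])
outer3 = np.einsum('i,j,k->ijk', v, v, v)
# symmetric: every permutation of the indices gives the same value
assert outer3[0, 1, 2] == outer3[2, 0, 1] == v[0] * v[1] * v[2]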
Example #26
File: cdbn.py Project: dfdx/cdbn
 def _net_probability(self, V):
     """
     Computes pseudo probability of the current network
     """
     v_energy = 0
     for k in xrange(self.n_hiddens):
         v_energy -= (self.hiddens[k] * convolve(V, self.weights[k])).sum()
     h_int_energy = 0
     for k in xrange(self.n_hiddens):
         h_int_energy -= self.h_intercepts[k].sum() * self.hiddens[k].sum()
     v_int_energy = - self.v_intercept.sum() * V.sum()
     energy = v_energy + h_int_energy + v_int_energy
     print(energy)
     return logistic_sigmoid(- energy)
Example #27
File: cdbn.py Project: JunLuo-BIT/cdbn
 def _net_probability(self, V):
     """
     Computes pseudo probability of the current network
     """
     v_energy = 0
     for k in xrange(self.n_hiddens):
         v_energy -= (self.hiddens[k] * convolve(V, self.weights[k])).sum()
     h_int_energy = 0
     for k in xrange(self.n_hiddens):
         h_int_energy -= self.h_intercepts[k].sum() * self.hiddens[k].sum()
     v_int_energy = -self.v_intercept.sum() * V.sum()
     energy = v_energy + h_int_energy + v_int_energy
     print(energy)
     return logistic_sigmoid(-energy)
Example #28
def _update_gamma(X, expElogbeta, alpha, rng, max_iters,
                  meanchangethresh, cal_delta):
    """
    E-step: update latent variable gamma
    """

    n_docs, n_vocabs = X.shape
    n_topics = expElogbeta.shape[0]

    # gamma is the non-normalized topic distribution
    gamma = rng.gamma(100., 1. / 100., (n_docs, n_topics))
    expElogtheta = np.exp(_dirichlet_expectation(gamma))
    # diff on component (only calculated when cal_delta is True)
    delta_component = np.zeros(expElogbeta.shape) if cal_delta else None

    X_data = X.data
    X_indices = X.indices
    X_indptr = X.indptr

    for d in xrange(n_docs):
        ids = X_indices[X_indptr[d]:X_indptr[d + 1]]
        cnts = X_data[X_indptr[d]:X_indptr[d + 1]]
        gammad = gamma[d, :]
        expElogthetad = expElogtheta[d, :]
        expElogbetad = expElogbeta[:, ids]
        # The optimal phi_{dwk} is proportional to
        # expElogthetad_k * expElogbetad_w. phinorm is the normalizer.
        phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100

        # Iterate between gamma and phi until convergence
        for it in xrange(0, max_iters):
            lastgamma = gammad
            # We represent phi implicitly to save memory and time.
            # Substituting the value of the optimal phi back into
            # the update for gamma gives this update. Cf. Lee&Seung 2001.
            gammad = alpha + expElogthetad * \
                np.dot(cnts / phinorm, expElogbetad.T)
            expElogthetad = np.exp(_dirichlet_expectation(gammad))
            phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100

            meanchange = np.mean(abs(gammad - lastgamma))
            if (meanchange < meanchangethresh):
                break
        gamma[d, :] = gammad
        # Contribution of document d to the expected sufficient
        # statistics for the M step.
        if cal_delta:
            delta_component[:, ids] += np.outer(expElogthetad, cnts / phinorm)

    return (gamma, delta_component)
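Note: the inner loop above is the standard variational update gamma_d = alpha + expElogtheta_d * ((cnts / phinorm) . expElogbeta_d^T). A toy, single-step sketch of just that update (random stand-in values; _dirichlet_expectation and the convergence check are omitted):

import numpy as np

n_topics, n_words = 3, 4
rng = np.random.RandomState(0)
expElogthetad = rng.rand(n_topics)
expElogbetad = rng.rand(n_topics, n_words)   # columns restricted to this doc's word ids
cnts = np.array([2., 1., 1., 3.])
alpha = 0.1

phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100
gammad = alpha + expElogthetad * np.dot(cnts / phinorm, expElogbetad.T)
assert gammad.shape == (n_topics,)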
Example #29
File: lda.py Project: emgong/topicModels
def _update_gamma(X, expElogbeta, alpha, rng, max_iters,
                  meanchangethresh, cal_delta):
    """
    E-step: update latent variable gamma
    """

    n_docs, n_vocabs = X.shape
    n_topics = expElogbeta.shape[0]

    # gamma is the non-normalized topic distribution
    gamma = rng.gamma(100., 1. / 100., (n_docs, n_topics))
    expElogtheta = np.exp(_dirichlet_expectation(gamma))
    # diff on component (only calculated when cal_delta is True)
    delta_component = np.zeros(expElogbeta.shape) if cal_delta else None

    X_data = X.data
    X_indices = X.indices
    X_indptr = X.indptr

    for d in xrange(n_docs):
        ids = X_indices[X_indptr[d]:X_indptr[d + 1]]
        cnts = X_data[X_indptr[d]:X_indptr[d + 1]]
        gammad = gamma[d, :]
        expElogthetad = expElogtheta[d, :]
        expElogbetad = expElogbeta[:, ids]
        # The optimal phi_{dwk} is proportional to
        # expElogthetad_k * expElogbetad_w. phinorm is the normalizer.
        phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100

        # Iterate between gamma and phi until convergence
        for it in xrange(0, max_iters):
            lastgamma = gammad
            # We represent phi implicitly to save memory and time.
            # Substituting the value of the optimal phi back into
            # the update for gamma gives this update. Cf. Lee&Seung 2001.
            gammad = alpha + expElogthetad * \
                np.dot(cnts / phinorm, expElogbetad.T)
            expElogthetad = np.exp(_dirichlet_expectation(gammad))
            phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100

            meanchange = np.mean(abs(gammad - lastgamma))
            if (meanchange < meanchangethresh):
                break
        gamma[d, :] = gammad
        # Contribution of document d to the expected sufficient
        # statistics for the M step.
        if cal_delta:
            delta_component[:, ids] += np.outer(expElogthetad, cnts / phinorm)

    return (gamma, delta_component)
Example #30
    def _probability(self, x):
        """Unnormalized probability of one configuration P(x)
        Parameters
        ----------
        x : numpy array, shape (n_features,)
            One configuration
        Returns
        -------
        probability : float
        """

        #n_features : the number of tensor cores, i.e. length of input
        #d : physical dimension, i.e. dimension of input
        #D : bond dimension

        ###CONTRACTING THE NETWORK USING TORCH.DOT###
        #reshape weights of model to be a fourth order tensor (n_fxdxDxD)
        weights_tensor = torch.reshape(
            self.w, (self.n_features, self.d, self.D, self.D))

        
        #initialize the first tensor to be a vector (left boundary index fixed to 0)
        contracting_tensor = weights_tensor[0, x[0], 0, :]  #First tensor
        #go through and contract the network from left to right, performing a vector-matrix multiplication at each step
        for i in xrange(1, self.n_features-1):
            contracting_tensor = torch.matmul(contracting_tensor, weights_tensor[i, x[i], :, :]) #MPS contraction
        probability = torch.dot(contracting_tensor,
                        weights_tensor[self.n_features-1, x[self.n_features-1], :, 0])**2
        return probability      

        ###CONTRACTING NON-RECURRENT NETWORK USING EINSUM####
        #reshape weights of model to be a fourth order tensor (n_fxdxDxD)
        weights_tensor = torch.reshape(self.w, (self.n_features, self.d, self.D, self.D))
        #first tensor
        contracting_tensor = weights_tensor[0, x[0], 0, :]
        for i in range(1, self.n_features-1):
            contracting_tensor = torch.einsum('i, ij -> j', contracting_tensor, weights_tensor[i, x[i], :, :])
        probability = torch.einsum('i, i ->', contracting_tensor, weights_tensor[self.n_features-1, x[self.n_features-1], :, 0])**2

        return probability

        ###CONTRACTING RECURRENT NETWORK WITH EINSUM###
        weights_tensor = self.core[None].repeat(n_features, 0)
        #perform left boundary contraction
        contracting_tensor = torch.einsum('i, ij -> j', self.left_boundary, weights_tensor[0, x[0], :, :])
        #contract the network
        for i in xrange(1, n_features):
            contracting_tensor = torch.einsum('i, ij -> j', contracting_tensor, weights_tensor[i, x[i], :, :])
        probability = torch.einsum('i,i -> ', contracting_tensor, self.right_boundary)**2
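Note: all three variants above implement the same left-to-right matrix-product-state contraction. A minimal NumPy-only sketch of that contraction for one configuration (shapes and names are illustrative, not the class's API):

import numpy as np

n_features, d, D = 4, 2, 3
rng = np.random.RandomState(0)
cores = rng.rand(n_features, d, D, D)
x = [0, 1, 1, 0]                      # one configuration

vec = cores[0, x[0], 0, :]            # first core acts as a row vector
for i in range(1, n_features - 1):
    vec = vec.dot(cores[i, x[i], :, :])   # contract over the bond dimension
amplitude = vec.dot(cores[n_features - 1, x[-1], :, 0])
probability = amplitude ** 2          # unnormalized P(x)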
Example #31
    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample is computed as
        the mean predicted class probabilities of the trees in the forest.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. Classes are
            ordered by arithmetical order.
        """
        # Check data
        #if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
        #X = array2d(X, dtype=DTYPE)

        # Assign chunk of trees to jobs
        n_jobs, n_trees, starts = _partition_trees(self)

        # Parallel loop
        all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_parallel_predict_proba)(
                self.estimators_[starts[i]:starts[i + 1]], X, self.n_classes_,
                self.n_outputs_) for i in range(n_jobs))

        # Reduce
        proba = all_proba[0]

        if self.n_outputs_ == 1:
            for j in xrange(1, len(all_proba)):
                proba += all_proba[j]

            proba /= self.n_estimators

        else:
            for j in xrange(1, len(all_proba)):
                for k in xrange(self.n_outputs_):
                    proba[k] += all_proba[j][k]

            for k in xrange(self.n_outputs_):
                proba[k] /= self.n_estimators

        return proba
Example #32
    def fit(self, X):
        """Fit SGVB to the data

        Parameters
        ----------
        X : array-like, shape (N, n_features)
            The data that the SGVB needs to fit on

        Returns
        -------
        list_lowerbound : list of float
            List of the lower bound over time.
        """
        #X, = check_arrays(X, sparse_format='csr', dtype=np.float)
        X = check_array(X)
        [N, dimX] = X.shape
        rng = check_random_state(self.random_state)

        self._initParams(dimX, rng)
        list_lowerbound = np.array([])

        n_batches = int(np.ceil(float(N) / self.batch_size))
        batch_slices = list(
            gen_even_slices(n_batches * self.batch_size, n_batches, N))

        if self.verbose:
            print "Initializing gradients for AdaGrad"
        for i in xrange(10):
            self._initH(X[batch_slices[i]], rng)

        begin = time.time()
        for iteration in xrange(1, self.n_iter + 1):
            iteration_lowerbound = 0

            for batch_slice in batch_slices:
                lowerbound = self._updateParams(X[batch_slice], N, rng)
                iteration_lowerbound += lowerbound

            if self.verbose:
                end = time.time()
                print(
                    "[%s] Iteration %d, lower bound = %.2f,"
                    " time = %.2fs" % (self.__class__.__name__, iteration,
                                       iteration_lowerbound / N, end - begin))
                begin = end

            list_lowerbound = np.append(list_lowerbound,
                                        iteration_lowerbound / N)
        return list_lowerbound
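Note: gen_even_slices is what splits the data into near-equal mini-batches above. A small, hedged illustration of how the slices come out for a toy N and batch size (assuming sklearn.utils.gen_even_slices with an n_samples cap, as used above):

import numpy as np
from sklearn.utils import gen_even_slices

N, batch_size = 10, 4
n_batches = int(np.ceil(float(N) / batch_size))                 # 3
batch_slices = list(gen_even_slices(n_batches * batch_size, n_batches, N))
# batch_slices == [slice(0, 4), slice(4, 8), slice(8, 10)]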
Example #33
def _fix_connectivity(X, connectivity, affinity):
    """
    Fixes the connectivity matrix

        - copies it
        - makes it symmetric
        - converts it to LIL if necessary
        - completes it if necessary
    """
    n_samples = X.shape[0]
    if (connectivity.shape[0] != n_samples or
            connectivity.shape[1] != n_samples):
        raise ValueError('Wrong shape for connectivity matrix: %s '
                         'when X is %s' % (connectivity.shape, X.shape))

    # Make the connectivity matrix symmetric:
    connectivity = connectivity + connectivity.T

    # Convert connectivity matrix to LIL
    if not sparse.isspmatrix_lil(connectivity):
        if not sparse.isspmatrix(connectivity):
            connectivity = sparse.lil_matrix(connectivity)
        else:
            connectivity = connectivity.tolil()

    # Compute the number of nodes
    n_components, labels = connected_components(connectivity)

    if n_components > 1:
        warnings.warn("the number of connected components of the "
                      "connectivity matrix is %d > 1. Completing it to avoid "
                      "stopping the tree early." % n_components,
                      stacklevel=2)
        # XXX: Can we do without completing the matrix?
        for i in xrange(n_components):
            idx_i = np.where(labels == i)[0]
            Xi = X[idx_i]
            for j in xrange(i):
                idx_j = np.where(labels == j)[0]
                Xj = X[idx_j]
                D = pairwise_distances(Xi, Xj, metric=affinity)
                ii, jj = np.where(D == np.min(D))
                ii = ii[0]
                jj = jj[0]
                connectivity[idx_i[ii], idx_j[jj]] = True
                connectivity[idx_j[jj], idx_i[ii]] = True

    return connectivity, n_components
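Note: the completion loop above links each pair of disconnected components at their closest pair of points. A toy, hedged run of that idea (imports are assumptions: scipy's connected_components and sklearn's pairwise_distances):

import numpy as np
from scipy import sparse
from scipy.sparse.csgraph import connected_components
from sklearn.metrics import pairwise_distances

X = np.array([[0.0], [0.1], [5.0]])
connectivity = sparse.lil_matrix(np.array([[1, 1, 0],
                                           [1, 1, 0],
                                           [0, 0, 1]]))
n_components, labels = connected_components(connectivity)
assert n_components == 2
# link the closest pair across the two components, as in the loop above
D = pairwise_distances(X[labels == 0], X[labels == 1])
ii, jj = np.unravel_index(D.argmin(), D.shape)
connectivity[np.where(labels == 0)[0][ii], np.where(labels == 1)[0][jj]] = True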
Example #34
    def fit(self, X):
        """Fit SGVB to the data

        Parameters
        ----------
        X : array-like, shape (N, n_features)
            The data that the SGVB needs to fit on

        Returns
        -------
        list_lowerbound : list of float
            List of the lower bound over time.
        """
        X, = check_arrays(X, sparse_format='csr', dtype=np.float)
        [N, dimX] = X.shape
        rng = check_random_state(self.random_state)

        self._initParams(dimX, rng)
        list_lowerbound = np.array([])

        n_batches = int(np.ceil(float(N) / self.batch_size))
        batch_slices = list(gen_even_slices(n_batches * self.batch_size,
                                            n_batches, N))

        if self.verbose:
            print "Initializing gradients for AdaGrad"
        for i in xrange(10):
            self._initH(X[batch_slices[i]], rng)

        begin = time.time()
        for iteration in xrange(1, self.n_iter + 1):
            iteration_lowerbound = 0

            for batch_slice in batch_slices:
                lowerbound = self._updateParams(X[batch_slice], N, rng)
                iteration_lowerbound += lowerbound

            if self.verbose:
                end = time.time()
                print("[%s] Iteration %d, lower bound = %.2f,"
                      " time = %.2fs"
                      % (self.__class__.__name__, iteration,
                         iteration_lowerbound / N, end - begin))
                begin = end

            list_lowerbound = np.append(
                list_lowerbound, iteration_lowerbound / N)
        return list_lowerbound
Example #35
    def _fit(self, X, Y):
        n_samples, n_features = X.shape
        rng = self._get_random_state()

        if self.eta is None or self.eta == 'auto':
            self.eta = get_auto_step_size(
                    X, self.alpha, self.loss, self.gamma)
            if self.verbose > 0:
                print("Auto stepsize: %s" % self.eta)

        loss = self._get_loss()
        penalty = self._get_penalty()
        n_vectors = Y.shape[1]
        n_inner = int(self.n_inner * n_samples)
        ds = get_dataset(X, order="c")

        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.coef_scale_ = np.ones(n_vectors, dtype=np.float64)
        grad = np.zeros((n_vectors, n_samples), dtype=np.float64)

        for i in xrange(n_vectors):
            y = Y[:, i]

            _sag_fit(self, ds, y, self.coef_[i], self.coef_scale_[i:], grad[i],
                     self.eta, self.alpha, self.beta, loss, penalty,
                     self.max_iter, n_inner, self.tol, self.verbose,
                     self.callback, rng, self.is_saga)

        return self
Example #36
def _check_predict_proba(clf, X, y):
    proba = clf.predict_proba(X)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # We know that we can have division by zero
        log_proba = clf.predict_log_proba(X)

    y = np.atleast_1d(y)
    if y.ndim == 1:
        y = np.reshape(y, (-1, 1))

    n_outputs = y.shape[1]
    n_samples = len(X)

    if n_outputs == 1:
        proba = [proba]
        log_proba = [log_proba]

    for k in xrange(n_outputs):
        assert_equal(proba[k].shape[0], n_samples)
        assert_equal(proba[k].shape[1], len(np.unique(y[:, k])))
        assert_array_equal(proba[k].sum(axis=1), np.ones(len(X)))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # We know that we can have division by zero
            assert_array_equal(np.log(proba[k]), log_proba[k])
Example #37
 def _mean_hiddens(self, v):
     h = np.zeros((self.n_hiddens,) + self.h_shape)
     for k in xrange(self.n_hiddens):
         # print('_mean_hiddens (loop): %s' % (v.shape,))
         h[k] = np.exp(conv2(v, self._ff(self.W[k]), mode='valid') + self.c[k])
     h_mean = h / (1 + self.pool(h))
     return h_mean
Example #38
File: sag.py Project: RTHMaK/sebabulba
    def _fit(self, X, Y):
        n_samples, n_features = X.shape
        rng = self._get_random_state()

        if self.eta is None or self.eta == 'auto':
            self.eta = get_auto_step_size(X, self.alpha, self.loss, self.gamma)
            if self.verbose > 0:
                print("Auto stepsize: %s" % self.eta)

        loss = self._get_loss()
        penalty = self._get_penalty()
        n_vectors = Y.shape[1]
        n_inner = int(self.n_inner * n_samples)
        ds = get_dataset(X, order="c")

        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.coef_scale_ = np.ones(n_vectors, dtype=np.float64)
        grad = np.zeros((n_vectors, n_samples), dtype=np.float64)

        for i in xrange(n_vectors):
            y = Y[:, i]

            _sag_fit(self, ds, y, self.coef_[i], self.coef_scale_[i:], grad[i],
                     self.eta, self.alpha, self.beta, loss, penalty,
                     self.max_iter, n_inner, self.tol, self.verbose,
                     self.callback, rng, self.is_saga)

        return self
Example #39
def test_fortran_get_column():
    ind = np.arange(X.shape[0])
    for j in xrange(X.shape[1]):
        indices, data, n_nz = fds.get_column(j)
        assert_array_equal(indices, ind)
        assert_array_equal(data, X[:, j])
        assert_equal(n_nz, X.shape[0])
Example #40
    def predict_log_proba(self, X):
        """Predict class log-probabilities of the input samples X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        proba = self.predict_proba(X)

        if self.n_outputs_ == 1:
            return np.log(proba)

        else:
            for k in xrange(self.n_outputs_):
                proba[k] = np.log(proba[k])

            return proba
Example #41
def _make_nn_regression(n_samples=100, n_features=100, n_informative=10,
                        shuffle=True, random_state=None):

    generator = check_random_state(random_state)

    row = np.repeat(np.arange(n_samples), n_informative)
    col = np.zeros(n_samples * n_informative, dtype=np.int32)
    data = generator.rand(n_samples * n_informative)

    n = 0
    ind = np.arange(n_features)
    for i in xrange(n_samples):
        generator.shuffle(ind)
        col[n:n+n_informative] = ind[:n_informative]
        n += n_informative

    X = sp.coo_matrix((data, (row, col)), shape=(n_samples, n_features))
    X = X.tocsr()

    # Generate a ground truth model with only n_informative features being
    # non-zero (the other features are not correlated with y and should be
    # ignored by sparsifying regularizers such as L1 or elastic net)
    ground_truth = np.zeros(n_features)
    v = generator.rand(n_informative)
    v += np.min(v)
    ground_truth[:n_informative] = 100 * v
    y = safe_sparse_dot(X, ground_truth)

    # Randomly permute the samples
    if shuffle:
        X, y = shuffle_func(X, y, random_state=generator)

    return X, y, ground_truth
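A short usage sketch for the generator above (assuming the snippet's imports — scipy.sparse as sp, check_random_state, safe_sparse_dot and shuffle_func — are in scope): X comes back as CSR with exactly n_informative non-zeros per row, and y is non-negative because both X and the ground truth are.

X, y, w = _make_nn_regression(n_samples=20, n_features=30,
                              n_informative=5, random_state=0)

assert X.shape == (20, 30)
assert X.getnnz() == 20 * 5            # n_informative non-zeros per sample
assert (w != 0).sum() <= 5             # only the informative weights are set
assert (y >= 0).all()                  # non-negative regression targets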
Example #42
0
def test_cooccurrence_expectation():
    rng = np.random.RandomState(0)

    n_features = 100
    n_samples = rng.randint(100, 200)
    doc_word_mtx = rng.randint(0, 3, size=(n_samples, n_features)).astype('float')

    word_cnts = doc_word_mtx.sum(axis=1).astype('float')
    min_count = int(word_cnts.min() + 1)
    mask = (word_cnts >= min_count)

    result = np.zeros((n_features, n_features))
    for i in xrange(n_samples):
        cnt = word_cnts[i]
        if cnt < min_count:
            continue
        doc_i = doc_word_mtx[i, :]
        result_i = (doc_i * doc_i[:, np.newaxis]) - np.diag(doc_i)
        result_i /= cnt * (cnt - 1)
        result += result_i
    result /= mask.sum()

    e2, ignored_cnt = cooccurrence_expectation(
        doc_word_mtx, min_words=min_count)

    e2_dense = e2.toarray()
    assert_greater(ignored_cnt, 0)
    assert_equal(mask.sum(), n_samples - ignored_cnt)
    assert_array_almost_equal(result, e2_dense)
    # cooccurrence should be symmetric
    assert_array_almost_equal(result, e2_dense.T)
    assert_true(np.all(e2_dense >= 0.))
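The reference loop in the test can also be written in a vectorized form; a small sketch of the same expectation on a dense doc-word matrix (an illustration, not the library's implementation):

import numpy as np

def cooccurrence_expectation_dense(doc_word, min_words):
    # dense, vectorized restatement of the reference loop above
    counts = doc_word.sum(axis=1)
    kept = doc_word[counts >= min_words]
    cnt = kept.sum(axis=1)
    norm = (cnt * (cnt - 1.0))[:, None, None]
    pair = kept[:, :, None] * kept[:, None, :]        # doc_i * doc_i[:, newaxis]
    diag = np.apply_along_axis(np.diag, 1, kept)      # subtract the self-pairs
    return ((pair - diag) / norm).sum(axis=0) / kept.shape[0]

doc_word = np.array([[2., 3., 0.], [1., 0., 4.], [0., 1., 1.]])
print(cooccurrence_expectation_dense(doc_word, min_words=3))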
Example #43
0
    def predict_log_proba(self, X):
        """Predict class log-probabilities of the input samples X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class log-probabilities of the input samples. The classes are
            ordered arithmetically.
        """
        proba = self.predict_proba(X)

        if self.n_outputs_ == 1:
            return np.log(proba)

        else:
            for k in xrange(self.n_outputs_):
                proba[k] = np.log(proba[k])

            return proba
Example #44
0
    def _fit(self, X, Y):
        n_samples, n_features = X.shape
        n_vectors = Y.shape[1]

        ds = get_dataset(X, order="c")
        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.dual_coef_ = np.zeros((n_vectors, n_samples), dtype=np.float64)

        alpha1 = self.l1_ratio * self.alpha
        alpha2 = (1 - self.l1_ratio) * self.alpha

        if self.loss == "squared_hinge":
            # For consistency with the rest of lightning.
            alpha1 *= 0.5
            alpha2 *= 0.5

        tol = self.tol
        n_calls = n_samples if self.n_calls is None else self.n_calls
        rng = check_random_state(self.random_state)
        loss = self._get_loss()

        for i in xrange(n_vectors):
            y = Y[:, i]

            if self.l1_ratio == 1.0:
                # Prox-SDCA needs a strongly convex regularizer so adds some
                # L2 penalty (see paper).
                alpha2 = self._get_alpha2_lasso(y, alpha1)
                tol = self.tol * 0.5

            _prox_sdca_fit(self, ds, y, self.coef_[i], self.dual_coef_[i],
                           alpha1, alpha2, loss, self.gamma, self.max_iter,
                           tol, self.callback, n_calls, self.verbose, rng)

        return self
Example #45
0
def test_knn_version_consistency():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")
    if not have_accel:
        raise SkipTest("No skl-groups-accel, so skipping version consistency.")

    n = 20
    for dim in [1, 7]:
        np.random.seed(47)
        bags = Features([np.random.randn(np.random.randint(30, 100), dim)
                         for _ in xrange(n)])

        div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
        Ks = (3, 4)
        get_est = partial(KNNDivergenceEstimator, div_funcs=div_funcs, Ks=Ks)
        results = {}
        for version in ('fast', 'slow', 'best'):
            est = get_est(version=version)
            results[version] = res = est.fit_transform(bags)
            assert res.shape == (len(div_funcs), len(Ks), n, n)
            assert np.all(np.isfinite(res))

        for df, fast, slow in zip(div_funcs, results['fast'], results['slow']):
            assert_array_almost_equal(
                fast, slow, decimal=1 if df == 'js' else 5,
                err_msg="({}, dim {})".format(df, dim))
            # TODO: debug JS differences

        est = get_est(version='fast', n_jobs=-1)
        res = est.fit_transform(bags)
        assert np.all(results['fast'] == res)

        est = get_est(version='slow', n_jobs=-1)
        res = est.fit_transform(bags)
        assert np.all(results['slow'] == res)
Example #46
0
def test_knn_memory():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    dim = 3
    n = 20
    np.random.seed(47)
    bags = Features([np.random.randn(np.random.randint(30, 100), dim)
                     for _ in xrange(n)])

    tdir = tempfile.mkdtemp()
    div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
    Ks = (3, 4)
    est = KNNDivergenceEstimator(div_funcs=div_funcs, Ks=Ks, memory=tdir)
    res1 = est.fit_transform(bags)

    with LogCapture('skl_groups.divergences.knn', level=logging.INFO) as l:
        res2 = est.transform(bags)
        assert len(l.records) == 0
    assert np.all(res1 == res2)

    with LogCapture('skl_groups.divergences.knn', level=logging.INFO) as l:
        res3 = est.fit_transform(bags)
        for r in l.records:
            assert not r.message.startswith("Getting divergences")
    assert np.all(res1 == res3)
Example #47
0
def test_contiguous_get_row():
    ind = np.arange(X.shape[1])
    for i in xrange(X.shape[0]):
        indices, data, n_nz = cds.get_row(i)
        assert_array_equal(indices, ind)
        assert_array_equal(data, X[i])
        assert_equal(n_nz, X.shape[1])
Example #48
0
    def _fit_binary(self, K, y, rs):
        n_samples = K.shape[0]
        coef = np.zeros(n_samples)
        if n_samples < 1000:
            sv = np.ones(n_samples, dtype=bool)
        else:
            sv = np.zeros(n_samples, dtype=bool)
            sv[:1000] = True
            rs.shuffle(sv)

        for t in xrange(1, self.max_iter + 1):
            if self.verbose:
                print("Iteration", t, "#SV=", np.sum(sv))

            K_sv = K[sv][:, sv]
            I = np.diag(self.alpha * np.ones(K_sv.shape[0]))

            coef_sv = self._solve(K_sv + I, y[sv])

            coef *= 0
            coef[sv] = coef_sv
            pred = np.dot(K, coef)
            errors = 1 - y * pred
            last_sv = sv
            sv = errors > 0

            if np.array_equal(last_sv, sv):
                if self.verbose:
                    print("Converged at iteration", t)
                break

        return coef
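The loop above repeatedly solves a regularized linear system restricted to the current support vectors and then rescreens which points still have positive error; a standalone NumPy sketch of one such step (with np.linalg.solve standing in for the snippet's unshown _solve):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(8, 3)
y = rng.choice([-1.0, 1.0], size=8)
K = np.dot(X, X.T)                       # linear kernel matrix
alpha = 0.1

sv = np.ones(8, dtype=bool)              # start with every point as a support vector
K_sv = K[sv][:, sv]
I = np.diag(alpha * np.ones(K_sv.shape[0]))

coef_sv = np.linalg.solve(K_sv + I, y[sv])   # solve the ridge-style system on the SVs
pred = np.dot(K[:, sv], coef_sv)
errors = 1 - y * pred                        # points with errors > 0 stay support vectors
print("points still violating the margin:", int(np.sum(errors > 0)))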
Example #49
0
def test_whitening_triples_expectation_simple():
    # TODO: for debug. delete it later
    rng = np.random.RandomState(4)

    doc_word_mtx = np.array([
        [2, 3, 0, 1],
        [3, 4, 5, 7],
        [1, 4, 6, 7],
        [5, 5, 5, 5],
        [1, 4, 7, 10],
    ])

    n_components = 2
    doc_word_mtx = sp.csr_matrix(doc_word_mtx)

    # use random matrix as whitening matrix
    W = rng.rand(4, 2)

    e3_w = whitening_triples_expectation(doc_word_mtx, 3, W)

    # compute E3(W, W, W) directly
    e3 = _triples_expectation(doc_word_mtx)
    e3_w_true = tensor_3d_prod(e3, W, W, W)
    # flatten
    e3_w_true_flatten = np.hstack([e3_w_true[:, :, i] for i in xrange(n_components)])
    #print e3
    #print e3_w_true
    #print e3_w_true_flatten
    #print e3_w
    assert_array_almost_equal(e3_w_true_flatten, e3_w)
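Assuming tensor_3d_prod implements the usual mode-wise contraction of a 3-way tensor with three matrices, the same quantity can be written as a one-line einsum (an equivalent formulation for reference, not the library's code):

import numpy as np

def tensor_3d_prod_einsum(t, a, b, c):
    # T(A, B, C)[i, j, k] = sum_{p,q,r} T[p, q, r] * A[p, i] * B[q, j] * C[r, k]
    return np.einsum('pqr,pi,qj,rk->ijk', t, a, b, c)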
Example #50
0
    def predict_log_proba(self, X):
        """Predict class log-probabilities of the input samples X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class log-probabilities of the input samples. The classes are
            ordered arithmetically.
        """
        proba = self.predict_proba(X)

        if self.n_outputs_ == 1:
            return np.log(proba)

        else:
            for k in xrange(self.n_outputs_):
                proba[k] = np.log(proba[k])

            return proba
Example #51
0
def test_basic():
    bags = [np.random.normal(5, 3, size=(np.random.randint(10, 100), 20))
            for _ in xrange(50)]
    feats = Features(bags, stack=True)

    stder = BagStandardizer()
    stdized = stder.fit_transform(bags)
    stdized.make_stacked()

    assert np.allclose(np.mean(stdized.stacked_features), 0)
    assert np.allclose(np.std(stdized.stacked_features), 1)

    first_five = stder.transform(bags[:5])
    assert first_five == stdized[:5]

    minmaxer = BagMinMaxScaler([3, 7])
    minmaxed = minmaxer.fit_transform(feats)
    minmaxed.make_stacked()
    assert np.allclose(np.min(minmaxed.stacked_features, 0), 3)
    assert np.allclose(np.max(minmaxed.stacked_features, 0), 7)

    normer = BagNormalizer('l1')
    normed = normer.fit_transform(Features(bags))
    normed.make_stacked()
    assert np.allclose(np.sum(np.abs(normed.stacked_features), 1), 1)

    class GetMean(BaseEstimator, TransformerMixin):
        def fit(self, X, y=None):
            return self
        def transform(self, X):
            return X.mean(axis=1)[None, :]
    m = BagPreprocesser(GetMean())
    assert_raises(ValueError, lambda: m.transform(bags))
Example #52
0
def draw_nstd_ellipses_classifier_and_means(gmm,
                                            means,
                                            ax,
                                            feature_idx,
                                            n_std=3):
    color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])
    for i, (mean_pair, covar,
            color) in enumerate(zip(means, gmm._get_covars(), color_iter)):
        try:
            eigen_values, eigen_vectors = linalg.eigh(covar)
            eigen_values = np.sqrt(eigen_values)
            u = eigen_vectors[feature_idx] / linalg.norm(
                eigen_vectors[feature_idx])

            angle = np.degrees(np.arctan2(u[feature_idx + 1], u[feature_idx]))
            for k in xrange(1, n_std):
                width, height = eigen_values[
                    feature_idx] * k * 2, eigen_values[feature_idx + 1] * k * 2
                ell = mpl.patches.Ellipse(mean_pair,
                                          width,
                                          height,
                                          180 + angle,
                                          color=color)
                ell.set_clip_box(ax.bbox)
                ell.set_alpha(0.25)
                ax.add_artist(ell)
        except Exception as e:
            print('error in make_ellipse %s' % e)
Example #53
0
File: rbm.py Project: stachon/binet
    def fit(self, X, y=None):
        """Fit the model to the data X.

        Parameters
        ----------
        X : {array-like, sparse matrix} shape (n_samples, n_features)
            Training data.

        Returns
        -------
        self : BernoulliRBM
            The fitted model.
        """
        self.h_samples_ *= 0
        begin = time.time()
        for self.current_epoch in xrange(self.n_iter):
            for batch_slice in generate_slices(X.shape[0], self.batch_size):
                self.partial_fit(X[batch_slice])

            if self.verbose:
                end = time.time()
                H = self.transform(X)
                R = self._mean_visibles(H)
                d = np.sqrt((op.sum((R-X)**2))/X.shape[0])
                print("[%s] Iteration %d, ReconstructionRMSE %.4f  time = %.2fs"
                      % (type(self).__name__, self.current_epoch, d, end - begin))
                begin = end

        return self
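generate_slices is binet's mini-batch helper; a hypothetical sketch of what such a helper typically looks like (an assumption about its behavior, not the project's code):

def generate_slices_sketch(n_samples, batch_size):
    # yield contiguous slices covering [0, n_samples); the last one may be shorter
    for start in range(0, n_samples, batch_size):
        yield slice(start, min(start + batch_size, n_samples))

# e.g. list(generate_slices_sketch(10, 4)) -> [slice(0, 4), slice(4, 8), slice(8, 10)]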
Example #54
0
    def reduce_data(self, X, y):
        if self.classifier is None:
            self.classifier = KNeighborsClassifier(n_neighbors=self.n_neighbors)
        if self.classifier.n_neighbors != self.n_neighbors:
            self.classifier.n_neighbors = self.n_neighbors

        X, y = check_arrays(X, y, sparse_format="csr")

        classes = np.unique(y)
        self.classes_ = classes

        if self.n_neighbors >= len(X):
            self.X_ = np.array(X)
            self.y_ = np.array(y)
            self.reduction_ = 0.0

        mask = np.zeros(y.size, dtype=bool)

        tmp_m = np.ones(y.size, dtype=bool)
        for i in xrange(y.size):
            tmp_m[i] = not tmp_m[i]
            self.classifier.fit(X[tmp_m], y[tmp_m])
            sample, label = X[i], y[i]

            if self.classifier.predict(sample) == [label]:
                mask[i] = not mask[i]

            tmp_m[i] = not tmp_m[i]

        self.X_ = np.asarray(X[mask])
        self.y_ = np.asarray(y[mask])
        self.reduction_ = 1.0 - float(len(self.y_)) / len(y)
        return self.X_, self.y_
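The loop above performs a leave-one-out edit: each sample is classified by a model fit on all the other samples and is kept only if the prediction matches its label. A tiny standalone sketch of the same idea with scikit-learn's KNeighborsClassifier (illustration only, not the snippet's class):

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(10, 2) + [0, 0], rng.randn(10, 2) + [4, 4]])
y = np.array([0] * 10 + [1] * 10)

keep = np.zeros(y.size, dtype=bool)
for i in range(y.size):
    others = np.ones(y.size, dtype=bool)
    others[i] = False                                  # leave sample i out
    clf = KNeighborsClassifier(n_neighbors=3).fit(X[others], y[others])
    keep[i] = clf.predict(X[i:i+1])[0] == y[i]         # keep it only if predicted correctly

print("kept %d of %d samples" % (keep.sum(), y.size))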
Example #55
0
    def fit(self, X):
        n_batches = X.shape[0]
        w, h = self.v_shape
        for itr in xrange(self.n_iter):
            sum_err = 0
            # print('fit')
            for vi in xrange(n_batches):
                v = X[vi].reshape(self.v_shape)
                # print('fit2: ' + str(v.shape))
                dW, db, dc = self._gradients(v)
                self._apply_gradients(dW, db, dc)
                sum_err += self._batch_err(v)
            print('Iter %d: error = %d' % (itr, sum_err))
            dc_sparse = self.lr * self.sparse_gain * \
                        (self.sparsity - self.h_mean.mean(axis=2).mean(axis=1))
            self.c = self.c + dc_sparse
Example #56
0
def test_parameter_grid():
    # Test basic properties of ParameterGrid.
    params1 = {"foo": [1, 2, 3]}
    grid1 = ParameterGrid(params1)
    assert_true(isinstance(grid1, Iterable))
    assert_true(isinstance(grid1, Sized))
    assert_equal(len(grid1), 3)
    assert_grid_iter_equals_getitem(grid1)

    params2 = {"foo": [4, 2], "bar": ["ham", "spam", "eggs"]}
    grid2 = ParameterGrid(params2)
    assert_equal(len(grid2), 6)

    # loop to assert we can iterate over the grid multiple times
    for i in xrange(2):
        # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2)
        points = set(tuple(chain(*(sorted(p.items())))) for p in grid2)
        assert_equal(
            points,
            set(("bar", x, "foo", y)
                for x, y in product(params2["bar"], params2["foo"])))

    assert_grid_iter_equals_getitem(grid2)

    # Special case: empty grid (useful to get default estimator settings)
    empty = ParameterGrid({})
    assert_equal(len(empty), 1)
    assert_equal(list(empty), [{}])
    assert_grid_iter_equals_getitem(empty)
    assert_raises(IndexError, lambda: empty[1])

    has_empty = ParameterGrid([{'C': [1, 10]}, {}, {'C': [.5]}])
    assert_equal(len(has_empty), 4)
    assert_equal(list(has_empty), [{'C': 1}, {'C': 10}, {}, {'C': .5}])
    assert_grid_iter_equals_getitem(has_empty)
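For reference, a minimal usage sketch of scikit-learn's ParameterGrid (imported here from sklearn.model_selection; older releases exposed it from sklearn.grid_search): it expands a dict, or list of dicts, of parameter lists into the full cross-product of settings.

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({"foo": [4, 2], "bar": ["ham", "spam", "eggs"]})
print(len(grid))          # 6 combinations: 2 * 3
print(grid[0])            # grids are indexable, e.g. {'bar': 'ham', 'foo': 4}
for params in grid:       # and iterable, yielding one dict per combination
    pass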
Example #57
0
    def fit(self, X):
        n_batches = X.shape[0]
        w, h = self.v_shape
        for itr in xrange(self.n_iter):
            sum_err = 0
            # print('fit')
            for vi in xrange(n_batches):
                v = X[vi].reshape(self.v_shape)
                # print('fit2: ' + str(v.shape))
                dW, db, dc = self._gradients(v)
                self._apply_gradients(dW, db, dc)
                sum_err += self._batch_err(v)
            print('Iter %d: error = %d' % (itr, sum_err))
            dc_sparse = self.lr * self.sparse_gain * \
                        (self.sparsity - self.h_mean.mean(axis=2).mean(axis=1))
            self.c = self.c + dc_sparse
Example #58
0
def test_parameter_grid():
    """Test basic properties of ParameterGrid."""
    params1 = {"foo": [1, 2, 3]}
    grid1 = ParameterGrid(params1)
    assert_true(isinstance(grid1, Iterable))
    assert_true(isinstance(grid1, Sized))
    assert_equal(len(grid1), 3)

    params2 = {"foo": [4, 2],
               "bar": ["ham", "spam", "eggs"]}
    grid2 = ParameterGrid(params2)
    assert_equal(len(grid2), 6)

    # loop to assert we can iterate over the grid multiple times
    for i in xrange(2):
        # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2)
        points = set(tuple(chain(*(sorted(p.items())))) for p in grid2)
        assert_equal(points,
                     set(("bar", x, "foo", y)
                         for x, y in product(params2["bar"], params2["foo"])))

    # Special case: empty grid (useful to get default estimator settings)
    empty = ParameterGrid({})
    assert_equal(len(empty), 1)
    assert_equal(list(empty), [{}])

    has_empty = ParameterGrid([{'C': [1, 10]}, {}])
    assert_equal(len(has_empty), 3)
    assert_equal(list(has_empty), [{'C': 1}, {'C': 10}, {}])
Example #59
0
def get_period(inputs, targets, thres=0.05, min_dist=2):
    """Period detection routine.

    Finds the period in *targets* by taking its autocorrelation and its first
    order difference. By using *thres* and *min_dist* parameters, it is
    possible to reduce the number of detected peaks. *targets* must be signed.

    Parameters
    ----------
    inputs : ndarray
        support of *targets*.

    targets : ndarray (signed)
        1D amplitude data to search for peaks.

    thres : float between [0., 1.]
        Normalized threshold. Only the peaks with amplitude higher than the
        threshold will be detected.

    min_dist : int
        Minimum distance between each detected peak. The peak with the highest
        amplitude is preferred to satisfy this constraint.

    Returns
    -------
    float
        a period estimation of the signal targets = f(inputs).
    """
    spikes_mean_diff = zeros(targets.shape[1])
    for i in xrange(targets.shape[1]):
        auto_corr = autocorrelation(targets[:, i])
        spikes = indexes(auto_corr, thres=(thres / max(auto_corr)),
                         min_dist=min_dist)
        spikes_mean_diff[i] = mean(diff(spikes.ravel()))
    # average the peak spacing over all output dimensions, then convert
    # from sample-index units to the units of *inputs*
    return mean(spikes_mean_diff) * mean(diff(inputs.ravel()))
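A self-contained sketch of the same idea on a synthetic sine wave, using a plain NumPy autocorrelation and a naive local-maximum peak finder in place of the snippet's autocorrelation and indexes helpers (illustration only):

import numpy as np

t = np.linspace(0, 10, 1000)             # support (the "inputs")
signal = np.sin(2 * np.pi * t)           # period 1.0 in units of t

# autocorrelation of the zero-mean signal, lags 0..N-1
s = signal - signal.mean()
auto = np.correlate(s, s, mode='full')[s.size - 1:]

# naive peak detection: local maxima above a threshold
thres = 0.05 * auto.max()
peaks = np.where((auto[1:-1] > auto[:-2]) &
                 (auto[1:-1] > auto[2:]) &
                 (auto[1:-1] > thres))[0] + 1

dt = np.mean(np.diff(t))
period = np.mean(np.diff(peaks)) * dt
print("estimated period: %.3f" % period)  # close to 1.0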