def build_H_matrix(X, kohonen):
    """Build the representation of the hits matrix from the INPUT space into
    the OUTPUT space, given the sample matrix X.
    The samples must have the same n_features dimensionality as those used
    for training the map.

    Parameters
    ----------
    X : array-like CSR matrix
        Sparse representation of the samples in the INPUT space, a CSR
        matrix of shape = [n_samples, n_features]. Must be the same as
        that used for fitting the map.

    Returns
    -------
    H_matrix : ndarray
        Numpy array of shape = [n_nodes] of integers giving, for each node,
        the number of best matching documents.
    """
    # Initialize the hits matrix as an ndarray of ints
    debug("Build_H_matrix", "Starting the counting of hits...")
    debug("Build_H_matrix", "Using %d documents with %d features" % X.shape)

    n_nodes = kohonen.shape[0]
    H_matrix = np.zeros(n_nodes, dtype=int)
    KN = Normalizer().fit_transform(kohonen)

    # Count, for every sample, the node (best matching unit) it falls on
    n_samples, n_features = X.shape
    for i in range(n_samples):
        bmu_idx = c_get_bmu(KN, X.getrow(i))[0]
        H_matrix[bmu_idx] += 1
        print(bmu_idx)
    return H_matrix
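For context, the hit counting above can also be written without the module's c_get_bmu helper. The sketch below is a standalone illustration, assuming the best matching unit is the node with the highest cosine similarity to the sample (suggested by the Normalizer step but not stated in the source); the function name and the random data in the usage comment are purely illustrative.

import numpy as np
from scipy import sparse
from sklearn.preprocessing import Normalizer

def hits_matrix_sketch(X, kohonen):
    # Illustrative only: kohonen is a dense [n_nodes, n_features] codebook,
    # X a CSR matrix of samples with the same n_features.
    KN = Normalizer().fit_transform(kohonen)        # unit-length node vectors
    XN = Normalizer().fit_transform(X)              # unit-length samples (stays CSR)
    sims = XN.dot(KN.T)                             # cosine similarities, [n_samples, n_nodes]
    bmu = np.asarray(sims.argmax(axis=1)).ravel()   # best matching unit per sample
    return np.bincount(bmu, minlength=kohonen.shape[0])

# Usage with random data:
# X = sparse.random(100, 50, density=0.1, format="csr")
# K = np.random.rand(36, 50)
# H = hits_matrix_sketch(X, K)    # H.sum() == 100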
Example #3
def batch_unit_deltas(Kohonen, kernel, X, normalized_X, kshape, Xshape, topology="rect"):
    """Loop over the CSR samples to compute the batch training update for a
    single epoch (training cycle).

    Parameters
    ----------
    Kohonen : matrix of shape = [n_nodes, n_features]
        Current codebook (node weights) of the map.
    kernel : ndarray
        Neighbourhood kernel, unfolded around each best matching unit.
    X : CSR matrix of shape = [n_samples, n_features]
        Samples in the INPUT space.
    normalized_X : CSR matrix of shape = [n_samples, n_features]
        Row-normalized version of X, used for the BMU search.
    kshape : tuple (rows, cols)
        Shape of the map grid; n_nodes = rows * cols.
    Xshape : tuple (n_samples, n_features)
        Shape of the sample matrix.
    topology : str, default "rect"
        Map topology used to convert node indices to grid coordinates.

    Returns
    -------
    unit_deltas : CSR matrix of shape = [n_nodes, n_features]
        Gives for each node the weight update accumulated over the batch.
    quantization_error : float
        Mean of the quantization error over all samples.
    """
    # Initialize the dimensions
    n_samples, n_features = Xshape
    n_nodes = kshape[0] * kshape[1]

    # Initialize the sparse matrices
    normalized_Kohonen = Normalizer().fit_transform(Kohonen)
    unit_deltas = sparse.csr_matrix((n_nodes, n_features), dtype=np.double)
    quantization_error_data = np.empty(n_samples, dtype=np.double)

    for i in np.arange(n_samples):
        # Broadcasting the size of the sample over the first axis of K
        sn = normalized_X.getrow(i)

        # Get the location of the best matching node for that sample (rect map assumed)
        location, quantization_error_data[i] = c_get_bmu(normalized_Kohonen, sn)
        bmu = _node_to_coordinate(location, kshape, topology=topology)

        # Train all units at once by unfolding the kernel around the BMU
        infl = _unfold_kernel(bmu, kernel, kshape).flatten()

        # Update the single-iteration delta as infl[j] * (X[i] - K[j])
        matrix_repr = c_single_unit_deltas(X, Kohonen, i, infl)
        # matrix_repr = sparse.rand(n_nodes, n_features).tocsr()
        unit_deltas = unit_deltas + sparse.csr_matrix(matrix_repr,
                                                      (n_nodes, n_features))

    # The quantization error is the mean distance between the samples and
    # their corresponding best matching unit
    quantization_error = quantization_error_data.mean()
    return unit_deltas, quantization_error
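To make the per-sample delta infl[j] * (X[i] - K[j]) concrete, here is a dense NumPy sketch of one batch epoch. It substitutes a Gaussian neighbourhood on a rectangular grid for the module's _unfold_kernel and uses cosine similarity for the BMU search; the function name, the sigma parameter and the quantization-error proxy are assumptions for illustration, not the module's implementation.

import numpy as np

def batch_deltas_sketch(K, X, kshape, sigma=1.0):
    # K: dense codebook [n_nodes, n_features]; X: dense samples [n_samples, n_features]
    # kshape: (rows, cols) of a rectangular map with n_nodes = rows * cols
    rows, cols = kshape
    coords = np.array([(r, c) for r in range(rows) for c in range(cols)], dtype=float)
    KN = K / np.linalg.norm(K, axis=1, keepdims=True)
    deltas = np.zeros_like(K, dtype=float)
    qe = np.empty(X.shape[0])
    for i in range(X.shape[0]):
        x = X[i]
        sims = KN.dot(x / np.linalg.norm(x))        # cosine similarity to every node
        bmu = int(np.argmax(sims))
        qe[i] = 1.0 - sims[bmu]                     # quantization-error proxy (assumed)
        d2 = ((coords - coords[bmu]) ** 2).sum(axis=1)
        infl = np.exp(-d2 / (2.0 * sigma ** 2))     # Gaussian neighbourhood (assumed)
        deltas += infl[:, None] * (x[None, :] - K)  # accumulate infl[j] * (X[i] - K[j])
    return deltas, qe.mean()

# deltas, qe = batch_deltas_sketch(np.random.rand(36, 50), np.random.rand(100, 50), (6, 6))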
Example #5
    def predict(self, X):
        """Predict the position of the samples on the estimator's Kohonen map.
        A simple check that the map has been fitted is performed.

        Parameters
        ----------
        X : CSR matrix of shape = [n_samples, n_features]
            Samples in the INPUT space.

        Returns
        -------
        Y : array of shape = [n_samples]
            Index of the cluster (best matching node) each sample belongs to.
        """
        if hasattr(self, "K_"):
            KN = Normalizer().fit_transform(self.K_)
            return np.array([c_get_bmu(KN, X.getrow(i))[0]
                             for i in range(X.shape[0])])
        else:
            debug("predict", "The Kohonen network must be trained before use")
            return None
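The per-row loop in predict can also be expressed in one vectorized step. The sketch below is only an equivalent reading of that loop under the assumption that c_get_bmu returns the index of the node with the highest cosine similarity to the sample (inferred from the Normalizer step, not stated in the source); predict_sketch is an illustrative name, not part of the estimator.

import numpy as np
from sklearn.preprocessing import Normalizer

def predict_sketch(K_, X):
    # K_: trained codebook [n_nodes, n_features]; X: CSR samples [n_samples, n_features]
    KN = Normalizer().fit_transform(K_)             # unit-length node vectors
    sims = X.dot(KN.T)                              # similarity scores, [n_samples, n_nodes]
    return np.asarray(np.argmax(sims, axis=1)).ravel()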