Example #1
def build_laplacian_regularized(X,
                                laplacian_regularization,
                                var=1.0,
                                eps=0.0,
                                k=0,
                                laplacian_normalization=""):
    """
    Function to construct a regularized Laplacian from data.

    :param X: (n x m) matrix of m-dimensional samples
    :param laplacian_regularization: regularization to add to the Laplacian (parameter gamma)
    :param var: the sigma value for the exponential function, already squared
    :param eps: threshold eps for epsilon graphs
    :param k: number of neighbours k for k-nn. If zero, use epsilon-graph
    :param laplacian_normalization: string selecting which version of the laplacian matrix to construct
                                    'unn':  unnormalized,
                                    'sym': symmetric normalization
                                    'rw':  random-walk normalization
    :return: Q (n x n) matrix, the regularized Laplacian
    """
    # build the similarity graph W
    W = build_similarity_graph(X, var, eps, k)
    """
    Build the Laplacian L and the regularized Laplacian Q.
    Both are (n x n) matrices.
    """
    L = build_laplacian(W, laplacian_normalization)

    # compute Q
    Q = L + laplacian_regularization * np.eye(W.shape[0])

    return Q
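
Below is a small usage sketch for Example #1. It is not part of the original example: the two-blob data, the parameter values, and the assumption that numpy is imported as np and that build_similarity_graph / build_laplacian are available in scope are all illustrative.

import numpy as np

# Hypothetical toy data: two Gaussian blobs of 50 points each (illustrative only).
X = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 3.0])

# Regularized Laplacian of a 5-NN graph with gamma = 0.01 and symmetric normalization.
Q = build_laplacian_regularized(X, laplacian_regularization=0.01,
                                var=1.0, k=5, laplacian_normalization="sym")

print(Q.shape)  # (100, 100): Q = L + gamma * I, as computed above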
Example #2
def soft_hfs(X,
             Y,
             c_l,
             c_u,
             laplacian_regularization=0.000001,
             var=1,
             eps=0,
             k=10,
             laplacian_normalization=""):
    """
    Perform soft (unconstrained) HFS.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with nodes labels [1, ..., num_classes] (0 is unlabeled)
    :param c_l: coefficient of the C matrix for labeled samples
    :param c_u: coefficient of the C matrix for unlabeled samples
    :return: labels, class assignments for each of the n nodes
    """
    num_samples = np.size(X, 0)
    Cl = np.unique(Y)
    num_classes = len(Cl) - 1

    # Indices of labelled and unlabelled data
    l_idx = np.argwhere(Y != 0)[:, 0]
    u_idx = np.argwhere(Y == 0)[:, 0]

    # Build C matrix
    diagC = np.zeros((num_samples, ))
    diagC[l_idx] = c_l
    diagC[u_idx] = c_u
    C = np.diag(diagC)
    C_inv = np.diag(1 / diagC)

    # Target vector
    y = one_hot_target(Y)[:, 1:]

    # Build similarity graph and Laplacian
    W = helper.build_similarity_graph(X, var, eps, k)
    L = helper.build_laplacian(W, laplacian_normalization)

    # Q matrix (regularized laplacian)
    Q = L + laplacian_regularization * np.eye(num_samples)

    # Computation of fstar
    D = np.dot(C_inv, Q) + np.eye(num_samples)
    D_inv = np.linalg.inv(D)
    fstar = np.dot(D_inv, y)

    # +1 : labels start at 1
    labels = np.argmax(fstar, axis=1) + 1

    return labels
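
Inside soft_hfs above, the closed-form solution f* = (C^-1 Q + I)^-1 y is computed with two explicit matrix inverses. A minimal variation (a suggestion, not part of the example) multiplies through by C and solves one linear system instead; it assumes the same local variables Q, C and y as in the function body.

    # (C^{-1} Q + I) f* = y  is equivalent to  (Q + C) f* = C y,
    # so the two explicit inverses above can be replaced by a single solve
    # (sketch only; a drop-in replacement for the "Computation of fstar" block).
    fstar = np.linalg.solve(Q + C, np.dot(C, y))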
Example #3
def soft_hfs(X, Y, c_l, c_u, laplacian_regularization, var=1, eps=0, k=0, laplacian_normalization=""):
    """
    Function to perform soft (unconstrained) HFS.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with nodes labels [1, ... , num_classes] (0 is unlabeled)
    :param c_l: coefficient of the C matrix for labeled samples
    :param c_u: coefficient of the C matrix for unlabeled samples
    :param laplacian_regularization: regularization to add to the Laplacian
    :param var: the sigma value for the exponential function, already squared
    :param eps: threshold eps for epsilon graphs
    :param k: number of neighbours k for k-nn. If zero, use epsilon-graph
    :param laplacian_normalization: string selecting which version of the laplacian matrix to construct
                                    'unn': unnormalized, 'sym': symmetric normalization, 'rw': random-walk normalization
    :return: labels, class assignments for each of the n nodes, and the indices of the unlabeled nodes
    """

    num_samples = np.size(Y, 0)
    Cl = np.unique(Y)
    num_classes = len(Cl)-1

    """
    Compute the target y for the linear system  
    y = (n x num_classes) target vector 
    l_idx = (l x 1) vector with indices of labeled nodes
    u_idx = (u x 1) vector with indices of unlabeled nodes
    """
    y = np.zeros((len(Y), num_classes))
    for ind, c in enumerate(Y):
        if c != 0:
            y[ind][int(c) - 1] = 1

    """
    compute the hfs solution, remember that you can use build_laplacian_regularized and build_similarity_graph
    f = (n x num_classes) hfs solution 
    C = (n x n) diagonal matrix with c_l for labeled samples and c_u otherwise    
    """
    labeled_ind = (Y != 0)*1
    unlabeled_ind = (Y == 0)*1
    C = np.diag(c_l * labeled_ind + c_u * unlabeled_ind ) 
    
    Q = build_laplacian_regularized(X, laplacian_regularization, var, eps, k, laplacian_normalization)
    
    f_star = np.linalg.inv(np.linalg.inv(C).dot(Q) + np.eye(len(X))).dot(y)

    """
    compute the labels assignment from the hfs solution 
    labels: (n x 1) class assignments [1, ... ,num_classes]  
    """
    labels = np.argmax(f_star, axis=1) + 1
    return labels, np.where(Y == 0)[0]
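
The target matrix y in Example #3 is built with an explicit Python loop. A vectorized sketch is shown below; it is not part of the original example and assumes only numpy and class labels in 1..num_classes, as stated in the docstring.

Y_arr = np.asarray(Y, dtype=int).ravel()      # (n,) labels, 0 for unlabeled
y = np.zeros((len(Y_arr), num_classes))
labeled = Y_arr != 0
y[labeled, Y_arr[labeled] - 1] = 1            # one-hot rows only for labeled nodes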
Example #4
def hard_hfs(X,
             Y,
             laplacian_regularization=0.000001,
             var=1,
             eps=0,
             k=10,
             laplacian_normalization=""):
    """
    Perform hard (constrained) HFS.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with nodes labels [1, ..., num_classes] (0 is unlabeled)
    :return: labels, class assignments for each of the n nodes
    """

    num_samples = np.size(X, 0)
    Cl = np.unique(Y)
    num_classes = len(Cl) - 1

    # Indices of labelled and unlabelled data
    l_idx = np.argwhere(Y != 0)[:, 0]
    u_idx = np.argwhere(Y == 0)[:, 0]

    # Build similarity graph and Laplacian
    W = helper.build_similarity_graph(X, var, eps, k)
    L = helper.build_laplacian(W, laplacian_normalization)

    # Extract blocks corresponding to unlabelled and labelled data
    Luu = L[u_idx, :][:, u_idx]
    Wul = W[u_idx, :][:, l_idx]

    # fl with one hot encoding
    fl = one_hot_target(Y[l_idx])

    # Compute fu using regularized Laplacian
    Q = Luu + laplacian_regularization * np.eye(u_idx.shape[0])
    Q_inv = np.linalg.inv(Q)
    fu = np.dot(Q_inv, np.dot(Wul, fl))

    # Infer label from computed fu using thresholding
    fu_lab = np.argmax(fu, axis=1)

    # Consolidate labels from fu and fl
    labels = np.zeros((num_samples, ), dtype=int)
    # +1 because labels start at 1
    labels[u_idx] = fu_lab + 1
    labels[l_idx] = Y[l_idx]

    return labels
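
A hypothetical end-to-end run of Example #4 might look like the sketch below. The toy data, the revealed label indices, and the reliance on the helper module and one_hot_target from the example being importable are all assumptions made for illustration.

import numpy as np

np.random.seed(0)
# Two Gaussian blobs with ground-truth classes 1 and 2 (illustrative data).
X = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 4.0])
Y_true = np.concatenate([np.ones(50, dtype=int), 2 * np.ones(50, dtype=int)])

# Reveal only two labels per class; every other node is marked 0 (unlabeled).
Y = np.zeros(100, dtype=int)
Y[[0, 1, 50, 51]] = Y_true[[0, 1, 50, 51]]

labels = hard_hfs(X, Y, k=10)
print("accuracy:", np.mean(labels == Y_true))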
Example #5
def hard_hfs(X, Y, laplacian_regularization, var=1, eps=0, k=0, laplacian_normalization=""):
    """
    Function to perform hard (constrained) HFS.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with nodes labels [0, 1, ... , num_classes] (0 is unlabeled)
    :param laplacian_regularization: regularization to add to the Laplacian
    :param var: the sigma value for the exponential function, already squared
    :param eps: threshold eps for epsilon graphs
    :param k: number of neighbours k for k-nn. If zero, use epsilon-graph
    :param laplacian_normalization: string selecting which version of the laplacian matrix to construct
                                    'unn':  unnormalized,
                                    'sym': symmetric normalization
                                    'rw':  random-walk normalization
    :return: labels, class assignments for each of the n nodes
    """

    num_samples = np.size(X, 0)
    Cl = np.unique(Y)
    num_classes = len(Cl)-1

    """
    Build the vectors:
    l_idx = (l x 1) vector with indices of labeled nodes
    u_idx = (u x 1) vector with indices of unlabeled nodes
    """
    l_idx = np.where(Y != 0)[0]
    u_idx = np.where(Y == 0)[0]

    """
    Compute the hfs solution, remember that you can use the functions build_laplacian_regularized and 
    build_similarity_graph    
    
    f_l = (l x num_classes) hfs solution for labeled data. It is the one-hot encoding of Y for labeled nodes.   
    
    example:         
        if Cl=[0,3,5] and Y=[0,0,0,3,0,0,0,5,5], then f_l is a 3x2  binary matrix where the first column codes 
        the class '3'  and the second the class '5'.    
    
    In case of 2 classes, you can also use +-1 labels      
        
    f_u = array (u x num_classes) hfs solution for unlabeled data
    
    f = array of shape(num_samples, num_classes)
    """
    W = build_similarity_graph(X, var, eps, k)
    L = build_laplacian(W, laplacian_normalization)
    
    f_l = np.zeros((len(l_idx), num_classes))
    for ind, c in enumerate(Y[l_idx]):
        if c != 0:
            f_l[ind][int(c) - 1] = 1
        
    L_uu = L[u_idx][:,u_idx]
    L_ul = L[u_idx][:,l_idx]
    f_u = np.linalg.pinv(L_uu).dot(- np.dot(L_ul, f_l))
    # f = np.concatenate(f_l, f_u)
    """
    compute the labels assignment from the hfs solution   
    labels: (n x 1) class assignments [1,2,...,num_classes]    
    """
    labels = np.zeros(len(X))
    labels[l_idx] = np.argmax(f_l, axis=1) + 1
    labels[u_idx] = np.argmax(f_u, axis=1) + 1

    return labels
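
Note that Example #5 propagates labels with f_u = -L_uu^-1 L_ul f_l, while Examples #4 and #6 use f_u = L_uu^-1 W_ul f_l. For the unnormalized Laplacian L = D - W the off-diagonal block satisfies L_ul = -W_ul (D is diagonal), so the two formulations coincide. The check below is a sketch, not part of the example, and assumes the local variables W, L, l_idx and u_idx from the function body.

    # Off-diagonal blocks of L = D - W get no contribution from the diagonal D,
    # so L_ul = -W_ul and the two hard-HFS formulations agree (sanity-check sketch).
    assert np.allclose(L[u_idx][:, l_idx], -W[u_idx][:, l_idx])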
Example #6
def hard_hfs(X, Y, laplacian_regularization, var=1, eps=0, k=6, laplacian_normalization=""):
    """
    Function to perform hard (constrained) HFS.
    
    /!\ WE ASSUME HERE THAT X AND Y ARE SORTED SO THAT LABELED SAMPLES COME FIRST /!\

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with nodes labels [0, 1, ... , num_classes] (0 is unlabeled)
    :param laplacian_regularization: regularization to add to the Laplacian
    :param var: the sigma value for the exponential function, already squared
    :param eps: threshold eps for epsilon graphs
    :param k: number of neighbours k for k-nn. If zero, use epsilon-graph
    :param laplacian_normalization: string selecting which version of the laplacian matrix to construct
                                    'unn':  unnormalized,
                                    'sym': symmetric normalization
                                    'rw':  random-walk normalization
    :return: labels, class assignments for each of the n nodes
    """
    
    num_samples = np.size(X, 0)
    Cl = np.unique(Y)
    num_classes = len(Cl)-1

    """
    Build the vectors:
    l_idx = (l x 1) vector with indices of labeled nodes
    u_idx = (u x 1) vector with indices of unlabeled nodes
    """    

    l_idx, u_idx = [], []
    for i in range(num_samples):
        if Y[i]==0:
            u_idx.append(i)
        else:
            l_idx.append(i)

    num_labels = len(l_idx)
    print("labels",num_labels)
    print("samples",num_samples)
    

    """
    Compute the hfs solution, remember that you can use the functions build_laplacian_regularized and 
    build_similarity_graph    
    
    f_l = (l x num_classes) hfs solution for labeled data. It is the one-hot encoding of Y for labeled nodes.   
    
    example:         
        if Cl=[0,3,5] and Y=[0,0,0,3,0,0,0,5,5], then f_l is a 3x2  binary matrix where the first column codes 
        the class '3'  and the second the class '5'.    
    
    In case of 2 classes, you can also use +-1 labels      
        
    f_u = array (u x num_classes) hfs solution for unlabeled data
    
    f = array of shape(num_samples, num_classes)
    """
    
    L = build_laplacian_regularized(X, laplacian_regularization, var, eps, k, laplacian_normalization)
        
    Luu = L[num_labels:num_samples,num_labels:num_samples]
    print(Luu.shape)
    print(Luu)
    inv_Luu = np.linalg.inv(Luu)
    
    W = build_similarity_graph(X, var, eps, k)
    Wul = W[num_labels:num_samples,0:num_labels]
    
    
    f_l = np.zeros((len(l_idx),num_classes))
    for i in l_idx:
        f_l[i][int(Y[i])-1]=1
         
    # this is the closed form solution, showed in class
    f_u = inv_Luu @ (Wul @ f_l)
    
    
    """
    compute the labels assignment from the hfs solution   
    labels: (n x 1) class assignments [1,2,...,num_classes]    
    """
    labels = []
    for i in range(len(f_l)):
        labels.append(np.argmax(f_l[i]) + 1)
    for i in range(len(f_u)):
        labels.append(np.argmax(f_u[i]) + 1)
            
    return labels
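
Because this variant treats the first num_labels rows as the labeled block, a caller would reorder the data so labeled samples come first before calling it. The sketch below is illustrative only; it assumes numpy arrays X and Y shaped as in the docstring.

order = np.argsort(Y == 0, kind="stable")        # labeled nodes (Y != 0) first
labels_sorted = hard_hfs(X[order], Y[order], laplacian_regularization=0.000001, k=6)

# Undo the permutation to report labels in the original sample order.
labels = np.empty(len(Y), dtype=int)
labels[order] = labels_sorted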