Пример #1
0
    def __GFHF(self,X,W,Y,labeledIndexes, hook = None):
        """Closed-form harmonic-function label propagation (GFHF).

        Rows of ``Y`` belonging to labeled instances are kept fixed; rows of
        unlabeled instances are replaced by
        ``(I - P_uu)^-1 @ P_ul @ Y_l`` where
        ``P = gutils.deg_matrix(W, -1.0) @ W`` (presumably the row-normalized
        transition matrix -- confirm against ``gutils.deg_matrix``).

        Args:
            X: Input instances. Not used by this method.
            W: Affinity matrix with one row per instance; scipy sparse
                matrices are densified, anything else is used as given.
            Y: Label matrix, or a 1-D label vector that is expanded with
                ``gutils.init_matrix``. The caller's array is not modified.
            labeledIndexes: Boolean mask selecting the labeled instances.
            hook: Not used by this method.

        Returns:
            A copy of ``Y`` whose unlabeled rows hold the propagated values.

        Raises:
            ValueError: If ``W`` and ``Y`` have a different number of rows.
        """
        import scipy.sparse

        # Only sparse matrices provide .todense(); dense input passes through.
        if scipy.sparse.issparse(W):
            W = W.todense()
        Y = np.copy(Y)

        # A flat label vector has to be expanded BEFORE the row indexing
        # below: ``Y[mask, :]`` raises IndexError on a 1-D array.
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y,labeledIndexes)
        unlabeled_mask = np.logical_not(labeledIndexes)
        Y[unlabeled_mask,:] = 0

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        u = np.where(unlabeled_mask)[0]
        l = np.where(labeledIndexes)[0]

        P  = gutils.deg_matrix(W,-1.0) @ W

        # Unlabeled->labeled and unlabeled->unlabeled sub-blocks of P.
        P_ul = P[u[:, None],l]
        P_uu = P[u[:, None],u]
        system = np.identity(u.shape[0]) - P_uu

        try:
            system_inv = np.linalg.inv(system)
        except np.linalg.LinAlgError:
            # Singular system: fall back to the pseudo-inverse.
            system_inv = np.linalg.pinv(system)
        Y[u,:] = system_inv @ P_ul @ Y[l,:]

        return(Y)
Пример #2
0
    def generateAffMat(self, X, Y=None, labeledIndexes=None, hook=None):
        """Build the affinity matrix for the instances in ``X``.

        The neighbourhood mask comes from ``self.get_or_calc_Mask`` and the
        weights from ``self.W_from_K``. When ``self.sigma == "mean"`` the
        distance function is first replaced by the result of
        ``self.handle_adaptive_sigma``. When ``self.row_normalize`` is set the
        result is left-multiplied by ``gutils.deg_matrix(W, pwr=-1.0, ...)``.

        Args:
            X: Input instances, one per row.
            Y: Labels; only forwarded to the hook.
            labeledIndexes: Labeled mask; only forwarded to the hook.
            hook: Optional observer; ``_begin`` and ``_end`` are called on it.

        Returns:
            The ``(N, N)`` affinity matrix cast to ``np.float32``.

        Raises:
            ValueError: If the mask and ``X`` disagree on the number of rows.
            Exception: If the largest entry of the affinity matrix is zero.
        """
        notify = hook is not None

        LOG.info("Creating Affinity Matrix...", LOG.ll.MATRIX)
        if notify:
            hook._begin(X=X, Y=Y, labeledIndexes=labeledIndexes, W=None)

        mask = self.get_or_calc_Mask(X)

        # Adaptive bandwidth: the distance function depends on the mask.
        if self.sigma == "mean":
            self.dist_func = self.handle_adaptive_sigma(mask)

        if mask.shape[0] != X.shape[0]:
            raise ValueError("Shapes do not match for X,K")

        affinity = self.W_from_K(X, mask)
        del mask

        if self.row_normalize == True:
            inv_degree = gutils.deg_matrix(affinity,
                                           pwr=-1.0,
                                           NA_replace_val=1.0)
            affinity = inv_degree @ affinity

        LOG.info("Creating Affinity Matrix...Done!", LOG.ll.MATRIX)

        num_instances = X.shape[0]
        assert affinity.shape == (num_instances, num_instances)
        if np.max(affinity) == 0:
            raise Exception(
                "Affinity matrix cannot have all entries equal to zero.")

        if notify:
            hook._end(X=X, Y=Y, W=affinity)

        return affinity.astype(np.float32)
Пример #3
0
    def __LGC(self, X, W, Y, labeledIndexes, alpha=0.1, hook=None):
        """Closed-form Local and Global Consistency (LGC) propagation.

        Computes ``(I - alpha * S)^-1 @ Y`` where
        ``S = I - gutils.lap_matrix(W, is_normalized=True)``.

        Args:
            X: Input instances. Not used by this method.
            W: Affinity matrix; scipy sparse input is densified.
            Y: Label matrix, or a 1-D label vector that is expanded with
                ``gutils.init_matrix``. The caller's array is not modified.
            labeledIndexes: Boolean mask selecting the labeled instances;
                rows of ``Y`` outside the mask are zeroed before propagating.
            alpha: Weight applied to ``S`` in the linear system.
            hook: Not used by this method.

        Returns:
            The propagated label matrix, one row per instance.

        Raises:
            ValueError: If ``W`` and ``Y`` have a different number of rows.
        """
        import scipy.sparse
        if scipy.sparse.issparse(W):
            W = W.todense()
        Y = np.copy(Y)
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0
        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        # The D^{-1/2} matrix the original computed here was never used;
        # the normalization is done inside gutils.lap_matrix.
        I = np.identity(Y.shape[0])
        S = I - gutils.lap_matrix(W, is_normalized=True)

        return (np.matmul(np.linalg.inv(I - alpha * S), Y))
Пример #4
0
 def __GFHF_iter(self,X,W,Y,labeledIndexes,num_iter,  hook = None):
     """Iterative harmonic-function label propagation (GFHF).

     Repeats ``Y <- P @ Y`` for ``num_iter`` steps with
     ``P = gutils.deg_matrix(W, -1.0) @ W``, restoring the rows of the
     labeled instances to their initial values after every step.

     Args:
         X: Input instances; only forwarded to the hook.
         W: Affinity matrix; scipy sparse input is densified, anything
             else is used as given.
         Y: Label matrix, or a 1-D label vector that is expanded with
             ``gutils.init_matrix``. The caller's array is not modified.
         labeledIndexes: Boolean mask selecting the labeled instances.
         num_iter: Number of propagation steps.
         hook: Optional observer; ``_step`` is called after every step.

     Returns:
         The label matrix after the last propagation step.

     Raises:
         ValueError: If ``W`` and ``Y`` have a different number of rows.
     """
     import scipy.sparse

     # Only sparse matrices provide .todense(); dense input passes through.
     if scipy.sparse.issparse(W):
         W = W.todense()
     Y = np.copy(Y)

     # Expand a flat label vector BEFORE the row indexing below, which
     # raises IndexError on a 1-D array.
     if Y.ndim == 1:
         Y = gutils.init_matrix(Y,labeledIndexes)
     Y[np.logical_not(labeledIndexes),:] = 0
     if not W.shape[0] == Y.shape[0]:
         raise ValueError("W,Y shape not compatible")

     P  = gutils.deg_matrix(W,-1.0) @ W
     # Boolean indexing copies, so Yl keeps the clamped values intact.
     Yl = Y[labeledIndexes,:]
     for i in range(num_iter):
         Y = P@Y
         Y[labeledIndexes,:] = Yl
         if hook is not None:
             hook._step(step=i,X=X,W=W,Y=Y,labeledIndexes=labeledIndexes)

     return Y
Пример #5
0
    def __MR(self, X, W, Y, labeledIndexes, p, tuning_iter, hook=None):
        """Unlabel the labeled instances that a spectral fit explains worst.

        A least-squares fit of the labels is computed on ``p`` generalized
        eigenvectors of the Laplacian/degree pair (the first one is skipped).
        Every labeled instance is scored by the squared difference between
        its label entry and the fitted value for its class; the
        ``tuning_iter`` instances with the largest score are removed from
        ``labeledIndexes``.

        Args:
            X: Input instances; only forwarded to the hook.
            W: Affinity matrix, converted with ``scipy_to_np``.
            Y: Label matrix or 1-D label vector. The caller's array is not
                modified.
            labeledIndexes: Boolean mask selecting the labeled instances.
            p: Number of eigenvectors. A non-``int`` value is interpreted as
                a fraction of the number of labeled instances.
            tuning_iter: Number of instances to unlabel.
            hook: Optional observer; ``_step`` is called after each removal.

        Returns:
            Tuple ``(Y, labeledIndexes)``: the zero-masked copy of ``Y`` and
            the updated copy of the mask.

        Raises:
            ValueError: If ``W`` and ``Y`` have a different number of rows.
        """
        Y = np.copy(Y)
        if Y.ndim == 1:
            Y[np.logical_not(labeledIndexes)] = 0
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0
        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        labeled_ids = np.where(labeledIndexes)[0]
        num_lab = labeled_ids.shape[0]

        # NOTE(review): numpy integer types are not ``int`` and therefore
        # take the "fraction" path as well -- confirm that is intended.
        if not isinstance(p, int):
            p = int(p * num_lab)
        # Eigenvector 0 is skipped, so only indices 1..N-1 exist. Clamping to
        # N (as before) made eigh() reject the requested index range.
        max_p = Y.shape[0] - 1
        if p > max_p:
            p = max_p
            LOG.warn("Warning: p greater than the number of labeled indexes",
                     LOG.ll.FILTER)

        W = scipy_to_np(W)
        L = gutils.lap_matrix(W, is_normalized=False)
        D = gutils.deg_matrix(W)

        if np.allclose(L, L.T, atol=1e-8):
            # NOTE(review): ``eigvals=`` is deprecated in recent SciPy in
            # favour of ``subset_by_index`` -- confirm the pinned version.
            E = sp.eigh(L, D, eigvals=(1, p))[1]
        else:
            LOG.warn("Warning: Laplacian not symmetric", LOG.ll.FILTER)
            eigenValues, eigenVectors = sp.eig(L, D)
            idx = eigenValues.argsort()
            eigenValues = eigenValues[idx]
            assert eigenValues[0] <= eigenValues[eigenValues.shape[0] - 1]
            eigenVectors = eigenVectors[:, idx]
            E = eigenVectors[:, 1:(p + 1)]

        e_lab = E[labeledIndexes, :]
        # Tikhonov regularization term; currently all zeros (disabled).
        TIK = np.zeros(shape=e_lab.shape)
        gram = e_lab.T @ e_lab + TIK.T @ TIK
        try:
            gram_inv = np.linalg.inv(gram)
        except np.linalg.LinAlgError:
            # Singular normal equations: fall back to the pseudo-inverse.
            gram_inv = np.linalg.pinv(gram)
        A = gram_inv @ e_lab.T

        F = np.zeros(shape=Y.shape)
        Y_amax = np.argmax(Y, axis=1)
        y_m = Y_amax[labeledIndexes]

        # One-vs-rest fit per class: +1 for the class, -1 for the others.
        for i in range(Y.shape[1]):
            c = np.ones(num_lab)
            c[y_m != i] = -1
            a = A @ np.transpose(c)
            LOG.debug(a, LOG.ll.FILTER)
            F[:, i] = E @ a

        # Unlabeled instances keep -1 so that they sort after every labeled
        # instance (whose squared error is >= 0).
        ERmat = -1 * np.ones((Y.shape[0], ))
        assigned = Y_amax[labeled_ids]
        ERmat[labeled_ids] = np.square(Y[labeled_ids, assigned] -
                                       F[labeled_ids, assigned])

        # Largest error first.
        removed_Lids = np.argsort(ERmat)[::-1]

        labeledIndexes = np.array(labeledIndexes)
        for i in range(tuning_iter):
            labeledIndexes[removed_Lids[i]] = False
            if hook is not None:
                hook._step(step=i,
                           X=X,
                           W=W,
                           Y=Y,
                           labeledIndexes=labeledIndexes)

        return Y, labeledIndexes
Пример #6
0
    def __GTAM(self,
               X,
               W,
               Y,
               labeledIndexes,
               mu=99.0,
               useEstimatedFreq=True,
               num_iter=None,
               constant_prop=False,
               hook=None):
        """Greedy label assignment driven by a graph gradient (GTAM-style).

        On every iteration a matrix ``Q`` is built or incrementally updated
        from ``A`` and the current ``Z`` (``gutils.calc_Z``), and the
        (instance, class) pair with the smallest ``Q`` entry among the still
        unlabeled instances receives a label.

        Args:
            X: Input instances. Not used by this method.
            W: Affinity matrix with one row per instance.
            Y: Label matrix or 1-D label vector. The caller's array is not
                modified.
            labeledIndexes: Boolean mask selecting the labeled instances. A
                copy is updated internally.
            mu: Regularization constant used in ``P = (I + L / mu)^-1``.
            useEstimatedFreq: ``None`` selects uniform class frequencies; any
                ``bool`` estimates them from the labeled rows; any other
                value is used as the frequency vector itself.
            num_iter: Maximum number of instances to label; ``None`` means
                all unlabeled instances.
            constant_prop: If true, each step labels the class whose label
                count lags its expected share the most.
            hook: Optional observer; ``_step`` is called after every step.

        Returns:
            ``P @ Z`` as an ndarray.

        Raises:
            ValueError: If ``W`` and ``Y`` have a different number of rows.
        """
        # ---- initialization ----
        Y = np.copy(Y)
        labeledIndexes = np.array(labeledIndexes)
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        num_labeled = Y[labeledIndexes].shape[0]
        num_unlabeled = Y.shape[0] - num_labeled
        num_classes = Y.shape[1]

        # Class frequencies.
        # NOTE(review): ``useEstimatedFreq=False`` still estimates from the
        # labels, because only the type is inspected -- confirm intent.
        if useEstimatedFreq is None:
            estimatedFreq = np.repeat(1 / num_classes, num_classes)
        elif isinstance(useEstimatedFreq, bool):
            estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
        else:
            estimatedFreq = useEstimatedFreq
        LOG.debug("Estimated frequency: {}".format(estimatedFreq),
                  LOG.ll.CLASSIFIER)

        # IMPORTANT: erases whatever was stored for unlabeled instances.
        Y[np.logical_not(labeledIndexes), :] = 0

        D = gutils.deg_matrix(W, flat=True)
        I = np.identity(W.shape[0])
        L = gutils.lap_matrix(W, is_normalized=True)
        # Propagation matrix.
        P = np.linalg.inv(I + L / mu)
        P_t = P.transpose()
        A = ((P_t @ L) @ P) + mu * ((P_t - I) @ (P - I))
        A = np.asarray(A)

        if num_iter is None:
            num_iter = num_unlabeled
        else:
            num_iter = min(num_iter, num_unlabeled)

        Z = None
        Q = None
        Q_pure = None
        id_min_line, id_min_col = -1, -1

        # ---- iterations ----
        for i in np.arange(num_iter):
            ul = np.logical_not(labeledIndexes)

            Z = gutils.calc_Z(Y,
                              labeledIndexes,
                              D,
                              estimatedFreq,
                              weigh_by_degree=self.weigh_by_degree)
            if Q is None:
                # First pass: full graph gradient.
                Q = np.matmul(A, Z)
                if hook is not None:
                    # Snapshot taken before labeled rows are masked out; the
                    # same snapshot is handed to the hook on every step.
                    Q_pure = np.copy(Q)

                Q[labeledIndexes, :] = np.inf

            else:
                # Incremental update for the instance labeled in the
                # previous step.
                Q[id_min_line, :] = np.inf
                new_el_pct = (Z[id_min_line, id_min_col] /
                              np.sum(Z[:, id_min_col]))
                Q[ul, id_min_col] = (
                    (1 - new_el_pct) * Q[ul, id_min_col] +
                    Z[id_min_line, id_min_col] * A[ul, id_min_line])

            # Pick the next (instance, class) pair.
            if constant_prop:
                expectedNumLabels = estimatedFreq * np.sum(labeledIndexes)
                actualNumLabels = np.sum(Y[labeledIndexes], axis=0)
                class_to_label = np.argmax(expectedNumLabels - actualNumLabels)
                id_min_col = class_to_label
                id_min_line = np.argmin(Q[:, class_to_label])

            else:
                id_min = np.argmin(Q)
                id_min_line = id_min // num_classes
                id_min_col = id_min % num_classes

            labeledIndexes[id_min_line] = True
            Y[id_min_line, id_min_col] = 1

            if hook is not None:
                hook._step(step=i,
                           Y=Y,
                           labeledIndexes=labeledIndexes,
                           P=P,
                           Z=Z,
                           Q=Q_pure,
                           id_min_line=id_min_line,
                           id_min_col=id_min_col)

        if Z is None:
            # No iteration ran (nothing to label, or num_iter <= 0). The
            # previous placeholder ``[]`` made ``P @ Z`` raise.
            Z = gutils.calc_Z(Y,
                              labeledIndexes,
                              D,
                              estimatedFreq,
                              weigh_by_degree=self.weigh_by_degree)
        # NOTE(review): when the loop ran, Z predates the label assigned in
        # the final step -- confirm whether it should be recomputed.
        return np.asarray(P @ Z)
Пример #7
0
    def __MR(self,X,W,Y,labeledIndexes,p,hook=None):
        """Classify by a least-squares fit on Laplacian eigenvectors.

        ``W`` is symmetrized, ``p`` generalized eigenvectors of the
        Laplacian/degree pair are extracted (the first one is skipped), and
        for every class a one-vs-rest (+1/-1) least-squares fit on the
        labeled rows is evaluated on all instances. Negative scores are
        clipped to zero.

        Args:
            X: Input instances; only forwarded to the hook.
            W: Affinity matrix, converted with ``gutils.scipy_to_np``.
            Y: Label matrix or 1-D label vector. The caller's array is not
                modified.
            labeledIndexes: Boolean mask selecting the labeled instances.
            p: Number of eigenvectors. A non-``int`` value is interpreted as
                a fraction of the number of labeled instances.
            hook: Optional observer; ``_step`` is called once per
                eigenvector with that eigenvector passed as ``Y``.

        Returns:
            Score matrix ``F`` with the same shape as the label matrix.

        Raises:
            ValueError: If ``W`` and ``Y`` have a different number of rows.
        """
        Y = np.copy(Y)
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y,labeledIndexes)
        Y[np.logical_not(labeledIndexes),:] = 0

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        num_lab = np.where(labeledIndexes)[0].shape[0]

        # NOTE(review): numpy integer types are not ``int`` and therefore
        # take the "fraction" path as well -- confirm that is intended.
        if not isinstance(p, int):
            p = int(p * num_lab)

        # Eigenvector 0 is skipped, so only indices 1..N-1 exist. Clamping to
        # N (as before) made eigh() reject the range and let the hook loop
        # index past the last column of E.
        max_p = Y.shape[0] - 1
        if p > max_p:
            p = max_p
            LOG.warn("Warning: p greater than the number of labeled indexes",LOG.ll.CLASSIFIER)
        W = gutils.scipy_to_np(W)
        W =  0.5* (W + W.T)
        L = gutils.lap_matrix(W, is_normalized=False)
        D = gutils.deg_matrix(W)

        if np.allclose(L, L.T, atol=1e-8):
            # eigh returns (eigenvalues, eigenvectors); only the vectors are
            # needed.
            # NOTE(review): ``eigvals=`` is deprecated in recent SciPy in
            # favour of ``subset_by_index`` -- confirm the pinned version.
            _, E = sp.eigh(L,D,eigvals=(1,p))
        else:
            LOG.warn("Warning: Laplacian not symmetric",LOG.ll.CLASSIFIER)
            eigenValues, eigenVectors = sp.eig(L,D)
            idx = eigenValues.argsort()
            eigenValues = eigenValues[idx]
            assert eigenValues[0] <= eigenValues[eigenValues.shape[0]-1]
            eigenVectors = eigenVectors[:,idx]
            E = eigenVectors[:,1:(p+1)]

        e_lab = E[labeledIndexes,:]
        # Tikhonov regularization term; currently all zeros (disabled).
        TIK = np.zeros(shape=e_lab.shape)
        gram = e_lab.T @ e_lab + TIK.T@TIK
        try:
            gram_inv = np.linalg.inv(gram)
        except np.linalg.LinAlgError:
            # Singular normal equations: fall back to the pseudo-inverse.
            gram_inv = np.linalg.pinv(gram)
        A = gram_inv @ e_lab.T

        F = np.zeros(shape=Y.shape)

        y_m =  np.argmax(Y, axis=1)[labeledIndexes]

        if hook is not None:
            for i in range(p):
                hook._step(step=i,X=X,W=W,Y=E[:,i])

        for i in range(Y.shape[1]):
            c = np.ones(num_lab)
            c[y_m != i] = -1
            a = A @ np.transpose(c)
            LOG.debug(a,LOG.ll.CLASSIFIER)
            F[:,i] = E @ a

        # Clip negative scores to zero.
        np.maximum(F, 0, out=F)

        return (F)
Пример #8
0
    def __SIIS(self,
               X,
               W,
               Y,
               labeledIndexes,
               m,
               alpha,
               beta,
               rho,
               max_iter,
               hook=None):
        """SIIS classifier: alternating minimization over spectral coefficients.

        Finds a coefficient matrix ``A`` over ``m`` Laplacian eigenvectors
        ``U`` by alternating updates of the auxiliary variables ``Q`` and
        ``B``, of ``A`` itself, and of two multiplier matrices (the original
        inline notes call them Lagrangian coefficients), with a penalty
        ``mu`` that grows by ``rho`` per iteration. The logged cost is
        ``sum ||PU A||_row + alpha * sum ||JU A - Y||_row
        + beta * trace(A^T SIGMA A)``.

        Args:
            X: Input instances. Not used by this method.
            W: Affinity matrix; must expose ``.data`` (presumably a scipy
                sparse matrix -- confirm against callers).
            Y: Label matrix or 1-D label vector. The caller's array is not
                modified.
            labeledIndexes: Boolean mask selecting the labeled instances.
            m: Number of eigenvectors; ``None`` means ``W.shape[0]``.
            alpha: Weight of the label-fitting term.
            beta: Weight of the ``trace(A^T SIGMA A)`` term.
            rho: Multiplicative growth factor of the penalty ``mu``.
            max_iter: Iteration bound; the loop runs while the counter is
                ``<= max_iter``.
            hook: Not used by this method.

        Returns:
            One-hot matrix with a single 1.0 per row at the argmax of
            ``U @ A``.

        Raises:
            ValueError: If ``W`` and ``Y`` have a different number of rows.
        """
        Y = np.copy(Y)
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        if m is None:
            m = W.shape[0]

        c = Y.shape[1]

        # Rescale the weights by the mean of the stored entries.
        W = scipy.sparse.csr_matrix(W) / np.mean(W.data)

        L = gutils.lap_matrix(W, is_normalized=True)

        U, SIGMA = gutils.extract_lap_eigvec(L, m, remove_first_eig=True)

        U = scipy.sparse.csr_matrix(U)
        SIGMA = _to_np(SIGMA)

        J = gutils.labels_indicator(labeledIndexes)
        P = SIISClassifier.edge_mat(W)

        # Multipliers and penalty parameters.
        LAMB_1 = np.ones((P.shape[0], c))
        LAMB_2 = np.ones((Y.shape[0], c))
        mu = 1.0
        mu_max = 10000000.0
        eps = 1 / (10000)

        # Reusable matrices.
        JU = _to_np(J @ U)
        PU = _to_np(P @ U)
        PU_T = PU.transpose()
        JU_T = JU.transpose()

        def shrink_rows(M, threshold):
            # Scale every row by (norm - threshold) / norm and zero the rows
            # whose norm does not exceed the threshold.
            norms = np.linalg.norm(M, axis=1)
            # Zero-norm rows yield 0/0 here; they are overwritten just below,
            # so the warning is only noise.
            with np.errstate(divide="ignore", invalid="ignore"):
                scale = (norms - threshold) / norms
            M = M * scale[:, np.newaxis]
            M[norms <= threshold, :] = 0.0
            return M

        # TODO: a TensorFlow/Adam minimization of the same cost was sketched
        # here previously as an alternative to the loop below.
        A = np.zeros((m, c))
        improvement = 1
        n_iter = 0
        while n_iter <= max_iter and improvement > eps:
            # Update Q
            Q = shrink_rows(PU @ A - (1 / mu) * LAMB_1, 1 / mu)
            # Update B
            B = shrink_rows(JU @ A - Y - (1 / mu) * LAMB_2, alpha / mu)

            old_A = A
            # Update A
            A_inv_term = 2 * beta * SIGMA + mu * PU_T @ PU + mu * JU_T @ JU
            A_inv_term = np.linalg.inv(A_inv_term)
            A = A_inv_term @ \
                (PU_T@ LAMB_1 + JU_T@LAMB_2 +\
                  mu * PU_T@Q + mu* JU_T @ (B + Y) )
            # Update the multipliers
            LAMB_1 = LAMB_1 + mu * (Q - PU @ A)
            LAMB_2 = LAMB_2 + mu * (B - JU @ A + Y)
            # Update the penalty coefficient
            mu = min(rho * mu, mu_max)

            # Relative change of A. old_A is all zeros on the first pass, so
            # the ratio is resolved explicitly instead of dividing by zero:
            # any change counts as infinite improvement, no change as none.
            delta = np.max(np.abs(A - old_A))
            scale = np.max(np.abs(old_A))
            if scale > 0:
                improvement = delta / scale
            else:
                improvement = np.inf if delta > 0 else 0.0

            LOG.debug("Iter {}".format(n_iter), LOG.ll.CLASSIFIER)
            n_iter += 1

        C = np.sum(np.linalg.norm(PU@A,axis=1)) + alpha*np.sum(np.linalg.norm(JU@A - Y,axis=1)) +\
             beta*np.trace(A.T@SIGMA@A)
        LOG.debug("Iter {} - Cost {}".format(n_iter, C), LOG.ll.CLASSIFIER)

        # Hard assignment: one-hot at the row-wise argmax.
        scores = np.asarray(U @ A)
        F = np.zeros_like(scores)
        F[np.arange(scores.shape[0]), np.argmax(scores, axis=1)] = 1.0

        return F
Пример #9
0
    def LGCLVO(self,
               X,
               W,
               Y,
               labeledIndexes,
               mu=99.0,
               useEstimatedFreq=True,
               tuning_iter=0,
               hook=None,
               constant_prop=False,
               useZ=True,
               normalize_rows=True):
        """Detect (and optionally relabel) suspicious labels among labeled instances.

        A propagation matrix ``P`` restricted to labeled instances, with its
        diagonal zeroed, is built once. Then, for ``tuning_iter`` iterations,
        ``Q = rows(P @ Z) - rows(Z)`` (or the same with ``Y`` when ``useZ`` is
        false) is computed and the labeled instance with the smallest ``Q``
        entry is selected. If that entry is negative the instance is
        unlabeled; when ``self.relabel`` is set it is instead relabeled with
        the argmax class of its ``Q`` row.

        Reads ``self.use_baseline``, ``self.early_stop`` and ``self.relabel``.

        Args:
            X: Input instances; forwarded to ``LGC_iter_TF`` and the hook.
            W: Affinity matrix (sparse or dense); symmetrized internally.
            Y: Label matrix or 1-D label vector. The caller's array is not
                modified.
            labeledIndexes: Boolean mask selecting the labeled instances. A
                copy is updated internally.
            mu: Regularization constant of the propagation matrix.
            useEstimatedFreq: ``None`` selects uniform class frequencies; any
                ``bool`` estimates them from the labeled rows; any other
                value is used as the frequency vector itself.
            tuning_iter: Number of detection iterations.
            hook: Optional observer; ``_step`` is called on every iteration.
            constant_prop: If true, classes are tried in the order given by
                the argsort of (expected - actual) label counts.
            useZ: Use ``gutils.calc_Z`` output instead of ``Y``.
            normalize_rows: Normalize rows to sum to one before differencing.

        Returns:
            Tuple ``(Y, labeledIndexes)`` after the removals/relabelings.

        Raises:
            ValueError: If ``W`` and ``Y`` have a different number of rows.
            Exception: If a selected instance/class pair is inconsistent
                with the current labels.
        """

        Y = np.copy(Y)
        # Work on a copy of the mask so the caller's array is untouched.
        labeledIndexes = np.array(labeledIndexes)
        lids = np.where(labeledIndexes)[0]
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        # Symmetrize the affinity matrix.
        W = 0.5 * (W + W.transpose())

        num_labeled = Y[labeledIndexes].shape[0]
        # NOTE(review): num_unlabeled is not used below.
        num_unlabeled = Y.shape[0] - num_labeled
        num_classes = Y.shape[1]

        D = gutils.deg_matrix(W, flat=True)
        # NOTE(review): a bool value (True or False) always estimates the
        # frequencies from the labels -- confirm that False should too.
        if not useEstimatedFreq is None:
            if isinstance(useEstimatedFreq, bool):
                estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
            else:
                estimatedFreq = useEstimatedFreq

        else:
            estimatedFreq = np.repeat(1 / num_classes, num_classes)

        if scipy.sparse.issparse(W):
            # Sparse path: P is obtained by propagating one indicator column
            # per labeled instance with LGC_iter_TF.
            l = np.sum(labeledIndexes)

            # All ordered pairs of labeled positions (l * l Python lists).
            itertool_prod = [[i, j] for i in range(l) for j in range(l)]

            # temp_Y[lids[k], k] = 1: one indicator column per labeled instance.
            row = np.asarray([lids[i] for i in range(l)])
            col = np.asarray([i for i in range(l)])
            data = np.asarray([1.0] * l)
            temp_Y = _to_np(
                scipy.sparse.coo_matrix((data, (row, col)),
                                        shape=(W.shape[0], l)))

            PL = LGC_iter_TF(X,
                             W,
                             Y=temp_Y,
                             labeledIndexes=labeledIndexes,
                             alpha=1 / (1 + mu),
                             num_iter=10000)

            # Keep only labeled rows and drop each instance's own influence.
            PL = PL[labeledIndexes, :]
            PL[range(PL.shape[0]), range(PL.shape[0])] = 0  #Set diagonal to 0

            # No-op self-assignment.
            PL = PL

            del temp_Y

            # Scatter the off-diagonal labeled-to-labeled entries into an
            # N x N sparse matrix.
            row = np.asarray(
                [lids[x[0]] for x in itertool_prod if x[0] != x[1]])
            col = np.asarray(
                [lids[x[1]] for x in itertool_prod if x[0] != x[1]])
            data = [PL[x[0], x[1]] for x in itertool_prod if x[0] != x[1]]
            P = scipy.sparse.coo_matrix((data, (row, col)),
                                        shape=W.shape).tocsr()

            # No-op self-assignment.
            P = P
        else:
            # Dense path: closed-form inverse restricted to labeled instances.
            # Identity matrix
            I = np.identity(W.shape[0])
            # Graph Laplacian
            L = gutils.lap_matrix(W, is_normalized=True)
            # Propagation matrix
            P = np.zeros(W.shape)
            P[np.ix_(labeledIndexes,
                     labeledIndexes)] = np.linalg.inv(I + 0.5 *
                                                      (L + L.transpose()) /
                                                      mu)[np.ix_(
                                                          labeledIndexes,
                                                          labeledIndexes)]
            # Paired boolean masks address the diagonal entries of the
            # labeled instances.
            P[labeledIndexes, labeledIndexes] = 0
            # Divide each labeled column by its sum over labeled rows.
            # NOTE(review): a zero column sum would divide by zero here.
            P[np.ix_(labeledIndexes, labeledIndexes)] = P[np.ix_(
                labeledIndexes, labeledIndexes)] / np.sum(P[np.ix_(
                    labeledIndexes, labeledIndexes)],
                                                          axis=0,
                                                          keepdims=False)

        W = scipy.sparse.csr_matrix(W)

        Z = []

        # Bookkeeping of detections; only the count is reported below.
        detected_noisylabels = []
        suggested_labels = []
        where_noisylabels = []
        Q_values = []

        # NOTE(review): Y_flat is not used below.
        Y_flat = np.argmax(Y, axis=1)

        def divide_row_by_sum(e):
            # Normalize rows to sum to one (when normalize_rows is set); the
            # tiny constant avoids dividing an all-zero row by zero.

            e = _to_np(e)
            if normalize_rows:
                e = e / np.sum(e + 1e-100, axis=1, keepdims=True)
                return e
            else:
                return e

        def find_argmin(Q, class_to_unlabel):
            # Row with the smallest Q value within one class column.
            id_min_line = np.argmin(Q[:, class_to_unlabel])
            id_min_col = class_to_unlabel
            return id_min_line, id_min_col, Q[id_min_line, id_min_col]

        #######################################################################################
        '''BEGIN iterations'''

        Q = None
        # cleanIndexes tracks instances never flagged so far; unlike
        # labeledIndexes it is not restored when relabeling.
        cleanIndexes = np.copy(labeledIndexes)
        for i_iter in range(tuning_iter):

            # NOTE(review): found_noisy is reset to True on every iteration,
            # so the second half of the condition below is always true.
            found_noisy = True
            if np.sum(labeledIndexes) > 0 and found_noisy:
                '''Z matrix - The binary values of current Y are replaced with their corresponding D entries.
                    Then, we normalize each row so that row sums to its estimated influence
                '''
                # With self.use_baseline, Q is computed on the first pass
                # only and reused afterwards.
                if (not self.use_baseline) or Q is None:
                    if useZ:
                        Z = gutils.calc_Z(Y,
                                          labeledIndexes,
                                          D,
                                          estimatedFreq,
                                          weigh_by_degree=False)
                        F = P @ Z
                        if scipy.sparse.issparse(F):
                            F = np.asarray(F.toarray())

                        # Difference between the normalized propagated rows
                        # and the normalized current rows.
                        Q = (divide_row_by_sum(F) - divide_row_by_sum(Z))
                    else:
                        F = P @ Y
                        if scipy.sparse.issparse(F):
                            F = np.asarray(F.toarray())
                        Q = (divide_row_by_sum(F) - divide_row_by_sum(Y))

                # Rows that are not clean (unlabeled or already flagged) are
                # excluded from the argmin by setting them to +inf.
                unlabeledIndexes = np.logical_not(cleanIndexes)
                if self.early_stop:
                    # Rows that received no propagated mass get a large
                    # positive value so they are not selected as noisy.
                    Q[np.sum(F, axis=1) == 0.0, :] = 9999

                Q[unlabeledIndexes, :] = np.inf

                # Select the candidate (instance, class) pair.
                if constant_prop:
                    expectedNumLabels = estimatedFreq * np.sum(labeledIndexes)
                    actualNumLabels = np.sum(Y[labeledIndexes, :], axis=0)
                    temp = expectedNumLabels - actualNumLabels
                    class_priority = np.argsort(temp)

                    found_noisy = False
                    for class_to_unlabel in class_priority:
                        id_min_line, id_min_col, val = find_argmin(
                            Q, class_to_unlabel)
                        if val < 0:
                            # A negative value means the instance would get a
                            # different label under the modified propagation.
                            found_noisy = True
                            break

                else:
                    id_min = np.argmin(Q)
                    id_min_line = id_min // num_classes
                    id_min_col = id_min % num_classes  # The class previously assigned to instance X_{id_min_line}
                    found_noisy = Q[id_min_line, id_min_col] < 0

                if found_noisy:

                    id_max_col = np.argmax(
                        Q[id_min_line, :])  # The new, suggested class

                    detected_noisylabels.append(id_min_col)
                    where_noisylabels.append(id_min_line)

                    suggested_labels.append(id_max_col)
                    Q_values.append(Q[id_min_line, id_min_col])

                    # Sanity checks before unlabeling.
                    if labeledIndexes[id_min_line] == False:
                        raise Exception(
                            "Error: unlabeled instance was selected")
                    if not Y[id_min_line, id_min_col] == 1:
                        raise Exception("Error: picked wrong class to unlabel")

                    labeledIndexes[id_min_line] = False
                    cleanIndexes[id_min_line] = False

                    # NOTE(review): same condition as the check above, so
                    # this branch cannot trigger.
                    if not Y[id_min_line, id_min_col] == 1:
                        raise Exception(
                            "Tried to remove label from unlabeled instance")

                    Y[id_min_line, id_min_col] = 0
                    if self.relabel:
                        # Put the instance back with the suggested class; it
                        # stays out of cleanIndexes, so it is not re-examined.
                        labeledIndexes[id_min_line] = True
                        Y[id_min_line, :] = 0
                        Y[id_min_line, id_max_col] = 1

            if not hook is None:
                hook._step(step=(i_iter + 1),
                           X=X,
                           W=W,
                           Y=Y,
                           labeledIndexes=labeledIndexes)
        '''
        MATPLOTLIB stuff 
        '''
        """
        import cv2 as cv
        
        
        #ret2,th2 = cv.threshold(255*np.asarray(Q_values).astype(np.uint8),0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
        
        from skimage.filters import threshold_multiotsu
        Q_values = np.asarray(Q_values)
        th = threshold_multiotsu(Q_values)
        th = np.where(Q_values < th[0])[0]
    
        
        
        for i in range(th.shape[0]):
            th2 = max(0,i - 1)
            if not th[i] == i:
                break

        
        import matplotlib
        matplotlib.use("TkAgg")
        import matplotlib.pyplot as plt
        fig = plt.figure(figsize=(5,2))
        ax = fig.add_subplot()
        ax.plot(np.arange(len(Q_values)),Q_values)
        ax.axvline(10,color='red')
        #plt.axvline(th2,color='purple')
        
        #plt.axhline(-0.5,color='green')
        print(th2)
        plt.show()
        """
        '''END iterations'''
        LOG.info(
            "NUMBER OF DETECTED NOISY INSTANCES:{}".format(
                len(detected_noisylabels)), LOG.ll.FILTER)

        return Y, labeledIndexes
Пример #10
0
    def LDST(self,
             X,
             W,
             Y,
             labeledIndexes,
             mu=99.0,
             useEstimatedFreq=True,
             tuning_iter=0,
             hook=None,
             constant_prop=False,
             useZ=True):
        """Label tuning: repeatedly unlabel the instance with the largest gradient.

        On every iteration ``Q = A @ Z`` (or ``A @ Y`` when ``useZ`` is
        false) is computed. Only the entry of each labeled instance's
        assigned class stays eligible, and the instance owning the largest
        eligible entry is unlabeled.

        Reads ``self.weigh_by_degree`` and ``self.gradient_fix``.

        Args:
            X: Input instances; only forwarded to the hook.
            W: Affinity matrix; symmetrized internally.
            Y: Label matrix or 1-D label vector. The caller's array is not
                modified.
            labeledIndexes: Boolean mask selecting the labeled instances. A
                copy is updated internally.
            mu: Regularization constant of the propagation matrix.
            useEstimatedFreq: ``None`` selects uniform class frequencies; any
                ``bool`` estimates them from the labeled rows; any other
                value is used as the frequency vector itself.
            tuning_iter: Number of instances to unlabel.
            hook: Optional observer; ``_step`` is called on every iteration.
            constant_prop: Not supported; see Raises.
            useZ: Use ``gutils.calc_Z`` output instead of ``Y``.

        Returns:
            Tuple ``(Y, labeledIndexes)`` after the removals.

        Raises:
            ValueError: If ``W`` and ``Y`` have a different number of rows.
            NotImplementedError: If ``constant_prop`` is true and an
                iteration is reached.
            Exception: If the selected entry does not hold a label.
        """
        # ---- initialization ----
        Y = np.copy(Y)
        # Work on a copy of the mask so the caller's array is untouched.
        labeledIndexes = np.array(labeledIndexes)

        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        # Symmetrize the affinity matrix.
        W = 0.5 * (W + W.transpose())

        num_labeled = Y[labeledIndexes].shape[0]
        num_classes = Y.shape[1]

        D = gutils.deg_matrix(W, flat=True)

        # Class frequencies.
        # NOTE(review): ``useEstimatedFreq=False`` still estimates from the
        # labels, because only the type is inspected -- confirm intent.
        if useEstimatedFreq is None:
            estimatedFreq = np.repeat(1 / num_classes, num_classes)
        elif isinstance(useEstimatedFreq, bool):
            estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
        else:
            estimatedFreq = useEstimatedFreq

        I = np.identity(W.shape[0])
        L = gutils.lap_matrix(W, is_normalized=True)
        # Propagation matrix, built from the symmetrized Laplacian.
        P = np.linalg.inv(I + 0.5 * (L + L.transpose()) / mu)
        P_t = P.transpose()
        A = ((P_t @ L) @ P) + mu * ((P_t - I) @ (P - I))

        # ---- iterations ----
        for i_iter in np.arange(tuning_iter):

            if np.sum(labeledIndexes) > 0:
                if useZ:
                    Z = gutils.calc_Z(Y,
                                      labeledIndexes,
                                      D,
                                      estimatedFreq,
                                      weigh_by_degree=self.weigh_by_degree)
                    # Graph gradient.
                    Q = np.matmul(A, Z)
                else:
                    Q = np.matmul(A, Y)

                for i_labeled in np.where(labeledIndexes)[0]:
                    assigned_class = np.argmax(Y[i_labeled, :])
                    other_classes = list(range(Y.shape[1]))
                    other_classes.remove(assigned_class)

                    best_other = min([Q[i_labeled, j] for j in other_classes])

                    # These assignments were previously repeated once per
                    # class inside a loop; a single pass has the same effect.
                    if self.gradient_fix:
                        Q[i_labeled, assigned_class] = -best_other
                    # Only the assigned class may be picked for unlabeling.
                    Q[i_labeled, other_classes] = -np.inf

                # Unlabeled instances can never be selected.
                unlabeledIndexes = np.logical_not(labeledIndexes)
                Q[unlabeledIndexes, :] = -np.inf

                if constant_prop:
                    # The previous ``raise ""`` raised TypeError, because a
                    # str is not an exception.
                    raise NotImplementedError(
                        "constant_prop is not supported by LDST")

                id_max = np.argmax(Q)
                id_max_line = id_max // num_classes
                id_max_col = id_max % num_classes

                if not Y[id_max_line, id_max_col] == 1:
                    print(Y[id_max_line, :])
                    raise Exception(
                        "Tried to remove label from unlabeled instance")

                # Unlabel the selected instance.
                labeledIndexes[id_max_line] = False
                Y[id_max_line, id_max_col] = 0

            if hook is not None:
                hook._step(step=i_iter + 1,
                           X=X,
                           W=W,
                           Y=Y,
                           labeledIndexes=labeledIndexes)

        return Y, labeledIndexes