def __GFHF(self, X, W, Y, labeledIndexes, hook=None):
    """Fill the unlabeled rows of ``Y`` with the closed-form harmonic solution.

    With ``P = deg_matrix(W, -1.0) @ W`` split into labeled (``l``) and
    unlabeled (``u``) blocks, the unlabeled rows are set to
    ``(I - P_uu)^-1 @ P_ul @ Y_l``.

    Args:
        X: Not used by this method; kept for a uniform signature.
        W: Affinity matrix exposing ``todense()``.
        Y: Label matrix; it is first passed through
            ``self.CLEAN_UNLABELED_ROWS``.
        labeledIndexes: Boolean mask, truthy for labeled instances
            (assumed 1-D -- TODO confirm).
        hook: Not used by this method.

    Returns:
        The label matrix with its unlabeled rows overwritten.

    Raises:
        ValueError: If ``W`` and ``Y`` disagree on the number of rows.
    """
    W = W.todense()
    Y = self.CLEAN_UNLABELED_ROWS(Y, labeledIndexes)
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    u = np.flatnonzero(np.logical_not(labeledIndexes))
    l = np.flatnonzero(labeledIndexes)

    P = gutils.deg_matrix(W, -1.0) @ W
    P_ul = P[u[:, None], l]
    P_uu = P[u[:, None], u]

    # Build the system matrix and right-hand side once so the fallback
    # below does not have to recompute them.
    system = np.identity(u.shape[0]) - P_uu
    rhs = P_ul @ Y[l, :]
    try:
        Y[u, :] = np.linalg.inv(system) @ rhs
    except np.linalg.LinAlgError:
        # Singular system: fall back to the Moore-Penrose pseudo-inverse.
        Y[u, :] = np.linalg.pinv(system) @ rhs
    return Y
def generateAffMat(self, X, Y=None, labeledIndexes=None, hook=None):
    """Build the affinity matrix for ``X``, or hand back the cached one.

    When ``AffMat.cache_mat_exists(self.cache_dir)`` is true, nothing is
    computed and an ``AffMat`` bound to the cache directory is returned.
    Otherwise the mask from ``self.get_or_calc_Mask`` is turned into weights
    by ``self.W_from_K`` and, if ``self.row_normalize`` is ``True``,
    multiplied on the left by the inverse degree matrix.

    Returns:
        `tflabelprop.gssl.graph.gssl_affmat.AffMat`: An affinity matrix

    Raises:
        ValueError: If the mask and ``X`` disagree on the number of rows.
        Exception: If every entry of the affinity matrix is zero.
    """
    X = X.astype(np.float32)

    # Cached matrix available: skip every computation below.
    if AffMat.cache_mat_exists(self.cache_dir):
        LOG.info(f"Loading Affinity Matrix from {self.cache_dir}...", LOG.ll.MATRIX)
        return AffMat(W=None, cache_dir=self.cache_dir)

    LOG.info("Creating Affinity Matrix...", LOG.ll.MATRIX)
    notify = hook is not None
    if notify:
        hook._begin(X=X, Y=Y, labeledIndexes=labeledIndexes, W=None)

    mask = self.get_or_calc_Mask(X)
    if self.sigma == "mean":
        self.dist_func = self.handle_adaptive_sigma(mask)
    if mask.shape[0] != X.shape[0]:
        raise ValueError("Shapes do not match for X,K")

    W = self.W_from_K(X, mask)
    if self.row_normalize == True:
        inv_degree = gutils.deg_matrix(W, pwr=-1.0, NA_replace_val=1.0)
        W = inv_degree @ W
    del mask
    LOG.info("Creating Affinity Matrix...Done!", LOG.ll.MATRIX)

    n_rows = X.shape[0]
    assert (W.shape == (n_rows, n_rows))
    if np.max(W) == 0:
        raise Exception("Affinity matrix cannot have all entries equal to zero.")

    if notify:
        hook._end(X=X, Y=Y, W=W)
    return AffMat(W=W.astype(np.float32), cache_dir=self.cache_dir)
def __GFHF_iter(self, X, W, Y, labeledIndexes, num_iter, hook=None):
    """Run ``num_iter`` propagation steps, re-clamping labeled rows each step.

    Each step replaces ``Y`` by ``P @ Y`` with
    ``P = deg_matrix(W, -1.0) @ W`` stored as a CSR matrix, then restores
    the rows selected by ``labeledIndexes`` to their initial values.

    Args:
        X: Only forwarded to ``hook._step``.
        W: Affinity matrix.
        Y: Label matrix; first passed through ``self.CLEAN_UNLABELED_ROWS``.
        labeledIndexes: Mask selecting the labeled rows.
        num_iter: Number of propagation steps.
        hook: Optional object whose ``_step`` is called after every step.

    Returns:
        The label matrix after the last step.

    Raises:
        ValueError: If ``W`` and ``Y`` disagree on the number of rows.
    """
    Y = self.CLEAN_UNLABELED_ROWS(Y, labeledIndexes)
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    transition = scipy.sparse.csr_matrix(gutils.deg_matrix(W, -1.0) @ W)
    clamped_rows = Y[labeledIndexes, :]
    notify = hook is not None

    for step in range(num_iter):
        Y = transition @ Y
        # Labeled rows never drift from their given values.
        Y[labeledIndexes, :] = clamped_rows
        if notify:
            hook._step(step=step, X=X, W=W, Y=Y, labeledIndexes=labeledIndexes)
    return Y
def LDST(self, X, W, Y, labeledIndexes, mu=99.0, useEstimatedFreq=True, tuning_iter=0, hook=None, constant_prop=False, useZ=True):
    """Iteratively remove the label with the largest gradient entry.

    Each of the ``tuning_iter`` iterations computes ``Q = A @ Z`` (or
    ``A @ Y`` when ``useZ`` is false), restricts it to the assigned class of
    the currently labeled rows, and unlabels the instance at the argmax.

    Args:
        X: Only forwarded to ``hook._step``.
        W: Affinity matrix.
        Y: Label matrix; first passed through ``self.CLEAN_UNLABELED_ROWS``.
        labeledIndexes: Boolean mask of labeled rows. It is copied, so the
            caller's object is not modified.
        mu: Scalar used to build the propagation matrix ``P``.
        useEstimatedFreq: ``True`` estimates class frequencies from the
            labeled rows, ``False`` uses uniform frequencies. Any other
            value is used directly as the frequency vector (previously this
            left ``estimatedFreq`` unbound).
        tuning_iter: Number of labels to remove.
        hook: Optional object whose ``_step`` is called once per iteration.
        constant_prop: Not implemented; raises when reached inside the loop.
        useZ: Whether to weight ``Y`` through ``gutils.calc_Z`` before
            computing the gradient.

    Returns:
        Tuple ``(Y, labeledIndexes)`` after the removals.

    Raises:
        ValueError: If ``W`` and ``Y`` disagree on the number of rows.
        NotImplementedError: If ``constant_prop`` is requested and an
            iteration actually runs.
        Exception: If the selected entry is not a current label.
    """
    import scipy.sparse
    from scipy.linalg import inv as invert

    Y = self.CLEAN_UNLABELED_ROWS(Y, labeledIndexes)
    labeledIndexes = np.array(labeledIndexes)
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    num_labeled = Y[labeledIndexes].shape[0]
    num_classes = Y.shape[1]

    # Estimate frequency of classes
    if isinstance(useEstimatedFreq, (bool, np.bool_)):
        if useEstimatedFreq:
            estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
        else:
            estimatedFreq = np.repeat(1 / num_classes, num_classes)
    else:
        # A non-boolean argument is taken to be the frequency vector itself.
        estimatedFreq = useEstimatedFreq

    D = gutils.deg_matrix(W, flat=True)
    # Identity matrix
    I = np.identity(W.shape[0])
    # Get graph laplacian
    L = gutils.lap_matrix(W, which_lap='sym')
    # Propagation matrix
    P = invert(I - 1 / (1 + mu) * (I - L)) * mu / (1 + mu)
    P_t = P.transpose()
    # Matrix A, symmetrised
    A = ((P_t @ L) @ P) + mu * ((P_t - I) @ (P - I))
    A = 0.5 * (A + A.transpose())

    if hook is not None:
        W = scipy.sparse.coo_matrix(W)

    # BEGIN iterations
    for i_iter in np.arange(tuning_iter):
        if np.sum(labeledIndexes) > 0:
            # Z matrix - The binary values of current Y are replaced with
            # their corresponding D entries. Then, we normalize each row so
            # that row sums to its estimated influence.
            if useZ:
                Z = gutils.calc_Z(Y, labeledIndexes, D, estimatedFreq,
                                  weigh_by_degree=self.weigh_by_degree)
                # Compute graph gradient
                Q = np.matmul(A, Z)
            else:
                Q = np.matmul(A, Y)

            for i_labeled in np.where(labeledIndexes)[0]:
                assigned_class = np.argmax(Y[i_labeled, :])
                other_classes = [j for j in range(num_classes) if j != assigned_class]
                best_other = min(Q[i_labeled, j] for j in other_classes)
                if self.gradient_fix:
                    Q[i_labeled, assigned_class] = -best_other
                # Only the assigned class may be picked by the argmax below.
                # NOTE(review): placed outside the gradient_fix branch, as the
                # "not a current label" check below requires -- confirm.
                Q[i_labeled, other_classes] = -np.inf

            # During label tuning, we'll also 'unlabel' the argmax
            Q[np.logical_not(labeledIndexes), :] = -np.inf

            if constant_prop:
                # The original raised a bare string here, which is itself a
                # TypeError in Python 3. Disabled sketch of the intended logic:
                #   expectedNumLabels = estimatedFreq * sum(labeledIndexes)
                #   actualNumLabels = np.sum(Y[labeledIndexes],axis=0)
                #   class_to_unlabel = np.argmax(actualNumLabels - expectedNumLabels)
                #   id_max_line = np.argmax(Q[:,class_to_unlabel])
                #   id_max_col = class_to_unlabel
                raise NotImplementedError("constant_prop is not implemented")

            id_max = np.argmax(Q)
            id_max_line, id_max_col = divmod(id_max, num_classes)

            if Y[id_max_line, id_max_col] != 1:
                print(Y[id_max_line, :])
                raise Exception("Tried to remove label from unlabeled instance")

            # Unlabel OP
            labeledIndexes[id_max_line] = False
            Y[id_max_line, id_max_col] = 0

        if hook is not None:
            hook._step(step=i_iter + 1, X=X, W=W, Y=Y, labeledIndexes=labeledIndexes)
    # END iterations
    return Y, labeledIndexes
def __MR(self, X, W, Y, labeledIndexes, p, optimize_labels, hook=None):
    """Fit a spectral propagation filter by gradient descent and return the propagated labels.

    The method loads ``p`` eigenvector/eigenvalue pairs from ``W``, builds a
    filter ``lambda_tilde`` over the eigenvalues (either the closed form
    ``1 / (1 - alpha + alpha * lambda)`` or the output of a small Conv1D
    model when ``self.custom_conv`` is set), and trains for ``NUM_ITER``
    steps with Adam on the cross-entropy computed on the labeled rows.
    With ``remove_diag=True`` (the default) each row's own contribution is
    subtracted before row normalisation.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm the extent of each if/else/for/with body against the
    upstream file.

    Args:
        X: Feature array; converted to a float32 tensor and forwarded to the hook.
        W: Affinity object; must expose ``shape`` and ``load_eigenfunctions(p)``.
        Y: Labels, either 1-D (expanded via ``gutils.init_matrix``) or 2-D.
            A copy is kept as ``ORACLE_Y`` for the diagnostic ``acc_true``.
        labeledIndexes: Boolean mask of labeled rows.
        p: Number of eigenfunctions; a non-int value is multiplied by the
            number of labeled rows and truncated.
        optimize_labels: If truthy, the per-instance weights ``PI`` are trained too.
        hook: Optional object whose ``_step`` is called every 10 iterations.

    Returns:
        numpy array: output of ``forward(..., mode='eval')[0]`` clipped to
        ``[0, 999999]``, computed after restoring the best variables seen.

    Raises:
        ValueError: If ``W`` and ``Y`` disagree on the number of rows.
    """
    # ------------------------------ INITIALIZATION ------------------------------
    ORACLE_Y = Y.copy()
    Y = np.copy(Y)
    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    # Unlabeled rows carry no label information into the optimisation.
    Y[np.logical_not(labeledIndexes), :] = 0
    if not W.shape[0] == Y.shape[0]:
        raise ValueError("W,Y shape not compatible")
    l = np.reshape(np.array(np.where(labeledIndexes)), (-1))
    num_lab = l.shape[0]
    if not isinstance(p, int):
        p = int(p * num_lab)
    if p > Y.shape[0]:
        # NOTE(review): the comparison is against the total row count, while
        # the message mentions the number of labeled indexes -- confirm.
        p = Y.shape[0]
        LOG.warn("Warning: p greater than the number of labeled indexes", LOG.ll.CLASSIFIER)
    #W = gutils.scipy_to_np(W)
    #W = 0.5* (W + W.T)
    L = gutils.lap_matrix(W, which_lap='sym')
    D = gutils.deg_matrix(W, flat=True, pwr=-1.0)
    L = 0.5 * (L + L.T)

    # The two helpers below are defined but not called in this block.
    def check_symmetric(a, tol=1e-8):
        return np.allclose(a, a.T, atol=tol)

    def is_pos_sdef(x):
        return np.all(np.linalg.eigvals(x) >= -1e-06)

    import scipy.sparse
    # Sanity check on the symmetrised Laplacian; reads ``.data``, so L is
    # presumably a scipy sparse matrix -- TODO confirm.
    sym_err = L - L.T
    sym_check_res = np.all(np.abs(sym_err.data) < 1e-7)  # tune this value
    assert sym_check_res
    """--------------------------------------------------------------------------------------------------- EIGENFUNCTION EXTRACTION --------------------------------------------------------------------------------------------------- """
    import time
    start_time = time.time()
    import os.path as osp
    from tf_labelprop.settings import INPUT_FOLDER
    cache_eigvec = osp.join(INPUT_FOLDER, 'eigenVectors.npy')
    cache_eigval = osp.join(INPUT_FOLDER, 'eigenValues.npy')
    # The cache-loading branch is disabled by the constant condition; the
    # eigenfunctions are always recomputed and then saved.
    if False:
        eigenValues, eigenVectors = np.load(cache_eigval), np.load(
            cache_eigvec)
        eigenVectors = eigenVectors[:, :p]
        eigenValues = eigenValues[:p]
    else:
        eigenVectors, eigenValues = W.load_eigenfunctions(p)
        time_elapsed = time.time() - start_time
        LOG.info("Took {} seconds to calculate eigenvectors".format(
            int(time_elapsed)))
        # Sort eigenpairs by ascending eigenvalue.
        idx = eigenValues.argsort()
        eigenValues = eigenValues[idx]
        LOG.debug(eigenValues)
        assert eigenValues[0] <= eigenValues[eigenValues.shape[0] - 1]
        eigenVectors = eigenVectors[:, idx]
        np.save(cache_eigval, arr=eigenValues)
        np.save(cache_eigvec, arr=eigenVectors)
    U = eigenVectors
    LAMBDA = eigenValues
    U = U[:, np.argsort(LAMBDA)]
    LAMBDA = LAMBDA[np.argsort(LAMBDA)]
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')
    #tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*8)])
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    """ ------------------------------------------------------------------------- Define Constants on GPU ------------------------------------------------------------------------------ """
    U, X, Y = [tf.constant(x.astype(np.float32)) for x in [U, X, Y]]
    _U_times_U = tf.multiply(U, U)
    N = X.shape[0]

    def to_sp_diag(x):
        # Build an n-by-n sparse tensor holding vector x on its diagonal.
        n = tf.cast(x.shape[0], tf.int64)
        indices = tf.concat([
            tf.range(n, dtype=tf.int64)[None, :],
            tf.range(n, dtype=tf.int64)[None, :]
        ], axis=0)
        return tf.sparse.SparseTensor(indices=tf.transpose(indices), values=x, dense_shape=[n, n])

    @tf.function
    def smooth_labels(labels, factor=0.001):
        # smooth the labels
        labels = tf.cast(labels, tf.float32)
        labels *= (1 - factor)
        # NOTE(review): the divisor is the size of axis 0 of ``labels``;
        # confirm that this, rather than the class count, is intended.
        labels += (factor / tf.cast(tf.shape(labels)[0], tf.float32))
        # returned the smoothed labels
        return labels

    @tf.function
    def divide_by_row(x, eps=1e-07):
        # Clamp negatives to zero, add eps, then make every row sum to one.
        x = tf.maximum(x, 0 * x)
        x = x + eps  # [N,C] [N,1]
        return x / (tf.reduce_sum(x, axis=-1)[:, None])

    def spd_matmul(x, y):
        return tf.sparse.sparse_dense_matmul(x, y)

    def mult_each_row_by(X, by):
        """ Elementwise multiplies each row by a given row vector.

        For a 2D tensor, this also corresponds to multiplying each column by
        the respective scalar in the given row vector.

        Args:
            X (Tensor)
            by (Tensor[shape=(N,)]): row vector
        """
        #[N,C] [N,1]
        return X * by[None, :]

    def mult_each_col_by(X, by):
        #[N,C] [1,C]
        return X * by[:, None]

    @tf.function
    def accuracy(y_true, y_pred):
        # Fraction of rows whose argmax agrees between the two inputs.
        acc = tf.cast(
            tf.equal(tf.argmax(y_true, axis=-1), tf.argmax(y_pred, axis=-1)),
            tf.float32)
        acc = tf.cast(acc, tf.float32)
        return tf.reduce_mean(acc)

    """ ----------------------------------------------------------------------------- DEFINE VARS -------------------------------------------------------------------------------- """
    MU = tf.Variable(0.1, name="MU")
    LAMBDA = tf.constant(LAMBDA.astype(np.float32), name="LAMBDA")
    # One weight per row of Y, initialised to one.
    PI = tf.Variable(tf.ones(shape=(tf.shape(Y)[0], ), dtype=tf.float32), name="PI")
    _l = LAMBDA.numpy()
    CUTOFF = tf.Variable(0.0, name='CUTOFF')
    CUTOFF_K = tf.Variable(1.0)

    @tf.function
    def get_alpha(MU):
        # alpha = 2 ** (-1 / |100 * MU|)
        return tf.pow(2.0, -tf.math.reciprocal(tf.abs(100 * MU)))

    # to_prob and cutoff are defined but not called in this block.
    @tf.function
    def to_prob(x):
        return tf.nn.softmax(x, axis=1)

    @tf.function
    def cutoff(x):
        return 1.0 / (1.0 + tf.exp(-CUTOFF_K * (CUTOFF - x)))

    # Small Conv1D stack applied to the eigenvalue sequence when
    # self.custom_conv is set (see forward).
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv1D(8, kernel_size=5, padding='same'))
    model.add(tf.keras.layers.Activation('relu'))
    model.add(tf.keras.layers.Conv1D(8, kernel_size=5, padding='same'))
    model.add(tf.keras.layers.Activation('relu'))
    model.add(tf.keras.layers.Conv1D(1, kernel_size=3, padding='same'))
    model.add(tf.keras.layers.Flatten())
    """ ----------------------------------------------------------------------------- DEFINE FORWARD -------------------------------------------------------------------------------- """

    @tf.function
    def forward(Y, U, PI, mode='train', remove_diag=True):
        # In 'train' mode only the labeled rows take part.
        if mode == 'train':
            U = tf.gather(U, indices=np.where(labeledIndexes)[0], axis=0)
            Y = tf.gather(Y, indices=np.where(labeledIndexes)[0], axis=0)
            #F = tf.gather(F,indices=np.where(labeledIndexes)[0],axis=0)
            PI = tf.gather(PI, indices=np.where(labeledIndexes)[0], axis=0)
        # Scale each row of Y by |PI|.
        pi_Y = spd_matmul(to_sp_diag(tf.abs(PI)), Y)
        alpha = get_alpha(MU)
        """ Maybe apply custom convolution to LAMBDA, otherwise just fit LGC's alpha using the corresponding filter 1/(1-alpha + alpha*lambda) """
        if not self.custom_conv:
            lambda_tilde = tf.math.reciprocal(1 - alpha + alpha * LAMBDA)
        else:
            #lambda_tilde = tf.math.reciprocal(1-alpha + alpha*LAMBDA)
            # Standardise the eigenvalues before feeding them to the model.
            _lambda = (LAMBDA - tf.reduce_mean(LAMBDA)) / tf.math.reduce_std(LAMBDA)
            lambda_tilde = tf.clip_by_value(
                2 * tf.nn.sigmoid(
                    tf.reshape(model(_lambda[None, :, None]), (-1, ))), 0, 1)
            lambda_tilde = tf.sort(lambda_tilde, direction='DESCENDING')
        # NOTE(review): whether the normalisation below applies to both
        # branches or only the custom one could not be recovered -- confirm.
        lambda_tilde = tf.reshape(divide_by_row(lambda_tilde[None, :]), (-1, ))
        _self_infl = mult_each_row_by(
            tf.square(U), by=lambda_tilde
        )  #Square each element of U, then dot product of each row with lambda_tilde
        _self_infl = tf.reduce_sum(_self_infl, axis=1)
        # U @ diag(lambda_tilde) @ U^T @ pi_Y
        _P_op = U @ (mult_each_col_by(
            (tf.transpose(U) @ pi_Y), by=lambda_tilde))
        if not remove_diag:
            _diag_P_op = tf.zeros_like(
                mult_each_col_by(pi_Y, by=_self_infl))
        else:
            # Each row's own contribution, subtracted below.
            _diag_P_op = mult_each_col_by(pi_Y, by=_self_infl)
        return divide_by_row(_P_op - _diag_P_op), lambda_tilde, pi_Y

    """ ----------------------------------------------------------------------------- DEFINE LOSSES and learning schedule -------------------------------------------------------------------------------- """
    # NOTE(review): below these are called as losses[...](pred_L, Y_l), so
    # ``y_`` is the prediction and ``y`` the target -- confirm the order.
    losses = {
        'xent':
        lambda y_, y: tf.reduce_mean(-tf.reduce_sum(y_ * tf.cast(
            tf.math.log(smooth_labels(y, factor=0.01)), tf.float32), axis=[1])),
        'sq_loss':
        lambda y_, y: tf.reduce_mean(
            tf.reduce_sum(tf.square(y_ - y), axis=[1])),
        'abs_loss':
        lambda y_, y: tf.reduce_mean(
            tf.reduce_sum(tf.abs(y_ - y), axis=[1])),
        'hinge':
        lambda y_, y: tf.reduce_mean(
            tf.reduce_sum(tf.maximum(1. - y_ * y, tf.zeros_like(y)), axis=1))
    }
    NUM_ITER = 700
    # lr_schedule is created but not passed to the optimizer, which uses a
    # fixed rate of 0.05.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        0.5, decay_steps=200, decay_rate=0.9, staircase=False)
    opt = tf.keras.optimizers.Adam(0.05)
    Y_l = tf.gather(Y, indices=np.where(labeledIndexes)[0], axis=0)
    #import matplotlib.pyplot as plt
    #import matplotlib
    #matplotlib.use('tkagg')
    import pandas as pd
    """ ----------------------------------------------------------------------------- LEARNING -------------------------------------------------------------------------------- """
    L = []
    df = pd.DataFrame()
    max_acc, min_loss = [0, np.inf]
    for i in range(NUM_ITER):
        #MU.assign(i)
        with tf.GradientTape() as t:
            # no need to watch a variable:
            # trainable variables are always watched
            pred_L, lambda_tilde, pi_Y = forward(Y, U, PI, mode='train')
            loss_sq = losses['sq_loss'](pred_L, Y_l)
            # The gradient step below uses ``loss`` (cross-entropy);
            # loss_sq and loss_xent are only logged.
            loss = losses['xent'](pred_L, Y_l)
            loss_xent = losses['xent'](pred_L, Y_l)
            acc = accuracy(Y_l, pred_L)
            _not_lab = np.where(np.logical_not(labeledIndexes))[0]
            # Diagnostic only: accuracy on the unlabeled rows against ORACLE_Y.
            acc_true = accuracy(
                tf.gather(ORACLE_Y, indices=_not_lab, axis=0),
                tf.gather(forward(Y, U, PI, mode='eval')[0],
                          indices=_not_lab,
                          axis=0))
            L.append(
                np.array([i, loss_sq, loss, loss_xent, acc,
                          acc_true])[None, :])
        """ TRAINABLE VARIABLES GO HERE """
        if self.custom_conv:
            trainable_variables = model.weights
        else:
            trainable_variables = [MU]
        if optimize_labels:
            trainable_variables.append(PI)
        # Keep a snapshot of the variables at the best labeled accuracy so far.
        if acc > max_acc:
            print(max_acc)
            best_trainable_variables = [
                k.numpy() for k in trainable_variables
            ]
            max_acc = acc
            min_loss = loss
            counter_since_best = 0
        elif acc <= max_acc:
            counter_since_best += 1
            # With NUM_ITER = 700 this threshold cannot be reached.
            if counter_since_best > 2000:
                break
        """ Apply gradients """
        gradients = t.gradient(loss, trainable_variables)
        opt.apply_gradients(zip(gradients, trainable_variables))
        """ Project labels such that they sum up to the original amount """
        pi = PI.numpy()
        pi[labeledIndexes] = np.sum(
            labeledIndexes) * pi[labeledIndexes] / (np.sum(
                pi[labeledIndexes]))
        PI.assign(pi)
        if i % 10 == 0:
            """ Print info """
            if not hook is None:
                if self.hook_iter_mode == "labeled":
                    plot_y = np.zeros_like(Y)
                    plot_y[labeledIndexes] = Y_l.numpy()
                else:
                    plot_y = tf.clip_by_value(
                        forward(Y, U, PI, mode='eval')[0], 0,
                        999999).numpy()
                hook._step(step=i,
                           X=X,
                           W=W,
                           Y=plot_y,
                           labeledIndexes=labeledIndexes)
            alpha = get_alpha(MU)
            PI_l = tf.gather(PI, indices=np.where(labeledIndexes)[0], axis=0)
            LOG.info(
                f"Acc: {acc.numpy():.3f}; ACC_TRUE:{acc_true.numpy():.3f} Loss: {loss.numpy():.3f}; alpha = {alpha.numpy():.3f}; PI mean = {tf.reduce_mean(PI_l).numpy():.3f} "
            )
            #plt.scatter(range(lambda_tilde.shape[0]),np.log10(lambda_tilde/LAMBDA),s=2)
            #plt.show()
    # Restore the best snapshot before producing the final prediction.
    # NOTE(review): best_trainable_variables is only bound once acc has
    # exceeded the initial max_acc of 0 -- confirm that always happens.
    for k in range(len(trainable_variables)):
        trainable_variables[k].assign(best_trainable_variables[k])
    return tf.clip_by_value(forward(Y, U, PI, mode='eval')[0], 0, 999999).numpy()
def __MR(self, X, W, Y, labeledIndexes, p, optimize_labels, hook=None):
    """Search over ``MU`` and the number of eigenfunctions, then propagate labels.

    For ``MU`` in ``NUM_ITER, ..., 1`` the spectral filter
    ``1 / (1 - alpha + alpha * lambda)`` is evaluated on the labeled rows.
    Predictions are accumulated one eigenfunction at a time, so that each
    prefix length ``i1 < p`` is scored. The pair with the highest labeled
    accuracy (ties broken by lower cross-entropy) is remembered as
    ``best_MU`` / ``best_p``. The result is ``forward_eval`` on all rows
    with ``MU = best_MU`` and ``p=None``.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm the extent of each if/else/for body against the upstream
    file.

    Args:
        X: Feature array; converted to a float32 tensor and forwarded to the hook.
        W: Affinity object; must expose ``shape`` and ``load_eigenfunctions(p)``.
        Y: Labels, either 1-D (expanded via ``gutils.init_matrix``) or 2-D.
        labeledIndexes: Boolean mask of labeled rows.
        p: Number of eigenfunctions; a non-int value is multiplied by the
            number of labeled rows and truncated.
        optimize_labels: Only read by the unreachable "PART 2" section.
        hook: Optional object whose ``_step`` is called once per ``MU`` value.

    Returns:
        numpy array: ``forward_eval(Y, U, PI, mode='eval', p=None)``.

    Raises:
        ValueError: If ``W`` and ``Y`` disagree on the number of rows.
    """
    # ------------------------------ INITIALIZATION ------------------------------
    ORACLE_Y = Y.copy()
    Y = np.copy(Y)
    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if not W.shape[0] == Y.shape[0]:
        raise ValueError("W,Y shape not compatible")
    l = np.reshape(np.array(np.where(labeledIndexes)), (-1))
    num_lab = l.shape[0]
    if not isinstance(p, int):
        p = int(p * num_lab)
    if p > Y.shape[0]:
        p = Y.shape[0]
        LOG.warn("Warning: p greater than the number of labeled indexes", LOG.ll.CLASSIFIER)
    #W = gutils.scipy_to_np(W)
    #W = 0.5* (W + W.T)
    L = gutils.lap_matrix(W)
    D = gutils.deg_matrix(W, flat=True, pwr=-1.0)
    L = 0.5 * (L + L.T)

    # The two helpers below are defined but not called in this block.
    def check_symmetric(a, tol=1e-8):
        return np.allclose(a, a.T, atol=tol)

    def is_pos_sdef(x):
        return np.all(np.linalg.eigvals(x) >= -1e-06)

    import scipy.sparse
    sym_err = L - L.T
    sym_check_res = np.all(np.abs(sym_err.data) < 1e-7)  # tune this value
    assert sym_check_res
    """--------------------------------------------------------------------------------------------------- EIGENFUNCTION EXTRACTION --------------------------------------------------------------------------------------------------- """
    import time
    start_time = time.time()
    eigenVectors, eigenValues = W.load_eigenfunctions(p)
    time_elapsed = time.time() - start_time
    LOG.info("Took {} seconds to calculate eigenvectors".format(
        int(time_elapsed)))
    U = eigenVectors
    LAMBDA = eigenValues
    """ ------------------------------------------------------------------------- Import and setup Tensorflow ------------------------------------------------------------------------------ """
    import tensorflow as tf
    import tf_labelprop.gssl.classifiers.lgc_lvo_aux as aux
    gpus = tf.config.experimental.list_physical_devices('GPU')
    #tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*8)])
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    """ ------------------------------------------------------------------------- Define Constants on GPU ------------------------------------------------------------------------------ """
    U, X, Y = [tf.constant(x.astype(np.float32)) for x in [U, X, Y]]
    _U_times_U = tf.multiply(U, U)
    N = X.shape[0]
    """ ----------------------------------------------------------------------------- DEFINE VARS -------------------------------------------------------------------------------- """
    MU = tf.Variable(0.1, name="MU")
    LAMBDA = tf.constant(LAMBDA.astype(np.float32), name="LAMBDA")
    PI = tf.Variable(tf.ones(shape=(tf.shape(Y)[0], ), dtype=tf.float32), name="PI")
    _l = LAMBDA.numpy()
    """ ----------------------------------------------------------------------------- DEFINE FORWARD -------------------------------------------------------------------------------- """

    def forward(Y, U, PI, mode='train', p=None, remove_diag=True):
        # p=None keeps every eigenfunction (sentinel larger than any index).
        if p is None:
            p = 99999
        pi_Y = aux.spd_matmul(aux.to_sp_diag(tf.abs(PI)), Y)
        alpha = self.get_alpha(MU)
        """ Maybe apply custom convolution to LAMBDA, otherwise just fit LGC's alpha using the corresponding filter 1/(1-alpha + alpha*lambda) """
        #tf.print(alpha)
        a = alpha - alpha * LAMBDA
        lambda_tilde = 1 / (1 - a)
        """ Set entries corresponding to eigvector e_i to zero for i > p """
        lambda_tilde = tf.where(
            tf.less_equal(tf.range(0, lambda_tilde.shape[0]), p),
            lambda_tilde, 0 * lambda_tilde)
        _self_infl = aux.mult_each_row_by(
            tf.square(U), by=lambda_tilde
        )  #Square each element of U, then dot product of each row with lambda_tilde
        # B keeps the per-eigenfunction self-influence terms before summing.
        B = _self_infl
        _self_infl = tf.reduce_sum(_self_infl, axis=1)
        A = aux.mult_each_col_by((tf.transpose(U) @ pi_Y), by=lambda_tilde)
        _P_op = U @ (A)
        if not remove_diag:
            _diag_P_op = tf.zeros_like(
                aux.mult_each_col_by(pi_Y, by=_self_infl))
        else:
            _diag_P_op = aux.mult_each_col_by(pi_Y, by=_self_infl)
        # 'eval' returns the prediction only; otherwise A and B come with it.
        if mode == 'eval':
            return aux.divide_by_row(_P_op - _diag_P_op)
        else:
            return A, B, aux.divide_by_row(_P_op - _diag_P_op)

    def forward_eval(Y, U, PI, mode='train', p=None, remove_diag=True):
        # Same computation as forward, always returning only the prediction;
        # ``mode`` is accepted but not read.
        if p is None:
            p = 99999
        pi_Y = aux.spd_matmul(aux.to_sp_diag(tf.abs(PI)), Y)
        alpha = self.get_alpha(MU)
        """ Maybe apply custom convolution to LAMBDA, otherwise just fit LGC's alpha using the corresponding filter 1/(1-alpha + alpha*lambda) """
        #tf.print(alpha)
        a = alpha - alpha * LAMBDA
        lambda_tilde = 1 / (1 - a)
        """ Set entries corresponding to eigvector e_i to zero for i > p """
        lambda_tilde = tf.where(
            tf.less_equal(tf.range(0, lambda_tilde.shape[0]), p),
            lambda_tilde, 0 * lambda_tilde)
        _self_infl = aux.mult_each_row_by(
            tf.square(U), by=lambda_tilde
        )  #Square each element of U, then dot product of each row with lambda_tilde
        _self_infl = tf.reduce_sum(_self_infl, axis=1)
        A = aux.mult_each_col_by((tf.transpose(U) @ pi_Y), by=lambda_tilde)
        _P_op = U @ (A)
        if not remove_diag:
            _diag_P_op = tf.zeros_like(
                aux.mult_each_col_by(pi_Y, by=_self_infl))
        else:
            _diag_P_op = aux.mult_each_col_by(pi_Y, by=_self_infl)
        return aux.divide_by_row(_P_op - _diag_P_op)

    """ ----------------------------------------------------------------------------- DEFINE LOSSES and learning schedule -------------------------------------------------------------------------------- """
    # NOTE(review): below these are called as losses[...](pred_L, Y_l), so
    # ``y_`` is the prediction and ``y`` the target -- confirm the order.
    losses = {
        'xent':
        lambda y_, y: tf.reduce_mean(-tf.reduce_sum(y_ * tf.cast(
            tf.math.log(aux.smooth_labels(y, factor=0.01)), tf.float32), axis=[1])),
        'sq_loss':
        lambda y_, y: tf.reduce_mean(
            tf.reduce_sum(tf.square(y_ - y), axis=[1])),
        'abs_loss':
        lambda y_, y: tf.reduce_mean(
            tf.reduce_sum(tf.abs(y_ - y), axis=[1])),
        'hinge':
        lambda y_, y: tf.reduce_mean(
            tf.reduce_sum(tf.maximum(1. - y_ * y, tf.zeros_like(y)), axis=1))
    }
    NUM_ITER = 10
    # Labeled-row slices, gathered once and reused in the search loop.
    Y_l = tf.gather(Y, indices=np.where(labeledIndexes)[0], axis=0)
    U_l = tf.gather(U, indices=np.where(labeledIndexes)[0], axis=0)
    PI_l = tf.gather(PI, indices=np.where(labeledIndexes)[0], axis=0)
    """ ----------------------------------------------------------------------------- LEARNING -------------------------------------------------------------------------------- """
    L = []
    # NOTE(review): ``pd`` is not imported inside this block; it must be
    # available at module level -- confirm.
    df = pd.DataFrame()
    max_acc, min_loss = [0, np.inf]
    best_p = np.inf
    for i in range(NUM_ITER, 0, -1):
        MU.assign(i)
        A, B, _ = forward(Y_l, U_l, PI_l, mode='train')
        # a1 / a2 accumulate the propagated term and the self-influence term
        # one eigenfunction at a time.
        a1 = np.zeros_like(Y_l)
        a2 = np.zeros_like(Y_l)
        for i1 in range(p):
            # NOTE(review): ``mult_each_col_by`` is called without the
            # ``aux.`` prefix used elsewhere in this block; unless it is
            # defined at module level this raises NameError -- confirm.
            a2 += mult_each_col_by(X=Y_l, by=B[:, i1])
            a1 += mult_each_col_by(
                np.tile(A[i1, :][None, :], [a1.shape[0], 1]), U_l[:, i1])
            pred_L = aux.divide_by_row(a1 - a2)
            loss_sq = losses['sq_loss'](pred_L, Y_l)
            loss = losses['xent'](pred_L, Y_l)
            loss_xent = losses['xent'](pred_L, Y_l)
            acc = aux.accuracy(Y_l, pred_L)
            _not_lab = np.where(np.logical_not(labeledIndexes))[0]
            if self.DEBUG:
                # Diagnostics against ORACLE_Y on the unlabeled rows.
                acc_true = aux.accuracy(
                    tf.gather(ORACLE_Y, indices=_not_lab, axis=0),
                    tf.gather(forward_eval(Y, U, PI, mode='eval', p=i1),
                              indices=_not_lab,
                              axis=0))
                prop = np.max(
                    pd.value_counts(tf.argmax(pred_L, 1).numpy(),
                                    normalize=True).values)
            else:
                acc_true = 0
                prop = 0
            L.append(
                np.array(
                    [i, i1, loss_sq, loss, loss_xent, acc, acc_true,
                     prop])[None, :])
            # Better accuracy wins; equal accuracy is broken by lower loss.
            if (max_acc < acc) or (acc == max_acc and min_loss > loss):
                print(
                    f"acc: {acc},p:{i1},Mu:{int(MU.numpy())}alpha:{self.get_alpha(MU.numpy()).numpy()}"
                )
                best_p = int(i1)
                best_MU = int(MU.numpy())
                max_acc = acc
                min_loss = loss.numpy()
        """ if self.DEBUG: alpha = self.get_alpha(MU) I = np.identity(Y.shape[0], dtype = np.float32) match_true = tf.gather(np.linalg.inv(I- alpha*(I - gutils.lap_matrix(W,'sym')))@Y,_not_lab,axis=0) F = forward_eval(Y,U,PI,mode='eval',p=best_p) match_approx = tf.gather(F,indices=_not_lab,axis=0) match = aux.accuracy(match_true, match_approx) print(f"Match rate 
{np.round(100*match,3)} ") print(f"LGC_acc = {np.round(100*aux.accuracy(match_true,tf.gather(ORACLE_Y,indices=_not_lab,axis=0)),3)} ") print(f"LGCLVO_acc = {np.round(100*aux.accuracy(match_approx,tf.gather(ORACLE_Y,indices=_not_lab,axis=0)),3)} ") """
        # ``i % 1 == 0`` is always true: the hook and log run on every MU value.
        if i % 1 == 0:
            """ Print info """
            if not hook is None:
                if self.hook_iter_mode == "labeled":
                    plot_y = np.zeros_like(Y)
                    plot_y[labeledIndexes] = Y_l.numpy()
                else:
                    MU.assign(best_MU)
                    plot_y = tf.clip_by_value(
                        forward(Y, U, PI, p=best_p, mode='eval'), 0,
                        999999).numpy()
                hook._step(step=i,
                           X=X,
                           W=W,
                           Y=plot_y,
                           labeledIndexes=labeledIndexes)
            alpha = self.get_alpha(MU)
            # NOTE(review): max_acc starts as the int 0, which has no
            # ``.numpy()``; this relies on at least one improvement having
            # been recorded -- confirm.
            LOG.info(
                f"Acc: {max_acc.numpy():.3f}; Loss: {loss.numpy():.3f}; alpha = {alpha.numpy():.3f};"
            )
    if self.DEBUG:
        df = pd.DataFrame(np.concatenate(L, axis=0),
                          index=range(len(L)),
                          columns=[
                              'i', 'p', 'loss_sq', 'loss', 'loss_xent',
                              'acc', 'acc_true', 'prop'
                          ])
        self.create_3d_mesh(df)
    # NOTE(review): best_MU is only bound inside the improvement branch above.
    print(f"BEst mu: {best_MU}; best p: {best_p}")
    MU.assign(best_MU)
    print(MU)
    # The final prediction uses p=None (all eigenfunctions), not best_p.
    return forward_eval(Y, U, PI, mode='eval', p=None).numpy()
    # Everything below this return is unreachable.
    """ ---------------------------------------------------- PART 2 ------------------------------------------------- """
    opt = tf.keras.optimizers.Adam(0.05)
    max_acc = 0
    for i in range(7000):
        #MU.assign(i)
        with tf.GradientTape() as t:
            _, _, pred_L = forward(Y_l,
                                   U_l,
                                   tf.gather(
                                       PI,
                                       indices=np.where(labeledIndexes)[0],
                                       axis=0),
                                   mode='train',
                                   p=best_p)
            loss_sq = losses['sq_loss'](pred_L, Y_l)
            loss = losses['xent'](pred_L, Y_l)
            loss_xent = losses['xent'](pred_L, Y_l)
            acc = aux.accuracy(Y_l, pred_L)
            _not_lab = np.where(np.logical_not(labeledIndexes))[0]
            acc_true = aux.accuracy(
                tf.gather(ORACLE_Y, indices=_not_lab, axis=0),
                tf.gather(forward(Y, U, PI, mode='eval')[0],
                          indices=_not_lab,
                          axis=0))
            L.append(
                np.array([i, loss_sq, loss, loss_xent, acc,
                          acc_true])[None, :])
            """ Project labels such that they sum up to the original amount """
            pi = PI.numpy()
            pi[labeledIndexes] = np.sum(
                labeledIndexes) * pi[labeledIndexes] / (np.sum(
                    pi[labeledIndexes]))
            PI.assign(pi)
        """ TRAINABLE VARIABLES GO HERE """
        trainable_variables = []
        if optimize_labels:
            trainable_variables.append(PI)
        """ Apply gradients """
        gradients = t.gradient(loss, trainable_variables)
        opt.apply_gradients(zip(gradients, trainable_variables))
        if acc > max_acc:
            print(max_acc)
            best_trainable_variables = [
                k.numpy() for k in trainable_variables
            ]
            max_acc = acc
            min_loss = loss
            counter_since_best = 0
    for k in range(len(trainable_variables)):
        trainable_variables[k].assign(best_trainable_variables[k])
    return forward(Y, U, PI, mode='eval', p=None).numpy()
    """ for c in df.columns: if c.startswith('loss'): df[c] = (df[c] - df[c].min())/(df[c].max()-df[c].min()) for c in df.columns: if not c in 'i': plt.plot(df['i'],df[c],label=c) plt.legend() plt.show() #plt.scatter(range(lambda_tilde.shape[0]),np.log10(lambda_tilde/LAMBDA),s=2) #plt.show() """
    return tf.clip_by_value(forward(Y, U, PI, mode='eval')[0], 0, 999999).numpy()
def __MR(self, X, W, Y, labeledIndexes, p, tuning_iter, hook=None):
    """Unlabel the ``tuning_iter`` labeled instances worst fitted by an eigenvector regression.

    A one-vs-rest least-squares fit of the labels is computed in the space
    of ``p`` generalized eigenvectors of ``(L, D)``. Labeled instances are
    ranked by squared error on their assigned class, largest first, and the
    first ``tuning_iter`` of them are marked as unlabeled.

    Args:
        X: Only forwarded to ``hook._step``.
        W: Affinity matrix; converted with ``scipy_to_np``.
        Y: Labels, either 1-D (expanded via ``gutils.init_matrix``) or 2-D.
            The caller's array is not modified.
        labeledIndexes: Boolean mask of labeled rows (assumed 1-D -- TODO
            confirm). A copy is modified and returned.
        p: Number of eigenvectors; a non-int value is multiplied by the
            number of labeled rows and truncated.
        tuning_iter: Number of instances to unlabel.
        hook: Optional object whose ``_step`` is called after each removal.

    Returns:
        Tuple ``(Y, labeledIndexes)``; ``Y`` has its unlabeled rows zeroed
        and is otherwise unchanged by the removals.

    Raises:
        ValueError: If ``W`` and ``Y`` disagree on the number of rows.
    """
    Y = np.copy(Y)
    if Y.ndim == 1:
        Y[np.logical_not(labeledIndexes)] = 0
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    num_lab = int(np.count_nonzero(labeledIndexes))
    if not isinstance(p, int):
        p = int(p * num_lab)
    if p > Y.shape[0]:
        # NOTE(review): the bound is the total row count, while the message
        # mentions the number of labeled indexes -- confirm the intent.
        p = Y.shape[0]
        LOG.warn("Warning: p greater than the number of labeled indexes", LOG.ll.FILTER)

    W = scipy_to_np(W)
    L = gutils.lap_matrix(W, which_lap='sym')
    D = gutils.deg_matrix(W)

    if np.allclose(L, L.T, atol=1e-8):
        # Eigenvectors 1..p, skipping the first one.
        # NOTE(review): if ``sp`` is scipy.linalg, the ``eigvals`` keyword is
        # deprecated in favour of ``subset_by_index`` in recent SciPy
        # releases -- confirm the supported SciPy version.
        E = sp.eigh(L, D, eigvals=(1, p))[1]
    else:
        LOG.warn("Warning: Laplacian not symmetric", LOG.ll.FILTER)
        eigenValues, eigenVectors = sp.eig(L, D)
        idx = eigenValues.argsort()
        eigenValues = eigenValues[idx]
        assert eigenValues[0] <= eigenValues[eigenValues.shape[0] - 1]
        eigenVectors = eigenVectors[:, idx]
        E = eigenVectors[:, 1:(p + 1)]

    e_lab = E[labeledIndexes, :]
    # TIKHONOV REGULARIZATION. Currently set to 0.
    TIK = np.zeros(shape=e_lab.shape)
    gram = e_lab.T @ e_lab + TIK.T @ TIK
    try:
        A = np.linalg.inv(gram) @ e_lab.T
    except np.linalg.LinAlgError:
        # Singular normal equations: fall back to the pseudo-inverse.
        A = np.linalg.pinv(gram) @ e_lab.T

    F = np.zeros(shape=Y.shape)
    y_m = np.argmax(Y, axis=1)[labeledIndexes]
    for i in range(Y.shape[1]):
        # +1 / -1 targets for class i against the rest.
        c = np.ones(num_lab)
        c[y_m != i] = -1
        a = A @ np.transpose(c)
        LOG.debug(a, LOG.ll.FILTER)
        # One matrix-vector product instead of a Python loop over rows.
        F[:, i] = E @ a

    # Squared error on the assigned class; unlabeled rows stay at -1 so they
    # sort last in the descending ranking.
    ERmat = -1 * np.ones((Y.shape[0], ))
    Y_amax = np.argmax(Y, axis=1)
    lab_idx = np.flatnonzero(labeledIndexes)
    ERmat[lab_idx] = np.square(Y[lab_idx, Y_amax[lab_idx]] - F[lab_idx, Y_amax[lab_idx]])
    removed_Lids = np.argsort(ERmat)[::-1]

    labeledIndexes = np.array(labeledIndexes)
    for i in range(tuning_iter):
        labeledIndexes[removed_Lids[i]] = False
        if hook is not None:
            hook._step(step=i, X=X, W=W, Y=Y, labeledIndexes=labeledIndexes)
    return Y, labeledIndexes
def __SIIS(self, X, W, Y, labeledIndexes, m, alpha, beta, rho, max_iter, hook=None):
    """Classify by iteratively optimising coefficients over ``m`` eigenfunctions of ``W``.

    The coefficient matrix ``A`` is updated with auxiliary variables ``Q``
    and ``B``, multipliers ``LAMB_1`` / ``LAMB_2`` and a penalty ``mu`` that
    grows by ``rho`` up to a cap. The loop stops after more than
    ``max_iter`` iterations or once the relative change of ``A`` is at most
    ``1e-4``. The result ``U @ A`` is turned into one-hot rows at each row's
    argmax.

    Args:
        X: Not used by this method.
        W: Affinity object; must expose ``shape`` and ``load_eigenfunctions``.
        Y: Label matrix; first passed through ``self.CLEAN_UNLABELED_ROWS``.
        labeledIndexes: Mask of labeled rows.
        m: Number of eigenfunctions; ``None`` means ``W.shape[0]``.
        alpha: Weight of the label-fitting term.
        beta: Weight of the ``trace(A^T SIGMA A)`` term.
        rho: Multiplicative growth of the penalty ``mu``.
        max_iter: Iteration bound.
        hook: Not used by this method.

    Returns:
        One-hot class assignment matrix.

    Raises:
        ValueError: If ``W`` and ``Y`` disagree on the number of rows.
    """
    Y = self.CLEAN_UNLABELED_ROWS(Y, labeledIndexes)
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")
    if m is None:
        m = W.shape[0]
    c = Y.shape[1]

    D = gutils.deg_matrix(W, pwr=1.0)
    L = gutils.lap_matrix(W, which_lap='sym')

    U, SIGMA = W.load_eigenfunctions(m=m, remove_first_eig=False)
    U = scipy.sparse.csr_matrix(U)
    SIGMA = _to_np(scipy.sparse.diags([SIGMA], [0]))

    J = gutils.labels_indicator(labeledIndexes)
    # !!!
    P = SIISClassifier.edge_mat(W)

    # Initialize params
    LAMB_1 = np.ones((P.shape[0], c))
    LAMB_2 = np.ones((Y.shape[0], c))
    mu = 1.0
    mu_max = 10000000.0
    eps = 1 / (10000)

    # Reusable matrices
    JU = _to_np(J @ U)
    PU = _to_np(P @ U)
    PU_T = PU.transpose()
    JU_T = JU.transpose()

    # TODO: a TensorFlow (session-based) variant was sketched here; it
    # minimised the same cost C over A with Adam and set F = U @ A.

    def _shrink_rows(mat, thresh):
        # Scale each row by (norm - thresh) / norm; rows whose norm is at
        # most thresh become zero.
        row_norm = np.linalg.norm(mat, axis=1)
        vanish = row_norm <= thresh
        scale = ((row_norm - thresh) / row_norm)
        mat = mat * scale[:, np.newaxis]
        mat[vanish, :] = 0.0
        return mat

    A = np.zeros((m, c))
    Q = None
    B = None
    improvement = 1
    n_iter = 0
    while n_iter <= max_iter and improvement > eps:
        # Update Q
        Q = _shrink_rows(PU @ A - (1 / mu) * LAMB_1, (1 / mu))

        # Update B
        B = _shrink_rows(JU @ A - Y - (1 / mu) * LAMB_2, (alpha / mu))

        old_A = A

        # Update A
        A_inv_term = 2 * beta * SIGMA + mu * PU_T @ PU + mu * JU_T @ JU
        A_inv_term = np.linalg.inv(A_inv_term)
        A = A_inv_term @ \
            (PU_T @ LAMB_1 + JU_T @ LAMB_2 +
             mu * PU_T @ Q + mu * JU_T @ (B + Y))

        # Update Lagrangian coeffs
        LAMB_1 = LAMB_1 + mu * (Q - PU @ A)
        LAMB_2 = LAMB_2 + mu * (B - JU @ A + Y)

        # Update penalty coefficients
        mu = min(rho * mu, mu_max)

        if old_A is not None:
            improvement = (np.max(np.abs(A - old_A))) / np.max(np.abs(old_A))

        LOG.debug("Iter {}".format(n_iter), LOG.ll.CLASSIFIER)
        n_iter += 1

        C = np.sum(np.linalg.norm(PU @ A, axis=1)) + alpha * np.sum(np.linalg.norm(JU @ A - Y, axis=1)) + \
            beta * np.trace(A.T @ SIGMA @ A)
        LOG.debug("Iter {} - Cost {}".format(n_iter, C), LOG.ll.CLASSIFIER)

    F = U @ A

    # Turn every row into a one-hot vector at its largest entry.
    for row in range(F.shape[0]):
        winner = np.argmax(F[row, :])
        F[row, :] = 0.0
        F[row, winner] = 1.0

    return F