def baselinePU(Y, label_loc, alpha, vlambda, kx):
    #random_mat = np.random.random(Y.shape)
    #label_loc = np.where(random_mat < label_fraction)  ## locate the masked entries in the label matrix
    #### print statistics
    #print np.where(Y[label_loc] > 0)[0].shape[0] / float(np.where(Y > 0)[0].shape[0])  ## ratio of "1" entries being masked
    #print np.where(Y[label_loc] < 1)[0].shape[0] / float(np.where(Y < 1)[0].shape[0])  ## ratio of "0" entries being masked
    W = theano.shared(np.random.random((Y.shape[0], kx)), name='W')
    H = theano.shared(np.random.random((Y.shape[1], kx)), name='H')
    labelmask = np.ones(Y.shape)
    labelmask[label_loc] = 0
    Y_masked = Y.copy()
    Y_masked[label_loc] = 0
    reconstruction = theano.tensor.dot(W, H.T)
    X_symbolic = theano.tensor.matrix(name="Y_masked", dtype=Y_masked.dtype)
    difference = theano.tensor.sqr(X_symbolic - reconstruction) * (1 - alpha)
    positive_difference = theano.tensor.sqr(
        (X_symbolic - reconstruction) * labelmask) * (2 * alpha - 1.)
    mse = difference.mean() + positive_difference.mean()
    loss = mse + vlambda * (W * W).mean() + vlambda * (H * H).mean()
    downhill.minimize(loss=loss,
                      train=[Y_masked],
                      patience=0,
                      algo='rmsprop',
                      batch_size=Y_masked.shape[0],
                      max_gradient_norm=1,
                      learning_rate=0.06,
                      min_improvement=0.00001)
    return W.get_value(), H.get_value()
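A minimal usage sketch for baselinePU; the toy data and hyper-parameter values below are assumptions for illustration, not from the original source. It hides a random 20% of a binary label matrix, factorises it with PU-weighted squared error, and scores the hidden entries from the recovered factors.

import numpy as np

Y_toy = (np.random.random((100, 20)) < 0.3).astype('float64')  # toy 0/1 label matrix
label_loc = np.where(np.random.random(Y_toy.shape) < 0.2)      # entries to mask out
W_hat, H_hat = baselinePU(Y_toy, label_loc, alpha=0.9, vlambda=0.1, kx=10)
Y_scores = np.dot(W_hat, H_hat.T)                               # reconstructed label scores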
def solve(self, X, missing_mask):
    (n_samples, n_features) = X.shape
    observed_mask = 1 - missing_mask
    # Set up a matrix factorization problem to optimize.
    U_init = self.initializer(n_samples, self.rank).astype(X.dtype)
    V_init = self.initializer(self.rank, n_features).astype(X.dtype)
    U = theano.shared(U_init, name="U")
    V = theano.shared(V_init, name="V")
    X_symbolic = T.matrix(name="X", dtype=X.dtype)
    reconstruction = T.dot(U, V)
    difference = X_symbolic - reconstruction
    masked_difference = difference * observed_mask
    err = T.sqr(masked_difference)
    mse = err.mean()
    loss = (mse +
            self.l1_penalty * abs(U).mean() +
            self.l2_penalty * (V * V).mean())
    downhill.minimize(loss=loss,
                      train=[X],
                      patience=self.patience,
                      algo=self.optimization_algorithm,
                      batch_size=n_samples,
                      min_improvement=self.min_improvement,
                      max_gradient_norm=self.max_gradient_norm,
                      learning_rate=self.learning_rate,
                      monitors=[("error", err.mean())],
                      monitor_gradients=self.verbose)
    U_value = U.get_value()
    V_value = V.get_value()
    return np.dot(U_value, V_value)
def completionLR(X, kx, fea_loc, lambdaU, lambdaV):
    mask = np.ones(X.shape)
    mask[fea_loc] = 0.
    #### Theano and downhill
    U = theano.shared(np.random.random((X.shape[0], kx)), name='U')
    V = theano.shared(np.random.random((X.shape[1], kx)), name='V')
    X_symbolic = theano.tensor.matrix(name="X", dtype=X.dtype)
    reconstruction = theano.tensor.dot(U, V.T)
    difference = X_symbolic - reconstruction
    masked_difference = difference * mask
    err = theano.tensor.sqr(masked_difference)
    mse = err.mean()
    xloss = mse + lambdaU * (U * U).mean() + lambdaV * (V * V).mean()
    #### optimisation
    downhill.minimize(loss=xloss,
                      train=[X],
                      patience=0,
                      algo='rmsprop',
                      batch_size=X.shape[0],
                      max_gradient_norm=1,
                      learning_rate=0.1,
                      min_improvement=0.0001)
    return U.get_value(), V.get_value()
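A usage sketch for completionLR under the same caveat (toy data, illustrative regularisation weights): hide 10% of a feature matrix, factorise the observed entries, and rebuild the full matrix as U·Vᵀ.

import numpy as np

X_toy = np.random.random((200, 50))                        # toy feature matrix
fea_loc = np.where(np.random.random(X_toy.shape) < 0.1)    # 10% of entries hidden
U_hat, V_hat = completionLR(X_toy, kx=10, fea_loc=fea_loc, lambdaU=0.1, lambdaV=0.1)
X_filled = np.dot(U_hat, V_hat.T)                           # completed feature matrix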
def fit(self, X, y):
    self.w = theano.shared(
        value=np.random.normal(0, 0.001, (X.shape[1], 1)),  # random initialization
        name="w",
        borrow=False)
    x_ = TT.matrix("X")
    y_ = TT.matrix("y")
    e = ((y_ - TT.dot(x_, self.w)) ** 2).sum()
    l1_penalty = abs(self.w).sum()
    l2_penalty = TT.sqrt((self.w * self.w).sum())
    loss = (e + self.lambda_1 * l1_penalty + self.lambda_2 * l2_penalty).sum()
    x_train, x_valid, y_train, y_valid = cv.train_test_split(X, y)
    downhill.minimize(
        loss,
        XYDataset(x_train, y_train, batch_size=self.batch_size),
        valid=XYDataset(x_valid, y_valid, batch_size=x_valid.shape[0]),
        params=[self.w],
        inputs=[x_, y_],
        algo="rmsprop",
        **self.downhill_args)
    w = self.w.get_value()
    self.coef_dist = [
        (abs(w) > x).sum()
        for x in [0.01, 0.001, 0.0001, 0.00001, 0.000001]
    ]
def completionPUV(X, Y, fea_loc, label_loc, alpha, lambda0, lambda1, lambda2,
                  delta, kx):
    #delta = 0.3
    ### masking out some entries from the feature and label matrices
    mask = np.ones(X.shape)
    mask[fea_loc] = 0.
    labelmask = np.ones(Y.shape)
    labelmask[label_loc] = 0
    #### Theano and downhill
    #### U, V, W and H randomly initialised
    U = theano.shared(np.random.random((X.shape[0], kx)), name='U')
    V = theano.shared(np.random.random((X.shape[1], kx)), name='V')
    W = theano.shared(np.random.random((Y.shape[0], kx)), name='W')
    H = theano.shared(np.random.random((Y.shape[1], kx)), name='H')
    feature_mask = theano.tensor.matrix('feature_mask')
    label_mask = theano.tensor.matrix('label_mask')
    #feature_mask = theano.shared(mask, name='mask')
    #label_mask = theano.shared(labelmask, name='labelmask')
    nsample = X.shape[0]
    #X_symbolic = theano.tensor.matrix(name="X", dtype=X.dtype)
    #tX = theano.shared(X.astype(theano.config.floatX), name="X")
    #tX = theano.shared(X, name="X")
    tX = theano.tensor.matrix('X')  ### symbolic variable
    difference = tX - theano.tensor.dot(U, V.T)
    masked_difference = difference * feature_mask
    err = theano.tensor.sqr(masked_difference)
    mse = err.mean()
    xloss = mse + lambda0 * ((U * U).mean() + (V * V).mean())
    #tY = theano.shared(Y.astype(theano.config.floatX), name="Y")
    #tY = theano.shared(Y, name="Y")
    tY = theano.tensor.matrix('Y')  ### symbolic variable
    Y_reconstruction = theano.tensor.dot(W, H.T)
    Ydifference = theano.tensor.sqr(tY - Y_reconstruction) * (1 - alpha)
    positive_difference = theano.tensor.sqr(
        (tY - Y_reconstruction) * label_mask) * (2 * alpha - 1.)
    Ymse = Ydifference.mean() + positive_difference.mean()
    global_loss = (xloss + delta * Ymse +
                   lambda1 * ((W * W).mean() + (H * H).mean()) +
                   lambda2 * theano.tensor.sqr(U - W).mean())
    #### optimisation
    downhill.minimize(loss=global_loss,
                      params=[U, V, W, H],
                      train=[X, Y, mask, labelmask],
                      inputs=[tX, tY, feature_mask, label_mask],
                      patience=0,
                      algo='rmsprop',
                      batch_size=nsample,
                      max_gradient_norm=1,
                      learning_rate=0.1,
                      min_improvement=0.0001)
    return U.get_value(), V.get_value(), W.get_value(), H.get_value()
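A usage sketch for completionPUV (toy data and hyper-parameter values are assumptions): jointly complete a partially observed feature matrix and a PU-weighted label matrix, coupling the two factorisations through the (U - W)² term.

import numpy as np

X_toy = np.random.random((150, 40))                            # feature matrix
Y_toy = (np.random.random((150, 10)) < 0.3).astype('float64')  # binary label matrix
fea_loc = np.where(np.random.random(X_toy.shape) < 0.1)        # hidden feature entries
label_loc = np.where(np.random.random(Y_toy.shape) < 0.2)      # hidden label entries
U_hat, V_hat, W_hat, H_hat = completionPUV(
    X_toy, Y_toy, fea_loc, label_loc,
    alpha=0.9, lambda0=0.1, lambda1=0.1, lambda2=1.0, delta=0.3, kx=10)
Y_scores = np.dot(W_hat, H_hat.T)                              # label reconstruction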
def TPAMI(X, Y, fea_loc_x, fea_loc_y, label_loc_x, label_loc_y, miu, lambda0, kx):
    ### X: feature matrix
    ### Y: label matrix
    ### fea_loc_x, fea_loc_y: masked entries in the feature matrix
    ### label_loc_x, label_loc_y: masked entries in the label matrix
    ### miu: regularisation parameter on the matrix rank
    ### lambda0: regularisation parameter on the label reconstruction
    ### kx: dimensionality of the latent variables used to solve the
    ###     nuclear-norm-based regularisation
    M = np.concatenate((Y, X), axis=1)
    M = M.T
    label_dim = Y.shape[1]
    fea_dim = X.shape[1]
    gamma = 15.
    featuremask = np.ones(M.shape)
    labelmask = np.ones(M.shape)
    for i in range(len(label_loc_x)):
        labelmask[label_loc_y[i], label_loc_x[i]] = 0.
    for i in range(len(fea_loc_x)):
        featuremask[fea_loc_y[i] + label_dim, fea_loc_x[i]] = 0.
    #### Theano and downhill
    U = theano.shared(np.random.random((M.shape[0], kx)), name='U')
    V = theano.shared(np.random.random((M.shape[1], kx)), name='V')
    #### feature loss
    M_symbolic = theano.tensor.matrix(name="M", dtype=M.dtype)
    reconstruction = theano.tensor.dot(U, V.T)
    difference = M_symbolic - reconstruction
    masked_difference = difference * featuremask
    err = theano.tensor.sqr(masked_difference)
    mse = err.mean()
    xloss = (1. / float(len(fea_loc_x))) * mse + miu * ((U * U).mean() + (V * V).mean())
    #### label loss
    label_reconstruction_kernel = -1 * gamma * (2 * M - 1) * (reconstruction - M)
    label_reconstruction_difference = (1. / gamma) * theano.tensor.log(
        1 + theano.tensor.exp(label_reconstruction_kernel)) * labelmask
    label_err = (1. / float(len(label_loc_x))) * label_reconstruction_difference.mean()
    global_loss = xloss + lambda0 * label_err
    #### optimisation
    downhill.minimize(loss=global_loss,
                      train=[M],
                      inputs=[M_symbolic],
                      patience=0,
                      algo='rmsprop',
                      batch_size=M.shape[0],
                      max_gradient_norm=1,
                      learning_rate=0.1,
                      min_improvement=0.01)
    return U.get_value(), V.get_value()
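A usage sketch for TPAMI with toy data and illustrative parameter values (assumptions, not from the original source). The x/y index convention is inferred from how the masks are built inside the function: the *_loc_x arrays index samples and the *_loc_y arrays index label or feature columns.

import numpy as np

X_toy = np.random.random((100, 30))                            # feature matrix
Y_toy = (np.random.random((100, 8)) < 0.3).astype('float64')   # label matrix
fea_loc_x, fea_loc_y = np.where(np.random.random(X_toy.shape) < 0.1)      # (sample, feature) indices
label_loc_x, label_loc_y = np.where(np.random.random(Y_toy.shape) < 0.2)  # (sample, label) indices
U_hat, V_hat = TPAMI(X_toy, Y_toy, fea_loc_x, fea_loc_y,
                     label_loc_x, label_loc_y, miu=1.0, lambda0=1.0, kx=10)
M_hat = np.dot(U_hat, V_hat.T)                  # reconstruction of M = [Y X]^T
Y_scores = M_hat[:Y_toy.shape[1], :].T          # recovered label block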
def test_minimize(self):
    x = theano.shared(-3 + np.zeros((2, ), 'f'), name='x')
    data = downhill.Dataset(np.zeros((1, 1), 'f'), batch_size=1)
    data._slices = [[]]
    downhill.minimize(
        (100 * (x[1:] - x[:-1] ** 2) ** 2 + (1 - x[:-1]) ** 2).sum(),
        data,
        algo='nag',
        learning_rate=0.001,
        momentum=0.9,
        patience=1,
        min_improvement=0.1,
        max_gradient_norm=1,
    )
    assert np.allclose(x.get_value(), [1, 1]), x.get_value()
def completionPUV(X, Y, fea_loc, label_loc, alpha, lambda0, lambda1, lambda2,
                  delta, kx):
    #delta = 0.3
    ### masking out some entries from the feature and label matrices
    mask = np.ones(X.shape)
    mask[fea_loc] = 0.
    labelmask = np.ones(Y.shape)
    labelmask[label_loc] = 0
    #### Theano and downhill
    U = theano.shared(np.random.random((X.shape[0], kx)), name='U')
    V = theano.shared(np.random.random((X.shape[1], kx)), name='V')
    W = theano.shared(np.random.random((Y.shape[0], kx)), name='W')
    H = theano.shared(np.random.random((Y.shape[1], kx)), name='H')
    X_symbolic = theano.tensor.matrix(name="X", dtype=X.dtype)
    reconstruction = theano.tensor.dot(U, V.T)
    difference = X_symbolic - reconstruction
    masked_difference = difference * mask
    err = theano.tensor.sqr(masked_difference)
    mse = err.mean()
    xloss = mse + lambda0 * ((U * U).mean() + (V * V).mean())
    Y_symbolic = theano.tensor.matrix(name="Y", dtype=Y.dtype)
    Y_reconstruction = theano.tensor.dot(U, H.T)
    Ydifference = theano.tensor.sqr(Y_symbolic - Y_reconstruction) * (1 - alpha)
    positive_difference = theano.tensor.sqr(
        (Y_symbolic - Y_reconstruction) * labelmask) * (2 * alpha - 1.)
    Ymse = Ydifference.mean() + positive_difference.mean()
    global_loss = (xloss + delta * Ymse +
                   lambda1 * ((W * W).mean() + (H * H).mean()) +
                   lambda2 * theano.tensor.sqr(U - W).mean())
    #### optimisation
    downhill.minimize(loss=global_loss,
                      train=[X, Y],
                      inputs=[X_symbolic, Y_symbolic],
                      patience=0,
                      algo='rmsprop',
                      batch_size=Y.shape[0],
                      max_gradient_norm=1,
                      learning_rate=0.1,
                      min_improvement=0.0001)
    return U.get_value(), V.get_value(), W.get_value(), H.get_value()
def fit(self, X, y):
    if self.select_cols is not None:
        _X = X[:, self.select_cols]
    else:
        _X = X
    self.w = theano.shared(
        value=np.random.normal(0, 0.001, (_X.shape[1], 1)),  # random initialization
        name="w",
        borrow=False)
    x_ = TT.matrix("X")
    y_ = TT.matrix("y")
    l_ = tsparse.csr_matrix("l")
    e = ((y_ - TT.dot(x_, self.w)) ** 2).sum()
    l1_penalty = abs(self.w).sum()
    l2_penalty = TT.sqrt((self.w * self.w).sum())
    s_sparse_penalty = theano.dot(theano.dot(self.w.T, l_), self.w)
    loss = (e +
            self.lambda_1 * l1_penalty +
            self.lambda_2 * l2_penalty +
            self.alpha * s_sparse_penalty).sum()
    x_train, x_valid, y_train, y_valid = cv.train_test_split(_X, y)
    downhill.minimize(
        loss,
        XYLDataset(x_train, y_train, self.L, batch_size=self.batch_size),
        valid=XYLDataset(x_valid, y_valid, self.L, batch_size=x_valid.shape[0]),
        params=[self.w],
        inputs=[x_, y_, l_],
        algo="rmsprop",
        **self.downhill_args)
    w = self.w.get_value()
    self.coef_dist = [
        (abs(w) > x).sum()
        for x in [0.01, 0.001, 0.0001, 0.00001, 0.000001]
    ]
u = theano.shared(np.random.randn(N * N, K * K).astype('f'), name='u')
v = theano.shared(np.random.randn(K * K, B).astype('f'), name='v')

err = TT.sqr(x - TT.dot(u, v)).mean()

downhill.minimize(
    loss=err + 100 * (0.01 * abs(u).mean() + (v * v).mean()),
    params=[u, v],
    inputs=[x],
    train=train,
    valid=valid,
    batch_size=N * N,
    monitor_gradients=True,
    monitors=[
        ('err', err),
        ('u<-0.5', (u < -0.5).mean()),
        ('u<-0.1', (u < -0.1).mean()),
        ('u<0.1', (u < 0.1).mean()),
        ('u<0.5', (u < 0.5).mean()),
    ],
    algo='sgd',
    max_gradient_clip=1,
    learning_rate=0.5,
    momentum=0.9,
    patience=3,
    min_improvement=0.1,
)

plot_images(v.get_value(), 121)
plot_images(np.dot(u.get_value(), v.get_value()), 122)
plt.show()
#minimize(loss,
#         train,
#         batch_size=32,
#         monitor_gradients=False,
#         monitors=(),
#         valid=None,
#         params=None,
#         inputs=None,
#         algo='rmsprop',
#         updates=(),
#         train_batches=None,
#         valid_batches=None,
#         **kwargs)
downhill.minimize(
    loss=loss,
    train=[y],
    patience=0,
    batch_size=A,          # Process y as a single batch.
    max_gradient_norm=1,   # Prevent gradient explosion!
    learning_rate=0.1,
    monitors=monitors,
    monitor_gradients=True)

# Print out the optimized coefficients u and basis v.
print('u =', u.get_value())
print('v =', v.get_value())
def rand(a, b):
    return np.random.randn(a, b).astype('f')

A, B, K = 20, 5, 3

# Set up a matrix factorization problem to optimize.
u = theano.shared(rand(A, K), name='u')
v = theano.shared(rand(K, B), name='v')
e = TT.sqr(TT.matrix() - TT.dot(u, v))

# Minimize the regularized loss with respect to a data matrix.
y = np.dot(rand(A, K), rand(K, B)) + rand(A, B)

downhill.minimize(
    loss=e.mean() + abs(u).mean() + (v * v).mean(),
    train=[y],
    patience=0,
    batch_size=A,          # Process y as a single batch.
    max_gradient_norm=1,   # Prevent gradient explosion!
    learning_rate=0.1,
    monitors=(
        ('err', e.mean()),  # Monitor during optimization.
        ('|u|<0.1', (abs(u) < 0.1).mean()),
        ('|v|<0.1', (abs(v) < 0.1).mean())),
    monitor_gradients=True)

# Print out the optimized coefficients u and basis v.
print('u =', u.get_value())
print('v =', v.get_value())
def rand(a, b):
    return np.random.randn(a, b).astype('f')

A, B, K = 20, 5, 3

# Set up a matrix factorization problem to optimize.
u = theano.shared(rand(A, K), name='u')
v = theano.shared(rand(K, B), name='v')
e = TT.sqr(TT.matrix() - TT.dot(u, v))

# Minimize the regularized loss with respect to a data matrix.
y = np.dot(rand(A, K), rand(K, B)) + rand(A, B)

downhill.minimize(
    loss=e.mean() + abs(u).mean() + (v * v).mean(),
    train=[y],
    patience=0,
    batch_size=A,          # Process y as a single batch.
    max_gradient_norm=1,   # Prevent gradient explosion!
    learning_rate=0.1,
    monitors=(('err', e.mean()),  # Monitor during optimization.
              ('|u|<0.1', (abs(u) < 0.1).mean()),
              ('|v|<0.1', (abs(v) < 0.1).mean())),
    algo='sgd',
    monitor_gradients=True)

# Print out the optimized coefficients u and basis v.
print('u =', u.get_value())
print('v =', v.get_value())