def fit(self, X, y, ns, ufc, ignore_sensitive=False, **params):
    # fix ns to 1 in current version
    ns = 1

    # compute weights
    Xw = np.array([[0.0], [1.0]])
    self.w_ = ufc.predict_proba(Xw)[:, 1]

    # add a constant term
    if self.fit_intercept:
        X = np.c_[np.ones(X.shape[0]), X]

    # check optimization parameters
    if 'disp' not in params:
        params['disp'] = False
    if 'maxiter' not in params:
        params['maxiter'] = 100

    # optimize the loss with conjugate gradients
    self.coef_ = np.zeros(X.shape[1])
    self.coef_ = fmin_cg(self.loss, self.coef_, fprime=self.grad_loss,
                         args=(X, y, ns), **params)

    # clear the weights for sensitive features
    if ignore_sensitive:
        self.coef_[-ns:] = 0.0
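# A minimal standalone sketch (hypothetical loss/grad, not the class above) of
# the call pattern fit() uses: default optimizer options are filled into
# **params and forwarded to fmin_cg, which accepts disp and maxiter keywords.
import numpy as np
from scipy.optimize import fmin_cg


def loss(coef, X, y):
    return np.sum((X @ coef - y) ** 2)


def grad_loss(coef, X, y):
    return 2.0 * X.T @ (X @ coef - y)


def fit_linear(X, y, **params):
    params.setdefault('disp', False)
    params.setdefault('maxiter', 100)
    coef0 = np.zeros(X.shape[1])
    return fmin_cg(loss, coef0, fprime=grad_loss, args=(X, y), **params)


X_demo = np.c_[np.ones(5), np.arange(5.0)]
coef_demo = fit_linear(X_demo, 3.0 * np.arange(5.0) + 1.0)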
def train_alt(self, alpha=0):
    """ Define the gradient and hand it off to a scipy gradient-based optimizer. """
    # Set alpha so it can be referred to later if needed
    self.alpha = alpha

    x_total = np.concatenate((self.x_train, self.x_test), axis=0)
    #similarityMatrix = np.ones((self.x_train.shape[0], x_total.shape[0]))
    similarityMatrix = similarity_calculator.get_similarities_alt(
        x_total, self.x_train)

    # Define the derivative of the likelihood with respect to beta_k.
    # Need to multiply by -1 because we will be minimizing.
    # The following has a dimension of [1 x k] where k = |W|
    dl_by_dWk = lambda W, k: (k > 0) * self.sfRegStep(
        W, k, similarityMatrix, alpha, x_total)

    # The full gradient is just an array of componentwise derivatives
    gradient = lambda W: np.array([dl_by_dWk(W, k)
                                   for k in range(self.x_train.shape[1])]).transpose()

    # The function to be minimized
    # Use the negative log likelihood for the objective function.
    objectiveFunction = lambda W: -self.likelihood_alt(
        similarityMatrix, betas=W, alpha=self.alpha)

    # Optimize
    print('Optimizing for alpha = {}'.format(alpha))
    #self.betas = fmin_bfgs(objectiveFunction, self.betas, fprime=gradient)
    self.betas = fmin_cg(objectiveFunction, self.betas, fprime=gradient,
                         maxiter=10)
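# Self-contained sketch (hypothetical objective, not the class above) of the
# pattern used in train_alt(): the gradient passed as fprime is assembled
# componentwise from a per-coordinate derivative and must come back as a
# 1-D array with the same shape as the parameter vector.
import numpy as np
from scipy.optimize import fmin_cg

f = lambda W: np.sum((W - np.arange(3.0)) ** 2)
df_dWk = lambda W, k: 2.0 * (W[k] - k)          # derivative w.r.t. W[k]
gradient = lambda W: np.array([df_dWk(W, k) for k in range(W.size)])

W_opt = fmin_cg(f, np.zeros(3), fprime=gradient, maxiter=10, disp=False)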
def Train(parameters):
    optimalParameters = optimize.fmin_cg(f=nnCostFunction,
                                         x0=parameters,
                                         fprime=nnGradient,
                                         args=(input_layer_size, hidden_layer_size,
                                               num_labels, X, yVectors, lam))
    return optimalParameters
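# Minimal sketch (hypothetical scalar objective) of how fmin_cg forwards `args`:
# the extra tuple is appended to the parameter vector when calling both f and
# fprime, so nnCostFunction / nnGradient above are expected to accept
# (parameters, input_layer_size, hidden_layer_size, num_labels, X, yVectors, lam).
import numpy as np
from scipy.optimize import fmin_cg


def f(w, scale):
    return scale * np.dot(w, w)


def g(w, scale):
    return 2.0 * scale * w


w_opt = fmin_cg(f, np.ones(3), fprime=g, args=(0.5,), disp=False)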
def optimize_theta(th, X, Y, m, n, myLambda):
    result = optimize.fmin_cg(cost_function, x0=th, fprime=gradient,
                              args=(X, Y, m, n, myLambda),
                              maxiter=100, disp=True, full_output=True)
    return result[0], result[1]
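# Minimal sketch (hypothetical cost/gradient) of the full_output tuple that
# optimize_theta unpacks: with full_output=True, fmin_cg returns
# (xopt, fopt, func_calls, grad_calls, warnflag), so result[0] is the optimized
# theta and result[1] is the final cost.
import numpy as np
from scipy import optimize


def cost(th):
    return np.sum((th - 1.0) ** 2)


def grad(th):
    return 2.0 * (th - 1.0)


result = optimize.fmin_cg(cost, x0=np.zeros(2), fprime=grad,
                          maxiter=100, disp=False, full_output=True)
xopt, fopt = result[0], result[1]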
def train(self, update=True, **params):
    """
    Train the model to generate the theta values for this classifier

    Parameters
    ----------
    update : boolean
        when set to true, the classifier's theta values are updated
    **params : dict
        inputLayerSize : int
            Number of input features
        hiddenLayerSize : int
            Number of nodes in the hidden layer
        numLabels : int
            Number of unique labels (i.e. classes)
        X : ndarray (2D)
            Contains the training set, with each row as one record
        y : ndarray (1D)
            Contains the corresponding label for each row in X
        lambdaVal : float
            Regularization parameter
        maxIter : int
            Maximum number of iterations for the optimization algorithm

    Returns
    -------
    xopt : ndarray (1D)
        optimized theta values
    cost : float
        cost associated to xopt
    """
    inputLayerSize = params["inputLayerSize"]
    hiddenLayerSize = params["hiddenLayerSize"]
    numLabels = params["numLabels"]
    X = params["X"]
    y = params["y"]
    lambdaVal = params["lambdaVal"]
    maxIter = params["maxIter"]

    # Randomly initialize the weights and unroll them into a single vector
    theta1 = self.randomInitWeights(inputLayerSize, hiddenLayerSize)
    theta2 = self.randomInitWeights(hiddenLayerSize, numLabels)
    nnParams = np.append(theta1, theta2)

    shortCostFunction = lambda nnParams: self.computeCost(
        inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, nnParams)
    shortGradFunction = lambda nnParams: self.computeGradient(
        inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, nnParams)

    retVal = fmin_cg(shortCostFunction, x0=nnParams, fprime=shortGradFunction,
                     maxiter=maxIter, full_output=True)
    nnParams = retVal[0]

    if update:
        # Reshape the optimized vector back into the two weight matrices
        self.theta1 = np.reshape(nnParams[0:hiddenLayerSize * (inputLayerSize + 1)],
                                 (hiddenLayerSize, inputLayerSize + 1))
        self.theta2 = np.reshape(nnParams[hiddenLayerSize * (inputLayerSize + 1):],
                                 (numLabels, hiddenLayerSize + 1))

    retVal = (retVal[0], retVal[1])
    return retVal
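# Minimal sketch (standalone, not part of the class above) of the
# unroll/reshape pattern used in train(): fmin_cg optimizes a flat 1-D vector,
# so the two weight matrices are concatenated with np.append before the call
# and reshaped back afterwards.
import numpy as np

inputLayerSize, hiddenLayerSize, numLabels = 4, 3, 2
theta1 = np.random.randn(hiddenLayerSize, inputLayerSize + 1)
theta2 = np.random.randn(numLabels, hiddenLayerSize + 1)

nnParams = np.append(theta1, theta2)    # flatten + concatenate for the optimizer

t1 = np.reshape(nnParams[:hiddenLayerSize * (inputLayerSize + 1)],
                (hiddenLayerSize, inputLayerSize + 1))
t2 = np.reshape(nnParams[hiddenLayerSize * (inputLayerSize + 1):],
                (numLabels, hiddenLayerSize + 1))
assert np.allclose(t1, theta1) and np.allclose(t2, theta2)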
# Imports assumed by this snippet:
import numpy as np
import matplotlib.pyplot as plt
from numpy import dot, eye, log, trace
from numpy.linalg import det, inv
from scipy.optimize import fmin_cg


def run():
    ## Our data
    sigma = 0.1
    N = 100
    latent = np.linspace(1, 4 * np.pi, N)  # True latent variable
    A = np.random.normal(0, 1, (10, 2))
    fnonlin = np.column_stack((latent * np.sin(latent), latent * np.cos(latent)))
    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))
    # Center Y
    Y = Y - Y.mean(axis=1)[:, None]

    ## The cov matrix we need for our goal function
    S = np.cov(Y, bias=1)

    # fmin_cg expects a vector, not a matrix..., and it HAS to be a 1-D arr
    def loglik(W):
        W = W.reshape(10, 2)
        C = dot(W, W.transpose()) + sigma ** 2 * eye(10)
        return N * (log(det(C)) + trace(dot(inv(C), S)))

    def dloglik(W):
        W = W.reshape(10, 2)
        C = dot(W, W.transpose()) + sigma ** 2 * eye(10)
        t1 = dot(inv(C), S)
        t2 = dot(inv(C), W)
        left = dot(t1, t2)
        right = dot(inv(C), W)
        # Factor of 2 comes from d/dW log det(C) = 2 C^{-1} W and
        # d/dW tr(C^{-1} S) = -2 C^{-1} S C^{-1} W for symmetric C.
        grad = 2 * N * (-left + right)
        # Sanity check: check if dloglik(W_star) ~= 0, i.e. I correctly specified
        # the gradients and we are at a stationary point...
        return grad.reshape(20)

    # No noise
    Winit = np.random.normal(0, 1, 20)
    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)

    ## Recover our latent factors based on the estimated W
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)),
                       dot(W_star.transpose(), Y[:, n]))
    X_hat1 = np.copy(X_hat)

    ## Run some experiments ##
    # Some noise
    sigma = 1
    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))
    Y = Y - Y.mean(axis=1)[:, None]
    S = np.cov(Y, bias=1)
    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)),
                       dot(W_star.transpose(), Y[:, n]))
    X_hatNoise = np.copy(X_hat)

    # Plenty of noise
    sigma = 10
    Y = np.dot(A, fnonlin.transpose()) + np.random.normal(0, sigma, (10, N))
    Y = Y - Y.mean(axis=1)[:, None]
    S = np.cov(Y, bias=1)
    W_star = fmin_cg(loglik, Winit, fprime=dloglik, disp=0)
    W_star = W_star.reshape(10, 2)
    X_hat = np.zeros((N, 2))
    for n in range(N):
        X_hat[n] = dot(inv(dot(W_star.transpose(), W_star)),
                       dot(W_star.transpose(), Y[:, n]))
    X_hatNoiseP = np.copy(X_hat)

    ## Plot it
    plt.subplot(2, 2, 1)
    plt.title("The true lower dimensional representation")
    plt.plot(latent, label="True latent variable")
    plt.plot(fnonlin[:, 0], fnonlin[:, 1], label="Non linear transform")
    plt.legend()
    plt.xlabel("$Xi$")

    plt.subplot(2, 2, 2)
    plt.title("Recovered latent variables, no noise")
    plt.plot(X_hat1[:, 0], X_hat1[:, 1])
    plt.xlabel("$X1$")
    plt.ylabel("$X2$")

    plt.subplot(2, 2, 3)
    plt.title("sigma=1")
    plt.plot(X_hatNoise[:, 0], X_hatNoise[:, 1])
    plt.xlabel("$X1$")
    plt.ylabel("$X2$")

    plt.subplot(2, 2, 4)
    plt.title("sigma=10")
    plt.plot(X_hatNoiseP[:, 0], X_hatNoiseP[:, 1])
    plt.xlabel("$X1$")
    plt.ylabel("$X2$")
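# A small, self-contained sketch (not from the snippet above) of the gradient
# sanity check the comment in dloglik alludes to: scipy.optimize.check_grad
# compares an analytic gradient against a finite-difference estimate, and the
# analytic gradient should also be ~0 at the optimum found by fmin_cg.
import numpy as np
from scipy.optimize import check_grad, fmin_cg


def f(w):
    # simple quadratic bowl: f(w) = 0.5 * ||w||^2
    return 0.5 * np.dot(w, w)


def grad_f(w):
    return w


w0 = np.random.normal(0, 1, 20)
print(check_grad(f, grad_f, w0))        # close to 0 if grad_f matches f
w_star = fmin_cg(f, w0, fprime=grad_f, disp=0)
print(np.linalg.norm(grad_f(w_star)))   # ~0 at a stationary point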
# Imports assumed by this standalone example:
import numpy as np
import pylab as pl
from scipy.optimize import fmin_bfgs, fmin_cg, leastsq


def func(p, *args):
    # Residual vector, as expected by leastsq
    a, b = p
    x, y = args
    cost = y - (a * x + b)
    return cost


def func1(p, *args):
    # Scalar objective for the gradient-based optimizers (fmin_bfgs, fmin_cg).
    # Only the call sites appear in the original snippet, so this definition is
    # an assumption: the sum of squared residuals.
    return np.sum(func(p, *args) ** 2)


x = np.arange(1, 10, 1)
y_true = 3 * x + 4
y_mean = y_true + 10 * np.random.rand(len(x))
p0 = np.array([1, 2])
print(p0)

rs1 = fmin_bfgs(func1, [1, 2], args=(x, y_mean))
rs2 = fmin_cg(func1, [1, 2], args=(x, y_mean))
rs = leastsq(func, p0, args=(x, y_mean))
# rs1 = fmin_bfgs(func, p0, args=(x, y_mean))
print("rs=", rs)
# print("rs1=", rs1)
print("rs2=", rs2)

y1 = rs[0][0] * x + rs[0][1]
y2 = rs1[0] * x + rs1[1]
pl.plot(x, y1, 'r', label="y1")
pl.plot(x, y2, 'b', label="y2")
pl.plot(x, y_mean, 'og', label='y_mean')
pl.legend()
pl.show()
def train(self, update=True, **params):
    """
    Train the model to generate the theta values for this classifier

    Parameters
    ----------
    update : boolean
        when set to true, the classifier's theta values are updated
    **params : dict
        X : ndarray (2D)
            Contains the training set, with each row as one record
        y : ndarray (1D)
            Contains the corresponding label for each row in X
        lambdaVal : float
            Regularization parameter
        maxIter : int
            Maximum number of iterations that the optimization algorithm
            will run for each label
        numOfLabels : int
            Number of unique labels

    Returns
    -------
    xopt : ndarray (1D)
        optimized theta values
    cost : float
        cost associated to xopt
    """
    X = params["X"]
    y = params["y"]
    lambdaVal = params["lambdaVal"]
    maxIter = params["maxIter"]
    numOfLabels = params["numOfLabels"]

    thetaSize = X.shape[1]
    retTheta = np.zeros((numOfLabels, thetaSize + 1))
    X = np.c_[np.ones(X.shape[0]), X]
    theta = np.zeros(thetaSize + 1)
    cost = 0

    # One-vs-all: fit one regularized classifier per label
    for i in range(0, numOfLabels):
        tmpY = (y == i).astype(int)
        shortCostFunction = lambda theta: self.computeCost(
            X, tmpY, lambdaVal, theta)
        shortGradFunction = lambda theta: self.computeGradient(
            X, tmpY, lambdaVal, theta)
        retVal = fmin_cg(shortCostFunction, x0=theta, fprime=shortGradFunction,
                         maxiter=maxIter, full_output=True)
        retTheta[i, :] = retVal[0]
        cost += retVal[1]

    # Report the average cost across the per-label classifiers
    cost /= numOfLabels
    retVal = (retTheta, cost)
    if update:
        self.theta = retTheta
    return retVal
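# Hedged sketch (not part of the class above) of how the one-vs-all thetas
# returned by train() are typically used at prediction time: score every class
# and take the argmax. The sigmoid hypothesis is an assumption here.
import numpy as np


def predict_one_vs_all(theta, X):
    # theta: (numOfLabels, n + 1); X: (m, n) without the intercept column
    Xb = np.c_[np.ones(X.shape[0]), X]
    scores = 1.0 / (1.0 + np.exp(-Xb @ theta.T))
    return np.argmax(scores, axis=1)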