Exemplo n.º 1
0
    def train(self, alpha=0):
        """Fit the betas by minimizing the negative log likelihood.

        The analytic gradient is handed to scipy's BFGS optimizer via
        ``fprime``; ``alpha`` is the L2 regularization strength.
        """
        # Remember the regularization strength for later use.
        self.alpha = alpha

        # Partial derivative of the negated log likelihood w.r.t. beta_k.
        # The (k > 0) factor leaves the intercept term unregularized.
        def neg_dl_dbeta(B, k):
            data_term = np.sum([
                self.y_train[i] * self.x_train[i, k] *
                sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
                for i in range(self.n)])
            return (k > 0) * self.alpha * B[k] - data_term

        # Full gradient: one component per column of x_train.
        def gradient(B):
            return np.array([neg_dl_dbeta(B, k)
                             for k in range(self.x_train.shape[1])])

        # Objective to minimize: the negative log likelihood.
        def objectiveFunction(B):
            return -self.likelihood(betas=B, alpha=self.alpha)

        print('Optimizing for alpha = {}'.format(alpha))
        self.betas = fmin_bfgs(objectiveFunction, self.betas, fprime=gradient)
Exemplo n.º 2
0
    def train(self):
        """Fit the betas with scipy's BFGS optimizer.

        The analytic gradient of the negative log likelihood is supplied
        via ``fprime`` so the optimizer does not have to approximate it.
        (Commented-out debugging code and a duplicated commented lambda
        were removed.)
        """
        # Partial derivative of the negative log likelihood w.r.t. beta_k.
        # The (k > 0) factor leaves the intercept unregularized.
        def dB_k(B, k):
            return (k > 0) * self.alpha * B[k] - \
                np.sum([self.y_train[i] * self.x_train[i, k] *
                        sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
                        for i in range(self.n)])

        # The full gradient is just an array of componentwise derivatives.
        def dB(B):
            return np.array([dB_k(B, k)
                             for k in range(self.x_train.shape[1])])

        # Optimize; self.negative_lik is the objective defined on the class.
        self.betas = fmin_bfgs(self.negative_lik, self.betas, fprime=dB)
Exemplo n.º 3
0
 def train(self):
     """Minimize the negative log likelihood with BFGS.

     Supplies the analytic gradient. Note that the (k > -1) factor is
     always true for k >= 0, so here the alpha penalty applies to every
     component, including the intercept.
     """
     if self.d > 1:
         def dB_k(B, k):
             data_term = np.sum([
                 self.y_train[i] * self.x_train[i, k] *
                 sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
                 for i in range(self.n)])
             return (k > -1) * self.alpha * B[k] - data_term
     else:
         # One-dimensional inputs: rows are indexed without a column.
         def dB_k(B, k):
             data_term = np.sum([
                 self.y_train[i] * self.x_train[i,] *
                 sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i,]))
                 for i in range(self.n)])
             return (k > -1) * self.alpha * B[k] - data_term

     # The full gradient is just an array of componentwise derivatives.
     def dB(B):
         return np.array([dB_k(B, k) for k in range(self.d + 1)])

     self.betas = fmin_bfgs(self.negative_like, self.betas, fprime=dB,
                            disp=True)
     return self
Exemplo n.º 4
0
    def train(self):
        """Fit betas via BFGS on the negative log likelihood.

        Records the initial cost and reports progress through the
        ``onThetaIteration`` callback. (A commented-out call to an
        alternative optimizer was removed as dead code.)

        Returns:
            str: the literal string "Trained".
        """
        # Derivative of the negative log likelihood w.r.t. beta_k;
        # (k > 0) keeps the intercept out of the regularization term.
        dB_k = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum([
            self.y_train[i] * self.x_train[i, k] *
            sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
            for i in range(self.n)])

        # The full gradient is an array of componentwise derivatives.
        dB = lambda B: np.array([dB_k(B, k)
                                 for k in range(self.x_train.shape[1])])

        # Cost before optimization, kept for later inspection.
        self.initialCost = self.negative_lik(self.betas)

        self.betas = fmin_bfgs(self.negative_lik,
                               self.betas,
                               fprime=dB,
                               callback=self.onThetaIteration)

        return "Trained"
Exemplo n.º 5
0
    def train(self):
        """Run BFGS on the negative log likelihood and store the betas.

        The (k > -1) factor is always true for valid k, so every weight
        — including the intercept — carries the alpha penalty here.
        """
        # Gradient component for beta[k]; negated because we minimize.
        if self.d > 1:
            dB_k = lambda B, k: (k > -1) * self.alpha * B[k] - np.sum(
                [self.y_train[i] * self.x_train[i, k] *
                 sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
                 for i in range(self.n)])
        else:
            # Single-feature case: rows are indexed without a column.
            dB_k = lambda B, k: (k > -1) * self.alpha * B[k] - np.sum(
                [self.y_train[i] * self.x_train[i,] *
                 sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i,]))
                 for i in range(self.n)])

        # Assemble the full gradient from the componentwise derivatives.
        def dB(B):
            return np.array([dB_k(B, k) for k in range(self.d + 1)])

        self.betas = fmin_bfgs(self.negative_like,
                               self.betas,
                               fprime=dB,
                               disp=True)
        return self
Exemplo n.º 6
0
Arquivo: train.py Projeto: d5h/pyocr
 def train(self):
     # Fit one weight vector per class (presumably one-vs-rest — TODO
     # confirm): for each class, build its labelled data set, transform
     # one sample to discover the feature dimension, then minimize the
     # class error with BFGS using the analytic gradient.
     for cls in self.classes:
         print "Training on", cls
         x, y = self.get_classified_data(cls)
         # Transform the first sample only to size the weight vector.
         z0 = self.get_transformed_data(x[0], self.polynomial_transform_order)
         w = np.random.random_sample(len(z0))
         w = fmin_bfgs(self.make_error(x, y), w, fprime=self.make_error_gradient(x, y))
         self.models[cls] = w
Exemplo n.º 7
0
 def train(self):
     # Featurize the data set, then fit randomly-initialized betas
     # (length 2*2*dim + 2) by BFGS on the negative log likelihood,
     # supplying its analytic gradient.
     self.featurize_all()
     self.betas = np.random.randn(2 * 2 * self.dim + 2)
     self.betas = fmin_bfgs(self.neg_log_likelihood,
                            self.betas,
                            fprime=self.neg_log_gradient)
     print self.betas
     return
Exemplo n.º 8
0
def learning_parameters(i, y):
    """Fit a 3-element theta by BFGS on costFunction/gradFunction."""
    objective = lambda theta: costFunction(theta, i, y)
    jacobian = lambda theta: gradFunction(theta, i, y)
    initial = zeros(3)
    return fmin_bfgs(objective, initial, jacobian, disp=True, maxiter=400)
Exemplo n.º 9
0
def learning_parameters(i, y):
    """Optimize a three-element theta with BFGS.

    Wraps costFunction/gradFunction so the data (i, y) is captured in
    closures, starting from an all-zero parameter vector.
    """
    def objective(theta):
        return costFunction(theta, i, y)

    def jacobian(theta):
        return gradFunction(theta, i, y)

    return fmin_bfgs(objective, zeros(3), jacobian, disp=True, maxiter=400)
Exemplo n.º 10
0
def alpha(i, y):
    """Return the BFGS-optimized 3-vector for lossFunction on (i, y)."""
    cost = lambda theta: lossFunction(theta, i, y)
    # Named `jac` to avoid shadowing the module-level `gradient`.
    jac = lambda theta: gradient(theta, i, y)
    return fmin_bfgs(cost, zeros(3), jac, disp=True, maxiter=400)
Exemplo n.º 11
0
def logisticRegression(y, x, alpha=.1):
    """A simple logistic regression model with L2 regularization (zero-mean
    Gaussian priors on parameters).

    Args:
        y: label vector of length n, with labels in {-1, +1}.
        x: design matrix of shape (n, k).
        alpha: L2 regularization strength (the intercept is not penalized).

    Returns:
        (betas, f1): fitted coefficients and the F1 score computed on the
        training data itself.
    """
    n = y.shape[0]
    betas = np.zeros(x.shape[1])

    # Derivative of the negative log likelihood w.r.t. beta_k;
    # (k > 0) leaves the intercept unregularized.
    def dB_k(B, k):
        return (k > 0) * alpha * B[k] - np.sum([
            y[i] * x[i, k] * sigmoid(-y[i] * np.dot(B, x[i, :]))
            for i in range(n)])

    # The full gradient is just an array of componentwise derivatives.
    def dB(B):
        return np.array([dB_k(B, k) for k in range(x.shape[1])])

    def neg_lik(betas):
        """Negative likelihood of the data under the current parameters."""
        l = 0
        # Data likelihood.
        for i in range(n):
            l += np.log(sigmoid(y[i] * np.dot(betas, x[i, :])))
        # Prior (L2) likelihood, skipping the intercept.
        for k in range(1, x.shape[1]):
            l -= (alpha / 2.0) * betas[k]**2
        return -1.0 * l

    betas = fmin_bfgs(neg_lik, betas, fprime=dB)

    # Re-predict the training labels; sigmoid output is rescaled from
    # (0, 1) to (-1, 1) to match the label convention.
    py = np.zeros(n)
    for i in range(n):
        py[i] = (sigmoid(np.dot(betas, x[i, :])) - .5) * 2

    # F-score. Guard against empty positive sets, which previously
    # raised ZeroDivisionError.
    n_pred_pos = float(sum([y2 > 0 for y2 in py]))
    n_true_pos = float(sum([y1 > 0 for y1 in y]))
    precision = (sum([round(y1) == round(y2)
                      for y1, y2 in zip(y, py) if y2 > 0]) / n_pred_pos
                 if n_pred_pos else 0.0)
    recall = (sum([round(y1) == round(y2)
                   for y1, y2 in zip(y, py) if y1 > 0]) / n_true_pos
              if n_true_pos else 0.0)
    f1 = (2 * (precision * recall) / (precision + recall)
          if (precision + recall) else 0.0)

    print(precision, recall)

    return betas, f1
Exemplo n.º 12
0
    def trainEpochs(self, N):
        """Train the associated module for N epochs.

        Runs BFGS for up to N iterations; the per-iteration callback
        tracks train/test error and remembers the best parameters seen.
        Returns the last training error.
        """
        assert len(self.ds) > 0, "Dataset cannot be empty."
        self.module.resetDerivatives()

        # Called by fmin_bfgs after every iteration: track train/test
        # error, remember the best parameters seen so far, and advance
        # the epoch counter.
        def updateStatus(params):
            test_error = self.ds_val.evaluateModuleMSE(self.module)
            if self.epoch > 0 and test_error <= amin(self.test_errors):
                self.optimal_params = self.module.params.copy()
                self.optimal_epoch = self.epoch
            print "Epoch %i, E = %g, avg weight: %g" %\
                (self.epoch, (self._last_err / self.ds.getLength()), mean(absolute(self.module.params)))
            print "Test set error: " + str(test_error)

            self.train_errors.append(self._last_err / self.ds.getLength())
            self.test_errors.append(test_error)
            self.epoch += 1

        # Objective: total squared error over all sequences, computed by
        # forward-activating the module and comparing buffered outputs
        # against the targets in reverse offset order.
        def f(params):
            self.module._setParameters(params)
            error = 0
            for seq in self.ds._provideSequences():
                self.module.reset()
                for sample in seq:
                    self.module.activate(sample[0])
                for offset, sample in reversed(list(enumerate(seq))):
                    target = sample[1]
                    outerr = target - self.module.outputbuffer[offset]
                    error += 0.5 * sum(outerr ** 2)

            self._last_err = error
            return error

        # Gradient: back-propagate the per-offset output errors.
        def df(params):
            self.module._setParameters(params)
            self.module.resetDerivatives()
            for seq in self.ds._provideSequences():
                self.module.reset()
                for sample in seq:
                    self.module.activate(sample[0])
                for offset, sample in reversed(list(enumerate(seq))):
                    target = sample[1]
                    outerr = target - self.module.outputbuffer[offset]
                    # NOTE(review): str(outerr) has no effect — likely
                    # leftover debugging; confirm before removing.
                    str(outerr)
                    self.module.backActivate(outerr)
            # import pdb;pdb.set_trace()
            # self.module.derivs contains the _negative_ gradient
            return -1 * self.module.derivs

        new_params = fmin_bfgs(f, self.module.params, df,
                               maxiter = N, callback = updateStatus,
                               disp = 0)

        #self.module._setParameters(new_params)

        self.epoch += 1
        self.totalepochs += 1
        return self._last_err
Exemplo n.º 13
0
def logisticRegression(y, x, alpha=.1):
    """A simple logistic regression model with L2 regularization (zero-mean
    Gaussian priors on parameters)."""
    n = y.shape[0]
    betas = np.zeros(x.shape[1])

    # Componentwise derivative of the negative log likelihood; the
    # (k > 0) factor exempts the intercept from the L2 penalty.
    def dB_k(B, k):
        penalty = (k > 0) * alpha * B[k]
        data_term = np.sum([
            y[i] * x[i, k] * sigmoid(-y[i] * np.dot(B, x[i, :]))
            for i in xrange(n)])
        return penalty - data_term

    # Full gradient assembled from the per-coefficient derivatives.
    def dB(B):
        return np.array([dB_k(B, k) for k in range(x.shape[1])])

    def neg_lik(betas):
        """ Negative likelihood of the data under the current settings of parameters. """
        l = 0
        # Data term.
        for i in xrange(n):
            l += np.log(sigmoid(y[i] * np.dot(betas, x[i, :])))
        # Gaussian prior on every non-intercept coefficient.
        for k in xrange(1, x.shape[1]):
            l -= (alpha / 2.0) * betas[k]**2
        return -1.0 * l

    betas = fmin_bfgs(neg_lik, betas, fprime=dB)

    # Re-predict the training labels, rescaling sigmoid output from
    # (0, 1) to (-1, 1).
    py = np.zeros(n)
    for i in xrange(n):
        py[i] = (sigmoid(np.dot(betas, x[i, :])) - .5) * 2

    # F-score on the training data itself.
    precision = sum([round(y1) == round(y2) for y1, y2 in zip(y, py) if y2 > 0]) / float(sum([y2 > 0 for y2 in py]))
    recall = sum([round(y1) == round(y2) for y1, y2 in zip(y, py) if y1 > 0]) / float(sum([y1 > 0 for y1 in y]))
    f1 = 2 * (precision * recall) / (precision + recall)

    print(precision, recall)

    return betas, f1
Exemplo n.º 14
0
def logreg_opt(data,targets,start=None,maxiter=100000):
    """Logistic regression using second order optimization methods.

    Args:
        data: (n, d) feature matrix; a bias column of ones is appended.
        targets: (n, c) target matrix.
        start: optional (c, d+1) initial weights; small random if None.
        maxiter: BFGS iteration cap.

    Returns:
        (c, d+1) weight matrix reshaped from the flat BFGS solution.
    """
    # Bug fix: shapes were read from the undefined name `data1`, which
    # raised NameError; read them from `data`.
    n,d = data.shape
    n,c = targets.shape
    data = c_[data,ones(len(data))]
    A = start
    if A is None: A = 0.01*randn(c,d+1)
    def f(x): return logloss(data,targets,x,verbose=1)
    def fprime(x): return dlogloss(data,targets,x)
    result = fmin_bfgs(f,A.ravel(),fprime=fprime,maxiter=maxiter)
    result.shape = (c,d+1)
    return result
Exemplo n.º 15
0
def logreg_opt(data,targets,start=None,maxiter=100000):
    """Logistic regression using second order optimization methods.

    Fits a (c, d+1) weight matrix (including a bias column) by BFGS on
    `logloss`, using `dlogloss` as the analytic gradient.
    """
    # Bug fix: `data1` was undefined (NameError); the shapes come from
    # the `data` argument.
    n,d = data.shape
    n,c = targets.shape
    # Append a constant-ones bias column.
    data = c_[data,ones(len(data))]
    A = start
    if A is None: A = 0.01*randn(c,d+1)
    def f(x): return logloss(data,targets,x,verbose=1)
    def fprime(x): return dlogloss(data,targets,x)
    result = fmin_bfgs(f,A.ravel(),fprime=fprime,maxiter=maxiter)
    # Restore the matrix shape of the flat solution vector.
    result.shape = (c,d+1)
    return result
Exemplo n.º 16
0
    def train(self):
        """Fit betas by BFGS on the negative likelihood.

        Bug fix: fmin_bfgs was previously handed the *result* of
        ``self.negativeLikelihood(self.betas)`` instead of the callable
        itself; the objective argument must be a function of the
        parameters.
        """
        # Gradient component for beta_k (negated: we minimize); the
        # (k > 0) factor leaves the intercept unregularized.
        dBk = lambda B, k: (k > 0) * self.alpha * B[k] - numpy.sum([
            self.trainingLabels[i] * self.trainingVectors[i, k] *
            self.sigmoid(-self.trainingLabels[i] *
                         numpy.dot(B, self.trainingVectors[i, :]))
            for i in range(self.n)])

        # Full gradient vector, one component per feature column.
        dB = lambda B: numpy.array(
            [dBk(B, j) for j in range(self.trainingVectors.shape[1])])

        self.betas = fmin_bfgs(self.negativeLikelihood,
                               self.betas,
                               fprime=dB)
Exemplo n.º 17
0
def main():
    """Load UWB range measurements and compare position solvers:
    Powell and BFGS (both minimizing f_opt over offset-corrected ranges)
    plus a closed-form getPos fix; plot all against ground truth."""

    # Per-anchor range offsets (calibration constants).
    offsets = np.array([-0.66171298, -0.76921925, -0.70283083, -0.91615231])

    # NOTE(review): pickle file opened in text mode; Python 3 requires
    # "rb" here — confirm this script targets Python 2.
    with open("uwb.pkl", "r") as f:
        data = pickle.load(f)

    arr = np.array(data)

    pos_opt = np.array([0., 0., 0.])
    pos_opt1 = np.array([0., 0., 0.])
    pos = np.array([0., 0.])
    pts_opt = []
    pts_opt1 = []
    pts = []

    # Drop the first 100 samples (presumably a warm-up/settling period
    # — TODO confirm).
    arr = arr[100:]

    for row in arr:
        # Powell search, warm-started from the previous solution.
        pos_opt1 = optimize.fmin_powell(f_opt,
                                        pos_opt1,
                                        args=(row[0:4] + offsets, ),
                                        xtol=0.0001,
                                        ftol=0.0001,
                                        disp=0)

        # BFGS search on the same objective, also warm-started.
        pos_opt = optimize.fmin_bfgs(f_opt,
                                     pos_opt,
                                     args=(row[0:4] + offsets, ),
                                     disp=0)
        # NOTE(review): `ac` is not defined in this excerpt — presumably
        # module-level anchor coordinates; verify.
        pos = getPos(ac[0], ac[1], ac[2], row[0], row[1], row[2])

        pts_opt1.append(pos_opt1)
        pts_opt.append(pos_opt)
        pts.append(pos)
        #print(np.linalg.norm(pos - row[4:7]))

    arr = np.array(arr)
    pts = np.array(pts)
    pts_opt = np.array(pts_opt)
    pts_opt1 = np.array(pts_opt1)

    # Scatter: BFGS (green), closed-form (blue), ground truth (red).
    plt.figure()
    plt.scatter(pts_opt[:, 0], pts_opt[:, 2], c="g")
    #plt.scatter(pts_opt1[:,0], pts_opt1[:,2], c="k")
    plt.scatter(pts[:, 0], pts[:, 1], c="b")
    plt.scatter(arr[:, 4], arr[:, 6], c="r")

    plt.show()
Exemplo n.º 18
0
    def train(self):
        """Hand the analytic gradient of the negative log likelihood to
        scipy's gradient-based BFGS optimizer."""

        # Componentwise derivative; negated because we minimize, and the
        # (k > 0) factor keeps the intercept unregularized.
        def dB_k(B, k):
            data_term = np.sum([
                self.y_train[i] * self.x_train[i, k] *
                sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
                for i in range(self.n)])
            return (k > 0) * self.alpha * B[k] - data_term

        # Stack the per-coefficient derivatives into the full gradient.
        def dB(B):
            return np.array([dB_k(B, k)
                             for k in range(self.x_train.shape[1])])

        self.betas = fmin_bfgs(self.negative_lik, self.betas, fprime=dB)
Exemplo n.º 19
0
    def train(self):
        """Minimize the negative log likelihood with BFGS, supplying the
        analytic gradient."""

        n_cols = self.x_train.shape[1]

        # d(-loglik)/d(beta_k); the intercept (k == 0) is not penalized.
        dB_k = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum(
            [self.y_train[i] * self.x_train[i, k] *
             sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
             for i in range(self.n)])

        # Full gradient across all coefficients.
        dB = lambda B: np.array([dB_k(B, k) for k in range(n_cols)])

        self.betas = fmin_bfgs(self.negative_lik, self.betas, fprime=dB)
Exemplo n.º 20
0
    def train(self):

        print "Initial Likelihood : ", self.initialCost

        import dill

        #nns = dill.source.getsource(self.NN)
        #print nns, "************"
        #print dill.dump(self.NN , open('/Users/rohanraja/Dropbox/Distributed Computing Startup/persist/Test.dat', 'w'))

        nnCost = lambda W : self.NN.costFn(self.NN.deLinearize(W), self.x_train, self.y_train)
        nnCostPrime = lambda W : self.NN.costFnPrime(self.NN.deLinearize(W), self.x_train, self.y_train)
        self.betas = fmin_bfgs(self.nnCost, self.betas, fprime=self.nnCostPrime ,callback= self.onThetaIteration)
        #dill.dumps(nnnCost)

        #self.betas = myFminBFGS.fminLooped(nnCost, self.betas, fprime=nnCostPrime ,callback=self.onThetaIteration)
        #self.betas = myFminBFGS.fminLooped(self.nnCost, self.betas, fprime=self.nnCostPrime ,callback= self.onThetaIteration)

        return "Trained"
 def train(self, X, Y):
     """Fit betas with BFGS after rescaling Y into [-1, 1].

     If all Y values are equal there is nothing to learn; the model is
     marked degenerate and training is skipped.
     """
     low = min(Y)
     high = max(Y)

     # Degenerate case: constant targets.
     self.degenerate = False
     if high == low:
         self.degenerate = high
         return

     # Linear map taking [low, high] onto [-1, 1].
     self.m = 2.0 / (high - low)
     self.b = (2.0 * low / (low - high)) - 1

     # Constants for unscaling the output later.
     self.z = high - low
     self.w = low

     self.X = np.array(X)
     self.Y = self.m * np.array(Y) + self.b
     self.n = len(X)
     self.betas = np.zeros(len(X[0]))

     # Gradient of the negative log likelihood; intercept unpenalized.
     def dB_k(B, k):
         data_term = np.sum([
             self.Y[i] * self.X[i, k] *
             sigmoid(-self.Y[i] * np.dot(B, self.X[i, :]))
             for i in range(self.n)])
         return (k > 0) * self.alpha * B[k] - data_term

     # Full gradient over all columns of X.
     def dB(B):
         return np.array([dB_k(B, k) for k in range(self.X.shape[1])])

     self.betas = fmin_bfgs(self.lik, self.betas, fprime=dB, disp=False)
Exemplo n.º 22
0
    def train(self, X, Y):
        """Rescale the targets to [-1, 1] and fit betas with BFGS.

        When the targets are all identical the model is marked
        degenerate and training is skipped entirely.
        """
        low, high = min(Y), max(Y)

        # No spread in the targets: degenerate to a constant predictor.
        self.degenerate = False
        if high == low:
            self.degenerate = high
            return

        # Affine rescaling of Y onto [-1, 1].
        self.m = 2.0 / (high - low)
        self.b = (2.0 * low / (low - high)) - 1

        # Kept so predictions can be mapped back to the original range.
        self.z = high - low
        self.w = low

        self.X = np.array(X)
        self.Y = self.m * np.array(Y) + self.b
        self.n = len(X)
        self.betas = np.zeros(len(X[0]))

        # d(-loglik)/d(beta_k); (k > 0) leaves the intercept unpenalized.
        dB_k = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum([
            self.Y[i] * self.X[i, k] *
            sigmoid(-self.Y[i] * np.dot(B, self.X[i, :]))
            for i in range(self.n)])

        # The full gradient is an array of componentwise derivatives.
        dB = lambda B: np.array(
            [dB_k(B, k) for k in range(self.X.shape[1])])

        self.betas = fmin_bfgs(self.lik, self.betas, fprime=dB, disp=False)
Exemplo n.º 23
0
    def train(self):
        """Fit the network weights with BFGS and report progress.

        Bug fix: the optimizer was given ``self.nnCost`` and
        ``self.nnCostPrime``, but the cost/gradient wrappers are the
        local lambdas defined in this method — use them directly.
        (Large slabs of commented-out alternative optimizer calls were
        removed as dead code.)

        Returns:
            str: the literal string "Trained".
        """
        print("Initial Likelihood : ", self.initialCost)

        # Cost and gradient over the flattened weight vector W: restore
        # the network shape, then evaluate against the training data.
        nnCost = lambda W: self.NN.costFn(self.NN.deLinearize(W), self.x_train,
                                          self.y_train)
        nnCostPrime = lambda W: self.NN.costFnPrime(self.NN.deLinearize(W),
                                                    self.x_train, self.y_train)

        self.betas = fmin_bfgs(nnCost,
                               self.betas,
                               fprime=nnCostPrime,
                               callback=self.onThetaIteration)

        return "Trained"
Exemplo n.º 24
0
    def train(self, data, alpha=0):
        """Fit betas on an external data object with scipy's BFGS.

        Bug fix: two statements were indented with tabs while the rest
        of the method used spaces, a TabError under Python 3; the
        indentation is normalized to spaces throughout.

        Args:
            data: object exposing x_train, y_train, n and likelihood().
            alpha: L2 regularization strength, stored on the instance.
        """
        # Remember alpha so it can be referred to later if needed.
        self.alpha = alpha

        # Derivative of the negative log likelihood w.r.t. beta_k; the
        # (k > 0) factor leaves the intercept unregularized.
        dB_k = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum([
            data.y_train[i] * data.x_train[i, k] *
            sigmoid(-data.y_train[i] * np.dot(B, data.x_train[i, :]))
            for i in range(data.n)])

        # The full gradient is just an array of componentwise derivatives.
        dB = lambda B: np.array([dB_k(B, k)
                                 for k in range(data.x_train.shape[1])])

        # Objective: negative log likelihood under the current betas.
        func = lambda B: -data.likelihood(betas=B, alpha=self.alpha)

        self.betas = fmin_bfgs(func, self.betas, fprime=dB)
Exemplo n.º 25
0
def optHyper(gpr,logtheta,Ifilter=None,priors=None,maxiter=100,gradcheck=False):
    """Optimize the hyperparameters of the GP `gpr` via BFGS.

    gpr      : GP regression class
    logtheta : starting point for the optimization (log-hyperparameters)
    Ifilter  : filter index vector masking out fixed dimensions
    priors   : non-default priors; otherwise assume first index is the
               amplitude, last is noise, rest are lengthscales
    maxiter  : BFGS iteration cap
    gradcheck: if true, report scipy's check_grad before optimizing
    """
    if priors is None:        # use a very crude default prior if we don't get anything else:
        priors = defaultPriors(gpr,logtheta)

    def fixlogtheta(logtheta,limit=1E3):
        """Clamp logtheta into [-limit, limit] (currently unused)."""
        rv      = logtheta.copy()
        I_upper = logtheta>limit
        I_lower = logtheta<-limit
        rv[I_upper] = +limit
        rv[I_lower] = -limit
        return rv

    def checklogtheta(logtheta,limit=1E3):
        """Return True when every component lies within [-limit, limit]."""
        I_upper = logtheta>limit
        I_lower = logtheta<-limit
        return not (I_upper.any() or I_lower.any())

    #TODO: mean-function
    def f(logtheta):
        # Objective: log marginal likelihood; out-of-range or NaN values
        # are mapped to a large constant so the optimizer looks elsewhere.
        #logtheta_ = fixlogtheta(logtheta)
        logtheta_ = logtheta
        if not checklogtheta(logtheta):
            print logtheta
            #make optimzier/sampler search somewhere else
            return 1E6

        rv =  gpr.lMl(logtheta_,lml=True,dlml=False,priors=priors)
        LG.debug("L("+str(logtheta_)+")=="+str(rv))
        if isnan(rv):
            return 1E6
        return rv
    def df(logtheta):
        # Gradient of the objective with the same out-of-range guard;
        # filtered dimensions are zeroed via Ifilter, NaNs clamped.
        #logtheta_ = fixlogtheta(logtheta)
        logtheta_ = logtheta
        if not checklogtheta(logtheta):
            #make optimzier/sampler search somewhere else
            print logtheta
            return zeros_like(logtheta_)
        rv =  gpr.lMl(logtheta_,lml=False,dlml=True,priors=priors)
        LG.debug("dL("+str(logtheta_)+")=="+str(rv))
        #mask out filtered dimensions
        if not Ifilter is None:
            rv = rv*Ifilter
        if isnan(rv).any():
            In = isnan(rv)
            rv[In] = 1E6
        return rv

    # Optional debug plot of objective/gradient along one dimension
    # (disabled: plotit is immediately overwritten with False).
    plotit = True
    plotit = False
    if(plotit):
        X = arange(0.001,0.05,0.001)
        Y = zeros(size(X))
        dY = zeros(size(X))
        k=2
        theta = logtheta
        for i in range(len(X)):
            theta[k] = log(X[i])
            Y[i] = f(theta)
            dY[i] = df(theta)[k]
        plot(X,Y)
        hold(True);
        plot(X,dY)
        show()

    #start-parameters
    theta0 = logtheta

    LG.info("startparameters for opt:"+str(exp(logtheta)))
    if gradcheck:
        LG.info("check_grad:" + str(OPT.check_grad(f,df,theta0)))
        raw_input()
    LG.info("start optimization")
    #opt_params=OPT.fmin_cg (f, theta0, fprime = df, args = (), gtol = 1.0000000000000001e-005, maxiter =maxiter, full_output = 1, disp = 1, retall = 0)
    #opt_params=OPT.fmin_ncg (f, theta0, fprime = df, fhess_p=None, fhess=None, args=(), avextol=1.0000000000000001e-04, epsilon=1.4901161193847656e-08, maxiter=maxiter, full_output=1, disp=1, retall=0)

    opt_params=OPT.fmin_bfgs(f, theta0, fprime=df, args=(), gtol=1.0000000000000001e-04, norm=inf, epsilon=1.4901161193847656e-08, maxiter=maxiter, full_output=1, disp=(0), retall=0)

    # full_output=1: element 0 of the returned tuple is the optimum.
    rv = opt_params[0]
    LG.info("old parameters:")
    LG.info(str(exp(logtheta)))
    LG.info("optimized parameters:")
    LG.info(str(exp(rv)))
    LG.info("grad:"+str(df(rv)))
    return rv
Exemplo n.º 26
0
 def optimise_bfgs(self,start):
   # Minimize self.objective starting from `start` with BFGS, feeding
   # the analytic gradient; self.debug runs after every iteration.
   print
   print "***** BFGS OPTIMISATION *****"
   return fmin_bfgs(self.objective, start, fprime=self.grad, callback=self.debug)
Exemplo n.º 27
0
def optHyper(gpr,
             logtheta,
             Ifilter=None,
             priors=None,
             maxiter=100,
             gradcheck=False):
    """Optimize the hyperparameters of the GP `gpr` via BFGS.

    gpr      : GP regression class
    logtheta : starting point for the optimization (log-hyperparameters)
    Ifilter  : filter index vector masking out fixed dimensions
    priors   : non-default priors; otherwise assume first index is the
               amplitude, last is noise, rest are lengthscales
    maxiter  : BFGS iteration cap
    gradcheck: if true, report scipy's check_grad before optimizing
    """
    if priors is None:  # use a very crude default prior if we don't get anything else:
        priors = defaultPriors(gpr, logtheta)

    def fixlogtheta(logtheta, limit=1E3):
        """Clamp logtheta into [-limit, limit] (currently unused)."""
        rv = logtheta.copy()
        I_upper = logtheta > limit
        I_lower = logtheta < -limit
        rv[I_upper] = +limit
        rv[I_lower] = -limit
        return rv

    def checklogtheta(logtheta, limit=1E3):
        """Return True when every component lies within [-limit, limit]."""
        I_upper = logtheta > limit
        I_lower = logtheta < -limit
        return not (I_upper.any() or I_lower.any())

    #TODO: mean-function
    def f(logtheta):
        # Objective: log marginal likelihood; out-of-range or NaN values
        # are mapped to a large constant so the optimizer looks elsewhere.
        #logtheta_ = fixlogtheta(logtheta)
        logtheta_ = logtheta
        if not checklogtheta(logtheta):
            print logtheta
            #make optimzier/sampler search somewhere else
            return 1E6

        rv = gpr.lMl(logtheta_, lml=True, dlml=False, priors=priors)
        LG.debug("L(" + str(logtheta_) + ")==" + str(rv))
        if isnan(rv):
            return 1E6
        return rv

    def df(logtheta):
        # Gradient of the objective with the same out-of-range guard;
        # filtered dimensions are zeroed via Ifilter, NaNs clamped.
        #logtheta_ = fixlogtheta(logtheta)
        logtheta_ = logtheta
        if not checklogtheta(logtheta):
            #make optimzier/sampler search somewhere else
            print logtheta
            return zeros_like(logtheta_)
        rv = gpr.lMl(logtheta_, lml=False, dlml=True, priors=priors)
        LG.debug("dL(" + str(logtheta_) + ")==" + str(rv))
        #mask out filtered dimensions
        if not Ifilter is None:
            rv = rv * Ifilter
        if isnan(rv).any():
            In = isnan(rv)
            rv[In] = 1E6
        return rv

    # Optional debug plot of objective/gradient along one dimension
    # (disabled: plotit is immediately overwritten with False).
    plotit = True
    plotit = False
    if (plotit):
        X = arange(0.001, 0.05, 0.001)
        Y = zeros(size(X))
        dY = zeros(size(X))
        k = 2
        theta = logtheta
        for i in range(len(X)):
            theta[k] = log(X[i])
            Y[i] = f(theta)
            dY[i] = df(theta)[k]
        plot(X, Y)
        hold(True)
        plot(X, dY)
        show()

    #start-parameters
    theta0 = logtheta

    LG.info("startparameters for opt:" + str(exp(logtheta)))
    if gradcheck:
        LG.info("check_grad:" + str(OPT.check_grad(f, df, theta0)))
        raw_input()
    LG.info("start optimization")
    #opt_params=OPT.fmin_cg (f, theta0, fprime = df, args = (), gtol = 1.0000000000000001e-005, maxiter =maxiter, full_output = 1, disp = 1, retall = 0)
    #opt_params=OPT.fmin_ncg (f, theta0, fprime = df, fhess_p=None, fhess=None, args=(), avextol=1.0000000000000001e-04, epsilon=1.4901161193847656e-08, maxiter=maxiter, full_output=1, disp=1, retall=0)

    opt_params = OPT.fmin_bfgs(f,
                               theta0,
                               fprime=df,
                               args=(),
                               gtol=1.0000000000000001e-04,
                               norm=inf,
                               epsilon=1.4901161193847656e-08,
                               maxiter=maxiter,
                               full_output=1,
                               disp=(0),
                               retall=0)

    # full_output=1: element 0 of the returned tuple is the optimum.
    rv = opt_params[0]
    LG.info("old parameters:")
    LG.info(str(exp(logtheta)))
    LG.info("optimized parameters:")
    LG.info(str(exp(rv)))
    LG.info("grad:" + str(df(rv)))
    return rv
Exemplo n.º 28
0
 def _cgmin(self, X, Y):
     """Fit self.W by BFGS on self._costFunction over (X, Y), keeping
     the full optimizer output for inspection."""
     start = np.copy(self.W)
     out = fmin_bfgs(self._costFunction, start, args=(X, Y),
                     full_output=1, retall=1)
     self.allvec = out[-1]   # allvecs: the parameter vector at each step
     self.W = out[0]         # xopt: the minimizing weights
     # NOTE(review): with full_output=1 and retall=1, out[-2] is the
     # BFGS warnflag, not a gradient-evaluation count — confirm what
     # gradEval is meant to hold.
     self.gradEval = out[-2]
Exemplo n.º 29
0
 def optimise_bfgs(self, start):
     print
     print "***** BFGS OPTIMISATION *****"
     return fmin_bfgs(self.objective, start, fprime=self.grad, callback=self.debug)
Exemplo n.º 30
0
    return  cost

def func(p, *args):
    """Signed residual of the linear model y = a*x + b.

    p is the (a, b) parameter pair; args supplies (x, y). Returns
    y - (a*x + b), the residual form expected by scipy.optimize.leastsq.
    """
    slope, intercept = p
    x, y = args
    return y - (slope * x + intercept)

# NOTE(review): Python 2 script (print statements). Fits a line y = a*x + b
# to noisy data three ways: BFGS, conjugate gradient, and least squares.
# Synthetic data: y_true = 3x + 4 plus uniform noise in [0, 10).
x = np.arange(1,10,1);
y_true = 3*x+ 4;
y_mean = y_true + 10*np.random.rand(len(x))

# Initial parameter guess (a, b).
p0= np.array([1,2]);
print p0
# func1 is defined elsewhere in this file; presumably it returns a scalar
# cost as required by fmin_bfgs/fmin_cg -- verify against its definition.
rs1= fmin_bfgs(func1,[1,2],args=(x,y_mean))

rs2= fmin_cg(func1,[1,2],args=(x,y_mean))
# leastsq uses func, which returns the residual vector y - (a*x + b).
rs = leastsq(func,p0,args=(x,y_mean));
# 
# rs1=fmin_bfgs(func,p0,args=(x,y_mean))

print "rs=",rs
# 
print "rs1=",rs1
print "rs2=",rs2
# leastsq returns (params, info): rs[0] holds (a, b); fmin_bfgs/fmin_cg
# return the parameter array directly.
y1= rs[0][0]*x + rs[0][1]
y2 = rs1[0]*x + rs1[1]
# Plot both fitted lines against the noisy data points.
pl.plot(x,y1,'r',label="y1");
pl.plot(x,y2,'b',label="y2");
pl.plot(x,y_mean,'og',label='y_mean');
Exemplo n.º 31
0
def l_bfgs_min():
    """Minimise module-level `f` from 0 with BFGS and print the result."""
    result = optimize.fmin_bfgs(f, 0)
    print(result)
Exemplo n.º 32
0
# -*- coding: GBK -*-
'''
Created on 2013-11-02

@author: asus
'''

from scipy.optimize.optimize import fmin_bfgs
from numpy.ma.core import zeros


def rosen(x, p1, p2):
    """Rosenbrock banana function of the vector x.

    p1 and p2 are accepted (so extra optimizer args can be passed through)
    but do not influence the value. Sums 100*(x[i+1] - x[i]^2)^2 +
    (1 - x[i])^2 over consecutive pairs; global minimum 0 at all-ones.
    """
    return sum(100.0 * (nxt - cur ** 2) ** 2 + (1 - cur) ** 2
               for cur, nxt in zip(x[:-1], x[1:]))


# Minimise the Rosenbrock function from x0 with BFGS; the extra args
# (p1, p2) are forwarded to rosen, which ignores them.
x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
xopt = fmin_bfgs(rosen, x0, args=[1, 1])
Exemplo n.º 33
0
    def train_bfgs(self,
                   xs,
                   ys,
                   iterations=10000,
                   iteration_callback=None,
                   validation_xs=None,
                   validation_ys=None,
                   validation_frequency=1,
                   regularization=0.01,
                   plot_errors=None):
        """
        Train on data stored in Theano tensors using scipy's BFGS optimizer.

        E.g.
        xs = rng.randn(N, num_features)
        ys = rng.randint(size=N, low=0, high=2)

        iteration_callback is called after each iteration with args
        (iteration, cost, training_error, validation_error,
        validation_improvement); the last three are None on iterations
        where the validation set was not evaluated.

        """
        # Compiled Theano function: gradient of the regularized cost w.r.t.
        # theta for a given (x, y) batch.
        compute_grad = theano.function(inputs=[self.x, self.y],
                                       outputs=self.gtheta,
                                       givens={self.reg_coef: regularization})

        # Prepare cost function to optimize and its gradient (jacobian).
        # BFGS hands us a flat parameter vector; push it into the shared
        # variable before evaluating.
        def train_fn(theta):
            self.theta.set_value(theta, borrow=True)
            return self._cost_fn_reg(xs, ys, reg_coef=regularization)

        def train_fn_grad(theta):
            self.theta.set_value(theta, borrow=True)
            return compute_grad(xs, ys)

        # One-element lists act as mutable cells so the nested callback can
        # rebind their contents.
        best_validation_error = [numpy.inf]
        iteration_counter = [0]
        validation_errors = []
        training_errors = []

        def callback(new_theta):
            # Update the parameters of the model
            self.theta.set_value(new_theta, borrow=True)

            # Only evaluate on val set every validation_frequency-th iteration
            if validation_xs is not None and (iteration_counter[0] +
                                              1) % validation_frequency == 0:
                # Compute accuracy on validation set
                validation_error = self.error(validation_xs, validation_ys)
                # Compute accuracy on training set
                training_error = self.error(xs, ys)
                # BUG FIX: the original branch was inverted -- since the best
                # error starts at inf, `validation_error < best` was always
                # true, so the best error was never updated and a non-zero
                # improvement was never reported. Track the best error and
                # report the relative improvement (%) when we beat it.
                if validation_error < best_validation_error[0]:
                    if numpy.isinf(best_validation_error[0]):
                        # First measurement: no previous best to compare to.
                        validation_improvement = 0.0
                    else:
                        validation_improvement = (
                            best_validation_error[0] - validation_error
                        ) / best_validation_error[0] * 100.0
                    best_validation_error[0] = validation_error
                else:
                    validation_improvement = 0.0

                # Plot some graphs
                if plot_errors and validation_error is not None:
                    validation_errors.append(validation_error)
                    training_errors.append(training_error)
                    plot_costs(plot_errors,
                               (training_errors, "training set error"),
                               (validation_errors, "val set error"))
            else:
                validation_error = training_error = validation_improvement = None

            if iteration_callback is not None:
                # TODO Compute training cost?
                iteration_callback(iteration_counter[0], 0.0, training_error,
                                   validation_error, validation_improvement)

            iteration_counter[0] += 1

        # Call scipy's BFGS optimization function
        fmin_bfgs(train_fn,
                  self.theta.get_value(),
                  fprime=train_fn_grad,
                  callback=callback,
                  disp=True,
                  maxiter=iterations)
Exemplo n.º 34
0
def func(p, *args):
    """Return the signed residual y - (a*x + b) of a linear model.

    Parameters are the pair p = (a, b); args carries the data (x, y).
    This residual form is what scipy.optimize.leastsq minimises.
    """
    a, b = p
    x, y = args
    prediction = a * x + b
    return y - prediction


# NOTE(review): Python 2 script (print statements). Fits y = a*x + b to
# noisy data via BFGS, conjugate gradient, and least squares.
# Synthetic data: y_true = 3x + 4 plus uniform noise in [0, 10).
x = np.arange(1, 10, 1)
y_true = 3 * x + 4
y_mean = y_true + 10 * np.random.rand(len(x))

# Initial guess for (a, b).
p0 = np.array([1, 2])
print p0
# func1 is defined elsewhere in this file; presumably a scalar cost
# suitable for fmin_bfgs/fmin_cg -- verify against its definition.
rs1 = fmin_bfgs(func1, [1, 2], args=(x, y_mean))

rs2 = fmin_cg(func1, [1, 2], args=(x, y_mean))
# leastsq minimises the residual vector returned by func.
rs = leastsq(func, p0, args=(x, y_mean))
#
# rs1=fmin_bfgs(func,p0,args=(x,y_mean))

print "rs=", rs
#
print "rs1=", rs1
print "rs2=", rs2
# rs[0] is the (a, b) pair from leastsq; rs1/rs2 are parameter arrays.
y1 = rs[0][0] * x + rs[0][1]
y2 = rs1[0] * x + rs1[1]
# Plot the two fitted lines against the noisy data points.
pl.plot(x, y1, 'r', label="y1")
pl.plot(x, y2, 'b', label="y2")
pl.plot(x, y_mean, 'og', label='y_mean')
Exemplo n.º 35
0
# -*- coding: GBK -*-
'''
Created on 2013-11-02

@author: asus
'''

from scipy.optimize.optimize import fmin_bfgs
from numpy.ma.core import zeros

def rosen(x, p1, p2):
    """Rosenbrock function of x; p1 and p2 are unused pass-through args.

    Value is the sum over consecutive pairs of
    100*(x[i+1] - x[i]^2)^2 + (1 - x[i])^2; minimum 0 at all-ones.
    """
    return sum(100.0 * (b - a ** 2) ** 2 + (1 - a) ** 2
               for a, b in zip(x[:-1], x[1:]))

    
# Minimise the Rosenbrock function from x0 with BFGS; the extra args
# (p1, p2) are forwarded to rosen, which ignores them.
x0 = [1.3, 0.7, 0.8, 1.9, 1.2]  
xopt = fmin_bfgs(rosen, x0,args=[1,1])