Example #1
    def train(self, data, y, iters=400):
        """ Data is a matrix of values  with the rows being the data points
            and the columns the variables/features.
            y is the matrix of expected results
            - iters: maximum number of iterations to perform
        """
        m = data.shape[0]
        data = np.hstack([np.ones((data.shape[0],1)), data])
        theta0 = np.zeros(data.shape[1])
        sigmoid_cache = SigmoidCache()
        log.info("Training the logistic regressor. Regularized = %s", self.regularize)
        
        if(self.regularize):
            ret = opt.fmin_ncg(regularized_error, theta0, regularized_gradient, 
                    args=(data, y, sigmoid_cache, self.tau), maxiter=iters)
        else:
            # BFGS gives me numerical problems with the dataset ../datasets/logreg/ex2data1.txt
    #        ret = opt.fmin_bfgs(error, theta0, gradient, 
    #                args=(data, y, sigmoid_cache), maxiter=iters)
    
            # this works
            ret = opt.fmin_ncg(error, theta0, gradient, 
                    args=(data, y, sigmoid_cache), maxiter=iters)
        # this works but does not use the gradient
#        ret = opt.fmin(error, theta0, args=(data, y, sigmoid_cache), maxiter=iters)

        self.trained_theta = ret
        log.info("Finished training theta %s", self.trained_theta)
Example #2
def newton_cg(x0, f, f_prime, hessian):
    all_x_i = [x0[0]]
    all_y_i = [x0[1]]
    all_f_i = [f(x0)]
    def store(X):
        x, y = X
        all_x_i.append(x)
        all_y_i.append(y)
        all_f_i.append(f(X))
    optimize.fmin_ncg(f, x0, f_prime, fhess=hessian, callback=store,
                avextol=1e-12)
    return all_x_i, all_y_i, all_f_i
Example #3
 def __init__(self, x, f, grad_f, hess_f, maxiter=None, tol=1e-7, verbose=False):
     t0 = time()
     stuff = fmin_ncg(
         f, x, grad_f, fhess=hess_f, args=(), maxiter=maxiter, avextol=tol, full_output=True, disp=verbose
     )
     self.x, self.fval = stuff[0], stuff[1]
     self.time = time() - t0
Example #4
    def Opti(a, b, c):
        # min_w  a * w'*w + b'*w  + c \sum_i{ 1/lam/e^gam * [  lam ti^p exp (zi) - di *( ln lam + ln p + (p-1) ln ti + zi  )    ]
        # zi = beta' xi
        # the SUM * lam * e^gam =

        def obj_value(ww):
            w = np.reshape(ww, (d, 1))
            z = np.dot(X, ww)
            ez = expp(z)
            vl = (
                a * np.dot(w.T, w)
                + np.dot(b, w)
                + c * (np.dot(lamtip, ez) - (np.dot(D, z) + ss + ss1)) / lam / math.exp(GAM)
            )
            return float(vl)

        def obj_grad(ww):
            w = np.reshape(ww, (d, 1))
            z = np.dot(X, w)
            gez = derexpp(z)
            gl = lamtip1 * gez - DT
            grad = 2 * a * w + np.reshape(b, (d, 1)) + c * np.dot(XT, gl) / lam / math.exp(GAM)
            return grad[:, 0]

        bopt = fmin_ncg(obj_value, np.zeros(d), fprime=obj_grad, disp=False)
        return bopt
Example #5
    def test_ncg_hessp(self, use_wrapper=False):
        """ Newton conjugate gradient with Hessian times a vector p """
        if use_wrapper:
            opts = {'maxit': self.maxiter, 'disp': False}
            retval = optimize.minimize(self.func, self.startparams,
                                       method='Newton-CG', jac=self.grad,
                                       hess = self.hessp,
                                       args=(), options=opts,
                                       full_output=False, retall=False)
        else:
            retval = optimize.fmin_ncg(self.func, self.startparams, self.grad,
                                       fhess_p = self.hessp,
                                       args=(), maxiter=self.maxiter,
                                       full_output=False, disp=False,
                                       retall=False)

        params = retval

        err = abs(self.func(params) - self.func(self.solution))
        #print "NCG: Difference is: " + str(err)
        assert_(err < 1e-6)

        # Ensure that function call counts are 'known good'; these are from
        # Scipy 0.7.0. Don't allow them to increase.
        assert_(self.funccalls == 7, self.funccalls)
        assert_(self.gradcalls <= 18, self.gradcalls) # 0.9.0
        #assert_(self.gradcalls == 18, self.gradcalls) # 0.8.0
        #assert_(self.gradcalls == 22, self.gradcalls) # 0.7.0

        # Ensure that the function behaves the same; this is from Scipy 0.7.0
        assert_(np.allclose(self.trace[3:5],
                           [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
                            [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
                           atol=1e-6, rtol=1e-7), self.trace[:5])
Example #6
def maximize(L, DL, D2L, x, method=None, disp=False):
    """Main function to perform numerical optimization. L, DL and D2L are the objective function and its
    derivative and Hessian, and x is the initial guess (current rating).
    
    It will attempt the maximization using four different methods, from the fastest and least robust to the slowest
    and most robust. It returns the argmax, or None if an error occurred."""
    mL = lambda x: -L(x)
    mDL = lambda x: -DL(x)
    mD2L = lambda x: -D2L(x)

    # Newton Conjugate Gradient
    if method == None or method == 'ncg':
        func = lambda x0: opt.fmin_ncg(mL, x0, fprime=mDL, fhess=mD2L, disp=disp, full_output=True, avextol=1e-10)
        xm = check_max(func, x, 5, 'NCG', disp)
        if xm != None:
            return xm

    # Broyden-Fletcher-Goldfarb-Shanno
    if method == None or method == 'bfgs':
        func = lambda x0: opt.fmin_bfgs(mL, x0, fprime=mDL, disp=disp, full_output=True, gtol=1e-10)
        xm = check_max(func, x, 6, 'BFGS', disp)
        if xm != None:
            return xm

    # Powell
    if method == None or method == 'powell':
        func = lambda x0: opt.fmin_powell(mL, x0, disp=disp, full_output=True, ftol=1e-10)
        xm = check_max(func, x, 5, 'POWELL', disp)
        if xm != None:
            return xm

    # Downhill simplex (last resort)
    func = lambda x0: opt.fmin(mL, x0, disp=disp, full_output=True, ftol=1e-10)
    xm = check_max(func, x, 4, 'DOWNHILL_SIMPLEX', disp)
    return xm
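check_max is not included in this snippet. The integer argument (5, 6, 5, 4) matches the position of the warnflag entry in each optimizer's full_output tuple, so a plausible minimal sketch of such a helper, written here as an assumption rather than the project's actual code, is:

def check_max(func, x0, warn_index, name, disp):
    # Run the wrapped optimizer and accept its result only if the warnflag at
    # position warn_index of the full_output tuple is zero.
    try:
        ret = func(x0)
    except Exception:
        if disp:
            print('%s raised an exception' % name)
        return None
    if ret[warn_index] != 0:
        if disp:
            print('%s did not converge (warnflag = %s)' % (name, ret[warn_index]))
        return None
    return ret[0]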
Example #7
def _fit_ncg(f, score, start_params, fargs, kwargs, disp=True,
                 maxiter=100, callback=None, retall=False,
                 full_output=True, hess=None):
    fhess_p = kwargs.setdefault('fhess_p', None)
    avextol = kwargs.setdefault('avextol', 1.0000000000000001e-05)
    epsilon = kwargs.setdefault('epsilon', 1.4901161193847656e-08)
    retvals = optimize.fmin_ncg(f, start_params, score, fhess_p=fhess_p,
                                fhess=hess, args=fargs, avextol=avextol,
                                epsilon=epsilon, maxiter=maxiter,
                                full_output=full_output, disp=disp,
                                retall=retall, callback=callback)
    if full_output:
        if not retall:
            xopt, fopt, fcalls, gcalls, hcalls, warnflag = retvals
        else:
            xopt, fopt, fcalls, gcalls, hcalls, warnflag, allvecs =\
                retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'fcalls': fcalls, 'gcalls': gcalls,
                   'hcalls': hcalls, 'warnflag': warnflag,
                   'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})
    else:
        xopt = None

    return xopt, retvals
Example #8
def fitGLM(X, Y, H, l, hl, sp, norm, of, lateral, num_neurons_to_estimate):
    num_pres, num_neurons = numpy.shape(Y)
    num_pres, kernel_size = numpy.shape(X)

    if H != None:
        (trash, hist_size) = numpy.shape(H)
    else:
        hist_size = 0

    Ks = numpy.zeros((num_neurons, kernel_size + 2 + hist_size + lateral * (num_neurons - 1)))

    laplace = laplaceBias(numpy.sqrt(kernel_size), numpy.sqrt(kernel_size))

    rpi = numpy.linalg.pinv(X.T * X + __main__.__dict__.get("RPILaplaceBias", 0.0001) * laplace) * X.T * Y
    for i in xrange(0, num_neurons_to_estimate):
        print i
        k0 = (
            rpi[:, i].getA1().tolist()
            + [0, 0]
            + numpy.zeros((1, hist_size)).flatten().tolist()
            + numpy.zeros((1, lateral * (num_neurons - 1))).flatten().tolist()
        )
        if lateral and H != None:
            HH = numpy.hstack((H, Y[:, :i], Y[:, i + 1 :]))
        elif lateral:
            HH = numpy.hstack((Y[:, :i], Y[:, i + 1 :]))
        else:
            HH = H

        glm = GLM(numpy.mat(X), numpy.mat(Y[:, i]), l * laplace, HH, hl, sp, norm, of=of)

        K = fmin_ncg(glm.func(), numpy.array(k0), glm.der(), fhess=glm.hess(), avextol=0.0000001, maxiter=200)
        Ks[i, :] = K

    return [Ks, rpi, glm]
Example #9
 def fmin_ncg(self, model, funcs, *args, **kwargs):
     efunc = self.efunc(model, funcs)
     gfunc = self.gfunc(model, funcs)
     hfunc = self.hfunc(model, funcs)
     result = optimize.fmin_ncg(efunc, model.coords, gfunc, fhess = hfunc, *args, **kwargs)
     model.coords = result[0]
     return result
Example #10
    def test_ncg_hessp(self):
        # Newton conjugate gradient with Hessian times a vector p.
        if self.use_wrapper:
            opts = {'maxiter': self.maxiter, 'disp': self.disp,
                    'return_all': False}
            retval = optimize.minimize(self.func, self.startparams,
                                       method='Newton-CG', jac=self.grad,
                                       hessp=self.hessp,
                                       args=(), options=opts)['x']
        else:
            retval = optimize.fmin_ncg(self.func, self.startparams, self.grad,
                                       fhess_p=self.hessp,
                                       args=(), maxiter=self.maxiter,
                                       full_output=False, disp=self.disp,
                                       retall=False)

        params = retval

        assert_allclose(self.func(params), self.func(self.solution),
                        atol=1e-6)

        # Ensure that function call counts are 'known good'; these are from
        # Scipy 0.7.0. Don't allow them to increase.
        assert_(self.funccalls == 7, self.funccalls)
        assert_(self.gradcalls <= 18, self.gradcalls)  # 0.9.0
        # assert_(self.gradcalls == 18, self.gradcalls) # 0.8.0
        # assert_(self.gradcalls == 22, self.gradcalls) # 0.7.0

        # Ensure that the function behaves the same; this is from Scipy 0.7.0
        assert_allclose(self.trace[3:5],
                        [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
                         [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
                        atol=1e-6, rtol=1e-7)
Example #11
 def minimize(fun, x0, jac=None, hess=None, *args, **kwargs):
     method = kwargs.pop("method", "Newton-CG")
     assert method == "Newton-CG"
     r = optimize.fmin_ncg(f=fun, x0=x0, fprime=jac, fhess=hess, full_output=True, *args, **kwargs)
     res = _Result()
     res.x, res.success, res.message = r[0], r[5] == 0, "unknown"
     return res
Example #12
 def __call__(self, net, input, target):
     from scipy.optimize import fmin_ncg
     #if 'disp' not in self.kwargs:
     #    self.kwargs['disp'] = 0
     x = fmin_ncg(self.fcn, self.x.copy(), fprime=self.grad, callback=self.step, **self.kwargs)
     self.x[:] = x
     return None
Example #13
    def test_ncg(self, use_wrapper=False):
        """ line-search Newton conjugate gradient optimization routine
        """
        if use_wrapper:
            opts = {'maxit': self.maxiter, 'disp': False}
            retval = optimize.minimize(self.func, self.startparams,
                                       method='Newton-CG', jac=self.grad,
                                       args=(), options=opts,
                                       full_output=False, retall=False)
        else:
            retval = optimize.fmin_ncg(self.func, self.startparams, self.grad,
                                       args=(), maxiter=self.maxiter,
                                       full_output=False, disp=False,
                                       retall=False)

        params = retval

        assert_allclose(self.func(params), self.func(self.solution),
                        atol=1e-6)

        # Ensure that function call counts are 'known good'; these are from
        # Scipy 0.7.0. Don't allow them to increase.
        assert_(self.funccalls == 7, self.funccalls)
        assert_(self.gradcalls <= 18, self.gradcalls) # 0.9.0
        #assert_(self.gradcalls == 18, self.gradcalls) # 0.8.0
        #assert_(self.gradcalls == 22, self.gradcalls) # 0.7.0

        # Ensure that the function behaves the same; this is from Scipy 0.7.0
        assert_allclose(self.trace[3:5],
                        [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
                         [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
                        atol=1e-6, rtol=1e-7)
Example #14
 def par_est(self):
     start = np.dot(la.inv(spdot(self.x.T, self.x)),
                    spdot(self.x.T, self.y))
     flogl = lambda par: -self.ll(par)
     if self.optim == 'newton':
         fgrad = lambda par: self.gradient(par)
         fhess = lambda par: self.hessian(par)
         par_hat = newton(flogl, start, fgrad, fhess, self.maxiter)
         warn = par_hat[2]
     else:
         fgrad = lambda par: -self.gradient(par)
         if self.optim == 'bfgs':
             par_hat = op.fmin_bfgs(
                 flogl, start, fgrad, full_output=1, disp=0)
             warn = par_hat[6]
         if self.optim == 'ncg':
             fhess = lambda par: -self.hessian(par)
             par_hat = op.fmin_ncg(
                 flogl, start, fgrad, fhess=fhess, full_output=1, disp=0)
             warn = par_hat[5]
     if warn > 0:
         warn = True
     else:
         warn = False
     return par_hat, warn
Example #15
def __updateD(X, A, D, R, nne, optfunc):
    f = 0
    for i in range(len(X)):
        d = D[i, :]
        u = Updater(X[i], A, R)
        if nne > 0:
            bounds = len(d) * [(0, None)]
            res = fmin_l_bfgs_b(
                u.updateD_F, d, u.updateD_G, factr=1e12, bounds=bounds
            )
        else:
            if optfunc == 'lbfgs':
                res = fmin_l_bfgs_b(u.updateD_F, d, u.updateD_G, factr=1e12)
                D[i, :] = res[0]
                f += res[1]
            elif optfunc == 'ncg':
                res = fmin_ncg(
                    u.updateD_F, d, u.updateD_G, fhess=u.updateD_H,
                    full_output=True, disp=False
                )
                # TODO: check return value of ncg and update D, f
                raise NotImplementedError()
            elif optfunc == 'tnc':
                res = fmin_tnc(u.updateD_F, d, u.updateD_G, disp=False)
                # TODO: check return value of tnc and update D, f
                raise NotImplementedError()
    return D, f
Example #16
    def test_ncg(self):
        """ line-search Newton conjugate gradient optimization routine
        """
        retval = optimize.fmin_ncg(self.func, self.startparams, self.grad,
                                   args=(), maxiter=self.maxiter,
                                   full_output=False, disp=False,
                                   retall=False)

        params = retval

        err = abs(self.func(params) - self.func(self.solution))
        #print "NCG: Difference is: " + str(err)
        assert_(err < 1e-6)

        # Ensure that function call counts are 'known good'; these are from
        # Scipy 0.7.0. Don't allow them to increase.
        assert_(self.funccalls == 7, self.funccalls)
        assert_(self.gradcalls <= 18, self.gradcalls) # 0.9.0
        #assert_(self.gradcalls == 18, self.gradcalls) # 0.8.0
        #assert_(self.gradcalls == 22, self.gradcalls) # 0.7.0

        # Ensure that the function behaves the same; this is from Scipy 0.7.0
        assert_(np.allclose(self.trace[3:5],
                           [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
                            [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
                           atol=1e-6, rtol=1e-7), self.trace[:5])
Example #17
def maximize(L, DL, D2L, x, method=None, disp=False):
    mL = lambda x: -L(x)
    mDL = lambda x: -DL(x)
    mD2L = lambda x: -D2L(x)

    if method == None or method == 'ncg':
        func = lambda x0: opt.fmin_ncg(mL, x0, fprime=mDL, fhess=mD2L,\
                                       disp=disp, full_output=True,\
                                       avextol=1e-10)
        xm = check_max(func, x, 5, 'NCG', disp)
        if xm != None:
            return xm

    if method == None or method == 'bfgs':
        func = lambda x0: opt.fmin_bfgs(mL, x0, fprime=mDL,\
                                        disp=disp, full_output=True,\
                                        gtol=1e-10)
        xm = check_max(func, x, 6, 'BFGS', disp)
        if xm != None:
            return xm

    if method == None or method == 'powell':
        func = lambda x0: opt.fmin_powell(mL, x0, disp=disp, full_output=True,\
                                          ftol=1e-10)
        xm = check_max(func, x, 5, 'POWELL', disp)
        if xm != None:
            return xm

    func = lambda x0: opt.fmin(mL, x0, disp=disp, full_output=True, ftol=1e-10)
    xm = check_max(func, x, 4, 'DOWNHILL_SIMPLEX', disp)
    return xm
Example #18
def infer_ctx(options, seq, f_cost, f_ctx_grad, init_ctx=None, f_hess_p=None, maxiter=100):
    if init_ctx == None:
        init_ctx = 1e-3 * numpy.random.randn(1, options["ctx_dim"]).astype("float32")
    x, mask, ctx0 = prepare_data([seq], init_ctx)

    def _g(ctx):
        return f_ctx_grad(x, mask, ctx.reshape([1, ctx.shape[0]]).astype("float32")).reshape([ctx.shape[0]])

    def _c(ctx):
        return f_cost(x, mask, ctx.reshape([1, ctx.shape[0]]).astype("float32"))

    def _hp(ctx, p):
        if f_hess_p:
            return f_hess_p(x, mask, ctx.reshape([1, ctx.shape[0]]), p.reshape([1, p.shape[0]])).astype("float32")
        else:
            return None

    def _cb(ctx):
        cc = f_cost(x, mask, ctx.reshape([1, ctx.shape[0]]).astype("float32"))
        print "Current cost: ", cc

    if f_hess_p:
        ctx_opt = optimize.fmin_ncg(_c, ctx0[0, :], fprime=_g, fhess_p=_hp, callback=None, maxiter=maxiter)
    else:
        ctx_opt = optimize.fmin_bfgs(_c, ctx0[0, :], fprime=_g, callback=None, maxiter=maxiter)

    return ctx_opt
Example #19
def one_vs_all(X, y, num_labels, lamb):
    """ Trains multiple logistic regression classifiers.

    Args:
      X: Matrix of features.
      y: Vector of labels.
      num_labels: Number of classes.
      lamb: Regularization parameter.

    Returns:
      all_theta: Vector of regularized logistic regression parameters (one 
                 per class).

    Raises:
      An error occurs if the number of labels is 0.
    """
    if (num_labels == 0): raise Error('num_labels = 0')
    num_train_ex = X.shape[0]
    num_features = X.shape[1]
    all_theta = numpy.zeros((num_labels, num_features+1))
    ones_vec = numpy.ones((num_train_ex, 1))
    aug_x = numpy.c_[ones_vec, X]
    for label_index in range(0, num_labels):
      theta_vec = numpy.zeros((num_features+1, 1))
      theta_vec_flat = numpy.ndarray.flatten(theta_vec)
      y_arg = (numpy.equal(y, (label_index+1)*numpy.ones((num_train_ex,
                                                          1)))).astype(int)
      fmin_ncg_out = fmin_ncg(compute_cost, theta_vec_flat,
                              fprime=compute_gradient,
                              args=(aug_x, y_arg, num_train_ex, lamb),
                              avextol=1e-10, epsilon=1e-10, maxiter=400,
                              full_output=1)
      theta_opt = numpy.reshape(fmin_ncg_out[0], (1, num_features+1), order='F')
      all_theta[label_index, :] = theta_opt
    return all_theta
Example #20
  def minimize(func, x0, method='CG', options=None, jac=None, callback=None):
      method = method.lower()

      if 'disp' in options:
          disp = options['disp']
      else:
          disp = False

      if 'maxiter' in options:
          maxiter = options['maxiter']
      else:
          maxiter = None

      if method == 'nelder-mead':
          x = fmin(func=func, x0=x0, disp=disp, maxiter=maxiter, callback=callback)
      elif method == 'powell':
          x = fmin_powell(func=func, x0=x0, disp=disp, maxiter=maxiter, callback=callback)
      elif method == 'cg':
          x = fmin_cg(f=func, x0=x0, fprime=jac, disp=disp, maxiter=maxiter, callback=callback)
      elif method == 'bfgs':
          x = fmin_bfgs(f=func, x0=x0, fprime=jac, disp=disp, maxiter=maxiter, callback=callback)
      elif method == 'l-bfgs-b':
          d = ceil(1000000 / len(x0))
          print(d)
          x, _, _ = fmin_l_bfgs_b(func=func, x0=x0, fprime=jac, disp=(d if disp else 0))
      elif method == 'newton-cg':
          x = fmin_ncg(f=func, x0=x0, fprime=jac, disp=disp, maxiter=maxiter, callback=callback)

      class Result(object):
          def __init__(self, x):
              self.x = x

      return Result(x)
Example #21
    def run(self):

        optimizer = self.optimizer
        p = self.problem
        f = p.f
        grad = p.grad
        
        # coerce return types
        f = lambda wt: numpy.float64(p.f(wt))
        grad = lambda wt: numpy.array(list(map(numpy.float64, p.grad(wt))))
        
        # negate for minimization
        neg_f = lambda wt: -f(wt)
        neg_grad = lambda wt: -grad(wt)
        #if not useGrad or not p.useGrad(): neg_grad = None
        if not p.usef: 
            neg_f = lambda wt: -p._fDummy(wt)
        log = logs.getlogger(self.__class__.__name__)
        if optimizer == "bfgs":
            params = dict([k_v for k_v in iter(self.optParams.items()) if k_v[0] in ["gtol", "epsilon", "maxiter"]])
            if self.verbose: print("starting optimization with %s... %s\n" % (optimizer, params))
            wt, f_opt, grad_opt, Hopt, func_calls, grad_calls, warn_flags = fmin_bfgs(neg_f, self.wt, fprime=neg_grad, full_output=True, **params)
            if self.verbose: 
                print("optimization done with %s..." % optimizer)
                print("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "cg":            
            params = dict([k_v1 for k_v1 in iter(self.optParams.items()) if k_v1[0] in ["gtol", "epsilon", "maxiter"]])
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt, f_opt, func_calls, grad_calls, warn_flags = fmin_cg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "ncg":            
            params = dict([k_v2 for k_v2 in iter(self.optParams.items()) if k_v2[0] in ["avextol", "epsilon", "maxiter"]])
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt, f_opt, func_calls, grad_calls, warn_flags = fmin_ncg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "fmin":
            params = dict([k_v3 for k_v3 in iter(self.optParams.items()) if k_v3[0] in ["xtol", "ftol", "maxiter"]])
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt = fmin(neg_f, self.wt, args=(), full_output=True, **params)
            log.info("optimization done with %s..." % optimizer)
        elif optimizer == "powell":
            params = dict([k_v4 for k_v4 in iter(self.optParams.items()) if k_v4[0] in ["xtol", "ftol", "maxiter"]])
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt = fmin_powell(neg_f, self.wt, args=(), full_output=True, **params)
            log.info("optimization done with %s..." % optimizer)
        elif optimizer == 'l-bfgs-b':
            params = dict([k_v5 for k_v5 in iter(self.optParams.items()) if k_v5[0] in ["gtol", "epsilon", "maxiter", 'bounds']])
            log.info("starting optimization with %s... %s" % (optimizer, params))
            if 'bounds' in params:
                params['bounds'] = (params['bounds'],) * len(self.wt)
            wt, f_opt, d = fmin_l_bfgs_b(neg_f, self.wt, fprime=neg_grad, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\n" % (-f_opt))
        else:
            raise Exception("Unknown optimizer '%s'" % optimizer)
        
        return wt
Example #22
def find_energy_min(eptm, method='fmin_l_bfgs_b',
                    tol=1e-8, approx_grad=0, epsilon=1e-8):
    '''
    Performs the energy minimisation
    '''
    pos0, bounds = precondition(eptm)
    eptm.stamp += 1
    output = 0
    if method == 'fmin_l_bfgs_b':
        ## I set `factr` to 1e11 to avoid too long computation
        output = optimize.fmin_l_bfgs_b(opt_energy,
                                        pos0.flatten(),
                                        fprime=opt_gradient,
                                        #approx_grad=approx_grad,
                                        bounds=bounds.flatten(),
                                        args=(eptm,),
                                        factr=1e10,
                                        m=10,
                                        pgtol=tol,
                                        epsilon=epsilon,
                                        iprint=1,
                                        maxfun=150,
                                        disp=None)

    elif method=='fmin':
        output = optimize.fmin(opt_energy,
                               pos0.flatten(),
                               ftol=tol, xtol=0.01,
                               args=(eptm,),
                               callback=opt_callback)
    elif method=='fmin_ncg':
        output = optimize.fmin_ncg(opt_energy,
                                   pos0.flatten(),
                                   fprime=opt_gradient,
                                   args=(eptm,),
                                   avextol=tol,
                                   retall=True,
                                   maxiter=100)# ,

    elif method=='fmin_tnc':
        output = optimize.fmin_tnc(opt_energy,
                                   pos0.flatten(),
                                   fprime=opt_gradient,
                                   args=(eptm,),
                                   pgtol=tol,
                                   bounds=bounds,
                                   maxCGit=0,
                                   disp=5)
    elif method=='fmin_bfgs':
        output = optimize.fmin_bfgs(opt_energy,
                                    pos0.flatten(),
                                    fprime=opt_gradient,
                                    args=(eptm,),
                                    gtol=tol,
                                    norm=np.inf,
                                    retall=1,
                                    callback=opt_callback)
    return pos0, output
Example #23
 def maximize(self,*a):
   print 'Maximizing using Newton Conjugate Gradient method'
   self.iters=0
   theta, args = self.model.pack(*a)
   theta = opt.fmin_ncg(f=self.logL, x0=theta, fprime=self.logL_grad, 
                        fhess_p=self.logL_hess_p, 
                        args=args, maxiter=None, avextol=1.0e-10,
                        callback=self.callback)
   return self.model.unpack(theta, args)
Example #24
	def run(self):
		optimizer = self.optimizer
		p = self.problem
		f = p.f
		grad = p.grad
		
		# coerce return types
		f = lambda wt: numpy.float64(p.f(wt))
		grad = lambda wt: numpy.array(map(numpy.float64, p.grad(wt)))
		
		# negate for minimization
		neg_f = lambda wt: -f(wt)
		neg_grad = lambda wt: -grad(wt)
		if not p.useGrad(): neg_grad = None
		if not p.useF(): neg_f = lambda wt: -p.__fDummy(wt)
	
		if optimizer == "bfgs":
			params = dict(filter(lambda (k,v): k in ["gtol", "epsilon", "maxiter"], self.optParams.iteritems()))
			print "starting optimization with %s... %s" % (optimizer, params)
			wt, f_opt, grad_opt, Hopt, func_calls, grad_calls, warn_flags = fmin_bfgs(neg_f, self.wt, fprime=neg_grad, full_output=True, **params)
			print "optimization done with %s..." % optimizer
			print "f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags)
		elif optimizer == "cg":			
			params = dict(filter(lambda (k,v): k in ["gtol", "epsilon", "maxiter"], self.optParams.iteritems()))
			print "starting optimization with %s... %s" % (optimizer, params)
			wt, f_opt, func_calls, grad_calls, warn_flags = fmin_cg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
			print "optimization done with %s..." % optimizer
			print "f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags)
		elif optimizer == "ncg":			
			params = dict(filter(lambda (k,v): k in ["avextol", "epsilon", "maxiter"], self.optParams.iteritems()))
			print "starting optimization with %s... %s" % (optimizer, params)
			wt, f_opt, func_calls, grad_calls, warn_flags = fmin_ncg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
			print "optimization done with %s..." % optimizer
			print "f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags)
		elif optimizer == "fmin":
			params = dict(filter(lambda (k,v): k in ["xtol", "ftol", "maxiter"], self.optParams.iteritems()))
			print "starting optimization with %s... %s" % (optimizer, params)
			wt = fmin(neg_f, self.wt, args=(), full_output=True, **params)
			print "optimization done with %s..." % optimizer
		elif optimizer == "powell":
			params = dict(filter(lambda (k,v): k in ["xtol", "ftol", "maxiter"], self.optParams.iteritems()))
			print "starting optimization with %s... %s" % (optimizer, params)
			wt = fmin_powell(neg_f, self.wt, args=(), full_output=True, **params)
			print "optimization done with %s..." % optimizer
		elif optimizer == 'l-bfgs-b':
			params = dict(filter(lambda (k,v): k in ["gtol", "epsilon", "maxiter", 'bounds'], self.optParams.iteritems()))
			print "starting optimization with %s... %s" % (optimizer, params)
			if 'bounds' in params:
				params['bounds'] = (params['bounds'],) * len(self.wt)
			wt, f_opt, d = fmin_l_bfgs_b(neg_f, self.wt, fprime=neg_grad, **params)
			print "optimization done with %s..." % optimizer
			print "f-opt: %.16f\n" % (-f_opt)
		else:
			raise Exception("Unknown optimizer '%s'" % optimizer)
		
		return wt
Example #25
 def fit(self, X, y):
     X_c = np.column_stack((np.ones(X.shape[0]),X)) 
     self.theta = np.zeros(X.shape[1]+1)
     if self.solver == 'newton-cg':
         self.theta = fmin_ncg(self.costFunction, self.theta, 
                               fprime=self.gradient, args=(X_c, y),
                               maxiter=self.max_iter, avextol=self.tol, 
                               disp=self.verbose)
     else:
         pass
     self.cost = self.costFunction(self.theta, X_c, y)
Example #26
def trainReg(X, y, lamd, lin=True):
    """If lin=True, train linear regression given datasets X, y and a regularization param\
       lamd else train logistic regression"""
    if type(X) == pd.core.series.Series:
        init_theta = np.zeros(2)
    else:
        init_theta = np.zeros(X.shape[1] + 1)
    J, grad = for_opt_wrapper(regCostFunction, X, y, lamd, lin)
    opt_theta = fmin_ncg(J, init_theta, grad, maxiter=1000)
    print opt_theta
    return opt_theta  # this is a numpy array; its shape is (2,)
Example #27
 def map(self, tol=1e-8):
     """
     Compute the maximum a posteriori regression coefficients.
     """
     cost = lambda w: -self.log_posterior(w)
     grad = lambda w: -self.log_posterior_grad(w)
     hess = lambda w: -self.log_posterior_hess(w)
     w0 = np.zeros(self.X.shape[1])
     w = fmin_ncg(cost, w0, grad, fhess=hess, avextol=tol, disp=False)
     self.cache['map'] = w
     return w
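The log_posterior, log_posterior_grad and log_posterior_hess methods are not shown. For a logistic likelihood with an isotropic Gaussian prior on w (an assumption about this class, including the attribute names self.X and self.y, with self.y taken as a 1-D array of 0/1 labels, and the prior precision alpha), they could look roughly like this:

import numpy as np
from scipy.special import expit

def log_posterior(self, w, alpha=1.0):
    # Bernoulli log-likelihood plus a Gaussian log-prior on w (up to an additive constant).
    p = expit(self.X.dot(w))
    eps = 1e-12  # guard against log(0)
    return np.sum(self.y * np.log(p + eps) + (1.0 - self.y) * np.log(1.0 - p + eps)) \
        - 0.5 * alpha * w.dot(w)

def log_posterior_grad(self, w, alpha=1.0):
    # Gradient of the log-posterior with respect to w.
    p = expit(self.X.dot(w))
    return self.X.T.dot(self.y - p) - alpha * w

def log_posterior_hess(self, w, alpha=1.0):
    # Hessian of the log-posterior: -X' diag(p(1-p)) X - alpha I.
    p = expit(self.X.dot(w))
    s = p * (1.0 - p)
    return -(self.X * s[:, None]).T.dot(self.X) - alpha * np.eye(w.size)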
Example #28
def test():
    N = 10
    Qr = np.r_[1:3]
    init = np.ones(N)
    J = lambda phi: objective(phi, Qr)
    dJ = lambda phi: gradient(phi, Qr)
    d2J = lambda phi: hessian(phi, Qr)
    def callback(phi):
        print(J(phi))
    phi = fmin_ncg(J, init, fprime=dJ, fhess=d2J, avextol=1e-16, callback=callback)
    return np.exp(1j*phi)
Example #29
	def laplace_approximation(self):
		"""find the mode and hessian of the (probabiliy of) f, the latent function variables"""
		self.update()
		#self.f_hat = fmin(self.fcost,self.Y.copy().flatten()+np.random.randn(self.N))
		#self.f_hat = fmin_cg(self.fcost,self.f_hat.copy().flatten(),fprime=self.fcost_grad)
		try:
			self.f_hat = fmin_ncg(self.fcost,self.f_hat.copy().flatten(),fprime=self.fcost_grad,fhess=self.fcost_hessian)
		except(ValueError):
			print 'ncg method barfed'
			self.f_hat = fmin_cg(self.fcost,self.f_hat.copy().flatten(),fprime=self.fcost_grad)
			
		self.f_hat = self.f_hat.reshape(self.N,1)
Example #30
    def test_ncg(self):
        """ line-search Newton conjugate gradient optimization routine
        """
        retval = optimize.fmin_ncg(self.func, self.startparams, self.grad,
                                   args=(), maxiter=self.maxiter,
                                   full_output=False, disp=False,
                                   retall=False)

        params = retval

        err = abs(self.func(params) - self.func(self.solution))
        #print "NCG: Difference is: " + str(err)
        assert err < 1e-6
Example #31
    def get_inverse_hvp_cg(self, v, max_iterations=10, grad=None):

        if (isinstance(v, torch.Tensor)):
            v = v.detach().numpy()

        self.initialize(self.X_train, self.Y_train, grad=grad)
        fmin_loss_fn = self.get_fmin_loss_fn(v)
        fmin_grad_fn = self.get_fmin_grad_fn(v)
        cg_callback = self.get_cg_callback(v)

        fmin_results = fmin_ncg(f=fmin_loss_fn,
                                x0=v,
                                fprime=fmin_grad_fn,
                                fhess_p=self.get_fmin_hvp,
                                callback=cg_callback,
                                avextol=1e-8,
                                maxiter=max_iterations)

        return fmin_results
Example #32
    def test_ncg_hessp(self, use_wrapper=False):
        """ Newton conjugate gradient with Hessian times a vector p """
        if use_wrapper:
            opts = {
                'maxiter': self.maxiter,
                'disp': False,
                'return_all': False
            }
            retval = optimize.minimize(self.func,
                                       self.startparams,
                                       method='Newton-CG',
                                       jac=self.grad,
                                       hessp=self.hessp,
                                       args=(),
                                       options=opts)['x']
        else:
            retval = optimize.fmin_ncg(self.func,
                                       self.startparams,
                                       self.grad,
                                       fhess_p=self.hessp,
                                       args=(),
                                       maxiter=self.maxiter,
                                       full_output=False,
                                       disp=False,
                                       retall=False)

        params = retval

        assert_allclose(self.func(params), self.func(self.solution), atol=1e-6)

        # Ensure that function call counts are 'known good'; these are from
        # Scipy 0.7.0. Don't allow them to increase.
        assert_(self.funccalls == 7, self.funccalls)
        assert_(self.gradcalls <= 18, self.gradcalls)  # 0.9.0
        # assert_(self.gradcalls == 18, self.gradcalls) # 0.8.0
        # assert_(self.gradcalls == 22, self.gradcalls) # 0.7.0

        # Ensure that the function behaves the same; this is from Scipy 0.7.0
        assert_allclose(self.trace[3:5],
                        [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
                         [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
                        atol=1e-6,
                        rtol=1e-7)
Example #33
    def get_inverse_hvp_cg(self, v, verbose):
        fmin_loss_fn = self.get_fmin_loss_fn(v)
        fmin_grad_fn = self.get_fmin_grad_fn(v)
        cg_callback = self.get_cg_callback(v, verbose)

        # x0 = np.array([])
        # for param in v:
        #     x0 = np.concatenate([x0,param.flatten()])

        fmin_results = fmin_ncg(
            f=fmin_loss_fn,
            x0=np.concatenate(v),
            fprime=fmin_grad_fn,
            fhess_p=self.get_fmin_hvp,
            callback=cg_callback,
            avextol=self.avextol,
            maxiter=100) 

        return self.vec_to_list(fmin_results)
Example #34
def main():
	# get the training and the test data
	X_train, X_test, y_train, targets_train, targets_test = loadData('iris.data')	
		
	# normalize the features of the training and the test set
	X_train, mu, std = featureNormalize(X_train)
	X_test = (X_test - mu) / std
	
	# useful parameters
	input_layer_size = 4
	hidden_layer_size = 3
	output_layer_size = 3
	Lambda = 0.1				# regularization parameter 
	
	# randomly initialize the weights from a uniform distribution
	epsilon1 = np.sqrt(6.0/(input_layer_size+hidden_layer_size))
	epsilon2 = np.sqrt(6.0/(hidden_layer_size+output_layer_size))
	initial_theta1 = np.random.uniform(-1*epsilon1, epsilon1, size=(hidden_layer_size,input_layer_size+1))
	initial_theta2 = np.random.uniform(-1*epsilon2, epsilon2, size=(output_layer_size,hidden_layer_size+1))
	
	# set the parameters for training the neural network
	initial_params = np.r_[initial_theta1.ravel(), initial_theta2.ravel()]
	args = (X_train, y_train, input_layer_size, hidden_layer_size, output_layer_size, Lambda)

	# train the neural network
	theta = optimize.fmin_ncg(computeCostReg, initial_params, fprime=computeGradientReg, args=args)	
	
	# obtain the optimal weights 
	theta1 = theta[:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1)
	theta2 = theta[hidden_layer_size*(input_layer_size+1):].reshape(output_layer_size,hidden_layer_size+1)
	
	# get the predictions on the training and the test set
	prediction_train = predict(theta1, theta2, X_train)
	prediction_test = predict(theta1, theta2, X_test)
	
	# calculate the training and test set accuracy
	training_acc = np.mean(targets_train == prediction_train) * 100
	test_acc = np.mean(targets_test == prediction_test) * 100

	# report the results
	print 'The training set accuracy of the neural network: %.2f%%' % training_acc
	print 'The test set accuracy of the neural network: %.2f%%' % test_acc
Example #35
def trainLinearReg(X, y, lambd=0):
    # TRAINLINEARREG Trains linear regression given a dataset (X, y) and a
    # regularization parameter lambd
    #   TRAINLINEARREG (X, y, lambda) trains linear regression using
    #   the dataset (X, y) and regularization parameter lambd. Returns the
    #   trained parameters theta.
    #

    # Initialize some useful values
    m, n = X.shape

    initial_theta = np.zeros((n + 1, ), dtype=float)

    #  Run Newton-Conjugate-Gradient to obtain the optimal theta
    theta = op.fmin_ncg(f=linearRegCostFunction,
                        x0=initial_theta,
                        fprime=linearRegGradient,
                        maxiter=200,
                        args=(X, y, lambd))
    return theta
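linearRegCostFunction and linearRegGradient are defined elsewhere in that exercise. Because initial_theta has n + 1 entries while X has n columns, the sketch below assumes the cost function prepends the bias column itself; it illustrates a compatible regularized linear-regression cost/gradient pair and is not the original implementation:

import numpy as np

def linearRegCostFunction(theta, X, y, lambd):
    # Regularized linear regression cost; a bias column is prepended here (assumption).
    m = X.shape[0]
    X_aug = np.c_[np.ones(m), X]
    residual = X_aug.dot(theta) - y.ravel()
    return (residual.dot(residual) + lambd * theta[1:].dot(theta[1:])) / (2.0 * m)

def linearRegGradient(theta, X, y, lambd):
    # Gradient of the cost above; the bias term is not regularized.
    m = X.shape[0]
    X_aug = np.c_[np.ones(m), X]
    grad = X_aug.T.dot(X_aug.dot(theta) - y.ravel()) / m
    grad[1:] += (lambd / m) * theta[1:]
    return grad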
Example #36
def conjugate_gradient(ax_fn,
                       b,
                       debug_callback=None,
                       avextol=None,
                       maxiter=None):
    """Computes the solution to Ax - b = 0 by minimizing the conjugate objective
    f(x) = x^T A x / 2 - b^T x. This does not require evaluating the matrix A
    explicitly, only the matrix vector product Ax.

    From https://github.com/kohpangwei/group-influence-release/blob/master/influence/conjugate.py.

    Args:
      ax_fn: A function that return Ax given x.
      b: The vector b.
      debug_callback: An optional debugging function that reports the current optimization function. Takes two
          parameters: the current solution and a helper function that evaluates the quadratic and linear parts of the
          conjugate objective separately. (Default value = None)
      avextol:  (Default value = None)
      maxiter:  (Default value = None)

    Returns:
      The conjugate optimization solution.

    """

    cg_callback = None
    if debug_callback:
        cg_callback = lambda x: debug_callback(x, -np.dot(b, x), 0.5 * np.dot(
            x, ax_fn(x)))

    result = fmin_ncg(
        f=lambda x: 0.5 * np.dot(x, ax_fn(x)) - np.dot(b, x),
        x0=np.zeros_like(b),
        fprime=lambda x: ax_fn(x) - b,
        fhess_p=lambda x, p: ax_fn(p),
        callback=cg_callback,
        avextol=avextol,
        maxiter=maxiter,
    )

    return result
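A small usage sketch for the function above; the 3x3 system is made up for illustration:

import numpy as np

# Solve A x = b for a small symmetric positive definite A, passing only the
# matrix-vector product to the solver.
A = np.array([[4.0, 1.0, 0.0],
              [1.0, 3.0, 1.0],
              [0.0, 1.0, 2.0]])
b = np.array([1.0, 2.0, 3.0])

x = conjugate_gradient(lambda v: A.dot(v), b, avextol=1e-10, maxiter=50)
print(np.allclose(A.dot(x), b, atol=1e-6))  # expected: True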
Example #37
def argmin_f(f, rho, A, AT, u, c):
    M, N = A.shape

    sq = lambda _x: np.array([safedot(A[i], _x) - c[i] for i in range(M)])
    pr = lambda _sqx: np.dot(u, _sqx) + rho / 2 * sum(_sqx**2)
    fs = lambda _x: sum(f(_x))
    obj = lambda _x: -fs(_x) + pr(sq(_x))

    uTA = [np.dot(AT[i], u) for i in range(N)]
    pprime = lambda _x: np.array(uTA)
    ppprime = lambda _sqx: np.array([np.dot(AT[j], _sqx) for j in range(N)])
    fprime = lambda _x: -1/np.maximum(_x, 1e-4) + 1e2 * np.sign(np.minimum(_x - 1e-4, 0))
    jac = lambda _x: fprime(_x) + pprime(_x) + rho * ppprime(sq(_x))

    x = np.ones(N)

    b = np.asarray(A)[0]
    xs = fmin_ncg(obj, x, fprime=jac,
                  disp=0)
                    #bounds=[(0, np.inf) for i in range(N)], disp=0)
    return xs
Example #38
    def __solver__(self, p):
        def iterfcn(x):
            p.xk, p.fk = x, p.f(x)
            p.iterfcn()
            if p.istop: raise isSolved

        if p.userProvided.d2f: fhess = p.d2f
        else: fhess = None

        xf = fmin_ncg(p.f,
                      p.x0,
                      p.df,
                      fhess=fhess,
                      maxiter=p.maxIter + 15,
                      disp=0,
                      callback=iterfcn)

        ff = p.f(xf)
        p.istop = 1000
        p.xk = p.xf = xf
        p.fk = p.ff = ff
Example #39
def one_vs_all(X, y, num_labels, lamb):
    """ Trains multiple logistic regression classifiers.

    Args:
      X: Matrix of features.
      y: Vector of labels.
      num_labels: Number of classes.
      lamb: Regularization parameter.

    Returns:
      all_theta: Vector of regularized logistic regression parameters (one 
                 per class).

    Raises:
      An error occurs if the number of labels is 0.
    """
    if (num_labels == 0): raise Error('num_labels = 0')
    num_train_ex = X.shape[0]
    num_features = X.shape[1]
    all_theta = numpy.zeros((num_labels, num_features + 1))
    ones_vec = numpy.ones((num_train_ex, 1))
    aug_x = numpy.c_[ones_vec, X]
    for label_index in range(0, num_labels):
        theta_vec = numpy.zeros((num_features + 1, 1))
        theta_vec_flat = numpy.ndarray.flatten(theta_vec)
        y_arg = (numpy.equal(y, (label_index + 1) * numpy.ones(
            (num_train_ex, 1)))).astype(int)
        fmin_ncg_out = fmin_ncg(compute_cost,
                                theta_vec_flat,
                                fprime=compute_gradient,
                                args=(aug_x, y_arg, num_train_ex, lamb),
                                avextol=1e-10,
                                epsilon=1e-10,
                                maxiter=400,
                                full_output=1)
        theta_opt = numpy.reshape(fmin_ncg_out[0], (1, num_features + 1),
                                  order='F')
        all_theta[label_index, :] = theta_opt
    return all_theta
Example #40
    def train_with_fmin(self, train_feed_dict, save_checkpoints=True, verbose=True):
        fmin_loss_fn = self.get_train_fmin_loss_fn(train_feed_dict)
        fmin_grad_fn = self.get_train_fmin_grad_fn(train_feed_dict)
        fmin_hvp_fn = self.get_train_fmin_hvp_fn(train_feed_dict)

        x0 = np.array(self.sess.run(self.params)[0])
        
        # fmin_results = fmin_l_bfgs_b(
        # # fmin_results = fmin_cg(
        #     fmin_loss_fn,
        #     x0,
        #     fmin_grad_fn
        #     # gtol=1e-8
        #     )

        fmin_results = fmin_ncg(
            f=fmin_loss_fn,
            x0=x0,
            fprime=fmin_grad_fn,
            fhess_p=fmin_hvp_fn,            
            avextol=1e-8,
            maxiter=100)

        W = np.reshape(fmin_results, -1)
                
        params_feed_dict = {}
        params_feed_dict[self.W_placeholder] = W        
        self.sess.run(self.set_params_op, feed_dict=params_feed_dict)
        
        if save_checkpoints: self.saver.save(self.sess, self.checkpoint_file, global_step=0)

        if verbose:
            # print('CG training took %s iter.' % model.n_iter_)
            print('After training with CG: ')
            results = self.print_model_eval()
        else:
            results = None

        return results
Example #41
def test_newton_cg():
    # Test that newton_cg gives same result as scipy's fmin_ncg

    rng = np.random.RandomState(0)
    A = rng.normal(size=(10, 10))
    x0 = np.ones(10)

    def func(x):
        Ax = A.dot(x)
        return .5 * (Ax).dot(Ax)

    def grad(x):
        return A.T.dot(A.dot(x))

    def hess(x, p):
        return p.dot(A.T.dot(A.dot(x.all())))

    def func_grad_hess(x):
        return func(x), grad(x), lambda x: A.T.dot(A.dot(x))

    assert_array_almost_equal(
        newton_cg(func_grad_hess, func, grad, x0, tol=1e-10),
        fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess))
Example #42
def train_log_reg(X, y):
    """ Solves for optimal logistic regression weights.
    Args:
      X: Matrix of features.
      y: Vector of labels.
    Returns:
      theta: Vector of parameters for regularized logistic regression.
    """
    num_features = X.shape[1]
    num_train_ex = X.shape[0]
    ones_vec = numpy.ones((num_train_ex, 1))
    X_aug = numpy.c_[ones_vec, X]
    y_vec = numpy.reshape(y, (num_train_ex, 1))
    theta_vec = numpy.zeros((num_features + 1, 1))
    theta_vec_flat = numpy.ndarray.flatten(theta_vec)
    # f_min_ncg_out = fmin_ncg(compute_cost, theta_vec_flat,
    # fprime=compute_gradient, args=(X_aug, y_vec,
    # num_train_ex),
    # avextol=1e-10, epsilon=1e-10, maxiter=400,
    # full_output=1)
    # lamb = 0
    lamb = 1
    # print("Running logistic regression with lamb = %.3f..." % lamb)
    f_min_ncg_out = fmin_ncg(compute_cost_reg,
                             theta_vec_flat,
                             fprime=compute_gradient_reg,
                             args=(X_aug, y_vec, num_train_ex, lamb),
                             avextol=1e-7,
                             epsilon=1e-7,
                             maxiter=400,
                             full_output=1,
                             disp=0)
    theta_opt = numpy.reshape(f_min_ncg_out[0], (num_features + 1, 1),
                              order='F')
    # print("theta:")
    # print("%s\n" % numpy.array_str(numpy.round(theta_opt, 6)))
    return theta_opt
Example #43
    def get_inverse_hvp_cg(self, v, tol=1e-5, max_iter=1000):
        """

        :param v:
        :param tol:
        :param max_iter:
        :return:
        """
        def __cg_objective(x):
            Hx = self.eval_hvp(x)
            obj = np.multiply(0.5, x.T.dot(Hx)) - v.T.dot(x)
            # d0, = obj.shape
            return obj

        def __cg_grad(x):
            Hx = self.eval_hvp(x)
            d0, d1 = Hx.shape
            return (Hx - v).reshape((d0*d1,))

        def __cg_fHess_p(x, p):
            Hp = self.eval_hvp(p)
            d0, d1 = Hp.shape
            return Hp.reshape((d0*d1,))

        def __cg_callback(x):
            print('CG Objective: %s' % __cg_objective(x)[0])

        cg_min_results = fmin_ncg(
            f=__cg_objective,
            x0=np.concatenate(v),
            fprime=__cg_grad,
            fhess_p=__cg_fHess_p,
            callback=__cg_callback,
            avextol=tol,
            maxiter=max_iter)
        return cg_min_results
Example #44
def part_three_linear(X, y, theta, print_output=True):

    #  Set options for minimization function
    kwargs = {'maxiter': 400, 'args': (X, y), 'full_output': True}

    ##  This function will return theta and the cost
    ## Tried a few different minimization functions in python. They all worked
    #theta, nf, rc = optimize.fmin_tnc(func=compute_cost, x0=theta, fprime=compute_grad, args=(X, y))
    #theta, nf, rc = optimize.fmin_tnc(func=cost_function, x0=theta, args=(X, y))
    #cost = compute_cost(theta, X, y)
    #theta, cost, go, bo, nf, ng, w = optimize.fmin_bfgs(f=compute_cost, x0=theta, fprime=compute_grad, **kwargs)
    theta, cost, nf, gf, hf, w = optimize.fmin_ncg(f=compute_cost,
                                                   x0=theta,
                                                   fprime=compute_grad,
                                                   **kwargs)

    # Print theta to screen
    if print_output:
        print('Cost at theta found by fminunc: {:f}'.format(cost))
        print('Expected cost (approx): 0.203')
        print('theta:')
        print(' {}'.format(theta))
        print('Expected theta (approx):\n -25.161\n 0.206\n 0.201\n')

    # Plot Boundary
    plot_decision_boundary(
        theta, X, y, labels=['Admitted', 'Not Admitted', 'Decision Boundary'])

    # Put some labels
    plt.xlabel('Exam 1 Score', size=18)
    plt.ylabel('Exam 2 Score', size=18)
    plt.gca().tick_params(labelsize=14)
    plt.show()

    #raw_input('Program paused. Press key to continue.\n')
    return theta
Example #45
    def _fit_ncg(self, X, y, X_val, Y_val, activations, deltas, coef_grads,
                 intercept_grads, layer_units):
        # Store meta information for the parameters
        self._coef_indptr = []
        self._intercept_indptr = []
        start = 0

        # Save sizes and indices of coefficients for faster unpacking
        for i in range(self.n_layers_ - 1):
            n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]

            end = start + (n_fan_in * n_fan_out)
            self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))
            start = end

        # Save sizes and indices of intercepts for faster unpacking
        for i in range(self.n_layers_ - 1):
            end = start + layer_units[i + 1]
            self._intercept_indptr.append((start, end))
            start = end

        # Run Newton-CG
        packed_coef_inter = _pack(self.coefs_, self.intercepts_)

        optimal_parameters, self.loss_, func_calls, grad_calls, h_calls, d = \
            optimize.fmin_ncg(x0=packed_coef_inter,
                              f=self._loss_func,
                              fprime=self._grad_func,
                              maxiter=200,
                            #   maxiter=self.max_iter,
                              disp=True,
                              args=(X, y, activations, deltas, coef_grads, intercept_grads),
                              callback=self._callback,
                              full_output=True)

        self._unpack(optimal_parameters)
Example #46
 def func(self, thetas_p, max_iter, n, c, X_p, y_p, C):
     initial_theta = np.zeros((n + 1, 1), dtype=np.float64)
     args = [X_p[c], y_p[c], C]
     print('Iter: ', c)
     #theta= optimize.fmin_cg(self.cost_func, initial_theta, fprime = self.grad_cost_func, args = args, maxiter=max_iter)
     if self.solver == 'fmincg':
         theta = optimize.fmin_cg(self.cost_func,
                                  initial_theta,
                                  fprime=self.grad_cost_func,
                                  args=args,
                                  maxiter=self.max_iter)
     elif self.solver == 'newton-cg':
         theta = optimize.fmin_ncg(self.cost_func,
                                   initial_theta,
                                   fprime=self.grad_cost_func,
                                   args=args,
                                   maxiter=self.max_iter)
     elif self.solver == 'lbfgs':
         theta = optimize.fmin_l_bfgs_b(self.cost_func,
                                        initial_theta,
                                        fprime=self.grad_cost_func,
                                        args=args,
                                        maxiter=self.max_iter)[0]
     thetas_p[c] = theta.transpose()
Example #47
 def fit(self, X, y,n, m):
     #labels = set(y)
     #n_labels = len(labels)
     #encoder = dict(zip(labels, np.arange(float(n_labels))))
     #self.decoder = dict(zip(np.arange(float(n_labels)), labels))
     #n = X.shape[1]
     #m = X.shape[0]
     
     X_aux = np.concatenate((np.ones((m ,1), dtype = np.float64), X), axis=1)
     initial_theta = np.zeros((n + 1, 1), dtype=np.float64)
     theta = np.zeros((n + 1, 1), dtype=np.float64)
     #y_enc = np.array(list(map(lambda x : encoder[x], y)))
     y_enc = np.array(y)
     args = [X_aux, y_enc, self.C]
     
     self.all_theta = np.zeros((1, n + 1), dtype=np.float64)
     if self.solver == 'fmincg':
         theta= optimize.fmin_cg(self.cost_func, initial_theta, fprime = self.grad_cost_func, args = args, maxiter=self.max_iter)
     elif self.solver == 'newton-cg':
         theta= optimize.fmin_ncg(self.cost_func, initial_theta, fprime = self.grad_cost_func, args = args, maxiter=self.max_iter)
     elif self.solver == 'lbfgs':
         theta= optimize.fmin_l_bfgs_b(self.cost_func, initial_theta, fprime = self.grad_cost_func, args = args, maxiter=self.max_iter)[0]
         #print(theta)
     self.all_theta = theta.transpose()
Example #48
def minimize(func,
             x0,
             gradient=None,
             hessian=None,
             algorithm="default",
             verbose=False,
             **args):
    r"""
    This function is an interface to a variety of algorithms for computing
    the minimum of a function of several variables.

    INPUT:

    - ``func`` -- Either a symbolic function or a Python function whose
      argument is a tuple with `n` components

    - ``x0`` -- Initial point for finding minimum.

    - ``gradient`` -- Optional gradient function. This will be computed
      automatically for symbolic functions.  For Python functions, it allows
      the use of algorithms requiring derivatives.  It should accept a
      tuple of arguments and return a NumPy array containing the partial
      derivatives at that point.

    - ``hessian`` --  Optional hessian function. This will be computed
      automatically for symbolic functions. For Python functions, it allows
      the use of algorithms requiring derivatives. It should accept a tuple
      of arguments and return a NumPy array containing the second partial
      derivatives of the function.

    - ``algorithm`` -- String specifying algorithm to use. Options are
      ``'default'`` (for Python functions, the simplex method is the default)
      (for symbolic functions bfgs is the default):

       - ``'simplex'`` -- using the downhill simplex algorithm

       - ``'powell'`` -- use the modified Powell algorithm

       - ``'bfgs'`` -- (Broyden-Fletcher-Goldfarb-Shanno) requires gradient

       - ``'cg'`` -- (conjugate-gradient) requires gradient

       - ``'ncg'`` -- (newton-conjugate gradient) requires gradient and hessian

    - ``verbose`` -- (optional, default: False) print convergence message

    .. NOTE::

        For additional information on the algorithms implemented in this function,
        consult SciPy's `documentation on optimization and root
        finding <https://docs.scipy.org/doc/scipy/reference/optimize.html>`_

    EXAMPLES:

    Minimize a fourth order polynomial in three variables (see the
    :wikipedia:`Rosenbrock_function`)::

        sage: vars = var('x y z')
        sage: f = 100*(y-x^2)^2+(1-x)^2+100*(z-y^2)^2+(1-y)^2
        sage: minimize(f, [.1,.3,.4]) # abs tol 1e-6
        (1.0, 1.0, 1.0)

    Try the newton-conjugate gradient method; the gradient and hessian are 
    computed automatically::

        sage: minimize(f, [.1, .3, .4], algorithm="ncg") # abs tol 1e-6
        (1.0, 1.0, 1.0)

    We get additional convergence information with the ``verbose`` option::

        sage: minimize(f, [.1, .3, .4], algorithm="ncg", verbose=True)
        Optimization terminated successfully.
        ...
        (0.9999999..., 0.999999..., 0.999999...)

    Same example with just Python functions::

        sage: def rosen(x): # The Rosenbrock function
        ....:    return sum(100.0r*(x[1r:]-x[:-1r]**2.0r)**2.0r + (1r-x[:-1r])**2.0r)
        sage: minimize(rosen, [.1,.3,.4]) # abs tol 3e-5
        (1.0, 1.0, 1.0)

    Same example with a pure Python function and a Python function to
    compute the gradient::

        sage: def rosen(x): # The Rosenbrock function
        ....:    return sum(100.0r*(x[1r:]-x[:-1r]**2.0r)**2.0r + (1r-x[:-1r])**2.0r)
        sage: import numpy
        sage: from numpy import zeros
        sage: def rosen_der(x):
        ....:    xm = x[1r:-1r]
        ....:    xm_m1 = x[:-2r]
        ....:    xm_p1 = x[2r:]
        ....:    der = zeros(x.shape, dtype=float)
        ....:    der[1r:-1r] = 200r*(xm-xm_m1**2r) - 400r*(xm_p1 - xm**2r)*xm - 2r*(1r-xm)
        ....:    der[0] = -400r*x[0r]*(x[1r]-x[0r]**2r) - 2r*(1r-x[0])
        ....:    der[-1] = 200r*(x[-1r]-x[-2r]**2r)
        ....:    return der
        sage: minimize(rosen, [.1,.3,.4], gradient=rosen_der, algorithm="bfgs") # abs tol 1e-6
        (1.0, 1.0, 1.0)
    """
    from sage.symbolic.expression import Expression
    from sage.ext.fast_eval import fast_callable
    import numpy
    from scipy import optimize
    if isinstance(func, Expression):
        var_list = func.variables()
        var_names = [str(_) for _ in var_list]
        fast_f = fast_callable(func, vars=var_names, domain=float)
        f = lambda p: fast_f(*p)
        gradient_list = func.gradient()
        fast_gradient_functions = [
            fast_callable(gradient_list[i], vars=var_names, domain=float)
            for i in range(len(gradient_list))
        ]
        gradient = lambda p: numpy.array(
            [a(*p) for a in fast_gradient_functions])
    else:
        f = func

    if algorithm == "default":
        if gradient is None:
            min = optimize.fmin(f, [float(_) for _ in x0],
                                disp=verbose,
                                **args)
        else:
            min = optimize.fmin_bfgs(f, [float(_) for _ in x0],
                                     fprime=gradient,
                                     disp=verbose,
                                     **args)
    else:
        if algorithm == "simplex":
            min = optimize.fmin(f, [float(_) for _ in x0],
                                disp=verbose,
                                **args)
        elif algorithm == "bfgs":
            min = optimize.fmin_bfgs(f, [float(_) for _ in x0],
                                     fprime=gradient,
                                     disp=verbose,
                                     **args)
        elif algorithm == "cg":
            min = optimize.fmin_cg(f, [float(_) for _ in x0],
                                   fprime=gradient,
                                   disp=verbose,
                                   **args)
        elif algorithm == "powell":
            min = optimize.fmin_powell(f, [float(_) for _ in x0],
                                       disp=verbose,
                                       **args)
        elif algorithm == "ncg":
            if isinstance(func, Expression):
                hess = func.hessian()
                hess_fast = [[
                    fast_callable(a, vars=var_names, domain=float) for a in row
                ] for row in hess]
                hessian = lambda p: [[a(*p) for a in row] for row in hess_fast]
                hessian_p = lambda p, v: numpy.dot(numpy.array(hessian(p)), v)
                min = optimize.fmin_ncg(f, [float(_) for _ in x0], fprime=gradient, \
                      fhess=hessian, fhess_p=hessian_p, disp=verbose, **args)
            else:
                # Python functions: use the user-supplied gradient and hessian
                min = optimize.fmin_ncg(f, [float(_) for _ in x0], fprime=gradient, \
                      fhess=hessian, disp=verbose, **args)
    return vector(RDF, min)
Example #49
def find_ML_Estimator(image,
                      fitParams,
                      outputHandle=None,
                      setParams=None,
                      modelLookup=None,
                      searchMethod='simplex',
                      preSearchMethod=None,
                      Prior=None,
                      bruteRange=None,
                      biasCorrect=0,
                      calcNoise=None,
                      bcoutputHandle=None,
                      error='Fisher',
                      **iParams):
    import scipy.optimize as opt
    import model_Production as modPro
    from surface_Brightness_Profiles import gaussian_SBProfile_CXX
    import measure_Bias as mBias
    from generalManipulation import makeIterableList
    """
    MAIN ROUTINE FOR THIS MODULE. Takes in an image (at minimum) and a set of values which defines the model parameters (both fixed and those which are free to vary), and returns the parameter values at which the log-likelihood is minimised (or the likelihood is maximised). Can correct for first-order noise bias (if biasCorrect != 0), and can return an estimate of the error (if error is set to one of the pre-defined values [see below]).
    
    Requires:
    -- image: 2d array of pixelised image
    -- fitParams: tuple of strings which define the model parameters which are free to vary (those which will be fit). These must satisfy the definition of model parameters as set out in the default model dictionary. If None, then e1, e2 and T are fit (this could be made stricter by removing the default None initialisation, thereby requiring that a set of parameters to be fit is passed in).
    -- outputHandle: handle of the output file. **Result is always appended**. If not passed in, then result is not output. Output is in ASCII form.
    -- setParams: Default model dictionary containing fixed parameters which describes the model being fixed. One part of a two part approach to setting the full model parameter dictionary, along with iParams. If None, then default model dictionary is taken.
    -- modelLookup: Dictionary containing lookup table for pixelised model images, as defined in model_Production module. If None, no lookup is used, and the model is re-evaluated for each change in model parameters.
    -- searchMethod: String detailing which form of minimisation to use. Accepted values are:
    ___ simplex, brent, powell, cg, bfgs, l_bfgs_b, ncg (as defined in SciPy documentation)
    -- preSearchMethod: String detailing the initial search over parameter space to find the global minimum, used as an initial guess for refinement with searchMethod. If None, the initial guess is set to the default passed in by the combination of setParams and iParams. If not None, then the code will run an initial, coarse search over the parameter space to attempt to find the global minimum. By default this is switched off. Where preSearchMethod == grid or brute, a grid-based search is used. Where this is used, a range must either be entered by the user through bruteRange, or it is taken from the entered prior information. NOTE: This still uses a typically coarse grid, therefore if the range is too wide it is possible that the code may still find a local minimum if one exists within one grid-point interval of the global minimum.
    -- Prior: NOT USED YET. Skeleton to allow for a parameter prior structure to be passed in
    -- bruteRange: [nPar, 2] sized tuple setting the range in which the initial preSearchMethod is evaluated, if this is done using a grid or brute method (both equivalent), where nPar is the number of free model parameters being fit. THIS DOES NOT CONSTITUTE A PRIOR, as the refinement may still find an ML value outside this range, however where the global maximum occurs outside this range the returned ML value may be expected to be biased.
    -- biasCorrect: integer, states what level of noise bias to correct the estimate to. Only 1st order correction (biasCorrect == 1) is supported. If biasCorrect == 0, the uncorrected estimate (and error if applicable) are output. If biasCorrect > 0, the uncorrected, corrected and error (if applicable) are output. When used, it is important that *the entered model parameter dictionary contains an accurate measure of the pixel noise and appropriate signal-to-noise, as the analytic bias scales according to both*. Noise can be estimated using estimate_Noise() before entry.
    -- bcoutputHandle: As outputHandle, except for the bias-corrected estimator.
    -- error: String detailing the error estimate to output. Supported values are:
    ___ fisher: Marginalised Fisher error for each parameter around the ML point. See docstring for fisher_Error_ML().
    ___ brute: UNSUPPORTED, however an error defined on the parameter likelihood itself can be derived if the preSearchMethod and bruteRange are defined such that the likelihood has *compact support*. If not, then this would be inaccurate (underestimated). Therefore coding for this is deferred until the application of a prior is developed, as use of a prior ensures compact support by default.
    -- iParams: set of optional arguments which, together with setParams, defines the initial model dictionary. Allows parameter values to be input individually on call, and is particularly useful for setting initial guesses where preSearchMethod == None.
    
    
    Model Parameter entry: Model Parameters can be entered using two methods
    ___ setParams: Full Dictionary of initial guess/fixed value for set of parameters. If None, this is set to default set. May not be complete: if not, then model parameters set to default as given in default_ModelParameter_Dictionary()
    ___ iParams: generic input which allows model parameters to be set individually. Keys not set are set to default as given by default_ModelParameter_Dictionary(). Where an iParams key is included in the default dictionary, or setParams, it will be updated to this value (**therefore iParams values take preference**). If a key not present in the default is entered, it is ignored
    ___ The initial choice of model parameters (including initial guesses for the minimisation routine where preSearchMethod == None) is thus set as setParams+{iParams}



    Returns:
    Returned: tuple of length equal to fitParams. Gives the ML estimator for each fit parameter, with the bias-corrected version (if biasCorrect != 0) and the error (if applicable), always in that order.
    """
    ''' Set up defaults '''

    ##Initialise result variables
    Returned = []
    err = None

    ## Exceptions based on input objects
    if (image is None or sum(image.shape) == 0):
        raise RuntimeError(
            'find_ML_Estimator - image supplied is None or uninitialised')

    if (len(fitParams) > 2 and modelLookup is not None
            and modelLookup['useLookup']):
        raise RuntimeError(
            'find_ML_Estimator - Model lookup is not supported for fits with more than two parameters'
        )

    ##Set up initial params, which sets the initial guess or fixed value for the parameters which define the model
    ##This line sets up the keywords that are accepted by the routine
    ## pixel_scale and size should be in arcsec/pixel and arcsec respectively. If pixel_scale = 1., then size can be interpreted as size in pixels
    ## centroid should be set to the center of the image, here assumed to be the middle pixel

    if (setParams is None):
        print "Setting parameters to default"
        initialParams = modPro.default_ModelParameter_Dictionary()
    else:
        print "Updating initial parameters with set Params"
        initialParams = modPro.default_ModelParameter_Dictionary()
        modPro.update_Dictionary(initialParams, setParams)
        ## Deprecated initialParams.update(setParams)

    modPro.set_modelParameter(initialParams, iParams.keys(), iParams.values())

    ## Define modelParams
    modelParams = deepcopy(initialParams)

    ## Estimate Noise of Image
    if (calcNoise is not None):
        #Assumes each image is flattened and therefore needs to be reshaped.
        if (len(image.shape) == 2):
            if (image.shape[0] < 2):
                #Use only the first image
                tImage = image[0].reshape(modelParams['stamp_size'])
                maskCentroid = modelParams['centroid']
            else:
                #Use an alternate stack of closest to even (assumes that pixel error is roughly symmetric), (the alternative stack should negate any feature and background, the effect on the noise is uncertain). Can only be used on multiple realisations of the same field
                if (image.shape[0] % 2 == 0):
                    finalIndex = image.shape[0]
                else:
                    finalIndex = image.shape[0] - 1
                    print "Final Index check (should be even): ", finalIndex
                aStackImage = np.zeros(image[0].shape)
                for i in range(finalIndex):
                    aStackImage += image[i]  #*np.power(-1, i)

                print "\nEstimating noise from stack-subtracted image"
                aStackImage /= float(finalIndex)
                tImage = (image[0] - aStackImage).reshape(
                    modelParams['stamp_size'])

                #Turn off centroid masking (as feature should be removed), subtract stacked from each realisation, and flatten for noise estimation
                maskCentroid = None
                aStackImage = np.tile(aStackImage, (image.shape[0], 1))
                tImage = (image - aStackImage).flatten()

                print "--Done"

                #-- Note, this could be improved by removing maskCentroid in this case, thus allowing the flattened array to be used (a larger data vector), and thus reducing the noise on the error estimation

                ##Plot
                # import pylab as pl
                # f = pl.figure()
                # ax = f.add_subplot(111)
                # im = ax.imshow(tImage)
                # pl.colorbar(im)
                # pl.show()

        elif (len(image.shape) == 1):
            tImage = image.reshape(modelParams['stamp_size'])
            maskCentroid = modelParams['centroid']
        else:
            raise ValueError(
                "find_ML_Estimate: calcNoise: image not of expected shape")
        modelParams['noise'] = calcNoise(tImage, maskCentroid)

    ####### Search lnL for minimum
    #Construct initial guess for free parameters by removing them from dictionary
    x0 = modPro.unpack_Dictionary(modelParams, requested_keys=fitParams)

    ###### Sanity check image dimensions compared to model parameters
    imDim = len(image.shape)
    if (imDim > 2):
        raise ValueError(
            "find_ML_Estimator: Image must not have more than two dimensions. Single postage stamp image must be flattened"
        )
    elif (imDim == 1
          and image.shape[0] != np.array(modelParams['stamp_size']).prod()):
        raise ValueError(
            "find_ML_Estimator: Flattened image (1D) length does not correspond to model parameter dimensions"
        )
    elif (imDim == 2
          and image.shape[1] != np.array(modelParams['stamp_size']).prod()):
        print 'Image shape: ', image.shape, ' Model shape:', modelParams[
            'stamp_size']
        raise ValueError(
            "find_ML_Estimator: image sahpe of second dimension is not consistent with expected model parameter dimension. 2D image array must contain multiple images across first dimension, and (flattened) pixels as a data vector in the second dimension: Have you remembered to flatten the image?"
        )

    if (preSearchMethod is not None):
        ## Conduct a presearch of the parameter space to set initial guess (usually grid-based or brute-force)
        if (vverbose or debug):
            print '\n Conducting a pre-search of parameter space to identify the global minimum'
        if (preSearchMethod.lower() == 'grid'
                or preSearchMethod.lower() == 'brute'):
            ##Brute force method over a range either set as the prior, or the input range.
            if (bruteRange is not None):
                if (vverbose or debug):
                    print '\n Using user-defined parameter range:', bruteRange

                print "Using bruteRange: ", bruteRange
                #x0, fval, bruteGrid, bruteVal
                bruteOut = opt.brute(get_logLikelihood,
                                     ranges=bruteRange,
                                     args=(fitParams, image, modelParams,
                                           modelLookup, 'sum'),
                                     finish=None,
                                     full_output=True)
                x0, fval, bruteGrid, bruteVal = bruteOut
                ## x0 has len(nParam); fval is scalar; bruteGrid has len(nParam), nGrid*nParam; bruteVal has nGrid*nParam

                ###Evaluate error based on brute by integration - this would only work if bruteRange covers the full range where the PDF is non-zero

                if (error is not None and error.lower() == 'brute'):
                    raise RuntimeError(
                        'find_ML_Estimator - brute labelled as the means of evaluating the error. This is possible, but not coded, owing to the limitation that bruteRange must cover the whole region where the likelihood is non-zero. When a prior is included, this could be taken to be exact, provided one knows the range where the prior has compact support, and the bruteRange reflects this.'
                    )
                ## use scipy.integrate.trapz(bruteVal, x = bruteGrid[i], axis = i) with i looping over all parameters (ensure the axis is set properly)

                ##Testing of error determination
                # tErr = fisher_Error_ML(x0, fitParams, image, modelParams, modelLookup)
                # from scipy.stats import norm
                # rv = norm(loc = x0, scale = tErr)
                # ##Plot this
                # import pylab as pl
                # f = pl.figure()
                # ax = f.add_subplot(111)
                # import math
                # ax.plot(bruteGrid, np.exp(-1.*(bruteVal-np.amin(bruteVal))), bruteGrid, (np.sqrt(2*math.pi)*tErr)*rv.pdf(bruteGrid))
                # pl.show()
                # raw_input("Check")

                if (vverbose or debug):
                    print '\n preSearch has found a minimum (on a coarse grid) of:', x0

            elif (Prior is not None):
                if (vverbose or debug):
                    print '\n Using prior range'
                raise RuntimeError(
                    'find_ML_Estimator - Prior entry has not yet been coded up'
                )

            else:
                raise RuntimeError(
                    'find_ML_Estimator - Brute preSearch is active, but prior or range is not set'
                )

    if (debug or vverbose):
        ##Output Model Dictionary and initial guess information
        print 'Model Dictionary:', modelParams
        print '\n Initial Guess:', x0

    ##Find minimum chi^2 using scipy optimize routines
    ##version 11+ maxima = opt.minimize(get_logLikelihood, x0, args = (fitParams, image, modelParams))
    if (searchMethod.lower() == 'simplex'):
        maxima = opt.fmin(get_logLikelihood,
                          x0=x0,
                          xtol=0.00001,
                          args=(fitParams, image, modelParams, modelLookup,
                                'sum'),
                          disp=(verbose or debug))
    elif (searchMethod.lower() == "emcee"):
        import emcee

        if (verbose):
            print "\n-Running emcee....."

        #Define MCMC parameters. These should be passed in
        nWalkers = 6
        nRun = 1000
        nBurn = 100

        if (not isinstance(x0, np.ndarray)):
            x0 = np.array(x0)
        nDim = x0.shape[0]

        print "x0: ", x0

        #Produce a new x0 for each parameter. For now, take as -1.5x0 to 1.5x0. Better to pass this in, or inform from prior range
        p0 = np.zeros((nWalkers, nDim))
        for i in range(x0.shape[0]):
            p0[:, i] = np.random.uniform(-1.5 * x0[i], 1.5 * x0[i], nWalkers)

        print "P0:", p0

        sampler = emcee.EnsembleSampler(nWalkers,
                                        nDim,
                                        get_logLikelihood,
                                        args=(fitParams, image, modelParams,
                                              modelLookup, 'sum', -1))

        #Burn-in
        if (verbose):
            print "-Running burn-in....."
        pos, prob, state = sampler.run_mcmc(p0, nBurn)
        sampler.reset()
        if (verbose):
            print "--Finished burn-in."
            print " Position is ", pos
            print "with prob: ", prob

        #Run
        if (verbose):
            print "-Sampling....."
        sampler.run_mcmc(pos, nRun)
        if (verbose):
            print "--Finished", nRun, " samples."

        #Get output
        chain = sampler.flatchain
        pChain = sampler.flatlnprobability

        maxIndex = np.argmax(pChain, axis=0)
        maxima = chain[maxIndex, :]
        err = np.std(chain, axis=0)

        if (debug):
            import pylab as pl
            f = pl.figure()
            for i in range(1, nDim + 1):
                ax = f.add_subplot(nDim, 1, i)
                ax.hist(chain[:, i - 1], bins=100)
                ax.set_title("Par: " + fitParams[i - 1])

            pl.show()

    elif (searchMethod.lower() == 'brent'):
        ##Note: scipy.optimize has no fmin_brent; brent() minimises a scalar
        ##function of a single variable, so this branch is only meaningful for
        ##one-parameter fits and brackets the minimum automatically.
        maxima = opt.brent(get_logLikelihood,
                           args=(fitParams, image, modelParams, modelLookup,
                                 'sum'),
                           tol=0.00001)
    elif (searchMethod.lower() == 'powell'):
        maxima = opt.fmin_powell(get_logLikelihood,
                                 x0=x0,
                                 xtol=0.00001,
                                 args=(fitParams, image, modelParams,
                                       modelLookup, 'sum'),
                                 disp=(verbose or debug))
    elif (searchMethod.lower() == 'cg'):
        ##Not tested (10Aug)
        maxima = opt.fmin_cg(
            get_logLikelihood,
            x0=x0,
            fprime=differentiate_logLikelihood_Gaussian_Analytic,
            args=(fitParams, image, modelParams, modelLookup, 'sum'),
            disp=(verbose or debug),
            ftol=0.000001)
    elif (searchMethod.lower() == 'bfgs'):
        ##Not tested (10Aug)
        maxima = opt.fmin_bfgs(
            get_logLikelihood,
            x0=x0,
            fprime=differentiate_logLikelihood_Gaussian_Analytic,
            args=(fitParams, image, modelParams, modelLookup, 'sum'),
            disp=(verbose or debug))
    elif (searchMethod.lower() == 'l_bfgs_b'):
        ##Not tested (10Aug)
        maxima = opt.fmin_l_bfgs_b(
            get_logLikelihood,
            x0=x0,
            fprime=differentiate_logLikelihood_Gaussian_Analytic,
            args=(fitParams, image, modelParams, modelLookup, 'sum'),
            disp=(verbose or debug))
    elif (searchMethod.lower() == 'ncg'):
        ##Not tested (10Aug)
        maxima = opt.fmin_ncg(
            get_logLikelihood,
            x0=x0,
            fprime=differentiate_logLikelihood_Gaussian_Analytic,
            args=(fitParams, image, modelParams, modelLookup, 'sum'),
            disp=(verbose or debug))
    else:
        raise ValueError(
            'find_ML_Estimator - searchMethod entered is not supported:' +
            str(searchMethod))

    ##Make numpy array (in the case where 1D is used and scalar is returned):
    if (len(fitParams) == 1):
        maxima = np.array(makeIterableList(maxima))

    if (vverbose):
        print 'maxima is:', maxima

    if (debug):
        ##Plot and output residual
        print 'Plotting residual..'

        fittedParams = deepcopy(modelParams)
        modPro.set_modelParameter(fittedParams, fitParams, maxima)
        ''' Deprecated
        for i in range(len(fitParams)):
            fittedParams[fitParams[i]] =  maxima[i]
        '''

        model, disc = modPro.user_get_Pixelised_Model(
            fittedParams, sbProfileFunc=gaussian_SBProfile_CXX)
        ##Residual of the image with respect to the fitted model
        residual = image.copy()
        if (len(image.shape) == 2):
            residual -= model
        elif (len(image.shape) == 3):
            for i in range(image.shape[0]):
                residual[i] -= model
        else:
            raise ValueError(
                "Error calculating residual: Image has an unknown rank")

        import pylab as pl
        ##Plot image and model
        f = pl.figure()
        ax = f.add_subplot(211)
        ax.set_title('Model')
        im = ax.imshow(model, interpolation='nearest')
        pl.colorbar(im)
        ax = f.add_subplot(212)
        ax.set_title('Image')
        if (len(image.shape) == 3):
            im = ax.imshow(image[0], interpolation='nearest')
        else:
            im = ax.imshow(image, interpolation='nearest')
        pl.colorbar(im)

        pl.show()

        ##Plot Residual
        f = pl.figure()
        ax = f.add_subplot(111)
        im = ax.imshow(residual, interpolation='nearest')
        ax.set_title('Image-Model')
        pl.colorbar(im)
        pl.show()

    if (np.isnan(maxima).sum() > 0):
        raise ValueError('get_ML_estimator - FATAL - NaNs found in maxima:',
                         maxima)

    if (verbose):
        print 'Maxima found to be:', maxima

    ##Output Result
    if (outputHandle is not None):
        np.savetxt(outputHandle, np.array(maxima).reshape(1, maxima.shape[0]))

    ## Bias Correct
    if (biasCorrect == 0):
        Returned.append(maxima)
    elif (biasCorrect == 1):
        ana = mBias.analytic_GaussianLikelihood_Bias(maxima,
                                                     fitParams,
                                                     modelParams,
                                                     order=biasCorrect,
                                                     diffType='analytic')
        bc_maxima = maxima - ana

        ##Output Result
        if (bcoutputHandle is not None):
            np.savetxt(bcoutputHandle,
                       np.array(bc_maxima).reshape(1, bc_maxima.shape[0]))

        if (verbose):
            print 'BC Maxima found to be:', bc_maxima

        ##Return minimised parameters
        Returned.extend([maxima, bc_maxima])
    else:
        raise ValueError(
            'get_ML_estimator - biasCorrect(ion) value entered is not applicable:'
            + str(biasCorrect))

    ## Get Error on measurement. Brute error would have been constructed on the original brute force grid evaluation above.
    if (error is not None):
        if (err is not None):
            err = err  #Do nothing
        elif (error.lower() == 'fisher'):
            err = fisher_Error_ML(
                maxima, fitParams, image, modelParams,
                modelLookup)  #Use finalised modelParams here?
        else:
            raise ValueError(
                "get_ML_estimator - failed to return error, error requested, but value not found nor acceptable lable used"
            )
        Returned.append(err)

    return Returned
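# Note: find_ML_Estimator relies on module-level helpers (get_logLikelihood,
# fisher_Error_ML, modPro, ...) that are not shown in this example. As a
# self-contained illustration of the underlying pattern only -- minimise a
# negative log-likelihood, then derive a Fisher-style error from the curvature
# at the maximum -- a toy one-parameter sketch (all names below are made up)
# might look like this:
import numpy as np
import scipy.optimize as opt

rng = np.random.RandomState(0)

# Toy "image": a 1-D Gaussian profile of known width and unknown amplitude
x_grid = np.linspace(-5., 5., 64)
true_amp, noise_sigma = 2.0, 0.1
toy_image = true_amp * np.exp(-0.5 * x_grid ** 2) + noise_sigma * rng.randn(x_grid.size)


def neg_log_likelihood(params):
    # Gaussian-noise negative log-likelihood (up to an additive constant)
    amp = params[0]
    model = amp * np.exp(-0.5 * x_grid ** 2)
    return 0.5 * np.sum((toy_image - model) ** 2) / noise_sigma ** 2


# ML estimate, analogous to the 'simplex' branch above
ml = opt.fmin(neg_log_likelihood, x0=[1.0], xtol=1e-5, disp=False)

# Fisher-style error: inverse curvature of -lnL at the maximum,
# estimated here with a central finite difference
eps = 1e-4
curv = (neg_log_likelihood(ml + eps) - 2.0 * neg_log_likelihood(ml)
        + neg_log_likelihood(ml - eps)) / eps ** 2
fisher_err = 1.0 / np.sqrt(curv)
print(ml, fisher_err)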
Example #50
0
def get_inverse_hvp_cg(model, y, v, data_set, method='Basic', **kwargs):
    # Calculate the inverse-Hessian vector product over the training set using a CG method
    # Returns x, the solution of the quadratic program, whose value is H^-1 v
    # model: neural network model (e.g. model)
    # y: scalar function output of the neural network (e.g. model.loss)
    # v: vector to be multiplied by the inverse Hessian (i.e. H^-1 v) (e.g. v_test)
    # data_set: training set to be summed over in the Hessian
    # method: Basic -> Conjugate Gradient, Newton -> Newton-Conjugate Gradient
    # kwargs: hyperparameters for conjugate gradient

    # hyperparameters
    batch_size = kwargs.pop('batch_size', 128)
    #batch_size = kwargs.pop('batch_size', 1) 
    # note: changing the batch size can slightly change the output
    # due to floating-point precision loss and parallel computation
    damping = kwargs.pop('damping', 0.0)
    avextol = kwargs.pop('avextol', 1e-8)
    maxiter = kwargs.pop('maxiter', 1e2)
    num_workers = kwargs.pop('num_workers', 6)

    get_inverse_hvp_cg.dl = DataLoader(data_set, batch_size, shuffle=False, num_workers=num_workers)
    get_inverse_hvp_cg.damp = damping
    get_inverse_hvp_cg.cnt = 0
    get_inverse_hvp_cg.fmt = {key: val.shape for (key, val) in v.items()}
    get_inverse_hvp_cg.temp_hvp = dic2vec(v) # most recent HVP value, kept for the callback

    t0 = time.time()

    def HVP_minibatch_val(y, v):
        # Calculate Hessian vector product w.r.t whole dataset
        # y: scalar function output of the neural network (e.g. model.loss)
        # v: vector to be multiplied by the Hessian (numeric dictionary, e.g. v_test)

        ## model: neural network model (e.g. model)
        ## dataloader: dataloader for the training set
        ## damping: damp term to make hessian convex

        num_data = data_set.__len__()

        hvp_batch = {key: np.zeros_like(value) for key,value in v.items()}

        for img, lb in get_inverse_hvp_cg.dl:
            img = img.numpy(); lb = lb.numpy()
            x_feed = {model.X: img, model.y:lb}
            hvp = HVP(y,x_feed,v)
            # add hvp value
            for ks in hvp.keys():
                hvp_batch[ks] += hvp[ks] # gradient will do batch-wise summation

        # normalize after the summation to reduce precision loss
        hvp_batch = {key: val/num_data for (key,val) in hvp_batch.items()}

        # damping term
        for ks in hvp.keys():
            hvp_batch[ks] += get_inverse_hvp_cg.damp * v[ks]

        # update after evaluation
        get_inverse_hvp_cg.temp_hvp = dic2vec(hvp_batch)

        return hvp_batch

    def get_fmin_loss_fn(y, v):
        def fmin_loss_fn(x):
            x_dic = vec2dic(x, get_inverse_hvp_cg.fmt)
            hvp_val = HVP_minibatch_val(y, x_dic)

            return 0.5 * grad_inner_product(hvp_val, x_dic) - grad_inner_product(v, x_dic)
        return fmin_loss_fn

    def get_fmin_grad_fn(y, v):
        def fmin_grad_fn(x):
            # x: 1D vector
            x_dic = vec2dic(x, get_inverse_hvp_cg.fmt)
            hvp_val = HVP_minibatch_val(y, x_dic)
            hvp_flat = dic2vec(hvp_val)
            v_flat = dic2vec(v)

            return hvp_flat - v_flat
        return fmin_grad_fn

    def get_fmin_hvp_fn(y, v):
        def fmin_hvp_fn(x, p):
            p_dic = vec2dic(p, get_inverse_hvp_cg.fmt)
            hvp_val = HVP_minibatch_val(y, p_dic)
            hvp_flat = dic2vec(hvp_val)

            return hvp_flat
        return fmin_hvp_fn

    def get_cg_callback(v, t0):
        def cg_callback(x):
            print('iteration: {}'.format(get_inverse_hvp_cg.cnt), ', ', time.time()-t0, '(sec) elapsed')
            print('vector squared norm: ', np.inner(x, x))
            grad_prev = get_inverse_hvp_cg.temp_hvp - dic2vec(v)  # gradient at the last HVP evaluation; should approach 0
            print('current gradient squared norm: ', np.inner(grad_prev, grad_prev))
            ambiguous_loss = 1/2 * np.inner(get_inverse_hvp_cg.temp_hvp, x) - np.inner(x, dic2vec(v))
            print('current objective value (approximate): ', ambiguous_loss)
            get_inverse_hvp_cg.cnt += 1

            return 0
        return cg_callback

    fmin_loss_fn = get_fmin_loss_fn(y, v)
    fmin_grad_fn = get_fmin_grad_fn(y, v)
    fmin_hvp_fn = get_fmin_hvp_fn(y, v)
    cg_callback = get_cg_callback(v, t0)

    if method == 'Newton':
        fmin_results = fmin_ncg(\
                f = fmin_loss_fn, x0 = dic2vec(v), fprime = fmin_grad_fn,\
                fhess_p = fmin_hvp_fn, avextol = avextol, maxiter = maxiter, callback=cg_callback)
    else:
        fmin_results = fmin_cg(\
                f = fmin_loss_fn, x0 = dic2vec(v), fprime = fmin_grad_fn,\
                maxiter = maxiter, callback = cg_callback)

    return vec2dic(fmin_results, get_inverse_hvp_cg.fmt)
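# Note: the CG formulation above solves min_x 0.5 x'Hx - v'x, whose minimiser
# is H^-1 v. A self-contained toy check of that idea with a small explicit
# Hessian (not the network code above) could be:
import numpy as np
from scipy.optimize import fmin_ncg

H = np.array([[3.0, 1.0],
              [1.0, 2.0]])      # symmetric positive-definite "Hessian"
v = np.array([1.0, -1.0])

loss = lambda x: 0.5 * x.dot(H).dot(x) - v.dot(x)   # f(x) = 1/2 x'Hx - v'x
grad = lambda x: H.dot(x) - v                       # f'(x) = Hx - v
hvp = lambda x, p: H.dot(p)                         # Hessian-vector product

x_sol = fmin_ncg(loss, x0=np.zeros(2), fprime=grad, fhess_p=hvp,
                 avextol=1e-10, disp=False)
print(x_sol, np.linalg.solve(H, v))                 # both should agree: x = H^-1 v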
Example #51
0
A = io.mmread('bcsstk06.mtx.gz') # clustered eigenvalues
#B = io.mmread('bcsstm06.mtx.gz')
n = A.shape[0]
B = speye(n, n)
random.seed(1)
v_0 = random.rand(n)

print("try fmin_bfgs")
full_output = 1
data = []
v,fopt, gopt, Hopt, func_calls, grad_calls, warnflag, allvecs = \
        optimize.fmin_bfgs(R,v_0,fprime=Rp,full_output=full_output,retall=1)
if warnflag == 0:
    plt.semilogy(np.arange(0, len(data)), data)
    print('Rayleigh quotient BFGS', R(v))

print("fmin_bfgs OK")

print("try fmin_ncg")

#
# WARNING: the program may hang if fmin_ncg is used
#
data = []
v,fopt, fcalls, gcalls, hcalls, warnflag, allvecs = \
        optimize.fmin_ncg(R,v_0,fprime=Rp,fhess=Rpp,full_output=full_output,retall=1)
if warnflag == 0:
    plt.figure()
    plt.semilogy(np.arange(0, len(data)), data)
    print('Rayleigh quotient NCG', R(v))
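# Note: R, Rp and Rpp are defined earlier in the original script and not shown
# here; presumably R is the generalized Rayleigh quotient v'Av / (v'Bv) and
# Rp, Rpp its gradient and Hessian. A minimal self-contained sketch of such a
# setup (small dense matrices instead of the Matrix Market files) might be:
import numpy as np
from scipy import optimize

rng = np.random.RandomState(1)
M = rng.rand(5, 5)
A_toy = M + M.T + 5.0 * np.eye(5)   # symmetric test matrix
B_toy = np.eye(5)                   # identity, as with speye(n, n) above


def R_toy(v):
    # Generalized Rayleigh quotient v'Av / v'Bv
    return v.dot(A_toy).dot(v) / v.dot(B_toy).dot(v)


def Rp_toy(v):
    # Gradient of the Rayleigh quotient: 2 (Av - R(v) Bv) / (v'Bv)
    vBv = v.dot(B_toy).dot(v)
    return 2.0 * (A_toy.dot(v) - R_toy(v) * B_toy.dot(v)) / vBv


v0_toy = rng.rand(5)
v_min = optimize.fmin_bfgs(R_toy, v0_toy, fprime=Rp_toy, disp=0)
print(R_toy(v_min), np.linalg.eigvalsh(A_toy).min())  # should be close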
Example #52
cost = costFunction(initial_theta, X, y)
grad = gradient(initial_theta, X, y)

print('Cost at initial theta (zeros): ', cost)
print('Gradient at initial theta (zeros): ')
print(grad)
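# Note: costFunction and gradient are assumed to be the usual unregularized
# logistic-regression helpers from this exercise; a minimal sketch consistent
# with the calls above is:
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def costFunction(theta, X, y):
    # Mean cross-entropy loss of logistic regression
    m = y.size
    h = sigmoid(X.dot(theta))
    return (-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) / m


def gradient(theta, X, y):
    # Gradient of the cost with respect to theta
    m = y.size
    return X.T.dot(sigmoid(X.dot(theta)) - y) / m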

## ============= Part 3: Optimizing using advance optimization problem  =============
#  In this exercise, you will use a built-in function to find the
#  optimal parameters theta.

import scipy.optimize as op

#  Run Newton-Conjugate-Gradient to obtain the optimal theta
theta = op.fmin_ncg(f=costFunction,
                    x0=initial_theta,
                    fprime=gradient,
                    args=(X, y))
cost = costFunction(theta, X, y)

# Print theta to screen
print('Cost at theta found by fmin_ncg: ', cost)
print('theta: ')
print(theta)

# Plot Boundary
plotDecisionBoundary(theta, X, y)

## ============== Part 4: Predict and Accuracies ==============
#  After learning the parameters, you'll want to use them to predict the outcomes
#  on unseen data. In this part, you will use the logistic regression model
#  to predict the probability that a student with score 45 on exam 1 and
Example #53
0
def optimize_J_reg(theta, X, y, L):
    return fmin_ncg(costFunctionReg, x0=theta, fprime=gradientReg, args=(X, y, L), maxiter=400)
Example #54
0
    def fit(self, X, y):
        labels = set(y)  #set of classes
        n_labels = len(labels)  #number of labels or classes
        #Build an encoder mapping labels (which may be of a type
        #other than float) onto float values 0 ... n_labels
        encoder = dict(zip(labels, np.arange(float(n_labels))))
        #Inverse of the mapping above
        self.decoder = dict(zip(np.arange(float(n_labels)), labels))
        #Number of dimensions or features
        n = X.shape[1]
        #Number of examples
        m = X.shape[0]

        #Prepend a one to every example to account for the bias term
        X_aux = np.concatenate((np.ones((m, 1), dtype=np.float64), X), axis=1)
        #Initial value of the theta vector for the optimizer
        initial_theta = np.zeros((n + 1, 1), dtype=np.float64)
        #Parameter vector of the model to be trained
        theta = np.zeros((n + 1, 1), dtype=np.float64)
        #Labels encoded as real values so they can be used in the
        #cost and gradient computations
        y_enc = np.array(list(map(lambda x: encoder[x], y)))
        #List of arguments passed to the optimizer's functions
        args = [X_aux, y_enc, self.C]

        #If this is a multiclass classification problem
        if n_labels > 2:
            #Create a matrix that will hold one parameter vector per model,
            #one model per label
            self.all_theta = np.zeros((n_labels, n + 1), dtype=np.float64)
            #If not running in parallel
            if self.n_jobs is None:
                #For each class
                for c in range(n_labels):
                    #Assign 1 to every example of class c and 0 to all the rest
                    args[1] = np.array(list(
                        map(lambda x: 1.0 if x == c else 0.0, y_enc)),
                                       dtype=np.float64)
                    #Run the chosen optimizer, passing the cost function, the initial theta,
                    #the gradient of the cost function, and the list of arguments shared by
                    #both the cost function and the gradient
                    if self.solver == 'fmincg':
                        theta = optimize.fmin_cg(self.cost_func,
                                                 initial_theta,
                                                 fprime=self.grad_cost_func,
                                                 args=args,
                                                 maxiter=self.max_iter)
                    elif self.solver == 'newton-cg':
                        theta = optimize.fmin_ncg(self.cost_func,
                                                  initial_theta,
                                                  fprime=self.grad_cost_func,
                                                  args=args,
                                                  maxiter=self.max_iter)
                    elif self.solver == 'lbfgs':
                        theta = optimize.fmin_l_bfgs_b(
                            self.cost_func,
                            initial_theta,
                            fprime=self.grad_cost_func,
                            args=args,
                            maxiter=self.max_iter)[0]
                        #print(theta)
                    #Store the trained model vector for class c:
                    #transpose the column vector to place it in its
                    #corresponding row of the matrix of model vectors
                    self.all_theta[c, :] = theta.transpose()
            else:
                #If running in parallel
                #Dictionary of 'y' vectors: for each class we modify the
                #values of y to be one for the examples of that class and
                #zero for all the others; if these vectors were not kept in
                #a dictionary, several threads would try to modify 'y' at
                #the same time, corrupting the data
                y_p = {}
                #Same logic as for the 'y' values: each thread trains, and
                #therefore modifies, its own theta values
                thetas = {}
                X_p = {}
                #Create a thread pool to avoid constantly creating and destroying threads,
                #creating instead a fixed set of threads at the start of execution
                with concurrent.futures.ThreadPoolExecutor(
                        max_workers=self.n_jobs) as executor:
                    #Run the for loop concurrently with a concurrency limit of n_jobs:
                    #each thread runs one iteration of the loop, and any block of code placed
                    #inside the loop runs serially in the context of the thread executing it
                    for c in range(n_labels):
                        #Convert the 'y' values and store them in the y_p dictionary under the corresponding key
                        future = executor.submit(self.func2, y_p, c, y_enc,
                                                 X_p, X_aux)
                        #Run the training of the model for class c
                        future = executor.submit(self.func, thetas,
                                                 self.max_iter, n, c, X_p, y_p,
                                                 self.C)
                #Once every model has been trained, place each model's vector in its
                #corresponding row of the matrix of models
                for c in range(n_labels):
                    self.all_theta[c, :] = thetas[c]

        #For binary logistic regression
        else:
            #We have a single model, which will tell us which examples belong to one class
            #and which do not (and therefore belong to the other class)
            self.all_theta = np.zeros((1, n + 1), dtype=np.float64)
            if self.solver == 'fmincg':
                theta = optimize.fmin_cg(self.cost_func,
                                         initial_theta,
                                         fprime=self.grad_cost_func,
                                         args=args,
                                         maxiter=self.max_iter)
            elif self.solver == 'newton-cg':
                theta = optimize.fmin_ncg(self.cost_func,
                                          initial_theta,
                                          fprime=self.grad_cost_func,
                                          args=args,
                                          maxiter=self.max_iter)
            elif self.solver == 'lbfgs':
                theta = optimize.fmin_l_bfgs_b(self.cost_func,
                                               initial_theta,
                                               fprime=self.grad_cost_func,
                                               args=args,
                                               maxiter=self.max_iter)[0]
                #print(theta)
            self.all_theta = theta.transpose()
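    # Note: the parallel branch above delegates to self.func2 (building the
    # per-class binary targets) and self.func (training one one-vs-all model),
    # neither of which is shown in this example. A hypothetical sketch
    # consistent with the executor.submit calls above (the Newton-CG solver is
    # assumed) might be:
    def func2(self, y_p, c, y_enc, X_p, X_aux):
        #Build the binary target vector for class c and store it (together
        #with the design matrix) under key c, so each thread has its own copy
        y_p[c] = np.array([1.0 if label == c else 0.0 for label in y_enc],
                          dtype=np.float64)
        X_p[c] = X_aux

    def func(self, thetas, max_iter, n, c, X_p, y_p, C):
        #Train the one-vs-all model for class c and store its parameter vector
        initial_theta = np.zeros((n + 1, 1), dtype=np.float64)
        theta = optimize.fmin_ncg(self.cost_func,
                                  initial_theta,
                                  fprime=self.grad_cost_func,
                                  args=[X_p[c], y_p[c], C],
                                  maxiter=max_iter)
        thetas[c] = theta.transpose()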
Example #55
0
    def fit(self,
            method='fmin_powell',
            iterlim=1000,
            tol=.0001,
            verbose=0,
            no_callback=False,
            **kwargs):
        """
        N.fit(method='fmin_powell', iterlim=1000, tol=.0001):

        Causes the normal approximation object to fit itself.

        method: May be one of the following, from the scipy.optimize package:
            -fmin_l_bfgs_b
            -fmin_ncg
            -fmin_cg
            -fmin_powell
            -fmin

        no_callback: Boolean indicating whether or not to use a callback
        function. If True and a callback keyword is provided in kwargs, then
        the user-supplied callback will be used. Otherwise, if False,
        and verbose > 0, a default callback will print iteration progress.

        The kwargs are passed to the scipy.optimize functions. See there
        for more information.
        """
        self.tol = tol
        self.method = method
        self.verbose = verbose

        p = zeros(self.len, dtype=float)
        for stochastic in self.stochastics:
            p[self._slices[stochastic]] = ravel(stochastic.value)

        if not self.method == 'newton':
            if not scipy_imported:
                raise ImportError('Scipy is required to use EM and NormApprox')

        default_callback = (verbose > 0 and not no_callback)
        if default_callback and 'callback' in kwargs:
            raise ValueError("For user-provided callback and verbose output"
                             " set use_callback to True")

        if default_callback:

            def callback(p):
                try:
                    print_('Current log-probability : %f' % self.logp)
                except ZeroProbability:
                    print_('Current log-probability : %f' % -Inf)
        elif 'callback' in kwargs:
            callback = kwargs.pop('callback')
        else:

            def callback(p):
                pass

        if self.method == 'fmin_ncg':
            p = fmin_ncg(f=self.func,
                         x0=p,
                         fprime=self.gradfunc,
                         fhess=self.hessfunc,
                         epsilon=self.eps,
                         maxiter=iterlim,
                         callback=callback,
                         avextol=tol,
                         disp=verbose,
                         **kwargs)

        elif self.method == 'fmin':

            p = fmin(func=self.func,
                     x0=p,
                     callback=callback,
                     maxiter=iterlim,
                     ftol=tol,
                     disp=verbose,
                     **kwargs)

        elif self.method == 'fmin_powell':
            p = fmin_powell(func=self.func,
                            x0=p,
                            callback=callback,
                            maxiter=iterlim,
                            ftol=tol,
                            disp=verbose,
                            **kwargs)

        elif self.method == 'fmin_cg':
            p = fmin_cg(f=self.func,
                        x0=p,
                        fprime=self.gradfunc,
                        epsilon=self.eps,
                        callback=callback,
                        maxiter=iterlim,
                        gtol=tol,
                        disp=verbose,
                        **kwargs)

        elif self.method == 'fmin_l_bfgs_b':
            from scipy import __version__ as sp_version
            from distutils.version import LooseVersion
            if LooseVersion(sp_version) >= LooseVersion('0.12.0'):
                p = fmin_l_bfgs_b(func=self.func,
                                  x0=p,
                                  fprime=self.gradfunc,
                                  epsilon=self.eps,
                                  callback=callback,
                                  pgtol=tol,
                                  iprint=verbose - 1,
                                  **kwargs)[0]
            else:
                if verbose > 0:
                    from warnings import warn
                    warn(
                        "Callbacks are not available for fmin_l_bfgs_b in "
                        "SciPy < 0.12.0. Optimization progress will not be"
                        "displayed.", UserWarning)
                p = fmin_l_bfgs_b(func=self.func,
                                  x0=p,
                                  fprime=self.gradfunc,
                                  epsilon=self.eps,
                                  pgtol=tol,
                                  iprint=verbose - 1,
                                  **kwargs)[0]
        else:
            raise ValueError('Method unknown.')

        self._set_stochastics(p)
        self._mu = p

        try:
            self.logp_at_max = self.logp
        except:
            raise RuntimeError(
                'Posterior probability optimization converged to value with zero probability.'
            )

        lnL = sum([x.logp for x in self.observed_stochastics
                   ])  # log-likelihood of observed stochastics
        self.lnL = lnL
        self.AIC = 2. * (self.len - lnL)  # 2k - 2 ln(L)
        self.AICc = self.AIC + (
            (2 * self.len *
             (self.len + 1)) / float(self.data_len - self.len - 1))
        try:
            self.BIC = self.len * log(
                self.data_len) - 2. * lnL  # k ln(n) - 2 ln(L)
        except FloatingPointError:
            self.BIC = -Inf

        self.fitted = True
Example #56
0
    cost, grad = costFunctionReg(initial_theta, X, y, lambda_val)

    print('Cost at initial theta (zeros): %f' % cost)

    raw_input('Program paused. Press enter to continue')

    # =================== Part 2: Regularization and Accuracies ===================

    initial_theta = np.zeros((X.shape[1], 1))

    lambda_val = 1

    fmin_ret = fmin_ncg(lambda t: (costFunctionReg(t, X, y, lambda_val)[0]),
                        initial_theta,
                        lambda t: (costFunctionReg(t, X, y, lambda_val)[1]),
                        maxiter=400,
                        full_output=True)

    theta = fmin_ret[0]
    cost = fmin_ret[1]

    print('Cost at theta found by fmin: %f' % cost)
    print('theta:')
    print(theta)

    plotDecisionBoundary(theta, X, y)
    plt.title('lambda = %d' % lambda_val)
    plt.legend()

    p = predict(theta, X)
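# Note: predict is assumed to threshold the fitted logistic model at 0.5; a
# minimal sketch consistent with its use above is:
import numpy as np


def predict(theta, X):
    # Predict class 1 where the modelled probability is >= 0.5, else class 0
    return (1.0 / (1.0 + np.exp(-X.dot(theta))) >= 0.5).astype(float)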
Example #57
0
x_bfgs = optimize.fmin_bfgs(f, K[0], disp=0)[0]
print('       BFGS: time %.2fs, x error %.2f, f error %.2f' %
      (time.time() - t0, np.sqrt(np.sum(
          (x_bfgs - x_ref)**2)), f(x_bfgs) - f_ref))

t0 = time.time()
x_l_bfgs = optimize.fmin_l_bfgs_b(f, K[0], approx_grad=1, disp=0)[0]
print('     L-BFGS: time %.2fs, x error %.2f, f error %.2f' %
      (time.time() - t0, np.sqrt(np.sum(
          (x_l_bfgs - x_ref)**2)), f(x_l_bfgs) - f_ref))

t0 = time.time()
x_bfgs = optimize.fmin_bfgs(f, K[0], f_prime, disp=0)[0]
print("  BFGS w f': time %.2fs, x error %.2f, f error %.2f" %
      (time.time() - t0, np.sqrt(np.sum(
          (x_bfgs - x_ref)**2)), f(x_bfgs) - f_ref))

t0 = time.time()
x_l_bfgs = optimize.fmin_l_bfgs_b(f, K[0], f_prime, disp=0)[0]
print("L-BFGS w f': time %.2fs, x error %.2f, f error %.2f" %
      (time.time() - t0, np.sqrt(np.sum(
          (x_l_bfgs - x_ref)**2)), f(x_l_bfgs) - f_ref))

t0 = time.time()
x_newton = optimize.fmin_ncg(f, K[0], f_prime, fhess=hessian, disp=0)[0]
print("     Newton: time %.2fs, x error %.2f, f error %.2f" %
      (time.time() - t0, np.sqrt(np.sum(
          (x_newton - x_ref)**2)), f(x_newton) - f_ref))

pl.show()
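# Note: f, f_prime, hessian, K, x_ref and f_ref are defined earlier in the
# original script and are not shown here. A self-contained sketch of the same
# kind of timing comparison, using SciPy's built-in Rosenbrock helpers
# instead, might be:
import time
import numpy as np
from scipy import optimize
from scipy.optimize import rosen, rosen_der, rosen_hess

x0 = np.array([-1.2, 1.0, 0.8])
x_ref = np.ones_like(x0)            # known minimiser of the Rosenbrock function

t0 = time.time()
x_bfgs = optimize.fmin_bfgs(rosen, x0, rosen_der, disp=0)
print("  BFGS w f': time %.3fs, x error %.2e" %
      (time.time() - t0, np.sqrt(np.sum((x_bfgs - x_ref) ** 2))))

t0 = time.time()
x_newton = optimize.fmin_ncg(rosen, x0, rosen_der, fhess=rosen_hess, disp=0)
print("     Newton: time %.3fs, x error %.2e" %
      (time.time() - t0, np.sqrt(np.sum((x_newton - x_ref) ** 2))))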
Example #58
print('Cost at initial theta (zeros): ', cost)

## ============= Part 2: Regularization and Accuracies =============
#  Optional Exercise:
#  In this part, you will get to try different values of lambda and 
#  see how regularization affects the decision boundary
#
#  Try the following values of lambda (0, 1, 10, 100).
#
#  How does the decision boundary change when you vary lambda? How does
#  the training set accuracy vary?
#

import scipy.optimize as op

#  Run Newton-Conjugate-Gradient to obtain the optimal theta
theta = op.fmin_ncg(f=costFunctionReg, x0=initial_theta, fprime=gradientReg, args=(X, y, lambd))
cost = costFunctionReg(theta, X, y, lambd)

# Plot Boundary
plotDecisionBoundary(theta, X, y)


# Compute accuracy on our training set
p = predict(theta, X);

acc = (np.sum(p == y) * 100.0)/m
print('Train Accuracy: ', acc)


Example #59
0
def minimize(func,
             x0,
             gradient=None,
             hessian=None,
             algorithm="default",
             **args):
    r"""
    This function is an interface to a variety of algorithms for computing
    the minimum of a function of several variables.


    INPUT:

    - ``func`` -- Either a symbolic function or a Python function whose
      argument is a tuple with `n` components

    - ``x0`` -- Initial point for finding minimum.

    - ``gradient`` -- Optional gradient function. This will be computed
      automatically for symbolic functions.  For Python functions, it allows
      the use of algorithms requiring derivatives.  It should accept a
      tuple of arguments and return a NumPy array containing the partial
      derivatives at that point.

    - ``hessian`` --  Optional hessian function. This will be computed
      automatically for symbolic functions. For Python functions, it allows
      the use of algorithms requiring derivatives. It should accept a tuple
      of arguments and return a NumPy array containing the second partial
      derivatives of the function.

    - ``algorithm`` -- String specifying algorithm to use. Options are
      ``'default'`` (for Python functions, the simplex method is the default)
      (for symbolic functions bfgs is the default):

       - ``'simplex'``

       - ``'powell'``

       - ``'bfgs'`` -- (Broyden-Fletcher-Goldfarb-Shanno) requires
         ``gradient``

       - ``'cg'`` -- (conjugate-gradient) requires gradient

       - ``'ncg'`` -- (newton-conjugate gradient) requires gradient and hessian


    EXAMPLES::

        sage: vars=var('x y z')
        sage: f=100*(y-x^2)^2+(1-x)^2+100*(z-y^2)^2+(1-y)^2
        sage: minimize(f,[.1,.3,.4],disp=0)
        (1.00..., 1.00..., 1.00...)

        sage: minimize(f,[.1,.3,.4],algorithm="ncg",disp=0)
        (0.9999999..., 0.999999..., 0.999999...)

    Same example with just Python functions::

        sage: def rosen(x): # The Rosenbrock function
        ...      return sum(100.0r*(x[1r:]-x[:-1r]**2.0r)**2.0r + (1r-x[:-1r])**2.0r)
        sage: minimize(rosen,[.1,.3,.4],disp=0)
        (1.00..., 1.00..., 1.00...)

    Same example with a pure Python function and a Python function to
    compute the gradient::

        sage: def rosen(x): # The Rosenbrock function
        ...      return sum(100.0r*(x[1r:]-x[:-1r]**2.0r)**2.0r + (1r-x[:-1r])**2.0r)
        sage: import numpy
        sage: from numpy import zeros
        sage: def rosen_der(x):
        ...      xm = x[1r:-1r]
        ...      xm_m1 = x[:-2r]
        ...      xm_p1 = x[2r:]
        ...      der = zeros(x.shape,dtype=float)
        ...      der[1r:-1r] = 200r*(xm-xm_m1**2r) - 400r*(xm_p1 - xm**2r)*xm - 2r*(1r-xm)
        ...      der[0] = -400r*x[0r]*(x[1r]-x[0r]**2r) - 2r*(1r-x[0])
        ...      der[-1] = 200r*(x[-1r]-x[-2r]**2r)
        ...      return der
        sage: minimize(rosen,[.1,.3,.4],gradient=rosen_der,algorithm="bfgs",disp=0)
        (1.00...,  1.00..., 1.00...)
    """
    from sage.symbolic.expression import Expression
    from sage.ext.fast_eval import fast_callable
    import scipy
    from scipy import optimize
    if isinstance(func, Expression):
        var_list = func.variables()
        var_names = map(str, var_list)
        fast_f = fast_callable(func, vars=var_names, domain=float)
        f = lambda p: fast_f(*p)
        gradient_list = func.gradient()
        fast_gradient_functions = [
            fast_callable(gradient_list[i], vars=var_names, domain=float)
            for i in xrange(len(gradient_list))
        ]
        gradient = lambda p: scipy.array(
            [a(*p) for a in fast_gradient_functions])
    else:
        f = func

    if algorithm == "default":
        if gradient == None:
            min = optimize.fmin(f, map(float, x0), **args)
        else:
            min = optimize.fmin_bfgs(f,
                                     map(float, x0),
                                     fprime=gradient,
                                     **args)
    else:
        if algorithm == "simplex":
            min = optimize.fmin(f, map(float, x0), **args)
        elif algorithm == "bfgs":
            min = optimize.fmin_bfgs(f,
                                     map(float, x0),
                                     fprime=gradient,
                                     **args)
        elif algorithm == "cg":
            min = optimize.fmin_cg(f, map(float, x0), fprime=gradient, **args)
        elif algorithm == "powell":
            min = optimize.fmin_powell(f, map(float, x0), **args)
        elif algorithm == "ncg":
            if isinstance(func, Expression):
                hess = func.hessian()
                hess_fast = [[
                    fast_callable(a, vars=var_names, domain=float) for a in row
                ] for row in hess]
                hessian = lambda p: [[a(*p) for a in row] for row in hess_fast]
                hessian_p = lambda p, v: scipy.dot(scipy.array(hessian(p)), v)
                min = optimize.fmin_ncg(f,
                                        map(float, x0),
                                        fprime=gradient,
                                        fhess=hessian,
                                        fhess_p=hessian_p,
                                        **args)
    return vector(RDF, min)
Example #60
0
    def fit(self, method='fmin', iterlim=1000, tol=.0001, verbose=0):
        """
        N.fit(method='fmin', iterlim=1000, tol=.0001):

        Causes the normal approximation object to fit itself.

        method: May be one of the following, from the scipy.optimize package:
            -fmin_l_bfgs_b
            -fmin_ncg
            -fmin_cg
            -fmin_powell
            -fmin
        """
        self.tol = tol
        self.method = method
        self.verbose = verbose

        p = zeros(self.len, dtype=float)
        for stochastic in self.stochastics:
            p[self._slices[stochastic]] = ravel(stochastic.value)

        if not self.method == 'newton':
            if not scipy_imported:
                raise ImportError('Scipy is required to use EM and NormApprox')

        if self.verbose > 0:

            def callback(p):
                try:
                    print_('Current log-probability : %f' % self.logp)
                except ZeroProbability:
                    print_('Current log-probability : %f' % -Inf)
        else:

            def callback(p):
                pass

        if self.method == 'fmin_ncg':
            p = fmin_ncg(f=self.func,
                         x0=p,
                         fprime=self.gradfunc,
                         fhess=self.hessfunc,
                         epsilon=self.eps,
                         maxiter=iterlim,
                         callback=callback,
                         avextol=tol,
                         disp=verbose)

        elif self.method == 'fmin':

            p = fmin(func=self.func,
                     x0=p,
                     callback=callback,
                     maxiter=iterlim,
                     ftol=tol,
                     disp=verbose)

        elif self.method == 'fmin_powell':
            p = fmin_powell(func=self.func,
                            x0=p,
                            callback=callback,
                            maxiter=iterlim,
                            ftol=tol,
                            disp=verbose)

        elif self.method == 'fmin_cg':
            p = fmin_cg(f=self.func,
                        x0=p,
                        fprime=self.gradfunc,
                        epsilon=self.eps,
                        callback=callback,
                        maxiter=iterlim,
                        gtol=tol,
                        disp=verbose)

        elif self.method == 'fmin_l_bfgs_b':
            p = fmin_l_bfgs_b(
                func=self.func,
                x0=p,
                fprime=self.gradfunc,
                epsilon=self.eps,
                # callback=callback,
                pgtol=tol,
                iprint=verbose - 1)[0]

        else:
            raise ValueError('Method unknown.')

        self._set_stochastics(p)
        self._mu = p

        try:
            self.logp_at_max = self.logp
        except:
            raise RuntimeError(
                'Posterior probability optimization converged to value with zero probability.'
            )

        lnL = sum([x.logp for x in self.observed_stochastics
                   ])  # log-likelihood of observed stochastics
        self.AIC = 2. * (self.len - lnL)  # 2k - 2 ln(L)
        try:
            self.BIC = self.len * log(
                self.data_len) - 2. * lnL  # k ln(n) - 2 ln(L)
        except FloatingPointError:
            self.BIC = -Inf

        self.fitted = True