Exemplo n.º 1
def compute_vector_lr(syn0_proxy, bins_proxy, neg_words_mult, lbda, word_idxs):
    """Fit a vector for a bag of word indices by minimising ``log_l``.

    Every index in ``word_idxs`` is counted as a positive occurrence, and
    ``int(neg_words_mult * len(word_idxs))`` indices drawn with
    ``sample(bins, ...)`` are counted as negative occurrences.  The rows of
    ``syn0`` for all counted indices, together with both count columns and
    ``lbda``, are handed to ``log_l``, which L-BFGS-B minimises starting
    from the zero vector.

    Parameters
    ----------
    syn0_proxy, bins_proxy : objects exposing ``get()``
        ``syn0_proxy.get()`` must return a 2-D array indexable by word
        index (presumably the embedding matrix -- confirm with the caller);
        ``bins_proxy.get()`` is forwarded unchanged to ``sample``.
    neg_words_mult : number
        Negative samples drawn per entry of ``word_idxs``.
    lbda : number
        Passed through to ``log_l`` as its last argument.
    word_idxs : sequence of int
        Indices counted as positive occurrences.

    Returns
    -------
    numpy.ndarray
        ``float32`` vector of length ``syn0.shape[1] + 1``; all zeros when
        the optimiser produced non-finite values.
    """
    syn0 = syn0_proxy.get()
    bins = bins_proxy.get()

    # counts[idx] -> [positive occurrences, negative-sample occurrences]
    counts = defaultdict(lambda: np.zeros(2, dtype=np.uint32))

    for widx in word_idxs:
        counts[widx][0] += 1

    neg_words_idxs = sample(bins, int(neg_words_mult * len(word_idxs)))
    for neg_widx in neg_words_idxs:
        counts[neg_widx][1] += 1

    # Materialise the dict views: under Python 3 a keys() view is not a valid
    # NumPy index and np.vstack() requires a real sequence.  keys() and
    # values() iterate in the same order, so rows stay aligned.
    vectors = syn0[list(counts.keys())]
    count_pairs = np.vstack(list(counts.values()))
    positive_counts = count_pairs[:, 0]
    negative_counts = count_pairs[:, 1]

    def objective(w):
        return log_l(w, vectors, positive_counts, negative_counts, lbda)

    x0 = np.zeros(syn0.shape[1] + 1)
    opt = lbfgsb.fmin_l_bfgs_b(objective, x0)

    # opt is (x, f, info); a non-zero warnflag means the optimiser did not
    # report clean convergence.
    if opt[2]["warnflag"]:
        logging.debug("Error in optimization: %s", opt[2])

    lr_vec = opt[0].astype(np.float32)
    if not np.all(np.isfinite(lr_vec)):
        logging.info("Error computing lr vector")
        lr_vec[:] = 0

    return lr_vec
Exemplo n.º 2
    def __solver__(self, p):
        """Run ``fmin_l_bfgs_b`` on problem ``p`` and store the outcome on ``p``.

        Reads ``p.n``, ``p.lb``, ``p.ub``, ``p.f``, ``p.df``, ``p.x0``,
        ``p.iprint`` and ``p.maxFunEvals``; writes ``p.xk``, ``p.xf``,
        ``p.fk``, ``p.ff`` and ``p.istop``.
        """
        #WholeRepr2LinConst(p)#TODO: remove me

        bounds = []

        # The conditional-expression form below does not work in Python < 2.5,
        # hence the explicit helper function.
        # BOUND = lambda x: x if isfinite(x) else None

        def BOUND(x):
            # Map a non-finite bound to None.
            if isfinite(x): return x
            else: return None

        # One (lower, upper) pair per variable.
        for i in range(p.n): bounds.append((BOUND(p.lb[i]), BOUND(p.ub[i])))

        xf, ff, d = fmin_l_bfgs_b(p.f, p.x0, fprime=p.df,
                  approx_grad=0,  bounds=bounds,
                  iprint=p.iprint, maxfun=p.maxFunEvals)

        # NOTE(review): ``istop`` is only assigned in the warnflag == 1 branch,
        # so for warnflag 0 or 2 the ``p.istop = istop`` line below refers to an
        # unbound local.  ``msg`` is assigned but never used in this method.
        # An assignment to ``istop`` appears to be missing from the first branch.
        if d['warnflag'] in (0, 2):
            # if 2 - some problems can be present, but final check from RunProbSolver will set negative istop if solution is unfeasible
            if d['warnflag'] == 0: msg = 'converged'
        elif d['warnflag'] == 1:  istop = IS_MAX_FUN_EVALS_REACHED

        # Publish the final point and objective value as both "current" and "final".
        p.xk = p.xf = xf
        p.fk = p.ff = ff
        p.istop = istop
Exemplo n.º 3
 def smoothData(self,x,y,weight,nMiss=0):
   smooth data
   import scipy.optimize.lbfgsb as lbfgsb
   from scipy.fftpack.realtransforms import dct,idct
   n0 = len(x)
   #x = np.array([x,x,x]).flatten()
   #y = np.array([y,y,y]).flatten()
   #weight = np.array([weight,weight,weight]).flatten()
   n = len(x)
   weight = 1./weight
   # scale 0 to 1
   weight = weight/np.max(weight)
   i = np.arange(1,n+1)
   eigenvalues = -2. + 2.*np.cos((i-1)*np.pi/n)
   DCTy = dct(y,norm='ortho',type=2)
   dcty2 = DCTy**2
   eigenvalues2 = eigenvalues**2
   x0 = np.atleast_1d(1.)
   y_hat = np.zeros_like(y)
   xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,x0,fprime=None,factr=10.,\
   solvedGamma = np.exp(xpost)[0]
   return y_hat,solvedGamma 
Exemplo n.º 4
    def __solver__(self, p):
        #WholeRepr2LinConst(p)#TODO: remove me

        bounds = []

        # don't work in Python ver < 2.5
        # BOUND = lambda x: x if isfinite(x) else None

        def BOUND(x):
            if isfinite(x): return x
            else: return None

        for i in range(p.n):
            bounds.append((BOUND(p.lb[i]), BOUND(p.ub[i])))

        xf, ff, d = fmin_l_bfgs_b(p.f,

        if d['warnflag'] in (0, 2):
            # if 2 - some problems can be present, but final check from RunProbSolver will set negative istop if solution is unfeasible
            if d['warnflag'] == 0: msg = 'converged'
        elif d['warnflag'] == 1: istop = IS_MAX_FUN_EVALS_REACHED

        p.xk = p.xf = xf
        p.fk = p.ff = ff
        p.istop = istop
 def minimize_lbfgs(self, parameters, x, y):
     parameters2 = parameters.reshape([self.M], order="F")
     # minimizador L-BFGS-B
     result, _, _ = opt2.fmin_l_bfgs_b(self.get_objective,
                                       args=[x, y],
     return result.reshape([-1, 1], order="F")
Exemplo n.º 6
 def minimize_lbfgs(self, parameters, x, y, sigma, emp_counts, classes_idx,
                    nr_x, nr_f, nr_c):
     """Optimise the parameters with L-BFGS-B and return them as (nr_f, nr_c).

     ``parameters`` is flattened column-major (``order="F"``) to length
     ``nr_f * nr_c``; the optimiser result is reshaped back the same way.
     """
     parameters2 = parameters.reshape([nr_f * nr_c], order="F")
     # NOTE(review): as written, the call below passes only ``args``; neither an
     # objective function nor a starting point is supplied and ``parameters2``
     # is never used.  The leading positional arguments look to be missing.
     result, _, d = opt2.fmin_l_bfgs_b(
         args=[x, y, sigma, emp_counts, classes_idx, nr_x, nr_f, nr_c])
     return result.reshape([nr_f, nr_c], order="F")
Exemplo n.º 7
def nls_lbfgs_b(S,
    """Non-negative least squares solver using L-BFGS-B.
    S = ss.csr_matrix(check_array(S, accept_sparse='csr'))
    D = ss.csr_matrix(check_array(D, accept_sparse='csr'))
    n_features = S.shape
    n_components = D.shape[1]

    DtD = safe_sparse_dot(D.T, D)
    DtSD = safe_sparse_dot(D.T, safe_sparse_dot(S, D))

    def f(C, *args):
        C = ss.diags(C)
        tonorm = S - safe_sparse_dot(D, safe_sparse_dot(C, D.T))
        reg = l1_reg * C.diagonal().sum()
        return (0.5 * (ss.linalg.norm(tonorm)**2)) + reg

    def fprime(C, *args):
        C = ss.diags(C)
        DtDCDtD = safe_sparse_dot(DtD, safe_sparse_dot(C, DtD))
        reg = l1_reg * ss.eye(C.shape[0])
        full = DtDCDtD - DtSD + reg
        return full.diagonal()

    if C_init is None:
        C = np.zeros(n_components, dtype=np.float64)
    elif C_init.shape == (n_features, n_features):
        C = np.diag(C_init)
        C = C_init

    C, residual, d = fmin_l_bfgs_b(
        bounds=[(0, None)] * n_components,

    # testing reveals that sometimes, very small negative values occur
    C[C < 0] = 0

    if l1_reg:
        residual -= l1_reg * C.sum()
    residual = np.sqrt(2 * residual)
    if d['warnflag'] > 0:
        print("L-BFGS-B failed to converge")

    return C, residual
Exemplo n.º 8
 def train(self, w0, debug=False):
     """Minimise ``self.objective`` from ``w0`` with L-BFGS-B.

     Parameters
     ----------
     w0 : array-like
         Starting point handed to ``fmin_l_bfgs_b``.
     debug : bool
         When true the optimiser is called with ``iprint=0``; otherwise
         with ``iprint=-1``, which silences its output.

     Returns
     -------
     The optimised parameter vector.

     Raises
     ------
     OptimisationException
         With the optimiser's ``task`` message when ``warnflag`` is non-zero.
     """
     # Both values must be reachable: without the else-branch ``iprint`` was
     # unbound when debug was false and always -1 when it was true.
     iprint = 0 if debug else -1
     x, _, d = fmin_l_bfgs_b(self.objective, w0, fprime=self.grad, pgtol=1e-09, iprint=iprint)
     if d["warnflag"] != 0:
         raise OptimisationException(d["task"])
     return x
Exemplo n.º 9
 def train(self,w0,debug=False):
   """Minimise ``self.objective`` from ``w0`` with L-BFGS-B.

   ``debug`` selects the optimiser's ``iprint`` value: 0 when true,
   -1 (silent) otherwise.  Returns the optimised parameter vector and
   raises ``OptimisationException`` with the optimiser's ``task`` message
   when ``warnflag`` is non-zero.
   """
   # Both values must be reachable: without the else-branch ``iprint`` was
   # unbound when debug was false and always -1 when it was true.
   iprint = 0 if debug else -1
   x,_,d = fmin_l_bfgs_b(self.objective, w0, fprime=self.grad, pgtol=1e-09, iprint=iprint)
   if d['warnflag'] != 0:
     raise OptimisationException(d['task'])
   return x
Exemplo n.º 10
def solve_l1l1_approx(X, y, lbda):
    """Minimise ``l1l1_approx`` over non-negative weights with L-BFGS-B.

    The search starts at the zero vector of length ``X.shape[1] + 1`` and
    every component is bounded below by 0 with no upper bound.  The
    minimiser found is returned as a ``float32`` array.
    """
    def objective(w):
        return l1l1_approx(w, X, y, lbda)

    n_params = X.shape[1] + 1
    start = np.zeros(n_params)
    nonnegative = [(0, None)] * n_params

    solution = lbfgsb.fmin_l_bfgs_b(objective, start, bounds=nonnegative)[0]
    return solution.astype(np.float32)
Exemplo n.º 11
    def find2(self, POIMobj, motif_len, motif_start, base, path2pwm=None,solver="NLP"):
        self.motif_start = motif_start
        self.motif_len = motif_len
        x0 = tools.ini_pwm(motif_len, 1, len(base))[0]

        x0 = x0.flatten()

        lb = np.ones(x0.shape) * 0.001
        ub = np.ones(x0.shape) * 0.999
        iprint = 0
        maxIter = 1000
        ftol = 1e-04
        gradtol = 1e-03
        diffInt = 1e-05
        contol = 1e-02
        maxFunEvals = 1e04
        maxTime = 100

        lenA = int(len(x0))
        lenk = int(len(x0)) / len(base)
        Aeq = np.zeros((lenk, lenA))
        beq = np.ones(lenk)
        for i in range(lenk):
            for pk in range(i, lenA, lenk):
                Aeq[i, pk] = 1

                # ,Aeq=Aeq,beq=beq,
        cons = {'type': 'eq', 'fun': lambda x: np.dot(Aeq, x) - beq}
        bnds = []
        for i in range(len(x0)):
            bnds.append((lb[i], ub[i]))
        # bnds = np.vstack((lb,ub))

        if solver == "ralg":
            from openopt import NLP
            p = NLP(self.f_L2, x0,lb=lb, ub=ub, Aeq=Aeq,beq=beq, args=(POIMobj.gPOIM,POIMobj.L,motif_start,POIMobj.small_k,motif_len),  diffInt=diffInt, ftol=ftol, plot=0, iprint=iprint,maxIter = maxIter, maxFunEvals = maxFunEvals, show=False, contol=contol)
            result = p._solve(solver)
            x = result.xf
            f = result.ff
        elif solver == "LBFGSB":
            x, f, d = fmin_l_bfgs_b(self.f_L2, x0,
                                    args=(POIMobj.gPOIM, POIMobj.L, motif_start, POIMobj.small_k, motif_len),
        elif solver == "SLSQP":
            result = minimize(self.f_L2, x0,args=(POIMobj.gPOIM, POIMobj.L, motif_start, POIMobj.small_k, motif_len),method='SLSQP',bounds=bnds,constraints=cons)
            x = result.x
            f = result.fun
        self.motif_pwm = np.reshape(x, (4, motif_len))
        fopt = f
        if not(path2pwm is None):
            np.savetxt(path2pwm, self.poim_norm)

        return self.motif_pwm
Exemplo n.º 12
Arquivo: LBFGSB.py Projeto: MSTU/grid
	def Run (self):
		"""Reset the iteration counter and minimise ``self.Objective`` with L-BFGS-B.

		The search starts at ``self.vl0`` inside ``self.bounds`` with
		numerically approximated gradients; the optimiser settings are taken
		from ``self.m``, ``self.factr``, ``self.pgtol``, ``self.epsilon`` and
		``self.maxfun``.  The minimiser found is stored in ``self.xopt``.
		"""
		self.iteration = 0
		outcome = fmin_l_bfgs_b (
			self.Objective, self.vl0,
			approx_grad=1, bounds=self.bounds, m=self.m,
			factr=self.factr, pgtol=self.pgtol,
			epsilon=self.epsilon, maxfun=self.maxfun)
		# Only the position is kept; the value and info dict are discarded.
		(self.xopt, _fval, _info) = outcome
Exemplo n.º 13
    def train_lmbfgs(self):
        """Train the model by maximising posterior with LM-BFGS.

        The training data should have been set at this stage:
            >> h = hcrf(H, maxw, maxf)
            >> h.X = X
            >> h.Y = Y
            >> h.lamb = lamb
            >> final_params = h.train_lmbfgs()

        The optimisation starts from ``self.param`` restricted to
        ``self.param_non_inf_indexes`` and minimises ``self.get_obj``.

        Returns the result of ``fmin_l_bfgs_b`` unchanged, i.e. the tuple
        ``(x, f, d)`` whose first element is the final parameter vector.
        """
        initial = self.param[self.param_non_inf_indexes]
        return fmin_l_bfgs_b(self.get_obj, initial)
Exemplo n.º 14
	def Run (self):
		"""Minimise the objective with the optimiser named by ``self.method``.

		The start point ``self.vl0`` is first passed through ``self.NormX``.
		The minimiser found is stored in ``self.xopt``.  Supported method
		names are 'Opt_1D', 'NelderMead', 'Powell', 'LBFGSB', 'TNC',
		'Anneal' and 'Cobyla'; any other name only prints an error message
		and leaves ``self.xopt`` untouched.
		"""
		self.iteration = 0

		vl0 = self.NormX (self.vl0)

		if self.method == 'Opt_1D':
			# Scalar search between the two ends of the first bound pair.
			(xa, xb) = self.bounds[0]
			xa = self.NormX ([xa])
			xb = self.NormX ([xb])
			self.xopt = fminbound (self.Objective1D, xa[0], xb[0], xtol = self.xtol, maxfun = self.maxfun)

		elif self.method == 'NelderMead':
			self.xopt = fmin (self.Objective, vl0, xtol=self.xtol, ftol=self.ftol, maxfun=self.maxfun)
		elif self.method == 'Powell':
			self.xopt = fmin_powell (self.Objective, vl0, xtol=self.xtol, ftol=self.ftol, maxfun=self.maxfun)
		elif self.method == 'LBFGSB':
			bounds = self.NormBounds (self.bounds)
			(self.xopt, f, d) = fmin_l_bfgs_b (self.Objective, vl0, approx_grad=1, bounds = bounds, epsilon=self.epsilon, maxfun=self.maxfun)
		elif self.method == 'TNC':
			bounds = self.NormBounds (self.bounds)
			(self.xopt, f, d) = fmin_tnc (self.Objective, vl0, approx_grad=1, bounds = bounds, ftol=self.ftol, xtol=self.xtol, epsilon=self.epsilon, maxfun=self.maxfun)
		elif self.method == 'Anneal':
			(lower, upper) = self.NormBoundsAnneal(self.bounds)
			(self.xopt, r) = anneal (self.Objective, vl0, schedule = self.schedule,
				maxeval=self.maxfun, feps=self.ftol, lower=lower, upper=upper)
		elif self.method == 'Cobyla':
			self.isCobyla = 1
			self.ce = self.CreateBounds(self.vl, self.fce)
			self.xopt = self.fmin_cobyla (self.Objective, vl0, self.ce, rhobeg=self.rhobeg, rhoend=self.rhoend, maxfun=self.maxfun)
		else:
			# Only reached for an unknown method name.  The single-string
			# form prints the same text under Python 2 and Python 3.
			print ('Optimization Error: method  %s is absent' % (self.method,))
Exemplo n.º 15
def optimization_layer(result, iprint=-1):
    Implementation of the Optimization layer. It uses L-BFGS [1] as special case of L-BFGS-B [2] in scipy.optimize.
    The result object is modified to yield the optimal BEModel.
    A sub-dictionary with additional information is added under the key result.additional['Opt'].

    [1] D.C. Liu and J. Nocedal. ``On the Limited Memory Method for Large Scale Optimization'',
        Math. Prog. B 45 (3), pp.~503--528, 1989. DOI 10.1007/BF01589116

    [2] C. Zhu, R.H. Byrd and J. Nocedal, ``Algorithm 778: L-BFGS-B: Fortran subroutines for large-scale
        bound-constrained optimization'', ACM Trans. Math. Software 23 (4), pp.~550--560, 1997.
        DOI 10.1145/279232.279236

    result : object
        A valid :py:class:`cobea.model.Result` object.
        The object is modified during processing; the model variables are set to their optimal values.
    iprint : int
        (Optional) verbosity of fmin_l_bfgs_b. Default: -1

    result : object
        Identical to input object.
    x = result._to_statevec()
    print('Optimization layer: running with %i model parameters...' %
    xopt, fval, optimizer_dict = fmin_l_bfgs_b(result._gradient,
                                               args=(result.input_matrix, ),
    print('    ...finished with %i gradient (L-BFGS) iterations.' %
    print('    chi^2 = %.3e (%s)^2' % (fval, result.unit))
    result.additional['Opt'] = optimizer_dict
    return result
Exemplo n.º 16
    def _fit_inner(self, X, y, activations, deltas, coef_grads,
                   intercept_grads, layer_units):

        # Store meta information for the parameters
        self._coef_indptr = []
        self._intercept_indptr = []
        start = 0

        # Save sizes and indices of coefficients for faster unpacking
        for i in range(self.n_layers_ - 1):
            n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]

            end = start + (n_fan_in * n_fan_out)
            self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))
            start = end

        # Save sizes and indices of intercepts for faster unpacking
        for i in range(self.n_layers_ - 1):
            end = start + layer_units[i + 1]
            self._intercept_indptr.append((start, end))
            start = end

        # Run LBFGS
        packed_coef_inter = self._pack(self.coefs_, self.intercepts_)

        if self.verbose is True or self.verbose >= 1:
            iprint = 1
            iprint = -1

        optimal_parameters, self.loss_, d = fmin_l_bfgs_b(
            args=(X, y, activations, deltas, coef_grads, intercept_grads))

Exemplo n.º 17
 def train(self,debug=False):
   """Train the mixture model.

   The start vector is zero for the first ``self.interp_index[0]`` entries
   and ``1/num_models`` for the interpolation entries, which are bounded
   to ``[self.interp_floor, 1]``; all other entries are unbounded.

   ``debug`` selects the optimiser's ``iprint`` value: 0 when true,
   -1 (silent) otherwise.

   Returns ``(weights, mix_weights)``: ``weights`` is the optimised vector
   up to ``self.interp_index[0]``; ``mix_weights`` has one row per model,
   the last row being 1 minus the column sums of the optimised rows.

   Raises ``OptimisationException`` with the optimiser's ``task`` message
   when ``warnflag`` is non-zero.
   """
   # Both values must be reachable: without the else-branch ``iprint`` was
   # unbound when debug was false and always -1 when it was true.
   iprint = 0 if debug else -1
   # Initialise weights to zero, except interpolation
   num_phrase_features = self.phrase_index[1] - self.phrase_index[0]
   num_interp = self.interp_index[1] - self.interp_index[0]
   # Floor division keeps num_models an int under Python 3 as well; it is
   # used as a reshape dimension below.
   num_models = (num_interp // num_phrase_features) + 1
   w0 = [0.0] * self.interp_index[0]
   w0 += [1.0/num_models] * num_interp
   bounds = [(None,None)] * len(w0)
   bounds[self.interp_index[0]:self.interp_index[1]] = \
     [(self.interp_floor,1)] * num_interp
   w0 = np.array(w0)
   x,_,d = fmin_l_bfgs_b(self.objective, w0, fprime=self.gradient, bounds=bounds,  pgtol=1e-09, iprint=iprint)
   if d['warnflag'] != 0:
     raise OptimisationException(d['task'])
   weights = x[:self.interp_index[0]]
   mix_weights = x[self.interp_index[0]:]
   mix_weights = mix_weights.reshape((num_models-1,num_phrase_features))
   # The last model's weights are implied: each column sums to one.
   mix_weights = np.vstack((mix_weights, 1-np.sum(mix_weights,axis=0)))
   return weights,mix_weights
Exemplo n.º 18
 def train(self, debug=False):
     """Train the mixture model.

     The start vector is zero for the first ``self.interp_index[0]`` entries
     and ``1 / num_models`` for the interpolation entries, which are bounded
     to ``[self.interp_floor, 1]``; all other entries are unbounded.

     ``debug`` selects the optimiser's ``iprint`` value: 0 when true,
     -1 (silent) otherwise.

     Returns ``(weights, mix_weights)``: ``weights`` is the optimised vector
     up to ``self.interp_index[0]``; ``mix_weights`` has one row per model,
     the last row being 1 minus the column sums of the optimised rows.

     Raises ``OptimisationException`` with the optimiser's ``task`` message
     when ``warnflag`` is non-zero.
     """
     # Both values must be reachable: without the else-branch ``iprint`` was
     # unbound when debug was false and always -1 when it was true.
     iprint = 0 if debug else -1
     # Initialise weights to zero, except interpolation
     num_phrase_features = self.phrase_index[1] - self.phrase_index[0]
     num_interp = self.interp_index[1] - self.interp_index[0]
     # Floor division keeps num_models an int under Python 3 as well; it is
     # used as a reshape dimension below.
     num_models = (num_interp // num_phrase_features) + 1
     w0 = [0.0] * self.interp_index[0]
     w0 += [1.0 / num_models] * num_interp
     bounds = [(None, None)] * len(w0)
     bounds[self.interp_index[0] : self.interp_index[1]] = [(self.interp_floor, 1)] * (
         num_interp
     )
     w0 = np.array(w0)
     x, _, d = fmin_l_bfgs_b(self.objective, w0, fprime=self.gradient, bounds=bounds, pgtol=1e-09, iprint=iprint)
     if d["warnflag"] != 0:
         raise OptimisationException(d["task"])
     weights = x[: self.interp_index[0]]
     mix_weights = x[self.interp_index[0] :]
     mix_weights = mix_weights.reshape((num_models - 1, num_phrase_features))
     # The last model's weights are implied: each column sums to one.
     mix_weights = np.vstack((mix_weights, 1 - np.sum(mix_weights, axis=0)))
     return weights, mix_weights
Exemplo n.º 19
Arquivo: hcrf.py Projeto: dirko/pyhcrf
    def fit(self, X, y):
        """Fit the model according to the given training data.

        X : List of list of ints. Each list of ints represent a training example. Each int in that list
            is the index of a one-hot encoded feature.

        y : array-like, shape (n_samples,)
            Target vector relative to X.

        self : object
            Returns self.
        classes = list(set(y))
        num_classes = len(classes)
        self.classes_ = classes
        if self.transitions is None:
            self.transitions = self._create_default_transitions(num_classes, self.num_states)

        # Initialise the parameters
        _, num_features = X[0].shape
        num_transitions, _ = self.transitions.shape
        if self.state_parameters is None:
            self.state_parameters = numpy.random.standard_normal((num_features,
                                                                  num_classes)) * self.state_parameter_noise
        if self.transition_parameters is None:
            self.transition_parameters = numpy.random.standard_normal((num_transitions)) * self.transition_parameter_noise

        initial_parameter_vector = self._stack_parameters(self.state_parameters, self.transition_parameters)
        function_evaluations = [0]

        def objective_function(parameter_vector, batch_start_index=0, batch_end_index=-1):
            ll = 0.0
            gradient = numpy.zeros_like(parameter_vector)
            state_parameters, transition_parameters = self._unstack_parameters(parameter_vector)
            for x, ty in zip(X, y)[batch_start_index: batch_end_index]:
                y_index = classes.index(ty)
                dll, dgradient_state, dgradient_transition = log_likelihood(x,
                dgradient = self._stack_parameters(dgradient_state, dgradient_transition)
                ll += dll
                gradient += dgradient

            parameters_without_bias = numpy.array(parameter_vector)  # exclude the bias parameters from being regularized
            parameters_without_bias[0] = 0
            ll -= self.l2_regularization * numpy.dot(parameters_without_bias.T, parameters_without_bias)
            gradient = gradient.flatten() - 2.0 * self.l2_regularization * parameters_without_bias

            if batch_start_index == 0:
                function_evaluations[0] += 1
                if self._verbosity > 0 and function_evaluations[0] % self._verbosity == 0:
                    print '{:10} {:10.2f} {:10.2f}'.format(function_evaluations[0], ll, sum(abs(gradient)))
            return -ll, -gradient

        # If the stochastic gradient stepsize is defined, do 1 epoch of SGD to initialize the parameters.
        if self._sgd_stepsize:
            total_nll = 0.0
            for i in range(len(y)):
                nll, ngradient = objective_function(initial_parameter_vector, i, i + 1)
                total_nll += nll
                initial_parameter_vector -= ngradient * self._sgd_stepsize
                if self._sgd_verbosity > 0:
                    if i % self._sgd_verbosity == 0:
                        print '{:10} {:10.2f} {:10.2f}'.format(i, -total_nll / (i + 1) * len(y), sum(abs(ngradient)))

        self._optimizer_result = fmin_l_bfgs_b(objective_function, initial_parameter_vector, **self.optimizer_kwargs)
        self.state_parameters, self.transition_parameters = self._unstack_parameters(self._optimizer_result[0])
        return self
Exemplo n.º 20
def learn(data,
    if track:
        tracker = ObjectiveTracker(params,

        def obj(p, data, params):
            f_data, f_pen, g = obj_func_general_parametrisation(
                p, data, parametrisation, A, reg_func, loss, penalty, params)
            return f_data, f_pen, g
        counter = 0

        def callback(p):
            nonlocal counter
            counter += 1
            S, alpha, eps = parametrisation(torch.tensor(p), params)
            S = S.reshape(-1).cpu().numpy()
            alpha = alpha.cpu().numpy()
                '\nIteration #{}: Current sampling rate {:.1f}%, alpha {:.2e}, eps {:.2e}'
                        np.mean(S > 0) * 100, alpha.item(), eps.item()))

        def obj(p, data, params):
            f_data, f_pen, g = obj_func_general_parametrisation(
                p, data, parametrisation, A, reg_func, loss, penalty, params)
            return f_data + f_pen, g

    start_time = datetime.datetime.now()
    if 'pgtol' in params['alg_params']['LBFGSB']:
        pgtol = params['alg_params']['LBFGSB']['pgtol']
        pgtol = 1e-10
    if 'maxit' in params['alg_params']['LBFGSB']:
        maxiter = params['alg_params']['LBFGSB']['maxit']
        maxiter = 1000
    print('Learning sampling pattern:')
    p, _, info = fmin_l_bfgs_b(
        lambda p: obj(p, data, params),
        callback=tracker.callback if track else callback)
    end_time = datetime.datetime.now()
    elapsed_time = end_time - start_time
    results = {'elapsed_time': elapsed_time, 'p': p, 'info': info}
    if track:
        results['tracker'] = tracker
    return results
Exemplo n.º 21
Arquivo: smoothn.py Projeto: UCL/kaska
def smoothn(y,
    Robust spline smoothing for 1-D to n-D data.

    SMOOTHN provides a fast, automatized and robust discretized smoothing
    spline for data of any dimension.

    y : numpy array or numpy masked array
        The data to be smoothed.

    nS0 : int, optional
        The number of samples to use when estimating the smoothing parameter.
        Default value is 10.

    smoothOrder : float, optional
        The polynomial order to smooth the function to.
        Default value is 2.0.

    sd : numpy array, optional
        Weighting of the data points in standard deviation format.
        Default is to not weight by standard deviation.

    verbose : { True, False }, optional
        Create extra logging during operation.

    s0 : float, optional
        Initial value of the smoothing parameter.
        Defaults to no value, being instead derived from calculation.

    z0 : float, optional
        Initial estimate of the smoothed data.

    isrobust : { False, True }
        Whether the smoothing applies the robust smoothing algorithm. This
        allows the smoothing to ignore outlier data without creating large
        spikes to fit the data.

    w : numpy array, optional
        Linear weighting to apply to the data.
        Default is to assume no linear weighting.

    s : float
        Initial smoothing parameter.
        Default is to calculate a value.

    max_iter : int, optional
        The maximum number of iterations to attempt the smoothing.
        Default is 100 iterations.

    tol_z: float, optional
        Tolerance at which the smoothing will be considered converged.
        Default value is 1e-3

    weightstr : { 'bisquare', 'cauchy', 'talworth'}, optional
        The type of weighting applied to the data when performing robust smoothing.


    (z, s, exitflag)
        A tuple of the returned results.
    z : numpy array
        The smoothed data.
    s : float
        The value of the smoothing parameter used to perform this smoothing.
    exitflag : {0, -1}
        A return flag of 0 indicates successful execution, -1 an error
        (see the log).


    Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
    can be any n-D noisy array (time series, images, 3D data,...). Non
    finite data (NaN or Inf) are treated as missing values.

    Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
    S must be a real positive scalar. The larger S is, the smoother the
    output will be. If the smoothing parameter S is omitted (see previous
    option) or empty (i.e. S = []), it is automatically determined using
    the generalized cross-validation (GCV) method.

    Z = SMOOTHN(Y,w) or Z = SMOOTHN(Y,w,S) specifies a weighting array w of
    real positive values, that must have the same size as Y. Note that a
    nil weight corresponds to a missing value.

    Robust smoothing
    Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
    the influence of outlying data.

    [Z,S] = SMOOTHN(...) also returns the calculated value for S so that
    you can fine-tune the smoothing subsequently if needed.

    An iteration process is used in the presence of weighted and/or missing
    values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
    termination parameters specified by OPTION_NAME and OPTION_VALUE. They
    can contain the following criteria:
        tol_z:       Termination tolerance on Z (default = 1e-3)
                    tol_z must be in ]0,1[
        max_iter:    Maximum number of iterations allowed (default = 100)
        Initial:    Initial value for the iterative process (default =
                    original data)
    Syntax: [Z,...] = SMOOTHN(...,'max_iter',500,'tol_z',1e-4,'Initial',Z0);

    [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
    describes the exit condition of SMOOTHN:
        1       SMOOTHN converged.
        0       Maximum number of iterations was reached.

    Class Support
    Input array can be numeric or logical. The returned array is of class

    The n-D (inverse) discrete cosine transform functions <a
    >DCTN</a> and <a
    >IDCTN</a> are required.

    To be made
    Estimate the confidence bands (see Wahba 1983, Nychka 1988).

    Garcia D, Robust smoothing of gridded data in one and higher dimensions
    with missing values. Computational Statistics & Data Analysis, 2010
    >PDF download</a>

    # 1-D example
    x = linspace(0,100,2**8);
    y = cos(x/10)+(x/50)**2 + randn(size(x))/10;
    y[[70, 75, 80]] = [5.5, 5, 6];
    z = smoothn(y); # Regular smoothing
    zr = smoothn(y,'robust'); # Robust smoothing
    subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
    axis square, title('Regular smoothing')
    subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
    axis square, title('Robust smoothing')

    # 2-D example
    xp = 0:.02:1;
    [x,y] = meshgrid(xp);
    f = exp(x+y) + sin((x-2*y)*3);
    fn = f + randn(size(f))*0.5;
    fs = smoothn(fn);
    subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
    subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square

    # 2-D example with missing data
    n = 256;
    y0 = peaks(n);
    y = y0 + rand(size(y0))*2;
    I = randperm(n^2);
    y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data
    y(40:90,140:190) = NaN; # create a hole
    z = smoothn(y); # smooth data
    subplot(2,2,1:2), imagesc(y), axis equal off
    title('Noisy corrupt data')
    subplot(223), imagesc(z), axis equal off
    title('Recovered data ...')
    subplot(224), imagesc(y0), axis equal off
    title('... compared with original data')

    # 3-D example
    [x,y,z] = meshgrid(-2:.2:2);
    xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
    vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
    subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
    title('Noisy data')
    v = smoothn(vn);
    subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
    title('Smoothed data')

    # Cardioid
    t = linspace(0,2*pi,1000);
    x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
    y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
    z = smoothn(complex(x,y));
    axis equal tight

    # Cellular vortical flow
    [x,y] = meshgrid(linspace(0,1,24));
    Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
    Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
    Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise
    Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise
    I = randperm(numel(Vx));
    Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
    Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
    Vx(I(31:60)) = NaN; # missing values
    Vy(I(31:60)) = NaN; # missing values
    Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing
    subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
    title('Noisy velocity field')
    subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
    title('Smoothed velocity field')


    -- Damien Garcia -- 2009/03, revised 2010/11
    Visit my <a
    >website</a> for more details about SMOOTHN

    # Check input arguments


    (y, w) = preprocessing(y, w, sd)

    sizy = y.shape

    # sort axis
    if axis is None:
        axis = tuple(np.arange(y.ndim))

    noe = y.size  # number of elements
    if noe < 2:
        return y, s, EXIT_SUCCESS, W_TOT_DEFAULT

    # ---
    # "Weighting function" criterion
    weightstr = weightstr.lower()
    # ---
    # Weights. Zero weights are assigned to not finite values (Inf or NaN),
    # (Inf/NaN values = missing data).
    is_finite = np.isfinite(y)
    nof = np.sum(is_finite)  # number of finite elements
    # ---
    # Weighted or missing data?
    isweighted = np.any(w != 1)
    # ---
    # Automatic smoothing?
    isauto = not s

    # Creation of the Lambda tensor
    lambda_ = define_lambda(y, axis)

    #  Upper and lower bound for the smoothness parameter
    s_min_bnd, s_max_bnd = smoothness_bounds(y)

    #  Initialize before iterating
    y_tensor_rank = np.sum(np.array(sizy) != 1)  # tensor rank of the y-array
    # ---
    w_tot = w
    # --- Initial conditions for z
    z = initial_z(y, z0, isweighted)
    # ---
    z0 = z
    y[~is_finite] = 0  # arbitrary values for missing y-data
    # ---
    tol = 1.0
    robust_iterative_process = True
    robust_step = 1
    nit = 0
    # --- Error on p. Smoothness parameter s = 10^p
    errp = 0.1
    # opt = optimset('TolX',errp);
    # --- Relaxation factor relaxation_factor: to speedup convergence
    relaxation_factor = 1 + 0.75 * isweighted
    # ??
    #  Main iterative process
    # ---
    xpost = init_xpost(s, s_min_bnd, s_max_bnd, isauto)

    while robust_iterative_process:
        # --- "amount" of weights (see the function GCVscore)
        aow = np.sum(w_tot) / noe  # 0 < aow <= 1
        # ---
        while tol > tol_z and nit < max_iter:
            if verbose:
                LOG.info(f"tol {tol:s} nit {nit:s}")
            nit = nit + 1
            dct_y = dctND(w_tot * (y - z) + z, f=dct)
            if isauto and not np.remainder(np.log2(nit), 1):
                # ---
                # The generalized cross-validation (GCV) method is used.
                # We seek the smoothing parameter s that minimizes the GCV
                # score i.e. s = Argmin(GCVscore).
                # Because this process is time-consuming, it is performed from
                # time to time (when nit is a power of 2)
                # ---
                # errp in here somewhere

                # bounds = [(log10(s_min_bnd),log10(s_max_bnd))]
                # args = (lambda_, aow,dct_y,is_finite,w_tot,y,nof,noe)
                # xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,
                # factr=10., approx_grad=True,bounds=,bounds\
                #   args=args)

                # if we have no clue what value of s to use, better span the
                # possible range to get a reasonable starting point ...
                # only need to do it once though. nS0 is the number of samples
                # used
                if not s0:
                    ss = np.arange(nS0) * (1.0 / (nS0 - 1.0)) * (np.log10(
                        s_max_bnd) - np.log10(s_min_bnd)) + np.log10(s_min_bnd)
                    g = np.zeros_like(ss)
                    for i, p in enumerate(ss):
                        g[i] = gcv(p, lambda_, aow, dct_y, is_finite, w_tot, y,
                                   nof, noe, smoothOrder)
                    xpost = [ss[g == g.min()]]
                    xpost = [s0]
                bounds = [(np.log10(s_min_bnd), np.log10(s_max_bnd))]
                args = (lambda_, aow, dct_y, is_finite, w_tot, y, nof, noe,
                xpost, _, _ = lbfgsb.fmin_l_bfgs_b(gcv,
            s = 10**xpost[0]
            # update the value we use for the initial s estimate
            s0 = xpost[0]

            gamma = gamma_from_lambda(lambda_, s, smoothOrder)

            z = relaxation_factor * dctND(gamma*dct_y, f=idct) +\
                (1 - relaxation_factor) * z
            # if no weighted/missing data => tol=0 (no iteration)
            tol = isweighted * norm(z0 - z) / norm(z)

            z0 = z  # re-initialization
        exitflag = nit < max_iter

        if isrobust:  # -- Robust Smoothing: iteratively re-weighted process
            # --- average leverage
            h = np.sqrt(1 + 16.0 * s)
            h = np.sqrt(1 + h) / np.sqrt(2) / h
            h = h**y_tensor_rank
            # --- take robust weights into account
            w_tot = w * robust_weights(y - z, is_finite, h, weightstr)
            # --- re-initialize for another iterative weighted process
            isweighted = True
            tol = 1
            nit = 0
            # ---
            robust_step = robust_step + 1
            # 3 robust steps are enough.
            robust_iterative_process = robust_step < 3
            robust_iterative_process = False  # stop the whole process

    #  Warning messages
    # ---
    if isauto:
        limit = ""
        if np.abs(np.log10(s) - np.log10(s_min_bnd)) < errp:
            limit = "lower"
        elif np.abs(np.log10(s) - np.log10(s_max_bnd)) < errp:
            limit = "upper"
        warning(f"smoothn:S{limit.capitalize()}Bound", [
            f"s = {s:.3f}: the {limit} bound for s has been reached. " +
            "Put s as an input variable if required."

    return z, s, exitflag, w_tot
Exemplo n.º 22

from scipy.optimize.lbfgsb import fmin_l_bfgs_b

# Box constraints handed to fmin_l_bfgs_b below: one [lower, upper] row per variable.
bou = np.array([[-1, 1], [0, 1]])

def f(x):
    """Return ``sin(x[0] + x[1]) * x[0]`` for a two-component point ``x``."""
    first, second = x[0], x[1]
    return np.sin(first + second) * first

# Starting point of the search; it lies inside the box defined by ``bou``.
x0 = [0.2, 0.3]
# No gradient function is supplied, so approx_grad=True asks the optimiser
# to estimate the gradient numerically.
x_1, f_1, d_1 = fmin_l_bfgs_b(f, x0, bounds=bou, maxfun=1500, approx_grad=True)

def f_d(x_1, x_2):
    """Return ``pdist`` of the two points ``x_1`` and ``x_2`` stacked as rows."""
    stacked = np.vstack([x_1, x_2])
    return pdist(stacked)

# Distance from every row of ``x`` to ``y``.
# NOTE(review): ``x`` and ``y`` are not defined anywhere in this snippet --
# confirm where they are supposed to come from.
dis = [f_d(x[i], y) for i in range(x.shape[0])]


Exemplo n.º 23
    # Run L-BFGS-B on either a mask parametrisation (``l0``) or a mask (``p0``).
    #
    # Keyword arguments read from ``kwargs``: ``mask_type`` (default ""),
    # ``learn_mask`` and ``learn_alpha`` (default True), ``l0`` and ``p0``
    # (default None).  Returns ``(lf or pf, self.energy_upper, self.alphas)``.
    #
    # NOTE(review): this listing has lost lines -- the ``else:`` branches, the
    # contents of ``self.energy_upper = [`` and the tails of the ``lambda`` and
    # ``print`` calls are missing.  The code below is left exactly as found.
    def optimize(self, parallel=False, parallel_verbose=0, **kwargs):
        # -- Getting parameters --
        # ------------------------
        #Upper level inputs
        mask_type = kwargs.get("mask_type", "")
        learn_mask = kwargs.get("learn_mask", True)
        learn_alpha = kwargs.get("learn_alpha", True)
        l0 = kwargs.get("l0", None)
        p0 = kwargs.get("p0", None)
        # ``shots`` selects the l0-based branch further down.
        shots = False

        # -- Checking inputs --
        if mask_type in ["cartesian", "radial_CO"]:
            if l0 is None:
                raise ValueError(
                    "an initial mask parametrisation l0 must be given")
            shots = True
            # NOTE(review): an ``else:`` presumably preceded the next line -- confirm.
            if p0 is None: raise ValueError("an initial mask p0 must be given")

        t1 = time.time()
        self.niter = 0

        # -- Initializing --
        # ------------------
        print("Multithread:", parallel)

        # The last entry of l0 / p0 seeds ``self.alphas``; ``n`` counts the
        # remaining entries and sizes the (0, 1) bounds below.
        if shots:
            n = len(l0) - 1
            self.alphas = [l0[-1]]
            # NOTE(review): an ``else:`` presumably preceded the next two lines -- confirm.
            n = len(p0) - 1
            self.alphas = [p0[-1]]

        self.energy_upper = [

        # -- Using L-BFGS-B --
        # --------------------
        if shots:
            #Optimize l
            lf, _, _ = fmin_l_bfgs_b(
                lambda x: E(lk=x,
                lambda x: grad_E(lk=x,
                # first n variables bounded to [0, 1], the last one to [1e-10, inf)
                bounds=[(0, 1)] * n + [(1e-10, np.inf)],
                callback=lambda x: self.fcall(x, mask_type))

            #Optimize p directly
            pf, _, _ = fmin_l_bfgs_b(
                lambda x: E(pk=x,
                lambda x: grad_E(pk=x,
                bounds=[(0, 1)] * n + [(1e-10, np.inf)],
                callback=lambda x: self.fcall(x, mask_type))

        # -- Returning output --
        # ----------------------
        print("\033[1m" + f"\nFINISHED IN {time.time()-t1} SECONDS\n" +
        if shots: return lf, self.energy_upper, self.alphas
        else: return pf, self.energy_upper, self.alphas
Exemplo n.º 24
def train():
    """Train the MV-RNN parameters with L-BFGS-B, evaluating and saving each round.

    The corpus is read from ``config.train_data`` and split by position: the
    first 4/5 of the sentences are used for training and up to
    ``nTestSentEval`` of the remaining ones for evaluation.  ``W``, ``WO`` and
    ``Wv`` come from ``config.pre_trained_weights``; ``Wo`` and ``Wcat`` are
    randomly initialised.

    Each of the ``nIter`` rounds calls ``fmin_l_bfgs_b`` with ``maxfun=1``
    starting from the previous round's result, writes the tuned values back
    into the weight matrices, prints the cost, calls ``evaluate`` on the test
    subset and saves the weights to
    ``config.saved_params_file + '_lbfgs_iter<i>'``.

    ``costFn`` is passed with ``approx_grad=0`` and no ``fprime``, so it has to
    return both the cost and its gradient.
    """
    # get sentences, trees and labels
    nExamples = -1
    print("loading data..")
    rnnData = RNNDataCorpus()
    rnnData.load_data(load_file=config.train_data, nExamples=nExamples)

    # initialize params
    print("initializing params")
    params = Params(data=rnnData, wordSize=50, rankWo=2)

    # define theta: one vector for all the parameters of the mvrnn model
    # (W, Wm, Wlabel, L, Lm)
    n = params.wordSize
    fanIn = params.fanIn
    nWords = params.nWords
    nLabels = params.categories
    rank = params.rankWo
    Wo = 0.01 * np.random.randn(n + 2 * n * rank, nWords)  # Lm, as in paper
    Wo[:n, :] = np.ones((n, Wo.shape[1]))  # Lm, as in paper
    Wcat = 0.005 * np.random.randn(nLabels, fanIn)  # Wlabel, as in paper

    # load pre-trained weights here
    mats = sio.loadmat(config.pre_trained_weights)
    Wv = mats.get('Wv')  # L, as in paper
    W = mats.get('W')  # W, as in paper
    WO = mats.get('WO')  # Wm, as in paper

    sentencesIdx = np.arange(rnnData.ndoc())
    # Floor division keeps nTrain an integer on Python 3 as well, so it can be
    # used as a slice bound.
    nTrain = 4 * len(sentencesIdx) // 5
    trainSentIdx = sentencesIdx[0:nTrain]
    testSentIdx = sentencesIdx[nTrain:]
    nTestSentEval = 50  # number of test sentences to be evaluated

    rnnData_train = RNNDataCorpus()
    rnnData.copy_into_minibatch(rnnData_train, trainSentIdx)

    # Evaluate on at most nTestSentEval test sentences.  Slicing past the end
    # returns everything, so this also covers small test sets (the previous
    # code left thisTestSentIdx unassigned in that case and otherwise
    # overwrote the truncated selection with the full set).
    rnnData_test = RNNDataCorpus()
    thisTestSentIdx = testSentIdx[:nTestSentEval]
    rnnData.copy_into_minibatch(rnnData_test, thisTestSentIdx)

    # sets nWords_reduced, returns new arrays
    Wv_trainTest, Wo_trainTest, all_train_idx = getRelevantWords(
        rnnData, Wv, Wo, params)
    theta = np.concatenate((W.flatten(), WO.flatten(), Wcat.flatten(),
                            Wv_trainTest.flatten(), Wo_trainTest.flatten()))

    print("starting training...")
    nIter = 100
    for i in range(nIter):
        # One L-BFGS-B call per round; maxfun=1 limits the call to a single
        # function evaluation.
        tunedTheta, fbatch_train, _ = lbfgsb.fmin_l_bfgs_b(
            func=costFn, x0=theta, fprime=None,
            args=(rnnData_train, params),
            approx_grad=0, bounds=None, m=5,
            factr=1000000000000000.0, pgtol=1.0000000000000001e-5,
            epsilon=1e-08, iprint=3, maxfun=1, disp=0)

        # map parameters back
        W[:, :], WO[:, :], Wcat[:, :], Wv_trainTest, Wo_trainTest = unroll_theta(
            tunedTheta, params)
        Wv[:, all_train_idx] = Wv_trainTest
        Wo[:, all_train_idx] = Wo_trainTest

        theta = tunedTheta  # for next iteration

        print("========================================")
        # Single formatted line: identical output to the two chained Python 2
        # print statements it replaces.
        print("XXXXXXIteration  %s Average cost:  %s"
              % (i, np.average(fbatch_train)))
        evaluate(Wv, Wo, W, WO, Wcat, params, rnnData_test)
        print("========================================")

        # save weights
        save_dict = {'Wv': Wv, 'Wo': Wo, 'Wcat': Wcat, 'W': W, 'WO': WO}
        sio.savemat(config.saved_params_file + '_lbfgs_iter' + str(i),
                    mdict=save_dict)
        print("saved tuned theta. ")
Exemplo n.º 25
    # Fit the model to the training data (X, y) and return ``self``.
    #
    # Visible steps: collect the distinct labels into ``self.classes_``; create
    # default transitions and randomly initialised state/transition parameters
    # when they are None; stack them into one parameter vector; optionally run
    # one pass of per-example gradient steps when ``self._sgd_stepsize`` is
    # set; then call ``fmin_l_bfgs_b`` on ``objective_function``, which returns
    # the negated log-likelihood and the negated gradient, both including an
    # L2 term weighted by ``self.l2_regularization``.
    #
    # NOTE(review): this listing has lost lines -- the docstring is never
    # closed and the ``objective_function`` signature plus several call
    # argument lists are truncated.  The code below is left exactly as found.
    # NOTE(review): ``zip(X, y)[a:b]`` and the ``print`` statements are
    # Python 2 only.
    def fit(self, X, y):
        """Fit the model according to the given training data.

        X : List of list of ints. Each list of ints represent a training example. Each int in that list
            is the index of a one-hot encoded feature.

        y : array-like, shape (n_samples,)
            Target vector relative to X.

        self : object
            Returns self.
        classes = list(set(y))
        num_classes = len(classes)
        self.classes_ = classes
        if self.transitions is None:
            self.transitions = self._create_default_transitions(
                num_classes, self.num_states)

        # Initialise the parameters
        _, num_features = X[0].shape
        num_transitions, _ = self.transitions.shape
        if self.state_parameters is None:
            self.state_parameters = numpy.random.standard_normal(
                (num_features, self.num_states,
                 num_classes)) * self.state_parameter_noise
        if self.transition_parameters is None:
            self.transition_parameters = numpy.random.standard_normal(
                (num_transitions)) * self.transition_parameter_noise

        initial_parameter_vector = self._stack_parameters(
            self.state_parameters, self.transition_parameters)
        function_evaluations = [0]

        def objective_function(parameter_vector,
            ll = 0.0
            gradient = numpy.zeros_like(parameter_vector)
            state_parameters, transition_parameters = self._unstack_parameters(
            for x, ty in zip(X, y)[batch_start_index:batch_end_index]:
                y_index = classes.index(ty)
                dll, dgradient_state, dgradient_transition = log_likelihood(
                    x, y_index, state_parameters, transition_parameters,
                dgradient = self._stack_parameters(dgradient_state,
                ll += dll
                gradient += dgradient

            parameters_without_bias = numpy.array(
            )  # exclude the bias parameters from being regularized
            parameters_without_bias[0] = 0
            ll -= self.l2_regularization * numpy.dot(parameters_without_bias.T,
            gradient = gradient.flatten(
            ) - 2.0 * self.l2_regularization * parameters_without_bias

            if batch_start_index == 0:
                function_evaluations[0] += 1
                if self._verbosity > 0 and function_evaluations[
                        0] % self._verbosity == 0:
                    print '{:10} {:10.2f} {:10.2f}'.format(
                        function_evaluations[0], ll, sum(abs(gradient)))
            return -ll, -gradient

        # If the stochastic gradient stepsize is defined, do 1 epoch of SGD to initialize the parameters.
        if self._sgd_stepsize:
            total_nll = 0.0
            for i in range(len(y)):
                nll, ngradient = objective_function(initial_parameter_vector,
                                                    i, i + 1)
                total_nll += nll
                initial_parameter_vector -= ngradient * self._sgd_stepsize
                if self._sgd_verbosity > 0:
                    if i % self._sgd_verbosity == 0:
                        print '{:10} {:10.2f} {:10.2f}'.format(
                            i, -total_nll / (i + 1) * len(y),

        self._optimizer_result = fmin_l_bfgs_b(objective_function,
        self.state_parameters, self.transition_parameters = self._unstack_parameters(
        return self
Exemplo n.º 26
    def learn(self, X, y):
        """Learn the model from the given data.

        Trains a feed-forward network with sigmoid units by minimising a
        regularised cross-entropy cost with L-BFGS-B.  The layer sizes are
        ``[len(X[0])] + self._hl + [number of distinct classes]``; every
        weight matrix carries one extra leading row for the bias.

        On return ``self._thetas`` holds the list of fitted weight matrices,
        ``self._cost`` the final cost as a float, ``self._classes`` the
        distinct labels and ``self._can_classify`` is True.

        ``self._opt_args`` (a dict or None) is forwarded to
        ``fmin_l_bfgs_b``.  A ``"bounds"`` entry is interpreted as a single
        ``(lower_bound, upper_bound)`` pair applied to every weight; the
        stored dict is not modified, so ``learn`` can be called repeatedly.

        :param X: the attribute data
        :type X: numpy.array

        :param y: the class variable data
        :type y: numpy.array
        """

        def rand(eps):
            """Return random number in interval [-eps, eps]."""
            return rnd.random() * 2 * eps - eps

        def g_func(z):
            """The sigmoid (logistic) function."""
            return 1. / (1. + np.exp(-z))

        def h_func(thetas, x):
            """The model function: forward-propagate one example."""
            a = np.array([[1.] + list(x)]).T  # column vector with bias unit
            for theta in thetas:
                a = np.vstack((np.array([[1.]]), g_func(theta.T.dot(a))))
            return a[1:]  # drop the bias unit of the output layer

        def llog(val):
            """The limited logarithm (argument clipped away from 0 and 1)."""
            e = 1e-10
            return np.log(np.clip(val, e, 1. - e))

        def unroll(thetas):
            """Unrolls a list of thetas into vector."""
            sd = [m.shape for m in thetas]  # Keep the shape data
            return np.concatenate([theta.ravel() for theta in thetas]), sd

        def roll(thetas, sd):
            """Rolls a vector of thetas back into list."""
            sizes = [int(np.prod(shape)) for shape in sd]
            pieces = np.split(thetas, np.cumsum(sizes)[:-1])
            return [np.reshape(piece, shape) for piece, shape in zip(pieces, sd)]

        def cost(thetas, X, y, sd, S, lambda_):
            """The cost function of the neural network."""
            thetas = roll(thetas, sd)
            m, _ = X.shape
            L = len(S)
            # Bias rows (row 0) are excluded from the penalty.
            reg_factor = (lambda_ / float(2 * m)) * sum(
                np.sum(thetas[l][1:, :] ** 2) for l in range(L - 1))
            log_likelihood = 0.
            for s in range(m):
                target = self._classes_map[y[s]]
                h = h_func(thetas, X[s])  # evaluated once per example
                log_likelihood += np.sum(
                    target * llog(h) + (1. - target) * llog(1. - h))
            total = float(-log_likelihood / float(m) + reg_factor)
            if self._verbose:
                print("Current value of cost func.: " + str(total))
            return total

        def grad(thetas, X, y, sd, S, lambda_):
            """The gradient (derivate) function which includes the back
            propagation algorithm."""
            thetas = roll(thetas, sd)
            m, _ = X.shape
            L = len(S)
            # Accumulated deltas, one matrix per layer transition.
            d = [np.zeros((S[l] + 1, S[l + 1])) for l in range(L - 1)]
            for s in range(m):
                # Only a, d & theta matrices have 1 more element (biases).
                a = [np.array([[1.] + list(X[s])]).T]
                for l in range(1, L):  # Forward propagation
                    a.append(np.vstack((np.array([[1.]]),
                                        g_func(thetas[l - 1].T.dot(a[l - 1])))))
                # TODO
                # Softmax: treat last a column differently
                deltas = [None for l in range(L - 1)] + [a[-1][1:] - self._classes_map[y[s]]]
                for l in range(L - 2, 0, -1):  # Backward propagation
                    deltas[l] = (thetas[l].dot(deltas[l + 1]) * (a[l] * (1. - a[l])))[1:]
                for l in range(L - 1):
                    d[l] = d[l] + a[l].dot(deltas[l + 1].T)
            D = []
            for l in range(L - 1):
                # Derivative of the (lambda_ / 2m) * theta**2 penalty used in
                # ``cost``; the bias row is not regularised.
                penalty = (lambda_ / float(m)) * thetas[l]
                penalty[0, :] = 0.
                D.append(d[l] / float(m) + penalty)
            D, _ = unroll(D)
            return D

        def gradApprox(thetas, X, y, sd, S, lambda_):
            """Approximate the gradient of the cost function by central
            differences (only used for debugging, not in final version)."""
            eps = 1e-4
            approx = np.zeros(len(thetas))
            for i in range(len(thetas)):
                step = np.zeros(len(thetas))
                step[i] = eps
                approx[i] = (cost(thetas + step, X, y, sd, S, lambda_)
                             - cost(thetas - step, X, y, sd, S, lambda_)) / (2 * eps)
            return approx

        # Initialize the final layer of neural net (outputs): one one-hot
        # column vector per class.
        # NOTE(review): assumes self._classes_map already exists as a dict -- confirm.
        self._classes = list(set(y))
        for i, cl in enumerate(self._classes):
            self._classes_map[cl] = np.zeros((len(self._classes), 1))
            self._classes_map[cl][i] = 1.
        S = [len(X[0])] + self._hl + [len(self._classes)]  # Complete information about levels
        L = len(S)
        # Initialize the thetas matrices
        thetas0 = [np.array([[rand(self._eps) for j in range(S[l + 1])]
                             for i in range(S[l] + 1)]) for l in range(L - 1)]
        thetas0, sd = unroll(thetas0)
        # For testing:
        # return grad(thetas0, X, y, sd, S, self._lambda), gradApprox(thetas0, X, y, sd, S, self._lambda)

        # The L-BFGS-B bounds parameter is redefined: input is
        # (lower_bound, upper_bound) instead of array of bounds for each theta
        # parameter.  Work on a copy so the user's setting is not overwritten.
        opt_args = dict(self._opt_args) if self._opt_args is not None else {}
        if "bounds" in opt_args:
            opt_args["bounds"] = [opt_args["bounds"]] * len(thetas0)
        self._thetas, self._cost, _ = scp.fmin_l_bfgs_b(
            cost, thetas0, grad, args=(X, y, sd, S, self._lambda), **opt_args)
        self._thetas = roll(self._thetas, sd)
        self._cost = float(self._cost)
        self._can_classify = True
Exemplo n.º 27
# Fit a binary linear SVM with L-BFGS-B, using a Theano-compiled cost/gradient.
#
# Starts from ``svm.weights`` / ``svm.bias``, minimises the mean loss plus an
# L2 penalty on the weights (plus ``sgd_bias ** 2``), and returns a deep copy
# of ``svm`` carrying the optimised weights and bias.  Labels in ``data`` must
# be exactly {-1, 1}.
#
# NOTE(review): this listing has lost lines -- the signature (``dtype``,
# ``cost_fn``, ``l2_regularization`` are used but not visible as parameters),
# the end of the ``margin`` expression, the ``else:`` before the ValueError
# and the arguments of ``fmin_l_bfgs_b`` are missing.  Code left as found.
def binary_debug(svm, data,
        bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
    n_features, = svm.weights.shape
    X, y = data

    assert set(y) == set([-1, 1])
    _X = theano.shared(X.astype(dtype), allow_downcast=True, borrow=True)
    _yvecs = theano.shared(y.astype(dtype), allow_downcast=True, borrow=True)

    # Single flat parameter vector: n_features weights followed by the bias.
    sgd_params = tensor.vector(dtype=dtype)

    sgd_weights = sgd_params[:n_features]
    sgd_bias = sgd_params[n_features]

    margin = _yvecs * (tensor.dot(_X, sgd_weights)
            #+ sgd_bias

    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
                margin > -1,
                # -- smooth part
                tensor.maximum(0, 1 - margin) ** 2,
                # -- straight part
                -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
        # NOTE(review): an ``else:`` presumably preceded this raise -- confirm.
        raise ValueError('invalid cost-fn', cost_fn)

    l2_cost = .5 * l2_regularization * tensor.dot(
            sgd_weights, sgd_weights)

    cost = losses.mean() + l2_cost  + sgd_bias ** 2
    dcost_dparams = tensor.grad(cost, sgd_params)

    # Compiled function returning [cost, gradient] for a parameter vector.
    _f_df = theano.function([sgd_params], [cost, dcost_dparams])

    def flatten_svm(obj):
        # Note this is different from multi-class case because bias is scalar
        return np.concatenate([obj.weights.flatten(), [obj.bias]])

    def f(p):
        # Evaluate in ``dtype`` and hand float64 results back to the optimiser.
        c, d = _f_df(p.astype(dtype))
        return c.astype('float64'), d.astype('float64')

    params = np.zeros(n_features + 1)
    params[:n_features] = svm.weights
    params[n_features] = svm.bias

    best, bestval, info_dct = fmin_l_bfgs_b(f,
    best_svm = copy.deepcopy(svm)
    best_svm.weights = np.array(best[:n_features], dtype=dtype)
    best_svm.bias = float(best[n_features])

    # why ???
    # The shared variables are overwritten with small arrays of ones here.
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones(2, dtype=dtype))
    return best_svm
Exemplo n.º 28
import openbabel
import numpy

from scipy.optimize.lbfgsb import fmin_l_bfgs_b

def f(x):
    """Return the sum of the squares of the first two components of ``x``."""
    first_sq = x[0] ** 2
    second_sq = x[1] ** 2
    return first_sq + second_sq

def g(x):
    """Return ``[2*x[0], 2*x[1]]`` as a numpy array (the gradient of ``f``)."""
    doubled = [2 * component for component in (x[0], x[1])]
    return numpy.array(doubled)

# Starting point for the minimisation of f.
x0 = numpy.array([3, 1])

# The third return value is the optimiser's information dictionary; it is
# bound to ``info`` so the builtin ``dict`` is not shadowed.
opt, energy, info = fmin_l_bfgs_b(f, x0, fprime=g)

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(opt)
Exemplo n.º 29
 def optimise_lbfgs(self, start):
     """Minimise ``self.objective`` with L-BFGS-B and return the minimiser.

     ``start`` is the initial point; ``self.grad`` supplies the gradient.
     Only the optimised point is returned; the final function value and the
     optimiser's info dictionary are discarded.
     """
     # Parenthesised single-argument form: same output on Python 2 and 3.
     print("***** LBFGS OPTIMISATION  *****")
     x, _, _ = fmin_l_bfgs_b(self.objective, start, fprime=self.grad, pgtol=1e-09, iprint=0)
     return x
Exemplo n.º 30
# Robust spline smoothing for 1-D to N-D data (Python port of the MATLAB
# SMOOTHN routine whose help text follows the signature).
# Returns the tuple (z, s, exitflag, Wtot).
#
# NOTE(review): this listing has lost lines -- the continuation of the
# signature (names such as s, W, z0, weightstr, TolZ, MaxIter and isrobust are
# used but not visible as parameters), the docstring quotes, ``else:`` /
# ``try:`` / ``except:`` lines, the warning calls and the tail of the
# ``fmin_l_bfgs_b`` call are missing.  The code below is left exactly as found.
def smoothn(y,nS0=10,axis=None,smoothOrder=2.0,sd=None,verbose=False,\
   function [z,s,exitflag,Wtot] = smoothn(varargin)

   SMOOTHN Robust spline smoothing for 1-D to N-D data.
   SMOOTHN provides a fast, automatized and robust discretized smoothing
   spline for data of any dimension.

   Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
   can be any N-D noisy array (time series, images, 3D data,...). Non
   finite data (NaN or Inf) are treated as missing values.

   Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
   S must be a real positive scalar. The larger S is, the smoother the
   output will be. If the smoothing parameter S is omitted (see previous
   option) or empty (i.e. S = []), it is automatically determined using
   the generalized cross-validation (GCV) method.

   Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of
   real positive values, that must have the same size as Y. Note that a
   nil weight corresponds to a missing value.

   Robust smoothing
   Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
   the influence of outlying data.

   [Z,S] = SMOOTHN(...) also returns the calculated value for S so that
   you can fine-tune the smoothing subsequently if needed.

   An iteration process is used in the presence of weighted and/or missing
   values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
   termination parameters specified by OPTION_NAME and OPTION_VALUE. They
   can contain the following criteria:
       TolZ:       Termination tolerance on Z (default = 1e-3)
                   TolZ must be in ]0,1[
       MaxIter:    Maximum number of iterations allowed (default = 100)
       Initial:    Initial value for the iterative process (default =
                   original data)
   Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0);

   [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
   describes the exit condition of SMOOTHN:
       1       SMOOTHN converged.
       0       Maximum number of iterations was reached.

   Class Support
   Input array can be numeric or logical. The returned array is of class

   The N-D (inverse) discrete cosine transform functions <a
   >DCTN</a> and <a
   >IDCTN</a> are required.

   To be made
   Estimate the confidence bands (see Wahba 1983, Nychka 1988).

   Garcia D, Robust smoothing of gridded data in one and higher dimensions
   with missing values. Computational Statistics & Data Analysis, 2010. 
   href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')">PDF download</a>

   # 1-D example
   x = linspace(0,100,2**8);
   y = cos(x/10)+(x/50)**2 + randn(size(x))/10;
   y[[70, 75, 80]] = [5.5, 5, 6];
   z = smoothn(y); # Regular smoothing
   zr = smoothn(y,'robust'); # Robust smoothing
   subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
   axis square, title('Regular smoothing')
   subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
   axis square, title('Robust smoothing')

   # 2-D example
   xp = 0:.02:1;
   [x,y] = meshgrid(xp);
   f = exp(x+y) + sin((x-2*y)*3);
   fn = f + randn(size(f))*0.5;
   fs = smoothn(fn);
   subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
   subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square

   # 2-D example with missing data
   n = 256;
   y0 = peaks(n);
   y = y0 + rand(size(y0))*2;
   I = randperm(n^2);
   y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data
   y(40:90,140:190) = NaN; # create a hole
   z = smoothn(y); # smooth data
   subplot(2,2,1:2), imagesc(y), axis equal off
   title('Noisy corrupt data')
   subplot(223), imagesc(z), axis equal off
   title('Recovered data ...')
   subplot(224), imagesc(y0), axis equal off
   title('... compared with original data')

   # 3-D example
   [x,y,z] = meshgrid(-2:.2:2);
   xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
   vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
   subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
   title('Noisy data')
   v = smoothn(vn);
   subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
   title('Smoothed data')

   # Cardioid
   t = linspace(0,2*pi,1000);
   x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
   y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
   z = smoothn(complex(x,y));
   axis equal tight

   # Cellular vortical flow
   [x,y] = meshgrid(linspace(0,1,24));
   Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
   Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
   Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise
   Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise
   I = randperm(numel(Vx));
   Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
   Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
   Vx(I(31:60)) = NaN; # missing values
   Vy(I(31:60)) = NaN; # missing values
   Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing
   subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
   title('Noisy velocity field')
   subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
   title('Smoothed velocity field')


   -- Damien Garcia -- 2009/03, revised 2010/11
   Visit my <a
   href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')">website</a> for more details about SMOOTHN 

  # Check input arguments

  # Masked arrays: masked entries get zero weight and are set to NaN in y.
  if type(y) == ma.core.MaskedArray:  # masked array
    is_masked = True
    mask = y.mask
    y = np.array(y)
    y[mask] = 0.
    if W != None:
      W  = np.array(W)
      W[mask] = 0.
    if sd != None:
      W = np.array(1./sd**2)
      W[mask] = 0.
      sd = None
    y[mask] = np.nan
  # Standard deviations are converted to weights 1/sd**2 (zero where sd <= 0).
  if sd != None:
    sd_ = np.array(sd)
    mask = (sd > 0.)
    W = np.zeros_like(sd_)
    W[mask] = 1./sd_[mask]**2
    sd = None

  # Normalise the weights so that the largest one is 1.
  if W != None:
    W = W/W.max()

  sizy = y.shape;

  # sort axis
  if axis == None:
    axis = tuple(np.arange(y.ndim))

  noe = y.size # number of elements
  if noe<2:
    z = y
    exitflag = 0;Wtot=0
    return z,s,exitflag,Wtot
  # Smoothness parameter and weights
  #if s != None:
  #  s = []
  if W == None:
    W = ones(sizy);

  #if z0 == None:
  #  z0 = y.copy()

  # "Weighting function" criterion
  weightstr = weightstr.lower()
  # Weights. Zero weights are assigned to not finite values (Inf or NaN),
  # (Inf/NaN values = missing data).
  IsFinite = np.array(isfinite(y)).astype(bool);
  nof = IsFinite.sum() # number of finite elements
  W = W*IsFinite;
  if any(W<0):
        'Weights must all be >=0')
      #W = W/np.max(W)
  # Weighted or missing data?
  isweighted = any(W != 1);
  # Robust smoothing?
  # Automatic smoothing?
  isauto = not s;
  # DCTN and IDCTN are required
    from scipy.fftpack.realtransforms import dct,idct
    z = y
    exitflag = -1;Wtot=0
    return z,s,exitflag,Wtot

  ## Creation of the Lambda tensor
  # Lambda contains the eigenvalues of the difference matrix used in this
  # penalized least squares process.
  axis = tuple(np.array(axis).flatten())
  d =  y.ndim;
  Lambda = zeros(sizy);
  for i in axis:
    # create a 1 x d array (so e.g. [1,1] for a 2D case
    siz0 = ones((1,y.ndim))[0];
    siz0[i] = sizy[i];
    # cos(pi*(reshape(1:sizy(i),siz0)-1)/sizy(i)))
    # (arange(1,sizy[i]+1).reshape(siz0) - 1.)/sizy[i]
    Lambda = Lambda + (cos(pi*(arange(1,sizy[i]+1) - 1.)/sizy[i]).reshape(siz0))
    #  Lambda = Lambda + siz0
  Lambda = -2.*(len(axis)-Lambda);
  if not isauto:
    Gamma = 1./(1+(s*abs(Lambda))**smoothOrder);

  ## Upper and lower bound for the smoothness parameter
  # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs
  # if h is close to 1, while over-smoothing appears when h is near 0. Upper
  # and lower bounds for h are given to avoid under- or over-smoothing. See
  # equation relating h to the smoothness parameter (Equation #12 in the
  # referenced CSDA paper).
  N = sum(array(sizy) != 1); # tensor rank of the y-array
  hMin = 1e-6; hMax = 0.99;
  # (h/n)**2 = (1 + a)/( 2 a)
  # a = 1/(2 (h/n)**2 -1) 
  # where a = sqrt(1 + 16 s)
  # (a**2 -1)/16
  # NOTE(review): a ``try:`` / ``except:`` pair presumably wrapped the next
  # four lines -- confirm.
    sMinBnd = np.sqrt((((1+sqrt(1+8*hMax**(2./N)))/4./hMax**(2./N))**2-1)/16.);
    sMaxBnd = np.sqrt((((1+sqrt(1+8*hMin**(2./N)))/4./hMin**(2./N))**2-1)/16.);
    sMinBnd = None
    sMaxBnd = None
  ## Initialize before iterating
  Wtot = W;
  #--- Initial conditions for z
  if isweighted:
    #--- With weighted/missing data
    # An initial guess is provided to ensure faster convergence. For that
    # purpose, a nearest neighbor interpolation followed by a coarse
    # smoothing are performed.
    if z0 != None: # an initial guess (z0) has been provided
        z = z0;
        z = y #InitialGuess(y,IsFinite);
        z[~IsFinite] = 0.
    z = zeros(sizy);
  z0 = z;
  y[~IsFinite] = 0; # arbitrary values for missing y-data
  tol = 1.;
  RobustIterativeProcess = True;
  RobustStep = 1;
  nit = 0;
  #--- Error on p. Smoothness parameter s = 10^p
  errp = 0.1;
  #opt = optimset('TolX',errp);
  #--- Relaxation factor RF: to speedup convergence
  RF = 1 + 0.75*isweighted;
  # ??
  ## Main iterative process
  if isauto:
      xpost = array([(0.9*log10(sMinBnd) + log10(sMaxBnd)*0.1)])
    xpost = array([log10(s)])
  while RobustIterativeProcess:
    #--- "amount" of weights (see the function GCVscore)
    aow = sum(Wtot)/noe; # 0 < aow <= 1
    while tol>TolZ and nit<MaxIter:
        if verbose:
          print 'tol',tol,'nit',nit
        nit = nit+1;
        DCTy = dctND(Wtot*(y-z)+z,f=dct,axis=axis);
        if isauto and not remainder(log2(nit),1):
            # The generalized cross-validation (GCV) method is used.
            # We seek the smoothing parameter s that minimizes the GCV
            # score i.e. s = Argmin(GCVscore).
            # Because this process is time-consuming, it is performed from
            # time to time (when nit is a power of 2)
            # errp in here somewhere
            #xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
            #   approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\
            #   args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe))

            # if we have no clue what value of s to use, better span the
            # possible range to get a reasonable starting point ...
            # only need to do it once though. nS0 is the number of samples used
            if not s0:
              ss = np.arange(nS0)*(1./(nS0-1.))*(log10(sMaxBnd)-log10(sMinBnd))+ log10(sMinBnd)
              g = np.zeros_like(ss)
              for i,p in enumerate(ss):
                g[i] = gcv(p,Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe,smoothOrder,axis)
                #print 10**p,g[i]
              xpost = [np.median(ss[g==g.min()])]
              #print '==============='
              #print nit,tol,g.min(),xpost[0],s
              #print '==============='
              xpost = [s0]
            xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
        s = 10**xpost[0];
        # update the value we use for the initial s estimate
        s0 = xpost[0]

        Gamma = 1./(1+(s*abs(Lambda))**smoothOrder);

        z = RF*dctND(Gamma*DCTy,f=idct,axis=axis) + (1-RF)*z;
        # if no weighted/missing data => tol=0 (no iteration)
        tol = isweighted*norm(z0-z)/norm(z);
        z0 = z; # re-initialization
    exitflag = nit<MaxIter;

    if isrobust: #-- Robust Smoothing: iteratively re-weighted process
        #--- average leverage
        h = sqrt(1+16.*s); 
        h = sqrt(1+h)/sqrt(2)/h; 
        h = h**N;
        #--- take robust weights into account
        Wtot = W*RobustWeights(y-z,IsFinite,h,weightstr);
        #--- re-initialize for another iterative weighted process
        isweighted = True; tol = 1; nit = 0; 
        RobustStep = RobustStep+1;
        RobustIterativeProcess = RobustStep<3; # 3 robust steps are enough.
        RobustIterativeProcess = False; # stop the whole process

  ## Warning messages
  if isauto:
    if abs(log10(s)-log10(sMinBnd))<errp:
            ['s = %.3f '%(s) + ': the lower bound for s '\
            + 'has been reached. Put s as an input variable if required.'])
    elif abs(log10(s)-log10(sMaxBnd))<errp:
            ['s = %.3f '%(s) + ': the upper bound for s '\
            + 'has been reached. Put s as an input variable if required.'])
    #    ['Maximum number of iterations (%d'%(MaxIter) + ') has '\
    #    + 'been exceeded. Increase MaxIter option or decrease TolZ value.'])
  return z,s,exitflag,Wtot
Exemplo n.º 31
 def optimise_lbfgs(self,start):
   """Minimise ``self.objective`` with L-BFGS-B and return the minimiser.

   ``start`` is the initial point; ``self.grad`` supplies the gradient.
   Only the optimised point is returned; the final function value and the
   optimiser's info dictionary are discarded.
   """
   # Parenthesised single-argument form: same output on Python 2 and 3.
   print("***** LBFGS OPTIMISATION  *****")
   x, _, _ = fmin_l_bfgs_b(self.objective, start, fprime=self.grad, pgtol=1e-09, iprint=0)
   return x
Exemplo n.º 32
 def minimize_lbfgs(self, parameters, x, y, sigma, emp_counts, classes_idx, nr_x, nr_f, nr_c):
     """Optimise ``parameters`` with L-BFGS-B and return them as an (nr_f, nr_c) array.

     ``parameters`` is flattened in column-major ("F") order before the
     optimiser call and the result is reshaped the same way.  The remaining
     arguments are forwarded unchanged to ``self.get_objective``.  No
     ``fprime`` is passed and ``approx_grad`` is left at its default, so
     ``self.get_objective`` has to return both the value and the gradient.
     """
     flat_start = parameters.reshape([nr_f*nr_c], order="F")
     extra_args = [x, y, sigma, emp_counts, classes_idx, nr_x, nr_f, nr_c]
     outcome = opt2.fmin_l_bfgs_b(self.get_objective, flat_start, args=extra_args)
     optimum = outcome[0]
     return optimum.reshape([nr_f, nr_c], order="F")
Exemplo n.º 33
def train():
    """Train the MV-RNN parameters with L-BFGS-B, evaluating and saving each round.

    The corpus is read from ``config.train_data`` and split by position: the
    first 4/5 of the sentences are used for training and up to
    ``nTestSentEval`` of the remaining ones for evaluation.  ``W``, ``WO`` and
    ``Wv`` come from ``config.pre_trained_weights``; ``Wo`` and ``Wcat`` are
    randomly initialised.

    Each of the ``nIter`` rounds calls ``fmin_l_bfgs_b`` with ``maxfun=1``
    starting from the previous round's result, writes the tuned values back
    into the weight matrices, prints the cost, calls ``evaluate`` on the test
    subset and saves the weights to
    ``config.saved_params_file + '_lbfgs_iter<i>'``.

    ``costFn`` is passed with ``approx_grad=0`` and no ``fprime``, so it has to
    return both the cost and its gradient.
    """
    # get sentences, trees and labels
    nExamples = -1
    print("loading data..")
    rnnData = RNNDataCorpus()
    rnnData.load_data(load_file=config.train_data, nExamples=nExamples)

    # initialize params
    print("initializing params")
    params = Params(data=rnnData, wordSize=50, rankWo=2)

    # define theta: one vector for all the parameters of the mvrnn model
    # (W, Wm, Wlabel, L, Lm)
    n = params.wordSize
    fanIn = params.fanIn
    nWords = params.nWords
    nLabels = params.categories
    rank = params.rankWo
    Wo = 0.01 * np.random.randn(n + 2 * n * rank, nWords)  # Lm, as in paper
    Wo[:n, :] = np.ones((n, Wo.shape[1]))  # Lm, as in paper
    Wcat = 0.005 * np.random.randn(nLabels, fanIn)  # Wlabel, as in paper

    # load pre-trained weights here
    mats = sio.loadmat(config.pre_trained_weights)
    Wv = mats.get('Wv')  # L, as in paper
    W = mats.get('W')  # W, as in paper
    WO = mats.get('WO')  # Wm, as in paper

    sentencesIdx = np.arange(rnnData.ndoc())
    # Floor division keeps nTrain an integer on Python 3 as well, so it can be
    # used as a slice bound.
    nTrain = 4 * len(sentencesIdx) // 5
    trainSentIdx = sentencesIdx[0:nTrain]
    testSentIdx = sentencesIdx[nTrain:]
    nTestSentEval = 50  # number of test sentences to be evaluated

    rnnData_train = RNNDataCorpus()
    rnnData.copy_into_minibatch(rnnData_train, trainSentIdx)

    # Evaluate on at most nTestSentEval test sentences.  Slicing past the end
    # returns everything, so this also covers small test sets (the previous
    # code left thisTestSentIdx unassigned in that case and otherwise
    # overwrote the truncated selection with the full set).
    rnnData_test = RNNDataCorpus()
    thisTestSentIdx = testSentIdx[:nTestSentEval]
    rnnData.copy_into_minibatch(rnnData_test, thisTestSentIdx)

    # sets nWords_reduced, returns new arrays
    Wv_trainTest, Wo_trainTest, all_train_idx = getRelevantWords(
        rnnData, Wv, Wo, params)
    theta = np.concatenate((W.flatten(), WO.flatten(), Wcat.flatten(),
                            Wv_trainTest.flatten(), Wo_trainTest.flatten()))

    print("starting training...")
    nIter = 100
    for i in range(nIter):
        # One L-BFGS-B call per round; maxfun=1 limits the call to a single
        # function evaluation.
        tunedTheta, fbatch_train, _ = lbfgsb.fmin_l_bfgs_b(
            func=costFn, x0=theta, fprime=None,
            args=(rnnData_train, params),
            approx_grad=0, bounds=None, m=5,
            factr=1000000000000000.0, pgtol=1.0000000000000001e-5,
            epsilon=1e-08, iprint=3, maxfun=1, disp=0)

        # map parameters back
        W[:, :], WO[:, :], Wcat[:, :], Wv_trainTest, Wo_trainTest = unroll_theta(
            tunedTheta, params)
        Wv[:, all_train_idx] = Wv_trainTest
        Wo[:, all_train_idx] = Wo_trainTest

        theta = tunedTheta  # for next iteration

        print("========================================")
        # Single formatted line: identical output to the two chained Python 2
        # print statements it replaces.
        print("XXXXXXIteration  %s Average cost:  %s"
              % (i, np.average(fbatch_train)))
        evaluate(Wv, Wo, W, WO, Wcat, params, rnnData_test)
        print("========================================")

        # save weights
        save_dict = {'Wv': Wv, 'Wo': Wo, 'Wcat': Wcat, 'W': W, 'WO': WO}
        sio.savemat(config.saved_params_file + '_lbfgs_iter' + str(i),
                    mdict=save_dict)
        print("saved tuned theta. ")
Exemplo n.º 34
# Train the one-vs-all linear SVM `svm` on `data` by running L-BFGS-B over
# consecutive blocks of examples, then return a classifier built from
# `dummy.params_mean`.
# NOTE(review): this listing looks truncated in several places (the signature
# is never closed, several calls are missing their closing lines, and `dtype`,
# `verbose` and `l2_regularization` are used without a visible definition).
# Confirm against the complete source before relying on these notes.
def BlockedTheanoOVA(svm, data,
        GPU_blocksize=1000 * (1024 ** 2), # bytes
    n_features, n_classes  = svm.weights.shape

    # Shared variables are created with 2x2 placeholder contents.
    _X = theano.shared(np.ones((2, 2), dtype=dtype),
    _yvecs = theano.shared(np.ones((2, 2), dtype=dtype),

    # Flat parameter vector: the first n_features * n_classes entries are the
    # weights (reshaped to n_features x n_classes), the remainder is the bias.
    sgd_params = tensor.vector(dtype=dtype)

    flat_sgd_weights = sgd_params[:n_features * n_classes]
    sgd_weights = flat_sgd_weights.reshape((n_features, n_classes))
    sgd_bias = sgd_params[n_features * n_classes:]

    # Squared hinge loss on the margin plus an L2 penalty on the weights only.
    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias)
    losses = tensor.maximum(0, 1 - margin) ** 2
    l2_cost = .5 * l2_regularization * tensor.dot(
            flat_sgd_weights, flat_sgd_weights)

    cost = losses.mean(axis=0).sum() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    # Compiled function returning [cost, gradient] for a parameter vector.
    _f_df = theano.function([sgd_params], [cost, dcost_dparams])

    assert dtype == 'float32'
    sizeof_dtype = 4
    X, y = data
    # Targets encoded as +1 in the column of the example's class, -1 elsewhere.
    yvecs = np.asarray(
            (y[:, None] == np.arange(n_classes)) * 2 - 1,

    # Number of blocks needed so that each block of X fits in GPU_blocksize
    # bytes.  np.ceil returns a float, so X_blocks and examples_per_block are
    # floats here.
    # NOTE(review): examples_per_block is later used as a slice bound; confirm
    # that float slice bounds are accepted by the numpy version in use.
    X_blocks = np.ceil(X.size * sizeof_dtype / float(GPU_blocksize))

    examples_per_block = len(X) // X_blocks

    if verbose:
        print 'dividing into', X_blocks, 'blocks of', examples_per_block

    # -- create a dummy class because a nested function cannot modify
    #    params_mean in enclosing scope
    class Dummy(object):
        def __init__(self, collect_estimates):
            # Start from the weights and bias currently stored on `svm`.
            params = np.zeros(n_features * n_classes + n_classes)
            params[:n_features * n_classes] = svm.weights.flatten()
            params[n_features * n_classes:] = svm.bias

            self.params = params
            self.params_mean = params.copy().astype('float64')
            self.params_mean_i = 0
            self.collect_estimates = collect_estimates

        def update_mean(self, p):
            # Incremental mean: after the i-th call params_mean is the
            # average of the i vectors passed so far (alpha is 1 on the first
            # call, which discards the initial value).
            self.params_mean_i += 1
            alpha = 1.0 / self.params_mean_i
            self.params_mean *= 1 - alpha
            self.params_mean += alpha * p

        def __call__(self, p):
            # NOTE(review): the body of this `if` is missing from the listing.
            if self.collect_estimates:
            # Evaluate in `dtype`, hand float64 cost and gradient back.
            c, d = _f_df(p.astype(dtype))
            return c.astype('float64'), d.astype('float64')
    # Estimates are collected only when there are more than two blocks.
    dummy = Dummy(X_blocks > 2)

    # Walk over X in consecutive blocks of examples_per_block rows; a trailing
    # partial block does not satisfy the loop condition and is not visited.
    i = 0
    while i + examples_per_block <= len(X):
        if verbose:
            print 'training on examples', i, 'to', i + examples_per_block
                X[i:i + examples_per_block],
                yvecs[i:i + examples_per_block],

        best, bestval, info_dct  = fmin_l_bfgs_b(dummy,
                iprint=1 if verbose else -1,
                factr=1e11,  # -- 1e12 for low acc, 1e7 for moderate

        i += examples_per_block

    params = dummy.params_mean

    # Unpack the flat vector back into a weight matrix and a bias vector.
    rval = classifier_from_weights(
            weights=params[:n_classes * n_features].reshape(
                (n_features, n_classes)),
            bias=params[n_classes * n_features:])

    return rval
Exemplo n.º 35
    # Fit a feed-forward neural network to (X, y) with L-BFGS-B and store the
    # fitted weights in self._thetas and the final cost in self._cost.
    # NOTE(review): this listing looks truncated (docstring delimiters, the end
    # of the split list in `roll`, the append in the forward pass of `grad`
    # and the tail of the optimiser call are missing). Confirm against the
    # complete source.
    def learn(self, X, y):
        Learn the model from the given data.

        :param X: the attribute data
        :type X: numpy.array

        :param y: the class variable data
        :type y: numpy.array

        def rand(eps):
            """Return random number in interval [-eps, eps]."""
            # presumably rnd.random() is uniform on [0, 1) -- TODO confirm
            return rnd.random() * 2 * eps - eps

        def g_func(z):
            """The sigmoid (logistic) function."""
            return 1. / (1. + np.exp(-z))

        def h_func(thetas, x):
            """The model function."""
            # A constant 1 is stacked on top of the activations before each
            # layer; the returned value drops that first row again.
            a = np.array([[1.] + list(x)]).T  # Initialize a
            for l in range(1, len(thetas) + 1):  # Forward propagation
                a = np.vstack(
                    (np.array([[1.]]), g_func(thetas[l - 1].T.dot(a))))
            return a[1:]

        def llog(val):
            "The limited logarithm."
            # Clip to [e, 1 - e] so the log never sees 0.
            e = 1e-10
            return np.log(np.clip(val, e, 1. - e))

        def unroll(thetas):
            """Unrolls a list of thetas into vector."""
            sd = [m.shape for m in thetas]  # Keep the shape data
            thetas = np.concatenate(
                [theta.reshape(np.prod(theta.shape)) for theta in thetas])
            return thetas, sd

        def roll(thetas, sd):
            """Rolls a vector of thetas back into list."""
            # Split points are the cumulative sizes of the first i + 1 shapes.
            thetas = np.split(thetas, [
                sum(np.prod(s) for s in sd[:i]) + np.prod(sd[i])
                for i in range(len(sd) - 1)
            return [np.reshape(theta, sd[i]) for i, theta in enumerate(thetas)]

        def cost(thetas, X, y, sd, S, lambda_):
            """The cost function of the neural network."""
            thetas = roll(thetas, sd)
            m, _ = X.shape
            L = len(S)
            # Regularisation skips row 0 of every theta matrix (the [1:, :]
            # slice).
            reg_factor = (lambda_ / float(2 * m)) * sum(
                sum(sum(thetas[l][1:, :]**2)) for l in range(L - 1))
            # Cross-entropy between the one-hot target and the network output,
            # averaged over the m samples; h_func is evaluated twice per
            # sample.
            cost = (-1. / float(m)) * sum(
                sum(self._classes_map[y[s]] * llog(h_func(thetas, X[s])) +
                    (1. - self._classes_map[y[s]]) *
                    llog(1. - h_func(thetas, X[s])))
                for s in range(m)) + reg_factor
            if self._verbose:
                print "Current value of cost func.: " + str(cost[0])
            return cost[0]

        def grad(thetas, X, y, sd, S, lambda_):
            """The gradient (derivate) function which includes the back
            propagation algorithm."""
            thetas = roll(thetas, sd)
            m, _ = X.shape
            L = len(S)
            # One zero accumulator per layer, of shape (S[l] + 1, S[l + 1]).
            d = [
                np.array([[0. for j in range(S[l + 1])]
                          for i in range(S[l] + 1)]) for l in range(L - 1)
            ]  # Initialize the delta matrix

            for s in range(m):
                a = [
                    np.array([[1.] + list(X[s])]).T
                ]  # Initialize a (only a, d & theta matrices have 1 more element in columns, biases)

                # NOTE(review): the statement that appends the next layer's
                # activation to `a` is cut off in this listing.
                for l in range(1, L):  # Forward propagation
                                   g_func(thetas[l - 1].T.dot(a[l - 1])))))
                # TODO
                # Softmax: treat last a column differently
                #ez = np.exp(thetas[L - 2].T.dot(a[L - 2]))
                #sez = sum(ez)
                #a.append(np.vstack((np.array([[1]]), ez / sez)))

                # Output-layer error is (activation without its first row)
                # minus the one-hot target; the other entries are filled in
                # by the backward loop.
                deltas = [None for l in range(L - 1)
                          ] + [a[-1][1:] - self._classes_map[y[s]]]
                for l in range(L - 2, 0, -1):  # Backward propagation
                    deltas[l] = (thetas[l].dot(deltas[l + 1]) *
                                 (a[l] * (1. - a[l])))[1:]

                for l in range(L - 1):
                    d[l] = d[l] + a[l].dot(deltas[l + 1].T)

            # Average the accumulated terms and add lambda_ * theta to every
            # entry ...
            D = [(1. / float(m)) * d[l] + lambda_ * thetas[l]
                 for l in range(L - 1)]
            # ... then subtract a matrix whose first row is lambda_ times
            # theta's first row and whose other rows are zero.
            # NOTE(review): np.zeros is given Di.shape[0] - 1 rows, so the
            # stacked matrix has the same shape as Di.
            D = [
                Di - lambda_ * np.vstack(
                    (thetas[l][0], np.zeros((Di.shape[0] - 1, Di.shape[1]))))
                for l, Di in enumerate(D)
            ]  # Where i = 0, don't use regularization
            D, _ = unroll(D)
            return D

        def gradApprox(thetas, X, y, sd, S, lambda_):
            """Approximate the gradient of the cost function
            (only used for debugging, not in final version)."""
            # NOTE(review): this takes a central difference of `grad`, not of
            # `cost`, and shifts every component by eps at once; eps = 1e-14
            # is also very small for float64. Verify before using the result
            # as a gradient check.
            eps = 1e-14
            return (grad(thetas + eps, X, y, sd, S, lambda_) -
                    grad(thetas - eps, X, y, sd, S, lambda_)) / float(2 * eps)

        # Set the random seed

        # Initialize the final layer of neural net (outputs)
        # Each distinct class gets a one-hot column vector in _classes_map.
        self._classes = list(set(y))
        for i, cl in enumerate(self._classes):
            self._classes_map[cl] = np.zeros((len(self._classes), 1))
            self._classes_map[cl][i] = 1.

        S = [len(X[0])] + self._hl + [len(self._classes)
                                      ]  # Complete information about levels
        L = len(S)
        # Random initial weights drawn with rand(self._eps), one
        # (S[l] + 1) x S[l + 1] matrix per layer, then flattened.
        thetas0 = [
            np.array([[rand(self._eps) for j in range(S[l + 1])]
                      for i in range(S[l] + 1)]) for l in range(L - 1)
        ]  # Initialize the thetas matrix
        thetas0, sd = unroll(thetas0)
        #return grad(thetas0, X, y, sd, S, self._lambda), gradApprox(thetas0, X, y, sd, S, self._lambda) # For testing

        # The L-BFGS-B bounds parameter is redefined: input is (lower_bound, upper_bound) instead of array of bounds for each theta parameter
        # NOTE(review): the expanded list is written back into self._opt_args,
        # so a second call to learn() would expand the already-expanded list.
        # Confirm whether learn() is ever called twice on one instance.
        if self._opt_args != None and "bounds" in self._opt_args:
            bounds = [self._opt_args["bounds"] for i in range(len(thetas0))]
            self._opt_args["bounds"] = bounds

        self._thetas, self._cost, _ = scp.fmin_l_bfgs_b(cost,
                                                        args=(X, y, sd, S,
        # Store the result as a list of per-layer matrices and a plain float.
        self._thetas = roll(self._thetas, sd)
        self._cost = float(self._cost)
        self._can_classify = True
Exemplo n.º 36
# Train a one-vs-all linear SVM by running L-BFGS-B repeatedly, each time on a
# different random subset of the feature columns of X, and return a copy of
# `svm` carrying the combined parameters.
# NOTE(review): this listing looks truncated (the signature is never closed,
# several `else:` lines and call tails are missing, and `dtype`, `verbose`,
# `decisions`, `cost_fn`, `l2_regularization` and `rng` are used without a
# visible definition). Confirm against the complete source.
def SubsampledTheanoOVA(svm, data,
        feature_bytes=1000 * (1024 ** 2), # bytes
        n_runs=None,  # None -> smallest int that uses all data
        n_keep=None,  # None -> X.shape[1] / n_runs
        bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
    # I tried to change the problem to work with reduced regularization
    # or a smaller minimal margin (e.g. < 1) to compensate for the missing
    # features, but nothing really worked.
    # I think the better thing would be to do boosting, in just the way we
    # did in the eccv12 project (see e.g. MarginASGD)
    n_features, n_classes = svm.weights.shape
    X, y = data
    if verbose:
        print 'Training svm on design matrix of size', X.shape
        # NOTE(review): the value printed here is n_classes although the label
        # says 'features' -- confirm which was intended.
        print '   with', n_classes, 'features'
    # Derive whichever of n_runs / n_keep was not supplied.  When both are
    # None, n_runs is chosen so that one run's share of X is at most
    # feature_bytes.
    if n_keep is None:
        if n_runs is None:
            sizeof_dtype = {'float32': 4, 'float64': 8}[dtype]
            Xbytes = X.size * sizeof_dtype
            keep_ratio = float(feature_bytes) / Xbytes
            n_runs = int(np.ceil(1. / keep_ratio))
        n_keep = int(np.ceil(X.shape[1] / float(n_runs)))
        # NOTE(review): an `else:` seems to be missing above this line; as
        # listed, n_runs can no longer be None here.
        if n_runs is None:
            n_runs = int(np.ceil(X.shape[1] / float(n_keep)))

    # Shared variables are created with 2x2 placeholder contents and filled
    # per run with set_value().
    _X = theano.shared(np.ones((2, 2), dtype=dtype),
    _yvecs = theano.shared(np.ones((2, 2), dtype=dtype),

    # Previous decision values enter the margin as an offset; zeros when the
    # caller supplied none.
    if decisions is None:
        _decisions = theano.shared(
                np.zeros((len(y), n_classes), dtype=dtype),
        decisions = np.asarray(decisions).astype(dtype)
        # -- N.B. for multi-class the decisions would be an examples x classes
        # matrix
        if decisions.shape != (len(y), n_classes):
            raise ValueError('decisions have wrong shape', decisions.shape)
        _decisions = theano.shared(decisions)
        del decisions

    # Flat parameter vector: s_n_use * n_classes weights followed by the bias.
    sgd_params = tensor.vector(dtype=dtype)
    s_n_use = tensor.lscalar()

    flat_sgd_weights = sgd_params[:s_n_use * n_classes]
    sgd_weights = flat_sgd_weights.reshape((s_n_use, n_classes))
    sgd_bias = sgd_params[s_n_use * n_classes:]

    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias + _decisions)

    # Loss selection by name.
    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
                margin > -1,
                # -- smooth part
                tensor.maximum(0, 1 - margin) ** 2,
                # -- straight part
                -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
        # NOTE(review): an `else:` seems to be missing here; as listed the
        # 'Hinge' branch always raises.
        raise ValueError('invalid cost-fn', cost_fn)

    l2_cost = .5 * l2_regularization * tensor.dot(
            flat_sgd_weights, flat_sgd_weights)

    cost = losses.mean(axis=0).sum() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    # Compiled function returning [cost, gradient] for (params, n_use).
    _f_df = theano.function([sgd_params, s_n_use], [cost, dcost_dparams])

    # Targets encoded as +1 in the column of the example's class, -1 elsewhere.
    yvecs = np.asarray(
            (y[:, None] == np.arange(n_classes)) * 2 - 1,

    # TODO: reconsider how to use this function when doing partial fitting
    #_f_update_decisions = theano.function([sgd_params, s_n_use], [],
    #        updates={
    #            _decisions: (_decisions
    #                + tensor.dot(_X, sgd_weights) + sgd_bias),
    #            })

    def flatten_svm(obj):
        # Weights followed by bias, as one flat vector.
        return np.concatenate([obj.weights.flatten(), obj.bias])

    if verbose:
        print 'keeping', n_keep, 'of', X.shape[1], 'features'

    # Fixed seed when the caller did not supply a generator.
    if rng is None:
        rng = np.random.RandomState(123)

    # Each run takes the next n_keep entries of one random permutation of the
    # feature indices, so the slices used by different runs do not overlap.
    all_feat_randomized = rng.permutation(X.shape[1])
    bests = []
    for ii in range(n_runs):
        use_features = all_feat_randomized[ii * n_keep: (ii + 1) * n_keep]
        assert len(use_features)
        n_use = len(use_features)

        def f(p):
            # Evaluate in `dtype`, hand float64 cost and gradient back.
            c, d = _f_df(p.astype(dtype), n_use)
            return c.astype('float64'), d.astype('float64')

        # Starting point: the current svm weights for the selected features
        # plus the current bias.
        params = np.zeros(n_use * n_classes + n_classes)
        params[:n_use * n_classes] = svm.weights[use_features].flatten()
        params[n_use * n_classes:] = svm.bias

        _X.set_value(X[:, use_features], borrow=True)
        _yvecs.set_value(yvecs, borrow=True)

        best, bestval, info_dct = fmin_l_bfgs_b(f,
                iprint=1 if verbose else -1,
                factr=bfgs_factr,  # -- 1e12 for low acc, 1e7 for moderate
        best_svm = copy.deepcopy(svm)
        best_svm.weights[use_features] = best[:n_classes * n_use].reshape(
                    (n_use, n_classes))
        best_svm.bias = best[n_classes * n_use:]
        # NOTE(review): nothing in the visible text appends to `bests`;
        # presumably a line such as bests.append(flatten_svm(best_svm)) was
        # lost -- confirm.

    # sum instead of mean here, because each loop iter trains only a subset of
    # features. XXX: This assumes that those subsets are mutually exclusive
    best_params = np.sum(bests, axis=0)
    rval = copy.deepcopy(svm)
    rval.weights = best_params[:n_classes * n_features].reshape(
                (n_features, n_classes))
    rval.bias = best_params[n_classes * n_features:]

    # XXX: figure out why Theano may be not freeing this memory, why does
    # writing little matrices here help?
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones((2, 2), dtype=dtype))
    _decisions.set_value(np.ones((2, 2), dtype=dtype))
    return rval
Exemplo n.º 37
# Binary (+1 / -1 labels) variant of the feature-subsampled SVM trainer: each
# run fits a weight vector and scalar bias on one random slice of the feature
# columns with L-BFGS-B, and the decision values of that run are added into
# `_decisions`, which offsets the margin in later runs.
# NOTE(review): this listing looks truncated (the signature is never closed,
# several `else:` lines and call tails are missing, and `dtype`, `verbose`,
# `decisions`, `cost_fn`, `l2_regularization` and `rng` are used without a
# visible definition). Confirm against the complete source.
def BinarySubsampledTheanoOVA(svm, data,
        feature_bytes=1000 * (1024 ** 2), # bytes
        n_runs=None,  # None -> smallest int that uses all data
        bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
    n_features, = svm.weights.shape
    X, y = data

    # Default n_runs: enough runs that one run's share of X is at most
    # feature_bytes.
    if n_runs is None:
        sizeof_dtype = {'float32': 4, 'float64': 8}[dtype]
        Xbytes = X.size * sizeof_dtype
        keep_ratio = float(feature_bytes) / Xbytes
        n_runs = int(np.ceil(1. / keep_ratio))
        print 'BinarySubsampledTheanoOVA using n_runs =', n_runs
    n_keep = int(np.ceil(X.shape[1] / float(n_runs)))

    # Labels must be exactly the two values -1 and 1.
    assert set(y) == set([-1, 1])
    _X = theano.shared(np.ones((2, 2), dtype=dtype),
            allow_downcast=True, borrow=True)
    _yvecs = theano.shared(y.astype(dtype),
            allow_downcast=True, borrow=True)
    # NOTE(review): `if decisions:` relies on truthiness; for a numpy array
    # with more than one element that test is ambiguous -- confirm what
    # callers pass. An `else:` also seems to be missing before the final
    # assignment of this block.
    if decisions:
        decisions = np.asarray(decisions).astype(dtype)
        # -- N.B. for multi-class the decisions would be an examples x classes
        # matrix
        if decisions.shape != y.shape:
            raise ValueError('decisions have wrong shape', decisions.shape)
        _decisions = theano.shared(decisions)
        del decisions
        _decisions = theano.shared(y.astype(dtype) * 0, allow_downcast=True)

    # Flat parameter vector: s_n_use weights followed by one scalar bias.
    sgd_params = tensor.vector(dtype=dtype)
    s_n_use = tensor.lscalar()

    sgd_weights = sgd_params[:s_n_use]
    sgd_bias = sgd_params[s_n_use]

    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias + _decisions)

    # Loss selection by name.
    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
                margin > -1,
                # -- smooth part
                tensor.maximum(0, 1 - margin) ** 2,
                # -- straight part
                -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
        # NOTE(review): an `else:` seems to be missing here; as listed the
        # 'Hinge' branch always raises.
        raise ValueError('invalid cost-fn', cost_fn)

    l2_cost = .5 * l2_regularization * tensor.dot(
            sgd_weights, sgd_weights)

    cost = losses.mean() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    # Compiled function returning [cost, gradient] for (params, n_use).
    _f_df = theano.function([sgd_params, s_n_use], [cost, dcost_dparams])

    # Compiled function with no outputs; the visible fragment maps _decisions
    # to itself plus the current run's decision values.
    _f_update_decisions = theano.function([sgd_params, s_n_use], [],
                _decisions: (
                    tensor.dot(_X, sgd_weights) + sgd_bias + _decisions),

    def flatten_svm(obj):
        # Note this is different from multi-class case because bias is scalar
        return np.concatenate([obj.weights.flatten(), [obj.bias]])

    if verbose:
        print 'keeping', n_keep, 'of', X.shape[1], 'features, per round'
        print 'running for ', n_runs, 'rounds'

    # Fixed seed when the caller did not supply a generator.
    if rng is None:
        rng = np.random.RandomState(123)

    # Each run takes the next n_keep entries of one random permutation of the
    # feature indices.
    all_feat_randomized = rng.permutation(X.shape[1])
    bests = []
    for ii in range(n_runs):
        use_features = all_feat_randomized[ii * n_keep: (ii + 1) * n_keep]
        assert len(use_features)
        n_use = len(use_features)

        def f(p):
            # Evaluate in `dtype`, hand float64 cost and gradient back.
            c, d = _f_df(p.astype(dtype), n_use)
            return c.astype('float64'), d.astype('float64')

        # Starting point: the current svm weights for the selected features
        # plus the current bias.
        params = np.zeros(n_use + 1)
        params[:n_use] = svm.weights[use_features].flatten()
        params[n_use] = svm.bias

        _X.set_value(X[:, use_features], borrow=True)

        best, bestval, info_dct = fmin_l_bfgs_b(f,
                iprint=int(verbose) - 1,
        best_svm = copy.deepcopy(svm)
        best_svm.weights[use_features] = np.array(best[:n_use], dtype=dtype)
        best_svm.bias = float(best[n_use])
        # NOTE(review): nothing in the visible text appends to `bests`;
        # presumably a line such as bests.append(flatten_svm(best_svm)) was
        # lost -- confirm.

        # Fold this run's decision values into _decisions and report the
        # resulting per-example margins.
        _f_update_decisions(best.astype(dtype), n_use)
        margin_ii = _decisions.get_value() * _yvecs.get_value()
        print 'run %i: margin min:%f mean:%f max:%f' % (
                ii, np.min(margin_ii), np.mean(margin_ii), np.max(margin_ii))
        if 0:
            # XXX This is a hack that helps but it's basically wrong. The
            # correct thing to do would be to add two scalars to the
            # optimization: one scalar represents the total l2 norm of the
            # weight vector fit so far.  The second scalar represents how much
            # to down-weight the total vector fit so far in response to the
            # utility of the current feature set.  So this second scalar would
            # scale the vector of previous decisions, and the l2-cost would
            # always be the l2-cost of the entire vector so far.
                    _decisions.get_value() - np.min(margin_ii) * y)
        # NOTE(review): the comment after the loop says fewer than n_runs
        # runs may be used, but no `break` is visible in this branch --
        # presumably it was lost; confirm.
        elif (ii < (n_runs - 1)) and (np.min(margin_ii) > .95):
            print 'Margin has been maximized after', ii, 'of', n_runs

    # N.B. we might have used fewer than n_runs
    best_params = np.sum(bests, axis=0)
    best_params[n_features] /= len(bests)  # bias is estimated on each run
    rval = copy.deepcopy(svm)
    rval.weights = best_params[:n_features].astype(dtype)
    rval.bias = float(best_params[n_features])

    # XXX: figure out why Theano may be not freeing this memory, why does
    # writing little matrices here help?
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones(2, dtype=dtype))
    return rval
Exemplo n.º 38
# Smooth the array y with a DCT-based penalised least-squares smoother and
# return (z, s, exitflag, Wtot).  The help text below is MATLAB documentation
# (it is signed by Damien Garcia), so this appears to be a Python port of the
# MATLAB smoothn function.
# NOTE(review): this listing looks truncated (the signature is never closed,
# the docstring delimiters are gone, and several `try:`/`else:`/`raise` lines
# are missing; `W`, `s`, `s0`, `z0`, `TolZ`, `MaxIter`, `isrobust` and
# `weightstr` are used without a visible definition). Confirm against the
# complete source.
def smoothn(y,nS0=10,axis=None,smoothOrder=2.0,sd=None,verbose=False,\
   function [z,s,exitflag,Wtot] = smoothn(varargin)

   SMOOTHN Robust spline smoothing for 1-D to N-D data.
   SMOOTHN provides a fast, automatized and robust discretized smoothing
   spline for data of any dimension.

   Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
   can be any N-D noisy array (time series, images, 3D data,...). Non
   finite data (NaN or Inf) are treated as missing values.

   Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
   S must be a real positive scalar. The larger S is, the smoother the
   output will be. If the smoothing parameter S is omitted (see previous
   option) or empty (i.e. S = []), it is automatically determined using
   the generalized cross-validation (GCV) method.

   Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of
   real positive values, that must have the same size as Y. Note that a
   nil weight corresponds to a missing value.

   Robust smoothing
   Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
   the influence of outlying data.

   [Z,S] = SMOOTHN(...) also returns the calculated value for S so that
   you can fine-tune the smoothing subsequently if needed.

   An iteration process is used in the presence of weighted and/or missing
   values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
   termination parameters specified by OPTION_NAME and OPTION_VALUE. They
   can contain the following criteria:
       TolZ:       Termination tolerance on Z (default = 1e-3)
                   TolZ must be in ]0,1[
       MaxIter:    Maximum number of iterations allowed (default = 100)
       Initial:    Initial value for the iterative process (default =
                   original data)
   Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0);

   [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
   describes the exit condition of SMOOTHN:
       1       SMOOTHN converged.
       0       Maximum number of iterations was reached.

   Class Support
   Input array can be numeric or logical. The returned array is of class

   The N-D (inverse) discrete cosine transform functions <a
   >DCTN</a> and <a
   >IDCTN</a> are required.

   To be made
   Estimate the confidence bands (see Wahba 1983, Nychka 1988).

   Garcia D, Robust smoothing of gridded data in one and higher dimensions
   with missing values. Computational Statistics & Data Analysis, 2010. 
   href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')">PDF download</a>

   # 1-D example
   x = linspace(0,100,2**8);
   y = cos(x/10)+(x/50)**2 + randn(size(x))/10;
   y[[70, 75, 80]] = [5.5, 5, 6];
   z = smoothn(y); # Regular smoothing
   zr = smoothn(y,'robust'); # Robust smoothing
   subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
   axis square, title('Regular smoothing')
   subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
   axis square, title('Robust smoothing')

   # 2-D example
   xp = 0:.02:1;
   [x,y] = meshgrid(xp);
   f = exp(x+y) + sin((x-2*y)*3);
   fn = f + randn(size(f))*0.5;
   fs = smoothn(fn);
   subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
   subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square

   # 2-D example with missing data
   n = 256;
   y0 = peaks(n);
   y = y0 + rand(size(y0))*2;
   I = randperm(n^2);
   y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data
   y(40:90,140:190) = NaN; # create a hole
   z = smoothn(y); # smooth data
   subplot(2,2,1:2), imagesc(y), axis equal off
   title('Noisy corrupt data')
   subplot(223), imagesc(z), axis equal off
   title('Recovered data ...')
   subplot(224), imagesc(y0), axis equal off
   title('... compared with original data')

   # 3-D example
   [x,y,z] = meshgrid(-2:.2:2);
   xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
   vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
   subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
   title('Noisy data')
   v = smoothn(vn);
   subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
   title('Smoothed data')

   # Cardioid
   t = linspace(0,2*pi,1000);
   x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
   y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
   z = smoothn(complex(x,y));
   axis equal tight

   # Cellular vortical flow
   [x,y] = meshgrid(linspace(0,1,24));
   Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
   Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
   Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise
   Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise
   I = randperm(numel(Vx));
   Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
   Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
   Vx(I(31:60)) = NaN; # missing values
   Vy(I(31:60)) = NaN; # missing values
   Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing
   subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
   title('Noisy velocity field')
   subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
   title('Smoothed velocity field')


   -- Damien Garcia -- 2009/03, revised 2010/11
   Visit my <a
   href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')">website</a> for more details about SMOOTHN 

  # Check input arguments

  # Masked-array input: remember the mask, convert y to a plain array and
  # zero the masked entries of y and of the weights.
  # NOTE(review): comparisons such as `W != None` may be evaluated
  # elementwise when W is a numpy array; `is not None` is the usual
  # spelling -- confirm behaviour with the numpy version in use.
  if type(y) == ma.core.MaskedArray:  # masked array
    is_masked = True
    mask = y.mask
    y = np.array(y)
    y[mask] = 0.
    if W != None:
      W  = np.array(W)
      W[mask] = 0.
    # A standard deviation array, when given, is turned into weights 1/sd**2.
    if sd != None:
      W = np.array(1./sd**2)
      W[mask] = 0.
      sd = None
    y[mask] = np.nan
  # Same sd -> weight conversion for the general case; entries with sd <= 0
  # keep weight 0.
  if sd != None:
    sd_ = np.array(sd)
    mask = (sd > 0.)
    W = np.zeros_like(sd_)
    W[mask] = 1./sd_[mask]**2
    sd = None

  # Scale the weights so the largest is 1.
  if W != None:
    W = W/W.max()

  sizy = y.shape;

  # sort axis
  # Default: smooth along every axis of y.
  if axis == None:
    axis = tuple(np.arange(y.ndim))

  noe = y.size # number of elements
  # Fewer than two elements: nothing to smooth, return the input unchanged.
  if noe<2:
    z = y
    exitflag = 0;Wtot=0
    return z,s,exitflag,Wtot
  # Smoothness parameter and weights
  #if s != None:
  #  s = []
  if W == None:
    W = ones(sizy);

  #if z0 == None:
  #  z0 = y.copy()

  # "Weighting function" criterion
  weightstr = weightstr.lower()
  # Weights. Zero weights are assigned to not finite values (Inf or NaN),
  # (Inf/NaN values = missing data).
  IsFinite = np.array(isfinite(y)).astype(bool);
  nof = IsFinite.sum() # number of finite elements
  W = W*IsFinite;
  # NOTE(review): the statement that reports negative weights is cut off in
  # this listing; only its message argument remains.
  if any(W<0):
        'Weights must all be >=0')
      #W = W/np.max(W)
  # Weighted or missing data?
  isweighted = any(W != 1);
  # Robust smoothing?
  # Automatic smoothing?
  # NOTE(review): `not s` is true for s == 0 as well as for None.
  isauto = not s;
  # DCTN and IDCTN are required
  # NOTE(review): this looks like the remains of a try/except around the
  # import, with exitflag = -1 returned on failure -- confirm.
    from scipy.fftpack.realtransforms import dct,idct
    z = y
    exitflag = -1;Wtot=0
    return z,s,exitflag,Wtot

  ## Creation of the Lambda tensor
  # Lambda contains the eigenvalues of the difference matrix used in this
  # penalized least squares process.
  axis = tuple(np.array(axis).flatten())
  d =  y.ndim;
  Lambda = zeros(sizy);
  for i in axis:
    # create a 1 x d array (so e.g. [1,1] for a 2D case
    siz0 = ones((1,y.ndim))[0];
    siz0[i] = sizy[i];
    # cos(pi*(reshape(1:sizy(i),siz0)-1)/sizy(i)))
    # (arange(1,sizy[i]+1).reshape(siz0) - 1.)/sizy[i]
    Lambda = Lambda + (cos(pi*(arange(1,sizy[i]+1) - 1.)/sizy[i]).reshape(siz0))
    #  Lambda = Lambda + siz0
  Lambda = -2.*(len(axis)-Lambda);
  # With a user-supplied s the filter Gamma can be computed once, up front.
  if not isauto:
    Gamma = 1./(1+(s*abs(Lambda))**smoothOrder);

  ## Upper and lower bound for the smoothness parameter
  # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs
  # if h is close to 1, while over-smoothing appears when h is near 0. Upper
  # and lower bounds for h are given to avoid under- or over-smoothing. See
  # equation relating h to the smoothness parameter (Equation #12 in the
  # referenced CSDA paper).
  N = sum(array(sizy) != 1); # tensor rank of the y-array
  hMin = 1e-6; hMax = 0.99;
  # (h/n)**2 = (1 + a)/( 2 a)
  # a = 1/(2 (h/n)**2 -1) 
  # where a = sqrt(1 + 16 s)
  # (a**2 -1)/16
  # NOTE(review): the guard around these two pairs of assignments is missing
  # from the listing; as written both bounds end up None.
    sMinBnd = np.sqrt((((1+sqrt(1+8*hMax**(2./N)))/4./hMax**(2./N))**2-1)/16.);
    sMaxBnd = np.sqrt((((1+sqrt(1+8*hMin**(2./N)))/4./hMin**(2./N))**2-1)/16.);
    sMinBnd = None
    sMaxBnd = None
  ## Initialize before iterating
  Wtot = W;
  #--- Initial conditions for z
  if isweighted:
    #--- With weighted/missing data
    # An initial guess is provided to ensure faster convergence. For that
    # purpose, a nearest neighbor interpolation followed by a coarse
    # smoothing are performed.
    if z0 != None: # an initial guess (z0) has been provided
        z = z0;
        z = y #InitialGuess(y,IsFinite);
        z[~IsFinite] = 0.
    z = zeros(sizy);
  z0 = z;
  y[~IsFinite] = 0; # arbitrary values for missing y-data
  tol = 1.;
  RobustIterativeProcess = True;
  RobustStep = 1;
  nit = 0;
  #--- Error on p. Smoothness parameter s = 10^p
  errp = 0.1;
  #opt = optimset('TolX',errp);
  #--- Relaxation factor RF: to speedup convergence
  RF = 1 + 0.75*isweighted;
  # ??
  ## Main iterative process
  # xpost holds log10(s): a point between the two bounds in automatic mode
  # (the listing then unconditionally overwrites it with log10(s); an `else:`
  # seems to be missing).
  if isauto:
      xpost = array([(0.9*log10(sMinBnd) + log10(sMaxBnd)*0.1)])
    xpost = array([log10(s)])
  while RobustIterativeProcess:
    #--- "amount" of weights (see the function GCVscore)
    aow = sum(Wtot)/noe; # 0 < aow <= 1
    # Inner loop: iterate until z changes by less than TolZ (relative) or
    # MaxIter iterations have been done.
    while tol>TolZ and nit<MaxIter:
        if verbose:
        nit = nit+1;
        DCTy = dctND(Wtot*(y-z)+z,f=dct);
        if isauto and not remainder(log2(nit),1):
            # The generalized cross-validation (GCV) method is used.
            # We seek the smoothing parameter s that minimizes the GCV
            # score i.e. s = Argmin(GCVscore).
            # Because this process is time-consuming, it is performed from
            # time to time (when nit is a power of 2)
            # errp in here somewhere
            #xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
            #   approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\
            #   args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe))

            # if we have no clue what value of s to use, better span the
            # possible range to get a reasonable starting point ...
            # only need to do it once though. nS0 is the number of samples used
            if not s0:
              # Evaluate the GCV score on nS0 evenly spaced values of
              # log10(s) between the bounds and start from the best one.
              ss = np.arange(nS0)*(1./(nS0-1.))*(log10(sMaxBnd)-log10(sMinBnd))+ log10(sMinBnd)
              g = np.zeros_like(ss)
              for i,p in enumerate(ss):
                g[i] = gcv(p,Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe,smoothOrder)
                #print 10**p,g[i]
              xpost = [ss[g==g.min()]]
              #print '==============='
              #print nit,tol,g.min(),xpost[0],s
              #print '==============='
              xpost = [s0]
            xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
        s = 10**xpost[0];
        # update the value we use for the initial s estimate
        s0 = xpost[0]

        Gamma = 1./(1+(s*abs(Lambda))**smoothOrder);

        # Relaxed update of z from the filtered DCT coefficients.
        z = RF*dctND(Gamma*DCTy,f=idct) + (1-RF)*z;
        # if no weighted/missing data => tol=0 (no iteration)
        tol = isweighted*norm(z0-z)/norm(z);
        z0 = z; # re-initialization
    exitflag = nit<MaxIter;

    if isrobust: #-- Robust Smoothing: iteratively re-weighted process
        #--- average leverage
        h = sqrt(1+16.*s); 
        h = sqrt(1+h)/sqrt(2)/h; 
        h = h**N;
        #--- take robust weights into account
        Wtot = W*RobustWeights(y-z,IsFinite,h,weightstr);
        #--- re-initialize for another iterative weighted process
        isweighted = True; tol = 1; nit = 0; 
        RobustStep = RobustStep+1;
        RobustIterativeProcess = RobustStep<3; # 3 robust steps are enough.
        # NOTE(review): an `else:` seems to be missing before the next line;
        # as listed it always stops the outer loop after one pass.
        RobustIterativeProcess = False; # stop the whole process

  ## Warning messages
  # NOTE(review): the calls that emit these warnings are cut off in the
  # listing; only their message arguments remain.
  if isauto:
    if abs(log10(s)-log10(sMinBnd))<errp:
            ['s = %.3f '%(s) + ': the lower bound for s '\
            + 'has been reached. Put s as an input variable if required.'])
    elif abs(log10(s)-log10(sMaxBnd))<errp:
            ['s = %.3f '%(s) + ': the upper bound for s '\
            + 'has been reached. Put s as an input variable if required.'])
    #    ['Maximum number of iterations (%d'%(MaxIter) + ') has '\
    #    + 'been exceeded. Increase MaxIter option or decrease TolZ value.'])
  return z,s,exitflag,Wtot
Exemplo n.º 39
import openbabel
import numpy

from scipy.optimize.lbfgsb import fmin_l_bfgs_b

def f(x):
    """Return the sum of the squares of the first two components of x.

    Only x[0] and x[1] are read; any further components are ignored.
    """
    first, second = x[0], x[1]
    return first ** 2 + second ** 2

def g(x):
    """Return the gradient of f at x as a length-2 numpy array.

    The two entries are 2 * x[0] and 2 * x[1]; further components of x are
    ignored.
    """
    doubled = [2 * component for component in (x[0], x[1])]
    return numpy.array(doubled)

# Minimise f with L-BFGS-B, starting from the point (3, 1) and supplying the
# analytic gradient g.
x0 = numpy.array([3, 1])

# fmin_l_bfgs_b returns (minimiser, minimum value, information dict).  The
# third value is bound to `info` so the builtin `dict` is not shadowed.
opt, energy, info = fmin_l_bfgs_b(f, x0, fprime=g)

# A parenthesised single-argument print gives the same output on Python 2
# and Python 3.
print(opt)