Example #1
def comp_sep(A_ev, d, invN, A_dB_ev, comp_of_dB, *minimize_args,
             **minimize_kwargs):
    """ Perform component separation

    Build the (inverse) spectral likelihood and minimize it to estimate the
    parameters of the mixing matrix. Separate the components using the best-fit
    mixing matrix.

    Parameters
    ----------
    A_ev : function
        The evaluator of the mixing matrix. It takes a float or an array as
        argument and returns the mixing matrix, an ndarray with shape
        *(..., n_freq, n_comp)*.
    d : ndarray
        The data vector. Shape *(..., n_freq)*.
    invN : ndarray or None
        The inverse noise matrix. Shape *(..., n_freq, n_freq)*.
    A_dB_ev : function
        The evaluator of the derivative of the mixing matrix. It returns a
        list; each entry is the derivative with respect to a different
        parameter.
    comp_of_dB : list of IndexExpression
        Maps the output of *A_dB_ev* onto the non-zero columns of *A*:
        ``A_dB_ev(x)[i]`` is assumed to be the derivative of
        ``A[comp_of_dB[i]]``.
    minimize_args : list
        Positional arguments to be passed to `scipy.optimize.minimize`.
        At the moment it contains only *x0*, the initial guess for the
        spectral parameters.
    minimize_kwargs : dict
        Keyword arguments to be passed to `scipy.optimize.minimize`.
        A good choice for most cases is
        ``minimize_kwargs = {'tol': 1, 'options': {'disp': True}}``. *tol*
        depends on both the solver and your signal-to-noise: it should ensure
        that the difference between the best-fit -logL and the minimum is well
        below 1, without exaggerating (a difference of 1e-4 is useless).
        *disp* also triggers a verbose callback that monitors the convergence.

    Returns
    -------
    result : scipy.optimize.OptimizeResult (dict)
        Result of the spectral likelihood maximisation.
        It is the output of `scipy.optimize.minimize`, plus some extras.
        It includes

        - **x**: *(ndarray)* - the best-fit spectral indices
        - **Sigma**: *(ndarray)* - the semi-analytic covariance of the best-fit
          spectral indices
        - **s**: *(ndarray)* - separated components. Shape *(..., n_comp)*
        - **invAtNA**: *(ndarray)* - covariance of the separated components.
          Shape *(..., n_comp, n_comp)*

    Note
    ----
    The *...* in the arguments denote any extra set of dimensions. They have to
    be compatible among different arguments in the `numpy` broadcasting sense.
    """
    # If mixing matrix is fixed, separate and return
    if isinstance(A_ev, np.ndarray):
        res = sp.optimize.OptimizeResult()
        res.s, (u_e_v, L) = Wd(A_ev, d, invN, True)
        res.invAtNA = _invAtNA_svd(u_e_v)
        if L is not None:
            d = _mtv(L, d)
        res.chi = d - _As_svd(u_e_v, res.s)
        return res
    else:
        # Mixing matrix has free parameters: check that x0 was provided
        assert minimize_args
        assert len(minimize_args[0])

    # Check input
    if A_dB_ev is not None:
        A_dB_ev, comp_of_dB = _A_dB_ev_and_comp_of_dB_as_compatible_list(
            A_dB_ev, comp_of_dB, minimize_args[0])
    if 'options' in minimize_kwargs and 'disp' in minimize_kwargs['options']:
        disp = minimize_kwargs['options']['disp']
    else:
        disp = False

    # Prepare functions for minimize
    fun, jac, last_values = _build_bound_inv_logL_and_logL_dB(
        A_ev, d, invN, A_dB_ev, comp_of_dB)
    minimize_kwargs['jac'] = jac

    # Gather minimize arguments
    if disp and 'callback' not in minimize_kwargs:
        minimize_kwargs['callback'] = verbose_callback()

    # Likelihood maximization
    res = sp.optimize.minimize(fun, *minimize_args, **minimize_kwargs)

    # Gather results
    u_e_v_last, A_dB_last, x_last, pw_d = last_values
    if not np.all(x_last[0] == res.x):
        fun(res.x)  #  Make sure that last_values refer to the minimum

    res.s = _Wd_svd(u_e_v_last[0], pw_d[0])
    res.invAtNA = _invAtNA_svd(u_e_v_last[0])
    res.chi = pw_d[0] - _As_svd(u_e_v_last[0], res.s)
    if A_dB_ev is None:
        fisher = numdifftools.Hessian(fun)(res.x)  # TODO: something cheaper
    else:
        fisher = _fisher_logL_dB_dB_svd(u_e_v_last[0], res.s, A_dB_last[0],
                                        comp_of_dB)
        As_dB = (_mv(A_dB_i, res.s[comp_of_dB_i])
                 for A_dB_i, comp_of_dB_i in zip(A_dB_last[0], comp_of_dB))
        res.chi_dB = []
        for comp_of_dB_i, As_dB_i in zip(comp_of_dB, As_dB):
            freq_of_dB = comp_of_dB_i[:-1] + (slice(None), )
            res.chi_dB.append(
                np.sum(res.chi[freq_of_dB] * As_dB_i, -1) /
                np.linalg.norm(As_dB_i, axis=-1))
    try:
        res.Sigma = np.linalg.inv(fisher)
    except np.linalg.LinAlgError:
        res.Sigma = fisher * np.nan
    res.Sigma_inv = fisher
    return res
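A minimal usage sketch (not from the source): a toy one-parameter, two-component model, with invN=None and A_dB_ev=None so that the Fisher matrix falls back to the numerical Hessian, assuming `comp_sep` and its helpers are importable as above. The names `toy_A_ev` and the frequency values are illustrative.

import numpy as np

freqs = np.array([30., 100., 353.])

def toy_A_ev(beta):
    # Mixing matrix, shape (n_freq, n_comp): a flat column plus a power law
    return np.stack([np.ones_like(freqs), (freqs / 100.)**beta[0]], axis=-1)

d = toy_A_ev(np.array([1.6])) @ np.array([1., 1.]) + 0.01 * np.random.randn(3)
res = comp_sep(toy_A_ev, d, None, None, None,
               np.array([1.5]),  # x0, the initial guess for the spectral parameter
               tol=1, options={'disp': True})
print(res.x, res.s)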
Example #2
    
    dX = np.stack([dX1, dX2, dX3], axis=2)
    sigmas = np.array([sigma_mup, sigma_r, sigma_beta])
    Sigma = est.all_covariances(dX, sigmas) # burn-in for jit
    
    # calculate log-likelihood from this
    return est.log_likelihood(Y, Sigma)


test = return_loglike(estimates)


#%%
import numdifftools as nd

dfun = nd.Hessian(return_loglike, step=1e-05)
#Hessian_mat = dfun([eps_x, Fvac])
Hessian_mat = dfun([rho_mup, rho_r, rho_beta, sigma_mup, sigma_r, sigma_beta, eps_x, Fvac_factor])
print(Hessian_mat)



#%% Fisher 

obs = Y.flatten().size
I = -Hessian_mat
var = np.linalg.inv(I)
stds = np.sqrt(np.abs(np.diag(var)))
var_covar = np.abs(np.linalg.inv(I))
# std. errors
print(stds)
Example #3
    def area_hess(self, params):
        hess_matrix = nd.Hessian(self.area)(params)
        return hess_matrix
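A standalone sanity check of the same pattern, comparing nd.Hessian against a known analytic Hessian (illustrative function, not from the source):

import numpy as np
import numdifftools as nd

f = lambda p: p[0]**2 + 3 * p[0] * p[1]   # analytic Hessian: [[2, 3], [3, 0]]
print(nd.Hessian(f)([1.0, 2.0]))          # numerical result should match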
Example #4
    def _ci_delta(self):
        # Calculate the variance-covariance matrix using the
        # hessian from numdifftools.
        # This is used to obtain confidence intervals for the estimators and
        # for the return values at several return periods.
        #
        # More info about the delta method can be found in:
        #     - Coles, Stuart: "An Introduction to Statistical Modeling of
        #       Extreme Values", Springer (2001)
        #     - https://en.wikipedia.org/wiki/Delta_method

        # data
        c = -self.c  # We negate the shape to avoid inconsistency problems!?
        loc = self.loc
        scale = self.scale
        hess = _ndt.Hessian(self._nnlf)
        T = _np.arange(0.1, 5000, 0.1)
        sT = -_np.log(1. - self.frec / T)
        sT2 = self.distr.isf(self.frec / T)

        # VarCovar matrix and confidence values for estimators and return values
        # Confidence interval for return values (up values and down values)
        ci_Tu = _np.zeros(sT.shape)
        ci_Td = _np.zeros(sT.shape)
        if c:  # If c then we are calculating GEV confidence intervals
            varcovar = _np.linalg.inv(hess([c, loc, scale]))
            self.params_ci = OrderedDict()
            se = _np.sqrt(_np.diag(varcovar))
            self._se = se
            self.params_ci['shape'] = (self.c -
                                       _st.norm.ppf(1 - self.ci / 2) * se[0],
                                       self.c +
                                       _st.norm.ppf(1 - self.ci / 2) * se[0])
            self.params_ci['location'] = (
                self.loc - _st.norm.ppf(1 - self.ci / 2) * se[1],
                self.loc + _st.norm.ppf(1 - self.ci / 2) * se[1])
            self.params_ci['scale'] = (self.scale -
                                       _st.norm.ppf(1 - self.ci / 2) * se[2],
                                       self.scale +
                                       _st.norm.ppf(1 - self.ci / 2) * se[2])
            for i, val in enumerate(sT2):
                gradZ = [
                    scale * (c**-2) * (1 - sT[i]**(-c)) - scale * (c**-1) *
                    (sT[i]**-c) * _np.log(sT[i]), 1, -(1 - sT[i]**(-c)) / c
                ]
                se = _np.dot(_np.dot(gradZ, varcovar), _np.array(gradZ).T)
                ci_Tu[i] = val + _st.norm.ppf(1 - self.ci / 2) * _np.sqrt(se)
                ci_Td[i] = val - _st.norm.ppf(1 - self.ci / 2) * _np.sqrt(se)
        else:  # Otherwise we are calculating Gumbel confidence intervals
            varcovar = _np.linalg.inv(hess([loc, scale]))
            self.params_ci = OrderedDict()
            se = _np.sqrt(_np.diag(varcovar))
            self._se = se
            self.params_ci['shape'] = (0, 0)
            self.params_ci['location'] = (
                self.loc - _st.norm.ppf(1 - self.ci / 2) * se[0],
                self.loc + _st.norm.ppf(1 - self.ci / 2) * se[0])
            self.params_ci['scale'] = (self.scale -
                                       _st.norm.ppf(1 - self.ci / 2) * se[1],
                                       self.scale +
                                       _st.norm.ppf(1 - self.ci / 2) * se[1])
            for i, val in enumerate(sT2):
                gradZ = [1, -_np.log(sT[i])]
                se = _np.dot(_np.dot(gradZ, varcovar), _np.array(gradZ).T)
                ci_Tu[i] = val + _st.norm.ppf(1 - self.ci / 2) * _np.sqrt(se)
                ci_Td[i] = val - _st.norm.ppf(1 - self.ci / 2) * _np.sqrt(se)
        self._ci_Tu = ci_Tu
        self._ci_Td = ci_Td
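The delta-method step used above, in isolation (hypothetical numbers): the variance of a scalar function of the estimators is gradZ . varcovar . gradZ, and the confidence half-width is the normal quantile times its square root.

import numpy as np
import scipy.stats as st

varcovar = np.array([[0.04, 0.01],          # hypothetical parameter covariance
                     [0.01, 0.09]])
gradZ = np.array([1.0, -np.log(10.0)])      # Gumbel return-level gradient wrt (loc, scale)
var_z = gradZ @ varcovar @ gradZ            # delta-method variance
half = st.norm.ppf(0.975) * np.sqrt(var_z)  # 95% half-width, as in ci_Tu/ci_Td above
print(half)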
Example #5
    def _optimize_fit(self, obj_type=None, **kwargs):

        if obj_type == self.neg_loglik:
            method = 'MLE'
        else:
            method = 'PML'

        # Starting values
        phi = self.latent_variables.get_z_starting_values()
        phi = kwargs.get('start', phi).copy()  # If user supplied

        # Optimize using L-BFGS-B
        p = optimize.minimize(obj_type, phi, method='L-BFGS-B')

        theta, Y, scores, states, states_var, X_names = self._categorize_model_output(
            p.x)

        # Check that matrix is non-singular; act accordingly
        try:
            ihessian = np.linalg.inv(nd.Hessian(obj_type)(p.x))
            ses = np.power(np.abs(np.diag(ihessian)), 0.5)
            self.latent_variables.set_z_values(p.x, method, ses, None)
            # Change this in future
            try:
                latent_variables_store = self.latent_variables.copy()
            except Exception:
                latent_variables_store = self.latent_variables

            return MLEResults(data_name=self.data_name,
                              X_names=X_names,
                              model_name=self.model_name,
                              model_type=self.model_type,
                              latent_variables=latent_variables_store,
                              results=p,
                              data=Y,
                              index=self.index,
                              multivariate_model=self.multivariate_model,
                              objective_object=obj_type,
                              method=method,
                              ihessian=ihessian,
                              signal=theta,
                              scores=scores,
                              z_hide=self._z_hide,
                              max_lag=self.max_lag,
                              states=states,
                              states_var=states_var)
        except Exception:
            self.latent_variables.set_z_values(p.x, method, None, None)

            # Change this in future
            try:
                latent_variables_store = self.latent_variables.copy()
            except Exception:
                latent_variables_store = self.latent_variables

            return MLEResults(data_name=self.data_name,
                              X_names=X_names,
                              model_name=self.model_name,
                              model_type=self.model_type,
                              latent_variables=latent_variables_store,
                              results=p,
                              data=Y,
                              index=self.index,
                              multivariate_model=self.multivariate_model,
                              objective_object=obj_type,
                              method=method,
                              ihessian=None,
                              signal=theta,
                              scores=scores,
                              z_hide=self._z_hide,
                              max_lag=self.max_lag,
                              states=states,
                              states_var=states_var)
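The inverse-Hessian standard-error recipe above, on a self-contained example (an illustrative normal MLE, not part of the library):

import numpy as np
import numdifftools as nd
from scipy import optimize

data = 1.0 + 2.0 * np.random.randn(500)

def neg_loglik(z):
    mu, log_sigma = z
    sigma = np.exp(log_sigma)
    return data.size * log_sigma + 0.5 * np.sum(((data - mu) / sigma)**2)

p = optimize.minimize(neg_loglik, np.zeros(2), method='L-BFGS-B')
ihessian = np.linalg.inv(nd.Hessian(neg_loglik)(p.x))
ses = np.power(np.abs(np.diag(ihessian)), 0.5)  # same recipe as above
print(p.x, ses)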
Example #6
    def area_hess(self, params):
        """!
        @brief Compute hessian of gaussian area function
        """
        hess_matrix = nd.Hessian(self.area)(params)
        return hess_matrix
    #	H = nd.Hessian(logcosh)([float(w[0]), float(w[1])])
    #	print(np.linalg.eig(H)[0])
    #H = nd.Hessian(logcosh)([float(w[0]), float(w[1])])

    # save direction vectors of hyperplanes
    w_list_0 = []
    margin_list_0 = []
    interval_0 = interval_generator(0, math.pi / 2, math.pi / 100)
    #	interval_0 = [0, math.pi/10, math.pi/9, math.pi/8, math.pi/7, math.pi/6, math.pi/5, math.pi/4, math.pi/3, 2*math.pi/5,3*math.pi/7, math.pi/2]
    largest_eig_sigmoid = []
    for angle in interval_0:
        w = find_hyperplane_vector(angle=angle)
        w_list_0.append(w)
        loss_val_0 = logistic_loss_0(w)
        loss_val_1 = logistic_loss_1(w)
        H_max_0 = nd.Hessian(logistic_loss_0)([float(w[0]), float(w[1])])
        H_max_1 = nd.Hessian(logistic_loss_1)([float(w[0]), float(w[1])])
        largest_eig_sigmoid.append(
            np.amax(np.linalg.eig(H_max_0)[0]) +
            np.amax(np.linalg.eig(H_max_1)[0]))

    for w in w_list_0:
        margin = min([
            abs(np.dot(np.transpose(w), (2, 0))),
            abs(np.dot(np.transpose(w), (0, 2)))
        ])
        margin_list_0.append(margin)
    plt.figure(1)
    plt.subplot(311)
    plt.plot(interval_0, largest_eig_sigmoid, '^-r')
    plt.title("angle vs max eig val")
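The curvature probe inside the loop above, reduced to one call (hypothetical quadratic loss with a known Hessian):

import numpy as np
import numdifftools as nd

loss = lambda w: w[0]**2 + 4 * w[0] * w[1] + 5 * w[1]**2  # analytic Hessian [[2, 4], [4, 10]]
H = nd.Hessian(loss)([0.5, 0.5])
print(np.amax(np.linalg.eig(H)[0]))  # largest eigenvalue, as accumulated above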
Example #8
spec_plot.set(xlabel=r'log($\lambda / \lambda_{max}$)', ylabel='bin count')
minX, maxX = plt.xlim()
minY, maxY = plt.ylim()
yval = (maxY + minY) / 2. * 0.6
spec_plot.annotate(
    'Sloppiness',
    xy=(minX, yval),
    xytext=((maxX + minX) / 2. * 0.8, yval +
            0.1),  # draws an arrow from one set of coordinates to the other
    arrowprops=dict(facecolor='black',
                    width=3),  # sets style of arrow and colour
    annotation_clip=False)  # This enables the arrow to be outside of the plot
spec_plot.annotate(
    '',
    xy=((maxX + minX) / 2. * 0.8 + 1.1, yval),
    xytext=(maxX,
            yval),  # draws an arrow from one set of coordinates to the other
    arrowprops=dict(facecolor='black',
                    width=3),  # sets style of arrow and colour
    annotation_clip=False)  # This enables the arrow to be outside of the plot
plt.show()


def test_function(x):
    return 2 * np.sin(x[0]) + 3 * np.cos(x[1])


H = nd.Hessian(test_function)([1.3, 0.3])
print(H)

print(os.path.join(os.getcwd(), 'results', 'Sloppiness'))
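As a cross-check (not in the original), the analytic Hessian of test_function is diagonal, so the printed matrix should be close to:

import numpy as np

# Hessian of 2*sin(x0) + 3*cos(x1) is diag(-2*sin(x0), -3*cos(x1))
print(np.diag([-2 * np.sin(1.3), -3 * np.cos(0.3)]))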
Example #9
                                      bounds_error=False,
                                      fill_value=1.0)

        ang_penalty = (penalty_box(rotx) + penalty_box(roty) +
                       penalty_box(rotz))

        ypenalty = 0
        if no_penalty:
            ang_penalty = 1.0
        return np.array(diff)  #* ang_penalty

    ### Optimize the previously defined function(s)
    no_rot_res = opti.minimize(cost_function, init, method='SLSQP')
    no_rot_soln = no_rot_res.x

    Hfun = ndt.Hessian(cost_function, full_output=True)
    hessian_ndt, info = Hfun(no_rot_soln)
    no_rot_pcov = np.linalg.inv(hessian_ndt)

    for i in [0, 1, 2, 3]:
        init_rot_2[i] = no_rot_soln[i]

    rot_res = opti.minimize(cost_function_rot_2, init_rot_2, method='SLSQP')
    rot_soln = rot_res.x

    Hfun2 = ndt.Hessian(cost_function_rot_2, full_output=True)
    hessian_ndt2, info2 = Hfun2(rot_soln)
    rot_pcov = np.linalg.inv(hessian_ndt2)

    #test_angles = np.linspace(-45.0, 45.0, 200)
    #test_cost = []
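The full_output=True pattern above, isolated: the second return value carries numdifftools' per-entry error estimates (attribute name taken from the numdifftools docs; treat this sketch as an assumption, not part of the source):

import numpy as np
import numdifftools as ndt

Hfun = ndt.Hessian(lambda v: np.sum(np.asarray(v)**2), full_output=True)
hessian_ndt, info = Hfun(np.array([1.0, 2.0]))
print(hessian_ndt)            # ~ 2 * identity
print(info.error_estimate)    # estimated numerical error per entry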
Example #10
def hapsb_femaleROHcontam_preload(iid,
                                  roh_list,
                                  mpileup_path,
                                  h5_path1000g,
                                  meta_path_ref,
                                  folder_out=None,
                                  init_c=0.025,
                                  trim=0.005,
                                  minLen=0.05,
                                  conPop=["CEU"],
                                  roh_jump=300,
                                  e_rate_ref=1e-3,
                                  processes=1,
                                  n_ref=2504,
                                  exclude_pops=["AFR"],
                                  prefix=None,
                                  logfile=False,
                                  cleanup=False):
    """
    Estimating autosomal contamination rate from a list of ROH blocks. Need at least one ROH for inference.

    Parameters
    ----------
    iid: str
        IID of the sample. We assume that the mpileup file has the format $iid.chr[1-22].mpileup.
    roh_list: str
        Path to a file containing a list of ROH blocks. This file should have the same format as the output of hapROH.
    mpileup_path: str
        Directory of mpileup files, one file per autosome.
    h5_path1000g: str
        Path to the reference panel.
    meta_path_ref: str
        Path to the metadata of reference panel.
    folder_out: str
        Directory in which you want the output to reside. If not given, all output files will be in the parent directory of mpileup_path.
    init_c: float
        Initial value for the BFGS search.
    trim: float
        Trim both ends of inferred ROH blocks (in Morgan).
    minLen: float
        Minimum length of ROH blocks to use in estimating contamination (in Morgan).
    conPop: list of str
        Contaminant Ancestry. Must correspond to names in the super_pop or pop column in the 1000G metadata file.
    roh_jump: float
        Copying jump rate.
    e_rate_ref: float
        Haplotype copying error rate.
    processes: int
        Number of processes to use.
    n_ref: int
        Number of samples in the reference panel.
    exclude_pops: list of str
        A list of populations to exclude from the reference panel.
    prefix: str
        Prefix of the output and log file. The output will follow $iid.$prefix.hapCON_ROH.txt. And the log file will follow $iid.$prefix.hapCON_ROH.log.
    logfile: bool
        Whether to produce a log file.
    cleanup: bool
        Whether to delete intermediary HDF5 files generated during this function run.

    Returns
    ---------
    conMLE: float
        MLE estimate for contamination.
    se: float
        Standard error of the estimated contamination rate.
     
    """

    # should be the same as hapsb_femaleROHcontam, but a faster implementation
    if not folder_out:
        folder_out = os.path.dirname(os.path.abspath(mpileup_path))

    prepare_path_general(folder_out, iid, prefix, "hapCON_ROH", logfile)
    chunks = []
    with open(roh_list) as f:
        f.readline()
        line = f.readline()
        while line:
            _, _, StartM, EndM, _, lengthM, _, ch, _, _ = line.strip().split(
                ',')
            StartM, EndM, lengthM = float(StartM), float(EndM), float(lengthM)
            if lengthM >= minLen:
                chunks.append((ch, StartM + trim, EndM - trim))
                print(f'chr{ch}\t{round(StartM, 6)}\t{round(EndM, 6)}')
            line = f.readline()

    if len(chunks) > 0:
        sumROH = 0
        for _, start, end in chunks:
            sumROH += end - start
        print(
            f'a total of {len(chunks)} ROH blocks passing filtering threshold found, total length after trimming: {100*sumROH:.3f}cM.'
        )

        hdf5_path = os.path.join(folder_out, "hdf5")
        if not os.path.exists(hdf5_path):
            os.makedirs(hdf5_path)
            print(f'saving hdf5 files in {hdf5_path}')

        t1 = time.time()
        prms = [ [os.path.join(mpileup_path, f'{iid}.chr{ch}.mpileup'),
            h5_path1000g + str(ch) + ".hdf5", iid, -np.inf, np.inf, hdf5_path, False] \
                for ch in range(1, 23)]
        results = multi_run(mpileup2hdf5, prms, processes)
        e_rate = np.mean(np.array([err for err, _, _ in results]))
        print(f'finished reading mpileup files, takes {time.time()-t1:.3f}s')
        print(f'estimated genotyping error: {e_rate:.3f}')

        # preload hmm models
        t1 = time.time()
        hmms = []
        for ch, start, end in chunks:
            path_targets = hdf5_path + "/" + f"{iid}.chr{ch}.hdf5"
            hmm = preload(iid,
                          ch,
                          start,
                          end,
                          path_targets,
                          h5_path1000g,
                          meta_path_ref,
                          folder_out,
                          conPop=conPop,
                          roh_jump=roh_jump,
                          e_rate=e_rate,
                          e_rate_ref=e_rate_ref,
                          n_ref=n_ref,
                          exclude_pops=exclude_pops)
            hmms.append(hmm)
        print(
            f'{len(chunks)} hmm models loaded, takes {round(time.time()-t1, 3)}s'
        )

        # the actual optimization part
        kargs = (hmms, processes)
        res = minimize(hapsb_multiChunk_preload,
                       init_c,
                       args=kargs,
                       method='L-BFGS-B',
                       bounds=[(0, 0.5)])
        if not res.success:
            print(
                'L-BFGS-B does not converge. Printing its result log for diagnostic purpose.'
            )
            print(res)
            print('please treat the final estimate with caution.')
        Hfun = ndt.Hessian(hapsb_multiChunk_preload,
                           step=1e-4,
                           full_output=True)
        try:
            x = res.x[0]
            h, info = Hfun(x, *kargs)
            h = h[0][0]
            if h < 0:
                print(
                    'WARNING: Cannot estimate standard error because the likelihood curve is concave up...'
                )
                se = np.nan
            else:
                if x > 0:
                    se = math.sqrt(1 / (h))
                else:
                    # hessian does not work well at the boundary, use a different approach
                    print(
                        'use quadratic interpolation to obtain the likelihood confidence interval...'
                    )
                    step = 1e-6
                    grad = (hapsb_multiChunk_preload(step, *kargs) -
                            hapsb_multiChunk_preload(0, *kargs)) / step
                    assert (grad > 0)
                    findroot = lambda x, x0, grad, hess: \
                        hess * (x - x0)**2 / 2.0 + (x - x0) * grad - 1.92
                    findroot_prime = lambda x, x0, grad, hess: \
                        (x - x0) * hess + grad
                    res = newton(findroot,
                                 x,
                                 fprime=findroot_prime,
                                 args=(x, grad, h))
                    se = res / 1.96
            if prefix:
                fileName = f'{iid}.{prefix}.hapCON_ROH.txt'
            else:
                fileName = f'{iid}.hapCON_ROH.txt'
            with open(f'{folder_out}/{fileName}', 'w') as f:
                f.write(f'Method1: Fixing genotyping error rate at {e_rate}\n')
                f.write(f'\tROH blocks obtained from: {roh_list}\n')
                f.write(f'\tNumber of ROH blocks found: {len(chunks)}\n')
                f.write(
                    f'\tTotal length of ROH after trimming: {round(100*sumROH,3)}cM\n'
                )
                f.write(
                    f'\tMLE for contamination using BFGS: {round(x, 6)} ({round(x-1.96*se, 6)} - {round(x+1.96*se, 6)})\n'
                )
            return x, se
        except AssertionError:
            print(
                f'cannot estimate the Hessian of the loglikelihood around {res.x}'
            )
            se = np.nan
        finally:
            if cleanup:
                shutil.rmtree(hdf5_path)
                print(f'deleted intermediary hdf5 files at {hdf5_path}')
        return x, se
    else:
        print('not enough ROH blocks found to estimate contamination...')
        sys.exit()
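Where the 1.92 cutoff in the quadratic-interpolation branch comes from: it is half the 95% quantile of a chi-square distribution with one degree of freedom, the standard profile-likelihood threshold.

from scipy.stats import chi2

print(chi2.ppf(0.95, df=1) / 2)  # ~1.92, the allowed drop in logL at the 95% bound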
Example #11
    def calc_hessian(self, x):
        return nd.Hessian(self.calc_fun)(x)
Example #12
    def _evaluate(self, x, out, *args, **kwargs):
        out["F"] = self.func(x)

        import numdifftools as nd
        out["dF"] = nd.Gradient(self.func)(x)[None, None, :]
        out["ddF"] = nd.Hessian(self.func)(x)[None, None, :]
Example #13
        [el for el in all_parameters if el not in parameters_to_test])

    parameters_to_test = ['kSTATbinding', 'kSTATunbinding', 'kpa']
    # Add jitter to best fit parameters to avoid numerical instability of finding Hessian at functional 0 (?)
    best_parameters = np.log([
        Detailed_Model.parameters[key] * np.random.uniform(0.97, 1.03)
        for key in parameters_to_test
    ])

    #   Now define the X**2 function which we want to compute the Hessian of. It is
    #   defined using a higher-order function, since this allows the Hessian to be
    #   defined dynamically.
    new_function = function_builder(Detailed_Model, parameters_to_test,
                                    list(np.linspace(0, 60, num=61)))

    # Use numdifftools to compute the Hessian
    H = nd.Hessian(new_function)(best_parameters)
    H = np.nan_to_num(H)
    pickle.dump(
        H,
        open(os.path.join(os.getcwd(), 'results', 'Sloppiness', 'Hessian.pkl'),
             "wb"))
    print(H)
    # Compute eigenspectrum and normalize by largest eigenvalue
    #   - small eigenvalues correspond to sloppy parameters while large eigenvalues give stiff parameters
    #   - strictly speaking the eigenvalues correspond to the sloppiness along principal axes (eigenvectors) of H
    #   - the width of the error ellipse along the corresponding axis is 1/sqrt(eval)
    evals = np.linalg.eigvals(H)
    evals = np.nan_to_num(evals)
    normalized_evals = np.divide(evals, max(evals))
    log_normalized_evals = np.log10(np.absolute(normalized_evals))
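The width interpretation from the comments above, in two lines (hypothetical eigenvalues):

import numpy as np

evals = np.array([1e4, 1.0, 1e-2])  # hypothetical stiff-to-sloppy spectrum
print(1 / np.sqrt(evals))           # error-ellipse widths along the principal axes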
Example #14
def masked_curve_fit(f,
                     x,
                     y,
                     p0=None,
                     sigma=None,
                     fixed=None,
                     method='lm',
                     **kw):
    """
    Wrapper around *scipy.optimize.curve_fit* which allows fixed parameters.

    *fixed* is a list of integer parameter numbers that should be fixed
    during the fit.  The parameter values for the fixed parameters are
    given in *p0*.  The returned *popt* contains the fixed values of these
    parameters, and *pcov* contains rows/columns of zero for these parameters.

    Otherwise the interface is the same as *curve_fit*.
    """
    from scipy.optimize import curve_fit
    # Hide fixed parameters
    if fixed is not None:
        p = p0 + 0.             # copy of p0 (p0 must be an ndarray)
        fitted = (p == p)       # boolean mask over parameters, all True
        fitted[fixed] = False   # drop the fixed ones from the fit
        init = p[fitted]

        def cost(x, *args, **kw):
            p[fitted] = args
            return f(x, *p, **kw)
    else:
        cost = f
        init = p0

    # Perform fit
    if method == 'lm':
        popt, pcov = curve_fit(cost, x, y, p0=init, sigma=sigma, **kw)
    else:
        if sigma is None:
            sigma = 1

        def chisq(p):
            resid = (cost(x, *p, **kw) - y) / sigma
            v = np.sum(resid**2) / (len(x) - len(p))
            return v

        from scipy.optimize import minimize
        res = minimize(chisq, init, method=method, options={'maxiter': 1000})
        popt = res.x
        #from scipy.optimize import fmin
        #popt = fmin(chisq, init, maxiter=10)
        try:
            import numdifftools as nd
            H = nd.Hessian(chisq)(popt)
            L = np.linalg.cholesky(H)
            Linv = np.linalg.inv(L)
            pcov = np.dot(Linv.T.conj(), Linv)
        except Exception as exc:
            print(exc)
            pcov = np.zeros((len(p0), len(p0)))

    # Restore fixed parameters
    if fixed is not None:
        p[fitted] = popt
        pcov = zero_insert(pcov, fixed)
    else:
        p = popt

    return p, pcov
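A usage sketch for masked_curve_fit (illustrative model and data; zero_insert must be available, as in the source module):

import numpy as np

def line(x, a, b):
    return a * x + b

xdata = np.linspace(0., 1., 20)
ydata = 2. * xdata + 1. + 0.01 * np.random.randn(20)
# p0 must be an ndarray; parameter 1 (b) stays fixed at its p0 value
popt, pcov = masked_curve_fit(line, xdata, ydata,
                              p0=np.array([1.0, 1.0]), fixed=[1])
print(popt)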
Example #15
    def eval_QoIHessian(self, mu, xi):
        def func(xi):
            return self.eval_QoI(mu, xi)

        H = nd.Hessian(func)(xi)
        return H
Example #16
    print(he[0] - 2 * np.dot(x.T, x))

    for eps in [1e-3, 1e-4, 1e-5, 1e-6]:
        print('eps =', eps)
        print(approx_hess(xk, fun2, eps, args)[0] - 2 * np.dot(x.T, x))

    hcs2 = approx_hess_cs2(xk, fun2, args=args)
    print('hcs2')
    print(hcs2 - 2 * np.dot(x.T, x))

    hfd3 = approx_hess3(xk, fun2, args=args)
    print('hfd3')
    print(hfd3 - 2 * np.dot(x.T, x))

    import numdifftools as nd
    hnd = nd.Hessian(lambda a: fun2(a, y, x))
    hessnd = hnd(xk)
    print('numdiff')
    print(hessnd - 2 * np.dot(x.T, x))
    #assert_almost_equal(hessnd, he[0])
    gnd = nd.Gradient(lambda a: fun2(a, y, x))
    gradnd = gnd(xk)
'''
>>> hnd = nd.Hessian(lambda a: fun2(a, x))
>>> hnd(xk)
array([[ 216.87702746,   -3.41892545,    1.87887281],
       [  -3.41892545,  180.76379116,  -13.74326021],
       [   1.87887281,  -13.74326021,  198.5155617 ]])
>>> he
(array([[ 216.87702746,   -3.41892545,    1.87887281],
       [  -3.41892545,  180.76379116,  -13.74326021],
Example #17
    def _optimize_fit(self, obj_type=None, **kwargs):
        """
        This function fits models using Maximum Likelihood or Penalized Maximum Likelihood
        """

        preopt_search = kwargs.get('preopt_search', True)  # If user supplied

        if obj_type == self.neg_loglik:
            method = 'MLE'
        else:
            method = 'PML'

        # Starting values - check to see if model has preoptimize method, if not, simply use default starting values
        if preopt_search is True:
            try:
                phi = self._preoptimize_model(
                    self.latent_variables.get_z_starting_values(), method)
                preoptimized = True
            except Exception:
                phi = self.latent_variables.get_z_starting_values()
                preoptimized = False
        else:
            preoptimized = False
            phi = self.latent_variables.get_z_starting_values()

        phi = kwargs.get('start', phi).copy()  # If user supplied

        # Optimize using L-BFGS-B
        p = optimize.minimize(obj_type,
                              phi,
                              method='L-BFGS-B',
                              options={'gtol': 1e-8})
        if preoptimized is True:
            p2 = optimize.minimize(
                obj_type,
                self.latent_variables.get_z_starting_values(),
                method='L-BFGS-B',
                options={'gtol': 1e-8})
            if self.neg_loglik(p2.x) < self.neg_loglik(p.x):
                p = p2

        theta, Y, scores, states, states_var, X_names = self._categorize_model_output(
            p.x)

        # Check that matrix is non-singular; act accordingly
        try:
            ihessian = np.linalg.inv(nd.Hessian(obj_type)(p.x))
            ses = np.power(np.abs(np.diag(ihessian)), 0.5)
            self.latent_variables.set_z_values(p.x, method, ses, None)

        except Exception:
            ihessian = None
            ses = None
            self.latent_variables.set_z_values(p.x, method, None, None)

        self.latent_variables.estimation_method = method

        # Change this in future
        try:
            latent_variables_store = self.latent_variables.copy()
        except Exception:
            latent_variables_store = self.latent_variables

        return MLEResults(data_name=self.data_name,
                          X_names=X_names,
                          model_name=self.model_name,
                          model_type=self.model_type,
                          latent_variables=latent_variables_store,
                          results=p,
                          data=Y,
                          index=self.index,
                          multivariate_model=self.multivariate_model,
                          objective_object=obj_type,
                          method=method,
                          ihessian=ihessian,
                          signal=theta,
                          scores=scores,
                          z_hide=self._z_hide,
                          max_lag=self.max_lag,
                          states=states,
                          states_var=states_var)