Example #1
def lanczos_iteration(Fvp_fn, dim, k=20):
    v = torch.FloatTensor(dim).uniform_()
    v /= torch.norm(v, 2)

    diag = []
    diag_adj = []

    w = Fvp_fn(v)
    alpha = w.dot(v)
    w -= alpha * v
    diag.append(alpha)

    for i in range(k - 1):
        beta = torch.norm(w, 2)
        if beta == 0:
            break
        v_prev = v.clone()
        v = w / beta
        w = Fvp_fn(v)
        alpha = w.dot(v)
        diag.append(alpha)
        diag_adj.append(beta)
        w = w - alpha * v - beta * v_prev

    diag, diag_adj = np.array(diag), np.array(diag_adj)
    w = eigvalsh_tridiagonal(diag, diag_adj)
    return w
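
A minimal smoke test for the snippet above (hypothetical, not from its source repo): with an explicit symmetric positive-definite matrix standing in for the Fisher-vector product, the largest Ritz value returned by lanczos_iteration should approximate the largest eigenvalue. Assumes the snippet's own imports (torch, numpy as np, scipy.linalg.eigvalsh_tridiagonal).

import torch

torch.manual_seed(0)
A = torch.randn(50, 50)
A = A @ A.t() + 50 * torch.eye(50)  # well-conditioned SPD test matrix
fvp_fn = lambda v: A @ v            # stands in for a Fisher-vector product
ritz = lanczos_iteration(fvp_fn, dim=50, k=20)
print(ritz[-1], torch.linalg.eigvalsh(A)[-1].item())  # top Ritz value vs. true top eigenvalue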
Example #2
def jacobi_sampler_tridiag(M_1, M_2, N, beta=2):
    """
    .. seealso::

        :cite:`KiNe04` Theorem 2
    """

    if not (beta > 0):
        raise ValueError('`beta` must be positive. Given: {}'.format(beta))

    # c_odd = c_1, c_3, ..., c_2N-1
    c_odd = np.random.beta(a=0.5 * beta * np.arange(M_1, M_1 - N, step=-1),
                           b=0.5 * beta * np.arange(M_2, M_2 - N, step=-1))

    # c_even = c_0, c_2, ..., c_2N-2
    c_even = np.zeros(N)
    c_even[1:] = np.random.beta(
        a=0.5 * beta * np.arange(N - 1, 0, step=-1),
        b=0.5 * beta *
        np.arange(M_1 + M_2 - N, M_1 + M_2 - 2 * N + 1, step=-1))

    # xi_odd = xi_2i-1 = (1-c_2i-2) c_2i-1
    xi_odd = (1 - c_even) * c_odd

    # xi_even = xi_0=0, xi_2, ..., xi_2N-2
    # xi_2i = (1-c_2i-1)*c_2i
    xi_even = np.zeros(N)
    xi_even[1:] = (1 - c_odd[:-1]) * c_even[1:]

    # alpha_i = xi_2i-2 + xi_2i-1, xi_0 = 0
    alpha_coef = xi_even + xi_odd
    # beta_i+1 = xi_2i-1 * xi_2i
    beta_coef = xi_odd[:-1] * xi_even[1:]

    return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
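
A hypothetical usage of the sampler above (assuming its module imports numpy as np and scipy.linalg as la): draw the beta=2 Jacobi ensemble with M_1, M_2 >= N and check that the spectrum lies in [0, 1], as the tridiagonal model guarantees.

import numpy as np

np.random.seed(0)
eigs = jacobi_sampler_tridiag(M_1=150, M_2=200, N=100, beta=2)
print(eigs.min(), eigs.max())  # both within [0, 1]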
Example #3
def laguerre_sampler_tridiag(M, N, beta=2):
    """
    .. seealso::

        :cite:`DuEd02` III-B
    """

    if not (beta > 0):
        raise ValueError('`beta` must be positive. Given: {}'.format(beta))
    # requires M >= N

    # xi_odd = xi_1, ... , xi_2N-1
    xi_odd = np.random.chisquare(beta * np.arange(M, M - N, step=-1))

    # xi_even = xi_0=0, xi_2, ... ,xi_2N-2
    xi_even = np.zeros(N)
    xi_even[1:] = np.random.chisquare(beta * np.arange(N - 1, 0, step=-1))

    # alpha_i = xi_2i-2 + xi_2i-1
    # alpha_1 = xi_0 + xi_1 = xi_1
    alpha_coef = xi_even + xi_odd
    # beta_i+1 = xi_2i-1 * xi_2i
    beta_coef = xi_odd[:-1] * xi_even[1:]

    return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
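
A hypothetical usage of the Laguerre sampler (same import assumptions as above): for M >= N the spectrum is positive, and since the trace is a sum of chi-square variables the mean eigenvalue is close to beta*M.

import numpy as np

np.random.seed(0)
eigs = laguerre_sampler_tridiag(M=200, N=100, beta=2)
print(eigs.min() > 0, eigs.mean())  # True, mean near beta*M = 400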
Example #4
def _levels(Ec, EJ, ng=0.0, gridSize=51, select_range=(0, 10)):
    n = np.arange(gridSize) - gridSize // 2
    w = eigvalsh_tridiagonal(4 * Ec * (n - ng)**2,
                             -EJ / 2 * np.ones(gridSize - 1),
                             select='i',
                             select_range=select_range)
    return w
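
The function above diagonalizes the Cooper-pair-box (transmon) Hamiltonian in the charge basis: the charging term 4*Ec*(n - ng)^2 sits on the diagonal and the Josephson term -EJ/2 couples neighboring charge states. A hypothetical usage, assuming numpy and scipy.linalg.eigvalsh_tridiagonal are imported as in the snippet's parent module:

levels = _levels(Ec=0.2, EJ=10.0)        # EJ/Ec = 50, transmon regime
f01 = levels[1] - levels[0]              # qubit transition energy
anharm = (levels[2] - levels[1]) - f01   # anharmonicity, negative for a transmon
print(f01, anharm)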
Example #5
def _generate_GOE_tridiagonal_direct(size: int = 100,
                                     seed: int = None,
                                     dowarn: bool = True) -> ndarray:
    """See: Edelman, A., Sutton, B. D., & Wang, Y. (2014).
    Random matrix theory, numerical computation and applications.
    Modern Aspects of Random Matrix Theory, 72, 53.
    """
    if dowarn:
        warn(
            "While this method is fast, and uses the least memory, it appears that "
            "`eigvalsh_tridiagonal` is considerably less precise, and will result "
            "in significant deviations from the expected values for the long range "
            "spectral observables (e.g. spectral rigidity, level number variance)."
        )
    if seed is not None:
        np.random.seed(seed)
    size = size + 2
    chi_range = size - 1 - np.arange(size - 1)
    chi = np.sqrt(np.random.chisquare(chi_range))
    diagonal = np.random.normal(0, np.sqrt(2), size) / np.sqrt(2)
    eigs = eigvalsh_tridiagonal(
        diagonal,
        chi,
        check_finite=False,
        select="i",
        select_range=(1, size - 2),
        lapack_driver="stebz",
        tol=4 * np.finfo(np.float64).eps,
    )
    return eigs
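
A hypothetical sanity check for the generator above: the returned spectrum has exactly `size` eigenvalues (the two edge indices are dropped by select_range) and is approximately symmetric about zero.

eigs = _generate_GOE_tridiagonal_direct(size=1000, seed=0, dowarn=False)
print(len(eigs), eigs.mean())  # 1000 eigenvalues, mean near 0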
Example #6
def mu_ref_normal_sampler_tridiag(loc=0.0, scale=1.0, beta=2, size=10,
                                  random_state=None):
    """Implementation of the tridiagonal model to sample from

    .. math::

        \\Delta(x_{1}, \\dots, x_{N})^{\\beta}
        \\prod_{i=1}^{N} \\exp(-\\frac{(x_i-\\mu)^2}{2\\sigma^2}) dx_i

    .. seealso::

        :cite:`DuEd02` II-C
    """

    rng = check_random_state(random_state)

    if not (beta > 0):
        raise ValueError('`beta` must be positive. Given: {}'.format(beta))

    # beta/2*[N-1, N-2, ..., 1]
    b_2_Ni = 0.5 * beta * np.arange(size - 1, 0, step=-1)

    alpha_coef = rng.normal(loc=loc, scale=scale, size=size)
    beta_coef = rng.gamma(shape=b_2_Ni, scale=scale**2)

    return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
Example #7
def mu_ref_gamma_sampler_tridiag(shape=1.0, scale=1.0, beta=2, size=10):
    """
    .. seealso::

        :cite:`DuEd02` III-B
    """

    if not (beta > 0):
        raise ValueError('`beta` must be positive. Given: {}'.format(beta))

    # beta/2*[N-1, N-2, ..., 1, 0]
    b_2_Ni = 0.5 * beta * np.arange(size - 1, -1, step=-1)

    # xi_odd = xi_1, ... , xi_2N-1
    xi_odd = np.random.gamma(shape=b_2_Ni + shape, scale=scale)  # odd

    # xi_even = xi_0=0, xi_2, ... ,xi_2N-2
    xi_even = np.zeros(size)
    xi_even[1:] = np.random.gamma(shape=b_2_Ni[:-1], scale=scale)  # even

    # alpha_i = xi_2i-2 + xi_2i-1
    # alpha_1 = xi_0 + xi_1 = xi_1
    alpha_coef = xi_even + xi_odd
    # beta_i+1 = xi_2i-1 * xi_2i
    beta_coef = xi_odd[:-1] * xi_even[1:]

    return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
Example #8
def jacobi_sampler_tridiag(M_1, M_2, N, beta=2):
	"""
	.. seealso::

		:cite:`KiNe04` Theorem 2
	"""

	# c_odd = c_1, c_3, ..., c_2N-1
	c_odd = np.random.beta(
				0.5*beta*np.arange(M_1, M_1-N, step=-1),
				0.5*beta*np.arange(M_2, M_2-N, step=-1))

	# c_even = c_0, c_2, ..., c_2N-2
	c_even = np.zeros(N)
	c_even[1:] = np.random.beta(
					0.5*beta*np.arange(N-1, 0, step=-1),
					0.5*beta*np.arange(M_1+M_2-N, M_1+M_2-2*N+1,step=-1))

	# xi_odd = xi_2i-1 = (1-c_2i-2) c_2i-1
	xi_odd = (1-c_even)*c_odd

	# xi_even = xi_0=0, xi_2, ..., xi_2N-2
	# xi_2i = (1-c_2i-1)*c_2i
	xi_even = np.zeros(N)
	xi_even[1:] = (1-c_odd[:-1])*c_even[1:]

	# alpha_i = xi_2i-2 + xi_2i-1
	# alpha_1 = xi_0 + xi_1 = xi_1
	alpha_coef = xi_even + xi_odd
	# beta_i+1 = xi_2i-1 * xi_2i
	beta_coef = xi_odd[:-1] * xi_even[1:]

	return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
Example #9
def mu_ref_beta_sampler_tridiag(a, b, beta=2, size=10):

	"""
	.. seealso::

		:cite:`KiNe04` Theorem 2
	"""

	# beta/2*[N-1, N-2, ..., 1, 0]
	b_2_Ni = 0.5*beta*np.arange(size-1,-1,step=-1)

	# c_odd = c_1, c_3, ..., c_2N-1
	c_odd = np.random.beta(
				b_2_Ni + a,
				b_2_Ni + b)

	# c_even = c_0, c_2, ..., c_2N-2
	c_even = np.zeros(size)
	c_even[1:] = np.random.beta(b_2_Ni[:-1], b_2_Ni[1:] + a + b)

	# xi_odd = xi_2i-1 = (1-c_2i-2) c_2i-1
	xi_odd = (1-c_even)*c_odd

	# xi_even = xi_0=0, xi_2, ..., xi_2N-2
	# xi_2i = (1-c_2i-1)*c_2i
	xi_even = np.zeros(size)
	xi_even[1:] = (1-c_odd[:-1])*c_even[1:]

	# alpha_i = xi_2i-2 + xi_2i-1
	# alpha_1 = xi_0 + xi_1 = xi_1
	alpha_coef = xi_even + xi_odd
	# beta_i+1 = xi_2i-1 * xi_2i
	beta_coef = xi_odd[:-1] * xi_even[1:]

	return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
Example #10
def hermite_sampler_tridiag(N, beta=2):
	"""
	.. seealso::

		:cite:`DuEd02` II-C
	"""

	alpha_coef = np.sqrt(2)*np.random.randn(N)
	beta_coef = np.random.chisquare(beta*np.arange(N-1, 0, step=-1))

	return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
Example #11
def hermite_sampler_tridiag(N, beta=2):
    """
    .. seealso::

        :cite:`DuEd02` II-C
    """

    if not (beta > 0):
        raise ValueError('`beta` must be positive. Given: {}'.format(beta))

    alpha_coef = np.sqrt(2) * np.random.randn(N)
    beta_coef = np.random.chisquare(beta * np.arange(N - 1, 0, step=-1))

    return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
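
A hypothetical usage of the Hermite sampler (assuming numpy as np and scipy.linalg as la are imported): for large N the spectrum, rescaled by sqrt(beta*N), should fill the semicircle support [-2, 2].

import numpy as np

np.random.seed(0)
N = 2000
eigs = hermite_sampler_tridiag(N, beta=2)
print(eigs.min() / np.sqrt(2 * N), eigs.max() / np.sqrt(2 * N))  # roughly -2 and 2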
Example #12
def muref_normal_sampler_tridiag(loc=0.0, scale=1.0, beta=2, size=10):
	"""
	.. seealso::

		:cite:`DuEd02` II-C
	"""

	# beta/2*[N-1, N-2, ..., 1]
	b_2_Ni = 0.5*beta*np.arange(size-1, 0, step=-1)

	alpha_coef = np.random.normal(loc=loc, scale=scale, size=size)
	beta_coef = np.random.gamma(shape=b_2_Ni, scale=scale**2)

	return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
Example #13
def mu_ref_normal_sampler_tridiag(loc=0.0, scale=1.0, beta=2, size=10):
    """
    .. seealso::

        :cite:`DuEd02` II-C
    """

    if not (beta > 0):
        raise ValueError('`beta` must be positive. Given: {}'.format(beta))

    # beta/2*[N-1, N-2, ..., 1]
    b_2_Ni = 0.5 * beta * np.arange(size - 1, 0, step=-1)

    alpha_coef = np.random.normal(loc=loc, scale=scale, size=size)
    beta_coef = np.random.gamma(shape=b_2_Ni, scale=scale**2)

    return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
Example #14
def mu_ref_beta_sampler_tridiag(a, b, beta=2, size=10,
                                random_state=None):
    """ Implementation of the tridiagonal model given by Theorem 2 of :cite:`KiNe04` to sample from

    .. math::

        \\Delta(x_{1}, \\dots, x_{N})^{\\beta}
        \\prod_{i=1}^{N} x_i^{a-1} (1-x_i)^{b-1} dx_i

    .. seealso::

        :cite:`KiNe04` Theorem 2
    """

    rng = check_random_state(random_state)

    if not (beta > 0):
        raise ValueError('`beta` must be positive. Given: {}'.format(beta))

    # beta/2*[N-1, N-2, ..., 1, 0]
    b_2_Ni = 0.5 * beta * np.arange(size - 1, -1, step=-1)

    # c_odd = c_1, c_3, ..., c_2N-1
    c_odd = rng.beta(b_2_Ni + a, b_2_Ni + b)

    # c_even = c_0, c_2, ..., c_2N-2
    c_even = np.zeros(size)
    c_even[1:] = rng.beta(b_2_Ni[:-1], b_2_Ni[1:] + a + b)

    # xi_odd = xi_2i-1 = (1-c_2i-2) c_2i-1
    xi_odd = (1 - c_even) * c_odd

    # xi_even = xi_0=0, xi_2, ..., xi_2N-2
    # xi_2i = (1-c_2i-1)*c_2i
    xi_even = np.zeros(size)
    xi_even[1:] = (1 - c_odd[:-1]) * c_even[1:]

    # alpha_i = xi_2i-2 + xi_2i-1
    # alpha_1 = xi_0 + xi_1 = xi_1
    alpha_coef = xi_even + xi_odd
    # beta_i+1 = xi_2i-1 * xi_2i
    beta_coef = xi_odd[:-1] * xi_even[1:]

    return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
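
A hypothetical usage of the sampler above, assuming its module-level imports (numpy as np, scipy.linalg as la, and the repo's check_random_state helper): passing random_state makes the draw reproducible, and every eigenvalue falls in (0, 1).

eigs = mu_ref_beta_sampler_tridiag(a=2.0, b=3.0, beta=2, size=50, random_state=42)
print(eigs.min(), eigs.max())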
Example #15
def laguerre_sampler_tridiag(M, N, beta=2):
	"""
	.. seealso::

		:cite:`DuEd02` III-B
	"""
	# requires M >= N

	# xi_odd = xi_1, ... , xi_2N-1
	xi_odd = np.random.chisquare(beta*np.arange(M, M-N, step=-1)) # odd

	# xi_even = xi_0=0, xi_2, ... ,xi_2N-2
	xi_even = np.zeros(N)
	xi_even[1:] = np.random.chisquare(beta*np.arange(N-1, 0, step=-1)) # even

	# alpha_i = xi_2i-2 + xi_2i-1
	# alpha_1 = xi_0 + xi_1 = xi_1
	alpha_coef = xi_even + xi_odd
	# beta_i+1 = xi_2i-1 * xi_2i
	beta_coef = xi_odd[:-1] * xi_even[1:]

	return la.eigvalsh_tridiagonal(alpha_coef, np.sqrt(beta_coef))
Example #16
    def step(self, closure, execute_update=True):
        """Performs a single optimization step.

        Arguments:
            closure (callable): A closure that accepts a vector of parameters and a vector of
                length equal to the number of model parameters, and returns the Fisher-vector product.
        """
        state = self.state
        # State initialization
        if len(state) == 0:
            state['step'] = 0
            # Set shrinkage to defaults, i.e. no shrinkage
            state['rho'] = 0.0
            state['diag_shrunk'] = 1.0

        state['step'] += 1

        # Get flat grad
        g = gradients_to_vector(self._params)

        if 'ng_prior' not in state:
            state['ng_prior'] = torch.zeros_like(g)

        curv_type = self._param_group['curv_type']
        if curv_type not in self.valid_curv_types:
            raise ValueError("Invalid curv_type.")

        # Create closure to pass to Lanczos and CG
        if curv_type == 'fisher':
            Fvp_theta_fn = make_fvp_fun(closure, self._params)
        elif curv_type == 'gauss_newton':
            Fvp_theta_fn = make_gnvp_fun(closure, self._params)

        shrinkage_method = self._param_group['shrinkage_method']
        lanczos_amortization = self._param_group['lanczos_amortization']
        if shrinkage_method == 'lanczos' and (state['step'] -
                                              1) % lanczos_amortization == 0:
            w = lanczos_iteration(Fvp_theta_fn,
                                  self._numel(),
                                  k=self._param_group['lanczos_iters'])
            rho, diag_shrunk = estimate_shrinkage(
                w, self._numel(), self._param_group['batch_size'])
            state['rho'] = rho
            state['diag_shrunk'] = diag_shrunk

        M = None
        if self._param_group['cg_precondition_empirical']:
            # Empirical Fisher is g * g
            M = (g * g + self._param_group['cg_precondition_regu_coef'] *
                 torch.ones_like(g))**self._param_group['cg_precondition_exp']

        # Do CG solve with hvp fn closure
        extract_tridiag = self._param_group['shrinkage_method'] == 'cg'
        cg_result = cg_solve(
            Fvp_theta_fn,
            g.data.clone(),
            x_0=self._param_group['cg_prev_init_coef'] * state['ng_prior'],
            M=M,
            cg_iters=self._param_group['cg_iters'],
            cg_residual_tol=self._param_group['cg_residual_tol'],
            shrunk=self._param_group['shrinkage_method'] is not None,
            rho=state['rho'],
            Dshrunk=state['diag_shrunk'],
            extract_tridiag=extract_tridiag)

        if extract_tridiag:
            ng, (diag_elems, off_diag_elems) = cg_result
            w = eigvalsh_tridiagonal(diag_elems, off_diag_elems)
            rho, diag_shrunk = estimate_shrinkage(
                w, self._numel(), self._param_group['batch_size'])
            state['rho'] = rho
            state['diag_shrunk'] = diag_shrunk
        else:
            ng = cg_result

        state['ng_prior'] = ng.data.clone()

        # Normalize NG
        lr = self._param_group['lr']
        alpha = torch.sqrt(torch.abs(lr / (torch.dot(g, ng) + 1e-20)))

        # Unflatten grad
        vector_to_gradients(ng, self._params)

        if execute_update:
            # Apply step
            for p in self._params:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                p.data.add_(d_p, alpha=-alpha.item())

        return dict(alpha=alpha, delta=lr, natural_grad=ng)
Example #17
    def step(self, closure, execute_update=True):
        """Performs a single optimization step.

        Arguments:
            closure (callable): A closure that accepts a vector of parameters and a vector of
                length equal to the number of model parameters, and returns the Fisher-vector product.
        """

        # Update theta old for all blocks first, only approx update is supported
        params_i = 0
        params_j = 0

        for gi, group in enumerate(self.param_groups):
            params = group['params']
            params_j += len(params)

            num_params = self._numel(gi, params)

            state = self.state[gi]
            if len(state) == 0:
                state['step'] = 0
                # Exponential moving average of gradient values
                state['m'] = torch.zeros(num_params)
                # Maintain adaptive preconditioner if needed
                if group['cg_precondition_empirical']:
                    state['M'] = torch.zeros(num_params)
                # Set shrinkage to defaults, i.e. no shrinkage
                state['rho'] = 0.0
                state['diag_shrunk'] = 1.0
                state['lagged'] = []
                for i in range(len(params)):
                    state['lagged'].append(params[i] +
                                           torch.randn(params[i].shape) *
                                           0.0001)

            beta1, beta2 = group['betas']

            theta = parameters_to_vector(params)
            theta_old = parameters_to_vector(state['lagged'])

            # Update theta_old beta2 portion towards theta
            theta_old = beta2 * theta_old + (1 - beta2) * theta
            vector_to_parameters(theta_old, state['lagged'])

        info = {}

        # If doing block diag, perform the update for each param group
        params_i = 0
        params_j = 0

        for gi, group in enumerate(self.param_groups):
            params = group['params']
            params_j += len(params)

            num_params = self._numel(gi, params)

            # NOTE: state is initialized above
            state = self.state[gi]

            m = state['m']
            beta1, beta2 = group['betas']
            state['step'] += 1
            params_old = state['lagged']

            bias_correction1 = 1 - beta1**state['step']
            bias_correction2 = 1 - beta2**state['step']

            # Get flat grad
            g = gradients_to_vector(params)

            # Update moving average mean
            m.mul_(beta1).add_(g, alpha=1 - beta1)
            g_hat = m / bias_correction1

            if 'ng_prior' not in state:
                state['ng_prior'] = torch.zeros_like(g)

            curv_type = group['curv_type']
            if curv_type not in self.valid_curv_types:
                raise ValueError("Invalid curv_type.")

            # Now that theta_old has been updated, do CG with only theta old
            if curv_type == 'fisher':
                fvp_fn_div_beta2 = make_fvp_fun_idx(
                    closure,
                    params_old,
                    params_i,
                    params_j,
                    bias_correction2=bias_correction2)
            elif curv_type == 'gauss_newton':
                fvp_fn_div_beta2 = make_gnvp_fun(
                    closure, params_old, bias_correction2=bias_correction2)

            shrinkage_method = group['shrinkage_method']
            lanczos_amortization = group['lanczos_amortization']
            if shrinkage_method == 'lanczos' and (
                    state['step'] - 1) % lanczos_amortization == 0:
                w = lanczos_iteration(fvp_fn_div_beta2,
                                      num_params,
                                      k=group['lanczos_iters'])
                rho, diag_shrunk = estimate_shrinkage(w, num_params,
                                                      group['batch_size'])
                state['rho'] = rho
                state['diag_shrunk'] = diag_shrunk

            M = None
            if group['cg_precondition_empirical']:
                # Empirical Fisher is g * g
                V = state['M']
                Mt = (g * g + group['cg_precondition_regu_coef'] *
                      torch.ones_like(g))**group['cg_precondition_exp']
                Vhat = V.mul(beta2).add(Mt, alpha=1 - beta2) / bias_correction2
                V = torch.max(V, Vhat)
                M = V

            extract_tridiag = group['shrinkage_method'] == 'cg'
            cg_result = cg_solve(fvp_fn_div_beta2,
                                 g_hat.data.clone(),
                                 x_0=group['cg_prev_init_coef'] *
                                 state['ng_prior'],
                                 M=M,
                                 cg_iters=group['cg_iters'],
                                 cg_residual_tol=group['cg_residual_tol'],
                                 shrunk=group['shrinkage_method'] is not None,
                                 rho=state['rho'],
                                 Dshrunk=state['diag_shrunk'],
                                 extract_tridiag=extract_tridiag)

            if extract_tridiag:
                ng, (diag_elems, off_diag_elems) = cg_result
                w = eigvalsh_tridiagonal(diag_elems, off_diag_elems)
                rho, diag_shrunk = estimate_shrinkage(w, num_params,
                                                      group['batch_size'])
                state['rho'] = rho
                state['diag_shrunk'] = diag_shrunk
            else:
                ng = cg_result

            state['ng_prior'] = ng.data.clone()

            # Normalize NG
            lr = group['lr']
            alpha = torch.sqrt(torch.abs(lr / (torch.dot(g_hat, ng) + 1e-20)))

            # Unflatten grad
            vector_to_gradients(ng, params)

            if execute_update:
                # Apply step
                for p in params:
                    if p.grad is None:
                        continue
                    d_p = p.grad.data
                    p.data.add_(d_p, alpha=-alpha.item())

            params_i = params_j
            info[gi] = dict(alpha=alpha, delta=lr, natural_grad=ng)

        return info
Example #18
def make_fvp_fn(A):
    # Assumed from context (the listing starts mid-function): cg_solve below
    # expects a closure v -> A @ v standing in for a Fisher-vector product.
    def f(v):
        return A @ v
    return f

if __name__ == "__main__":

    n = 100
    P = np.random.random((n, n))
    A = P @ P.T
    M = np.diag(A)
    Minv_mat = np.diag(1.0/M)

    w1 = np.linalg.eigvals(A)
    w1b = np.linalg.eigvals(Minv_mat @ A)

    b = np.ones((n,))
    fvp_fn = make_fvp_fn(A)

    cg_result = cg_solve(fvp_fn, b, cg_iters=n, extract_tridiag=True)
    ng, (diag_elems, off_diag_elems) = cg_result
    w2 = eigvalsh_tridiagonal(diag_elems, off_diag_elems)

    cg_result = cg_solve(fvp_fn, b, cg_iters=n, M=M, extract_tridiag=True)
    ng, (diag_elems, off_diag_elems) = cg_result
    w3 = eigvalsh_tridiagonal(diag_elems, off_diag_elems)
    w4 = Minv_mat @ np.diag(w3)

    print ("Originals: ", np.max(w1), np.linalg.norm(w1))
    print ("CG no prec: ", np.max(w2), np.linalg.norm(w2), np.max(w1)-np.max(w2), np.linalg.norm(w1-w2))
    print ("CG w/ prec: ", np.max(w3), np.linalg.norm(w3), np.max(w1)-np.max(w3), np.linalg.norm(w1-w3))
    print ("CG w/ prec vs orig: ", np.max(w4), np.linalg.norm(w4), np.max(w1)-np.max(w4), np.linalg.norm(w1-w4))
    print ("CG w/ prec vs true MinvA: ", np.max(w4), np.linalg.norm(w4), np.max(w1b)-np.max(w4), np.linalg.norm(w1b-w4))
Example #19
    def step(self, closure, execute_update=True):
        """Performs a single optimization step.

        Arguments:
            closure (callable): A closure that accepts a vector of length equal to the number of
                model parameters and returns the Fisher-vector product.
        """
        state = self.state
        param_vec = parameters_to_vector(self._params)
        # State initialization
        if len(state) == 0:
            state['step'] = 0
            # Exponential moving average of gradient values
            state['m'] = torch.zeros_like(param_vec.data)
            # Maintain adaptive preconditioner if needed
            if self._param_group['cg_precondition_empirical']:
                state['M'] = torch.zeros_like(param_vec.data)
            # Set shrinkage to defaults, i.e. no shrinkage
            state['rho'] = 0.0
            state['diag_shrunk'] = 1.0

        m = state['m']
        beta1, beta2 = self._param_group['betas']
        state['step'] += 1

        bias_correction1 = 1 - beta1**state['step']
        bias_correction2 = 1 - beta2**state['step']

        # Get flat grad
        g = gradients_to_vector(self._params)

        # Update moving average mean
        m.mul_(beta1).add_(g, alpha=1 - beta1)
        g_hat = m / bias_correction1

        theta = parameters_to_vector(self._params)
        theta_old = parameters_to_vector(self._params_old)

        if 'ng_prior' not in state:
            state['ng_prior'] = torch.zeros_like(g_hat)
        if 'max_fisher_spectral_norm' not in state:
            state['max_fisher_spectral_norm'] = 0.0

        curv_type = self._param_group['curv_type']
        if curv_type not in self.valid_curv_types:
            raise ValueError("Invalid curv_type.")

        if curv_type == 'fisher':
            weighted_fvp_fn_div_beta2 = self._make_combined_fvp_fun(
                closure,
                self._params,
                self._params_old,
                bias_correction2=bias_correction2)
        elif curv_type == 'gauss_newton':
            weighted_fvp_fn_div_beta2 = self._make_combined_gnvp_fun(
                closure,
                self._params,
                self._params_old,
                bias_correction2=bias_correction2)

        fisher_norm = lanczos_iteration(weighted_fvp_fn_div_beta2,
                                        self._numel(),
                                        k=1)[0]
        is_max_norm = fisher_norm > state['max_fisher_spectral_norm'] or state[
            'step'] == 1
        if is_max_norm:
            state['max_fisher_spectral_norm'] = fisher_norm

        if is_max_norm:
            if self._param_group['assume_locally_linear']:
                # Update theta_old beta2 portion towards theta
                theta_old = beta2 * theta_old + (1 - beta2) * theta
            else:
                # Do linesearch first to update theta_old. Then can do CG with only one HVP at each itr.
                ng = self.state['ng_prior'].clone() if state['step'] > 1 else g_hat.data.clone()
                if curv_type == 'fisher':
                    weighted_fvp_fn = self._make_combined_fvp_fun(
                        closure, self._params, self._params_old)
                    f = make_fvp_obj_fun(closure, weighted_fvp_fn, ng)
                elif curv_type == 'gauss_newton':
                    weighted_fvp_fn = self._make_combined_gnvp_fun(
                        closure, self._params, self._params_old)
                    f = make_gnvp_obj_fun(closure, weighted_fvp_fn, ng)
                xmin, fmin, alpha = randomized_linesearch(
                    f, theta_old.data, theta.data)
                theta_old = Variable(xmin.float())
            vector_to_parameters(theta_old, self._params_old)

        # Now that theta_old has been updated, do CG with only theta old
        # If not max norm, then this will remain the old params.
        if curv_type == 'fisher':
            fvp_fn_div_beta2 = make_fvp_fun(closure,
                                            self._params_old,
                                            bias_correction2=bias_correction2)
        elif curv_type == 'gauss_newton':
            fvp_fn_div_beta2 = make_gnvp_fun(closure,
                                             self._params_old,
                                             bias_correction2=bias_correction2)

        shrinkage_method = self._param_group['shrinkage_method']
        lanczos_amortization = self._param_group['lanczos_amortization']
        if shrinkage_method == 'lanczos' and (state['step'] -
                                              1) % lanczos_amortization == 0:
            w = lanczos_iteration(fvp_fn_div_beta2,
                                  self._numel(),
                                  k=self._param_group['lanczos_iters'])
            rho, diag_shrunk = estimate_shrinkage(
                w, self._numel(), self._param_group['batch_size'])
            state['rho'] = rho
            state['diag_shrunk'] = diag_shrunk

        M = None
        if self._param_group['cg_precondition_empirical']:
            # Empirical Fisher is g * g
            V = state['M']
            Mt = (g * g + self._param_group['cg_precondition_regu_coef'] *
                  torch.ones_like(g))**self._param_group['cg_precondition_exp']
            Vhat = V.mul(beta2).add(Mt, alpha=1 - beta2) / bias_correction2
            V = torch.max(V, Vhat)
            M = V

        extract_tridiag = self._param_group['shrinkage_method'] == 'cg'
        cg_result = cg_solve(
            fvp_fn_div_beta2,
            g_hat.data.clone(),
            x_0=self._param_group['cg_prev_init_coef'] * state['ng_prior'],
            M=M,
            cg_iters=self._param_group['cg_iters'],
            cg_residual_tol=self._param_group['cg_residual_tol'],
            shrunk=self._param_group['shrinkage_method'] is not None,
            rho=state['rho'],
            Dshrunk=state['diag_shrunk'],
            extract_tridiag=extract_tridiag)

        if extract_tridiag:
            ng, (diag_elems, off_diag_elems) = cg_result
            w = eigvalsh_tridiagonal(diag_elems, off_diag_elems)
            rho, diag_shrunk = estimate_shrinkage(
                w, self._numel(), self._param_group['batch_size'])
            state['rho'] = rho
            state['diag_shrunk'] = diag_shrunk
        else:
            ng = cg_result

        self.state['ng_prior'] = ng.data.clone()

        # Normalize NG
        lr = self._param_group['lr']
        alpha = torch.sqrt(torch.abs(lr / (torch.dot(g_hat, ng) + 1e-20)))

        # Unflatten grad
        vector_to_gradients(ng, self._params)

        if execute_update:
            # Apply step
            for p in self._params:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                p.data.add_(d_p, alpha=-alpha.item())

        return dict(alpha=alpha, delta=lr, natural_grad=ng)
Example #20
import numpy as np
from scipy.linalg import eigvalsh_tridiagonal

diagonal = np.array([1.5833333333333332593, -0.01259572752922188954, 2.3690214303404664165, 0.06024096385542132559, 1.9941915593928158934, 1.0058084406071843286])
subdiagonal = np.array([-2.3964673074247340168, 0.93475927884341891705, -2.0788632064407330802, 6.3258425909268308882e-016, -0.075991464158134569562])
eigenvalues = eigvalsh_tridiagonal(diagonal, subdiagonal)
print("Eigenvalues of matrix A:")
print(eigenvalues)
Example #21
import numpy as np
from scipy.linalg import eigvalsh_tridiagonal

diagonal = np.array([1.58333, -0.0125957, 2.36902, 0.060241, 1.90646, 1.09354])
subdiagonal = np.array([-2.396467, 0.9347593, -2.078863, 1.177896e-15, -0.2911902])

result = eigvalsh_tridiagonal(diagonal, subdiagonal)
print("Eigenvalues: ", result)
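
Several examples above pass select='i' with a select_range of index bounds. A small, self-contained illustration of that scipy.linalg.eigvalsh_tridiagonal option, computing only the three smallest eigenvalues:

import numpy as np
from scipy.linalg import eigvalsh_tridiagonal

d = np.arange(6, dtype=float)  # main diagonal
e = np.full(5, 0.5)            # off-diagonal
smallest3 = eigvalsh_tridiagonal(d, e, select='i', select_range=(0, 2))
print(smallest3)               # equals eigvalsh_tridiagonal(d, e)[:3]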

Example #22
    def sample_mcmc(self,
                    N=10,
                    nb_gibbs_passes=10,
                    sample_exact_cond=False,
                    nb_mala_steps=100,
                    return_chain_of_eig_vals=False,
                    return_chain_of_lambda_max=False,
                    random_state=None):
        """ Gibbs sampler on Jacobi matrices to sample approximately from the corresponding :math:`\\beta`-ensemble.

        :param N:
            Number of points/size of the :math:`\\beta`-ensemble
        :type N:
            int

        :param nb_gibbs_passes:
            Number of passes/sweeps over the variables using the Gibbs sampler
        :type nb_gibbs_passes:
            int

        :param sample_exact_cond:
            Flag to force (``True``) exact sampling from the conditionals when it is possible.
            Otherwise run MALA for ``nb_mala_steps`` steps to sample from the conditionals.
        :type sample_exact_cond:
            bool (default False)

        :param nb_mala_steps:
            Number of steps of the Metropolis Adjusted Langevin Algorithm (MALA) to perform when the conditionals are sampled approximately
        :type nb_mala_steps:
            int, default 100

        :param return_chain_of_eig_vals:
            Flag to return the chain of eigenvalues associated to the chain of Jacobi matrices.
            If ``True`` the whole chain of eigenvalues is returned
            If ``False`` only the last sequence of eigenvalues is returned
        :type return_chain_of_eig_vals:
            bool (default False)

        :param return_chain_of_lambda_max:
            Flag to return the chain of the **largest** eigenvalues associated to the chain of Jacobi matrices.
            If ``True`` the whole chain of the **largest** eigenvalues is returned
            If ``False`` only the **largest** eigenvalue of the last Jacobi matrix is returned
        :type return_chain_of_lambda_max:
            bool (default False)
        """

        rng = check_random_state(random_state)

        if sample_exact_cond:
            if self.V[3]:
                raise ValueError(
                    'Sampling exactly the conditionals a_i |... from V = ... + x^3 + ... is not supported, given g_3={}. Conditionals are not log-concave, cannot use Dev12 sampler'
                    .format(self.V[3]))

            if self.V.order >= 5:
                raise ValueError(
                    'Sampling exactly the conditionals a_i |... from V = ... + x^5 + ... is not supported, deg(V)={}>=5. Conditionals are not log-concave, cannot use Dev12 sampler'
                    .format(self.V.order))

            even_coefs_V = self.V.coef[::-1][2::2]
            if not all(even_coefs_V >= 0):
                raise ValueError('\n'.join([
                    'even coefs of V are not all >=0', ', '.join([
                        'g_{}={}'.format(2 * (n + 1), g_2n)
                        for n, g_2n in enumerate(even_coefs_V)
                    ]),
                    'Conditionals are not log-concave, cannot use Dev12 sampler',
                    'You may retry switching `sample_exact_cond` to False'
                ]))

        self.N = N
        self.nb_gibbs_passes = nb_gibbs_passes

        a, b = np.zeros((2, N + 3))

        if return_chain_of_eig_vals:
            eig_vals = np.zeros((N, nb_gibbs_passes))
        elif return_chain_of_lambda_max:
            lambda_max = np.zeros(nb_gibbs_passes)

        for p in range(nb_gibbs_passes):
            if (p + 1) % 50 == 0:
                print(p + 1)

            for i in range(1, N + 1):

                # a_i | ... propto exp - P_a_i
                P_a_i = 0.5 * self.beta * N * P_a_cond(i, a, b, self.V)
                if sample_exact_cond:
                    a[i], _ = sampler_exact_convex_quartic(P=P_a_i,
                                                           random_state=rng)
                else:
                    a[i] = sampler_mala(a[i],
                                        V=P_a_i,
                                        sigma=0.01,
                                        nb_steps=nb_mala_steps,
                                        random_state=rng)

                # b_i | ... propto x^(shape-1) * exp - P_b_i
                if i < N:
                    P_b_i = 0.5 * self.beta * N * P_b_cond(i, a, b, self.V)
                    b[i], _ = sampler_exact_convex_quartic(P=P_b_i,
                                                           shape=0.5 *
                                                           self.beta * (N - i),
                                                           random_state=rng)

            if return_chain_of_eig_vals:
                eig_vals[:,
                         p] = la.eigvalsh_tridiagonal(a[1:N + 1],
                                                      np.sqrt(b[1:N]))
            elif return_chain_of_lambda_max:
                lambda_max[p] = la.eigvalsh_tridiagonal(
                    a[1:N + 1],
                    np.sqrt(b[1:N]),
                    select='i',
                    select_range=(N - 1, N - 1))[0]

        if return_chain_of_eig_vals:
            return eig_vals
        if return_chain_of_lambda_max:
            return lambda_max

        return la.eigvalsh_tridiagonal(a[1:N + 1], np.sqrt(b[1:N]))
Example #23
def comp_modes(dh, N2, f0=1.0, eivec=False, wmode=False, diag=False):
  '''
  Compute eigenvalues (and eigenvectors) of the Sturm-Liouville
  equation

       d  ( f^2  d     )     1
       -- ( ---  -- psi)  + ---- psi = 0
       dz ( N^2  dz    )    Rd^2

  for a given stratification

  The eigenvectors correspond to the matrices for the mode/layer
  conversion

  mod2lay[:,0] is the barotropic mode: should be 1..1
  mod2lay[:,i] is the ith baroclinic mode

  -To convert from physical to modal:

  u_mod = np.dot(lay2mod[:,:],u_lev) # if u_lev is 1D
  u_mod = np.einsum('ij,jkl->ikl',lay2mod,u_lev) # if u_lev is 3D
  u_mod = np.einsum('ijkl,jkl->ikl',lay2mod,u_lev) #if u_lev is 3D and N2 variable

  -To go back to the physical space:

  u_lev = np.dot(mod2lay[:,:],u_mod)
  u_lev = np.einsum('ij,jkl->ikl',mod2lay,u_mod) # if u_mod is 3D
  u_lev = np.einsum('ijkl,jkl->ikl',mod2lay,u_mod) #if u_mod is 3D and N2 variable

  the w_modes are related to the p_modes by
  w_modes = -1/N2 d p_modes/dz


  Parameters
  ----------

  dh : array [nz]
  N2 : array [nz (,ny,nx)]
  f0 : scalar or array [(ny,nx)]
  eivec : Bool
  wmode : Bool
  diag : Bool
    Use a transformation matrix to solve a symmetric matrix

  Returns
  -------
  
  if eivec == T
  Rd: array [nz (,ny,nx)]
  lay2mod: array [nz,nz (,ny,nx)]
  mod2lay: array [nz,nz (,ny,nx)]

  if eivec == F
  Rd: array [nz (,ny,nx)]

  '''

  N2,f0 = reshape3d(dh,N2,f0)
  nl,si_y,si_x = N2.shape
  
  mat_format = "dense"
  if diag:
    mat_format = "sym_diag"

  S = gamma_stretch(dh,N2,f0,wmode=wmode,squeeze=False,mat_format=mat_format)

  nlt = (N2 == 0).argmax(axis=0)
  nlt = np.where(nlt == 0,nl,nlt)

  # put variables in right format
  Ht = np.cumsum(dh)
  dhi = 0.5*(dh[1:] + dh[:-1])
  dhcol = dh[:,None]
  dhicol = dhi[:,None]


  if wmode:
    Rd = np.zeros((nl,si_y,si_x))
    if eivec:
      mod2lay = np.zeros((nl,nl,si_y,si_x))
      lay2mod = np.zeros((nl,nl,si_y,si_x))
  else:
    nlt = nlt + 1
    Rd = np.zeros((nl+1,si_y,si_x))
    if eivec:
      mod2lay = np.zeros((nl+1,nl+1,si_y,si_x))
      lay2mod = np.zeros((nl+1,nl+1,si_y,si_x))

  for j,i in np.ndindex((si_y,si_x)):

    if eivec:
      if diag:
        iRd2, eigs = la.eigh_tridiagonal(S[1,:nlt[j,i],j,i], S[0,1:nlt[j,i],j,i])
        eigr = S[2,:nlt[j,i],j,i,None]*eigs # right eigenvectors: D*w
        eigl = eigs/S[2,:nlt[j,i],j,i,None] # left eigenvectors (stored transposed): D^-1*w
      else:
        iRd2, eigl,eigr= la.eig(S[:nlt[j,i],:nlt[j,i],j,i],left=True)
    else:
      if diag:
        iRd2 = la.eigvalsh_tridiagonal(S[1,:nlt[j,i],j,i], S[0,1:nlt[j,i],j,i])
      else:
        iRd2 = la.eig(S[:nlt[j,i],:nlt[j,i],j,i],right=False)
  
    iRd2 = -iRd2.real
    idx = np.argsort(iRd2)
  
    iRd2 = iRd2[idx]
    with np.errstate(divide='ignore', invalid='ignore'):
      Rd_loc = 1./np.sqrt(iRd2)

    Rd[:nlt[j,i],j,i] = Rd_loc

    if eivec:  
      eigl = eigl[:,idx]
      eigr = eigr[:,idx]
    
      # Normalize eigenvectors
      N2col = N2[:nlt[j,i],j,i][:,None]
      cm = Rd_loc[:nlt[j,i],None]*f0[j,i]
  
      if wmode:
        scap = np.sum(dhi[:nlt[j,i],None]*eigr*eigr*N2col*cm.T**2,0)
        Htt = Ht[nlt[j,i]]
      else:
        scap = np.sum(dh[:nlt[j,i],None]*eigr*eigr,0)
        Htt = Ht[nlt[j,i]-1]
      flip = np.sign(eigr[0,:])
      eigr = eigr*np.sqrt(Htt/scap)*flip
      

  
      if diag:
        eigl = eigl/np.sqrt(Htt/scap)*flip
      else:
        scap2 =  np.sum(eigl*eigr,0)
        eigl = eigl/scap2

      lay2mod[:nlt[j,i],:nlt[j,i],j,i] = eigl.T
      mod2lay[:nlt[j,i],:nlt[j,i],j,i] = eigr
  
  if eivec:  
    return Rd.squeeze(), lay2mod.squeeze(), mod2lay.squeeze()
  else:
    return Rd.squeeze()
Example #24
    def step(self, closure, execute_update=True):
        """Performs a single optimization step.

        Arguments:
            closure (callable): A closure that accepts a vector of parameters and a vector of
                length equal to the number of model parameters, and returns the Fisher-vector product.
        """
        info = {}

        # If doing block diag, perform the update for each param group
        params_i = 0
        params_j = 0

        for gi, group in enumerate(self.param_groups):
            params = group['params']
            params_j += len(params)

            state = self.state[gi]
            if len(state) == 0:
                state['step'] = 0
                # Set shrinkage to defaults, i.e. no shrinkage
                state['rho'] = 0.0
                state['diag_shrunk'] = 1.0

            state['step'] += 1

            g = gradients_to_vector(params)

            if 'ng_prior' not in state:
                state['ng_prior'] = torch.zeros_like(g)

            curv_type = group['curv_type']
            if curv_type not in self.valid_curv_types:
                raise ValueError("Invalid curv_type.")

            # Create closure to pass to Lanczos and CG
            if curv_type == 'fisher':
                Fvp_theta_fn = make_fvp_fun_idx(closure, params, params_i,
                                                params_j)
            elif curv_type == 'gauss_newton':
                # Pass indices instead of actual params, since these params should be the same as
                # the model params anyway. The closure should then set only the subset of params
                # and return only the tmp_params from that subset. This assumes the param groups
                # are ordered in a specific manner.
                Fvp_theta_fn = make_gnvp_fun_idx(closure, params, params_i,
                                                 params_j)

            num_params = self._numel(gi, params)

            shrinkage_method = group['shrinkage_method']
            lanczos_amortization = group['lanczos_amortization']
            if shrinkage_method == 'lanczos' and (
                    state['step'] - 1) % lanczos_amortization == 0:
                w = lanczos_iteration(Fvp_theta_fn,
                                      num_params,
                                      k=group['lanczos_iters'])
                rho, diag_shrunk = estimate_shrinkage(w, num_params,
                                                      group['batch_size'])
                state['rho'] = rho
                state['diag_shrunk'] = diag_shrunk

            M = None
            if group['cg_precondition_empirical']:
                # Empirical Fisher is g * g
                M = (g * g + group['cg_precondition_regu_coef'] *
                     torch.ones_like(g))**group['cg_precondition_exp']

            # Do CG solve with hvp fn closure
            extract_tridiag = group['shrinkage_method'] == 'cg'
            cg_result = cg_solve(Fvp_theta_fn,
                                 g.data.clone(),
                                 x_0=group['cg_prev_init_coef'] *
                                 state['ng_prior'],
                                 M=M,
                                 cg_iters=group['cg_iters'],
                                 cg_residual_tol=group['cg_residual_tol'],
                                 shrunk=group['shrinkage_method'] is not None,
                                 rho=state['rho'],
                                 Dshrunk=state['diag_shrunk'],
                                 extract_tridiag=extract_tridiag)

            if extract_tridiag:
                ng, (diag_elems, off_diag_elems) = cg_result
                w = eigvalsh_tridiagonal(diag_elems, off_diag_elems)
                rho, diag_shrunk = estimate_shrinkage(w, num_params,
                                                      group['batch_size'])
                state['rho'] = rho
                state['diag_shrunk'] = diag_shrunk
            else:
                ng = cg_result

            state['ng_prior'] = ng.data.clone()

            # Normalize NG
            lr = group['lr']
            alpha = torch.sqrt(torch.abs(lr / (torch.dot(g, ng) + 1e-20)))

            # Unflatten grad
            vector_to_gradients(ng, params)

            if execute_update:
                # Apply step
                for p in params:
                    if p.grad is None:
                        continue
                    d_p = p.grad.data
                    p.data.add_(d_p, alpha=-alpha.item())

            params_i = params_j
            info[gi] = dict(alpha=alpha, delta=lr, natural_grad=ng)

        return info