# Test helpers NUM_SAMPLES, TOL, START_TOL_MULTIPLE, inv_softplus,
# maximum_likelihood_estimate_sgd and StudentTOutput are assumed to be
# defined in the surrounding test module.
import numpy as np
import torch
from torch.distributions import StudentT


def test_studentT_likelihood(df: float, loc: float, scale: float):
    dfs = torch.zeros((NUM_SAMPLES,)) + df
    locs = torch.zeros((NUM_SAMPLES,)) + loc
    scales = torch.zeros((NUM_SAMPLES,)) + scale

    distr = StudentT(df=dfs, loc=locs, scale=scales)
    samples = distr.sample()

    init_bias = [
        inv_softplus(df - 2),
        loc - START_TOL_MULTIPLE * TOL * loc,
        inv_softplus(scale - START_TOL_MULTIPLE * TOL * scale),
    ]

    df_hat, loc_hat, scale_hat = maximum_likelihood_estimate_sgd(
        StudentTOutput(),
        samples,
        init_biases=init_bias,
        num_epochs=15,
        learning_rate=1e-3,
    )

    assert (
        np.abs(df_hat - df) < TOL * df
    ), f"df did not match: df = {df}, df_hat = {df_hat}"
    assert (
        np.abs(loc_hat - loc) < TOL * loc
    ), f"loc did not match: loc = {loc}, loc_hat = {loc_hat}"
    assert (
        np.abs(scale_hat - scale) < TOL * scale
    ), f"scale did not match: scale = {scale}, scale_hat = {scale_hat}"
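# A minimal sketch of the helper the test above relies on, assuming
# inv_softplus is the inverse of softplus (so softplus(inv_softplus(y)) == y).
# The invocation values below are illustrative, not the project's fixtures.
def inv_softplus(y: float) -> float:
    # softplus(x) = log(1 + exp(x))  =>  inv_softplus(y) = log(exp(y) - 1)
    return float(np.log(np.exp(y) - 1))


# Illustrative direct invocation; in the real suite this would typically be
# driven by a pytest parametrization over (df, loc, scale):
# test_studentT_likelihood(df=6.0, loc=2.3, scale=0.7)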
import torch
from torch.distributions import StudentT


class EpsiSampler:
    def __init__(self, x, epsi_nu):
        self.x = x
        self.len = self.x.shape[0]
        self.epsi_nu = epsi_nu
        self.tdistribution = StudentT(self.epsi_nu)

    def epsisamp(self, epsi, tau, mu):
        # Assumes no covariance between epsilons; does not sample as a
        # single block.
        # Newton-Raphson iterations to find the mode of the proposal density.
        mu_f, hf, hf_inv = self.epsi_nr(epsi, mu, tau)
        # Now propose with a multivariate t centered at the Newton-Raphson
        # mode, with covariance taken from the Hessian. Since the Hessian is
        # diagonal, we can simulate from n univariate t's.
        epsi_p = mu_f + hf_inv.neg().sqrt() * self.tdistribution.sample(
            torch.Size([self.len, 1])
        )
        # epsi_p = torch.randn(mu_f, -hf_inv)
        # Log Metropolis-Hastings ratio: log target ratio plus log proposal
        # ratio (tqrat is assumed to be defined elsewhere in the module).
        arat = self.pratepsi(epsi, epsi_p, tau, mu) + tqrat(
            epsi, epsi_p, mu_f, mu_f,
            hf_inv.neg().sqrt(), hf_inv.neg().sqrt(), self.epsi_nu,
        )
        # ridx marks rejected proposals: log(u) >= min(arat, 0).
        ridx = torch.rand(self.len, 1).log() >= arat.clamp(max=0)
        ridx_float = ridx.type(torch.float32)
        epsi[~ridx] = epsi_p[~ridx]
        # Note: as written this is the mean *accepted* fraction, despite
        # the name mrej.
        mrej = (1 - ridx_float).mean()
        return epsi, mrej

    # TODO: find out if .exp() legal here
    def pratepsi(self, epsi, epsi_p, tau, mu):
        # Log ratio of the target density at the proposed vs. current epsilon.
        pr = (
            epsi_p * self.x / tau.sqrt()
            - (mu + epsi_p / tau.sqrt()).exp()
            - epsi_p ** 2 / 2
        ) - (
            epsi * self.x / tau.sqrt()
            - (mu + epsi / tau.sqrt()).exp()
            - epsi ** 2 / 2
        )
        return pr

    def epsi_nr(self, epsi, mu, tau):
        h, h_inv = 0, 0
        for _ in range(1, 100):
            h, h_inv = self.hessepsi(epsi, tau, mu)
            # Newton-Raphson update.
            grad = self.gradepsi(epsi, tau, mu)
            epsi = epsi - h_inv * grad
            # We've reached a local maximum.
            if grad.norm() < 1e-6:
                break
        return epsi, h, h_inv

    @staticmethod
    def hessepsi(epsi, tau, mu):
        h = -(mu + epsi / tau.sqrt()).exp() / tau - 1
        h_inv = 1 / h
        return h, h_inv

    def gradepsi(self, epsi, tau, mu):
        gr = (
            self.x / tau.sqrt()
            - (mu + epsi / tau.sqrt()).exp() / tau.sqrt()
            - epsi
        )
        return gr
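# tqrat is referenced above but not defined in this file. A plausible sketch,
# assuming it returns the log proposal-density ratio
# log q(epsi; m1, s1) - log q(epsi_p; m2, s2) for univariate Student-t
# proposals, which is the correction term the Metropolis-Hastings ratio needs:
def tqrat(epsi, epsi_p, m1, m2, s1, s2, nu):
    td = torch.distributions.StudentT(nu)
    # Location-scale t log-density: log t_nu((y - m) / s) - log s.
    log_q_old = td.log_prob((epsi - m1) / s1) - s1.log()
    log_q_new = td.log_prob((epsi_p - m2) / s2) - s2.log()
    return log_q_old - log_q_new


# Illustrative usage (shapes are assumptions: x and epsi are (n, 1) tensors,
# tau and mu are scalar tensors):
# sampler = EpsiSampler(x=torch.randn(50, 1), epsi_nu=5.0)
# epsi, mrej = sampler.epsisamp(
#     torch.zeros(50, 1), tau=torch.tensor(1.0), mu=torch.tensor(0.0)
# )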
# Assumes StudentT and Normal from torch.distributions, bisect from
# scipy.optimize, and module-level logger, device, and AveMeter.
def _reweight(self, N=100000):
    # Expected value: \mathbb{E}_{x ~ X}[Ramp(|x|)]
    if not hasattr(self, 'epv'):
        self.Hfunc = self.config.Hfunc
        # self.Hfunc = 'ramp'
        if self.real == 'Student':
            tdist = StudentT(df=self.config.r_df)
            x = tdist.sample((5000000,))
        elif self.real == 'Gaussian':
            ndist = Normal(0, 1)
            x = ndist.sample((5000000,))
        self.epv = self._HFunc(x, mode=self.Hfunc).mean().item()

    def sov_func(a, bs=1000):
        # Find a suitable factor a to match the expected value.
        r = AveMeter()
        for _ in range(N // bs):
            if self.config.use_ig:
                ub1 = torch.randn(bs, self.netGXi.input_dim // 2).to(device)
                ub2 = torch.randn(
                    bs, self.netGXi.input_dim - self.netGXi.input_dim // 2
                ).to(device)
                ub2.data.div_(torch.abs(ub2.data) + self.config.delta)
                ub = torch.cat([ub1, ub2], dim=1)
            else:
                ub = torch.randn(bs, self.netGXi.input_dim).to(device)
            with torch.no_grad():
                xib = self.netGXi(ub)
            zb = torch.randn(bs, self.dim).to(device)
            vu = (zb[:, 0].div_(zb.norm(2, dim=1)) + self.config.delta).to(device)
            r.update(
                self._HFunc(a * xib * vu, mode=self.Hfunc).mean().item(), bs
            )
        return r.avg - self.epv

    # if sov_func(1) > 0: down, up = 0, 3
    # elif sov_func(3) > 0: down, up = 0, 5
    # elif sov_func(10) > 0: down, up = 1, 12
    # elif sov_func(25) > 0: down, up = 8, 27
    # elif sov_func(75) > 0: down, up = 23, 77
    if sov_func(250) > 0:
        down, up = 0, 3000
    else:
        logger.info('Factor is larger than 250!')
        return 250
    factor = bisect(sov_func, down, up)
    print(factor)
    return factor
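# AveMeter is referenced above but not defined in this file. A minimal sketch,
# assuming the usual running-average meter with update(val, n) and .avg, where
# val is a batch mean weighted by the batch size n:
class AveMeter:
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # Accumulate a weighted sum so .avg is the mean over all samples seen.
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count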