def GenerateData(decoy):
    """Draw (value, probability) triples for options A, B and D until the ordering
    required by the decoy condition is met."""
    from scipy.stats import t
    is_matched = False
    while not is_matched:
        # values drawn from a Student-t distribution (df, location, scale are module-level globals)
        test_v_a = t.rvs(df, location, scale, 1)
        test_v_b = t.rvs(df, location, scale, 1)
        test_v_d = t.rvs(df, location, scale, 1)
        # alternative: Gaussian values
        # test_v_a = np.random.normal(guass_mu, guass_sigma, 1)
        # test_v_b = np.random.normal(guass_mu, guass_sigma, 1)
        # test_v_d = np.random.normal(guass_mu, guass_sigma, 1)
        # probabilities drawn from a Beta distribution
        test_p_a = np.random.beta(beta_a, beta_b, 1)
        test_p_b = np.random.beta(beta_a, beta_b, 1)
        test_p_d = np.random.beta(beta_a, beta_b, 1)
        if decoy == 0:
            if (test_p_a[0] > test_p_d[0]) and (test_p_d[0] > test_p_b[0]) and \
               (test_v_b[0] > test_v_a[0]) and (test_v_a[0] > test_v_d[0]):
                is_matched = True
                return (round(test_v_a[0], 2), round(test_v_b[0], 2), round(test_v_d[0], 2),
                        round(test_p_a[0], 2), round(test_p_b[0], 2), round(test_p_d[0], 2))
        else:
            if (test_p_a[0] > test_p_b[0]) and (test_p_b[0] > test_p_d[0]) and \
               (test_v_b[0] > test_v_d[0]) and (test_v_d[0] > test_v_a[0]):
                is_matched = True
                return (round(test_v_a[0], 2), round(test_v_b[0], 2), round(test_v_d[0], 2),
                        round(test_p_a[0], 2), round(test_p_b[0], 2), round(test_p_d[0], 2))
def GenerateData():
    """Draw (value, probability) triples for options A, B and D until the target ordering is met."""
    from scipy.stats import t
    is_matched = False
    while not is_matched:
        # values drawn from a Student-t distribution (df, location, scale are module-level globals)
        test_v_a = t.rvs(df, location, scale, 1)
        test_v_b = t.rvs(df, location, scale, 1)
        test_v_d = t.rvs(df, location, scale, 1)
        # alternative: Gaussian values
        # test_v_a = np.random.normal(guass_mu, guass_sigma, 1)
        # test_v_b = np.random.normal(guass_mu, guass_sigma, 1)
        # test_v_d = np.random.normal(guass_mu, guass_sigma, 1)
        # probabilities drawn from a Beta distribution
        test_p_a = np.random.beta(beta_a, beta_b, 1)
        test_p_b = np.random.beta(beta_a, beta_b, 1)
        test_p_d = np.random.beta(beta_a, beta_b, 1)
        # test_e_a = test_p_a * test_v_a
        # test_e_b = test_p_b * test_v_b
        # test_e_d = test_p_d * test_v_d
        # return test_v_a[0], test_v_b[0], test_v_d[0], test_p_a[0], test_p_b[0], test_p_d[0]
        # return round(test_v_a[0], 2), round(test_v_b[0], 2), round(test_v_d[0], 2), round(test_p_a[0], 2), round(test_p_b[0], 2), round(test_p_d[0], 2)
        # alternative orderings that were tried:
        # if (test_p_a[0] > test_p_b[0]) and (test_p_b[0] > test_p_d[0]) and (test_v_b[0] > test_v_d[0]) and (test_v_d[0] > test_v_a[0]):
        # if (test_p_a[0] > test_p_d[0]) and (test_p_d[0] > test_p_b[0]) and (test_v_b[0] > test_v_a[0]) and (test_v_a[0] > test_v_d[0]) and (abs(test_e_a - test_e_b) < 0.1):
        # if (test_p_a[0] > test_p_b[0]) and (test_p_b[0] > test_p_d[0]) and (test_v_b[0] > test_v_a[0]) and (test_v_a[0] > test_v_d[0]):
        if (test_p_a[0] > test_p_d[0]) and (test_p_d[0] > test_p_b[0]) and \
           (test_v_b[0] > test_v_a[0]) and (test_v_a[0] > test_v_d[0]):
            is_matched = True
            return (round(test_v_a[0], 2), round(test_v_b[0], 2), round(test_v_d[0], 2),
                    round(test_p_a[0], 2), round(test_p_b[0], 2), round(test_p_d[0], 2))
            # return test_v_a[0], test_v_b[0], test_v_d[0], test_p_a[0], test_p_b[0], test_p_d[0]
def pred_dist_rvs(pred_params: pd.DataFrame, n_samples: int, seed: int):
    """
    Draws n_samples from a predicted response distribution.

    pred_params: pd.DataFrame
        Dataframe with predicted distributional parameters.
    n_samples: int
        Number of samples to draw from the predicted response distribution.
    seed: int
        Manual seed.

    Returns
    -------
    pd.DataFrame with n_samples drawn from the predicted response distribution.
    """
    pred_dist_list = []
    for i in range(pred_params.shape[0]):
        pred_dist_list.append(
            student_t.rvs(loc=pred_params.loc[i, "location"],
                          scale=pred_params.loc[i, "scale"],
                          df=pred_params.loc[i, "nu"],
                          size=n_samples,
                          random_state=seed))
    pred_dist = pd.DataFrame(pred_dist_list)
    return pred_dist
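# A minimal, hypothetical usage sketch for pred_dist_rvs (not from the original source).
# It assumes pandas is imported as pd, scipy.stats.t is imported as student_t, and that
# the parameter frame uses the "location", "scale" and "nu" columns expected above.
example_params = pd.DataFrame({"location": [0.0, 1.5],
                               "scale": [1.0, 0.5],
                               "nu": [4.0, 10.0]})
draws = pred_dist_rvs(pred_params=example_params, n_samples=5, seed=123)
print(draws.shape)  # (2, 5): one row of samples per predicted parameter set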
def ts(self, n):
    Z = tdist.rvs(1 / self.gamma, size=n)
    X = np.empty(n)
    X[0] = self.beta / (1 - self.alpha)  # stationary mean
    for t in range(1, n):
        X[t] = self.beta + self.alpha * X[t - 1] + self.s * Z[t]
    return X
def rvs(self, n):
    # return np.random.randn(n) * self.sigma + self.mu
    from scipy.stats import t
    # [np.abs(x) for x in t.rvs(df=4, loc=0, scale=50, size=10000)]
    ret = t.rvs(self.nu, loc=0, scale=self.A, size=n)
    ret[ret < 0] = 0  # truncate negative draws at zero
    return ret
def calibrate(self, week_num):
    # 61 >= week_num >= 53
    mu = [np.mean(item)
          for item in np.asarray(self.log_return)[:, self.index[week_num - 53]:self.index[week_num]]]
    self.mu = mu
    cov_matrix = np.cov(np.asarray(self.log_return)[:, self.index[week_num - 53]:self.index[week_num]])
    self.cov_matrix = cov_matrix

    ## Fitted by normal
    X_normal = np.random.multivariate_normal(mu, cov_matrix, 1000)
    L = np.array([
        -sum(self.lambda_dict[self.end_date[week_num - 1]]
             * np.asarray(self.price_list)[:, self.index[week_num]]
             * (np.exp(np.asarray(X_normal[i])) - 1))
        for i in range(len(X_normal[:, 0]))
    ])
    weight = 1 / 15
    L_delta = np.array([sum(-weight * self.V_t[week_num] * np.asarray(X_normal[i]))
                        for i in range(len(X_normal[:, 0]))])
    VaR = np.percentile(L_delta, 0.95)

    ## Fitted by t-student
    L_act = [-(self.V_t[self.index[i + 1]] - self.V_t[self.index[i]])
             for i in range(week_num - 53, week_num)]
    parameters = t.fit(L_act)
    L_t = t.rvs(parameters[0], parameters[1], parameters[2], 1000)
    return [L, L_delta, L_t]
def tStudentBrownianMotion(self, lineCorr=True, df=1):
    output = {'process': [], 'increments': []}
    for i in range(0, self.n):
        x0 = np.asarray(0)
        r = t.rvs(size=x0.shape + (self.steps - 1,), scale=np.sqrt(self.dt), df=df)
        r = np.insert(r, 0, 0)
        out = np.empty(r.shape)
        np.cumsum(r, axis=-1, out=out)
        out += np.expand_dims(x0, axis=-1)
        output['process'].append(out)
        output['increments'].append(r)
    if lineCorr:
        output['process'] = listInterpreter(output['process'])
        output['increments'] = listInterpreter(output['increments'])
    return namedtuple('Output', ['process', 'increments'])(**{
        "process": output['process'],
        "increments": output['increments']
    })
def invLogLamSample(logLam0, a, b, B_lam, sigma_mass, z, logRich, size=100):
    # NOTE: returns ln(M), not log10(M)! This is how the formula is defined!
    mu = invLogLam(logLam0, a, b, B_lam, z, logRich)
    if sigma_mass == 0:
        return mu
    return np.array([t.rvs(df, loc=m, scale=sigma_mass, size=size)
                     for m in mu])  # shape (logRich.shape[0], size)
def simulateDelay(currentNetwork):
    '''
    @description: generates a delay based on the appropriate distribution
    '''
    wifiDelay = [3.0659475327, 14.6918344498]      # min and max delay observed for wifi
    cellularDelay = [4.2531193161, 14.3172883892]  # min and max delay observed for 3G
    if currentNetwork == 1:  # wifi
        # Johnson SU in python (fitter.Fitter.fit()) and t location-scale in matlab (allfitdist);
        # in python, error is higher for t compared to Johnson SU
        delay = min(
            max(johnsonsu.rvs(0.29822254217554717, 0.71688524931466857,
                              loc=6.6093350624107909, scale=0.5595970482712973),
                wifiDelay[0]),
            wifiDelay[1])
    else:
        # t in python (fitter.Fitter.fit()) and t location-scale in matlab (allfitdist)
        delay = min(
            max(t.rvs(0.43925241212097499, loc=4.4877772816533934,
                      scale=0.024357324434644639),
                cellularDelay[0]),
            cellularDelay[1])
    if DEBUG >= 1:
        print(colored("Delay for " + str(availableNetworkName[currentNetwork - 1])
                      + ": " + str(delay), "cyan"))
        # input()
    return delay
def sample_post(hp, ss):
    z = _intermediates(hp, ss)
    l_star = gamma(z.alpha, 1. / z.beta)
    while True:
        m_star = t.rvs(2 * z.alpha, z.mu, z.beta / (z.alpha * z.tau)) ** -1
        if m_star > 0:
            break
    return (m_star, l_star)
def duplicates():
    df = get_duplicate_data()
    df['error'] = df["POSIX_AGG_PERF_BY_SLOWEST_LOG10"] - df["prediction"]
    df = df[np.abs(df.error) < np.log10(1.5)]
    df.time_diff = np.log10(df.time_diff + 0.01)
    # df.error = np.abs(df.error)

    cuts = [-np.inf] + list(range(9))
    groups = [df[(df.time_diff >= low) & (df.time_diff < high)].error
              for low, high in zip(cuts[:-1], cuts[1:])]

    # fit a student t distribution
    from scipy.stats import t
    param = t.fit(groups[0])
    norm_gen_data = t.rvs(param[0], param[1], param[2], 10000)
    groups = list(reversed([norm_gen_data] + groups))
    labels = list(reversed(["t-distribution fit", "0s to 1s"] +
                           ["$10^{}s$ to $10^{}s$".format(low, high)
                            for low, high in zip(cuts[1:-1], cuts[2:])]))

    fig, axes = joypy.joyplot(groups, colormap=matplotlib.cm.coolwarm_r, overlap=0.3,
                              linewidth=1., ylim='own', range_style='own', tails=0.2,
                              bins=100, labels=labels, figsize=(2.5, 3))
    for idx, ax in enumerate(axes):
        try:
            ax.set_yticklabels([labels[idx]], fontsize=8, rotation=120)
        except:
            pass
        ax.set_xlim([-0.2, 0.2])
        ax.set_xticks(np.log10([1 / 1.5, 1 / 1.2, 1, 1.2, 1.5]))
        ax.set_xticklabels(["$.67\\times$", "$.83\\times$", "$1\\times$",
                            "$1.2\\times$", "$1.5\\times$"],
                           rotation=90, fontsize=8)
    plt.xlabel("Error", rotation=180)
    plt.ylabel("Time ranges")
    plt.savefig("figures/figure_5.pdf", dpi=600, bbox_inches='tight', pad_inches=0)
def rt(n, df, ncp=0):
    """ Generates random variables from the t-distribution """
    from scipy.stats import t, nct
    if ncp == 0:
        result = t.rvs(size=n, df=df, loc=0, scale=1)
    else:
        result = nct.rvs(size=n, df=df, nc=ncp, loc=0, scale=1)
    return result
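# Hypothetical usage sketch for rt (not part of the original snippet); it assumes scipy
# is installed and simply contrasts central and noncentral draws.
central_draws = rt(1000, df=5)            # central t, 5 degrees of freedom
noncentral_draws = rt(1000, df=5, ncp=2)  # noncentral t with noncentrality 2
print(central_draws.mean(), noncentral_draws.mean())  # the noncentral mean is shifted away from 0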
def get_Y(X, beta, noise_std, noise_distr):
    if noise_distr == 'gaussian':
        return X @ beta + noise_std * np.random.randn(X.shape[0])
    elif noise_distr == 't':
        # student's t w/ 3 degrees of freedom
        return X @ beta + noise_std * t.rvs(df=3, size=X.shape[0])
    elif noise_distr == 'gaussian_scale_var':
        # want variance of noise to scale with squared norm of x
        return X @ beta + noise_std * np.multiply(np.random.randn(X.shape[0]),
                                                  np.linalg.norm(X, axis=1))
    elif noise_distr == 'thresh':
        return (X > 0).astype(np.float32) @ beta + noise_std * np.random.randn(X.shape[0])
def generateRandomTSampleMatrix(sims: int, basketSize=1, dof=2.74335149908):
    """
    Generate a matrix of independent draws from a t distribution, $Z_1, Z_2$.
    NB: not available on GPU.
    :param sims: number of simulations (rows)
    :param basketSize: number of independent variables per simulation (columns)
    :param dof: degrees of freedom of the t distribution
    :return: array of shape (sims, basketSize)
    """
    return t.rvs(df=dof, size=(sims, basketSize))
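# Hypothetical usage sketch for generateRandomTSampleMatrix (not in the original source);
# it assumes scipy.stats.t is imported as t, as the snippet above requires.
Z = generateRandomTSampleMatrix(sims=10000, basketSize=2)
print(Z.shape)  # (10000, 2): independent heavy-tailed draws, e.g. for a basket Monte Carlo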
def test__fit(self):
    distribution = StudentTUnivariate()

    data = t.rvs(size=50000, df=3, loc=1, scale=1)
    distribution._fit(data)

    expected = {
        'df': 3,
        'loc': 1,
        'scale': 1,
    }
    for key, value in distribution._params.items():
        np.testing.assert_allclose(value, expected[key], rtol=0.3)
def sample(self, timestep, num_samples=1):
    mean = self.f[timestep]
    variance = self.Q[timestep]
    stdev = np.sqrt(variance)
    if self.obs_discount:
        df = self.gamma_n[timestep - 1]
        sample_value = student_t.rvs(df=df, loc=mean, scale=stdev, size=num_samples)
    else:
        sample_value = norm.rvs(loc=mean, scale=stdev, size=num_samples)
    return np.squeeze(sample_value)
def computeDelay(self):
    '''
    description: generates a delay for switching between WiFi networks, which is modeled
        using Johnson's SU distribution (identified as a best fit to 500 delay values),
        and a delay for switching between WiFi and cellular networks, modeled using
        Student's t-distribution (identified as best fit to 500 delay values)
    args: self
    returns: a delay value
    '''
    # min and max delay observed for wifi in some real experiments; used as caps for the delay generated
    wifiDelay = [3.0659475327, 14.6918344498]
    # min and max delay observed for 3G in some real experiments; used as caps for the delay generated
    cellularDelay = [4.2531193161, 14.3172883892]
    if networkList[getListIndex(networkList, self.currentNetwork)].getWirelessTechnology() == 'WiFi':
        delay = min(max(johnsonsu.rvs(0.29822254217554717, 0.71688524931466857,
                                      loc=6.6093350624107909, scale=0.5595970482712973),
                        wifiDelay[0]), wifiDelay[1])
    else:
        delay = min(max(t.rvs(0.43925241212097499, loc=4.4877772816533934,
                              scale=0.024357324434644639),
                        cellularDelay[0]), cellularDelay[1])
    return delay
def sample(self, x, n, use_stddev=False):
    mu, nu, alpha, beta = self.B[x, :]
    scale = np.square(self.w) * max(0.001, beta * (nu + 1) / (nu * alpha))
    # np.square(self.w) * max(0.01, beta * (nu + 1) / (nu * alpha))
    df = 2 * alpha
    try:
        q_sa = t.rvs(df=df, loc=mu, scale=scale, size=n)  # before Poster
    except Exception as e:
        print(e)
        print(scale)
        print(mu, nu, alpha, beta)
        exit()
    return q_sa
def initialize(self, method='laplace'):
    # fit mixture of Gaussians to Laplace
    mog = MoGaussian(num_components=self.marginals[0].num_components)

    if method.lower() == 'laplace':
        mog.train(laplace.rvs(size=[1, 10000]), max_iter=100)
    elif method.lower() == 'student':
        mog.train(t.rvs(1, size=[1, 10000]), max_iter=100)
    else:
        raise ValueError('Unknown initialization method \'{0}\'.'.format(method))

    for m in self.marginals:
        m.priors = mog.priors.copy()
        m.scales = mog.scales.copy()
        m.means = mog.means.copy()
def ttest_bayes_ci(x_val, iterations=1000, credible_mass=0.95):
    """
    Originally from https://github.com/tszanalytics/BayesTesting.jl
    Adapted and extended by Giuseppe Insana on 2019.08.19

    Arguments:
        x_val: array of values
        iterations: iterations for samples of posterior
        credible_mass: for HDI (highest density interval)
    Returns:
        hdi: highest density interval of posterior for specified credible_mass
    """
    num = len(x_val)
    dof = num - 1
    xmean = np.mean(x_val)
    std_err = np.std(x_val) / np.sqrt(num)
    t_s = std_err * t.rvs(dof, size=iterations) + xmean
    hdi = hdi_from_mcmc(t_s, credible_mass=credible_mass)
    return hdi
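# Hypothetical usage sketch for ttest_bayes_ci (not in the original source); it assumes
# numpy is imported as np, scipy.stats.t is imported as t, and that hdi_from_mcmc is
# available in scope, as the snippet above requires.
measurements = np.array([0.12, 0.08, 0.15, 0.11, 0.09, 0.14, 0.10, 0.13])
hdi = ttest_bayes_ci(measurements, iterations=5000, credible_mass=0.95)
print("95% HDI for the mean:", hdi)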
def func(self, *params, n_obs=100, batch_size=1, random_state=None):
    results = list()
    params = np.array(params).reshape(self.param_dim, -1)
    batches = params.shape[1]
    # print('Sim:', params)
    for i in range(0, batches):
        x = params[0, i]
        y = params[1, i]
        # print(x, y)
        mic_pair = self.choose_rand_mic_pair()
        itd = self.get_itd(x, y, mic_pair)
        temp = []
        for _ in range(10):
            temp.append(t.rvs(df=3, scale=0.01, loc=itd))
        results.append([np.mean(temp), np.std(temp)])
    # print(results)
    # print('mean:', np.mean(results, axis=1))
    # print('std:', np.std(y_obs, axis=1))
    return results
def is_t_distributed(X, K=500):
    # fit t parameters
    nu, mu, sigma = t.fit(X, loc=X.mean(), scale=X.std())
    # Kolmogorov-Smirnov statistic of the original sample
    stat0, _ = kstest(X.to_numpy(), t.cdf, args=(nu,))
    # null distribution of the statistic
    d = []
    for k in range(K):
        # generate
        tsample = t.rvs(nu, loc=mu, scale=sigma, size=X.shape[0])
        # KS
        stat, _ = kstest(tsample, t.cdf, args=(nu,))
        d.append(stat)
    d = np.array(d)
    # compute p-value
    pvalue = (np.sum(d > stat0) + 1) / (d.shape[0] + 1)
    return Distr(pvalue)
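# Hypothetical usage sketch for is_t_distributed (not in the original source); it assumes
# the pandas/numpy/scipy imports used above and that Distr is a simple wrapper around the
# p-value. X must be a pandas Series here because the snippet calls X.to_numpy().
returns = pd.Series(t.rvs(4, loc=0, scale=0.01, size=1000))
result = is_t_distributed(returns, K=200)
# a large p-value means the sample's KS statistic is typical of true t-distributed samples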
def main():
    # Build model
    print('Loading model ...\n')
    net = DnCNN(channels=1, num_of_layers=opt.num_of_layers)
    device_ids = [0]
    model = nn.DataParallel(net, device_ids=device_ids).cuda()
    model.load_state_dict(torch.load(os.path.join(opt.logdir, 'model_Best.pth')))
    model.eval()

    # load data info
    print('Loading data info ...\n')
    files_source = glob.glob(os.path.join('data', opt.test_data, '*.png'))
    files_source.sort()

    # process data
    psnr_test = 0
    df = 5.2
    for f in files_source:
        # image
        Img = cv2.imread(f)
        Img = normalize(np.float32(Img[:, :, 0]))
        Img = np.expand_dims(Img, 0)
        Img = np.expand_dims(Img, 1)
        ISource = torch.Tensor(Img)
        # noise
        # noise = torch.FloatTensor(ISource.size()).uniform_(-1.732*opt.test_noiseL/255., 1.732*opt.test_noiseL/255.)
        # noisy image
        flatSize = getSize(ISource)
        noise = torch.FloatTensor(t.rvs(df, size=flatSize))
        noise = noise.view(ISource.size())
        INoisy = ISource + noise
        ISource, INoisy = Variable(ISource.cuda()), Variable(INoisy.cuda())
        with torch.no_grad():  # this can save much memory
            Out = torch.clamp(INoisy - model(INoisy), 0., 1.)
        # if you are using an older version of PyTorch, torch.no_grad() may not be supported
        # ISource, INoisy = Variable(ISource.cuda(), volatile=True), Variable(INoisy.cuda(), volatile=True)
        # Out = torch.clamp(INoisy - model(INoisy), 0., 1.)
        psnr = batch_PSNR(Out, ISource, 1.)
        psnr_test += psnr
        print("%s PSNR %f" % (f, psnr))
    psnr_test /= len(files_source)
    print("\nPSNR on test data %f" % psnr_test)
def test_tstudent(self):
    from scipy.stats import t
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(1, 1)
    df = 2.74
    mean, var, skew, kurt = t.stats(df, moments='mvsk')
    x = np.linspace(t.ppf(0.01, df), t.ppf(0.99, df), 100)
    ax.plot(x, t.pdf(x, df), 'r-', lw=5, alpha=0.6, label='t pdf')
    rv = t(df)
    ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
    vals = t.ppf([0.001, 0.5, 0.999], df)
    np.allclose([0.001, 0.5, 0.999], t.cdf(vals, df))
    r = t.rvs(df, size=1000)
    ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
    ax.legend(loc='best', frameon=False)
    self.assertEqual(str(ax), "AxesSubplot(0.125,0.11;0.775x0.77)")
def test(self):
    n_iter = 100
    n_samples = [10, 20, 50, 100, 200, 300, 400, 500, 800, 1000, 2000,
                 3000, 5000, 8000, 10000, 20000, 50000, 100000]
    for n in n_samples:
        x = t.rvs(df=2, size=(n, 2))
        x_ = x[:, 0]
        y_ = x[:, 1] + np.sign(x[:, 0]) * np.abs(x[:, 0]) ** 1.3

        np.testing.assert_allclose(pearson(x_, y_), tuple(pearsonr(x_, y_)))
        np.testing.assert_allclose(spearman(x_, y_), tuple(spearmanr(x_, y_)))
        np.testing.assert_allclose(kendall(x_, y_), kendalltau(x_, y_)[0])

        t0 = time.time()
        for i in range(n_iter):
            spearman(x_, y_)
        t1 = (time.time() - t0) / n_iter

        t0 = time.time()
        for i in range(n_iter):
            spearmanr(x_, y_)
        t2 = (time.time() - t0) / n_iter
        self.assertLess(t1, t2)

        if n > 20:
            x[:10, :] = 0.0
            np.testing.assert_allclose(pearson(x_, y_), tuple(pearsonr(x_, y_)))
            np.testing.assert_allclose(spearman(x_, y_), tuple(spearmanr(x_, y_)))
def rt(n=1, df=1, loc=0, scale=1, ncp=None):
    """
    Creates an array of random numbers from a t distribution, where you can
    specify the number of items, and the degrees of freedom.

    ARGS:
    ---------------------
    :param n (int):      size of the array
    :param df (float):   degrees of freedom
    :param loc:          array_like, optional; location parameter (default=0)
    :param scale:        float, optional; scale (default=1)
    :param ncp (float):  non-centrality parameter delta. Currently not implemented.

    RETURN:
    ---------------
    :return: returns an array of random numbers

    EXAMPLES:
    --------------------
    rt()            # returns a random number from a t distribution (df=1)
    rt(10)          # returns 10 such random numbers
    rt(10, df=15)   # returns 10 random numbers from a t distribution with
                    # 15 degrees of freedom.
    """
    # ==========================================================================
    return t.rvs(df=df, loc=loc, scale=scale, size=n)
lim_mult = 0.25
x_lim = [np.min(x) - lim_mult * x_rang, np.max(x) + lim_mult * x_rang]
# y_lim = [np.min(y) - lim_mult*y_rang, np.max(y) + lim_mult*y_rang]
y_lim = [-10, 40]
x_post_pred = np.linspace(x_lim[0], x_lim[1], 20)
# Define matrix for recording posterior predicted y values at each x value.
# One row per x value, with each row holding random predicted y values.
post_samp_size = len(b1)
y_post_pred = np.zeros((len(x_post_pred), post_samp_size))
# Define matrix for recording HDI limits of posterior predicted y values:
y_HDI_lim = np.zeros((len(x_post_pred), 2))
# Generate posterior predicted y values.
# This gets only one y value, at each x, for each step in the chain.
for chain_idx in range(post_samp_size):
    y_post_pred[:, chain_idx] = t.rvs(
        df=np.repeat([tdf_samp[chain_idx]], [len(x_post_pred)]),
        loc=b0[chain_idx] + b1[chain_idx] * x_post_pred,
        scale=np.repeat([sigma[chain_idx]], [len(x_post_pred)]),
        size=len(x_post_pred))

for x_idx in range(len(x_post_pred)):
    y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx])

# Display believable beta0 and b1 values
plt.figure()
thin_idx = 5
plt.plot(b1[::thin_idx], b0[::thin_idx], '.')
plt.ylabel("Intercept")
plt.xlabel("Slope")
plt.savefig('Figure_16.x0.png')

# Display the posterior of the b1:
def rvs(self, n):
    from scipy.stats import t
    ret = t.rvs(self.nu, loc=self.mu, scale=self.sigma, size=n)
    return ret
def sampler_student_t(df, loc, scale):
    return t.rvs(df, loc=loc, scale=scale)
plt.style.use('seaborn')
from ARPM_utils import save_plot
from HistogramFP import HistogramFP
from Tscenarios import Tscenarios

# input parameters
n_ = 100    # number of variables
j_ = 5000   # number of simulations
nu = 5      # degrees of freedom
# -

# ## Generate iid t-draws

X_ = t.rvs(nu, size=(n_, j_))

# ## Generate uncorrelated t-draws

optionT = namedtuple('option', 'dim_red stoc_rep')
optionT.dim_red = 0
optionT.stoc_rep = 0
X = Tscenarios(nu, zeros((n_, 1)), eye(n_), j_, optionT, 'Chol')

# ## Compute the simulations of the sums

Y_ = ones((1, n_)) @ X_
Y = ones((1, n_)) @ X

# ## Plot normalized histograms and pdf's of the normal and t distributions
def q_learning(num_episodes, discount_factor=0.9, alpha=0.1, ordinal_error=0.0,
               epsilon_start=1.0, epsilon_end=0.05, epsilon_decay_steps=500):
    Q = defaultdict(lambda: np.zeros(num_action))
    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)
    policy = make_epsilon_greedy_policy(Q, num_action)
    epsilon = epsilon_start

    for i_episode in range(num_episodes):
        from scipy.stats import t
        # values drawn from a Student-t distribution (df, location, scale, num_size are globals)
        value_v_a = t.rvs(df, location, scale, num_size)
        value_v_b = t.rvs(df, location, scale, num_size)
        value_v_d = t.rvs(df, location, scale, num_size)
        # value_v_a = np.random.normal(guass_mu, guass_sigma, num_size)
        # value_v_b = np.random.normal(guass_mu, guass_sigma, num_size)
        # value_v_d = np.random.normal(guass_mu, guass_sigma, num_size)
        value_p_a = np.random.beta(beta_a, beta_b, num_size)
        value_p_b = np.random.beta(beta_a, beta_b, num_size)
        value_p_d = np.random.beta(beta_a, beta_b, num_size)

        value_v_a = np.round(value_v_a, 2)
        value_v_b = np.round(value_v_b, 2)
        value_v_d = np.round(value_v_d, 2)
        value_p_a = np.round(value_p_a, 2)
        value_p_b = np.round(value_p_b, 2)
        value_p_d = np.round(value_p_d, 2)

        value_e_a = value_p_a * value_v_a
        value_e_b = value_p_b * value_v_b
        value_e_d = value_p_d * value_v_d

        max_EV = np.zeros(num_size)
        t_length = 0
        for i_sample in range(num_size):
            state = 0
            max_EV[i_sample] = np.max([value_e_a[i_sample], value_e_b[i_sample], value_e_d[i_sample]])
            for t_length in itertools.count():
                # Epsilon for this time step
                epsilon = epsilons[min(i_episode, epsilon_decay_steps - 1)]
                action_probs = policy(state, epsilon)
                action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
                next_state, reward, done = Env_calculate_transition_prob(
                    i_sample, state, action, ordinal_error,
                    value_v_a, value_v_b, value_v_d,
                    value_p_a, value_p_b, value_p_d)
                best_next_action = np.argmax(Q[next_state])
                td_target = reward + discount_factor * Q[next_state][best_next_action]
                td_delta = td_target - Q[state][action]
                # print(td_delta)
                Q[state][action] += alpha * td_delta
                if done or t_length == 10:
                    break
                state = next_state
    return Q
def _noise(n, df=np.inf):
    if df == np.inf:
        return np.random.standard_normal(n)
    else:
        sd_t = np.std(tdist.rvs(df, size=50000))
        return tdist.rvs(df, size=n) / sd_t
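# Hypothetical usage sketch for _noise (not in the original source); it assumes numpy is
# imported as np and scipy.stats.t as tdist. Dividing by an estimated standard deviation
# rescales the heavy-tailed t draws to roughly unit variance.
eps = _noise(100000, df=5)
print(np.var(eps))  # close to 1 for df > 2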
beta = c_rank[:, :3].copy()
beta_v = np.zeros(shape=(id_num * T_num, 3))
for i in range(T_num):
    for j in range(3):
        beta_v[200 * i:200 * (i + 1), j] = beta[200 * i:200 * (i + 1), j] * temp_array[j, i]

data_fr = 5
data_mean = 0
data_scale = 0.05
data_size = id_num * T_num
epsilon2_it = t_norm.rvs(df=data_fr, loc=data_mean, scale=data_scale, size=data_size)
e = beta_v[:, 0] + beta_v[:, 1] + beta_v[:, 2] + epsilon2_it
final_data_100['e'] = e

## 3. xt
import math
p = 0.95
data_mean = 0
data_std = math.sqrt(1 - 0.95 * 0.95)
data_size = 1
# u1_t_array = norm.rvs(loc=data_mean, scale=data_std, size=data_size)
        # (fragment: continuation of the knockoff sampler SCEP_MH_MC; the beginning of the
        # function is not shown in this snippet, so the indentation here is reconstructed)
        else:
            rej = rej + 1
            tildexs.append(x_obs[j])
            cond_density_log = cond_density_log + q_prop_pdf_log(j, true_vec, ws[j]) \
                + log(1 - gamma * min(1, math.exp(acc_ratio_log)))
            if j + 2 <= p:
                true_vec_j = np.concatenate([parallel_chains[j + 1, :], ws[0:j]])
                alter_vec_j = np.concatenate([parallel_chains[j + 1, :], ws[0:j]])
                alter_vec_j[j] = ws[j]
                j_acc_ratio_log = q_prop_pdf_log(j, alter_vec_j, x_obs[j]) \
                    + parallel_cond_density_log[j] + parallel_marg_density_log[j] \
                    + p_marginal_trans_log(j + 2, ws[j + 1], ws[j]) \
                    - p_marginal_trans_log(j + 2, x_obs[j + 1], ws[j])
                if j + 3 <= p:
                    j_acc_ratio_log = j_acc_ratio_log \
                        + p_marginal_trans_log(j + 3, x_obs[j + 2], ws[j + 1]) \
                        - p_marginal_trans_log(j + 3, x_obs[j + 2], x_obs[j + 1])
                j_acc_ratio_log = j_acc_ratio_log - (parallel_cond_density_log[j + 1]
                    + parallel_marg_density_log[j + 1] + q_prop_pdf_log(j, true_vec_j, ws[j]))
                parallel_cond_density_log[j + 1] = parallel_cond_density_log[j + 1] \
                    + q_prop_pdf_log(j, true_vec_j, ws[j]) \
                    + log(1 - gamma * min(1, math.exp(j_acc_ratio_log)))
                if j + 3 <= p:
                    for ii in range(j + 2, p):
                        parallel_cond_density_log[ii] = cond_density_log
    tildexs.append(rej)
    return tildexs


bigmatrix = np.zeros([numsamples, 2 * p])
rejections = 0
for i in range(numsamples):
    bigmatrix[i, 0] = t.rvs(df=df_t) * math.sqrt((df_t - 2) / df_t)
    for j in range(1, p):
        bigmatrix[i, j] = math.sqrt(1 - rhos[j - 1] ** 2) * t.rvs(df=df_t) * math.sqrt((df_t - 2) / df_t) \
            + rhos[j - 1] * bigmatrix[i, j - 1]
    knockoff_scep = SCEP_MH_MC(bigmatrix[i, 0:p], 1, [0] * p, prop_mat, cond_means_coeff, cond_vars)
    bigmatrix[i, p:(2 * p)] = knockoff_scep[0:p]
    rejections = rejections + knockoff_scep[p]

# bigmatrix is an n x 2p matrix, each row being an independent sample of (X, \tilde X).
print("The rejection rate is " + str(rejections / (p * numsamples)) + ".")
def initialize(self, X=None, method='data'):
    """
    Initializes parameter values with more sensible values.

    @type  X: array_like
    @param X: data points stored in columns

    @type  method: string
    @param method: type of initialization ('data', 'gabor' or 'random')
    """
    if self.noise:
        L = self.A[:, :self.num_visibles]

    if method.lower() == 'data':
        # initialize features with data points
        if X is not None:
            if X.shape[1] < self.num_hiddens:
                raise ValueError('Number of data points too small.')
            else:
                # whitening matrix
                val, vec = eig(cov(X))

                # whiten data
                X_ = dot(dot(diag(1. / sqrt(val)), vec.T), X)

                # sort by norm in whitened space
                indices = argsort(sqrt(sum(square(X_), 0)))[::-1]

                # pick 25% largest data points and normalize
                X_ = X_[:, indices[:max([X.shape[1] // 4, self.num_hiddens])]]
                X_ = X_ / sqrt(sum(square(X_), 0))

                # pick first basis vector at random
                A = X_[:, [randint(X_.shape[1])]]

                for _ in range(self.num_hiddens - 1):
                    # pick vector with large angle to all other vectors
                    A = hstack([A, X_[:, [argmin(max(abs(dot(A.T, X_)), 0))]]])

                # orthogonalize and unwhiten
                A = dot(sqrtmi(dot(A, A.T)), A)
                A = dot(dot(vec, diag(sqrt(val))), A)

                self.A = A

    elif method.lower() == 'gabor':
        # initialize features with Gabor filters
        if self.subspaces[0].dim > 1 and not mod(self.num_hiddens, 2):
            for i in range(self.num_hiddens // 2):
                G = gaborf(self.num_visibles)
                self.A[:, 2 * i] = real(G)
                self.A[:, 2 * i + 1] = imag(G)
        else:
            for i in range(len(self.subspaces)):
                self.A[:, i] = gaborf(self.num_visibles, complex=False)

    elif method.lower() == 'random':
        # initialize with Gaussian white noise
        self.A = randn(self.num_visibles, self.num_hiddens)

    elif method.lower() in ['laplace', 'student', 'cauchy', 'exponpow']:
        if method.lower() == 'laplace':
            # approximate multivariate Laplace with GSM
            samples = randn(self.subspaces[0].dim, 10000)
            samples = samples / sqrt(sum(square(samples), 0))
            samples = laplace.rvs(size=[1, 10000]) * samples

        elif method.lower() == 'student':
            samples = randn(self.subspaces[0].dim, 50000)
            samples = samples / sqrt(sum(square(samples), 0))
            samples = t.rvs(2., size=[1, 50000]) * samples

        elif method.lower() == 'exponpow':
            exponent = 0.8
            samples = randn(self.subspaces[0].dim, 200000)
            samples = samples / sqrt(sum(square(samples), 0))
            samples = gamma(1. / exponent, 1., (1, 200000)) ** (1. / exponent) * samples

        else:
            samples = randn(self.subspaces[0].dim, 100000)
            samples = samples / sqrt(sum(square(samples), 0))
            samples = cauchy.rvs(size=[1, 100000]) * samples

        if self.noise:
            # ignore first subspace
            gsm = GSM(self.subspaces[1].dim, self.subspaces[1].num_scales)
            gsm.train(samples, max_iter=200, tol=1e-8)

            for m in self.subspaces[1:]:
                m.scales = gsm.scales.copy()
        else:
            # approximate distribution with GSM
            gsm = GSM(self.subspaces[0].dim, self.subspaces[0].num_scales)
            gsm.train(samples, max_iter=200, tol=1e-8)

            for m in self.subspaces:
                m.scales = gsm.scales.copy()

    else:
        raise ValueError('Unknown initialization method \'{0}\'.'.format(method))

    if self.noise:
        # don't initialize noise covariance
        self.A[:, :self.num_visibles] = L
#
# mean = 0.2, std = 0.3
# x = np.random.normal(mean, std, (10000, 1))
x = np.linspace(-5, 5, 200)
n1 = norm.rvs(loc=mean, scale=std, size=10000)  # normal random variable
num_data = len(x)
sample_mean_n = np.mean(n1)
sample_std_n = np.std(n1)
pdf_n = norm.pdf(x, loc=mean, scale=std)  # normal probability distribution function

dof = 2.5  # degree of freedom for student t distribution
t1 = t.rvs(10, loc=mean, scale=std, size=10000)  # generate student-t random variable
sample_mean_t = np.mean(t1)
sample_std_t = np.std(t1)
# x = np.linspace(t.ppf(0.01, dof, loc=mean, scale=std), t.ppf(0.99, dof, loc=mean, scale=std), 100)
pdf_t = t.pdf(x, dof, loc=mean, scale=std)

plt.figure(figure_count)
figure_count += 1
plt.plot(x, pdf_t, 'r-', lw=2, alpha=0.6, label='t pdf, dof=2.5')
plt.plot(x, pdf_n, 'k-', lw=2, alpha=0.6, label='normal pdf')
# ax.hist(t1, normed=True, histtype='stepfilled', alpha=0.2)
plt.legend(loc='best', frameon=False)
plt.show()

# Calculate mean
cumsum = 0
from scipy.stats import f

##
# discussion items
# 1. Show histogram of all distributions


def plot_sample_hist(sample, title):
    plt.figure()
    plt.title(title)
    plt.hist(sample)


sample = norm.rvs(size=1000)
plot_sample_hist(sample, 'normal distribution')

sample = expon.rvs(size=1000)
plot_sample_hist(sample, 'exponential distribution')

sample = binom.rvs(10, 0.5, size=1000)
plot_sample_hist(sample, 'binomial distribution')

sample = chi2.rvs(10, size=1000)
plot_sample_hist(sample, 'chi-square distribution')

sample = t.rvs(10, size=1000)
plot_sample_hist(sample, 't distribution')

sample = f.rvs(10, 20, size=1000)
plot_sample_hist(sample, 'f distribution')
def simulate_latent_space(t, labels, seed=None, var=.2, split_prob=.1, gap=.75):
    """
    Simulate splitting events in the latent space.

    The input time t is a one-dimensional array holding the times. The labels
    argument is an int array-like which holds the labels for the wanted cell
    types. Basically it is an array of repetitions of 1 to the number of cell
    types, e.g. array([1..1, 2..2, 3..3, 4..4]) for 4 cell types.

    :param array_like t: the time as an [nx1] array, where n is the number of cells.
    :param array_like labels: the labels for the cells before splitting.
    :param int seed: the seed for this splitting, for reproducibility.
    :param scalar var: the variance of spread of the first split, increasing after that.
    :param [0,1] split_prob: probability of a split in the beginning, halving with each split.
    :param [0,1] gap: the gap size between split ends and the beginning of the next.

    The method returns Xsim, seed, labels, time::

        - Xsim is the two dimensional latent space with splits included.
        - seed is the seed generated, for reproducibility.
        - labels are the corrected labels, for split events.
        - time is the corrected timeline for split events.
    """
    seed = seed or np.random.randint(1000, 10000)
    np.random.seed(seed)
    n_data = t.shape[0]
    newlabs = []
    assert np.issubdtype(labels.dtype, np.int_) and np.greater(labels, 0).all(), \
        "labels need to be of positive integer dtype, 0 is not allowed"
    ulabs = []
    for x in range(n_data):
        if labels[x] not in ulabs:
            ulabs.append(labels[x])
    Xsim = np.zeros((n_data, 2))
    split_ends = [Xsim[0]]
    prev_ms = [[.1, .1]]
    split_end_times = [t[labels == ulabs[0]].max()]
    t = np.sort(t.copy(), 0)
    tmax = t.max()
    for lab in ulabs:
        fil = (lab == labels).nonzero()[0]
        # zero out, for simulating linear relation within cluster:
        new_se = []
        new_m = []
        new_set = []
        splits = np.array_split(fil, len(split_ends))
        i = 1
        for s in range(len(split_ends)):
            # for all previously done splits:
            prev_m = prev_ms[s]
            split = splits[s]
            split_end = split_ends[s]
            split_end_time = split_end_times[s]
            pre_theta = None
            prev_split_time = None
            for split in np.array_split(split, np.random.binomial(1, split_prob) + 1):
                newlabs.extend(["{} {}".format(_c, i) for _c in labels[split]])
                i += 1
                # If we split a collection into two, we want the two times to match up now:
                if prev_split_time is None:
                    prev_split_time = t[split].ptp()
                else:
                    t[split.min():] -= prev_split_time
                t[split] -= (t[split.min()] - split_end_time)
                # make splits longer, the farther in we are into
                # the split process; it scales with sqrt(<split#>)
                x = t[split].copy()
                x -= x.min()
                x /= x.max()
                x *= np.sqrt(lab)
                # rotate m away a little from the previous direction:
                if pre_theta is None:
                    pre_theta = theta = np.random.uniform(-45, 45)
                else:
                    theta = ((pre_theta + 90) % 90) - 90
                theta *= (np.pi / 180.)  # radians for rotation matrix
                rot_m = np.array([[np.cos(theta), -np.sin(theta)],
                                  [np.sin(theta), np.cos(theta)]])
                m = np.dot(rot_m, prev_m)
                # later splits have bigger spread:
                v = (x.mean(0) - np.abs((-x + x.mean(0))))
                v -= v.min(0) - 1e-6
                v /= v.max(0)
                v *= var * t[split] / tmax
                # make the split
                Xsim[split] = np.random.normal(split_end + m * x, v)
                # put a gap between this and the next split:
                p = m * x[-1]
                # p /= np.sqrt(GPy.util.linalg.tdot(p))
                # save the new sets of splits
                new_se.append(split_end + (1 + gap) * p)
                new_m.append(m)
                new_set.append(t[split.max()])
        split_ends = new_se
        prev_ms = new_m
        split_end_times = new_set
        # The split probability goes up every time the cell stage changes:
        split_prob = min(1., split_prob * 2)
    Xsim -= Xsim.mean(0)
    Xsim /= Xsim.std(0)
    # Xsim += np.random.normal(0, var, Xsim.shape)
    from scipy.stats import t as tdist
    Xsim += tdist.rvs(3, loc=0, scale=.1 * var, size=Xsim.shape)  # add outliers
    return Xsim, seed, np.asarray(newlabs), t