def test_student(self):
    """Check the mean-difference test on two small randomly drawn groups.

    Group 1 receives 29 draws from ``normal(0.0, 1.0)`` and group 2 receives
    28 draws from ``normal(0.09, 2.0)``. The resulting sampling distribution
    must be reported as Student's t, and the null hypothesis is expected not
    to be rejected at the 0.01 level (one- and two-tailed).
    """
    first_sample = Sample()
    second_sample = Sample()

    # (sample, mean, sigma, number of draws); group 1 is filled completely
    # before group 2 so the random draws happen in the same order as before.
    plan = (
        (first_sample, 0.0, 1.0, 29),
        (second_sample, 0.09, 2.0, 28),
    )
    for target, mu, sigma, count in plan:
        for _ in range(count):
            target.add_numeric(normal(mu, sigma))

    diff_distribution = MeanDiffSamplingDistribution(
        grp1_sample_distribution=SampleDistribution(first_sample),
        grp2_sample_distribution=SampleDistribution(second_sample))
    self.assertEqual(diff_distribution.distribution_family,
                     DistributionFamily.student_t)

    testing = MeanDiffTesting(sampling_distribution=diff_distribution)
    print('one tail p-value: ' + str(testing.p_value_one_tail))
    print('two tail p-value: ' + str(testing.p_value_two_tail))

    decision = testing.will_reject(0.01)
    one_tail_rejected, two_tail_rejected = decision
    print('will reject mean_1 == mean_2 (one-tail) ? ' + str(one_tail_rejected))
    print('will reject mean_1 == mean_2 (two-tail) ? ' + str(two_tail_rejected))
    self.assertFalse(one_tail_rejected)
    self.assertFalse(two_tail_rejected)
def get_truncated_normal():
    """Draw a speed by rejection sampling until it falls inside the limits.

    A candidate is ``STD_SPEED * normal(0.0, 1.0) + get_expected_velocity()``.
    Candidates that are negative, above ``MAX_V_SPEED`` or below
    ``MIN_V_SPEED`` are discarded and a fresh one is drawn.

    Returns:
        The first candidate that is not rejected.
    """
    # STD_SPEED, MIN_V_SPEED and MAX_V_SPEED are only read here, so no
    # ``global`` declaration is needed to reach the module-level values.
    while True:
        candidate = STD_SPEED * normal(0.0, 1.0) + get_expected_velocity()
        rejected = (candidate < 0
                    or candidate > MAX_V_SPEED
                    or candidate < MIN_V_SPEED)
        if not rejected:
            return candidate
def derive_qois(df_original):
    """Return a copy of *df_original* extended with derived quantity columns.

    The input frame is not modified. Columns are added in this order:
    ``period``, ``k_true``, ``k_app``, ``cnt``, ``a_st``, ``a_au``, ``inc``,
    ``t14``, ``t14_h``, ``r_app``, ``r_true``, ``r_app_point``,
    ``r_true_point``, ``r_app_rsun``, ``r_true_rsun`` and ``teff_p``.

    The stellar parameters (``rstar``, ``rstare``, ``star_teff``) and the
    units (``Rsun``, ``Rjup``, ``AU``) are names taken from the enclosing
    module, not arguments.
    """
    out = df_original.copy()
    n_samples = out.shape[0]

    # One stellar-radius draw per row; this is the first random draw made.
    radius_draws = normal(rstar.value, rstare.value, size=n_samples) * Rsun

    if 'p' in out.columns:
        period = out.p.values
    else:
        period = out.pr.values
    out['period'] = period

    out['k_true'] = sqrt(out.k2_true)
    out['k_app'] = sqrt(out.k2_app)
    out['cnt'] = 1. - out.k2_app / out.k2_true

    out['a_st'] = as_from_rhop(out.rho.values, period)
    out['a_au'] = out.a_st * radius_draws.to(AU)

    # The inclination is stored in degrees and converted back to radians
    # for the duration computation.
    inclination = i_from_ba(out.b.values, out.a_st.values)
    out['inc'] = degrees(inclination)
    out['t14'] = d_from_pkaiews(period, out.k_true.values, out.a_st.values,
                                radians(out.inc.values), 0.0, 0.0, 1)
    out['t14_h'] = 24 * out.t14

    # Radii from the per-row stellar radius draws (Rjup).
    out['r_app'] = out.k_app.values * radius_draws.to(Rjup)
    out['r_true'] = out.k_true.values * radius_draws.to(Rjup)
    # Radii from the single ``rstar`` value rather than the draws.
    out['r_app_point'] = out.k_app.values * rstar.to(Rjup)
    out['r_true_point'] = out.k_true.values * rstar.to(Rjup)
    # Radii from the per-row draws again, expressed in Rsun.
    out['r_app_rsun'] = out.k_app.values * radius_draws.to(Rsun)
    out['r_true_rsun'] = out.k_true.values * radius_draws.to(Rsun)

    # Draw order is kept: temperature first, then the two uniform samples.
    teff_draws = normal(*star_teff, size=n_samples)
    first_uniform = uniform(0.25, 0.50, n_samples)
    second_uniform = uniform(0, 0.4, n_samples)
    out['teff_p'] = Teq(teff_draws, out.a_st, first_uniform, second_uniform)
    return out
def get_truncated_normal(self):
    """Draw a speed by rejection sampling until it falls inside the limits.

    A candidate is
    ``self.STD_SPEED * normal(0.0, 1.0) + self.get_expected_velocity()``.
    Candidates that are negative, above ``self.MAX_V_SPEED`` or below
    ``self.MIN_V_SPEED`` are discarded and a fresh one is drawn.

    Returns:
        The first candidate that is not rejected.
    """
    while True:
        candidate = (self.STD_SPEED * normal(0.0, 1.0)
                     + self.get_expected_velocity())
        rejected = (candidate < 0
                    or candidate > self.MAX_V_SPEED
                    or candidate < self.MIN_V_SPEED)
        if not rejected:
            return candidate
def features(label, means, variances, noisemeans, noisevars):
    """Build a flat ``[index, value, index, value, ...]`` feature list.

    The parameter pairs for cluster *label* (``means[label]`` zipped with
    ``variances[label]``) come first, followed by the noise pairs
    (``noisemeans`` zipped with ``noisevars``). For each pair a running
    feature index starting at 0 is appended, then one draw
    ``normal(mean, var)``.
    """
    pairs = itertools.chain(
        zip(means[label], variances[label]),
        zip(noisemeans, noisevars),
    )
    flat = []
    for index, (mu, spread) in enumerate(pairs):
        flat.extend((index, normal(mu, spread)))
    return flat
def features(label, means, variances, noisemeans, noisevars):
    """Return alternating feature indices and sampled feature values.

    Real features are sampled first from the parameters of cluster *label*,
    then the noise features. Indices count up from 0 across both groups and
    each value is one draw ``normal(mean, var)``.
    """
    collected = []

    def emit(parameter_pairs, first_index):
        """Append ``index, sample`` for every pair; return the next free index."""
        position = first_index
        for centre, spread in parameter_pairs:
            collected.append(position)
            collected.append(normal(centre, spread))
            position += 1
        return position

    next_index = emit(zip(means[label], variances[label]), 0)
    emit(zip(noisemeans, noisevars), next_index)
    return collected
def normal(mean, std, shape=None):
    """normal(mean, std, n) or normal(mean, std, [n, m, ...])

    Return random numbers drawn via ``mt.normal`` with the specified mean
    and standard deviation.

    Args:
        mean: centre of the distribution.
        std: standard deviation of the distribution.
        shape: an int ``n`` or a sequence ``[n, m, ...]`` giving the output
            shape. ``None`` (the default) and the empty list ``[]`` both
            request a single draw.

    Returns:
        Whatever ``mt.normal(mean, std, shape)`` returns for the requested
        shape.
    """
    # The default used to be a shared mutable list. An explicit type check
    # also avoids ``shape == []``, which is an element-wise comparison when
    # ``shape`` is an ndarray.
    if isinstance(shape, list) and not shape:
        shape = None
    return mt.normal(mean, std, shape)
def gen_data(b, m, e, n=50):
    """Generate points on the line ``y = m * x + b`` with optional noise.

    Args:
        b: intercept.
        m: slope.
        e: noise scale passed as ``scale`` to ``normal``; a falsy value
            (e.g. 0) disables the noise.
        n: number of points, evenly spaced on [15, 100].

    Returns:
        Tuple ``(xs, ys)``.
    """
    xs = linspace(15, 100, n)
    ys = m * xs + b
    if not e:
        return xs, ys
    # Noise is added in place, one draw per point.
    ys += normal(scale=e, size=n)
    return xs, ys
def derive_qois(data: DataFrame, rstar: tuple = None, teff: tuple = None, distance_unit: Unit = R_jup):
    """Return a copy of *data* extended with derived quantity columns.

    Args:
        data: frame with a period column (``p`` or ``pr``) and the columns
            ``rho`` and ``b`` (or ``g`` plus ``k``/``k_true`` to derive
            ``b``); ``k2_true`` / ``k2_app`` are used when present.
        rstar: optional arguments unpacked into ``normal(*rstar, size=ns)``
            to draw one stellar radius per row; enables the ``r_app``,
            ``r_true`` and ``a_au`` columns.
        teff: optional arguments unpacked into ``normal(*teff, size=ns)``;
            enables the ``teq_p`` column.
        distance_unit: unit the radius draws are converted to.

    Returns:
        The extended copy; *data* itself is left untouched.
    """
    out = data.copy()
    n_samples = out.shape[0]

    period = out.p.values if 'p' in out else out.pr.values
    out['period'] = period

    # Column membership is checked on the squared ratios, which the
    # assignments below do not add or remove.
    has_true = 'k2_true' in out
    has_app = 'k2_app' in out
    if has_true:
        out['k_true'] = sqrt(out.k2_true)
    if has_app:
        out['k_app'] = sqrt(out.k2_app)
    if has_true and has_app:
        out['cnt'] = 1. - out.k2_app / out.k2_true

    # Derive ``b`` from ``g``, preferring ``k`` over ``k_true``.
    if 'g' in out:
        if 'k' in out:
            ratio_column = 'k'
        elif 'k_true' in out:
            ratio_column = 'k_true'
        else:
            ratio_column = None
        if ratio_column is not None:
            out['b'] = out.g * (1 + out[ratio_column])

    out['a'] = as_from_rhop(out.rho.values, period)
    out['inc'] = i_from_ba(out.b.values, out.a.values)
    out['t14'] = d_from_pkaiews(period, out.k_true.values, out.a.values,
                                out.inc.values, 0.0, 0.0, 1)
    out['t14_h'] = 24 * out.t14

    if rstar is not None:
        from astropy.units import R_sun
        radius_draws = normal(*rstar, size=n_samples) * R_sun
        rstar_d = radius_draws.to(distance_unit).value
        out['r_app'] = out.k_app.values * rstar_d
        out['r_true'] = out.k_true.values * rstar_d
        out['a_au'] = out.a * (rstar_d * distance_unit).to(AU)

    if teff is not None:
        # Draw order is kept: temperature first, then the two uniforms.
        teff_draws = normal(*teff, size=n_samples)
        first_uniform = uniform(0.25, 0.50, n_samples)
        second_uniform = uniform(0, 0.4, n_samples)
        out['teq_p'] = equilibrium_temperature(teff_draws, out.a,
                                               first_uniform, second_uniform)
    return out
def test_steepest_iris():
    """Steepest descent on standardized iris data reaches the reference weights.

    The start point is a small random matrix (scale 0.001) and the result
    after at most 500 iterations with ``rho=.5`` is compared to the
    reference values to two decimals.
    """
    dataset = iris()
    x = add_bias(StandardScaler().fit_transform(dataset['x']))
    y = binarize(dataset['y'])

    n_rows = y.shape[1] - 1
    n_cols = x.shape[1]
    start = normal(scale=.001, size=n_rows * n_cols)
    start.shape = n_rows, n_cols

    def cost(theta):
        return maxent.model.cost(x, y, theta, 1.)

    def grad(theta):
        return maxent.model.grad(x, y, theta, 1.)

    expected = [[0.0425072, -1.76158, 1.40147, -2.77042, -2.63817],
                [2.06606, -0.0531037, -0.120857, -1.19605, -2.26611]]
    fitted = steepest_gd(cost, grad, start, max_iter=500, rho=.5)[0]
    assert_array_almost_equal(expected, fitted, decimal=2)
def real_function(a_0, a_1, noise_sigma, x, covs=(1,)):
    """Evaluate the real function: a polynomial in *x* plus optional noise.

    The polynomial is ``sum(covs[i] * x**i)``.

    Args:
        a_0: unused; kept so existing callers keep working.
        a_1: unused; kept so existing callers keep working.
        noise_sigma: second argument of ``normal`` for the additive noise;
            ``0`` returns the noise-free function.
        x: evaluation points. Must support ``pow(x, i)``; when noise is
            requested it must also support ``len``.
        covs: polynomial coefficients, lowest order first.

    Returns:
        The polynomial values, with one ``normal(0, noise_sigma)`` draw
        added per element of *x* when ``noise_sigma`` is non-zero.
    """
    # The default is an immutable tuple rather than a shared list.
    values = sum(coef * pow(x, power) for power, coef in enumerate(covs))
    if noise_sigma == 0:
        # Recovers the true function. ``len(x)`` is not needed on this path,
        # so scalar ``x`` works too.
        return values
    return values + normal(0, noise_sigma, len(x))
def test_cgd_fr_iris():
    """``conjugate_gd_fr`` on standardized iris data reaches the reference weights.

    The start point is a small random matrix (scale 0.001) and the result
    after at most 50 iterations is compared to the reference values to two
    decimals.
    """
    dataset = iris()
    scaler = StandardScaler().fit(dataset['x'])
    x = add_bias(scaler.transform(dataset['x']))
    y = binarize(dataset['y'])

    n_rows = y.shape[1] - 1
    n_cols = x.shape[1]
    start = normal(scale=.001, size=n_rows * n_cols)
    start.shape = n_rows, n_cols

    def cost(theta):
        return maxent.model.cost(x, y, theta, 1.)

    def grad(theta):
        return maxent.model.grad(x, y, theta, 1.)

    expected = [[0.5157, -1.6937, 1.5391, -2.9251, -2.7841],
                [2.3700, -0.0502, -0.0128, -1.4165, -2.3897]]
    fitted = conjugate_gd_fr(cost, grad, start, max_iter=50)[0]
    assert_array_almost_equal(expected, fitted, decimal=2)
def test_anova(self):
    """ANOVA on three groups with nearly equal means should not reject.

    Each group receives 100 draws with sigma 1.0 and means 1.0, 1.1 and
    1.09; the test is evaluated at the 0.01 level.
    """
    sample = Sample()
    # (group label, mean, sigma)
    groups = (
        ('group1', 1.0, 1.0),
        ('group2', 1.1, 1.0),
        ('group3', 1.09, 1.0),
    )
    # One draw per group per round, so the draws interleave across groups.
    for _ in range(100):
        for label, mu, sigma in groups:
            sample.add_numeric(normal(mu, sigma), label)

    testing = Anova(sample=sample)
    print('p-value: ' + str(testing.p_value))
    rejected = testing.will_reject(0.01)
    print('will reject [same mean for all groups] ? ' + str(rejected))
    self.assertFalse(rejected)
def test_newton_iris():
    """Newton's method on iris data (bias added, no scaling) reaches the reference weights.

    The start point is a small random matrix (scale 0.001) and the result
    is compared to the reference values to three decimals.
    """
    dataset = iris()
    x = add_bias(dataset['x'])
    y = binarize(dataset['y'])

    n_rows = y.shape[1] - 1
    n_cols = x.shape[1]
    start = normal(scale=.001, size=n_rows * n_cols)
    start.shape = n_rows, n_cols

    def cost(theta):
        return maxent.model.cost(x, y, theta, 1.)

    def grad(theta):
        return maxent.model.grad(x, y, theta, 1.)

    def hessian(theta):
        return maxent.model.hessian(x, theta, 1.)

    expected = [[17.8988, -0.783738, 1.24289, -3.87904, -1.65902],
                [11.7486, 0.260549, -0.33588, -1.83314, -2.06362]]
    fitted = newton(cost, grad, hessian, start)[0]
    assert_array_almost_equal(expected, fitted, decimal=3)
def fit(self, x, y):
    """Estimate class likelihoods for every observed combination of x values.

    Seeds numpy's global random state with ``self.seed``, validates the
    inputs with ``check_X_y`` and stores the results on the instance:
    ``classes``, ``nclass``, ``class_priors``, ``x_likelihoods`` (one row
    per training sample) and ``likelihoods`` (one row per distinct x
    combination).

    Args:
        x: feature array; a 1-D array is treated as a single column.
        y: class labels, one per row of *x*.

    Returns:
        self
    """
    np.random.seed(self.seed)
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    x, y = check_X_y(x, y)
    self.classes = np.unique(y)
    self.nclass = self.classes.shape[0]

    # Contingency table: one row per distinct x combination, one count
    # column per class.
    ctab = pd.crosstab(y, list(x.T)).T.reset_index()
    xdim = x.shape[1]
    xcols = list(ctab.columns[:xdim])
    ycols = list(ctab.columns[xdim:])

    # Attach the class counts of its x combination to every training row.
    xtab = pd.DataFrame(x, columns=xcols)
    xtab = xtab.merge(ctab, how='left', on=xcols)
    self.class_priors = xtab[ycols].div(xtab[ycols].sum(axis=1), axis=0).mean().values

    if self.leave_one_out:
        # Remove each row's own label from its counts.
        xtab[ycols] -= pd.get_dummies(y)

    # Smooth the counts towards the priors with weight ``alpha``; the
    # 1E-15 term guards against division by zero.
    smoothed = xtab[ycols].add(self.class_priors * self.alpha)
    denominator = xtab[ycols].sum(axis=1) + self.alpha + 1E-15
    xtab[ycols] = smoothed.div(denominator, axis=0)

    if self.noise > 0:
        # Perturb, keep non-negative, then renormalize each row.
        xtab[ycols] = np.abs(
            xtab[ycols] + normal(0, scale=self.noise, size=xtab[ycols].shape))
        xtab[ycols] = xtab[ycols].div(xtab[ycols].sum(axis=1), axis=0)
    self.x_likelihoods = xtab[ycols].values

    xtab_agg = xtab.groupby(xcols, as_index=False)[ycols].agg(['mean']).fillna(0)
    xtab_agg.columns = xtab_agg.columns.get_level_values(1)
    # ``DataFrame.ix`` was removed in pandas 1.0; 'mean' is a label here, so
    # ``.loc`` performs the same lookup.
    self.likelihoods = xtab_agg.T.loc['mean'].reset_index(
        drop=True).T.reset_index()
    return self
def test_mean_student(self):
    """One-sample mean test on 29 draws from ``normal(0.0, 1.0)``.

    With the null hypothesis ``mean = 0`` the test is expected not to
    reject at the 0.01 level, one- or two-tailed.
    """
    mu, sigma, n_draws = 0.0, 1.0, 29
    sample = Sample()
    for _ in range(n_draws):
        sample.add_numeric(normal(mu, sigma))

    mean_distribution = MeanSamplingDistribution(
        sample_distribution=SampleDistribution(sample))
    testing = MeanTesting(sampling_distribution=mean_distribution,
                          mean_null=0.0)
    print('one tail p-value: ' + str(testing.p_value_one_tail))
    print('two tail p-value: ' + str(testing.p_value_two_tail))

    decision = testing.will_reject(0.01)
    one_tail_rejected, two_tail_rejected = decision
    print('will reject mean = 0 (one-tail) ? ' + str(one_tail_rejected))
    print('will reject mean = 0 (two-tail) ? ' + str(two_tail_rejected))
    self.assertFalse(one_tail_rejected)
    self.assertFalse(two_tail_rejected)
def add_poisson_gaussian_noise(image,
                               alpha=5,
                               sigma=0.01,
                               sap=0.0,
                               quant_bits=8,
                               dtype=numpy.float32,
                               clip=True,
                               fix_seed=True):
    """Return a noisy, quantized copy of *image*.

    Steps, in order:

    1. add normal noise scaled by ``sqrt(alpha * image + sigma**2)``;
    2. replace each element with a uniform random value where a second
       uniform draw is below ``sap``;
    3. round to multiples of ``1 / 2**quant_bits``;
    4. optionally clip to [0, 1];
    5. cast to ``dtype``.

    When ``fix_seed`` is true, numpy's global random state is reseeded
    with 0 first, which makes the output reproducible.
    """
    if fix_seed:
        numpy.random.seed(0)

    shape = image.shape
    # Draw order matters for reproducibility: normal, mask, replacement.
    gaussian = normal(size=shape)
    replace = uniform(size=shape) < sap

    amplitude = numpy.sqrt(alpha * image + sigma**2)
    result = image + amplitude * gaussian

    kept = result * (1 - replace)
    result = kept + replace * uniform(size=shape)

    levels = 2**quant_bits
    result = numpy.around(levels * result) / levels
    if clip:
        result = numpy.clip(result, 0, 1)
    return result.astype(dtype)
def predict_proba(self, x, noise=False):
    """Look up class probabilities for a single-column input.

    Each value of *x* is matched against the index of ``self.likelihoods``.
    Rows without a match, and rows that are entirely zero, get
    ``self.class_priors``.

    Args:
        x: 1-D array, or 2-D array with exactly one column.
        noise: falsy for no noise. A float is used as the noise scale; any
            other truthy value selects ``self.noise``. When noise is
            applied the global numpy random state is reseeded with
            ``self.seed`` and rows are renormalized afterwards.

    Returns:
        Array with one row per input value.

    Raises:
        ValueError: if *x* has more than one column.
    """
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    if x.shape[1] != 1:
        raise ValueError('x must be one dimensional.')

    lookup = pd.DataFrame(x, columns=['x'])
    probs = lookup.merge(self.likelihoods, how='left',
                         left_on='x', right_index=True)
    probs = probs.drop('x', axis=1)

    unmatched = probs.isnull().any(axis=1)
    all_zero = (probs == 0).all(axis=1)
    probs.loc[unmatched | all_zero, :] = self.class_priors

    if noise:
        np.random.seed(self.seed)
        if isinstance(noise, float):
            scale = noise
        else:
            scale = self.noise
        if scale > 1E-12:
            perturbation = normal(0, scale=scale, size=probs.shape)
            probs = np.abs(probs + perturbation)
            probs = probs.div(probs.sum(axis=1), axis=0)
    return probs.values
def test_sgd_iris():
    """SGD on standardized iris data reproduces the reference weights.

    The global numpy random state is seeded with 1 for reproducibility and
    reseeded without an argument once the comparison has passed.
    """
    numpy.random.seed(1)
    dataset = iris()
    x = add_bias(StandardScaler().fit_transform(dataset['x']))
    y = binarize(dataset['y'])

    n_rows = y.shape[1] - 1
    n_cols = x.shape[1]
    start = normal(scale=.001, size=n_rows * n_cols)
    start.shape = n_rows, n_cols

    def cost(theta):
        return maxent.model.cost(x, y, theta, 1.)

    def grad(theta, batch):
        # The gradient is evaluated on the current mini-batch only.
        return maxent.model.grad(batch[0], batch[1], theta, 1.)

    batches = MiniBatch(x, y, size=50)
    stats = {'method': 'sgd'}
    fitted = sgd(cost, grad, batches, start, rho=.5, max_iter=100,
                 stats=stats)[0]

    expected = [[-0.27884785, -1.21284649, 0.88989122, -1.77701123, -1.68204016],
                [1.01925233, -0.14850723, -0.41679621, -0.58286958, -1.29281991]]
    assert_array_almost_equal(expected, fitted, decimal=3)
    numpy.random.seed()
def predict_proba(self, x, noise=False):
    """Look up class probabilities for each row of *x*.

    The leading columns of ``self.likelihoods`` (all but the last
    ``self.nclass``) are used as join keys; the last ``self.nclass``
    columns are the values returned. Rows without a match, and rows that
    are entirely zero, get ``self.class_priors``.

    Args:
        x: feature array; a 1-D array is treated as a single column.
        noise: falsy for no noise. A float is used as the noise scale; any
            other truthy value selects ``self.noise``. When noise is
            applied the global numpy random state is reseeded with
            ``self.seed`` and rows are renormalized afterwards.

    Returns:
        Array with one row per input row and ``self.nclass`` columns.
    """
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    x = check_array(x)

    key_columns = self.likelihoods.columns[:-self.nclass]
    probs = pd.DataFrame(x, columns=key_columns)
    probs = probs.merge(self.likelihoods, how='left')
    probs = probs.drop(probs.columns[:-self.nclass], axis=1)

    unmatched = probs.isnull().any(axis=1)
    all_zero = (probs == 0).all(axis=1)
    probs.loc[unmatched | all_zero, :] = self.class_priors

    if noise:
        np.random.seed(self.seed)
        if isinstance(noise, float):
            scale = noise
        else:
            scale = self.noise
        if scale > 1E-12:
            perturbation = normal(0, scale=scale, size=probs.shape)
            probs = np.abs(probs + perturbation)
            probs = probs.div(probs.sum(axis=1), axis=0)
    return probs.values
def fit(self, x, y):
    """Estimate class likelihoods for every distinct value of a single feature.

    Seeds numpy's global random state with ``self.seed``, validates the
    inputs with ``check_X_y`` and stores the results on the instance:
    ``classes``, ``nclass``, ``class_priors``, ``x_likelihoods`` (one row
    per training sample) and ``likelihoods`` (one row per distinct x
    value).

    Args:
        x: 1-D array, or 2-D array with exactly one column.
        y: class labels, one per row of *x*.

    Returns:
        self

    Raises:
        ValueError: if *x* has more than one column.
    """
    np.random.seed(self.seed)
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    if x.shape[1] != 1:
        raise ValueError('x must be one dimensional.')
    x, y = check_X_y(x, y)
    self.classes = np.unique(y)
    self.nclass = self.classes.shape[0]

    # Contingency table: one row per distinct x value, one count column
    # per class.
    ctab = pd.crosstab(x[:, 0], y).reset_index()
    xcol = ctab.columns[0]
    ycols = list(ctab.columns[1:])

    # Attach the class counts of its x value to every training row.
    xtab = pd.DataFrame(x).rename(columns={0: xcol})
    xtab = xtab.merge(ctab, how='left', on=xcol)
    self.class_priors = xtab[ycols].div(xtab[ycols].sum(axis=1), axis=0).mean().values

    if self.leave_one_out:
        # Remove each row's own label from its counts.
        xtab[ycols] -= pd.get_dummies(y)

    # Smooth the counts towards the priors with weight ``alpha``; the
    # 1E-15 term guards against division by zero.
    smoothed = xtab[ycols].add(self.class_priors * self.alpha)
    denominator = xtab[ycols].sum(axis=1) + self.alpha + 1E-15
    xtab[ycols] = smoothed.div(denominator, axis=0)

    if self.noise > 0:
        # Perturb, keep non-negative, then renormalize each row.
        xtab[ycols] = np.abs(
            xtab[ycols] + normal(0, scale=self.noise, size=xtab[ycols].shape))
        xtab[ycols] = xtab[ycols].div(xtab[ycols].sum(axis=1), axis=0)
    self.x_likelihoods = xtab[ycols].values

    xtab_agg = xtab.groupby(xcol, as_index=False)[ycols].agg(['mean', 'std']).fillna(0)
    xtab_agg.columns = xtab_agg.columns.get_level_values(1)
    # ``DataFrame.ix`` was removed in pandas 1.0; 'mean' is a label here, so
    # ``.loc`` performs the same lookup.
    self.likelihoods = xtab_agg.T.loc['mean'].reset_index(drop=True).T
    return self
def make_theta(c, p):
    """Return a ``(c, p)`` array of small random start weights.

    The values are ``c * p`` draws from ``normal(scale=.001)`` laid out
    row by row.
    """
    return normal(scale=.001, size=(c, p))
def rand_log_normal(alpha, beta, shape):
    """Return the exponential of ``R.normal(alpha, beta, shape)`` draws.

    Args:
        alpha: first argument passed to ``R.normal``.
        beta: second argument passed to ``R.normal``.
        shape: third argument passed to ``R.normal``.
    """
    gaussian_draws = R.normal(alpha, beta, shape)
    return N0.exp(gaussian_draws)
vvar = options.vvar feats = options.feats noiseFeats = options.noisefeats sigma = options.sigma print(sys.stderr, "%d real features, %d noise features" %\ (feats, noiseFeats)) clusterPrior = computePrior(options) #cluster means are normal(0, sigma) means = zeros((len(clusterPrior),feats)) for cluster in range(len(clusterPrior)): for feat in range(feats): means[cluster][feat] = normal(0, cvar) variances = zeros((len(clusterPrior), feats)) if vvar == 0: variances += sigma else: for cluster in range(len(clusterPrior)): for feat in range(feats): variances[cluster][feat] += invGamma(vvar, 1.0) #currently all noise features have mean 0 noiseMeans = zeros((noiseFeats,)) #noise variances are all sigma noiseVariances = zeros((noiseFeats,)) if vvar == 0:
def make_theta(c):
    """Return *c* small random start weights drawn from ``normal(scale=.001)``."""
    start_weights = normal(scale=.001, size=c)
    return start_weights
def augment_batch(images: torch.Tensor, p: float) -> torch.Tensor:
    """Apply a chain of random augmentations to a batch of images (deprecated).

    Emits a ``DeprecationWarning`` on every call.

    Every augmentation picks the affected samples with its own random mask
    over the batch dimension. Each augmentation parameter (angle, scale,
    brightness, ...) is drawn by a single call per batch.

    Args:
        images: 4-D tensor unpacked as (batch, channels, height, width).
        p: probability threshold used for the per-sample masks.

    Returns:
        Tensor with the same batch, channel, height and width sizes as the
        input.
    """
    warnings.warn("augment_batch is deprecated", DeprecationWarning)
    batch_size, channels, h_orig, w_orig = images.size()
    # Reflect-pad the batch; the padded size is re-read into h and w below.
    images = pad(images,
                 padding=(w_orig - 1, h_orig - 1, w_orig - 1, h_orig - 1),
                 padding_mode='reflect')
    batch_size, channels, h, w = images.size()

    # Horizontal flip: a sample must pass both the `p` draw and a 0.5 draw.
    mask = (torch.rand(batch_size) < p).logical_and(
        torch.rand(batch_size) < 0.5)
    images[mask] = hflip(images[mask])

    # Result buffer at the original (unpadded) spatial size.
    output_images = images.new_zeros((batch_size, channels, h_orig, w_orig))
    translate = (0, 0)

    # Rotation angle: a multiple of -90 degrees ...
    angle_step = choice([0, 1, 2, 3])
    angle = -90 * angle_step

    # Isotropic scale factor and the samples it may apply to.
    scale_iso_mask = torch.rand(batch_size) < p
    scale_iso = lognormal(0, 0.2 * math.log(2))
    scale = (scale_iso, scale_iso)

    # ... plus an arbitrary rotation, applied to samples chosen with p_rot.
    p_rot = 1 - math.sqrt(1 - p)
    rot_mask = torch.rand(batch_size) < p_rot
    theta = uniform(-180, 180)
    angle += theta

    # Anisotropic scaling: stretch one axis and shrink the other.
    scale_mask = torch.rand(batch_size) < p
    scale_factor = lognormal(0, 0.2 * math.log(2))
    scale_x, scale_y = scale
    scale = (scale_x * scale_factor, scale_y / scale_factor)
    new_size = (int(h * scale[0]), int(w * scale[1]))

    if torch.any(rot_mask):
        # The affine call only rotates (scale=1); scaling is done by the
        # resize further down.
        affine_transformed = affine(images[rot_mask],
                                    angle=angle,
                                    translate=list(translate),
                                    shear=[0., 0.],
                                    scale=1)
        images[rot_mask] = affine_transformed

    # Only samples selected by BOTH scale masks are resized.
    resize_mask = scale_iso_mask.logical_and(scale_mask)
    resized_images = resize(images[resize_mask], list(new_size))
    # Center-crop everything back to the original size.
    output_images[resize_mask.logical_not()] = center_crop(
        images[resize_mask.logical_not()], (h_orig, w_orig))
    output_images[resize_mask] = center_crop(resized_images, (h_orig, w_orig))
    images = output_images

    # Brightness.
    mask = torch.rand(batch_size) < p
    brightness = normal(1, 0.2)
    images[mask] = adjust_brightness(images[mask], brightness)

    # Contrast.
    mask = torch.rand(batch_size) < p
    contrast = lognormal(0, (0.5 * math.log(2)))
    images[mask] = adjust_contrast(images[mask], contrast)

    # Invert channel 0 of the YCbCr representation (presumably the luma
    # channel; confirm against rgb_to_ycbcr).
    mask = torch.rand(batch_size) < p
    image_data = rgb_to_ycbcr(images[mask])
    image_data[..., 0, :, :] = (1 - image_data[..., 0, :, :])
    images[mask] = ycbcr_to_rgb(image_data)

    # Hue shift; skipped when no sample is selected.
    mask = torch.rand(batch_size) < p
    if torch.any(mask):
        hue_factor = uniform(-0.5, 0.5)
        images[mask] = adjust_hue(images[mask], hue_factor)

    # Saturation.
    mask = torch.rand(batch_size) < p
    saturation = lognormal(0, math.log(2))
    images[mask] = adjust_saturation(images[mask], saturation)

    # Additive noise with a randomly drawn standard deviation.
    mask = torch.rand(batch_size) < p
    std_dev = abs(normal(0, 0.1))
    noise_images = torch.randn_like(images[mask]) * std_dev
    # NOTE(review): the clamp is applied to the noise itself, so only
    # non-negative noise is added. Confirm that clamping the noise, rather
    # than the noisy image, is intended.
    images[mask] += noise_images.clamp(0, 1)
    return images
vvar = options.vvar feats = options.feats noiseFeats = options.noisefeats sigma = options.sigma print(sys.stderr, "%d real features, %d noise features" %\ (feats, noiseFeats)) clusterPrior = computePrior(options) #cluster means are normal(0, sigma) means = zeros((len(clusterPrior), feats)) for cluster in range(len(clusterPrior)): for feat in range(feats): means[cluster][feat] = normal(0, cvar) variances = zeros((len(clusterPrior), feats)) if vvar == 0: variances += sigma else: for cluster in range(len(clusterPrior)): for feat in range(feats): variances[cluster][feat] += invGamma(vvar, 1.0) #currently all noise features have mean 0 noiseMeans = zeros((noiseFeats, )) #noise variances are all sigma noiseVariances = zeros((noiseFeats, )) if vvar == 0:
def gaussian_noise(matrix, mean=0, std=0.1):
    """Return ``transform(matrix, f)`` where ``f`` adds normal noise.

    ``f`` adds one draw ``normal(loc=mean, scale=std)`` to its argument and
    passes the sum through ``truncate_range``.

    Args:
        matrix: input handed to ``transform``.
        mean: ``loc`` of the noise.
        std: ``scale`` of the noise.
    """

    def _perturb(px):
        # A fresh draw is made every time ``transform`` calls this.
        return truncate_range(px + normal(loc=mean, scale=std))

    return transform(matrix, _perturb)