    def test_student(self):
        grp1_mu = 0.0
        grp1_sigma = 1.0
        grp1_sample_size = 29
        grp1_sample = Sample()

        grp2_mu = 0.09
        grp2_sigma = 2.0
        grp2_sample_size = 28
        grp2_sample = Sample()

        for i in range(grp1_sample_size):
            grp1_sample.add_numeric(normal(grp1_mu, grp1_sigma))

        for i in range(grp2_sample_size):
            grp2_sample.add_numeric(normal(grp2_mu, grp2_sigma))

        sampling_distribution = MeanDiffSamplingDistribution(
            grp1_sample_distribution=SampleDistribution(grp1_sample),
            grp2_sample_distribution=SampleDistribution(grp2_sample))
        self.assertEqual(sampling_distribution.distribution_family,
                         DistributionFamily.student_t)
        testing = MeanDiffTesting(sampling_distribution=sampling_distribution)
        print('one tail p-value: ' + str(testing.p_value_one_tail))
        print('two tail p-value: ' + str(testing.p_value_two_tail))
        reject_one_tail, reject_two_tail = testing.will_reject(0.01)
        print('will reject mean_1 == mean_2 (one-tail) ? ' +
              str(reject_one_tail))
        print('will reject mean_1 == mean_2 (two-tail) ? ' +
              str(reject_two_tail))
        self.assertFalse(reject_one_tail)
        self.assertFalse(reject_two_tail)
Example #2
def get_truncated_normal():
    global STD_SPEED, MIN_V_SPEED, MAX_V_SPEED

    speed = STD_SPEED * normal(0.0, 1.0) + get_expected_velocity()
    while speed < 0 or speed > MAX_V_SPEED or speed < MIN_V_SPEED:
        speed = STD_SPEED * normal(0.0, 1.0) + get_expected_velocity()
    return speed
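For comparison, scipy offers a direct truncated normal that avoids the rejection loop above; a minimal sketch, assuming hypothetical bounds lo/hi and that mu and std play the roles of get_expected_velocity() and STD_SPEED:

from scipy.stats import truncnorm

def get_truncated_normal_scipy(mu, std, lo, hi):
    # truncnorm takes its bounds in standard-deviation units around loc
    a, b = (lo - mu) / std, (hi - mu) / std
    return truncnorm.rvs(a, b, loc=mu, scale=std)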
Example #3
def derive_qois(df_original):
    df = df_original.copy()
    ns = df.shape[0]

    rstar_d = normal(rstar.value, rstare.value, size=ns) * Rsun
    period = df.p.values if 'p' in df.columns else df.pr.values

    df['period'] = period
    df['k_true'] = sqrt(df.k2_true)
    df['k_app'] = sqrt(df.k2_app)
    df['cnt'] = 1. - df.k2_app / df.k2_true
    df['a_st'] = as_from_rhop(df.rho.values, period)
    df['a_au'] = df.a_st * rstar_d.to(AU)
    df['inc'] = degrees(i_from_ba(df.b.values, df.a_st.values))
    df['t14'] = d_from_pkaiews(period, df.k_true.values, df.a_st.values,
                               radians(df.inc.values), 0.0, 0.0, 1)
    df['t14_h'] = 24 * df.t14

    df['r_app'] = df.k_app.values * rstar_d.to(Rjup)
    df['r_true'] = df.k_true.values * rstar_d.to(Rjup)
    df['r_app_point'] = df.k_app.values * rstar.to(Rjup)
    df['r_true_point'] = df.k_true.values * rstar.to(Rjup)

    df['r_app_rsun'] = df.k_app.values * rstar_d.to(Rsun)
    df['r_true_rsun'] = df.k_true.values * rstar_d.to(Rsun)
    df['teff_p'] = Teq(normal(*star_teff, size=ns), df.a_st,
                       uniform(0.25, 0.50, ns), uniform(0, 0.4, ns))
    return df
Example #4
    def get_truncated_normal(self):
        speed = self.STD_SPEED * normal(0.0, 1.0) + self.get_expected_velocity()
        while speed < 0 or speed > self.MAX_V_SPEED or speed < self.MIN_V_SPEED:
            speed = self.STD_SPEED * normal(0.0, 1.0) + self.get_expected_velocity()
        return speed
Example #5
def features(label, means, variances, noisemeans, noisevars):
    feats = []
    fctr = itertools.count()
    for mean, var in zip(means[label], variances[label]):
        feats.append(next(fctr))
        feats.append(normal(mean, var))
    for mean, var in zip(noisemeans, noisevars):
        feats.append(next(fctr))
        feats.append(normal(mean, var))
    return feats
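An illustrative call of features, assuming itertools and numpy.random.normal are imported as the snippet requires; the hypothetical inputs below describe one label with two real features plus one noise feature:

means = {'a': [0.0, 1.0]}
variances = {'a': [1.0, 1.0]}
feats = features('a', means, variances, noisemeans=[0.0], noisevars=[1.0])
# -> [0, draw0, 1, draw1, 2, noise_draw]: alternating feature index / sampled value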
Example #6
def features(label, means, variances, noisemeans, noisevars):
    feats = []
    fctr = itertools.count()
    for mean, var in zip(means[label], variances[label]):
        feats.append(next(fctr))
        feats.append(normal(mean, var))
    for mean, var in zip(noisemeans, noisevars):
        feats.append(next(fctr))
        feats.append(normal(mean, var))
    return feats
Example #7
def normal(mean, std, shape=[]):
    """normal(mean, std, n) or normal(mean, std, [n, m, ...]) returns
    array of random numbers randomly distributed with specified mean and
    standard deviation"""
    if shape == []:
        shape = None
    return mt.normal(mean, std, shape)
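The shape == [] check maps the default to None, so numpy returns a bare scalar instead of a zero-dimensional array. A sketch of both call forms, assuming mt is an alias for numpy.random:

import numpy.random as mt

x = normal(0.0, 1.0)           # scalar draw (shape defaults to None)
xs = normal(0.0, 1.0, [3, 2])  # 3x2 array of draws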
Example #8
def normal(mean, std, shape=[]):
    """normal(mean, std, n) or normal(mean, std, [n, m, ...]) returns
    array of random numbers randomly distributed with specified mean and
    standard deviation"""
    if shape == []:
        shape = None
    return mt.normal(mean, std, shape)
Example #9
def gen_data(b, m, e, n=50):
    xs = linspace(15, 100, n)
    ys = m * xs + b
    if e:
        ys += normal(scale=e, size=n)

    # ys[6] = ys[6] + 10
    return xs, ys
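A hypothetical call of gen_data, assuming numpy's linspace and numpy.random.normal are in scope as the example requires:

xs, ys = gen_data(b=2.0, m=0.5, e=1.5)  # 50 points on y = 0.5*x + 2 with N(0, 1.5) noise
xs, ys = gen_data(b=2.0, m=0.5, e=0)    # noise-free line (e is falsy)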
Example #10
def derive_qois(data: DataFrame,
                rstar: tuple = None,
                teff: tuple = None,
                distance_unit: Unit = R_jup):
    df = data.copy()
    ns = df.shape[0]

    df['period'] = period = df.p.values if 'p' in df else df.pr.values

    if 'k2_true' in df:
        df['k_true'] = sqrt(df.k2_true)
    if 'k2_app' in df:
        df['k_app'] = sqrt(df.k2_app)

    if 'k2_true' in df and 'k2_app' in df:
        df['cnt'] = 1. - df.k2_app / df.k2_true

    if 'g' in df:
        if 'k' in df:
            df['b'] = df.g * (1 + df.k)
        elif 'k_true' in df:
            df['b'] = df.g * (1 + df.k_true)

    df['a'] = as_from_rhop(df.rho.values, period)
    df['inc'] = i_from_ba(df.b.values, df.a.values)
    df['t14'] = d_from_pkaiews(period, df.k_true.values, df.a.values,
                               df.inc.values, 0.0, 0.0, 1)
    df['t14_h'] = 24 * df.t14

    if rstar is not None:
        from astropy.units import R_sun
        rstar_d = (normal(*rstar, size=ns) * R_sun).to(distance_unit).value
        df['r_app'] = df.k_app.values * rstar_d
        df['r_true'] = df.k_true.values * rstar_d
        df['a_au'] = df.a * (rstar_d * distance_unit).to(AU)

    if teff is not None:
        df['teq_p'] = equilibrium_temperature(normal(*teff, size=ns), df.a,
                                              uniform(0.25, 0.50, ns),
                                              uniform(0, 0.4, ns))
    return df
Example #11
def test_steepest_iris():
    data = iris()
    x = add_bias(StandardScaler().fit_transform(data['x']))
    y = binarize(data['y'])
    theta = normal(scale=.001, size=(y.shape[1] - 1) * x.shape[1])
    theta.shape = y.shape[1] - 1, x.shape[1]

    c = lambda theta: maxent.model.cost(x, y, theta, 1.)
    g = lambda theta: maxent.model.grad(x, y, theta, 1.)

    assert_array_almost_equal([[0.0425072, -1.76158, 1.40147, -2.77042, -2.63817],
                               [2.06606, -0.0531037, -0.120857, -1.19605, -2.26611]],
                              steepest_gd(c, g, theta, max_iter=500, rho=.5)[0], decimal=2)
Example #12
def real_function(a_0, a_1, noise_sigma, x, covs=[1]):
    """
    Evaluates the polynomial sum_i covs[i] * x**i and, when noise_sigma is
    non-zero, adds N(0, noise_sigma) noise to each of the len(x) points
    """
    N = len(x)
    tmpSum = 0
    for i in range(len(covs)):
        tmpSum = tmpSum + covs[i] * pow(x, i)
    if noise_sigma == 0:
        # Recovers the true function
        return tmpSum
    else:
        return tmpSum + normal(0, noise_sigma, N)
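An illustrative call, assuming numpy is imported as np and normal is numpy.random.normal; the hypothetical covs below encode the polynomial 1 + 2*x (the a_0 and a_1 arguments are unused by the body):

x = np.linspace(0.0, 1.0, 5)
y_true = real_function(0, 0, 0, x, covs=[1, 2])     # exact values of 1 + 2*x
y_noisy = real_function(0, 0, 0.1, x, covs=[1, 2])  # same plus N(0, 0.1) noise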
Example #13
def test_cgd_fr_iris():
    data = iris()
    sc = StandardScaler().fit(data['x'])
    x = add_bias(sc.transform(data['x']))
    y = binarize(data['y'])
    theta = normal(scale=.001, size=(y.shape[1] - 1) * x.shape[1])
    theta.shape = y.shape[1] - 1, x.shape[1]

    c = lambda theta: maxent.model.cost(x, y, theta, 1.)
    g = lambda theta: maxent.model.grad(x, y, theta, 1.)

    assert_array_almost_equal([[0.5157, -1.6937, 1.5391, -2.9251, -2.7841],
                               [2.3700, -0.0502, -0.0128, -1.4165, -2.3897]],
                              conjugate_gd_fr(c, g, theta, max_iter=50)[0], decimal=2)
Example #14
    def test_anova(self):
        sample = Sample()

        mu1 = 1.0
        sigma1 = 1.0

        mu2 = 1.1
        sigma2 = 1.0

        mu3 = 1.09
        sigma3 = 1.0

        for i in range(100):
            sample.add_numeric(normal(mu1, sigma1), 'group1')
            sample.add_numeric(normal(mu2, sigma2), 'group2')
            sample.add_numeric(normal(mu3, sigma3), 'group3')

        testing = Anova(sample=sample)

        print('p-value: ' + str(testing.p_value))
        reject = testing.will_reject(0.01)
        print('will reject [same mean for all groups] ? ' + str(reject))
        self.assertFalse(reject)
Example #15
def test_newton_iris():
    data = iris()
    x = add_bias(data['x'])
    y = binarize(data['y'])
    theta = normal(scale=.001, size=(y.shape[1] - 1)*x.shape[1])
    theta.shape = y.shape[1] - 1, x.shape[1]

    c = lambda theta: maxent.model.cost(x, y, theta, 1.)
    g = lambda theta: maxent.model.grad(x, y, theta, 1.)
    h = lambda theta: maxent.model.hessian(x, theta, 1.)

    assert_array_almost_equal([[17.8988, -0.783738, 1.24289, -3.87904, -1.65902],
                               [11.7486, 0.260549, -0.33588, -1.83314, -2.06362]],
                              newton(c, g, h, theta)[0], decimal=3)
Example #16
    def fit(self, x, y):
        np.random.seed(self.seed)
        if len(x.shape) == 1:
            x = x.reshape(-1, 1)

        x, y = check_X_y(x, y)

        self.classes = np.unique(y)
        self.nclass = self.classes.shape[0]

        ctab = pd.crosstab(y, list(x.T)).T.reset_index()

        xdim = x.shape[1]
        xcols = list(ctab.columns[:xdim])
        ycols = list(ctab.columns[xdim:])

        xtab = pd.DataFrame(x, columns=xcols)
        xtab = xtab.merge(ctab, how='left', on=xcols)

        self.class_priors = xtab[ycols].div(xtab[ycols].sum(axis=1),
                                            axis=0).mean().values

        if self.leave_one_out:
            xtab[ycols] -= pd.get_dummies(y)

        xtab[ycols] = xtab[ycols].add(self.class_priors * self.alpha). \
            div(xtab[ycols].sum(axis=1) + self.alpha + 1E-15, axis=0)
        if self.noise > 0:
            xtab[ycols] = np.abs(
                xtab[ycols] +
                normal(0, scale=self.noise, size=xtab[ycols].shape))
            xtab[ycols] = xtab[ycols].div(xtab[ycols].sum(axis=1), axis=0)
        self.x_likelihoods = xtab[ycols].values

        xtab_agg = xtab.groupby(xcols,
                                as_index=False)[ycols].agg(['mean']).fillna(0)
        xtab_agg.columns = xtab_agg.columns.get_level_values(1)

        self.likelihoods = xtab_agg.T.loc['mean'].reset_index(
            drop=True).T.reset_index()
        # self.likelihoods = xtab_agg.T.ix['mean'].reset_index(drop=True).to_dict('list')
        # self.likelihoods_cov = xtab_agg.T.ix['std'].reset_index(drop=True).to_dict('list')
        # self.likelihoods_cov = dict((k, np.diag(v)) for k, v in self.likelihoods_cov.items())

        return self
Example #17
    def test_mean_student(self):
        mu = 0.0
        sigma = 1.0
        sample_size = 29
        sample = Sample()

        for i in range(sample_size):
            sample.add_numeric(normal(mu, sigma))

        sampling_distribution = MeanSamplingDistribution(sample_distribution=SampleDistribution(sample))
        testing = MeanTesting(sampling_distribution=sampling_distribution, mean_null=0.0)

        print('one tail p-value: ' + str(testing.p_value_one_tail))
        print('two tail p-value: ' + str(testing.p_value_two_tail))
        reject_one_tail, reject_two_tail = testing.will_reject(0.01)
        print('will reject mean = 0 (one-tail) ? ' + str(reject_one_tail))
        print('will reject mean = 0 (two-tail) ? ' + str(reject_two_tail))
        self.assertFalse(reject_one_tail)
        self.assertFalse(reject_two_tail)
Example #18
def add_poisson_gaussian_noise(image,
                               alpha=5,
                               sigma=0.01,
                               sap=0.0,
                               quant_bits=8,
                               dtype=numpy.float32,
                               clip=True,
                               fix_seed=True):
    if fix_seed:
        numpy.random.seed(0)
    rnd = normal(size=image.shape)
    rnd_bool = uniform(size=image.shape) < sap

    noisy = image + numpy.sqrt(alpha * image + sigma**2) * rnd
    noisy = noisy * (1 - rnd_bool) + rnd_bool * uniform(size=image.shape)
    noisy = numpy.around((2**quant_bits) * noisy) / 2**quant_bits
    noisy = numpy.clip(noisy, 0, 1) if clip else noisy
    noisy = noisy.astype(dtype)
    return noisy
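A minimal sketch of applying the noise model above to a synthetic image, assuming numpy and numpy.random's normal/uniform are imported as the example requires:

image = uniform(size=(64, 64)).astype(numpy.float32)  # synthetic image in [0, 1)
noisy = add_poisson_gaussian_noise(image, alpha=5, sigma=0.01, sap=0.01)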
Example #19
    def predict_proba(self, x, noise=False):
        if len(x.shape) == 1:
            x = x.reshape(-1, 1)

        if x.shape[1] != 1:
            raise ValueError('x must be one dimensional.')

        xx = pd.DataFrame(x, columns=['x']).merge(self.likelihoods, how='left', left_on='x', right_index=True)
        xx.drop('x', axis=1, inplace=True)
        xx.loc[xx.isnull().any(axis=1) | (xx == 0).all(axis=1), :] = self.class_priors

        if noise:
            np.random.seed(self.seed)
            _noise = noise if isinstance(noise, float) else self.noise
            if _noise > 1E-12:
                xx = np.abs(xx + normal(0, scale=_noise, size=xx.shape))
                xx = xx.div(xx.sum(axis=1), axis=0)

        # return np.apply_along_axis(self._get_likelihood, 1, x, noise)
        return xx.values
Example #20
def test_sgd_iris():
    numpy.random.seed(1)

    data = iris()
    x = add_bias(StandardScaler().fit_transform(data['x']))
    y = binarize(data['y'])
    theta = normal(scale=.001, size=(y.shape[1] - 1) * x.shape[1])
    theta.shape = y.shape[1] - 1, x.shape[1]

    c = lambda theta: maxent.model.cost(x, y, theta, 1.)
    g = lambda theta, batch: maxent.model.grad(batch[0], batch[1], theta, 1.)
    b = MiniBatch(x, y, size=50)
    stats = {'method': 'sgd'}

    stats_ = sgd(c, g, b, theta, rho=.5, max_iter=100, stats=stats)[0]
    assert_array_almost_equal([[-0.27884785, -1.21284649,  0.88989122, -1.77701123, -1.68204016],
                               [ 1.01925233, -0.14850723, -0.41679621, -0.58286958, -1.29281991]],
                              stats_, decimal=3)

    numpy.random.seed()
Example #21
    def predict_proba(self, x, noise=False):
        if len(x.shape) == 1:
            x = x.reshape(-1, 1)

        x = check_array(x)

        xx = pd.DataFrame(x, columns=self.likelihoods.columns[:-self.nclass])
        xx = xx.merge(self.likelihoods, how='left')
        xx.drop(xx.columns[:-self.nclass], axis=1, inplace=True)
        xx.loc[xx.isnull().any(axis=1) | (xx == 0).all(axis=1), :] = self.class_priors

        if noise:
            np.random.seed(self.seed)
            _noise = noise if isinstance(noise, float) else self.noise
            if _noise > 1E-12:
                xx = np.abs(xx + normal(0, scale=_noise, size=xx.shape))
                xx = xx.div(xx.sum(axis=1), axis=0)

        # return np.apply_along_axis(self._get_likelihood, 1, x, noise)
        return xx.values
Example #22
    def fit(self, x, y):
        np.random.seed(self.seed)
        if len(x.shape) == 1:
            x = x.reshape(-1, 1)

        if x.shape[1] != 1:
            raise ValueError('x must be one dimensional.')

        x, y = check_X_y(x, y)

        self.classes = np.unique(y)
        self.nclass = self.classes.shape[0]

        ctab = pd.crosstab(x[:, 0], y).reset_index()
        xcol = ctab.columns[0]
        ycols = list(ctab.columns[1:])

        xtab = pd.DataFrame(x).rename(columns={0: xcol})
        xtab = xtab.merge(ctab, how='left', on=xcol)

        self.class_priors = xtab[ycols].div(xtab[ycols].sum(axis=1), axis=0).mean().values

        if self.leave_one_out:
            xtab[ycols] -= pd.get_dummies(y)

        xtab[ycols] = xtab[ycols].add(self.class_priors * self.alpha). \
            div(xtab[ycols].sum(axis=1) + self.alpha + 1E-15, axis=0)
        if self.noise > 0:
            xtab[ycols] = np.abs(xtab[ycols] + normal(0, scale=self.noise, size=xtab[ycols].shape))
            xtab[ycols] = xtab[ycols].div(xtab[ycols].sum(axis=1), axis=0)
        self.x_likelihoods = xtab[ycols].values

        xtab_agg = xtab.groupby(xcol, as_index=False)[ycols].agg(['mean', 'std']).fillna(0)
        xtab_agg.columns = xtab_agg.columns.get_level_values(1)

        self.likelihoods = xtab_agg.T.loc['mean'].reset_index(drop=True).T
        # self.likelihoods = xtab_agg.T.ix['mean'].reset_index(drop=True).to_dict('list')
        # self.likelihoods_cov = xtab_agg.T.ix['std'].reset_index(drop=True).to_dict('list')
        # self.likelihoods_cov = dict((k, np.diag(v)) for k, v in self.likelihoods_cov.items())

        return self
Example #23
File: model.py  Project: andrely/mimir
def make_theta(c, p):
    theta = normal(scale=.001, size=c * p)
    theta.shape = c, p

    return theta
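An illustrative call, assuming numpy.random.normal is in scope as normal:

theta = make_theta(3, 5)  # 3x5 matrix of small random initial weights
assert theta.shape == (3, 5)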
Example #24
def rand_log_normal(alpha, beta, shape):
    return N0.exp(R.normal(alpha, beta, shape))
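An equivalent sketch using numpy directly, assuming the original's N0 and R are legacy Numeric/RandomArray-style aliases for numpy and numpy.random:

import numpy as np

def rand_log_normal_np(alpha, beta, shape):
    # exponentiating a normal(alpha, beta) draw yields a log-normal sample
    return np.exp(np.random.normal(alpha, beta, shape))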
Example #25
    vvar = options.vvar

    feats = options.feats
    noiseFeats = options.noisefeats
    sigma = options.sigma

    print("%d real features, %d noise features" % (feats, noiseFeats),
          file=sys.stderr)

    clusterPrior = computePrior(options)

    # cluster means are normal(0, cvar)
    means = zeros((len(clusterPrior), feats))
    for cluster in range(len(clusterPrior)):
        for feat in range(feats):
            means[cluster][feat] = normal(0, cvar)

    variances = zeros((len(clusterPrior), feats))
    if vvar == 0:
        variances += sigma
    else:
        for cluster in range(len(clusterPrior)):
            for feat in range(feats):
                variances[cluster][feat] += invGamma(vvar, 1.0)

    #currently all noise features have mean 0
    noiseMeans = zeros((noiseFeats,))

    #noise variances are all sigma
    noiseVariances = zeros((noiseFeats,))
    if vvar == 0:
        noiseVariances += sigma
Example #26
File: model.py  Project: andrely/mimir
def make_theta(c):
    return normal(scale=.001, size=c)
Example #27
def augment_batch(images: torch.Tensor, p: float) -> torch.Tensor:
    warnings.warn("augment_batch is deprecated", DeprecationWarning)
    batch_size, channels, h_orig, w_orig = images.size()
    images = pad(images,
                 padding=(w_orig - 1, h_orig - 1, w_orig - 1, h_orig - 1),
                 padding_mode='reflect')
    batch_size, channels, h, w = images.size()
    mask = (torch.rand(batch_size) < p).logical_and(
        torch.rand(batch_size) < 0.5)
    images[mask] = hflip(images[mask])
    output_images = images.new_zeros((batch_size, channels, h_orig, w_orig))

    translate = (0, 0)
    angle_step = choice([0, 1, 2, 3])
    angle = -90 * angle_step

    scale_iso_mask = torch.rand(batch_size) < p
    scale_iso = lognormal(0, 0.2 * math.log(2))
    scale = (scale_iso, scale_iso)

    p_rot = 1 - math.sqrt(1 - p)
    rot_mask = torch.rand(batch_size) < p_rot
    theta = uniform(-180, 180)
    angle += theta

    scale_mask = torch.rand(batch_size) < p
    scale_factor = lognormal(0, 0.2 * math.log(2))
    scale_x, scale_y = scale
    scale = (scale_x * scale_factor, scale_y / scale_factor)
    new_size = (int(h * scale[0]), int(w * scale[1]))

    if torch.any(rot_mask):
        affine_transformed = affine(images[rot_mask],
                                    angle=angle,
                                    translate=list(translate),
                                    shear=[0., 0.],
                                    scale=1)
        images[rot_mask] = affine_transformed

    resize_mask = scale_iso_mask.logical_and(scale_mask)
    resized_images = resize(images[resize_mask], list(new_size))
    output_images[resize_mask.logical_not()] = center_crop(
        images[resize_mask.logical_not()], (h_orig, w_orig))
    output_images[resize_mask] = center_crop(resized_images, (h_orig, w_orig))

    images = output_images

    mask = torch.rand(batch_size) < p
    brightness = normal(1, 0.2)
    images[mask] = adjust_brightness(images[mask], brightness)

    mask = torch.rand(batch_size) < p
    contrast = lognormal(0, (0.5 * math.log(2)))
    images[mask] = adjust_contrast(images[mask], contrast)

    mask = torch.rand(batch_size) < p
    image_data = rgb_to_ycbcr(images[mask])
    image_data[..., 0, :, :] = (1 - image_data[..., 0, :, :])
    images[mask] = ycbcr_to_rgb(image_data)

    mask = torch.rand(batch_size) < p
    if torch.any(mask):
        hue_factor = uniform(-0.5, 0.5)
        images[mask] = adjust_hue(images[mask], hue_factor)

    mask = torch.rand(batch_size) < p
    saturation = lognormal(0, math.log(2))
    images[mask] = adjust_saturation(images[mask], saturation)

    mask = torch.rand(batch_size) < p
    std_dev = abs(normal(0, 0.1))
    noise_images = torch.randn_like(images[mask]) * std_dev
    images[mask] += noise_images.clamp(0, 1)

    return images
Example #28
    vvar = options.vvar

    feats = options.feats
    noiseFeats = options.noisefeats
    sigma = options.sigma

    print("%d real features, %d noise features" % (feats, noiseFeats),
          file=sys.stderr)

    clusterPrior = computePrior(options)

    # cluster means are normal(0, cvar)
    means = zeros((len(clusterPrior), feats))
    for cluster in range(len(clusterPrior)):
        for feat in range(feats):
            means[cluster][feat] = normal(0, cvar)

    variances = zeros((len(clusterPrior), feats))
    if vvar == 0:
        variances += sigma
    else:
        for cluster in range(len(clusterPrior)):
            for feat in range(feats):
                variances[cluster][feat] += invGamma(vvar, 1.0)

    #currently all noise features have mean 0
    noiseMeans = zeros((noiseFeats, ))

    #noise variances are all sigma
    noiseVariances = zeros((noiseFeats, ))
    if vvar == 0:
        noiseVariances += sigma
Example #29
def gaussian_noise(matrix, mean=0, std=0.1):
    return transform(matrix, lambda px: truncate_range(px + normal(loc=mean, scale=std)))
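A self-contained numpy variant of the same idea, assuming matrix is a numpy array whose values should stay within a [0, 1] range (the original's transform and truncate_range helpers are not shown above):

import numpy as np

def gaussian_noise_np(matrix, mean=0.0, std=0.1):
    # add per-pixel Gaussian noise, then clip back into [0, 1]
    noise = np.random.normal(loc=mean, scale=std, size=matrix.shape)
    return np.clip(matrix + noise, 0.0, 1.0)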