示例#1
0
    def test_uniform_range_bounds(self):
        """Check how ``rnd.uniform`` treats infinite and extreme finite bounds.

        An infinite endpoint on either side must raise ``OverflowError``,
        while the widest finite ranges must be accepted without raising.
        """
        float_info = np.finfo('float')
        lowest, highest = float_info.min, float_info.max

        # Either endpoint being infinite has to be rejected.
        for low, high in ((-np.inf, 0), (0, np.inf)):
            np.testing.assert_raises(OverflowError, rnd.uniform, low, high)

        # Must not raise: the draw can be formed as fmin*u + fmax*(1-u)
        # for 0 < u < 1, which stays completely in range.
        rnd.uniform(lowest, highest)

        # (fmax / 1e17) - fmin is within range, so this must not raise either.
        rnd.uniform(low=lowest, high=highest / 1e17)
示例#2
0
def make_propensity_based_simulated_labeler(treat_strength, con_strength, noise_level,
                                            base_propensity_scores, example_indices, exogeneous_con=0.,
                                            setting="simple", seed=42):
    """Build a labeler that attaches simulated treatments and outcomes to examples.

    Per-example noise, thresholds, propensity scores and treatments are drawn
    once, up front, from the global NumPy random state (seeded with ``seed``).
    The returned ``labeler`` looks them up by example index.

    Args:
        treat_strength: Forwarded unchanged to ``outcome_sim``.
        con_strength: Forwarded unchanged to ``outcome_sim``.
        noise_level: Forwarded unchanged to ``outcome_sim``.
        base_propensity_scores: Array with one score per example; ``logit`` is
            applied to it (presumably probabilities in (0, 1) -- confirm with
            the caller).
        example_indices: Integer NumPy array giving, for each row of
            ``base_propensity_scores``, that example's index in the corpus.
        exogeneous_con: Mixing weight between the logit of the base score
            (weight ``1 - exogeneous_con``) and standard-normal extra
            confounding (weight ``exogeneous_con``).
        setting: Forwarded unchanged to ``outcome_sim``.
        seed: Seed passed to ``np.random.seed``.

    Returns:
        A function ``labeler(data)`` that reads ``data['index']`` and returns a
        new dict with all entries of ``data`` plus ``'outcome'``, ``'y0'``,
        ``'y1'`` and ``'treatment'``.
    """
    np.random.seed(seed)
    num_examples = base_propensity_scores.shape[0]

    # NOTE: the order of the draws below is kept fixed so that a given seed
    # keeps producing the same noise / thresholds / treatments.
    all_noise = random.normal(0, 1, num_examples).astype(np.float32)
    all_threshholds = np.array(random.uniform(0, 1, num_examples), dtype=np.float32)

    extra_confounding = random.normal(0, 1, num_examples).astype(np.float32)

    all_propensity_scores = expit(
        (1. - exogeneous_con) * logit(base_propensity_scores) + exogeneous_con * extra_confounding
    ).astype(np.float32)
    all_treatments = random.binomial(1, all_propensity_scores).astype(np.int32)

    # indices in dataset refer to locations in entire corpus,
    # but propensity scores will typically only include a subset of the examples.
    # The lookup table maps corpus index -> row in the per-example arrays above.
    # It keeps the historical minimum of 12000 entries and grows when a larger
    # corpus index is present, instead of failing with an IndexError.
    table_size = 12000
    if example_indices.shape[0] > 0:
        table_size = max(table_size, int(np.max(example_indices)) + 1)
    reindex_hack = np.zeros(table_size, dtype=np.int32)
    reindex_hack[example_indices] = np.arange(example_indices.shape[0], dtype=np.int32)

    def labeler(data):
        index = data['index']
        index_hack = tf.gather(reindex_hack, index)
        treatment = tf.gather(all_treatments, index_hack)
        confounding = 3.0 * (tf.gather(all_propensity_scores, index_hack) - 0.25)
        noise = tf.gather(all_noise, index_hack)

        y, y0, y1 = outcome_sim(treat_strength, con_strength, noise_level, tf.cast(treatment, tf.float32), confounding, noise, setting=setting)
        simulated_prob = tf.nn.sigmoid(y)
        y0 = tf.nn.sigmoid(y0)
        y1 = tf.nn.sigmoid(y1)
        # all_threshholds has one entry per row of base_propensity_scores, so it
        # must be addressed with the re-mapped index like the other per-example
        # arrays; the raw corpus index can point past its end.
        threshold = tf.gather(all_threshholds, index_hack)
        simulated_outcome = tf.cast(tf.greater(simulated_prob, threshold), tf.int32)

        return {**data, 'outcome': simulated_outcome, 'y0': y0, 'y1': y1, 'treatment': treatment}

    return labeler
示例#3
0
def make_buzzy_based_simulated_labeler(treat_strength, con_strength, noise_level, setting="simple", seed=0):
    """Build a labeler that simulates outcomes confounded by the 'buzzy_title' field.

    Noise and thresholds for 12000 examples are drawn once from the global
    NumPy random state (seeded with ``seed``). The returned ``labeler(data)``
    reads ``data['buzzy_title']``, ``data['index']`` and
    ``data['theorem_referenced']`` (used as the treatment) and returns a new
    dict with every entry of ``data`` plus ``'outcome'``, ``'y0'`` and ``'y1'``.
    ``treat_strength``, ``con_strength``, ``noise_level`` and ``setting`` are
    forwarded unchanged to ``outcome_sim``.
    """
    num_draws = 12000

    # hardcode probability of theorem given buzzy / not_buzzy
    theorem_given_buzzy_probs = np.array([0.27, 0.07], dtype=np.float32)

    np.random.seed(seed)
    noise_table = random.normal(0, 1, num_draws).astype(np.float32)
    threshold_table = random.uniform(0, 1, num_draws).astype(np.float32)

    def labeler(data):
        is_buzzy = data['buzzy_title']
        example_index = data['index']
        treatment = tf.cast(data['theorem_referenced'], tf.float32)

        # confounder is the looked-up probability, shifted by 0.25 and scaled by 3
        buzzy_prob = tf.gather(theorem_given_buzzy_probs, is_buzzy)
        confounding = 3.0*(buzzy_prob - 0.25)

        noise = tf.gather(noise_table, example_index)

        y, y0, y1 = outcome_sim(
            treat_strength, con_strength, noise_level,
            treatment, confounding, noise,
            setting=setting,
        )
        simulated_prob = tf.nn.sigmoid(y)
        y0 = tf.nn.sigmoid(y0)
        y1 = tf.nn.sigmoid(y1)

        # binary outcome: 1 where the simulated probability exceeds the example's threshold
        threshold = tf.gather(threshold_table, example_index)
        exceeds = tf.greater(simulated_prob, threshold)
        simulated_outcome = tf.cast(exceeds, tf.int32)

        simulated = {'outcome': simulated_outcome, 'y0': y0, 'y1': y1}
        return {**data, **simulated}

    return labeler
def symmetric_random_walk(size, seed=None, scale=1.0, normalize=True):
    """Generate uniform noise whose amplitude follows a symmetric random walk.

    Args:
        size: Shape of the noise array; ``size[0]`` is the number of time
            steps of the walk.
        seed: Seed for ``rnd``. When ``None`` the generator is re-seeded
            without an explicit value.
        scale: Factor applied to the (optionally normalized) walk levels.
        normalize: When true, shift the walk up by ``abs(min)`` and divide by
            its maximum so the levels lie within [0, 1].

    Returns:
        Tuple ``(noise, random_walk)`` of float32 arrays. ``noise`` has shape
        ``size``; ``random_walk`` has shape ``(size[0], 1, ..., 1)`` with one
        trailing singleton axis per extra dimension of ``size``.
    """
    # 0. Preparation
    if seed is not None:
        rnd.seed(seed)
    else:
        rnd.seed()
    time_steps = size[0]
    num_dims = len(size) - 1

    # 1. Generate random walk noise levels from +/-1 steps
    steps = 2 * rnd.randint(2, size=time_steps) - 1
    # Held as float: an in-place multiply of an integer array by a float
    # `scale` is rejected by NumPy's casting rules (hit when normalize=False).
    random_walk = np.cumsum(steps).astype(np.float64)

    # 2. Normalize random walk noise levels to the range [0,1]
    if normalize and random_walk.size > 0:
        random_walk += np.abs(random_walk.min())
        peak = random_walk.max()
        # A walk whose shifted maximum is 0 (e.g. a single -1 step) stays at
        # zero instead of turning into NaN through 0/0.
        if peak > 0:
            random_walk /= peak

    # 3. Scale random walk noise levels to max_level
    random_walk *= scale

    # 4. Generate noise; uniform on [-sqrt(12)/2, sqrt(12)/2] has unit variance
    half_width = np.sqrt(12) / 2
    noise = rnd.uniform(low=-half_width, high=half_width, size=size)

    # 5. Scale noise to desired std over time_steps (broadcast along axis 0)
    random_walk = random_walk.reshape((-1, ) + (1, ) * num_dims)
    noise *= random_walk

    return noise.astype(np.float32), random_walk.astype(np.float32)
示例#5
0
 def test_uniform(self):
     """Compare seeded ``rnd.uniform`` draws against stored reference values."""
     rnd.seed(self.seed, self.brng)
     sampled = rnd.uniform(low=1.23, high=10.54, size=(3, 2))
     # Reference values for a (3, 2) draw; compared to 10 decimal places.
     reference_rows = [
         [10.38982478047721, 1.408218254214153],
         [2.8756430713785814, 7.836974412682466],
         [6.057706432128325, 10.426505200380925],
     ]
     expected = np.array(reference_rows)
     np.testing.assert_array_almost_equal(sampled, expected, decimal=10)