def test_n_zero(self):
    """Regression test for issue #3480: binomial(0, p) must be 0 for every p in [0, 1]."""
    int_zeros = np.zeros(2, dtype='int')
    for prob in (0, .5, 1):
        # With n == 0 there are no trials, so zero successes is the only outcome.
        assert_(rnd.binomial(0, prob) == 0)
        # The array-valued form must behave the same element-wise.
        np.testing.assert_array_equal(rnd.binomial(int_zeros, prob), int_zeros)
def make_propensity_based_simulated_labeler(treat_strength, con_strength, noise_level,
                                            base_propensity_scores, example_indices,
                                            exogeneous_con=0., setting="simple", seed=42,
                                            total_corpus_size=12000):
    """Build a tf.data labeler that attaches a simulated binary outcome to each example.

    Treatment is sampled from (optionally exogenously-confounded) propensity
    scores; the outcome is a per-example threshold applied to the sigmoid of
    the score produced by outcome_sim.

    Args:
        treat_strength: treatment-effect strength forwarded to outcome_sim.
        con_strength: confounding strength forwarded to outcome_sim.
        noise_level: noise multiplier forwarded to outcome_sim.
        base_propensity_scores: 1-D array of propensity scores, one per entry
            of example_indices.
        example_indices: corpus indices of the examples the scores describe.
        exogeneous_con: fraction in [0, 1] of the logit-propensity replaced by
            independent Gaussian confounding.
        setting: simulation mode forwarded to outcome_sim.
        seed: numpy RNG seed so the simulated labels are reproducible.
        total_corpus_size: number of examples in the full corpus that
            example_indices point into (default 12000, the previously
            hard-coded value — kept for backward compatibility).

    Returns:
        labeler(data) -> dict: a copy of `data` with added keys 'outcome'
        (int32 0/1), 'y0', 'y1' (counterfactual probabilities), 'treatment'.
    """
    np.random.seed(seed)
    # All per-example arrays below are subset-sized: one entry per element of
    # example_indices, in the same order as base_propensity_scores.
    all_noise = random.normal(0, 1, base_propensity_scores.shape[0]).astype(np.float32)
    all_threshholds = np.array(random.uniform(0, 1, base_propensity_scores.shape[0]),
                               dtype=np.float32)
    extra_confounding = random.normal(0, 1, base_propensity_scores.shape[0]).astype(np.float32)

    # Mix the observed propensity with exogenous noise in logit space.
    all_propensity_scores = expit(
        (1. - exogeneous_con) * logit(base_propensity_scores)
        + exogeneous_con * extra_confounding).astype(np.float32)
    all_treatments = random.binomial(1, all_propensity_scores).astype(np.int32)

    # indices in dataset refer to locations in entire corpus,
    # but propensity scores will typically only include a subset of the examples,
    # so build a corpus-index -> subset-index lookup table.
    reindex_hack = np.zeros(total_corpus_size, dtype=np.int32)
    reindex_hack[example_indices] = np.arange(example_indices.shape[0], dtype=np.int32)

    def labeler(data):
        index = data['index']
        index_hack = tf.gather(reindex_hack, index)
        treatment = tf.gather(all_treatments, index_hack)
        confounding = 3.0 * (tf.gather(all_propensity_scores, index_hack) - 0.25)
        noise = tf.gather(all_noise, index_hack)
        y, y0, y1 = outcome_sim(treat_strength, con_strength, noise_level,
                                tf.cast(treatment, tf.float32), confounding, noise,
                                setting=setting)
        simulated_prob = tf.nn.sigmoid(y)
        y0 = tf.nn.sigmoid(y0)
        y1 = tf.nn.sigmoid(y1)
        # BUG FIX: all_threshholds is subset-sized (same length as all_noise),
        # so it must be gathered with the remapped index_hack. The original
        # gathered with the raw corpus index, which can run past the end of
        # the array and pairs examples with the wrong thresholds.
        threshold = tf.gather(all_threshholds, index_hack)
        simulated_outcome = tf.cast(tf.greater(simulated_prob, threshold), tf.int32)

        return {**data, 'outcome': simulated_outcome, 'y0': y0, 'y1': y1,
                'treatment': treatment}

    return labeler
def make_propensity_based_simulated_labeler(treat_strength, con_strength, noise_level,
                                            base_propensity_scores, example_indices,
                                            exogeneous_con=0., setting="simple", seed=42,
                                            total_corpus_size=422206):
    """Build a tf.data labeler that attaches a simulated continuous outcome.

    Treatment is sampled from (optionally exogenously-confounded) propensity
    scores; the raw outcome_sim score is attached directly (no sigmoid or
    thresholding in this variant).

    Args:
        treat_strength: treatment-effect strength forwarded to outcome_sim.
        con_strength: confounding strength forwarded to outcome_sim.
        noise_level: noise multiplier forwarded to outcome_sim.
        base_propensity_scores: 1-D array of propensity scores, one per entry
            of example_indices.
        example_indices: corpus indices of the examples the scores describe.
        exogeneous_con: fraction in [0, 1] of the logit-propensity replaced by
            independent Gaussian confounding.
        setting: simulation mode forwarded to outcome_sim.
        seed: numpy RNG seed so the simulated labels are reproducible.
        total_corpus_size: number of examples in the full corpus that
            example_indices point into (default 422206, the previously
            hard-coded value — kept for backward compatibility).

    Returns:
        labeler(data) -> dict: a copy of `data` with added keys 'outcome'
        (simulated score), 'y0', 'y1' (counterfactual scores), 'treatment'.
    """
    np.random.seed(seed)
    # Subset-sized per-example arrays, ordered like base_propensity_scores.
    all_noise = random.normal(0, 1, base_propensity_scores.shape[0]).astype(np.float32)
    extra_confounding = random.normal(
        0, 1, base_propensity_scores.shape[0]).astype(np.float32)

    # Mix the observed propensity with exogenous noise in logit space.
    all_propensity_scores = expit(
        (1. - exogeneous_con) * logit(base_propensity_scores)
        + exogeneous_con * extra_confounding).astype(np.float32)
    all_treatments = random.binomial(1, all_propensity_scores).astype(np.int32)

    # indices in dataset refer to locations in entire corpus,
    # but propensity scores will typically only include a subset of the examples,
    # so build a corpus-index -> subset-index lookup table.
    reindex_hack = np.zeros(total_corpus_size, dtype=np.int32)
    reindex_hack[example_indices] = np.arange(example_indices.shape[0], dtype=np.int32)

    def labeler(data):
        index = data['index']
        index_hack = tf.gather(reindex_hack, index)
        treatment = tf.gather(all_treatments, index_hack)
        # Center the propensity around 0 so confounding is signed.
        confounding = tf.gather(all_propensity_scores, index_hack) - 0.5
        noise = tf.gather(all_noise, index_hack)
        simulated_score, y0, y1 = outcome_sim(treat_strength, con_strength,
                                              noise_level,
                                              tf.cast(treatment, tf.float32),
                                              confounding, noise, setting=setting)

        return {
            **data,
            'outcome': simulated_score,
            'y0': y0,
            'y1': y1,
            'treatment': treatment
        }

    return labeler
def test_binomial(self):
    """Binomial draws for a fixed seed/brng must match the recorded values."""
    rnd.seed(self.seed, self.brng)
    sample = rnd.binomial(100.123, .456, size=(3, 2))
    expected = np.array([[43, 48],
                         [55, 48],
                         [46, 53]])
    np.testing.assert_array_equal(sample, expected)