示例#1
0
文件: synthetic.py 项目: nwjnwj/pyro
def main(args):
    """Train a CEVAE on synthetic data and print ATE estimates.

    Reads ``cuda``, ``seed``, ``jit``, the model-size options
    (``feature_dim``, ``latent_dim``, ``hidden_dim``, ``num_layers``) and
    the optimisation options (``num_epochs``, ``batch_size``,
    ``learning_rate``, ``learning_rate_decay``, ``weight_decay``) from
    ``args``. Prints the true, naive and estimated average treatment
    effect on a freshly generated evaluation set.
    """
    pyro.enable_validation(__debug__)
    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # Training data is drawn right after seeding.
    pyro.set_rng_seed(args.seed)
    x_train, t_train, y_train, _ = generate_data(args)

    # Re-seed before building and fitting the model.
    pyro.set_rng_seed(args.seed)
    pyro.clear_param_store()
    model_options = dict(
        feature_dim=args.feature_dim,
        latent_dim=args.latent_dim,
        hidden_dim=args.hidden_dim,
        num_layers=args.num_layers,
        num_samples=10,
    )
    cevae = CEVAE(**model_options)
    fit_options = dict(
        num_epochs=args.num_epochs,
        batch_size=args.batch_size,
        learning_rate=args.learning_rate,
        learning_rate_decay=args.learning_rate_decay,
        weight_decay=args.weight_decay,
    )
    cevae.fit(x_train, t_train, y_train, **fit_options)

    # Evaluation data is generated without re-seeding.
    x_test, t_test, y_test, true_ite = generate_data(args)

    true_ate = true_ite.mean()
    print("true ATE = {:0.3g}".format(true_ate.item()))

    # Naive estimate: difference of mean outcomes between the two arms.
    treated_mean = y_test[t_test == 1].mean()
    control_mean = y_test[t_test == 0].mean()
    naive_ate = treated_mean - control_mean
    print("naive ATE = {:0.3g}".format(naive_ate))

    # Optionally swap in the script-module version before estimating.
    estimator = cevae.to_script_module() if args.jit else cevae
    est_ate = estimator.ite(x_test).mean()
    print("estimated ATE = {:0.3g}".format(est_ate.item()))
示例#2
0
def test_serialization(jit, feature_dim, outcome_dist):
    """Check that a saved-and-reloaded CEVAE reproduces its ITE estimates.

    With ``jit`` truthy the model is converted via ``to_script_module`` and
    round-tripped through ``torch.jit.save``/``torch.jit.load``; otherwise
    it goes through ``torch.save``/``torch.load``. Both ITE computations
    run under the same RNG seed and are compared with ``atol=0.1``.
    """
    x, t, y = generate_data(num_data=32, feature_dim=feature_dim)
    if outcome_dist == "exponential":
        y.clamp_(min=1e-20)
    cevae = CEVAE(feature_dim, outcome_dist=outcome_dist, num_samples=1000, hidden_dim=32)
    cevae.fit(x, t, y, num_epochs=4, batch_size=8)
    pyro.set_rng_seed(0)
    expected_ite = cevae.ite(x)

    # Both serialization paths round-trip through an in-memory buffer.
    buffer = io.BytesIO()
    if not jit:
        with warnings.catch_warnings():
            # UserWarnings emitted while saving the module are ignored.
            warnings.filterwarnings("ignore", category=UserWarning)
            torch.save(cevae, buffer)
        buffer.seek(0)
        loaded_cevae = torch.load(buffer)
    else:
        scripted = cevae.to_script_module()
        torch.jit.save(scripted, buffer)
        buffer.seek(0)
        loaded_cevae = torch.jit.load(buffer)

    # Same seed as for the expected value above.
    pyro.set_rng_seed(0)
    actual_ite = loaded_cevae.ite(x)
    assert_close(actual_ite, expected_ite, atol=0.1)
示例#3
0
def test_smoke(num_data, feature_dim, outcome_dist):
    """Fit a CEVAE for two epochs and check the ITE output shape."""
    features, treatments, outcomes = generate_data(num_data, feature_dim)
    if outcome_dist == "exponential":
        # Clamp outcomes to a tiny positive floor for the exponential case.
        outcomes.clamp_(min=1e-20)

    model = CEVAE(feature_dim, outcome_dist)
    model.fit(features, treatments, outcomes, num_epochs=2)

    effects = model.ite(features)
    # One treatment-effect estimate per data point.
    assert effects.shape == (num_data,)
示例#4
0
文件: cevae.py 项目: zoeonly/causalml
class CEVAE:
    """Wrapper that stores hyperparameters and delegates to ``CEVAEModel``.

    ``fit`` builds and trains a ``CEVAEModel``; ``predict`` returns the
    model's individual treatment effect (ITE) estimates as a NumPy array.
    """

    def __init__(self, outcome_dist="studentt", latent_dim=20, hidden_dim=200, num_epochs=50, num_layers=3,
                 batch_size=100, learning_rate=1e-3, learning_rate_decay=0.1, num_samples=1000, weight_decay=1e-4):
        """
        Initializes CEVAE.

            Args:
                outcome_dist (str): Outcome distribution as one of: "bernoulli" , "exponential", "laplace", "normal",
                                    and "studentt"
                latent_dim (int) : Dimension of the latent variable
                hidden_dim (int) : Dimension of hidden layers of fully connected networks
                num_epochs (int): Number of training epochs
                num_layers (int): Number of hidden layers in fully connected networks
                batch_size (int): Batch size
                learning_rate (float): Learning rate
                learning_rate_decay (float/int): Learning rate decay over all epochs; the per-step decay rate will
                                                 depend on batch size and number of epochs such that the initial
                                                 learning rate will be learning_rate and the
                                                 final learning rate will be learning_rate * learning_rate_decay
                num_samples (int) : Number of samples to calculate ITE
                weight_decay (float) : Weight decay
        """
        self.outcome_dist = outcome_dist
        self.latent_dim = latent_dim
        self.hidden_dim = hidden_dim
        self.num_epochs = num_epochs
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.learning_rate_decay = learning_rate_decay
        self.num_samples = num_samples
        self.weight_decay = weight_decay

    def fit(self, X, treatment, y, p=None):
        """
        Fits CEVAE.

        Args:
            X (np.matrix or np.array or pd.Dataframe): a feature matrix
            treatment (np.array or pd.Series): a treatment vector
            y (np.array or pd.Series): an outcome vector
            p: accepted but not used by this method
        """
        X, treatment, y = convert_pd_to_np(X, treatment, y)

        # The feature dimension is taken from the last axis of X.
        self.cevae = CEVAEModel(outcome_dist=self.outcome_dist,
                                feature_dim=X.shape[-1],
                                latent_dim=self.latent_dim,
                                hidden_dim=self.hidden_dim,
                                num_layers=self.num_layers)

        self.cevae.fit(x=torch.tensor(X, dtype=torch.float),
                       t=torch.tensor(treatment, dtype=torch.float),
                       y=torch.tensor(y, dtype=torch.float),
                       num_epochs=self.num_epochs,
                       batch_size=self.batch_size,
                       learning_rate=self.learning_rate,
                       learning_rate_decay=self.learning_rate_decay,
                       weight_decay=self.weight_decay)

    def predict(self, X, treatment=None, y=None, p=None):
        """
        Calls predict on fitted CEVAE.

        Args:
            X (np.matrix or np.array or pd.Dataframe): a feature matrix
            treatment: accepted but not used by this method
            y: accepted but not used by this method
            p: accepted but not used by this method
        Returns:
            (np.ndarray): Predictions of treatment effects.
        Raises:
            AttributeError: if called before ``fit``.
        """
        model = getattr(self, "cevae", None)
        if model is None:
            # Same exception type as the bare attribute lookup would raise,
            # but with an actionable message.
            raise AttributeError("CEVAE has not been fitted; call fit() before predict().")

        # torch.tensor cannot consume pandas objects directly, so mirror the
        # conversion that fit() applies to its inputs.
        if hasattr(X, "to_numpy"):
            X = X.to_numpy()

        ite = model.ite(torch.tensor(X, dtype=torch.float),
                        num_samples=self.num_samples,
                        batch_size=self.batch_size)
        return ite.cpu().numpy()

    def fit_predict(self, X, treatment, y, p=None):
        """
        Fits the CEVAE model and then predicts.

        Args:
            X (np.matrix or np.array or pd.Dataframe): a feature matrix
            treatment (np.array or pd.Series): a treatment vector
            y (np.array or pd.Series): an outcome vector
            p: accepted but not used by this method
        Returns:
            (np.ndarray): Predictions of treatment effects.
        """
        self.fit(X, treatment, y)
        return self.predict(X)