    def __init__(self, enable_cuda=False):
        AbstractMethod.__init__(self)
        self.g = None
        self.f = None
        self.dev_f_collection = None

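        # Candidate architectures for g (the function being estimated) and f
        # (the adversarial critic); roles inferred from how they are used below.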
        g_models = [
            DefaultCNN(cuda=enable_cuda),
        ]
        f_models = [
            DefaultCNN(cuda=enable_cuda),
            # LeakySoftmaxCNN(input_c=1, input_h=28, input_w=28,
            #                 channel_sizes=[10, 20], kernel_sizes=[3, 3],
            #                 extra_padding=[0, 1], cuda=enable_cuda),
        ]

        g_learning_rates = [5e-6, 2e-6, 1e-6]
        # g_learning_rates = [0.00001]
        game_objective = OptimalMomentObjective()
        # g_learning_rates = [0.0005]
        # game_objectives = [OptimalMomentObjective(lambda_1=0.5)]
        learning_setups = []
        for g_lr in g_learning_rates:
            learning_setup = {
                "g_optimizer_factory": OptimizerFactory(
                    OAdam, lr=g_lr, betas=(0.5, 0.9)),
                "f_optimizer_factory": OptimizerFactory(
                    OAdam, lr=5.0*g_lr, betas=(0.5, 0.9)),
                "game_objective": game_objective
            }
            learning_setups.append(learning_setup)

        default_g_opt_factory = OptimizerFactory(
            Adam, lr=0.0001, betas=(0.5, 0.9))
        default_f_opt_factory = OptimizerFactory(
            Adam, lr=0.0001, betas=(0.5, 0.9))
        g_simple_model_eval = SGDSimpleModelEval(
            max_num_epoch=50, max_no_progress=10, batch_size=1024, eval_freq=1)
        f_simple_model_eval = SGDSimpleModelEval(
            max_num_epoch=50, max_no_progress=10, batch_size=1024, eval_freq=1)
        learning_eval = FHistoryLearningEvalSGDNoStop(
            num_epochs=60, eval_freq=1, batch_size=1024)
        self.model_selection = FHistoryModelSelectionV3(
            g_model_list=g_models,
            f_model_list=f_models,
            learning_args_list=learning_setups,
            default_g_optimizer_factory=default_g_opt_factory,
            default_f_optimizer_factory=default_f_opt_factory,
            g_simple_model_eval=g_simple_model_eval,
            f_simple_model_eval=f_simple_model_eval,
            learning_eval=learning_eval,
            psi_eval_burn_in=30, psi_eval_max_no_progress=10,
        )
        self.default_g_opt_factory = default_g_opt_factory
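# Hedged aside (not from the original source): OptimizerFactory is assumed to
# bind an optimizer class plus its hyperparameters and to build the optimizer
# later for a concrete model. A minimal sketch of that pattern:
#
#     class OptimizerFactory:
#         def __init__(self, optimizer_cls, **kwargs):
#             self.optimizer_cls, self.kwargs = optimizer_cls, kwargs
#
#         def __call__(self, model):
#             return self.optimizer_cls(model.parameters(), **self.kwargs)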
Example #2
    def __init__(self, enable_cuda=False):
        AbstractMethod.__init__(self)
        self.g = None
        self.f = None
        self.dev_f_collection = None

        g_models = [
            MLPModel(input_dim=1, layer_widths=[200, 200],
                     activation=nn.LeakyReLU).double(),
        ]
        f_models = [
            DefaultCNN(cuda=enable_cuda),
            # OtherCNN(cuda=enable_cuda),
            # OtherCNNV2(cuda=enable_cuda),
            # OtherCNNV3(cuda=enable_cuda),
        ]
        if enable_cuda:
            for g in g_models:
                g.cuda()

        g_learning_rates = [0.00010, 0.000050, 0.000020]
        # g_learning_rates = [0.00001]
        game_objective = OptimalMomentObjective()
        # g_learning_rates = [0.0005]
        # game_objectives = [OptimalMomentObjective(lambda_1=0.5)]
        learning_setups = []
        for g_lr in g_learning_rates:
            learning_setup = {
                "g_optimizer_factory": OptimizerFactory(
                    OAdam, lr=g_lr, betas=(0.5, 0.9)),
                "f_optimizer_factory": OptimizerFactory(
                    OAdam, lr=5.0*g_lr, betas=(0.5, 0.9)),
                "game_objective": game_objective
            }
            learning_setups.append(learning_setup)

        default_g_opt_factory = OptimizerFactory(
            Adam, lr=0.001, betas=(0.5, 0.9))
        default_f_opt_factory = OptimizerFactory(
            Adam, lr=0.0001, betas=(0.5, 0.9))
        g_simple_model_eval = GradientDecentSimpleModelEval(
            max_num_iter=4000, max_no_progress=10, eval_freq=100)
        f_simple_model_eval = SGDSimpleModelEval(
            max_num_epoch=50, max_no_progress=10, batch_size=512, eval_freq=1)
        learning_eval = FHistoryLearningEvalSGDNoStop(
            num_epochs=60, eval_freq=1, batch_size=1024)
        self.model_selection = FHistoryModelSelectionV3(
            g_model_list=g_models,
            f_model_list=f_models,
            learning_args_list=learning_setups,
            default_g_optimizer_factory=default_g_opt_factory,
            default_f_optimizer_factory=default_f_opt_factory,
            g_simple_model_eval=g_simple_model_eval,
            f_simple_model_eval=f_simple_model_eval,
            learning_eval=learning_eval,
            psi_eval_max_no_progress=10, psi_eval_burn_in=30,
        )
        self.default_g_opt_factory = default_g_opt_factory
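# Hedged aside: OAdam ("optimistic Adam") is used because g and f play a
# smooth zero-sum game. The optimistic trick, sketched here with plain
# gradient descent (an illustration, not OAdam itself), replaces
#     w <- w - lr * grad_t
# with the extrapolated step
#     w <- w - lr * (2 * grad_t - grad_{t-1})
# which damps the cycling that standard descent exhibits on minimax problems.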
Example #3
    def _fit(self, x, y, z, context=None):
        model = DefaultCNN(cuda=torch.cuda.is_available())
        model.float()
        optimizer = torch.optim.Adam(model.parameters(), lr=self._lr)
        model.train()

        x = self.augment(x, context)
        x = torch.tensor(x, dtype=torch.float)
        y = torch.tensor(y, dtype=torch.float)
        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()
        t0 = time.time()
        train = data_utils.DataLoader(data_utils.TensorDataset(x, y),
                                      batch_size=self._n_batch_size,
                                      shuffle=True)
        for epoch in range(self._n_epochs):
            losses = list()
            print("Epoch: ", epoch + 1, "/", self._n_epochs, " batch size: ",
                  self._n_batch_size)
            for i, (x_b, y_b) in enumerate(train):  # renamed to avoid shadowing x, y
                optimizer.zero_grad()
                y_pred = model(x_b)
                loss = F.mse_loss(y_pred, y_b)
                losses.append(loss.item())
                loss.backward()
                optimizer.step()
            print("   train loss", np.mean(losses))
        self._model = model
        return time.time() - t0
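# Imports assumed by the snippet above (inferred from usage; the source does
# not show them), plus the project's own DefaultCNN:
#     import time
#     import numpy as np
#     import torch
#     import torch.nn.functional as F
#     import torch.utils.data as data_utils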
Example #4
class GMM(AbstractBaseline):
    models = {
        "linear":
        lambda input_dim: torch.nn.Linear(input_dim, 1),
        "2-layer":
        lambda input_dim: torch.nn.Sequential(torch.nn.Linear(
            input_dim, 20), torch.nn.LeakyReLU(0.2), torch.nn.Linear(20, 1)),
        "mnist":
        lambda input_dim: DefaultCNN(cuda=torch.cuda.is_available())
    }

    def __init__(self,
                 g_model="linear",
                 f_feature_mapping=None,
                 g_feature_mapping=None,
                 n_steps=1,
                 g_epochs=200):
        '''
        Generalized method of moments (GMM) baseline.
        - g_model: key into `models` selecting the architecture for g
        - f_feature_mapping: feature mapping applied to raw instruments z
        - g_feature_mapping: feature mapping applied to raw features x
        - n_steps: number of GMM steps (weighting matrix re-estimations)
        - g_epochs: optimization epochs for g within each GMM step
        '''
        super().__init__()

        if f_feature_mapping is None:
            self.f_mapping = VanillaFeatures()
        else:
            self.f_mapping = f_feature_mapping

        if g_feature_mapping is None:
            self.g_mapping = VanillaFeatures(add_constant=False)
        else:
            self.g_mapping = g_feature_mapping

        if g_model in self.models:
            self._g = self.models[g_model]
        else:
            raise ValueError("g_model has invalid value " + str(g_model))
        self._optimizer = None
        self._n_steps = n_steps
        self._g_epochs = g_epochs

    def display(self):
        for name, param in self._model.named_parameters():
            print(name, self.arr2str(param.data.cpu().numpy()))

    def fit_g_minibatch(self, train, loss):
        losses = list()
        for i, (x_b, y_b, z_b) in enumerate(train):
            if torch.cuda.is_available():
                x_b = x_b.cuda()
                y_b = y_b.cuda()
                z_b = z_b.cuda()
            loss_val = self._optimizer.step(lambda: loss(x_b, y_b, z_b))
            losses.append(loss_val.item())
        print("  train loss ", np.mean(losses))

    def fit_g_batch(self, x, y, z, loss):
        _ = self._optimizer.step(lambda: loss(x, y, z))

    def _fit(self, x, y, z, context=None):
        z = self.augment(z, context)
        z = self.f_mapping.transform(z)
        x = self.augment(x, context)
        x = self.g_mapping.transform(x)
        x = torch.tensor(x, dtype=torch.float)
        y = torch.tensor(y, dtype=torch.float)
        z = torch.tensor(z, dtype=torch.float)
        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()
            z = z.cuda()

        n_samples = x.size(0)
        x_dim, z_dim = x.size(1), z.size(1)

        g_model = self._g(x_dim)
        if torch.cuda.is_available():
            g_model = g_model.cuda()
        g_model.float()
        self._optimizer = torch.optim.Adam(g_model.parameters(), lr=0.01)
        weights = torch.eye(z_dim)
        if torch.cuda.is_available():
            weights = weights.cuda()
        self._model = g_model

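        # GMM objective: with empirical moments m(g) = mean_i[z_i * (y_i - g(x_i))],
        # minimize the quadratic form m(g)^T W m(g). W starts as the identity
        # and is re-estimated between steps below (two-step / iterated GMM).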
        def loss(x_b, y_b, z_b):
            moment_conditions = z_b.mul(y_b - g_model(x_b))
            moms = moment_conditions.mean(dim=0, keepdim=True)
            loss = torch.mm(torch.mm(moms, weights), moms.t())
            self._optimizer.zero_grad()
            loss.backward()
            return loss

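        # heuristic: optimize full-batch for small datasets, mini-batch otherwise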
        batch_mode = "mini" if n_samples > 5000 else "full"
        t0 = time.time()
        train = data_utils.DataLoader(data_utils.TensorDataset(x, y, z),
                                      batch_size=128,
                                      shuffle=True)

        for step in range(self._n_steps):
            print("GMM step %d/%d" % (step + 1, self._n_steps))
            if step > 0:
                # re-estimate the GMM weighting matrix W as the pseudo-inverse
                # of the empirical covariance of the moment conditions
                # (two-step / iterated GMM)
                with torch.no_grad():
                    moment_conditions = z.mul(y - g_model(x))
                    covariance_matrix = torch.mm(moment_conditions.t(),
                                                 moment_conditions) / n_samples
                    weights = torch.as_tensor(
                        np.linalg.pinv(covariance_matrix.cpu().numpy(),
                                       rcond=1e-9))
                    if torch.cuda.is_available():
                        weights = weights.cuda()

            for epoch in range(self._g_epochs):
                if batch_mode == "full":
                    self.fit_g_batch(x, y, z, loss)
                else:
                    print("g epoch %d / %d" % (epoch + 1, self._g_epochs))
                    self.fit_g_minibatch(train, loss)
            self._model = g_model
        return time.time() - t0

    def _predict(self, x, context):
        x = self.augment(x, context)
        x = self.g_mapping.transform(x)
        x = torch.tensor(x, dtype=torch.float)
        if torch.cuda.is_available():
            x = x.cuda()
        return self._model(x).data.cpu().numpy()
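# Hedged usage sketch (shapes and the public wrappers around _fit/_predict
# are assumptions, not shown in the source):
#     gmm = GMM(g_model="2-layer", n_steps=10, g_epochs=200)
#     seconds = gmm._fit(x, y, z)        # x: features, y: targets, z: instruments
#     y_hat = gmm._predict(x_test, context=None)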
Example #5
def main():
    num_train = 10000
    num_dev = 10000
    num_test = 10000
    num_epochs = 500
    batch_size = 1000

    scenario_name = "mnist_xz"
    print("\nLoading " + scenario_name + "...")
    scenario = AbstractScenario(filename="data/" + scenario_name + "/main.npz")
    scenario.info()
    scenario.to_tensor()
    if torch.cuda.is_available() and ENABLE_CUDA:
        scenario.to_cuda()

    train = scenario.get_dataset("train")
    dev = scenario.get_dataset("dev")
    test = scenario.get_dataset("test")

    x_train, z_train, y_train = train.x, train.z, train.y
    x_dev, z_dev, y_dev, g_of_x_oracle_dev = dev.x, dev.z, dev.y, dev.g
    x_test, z_test, y_test, g_of_x_oracle_test = test.x, test.z, test.y, test.g

    # scenario name can be e.g. "abs", "linear", "sin", "step" to replicate
    # the respective scenarios from the paper
    #     scenario_name = "step"
    # create data from respective scenario
    #     scenario = Standardizer(AGMMZoo(scenario_name, two_gps=False,
    #                                     n_instruments=2))
    #     scenario.setup(num_train=num_train, num_dev=num_dev, num_test=num_test)
    #     scenario.to_tensor()
    #     if torch.cuda.is_available() and ENABLE_CUDA:
    #         scenario.to_cuda()

    #     x_train, z_train, y_train, _, _ = scenario.get_train_data()
    #     x_dev, z_dev, y_dev, g_of_x_oracle_dev, _ = scenario.get_dev_data()
    #     x_test, z_test, y_test, g_of_x_oracle_test, _ = scenario.get_test_data()

    # set up f and g models and optimizers
    #     g = MLPModel(input_dim=1, layer_widths=[20, 3],
    #                  activation=nn.LeakyReLU).double()
    #     f = MLPModel(input_dim=2, layer_widths=[20],
    #                  activation=nn.LeakyReLU).double()
    g = DefaultCNN(cuda=ENABLE_CUDA)
    f = DefaultCNN(cuda=ENABLE_CUDA)

    if torch.cuda.is_available() and ENABLE_CUDA:
        g = g.cuda()
        f = f.cuda()
    g_optimizer = OAdam(g.parameters(), lr=0.00005,
                        betas=(0.5, 0.9))  # was 0.00001
    f_optimizer = OAdam(f.parameters(), lr=0.000025,
                        betas=(0.5, 0.9))  # was 0.000005
    # train models using DeepGMM algorithm
    g = train_deep_gmm(g=g,
                       f=f,
                       g_optimizer=g_optimizer,
                       f_optimizer=f_optimizer,
                       num_epochs=num_epochs,
                       batch_size=batch_size,
                       x_train=x_train,
                       z_train=z_train,
                       y_train=y_train,
                       verbose=True,
                       print_freq=20,
                       x_dev=x_dev,
                       z_dev=z_dev,
                       y_dev=y_dev,
                       g_of_x_oracle_dev=g_of_x_oracle_dev)

    # test output g function on test data
    test_mse = calc_mse_safe_test(x_test,
                                  g_of_x_oracle_test,
                                  g,
                                  batch_size=batch_size)
    torch.save({'model': g.state_dict()}, 'g_mnist.pth')
    print("MSE on test data: %f" % test_mse)
    print("")