def __init__(self, enable_cuda=False):
    AbstractMethod.__init__(self)
    self.g = None
    self.f = None
    self.dev_f_collection = None

    g_models = [
        DefaultCNN(cuda=enable_cuda),
    ]
    f_models = [
        DefaultCNN(cuda=enable_cuda),
        # LeakySoftmaxCNN(input_c=1, input_h=28, input_w=28,
        #                 channel_sizes=[10, 20], kernel_sizes=[3, 3],
        #                 extra_padding=[0, 1], cuda=enable_cuda),
    ]

    g_learning_rates = [5e-6, 2e-6, 1e-6]
    game_objective = OptimalMomentObjective()

    # build one learning setup per candidate g learning rate; the f
    # optimizer always uses a 5x larger learning rate than the g optimizer
    learning_setups = []
    for g_lr in g_learning_rates:
        learning_setup = {
            "g_optimizer_factory": OptimizerFactory(
                OAdam, lr=g_lr, betas=(0.5, 0.9)),
            "f_optimizer_factory": OptimizerFactory(
                OAdam, lr=5.0 * g_lr, betas=(0.5, 0.9)),
            "game_objective": game_objective,
        }
        learning_setups.append(learning_setup)

    default_g_opt_factory = OptimizerFactory(
        Adam, lr=0.0001, betas=(0.5, 0.9))
    default_f_opt_factory = OptimizerFactory(
        Adam, lr=0.0001, betas=(0.5, 0.9))
    g_simple_model_eval = SGDSimpleModelEval(
        max_num_epoch=50, max_no_progress=10, batch_size=1024, eval_freq=1)
    f_simple_model_eval = SGDSimpleModelEval(
        max_num_epoch=50, max_no_progress=10, batch_size=1024, eval_freq=1)
    learning_eval = FHistoryLearningEvalSGDNoStop(
        num_epochs=60, eval_freq=1, batch_size=1024)

    self.model_selection = FHistoryModelSelectionV3(
        g_model_list=g_models,
        f_model_list=f_models,
        learning_args_list=learning_setups,
        default_g_optimizer_factory=default_g_opt_factory,
        default_f_optimizer_factory=default_f_opt_factory,
        g_simple_model_eval=g_simple_model_eval,
        f_simple_model_eval=f_simple_model_eval,
        learning_eval=learning_eval,
        psi_eval_burn_in=30,
        psi_eval_max_no_progress=10,
    )
    self.default_g_opt_factory = default_g_opt_factory
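# For reference, an optimizer factory of the kind used above can be as small
# as the sketch below: it freezes an optimizer class together with its keyword
# arguments and builds a fresh optimizer per model. This is a hypothetical
# minimal version; the repo's OptimizerFactory is the authoritative one and
# may carry extra logic.
class SimpleOptimizerFactory(object):
    def __init__(self, optimizer_class, **kwargs):
        self.optimizer_class = optimizer_class
        self.kwargs = kwargs

    def __call__(self, model):
        # bind a new optimizer instance to this model's parameters
        return self.optimizer_class(model.parameters(), **self.kwargs)

# usage sketch:
#   factory = SimpleOptimizerFactory(Adam, lr=1e-4, betas=(0.5, 0.9))
#   optimizer = factory(model)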
def __init__(self, enable_cuda=False):
    AbstractMethod.__init__(self)
    self.g = None
    self.f = None
    self.dev_f_collection = None

    g_models = [
        MLPModel(input_dim=1, layer_widths=[200, 200],
                 activation=nn.LeakyReLU).double(),
    ]
    f_models = [
        DefaultCNN(cuda=enable_cuda),
        # OtherCNN(cuda=enable_cuda),
        # OtherCNNV2(cuda=enable_cuda),
        # OtherCNNV3(cuda=enable_cuda),
    ]
    if enable_cuda:
        for g in g_models:
            g.cuda()

    g_learning_rates = [1e-4, 5e-5, 2e-5]
    game_objective = OptimalMomentObjective()

    # one learning setup per candidate g learning rate; f's learning rate
    # is kept at 5x g's
    learning_setups = []
    for g_lr in g_learning_rates:
        learning_setup = {
            "g_optimizer_factory": OptimizerFactory(
                OAdam, lr=g_lr, betas=(0.5, 0.9)),
            "f_optimizer_factory": OptimizerFactory(
                OAdam, lr=5.0 * g_lr, betas=(0.5, 0.9)),
            "game_objective": game_objective,
        }
        learning_setups.append(learning_setup)

    default_g_opt_factory = OptimizerFactory(
        Adam, lr=0.001, betas=(0.5, 0.9))
    default_f_opt_factory = OptimizerFactory(
        Adam, lr=0.0001, betas=(0.5, 0.9))
    # g is a small MLP, so it is evaluated with full-batch gradient descent;
    # f is a CNN and is evaluated with SGD
    g_simple_model_eval = GradientDecentSimpleModelEval(
        max_num_iter=4000, max_no_progress=10, eval_freq=100)
    f_simple_model_eval = SGDSimpleModelEval(
        max_num_epoch=50, max_no_progress=10, batch_size=512, eval_freq=1)
    learning_eval = FHistoryLearningEvalSGDNoStop(
        num_epochs=60, eval_freq=1, batch_size=1024)

    self.model_selection = FHistoryModelSelectionV3(
        g_model_list=g_models,
        f_model_list=f_models,
        learning_args_list=learning_setups,
        default_g_optimizer_factory=default_g_opt_factory,
        default_f_optimizer_factory=default_f_opt_factory,
        g_simple_model_eval=g_simple_model_eval,
        f_simple_model_eval=f_simple_model_eval,
        learning_eval=learning_eval,
        psi_eval_max_no_progress=10,
        psi_eval_burn_in=30,
    )
    self.default_g_opt_factory = default_g_opt_factory
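# For orientation, the game objective used above pits g against the critic f.
# Below is a minimal sketch of a lambda_1-penalized moment objective in the
# spirit of the DeepGMM paper; the repo's OptimalMomentObjective is the
# authoritative implementation, and both the default lambda_1 value and the
# exact form of the penalty term here are assumptions for illustration.
def moment_game_objective(g, f, x, y, z, lambda_1=0.25):
    residual = y - g(x)        # structural residual Y - g(X)
    moment = f(z) * residual   # instrumented moment f(Z) * (Y - g(X))
    # g minimizes, f maximizes: mean moment minus a variance-style penalty
    return moment.mean() - lambda_1 * (moment ** 2).mean()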
def _fit(self, x, y, z, context=None):
    model = DefaultCNN(cuda=torch.cuda.is_available())
    model.float()
    optimizer = torch.optim.Adam(model.parameters(), lr=self._lr)
    model.train()

    x = self.augment(x, context)
    x = torch.tensor(x, dtype=torch.float)
    y = torch.tensor(y, dtype=torch.float)
    if torch.cuda.is_available():
        x = x.cuda()
        y = y.cuda()

    t0 = time.time()
    train = data_utils.DataLoader(data_utils.TensorDataset(x, y),
                                  batch_size=self._n_batch_size,
                                  shuffle=True)
    for epoch in range(self._n_epochs):
        losses = list()
        print("Epoch: ", epoch + 1, "/", self._n_epochs,
              " batch size: ", self._n_batch_size)
        # use distinct names for the mini-batch tensors so the full x, y
        # tensors are not shadowed (and clobbered) inside the loop
        for x_batch, y_batch in train:
            optimizer.zero_grad()
            y_pred = model(x_batch)
            loss = F.mse_loss(y_pred, y_batch)
            losses.append(loss.data.cpu().numpy())
            loss.backward()
            optimizer.step()
        print("  train loss", np.mean(losses))
    self._model = model
    return time.time() - t0
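# A matching _predict is not shown above; a minimal sketch consistent with
# _fit (hypothetical, the repo's own method may differ) would be:
def _predict(self, x, context=None):
    x = self.augment(x, context)
    x = torch.tensor(x, dtype=torch.float)
    if torch.cuda.is_available():
        x = x.cuda()
    with torch.no_grad():
        return self._model(x).data.cpu().numpy()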
class GMM(AbstractBaseline):
    models = {
        "linear": lambda input_dim: torch.nn.Linear(input_dim, 1),
        "2-layer": lambda input_dim: torch.nn.Sequential(
            torch.nn.Linear(input_dim, 20),
            torch.nn.LeakyReLU(0.2),
            torch.nn.Linear(20, 1)),
        "mnist": lambda input_dim: DefaultCNN(cuda=torch.cuda.is_available()),
    }

    def __init__(self, g_model="linear", f_feature_mapping=None,
                 g_feature_mapping=None, n_steps=1, g_epochs=200):
        '''
        Generalized method of moments.

        - g_model: model family to estimate for g ("linear", "2-layer",
          or "mnist")
        - f_feature_mapping: mapping of raw instruments z
        - g_feature_mapping: mapping of raw features x
        - n_steps: number of GMM steps (the weighting matrix is
          re-estimated after the first step)
        - g_epochs: number of epochs used to fit g in each step
        '''
        super().__init__()
        if f_feature_mapping is None:
            self.f_mapping = VanillaFeatures()
        else:
            self.f_mapping = f_feature_mapping
        if g_feature_mapping is None:
            self.g_mapping = VanillaFeatures(add_constant=False)
        else:
            self.g_mapping = g_feature_mapping
        if g_model in self.models:
            self._g = self.models[g_model]
        else:
            raise ValueError("g_model has invalid value " + str(g_model))
        self._optimizer = None
        self._n_steps = n_steps
        self._g_epochs = g_epochs

    def display(self):
        for name, param in self._model.named_parameters():
            print(name, self.arr2str(param.data.cpu().numpy()))

    def fit_g_minibatch(self, train, loss):
        losses = list()
        for x_b, y_b, z_b in train:
            if torch.cuda.is_available():
                x_b = x_b.cuda()
                y_b = y_b.cuda()
                z_b = z_b.cuda()
            loss_val = self._optimizer.step(lambda: loss(x_b, y_b, z_b))
            losses.append(loss_val.data.cpu().numpy())
        print("  train loss ", np.mean(losses))

    def fit_g_batch(self, x, y, z, loss):
        self._optimizer.step(lambda: loss(x, y, z))

    def _fit(self, x, y, z, context=None):
        z = self.augment(z, context)
        z = self.f_mapping.transform(z)
        x = self.augment(x, context)
        x = self.g_mapping.transform(x)
        x = torch.tensor(x, dtype=torch.float)
        y = torch.tensor(y, dtype=torch.float)
        z = torch.tensor(z, dtype=torch.float)
        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()
            z = z.cuda()
        n_samples = x.size(0)
        x_dim, z_dim = x.size(1), z.size(1)

        g_model = self._g(x_dim)
        if torch.cuda.is_available():
            g_model = g_model.cuda()
        g_model.float()
        self._optimizer = torch.optim.Adam(g_model.parameters(), lr=0.01)

        # the first GMM step uses the identity weighting matrix
        weights = torch.eye(z_dim)
        if torch.cuda.is_available():
            weights = weights.cuda()

        def loss(x_b, y_b, z_b):
            # quadratic form of the empirical moment conditions in the
            # current weighting matrix
            moment_conditions = z_b.mul(y_b - g_model(x_b))
            moms = moment_conditions.mean(dim=0, keepdim=True)
            loss_value = torch.mm(torch.mm(moms, weights), moms.t())
            self._optimizer.zero_grad()
            loss_value.backward()
            return loss_value

        batch_mode = "mini" if n_samples > 5000 else "full"
        t0 = time.time()
        train = data_utils.DataLoader(data_utils.TensorDataset(x, y, z),
                                      batch_size=128, shuffle=True)
        for step in range(self._n_steps):
            print("GMM step %d/%d" % (step + 1, self._n_steps))
            if step > 0:
                # re-estimate the optimal weighting matrix as the
                # pseudo-inverse of the moment covariance
                with torch.no_grad():
                    moment_conditions = z.mul(y - g_model(x))
                    covariance_matrix = torch.mm(
                        moment_conditions.t(), moment_conditions) / n_samples
                    weights = torch.as_tensor(np.linalg.pinv(
                        covariance_matrix.cpu().numpy(), rcond=1e-9))
                    if torch.cuda.is_available():
                        weights = weights.cuda()
            for epoch in range(self._g_epochs):
                if batch_mode == "full":
                    self.fit_g_batch(x, y, z, loss)
                else:
                    print("g epoch %d / %d" % (epoch + 1, self._g_epochs))
                    self.fit_g_minibatch(train, loss)
        self._model = g_model
        return time.time() - t0

    def _predict(self, x, context):
        x = self.augment(x, context)
        x = self.g_mapping.transform(x)
        x = torch.tensor(x, dtype=torch.float)
        if torch.cuda.is_available():
            x = x.cuda()
        return self._model(x).data.cpu().numpy()
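# Quick smoke test for the GMM baseline on synthetic linear IV data. All of
# this is illustrative: the data-generating process, sample size, and the
# expectation of recovering a slope near 2.0 are assumptions for the demo,
# not repo fixtures.
def _gmm_smoke_test():
    rng = np.random.RandomState(0)
    n = 2000
    z = rng.normal(size=(n, 1))                        # instrument
    confounder = rng.normal(size=(n, 1))               # unobserved confounder
    x = z + confounder + 0.1 * rng.normal(size=(n, 1))  # endogenous regressor
    y = 2.0 * x + confounder + 0.1 * rng.normal(size=(n, 1))
    gmm = GMM(g_model="linear", n_steps=2, g_epochs=50)
    gmm._fit(x, y, z)
    # with a valid instrument, the fitted slope should be close to 2.0
    print(gmm._predict(np.array([[1.0]], dtype=np.float32), context=None))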
def main():
    num_epochs = 500
    batch_size = 1000

    scenario_name = "mnist_xz"
    print("\nLoading " + scenario_name + "...")
    scenario = AbstractScenario(filename="data/" + scenario_name + "/main.npz")
    scenario.info()
    scenario.to_tensor()
    if torch.cuda.is_available() and ENABLE_CUDA:
        scenario.to_cuda()

    train = scenario.get_dataset("train")
    dev = scenario.get_dataset("dev")
    test = scenario.get_dataset("test")
    x_train, z_train, y_train = train.x, train.z, train.y
    x_dev, z_dev, y_dev, g_of_x_oracle_dev = dev.x, dev.z, dev.y, dev.g
    x_test, z_test, y_test, g_of_x_oracle_test = test.x, test.z, test.y, test.g

    # To replicate the low-dimensional scenarios from the paper ("abs",
    # "linear", "sin", "step"), generate the data instead of loading it:
    # scenario = Standardizer(AGMMZoo("step", two_gps=False,
    #                                 n_instruments=2))
    # scenario.setup(num_train=10000, num_dev=10000, num_test=10000)
    # and use MLP models for g and f, e.g.:
    # g = MLPModel(input_dim=1, layer_widths=[20, 3],
    #              activation=nn.LeakyReLU).double()
    # f = MLPModel(input_dim=2, layer_widths=[20],
    #              activation=nn.LeakyReLU).double()

    # set up f and g models and optimizers
    g = DefaultCNN(cuda=ENABLE_CUDA)
    f = DefaultCNN(cuda=ENABLE_CUDA)
    if torch.cuda.is_available() and ENABLE_CUDA:
        g = g.cuda()
        f = f.cuda()
    g_optimizer = OAdam(g.parameters(), lr=5e-5, betas=(0.5, 0.9))
    f_optimizer = OAdam(f.parameters(), lr=2.5e-5, betas=(0.5, 0.9))

    # train models using the DeepGMM algorithm
    g = train_deep_gmm(g=g, f=f, g_optimizer=g_optimizer,
                       f_optimizer=f_optimizer, num_epochs=num_epochs,
                       batch_size=batch_size, x_train=x_train,
                       z_train=z_train, y_train=y_train, verbose=True,
                       print_freq=20, x_dev=x_dev, z_dev=z_dev, y_dev=y_dev,
                       g_of_x_oracle_dev=g_of_x_oracle_dev)

    # evaluate the learned g on held-out data and save its weights
    test_mse = calc_mse_safe_test(x_test, g_of_x_oracle_test, g,
                                  batch_size=batch_size)
    torch.save({"model": g.state_dict()}, "g_mnist.pth")
    print("MSE on test data: %f" % test_mse)
    print("")
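# Reloading the saved g network later (a minimal sketch; assumes the same
# DefaultCNN architecture and the checkpoint layout written by main() above):
def load_g(path="g_mnist.pth", enable_cuda=False):
    g = DefaultCNN(cuda=enable_cuda)
    checkpoint = torch.load(path, map_location="cpu")
    g.load_state_dict(checkpoint["model"])
    g.eval()  # switch to inference mode
    return g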