def _build_optimizer(optim_method, optim_param): if optim_method == 'AdaGrad': optim = optimizer.AdagradOptimizer(**optim_param) elif optim_method == 'SgdOptimizer': optim = optimizer.SgdOptimizer(**optim_param) elif optim_method == 'Adam': optim = optimizer.AdamOptimizer(**optim_param) else: raise Exception('Did you foget to implement {}?'.format(optim_method)) return optim
batch_size=100)
# NOTE(review): fragment — the call closed above and the names used below
# (model, sig_input, tanh_input, adjoint_input, sig_adjoint_label,
# tanh_adjoint_label, build_adjoint_pinn, instantiator, workspace,
# optimizer) are defined before this view; confirm against the full file.
# Feed the fetched blob values into the model's input/label schema slots.
model.input_feature_schema.sig_input.set_value(sig_input.get(), unsafe=True)
model.input_feature_schema.tanh_input.set_value(tanh_input.get(), unsafe=True)
model.input_feature_schema.adjoint_input.set_value(adjoint_input.get(), unsafe=True)
model.trainer_extra_schema.sig_loss_record.label.set_value(
    sig_adjoint_label.get(), unsafe=True)
model.trainer_extra_schema.tanh_loss_record.label.set_value(
    tanh_adjoint_label.get(), unsafe=True)
# Build model: adjoint PINN with one 10-unit hidden layer per branch and
# separate Adagrad optimizers for weights and biases.
(origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss) = build_adjoint_pinn(
    model,
    sig_net_dim=[10, 1],
    tanh_net_dim=[10, 1],
    weight_optim=optimizer.AdagradOptimizer(
        alpha=0.01,
        epsilon=1e-4,
    ),
    bias_optim=optimizer.AdagradOptimizer(
        alpha=0.01,
        epsilon=1e-4,
    ))
# Train the model: 100 outer evaluations of 1000 iterations each.
train_init_net, train_net = instantiator.generate_training_nets(model)
workspace.RunNetOnce(train_init_net)
workspace.CreateNet(train_net)
num_iter = 1000
eval_num_iter = 100
# loss_lst = []
for i in range(eval_num_iter):
    workspace.RunNet(train_net.Proto().name, num_iter=num_iter)
def test_multiple_optimizers(self):
    """Verify learning-rate blob sharing across optimizer instances.

    Two parameters updated by the SAME optimizer instance must share one
    learning-rate blob; parameters updated by DIFFERENT instances (even of
    the same optimizer class) must each get their own.

    Fixes an operator-precedence bug in the original op filters:
    ``a and b or c`` parses as ``(a and b) or c``, so any op whose first
    input was the bias blob matched regardless of op type. The intended
    condition ``a and (b or c)`` is written below with ``in``.
    """
    from caffe2.python import brew, core, optimizer
    from caffe2.python.model_helper import ModelHelper

    model = ModelHelper(name="test")
    fc1 = brew.fc(model, 'data', 'fc1', 100, 50)
    fc2 = brew.fc(model, fc1, 'fc2', 50, 25)
    pred = brew.fc(model, fc2, 'fc3', 25, 10)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])

    param_to_device = optimizer._get_param_to_device(model)

    def infer_blob_device(blob_name):
        # Resolve the device a parameter (and its gradient) lives on.
        return optimizer.get_param_device(
            blob_name, "{}_grad".format(blob_name), param_to_device)

    sgd_1 = optimizer.SgdOptimizer(base_learning_rate=0.1)
    sgd_2 = optimizer.SgdOptimizer(base_learning_rate=0.2)
    adagrad = optimizer.AdagradOptimizer()

    # Check same optimizer share the same learning rate.
    with core.DeviceScope(infer_blob_device("fc1_w")):
        sgd_1(model.net, model.param_init_net, "fc1_w", "fc1_w_grad")
    with core.DeviceScope(infer_blob_device("fc1_b")):
        sgd_1(model.net, model.param_init_net, "fc1_b", "fc1_b_grad")
    fc1_lr_blobs = []
    for op in model.net.Proto().op:
        # Precedence fix: only WeightedSum ops over fc1's params qualify.
        if op.type == 'WeightedSum' and op.input[0] in ('fc1_w', 'fc1_b'):
            fc1_lr_blobs.append(op.input[3])
    self.assertEqual(fc1_lr_blobs[0], fc1_lr_blobs[1])

    # Check different instance of the same optimizer has a different lr.
    with core.DeviceScope(infer_blob_device("fc2_w")):
        sgd_2(model.net, model.param_init_net, "fc2_w", "fc2_w_grad")
    with core.DeviceScope(infer_blob_device("fc2_b")):
        sgd_2(model.net, model.param_init_net, "fc2_b", "fc2_b_grad")
    fc2_lr_blobs = []
    for op in model.net.Proto().op:
        if op.type == 'WeightedSum' and op.input[0] in ('fc2_w', 'fc2_b'):
            # The second SgdOptimizer instance must not reuse fc1's lr blob.
            self.assertTrue(op.input[3] not in fc1_lr_blobs)
            fc2_lr_blobs.append(op.input[3])
    self.assertEqual(fc2_lr_blobs[0], fc2_lr_blobs[1])

    # Check different optimizer type case
    with core.DeviceScope(infer_blob_device("fc3_w")):
        adagrad(model.net, model.param_init_net, "fc3_w", "fc3_w_grad")
    with core.DeviceScope(infer_blob_device("fc3_b")):
        adagrad(model.net, model.param_init_net, "fc3_b", "fc3_b_grad")
    fc3_lr_blobs = []
    for op in model.net.Proto().op:
        if op.type == 'Adagrad' and op.input[0] in ('fc3_w', 'fc3_b'):
            # Adagrad's lr blob is distinct from both SGD instances'.
            self.assertTrue(op.input[3] not in fc2_lr_blobs)
            self.assertTrue(op.input[3] not in fc1_lr_blobs)
            fc3_lr_blobs.append(op.input[3])
    self.assertEqual(fc3_lr_blobs[0], fc3_lr_blobs[1])
model, db_name, 'minidb', ['origin_input', 'adjoint_input', 'label'],
    batch_size=100
)
# NOTE(review): fragment — the arguments above close a call opened before
# this view (presumably a DB reader/feeder setup); model, db_name, the
# input records, build_adjoint_mlp, instantiator, workspace, schema, np,
# and x_array are all defined earlier in the file — verify there.
# Feed the fetched blob values into the model's input/label schema slots.
model.input_feature_schema.origin_input.set_value(
    origin_input.get(), unsafe=True)
model.input_feature_schema.adjoint_input.set_value(
    adjoint_input.get(), unsafe=True)
model.trainer_extra_schema.label.set_value(
    label.get(), unsafe=True)
# Build model: adjoint MLP trained with a single Adagrad optimizer.
origin_pred, adjoint_pred, loss = build_adjoint_mlp(
    model,
    input_dim=input_dim,
    hidden_dims=hidden_dims,
    output_dim=output_dim,
    optim=optimizer.AdagradOptimizer(alpha=0.01, epsilon=1e-4,))
# Train the model
train_init_net, train_net = instantiator.generate_training_nets(model)
workspace.RunNetOnce(train_init_net)
workspace.CreateNet(train_net)
num_iter = 10000
eval_num_iter = 1
for i in range(eval_num_iter):
    workspace.RunNet(train_net.Proto().name, num_iter=num_iter)
    # NOTE(review): with eval_num_iter = 1 it makes no difference whether
    # this print is inside or after the loop; placement inferred.
    print(schema.FetchRecord(loss).get())
# Plot the origin prediction, its numerical gradient w.r.t. x, and the
# adjoint prediction for visual comparison.
import matplotlib.pyplot as plt
origin_pred_array = np.squeeze(schema.FetchRecord(origin_pred).get())
plt.plot(x_array, np.gradient(origin_pred_array, np.squeeze(x_array)), 'r')
plt.plot(x_array, origin_pred_array, 'r')
plt.plot(x_array, schema.FetchRecord(adjoint_pred).get(), 'b')