# NOTE(review): this physical line is a whitespace-mangled script fragment — several
# statements collapsed onto one line (not valid Python as written). It appears to:
#   1. build a MeanInterpolator model (double precision, moved to `device`) from
#      state_dim / args.h_dim / args.z_dim with args.scaling;
#   2. create an Adam optimizer with two param groups (feature_extractor and
#      interpolator), both at lr=args.lr_nn;
#   3. wrap them in a MITrainer (print_freq=30);
#   4. create a ReplayMemoryDataset sized args.replay_memory_size with use_mean=True;
#   5. pick the agent class: AgentPicker when args.pick is truthy, else Agent.
# The final statement `agent = Agent(env,` is TRUNCATED — its remaining arguments are
# not visible in this chunk, so the fragment cannot be reconstructed here. TODO: recover
# the original multi-line source before editing; do not run this file as-is.
model = MeanInterpolator(state_dim, args.h_dim, args.z_dim, scaling=args.scaling).to(device).double() optimizer = torch.optim.Adam([{ 'params': model.feature_extractor.parameters(), 'lr': args.lr_nn }, { 'params': model.interpolator.parameters(), 'lr': args.lr_nn }]) # train model_trainer = MITrainer(device, model, optimizer, args, print_freq=30) """create replay memory""" replay_memory = ReplayMemoryDataset(args.replay_memory_size, use_mean=True) """create agent""" if args.pick: agent = AgentPicker(env, model, args.device_np, args.num_context, running_state=running_state, render=args.render, pick_dist=None, fixed_sigma=args.fixed_sigma) else: agent = Agent(env,
# NOTE(review): whitespace-mangled fragment that begins MID-EXPRESSION — the opening
# `torch.optim.Adam([{` of these param groups is outside this chunk. After the optimizer
# (feature_extractor at `learning_rate`, interpolator at the hard-coded 1e-1 — presumably
# intentional, but verify against the original script), it appears to:
#   1. create args.directory_path, tolerating FileExistsError (EAFP mkdir);
#   2. build a MITrainer (num_context=args.num_context, print_freq=10) and train it on
#      data_loader for args.epochs with early stopping;
#   3. plot args.num_tot_samples (x, y) pairs from `dataset` with matplotlib
#      (blue, alpha=0.5), label the axes, and clamp the x-axis to x_range.
# Not valid Python as written — multiple statements and a for-loop collapsed onto one
# line. TODO: recover the original multi-line source; do not run this file as-is.
'params': model.feature_extractor.parameters(), 'lr': learning_rate }, { 'params': model.interpolator.parameters(), 'lr': 1e-1 }]) try: os.mkdir(args.directory_path) except FileExistsError: pass print('start training') model_trainer = MITrainer(device, model, optimizer, num_context=args.num_context, print_freq=10) model_trainer.train(data_loader, args.epochs, early_stopping=args.early_stopping) # Visualize data samples plt.figure(1) #plt.title('Samples from gp with kernels: ' + ' '.join(kernel)) for i in range(args.num_tot_samples): x, y = dataset[i] plt.plot(x.cpu().numpy(), y.cpu().numpy(), c='b', alpha=0.5) plt.xlabel('x') plt.ylabel('y') plt.xlim(x_range[0], x_range[1])
# NOTE(review): whitespace-mangled fragment that begins MID-CALL — the constructor whose
# trailing arguments `args.h_mi_dim, args.z_mi_dim, scaling=args.scaling` open this chunk
# is outside view (presumably a MeanInterpolator, by analogy with the sibling fragments —
# confirm against the original file). It then appears to:
#   1. create optimizer_mi (Adam, feature_extractor + interpolator groups at args.lr_nn);
#   2. build a MITrainer with num_context=args.num_context and
#      num_target=args.num_testing_points (print_freq=50);
#   3. optionally build a Value(state_dim) network on args.device_np when args.value_net;
#   4. create ReplayMemoryDataset and ValueReplay, both sized args.replay_memory_size.
# The final statement `agent_mi = Agent_all_ctxt(env, model, args.device_np,
# running_state=None,` is TRUNCATED — its remaining arguments are not visible here.
# TODO: recover the original multi-line source; do not run this file as-is.
args.h_mi_dim, args.z_mi_dim, scaling=args.scaling).to(device).double() optimizer_mi = torch.optim.Adam([{ 'params': model.feature_extractor.parameters(), 'lr': args.lr_nn }, { 'params': model.interpolator.parameters(), 'lr': args.lr_nn }]) # trainer model_trainer = MITrainer(device, model, optimizer_mi, num_context=args.num_context, num_target=args.num_testing_points, print_freq=50) if args.value_net: value_net = Value(state_dim) value_net.to(args.device_np) # RM replay_memory_mi = ReplayMemoryDataset(args.replay_memory_size) value_replay_memory = ValueReplay(args.replay_memory_size) """create agent""" agent_mi = Agent_all_ctxt(env, model, args.device_np, running_state=None,
# Set up the three regression baselines compared in this experiment:
# a Neural Process (optionally attentive), a MeanInterpolator, and a
# deep-kernel-learning GP (DKL), each with its own Adam optimizer/trainer.
#
# NOTE(review): this section was collapsed onto a single physical line in the
# source (invalid Python); restored here to conventional formatting with the
# same statements in the same order. All names (args, device, learning_rate,
# data_loader, model classes, gpytorch) are defined elsewhere in the file/project.

# --- Neural Process baseline ---
if args.use_attention:
    model_np = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim_np,
                                      args.z_dim_np, args.h_dim_np, args.a_dim_np,
                                      att_type='multihead').to(device)
else:
    model_np = NeuralProcess(args.x_dim, args.y_dim, args.r_dim_np,
                             args.z_dim_np, args.h_dim_np).to(device)
optimizer_np = torch.optim.Adam(model_np.parameters(), lr=learning_rate)
np_trainer = NeuralProcessTrainer(device, model_np, optimizer_np,
                                  num_context_range=args.context_range,
                                  num_extra_target_range=args.num_points,
                                  print_freq=5040)

# --- MeanInterpolator baseline (double precision; x is 1-D here) ---
model_mi = MeanInterpolator(1, args.h_dim_mi, args.z_dim_mi).to(device).double()
# Separate param groups for the feature extractor and the interpolator head,
# both at the shared learning_rate.
optimizer_mi = torch.optim.Adam([
    {'params': model_mi.feature_extractor.parameters(), 'lr': learning_rate},
    {'params': model_mi.interpolator.parameters(), 'lr': learning_rate}])
trainer_mi = MITrainer(device, model_mi, optimizer_mi,
                       num_context=args.test_context,
                       num_target=args.num_points - args.test_context,
                       print_freq=10)

# --- DKL baseline ---
# Grab a single batch from the loader to initialize the GP's training inputs.
for data_init in data_loader:
    break
x_init, y_init = data_init
# Split the first sample of the batch into context/target (context included in target).
x_init, y_init, _, _ = context_target_split_CinT(
    x_init[0:1], y_init[0:1],
    args.test_context, args.num_points - args.test_context)
# noise_constraint=gpytorch.constraints.GreaterThan(5e-2)  # (kept from source, disabled)
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(device)
model_dkl = GPRegressionModel(x_init, y_init.squeeze(0).squeeze(-1), likelihood,
                              args.h_dim_dkl, args.z_dim_dkl,
                              name_id='DKL').to(device)
# One optimizer over all DKL components at a single (hard-coded) lr.
optimizer_dkl = torch.optim.Adam([
    {'params': model_dkl.feature_extractor.parameters()},
    {'params': model_dkl.covar_module.parameters()},
    {'params': model_dkl.mean_module.parameters()},
    {'params': model_dkl.likelihood.parameters()}], lr=0.05)