# Visualize data samples for i in range(64): x, y = dataset[i] plt.plot(x.numpy(), y.numpy(), c='b', alpha=0.5) plt.xlim(-pi, pi) from neural_process import NeuralProcess x_dim = 1 y_dim = 1 r_dim = 50 # Dimension of representation of context points z_dim = 50 # Dimension of sampled latent variable h_dim = 50 # Dimension of hidden layers in encoder and decoder neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim) from torch.utils.data import DataLoader from training import NeuralProcessTrainer batch_size = 2 num_context = 4 num_target = 4 data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True) optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4) np_trainer = NeuralProcessTrainer(device, neuralprocess, optimizer, num_context_range=(num_context, num_context), num_extra_target_range=(num_target,
"""get advantage estimation from the trajectories""" advantages, returns = estimate_advantages(rewards, masks, values, args.gamma, args.tau, device) """perform TRPO update""" trpo_step(policy_net, value_net, states, actions, returns, advantages, args.max_kl_trpo, args.damping, args.l2_reg) '''create neural process''' if args.use_attentive_np: policy_np = AttentiveNeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim, args.h_dim, args.a_dim, use_self_att=False).to(args.device_np) else: policy_np = NeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim, args.h_dim).to(args.device_np) optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4) np_trainer = NeuralProcessTrainerLoo(args.device_np, policy_np, optimizer, num_context_range=(num_context_points, num_context_points), num_extra_target_range=(args.num_testing_points, args.num_testing_points), print_freq=50) if args.v_use_attentive_np: value_np = AttentiveNeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim, args.v_r_dim, args.a_dim, use_self_att=False).to(args.device_np) else: value_np = NeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim, args.v_h_dim).to(args.device_np) value_optimizer = torch.optim.Adam(value_np.parameters(), lr=3e-4) value_np_trainer = NeuralProcessTrainerLoo(args.device_np, value_np, value_optimizer, num_context_range=(num_context_points, num_context_points),
# No observation normalization in this configuration.
running_state = None

"""seeding"""
# Seed numpy, torch and the environment for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)

# Value NP: state -> scalar value; attentive variant optional.
# NOTE(review): the attentive branch passes args.v_z_dim as the 6th positional
# argument where other chunks in this codebase pass an attention dim (a_dim) —
# verify against AttentiveNeuralProcess's signature.
if args.use_attentive_np:
    value_np = AttentiveNeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim,
                                      args.v_h_dim, args.v_z_dim,
                                      use_self_att=False).to(args.device)
else:
    value_np = NeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim,
                             args.v_h_dim).to(args.device)
value_optimizer = torch.optim.Adam(value_np.parameters(), lr=3e-4)
# Choose between leave-one-out and RL trainers for the value NP.
if args.loo:
    value_np_trainer = NeuralProcessTrainerLoo(
        args.device, value_np, value_optimizer,
        num_context_range=(num_context_points, num_context_points),
        num_extra_target_range=(args.num_testing_points, args.num_testing_points),
        print_freq=50)
else:
    # NOTE(review): chunk truncated mid-call — the NeuralProcessTrainerRL(...)
    # arguments are cut off here.
    value_np_trainer = NeuralProcessTrainerRL(
        args.device, value_np, value_optimizer,
# Grab a single batch from the loader to initialize the GP model's inducing data.
for data_init in data_loader:
    break
x_init, y_init = data_init
# Keep only the first task in the batch and split it into context/target points
# (the two unused returns are the target split).
x_init, y_init, _, _ = context_target_split(x_init[0:1], y_init[0:1],
                                            args.num_context, args.num_target)
print('dataset created', x_init.size())

# create model
# Deep-kernel GP regression model (gpytorch) seeded with the initial context.
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(device)
model_dkl = GPRegressionModel(x_init, y_init.squeeze(0).squeeze(-1), likelihood,
                              args.h_dim_dkl, args.z_dim_dkl,
                              name_id='DKL').to(device)
# Neural-process baseline; attentive variant when `anp` is truthy.
if anp:
    model_np = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim_np,
                                      args.z_dim_np, args.h_dim_np, args.a_dim_np,
                                      use_self_att=True, fixed_sigma=None).to(device)
else:
    model_np = NeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np,
                             args.h_dim_np, fixed_sigma=None).to(device)
# One parameter group per DKL sub-module, all at lr=0.01.
optimizer_dkl = torch.optim.Adam([
    {'params': model_dkl.feature_extractor.parameters()},
    {'params': model_dkl.covar_module.parameters()},
    {'params': model_dkl.mean_module.parameters()},
    {'params': model_dkl.likelihood.parameters()}], lr=0.01)
trainer_dkl = DKMTrainer(device, model_dkl, optimizer_dkl, args,
                         print_freq=args.print_freq)
optimizer_np = torch.optim.Adam(model_np.parameters(), lr=learning_rate)
# Fixed context/target counts for the NP trainer (range endpoints equal).
np_trainer = NeuralProcessTrainer(device, model_np, optimizer_np,
                                  num_context_range=(args.num_context,args.num_context),
                                  num_extra_target_range=(args.num_target,args.num_target),
                                  print_freq=args.print_freq)

# train
print('start dkl training')
# Environment dimensions; a 0-d action space shape marks a discrete space.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
is_disc_action = len(env.action_space.shape) == 0
# Optional online observation normalization.
if args.use_running_state:
    running_state = ZFilter((state_dim,), clip=5)  # running list of states that allows to access precise mean and std
else:
    running_state = None
# running_reward = ZFilter((1,), demean=False, clip=10)

"""seeding"""
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)

'''create neural process'''
# Policy NP (state -> action) with its RL trainer; context/target counts are
# sampled uniformly from 400-500 per update.
policy_np = NeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim,
                          args.h_dim).to(args.device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(args.device_np, policy_np, optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=10)
# Value NP (state -> scalar) with an identically configured trainer.
value_np = NeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim,
                         args.v_h_dim).to(args.device_np)
value_optimizer = torch.optim.Adam(value_np.parameters(), lr=3e-4)
value_np_trainer = NeuralProcessTrainerRL(args.device_np, value_np, value_optimizer,
                                          num_context_range=(400, 500),
                                          num_extra_target_range=(400, 500),
                                          print_freq=10)

"""create replay memory"""
# Separate replay buffers for policy transitions and value targets.
replay_memory = ReplayMemoryDataset(args.replay_memory_size)
value_replay_memory = ValueReplay(args.v_replay_memory_size)
# Environment dimensions; a 0-d action space shape marks a discrete space.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
is_disc_action = len(env.action_space.shape) == 0
# Optional online observation normalization (flag is a bare name here, not
# args.use_running_state as in sibling scripts).
if use_running_state:
    running_state = ZFilter(
        (state_dim, ), clip=5
    )  # running list of states that allows to access precise mean and std
else:
    running_state = None
# running_reward = ZFilter((1,), demean=False, clip=10)

"""seeding"""
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)

'''create neural process'''
# Policy NP (state -> action); dims come from bare names (r_dim, z_dim, h_dim)
# defined outside this view.
policy_np = NeuralProcess(state_dim, action_dim, r_dim, z_dim, h_dim).to(device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(device_np, policy_np, optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=100)

"""create replay memory"""
replay_memory = ReplayMemoryDataset(replay_memory_size)

"""create agent"""
# NOTE(review): chunk truncated mid-call — the Agent(...) construction is cut
# off after `render=args.render,`.
agent = Agent(env, policy_np, device_np, running_state=running_state,
              render=args.render,
# Persist the data plot named after the kernel combination, then free the figure.
plt.savefig(plots_path + '-'.join(kernel) + '_data')
plt.close()

# create and train np
# Attentive or vanilla NP; fixed_sigma optionally pins the output variance.
if use_attention:
    neuralprocess = AttentiveNeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim,
                                           a_dim, use_self_att=use_self_att,
                                           fixed_sigma=fix_sigma).to(device)
else:
    neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim,
                                  fixed_sigma=fix_sigma).to(device)
optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=learning_rate)
# num_context / num_target are passed through as the sampling ranges here
# (unlike sibling scripts that wrap them in (k, k) tuples).
np_trainer = NeuralProcessTrainer(device, neuralprocess, optimizer,
                                  num_context_range=num_context,
                                  num_extra_target_range=num_target,
                                  print_freq=5040)
# `training` is toggled as a plain attribute in this codebase (not nn.Module.train()).
neuralprocess.training = True
np_trainer.train(data_loader, epochs)
plt.figure(2)
plt.title('average loss over epochs')
id = mdl + time.ctime() + '{}e_{}b_{}c{}t_{}lr_{}r_{}z_{}a'.format( epochs, batch_size, num_context, num_target, l, r_dim, z_dim, a_dim) # create and train np if use_attention: neuralprocess = AttentiveNeuralProcess( x_dim, y_dim, r_dim, z_dim, h_dim, a_dim, use_self_att=use_self_att).to(device) first = False else: neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim).to(device) t0 = time.time() optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=learning_rate) np_trainer = NeuralProcessTrainer(device, neuralprocess, optimizer, num_context_range=num_context, num_extra_target_range=num_target, print_freq=50000) neuralprocess.training = True np_trainer.train(data_loader, epochs, early_stopping=0) '''plot training epochs''' n_ep = len(np_trainer.epoch_loss_history) ax_epoch.plot(np.linspace(0, n_ep - 1, n_ep), np_trainer.epoch_loss_history,
def sample_context(x, y, num_context=100): num_points = x.shape[1] # Sample locations of context and target points locations = np.random.choice(num_points, size=num_context, replace=False) x_context = x[:, locations[:num_context], :] y_context = y[:, locations[:num_context], :] return x_context, y_context if use_attention: neuralprocess = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim, args.z_dim, args.h_dim, args.a_dim, use_self_att=True).to(device) else: neuralprocess = NeuralProcess(args.x_dim, args.y_dim, args.r_dim, args.z_dim, args.h_dim).to(device) optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4) np_trainer = NeuralProcessTrainerRL(device, neuralprocess, optimizer, num_context_range=(400, 500), num_extra_target_range= (400, 500), print_freq=2) def get_dataset(i_iter): file_name = memory_dir + str(i_iter) + '^iter_' + env_name with open(file_name, 'rb') as file_m: memory_iter = pickle.load(file_m) # memory_iter.memory to access list of transitions dataset = MemoryDataset(memory_iter.memory, max_len=999)
# NOTE(review): this chunk begins mid-way through a parser.add_argument(...)
# call — its opening (flag name, type) lies outside this view.
default=1,
metavar='N',
help='interval between training status logs (default: 10)')
# NOTE(review): the help text above says "default: 10" but default=1 — one of
# the two is stale; confirm intended value.
parser.add_argument(
    '--save-model-interval',
    type=int,
    default=0,
    metavar='N',
    help="interval between saving model (default: 0, means don't save)")
parser.add_argument('--gpu-index', type=int, default=0, metavar='N')
args = parser.parse_args()

# Policy NP with hard-coded 2-d state / 1-d action; variance left learnable.
policy_np = NeuralProcess(2, 1, args.r_dim, args.z_dim, args.h_dim,
                          fixed_sigma=None).to(args.device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(args.device_np, policy_np, optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=10)
env = gym.make(args.env_name)


def sample_initial_context_normal(num_episodes):
    # NOTE(review): chunk truncated — the function body continues past this view.
    initial_episodes = []
    max_episode_len = 999
# Synthetic sine-wave task distribution: 800 functions, 400 points each.
dataset = SineData(amplitude_range=(-1., 1.),
                   shift_range=(-.5, .5),
                   num_points=400,
                   num_samples=800)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# if config["dataset"] == "mnist":
#     data_loader, _ = mnist(batch_size=batch_size, size=img_size[1])
# elif config["dataset"] == "celeba":
#     data_loader = celeba(batch_size=batch_size, size=img_size[1])
# np_img = NeuralProcessImg(img_size, r_dim, z_dim, h_dim).to(device)

# NP variant conditioned on a GRU: the recurrent net and its initial hidden
# state are passed straight into the NeuralProcess constructor.
gru = GRUNet(50, 256, 50, 2)
hidden = gru.init_hidden(batch_size)
input_data = NeuralProcess(1, 1, 50, 50, 50, gru, hidden)
optimizer = torch.optim.Adam(input_data.parameters(), lr=config["lr"])
# Context/target ranges are passed positionally here (names defined elsewhere).
np_trainer = NeuralProcessTrainer(device, input_data, optimizer,
                                  num_context_range, num_extra_target_range,
                                  print_freq=100)

# Train one epoch at a time so per-epoch bookkeeping can run between calls.
for epoch in range(epochs):
    print("Epoch {}".format(epoch + 1))
    np_trainer.train(data_loader, 1)
    # Save losses at every epoch
# Seed all RNG sources for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)
# gym time-limit wrapper's cap; note the private-attribute access.
max_episode_len = env._max_episode_steps

'''create neural process'''
ep_frq = 50  # trainer print frequency (in episodes, presumably)
if args.use_attentive_np:
    policy_np = AttentiveNeuralProcess(state_dim, action_dim, args.r_dim,
                                       args.z_dim, args.h_dim, args.a_dim,
                                       use_self_att=False).to(device_np)
else:
    policy_np = NeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim,
                              args.h_dim).to(device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
# NOTE(review): only ONE range tuple is passed positionally here, where sibling
# scripts pass both num_context_range and num_extra_target_range — confirm
# against NeuralProcessTrainerRL's signature.
np_trainer = NeuralProcessTrainerRL(device_np, policy_np, optimizer,
                                    (1, max_episode_len // 2),
                                    print_freq=ep_frq)

'''create MKI model'''
# Mean-interpolator baseline, run in double precision.
mi_model = MeanInterpolator(state_dim, args.h_mi_dim, args.z_mi_dim,
                            scaling=args.scaling).to(device_np).double()
# NOTE(review): chunk truncated mid-call — the Adam parameter-group list is cut
# off after the first group's 'params' entry.
optimizer_mi = torch.optim.Adam([{
    'params': mi_model.feature_extractor.parameters(),
# # Create dataset if args.x_dim == 1: dataset = MultiGPData(args.mean, args.kernel, num_samples=args.num_tot_samples, amplitude_range=args.x_range[0], num_points=args.num_points) data_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True) test_dataset = MultiGPData(args.mean, args.kernel, num_samples=1, amplitude_range=[v*2 for v in args.x_range[0]], num_points=args.num_points) test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=True) # # Create models # NP if args.use_attention: model_np = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np, args.h_dim_np, args.a_dim_np, att_type='multihead').to(device) else: model_np = NeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np, args.h_dim_np).to(device) optimizer_np = torch.optim.Adam(model_np.parameters(), lr=learning_rate) np_trainer = NeuralProcessTrainer(device, model_np, optimizer_np, num_context_range=args.context_range, num_extra_target_range=args.num_points, print_freq=5040) # MI model_mi = MeanInterpolator(1, args.h_dim_mi, args.z_dim_mi).to(device).double() optimizer_mi = torch.optim.Adam([ {'params': model_mi.feature_extractor.parameters(), 'lr': learning_rate}, {'params': model_mi.interpolator.parameters(), 'lr': learning_rate}]) trainer_mi = MITrainer(device, model_mi, optimizer_mi, num_context=args.test_context, num_target=args.num_points-args.test_context, print_freq=10) # DKL