def GetResnet101Features():
    """Precompute and dump encoder features for the TRAIN split.

    For every batch of 5 images, pickles the ResNet encoder features to
    ``<i>.p`` and the VAE-encoder features to ``VAE_<i>.p`` in the
    TrainResnet101Features folder.  Inference only — no gradients.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # folder with data files saved by create_input_files.py
    data_folder = 'C:/Users/paoca/Documents/UVA PHD/NLP/PROJECT/UnnecesaryDataFolder'
    data_name = 'coco_5_cap_per_img_5_min_word_freq'
    out_folder = os.path.join(data_folder, 'TrainResnet101Features')

    # Load word map (word -> index)
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # shuffle=False so dump files are reproducible batch-by-batch
    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'TRAIN',
                       transform=transforms.Compose([normalize])),
        batch_size=5, shuffle=False, pin_memory=True)

    with torch.no_grad():
        encoder = Encoder()
        encoder.fine_tune(False)
        emb_dim = 512
        decoder_dim = 512
        encoderVae_encoder = EncodeVAE_Encoder(embed_dim=emb_dim,
                                               decoder_dim=decoder_dim,
                                               vocab_size=len(word_map))
        encoderVae_encoder.fine_tune(False)
        encoder.eval()
        encoderVae_encoder.eval()
        encoder = encoder.to(device)
        encoderVae_encoder = encoderVae_encoder.to(device)

        for i, (imgs, caps, caplens) in enumerate(train_loader):
            if i % 100 == 0:
                print(i)
            imgs = imgs.to(device)
            caps = caps.to(device)
            caplens = caplens.to(device)
            res = encoder(imgs)
            h = encoderVae_encoder(imgs, caps, caplens)
            # BUGFIX: files were opened inline and never closed (handle leak);
            # use context managers so each dump file is closed deterministically.
            with open(os.path.join(out_folder, str(i) + ".p"), "wb") as f:
                pickle.dump(res[0].cpu().numpy(), f)
            with open(os.path.join(out_folder, "VAE_" + str(i) + ".p"), "wb") as f:
                pickle.dump(h[0].cpu().numpy(), f)
def validate_models(channels):
    """
    Validate trained models

    :param channels: List of compressed channels used
    :return: None
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    test_loader = test_dataloader()
    test_batch = next(iter(test_loader)).to(device)
    reconstructed_images = {}
    # BUGFIX: run inference under no_grad() — the original built an autograd
    # graph for every reconstruction, wasting memory during validation.
    with torch.no_grad():
        for channel in channels:
            encoder = Encoder(channel).to(device)
            generator = Generator(channel).to(device)
            encoder.load_state_dict(
                torch.load(f"../models/encoder_{channel}.model",
                           map_location=torch.device("cpu"))
            )
            generator.load_state_dict(
                torch.load(f"../models/generator_{channel}.model",
                           map_location=torch.device("cpu"))
            )
            encoder.eval()
            generator.eval()
            # keyed by channel count, same as before
            reconstructed_images[channel] = generator(encoder(test_batch))
    plot_image_grid(test_batch, reconstructed_images, NUM_IMAGES_GRID)
    save_images(test_batch, reconstructed_images)
    calculate_metric(channels)
def main(imgurl):
    """Caption the image at *imgurl* with beam search and visualize attention.

    :param imgurl: path/URL of the image to caption
    """
    # Load word map (word2ix)
    with open('input_files/WORDMAP.json', 'r') as j:
        word_map = json.load(j)
    rev_word_map = {v: k for k, v in word_map.items()}  # ix2word

    # Build models for inference.  NOTE: the original also constructed Adam
    # optimizers for both models that were never used — removed (this function
    # only runs inference, never a training step).
    decoder = DecoderWithAttention(attention_dim=attention_dim,
                                   embed_dim=emb_dim,
                                   decoder_dim=decoder_dim,
                                   vocab_size=len(word_map),
                                   dropout=dropout)
    encoder = Encoder()
    encoder.fine_tune(fine_tune_encoder)

    # map_location=device so checkpoints saved on GPU still load on CPU-only hosts
    decoder.load_state_dict(
        torch.load('output_files/BEST_checkpoint_decoder.pth.tar',
                   map_location=device))
    encoder.load_state_dict(
        torch.load('output_files/BEST_checkpoint_encoder.pth.tar',
                   map_location=device))
    decoder = decoder.to(device)
    decoder.eval()
    encoder = encoder.to(device)
    encoder.eval()

    # Encode, decode with attention and beam search
    seq, alphas = caption_image_beam_search(encoder, decoder, imgurl,
                                            word_map, beam_size=5)
    alphas = torch.FloatTensor(alphas)

    # Visualize caption and attention of best sequence
    visualize_att(imgurl, seq, alphas, rev_word_map)
def encoder_test(seq_len=4, decoder_batch_size=2, model_name='xception'):
    """Sanity-check that the batched encoder forward pass matches the
    legacy per-sequence path (`_forward_old`) on random input.

    :param seq_len: frames per sequence
    :param decoder_batch_size: number of sequences in the batch
    :param model_name: backbone name passed to Encoder
    """
    encoder = Encoder(seq_len=seq_len,
                      decoder_batch_size=decoder_batch_size,
                      model_name=model_name)
    encoder.cuda()
    encoder.eval()
    with torch.no_grad():
        # Batch of random image sequences: (batch, seq, 3, 299, 299)
        images = torch.stack(
            [torch.rand((seq_len, 3, 299, 299)) for _ in range(decoder_batch_size)]
        ).cuda()
        features = encoder.forward(images)
        # Run each sequence through the old single-sequence path and compare
        split_features = torch.stack(
            [encoder._forward_old(sequence) for sequence in images])
        assert(torch.all(split_features == features) == 1)
    print('encoder test passed!')
def initEncoderDecoder(self):
    """Load pretrained encoder/decoder weights and put both in eval mode.

    The vertex count depends on the dataset (SMPL human template vs the
    all_animals template).

    :raises ValueError: if ``self.opt.dataset`` is not a known dataset
        (previously this fell through and crashed later with a NameError
        on ``num_verts``).
    """
    if self.opt.dataset == 'SMPL':
        num_verts = 6890
    elif self.opt.dataset == 'all_animals':
        num_verts = 3889
    else:
        raise ValueError(
            "Unknown dataset '{}': expected 'SMPL' or 'all_animals'".format(
                self.opt.dataset))
    encoder = Encoder()
    decoder = Decoder(num_verts=num_verts)
    encoder.load_state_dict(torch.load(self.encoder_weights))
    decoder.load_state_dict(torch.load(self.decoder_weights))
    # eval() returns the module itself, so these stay usable attributes
    self.encoder = encoder.eval()
    self.decoder = decoder.eval()
class Trainer():
    """Trains and evaluates a PlaNet-style world model (transition,
    observation and reward models plus a visual encoder) with an MPC planner.

    Experience is drawn from a replay buffer ``D``; statistics, model
    checkpoints and videos are written under ``results_dir``.
    """

    def __init__(self, params, experience_replay_buffer, metrics, results_dir, env):
        """
        :param params: hyper-parameter namespace (sizes, device, schedules, ...)
        :param experience_replay_buffer: dataset D of collected transitions
        :param metrics: dict of running metric lists (losses, rewards, ...)
        :param results_dir: root folder for statistics/model/video output
        :param env: training environment wrapper
        """
        self.parms = params
        self.D = experience_replay_buffer
        self.metrics = metrics
        self.env = env
        self.tested_episodes = 0

        # Output folders; create them if they do not exist.
        self.statistics_path = results_dir + '/statistics'
        self.model_path = results_dir + '/model'
        self.video_path = results_dir + '/video'
        self.rew_vs_pred_rew_path = results_dir + '/rew_vs_pred_rew'
        self.dump_plan_path = results_dir + '/dump_plan'
        os.makedirs(self.statistics_path, exist_ok=True)
        os.makedirs(self.model_path, exist_ok=True)
        os.makedirs(self.video_path, exist_ok=True)
        os.makedirs(self.rew_vs_pred_rew_path, exist_ok=True)
        os.makedirs(self.dump_plan_path, exist_ok=True)

        # Create models
        self.transition_model = TransitionModel(self.parms.belief_size, self.parms.state_size, self.env.action_size, self.parms.hidden_size, self.parms.embedding_size, self.parms.activation_function).to(device=self.parms.device)
        self.observation_model = ObservationModel(self.parms.belief_size, self.parms.state_size, self.parms.embedding_size, self.parms.activation_function).to(device=self.parms.device)
        self.reward_model = RewardModel(self.parms.belief_size, self.parms.state_size, self.parms.hidden_size, self.parms.activation_function).to(device=self.parms.device)
        self.encoder = Encoder(self.parms.embedding_size, self.parms.activation_function).to(device=self.parms.device)
        self.param_list = list(self.transition_model.parameters()) + list(self.observation_model.parameters()) + list(self.reward_model.parameters()) + list(self.encoder.parameters())
        # lr starts at 0 when a ramping learning-rate schedule is configured
        self.optimiser = optim.Adam(self.param_list, lr=0 if self.parms.learning_rate_schedule != 0 else self.parms.learning_rate, eps=self.parms.adam_epsilon)
        self.planner = MPCPlanner(self.env.action_size, self.parms.planning_horizon, self.parms.optimisation_iters, self.parms.candidates, self.parms.top_candidates, self.transition_model, self.reward_model, self.env.action_range[0], self.env.action_range[1])
        # BUGFIX: was assigned to a local 'global_prior' that was immediately
        # discarded; keep it on the instance like free_nats.
        self.global_prior = Normal(torch.zeros(self.parms.batch_size, self.parms.state_size, device=self.parms.device), torch.ones(self.parms.batch_size, self.parms.state_size, device=self.parms.device))  # Global prior N(0, I)
        self.free_nats = torch.full((1, ), self.parms.free_nats, dtype=torch.float32, device=self.parms.device)  # Allowed deviation in KL divergence

    def load_checkpoints(self):
        """Restore metrics and the newest 'best_model' checkpoint, if any."""
        self.metrics = torch.load(self.model_path + '/metrics.pth')
        model_path = self.model_path + '/best_model'
        os.makedirs(model_path, exist_ok=True)
        files = os.listdir(model_path)
        if files:
            checkpoint = [f for f in files if os.path.isfile(os.path.join(model_path, f))]
            model_dicts = torch.load(os.path.join(model_path, checkpoint[0]), map_location=self.parms.device)
            self.transition_model.load_state_dict(model_dicts['transition_model'])
            self.observation_model.load_state_dict(model_dicts['observation_model'])
            self.reward_model.load_state_dict(model_dicts['reward_model'])
            self.encoder.load_state_dict(model_dicts['encoder'])
            self.optimiser.load_state_dict(model_dicts['optimiser'])
            print("Loading models checkpoints!")
        else:
            print("Checkpoints not found!")

    def update_belief_and_act(self, env, belief, posterior_state, action, observation, reward, min_action=-inf, max_action=inf, explore=False):
        """One interaction step: update the belief from the new observation,
        plan the next action (optionally with exploration noise) and act."""
        # Infer belief over current state q(s_t|o<=t,a<t) from the history
        encoded_obs = self.encoder(observation).unsqueeze(dim=0).to(device=self.parms.device)
        # Action and observation need an extra time dimension
        belief, _, _, _, posterior_state, _, _ = self.transition_model(posterior_state, action.unsqueeze(dim=0), belief, encoded_obs)
        belief, posterior_state = belief.squeeze(dim=0), posterior_state.squeeze(dim=0)  # Remove time dimension from belief/state
        action, pred_next_rew, _, _, _ = self.planner(belief, posterior_state, explore)  # Get action from planner(q(s_t|o<=t,a<t), p)
        if explore:
            action = action + self.parms.action_noise * torch.randn_like(action)  # Add exploration noise eps ~ p(eps) to the action
        action.clamp_(min=min_action, max=max_action)  # Clip action range
        # If a single env is instantiated perform a single action (get item from list), else perform all actions
        next_observation, reward, done = env.step(action.cpu() if isinstance(env, EnvBatcher) else action[0].cpu())
        return belief, posterior_state, action, next_observation, reward, done, pred_next_rew

    def fit_buffer(self, episode):
        """One model-fitting phase: sample chunks from D and optimise the
        world model for ``collect_interval`` gradient steps, then plot losses."""
        losses = []
        tqdm.write("Fitting buffer")
        for s in tqdm(range(self.parms.collect_interval)):
            # Draw sequence chunks {(o_t, a_t, r_t+1, terminal_t+1)} ~ D uniformly at random (including terminal flags)
            observations, actions, rewards, nonterminals = self.D.sample(self.parms.batch_size, self.parms.chunk_size)  # Transitions start at time t = 0
            # Create initial belief and state for time t = 0
            init_belief, init_state = torch.zeros(self.parms.batch_size, self.parms.belief_size, device=self.parms.device), torch.zeros(self.parms.batch_size, self.parms.state_size, device=self.parms.device)
            encoded_obs = bottle(self.encoder, (observations[1:], ))
            # Update belief/state using posterior from previous belief/state, previous action and current observation (entire sequence at once)
            beliefs, prior_states, prior_means, prior_std_devs, posterior_states, posterior_means, posterior_std_devs = self.transition_model(init_state, actions[:-1], init_belief, encoded_obs, nonterminals[:-1])
            # Losses: reconstruction, KL (clamped by free nats) and reward prediction;
            # sum over final dims, average over batch and time.
            observation_loss = F.mse_loss(bottle(self.observation_model, (beliefs, posterior_states)), observations[1:], reduction='none').sum((2, 3, 4)).mean(dim=(0, 1))
            kl_loss = torch.max(kl_divergence(Normal(posterior_means, posterior_std_devs), Normal(prior_means, prior_std_devs)).sum(dim=2), self.free_nats).mean(dim=(0, 1))
            reward_loss = F.mse_loss(bottle(self.reward_model, (beliefs, posterior_states)), rewards[:-1], reduction='none').mean(dim=(0, 1))
            # Update model parameters
            self.optimiser.zero_grad()
            (observation_loss + reward_loss + kl_loss).backward()
            nn.utils.clip_grad_norm_(self.param_list, self.parms.grad_clip_norm, norm_type=2)
            self.optimiser.step()
            # Store (0) observation loss (1) reward loss (2) KL loss
            losses.append([observation_loss.item(), reward_loss.item(), kl_loss.item()])

        # Save statistics and plot them
        losses = tuple(zip(*losses))
        self.metrics['observation_loss'].append(losses[0])
        self.metrics['reward_loss'].append(losses[1])
        self.metrics['kl_loss'].append(losses[2])
        lineplot(self.metrics['episodes'][-len(self.metrics['observation_loss']):], self.metrics['observation_loss'], 'observation_loss', self.statistics_path)
        lineplot(self.metrics['episodes'][-len(self.metrics['reward_loss']):], self.metrics['reward_loss'], 'reward_loss', self.statistics_path)
        lineplot(self.metrics['episodes'][-len(self.metrics['kl_loss']):], self.metrics['kl_loss'], 'kl_loss', self.statistics_path)

    def explore_and_collect(self, episode):
        """Collect one real episode with the planner (exploration noise on),
        appending transitions to D and logging real vs predicted reward."""
        tqdm.write("Collect new data:")
        reward = 0
        # Data collection — no gradients needed while acting
        with torch.no_grad():
            done = False
            observation, total_reward = self.env.reset(), 0
            belief, posterior_state, action = torch.zeros(1, self.parms.belief_size, device=self.parms.device), torch.zeros(1, self.parms.state_size, device=self.parms.device), torch.zeros(1, self.env.action_size, device=self.parms.device)
            t = 0
            real_rew = []
            predicted_rew = []
            total_steps = self.parms.max_episode_length // self.env.action_repeat
            explore = True
            for t in tqdm(range(total_steps)):
                # Here we need to explore
                belief, posterior_state, action, next_observation, reward, done, pred_next_rew = self.update_belief_and_act(self.env, belief, posterior_state, action, observation.to(device=self.parms.device), [reward], self.env.action_range[0], self.env.action_range[1], explore=explore)
                self.D.append(observation, action.cpu(), reward, done)
                real_rew.append(reward)
                predicted_rew.append(pred_next_rew.to(device=self.parms.device).item())
                total_reward += reward
                observation = next_observation
                if self.parms.flag_render:
                    self.env.render()  # BUGFIX: was 'env.render()' — 'env' is undefined here
                if done:
                    break

        # Update and plot train reward metrics
        self.metrics['steps'].append((t * self.env.action_repeat) + self.metrics['steps'][-1])
        self.metrics['episodes'].append(episode)
        self.metrics['train_rewards'].append(total_reward)
        self.metrics['predicted_rewards'].append(np.array(predicted_rew).sum())
        lineplot(self.metrics['episodes'][-len(self.metrics['train_rewards']):], self.metrics['train_rewards'], 'train_rewards', self.statistics_path)
        double_lineplot(self.metrics['episodes'], self.metrics['train_rewards'], self.metrics['predicted_rewards'], "train_r_vs_pr", self.statistics_path)

    def train_models(self):
        """Main loop from (init_episodes) to (training_episodes): alternate
        model fitting and data collection, test/checkpoint periodically."""
        tqdm.write("Start training.")
        for episode in tqdm(range(self.parms.num_init_episodes + 1, self.parms.training_episodes)):
            self.fit_buffer(episode)
            self.explore_and_collect(episode)
            if episode % self.parms.test_interval == 0:
                self.test_model(episode)
                torch.save(self.metrics, os.path.join(self.model_path, 'metrics.pth'))
                torch.save({'transition_model': self.transition_model.state_dict(), 'observation_model': self.observation_model.state_dict(), 'reward_model': self.reward_model.state_dict(), 'encoder': self.encoder.state_dict(), 'optimiser': self.optimiser.state_dict()}, os.path.join(self.model_path, 'models_%d.pth' % episode))
            if episode % self.parms.storing_dataset_interval == 0:
                self.D.store_dataset(self.parms.dataset_path + 'dump_dataset')
        return self.metrics

    def test_model(self, episode=None):
        """Evaluate the current model (no exploration) on parallel test
        environments; records rewards and a comparison video."""
        if episode is None:
            episode = self.tested_episodes
        # Set models to eval mode
        self.transition_model.eval()
        self.observation_model.eval()
        self.reward_model.eval()
        self.encoder.eval()
        # Initialise parallelised test environments
        test_envs = EnvBatcher(ControlSuiteEnv, (self.parms.env_name, self.parms.seed, self.parms.max_episode_length, self.parms.bit_depth), {}, self.parms.test_episodes)
        total_steps = self.parms.max_episode_length // test_envs.action_repeat
        rewards = np.zeros(self.parms.test_episodes)
        real_rew = torch.zeros([total_steps, self.parms.test_episodes])
        predicted_rew = torch.zeros([total_steps, self.parms.test_episodes])

        with torch.no_grad():
            observation, total_rewards, video_frames = test_envs.reset(), np.zeros((self.parms.test_episodes, )), []
            belief, posterior_state, action = torch.zeros(self.parms.test_episodes, self.parms.belief_size, device=self.parms.device), torch.zeros(self.parms.test_episodes, self.parms.state_size, device=self.parms.device), torch.zeros(self.parms.test_episodes, self.env.action_size, device=self.parms.device)
            tqdm.write("Testing model.")
            for t in range(total_steps):
                belief, posterior_state, action, next_observation, rewards, done, pred_next_rew = self.update_belief_and_act(test_envs, belief, posterior_state, action, observation.to(device=self.parms.device), list(rewards), self.env.action_range[0], self.env.action_range[1])
                total_rewards += rewards.numpy()
                real_rew[t] = rewards
                predicted_rew[t] = pred_next_rew
                # NOTE(review): frame for the video is taken from self.env, not
                # test_envs — confirm get_original_frame() tracks the test envs.
                observation = self.env.get_original_frame().unsqueeze(dim=0)
                video_frames.append(make_grid(torch.cat([observation, self.observation_model(belief, posterior_state).cpu()], dim=3) + 0.5, nrow=5).numpy())  # Decentre
                observation = next_observation
                if done.sum().item() == self.parms.test_episodes:
                    break

        real_rew = torch.transpose(real_rew, 0, 1)
        predicted_rew = torch.transpose(predicted_rew, 0, 1)

        # Save and plot metrics
        self.tested_episodes += 1
        self.metrics['test_episodes'].append(episode)
        self.metrics['test_rewards'].append(total_rewards.tolist())
        lineplot(self.metrics['test_episodes'], self.metrics['test_rewards'], 'test_rewards', self.statistics_path)
        write_video(video_frames, 'test_episode_%s' % str(episode), self.video_path)  # Lossy compression

        # Set models back to train mode
        self.transition_model.train()
        self.observation_model.train()
        self.reward_model.train()
        self.encoder.train()
        # Close test environments
        test_envs.close()
        return self.metrics

    def dump_plan_video(self, step_before_plan=120):
        """Run the policy for ``step_before_plan`` steps, then dump a video of
        the planner's imagined plan versus reality.

        :param step_before_plan: number of steps before collecting plan frames
        """
        step_before_plan = min(step_before_plan, (self.parms.max_episode_length // self.env.action_repeat))
        # Set models to eval mode
        self.transition_model.eval()
        self.observation_model.eval()
        self.reward_model.eval()
        self.encoder.eval()
        video_frames = []
        reward = 0
        with torch.no_grad():
            observation = self.env.reset()
            belief, posterior_state, action = torch.zeros(1, self.parms.belief_size, device=self.parms.device), torch.zeros(1, self.parms.state_size, device=self.parms.device), torch.zeros(1, self.env.action_size, device=self.parms.device)
            tqdm.write("Executing episode.")
            for t in range(step_before_plan):
                belief, posterior_state, action, next_observation, reward, done, _ = self.update_belief_and_act(self.env, belief, posterior_state, action, observation.to(device=self.parms.device), [reward], self.env.action_range[0], self.env.action_range[1])
                observation = next_observation
                video_frames.append(make_grid(torch.cat([observation.cpu(), self.observation_model(belief, posterior_state).to(device=self.parms.device).cpu()], dim=3) + 0.5, nrow=5).numpy())  # Decentre
                if done:
                    break
            self.create_and_dump_plan(self.env, belief, posterior_state, action, observation.to(device=self.parms.device), [reward], self.env.action_range[0], self.env.action_range[1])

        # Set models back to train mode
        self.transition_model.train()
        self.observation_model.train()
        self.reward_model.train()
        self.encoder.train()
        self.env.close()

    def create_and_dump_plan(self, env, belief, posterior_state, action, observation, reward, min_action=-inf, max_action=inf):
        """Plan from the current state, execute the planned action sequence in
        the real env, and save a real-vs-predicted frame video."""
        tqdm.write("Dumping plan")
        video_frames = []
        encoded_obs = self.encoder(observation).unsqueeze(dim=0)
        belief, _, _, _, posterior_state, _, _ = self.transition_model(posterior_state, action.unsqueeze(dim=0), belief, encoded_obs)
        belief, posterior_state = belief.squeeze(dim=0), posterior_state.squeeze(dim=0)  # Remove time dimension from belief/state
        next_action, _, beliefs, states, plan = self.planner(belief, posterior_state, False)  # Get plan from planner(q(s_t|o<=t,a<t), p)
        predicted_frames = self.observation_model(beliefs, states).to(device=self.parms.device)
        for i in range(self.parms.planning_horizon):
            plan[i].clamp_(min=env.action_range[0], max=self.env.action_range[1])  # Clip action range
            next_observation, reward, done = env.step(plan[i].cpu())
            next_observation = next_observation.squeeze(dim=0)
            video_frames.append(make_grid(torch.cat([next_observation, predicted_frames[i]], dim=1) + 0.5, nrow=2).numpy())  # Decentre
        write_video(video_frames, 'dump_plan', self.dump_plan_path, dump_frame=True)
class SACAgent():
    """Discrete Soft Actor-Critic agent acting on encoded image observations.

    The frame-stacked observation is compressed by an Encoder; twin Q networks,
    a categorical policy and a learned temperature (alpha) are trained from a
    replay buffer.  Metrics are logged to TensorBoard.
    """

    def __init__(self, action_size, state_size, config):
        self.seed = config["seed"]
        torch.manual_seed(self.seed)
        np.random.seed(seed=self.seed)
        # BUGFIX: eval_policy() reads self.env_name, which was never stored.
        self.env_name = config["env_name"]
        self.env = gym.make(self.env_name)
        self.env = FrameStack(self.env, config)
        self.env.seed(self.seed)
        self.action_size = action_size
        self.state_size = state_size
        self.tau = config["tau"]
        self.gamma = config["gamma"]
        self.batch_size = config["batch_size"]
        self.lr = config["lr"]
        self.history_length = config["history_length"]
        self.size = config["size"]
        if not torch.cuda.is_available():
            # BUGFIX: was 'config["device"] == "cpu"' — a no-op comparison that
            # left a CUDA device configured on CPU-only machines.
            config["device"] = "cpu"
        self.device = config["device"]
        self.eval = config["eval"]  # evaluation interval (in episodes)
        self.vid_path = config["vid_path"]
        print("actions size ", action_size)

        # Twin Q networks + target copy
        self.critic = QNetwork(state_size, action_size, config["fc1_units"], config["fc2_units"]).to(self.device)
        self.q_optim = torch.optim.Adam(self.critic.parameters(), config["lr_critic"])
        self.target_critic = QNetwork(state_size, action_size, config["fc1_units"], config["fc2_units"]).to(self.device)
        self.target_critic.load_state_dict(self.critic.state_dict())

        # Learned entropy temperature alpha = exp(log_alpha)
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha = self.log_alpha.exp()
        self.alpha_optim = Adam([self.log_alpha], lr=config["lr_alpha"])

        self.policy = SACActor(state_size, action_size).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(), lr=config["lr_policy"])
        self.encoder = Encoder(config).to(self.device)
        self.encoder_optimizer = torch.optim.Adam(self.encoder.parameters(), self.lr)

        self.episodes = config["episodes"]
        self.memory = ReplayBuffer((self.history_length, self.size, self.size), (1, ), config["buffer_size"], config["image_pad"], self.seed, self.device)
        pathname = config["seed"]
        tensorboard_name = str(config["res_path"]) + '/runs/' + str(pathname)
        self.writer = SummaryWriter(tensorboard_name)
        self.steps = 0
        self.target_entropy = -torch.prod(torch.Tensor(action_size).to(self.device)).item()

    def act(self, state, evaluate=False):
        """Select an action for *state*: stochastic sample during training,
        greedy argmax when evaluate=True.  (Dead code after the return in the
        original has been removed.)"""
        with torch.no_grad():
            state = torch.FloatTensor(state).to(self.device).unsqueeze(0)
            state = state.type(torch.float32).div_(255)  # scale pixels to [0, 1]
            self.encoder.eval()
            state = self.encoder.create_vector(state)
            self.encoder.train()
            if evaluate is False:
                action = self.policy.sample(state)
            else:
                action_prob, _ = self.policy(state)
                action = torch.argmax(action_prob)
            action = action.cpu().numpy()
        return action

    def train_agent(self):
        """Main training loop: interact, store transitions, learn each step
        (after a 10-episode warm-up), and periodically evaluate."""
        average_reward = 0
        scores_window = deque(maxlen=100)
        t0 = time.time()
        for i_epiosde in range(1, self.episodes):
            episode_reward = 0
            state = self.env.reset()
            t = 0
            while True:
                t += 1
                action = self.act(state)
                next_state, reward, done, _ = self.env.step(action)
                episode_reward += reward
                if i_epiosde > 10:
                    self.learn()
                self.memory.add(state, reward, action, next_state, done)
                state = next_state
                if done:
                    scores_window.append(episode_reward)
                    break
            if i_epiosde % self.eval == 0:
                self.eval_policy()
            ave_reward = np.mean(scores_window)
            print("Epiosde {} Steps {} Reward {} Reward averge{:.2f} Time {}".format(i_epiosde, t, episode_reward, np.mean(scores_window), time_format(time.time() - t0)))
            self.writer.add_scalar('Aver_reward', ave_reward, self.steps)

    def learn(self):
        """One SAC update: critic (+ encoder), policy, temperature, target net."""
        self.steps += 1
        states, rewards, actions, next_states, dones = self.memory.sample(self.batch_size)
        states = states.type(torch.float32).div_(255)
        states = self.encoder.create_vector(states)
        states_detached = states.detach()  # stop critic/encoder grads reaching the policy update
        qf1, qf2 = self.critic(states)
        q_value1 = qf1.gather(1, actions)
        q_value2 = qf2.gather(1, actions)
        with torch.no_grad():
            next_states = next_states.type(torch.float32).div_(255)
            next_states = self.encoder.create_vector(next_states)
            q1_target, q2_target = self.target_critic(next_states)
            min_q_target = torch.min(q1_target, q2_target)
            next_action_prob, next_action_log_prob = self.policy(next_states)
            # Soft state value: E_a[minQ - alpha*log pi]
            next_q_target = (next_action_prob * (min_q_target - self.alpha * next_action_log_prob)).sum(dim=1, keepdim=True)
            next_q_value = rewards + (1 - dones) * self.gamma * next_q_target
        # --------------------------update-q--------------------------------------------------------
        loss = F.mse_loss(q_value1, next_q_value) + F.mse_loss(q_value2, next_q_value)
        self.q_optim.zero_grad()
        self.encoder_optimizer.zero_grad()
        loss.backward()
        self.q_optim.step()
        # BUGFIX: was encoder_optimizer.zero_grad() — the encoder's gradients
        # were wiped instead of applied, so the encoder never trained.
        self.encoder_optimizer.step()
        self.writer.add_scalar('loss/q', loss, self.steps)
        # --------------------------update-policy--------------------------------------------------------
        action_prob, log_action_prob = self.policy(states_detached)
        with torch.no_grad():
            q_pi1, q_pi2 = self.critic(states_detached)
            min_q_values = torch.min(q_pi1, q_pi2)
        policy_loss = (action_prob * ((self.alpha * log_action_prob) - min_q_values)).sum(dim=1).mean()
        self.policy_optim.zero_grad()
        policy_loss.backward()
        self.policy_optim.step()
        self.writer.add_scalar('loss/policy', policy_loss, self.steps)
        # --------------------------update-alpha--------------------------------------------------------
        alpha_loss = (action_prob.detach() * (-self.log_alpha * (log_action_prob + self.target_entropy).detach())).sum(dim=1).mean()
        self.alpha_optim.zero_grad()
        alpha_loss.backward()
        self.alpha_optim.step()
        self.writer.add_scalar('loss/alpha', alpha_loss, self.steps)
        self.soft_udapte(self.critic, self.target_critic)
        self.alpha = self.log_alpha.exp()

    def soft_udapte(self, online, target):
        """Polyak-average online parameters into the target network (rate tau)."""
        for param, target_parm in zip(online.parameters(), target.parameters()):
            target_parm.data.copy_(self.tau * param.data + (1 - self.tau) * target_parm.data)

    def eval_policy(self, eval_episodes=4):
        """Run the greedy policy for *eval_episodes* episodes in a fresh,
        video-recorded environment and log the average reward."""
        env = gym.make(self.env_name)
        env = wrappers.Monitor(env, str(self.vid_path) + "/{}".format(self.steps), video_callable=lambda episode_id: True, force=True)
        average_reward = 0
        scores_window = deque(maxlen=100)
        for i_epiosde in range(eval_episodes):
            print("Eval Episode {} of {} ".format(i_epiosde, eval_episodes))
            episode_reward = 0
            state = env.reset()
            while True:
                action = self.act(state, evaluate=True)
                state, reward, done, _ = env.step(action)
                episode_reward += reward
                if done:
                    break
            scores_window.append(episode_reward)
        average_reward = np.mean(scores_window)
        self.writer.add_scalar('Eval_reward', average_reward, self.steps)
class Plan(object): def __init__(self): self.results_dir = os.path.join( 'results', '{}_seed_{}_{}_action_scale_{}_no_explore_{}_pool_len_{}_optimisation_iters_{}_top_planning-horizon' .format(args.env, args.seed, args.algo, args.action_scale, args.pool_len, args.optimisation_iters, args.top_planning_horizon)) args.results_dir = self.results_dir args.MultiGPU = True if torch.cuda.device_count( ) > 1 and args.MultiGPU else False self.__basic_setting() self.__init_sample() # Sampleing The Init Data # Initialise model parameters randomly self.transition_model = TransitionModel( args.belief_size, args.state_size, self.env.action_size, args.hidden_size, args.embedding_size, args.dense_activation_function).to(device=args.device) self.observation_model = ObservationModel( args.symbolic_env, self.env.observation_size, args.belief_size, args.state_size, args.embedding_size, args.cnn_activation_function).to(device=args.device) self.reward_model = RewardModel( args.belief_size, args.state_size, args.hidden_size, args.dense_activation_function).to(device=args.device) self.encoder = Encoder( args.symbolic_env, self.env.observation_size, args.embedding_size, args.cnn_activation_function).to(device=args.device) print("We Have {} GPUS".format(torch.cuda.device_count()) ) if args.MultiGPU else print("We use CPU") self.transition_model = nn.DataParallel( self.transition_model.to(device=args.device) ) if args.MultiGPU else self.transition_model self.observation_model = nn.DataParallel( self.observation_model.to(device=args.device) ) if args.MultiGPU else self.observation_model self.reward_model = nn.DataParallel( self.reward_model.to( device=args.device)) if args.MultiGPU else self.reward_model # encoder = nn.DataParallel(encoder.cuda()) # actor_model = nn.DataParallel(actor_model.cuda()) # value_model = nn.DataParallel(value_model.cuda()) # share the global parameters in multiprocessing self.encoder.share_memory() self.observation_model.share_memory() 
self.reward_model.share_memory() # Set all_model/global_actor_optimizer/global_value_optimizer self.param_list = list(self.transition_model.parameters()) + list( self.observation_model.parameters()) + list( self.reward_model.parameters()) + list( self.encoder.parameters()) self.model_optimizer = optim.Adam( self.param_list, lr=0 if args.learning_rate_schedule != 0 else args.model_learning_rate, eps=args.adam_epsilon) def update_belief_and_act(self, args, env, belief, posterior_state, action, observation, explore=False): # Infer belief over current state q(s_t|o≤t,a<t) from the history # print("action size: ",action.size()) torch.Size([1, 6]) belief, _, _, _, posterior_state, _, _ = self.upper_transition_model( posterior_state, action.unsqueeze(dim=0), belief, self.encoder(observation).unsqueeze(dim=0), None) if hasattr(env, "envs"): belief, posterior_state = list( map(lambda x: x.view(-1, args.test_episodes, x.shape[2]), [x for x in [belief, posterior_state]])) belief, posterior_state = belief.squeeze( dim=0), posterior_state.squeeze( dim=0) # Remove time dimension from belief/state action = self.algorithms.get_action(belief, posterior_state, explore) if explore: action = torch.clamp( Normal(action, args.action_noise).rsample(), -1, 1 ) # Add gaussian exploration noise on top of the sampled action # action = action + args.action_noise * torch.randn_like(action) # Add exploration noise ε ~ p(ε) to the action next_observation, reward, done = env.step( action.cpu() if isinstance(env, EnvBatcher) else action[0].cpu( )) # Perform environment step (action repeats handled internally) return belief, posterior_state, action, next_observation, reward, done def run(self): if args.algo == "dreamer": print("DREAMER") from algorithms.dreamer import Algorithms self.algorithms = Algorithms(self.env.action_size, self.transition_model, self.encoder, self.reward_model, self.observation_model) elif args.algo == "p2p": print("planing to plan") from algorithms.plan_to_plan import 
Algorithms self.algorithms = Algorithms(self.env.action_size, self.transition_model, self.encoder, self.reward_model, self.observation_model) elif args.algo == "actor_pool_1": print("async sub actor") from algorithms.actor_pool_1 import Algorithms_actor self.algorithms = Algorithms_actor(self.env.action_size, self.transition_model, self.encoder, self.reward_model, self.observation_model) elif args.algo == "aap": from algorithms.asynchronous_actor_planet import Algorithms self.algorithms = Algorithms(self.env.action_size, self.transition_model, self.encoder, self.reward_model, self.observation_model) else: print("planet") from algorithms.planet import Algorithms # args.MultiGPU = False self.algorithms = Algorithms(self.env.action_size, self.transition_model, self.reward_model) if args.test: self.test_only() self.global_prior = Normal( torch.zeros(args.batch_size, args.state_size, device=args.device), torch.ones(args.batch_size, args.state_size, device=args.device)) # Global prior N(0, I) self.free_nats = torch.full( (1, ), args.free_nats, device=args.device) # Allowed deviation in KL divergence # Training (and testing) # args.episodes = 1 for episode in tqdm(range(self.metrics['episodes'][-1] + 1, args.episodes + 1), total=args.episodes, initial=self.metrics['episodes'][-1] + 1): losses = self.train() # self.algorithms.save_loss_data(self.metrics['episodes']) # Update and plot loss metrics self.save_loss_data(tuple( zip(*losses))) # Update and plot loss metrics self.data_collection(episode=episode) # Data collection # args.test_interval = 1 if episode % args.test_interval == 0: self.test(episode=episode) # Test model self.save_model_data(episode=episode) # save model self.env.close() # Close training environment def train_env_model(self, beliefs, prior_states, prior_means, prior_std_devs, posterior_states, posterior_means, posterior_std_devs, observations, actions, rewards, nonterminals): # Calculate observation likelihood, reward likelihood and KL losses (for t = 0 
only for latent overshooting); sum over final dims, average over batch and time (original implementation, though paper seems to miss 1/T scaling?) if args.worldmodel_LogProbLoss: observation_dist = Normal( bottle(self.observation_model, (beliefs, posterior_states)), 1) observation_loss = -observation_dist.log_prob( observations[1:]).sum( dim=2 if args.symbolic_env else (2, 3, 4)).mean(dim=(0, 1)) else: observation_loss = F.mse_loss( bottle(self.observation_model, (beliefs, posterior_states)), observations[1:], reduction='none').sum( dim=2 if args.symbolic_env else (2, 3, 4)).mean(dim=(0, 1)) if args.worldmodel_LogProbLoss: reward_dist = Normal( bottle(self.reward_model, (beliefs, posterior_states)), 1) reward_loss = -reward_dist.log_prob(rewards[:-1]).mean(dim=(0, 1)) else: reward_loss = F.mse_loss(bottle(self.reward_model, (beliefs, posterior_states)), rewards[:-1], reduction='none').mean(dim=(0, 1)) # transition loss div = kl_divergence(Normal(posterior_means, posterior_std_devs), Normal(prior_means, prior_std_devs)).sum(dim=2) kl_loss = torch.max(div, self.free_nats).mean( dim=(0, 1) ) # Note that normalisation by overshooting distance and weighting by overshooting distance cancel out if args.global_kl_beta != 0: kl_loss += args.global_kl_beta * kl_divergence( Normal(posterior_means, posterior_std_devs), self.global_prior).sum(dim=2).mean(dim=(0, 1)) # Calculate latent overshooting objective for t > 0 if args.overshooting_kl_beta != 0: overshooting_vars = [ ] # Collect variables for overshooting to process in batch for t in range(1, args.chunk_size - 1): d = min(t + args.overshooting_distance, args.chunk_size - 1) # Overshooting distance t_, d_ = t - 1, d - 1 # Use t_ and d_ to deal with different time indexing for latent states seq_pad = ( 0, 0, 0, 0, 0, t - d + args.overshooting_distance ) # Calculate sequence padding so overshooting terms can be calculated in one batch # Store (0) actions, (1) nonterminals, (2) rewards, (3) beliefs, (4) prior states, (5) 
posterior means, (6) posterior standard deviations and (7) sequence masks overshooting_vars.append( (F.pad(actions[t:d], seq_pad), F.pad(nonterminals[t:d], seq_pad), F.pad(rewards[t:d], seq_pad[2:]), beliefs[t_], prior_states[t_], F.pad(posterior_means[t_ + 1:d_ + 1].detach(), seq_pad), F.pad(posterior_std_devs[t_ + 1:d_ + 1].detach(), seq_pad, value=1), F.pad( torch.ones(d - t, args.batch_size, args.state_size, device=args.device), seq_pad)) ) # Posterior standard deviations must be padded with > 0 to prevent infinite KL divergences overshooting_vars = tuple(zip(*overshooting_vars)) # Update belief/state using prior from previous belief/state and previous action (over entire sequence at once) beliefs, prior_states, prior_means, prior_std_devs = self.upper_transition_model( torch.cat(overshooting_vars[4], dim=0), torch.cat(overshooting_vars[0], dim=1), torch.cat(overshooting_vars[3], dim=0), None, torch.cat(overshooting_vars[1], dim=1)) seq_mask = torch.cat(overshooting_vars[7], dim=1) # Calculate overshooting KL loss with sequence mask kl_loss += ( 1 / args.overshooting_distance ) * args.overshooting_kl_beta * torch.max((kl_divergence( Normal(torch.cat(overshooting_vars[5], dim=1), torch.cat(overshooting_vars[6], dim=1)), Normal(prior_means, prior_std_devs) ) * seq_mask).sum(dim=2), self.free_nats).mean(dim=(0, 1)) * ( args.chunk_size - 1 ) # Update KL loss (compensating for extra average over each overshooting/open loop sequence) # Calculate overshooting reward prediction loss with sequence mask if args.overshooting_reward_scale != 0: reward_loss += ( 1 / args.overshooting_distance ) * args.overshooting_reward_scale * F.mse_loss( bottle(self.reward_model, (beliefs, prior_states)) * seq_mask[:, :, 0], torch.cat(overshooting_vars[2], dim=1), reduction='none' ).mean(dim=(0, 1)) * ( args.chunk_size - 1 ) # Update reward loss (compensating for extra average over each overshooting/open loop sequence) # Apply linearly ramping learning rate schedule if 
args.learning_rate_schedule != 0: for group in self.model_optimizer.param_groups: group['lr'] = min( group['lr'] + args.model_learning_rate / args.model_learning_rate_schedule, args.model_learning_rate) model_loss = observation_loss + reward_loss + kl_loss # Update model parameters self.model_optimizer.zero_grad() model_loss.backward() nn.utils.clip_grad_norm_(self.param_list, args.grad_clip_norm, norm_type=2) self.model_optimizer.step() return observation_loss, reward_loss, kl_loss def train(self): # Model fitting losses = [] print("training loop") # args.collect_interval = 1 for s in tqdm(range(args.collect_interval)): # Draw sequence chunks {(o_t, a_t, r_t+1, terminal_t+1)} ~ D uniformly at random from the dataset (including terminal flags) observations, actions, rewards, nonterminals = self.D.sample( args.batch_size, args.chunk_size) # Transitions start at time t = 0 # Create initial belief and state for time t = 0 init_belief, init_state = torch.zeros( args.batch_size, args.belief_size, device=args.device), torch.zeros(args.batch_size, args.state_size, device=args.device) # Update belief/state using posterior from previous belief/state, previous action and current observation (over entire sequence at once) obs = bottle(self.encoder, (observations[1:], )) beliefs, prior_states, prior_means, prior_std_devs, posterior_states, posterior_means, posterior_std_devs = self.upper_transition_model( prev_state=init_state, actions=actions[:-1], prev_belief=init_belief, obs=obs, nonterminals=nonterminals[:-1]) # Calculate observation likelihood, reward likelihood and KL losses (for t = 0 only for latent overshooting); sum over final dims, average over batch and time (original implementation, though paper seems to miss 1/T scaling?) 
observation_loss, reward_loss, kl_loss = self.train_env_model( beliefs, prior_states, prior_means, prior_std_devs, posterior_states, posterior_means, posterior_std_devs, observations, actions, rewards, nonterminals) # Dreamer implementation: actor loss calculation and optimization with torch.no_grad(): actor_states = posterior_states.detach().to( device=args.device).share_memory_() actor_beliefs = beliefs.detach().to( device=args.device).share_memory_() # if not os.path.exists(os.path.join(os.getcwd(), 'tensor_data/' + args.results_dir)): os.mkdir(os.path.join(os.getcwd(), 'tensor_data/' + args.results_dir)) torch.save( actor_states, os.path.join(os.getcwd(), args.results_dir + '/actor_states.pt')) torch.save( actor_beliefs, os.path.join(os.getcwd(), args.results_dir + '/actor_beliefs.pt')) # [self.actor_pipes[i][0].send(1) for i, w in enumerate(self.workers_actor)] # Parent_pipe send data using i'th pipes # [self.actor_pipes[i][0].recv() for i, _ in enumerate(self.actor_pool)] # waitting the children finish self.algorithms.train_algorithm(actor_states, actor_beliefs) losses.append( [observation_loss.item(), reward_loss.item(), kl_loss.item()]) # if self.algorithms.train_algorithm(actor_states, actor_beliefs) is not None: # merge_actor_loss, merge_value_loss = self.algorithms.train_algorithm(actor_states, actor_beliefs) # losses.append([observation_loss.item(), reward_loss.item(), kl_loss.item(), merge_actor_loss.item(), merge_value_loss.item()]) # else: # losses.append([observation_loss.item(), reward_loss.item(), kl_loss.item()]) return losses def data_collection(self, episode): print("Data collection") with torch.no_grad(): observation, total_reward = self.env.reset(), 0 belief, posterior_state, action = torch.zeros( 1, args.belief_size, device=args.device), torch.zeros( 1, args.state_size, device=args.device), torch.zeros(1, self.env.action_size, device=args.device) pbar = tqdm(range(args.max_episode_length // args.action_repeat)) for t in pbar: # 
print("step",t) belief, posterior_state, action, next_observation, reward, done = self.update_belief_and_act( args, self.env, belief, posterior_state, action, observation.to(device=args.device)) self.D.append(observation, action.cpu(), reward, done) total_reward += reward observation = next_observation if args.render: self.env.render() if done: pbar.close() break # Update and plot train reward metrics self.metrics['steps'].append(t + self.metrics['steps'][-1]) self.metrics['episodes'].append(episode) self.metrics['train_rewards'].append(total_reward) Save_Txt(self.metrics['episodes'][-1], self.metrics['train_rewards'][-1], 'train_rewards', args.results_dir) # lineplot(metrics['episodes'][-len(metrics['train_rewards']):], metrics['train_rewards'], 'train_rewards', results_dir) def test(self, episode): print("Test model") # Set models to eval mode self.transition_model.eval() self.observation_model.eval() self.reward_model.eval() self.encoder.eval() self.algorithms.train_to_eval() # self.actor_model_g.eval() # self.value_model_g.eval() # Initialise parallelised test environments test_envs = EnvBatcher( Env, (args.env, args.symbolic_env, args.seed, args.max_episode_length, args.action_repeat, args.bit_depth), {}, args.test_episodes) with torch.no_grad(): observation, total_rewards, video_frames = test_envs.reset( ), np.zeros((args.test_episodes, )), [] belief, posterior_state, action = torch.zeros( args.test_episodes, args.belief_size, device=args.device), torch.zeros( args.test_episodes, args.state_size, device=args.device), torch.zeros(args.test_episodes, self.env.action_size, device=args.device) pbar = tqdm(range(args.max_episode_length // args.action_repeat)) for t in pbar: belief, posterior_state, action, next_observation, reward, done = self.update_belief_and_act( args, test_envs, belief, posterior_state, action, observation.to(device=args.device)) total_rewards += reward.numpy() if not args.symbolic_env: # Collect real vs. 
predicted frames for video video_frames.append( make_grid(torch.cat([ observation, self.observation_model(belief, posterior_state).cpu() ], dim=3) + 0.5, nrow=5).numpy()) # Decentre observation = next_observation if done.sum().item() == args.test_episodes: pbar.close() break # Update and plot reward metrics (and write video if applicable) and save metrics self.metrics['test_episodes'].append(episode) self.metrics['test_rewards'].append(total_rewards.tolist()) Save_Txt(self.metrics['test_episodes'][-1], self.metrics['test_rewards'][-1], 'test_rewards', args.results_dir) # Save_Txt(np.asarray(metrics['steps'])[np.asarray(metrics['test_episodes']) - 1], metrics['test_rewards'],'test_rewards_steps', results_dir, xaxis='step') # lineplot(metrics['test_episodes'], metrics['test_rewards'], 'test_rewards', results_dir) # lineplot(np.asarray(metrics['steps'])[np.asarray(metrics['test_episodes']) - 1], metrics['test_rewards'], 'test_rewards_steps', results_dir, xaxis='step') if not args.symbolic_env: episode_str = str(episode).zfill(len(str(args.episodes))) write_video(video_frames, 'test_episode_%s' % episode_str, args.results_dir) # Lossy compression save_image( torch.as_tensor(video_frames[-1]), os.path.join(args.results_dir, 'test_episode_%s.png' % episode_str)) torch.save(self.metrics, os.path.join(args.results_dir, 'metrics.pth')) # Set models to train mode self.transition_model.train() self.observation_model.train() self.reward_model.train() self.encoder.train() # self.actor_model_g.train() # self.value_model_g.train() self.algorithms.eval_to_train() # Close test environments test_envs.close() def test_only(self): # Set models to eval mode self.transition_model.eval() self.reward_model.eval() self.encoder.eval() with torch.no_grad(): total_reward = 0 for _ in tqdm(range(args.test_episodes)): observation = self.env.reset() belief, posterior_state, action = torch.zeros( 1, args.belief_size, device=args.device), torch.zeros( 1, args.state_size, device=args.device), 
torch.zeros(1, self.env.action_size, device=args.device) pbar = tqdm( range(args.max_episode_length // args.action_repeat)) for t in pbar: belief, posterior_state, action, observation, reward, done = self.update_belief_and_act( args, self.env, belief, posterior_state, action, observation.to(evice=args.device)) total_reward += reward if args.render: self.env.render() if done: pbar.close() break print('Average Reward:', total_reward / args.test_episodes) self.env.close() quit() def __basic_setting(self): args.overshooting_distance = min( args.chunk_size, args.overshooting_distance ) # Overshooting distance cannot be greater than chunk size print(' ' * 26 + 'Options') for k, v in vars(args).items(): print(' ' * 26 + k + ': ' + str(v)) print("torch.cuda.device_count() {}".format(torch.cuda.device_count())) os.makedirs(args.results_dir, exist_ok=True) np.random.seed(args.seed) torch.manual_seed(args.seed) # Set Cuda if torch.cuda.is_available() and not args.disable_cuda: print("using CUDA") args.device = torch.device('cuda') torch.cuda.manual_seed(args.seed) else: print("using CPU") args.device = torch.device('cpu') self.summary_name = args.results_dir + "/{}_{}_log" self.writer = SummaryWriter(self.summary_name.format( args.env, args.id)) self.env = Env(args.env, args.symbolic_env, args.seed, args.max_episode_length, args.action_repeat, args.bit_depth) self.metrics = { 'steps': [], 'episodes': [], 'train_rewards': [], 'test_episodes': [], 'test_rewards': [], 'observation_loss': [], 'reward_loss': [], 'kl_loss': [], 'merge_actor_loss': [], 'merge_value_loss': [] } def __init_sample(self): if args.experience_replay is not '' and os.path.exists( args.experience_replay): self.D = torch.load(args.experience_replay) self.metrics['steps'], self.metrics['episodes'] = [ self.D.steps ] * self.D.episodes, list(range(1, self.D.episodes + 1)) elif not args.test: self.D = ExperienceReplay(args.experience_size, args.symbolic_env, self.env.observation_size, self.env.action_size, 
args.bit_depth, args.device) # Initialise dataset D with S random seed episodes print( "Start Multi Sample Processing -------------------------------" ) start_time = time.time() data_lists = [ Manager().list() for i in range(1, args.seed_episodes + 1) ] # Set Global Lists pipes = [Pipe() for i in range(1, args.seed_episodes + 1) ] # Set Multi Pipe workers_init_sample = [ Worker_init_Sample(child_conn=child, id=i + 1) for i, [parent, child] in enumerate(pipes) ] for i, w in enumerate(workers_init_sample): w.start() # Start Single Process pipes[i][0].send( data_lists[i]) # Parent_pipe send data using i'th pipes [w.join() for w in workers_init_sample] # wait sub_process done for i, [parent, child] in enumerate(pipes): # datas = parent.recv() for data in list(parent.recv()): if isinstance(data, tuple): assert len(data) == 4 self.D.append(data[0], data[1], data[2], data[3]) elif isinstance(data, int): t = data self.metrics['steps'].append(t * args.action_repeat + ( 0 if len(self.metrics['steps']) == 0 else self.metrics['steps'][-1])) self.metrics['episodes'].append(i + 1) else: print( "The Recvive Data Have Some Problems, Need To Fix") end_time = time.time() print("the process times {} s".format(end_time - start_time)) print( "End Multi Sample Processing -------------------------------") def upper_transition_model(self, prev_state, actions, prev_belief, obs, nonterminals): actions = torch.transpose(actions, 0, 1) if args.MultiGPU else actions nonterminals = torch.transpose(nonterminals, 0, 1).to( device=args.device ) if args.MultiGPU and nonterminals is not None else nonterminals obs = torch.transpose(obs, 0, 1).to( device=args.device) if args.MultiGPU and obs is not None else obs temp_val = self.transition_model(prev_state.to(device=args.device), actions.to(device=args.device), prev_belief.to(device=args.device), obs, nonterminals) return list( map( lambda x: torch.cat(x.chunk(torch.cuda.device_count(), 0), 1) if x.shape[1] != prev_state.shape[0] else x, [x for x in 
temp_val])) def save_loss_data(self, losses): self.metrics['observation_loss'].append(losses[0]) self.metrics['reward_loss'].append(losses[1]) self.metrics['kl_loss'].append(losses[2]) self.metrics['merge_actor_loss'].append( losses[3]) if losses.__len__() > 3 else None self.metrics['merge_value_loss'].append( losses[4]) if losses.__len__() > 3 else None Save_Txt(self.metrics['episodes'][-1], self.metrics['observation_loss'][-1], 'observation_loss', args.results_dir) Save_Txt(self.metrics['episodes'][-1], self.metrics['reward_loss'][-1], 'reward_loss', args.results_dir) Save_Txt(self.metrics['episodes'][-1], self.metrics['kl_loss'][-1], 'kl_loss', args.results_dir) Save_Txt(self.metrics['episodes'][-1], self.metrics['merge_actor_loss'][-1], 'merge_actor_loss', args.results_dir) if losses.__len__() > 3 else None Save_Txt(self.metrics['episodes'][-1], self.metrics['merge_value_loss'][-1], 'merge_value_loss', args.results_dir) if losses.__len__() > 3 else None # lineplot(metrics['episodes'][-len(metrics['observation_loss']):], metrics['observation_loss'], 'observation_loss', results_dir) # lineplot(metrics['episodes'][-len(metrics['reward_loss']):], metrics['reward_loss'], 'reward_loss', results_dir) # lineplot(metrics['episodes'][-len(metrics['kl_loss']):], metrics['kl_loss'], 'kl_loss', results_dir) # lineplot(metrics['episodes'][-len(metrics['actor_loss']):], metrics['actor_loss'], 'actor_loss', results_dir) # lineplot(metrics['episodes'][-len(metrics['value_loss']):], metrics['value_loss'], 'value_loss', results_dir) def save_model_data(self, episode): # writer.add_scalar("train_reward", metrics['train_rewards'][-1], metrics['steps'][-1]) # writer.add_scalar("train/episode_reward", metrics['train_rewards'][-1], metrics['steps'][-1]*args.action_repeat) # writer.add_scalar("observation_loss", metrics['observation_loss'][0][-1], metrics['steps'][-1]) # writer.add_scalar("reward_loss", metrics['reward_loss'][0][-1], metrics['steps'][-1]) # writer.add_scalar("kl_loss", 
metrics['kl_loss'][0][-1], metrics['steps'][-1]) # writer.add_scalar("actor_loss", metrics['actor_loss'][0][-1], metrics['steps'][-1]) # writer.add_scalar("value_loss", metrics['value_loss'][0][-1], metrics['steps'][-1]) # print("episodes: {}, total_steps: {}, train_reward: {} ".format(metrics['episodes'][-1], metrics['steps'][-1], metrics['train_rewards'][-1])) # Checkpoint models if episode % args.checkpoint_interval == 0: # torch.save({'transition_model': transition_model.state_dict(), # 'observation_model': observation_model.state_dict(), # 'reward_model': reward_model.state_dict(), # 'encoder': encoder.state_dict(), # 'actor_model': actor_model_g.state_dict(), # 'value_model': value_model_g.state_dict(), # 'model_optimizer': model_optimizer.state_dict(), # 'actor_optimizer': actor_optimizer_g.state_dict(), # 'value_optimizer': value_optimizer_g.state_dict() # }, os.path.join(results_dir, 'models_%d.pth' % episode)) if args.checkpoint_experience: torch.save( self.D, os.path.join(args.results_dir, 'experience.pth') ) # Warning: will fail with MemoryError with large memory sizes
class DQNAgent():
    """Interacts with and learns from the environment via an encoder + Q-network."""

    def __init__(self, state_size, action_size, config):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            config (dict): hyperparameters — expects keys seed, batch_size, lr,
                tau, fc1_units, fc2_units, device
        """
        self.state_size = state_size
        self.action_size = action_size
        # FIX: was `self.seed = random.seed(config["seed"])`, a dead assignment
        # (random.seed returns None) immediately overwritten below. Keep the
        # seeding side effect, drop the misleading assignment.
        random.seed(config["seed"])
        self.seed = config["seed"]
        self.gamma = 0.99
        self.batch_size = config["batch_size"]
        self.lr = config["lr"]
        self.tau = config["tau"]
        self.fc1 = config["fc1_units"]
        self.fc2 = config["fc2_units"]
        self.device = config["device"]
        # Q-Network (local = online net, target = slow-moving copy)
        self.qnetwork_local = QNetwork(state_size, action_size, self.fc1, self.fc2,
                                       self.seed).to(self.device)
        self.qnetwork_target = QNetwork(state_size, action_size, self.fc1, self.fc2,
                                        self.seed).to(self.device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=self.lr)
        self.encoder = Encoder(config).to(self.device)
        self.encoder_optimizer = torch.optim.Adam(self.encoder.parameters(), self.lr)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

    def step(self, memory, writer):
        """Count a step and learn from a sampled batch every 4th call once the
        replay memory holds at least one batch."""
        self.t_step += 1
        if len(memory) > self.batch_size:
            if self.t_step % 4 == 0:
                experiences = memory.sample(self.batch_size)
                self.learn(experiences, writer)

    def act(self, state, eps=0.):
        """Returns actions for given state as per current policy.

        Params
        ======
            state (array_like): current state (uint8 image; scaled to [0, 1] here)
            eps (float): epsilon, for epsilon-greedy action selection
        """
        state = torch.from_numpy(state).float().unsqueeze(0).to(self.device)
        state = state.type(torch.float32).div_(255)
        self.qnetwork_local.eval()
        self.encoder.eval()
        with torch.no_grad():
            state = self.encoder.create_vector(state)
            action_values = self.qnetwork_local(state)
        self.qnetwork_local.train()
        self.encoder.train()
        # Epsilon-greedy action selection
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

    def learn(self, experiences, writer):
        """Update value parameters using given batch of experience tuples.

        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
            writer: TensorBoard-style writer used to log the Q loss
        """
        states, actions, rewards, next_states, dones = experiences
        states = states.type(torch.float32).div_(255)
        states = self.encoder.create_vector(states)
        next_states = next_states.type(torch.float32).div_(255)
        next_states = self.encoder.create_vector(next_states)
        actions = actions.type(torch.int64)
        # Get max predicted Q values (for next states) from target model
        Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
        # Compute Q targets for current states.
        # NOTE(review): standard DQN multiplies by (1 - done); this code multiplies
        # by `dones` directly, which only matches if the buffer stores inverted
        # done flags — TODO confirm against the memory implementation.
        Q_targets = rewards + (self.gamma * Q_targets_next * dones)
        # Get expected Q values from local model
        Q_expected = self.qnetwork_local(states).gather(1, actions)
        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)
        writer.add_scalar('Q_loss', loss, self.t_step)
        # Minimize the loss (both Q-network and encoder are updated)
        self.optimizer.zero_grad()
        self.encoder_optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.encoder_optimizer.step()
        # ------------------- update target network ------------------- #
        self.soft_update(self.qnetwork_local, self.qnetwork_target)

    def soft_update(self, local_model, target_model):
        """Soft update model parameters: θ_target = τ*θ_local + (1 - τ)*θ_target.

        Params
        ======
            local_model (PyTorch model): weights will be copied from
            target_model (PyTorch model): weights will be copied to
        """
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(self.tau * local_param.data +
                                    (1.0 - self.tau) * target_param.data)

    def save(self, filename):
        """Persist Q-network, encoder and both optimizers under `filename` prefix."""
        mkdir("", filename)
        torch.save(self.qnetwork_local.state_dict(), filename + "_q_net.pth")
        torch.save(self.optimizer.state_dict(), filename + "_q_net_optimizer.pth")
        torch.save(self.encoder.state_dict(), filename + "_encoder.pth")
        torch.save(self.encoder_optimizer.state_dict(), filename + "_encoder_optimizer.pth")
        print("Save models to {}".format(filename))
# Update and plot train reward metrics metrics['steps'].append(t + metrics['steps'][-1]) metrics['episodes'].append(episode) metrics['train_rewards'].append(total_reward) lineplot(metrics['episodes'][-len(metrics['train_rewards']):], metrics['train_rewards'], 'train_rewards', results_dir) print("Testing!") # Test model if episode % args.test_interval == 0: # Set models to eval mode transition_model.eval() observation_model.eval() reward_model.eval() encoder.eval() # Initialise parallelised test environments test_envs = EnvBatcher( Env, (args.env, args.symbolic_env, args.seed, args.max_episode_length, args.action_repeat, args.bit_depth), {}, args.test_episodes) with torch.no_grad(): observation, total_rewards, video_frames = test_envs.reset( ), np.zeros((args.test_episodes, )), [] belief, posterior_state, action = torch.zeros( args.test_episodes, args.belief_size, device=args.device), torch.zeros( args.test_episodes, args.state_size, device=args.device), torch.zeros(args.test_episodes, env.action_size,
class Solver:
    """Tri-training style domain adaptation: shared Encoder, two source
    classifiers (clf1, clf2) and a target classifier trained on pseudo-labels."""

    def __init__(self):
        self.train_lr = 1e-4
        self.num_classes = 9
        self.clf_target = Classifier().cuda()
        self.clf2 = Classifier().cuda()
        self.clf1 = Classifier().cuda()
        self.encoder = Encoder().cuda()
        self.pretrain_lr = 1e-4
        self.weights_coef = 1e-3  # weight for the |W1^T W2| regulariser

    def to_var(self, x):
        """Converts numpy to variable."""
        if torch.cuda.is_available():
            x = x.cuda()
        return Variable(x, requires_grad=False).float()

    def loss(self, predictions, y_true, weights_coef=None):
        """Sum of cross-entropy losses over `predictions`, optionally adding the
        classifier-weight similarity penalty.

        :param predictions: list of 2-D prediction tensors (logits)
        :param y_true: 1-D tensor of class indices
        :param weights_coef: if truthy, weight of the |W1^T W2| term
        """
        # FIX: the old assert message evaluated `predictions.shape` on a list,
        # which raised AttributeError instead of showing the shapes.
        assert len(predictions[0].shape) == 2 and len(y_true.shape) == 1, \
            (predictions[0].shape, y_true.shape)
        losses = [F.cross_entropy(y_hat, y_true) for y_hat in predictions]
        loss = sum(losses)
        # """
        # We add the term |W1^T W2| to the cost function, where W1, W2 denote fully
        # connected layers' weights of F1 and F2 which are first applied to the
        # feature F(xi)
        # """
        if weights_coef:
            # FIX: was `solver.clf1` / `solver.clf2` — a reference to a global
            # `solver` instance instead of `self`, breaking any other instance.
            lw = torch.matmul(self.clf1.fc1.weight,
                              self.clf2.fc1.weight.T).abs().sum().mean()
            loss += weights_coef * lw
        return loss

    def pretrain(self, source_loader, target_val_loader, pretrain_epochs=1):
        """Jointly pretrain encoder + all three classifiers on source data."""
        source_iter = iter(source_loader)
        source_per_epoch = len(source_iter)
        print("source_per_epoch:", source_per_epoch)
        # pretrain
        log_pre = 250
        lr = self.pretrain_lr
        pretrain_iters = source_per_epoch * pretrain_epochs
        params = reduce(
            lambda a, b: a + b,
            map(lambda i: list(i.parameters()),
                [self.encoder, self.clf1, self.clf2, self.clf_target]))
        pretrain_optimizer = optim.Adam(params, lr)
        accuracies = []
        for step in range(pretrain_iters + 1):
            # ============ Initialization ============#
            # refresh the iterator once per epoch
            if (step + 1) % source_per_epoch == 0:
                source_iter = iter(source_loader)
            # load the data
            source, s_labels = next(source_iter)
            source, s_labels = self.to_var(source), self.to_var(s_labels).long().squeeze()
            # ============ Training ============ #
            pretrain_optimizer.zero_grad()
            # forward
            features = self.encoder(source)
            y1_hat = self.clf1(features)
            y2_hat = self.clf2(features)
            y_target_hat = self.clf_target(features)
            # loss
            loss_source_class = self.loss([y1_hat, y2_hat, y_target_hat],
                                          s_labels,
                                          weights_coef=self.weights_coef)
            # one step
            loss_source_class.backward()
            pretrain_optimizer.step()
            pretrain_optimizer.zero_grad()
            # TODO: make this each step and on log_pre step just average and print previous results
            # ============ Validation ============ #
            if (step + 1) % log_pre == 0:
                with torch.no_grad():
                    source_val_features = self.encoder(source)
                    c_source1 = self.clf1(source_val_features)
                    c_source2 = self.clf2(source_val_features)
                    c_target = self.clf_target(source_val_features)
                    print("Train data (source) scores:")
                    print("Step %d | Source clf1=%.2f, clf2=%.2f | Source data clf_t=%.2f" \
                          % (step, accuracy(c_source1, s_labels),
                             accuracy(c_source2, s_labels),
                             accuracy(c_target, s_labels)))
                    acc = self.eval(target_val_loader, self.clf_target)
                    print("Val target data acc=%.2f" % acc)
                    print()

    def pseudo_labeling(self, loader, pool_size=4000, threshold=0.9):
        """
        When C1, C2 denote the class which has the maximum predicted probability
        for y1, y2, we assign a pseudo-label to xk if the following two conditions
        are satisfied. First, we require C1 = C2 to give pseudo-labels, which means
        two different classifiers agree with the prediction. The second requirement
        is that the maximizing probability of y1 or y2 exceeds the threshold
        parameter, which we set as 0.9 or 0.95 in the experiment.
        :return: list of (x, y_pseudo) pairs, at most `pool_size` long
        """
        pool = []  # x, y_pseudo
        for x, _ in loader:
            batch_size = x.shape[0]
            x = self.to_var(x)
            # dim=1 made explicit (implicit softmax dim is deprecated)
            ys1 = F.softmax(self.clf1(self.encoder(x)), dim=1)
            ys2 = F.softmax(self.clf2(self.encoder(x)), dim=1)
            for i in range(batch_size):
                y1 = ys1[i]
                y2 = ys2[i]
                val1, idx1 = torch.max(y1, 0)
                val2, idx2 = torch.max(y2, 0)
                if idx1 == idx2 and max(val1, val2) >= threshold:
                    pool.append((x[i].cpu(), idx1.cpu().item()))
                    if len(pool) >= pool_size:
                        return pool
        return pool

    def train(self, source_loader, source_val_loader, target_loader,
              target_val_loader, epochs):
        """
        Alternate (F, F1, F2) training on source+pseudo-labelled target data with
        (F, Ft) training on pseudo-labelled target data.
        :param epochs: target epochs the training will be done
        """
        # pretrain
        log_pre = 30
        lr = self.train_lr
        params1 = reduce(
            lambda a, b: a + b,
            map(lambda i: list(i.parameters()),
                [self.encoder, self.clf1, self.clf2]))
        params2 = list(self.encoder.parameters()) + list(self.clf_target.parameters())
        optimizer1 = optim.Adam(params1, lr)
        optimizer2 = optim.Adam(params2, lr)
        # ad-hoc running-accuracy buffers
        acs1 = []
        acs2 = []
        acs3 = []
        for epoch in range(epochs):
            source_iter = iter(source_loader)
            target_iter = iter(target_loader)
            source_per_epoch = len(source_iter)
            target_per_epoch = len(target_iter)
            if epoch == 0:
                print("source_per_epoch, target_per_epoch:", source_per_epoch, target_per_epoch)
            # manual step-wise LR decay at epochs 3 and 6
            if epoch == 3:
                for param_group in optimizer1.param_groups:
                    param_group['lr'] = lr * 0.1
                for param_group in optimizer2.param_groups:
                    param_group['lr'] = lr * 0.1
            if epoch == 6:
                for param_group in optimizer1.param_groups:
                    param_group['lr'] = lr * 0.01
                for param_group in optimizer2.param_groups:
                    param_group['lr'] = lr * 0.01
            # ============ Pseudo-labeling ============ #
            # Fill candidates (pool grows with epoch)
            target_candidates = self.pseudo_labeling(target_loader, pool_size=4000 * epoch)
            print("Target candidates len:", len(target_candidates))
            if len(target_candidates) <= 1:
                # fall back to an unthresholded pool so training can proceed
                target_candidates = self.pseudo_labeling(target_loader, threshold=0.0)
                print("Target candidates len:", len(target_candidates))
            target_candidates_loader = self.wrap_to_loader(
                target_candidates, batch_size=target_loader.batch_size)
            for step, (target, t_labels) in enumerate(target_candidates_loader):
                if (step + 1) % source_per_epoch == 0:
                    source_iter = iter(source_loader)
                source, s_labels = next(source_iter)
                target, t_labels = self.to_var(target), self.to_var(t_labels).long().squeeze()
                source, s_labels = self.to_var(source), self.to_var(s_labels).long().squeeze()
                # ============ Train F, F1, F2 ============ #
                optimizer1.zero_grad()
                # Source data
                features = self.encoder(source)
                y1s_hat = self.clf1(features)
                y2s_hat = self.clf2(features)
                loss_source_class = self.loss([y1s_hat, y2s_hat], s_labels,
                                              weights_coef=self.weights_coef)
                # Target data
                features = self.encoder(target)
                y1t_hat = self.clf1(features)
                y2t_hat = self.clf2(features)
                loss_target_class = self.loss([y1t_hat, y2t_hat], t_labels,
                                              weights_coef=self.weights_coef)
                # one step
                (loss_source_class + loss_target_class).backward()
                optimizer1.step()
                optimizer1.zero_grad()
                # ============ Train F, Ft ============ #
                optimizer2.zero_grad()
                y_target_hat = self.clf_target(self.encoder(target))
                loss_target_class = self.loss([y_target_hat], t_labels)
                loss_target_class.backward()
                optimizer2.step()
                optimizer2.zero_grad()
                # ============ Validation ============ #
                acs1.append(accuracy(y1s_hat, s_labels).item())
                acs2.append(accuracy(y2s_hat, s_labels).item())
                acs3.append(accuracy(y_target_hat, t_labels).item())
                if (step + 1) % log_pre == 0:
                    acc = self.eval(target_val_loader, self.clf_target)
                    print("Step %d | Val data target classifier acc=%.2f" % (step, acc))
                    print(" Train accuracy clf1=%.2f, clf2=%.2f, clf_t=%.2f"
                          % (np.mean(acs1), np.mean(acs2), np.mean(acs3)))
                    acs1 = []
                    acs2 = []
                    acs3 = []
            print()

    def save_models(self):
        """Persist all four modules (whole objects, not state_dicts)."""
        torch.save(self.encoder, 'encoder.pth')
        torch.save(self.clf1, 'clf1.pth')
        torch.save(self.clf2, 'clf2.pth')
        torch.save(self.clf_target, 'clf_target.pth')

    def load_models(self):
        """Restore modules saved by save_models()."""
        self.encoder = torch.load('encoder.pth')
        self.clf1 = torch.load('clf1.pth')
        self.clf2 = torch.load('clf2.pth')
        self.clf_target = torch.load('clf_target.pth')

    def eval(self, loader, classifier):
        """ Evaluate encoder + passed classifier; prints per-class accuracy and
        returns the overall accuracy in percent. """
        class_correct = [0] * self.num_classes
        class_total = [0.] * self.num_classes
        classes = shl_processing.coarse_label_mapping
        self.encoder.eval()
        classifier.eval()
        for x, y_true in loader:
            # forward pass: compute predicted outputs by passing inputs to the model
            x, y_true = self.to_var(x), self.to_var(y_true).long().squeeze()
            y_hat = classifier(self.encoder(x))
            _, pred = torch.max(y_hat, 1)
            correct = np.squeeze(pred.eq(y_true.data.view_as(pred)))
            # calculate test accuracy for each object class
            for i in range(len(y_true.data)):
                label = y_true.data[i]
                class_correct[label] += correct[i].item()
                class_total[label] += 1
        for i in range(self.num_classes):
            if class_total[i] > 0:
                print('\tTest Accuracy of %10s: %2d%% (%2d/%2d)' %
                      (classes[i], 100 * class_correct[i] / class_total[i],
                       np.sum(class_correct[i]), np.sum(class_total[i])))
            else:
                print('\tTest Accuracy of %10s: N/A (no training examples)' % (classes[i]))
        self.encoder.train()
        classifier.train()
        return 100. * np.sum(class_correct) / np.sum(class_total)

    def wrap_to_loader(self, target_candidates, batch_size):
        """
        :param target_candidates: [(x, y_pseudo)]
        :return: DataLoader over the pseudo-labelled pairs
        """
        assert len(target_candidates) > 0
        tmp = target_candidates  # CondomDataset(target_candidates)
        return torch.utils.data.DataLoader(dataset=tmp,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=0)

    def confusion_matrix(self, loader, classifier):
        """Plot a seaborn confusion-matrix heatmap for `classifier` over `loader`."""
        labels = []
        preds = []
        for x, y_true in loader:
            labels += list(y_true.cpu().detach().numpy().flatten())
            x, y_true = self.to_var(x), self.to_var(y_true).long().squeeze()
            y_hat = classifier(self.encoder(x))
            _, pred = torch.max(y_hat, 1)
            preds += list(pred.cpu().detach().numpy().flatten())
        cm = confusion_matrix(labels, preds)
        # FIX: `np.int` was removed in NumPy 1.24; use the builtin `int`.
        df_cm = pd.DataFrame(cm,
                             index=coarse_label_mapping,
                             columns=coarse_label_mapping,
                             dtype=int)
        plt.figure(figsize=(10, 7))
        sn.heatmap(df_cm, annot=True)
        plt.show()
def recover_models(device, model="supervised", m=256, n=4, chann_type="AWGN", verbose=False):
    """ Function to try to recover an already saved system to a channel
    Args:
        device (string): Current device that we are working in
        model (string): Model that wish to be recovered. Options: supervised or alternated
        m (int): Total number of messages that can be encoded
        n (int): Length of the encoded messages
        chann_type (string): Channel type. Currently only AWGN available
        verbose (bool): If True, print the path each model was loaded from
    Returns:
        encoder/tx (Object): Recovered Tx/Encoder model
        decoder/rx (Object): Recovered Rx/Decoder model
    Raises:
        NameError: if any model file cannot be loaded (original cause is chained)
    """
    try:
        if model == "supervised":
            enc_filename = "%s/%s_%d_%d_encoder.pth" % (MODELS_FOLDER, chann_type, m, n)
            dec_filename = "%s/%s_%d_%d_decoder.pth" % (MODELS_FOLDER, chann_type, m, n)
            encoder = Encoder(m=m, n=n)
            encoder.load_state_dict(torch.load(enc_filename))
            if verbose:
                print('Model loaded from %s.' % enc_filename)
            # Put them in the correct device and eval mode
            encoder.to(device)
            encoder.eval()
            decoder = Decoder(m=m, n=n)
            decoder.load_state_dict(torch.load(dec_filename))
            if verbose:
                print('Model loaded from %s.' % dec_filename)
            decoder.to(device)
            decoder.eval()
            return encoder, decoder
        else:
            tx_filename = "%s/%s_%d_%d_tx.pth" % (MODELS_FOLDER, chann_type, m, n)
            rx_filename = "%s/%s_%d_%d_rx.pth" % (MODELS_FOLDER, chann_type, m, n)
            tx = Transmitter(m=m, n=n)
            tx.load_state_dict(torch.load(tx_filename))
            if verbose:
                print('Model loaded from %s.' % tx_filename)
            # Put them in the correct device and eval mode
            tx.to(device)
            tx.eval()
            rx = Receiver(m=m, n=n)
            rx.load_state_dict(torch.load(rx_filename))
            if verbose:
                print('Model loaded from %s.' % rx_filename)
            rx.to(device)
            rx.eval()
            return tx, rx
    # FIX: was a bare `except:` that discarded the real failure (missing file,
    # state-dict mismatch, ...). Catch Exception and chain it so the original
    # traceback is preserved; the raised type and message are unchanged for callers.
    except Exception as err:
        raise NameError("Something went wrong loading file for system (%s)"
                        % (chann_type)) from err
def main():
    """Describe main process including train and validation.

    Builds the encoder/decoder (from scratch or from `checkpoint`), then runs
    the epoch loop: train, validate (loss / top-5 accuracy / BLEU-4), adjust
    learning rates on plateaus, and save a checkpoint after every epoch.
    Relies on module-level configuration globals (data_folder, dataset_name,
    embed_dim, decoder_dim, dropout_rate, num_nodes, decoder_lr, encoder_lr,
    device, h5data_folder, batch_size, workers, num_epochs, grad_clip,
    print_freq, ...).
    """
    global start_epoch, checkpoint, fine_tune_encoder, best_bleu4, epochs_since_improvement, word_map

    # Read word map
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + dataset_name + ".json")
    with open(word_map_path, 'r') as j:
        word_map = json.load(j)

    # Set checkpoint or read from checkpoint
    if checkpoint is None:
        # No pretrained model, set model from beginning
        decoder = Decoder(embed_dim=embed_dim,
                          decoder_dim=decoder_dim,
                          vocab_size=len(word_map),
                          dropout=dropout_rate)
        # FIX: materialize into a list. The original kept the lazy `filter`
        # object, which the averaging loop below exhausted, so optim.Adam
        # then received an empty parameter list.
        decoder_param = [p for p in decoder.parameters() if p.requires_grad]
        # Average the initial weights across the distributed nodes.
        for param in decoder_param:
            tensor0 = param.data
            # FIX: dist.reduce_op is deprecated in favour of dist.ReduceOp.
            dist.all_reduce(tensor0, op=dist.ReduceOp.SUM)
            # FIX: np.float was removed in NumPy 1.24; builtin float is the
            # documented replacement.
            param.data = tensor0 / np.sqrt(float(num_nodes))
        decoder_optimizer = optim.Adam(params=decoder_param, lr=decoder_lr)

        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_param = [p for p in encoder.parameters() if p.requires_grad]
        if fine_tune_encoder:
            for param in encoder_param:
                tensor0 = param.data
                dist.all_reduce(tensor0, op=dist.ReduceOp.SUM)
                param.data = tensor0 / np.sqrt(float(num_nodes))
        encoder_optimizer = optim.Adam(
            params=encoder_param, lr=encoder_lr) if fine_tune_encoder else None
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        # FIX: these two assignments were commented out, leaving both names
        # undefined in this branch even though they are used below
        # (NameError on the first training batch after a resume).
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    decoder = decoder.to(device)
    encoder = encoder.to(device)
    criterion = nn.CrossEntropyLoss()

    # Data loader (ImageNet normalization constants)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_set = CaptionDataset(data_folder=h5data_folder,
                               data_name=dataset_name,
                               split="TRAIN",
                               transform=transforms.Compose([normalize]))
    val_set = CaptionDataset(data_folder=h5data_folder,
                             data_name=dataset_name,
                             split="VAL",
                             transform=transforms.Compose([normalize]))
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                              num_workers=workers, pin_memory=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True,
                            num_workers=workers, pin_memory=True)

    total_start_time = datetime.datetime.now()
    print("Start the 1st epoch at: ", total_start_time)

    # Epoch
    for epoch in range(start_epoch, num_epochs):
        # Stop if there are 20 epochs with no improvement; decay LR every 8.
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement % 8 == 0 and epochs_since_improvement > 0:
            adjust_learning_rate(decoder_optimizer)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer)

        batch_time = AverageMeter()  # forward prop. + back prop. time
        data_time = AverageMeter()  # data loading time
        losses = AverageMeter()  # loss (per word decoded)
        top5accs = AverageMeter()  # top5 accuracy

        decoder.train()
        encoder.train()
        start = time.time()
        start_time = datetime.datetime.now()  # start time for this epoch

        # TRAIN
        for j, (images, captions, caplens) in enumerate(train_loader):
            # Cap Adam's internal step counter.
            # NOTE(review): presumably works around an overflow/instability in
            # a step-dependent computation — confirm before removing.
            if fine_tune_encoder and (epoch - start_epoch > 0 or j > 10):
                for group in encoder_optimizer.param_groups:
                    for p in group['params']:
                        state = encoder_optimizer.state[p]
                        if (state['step'] >= 1024):
                            state['step'] = 1000
            if (epoch - start_epoch > 0 or j > 10):
                for group in decoder_optimizer.param_groups:
                    for p in group['params']:
                        state = decoder_optimizer.state[p]
                        if (state['step'] >= 1024):
                            state['step'] = 1000

            data_time.update(time.time() - start)
            images = images.to(device)
            captions = captions.to(device)
            caplens = caplens.to(device)

            # Forward
            enc_images = encoder(images)
            predictions, enc_captions, dec_lengths, sort_ind = decoder(
                enc_images, captions, caplens)
            # Define target as original captions excluding <start>
            target = enc_captions[:, 1:]  # (batch_size, max_caption_length-1)
            # Delete all paddings and concat all other parts
            target, _ = pack_padded_sequence(target, dec_lengths, batch_first=True)
            predictions, _ = pack_padded_sequence(
                predictions, dec_lengths, batch_first=True)  # (batch_size, sum(dec_lengths))
            loss = criterion(predictions, target)

            # Backward
            decoder_optimizer.zero_grad()
            if encoder_optimizer is not None:
                encoder_optimizer.zero_grad()
            loss.backward()
            ## Clip gradients
            if grad_clip is not None:
                clip_gradient(decoder_optimizer, grad_clip)
                if encoder_optimizer is not None:
                    clip_gradient(encoder_optimizer, grad_clip)
            ## Update
            decoder_optimizer.step()
            if encoder_optimizer is not None:
                encoder_optimizer.step()

            # Update metrics (AverageMeter)
            acc_top5 = compute_accuracy(predictions, target, k=5)
            top5accs.update(acc_top5, sum(dec_lengths))
            losses.update(loss.item(), sum(dec_lengths))
            batch_time.update(time.time() - start)

            # Print current status
            if (j + 1) % print_freq == 0:
                print(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Current Batch Time: {batch_time.val:.3f} (Average: {batch_time.avg:.3f})\t'
                    'Current Data Load Time: {data_time.val:.3f} (Average: {data_time.avg:.3f})\t'
                    'Current Loss: {loss.val:.4f} (Average: {loss.avg:.4f})\t'
                    'Current Top-5 Accuracy: {top5.val:.3f} (Average: {top5.avg:.3f})'
                    .format(epoch + 1, j + 1, len(train_loader),
                            batch_time=batch_time, data_time=data_time,
                            loss=losses, top5=top5accs))
                now_time = datetime.datetime.now()
                print("Epoch Training Time: ", now_time - start_time)
                print("Total Time: ", now_time - total_start_time)
            start = time.time()

        # VALIDATION
        decoder.eval()
        encoder.eval()
        batch_time = AverageMeter()  # forward prop. + back prop. time
        losses = AverageMeter()  # loss (per word decoded)
        top5accs = AverageMeter()  # top5 accuracy
        references = list()  # references (true captions) for calculating BLEU-4 score
        hypotheses = list()  # hypotheses (predictions)
        start_time = datetime.datetime.now()

        for j, (images, captions, caplens, all_caps) in enumerate(val_loader):
            start = time.time()
            images = images.to(device)
            captions = captions.to(device)
            caplens = caplens.to(device)

            # Forward
            enc_images = encoder(images)
            predictions, enc_captions, dec_lengths, sort_ind = decoder(
                enc_images, captions, caplens)
            # Keep an un-packed copy for hypothesis decoding below.
            predictions_copy = predictions.clone()
            # Define target as original captions excluding <start>
            target = enc_captions[:, 1:]  # (batch_size, max_caption_length-1)
            target, _ = pack_padded_sequence(target, dec_lengths, batch_first=True)
            predictions, _ = pack_padded_sequence(
                predictions, dec_lengths, batch_first=True)  # (batch_size, sum(dec_lengths))
            loss = criterion(predictions, target)

            # Update metrics (AverageMeter)
            acc_top5 = compute_accuracy(predictions, target, k=5)
            top5accs.update(acc_top5, sum(dec_lengths))
            losses.update(loss.item(), sum(dec_lengths))
            batch_time.update(time.time() - start)

            # Print current status
            # NOTE(review): data_time here still holds the training-phase
            # meter (not re-created for validation) — kept as-is.
            if (j + 1) % print_freq == 0:
                print(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data Load Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})'.format(
                        epoch + 1, j, len(val_loader), batch_time=batch_time,
                        data_time=data_time, loss=losses, top5=top5accs))
                now_time = datetime.datetime.now()
                print("Epoch Validation Time: ", now_time - start_time)
                print("Total Time: ", now_time - total_start_time)

            ## Store references (true captions), and hypothesis (prediction) for each image
            ## If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
            ## references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]
            # references (align with the decoder's sort order first)
            all_caps = all_caps[sort_ind]
            for k in range(all_caps.shape[0]):
                img_caps = all_caps[k].tolist()
                img_captions = list(
                    map(
                        lambda c: [
                            w for w in c
                            if w not in {word_map["<start>"], word_map["<pad>"]}
                        ], img_caps))
                references.append(img_captions)
            # hypotheses: greedy argmax decoding with padding stripped
            _, preds = torch.max(predictions_copy, dim=2)
            preds = preds.tolist()
            temp_preds = list()
            for i, p in enumerate(preds):
                temp_preds.append(preds[i][:dec_lengths[i]])  # remove pads
            preds = temp_preds
            hypotheses.extend(preds)
            assert len(references) == len(hypotheses)

        ## Compute BLEU-4 Scores
        recent_bleu4 = corpus_bleu(references, hypotheses)
        print(
            '\n * LOSS - {loss.avg:.3f}, TOP-5 ACCURACY - {top5.avg:.3f}, BLEU-4 - {bleu}\n'
            .format(loss=losses, top5=top5accs, bleu=recent_bleu4))

        # CHECK IMPROVEMENT
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement))
        else:
            epochs_since_improvement = 0

        # SAVE CHECKPOINT
        save_checkpoint(dataset_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
        # FIX: refresh the timestamp — the original reused `now_time`, which
        # is unbound when print_freq never triggered during validation.
        now_time = datetime.datetime.now()
        print("Epoch {}, cost time: {}\n".format(epoch + 1,
                                                 now_time - total_start_time))
def make_representations(arguments, device):
    """
    Creates representations for all data.
    :param arguments: Dictionary containing arguments.
    :param device: PyTorch device object.
    """
    # Loads training and testing data.
    train_data = Dataset(arguments, "train")
    test_data = Dataset(arguments, "test")

    def _make_loader(data):
        # Deterministic, order-preserving loader so representations stay
        # aligned with the dataset order.
        return DataLoader(data,
                          batch_size=arguments["batch_size"],
                          shuffle=False,
                          num_workers=arguments["data_workers"],
                          pin_memory=False,
                          drop_last=False)

    # Creates the data loaders for the training and testing data.
    training_data_loader = _make_loader(train_data)
    testing_data_loader = _make_loader(test_data)
    log(arguments, "Loaded Datasets")

    # Initialises the encoder.
    encoder = Encoder(0, arguments["image_size"],
                      arguments["pretrained"] == "imagenet")

    # Loads weights from pretrained Contrastive Predictive Coding model.
    if arguments["pretrained"].lower() == "cpc":
        encoder_path = os.path.join(
            arguments["model_dir"], f"{arguments['experiment']}_encoder_best.pt")
        encoder.load_state_dict(torch.load(encoder_path, map_location=device),
                                strict=False)

    # Sets the model to evaluation mode and moves it to the selected device.
    encoder.eval()
    encoder.to(device)

    # If 16 bit precision is being used change the model and optimiser precision.
    if arguments["precision"] == 16:
        encoder = amp.initialize(encoder, opt_level="O2", verbosity=False)
    # Checks if precision level is supported and if not defaults to 32.
    elif arguments["precision"] != 32:
        log(
            arguments,
            "Only 16 and 32 bit precision supported. Defaulting to 32 bit precision."
        )
    log(arguments, "Models Initialised")

    # Creates a folder if one does not exist.
    os.makedirs(os.path.dirname(arguments["representation_dir"]), exist_ok=True)

    # Creates the HDF5 files used to store the training and testing data representations.
    train_representations = HDF5Handler(
        os.path.join(arguments["representation_dir"],
                     f"{arguments['experiment']}_train.h5"), 'x',
        (encoder.encoder_size, ))
    test_representations = HDF5Handler(
        os.path.join(arguments["representation_dir"],
                     f"{arguments['experiment']}_test.h5"), 'x',
        (encoder.encoder_size, ))
    log(arguments, "HDF5 Representation Files Created.")

    # Starts a timer.
    start_time = time.time()

    def _extract(loader, handler, dataset_length, split_name):
        # One pass over `loader`, appending encoder features to `handler`.
        # (Extracted helper: the original duplicated this loop for the
        # training and testing splits.)
        num_batches = 0
        for images, _ in loader:
            # Loads the image batch into memory.
            images = images.to(device)
            # Gets the representations of the image batch from the encoder.
            representations = encoder.forward_features(images)
            # Moves the representations to the CPU and stores them.
            handler.append(representations.cpu().data.numpy())
            # Prints information about representation extraction process.
            num_batches += 1
            if num_batches % arguments["log_intervals"] == 0:
                print(
                    f"{split_name} Batches: {num_batches}/{dataset_length // arguments['batch_size']}"
                )

    # Performs a representation generation with no gradients.
    with torch.no_grad():
        _extract(training_data_loader, train_representations, len(train_data),
                 "Training")
        _extract(testing_data_loader, test_representations, len(test_data),
                 "Testing")

    print(
        f"Representations from {arguments['experiment']} encoder created in {int(time.time() - start_time)}s"
    )
class Trainer:

    def __init__(self, device, dset, x_dim, c_dim, z_dim, n_train, n_test,
                 lr, layer_sizes, **kwargs):
        '''
        Trainer class for a cross-modal VAE (image features <-> class
        attributes) with optional distribution-alignment, cross-alignment
        and support-classifier regularizers.

        Args:
            device (torch.device) : Use GPU or CPU
            dset   (str)          : Dataset name (used in save paths)
            x_dim (int)           : Feature dimension
            c_dim (int)           : Attribute dimension
            z_dim (int)           : Latent dimension
            n_train (int)         : Number of training classes
            n_test (int)          : Number of testing classes
            lr (float)            : Learning rate for VAE
            layer_sizes(dict)     : List containing the hidden layer sizes
            **kwargs              : Flags for using various regularizations
        '''
        self.device = device
        self.dset = dset
        self.lr = lr
        self.z_dim = z_dim
        self.n_train = n_train
        self.n_test = n_test

        self.gzsl = kwargs.get('gzsl', False)
        if self.gzsl:
            # Generalized ZSL: the final classifier covers seen + unseen classes.
            self.n_test = n_train + n_test

        # flags for various regularizers
        self.use_da = kwargs.get('use_da', False)
        self.use_ca = kwargs.get('use_ca', False)
        self.use_support = kwargs.get('use_support', False)

        self.x_encoder = Encoder(x_dim, layer_sizes['x_enc'], z_dim).to(self.device)
        self.x_decoder = Decoder(z_dim, layer_sizes['x_dec'], x_dim).to(self.device)
        self.c_encoder = Encoder(c_dim, layer_sizes['c_enc'], z_dim).to(self.device)
        self.c_decoder = Decoder(z_dim, layer_sizes['c_dec'], c_dim).to(self.device)
        self.support_classifier = Classifier(z_dim, self.n_train).to(self.device)

        params = list(self.x_encoder.parameters()) + \
            list(self.x_decoder.parameters()) + \
            list(self.c_encoder.parameters()) + \
            list(self.c_decoder.parameters())
        if self.use_support:
            params += list(self.support_classifier.parameters())
        self.optimizer = optim.Adam(params, lr=lr)

        self.final_classifier = Classifier(z_dim, self.n_test).to(self.device)
        self.final_cls_optim = optim.RMSprop(
            self.final_classifier.parameters(), lr=2e-4)
        self.criterion = nn.CrossEntropyLoss()

        self.vae_save_path = './saved_models'
        self.disc_save_path = './saved_models/disc_model_%s.pth' % self.dset

    def fit_VAE(self, x, c, y, ep):
        '''
        Train on 1 minibatch of data
        Args:
            x (torch.Tensor) : Features of size (batch_size, 2048)
            c (torch.Tensor) : Attributes of size (batch_size, attr_dim)
            y (torch.Tensor) : Target labels of size (batch_size,)
            ep (int)         : Epoch number
        Returns:
            Loss for the minibatch - 3-tuple with (vae_loss, distributn loss,
            cross_recon loss)
        '''
        self.anneal_parameters(ep)

        x = Variable(x.float()).to(self.device)
        c = Variable(c.float()).to(self.device)
        y = Variable(y.long()).to(self.device)

        # VAE for image embeddings
        mu_x, logvar_x = self.x_encoder(x)
        z_x = self.reparameterize(mu_x, logvar_x)
        x_recon = self.x_decoder(z_x)

        # VAE for class embeddings
        mu_c, logvar_c = self.c_encoder(c)
        z_c = self.reparameterize(mu_c, logvar_c)
        c_recon = self.c_decoder(z_c)

        # reconstruction loss
        L_recon_x = self.compute_recon_loss(x, x_recon)
        L_recon_c = self.compute_recon_loss(c, c_recon)

        # KL divergence loss
        D_kl_x = self.compute_kl_div(mu_x, logvar_x)
        D_kl_c = self.compute_kl_div(mu_c, logvar_c)

        # VAE Loss = recon_loss - KL_Divergence_loss
        # (compute_kl_div returns the *negative* KL term, hence subtraction.)
        L_vae_x = L_recon_x - self.beta * D_kl_x
        L_vae_c = L_recon_c - self.beta * D_kl_c
        L_vae = L_vae_x + L_vae_c

        # calculate cross alignment loss: decode each modality's latent with
        # the *other* modality's decoder.
        L_ca = torch.zeros(1).to(self.device)
        if self.use_ca:
            x_recon_from_c = self.x_decoder(z_c)
            L_ca_x = self.compute_recon_loss(x, x_recon_from_c)
            c_recon_from_x = self.c_decoder(z_x)
            L_ca_c = self.compute_recon_loss(c, c_recon_from_x)
            L_ca = L_ca_x + L_ca_c

        # calculate distribution alignment loss
        L_da = torch.zeros(1).to(self.device)
        if self.use_da:
            L_da = 2 * self.compute_da_loss(mu_x, logvar_x, mu_c, logvar_c)

        # calculate loss from support classifier
        L_sup = torch.zeros(1).to(self.device)
        if self.use_support:
            y_prob = F.softmax(self.support_classifier(z_x), dim=0)
            log_prob = torch.log(torch.gather(y_prob, 1, y.unsqueeze(1)))
            L_sup = -1 * torch.mean(log_prob)

        total_loss = L_vae + self.gamma * L_ca + self.delta * L_da + self.alpha * L_sup

        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()

        return L_vae.item(), L_da.item(), L_ca.item()

    def reparameterize(self, mu, log_var):
        '''
        Reparameterization trick using unimodal gaussian.

        NOTE(review): the noise is sampled per-row and broadcast across the
        latent dimensions (shape (batch, 1) expanded), i.e. the same epsilon
        is shared by all dimensions of a sample — confirm this is intended
        (the per-element alternative is kept commented below).
        '''
        # eps = Variable(torch.randn(mu.size())).to(self.device)
        eps = Variable(torch.randn(mu.size()[0], 1).expand(mu.size())).to(self.device)
        z = mu + torch.exp(log_var / 2.0) * eps
        return z

    def anneal_parameters(self, epoch):
        '''
        Change weight factors of various losses based on epoch number
        '''
        # weight of kl divergence loss
        if epoch <= 90:
            self.beta = 0.0026 * epoch

        # weight of Cross Alignment loss
        if epoch < 20:
            self.gamma = 0
        if epoch >= 20 and epoch <= 75:
            self.gamma = 0.044 * (epoch - 20)

        # weight of distribution alignment loss
        if epoch < 5:
            self.delta = 0
        if epoch >= 5 and epoch <= 22:
            self.delta = 0.54 * (epoch - 5)

        # weight of support loss
        if epoch < 5:
            self.alpha = 0
        else:
            self.alpha = 0.01

    def compute_recon_loss(self, x, x_recon):
        '''
        Compute the reconstruction error (summed L1 distance).
        '''
        l1_loss = torch.abs(x - x_recon).sum()
        # l1_loss = torch.abs(x - x_recon).sum(dim=1).mean()
        return l1_loss

    def compute_kl_div(self, mu, log_var):
        '''
        Compute KL Divergence between N(mu, var) & N(0, 1).
        Returns the negative KL term (maximized), consumed with a minus
        sign in fit_VAE.
        '''
        kld = 0.5 * (1 + log_var - mu.pow(2) - log_var.exp()).sum()
        # kld = 0.5 * (1 + log_var - mu.pow(2) - log_var.exp()).sum(dim=1).mean()
        return kld

    def compute_da_loss(self, mu1, log_var1, mu2, log_var2):
        '''
        Computes Distribution Alignment loss between 2 normal distributions.
        Uses Wasserstein distance as distance measure.
        '''
        l1 = (mu1 - mu2).pow(2).sum(dim=1)
        std1 = (log_var1 / 2.0).exp()
        std2 = (log_var2 / 2.0).exp()
        l2 = (std1 - std2).pow(2).sum(dim=1)
        l_da = torch.sqrt(l1 + l2).sum()
        return l_da

    def fit_final_classifier(self, x, y):
        '''
        Train the final classifier on synthetically generated data
        '''
        x = Variable(x.float()).to(self.device)
        y = Variable(y.long()).to(self.device)

        logits = self.final_classifier(x)
        loss = self.criterion(logits, y)

        self.final_cls_optim.zero_grad()
        loss.backward()
        self.final_cls_optim.step()

        return loss.item()

    def fit_MOE(self, x, y):
        '''
        Trains the synthetic dataset on a MoE model
        '''
        # TODO: not implemented yet — intentionally a no-op.

    def get_vae_savename(self):
        '''
        Returns a string indicative of various flags used during training
        and dataset used. Works as a unique name for saving models
        '''
        flags = ''
        if self.use_da:
            flags += '-da'
        if self.use_ca:
            flags += '-ca'
        if self.use_support:
            flags += '-support'
        model_name = 'vae_model__dset-%s__lr-%f__z-%d__%s.pth' % (
            self.dset, self.lr, self.z_dim, flags)
        return model_name

    def save_VAE(self, ep):
        # Persist all four VAE sub-networks plus the optimizer state.
        state = {
            'epoch': ep,
            'x_encoder': self.x_encoder.state_dict(),
            'x_decoder': self.x_decoder.state_dict(),
            'c_encoder': self.c_encoder.state_dict(),
            'c_decoder': self.c_decoder.state_dict(),
            'optimizer': self.optimizer.state_dict(),
        }
        model_name = self.get_vae_savename()
        torch.save(state, os.path.join(self.vae_save_path, model_name))

    def load_models(self, model_path=''):
        # Restore a saved VAE checkpoint; returns the stored epoch
        # (0 if no checkpoint file exists).
        # FIX: the original used `model_path is ''` — identity comparison
        # with a literal is a SyntaxWarning and not guaranteed to work.
        if model_path == '':
            model_path = os.path.join(self.vae_save_path,
                                      self.get_vae_savename())
        ep = 0
        if os.path.exists(model_path):
            checkpoint = torch.load(model_path)
            self.x_encoder.load_state_dict(checkpoint['x_encoder'])
            self.x_decoder.load_state_dict(checkpoint['x_decoder'])
            self.c_encoder.load_state_dict(checkpoint['c_encoder'])
            self.c_decoder.load_state_dict(checkpoint['c_decoder'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            ep = checkpoint['epoch']
        return ep

    def create_syn_dataset(self, test_labels, attributes, seen_dataset, n_samples=400):
        '''
        Creates a synthetic dataset based on attribute vectors of unseen class
        Args:
            test_labels: A dict with key as original serial number in provided
                dataset and value as the index which is predicted during
                classification by network
            attributes: A np array containing class attributes for each class
                of dataset
            seen_dataset: A list of 3-tuple (x, _, y) where x belongs to one of
                the seen classes and y is corresponding label. Used for
                generating latent representations of seen classes in GZSL
            n_samples: Number of samples of each unseen class to be
                generated(Default: 400)
        Returns:
            A list of 3-tuple (z, _, y) where z is latent representations and
            y is corresponding label
        '''
        syn_dataset = []
        for test_cls, idx in test_labels.items():
            # test_cls is 1-indexed in the provided dataset.
            attr = attributes[test_cls - 1]

            self.c_encoder.eval()
            c = Variable(torch.FloatTensor(attr).unsqueeze(0)).to(self.device)
            mu, log_var = self.c_encoder(c)

            Z = torch.cat(
                [self.reparameterize(mu, log_var) for _ in range(n_samples)])
            syn_dataset.extend([(Z[i], test_cls, idx) for i in range(n_samples)])

        if seen_dataset is not None:
            # GZSL: also embed real samples of the seen classes.
            self.x_encoder.eval()
            for (x, att_idx, y) in seen_dataset:
                x = Variable(torch.FloatTensor(x).unsqueeze(0)).to(self.device)
                mu, log_var = self.x_encoder(x)
                z = self.reparameterize(mu, log_var).squeeze()
                syn_dataset.append((z, att_idx, y))

        return syn_dataset

    def compute_accuracy(self, generator):
        # Classify latent means of real features; in GZSL mode returns
        # (seen_accuracy, unseen_accuracy), otherwise a single accuracy.
        y_real_list, y_pred_list = [], []

        for idx, (x, _, y) in enumerate(generator):
            x = Variable(x.float()).to(self.device)
            y = Variable(y.long()).to(self.device)

            self.final_classifier.eval()
            self.x_encoder.eval()

            mu, log_var = self.x_encoder(x)
            logits = self.final_classifier(mu)

            _, y_pred = logits.max(dim=1)

            y_real = y.detach().cpu().numpy()
            y_pred = y_pred.detach().cpu().numpy()

            y_real_list.extend(y_real)
            y_pred_list.extend(y_pred)

        ## We have sequence of real and predicted labels
        ## find seen and unseen classes accuracy
        if self.gzsl:
            y_real_list = np.asarray(y_real_list)
            y_pred_list = np.asarray(y_pred_list)

            y_seen_real = np.extract(y_real_list < self.n_train, y_real_list)
            y_seen_pred = np.extract(y_real_list < self.n_train, y_pred_list)

            y_unseen_real = np.extract(y_real_list >= self.n_train, y_real_list)
            y_unseen_pred = np.extract(y_real_list >= self.n_train, y_pred_list)

            acc_seen = accuracy_score(y_seen_real, y_seen_pred)
            acc_unseen = accuracy_score(y_unseen_real, y_unseen_pred)
            return acc_seen, acc_unseen
        else:
            return accuracy_score(y_real_list, y_pred_list)
def main(args):
    """Generate captions for a directory of images with a trained model.

    Loads the vocabulary (and optional GloVe embeddings), restores the
    encoder/decoder from `args.eval_ckpt_path`, samples a caption for every
    image, and writes [{'image_id', 'caption'}, ...] to
    `args.results_json_path`.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Evaluating on {device}")

    # NOTE(review): pickle.load assumes these are trusted local artifacts
    # produced by the training pipeline — never load untrusted pickles.
    with open(args.vocab_path, 'rb') as f:
        vocab_object = pickle.load(f)
    print(f"Loaded the vocabulary object from {args.vocab_path}, total size={len(vocab_object)}")

    if args.glove_embed_path is not None:
        with open(args.glove_embed_path, 'rb') as f:
            glove_embeddings = pickle.load(f)
        print(f"Loaded the glove embeddings from {args.glove_embed_path}, total size={len(glove_embeddings)}")
        # We are using 300d glove embeddings
        args.embed_size = 300
        weights_matrix = np.zeros((len(vocab_object), args.embed_size))
        for word, index in vocab_object.word2index.items():
            if word in glove_embeddings:
                weights_matrix[index] = glove_embeddings[word]
            else:
                # Random init for words without a pretrained vector.
                weights_matrix[index] = np.random.normal(scale=0.6, size=(args.embed_size, ))
        weights_matrix = torch.from_numpy(weights_matrix).float().to(device)
    else:
        weights_matrix = None

    # Standard ImageNet preprocessing.
    img_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    val_dataset = ImageDataset(args.image_root, img_transforms)
    val_dataloader = torch.utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    encoder = Encoder(args.resnet_size, (3, 224, 224), args.embed_size)
    encoder = encoder.eval().to(device)
    decoder = Decoder(args.rnn_type, weights_matrix, len(vocab_object),
                      args.embed_size, args.hidden_size)
    decoder = decoder.eval().to(device)

    model_ckpt = torch.load(args.eval_ckpt_path,
                            map_location=lambda storage, loc: storage)
    encoder.load_state_dict(model_ckpt['encoder'])
    decoder.load_state_dict(model_ckpt['decoder'])
    print(f"Loaded model from {args.eval_ckpt_path}")

    val_results = []
    # (removed: unused local `total_examples`)
    for i, (images, image_ids) in enumerate(val_dataloader):
        images = images.to(device)
        with torch.no_grad():
            image_embeddings = encoder(images)
            captions_wid = decoder.sample_batch(image_embeddings, args.caption_maxlen)
        captions_wid = captions_wid.cpu().numpy()
        captions = []
        for caption_wid in captions_wid:
            caption_words = []
            for word_id in caption_wid:
                word = vocab_object.index2word[word_id]
                caption_words.append(word)
                if word == '<end>':
                    break
            # Drop the leading <start> token and the last two words
            # (trailing '<end>' plus the word before it).
            # NOTE(review): [1:-2] also trims the final content word when
            # no '<end>' was generated — confirm this slice is intended.
            captions.append(' '.join(caption_words[1:-2]))
        image_ids = image_ids.tolist()
        for image_id, caption in zip(image_ids, captions):
            val_results.append({'image_id': image_id, 'caption': caption})

    with open(args.results_json_path, 'w') as f:
        json.dump(val_results, f)
def main():
    """Train or qualitatively test the gait-sequence autoencoder.

    In 'train' mode runs the epoch loop with LR decay and early stopping;
    otherwise reconstructs one validation sequence and plots ground-truth vs
    predicted ankle angles. Relies on module-level configuration globals
    (data_dir, batch_size, mode, device, load_pretrained, checkpoint_path,
    model hyper-parameters, ...).
    """
    start_epoch = 0
    max_loss = math.inf  # best (lowest) validation loss seen so far
    epochs_since_improvement = 0

    dataset = GaitSequenceDataset(root_dir=data_dir,
                                  longest_sequence=85,
                                  shortest_sequence=55)
    train_sampler, validation_sampler = generate_train_validation_samplers(
        dataset, validation_split=0.2)

    print('Building dataloaders..')
    train_dataloader = torch.utils.data.DataLoader(dataset,
                                                   batch_size=batch_size,
                                                   sampler=train_sampler,
                                                   drop_last=True)
    validation_dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=validation_sampler,
        drop_last=True)

    if load_pretrained:  # idiomatic truthiness instead of `is True`
        print('Loading pretrained model..')
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        encoder = checkpoint['encoder']
        decoder = checkpoint['decoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        decoder_optimizer = checkpoint['decoder_optimizer']
    else:
        print('Creating model..')
        encoder = Encoder(sequence_length, num_features, embedding_dimension)
        decoder = Decoder(embedding_dimension, num_classes, hidden_dimension,
                          sequence_length)
        encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=learning_rate)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

    criterion = nn.MSELoss().to(device)

    if mode == 'train':
        summary = SummaryWriter()
        encoder.to(device)
        decoder.to(device)

        for epoch in range(start_epoch, start_epoch + num_epochs):
            # Early stopping after 20 epochs without improvement.
            if epochs_since_improvement == 20:
                break
            # Decay the LR every 4th consecutive epoch without improvement.
            if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0:
                adjust_learning_rate(encoder_optimizer, 0.8)

            train(encoder, decoder, train_dataloader, encoder_optimizer,
                  decoder_optimizer, criterion, clip_gradient, device, epoch,
                  num_epochs, summary, loss_display_interval)
            current_loss = validate(encoder, decoder, validation_dataloader,
                                    criterion, device, epoch, num_epochs,
                                    summary, loss_display_interval)

            is_best = max_loss > current_loss
            max_loss = min(max_loss, current_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement,))
            else:
                epochs_since_improvement = 0

            save_checkpoint(epoch, epochs_since_improvement, encoder, decoder,
                            encoder_optimizer, decoder_optimizer, is_best)
    else:
        print('testing...')
        encoder.to(device)
        decoder.to(device)
        encoder.eval()
        decoder.eval()

        for batch_idx, data in enumerate(validation_dataloader):
            # (seq_len, 1, features) layout expected by the encoder.
            sequence = data['sequence'][0].unsqueeze(0).permute(1, 0, 2).to(device)
            seq_len = data['sequence_length'][0]

            # Encoder output itself is unused here; only the hidden state
            # feeds the decoder.
            _, (hidden_state, cell_state) = encoder(sequence)
            prediction = decoder(hidden_state)

            sequence = sequence.squeeze(1).detach().cpu().numpy()
            prediction = prediction.squeeze(1).detach().cpu().numpy()
            print(sequence.shape)

            # Column pairs are [left, right] joint angles: hips [0, 3],
            # knees [1, 4], ankles [2, 5] — swap indices to plot other joints.
            ankle_angles_gt = sequence[:seq_len, [2, 5]]
            ankle_angles_pred = prediction[:seq_len, [2, 5]]

            time_axis = np.arange(0, len(ankle_angles_gt), 1)
            fig, axs = plt.subplots(4)
            fig.suptitle('Ankle angle reconstruction')
            axs[0].plot(time_axis, ankle_angles_gt[:, 0])
            axs[0].set_title('Left ankle ground truth')
            axs[1].plot(time_axis, ankle_angles_pred[:, 0])
            axs[1].set_title('Left ankle prediction')
            axs[2].plot(time_axis, ankle_angles_gt[:, 1])
            axs[2].set_title('Right ankle ground truth')
            axs[3].plot(time_axis, ankle_angles_pred[:, 1])
            axs[3].set_title('Right ankle prediction')
            plt.show()
            break  # only visualize the first validation batch
class AAETrainer(AbstractTrainer):
    """Adversarial AutoEncoder trainer: an encoder/decoder pair regularized
    by a discriminator that pushes the latent codes toward N(0, I)."""

    def __init__(self, opt):
        super().__init__(opt)
        print('[info] Dataset:', self.opt.dataset)
        # FIX: typo in the log message ("Alhpa" -> "Alpha").
        print('[info] Alpha = ', self.opt.alpha)
        print('[info] Latent dimension = ', self.opt.latent_dim)
        self.opt = opt
        self.start_visdom()

    def start_visdom(self):
        # Visdom dashboard for the loss curves.
        self.vis = utils.Visualizer(env='Adversarial AutoEncoder Training',
                                    port=8888)

    def build_network(self):
        print('[info] Build the network architecture')
        self.encoder = Encoder(z_dim=self.opt.latent_dim)
        # Vertex count depends on the mesh template of the dataset.
        if self.opt.dataset == 'SMPL':
            num_verts = 6890
        elif self.opt.dataset == 'all_animals':
            num_verts = 3889
        self.decoder = Decoder(num_verts=num_verts, z_dim=self.opt.latent_dim)
        self.discriminator = Discriminator(input_dim=self.opt.latent_dim)
        self.encoder.cuda()
        self.decoder.cuda()
        self.discriminator.cuda()

    def build_optimizer(self):
        print('[info] Build the optimizer')
        self.optim_dis = optim.SGD(self.discriminator.parameters(),
                                   lr=self.opt.learning_rate)
        self.optim_AE = optim.Adam(itertools.chain(self.encoder.parameters(),
                                                   self.decoder.parameters()),
                                   lr=self.opt.learning_rate)

    def build_dataset_train(self):
        train_data = ACAPData(mode='train', name=self.opt.dataset)
        self.num_train_data = len(train_data)
        print('[info] Number of training samples = ', self.num_train_data)
        self.train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=self.opt.batch_size, shuffle=True)

    def build_dataset_valid(self):
        valid_data = ACAPData(mode='valid', name=self.opt.dataset)
        self.num_valid_data = len(valid_data)
        print('[info] Number of validation samples = ', self.num_valid_data)
        self.valid_loader = torch.utils.data.DataLoader(valid_data,
                                                        batch_size=128,
                                                        shuffle=True)

    def build_losses(self):
        print('[info] Build the loss functions')
        self.mseLoss = torch.nn.MSELoss()
        self.ganLoss = torch.nn.BCELoss()

    def print_iteration_stats(self):
        """ print stats at each iteration """
        print(
            '\r[Epoch %d] [Iteration %d/%d] enc = %f dis = %f rec = %f'
            % (self.epoch, self.iteration,
               int(self.num_train_data / self.opt.batch_size),
               self.enc_loss.item(), self.dis_loss.item(),
               self.rec_loss.item()),
            end='')

    def train_iteration(self):
        """One adversarial training step: discriminator update, then
        joint encoder/decoder update."""
        self.encoder.train()
        self.decoder.train()
        self.discriminator.train()

        x = self.data.cuda()
        z = self.encoder(x)
        '''
        Discriminator
        '''
        # sample from N(0, I)
        z_real = Variable(torch.randn(z.size(0), z.size(1))).cuda()
        y_real = Variable(torch.ones(z.size(0))).cuda()
        dis_real_loss = self.ganLoss(self.discriminator(z_real).view(-1), y_real)

        y_fake = Variable(torch.zeros(z.size(0))).cuda()
        dis_fake_loss = self.ganLoss(self.discriminator(z).view(-1), y_fake)

        self.optim_dis.zero_grad()
        self.dis_loss = 0.5 * (dis_fake_loss + dis_real_loss)
        # retain_graph: the graph through the encoder (producing z) is reused
        # for the autoencoder losses below.
        self.dis_loss.backward(retain_graph=True)
        self.optim_dis.step()
        self.dis_losses.append(self.dis_loss.item())
        '''
        Autoencoder
        '''
        # Encoder hopes to generate latent vectors that are closed to prior.
        y_real = Variable(torch.ones(z.size(0))).cuda()
        self.enc_loss = self.ganLoss(self.discriminator(z).view(-1), y_real)

        # Decoder hopes to make the reconstruction as similar to input as possible.
        rec = self.decoder(z)
        self.rec_loss = self.mseLoss(rec, x)

        # There is a trade-off here:
        # Latent regularization V.S. Reconstruction quality
        self.EG_loss = self.opt.alpha * self.enc_loss + (
            1 - self.opt.alpha) * self.rec_loss
        self.optim_AE.zero_grad()
        self.EG_loss.backward()
        self.optim_AE.step()
        self.enc_losses.append(self.enc_loss.item())
        self.rec_losses.append(self.rec_loss.item())

        self.print_iteration_stats()
        self.increment_iteration()

    def train_epoch(self):
        self.reset_iteration()
        self.dis_losses = []
        self.enc_losses = []
        self.rec_losses = []
        for step, data in enumerate(self.train_loader):
            self.data = data
            self.train_iteration()
        # Per-epoch means for the dashboard.
        self.dis_losses = torch.mean(torch.Tensor(self.dis_losses))
        self.enc_losses = torch.mean(torch.Tensor(self.enc_losses))
        self.rec_losses = torch.mean(torch.Tensor(self.rec_losses))
        self.vis.draw_line(win='Encoder Loss', x=self.epoch, y=self.enc_losses)
        self.vis.draw_line(win='Discriminator Loss', x=self.epoch, y=self.dis_losses)
        self.vis.draw_line(win='Reconstruction Loss', x=self.epoch, y=self.rec_losses)

    def valid_iteration(self):
        """One validation step: reconstruction loss only."""
        self.encoder.eval()
        self.decoder.eval()
        self.discriminator.eval()

        # FIX: the original ran validation with autograd enabled, building
        # needless graphs; eval() alone does not disable gradient tracking.
        with torch.no_grad():
            x = self.data.cuda()
            z = self.encoder(x)
            recon = self.decoder(z)
            rec_loss = self.mseLoss(recon, x)
        self.rec_loss.append(rec_loss.item())
        self.increment_iteration()

    def valid_epoch(self):
        self.reset_iteration()
        self.rec_loss = []
        for step, data in enumerate(self.valid_loader):
            self.data = data
            self.valid_iteration()
        self.rec_loss = torch.mean(torch.Tensor(self.rec_loss))
        self.vis.draw_line(win='Valid reconstruction loss', x=self.epoch, y=self.rec_loss)

    def save_network(self):
        print("\n[info] saving net...")
        torch.save(self.encoder.state_dict(), f"{self.opt.save_path}/Encoder.pth")
        torch.save(self.decoder.state_dict(), f"{self.opt.save_path}/Decoder.pth")
        torch.save(self.discriminator.state_dict(), f"{self.opt.save_path}/Discriminator.pth")
def main():
    """Train the gait-sequence autoencoder, or visualize reconstructions.

    Relies on module-level configuration globals (``data_dir``,
    ``batch_size``, ``device``, ``mode``, ``load_pretrained``, learning
    hyper-parameters, ...) defined elsewhere in this file.
    """
    start_epoch = 0
    best_loss = math.inf  # lowest validation loss seen so far
    epochs_since_improvement = 0

    # Custom dataset over the raw gait sequences.
    dataset = GaitSequenceDataset(root_dir=data_dir,
                                  longest_sequence=85,
                                  shortest_sequence=55)

    # Samplers for the training / validation split.
    train_sampler, validation_sampler = generate_train_validation_samplers(
        dataset, validation_split=0.2)

    print('Building dataloaders..')
    train_dataloader = torch.utils.data.DataLoader(dataset,
                                                   batch_size=batch_size,
                                                   sampler=train_sampler,
                                                   drop_last=True)
    validation_dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=validation_sampler,
        drop_last=True)

    if load_pretrained is True:
        print('Loading pretrained model..')
        # map_location keeps the load robust when the checkpoint was saved
        # on a different device (same pattern as the other model loaders in
        # this project).
        checkpoint = torch.load(best_checkpoint_path, map_location=device)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        encoder = checkpoint['encoder']
        decoder = checkpoint['decoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        decoder_optimizer = checkpoint['decoder_optimizer']
    else:
        print('Creating model..')
        encoder = Encoder(sequence_length, num_features, embedding_dimension)
        decoder = Decoder(embedding_dimension, num_features, hidden_dimension,
                          sequence_length)
        encoder_optimizer = torch.optim.RMSprop(encoder.parameters(),
                                                lr=learning_rate)
        decoder_optimizer = torch.optim.RMSprop(decoder.parameters(),
                                                lr=learning_rate)

    # Mean squared reconstruction loss.
    criterion = nn.MSELoss().to(device)

    if mode == 'train':
        # Summary writer for logging training curves.
        summary = SummaryWriter()
        encoder.to(device)
        decoder.to(device)

        for epoch in range(start_epoch, start_epoch + num_epochs):
            # Early stopping after 10 consecutive epochs without improvement.
            if epochs_since_improvement == 10:
                break
            # Shrink the learning rate by 20% after every 4th epoch with no
            # improvement.
            # NOTE(review): only the encoder optimizer is decayed here; the
            # decoder optimizer keeps its original LR — confirm intentional.
            if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0:
                adjust_learning_rate(encoder_optimizer, 0.8)

            # Train
            train(encoder, decoder, train_dataloader, encoder_optimizer,
                  decoder_optimizer, criterion, clip_gradient, device, epoch,
                  num_epochs, summary, loss_display_interval)

            # Validate
            current_loss = validate(encoder, decoder, validation_dataloader,
                                    criterion, device, epoch, num_epochs,
                                    summary, loss_display_interval)

            is_best = current_loss < best_loss
            best_loss = min(best_loss, current_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            save_checkpoint(epoch, epochs_since_improvement, encoder, decoder,
                            encoder_optimizer, decoder_optimizer, is_best,
                            current_loss, base_name)
    else:
        # Visualization mode: reconstruct one validation sequence and plot
        # the right-ankle angle against the ground truth.
        print('testing...')
        encoder.to(device)
        decoder.to(device)
        encoder.eval()
        decoder.eval()
        for batch_idx, data in enumerate(validation_dataloader):
            # Reshape the first sequence of the batch to
            # (seq_len, batch=1, features) as the encoder expects.
            sequence = data['sequence'][0].unsqueeze(0).permute(1, 0, 2).to(device)
            seq_len = data['sequence_length'][0]

            x, (hidden_state, cell_state) = encoder(sequence)
            prediction = decoder(hidden_state)

            sequence = sequence.squeeze(1).detach().cpu().numpy()
            prediction = prediction.squeeze(1).detach().cpu().numpy()
            print(sequence.shape)

            # Feature columns: [0, 3] = hip, [1, 4] = knee, [2, 5] = ankle
            # (left, right). Truncate padding beyond the true sequence length.
            hip_angles_gt = sequence[:seq_len, [0, 3]]
            knee_angles_gt = sequence[:seq_len, [1, 4]]
            ankle_angles_gt = sequence[:seq_len, [2, 5]]
            hip_angles_pred = prediction[:seq_len, [0, 3]]
            knee_angles_pred = prediction[:seq_len, [1, 4]]
            ankle_angles_pred = prediction[:seq_len, [2, 5]]

            time = np.arange(0, len(hip_angles_gt), 1)
            plt.plot(time, ankle_angles_gt[:, 1], label='Ground truth')
            plt.plot(time, ankle_angles_pred[:, 1], label='Prediction')
            plt.title('Right-ankle angle reconstruction')
            plt.legend()
            plt.show()
            break