import json
import os

import torch
from torchvision import datasets, transforms


def run():
    options = parse_options()
    print(options)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    os.makedirs(options.data_dir, exist_ok=True)
    os.makedirs(options.output_dir, exist_ok=True)
    os.makedirs(options.model_dir, exist_ok=True)

    # Persist the run configuration alongside the outputs.
    with open(os.path.join(options.output_dir, 'options.json'), 'w') as f:
        json.dump(vars(options), f, indent=4)

    if options.restore:
        generator = torch.load(os.path.join(options.model_dir, 'generator.pt'))
        critic = torch.load(os.path.join(options.model_dir, 'critic.pt'))
    else:
        generator = Generator(options.image_size, options.state_size)
        critic = Critic(options.image_size)
        generator.apply(init_weights)
        critic.apply(init_weights)
    generator = generator.to(device)
    critic = critic.to(device)

    transform = transforms.Compose([
        transforms.Resize((options.image_size, options.image_size)),
        # No-op: Resize above already yields exactly image_size x image_size.
        transforms.CenterCrop(options.image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    if options.dataset == 'lsun':
        training_class = options.image_class + '_train'
        dataset = datasets.LSUN(options.data_dir, classes=[training_class],
                                transform=transform)
    else:
        dataset = datasets.ImageFolder(root=options.data_dir, transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=options.batch_size,
                                             num_workers=4,
                                             shuffle=True,
                                             drop_last=True,
                                             pin_memory=True)

    train(generator, critic, dataloader, device, options)
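run() assumes a parse_options() helper that returns the parsed command-line options. A minimal sketch of what it could look like, using argparse and covering only the fields run() actually reads; the flag names and defaults here are assumptions, not the original CLI:

import argparse

def parse_options():
    # Hypothetical sketch: covers only the options referenced in run();
    # flag names and defaults are assumptions.
    parser = argparse.ArgumentParser(description='Train a GAN')
    parser.add_argument('--data-dir', default='data')
    parser.add_argument('--output-dir', default='output')
    parser.add_argument('--model-dir', default='models')
    parser.add_argument('--dataset', default='folder', choices=['lsun', 'folder'])
    parser.add_argument('--image-class', default='bedroom')
    parser.add_argument('--image-size', type=int, default=64)
    parser.add_argument('--state-size', type=int, default=128)
    parser.add_argument('--batch-size', type=int, default=64)
    parser.add_argument('--restore', action='store_true',
                        help='load generator.pt / critic.pt from --model-dir')
    return parser.parse_args()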
import torch
import torch.nn as nn
import torch.optim as optim
from os import path


class AgentDDPG:
    """Deep Deterministic Policy Gradient (DDPG) agent for
    continuous-action reinforcement learning tasks."""

    def __init__(self, state_size, hidden_size, action_size,
                 actor_learning_rate=1e-4, critic_learning_rate=1e-3,
                 gamma=0.99, tau=1e-2, use_cuda=False,
                 actor_path=None, critic_path=None):
        # Params
        self.state_size, self.hidden_size, self.action_size = state_size, hidden_size, action_size
        self.gamma, self.tau = gamma, tau
        self.use_cuda = use_cuda

        # Networks
        self.actor = Actor(state_size, hidden_size, action_size)
        self.actor_target = Actor(state_size, hidden_size, action_size)
        self.critic = Critic(state_size + action_size, hidden_size, action_size)
        self.critic_target = Critic(state_size + action_size, hidden_size, action_size)

        # Load model state_dicts from saved files, if given
        if actor_path and path.exists(actor_path):
            self.actor.load_state_dict(torch.load(actor_path))
        if critic_path and path.exists(critic_path):
            self.critic.load_state_dict(torch.load(critic_path))

        # Hard-copy params from the original networks to the target networks
        copy_params(self.actor, self.actor_target)
        copy_params(self.critic, self.critic_target)

        if self.use_cuda:
            self.actor.cuda()
            self.actor_target.cuda()
            self.critic.cuda()
            self.critic_target.cuda()

        # Replay buffer for storing experience
        self.replay_buffer = ReplayBuffer(cache_size=int(1e6))

        # Training
        self.critic_criterion = nn.MSELoss()
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=actor_learning_rate)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_learning_rate)

    def save_to_file(self, actor_file, critic_file):
        """Save the state_dicts of the actor and critic networks."""
        torch.save(self.actor.state_dict(), actor_file)
        torch.save(self.critic.state_dict(), critic_file)

    def get_action(self, state):
        """Select an action for `state` according to the current policy."""
        state = torch.from_numpy(state).float()
        if self.use_cuda:
            state = state.cuda()
        action = self.actor(state)
        # .cpu() is a no-op for CPU tensors, so this covers both devices
        return action.detach().cpu().numpy()

    def save_experience(self, state_t, action_t, reward_t, state_t1):
        self.replay_buffer.add_sample(state_t, action_t, reward_t, state_t1)

    def update(self, batch_size):
        states, actions, rewards, next_states = self.replay_buffer.get_samples(batch_size)
        states = torch.FloatTensor(states)
        actions = torch.FloatTensor(actions)
        rewards = torch.FloatTensor(rewards)
        next_states = torch.FloatTensor(next_states)
        if self.use_cuda:
            states = states.cuda()
            actions = actions.cuda()
            rewards = rewards.cuda()
            next_states = next_states.cuda()

        # Critic loss: TD error against the bootstrapped target Q-value
        Qvals = self.critic(states, actions)
        next_actions = self.actor_target(next_states)
        next_Q = self.critic_target(next_states, next_actions.detach())
        Qprime = rewards + self.gamma * next_Q
        critic_loss = self.critic_criterion(Qvals, Qprime)

        # Update critic
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # Actor loss: maximize the critic's value of the actor's actions
        policy_loss = -self.critic(states, self.actor(states)).mean()

        # Update actor
        self.actor_optimizer.zero_grad()
        policy_loss.backward()
        self.actor_optimizer.step()

        # Slowly track the learned networks with the target networks
        soft_copy_params(self.actor, self.actor_target, self.tau)
        soft_copy_params(self.critic, self.critic_target, self.tau)

    def add_noise_to_weights(self, amount=0.1):
        """Perturb the weights of all four networks with random noise."""
        for net in (self.actor, self.critic, self.actor_target, self.critic_target):
            net.apply(lambda m: _add_noise_to_weights(m, amount, self.use_cuda))
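A minimal sketch of how AgentDDPG might be driven, assuming a Gym-style env with continuous actions in [-1, 1]; the noise scale, episode count, and hyperparameters are illustrative, not from the original code. Note that get_action() is deterministic, so exploration noise is added outside the agent:

import numpy as np
import torch

# Hypothetical driver loop; `env` is assumed to be a Gym-style environment.
agent = AgentDDPG(state_size=env.observation_space.shape[0],
                  hidden_size=256,
                  action_size=env.action_space.shape[0],
                  use_cuda=torch.cuda.is_available())
batch_size, total_steps = 64, 0

for episode in range(500):
    state = env.reset()
    done = False
    while not done:
        # Deterministic policy action plus Gaussian exploration noise
        action = agent.get_action(state)
        action = np.clip(action + 0.1 * np.random.randn(*action.shape), -1.0, 1.0)
        next_state, reward, done, _ = env.step(action)
        agent.save_experience(state, action, reward, next_state)
        state = next_state
        total_steps += 1
        # Start learning once the buffer can supply a full batch
        if total_steps >= batch_size:
            agent.update(batch_size)

agent.save_to_file('actor.pt', 'critic.pt')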
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Resize(64),      # resize the shorter edge to 64 pixels
    transforms.CenterCrop(64),  # crop before ToTensor (the conventional order)
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = datasets.ImageFolder(root=data_path, transform=transform)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

critic = Critic(n_channels, width_shape).to(device)
generator = Generator(n_dimension, width_shape, n_channels).to(device)
generator = generator.apply(weights_init)
critic = critic.apply(weights_init)

# betas=(0.0, 0.9) is the Adam setting recommended in the WGAN-GP paper
opt_critic = optim.Adam(critic.parameters(), lr=learning_rate, betas=(0.0, 0.9))
opt_gen = optim.Adam(generator.parameters(), lr=learning_rate, betas=(0.0, 0.9))

# Fixed noise vector for visually tracking generator progress across epochs
fixed_sample = torch.randn(batch_size, n_dimension, 1, 1).to(device)

if Load == "True":  # Load is a string flag, e.g. parsed from the command line
    print("Loading weights...")
    critic.load_state_dict(torch.load("critic_weights.pt", map_location=device))
    generator.load_state_dict(torch.load("gen_weights.pt", map_location=device))
'''Initialize weights:
Here, we want to initialize the weights to the normal distribution
with mean 0 and standard deviation 0.02.
'''
import torch.nn as nn

def initialize_weights(m):
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.normal_(m.weight, 0.0, 0.02)
    if isinstance(m, nn.BatchNorm2d):
        nn.init.normal_(m.weight, 0.0, 0.02)
        nn.init.constant_(m.bias, 0)

gen = gen.apply(initialize_weights)
crit = crit.apply(initialize_weights)

######################### Train WGAN-GP ###############################
"""
Finally, we can train the WGAN-GP model! For each epoch, we process the
entire dataset in batches, and for every batch we update the critic and
then the generator.
"""

####### Note #######
"""
WGAN-GP isn't necessarily meant to improve a GAN's overall output quality;
rather, it increases training stability and helps avoid mode collapse.
In general, a WGAN trains much more stably than a vanilla DCGAN, though
each iteration runs a bit slower. A WGAN can also be trained for many
more epochs without collapsing.
"""
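The note above describes the training loop without showing it. Below is a minimal sketch of one WGAN-GP training epoch, reusing the loader, gen, crit, opt_gen, opt_critic, n_dimension, and device names from the setup snippets above. The gradient penalty, the lambda = 10 coefficient, and the 5 critic updates per generator update follow the WGAN-GP paper; the loop itself is an illustration, not the original training code:

import torch

def gradient_penalty(critic, real, fake, device):
    # Interpolate between real and fake images, then penalize the critic
    # where the gradient norm at the interpolates deviates from 1.
    batch_size = real.size(0)
    eps = torch.rand(batch_size, 1, 1, 1, device=device)
    interp = (eps * real + (1 - eps) * fake).requires_grad_(True)
    scores = critic(interp)
    grads = torch.autograd.grad(outputs=scores, inputs=interp,
                                grad_outputs=torch.ones_like(scores),
                                create_graph=True, retain_graph=True)[0]
    grads = grads.view(batch_size, -1)
    return ((grads.norm(2, dim=1) - 1) ** 2).mean()

lambda_gp = 10    # penalty coefficient from the WGAN-GP paper
critic_iters = 5  # critic updates per generator update

for real, _ in loader:
    real = real.to(device)

    # (1) Update the critic several times per generator step
    for _ in range(critic_iters):
        noise = torch.randn(real.size(0), n_dimension, 1, 1, device=device)
        fake = gen(noise)
        gp = gradient_penalty(crit, real, fake.detach(), device)
        loss_critic = crit(fake.detach()).mean() - crit(real).mean() + lambda_gp * gp
        opt_critic.zero_grad()
        loss_critic.backward()
        opt_critic.step()

    # (2) Update the generator to maximize the critic's score on fakes
    noise = torch.randn(real.size(0), n_dimension, 1, 1, device=device)
    fake = gen(noise)
    loss_gen = -crit(fake).mean()
    opt_gen.zero_grad()
    loss_gen.backward()
    opt_gen.step()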