def __init__(self, args, state_dim, action_dim, action_lim, ram):
    """
    :param state_dim: Dimensions of state (int)
    :param action_dim: Dimension of action (int)
    :param action_lim: Used to limit action in [-action_lim, action_lim]
    :param ram: replay memory buffer object
    :return:
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.action_lim = action_lim
    self.ram = ram
    self.iter = 0
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)
    self.args = args

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.args.learning_rate)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), self.args.learning_rate)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
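utils.hard_update is used throughout these snippets but never defined in this section. A minimal sketch of the usual pair of helpers, assuming the (target, source) argument order used above; note that other snippets below (e.g. soft_update(self.critic_local, self.critic_target, 1)) pass (source, target) instead.

# Hypothetical helpers; the (target, source) order follows the utils.hard_update calls above.
def hard_update(target, source):
    """Copy every parameter of `source` into `target`."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)


def soft_update(target, source, tau):
    """Polyak-average `source` into `target`: target = tau*source + (1-tau)*target."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)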
def __init__(self, hp):
    """Initialize an Agent object.

    Params
    ======
        hp: hyper parameters
    """
    self.hp = hp

    # Actor Network (w/ Target Network)
    self.actor_local = model.Actor(self.hp.state_size, self.hp.action_size, self.hp.random_seed).to(device)
    self.actor_target = model.Actor(self.hp.state_size, self.hp.action_size, self.hp.random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=self.hp.lr_actor)

    # Critic Network (w/ Target Network)
    self.critic_local = model.Critic(self.hp.state_size, self.hp.action_size, self.hp.random_seed).to(device)
    self.critic_target = model.Critic(self.hp.state_size, self.hp.action_size, self.hp.random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=self.hp.lr_critic,
                                       weight_decay=self.hp.weight_decay)

    # A soft update with tau=1 is a hard copy of the local weights into the targets.
    self.soft_update(self.critic_local, self.critic_target, 1)
    self.soft_update(self.actor_local, self.actor_target, 1)

    # Noise process
    self.noise = ounoise.OUNoise(self.hp.action_size, self.hp.random_seed)
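ounoise.OUNoise is not shown in these snippets. A minimal sketch of a standard Ornstein-Uhlenbeck process with the (size, seed) constructor these agents use; the mu/theta/sigma defaults are assumptions.

import copy
import numpy as np


class OUNoise:
    """Ornstein-Uhlenbeck process (minimal sketch; mu/theta/sigma defaults are assumed)."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)  # `size` may be an int or a (num_envs, action_size) tuple
        self.theta = theta
        self.sigma = sigma
        np.random.seed(seed)
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new noise sample."""
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.standard_normal(self.state.shape)
        self.state = self.state + dx
        return self.state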
def __init__(self, state_dim, action_dim, action_lim, ram, device='cpu'):
    """
    :param state_dim: Dimensions of state (int)
    :param action_dim: Dimension of action (int)
    :param action_lim: Used to limit action in [-action_lim, action_lim]
    :param ram: replay memory buffer object
    :param device: torch device string, e.g. 'cpu' or 'cuda'
    :return:
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.action_lim = action_lim
    self.ram = ram
    self.iter = 0
    self.device = device
    # self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim).to(device)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim).to(device)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    self.critic = model.Critic(self.state_dim, self.action_dim).to(device)
    self.target_critic = model.Critic(self.state_dim, self.action_dim).to(device)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
def __init__(self, config, state_size, action_size, num_agents, seed):
    """Initialize an Agent object.

    Params
    ======
        config: configuration object carrying device, learning rates and buffer settings
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        num_agents (int): number of parallel agents
        seed (int): random seed
    """
    self.config = config
    self.state_size = state_size
    self.action_size = action_size
    self.num_agents = num_agents
    self.seed = random.seed(seed)  # seeds the global RNG; random.seed() itself returns None

    # Initialize the Actor and Critic Networks
    self.actor = model.Actor(state_size, action_size, seed).to(self.config.device)
    self.actor_target = model.Actor(state_size, action_size, seed).to(self.config.device)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.config.LR_actor)

    self.critic = model.Critic(state_size, action_size, seed).to(self.config.device)
    self.critic_target = model.Critic(state_size, action_size, seed).to(self.config.device)
    self.critic_optimizer = torch.optim.Adam(
        self.critic.parameters(), self.config.LR_critic,
        weight_decay=self.config.weight_decay)

    # Initialize the random-noise-process for action-noise
    self.is_training = True
    self.randomer = OUNoise((self.num_agents, self.action_size), seed)

    # Hard update the target networks to have the same parameters as the local networks
    for target_param, param in zip(self.actor_target.parameters(), self.actor.parameters()):
        target_param.data.copy_(param.data)
    for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()):
        target_param.data.copy_(param.data)

    # Initialize replay-buffer
    self.memory = ReplayBuffer(self.config.BUFFER_SIZE, self.config.BATCH_SIZE, seed, self.config.device)
def __init__(self, state_size, action_size, random_seed, num_envs=1, checkpt_folder="checkpt"):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
        num_envs (int): number of parallel environment copies
        checkpt_folder (str): folder for saving/loading checkpoints
    """
    self.state_size = state_size
    self.num_envs = num_envs
    self.action_size = action_size
    self.seed = random.seed(random_seed)
    self.CHECKPOINT_FOLDER = checkpt_folder

    # Actor Network (w/ Target Network)
    self.actor_local = model.Actor(state_size, action_size, random_seed).to(DEVICE)
    self.actor_target = model.Actor(state_size, action_size, random_seed).to(DEVICE)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = model.Critic(state_size, action_size, random_seed).to(DEVICE)
    self.critic_target = model.Critic(state_size, action_size, random_seed).to(DEVICE)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Optional checkpoint restore, kept disabled in the original:
    '''if os.path.isfile(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth') and os.path.isfile(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'):
        self.actor_local.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth'))
        self.actor_target.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth'))
        self.critic_local.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'))
        self.critic_target.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'))'''

    # Noise process
    self.noise = OUNoise((num_envs, action_size), random_seed)

    # Replay memory
    self.memory = ReplayBuffer(DEVICE, action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
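ReplayBuffer is referenced but not defined here. A minimal sketch matching the (device, action_size, buffer_size, batch_size, seed) call above; the field names and sample format are assumptions.

import random
from collections import deque, namedtuple

import numpy as np
import torch


class ReplayBuffer:
    """Fixed-size buffer of experience tuples (sketch; signature mirrors the call above)."""

    def __init__(self, device, action_size, buffer_size, batch_size, seed):
        self.device = device
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience", ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Store a single transition."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw a random mini-batch and move it to the configured device."""
        batch = random.sample(self.memory, k=self.batch_size)
        to_tensor = lambda xs: torch.from_numpy(np.vstack(xs)).float().to(self.device)
        states = to_tensor([e.state for e in batch])
        actions = to_tensor([e.action for e in batch])
        rewards = to_tensor([e.reward for e in batch])
        next_states = to_tensor([e.next_state for e in batch])
        dones = torch.from_numpy(np.vstack([e.done for e in batch]).astype(np.uint8)).float().to(self.device)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)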
def __init__(self, env, results_path, tok, episode_len=20):
    super(Seq2SeqAgent, self).__init__(env, results_path)
    self.tok = tok
    self.episode_len = episode_len
    self.feature_size = self.env.feature_size

    # Models
    self.glove_dim = 300
    with open('img_features/objects/object_vocab.txt', 'r') as f_ov:
        self.obj_vocab = [k.strip() for k in f_ov.readlines()]
    glove_matrix = get_glove_matrix(self.obj_vocab, self.glove_dim)
    self.objencoder = ObjEncoder(glove_matrix.size(0), glove_matrix.size(1), glove_matrix).cuda()

    enc_hidden_size = args.rnn_dim // 2 if args.bidir else args.rnn_dim
    self.encoder = model.EncoderLSTM(tok.vocab_size(), args.wemb, enc_hidden_size, padding_idx,
                                     args.dropout, bidirectional=args.bidir).cuda()
    self.decoder = model.AttnDecoderLSTM_Graph(args.aemb, args.rnn_dim, args.dropout,
                                               feature_size=self.feature_size + args.angle_feat_size).cuda()
    self.critic = model.Critic().cuda()
    self.models = (self.encoder, self.decoder, self.critic)

    # Optimizers
    self.encoder_optimizer = args.optimizer(self.encoder.parameters(), lr=args.lr)
    self.decoder_optimizer = args.optimizer(self.decoder.parameters(), lr=args.lr)
    self.critic_optimizer = args.optimizer(self.critic.parameters(), lr=args.lr)
    self.optimizers = (self.encoder_optimizer, self.decoder_optimizer, self.critic_optimizer)

    # Evaluations
    self.losses = []
    # size_average=False is deprecated; reduction='sum' is the equivalent.
    self.criterion = nn.CrossEntropyLoss(ignore_index=args.ignoreid, reduction='sum')

    # Logs
    sys.stdout.flush()
    self.logs = defaultdict(list)
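get_glove_matrix is also not defined in this section. A minimal sketch that fills a (len(vocab), glove_dim) matrix from a GloVe text file; the file path and the random initialisation for out-of-vocabulary words are assumptions.

import numpy as np
import torch


def get_glove_matrix(vocab, glove_dim, glove_path='img_features/glove.6B.300d.txt'):
    """Build a (len(vocab), glove_dim) embedding matrix; unseen words keep a random init."""
    matrix = np.random.normal(scale=0.1, size=(len(vocab), glove_dim)).astype(np.float32)
    index = {w: i for i, w in enumerate(vocab)}
    with open(glove_path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            if parts[0] in index and len(parts) == glove_dim + 1:
                matrix[index[parts[0]]] = np.asarray(parts[1:], dtype=np.float32)
    return torch.from_numpy(matrix)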
def __init__(self, env, results_path, tok, episode_len=20):
    super(Seq2SeqAgent, self).__init__(env, results_path)
    self.tok = tok
    self.episode_len = episode_len
    self.feature_size = self.env.feature_size

    # Models
    enc_hidden_size = args.rnn_dim // 2 if args.bidir else args.rnn_dim
    self.encoder = model.EncoderLSTM(tok.vocab_size(), args.wemb, enc_hidden_size, padding_idx,
                                     args.dropout, bidirectional=args.bidir).cuda()
    self.decoder = model.AttnDecoderLSTM(args.aemb, args.rnn_dim, args.dropout,
                                         feature_size=self.feature_size + args.angle_feat_size).cuda()
    self.critic = model.Critic().cuda()
    self.models = (self.encoder, self.decoder, self.critic)

    # Optimizers
    self.encoder_optimizer = args.optimizer(self.encoder.parameters(), lr=args.lr)
    self.decoder_optimizer = args.optimizer(self.decoder.parameters(), lr=args.lr)
    self.critic_optimizer = args.optimizer(self.critic.parameters(), lr=args.lr)
    self.optimizers = (self.encoder_optimizer, self.decoder_optimizer, self.critic_optimizer)

    # Evaluations
    self.losses = []
    # size_average=False is deprecated; reduction='sum' is the equivalent.
    self.criterion = nn.CrossEntropyLoss(ignore_index=args.ignoreid, reduction='sum')

    # Logs
    sys.stdout.flush()
    self.logs = defaultdict(list)
def __init__(self, state_dim, action_dim, action_lim, ram):
    """Special method for object initialisation.

    :param state_dim: Dimensions of state.
    :type state_dim: int.
    :param action_dim: Dimension of action.
    :type action_dim: int.
    :param action_lim: Used to limit action in [-action_lim, action_lim].
    :type action_lim: float.
    :param ram: replay memory buffer object.
    :type ram: buffer.
    """
    # Set the parameters.
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.action_lim = action_lim
    self.ram = ram
    self.iter = 0

    # Set the noise function.
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

    # Set the actor.
    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    # Set the critic.
    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    # Update the actor and critic networks
    self.hard_update(self.target_actor, self.actor)
    self.hard_update(self.target_critic, self.critic)
    return
def c_graph(sess, phc):
    Critic = model.Critic()
    Y_value = Critic.build(phc['states_c'], phc['is_training_c'])

    loss_op = tf.reduce_mean(tf.square(Y_value - phc['values_c']))
    reg_loss = tf.reduce_sum(Critic.reg_loss)
    loss_op += reg_loss
    # update_op = tf.train.MomentumOptimizer(LR, MOMENTUM).minimize(loss_op, var_list=Critic.vars)
    update_op = tf.train.AdamOptimizer(1e-3).minimize(loss_op, var_list=Critic.vars)
    return loss_op, Y_value, update_op, Critic.vars
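Nothing here shows how c_graph is driven. A minimal TF1-style sketch of one critic regression step; the placeholder shapes and the state_dim value are assumptions (the dict keys come from c_graph itself).

import numpy as np
import tensorflow as tf  # TF1-style graph API, matching the snippet above

state_dim = 8  # assumed input width
phc = {
    'states_c': tf.placeholder(tf.float32, [None, state_dim]),
    'values_c': tf.placeholder(tf.float32, [None, 1]),
    'is_training_c': tf.placeholder(tf.bool, []),
}

with tf.Session() as sess:
    loss_op, Y_value, update_op, critic_vars = c_graph(sess, phc)
    sess.run(tf.global_variables_initializer())
    # One regression step on a dummy batch of (state, target-value) pairs.
    loss, _ = sess.run([loss_op, update_op], feed_dict={
        phc['states_c']: np.random.randn(32, state_dim),
        phc['values_c']: np.random.randn(32, 1),
        phc['is_training_c']: True,
    })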
def __init__(self, state_dim, action_dim, ram):
    """Initialize actor and critic networks."""
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.ram = ram
    self.iter = 0
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

    self.actor = model.Actor(self.state_dim, self.action_dim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    # copy parameters to target networks
    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
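All of these constructors pair online and target networks, but the learning step they feed is not shown in this section. A minimal sketch of one DDPG update using the attribute names from the snippet above; BATCH_SIZE, GAMMA, TAU, utils.soft_update, and the (states, actions, rewards, next_states, dones) sample format are assumptions.

def optimize(self):
    """One DDPG update (sketch): critic regression to a TD target, then policy gradient."""
    states, actions, rewards, next_states, dones = self.ram.sample(BATCH_SIZE)

    # Critic target: y = r + gamma * Q'(s', pi'(s')) for non-terminal s'.
    with torch.no_grad():
        next_actions = self.target_actor(next_states)
        y = rewards + GAMMA * (1 - dones) * self.target_critic(next_states, next_actions)
    critic_loss = torch.nn.functional.mse_loss(self.critic(states, actions), y)
    self.critic_optimizer.zero_grad()
    critic_loss.backward()
    self.critic_optimizer.step()

    # Actor: ascend Q(s, pi(s)) by descending its negation.
    actor_loss = -self.critic(states, self.actor(states)).mean()
    self.actor_optimizer.zero_grad()
    actor_loss.backward()
    self.actor_optimizer.step()

    # Polyak-average the online networks into the targets.
    utils.soft_update(self.target_actor, self.actor, TAU)
    utils.soft_update(self.target_critic, self.critic, TAU)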
def __init__(self, env, state_vector_size, action_num, action_limit, ram):
    """
    :param env: Gym environment
    :param state_vector_size: Dimensions of state (int)
    :param action_num: Dimension of action (int)
    :param action_limit: Used to limit action in [-action_limit, action_limit]
    :param ram: replay memory buffer object
    :return:
    """
    self.env = env
    self.state_dim = state_vector_size
    self.action_dim = action_num
    self.action_lim = action_limit
    self.ram = ram
    self.iter = 0
    self.noise = OrnsteinUhlenbeckActionNoise(self.action_dim)

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)

    self.state_vector_size = state_vector_size
    self.action_num = action_num
    self.action_limit = action_limit
    self.controller = DQNAgent(env, state_vector_size, action_num, action_limit)
def __init__(self, state_dim, action_dim, ram, LR_actor, LR_critic, gamma, tau,
             batchsize, expl_rate, version):
    """
    :param state_dim: Dimensions of state (int)
    :param action_dim: Dimension of action (int)
    :param ram: replay memory buffer object
    :param LR_actor: actor learning rate
    :param LR_critic: critic learning rate
    :param gamma: discount factor
    :param tau: soft-update coefficient
    :param batchsize: mini-batch size
    :param expl_rate: exploration noise scale
    :param version: model version tag
    :return:
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.LR_actor = LR_actor
    self.LR_critic = LR_critic
    self.gamma = gamma
    self.tau = tau
    self.ram = ram
    self.batchsize = batchsize
    self.iter = 0
    self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim, 0, 0.15, expl_rate)
    self.action_lim = 1.0  # actions are assumed normalized to [-1, 1]

    self.actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.target_actor = model.Actor(self.state_dim, self.action_dim, self.action_lim)
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), self.LR_actor)

    self.critic = model.Critic(self.state_dim, self.action_dim)
    self.target_critic = model.Critic(self.state_dim, self.action_dim)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), self.LR_critic)

    utils.hard_update(self.target_actor, self.actor)
    utils.hard_update(self.target_critic, self.critic)
def __init__(self, env, results_path, tok, episode_len=20):
    super(Seq2PolicyAgent, self).__init__(env, results_path)
    self._iter = 0
    self.tok = tok
    self.episode_len = episode_len
    self.feature_size = self.env.feature_size

    # Models
    enc_hidden_size = args.rnn_dim // 2 if args.bidir else args.rnn_dim
    self.encoder = model.EncoderLSTM(tok.vocab_size(), args.wemb, enc_hidden_size, padding_idx,
                                     args.dropout, bidirectional=args.bidir).cuda()
    if args.original_decoder:
        self.decoder = model.AttnDecoderLSTM(args.aemb, args.rnn_dim, args.dropout,
                                             feature_size=self.feature_size + args.angle_feat_size).cuda()
    else:
        self.decoder = model.AttnPolicyLSTM(args.aemb, args.rnn_dim, args.dropout,
                                            feature_size=self.feature_size + args.angle_feat_size,
                                            latent_dim=args.vae_latent_dim).cuda()
        if args.fix_vae:
            print("fix the parameters in sub policy")
            for param in self.decoder.policy.parameters():
                param.requires_grad = False
    self.critic = model.Critic().cuda()
    self.models = (self.encoder, self.decoder, self.critic)

    # Optimizers
    self.encoder_optimizer = args.optimizer(self.encoder.parameters(), lr=args.lr)
    self.decoder_optimizer = args.optimizer(self.decoder.parameters(), lr=args.lr)
    self.critic_optimizer = args.optimizer(self.critic.parameters(), lr=args.lr)
    self.optimizers = (self.encoder_optimizer, self.decoder_optimizer, self.critic_optimizer)

    # Evaluations
    self.losses = []
    # size_average=False is deprecated; reduction='sum' is the equivalent.
    self.criterion = nn.CrossEntropyLoss(ignore_index=args.ignoreid, reduction='sum')

    # Logs
    sys.stdout.flush()
    self.logs = defaultdict(list)
    return scores, mean_scores_window


# In[19]:


config = Config(seed=6)
config.num_agents = len(env_info.agents)
config.state_size = state_size
config.action_size = action_size
config.actor_fn = lambda: model.Actor(config.state_size, config.action_size, 128, 128)
config.actor_opt_fn = lambda params: optim.Adam(params, lr=1e-3)
config.critic_fn = lambda: model.Critic(config.state_size, config.action_size, 1, 128, 128)
config.critic_opt_fn = lambda params: optim.Adam(params, lr=2e-3)
config.replay_fn = lambda: Replay(config.action_size, buffer_size=int(1e6), batch_size=128)
config.noise_fn = lambda: OUNoise(config.action_size, mu=0., theta=0.15, sigma=0.1, seed=config.seed)
config.discount = 0.99
config.target_mix = 3e-3
config.max_episodes = 3000
config.max_steps = int(1e6)
config.goal_score = 1
config.CHECKPOINT_FOLDER = "MultiAgentCheckPt"
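The agent that consumes these factories is not shown. A minimal sketch of how such zero- and one-argument lambdas are typically wired up in a constructor; the Agent class and its attribute names are assumptions.

class Agent:
    """Sketch: builds networks, optimizers, replay and noise from the config factories."""

    def __init__(self, config):
        self.config = config
        # Each *_fn is a factory, so the agent stays decoupled from concrete
        # classes and hyperparameters; calling it twice yields independent networks.
        self.online_actor = config.actor_fn()
        self.target_actor = config.actor_fn()
        self.actor_opt = config.actor_opt_fn(self.online_actor.parameters())
        self.online_critic = config.critic_fn()
        self.target_critic = config.critic_fn()
        self.critic_opt = config.critic_opt_fn(self.online_critic.parameters())
        self.replay = config.replay_fn()
        self.noise = config.noise_fn()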
torch.manual_seed(args.seed)
train_loader = dataloader.train_loader('mnist', args.data_directory, args.batch_size)
input_size, hidden_size, latent_size, k, l = args.parameters

if args.load_model != '000000000000':
    critic = torch.load(args.log_directory + '/' + args.load_model + '/critic.pt')
    generator = torch.load(args.log_directory + '/' + args.load_model + '/generator.pt')
    args.time_stamp = args.load_model
else:
    critic = model.Critic()
    generator = model.Generator()

critic = critic.to(args.device)
generator = generator.to(args.device)
writer = SummaryWriter(args.log_directory + '/' + args.time_stamp + '/')
critic_optimizer = optim.Adam(critic.parameters(), lr=args.lr, betas=(0, 0.9))
generator_optimizer = optim.Adam(generator.parameters(), lr=args.lr, betas=(0, 0.9))


def train(epoch):
    critic.train()
    generator.train()
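The body of train() is cut off above. A minimal sketch of one WGAN-GP critic update, consistent with the betas=(0, 0.9) Adam setting commonly used for that loss; gp_weight, the image-shaped tensors, and the function signature are assumptions.

import torch


def critic_step(real, critic, generator, critic_optimizer, latent_size, gp_weight=10.0, device='cpu'):
    """One WGAN-GP critic update (sketch): minimize D(fake) - D(real) + gradient penalty."""
    batch_size = real.size(0)
    z = torch.randn(batch_size, latent_size, device=device)
    fake = generator(z).detach()

    # Gradient penalty on random interpolates between real and fake samples.
    eps = torch.rand(batch_size, 1, 1, 1, device=device)  # assumes NCHW image tensors
    interp = (eps * real + (1 - eps) * fake).requires_grad_(True)
    d_interp = critic(interp)
    grads = torch.autograd.grad(outputs=d_interp, inputs=interp,
                                grad_outputs=torch.ones_like(d_interp),
                                create_graph=True)[0]
    penalty = ((grads.view(batch_size, -1).norm(2, dim=1) - 1) ** 2).mean()

    loss = critic(fake).mean() - critic(real).mean() + gp_weight * penalty
    critic_optimizer.zero_grad()
    loss.backward()
    critic_optimizer.step()
    return loss.item()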
                                  batch_size=args.batch_size,
                                  is_cuda=use_cuda,
                                  evaluation=True)

# ##############################################################################
# Build model
# ##############################################################################
import model
from const import PAD
from optim import Optim

encode = model.Encode(use_cuda)

actor = model.Actor(args.vocab_size, args.dec_hsz, args.rnn_layers,
                    args.batch_size, args.max_len, args.dropout, use_cuda)
critic = model.Critic(args.vocab_size, args.dec_hsz, args.rnn_layers,
                      args.batch_size, args.max_len, args.dropout, use_cuda)

optim_pre_A = Optim(actor.parameters(), args.pre_lr, True)
optim_pre_C = Optim(critic.parameters(), args.pre_lr, True)
optim_A = Optim(actor.parameters(), args.lr, False, args.new_lr)
optim_C = Optim(critic.parameters(), args.lr, False, args.new_lr)

criterion_A = torch.nn.CrossEntropyLoss(ignore_index=PAD)
criterion_C = torch.nn.MSELoss()

if use_cuda:
    actor = actor.cuda()
    critic = critic.cuda()

# ##############################################################################
def update_target(self, source, target):
    new_target_param = parameters_to_vector(source.parameters()) * self.tau + \
        (1 - self.tau) * parameters_to_vector(target.parameters())
    vector_to_parameters(new_target_param, target.parameters())
    return target


if __name__ == '__main__':
    env = gym.make("CartPole-v0")
    global state_size, action_size
    state_size = int(np.product(env.observation_space.shape))
    action_size = int(env.action_space.n)
    num_episode = 800

    critic = model.Critic(state_size, action_size)
    actor = model.Actor(state_size, action_size)
    # actor.eval()
    # critic.eval()

    # target network
    target_critic = deepcopy(critic)
    target_actor = deepcopy(actor)

    ddpg = DDPG(env, actor, critic, target_actor, target_critic, num_episode,
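update_target performs the same Polyak averaging as the per-parameter loops in the earlier snippets, just over one flattened parameter vector. A small standalone sketch demonstrating the equivalence; the tau value is an assumption.

import torch.nn as nn
from torch.nn.utils import parameters_to_vector, vector_to_parameters

tau = 0.005  # assumed soft-update coefficient
source, target = nn.Linear(4, 2), nn.Linear(4, 2)

# Vector form: one flattened mix, then scatter back into the target's parameters.
mixed = parameters_to_vector(source.parameters()) * tau + \
    (1 - tau) * parameters_to_vector(target.parameters())
vector_to_parameters(mixed, target.parameters())

# Per-parameter form (as in the earlier DDPG snippets) gives the same result:
# for t, s in zip(target.parameters(), source.parameters()):
#     t.data.copy_(tau * s.data + (1 - tau) * t.data)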
# Function to keep track of gradients for visualization purposes
def make_grad_hook():
    grads = []

    def grad_hook(m):
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
            grads.append(m.weight.grad)

    return grads, grad_hook


if __name__ == "__main__":
    # specify the parameters
    gen = model.Generator(config.z_dim).to(config.device)
    crit = model.Critic().to(config.device)
    batch_size = config.batch_size
    gen_opt = torch.optim.Adam(gen.parameters(), lr=config.lr, betas=(config.beta_1, config.beta_2))
    crit_opt = torch.optim.Adam(crit.parameters(), lr=config.lr, betas=(config.beta_1, config.beta_2))
    gen = gen.apply(weights_init)
    crit = crit.apply(weights_init)

    mean_generator_loss = 0
    cur_step = 0
    generator_losses = []
    critic_losses = []

    # training the model
    for e in range(config.n_epochs):
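make_grad_hook collects gradients via Module.apply rather than a registered backward hook. A short usage sketch, assuming a backward pass has already populated the .grad fields of the critic's conv layers.

# After loss.backward(), walk the critic's modules and collect conv-weight grads.
grads, grad_hook = make_grad_hook()
crit.apply(grad_hook)  # Module.apply visits every submodule, so `grads` fills up here
grad_norms = [g.norm().item() for g in grads if g is not None]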
def main(management, hps):
    # Setup Experiment
    task = hps.task
    config_logger(management.log_file, saving=management.save_logs)
    logger = logging.getLogger('Exp')
    train_logger = logging.getLogger('Exp.Train')
    eval_logger = logging.getLogger('Exp.Eval')
    try:
        state = State(hps.seed, management)
    except SingletonError:
        State.instance = None
        state = State(hps.seed, management)
    logger.info(
        f"Initializing experiment `{management.exp_name}` with hyperparameters:\n%s",
        repr(hps))
    stats = accumulator()

    # Setup Data
    if task == 'dirac':
        def train_data():
            dirac = State().convert(torch.Tensor(1, 1).fill_(hps.dirac_target))
            while True:
                yield dirac
        train_iter = train_data()
    else:
        dataset_cfg = dict(
            type=task,
            root=management.data_path,
            download=True,
            preload_to_gpu=management.preload_to_gpu,
            num_threads=management.num_workers,
            batch_size=hps.batch_size,
        )
        train_data = Dataset(**dataset_cfg, mode='train')
        eval_data = Dataset(**dataset_cfg, mode='test')
        train_iter = train_data.sampler(infinite=True, project=0)

    # Setup Generator
    if task == 'dirac':
        generator = dirac.DiracGenerator()
        stats.g_params.append(generator.param.clone().detach().cpu())
    else:
        generator = model.Generator(dimz=hps.generator_dimz, dimh=hps.generator_dimh,
                                    default_batch_size=hps.batch_size)
    test_generator = generator
    if hps.generator_alpha_ema is not None:
        test_generator = deepcopy(generator)
        test_generator.to(device=State().device)
        test_generator.train()
    generator.to(device=State().device)
    generator.train()
    generator_optim = optim.init_optimizer(generator.parameters(), type=hps.optimizer,
                                           lr=hps.generator_lr, betas=hps.generator_betas,
                                           wd=hps.generator_wd)
    logger.info("Generator:\n%s", generator)

    # Setup Critic
    if task == 'dirac':
        critic = dirac.DiracCritic()
        stats.c_params.append(critic.param.clone().detach().cpu())
    else:
        critic = model.Critic(dimh=hps.critic_dimh, sn=hps.critic_use_sn)
    critic.to(device=State().device)
    critic.train()
    critic_optim = optim.init_optimizer(critic.parameters(), type=hps.optimizer,
                                        lr=hps.critic_lr, betas=hps.critic_betas,
                                        wd=hps.critic_wd)
    logger.info("Critic:\n%s", critic)

    # Train
    step = 0
    train_loss_meter = running_average_meter()
    train_step = gan.make_train_step(hps.loss_type,
                                     critic_inner_iters=hps.critic_inner_iters,
                                     reg_type=hps.critic_reg_type,
                                     reg_cf=hps.critic_reg_cf,
                                     alpha_ema=hps.generator_alpha_ema)
    if task != 'dirac':
        eval_step = gan.make_eval_step(
            os.path.join(management.exp_path, task + '_inception_stats.npz'),
            eval_data.sampler(infinite=False, project=0),
            hps.generator_dimz, persisting_Z=100, device=State().device)

    logger.info("Training")
    while True:
        if step >= hps.max_iters:
            break
        step += 1
        train_loss = train_step(train_iter, critic, critic_optim,
                                generator, test_generator, generator_optim)
        train_loss_meter.update(train_loss.clone().detach())

        if step % management.log_every == 0 and task != 'dirac':
            train_logger.info("step %d | loss(%s) %.3f (%.3f)", step, hps.loss_type,
                              train_loss_meter.avg.item(), train_loss_meter.val.item())
        if task == 'dirac':
            stats.g_params.append(test_generator.param.clone().detach().cpu())
            stats.c_params.append(critic.param.clone().detach().cpu())
        if step % management.eval_every == 0 and task != 'dirac':
            eval_iter = eval_data.sampler(infinite=False, project=0)
            samples, results = eval_step(eval_iter, critic, test_generator)
            if management.viz:
                from IPython.display import clear_output, display, update_display
                grid_img = torchvision.utils.make_grid(samples, nrow=10, normalize=True,
                                                       value_range=(-1., 1.), padding=0)
                plt.imshow(grid_img.permute(1, 2, 0).cpu())
                display(plt.gcf())
            eval_logger.info(
                "step %d | " + ' | '.join([f'{k} {v:.3f}' for k, v in results.items()]),
                step)
            torchvision.utils.save_image(samples.cpu(),
                                         os.path.join(management.log_path, f'samples-{step}.png'),
                                         nrow=10, normalize=True, value_range=(-1., 1.), padding=0)

    logger.info("Final Evaluation")
    if task == 'dirac':
        g_params = torch.stack(stats.g_params)
        c_params = torch.stack(stats.c_params)
        trajectory = torch.cat([c_params, g_params], dim=-1).numpy()
        logger.info(f"Final point in parameter space: {trajectory[-1]}")
        anima = dirac.animate(trajectory, hps)
        if management.viz:
            from IPython.display import HTML, display
            display(HTML(anima.to_html5_video()))
        anima.save(os.path.join(management.log_path, 'evolution.mp4'))
    else:
        eval_iter = eval_data.sampler(infinite=False, project=0)
        samples, results = eval_step(eval_iter, critic, test_generator)
        logger.info(
            "step %d | " + ' | '.join([f'{k} {v:.3f}' for k, v in results.items()]),
            step)
        torchvision.utils.save_image(samples,
                                     os.path.join(management.log_path, 'samples-final.png'),
                                     nrow=10, normalize=True, value_range=(-1., 1.), padding=0)
if __name__ == '__main__':
    # set unity environment path (file_name)
    env = UnityEnvironment(file_name=config.env_name)
    # env = UnityEnvironment(file_name=config.env_name, worker_id=np.random.randint(100000))

    # setting brain for unity
    default_brain = env.brain_names[0]
    brain = env.brains[default_brain]

    train_mode = config.train_mode
    device = config.device

    actor = model.Actor(config.action_size, "main").to(device)
    target_actor = model.Actor(config.action_size, "target").to(device)
    critic = model.Critic(config.action_size, "main").to(device)
    target_critic = model.Critic(config.action_size, "target").to(device)

    optimizer_actor = optim.Adam(actor.parameters(), lr=config.actor_lr)
    optimizer_critic = optim.Adam(critic.parameters(), lr=config.critic_lr)

    algorithm = "_DDPG"
    # Note: this rebinds the imported `agent` module name to the agent instance.
    agent = agent.DDPGAgent(actor, critic, target_actor, target_critic,
                            optimizer_actor, optimizer_critic, device, algorithm)

    # Initialize target networks
    agent.hard_update_target()

    step = 0
    episode = 0
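The episode loop that uses step and episode is cut off above. A minimal sketch in the old ML-Agents (v0.x) style matching the brain setup; agent.get_action, agent.append_sample, and config.max_episodes are assumed names.

    env_info = env.reset(train_mode=train_mode)[default_brain]
    state = env_info.vector_observations[0]
    while episode < config.max_episodes:
        action = agent.get_action(state)            # assumed API on DDPGAgent
        env_info = env.step(action)[default_brain]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        agent.append_sample(state, action, reward, next_state, done)  # assumed API
        state = next_state
        step += 1
        if done:
            episode += 1
            env_info = env.reset(train_mode=train_mode)[default_brain]
            state = env_info.vector_observations[0]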