def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion1 = torch.nn.MSELoss().cuda()                   # for global loss
    criterion2 = torch.nn.MSELoss(reduction='none').cuda()   # for refine loss (per-element, no reduction)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    # rough model size: number of parameters * 4 bytes (float32)
    print(' Total params: %.2fMB'
          % (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg),
        batch_size=cfg.batch_size * args.num_gpus,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, [criterion1, criterion2], optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    logger.close()
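# NOTE: the loop above calls an `adjust_learning_rate` helper that is not shown in this
# snippet. The sketch below is only an assumption consistent with the call signature
# (step decay: multiply the base LR by `gamma` once per milestone in `dec_epochs` already
# reached); in the real script the base LR would come from cfg.lr, not the default used here.
def adjust_learning_rate(optimizer, epoch, dec_epochs, gamma, base_lr=5e-4):
    lr = base_lr * (gamma ** sum(epoch >= e for e in dec_epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr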
def run_train(self):
    final_epoch = False
    for epoch in range(1, args.n_epochs + 1):
        misc.log(
            self.log_path,
            'Elapsed Time: {}/{}\n'.format(
                self.timer.measure(),
                self.timer.measure(epoch / float(args.n_epochs))))

        if self.scheduler:
            lr = self.scheduler.get_lr()[0]
        else:
            lr = args.lr

        self.train(epoch)
        acc = self.evaluate()

        improvement = acc > self.best_acc
        self.best_acc = max(acc, self.best_acc)
        misc.log(
            self.log_path,
            'Best Accuracy: {} | Current Learning Rate: {}'.format(
                np.round(self.best_acc, 5), np.round(lr, 5)))

        if epoch == args.n_epochs:
            final_epoch = True

        if args.save:
            misc.save_model(args=args,
                            model_name=self.model_name,
                            best_acc=self.best_acc,
                            stats=util.stats,
                            state={
                                'epoch': epoch,
                                'state_dict': self.net.state_dict(),
                                'best_acc': self.best_acc,
                                'optimizer': self.optimizer.state_dict()
                            },
                            improvement=improvement,
                            epoch=epoch,
                            final_epoch=final_epoch)
def train():
    processes = []
    if os.path.isdir(args.log_dir):
        ans = input('{} exists\ncontinue and overwrite? y/n: '.format(args.log_dir))
        if ans == 'n':
            return
    logger.configure(dir=args.log_dir, format_strs=['stdout', 'log', 'csv'])
    logger.log(args)
    json.dump(vars(args), open(os.path.join(args.log_dir, 'params.json'), 'w'))

    torch.set_num_threads(2)

    start = time.time()
    policy_update_time, policy_forward_time = 0, 0
    step_time_env, step_time_total, step_time_rewarder = 0, 0, 0
    visualize_time = 0
    rewarder_fit_time = 0

    envs = ContextualEnvInterface(args)
    if args.look:
        looker = Looker(args.log_dir)

    actor_critic, agent = initialize_policy(envs)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.obs_shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)
    rollouts.to(args.device)

    def copy_obs_into_beginning_of_storage(obs):
        rollouts.obs[0].copy_(obs)

    for j in range(args.num_updates):
        obs = envs.reset()  # have to reset here to use the updated rewarder to sample tasks
        copy_obs_into_beginning_of_storage(obs)

        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, j, args.num_updates, args.lr)

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 - j / float(args.num_updates))

        log_marginal = 0
        lambda_log_s_given_z = 0

        for step in range(args.num_steps):
            # Sample actions
            policy_forward_start = time.time()
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step],
                    rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
            policy_forward_time += time.time() - policy_forward_start

            # Observe reward and next obs
            step_total_start = time.time()
            obs, reward, done, info = envs.step(action)
            step_time_total += time.time() - step_total_start
            step_time_env += info['step_time_env']
            step_time_rewarder += info['reward_time']
            if args.rewarder == 'unsupervised' and args.clusterer == 'vae':
                log_marginal += info['log_marginal'].sum().item()
                lambda_log_s_given_z += info['lambda_log_s_given_z'].sum().item()

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done])
            rollouts.insert(obs, recurrent_hidden_states, action, action_log_prob,
                            value, reward, masks)

        assert all(done)

        # policy update
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1],
                rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        policy_update_start = time.time()
        if args.rewarder != 'supervised' and envs.rewarder.fit_counter == 0:
            value_loss, action_loss, dist_entropy = 0, 0, 0
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        policy_update_time += time.time() - policy_update_start
        rollouts.after_update()

        # metrics
        trajectories = envs.trajectories_current_update
        state_entropy = calculate_state_entropy(args, trajectories)

        return_avg = rollouts.rewards.sum() / args.trials_per_update
        reward_avg = return_avg / (args.trial_length * args.episode_length)
        log_marginal_avg = log_marginal / args.trials_per_update / (
            args.trial_length * args.episode_length)
        lambda_log_s_given_z_avg = lambda_log_s_given_z / args.trials_per_update / (
            args.trial_length * args.episode_length)

        num_steps = (j + 1) * args.num_steps * args.num_processes
        num_episodes = num_steps // args.episode_length
        num_trials = num_episodes // args.trial_length

        logger.logkv('state_entropy', state_entropy)
        logger.logkv('value_loss', value_loss)
        logger.logkv('action_loss', action_loss)
        logger.logkv('dist_entropy', dist_entropy)
        logger.logkv('return_avg', return_avg.item())
        logger.logkv('reward_avg', reward_avg.item())
        logger.logkv('steps', num_steps)
        logger.logkv('episodes', num_episodes)
        logger.logkv('trials', num_trials)
        logger.logkv('policy_updates', (j + 1))
        logger.logkv('time', time.time() - start)
        logger.logkv('policy_forward_time', policy_forward_time)
        logger.logkv('policy_update_time', policy_update_time)
        logger.logkv('step_time_rewarder', step_time_rewarder)
        logger.logkv('step_time_env', step_time_env)
        logger.logkv('step_time_total', step_time_total)
        logger.logkv('visualize_time', visualize_time)
        logger.logkv('rewarder_fit_time', rewarder_fit_time)
        if args.rewarder == 'unsupervised' and args.clusterer == 'vae':
            logger.logkv('log_marginal_avg', log_marginal_avg)
            logger.logkv('lambda_log_s_given_z_avg', lambda_log_s_given_z_avg)
        logger.dumpkvs()

        if (j % args.save_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            save_model(args, actor_critic, envs, iteration=j)

        if j % args.rewarder_fit_period == 0:
            rewarder_fit_start = time.time()
            envs.fit_rewarder()
            rewarder_fit_time += time.time() - rewarder_fit_start

        if (j % args.vis_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            visualize_start = time.time()
            if args.look:
                looker.look(iteration=j)
            if args.plot:
                p = Popen('python visualize.py --log-dir {}'.format(args.log_dir), shell=True)
                processes.append(p)
            visualize_time += time.time() - visualize_start
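# NOTE: `update_linear_schedule` is not defined in this snippet. The common PPO reference
# implementation decays the learning rate linearly to zero over all updates; the version
# below is a sketch under that assumption, not necessarily this project's actual helper.
def update_linear_schedule(optimizer, update_idx, total_num_updates, initial_lr):
    lr = initial_lr - initial_lr * (update_idx / float(total_num_updates))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr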
def train():
    processes = []
    if os.path.isdir(args.log_dir):
        ans = input('{} exists\ncontinue and overwrite? y/n: '.format(args.log_dir))
        if ans == 'n':
            return
    logger.configure(dir=args.log_dir, format_strs=['stdout', 'log', 'csv'])
    logger.log(args)
    json.dump(vars(args), open(os.path.join(args.log_dir, 'params.json'), 'w'))

    torch.set_num_threads(2)

    start = time.time()
    policy_update_time, policy_forward_time = 0, 0
    step_time_env, step_time_total, step_time_rewarder = 0, 0, 0
    visualize_time = 0
    rewarder_fit_time = 0

    envs = RL2EnvInterface(args)
    if args.look:
        looker = Looker(args.log_dir)

    actor_critic = Policy(envs.obs_shape, envs.action_space,
                          base=RL2Base,
                          base_kwargs={'recurrent': True,
                                       'num_act_dim': envs.action_space.shape[0]})
    actor_critic.to(args.device)

    agent = algo.PPO(actor_critic, args.clip_param, args.ppo_epoch,
                     args.num_mini_batch, args.value_loss_coef, args.entropy_coef,
                     lr=args.lr,
                     eps=args.eps,
                     max_grad_norm=args.max_grad_norm)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.obs_shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)
    rollouts.to(args.device)

    def copy_obs_into_beginning_of_storage(obs):
        obs_raw, obs_act, obs_rew, obs_flag = obs
        rollouts.obs[0].copy_(obs_raw)
        rollouts.obs_act[0].copy_(obs_act)
        rollouts.obs_rew[0].copy_(obs_rew)
        rollouts.obs_flag[0].copy_(obs_flag)

    for j in range(args.num_updates):
        obs = envs.reset()
        copy_obs_into_beginning_of_storage(obs)

        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, j, args.num_updates, args.lr)

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 - j / float(args.num_updates))

        episode_returns = [0 for i in range(args.trial_length)]
        episode_final_reward = [0 for i in range(args.trial_length)]
        i_episode = 0
        log_marginal = 0
        lambda_log_s_given_z = 0

        for step in range(args.num_steps):
            # Sample actions
            policy_forward_start = time.time()
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.get_obs(step),
                    rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
            policy_forward_time += time.time() - policy_forward_start

            # Observe reward and next obs
            step_total_start = time.time()
            obs, reward, done, info = envs.step(action)
            step_time_total += time.time() - step_total_start
            step_time_env += info['step_time_env']
            step_time_rewarder += info['reward_time']
            log_marginal += info['log_marginal'].sum().item()
            lambda_log_s_given_z += info['lambda_log_s_given_z'].sum().item()

            episode_returns[i_episode] += reward.sum().item()
            if all(done['episode']):
                episode_final_reward[i_episode] += reward.sum().item()
                i_episode = (i_episode + 1) % args.trial_length

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done['trial']])
            rollouts.insert(obs, recurrent_hidden_states, action, action_log_prob,
                            value, reward, masks)

        assert all(done['trial'])

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.get_obs(-1),
                                                rollouts.recurrent_hidden_states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        policy_update_start = time.time()
        if args.rewarder != 'supervised' and envs.rewarder.fit_counter == 0 and not args.vae_load:
            value_loss, action_loss, dist_entropy = 0, 0, 0
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        policy_update_time += time.time() - policy_update_start
        rollouts.after_update()

        # metrics
        trajectories_pre = envs.trajectories_pre_current_update
        state_entropy_pre = calculate_state_entropy(args, trajectories_pre)
        trajectories_post = envs.trajectories_post_current_update
        state_entropy_post = calculate_state_entropy(args, trajectories_post)

        return_avg = rollouts.rewards.sum() / args.trials_per_update
        reward_avg = return_avg / (args.trial_length * args.episode_length)
        log_marginal_avg = log_marginal / args.trials_per_update / (
            args.trial_length * args.episode_length)
        lambda_log_s_given_z_avg = lambda_log_s_given_z / args.trials_per_update / (
            args.trial_length * args.episode_length)

        num_steps = (j + 1) * args.num_steps * args.num_processes
        num_episodes = num_steps // args.episode_length
        num_trials = num_episodes // args.trial_length

        logger.logkv('state_entropy_pre', state_entropy_pre)
        logger.logkv('state_entropy_post', state_entropy_post)
        logger.logkv('value_loss', value_loss)
        logger.logkv('action_loss', action_loss)
        logger.logkv('dist_entropy', dist_entropy)
        logger.logkv('return_avg', return_avg.item())
        logger.logkv('reward_avg', reward_avg.item())
        logger.logkv('steps', num_steps)
        logger.logkv('episodes', num_episodes)
        logger.logkv('trials', num_trials)
        logger.logkv('policy_updates', (j + 1))
        logger.logkv('time', time.time() - start)
        logger.logkv('policy_forward_time', policy_forward_time)
        logger.logkv('policy_update_time', policy_update_time)
        logger.logkv('step_time_rewarder', step_time_rewarder)
        logger.logkv('step_time_env', step_time_env)
        logger.logkv('step_time_total', step_time_total)
        logger.logkv('visualize_time', visualize_time)
        logger.logkv('rewarder_fit_time', rewarder_fit_time)
        logger.logkv('log_marginal_avg', log_marginal_avg)
        logger.logkv('lambda_log_s_given_z_avg', lambda_log_s_given_z_avg)
        for i_episode in range(args.trial_length):
            logger.logkv('episode_return_avg_{}'.format(i_episode),
                         episode_returns[i_episode] / args.trials_per_update)
            logger.logkv('episode_final_reward_{}'.format(i_episode),
                         episode_final_reward[i_episode] / args.trials_per_update)

        if (j % args.save_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            save_model(args, actor_critic, envs, iteration=j)

        if not args.vae_freeze and j % args.rewarder_fit_period == 0:
            rewarder_fit_start = time.time()
            envs.fit_rewarder()
            rewarder_fit_time += time.time() - rewarder_fit_start

        if (j % args.vis_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            visualize_start = time.time()
            if args.look:
                eval_return_avg, eval_episode_returns, eval_episode_final_reward = looker.look(iteration=j)
                logger.logkv('eval_return_avg', eval_return_avg)
                for i_episode in range(args.trial_length):
                    logger.logkv('eval_episode_return_avg_{}'.format(i_episode),
                                 eval_episode_returns[i_episode] / args.trials_per_update)
                    logger.logkv('eval_episode_final_reward_{}'.format(i_episode),
                                 eval_episode_final_reward[i_episode] / args.trials_per_update)
            if args.plot:
                p = Popen('python visualize.py --log-dir {}'.format(args.log_dir), shell=True)
                processes.append(p)
            visualize_time += time.time() - visualize_start

        logger.dumpkvs()
def train_model(model, dataloaders, optimizer, scheduler, num_epochs=1):
    since = time.time()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger = Logger(join(opt.output, 'log.txt'))
    best_loss = 1e5  # set to some large enough value
    criterion = Criterion()
    data_dict = dict()

    for epoch in range(num_epochs):
        scheduler.step()
        lr = scheduler.get_lr()[-1]
        print(f'Epoch: {epoch+1}/{num_epochs} LR: {lr:.3E}')
        data_dict['Epoch'] = epoch
        data_dict['LR'] = lr

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()   # Set model to training mode
            else:
                model.eval()    # Set model to evaluate mode

            batch_time = AverageMeter()
            data_time = AverageMeter()
            loss_meter = AverageMeter()
            end = time.time()
            bar_name = 'Training' if phase == 'train' else 'Testing '
            num_batch = len(dataloaders[phase])
            bar = Bar(bar_name, max=num_batch)

            # Iterate over data.
            for i, (inputs, targets) in enumerate(dataloaders[phase]):
                # measure data loading time
                data_time.update(time.time() - end)

                # move data to GPU
                inputs = inputs.to(device).float()
                targets = targets.to(device).float()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss, _ = criterion.eval(outputs, targets)

                    # record loss
                    loss_meter.update(loss.item(), inputs.shape[0])

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix = (f'({i+1:04d}/{num_batch:04d}) Data: {data_time.val:.6f}s '
                              f'| Batch: {batch_time.val:.3f}s | Total: {bar.elapsed_td:} '
                              f'| ETA: {bar.eta_td:} | Loss: {loss_meter.avg:.4f}')
                bar.next()
            bar.finish()

            data_dict[f'{phase} Loss'] = loss_meter.avg

            # save last model after training, best model after validation
            if phase == 'train':
                save_model(model, join(opt.output, 'last.pth'))
            else:
                is_best = data_dict['val Loss'] < best_loss
                if is_best:
                    best_loss = data_dict['val Loss']
                    save_model(model, join(opt.output, 'best.pth'))

        # update the log
        logger.update(data_dict)
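# NOTE: `AverageMeter` is used above but not defined in this snippet. A minimal stand-in
# matching the `update(value, n)` / `.val` / `.avg` usage (an assumption, not necessarily
# the project's own class):
class AverageMeter:
    """Tracks the most recent value and a running weighted average."""

    def __init__(self):
        self.val = 0.0    # last value passed to update()
        self.sum = 0.0    # weighted sum of all values seen so far
        self.count = 0    # total weight
        self.avg = 0.0    # sum / count

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count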
def main():
    args = parse_args()
    update_config(cfg_hrnet, args)

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    #print('networks.' + cfg_hrnet.MODEL.NAME + '.get_pose_net')
    model = eval('models.' + cfg_hrnet.MODEL.NAME + '.get_pose_net')(cfg_hrnet, is_train=True)
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # show net
    args.channels = 3
    args.height = cfg.data_shape[0]
    args.width = cfg.data_shape[1]
    #net_vision(model, args)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss(reduction='mean').cuda()

    #torch.optim.Adam
    optimizer = AdaBound(model.parameters(),
                         lr=cfg.lr,
                         weight_decay=cfg.weight_decay)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    print(' Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(
        #MscocoMulti(cfg),
        KPloader(cfg),
        batch_size=cfg.batch_size * len(args.gpus))
        #, shuffle=True,
        #num_workers=args.workers, pin_memory=True)

    #for i, (img, targets, valid) in enumerate(train_loader):
    #    print(i, img, targets, valid)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, criterion, optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    logger.close()
def main(args):
    # import pdb; pdb.set_trace()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    print(device)
    writer = SummaryWriter(cfg.tensorboard_path)

    # create checkpoint dir
    counter = 0
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=True)
    model = torch.nn.DataParallel(model).to(device)
    # model = model.to(device)

    # define loss function (criterion) and optimizer
    criterion_bce = torch.nn.BCELoss().to(device)
    criterion_abs = torch.nn.L1Loss().to(device)
    # criterion_abs = offset_loss().to(device)
    # criterion1 = torch.nn.MSELoss().to(device)  # for Global loss
    # criterion2 = torch.nn.MSELoss(reduce=False).to(device)  # for refine loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)

    if args.resume:
        print(args.resume)
        checkpoint_file_resume = os.path.join(args.checkpoint, args.resume + '.pth.tar')
        if isfile(checkpoint_file_resume):
            print("=> loading checkpoint '{}'".format(checkpoint_file_resume))
            checkpoint = torch.load(checkpoint_file_resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file_resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(checkpoint_file_resume))
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    print(' Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(
        MscocoMulti_double_only(cfg),
        batch_size=cfg.batch_size * args.num_gpus,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss, counter = train(train_loader, model,
                                    [criterion_abs, criterion_bce],
                                    writer, counter, optimizer, device)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model({
            'epoch': epoch + 1,
            'info': cfg.info,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    writer.export_scalars_to_json("./test.json")
    writer.close()
    logger.close()
def run():
    # Dataset
    transform = transforms.Compose([
        transforms.Resize(64),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, ))
    ])
    dataset = datasets.MNIST('.', transform=transform, download=True)
    dataloader = data.DataLoader(dataset, batch_size=4)
    print("[INFO] Define DataLoader")

    # Define Model
    g = Generator()
    d = Discriminator()
    print("[INFO] Define Model")

    # optimizer, loss
    gan_loss = GANLoss()
    optim_G = optim.Adam(g.parameters(), lr=0.0002, betas=(0.5, 0.999))
    optim_D = optim.Adam(d.parameters(), lr=0.0002, betas=(0.5, 0.999))
    print('[INFO] Define optimizer and loss')

    # train
    num_epoch = 2
    print('[INFO] Start Training!!')
    for epoch in range(num_epoch):
        total_batch = len(dataloader)
        for idx, (image, _) in enumerate(dataloader):
            d.train()
            g.train()

            # generate fake images
            noise = torch.randn(4, 100, 1, 1)
            output_fake = g(noise)

            # Loss
            d_loss_fake = gan_loss(d(output_fake.detach()), False)
            d_loss_real = gan_loss(d(image), True)
            d_loss = (d_loss_fake + d_loss_real) / 2
            g_loss = gan_loss(d(output_fake), True)

            # update
            optim_G.zero_grad()
            g_loss.backward()
            optim_G.step()

            optim_D.zero_grad()
            d_loss.backward()
            optim_D.step()

            if ((epoch * total_batch) + idx) % 1000 == 0:
                print('Epoch [%d/%d], Iter [%d/%d], D_loss: %.4f, G_loss: %.4f'
                      % (epoch, num_epoch, idx + 1, total_batch,
                         d_loss.item(), g_loss.item()))

    save_model('model', 'GAN', g, {'loss': g_loss.item()})
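# NOTE: `GANLoss` is assumed to be a BCE-style adversarial loss that builds the real/fake
# target tensor from a boolean flag, matching the `gan_loss(prediction, is_real)` calls
# above. The sketch below assumes the discriminator ends in a sigmoid; it is an
# illustration, not the file's actual definition.
import torch
import torch.nn as nn

class GANLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.loss = nn.BCELoss()  # use nn.BCEWithLogitsLoss() if D outputs raw logits

    def forward(self, prediction, target_is_real):
        # real samples are labeled 1, fake samples 0
        target = torch.ones_like(prediction) if target_is_real else torch.zeros_like(prediction)
        return self.loss(prediction, target)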
def train(self):
    num_param_updates = 0
    loss_acc_since_last_log = 0.0
    param_updates_since_last_log = 0
    num_episodes = 0

    state = self.env.reset()[..., np.newaxis]
    for t in tqdm(range(self.total_timesteps)):
        last_idx = self.memory.store_frame(state)
        recent_observations = self.memory.encode_recent_observation()

        # Choose random action if learning hasn't started yet
        if t > self.learning_start:
            action = self.select_epsilon_greedy_action(recent_observations, t).item()
        else:
            action = random.randrange(self.num_actions)

        # Advance a step
        next_state, reward, done, _ = self.env.step(action)
        next_state = next_state[..., np.newaxis]

        # Store result in memory
        self.memory.store_effect(last_idx, action, reward, done)

        # Reset if done (life lost, due to atari wrapper)
        if done:
            next_state = self.env.reset()
            next_state = next_state[..., np.newaxis]
        state = next_state

        # Train network using experience replay when
        # memory is sufficiently large.
        if (t > self.learning_start and t % self.learning_freq == 0
                and self.memory.can_sample(self.batch_size)):
            # Sample from replay buffer
            (
                state_batch,
                act_batch,
                r_batch,
                next_state_batch,
                done_mask,
            ) = self.memory.sample(self.batch_size)

            state_batch = torch.from_numpy(state_batch).type(self.dtype) / 255.0
            act_batch = torch.from_numpy(act_batch).long().to(self.device)
            r_batch = torch.from_numpy(r_batch).to(self.device)
            next_state_batch = torch.from_numpy(next_state_batch).type(self.dtype) / 255.0
            not_done_mask = torch.from_numpy(1 - done_mask).type(self.dtype)

            # Calculate current Q value
            current_Q_vals = self.Q(state_batch).gather(1, act_batch.unsqueeze(1))
            # Calculate next Q value based on action that gives max Q vals
            next_max_Q = self.target_Q(next_state_batch).detach().max(dim=1)[0]
            next_Q_vals = not_done_mask * next_max_Q
            # Calculate target of current Q values
            target_Q_vals = r_batch + (self.gamma * next_Q_vals)

            # Calculate loss and backprop
            loss = F.smooth_l1_loss(current_Q_vals.squeeze(), target_Q_vals)
            self.optimizer.zero_grad()
            loss.backward()
            for param in self.Q.parameters():
                param.grad.data.clamp_(-1, 1)

            # Update weights
            self.optimizer.step()
            num_param_updates += 1

            # Store stats
            loss_acc_since_last_log += loss.item()
            param_updates_since_last_log += 1

            # Update target network periodically
            if num_param_updates % self.target_update_freq == 0:
                self.target_Q.load_state_dict(self.Q.state_dict())

            # Save model checkpoint
            if num_param_updates % self.checkpoint_frequency == 0:
                save_model_checkpoint(
                    self.Q,
                    self.optimizer,
                    t,
                    f"{self.out_dir}/checkpoints/{self.model_name}_{num_param_updates}",
                )

            # Log progress
            if (num_param_updates % (self.log_freq // 2) == 0
                    and param_updates_since_last_log > 0):
                self.writer.add_scalar(
                    "Mean Loss per Update (Updates)",
                    loss_acc_since_last_log / param_updates_since_last_log,
                    num_param_updates,
                )
                loss_acc_since_last_log = 0.0
                param_updates_since_last_log = 0

            if num_param_updates % self.log_freq == 0:
                wrapper = get_wrapper_by_name(self.env, "Monitor")
                episode_rewards = wrapper.get_episode_rewards()
                mean_reward = round(np.mean(episode_rewards[-101:-1]), 2)
                sum_reward = np.sum(episode_rewards[-101:-1])
                episode_lengths = wrapper.get_episode_lengths()
                mean_duration = round(np.mean(episode_lengths[-101:-1]), 2)
                sum_duration = np.sum(episode_lengths[-101:-1])

                self.writer.add_scalar(
                    f"Mean Reward (epoch = {self.log_freq} updates)",
                    mean_reward,
                    num_param_updates // self.log_freq,
                )
                self.writer.add_scalar(
                    f"Mean Duration (epoch = {self.log_freq} updates)",
                    mean_duration,
                    num_param_updates // self.log_freq,
                )
                self.writer.add_scalar(
                    f"Mean Reward per Timestep (epoch = {self.log_freq} updates)",
                    round(sum_reward / sum_duration, 2),
                    num_param_updates // self.log_freq,
                )

        if done:
            num_episodes += 1

    # Save model
    save_model(self.Q, f"{self.out_dir}/{self.model_name}.model")
    self.env.close()
    print(f"Number of Episodes: {num_episodes}")
    return self.Q
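# NOTE: `select_epsilon_greedy_action` is referenced above but not shown. A common
# implementation is sketched here under the assumption of a linearly annealed epsilon;
# `self.exploration_steps` is a hypothetical attribute and the real schedule may differ.
# `random` and `torch` are assumed to be imported in the surrounding module, as above.
def select_epsilon_greedy_action(self, obs, t):
    eps = max(0.1, 1.0 - 0.9 * t / self.exploration_steps)  # anneal 1.0 -> 0.1
    if random.random() < eps:
        return torch.tensor([random.randrange(self.num_actions)])
    obs_t = torch.from_numpy(obs).type(self.dtype).unsqueeze(0) / 255.0
    with torch.no_grad():
        # pick the action with the highest predicted Q-value
        return self.Q(obs_t).argmax(dim=1).cpu()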
# Conditional-GAN variant of the training step above: the class label is fed to both G and D.
# generate fake images
noise = torch.randn(4, 100, 1, 1)
output_fake = g(noise, label)

# Loss
d_loss_fake = gan_loss(d(output_fake.detach(), label), False)
d_loss_real = gan_loss(d(image, label), True)
d_loss = (d_loss_fake + d_loss_real) / 2
g_loss = gan_loss(d(output_fake, label), True)

# update
optim_G.zero_grad()
g_loss.backward()
optim_G.step()

optim_D.zero_grad()
d_loss.backward()
optim_D.step()

if ((epoch * total_batch) + idx) % 1000 == 0:
    print('Epoch [%d/%d], Iter [%d/%d], D_loss: %.4f, G_loss: %.4f'
          % (epoch, num_epoch, idx + 1, total_batch,
             d_loss.item(), g_loss.item()))

save_model('model', 'GAN', g, {'loss': g_loss.item()})