def validate(self, phase):
    """Run one validation pass over ``self.val_loader`` and log the losses.

    Args:
        phase: tag appended to the TensorBoard image key
            (``'Image_' + phase``), e.g. ``'on_val'``.

    Uses ``self.model.eval()`` which (in this project) returns both the
    intermediate tensors for visualization and the loss dict. Switches the
    model back to train mode before returning.
    """
    btime_rec = utils.AverageMeter(0)   # per-batch wall time
    dtime_rec = utils.AverageMeter(0)   # data-loading time
    npts_rec = utils.AverageMeter(0)    # valid sparse points per sample
    recorder = {}
    for rec in self.args.trainer['loss_record']:
        recorder[rec] = utils.AverageMeter(10)
    self.model.switch_to('eval')
    end = time.time()
    all_together = []
    for i, (image, sparse, mask, flow_target,
            rgb_target) in enumerate(self.val_loader):
        # Optionally truncate validation after 'val_iter' batches
        # (-1 means run the full loader).
        if ('val_iter' in self.args.trainer
                and self.args.trainer['val_iter'] != -1
                and i == self.args.trainer['val_iter']):
            break
        assert image.shape[0] > 0
        dtime_rec.update(time.time() - end)
        # Average count of valid sparse points per sample in this batch.
        npts_rec.update(int(torch.sum(mask) / mask.size(0) / mask.size(1)))
        image = image.cuda()
        sparse = sparse.cuda()
        mask = mask.cuda()
        flow_target = flow_target.cuda()
        rgb_target = rgb_target.cuda()
        # Sparse input and its validity mask are concatenated channel-wise.
        self.model.set_input(image, torch.cat([sparse, mask], dim=1),
                             flow_target, rgb_target)
        tensor_dict, loss_dict = self.model.eval()
        for k in loss_dict.keys():
            recorder[k].update(utils.reduce_tensors(loss_dict[k]).item())
        btime_rec.update(time.time() - end)
        end = time.time()

        # tb visualize
        if self.rank == 0:
            # Bug fix: clamp the display window to the loader length and
            # flush on the window's LAST iteration. Previously the flush
            # required reaching i == val_disp_end_iter, which never happens
            # when the loader has exactly val_disp_end_iter batches, so the
            # collected images were silently dropped. This matches the
            # clamped-window logic used by the other validate() in this file.
            disp_start = max(self.args.trainer['val_disp_start_iter'], 0)
            disp_end = min(self.args.trainer['val_disp_end_iter'],
                           len(self.val_loader))
            if i >= disp_start and i < disp_end:
                all_together.append(
                    utils.visualize_tensor(
                        image, mask, tensor_dict['flow_tensors'],
                        tensor_dict['common_tensors'],
                        tensor_dict['rgb_tensors'],
                        self.args.data['data_mean'],
                        self.args.data['data_div']))
            if i == disp_end - 1 and disp_end > disp_start:
                all_together = torch.cat(all_together, dim=2)
                grid = vutils.make_grid(all_together,
                                        nrow=1,
                                        normalize=True,
                                        range=(0, 255),
                                        scale_each=False)
                if self.tb_logger is not None:
                    self.tb_logger.add_image('Image_' + phase, grid,
                                             self.curr_step + 1)
    # logging
    if self.rank == 0:
        loss_str = ""
        for k in recorder.keys():
            if self.tb_logger is not None:
                self.tb_logger.add_scalar('val_{}'.format(k),
                                          recorder[k].avg,
                                          self.curr_step + 1)
            loss_str += '{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(
                k, loss=recorder[k])
        self.logger.info(
            'Validation Iter: [{0}]\t'.format(self.curr_step) +
            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                batch_time=btime_rec) +
            'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                data_time=dtime_rec) + loss_str +
            'NPts {num_pts.val} ({num_pts.avg:.1f})\t'.format(
                num_pts=npts_rec))
    self.model.switch_to("train")
def evaluate(self, phase):
    """Run one pass over ``self.eval_loader`` and log the averaged metrics.

    Args:
        phase: when ``'on_eval'``, metric averages are also written to
            TensorBoard under ``eval_<name>``.

    Metrics named in ``args.trainer['eval_record']`` are reduced across
    ranks and averaged; the model is returned to train mode at the end.
    """
    batch_timer = utils.AverageMeter(0)
    data_timer = utils.AverageMeter(0)
    meters = {name: utils.AverageMeter()
              for name in self.args.trainer['eval_record']}
    self.model.switch_to('eval')
    tic = time.time()
    # padded samples will be evaluted twice.
    for _, inputs in enumerate(self.eval_loader):
        data_timer.update(time.time() - tic)
        self.model.set_input(*inputs)
        eval_dict = self.model.evaluate()
        for name, value in eval_dict.items():
            meters[name].update(
                utils.reduce_tensors(value).item() / self.world_size)
        batch_timer.update(time.time() - tic)
        tic = time.time()
    # logging (rank 0 only)
    if self.rank == 0:
        pieces = []
        for name, meter in meters.items():
            if self.tb_logger is not None and phase == 'on_eval':
                self.tb_logger.add_scalar('eval_{}'.format(name),
                                          meter.avg, self.curr_step)
            pieces.append('{}: {value.avg:.5g}\t'.format(name, value=meter))
        self.logger.info(
            'Evaluation Iter: [{0}]\t'.format(self.curr_step)
            + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                batch_time=batch_timer)
            + 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                data_time=data_timer)
            + ''.join(pieces))
    self.model.switch_to('train')
def validate(self, phase):
    """Run one validation pass over ``self.val_loader``; log losses and dump
    a visualization grid.

    Args:
        phase: tag for TensorBoard keys and the dumped image filename;
            scalar curves are only written when phase == 'on_val'.
    """
    # AverageMeter(0) vs AverageMeter(10): presumably 0 means average over
    # the whole pass while 10 is a sliding window — confirm in utils.
    btime_rec = utils.AverageMeter(0)
    dtime_rec = utils.AverageMeter(0)
    recorder = {}
    for rec in self.args.trainer['loss_record']:
        recorder[rec] = utils.AverageMeter(10)
    self.model.switch_to('eval')
    end = time.time()
    all_together = []
    for i, inputs in enumerate(self.val_loader):
        # Optionally truncate the pass after 'val_iter' batches
        # (-1 means run the full loader).
        if ('val_iter' in self.args.trainer
                and self.args.trainer['val_iter'] != -1
                and i == self.args.trainer['val_iter']):
            break
        dtime_rec.update(time.time() - end)
        self.model.set_input(*inputs)
        tensor_dict, loss_dict = self.model.forward()
        for k in loss_dict.keys():
            # reduce_tensors presumably sums across ranks; dividing by
            # world_size yields the mean — verify against utils.
            recorder[k].update(
                utils.reduce_tensors(loss_dict[k]).item() / self.world_size)
        btime_rec.update(time.time() - end)
        end = time.time()
        # tb visualize
        if self.rank == 0:
            # Clamp the display window to the loader length so the flush
            # below is reached even when the loader is shorter than
            # val_disp_end_iter.
            disp_start = max(self.args.trainer['val_disp_start_iter'], 0)
            disp_end = min(self.args.trainer['val_disp_end_iter'],
                           len(self.val_loader))
            if (i >= disp_start and i < disp_end):
                all_together.append(
                    utils.visualize_tensor(tensor_dict['common_tensors'],
                                           self.args.data['data_mean'],
                                           self.args.data['data_div']))
            # Flush on the last iteration of the display window.
            if (i == disp_end - 1 and disp_end > disp_start):
                all_together = torch.cat(all_together, dim=2)
                grid = vutils.make_grid(all_together,
                                        nrow=1,
                                        normalize=True,
                                        range=(0, 255),
                                        scale_each=False)
                if self.tb_logger is not None:
                    self.tb_logger.add_image('Image_' + phase, grid,
                                             self.curr_step)
                # NOTE(review): make_grid(normalize=True) returns a float
                # tensor in [0, 1], while cv2.imwrite expects uint8 0-255
                # (and BGR channel order) — the saved PNG may come out
                # near-black / channel-swapped; confirm against real output.
                cv2.imwrite(
                    "{}/images/{}_{}.png".format(self.args.exp_path, phase,
                                                 self.curr_step),
                    grid.permute(1, 2, 0).numpy())
    # logging
    if self.rank == 0:
        loss_str = ""
        for k in recorder.keys():
            if self.tb_logger is not None and phase == 'on_val':
                self.tb_logger.add_scalar('val_{}'.format(k),
                                          recorder[k].avg,
                                          self.curr_step)
            loss_str += '{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(
                k, loss=recorder[k])
        self.logger.info(
            'Validation Iter: [{0}]\t'.format(self.curr_step) +
            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                batch_time=btime_rec) +
            'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                data_time=dtime_rec) + loss_str)
    # Restore train mode for the caller.
    self.model.switch_to('train')
def train(self):
    """Main optimization loop.

    Steps the model over ``self.train_loader``, logs to console/TensorBoard
    every ``print_freq`` steps, checkpoints every ``save_freq`` steps, and
    triggers validation/evaluation at their configured frequencies.
    ``self.curr_step`` counts completed steps (resumed from
    ``self.start_iter``).
    """
    batch_timer = utils.AverageMeter(10)
    data_timer = utils.AverageMeter(10)
    meters = {name: utils.AverageMeter(10)
              for name in self.args.trainer['loss_record']}
    self.model.switch_to('train')
    tic = time.time()
    for idx, inputs in enumerate(self.train_loader):
        self.curr_step = self.start_iter + idx
        self.lr_scheduler.step(self.curr_step)
        curr_lr = self.lr_scheduler.get_lr()[0]
        # measure data loading time
        data_timer.update(time.time() - tic)
        self.model.set_input(*inputs)
        loss_dict = self.model.step()
        for name, value in loss_dict.items():
            meters[name].update(
                utils.reduce_tensors(value).item() / self.world_size)
        batch_timer.update(time.time() - tic)
        tic = time.time()
        self.curr_step += 1

        # logging
        if (self.rank == 0
                and self.curr_step % self.args.trainer['print_freq'] == 0):
            if self.tb_logger is not None:
                self.tb_logger.add_scalar('lr', curr_lr, self.curr_step)
            parts = []
            for name, meter in meters.items():
                if self.tb_logger is not None:
                    self.tb_logger.add_scalar('train_{}'.format(name),
                                              meter.avg, self.curr_step)
                parts.append('{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(
                    name, loss=meter))
            self.logger.info(
                'Iter: [{0}/{1}]\t'.format(self.curr_step,
                                           len(self.train_loader))
                + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                    batch_time=batch_timer)
                + 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    data_time=data_timer)
                + ''.join(parts)
                + 'lr {lr:.2g}'.format(lr=curr_lr))

        # save
        save_due = (self.curr_step % self.args.trainer['save_freq'] == 0
                    or self.curr_step == self.args.model['total_iter'])
        if self.rank == 0 and save_due:
            self.model.save_state(
                "{}/checkpoints".format(self.args.exp_path), self.curr_step)

        # validate / evaluate
        if (self.curr_step % self.args.trainer['val_freq'] == 0
                or self.curr_step == self.args.model['total_iter']):
            self.validate('on_val')
        if ((self.curr_step % self.args.trainer['eval_freq'] == 0
             or self.curr_step == self.args.model['total_iter'])
                and self.args.trainer['eval']):
            self.evaluate('on_eval')
def train(self):
    """Main training loop for the sparse-input variant.

    Each batch is ``(image, sparse, mask, flow_target, rgb_target)``;
    ``sparse`` and ``mask`` are concatenated channel-wise before being fed
    to the model. Validation and checkpointing trigger on
    ``curr_step + 1`` (i.e. after the step completes), matching the step
    value passed to ``save_state``.
    """
    btime_rec = utils.AverageMeter(10)
    dtime_rec = utils.AverageMeter(10)
    npts_rec = utils.AverageMeter(1000)  # valid sparse points per sample
    recorder = {}
    for rec in self.args.trainer['loss_record']:
        recorder[rec] = utils.AverageMeter(10)
    self.model.switch_to('train')
    end = time.time()
    for i, (image, sparse, mask, flow_target,
            rgb_target) in enumerate(self.train_loader):
        self.curr_step = self.start_iter + i
        self.lr_scheduler.step(self.curr_step)
        curr_lr = self.lr_scheduler.get_lr()[0]
        # measure data loading time
        dtime_rec.update(time.time() - end)
        # Average count of valid sparse points per sample in the batch.
        npts_rec.update(int(torch.sum(mask) / mask.size(0) / mask.size(1)))
        assert image.shape[0] > 0
        image = image.cuda()
        sparse = sparse.cuda()
        mask = mask.cuda()
        flow_target = flow_target.cuda()
        rgb_target = rgb_target.cuda()
        self.model.set_input(image, torch.cat([sparse, mask], dim=1),
                             flow_target, rgb_target)
        loss_dict = self.model.step()
        for k in loss_dict.keys():
            recorder[k].update(utils.reduce_tensors(loss_dict[k]).item())
        btime_rec.update(time.time() - end)
        end = time.time()
        # logging
        if (self.rank == 0
                and self.curr_step % self.args.trainer['print_freq'] == 0):
            loss_str = ""
            if self.tb_logger is not None:
                self.tb_logger.add_scalar('npts', npts_rec.avg,
                                          self.curr_step)
                self.tb_logger.add_scalar('lr', curr_lr, self.curr_step)
            for k in recorder.keys():
                if self.tb_logger is not None:
                    # Bug fix: log at self.curr_step (was curr_step + 1) so
                    # the loss curves share the same global step as the
                    # 'npts' and 'lr' scalars written just above.
                    self.tb_logger.add_scalar('train_{}'.format(k),
                                              recorder[k].avg,
                                              self.curr_step)
                loss_str += '{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(
                    k, loss=recorder[k])
            self.logger.info(
                'Iter: [{0}/{1}]\t'.format(self.curr_step,
                                           len(self.train_loader)) +
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                    batch_time=btime_rec) +
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    data_time=dtime_rec) + loss_str +
                'NPts {num_pts.val} ({num_pts.avg:.1f})\t'.format(
                    num_pts=npts_rec) + 'lr {lr:.2g}'.format(lr=curr_lr))
        # validate
        if ((self.curr_step + 1) % self.args.trainer['val_freq'] == 0
                or (self.curr_step + 1) == self.args.model['total_iter']):
            self.validate('on_val')
        # save
        if self.rank == 0 and (
                (self.curr_step + 1) % self.args.trainer['save_freq'] == 0
                or (self.curr_step + 1) == self.args.model['total_iter']):
            self.model.save_state(
                "{}/checkpoints".format(self.args.exp_path),
                self.curr_step + 1)
def train(self):
    """Training loop with a console progress bar.

    Identical bookkeeping to the other train loops (meters, lr schedule,
    periodic logging and checkpointing) but renders an in-place progress
    bar on rank 0 and stops explicitly at ``total_iter``.
    """
    batch_timer = utils.AverageMeter(10)
    data_timer = utils.AverageMeter(10)
    meters = {name: utils.AverageMeter(10)
              for name in self.args.trainer['loss_record']}
    self.model.switch_to('train')
    tic = time.time()
    total = len(self.train_loader)
    bar = ProgressBar(total, max_width=80)
    running_loss = []  # NOTE(review): never read anywhere in this method
    for idx, inputs in enumerate(self.train_loader):
        bar.numerator = idx + 1
        if self.rank == 0:
            # '\r' keeps the bar on a single console line.
            print(bar, end='\r')
        self.curr_step = self.start_iter + idx
        self.lr_scheduler.step(self.curr_step)
        curr_lr = self.lr_scheduler.get_lr()[0]
        # measure data loading time
        data_timer.update(time.time() - tic)
        self.model.set_input(*inputs)
        loss_dict = self.model.step()
        for name, value in loss_dict.items():
            meters[name].update(utils.reduce_tensors(value).item())
        batch_timer.update(time.time() - tic)
        tic = time.time()
        self.curr_step += 1

        # logging
        if (self.rank == 0
                and self.curr_step % self.args.trainer['print_freq'] == 0):
            if self.tb_logger is not None:
                self.tb_logger.add_scalar('lr', curr_lr, self.curr_step)
            pieces = []
            for name, meter in meters.items():
                if self.tb_logger is not None:
                    self.tb_logger.add_scalar('train_{}'.format(name),
                                              meter.avg, self.curr_step)
                pieces.append('{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(
                    name, loss=meter))
            print(Style.BRIGHT + Fore.CYAN
                  + 'Iter: [{0}/{1}]\t'.format(self.curr_step,
                                               len(self.train_loader))
                  + ''.join(pieces)
                  + 'lr {lr:.2g}'.format(lr=curr_lr))

        # save
        if (self.rank == 0
                and (self.curr_step % self.args.trainer['save_freq'] == 0
                     or self.curr_step == self.args.model['total_iter'])):
            self.model.save_state(
                "{}/checkpoints".format(self.args.exp_path), self.curr_step)

        if self.curr_step == self.args.model['total_iter']:
            break