def train_by_epoch(self):
    tqdm_batch = tqdm(self.dataloader, total=self.total_iter,
                      desc='epoch-{}'.format(self.epoch))
    avg_loss = AverageMeter()

    for curr_it, data in enumerate(tqdm_batch):
        self.reg.train()

        # `async` became a reserved keyword in Python 3.7; `non_blocking`
        # is the replacement argument for Tensor.cuda()
        edge = data['edge'].float().cuda(non_blocking=self.config.async_loading)
        corner = data['corner'].float().cuda(non_blocking=self.config.async_loading)
        box = data['box'].float().cuda(non_blocking=self.config.async_loading)

        # regress box parameters from the concatenated edge/corner maps
        reg_out = self.reg(torch.cat((edge, corner), dim=1))
        loss = self.mse(reg_out, box)

        self.opt.zero_grad()
        loss.backward()
        self.opt.step()

        # .item() detaches the scalar so the meter does not retain the graph
        avg_loss.update(loss.item())

    tqdm_batch.close()

    self.summary_writer.add_scalar('reg/loss', avg_loss.val, self.epoch)
    self.scheduler.step(avg_loss.val)
    self.logger.warning('info - lr: {}, loss: {}'.format(
        get_lr(self.opt), avg_loss.val))
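# For reference, minimal sketches of the AverageMeter and get_lr helpers that
# the training loops in this file assume. These are hypothetical stand-ins,
# not the repo's actual definitions; .val is treated as the running mean here
# because the loops feed avg_loss.val to the scheduler as the epoch loss.
class AverageMeter:
    """Track the running mean of a scalar such as the per-iteration loss."""
    def __init__(self):
        self.val, self.sum, self.count = 0.0, 0.0, 0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.val = self.sum / self.count


def get_lr(optimizer):
    """Return the learning rate of the optimizer's first param group."""
    return optimizer.param_groups[0]['lr']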
def train_by_epoch(self):
    tqdm_batch = tqdm(self.dataloader, total=self.total_iter,
                      desc='epoch-{}'.format(self.epoch))
    avg_loss = AverageMeter()
    corner, edge, out = None, None, None

    for curr_it, data in enumerate(tqdm_batch):
        self.encoder.train()
        self.edge.train()
        self.corner.train()

        # `async` became a reserved keyword in Python 3.7; use `non_blocking`
        img = data['img'].float().cuda(non_blocking=self.config.async_loading)
        line = data['line'].float().cuda(non_blocking=self.config.async_loading)
        edge = data['edge'].float().cuda(non_blocking=self.config.async_loading)
        corner = data['corner'].float().cuda(non_blocking=self.config.async_loading)

        encoder_out_list = self.encoder(torch.cat((img, line), dim=1))
        edge_out_list, edge_out = self.edge(encoder_out_list)
        corner_out = self.corner(edge_out_list)

        # per-pixel BCE (self.bce must use reduction='none');
        # up-weight the sparse positive pixels by 4x before averaging
        loss = self.bce(edge_out, edge)
        loss[edge > 0.] *= 4
        loss = loss.mean()

        c_loss = self.bce(corner_out, corner)
        c_loss[corner > 0.] *= 4
        loss += c_loss.mean()

        # detach so the outputs can be logged as images after the loop
        out = (edge_out.detach(), corner_out.detach())

        self.opt.zero_grad()
        loss.backward()
        # clip gradients by their infinity norm to stabilize training
        nn.utils.clip_grad_norm_(
            chain(self.encoder.parameters(), self.edge.parameters(),
                  self.corner.parameters()),
            3.0, norm_type=float('inf'))
        self.opt.step()

        avg_loss.update(loss.item())

    tqdm_batch.close()

    # log ground truth vs. predictions for the last batch of the epoch
    self.summary_writer.add_image('corner/edge_origin 1', edge[0], self.epoch)
    self.summary_writer.add_image('corner/edge_origin 2', edge[1], self.epoch)
    self.summary_writer.add_image('corner/edge_train 1', out[0][0], self.epoch)
    self.summary_writer.add_image('corner/edge_train 2', out[0][1], self.epoch)
    self.summary_writer.add_image('corner/corner_origin 1', corner[0], self.epoch)
    self.summary_writer.add_image('corner/corner_origin 2', corner[1], self.epoch)
    self.summary_writer.add_image('corner/corner_train 1', out[1][0], self.epoch)
    self.summary_writer.add_image('corner/corner_train 2', out[1][1], self.epoch)
    self.summary_writer.add_scalar('corner/loss', avg_loss.val, self.epoch)

    self.scheduler.step(avg_loss.val)
    self.logger.warning('info - lr: {}, loss: {}'.format(
        get_lr(self.opt), avg_loss.val))
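# The 4x up-weighting above only works if self.bce returns a per-pixel map,
# i.e. it was constructed with reduction='none'. A self-contained sketch of
# the same weighting trick; the names and shapes are illustrative, not the
# repo's code:
import torch
import torch.nn as nn

def weighted_bce(pred, target, pos_weight=4.0):
    per_pixel = nn.BCELoss(reduction='none')(pred, target)
    per_pixel[target > 0.] *= pos_weight  # emphasize rare positive pixels
    return per_pixel.mean()

# edge/corner maps are mostly background, so positives are heavily outnumbered
pred = torch.rand(2, 1, 64, 64)
target = (torch.rand(2, 1, 64, 64) > 0.95).float()
print(weighted_bce(pred, target))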
def train(cfg):
    batch_size = int(cfg['batch_size'])
    n_epochs = int(cfg['n_epochs'])
    sample_size = int(cfg['fixed_size'])

    #### DATA LOADING
    trans_train = []
    trans_val = []
    if cfg['rnd_sampling']:
        trans_train.append(RndSampling(sample_size, maintain_prop=False))
        trans_val.append(RndSampling(sample_size, maintain_prop=False))

    dataset, dataloader = get_dataset(cfg, trans=trans_train)
    val_dataset, val_dataloader = get_dataset(cfg, trans=trans_val, train=False)

    # summary writer for tensorboard
    writer = create_tb_logger(cfg)
    dump_code(cfg, writer.logdir)

    #### BUILD THE MODEL
    classifier = get_model(cfg)
    if cfg['verbose']:
        print(classifier)

    #### SET UP THE TRAINING
    optimizer = get_optimizer(cfg, classifier)
    lr_scheduler = get_lr_scheduler(cfg, optimizer)

    classifier.cuda()

    num_batch = len(dataset) // batch_size
    print('num of batches per epoch: %d' % num_batch)
    cfg['num_batch'] = num_batch

    n_iter = 0
    best_pred = 10  # loss-based metric, so lower is better
    best_epoch = 0
    current_lr = float(cfg['learning_rate'])

    for epoch in range(n_epochs + 1):
        # update bn decay
        if cfg['bn_decay'] and epoch != 0 \
                and epoch % int(cfg['bn_decay_step']) == 0:
            update_bn_decay(cfg, classifier, epoch)

        loss, n_iter = train_ep(cfg, dataloader, classifier, optimizer,
                                writer, epoch, n_iter)

        ### validation during training
        if epoch % int(cfg['val_freq']) == 0 and cfg['val_in_train']:
            best_epoch, best_pred = val_ep(cfg, val_dataloader, classifier,
                                           writer, epoch, best_epoch, best_pred)

        # update lr
        if cfg['lr_type'] == 'step' and current_lr >= float(cfg['min_lr']):
            lr_scheduler.step()
        if cfg['lr_type'] == 'plateau':
            lr_scheduler.step(loss)
        current_lr = get_lr(optimizer)
        writer.add_scalar('train/lr', current_lr, epoch)

    writer.close()
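# Example invocation with a minimal config. The keys are the ones train()
# reads directly above; the values are illustrative guesses, not defaults
# shipped with the repo:
if __name__ == '__main__':
    cfg = {
        'batch_size': 16,
        'n_epochs': 100,
        'fixed_size': 1024,
        'rnd_sampling': True,
        'verbose': False,
        'learning_rate': 1e-3,
        'min_lr': 1e-5,
        'lr_type': 'step',
        'bn_decay': False,
        'bn_decay_step': 20,
        'val_freq': 5,
        'val_in_train': True,
    }
    train(cfg)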