# Generator
g_optimizer.zero_grad()
fake_score_out = d_net(f_net(fake))   # discriminator score on generated faces
fake_label_out = c_net(f_net(fake))   # identity logits on generated faces
# adversarial loss: push generated samples towards the "real" label
GD_fake_loss = F.binary_cross_entropy(torch.sigmoid(fake_score_out), real_label)
# identity loss: the generated face should match the speaker's voice label
GC_fake_loss = F.nll_loss(F.log_softmax(fake_label_out, 1), voice_label)
(GD_fake_loss + GC_fake_loss).backward()
GD_fake.update(GD_fake_loss.item())
GC_fake.update(GC_fake_loss.item())
g_optimizer.step()

batch_time.update(time.time() - start_time)

# print status
if it % 200 == 0:
    print(iteration, data_time, batch_time, D_real, D_fake, C_real, GD_fake, GC_fake)
    data_time.reset()
    batch_time.reset()
    D_real.reset()
    D_fake.reset()
    C_real.reset()
    GD_fake.reset()
    GC_fake.reset()

    # snapshot
    save_model(g_net, NETWORKS_PARAMETERS['g']['model_path'])
iteration.update(1)
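
# For context, a minimal sketch of the discriminator/classifier update that
# typically runs just before the generator step above. This is an assumption
# reconstructed from the meters (D_real, D_fake, C_real) and networks (d_net,
# c_net, f_net) referenced there, not code from the original script; face,
# d_optimizer and c_optimizer are hypothetical names.
d_optimizer.zero_grad()
c_optimizer.zero_grad()
real_score_out = d_net(f_net(face))           # discriminator score on real faces
fake_score_out = d_net(f_net(fake.detach()))  # generator detached: only D/C get gradients
real_label_out = c_net(f_net(face))           # identity logits on real faces
D_real_loss = F.binary_cross_entropy(torch.sigmoid(real_score_out), real_label)
D_fake_loss = F.binary_cross_entropy(torch.sigmoid(fake_score_out),
                                     torch.zeros_like(real_label))
C_real_loss = F.nll_loss(F.log_softmax(real_label_out, 1), voice_label)
(D_real_loss + D_fake_loss + C_real_loss).backward()
D_real.update(D_real_loss.item())
D_fake.update(D_fake_loss.item())
C_real.update(C_real_loss.item())
d_optimizer.step()
c_optimizer.step()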
def train_model(model, train_loader, dev_loader, optimizer, criterion,
                num_classes, target_classes, label_encoder, device):
    # create two Meter objects to track the performance of the model
    # during training and evaluation
    train_meter = Meter(target_classes)
    dev_meter = Meter(target_classes)
    best_f1 = -1

    # epoch loop
    for epoch in range(args.epochs):
        train_tqdm = tqdm(train_loader)
        dev_tqdm = tqdm(dev_loader)
        model.train()

        # train loop
        for i, (train_x, train_y, mask, crf_mask) in enumerate(train_tqdm):
            # get the logits and update the gradients
            optimizer.zero_grad()
            logits = model.forward(train_x, mask)
            if args.no_crf:
                loss = criterion(logits.reshape(-1, num_classes).to(device),
                                 train_y.reshape(-1).to(device))
            else:
                # the CRF criterion returns a log-likelihood, so negate it
                loss = -criterion(logits.to(device), train_y,
                                  reduction="token_mean", mask=crf_mask)
            loss.backward()
            optimizer.step()

            # get the current metrics (averaged over the whole train set so far)
            loss, _, _, micro_f1, _, _, macro_f1 = train_meter.update_params(loss.item(), logits, train_y)

            # print the metrics
            train_tqdm.set_description(
                "Epoch: {}/{}, Train Loss: {:.4f}, Train Micro F1: {:.4f}, Train Macro F1: {:.4f}"
                .format(epoch + 1, args.epochs, loss, micro_f1, macro_f1))
            train_tqdm.refresh()

        # reset the metrics to 0
        train_meter.reset()
        model.eval()

        # evaluation loop -> mostly the same as the training loop, but without updating the parameters
        for i, (dev_x, dev_y, mask, crf_mask) in enumerate(dev_tqdm):
            with torch.no_grad():  # no gradients needed during evaluation
                logits = model.forward(dev_x, mask)
                if args.no_crf:
                    loss = criterion(logits.reshape(-1, num_classes).to(device),
                                     dev_y.reshape(-1).to(device))
                else:
                    loss = -criterion(logits.to(device), dev_y,
                                      reduction="token_mean", mask=crf_mask)
            loss, _, _, micro_f1, _, _, macro_f1 = dev_meter.update_params(loss.item(), logits, dev_y)
            dev_tqdm.set_description(
                "Dev Loss: {:.4f}, Dev Micro F1: {:.4f}, Dev Macro F1: {:.4f}"
                .format(loss, micro_f1, macro_f1))
            dev_tqdm.refresh()
        dev_meter.reset()

        # if the current macro F1 score is the best one -> save the model
        if macro_f1 > best_f1:
            if not os.path.exists(args.save_path):
                os.makedirs(args.save_path)
            print("Macro F1 score improved from {:.4f} -> {:.4f}. Saving model...".format(best_f1, macro_f1))
            best_f1 = macro_f1
            torch.save(model, os.path.join(args.save_path, "model.pt"))
            with open(os.path.join(args.save_path, "label_encoder.pk"), "wb") as file:
                pickle.dump(label_encoder, file)
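
# The Meter class is project-local and not shown in these excerpts. Below is
# a minimal sketch of what it plausibly does, assuming the seven return
# values of update_params are (loss, micro_precision, micro_recall, micro_f1,
# macro_precision, macro_recall, macro_f1) averaged over the epoch so far;
# the real implementation may differ.
from sklearn.metrics import precision_recall_fscore_support

class Meter:
    def __init__(self, target_classes):
        self.target_classes = target_classes
        self.reset()

    def reset(self):
        self.loss_sum, self.steps = 0.0, 0
        self.preds, self.golds = [], []

    def update_params(self, loss, logits, y):
        # accumulate the running loss and the flattened predictions
        self.loss_sum += loss
        self.steps += 1
        self.preds.extend(logits.argmax(-1).reshape(-1).tolist())
        self.golds.extend(y.reshape(-1).tolist())
        micro = precision_recall_fscore_support(
            self.golds, self.preds, labels=self.target_classes,
            average="micro", zero_division=0)
        macro = precision_recall_fscore_support(
            self.golds, self.preds, labels=self.target_classes,
            average="macro", zero_division=0)
        return (self.loss_sum / self.steps,
                micro[0], micro[1], micro[2],
                macro[0], macro[1], macro[2])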
writer.add_scalars('data/scalar_group', {
    "D_real": D_real_loss,
    "D_fake": D_fake_loss,
    "C1_real_loss": C1_real_loss,
    "C2_real_loss": C2_real_loss,
    "C1_fake_loss": GC1_fake_loss,
    "C2_fake_loss": GC2_fake_loss,
    "GD_fake_loss": GD_fake_loss
}, it)
# info = {'image/real_images': real_images(face, 8), 'image/generated_images': generate_img(fake_face, 8)}
# writer.add_images('image/generated_images', generate_img(fake_face, 8), it)

batch_time.reset()
D_real.reset()
D_fake.reset()
C1_real.reset()
C2_real.reset()
C1_fake.reset()
C2_fake.reset()
GD_fake.reset()

# snapshot
if it % 2000 == 0:
    s_time = time.strftime("%m-%d,%H,%M") + '-' + str(it) + '-'
    # save_model(e_net, 'models/voice_embedding/{}voice_embedding.pth'.format(s_time))
    save_model(g_net, 'models/generator/{}generator.pth'.format(s_time))
    # save_model(d1_net, 'models/discriminator/{}discriminator.pth'.format(s_time))
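
# save_model is another project-local helper that is not shown in these
# excerpts. A plausible minimal sketch (an assumption, not the original
# implementation): create the target directory if needed, then serialize the
# network's state dict.
import os
import torch

def save_model(net, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    torch.save(net.state_dict(), path)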
def train_model(model, train_loader, dev_loader, optimizer, criterion,
                num_classes, target_classes, it, label_encoder, device):
    # create two Meter objects to track the performance of the model
    # during training and evaluation
    train_meter = Meter(target_classes)
    dev_meter = Meter(target_classes)
    best_f1 = 0
    loss, macro_f1 = 0, 0
    total_steps = len(train_loader) * args.epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # default value in run_glue.py
        num_training_steps=total_steps)
    curr_patience = 0

    # epoch loop
    for epoch in range(args.epochs):
        train_tqdm = tqdm(train_loader, leave=False)
        model.train()

        # train loop
        for i, (train_x, train_y, mask) in enumerate(train_tqdm):
            train_tqdm.set_description(
                " Training - Epoch: {}/{}, Loss: {:.4f}, F1: {:.4f}, Best F1: {:.4f}"
                .format(epoch + 1, args.epochs, loss, macro_f1, best_f1))
            train_tqdm.refresh()

            # get the logits and update the gradients
            optimizer.zero_grad()
            logits = model.forward(train_x, mask)
            loss = criterion(logits.reshape(-1, num_classes).to(device),
                             train_y.reshape(-1).to(device))
            loss.backward()
            optimizer.step()
            if args.fine_tune:
                scheduler.step()

            # get the current metrics (averaged over the whole train set so far)
            loss, _, _, _, _, _, macro_f1 = train_meter.update_params(loss.item(), logits, train_y)

        # reset the metrics to 0
        train_meter.reset()

        dev_tqdm = tqdm(dev_loader, leave=False)
        model.eval()
        loss, macro_f1 = 0, 0

        # evaluation loop -> mostly the same as the training loop, but without updating the parameters
        for i, (dev_x, dev_y, mask) in enumerate(dev_tqdm):
            dev_tqdm.set_description(
                " Evaluating - Epoch: {}/{}, Loss: {:.4f}, F1: {:.4f}, Best F1: {:.4f}"
                .format(epoch + 1, args.epochs, loss, macro_f1, best_f1))
            dev_tqdm.refresh()
            with torch.no_grad():  # no gradients needed during evaluation
                logits = model.forward(dev_x, mask)
                loss = criterion(logits.reshape(-1, num_classes).to(device),
                                 dev_y.reshape(-1).to(device))
            loss, _, _, micro_f1, _, _, macro_f1 = dev_meter.update_params(loss.item(), logits, dev_y)
        dev_meter.reset()

        # if the current macro F1 score is the best one -> save the model,
        # otherwise count towards early stopping
        if macro_f1 > best_f1:
            curr_patience = 0
            best_f1 = macro_f1
            torch.save(model, os.path.join(args.save_path, "model_{}.pt".format(it + 1)))
            with open(os.path.join(args.save_path, "label_encoder.pk"), "wb") as file:
                pickle.dump(label_encoder, file)
        else:
            curr_patience += 1
            if curr_patience > args.patience:
                break
    return best_f1
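
# The extra `it` argument suggests this train_model is invoked repeatedly,
# e.g. once per cross-validation fold or random restart, saving
# model_{it+1}.pt each time. A hypothetical driver sketch; fold_loaders,
# build_model, pad_index and args.lr are illustrative names, not from the
# original code.
scores = []
for it, (train_loader, dev_loader) in enumerate(fold_loaders):
    model = build_model(num_classes).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=pad_index)
    scores.append(train_model(model, train_loader, dev_loader, optimizer,
                              criterion, num_classes, target_classes, it,
                              label_encoder, device))
print("Mean best macro F1 over runs: {:.4f}".format(sum(scores) / len(scores)))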
import os.path as osp

import torch
from tqdm import tqdm

# Meter, SegmentationVisualizer and iou_binary are project-local helpers
# assumed to be importable from the surrounding package.

class Trainer(object):
    def __init__(self, data_loader, model, optimizer, loss_fn, debug=False,
                 cuda=False, checkpoint_dir='checkpoints',
                 best_model_filename='best_model.pt'):
        # data_loader and loss_fn are dicts keyed by the epoch at which
        # they become active (see _set_epoch below)
        self._data_loader = data_loader
        self._loss_fn = loss_fn
        self.data_loader = None
        self.loss_fn = None
        self.model = model
        self.optimizer = optimizer
        self.visualizer = SegmentationVisualizer()
        self.train_loss_meter = Meter('Loss/train')
        self.train_iou_meter = Meter('IoU/train')
        self.val_loss_meter = Meter('Loss/val')
        self.val_iou_meter = Meter('IoU/val')
        self.checkpoint_dir = checkpoint_dir
        self.best_model_filename = best_model_filename
        self.debug = debug
        self.cuda = cuda
        self.best_iou = 0
        self._set_epoch(0)

    def _set_epoch(self, epoch):
        if epoch in self._data_loader:
            print('Switching data loaders')
            self.data_loader = self._data_loader[epoch]
        if epoch in self._loss_fn:
            print('Switching loss function')
            self.loss_fn = self._loss_fn[epoch]

    def train_one_epoch(self, epoch):
        self._set_epoch(epoch)
        # is_available(), not is_initialized(): CUDA is initialized lazily,
        # so is_initialized() is False before the first CUDA call and the
        # model would never be moved to the GPU
        if self.cuda and torch.cuda.is_available():
            self.model = self.model.cuda()
            self.loss_fn = self.loss_fn.cuda()
        self.model.train()
        self.train_loss_meter.reset()
        self.train_iou_meter.reset()
        for i, (src, dst) in enumerate(tqdm(self.data_loader['train'], leave=False)):
            if self.cuda and torch.cuda.is_available():
                dst = dst.cuda(non_blocking=True)
                src = src.cuda(non_blocking=True)
            self.optimizer.zero_grad()
            y_head = self.model(src)
            loss = self.loss_fn(y_head, dst)
            loss.backward()
            self.optimizer.step()
            self.train_loss_meter(loss.item())
            self.train_iou_meter(iou_binary(y_head.detach() > 0, dst.detach()))
            if i % 100 == 0:
                step = epoch * len(self.data_loader['train']) + i
                data = {
                    'loss': self.train_loss_meter.value(),
                    'accuracy': self.train_iou_meter.value()
                }
                self.visualizer.add_scalars(data, step, prefix='train_')
            if self.debug and i == 0:
                images = {
                    'images': src,
                    'gt_masks': dst,
                    'masks': y_head.detach() > 0
                }
                self.visualizer.add_images(images, epoch, prefix='train_')
        print(
            f'\tFinal {self.train_loss_meter.name}:\t{self.train_loss_meter.mean():.4f}\t',
            f'final {self.train_iou_meter.name}:\t{self.train_iou_meter.mean():.4f}')

    def validate(self, epoch):
        self._set_epoch(epoch)
        self.model.eval()
        self.val_loss_meter.reset()
        self.val_iou_meter.reset()
        for i, (src, dst) in enumerate(tqdm(self.data_loader['val'], leave=False)):
            if self.cuda and torch.cuda.is_available():
                dst = dst.cuda(non_blocking=True)
                src = src.cuda(non_blocking=True)
            with torch.no_grad():
                y_head = self.model(src)
                loss = self.loss_fn(y_head, dst)
            self.val_loss_meter(loss.item())
            self.val_iou_meter(iou_binary(y_head.detach() > 0, dst.detach()))
            if self.debug and i == 0 and epoch % 50 == 0:
                images = {
                    'images': src,
                    'gt_masks': dst,
                    'masks': y_head.detach() > 0
                }
                self.visualizer.add_images(images, epoch, prefix='val_')
        data = {
            'loss': self.val_loss_meter.mean(),
            'accuracy': self.val_iou_meter.mean()
        }
        self.visualizer.add_scalars(data, epoch, prefix='val_')
        print(
            f'\tFinal {self.val_loss_meter.name}:\t\t{self.val_loss_meter.mean():.4f}\t',
            f'final {self.val_iou_meter.name}:\t\t{self.val_iou_meter.mean():.4f}')
        self.save_best_model()

    @property
    def best_model_checkpoint_filepath(self):
        return osp.join(self.checkpoint_dir, self.best_model_filename)

    def load_previous_best_model(self):
        device = torch.device('cpu')
        state_dict = torch.load(self.best_model_checkpoint_filepath, map_location=device)
        self.model.load_state_dict(state_dict)

    def save_best_model(self):
        if self.val_iou_meter.mean() > self.best_iou:
            print(f'Updating best model @{self.val_iou_meter.name}:{self.val_iou_meter.mean():.04f}')
            self.best_iou = self.val_iou_meter.mean()
            torch.save(self.model.state_dict(), self.best_model_checkpoint_filepath)
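
# A hypothetical usage sketch of the Trainer above. data_loader and loss_fn
# are dicts keyed by the epoch at which they take effect (what _set_epoch
# switches on), and each data_loader value holds 'train' and 'val' loaders.
# UNet, make_loaders and DiceLoss are illustrative names, not from the
# original code.
model = UNet()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
trainer = Trainer(
    data_loader={0: make_loaders(crop=256), 50: make_loaders(crop=512)},
    model=model,
    optimizer=optimizer,
    loss_fn={0: torch.nn.BCEWithLogitsLoss(), 50: DiceLoss()},
    debug=True,
    cuda=True)
for epoch in range(100):
    trainer.train_one_epoch(epoch)
    trainer.validate(epoch)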