class FCNSegmentor(object):
    """
      The class for Semantic Segmentation with an FCN-style network. Include train & val.

      Construct with a configer, call init_model() once, then train().
    """

    def __init__(self, configer):
        """
          Create the meters and manager helpers. The network, loaders and optimizer
          stay None until init_model() is called.
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.seg_visualizer = SegVisualizer(configer)
        self.seg_loss_manager = SegLossManager(configer)
        self.module_utilizer = ModuleUtilizer(configer)
        self.seg_model_manager = SegModelManager(configer)
        self.seg_data_loader = SegDataLoader(configer)

        self.seg_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """
          Build the network, optimizer, data loaders and pixel loss.

          Logs an error and exits with status 1 when the configured dataset is not 'cityscape'.
        """
        self.seg_net = self.seg_model_manager.seg_net()
        self.iters = 0
        # The second value returned by load_net is deliberately ignored; iters restarts at 0.
        self.seg_net, _ = self.module_utilizer.load_net(self.seg_net)
        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

        if self.configer.get('dataset') == 'cityscape':
            self.train_loader = self.seg_data_loader.get_trainloader(FSCityScapeLoader)
            self.val_loader = self.seg_data_loader.get_valloader(FSCityScapeLoader)
        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.pixel_loss = self.seg_loss_manager.get_seg_loss('cross_entropy_loss')

    def _train_log_message(self):
        """
          Return the periodic training log text built from the current meters,
          iteration counter and learning rate.
        """
        return ('Train Iteration: {0}\t'
                'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                'Learning rate = {2}\n'
                'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                    self.iters, self.configer.get('solver', 'display_iter'), self.lr,
                    batch_time=self.batch_time, data_time=self.data_time,
                    loss=self.train_losses))

    def __train(self):
        """
          Train function of every epoch during train phase.

          Runs one full pass over train_loader; logging, validation and the
          optimizer refresh are all driven by the global iteration counter.
        """
        self.seg_net.train()
        start_time = time.time()

        # data_tuple: (inputs, targets, ...)
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)

            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # A malformed batch is a failure, so report a non-zero status.
                exit(1)

            # `non_blocking` is the keyword that replaced `async` (reserved word since Python 3.7).
            inputs = data_tuple[0].cuda(non_blocking=True)
            targets = data_tuple[1].cuda(non_blocking=True)

            # Forward pass.
            outputs = self.seg_net(inputs)

            # Compute the loss of the train batch & backward.
            loss = self.pixel_loss(outputs, targets)
            self.train_losses.update(loss.item(), inputs.size(0))

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info(self._train_log_message())
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
               self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()
                # Restart the timer so validation time is not counted as data loading.
                start_time = time.time()

            # Refresh the optimizer / learning rate for the new iteration count.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

    def __val(self):
        """
          Validation function during the train phase.

          Evaluates the whole val_loader without gradient tracking, saves the
          network, logs the average loss and puts the network back in train mode.
        """
        import torch  # local import: only torch.no_grad() is needed here

        self.seg_net.eval()
        start_time = time.time()

        # no_grad() replaces the removed `volatile=True` flag.
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                targets = data_tuple[1].cuda(non_blocking=True)

                # Forward pass.
                outputs = self.seg_net(inputs)

                # Compute the loss of the val batch.
                loss = self.pixel_loss(outputs, targets)
                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.seg_net, self.iters)

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        # batch_time is shared with the train loop, so clear the validation timings.
        self.batch_time.reset()
        self.val_losses.reset()
        self.seg_net.train()

    def train(self):
        """
          Run training epochs until the iteration counter reaches solver.max_iter.

          The limit is only checked between epochs, so the final epoch always
          completes and iters may end above max_iter.
        """
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()
class ConvPoseMachine(object):
    """
      The class for Pose Estimation with a Convolutional Pose Machine. Include train, val & test.

      Construct with a configer, call init_model(train_loader, val_loader), then train().
    """

    def __init__(self, configer):
        """
          Create the meters and manager helpers. The network, loaders and optimizer
          stay None until init_model() is called.
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.pose_visualizer = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.train_utilizer = ModuleUtilizer(configer)

        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.best_model_loss = None
        self.is_best = None
        self.lr = None
        self.iters = None

    def init_model(self, train_loader=None, val_loader=None):
        """
          Build the network, restore its state and iteration counter via load_net,
          create the optimizer and remember the given loaders.
        """
        self.pose_net = self.model_manager.pose_detector()
        self.pose_net, self.iters = self.train_utilizer.load_net(self.pose_net)
        self._refresh_optimizer()

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.heatmap_loss = self.loss_manager.get_pose_loss('heatmap_loss')

    def _refresh_optimizer(self):
        """
          Re-query the optimizer for the current iteration count.

          Accepts either a bare optimizer or an (optimizer, lr) pair from
          update_optimizer, so self.lr is kept in sync whenever it is provided.
        """
        updated = self.train_utilizer.update_optimizer(self.pose_net, self.iters)
        if isinstance(updated, tuple):
            self.optimizer, self.lr = updated
        else:
            self.optimizer = updated

    def _train_log_message(self):
        """
          Return the periodic training log text built from the current meters,
          iteration counter and learning rate.
        """
        return ('Train Iteration: {0}\t'
                'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                'Learning rate = {2}\n'
                'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                    self.iters, self.configer.get('solver', 'display_iter'), self.lr,
                    batch_time=self.batch_time, data_time=self.data_time,
                    loss=self.train_losses))

    def __train(self):
        """
          Train function of every epoch during train phase.

          Runs one full pass over train_loader; logging, validation and the
          optimizer refresh are all driven by the global iteration counter.
        """
        self.pose_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap[, maskmap, ...])
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)

            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # A malformed batch is a failure, so report a non-zero status.
                exit(1)

            # `non_blocking` is the keyword that replaced `async` (reserved word since Python 3.7).
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            self.pose_visualizer.vis_tensor(heatmap, name='heatmap')
            # NOTE(review): presumably undoes the input normalisation for display — confirm the constants.
            self.pose_visualizer.vis_tensor((inputs * 256 + 128) / 255, name='image')

            # Forward pass.
            outputs = self.pose_net(inputs)

            self.pose_visualizer.vis_tensor(outputs, name='output')
            self.pose_visualizer.vis_peaks(inputs, outputs, name='peak')

            # Compute the loss of the train batch & backward.
            loss = self.heatmap_loss(outputs, heatmap, maskmap)
            self.train_losses.update(loss.item(), inputs.size(0))

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info(self._train_log_message())
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
               self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()
                # Restart the timer so validation time is not counted as data loading.
                start_time = time.time()

            self._refresh_optimizer()

    def __val(self):
        """
          Validation function during the train phase.

          Evaluates the whole val_loader without gradient tracking, logs the
          average loss and puts the network back in train mode.
        """
        import torch  # local import: only torch.no_grad() is needed here

        self.pose_net.eval()
        start_time = time.time()

        # no_grad() replaces the removed `volatile=True` flag.
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                outputs = self.pose_net(inputs)
                self.pose_visualizer.vis_peaks(inputs, outputs, name='peak_val')

                # Compute the loss of the val batch.
                loss = self.heatmap_loss(outputs, heatmap, maskmap)
                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        # batch_time is shared with the train loop, so clear the validation timings.
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """
          Run training epochs until the iteration counter reaches solver.max_iter.

          The limit is only checked between epochs, so the final epoch always
          completes and iters may end above max_iter.
        """
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()

    def test(self, img_path=None, img_dir=None):
        """
          Load the image at img_path (as RGB) when the path exists.
        """
        # NOTE(review): the loaded image is not used further and img_dir is ignored — confirm intent.
        if img_path is not None and os.path.exists(img_path):
            image = Image.open(img_path).convert('RGB')
class Trainer(CheckpointRunner):
    """
    Training runner built on top of CheckpointRunner.

    init_fn() prepares the model, optimizer, LR schedule, loss and evaluators;
    train() runs the epoch loop with periodic summaries, checkpoints and evaluation.
    """

    # noinspection PyAttributeOutsideInit
    def init_fn(self, shared_model=None, **kwargs):
        """
        Build every training component from self.options.

        :param shared_model: an already constructed model to reuse; when None a
            new model is created and wrapped in DataParallel on the GPUs.
        :raises NotImplementedError: for an unknown model, optimizer or loss name.
        """
        model_name = self.options.model.name
        is_p2m = model_name == "pixel2mesh"
        is_classifier = model_name == "classifier"

        if is_p2m:
            dataset_opts = self.options.dataset
            # Renderer used for visualization in the summaries
            self.renderer = MeshRenderer(dataset_opts.camera_f, dataset_opts.camera_c,
                                         dataset_opts.mesh_pos)
            # Initial ellipsoid mesh
            self.ellipsoid = Ellipsoid(dataset_opts.mesh_pos)
        else:
            self.renderer = None

        if shared_model is None:
            if is_p2m:
                dataset_opts = self.options.dataset
                net = P2MModel(self.options.model, self.ellipsoid,
                               dataset_opts.camera_f, dataset_opts.camera_c,
                               dataset_opts.mesh_pos)
            elif is_classifier:
                net = Classifier(self.options.model, self.options.dataset.num_classes)
            else:
                raise NotImplementedError("Your model is not found")
            self.model = torch.nn.DataParallel(net, device_ids=self.gpus).cuda()
        else:
            self.model = shared_model

        # Optimizer over all model parameters
        optim_opts = self.options.optim
        optim_name = optim_opts.name
        if optim_name not in ("adam", "sgd"):
            raise NotImplementedError("Your optimizer is not found")
        if optim_name == "adam":
            self.optimizer = torch.optim.Adam(
                params=list(self.model.parameters()),
                lr=optim_opts.lr,
                betas=(optim_opts.adam_beta1, 0.999),
                weight_decay=optim_opts.wd
            )
        else:
            self.optimizer = torch.optim.SGD(
                params=list(self.model.parameters()),
                lr=optim_opts.lr,
                momentum=optim_opts.sgd_momentum,
                weight_decay=optim_opts.wd
            )

        self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer, optim_opts.lr_step, optim_opts.lr_factor
        )

        # Loss function matching the model type
        if is_p2m:
            self.criterion = P2MLoss(self.options.loss, self.ellipsoid).cuda()
        elif is_classifier:
            self.criterion = CrossEntropyLoss()
        else:
            raise NotImplementedError("Your loss is not found")

        # Running average of the training loss
        self.losses = AverageMeter()

        # Evaluators share the training model
        evaluator = Evaluator(self.options, self.logger, self.summary_writer,
                              shared_model=self.model)
        self.evaluators = [evaluator]

    def models_dict(self):
        """Return the models to checkpoint, keyed by name."""
        models = {'model': self.model}
        return models

    def optimizers_dict(self):
        """Return the optimizer and LR scheduler to checkpoint, keyed by name."""
        state = {}
        state['optimizer'] = self.optimizer
        state['lr_scheduler'] = self.lr_scheduler
        return state

    def train_step(self, input_batch):
        """
        Run one optimization step on a batch.

        :param input_batch: dict holding at least "images"; the whole dict is
            also handed to the criterion.
        :return: (detached model output, detached loss summary).
        """
        self.model.train()

        # Forward pass on the images of the batch
        prediction = self.model(input_batch["images"])

        # Loss and its per-term summary
        loss, loss_summary = self.criterion(prediction, input_batch)
        loss_value = loss.detach().cpu().item()
        self.losses.update(loss_value)

        # Backward pass and parameter update
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Detached copies are used for visualization
        return recursive_detach(prediction), recursive_detach(loss_summary)

    def train(self):
        """
        Train from the current epoch_count up to options.train.num_epochs.

        Each epoch builds a new DataLoader, resets the loss meter, trains on
        every batch, saves a checkpoint, optionally evaluates, and steps the
        LR scheduler.
        """
        train_opts = self.options.train
        for _ in range(self.epoch_count, train_opts.num_epochs):
            self.epoch_count += 1

            # A new loader is built for each epoch
            loader = DataLoader(self.dataset,
                                batch_size=train_opts.batch_size * self.options.num_gpus,
                                num_workers=self.options.num_workers,
                                pin_memory=self.options.pin_memory,
                                shuffle=train_opts.shuffle,
                                collate_fn=self.dataset_collate_fn)

            self.losses.reset()

            for batch in loader:
                # Move tensor entries to the GPU; leave everything else untouched
                gpu_batch = {}
                for key, value in batch.items():
                    gpu_batch[key] = value.cuda() if isinstance(value, torch.Tensor) else value

                step_output = self.train_step(gpu_batch)
                self.step_count += 1

                # Tensorboard logging every summary_steps steps
                if self.step_count % train_opts.summary_steps == 0:
                    self.train_summaries(gpu_batch, *step_output)

                # Checkpoint every checkpoint_steps steps
                if self.step_count % train_opts.checkpoint_steps == 0:
                    self.dump_checkpoint()

            # Checkpoint at the end of each epoch
            self.dump_checkpoint()

            # Evaluate every test_epochs epochs
            if self.epoch_count % train_opts.test_epochs == 0:
                self.test()

            self.lr_scheduler.step()

    def train_summaries(self, input_batch, out_summary, loss_summary):
        """
        Write visualizations and scalars to the summary writer, and a progress line to the log.

        :param input_batch: the batch dict that was trained on.
        :param out_summary: detached model output from train_step.
        :param loss_summary: dict of scalar loss terms from train_step.
        """
        writer = self.summary_writer
        step = self.step_count

        if self.renderer is not None:
            # Rendered mesh visualization for the batch
            rendered = self.renderer.p2m_batch_visualize(input_batch, out_summary,
                                                         self.ellipsoid.faces)
            writer.add_image("render_mesh", rendered, step)
            writer.add_histogram("length_distribution",
                                 input_batch["length"].cpu().numpy(), step)

        # Filenames help to trace a batch back to its samples
        self.logger.debug(input_batch["filename"])

        # One scalar per loss term
        for tag, value in loss_summary.items():
            writer.add_scalar(tag, value, step)

        # Progress line: total steps is estimated from dataset size and effective batch size
        effective_batch = self.options.train.batch_size * self.options.num_gpus
        total_steps = self.options.train.num_epochs * len(self.dataset) // effective_batch
        self.logger.info("Epoch %03d, Step %06d/%06d, Time elapsed %s, Loss %.9f (%.9f)" % (
            self.epoch_count, step, total_steps,
            self.time_elapsed, self.losses.val, self.losses.avg))

    def test(self):
        """Run every registered evaluator."""
        for runner in self.evaluators:
            runner.evaluate()
class FCNSegmentor(object):
    """
      The class for Semantic Segmentation with an FCN-style network. Include train & val.

      Construct with a configer, call init_model() once, then train().
    """

    def __init__(self, configer):
        """
          Create the meters and manager helpers. The network, loaders and optimizer
          stay None until init_model() is called.
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.seg_visualizer = SegVisualizer(configer)
        self.seg_loss_manager = SegLossManager(configer)
        self.module_utilizer = ModuleUtilizer(configer)
        self.seg_model_manager = SegModelManager(configer)
        self.seg_data_loader = SegDataLoader(configer)

        self.seg_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """
          Build the network, optimizer, data loaders and pixel loss.

          Logs an error and exits with status 1 when the configured dataset is not 'cityscape'.
        """
        self.seg_net = self.seg_model_manager.seg_net()
        self.iters = 0
        # The second value returned by load_net is deliberately ignored; iters restarts at 0.
        self.seg_net, _ = self.module_utilizer.load_net(self.seg_net)
        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

        if self.configer.get('dataset') == 'cityscape':
            self.train_loader = self.seg_data_loader.get_trainloader(FSCityScapeLoader)
            self.val_loader = self.seg_data_loader.get_valloader(FSCityScapeLoader)
        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.pixel_loss = self.seg_loss_manager.get_seg_loss('cross_entropy_loss')

    def _train_log_message(self):
        """
          Return the periodic training log text built from the current meters,
          iteration counter and learning rate.
        """
        return ('Train Iteration: {0}\t'
                'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                'Learning rate = {2}\n'
                'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                    self.iters, self.configer.get('solver', 'display_iter'), self.lr,
                    batch_time=self.batch_time, data_time=self.data_time,
                    loss=self.train_losses))

    def __train(self):
        """
          Train function of every epoch during train phase.

          Runs one full pass over train_loader; logging, validation and the
          optimizer refresh are all driven by the global iteration counter.
        """
        self.seg_net.train()
        start_time = time.time()

        # data_tuple: (inputs, targets, ...)
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)

            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # A malformed batch is a failure, so report a non-zero status.
                exit(1)

            # `non_blocking` is the keyword that replaced `async` (reserved word since Python 3.7).
            inputs = data_tuple[0].cuda(non_blocking=True)
            targets = data_tuple[1].cuda(non_blocking=True)

            # Forward pass.
            outputs = self.seg_net(inputs)

            # Compute the loss of the train batch & backward.
            loss = self.pixel_loss(outputs, targets)
            self.train_losses.update(loss.item(), inputs.size(0))

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info(self._train_log_message())
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
               self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()
                # Restart the timer so validation time is not counted as data loading.
                start_time = time.time()

            # Refresh the optimizer / learning rate for the new iteration count.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

    def __val(self):
        """
          Validation function during the train phase.

          Evaluates the whole val_loader without gradient tracking, saves the
          network, logs the average loss and puts the network back in train mode.
        """
        import torch  # local import: only torch.no_grad() is needed here

        self.seg_net.eval()
        start_time = time.time()

        # no_grad() replaces the removed `volatile=True` flag.
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                targets = data_tuple[1].cuda(non_blocking=True)

                # Forward pass.
                outputs = self.seg_net(inputs)

                # Compute the loss of the val batch.
                loss = self.pixel_loss(outputs, targets)
                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.seg_net, self.iters)

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        # batch_time is shared with the train loop, so clear the validation timings.
        self.batch_time.reset()
        self.val_losses.reset()
        self.seg_net.train()

    def train(self):
        """
          Run training epochs until the iteration counter reaches solver.max_iter.

          The limit is only checked between epochs, so the final epoch always
          completes and iters may end above max_iter.
        """
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()
class OpenPose(object):
    """
      The class for Pose Estimation with an OpenPose-style network. Include train & val.

      Construct with a configer, call init_model() once, then train().
    """

    def __init__(self, configer):
        """
          Create the meters and manager helpers. The network, loaders and optimizer
          stay None until init_model() is called.
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.vis = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.data_loader = PoseDataLoader(configer)
        self.module_utilizer = ModuleUtilizer(configer)

        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """
          Build the network, optimizer, data loaders and MSE loss.

          Logs an error and exits with status 1 when the configured dataset is not 'coco'.
        """
        self.pose_net = self.model_manager.pose_detector()
        self.iters = 0
        # The second value returned by load_net is deliberately ignored; iters restarts at 0.
        self.pose_net, _ = self.module_utilizer.load_net(self.pose_net)
        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

        if self.configer.get('dataset') == 'coco':
            self.train_loader = self.data_loader.get_trainloader(OPCocoLoader)
            self.val_loader = self.data_loader.get_valloader(OPCocoLoader)
        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.mse_loss = self.loss_manager.get_pose_loss('mse_loss')

    @staticmethod
    def _as_image(inputs):
        """
          Convert the input tensor to the numpy array handed to the PAF visualizer.
        """
        # NOTE(review): squeeze() + transpose(1, 2, 0) presumably expects a single
        # CHW image per batch — confirm the training batch size.
        return inputs.detach().cpu().squeeze().numpy().transpose(1, 2, 0)

    def _train_log_message(self):
        """
          Return the periodic training log text built from the current meters,
          iteration counter and learning rate.
        """
        return ('Train Iteration: {0}\t'
                'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                'Learning rate = {2}\n'
                'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                    self.iters, self.configer.get('solver', 'display_iter'), self.lr,
                    batch_time=self.batch_time, data_time=self.data_time,
                    loss=self.train_losses))

    def __train(self):
        """
          Train function of every epoch during train phase.

          Runs one full pass over train_loader; logging, validation and the
          optimizer refresh are all driven by the global iteration counter.
        """
        self.pose_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap, maskmap, vecmap)
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)

            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # A malformed batch is a failure, so report a non-zero status.
                exit(1)

            # `non_blocking` is the keyword that replaced `async` (reserved word since Python 3.7).
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            # Forward pass.
            paf_out, heatmap_out = self.pose_net(inputs)
            self.vis.vis_paf(paf_out, self._as_image(inputs), name='paf_out')

            # Compute the loss of the train batch & backward.
            loss = self.mse_loss(heatmap_out, heatmap, maskmap)
            if len(data_tuple) > 3:
                vecmap = data_tuple[3].cuda(non_blocking=True)
                self.vis.vis_paf(vecmap, self._as_image(inputs), name='paf')
                # Out-of-place add, matching __val, so the heatmap loss tensor is not modified.
                loss = loss + self.mse_loss(paf_out, vecmap, maskmap)

            self.train_losses.update(loss.item(), inputs.size(0))

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info(self._train_log_message())
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
               self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()
                # Restart the timer so validation time is not counted as data loading.
                start_time = time.time()

            # Adjust the learning rate after every iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

    def __val(self):
        """
          Validation function during the train phase.

          Evaluates the whole val_loader without gradient tracking, saves the
          network, logs the average loss and puts the network back in train mode.
        """
        import torch  # local import: only torch.no_grad() is needed here

        self.pose_net.eval()
        start_time = time.time()

        # no_grad() replaces the removed `volatile=True` flag.
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                paf_out, heatmap_out = self.pose_net(inputs)

                # Compute the loss of the val batch.
                loss = self.mse_loss(heatmap_out, heatmap, maskmap)
                if len(data_tuple) > 3:
                    vecmap = data_tuple[3].cuda(non_blocking=True)
                    loss = loss + self.mse_loss(paf_out, vecmap, maskmap)

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.pose_net, self.iters)

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        # batch_time is shared with the train loop, so clear the validation timings.
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """
          Run training epochs until the iteration counter reaches solver.max_iter.

          The limit is only checked between epochs, so the final epoch always
          completes and iters may end above max_iter.
        """
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()
class ConvPoseMachine(object):
    """
      The class for Pose Estimation with a Convolutional Pose Machine. Include train, val & test.

      Construct with a configer, call init_model(train_loader, val_loader), then train().
    """

    def __init__(self, configer):
        """
          Create the meters and manager helpers. The network, loaders and optimizer
          stay None until init_model() is called.
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.pose_visualizer = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.train_utilizer = ModuleUtilizer(configer)

        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.best_model_loss = None
        self.is_best = None
        self.lr = None
        self.iters = None

    def init_model(self, train_loader=None, val_loader=None):
        """
          Build the network, restore its state and iteration counter via load_net,
          create the optimizer and remember the given loaders.
        """
        self.pose_net = self.model_manager.pose_detector()
        self.pose_net, self.iters = self.train_utilizer.load_net(self.pose_net)
        self._refresh_optimizer()

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.heatmap_loss = self.loss_manager.get_pose_loss('heatmap_loss')

    def _refresh_optimizer(self):
        """
          Re-query the optimizer for the current iteration count.

          Accepts either a bare optimizer or an (optimizer, lr) pair from
          update_optimizer, so self.lr is kept in sync whenever it is provided.
        """
        updated = self.train_utilizer.update_optimizer(self.pose_net, self.iters)
        if isinstance(updated, tuple):
            self.optimizer, self.lr = updated
        else:
            self.optimizer = updated

    def _train_log_message(self):
        """
          Return the periodic training log text built from the current meters,
          iteration counter and learning rate.
        """
        return ('Train Iteration: {0}\t'
                'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                'Learning rate = {2}\n'
                'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                    self.iters, self.configer.get('solver', 'display_iter'), self.lr,
                    batch_time=self.batch_time, data_time=self.data_time,
                    loss=self.train_losses))

    def __train(self):
        """
          Train function of every epoch during train phase.

          Runs one full pass over train_loader; logging, validation and the
          optimizer refresh are all driven by the global iteration counter.
        """
        self.pose_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap[, maskmap, ...])
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)

            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # A malformed batch is a failure, so report a non-zero status.
                exit(1)

            # `non_blocking` is the keyword that replaced `async` (reserved word since Python 3.7).
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            self.pose_visualizer.vis_tensor(heatmap, name='heatmap')
            # NOTE(review): presumably undoes the input normalisation for display — confirm the constants.
            self.pose_visualizer.vis_tensor((inputs * 256 + 128) / 255, name='image')

            # Forward pass.
            outputs = self.pose_net(inputs)

            self.pose_visualizer.vis_tensor(outputs, name='output')
            self.pose_visualizer.vis_peaks(inputs, outputs, name='peak')

            # Compute the loss of the train batch & backward.
            loss = self.heatmap_loss(outputs, heatmap, maskmap)
            self.train_losses.update(loss.item(), inputs.size(0))

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info(self._train_log_message())
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
               self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()
                # Restart the timer so validation time is not counted as data loading.
                start_time = time.time()

            self._refresh_optimizer()

    def __val(self):
        """
          Validation function during the train phase.

          Evaluates the whole val_loader without gradient tracking, logs the
          average loss and puts the network back in train mode.
        """
        import torch  # local import: only torch.no_grad() is needed here

        self.pose_net.eval()
        start_time = time.time()

        # no_grad() replaces the removed `volatile=True` flag.
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                outputs = self.pose_net(inputs)
                self.pose_visualizer.vis_peaks(inputs, outputs, name='peak_val')

                # Compute the loss of the val batch.
                loss = self.heatmap_loss(outputs, heatmap, maskmap)
                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        # batch_time is shared with the train loop, so clear the validation timings.
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """
          Run training epochs until the iteration counter reaches solver.max_iter.

          The limit is only checked between epochs, so the final epoch always
          completes and iters may end above max_iter.
        """
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()

    def test(self, img_path=None, img_dir=None):
        """
          Load the image at img_path (as RGB) when the path exists.
        """
        # NOTE(review): the loaded image is not used further and img_dir is ignored — confirm intent.
        if img_path is not None and os.path.exists(img_path):
            image = Image.open(img_path).convert('RGB')
class OpenPose(object):
    """
      The class for Pose Estimation with an OpenPose-style network. Include train & val.

      Construct with a configer, call init_model() once, then train().
    """

    def __init__(self, configer):
        """
          Create the meters and manager helpers. The network, loaders and optimizer
          stay None until init_model() is called.
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.vis = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.data_loader = PoseDataLoader(configer)
        self.module_utilizer = ModuleUtilizer(configer)

        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """
          Build the network, optimizer, data loaders and MSE loss.

          Logs an error and exits with status 1 when the configured dataset is not 'coco'.
        """
        self.pose_net = self.model_manager.pose_detector()
        self.iters = 0
        # The second value returned by load_net is deliberately ignored; iters restarts at 0.
        self.pose_net, _ = self.module_utilizer.load_net(self.pose_net)
        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

        if self.configer.get('dataset') == 'coco':
            self.train_loader = self.data_loader.get_trainloader(OPCocoLoader)
            self.val_loader = self.data_loader.get_valloader(OPCocoLoader)
        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.mse_loss = self.loss_manager.get_pose_loss('mse_loss')

    @staticmethod
    def _as_image(inputs):
        """
          Convert the input tensor to the numpy array handed to the PAF visualizer.
        """
        # NOTE(review): squeeze() + transpose(1, 2, 0) presumably expects a single
        # CHW image per batch — confirm the training batch size.
        return inputs.detach().cpu().squeeze().numpy().transpose(1, 2, 0)

    def _train_log_message(self):
        """
          Return the periodic training log text built from the current meters,
          iteration counter and learning rate.
        """
        return ('Train Iteration: {0}\t'
                'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                'Learning rate = {2}\n'
                'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                    self.iters, self.configer.get('solver', 'display_iter'), self.lr,
                    batch_time=self.batch_time, data_time=self.data_time,
                    loss=self.train_losses))

    def __train(self):
        """
          Train function of every epoch during train phase.

          Runs one full pass over train_loader; logging, validation and the
          optimizer refresh are all driven by the global iteration counter.
        """
        self.pose_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap, maskmap, vecmap)
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)

            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # A malformed batch is a failure, so report a non-zero status.
                exit(1)

            # `non_blocking` is the keyword that replaced `async` (reserved word since Python 3.7).
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            # Forward pass.
            paf_out, heatmap_out = self.pose_net(inputs)
            self.vis.vis_paf(paf_out, self._as_image(inputs), name='paf_out')

            # Compute the loss of the train batch & backward.
            loss = self.mse_loss(heatmap_out, heatmap, maskmap)
            if len(data_tuple) > 3:
                vecmap = data_tuple[3].cuda(non_blocking=True)
                self.vis.vis_paf(vecmap, self._as_image(inputs), name='paf')
                # Out-of-place add, matching __val, so the heatmap loss tensor is not modified.
                loss = loss + self.mse_loss(paf_out, vecmap, maskmap)

            self.train_losses.update(loss.item(), inputs.size(0))

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info(self._train_log_message())
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
               self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()
                # Restart the timer so validation time is not counted as data loading.
                start_time = time.time()

            # Adjust the learning rate after every iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

    def __val(self):
        """
          Validation function during the train phase.

          Evaluates the whole val_loader without gradient tracking, saves the
          network, logs the average loss and puts the network back in train mode.
        """
        import torch  # local import: only torch.no_grad() is needed here

        self.pose_net.eval()
        start_time = time.time()

        # no_grad() replaces the removed `volatile=True` flag.
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                paf_out, heatmap_out = self.pose_net(inputs)

                # Compute the loss of the val batch.
                loss = self.mse_loss(heatmap_out, heatmap, maskmap)
                if len(data_tuple) > 3:
                    vecmap = data_tuple[3].cuda(non_blocking=True)
                    loss = loss + self.mse_loss(paf_out, vecmap, maskmap)

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.pose_net, self.iters)

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        # batch_time is shared with the train loop, so clear the validation timings.
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """
          Run training epochs until the iteration counter reaches solver.max_iter.

          The limit is only checked between epochs, so the final epoch always
          completes and iters may end above max_iter.
        """
        cudnn.benchmark = True
        while self.iters < self.configer.get('solver', 'max_iter'):
            self.__train()
class Trainer(CheckpointRunner):
    """Generic training runner built on top of ``CheckpointRunner``.

    Subclasses provide the network and the loss through ``init_model`` and
    ``init_loss_functions``. This class wires them to an optimizer, an
    optional learning-rate scheduler, and an epoch/step training loop with
    periodic summaries and checkpoints.
    """

    # noinspection PyAttributeOutsideInit
    def init_fn(self, shared_model=None, **kwargs):
        """Create the model, optimizer, scheduler, criterion and loss meter.

        Args:
            shared_model: Optional model to use instead of building one with
                ``init_model``.
            **kwargs: Accepted for interface compatibility; not used here.
        """
        # Create auxiliary models
        self.init_auxiliary()

        if shared_model is not None:
            # NOTE(review): indentation was lost in the source; a shared
            # model is taken to be used as given (already on GPU and already
            # wrapped), so only a freshly built model is wrapped - confirm.
            self.model = shared_model
        else:
            self.model = self.init_model()
            self.model = DataParallelModel(self.model.cuda(), device_ids=self.gpus)
            # self.model = torch.nn.DataParallel(self.model, device_ids=self.gpus).cuda()

        # Setup a joint optimizer for the 2 models
        self.optimizer = self.init_optimizer(self.options.optim.name)
        self.lr_scheduler = self.init_lr(self.options.optim.lr_scheduler)

        # Create loss functions
        self.criterion = self.init_loss_functions()
        self.criterion = DataParallelCriterion(self.criterion.cuda(), device_ids=self.gpus)

        # Create AverageMeters for losses
        self.losses = AverageMeter()

        # Evaluators
        # self.evaluators = [Evaluator(self.options, self.logger, self.summary_writer, shared_model=self.model)]

        # Number of batches per epoch; filled in by train().
        self.dataset_size = None

    def init_auxiliary(self):
        """Hook for subclasses that need auxiliary models; no-op by default."""
        pass

    def init_model(self):
        """Return the model to train. Must be overridden by subclasses."""
        raise NotImplementedError("Your model is not found")

    def init_loss_functions(self):
        """Return the loss module. Must be overridden by subclasses."""
        raise NotImplementedError("Your loss is not found")

    def init_optimizer(self, optim_name):
        """Build the optimizer(s) selected by ``optim_name``.

        Args:
            optim_name: One of ``"adam"``, ``"sgd"`` or ``"adam_gan"``.

        Returns:
            A single optimizer for ``"adam"`` and ``"sgd"``. For
            ``"adam_gan"``, a dict with the keys ``"optimizer_d"`` and
            ``"optimizer_g"`` holding one Adam optimizer each for
            ``self.model.module.D`` and ``self.model.module.G``.

        Raises:
            NotImplementedError: If ``optim_name`` is not recognised.
        """
        optim_opts = self.options.optim

        if optim_name == "adam":
            return torch.optim.Adam(
                params=list(self.model.parameters()),
                lr=optim_opts.lr,
                betas=(optim_opts.adam_beta1, 0.999),
                weight_decay=optim_opts.wd)

        if optim_name == "sgd":
            return torch.optim.SGD(
                params=list(self.model.parameters()),
                lr=optim_opts.lr,
                momentum=optim_opts.sgd_momentum,
                weight_decay=optim_opts.wd)

        if optim_name == "adam_gan":
            optimizer_d = torch.optim.Adam(
                params=list(self.model.module.D.parameters()),
                lr=optim_opts.lr_d,
                betas=(optim_opts.adam_beta1, 0.999),
                weight_decay=0)
            optimizer_g = torch.optim.Adam(
                params=list(self.model.module.G.parameters()),
                lr=optim_opts.lr_g,
                betas=(optim_opts.adam_beta1, 0.999),
                weight_decay=0)
            return {"optimizer_d": optimizer_d, "optimizer_g": optimizer_g}

        raise NotImplementedError("Your optimizer is not found")

    def init_lr(self, lr_scheduler_name):
        """Build the learning-rate scheduler selected by name.

        Args:
            lr_scheduler_name: ``"multistep"``, ``"exp"`` or
                ``"multistep_gan"``. Any other value disables scheduling.

        Returns:
            The scheduler, or ``None`` when the name is not recognised.
        """
        optim_opts = self.options.optim

        if lr_scheduler_name == "multistep":
            return torch.optim.lr_scheduler.MultiStepLR(
                self.optimizer, optim_opts.lr_step, optim_opts.lr_factor)

        if lr_scheduler_name == "exp":
            return torch.optim.lr_scheduler.ExponentialLR(
                self.optimizer, gamma=optim_opts.lr_gamma)

        if lr_scheduler_name == "multistep_gan":
            # Only the "optimizer_d" entry of the optimizer dict is scheduled.
            return torch.optim.lr_scheduler.MultiStepLR(
                self.optimizer["optimizer_d"], optim_opts.lr_step, optim_opts.lr_factor)

        # No scheduler requested. The original assigned a misspelled local
        # here and then failed with UnboundLocalError on return; train()
        # already treats None as "no scheduler".
        return None

    def models_dict(self):
        """Return the models to checkpoint, keyed by name."""
        return {'model': self.model}

    def optimizers_dict(self):
        """Return the optimizer and LR scheduler to checkpoint, keyed by name."""
        return {'optimizer': self.optimizer,
                'lr_scheduler': self.lr_scheduler}

    def train_step(self, input_batch):
        """Run one forward/backward/update step on ``input_batch``.

        Calls ``zero_grad``/``step`` directly on ``self.optimizer``, so it
        expects a single optimizer object rather than the dict produced by
        the ``"adam_gan"`` option.

        Returns:
            A tuple ``(out, loss_summary)`` with both members passed through
            ``recursive_detach`` for use in visualisation and logging.
        """
        # Grab data from the batch, predict with model
        out = self.model(input_batch)

        # compute loss
        loss, loss_summary = self.criterion(out, input_batch)
        self.losses.update(loss.detach().cpu().item())

        # Do backprop
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Pack output arguments to be used for visualization
        return recursive_detach(out), recursive_detach(loss_summary)

    def get_dataloader(self):
        """Create the training ``DataLoader`` over ``self.dataset``.

        The batch size is the configured per-GPU batch size multiplied by
        the configured number of GPUs.
        """
        data_loader = DataLoader(self.dataset,
                                 batch_size=self.options.train.batch_size * self.options.num_gpus,
                                 num_workers=self.options.num_workers,
                                 pin_memory=self.options.pin_memory,
                                 shuffle=self.options.train.shuffle)
        return data_loader

    def train(self):
        """Run the training loop from ``self.epoch_count`` up to ``num_epochs``.

        The data loader is created once and reused for every epoch. Within an
        epoch each batch is moved to the GPU and passed to ``train_step``;
        summaries and checkpoints are emitted every ``summary_steps`` and
        ``checkpoint_steps`` global steps respectively.
        """
        self.logger.info("Start Trainning.")

        # Create data loader at very begining
        train_data_loader = self.get_dataloader()
        # len() of a DataLoader is the number of batches per epoch.
        self.dataset_size = len(train_data_loader)

        # Run training for num_epochs epochs
        for _epoch in range(self.epoch_count, self.options.train.num_epochs):
            self.epoch_count += 1

            # Reset loss
            self.losses.reset()

            # Iterate over all batches in an epoch
            for batch in train_data_loader:
                # Send input to GPU
                batch = {
                    k: v.cuda() if isinstance(v, torch.Tensor) else v
                    for k, v in batch.items()
                }

                # Run training step
                out = self.train_step(batch)
                self.step_count += 1

                # Tensorboard logging every summary_steps steps
                if self.step_count % self.options.train.summary_steps == 0:
                    self.train_summaries(batch, *out)

                # Save checkpoint every checkpoint_steps steps
                if self.step_count % self.options.train.checkpoint_steps == 0:
                    self.dump_checkpoint()

            # NOTE(review): indentation was lost in the source; the two steps
            # below are taken to run once per epoch - confirm.
            # The end-of-epoch checkpoint is skipped for models whose name
            # ends with 'gan'.
            if not self.options.model.name.endswith('gan'):
                self.dump_checkpoint()

            if self.lr_scheduler is not None:
                self.lr_scheduler.step()

    def train_summaries(self, input_batch, out_summary, loss_summary):
        """Emit the periodic debug, TensorBoard and text-log summaries.

        Args:
            input_batch: The batch dict; its ``"filename"`` entry is logged.
            out_summary: Model output from ``train_step`` (unused here).
            loss_summary: Mapping of scalar names to values.
        """
        # Debug info for filenames
        self.logger.debug(input_batch["filename"])

        # Save results in Tensorboard
        self.tensorboard_step(loss_summary)

        # Save results to log
        self.log_step(loss_summary)

    def log_step(self, loss_summary):
        """Log epoch, step progress, elapsed time and the current/average loss.

        Args:
            loss_summary: Unused here; kept for a uniform summary interface.
        """
        train_opts = self.options.train
        batches_per_epoch = getattr(self, "dataset_size", None)
        if batches_per_epoch is not None:
            # Exact: number of batches the loader yields per epoch, which
            # rounds up when the dataset is not a multiple of the batch size.
            total_steps = train_opts.num_epochs * batches_per_epoch
        else:
            # train() has not recorded the loader length yet; fall back to
            # the floor-based estimate.
            total_steps = (train_opts.num_epochs * len(self.dataset)
                           // (train_opts.batch_size * self.options.num_gpus))

        self.logger.info(
            "Epoch %03d, Step %06d/%06d, Time elapsed %s, Loss %.5f (AvgLoss %.5f)"
            % (self.epoch_count, self.step_count, total_steps,
               self.time_elapsed, self.losses.val, self.losses.avg))

    def tensorboard_step(self, loss_summary):
        """Write every entry of ``loss_summary`` as a scalar at the current step."""
        for k, v in loss_summary.items():
            self.summary_writer.add_scalar(k, v, self.step_count)

    def init_with_pretrained_backbone(self):
        """Load pre-trained weights into the wrapped model (non-strict).

        The checkpoint path comes from
        ``options.train.backbone_pretrained_model``.
        """
        checkpoint_file = os.path.abspath(
            self.options.train.backbone_pretrained_model)
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        # Loading to CPU avoids failures when the checkpoint was saved on a
        # device that does not exist here; load_state_dict copies the values
        # into the model's own parameters.
        pretrained_dict = torch.load(checkpoint_file, map_location="cpu")
        self.model.module.load_state_dict(pretrained_dict, strict=False)
        self.logger.info("Init with pre-trained backbone from %s." %
                         checkpoint_file)

    def test(self):
        """Run every configured evaluator with the model in eval mode.

        The model is switched back to train mode afterwards, even when an
        evaluator raises. If no ``evaluators`` attribute has been set, no
        evaluation is run.
        """
        # init_fn does not assign self.evaluators (the assignment there is
        # commented out), so tolerate its absence instead of failing.
        evaluators = getattr(self, "evaluators", None) or []

        self.model.eval()
        try:
            for evaluator in evaluators:
                evaluator.evaluate()
        finally:
            self.model.train()