def finetune_first_image(model, images, targets, optimizer, scheduler, logger, cfg):
    total_iter_finetune = cfg.FINETUNE.TOTAL_ITER
    model.train()
    meters = MetricLogger(delimiter=" ")
    for iteration in range(total_iter_finetune):
        scheduler.step()
        loss_dict, _ = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(total_loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        meters.update(lr=optimizer.param_groups[0]["lr"])

        # log twice: at the start and at the halfway point
        if iteration % (total_iter_finetune // 2) == 0:
            logger.info(
                meters.delimiter.join(["{meters}"]).format(meters=str(meters))
            )
    model.eval()
    return model
def test_update(self):
    meter = MetricLogger()
    for i in range(10):
        meter.update(metric=float(i))

    m = meter.meters["metric"]
    self.assertEqual(m.count, 10)
    self.assertEqual(m.total, 45)
    self.assertEqual(m.median, 4)
    self.assertEqual(m.avg, 4.5)
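# The unit test above pins down the meter semantics that every loop in this
# section relies on (per-metric count/total/median/avg, joined by a delimiter
# when printed). A minimal sketch of a compatible SmoothedValue / MetricLogger
# pair is given below for reference only; maskrcnn_benchmark's real
# implementation is close to this but may differ in formatting details, and
# some forks add helpers such as str_avg() or get_dict() that appear further
# down. Treat this as an interface illustration, not the library source.
from collections import defaultdict, deque

import torch


class SmoothedValue:
    """Track a series of values: median/avg over a sliding window, global_avg over all updates."""

    def __init__(self, window_size=20):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value):
        self.deque.append(value)
        self.count += 1
        self.total += value

    @property
    def median(self):
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        return torch.tensor(list(self.deque)).mean().item()

    @property
    def global_avg(self):
        return self.total / self.count


class MetricLogger:
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for name, value in kwargs.items():
            if isinstance(value, torch.Tensor):
                value = value.item()
            self.meters[name].update(float(value))

    def __getattr__(self, attr):
        # lets callers write meters.loss.avg or meters.time.global_avg
        if attr in self.meters:
            return self.meters[attr]
        raise AttributeError(attr)

    def __str__(self):
        return self.delimiter.join(
            "{}: {:.4f} ({:.4f})".format(name, m.median, m.global_avg)
            for name, m in self.meters.items()
        )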
def evaluator(cfg, args, model, device, iteration):
    meters_val = MetricLogger(delimiter=" ")
    data_loader_val = make_data_loader(cfg, is_train=False, is_distributed=False)[0]
    with torch.no_grad():
        # Should be one image for each GPU:
        print('Calculating evaluation loss.')
        for iteration_val, batch in enumerate(data_loader_val):
            if args.debug and iteration_val > 10:
                break
            images_val, targets_val, _ = batch

            # skip batches that contain an image without any ground-truth box
            skip_batch = False
            nbox = []
            for t in targets_val:
                nbox.append(len(t))
                if len(t) < 1:
                    skip_batch = True
                    break
            if skip_batch:
                continue

            try:
                print(iteration_val, nbox)
                images_val = images_val.to(device)
                targets_val = [target.to(device) for target in targets_val]
                loss_dict = model(images_val, targets_val)
                losses = sum(loss for loss in loss_dict.values())
                loss_dict_reduced = reduce_loss_dict(loss_dict)
                losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            except Exception:
                print('Warning: ground truth error.')

    # synchronize()
    if is_main_process():
        print('Save evaluation loss to tensorboard.')
        for name, meter in meters_val.meters.items():
            print(name, meter.global_avg)
            args.writer.add_scalar('EvalMetrics/' + name, meter.global_avg,
                                   iteration / args.iters_per_epoch)
    print('Pass')
def do_val(model=None, data_loader_val=None, device=None):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    meters = MetricLogger(delimiter=" ")
    for images, targets, _ in data_loader_val:
        images = images.to(device)
        targets = [target.to(device) for target in targets]
        with torch.no_grad():
            loss_dict = model(images, targets)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

    logger.info(meters.delimiter.join(["Val {meters}"]).format(meters=meters.str_avg()))
    return meters
def validation(model, data_loader, device, logger, tensorboard_logger, iteration):
    logger.info('-' * 40)
    logger.info("Start Validation")
    meters = MetricLogger(delimiter=" ")
    start_validation_time = time.time()
    max_iter = len(data_loader)

    for idx, batch in enumerate(tqdm(data_loader)):
        images, targets, _ = batch
        images = images.to(device)
        targets = [target.to(device) for target in targets]
        with torch.no_grad():
            loss_dict, _ = model(images, targets)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(total_loss=losses_reduced, **loss_dict_reduced)

    tensorboard_logger.write(meters, iteration, phase='Valid')
    logger.info('Validation:')
    logger.info(
        meters.delimiter.join([
            "iter: {iter}",
            "{meters}",
            "max mem: {memory:.0f}",
        ]).format(
            iter=iteration,
            meters=str(meters),
            memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
        ))

    total_validation_time = time.time() - start_validation_time
    total_time_str = str(datetime.timedelta(seconds=total_validation_time))
    logger.info("Total Validation time: {} ({:.4f} s / it)".format(
        total_time_str, total_validation_time / max_iter))
    logger.info('-' * 40)
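# The tensorboard_logger.write(meters, iteration, phase=...) helper used by
# `validation` above (and by the training loop further below that calls it with
# phase='Train') is not shown in this section. A hypothetical implementation,
# assuming the tensorboardX package, could simply push each meter's current
# median under a per-phase tag:
import os

from tensorboardX import SummaryWriter


class TensorboardXLogger:
    """Hypothetical stand-in for the logger object passed into `validation`."""

    def __init__(self, log_dir):
        self.log_dir = log_dir
        self.writer = SummaryWriter(log_dir=log_dir)

    def write(self, meters, iteration, phase='Train'):
        # one scalar per meter, grouped as 'Train/loss', 'Valid/total_loss', ...
        for name, meter in meters.meters.items():
            self.writer.add_scalar('{}/{}'.format(phase, name), meter.median, iteration)

    def export_to_json(self):
        self.writer.export_scalars_to_json(os.path.join(self.log_dir, 'scalars.json'))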
def run_eval(self):
    if isrank0:
        logging.info("Dataset is %s; len of loader %d" % (self.dataset, len(self.loader)))
        logging.info("Split is %s" % (self.split))
    meters = MetricLogger(delimiter=" ")
    # loop over data loader
    start = time.time()
    # ------------------- forward model ------------------------------
    for batch_idx, (inputs, imgs_names, targets, seq_name,
                    starting_frame) in enumerate(self.loader):
        meters.update(dT=time.time() - start)
        if batch_idx % 5 == 0:
            logging.info('[{}] {}/{};{} '.format(
                args.distributed_manully_rank, batch_idx, len(self.loader), meters))
        targets = targets.cuda()  # use our collate function
        inputs = inputs.cuda()
        cur_device = inputs.device
        CHECK4D(targets)  # B, Len, O, HW
        CHECK5D(inputs)   # B, Len, D, H, W

        if args.load_proposals_dataset:
            proposals_cur_batch = imgs_names
            proposals = []
            for proposal_cur_vid in proposals_cur_batch:
                boxlist = list(proposal_cur_vid)  # BoxList of current batch
                boxlist = [b.to(cur_device) for b in boxlist]
                proposals.append(boxlist)         # BoxList of current batch
            imgs_names = None
        else:
            proposals = None

        with torch.no_grad():
            self.evaler(batch_idx, inputs, imgs_names, targets, seq_name, args, proposals)
        meters.update(bT=time.time() - start)
        start = time.time()
def main():
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # model = train(cfg, args.local_rank, args.distributed)
    model = build_detection_model(cfg)
    print(model)
    all_index = []
    for index, item in enumerate(model.named_parameters()):
        all_index.append(index)
        print(index)
        print(item[0])
        print(item[1].size())
    print("All index of the model: ", all_index)

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=args.distributed,
        start_iter=arguments["iteration"],
    )
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    # run_test(cfg, model, args.distributed)

    # pruning
    m = Mask(model)
    m.init_length()
    print("-" * 10 + "one epoch begin" + "-" * 10)
    print("remaining ratio of pruning : Norm is %f" % args.rate_norm)
    print("reducing ratio of pruning : Distance is %f" % args.rate_dist)
    print("total remaining ratio is %f" % (args.rate_norm - args.rate_dist))
    m.modelM = model
    m.init_mask(args.rate_norm, args.rate_dist)
    m.do_mask()
    m.do_similar_mask()
    model = m.modelM
    m.if_zero()
    # run_test(cfg, model, args.distributed)

    # The training loop is kept inline (instead of calling do_train) so the
    # pruning mask can be re-applied during training.
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        # pruning: mask the gradients of pruned filters for this iteration
        m.do_grad_mask()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        # pruning: 7375 iterations correspond to one epoch with batch size 16
        # on the ~118K-image COCO train set
        if iteration % args.iter_pruned == 0 or iteration == cfg.SOLVER.MAX_ITER - 5000:
            m.modelM = model
            m.if_zero()
            m.init_mask(args.rate_norm, args.rate_dist)
            m.do_mask()
            m.do_similar_mask()
            m.if_zero()
            model = m.modelM
            if args.use_cuda:
                model = model.cuda()
            # run_test(cfg, model, args.distributed)

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
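# The Mask helper driving the pruning above (init_mask / do_mask /
# do_similar_mask / do_grad_mask) is not shown in this section; judging from the
# printed messages it combines a norm criterion with a distance criterion. The
# sketch below is a simplified, hypothetical illustration of the norm-based part
# only: keep the highest-norm conv filters, zero the rest, and re-apply the same
# mask to the gradients between backward() and optimizer.step(). The
# distance/similarity criterion (do_similar_mask) is omitted.
import torch
import torch.nn as nn


class SimpleFilterMask:
    def __init__(self, model, keep_ratio=0.9):
        self.model = model
        self.keep_ratio = keep_ratio
        self.masks = {}

    def init_mask(self):
        for name, module in self.model.named_modules():
            if isinstance(module, nn.Conv2d):
                weight = module.weight.data
                norms = weight.view(weight.size(0), -1).norm(p=2, dim=1)
                n_keep = max(1, int(self.keep_ratio * weight.size(0)))
                keep = torch.zeros_like(norms)
                keep[norms.topk(n_keep).indices] = 1.0
                # per-filter mask, broadcast over each filter's weights
                self.masks[name] = keep.view(-1, 1, 1, 1)

    def do_mask(self):
        for name, module in self.model.named_modules():
            if name in self.masks:
                module.weight.data.mul_(self.masks[name])

    def do_grad_mask(self):
        # call after losses.backward() and before optimizer.step()
        for name, module in self.model.named_modules():
            if name in self.masks and module.weight.grad is not None:
                module.weight.grad.data.mul_(self.masks[name])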
def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} "
                f"|| targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
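# Every loop in this section calls reduce_loss_dict purely for logging: the
# backward pass uses the per-GPU `losses`, while the logged values are averaged
# across ranks. A sketch consistent with that usage is below; the library
# version may differ in details (for example, reducing onto rank 0 only).
import torch
import torch.distributed as dist


def reduce_loss_dict(loss_dict):
    """Average each scalar loss over all ranks so the log reflects the global batch."""
    world_size = dist.get_world_size() if dist.is_available() and dist.is_initialized() else 1
    if world_size < 2:
        return loss_dict
    with torch.no_grad():
        names = sorted(loss_dict.keys())
        stacked = torch.stack([loss_dict[k] for k in names], dim=0)
        dist.all_reduce(stacked)
        stacked /= world_size
    return dict(zip(names, stacked))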
def do_face_train_triplet(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    divs_nums,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    dataset_names = cfg.DATASETS.TEST

    for iteration, (img_a, img_p, img_n, label_p, label_n) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        img_a_list, _ = divs_tensors(device=device, tensors=img_a, targets=None, divs_nums=divs_nums)
        img_p_list, label_p_list = divs_tensors(device=device, tensors=img_p, targets=label_p, divs_nums=divs_nums)
        img_n_list, label_n_list = divs_tensors(device=device, tensors=img_n, targets=label_n, divs_nums=divs_nums)

        # ======== splitting the batch may affect the BatchNorm layers ========
        optimizer.zero_grad()
        for img_a, img_p, img_n, label_p, label_n in zip(
                img_a_list, img_p_list, img_n_list, label_p_list, label_n_list):
            loss_dict = model(tensors=[img_a, img_p, img_n],
                              targets=[label_p, label_n],
                              batch=iteration,
                              total_batch=None)
            losses = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)

            losses /= divs_nums
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            if iteration > 40000:
                checkpointer.save_backbone("BACKBONE_{:07d}".format(iteration))

        # ========= periodic validation =========
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter=" ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg, is_train=False, is_distributed=(get_world_size() > 1), is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)
            checkpointer.save_backbone("model_final")

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
def do_train(model, data_loader, data_loader_val, optimizer, scheduler,
             checkpointer, device, checkpoint_period, vis_period, arguments,
             cfg, tb_writer, distributed):
    from tools.train_net import run_test
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    vis_num = 0

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)
        # weight the mask loss and the box losses separately
        losses = sum(
            v * cfg.SOLVER.LOSS_WEIGHT.MASK_WEIGHT if k == 'loss_mask'
            else v * cfg.SOLVER.LOSS_WEIGHT.BOX_WEIGHT
            for k, v in loss_dict.items())
        # losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        loss_dict_reduced = {
            k: (v * cfg.SOLVER.LOSS_WEIGHT.MASK_WEIGHT if k == 'loss_mask'
                else v * cfg.SOLVER.LOSS_WEIGHT.BOX_WEIGHT)
            for k, v in loss_dict_reduced.items()
        }
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)
        if tb_writer:
            tb_writer.add_scalars('train/Losses', loss_dict_reduced, global_step=iteration)
            tb_writer.add_scalar('train/Loss', losses_reduced, global_step=iteration)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if cfg.SOLVER.VIS_ON and iteration % vis_period == 0:
            # visualize predicted boxes on one validation image
            model.eval()
            vis_image, vis_image_transformed, target = data_loader_val.dataset.get_image(vis_num)
            image_list = to_image_list(vis_image_transformed, cfg.DATALOADER.SIZE_DIVISIBILITY)
            image_list = image_list.to(device)
            cpu_device = torch.device("cpu")
            with torch.no_grad():
                predictions = model(image_list)
                predictions = [o.to(cpu_device) for o in predictions]
            # only one picture
            predictions = predictions[0]
            top_predictions = select_topn_predictions(predictions, 3)
            # visualize
            result = vis_image.copy()
            result = overlay_boxes_cls_names(result, top_predictions, target)
            result = torch.from_numpy(result)
            result = result.permute(2, 0, 1)[None, :, :, :]
            result = make_grid([result])
            if tb_writer:
                tb_writer.add_image('Image_train', result, iteration)
            synchronize()
            model.train()
            vis_num += 1
            vis_num %= len(data_loader_val.dataset)

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            # eval
            model.eval()
            results = run_test(cfg, model, distributed, iter=iteration, valid=True)
            if tb_writer:
                for result in results:
                    for k, v in result.items():
                        tb_writer.add_scalar('valid/{}'.format(k), v, global_step=iteration)
            synchronize()
            model.train()

        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
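# The weighted-loss loop above assumes extra config keys (SOLVER.LOSS_WEIGHT.*,
# SOLVER.VIS_ON) that are not part of the stock maskrcnn_benchmark config. A
# minimal, hypothetical yacs snippet showing the shape such an extension would
# take (the values are placeholders):
from yacs.config import CfgNode as CN

_C = CN()
_C.SOLVER = CN()
_C.SOLVER.LOSS_WEIGHT = CN()
_C.SOLVER.LOSS_WEIGHT.MASK_WEIGHT = 1.0   # multiplier applied to 'loss_mask'
_C.SOLVER.LOSS_WEIGHT.BOX_WEIGHT = 1.0    # multiplier applied to every other loss term
_C.SOLVER.VIS_ON = False                  # enable the periodic prediction visualization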
def do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, tb_logger, cfg, ): print('111111111111111111111') logger = logging.getLogger("maskrcnn_benchmark.trainer") print('2222222222222222222222') logger.info("Start training") print('4444444444444444444444') meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() kkk = 0 for iteration, (images, targets, _) in enumerate(data_loader, start_iter): data_time = time.time() - end arguments["iteration"] = iteration #print(kkk) kkk += 1 scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() if cfg.SOLVER.USE_ADAM: torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == (max_iter - 1): #print(kkk * 10000000) logger.info( meters.delimiter.join( [ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ] ).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, ) ) if is_main_process(): for tag, value in loss_dict_reduced.items(): tb_logger.scalar_summary(tag, value.item(), iteration) if iteration % checkpoint_period == 0 and iteration > 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) checkpointer.save("model_{:07d}".format(iteration), **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info( "Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter) ) )
class Evaler(nn.Module): """ engine/container for encoder, decoder and all DMM modules: match_layer feature_extractor """ def __init__(self, DMM, encoder, decoder, args, dmmcfgs): super(Evaler, self).__init__() self.meters = MetricLogger(delimiter=" ") self.decoder = decoder self.encoder = encoder self.DMM = DMM if args.load_proposals and not args.load_proposals_dataset: logging.info('load %s' % args.pred_offline_meta) self.pred_offline_meta = json.load( open(args.pred_offline_meta, 'r')) if 'vidfid2index' in self.pred_offline_meta: self.pred_offline_meta = self.pred_offline_meta['vidfid2index'] if args.use_gpu: self.encoder.cuda() self.decoder.cuda() self.DMM.cuda() timestr = time.strftime('%m-%d') model_name = args.model_name.strip('/') model_id = model_name.split('/')[-2] + '/epo' + model_name.split( '/')[-1].split('epo')[-1] self.eval_output_root = '%s/eval/%s/%s/L%s_%d_s%s/' % ( args.models_root, timestr, model_id, args.eval_flag, args.test_image_h, args.eval_split) self.eval_output_root = self.eval_output_root.replace('__', '_') timestr = time.strftime('%m-%d-%H-%M') save_config_dir = '%s/save_config/%s/' % (self.eval_output_root, timestr) isrank0 = args.local_rank == 0 if isrank0: if not os.path.exists(save_config_dir): make_dir(save_config_dir) yaml.dump( args, open(os.path.join(save_config_dir, 'eval_args.yaml'), 'w')) yaml.dump( dmmcfgs, open(os.path.join(save_config_dir, 'eval_dmm_config.yaml'), 'w')) json_file = open(get_db_path(args.eval_split), 'r') self.seq_dir = get_img_path(args.eval_split) self.anno_dir = get_anno_path(args.eval_split) self.data = json.load(json_file) if isrank0: logging.info('num vid %d' % len(self.data['videos'])) self.DMM.eval() self.encoder.eval() self.decoder.eval() def forward(self, batch_idx, inputs, imgs_names, targets, seq_name, args, proposals_input): """ Evaluation forward a batch of clip """ if args.pad_video: CHECK4D(targets) # B,len,O,HW CHECK5D(inputs) # B,len,O,H,W device_id = torch.cuda.current_device() if args.batch_size == 1: if not args.distributed: seq_name = [seq_name[device_id]] else: batch_size_device = int(args.batch_size / args.ngpus) if not args.distributed: seq_name = seq_name[device_id * batch_size_device:(1 + device_id) * batch_size_device] CHECKEQ(len(seq_name), len(inputs)) njpgs_batch, img_shape, frame_names_batch = self.prepare_frame_names( seq_name) # send batch to GPU prev_thid_list = None B, nframe, O, H, W = inputs.shape max_length_clip = min(nframe, args.length_clip) for frame_idx in range(max_length_clip): tic = time.time() extra_frame = [njpgs <= frame_idx for njpgs in njpgs_batch] proposal_cur, predid_cur_frames = None, None if args.load_proposals and proposals_input is None: predid = [0] * len(seq_name) for b, seq_n in enumerate(seq_name): if extra_frame[b]: predid[b] = len(self.encoder.pred_offline) - 1 else: frame_name = imgs_names[b][frame_idx] # tuple + b name predid[b] = int( self.pred_offline_meta[seq_n][frame_name]) predid_cur_frames = predid elif proposals_input is not None: proposal_cur = [] for b in range(B): if len(proposals_input[b]) > frame_idx: proposal_cur.append(proposals_input[b][frame_idx]) else: proposal_cur.append(proposals_input[b][-1]) x = inputs[:, frame_idx] # B,1->0,O,H,W, select 1 from clip len Bx, Cx, Hx, Wx = x.shape # targets shape: B,len,O,H*W # input shape: B,len,3(Cx),H,W y_mask = targets[:, frame_idx][:, :, :-1].float() CHECKEQ(Hx * Wx, y_mask.shape[-1]) CHECKEQ(Bx, y_mask.shape[0]) B, O, HW = CHECK3D(y_mask) CHECKEQ(Bx, B) if frame_idx == 0: mask_hist = None tplt_dict, 
tplt_valid_batch, proposals = \ self.forward_timestep_init(args, x, y_mask, predid_cur_frames, proposal_cur) prev_thid_list, thid_list = None, None prev_mask = y_mask.view(B, O, HW) outs = y_mask init_pred_inst = y_mask.view(B, O, H, W) infos = { 'args': args, 'shape': img_shape, 'extra_frame': extra_frame, 'valid': tplt_valid_batch, 'predid': predid_cur_frames } _, prev_thid_list, _, _, _ = self.inference_timestep(infos, tplt_dict, x, y_mask, \ prev_thid_list=prev_thid_list, prev_mask=prev_mask, mask_hist=mask_hist, proposal_cur=proposal_cur) else: # ---- start inference of current batch ---- infos = { 'args': args, 'shape': img_shape, 'extra_frame': extra_frame, 'valid': tplt_valid_batch, 'predid': predid_cur_frames } outs, thid_list, init_pred_inst, proposals, mask_hist = self.inference_timestep( infos, tplt_dict, x, y_mask, prev_thid_list=prev_thid_list, prev_mask=prev_mask, mask_hist=mask_hist, proposal_cur=proposal_cur) self.meters.update(ft=time.time() - tic) prev_mask = outs.view(B, O, HW) if args.only_spatial == False: prev_thid_list = thid_list prev_mask = outs.view(B, O, HW) if frame_idx > 0 else y_mask # ---------------- save merged mask ---------------------- for b in range(B): if extra_frame[b]: continue # skip the extra frames saved_name = self.eval_output_root + 'merged/%s/%s.png' % ( seq_name[b], frame_names_batch[b][frame_idx]) obj_index = tplt_valid_batch[b].sum() refine_mask = outs[b, :obj_index].view(-1, H * W) refine_bg = 1 - refine_mask.max(0)[0] refine_fbg = torch.cat( [refine_bg.view(1, H, W), refine_mask.view(-1, H, W)], dim=0) max_v, max_i = refine_fbg.max(0) eval_helper.plot_scores_map(max_i.float(), saved_name) # ---------------- save outs to mask ---------------------- del outs, thid_list, x, y_mask, init_pred_inst if (batch_idx % 10 == 0 and args.local_rank == 0): logging.info('save at {}'.format(self.eval_output_root)) logging.info(self.meters) def inference_timestep(self, infos, tplt_dict, x, y_mask, prev_thid_list, prev_mask, mask_hist, proposal_cur): r""" inference for frames at current image step, argument: infos: 'args','shape','extra_frame','valid','predid'} img_shape: list, len=B, element: [h,w] x: shape: B,3,H W y_mask: B,O,H W return init_pred_inst: BOHW, prediction from the mask branch, without refine tplt_dict, proposals, mask_hist_new #4,5,6,7,8 """ args = infos['args'] img_shape = infos['shape'] extra_frame, tplt_valid_batch = infos['extra_frame'], infos['valid'] hidden_spatial = None out_masks = [] assert (isinstance(x, torch.Tensor)) features, proposals, _ = self.encoder( args, x, predid_cur_frames=infos['predid'], proposals=proposal_cur) bone_feat = features['backbone_feature'] # B,Lev,(D,H,W); B, D, H, W = x.shape thid_list = [] B, O, HW = CHECK3D(prev_mask) if mask_hist is None: mask_hist = prev_mask.view(B, O, H, W) assert ('mask' in proposals[0].fields()) init_pred_inst, tplt_dict, match_loss, mask_hist_new \ = self.DMM.inference(infos, proposals, bone_feat, mask_hist, tplt_dict ) valid_num_obj_max = max( 1, (tplt_valid_batch.sum(0) > 0).sum()) # shape: 1, O for t in range(0, valid_num_obj_max): if prev_thid_list is not None: hidden_temporal = prev_thid_list[t] if args.only_temporal: hidden_spatial = None else: hidden_temporal = None mask_lstm = [] maxpool = nn.MaxPool2d((2, 2), ceil_mode=True) prev_m_inst = torch.cat([ prev_mask[:, t, :].view(B, 1, H * W), y_mask[:, t, :].view( B, 1, H * W), init_pred_inst[:, t].view(B, 1, H * W) ], dim=2).view(B, 3, H, W) # cat along new dim prev_m_inst = maxpool(prev_m_inst) for _ in 
range(len(features['refine_input_feat'])): prev_m_inst = maxpool(prev_m_inst) mask_lstm.append(prev_m_inst) mask_lstm = list(reversed(mask_lstm)) out_mask, hidden = self.decoder(features['refine_input_feat'], mask_lstm, hidden_spatial, hidden_temporal) hidden_tmp = [hidden[ss][0] for ss in range(len(hidden))] hidden_spatial = hidden thid_list.append(hidden_tmp) upsample_match = nn.UpsamplingBilinear2d(size=(x.size()[-2], x.size()[-1])) out_mask = upsample_match(out_mask) for b in range(B): # should behave differently for differnet vid; is_template_valid_cur_b = tplt_valid_batch[b, t] # current batch if not is_template_valid_cur_b: continue mask_hist_new[b, t:t + 1, :, :] = torch.sigmoid( out_mask[b]) # shape: B,O,H,W and B,1,H,W out_mask = out_mask.view(out_mask.size(0), -1) out_masks.append(out_mask) del mask_lstm, hidden_temporal, hidden_tmp, prev_m_inst, out_mask out_masks = torch.cat(out_masks, 1).view(out_masks[0].size(0), len(out_masks), -1) # B,O,HW outs = torch.sigmoid(out_masks) outs_pad = outs.new_zeros(B, O, HW) outs_pad[:, :valid_num_obj_max, :] = outs return outs_pad, thid_list, init_pred_inst, proposals, mask_hist_new def forward_timestep_init(self, args, x, y_mask, predid_cur_frames, proposal_cur): features, proposals, cocoloss = self.encoder( args, x, predid_cur_frames=predid_cur_frames, proposals=proposal_cur) B, D, H, W = CHECK4D(x) tplt_valid_batch = [] for b in range(B): prop, template_valid = ohw_mask2boxlist(y_mask[b].view(-1, H, W)) # OHW tplt_valid_batch.append(template_valid) # append O proposals[b] = prop tplt_valid_batch = torch.stack(tplt_valid_batch, dim=0) tplt_dict = self.DMM.fill_template_dict(args, proposals, features, y_mask, tplt_valid_batch) return tplt_dict, tplt_valid_batch, proposals def prepare_frame_names(self, seq_name): njpgs_batch = [] img_shape = [] frame_names_batch = [] for inx, seq_name_b in enumerate(seq_name): frame_names = np.sort(os.listdir(self.seq_dir + '/' + seq_name_b)) frame_names = [ os.path.splitext(os.path.basename(fullname))[0] for fullname in frame_names ] vid_img = np.array( Image.open(self.seq_dir + '/' + seq_name_b + '/%s.jpg' % frame_names[0])) img_h, img_w, _ = vid_img.shape img_shape.append([img_h, img_w]) seq_info = self.data['videos'][seq_name_b]['objects'] frame_names_has_obj = [] for obj_id in seq_info.keys(): # loop over all objects for frame_name in seq_info[obj_id]['frames']: if frame_name not in frame_names_has_obj: # add if this a new frames frame_names_has_obj.append(frame_name) start_annotation_frame = frame_names_has_obj[0] id_start = frame_names.index(start_annotation_frame) if id_start != 0: logging.warning('find a video annotation not start from the first frame in ' + \ 'rgb images :{}; {}'.format(seq_name_b,frame_names[0])) frame_names = frame_names[id_start:] frame_names_batch.append(frame_names) njpgs = len(frame_names) njpgs_batch.append(njpgs) return njpgs_batch, img_shape, frame_names_batch
def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
    disable_allreduce_for_logging,
    per_iter_start_callback_fn=None,
    per_iter_end_callback_fn=None,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    def prefetcher(load_iterator):
        prefetch_stream = torch.cuda.Stream()
        pad_batches = []

        def _prefetch():
            try:
                # I'm not sure why the trailing _ is necessary but the reference used
                # "for i, (images, targets, _) in enumerate(data_loader):" so I'll keep it.
                images, targets, _ = next(load_iterator)
            except StopIteration:
                return None, None

            with torch.cuda.stream(prefetch_stream):
                # TODO: I'm not sure if the dataloader knows how to pin the targets' datatype.
                targets = [target.to(device, non_blocking=True) for target in targets]
                images = images.to(device, non_blocking=True)
            return images, targets

        next_images, next_targets = _prefetch()
        while next_images is not None:
            torch.cuda.current_stream().wait_stream(prefetch_stream)
            current_images, current_targets = next_images, next_targets
            next_images, next_targets = _prefetch()
            yield current_images, current_targets

    synchronize()
    optimizer.zero_grad()
    for iteration, (images, targets) in enumerate(prefetcher(iter(data_loader)), start_iter):
        if per_iter_start_callback_fn is not None:
            per_iter_start_callback_fn(iteration=iteration)

        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        if not disable_allreduce_for_logging:
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)
        else:
            meters.update(loss=losses, **loss_dict)

        # optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        # with optimizer.scale_loss(losses) as scaled_losses:
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()
        # set_grads_to_none(model)
        optimizer.zero_grad()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0 and arguments["save_checkpoints"]:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter and arguments["save_checkpoints"]:
            checkpointer.save("model_final", **arguments)

        # per-epoch work (testing)
        if per_iter_end_callback_fn is not None:
            # Note: iteration has been incremented previously for
            # human-readable checkpoint names (i.e. 60000 instead of 59999)
            # so need to adjust again here
            early_exit = per_iter_end_callback_fn(iteration=iteration - 1)
            if early_exit:
                break

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))

    if per_iter_end_callback_fn is not None:
        if early_exit:
            return True
        else:
            return False
    else:
        return None
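# The loop above only defines the hook points; how they are used is up to the
# caller. A hypothetical wiring for per_iter_end_callback_fn (the names and the
# mAP threshold are placeholders, not part of the snippet): evaluate every
# test_period iterations and request early exit once a target metric is hit.
def make_end_callback(run_eval_fn, test_period, target_map):
    def per_iter_end_callback_fn(iteration):
        if (iteration + 1) % test_period != 0:
            return False                    # keep training
        return run_eval_fn() >= target_map  # True -> early exit from do_train
    return per_iter_end_callback_fn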
def do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments, warmup_layers, warmup_iters):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    NEED_UNFREEZE = False
    if start_iter < warmup_iters and len(warmup_layers) != 0:
        l = freeze_modules(model, lambda x: x in warmup_layers)
        logger.info(f"Warmup layers are {l}")
        NEED_UNFREEZE = True

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        if iteration > warmup_iters and NEED_UNFREEZE:
            l = freeze_modules(model, lambda x: True)
            logger.info(f"Train layer {l}")
            NEED_UNFREEZE = False

        # Clear cuda cache.
        # torch.cuda.empty_cache()  # TODO check if it helps

        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
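# freeze_modules is not defined in this section. One plausible reading of the
# calls above (first with `lambda x: x in warmup_layers`, later with
# `lambda x: True`) is sketched below as a hypothetical helper: the predicate
# selects which top-level modules stay trainable, everything else is frozen,
# and the trainable names are returned for logging.
def freeze_modules(model, train_predicate):
    trained = []
    for name, module in model.named_children():
        requires_grad = bool(train_predicate(name))
        for p in module.parameters():
            p.requires_grad = requires_grad
        if requires_grad:
            trained.append(name)
    return trained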
def do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() for iteration, (images, targets, _) in enumerate(data_loader, start_iter): data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join( [ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ] ).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, ) ) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) checkpointer.save("model_{:07d}".format(iteration), **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info( "Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter) ) )
def do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, output_dir, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() writer = SummaryWriter(log_dir=os.path.join(output_dir, 'run')) for iteration, (images, targets, _) in enumerate(data_loader, start_iter): data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) # add tensorboard -- tuo if iteration % 20 == 0: for name, meter in meters.meters.items(): if 'loss' in name: writer.add_scalar(name, meter.avg, iteration) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join( [ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ] ).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, ) ) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration == max_iter: checkpointer.save("model_final", **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info( "Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter) ) )
def do_train( model, data_loader_train, data_loaders_valid, optimizer, scheduler, checkpointer, device, checkpoint_period, validation_period, arguments, exp_name, ): logger = logging.getLogger("Training") logger.info("Start training") meters = MetricLogger(delimiter=" ") tensorboard_path = os.path.join('../output/tensorboard', exp_name) tensorboard_logger = TensorboardXLogger(log_dir=tensorboard_path) max_iter = len(data_loader_train) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() # validation(model, data_loaders_valid, device, logger, tensorboard_logger, start_iter) for iteration, (images, targets, _) in enumerate(data_loader_train, start_iter): data_time = time.time() - end arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] loss_dict, _ = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(total_loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() optimizer.step() meters.update(lr=optimizer.param_groups[0]["lr"]) batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) tensorboard_logger.write(meters, iteration, phase='Train') if iteration % (validation_period / 10) == 0 or iteration == ( max_iter - 1): logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % validation_period == 0 and iteration > 0: validation(model, data_loaders_valid, device, logger, tensorboard_logger, iteration) if iteration % checkpoint_period == 0 and iteration > 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) checkpointer.save("model_{:07d}".format(iteration), **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter))) tensorboard_logger.export_to_json()
def do_train( reid_model, model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() reid_model.eval() start_training_time = time.time() end = time.time() for iteration, (images, targets, _) in enumerate(data_loader, start_iter): data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] result, loss_dict = model(images, targets) images_reid, labels_reid = resize_to_image(images.tensors, targets, result) if images_reid is None: # pass loss_dict.update( dict(cls_loss=torch.tensor(0).type_as( loss_dict['loss_classifier']))) loss_dict.update( dict(tri_loss=torch.tensor(0).type_as( loss_dict['loss_classifier']))) else: images_reid = [o.to(device) for o in images_reid] labels_reid = labels_reid.to(device) loss_dict = reid_model(images_reid, labels_reid, iteration, 'train', loss_dict) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() # with amp.scale_loss(losses, optimizer) as scaled_loss: # scaled_loss.backward() losses.backward() optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration == max_iter: checkpointer.save("model_final", **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
def do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] # model.eval() # summary(model, [(3, 608, 608)]) model.train() # print(model,'==============================================================================') # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0 # model = model.to(device) start_training_time = time.time() end = time.time() # lambda1 = lambda epoch: 10 ** np.random.uniform(0, -3) lambda1 = lambda iteration: get_triangular_lr(iteration, 1000, 10** (0), 10**(0)) lambda2 = lambda iteration: get_decay_lr(iteration, 10**(0)) scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda1) for iteration, (images, targets, _, io) in enumerate(data_loader, start_iter): # for iteration, (images, targets, _) in enumerate(data_loader, start_iter): # for target in targets: # print(target.get_field('rotations'), '==========') # print(len(targets[0]), len(targets[1]), '=========================================') data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] # print(type(targets[1]),targets[1],'===============================================') loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration == max_iter: checkpointer.save("model_final", **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
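# get_triangular_lr / get_decay_lr used in the LambdaLR lambdas above are not
# shown. A plausible cyclical factor in the spirit of Smith's triangular
# schedule is sketched here as an assumption; note that with both bounds set to
# 10**0, as in the call above, the multiplier is constantly 1 and the LambdaLR
# is effectively a no-op.
def get_triangular_lr(iteration, stepsize, lr_min, lr_max):
    cycle = iteration // (2 * stepsize)
    x = abs(iteration / stepsize - 2 * cycle - 1)  # goes 1 -> 0 -> 1 over one cycle
    return lr_min + (lr_max - lr_min) * max(0.0, 1.0 - x)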
def do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments, epoch_id, eval_in_train,
             eval_out_dir, eval_in_train_per_iter, iou_thresh_eval, min_loss,
             eval_aug_thickness):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info(f"Start training {epoch_id}")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    predictions_all = []
    losses_last = 100
    for iteration, batch in enumerate(data_loader, start_iter):
        fn = [os.path.basename(os.path.dirname(nm)) for nm in batch['fn']]
        if SHOW_FN:
            print(f'\t\t{fn}')
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        batch['x'][1] = batch['x'][1].to(device)
        batch['y'] = [b.to(device) for b in batch['y']]

        loss_dict, predictions_i = model(batch['x'], batch['y'])

        if CHECK_NAN:
            any_nan = sum(torch.isnan(v.data) for v in loss_dict.values())
            if any_nan:
                print(f'\nGot nan loss:\n{fn}\n')
                import pdb
                pdb.set_trace()  # XXX BREAKPOINT
                continue

        losses = sum(loss for loss in loss_dict.values())

        if eval_in_train > 0 and epoch_id % eval_in_train == 0:
            data_id = batch['id']
            for k in range(len(data_id)):
                predictions_i[k].constants['data_id'] = data_id[k]
            predictions_i = [p.to(torch.device('cpu')) for p in predictions_i]
            [p.detach() for p in predictions_i]
            predictions_all += predictions_i
            if eval_in_train_per_iter > 0 and epoch_id % eval_in_train_per_iter == 0:
                logger.info(f'\nepoch {epoch_id}, data_id:{data_id}\n')
                eval_res_i = evaluate(dataset=data_loader.dataset,
                                      predictions=predictions_i,
                                      iou_thresh_eval=iou_thresh_eval,
                                      output_folder=eval_out_dir,
                                      box_only=False)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        with autograd.detect_anomaly():
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 1 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        avg_loss = meters.loss.avg
        tmp_p = max(int(checkpoint_period // 10), 20)
        if iteration % tmp_p == 0 and avg_loss < min_loss:
            checkpointer.save("model_min_loss", **arguments)
            logger.info(f'\nmin loss: {avg_loss} at {iteration}\n')
            min_loss = avg_loss
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)\n".format(
        total_time_str, total_training_time / max_iter))

    if eval_in_train > 0 and epoch_id % eval_in_train == 0:
        logger.info(f'\nepoch {epoch_id}\n')
        preds = down_sample_for_eval_training(predictions_all)
        eval_res = evaluate(dataset=data_loader.dataset,
                            predictions=preds,
                            iou_thresh_eval=iou_thresh_eval,
                            output_folder=eval_out_dir,
                            box_only=False,
                            epoch=epoch_id,
                            is_train=True,
                            eval_aug_thickness=eval_aug_thickness)
    return min_loss
def do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) print(max_iter) start_iter = arguments["iteration"] print(start_iter) model.train() start_training_time = time.time() end = time.time() for iteration, (images, targets, _) in enumerate(data_loader, start_iter): data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration #ipdb.set_trace() #unloader=transforms.ToPILImage() #showimg = unloader(images.tensors[0]) #showimg = np.array(showimg) #bboxs=targets[0].bbox #bboxs = bboxs.numpy() #for bbox in bboxs: # cv2.rectangle(showimg, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 4) #plt.imshow(showimg) #for mask in targets[0].extra_fields['masks'].polygons: # poly = mask.polygons # poly = poly[0] # #poly = poly.numpy() # n = len(poly) # x = [] # y = [] # for i in range(int(n/2)): # x.append(int(poly[i*2])) # y.append(int(poly[i*2+1])) # plt.plot(x,y,color="red",linewidth=2.0) # #plt.scatter(x, y, color = 'red') #ipdb.set_trace() #plt.imshow(showimg) #plt.show() scheduler.step() #time.sleep( 1 ) images = images.to(device) targets = [target.to(device) for target in targets] loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) checkpointer.save("model_{:07d}".format(iteration), **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
def do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, use_amp, cfg, dllogger, per_iter_end_callback_fn=None, ): dllogger.log(step="PARAMETER", data={"train_start": True}) meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() if use_amp: scaler = torch.cuda.amp.GradScaler(init_scale=8192.0) for iteration, (images, targets, _) in enumerate(data_loader, start_iter): data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration images = images.to(device) targets = [target.to(device) for target in targets] if use_amp: with torch.cuda.amp.autocast(): loss_dict = model(images, targets) else: loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) # Note: If mixed precision is not used, this ends up doing nothing # Otherwise apply loss scaling for mixed-precision recipe if use_amp: scaler.scale(losses).backward() else: losses.backward() def _take_step(): if use_amp: scaler.step(optimizer) scaler.update() else: optimizer.step() scheduler.step() optimizer.zero_grad() if not cfg.SOLVER.ACCUMULATE_GRAD: _take_step() else: if (iteration + 1) % cfg.SOLVER.ACCUMULATE_STEPS == 0: for param in model.parameters(): if param.grad is not None: param.grad.data.div_(cfg.SOLVER.ACCUMULATE_STEPS) _take_step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: log_data = {"eta":eta_string, "learning_rate":optimizer.param_groups[0]["lr"], "memory": torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 } log_data.update(meters.get_dict()) dllogger.log(step=(iteration,), data=log_data) if cfg.SAVE_CHECKPOINT: if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration == max_iter: checkpointer.save("model_final", **arguments) # per-epoch work (testing) if per_iter_end_callback_fn is not None: early_exit = per_iter_end_callback_fn(iteration=iteration) if early_exit: break total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) dllogger.log(step=tuple(), data={"e2e_train_time": total_training_time, "train_perf_fps": max_iter * cfg.SOLVER.IMS_PER_BATCH / total_training_time}) logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info( "Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter) ) )
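# The ACCUMULATE_GRAD path above sums gradients over ACCUMULATE_STEPS batches,
# rescales them to a mean, and only then steps the optimizer. The same recipe
# in isolation (a sketch; `model` is assumed to return a dict of losses as in
# the loops above, and `accumulate_steps` is a placeholder parameter):
import torch

def amp_accumulation_sketch(model, optimizer, scaler, batches, accumulate_steps, device):
    optimizer.zero_grad()
    for step, (images, targets) in enumerate(batches, start=1):
        with torch.cuda.amp.autocast():
            loss_dict = model(images.to(device), [t.to(device) for t in targets])
            losses = sum(loss for loss in loss_dict.values())
        scaler.scale(losses).backward()          # gradients accumulate across steps
        if step % accumulate_steps == 0:
            for param in model.parameters():     # sum of grads -> mean of grads
                if param.grad is not None:
                    param.grad.div_(accumulate_steps)
            scaler.step(optimizer)               # unscales, checks for inf/nan, steps
            scaler.update()
            optimizer.zero_grad()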
def do_train( model, model_ema, data_loader, optimizer, scheduler, checkpointer, device, local_rank, checkpoint_period, cfg_arg, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") meters_ema = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] ema_decay = arguments["ema_decay"] loss_semi = arguments['loss_semi'] temporal_save_path = cfg_arg["temporal_save_path"] model.train() model_ema.train() box_coder = BoxCoder(weights=(10., 10., 5., 5.)) temporal_ens = {} start_training_time = time.time() end = time.time() labeled_database = arguments["HYPER_PARAMETERS"]['LABELED_DATABASE'] temporal_supervised_losses = [] for iteration, (images, targets_with_trans_info, idx) in enumerate(data_loader, start_iter): targets = [_iter[0] for _iter in targets_with_trans_info] trans_info = [_iter[1] for _iter in targets_with_trans_info] try: db_idx, img_idx, idx_name, bboxes_batch = map_to_img( data_loader, idx) temporal_ens_bboxes = [ ensemble_bboxes(_boxes, _im_sz, arguments["ANCHOR_STRIDES"], arguments["HYPER_PARAMETERS"]['ENS_THRE'], device) for _boxes, _im_sz in zip(bboxes_batch, images.image_sizes) ] img_size = [(_sz[1], _sz[0]) for _sz in images.image_sizes] pred_trans_info = copy.deepcopy(trans_info) temporal_ens_pred = [] for i, _sz in enumerate(img_size): pred_trans_info[i][1] = _sz temporal_ens_per = [ trans_reverse(_temporal_ens, pred_trans_info[i]).to(device) for _temporal_ens in temporal_ens_bboxes[i] ] temporal_ens_pred.append(temporal_ens_per) db_w = [] for i, _db in enumerate(db_idx): if _db not in labeled_database: _bbox = BoxList( torch.zeros([1, 4]), (images.image_sizes[i][1], images.image_sizes[i][0]), mode="xyxy") _bbox.add_field('labels', torch.ones([1])) targets[i] = _bbox db_w.append(0.) else: db_w.append(1.) 
if any(len(target) < 1 for target in targets): logger.error( f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" ) continue data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration images = images.to(device) targets = [target.to(device) for target in targets] update_ema_variables(model, model_ema, ema_decay, iteration) _loss_dict, result = model(images, targets) #---------------------loss masked by with torch.no_grad(): _loss_dict_ema, result_ema = model_ema(images, targets) is_labeled_db_weight = torch.tensor( db_w, dtype=torch.float32).to(device) loss_dict = {} loss_dict_ema = {} for _key in _loss_dict.keys(): loss_dict[_key] = torch.sum( torch.stack(_loss_dict[_key], dim=0) * is_labeled_db_weight) loss_dict_ema[_key] = torch.sum( torch.stack(_loss_dict_ema[_key], dim=0) * is_labeled_db_weight) # loss_dict = _loss_dict # loss_dict_ema = _loss_dict_ema #result_origin = [trans_reverse(_res,_info) for _res,_info in zip(result_ema,trans_info)] #result_origin = predict_collect_postprocess(arguments['postprocess'],result_ema,trans_info) result_origin = predict_retina_postprocess( arguments['postprocess'], box_coder, result_ema, trans_info, images.image_sizes) # any_zeros = [_iter.bbox.shape[0] == 0 for _iter in temporal_ens_pred] # if any(any_zeros): # loss_dict['semi_box_reg'] = torch.tensor(0,dtype=torch.float32,device=device) # loss_dict['semi_cls'] = torch.tensor(0,dtype=torch.float32,device=device) # else: # semi_loss = loss_semi( # result, temporal_ens_pred) # for _key in semi_loss.keys(): # loss_dict[_key] = torch.sum(torch.stack(semi_loss[_key],dim=0) * (1 - db_weight)) * arguments["semi_weight"] #balance losses with torch.no_grad(): supversed_loss = (loss_dict['loss_retina_cls'] + loss_dict['loss_retina_reg']) / ( np.sum(db_w) + 0.1) temporal_supervised_losses.append(supversed_loss) temporal_supervised_losses = temporal_supervised_losses[-100:] sup_loss = torch.stack(temporal_supervised_losses).mean() meters.update(sup_loss=sup_loss) if get_world_size() > 1: torch.distributed.all_reduce( torch.stack(temporal_supervised_losses).mean(), op=torch.distributed.ReduceOp.SUM) balance_weight = min(1. / (sup_loss / 0.28)**12, 1.) 
semi_loss = semi_loss_fn( result, result_ema, temporal_ens_pred, images.image_sizes, box_coder, n_cls=arguments["HYPER_PARAMETERS"]['NCLS'], reg_cons_w=arguments["HYPER_PARAMETERS"]['REG_CONSIST_WEIGHT']) semi_loss_weight = semi_weight_by_epoch( iteration, start_iter=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"]['START_ITER'], rampup_length=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"]['RAMPUP_LENGTH'], consistence_weight=arguments["HYPER_PARAMETERS"] ['CONSISTENCE_WEIGHT'], consistence_trunc=arguments["HYPER_PARAMETERS"] ['MAX_CONSISTENT_LOSS']) #semi_weight_by_epoch(iteration) for _key in semi_loss.keys(): #loss_dict[_key] = torch.sum(semi_loss[_key] * (1 - is_labeled_db_weight))*semi_loss_weight*balance_weight # not used labeled loss_dict[_key] = torch.sum(semi_loss[_key]) * semi_loss_weight for i, (_id, _labeled) in enumerate(zip(idx_name, db_w)): # if _labeled == 1: # continue result_dict = { 'iteration': iteration, 'result': result_origin[i] } if _id in temporal_ens.keys(): temporal_ens[_id].append(result_dict) else: temporal_ens[_id] = [result_dict] #print('id={},{},scores={}----------{}'.format(idx_name[0],idx_name[1],result_origin[0].get_field('objectness')[:5],result_origin[1].get_field('objectness')[:5])) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) loss_dict_reduced_ema = reduce_loss_dict(loss_dict_ema) losses_reduced_ema = sum( loss for loss in loss_dict_reduced_ema.values()) meters_ema.update(loss=losses_reduced_ema, **loss_dict_reduced_ema) optimizer.zero_grad() # Note: If mixed precision is not used, this ends up doing nothing # Otherwise apply loss scaling for mixed-precision recipe with amp.scale_loss(losses, optimizer) as scaled_losses: scaled_losses.backward() if not iteration < arguments["HYPER_PARAMETERS"][ 'EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"][ 'START_ITER']: optimizer.step() #scheduler.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "{meters_ema}", "lr: {lr:.6f}", "semi_w:{semi_w:2.3f}", "supervised loss{sup_loss:2.3f}," "balance_weight{balance_weight:2.3f}," "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), meters_ema=str(meters_ema), lr=optimizer.param_groups[0]["lr"], semi_w=semi_loss_weight, sup_loss=sup_loss, balance_weight=balance_weight, memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if (iteration - 50) % 100 == 0: for _key in temporal_ens.keys(): for _iter in temporal_ens[_key]: str_folder = os.path.join( temporal_save_path, _key) #"{}/{}".format(temporal_save_path,_key) str_file = '{}/{}_loc{}_iter_x{:07d}.pt'.format( str_folder, _key, local_rank, _iter['iteration']) if not os.path.exists(str_folder): os.makedirs(str_folder) torch.save(_iter['result'], str_file) del _iter['result'] del temporal_ens temporal_ens = {} if iteration % checkpoint_period == 0: save_time = time.time() checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration == max_iter: checkpointer.save("model_final", 
**arguments) except Exception: logger.error('error in file {} {}'.format(idx_name, img_idx)) raise total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
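# update_ema_variables() is not defined in this file. The usual mean-teacher
# update it is assumed to perform (an assumption, not the repo's verified code)
# keeps model_ema as an exponential moving average of the student, with the
# decay ramped up during the first iterations:
import torch

def update_ema_variables_sketch(model, model_ema, ema_decay, iteration):
    alpha = min(1.0 - 1.0 / (iteration + 1), ema_decay)   # warm up the decay
    with torch.no_grad():
        for ema_param, param in zip(model_ema.parameters(), model.parameters()):
            ema_param.mul_(alpha).add_(param, alpha=1.0 - alpha)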
def do_train( model, data_loader, optimizer, scheduler, checkpointer, summary_writer, device, checkpoint_period, summary_period, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() for iteration, (images, targets, _) in enumerate(data_loader, start_iter): data_time = time.time() - end arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration % summary_period == 0: summary_writer.add_image( 'input_image', vutils.make_grid(images.tensors[:, [2, 1, 0]], normalize=True), iteration) summary_writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], iteration) summary_writer.add_scalar( 'model/loss_rpn_box_reg', loss_dict_reduced['loss_rpn_box_reg'].item(), iteration) summary_writer.add_scalar('model/loss_mask', loss_dict_reduced['loss_mask'].item(), iteration) summary_writer.add_scalar('model/loss_box_reg', loss_dict_reduced['loss_box_reg'].item(), iteration) summary_writer.add_scalar( 'model/loss_classifier', loss_dict_reduced['loss_classifier'].item(), iteration) if 'loss_maskiou' in loss_dict_reduced: summary_writer.add_scalar( 'model/loss_maskiou', loss_dict_reduced['loss_maskiou'].item(), iteration) summary_writer.add_scalar( 'model/loss_objectness', loss_dict_reduced['loss_objectness'].item(), iteration) summary_writer.add_scalar('model/loss', losses_reduced.item(), iteration) iteration = iteration + 1 checkpointer.save("model_{:07d}".format(iteration), **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
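# The summary block above hard-codes each loss key. An equivalent, more compact
# pattern (a sketch, assuming summary_writer follows the TensorBoard
# SummaryWriter API) iterates the reduced dict so any new loss term added to
# the model is logged automatically:
def write_loss_summaries(summary_writer, loss_dict_reduced, losses_reduced, lr, iteration):
    summary_writer.add_scalar('learning_rate', lr, iteration)
    for name, value in loss_dict_reduced.items():
        summary_writer.add_scalar('model/' + name, value.item(), iteration)
    summary_writer.add_scalar('model/loss', losses_reduced.item(), iteration)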
def do_train( cfg, total_model, data_loader, data_loader_val, optimizer, scheduler, checkpointer, device, checkpoint_period, test_period, arguments, args, ): if len(total_model) > 1: model = total_model[1] t_model = total_model[0] else: model = total_model[0] logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() iou_types = ("bbox", ) if cfg[0].MODEL.MASK_ON: iou_types = iou_types + ("segm", ) if cfg[0].MODEL.KEYPOINT_ON: iou_types = iou_types + ("keypoints", ) dataset_names = cfg[0].DATASETS.TEST pytorch_1_1_0_or_later = is_pytorch_1_1_0_or_later() for iteration, (images, targets, _) in enumerate(data_loader, start_iter): data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration # in pytorch >= 1.1.0, scheduler.step() should be run after optimizer.step() if not pytorch_1_1_0_or_later: scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] loss_dict, features_dict = model(images, targets) if len(total_model) > 1: with torch.no_grad(): t_loss_dict, t_features_dict = t_model(images, targets) # with torch.no_grad(): # # teacher_model = t_model # t_weight = torch.load('./weights/centermask-V-19-eSE-FPN-ms-3x.pth') # t_weight = t_weight['model'] # new_tweight = OrderedDict() # for k, v in t_weight.items(): # name = k[7:] # remove `module.` # new_tweight[name] = v # t_model.load_state_dict(new_tweight) # t_loss_dict, t_features_dict = t_model(images, targets) if args.loss_head: loss_regression = new_box_loss(t_loss_dict['loss_reg'], loss_dict['loss_reg']) loss_center = new_center_loss(t_loss_dict['loss_centerness'], loss_dict['loss_centerness']) mode = 'KL' # mode = 'KL' or 'cross-entropy' loss_pixel_wise = pixel_wise_loss(features_dict['box_cls'], t_features_dict['box_cls'], mode) loss_head = (loss_regression + loss_center + loss_pixel_wise) loss_dict.setdefault('loss_head', loss_head) del loss_dict['loss_reg'] del loss_dict['loss_centerness'] if iteration > cfg[0].SOLVER.WARMUP_ITERS: if args.loss_correlation: correlation = True loss_corr = get_feature(t_model, model, images, targets, correlation) loss_dict.setdefault('loss_corr', loss_corr) if args.loss_featuremap: correlation = False loss_featuremap = get_feature(t_model, model, images, targets, correlation) loss_dict.setdefault('loss_featuremap', loss_featuremap) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() optimizer.step() if pytorch_1_1_0_or_later: scheduler.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) 
if data_loader_val is not None and test_period > 0 and iteration % test_period == 0 and iteration != 0: meters_val = MetricLogger(delimiter=" ") synchronize() _ = inference( # The result can be used for additional logging, e. g. for TensorBoard model, # The method changes the segmentation mask format in a data loader, # so every time a new data loader is created: make_data_loader(cfg[0], is_train=False, is_distributed=(get_world_size() > 1), is_for_period=True), dataset_name="[Validation]", iou_types=iou_types, box_only=False if cfg[0].MODEL.MASK_ON else cfg[0].MODEL.RPN_ONLY, device=cfg[0].MODEL.DEVICE, expected_results=cfg[0].TEST.EXPECTED_RESULTS, expected_results_sigma_tol=cfg[0].TEST. EXPECTED_RESULTS_SIGMA_TOL, output_folder=None, ) synchronize() model.train() with torch.no_grad(): # Should be one image for each GPU: for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)): images_val = images_val.to(device) targets_val = [target.to(device) for target in targets_val] loss_dict = model(images_val, targets_val) if len(loss_dict) > 1: loss_dict = loss_dict[0] else: loss_dict = loss_dict losses = sum(loss for loss in loss_dict.values()) loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum( loss for loss in loss_dict_reduced.values()) meters_val.update(loss=losses_reduced, **loss_dict_reduced) synchronize() logger.info( meters_val.delimiter.join([ "[Validation]: ", "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters_val), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration == max_iter: checkpointer.save("model_final", **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
def do_train( cfg, model, data_loader, data_loader_val, optimizer, scheduler, checkpointer, device, checkpoint_period, test_period, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() iou_types = ("bbox", ) if cfg.MODEL.MASK_ON: iou_types = iou_types + ("segm", ) if cfg.MODEL.KEYPOINT_ON: iou_types = iou_types + ("keypoints", ) dataset_names = cfg.DATASETS.TEST for iteration, (images, targets, _) in enumerate(data_loader, start_iter): if any(len(target) < 1 for target in targets): logger.error( f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" ) continue data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration images = images.to(device) targets = [target.to(device) for target in targets] loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() # Note: If mixed precision is not used, this ends up doing nothing # Otherwise apply loss scaling for mixed-precision recipe # with amp.scale_loss(losses, optimizer) as scaled_losses: # scaled_losses.backward() losses.backward() optimizer.step() scheduler.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) if data_loader_val is not None and test_period > 0 and iteration % test_period == 0: meters_val = MetricLogger(delimiter=" ") synchronize() _ = inference( # The result can be used for additional logging, e. g. 
for TensorBoard model, # The method changes the segmentation mask format in a data loader, # so every time a new data loader is created: make_data_loader(cfg, is_train=False, is_distributed=(get_world_size() > 1), is_for_period=True), dataset_name="[Validation]", iou_types=iou_types, box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, device=cfg.MODEL.DEVICE, expected_results=cfg.TEST.EXPECTED_RESULTS, expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, output_folder=None, ) synchronize() model.train() with torch.no_grad(): # Should be one image for each GPU: for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)): images_val = images_val.to(device) targets_val = [target.to(device) for target in targets_val] loss_dict = model(images_val, targets_val) losses = sum(loss for loss in loss_dict.values()) loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum( loss for loss in loss_dict_reduced.values()) meters_val.update(loss=losses_reduced, **loss_dict_reduced) synchronize() logger.info( meters_val.delimiter.join([ "[Validation]: ", "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters_val), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration == max_iter: checkpointer.save("model_final", **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
def test_while_train(cfg, model, distributed, logger, curr_iter, val_tags, data_loader, output_folder): torch.cuda.empty_cache() logger.info("start testing while training...") # only the first one for test model.eval() results_dict = {} device = torch.device('cuda') cpu_device = torch.device("cpu") meters = MetricLogger(delimiter=" ", ) for bid, (images, targets, image_ids, phrase_ids, sent_ids, sentence, precompute_bbox, precompute_score, feature_map, vocab_label_elmo, sent_sg, topN_box) in enumerate(tqdm(data_loader)): # if bid>3: # break vocab_label_elmo = [vocab.to(device) for vocab in vocab_label_elmo] features_list = [feat.to(device) for feat in feature_map] with torch.no_grad(): loss_dict, results = model(images, features_list, targets, phrase_ids, sentence, precompute_bbox, precompute_score, image_ids, vocab_label_elmo, sent_sg, topN_box) loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) # collect and move result to cpu memory moved_res = [] if cfg.MODEL.VG.TWO_STAGE: if cfg.MODEL.RELATION_ON and cfg.MODEL.RELATION.USE_RELATION_CONST: batch_gt_boxes, batch_pred_box, batch_pred_box_topN, batch_pred_box_det,\ batch_pred_similarity, batch_pred_similarity_topN, batch_rel_pred_similarity, batch_rel_gt_label, batch_topN_boxes, batch_reg_offset_topN, batch_rel_score_mat=results for idx, each_gt_boxes in enumerate(batch_gt_boxes): moved_res.append( (each_gt_boxes.to(cpu_device), batch_pred_box[idx].to(cpu_device), batch_pred_box_topN[idx].to(cpu_device), batch_pred_box_det[idx].to(cpu_device), batch_pred_similarity[idx].to(cpu_device), batch_pred_similarity_topN[idx].to(cpu_device), batch_rel_pred_similarity[idx].to(cpu_device), batch_rel_gt_label[idx].to(cpu_device), batch_topN_boxes[idx].to(cpu_device), batch_reg_offset_topN[idx].to(cpu_device), batch_rel_score_mat[idx])) else: batch_gt_boxes, batch_pred_box, batch_pred_box_topN, batch_pred_box_det, batch_pred_similarity = results for idx, each_gt_boxes in enumerate(batch_gt_boxes): moved_res.append( (each_gt_boxes.to(cpu_device), batch_pred_box[idx].to(cpu_device), batch_pred_box_topN[idx].to(cpu_device), batch_pred_box_det[idx].to(cpu_device), batch_pred_similarity[idx].to(cpu_device))) else: batch_gt_boxes, batch_pred_box, batch_pred_box_det, batch_pred_similarity = results for idx, each_gt_boxes in enumerate(batch_gt_boxes): moved_res.append( (each_gt_boxes.to(cpu_device), batch_pred_box[idx].to(cpu_device), batch_pred_box_det[idx].to(cpu_device), batch_pred_similarity[idx].to(cpu_device))) results_dict.update({ img_id + '_' + sent_id: result for img_id, sent_id, result in zip(image_ids, sent_ids, moved_res) }) synchronize() (predictions, image_ids) = _accumulate_predictions_from_multiple_gpus(results_dict) if output_folder: with open( os.path.join(output_folder, "predictions_{}.pkl".format(curr_iter)), 'wb') as f: pickle.dump(predictions, f) torch.save( predictions, os.path.join(output_folder, "predictions_{}.pth".format(curr_iter))) torch.cuda.empty_cache() if not is_main_process(): return logger.info('Total items num is {}'.format(len(predictions))) # with open(os.path.join(cfg.OUTPUT_DIR, 'prediction.pkl'), 'wb') as handle: # pickle.dump(predictions, handle, protocol=pickle.HIGHEST_PROTOCOL) iou_types = ("bbox", ) if cfg.MODEL.MASK_ON: iou_types = iou_types + ("segm", ) if cfg.MODEL.KEYPOINT_ON: iou_types = iou_types + ("keypoints", ) box_only = False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY 
expected_results = cfg.TEST.EXPECTED_RESULTS expected_results_sigma_tol = cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL extra_args = dict( box_only=False, iou_types=iou_types, expected_results=expected_results, expected_results_sigma_tol=expected_results_sigma_tol, ) acc, acc_topN, acc_det, acc_rel_softmax = evaluate( dataset=data_loader.dataset, predictions=predictions, image_ids=image_ids, curr_iter=curr_iter, output_folder=None, **extra_args) record = {val_tags[k]: v for (k, v) in meters.meters.items()} logger.log(TFBoardHandler_LEVEL, (record, curr_iter)) logger.info("current accuracy is: {}".format(acc)) logger.info("current topN accuracy is: {}".format(acc_topN)) logger.info("current accuracy with detection score is: {}".format(acc_det)) logger.info( "current rel constrain accuracy is: {}".format(acc_rel_softmax)) logger.log(TFBoardHandler_LEVEL, ({ val_tags['acc']: acc, val_tags['acc_topN']: acc_topN, val_tags['acc_det']: acc_det, val_tags['acc_rel_softmax']: acc_rel_softmax }, curr_iter)) logger.info("test done !")
def do_da_train(model, source_data_loader, target_data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, cfg): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") # TODO(li): why is max_iter defined here? max_iter = len(source_data_loader) start_iter = arguments["iteration"] # switch to training mode; this does not start training by itself model.train() start_training_time = time.time() end = time.time() # Unlike the SHOT code, there is no separate for-loop over source_data_loader alone: zip() walks the source and target loaders together. # Note that "iteration" is no longer an epoch; each iteration consumes one batch from each DataLoader, not a full pass over the dataset. for iteration, ((source_images, source_targets, idx1), (target_images, target_targets, idx2))\ in enumerate(zip(source_data_loader, target_data_loader), start_iter): data_time = time.time() - end arguments["iteration"] = iteration # concatenate source and target data images = (source_images + target_images).to(device) targets = [ target.to(device) for target in list(source_targets + target_targets) ] # forward pass loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() # backward pass losses.backward() optimizer.step() scheduler.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) # log once every 20 iterations if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration == max_iter - 1: checkpointer.save("model_final", **arguments) if torch.isnan(losses_reduced).any(): logger.critical('Loss is NaN, exiting...') return total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter))) # DA end
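# zip() above stops at the shorter of the two loaders, so max_iter (taken from
# the source loader alone) only matches the real iteration count when both
# loaders have the same length. A common workaround (a sketch, not this repo's
# code) restarts the target loader whenever it is exhausted, so every source
# batch gets paired with some target batch:
def paired_da_batches(source_loader, target_loader):
    target_iter = iter(target_loader)
    for source_batch in source_loader:
        try:
            target_batch = next(target_iter)
        except StopIteration:
            target_iter = iter(target_loader)   # restart the shorter loader
            target_batch = next(target_iter)
        yield source_batch, target_batch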
def train(cfg, local_rank, distributed, d_path=None): MaskDnet = MaskDiscriminator(nc=256) BBoxDnet = BoxDiscriminator(nc=256, ndf=64) Dnet = CombinedDiscriminator(MaskDnet, BBoxDnet) model = Mask_RCNN(cfg) g_rcnn = GAN_RCNN(model, Dnet) device = torch.device(cfg.MODEL.DEVICE) g_rcnn.to(device) g_optimizer = make_optimizer(cfg, model) d_optimizer = make_D_optimizer(cfg, Dnet) g_scheduler = make_lr_scheduler(cfg, g_optimizer) d_scheduler = make_lr_scheduler(cfg, d_optimizer) # model.BoxDnet = BBoxDnet # Initialize mixed-precision training use_mixed_precision = cfg.DTYPE == "float16" amp_opt_level = 'O1' if use_mixed_precision else 'O0' model, g_optimizer = amp.initialize(model, g_optimizer, opt_level=amp_opt_level) Dnet, d_optimizer = amp.initialize(Dnet, d_optimizer, opt_level=amp_opt_level) if distributed: g_rcnn = torch.nn.parallel.DistributedDataParallel( g_rcnn, device_ids=[local_rank], output_device=local_rank, # this should be removed if we update BatchNorm stats broadcast_buffers=False, ) arguments = {} arguments["iteration"] = 0 output_dir = cfg.OUTPUT_DIR save_to_disk = get_rank() == 0 checkpointer = DetectronCheckpointer( cfg, model, g_optimizer, g_scheduler, output_dir, save_to_disk ) extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT) arguments.update(extra_checkpoint_data) d_checkpointer = DetectronCheckpointer( cfg, Dnet, d_optimizer, d_scheduler, output_dir, save_to_disk ) if d_path: d_checkpointer.load(d_path, use_latest=False) data_loader = make_data_loader( cfg, is_train=True, is_distributed=distributed, start_iter=arguments["iteration"], ) test_period = cfg.SOLVER.TEST_PERIOD data_loader_val = make_data_loader(cfg, is_train=False, is_distributed=distributed, is_for_period=True) checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD ## START TRAINING logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = TensorboardLogger( log_dir=cfg.OUTPUT_DIR + "/tensorboardX", start_iter=arguments['iteration'], delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] g_rcnn.train() start_training_time = time.time() end = time.time() iou_types = ("bbox",) if cfg.MODEL.MASK_ON: iou_types = iou_types + ("segm",) dataset_names = cfg.DATASETS.TEST for iteration, (images, targets, _) in enumerate(data_loader, start_iter): if any(len(target) < 1 for target in targets): logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" ) continue data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration images = images.to(device) targets = [target.to(device) for target in targets] g_loss_dict, d_loss_dict = g_rcnn(images, targets) g_losses = sum(loss for loss in g_loss_dict.values()) d_losses = sum(loss for loss in d_loss_dict.values()) # reduce losses over all GPUs for logging purposes g_loss_dict_reduced = reduce_loss_dict(g_loss_dict) g_losses_reduced = sum(loss for loss in g_loss_dict_reduced.values()) d_loss_dict_reduced = reduce_loss_dict(d_loss_dict) d_losses_reduced = sum(loss for loss in d_loss_dict_reduced.values()) meters.update(total_g_loss=g_losses_reduced, **g_loss_dict_reduced) meters.update(total_d_loss=d_losses_reduced, **d_loss_dict_reduced) g_optimizer.zero_grad() # Note: If mixed precision is not used, this ends up doing nothing # Otherwise apply loss scaling for mixed-precision recipe with amp.scale_loss(g_losses, g_optimizer) as g_scaled_losses: g_scaled_losses.backward() g_optimizer.step() g_scheduler.step() 
d_optimizer.zero_grad() # Note: If mixed precision is not used, this ends up doing nothing # Otherwise apply loss scaling for mixed-precision recipe with amp.scale_loss(d_losses, d_optimizer) as d_scaled_losses: d_scaled_losses.backward() d_optimizer.step() d_scheduler.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join( [ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ] ).format( eta=eta_string, iter=iteration, meters=str(meters), lr=g_optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, ) ) if iteration % checkpoint_period == 0: checkpointer.save("model_{:07d}".format(iteration), **arguments) d_checkpointer.save("dnet_{:07d}".format(iteration), **arguments) if data_loader_val is not None and test_period > 0 and iteration % test_period == 0: meters_val = MetricLogger(delimiter=" ") synchronize() _ = inference( # The result can be used for additional logging, e. g. for TensorBoard model, # The method changes the segmentation mask format in a data loader, # so every time a new data loader is created: make_data_loader(cfg, is_train=False, is_distributed=False, is_for_period=True), dataset_name="[Validation]", iou_types=iou_types, box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, device=cfg.MODEL.DEVICE, expected_results=cfg.TEST.EXPECTED_RESULTS, expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, output_folder=cfg.OUTPUT_DIR, ) synchronize() model.train() with torch.no_grad(): # Should be one image for each GPU: for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)): images_val = images_val.to(device) targets_val = [target.to(device) for target in targets_val] loss_dict = model(images_val, targets_val) losses = sum(loss for loss in loss_dict.values()) loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters_val.update(loss=losses_reduced, **loss_dict_reduced) synchronize() logger.info( meters_val.delimiter.join( [ "[Validation]: ", "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ] ).format( eta=eta_string, iter=iteration, meters=str(meters_val), lr=g_optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, ) ) if iteration == max_iter: checkpointer.save("model_final", **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info( "Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter) ) )
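# Stripped of logging and apex loss scaling, the per-batch update above is a
# standard alternating GAN step: one generator (Mask R-CNN) step followed by one
# discriminator step. A sketch, under the assumption (as the original loop
# implies) that g_rcnn returns two loss dicts whose autograd graphs are
# independent, e.g. because the discriminator sees detached generator outputs:
def gan_rcnn_step_sketch(g_rcnn, images, targets, g_optimizer, d_optimizer, g_scheduler, d_scheduler):
    g_loss_dict, d_loss_dict = g_rcnn(images, targets)
    g_losses = sum(loss for loss in g_loss_dict.values())
    d_losses = sum(loss for loss in d_loss_dict.values())
    g_optimizer.zero_grad()
    g_losses.backward()
    g_optimizer.step()
    g_scheduler.step()
    d_optimizer.zero_grad()
    d_losses.backward()
    d_optimizer.step()
    d_scheduler.step()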
def do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() for iteration, (images, targets, _) in enumerate(data_loader, start_iter): # print('images: ', images) # print('targets: ', targets, targets[0].bbox) # print('targets: ', type(targets[0]), type(targets)) data_time = time.time() - end arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] # print('images.size(): ', images.tensors.size(), images.image_sizes) # print('targets: ', targets) loss_dict = model(images=images, iteration=iteration + 1, targets=targets) # print('loss_dict: ', loss_dict) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() losses.backward() optimizer.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if (iteration + 1) % 20 == 0 or (iteration + 1) == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration + 1, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if (iteration + 1) % checkpoint_period == 0 or (iteration + 1) == max_iter: checkpointer.save("model_{:07d}".format(iteration + 1), **arguments) total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
def do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, per_iter_start_callback_fn=None, per_iter_end_callback_fn=None, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] model.train() start_training_time = time.time() end = time.time() for iteration, (images, targets, _) in enumerate(data_loader, start_iter): if per_iter_start_callback_fn is not None: per_iter_start_callback_fn(iteration=iteration) data_time = time.time() - end iteration = iteration + 1 arguments["iteration"] = iteration scheduler.step() images = images.to(device) targets = [target.to(device) for target in targets] loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) losses.backward() optimizer.step() optimizer.zero_grad() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}", "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), lr=optimizer.param_groups[0]["lr"], memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if iteration % checkpoint_period == 0 and arguments["save_checkpoints"]: checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration == max_iter and arguments["save_checkpoints"]: checkpointer.save("model_final", **arguments) # per-epoch work (testing) if per_iter_end_callback_fn is not None: # Note: iteration has been incremented previously for # human-readable checkpoint names (i.e. 60000 instead of 59999) # so need to adjust again here early_exit = per_iter_end_callback_fn(iteration=iteration - 1) if early_exit: break total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter))) if per_iter_end_callback_fn is not None: if early_exit: return True else: return False else: return None
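# per_iter_end_callback_fn only needs to accept `iteration` as a keyword
# argument and return a truthy value when training should stop early (the
# comment above calls it "per-epoch work (testing)"). A minimal hypothetical
# example, not part of the repo, that exits once an iteration budget is spent:
def make_iteration_budget_callback(max_budget_iters):
    def callback(iteration):
        # stop once the iteration budget is spent
        return iteration >= max_budget_iters
    return callback

# usage sketch:
# do_train(..., per_iter_end_callback_fn=make_iteration_budget_callback(90000))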