def main():
    parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--vgg', help='Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    parser.add_argument('--resume', default=None, type=str, help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step', default=50, type=int, help='Print logs every log_step')
    parser.add_argument('--save_step', default=5000, type=int, help='Save checkpoint every save_step')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # release cached GPU memory so evaluation after training runs faster
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
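
The --use_tensorboard flag above is parsed with a str2bool converter that is not shown in these excerpts; a minimal sketch of what such a helper usually looks like (an assumption, not necessarily the repository's exact code):

import argparse

def str2bool(v):
    # Accept the usual truthy/falsey spellings so '--use_tensorboard false' behaves as expected.
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')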
Example No. 2
def main():
    parser = argparse.ArgumentParser(
        description='SSD Evaluation on VOC and COCO dataset.')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--weights", type=str, help="Trained weights.")
    parser.add_argument("--output_dir",
                        default="eval_results",
                        type=str,
                        help="The directory to store evaluation results.")

    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    evaluation(cfg,
               weights_file=args.weights,
               output_dir=args.output_dir,
               distributed=distributed)
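
setup_logger comes from the repository's utility code and is not reproduced on this page; a rough sketch of the behaviour the scripts appear to rely on (log to stdout, stay quiet on non-main ranks), not the exact implementation:

import logging
import sys

def setup_logger(name, distributed_rank):
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    if distributed_rank > 0:
        # Only the main process writes to the console.
        return logger
    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setFormatter(logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s"))
    logger.addHandler(handler)
    return logger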
Example No. 3
def main():
    parser = argparse.ArgumentParser(description='SSD Evaluation on VOC and COCO dataset.')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--weights", type=str, help="Trained weights.")
    parser.add_argument("--output_dir", default="eval_results", type=str, help="The directory to store evaluation results.")
    parser.add_argument("--eval_mode", default="test", type=str,
                        help='Use defined test datasets for final evaluation or use a validation split. Default: "test", alternative "val"')

    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    evaluation(cfg, args, weights_file=args.weights, output_dir=args.output_dir, distributed=distributed)
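
The positional opts argument is handed straight to cfg.merge_from_list, so trailing KEY VALUE pairs on the command line override the YAML config. A small stand-alone yacs illustration (the option names and values here are made up for the example):

from yacs.config import CfgNode as CN

cfg = CN()
cfg.MODEL = CN()
cfg.MODEL.DEVICE = "cuda"
cfg.TEST = CN()
cfg.TEST.CONFIDENCE_THRESHOLD = 0.01

# Equivalent to appending "MODEL.DEVICE cpu TEST.CONFIDENCE_THRESHOLD 0.5" after the script arguments.
cfg.merge_from_list(["MODEL.DEVICE", "cpu", "TEST.CONFIDENCE_THRESHOLD", "0.5"])
print(cfg.MODEL.DEVICE, cfg.TEST.CONFIDENCE_THRESHOLD)  # cpu 0.5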
Example No. 4
def do_evaluation(cfg, model, output_dir, distributed):
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model = model.module
    assert isinstance(model, SSD), 'Wrong module.'
    test_datasets = build_dataset(dataset_list=cfg.DATASETS.TEST, is_test=True)
    device = torch.device(cfg.MODEL.DEVICE)
    model.eval()
    if not model.is_test:
        model.is_test = True
    predictor = Predictor(cfg=cfg,
                          model=model,
                          iou_threshold=cfg.TEST.NMS_THRESHOLD,
                          score_threshold=cfg.TEST.CONFIDENCE_THRESHOLD,
                          device=device)

    cpu_device = torch.device("cpu")
    logger = logging.getLogger("SSD.inference")
    for dataset_name, test_dataset in zip(cfg.DATASETS.TEST, test_datasets):
        logger.info("Test dataset {} size: {}".format(dataset_name,
                                                      len(test_dataset)))
        indices = list(range(len(test_dataset)))
        if distributed:
            indices = indices[distributed_util.get_rank()::distributed_util.
                              get_world_size()]

        # show progress bar only on main process.
        progress_bar = tqdm if distributed_util.is_main_process() else iter
        logger.info('Progress on {} 0:'.format(cfg.MODEL.DEVICE.upper()))
        predictions = {}
        for i in progress_bar(indices):
            image = test_dataset.get_image(i)
            output = predictor.predict(image)
            boxes, labels, scores = [o.to(cpu_device).numpy() for o in output]
            predictions[i] = (boxes, labels, scores)
        distributed_util.synchronize()
        predictions = _accumulate_predictions_from_multiple_gpus(predictions)
        if not distributed_util.is_main_process():
            return

        final_output_dir = os.path.join(output_dir, dataset_name)
        if not os.path.exists(final_output_dir):
            os.makedirs(final_output_dir)
        torch.save(predictions,
                   os.path.join(final_output_dir, 'predictions.pth'))
        evaluate(dataset=test_dataset,
                 predictions=predictions,
                 output_dir=final_output_dir)
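
_accumulate_predictions_from_multiple_gpus is another helper that is not listed here; the usual pattern gathers every rank's {image index: (boxes, labels, scores)} dict onto the main process and merges them by index. A hedged sketch using torch.distributed.all_gather_object (the repository most likely ships its own gather utility in distributed_util instead):

import torch.distributed as dist

def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
    # Single-process run: nothing to gather.
    if not (dist.is_available() and dist.is_initialized()) or dist.get_world_size() == 1:
        return predictions_per_gpu
    # Each rank predicted a strided subset of indices; collect all of the per-rank dicts.
    gathered = [None] * dist.get_world_size()
    dist.all_gather_object(gathered, predictions_per_gpu)
    merged = {}
    for part in gathered:
        merged.update(part)
    return merged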
Example No. 5
def _evaluation(cfg, dataset_name, test_dataset, predictor, distributed, output_dir):
    """ Perform evaluating on one dataset
    Args:
        cfg:
        dataset_name: dataset's name
        test_dataset: Dataset object
        predictor: Predictor object, used to to prediction.
        distributed: whether distributed evaluating or not
        output_dir: path to save prediction results
    Returns:
        evaluate result
    """
    cpu_device = torch.device("cpu")
    logger = logging.getLogger("SSD.inference")
    logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(test_dataset)))
    indices = list(range(len(test_dataset)))
    if distributed:
        indices = indices[distributed_util.get_rank()::distributed_util.get_world_size()]

    # show progress bar only on main process.
    progress_bar = tqdm if distributed_util.is_main_process() else iter
    logger.info('Progress on {} 0:'.format(cfg.MODEL.DEVICE.upper()))
    predictions = {}
    for i in progress_bar(indices):
        image = test_dataset.get_image(i)
        output = predictor.predict(image)
        boxes, labels, scores = [o.to(cpu_device).numpy() for o in output]
        predictions[i] = (boxes, labels, scores)
    distributed_util.synchronize()
    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not distributed_util.is_main_process():
        return

    final_output_dir = os.path.join(output_dir, dataset_name)
    if not os.path.exists(final_output_dir):
        os.makedirs(final_output_dir)
    torch.save(predictions, os.path.join(final_output_dir, 'predictions.pth'))
    return evaluate(dataset=test_dataset, predictions=predictions, output_dir=final_output_dir)
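
Both evaluation paths dump the raw predictions to predictions.pth before metrics are computed, so results can be re-inspected offline. A small usage sketch, assuming the accumulated predictions keep the per-image (boxes, labels, scores) tuples (the path below is illustrative):

import torch

predictions = torch.load('eval_results/voc_2007_test/predictions.pth')
boxes, labels, scores = predictions[0]  # numpy arrays for the first image
print(boxes.shape, labels.shape, scores.shape)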
Example No. 6
def do_train(cfg, model,
             data_loader,
             optimizer,
             scheduler,
             device,
             args):
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()

    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        summary_writer = None

    max_iter = len(data_loader)
    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()
    for iteration, (images, boxes, labels) in enumerate(data_loader):
        iteration = iteration + 1
        scheduler.step()
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        
        #print(images.shape)
        #print(labels.shape)
        #print(boxes.shape)
        optimizer.zero_grad()
        loss_dict = model(images, targets=(boxes, labels))

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()
        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}".format(iteration,
                                                                          optimizer.param_groups[0]['lr'],
                                                                          time.time() - tic, str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            for loss_name, loss_item in loss_dict_reduced.items():
                log_str.append("{}: {:.3f}".format(loss_name, loss_item.item()))
            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)

            tic = time.time()

        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            _save_model(logger, model, model_path)
        # Evaluate periodically during training to track how mAP changes.
        if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
            do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
            model.train()

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    return model
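
reduce_loss_dict averages the individual loss terms over all processes so the logged numbers do not depend on the GPU count; a hedged sketch of the standard implementation this code appears to assume (a no-op in single-GPU runs):

import torch
import torch.distributed as dist

def reduce_loss_dict(loss_dict):
    world_size = dist.get_world_size() if dist.is_available() and dist.is_initialized() else 1
    if world_size < 2:
        return loss_dict
    with torch.no_grad():
        # Iterate in a fixed order so every rank reduces the same tensor layout.
        names, values = zip(*sorted(loss_dict.items()))
        stacked = torch.stack(list(values), dim=0)
        dist.reduce(stacked, dst=0)
        if dist.get_rank() == 0:
            stacked = stacked / world_size
        return dict(zip(names, stacked))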
Example No. 7
def do_train(cfg, model,
             data_loader,
             optimizer,
             scheduler,
             criterion,
             device,
             args):
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR, comment="myvgg")
        #dummy_input = torch.zeros(1, 3, 300, 300)
        #dummy_input = dummy_input.type(torch.cuda.FloatTensor)
        #with summary_writer:
        #    summary_writer.add_graph(model, dummy_input, True)
    else:
        summary_writer = None

    model.train()

    max_iter = len(data_loader)
    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()
    for iteration, (images, boxes, labels) in enumerate(data_loader):
        iteration = iteration + 1
        scheduler.step()
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        confidence, locations = model(images)
        regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict({'regression_loss': regression_loss, 'classification_loss': classification_loss})
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss = regression_loss + classification_loss
        loss.backward()
        optimizer.step()
        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            logger.info(
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}, ".format(iteration, optimizer.param_groups[0]['lr'],
                                                                            time.time() - tic,
                                                                            str(datetime.timedelta(seconds=eta_seconds))) +
                "Loss: {:.3f}, ".format(losses_reduced.item()) +
                "Regression Loss {:.3f}, ".format(loss_dict_reduced['regression_loss'].item()) +
                "Classification Loss: {:.3f}".format(loss_dict_reduced['classification_loss'].item()))

            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced.item(), global_step=global_step)
                summary_writer.add_scalar('losses/location_loss', loss_dict_reduced['regression_loss'].item(), global_step=global_step)
                summary_writer.add_scalar('losses/class_loss', loss_dict_reduced['classification_loss'].item(), global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)

            tic = time.time()

        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            _save_model(logger, model, model_path)

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    return model
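
_save_model is a repository helper that is not shown; in Examples No. 6 and 7 it is called with (logger, model, model_path), so a reasonable reconstruction simply unwraps DistributedDataParallel and writes the state dict (an assumption, not the repo's exact code):

import torch

def _save_model(logger, model, model_path):
    # Unwrap DDP so checkpoint keys are not prefixed with 'module.'.
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model = model.module
    torch.save(model.state_dict(), model_path)
    logger.info("Saved checkpoint to {}".format(model_path))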
Example No. 8
def do_train(cfg, model,
             data_loader,
             optimizer,
             scheduler,
             device,
             args,
             val_sets_dict=None):
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
        tf_writer = tf.compat.v1.summary.FileWriter(cfg.OUTPUT_DIR)
    else:
        summary_writer = None


    if cfg.DATASETS.DG:
        dataloaders = data_loader
        max_iter = len(data_loader[0])
        dataiters = [iter(dataloader) for dataloader in dataloaders]
    else:
        max_iter = len(data_loader)
        data_loader = iter(data_loader)

    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()

    if args.return_best:
        best_map = 0
    
    for iteration in range(scheduler.last_epoch, max_iter):
        if cfg.DATASETS.DG:
            # domain generalization settings
            # we need to read images from different sources
            images = torch.ones(cfg.SOLVER.BATCH_SIZE * len(dataloaders), 3, cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)

            for j in range(len(dataloaders)):
                if cfg.MODEL.SELF_SUPERVISED:
                    d_images, d_boxes, d_labels, d_j_images, d_j_index, d_orig_boxes, d_orig_labels = next(dataiters[j])
                else:
                    d_images, d_boxes, d_labels, d_orig_boxes, d_orig_labels = next(dataiters[j])

                start_bs = cfg.SOLVER.BATCH_SIZE * j
                end_bs = start_bs + cfg.SOLVER.BATCH_SIZE

                images[start_bs:end_bs, :, :, :] = d_images

                if j == 0:
                    boxes = d_boxes
                    labels = d_labels
                    orig_boxes = d_orig_boxes
                    orig_labels = d_orig_labels

                    if cfg.MODEL.SELF_SUPERVISED:
                        j_images = d_j_images
                        j_index = d_j_index
                else:
                    boxes = torch.cat((boxes, d_boxes))
                    labels = torch.cat((labels, d_labels))
                    orig_boxes = torch.cat((orig_boxes, d_orig_boxes))
                    orig_labels = torch.cat((orig_labels, d_orig_labels))

                    if cfg.MODEL.SELF_SUPERVISED:
                        j_images = torch.cat((j_images, d_j_images))
                        j_index = torch.cat((j_index, d_j_index))
        else:
            if cfg.MODEL.SELF_SUPERVISED:
                images, boxes, labels, j_images, j_index, orig_boxes, orig_labels = next(data_loader)
            else:
                images, boxes, labels, orig_boxes, orig_labels = next(data_loader)

        # Incrementing the loop variable here is harmless; it is reassigned at the top of the next pass.
        iteration = iteration + 1

        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss_dict = model(images, targets=(boxes, labels))
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss = sum(loss for loss in loss_dict.values())

        # loss.backward() becomes:
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

        if cfg.MODEL.SELF_SUPERVISED:
            j_images = j_images.to(device)
            j_index = j_index.to(device)
            loss_dict_j = model(j_images, targets=j_index, auxiliary_task=True)
            loss_dict_reduced_j = reduce_loss_dict(loss_dict_j)
            losses_reduced_j = sum(loss for loss in loss_dict_reduced_j.values())
            loss_j = sum(loss for loss in loss_dict_j.values())
            # apply reduction factor for auxiliary loss
            loss_j = loss_j * cfg.MODEL.SELF_SUPERVISOR.WEIGHT

            # loss.backward() becomes:
            with amp.scale_loss(loss_j, optimizer) as scaled_loss:
                scaled_loss.backward()

            # append this loss to the dictionary of losses
            loss_dict.update(loss_dict_j)
            losses_reduced += losses_reduced_j

        optimizer.step()
        scheduler.step()

        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}".format(iteration,
                                                                          optimizer.param_groups[0]['lr'],
                                                                          time.time() - tic,
                                                                          str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            for loss_name, loss_item in loss_dict_reduced.items():
                log_str.append("{}: {:.3f}".format(loss_name, loss_item.item()))
            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)

                if cfg.MODEL.SELF_SUPERVISED:
                    _log_images_tensorboard(cfg, global_step, images, orig_boxes, orig_labels, summary_writer, j_images=j_images)
                else:
                    _log_images_tensorboard(cfg, global_step, images, orig_boxes, orig_labels, summary_writer)
                #for tag, value in model.named_parameters():
                #    tag = tag.replace('.', '/')
                #    if 'ss_classifier' in tag:
                #        print(tag, value)
                #_log_network_params(tf_writer, model, global_step)

            tic = time.time()

        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR,
                                      "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            save_training_checkpoint(logger, model, scheduler, optimizer, model_path)
        # Evaluate periodically during training to track how mAP changes.
        # If args.return_best is True, the returned model is the one that performed best on the validation set.
        if args.eval_step > 0 and iteration % args.eval_step == 0 and (not iteration == max_iter or args.return_best):
            dataset_metrics = do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed, datasets_dict=val_sets_dict)
            
            model.train()

            if args.distributed and not distributed_util.is_main_process():
                continue
            
            avg_map = _compute_avg_map(dataset_metrics)
            
            if args.return_best:

                if avg_map > best_map:
                    best_map = avg_map
                    logger.info("With iteration {} passed the best! New best avg map: {:4f}".format(iteration, best_map))
                    model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_best.pth".format(cfg.INPUT.IMAGE_SIZE))
                    _save_model(logger, model, model_path)
                else:
                    logger.info("With iteration {} the best has not been reached. Best avg map: {:4f}, Current avg mAP: {:4f}".format(iteration, best_map, avg_map))
                
            # logging
            if summary_writer:
                global_step = iteration

                summary_writer.add_scalar("val_avg_map", avg_map, global_step=global_step)

                for dataset_name, metrics in dataset_metrics.items():
                    for metric_name, metric_value in metrics.get_printable_metrics().items():
                        summary_writer.add_scalar('/'.join(['val', dataset_name, metric_name]), metric_value,
                                                  global_step=global_step)

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    if args.return_best:
        model.load(os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_best.pth".format(cfg.INPUT.IMAGE_SIZE)))
    return model
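
The amp.scale_loss blocks in Example No. 8 only work if the model and optimizer were wrapped with NVIDIA apex's amp.initialize earlier in the training setup (not shown here). A minimal self-contained sketch of that pattern, assuming apex is installed and a GPU is available; the opt_level is a guess:

import torch
from apex import amp

model = torch.nn.Linear(10, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Wrap once, before the training loop.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

# Afterwards, loss.backward() is replaced by the scale_loss context manager.
loss = model(torch.randn(4, 10).cuda()).sum()
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()
optimizer.step()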
Example No. 9
def do_train(cfg,
             model,
             data_loader,
             optimizer,
             scheduler,
             device,
             args,
             resume_iteration=0):
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        summary_writer = None

    max_iter = len(data_loader)
    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()
    import numpy as np
    count_useful_iteration = 0
    count_not_useful_iteration = 0
    for iteration, (images, quads, labels,
                    score_map) in enumerate(data_loader):
        # size info:
        # shape(quads):  [2, 24564, 8]
        # shape(labels): [2, 24564]
        # shape(images): [2, 3, 512, 512]
        if args.resume:
            iteration = resume_iteration + iteration
        else:
            iteration = iteration + 1

        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(
                cfg.OUTPUT_DIR, "ssd{}_vgg_iteration_{:06d}.pth".format(
                    cfg.INPUT.IMAGE_SIZE, iteration))
            _save_model(logger, model, model_path, iteration)
        scheduler.step()
        # labels_temp = labels.numpy()
        # labels_temp = np.squeeze(labels_temp,0)
        # index = np.squeeze(np.argwhere(labels_temp == 1),1)
        # print('index:',index)
        # current_quad = quads[0,index,:]
        # print(current_quad)
        # temp_img = images.numpy()[0]
        # temp_img = np.swapaxes(temp_img, 0, 1)
        # temp_img = np.swapaxes(temp_img, 1, 2)
        # for i in range(np.shape(current_quad)[0]):
        #     cv2.circle(temp_img, (int(current_quad[i][0]), int(current_quad[i][1])), 5, (0, 255, 0), 5)
        #     cv2.circle(temp_img, (int(current_quad[i][2]), int(current_quad[i][3])), 5, (255, 255, 255), 5)
        #     cv2.circle(temp_img, (int(current_quad[i][4]), int(current_quad[i][5])), 5, (255, 0, 0), 5)
        #     cv2.circle(temp_img, (int(current_quad[i][6]), int(current_quad[i][7])), 5, (0, 0, 255), 5)
        # cv2.imshow('img', temp_img.astype(np.uint8))
        # cv2.waitKey()
        if len(quads) == 0:
            print('quads is empty, skipping iteration')
            continue
        images = images.to(device)
        quads = quads.to(device)

        labels = labels.to(device)

        num_pos = torch.sum(labels)
        if num_pos == 0:
            count_not_useful_iteration += 1
            print('num_pos == 0: no positive samples in this batch, skipping')
            continue
        else:
            # print(num_pos)
            count_useful_iteration += 1
        optimizer.zero_grad()
        if score_map is None:
            loss_dict = model(images, targets=(quads, labels))
        else:
            score_map = score_map.to(device)
            loss_dict = model(images, (quads, labels), score_map)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()
        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            eta_seconds = int(
                (trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.7f}, Cost: {:.2f}s, Eta: {}".format(
                    iteration, optimizer.param_groups[0]['lr'],
                    time.time() - tic,
                    str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            log_str.append("{}: {:.3f}".format(
                'regression_loss', loss_dict_reduced['regression_loss']))
            log_str.append("{}: {:.6f}".format(
                'classification_loss',
                loss_dict_reduced['classification_loss']))
            log_str.append("{}: {:.5f}".format('fcn_loss',
                                               loss_dict_reduced['fcn_loss']))

            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss',
                                          losses_reduced,
                                          global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name),
                                              loss_item,
                                              global_step=global_step)
                summary_writer.add_scalar('lr',
                                          optimizer.param_groups[0]['lr'],
                                          global_step=global_step)

            tic = time.time()
        # Evaluate periodically during training to track how mAP changes (disabled here):
        # if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
        #     do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
        #     model.train()

    # if save_to_disk:
    #     model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
    #     _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    with open('useful_iteration.txt', 'w') as f:
        f.write('count_useful_iteration:' + str(count_useful_iteration) + '\n')
        f.write('count_not_useful_iteration:' +
                str(count_not_useful_iteration))

    return model
def main():
    parser = argparse.ArgumentParser(
        description='ssd_fcn_multitask_text_detectior training with pytorch')
    parser.add_argument(
        "--config_file",
        default="./configs/icdar2015_incidental_scene_text_512.yaml",
        metavar="FILE",
        help="path to config file",
        type=str)
    # parser.add_argument("--config_file",default="./configs/synthtext.yaml",metavar="FILE",help="path to config file",type=str)
    parser.add_argument(
        '--vgg',
        default='./pretrained_on_imgnet/vgg16_reducedfc.pth',
        help=
        'Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    )
    parser.add_argument(
        '--resume',
        default=
        "/home/binchengxiong/ssd_fcn_multitask_text_detection_pytorch1.0/output/ssd512_vgg_iteration_043000.pth",
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step',
                        default=50,
                        type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step',
                        default=1000,
                        type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step',
        default=5000,
        type=int,
        help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--skip-test",
                        default=True,
                        dest="skip_test",
                        help="Do not test the final model",
                        action="store_true")
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    num_gpus = 1
    args.num_gpus = num_gpus
    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # freeze() prevents the config from being modified later on; ref: https://github.com/rbgirshick/yacs
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)
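
The do_train in Example No. 9 passes the current iteration into _save_model and resumes from it via --resume / resume_iteration. A hedged sketch of a matching save/load pair (the checkpoint field names are illustrative, not the repository's):

import torch

def _save_model(logger, model, model_path, iteration):
    # Store the iteration alongside the weights so --resume can pick up where it left off.
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model = model.module
    torch.save({'model': model.state_dict(), 'iteration': iteration}, model_path)
    logger.info("Saved checkpoint to {} (iteration {})".format(model_path, iteration))

def load_resume_checkpoint(model, checkpoint_path):
    # Returns the iteration to pass to do_train as resume_iteration.
    state = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(state['model'])
    return state.get('iteration', 0)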
Example No. 11
def main():
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        '--vgg',
        help=
        'Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    )
    parser.add_argument(
        '--resume',
        default=None,
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step',
                        default=50,
                        type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step',
                        default=5000,
                        type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step',
        default=0,
        type=int,
        help=
        'Evaluate dataset every eval_step, disabled when eval_step <= 0. Default: disabled'
    )
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--num_workers",
                        default=4,
                        type=int,
                        help="Number of workers to use for data loaders")
    parser.add_argument(
        "--eval_mode",
        default="test",
        type=str,
        help=
        'Use defined test datasets for periodic evaluation or use a validation split. Default: "test", alternative "val"'
    )
    parser.add_argument(
        "--return_best",
        default=False,
        type=str2bool,
        help=
        "If false (default) tests on the target the last model. If true tests on the target the model with the best performance on the validation set"
    )
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    if not os.path.exists(cfg.OUTPUT_DIR):
        if not args.distributed or (args.distributed
                                    and distributed_util.is_main_process()):
            os.makedirs(cfg.OUTPUT_DIR)

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # release cached GPU memory so evaluation after training runs faster
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
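
All of these entry points get WORLD_SIZE and --local_rank from PyTorch's distributed launcher rather than setting them by hand, and init_process_group(init_method='env://') reads the rendezvous details from environment variables. A single-process sketch that fakes those variables, just to show which ones the env:// method expects (values are placeholders; the scripts above use the nccl backend on GPUs):

import os
import torch.distributed as dist

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")

dist.init_process_group(backend="gloo", init_method="env://")  # gloo also works on CPU-only machines
print(dist.get_rank(), dist.get_world_size())
dist.destroy_process_group()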