Example #1
import os

import torch

# `infer` and `Engine` are assumed to come from the surrounding project
# (e.g. `from retinanet import infer` and `from retinanet.model import Engine`),
# and `cfg` to be its global configuration object.


def worker(rank, args, world, model, state):
    """Per-device distributed worker."""
    if torch.cuda.is_available():
        # Publish the rendezvous info that init_method='env://' reads below.
        os.environ.update({
            'MASTER_PORT': args.master.split(':')[-1],
            'MASTER_ADDR': ':'.join(args.master.split(':')[:-1]),
            'WORLD_SIZE': str(world),
            'RANK': str(rank),
            'CUDA_DEVICE': str(rank)
        })

        # Bind this process to its GPU and join the NCCL process group.
        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')

        if args.batch % world != 0:
            raise RuntimeError(
                'Batch size should be a multiple of the number of GPUs')

    if args.command == 'infer':
        if model is None:
            # No model was passed in; deserialize a TensorRT engine from the
            # path given by the project configuration.
            if rank == 0:
                print('Loading CUDA engine from {}...'.format(
                    os.path.basename(cfg.MODEL.WEIGHT)))
            model = Engine.load(cfg.MODEL.WEIGHT)

        infer.infer(model,
                    args.images,
                    args.output,
                    args.resize,
                    args.max_size,
                    args.batch,
                    args.deepsort_config,
                    original_annotations=args.annotations,
                    mixed_precision=not args.full_precision,
                    is_master=(rank == 0),
                    world=world,
                    use_dali=args.with_dali,
                    verbose=False,
                    save_images=args.save_images,
                    output_path=args.images + '-results/')
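
The `worker` above expects to be launched once per GPU, with the process rank as its first argument. A minimal launcher sketch follows; it is an assumption about how the surrounding project starts its workers (the name `main` is hypothetical), but `torch.multiprocessing.spawn` really does pass the process index as the first argument of the target function.

import torch
import torch.multiprocessing as mp

def main(args, model, state):
    # Hypothetical launcher: one worker process per visible GPU.
    world = max(torch.cuda.device_count(), 1)
    if world == 1:
        # Single GPU (or CPU-only): call the worker directly with rank 0.
        worker(0, args, 1, model, state)
    else:
        # spawn() invokes worker(i, ...) with the process index i as the rank.
        mp.spawn(worker, args=(args, world, model, state), nprocs=world)
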
Example #2
import glob
import os
import random

import torch

# `train`, `infer`, and `Engine` are assumed to come from the surrounding
# project (e.g. `from retinanet import infer, train` and
# `from retinanet.model import Engine`).


def worker(rank, args, world, model, state):
    """Per-device distributed worker."""

    if torch.cuda.is_available():
        # Publish the rendezvous info that init_method='env://' reads below.
        os.environ.update({
            'MASTER_PORT': args.master.split(':')[-1],
            'MASTER_ADDR': ':'.join(args.master.split(':')[:-1]),
            'WORLD_SIZE':  str(world),
            'RANK':        str(rank),
            'CUDA_DEVICE': str(rank)
        })

        # Bind this process to its GPU and join the NCCL process group.
        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')

        if args.batch % world != 0:
            raise RuntimeError('Batch size should be a multiple of the number of GPUs')

    if args.command == 'train':
        train.train(model, state, args.images, args.annotations,
                    args.val_images or args.images, args.val_annotations,
                    args.resize, args.max_size, args.jitter, args.batch,
                    int(args.iters * args.schedule), args.val_iters,
                    not args.full_precision, args.lr, args.warmup,
                    [int(m * args.schedule) for m in args.milestones], args.gamma,
                    is_master=(rank == 0), world=world, use_dali=args.with_dali,
                    metrics_url=args.post_metrics, logdir=args.logdir,
                    verbose=(rank == 0))

    elif args.command == 'infer':
        if model is None:
            # No model was passed in; deserialize a TensorRT engine from disk.
            if rank == 0:
                print('Loading CUDA engine from {}...'.format(
                    os.path.basename(args.model)))
            model = Engine.load(args.model)

        infer.infer(model, args.images, args.output, args.resize, args.max_size,
                    args.batch, annotations=args.annotations,
                    mixed_precision=not args.full_precision,
                    is_master=(rank == 0), world=world, use_dali=args.with_dali,
                    verbose=(rank == 0))

    elif args.command == 'export':
        onnx_only = args.export.split('.')[-1] == 'onnx'
        # A single value means a square input: [s] becomes [s, s].
        input_size = args.size * 2 if len(args.size) == 1 else args.size

        calibration_files = []
        if args.int8:
            # Gather images to use for INT8 calibration.
            if os.path.isdir(args.calibration_images):
                file_extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']
                for ex in file_extensions:
                    # The '**' is required for recursive=True to take effect.
                    calibration_files += glob.glob(
                        '{}/**/*{}'.format(args.calibration_images, ex),
                        recursive=True)
                # Only keep enough images for the requested number of
                # calibration batches.
                if len(calibration_files) >= args.calibration_batches * args.batch:
                    calibration_files = calibration_files[:(args.calibration_batches * args.batch)]
                else:
                    print('Only found enough images for {} batches. Continuing anyway...'.format(
                        len(calibration_files) // args.batch))

                random.shuffle(calibration_files)

        precision = "FP32"
        if args.int8:
            precision = "INT8"
        elif not args.full_precision:
            precision = "FP16"

        exported = model.export(input_size, args.batch, precision, calibration_files,
                                args.calibration_table, args.verbose, onnx_only=onnx_only)
        if onnx_only:
            with open(args.export, 'wb') as out:
                out.write(exported)
        else:
            exported.save(args.export)
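
One detail worth noting in the export branch: the calibration list is truncated before it is shuffled, so the images that survive are whatever glob returned first, in directory order. If a uniform random sample is preferred, shuffle before truncating. A standalone sketch of that variant follows; the helper name and its defaults are hypothetical, not part of the project.

import glob
import os
import random

def gather_calibration_files(image_dir, num_batches, batch_size,
                             extensions=('.jpg', '.jpeg', '.png',
                                         '.JPG', '.JPEG', '.PNG')):
    """Collect up to num_batches * batch_size calibration images."""
    files = []
    for ext in extensions:
        # '**' plus recursive=True descends into subdirectories.
        files += glob.glob(os.path.join(image_dir, '**', '*' + ext),
                           recursive=True)
    # Shuffle first so truncation keeps a uniform random sample rather than
    # the first files in directory order.
    random.shuffle(files)
    return files[:num_batches * batch_size]
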
Example #3
def get_model_trt(path):
    """Deserialize a TensorRT engine; `Engine` is assumed to be the project's
    TensorRT wrapper (e.g. `from retinanet.model import Engine`)."""
    return Engine.load(path)
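
For context, a hedged usage sketch: the engine file would typically be one serialized by the export branch of Example #2, and the path below is illustrative only.

# Illustrative path; Engine.load() deserializes an engine written earlier,
# e.g. by exported.save(args.export) in Example #2.
model = get_model_trt('exported_model.plan')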