import os
import random

import torch
import torch.cuda
import torch.distributed
import torch.multiprocessing

# Project-internal imports as in NVIDIA retinanet-examples' main.py, which this file
# follows; `cfg` (used in the infer branch below) is assumed to be the fork's global
# config object, defined or imported elsewhere at module level.
from retinanet import infer, train
from retinanet._C import Engine


def worker(rank, args, world, model, state):
    'Per-device distributed worker'
    # NOTE: this DeepSORT-tracking variant of `worker` only handles the `infer`
    # command and is shadowed by the full `worker` definition below.

    if torch.cuda.is_available():
        # Populate the env:// rendezvous variables from the --master host:port string
        os.environ.update({
            'MASTER_PORT': args.master.split(':')[-1],
            'MASTER_ADDR': ':'.join(args.master.split(':')[:-1]),
            'WORLD_SIZE': str(world),
            'RANK': str(rank),
            'CUDA_DEVICE': str(rank)
        })

        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')

        if args.batch % world != 0:
            raise RuntimeError('Batch size should be a multiple of the number of GPUs')

    if args.command == 'infer':
        if model is None:
            # The engine path comes from the global config rather than the CLI here
            if rank == 0:
                print('Loading CUDA engine from {}...'.format(os.path.basename(cfg.MODEL.WEIGHT)))
            model = Engine.load(cfg.MODEL.WEIGHT)

        infer.infer(model, args.images, args.output, args.resize, args.max_size,
                    args.batch, args.deepsort_config,
                    original_annotations=args.annotations,
                    mixed_precision=not args.full_precision,
                    is_master=(rank == 0), world=world, use_dali=args.with_dali,
                    verbose=False, save_images=args.save_images,
                    output_path=args.images + '-results/')
def worker(rank, args, world, model, state):
    'Per-device distributed worker'

    if torch.cuda.is_available():
        os.environ.update({
            'MASTER_PORT': args.master.split(':')[-1],
            'MASTER_ADDR': ':'.join(args.master.split(':')[:-1]),
            'WORLD_SIZE': str(world),
            'RANK': str(rank),
            'CUDA_DEVICE': str(rank)
        })

        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')

        if args.batch % world != 0:
            raise RuntimeError('Batch size should be a multiple of the number of GPUs')

    if args.command == 'train':
        train.train(model, state, args.images, args.annotations,
                    args.val_images or args.images, args.val_annotations,
                    args.resize, args.max_size, args.jitter, args.batch,
                    int(args.iters * args.schedule), args.val_iters,
                    not args.full_precision, args.lr, args.warmup,
                    [int(m * args.schedule) for m in args.milestones], args.gamma,
                    is_master=(rank == 0), world=world, use_dali=args.with_dali,
                    metrics_url=args.post_metrics, logdir=args.logdir,
                    verbose=(rank == 0))

    elif args.command == 'infer':
        if model is None:
            if rank == 0:
                print('Loading CUDA engine from {}...'.format(os.path.basename(args.model)))
            model = Engine.load(args.model)

        infer.infer(model, args.images, args.output, args.resize, args.max_size,
                    args.batch, annotations=args.annotations,
                    mixed_precision=not args.full_precision,
                    is_master=(rank == 0), world=world, use_dali=args.with_dali,
                    verbose=(rank == 0))

    elif args.command == 'export':
        onnx_only = args.export.split('.')[-1] == 'onnx'
        input_size = args.size * 2 if len(args.size) == 1 else args.size

        calibration_files = []
        if args.int8:
            # Get list of images to use for calibration
            if os.path.isdir(args.calibration_images):
                import glob
                file_extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']
                for ex in file_extensions:
                    calibration_files += glob.glob("{}/*{}".format(args.calibration_images, ex), recursive=True)
                # Only need enough images for the specified number of calibration batches
                if len(calibration_files) >= args.calibration_batches * args.batch:
                    calibration_files = calibration_files[:(args.calibration_batches * args.batch)]
                else:
                    print('Only found enough images for {} batches. Continuing anyway...'.format(
                        len(calibration_files) // args.batch))

                random.shuffle(calibration_files)

        precision = "FP32"
        if args.int8:
            precision = "INT8"
        elif not args.full_precision:
            precision = "FP16"

        exported = model.export(input_size, args.batch, precision, calibration_files,
                                args.calibration_table, args.verbose, onnx_only=onnx_only)
        if onnx_only:
            with open(args.export, 'wb') as out:
                out.write(exported)
        else:
            exported.save(args.export)
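# How `worker` is typically launched: a minimal sketch, not part of the original file.
# A driver process spawns one worker per visible GPU, or calls `worker` directly for
# single-device runs. `launch` and its defaults are hypothetical names; only `worker`'s
# signature above is taken from this file.
def launch(args, model=None, state=None):
    state = state if state is not None else {}
    world = torch.cuda.device_count()
    if args.command == 'export' or world <= 1:
        # Single process: rank 0 does all the work
        worker(0, args, 1, model, state)
    else:
        # One process per GPU; spawn passes each process its rank as the first argument
        torch.multiprocessing.spawn(worker, args=(args, world, model, state), nprocs=world)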
def get_model_trt(path):
    'Load a serialized TensorRT engine from disk'
    return Engine.load(path)
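# Minimal usage sketch for `get_model_trt` (assumed workflow, not from the original
# file): deserialize an engine produced by the `export` branch above and hand it to
# `worker` for single-device inference. `engine_path` and `args` are placeholders;
# `args.command` must be 'infer' for `worker` to run the inference branch.
def infer_with_engine(engine_path, args):
    engine = get_model_trt(engine_path)  # load the serialized TensorRT engine
    worker(0, args, 1, engine, {})       # rank 0, world size 1: single-GPU inference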