def __init__(self, cfg):
    """Build an inference-ready recognizer from *cfg*.

    Args:
        cfg (CfgNode): configs. Details can be found in
            tsn/config/defaults.py
    """
    # Select the CUDA device for this process's local rank when GPUs are
    # requested; otherwise fall back to the default (CPU) device.
    if cfg.NUM_GPUS > 0:
        device = get_device(local_rank=get_local_rank())
    else:
        device = get_device()

    # Build the video model, switch it to evaluation mode, and prepare
    # the inference-time data transform.
    model = build_model(cfg, device)
    model.eval()

    self.model = model
    self.transform = build_transform(cfg, is_train=False)
    self.cfg = cfg
    self.device = device
def train(cfg):
    """Train a recognizer end-to-end according to *cfg*.

    Sets up the distributed environment, seeds RNGs per-rank, builds the
    model/criterion/optimizer/scheduler, optionally resumes from a
    checkpoint (including warmup-scheduler state surgery), then delegates
    the training loop to `do_train`.
    """
    # Set up environment.
    init_distributed_training(cfg)
    local_rank_id = get_local_rank()
    # Set random seed from configs.
    # Offset by rank so each process draws a different random stream.
    np.random.seed(cfg.RNG_SEED + 10 * local_rank_id)
    torch.manual_seed(cfg.RNG_SEED + 10 * local_rank_id)
    # Deterministic cudnn for reproducible training (benchmark disabled).
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info('init start')
    # Epoch counting starts from 1.
    arguments = {"cur_epoch": 1}
    device = get_device(local_rank_id)
    model = build_recognizer(cfg, device)
    criterion = build_criterion(cfg, device)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)
    checkpointer = CheckPointer(model, optimizer=optimizer, scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT_DIR, save_to_disk=True)
    if cfg.TRAIN.RESUME:
        logger.info('resume start')
        extra_checkpoint_data = checkpointer.load(map_location=device)
        if isinstance(extra_checkpoint_data, dict):
            # Resume epoch counter from the checkpoint.
            arguments['cur_epoch'] = extra_checkpoint_data['cur_epoch']
            if cfg.LR_SCHEDULER.IS_WARMUP:
                logger.info('warmup start')
                # Re-attach the restored optimizer state to the (possibly
                # finished) warmup scheduler so both point at the same
                # optimizer object after resume.
                if lr_scheduler.finished:
                    optimizer.load_state_dict(
                        lr_scheduler.after_scheduler.optimizer.state_dict())
                else:
                    optimizer.load_state_dict(
                        lr_scheduler.optimizer.state_dict())
                lr_scheduler.optimizer = optimizer
                lr_scheduler.after_scheduler.optimizer = optimizer
                logger.info('warmup end')
        logger.info('resume end')
    data_loader = build_dataloader(cfg, is_train=True)
    logger.info('init end')
    # Barrier: wait for every process before entering the training loop.
    synchronize()
    do_train(cfg, arguments,
             data_loader, model, criterion, optimizer, lr_scheduler,
             checkpointer, device)
def test(cfg):
    """Evaluate a recognizer under the distributed settings given by *cfg*."""
    # Set up environment.
    init_distributed_training(cfg)

    # Seed RNGs; cudnn benchmark mode is enabled since inference does not
    # need determinism and benefits from autotuned kernels.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True

    eval_device = get_device(local_rank=get_local_rank())
    recognizer = build_recognizer(cfg, device=eval_device)

    # Barrier: make sure every process finished building before evaluating.
    synchronize()
    do_evaluation(cfg, recognizer, eval_device)
def main():
    """Run the video demo: read frames, run inference, and display results.

    Spawns a display thread (`show_results`) and an inference thread
    (`inference`) that communicate through the module-level deques; the
    main thread blocks until the display thread exits.
    """
    global frame_queue, camera, frame, results, threshold, sample_length, \
        data, test_transform, model, device, average_size, label, result_queue, \
        frame_interval

    args = parse_test_args()
    cfg = load_test_config(args)

    # Smoothing window for predictions and minimum score to report.
    average_size = 1
    threshold = 0.5

    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True

    device = get_device(local_rank=get_local_rank())
    model = build_model(cfg, device)
    model.eval()

    camera = cv2.VideoCapture(cfg.VISUALIZATION.INPUT_VIDEO)
    with open(cfg.VISUALIZATION.LABEL_FILE_PATH, 'r') as f:
        # Each line is "<index> <name>"; keep only the class name.
        label = [line.strip().split(' ')[1] for line in f]

    # prepare test pipeline from non-camera pipeline
    test_transform = build_transform(cfg, is_train=False)
    sample_length = cfg.DATASETS.CLIP_LEN * cfg.DATASETS.NUM_CLIPS * cfg.DATASETS.FRAME_INTERVAL
    frame_interval = cfg.DATASETS.FRAME_INTERVAL
    assert sample_length > 0

    try:
        frame_queue = deque(maxlen=sample_length)
        result_queue = deque(maxlen=1)
        pw = Thread(target=show_results, args=(), daemon=True)
        pr = Thread(target=inference, args=(), daemon=True)
        pw.start()
        pr.start()
        # BUG FIX: the original `while True: if not pw.is_alive(): exit(0)`
        # busy-waited and pinned a CPU core at 100%. join() blocks without
        # spinning and still allows KeyboardInterrupt to be caught below.
        pw.join()
        exit(0)
    except KeyboardInterrupt:
        pass
def main(data_shape, config_file, mobile_name):
    """Report FLOPs, parameter size, and average forward latency for a model.

    Args:
        data_shape: shape of the random input tensor, e.g. (1, 3, T, H, W).
        config_file: path to a YAML config merged into the global `cfg`.
        mobile_name: label printed alongside the results.
    """
    cfg.merge_from_file(config_file)

    device = get_device(local_rank=get_local_rank())
    model = build_recognizer(cfg, device)
    # Benchmark in inference mode, consistent with the sibling profiler entry.
    model.eval()

    data = torch.randn(data_shape).to(device=device, non_blocking=True)
    GFlops, params_size = compute_num_flops(model, data)
    print(f'{mobile_name} ' + '*' * 10)
    print(f'device: {device}')
    print(f'GFlops: {GFlops}')
    print(f'Params Size: {params_size}')

    total_time = 0.0
    num = 100
    for i in range(num):
        data = torch.randn(data_shape).to(device=device, non_blocking=True)
        start = time.time()
        model(data)
        # BUG FIX: accumulate per-iteration time. The original used `=`,
        # so `total_time` held only the LAST iteration's duration and the
        # printed "average" was last-time/num, off by a factor of ~num.
        total_time += time.time() - start
    print(f'one process need {total_time / num}')
def main(data_shape, config_file, mobile_name):
    """Profile a recognizer: FLOPs, parameter size, and per-forward latency.

    Args:
        data_shape: shape of the random input tensor, e.g. (1, 3, T, H, W).
        config_file: path to a YAML config merged into the global `cfg`.
        mobile_name: label printed alongside the results.
    """
    cfg.merge_from_file(config_file)

    device = get_device(local_rank=get_local_rank())
    model = build_recognizer(cfg, device)
    model.eval()

    # One warm sample on-device for the FLOPs/params measurement.
    data = torch.randn(data_shape).to(device=device, non_blocking=True)
    GFlops, params_size = compute_num_flops(model, data)
    print(f'{mobile_name} ' + '*' * 10)
    print(f'device: {device}')
    print(f'GFlops: {GFlops}')
    print(f'Params Size: {params_size}')

    # Time `num` forward passes: t1 counts model compute only, t2 counts
    # the whole loop including the host-to-device transfer.
    data = torch.randn(data_shape)
    t1 = 0.0
    num = 100
    begin = time.time()
    for _ in range(num):
        tick = time.time()
        model(data.to(device=device, non_blocking=True))
        t1 += time.time() - tick
    t2 = time.time() - begin
    print(f'one process need {t2 / num}, model compute need: {t1 / num}')
def build_backbone(cfg):
    """Instantiate the backbone named by cfg.MODEL.BACKBONE.NAME from the registry."""
    map_device = get_device(local_rank=get_local_rank())
    backbone_factory = registry.BACKBONE[cfg.MODEL.BACKBONE.NAME]
    return backbone_factory(cfg, map_location=map_device)
def _resnet(arch, cfg, block_layer):
    """Build a ResNet3d backbone configured from *cfg*.

    Args:
        arch: architecture name used to look up torchvision 2D weights.
        cfg (CfgNode): global config; MODEL.BACKBONE.* drives construction.
        block_layer: residual block class (e.g. BasicBlock / Bottleneck).

    Returns:
        ResNet3d: the constructed backbone, optionally initialized by
        inflating 2D torchvision weights.
    """
    pretrained2d = cfg.MODEL.BACKBONE.TORCHVISION_PRETRAINED
    state_dict_2d = None
    if pretrained2d:
        # Load torchvision 2D weights onto the local device; they are
        # inflated to 3D inside ResNet3d.
        device = get_device(local_rank=get_local_rank())
        state_dict_2d = _load_pretrained(arch, map_location=device)
    conv_layer = get_conv(cfg.MODEL.CONV_LAYER)
    pool_layer = get_pool(cfg.MODEL.POOL_LAYER)
    norm_layer = get_norm(cfg.MODEL.NORM_LAYER)
    act_layer = get_act(cfg.MODEL.ACT_LAYER)
    model = ResNet3d(
        # Number of input channels
        in_channels=cfg.MODEL.BACKBONE.IN_CHANNELS,
        # Stem channel count
        base_channel=cfg.MODEL.BACKBONE.BASE_CHANNEL,
        # Kernel size of the first conv layer
        conv1_kernel=cfg.MODEL.BACKBONE.CONV1_KERNEL,
        # Stride of the first conv layer
        conv1_stride=cfg.MODEL.BACKBONE.CONV1_STRIDE,
        # Zero padding of the first conv layer
        conv1_padding=cfg.MODEL.BACKBONE.CONV1_PADDING,
        # Whether to use the first pooling layer
        with_pool1=cfg.MODEL.BACKBONE.WITH_POOL1,
        # Kernel size of the first pooling layer
        pool1_kernel=cfg.MODEL.BACKBONE.POOL1_KERNEL,
        # Stride of the first pooling layer
        pool1_stride=cfg.MODEL.BACKBONE.POOL1_STRIDE,
        # Whether to use the second pooling layer
        with_pool2=cfg.MODEL.BACKBONE.WITH_POOL2,
        # Kernel size of the second pooling layer
        pool2_kernel=cfg.MODEL.BACKBONE.POOL2_KERNEL,
        # Stride of the second pooling layer
        pool2_stride=cfg.MODEL.BACKBONE.POOL2_STRIDE,
        # Number of blocks per stage (e.g. (3, 4, 6, 3) for R50)
        stage_blocks=cfg.MODEL.BACKBONE.STAGE_BLOCKS,
        # Output channels of the first conv in each stage's blocks
        res_planes=cfg.MODEL.BACKBONE.RES_PLANES,
        # Expansion factor (e.g. 4 for Bottleneck)
        expansion=cfg.MODEL.BACKBONE.EXPANSION,
        # Spatial strides per stage
        spatial_strides=cfg.MODEL.BACKBONE.SPATIAL_STRIDES,
        # Whether to inflate (2D -> 3D) each stage
        inflates=cfg.MODEL.BACKBONE.INFLATES,
        # Inflation style
        inflate_style=cfg.MODEL.BACKBONE.INFLATE_STYLE,
        # Convolution layer type
        conv_layer=conv_layer,
        # Pooling layer type
        pool_layer=pool_layer,
        # Normalization layer type
        norm_layer=norm_layer,
        # Activation layer type
        act_layer=act_layer,
        # Residual block type
        block_layer=block_layer,
        # Whether to zero-initialize the residual branch
        zero_init_residual=cfg.MODEL.BACKBONE.ZERO_INIT_RESIDUAL,
        # 2D pretrained weights to load (None when not pretrained)
        state_dict_2d=state_dict_2d,
        # Whether to use partial BN
        partial_bn=cfg.MODEL.BACKBONE.PARTIAL_BN)
    return model