def __init__(self, cfg):
    """
    SlowFast constructor.

    The `__init__` method of any subclass should also contain these
    arguments.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    # Initialise the parent class before any attribute is attached.
    super(SlowFast, self).__init__()

    # Whether the model is built for detection, taken from the config.
    self.enable_detection = cfg.DETECTION.ENABLE

    # Normalization selected by the config (presumably batch norm in the
    # usual setup -- confirm in get_norm). Normalizing activations is meant
    # to speed up convergence; background:
    # https://www.youtube.com/watch?v=nUUqwaxLnWs
    self.norm_module = get_norm(cfg)

    # NOTE(review): an earlier comment here described two pathways (slow and
    # fast), but the value assigned is 3 -- confirm which one is intended.
    self.num_pathways = 3

    # Build the layers, then initialise their weights from the config.
    self._construct_network(cfg)
    init_helper.init_weights(
        self,
        cfg.MODEL.FC_INIT_STD,
        cfg.RESNET.ZERO_INIT_FINAL_BN,
    )
def __init__(self, cfg):
    """
    X3D constructor: derives the per-stage channel widths, then builds the
    network and initialises its weights.

    The `__init__` method of any subclass should also contain these
    arguments.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    super(X3D, self).__init__()
    self.norm_module = get_norm(cfg)
    self.enable_detection = cfg.DETECTION.ENABLE
    self.num_pathways = 1

    # Multiplier handed to round_width when going from one stage to the next.
    width_mult = 2.0

    self.dim_c1 = cfg.X3D.DIM_C1
    # res2 is only widened relative to the stem when SCALE_RES2 is set.
    if cfg.X3D.SCALE_RES2:
        self.dim_res2 = round_width(self.dim_c1, width_mult, divisor=8)
    else:
        self.dim_res2 = self.dim_c1
    self.dim_res3 = round_width(self.dim_res2, width_mult, divisor=8)
    self.dim_res4 = round_width(self.dim_res3, width_mult, divisor=8)
    self.dim_res5 = round_width(self.dim_res4, width_mult, divisor=8)

    # One [blocks, c, stride] row per stage; the stride is 2 for every stage.
    blocks_per_stage = (1, 2, 5, 3)
    width_per_stage = (
        self.dim_res2,
        self.dim_res3,
        self.dim_res4,
        self.dim_res5,
    )
    self.block_basis = [
        [blocks, width, 2]
        for blocks, width in zip(blocks_per_stage, width_per_stage)
    ]

    self._construct_network(cfg)
    init_helper.init_weights(
        self,
        cfg.MODEL.FC_INIT_STD,
        cfg.RESNET.ZERO_INIT_FINAL_BN,
    )
def __init__(self, cn, num_classes, dropout_rate, debug_outputs):
    """
    M_resnet constructor: builds the single-pathway network and attaches a
    `Head_fullframe_c2d_1x1` head.

    Args:
        cn (CfgNode): model building configs (presumably the same config
            node the other builders call `cfg` -- confirm with callers).
        num_classes: forwarded unchanged to the head.
        dropout_rate: forwarded unchanged to the head.
        debug_outputs: forwarded unchanged to the head.
    """
    super(M_resnet, self).__init__()

    # Plain settings first; all of them are in place before the network
    # is constructed.
    self.num_pathways = 1
    self.enable_detection = cn.DETECTION.ENABLE
    self.norm_module = get_norm(cn)

    self._construct_network(cn)

    # The head is created after the network; no init_helper call is made
    # in this constructor.
    self.head = Head_fullframe_c2d_1x1(
        cn,
        num_classes,
        dropout_rate,
        debug_outputs,
    )
def __init__(self, cfg):
    """
    X3D constructor: records the basic settings, builds the network and
    initialises its weights.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    super(X3D, self).__init__()

    # Settings stored on the instance before the network is built.
    self.num_pathways = 1
    self.enable_detection = cfg.DETECTION.ENABLE
    self.norm_module = get_norm(cfg)

    self._construct_network(cfg)

    fc_init_std = cfg.MODEL.FC_INIT_STD
    zero_init_final_bn = cfg.RESNET.ZERO_INIT_FINAL_BN
    init_helper.init_weights(self, fc_init_std, zero_init_final_bn)
def _construct_network(self, cfg):
    """
    Builds a X3D model through `create_x3d` and stores it on `self.model`;
    the head activation chosen by `cfg.MODEL.HEAD_ACT` is stored on
    `self.post_act`.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    # Params from configs.
    norm_module = get_norm(cfg)
    kernel_basis = _TEMPORAL_KERNEL_BASIS[cfg.MODEL.ARCH]
    relu_inplace = cfg.RESNET.INPLACE_RELU

    # First entry of every kernel comes from the temporal kernel basis; the
    # remaining two entries are fixed at 3.
    stem_kernel = (kernel_basis[0][0][0], 3, 3)
    stage_kernels = tuple(
        (kernel_basis[stage][0][0], 3, 3) for stage in range(1, 5)
    )

    self.model = create_x3d(
        # Input clip configs.
        input_channel=cfg.DATA.INPUT_CHANNEL_NUM[0],
        input_clip_length=cfg.DATA.NUM_FRAMES,
        input_crop_size=cfg.DATA.TRAIN_CROP_SIZE,
        # Model configs.
        model_num_class=cfg.MODEL.NUM_CLASSES,
        dropout_rate=cfg.MODEL.DROPOUT_RATE,
        width_factor=cfg.X3D.WIDTH_FACTOR,
        depth_factor=cfg.X3D.DEPTH_FACTOR,
        # Normalization configs.
        norm=norm_module,
        norm_eps=1e-5,
        norm_momentum=0.1,
        # Activation configs.
        activation=partial(nn.ReLU, inplace=relu_inplace),
        # Stem configs.
        stem_dim_in=cfg.X3D.DIM_C1,
        stem_conv_kernel_size=stem_kernel,
        stem_conv_stride=(1, 2, 2),
        # Stage configs.
        stage_conv_kernel_size=stage_kernels,
        stage_spatial_stride=(2, 2, 2, 2),
        stage_temporal_stride=(1, 1, 1, 1),
        bottleneck=create_x3d_bottleneck_block,
        bottleneck_factor=cfg.X3D.BOTTLENECK_FACTOR,
        se_ratio=0.0625,
        inner_act=Swish,
        # Head configs. The builder's own head activation is disabled; the
        # activation from the config is kept separately in `post_act`.
        head_dim_out=cfg.X3D.DIM_C5,
        head_pool_act=partial(nn.ReLU, inplace=relu_inplace),
        head_bn_lin5_on=cfg.X3D.BN_LIN5,
        head_activation=None,
        head_output_with_global_average=False,
    )

    self.post_act = get_head_act(cfg.MODEL.HEAD_ACT)
def __init__(self, cfg):
    """
    SlowFast constructor: stores the normalization choice and pathway
    count, builds the network and initialises its weights.

    The `__init__` method of any subclass should also contain these
    arguments.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    super(SlowFast, self).__init__()

    # Two pathways are recorded for this model.
    self.num_pathways = 2
    self.norm_module = get_norm(cfg)

    self._construct_network(cfg)

    fc_init_std = cfg.MODEL.FC_INIT_STD
    zero_init_final_bn = cfg.RESNET.ZERO_INIT_FINAL_BN
    init_helper.init_weights(self, fc_init_std, zero_init_final_bn)
def __init__(self, cfg):
    """
    Constructor: stores the pathway count and normalization choice, builds
    the network and prints which ShuffleNet complexity was created.

    The `__init__` method of any subclass should also contain these
    arguments.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    super().__init__()

    self.num_pathways = 2
    self.norm_module = get_norm(cfg)

    # No weight-initialisation helper is called in this constructor.
    self._construct_network(cfg)

    # Report the configured complexity on stdout.
    banner = "Created SlowFastShuffleNetV2 version {}"
    print(banner.format(cfg.SHUFFLENET.COMPLEXITY))
def _construct_network(self, cfg):
    """
    Builds a CSN model through `create_csn` and stores it on `self.model`;
    the head activation chosen by `cfg.MODEL.HEAD_ACT` is stored on
    `self.post_act`.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    # Params from configs.
    norm_module = get_norm(cfg)
    relu = partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU)

    # Head pooling window: frame count divided by 8 and crop size divided by
    # 32 (presumably the network's overall temporal/spatial reduction --
    # confirm against the strides below).
    crop_pool = cfg.DATA.TRAIN_CROP_SIZE // 32
    head_pool_kernel = (
        cfg.DATA.NUM_FRAMES // 8,
        crop_pool,
        crop_pool,
    )

    self.model = create_csn(
        # Input clip configs.
        input_channel=cfg.DATA.INPUT_CHANNEL_NUM[0],
        # Model configs.
        model_depth=cfg.RESNET.DEPTH,
        model_num_class=cfg.MODEL.NUM_CLASSES,
        dropout_rate=cfg.MODEL.DROPOUT_RATE,
        # Normalization configs.
        norm=norm_module,
        # Activation configs.
        activation=relu,
        # Stem configs.
        stem_dim_out=cfg.RESNET.WIDTH_PER_GROUP,
        stem_conv_kernel_size=(3, 7, 7),
        stem_conv_stride=(1, 2, 2),
        stem_pool=nn.MaxPool3d,
        stem_pool_kernel_size=(1, 3, 3),
        stem_pool_stride=(1, 2, 2),
        # Stage configs.
        stage_conv_a_kernel_size=(1, 1, 1),
        stage_conv_b_kernel_size=(3, 3, 3),
        stage_conv_b_width_per_group=1,
        stage_spatial_stride=(1, 2, 2, 2),
        stage_temporal_stride=(1, 2, 2, 2),
        bottleneck=create_bottleneck_block,
        # Head configs. The builder's own head activation is disabled; the
        # activation from the config is kept separately in `post_act`.
        head_pool=nn.AvgPool3d,
        head_pool_kernel_size=head_pool_kernel,
        head_activation=None,
        head_output_with_global_average=False,
    )

    self.post_act = get_head_act(cfg.MODEL.HEAD_ACT)
def __init__(self, cfg):
    """
    SlowFastSWAV constructor: copies the SWAV view/crop settings from the
    config, then builds the network and initialises its weights.

    The `__init__` method of any subclass should also contain these
    arguments.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    # SWAV settings are stored before the parent constructor runs.
    # NOTE(review): presumably the parent constructor (or something it
    # calls) needs them already -- confirm before moving these lines.
    self.temp_crops = cfg.SWAV_nmb_frame_views
    self.spat_crops_list = cfg.SWAV_nmb_crops
    self.spat_crops_size = cfg.SWAV_size_crops
    self.shuffle = cfg.SWAV_shuffle

    super(SlowFastSWAV, self).__init__()

    self.norm_module = get_norm(cfg)
    self.enable_detection = cfg.DETECTION.ENABLE
    self.num_pathways = 2

    self._construct_network(cfg)

    fc_init_std = cfg.MODEL.FC_INIT_STD
    zero_init_final_bn = cfg.RESNET.ZERO_INIT_FINAL_BN
    init_helper.init_weights(self, fc_init_std, zero_init_final_bn)
def _construct_network(self, cfg):
    """
    Builds a single pathway ResNet model through `create_resnet` and stores
    it on `self.model`; the head activation chosen by `cfg.MODEL.HEAD_ACT`
    is stored on `self.post_act`.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    # Params from configs.
    norm_module = get_norm(cfg)
    head_act = get_head_act(cfg.MODEL.HEAD_ACT)
    arch = cfg.MODEL.ARCH
    pool_size = _POOL1[arch]
    num_groups = cfg.RESNET.NUM_GROUPS
    dilations = cfg.RESNET.SPATIAL_DILATIONS
    strides = cfg.RESNET.SPATIAL_STRIDES
    kernel_basis = _TEMPORAL_KERNEL_BASIS[arch]

    # Only the first pathway entry of the pooling table is used here.
    first_pool = pool_size[0]
    # A stage-1 pool is added unless the pooling window is all ones.
    use_stage1_pool = first_pool[0] != 1 or len(set(first_pool)) > 1

    # The same per-stage stride is used for both height and width.
    per_stage_stride = tuple(strides[stage][0] for stage in range(4))

    if arch == "i3d":
        # i3d uses fixed conv_a kernels; the list entries hold more than one
        # kernel for a stage (presumably cycled over the stage's blocks --
        # confirm in create_resnet).
        conv_a_kernels = (
            (3, 1, 1),
            [(3, 1, 1), (1, 1, 1)],
            [(3, 1, 1), (1, 1, 1)],
            [(1, 1, 1), (3, 1, 1)],
        )
    else:
        conv_a_kernels = tuple(
            (kernel_basis[stage][0][0], 1, 1) for stage in range(1, 5)
        )

    conv_b_dilations = tuple(
        (1, dilations[stage][0], dilations[stage][0]) for stage in range(4)
    )

    head_pool_kernel = (
        cfg.DATA.NUM_FRAMES // first_pool[0],
        cfg.DATA.TRAIN_CROP_SIZE // 32 // first_pool[1],
        cfg.DATA.TRAIN_CROP_SIZE // 32 // first_pool[2],
    )

    self.model = create_resnet(
        # Input clip configs.
        input_channel=cfg.DATA.INPUT_CHANNEL_NUM[0],
        # Model configs.
        model_depth=cfg.RESNET.DEPTH,
        model_num_class=cfg.MODEL.NUM_CLASSES,
        dropout_rate=cfg.MODEL.DROPOUT_RATE,
        # Normalization configs.
        norm=norm_module,
        # Activation configs.
        activation=partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
        # Stem configs.
        stem_dim_out=cfg.RESNET.WIDTH_PER_GROUP,
        stem_conv_kernel_size=(kernel_basis[0][0][0], 7, 7),
        stem_conv_stride=(1, 2, 2),
        stem_pool=nn.MaxPool3d,
        stem_pool_kernel_size=(1, 3, 3),
        stem_pool_stride=(1, 2, 2),
        # Stage configs.
        stage1_pool=nn.MaxPool3d if use_stage1_pool else None,
        stage1_pool_kernel_size=first_pool,
        stage_conv_a_kernel_size=conv_a_kernels,
        stage_conv_b_kernel_size=((1, 3, 3),) * 4,
        stage_conv_b_num_groups=(num_groups,) * 4,
        stage_conv_b_dilation=conv_b_dilations,
        stage_spatial_h_stride=per_stage_stride,
        stage_spatial_w_stride=per_stage_stride,
        stage_temporal_stride=(1, 1, 1, 1),
        bottleneck=create_bottleneck_block,
        # Head configs. The builder's own head activation is disabled; the
        # activation from the config is kept separately in `post_act`.
        head_pool=nn.AvgPool3d,
        head_pool_kernel_size=head_pool_kernel,
        head_activation=None,
        head_output_with_global_average=False,
    )

    self.post_act = head_act
def _construct_network(self, cfg):
    """
    Builds a SlowFast model through `create_slowfast` and stores it on
    `self.model`; the head activation chosen by `cfg.MODEL.HEAD_ACT` is
    stored on `self.post_act`.

    Per-pathway settings are passed as 2-tuples indexed by pathway
    (index 0 is presumably the slow pathway and index 1 the fast one --
    confirm against create_slowfast).

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    # Params from configs.
    norm_module = get_norm(cfg)
    pool_size = _POOL1[cfg.MODEL.ARCH]
    num_groups = cfg.RESNET.NUM_GROUPS
    width_per_group = cfg.RESNET.WIDTH_PER_GROUP
    spatial_dilations = cfg.RESNET.SPATIAL_DILATIONS
    spatial_strides = cfg.RESNET.SPATIAL_STRIDES
    temp_kernel = _TEMPORAL_KERNEL_BASIS[cfg.MODEL.ARCH]

    pathways = range(2)
    stages = range(4)

    # conv_a kernels: the first entry comes from the temporal kernel basis
    # (row 0 of the basis is the stem, so stages start at row 1).
    stage_conv_a_kernel_sizes = tuple(
        tuple((temp_kernel[stage + 1][pathway][0], 1, 1) for stage in stages)
        for pathway in pathways
    )

    # Every stage reads its own row of SPATIAL_DILATIONS for both pathways.
    # The second pathway previously reused row 1 for its last two stages,
    # which ignored the dilation configured for those stages.
    stage_conv_b_dilations = tuple(
        tuple(
            (
                1,
                spatial_dilations[stage][pathway],
                spatial_dilations[stage][pathway],
            )
            for stage in stages
        )
        for pathway in pathways
    )

    stage_spatial_strides = tuple(
        tuple(spatial_strides[stage][pathway] for stage in stages)
        for pathway in pathways
    )

    # Head pooling windows. Only pathway 0 has its frame count divided by
    # ALPHA before the stage-1 pooling factor is applied.
    crop_pool = cfg.DATA.TRAIN_CROP_SIZE // 32
    head_pool_kernel_sizes = (
        (
            cfg.DATA.NUM_FRAMES // cfg.SLOWFAST.ALPHA // pool_size[0][0],
            crop_pool // pool_size[0][1],
            crop_pool // pool_size[0][2],
        ),
        (
            cfg.DATA.NUM_FRAMES // pool_size[1][0],
            crop_pool // pool_size[1][1],
            crop_pool // pool_size[1][2],
        ),
    )

    self.model = create_slowfast(
        # SlowFast configs.
        slowfast_channel_reduction_ratio=cfg.SLOWFAST.BETA_INV,
        slowfast_conv_channel_fusion_ratio=(
            cfg.SLOWFAST.FUSION_CONV_CHANNEL_RATIO
        ),
        slowfast_fusion_conv_kernel_size=(
            cfg.SLOWFAST.FUSION_KERNEL_SZ,
            1,
            1,
        ),
        slowfast_fusion_conv_stride=(cfg.SLOWFAST.ALPHA, 1, 1),
        # Input clip configs.
        input_channels=cfg.DATA.INPUT_CHANNEL_NUM,
        # Model configs.
        model_depth=cfg.RESNET.DEPTH,
        model_num_class=cfg.MODEL.NUM_CLASSES,
        dropout_rate=cfg.MODEL.DROPOUT_RATE,
        # Normalization configs.
        norm=norm_module,
        # Activation configs.
        activation=partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
        # Stem configs.
        stem_dim_outs=(
            width_per_group,
            width_per_group // cfg.SLOWFAST.BETA_INV,
        ),
        stem_conv_kernel_sizes=(
            (temp_kernel[0][0][0], 7, 7),
            (temp_kernel[0][1][0], 7, 7),
        ),
        stem_conv_strides=((1, 2, 2), (1, 2, 2)),
        stem_pool=nn.MaxPool3d,
        stem_pool_kernel_sizes=((1, 3, 3), (1, 3, 3)),
        stem_pool_strides=((1, 2, 2), (1, 2, 2)),
        # Stage configs.
        stage_conv_a_kernel_sizes=stage_conv_a_kernel_sizes,
        stage_conv_b_kernel_sizes=(
            ((1, 3, 3), (1, 3, 3), (1, 3, 3), (1, 3, 3)),
            ((1, 3, 3), (1, 3, 3), (1, 3, 3), (1, 3, 3)),
        ),
        stage_conv_b_num_groups=(
            (num_groups, num_groups, num_groups, num_groups),
            (num_groups, num_groups, num_groups, num_groups),
        ),
        stage_conv_b_dilations=stage_conv_b_dilations,
        stage_spatial_strides=stage_spatial_strides,
        stage_temporal_strides=((1, 1, 1, 1), (1, 1, 1, 1)),
        bottleneck=create_bottleneck_block,
        # Head configs. The builder's own head activation is disabled; the
        # activation from the config is kept separately in `post_act`.
        head_pool=nn.AvgPool3d,
        head_pool_kernel_sizes=head_pool_kernel_sizes,
        head_activation=None,
        head_output_with_global_average=False,
    )

    self.post_act = get_head_act(cfg.MODEL.HEAD_ACT)
def _construct_network(self, cfg):
    """
    Builds a SlowFast model through `create_slowfast` and stores it on
    `self.model`. When `cfg.DETECTION.ENABLE` is set, a ROI pooling head is
    also created and stored on `self.detection_head`. The head activation
    chosen by `cfg.MODEL.HEAD_ACT` is stored on `self.post_act`.

    Per-pathway settings are indexed by pathway (index 0 is presumably the
    slow pathway and index 1 the fast one -- confirm against
    create_slowfast).

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.

    Raises:
        KeyError: if `cfg.RESNET.DEPTH` is not one of the depths listed in
            the stage-depth table below (50 or 101).
    """
    _MODEL_STAGE_DEPTH = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3)}

    # Params from configs.
    norm_module = get_norm(cfg)
    pool_size = _POOL1[cfg.MODEL.ARCH]
    num_groups = cfg.RESNET.NUM_GROUPS
    width_per_group = cfg.RESNET.WIDTH_PER_GROUP
    spatial_dilations = cfg.RESNET.SPATIAL_DILATIONS
    spatial_strides = cfg.RESNET.SPATIAL_STRIDES
    temp_kernel = _TEMPORAL_KERNEL_BASIS[cfg.MODEL.ARCH]
    num_block_temp_kernel = cfg.RESNET.NUM_BLOCK_TEMP_KERNEL
    stage_depth = _MODEL_STAGE_DEPTH[cfg.RESNET.DEPTH]

    pathways = range(2)
    stages = range(4)

    def _conv_a_kernels(pathway, stage):
        # The first NUM_BLOCK_TEMP_KERNEL blocks of a stage get the kernel
        # from the temporal basis; the remaining blocks fall back to 1x1x1.
        with_temporal = num_block_temp_kernel[stage][pathway]
        without_temporal = stage_depth[stage] - with_temporal
        return (
            ((temp_kernel[stage + 1][pathway][0], 1, 1),) * with_temporal
            + ((1, 1, 1),) * without_temporal
        )

    stage_conv_a_kernel_sizes = [
        [_conv_a_kernels(pathway, stage) for stage in stages]
        for pathway in pathways
    ]

    # Every stage reads its own row of SPATIAL_DILATIONS for both pathways.
    # The second pathway previously reused row 1 for its last two stages,
    # which ignored the dilation configured for those stages.
    stage_conv_b_dilations = tuple(
        tuple(
            (
                1,
                spatial_dilations[stage][pathway],
                spatial_dilations[stage][pathway],
            )
            for stage in stages
        )
        for pathway in pathways
    )

    stage_spatial_strides = tuple(
        tuple(spatial_strides[stage][pathway] for stage in stages)
        for pathway in pathways
    )

    # Head from config
    # Number of stages = 4
    stage_dim_in = width_per_group * 2 ** (4 + 1)
    head_in_features = stage_dim_in + stage_dim_in // cfg.SLOWFAST.BETA_INV

    # Temporal pooling extent per pathway, shared by both head variants.
    # Only pathway 0 has its frame count divided by ALPHA.
    temporal_pool = (
        cfg.DATA.NUM_FRAMES // cfg.SLOWFAST.ALPHA // pool_size[0][0],
        cfg.DATA.NUM_FRAMES // pool_size[1][0],
    )

    if cfg.DETECTION.ENABLE:
        self.detection_head = create_res_roi_pooling_head(
            in_features=head_in_features,
            out_features=cfg.MODEL.NUM_CLASSES,
            pool=None,
            output_size=(1, 1, 1),
            dropout_rate=cfg.MODEL.DROPOUT_RATE,
            activation=None,
            output_with_global_average=False,
            pool_spatial=nn.MaxPool2d,
            resolution=[cfg.DETECTION.ROI_XFORM_RESOLUTION] * 2,
            spatial_scale=1.0 / float(cfg.DETECTION.SPATIAL_SCALE_FACTOR),
            sampling_ratio=0,
            roi=ROIAlign,
        )
        # With detection only the temporal axis is pooled here; the
        # spatial window stays at 1.
        head_pool_kernel_sizes = (
            (temporal_pool[0], 1, 1),
            (temporal_pool[1], 1, 1),
        )
    else:
        crop_pool = cfg.DATA.TRAIN_CROP_SIZE // 32
        head_pool_kernel_sizes = (
            (
                temporal_pool[0],
                crop_pool // pool_size[0][1],
                crop_pool // pool_size[0][2],
            ),
            (
                temporal_pool[1],
                crop_pool // pool_size[1][1],
                crop_pool // pool_size[1][2],
            ),
        )

    self.model = create_slowfast(
        # SlowFast configs.
        slowfast_channel_reduction_ratio=cfg.SLOWFAST.BETA_INV,
        slowfast_conv_channel_fusion_ratio=(
            cfg.SLOWFAST.FUSION_CONV_CHANNEL_RATIO
        ),
        slowfast_fusion_conv_kernel_size=(
            cfg.SLOWFAST.FUSION_KERNEL_SZ,
            1,
            1,
        ),
        slowfast_fusion_conv_stride=(cfg.SLOWFAST.ALPHA, 1, 1),
        # Input clip configs.
        input_channels=cfg.DATA.INPUT_CHANNEL_NUM,
        # Model configs.
        model_depth=cfg.RESNET.DEPTH,
        model_num_class=cfg.MODEL.NUM_CLASSES,
        dropout_rate=cfg.MODEL.DROPOUT_RATE,
        # Normalization configs.
        norm=norm_module,
        # Activation configs.
        activation=partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
        # Stem configs.
        stem_dim_outs=(
            width_per_group,
            width_per_group // cfg.SLOWFAST.BETA_INV,
        ),
        stem_conv_kernel_sizes=(
            (temp_kernel[0][0][0], 7, 7),
            (temp_kernel[0][1][0], 7, 7),
        ),
        stem_conv_strides=((1, 2, 2), (1, 2, 2)),
        stem_pool=nn.MaxPool3d,
        stem_pool_kernel_sizes=((1, 3, 3), (1, 3, 3)),
        stem_pool_strides=((1, 2, 2), (1, 2, 2)),
        # Stage configs.
        stage_conv_a_kernel_sizes=stage_conv_a_kernel_sizes,
        stage_conv_b_kernel_sizes=(
            ((1, 3, 3), (1, 3, 3), (1, 3, 3), (1, 3, 3)),
            ((1, 3, 3), (1, 3, 3), (1, 3, 3), (1, 3, 3)),
        ),
        stage_conv_b_num_groups=(
            (num_groups, num_groups, num_groups, num_groups),
            (num_groups, num_groups, num_groups, num_groups),
        ),
        stage_conv_b_dilations=stage_conv_b_dilations,
        stage_spatial_strides=stage_spatial_strides,
        stage_temporal_strides=((1, 1, 1, 1), (1, 1, 1, 1)),
        bottleneck=create_bottleneck_block,
        # Head configs. No classification head is attached in detection
        # mode. NOTE(review): `self.detection_mode` is set outside this
        # method and is presumably equal to cfg.DETECTION.ENABLE -- confirm.
        head=create_res_basic_head if not self.detection_mode else None,
        head_pool=nn.AvgPool3d,
        head_pool_kernel_sizes=head_pool_kernel_sizes,
        head_activation=None,
        head_output_with_global_average=False,
    )

    self.post_act = get_head_act(cfg.MODEL.HEAD_ACT)