def __init__(self, num_class, num_segments, base_model='mobilenetv2',
             dropout=0.5, partial_bn=True, is_shift=False, shift_div=8):
    """Store the configuration, build the backbone and attach the consensus.

    Args:
        num_class: Forwarded to ``self._prepare_tsn``.
        num_segments: Stored as ``self.num_segments``.
        base_model: Backbone name; stored as ``self.base_model_name`` and
            forwarded to ``self._prepare_base_model``.
        dropout: Stored as ``self.dropout``.
        partial_bn: Stored as ``self._enable_pbn``; when truthy,
            ``self.partialBN(True)`` is called last.
        is_shift: Stored as ``self.is_shift``.
        shift_div: Stored as ``self.shift_div``.
    """
    super(TSN, self).__init__()

    # Configuration is recorded before any helper runs; the helpers are
    # defined elsewhere and may read these attributes.
    self.num_segments = num_segments
    self.base_model_name = base_model
    self.dropout = dropout
    self.is_shift = is_shift
    self.shift_div = shift_div

    summary = """ TSN Configurations: base model: {} num_segments: {} dropout_ratio: {} shift_div: {} """.format(
        base_model, self.num_segments, self.dropout, self.shift_div)
    print(summary)

    # Backbone first, then the TSN-specific head.
    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class)

    # This variant builds the consensus module with its default arguments.
    self.consensus = ConsensusModule()

    self._enable_pbn = partial_bn
    if self._enable_pbn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1, partial_bn=True):
    """Build a TSN: record settings, prepare the backbone, adapt it to the
    input modality and attach the segment-consensus module.

    Args:
        num_class: Forwarded to ``self._prepare_tsn``.
        num_segments: Number of segments the video is split into.
        modality: Input type, chosen from RGB, Flow and RGBDiff.  ``'Flow'``
            and ``'RGBDiff'`` trigger a conversion of the base model.
        base_model: Backbone name handed to ``self._prepare_base_model``.
        new_length: Snippet length.  ``None`` selects 1 for ``"RGB"`` and 5
            for any other modality.
        consensus_type: Aggregation function handed to ``ConsensusModule``
            (the original notes list avg, max, topk, cnn and rnn).
        before_softmax: When false, an ``nn.Softmax`` layer is attached and
            only ``'avg'`` consensus is accepted.
        dropout: Dropout setting, stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num`` (presumably the number of
            crops -- the original author was unsure as well).
        partial_bn: Stored as ``self._enable_pbn``; when truthy,
            ``self.partialBN(True)`` is called last.

    Raises:
        ValueError: If ``before_softmax`` is false and ``consensus_type`` is
            not ``'avg'``.
    """
    super(TSN, self).__init__()

    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True  # whether to reshape
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    # Aggregating after softmax is only allowed with average consensus.
    if consensus_type != 'avg' and not before_softmax:
        raise ValueError("Only avg consensus can be used after Softmax")

    # Explicit snippet length wins; otherwise RGB uses 1 and the rest use 5.
    if new_length is not None:
        self.new_length = new_length
    elif modality == "RGB":
        self.new_length = 1
    else:
        self.new_length = 5

    banner = """ Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
        base_model, self.modality, self.num_segments, self.new_length,
        consensus_type, self.dropout)
    print(banner)

    # Load and initialise the backbone, then the TSN head (the value
    # returned by _prepare_tsn is not used in this constructor).
    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class)

    # Adapt the network structure to the kind of input features.
    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    # ConsensusModule is where the aggregation function is defined.
    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if self._enable_pbn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1, partial_bn=True):
    """Configure the TSN, prepare its backbone and attach the consensus.

    Args:
        num_class: Forwarded to ``self._prepare_tsn``.
        num_segments: Stored as ``self.num_segments``.
        modality: ``'Flow'`` and ``'RGBDiff'`` convert the base model; any
            other value leaves it as prepared.
        base_model: Name forwarded to ``self._prepare_base_model``.
        new_length: Explicit snippet length; ``None`` means 1 for ``"RGB"``
            and 5 otherwise.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created and
            ``consensus_type`` must be ``'avg'``.
        dropout: Stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.

    Raises:
        ValueError: For a non-``'avg'`` consensus with
            ``before_softmax=False``.
    """
    super(TSN, self).__init__()

    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    softmax_first = not before_softmax
    if softmax_first and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    default_length = 1 if modality == "RGB" else 5
    self.new_length = default_length if new_length is None else new_length

    message = """ Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
        base_model, self.modality, self.num_segments, self.new_length,
        consensus_type, self.dropout)
    print(message)

    self._prepare_base_model(base_model)
    # The return value of _prepare_tsn is not needed here.
    self._prepare_tsn(num_class)

    # Only Flow and RGBDiff inputs require the base model to be converted.
    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if softmax_first:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def forward(self, x):
    """Average per-segment features over segments and classify the result.

    ``x`` is passed through ``self.id_tester``; the output is regrouped so
    that its second axis has ``self.num_segments`` entries, reduced with an
    ``'avg'`` ``ConsensusModule``, flattened to rows of width 512 and fed to
    ``self.classifier``.

    Returns:
        The output of ``self.classifier``.
    """
    # Shape examples in the comments are the original author's notes
    # (e.g. x: [112, 128, 28, 28]).
    segment_average = ConsensusModule('avg')

    features = self.id_tester(x)  # e.g. [112, 512, 1, 1]

    # Split the leading axis into (-1, num_segments), keeping the rest.
    grouped_shape = (-1, self.num_segments) + features.size()[1:]
    grouped = features.view(grouped_shape)

    # Reduce over segments, then drop the singleton segment axis.
    pooled = segment_average(grouped).squeeze(1)  # e.g. [16, 512, 1, 1]

    flat = pooled.view(-1, 512)
    return self.classifier(flat)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, img_feature_dim=256, crop_num=1, partial_bn=True,
             print_spec=True, opt=None):
    """Build a TSN whose consensus may be a Temporal Relation Network.

    Args:
        num_class: Forwarded to ``self._prepare_tsn`` and, for TRN
            consensus types, to ``TRNmodule.return_TRN``.
        num_segments: Stored as ``self.num_segments``.
        modality: ``'Flow'`` and ``'RGBDiff'`` convert the base model.
        base_model: Name forwarded to ``self._prepare_base_model``.
        new_length: Explicit snippet length; ``None`` means 1 for ``"RGB"``
            and 5 otherwise.
        consensus_type: ``'TRN'`` / ``'TRNmultiscale'`` select the TRN
            module; anything else goes to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created and
            ``consensus_type`` must be ``'avg'``.
        dropout: Stored as ``self.dropout``.
        img_feature_dim: Dimension of the CNN feature representing each
            frame; forwarded to the TRN module.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.
        print_spec: When truthy, the configuration summary is printed.
        opt: Forwarded unchanged to ``self._prepare_base_model``.

    Raises:
        ValueError: For a non-``'avg'`` consensus with
            ``before_softmax=False``.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.img_feature_dim = img_feature_dim

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    # Truthiness test instead of comparing against True.
    if print_spec:
        print((""" Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} img_feature_dim: {} """.format(
            base_model, self.modality, self.num_segments, self.new_length,
            consensus_type, self.dropout, self.img_feature_dim)))

    self._prepare_base_model(base_model, opt)
    # The value returned by _prepare_tsn was bound but never used.
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    if consensus_type in ('TRN', 'TRNmultiscale'):
        # Plug in the Temporal Relation Network module.
        self.consensus = TRNmodule.return_TRN(
            consensus_type, self.img_feature_dim, self.num_segments,
            num_class)
    else:
        self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='BNInception',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.5, crop_num=1, partial_bn=True, print_spec=True,
             gsm=False, target_transform=None):
    """Build the video model: settings, backbone, modality conversion and
    segment consensus.

    Args:
        num_class: Forwarded to ``self._prepare_model``.
        num_segments: Stored as ``self.num_segments`` and printed.
        modality: ``'Flow'`` and ``'RGBDiff'`` convert the base model.
        base_model: Name forwarded to ``self._prepare_base_model``.
        new_length: Explicit snippet length; ``None`` means 1 for ``"RGB"``
            and 5 otherwise.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created and
            ``consensus_type`` must be ``'avg'``.
        dropout: Stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.
        print_spec: When truthy, the configuration summary is printed.
        gsm: Stored as ``self.gsm``.
        target_transform: Stored as ``self.target_transform``.

    Raises:
        ValueError: For a non-``'avg'`` consensus with
            ``before_softmax=False``.
    """
    super(VideoModel, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    print('Number of segments = {}'.format(self.num_segments))
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.gsm = gsm
    self.target_transform = target_transform

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    # Truthiness test instead of comparing against True.
    if print_spec:
        print((""" Initializing Video Model with backbone: {}. Model Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
            base_model, self.modality, self.num_segments, self.new_length,
            consensus_type, self.dropout)))

    self._prepare_base_model(base_model)
    # The value returned by _prepare_model is kept on the instance.
    self.feature_dim = self._prepare_model(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, base_model='resnet101',
             consensus_type='avg', before_softmax=True, dropout=0.8,
             crop_num=1, partial_bn=True, pretrain='imagenet', fc_lr5=False,
             args=None):
    """Build the adaptive TSN variant driven by an ``args`` namespace.

    Args:
        num_class: Stored as ``self.num_class`` and forwarded to
            ``self._prepare_fc``.
        num_segments: Stored as ``self.num_segments`` and
            ``self.time_steps``.
        base_model: Backbone name.  Overridden by ``args.backbone_list[0]``
            (or ``None`` if that list is empty) when
            ``args.ada_reso_skip`` is truthy.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created.
        dropout: Stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.
        pretrain: Stored as ``self.pretrain``.
        fc_lr5: Stored as ``self.fc_lr5``.
        args: Required despite the ``None`` default.  This constructor reads
            ``rescale_to``, ``ada_reso_skip``, ``backbone_list`` and
            ``skip_list`` from it.

    Raises:
        ValueError: If ``args`` is ``None``.
    """
    # The signature allows args=None, but the body dereferences it right
    # away; fail with an explicit message rather than an AttributeError
    # on NoneType.
    if args is None:
        raise ValueError(
            "TSN_Ada requires `args` (rescale_to, ada_reso_skip, "
            "backbone_list, skip_list); got None")

    super(TSN_Ada, self).__init__()
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.pretrain = pretrain
    self.fc_lr5 = fc_lr5

    # TODO(yue)
    self.args = args
    self.rescale_to = args.rescale_to
    if self.args.ada_reso_skip:
        # With adaptive resolution/skip the first configured backbone
        # replaces the base_model argument.
        backbones = self.args.backbone_list
        base_model = backbones[0] if len(backbones) >= 1 else None
    self.base_model_name = base_model
    self.num_class = num_class
    self.multi_models = False
    self.time_steps = self.num_segments

    if self.args.ada_reso_skip:
        self.reso_dim = self._get_resolution_dimension()
        self.skip_dim = len(self.args.skip_list)
        self.action_dim = self._get_action_dimension()
        self._prepare_policy_net()
        self._extends_to_multi_models()

    self._prepare_base_model(base_model)
    self._prepare_fc(num_class)

    self.consensus = ConsensusModule(consensus_type, args=self.args)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1, partial_bn=True,
             does_use_global_img=False):
    """Compose one or two ``PartialTSN`` branches under a shared consensus.

    A "local" ``PartialTSN`` is always built; a "global" one is built as
    well when ``does_use_global_img`` is truthy.  Each branch has its
    ``consensus`` set to ``None`` after construction and its ``new_fc`` set
    to ``None`` after ``self._prepare_newfc`` has run.

    Args:
        num_class: Forwarded to each ``PartialTSN`` and to
            ``self._prepare_newfc``.
        num_segments, modality, base_model, new_length, consensus_type,
        before_softmax, dropout, crop_num, partial_bn: Stored on ``self``
            (``partial_bn`` as ``self._enable_pbn``) and forwarded by
            keyword to each ``PartialTSN``.
        does_use_global_img: Whether the additional global branch is built.
    """
    super(TSNCustom, self).__init__()

    self.num_segments = num_segments
    self.modality = modality
    self.new_length = new_length
    self.base_model = base_model  # here this is the name that was passed in
    self.consensus_type = consensus_type
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self._enable_pbn = partial_bn
    self.does_use_global_img = does_use_global_img
    print('\033[93muse global:{} \033[0m'.format(does_use_global_img))

    # Both branches receive exactly the same keyword arguments.
    branch_kwargs = dict(
        num_class=num_class,
        num_segments=num_segments,
        modality=modality,
        base_model=base_model,
        new_length=new_length,
        consensus_type=consensus_type,
        before_softmax=before_softmax,
        dropout=dropout,
        crop_num=crop_num,
        partial_bn=partial_bn,
    )

    # The global branch, if any, is created before the local one.
    if does_use_global_img:
        self.tsn_for_global = PartialTSN(**branch_kwargs)
        self.tsn_for_global.consensus = None

    self.tsn_for_local = PartialTSN(**branch_kwargs)
    self.tsn_for_local.consensus = None

    # This wrapper owns the consensus module and the new fc layer.
    self.consensus = ConsensusModule(consensus_type)
    self._prepare_newfc(num_class)

    if does_use_global_img:
        self.tsn_for_global.new_fc = None
    self.tsn_for_local.new_fc = None
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=False,
             dropout=0.8, crop_num=1, partial_bn=True):
    """Build a TSN that always carries a softmax layer.

    Unlike its sibling constructors, this one neither validates the
    ``before_softmax`` / ``consensus_type`` combination nor prints a
    configuration summary, and it creates ``self.softmax``
    unconditionally.

    Args:
        num_class: Forwarded to ``self._prepare_tsn``.
        num_segments: Stored as ``self.num_segments``.
        modality: ``'Flow'`` and ``'RGBDiff'`` convert the base model.
        base_model: Name forwarded to ``self._prepare_base_model``.
        new_length: Explicit snippet length; ``None`` means 1 for ``"RGB"``
            and 5 otherwise.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: Stored as ``self.before_softmax`` (default False).
        dropout: Stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.
    """
    super(TSN, self).__init__()

    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    if new_length is not None:
        self.new_length = new_length
    else:
        self.new_length = 1 if modality == "RGB" else 5

    self._prepare_base_model(base_model)
    # The return value of _prepare_tsn is not used here.
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)
    self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if self._enable_pbn:
        self.partialBN(True)
def __init__(self, feature_dim, modality, midfusion, num_class,
             consensus_type, before_softmax, dropout, num_segments):
    """Assemble the fusion and classification head.

    With a single modality, an optional dropout layer plus a classification
    layer on ``feature_dim`` inputs are added.  With several modalities the
    layers depend on ``midfusion``:

    * ``'concat'``: audiovisual fc layer, then classification on 512.
    * ``'context_gating'``: audiovisual fc layer, ``Context_Gating(512)``,
      then classification on 512.
    * ``'multimodal_gating'``: ``Multimodal_Gated_Unit``, optional dropout,
      then classification on 512.

    Any other ``midfusion`` value adds no fusion or classification layers.

    Args:
        feature_dim: Per-modality feature size.
        modality: Sized collection of modalities; its length selects the
            single-modality or fusion path.
        midfusion: Fusion strategy name (see above).
        num_class: Stored as ``self.num_class``.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created.
        dropout: Dropout probability; a dropout layer is created here only
            when it is greater than zero.
        num_segments: Stored as ``self.num_segments``.
    """
    super().__init__()

    self.num_class = num_class
    self.modality = modality
    self.midfusion = midfusion
    self.reshape = True
    self.consensus = ConsensusModule(consensus_type)
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.num_segments = num_segments

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    # Single modality: no fusion, classify the raw feature directly.
    if len(self.modality) <= 1:
        if self.dropout > 0:
            self.dropout_layer = nn.Dropout(p=self.dropout)
        self._add_classification_layer(feature_dim)
        return

    # Fusion of several modalities.
    strategy = self.midfusion
    if strategy in ('concat', 'context_gating'):
        fused_width = len(self.modality) * feature_dim
        self._add_audiovisual_fc_layer(fused_width, 512)
        if strategy == 'context_gating':
            self.context_gating = Context_Gating(512)
        self._add_classification_layer(512)
    elif strategy == 'multimodal_gating':
        self.multimodal_gated_unit = Multimodal_Gated_Unit(feature_dim, 512)
        if self.dropout > 0:
            self.dropout_layer = nn.Dropout(p=self.dropout)
        self._add_classification_layer(512)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1, partial_bn=True):
    """Configure the TSN, prepare its backbone and attach the consensus.

    Args:
        num_class: Forwarded to ``self._prepare_tsn``.
        num_segments: Stored as ``self.num_segments``.
        modality: ``'Flow'`` and ``'RGBDiff'`` convert the base model.
        base_model: Stored as ``self.base_model_name`` and forwarded to
            ``self._prepare_base_model``.
        new_length: Explicit snippet length; ``None`` means 1 for ``"RGB"``
            and 5 otherwise.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created and
            ``consensus_type`` must be ``'avg'``.
        dropout: Stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.

    Raises:
        ValueError: For a non-``'avg'`` consensus with
            ``before_softmax=False``.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.base_model_name = base_model

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    print((""" Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
        base_model, self.modality, self.num_segments, self.new_length,
        consensus_type, self.dropout)))

    self._prepare_base_model(base_model)
    # The value returned by _prepare_tsn was bound but never used.
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(
    self,
    num_class: int,
    num_segments: int,
    modality: str,
    base_model: str = "resnet50",
    segment_length: Optional[int] = None,
    consensus_type: str = "avg",
    dropout: float = 0.7,
    img_feature_dim: int = 256,
    partial_bn: bool = True,
    pretrained: str = "imagenet",
):
    """Build the TSN/TRN model: settings, backbone, head and consensus.

    Args:
        num_class: Stored as ``self.num_class``; forwarded to
            ``return_TRN`` for TRN consensus types.
        num_segments: Stored as ``self.num_segments``.
        modality: ``"Flow"`` converts the base model; ``"RGB"`` selects a
            default segment length of 1, anything else 5.
        base_model: Stored as ``self.arch`` and forwarded to
            ``self._prepare_base_model``.
        segment_length: Explicit segment length, or ``None`` for the
            modality-dependent default.
        consensus_type: Values starting with ``"TRN"`` select
            ``return_TRN``; others go to ``ConsensusModule``.
        dropout: Stored as ``self.dropout``.
        img_feature_dim: Forwarded to ``return_TRN`` (only relevant for
            TRN, per the log message).
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.
        pretrained: Stored as ``self.pretrained``.
    """
    super(TSN, self).__init__()
    self.num_class = num_class
    self.num_segments = num_segments
    self.modality = modality
    self.arch = base_model
    self.consensus_type = consensus_type
    self.dropout = dropout
    self.img_feature_dim = img_feature_dim
    self._enable_pbn = partial_bn
    self.pretrained = pretrained

    if segment_length is None:
        self.segment_length = 1 if modality == "RGB" else 5
    else:
        self.segment_length = segment_length

    # The message used to open with a backslash followed by a space, which
    # is an invalid escape sequence; the stray backslash is gone and the
    # rest of the text is unchanged.
    LOG.info(
        f"Initializing {self.__class__.__name__} with base model: {base_model}. "
        f"{self.__class__.__name__} Configuration: "
        f"input_modality: {self.modality} "
        f"num_segments: {self.num_segments} "
        f"segment_length: {self.segment_length} "
        f"consensus_module: {self.consensus_type} "
        f"img_feature_dim: {self.img_feature_dim} (only valid for TRN) "
        f"dropout_ratio: {self.dropout} "
        f"partial_bn: {partial_bn} "
    )

    self.base_model = self._prepare_base_model(base_model)
    # The feature size is read off the backbone's layer named by
    # ``last_layer_name``.
    self.feature_dim = getattr(
        self.base_model, self.base_model.last_layer_name
    ).in_features
    self._prepare_tsn()

    if self.modality == "Flow":
        LOG.info("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        LOG.debug("Done. Flow model ready...")

    if consensus_type.startswith("TRN"):
        self.consensus = return_TRN(
            consensus_type, self.img_feature_dim, self.num_segments, num_class
        )
    else:
        self.consensus = ConsensusModule(consensus_type)

    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1, partial_bn=True):
    """Build a TSN extended with Sobel, 1x1-conv, 3D-conv and ResNet-block
    side modules.

    Every auxiliary layer created here is moved to CUDA with ``.cuda()`` at
    construction time.  Layers are created in a fixed order so that module
    registration and random initialisation follow the same sequence on
    every run.

    Args:
        num_class: Forwarded to ``self._prepare_tsn`` and
            ``self._construct_RGB_model``.
        num_segments: Stored as ``self.num_segments``.
        modality: ``'Flow'`` and ``'RGBDiff'`` convert the base model.
        base_model: Forwarded to ``self._prepare_base_model`` and
            ``self._construct_RGB_model``.
        new_length: Explicit snippet length; ``None`` means 1 for ``"RGB"``
            and 5 otherwise.
        consensus_type: Used for both ``self.InnerConsensus`` and
            ``self.consensus``.
        before_softmax: When false, ``self.softmax`` is created and
            ``consensus_type`` must be ``'avg'``.
        dropout: Stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.

    Raises:
        ValueError: For a non-``'avg'`` consensus with
            ``before_softmax=False``.
    """
    super(TSN, self).__init__()

    # Sobel members: a bias-free 3x3 conv with frozen weight, plus two
    # fixed 3x3 kernels stored as non-trainable parameters.
    self.sobel = torch.nn.Conv2d(1, 1, 3, bias=False).cuda()
    self.sobel.weight.requires_grad = False
    self.sobel_x = torch.nn.Parameter(
        torch.Tensor([[[[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]]]))
    self.ReflectionPad = torch.nn.ReflectionPad2d(1).cuda()
    self.sobel_y = torch.nn.Parameter(
        torch.Tensor([[[[1, 2, 1], [0, 0, 0], [-1, -2, -1]]]]))
    for fixed_kernel in (self.sobel_x, self.sobel_y):
        fixed_kernel.requires_grad = False

    # Residual stages, created in the order 56->28, 28->14, 14->7.
    for attr_name, in_ch in (('model56to28', 384),
                             ('model28to14', 768),
                             ('model14to7', 1152)):
        setattr(self, attr_name,
                ResNetBlock(BasicBlock, 4, inchannels=in_ch).cuda())

    self.avgpooling7to1 = nn.AvgPool2d(7, stride=1).cuda()
    self.fc1024to101 = nn.Linear(1152, 101).cuda()
    self.InnerConsensus = ConsensusModule(consensus_type)

    # Layers collected here are re-initialised by the loop further down.
    self.resnet_optimal = []

    # 1x1 convolutions mapping 192/256/576 channels to 128, first for the
    # "subconv" group and then for the "sobelconv" group.
    for group in ('subconv', 'sobelconv'):
        for stage, in_ch in ((1, 192), (2, 256), (3, 576)):
            reducer = torch.nn.Conv2d(in_ch, 128, 1).cuda()
            setattr(self, 'resnet_{}_stage{}'.format(group, stage), reducer)
            self.resnet_optimal.append(reducer)

    # One 3D convolution (6 -> 1 channels) per stage.
    for stage in (1, 2, 3):
        conv3d = nn.Conv3d(6, 1, 3, stride=1, padding=1).cuda()
        setattr(self, 'resnet_conv3d_stage{}'.format(stage), conv3d)
        self.resnet_optimal.append(conv3d)

    # Normal initialisation with std sqrt(2 / (kernel volume * out_channels)).
    for layer in self.resnet_optimal:
        if isinstance(layer, nn.Conv2d):
            k_h, k_w = layer.kernel_size
            fan = k_h * k_w * layer.out_channels
        elif isinstance(layer, nn.Conv3d):
            k_d, k_h, k_w = layer.kernel_size
            fan = k_d * k_h * k_w * layer.out_channels
        else:
            continue
        layer.weight.data.normal_(0, math.sqrt(2. / fan))

    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    if consensus_type != 'avg' and not before_softmax:
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is not None:
        self.new_length = new_length
    else:
        self.new_length = 1 if modality == "RGB" else 5

    banner = """ Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
        base_model, self.modality, self.num_segments, self.new_length,
        consensus_type, self.dropout)
    print(banner)

    self._prepare_base_model(base_model)
    # The return value of _prepare_tsn is not used here.
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    # NOTE(review): assumed to run for every modality, not only inside the
    # RGBDiff branch -- confirm against the original layout.
    self._construct_RGB_model(base_model, num_class)

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, pretrained_parts, modality,
             base_model='resnet101', dataset='something', new_length=None,
             consensus_type='avg', before_softmax=True, dropout=0.8,
             fc_lr5=True, crop_num=1, partial_bn=True):
    """Build a TSN whose backbone may be the TSM / MS ResNet-18 variant.

    For ``base_model`` ``'TSM'`` or ``'MS'`` the backbone is
    ``resnet_TSM.resnet18`` with ``shift='TSM'`` (``'MS'`` additionally
    passes ``flow_estimation=1``), and the input size, mean and std are set
    here.  Any other name is delegated to ``self._prepare_base_model``.

    Args:
        num_class: Forwarded to ``self._prepare_tsn``.
        num_segments: Stored as ``self.num_segments``; also passed to the
            TSM/MS backbone.
        pretrained_parts: Stored as ``self.pretrained_parts``.
        modality: ``'Flow'`` and ``'RGBDiff'`` convert the base model.
        base_model: Backbone selector; stored as ``self.base_model_name``.
        dataset: Stored as ``self.dataset``.
        new_length: Explicit snippet length; ``None`` means 1 for every
            modality in this variant.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created and
            ``consensus_type`` must be ``'avg'``.
        dropout: Stored as ``self.dropout``.
        fc_lr5: Stored as ``self.fc_lr5``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.

    Raises:
        ValueError: For a non-``'avg'`` consensus with
            ``before_softmax=False``.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.pretrained_parts = pretrained_parts
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.base_model_name = base_model
    self.dataset = dataset
    self.fc_lr5 = fc_lr5

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    # The default used to be a conditional whose branches were both 1.
    self.new_length = 1 if new_length is None else new_length

    print((""" Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
        base_model, self.modality, self.num_segments, self.new_length,
        consensus_type, self.dropout)))

    if base_model in ('TSM', 'MS'):
        # Imported lazily so the module is only required for these
        # backbones.
        from resnet_TSM import resnet18

        # 'MS' differs from 'TSM' only by enabling flow estimation.
        extra_kwargs = {'flow_estimation': 1} if base_model == 'MS' else {}
        self.base_model = resnet18(
            True, shift='TSM', num_segments=num_segments, **extra_kwargs)
        self.base_model.last_layer_name = 'fc1'
        self.input_size = 224
        self.input_mean = [0.485, 0.456, 0.406]
        self.input_std = [0.229, 0.224, 0.225]

        # Only the 'TSM' setup collapses mean/std to one channel for Flow.
        if base_model == 'TSM' and self.modality == 'Flow':
            self.input_mean = [0.5]
            self.input_std = [np.mean(self.input_std)]
    else:
        self._prepare_base_model(base_model)

    # Every backbone path ends by preparing the TSN head; its return value
    # is not used here.
    self._prepare_tsn(num_class)

    # NOTE(review): assumed to apply to every backbone path, not just the
    # generic one -- confirm against the original layout.
    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
class TSN(nn.Module):
    """TSN model class.

    Summary of the original author's notes on the inputs:

    * ``num_class``: number of classes to predict.
    * ``new_length``: governs the kernel of the first convolution layer when
      the network is adapted to a snippet (rgb: 1, diff: 6, flow: 5, i.e.
      the current frame plus the following four frames with flow in two
      directions).
    * ``num_segments``: how many parts a video is split into; K in the
      paper, K = 3 by default there.
    * ``modality``: kind of input, e.g. RGB for ordinary images or Flow for
      optical flow.
    * ``base_model``: backbone, e.g. resnet101 or BNInception.
    * ``consensus_type``: how per-snippet outputs are fused (avg by
      default).
    * ``dropout``: dropout parameter.
    """

    def __init__(self, num_class, num_segments, modality,
                 base_model='resnet101', new_length=None,
                 consensus_type='avg', before_softmax=True, dropout=0.8,
                 crop_num=1, partial_bn=True):
        """Configure the TSN, prepare its backbone and attach the consensus.

        Raises:
            ValueError: If ``before_softmax`` is false and
                ``consensus_type`` is not ``'avg'``.
        """
        super(TSN, self).__init__()
        self.modality = modality
        self.num_segments = num_segments
        self.reshape = True
        self.before_softmax = before_softmax
        self.dropout = dropout
        self.crop_num = crop_num
        # Fusion across snippets: the segment consensus function.  The
        # original notes mention max pooling, average pooling and weighted
        # averaging as forms of g.
        self.consensus_type = consensus_type

        # The consensus normally runs before softmax normalisation; after
        # softmax only averaging is accepted.
        if not before_softmax and consensus_type != 'avg':
            raise ValueError("Only avg consensus can be used after Softmax")

        if new_length is None:
            self.new_length = 1 if modality == "RGB" else 5
        else:
            self.new_length = new_length

        print((""" Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
            base_model, self.modality, self.num_segments, self.new_length,
            consensus_type, self.dropout)))

        # Load the backbone.  The original notes say the paper uses
        # BNInception, defined under tf_model_zoo/bninception/pytorch_load.py.
        self._prepare_base_model(base_model)

        # Per the original notes, _prepare_tsn returns the channel count
        # feeding the last layer; that value is not needed here.
        self._prepare_tsn(num_class)

        # Cross-modality pre-training (original notes): the RGB model
        # initialises the temporal network.  For optical-flow or RGBDiff
        # input the structure is modified; the main difference is the first
        # convolution layer, whose input channels depend on the input type.
        if self.modality == 'Flow':
            print("Converting the ImageNet model to a flow init model")
            self.base_model = self._construct_flow_model(self.base_model)
            print("Done. Flow model ready...")
        elif self.modality == 'RGBDiff':
            print("Converting the ImageNet model to RGB+Diff init model")
            self.base_model = self._construct_diff_model(self.base_model)
            print("Done. RGBDiff model ready.")

        # Segment consensus function.
        self.consensus = ConsensusModule(consensus_type)

        if not self.before_softmax:
            self.softmax = nn.Softmax()

        # Partial BN (original notes): after initialising from a pre-trained
        # model, the mean and variance of all batch-normalisation layers are
        # frozen except the first one, because optical flow is distributed
        # differently from RGB images and the first layer's statistics must
        # be re-estimated.
        self._enable_pbn = partial_bn
        if partial_bn:
            self.partialBN(True)
def __init__(self, num_class, num_segments, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1, partial_bn=True, **kwargs):
    """Build a TSN whose modality and extras arrive through ``kwargs``.

    Args:
        num_class: Forwarded to ``self._prepare_tsn``.
        num_segments: Stored as ``self.num_segments``.
        base_model: Name forwarded to ``self._prepare_base_model``.
        new_length: Explicit snippet length; ``None`` means 4 for ``"RGB"``
            and 6 otherwise.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created and
            ``consensus_type`` must be ``'avg'``.
        dropout: Stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.
        **kwargs: Optional ``resume`` (default ``None``), ``modality``
            (default ``"RGB"``) and ``project_mode`` (default ``'1111'``).

    Raises:
        ValueError: For a non-``'avg'`` consensus with
            ``before_softmax=False``.
    """
    super(TSN, self).__init__()

    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    # Optional settings are read from the keyword arguments.
    self.resume = kwargs.get('resume', None)
    self.modality = kwargs.get('modality', "RGB")
    self.project_mode = kwargs.get('project_mode', '1111')

    if consensus_type != 'avg' and not before_softmax:
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is not None:
        self.new_length = new_length
    elif self.modality == "RGB":
        self.new_length = 4
    else:
        self.new_length = 6

    banner = """ Initializing TSN with base model: {}. TSN Configurations: num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
        base_model, self.num_segments, self.new_length, consensus_type,
        self.dropout)
    print(banner)

    self._prepare_base_model(base_model)
    # The return value of _prepare_tsn is not used here.
    self._prepare_tsn(num_class)

    # No modality-specific conversion of the base model happens in this
    # variant; the backbone is used as prepared above.

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if self._enable_pbn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1, partial_bn=True):
    """Build a TSN extended with a residual motion generator, a Sobel
    image-gradient filter, a sigmoid and an RNN.

    Args:
        num_class: Forwarded to ``self._prepare_tsn``.
        num_segments: Stored as ``self.num_segments``.
        modality: ``'Flow'`` and ``'RGBDiff'`` convert the base model.
        base_model: Name forwarded to ``self._prepare_base_model``.
        new_length: Explicit snippet length; ``None`` means 1 for ``"RGB"``
            and 5 otherwise.
        consensus_type: Forwarded to ``ConsensusModule``.
        before_softmax: When false, ``self.softmax`` is created and
            ``consensus_type`` must be ``'avg'``.
        dropout: Stored as ``self.dropout``.
        crop_num: Stored as ``self.crop_num``.
        partial_bn: Stored as ``self._enable_pbn``; truthy values trigger
            ``self.partialBN(True)``.

    Raises:
        ValueError: For a non-``'avg'`` consensus with
            ``before_softmax=False``.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    print(("""To see if Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} """.format(
        base_model, self.modality, self.num_segments, self.new_length,
        consensus_type, self.dropout)))

    self._prepare_base_model(base_model)
    # The value returned by _prepare_tsn was bound but never used.
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)

    # Define the residual motion generator.
    self.res_gen = TemResGen()

    # Sobel filter used for the image gradient.
    self.image_grad = SobelFilter_Diagonal(1, 1)
    self.sigmoid = torch.nn.Sigmoid()
    # The RNN is moved to CUDA at construction time.
    self.rnn = RNN(784, 2048, 784).cuda()
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=1, consensus_type='avg', before_softmax=True,
             dropout=0.8, img_feature_dim=256, crop_num=1, partial_bn=True,
             print_spec=True):
    '''
    Single-scale Temporal Relational Network

    See https://arxiv.org/abs/1711.08496 for more details, and
    https://github.com/epic-kitchens/action-models/blob/master/tsn.py

    Args:
        num_class:
            Number of classes, can be either a single integer,
            or a 2-tuple for training verb+noun multi-task models
        num_segments:
            Number of frames/optical flow stacks input into the model
        modality:
            ``RGB`` or ``Flow`` (converted with
            ``_construct_RGBflow_model``) or ``RGBDiff`` (converted with
            ``_construct_diff_model``).
        base_model:
            Backbone model architecture one of ``resnet18``, ``resnet30``,
            ``resnet50``, ``BNInception``, ``InceptionV3``, ``VGG16``.
            ``BNInception`` and ``resnet50`` are the most thoroughly tested.
        new_length:
            The number of frame(channel inputs) per snippet.  ``None``
            selects 1 for ``RGB`` and 5 otherwise.
        consensus_type:
            The consensus function used to combined information across
            segments.  Any value containing ``trn`` (case-insensitive)
            selects the Temporal Relation Network module; everything else
            is handed to ``ConsensusModule``.
        before_softmax:
            Whether to output class score before or after softmax.
        dropout:
            The dropout probability. The dropout layer replaces the
            backbone's classification layer.
        img_feature_dim:
            Only for TRN/MTRN models. The dimensionality of the features
            used for relational reasoning.
        crop_num:
            Stored as ``self.crop_num``.
        partial_bn:
            Whether to freeze all BN layers beyond the first 2 layers.
        print_spec:
            When truthy, print the configuration summary.

    Raises:
        ValueError: If ``before_softmax`` is false and ``consensus_type``
            is not ``'avg'``.
    '''
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.new_length = new_length
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.img_feature_dim = img_feature_dim

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    # self.new_length already holds the argument; only an explicit None is
    # replaced by the modality-dependent default.
    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5

    self._prepare_base_model(base_model)
    feature_dim = self._prepare_tsn(num_class)

    # Truthiness test instead of comparing against True.
    if print_spec:
        print((""" +-------------------------------------------------------+ Initializing TSN with base model: {} Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} feature_dim(for fc_layer): {} img_feature_dim([M]TRN): {} before_softmax: {} +-------------------------------------------------------+ """.format(
            base_model, self.modality, self.num_segments, self.new_length,
            consensus_type, self.dropout, feature_dim, self.img_feature_dim,
            self.before_softmax)))

    if self.modality in ('RGB', 'Flow'):
        print(
            "Converting the ImageNet model according to the snippet length"
        )
        self.base_model = self._construct_RGBflow_model(self.base_model)
        print("Done. base model(for RGB or Flow) ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    if 'trn' in consensus_type.lower():  # e.g. 'TRN', 'MTRN'
        # Plug in the Temporal Relation Network module.
        self.consensus = TRNmodule.return_TRN(
            consensus_type, self.img_feature_dim, self.num_segments,
            num_class)
    else:
        # Known issue recorded by the original author: "TypeError:
        # SegmentConsensus.forward: expected Variable (got NoneType) for
        # return value 0".
        self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        self.softmax = nn.Softmax()

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, img_feature_dim=256, crop_num=1, partial_bn=True,
             print_spec=True, pretrain='imagenet', fuse=False, fuse_group=1,
             fuse_layer=None, fuse_dilation=False, fuse_spatial_dilation=1,
             fuse_correlation=False, fuse_ave=False, fuse_downsample=False,
             correlation_neighbor=3, GroupConv=False, is_shift=False,
             shift_div=8, shift_place='blockres', fc_lr5=False,
             temporal_pool=False, non_local=False):
    """Store the configuration and assemble backbone, head and consensus.

    Args:
        num_class: forwarded to ``_prepare_tsn``.
        num_segments: stored as ``self.num_segments``.
        modality: 'Flow' and 'RGBDiff' trigger a conversion of the base
            model; 'RGB' also selects the default ``new_length`` of 1.
        base_model: backbone name, forwarded to ``_prepare_base_model``.
        new_length: snippet length; ``None`` means 1 for 'RGB', else 5.
        consensus_type: forwarded to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout, img_feature_dim, crop_num, pretrain: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.
        print_spec: print the configuration banner.
        fuse, fuse_group, fuse_dilation, fuse_spatial_dilation,
        fuse_correlation, fuse_ave, fuse_downsample, correlation_neighbor,
        GroupConv: stored unchanged; consumed outside this method.
        fuse_layer: list of '<a>.<b>' strings; ``None`` selects the
            historical default ``['3.03', '4.0']``.
        is_shift, shift_div, shift_place, fc_lr5, temporal_pool, non_local:
            stored unchanged; consumed outside this method.

    Raises:
        ValueError: if ``before_softmax`` is false and ``consensus_type``
            is not 'avg'.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    # The dimension of the CNN feature used to represent each frame.
    self.img_feature_dim = img_feature_dim
    self.pretrain = pretrain

    self.fuse = fuse
    self.fuse_group = fuse_group
    # A fresh list per instance: the former list default was a single
    # object shared (and mutable) across every TSN created without the arg.
    self.fuse_layer = ['3.03', '4.0'] if fuse_layer is None else fuse_layer
    self.fuse_dilation = fuse_dilation
    self.fuse_spatial_dilation = fuse_spatial_dilation
    self.fuse_correlation = fuse_correlation
    self.fuse_ave = fuse_ave
    self.fuse_downsample = fuse_downsample
    self.correlation_neighbor = correlation_neighbor
    self.GroupConv = GroupConv

    # Each entry contributes 2 ** (number of characters after the dot).
    self.temporal_stride = sum(
        2 ** len(layer.split('.')[1]) for layer in self.fuse_layer)

    self.is_shift = is_shift
    self.shift_div = shift_div
    self.shift_place = shift_place
    self.base_model_name = base_model
    self.fc_lr5 = fc_lr5
    self.temporal_pool = temporal_pool
    self.non_local = non_local

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    if print_spec:
        print(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    img_feature_dim:    {}
    """.format(base_model, self.modality, self.num_segments,
               self.new_length, consensus_type, self.dropout,
               self.img_feature_dim)))

    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(
    self,
    num_class,
    num_segments,
    modality,
    base_model="resnet101",
    segment_length=None,
    consensus_type="avg",
    before_softmax=True,
    dropout=0.8,
    crop_num=1,
    partial_bn=True,
    pretrained="imagenet",
    is_shift=True,
    shift_div=8,
    shift_place="blockres",
    fc_lr5=False,
    temporal_pool=False,
    non_local=False,
):
    """Store the configuration, build the network, optionally load weights.

    Args:
        num_class: forwarded to ``_prepare_tsn``.
        num_segments: stored as ``self.num_segments``.
        modality: "Flow" triggers conversion of the base model; "RGB"
            selects the default ``segment_length`` of 1. For kinetics
            weights it is matched case-insensitively against rgb/flow.
        base_model: backbone name, forwarded to ``_prepare_base_model``.
        segment_length: ``None`` means 1 for "RGB", otherwise 5.
        consensus_type: forwarded to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout, crop_num: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.
        pretrained: "kinetics" downloads and loads a checkpoint (minus the
            ``new_fc`` head) with ``strict=False``.
        is_shift, shift_div, shift_place, fc_lr5, temporal_pool, non_local:
            stored unchanged; consumed outside this method.

    Raises:
        ValueError: if ``before_softmax`` is false with a non-"avg"
            consensus, or if kinetics weights are requested for a modality
            other than rgb/flow.
    """
    super().__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.pretrained = pretrained

    self.is_shift = is_shift
    self.shift_div = shift_div
    self.shift_place = shift_place
    self.base_model_name = base_model
    self.fc_lr5 = fc_lr5
    self.temporal_pool = temporal_pool
    self.non_local = non_local

    if not before_softmax and consensus_type != "avg":
        raise ValueError("Only avg consensus can be used after Softmax")

    if segment_length is None:
        self.segment_length = 1 if modality == "RGB" else 5
    else:
        self.segment_length = segment_length

    LOG.info(f"""
Initializing {self.__class__.__name__} with base model: {base_model}.
{self.__class__.__name__} Configuration:
    input_modality:     {self.modality}
    num_segments:       {self.num_segments}
    segment_length:     {self.segment_length}
    consensus_module:   {self.consensus_type}
    dropout_ratio:      {self.dropout}
    """)

    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class)

    if self.modality == "Flow":
        LOG.info("Converting model to take operate on optical flow")
        self.base_model = self._construct_flow_model(self.base_model)

    self.consensus = ConsensusModule(consensus_type)

    if self.pretrained == "kinetics":
        LOG.info("Loading kinetics pretrained weights")
        # One table instead of two copy-pasted branches: checkpoint file
        # name and the label used in the completion log message.
        checkpoints = {
            "rgb": (
                "TSM_kinetics_RGB_resnet50_shift8_blockres_avg_segment8_e50.pth",
                "RGB",
            ),
            "flow": (
                "TSM_kinetics_Flow_resnet50_shift8_blockres_avg_segment8_e50.pth",
                "flow",
            ),
        }
        modality_key = self.modality.lower()
        if modality_key not in checkpoints:
            raise ValueError(f"Unknown modality {self.modality}")
        filename, label = checkpoints[modality_key]

        sd = strip_module_prefix(
            model_zoo.load_url(
                "https://file.lzhu.me/projects/tsm/models/" + filename
            )["state_dict"])
        # Drop the checkpoint's classifier head; tolerate checkpoints that
        # do not carry it rather than failing with KeyError.
        sd.pop("new_fc.weight", None)
        sd.pop("new_fc.bias", None)

        missing, unexpected = self.load_state_dict(sd, strict=False)
        if len(missing) > 0:
            LOG.warning(f"Missing keys in checkpoint: {missing}")
        if len(unexpected) > 0:
            LOG.warning(f"Unexpected keys in checkpoint: {unexpected}")
        LOG.info(f"Loading kinetics pretrained {label} weights")

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(
    self,
    num_class,
    num_segments,
    modality,
    base_model="resnet50",
    new_length=None,
    consensus_type="avg",
    before_softmax=True,
    dropout=0.7,
    img_feature_dim=256,
    partial_bn=True,
    pretrained="imagenet",
):
    """Store the configuration and assemble backbone, head and consensus.

    Args:
        num_class: stored as ``self.num_class``; also passed to
            ``return_TRN`` for TRN consensus types.
        num_segments: stored as ``self.num_segments``.
        modality: "Flow" and "RGBDiff" trigger a conversion of the base
            model; "RGB" selects the default ``new_length`` of 1.
        base_model: backbone name, stored as ``self.arch`` and forwarded to
            ``_prepare_base_model``.
        new_length: ``None`` means 1 for "RGB", otherwise 5.
        consensus_type: names starting with "TRN" use ``return_TRN``;
            anything else goes to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout, img_feature_dim: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.
        pretrained: any truthy value other than "imagenet" is forwarded to
            ``_load_pretrained_model``.

    Raises:
        ValueError: if ``before_softmax`` is false and ``consensus_type``
            is not "avg".
    """
    super(TSN, self).__init__()
    self.num_class = num_class
    self.num_segments = num_segments
    self.modality = modality
    self.arch = base_model
    self.consensus_type = consensus_type
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.img_feature_dim = img_feature_dim
    self._enable_pbn = partial_bn
    self.pretrained = pretrained
    self.reshape = True

    if not before_softmax and consensus_type != "avg":
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    LOG.info("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    img_feature_dim:    {}
    dropout_ratio:      {}
    """.format(
        base_model,
        self.modality,
        self.num_segments,
        self.new_length,
        self.consensus_type,
        self.img_feature_dim,
        self.dropout,
    ))

    self._prepare_base_model(base_model)
    # Input width of the backbone's last layer, read before _prepare_tsn
    # runs.
    last_layer = getattr(self.base_model, self.base_model.last_layer_name)
    self.feature_dim = last_layer.in_features
    self._prepare_tsn()

    if self.modality == "Flow":
        LOG.info("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        LOG.debug("Done. Flow model ready...")
    elif self.modality == "RGBDiff":
        LOG.info("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        LOG.debug("Done. RGBDiff model ready.")

    if consensus_type.startswith("TRN"):
        self.consensus = return_TRN(consensus_type, self.img_feature_dim,
                                    self.num_segments, num_class)
    else:
        self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    if partial_bn:
        self.partialBN(True)

    if pretrained and pretrained != "imagenet":
        self._load_pretrained_model(pretrained)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, img_feature_dim=256, crop_num=1, partial_bn=True,
             print_spec=True, pretrain='imagenet', is_shift=False,
             shift_div=8, shift_place='blockres', fc_lr5=False,
             temporal_pool=False, non_local=False, shift_groups=2,
             shift_diff=None, is_TSA=False, is_sTSA=False, is_ME=False,
             is_3D=False, is_tTSA=False, cfg_file=None):
    """Store the configuration and assemble backbone, head and consensus.

    Args:
        num_class: forwarded to ``_prepare_tsn``.
        num_segments: stored as ``self.num_segments``.
        modality: 'Flow' / 'RGBDiff' convert the base model, 'TwoStream'
            deep-copies it into ``self.stream_model``, 'PoseAction' adds an
            ``ST_GCN_18`` as ``self.pose_model``.
        base_model: backbone name, forwarded to ``_prepare_base_model``.
        new_length: ``None`` means 1 for 'RGB' and 'TwoStream', else 5.
        consensus_type: forwarded to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout, img_feature_dim, crop_num, pretrain: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.
        print_spec: print the configuration banner.
        shift_diff: ``None`` selects the historical default ``[3, 5]``.
        is_shift, shift_div, shift_place, fc_lr5, temporal_pool, non_local,
        shift_groups, is_TSA, is_sTSA, is_ME, is_3D, is_tTSA, cfg_file:
            stored unchanged; consumed outside this method.

    Raises:
        ValueError: if ``before_softmax`` is false and ``consensus_type``
            is not 'avg'.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    # The dimension of the CNN feature used to represent each frame.
    self.img_feature_dim = img_feature_dim
    self.pretrain = pretrain

    self.is_shift = is_shift
    self.shift_div = shift_div
    self.shift_place = shift_place
    self.base_model_name = base_model
    self.fc_lr5 = fc_lr5
    self.temporal_pool = temporal_pool
    self.non_local = non_local
    self.shift_groups = shift_groups
    # A fresh list per instance: the former list default was a single
    # object shared (and mutable) across every TSN created without the arg.
    self.shift_diff = [3, 5] if shift_diff is None else shift_diff
    self.is_TSA = is_TSA
    self.is_sTSA = is_sTSA
    self.is_ME = is_ME
    self.is_3D = is_3D
    self.is_tTSA = is_tTSA
    self.cfg_file = cfg_file

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality in ("RGB", "TwoStream") else 5
    else:
        self.new_length = new_length

    if print_spec:
        print(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    img_feature_dim:    {}
    """.format(base_model, self.modality, self.num_segments,
               self.new_length, consensus_type, self.dropout,
               self.img_feature_dim)))

    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")
    elif self.modality == "TwoStream":
        print("Converting the ImageNet model to TwoStream model")
        # Second stream starts as an independent copy of the backbone.
        self.stream_model = copy.deepcopy(self.base_model)
    elif self.modality == "PoseAction":
        self.pose_model = ST_GCN_18(
            in_channels=3,
            num_class=60,
            graph_cfg={'layout': 'ntu-rgb+d', 'strategy': 'spatial'},
            dropout=0.5)
        print("Adding Pose Module")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, img_feature_dim=256, crop_num=1, partial_bn=True,
             print_spec=True, pretrain='imagenet', is_shift=False,
             shift_div=8, shift_place='blockres', fc_lr5=False,
             temporal_pool=False, non_local=False, concat="",
             extra_temporal_modeling=False, prune_list=None, is_prune=""):
    """Store the configuration and assemble backbone, head and consensus.

    Args:
        num_class: stored as ``self.num_class`` and forwarded to
            ``_prepare_tsn``.
        num_segments: stored; also the channel count of the 'conv1d'
            consensus layers.
        modality: 'Depth', 'Flow' and 'RGBDiff' convert the base model;
            'RGB' with ``new_length == 2`` builds the RGB-depth fusion model.
        base_model: backbone name, forwarded to ``_prepare_base_model``.
        new_length: ``None`` means 1 for 'RGB' and 'Depth', else 5.
        consensus_type: 'conv1d' creates two ``nn.Conv1d`` layers;
            anything else goes to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout, img_feature_dim, crop_num, pretrain: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.
        print_spec: print the configuration banner.
        prune_list: ``None`` selects a new empty list.
        is_shift, shift_div, shift_place, fc_lr5, temporal_pool, non_local,
        concat, extra_temporal_modeling, is_prune: stored unchanged;
            consumed outside this method.

    Raises:
        ValueError: if ``before_softmax`` is false and ``consensus_type``
            is not 'avg'.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    # The dimension of the CNN feature used to represent each frame.
    self.img_feature_dim = img_feature_dim
    self.pretrain = pretrain

    self.is_shift = is_shift
    self.shift_div = shift_div
    self.shift_place = shift_place
    self.base_model_name = base_model
    self.fc_lr5 = fc_lr5
    self.temporal_pool = temporal_pool
    self.non_local = non_local
    self.num_class = num_class
    self.concat = concat
    self.extra_temporal_modeling = extra_temporal_modeling
    self.is_prune = is_prune
    # A fresh list per instance: the former list default was a single
    # object shared (and mutable) across every TSN created without the arg.
    self.prune_list = [] if prune_list is None else prune_list

    # Sigmoid rather than Tanh: with cross entropy the output must not be
    # negative (the log would be undefined).
    self.activate = nn.Sigmoid()

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality in ["RGB", "Depth"] else 5
    else:
        self.new_length = new_length

    if print_spec:
        print(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    img_feature_dim:    {}
    """.format(base_model, self.modality, self.num_segments,
               self.new_length, consensus_type, self.dropout,
               self.img_feature_dim)))

    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class)

    if self.modality == 'RGB' and self.new_length == 2:
        # RGB with a snippet length of 2 signals RGB-depth data fusion.
        print(
            "Converting the ImageNet model to a RGB-depth fusion init model"
        )
        self.base_model = self._construct_fuse_model(self.base_model)
        print("Done. RGB-depth fusion model ready...")
    elif self.modality == 'Depth':
        print("Converting the ImageNet model to a depth init model")
        self.base_model = self._construct_depth_model(self.base_model)
        print("Done. Depth model ready...")
    elif self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    if consensus_type == 'conv1d':
        self.consensus = nn.Conv1d(in_channels=num_segments,
                                   out_channels=1,
                                   kernel_size=1,
                                   bias=False)
        self.consensus2 = nn.Conv1d(in_channels=num_segments * 2,
                                    out_channels=1,
                                    kernel_size=1,
                                    bias=False)
    else:
        self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, test_mode=False,
             slow_testing=0, fast_implementation=1, base_model='resnet101',
             new_length=None, consensus_type='avg', apply_softmax=False,
             gtsn=False, dropout=0.8, crop_num=1, partial_bn=True):
    """Store the configuration and build an I3D-ResNet based TSN.

    Args:
        num_class: output size of ``self.new_fc``; also passed to the
            backbone constructor.
        num_segments: stored and passed to the backbone constructor.
        modality: 'Flow' and 'RGBDiff' convert the base model; 'RGB'
            selects the default ``new_length`` of 4.
        test_mode: selects ``input_size`` 256 (else 224); also forwarded to
            ``I3ResNet``.
        slow_testing: stored unchanged.
        fast_implementation: stored and forwarded to ``I3ResNet``.
        base_model: 'i3dresnet18' builds ``I3ResNet_18_34`` on a pretrained
            resnet18; any other value builds ``I3ResNet`` on resnet50.
        new_length: ``None`` means 4 for 'RGB', otherwise 5.
        consensus_type: forwarded to ``ConsensusModule``.
        apply_softmax: when true, a softmax layer is attached.
        gtsn: with 'i3dresnet18', widens ``new_fc`` input to
            ``512 * num_segments``.
        dropout, crop_num: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.

    Raises:
        ValueError: if ``apply_softmax`` is true and ``consensus_type`` is
            not 'avg'.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.test_mode = test_mode
    self.fast_implementation = fast_implementation
    self.slow_testing = slow_testing
    self.num_segments = num_segments
    self.gtsn = gtsn
    self.reshape = True
    self.apply_softmax = apply_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    self.input_size = 256 if self.test_mode else 224
    self.input_mean = [0.485, 0.456, 0.406]
    self.input_std = [0.229, 0.224, 0.225]

    # The restriction applies when softmax IS applied. The previous
    # ``not apply_softmax`` test was inverted: it rejected every non-avg
    # consensus in the default configuration and let the case named in the
    # message through.
    if apply_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 4 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    print(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    """.format(base_model, self.modality, self.num_segments,
               self.new_length, consensus_type, self.dropout)))

    if base_model == 'i3dresnet18':
        resnet18 = torchvision.models.resnet18(pretrained=True)
        self.base_model = I3ResNet_18_34(resnet18,
                                         self.new_length,
                                         num_class,
                                         num_segments=self.num_segments,
                                         gtsn=self.gtsn)
        if self.gtsn:
            self.new_fc = nn.Linear(512 * self.num_segments, num_class)
        else:
            self.new_fc = nn.Linear(512, num_class)
    else:
        resnet50 = torchvision.models.resnet50(pretrained=True)
        self.base_model = I3ResNet(
            resnet50,
            self.new_length,
            num_class,
            test_mode=test_mode,
            num_segments=self.num_segments,
            fast_implementation=fast_implementation)
        self.new_fc = nn.Linear(2048, num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    # Initialise the classifier head: small-variance weights, zero bias.
    std = 0.001
    normal(self.new_fc.weight, 0, std)
    constant(self.new_fc.bias, 0)

    if self.apply_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(
        self, num_class, num_segments, modality, base_model='tea',
        new_length=None, consensus_type='avg', before_softmax=True,
        dropout=0.5, img_feature_dim=256, crop_num=1, partial_bn=True,
        print_spec=True, pretrain='imagenet', is_shift=False, shift_div=8,
        shift_place='blockres', fc_lr5=False):
    """Store the configuration and assemble backbone, head and consensus.

    Args:
        num_class: stored as ``self.num_class`` and forwarded to
            ``_prepare_tsn``.
        num_segments: stored as ``self.num_segments``.
        modality: 'RGBDiff' converts the base model; 'RGB' selects the
            default ``new_length`` of 1.
        base_model: backbone name, forwarded to ``_prepare_base_model``.
        new_length: ``None`` means 1 for 'RGB', otherwise 5.
        consensus_type: forwarded to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout, img_feature_dim, crop_num, pretrain: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.
        print_spec: print the configuration banner.
        is_shift, shift_div, shift_place, fc_lr5: stored unchanged;
            consumed outside this method.

    Raises:
        ValueError: if ``before_softmax`` is false and ``consensus_type``
            is not 'avg'.
    """
    super(TSN, self).__init__()
    self.num_class = num_class
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    # The dimension of the CNN feature used to represent each frame.
    self.img_feature_dim = img_feature_dim
    self.pretrain = pretrain

    self.is_shift = is_shift
    self.shift_div = shift_div
    self.shift_place = shift_place
    self.base_model_name = base_model
    self.fc_lr5 = fc_lr5

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    if print_spec:
        print(("""Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    img_feature_dim:    {}""".format(base_model, self.modality,
                                     self.num_segments, self.new_length,
                                     consensus_type, self.dropout,
                                     self.img_feature_dim)))

    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class)

    if self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, num_class, args):
    """Build the network from a parsed-arguments object.

    Args:
        num_class: forwarded to ``_prepare_tsn`` and to the consensus
            factory functions.
        args: object providing ``modality``, ``num_segments``,
            ``num_motion``, ``dropout``, ``dataset``, ``consensus_type``,
            ``img_feature_dim``, ``arch`` and ``no_partialbn``; the RNN
            consensus types additionally read ``rnn_hidden_size``,
            ``rnn_layer`` and ``rnn_dropout``.

    Raises:
        ValueError: if ``self.before_softmax`` is false with a non-'avg'
            consensus (unreachable while ``before_softmax`` is fixed to
            True below).
    """
    super(TSN, self).__init__()
    self.modality = args.modality
    self.num_segments = args.num_segments
    self.num_motion = args.num_motion
    self.reshape = True
    self.before_softmax = True
    self.dropout = args.dropout
    self.dataset = args.dataset
    self.crop_num = 1
    self.consensus_type = args.consensus_type  # LSTM etc.
    # The dimension of the CNN feature used to represent each frame.
    self.img_feature_dim = args.img_feature_dim
    base_model = args.arch  # resnet etc.

    if not self.before_softmax and self.consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    # Snippet length per modality. The final fallback covers modalities
    # such as 'RGBDiff', for which the attribute used to stay unset and the
    # banner below failed with AttributeError.
    if self.modality == "RGB":
        self.new_length = 1
    elif self.modality == "RGBFlow":
        self.new_length = self.num_motion
    else:
        self.new_length = 5

    print(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    img_feature_dim:    {}
    """.format(base_model, self.modality, self.num_segments,
               self.new_length, self.consensus_type, self.dropout,
               self.img_feature_dim)))

    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class, base_model)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")
    elif self.modality == 'RGBFlow':
        print("Converting the ImageNet model to RGB+Flow init model")
        self.base_model = self._construct_rgbflow_model(self.base_model)
        print("Done. RGBFlow model ready.")

    if self.consensus_type == 'MLP':
        self.consensus = MLPmodule.return_MLP(self.consensus_type,
                                              self.img_feature_dim,
                                              self.num_segments, num_class)
    elif self.consensus_type == 'TSN':
        self.consensus = TSNmodule.return_TSN(self.consensus_type,
                                              self.img_feature_dim,
                                              self.num_segments, num_class)
    elif self.consensus_type in ['TRNmultiscale']:
        self.consensus = TRNmodule.return_TRN(self.consensus_type,
                                              self.img_feature_dim,
                                              self.num_segments, num_class)
    elif self.consensus_type in ['FCN']:
        self.consensus = FCNmodule.return_FCN(self.consensus_type,
                                              self.img_feature_dim,
                                              self.num_segments, num_class)
    elif self.consensus_type in [
            'LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU', 'GFLSTM', 'BLSTM'
    ]:
        self.consensus = RNNmodule.return_RNN(self.consensus_type,
                                              self.img_feature_dim,
                                              args.rnn_hidden_size,
                                              self.num_segments, num_class,
                                              args.rnn_layer,
                                              args.rnn_dropout)
    elif self.consensus_type == 'DNDF':
        self.consensus = DNDFmodule.return_DNDF(self.consensus_type,
                                                self.img_feature_dim,
                                                self.num_segments,
                                                num_class)
    else:
        # Was ``ConsensusModule(consensus_type)``: no such local exists in
        # this constructor, so the fallback raised NameError.
        self.consensus = ConsensusModule(self.consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = not args.no_partialbn
    if self._enable_pbn:
        self.partialBN(True)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1, partial_bn=True):
    """Build a TSN plus an extra 2-D/3-D convolutional classification head.

    Args:
        num_class: forwarded to ``_prepare_tsn``; also the output size of
            the extra ``self.fc`` layer.
        num_segments: stored as ``self.num_segments``.
        modality: 'Flow' and 'RGBDiff' convert the base model; 'RGB'
            selects the default ``new_length`` of 1.
        base_model: backbone name, forwarded to ``_prepare_base_model``.
        new_length: ``None`` means 1 for 'RGB', otherwise 5.
        consensus_type: forwarded to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout: stored; also the rate of ``self.drop``.
        crop_num: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.

    Raises:
        ValueError: if ``before_softmax`` is false and ``consensus_type``
            is not 'avg'.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    print(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    """.format(base_model, self.modality, self.num_segments,
               self.new_length, consensus_type, self.dropout)))

    self._prepare_base_model(base_model)
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)

    # TODO: add structure here
    # NOTE(review): 2048 is hard-coded as the incoming channel count;
    # presumably the backbone's final feature width - confirm per backbone.
    extract_feature = 2048
    planes = 512
    self.conv1 = nn.Conv2d(extract_feature, planes, kernel_size=1,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)
    # Two separate 3-D convolutions: a (1, 3, 3) kernel and a (3, 1, 1)
    # kernel.
    self.conv3d_spatial = nn.Conv3d(planes, planes, kernel_size=(1, 3, 3))
    self.conv3d_temporal = nn.Conv3d(planes, planes, kernel_size=(3, 1, 1))
    self.bn3d = nn.BatchNorm3d(planes)
    self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
    self.drop = nn.Dropout3d(dropout)
    self.fc = nn.Linear(planes, num_class)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             new_length=None, consensus_type='avg', before_softmax=True,
             dropout=0.8, crop_num=1):
    """Build a TSN, with a dedicated path for the 'CV' modality.

    Args:
        num_class: forwarded to ``_prepare_tsn``.
        num_segments: stored as ``self.num_segments``.
        modality: 'CV' skips ``_prepare_base_model`` and builds the
            cost-volume sub-modules instead; 'Flow' and 'RGBDiff' convert
            the base model; 'RGB' selects the default ``new_length`` of 1.
        base_model: backbone name, forwarded to ``_prepare_base_model``
            (not used for 'CV').
        new_length: ``None`` means 1 for 'RGB', otherwise 5.
        consensus_type: forwarded to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout, crop_num: stored unchanged.

    Raises:
        ValueError: if ``before_softmax`` is false and ``consensus_type``
            is not 'avg'.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type

    is_cv = self.modality == 'CV'
    if is_cv:
        self.cost_volume = CostVolume(2, 2)

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        # 'CV' and every other non-RGB modality share the same default.
        self.new_length = 1 if modality == 'RGB' else 5
    else:
        self.new_length = new_length

    print(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    """.format(base_model, self.modality, self.num_segments,
               self.new_length, consensus_type, self.dropout)))

    if is_cv:
        # No backbone is prepared for 'CV', so the input statistics are
        # set here directly.
        self.input_size = 224
        self.input_mean = [104, 117, 128]
        self.input_std = [1]
        self._prepare_tsn(num_class, is_cv=True)
    else:
        self._prepare_base_model(base_model)
        self._prepare_tsn(num_class, is_cv=False)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")
    elif is_cv:
        self.prev_cv_model = cost_volume_model.PreModel()
        self.displacement_map = DisplacementMap(2, 2, tau=1)
        self.late_cv_model = cost_volume_model.LateModel()
        print("CV model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)
def __init__(
    self,
    num_class,
    num_segments,
    modality,
    base_model="resnet50",
    new_length=None,
    consensus_type="avg",
    before_softmax=True,
    dropout=0.8,
    partial_bn=True,
    shift_div=8,
    shift_place="blockres",
    fc_lr5=False,
    temporal_pool=False,
    non_local=False,
    pretrained="imagenet",
):
    """Store the configuration and assemble a TSM network.

    Args:
        num_class: an int, or a list/tuple of exactly two ints for the
            multi-task case; forwarded to ``_prepare_tsn``.
        num_segments: stored as ``self.num_segments``.
        modality: "Flow" and "RGBDiff" convert the base model; "RGB"
            selects the default ``new_length`` of 1.
        base_model: backbone name, stored as ``self.arch`` and
            ``self.base_model_name`` and forwarded to
            ``_prepare_base_model``.
        new_length: ``None`` means 1 for "RGB", otherwise 5.
        consensus_type: forwarded to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        dropout: stored unchanged.
        partial_bn: when true, ``partialBN(True)`` is called.
        shift_div, shift_place, fc_lr5, temporal_pool, non_local: stored
            unchanged; consumed outside this method.
        pretrained: any truthy value other than "imagenet" is forwarded to
            ``_load_pretrained_model``.

    Raises:
        AssertionError: if ``num_class`` is a list/tuple whose length is
            not 2.
        ValueError: if ``before_softmax`` is false and ``consensus_type``
            is not "avg".
    """
    super(TSM, self).__init__()
    self.arch = base_model
    self.num_class = num_class
    self.is_multitask = isinstance(num_class, (list, tuple))
    if self.is_multitask:
        assert len(
            self.num_class) == 2, ("We only support 2 tasks in multi task "
                                   "problems")
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.consensus_type = consensus_type
    self.pretrained = pretrained

    # Shifting is always enabled here; there is no is_shift parameter.
    self.is_shift = True
    self.shift_div = shift_div
    self.shift_place = shift_place
    self.base_model_name = base_model
    self.fc_lr5 = fc_lr5
    self.temporal_pool = temporal_pool
    self.non_local = non_local

    if not before_softmax and consensus_type != "avg":
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    LOG.info(("""
Initializing TSM with base model: {}.
TSM Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    """.format(
        base_model,
        self.modality,
        self.num_segments,
        self.new_length,
        consensus_type,
        self.dropout,
    )))

    self._prepare_base_model(base_model)
    self.feature_dim = self._prepare_tsn(num_class)

    if self.modality == "Flow":
        LOG.info("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        LOG.info("Done. Flow model ready...")
    elif self.modality == "RGBDiff":
        LOG.info("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        LOG.info("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)

    if pretrained and pretrained != "imagenet":
        self._load_pretrained_model(pretrained)
def __init__(self, num_class, num_segments, modality, base_model='resnet101',
             dataset='something', new_length=None, consensus_type='avg',
             before_softmax=True, dropout=0.8, fc_lr5=True, crop_num=1,
             partial_bn=True, non_local=False):
    """Build a TSN, with a dedicated path for 'TCM*' backbones.

    Args:
        num_class: forwarded to ``_prepare_tsn``.
        num_segments: stored; also passed to the TCM backbone and to the
            non-local wrapper.
        modality: 'Flow' and 'RGBDiff' convert the base model.
        base_model: a name starting with 'TCM' and containing 'resnet50'
            or 'resnet101' builds the backbone from ``resnet_TSM``; any
            other name goes through ``_prepare_base_model``.
        dataset, dropout, fc_lr5, crop_num: stored unchanged.
        new_length: ``None`` means 1 for every modality.
        consensus_type: forwarded to ``ConsensusModule``.
        before_softmax: when false, a softmax layer is attached.
        partial_bn: when true, ``partialBN(True)`` is called.
        non_local: on the TCM path, wraps the backbone via
            ``make_non_local_resnet50_layer4``.

    Raises:
        ValueError: if ``before_softmax`` is false with a non-'avg'
            consensus, or for a 'TCM*' name with no known backbone.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.base_model_name = base_model
    self.dataset = dataset
    self.fc_lr5 = fc_lr5
    self.non_local = non_local

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    # The former ``1 if modality == "RGB" else 1`` was a no-op conditional:
    # the default is 1 regardless of modality.
    self.new_length = 1 if new_length is None else new_length

    print(("""
Initializing TSN with base model: {}.
TSN Configurations:
    input_modality:     {}
    num_segments:       {}
    new_length:         {}
    consensus_module:   {}
    dropout_ratio:      {}
    """.format(base_model, self.modality, self.num_segments,
               self.new_length, consensus_type, self.dropout)))

    if base_model.startswith('TCM'):
        # Imports stay local so resnet_TSM is only needed on this path.
        if 'resnet50' in base_model:
            from resnet_TSM import resnet50 as resnet
            backbone_name = 'resnet50'
        elif 'resnet101' in base_model:
            from resnet_TSM import resnet101 as resnet
            backbone_name = 'resnet101'
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))

        self.base_model = resnet(True,
                                 shift='TSM',
                                 num_segments=self.num_segments,
                                 enable_TCM=1)
        print("Backbone: {}".format(backbone_name))

        if self.non_local:
            print('Adding non-local module...')
            from non_local import make_non_local_resnet50_layer4 as make_non_local
            make_non_local(self.base_model, self.num_segments)

        self.base_model.last_layer_name = 'fc1'
        self.input_size = 224
        self.input_mean = [0.485, 0.456, 0.406]
        self.input_std = [0.229, 0.224, 0.225]
    else:
        self._prepare_base_model(base_model)

    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # dim=1 is what the deprecated implicit-dim form picks for 2-D
        # input; assumes scores are (N, num_class) - TODO confirm in forward.
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)