def load_pretrained_layers(self):
    # Current state of base
    state_dict = self.state_dict()
    param_names = list(state_dict.keys())

    # Pretrained VGG base
    pretrained_state_dict = torchvision.models.vgg16(pretrained=True).state_dict()
    pretrained_param_names = list(pretrained_state_dict.keys())

    # Transfer conv. parameters from pretrained model to current model
    for i, param in enumerate(param_names[:-4]):  # excluding conv6 and conv7 parameters
        state_dict[param] = pretrained_state_dict[pretrained_param_names[i]]

    # Convert fc6, fc7 to convolutional layers, and subsample (by decimation) to sizes of conv6 and conv7
    # fc6
    conv_fc6_weight = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)  # (4096, 512, 7, 7)
    conv_fc6_bias = pretrained_state_dict['classifier.0.bias']  # (4096)
    state_dict['conv6.weight'] = decimate(conv_fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
    state_dict['conv6.bias'] = decimate(conv_fc6_bias, m=[4])  # (1024)
    # fc7
    conv_fc7_weight = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)  # (4096, 4096, 1, 1)
    conv_fc7_bias = pretrained_state_dict['classifier.3.bias']  # (4096)
    state_dict['conv7.weight'] = decimate(conv_fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
    state_dict['conv7.bias'] = decimate(conv_fc7_bias, m=[4])  # (1024)

    # Note: an FC layer of size (K) operating on a flattened version (C*H*W) of a 2D image of size (C, H, W)
    # is equivalent to a convolutional layer with kernel size (H, W), input channels C, output channels K,
    # operating on the 2D image of size (C, H, W) without padding.

    self.load_state_dict(state_dict)

    print("\nLoaded base model.\n")
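# The function above relies on a `decimate` helper that is defined elsewhere in the
# project (e.g. in a utils module). Below is a minimal, self-contained sketch of such
# a helper, assuming it keeps every m[d]-th slice along each dimension and leaves a
# dimension untouched when m[d] is None; the project's actual implementation may differ.
import torch


def decimate(tensor, m):
    """Downsample `tensor` by keeping every m[d]-th slice along dimension d.

    `m` has one entry per tensor dimension; None means "keep that dimension as is".
    """
    assert tensor.dim() == len(m)
    for d in range(tensor.dim()):
        if m[d] is not None:
            tensor = tensor.index_select(
                dim=d,
                index=torch.arange(start=0, end=tensor.size(d), step=m[d]).long())
    return tensor


# Example: a (4096, 512, 7, 7) fc6 weight decimated with m=[4, None, 3, 3]
# becomes (1024, 512, 3, 3), which matches the conv6 shape used above.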
def load_pretrained(self):
    '''
    Use a VGG-16 pretrained on the ImageNet task for conv1 --> conv5.
    Convert the pretrained fc6, fc7 into conv6, conv7 in this model.
    '''
    print("Loading pretrained base model...")
    state_dict = self.state_dict()
    param_names = list(state_dict.keys())
    pretrained_state_dict = torchvision.models.vgg16(pretrained=True).state_dict()
    pretrained_param_names = list(pretrained_state_dict.keys())

    # Transfer conv1 --> conv5 parameters (13 conv layers, weight + bias each = 26 tensors)
    for i, parameters in enumerate(param_names[:26]):
        state_dict[parameters] = pretrained_state_dict[pretrained_param_names[i]]

    # Convert fc6, fc7 in the pretrained model to conv6, conv7 in this model
    fc6_weight = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)
    fc6_bias = pretrained_state_dict['classifier.0.bias']
    state_dict['conv6.weight'] = decimate(fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
    state_dict['conv6.bias'] = decimate(fc6_bias, m=[4])  # (1024)

    fc7_weight = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)
    fc7_bias = pretrained_state_dict['classifier.3.bias']
    state_dict['conv7.weight'] = decimate(fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
    state_dict['conv7.bias'] = decimate(fc7_bias, m=[4])  # (1024)

    self.load_state_dict(state_dict)
    print("Loaded base model")
def __init__(self, vgg16: VGG, num_classes=21, num_db=6):
    super(Stage2, self).__init__()
    vgg16_features = list(vgg16.features)
    vgg16_classifier = vgg16.classifier

    # conv6: atrous (dilated) convolution, initialized from the decimated fc6 weights
    conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
    conv6.weight.data.copy_(
        decimate(vgg16_classifier[0].weight.data.view(4096, 512, 7, 7),
                 m=(4, None, 3, 3)))
    conv6.bias.data.copy_(decimate(vgg16_classifier[0].bias.data, m=[4]))

    # conv7: 1x1 convolution, initialized from the decimated fc7 weights
    conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
    conv7.weight.data.copy_(
        decimate(vgg16_classifier[3].weight.data.view(4096, 4096, 1, 1),
                 m=[4, 4, None, None]))
    conv7.bias.data.copy_(decimate(vgg16_classifier[3].bias.data, m=[4]))

    self.features = nn.Sequential(
        *vgg16_features[24:-1],  # up to relu5_3
        # retains spatial size because stride is 1 (and padding)
        nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        conv6,
        nn.ReLU(True),
        conv7,
        nn.ReLU(True))
    self.detector = DetectionConv2d(1024, num_db, num_classes)
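# The atrous conv6 above preserves the spatial size of its input: with kernel 3,
# dilation 6, and padding 6, the effective kernel spans 13x13 but the output has
# the same height and width as the input. A quick self-contained check (the
# 19x19 feature-map size assumed here corresponds to a 300x300 SSD input):
import torch
import torch.nn as nn

conv6_demo = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
x = torch.randn(1, 512, 19, 19)
print(conv6_demo(x).shape)  # torch.Size([1, 1024, 19, 19])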
def load_pretrained_layers(self): """ Using a VGG network that has been pretrained on ImageNet dataset. The original paper used VGG16, we are using VGG11 to achieve real-time inference on videos. https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.vgg11 We copy these parameters into our network. It's straightforward for conv1 to conv5. VGG-11 does not contain the conv6 and con7 layers. Therefore, we convert fc6 and fc7 into convolutional layers, and subsample by decimation. See 'decimate' in utils.py. """ # current state of base state_dict = self.state_dict() param_names = list(state_dict.keys()) # pretrained VGG base pretrained_state_dict = torchvision.models.vgg11( pretrained=True).state_dict() pretrained_param_names = list(pretrained_state_dict.keys()) # transfer conv. parameters from pretrained model to current model for i, param in enumerate( param_names[:-4]): # excluding conv6 and conv7 parameters state_dict[param] = pretrained_state_dict[ pretrained_param_names[i]] # convert fc6, fc7 to convolutional layers,... # ...and subsample (by decimation) to sizes of conv6 and conv7 # fc6 conv_fc6_weight = pretrained_state_dict['classifier.0.weight'].view( 4096, 512, 7, 7) # (4096, 512, 7, 7) conv_fc6_bias = pretrained_state_dict['classifier.0.bias'] # (4096) state_dict['conv6.weight'] = decimate(conv_fc6_weight, m=[4, None, 3, 3]) # (1024, 512, 3, 3) state_dict['conv6.bias'] = decimate(conv_fc6_bias, m=[4]) # (1024) # fc7 conv_fc7_weight = pretrained_state_dict['classifier.3.weight'].view( 4096, 4096, 1, 1) # (4096, 4096, 1, 1) conv_fc7_bias = pretrained_state_dict['classifier.3.bias'] # (4096) state_dict['conv7.weight'] = decimate(conv_fc7_weight, m=[4, 4, None, None]) # (1024, 1024, 1, 1) state_dict['conv7.bias'] = decimate(conv_fc7_bias, m=[4]) # (1024) # Note: an FC layer of size (K) operating on a flattened... # ...version (C*H*W) of a 2D image of size (C, H, W) # ...is equivalent to a convolutional layer with kernel size (H, W),... # ...input channels C, output channels K... # ...operating on the 2D image of size (C, H, W) without padding self.load_state_dict(state_dict) print('\nLoaded base model.\n')
def load_pretrained_layers(self): """ As in the paper, we use a VGG-16 pretrained on the ImageNet task as the base network. There's one available in PyTorch, see https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.vgg16 We copy these parameters into our network. It's straightforward for conv1 to conv5. However, the original VGG-16 does not contain the conv6 and con7 layers. Therefore, we convert fc6 and fc7 into convolutional layers, and subsample by decimation. See 'decimate' in utils.py. """ # Current state of base state_dict = self.state_dict() param_names = list(state_dict.keys()) # Pretrained VGG base pretrained_state_dict = torchvision.models.vgg16( pretrained=True).state_dict() pretrained_param_names = list(pretrained_state_dict.keys()) # Transfer conv. parameters from pretrained model to current model for i, param in enumerate( param_names[:-4]): # excluding conv6 and conv7 parameters state_dict[param] = pretrained_state_dict[ pretrained_param_names[i]] # Convert fc6, fc7 to convolutional layers, and subsample (by decimation) to sizes of conv6 and conv7 # fc6 conv_fc6_weight = pretrained_state_dict['classifier.0.weight'].view( 4096, 512, 7, 7) # (4096, 512, 7, 7) conv_fc6_bias = pretrained_state_dict['classifier.0.bias'] # (4096) state_dict['conv6.weight'] = utils.decimate(conv_fc6_weight, m=[4, None, 3, 3]) # (1024, 512, 3, 3) state_dict['conv6.bias'] = utils.decimate(conv_fc6_bias, m=[4]) # (1024) # fc7 conv_fc7_weight = pretrained_state_dict['classifier.3.weight'].view( 4096, 4096, 1, 1) # (4096, 4096, 1, 1) conv_fc7_bias = pretrained_state_dict['classifier.3.bias'] # (4096) state_dict['conv7.weight'] = utils.decimate(conv_fc7_weight, m=[4, 4, None, None ]) # (1024, 1024, 1, 1) state_dict['conv7.bias'] = utils.decimate(conv_fc7_bias, m=[4]) # (1024) # Note: an FC layer of size (K) operating on a flattened version (C*H*W) of a 2D image of size (C, H, W)... # ...is equivalent to a convolutional layer with kernel size (H, W), input channels C, output channels K... # ...operating on the 2D image of size (C, H, W) without padding self.load_state_dict(state_dict) print("\nLoaded base model.\n")
def init_weights(self):
    '''
    Load pretrained VGG-16 parameters for the first layers and initialize the rest.
    '''
    state_dict = self.state_dict()
    layer_names = list(state_dict.keys())

    vgg16_url = "https://download.pytorch.org/models/vgg16-397923af.pth"
    vgg16 = torch.hub.load_state_dict_from_url(vgg16_url, model_dir=self.vgg16_dir)
    vgg16_layer_names = list(vgg16.keys())

    # Load conv1_1 .. conv5_3 (13 conv layers, weight + bias each = 26 tensors)
    for i, layer_name in enumerate(layer_names[0:26]):
        state_dict[layer_name] = vgg16[vgg16_layer_names[i]]

    # Convert fc6, fc7 to convolutional layers, and subsample (by decimation) to sizes of conv6 and conv7
    # fc6
    conv_fc6_weight = vgg16['classifier.0.weight'].view(4096, 512, 7, 7)  # (4096, 512, 7, 7)
    conv_fc6_bias = vgg16['classifier.0.bias']  # (4096)
    state_dict['conv6.weight'] = decimate(conv_fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
    state_dict['conv6.bias'] = decimate(conv_fc6_bias, m=[4])  # (1024)
    # fc7
    conv_fc7_weight = vgg16['classifier.3.weight'].view(4096, 4096, 1, 1)  # (4096, 4096, 1, 1)
    conv_fc7_bias = vgg16['classifier.3.bias']  # (4096)
    state_dict['conv7.weight'] = decimate(conv_fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
    state_dict['conv7.bias'] = decimate(conv_fc7_bias, m=[4])  # (1024)

    # Initialize the extra conv and classifier layers (everything after conv7)
    for layer_name in layer_names[30:]:
        if layer_name.endswith('bias'):
            nn.init.zeros_(state_dict[layer_name])
        elif layer_name.endswith('weight'):
            nn.init.xavier_uniform_(state_dict[layer_name])
        else:
            assert False

    self.load_state_dict(state_dict)