Example #1
    def load_pretrained_layers(self):
        # Current state of base
        state_dict = self.state_dict()
        param_names = list(state_dict.keys())

        # Pretrained VGG base
        pretrained_state_dict = torchvision.models.vgg16(pretrained=True).state_dict()
        pretrained_param_names = list(pretrained_state_dict.keys())

        # Transfer conv. parameters from pretrained model to current model
        for i, param in enumerate(param_names[:-4]):  # excluding conv6 and conv7 parameters
            state_dict[param] = pretrained_state_dict[pretrained_param_names[i]]

        # Convert fc6, fc7 to convolutional layers, and subsample (by decimation) to sizes of conv6 and conv7
        # fc6
        conv_fc6_weight = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)  # (4096, 512, 7, 7)
        conv_fc6_bias = pretrained_state_dict['classifier.0.bias']  # (4096)
        state_dict['conv6.weight'] = decimate(conv_fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
        state_dict['conv6.bias'] = decimate(conv_fc6_bias, m=[4])  # (1024)
        # fc7
        conv_fc7_weight = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)  # (4096, 4096, 1, 1)
        conv_fc7_bias = pretrained_state_dict['classifier.3.bias']  # (4096)
        state_dict['conv7.weight'] = decimate(conv_fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
        state_dict['conv7.bias'] = decimate(conv_fc7_bias, m=[4])  # (1024)

        # Note: an FC layer of size (K) operating on a flattened version (C*H*W) of a 2D image of size (C, H, W)...
        # ...is equivalent to a convolutional layer with kernel size (H, W), input channels C, output channels K...
        # ...operating on the 2D image of size (C, H, W) without padding

        self.load_state_dict(state_dict)

        print("\nLoaded base model.\n")
Example #2
    def load_pretrained(self):
        '''
            Use a VGG-16 pretrained on the ImageNet task for conv1 through conv5.
            Convert fc6 and fc7 of the pretrained model into conv6 and conv7.
        '''
        print("Loading pretrained base model...")
        state_dict = self.state_dict()
        param_names = list(state_dict.keys())

        pretrained_state_dict = torchvision.models.vgg16(
            pretrained=True).state_dict()
        pretrained_param_names = list(pretrained_state_dict.keys())

        for i, parameters in enumerate(param_names[:26]):
            state_dict[parameters] = pretrained_state_dict[
                pretrained_param_names[i]]

        # convert fc6, fc7 in the pretrained model to conv6, conv7 in this model
        fc6_weight = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)
        fc6_bias = pretrained_state_dict['classifier.0.bias']
        state_dict['conv6.weight'] = decimate(fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
        state_dict['conv6.bias'] = decimate(fc6_bias, m=[4])  # (1024)

        fc7_weight = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)
        fc7_bias = pretrained_state_dict['classifier.3.bias']
        state_dict['conv7.weight'] = decimate(fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
        state_dict['conv7.bias'] = decimate(fc7_bias, m=[4])  # (1024)

        self.load_state_dict(state_dict)
        print("Loaded base model")
Example #3
File: modules.py  Project: NNHieu/Learn_SSD
    def __init__(self, vgg16: VGG, num_classes=21, num_db=6):
        super(Stage2, self).__init__()
        vgg16_features = list(vgg16.features)
        vgg16_classifier = vgg16.classifier

        conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6,
                          dilation=6)  # atrous convolution
        conv6.weight.data.copy_(
            decimate(vgg16_classifier[0].weight.data.view(4096, 512, 7, 7),
                     m=(4, None, 3, 3)))
        conv6.bias.data.copy_(decimate(vgg16_classifier[0].bias.data, m=[4]))

        conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
        conv7.weight.data.copy_(
            decimate(vgg16_classifier[3].weight.data.view(4096, 4096, 1, 1),
                     m=[4, 4, None, None]))
        conv7.bias.data.copy_(decimate(vgg16_classifier[3].bias.data, m=[4]))

        self.features = nn.Sequential(
            *vgg16_features[24:-1],  # conv5_1 up to relu5_3 (the final maxpool is dropped)
            # the stride-1, padding-1 pool below retains spatial size
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            conv6,
            nn.ReLU(True),
            conv7,
            nn.ReLU(True))
        self.detector = DetectionConv2d(1024, num_db, num_classes)
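
Assuming the project's DetectionConv2d and decimate are importable, Stage2 would be built from a pretrained torchvision VGG-16 roughly as follows. The input shape is a sketch based on SSD300, where the pool4 output for a 300x300 image is (N, 512, 19, 19):

import torch
import torchvision

vgg16 = torchvision.models.vgg16(pretrained=True)
stage2 = Stage2(vgg16, num_classes=21, num_db=6)
x = torch.randn(1, 512, 19, 19)   # hypothetical pool4 output for a 300x300 input
print(stage2.features(x).shape)   # torch.Size([1, 1024, 19, 19])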
Example #4
    def load_pretrained_layers(self):
        """
        Using a VGG network that has been pretrained on ImageNet dataset. 
        The original paper used VGG16, we are using VGG11 to achieve real-time
        inference on videos.
        https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.vgg11
        We copy these parameters into our network. 
        It's straightforward for conv1 to conv5.
        VGG-11 does not contain the conv6 and con7 layers.
        Therefore, we convert fc6 and fc7 into convolutional layers, 
        and subsample by decimation. See 'decimate' in utils.py.
        """
        # current state of base
        state_dict = self.state_dict()
        param_names = list(state_dict.keys())

        # pretrained VGG base
        pretrained_state_dict = torchvision.models.vgg11(
            pretrained=True).state_dict()
        pretrained_param_names = list(pretrained_state_dict.keys())

        # transfer conv. parameters from pretrained model to current model
        for i, param in enumerate(param_names[:-4]):  # excluding conv6 and conv7 parameters
            state_dict[param] = pretrained_state_dict[pretrained_param_names[i]]

        # convert fc6, fc7 to convolutional layers,...
        # ...and subsample (by decimation) to sizes of conv6 and conv7
        # fc6
        conv_fc6_weight = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)  # (4096, 512, 7, 7)
        conv_fc6_bias = pretrained_state_dict['classifier.0.bias']  # (4096)
        state_dict['conv6.weight'] = decimate(conv_fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
        state_dict['conv6.bias'] = decimate(conv_fc6_bias, m=[4])  # (1024)
        # fc7
        conv_fc7_weight = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)  # (4096, 4096, 1, 1)
        conv_fc7_bias = pretrained_state_dict['classifier.3.bias']  # (4096)
        state_dict['conv7.weight'] = decimate(conv_fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
        state_dict['conv7.bias'] = decimate(conv_fc7_bias, m=[4])  # (1024)

        # Note: an FC layer of size (K) operating on a flattened version (C*H*W) of a 2D image of size (C, H, W)...
        # ...is equivalent to a convolutional layer with kernel size (H, W), input channels C, output channels K...
        # ...operating on the 2D image of size (C, H, W) without padding

        self.load_state_dict(state_dict)

        print('\nLoaded base model.\n')
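
The note above can be checked numerically: an FC layer applied to the flattened image equals a Conv2d with kernel size (H, W) whose weight is the FC weight reshaped to (K, C, H, W). A small self-contained check (hypothetical sizes, not from the source):

import torch
import torch.nn as nn

C, H, W, K = 512, 7, 7, 16
fc = nn.Linear(C * H * W, K)
conv = nn.Conv2d(C, K, kernel_size=(H, W))  # no padding
conv.weight.data.copy_(fc.weight.data.view(K, C, H, W))
conv.bias.data.copy_(fc.bias.data)

x = torch.randn(1, C, H, W)
out_fc = fc(x.view(1, -1))        # (1, K)
out_conv = conv(x).view(1, -1)    # (1, K, 1, 1) flattened to (1, K)
print(torch.allclose(out_fc, out_conv, atol=1e-5))  # True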
Example #5
    def load_pretrained_layers(self):
        """
        As in the paper, we use a VGG-16 pretrained on the ImageNet task as the base network.
        There's one available in PyTorch, see https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.vgg16
        We copy these parameters into our network. It's straightforward for conv1 to conv5.
        However, the original VGG-16 does not contain the conv6 and conv7 layers.
        Therefore, we convert fc6 and fc7 into convolutional layers, and subsample by decimation. See 'decimate' in utils.py.
        """
        # Current state of base
        state_dict = self.state_dict()
        param_names = list(state_dict.keys())

        # Pretrained VGG base
        pretrained_state_dict = torchvision.models.vgg16(
            pretrained=True).state_dict()
        pretrained_param_names = list(pretrained_state_dict.keys())

        # Transfer conv. parameters from pretrained model to current model
        for i, param in enumerate(param_names[:-4]):  # excluding conv6 and conv7 parameters
            state_dict[param] = pretrained_state_dict[pretrained_param_names[i]]

        # Convert fc6, fc7 to convolutional layers, and subsample (by decimation) to sizes of conv6 and conv7
        # fc6
        conv_fc6_weight = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)  # (4096, 512, 7, 7)
        conv_fc6_bias = pretrained_state_dict['classifier.0.bias']  # (4096)
        state_dict['conv6.weight'] = utils.decimate(conv_fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
        state_dict['conv6.bias'] = utils.decimate(conv_fc6_bias, m=[4])  # (1024)
        # fc7
        conv_fc7_weight = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)  # (4096, 4096, 1, 1)
        conv_fc7_bias = pretrained_state_dict['classifier.3.bias']  # (4096)
        state_dict['conv7.weight'] = utils.decimate(conv_fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
        state_dict['conv7.bias'] = utils.decimate(conv_fc7_bias, m=[4])  # (1024)

        # Note: an FC layer of size (K) operating on a flattened version (C*H*W) of a 2D image of size (C, H, W)...
        # ...is equivalent to a convolutional layer with kernel size (H, W), input channels C, output channels K...
        # ...operating on the 2D image of size (C, H, W) without padding

        self.load_state_dict(state_dict)

        print("\nLoaded base model.\n")
Example #6
    def init_weights(self):
        ''' Load pretrained VGG-16 parameters for the first layers and initialize the rest.
        '''
        state_dict = self.state_dict()
        layer_names = list(state_dict.keys())

        vgg16_url = "https://download.pytorch.org/models/vgg16-397923af.pth"
        vgg16 = torch.hub.load_state_dict_from_url(vgg16_url, model_dir=self.vgg16_dir)
        vgg16_layer_names = list(vgg16.keys())

        # Load from conv1_1 .. conv5_3 (13 conv layers, i.e. 26 weight/bias tensors)
        for i, layer_name in enumerate(layer_names[0:26]):
            state_dict[layer_name] = vgg16[vgg16_layer_names[i]]

        # Convert fc6, fc7 to convolutional layers, and subsample (by decimation) to sizes of conv6 and conv7
        # fc6
        conv_fc6_weight = vgg16['classifier.0.weight'].view(4096, 512, 7, 7)  # (4096, 512, 7, 7)
        conv_fc6_bias = vgg16['classifier.0.bias']  # (4096)
        state_dict['conv6.weight'] = decimate(conv_fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
        state_dict['conv6.bias'] = decimate(conv_fc6_bias, m=[4])  # (1024)
        # fc7
        conv_fc7_weight = vgg16['classifier.3.weight'].view(4096, 4096, 1, 1)  # (4096, 4096, 1, 1)
        conv_fc7_bias = vgg16['classifier.3.bias']  # (4096)
        state_dict['conv7.weight'] = decimate(conv_fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
        state_dict['conv7.bias'] = decimate(conv_fc7_bias, m=[4])  # (1024)

        # Initialize the extra conv and classifier layers
        # (indices 26-29 are the conv6/conv7 parameters set above)
        for layer_name in layer_names[30:]:
            if layer_name.endswith('bias'):
                nn.init.zeros_(state_dict[layer_name])
            elif layer_name.endswith('weight'):
                nn.init.xavier_uniform_(state_dict[layer_name])
            else:
                assert False, 'unexpected parameter name: ' + layer_name

        self.load_state_dict(state_dict)
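
The slice boundaries 26 and 30 used above can be checked against the checkpoint itself: the VGG-16 feature extractor has 13 conv layers, hence 26 weight/bias tensors, and indices 26-29 of this model's state_dict are the conv6/conv7 parameters filled in by decimation. A small verification sketch (assumes only the public checkpoint):

import torch

vgg16_url = "https://download.pytorch.org/models/vgg16-397923af.pth"
vgg16 = torch.hub.load_state_dict_from_url(vgg16_url)
feature_keys = [k for k in vgg16 if k.startswith('features')]
print(len(feature_keys))   # 26 -> conv1_1.weight ... conv5_3.bias
print(list(vgg16)[26:])    # the classifier.* keys that feed conv6/conv7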