Exemplo n.º 1
0
def vgg(**config):
    """Build a VGG network from keyword configuration.

    The following keys are consumed (popped) before ``config`` is forwarded:

    * ``dataset`` -- ``'imagenet'`` (default), ``'cifar10'`` or
      ``'cifar100'``; selects the default ``num_classes`` (1000 / 10 / 100).
      Any other value leaves ``num_classes`` untouched.
    * ``depth`` -- network depth, default ``16``.
    * ``bn`` -- batch-norm switch, default ``True``.

    For ImageNet with depth 11, 13, 16 or 19 the matching ``vggNN`` or
    ``vggNN_bn`` factory is called with ``pretrained=False`` and the remaining
    configuration.  Every other combination falls through to
    ``VGG(model_name[depth], **config)`` with ``batch_norm`` defaulting to
    ``bn``.
    """
    dataset = config.pop('dataset', 'imagenet')
    depth = config.pop('depth', 16)
    bn = config.pop('bn', True)

    if dataset == 'cifar10':
        config.setdefault('num_classes', 10)
    elif dataset == 'cifar100':
        config.setdefault('num_classes', 100)
    elif dataset == 'imagenet':
        config.setdefault('num_classes', 1000)

        # Identity test on purpose: only the literal ``False`` picks the
        # plain variant; any other value (0, None, ...) keeps batch norm.
        plain = bn is False

        factory = None
        if depth == 11:
            factory = vgg11 if plain else vgg11_bn
        elif depth == 13:
            factory = vgg13 if plain else vgg13_bn
        elif depth == 16:
            factory = vgg16 if plain else vgg16_bn
        elif depth == 19:
            factory = vgg19 if plain else vgg19_bn

        if factory is not None:
            return factory(pretrained=False, **config)

    # Generic path: CIFAR datasets, unknown datasets, and ImageNet depths
    # without a dedicated factory.
    config.setdefault('batch_norm', bn)
    return VGG(model_name[depth], **config)
Exemplo n.º 2
0
def vgg_13(batch_norm=True, pretrained=False, fixed_feature=True):
    """Wrap torchvision's 13-layer VGG in a ``_VGG`` module.

    :param batch_norm: build ``vgg13_bn`` when true, plain ``vgg13`` otherwise
    :param pretrained: passed to the torchvision constructor; if true, the
        returned model is pretrained on ImageNet
    :param fixed_feature: if true and ``pretrained`` is true, model features
        are fixed while training
    :return: ``_VGG(model, model.features, fixed)`` where ``fixed`` is a bool
    """
    # Import lazily so torchvision is only required when this is called.
    if batch_norm:
        from torchvision.models.vgg import vgg13_bn as build_backbone
    else:
        from torchvision.models.vgg import vgg13 as build_backbone

    backbone = build_backbone(pretrained)

    # Features are only frozen when pretrained weights were actually loaded.
    freeze_features = bool(pretrained and fixed_feature)
    return _VGG(backbone, backbone.features, freeze_features)
    def init_vgg13_params(self):
        """Initialise the five down blocks from pretrained ``vgg13_bn``.

        The ``Conv2d`` and ``BatchNorm2d`` layers of
        ``vgg.vgg13_bn(pretrained=True).features`` are paired, in order, with
        the ``Conv2d`` / ``BatchNorm2d`` layers found in the ``conv1`` and
        ``conv2`` ``cbr_unit`` of ``self.down1`` .. ``self.down5``.  For each
        pair the destination's parameter tensors (and, for batch norm, the
        running statistics) are rebound to the source's tensors.

        Asserts that both sides have the same number of layers and that
        paired convolution weights and biases have the same size.
        """
        pretrained_net = vgg.vgg13_bn(pretrained=True)
        transferable = (nn.Conv2d, nn.BatchNorm2d)

        # Source layers in network order; activations and pooling are skipped.
        source_layers = [
            layer for layer in pretrained_net.features.children()
            if isinstance(layer, transferable)
        ]

        # Destination layers: every block contributes its conv1 and conv2
        # units (no block uses a conv3/conv4 unit here).
        down_blocks = (self.down1, self.down2, self.down3, self.down4,
                       self.down5)
        target_layers = []
        for block in down_blocks:
            for unit in (block.conv1.cbr_unit, block.conv2.cbr_unit):
                target_layers.extend(
                    layer for layer in unit
                    if isinstance(layer, transferable))

        assert len(source_layers) == len(target_layers)

        for src, dst in zip(source_layers, target_layers):
            both_conv = isinstance(src, nn.Conv2d) and isinstance(
                dst, nn.Conv2d)
            if both_conv:
                assert src.weight.size() == dst.weight.size()
                assert src.bias.size() == dst.bias.size()
                dst.weight.data = src.weight.data
                dst.bias.data = src.bias.data
                continue

            both_bn = isinstance(src, nn.BatchNorm2d) and isinstance(
                dst, nn.BatchNorm2d)
            if both_bn:
                dst.running_mean.data = src.running_mean.data
                dst.running_var.data = src.running_var.data
                dst.weight.data = src.weight.data
                dst.bias.data = src.bias.data
Exemplo n.º 4
0
    def __init__(self,
                 subtype='vgg16',
                 out_stages=(2, 3, 4),
                 backbone_path=None,
                 pretrained=False):
        """Build a VGG backbone out of a torchvision batch-norm VGG.

        :param subtype: one of ``'vgg11'``, ``'vgg13'``, ``'vgg16'``,
            ``'vgg19'``; selects the matching ``vggNN_bn`` constructor.
        :param out_stages: stage indices; the first and last entry bound the
            slice of ``[64, 128, 256, 512, 512]`` kept in
            ``self.out_channels``.  The sequence is copied, so instances never
            share (or mutate) the caller's or the default object.
        :param backbone_path: stored on the instance; only consulted by the
            fallback branch at the end of the constructor.
        :param pretrained: stored on the instance, but currently overridden
            to ``True`` for every supported subtype (see the note below).
        :raises NotImplementedError: if ``subtype`` is not supported.
        """
        super(VGG, self).__init__()
        # Copy so a mutable argument/default is never aliased across instances.
        self.out_stages = list(out_stages)
        self.backbone_path = backbone_path
        self.pretrained = pretrained

        if subtype == 'vgg11':
            build_features = vgg11_bn
        elif subtype == 'vgg13':
            build_features = vgg13_bn
        elif subtype == 'vgg16':
            build_features = vgg16_bn
        elif subtype == 'vgg19':
            build_features = vgg19_bn
        else:
            raise NotImplementedError

        # NOTE(review): the ``pretrained`` argument is deliberately left
        # overridden here to keep existing behaviour -- every subtype has
        # always been built with pretrained weights regardless of the
        # argument.  Confirm whether callers should be able to opt out.
        self.pretrained = True
        features = build_features(pretrained=self.pretrained).features

        stage_channels = [64, 128, 256, 512, 512]
        first_stage, last_stage = self.out_stages[0], self.out_stages[-1]
        self.out_channels = stage_channels[first_stage:last_stage + 1]

        # NOTE(review): the split points below are fixed indices into the
        # feature list; they look tuned to one layer layout -- confirm they
        # are right for every subtype.
        layers = list(features.children())
        self.conv1 = nn.Sequential(*layers[:7])
        self.layer1 = nn.Sequential(*layers[7:14])
        self.layer2 = nn.Sequential(*layers[14:24])
        self.layer3 = nn.Sequential(*layers[24:34])
        self.layer4 = nn.Sequential(*layers[34:43])

        # NOTE(review): unreachable while ``self.pretrained`` is forced to
        # True above.  ``self.backbone`` is not assigned anywhere in this
        # constructor, so this branch needs attention before it is enabled.
        if not self.pretrained:
            if self.backbone_path:
                self.pretrained = True
                self.backbone.load_state_dict(torch.load(self.backbone_path))
            else:
                self.init_weights()
Exemplo n.º 5
0
    def __init__(self,
                 shape=(64, 64),
                 lidar_channels=6,
                 rgb_channels=3,
                 num_features=512,
                 **kwargs):
        """
        The network architecture.

        A feature sub-network built from the ``features`` of a pretrained
        ``vgg13_bn`` feeds two heads: a two-class classifier and an oriented
        bounding box (OBB) regressor.

        :param shape:
            The shape of the input windows (rows and columns)

        :param lidar_channels:
            The number of channels of LiDAR data (the volume depth)

        :param rgb_channels:
            The number of channels of RGB data (e.g. 3)

        :param num_features:
            Input width of both heads; also passed to the ``late_cat``
            fusion sub-network.

        The remaining options are read from ``**kwargs``:

        :param fusion:
            How to do fusion (default ``C.EXPERIMENT.FUSION``); one of:

            - 'early' to concatenate the input channels before the 'features' net,

            - 'late_cat', to process inputs separately and concatenate them. The
               concatenated vector is reduced using a linear layer.

            - 'late_add' to process inputs separately and then combine them by
               adding them.

            See :class FusionOptions: for the list of valid options.

        :param channel_dropout:
            Whether to selectively drop RGB or LIDAR data
            (default ``ChannelDropoutOptions.CDROP``).

        :param channel_dropout_ratios:
            A triple with the probability to dropout color, lidar, or neither
            (default ``(1, 1, 5)``). The values are normalised to sum to one,
            so for example (1,2,7) means that there is a one in ten chance
            that color is dropped and a 2 in ten chance that lidar is dropped.

        :param obb_parametrization:
            The parametrization of the OBB
            (default ``ObbOptions.VECTOR_AND_WIDTH``).

        :param synthetic:
            Stored as ``self.synthetic``
            (default ``SyntheticOptions.NO_PRETRAIN``).

        :param class_loss:
            Stored as ``self.class_loss_function``
            (default ``ClassLossOptions.XENT_LOSS``).

        :param regression_loss:
            Stored as ``self.regression_loss_function``
            (default ``RegressionLossOptions.SMOOTH_L1``).

        :param num_hidden:
            The width of the hidden linear layer used for classification and
            regression (default 2048).

        :raises ValueError:
            If ``obb_parametrization`` is not one of the handled
            ``ObbOptions`` values.
        """
        super().__init__()

        self.shape = shape
        self.lidar_channels = lidar_channels
        self.rgb_channels = rgb_channels
        self.num_features = num_features

        self.fusion = kwargs.pop('fusion', C.EXPERIMENT.FUSION)
        self.obb_parametrization = kwargs.pop('obb_parametrization',
                                              ObbOptions.VECTOR_AND_WIDTH)
        self.channel_dropout = kwargs.pop('channel_dropout',
                                          ChannelDropoutOptions.CDROP)

        # Builtin ``float`` instead of the ``np.float`` alias, which no longer
        # exists in current NumPy releases; both mean float64.
        self.channel_dropout_ratios = np.array(
            kwargs.pop('channel_dropout_ratios', (1, 1, 5)), dtype=float)
        # Turn relative weights into probabilities.
        self.channel_dropout_ratios /= self.channel_dropout_ratios.sum()

        self.synthetic = kwargs.pop('synthetic', SyntheticOptions.NO_PRETRAIN)
        self.class_loss_function = kwargs.pop('class_loss',
                                              ClassLossOptions.XENT_LOSS)
        self.regression_loss_function = kwargs.pop(
            'regression_loss', RegressionLossOptions.SMOOTH_L1)

        self.num_hidden = kwargs.pop('num_hidden', 2048)

        proto = vgg.vgg13_bn(pretrained=True)

        # Choose a feature extraction subnet based on the `fusion` argument.
        # NOTE(review): any other `fusion` value leaves `self.features`
        # unset -- confirm whether that should be rejected here.
        if self.fusion == FusionOptions.EARLY:
            self.features = EarlyFusion(lidar_channels, rgb_channels,
                                        proto.features)
        elif self.fusion == FusionOptions.LATE_ADD:
            self.features = LateFusionAdd(lidar_channels, rgb_channels,
                                          proto.features)
        elif self.fusion == FusionOptions.LATE_CAT:
            self.features = LateFusionCat(lidar_channels, rgb_channels,
                                          num_features, proto.features)

        # Classify (determine if it is a box)
        num_classes = 2  # 0 = 'background' (not a box), 1 = 'object' (it is a box)
        self.classifier = nn.Sequential(
            nn.Linear(self.num_features, self.num_hidden),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(self.num_hidden, num_classes),
        )

        # Regress (estimate box parameters)
        if self.obb_parametrization == ObbOptions.VECTOR_AND_WIDTH:
            self.num_obb_parameters = 2 + 2 + 1  # origin + length-vector + width
        elif self.obb_parametrization == ObbOptions.TWO_VECTORS:
            self.num_obb_parameters = 2 + 2 + 2
        elif self.obb_parametrization == ObbOptions.FOUR_POINTS:
            self.num_obb_parameters = 4 * 2
        else:
            raise ValueError(
                "obb_parametrization must be on of the `ObbOptions` values")

        self.regressor = nn.Sequential(
            nn.Linear(self.num_features, self.num_hidden),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(self.num_hidden, self.num_obb_parameters),
        )
Exemplo n.º 6
0
def recordVGG(info):
    """Summarise every torchvision VGG variant and write one JSON per model.

    For each of ``vgg11``, ``vgg13``, ``vgg16``, ``vgg19`` and their ``_bn``
    counterparts (in that order) the pretrained model is built, moved to the
    GPU, summarised for a ``[3, 224, 224]`` input via ``__summary`` and the
    result is handed to ``__writeInfoJSON`` under the model's name.

    A model is skipped when the module-level ``SKIP`` flag is truthy and its
    name is already listed in ``info['name_list']``.

    :param info: mapping providing ``'name_list'``, the names to skip when
        ``SKIP`` is set.
    """
    import torchvision.models.vgg as vggGen

    model_names = (
        'vgg11', 'vgg13', 'vgg16', 'vgg19',
        'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn',
    )

    for name in model_names:
        # Log label keeps the historical spelling, e.g. "VGG11_bn".
        label = 'VGG' + name[len('vgg'):]

        if SKIP and name in info['name_list']:
            INFO("Skip {}".format(label))
            continue

        INFO("proceeding for {}...".format(label))
        net = getattr(vggGen, name)(pretrained=True).cuda()
        # Named ``summary`` rather than ``sum`` to avoid shadowing the builtin.
        summary = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(summary, name)
Exemplo n.º 7
0
def _replace_input_conv(net, in_channels, kernel_size=3, stride=1, padding=1,
                        kaiming_init=True):
    """Swap ``net.features[0]`` for a new 64-filter conv on ``in_channels``.

    When ``kaiming_init`` is true the new layer gets Kaiming-normal weights
    (``fan_out`` / ``relu``) and a zero bias; otherwise it keeps the default
    ``nn.Conv2d`` initialisation.
    """
    input_layer = nn.Conv2d(in_channels,
                            64,
                            kernel_size=kernel_size,
                            stride=stride,
                            padding=padding)
    if kaiming_init:
        nn.init.kaiming_normal_(input_layer.weight,
                                mode='fan_out',
                                nonlinearity='relu')
        input_layer.bias.data.zero_()
    net.features[0] = input_layer


def load_model(model_name, classes=1000, pretrained=True, in_channels=3):
    """Load the specified network architecture.

    Args:
      model_name: architecture name. One of ``vgg11``, ``vgg13``, ``vgg16``,
        ``vgg19``, ``vgg11bn``, ``vgg13bn``, ``vgg16bn``, ``vgg19bn``,
        ``vgg19_orig``, ``alexnet`` or ``lenet``.
      classes: number of predicted classes.
      pretrained: load a network pretrained on ImageNet. Requires
        ``classes == 1000``. ``vgg19_orig`` is always built without
        pretrained weights and then passed to ``init_weights_vgg_orig``;
        ``lenet`` does not receive this flag.
      in_channels: number of input channels. For the VGG and AlexNet models a
        value other than 3 replaces the first feature layer with a freshly
        initialised convolution; for ``lenet`` it is passed to the
        constructor.

    Returns:
      The constructed network.

    Raises:
      AssertionError: if ``pretrained`` is set and ``classes`` is not 1000.
      ValueError: if ``model_name`` is not supported.
    """
    if pretrained:
        assert classes == 1000, "Pretrained models are provided only for Imagenet."

    kwargs = {'num_classes': classes}

    # Public model name -> constructor attribute on the ``VGG`` module.
    vgg_constructors = {
        'vgg11': 'vgg11',
        'vgg13': 'vgg13',
        'vgg16': 'vgg16',
        'vgg19': 'vgg19',
        'vgg11bn': 'vgg11_bn',
        'vgg13bn': 'vgg13_bn',
        'vgg16bn': 'vgg16_bn',
        'vgg19bn': 'vgg19_bn',
    }
    # Guard the lookup so a non-string name still ends in ValueError below.
    constructor_name = (vgg_constructors.get(model_name)
                        if isinstance(model_name, str) else None)

    if constructor_name is not None:
        net = getattr(VGG, constructor_name)(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            _replace_input_conv(net, in_channels)
    elif model_name == 'vgg19_orig':
        net = VGG.vgg19(pretrained=False, **kwargs)
        if in_channels != 3:
            # No Kaiming init here: init_weights_vgg_orig runs on the whole
            # network right after.
            _replace_input_conv(net, in_channels, kaiming_init=False)
        init_weights_vgg_orig(net)
    elif model_name == 'alexnet':
        net = AlexNet(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            _replace_input_conv(net,
                                in_channels,
                                kernel_size=11,
                                stride=4,
                                padding=2)
    elif model_name == 'lenet':
        kwargs['in_channels'] = in_channels
        net = lenet(**kwargs)
    else:
        raise ValueError("Unsupported model architecture.")
    return net