def vgg(**config):
    dataset = config.pop('dataset', 'imagenet')
    depth = config.pop('depth', 16)
    bn = config.pop('bn', True)

    if dataset == 'imagenet':
        config.setdefault('num_classes', 1000)
        if depth == 11:
            return vgg11_bn(pretrained=False, **config) if bn \
                else vgg11(pretrained=False, **config)
        elif depth == 13:
            return vgg13_bn(pretrained=False, **config) if bn \
                else vgg13(pretrained=False, **config)
        elif depth == 16:
            return vgg16_bn(pretrained=False, **config) if bn \
                else vgg16(pretrained=False, **config)
        elif depth == 19:
            return vgg19_bn(pretrained=False, **config) if bn \
                else vgg19(pretrained=False, **config)
    elif dataset == 'cifar10':
        config.setdefault('num_classes', 10)
    elif dataset == 'cifar100':
        config.setdefault('num_classes', 100)

    config.setdefault('batch_norm', bn)
    return VGG(model_name[depth], **config)
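# Hedged usage sketch for the dispatcher above. It assumes the surrounding
# module defines `model_name` (a depth -> configuration-key mapping) and the
# local `VGG` class, neither of which is shown in this snippet.
def _example_vgg_dispatch():
    # ImageNet: dispatches to torchvision's vgg16_bn with num_classes=1000.
    imagenet_net = vgg(dataset='imagenet', depth=16, bn=True)
    # CIFAR-10: falls through to the local VGG class with num_classes=10
    # and batch_norm=True.
    cifar_net = vgg(dataset='cifar10', depth=16, bn=True)
    return imagenet_net, cifar_net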
def vgg_13(batch_norm=True, pretrained=False, fixed_feature=True):
    """VGG 13-layer model from torchvision's vgg module.

    :param batch_norm: train the model with batch normalization.
    :param pretrained: if true, return a model pretrained on ImageNet.
    :param fixed_feature: if true and ``pretrained`` is true, the feature
        layers are frozen during training.
    """
    if batch_norm:
        from torchvision.models.vgg import vgg13_bn
        model = vgg13_bn(pretrained)
    else:
        from torchvision.models.vgg import vgg13
        model = vgg13(pretrained)
    ff = pretrained and fixed_feature
    return _VGG(model, model.features, ff)
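# Hedged usage sketch: the `_VGG` wrapper is defined elsewhere in this
# package; this only exercises the entry point above.
def _example_vgg_13_transfer():
    # Pretrained on ImageNet with batch norm; feature layers frozen.
    frozen = vgg_13(batch_norm=True, pretrained=True, fixed_feature=True)
    # Trained from scratch; nothing is frozen regardless of fixed_feature.
    scratch = vgg_13(batch_norm=True, pretrained=False)
    return frozen, scratch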
def init_vgg13_params(self):
    vgg13 = vgg.vgg13_bn(pretrained=True)

    blocks = [self.down1, self.down2, self.down3, self.down4, self.down5]

    # Collect the conv and batch-norm layers of pretrained VGG13-BN in order.
    features = list(vgg13.features.children())
    vgg_layers = []
    for _layer in features:
        if isinstance(_layer, (nn.Conv2d, nn.BatchNorm2d)):
            vgg_layers.append(_layer)

    # Collect the matching conv and batch-norm layers of the encoder blocks.
    merged_layers = []
    for idx, conv_block in enumerate(blocks):
        if idx < 2:
            units = [conv_block.conv1.cbr_unit, conv_block.conv2.cbr_unit]
        else:
            units = [
                conv_block.conv1.cbr_unit,
                conv_block.conv2.cbr_unit,
                # conv_block.conv3.cbr_unit,
                # conv_block.conv4.cbr_unit,
            ]
        for _unit in units:
            for _layer in _unit:
                if isinstance(_layer, (nn.Conv2d, nn.BatchNorm2d)):
                    merged_layers.append(_layer)

    assert len(vgg_layers) == len(merged_layers)

    # Copy the pretrained parameters layer by layer.
    for l1, l2 in zip(vgg_layers, merged_layers):
        if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
            assert l1.weight.size() == l2.weight.size()
            assert l1.bias.size() == l2.bias.size()
            l2.weight.data = l1.weight.data
            l2.bias.data = l1.bias.data
        elif isinstance(l1, nn.BatchNorm2d) and isinstance(l2, nn.BatchNorm2d):
            l2.running_mean.data = l1.running_mean.data
            l2.running_var.data = l1.running_var.data
            l2.weight.data = l1.weight.data
            l2.bias.data = l1.bias.data
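# The copier above assumes each encoder conv block exposes `convN.cbr_unit`
# iterables laid out conv -> batchnorm -> relu, mirroring vgg13_bn's
# `features`. A minimal sketch of such a unit (a hypothetical helper, not
# part of this file):
def _make_cbr_unit(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )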
def __init__(self, subtype='vgg16', out_stages=(2, 3, 4), backbone_path=None,
             pretrained=False):
    super(VGG, self).__init__()
    self.out_stages = out_stages
    self.backbone_path = backbone_path
    self.pretrained = pretrained

    if subtype == 'vgg11':
        features = vgg11_bn(pretrained=self.pretrained).features
    elif subtype == 'vgg13':
        features = vgg13_bn(pretrained=self.pretrained).features
    elif subtype == 'vgg16':
        features = vgg16_bn(pretrained=self.pretrained).features
    elif subtype == 'vgg19':
        features = vgg19_bn(pretrained=self.pretrained).features
    else:
        raise NotImplementedError(subtype)

    self.out_channels = [64, 128, 256, 512, 512]
    self.out_channels = self.out_channels[
        self.out_stages[0]:self.out_stages[-1] + 1]

    # Split the feature extractor into its five conv stages. Note: these
    # stage boundaries follow vgg16_bn's feature layout.
    children = list(features.children())
    self.conv1 = nn.Sequential(*children[:7])
    self.layer1 = nn.Sequential(*children[7:14])
    self.layer2 = nn.Sequential(*children[14:24])
    self.layer3 = nn.Sequential(*children[24:34])
    self.layer4 = nn.Sequential(*children[34:43])

    if not self.pretrained:
        if self.backbone_path:
            self.load_state_dict(torch.load(self.backbone_path))
        else:
            self.init_weights()
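# The forward pass is not shown in this snippet; the sketch below illustrates
# how the five stages pair with `out_stages`, numbering conv1 as stage 0 so
# that out_stages=(2, 3, 4) yields feature maps with 256/512/512 channels,
# matching `out_channels` above. This is an assumption, not the repo's code.
def _example_vgg_backbone_forward(backbone, x):
    stages = [backbone.conv1, backbone.layer1, backbone.layer2,
              backbone.layer3, backbone.layer4]
    outputs = []
    for i, stage in enumerate(stages):
        x = stage(x)
        if i in backbone.out_stages:
            outputs.append(x)
    return outputs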
def __init__(self, shape=(64, 64), lidar_channels=6, rgb_channels=3,
             num_features=512, **kwargs):
    """The network architecture.

    :param shape: The shape of the input windows (rows and columns).
    :param lidar_channels: The number of channels of LiDAR data (the volume
        depth).
    :param rgb_channels: The number of channels of RGB data (e.g. 3).
    :param num_features: The size of the feature vector produced by the
        fusion subnet.
    :param fusion: How to do fusion; one of:
        - 'early' to concatenate the input channels before the 'features'
          net,
        - 'late_cat' to process inputs separately and concatenate them; the
          concatenated vector is reduced using a linear layer,
        - 'late_add' to process inputs separately and then combine them by
          adding them.
        See :class:`FusionOptions` for the list of valid options.
    :param channel_dropout: Whether to selectively drop RGB or LiDAR data.
    :param channel_dropout_ratios: A triple with the probabilities of
        dropping color, dropping lidar, or dropping neither. For example,
        (1, 2, 7) means there is a one-in-ten chance that color is dropped
        and a two-in-ten chance that lidar is dropped.
    :param obb_parametrization: The parametrization of the OBB.
    :param num_hidden: The number of hidden units to use for classification
        and regression.
    """
    super().__init__()
    self.shape = shape
    self.lidar_channels = lidar_channels
    self.rgb_channels = rgb_channels
    self.num_features = num_features
    self.fusion = kwargs.pop('fusion', C.EXPERIMENT.FUSION)
    self.obb_parametrization = kwargs.pop('obb_parametrization',
                                          ObbOptions.VECTOR_AND_WIDTH)
    self.channel_dropout = kwargs.pop('channel_dropout',
                                      ChannelDropoutOptions.CDROP)
    self.channel_dropout_ratios = np.array(
        kwargs.pop('channel_dropout_ratios', (1, 1, 5)), dtype=float)
    self.channel_dropout_ratios /= self.channel_dropout_ratios.sum()
    self.synthetic = kwargs.pop('synthetic', SyntheticOptions.NO_PRETRAIN)
    self.class_loss_function = kwargs.pop('class_loss',
                                          ClassLossOptions.XENT_LOSS)
    self.regression_loss_function = kwargs.pop(
        'regression_loss', RegressionLossOptions.SMOOTH_L1)
    self.num_hidden = kwargs.pop('num_hidden', 2048)

    proto = vgg.vgg13_bn(pretrained=True)

    # Choose a feature extraction subnet based on the `fusion` argument.
    if self.fusion == FusionOptions.EARLY:
        self.features = EarlyFusion(lidar_channels, rgb_channels,
                                    proto.features)
    elif self.fusion == FusionOptions.LATE_ADD:
        self.features = LateFusionAdd(lidar_channels, rgb_channels,
                                      proto.features)
    elif self.fusion == FusionOptions.LATE_CAT:
        self.features = LateFusionCat(lidar_channels, rgb_channels,
                                      num_features, proto.features)
    else:
        raise ValueError("fusion must be one of the `FusionOptions` values")

    # Classify (determine if it is a box).
    num_classes = 2  # 0 = 'background' (not a box), 1 = 'object' (it is a box)
    self.classifier = nn.Sequential(
        nn.Linear(self.num_features, self.num_hidden),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(self.num_hidden, num_classes),
    )

    # Regress (estimate box parameters).
    if self.obb_parametrization == ObbOptions.VECTOR_AND_WIDTH:
        self.num_obb_parameters = 2 + 2 + 1  # origin + length-vector + width
    elif self.obb_parametrization == ObbOptions.TWO_VECTORS:
        self.num_obb_parameters = 2 + 2 + 2
    elif self.obb_parametrization == ObbOptions.FOUR_POINTS:
        self.num_obb_parameters = 4 * 2
    else:
        raise ValueError(
            "obb_parametrization must be one of the `ObbOptions` values")
    self.regressor = nn.Sequential(
        nn.Linear(self.num_features, self.num_hidden),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(self.num_hidden, self.num_obb_parameters),
    )
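# Hedged sketch of what the normalized `channel_dropout_ratios` imply: with
# ratios (1, 2, 7) there is a 0.1 chance of dropping RGB, 0.2 of dropping
# LiDAR, and 0.7 of keeping both. The actual sampling code is not shown in
# this snippet; this only illustrates the arithmetic.
def _example_channel_dropout_choice():
    ratios = np.array((1, 2, 7), dtype=float)
    ratios /= ratios.sum()  # -> [0.1, 0.2, 0.7]
    return np.random.choice(['drop_rgb', 'drop_lidar', 'keep_both'], p=ratios)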
def recordVGG(info):
    global SKIP
    import torchvision.models.vgg as vggGen

    # Profile every torchvision VGG variant, with and without batch norm,
    # skipping those already recorded in info['name_list'] when SKIP is set.
    for name in ['vgg11', 'vgg13', 'vgg16', 'vgg19',
                 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn']:
        display = name.replace('vgg', 'VGG')
        if SKIP and name in info['name_list']:
            INFO("Skip {}".format(display))
            continue
        INFO("proceeding for {}...".format(display))
        net = getattr(vggGen, name)(pretrained=True).cuda()
        summary = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(summary, name)
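# Example invocation (assumes a CUDA device plus the module-level SKIP flag
# and the INFO/__summary/__writeInfoJSON helpers defined elsewhere in this
# file):
#
#     SKIP = True
#     recordVGG({'name_list': ['vgg11', 'vgg13']})  # records the other six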
def load_model(model_name, classes=1000, pretrained=True, in_channels=3):
    """Load the specified VGG architecture for ImageNet.

    Args:
        model_name: VGG architecture type.
        classes: number of predicted classes.
        pretrained: load a network pretrained on ImageNet.
        in_channels: number of input channels; when different from 3, the
            first conv layer is replaced.
    """
    if pretrained:
        assert classes == 1000, "Pretrained models are provided only for ImageNet."

    kwargs = {'num_classes': classes}

    def _replace_input_layer(net, kernel_size=3, stride=1, padding=1,
                             init=True):
        # Swap the first conv layer so the network accepts `in_channels`
        # inputs instead of RGB.
        input_layer = nn.Conv2d(in_channels, 64, kernel_size=kernel_size,
                                stride=stride, padding=padding)
        if init:
            nn.init.kaiming_normal_(input_layer.weight, mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
        net.features[0] = input_layer

    vgg_constructors = {
        'vgg11': VGG.vgg11, 'vgg13': VGG.vgg13,
        'vgg16': VGG.vgg16, 'vgg19': VGG.vgg19,
        'vgg11bn': VGG.vgg11_bn, 'vgg13bn': VGG.vgg13_bn,
        'vgg16bn': VGG.vgg16_bn, 'vgg19bn': VGG.vgg19_bn,
    }

    if model_name in vgg_constructors:
        net = vgg_constructors[model_name](pretrained=pretrained, **kwargs)
        if in_channels != 3:
            _replace_input_layer(net)
    elif model_name == 'vgg19_orig':
        net = VGG.vgg19(pretrained=False, **kwargs)
        if in_channels != 3:
            # init_weights_vgg_orig re-initializes all weights below, so the
            # new input layer is not initialized here.
            _replace_input_layer(net, init=False)
        init_weights_vgg_orig(net)
    elif model_name == 'alexnet':
        net = AlexNet(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            _replace_input_layer(net, kernel_size=11, stride=4, padding=2)
    elif model_name == 'lenet':
        kwargs['in_channels'] = in_channels
        net = lenet(**kwargs)
    else:
        raise ValueError("Unsupported model architecture.")
    return net
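# Hedged usage sketch for load_model; `VGG`, `AlexNet`, and `lenet` are the
# model modules imported elsewhere in this file.
def _example_load_model():
    # Pretrained ImageNet VGG-16 with batch norm.
    imagenet_net = load_model('vgg16bn', classes=1000, pretrained=True)
    # Randomly initialized VGG-11 adapted to 10 classes and grayscale input.
    gray_net = load_model('vgg11', classes=10, pretrained=False, in_channels=1)
    return imagenet_net, gray_net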