示例#1
0
    def __init__(self, bottleneck2d):
        """Build a 3D bottleneck block by inflating the layers of a 2D one.

        Args:
            bottleneck2d: 2D bottleneck block exposing ``conv1``..``conv3``,
                ``bn1``..``bn3``, ``downsample`` and ``stride``.
        """
        super(Bottleneck3d, self).__init__()

        # The stride used along time (middle convolution and shortcut) is
        # taken from the first stride component of the 2D middle convolution.
        time_stride = bottleneck2d.conv2.stride[0]

        # First convolution: temporal extent of 1.
        self.conv1 = inflate.inflate_conv(bottleneck2d.conv1,
                                          time_dim=1,
                                          center=True)
        self.bn1 = inflate.inflate_batch_norm(bottleneck2d.bn1)

        # Middle convolution: temporal extent of 3, padded by 1 in time.
        self.conv2 = inflate.inflate_conv(bottleneck2d.conv2,
                                          time_dim=3,
                                          time_padding=1,
                                          time_stride=time_stride,
                                          center=True)
        self.bn2 = inflate.inflate_batch_norm(bottleneck2d.bn2)

        # Last convolution: temporal extent of 1.
        self.conv3 = inflate.inflate_conv(bottleneck2d.conv3,
                                          time_dim=1,
                                          center=True)
        self.bn3 = inflate.inflate_batch_norm(bottleneck2d.bn3)

        self.relu = torch.nn.ReLU(inplace=True)

        # Shortcut branch: inflated only when the 2D block defines one.
        shortcut2d = bottleneck2d.downsample
        if shortcut2d is None:
            self.downsample = None
        else:
            self.downsample = inflate_downsample(shortcut2d,
                                                 time_stride=time_stride)

        self.stride = bottleneck2d.stride
示例#2
0
    def __init__(self, denselayer2d, inflate_convs=False):
        """Inflate the children of a 2D dense layer into 3D modules.

        Children of ``denselayer2d`` are visited in order and registered on
        this module under the same names.

        Args:
            denselayer2d: 2D dense layer whose children are ``BatchNorm2d``,
                ``ReLU`` or ``Conv2d`` modules and which exposes
                ``drop_rate``.
            inflate_convs: When True, convolutions whose first kernel
                dimension is larger than 1 get a temporal extent equal to
                that kernel size and are preceded by a replication padding
                along time. Otherwise every convolution gets a temporal
                extent of 1.

        Raises:
            ValueError: If a convolution to inflate has an even kernel size,
                or if a child is not one of the handled layer types.
        """
        super(_DenseLayer3d, self).__init__()

        self.inflate_convs = inflate_convs
        for name, child in denselayer2d.named_children():
            if isinstance(child, torch.nn.BatchNorm2d):
                self.add_module(name, inflate.inflate_batch_norm(child))
            elif isinstance(child, torch.nn.ReLU):
                self.add_module(name, child)
            elif isinstance(child, torch.nn.Conv2d):
                kernel_size = child.kernel_size[0]
                if inflate_convs and kernel_size > 1:
                    # Symmetric padding of kernel_size // 2 on each side is
                    # only the right amount when the kernel size is odd.
                    if kernel_size % 2 != 1:
                        raise ValueError(
                            'kernel size should be odd but got {}'.format(
                                kernel_size))
                    # Pad input in the time dimension
                    pad_size = kernel_size // 2
                    pad_time = ReplicationPad3d(
                        (0, 0, 0, 0, pad_size, pad_size))
                    # Module names must not contain '.', otherwise
                    # add_module raises KeyError on current PyTorch.
                    self.add_module('padding_1', pad_time)
                    # Add time dimension of same dim as the space one
                    self.add_module(name,
                                    inflate.inflate_conv(child, kernel_size))
                else:
                    self.add_module(name, inflate.inflate_conv(child, 1))
            else:
                raise ValueError('{} is not among handled layer types'.format(
                    type(child)))
        self.drop_rate = denselayer2d.drop_rate
示例#3
0
    def __init__(self, resnet2d, frame_nb=16, class_nb=1000, conv_class=False):
        """
        Build a 3D ResNet by inflating the layers of a 2D ResNet.

        Args:
            resnet2d: 2D ResNet exposing ``conv1``, ``bn1``, ``maxpool``,
                ``layer1``..``layer4``, ``avgpool`` and ``fc``.
            frame_nb: Number of input frames. Only read when ``conv_class``
                is False, to size the temporal extent of the final average
                pooling as ``ceil(frame_nb / 16)``.
            class_nb: Number of outputs of the convolutional classifier.
                Only read when ``conv_class`` is True; otherwise the inflated
                ``resnet2d.fc`` is used as classifier.
            conv_class: Whether to use convolutional layer as classifier to
                adapt to various number of frames
        """
        super(I3ResNet, self).__init__()
        self.conv_class = conv_class

        self.conv1 = inflate.inflate_conv(
            resnet2d.conv1, time_dim=3, time_padding=1, center=True)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = torch.nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(
            resnet2d.maxpool, time_dim=3, time_padding=1, time_stride=2)

        self.layer1 = inflate_reslayer(resnet2d.layer1)
        self.layer2 = inflate_reslayer(resnet2d.layer2)
        self.layer3 = inflate_reslayer(resnet2d.layer3)
        self.layer4 = inflate_reslayer(resnet2d.layer4)

        if conv_class:
            self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
            # Read the feature width from the 2D classification head instead
            # of hard-coding it; 2048 (the previous constant) is kept as a
            # fallback when the head does not expose ``in_features``.
            head2d = getattr(resnet2d, 'fc', None)
            feature_dim = getattr(head2d, 'in_features', 2048)
            self.classifier = torch.nn.Conv3d(
                in_channels=feature_dim,
                out_channels=class_nb,
                kernel_size=(1, 1, 1),
                bias=True)
        else:
            # Divide by a float so the ceiling is taken on the true quotient
            # even where ``/`` is integer division.
            final_time_dim = int(math.ceil(frame_nb / 16.0))
            self.avgpool = inflate.inflate_pool(
                resnet2d.avgpool, time_dim=final_time_dim)
            self.fc = inflate.inflate_linear(resnet2d.fc, 1)
def test_inflate_conv_no_padding():
    """Check an inflated convolution against its 2D source on a static clip.

    One random image is repeated ``frame_nb`` times along time. The output
    of the inflated convolution (no temporal padding) is compared, frame by
    frame, with the output of the 2D convolution on the single image.
    """
    torch.manual_seed(0)

    input_space_dim = 10  # Dimensions of input image
    in_channels = 3  # input feature map dim
    out_channels = 2  # output feature map dim

    filter_space_dim = 5  # conv filter spatial dim
    filter_time_dim = 3  # conv filter temporal dim

    frame_nb = 5

    # Initialize inputs with batch dimension
    input_img = torch.rand(in_channels, input_space_dim, input_space_dim)
    input_2d_var = Variable(input_img.unsqueeze(0))
    input_3d = input_img.unsqueeze(1).repeat(1, frame_nb, 1, 1)
    input_3d_var = Variable(input_3d.unsqueeze(0))

    # Initialize convolutions
    conv2d = torch.nn.Conv2d(
        in_channels, out_channels, filter_space_dim, padding=1)
    conv3d = inflate_conv(conv2d, filter_time_dim)

    # Compute outputs
    out_2d = conv2d(input_2d_var)
    out_3d = conv3d(input_3d_var)

    # Without temporal padding the output is shorter than the input by
    # filter_time_dim // 2 frames at each end.
    out_frame_nb = frame_nb - 2 * (filter_time_dim // 2)
    expected_out_3d = out_2d.data.unsqueeze(2).repeat(
        1, 1, out_frame_nb, 1, 1)

    # Compare magnitudes: a signed max would let large negative errors pass.
    output_diff = out_3d.data - expected_out_3d
    assert output_diff.abs().max() < 0.00001
示例#5
0
def inflate_features(features, inflate_block_convs=False):
    """
    Build the 3D counterpart of a DenseNet feature extractor.

    Every child of ``features`` is converted to a 3D module and registered
    under the same name, in the same order.

    Args:
        features: 2D feature extractor whose children are batch norms, ReLUs,
            convolutions, max/average poolings, dense blocks or transitions.
        inflate_block_convs: Forwarded as ``inflate_convs`` to every
            ``_DenseLayer3d`` built for the dense blocks.

    Returns:
        A ``(features3d, transition_nb)`` tuple: the ``torch.nn.Sequential``
        holding the converted modules and the number of transition layers
        encountered.

    Raises:
        ValueError: If a child is not one of the handled layer types.
    """
    densenet = torchvision.models.densenet
    pool_types = (torch.nn.MaxPool2d, torch.nn.AvgPool2d)

    features3d = torch.nn.Sequential()
    transition_nb = 0  # Count number of transition layers
    for name, child in features.named_children():
        if isinstance(child, torch.nn.BatchNorm2d):
            module3d = inflate.inflate_batch_norm(child)
        elif isinstance(child, torch.nn.ReLU):
            # Activations are reused as they are
            module3d = child
        elif isinstance(child, torch.nn.Conv2d):
            module3d = inflate.inflate_conv(child, 1)
        elif isinstance(child, pool_types):
            module3d = inflate.inflate_pool(child)
        elif isinstance(child, densenet._DenseBlock):
            # Rebuild the dense block layer by layer
            module3d = torch.nn.Sequential()
            for layer_name, layer2d in child.named_children():
                assert isinstance(layer2d, densenet._DenseLayer)
                layer3d = _DenseLayer3d(layer2d,
                                        inflate_convs=inflate_block_convs)
                module3d.add_module(layer_name, layer3d)
        elif isinstance(child, densenet._Transition):
            module3d = _Transition3d(child)
        else:
            raise ValueError('{} is not among handled layer types'.format(
                type(child)))

        features3d.add_module(name, module3d)
        if isinstance(child, densenet._Transition):
            transition_nb += 1
    return features3d, transition_nb
示例#6
0
    def __init__(self, resnet2d, frame_nb=32, class_nb=4, reason_nb=21, side_task=False, conv_class=False):
        """
        Build a hybrid network: inflated 3D stem and layers 1-3, followed by
        the 2D ``layer4`` and ``avgpool`` of ``resnet2d`` and a new linear
        classifier.

        Args:
            resnet2d: 2D ResNet exposing ``conv1``, ``bn1``, ``maxpool``,
                ``layer1``..``layer4``, ``avgpool`` and ``fc``.
            frame_nb: Not read by this constructor.
            class_nb: Number of outputs of the new linear classifier
                ``fc_2d``.
            reason_nb: Not read by this constructor.
            side_task: Stored as ``self.side_task``.
            conv_class: Whether to use convolutional layer as classifier to
                adapt to various number of frames. Only stored as
                ``self.conv_class`` here.
        """
        super(I3ResNet, self).__init__()
        self.conv_class = conv_class

        self.conv1 = inflate.inflate_conv(
            resnet2d.conv1, time_dim=5, time_stride=1, time_padding=2, center=True)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = torch.nn.ReLU(inplace=True)
        # The inflated 2D max pooling has temporal extent and stride 1; the
        # separate ``maxpool2`` below has kernel 3 / stride 2 on its first
        # dimension only.
        self.maxpool = inflate.inflate_pool(
            resnet2d.maxpool, time_dim=1, time_stride=1)
        self.maxpool2 = torch.nn.MaxPool3d(
            kernel_size=(3, 1, 1), stride=(2, 1, 1), padding=(1, 0, 0))

        self.layer1 = inflate_reslayer(resnet2d.layer1)
        self.layer2 = inflate_reslayer(resnet2d.layer2)
        self.layer3 = inflate_reslayer(resnet2d.layer3)
        # The last stage and the pooling are kept as 2D modules.
        self.layer4_2d = resnet2d.layer4
        self.avgpool_2d = resnet2d.avgpool
        num_ftrs = resnet2d.fc.in_features
        # Honour ``class_nb`` instead of a hard-coded 4 (its default value).
        self.fc_2d = torch.nn.Linear(num_ftrs, class_nb)
        # self.drop = nn.Dropout(0.25)
        self.side_task = side_task
def inflate_downsample(downsample2d, time_stride=1):
    """Inflate a 2D shortcut branch into its 3D counterpart.

    Args:
        downsample2d: Indexable container whose item 0 is passed to
            ``inflate.inflate_conv`` and item 1 to
            ``inflate.inflate_batch_norm``.
        time_stride: Temporal stride given to the inflated convolution.

    Returns:
        A ``torch.nn.Sequential`` made of the two inflated modules, in the
        same order.
    """
    conv3d = inflate.inflate_conv(
        downsample2d[0], time_dim=1, time_stride=time_stride, center=True)
    bn3d = inflate.inflate_batch_norm(downsample2d[1])
    return torch.nn.Sequential(conv3d, bn3d)
示例#8
0
 def __init__(self, transition2d, inflate_conv=False):
     """
     Inflates transition layer from transition2d

     Children of ``transition2d`` are visited in order and registered on
     this module under the same names.

     Args:
         transition2d: 2D transition layer whose children are
             ``BatchNorm2d``, ``ReLU``, ``Conv2d`` or ``AvgPool2d`` modules.
         inflate_conv: When True, convolutions get a temporal extent of 3
             and are preceded by a replication padding of 1 along time.
             Otherwise they get a temporal extent of 1.

     Raises:
         ValueError: If a child is not one of the handled layer types.
     """
     super(_Transition3d, self).__init__()
     for name, layer in transition2d.named_children():
         if isinstance(layer, torch.nn.BatchNorm2d):
             self.add_module(name, inflate.inflate_batch_norm(layer))
         elif isinstance(layer, torch.nn.ReLU):
             self.add_module(name, layer)
         elif isinstance(layer, torch.nn.Conv2d):
             if inflate_conv:
                 pad_time = ReplicationPad3d((0, 0, 0, 0, 1, 1))
                 # Module names must not contain '.', otherwise add_module
                 # raises KeyError on current PyTorch.
                 self.add_module('padding_1', pad_time)
                 self.add_module(name, inflate.inflate_conv(layer, 3))
             else:
                 self.add_module(name, inflate.inflate_conv(layer, 1))
         elif isinstance(layer, torch.nn.AvgPool2d):
             # Pooling is inflated with a temporal extent of 2
             self.add_module(name, inflate.inflate_pool(layer, 2))
         else:
             raise ValueError('{} is not among handled layer types'.format(
                 type(layer)))
示例#9
0
def inflate_downsample(downsample2d, time_stride=1):
    """Return the 3D version of a 2D shortcut branch.

    Args:
        downsample2d: Indexable container; item 0 is inflated as a
            convolution with a temporal extent of 1, item 1 as a batch norm.
        time_stride: Temporal stride given to the inflated convolution.

    Returns:
        A ``torch.nn.Sequential`` chaining the inflated convolution and the
        inflated batch norm.
    """
    inflated_conv = inflate.inflate_conv(downsample2d[0],
                                         time_dim=1,
                                         time_stride=time_stride,
                                         center=True)
    inflated_bn = inflate.inflate_batch_norm(downsample2d[1])
    return torch.nn.Sequential(inflated_conv, inflated_bn)


# resnet2d = torchvision.models.resnet101(pretrained=True)
# i3d = I3ResNet(copy.deepcopy(resnet2d), 32, 4, conv_class=False)
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# i3d = i3d.to(device)
# input = torch.ones([1,3,32,224,224], dtype=torch.float, device=device)
# output = i3d(input)
# print(i3d)
def test_input_block():
    """Compare the inflated DenseNet input block with its 2D source.

    Each batch of images is repeated ``frame_nb`` times along time, padded
    by replication in time, and passed through the inflated block. The
    result is compared, frame by frame, with the 2D block output.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # NOTE(review): RandomSizedCrop is the legacy torchvision name of
    # RandomResizedCrop - confirm which one the installed version provides.
    dataset = datasets.ImageFolder(
        '/sequoia/data1/yhasson/datasets/test-dataset',
        transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    densenet = torchvision.models.densenet121(pretrained=True)
    features = densenet.features
    seq2d = torch.nn.Sequential(features.conv0, features.norm0, features.relu0,
                                features.pool0)
    seq3d = torch.nn.Sequential(inflate.inflate_conv(features.conv0, 3),
                                inflate.inflate_batch_norm(features.norm0),
                                features.relu0,
                                inflate.inflate_pool(features.pool0, 1))

    loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=False)
    frame_nb = 4
    # One frame of replication padding on each side in time, so the
    # convolution of temporal extent 3 keeps frame_nb output frames.
    time_pad = torch.nn.ReplicationPad3d((0, 0, 0, 0, 1, 1))
    # Labels are not needed for this comparison; they are no longer moved to
    # the GPU, so the test also runs on CPU-only machines.
    for input_2d, _ in loader:
        input_2d_var = torch.autograd.Variable(input_2d)
        out2d = seq2d(input_2d_var)
        input_3d = input_2d.unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
        input_3d_var = time_pad(input_3d)
        out3d = seq3d(input_3d_var)
        expected_out_3d = out2d.data.unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
        # Compare magnitudes: a signed max would let large negative errors
        # pass unnoticed.
        max_abs_diff = (expected_out_3d - out3d.data).abs().max()
        print(max_abs_diff)
        assert max_abs_diff < 0.0001
示例#11
0
    def __init__(self, vgg2d, frame_nb=16, class_nb=1000, conv_class=False):
        """
        Build the 3D network by inflating selected layers of a 2D VGG.

        Args:
            vgg2d: 2D network exposing indexable ``features`` and
                ``classifier`` containers.
            frame_nb: Not read by this constructor.
            class_nb: Not read by this constructor.
            conv_class: Not read by this constructor.
        """
        super(I3vgg16, self).__init__()

        # (attribute name, index in vgg2d.features), in registration order.
        # Names starting with 'conv' are inflated as convolutions, the
        # remaining ones as poolings.
        feature_layout = (
            ('conv1_1', 0), ('conv1_2', 2), ('maxpool_1', 4),
            ('conv2_1', 5), ('conv2_2', 7), ('maxpool_2', 9),
            ('conv3_1', 10), ('conv3_2', 12), ('conv3_3', 14),
            ('maxpool_3', 16),
            ('conv4_1', 17), ('conv4_2', 19), ('conv4_3', 21),
            ('maxpool_4', 23),
            ('conv5_1', 24), ('conv5_2', 26), ('conv5_3', 28),
            ('maxpool_5', 30),
        )
        for attr_name, feature_idx in feature_layout:
            layer2d = vgg2d.features[feature_idx]
            if attr_name.startswith('conv'):
                layer3d = inflate.inflate_conv(layer2d,
                                               time_dim=3,
                                               time_padding=1,
                                               center=True)
            else:
                layer3d = inflate.inflate_pool(layer2d,
                                               time_dim=3,
                                               time_padding=1,
                                               time_stride=2)
            setattr(self, attr_name, layer3d)

        # (attribute name, index in vgg2d.classifier) of the linear layers.
        for attr_name, classifier_idx in (('fc1', 0), ('fc2', 3), ('fc3', 6)):
            linear3d = inflate.inflate_linear(vgg2d.classifier[classifier_idx],
                                              1)
            setattr(self, attr_name, linear3d)

        self.relu = torch.nn.ReLU(inplace=True)
示例#12
0
    def __init__(self, vgg2d, frame_nb=16, class_nb=1000, conv_class=False):
        """
        Build the 3D network by inflating selected layers of a 2D VGG whose
        convolutions are followed by batch norms.

        Args:
            vgg2d: 2D network exposing indexable ``features`` and
                ``classifier`` containers.
            frame_nb: Not read by this constructor.
            class_nb: Not read by this constructor.
            conv_class: Not read by this constructor.
        """
        super(I3vgg, self).__init__()

        # (attribute name, index in vgg2d.features), in registration order.
        # The name prefix selects the inflation: 'conv' -> convolution,
        # 'bn' -> batch norm, anything else -> pooling.
        feature_layout = (
            ('conv1_1', 0), ('bn1', 1),
            ('conv1_2', 3), ('bn2', 4),
            ('maxpool_1', 6),
            ('conv2_1', 7), ('bn3', 8),
            ('conv2_2', 10), ('bn4', 11),
            ('maxpool_2', 13),
            ('conv3_1', 14), ('bn5', 15),
            ('conv3_2', 17), ('bn6', 18),
            ('conv3_3', 20), ('bn7', 21),
            ('maxpool_3', 23),
            ('conv4_1', 24), ('bn8', 25),
            ('conv4_2', 27), ('bn9', 28),
            ('conv4_3', 30), ('bn10', 31),
            ('maxpool_4', 33),
            ('conv5_1', 34), ('bn11', 35),
            ('conv5_2', 37), ('bn12', 38),
            ('conv5_3', 40), ('bn13', 41),
            ('maxpool_5', 43),
        )
        for attr_name, feature_idx in feature_layout:
            layer2d = vgg2d.features[feature_idx]
            if attr_name.startswith('conv'):
                layer3d = inflate.inflate_conv(layer2d,
                                               time_dim=3,
                                               time_padding=1,
                                               center=True)
            elif attr_name.startswith('bn'):
                layer3d = inflate.inflate_batch_norm(layer2d)
            else:
                layer3d = inflate.inflate_pool(layer2d,
                                               time_dim=3,
                                               time_padding=1,
                                               time_stride=2)
            setattr(self, attr_name, layer3d)

        # (attribute name, index in vgg2d.classifier) of the linear layers.
        for attr_name, classifier_idx in (('fc1', 0), ('fc2', 3), ('fc3', 6)):
            linear3d = inflate.inflate_linear(vgg2d.classifier[classifier_idx],
                                              1)
            setattr(self, attr_name, linear3d)

        self.relu = torch.nn.ReLU(inplace=True)