Example #1
    def __init__(self, nr_classes):
        super(FBSparseVGGTest, self).__init__()
        self.sparseModel = scn.SparseVggNet(
            2,
            nInputPlanes=2,
            layers=[['C', 16], ['C', 16], 'MP', ['C', 32], ['C', 32], 'MP',
                    ['C', 64], ['C', 64], 'MP', ['C', 128], ['C', 128], 'MP',
                    ['C', 256], ['C', 256], 'MP', ['C', 512]]
        ).add(
            scn.Convolution(2, 512, 256, 3, filter_stride=2, bias=False)
        ).add(
            scn.BatchNormReLU(256)
        ).add(
            scn.SparseToDense(2, 256))

        cnn_spatial_output_size = [2, 3]
        self.spatial_size = self.sparseModel.input_spatial_size(
            torch.LongTensor(cnn_spatial_output_size))
        self.inputLayer = scn.InputLayer(dimension=2,
                                         spatial_size=self.spatial_size,
                                         mode=2)
        self.linear_input_features = (cnn_spatial_output_size[0] *
                                      cnn_spatial_output_size[1] * 256)
        self.linear = nn.Linear(self.linear_input_features, nr_classes)
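
Examples #1 through #7 show only __init__. A forward pass tying the pieces
together might look like the following minimal sketch; it is not part of the
original snippet, and it assumes the input arrives as the [coordinates,
features] pair that scn.InputLayer consumes (an N x 3 LongTensor of
(y, x, batch_index) coordinates plus an N x nInputPlanes float feature
tensor):

    def forward(self, x):
        # x = [coordinates, features] in the scn.InputLayer format
        x = self.inputLayer(x)       # build the sparse tensor
        x = self.sparseModel(x)      # sparse VGG -> dense [B, 256, 2, 3]
        x = x.view(-1, self.linear_input_features)  # flatten spatial dims
        return self.linear(x)        # class scores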
Example #2
 def __init__(self):
     nn.Module.__init__(self)
     self.sparseModel = scn.SparseVggNet(
         2, 3,
         [['C', 16], ['C', 16], 'MP', ['C', 32], ['C', 32], 'MP', ['C', 48],
          ['C', 48], 'MP', ['C', 64], ['C', 64], 'MP', ['C', 96], ['C', 96]
          ]).add(scn.Convolution(2, 96, 128, 3, 2, False)).add(
              scn.BatchNormReLU(128)).add(scn.SparseToDense(2, 128))
     self.linear = nn.Linear(128, 3755)
Example #3
 def __init__(self):
     nn.Module.__init__(self)
     self.sparseModel = scn.SparseVggNet(2, 3, [
         ['C', 8], ['C', 8], 'MP',
         ['C', 16], ['C', 16], 'MP',
         ['C', 16 + 8], ['C', 16 + 8], 'MP',
         ['C', 24 + 8], ['C', 24 + 8], 'MP']
     ).add(scn.Convolution(2, 32, 64, 5, 1, False)
           ).add(scn.BatchNormReLU(64)
                 ).add(scn.SparseToDense(2, 64))
     self.linear = nn.Linear(64, 183)
Example #4
 def __init__(self):
     nn.Module.__init__(self)
     self.sparseModel = scn.SparseVggNet(
         2, 3,
         [['C', 16], ['C', 16], 'MP', ['C', 32], ['C', 32], 'MP', ['C', 48],
          ['C', 48], 'MP', ['C', 64], ['C', 64], 'MP', ['C', 96], ['C', 96]
          ]).add(scn.Convolution(2, 96, 128, 3, 2, False)).add(
              scn.BatchNormReLU(128)).add(scn.SparseToDense(2, 128))
     self.spatial_size = self.sparseModel.input_spatial_size(
         torch.LongTensor([1, 1]))
     self.inputLayer = scn.InputLayer(2, self.spatial_size, 2)
     self.linear = nn.Linear(128, 3755)
Example #5
 def __init__(self, dimension=3, device='cuda'):
     nn.Module.__init__(self)
     self.sparseModel = scn.Sequential().add(
         scn.SparseVggNet(
             dimension, 1,
             [['C', 8], ['C', 8], ['MP', 3, 2], ['C', 16], ['C', 16],
              ['MP', 3, 2], ['C', 24], ['C', 24], ['MP', 3, 2]])).add(
                  scn.SubmanifoldConvolution(
                      dimension, 24, 32, 3,
                      False)).add(scn.BatchNormReLU(32)).add(
                          scn.SparseToDense(dimension, 32)).to(device)
     self.spatial_size = self.sparseModel.input_spatial_size(
         torch.LongTensor([1] * dimension))
     self.inputLayer = scn.InputLayer(dimension, self.spatial_size)
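
Again only __init__ is shown. A hypothetical end-to-end call for this 3D
network, assuming the usual sparseconvnet coordinate layout (spatial
coordinates plus a trailing batch-index column); the class name Net is a
placeholder, since the snippet does not include one:

net = Net(dimension=3, device='cpu')          # hypothetical class name
coords = torch.LongTensor([[0, 0, 0, 0],      # (z, y, x, batch_index)
                           [1, 2, 3, 0]])
features = torch.FloatTensor([[1.0], [1.0]])  # one input plane
out = net.sparseModel(net.inputLayer([coords, features]))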
Example #6
 def __init__(self):
     nn.Module.__init__(self)
     self.sparseModel = scn.Sequential(
         scn.SparseVggNet(2, 3, [
             ['C', 8], ['C', 8], 'MP',
             ['C', 16], ['C', 16], 'MP',
             ['C', 16, 8], ['C', 16, 8], 'MP',
             ['C', 24, 8], ['C', 24, 8], 'MP']),
         scn.Convolution(2, 32, 64, 5, 1, False),
         scn.BatchNormReLU(64),
         scn.SparseToDense(2, 64))
     self.spatial_size = self.sparseModel.input_spatial_size(
         torch.LongTensor([1, 1]))
     self.inputLayer = scn.InputLayer(2, self.spatial_size, 2)
     self.linear = nn.Linear(64, 183)
Example #7
    def __init__(self,
                 nr_classes,
                 nr_box=2,
                 nr_input_channels=2,
                 small_out_map=True):
        super(FBSparseObjectDet, self).__init__()
        self.nr_classes = nr_classes
        self.nr_box = nr_box

        sparse_out_channels = 256
        self.sparseModel = scn.SparseVggNet(
            2,
            nInputPlanes=nr_input_channels,
            layers=[['C', 16], ['C', 16], 'MP', ['C', 32], ['C', 32], 'MP',
                    ['C', 64], ['C', 64], 'MP', ['C', 128], ['C', 128], 'MP',
                    ['C', 256], ['C', 256]]
        ).add(
            scn.Convolution(2, 256, sparse_out_channels, 3,
                            filter_stride=2, bias=False)
        ).add(
            scn.BatchNormReLU(sparse_out_channels)
        ).add(
            scn.SparseToDense(2, sparse_out_channels))

        if small_out_map:
            self.cnn_spatial_output_size = [5, 7]
        else:
            self.cnn_spatial_output_size = [6, 8]

        spatial_size_product = (self.cnn_spatial_output_size[0] *
                                self.cnn_spatial_output_size[1])
        self.spatial_size = self.sparseModel.input_spatial_size(
            torch.LongTensor(self.cnn_spatial_output_size))
        self.inputLayer = scn.InputLayer(dimension=2,
                                         spatial_size=self.spatial_size,
                                         mode=2)
        self.linear_input_features = spatial_size_product * 256
        self.linear_1 = nn.Linear(self.linear_input_features, 1024)
        self.linear_2 = nn.Linear(
            1024, spatial_size_product * (nr_classes + 5 * self.nr_box))
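
The detection head flattens the 5x7 (or 6x8) dense feature map and predicts
nr_classes + 5 * nr_box values per grid cell, a YOLO-style layout (x, y, w, h
and a confidence score per box, plus class scores). A hedged sketch of how
the tail of a forward pass could reshape the flat prediction back onto the
grid; the actual forward is not shown in this snippet:

        x = self.linear_2(torch.relu(self.linear_1(x)))
        x = x.view(-1,
                   self.cnn_spatial_output_size[0],
                   self.cnn_spatial_output_size[1],
                   self.nr_classes + 5 * self.nr_box)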
Example #8
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
import sparseconvnet as scn

# Use the GPU if there is one, otherwise CPU
use_gpu = torch.cuda.is_available()

model = scn.Sequential().add(
    scn.SparseVggNet(2, 1,
                     [['C',  8], ['C',  8], ['MP', 3, 2],
                      ['C', 16], ['C', 16], ['MP', 3, 2],
                      ['C', 24], ['C', 24], ['MP', 3, 2]])
).add(
    scn.ValidConvolution(2, 24, 32, 3, False)
).add(
    scn.BatchNormReLU(32)
).add(
    scn.SparseToDense(2, 32)
)
if use_gpu:
    model.cuda()

# output will be 10x10
inputSpatialSize = model.input_spatial_size(torch.LongTensor([10, 10]))
input = scn.InputBatch(2, inputSpatialSize)
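
Example #8 stops right after creating the InputBatch. In the sparseconvnet
"hello world" example this is followed by registering active sites one at a
time; the sketch below follows that pattern, treating the exact setLocation
signature as an assumption about the older InputBatch API:

msg = [" X X ", "  X  ", " X X "]    # toy banner; 'X' marks active sites
input.addSample()                    # start sample 0 of the batch
for y, line in enumerate(msg):
    for x, c in enumerate(line):
        if c == 'X':
            input.setLocation(torch.LongTensor([y, x]),
                              torch.FloatTensor([1]), 0)
output = model(input)                # dense tensor of shape [1, 32, 10, 10]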
Example #9
# Copyright 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
import sparseconvnet as scn

# Use the GPU if there is one, otherwise CPU
use_gpu = torch.cuda.is_available()

model = scn.Sequential().add(
    scn.SparseVggNet(2, 1,
                     [['C', 8], ['C', 8], ['MP', 3, 2], ['C', 16], ['C', 16],
                      ['MP', 3, 2], ['C', 24], ['C', 24], ['MP', 3, 2]
                      ])).add(scn.ValidConvolution(2, 24, 32, 3, False)).add(
                          scn.BatchNormReLU(32)).add(scn.SparseToDense(2, 32))
if use_gpu:
    model.cuda()

# output will be 10x10
inputSpatialSize = model.input_spatial_size(torch.LongTensor([10, 10]))
input = scn.InputBatch(2, inputSpatialSize)

msg = [
    " X   X  XXX  X    X    XX     X       X   XX   XXX   X    XXX   ",
    " X   X  X    X    X   X  X    X       X  X  X  X  X  X    X  X  ",
    " XXXXX  XX   X    X   X  X    X   X   X  X  X  XXX   X    X   X ",
    " X   X  X    X    X   X  X     X X X X   X  X  X  X  X    X  X  ",
    " X   X  XXX  XXX  XXX  XX       X   X     XX   X  X  XXX  XXX   "
Example #10
import torch
import sparseconvnet as scn

# Use the GPU if there is one, otherwise CPU
use_gpu = torch.cuda.is_available()

model = scn.Sequential().add(
    scn.SparseVggNet(dimension=2,
                     nInputPlanes=1,
                     layers=[['C', 8], ['C', 8], ['MP', 3, 2],
                             ['C', 16], ['C', 16], ['MP', 3, 2],
                             ['C', 24], ['C', 24], ['MP', 3, 2]])
).add(
    scn.SubmanifoldConvolution(dimension=2, nIn=24, nOut=32,
                               filter_size=3, bias=False)
).add(
    scn.BatchNormReLU(nPlanes=32)
).add(
    scn.SparseToDense(dimension=2, nPlanes=32))
if use_gpu:
    model.cuda()

# output will be 10x10, calculate the input size
inputSpatialSize = model.input_spatial_size(torch.LongTensor([10, 10]))
print(inputSpatialSize)  # (87, 87)
#input = scn.InputBatch(dimension=2, spatial_size=inputSpatialSize)
input_scn = scn.InputLayer(dimension=2, spatial_size=inputSpatialSize)

msg = [
    " X   X  XXX  X    X    XX     X       X   XX   XXX   X    XXX   ",
    " X   X  X    X    X   X  X    X       X  X  X  X  X  X    X  X  ",
    " XXXXX  XX   X    X   X  X    X   X   X  X  X  XXX   X    X   X ",
    " X   X  X    X    X   X  X     X X X X   X  X  X  X  X    X  X  ",
    " X   X  XXX  XXX  XXX  XX       X   X     XX   X  X  XXX  XXX   "
]
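
Example #10 uses scn.InputLayer rather than InputBatch, so the banner has to
be converted into a [coordinates, features] pair first. In the sketch below
the trailing batch-index column is an assumption from common sparseconvnet
usage; the snippet itself is cut off before the forward pass:

locations, features = [], []
for y, line in enumerate(msg):
    for x, c in enumerate(line):
        if c == 'X':
            locations.append([y, x, 0])   # trailing 0 = batch index
            features.append([1.0])
output = model(input_scn([torch.LongTensor(locations),
                          torch.FloatTensor(features)]))
# output is a dense tensor of shape [1, 32, 10, 10]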
Example #11
    def __init__(self, args, device):
        super(MME2E_Sparse, self).__init__()
        self.args = args
        self.device = device
        self.num_classes = args['num_emotions']
        self.mod = args['modalities'].lower()
        self.feature_dim = args['feature_dim']
        self.threshold = args['sparse_threshold']
        nlayers = args['trans_nlayers']
        nheads = args['trans_nheads']
        trans_dim = args['trans_dim']

        text_cls_dim = 768
        if args['text_model_size'] == 'large':
            text_cls_dim = 1024
        if args['text_model_size'] == 'xlarge':
            text_cls_dim = 2048

        # Textual
        self.T = MME2E_T(feature_dim=self.feature_dim,
                         size=args['text_model_size'])

        # Visual
        self.mtcnn = MTCNN(image_size=48,
                           margin=2,
                           post_process=False,
                           device=device)
        self.normalize = transforms.Normalize(mean=[159, 111, 102],
                                              std=[37, 33, 32])

        self.V = nn.ModuleDict({
            'low_level':
            nn.Sequential(
                nn.Conv2d(in_channels=3,
                          out_channels=64,
                          kernel_size=5,
                          padding=2), nn.BatchNorm2d(64), nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                VggBasicBlock(in_planes=64, out_planes=64),
                VggBasicBlock(in_planes=64, out_planes=64)),
            'sparse_layers':
            nn.ModuleList([
                scn.Sequential(
                    # the leading 2 is the dimension (2D)
                    scn.SparseVggNet(2, 64,
                                     [['C', 128], ['C', 128], ['MP', 2, 2]])),
                scn.Sequential(
                    scn.SparseVggNet(2, 128,
                                     [['C', 256], ['C', 256], ['MP', 2, 2]])),
                scn.Sequential(
                    scn.SparseVggNet(2, 256,
                                     [['C', 512], ['C', 512], ['MP', 2, 2]]),
                    scn.SparseToDense(2, 512))
            ]),
            'attn_layers':
            nn.ModuleList([
                CrossModalAttentionLayer(k=64,
                                         x_channels=64,
                                         y_size=text_cls_dim,
                                         spatial=True),
                SparseCrossModalAttentionLayer(
                    k=128,
                    x_channels=128,
                    y_size=text_cls_dim,
                    sparse_threshold=self.threshold),
                SparseCrossModalAttentionLayer(k=256,
                                               x_channels=256,
                                               y_size=text_cls_dim,
                                               sparse_threshold=self.threshold)
            ])
        })

        self.A = nn.ModuleDict({
            'low_level':
            nn.Sequential(
                nn.Conv2d(in_channels=1,
                          out_channels=64,
                          kernel_size=5,
                          padding=2), nn.BatchNorm2d(64), nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                VggBasicBlock(in_planes=64, out_planes=64),
                VggBasicBlock(in_planes=64, out_planes=64)),
            'sparse_layers':
            nn.ModuleList([
                scn.Sequential().add(
                    scn.SparseVggNet(2, 64,
                                     [['C', 128], ['C', 128], ['MP', 2, 2]])),
                scn.Sequential().add(
                    scn.SparseVggNet(2, 128,
                                     [['C', 256], ['C', 256], ['MP', 2, 2]])),
                scn.Sequential().add(
                    scn.SparseVggNet(2, 256,
                                     [['C', 512], ['C', 512], ['MP', 2, 2]
                                      ])).add(scn.SparseToDense(2, 512))
            ]),
            'attn_layers':
            nn.ModuleList([
                CrossModalAttentionLayer(k=64,
                                         x_channels=64,
                                         y_size=text_cls_dim,
                                         spatial=True),
                SparseCrossModalAttentionLayer(
                    k=128,
                    x_channels=128,
                    y_size=text_cls_dim,
                    sparse_threshold=self.threshold),
                SparseCrossModalAttentionLayer(k=256,
                                               x_channels=256,
                                               y_size=text_cls_dim,
                                               sparse_threshold=self.threshold)
            ])
        })

        self.v_sparse_input_layers = nn.ModuleList([
            scn.InputLayer(
                2, self.V['sparse_layers'][0].input_spatial_size(
                    torch.LongTensor([12, 12]))),
            scn.InputLayer(
                2, self.V['sparse_layers'][1].input_spatial_size(
                    torch.LongTensor([6, 6]))),
            scn.InputLayer(
                2, self.V['sparse_layers'][2].input_spatial_size(
                    torch.LongTensor([3, 3])))
        ])

        self.a_sparse_input_layers = nn.ModuleList([
            scn.InputLayer(
                2, self.A['sparse_layers'][0].input_spatial_size(
                    torch.LongTensor([32, 8]))),
            scn.InputLayer(
                2, self.A['sparse_layers'][1].input_spatial_size(
                    torch.LongTensor([16, 4]))),
            scn.InputLayer(
                2, self.A['sparse_layers'][2].input_spatial_size(
                    torch.LongTensor([8, 2])))
        ])

        self.v_flatten = nn.Sequential(nn.Linear(512 * 3 * 3, 1024), nn.ReLU(),
                                       nn.Linear(1024, trans_dim))

        self.a_flatten = nn.Sequential(nn.Linear(512 * 8 * 2, 1024), nn.ReLU(),
                                       nn.Linear(1024, trans_dim))

        self.v_transformer = WrappedTransformerEncoder(dim=trans_dim,
                                                       num_layers=nlayers,
                                                       num_heads=nheads)
        self.a_transformer = WrappedTransformerEncoder(dim=trans_dim,
                                                       num_layers=nlayers,
                                                       num_heads=nheads)

        # Output layers
        self.t_out = nn.Linear(text_cls_dim, self.num_classes)
        self.v_out = nn.Linear(trans_dim, self.num_classes)
        self.a_out = nn.Linear(trans_dim, self.num_classes)

        self.weighted_fusion = nn.Linear(len(self.mod), 1, bias=False)
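
The hard-coded output sizes in the input-layer definitions chain together: a
48x48 MTCNN crop passes one MaxPool2d(2, 2) in 'low_level' to give 24x24
features, and each sparse stage halves the map again (24 -> 12 -> 6 -> 3),
which is why v_flatten expects 512 * 3 * 3 input features. A small
self-contained check of the first stage's sizing:

import torch
import sparseconvnet as scn

stage0 = scn.Sequential(
    scn.SparseVggNet(2, 64, [['C', 128], ['C', 128], ['MP', 2, 2]]))
# For a 12x12 output, the required input size should be [24, 24].
print(stage0.input_spatial_size(torch.LongTensor([12, 12])))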
Example #12
               #['MP',3,2],
               #['C',512],
               #['C',512],
               #['MP',3,2] ]
default_layers = [['C', 8],
                  ['C', 8],
                  ['MP', 3, 2],
                  ['C', 16],
                  ['C', 16],
                  ['MP', 3, 2],
                  ['C', 24],
                  ['C', 24],
                  ['MP', 3, 2]]
               
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = scn.Sequential().add(
    scn.SparseVggNet(2, 1, default_layers)
#).add(
#    scn.SubmanifoldConvolution(2, 24, 32, 3, False)
#).add(
#    scn.BatchNormReLU(32)
).add(
    # The VGG stack ends at 24 planes and the 24 -> 32 convolution is
    # commented out above, so SparseToDense must be given 24 planes.
    scn.SparseToDense(2, 24)
).to(device)

print(model)
par_dict = {}
for par in model.named_parameters():
    print("--------------------------------------------")
    print("setting [", par[0], par[1].shape, "]")
    # Select 4D convolution weights whose parameter name has the form
    # "<layer>.<sublayer>.weight".
    if "weight" in par[0] and len(par[0].split(".")) == 3 and len(par[1].shape) == 4:
        layerid1 = int(par[0].split(".")[0])