Example #1
    def __init__(self, configs):
        super(MMNIST_ConvLSTM, self).__init__()
        _KEYS = ['encoder_configs', 'reconstruct_configs', 'predict_configs']
        en_conf, rec_conf, pred_conf = unpack(configs, _KEYS)
        self.encoder = ConvLSTM(en_conf)
        self.reconstructor = Generator(rec_conf)
        self.predictor = Generator(pred_conf)
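The `pack` and `unpack` helpers are not shown in any of these excerpts. Judging from their call sites here and in Examples #7 and #8, they plausibly behave like the following minimal sketch (an assumption, not the source's implementation):

def pack(values, keys):
    """Zip parallel lists of values and keys into a config dict."""
    return dict(zip(keys, values))

def unpack(configs, keys):
    """Pull values out of a config dict in key order."""
    return [configs[k] for k in keys]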
Example #2
    def __init__(self, num_classes=61, mem_size=512, no_cam=False):
        super(NewAttentionModelBi, self).__init__()

        self.num_classes = num_classes
        self.noCam = no_cam
        self.mem_size = mem_size

        self.resnet = resnet34(pretrained=True, noBN=True)

        self.attention_rgb = Variable(
            (torch.FloatTensor(512).normal_(0, .05)).unsqueeze(0).cuda())
        self.attention_flow = Variable(
            (torch.FloatTensor(512).normal_(0, .05)).unsqueeze(0).cuda())

        self.lstm_cell = ConvLSTM(1024, mem_size)

        self.avgpool = nn.AvgPool2d(7)
        self.dropout = nn.Dropout(0.7)
        self.fc = nn.Linear(mem_size, self.num_classes)
        self.classifier = nn.Sequential(self.dropout, self.fc)

        self._custom_train_mode = True
Example #3
    def __init__(self,
                 num_classes=61,
                 mem_size=512,
                 no_cam=False,
                 enable_motion_segmentation=False):
        super(AttentionModel, self).__init__()

        self.num_classes = num_classes
        self.noCam = no_cam
        self.mem_size = mem_size
        self.enable_motion_segmentation = enable_motion_segmentation

        self.resnet = resnet34(pretrained=True, noBN=True)
        self.weight_softmax = self.resnet.fc.weight
        self.lstm_cell = ConvLSTM(512, mem_size)
        self.avgpool = nn.AvgPool2d(7)
        self.dropout = nn.Dropout(0.7)
        self.fc = nn.Linear(mem_size, self.num_classes)
        self.classifier = nn.Sequential(self.dropout, self.fc)

        self.motion_segmentation = MotionSegmentationBlock()

        self._custom_train_mode = True
Example #4
    def __init__(self, in_dim, ae_en_h_dims, ae_de_h_dims, conv_lstm_in_size,
                 conv_lstm_in_dim, conv_lstm_h_dim, conv_lstm_kernel_sizes,
                 conv_lstm_n_layers, fc_in_dim, fc_h_dims, fc_out_dim,
                 **kwargs):

        super(DeepAP, self).__init__()

        self.device = kwargs.get('device', 'cpu')

        ################
        # masked layer #
        ################

        mask = [list(range(in_dim)), list(range(in_dim))]
        self.mask_layer = MaskNet(in_dim, in_dim, mask, device=self.device)
        self.mask_thre = kwargs.get('mask_thre', 0.0001)

        ######################
        # auto_encoder layer #
        ######################

        self.ae = AutoEncoder(in_dim=in_dim,
                              en_h_dims=ae_en_h_dims,
                              de_h_dims=ae_de_h_dims)

        if kwargs.get('ae_pretrain_weight') is not None:
            self.ae.load_state_dict(kwargs['ae_pretrain_weight'])
        else:
            raise ValueError('AutoEncoder not pretrained.')

        # Freeze or unfreeze the pretrained autoencoder as a whole
        trainable = bool(kwargs.get('if_trainable', False))
        for p in self.ae.parameters():
            p.requires_grad = trainable

        ####################
        # conv_lstm layers #
        ####################

        self.conv_lstm_list = nn.ModuleList()
        for i in conv_lstm_kernel_sizes:
            i_kernel_size = (i, i)
            conv_lstm = ConvLSTM(
                in_size=conv_lstm_in_size,
                in_dim=conv_lstm_in_dim,
                h_dim=conv_lstm_h_dim,
                kernel_size=i_kernel_size,
                num_layers=conv_lstm_n_layers,
                batch_first=kwargs.get('conv_lstm_batch_first', True),
                bias=kwargs.get('conv_lstm_bias', True),
                only_last_state=kwargs.get('only_last_state', True),
                device=self.device)
            self.conv_lstm_list.append(conv_lstm)

        #########################
        # fully-connected layer #
        #########################

        self.fc = FC(
            in_dim=fc_in_dim,  # assert in_size == n_conv_lstm * conv_lstm_h_dim
            h_dims=fc_h_dims,
            out_dim=fc_out_dim,
            p_dropout=kwargs.get('fc_p_dropout', 0.1))
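As the inline comment on `fc_in_dim` notes, the fully-connected head expects one hidden block per ConvLSTM branch. A quick self-contained check of that channel arithmetic (illustrative shapes assumed here, not taken from the source):

import torch

conv_lstm_kernel_sizes = [3, 5, 7]   # one ConvLSTM branch per kernel size
conv_lstm_h_dim = 32
fc_in_dim = len(conv_lstm_kernel_sizes) * conv_lstm_h_dim  # 3 * 32 = 96

# Each branch yields a hidden state of shape (B, h_dim, H, W); fusing the
# branches along the channel axis gives n_branches * h_dim channels.
branch_states = [torch.randn(4, conv_lstm_h_dim, 7, 7)
                 for _ in conv_lstm_kernel_sizes]
fused = torch.cat(branch_states, dim=1)
assert fused.size(1) == fc_in_dim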
Example #5
class AttentionModel(nn.Module):
    def __init__(self,
                 num_classes=61,
                 mem_size=512,
                 no_cam=False,
                 enable_motion_segmentation=False):
        super(AttentionModel, self).__init__()

        self.num_classes = num_classes
        self.noCam = no_cam
        self.mem_size = mem_size
        self.enable_motion_segmentation = enable_motion_segmentation

        self.resnet = resnet34(pretrained=True, noBN=True)
        self.weight_softmax = self.resnet.fc.weight
        self.lstm_cell = ConvLSTM(512, mem_size)
        self.avgpool = nn.AvgPool2d(7)
        self.dropout = nn.Dropout(0.7)
        self.fc = nn.Linear(mem_size, self.num_classes)
        self.classifier = nn.Sequential(self.dropout, self.fc)

        self.motion_segmentation = MotionSegmentationBlock()

        self._custom_train_mode = True

    def train(self, mode=True):
        correct_values = {True, 'stage2', 'stage1', False}

        if mode not in correct_values:
            raise ValueError('Invalid mode, correct values are: ' +
                             ' '.join(str(v) for v in correct_values))

        self._custom_train_mode = mode

        # Enable full training behaviour only when mode is True
        super().train(mode is True)

        self.resnet.train(mode)
        self.lstm_cell.train(mode)
        if mode == 'stage2' or mode is True:
            self.motion_segmentation.train(True)
        if mode is not False:
            self.classifier.train(True)

    def get_training_parameters(self, name='all'):
        train_params = []
        train_params_ms = []

        # First disable gradients everywhere, then re-enable them only on the
        # parameters that are actually trained
        for params in self.parameters():
            params.requires_grad = False

        # Each sub-module's get_training_parameters() is responsible for
        # re-enabling the gradients it needs
        train_params += self.resnet.get_training_parameters()
        train_params += self.lstm_cell.get_training_parameters()
        # The final layer is trained at every stage, unless training is off
        if self._custom_train_mode is not False:
            for params in self.classifier.parameters():
                params.requires_grad = True
                train_params += [params]

        train_params_ms = self.motion_segmentation.get_training_parameters()

        if name == 'all':
            return train_params + train_params_ms
        elif name == 'main':
            return train_params
        elif name == 'ms':
            return train_params_ms
        raise ValueError('Unknown parameter group: %s' % name)

    def load_weights(self, file_path):
        model_dict = torch.load(file_path)
        if 'model_state_dict' in model_dict:
            self.load_state_dict(model_dict['model_state_dict'])
        else:
            self.load_state_dict(model_dict)

    def forward(self, inputVariable):
        state = (Variable(
            torch.zeros((inputVariable.size(1), self.mem_size, 7, 7)).cuda()),
                 Variable(
                     torch.zeros(
                         (inputVariable.size(1), self.mem_size, 7, 7)).cuda()))

        ms_feats = None
        if self.enable_motion_segmentation:
            ms_feats = Variable(
                torch.zeros(inputVariable.size(0), inputVariable.size(1),
                            49 * 2).cuda())

        for t in range(inputVariable.size(0)):
            logit, feature_conv, feature_convNBN = self.resnet(
                inputVariable[t])

            bz, nc, h, w = feature_conv.size()
            feature_conv1 = feature_conv.view(bz, nc, h * w)
            probs, idxs = logit.sort(1, True)
            class_idx = idxs[:, 0]
            cam = torch.bmm(self.weight_softmax[class_idx].unsqueeze(1),
                            feature_conv1)
            attentionMAP = F.softmax(cam.squeeze(1), dim=1)
            attentionMAP = attentionMAP.view(attentionMAP.size(0), 1, 7, 7)
            attentionFeat = feature_convNBN * attentionMAP.expand_as(
                feature_conv)

            if self.enable_motion_segmentation:
                ms_feats[t] = self.motion_segmentation(feature_convNBN)

            if self.noCam:
                state = self.lstm_cell(feature_convNBN, state)
            else:
                state = self.lstm_cell(attentionFeat, state)

        feats1 = self.avgpool(state[1]).view(state[1].size(0), -1)
        feats = self.classifier(feats1)

        return {
            'classifications': feats,
            'ms_feats': ms_feats,
            'lstm_feats': feats1
        }

    def get_class_activation_id(self, inputVariable):
        logit, _, _ = self.resnet(inputVariable)
        return logit

    def get_cam_visualisation(self, input_pil_image, preprocess_for_viz,
                              preprocess_for_model):
        return get_cam_visualisation(self.resnet, self.weight_softmax,
                                     input_pil_image, preprocess_for_viz,
                                     preprocess_for_model)
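A hedged usage sketch for this class (shapes inferred from `forward`: the input is `(time, batch, 3, 224, 224)` so that `resnet34` produces 7x7 feature maps, and a CUDA device is required because `forward` allocates its state tensors with `.cuda()`):

model = AttentionModel(num_classes=61, mem_size=512).cuda()
model.train('stage1')
params = model.get_training_parameters('main')

clip = torch.randn(8, 4, 3, 224, 224).cuda()  # 8 frames, batch of 4
out = model(clip)
print(out['classifications'].shape)           # torch.Size([4, 61])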
Example #6
class NewAttentionModelBi(nn.Module):
    def __init__(self, num_classes=61, mem_size=512, no_cam=False):
        super(NewAttentionModelBi, self).__init__()

        self.num_classes = num_classes
        self.noCam = no_cam
        self.mem_size = mem_size

        self.resnet = resnet34(pretrained=True, noBN=True)

        self.attention_rgb = Variable(
            (torch.FloatTensor(512).normal_(0, .05)).unsqueeze(0).cuda())
        self.attention_flow = Variable(
            (torch.FloatTensor(512).normal_(0, .05)).unsqueeze(0).cuda())

        self.lstm_cell = ConvLSTM(1024, mem_size)

        self.avgpool = nn.AvgPool2d(7)
        self.dropout = nn.Dropout(0.7)
        self.fc = nn.Linear(mem_size, self.num_classes)
        self.classifier = nn.Sequential(self.dropout, self.fc)

        self._custom_train_mode = True

    def train(self, mode=True):
        correct_values = {True, 'stage2', 'stage1', False}

        if mode not in correct_values:
            raise ValueError('Invalid mode, correct values are: ' +
                             ' '.join(str(v) for v in correct_values))

        self._custom_train_mode = mode

        # Enable full training behaviour only when mode is True
        super().train(mode is True)

        self.resnet.train(mode)
        self.lstm_cell.train(mode)
        if mode is not False:
            self.classifier.train(True)

    def get_training_parameters(self):
        train_params = []

        for params in self.parameters():
            params.requires_grad = False

        train_params += self.resnet.get_training_parameters()

        train_params += self.lstm_cell.get_training_parameters()

        if self._custom_train_mode is not False:
            for params in self.classifier.parameters():
                params.requires_grad = True
                train_params += [params]
            self.attention_rgb.requires_grad = True
            train_params += [self.attention_rgb]
            self.attention_flow.requires_grad = True
            train_params += [self.attention_flow]

        return train_params

    def load_weights(self, file_path):
        model_dict = torch.load(file_path)
        if 'model_state_dict' in model_dict:
            self.load_state_dict(model_dict['model_state_dict'])
        else:
            self.load_state_dict(model_dict)

    def get_resnet_output_feats(self, resnet, attention, input_frames):
        logit, feature_conv, feature_convNBN = resnet(input_frames)

        if self.noCam:
            return feature_convNBN

        bz, nc, h, w = feature_conv.size()
        feature_conv1 = feature_conv.view(bz, nc, h * w)

        # Broadcast the single learned attention vector across the batch via
        # fancy indexing, then compute a CAM-style map over the 7x7 locations
        cam = torch.bmm(attention[[0] * input_frames.size(0)].unsqueeze(1),
                        feature_conv1)
        attentionMAP = F.softmax(cam.squeeze(1), dim=1)
        attentionMAP = attentionMAP.view(attentionMAP.size(0), 1, 7, 7)
        attentionFeat = feature_convNBN * attentionMAP.expand_as(feature_conv)

        return attentionFeat

    def forward(self, rgb_frames, flow_frames):
        state = (Variable(
            torch.zeros((rgb_frames.size(1), self.mem_size, 7, 7)).cuda()),
                 Variable(
                     torch.zeros(
                         (rgb_frames.size(1), self.mem_size, 7, 7)).cuda()))

        for t in range(rgb_frames.size(0)):
            rgb_feats = self.get_resnet_output_feats(self.resnet,
                                                     self.attention_rgb,
                                                     rgb_frames[t])
            flow_feats = self.get_resnet_output_feats(self.resnet,
                                                      self.attention_flow,
                                                      flow_frames[t])
            state = self.lstm_cell(torch.cat((rgb_feats, flow_feats), dim=1),
                                   state)

        feats1 = self.avgpool(state[1]).view(state[1].size(0), -1)
        feats = self.classifier(feats1)

        return {'classifications': feats}
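A matching usage sketch for the two-stream variant (again assuming `(time, batch, 3, 224, 224)` inputs and a CUDA device; the flow frames pass through the same 3-channel ResNet as the RGB frames):

model = NewAttentionModelBi(num_classes=61, mem_size=512).cuda()
model.train('stage1')

rgb = torch.randn(8, 4, 3, 224, 224).cuda()
flow = torch.randn(8, 4, 3, 224, 224).cuda()
out = model(rgb, flow)
print(out['classifications'].shape)  # torch.Size([4, 61])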
Example #7
    # The names below are not defined in this excerpt; the values mirror
    # Example #8, except time_steps, which is assumed for illustration.
    _KEYS = ['encoder_configs', 'reconstruct_configs', 'predict_configs']
    h_c = 16
    active_func = nn.Tanh()
    in_c = 1
    in_h = 64
    in_w = 64
    kernel_size = 5
    DEBUG = True
    time_steps = 10  # assumed sequence length

    batch_size = 3

    num_layers = 3
    cell_conf_l0 = pack(
        [h_c, active_func, in_c, in_h, in_w, kernel_size, DEBUG],
        ConvLSTMCell.get_init_keys())
    cell_conf_l1 = pack(
        [h_c, active_func, h_c, in_h, in_w, kernel_size, DEBUG],
        ConvLSTMCell.get_init_keys())
    cell_conf_l2 = pack(
        [h_c, active_func, h_c, in_h, in_w, kernel_size, DEBUG],
        ConvLSTMCell.get_init_keys())
    cell_configs = [cell_conf_l0, cell_conf_l1, cell_conf_l2]

    encoder_configs = pack([num_layers, cell_configs],
                           ConvLSTM.get_init_keys())
    reconstruct_configs = pack([num_layers, cell_configs],
                               ConvLSTM.get_init_keys())
    predict_configs = pack([num_layers, cell_configs],
                           ConvLSTM.get_init_keys())

    model_configs = pack(
        [encoder_configs, reconstruct_configs, predict_configs], _KEYS)

    model = MMNIST_ConvLSTM(model_configs)

    x_train = to_var(torch.randn(batch_size, time_steps, in_c, in_h, in_w))
    x_predict = to_var(torch.randn(batch_size, time_steps, in_c, in_h, in_w))

    data = pack([x_train, x_predict, None], ['x_train', 'x_predict', 'states'])
    configs = pack([True, 6], ['use_gt', 'max_steps'])
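With the `pack` sketch from Example #1, the last two lines reduce to plain dicts: `data` is `{'x_train': x_train, 'x_predict': x_predict, 'states': None}` and `configs` is `{'use_gt': True, 'max_steps': 6}`. The excerpt stops before the call that consumes them; presumably something along the lines of `model(data, configs)` follows in the original script.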
Example #8
def MMNIST_CONV_LSTM(extra_info):
    _KEYS = ['encoder_configs', 'reconstruct_configs', 'predict_configs']
    h_c = 16
    active_func = nn.Tanh()
    in_c = 1
    in_h = 64
    in_w = 64
    kernel_size = 5
    DEBUG = True

    num_layers = 3
    cell_conf_l0 = pack(
        [h_c, active_func, in_c, in_h, in_w, kernel_size, DEBUG],
        ConvLSTMCell.get_init_keys())
    cell_conf_l1 = pack([8, active_func, h_c, in_h, in_w, kernel_size, DEBUG],
                        ConvLSTMCell.get_init_keys())
    cell_conf_l2 = pack([8, active_func, 8, in_h, in_w, kernel_size, DEBUG],
                        ConvLSTMCell.get_init_keys())
    cell_configs = [cell_conf_l0, cell_conf_l1, cell_conf_l2]

    encoder_configs = pack([num_layers, cell_configs],
                           ConvLSTM.get_init_keys())
    reconstruct_configs = pack([num_layers, cell_configs],
                               ConvLSTM.get_init_keys())
    predict_configs = pack([num_layers, cell_configs],
                           ConvLSTM.get_init_keys())

    model_info = pack([encoder_configs, reconstruct_configs, predict_configs],
                      _KEYS)
    model_info['name'] = 'MMNIST_CONV_LSTM'

    trainloader_info = {
        'file_addr': './data/mmnist_train.npy',
        'batch_size': 32,
        'shuffle': True,
        'num_workers': 2
    }

    valloader_info = {
        'file_addr': './data/mmnist_val.npy',
        'batch_size': 16,
        'shuffle': False,
        'num_workers': 2
    }

    testloader_info = {
        'file_addr': './data/mmnist_test.npy',
        'batch_size': 16,
        'shuffle': False,
        'num_workers': 2
    }
    seed = 666
    folder_name = 'mmnist_convLSTM'
    main_info = {
        'clip': 0.25,
        'num_epochs': 60,
        'halve_every': 10,
        'log_dir': './logs/%s' % folder_name,
        'save_dir': './checkpoints/%s' % folder_name
    }

    optimizer_info = {
        'lr': 1e-4,
        'optim_alg': 'RMSprop',
        'weight_decay': 0.9,
        'momentum': 0
    }

    hparams = HParams(trainloader_info=trainloader_info,
                      valloader_info=valloader_info,
                      testloader_info=testloader_info,
                      model_info=model_info,
                      optimizer_info=optimizer_info,
                      main_info=main_info,
                      seed=seed)
    return hparams
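A hedged sketch of how the returned bundle might feed the model from Example #1 (assuming, as the keyword-argument construction suggests, that `HParams` exposes each argument as an attribute):

hparams = MMNIST_CONV_LSTM(extra_info=None)

# model_info carries the three per-stage ConvLSTM configs plus a 'name' key,
# which unpack() in Example #1 simply ignores.
model = MMNIST_ConvLSTM(hparams.model_info)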