Example #1
    def __init__(self, sample_size, num_seq=8, seq_len=5, pred_step=3, network='resnet50'):
        super(DPC_RNN_Infer_Pred_Error, self).__init__()
        torch.cuda.manual_seed(233)
        print('Using DPC-RNN model')
        self.sample_size = sample_size
        self.num_seq = num_seq
        self.seq_len = seq_len
        self.pred_step = pred_step
        self.last_duration = int(math.ceil(seq_len / 4))
        self.last_size = int(math.ceil(sample_size / 32))
        print('final feature map has size %dx%d' % (self.last_size, self.last_size))

        self.backbone, self.param = select_resnet(network, track_running_stats=False)
        self.param['num_layers'] = 1 # param for GRU
        self.param['hidden_size'] = self.param['feature_size'] # param for GRU

        self.agg = ConvGRU(input_size=self.param['feature_size'],
                               hidden_size=self.param['hidden_size'],
                               kernel_size=1,
                               num_layers=self.param['num_layers'])
        self.network_pred = nn.Sequential(
                                nn.Conv2d(self.param['feature_size'], self.param['feature_size'], kernel_size=1, padding=0),
                                nn.ReLU(inplace=True),
                                nn.Conv2d(self.param['feature_size'], self.param['feature_size'], kernel_size=1, padding=0)
                                )
        self.mask = None
        self.relu = nn.ReLU(inplace=False)
        self._initialize_weights(self.agg)
        self._initialize_weights(self.network_pred)
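
The two ceil() lines above encode the backbone's downsampling: roughly 4x in time and 32x in space. A quick standalone check, assuming a typical 128-pixel input (sample_size is not fixed in this snippet):

import math

seq_len, sample_size = 5, 128              # 128 is an assumed, typical input size
last_duration = math.ceil(seq_len / 4)     # temporal extent after the backbone -> 2
last_size = math.ceil(sample_size / 32)    # spatial extent after the backbone -> 4
print(last_duration, last_size)            # 2 4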
Example #2
    def __init__(self, sample_size, num_seq, seq_len,
                 network='resnet18', dropout=0.5, num_class=101):
        '''
        Original DPC, according to diagram in appendix
        No future prediction network involved
        num_class: If integer => single output layer; if list => multiple output layers (for example verb + noun)
        '''
        super(LC_present, self).__init__()
        torch.cuda.manual_seed(666)  # very innocent number

        self.sample_size = sample_size
        self.num_seq = num_seq
        self.seq_len = seq_len
        self.num_class = num_class
        print('=> Using RNN + FC model with num_class:', num_class)

        print('=> Use 2D-3D %s!' % network)
        self.last_duration = int(math.ceil(seq_len / 4))
        self.last_size = int(math.ceil(sample_size / 32))
        track_running_stats = True

        self.backbone, self.param = select_resnet(
            network, track_running_stats=track_running_stats)
        self.param['num_layers'] = 1
        self.param['hidden_size'] = self.param['feature_size']

        print('=> using ConvRNN, kernel_size = 1')
        self.agg = ConvGRU(input_size=self.param['feature_size'],
                           hidden_size=self.param['hidden_size'],
                           kernel_size=1,
                           num_layers=self.param['num_layers'])
        self._initialize_weights(self.agg)

        self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
        self.final_bn.weight.data.fill_(1)
        self.final_bn.bias.data.zero_()

        # Initialize the final FC layer(s), one or multiple, depending on the class configuration
        if isinstance(num_class, int):
            # Single
            self.multi_output = False
            self.final_fc = nn.Sequential(nn.Dropout(dropout),
                                          nn.Linear(self.param['feature_size'], self.num_class))
            self._initialize_weights(self.final_fc)

        elif isinstance(num_class, list):
            # Multi
            self.multi_output = True
            self.final_fc = []
            for cur_num_cls in num_class:
                cur_fc = nn.Sequential(nn.Dropout(dropout),
                                       nn.Linear(self.param['feature_size'], cur_num_cls))
                self._initialize_weights(cur_fc)
                self.final_fc.append(cur_fc)
            # IMPORTANT, otherwise pytorch won't register
            self.final_fc = nn.ModuleList(self.final_fc)

        else:
            raise ValueError(
                'num_class is of unknown type (expected int or list of ints)')
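
A standalone sketch of the int-vs-list head pattern used above (sizes illustrative): the plain Python list must be wrapped in nn.ModuleList, otherwise the heads' parameters never reach .parameters(), the optimizer, or .cuda().

import torch.nn as nn

feature_size, num_class = 16, [97, 300]   # e.g. verb + noun; sizes illustrative
if isinstance(num_class, int):
    num_class = [num_class]
final_fc = nn.ModuleList(
    nn.Sequential(nn.Dropout(0.5), nn.Linear(feature_size, n)) for n in num_class
)
print(sum(p.numel() for p in final_fc.parameters()) > 0)  # True: params registered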
Example #3
    def __init__(self, args):
        super(DpcRnn, self).__init__()

        torch.cuda.manual_seed(233)

        print('Using DPC-RNN model for mode: {}'.format(args["mode"]))
        self.num_seq = args["num_seq"]
        self.seq_len = args["seq_len"]
        self.pred_step = args["pred_step"]
        self.sample_size = args["img_dim"]
        self.last_duration = int(math.ceil(self.seq_len / 4))
        self.last_size = int(math.ceil(self.sample_size / 32))
        print('final feature map has size %dx%d' % (self.last_size, self.last_size))

        self.mode = args["mode"]
        self.in_channels = get_num_channels(self.mode)
        self.l2_norm = args["l2_norm"]

        track_running_stats = True
        print("Track running stats: {}".format(track_running_stats))
        self.backbone, self.param = select_resnet(
            args["net"], track_running_stats=track_running_stats, in_channels=self.in_channels
        )

        # params for GRU
        self.param['num_layers'] = 1
        self.param['hidden_size'] = self.param['feature_size']

        # param for current model
        self.final_feature_size = self.param["feature_size"]
        # self.final_feature_size = self.param['hidden_size'] * (self.last_size ** 2)
        self.total_feature_size = self.param['hidden_size'] * (self.last_size ** 2)

        self.agg = ConvGRU(input_size=self.param['feature_size'],
                               hidden_size=self.param['hidden_size'],
                               kernel_size=1,
                               num_layers=self.param['num_layers'])
        self.network_pred = nn.Sequential(
                                nn.Conv2d(self.param['feature_size'], self.param['feature_size'], kernel_size=1, padding=0),
                                nn.ReLU(inplace=True),
                                nn.Conv2d(self.param['feature_size'], self.param['feature_size'], kernel_size=1, padding=0)
                        )

        self.compiled_features = self.get_modality_feature_extractor()
        self.interModeDotHandler = su.InterModeDotHandler(self.last_size)
        self.cosSimHandler = su.CosSimHandler()

        self.mask = None
        # self.relu = nn.ReLU(inplace=False)
        self._initialize_weights(self.agg)
        self._initialize_weights(self.network_pred)
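
For reference, a minimal args dict this constructor reads (keys taken from the code above; the values, including the mode string, are illustrative guesses):

args = {
    "mode": "rgb",        # passed to get_num_channels; actual mode names are a guess
    "num_seq": 8,
    "seq_len": 5,
    "pred_step": 3,
    "img_dim": 128,
    "l2_norm": True,
    "net": "resnet18",
}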
Example #4
    def __init__(self,
                 sample_size,
                 num_seq,
                 seq_len,
                 in_channels,
                 network='resnet18',
                 dropout=0.5,
                 num_class=101):
        super(LC, self).__init__()
        torch.cuda.manual_seed(666)
        self.sample_size = sample_size
        self.num_seq = num_seq
        self.seq_len = seq_len
        self.num_class = num_class
        self.in_channels = in_channels
        print('=> Using RNN + FC model with ic:', self.in_channels)

        print('=> Use 2D-3D %s!' % network)
        self.last_duration = int(math.ceil(seq_len / 4))
        self.last_size = int(math.ceil(sample_size / 32))
        track_running_stats = True

        self.backbone, self.param = \
            select_resnet(network, track_running_stats=track_running_stats, in_channels=self.in_channels)
        self.param['num_layers'] = 1
        self.param['hidden_size'] = self.param['feature_size']

        print('=> using ConvRNN, kernel_size = 1')
        self.agg = ConvGRU(input_size=self.param['feature_size'],
                           hidden_size=self.param['hidden_size'],
                           kernel_size=1,
                           num_layers=self.param['num_layers'])
        self._initialize_weights(self.agg)

        self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
        self.final_bn.weight.data.fill_(1)
        self.final_bn.bias.data.zero_()

        self.num_classes = num_class
        self.dropout = dropout
        self.hidden_size = 128
        self.final_fc = nn.Sequential(
            nn.Dropout(self.dropout),
            nn.Linear(self.param['feature_size'], self.num_classes),
        )

        self._initialize_weights(self.final_fc)
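
How the classifier head above is typically driven, as a hedged sketch (the forward pass is not shown here, so the pooling step is an assumption): BatchNorm1d expects (N, C), so the aggregated context map is pooled and flattened first.

import torch
import torch.nn as nn

feature_size, num_class = 16, 101          # illustrative sizes
final_bn = nn.BatchNorm1d(feature_size)
final_fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(feature_size, num_class))

context = torch.randn(4, feature_size, 4, 4)   # ConvGRU output, one map per clip
pooled = context.mean(dim=(2, 3))              # assumed spatial pooling -> (N, C)
logits = final_fc(final_bn(pooled))            # (N, num_class)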
Example #5
    def __init__(self,
                 img_dim,
                 network='resnet50',
                 num_layers_in_fc_layers=1024,
                 dropout=0.5):
        super(Audio_RNN, self).__init__()

        self.__nFeatures__ = 24
        self.__nChs__ = 32
        self.__midChs__ = 32

        self.netcnnaud = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1),
                      padding=(1, 1)),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(1, 1), stride=(1, 1)),
            nn.Conv2d(64,
                      192,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=(1, 1)),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(1, 2)),
            nn.Conv2d(192, 384, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(384),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=(3, 3), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2)),
            nn.Conv2d(256, 512, kernel_size=(5, 4), padding=(0, 0)),
            nn.BatchNorm2d(512),
            nn.ReLU(),
        )

        self.netfcaud = nn.Sequential(
            nn.Linear(512 * 21, 4096),
            nn.BatchNorm1d(4096),
            nn.ReLU(),
            nn.Linear(4096, num_layers_in_fc_layers),
        )

        self.netcnnlip, self.param = select_resnet(network,
                                                   track_running_stats=False)
        self.last_duration = int(math.ceil(30 / 4))
        self.last_size = int(math.ceil(img_dim / 32))

        self.netfclip = nn.Sequential(
            nn.Linear(
                self.param['feature_size'] * self.last_size * self.last_size,
                4096),
            nn.BatchNorm1d(4096),
            nn.ReLU(),
            nn.Linear(4096, num_layers_in_fc_layers),
        )

        self.final_bn_lip = nn.BatchNorm1d(num_layers_in_fc_layers)
        self.final_bn_lip.weight.data.fill_(1)
        self.final_bn_lip.bias.data.zero_()

        self.final_fc_lip = nn.Sequential(
            nn.Dropout(dropout), nn.Linear(num_layers_in_fc_layers, 2))
        self._initialize_weights(self.final_fc_lip)

        self.final_bn_aud = nn.BatchNorm1d(num_layers_in_fc_layers)
        self.final_bn_aud.weight.data.fill_(1)
        self.final_bn_aud.bias.data.zero_()

        self.final_fc_aud = nn.Sequential(
            nn.Dropout(dropout), nn.Linear(num_layers_in_fc_layers, 2))
        self._initialize_weights(self.final_fc_aud)

        self._initialize_weights(self.netcnnaud)
        self._initialize_weights(self.netfcaud)
        self._initialize_weights(self.netfclip)
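
Where the 512 * 21 in netfcaud comes from: the 3x3 convs are padding-preserving, so only the pools and the final 5x4 conv shrink the map. A small check, assuming a SyncNet-style 13 x 99 MFCC input (the input shape is not stated in this snippet):

def out_len(n, k, s):
    # output length of a valid (unpadded) conv/pool along one axis
    return (n - k) // s + 1

freq = out_len(out_len(out_len(13, 3, 1), 3, 2), 5, 1)  # 13 -> 11 -> 5 -> 1
time = out_len(out_len(out_len(99, 3, 2), 3, 2), 4, 1)  # 99 -> 49 -> 24 -> 21
print(512 * freq * time)  # 10752 == 512 * 21, matching nn.Linear(512 * 21, 4096)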
Example #6
    def __init__(self, sample_size, num_seq=8, seq_len=5,
                 pred_step=3, network='resnet50', distance='dot',
                 poincare_c=1.0, poincare_ball_dim=256):
        super(DPC_RNN, self).__init__()

        # to reproduce the experiments
        torch.cuda.manual_seed(233)
        print('Using DPC-RNN model')

        # number of dimensions in the image
        self.sample_size = sample_size
        self.num_seq = num_seq
        self.seq_len = seq_len
        self.distance = distance

        # how many futures to predict
        self.pred_step = pred_step

        if network in ('resnet8', 'resnet10'):
            # 3 if seq_len is 5
            self.last_duration = int(math.ceil(seq_len / 2))
        else:
            # 2 if seq_len is 5
            self.last_duration = int(math.ceil(seq_len / 4))

        # 4 if size of the image is 128

        # change for toy experiment
        #self.last_size = 1
        self.last_size = int(math.ceil(sample_size / 32))

        print('final feature map has size %dx%d' %
              (self.last_size, self.last_size))

        # f - choose an appropriate feature extractor; in this case, a resnet
        self.backbone, self.param = select_resnet(
            network, track_running_stats=False, distance=self.distance)

        #print (self.param)

        self.param['num_layers'] = 1  # param for GRU
        self.param['hidden_size'] = self.param['feature_size']  # param for GRU

        self.agg = ConvGRU(input_size=self.param['feature_size'],
                           hidden_size=self.param['hidden_size'],
                           kernel_size=1,
                           num_layers=self.param['num_layers'])

        # two layered network \phi
        self.network_pred = nn.Sequential(
            nn.Conv2d(self.param['feature_size'],
                      self.param['feature_size'], kernel_size=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(self.param['feature_size'],
                      self.param['feature_size'], kernel_size=1, padding=0)
        )

        # mask for the contrastive loss; cached here and built lazily in the forward pass
        self.mask = None
        self.relu = nn.ReLU(inplace=False)
        self._initialize_weights(self.agg)
        self._initialize_weights(self.network_pred)
        
        # exponential map
        self.tp = hypnn.ToPoincare(c=poincare_c, train_x=True, train_c=True, ball_dim=self.param['feature_size'])
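
What the ToPoincare exponential map does, in spirit: it pulls Euclidean feature vectors into the open unit ball. A standalone sketch of the origin exponential map (the standard formula, not hypnn's actual code):

import torch

def expmap0(v, c=1.0, eps=1e-5):
    # exponential map at the origin of a Poincare ball with curvature c
    sqrt_c = c ** 0.5
    norm = v.norm(dim=-1, keepdim=True).clamp_min(eps)
    return torch.tanh(sqrt_c * norm) * v / (sqrt_c * norm)

z = expmap0(torch.randn(4, 256))
print(bool(z.norm(dim=-1).max() < 1.0))  # True: points lie strictly inside the ball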
Example #7
    def __init__(self,
                 sample_size,
                 num_seq,
                 seq_len,
                 pred_step,
                 network='resnet18',
                 dropout=0.5,
                 num_class=101):
        '''
        Modified from the DPC eval model. Unlike the original model, the
        pred function is run pred_step times, just as in pretext training.
        The last context variable is then projected into the action space.
        num_class: If integer => single output layer; if list => multiple output layers (for example verb + noun)
        '''
        super(LC_future_DPC, self).__init__()
        torch.cuda.manual_seed(666)

        # size of the image 128x128
        self.sample_size = sample_size

        # num_seq = 5 and seq_len = 8
        self.num_seq = num_seq
        self.seq_len = seq_len
        self.pred_step = pred_step
        self.num_class = num_class
        print('=> Using RNN + FC model ')

        print('=> Use 2D-3D %s!' % network)

        # dimensions of the output
        self.last_duration = int(math.ceil(seq_len / 4))
        self.last_size = int(math.ceil(sample_size / 32))

        track_running_stats = True

        # f network (= extract representation given video)
        self.backbone, self.param = select_resnet(
            network, track_running_stats=track_running_stats)
        print('feature_size:', self.param['feature_size'])
        self.param['num_layers'] = 1
        self.param['hidden_size'] = self.param['feature_size']

        # g-network (= aggregate into present context)
        print('=> using ConvRNN, kernel_size = 1')
        self.agg = ConvGRU(input_size=self.param['feature_size'],
                           hidden_size=self.param['hidden_size'],
                           kernel_size=1,
                           num_layers=self.param['num_layers'])

        # two layered network \phi (= predict future given context)
        self.network_pred = nn.Sequential(
            nn.Conv2d(self.param['feature_size'],
                      self.param['feature_size'],
                      kernel_size=1,
                      padding=0), nn.ReLU(inplace=True),
            nn.Conv2d(self.param['feature_size'],
                      self.param['feature_size'],
                      kernel_size=1,
                      padding=0))

        self.relu = nn.ReLU(inplace=False)
        self._initialize_weights(self.agg)
        self._initialize_weights(self.network_pred)

        # not part of the pretext training network; used only by the classifier head
        self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
        self.final_bn.weight.data.fill_(1)
        self.final_bn.bias.data.zero_()

        # Initialize the final FC layer(s), one or multiple, depending on the class configuration
        if isinstance(num_class, int):
            # Single
            self.multi_output = False
            self.final_fc = nn.Sequential(
                nn.Dropout(dropout),
                nn.Linear(self.param['feature_size'], self.num_class))
            self._initialize_weights(self.final_fc)

        elif isinstance(num_class, list):
            # Multi, for predicting noun and verb simultaneously
            self.multi_output = True
            self.final_fc = []
            for cur_num_cls in num_class:
                cur_fc = nn.Sequential(
                    nn.Dropout(dropout),
                    nn.Linear(self.param['feature_size'], cur_num_cls))
                self._initialize_weights(cur_fc)
                self.final_fc.append(cur_fc)
            # IMPORTANT, otherwise pytorch won't register
            self.final_fc = nn.ModuleList(self.final_fc)
        else:
            raise ValueError(
                'num_class is of unknown type (expected int or list of ints)')
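
A minimal standalone sketch of the pred_step rollout the docstring describes (an nn.GRUCell stands in for the repo's ConvGRU; sizes illustrative): phi predicts the next feature from the context, and the prediction is fed back to advance the context.

import torch
import torch.nn as nn

feature_size, last_size, pred_step = 8, 4, 3

phi = nn.Sequential(                       # the two-layer 1x1-conv predictor
    nn.Conv2d(feature_size, feature_size, kernel_size=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(feature_size, feature_size, kernel_size=1),
)
agg = nn.GRUCell(feature_size * last_size ** 2, feature_size * last_size ** 2)

context = torch.zeros(1, feature_size * last_size ** 2)
for _ in range(pred_step):
    pred = phi(context.view(1, feature_size, last_size, last_size))
    context = agg(pred.flatten(1), context)   # roll the context forward
# `context` would then go through final_bn/final_fc for action classification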
Example #8
    def __init__(self,
                 img_dim,
                 num_seq=8,
                 seq_len=5,
                 pred_step=3,
                 network='resnet50',
                 latent_size=8,
                 kernel_size=1,
                 rnn_dropout=0.1,
                 action_cls_head=None,
                 cls_dropout=0.5,
                 num_class=101,
                 time_indep=False):
        super(DPC_VRNN, self).__init__()
        torch.cuda.manual_seed(233)
        print('Using DPC-VRNN model, feature extractor: ' + network +
              ', latent size: ' + str(latent_size) + ', kernel size: ' +
              str(kernel_size))

        self.img_dim = img_dim
        self.num_seq = num_seq
        self.seq_len = seq_len
        self.pred_step = pred_step
        self.latent_size = latent_size
        self.kernel_size = kernel_size
        self.rnn_dropout = rnn_dropout
        self.action_cls_head = action_cls_head
        self.cls_dropout = cls_dropout
        self.num_class = num_class
        self.time_indep = time_indep
        if action_cls_head is not None:
            print('Action classification head(s) enabled for: ' +
                  action_cls_head)
        if time_indep:
            print(
                'Temporal independence => ablation study enabled => VRNN is now like CVAE'
            )

        if network == 'resnet8' or network == 'resnet10':
            # 3 if seq_len is 5
            self.last_duration = int(math.ceil(seq_len / 2))
        else:
            # 2 if seq_len is 5
            self.last_duration = int(math.ceil(seq_len / 4))

        # 4 if size of the image is 128
        self.spatial_size = int(math.ceil(img_dim / 32))

        # Feature extractor f
        self.feat_backbone, self.param = select_resnet(
            network, track_running_stats=False)

        # Context aggregator g and projection function phi governed by random z
        self.input_size = self.param['feature_size']
        self.state_size = self.param['feature_size']
        self.param['input_size'] = self.input_size    # for input w_t and output \hat{w}_t
        self.param['state_size'] = self.state_size    # for context c_t
        self.param['latent_size'] = self.latent_size  # for the probabilistic latent vector z_t
        self.param['kernel_size'] = self.kernel_size  # for all conv layers
        self.param['spatial_size'] = self.spatial_size  # H, W
        self.param['rnn_dropout'] = self.rnn_dropout
        self.vrnn_backbone = My_VRNN_Conv_GRU(
            input_size=self.param['input_size'],
            state_size=self.param['state_size'],
            latent_size=self.param['latent_size'],
            spatial_size=self.param['spatial_size'],
            kernel_size=self.param['kernel_size'],
            dropout=self.param['rnn_dropout'],
            time_indep=self.time_indep)
        self.sim_div_dropout = nn.Dropout(
            p=self.rnn_dropout, inplace=False)  # for diversity measurements

        self.mask = None
        self.relu = nn.ReLU(inplace=False)
        self._initialize_weights(self.vrnn_backbone)

        if action_cls_head is not None:
            # Initialize the final FC layer(s), one or multiple, depending on the class configuration
            if isinstance(num_class, int):
                num_class = [num_class]  # singleton to simplify code
            assert (isinstance(num_class, list))
            self.num_class = num_class
            self.final_bn = nn.BatchNorm1d(self.param['state_size'])
            self.final_bn.weight.data.fill_(1)
            self.final_bn.bias.data.zero_()
            self.final_fc = []
            for cur_num_cls in num_class:
                cur_fc = nn.Sequential(
                    nn.Dropout(cls_dropout),
                    nn.Linear(self.param['state_size'], cur_num_cls))
                self._initialize_weights(cur_fc)
                self.final_fc.append(cur_fc)
            self.final_fc = nn.ModuleList(
                self.final_fc)  # IMPORTANT, otherwise pytorch won't register
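
For reference, how a probabilistic latent z_t like the one above is usually sampled, as a hedged sketch of the reparameterization trick (shapes follow latent_size=8 on a 4x4 map; My_VRNN_Conv_GRU's internals are not shown here):

import torch

mu = torch.zeros(2, 8, 4, 4)       # predicted mean of z_t
logvar = torch.zeros(2, 8, 4, 4)   # predicted log-variance of z_t
z = mu + torch.exp(0.5 * logvar) * torch.randn_like(mu)  # differentiable sample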
Example #9
    def __init__(self,
                 sample_size,
                 num_seq=8,
                 seq_len=5,
                 pred_step=3,
                 network='resnet10',
                 distance='L2',
                 distance_type='uncertain',
                 positive_vs_negative='same',
                 radius_type='linear',
                 radius_which='pred'):
        super(DPC_RNN, self).__init__()

        # to reproduce the experiments
        torch.cuda.manual_seed(233)
        print('[model_3d.py] Using DPC-RNN model')

        # number of dimensions in the image
        self.sample_size = sample_size
        self.num_seq = num_seq
        self.seq_len = seq_len

        self.distance = distance
        self.distance_type = distance_type
        self.positive_vs_negative = positive_vs_negative
        self.radius_which = radius_which
        self.radius_type = radius_type

        print('[model_3d.py] Using distance metric : ', self.distance)
        print('[model_3d.py] Using distance type : ', self.distance_type)
        print('[model_3d.py] Treating positive and negative instances as : ',
              self.positive_vs_negative)
        print('[model_3d.py] Using radius type : ', self.radius_type)
        # how many futures to predict
        self.pred_step = pred_step

        # 2 if seq_len is 5 (3 for the shallower resnet8/resnet10 backbones)
        if network in ('resnet8', 'resnet10'):
            self.last_duration = int(math.ceil(seq_len / 2))
        else:
            self.last_duration = int(math.ceil(seq_len / 4))

        self.last_size = int(math.ceil(sample_size / 32))

        # print('final feature map has size %dx%d' %
        #       (self.last_size, self.last_size))

        # f - choose an appropriate feature extractor; in this case, a resnet
        self.backbone, self.param = select_resnet(
            network,
            track_running_stats=False,
            distance_type=self.distance_type,
            radius_type=self.radius_type)

        #print (self.param)

        # number of layers in GRU
        self.param['num_layers'] = 1  # param for GRU
        self.param['hidden_size'] = self.param['feature_size']  # param for GRU

        self.agg = ConvGRU(input_size=self.param['feature_size'],
                           hidden_size=self.param['hidden_size'],
                           kernel_size=1,
                           num_layers=self.param['num_layers'],
                           radius_type=self.radius_type)

        # two layered network \phi
        self.network_pred = nn.Sequential(
            nn.Conv2d(self.param['feature_size'],
                      self.param['feature_size'],
                      kernel_size=1,
                      padding=0), nn.ReLU(inplace=True),
            nn.Conv2d(self.param['feature_size'],
                      self.param['feature_size'],
                      kernel_size=1,
                      padding=0))

        if self.radius_type == 'log' and self.distance_type == 'uncertain':
            print('[model_3d.py] Using log as radius_type')
            self.activation = exp_activation()

        # mask for the contrastive loss; cached here and built lazily in the forward pass
        self.mask = None
        self.relu = nn.ReLU(inplace=False)
        self._initialize_weights(self.agg)
        self._initialize_weights(self.network_pred)
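
A sketch of what the 'log' radius_type implies (exp_activation's definition is not shown, so this is an assumption): the network emits a log-radius, and exponentiation maps it to a strictly positive radius for the uncertain distance.

import torch

log_radius = torch.randn(4, 1, 4, 4)    # raw network output, unconstrained
radius = torch.exp(log_radius)          # strictly positive everywhere
print(bool((radius > 0).all()))         # True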
Example #10
    def __init__(self,
                 sample_size,
                 num_seq=8,
                 seq_len=5,
                 pred_step=3,
                 network='monkeynet'):
        super(DPC_Plus, self).__init__()
        #         torch.cuda.manual_seed(233) #233
        print('Using DPC-RNN model')
        self.sample_size = sample_size
        self.num_seq = num_seq
        self.seq_len = seq_len
        self.pred_step = pred_step

        if network in ('vgg', 'mousenet', 'simmousenet', 'monkeynet'):
            self.last_duration = seq_len
        else:
            self.last_duration = int(math.ceil(seq_len / 4))

        if network == 'resnet0':
            self.last_size = int(math.ceil(sample_size / 8))  #8
            self.pool_size = 1
        elif network == 'mousenet':
            self.last_size = 16
            self.pool_size = 2  # (2 for all readout, 4 for VISp5 readout)
        elif network == 'simmousenet':
            self.last_size = 16
            self.pool_size = 1
        elif network == 'monkeynet':
            self.last_size = 16
            self.pool_size = 1
        else:
            self.last_size = int(math.ceil(sample_size / 32))
            self.pool_size = 1

        print('final feature map has size %dx%d' %
              (self.last_size, self.last_size))
        if network == 'mousenet':
            self.backbone, self.param = select_mousenet()
        elif network == 'simmousenet':
            # NOTE: relies on a module-level `hp` (hyperparameter config) being in scope
            self.backbone, self.param = select_simmousenet(hp)
        elif network == 'monkeynet':
            self.backbone, self.param = select_monkeynet()
        else:
            self.backbone, self.param = select_resnet(
                network, track_running_stats=False)

        self.param['num_layers'] = 1  # param for GRU
        self.param['hidden_size'] = self.param['feature_size']  # param for GRU

        self.agg = ConvGRU(input_size=self.param['feature_size'],
                           hidden_size=self.param['hidden_size'],
                           kernel_size=1,
                           num_layers=self.param['num_layers'])
        self.network_pred = nn.Sequential(
            nn.Conv2d(self.param['feature_size'],
                      self.param['feature_size'],
                      kernel_size=1,
                      padding=0), nn.ReLU(inplace=True),
            nn.Conv2d(self.param['feature_size'],
                      self.param['feature_size'],
                      kernel_size=1,
                      padding=0))
        self.mask = None
        self.relu = nn.ReLU(inplace=False)

        self.linear1_1 = nn.Linear(self.backbone.path1.resblocks_out_channels,
                                   64)
        self.linear1_2 = nn.Linear(self.backbone.path1.resblocks_out_channels,
                                   64)
        self.linear2_1 = nn.Linear(64, 2)
        self.linear2_2 = nn.Linear(64, 1)

        self._initialize_weights(self.agg)
        self._initialize_weights(self.network_pred)
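
The backbone dispatch above, condensed into one helper for reference (values copied verbatim from the branches):

import math

def spatial_config(network, sample_size):
    # returns (last_size, pool_size) exactly as the branches above
    if network == 'resnet0':
        return math.ceil(sample_size / 8), 1
    if network == 'mousenet':
        return 16, 2   # 2 for all-readout, 4 for VISp5 readout
    if network in ('simmousenet', 'monkeynet'):
        return 16, 1
    return math.ceil(sample_size / 32), 1

print(spatial_config('monkeynet', 64))  # (16, 1)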
Example #11
    def __init__(self,
                 sample_size,
                 num_seq=5,
                 seq_len=5,
                 network='resnet18',
                 distance_type='uncertain',
                 feature_type='F',
                 pred_steps=1,
                 pool='avg',
                 radius_location='Phi'):
        super(model_visualize, self).__init__()
        torch.cuda.manual_seed(666)

        # size of the image 128x128
        self.sample_size = sample_size
        self.distance_type = distance_type
        self.feature_type = feature_type
        self.network = network
        self.pool = pool
        self.pred_steps = pred_steps
        self.radius_location = radius_location

        # num_seq = 5 and seq_len = 8
        self.num_seq = num_seq
        self.seq_len = seq_len

        if self.feature_type == 'F':
            print('[model_visualize.py] Using <<F>> mapping ')
        elif self.feature_type == 'G':
            print('[model_visualize.py] Using <<F+G>> mapping ')
        elif self.feature_type == 'Phi':
            print('[model_visualize.py] Using <<F+G+Phi>> mapping ')

        print('[model_visualize.py] Use 2D-3D %s!' % network)

        # dimensions of the output
        if self.network == 'resnet8' or self.network == 'resnet10':
            self.last_duration = int(math.ceil(seq_len / 2))
        else:
            self.last_duration = int(math.ceil(seq_len / 4))
        self.last_size = int(math.ceil(sample_size / 32))

        track_running_stats = True

        # f network
        print('[model_visualize.py] Using distance type : <<',
              self.distance_type, ' >>')

        # f - choose an appropriate feature extractor; in this case, a resnet
        if self.radius_location == 'Phi':
            self.backbone, self.param = select_resnet(
                network, track_running_stats=False, distance_type='certain')
        elif self.radius_location == 'F':
            self.backbone, self.param = select_resnet(
                network,
                track_running_stats=False,
                distance_type=self.distance_type)

        self.param['num_layers'] = 1  # param for GRU
        self.param['hidden_size'] = self.param['feature_size']  # param for GRU

        self.agg = ConvGRU(input_size=self.param['feature_size'],
                           hidden_size=self.param['hidden_size'],
                           kernel_size=1,
                           num_layers=self.param['num_layers'])

        # two layered network \phi
        if self.radius_location == 'Phi':
            if self.distance_type == 'certain':
                output_size = self.param['feature_size']
            elif self.distance_type == 'uncertain':
                output_size = self.param['feature_size'] + 1
            self.network_pred = nn.Sequential(
                nn.Conv2d(self.param['feature_size'],
                          self.param['feature_size'],
                          kernel_size=1,
                          padding=0), nn.ReLU(inplace=True),
                nn.Conv2d(self.param['feature_size'],
                          output_size,
                          kernel_size=1,
                          padding=0))
        elif self.radius_location == 'F':
            self.network_pred = nn.Sequential(
                nn.Conv2d(self.param['feature_size'],
                          self.param['feature_size'],
                          kernel_size=1,
                          padding=0), nn.ReLU(inplace=True),
                nn.Conv2d(self.param['feature_size'],
                          self.param['feature_size'],
                          kernel_size=1,
                          padding=0))

        self.avg_pool = nn.AvgPool3d((1, self.last_size, self.last_size),
                                     stride=1)
        self.max_pool = nn.MaxPool3d((1, self.last_size, self.last_size),
                                     stride=1)

        self.relu = nn.ReLU(inplace=False)
        self._initialize_weights(self.agg)
        self._initialize_weights(self.network_pred)
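
With distance_type='uncertain' and radius_location='Phi', the predictor emits feature_size + 1 channels; a small sketch of how the extra radius channel would be split off downstream (the forward pass is not shown, so this is an assumption):

import torch

feature_size = 8
out = torch.randn(2, feature_size + 1, 4, 4)   # predictor output
feat, radius = out[:, :feature_size], out[:, feature_size:]
print(feat.shape, radius.shape)  # (2, 8, 4, 4) (2, 1, 4, 4)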
Example #12
    def __init__(self,
                 img_dim,
                 num_seq=8,
                 seq_len=5,
                 pred_step=3,
                 network='resnet50',
                 cvae_arch='fc',
                 action_cls_head=False,
                 dropout=0.5,
                 num_class=101):
        super(DPC_CVAE, self).__init__()

        # to reproduce the experiments
        torch.cuda.manual_seed(233)
        print('Using DPC-CVAE model ' + network + ' ' + cvae_arch)

        # number of dimensions in the image
        self.img_dim = img_dim
        self.num_seq = num_seq
        self.seq_len = seq_len
        self.action_cls_head = action_cls_head

        if action_cls_head:
            print('Action classification head(s) enabled with final FC')
        # if force_encode_train:
        #     print('::: WARNING ::: Gaussian parameter encoding will take place during TRAIN, which might inflate accuracy!')
        # if force_encode_eval:
        #     print('::: WARNING ::: Gaussian parameter encoding will take place during EVAL, which will inflate accuracy!')

        # how many futures to predict
        self.pred_step = pred_step

        # 2 if seq_len is 5
        if network == 'resnet8' or network == 'resnet10':
            self.last_duration = int(math.ceil(seq_len / 2))
        else:
            self.last_duration = int(math.ceil(seq_len / 4))

        # 4 if size of the image is 128
        self.last_size = int(math.ceil(img_dim / 32))
        self.spatial_size = self.last_size

        print('final feature map has size %dx%d' %
              (self.last_size, self.last_size))

        # f - choose an appropriate feature extractor; in this case, a resnet
        self.backbone, self.param = select_resnet(network,
                                                  track_running_stats=False)

        #print (self.param)

        self.param['num_layers'] = 1  # param for GRU
        self.param['hidden_size'] = self.param['feature_size']  # param for GRU

        # Converts input (video block representation) + old hidden state to new hidden state
        self.agg = ConvGRU(input_size=self.param['feature_size'],
                           hidden_size=self.param['hidden_size'],
                           kernel_size=1,
                           num_layers=self.param['num_layers'])

        # two layered network \phi
        # Replaced with CVAE
        # self.network_pred = nn.Sequential(
        #     nn.Conv2d(self.param['feature_size'],
        #               self.param['feature_size'], kernel_size=1, padding=0),
        #     nn.ReLU(inplace=True),
        #     nn.Conv2d(self.param['feature_size'],
        #               self.param['feature_size'], kernel_size=1, padding=0)
        # )
        if cvae_arch == 'fc':
            print('Using CVAE class: My_CVAE_FC')
            self.network_pred_cvae = My_CVAE_FC(
                self.param['feature_size'] * self.last_size * self.last_size,
                self.param['feature_size'] * self.last_size * self.last_size,
                latent_size=256,
                hidden_size=1024)
        elif cvae_arch == 'conv' or cvae_arch == 'conv_a':
            # Conv 1x1 version A
            print(
                'Using CVAE class: My_CVAE_Conv1x1_A (latent=64x4x4, hidden=128x4x4)'
            )
            self.network_pred_cvae = My_CVAE_Conv1x1(
                self.param['feature_size'],
                self.param['feature_size'],
                latent_size=64,
                hidden_size=128)
        elif cvae_arch == 'conv_b':
            # Conv 1x1 version B (smaller latent dimension)
            print(
                'Using CVAE class: My_CVAE_Conv1x1_B (latent=16x4x4, hidden=128x4x4)'
            )
            self.network_pred_cvae = My_CVAE_Conv1x1(
                self.param['feature_size'],
                self.param['feature_size'],
                latent_size=16,
                hidden_size=128)
        elif cvae_arch == 'conv_c':
            # Conv 1x1 version C (even smaller latent dimension)
            print(
                'Using CVAE class: My_CVAE_Conv1x1_C (latent=4x4x4, hidden=128x4x4)'
            )
            self.network_pred_cvae = My_CVAE_Conv1x1(
                self.param['feature_size'],
                self.param['feature_size'],
                latent_size=4,
                hidden_size=128)
        elif cvae_arch == 'conv_d':
            # Conv + FC version D (global latent space)
            print(
                'Using CVAE class: My_CVAE_ConvFC (latent=8x1x1, hidden=256x4x4)'
            )
            self.network_pred_cvae = My_CVAE_ConvFC(
                self.param['feature_size'],
                self.param['feature_size'],
                latent_size=8,
                hidden_size=256,
                spatial_size=self.last_size)
        elif cvae_arch == 'conv_e':
            # Conv + FC version E (global latent space, size 16)
            print(
                'Using CVAE class: My_CVAE_ConvFC (latent=16x1x1, hidden=256x4x4)'
            )
            self.network_pred_cvae = My_CVAE_ConvFC(
                self.param['feature_size'],
                self.param['feature_size'],
                latent_size=16,
                hidden_size=256,
                spatial_size=self.last_size)
        elif cvae_arch == 'vrnn_a':
            # NOTE: no predictor is constructed in this branch, so the
            # _initialize_weights(self.network_pred_cvae) call below would fail;
            # My_VRNN_A is presumably instantiated elsewhere.
            print('Using VRNN class: My_VRNN_A')
        else:
            raise Exception('CVAE architecture not recognized: ' + cvae_arch)

        self.mask = None
        self.relu = nn.ReLU(inplace=False)
        self._initialize_weights(self.agg)
        self._initialize_weights(self.network_pred_cvae)

        if action_cls_head:
            # See eval/model_3d_lc.py
            if isinstance(num_class, int):
                num_class = [num_class]  # singleton to simplify code
            assert (isinstance(num_class, list))
            self.num_class = num_class
            self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
            self.final_bn.weight.data.fill_(1)
            self.final_bn.bias.data.zero_()
            self.final_fc = []
            for cur_num_cls in num_class:
                cur_fc = nn.Sequential(
                    nn.Dropout(dropout),
                    nn.Linear(self.param['feature_size'], cur_num_cls))
                self._initialize_weights(cur_fc)
                self.final_fc.append(cur_fc)
            self.final_fc = nn.ModuleList(
                self.final_fc)  # IMPORTANT, otherwise pytorch won't register
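
The cvae_arch dispatch above, condensed for reference ('conv' is an alias for 'conv_a'; latent/hidden sizes copied from the branches):

CVAE_ARCHS = {
    'fc':     ('My_CVAE_FC',      256, 1024),
    'conv_a': ('My_CVAE_Conv1x1',  64,  128),   # 'conv' aliases this
    'conv_b': ('My_CVAE_Conv1x1',  16,  128),
    'conv_c': ('My_CVAE_Conv1x1',   4,  128),
    'conv_d': ('My_CVAE_ConvFC',    8,  256),
    'conv_e': ('My_CVAE_ConvFC',   16,  256),
}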