def __init__(self, sample_size, num_seq=8, seq_len=5, pred_step=3, network='resnet50'):
    super(DPC_RNN_Infer_Pred_Error, self).__init__()
    torch.cuda.manual_seed(233)
    print('Using DPC-RNN model')
    self.sample_size = sample_size
    self.num_seq = num_seq
    self.seq_len = seq_len
    self.pred_step = pred_step
    self.last_duration = int(math.ceil(seq_len / 4))
    self.last_size = int(math.ceil(sample_size / 32))
    print('final feature map has size %dx%d' % (self.last_size, self.last_size))

    self.backbone, self.param = select_resnet(network, track_running_stats=False)
    self.param['num_layers'] = 1  # param for GRU
    self.param['hidden_size'] = self.param['feature_size']  # param for GRU

    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])
    self.network_pred = nn.Sequential(
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0),
        nn.ReLU(inplace=True),
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0)
    )
    self.mask = None
    self.relu = nn.ReLU(inplace=False)
    self._initialize_weights(self.agg)
    self._initialize_weights(self.network_pred)
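
# --- Sanity check (illustrative, not from the source) ---
# The constructor arithmetic above, spelled out for the printed defaults.
# A stride-32 ResNet backbone and a temporal downsampling factor of 4 are
# assumed, matching the ceil divisions in __init__.
import math

seq_len, sample_size = 5, 128
last_duration = int(math.ceil(seq_len / 4))    # temporal extent after the backbone: 2
last_size = int(math.ceil(sample_size / 32))   # spatial extent after the backbone: 4
print('final feature map has size %dx%d' % (last_size, last_size))  # -> 4x4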
def __init__(self, sample_size, num_seq, seq_len, network='resnet18', dropout=0.5, num_class=101):
    '''
    Original DPC, according to the diagram in the appendix.
    No future prediction network involved.
    num_class: If integer => single output layer; if list => multiple output
    layers (for example verb + noun).
    '''
    super(LC_present, self).__init__()
    torch.cuda.manual_seed(666)  # very innocent number
    self.sample_size = sample_size
    self.num_seq = num_seq
    self.seq_len = seq_len
    self.num_class = num_class
    print('=> Using RNN + FC model with num_class:', num_class)
    print('=> Use 2D-3D %s!' % network)
    self.last_duration = int(math.ceil(seq_len / 4))
    self.last_size = int(math.ceil(sample_size / 32))

    track_running_stats = True
    self.backbone, self.param = select_resnet(
        network, track_running_stats=track_running_stats)
    self.param['num_layers'] = 1
    self.param['hidden_size'] = self.param['feature_size']

    print('=> using ConvRNN, kernel_size = 1')
    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])
    self._initialize_weights(self.agg)

    self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
    self.final_bn.weight.data.fill_(1)
    self.final_bn.bias.data.zero_()

    # Initialize final FC layer(s), one or multiple, depending on class configuration
    if isinstance(num_class, int):  # Single output head
        self.multi_output = False
        self.final_fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(self.param['feature_size'], self.num_class))
        self._initialize_weights(self.final_fc)
    elif isinstance(num_class, list):  # Multiple output heads
        self.multi_output = True
        self.final_fc = []
        for cur_num_cls in num_class:
            cur_fc = nn.Sequential(
                nn.Dropout(dropout),
                nn.Linear(self.param['feature_size'], cur_num_cls))
            self._initialize_weights(cur_fc)
            self.final_fc.append(cur_fc)
        # IMPORTANT, otherwise pytorch won't register the submodules
        self.final_fc = nn.ModuleList(self.final_fc)
    else:
        raise ValueError(
            'num_class is of unknown type (expected int or list of ints)')
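
# --- Usage sketch (illustrative, not from the source) ---
# How the num_class dispatch above plays out; the verb/noun class counts
# below are made-up placeholders.
model_single = LC_present(sample_size=128, num_seq=8, seq_len=5,
                          network='resnet18', num_class=101)
assert not model_single.multi_output             # one Dropout+Linear head

model_multi = LC_present(sample_size=128, num_seq=8, seq_len=5,
                         network='resnet18', num_class=[97, 300])
assert model_multi.multi_output                  # one head per task
assert len(model_multi.final_fc) == 2            # registered via nn.ModuleList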
def __init__(self, args):
    super(DpcRnn, self).__init__()
    torch.cuda.manual_seed(233)
    print('Using DPC-RNN model for mode: {}'.format(args["mode"]))
    self.num_seq = args["num_seq"]
    self.seq_len = args["seq_len"]
    self.pred_step = args["pred_step"]
    self.sample_size = args["img_dim"]
    self.last_duration = int(math.ceil(self.seq_len / 4))
    self.last_size = int(math.ceil(self.sample_size / 32))
    print('final feature map has size %dx%d' % (self.last_size, self.last_size))

    self.mode = args["mode"]
    self.in_channels = get_num_channels(self.mode)
    self.l2_norm = args["l2_norm"]

    track_running_stats = True
    print("Track running stats: {}".format(track_running_stats))
    self.backbone, self.param = select_resnet(
        args["net"],
        track_running_stats=track_running_stats,
        in_channels=self.in_channels
    )

    # params for GRU
    self.param['num_layers'] = 1
    self.param['hidden_size'] = self.param['feature_size']

    # params for the current model
    self.final_feature_size = self.param["feature_size"]
    # self.final_feature_size = self.param['hidden_size'] * (self.last_size ** 2)
    self.total_feature_size = self.param['hidden_size'] * (self.last_size ** 2)

    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])
    self.network_pred = nn.Sequential(
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0),
        nn.ReLU(inplace=True),
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0)
    )

    self.compiled_features = self.get_modality_feature_extractor()
    self.interModeDotHandler = su.InterModeDotHandler(self.last_size)
    self.cosSimHandler = su.CosSimHandler()

    self.mask = None
    # self.relu = nn.ReLU(inplace=False)
    self._initialize_weights(self.agg)
    self._initialize_weights(self.network_pred)
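
# --- Shape bookkeeping sketch (illustrative, not from the source) ---
# What final_feature_size vs. total_feature_size mean above, assuming a
# 256-dim backbone feature and a 128-pixel input (typical values, not taken
# from this constructor):
feature_size = 256                                   # assumed per-cell feature dim
img_dim = 128                                        # assumed input resolution
last_size = -(-img_dim // 32)                        # ceil(128 / 32) = 4
final_feature_size = feature_size                    # one spatial cell: 256
total_feature_size = feature_size * last_size ** 2   # full 4x4 map flattened: 4096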
def __init__(self, sample_size, num_seq, seq_len, in_channels,
             network='resnet18', dropout=0.5, num_class=101):
    super(LC, self).__init__()
    torch.cuda.manual_seed(666)
    self.sample_size = sample_size
    self.num_seq = num_seq
    self.seq_len = seq_len
    self.num_class = num_class
    self.in_channels = in_channels
    print('=> Using RNN + FC model with ic:', self.in_channels)
    print('=> Use 2D-3D %s!' % network)
    self.last_duration = int(math.ceil(seq_len / 4))
    self.last_size = int(math.ceil(sample_size / 32))

    track_running_stats = True
    self.backbone, self.param = \
        select_resnet(network, track_running_stats=track_running_stats,
                      in_channels=self.in_channels)
    self.param['num_layers'] = 1
    self.param['hidden_size'] = self.param['feature_size']

    print('=> using ConvRNN, kernel_size = 1')
    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])
    self._initialize_weights(self.agg)

    self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
    self.final_bn.weight.data.fill_(1)
    self.final_bn.bias.data.zero_()

    self.num_classes = num_class
    self.dropout = dropout
    self.hidden_size = 128
    self.final_fc = nn.Sequential(
        nn.Dropout(self.dropout),
        nn.Linear(self.param['feature_size'], self.num_classes),
    )
    self._initialize_weights(self.final_fc)
def __init__(self, sample_size, num_seq=8, seq_len=5, pred_step=3,
             network='resnet50', distance='dot', poincare_c=1.0,
             poincare_ball_dim=256):
    super(DPC_RNN, self).__init__()
    # to reproduce the experiments
    torch.cuda.manual_seed(233)
    print('Using DPC-RNN model')
    # spatial size of the input frames
    self.sample_size = sample_size
    self.num_seq = num_seq
    self.seq_len = seq_len
    self.distance = distance
    # how many future steps to predict
    self.pred_step = pred_step

    if network == 'resnet8' or network == 'resnet10':
        self.last_duration = int(math.ceil(seq_len / 2))
    else:
        # 2 if seq_len is 5
        self.last_duration = int(math.ceil(seq_len / 4))
    # 4 if the image size is 128
    # change for toy experiment
    # self.last_size = 1
    self.last_size = int(math.ceil(sample_size / 32))
    print('final feature map has size %dx%d' % (self.last_size, self.last_size))

    # f - choose an appropriate feature extractor, in this case a ResNet
    self.backbone, self.param = select_resnet(
        network, track_running_stats=False, distance=self.distance)
    # print(self.param)
    self.param['num_layers'] = 1  # param for GRU
    self.param['hidden_size'] = self.param['feature_size']  # param for GRU

    # g - aggregation function
    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])
    # two-layer prediction network \phi
    self.network_pred = nn.Sequential(
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0),
        nn.ReLU(inplace=True),
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0)
    )
    # mask over the score matrix, built lazily in the forward pass
    self.mask = None
    self.relu = nn.ReLU(inplace=False)
    self._initialize_weights(self.agg)
    self._initialize_weights(self.network_pred)

    # exponential map onto the Poincare ball
    # (note: ball_dim follows the backbone feature size, so poincare_ball_dim
    # is unused here)
    self.tp = hypnn.ToPoincare(c=poincare_c, train_x=True, train_c=True,
                               ball_dim=self.param['feature_size'])
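
# --- Poincare-map sketch (illustrative, not from the source) ---
# What the final exponential map presumably does: project Euclidean feature
# vectors into the Poincare ball of curvature c. The shapes and this use of
# the hypnn API are assumptions based on the ToPoincare call above.
feats = torch.randn(16, 256)                 # (batch, feature_size), Euclidean
tp = hypnn.ToPoincare(c=1.0, train_x=True, train_c=True, ball_dim=256)
ball_feats = tp(feats)                       # same shape, now inside the ball
# for curvature c, valid points satisfy ||x|| < 1 / sqrt(c)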
def __init__(self, sample_size, num_seq, seq_len, pred_step,
             network='resnet18', dropout=0.5, num_class=101):
    '''
    Modified from the DPC eval model. Unlike the original model, the pred
    function is run pred_step times, just as in pretext training. The last
    context variable is used to project to the action space.
    num_class: If integer => single output layer; if list => multiple output
    layers (for example verb + noun).
    '''
    super(LC_future_DPC, self).__init__()
    torch.cuda.manual_seed(666)
    # spatial size of the input, e.g. 128x128
    self.sample_size = sample_size
    # e.g. num_seq = 8 blocks of seq_len = 5 frames
    self.num_seq = num_seq
    self.seq_len = seq_len
    self.pred_step = pred_step
    self.num_class = num_class
    print('=> Using RNN + FC model ')
    print('=> Use 2D-3D %s!' % network)

    # dimensions of the backbone output
    self.last_duration = int(math.ceil(seq_len / 4))
    self.last_size = int(math.ceil(sample_size / 32))
    track_running_stats = True

    # f network (= extract a representation of each video block)
    self.backbone, self.param = select_resnet(
        network, track_running_stats=track_running_stats)
    print('feature_size:', self.param['feature_size'])
    self.param['num_layers'] = 1
    self.param['hidden_size'] = self.param['feature_size']

    # g network (= aggregate blocks into the present context)
    print('=> using ConvRNN, kernel_size = 1')
    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])
    # two-layer network \phi (= predict the future given the context)
    self.network_pred = nn.Sequential(
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0),
        nn.ReLU(inplace=True),
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0))
    self.relu = nn.ReLU(inplace=False)
    self._initialize_weights(self.agg)
    self._initialize_weights(self.network_pred)

    # not in the pretext training network
    self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
    self.final_bn.weight.data.fill_(1)
    self.final_bn.bias.data.zero_()

    # Initialize final FC layer(s), one or multiple, depending on class configuration
    if isinstance(num_class, int):  # Single output head
        self.multi_output = False
        self.final_fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(self.param['feature_size'], self.num_class))
        self._initialize_weights(self.final_fc)
    elif isinstance(num_class, list):  # Multiple heads, e.g. verb and noun simultaneously
        self.multi_output = True
        self.final_fc = []
        for cur_num_cls in num_class:
            cur_fc = nn.Sequential(
                nn.Dropout(dropout),
                nn.Linear(self.param['feature_size'], cur_num_cls))
            self._initialize_weights(cur_fc)
            self.final_fc.append(cur_fc)
        # IMPORTANT, otherwise pytorch won't register the submodules
        self.final_fc = nn.ModuleList(self.final_fc)
    else:
        raise ValueError(
            'num_class is of unknown type (expected int or list of ints)')
def __init__(self, sample_size, num_seq=8, seq_len=5, pred_step=3,
             network='resnet10', distance='L2', distance_type='uncertain',
             positive_vs_negative='same', radius_type='linear',
             radius_which='pred'):
    super(DPC_RNN, self).__init__()
    # to reproduce the experiments
    torch.cuda.manual_seed(233)
    print('[model_3d.py] Using DPC-RNN model')
    # spatial size of the input frames
    self.sample_size = sample_size
    self.num_seq = num_seq
    self.seq_len = seq_len
    self.distance = distance
    self.distance_type = distance_type
    self.positive_vs_negative = positive_vs_negative
    self.radius_which = radius_which
    self.radius_type = radius_type
    print('[model_3d.py] Using distance metric : ', self.distance)
    print('[model_3d.py] Using distance type : ', self.distance_type)
    print('[model_3d.py] Treating positive and negative instances as : ',
          self.positive_vs_negative)
    print('[model_3d.py] Using radius type : ', self.radius_type)
    # how many future steps to predict
    self.pred_step = pred_step

    if network == 'resnet8' or network == 'resnet10':
        self.last_duration = int(math.ceil(seq_len / 2))
    else:
        # 2 if seq_len is 5
        self.last_duration = int(math.ceil(seq_len / 4))
    self.last_size = int(math.ceil(sample_size / 32))
    # print('final feature map has size %dx%d' %
    #       (self.last_size, self.last_size))

    # f - choose an appropriate feature extractor, in this case a ResNet
    self.backbone, self.param = select_resnet(
        network, track_running_stats=False,
        distance_type=self.distance_type, radius_type=self.radius_type)
    # print(self.param)

    # number of layers in the GRU
    self.param['num_layers'] = 1
    self.param['hidden_size'] = self.param['feature_size']  # param for GRU

    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'],
                       radius_type=self.radius_type)
    # two-layer prediction network \phi
    self.network_pred = nn.Sequential(
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0),
        nn.ReLU(inplace=True),
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0))

    if self.radius_type == 'log' and self.distance_type == 'uncertain':
        print('[model_3d.py] Using log as radius_type')
        self.activation = exp_activation()

    # mask over the score matrix, built lazily in the forward pass
    self.mask = None
    self.relu = nn.ReLU(inplace=False)
    self._initialize_weights(self.agg)
    self._initialize_weights(self.network_pred)
def __init__(self, sample_size, num_seq=8, seq_len=5, pred_step=3, network='monkeynet'):
    super(DPC_Plus, self).__init__()
    # torch.cuda.manual_seed(233)
    print('Using DPC-RNN model')
    self.sample_size = sample_size
    self.num_seq = num_seq
    self.seq_len = seq_len
    self.pred_step = pred_step

    if network in ('vgg', 'mousenet', 'simmousenet', 'monkeynet'):
        self.last_duration = seq_len
    else:
        self.last_duration = int(math.ceil(seq_len / 4))

    if network == 'resnet0':
        self.last_size = int(math.ceil(sample_size / 8))
        self.pool_size = 1
    elif network == 'mousenet':
        self.last_size = 16
        self.pool_size = 2  # (2 for all readout, 4 for VISp5 readout)
    elif network == 'simmousenet':
        self.last_size = 16
        self.pool_size = 1
    elif network == 'monkeynet':
        self.last_size = 16
        self.pool_size = 1
    else:
        self.last_size = int(math.ceil(sample_size / 32))
        self.pool_size = 1
    print('final feature map has size %dx%d' % (self.last_size, self.last_size))

    if network == 'mousenet':
        self.backbone, self.param = select_mousenet()
    elif network == 'simmousenet':
        self.backbone, self.param = select_simmousenet(hp)  # hp assumed in scope
    elif network == 'monkeynet':
        self.backbone, self.param = select_monkeynet()
    else:
        self.backbone, self.param = select_resnet(network, track_running_stats=False)

    self.param['num_layers'] = 1  # param for GRU
    self.param['hidden_size'] = self.param['feature_size']  # param for GRU

    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])
    self.network_pred = nn.Sequential(
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0),
        nn.ReLU(inplace=True),
        nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                  kernel_size=1, padding=0))
    self.mask = None
    self.relu = nn.ReLU(inplace=False)

    # readout heads (these assume a backbone with a `path1` attribute,
    # i.e. the monkeynet backbone)
    self.linear1_1 = nn.Linear(self.backbone.path1.resblocks_out_channels, 64)
    self.linear1_2 = nn.Linear(self.backbone.path1.resblocks_out_channels, 64)
    self.linear2_1 = nn.Linear(64, 2)
    self.linear2_2 = nn.Linear(64, 1)

    self._initialize_weights(self.agg)
    self._initialize_weights(self.network_pred)
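
# --- Input-layout sketch (illustrative, not from the source) ---
# The num_seq/seq_len bookkeeping above implies the usual DPC-style block
# input; the exact forward() signature is not shown in this section, so the
# layout below is an assumption.
B, N, C, SL, H, W = 4, 8, 3, 5, 64, 64   # batch, num_seq, channels, seq_len, height, width
block = torch.randn(B, N, C, SL, H, W)
# 3D-CNN backbones consume one block at a time, so forward() typically folds
# the block dimension into the batch:
backbone_in = block.view(B * N, C, SL, H, W)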
def __init__(self, sample_size, num_seq=5, seq_len=5, network='resnet18',
             distance_type='uncertain', feature_type='F', pred_steps=1,
             pool='avg', radius_location='Phi'):
    super(model_visualize, self).__init__()
    torch.cuda.manual_seed(666)
    # spatial size of the input, e.g. 128x128
    self.sample_size = sample_size
    self.distance_type = distance_type
    self.feature_type = feature_type
    self.network = network
    self.pool = pool
    self.pred_steps = pred_steps
    self.radius_location = radius_location
    self.num_seq = num_seq
    self.seq_len = seq_len

    if self.feature_type == 'F':
        print('[model_visualize.py] Using <<F>> mapping ')
    elif self.feature_type == 'G':
        print('[model_visualize.py] Using <<F+G>> mapping ')
    elif self.feature_type == 'Phi':
        print('[model_visualize.py] Using <<F+G+Phi>> mapping ')
    print('[model_visualize.py] Use 2D-3D %s!' % network)

    # dimensions of the backbone output
    if self.network == 'resnet8' or self.network == 'resnet10':
        self.last_duration = int(math.ceil(seq_len / 2))
    else:
        self.last_duration = int(math.ceil(seq_len / 4))
    self.last_size = int(math.ceil(sample_size / 32))
    track_running_stats = True

    print('[model_visualize.py] Using distance type : <<', self.distance_type, ' >>')
    # f - choose an appropriate feature extractor, in this case a ResNet.
    # If the radius is predicted by Phi, the backbone itself stays 'certain'.
    if self.radius_location == 'Phi':
        self.backbone, self.param = select_resnet(
            network, track_running_stats=False, distance_type='certain')
    elif self.radius_location == 'F':
        self.backbone, self.param = select_resnet(
            network, track_running_stats=False, distance_type=self.distance_type)

    self.param['num_layers'] = 1  # param for GRU
    self.param['hidden_size'] = self.param['feature_size']  # param for GRU

    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])

    # two-layer network \phi; when the radius is predicted here, the last
    # conv gains one extra output channel for the radius
    if self.radius_location == 'Phi':
        if self.distance_type == 'certain':
            output_size = self.param['feature_size']
        elif self.distance_type == 'uncertain':
            output_size = self.param['feature_size'] + 1
        self.network_pred = nn.Sequential(
            nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                      kernel_size=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(self.param['feature_size'], output_size,
                      kernel_size=1, padding=0))
    elif self.radius_location == 'F':
        self.network_pred = nn.Sequential(
            nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                      kernel_size=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
                      kernel_size=1, padding=0))

    self.avg_pool = nn.AvgPool3d((1, self.last_size, self.last_size), stride=1)
    self.max_pool = nn.MaxPool3d((1, self.last_size, self.last_size), stride=1)

    self.relu = nn.ReLU(inplace=False)
    self._initialize_weights(self.agg)
    self._initialize_weights(self.network_pred)
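
# --- Pooling sketch (illustrative, not from the source) ---
# What the avg/max pools above do to a context map; feature_size=256,
# last_size=4, and the (B, feature, T, H, W) layout are assumptions.
context = torch.randn(2, 256, 1, 4, 4)       # (B, feature, T, last_size, last_size)
avg_pool = nn.AvgPool3d((1, 4, 4), stride=1)
pooled = avg_pool(context)                   # -> (2, 256, 1, 1, 1)
flat = pooled.view(2, 256)                   # one vector per clip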
def __init__(self, img_dim, num_seq=8, seq_len=5, pred_step=3, network='resnet50',
             cvae_arch='fc', action_cls_head=False, dropout=0.5, num_class=101):
    super(DPC_CVAE, self).__init__()
    # to reproduce the experiments
    torch.cuda.manual_seed(233)
    print('Using DPC-CVAE model ' + network + ' ' + cvae_arch)
    # spatial size of the input frames
    self.img_dim = img_dim
    self.num_seq = num_seq
    self.seq_len = seq_len
    self.action_cls_head = action_cls_head
    if action_cls_head:
        print('Action classification head(s) enabled with final FC')
    # if force_encode_train:
    #     print('::: WARNING ::: Gaussian parameter encoding will take place during TRAIN, which might inflate accuracy!')
    # if force_encode_eval:
    #     print('::: WARNING ::: Gaussian parameter encoding will take place during EVAL, which will inflate accuracy!')

    # how many future steps to predict
    self.pred_step = pred_step
    # 2 if seq_len is 5
    if network == 'resnet8' or network == 'resnet10':
        self.last_duration = int(math.ceil(seq_len / 2))
    else:
        self.last_duration = int(math.ceil(seq_len / 4))
    # 4 if the image size is 128
    self.last_size = int(math.ceil(img_dim / 32))
    self.spatial_size = self.last_size
    print('final feature map has size %dx%d' % (self.last_size, self.last_size))

    # f - choose an appropriate feature extractor, in this case a ResNet
    self.backbone, self.param = select_resnet(network, track_running_stats=False)
    self.param['num_layers'] = 1  # param for GRU
    self.param['hidden_size'] = self.param['feature_size']  # param for GRU

    # Converts input (video block representation) + old hidden state to new hidden state
    self.agg = ConvGRU(input_size=self.param['feature_size'],
                       hidden_size=self.param['hidden_size'],
                       kernel_size=1,
                       num_layers=self.param['num_layers'])

    # two-layer network \phi, replaced with a CVAE:
    # self.network_pred = nn.Sequential(
    #     nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
    #               kernel_size=1, padding=0),
    #     nn.ReLU(inplace=True),
    #     nn.Conv2d(self.param['feature_size'], self.param['feature_size'],
    #               kernel_size=1, padding=0)
    # )
    if cvae_arch == 'fc':
        print('Using CVAE class: My_CVAE_FC')
        self.network_pred_cvae = My_CVAE_FC(
            self.param['feature_size'] * self.last_size * self.last_size,
            self.param['feature_size'] * self.last_size * self.last_size,
            latent_size=256, hidden_size=1024)
    elif cvae_arch == 'conv' or cvae_arch == 'conv_a':
        # Conv 1x1 version A
        print('Using CVAE class: My_CVAE_Conv1x1_A (latent=64x4x4, hidden=128x4x4)')
        self.network_pred_cvae = My_CVAE_Conv1x1(
            self.param['feature_size'], self.param['feature_size'],
            latent_size=64, hidden_size=128)
    elif cvae_arch == 'conv_b':
        # Conv 1x1 version B (smaller latent dimension)
        print('Using CVAE class: My_CVAE_Conv1x1_B (latent=16x4x4, hidden=128x4x4)')
        self.network_pred_cvae = My_CVAE_Conv1x1(
            self.param['feature_size'], self.param['feature_size'],
            latent_size=16, hidden_size=128)
    elif cvae_arch == 'conv_c':
        # Conv 1x1 version C (even smaller latent dimension)
        print('Using CVAE class: My_CVAE_Conv1x1_C (latent=4x4x4, hidden=128x4x4)')
        self.network_pred_cvae = My_CVAE_Conv1x1(
            self.param['feature_size'], self.param['feature_size'],
            latent_size=4, hidden_size=128)
    elif cvae_arch == 'conv_d':
        # Conv + FC version D (global latent space)
        print('Using CVAE class: My_CVAE_ConvFC (latent=8x1x1, hidden=256x4x4)')
        self.network_pred_cvae = My_CVAE_ConvFC(
            self.param['feature_size'], self.param['feature_size'],
            latent_size=8, hidden_size=256, spatial_size=self.last_size)
    elif cvae_arch == 'conv_e':
        # Conv + FC version E (global latent space, size 16)
        print('Using CVAE class: My_CVAE_ConvFC (latent=16x1x1, hidden=256x4x4)')
        self.network_pred_cvae = My_CVAE_ConvFC(
            self.param['feature_size'], self.param['feature_size'],
            latent_size=16, hidden_size=256, spatial_size=self.last_size)
    elif cvae_arch == 'vrnn_a':
        # NOTE: this branch does not set self.network_pred_cvae, so the
        # weight initialization below will fail for 'vrnn_a'
        print('Using VRNN class: My_VRNN_A')
    else:
        raise Exception('CVAE architecture not recognized: ' + cvae_arch)

    self.mask = None
    self.relu = nn.ReLU(inplace=False)
    self._initialize_weights(self.agg)
    self._initialize_weights(self.network_pred_cvae)

    if action_cls_head:
        # See eval/model_3d_lc.py
        if isinstance(num_class, int):
            num_class = [num_class]  # singleton list to simplify the code
        assert isinstance(num_class, list)
        self.num_class = num_class
        self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
        self.final_bn.weight.data.fill_(1)
        self.final_bn.bias.data.zero_()
        self.final_fc = []
        for cur_num_cls in num_class:
            cur_fc = nn.Sequential(
                nn.Dropout(dropout),
                nn.Linear(self.param['feature_size'], cur_num_cls))
            self._initialize_weights(cur_fc)
            self.final_fc.append(cur_fc)
        # IMPORTANT, otherwise pytorch won't register the submodules
        self.final_fc = nn.ModuleList(self.final_fc)
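
# --- Head-application sketch (illustrative, not from the source) ---
# How the action head built above is presumably applied in forward():
# BatchNorm1d over a pooled context vector, then one Dropout+Linear head per
# task. Pooling, shapes, and class counts here are assumptions.
final_bn = nn.BatchNorm1d(256)
heads = nn.ModuleList([
    nn.Sequential(nn.Dropout(0.5), nn.Linear(256, n))
    for n in [97, 300]                        # placeholder class counts
])
context = torch.randn(8, 256)                 # (B, feature_size), pooled upstream
logits = [fc(final_bn(context)) for fc in heads]  # one (B, n) tensor per task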