def __init__(self, hidden_dim=512, rel_visual_dim=4096, rel_pos_inp_dim=6,
             rel_pos_dim=256, dropout_rate=0.2, nl_ranking_layer=4,
             order='leftright', sal_input='both'):
    """Build the positional projection and the ranking context RNN.

    Args:
        hidden_dim: Hidden size of ``ranking_ctx_rnn``.
        rel_visual_dim: Stored as ``rel_pair_dim``; contributes to the
            RNN input size.
        rel_pos_inp_dim: Number of input features of ``pos_proj``.
        rel_pos_dim: Number of output features of ``pos_proj``; also
            contributes to the RNN input size.
        dropout_rate: Recurrent dropout probability handed to the RNN.
        nl_ranking_layer: Number of layers of ``ranking_ctx_rnn``.
        order: One of ``'size'``, ``'confidence'``, ``'random'``,
            ``'leftright'``. Only validated and stored here.
        sal_input: One of ``'both'``, ``'sal'``, ``'area'``, ``'empty'``.
            Only validated and stored here.

    Raises:
        AssertionError: If ``order`` or ``sal_input`` is not one of the
            accepted values.
    """
    super(RankingContext, self).__init__()

    allowed_orders = ('size', 'confidence', 'random', 'leftright')
    allowed_sal_inputs = ('both', 'sal', 'area', 'empty')

    # Plain hyper-parameters kept on the module.
    self.hidden_dim = hidden_dim
    self.rel_pair_dim = rel_visual_dim
    self.rel_pos_inp_dim = rel_pos_inp_dim
    self.rel_pos_dim = rel_pos_dim
    self.dropout_rate = dropout_rate

    assert order in allowed_orders
    self.order = order
    self.nl_ranking_layer = nl_ranking_layer

    # Sub-modules are registered in this order: pos_proj, then the RNN.
    self.pos_proj = nn.Linear(self.rel_pos_inp_dim, self.rel_pos_dim)

    # The RNN input size is the sum of the pair width and the projected
    # position width.
    rnn_input_size = self.rel_pair_dim + self.rel_pos_dim
    self.ranking_ctx_rnn = AlternatingHighwayLSTM(
        input_size=rnn_input_size,
        hidden_size=self.hidden_dim,
        num_layers=self.nl_ranking_layer,
        recurrent_dropout_probability=dropout_rate,
    )

    # NOTE: this check runs only after the RNN has been constructed.
    assert sal_input in allowed_sal_inputs
    self.sal_input = sal_input
def __init__(self, classes, rel_classes, mode='sgdet', embed_dim=200,
             hidden_dim=256, obj_dim=2048, nl_obj=2, nl_edge=2,
             dropout_rate=0.2, order='confidence',
             pass_in_obj_feats_to_decoder=True,
             pass_in_obj_feats_to_edge=True):
    """Build the embeddings, object context/decoder and edge context layers.

    Args:
        classes: Object classes; passed to ``obj_edge_vectors`` and to
            ``DecoderRNN``.
        rel_classes: Relation classes; only stored here.
        mode: Must be a member of ``MODES``.
        embed_dim: Width of the object embeddings (also the ``wv_dim``
            requested from ``obj_edge_vectors``).
        hidden_dim: Hidden size of the context RNNs and the decoder RNN.
        obj_dim: Counted in the input sizes of the object RNN, the
            decoder and (optionally) the edge RNN.
        nl_obj: Number of object-context RNN layers. When not positive,
            no object RNN or decoder RNN is built and a single linear
            layer (``decoder_lin``) is created instead.
        nl_edge: Number of edge-context RNN layers. When not positive,
            no edge RNN is built.
        dropout_rate: Recurrent dropout probability for the RNNs.
        order: One of ``'size'``, ``'confidence'``, ``'random'``,
            ``'leftright'``. Only validated and stored here.
        pass_in_obj_feats_to_decoder: If true, the decoder input size
            grows by ``obj_dim + embed_dim``.
        pass_in_obj_feats_to_edge: If true, the edge RNN input size
            grows by ``obj_dim``.

    Raises:
        AssertionError: If ``mode`` is not in ``MODES`` or ``order`` is
            not one of the accepted values.
    """
    super(LinearizedContext, self).__init__()

    self.classes = classes
    self.rel_classes = rel_classes
    assert mode in MODES
    self.mode = mode

    self.nl_obj = nl_obj
    self.nl_edge = nl_edge

    self.embed_dim = embed_dim
    self.hidden_dim = hidden_dim
    self.obj_dim = obj_dim
    self.dropout_rate = dropout_rate

    self.pass_in_obj_feats_to_decoder = pass_in_obj_feats_to_decoder
    self.pass_in_obj_feats_to_edge = pass_in_obj_feats_to_edge

    allowed_orders = ('size', 'confidence', 'random', 'leftright')
    assert order in allowed_orders
    self.order = order

    # EMBEDDINGS
    # Both tables start from independent copies of the same vectors.
    # NOTE(review): self.num_classes is not set in this method; it is
    # presumably a property defined elsewhere on the class -- confirm.
    init_vectors = obj_edge_vectors(self.classes, wv_dim=self.embed_dim)

    self.obj_embed = nn.Embedding(self.num_classes, self.embed_dim)
    self.obj_embed.weight.data = init_vectors.clone()

    self.obj_embed2 = nn.Embedding(self.num_classes, self.embed_dim)
    self.obj_embed2.weight.data = init_vectors.clone()

    # Output width of pos_embed; it is also part of the object-level
    # input sizes computed below.
    pos_feat_dim = 128

    # This probably doesn't help it much
    self.pos_embed = nn.Sequential(
        nn.BatchNorm1d(4, momentum=BATCHNORM_MOMENTUM / 10.0),
        nn.Linear(4, pos_feat_dim),
        nn.ReLU(inplace=True),
        nn.Dropout(0.1),
    )

    obj_input_size = self.obj_dim + self.embed_dim + pos_feat_dim

    if self.nl_obj > 0:
        self.obj_ctx_rnn = AlternatingHighwayLSTM(
            input_size=obj_input_size,
            hidden_size=self.hidden_dim,
            num_layers=self.nl_obj,
            recurrent_dropout_probability=dropout_rate,
        )

        # The decoder always receives the context state; the extra
        # obj_dim + embed_dim inputs are added only when requested.
        extra_decoder_inputs = (
            self.obj_dim + self.embed_dim
            if self.pass_in_obj_feats_to_decoder
            else 0
        )
        decoder_input_size = self.hidden_dim + extra_decoder_inputs

        self.decoder_rnn = DecoderRNN(
            self.classes,
            embed_dim=self.embed_dim,
            inputs_dim=decoder_input_size,
            hidden_dim=self.hidden_dim,
            recurrent_dropout_probability=dropout_rate,
        )
    else:
        # No object context RNN: a single linear layer of width
        # num_classes is created in its place.
        self.decoder_lin = nn.Linear(obj_input_size, self.num_classes)

    if self.nl_edge > 0:
        # Base input is embed_dim; hidden_dim is added when an object
        # RNN exists, and obj_dim is added when requested.
        edge_input_size = self.embed_dim
        edge_input_size += self.hidden_dim if self.nl_obj > 0 else 0
        edge_input_size += self.obj_dim if self.pass_in_obj_feats_to_edge else 0

        self.edge_ctx_rnn = AlternatingHighwayLSTM(
            input_size=edge_input_size,
            hidden_size=self.hidden_dim,
            num_layers=self.nl_edge,
            recurrent_dropout_probability=dropout_rate,
        )