Exemplo n.º 1
0
    def __init__(self,
                 hidden_dim=512,
                 rel_visual_dim=4096,
                 rel_pos_inp_dim=6,
                 rel_pos_dim=256,
                 dropout_rate=0.2,
                 nl_ranking_layer=4,
                 order='leftright',
                 sal_input='both'):
        """Record the configuration and build the two sub-modules.

        Creates ``pos_proj`` (a linear layer) and ``ranking_ctx_rnn`` (an
        ``AlternatingHighwayLSTM``); every other argument is only stored.

        Args:
            hidden_dim: Hidden size of ``ranking_ctx_rnn``.
            rel_visual_dim: Stored as ``self.rel_pair_dim``; added to
                ``rel_pos_dim`` to form the input size of ``ranking_ctx_rnn``.
            rel_pos_inp_dim: Input feature count of ``pos_proj``.
            rel_pos_dim: Output feature count of ``pos_proj``.
            dropout_rate: Passed to ``ranking_ctx_rnn`` as
                ``recurrent_dropout_probability``.
            nl_ranking_layer: Number of layers of ``ranking_ctx_rnn``.
            order: One of ``'size'``, ``'confidence'``, ``'random'``,
                ``'leftright'``. Only validated and stored here.
            sal_input: One of ``'both'``, ``'sal'``, ``'area'``, ``'empty'``.
                Only validated and stored here.

        Raises:
            AssertionError: If ``order`` or ``sal_input`` is not an accepted
                value (the checks are ``assert`` statements, so they are
                skipped under ``python -O``).
        """
        super(RankingContext, self).__init__()

        allowed_orders = ('size', 'confidence', 'random', 'leftright')
        allowed_sal_inputs = ('both', 'sal', 'area', 'empty')

        # Plain configuration values.
        self.hidden_dim = hidden_dim
        self.rel_pair_dim = rel_visual_dim
        self.rel_pos_inp_dim = rel_pos_inp_dim
        self.rel_pos_dim = rel_pos_dim
        self.dropout_rate = dropout_rate

        assert order in allowed_orders
        self.order = order
        self.nl_ranking_layer = nl_ranking_layer

        # Sub-modules are created in a fixed order (projection first, then
        # the RNN) so parameter registration order stays the same.
        self.pos_proj = nn.Linear(rel_pos_inp_dim, rel_pos_dim)

        # The RNN input size is the pair feature size plus the projected
        # position feature size.
        rnn_input_dim = rel_visual_dim + rel_pos_dim
        self.ranking_ctx_rnn = AlternatingHighwayLSTM(
            input_size=rnn_input_dim,
            hidden_size=hidden_dim,
            num_layers=nl_ranking_layer,
            recurrent_dropout_probability=dropout_rate,
        )

        assert sal_input in allowed_sal_inputs
        self.sal_input = sal_input
Exemplo n.º 2
0
    def __init__(self, classes, rel_classes, mode='sgdet',
                 embed_dim=200, hidden_dim=256, obj_dim=2048,
                 nl_obj=2, nl_edge=2, dropout_rate=0.2, order='confidence',
                 pass_in_obj_feats_to_decoder=True,
                 pass_in_obj_feats_to_edge=True):
        """Record the configuration and build embeddings, decoder and RNNs.

        Always creates ``obj_embed``, ``obj_embed2`` and ``pos_embed``.
        Depending on ``nl_obj`` it then creates either ``obj_ctx_rnn`` plus
        ``decoder_rnn`` or a single linear ``decoder_lin``; ``edge_ctx_rnn``
        is created only when ``nl_edge > 0``.

        NOTE(review): ``self.num_classes`` is read below but is not assigned
        in this method -- presumably it is defined elsewhere on the class;
        confirm.

        Args:
            classes: Stored; also passed to ``obj_edge_vectors`` and
                ``DecoderRNN``.
            rel_classes: Stored only.
            mode: Must be a member of ``MODES``.
            embed_dim: Dimension of both object embeddings (also passed as
                ``wv_dim`` to ``obj_edge_vectors``).
            hidden_dim: Hidden size of every RNN built here.
            obj_dim: Contributes to the input sizes of the object RNN, the
                decoder and (optionally) the edge RNN.
            nl_obj: Number of layers of ``obj_ctx_rnn``; when not ``> 0`` a
                linear decoder is built instead.
            nl_edge: Number of layers of ``edge_ctx_rnn``.
            dropout_rate: Recurrent dropout probability handed to the RNNs
                and to ``DecoderRNN``.
            order: One of ``'size'``, ``'confidence'``, ``'random'``,
                ``'leftright'``. Only validated and stored here.
            pass_in_obj_feats_to_decoder: When truthy, the decoder input size
                also includes ``obj_dim + embed_dim``.
            pass_in_obj_feats_to_edge: When truthy, the edge RNN input size
                also includes ``obj_dim``.

        Raises:
            AssertionError: If ``mode`` is not in ``MODES`` or ``order`` is
                not an accepted value (``assert`` statements, skipped under
                ``python -O``).
        """
        super(LinearizedContext, self).__init__()

        allowed_orders = ('size', 'confidence', 'random', 'leftright')
        # Width of the box-position embedding produced by ``pos_embed``.
        pos_feat_dim = 128

        self.classes = classes
        self.rel_classes = rel_classes
        assert mode in MODES
        self.mode = mode

        self.nl_obj = nl_obj
        self.nl_edge = nl_edge

        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.obj_dim = obj_dim
        self.dropout_rate = dropout_rate
        self.pass_in_obj_feats_to_decoder = pass_in_obj_feats_to_decoder
        self.pass_in_obj_feats_to_edge = pass_in_obj_feats_to_edge

        assert order in allowed_orders
        self.order = order

        # Object embeddings: both tables start from the same vectors, but each
        # receives its own clone so their weights are independent tensors.
        initial_vectors = obj_edge_vectors(self.classes, wv_dim=embed_dim)
        self.obj_embed = nn.Embedding(self.num_classes, embed_dim)
        self.obj_embed.weight.data = initial_vectors.clone()

        self.obj_embed2 = nn.Embedding(self.num_classes, embed_dim)
        self.obj_embed2.weight.data = initial_vectors.clone()

        # Position embedding over 4 input features.
        # (Original author's note: this probably doesn't help much.)
        self.pos_embed = nn.Sequential(
            nn.BatchNorm1d(4, momentum=BATCHNORM_MOMENTUM / 10.0),
            nn.Linear(4, pos_feat_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
        )

        # Object context: an RNN followed by an RNN decoder, or -- when no
        # object layers are requested -- a plain linear classifier.
        if nl_obj > 0:
            self.obj_ctx_rnn = AlternatingHighwayLSTM(
                input_size=obj_dim + embed_dim + pos_feat_dim,
                hidden_size=hidden_dim,
                num_layers=nl_obj,
                recurrent_dropout_probability=dropout_rate,
            )

            decoder_inputs_dim = hidden_dim + (
                obj_dim + embed_dim if pass_in_obj_feats_to_decoder else 0
            )
            self.decoder_rnn = DecoderRNN(
                self.classes,
                embed_dim=embed_dim,
                inputs_dim=decoder_inputs_dim,
                hidden_dim=hidden_dim,
                recurrent_dropout_probability=dropout_rate,
            )
        else:
            self.decoder_lin = nn.Linear(obj_dim + embed_dim + pos_feat_dim,
                                         self.num_classes)

        # Edge context RNN; its input size grows with the optional inputs.
        if nl_edge > 0:
            input_dim = embed_dim
            input_dim += hidden_dim if nl_obj > 0 else 0
            input_dim += obj_dim if pass_in_obj_feats_to_edge else 0
            self.edge_ctx_rnn = AlternatingHighwayLSTM(
                input_size=input_dim,
                hidden_size=hidden_dim,
                num_layers=nl_edge,
                recurrent_dropout_probability=dropout_rate)