Exemplo n.º 1
0
    def _compute_vert_context_soft(self, edge_factor, vert_factor, reuse=False):
        """
        Attention-based vertex (node) message pooling.

        Every vertex receives a weighted sum of its outgoing and incoming
        edge factors; each weight is a sigmoid score produced by a 1-unit
        FC layer applied to [edge factor, receiving vertex factor].
        """
        # Outgoing / incoming edge factors for every (vertex, edge) pair.
        outgoing = utils.pad_and_gather(edge_factor, self.edge_pair_mask_inds[:, 0])
        incoming = utils.pad_and_gather(edge_factor, self.edge_pair_mask_inds[:, 1])

        # Replicate each receiving vertex factor once per adjacent edge.
        receivers = tf.gather(vert_factor, self.edge_pair_segment_inds)

        # Score every message against its receiver (1-unit FC + sigmoid).
        (self.feed(tf.concat(concat_dim=1, values=[outgoing, receivers]))
             .fc(1, relu=False, reuse=reuse, name='out_edge_w_fc')
             .sigmoid(name='out_edge_score'))
        (self.feed(tf.concat(concat_dim=1, values=[incoming, receivers]))
             .fc(1, relu=False, reuse=reuse, name='in_edge_w_fc')
             .sigmoid(name='in_edge_score'))

        # Apply the attention weights to the raw edge factors.
        weighted_sum = (tf.mul(outgoing, self.get_output('out_edge_score'))
                        + tf.mul(incoming, self.get_output('in_edge_score')))

        # Pool the weighted messages back onto their receiving vertices.
        return tf.segment_sum(weighted_sum, self.edge_pair_segment_inds)
Exemplo n.º 2
0
    def _compute_vert_context_fus(self, edge_factor, vert_factor, reuse=False):
        """
        Attention-based vertex (node) message pooling with temporal fusion.

        In addition to the in-frame edge messages, each vertex also pools
        the vertex factors of matched detections of the same object in
        other frames, each message weighted by a learned sigmoid score.
        """
        # In-frame outgoing / incoming edge factors per (vertex, edge) pair.
        outgoing = utils.pad_and_gather(edge_factor,
                                        self.edge_pair_mask_inds[:, 0])
        incoming = utils.pad_and_gather(edge_factor,
                                        self.edge_pair_mask_inds[:, 1])
        # Receiving vertex factor repeated once per adjacent edge; the
        # segment ids line up row-for-row with edge_pair_mask_inds.
        paired_verts = tf.gather(vert_factor, self.edge_pair_segment_inds)

        # Cross-frame matches of each object, and the receiving vertex
        # factor laid out in the same row order for concatenation.
        matched_verts = utils.pad_and_gather(vert_factor,
                                             self.obj_fus_mask_inds[:])
        fus_receivers = tf.gather(vert_factor, self.obj_fus_segment_inds)

        # Compatibility score for every message: a 1-unit FC over the
        # [message, receiver] concatenation, squashed by a sigmoid.
        (self.feed(tf.concat(concat_dim=1, values=[outgoing, paired_verts]))
             .fc(1, relu=False, reuse=reuse, name='out_edge_w_fc')
             .sigmoid(name='out_edge_score'))
        (self.feed(tf.concat(concat_dim=1, values=[incoming, paired_verts]))
             .fc(1, relu=False, reuse=reuse, name='in_edge_w_fc')
             .sigmoid(name='in_edge_score'))
        (self.feed(tf.concat(concat_dim=1,
                             values=[matched_verts, fus_receivers]))
             .fc(1, relu=False, reuse=reuse, name='vid_vert_w_fc')
             .sigmoid(name='vid_vert_score'))

        # Weight each message by its attention score.
        edge_messages = (tf.mul(outgoing, self.get_output('out_edge_score'))
                         + tf.mul(incoming, self.get_output('in_edge_score')))
        fusion_messages = tf.mul(matched_verts,
                                 self.get_output('vid_vert_score'))

        # Per-vertex pooling: in-frame edge messages plus cross-frame ones.
        return (tf.segment_sum(edge_messages, self.edge_pair_segment_inds)
                + tf.segment_sum(fusion_messages, self.obj_fus_segment_inds))
Exemplo n.º 3
0
    def _compute_vert_context_soft(self,
                                   edge_factor,
                                   vert_factor,
                                   reuse=False):
        """
        Attention-based vertex (node) message pooling.

        Pools each vertex's outgoing and incoming edge factors into one
        context vector, weighting every edge message with a learned
        sigmoid compatibility score against the receiving vertex.
        """
        # Edge factors gathered per (vertex, edge) pair.
        out_msgs = utils.pad_and_gather(edge_factor,
                                        self.edge_pair_mask_inds[:, 0])
        in_msgs = utils.pad_and_gather(edge_factor,
                                       self.edge_pair_mask_inds[:, 1])
        # Receiving vertex factor, repeated once per adjacent edge.
        receivers = tf.gather(vert_factor, self.edge_pair_segment_inds)

        # Concatenate each message with its receiver and score it with a
        # 1-unit FC + sigmoid.
        out_pairs = tf.concat(concat_dim=1, values=[out_msgs, receivers])
        in_pairs = tf.concat(concat_dim=1, values=[in_msgs, receivers])
        (self.feed(out_pairs)
             .fc(1, relu=False, reuse=reuse, name='out_edge_w_fc')
             .sigmoid(name='out_edge_score'))
        (self.feed(in_pairs)
             .fc(1, relu=False, reuse=reuse, name='in_edge_w_fc')
             .sigmoid(name='in_edge_score'))

        # Weight the messages by their scores, then pool per vertex.
        pooled = tf.mul(out_msgs, self.get_output('out_edge_score')) + \
            tf.mul(in_msgs, self.get_output('in_edge_score'))
        return tf.segment_sum(pooled, self.edge_pair_segment_inds)
Exemplo n.º 4
0
    def _compute_vert_context_hard(self, edge_factor, vert_factor, reduction_mode='max'):
        """
        Hard (non-attentional) message pooling: reduce each vertex's
        adjacent edge factors with a max or average reduction.

        `reduction_mode` is forwarded unchanged to
        utils.padded_segment_reduce.
        """
        # Edge factors adjacent to each vertex, padded where needed.
        adjacent = utils.pad_and_gather(edge_factor, self.edge_mask_inds, None)
        # Collapse each vertex's segment of edge factors to one vector.
        return utils.padded_segment_reduce(adjacent,
                                           self.edge_segment_inds,
                                           vert_factor.get_shape()[0],
                                           reduction_mode)
Exemplo n.º 5
0
    def _compute_edge_context_3d_fus(self,
                                     vert_factor,
                                     edge_factor,
                                     reuse=False):
        """
        Attention-based edge message pooling with geometry encoding and
        temporal fusion.

        Produces the edge context (input to the edge GRU) as a weighted
        sum of: the subject vertex factor, the object vertex factor, an
        MLP encoding of the pair geometry, and matching edge factors from
        other frames. All weights are learned sigmoid scores.
        """
        # Subject/object vertex factors and geometric states per relation.
        vert_pairs = utils.gather_vec_pairs(vert_factor, self.relations)
        geo_pairs = utils.gather_vec_pairs(self.geo_state, self.relations)
        # Side effect kept: expose the raw (pre-normalization) geo pairs.
        self.geo_pairs = geo_pairs

        # Matching edge factors from the other frames, plus the current
        # edge factor laid out in the same row order so a simple concat
        # and segment-sum recombine them.
        matched_edges = utils.pad_and_gather(edge_factor,
                                             self.rel_fus_mask_inds[:])
        fus_edges = tf.gather(edge_factor, self.rel_fus_segment_inds)
        vid_pairs = tf.concat(concat_dim=1,
                              values=[matched_edges, fus_edges])

        # Split the [subject, object] halves of the pair tensors.
        # (sub_geo / obj_geo are currently unused; the geometry path
        # below consumes geo_pairs directly.)
        sub_vert, obj_vert = tf.split(split_dim=1, num_split=2,
                                      value=vert_pairs)
        sub_geo, obj_geo = tf.split(split_dim=1, num_split=2, value=geo_pairs)

        # Score each vertex message against the current edge state.
        (self.feed(tf.concat(concat_dim=1, values=[sub_vert, edge_factor]))
             .fc(1, relu=False, reuse=reuse, name='sub_vert_w_fc')
             .sigmoid(name='sub_vert_score'))
        (self.feed(tf.concat(concat_dim=1, values=[obj_vert, edge_factor]))
             .fc(1, relu=False, reuse=reuse, name='obj_vert_w_fc')
             .sigmoid(name='obj_vert_score'))

        # Encode the normalized relation geometry with a small 2-layer MLP.
        geo_pairs = self.norm_geo_pairs(geo_pairs, sc=500)
        (self.feed(geo_pairs)
             .fc(100, relu=False, reuse=reuse, name='geo_fc1_obj')
             .sigmoid(name='geo_encoded_pre')
             .fc(512, relu=False, reuse=reuse, name='geo_fc2_obj')
             .sigmoid(name='geo_encoded'))
        geo_encoded = self.get_output('geo_encoded')

        # Score the geometry encoding against the edge state.
        (self.feed(tf.concat(concat_dim=1, values=[geo_encoded, edge_factor]))
             .fc(1, relu=False, reuse=reuse, name='geo_w_fc')
             .sigmoid(name='geo_vert_score'))

        # Score the messages coming from the other frames.
        (self.feed(vid_pairs)
             .fc(1, relu=False, reuse=reuse, name='vid_edge_w_fc')
             .sigmoid(name='vid_edge_score'))

        # Weight every message by its score and sum into the edge context.
        context = tf.mul(sub_vert, self.get_output('sub_vert_score'))
        context += tf.mul(obj_vert, self.get_output('obj_vert_score'))
        context += tf.mul(geo_encoded, self.get_output('geo_vert_score'))
        fused = tf.mul(matched_edges, self.get_output('vid_edge_score'))
        return context + tf.segment_sum(fused, self.rel_fus_segment_inds)