Exemplo n.º 1
0
    def __init__(self, vocab, configs):
        """Assemble the visual backbone, scene graph, reasoning module and losses.

        Args:
            vocab: vocabulary object; stored unchanged on ``self.vocab``.
            configs: configuration namespace. This constructor reads
                ``configs.model.sg_dims``, ``configs.model.vse_large_scale``,
                ``configs.model.vse_known_belong``,
                ``configs.model.vse_hidden_dims``,
                ``configs.train.scene_add_supervision`` and
                ``configs.train.qa_add_supervision``.
        """
        super().__init__()
        self.vocab = vocab

        # Pretrained ResNet-34 without global average pooling or classifier;
        # its last stage (layer4) is swapped for an identity module.
        import jactorch.models.vision.resnet as resnet
        backbone = resnet.resnet34(pretrained=True,
                                   incl_gap=False,
                                   num_classes=None)
        backbone.layer4 = jacnn.Identity()
        self.resnet = backbone

        import nscl.nn.scene_graph.scene_graph as sng
        model_cfg = configs.model
        # number of channels = 256; downsample rate = 16.
        feature_channels = 256
        downsample_rate = 16
        self.scene_graph = sng.SceneGraph(feature_channels, model_cfg.sg_dims,
                                          downsample_rate)

        import nscl.nn.reasoning_v1.quasi_symbolic as qs
        vse_concepts = self._make_vse_concepts(model_cfg.vse_large_scale,
                                               model_cfg.vse_known_belong)
        self.reasoning = qs.DifferentiableReasoning(
            vse_concepts, self.scene_graph.output_dims,
            model_cfg.vse_hidden_dims)

        import nscl.nn.reasoning_v1.losses as vqa_losses
        train_cfg = configs.train
        self.scene_loss = vqa_losses.SceneParsingLoss(
            gdef.all_concepts,
            add_supervision=train_cfg.scene_add_supervision)
        self.qa_loss = vqa_losses.QALoss(
            add_supervision=train_cfg.qa_add_supervision)
Exemplo n.º 2
0
    def build(self):
        """Create the layers of this model.

        Reads ``self.use_vision``, ``self.use_lm``, ``self.tools``,
        ``self.num_vocab`` and ``self.dim``. Always creates ``self.mlp``,
        ``self.embedding``, ``self.gru`` and ``self.loss_fn``; creates
        ``self.resnet`` only when ``self.use_vision`` is set, and the
        decoding head (``self.gru_dropout``, ``self.decode``,
        ``self.decode_loss``) only when ``self.use_lm`` is set.
        """
        gru_hidden = 128
        # The GRU is bidirectional, so its output is twice the hidden size.
        text_width = gru_hidden * 2
        num_answers = len(self.tools.answers)

        if not self.use_vision:
            self.mlp = jacnn.MLPLayer(text_width, num_answers, [256])
        else:
            # Pretrained ResNet-34 without global average pooling or
            # classifier; its last stage (layer4) becomes an identity module.
            import jactorch.models.vision.resnet as resnet
            backbone = resnet.resnet34(pretrained=True,
                                       incl_gap=False,
                                       num_classes=None)
            backbone.layer4 = jacnn.Identity()
            self.resnet = backbone

            # 256 extra input features on top of the text features
            # (presumably the backbone's channel count -- TODO confirm).
            self.mlp = jacnn.MLPLayer(256 + text_width, num_answers, [512])

        # The '<NULL>' word is used as the padding index of the embedding.
        self.embedding = nn.Embedding(self.num_vocab,
                                      self.dim,
                                      padding_idx=self.tools.words['<NULL>'])

        self.gru = jacnn.GRULayer(self.dim,
                                  gru_hidden,
                                  1,
                                  bidirectional=True,
                                  batch_first=True,
                                  dropout=0.1)

        self.loss_fn = F.nll_loss

        if not self.use_lm:
            return

        # Decoding head: maps GRU outputs back onto the vocabulary.
        self.gru_dropout = nn.Dropout(0.1)
        decoder = nn.Linear(text_width, self.num_vocab)
        decoder.bias.data.zero_()
        self.decode = decoder
        self.decode_loss = jacnn.CrossEntropyLoss(average='none')
Exemplo n.º 3
0
    def __init__(self, vocab, configs, args=None):
        """Assemble the backbone, scene graph, reasoning module and losses.

        Args:
            vocab: vocabulary object; stored unchanged on ``self.vocab``.
            configs: configuration namespace. This constructor reads
                ``configs.model.*`` (``sg_dims``, ``vse_large_scale``,
                ``vse_known_belong``, ``vse_hidden_dims``),
                ``configs.train.*`` (``scene_add_supervision``,
                ``qa_add_supervision``), ``configs.rel_box_flag`` and
                ``configs.dynamic_ftr_flag``. It is also handed to the scene
                graph as its ``args``.
            args: run-time options namespace. Despite the ``None`` default it
                is required: ``box_iou_for_collision_flag`` is always read,
                and ``box_only_for_collision_flag`` is read when
                ``configs.dynamic_ftr_flag`` is set.

        Raises:
            ValueError: if ``args`` is ``None``.
        """
        super().__init__()
        if args is None:
            # Fail before building the pretrained backbone; the flags below
            # are read from ``args`` unconditionally, so None can never work.
            raise ValueError(
                '{} requires a non-None `args` namespace.'.format(
                    type(self).__name__))
        self.vocab = vocab
        self.args = args

        # Pretrained ResNet-34 without global average pooling or classifier;
        # its last stage (layer4) is swapped for an identity module.
        import jactorch.models.vision.resnet as resnet
        self.resnet = resnet.resnet34(pretrained=True,
                                      incl_gap=False,
                                      num_classes=None)
        self.resnet.layer4 = jacnn.Identity()

        import clevrer.models.scene_graph as sng
        # number of channels = 256; downsample rate = 16.
        self.scene_graph = sng.SceneGraph(256,
                                          configs.model.sg_dims,
                                          16,
                                          args=configs)

        import clevrer.models.quasi_symbolic as qs
        # NOTE: the scene graph's ``output_dims`` list is adjusted in place,
        # so the scene graph itself sees the updated entry 2 as well.
        # (Entry 2 presumably sizes the pairwise features and entry 3 the
        # per-object box sequence -- TODO confirm against SceneGraph.)
        dims = self.scene_graph.output_dims
        if configs.rel_box_flag:
            dims[2] = dims[2] * 2
        if configs.dynamic_ftr_flag:
            if self.args.box_only_for_collision_flag:
                # Box-only mode: entry 2 is replaced rather than extended.
                dims[2] = dims[3] * 4
            else:
                dims[2] = dims[2] + dims[3] * 4

        if self.args.box_iou_for_collision_flag:
            box_dim = 4
            dims[2] += int(dims[3] / box_dim)

        self.reasoning = qs.DifferentiableReasoning(
            self._make_vse_concepts(configs.model.vse_large_scale,
                                    configs.model.vse_known_belong),
            self.scene_graph.output_dims,
            configs.model.vse_hidden_dims,
            args=self.args)

        import clevrer.losses as vqa_losses
        self.scene_loss = vqa_losses.SceneParsingLoss(
            gdef.all_concepts_clevrer,
            add_supervision=configs.train.scene_add_supervision,
            args=self.args)
        self.qa_loss = vqa_losses.QALoss(
            add_supervision=configs.train.qa_add_supervision)
Exemplo n.º 4
0
    def __init__(self, configs, args=None):
        """Assemble the backbone, scene graph, reasoning module and losses.

        Args:
            configs: configuration namespace. This constructor reads
                ``configs.model.*`` (``sg_dims``, ``vse_known_belong``,
                ``vse_hidden_dims``), ``configs.train.*``
                (``scene_add_supervision``, ``qa_add_supervision``),
                ``configs.rel_box_flag`` and ``configs.dynamic_ftr_flag``.
                NOTE: it is mutated -- ``configs.colli_ftr_type`` is
                overwritten with ``args.colli_ftr_type`` -- and then handed
                to the scene graph as its ``args``.
            args: run-time options namespace. Despite the ``None`` default it
                is required: ``colli_ftr_type``, ``smp_coll_frm_num`` and
                ``box_iou_for_collision_flag`` are always read, and
                ``box_only_for_collision_flag`` is read when
                ``configs.dynamic_ftr_flag`` is set.

        Raises:
            ValueError: if ``args`` is ``None``.
        """
        super().__init__()
        if args is None:
            # ``args.colli_ftr_type`` is read immediately below, so None can
            # never work; report that clearly instead of an AttributeError.
            raise ValueError(
                '{} requires a non-None `args` namespace.'.format(
                    type(self).__name__))
        self.args = args
        configs.colli_ftr_type = args.colli_ftr_type

        # Pretrained ResNet-34 without global average pooling or classifier;
        # its last stage (layer4) is swapped for an identity module.
        import jactorch.models.vision.resnet as resnet
        self.resnet = resnet.resnet34(pretrained=True,
                                      incl_gap=False,
                                      num_classes=None)
        self.resnet.layer4 = jacnn.Identity()

        import clevrer.models.scene_graph as sng
        # number of channels = 256; downsample rate = 16.
        self.scene_graph = sng.SceneGraph(256,
                                          configs.model.sg_dims,
                                          16,
                                          args=configs)

        import clevrer.models.quasi_symbolic_v2 as qs
        dims = self.scene_graph.output_dims
        box_dim = 4
        ftr_dim = dims[3]
        # Number of boxes in entry 3, then the number of whole groups of
        # ``smp_coll_frm_num`` boxes (any remainder is discarded).
        time_step = int(ftr_dim / box_dim)
        offset = time_step % self.args.smp_coll_frm_num
        seg_frm_num = int((time_step - offset) / self.args.smp_coll_frm_num)

        # NOTE: the scene graph's ``output_dims`` list is adjusted in place,
        # so the scene graph itself sees the updated entry 2 as well.
        if configs.rel_box_flag:
            dims[2] = dims[2] * 2
        if configs.dynamic_ftr_flag:
            if self.args.box_only_for_collision_flag:
                # Box-only mode: entry 2 is replaced rather than extended.
                dims[2] = seg_frm_num * 4 * box_dim
            else:
                dims[2] = dims[2] + seg_frm_num * 4 * box_dim

        if self.args.box_iou_for_collision_flag:
            dims[2] += seg_frm_num

        self.reasoning = qs.DifferentiableReasoning(
            self._make_vse_concepts(configs.model.vse_known_belong),
            self.scene_graph.output_dims,
            configs.model.vse_hidden_dims,
            args=self.args,
            seg_frm_num=seg_frm_num)

        import clevrer.losses_v2 as vqa_losses
        self.scene_loss = vqa_losses.SceneParsingLoss(
            gdef.all_concepts_clevrer,
            add_supervision=configs.train.scene_add_supervision,
            args=self.args)
        self.qa_loss = vqa_losses.QALoss(
            add_supervision=configs.train.qa_add_supervision, args=self.args)