Example #1
    def __getitem__(self, index):
        metainfo = GView(self.get_metainfo(index))
        feed_dict = GView()

        # scene annotations
        if self.incl_scene:
            feed_dict.scene = metainfo.scene
            feed_dict.update(gdef.annotate_objects(metainfo.scene))
            feed_dict.objects_raw = feed_dict.objects.copy()
            feed_dict.update(gdef.annotate_scene(metainfo.scene))

        # image
        feed_dict.image_index = metainfo.image_index
        feed_dict.image_filename = metainfo.image_filename
        # video_folder
        feed_dict.video_folder = metainfo.video_folder
        feed_dict.video = []

        if self.image_root is not None:
            feed_dict.image = Image.open(
                osp.join(self.image_root,
                         feed_dict.image_filename)).convert("RGB")
            feed_dict.image, feed_dict.objects = self.image_transform(
                feed_dict.image, feed_dict.objects)

        # video frames: load every .png under the video folder
        # (sorted so that frame order is deterministic)
        if self.image_root is not None:
            import glob

            for name in sorted(glob.glob(
                    osp.join(self.image_root, feed_dict.video_folder, "*.png"))):
                feed_dict.video.append(Image.open(name).convert("RGB"))

        return feed_dict.raw()
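
The feed dict returned above mixes PIL images, strings, and a variable-length list of video frames, so PyTorch's default batching cannot collate it directly. A minimal consumption sketch, assuming a dataset instance of the class above (the collate function, batch size, and key names are illustrative, not taken from the source):

from torch.utils.data import DataLoader

def list_collate(batch):
    # keep the raw feed dicts as a plain Python list; the default collate_fn
    # would fail on PIL images and variable-length video frame lists
    return batch

loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=list_collate)
for batch in loader:
    for feed_dict in batch:
        print(feed_dict["image_filename"], len(feed_dict["video"]))
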
Example #2
    def __getitem__(self, index):
        metainfo = GView(self.get_metainfo(index))
        feed_dict = GView()

        # scene annotations
        if self.incl_scene:
            feed_dict.scene = metainfo.scene
            feed_dict.update(gdef.annotate_objects(metainfo.scene))
            feed_dict.objects_raw = feed_dict.objects.copy()
            feed_dict.update(gdef.annotate_scene(metainfo.scene))

        # image
        feed_dict.image_index = metainfo.image_index
        feed_dict.image_filename = metainfo.image_filename
        if self.image_root is not None:
            feed_dict.image = Image.open(osp.join(self.image_root, feed_dict.image_filename)).convert('RGB')
            feed_dict.image, feed_dict.objects = self.image_transform(feed_dict.image, feed_dict.objects)

        # program
        feed_dict.program_raw = metainfo.program_raw
        feed_dict.program_seq = metainfo.program_seq
        feed_dict.program_tree = metainfo.program_tree
        feed_dict.program_qsseq = metainfo.program_qsseq
        feed_dict.program_qstree = metainfo.program_qstree
        feed_dict.question_type = metainfo.question_type

        # question
        feed_dict.answer = True

        return feed_dict.raw()
Example #3
    def __getitem__(self, index):
        metainfo = GView(self.get_metainfo(index))
        feed_dict = GView()

        # scene annotations
        if self.incl_scene:
            feed_dict.scene = metainfo.scene
            feed_dict.update(gdef.annotate_objects(metainfo.scene))
            feed_dict.objects_raw = feed_dict.objects.copy()
            feed_dict.update(gdef.annotate_scene(metainfo.scene))

        # image
        feed_dict.image_index = metainfo.image_index
        feed_dict.image_filename = metainfo.image_filename

        # video
        feed_dict.video_folder = metainfo.video_folder
        video = []
        original_objects = feed_dict.objects
        if self.image_root is not None:
            feed_dict.image = Image.open(
                osp.join(self.image_root,
                         feed_dict.image_filename)).convert("RGB")
            feed_dict.image, feed_dict.objects = self.image_transform(
                feed_dict.image, feed_dict.objects)

        if self.image_root is not None and feed_dict.video_folder is not None:
            import glob

            # load and transform every frame in the video folder
            # (sorted so that frame order is deterministic)
            for name in sorted(glob.glob(
                    osp.join(self.image_root, feed_dict.video_folder, "*.png"))):
                image = Image.open(name).convert("RGB")
                image, _ = self.image_transform(image, original_objects)
                video.append(image)

            feed_dict.video = torch.cat(video)

        # program
        feed_dict.program_raw = metainfo.program_raw
        feed_dict.program_seq = metainfo.program_seq
        feed_dict.program_tree = metainfo.program_tree
        feed_dict.program_qsseq = metainfo.program_qsseq
        feed_dict.program_qstree = metainfo.program_qstree
        feed_dict.question_type = metainfo.question_type

        # question
        feed_dict.answer = True

        return feed_dict.raw()
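
Note that this variant concatenates the frames with torch.cat while Example #7 below stacks them with torch.stack. Assuming image_transform returns C x H x W tensors, cat joins the frames along the existing channel dimension, whereas stack adds a new leading frame dimension. A quick illustration with dummy frames (the sizes are arbitrary):

import torch

frames = [torch.zeros(3, 224, 224) for _ in range(5)]
print(torch.cat(frames).shape)    # torch.Size([15, 224, 224])
print(torch.stack(frames).shape)  # torch.Size([5, 3, 224, 224])
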
Example #4
    def __getitem__(self, index):
        metainfo = GView(self.get_metainfo(index))
        feed_dict = GView()

        # scene annotations
        if self.incl_scene:
            feed_dict.scene = metainfo.scene
            feed_dict.update(gdef.annotate_objects(metainfo.scene))
            feed_dict.objects_raw = feed_dict.objects.copy()
            feed_dict.update(gdef.annotate_scene(metainfo.scene))

        # image
        feed_dict.image_index = metainfo.image_index
        feed_dict.image_filename = metainfo.image_filename
        if self.image_root is not None:
            feed_dict.image = Image.open(osp.join(self.image_root, feed_dict.image_filename)).convert('RGB')
            feed_dict.image, feed_dict.objects = self.image_transform(feed_dict.image, feed_dict.objects)

        return feed_dict.raw()
Example #5
    def __getitem__(self, index):
        # index = index % 200
        metainfo = GView(self.get_metainfo(index))
        metainfo.view_id = 1
        feed_dict = GView()
        feed_dict.scene = metainfo.scene
        feed_dict.attribute_name = "shape"
        feed_dict.concept_name = metainfo.scene["objects"][0][feed_dict.attribute_name]
        if self.incl_scene:
            feed_dict.scene = metainfo.scene
            feed_dict.update(gdef.annotate_objects(metainfo.scene))
            if "objects" in feed_dict:
                # NB(Jiayuan Mao): in some datasets, object information might be completely unavailable.
                feed_dict.objects_raw = feed_dict.objects.copy()
            feed_dict.update(gdef.annotate_scene(metainfo.scene))

        # image
        feed_dict.image_index = metainfo.image_index
        feed_dict.image_filename = metainfo.image_filename
        if self.image_root is not None and feed_dict.image_filename is not None:
            feed_dict.image = Image.open(
                osp.join(self.image_root, feed_dict.image_filename)
            ).convert("RGB")
            feed_dict.image, feed_dict.objects = self.image_transform(
                feed_dict.image, feed_dict.objects
            )
        if self.depth_root is not None and feed_dict.image_filename is not None:
            depth_filename = feed_dict.image_filename.split(".")[0] + ".exr"
            feed_dict.depth = torch.tensor(
                load_depth(osp.join(self.depth_root, depth_filename))
            )
        # program

        # Scene
        # feed_dict.bboxes = torch.tensor(feed_dict.scene["obj_bboxes"][0]).reshape(-1, 9)
        # # feed_dict.bboxes_len = torch.tensor(feed_dict.bboxes.size(0))
        # feed_dict.pix_T_cam = torch.tensor(metainfo.scene["pix_T_cams"]).float()
        # feed_dict.origin_T_cam = torch.tensor(
        #     metainfo.scene["origin_T_cams"][metainfo.view_id]
        # ).float()
        return feed_dict.raw()
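
The load_depth helper used above is not shown in these examples. One possible implementation using OpenCV's EXR reader, purely as an assumption about how the .exr depth maps are stored (recent OpenCV builds also require the OPENCV_IO_ENABLE_OPENEXR environment variable to be set before import):

import os
os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "1")  # must be set before importing cv2

import cv2
import numpy as np

def load_depth(path):
    # read the EXR file at full float precision and keep a single channel
    depth = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
    if depth is None:
        raise IOError("failed to read depth map: " + path)
    if depth.ndim == 3:
        depth = depth[..., 0]
    return depth.astype(np.float32)
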
Example #6
    def __getitem__(self, index):
        metainfo = GView(self.get_metainfo(index))
        feed_dict = GView()

        # metainfo annotations
        if self.incl_scene:
            feed_dict.scene = metainfo.scene
            feed_dict.update(gdef.annotate_objects(metainfo.scene))
            if "objects" in feed_dict:
                # NB(Jiayuan Mao): in some datasets, object information might be completely unavailable.
                feed_dict.objects_raw = feed_dict.objects.copy()
            feed_dict.update(gdef.annotate_scene(metainfo.scene))

        # image
        feed_dict.image_index = metainfo.image_index
        feed_dict.image_filename = metainfo.image_filename
        if self.image_root is not None and feed_dict.image_filename is not None:
            feed_dict.image = Image.open(
                osp.join(self.image_root, feed_dict.image_filename)
            ).convert("RGB")
            feed_dict.image, feed_dict.objects = self.image_transform(
                feed_dict.image, feed_dict.objects
            )
        if self.depth_root is not None and feed_dict.image_filename is not None:
            depth_filename = feed_dict.image_filename.split(".")[0] + ".exr"
            feed_dict.depth = torch.tensor(
                load_depth(osp.join(self.depth_root, depth_filename))
            )

        # program
        if "program_raw" in metainfo:
            feed_dict.program_raw = metainfo.program_raw
            feed_dict.program_seq = metainfo.program_seq
            feed_dict.program_tree = metainfo.program_tree
            feed_dict.program_qsseq = metainfo.program_qsseq
            feed_dict.program_qstree = metainfo.program_qstree
        feed_dict.question_type = metainfo.question_type

        # question
        feed_dict.question_index = metainfo.question_index
        feed_dict.question_raw = metainfo.question
        feed_dict.question_raw_tokenized = metainfo.question_tokenized
        feed_dict.question_metainfo = gdef.annotate_question_metainfo(metainfo)
        feed_dict.question = metainfo.question_tokenized
        feed_dict.answer = gdef.canonize_answer(metainfo.answer, metainfo.question_type)
        feed_dict.update(gdef.annotate_question(metainfo))

        if self.question_transform is not None:
            self.question_transform(feed_dict)
        feed_dict.question = np.array(
            self.vocab.map_sequence(feed_dict.question), dtype="int64"
        )

        return feed_dict.raw()
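
After vocab.map_sequence, feed_dict.question is a variable-length int64 array, so batching requires padding. A minimal padding sketch (the helper name and the zero padding value are assumptions, not taken from the source):

import numpy as np

def pad_questions(questions, pad_value=0):
    # right-pad every question to the length of the longest one in the batch
    max_len = max(len(q) for q in questions)
    out = np.full((len(questions), max_len), pad_value, dtype="int64")
    for i, q in enumerate(questions):
        out[i, :len(q)] = q
    return out

batch = pad_questions([np.array([3, 7, 5]), np.array([2, 9])])  # shape (2, 3)
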
Example #7
    def __getitem__(self, index):
        metainfo = GView(self.get_metainfo(index))
        feed_dict = GView()

        # metainfo annotations
        if self.incl_scene:
            feed_dict.scene = metainfo.scene
            feed_dict.update(gdef.annotate_objects(metainfo.scene))
            if "objects" in feed_dict:
                # NB(Jiayuan Mao): in some datasets, object information might be completely unavailable.
                feed_dict.objects_raw = feed_dict.objects.copy()
            feed_dict.update(gdef.annotate_scene(metainfo.scene))

        # image
        feed_dict.image_index = metainfo.image_index
        feed_dict.image_filename = metainfo.image_filename
        # video
        feed_dict.video_folder = metainfo.video_folder
        video = []
        original_objects = feed_dict.objects
        if self.image_root is not None and feed_dict.image_filename is not None:
            feed_dict.image = Image.open(
                osp.join(self.image_root,
                         feed_dict.image_filename)).convert("RGB")
            feed_dict.image, feed_dict.objects = self.image_transform(
                feed_dict.image, feed_dict.objects)

            # print("Image:", feed_dict.image.shape)
            # print(feed_dict.objects)

        if self.image_root is not None and feed_dict.video_folder is not None:
            import glob

            # load and transform every frame in the video folder
            # (sorted so that frame order is deterministic)
            for name in sorted(glob.glob(
                    osp.join(self.image_root, feed_dict.video_folder, "*.png"))):
                image = Image.open(name).convert("RGB")
                image, _ = self.image_transform(image, original_objects)
                video.append(image)

            feed_dict.video = torch.stack(video)

            # Tensor
            # print("Video:", feed_dict.video.shape)

        # program
        if "program_raw" in metainfo:
            feed_dict.program_raw = metainfo.program_raw
            feed_dict.program_seq = metainfo.program_seq
            feed_dict.program_tree = metainfo.program_tree
            feed_dict.program_qsseq = metainfo.program_qsseq
            feed_dict.program_qstree = metainfo.program_qstree
        feed_dict.question_type = metainfo.question_type

        # question
        feed_dict.question_index = metainfo.question_index
        feed_dict.question_raw = metainfo.question
        feed_dict.question_raw_tokenized = metainfo.question_tokenized
        feed_dict.question_metainfo = gdef.annotate_question_metainfo(metainfo)
        feed_dict.question = metainfo.question_tokenized
        feed_dict.answer = gdef.canonize_answer(metainfo.answer,
                                                metainfo.question_type)
        feed_dict.update(gdef.annotate_question(metainfo))

        if self.question_transform is not None:
            self.question_transform(feed_dict)
        feed_dict.question = np.array(self.vocab.map_sequence(
            feed_dict.question),
                                      dtype="int64")

        return feed_dict.raw()
Example #8
class ForwardContext(object):
    def __init__(self, training, *, loss=0, monitors=None, output_dict=None):
        self.training = training
        self.loss = loss
        self.monitors = GView(monitors)
        self.output_dict = GView(output_dict)
        self.hyperparameters = dict()

    def set_hyperparameter(self, key, value):
        self.hyperparameters[key] = value

    def get_hyperparameter(self, key, default=None):
        # dict.get() takes the default positionally, not as a keyword argument
        return self.hyperparameters.get(key, default)

    def add_loss(self, loss, key=None, accumulate=True):
        if float(accumulate) > 0:
            self.loss = self.loss + loss * float(accumulate)

        if key is not None:
            if f'loss/{key}' in self.monitors:
                self.monitors[f'loss/{key}'] += float(loss)
            else:
                self.monitors[f'loss/{key}'] = float(loss)
        return self

    def add_accuracy(self, accuracy, key):
        self.monitors[f'accuracy/{key}'] = float(accuracy)
        return self

    def add_output(self, output, key):
        self.output_dict[key] = output
        return self

    def update_monitors(self, monitors):
        self.monitors.update(monitors)
        return self

    def update_mo(self, monitors, output_dict):
        self.monitors.update(monitors)
        self.output_dict.update(output_dict)
        return self

    binary_classification_accuracy = _wrap_monitor_function(
        monitor.binary_classification_accuracy)
    classification_accuracy = _wrap_monitor_function(
        monitor.classification_accuracy)
    regression_accuracy = _wrap_monitor_function(monitor.regression_accuracy)
    monitor_rms = _wrap_monitor_function(monitor.monitor_rms)
    monitor_param_saturation = _wrap_monitor_function(
        monitor.monitor_param_saturation)
    monitor_param_rms = _wrap_monitor_function(monitor.monitor_param_rms)
    monitor_param_gradrms = _wrap_monitor_function(
        monitor.monitor_param_gradrms)
    monitor_param_gradrms_ratio = _wrap_monitor_function(
        monitor.monitor_param_gradrms_ratio)

    @wrap_custom_as_default(is_local=True)
    def as_default(self) -> 'ForwardContext':
        yield self

    def finalize(self):
        if self.training:
            return self.loss, self.monitors, self.output_dict
        else:
            self.output_dict.monitors = self.monitors
            return self.output_dict
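
A minimal training-step sketch of how this context is meant to be used, assuming model, feed_dict, targets, and optimizer already exist and that as_default behaves as a context manager, as its decorator suggests (the loss function and wiring are illustrative, not taken from the source):

import torch.nn.functional as F

ctx = ForwardContext(training=True)
with ctx.as_default():
    logits = model(feed_dict)                                  # forward pass
    loss = F.binary_cross_entropy_with_logits(logits, targets)
    ctx.add_loss(loss, key='qa')                               # accumulated into ctx.loss
    acc = ((logits > 0).float() == targets).float().mean()
    ctx.add_accuracy(acc, key='qa')
    ctx.add_output(logits.detach(), key='logits')

loss, monitors, outputs = ctx.finalize()                       # training=True returns a triple
optimizer.zero_grad()
loss.backward()
optimizer.step()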