Пример #1
0
    def __init__(self, config, constants):
        """Create the text encoder and final module, moving them to the GPU.

        Args:
            config: mapping read for "num_actions" and "vocab_size".
            constants: forwarded unchanged to AbstractModel.__init__.
        """
        AbstractModel.__init__(self, config, constants)
        self.none_action = config["num_actions"]

        # Hard-coded channel count and spatial size; the height and width are
        # handed to the text module below.
        num_channels = 3
        image_height = image_width = 3

        text_module_args = {
            "emb_dim": 32,
            "hidden_dim": 256,
            "vocab_size": config["vocab_size"],
            "image_height": image_height,
            "image_width": image_width,
        }
        self.text_module = ChaplotTextModule(**text_module_args)
        self.final_module = FinalModule(self.text_module)

        # Object detection is switched off here; the toggle used to be
        # config["do_object_detection"].
        detect_objects = False
        if not detect_objects:
            self.object_detection_module = None
        else:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = PixelIdentificationModule(
                num_channels=num_channels, num_objects=67)

        if torch.cuda.is_available():
            for module in (self.text_module, self.final_module):
                module.cuda()
            if self.object_detection_module is not None:
                self.object_detection_module.cuda()
Пример #2
0
    def parse(folder_name):
        """Read every example under ``folder_name`` into a list of samples.

        Example ``i`` is read from ``<folder_name>/example_<i>``, which must
        hold six images ``image_0.png`` .. ``image_5.png`` and a ``data.json``
        whose keys are landmark names.

        Args:
            folder_name: directory whose entry count determines how many
                ``example_<i>`` folders are read.

        Returns:
            A list of ``(images, angles)`` tuples. ``images`` is the six
            axis-swapped images concatenated along axis 0; ``angles`` maps each
            landmark's index in ``get_all_landmark_names()`` to the result of
            ``fix_angle`` on the stored value.

        Raises:
            ValueError: if ``data.json`` contains a key that is not a known
                landmark name (raised by ``list.index``).
        """
        start = time.time()
        dataset = []
        landmark_names = get_all_landmark_names()
        num_examples = len(os.listdir(folder_name))
        for i in range(num_examples):
            example_folder_name = folder_name + "/example_" + str(i)

            # The two swaps move the last axis to the front
            # (presumably HWC -> CHW; confirm against the image format).
            # NOTE(review): scipy.misc.imread was removed in SciPy 1.2; this
            # call needs an older SciPy or a replacement reader.
            images = [
                scipy.misc.imread(
                    example_folder_name + "/image_" + str(j) + ".png"
                ).swapaxes(1, 2).swapaxes(0, 1)
                for j in range(6)
            ]

            # Only the read needs the file handle; close it before processing.
            with open(example_folder_name + "/data.json", 'r') as fp:
                data = json.load(fp)

            new_data = dict()
            for key in data:
                landmark_index = landmark_names.index(key)
                new_data[landmark_index] = (
                    SupervisedLearningDetectTurningAngle.fix_angle(data[key]))

            dataset.append((np.concatenate(images, 0), new_data))

        end = time.time()
        # "% seconds" was parsed as a space-flagged "%s" followed by "econds";
        # spell out the conversion so the message reads "... <time> seconds".
        logging.info("Parsed dataset of size %r in time %r seconds",
                     len(dataset), (end - start))
        return dataset
Пример #3
0
    def __init__(self, config, constants, image_module_path):
        """Assemble the image encoder and the detection head built on it.

        Args:
            config: mapping read for "num_actions", "image_height" and
                "image_width".
            constants: mapping read for "image_emb_dim".
            image_module_path: path of a saved state dict for the image
                module, or None to skip loading.
        """
        self.none_action = config["num_actions"]
        self.landmark_names = get_all_landmark_names()

        image_emb_dim = constants["image_emb_dim"]
        self.image_module = ImageRyanResnetModule(
            image_emb_size=image_emb_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)

        # TODO: load pre-trained image module from file
        # image_module_path = "path/to/saved/image/module"
        load_options = {}
        if not torch.cuda.is_available():
            # Without a GPU, keep each loaded storage where it was
            # deserialised instead of restoring it to its saved device.
            load_options["map_location"] = lambda storage, location: storage

        if image_module_path is not None:
            saved_state = torch.load(image_module_path, **load_options)
            self.image_module.load_state_dict(saved_state)

        # The detection head is sized for six image embeddings.
        self.final_module = ImageRyanDetectionModule(
            image_module=self.image_module,
            image_emb_size=image_emb_dim * 6)

        if torch.cuda.is_available():
            for module in (self.image_module, self.final_module):
                module.cuda()

        # NOTE(review): init_weights() runs after load_state_dict(); confirm it
        # does not overwrite weights loaded from image_module_path.
        self.image_module.init_weights()
    def parse(folder_name):
        """Read every per-step image and its labels under ``folder_name``.

        Example ``i`` is read from ``<folder_name>/example_<i>``. The number
        of ``.png`` files in that folder gives the number of steps; step ``j``
        is read from ``image_<j>.png`` and ``data_<j>.json``.

        Args:
            folder_name: directory whose entry count determines how many
                ``example_<i>`` folders are read.

        Returns:
            A list of ``(image, labels)`` tuples, one per step. ``image`` is
            the axis-swapped image; ``labels`` maps each landmark's index in
            ``get_all_landmark_names()`` to the value stored for it.

        Raises:
            ValueError: if a JSON file contains a key that is not a known
                landmark name (raised by ``list.index``).
        """
        start = time.time()
        dataset = []
        landmark_names = get_all_landmark_names()
        num_examples = len(os.listdir(folder_name))
        for i in range(num_examples):
            example_folder_name = folder_name + "/example_" + str(i)

            # Only the count of PNG files is used; files are then addressed
            # by index, so they must be named image_0.png .. image_<n-1>.png.
            num_actions = sum(
                1 for entry in os.listdir(example_folder_name)
                if entry.endswith('.png'))

            for j in range(num_actions):
                # The two swaps move the last axis to the front
                # (presumably HWC -> CHW; confirm against the image format).
                # NOTE(review): scipy.misc.imread was removed in SciPy 1.2.
                img = scipy.misc.imread(
                    example_folder_name + "/image_" + str(j) + ".png"
                ).swapaxes(1, 2).swapaxes(0, 1)

                with open(example_folder_name + "/data_" + str(j) + ".json",
                          'r') as fp:
                    data = json.load(fp)

                new_data = dict()
                for key in data:
                    new_data[landmark_names.index(key)] = data[key]
                dataset.append((img, new_data))

        end = time.time()
        # "% seconds" was parsed as a space-flagged "%s" followed by "econds";
        # spell out the conversion so the message reads "... <time> seconds".
        logging.info("Parsed dataset of size %r in time %r seconds",
                     len(dataset), (end - start))
        return dataset
    def __init__(self, config, constants):
        """Build the image, recurrence and action modules plus optional heads.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width" and the "do_action_prediction",
                "do_temporal_autoencoding" and "do_object_detection" flags.
            constants: mapping read for "image_emb_dim" and "action_emb_dim".
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions
        image_emb_dim = constants["image_emb_dim"]

        self.image_module = ImageResnetModule(
            image_emb_size=image_emb_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)
        # self.image_module = resnet.resnet18(pretrained=True)
        # constants["image_emb_dim"] = 1000
        self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
            input_emb_dim=image_emb_dim, output_emb_dim=image_emb_dim)

        action_emb_dim = constants["action_emb_dim"]
        self.action_module = ActionSimpleModule(
            num_actions=num_actions, action_emb_size=action_emb_dim)

        # Optional auxiliary heads; each stays None when its flag is off.
        self.action_prediction_module = (
            ActionPredictionModule(
                2 * image_emb_dim, image_emb_dim, num_actions)
            if config["do_action_prediction"] else None)

        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module, image_emb_dim, action_emb_dim,
                image_emb_dim)
            if config["do_temporal_autoencoding"] else None)

        if not config["do_object_detection"]:
            self.object_detection_module = None
        else:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = ObjectDetectionModule(
                image_module=self.image_module,
                image_emb_size=image_emb_dim,
                num_objects=63)

        # Two image-sized embeddings plus the action embedding.
        total_emb_size = 2 * image_emb_dim + action_emb_dim
        self.final_module = IncrementalMultimodalRecurrentSimpleGoalImageModule(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            action_module=self.action_module,
            total_emb_size=total_emb_size,
            num_actions=num_actions)

        if torch.cuda.is_available():
            always_present = (self.image_module,
                              self.image_recurrence_module,
                              self.action_module,
                              self.final_module)
            maybe_present = (self.action_prediction_module,
                             self.temporal_autoencoder_module,
                             self.object_detection_module)
            for module in always_present:
                module.cuda()
            for module in maybe_present:
                if module is not None:
                    module.cuda()
Пример #6
0
    def __init__(self, config, constants):
        """Create the pointer text encoder and the segmentation head.

        Args:
            config: mapping read for "num_actions" and "vocab_size".
            constants: mapping read for "word_emb_dim" and "lstm_emb_dim".
        """
        self.none_action = config["num_actions"]
        self.landmark_names = get_all_landmark_names()

        word_emb_dim = constants["word_emb_dim"]
        lstm_emb_dim = constants["lstm_emb_dim"]

        self.text_module = TextPointerModule(emb_dim=word_emb_dim,
                                             hidden_dim=lstm_emb_dim,
                                             vocab_size=config["vocab_size"])
        # The head takes a text embedding four times the LSTM size.
        self.final_module = SegmentationFinalModule(
            text_module=self.text_module,
            text_emb_size=4 * lstm_emb_dim)

        if torch.cuda.is_available():
            for module in (self.text_module, self.final_module):
                module.cuda()
Пример #7
0
    def __init__(self, config, constants):
        """Build the U-Net image module, text module and contextual U-Net head.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width", "vocab_size" and the "use_pointer_model",
                "do_object_detection" and "do_goal_prediction" flags.
            constants: mapping read for "image_emb_dim".

        Raises:
            NotImplementedError: if config["use_pointer_model"] is truthy.
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        self.none_action = config["num_actions"]
        self.image_module = UnetImageModule(
            image_emb_size=constants["image_emb_dim"],
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)

        # Hard-coded feature dimensions used by the modules below.
        num_channels = 64
        image_height = image_width = 32
        self.num_cameras = 1

        # The pointer model is not supported by this constructor.
        if config["use_pointer_model"]:
            raise NotImplementedError()
        self.text_module = ChaplotTextModule(
            emb_dim=32,
            hidden_dim=256,
            vocab_size=config["vocab_size"],
            image_height=image_height,
            image_width=image_width)

        if not config["do_object_detection"]:
            self.object_detection_module = None
        else:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = PixelIdentificationModule(
                num_channels=num_channels, num_objects=67)

        # Goal prediction is disabled whatever the flag says: the
        # GoalPredictionModule(total_emb_size=32) construction is commented
        # out. The flag is still looked up so a missing key fails as before.
        _goal_prediction_requested = config["do_goal_prediction"]
        self.goal_prediction_module = None

        self.final_module = Unet5Contextual(in_channels=num_channels,
                                            out_channels=1,
                                            embedding_size=256)

        if torch.cuda.is_available():
            for module in (self.image_module,
                           self.text_module,
                           self.final_module):
                module.cuda()
            for module in (self.object_detection_module,
                           self.goal_prediction_module):
                if module is not None:
                    module.cuda()
    def __init__(self, config, constants):
        """Build the image, symbolic-instruction and action modules.

        Args:
            config: mapping read for "num_actions", "image_height" and
                "image_width".
            constants: mapping read for "image_emb_dim" and "action_emb_dim".
        """
        AbstractModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions
        # The result is not used in this constructor; the call is kept.
        landmark_names = get_all_landmark_names()

        # Embedding modules shared with the symbolic instruction encoder.
        self.radius_module = RadiusModule(15)
        self.angle_module = AngleModule(48)
        self.landmark_module = LandmarkModule(63)

        image_emb_dim = constants["image_emb_dim"]
        self.image_module = ImageResnetModule(
            image_emb_size=image_emb_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)
        self.image_recurrence_module = RecurrenceSimpleModule(
            input_emb_dim=image_emb_dim, output_emb_dim=image_emb_dim)

        self.text_module = SymbolicInstructionModule(
            radius_embedding=self.radius_module,
            theta_embedding=self.angle_module,
            landmark_embedding=self.landmark_module)

        action_emb_dim = constants["action_emb_dim"]
        self.action_module = ActionSimpleModule(
            num_actions=num_actions, action_emb_size=action_emb_dim)

        # Image embedding + a fixed 32 * 4 text part + action embedding.
        total_emb_size = image_emb_dim + 32 * 4 + action_emb_dim
        self.final_module = MultimodalRecurrentSimpleModule(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            text_module=self.text_module,
            action_module=self.action_module,
            total_emb_size=total_emb_size,
            num_actions=num_actions)

        if torch.cuda.is_available():
            # NOTE(review): image_recurrence_module is not moved explicitly;
            # presumably final_module.cuda() covers it - confirm.
            for module in (self.image_module,
                           self.text_module,
                           self.action_module,
                           self.final_module,
                           self.radius_module,
                           self.angle_module,
                           self.landmark_module):
                module.cuda()
Пример #9
0
 def __init__(self, config, constants):
     """Build the symbolic image module, a text encoder and the action module.

     Args:
         config: mapping read for "num_actions", "vocab_size" and the
             "use_pointer_model" flag.
         constants: mapping read for "word_emb_dim", "lstm_emb_dim" and
             "action_emb_dim".
     """
     AbstractModel.__init__(self, config, constants)
     num_actions = config["num_actions"]
     self.none_action = num_actions
     landmark_names = get_all_landmark_names()

     # Embedding modules consumed by the symbolic image module.
     self.radius_module = RadiusModule(15)
     self.angle_module = AngleModule(48)
     self.landmark_module = LandmarkModule(63)
     self.image_module = SymbolicImageModule(
         landmark_names=landmark_names,
         radius_module=self.radius_module,
         angle_module=self.angle_module,
         landmark_module=self.landmark_module)

     # Both text encoders take the same arguments; only the class differs.
     text_module_cls = (TextPointerModule if config["use_pointer_model"]
                        else TextSimpleModule)
     self.text_module = text_module_cls(
         emb_dim=constants["word_emb_dim"],
         hidden_dim=constants["lstm_emb_dim"],
         vocab_size=config["vocab_size"])

     self.action_module = ActionSimpleModule(
         num_actions=num_actions,
         action_emb_size=constants["action_emb_dim"])

     # Fixed 32 * 3 * 63 image part + LSTM embedding + action embedding.
     total_emb_size = (32 * 3 * 63
                       + constants["lstm_emb_dim"]
                       + constants["action_emb_dim"])
     self.final_module = MultimodalSimpleModule(
         image_module=self.image_module,
         text_module=self.text_module,
         action_module=self.action_module,
         total_emb_size=total_emb_size,
         num_actions=num_actions)

     if torch.cuda.is_available():
         for module in (self.image_module,
                        self.text_module,
                        self.action_module,
                        self.final_module,
                        self.radius_module,
                        self.angle_module,
                        self.landmark_module):
             module.cuda()
Пример #10
0
    def __init__(self, config, constants, image_module_path):
        """Wrap a pretrained ResNet-18 with an image detection head.

        Args:
            config: mapping read for "num_actions".
            constants: not read by this constructor.
            image_module_path: path of a saved state dict for the image
                module, or None to keep the pretrained weights.
        """
        self.none_action = config["num_actions"]
        self.landmark_names = get_all_landmark_names()
        self.image_module = resnet.resnet18(pretrained=True)

        # TODO: load pre-trained image module from file
        # image_module_path = "path/to/saved/image/module"
        load_options = {}
        if not torch.cuda.is_available():
            # Without a GPU, keep each loaded storage where it was
            # deserialised instead of restoring it to its saved device.
            load_options["map_location"] = lambda storage, location: storage

        if image_module_path is not None:
            saved_state = torch.load(image_module_path, **load_options)
            self.image_module.load_state_dict(saved_state)

        # Hard-coded embedding size handed to the detection head.
        total_emb_size = 1000
        self.final_module = ImageDetectionModule(
            image_module=self.image_module,
            image_emb_size=total_emb_size)

        if torch.cuda.is_available():
            for module in (self.image_module, self.final_module):
                module.cuda()
 def __init__(self, config, constants):
     """Build the symbolic image and instruction modules and the action module.

     Args:
         config: mapping read for "num_actions".
         constants: mapping read for "action_emb_dim".
     """
     AbstractModel.__init__(self, config, constants)
     num_actions = config["num_actions"]
     self.none_action = num_actions
     landmark_names = get_all_landmark_names()

     # The same three embedding modules feed both the image and text sides.
     self.radius_module = RadiusModule(15)
     self.angle_module = AngleModule(48)
     self.landmark_module = LandmarkModule(63)

     self.image_module = SymbolicImageModule(
         landmark_names=landmark_names,
         radius_module=self.radius_module,
         angle_module=self.angle_module,
         landmark_module=self.landmark_module)
     self.text_module = SymbolicInstructionModule(
         radius_embedding=self.radius_module,
         theta_embedding=self.angle_module,
         landmark_embedding=self.landmark_module)

     action_emb_dim = constants["action_emb_dim"]
     self.action_module = ActionSimpleModule(
         num_actions=num_actions, action_emb_size=action_emb_dim)

     # Fixed 32 * 3 * 63 image part + fixed 32 * 4 text part + action part.
     total_emb_size = 32 * 3 * 63 + 32 * 4 + action_emb_dim
     self.final_module = MultimodalSimpleModule(
         image_module=self.image_module,
         text_module=self.text_module,
         action_module=self.action_module,
         total_emb_size=total_emb_size,
         num_actions=num_actions)

     if torch.cuda.is_available():
         for module in (self.image_module,
                        self.text_module,
                        self.action_module,
                        self.final_module,
                        self.radius_module,
                        self.angle_module,
                        self.landmark_module):
             module.cuda()
    def __init__(self,
                 config,
                 constants,
                 final_model_type="unet",
                 final_dimension=None):
        """Build the U-Net image pipeline and the selected final module.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width", "vocab_size" and the "use_pointer_model",
                "do_action_prediction", "do_temporal_autoencoding",
                "do_object_detection", "do_symbolic_language_prediction" and
                "do_goal_prediction" flags.
            constants: mapping read for "image_emb_dim" and, depending on the
                flags, "action_emb_dim" and "lstm_emb_dim".
            final_model_type: one of "m4jksum1", "unet",
                "unet-positional-encoding" or "andrew".
            final_dimension: forwarded to UnetImageModule.

        Raises:
            NotImplementedError: if config["use_pointer_model"] is truthy.
            AssertionError: if final_model_type is not a known value.
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions
        image_emb_dim = constants["image_emb_dim"]

        self.image_module = UnetImageModule(
            image_emb_size=image_emb_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True,
            final_dimension=final_dimension)
        final_dims = self.image_module.get_final_dimension()
        num_channels, image_height, image_width = final_dims
        self.num_cameras = 1
        self.image_recurrence_module = IncrementalRecurrenceChaplotModule(
            input_emb_dim=256, output_emb_dim=256)

        # The pointer model is not supported by this constructor.
        if config["use_pointer_model"]:
            raise NotImplementedError()
        self.text_module = ChaplotTextModule(
            emb_dim=32,
            hidden_dim=256,
            vocab_size=config["vocab_size"],
            image_height=image_height,
            image_width=image_width)

        # Optional auxiliary heads; each stays None when its flag is off.
        self.action_prediction_module = (
            ActionPredictionModule(
                2 * self.num_cameras * image_emb_dim,
                image_emb_dim, num_actions)
            if config["do_action_prediction"] else None)

        # NOTE(review): self.action_module is not assigned in this
        # constructor; this branch relies on it existing already (e.g. set
        # by the base class) - confirm.
        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module,
                self.num_cameras * image_emb_dim,
                constants["action_emb_dim"], image_emb_dim)
            if config["do_temporal_autoencoding"] else None)

        if not config["do_object_detection"]:
            self.object_detection_module = None
        else:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = PixelIdentificationModule(
                num_channels=num_channels, num_objects=67)

        self.symbolic_language_prediction_module = (
            SymbolicLanguagePredictionModule(
                total_emb_size=2 * constants["lstm_emb_dim"])
            if config["do_symbolic_language_prediction"] else None)

        # Goal prediction is disabled whatever the flag says: the
        # GoalPredictionModule(total_emb_size=32) construction is commented
        # out. The flag is still looked up so a missing key fails as before.
        _goal_prediction_requested = config["do_goal_prediction"]
        self.goal_prediction_module = None

        # Arguments common to every final-module variant.
        shared_final_args = dict(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            text_module=self.text_module,
            max_episode_length=150,
            final_image_height=image_height,
            final_image_width=image_width)
        # The two U-Net variants additionally take these.
        unet_final_args = dict(shared_final_args,
                               in_channels=num_channels,
                               out_channels=1,
                               embedding_size=256)

        if final_model_type == "m4jksum1":
            self.final_module = (
                IncrementalMultimodalAttentionChaplotModuleM4JKSUM1(
                    **shared_final_args))
        elif final_model_type == "unet":
            self.final_module = IncrementalUnetAttentionModuleJustProb(
                **unet_final_args)
        elif final_model_type == "unet-positional-encoding":
            self.final_module = (
                IncrementalUnetAttentionModuleJustProbSpatialEncoding(
                    **unet_final_args))
        elif final_model_type == "andrew":
            self.final_module = (
                IncrementalMultimodalAttentionChaplotModuleM5AndrewV2(
                    normalize_filters=False, **shared_final_args))
        else:
            raise AssertionError("Unknown final model type ", final_model_type)

        if torch.cuda.is_available():
            always_present = (self.image_module,
                              self.image_recurrence_module,
                              self.text_module,
                              self.final_module)
            maybe_present = (self.action_prediction_module,
                             self.temporal_autoencoder_module,
                             self.object_detection_module,
                             self.symbolic_language_prediction_module,
                             self.goal_prediction_module)
            for module in always_present:
                module.cuda()
            for module in maybe_present:
                if module is not None:
                    module.cuda()
Пример #13
0
    def __init__(self, config, constants):
        """Build the Chaplot image/text pipeline and its optional heads.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width", "vocab_size" and the "use_pointer_model",
                "do_action_prediction", "do_temporal_autoencoding",
                "do_object_detection", "do_symbolic_language_prediction" and
                "do_goal_prediction" flags.
            constants: mapping read for "image_emb_dim", "horizon",
                "max_extra_horizon" and, depending on the flags,
                "action_emb_dim" and "lstm_emb_dim".

        Raises:
            NotImplementedError: if config["use_pointer_model"] is truthy.
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions
        image_emb_dim = constants["image_emb_dim"]

        self.image_module = ChaplotImageModule(
            image_emb_size=image_emb_dim,
            input_num_channels=3,  # TODO this value keeps changing.
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)
        self.num_cameras = 1
        self.image_recurrence_module = IncrementalRecurrenceChaplotModule(
            input_emb_dim=256, output_emb_dim=256)

        # The pointer model is not supported by this constructor.
        if config["use_pointer_model"]:
            raise NotImplementedError()
        # TODO these 4, 4, are shaky and keep changing.
        self.text_module = ChaplotTextModule(
            emb_dim=32,
            hidden_dim=256,
            vocab_size=config["vocab_size"],
            image_height=3,
            image_width=3)

        # Optional auxiliary heads; each stays None when its flag is off.
        self.action_prediction_module = (
            ActionPredictionModule(
                2 * self.num_cameras * image_emb_dim,
                image_emb_dim, num_actions)
            if config["do_action_prediction"] else None)

        # NOTE(review): self.action_module is not assigned in this
        # constructor; this branch relies on it existing already (e.g. set
        # by the base class) - confirm.
        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module, self.num_cameras * image_emb_dim,
                constants["action_emb_dim"], image_emb_dim)
            if config["do_temporal_autoencoding"] else None)

        if not config["do_object_detection"]:
            self.object_detection_module = None
        else:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = ObjectDetectionModule(
                image_module=self.image_module,
                image_emb_size=self.num_cameras * image_emb_dim,
                num_objects=67)

        self.symbolic_language_prediction_module = (
            SymbolicLanguagePredictionModule(
                total_emb_size=2 * constants["lstm_emb_dim"])
            if config["do_symbolic_language_prediction"] else None)

        self.goal_prediction_module = (
            GoalPredictionModule(total_emb_size=32)
            if config["do_goal_prediction"] else None)

        # The episode length cap is the horizon plus the extra horizon.
        max_episode_length = (constants["horizon"]
                              + constants["max_extra_horizon"])
        # TODO these 4, 4, are shaky and keep changing.
        self.final_module = IncrementalMultimodalChaplotModule(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            text_module=self.text_module,
            max_episode_length=max_episode_length,
            final_image_height=3,
            final_image_width=3)

        if torch.cuda.is_available():
            always_present = (self.image_module,
                              self.image_recurrence_module,
                              self.text_module,
                              self.final_module)
            maybe_present = (self.action_prediction_module,
                             self.temporal_autoencoder_module,
                             self.object_detection_module,
                             self.symbolic_language_prediction_module,
                             self.goal_prediction_module)
            for module in always_present:
                module.cuda()
            for module in maybe_present:
                if module is not None:
                    module.cuda()
Пример #14
0
    def __init__(self, config, constants):
        """Build the ResNet image pipeline, text encoder and optional heads.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width", "vocab_size" and the "use_pointer_model",
                "do_action_prediction", "do_temporal_autoencoding",
                "do_object_detection", "do_symbolic_language_prediction" and
                "do_goal_prediction" flags.
            constants: mapping read for "image_emb_dim", "action_emb_dim",
                "word_emb_dim" and "lstm_emb_dim".
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions
        image_emb_dim = constants["image_emb_dim"]

        self.image_module = ImageResnetModule(
            image_emb_size=image_emb_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)
        self.num_cameras = 1

        action_emb_dim = constants["action_emb_dim"]
        # The recurrence consumes the camera embeddings plus the action
        # embedding.
        self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
            input_emb_dim=(image_emb_dim * self.num_cameras + action_emb_dim),
            output_emb_dim=image_emb_dim)

        # Both text encoders take the same arguments; only the class differs.
        use_pointer_model = config["use_pointer_model"]
        text_module_cls = (TextPointerModule if use_pointer_model
                           else TextBiLSTMModule)
        self.text_module = text_module_cls(
            emb_dim=constants["word_emb_dim"],
            hidden_dim=constants["lstm_emb_dim"],
            vocab_size=config["vocab_size"])

        self.action_module = ActionSimpleModule(
            num_actions=num_actions, action_emb_size=action_emb_dim)

        # The size of the combined embedding depends on the text encoder.
        if config["use_pointer_model"]:
            total_emb_size = (image_emb_dim
                              + 4 * constants["lstm_emb_dim"]
                              + action_emb_dim)
        else:
            total_emb_size = ((self.num_cameras + 1) * image_emb_dim
                              + 2 * constants["lstm_emb_dim"]
                              + action_emb_dim)

        # Optional auxiliary heads; each stays None when its flag is off.
        self.action_prediction_module = (
            ActionPredictionModule(
                2 * self.num_cameras * image_emb_dim,
                image_emb_dim, num_actions)
            if config["do_action_prediction"] else None)

        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module, self.num_cameras * image_emb_dim,
                action_emb_dim, image_emb_dim)
            if config["do_temporal_autoencoding"] else None)

        if not config["do_object_detection"]:
            self.object_detection_module = None
        else:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = ObjectDetectionModule(
                image_module=self.image_module,
                image_emb_size=self.num_cameras * image_emb_dim,
                num_objects=67)

        self.symbolic_language_prediction_module = (
            SymbolicLanguagePredictionModule(
                total_emb_size=2 * constants["lstm_emb_dim"])
            if config["do_symbolic_language_prediction"] else None)

        self.goal_prediction_module = (
            GoalPredictionModule(total_emb_size=32)
            if config["do_goal_prediction"] else None)

        self.final_module = (
            TmpIncrementalMultimodalDenseValtsRecurrentSimpleModule(
                image_module=self.image_module,
                image_recurrence_module=self.image_recurrence_module,
                text_module=self.text_module,
                action_module=self.action_module,
                total_emb_size=total_emb_size,
                num_actions=num_actions))

        if torch.cuda.is_available():
            always_present = (self.image_module,
                              self.image_recurrence_module,
                              self.text_module,
                              self.action_module,
                              self.final_module)
            maybe_present = (self.action_prediction_module,
                             self.temporal_autoencoder_module,
                             self.object_detection_module,
                             self.symbolic_language_prediction_module,
                             self.goal_prediction_module)
            for module in always_present:
                module.cuda()
            for module in maybe_present:
                if module is not None:
                    module.cuda()
    def __init__(self, config, constants, use_image=False):
        """Build the U-Net image pipeline and an oracle final module.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width", "vocab_size" and the "use_pointer_model",
                "do_action_prediction", "do_temporal_autoencoding",
                "do_object_detection", "do_symbolic_language_prediction" and
                "do_goal_prediction" flags.
            constants: mapping read for "image_emb_dim" and, depending on the
                flags, "action_emb_dim" and "lstm_emb_dim".
            use_image: when truthy the final module is OracleGoldWithImage
                (which also receives the image module); otherwise OracleGold.

        Raises:
            NotImplementedError: if config["use_pointer_model"] is truthy.
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions
        image_emb_dim = constants["image_emb_dim"]

        self.image_module = UnetImageModule(
            image_emb_size=image_emb_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)
        final_dims = self.image_module.get_final_dimension()
        num_channels, image_height, image_width = final_dims
        self.num_cameras = 1
        self.image_recurrence_module = IncrementalRecurrenceChaplotModule(
            input_emb_dim=256, output_emb_dim=256)

        # The pointer model is not supported by this constructor.
        if config["use_pointer_model"]:
            raise NotImplementedError()
        self.text_module = ChaplotTextModule(
            emb_dim=32,
            hidden_dim=256,
            vocab_size=config["vocab_size"],
            image_height=image_height,
            image_width=image_width)

        # Optional auxiliary heads; each stays None when its flag is off.
        self.action_prediction_module = (
            ActionPredictionModule(
                2 * self.num_cameras * image_emb_dim,
                image_emb_dim, num_actions)
            if config["do_action_prediction"] else None)

        # NOTE(review): self.action_module is not assigned in this
        # constructor; this branch relies on it existing already (e.g. set
        # by the base class) - confirm.
        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module, self.num_cameras * image_emb_dim,
                constants["action_emb_dim"], image_emb_dim)
            if config["do_temporal_autoencoding"] else None)

        if not config["do_object_detection"]:
            self.object_detection_module = None
        else:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = PixelIdentificationModule(
                num_channels=num_channels, num_objects=67)

        self.symbolic_language_prediction_module = (
            SymbolicLanguagePredictionModule(
                total_emb_size=2 * constants["lstm_emb_dim"])
            if config["do_symbolic_language_prediction"] else None)

        # Goal prediction is disabled whatever the flag says: the
        # GoalPredictionModule(total_emb_size=32) construction is commented
        # out. The flag is still looked up so a missing key fails as before.
        _goal_prediction_requested = config["do_goal_prediction"]
        self.goal_prediction_module = None

        # Arguments shared by both oracle variants.
        oracle_args = dict(
            image_recurrence_module=self.image_recurrence_module,
            max_episode_length=150,
            final_image_height=image_height,
            final_image_width=image_width)
        if use_image:
            self.final_module = OracleGoldWithImage(
                image_module=self.image_module, **oracle_args)
        else:
            self.final_module = OracleGold(**oracle_args)

        if torch.cuda.is_available():
            always_present = (self.image_module,
                              self.image_recurrence_module,
                              self.text_module,
                              self.final_module)
            maybe_present = (self.action_prediction_module,
                             self.temporal_autoencoder_module,
                             self.object_detection_module,
                             self.symbolic_language_prediction_module,
                             self.goal_prediction_module)
            for module in always_present:
                module.cuda()
            for module in maybe_present:
                if module is not None:
                    module.cuda()
import torch
import torch.optim as optim
import collections
import utils.generic_policy as gp
import utils.debug_nav_drone_instruction as debug
import utils.nav_drone_symbolic_instructions as nav_drone_symbolic_instructions

from agents.agent_observed_state import AgentObservedState
from agents.symbolic_text_replay_memory_item import SymbolicTextReplayMemoryItem
from abstract_learning import AbstractLearning
from utils.cuda import cuda_var

#For error analysis/printing
from dataset_agreement_nav_drone.nav_drone_dataset_parser import make_vocab_map
from utils.nav_drone_landmarks import get_all_landmark_names
# Landmark names fetched once at import time via get_all_landmark_names().
LANDMARK_NAMES = get_all_landmark_names()

# Number of discrete buckets and the width of each bucket.
# NOTE(review): 48 * 7.5 == 360, so these presumably discretise an angle in
# degrees -- confirm against the code that consumes them.
NO_BUCKETS = 48
BUCKET_WIDTH = 7.5


class ParagraphSegmentationTrainTest(AbstractLearning):
    """ Learner that trains a model to predict the symbolic form of a text """

    def __init__(self, model, action_space, meta_data_util, config, constants,
                 tensorboard):
        """ Record the collaborators and settings used by this learner.

        :param model: model object, stored as ``self.model``.
        :param action_space: stored as ``self.action_space``.
        :param meta_data_util: stored as ``self.meta_data_util``.
        :param config: stored as ``self.config``.
        :param constants: mapping stored as ``self.constants``; its
            "max_epochs" entry becomes ``self.max_epoch``.
        :param tensorboard: accepted but not stored by this constructor.
        :raises KeyError: if ``constants`` has no "max_epochs" entry.
        """
        # Objects the learner works with.
        self.model, self.action_space, self.meta_data_util = (
            model, action_space, meta_data_util)

        # Settings, plus the epoch limit pulled out of the constants.
        self.config, self.constants = config, constants
        self.max_epoch = constants["max_epochs"]
Пример #17
0
    def __init__(self, config, constants):
        """ Assemble every sub-module of the incremental model.

        Creates, in this order: the radius / angle / landmark embeddings,
        the image encoder and its recurrence, the symbolic-instruction text
        module, the action embedding, the optional auxiliary heads selected
        by ``config`` and the final multimodal module. When CUDA is
        available, every created module is moved to the GPU.

        :param config: mapping read for "num_actions", "image_height",
            "image_width" and the switches "do_action_prediction",
            "do_temporal_autoencoding" and "do_object_detection".
        :param constants: mapping read for "image_emb_dim" and
            "action_emb_dim".
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        self.none_action = config["num_actions"]
        # The returned names are not used in this constructor.
        get_all_landmark_names()

        # Embeddings for the three parts of a symbolic instruction.
        self.radius_module = RadiusModule(15)
        self.angle_module = AngleModule(12)  # (48)
        self.landmark_module = LandmarkModule(67)

        self.num_cameras = 1
        image_emb_dim = constants["image_emb_dim"]
        # Embedding width of all camera images taken together.
        camera_emb_dim = self.num_cameras * image_emb_dim

        self.image_module = ImageRyanResnetModule(
            image_emb_size=image_emb_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)
        self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
            input_emb_dim=camera_emb_dim,  # + constants["action_emb_dim"],
            output_emb_dim=image_emb_dim)
        self.text_module = SymbolicInstructionModule(
            radius_embedding=self.radius_module,
            theta_embedding=self.angle_module,
            landmark_embedding=self.landmark_module)

        action_emb_dim = constants["action_emb_dim"]
        self.action_module = ActionSimpleModule(
            num_actions=config["num_actions"],
            action_emb_size=action_emb_dim)

        # NOTE(review): the 32 * 2 term is presumably the width of the text
        # embedding -- confirm against SymbolicInstructionModule.
        total_emb_size = camera_emb_dim + 32 * 2 + action_emb_dim

        # Optional auxiliary heads; a disabled head is stored as None.
        self.action_prediction_module = (
            ActionPredictionModule(
                2 * camera_emb_dim, image_emb_dim, config["num_actions"])
            if config["do_action_prediction"] else None)

        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module, camera_emb_dim,
                action_emb_dim, image_emb_dim)
            if config["do_temporal_autoencoding"] else None)

        if not config["do_object_detection"]:
            self.object_detection_module = None
        else:
            # landmark_names is only set when object detection is enabled.
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = ObjectDetectionModule(
                image_module=self.image_module,
                image_emb_size=camera_emb_dim, num_objects=67)

        self.final_module = IncrementalMultimodalRecurrentSimpleModule(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            text_module=self.text_module,
            action_module=self.action_module,
            total_emb_size=total_emb_size,
            num_actions=config["num_actions"])

        if torch.cuda.is_available():
            # Core modules first, then whichever optional heads exist.
            gpu_candidates = (
                self.image_module,
                self.image_recurrence_module,
                self.text_module,
                self.action_module,
                self.final_module,
                self.action_prediction_module,
                self.temporal_autoencoder_module,
                self.object_detection_module,
            )
            for module in gpu_candidates:
                if module is not None:
                    module.cuda()