def __init__(self, config, constants):
        """Build the image, text and previous-action/block sub-modules.

        Args:
            config: mapping read for "num_actions", "no_actions",
                "no_blocks", "vocab_size", "h1_hidden_dim", the image
                height/width and all embedding dimensions used below.
            constants: mapping; only "horizon" is read in this constructor.
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        self.none_action = config["num_actions"]

        self.config = config
        self.constants = constants

        # CNN over images - using SimpleImage for testing for now!
        # The input stacks 5 history images with 3 channels each.
        channels_per_image, history_length = 3, 5
        image_dim = config["image_emb_dim"]
        self.image_module = ImageCnnEmnlp(
            image_emb_size=image_dim,
            input_num_channels=channels_per_image * history_length,
            image_height=config["image_height"],
            image_width=config["image_width"])

        # Somewhat counter-intuitive naming: emb_dim is the word embedding
        # size, while hidden_dim is the size of the module's output.
        word_dim = config["word_emb_dim"]
        text_dim = config["lstm_emb_dim"]
        self.text_module = TextSimpleModule(
            emb_dim=word_dim,
            hidden_dim=text_dim,
            vocab_size=config["vocab_size"])

        num_directions = config["no_actions"]
        prev_action_dim = config["previous_action_embedding_dim"]
        self.previous_action_module = ActionSimpleModule(
            num_actions=num_directions,
            action_emb_size=prev_action_dim)

        num_blocks = config["no_blocks"]
        prev_block_dim = config["previous_block_embedding_dim"]
        self.previous_block_module = ActionSimpleModule(
            num_actions=num_blocks,
            action_emb_size=prev_block_dim)

        # The final module receives the text, image, previous-action and
        # previous-block embedding widths summed together.
        joint_input_dim = (text_dim + image_dim
                           + prev_action_dim + prev_block_dim)
        self.final_module = IncrementalMultimodalEmnlp(
            image_module=self.image_module,
            text_module=self.text_module,
            previous_action_module=self.previous_action_module,
            previous_block_module=self.previous_block_module,
            input_embedding_size=joint_input_dim,
            output_hidden_size=config["h1_hidden_dim"],
            blocks_hidden_size=num_blocks,
            directions_hidden_size=num_directions,
            max_episode_length=(constants["horizon"] + 5))

        if torch.cuda.is_available():
            for module in (self.image_module,
                           self.text_module,
                           self.previous_action_module,
                           self.previous_block_module,
                           self.final_module):
                module.cuda()
    def __init__(self, config, constants):
        """Build the goal-image recurrent model and its optional heads.

        The auxiliary heads (action prediction, temporal autoencoding and
        object detection) are created only when the matching ``do_*`` flag
        in ``config`` is truthy; otherwise the attribute is set to ``None``.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width" and the three ``do_*`` flags.
            constants: mapping read for "image_emb_dim" and
                "action_emb_dim".
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions

        image_dim = constants["image_emb_dim"]
        self.image_module = ImageResnetModule(
            image_emb_size=image_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)
        self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
            input_emb_dim=image_dim,
            output_emb_dim=image_dim)

        action_dim = constants["action_emb_dim"]
        self.action_module = ActionSimpleModule(
            num_actions=num_actions,
            action_emb_size=action_dim)

        # Two image-sized embeddings plus the action embedding.
        joint_dim = 2 * image_dim + action_dim

        self.action_prediction_module = (
            ActionPredictionModule(2 * image_dim, image_dim, num_actions)
            if config["do_action_prediction"] else None)

        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module, image_dim, action_dim, image_dim)
            if config["do_temporal_autoencoding"] else None)

        if config["do_object_detection"]:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = ObjectDetectionModule(
                image_module=self.image_module,
                image_emb_size=image_dim,
                num_objects=63)
        else:
            self.object_detection_module = None

        self.final_module = IncrementalMultimodalRecurrentSimpleGoalImageModule(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            action_module=self.action_module,
            total_emb_size=joint_dim,
            num_actions=num_actions)

        if torch.cuda.is_available():
            for module in (self.image_module,
                           self.image_recurrence_module,
                           self.action_module,
                           self.final_module):
                module.cuda()
            # Auxiliary heads exist only when enabled in config.
            for optional in (self.action_prediction_module,
                             self.temporal_autoencoder_module,
                             self.object_detection_module):
                if optional is not None:
                    optional.cuda()
示例#3
0
 def __init__(self, config, constants):
     """Build the recurrent model that uses an implicit-factorization text module.

     Args:
         config: mapping read for "num_actions", "image_height",
             "image_width", "vocab_size" and "use_pointer_model".
         constants: mapping read for "image_emb_dim", "word_emb_dim",
             "lstm_emb_dim" and "action_emb_dim".

     Raises:
         AssertionError: if ``config["use_pointer_model"]`` is truthy; the
             pointer text module is not implemented for this model.
     """
     AbstractIncrementalModel.__init__(self, config, constants)
     self.none_action = config["num_actions"]
     self.image_module = ImageResnetModule(
         image_emb_size=constants["image_emb_dim"],
         input_num_channels=3,
         image_height=config["image_height"],
         image_width=config["image_width"],
         using_recurrence=True)
     self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
         input_emb_dim=constants["image_emb_dim"],
         output_emb_dim=constants["image_emb_dim"])

     # The pointer variant is unsupported here. Raising at this point keeps
     # the original failure position (after the image modules are built).
     if config["use_pointer_model"]:
         raise AssertionError("Not implemented")

     # Factorization hyper-parameters are named once so the text module and
     # the joint embedding size below cannot drift apart.
     num_factors = 2
     factors_embedding_size = 250
     self.text_module = TextImplicitFactorizationModule(
         emb_dim=constants["word_emb_dim"],
         hidden_dim=constants["lstm_emb_dim"],
         vocab_size=config["vocab_size"],
         num_factors=num_factors,
         factors_vocabulary_size=60,
         factors_embedding_size=factors_embedding_size)
     self.action_module = ActionSimpleModule(
         num_actions=config["num_actions"],
         action_emb_size=constants["action_emb_dim"])

     # The text contribution is one embedding per factor. The removed,
     # unreachable pointer branch used 4 * lstm_emb_dim here instead.
     total_emb_size = (constants["image_emb_dim"]
                       + num_factors * factors_embedding_size
                       + constants["action_emb_dim"])
     final_module = IncrementalMultimodalRecurrentSimpleModule(
         image_module=self.image_module,
         image_recurrence_module=self.image_recurrence_module,
         text_module=self.text_module,
         action_module=self.action_module,
         total_emb_size=total_emb_size,
         num_actions=config["num_actions"])
     self.final_module = final_module
     if torch.cuda.is_available():
         self.image_module.cuda()
         self.image_recurrence_module.cuda()
         self.text_module.cuda()
         self.action_module.cuda()
         self.final_module.cuda()
 def __init__(self, config, constants):
     """Build the text-kernel resnet model with a recurrent image summary.

     Args:
         config: mapping read for "num_actions", "image_height",
             "image_width", "vocab_size" and "use_pointer_model".
         constants: mapping read for "image_emb_dim", "word_emb_dim",
             "lstm_emb_dim" and "action_emb_dim".
     """
     AbstractModel.__init__(self, config, constants)
     num_actions = config["num_actions"]
     self.none_action = num_actions

     image_dim = constants["image_emb_dim"]
     self.image_module = ImageTextKernelResnetModule(
         image_emb_size=image_dim,
         input_num_channels=3,
         image_height=config["image_height"],
         image_width=config["image_width"],
         text_emb_size=constants["lstm_emb_dim"],
         using_recurrence=True)
     self.image_recurrence_module = RecurrenceSimpleModule(
         input_emb_dim=image_dim,
         output_emb_dim=image_dim)

     # Both text encoders take the same constructor arguments; only the
     # class differs with the pointer flag.
     pointer_mode = config["use_pointer_model"]
     text_cls = TextPointerModule if pointer_mode else TextSimpleModule
     self.text_module = text_cls(
         emb_dim=constants["word_emb_dim"],
         hidden_dim=constants["lstm_emb_dim"],
         vocab_size=config["vocab_size"])

     self.action_module = ActionSimpleModule(
         num_actions=num_actions,
         action_emb_size=constants["action_emb_dim"])

     # The pointer variant contributes four LSTM-sized blocks to the joint
     # embedding, the simple variant a single one.
     if pointer_mode:
         text_width = 4 * constants["lstm_emb_dim"]
     else:
         text_width = constants["lstm_emb_dim"]
     joint_dim = image_dim + text_width + constants["action_emb_dim"]

     self.final_module = MultimodalTextKernelRecurrentSimpleModule(
         image_module=self.image_module,
         image_recurrence_module=self.image_recurrence_module,
         text_module=self.text_module,
         action_module=self.action_module,
         total_emb_size=joint_dim,
         num_actions=num_actions)

     if torch.cuda.is_available():
         for module in (self.image_module,
                        self.image_recurrence_module,
                        self.text_module,
                        self.action_module,
                        self.final_module):
             module.cuda()
    def __init__(self, config, constants):
        """Build the recurrent model that pairs resnet images with symbolic instructions.

        Args:
            config: mapping read for "num_actions", "image_height" and
                "image_width".
            constants: mapping read for "image_emb_dim" and
                "action_emb_dim".
        """
        AbstractModel.__init__(self, config, constants)
        self.none_action = config["num_actions"]
        # Embedding modules shared with the symbolic instruction encoder.
        self.radius_module = RadiusModule(15)
        self.angle_module = AngleModule(48)
        self.landmark_module = LandmarkModule(63)

        self.image_module = ImageResnetModule(
            image_emb_size=constants["image_emb_dim"],
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)
        self.image_recurrence_module = RecurrenceSimpleModule(
            input_emb_dim=constants["image_emb_dim"],
            output_emb_dim=constants["image_emb_dim"])

        self.text_module = SymbolicInstructionModule(
            radius_embedding=self.radius_module,
            theta_embedding=self.angle_module,
            landmark_embedding=self.landmark_module)
        self.action_module = ActionSimpleModule(
            num_actions=config["num_actions"],
            action_emb_size=constants["action_emb_dim"])
        # NOTE(review): 32 * 4 is presumably the output width of
        # SymbolicInstructionModule - confirm against that module.
        total_emb_size = (constants["image_emb_dim"]
                          + 32 * 4
                          + constants["action_emb_dim"])
        final_module = MultimodalRecurrentSimpleModule(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            text_module=self.text_module,
            action_module=self.action_module,
            total_emb_size=total_emb_size,
            num_actions=config["num_actions"])
        self.final_module = final_module
        if torch.cuda.is_available():
            self.image_module.cuda()
            # Moved explicitly, like every other sub-module, so that GPU
            # placement does not depend on how final_module registers it.
            self.image_recurrence_module.cuda()
            self.text_module.cuda()
            self.action_module.cuda()
            self.final_module.cuda()
            self.radius_module.cuda()
            self.angle_module.cuda()
            self.landmark_module.cuda()
 def __init__(self, config, constants):
     """Build the model that pairs a symbolic image module with a text encoder.

     Args:
         config: mapping read for "num_actions", "vocab_size" and
             "use_pointer_model".
         constants: mapping read for "word_emb_dim", "lstm_emb_dim" and
             "action_emb_dim".
     """
     AbstractModel.__init__(self, config, constants)
     num_actions = config["num_actions"]
     self.none_action = num_actions

     all_landmarks = get_all_landmark_names()
     self.radius_module = RadiusModule(15)
     self.angle_module = AngleModule(48)
     self.landmark_module = LandmarkModule(63)
     self.image_module = SymbolicImageModule(
         landmark_names=all_landmarks,
         radius_module=self.radius_module,
         angle_module=self.angle_module,
         landmark_module=self.landmark_module)

     # Both text encoders share one constructor signature; the pointer
     # flag only selects the class.
     if config["use_pointer_model"]:
         text_cls = TextPointerModule
     else:
         text_cls = TextSimpleModule
     self.text_module = text_cls(
         emb_dim=constants["word_emb_dim"],
         hidden_dim=constants["lstm_emb_dim"],
         vocab_size=config["vocab_size"])

     self.action_module = ActionSimpleModule(
         num_actions=num_actions,
         action_emb_size=constants["action_emb_dim"])

     # NOTE(review): 32 * 3 * 63 is presumably the flattened width of the
     # symbolic image embedding - confirm against SymbolicImageModule.
     symbolic_image_width = 32 * 3 * 63
     joint_dim = (symbolic_image_width
                  + constants["lstm_emb_dim"]
                  + constants["action_emb_dim"])
     self.final_module = MultimodalSimpleModule(
         image_module=self.image_module,
         text_module=self.text_module,
         action_module=self.action_module,
         total_emb_size=joint_dim,
         num_actions=num_actions)

     if torch.cuda.is_available():
         for module in (self.image_module,
                        self.text_module,
                        self.action_module,
                        self.final_module,
                        self.radius_module,
                        self.angle_module,
                        self.landmark_module):
             module.cuda()
示例#7
0
    def __init__(self, config, constants):
        """Build the image CNN, text and action modules and join them.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width", "vocab_size", "h1_hidden_dim",
                "blocks_hidden_dim" and "action_hidden_dim".
            constants: mapping read for "image_emb_dim", "word_emb_dim",
                "lstm_emb_dim", "action_emb_dim" and "horizon".
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions

        self.config = config
        self.constants = constants

        # CNN over images - using what is essentially SimpleImage currently.
        # The input stacks 5 history images with 3 channels each.
        channels_per_image, history_length = 3, 5
        image_dim = constants["image_emb_dim"]
        self.image_module = ImageCnnEmnlp(
            image_emb_size=image_dim,
            input_num_channels=channels_per_image * history_length,
            image_height=config["image_height"],
            image_width=config["image_width"])

        # LSTM to embed text
        word_dim = constants["word_emb_dim"]
        text_dim = constants["lstm_emb_dim"]
        self.text_module = TextSimpleModule(
            emb_dim=word_dim,
            hidden_dim=text_dim,
            vocab_size=config["vocab_size"])

        # Action module to embed previous action+block
        action_dim = constants["action_emb_dim"]
        self.action_module = ActionSimpleModule(
            num_actions=num_actions,
            action_emb_size=action_dim)

        # Put it all together: the joint input is the sum of the text,
        # image and action embedding widths.
        joint_input_dim = text_dim + image_dim + action_dim
        self.final_module = IncrementalMultimodalEmnlp(
            image_module=self.image_module,
            text_module=self.text_module,
            action_module=self.action_module,
            input_embedding_size=joint_input_dim,
            output_hidden_size=config["h1_hidden_dim"],
            blocks_hidden_size=config["blocks_hidden_dim"],
            directions_hidden_size=config["action_hidden_dim"],
            max_episode_length=(constants["horizon"] + 5))

        if torch.cuda.is_available():
            for module in (self.image_module,
                           self.text_module,
                           self.action_module,
                           self.final_module):
                module.cuda()
 def __init__(self, config, constants):
     """Build the fully symbolic model (symbolic image and symbolic instruction).

     The radius, angle and landmark embedding modules are shared between
     the image module and the instruction module.

     Args:
         config: mapping read for "num_actions".
         constants: mapping read for "action_emb_dim".
     """
     AbstractModel.__init__(self, config, constants)
     num_actions = config["num_actions"]
     self.none_action = num_actions

     all_landmarks = get_all_landmark_names()
     self.radius_module = RadiusModule(15)
     self.angle_module = AngleModule(48)
     self.landmark_module = LandmarkModule(63)

     self.image_module = SymbolicImageModule(
         landmark_names=all_landmarks,
         radius_module=self.radius_module,
         angle_module=self.angle_module,
         landmark_module=self.landmark_module)
     self.text_module = SymbolicInstructionModule(
         radius_embedding=self.radius_module,
         theta_embedding=self.angle_module,
         landmark_embedding=self.landmark_module)
     self.action_module = ActionSimpleModule(
         num_actions=num_actions,
         action_emb_size=constants["action_emb_dim"])

     # NOTE(review): the two literal widths are presumably the output sizes
     # of the symbolic image and instruction modules - confirm.
     symbolic_image_width = 32 * 3 * 63
     symbolic_text_width = 32 * 4
     joint_dim = (symbolic_image_width
                  + symbolic_text_width
                  + constants["action_emb_dim"])
     self.final_module = MultimodalSimpleModule(
         image_module=self.image_module,
         text_module=self.text_module,
         action_module=self.action_module,
         total_emb_size=joint_dim,
         num_actions=num_actions)

     if torch.cuda.is_available():
         for module in (self.image_module,
                        self.text_module,
                        self.action_module,
                        self.final_module,
                        self.radius_module,
                        self.angle_module,
                        self.landmark_module):
             module.cuda()
 def __init__(self, config, constants):
     """Build the stacked-image resnet model whose final module has a stop head.

     Args:
         config: mapping read for "num_actions", "image_height",
             "image_width", "vocab_size" and "use_pointer_model".
         constants: mapping read for "image_emb_dim", "max_num_images",
             "word_emb_dim", "lstm_emb_dim" and "action_emb_dim".
     """
     AbstractModel.__init__(self, config, constants)
     num_actions = config["num_actions"]
     self.none_action = num_actions

     # Three channels for each of the max_num_images stacked images.
     self.image_module = ImageResnetModule(
         image_emb_size=constants["image_emb_dim"],
         input_num_channels=3 * constants["max_num_images"],
         image_height=config["image_height"],
         image_width=config["image_width"])

     # Both text encoders share one constructor signature; the pointer
     # flag only selects the class.
     if config["use_pointer_model"]:
         text_cls = TextPointerModule
     else:
         text_cls = TextSimpleModule
     self.text_module = text_cls(
         emb_dim=constants["word_emb_dim"],
         hidden_dim=constants["lstm_emb_dim"],
         vocab_size=config["vocab_size"])

     self.action_module = ActionSimpleModule(
         num_actions=num_actions,
         action_emb_size=constants["action_emb_dim"])

     joint_dim = sum((constants["image_emb_dim"],
                      constants["lstm_emb_dim"],
                      constants["action_emb_dim"]))
     self.final_module = MultimodalSimpleWithStopModule(
         image_module=self.image_module,
         text_module=self.text_module,
         action_module=self.action_module,
         total_emb_size=joint_dim,
         num_actions=num_actions)

     if torch.cuda.is_available():
         for module in (self.image_module,
                        self.text_module,
                        self.action_module,
                        self.final_module):
             module.cuda()
示例#10
0
    def __init__(self, config, constants):
        """Build the recurrent multimodal model and its optional auxiliary heads.

        Each auxiliary head (action prediction, temporal autoencoding,
        object detection, symbolic language prediction, goal prediction)
        is created only when its ``do_*`` flag in ``config`` is truthy;
        otherwise the attribute is set to ``None``.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width", "vocab_size", "use_pointer_model" and the
                five ``do_*`` flags.
            constants: mapping read for "image_emb_dim", "action_emb_dim",
                "word_emb_dim" and "lstm_emb_dim".
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions

        image_dim = constants["image_emb_dim"]
        self.image_module = ImageResnetModule(
            image_emb_size=image_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)

        self.num_cameras = 1
        camera_dim = self.num_cameras * image_dim
        # The recurrence consumes the per-camera image embeddings together
        # with the action embedding.
        self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
            input_emb_dim=(camera_dim + constants["action_emb_dim"]),
            output_emb_dim=image_dim)

        pointer_mode = config["use_pointer_model"]
        text_cls = TextPointerModule if pointer_mode else TextBiLSTMModule
        self.text_module = text_cls(
            emb_dim=constants["word_emb_dim"],
            hidden_dim=constants["lstm_emb_dim"],
            vocab_size=config["vocab_size"])

        action_dim = constants["action_emb_dim"]
        self.action_module = ActionSimpleModule(
            num_actions=num_actions,
            action_emb_size=action_dim)

        lstm_dim = constants["lstm_emb_dim"]
        if pointer_mode:
            joint_dim = image_dim + 4 * lstm_dim + action_dim
        else:
            joint_dim = ((self.num_cameras + 1) * image_dim
                         + 2 * lstm_dim
                         + action_dim)

        self.action_prediction_module = (
            ActionPredictionModule(2 * camera_dim, image_dim, num_actions)
            if config["do_action_prediction"] else None)

        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module, camera_dim, action_dim, image_dim)
            if config["do_temporal_autoencoding"] else None)

        if config["do_object_detection"]:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = ObjectDetectionModule(
                image_module=self.image_module,
                image_emb_size=camera_dim,
                num_objects=67)
        else:
            self.object_detection_module = None

        self.symbolic_language_prediction_module = (
            SymbolicLanguagePredictionModule(total_emb_size=2 * lstm_dim)
            if config["do_symbolic_language_prediction"] else None)

        self.goal_prediction_module = (
            GoalPredictionModule(total_emb_size=32)
            if config["do_goal_prediction"] else None)

        self.final_module = TmpIncrementalMultimodalDenseValtsRecurrentSimpleModule(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            text_module=self.text_module,
            action_module=self.action_module,
            total_emb_size=joint_dim,
            num_actions=num_actions)

        if torch.cuda.is_available():
            for module in (self.image_module,
                           self.image_recurrence_module,
                           self.text_module,
                           self.action_module,
                           self.final_module):
                module.cuda()
            # Auxiliary heads exist only when enabled in config.
            for optional in (self.action_prediction_module,
                             self.temporal_autoencoder_module,
                             self.object_detection_module,
                             self.symbolic_language_prediction_module,
                             self.goal_prediction_module):
                if optional is not None:
                    optional.cuda()
示例#11
0
    def __init__(self, config, constants):
        """Build the recurrent model that pairs resnet images with symbolic instructions.

        The auxiliary heads (action prediction, temporal autoencoding and
        object detection) are created only when the matching ``do_*`` flag
        in ``config`` is truthy; otherwise the attribute is set to ``None``.

        Args:
            config: mapping read for "num_actions", "image_height",
                "image_width" and the three ``do_*`` flags.
            constants: mapping read for "image_emb_dim" and
                "action_emb_dim".
        """
        AbstractIncrementalModel.__init__(self, config, constants)
        num_actions = config["num_actions"]
        self.none_action = num_actions

        # The result is not used in this constructor; the call is kept.
        landmark_names = get_all_landmark_names()
        self.radius_module = RadiusModule(15)
        self.angle_module = AngleModule(12)  # the original comment notes (48)
        self.landmark_module = LandmarkModule(67)
        self.num_cameras = 1

        image_dim = constants["image_emb_dim"]
        self.image_module = ImageRyanResnetModule(
            image_emb_size=image_dim,
            input_num_channels=3,
            image_height=config["image_height"],
            image_width=config["image_width"],
            using_recurrence=True)

        camera_dim = image_dim * self.num_cameras
        # The action embedding is not added to the recurrence input here.
        self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
            input_emb_dim=camera_dim,
            output_emb_dim=image_dim)

        self.text_module = SymbolicInstructionModule(
            radius_embedding=self.radius_module,
            theta_embedding=self.angle_module,
            landmark_embedding=self.landmark_module)

        action_dim = constants["action_emb_dim"]
        self.action_module = ActionSimpleModule(
            num_actions=num_actions,
            action_emb_size=action_dim)

        # NOTE(review): 32 * 2 is presumably the output width of the
        # symbolic instruction module - confirm.
        symbolic_text_width = 32 * 2
        joint_dim = camera_dim + symbolic_text_width + action_dim

        self.action_prediction_module = (
            ActionPredictionModule(2 * camera_dim, image_dim, num_actions)
            if config["do_action_prediction"] else None)

        self.temporal_autoencoder_module = (
            TemporalAutoencoderModule(
                self.action_module, camera_dim, action_dim, image_dim)
            if config["do_temporal_autoencoding"] else None)

        if config["do_object_detection"]:
            self.landmark_names = get_all_landmark_names()
            self.object_detection_module = ObjectDetectionModule(
                image_module=self.image_module,
                image_emb_size=camera_dim,
                num_objects=67)
        else:
            self.object_detection_module = None

        self.final_module = IncrementalMultimodalRecurrentSimpleModule(
            image_module=self.image_module,
            image_recurrence_module=self.image_recurrence_module,
            text_module=self.text_module,
            action_module=self.action_module,
            total_emb_size=joint_dim,
            num_actions=num_actions)

        if torch.cuda.is_available():
            for module in (self.image_module,
                           self.image_recurrence_module,
                           self.text_module,
                           self.action_module,
                           self.final_module):
                module.cuda()
            # Auxiliary heads exist only when enabled in config.
            for optional in (self.action_prediction_module,
                             self.temporal_autoencoder_module,
                             self.object_detection_module):
                if optional is not None:
                    optional.cuda()