def __init__(self, config, constants):
    """Assemble the image, text and final position modules.

    Args:
        config: Mapping read for "num_actions", "image_height",
            "image_width", "use_pointer_model" and "vocab_size".
        constants: Mapping read for "image_emb_dim", "max_num_images",
            "word_emb_dim" and "lstm_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)
    self.none_action = config["num_actions"]

    image_emb_dim = constants["image_emb_dim"]

    # Input channel count is 3 * max_num_images.
    self.image_module = ImagePositionResnetModule(
        image_emb_size=image_emb_dim,
        input_num_channels=3 * constants["max_num_images"],
        image_height=config["image_height"],
        image_width=config["image_width"])

    # Both text encoders take the same keyword arguments, so only the
    # class differs between the pointer and the simple variant.
    if config["use_pointer_model"]:
        text_module_cls = TextPointerModule
    else:
        text_module_cls = TextSimpleModule
    self.text_module = text_module_cls(
        emb_dim=constants["word_emb_dim"],
        hidden_dim=constants["lstm_emb_dim"],
        vocab_size=config["vocab_size"])

    # The size handed to the final module is the image embedding size
    # only; a variant that also added constants["lstm_emb_dim"] is
    # disabled.
    self.final_module = MultimodalSimplePositionModule(
        image_module=self.image_module,
        text_module=self.text_module,
        total_emb_size=image_emb_dim,
        num_grid_x=8,
        num_grid_y=8,
        num_grid_pose=24)

    if torch.cuda.is_available():
        for module in (self.image_module,
                       self.text_module,
                       self.final_module):
            module.cuda()
def __init__(self, config, constants):
    """Assemble the text, image and text-classification modules.

    Args:
        config: Mapping read for "num_actions", "vocab_size",
            "image_height" and "image_width".
        constants: Mapping read for "word_emb_dim", "lstm_emb_dim" and
            "image_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)
    self.none_action = config["num_actions"]

    lstm_emb_dim = constants["lstm_emb_dim"]

    self.text_module = TextSimpleModule(
        emb_dim=constants["word_emb_dim"],
        hidden_dim=lstm_emb_dim,
        vocab_size=config["vocab_size"])

    self.image_module = ImageResnetModule(
        image_emb_size=constants["image_emb_dim"],
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True)

    # The size handed to the final module is the LSTM embedding size
    # only; adding constants["image_emb_dim"] is disabled.
    self.final_module = MultimodalTextClassificationModule(
        text_module=self.text_module,
        image_module=self.image_module,
        total_emb_size=lstm_emb_dim)

    if torch.cuda.is_available():
        for module in (self.text_module,
                       self.image_module,
                       self.final_module):
            module.cuda()
def __init__(self, config, constants):
    """Assemble the text module and the final module built on top of it.

    Object detection is switched off unconditionally, so
    ``self.object_detection_module`` is always ``None`` after this runs.

    Args:
        config: Mapping read for "num_actions" and "vocab_size".
        constants: Only forwarded to ``AbstractModel.__init__``.
    """
    AbstractModel.__init__(self, config, constants)
    self.none_action = config["num_actions"]

    # Channel count and image dimensions are hard-coded to 3 here.
    num_channels = 3
    image_height = 3
    image_width = 3

    self.text_module = ChaplotTextModule(
        emb_dim=32,
        hidden_dim=256,
        vocab_size=config["vocab_size"],
        image_height=image_height,
        image_width=image_width)
    self.final_module = FinalModule(self.text_module)

    # NOTE(review): object detection is hard-disabled; the flag is not
    # read from config["do_object_detection"]. Confirm whether the
    # branch below should be re-enabled or removed.
    use_object_detection = False
    if not use_object_detection:
        self.object_detection_module = None
    else:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = PixelIdentificationModule(
            num_channels=num_channels, num_objects=67)

    if torch.cuda.is_available():
        self.text_module.cuda()
        self.final_module.cuda()
        detector = self.object_detection_module
        if detector is not None:
            detector.cuda()
def __init__(self, config, constants):
    """Assemble the radius, angle and goal-position modules.

    Args:
        config: Mapping read for "num_actions".
        constants: Only forwarded to ``AbstractModel.__init__``.
    """
    AbstractModel.__init__(self, config, constants)

    self.radius_model = RadiusModule(15)
    self.angle_model = AngleModule(48)
    self.goal_module = GoalPositionModule(
        radius_module=self.radius_model,
        angle_module=self.angle_model,
        num_actions=config["num_actions"])

    if not torch.cuda.is_available():
        return
    # Only the goal module is moved explicitly; the radius and angle
    # modules are handed to it as constructor arguments above.
    self.goal_module.cuda()
def __init__(self, config, constants):
    """Assemble the image, recurrence, text, action and final modules.

    Args:
        config: Mapping read for "num_actions", "image_height",
            "image_width", "use_pointer_model" and "vocab_size".
        constants: Mapping read for "image_emb_dim", "lstm_emb_dim",
            "word_emb_dim" and "action_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)

    num_actions = config["num_actions"]
    image_emb_dim = constants["image_emb_dim"]
    lstm_emb_dim = constants["lstm_emb_dim"]
    action_emb_dim = constants["action_emb_dim"]

    self.none_action = num_actions

    self.image_module = ImageTextKernelResnetModule(
        image_emb_size=image_emb_dim,
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        text_emb_size=lstm_emb_dim,
        using_recurrence=True)

    self.image_recurrence_module = RecurrenceSimpleModule(
        input_emb_dim=image_emb_dim,
        output_emb_dim=image_emb_dim)

    # The pointer flag picks the text encoder class and, further down,
    # the weight of the text term in the combined embedding size.
    use_pointer_model = config["use_pointer_model"]
    if use_pointer_model:
        text_module_cls = TextPointerModule
    else:
        text_module_cls = TextSimpleModule
    self.text_module = text_module_cls(
        emb_dim=constants["word_emb_dim"],
        hidden_dim=lstm_emb_dim,
        vocab_size=config["vocab_size"])

    self.action_module = ActionSimpleModule(
        num_actions=num_actions,
        action_emb_size=action_emb_dim)

    # With the pointer model the text term counts 4 * lstm_emb_dim,
    # otherwise a single lstm_emb_dim.
    text_emb_size = 4 * lstm_emb_dim if use_pointer_model else lstm_emb_dim
    total_emb_size = image_emb_dim + text_emb_size + action_emb_dim

    self.final_module = MultimodalTextKernelRecurrentSimpleModule(
        image_module=self.image_module,
        image_recurrence_module=self.image_recurrence_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=num_actions)

    if torch.cuda.is_available():
        for module in (self.image_module,
                       self.image_recurrence_module,
                       self.text_module,
                       self.action_module,
                       self.final_module):
            module.cuda()
def __init__(self, config, constants):
    """Assemble the symbolic-instruction recurrent model's modules.

    Builds the radius/angle/landmark embedding modules, the image and
    image-recurrence modules, the symbolic instruction (text) module,
    the action module and the final module, then moves every one of
    them to the GPU when CUDA is available.

    Args:
        config: Mapping read for "num_actions", "image_height" and
            "image_width".
        constants: Mapping read for "image_emb_dim" and
            "action_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)
    self.none_action = config["num_actions"]

    # NOTE(review): the result is not used in this constructor; the call
    # is kept in case the lookup has side effects — confirm and drop.
    landmark_names = get_all_landmark_names()

    self.radius_module = RadiusModule(15)
    self.angle_module = AngleModule(48)
    self.landmark_module = LandmarkModule(63)

    self.image_module = ImageResnetModule(
        image_emb_size=constants["image_emb_dim"],
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True)

    self.image_recurrence_module = RecurrenceSimpleModule(
        input_emb_dim=constants["image_emb_dim"],
        output_emb_dim=constants["image_emb_dim"])

    self.text_module = SymbolicInstructionModule(
        radius_embedding=self.radius_module,
        theta_embedding=self.angle_module,
        landmark_embedding=self.landmark_module)

    self.action_module = ActionSimpleModule(
        num_actions=config["num_actions"],
        action_emb_size=constants["action_emb_dim"])

    # The text term is a fixed 32 * 4 — presumably four 32-dimensional
    # symbolic embeddings; TODO confirm against SymbolicInstructionModule.
    total_emb_size = (constants["image_emb_dim"]
                      + 32 * 4
                      + constants["action_emb_dim"])

    self.final_module = MultimodalRecurrentSimpleModule(
        image_module=self.image_module,
        image_recurrence_module=self.image_recurrence_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=config["num_actions"])

    if torch.cuda.is_available():
        self.image_module.cuda()
        # The recurrence module was previously left out of the GPU
        # transfer; move it explicitly like every other module here.
        self.image_recurrence_module.cuda()
        self.text_module.cuda()
        self.action_module.cuda()
        self.final_module.cuda()
        self.radius_module.cuda()
        self.angle_module.cuda()
        self.landmark_module.cuda()
def __init__(self, config, constants):
    """Assemble the symbolic image, text, action and final modules.

    Args:
        config: Mapping read for "num_actions", "use_pointer_model" and
            "vocab_size".
        constants: Mapping read for "word_emb_dim", "lstm_emb_dim" and
            "action_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)

    num_actions = config["num_actions"]
    self.none_action = num_actions

    all_landmarks = get_all_landmark_names()

    # The three embedding modules are given to the symbolic image module.
    self.radius_module = RadiusModule(15)
    self.angle_module = AngleModule(48)
    self.landmark_module = LandmarkModule(63)

    self.image_module = SymbolicImageModule(
        landmark_names=all_landmarks,
        radius_module=self.radius_module,
        angle_module=self.angle_module,
        landmark_module=self.landmark_module)

    # Both text encoders take the same keyword arguments, so only the
    # class differs between the pointer and the simple variant.
    if config["use_pointer_model"]:
        text_module_cls = TextPointerModule
    else:
        text_module_cls = TextSimpleModule
    self.text_module = text_module_cls(
        emb_dim=constants["word_emb_dim"],
        hidden_dim=constants["lstm_emb_dim"],
        vocab_size=config["vocab_size"])

    self.action_module = ActionSimpleModule(
        num_actions=num_actions,
        action_emb_size=constants["action_emb_dim"])

    # The image term is a fixed 32 * 3 * 63 — presumably 32-dim
    # embeddings x 3 attributes x 63 landmarks; TODO confirm.
    # NOTE(review): the text term is lstm_emb_dim regardless of
    # use_pointer_model; confirm that matches TextPointerModule's output.
    symbolic_image_emb_size = 32 * 3 * 63
    total_emb_size = (symbolic_image_emb_size
                      + constants["lstm_emb_dim"]
                      + constants["action_emb_dim"])

    self.final_module = MultimodalSimpleModule(
        image_module=self.image_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=num_actions)

    if torch.cuda.is_available():
        for module in (self.image_module,
                       self.text_module,
                       self.action_module,
                       self.final_module,
                       self.radius_module,
                       self.angle_module,
                       self.landmark_module):
            module.cuda()
def __init__(self, config, constants):
    """Assemble the fully symbolic image/instruction model's modules.

    Args:
        config: Mapping read for "num_actions".
        constants: Mapping read for "action_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)

    num_actions = config["num_actions"]
    self.none_action = num_actions

    all_landmarks = get_all_landmark_names()

    # The same three embedding modules are given to both the symbolic
    # image module and the symbolic instruction module.
    self.radius_module = RadiusModule(15)
    self.angle_module = AngleModule(48)
    self.landmark_module = LandmarkModule(63)

    self.image_module = SymbolicImageModule(
        landmark_names=all_landmarks,
        radius_module=self.radius_module,
        angle_module=self.angle_module,
        landmark_module=self.landmark_module)

    self.text_module = SymbolicInstructionModule(
        radius_embedding=self.radius_module,
        theta_embedding=self.angle_module,
        landmark_embedding=self.landmark_module)

    self.action_module = ActionSimpleModule(
        num_actions=num_actions,
        action_emb_size=constants["action_emb_dim"])

    # Fixed sizes: 32 * 3 * 63 for the image term and 32 * 4 for the
    # instruction term — presumably 32-dim embeddings; TODO confirm.
    symbolic_image_emb_size = 32 * 3 * 63
    symbolic_text_emb_size = 32 * 4
    total_emb_size = (symbolic_image_emb_size
                      + symbolic_text_emb_size
                      + constants["action_emb_dim"])

    self.final_module = MultimodalSimpleModule(
        image_module=self.image_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=num_actions)

    if torch.cuda.is_available():
        for module in (self.image_module,
                       self.text_module,
                       self.action_module,
                       self.final_module,
                       self.radius_module,
                       self.angle_module,
                       self.landmark_module):
            module.cuda()