def __init__(self, device, model, item_scorer, hcp=4):
    """Initialize the KG-DQN based agent.

    :param device: torch device the models run on.
    :param model: policy model that scores (state, possible commands) pairs.
    :param item_scorer: neural model that scores which recipe items are needed.
    :param hcp: handicap level — presumably controls how much oracle
        information the agent may use (TODO confirm against caller).
    :raises ValueError: if ``params['scheduler_type']`` is neither
        ``'exponential'`` nor ``'linear'``.
    """
    self.device = device
    self.cmd_memory = flist()
    self.item_scorer = item_scorer
    # self.navigator = Navigator(navigation_model)
    self.utils = None
    self.hcp = hcp
    self.step_count = 0
    self.total_score = 0
    self.current_score = 0
    self.recipe = ''
    self.reading = False
    self.model = model
    self.description = 'nothing'
    self.description_updated = True
    self.inventory = 'nothing'
    self.inventory_updated = False
    self.info = None
    # added for KG part: knowledge-graph state tracker and its DQN.
    # NOTE(review): .cuda() ignores the `device` argument — confirm this
    # agent is only ever run on GPU.
    self.state = StateNAction()
    self.kg = KGDQN(params, self.state.all_actions).cuda()
    self.params = params
    self.num_frames = params['num_frames']
    # Epsilon-annealing schedule for the epsilon-greedy KG-DQN policy.
    if params['scheduler_type'] == 'exponential':
        self.e_scheduler = ExponentialSchedule(self.num_frames, params['e_decay'], params['e_final'])
    elif params['scheduler_type'] == 'linear':
        self.e_scheduler = LinearSchedule(self.num_frames, params['e_final'])
    else:
        # Fail fast: previously an unrecognized scheduler_type silently left
        # self.e_scheduler undefined and only crashed later inside step().
        raise ValueError(f"Unknown scheduler_type: {params['scheduler_type']!r}")
def explore(self, description):
    """Produce the low level command(s) that advance the DFS exploration by
    one step from the current location.

    The neural model also detects every closed door at this location and
    the corresponding open commands are emitted first.
    """
    location = Navigator.extract_location(description)
    cmds = flist()
    self.update_graph(location=location, description=description)
    node = self.graph[location]
    # open any doors the model found closed here before trying to move
    if len(node.closed_doors) > 0:
        cmds.append(self.open_doors(location))
    if len(node.unvisited_directions) > 0:
        # take the first direction that has not been explored yet
        next_dir = node.unvisited_directions.pop(0)
    elif node.came_from == 'start':
        # every direction visited and we are back at the origin
        next_dir = None
    else:
        # nothing left to visit here -> backtrack to where we came from
        next_dir = node.came_from
    cmds.append(self.do_move(next_dir))
    if len(cmds) == 0:
        # fallback
        cmds.append('look')
    return cmds
def step(self, observation, info: Dict[str, Any], detailed_commands=False):
    """Advance the agent by one environment step.

    :param observation: raw observation text from the environment.
    :param info: info dictionary from the environment.
    :param detailed_commands: when True, additionally return the mapping
        from every possible high level command to its low level commands.
    :return: low level command(s) for one high level action plus the
        LearningInfo needed for the A2C update (and the mapping when
        ``detailed_commands`` is set).
    """
    self.info = info
    self.reading = 'and start reading' in observation
    # gather inventory/description/recipe/location (strategy differs per HCP)
    self.inventory, self.description = self._get_inventory_and_description(observation, info)
    inv_list = [
        self.remove_articles(line.strip())
        for line in self.inventory.strip().split('\n')
        if 'carrying' not in line
    ]
    self.recipe = self._get_recipe(observation)
    location = Navigator.extract_location(self.description)
    nav_commands = self.navigator.get_navigational_commands(self.description)

    items = None
    if self._know_recipe():
        # which items must be picked up / processed to satisfy the recipe
        items, utils = self.item_scorer(recipe=self.recipe, inventory=self.inventory)
        # remember where the required utils were seen
        self._update_util_locations(self.description, utils, location)

    # build the game-state representation and ask the model for an action
    state_description = self.build_state_description(self.description, items, location, observation, inv_list)
    possible_commands = self.get_commands(self.description, items, location, inv_list, nav_commands)
    score, prob, value, high_level_command, index = self.model(state_description, possible_commands)

    # translate the chosen high level command into low level command(s)
    cmds = flist()
    cmds.append(self.command_to_action(command=high_level_command, items=items,
                                       inventory=inv_list, description=self.description))
    # bookkeeping for the A2C update
    learning_info = LearningInfo(score=score, prob=prob, value=value,
                                 action=high_level_command, index=index,
                                 possible_actions=possible_commands)
    self.reading = (high_level_command == 'examine cookbook')
    self.step_count += 1
    self.cmd_memory.append(high_level_command)

    if detailed_commands:
        hl2ll = {
            hl_cmd: self.command_to_action(command=hl_cmd, items=items,
                                           inventory=inv_list,
                                           description=self.description)
            for hl_cmd in possible_commands
        }
        return cmds, learning_info, hl2ll
    return cmds, learning_info
def __init__(self, device, model, item_scorer, navigation_model, hcp=4):
    """Initialize the navigator-based agent.

    :param device: torch device the models run on.
    :param model: policy model scoring (state, possible commands) pairs.
    :param item_scorer: neural model scoring which recipe items are needed.
    :param navigation_model: model wrapped by the Navigator helper.
    :param hcp: handicap level — presumably controls how much oracle
        information the agent may use (TODO confirm against caller).
    """
    # neural components
    self.device = device
    self.model = model
    self.item_scorer = item_scorer
    self.navigator = Navigator(navigation_model)
    # configuration
    self.hcp = hcp
    # per-episode bookkeeping
    self.cmd_memory = flist()
    self.utils = None
    self.step_count = 0
    self.total_score = 0
    self.current_score = 0
    self.recipe = ''
    self.reading = False
    # cached environment state
    self.description = 'nothing'
    self.description_updated = True
    self.inventory = 'nothing'
    self.inventory_updated = False
    self.info = None
def step(self, observation, info: Dict[str, Any], detailed_commands=False):
    """Advance the KG-DQN based agent by one environment step.

    :param observation: raw observation text from the environment.
    :param info: info dictionary from the environment.
    :param detailed_commands: when True, additionally return the mapping
        from every possible high level command to its low level commands.
    :return: One or multiple low level cmds that correspond to a single
        high level action and the model infos needed for the A2C learning.
    """
    self.info = info
    self.reading = 'and start reading' in observation
    # retrieve the information about the inventory, description, recipe and
    # location (different approaches for different HCPs)
    self.inventory, self.description = self._get_inventory_and_description(
        observation, info)
    inventory = [
        self.remove_articles(inv.strip())
        for inv in self.inventory.strip().split('\n')
        if 'carrying' not in inv
    ]
    self.recipe = self._get_recipe(observation)
    location = Navigator.extract_location(self.description)

    # feed the new observation (and previous action, if any) into the
    # knowledge-graph state tracker
    if len(self.cmd_memory) == 0:
        self.state.step(self.description.strip(),
                        pruned=self.params['pruned'])
    else:
        self.state.step(self.description.strip(),
                        prev_action=self.cmd_memory[-1],
                        pruned=self.params['pruned'])

    # Anneal epsilon with the number of steps taken so far.
    # BUGFIX: this was hard-coded to frame 0 ("have to update this somehow
    # later"), so epsilon never decayed and the policy stayed maximally
    # exploratory. step_count is the agent's lifetime step counter —
    # confirm it is not reset per episode if per-episode annealing is wanted.
    epsilon = self.e_scheduler.value(self.step_count)
    state_embedding, possible_commands = self.kg.act(self.state, epsilon)

    items = None
    if self._know_recipe():
        # Invoke the neural model to determine from the recipe and inventory
        # which items we need to pickup and what actions need to be performed
        # on them to satisfy the recipe.
        items, utils = self.item_scorer(recipe=self.recipe,
                                        inventory=self.inventory)
        # update the needed utils
        self._update_util_locations(self.description, utils, location)

    # build the representation of the current game state (dict of strings)
    state_description = self.build_state_description(
        self.description, items, state_embedding, observation, inventory)
    # ask the model for the next command
    score, prob, value, high_level_command, index = self.model(
        state_description, possible_commands)

    cmds = flist()
    # translate the chosen high level command to a (set of) low level commands
    cmds.append(
        self.command_to_action(command=high_level_command,
                               items=items,
                               inventory=inventory,
                               description=self.description))
    # save the info necessary for the A2C update of the model
    learning_info = LearningInfo(score=score,
                                 prob=prob,
                                 value=value,
                                 action=high_level_command,
                                 index=index,
                                 possible_actions=possible_commands)
    self.reading = (high_level_command == 'examine cookbook')
    self.step_count += 1
    self.cmd_memory.append(high_level_command)

    if detailed_commands:
        hl2ll = {
            hl_cmd: self.command_to_action(command=hl_cmd,
                                           items=items,
                                           inventory=inventory,
                                           description=self.description)
            for hl_cmd in possible_commands
        }
        return cmds, learning_info, hl2ll
    return cmds, learning_info