def run_simulated_mission(model, display=None, use_delays=False):
    """Play one episode against the simulated world model.

    Every tick the agent is handed the reward for the current world state
    together with a fresh observation, and the action it returns is applied
    to the simulation. The episode is capped at ``5 * MAX_EPISODE_TIME``
    ticks and stops earlier once the world model reports that the mission
    is no longer running.

    Args:
        model: Agent providing ``act(reward, observation)`` and
            ``mission_ended()``.
        display: Optional object whose ``update(world_model)`` is called on
            every tick.
        use_delays: When true, print each action and sleep ``ACTION_DELAY``
            seconds after applying it.

    Returns:
        A ``(total_reward, length)`` tuple, where ``length`` is
        ``MAX_EPISODE_TIME`` minus one fifth of the unused ticks.
    """
    print("Simulated mission running.")
    sim_world = WorldModel(BLUEPRINT, CONFIG_FILE, simulated=True)
    remaining_ticks = 5 * MAX_EPISODE_TIME
    reward_sum = 0

    while True:
        # Check the tick budget first so the world model is not queried
        # once the budget is exhausted.
        if remaining_ticks <= 0 or not sim_world.is_mission_running():
            break
        remaining_ticks -= 1

        step_reward = sim_world.reward()
        chosen_action = model.act(step_reward, sim_world.get_observation())
        if display is not None:
            display.update(sim_world)
        reward_sum += step_reward

        sim_world.simulate(chosen_action)
        if not use_delays:
            continue
        print(chosen_action)
        time.sleep(ACTION_DELAY)

    # The reward produced by the last simulated action has not been handed
    # to the model yet: deliver it, then close the episode.
    final_reward = sim_world.reward()
    model.act(final_reward, sim_world.get_observation())
    reward_sum += final_reward
    model.mission_ended()

    print("Simulated mission ended")
    episode_length = MAX_EPISODE_TIME - (remaining_ticks / 5)
    return reward_sum, episode_length
def run_simulated_mission(model, mission, cfg, demo=False):
    """Play one episode of ``mission`` against the simulated world model.

    Every tick the agent picks an action from the current observation (and,
    outside demo mode, the current reward); the action is then applied to
    the simulation. The episode is capped at ``5 * mission.max_episode_time``
    ticks and stops earlier once the world model reports that the mission
    is no longer running.

    Args:
        model: Agent providing ``act(reward, observation)``,
            ``demo_act(observation)`` and ``mission_ended()``.
        mission: Mission description; this function reads its
            ``blueprint``, ``start_position``, ``max_episode_time``,
            ``action_delay`` and ``display`` attributes.
        cfg: Configuration passed straight through to ``WorldModel``.
        demo: When true, actions come from ``model.demo_act`` and rewards
            are never fed back to the model.

    Returns:
        ``MissionStats`` holding the accumulated reward and the episode
        length (``mission.max_episode_time`` minus one fifth of the unused
        ticks).
    """
    print("Simulated mission running.")
    sim_world = WorldModel(
        mission.blueprint,
        cfg,
        simulated=True,
        agent_pos=mission.start_position,
    )
    remaining_ticks = 5 * mission.max_episode_time
    reward_sum = 0
    # Decided once up front: a positive delay also turns on action printing.
    delay_enabled = mission.action_delay > 0

    while True:
        # Check the tick budget first so the world model is not queried
        # once the budget is exhausted.
        if remaining_ticks <= 0 or not sim_world.is_mission_running():
            break
        remaining_ticks -= 1

        step_reward = sim_world.reward()
        if demo:
            chosen_action = model.demo_act(sim_world.get_observation())
        else:
            chosen_action = model.act(step_reward,
                                      sim_world.get_observation())

        screen = mission.display
        if screen is not None:
            screen.update(sim_world)
        reward_sum += step_reward

        sim_world.simulate(chosen_action)
        if not delay_enabled:
            continue
        print(chosen_action)
        time.sleep(mission.action_delay)

    # Show the final world state, then account for the reward produced by
    # the last simulated action before closing the episode.
    screen = mission.display
    if screen is not None:
        screen.update(sim_world)
    final_reward = sim_world.reward()
    if not demo:
        model.act(final_reward, sim_world.get_observation())
    reward_sum += final_reward
    model.mission_ended()

    print("Simulated mission ended")
    episode_length = mission.max_episode_time - (remaining_ticks / 5)
    return MissionStats(reward=reward_sum, length=episode_length)
def run_mission(model, display=None):
    """Run one episode on a live Malmo client.

    Starts a mission built from the module-level ``MISSION_XML`` on
    ``AGENT_HOST`` (retrying with exponential back-off), waits for it to
    begin, then repeatedly polls the world state, feeds every fresh
    observation to ``model`` and forwards the chosen command until either
    Malmo or the world model reports that the mission is over.

    Args:
        model: Agent providing ``act(reward, observation)`` and
            ``mission_ended()``.
        display: Optional object whose ``update(world_model)`` is called
            after every processed observation.

    Returns:
        A ``(total_reward, duration)`` tuple, where ``duration`` is the
        wall-clock time in seconds spent in the mission loop.

    Raises:
        SystemExit: With status 1 when the mission still cannot be started
            on the last of ``MAX_RETRIES`` attempts.
    """
    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(MISSION_XML, True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    world_model = WorldModel(BLUEPRINT, CONFIG_FILE, simulated=False)

    # Attempt to start a mission:
    for retry in range(MAX_RETRIES):
        try:
            AGENT_HOST.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == MAX_RETRIES - 1:
                print("Error starting mission:", e)
                # The exit() helper is injected by the site module and is
                # not guaranteed to exist; raise SystemExit directly.
                raise SystemExit(1) from e
            # Exponential back-off before the next attempt.
            time.sleep(2**retry)

    # Loop until mission starts:
    print("Waiting for the mission to start ", end=' ')
    world_state = AGENT_HOST.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = AGENT_HOST.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print("\nMission running.")

    total_reward = 0
    current_r = 0
    start = time.time()

    # Loop until mission ends
    while (world_state.is_mission_running
           and world_model.is_mission_running()):
        world_state = AGENT_HOST.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)

        # Rewards reported by Malmo keep accumulating until an observation
        # arrives and the sum can be handed to the model.
        current_r += sum(r.getValue() for r in world_state.rewards)

        if len(world_state.observations) > 0:
            # Only the most recent observation is used.
            raw_obs = json.loads(world_state.observations[-1].text)
            world_model.update(raw_obs)
            current_r += world_model.reward()
            action = model.act(current_r, world_model.get_observation())
            if display is not None:
                display.update(world_model)
            total_reward += current_r
            current_r = 0

            if (world_model.mission_complete()
                    or not world_model.agent_in_arena()):
                AGENT_HOST.sendCommand('quit')
            elif world_state.is_mission_running:
                AGENT_HOST.sendCommand(action)

        # NOTE(review): the delay is applied once per polling iteration so
        # the loop does not spin while no observation is available; confirm
        # this matches the intended pacing.
        time.sleep(ACTION_DELAY)

    end = time.time()
    model.mission_ended()
    print()
    print("Mission ended")
    return total_reward, end - start
def run_malmo_mission(model,
                      mission,
                      mission_xml,
                      cfg,
                      agent_host,
                      max_retries=5,
                      demo=False):
    """Run one episode of ``mission`` on a live Malmo client.

    Starts a mission built from ``mission_xml`` on ``agent_host`` (retrying
    with exponential back-off), waits for it to begin, then repeatedly polls
    the world state, lets ``model`` pick an action for every fresh
    observation and forwards it until either Malmo or the world model
    reports that the mission is over.

    Args:
        model: Agent providing ``act(reward, observation)``,
            ``demo_act(observation)`` and ``mission_ended()``.
        mission: Mission description; this function reads its
            ``blueprint``, ``display`` and ``action_delay`` attributes.
        mission_xml: Mission definition passed to
            ``MalmoPython.MissionSpec``.
        cfg: Configuration passed straight through to ``WorldModel``.
        agent_host: Malmo agent host used to start the mission, poll the
            world state and send commands.
        max_retries: Number of attempts made to start the mission.
        demo: When true, actions come from ``model.demo_act`` (rewards are
            not fed to the model) and every command sent is printed.

    Returns:
        ``MissionStats`` holding the accumulated reward and the wall-clock
        time in seconds spent in the mission loop.

    Raises:
        SystemExit: With status 1 when the mission still cannot be started
            on the last of ``max_retries`` attempts.
    """
    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    world_model = WorldModel(mission.blueprint, cfg, simulated=False)

    # Attempt to start a mission:
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                # The exit() helper is injected by the site module and is
                # not guaranteed to exist; raise SystemExit directly.
                raise SystemExit(1) from e
            # Exponential back-off before the next attempt.
            time.sleep(2**retry)

    # Loop until mission starts:
    print("Waiting for the mission to start ", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    # NOTE(review): extra one-second pause once the mission has begun,
    # presumably to let the world settle before the first observation;
    # confirm the intended placement relative to the polling loop.
    time.sleep(1)
    print("\nMission running.")

    total_reward = 0
    current_r = 0
    start = time.time()

    # Loop until mission ends
    while (world_state.is_mission_running
           and world_model.is_mission_running()):
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)

        # Rewards reported by Malmo keep accumulating until an observation
        # arrives and the sum can be accounted for.
        current_r += sum(r.getValue() for r in world_state.rewards)

        if len(world_state.observations) > 0:
            # Only the most recent observation is used.
            raw_obs = json.loads(world_state.observations[-1].text)
            world_model.update(raw_obs)
            current_r += world_model.reward()
            if demo:
                action = model.demo_act(world_model.get_observation())
            else:
                action = model.act(current_r, world_model.get_observation())
            if mission.display is not None:
                mission.display.update(world_model)
            total_reward += current_r
            current_r = 0

            if (world_model.mission_complete()
                    or not world_model.agent_in_arena()):
                agent_host.sendCommand('quit')
            elif world_state.is_mission_running:
                agent_host.sendCommand(action)
                if demo:
                    print(action)

        # NOTE(review): the delay is applied once per polling iteration so
        # the loop does not spin while no observation is available; confirm
        # this matches the intended pacing.
        time.sleep(mission.action_delay)

    end = time.time()
    model.mission_ended()
    print()
    print("Mission ended")
    return MissionStats(reward=total_reward, length=end - start)