def run(self):
    """Run the game with the registered agents.

    Starts one Malmo mission role per registered agent, launches each
    Jason agent's reasoning loop on its own thread, then polls agent
    beliefs forever, forwarding any 'tasks' belief to the task handler.

    Raises:
        :class:`jason_malmo.exceptions.NoAgentsException`: There are no
            registered agents in the game. Register an agent before
            running the game::

                game.register('/path/to/file.asl')
                game.run()
    """
    self._client_pool = MalmoPython.ClientPool()
    if not len(self._agents):
        raise NoAgentsException
    # One local Minecraft client entry per agent, on consecutive ports.
    # NOTE(review): the range allocates len(agents)+1 entries — one more
    # than the number of agents; confirm the extra slot is intended.
    for port in range(10000, 10000 + len(self._agents) + 1):
        self._client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))
    self._my_mission = MalmoPython.MissionSpec(self._get_mission_xml(), True)
    # Start the mission once per agent; each agent joins under its own role index.
    for (index, agent) in enumerate(self._agents):
        malmoutils.parse_command_line(agent.malmo_agent)
        self._safe_start_mission(
            agent.malmo_agent, self._my_mission, self._client_pool,
            malmoutils.get_default_recording_object(
                agent.malmo_agent, "saved_data"),
            index, '')
    self._safe_wait_for_start(
        [agent.malmo_agent for agent in self._agents])
    # Run each Jason agent's reasoning loop on a dedicated thread.
    threads = []
    for agent in self._agents:
        thr = threading.Thread(target=self._jason_env.run_agent, args=(agent, ), kwargs={})
        thr.start()
        threads.append(thr)
    # TODO while mission is running
    while True:
        # Poll beliefs and hand any 'tasks' belief over to the task handler.
        for agent in self._agents:
            for (belief, value) in agent.beliefs.items():
                if belief[0] == 'tasks':
                    tasks = []
                    for task in list(value)[0].args[0]:
                        tasks.append(task)
                    self.tasks.handle(agent, tasks)
        time.sleep(0.05)
def startMission(agent_host, xml):
    """Build a mission from *xml*, start it on *agent_host*, and block
    until Minecraft reports that the mission has begun.

    Retries up to three times on RuntimeError, then aborts the process.
    Also aborts if any world-state errors are reported while waiting.
    """
    mission = MalmoPython.MissionSpec(xml, True)
    record = malmoutils.get_default_recording_object(agent_host, "teleport_results")
    attempts = 3
    for attempt in range(attempts):
        try:
            agent_host.startMission(mission, record)
            break
        except RuntimeError as err:
            if attempt + 1 >= attempts:
                # Out of retries: report and give up.
                print("Error starting mission", err)
                print("Is the game running?")
                exit(1)
            time.sleep(2)
    # Poll until the mission actually begins, bailing on any reported error.
    state = agent_host.peekWorldState()
    while not state.has_mission_begun:
        time.sleep(0.1)
        state = agent_host.peekWorldState()
        for err in state.errors:
            print("Error:", err.text)
        if len(state.errors) > 0:
            exit(1)
# Test runs do a single repeat; normal runs loop effectively forever.
if agent_host.receivedArgument("test"):
    num_reps = 1
else:
    num_reps = 30000

current_yaw = 0
best_yaw = 0
current_life = 0

for iRepeat in range(num_reps):
    # Fresh mission XML per repeat so the mission summary shows the iteration.
    mission_xml = getMissionXML(MOB_TYPE + " Apocalypse #" + str(iRepeat))
    my_mission = MalmoPython.MissionSpec(mission_xml, validate)
    max_retries = 3
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "Mission_" + str(iRepeat))
    for retry in range(max_retries):
        try:
            # Attempt to start the mission:
            agent_host.startMission(my_mission, my_client_pool, my_mission_record, 0, "predatorExperiment")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort the whole run.
                print("Error starting mission", e)
                print("Is the game running?")
                exit(1)
            else:
                time.sleep(2)
    world_state = agent_host.getWorldState()
# Accumulators for per-tick-length timing results.
frameTest = []
wallclockTimes = []
distances = []
# Only the most recent observation matters for this timing test.
agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

print("WELCOME TO THE OVERCLOCK TEST")
print("=============================")
print("This will run the same simple mission with " + str(len(tickLengths)) + " different tick lengths.")
print("(Each test should run faster than the previous one.)")

for iRepeat in range(len(tickLengths)):
    msPerTick = tickLengths[iRepeat]
    my_mission = MalmoPython.MissionSpec(GetMissionXML(str(msPerTick)),validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "Overclock_Test_" + str(msPerTick) + "ms_per_tick");
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission( my_mission, my_mission_record )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort.
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)
    # Block until the mission reports it has begun.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.1)
num_reps = 300 #=======core part initialization==================================== #input size 5*5, you can change the size here memory = MemoryD(5) network_model, q_values_func = nn_model(input_shape=[5, 5]) agent = Agent(network_model, q_values_func, memory, 'train', 'ddqn') #set learning rate to be 0.00025 agent.do_compile(optimizer=Adam(lr=0.00025), loss_func=mean_huber_loss) agent.memoryD.clear() #=================================================================== for iRepeat in range(num_reps): my_mission_record = malmoutils.get_default_recording_object( agent_host, "./Mission_{}".format(iRepeat + 1)) #my_mission_record = MalmoPython.MissionRecordSpec('./' + "Mission_" + str(iRepeat) + ".tgz") #my_mission_record.recordRewards() #my_mission_record.recordMP4(24,400000) #my_mission_record.recordObservations() my_mission = MalmoPython.MissionSpec(GetMissionXML(mapblock, agent_host), validate) max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_record) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:", e)
# "test" mode overrides the normal command-line configuration.
if agent_host.receivedArgument("test"):
    print("Using test settings (overrides other command-line arguments).")
    NUM_REPEATS = 1
    WAIT_TIME = 0.2
    STOP = True
    PATH_LENGTH = 20

# Keep every observation/reward so none are dropped between polls.
agent_host.setObservationsPolicy(
    MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.KEEP_ALL_REWARDS)

for iRepeat in range(NUM_REPEATS):
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML(iRepeat, malmoutils.get_video_xml(agent_host)), validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "QuitFromReachingPosition_Test" + str(iRepeat))
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort.
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.01)
datarate_onscreen=[]
# Only the most recent observation matters for this timing test.
agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

print("WELCOME TO THE RENDER SPEED TEST")
print("================================")
print("This will run the same simple mission with " + str(len(sizes)) + " different frame sizes.")

# Two passes per frame size: odd iterations prioritise offscreen rendering.
for iRepeat in range(len(sizes) * 2):
    prioritiseOffscreen = "true" if iRepeat % 2 else "false"
    width,height = sizes[old_div(iRepeat,2)]
    if iRepeat % 2:
        num_pixels.append(width*height)
    my_mission = MalmoPython.MissionSpec(GetMissionXML(str(width), str(height), prioritiseOffscreen), validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "RenderSpeed_Test" + str(iRepeat));
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission( my_mission, my_mission_record )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort.
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)
    # Block until the mission reports it has begun.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.1)
# Only the latest observation is needed for this timing test.
agent_host.setObservationsPolicy(
    MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

print("WELCOME TO THE OVERCLOCK TEST")
print("=============================")
print("This will run the same simple mission with " + str(len(tickLengths)) + " different tick lengths.")
print("(Each test should run faster than the previous one.)")

for iRepeat in range(len(tickLengths)):
    msPerTick = tickLengths[iRepeat]
    my_mission = MalmoPython.MissionSpec(GetMissionXML(str(msPerTick)), validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "Overclock_Test_" + str(msPerTick) + "ms_per_tick")
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort.
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
    # Block until the mission reports it has begun.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.1)
def run(argv=None):
    """Run a short demo mission: random-walk an agent for 10 seconds.

    Connects to a Minecraft client on 127.0.0.1:10000, starts a mission
    with video and a position reward (retrying up to three times), then
    repeatedly sends move/turn commands until the mission ends. If no
    new data arrives within ``max_response_time`` seconds, Minecraft is
    restarted via ``restart_minecraft``.

    Args:
        argv: command-line arguments forwarded to malmoutils.
            Defaults to [''] (an empty command line).
    """
    # Fix: mutable default argument replaced with a None sentinel.
    if argv is None:
        argv = ['']
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds(10)
    my_mission.requestVideo(320, 240)
    my_mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100.0, 1.1)

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort.
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        # Watchdog: restart Minecraft if the mission never begins.
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand("move 1")
        agent_host.sendCommand("turn " + str(random.random() * 2 - 1))
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:", world_state.number_of_video_frames_since_last_state, world_state.number_of_observations_since_last_state, world_state.number_of_rewards_since_last_state)
        # Watchdog: restart Minecraft if no new data arrives for too long.
        if (world_state.number_of_video_frames_since_last_state > 0 or
                world_state.number_of_observations_since_last_state > 0 or
                world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print("Frame:", frame.width, 'x', frame.height, ':', frame.channels, 'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) )  # to convert to a PIL image
    print("Mission has stopped.")
# -- test each action set in turn -- max_retries = 3 action_sets = ['discrete_absolute', 'discrete_relative', 'teleport'] for action_set in action_sets: if action_set == 'discrete_absolute': my_mission.allowAllDiscreteMovementCommands() elif action_set == 'discrete_relative': my_mission.allowAllDiscreteMovementCommands() elif action_set == 'teleport': my_mission.allowAllAbsoluteMovementCommands() else: print('ERROR: Unsupported action set:', action_set) exit(1) my_mission_recording = malmoutils.get_default_recording_object( agent_host, action_set) for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_recording) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:", e) exit(1) else: time.sleep(2.5) print("Waiting for the mission to start", end=' ') world_state = agent_host.getWorldState() while not world_state.has_mission_begun: print(".", end="")
# -- test each action set in turn -- max_retries = 3 action_sets = ['discrete_absolute','discrete_relative', 'teleport'] for action_set in action_sets: if action_set == 'discrete_absolute': my_mission.allowAllDiscreteMovementCommands() elif action_set == 'discrete_relative': my_mission.allowAllDiscreteMovementCommands() elif action_set == 'teleport': my_mission.allowAllAbsoluteMovementCommands() else: print('ERROR: Unsupported action set:',action_set) exit(1) my_mission_recording = malmoutils.get_default_recording_object(agent_host, action_set) for retry in range(max_retries): try: agent_host.startMission( my_mission, my_mission_recording ) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:",e) exit(1) else: time.sleep(2.5) print("Waiting for the mission to start", end=' ') world_state = agent_host.getWorldState() while not world_state.has_mission_begun: print(".", end="")
'''Tests malmoutils.'''
import MalmoPython
import malmoutils

agentHost = MalmoPython.AgentHost()

# See if we can parse our extended command line.
malmoutils.parse_command_line(agentHost)

# As we are not recording our video xml should be an empty string.
assert malmoutils.get_video_xml(agentHost) == ''

# Test that we can get a default recording spec.
assert type(malmoutils.get_default_recording_object(agentHost, "test")) == MalmoPython.MissionRecordSpec

# Default recordings directory is ''.
assert malmoutils.get_recordings_directory(agentHost) == ''

def clientInfos(cp):
    # Flatten a ClientPool into comparable (ip, control_port, command_port) tuples.
    return [(c.ip_address, c.control_port, c.command_port) for c in cp.clients]

# Test adding some client infos to a client pool.
clientPool = MalmoPython.ClientPool()
assert len(clientPool.clients) == 0
c1 = ("localhost", 10000, 0)
client1 = MalmoPython.ClientInfo(*c1)
clientPool.add(client1)
assert clientInfos(clientPool) == [c1]
c2 = ("127.0.0.1", 10001, 20001)
my_mission.drawBlock( x,45,z,"lava")

my_clients = MalmoPython.ClientPool()
my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

max_retries = 3
agentID = 0
expID = 'tabular_q_learning'
num_repeats = 150
cumulative_rewards = []
for i in range(num_repeats):
    print("\nMap %d - Mission %d of %d:" % ( imap, i+1, num_repeats ))
    # One recording per map/repeat combination.
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "./save_%s-map%d-rep%d" % (expID, imap, i))
    for retry in range(max_retries):
        try:
            agent_host.startMission( my_mission, my_clients, my_mission_record, agentID, "%s-%d" % (expID, i) )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort.
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2.5)
    print("Waiting for the mission to start", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
# Collect errors from every agent's world state; bail out if any occurred.
errors = [e for w in states for e in w.errors]
if len(errors) > 0:
    print("Errors waiting for mission start:")
    for e in errors:
        print(e.text)
    print("Bailing now.")
    exit(1)
time.sleep(0.1)
print(".", end=' ')
print()
if time.time() - start_time >= time_out:
    print("Timed out waiting for mission to begin. Bailing.")
    exit(1)
print("Mission has started.")

# NOTE(review): both recording objects are created from agent_host1 —
# confirm the second call should not use agent_host2 instead.
safeStartMission(agent_host1, my_mission, client_pool, malmoutils.get_default_recording_object(agent_host1, "agent_1_viewpoint_discrete"), 0, '' )
safeStartMission(agent_host2, my_mission, client_pool, malmoutils.get_default_recording_object(agent_host1, "agent_2_viewpoint_discrete"), 1, '' )
safeWaitForStart([agent_host1, agent_host2])

# perform a few actions
reps = 3
time.sleep(1)
for i in range(reps):
    agent_host1.sendCommand('attack 1')
    agent_host2.sendCommand('attack 1')
    time.sleep(1)
    agent_host1.sendCommand('use 1')
    agent_host2.sendCommand('use 1')
    time.sleep(1)

# wait for the missions to end
NUM_REPEATS = 10
# "test" mode overrides the configuration above.
if agent_host.receivedArgument("test"):
    print("Using test settings (overrides other command-line arguments).")
    NUM_REPEATS = 1
    WAIT_TIME = 0.2
    STOP = True
    PATH_LENGTH = 20

# Keep every observation/reward so none are dropped between polls.
agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.KEEP_ALL_REWARDS)

for iRepeat in range(NUM_REPEATS):
    my_mission = MalmoPython.MissionSpec(GetMissionXML(iRepeat, malmoutils.get_video_xml(agent_host)), validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "QuitFromReachingPosition_Test" + str(iRepeat));
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission( my_mission, my_mission_record )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort.
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.01)
# ------------------------------------------------------------------------------------------------ '''Tests malmoutils.''' import MalmoPython import malmoutils agentHost = MalmoPython.AgentHost() # See if we can parse our extended command line. malmoutils.parse_command_line(agentHost) # As we are not recording our video xml should be an empty string. assert malmoutils.get_video_xml(agentHost) == '' # Test that we can get a default recording spec. assert type(malmoutils.get_default_recording_object( agentHost, "test")) == MalmoPython.MissionRecordSpec # Default recordings directory is ''. assert malmoutils.get_recordings_directory(agentHost) == '' def clientInfos(cp): return [(c.ip_address, c.control_port, c.command_port) for c in cp.clients] # Test adding some client infos to a client pool. clientPool = MalmoPython.ClientPool() assert len(clientPool.clients) == 0 c1 = ("localhost", 10000, 0) client1 = MalmoPython.ClientInfo(*c1) clientPool.add(client1)
<ArenaBounds> <min x="-50" y="40" z="-50"/> <max x="50" y="60" z="50"/> </ArenaBounds> <StartPos x="-3" y="50" z="0"/> <Seed>random</Seed> <UpdateSpeed>3</UpdateSpeed> <PermeableBlocks type="air obsidian"/> <BlockType type="beacon"/> </MovingTargetDecorator>''' else: return "" # Code for telling Malmo what to do: my_mission = MalmoPython.MissionSpec(get_mission_xml(), True) my_mission_record = malmoutils.get_default_recording_object(agent_host, "braitenberg_test") my_client_pool = MalmoPython.ClientPool() my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002)) # Attempt to start a mission: max_retries = 3 for retry in range(max_retries): try: agent_host.startMission( my_mission, my_client_pool, my_mission_record, 0, "braitenberg") break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:",e) exit(1) else:
# is using these ports): my_client_pool = MalmoPython.ClientPool() my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000)) my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001)) my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002)) my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10003)) if agent_host.receivedArgument("test"): num_reps = 1 else: num_reps = 30000 for iRepeat in range(num_reps): my_mission = MalmoPython.MissionSpec(GetMissionXML("Let them eat fish/cookies #" + str(iRepeat + 1), malmoutils.get_video_xml(agent_host)),validate) # Set up a recording my_mission_record = malmoutils.get_default_recording_object(agent_host, "Mission_{}".format(iRepeat + 1)) max_retries = 3 for retry in range(max_retries): try: # Attempt to start the mission: agent_host.startMission( my_mission, my_client_pool, my_mission_record, 0, "itemDiscardTestExperiment" ) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission",e) print("Is the game running?") exit(1) else: time.sleep(2) world_state = agent_host.getWorldState()
<ArenaBounds> <min x="-50" y="40" z="-50"/> <max x="50" y="60" z="50"/> </ArenaBounds> <StartPos x="-3" y="50" z="0"/> <Seed>random</Seed> <UpdateSpeed>3</UpdateSpeed> <PermeableBlocks type="air obsidian"/> <BlockType type="beacon"/> </MovingTargetDecorator>''' else: return "" # Code for telling Malmo what to do: my_mission = MalmoPython.MissionSpec(get_mission_xml(), True) my_mission_record = malmoutils.get_default_recording_object(agent_host, "braitenberg_test") # Attempt to start a mission: max_retries = 3 for retry in range(max_retries): try: agent_host.startMission( my_mission, my_mission_record ) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:",e) exit(1) else: time.sleep(2) # Loop until mission starts:
# Report any errors gathered while waiting for the mission, then give up.
for e in errors:
    print(e.text)
print("Bailing now.")
exit(1)
time.sleep(0.1)
print(".", end=' ')
print()
if time.time() - start_time >= time_out:
    print("Timed out waiting for mission to begin. Bailing.")
    exit(1)
print("Mission has started.")

# NOTE(review): both recording objects derive from agent_host1 —
# confirm the second call should not use agent_host2 instead.
safeStartMission(
    agent_host1, my_mission, client_pool,
    malmoutils.get_default_recording_object(agent_host1, "agent_1_viewpoint_discrete"), 0, '')
safeStartMission(
    agent_host2, my_mission, client_pool,
    malmoutils.get_default_recording_object(agent_host1, "agent_2_viewpoint_discrete"), 1, '')
safeWaitForStart([agent_host1, agent_host2])

# perform a few actions
time.sleep(1)
agent_host1.sendCommand('attack 1')
agent_host2.sendCommand('attack 1')
time.sleep(2)
agent_host1.sendCommand('attack 0')
agent_host2.sendCommand('attack 0')
import malmoutils

malmoutils.fix_print()

agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
print(agent_host)

# A minimal 10-second mission with video and a reward for reaching a position.
my_mission = MalmoPython.MissionSpec()
my_mission.timeLimitInSeconds( 10 )
my_mission.requestVideo( 320, 240 )
my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )
print(my_mission)

my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")
print(my_mission_record)

max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission( my_mission, my_mission_record )
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            # Out of retries: abort.
            print("Error starting mission:",e)
            exit(1)
        else:
            time.sleep(2)

print("Waiting for the mission to start", end=' ')
def deep_q_learning(sess,
                    agent_host,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=500000,
                    replay_memory_init_size=50000,
                    update_target_estimator_every=10000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=50000,
                    batch_size=32,
                    record_video_every=100):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.

    This is a generator: it yields (total_t, EpisodeStats) after every episode.

    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost used in place of an OpenAI environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sampel when
            initializing the reply memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the
            target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
            Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes

    Returns:
        An EpisodeStats object with two numpy arrays for episode_lengths and
        episode_rewards.
    """
    # Load the mission definition and restrict it to discrete movement.
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(
        q_estimator,
        len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "./save_%s-rep" % (expID))
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" %(expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                # Out of retries: abort.
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    # Wait for the mission to begin.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Wait until a non-empty observation is available.
    # world_state = agent_host.peekWorldState()
    while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()
    world_state = agent_host.getWorldState()

    # Populate the replay memory with initial experience
    print("Populating replay memory...")

    while world_state.number_of_observations_since_last_state <= 0:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()
    state = gridProcess(world_state)  # MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    # Stack 4 copies of the frame as the initial state history.
    state = np.stack([state] * 4, axis=2)

    for i in range(replay_memory_init_size):
        print("%s th replay memory" %i)
        action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        # next_state, reward, done, _ = env.step(actionSet[action])

        # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        agent_host.sendCommand(actionSet[action])

        # checking if the mission is done
        world_state = agent_host.peekWorldState()

        # Getting the reward from taking a step
        if world_state.number_of_rewards_since_last_state > 0:
            reward = world_state.rewards[-1].getValue()
            print("Just received the reward: %s on action: %s "%(reward, actionSet[action]))
        else:
            print("No reward")
            reward = 0

        # getting the next state
        while world_state.number_of_observations_since_last_state <=0 and world_state.is_mission_running:
            print("Sleeping")
            time.sleep(0.1)
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
            done = not world_state.is_mission_running
            replay_memory.append(Transition(state, action, reward, next_state, done))
            state = next_state
        else:
            # Mission ended while populating memory: restart it and reset state.
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)
            world_state = agent_host.getWorldState()
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
            world_state = agent_host.peekWorldState()
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            world_state = agent_host.getWorldState()
            if not world_state.is_mission_running:
                print("Breaking")
                break
            state = gridProcess(world_state)  # Malmo GetworldState? / env.reset()
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)

    print("Finished populating memory")

    # Record videos
    # Use the gym env Monitor wrapper
    # env = Monitor(env,
    #               directory=monitor_path,
    #               resume=True,
    #               video_callable=lambda count: count % record_video_every ==0)
    # NEED TO RECORD THE VIDEO AND SAVE TO THE SPECIFIED DIRECTORY

    for i_episode in range(num_episodes):
        print("%s-th episode"%i_episode)
        if i_episode != 0:
            # Restart a fresh mission for every episode after the first.
            mission_file = agent_host.getStringArgument('mission_file')
            with open(mission_file, 'r') as f:
                print("Loading mission from %s" % mission_file)
                mission_xml = f.read()
                my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission.removeAllCommandHandlers()
            my_mission.allowAllDiscreteMovementCommands()
            # my_mission.requestVideo(320, 240)
            my_mission.forceWorldReset()
            my_mission.setViewpoint(2)
            my_clients = MalmoPython.ClientPool()
            my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available
            max_retries = 3
            agentID = 0
            expID = 'Deep_q_learning '
            # NOTE(review): this format uses `i`, a leftover index from the
            # memory-population loop above, not `i_episode` — confirm intended.
            my_mission_record = malmoutils.get_default_recording_object(agent_host, "./save_%s-rep%d" % (expID, i))
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s-%d" % (expID, i))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            print("Waiting for the mission to start", end=' ')
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
                for error in world_state.errors:
                    print("Error:", error.text)

        # Save the current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)

        # world_state = agent_host.getWorldState()
        # Reset the environment
        # world_state = agent_host.peekWorldState()
        while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
            # print("Sleeping!!!")
            world_state = agent_host.peekWorldState()
        world_state = agent_host.getWorldState()
        state = gridProcess(world_state)  # MalmoGetWorldState?
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)
        loss = None

        # One step in the environment
        for t in itertools.count():

            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print("\nCopied model parameters to target network.")

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                t, total_t, i_episode + 1, num_episodes, loss), end="")
            sys.stdout.flush()

            # Take a step
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
            # next_state, reward, done, _ = env.step(actionSet[action])

            # Malmo AgentHost send command?
            # print("Sending command: ", actionSet[action])
            agent_host.sendCommand(actionSet[action])
            world_state = agent_host.peekWorldState()

            if world_state.number_of_rewards_since_last_state > 0:
                reward = world_state.rewards[-1].getValue()
                print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
            else:
                print("No reward")
                reward = 0

            # Wait for a non-empty observation (or mission end).
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                # print("Sleeping!!!")
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            # if not world_state.is_mission_running:
            #     print("Breaking")
            #     break
            done = not world_state.is_mission_running
            print(" IS MISSION FINISHED? ", done)

            # if done:
            #     print("Breaking before updating last reward")
            #     break
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)

            # If our replay memory is full, pop the first element
            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)

            # Save transition to replay memory
            replay_memory.append(Transition(state, action, reward, next_state, done))

            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] = t

            # Sample a minibatch from the replay memory
            samples = random.sample(replay_memory, batch_size)
            states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))

            # Calculate q values and targets (Double DQN)
            q_values_next = q_estimator.predict(sess, next_states_batch)
            best_actions = np.argmax(q_values_next, axis=1)
            q_values_next_target = target_estimator.predict(sess, next_states_batch)
            targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
                discount_factor * q_values_next_target[np.arange(batch_size), best_actions]

            # Perform gradient descent update
            states_batch = np.array(states_batch)
            loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)

            if done:
                print("End of Episode")
                break

            state = next_state
            total_t += 1

        # Add summaries to tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], node_name="episode_reward", tag="episode_reward")
        episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], node_name="episode_length", tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode+1],
            episode_rewards=stats.episode_rewards[:i_episode+1])

    # env.monitor.close()
    return stats
<AgentHandlers> <ObservationFromFullStats/> <ContinuousMovementCommands turnSpeedDegs="180"/> <ChatCommands /> <MissionQuitCommands quitDescription="give_up"/> <RewardForMissionEnd> <Reward description="give_up" reward="-1000"/> </RewardForMissionEnd>''' + malmoutils.get_video_xml(agent_host) + ''' </AgentHandlers> </AgentSection> </Mission>''' # Create default Malmo objects: my_mission = MalmoPython.MissionSpec(missionXML, True) my_mission_record = malmoutils.get_default_recording_object(agent_host, "Mission_1") # Attempt to start a mission: max_retries = 3 for retry in range(max_retries): try: agent_host.startMission( my_mission, my_mission_record ) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:",e) exit(1) else: time.sleep(2) # Loop until mission starts:
<Item reward="-1" type="stained_glass"/> </RewardForCollectingItem> <RewardForDiscardingItem> <Item reward="1" type="stained_glass"/> </RewardForDiscardingItem>''' + malmoutils.get_video_xml( agent_host) + ''' </AgentHandlers> </AgentSection> </Mission>''' my_mission = MalmoPython.MissionSpec(missionXML, True) num_missions = 10 if agent_host.receivedArgument("test") else 30000 for mission_no in range(num_missions): merges_allowed = mission_no % 2 my_mission_record = malmoutils.get_default_recording_object( agent_host, "Mission_{}".format(mission_no + 1)) max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_record) break except RuntimeError as e: print(e) if retry == max_retries - 1: print("Error starting mission", e) print("Is the game running?") exit(1) else: time.sleep(2) world_state = agent_host.peekWorldState()
def run(argv=None):
    """Run an obstacle-hopping wander bot against a local Minecraft client.

    Builds a survival mission in a freshly generated default world, starts
    it on a client at 127.0.0.1:10000, then walks forward weaving on a sine
    wave.  Each tick it reads a 3x4x3 block grid around the agent, works
    out the compass facing from Yaw, jumps when the three cells directly
    ahead are not air, and turns hard when a two-block-high wall is beside
    it.  A watchdog restarts Minecraft if no data arrives for too long.

    Args:
        argv: optional command-line argument list forwarded to
            malmoutils.parse_command_line; defaults to [''].

    Returns:
        None.  Returns early if MALMO_XSD_PATH is not set.
    """
    # Fix: avoid a mutable default argument (a single list shared across calls).
    if argv is None:
        argv = ['']
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    # forceReset="true" regenerates the world on every run.
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <About>
        <Summary>Hello world!</Summary>
    </About>
    <ServerSection>
        <ServerHandlers>
            <DefaultWorldGenerator forceReset="true" />
            <ServerQuitFromTimeUp timeLimitMs="30000"/>
            <ServerQuitWhenAnyAgentFinishes/>
        </ServerHandlers>
    </ServerSection>
    <AgentSection mode="Survival">
        <Name>MalmoTutorialBot</Name>
        <AgentStart>
            <Inventory>
                <InventoryItem slot="8" type="diamond_pickaxe"/>
            </Inventory>
        </AgentStart>
        <AgentHandlers>
            <ObservationFromFullStats/>
            <ObservationFromGrid>
                <Grid name="all_the_blocks" >
                    <min x="-1" y="-1" z="-1"/>
                    <max x="1" y="2" z="1"/>
                </Grid>
            </ObservationFromGrid>
            <ContinuousMovementCommands turnSpeedDegs="180"/>
        </AgentHandlers>
    </AgentSection>
</Mission>'''
    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.timeLimitInSeconds(300)
    my_mission.requestVideo(640, 480)
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)
    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)
    max_retries = 3
    max_response_time = 60  # seconds
    # The client may still be warming up: retry the start a few times.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    last_delta = time.time()
    # main loop:
    TURN = 0   # phase of the sinusoidal weave
    TURN2 = 0  # countdown of hard-turn ticks after hitting a wall
    JUMP = 0   # countdown of ticks a jump is held
    while world_state.is_mission_running:
        print("New Iteration")
        if JUMP > 0:
            JUMP = JUMP - 1
            if JUMP == 0:
                # Jump finished: release the jump key and go negative so
                # this branch stays inert until the next jump is triggered.
                agent_host.sendCommand("jump 0")
                JUMP = JUMP - 1
        agent_host.sendCommand("move 1")
        # Weave left/right on a sine wave so the walk is not a straight line.
        if math.sin(TURN) / 3 >= 0:
            agent_host.sendCommand("turn 0.15")
        else:
            agent_host.sendCommand("turn -0.2")
        print(TURN, " ", math.sin(TURN))
        TURN = TURN + 0.3
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        # NOTE(review): this raises IndexError if no observation arrived in
        # the last tick — confirm observation cadence before hardening.
        y = json.loads(world_state.observations[-1].text)
        # Derive the compass facing from Yaw (degrees).
        facing = ""
        if y["Yaw"] + 180 < 90:
            facing = "S"
            print("Facing South")
        elif y["Yaw"] < 180:
            facing = "W"
            print("Facing West")
        elif y["Yaw"] < 270:
            facing = "N"
            print("Facing North")
        else:
            facing = "E"
            print("Facing East")
        # Split the flat grid observation into four 9-cell (3x3) layers;
        # blocks[1] and blocks[2] are the layers the agent's body occupies.
        blocks = [[], [], [], []]
        i = 0
        for x in y["all_the_blocks"]:
            blocks[math.floor(i / 9)].append(x)
            i = i + 1
        # Check the three body-level cells in front of the agent; jump for
        # two ticks if any of them is solid.
        if facing == "S":
            willjump = False
            for j in range(0, 3):
                if blocks[1][j] != "air":
                    willjump = True
                print(j, blocks[1][j], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif facing == "W":
            willjump = False
            for j in range(0, 3):
                if blocks[1][j * 3] != "air":
                    willjump = True
                print(j * 3, blocks[1][j * 3], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif facing == "E":
            willjump = False
            for j in range(1, 4):
                if blocks[1][j * 3 - 1] != "air":
                    willjump = True
                print(j * 3 - 1, blocks[1][j * 3 - 1], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif facing == "N":
            willjump = False
            for j in range(0, 3):
                # Fix: test the same cells that are reported (j + 6, the
                # north row).  The original tested blocks[1][j] (the south
                # row) while printing blocks[1][j + 6], unlike the S/W/E
                # branches which test exactly what they print.
                if blocks[1][j + 6] != "air":
                    willjump = True
                print(j, blocks[1][j + 6], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        # A column that is solid at both body levels means a wall we cannot
        # jump: turn hard for a couple of ticks.  (The original condition
        # repeated both clauses verbatim; deduplicated.)
        if (blocks[1][2] != "air" and blocks[2][2] != "air"
                or blocks[1][4] != "air" and blocks[2][4] != "air"):
            TURN2 = 2
        if TURN2 >= 0:
            agent_host.sendCommand("turn 1")
            TURN2 = TURN2 - 1
        # Watchdog: restart Minecraft if nothing has arrived for too long.
        if (world_state.number_of_video_frames_since_last_state > 0
                or world_state.number_of_observations_since_last_state > 0
                or world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print()
    print("Mission has stopped.")
def main():
    """Have a team of Malmo agents play a song on note blocks.

    Loads note frequencies from a song CSV (mt.create_note_list), converts
    them to note numbers, solves which agent plays which note at each beat
    (cs.get_solutions), starts one mission per agent, then for every beat
    teleports each agent to its note block and strikes it.

    NOTE(review): depends on module-level helpers and globals defined
    elsewhere in this file (mt, cs, getMissionXML,
    generateAgentTeleportPositions, note_positions, startMission,
    waitForStart, Musician).  The startMission used here takes six
    arguments (host, mission, pool, record, role, experiment_id) — confirm
    it is not the two-argument variant defined in another part of the file.
    """
    # Hardcode number of agents to play song
    num_agents = 4
    # Obtain song csv and get solutions.  Alternate songs kept for reference:
    #freq_list = mt.create_note_list("Twinkle_Twinkle_Little_Star.csv",120,7000,-.08) #1 Agent
    #freq_list = mt.create_note_list("Chopsticks.csv",120,4000,-.15,.03) #2 Agents
    freq_list = mt.create_note_list("Bad_Apple.csv", 120, 3000, -.08, .03)  #2 Agents
    #freq_list = mt.create_note_list("Grenade_120BPM.csv",120,1500,-.08,.03) #4 Agents
    freq_list = mt.number_converter(freq_list)
    # solutions[j][i] = note for agent j at beat i (one row per agent).
    solutions = cs.get_solutions(freq_list, num_agents)
    print(solutions)
    #print(solutions)
    # Get Mission. Needed for teleport positions.
    missionXML = getMissionXML(num_agents)
    # Create musician for each agent and pass teleport positions.
    musicians = []
    for i in range(num_agents):
        agent_positions = generateAgentTeleportPositions(note_positions, i)
        musicians.append(Musician(agent_positions))
    '''
    MALMO
    '''
    print('Starting...', flush=True)
    # Create agents.
    agent_hosts = []
    for i in range(num_agents):
        agent_hosts.append(MalmoPython.AgentHost())
    # Only the first host parses the command line; the rest share defaults.
    malmoutils.parse_command_line(agent_hosts[0])
    # Get mission and allow commands for teleport.
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.allowAllChatCommands()
    # Add one client per agent, on consecutive ports from 10000.
    my_client_pool = MalmoPython.ClientPool()
    for i in range(num_agents):
        my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000 + i))
    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    # Start mission for each agent (role i in the shared mission).
    for i in range(num_agents):
        startMission(
            agent_hosts[i], my_mission, my_client_pool,
            malmoutils.get_default_recording_object(
                agent_hosts[0],
                "agent_" + str(i + 1) + "_viewpoint_discrete"), i, '')
    # Wait for all missions to begin.
    waitForStart(agent_hosts)
    # Pause for simulation to begin.
    time.sleep(1)
    '''
    SIMULATION BEGINS HERE
    '''
    for i in range(len(solutions[0])):
        # Teleport each agent to the corresponding note.
        for j in range(len(musicians)):
            musicians[j].teleport_to_noteblock(agent_hosts[j],
                                               solutions[j][i])
        # Play each note: press attack on every agent that has a note…
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 1")
        # …hold briefly so the strike registers…
        time.sleep(0.001)
        # …then release and clear the play flag until the next beat.
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 0")
                musicians[k].can_play = False
        # Modifies the timing between each note hit (tempo).
        time.sleep(0.2)
def deep_q_learning_run(sess,
                        agent_host,
                        q_estimator,
                        state_processor,
                        experiment_dir,
                        epsilon_start=1.0,
                        epsilon_end=0.1,
                        epsilon_decay_steps=8000):
    """Replay a Malmo mission with a checkpointed DQN session.

    Loads the mission named by the agent host's 'mission_file' argument,
    restores the model checkpoint from experiment_dir/checkpoints, starts
    the mission, and steps through it while maintaining the same 4-frame
    stacked state representation used during training.

    Args:
        sess: TensorFlow Session object.
        agent_host: Malmo AgentHost used to load and run the mission.
        q_estimator: Estimator for the Q-values (used to build the policy).
        state_processor: StateProcessor that preprocesses raw grid frames.
        experiment_dir: directory containing the "checkpoints" folder.
        epsilon_start: start value of the epsilon decay schedule.
        epsilon_end: final value of the epsilon decay schedule.
        epsilon_decay_steps: number of steps to decay epsilon over.

    Returns:
        None.

    NOTE(review): `epsilons` and `policy` are computed but never used —
    every action taken below is randint(0, 3), not the learned policy.
    Presumably this run loop was meant to act greedily; confirm intent.
    """
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    # Replay uses discrete movement only, with an overhead viewpoint.
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available
    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'
    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    print("Checkpoint dir is:", checkpoint_dir)
    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    # latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    # print("~~~~~~~~~~~~~~", latest_checkpoint)
    # exit(0)
    # NOTE(review): this joined path is always truthy, so the restore below
    # runs unconditionally (unlike tf.train.latest_checkpoint, which returns
    # None when no checkpoint exists) — confirm a checkpoint is present.
    latest_checkpoint = os.path.join(checkpoint_dir, "model")
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)
    total_t = sess.run(tf.contrib.framework.get_global_step())
    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)
    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "save_%s-rep" % (expID))
    # Retry the start: the Minecraft client may not be ready yet.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    agentID, "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)
    world_state = agent_host.getWorldState()
    # Poll until the mission has actually begun.
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    # Tilt the camera down twice to get the training viewpoint.
    agent_host.sendCommand("look -1")
    agent_host.sendCommand("look -1")
    # Wait until a non-empty observation arrives.
    while world_state.is_mission_running and all(
            e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()
    # Populate the replay memory with initial experience
    while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()
    state = gridProcess(
        world_state
    )  # MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    # Stack the first frame 4x to form the initial (H, W, 4) state.
    state = np.stack([state] * 4, axis=2)
    stepNum = 0
    while world_state.is_mission_running:
        action = randint(0, 3)
        print("actions:", action)
        # next_state, reward, done, _ = env.step(actionSet[action])
        # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        print("Step %s" % stepNum)
        stepNum += 1
        agent_host.sendCommand(actionSet[action])
        world_state = agent_host.peekWorldState()
        # Wait for at least one new video frame after acting.
        num_frames_seen = world_state.number_of_video_frames_since_last_state
        while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
            world_state = agent_host.peekWorldState()
        if world_state.is_mission_running:
            # Getting the reward from taking a step
            while world_state.number_of_observations_since_last_state <= 0:
                time.sleep(0.1)
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            # Slide the frame window: drop the oldest, append the newest.
            next_state = np.append(state[:, :, 1:],
                                   np.expand_dims(next_state, 2),
                                   axis=2)
            state = next_state
            # time.sleep(1)
    return None
def run(argv=None):
    """Run a short random-walk demo mission against a local Minecraft client.

    Builds a default mission (10 s time limit, 320x240 video, a reward for
    reaching (19.5, 0, 19.5)), starts it on a client at 127.0.0.1:10000,
    then walks forward with random turns until the mission ends, printing
    state counts, rewards, errors and frame sizes as they arrive.  A
    watchdog restarts Minecraft when no data arrives for too long.

    Args:
        argv: optional command-line argument list forwarded to
            malmoutils.parse_command_line; defaults to [''].

    Returns:
        None.  Returns early if MALMO_XSD_PATH is not set.
    """
    # Fix: avoid a mutable default argument (a single list shared across calls).
    if argv is None:
        argv = ['']
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)
    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds(10)
    my_mission.requestVideo(320, 240)
    my_mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100.0, 1.1)
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")
    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)
    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)
    max_retries = 3
    max_response_time = 60  # seconds
    # The client may still be warming up: retry the start a few times.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand("move 1")
        # Random heading change in [-1, 1) each tick.
        agent_host.sendCommand("turn " + str(random.random() * 2 - 1))
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",
              world_state.number_of_video_frames_since_last_state,
              world_state.number_of_observations_since_last_state,
              world_state.number_of_rewards_since_last_state)
        # Watchdog: restart Minecraft if nothing has arrived for too long.
        if (world_state.number_of_video_frames_since_last_state > 0
                or world_state.number_of_observations_since_last_state > 0
                or world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print("Frame:", frame.width, 'x', frame.height, ':',
                  frame.channels, 'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
if agent_host.receivedArgument("test"): num_reps = 1 else: num_reps = 30000 current_yaw = 0 best_yaw = 0 current_life = 0 for iRepeat in range(num_reps): mission_xml = getMissionXML(MOB_TYPE + " Apocalypse #" + str(iRepeat)) my_mission = MalmoPython.MissionSpec(mission_xml,validate) max_retries = 3 # Set up a recording my_mission_record = malmoutils.get_default_recording_object(agent_host, "Mission_" + str(iRepeat)) for retry in range(max_retries): try: # Attempt to start the mission: agent_host.startMission( my_mission, my_client_pool, my_mission_record, 0, "predatorExperiment" ) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission",e) print("Is the game running?") exit(1) else: time.sleep(2) world_state = agent_host.getWorldState() while not world_state.has_mission_begun:
<ObservationFromFullStats/> <ContinuousMovementCommands turnSpeedDegs="180"/> <ChatCommands /> <MissionQuitCommands quitDescription="give_up"/> <RewardForMissionEnd> <Reward description="give_up" reward="-1000"/> </RewardForMissionEnd>''' + malmoutils.get_video_xml( agent_host) + ''' </AgentHandlers> </AgentSection> </Mission>''' # Create default Malmo objects: my_mission = MalmoPython.MissionSpec(missionXML, True) my_mission_record = malmoutils.get_default_recording_object( agent_host, "Mission_1") # Attempt to start a mission: max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_record) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:", e) exit(1) else: time.sleep(2) # Loop until mission starts:
num_steps_since_last_chat = 0 cumulative_rewards = [] for iRepeat in range(num_episodes): xorg = (iRepeat % 64) * 32 zorg = ((old_div(iRepeat, 64)) % 64) * 32 yorg = 200 + ((old_div(iRepeat, (64 * 64))) % 64) * 8 print("Mission " + str(iRepeat) + " --- starting at " + str(xorg) + ", " + str(yorg) + ", " + str(zorg)) validate = True my_mission = MalmoPython.MissionSpec( GetMissionXML(iRepeat, xorg, yorg, zorg), validate) my_mission_record = malmoutils.get_default_recording_object( agent_host, "episode_{}_role_{}".format(iRepeat + 1, role)) unique_experiment_id = genExperimentID( iRepeat ) # used to disambiguate multiple running copies of the same mission max_retries = 3 retry = 0 while True: try: print("Calling startMission...") agent_host.startMission(my_mission, client_pool, my_mission_record, role, unique_experiment_id) #agent_host.startMission( my_mission, client_pool ) break except MalmoPython.MissionException as e: errorCode = e.details.errorCode
num_repeats = itersNum cumulative_rewards = [] rolling_avg = [] # Trying to load existing q-table. agent.loadModel(q_tableFile) # Activate evaluation mode: if EVALUATE: agent.evaluate() for i in range(num_repeats): print("\nMap %d - Mission %d of %d:" % (imap, i + 1, num_repeats)) my_mission_record = malmoutils.get_default_recording_object( agent_host, "./save_%s-map%d-rep%d" % (expID, imap, i)) for retry in range(max_retries): try: agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s-%d" % (expID, i)) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:", e) exit(1) else: time.sleep(2.5) print("Waiting for the mission to start", end=' ')
chat_frequency = 30 # if we send chat messages too frequently the agent will be disconnected for spamming num_steps_since_last_chat = 0 for iRepeat in range(num_episodes): xorg = (iRepeat % 64) * 32 zorg = ((old_div(iRepeat, 64)) % 64) * 32 yorg = 200 + ((old_div(iRepeat, (64*64))) % 64) * 8 print("Mission " + str(iRepeat) + " --- starting at " + str(xorg) + ", " + str(yorg) + ", " + str(zorg)) validate = True my_mission = MalmoPython.MissionSpec(GetMissionXML(iRepeat, xorg, yorg, zorg), validate) my_mission_record = malmoutils.get_default_recording_object(agent_host, "episode_{}_role_{}".format(iRepeat + 1, role)) unique_experiment_id = genExperimentID(iRepeat) # used to disambiguate multiple running copies of the same mission max_retries = 3 retry = 0 while True: try: print("Calling startMission...") agent_host.startMission( my_mission, client_pool, my_mission_record, role, unique_experiment_id ) break except MalmoPython.MissionException as e: errorCode = e.details.errorCode if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP: print("Server not online yet - will keep waiting as long as needed.") time.sleep(1) elif errorCode in [MalmoPython.MissionErrorCode.MISSION_INSUFFICIENT_CLIENTS_AVAILABLE,