def run(self):
    """Run the game with the registered agents.

    Builds a Malmo client pool (one local client per agent, ports from
    10000 up), compiles the mission XML, starts the mission for every
    registered agent, then spawns one thread per agent to run its Jason
    reasoning loop while this thread polls agent beliefs for 'tasks'.

    Raises:
        :class:`jason_malmo.exceptions.NoAgentsException`: There are no
            registered agents in the game. Register an agent before
            running the game::

                game.register('/path/to/file.asl')
                game.run()
    """
    # NOTE(review): reformatted from a collapsed single-line source;
    # indentation was reconstructed — verify against the original file.
    self._client_pool = MalmoPython.ClientPool()
    if not len(self._agents):
        raise NoAgentsException
    # NOTE(review): range allocates len(self._agents) + 1 ports — one more
    # than there are agents; presumably intentional headroom, confirm.
    for port in range(10000, 10000 + len(self._agents) + 1):
        self._client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))
    self._my_mission = MalmoPython.MissionSpec(self._get_mission_xml(), True)
    for (index, agent) in enumerate(self._agents):
        malmoutils.parse_command_line(agent.malmo_agent)
        # role = index ties each agent to its slot in the mission XML
        self._safe_start_mission(
            agent.malmo_agent, self._my_mission, self._client_pool,
            malmoutils.get_default_recording_object(
                agent.malmo_agent, "saved_data"), index, '')
    self._safe_wait_for_start(
        [agent.malmo_agent for agent in self._agents])
    # One daemonless thread per agent running the Jason environment loop.
    threads = []
    for agent in self._agents:
        thr = threading.Thread(target=self._jason_env.run_agent, args=(agent, ), kwargs={})
        thr.start()
        threads.append(thr)
    # TODO while mission is running
    # Poll each agent's belief base; a belief whose functor is 'tasks'
    # carries a task list that is handed to the task handler.
    while True:
        for agent in self._agents:
            for (belief, value) in agent.beliefs.items():
                if belief[0] == 'tasks':
                    tasks = []
                    for task in list(value)[0].args[0]:
                        tasks.append(task)
                    self.tasks.handle(agent, tasks)
        time.sleep(0.05)
def StartServer(self, names, ip='127.0.0.1'):
    """Initiate a server given a mission XML and a list of agent names.

    One Malmo client (port 10000 + index) and one MultiAgent are created
    per name; the first agent's host parses the command line, then every
    agent's mission is started and we block until all have begun.
    """
    for index, agent_name in enumerate(names):
        port = 10000 + index
        self.clientPool.add(MalmoPython.ClientInfo(ip, port))
        self.agents.append(MultiAgent(agent_name, self.missionXML, index))
    # The first agent's host carries the shared command-line options.
    malmoutils.parse_command_line(self.agents[0].host)
    for member in self.agents:
        member.StartMission(self.clientPool)
    self.safeWaitForStart(self.agents)
# Script preamble: command-line parsing, recording setup, and arena
# constants for a zombie-chase style Malmo task.
# NOTE(review): reformatted from a collapsed single-line source.
import random
import sys
import time
import json
import random  # NOTE(review): duplicate import of random — harmless, candidate for removal
import errno
import math
import malmoutils
import numpy as np
import agentMC

malmoutils.fix_print()
# NOTE(review): MalmoPython is used here but not imported in this chunk —
# presumably imported earlier in the file; verify.
agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
recordingsDirectory = malmoutils.get_recordings_directory(agent_host)
# Only request video frames when recording was asked for on the command line.
video_requirements = '<VideoProducer><Width>860</Width><Height>480</Height></VideoProducer>' if agent_host.receivedArgument( "record_video") else ''

# Task parameters:
MAX_DISTANCE = 40
MAX_ZOMBIES = 16
####### SPEED OF GAME #######
SPEED = 8
ARENA_WIDTH = MAX_DISTANCE
ARENA_BREADTH = MAX_DISTANCE

def getCorner(index, top, left, expand=0, y=0):
    ''' Return part of the XML string that defines the requested corner'''
def run(argv=['']):
    """Run a simple wander-randomly Malmo mission with a crash watchdog.

    Starts a 10-second mission on the local client (port 10000), then
    drives the agent forward with random turns, restarting Minecraft via
    restart_minecraft() if the world state stalls for too long.

    Args:
        argv: command-line argument list forwarded to malmoutils.
    """
    # NOTE(review): reformatted from a collapsed single-line source;
    # indentation was reconstructed — verify against the original file.
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)
    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds( 10 )
    my_mission.requestVideo( 320, 240 )
    my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")
    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)
    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)
    max_retries = 3
    max_response_time = 60 # seconds
    # Retry startMission a few times — the client may still be warming up.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)
    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:",error.text)
    print()
    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand( "move 1" )
        # Random turn in [-1, 1)
        agent_host.sendCommand( "turn " + str(random.random()*2-1) )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",world_state.number_of_video_frames_since_last_state,world_state.number_of_observations_since_last_state,world_state.number_of_rewards_since_last_state)
        # Watchdog: restart Minecraft if nothing at all has arrived for a minute.
        if (world_state.number_of_video_frames_since_last_state > 0 or
                world_state.number_of_observations_since_last_state > 0 or
                world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:",reward.getValue())
        for error in world_state.errors:
            print("Error:",error.text)
        for frame in world_state.video_frames:
            print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
def main():
    """Play a song with multiple Malmo agents striking note blocks.

    Loads a note list from CSV, assigns notes to agents, starts one
    mission per agent, then on each beat teleports every agent to its
    note block and sends attack press/release commands to play it.
    """
    # NOTE(review): reformatted from a collapsed single-line source;
    # indentation (notably the sleep placements) was reconstructed — verify.
    #Hardcode number of agents to play song
    num_agents = 4
    #Obtain song csv and get solutions
    #freq_list = mt.create_note_list("Twinkle_Twinkle_Little_Star.csv",120,7000,-.08) #1 Agent
    #freq_list = mt.create_note_list("Chopsticks.csv",120,4000,-.15,.03) #2 Agents
    freq_list = mt.create_note_list("Bad_Apple.csv", 120, 3000, -.08, .03) #2 Agents
    #freq_list = mt.create_note_list("Grenade_120BPM.csv",120,1500,-.08,.03) #4 Agents
    freq_list = mt.number_converter(freq_list)
    solutions = cs.get_solutions(freq_list, num_agents)
    print(solutions)
    #print(solutions)
    #Get Mission. Needed for teleport positions.
    missionXML = getMissionXML(num_agents)
    #Create musician for each agent and pass teleport positions.
    # NOTE(review): note_positions is not defined in this chunk —
    # presumably a module-level global; verify.
    musicians = []
    for i in range(num_agents):
        agent_positions = generateAgentTeleportPositions(note_positions, i)
        musicians.append(Musician(agent_positions))
    ''' MALMO '''
    print('Starting...', flush=True)
    #Create agents.
    agent_hosts = []
    for i in range(num_agents):
        agent_hosts.append(MalmoPython.AgentHost())
    # First host holds the command-line options for all agents.
    malmoutils.parse_command_line(agent_hosts[0])
    #Get mission and allow commands for teleport.
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.allowAllChatCommands()
    #Add client for each agent needed.
    my_client_pool = MalmoPython.ClientPool()
    for i in range(num_agents):
        my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000 + i))
    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    #Start mission for each agent
    for i in range(num_agents):
        startMission(
            agent_hosts[i], my_mission, my_client_pool,
            malmoutils.get_default_recording_object(
                agent_hosts[0], "agent_" + str(i + 1) + "_viewpoint_discrete"), i, '')
    #Wait for all missions to begin.
    waitForStart(agent_hosts)
    #Pause for simulation to begin.
    time.sleep(1)
    ''' SIMULATION BEGINS HERE '''
    for i in range(len(solutions[0])):
        #teleport each agent to the corresponding note.
        for j in range(len(musicians)):
            musicians[j].teleport_to_noteblock(agent_hosts[j], solutions[j][i])
        # play each note.
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 1")
        # NOTE(review): brief press duration before releasing — placement
        # of this sleep relative to the loops was reconstructed; confirm.
        time.sleep(0.001)
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 0")
                musicians[k].can_play = False
        #modifies the timing between each note hit.
        time.sleep(0.2)
# Script preamble: Malmo setup plus block "palettes" used to texture a
# generated test structure.
# NOTE(review): reformatted from a collapsed single-line source.
import MalmoPython
import os
import random
import sys
import time
import json
import copy
import errno
import xml.etree.ElementTree
from collections import deque
import malmoutils

malmoutils.fix_print()
agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
recordingsDirectory = malmoutils.get_recordings_directory(agent_host)

# Set up some palettes (each is six block types, bottom-to-top):
colourful=["stained_glass", "diamond_block", "lapis_block", "gold_block", "redstone_block", "obsidian"]
fiery=["stained_glass WHITE", "stained_glass PINK", "stained_glass ORANGE", "stained_glass RED", "wool BLACK", "glowstone"]
oresome=["gold_ore", "lapis_ore", "iron_ore", "emerald_ore", "redstone_ore", "quartz_ore"]
frilly=["skull", "stained_glass WHITE", "wool PINK", "wool WHITE", "stained_hardened_clay PINK", "stained_hardened_clay WHITE"]
icepalace=["ice", "stained_glass", "stained_glass", "stained_glass", "stained_glass", "snow"]
volatile=["tnt", "stained_glass", "stained_glass", "redstone_block", "stained_glass", "stained_glass"]
oak=["planks", "planks", "planks", "planks", "lapis_block", "lapis_block"]
sponge=["sponge", "glass", "sponge", "glass", "sponge", "glass"]
# NOTE(review): "palletes" is a misspelling of "palettes" — kept, since
# other parts of the file may reference this name.
palletes = [colourful, fiery, oresome, frilly, icepalace, volatile, oak, sponge]

# dimensions of the test structure:
SIZE_X = 21
print("Bailing now.") exit(1) time.sleep(0.1) print(".", end=' ') if time.time() - start_time >= time_out: print("Timed out while waiting for mission to start - bailing.") exit(1) print() print("Mission has started.") # -- set up two agent hosts -- agent_host_simeon = MalmoPython.AgentHost() agent_host_fred = MalmoPython.AgentHost() # Use simeon's agenthost to hold the command-line options: malmoutils.parse_command_line(agent_host_simeon) # -- set up the mission -- xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?> <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <About> <Summary/> </About> <ModSettings> <MsPerTick>10</MsPerTick> <!-- Because it's pretty boring watching Fred build steps for five minutes... --> </ModSettings> <ServerSection> <ServerInitialConditions> <Time> <StartTime>0</StartTime> </Time>
def main(agent_host):
    """Train a DQN agent in Malmo, optionally collecting vision data.

    Runs episodes until MAX_GLOBAL_STEPS: each step takes an epsilon-greedy
    action from q_network, stores the transition in a replay buffer, and
    periodically trains and syncs the target network. Depth/vision frames
    are processed per step; image labels are saved at the end.

    Args:
        agent_host: MalmoPython.AgentHost, re-initialised each episode
            via init_malmo().
    """
    # NOTE(review): reformatted from a collapsed single-line source;
    # indentation was reconstructed — verify against the original file.
    # NOTE(review): video_width/video_height and current_yaw_delta_from_depth
    # are not defined in this chunk — presumably module globals; verify.
    device = torch.device("cpu")
    if VISION_ENABLED:
        eyes = Eyes()
    if GET_VISION_DATA:
        clear_images()
    malmoutils.fix_print()
    malmoutils.parse_command_line(agent_host)
    recordingsDirectory = malmoutils.get_recordings_directory(agent_host)
    # Online and target networks share the same architecture; target starts
    # as a copy of the online weights.
    q_network = QNetwork((2, Hyperparameters.OBS_SIZE, Hyperparameters.OBS_SIZE), len(Hyperparameters.ACTION_DICT))
    target_network = QNetwork((2, Hyperparameters.OBS_SIZE, Hyperparameters.OBS_SIZE), len(Hyperparameters.ACTION_DICT))
    target_network.load_state_dict(q_network.state_dict())
    optim = torch.optim.Adam(q_network.parameters(), lr= Hyperparameters.LEARNING_RATE)
    replay_buffer = deque(maxlen=Hyperparameters.REPLAY_BUFFER_SIZE)
    global_step = 0
    num_episode = 0
    epsilon = 1
    start_time = time.time()
    returns = []
    steps = []
    loss_array = []
    loop = tqdm(total=Hyperparameters.MAX_GLOBAL_STEPS, position=0, leave=False)
    result_dataset = []
    print("Global Step", Hyperparameters.MAX_GLOBAL_STEPS)
    while global_step < Hyperparameters.MAX_GLOBAL_STEPS:
        episode_step = 0
        episode_return = 0
        episode_loss = 0
        done = False
        #Initialize
        agent_host = init_malmo(agent_host,recordingsDirectory, video_width,video_height)
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            #for error in world_state.errors:
            #print("\nError:",error.text)
        obs = get_observation(world_state, agent_host)
        #Testing
        agent_host.sendCommand( "move 1" )
        while world_state.is_mission_running:
            #Depth Implementation — wait for at least one video frame.
            while world_state.number_of_video_frames_since_last_state < 1 and world_state.is_mission_running:
                time.sleep(0.05)
                world_state = agent_host.getWorldState()
            if world_state.is_mission_running:
                frame = world_state.video_frames[0].pixels
                processFrame(frame)
                if GET_VISION_DATA:
                    try:
                        result_dataset.append(view_surrounding(video_height, video_width, frame, global_step))
                    except:
                        print("Error in getting image for training data.")
                elif VISION_ENABLED:
                    input_img_temp = get_img(world_state,frame,agent_host,eyes,device,video_width,video_height)
                    print("Yaw Delta ", current_yaw_delta_from_depth)
                    # Steer toward the depth-derived yaw delta.
                    if current_yaw_delta_from_depth > 0:
                        agent_host.sendCommand(Hyperparameters.ACTION_DICT[1])
                    else:
                        agent_host.sendCommand(Hyperparameters.ACTION_DICT[2])
            # Epsilon-greedy action selection.
            action_idx = get_action(obs, q_network, epsilon)
            command = Hyperparameters.ACTION_DICT[action_idx]
            agent_host.sendCommand(command)
            #agent_host.sendCommand( "turn " + str(current_yaw_delta_from_depth) )
            #time.sleep(.3)
            episode_step += 1
            # Episode terminates on step cap or on stepping south into a hole
            # (-1 in the obs grid just south of centre).
            if episode_step >= Hyperparameters.MAX_EPISODE_STEPS or \
                    (obs[0, int(Hyperparameters.OBS_SIZE/2)+1, int(Hyperparameters.OBS_SIZE/2)] == -1 and \
                    command == 'movesouth 1'):
                done = True
                time.sleep(2)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)
            next_obs = get_observation(world_state, agent_host)
            reward = 0
            for r in world_state.rewards:
                reward += r.getValue()
            episode_return += reward
            replay_buffer.append((obs, action_idx, next_obs, reward, done))
            obs = next_obs
            global_step += 1
            #print(global_step)
            if global_step == Hyperparameters.MAX_GLOBAL_STEPS:
                break
            # Train once warm-up is done, every LEARN_FREQUENCY steps.
            if global_step > Hyperparameters.START_TRAINING and global_step % Hyperparameters.LEARN_FREQUENCY == 0:
                batch = prepare_batch(replay_buffer)
                loss = learn(batch, optim, q_network, target_network)
                episode_loss += loss
                # NOTE(review): epsilon decay / target sync were reconstructed
                # as nested inside the training branch — confirm.
                if epsilon > Hyperparameters.MIN_EPSILON:
                    epsilon *= Hyperparameters.EPSILON_DECAY
                if global_step % Hyperparameters.TARGET_UPDATE == 0:
                    target_network.load_state_dict(q_network.state_dict())
        num_episode += 1
        returns.append(episode_return)
        loss_array.append(episode_loss)
        steps.append(global_step)
        avg_return = sum(returns[-min(len(returns), 10):]) / min(len(returns), 10)
        loop.update(episode_step)
        loop.set_description('Episode: {} Steps: {} Time: {:.2f} Loss: {:.2f} Last Return: {:.2f} Avg Return: {:.2f}'.format(
            num_episode, global_step, (time.time() - start_time) / 60, episode_loss, episode_return, avg_return))
        if num_episode > 0 and num_episode % 10 == 0:
            log_returns(steps, loss_array)
            #print()
            #print(len(result_dataset))
    # NOTE(review): final save placement (after the training loop) was
    # reconstructed — confirm it is not meant to run every 10 episodes.
    np.save("images/image_labels",np.array(result_dataset))
def run(argv=['']):
    """Run a jump-bot mission: walk, weave, and hop over obstacles.

    Builds a survival mission with a 3x3x4 block observation grid, then
    walks forward with a sinusoidal weave, inspecting the eye-level block
    layer to decide when to jump or turn; includes the same stall watchdog
    as the wander script.

    Args:
        argv: command-line argument list forwarded to malmoutils.
    """
    # NOTE(review): reformatted from a collapsed single-line source;
    # indentation (notably the JUMP bookkeeping) was reconstructed — verify.
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    #forceReset="true"
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      <About>
        <Summary>Hello world!</Summary>
      </About>
      <ServerSection>
        <ServerHandlers>
          <DefaultWorldGenerator forceReset="true" />
          <ServerQuitFromTimeUp timeLimitMs="30000"/>
          <ServerQuitWhenAnyAgentFinishes/>
        </ServerHandlers>
      </ServerSection>
      <AgentSection mode="Survival">
        <Name>MalmoTutorialBot</Name>
        <AgentStart>
          <Inventory>
            <InventoryItem slot="8" type="diamond_pickaxe"/>
          </Inventory>
        </AgentStart>
        <AgentHandlers>
          <ObservationFromFullStats/>
          <ObservationFromGrid>
            <Grid name="all_the_blocks" >
              <min x="-1" y="-1" z="-1"/>
              <max x="1" y="2" z="1"/>
            </Grid>
          </ObservationFromGrid>
          <ContinuousMovementCommands turnSpeedDegs="180"/>
        </AgentHandlers>
      </AgentSection>
    </Mission>'''
    malmoutils.fix_print()
    #agent_host = MalmoPython.AgentHost()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.timeLimitInSeconds(300)
    my_mission.requestVideo(640, 480)
    #my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")
    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)
    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)
    max_retries = 3
    max_response_time = 60 # seconds
    # Retry startMission a few times — the client may still be warming up.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    last_delta = time.time()
    # main loop:
    #agent_host.sendCommand( "jump 1")
    TURN = 0   # phase for the sinusoidal weave
    TURN2 = 0  # countdown for forced avoidance turns
    JUMP = 0   # countdown for an in-progress jump
    while world_state.is_mission_running:
        print("New Iteration")
        # Count the jump down; release the jump key when it expires.
        if JUMP > 0:
            JUMP = JUMP - 1
            if JUMP == 0:
                agent_host.sendCommand("jump 0")
                JUMP = JUMP - 1
        agent_host.sendCommand("move 1")
        # Weave: alternate turn direction on the sine of the phase.
        if math.sin(TURN) / 3 >= 0:
            agent_host.sendCommand("turn 0.15")
        else:
            agent_host.sendCommand("turn -0.2")
        print(TURN, " ", math.sin(TURN))
        TURN = TURN + 0.3
        #agent_host.sendCommand( "jump 1" )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        y = json.loads(world_state.observations[-1].text)
        #print(y["all_the_blocks"])
        # Derive a compass facing from yaw.
        dir = ""
        if y["Yaw"] + 180 < 90:
            dir = "S"
            print("Facing South")
        elif y["Yaw"] < 180:
            dir = "W"
            print("Facing West")
        elif y["Yaw"] < 270:
            dir = "N"
            print("Facing North")
        else:
            dir = "E"
            print("Facing East")
        # Split the 3x3x4 grid observation into four 9-block layers
        # (blocks[1] is the eye-level layer checked below).
        blocks = [[], [], [], []]
        i = 0
        for x in y["all_the_blocks"]:
            blocks[math.floor(i / 9)].append(x)
            i = i + 1
        # Jump if any block in the row we are walking toward is solid.
        if dir == "S":
            willjump = False
            for j in range(0, 3):
                if blocks[1][j] != "air":
                    willjump = True
                    print(j, blocks[1][j], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "W":
            willjump = False
            for j in range(0, 3):
                if blocks[1][j * 3] != "air":
                    willjump = True
                    print(j * 3, blocks[1][j * 3], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "E":
            willjump = False
            for j in range(1, 4):
                if blocks[1][j * 3 - 1] != "air":
                    willjump = True
                    print(j * 3 - 1, blocks[1][j * 3 - 1], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "N":
            willjump = False
            for j in range(0, 3):
                if blocks[1][j] != "air":
                    willjump = True
                    print(j, blocks[1][j + 6], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        # Two-block-tall obstacle ahead or behind: force a turn for a while.
        # NOTE(review): the four clauses repeat in pairs — likely a copy-paste
        # remnant; preserved as-is.
        if (blocks[1][2] != "air" and blocks[2][2] != "air" or blocks[1][4] != "air" and blocks[2][4] != "air" or
                blocks[1][2] != "air" and blocks[2][2] != "air" or blocks[1][4] != "air" and blocks[2][4] != "air"):
            TURN2 = 2
        if TURN2 >= 0:
            agent_host.sendCommand("turn 1")
            TURN2 = TURN2 - 1
        '''if blocks[1][5] != "air" or blocks[1][5] != "grass" or blocks[1][5] != "tallgrass" : JUMP = 2 agent_host.sendCommand( "jump 1" ) print() print(blocks[1][5])'''
        #print(len(blocks))
        #print(blocks)
        # Watchdog: restart Minecraft if nothing at all has arrived for a minute.
        if (world_state.number_of_video_frames_since_last_state > 0 or
                world_state.number_of_observations_since_last_state > 0 or
                world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print()
            #print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
def run(size, algo1, algo2):
    """Run a two-agent lava-arena duel between two named algorithms.

    Builds a size x size snow platform over lava, starts one mission per
    agent (Agent vs Enemy), then alternates move/break turns chosen by the
    given algorithms until one agent touches lava or the mission ends.

    Args:
        size: arena side length (cast to int).
        algo1: key into the algorithms table for the Agent.
        algo2: key into the algorithms table for the Enemy.

    Returns:
        0 if the Enemy won, 1 if the Agent won, 2 if the mission ended
        with no winner.
    """
    # NOTE(review): reformatted from a collapsed single-line source;
    # indentation and XML layout were reconstructed — verify.
    #algorithms = {"reflex": reflex.reflex, "hiddenMarkov": hiddenMarkov.hiddenMarkov, "minimax":minimax.minimax, "expectimax": expectimax.expectimax}
    algorithms = {
        "reflex": reflex.reflex,
        'random': randomagent.randommove,
        'smartrandom': smartrandomagent.randommove,
        'astarreflex': AStarReflex.search,
        "minimax": minimax.minmax
    }
    #assert len(sys.argv) == 4, "Wrong number of arguments, the form is: mapSize, agent algorithm, enemy alogrithm"
    malmoutils.fix_print()
    # -- set up two agent hosts --
    agent_host1 = MalmoPython.AgentHost()
    agent_host2 = MalmoPython.AgentHost()
    #map_size = str(sys.argv[1])
    map_size = int(size)
    map_minus = str(map_size - 1)
    agentAlgo = algorithms[algo1]
    enemyAlgo = algorithms[algo2]
    #agentAlgo = algorithms[sys.argv[2]]
    #enemyAlgo = algorithms[sys.argv[3]]
    # Use agent_host1 for parsing the command-line options.
    # (This is why agent_host1 is passed in to all the subsequent malmoutils calls, even for
    # agent 2's setup.)
    malmoutils.parse_command_line(agent_host1)
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      <About>
        <Summary>Hello world!</Summary>
      </About>
      <ServerSection>
        <ServerInitialConditions>
          <Time>
            <StartTime>12000</StartTime>
            <AllowPassageOfTime>false</AllowPassageOfTime>
          </Time>
        </ServerInitialConditions>
        <ServerHandlers>
          <FlatWorldGenerator generatorString="3;7,220*1,5*3,2;3;,biome_1"/>
          <DrawingDecorator>
            <!-- coordinates for cuboid are inclusive -->
            <DrawCuboid x1="0" y1="45" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="300" z2=''' + '"' + map_minus + '"' + ''' type="air" />  <!-- limits of our arena -->
            <DrawCuboid x1="0" y1="40" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="44" z2=''' + '"' + map_minus + '"' + ''' type="lava" />  <!-- lava floor -->
            <DrawCuboid x1="0" y1="46" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="46" z2=''' + '"' + map_minus + '"' + ''' type="snow" />
          </DrawingDecorator>
          <ServerQuitFromTimeUp timeLimitMs="30000"/>
        </ServerHandlers>
      </ServerSection>
      <AgentSection mode="Survival">
        <Name>Agent</Name>
        <AgentStart>
          <Inventory>
            <InventoryItem slot="0" type="diamond_shovel"/>
          </Inventory>
          <Placement x="0.5" y="47.0" z="0.5" pitch="50" yaw="0"/>
        </AgentStart>
        <AgentHandlers>
          <ObservationFromFullStats/>
          <ObservationFromGrid>
            <Grid name="floor3x3W">
              <min x="-1" y="0" z="-1"/>
              <max x="1" y="0" z="1"/>
            </Grid>
            <Grid name="floor3x3F">
              <min x="-1" y="-1" z="-1"/>
              <max x="1" y="-1" z="1"/>
            </Grid>
          </ObservationFromGrid>
          <DiscreteMovementCommands/>
        </AgentHandlers>
      </AgentSection>
      <AgentSection mode="Survival">
        <Name>Enemy</Name>
        <AgentStart>
          <Inventory>
            <InventoryItem slot="0" type="diamond_shovel"/>
          </Inventory>
          <Placement x=''' + '"' + str(
        float(map_size) - 0.5) + '"' + ''' y="47.0" z=''' + '"' + str(
        float(map_size) - 0.5) + '"' + ''' pitch="50" yaw="180"/>
        </AgentStart>
        <AgentHandlers>
          <ObservationFromFullStats/>
          <DiscreteMovementCommands/>
          <ObservationFromGrid>
            <Grid name="floor3x3W">
              <min x="-1" y="0" z="-1"/>
              <max x="1" y="0" z="1"/>
            </Grid>
            <Grid name="floor3x3F">
              <min x="-1" y="-1" z="-1"/>
              <max x="1" y="-1" z="1"/>
            </Grid>
          </ObservationFromGrid>
          <RewardForTouchingBlockType>
            <Block reward="-100.0" type="lava" behaviour="onceOnly"/>
          </RewardForTouchingBlockType>
          <AgentQuitFromTouchingBlockType>
            <Block type="lava" />
          </AgentQuitFromTouchingBlockType>
        </AgentHandlers>
      </AgentSection>
    </Mission>'''
    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))
    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    my_mission_record = MalmoPython.MissionRecordSpec()

    def safeStartMission(agent_host, mission, client_pool, recording, role, experimentId):
        # Start a mission, retrying on transient Malmo errors; bails out on
        # unrecoverable errors or after max_attempts retries.
        used_attempts = 0
        max_attempts = 5
        print("Calling startMission for role", role)
        while True:
            try:
                agent_host.startMission(mission, client_pool, recording, role, experimentId)
                break
            except MalmoPython.MissionException as e:
                errorCode = e.details.errorCode
                if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
                    print("Server not quite ready yet - waiting...")
                    time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_INSUFFICIENT_CLIENTS_AVAILABLE:
                    print("Not enough available Minecraft instances running.")
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait in case they are starting up.", max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_NOT_FOUND:
                    print(
                        "Server not found - has the mission with role 0 been started yet?"
                    )
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait and retry.", max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                else:
                    print("Other error:", e.message)
                    print("Waiting will not help here - bailing immediately.")
                    exit(1)
                if used_attempts == max_attempts:
                    print("All chances used up - bailing now.")
                    exit(1)
        print("startMission called okay.")

    def safeWaitForStart(agent_hosts):
        # Block until every agent host reports the mission has begun, or
        # bail after the timeout / on any reported errors.
        print("Waiting for the mission to start", end=' ')
        start_flags = [False for a in agent_hosts]
        start_time = time.time()
        time_out = 120  # Allow two minutes for mission to start.
        while not all(start_flags) and time.time() - start_time < time_out:
            states = [a.peekWorldState() for a in agent_hosts]
            start_flags = [w.has_mission_begun for w in states]
            errors = [e for w in states for e in w.errors]
            if len(errors) > 0:
                print("Errors waiting for mission start:")
                for e in errors:
                    print(e.text)
                print("Bailing now.")
                exit(1)
            time.sleep(0.1)
            print(".", end=' ')
        print()
        if time.time() - start_time >= time_out:
            print("Timed out waiting for mission to begin. Bailing.")
            exit(1)
        print("Mission has started.")

    safeStartMission(agent_host1, my_mission, client_pool, my_mission_record, 0, '')
    safeStartMission(agent_host2, my_mission, client_pool, my_mission_record, 1, '')
    safeWaitForStart([agent_host1, agent_host2])

    def movement(ah, direction, pos):
        # Issue a discrete move command and return the expected new (x, z).
        if direction == "north":
            ah.sendCommand("movenorth 1")
            position = (pos[0], pos[1] - 1)
        elif direction == "south":
            ah.sendCommand("movesouth 1")
            position = (pos[0], pos[1] + 1)
        elif direction == "west":
            ah.sendCommand("movewest 1")
            position = (pos[0] - 1, pos[1])
        elif direction == "east":
            ah.sendCommand("moveeast 1")
            position = (pos[0] + 1, pos[1])
        else:
            position = (pos[0], pos[1])
        time.sleep(0.1)
        return position

    def attack(ah, index, pos, map, enemy=False):
        # Break the floor block in the given compass direction, turning the
        # agent as needed and marking the broken cell False in the map.
        # NOTE(review): parameter name `map` shadows the builtin — kept.
        #We are going to make it so the agent can only break the blocks immediately around them.
        #So a location will be one of the 8 locations around it
        #Enemy starts facing north (1), Agent starts facing south (3)
        # Enemy: 0 1 0   Agent: 0 3 0
        #        4 X 2          2 X 4
        #        0 3 0          0 1 0
        x, y = math.floor(pos[0]), math.floor(pos[1])
        #print("Player position: {},{} Direction: {}".format(x,y, index))
        did_Break = False
        if enemy:
            if index == "north":
                # print("Index 1")
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                y -= 1
                did_Break = True
            if index == "east":
                # print("Index 2")
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                x += 1
                did_Break = True
            if index == "west":
                # print("Index 4")
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                x -= 1
                did_Break = True
            if index == "south":
                # print("Index 3")
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                y += 1
                did_Break = True
        else:
            # Agent: 0 3 0
            #        2 X 4
            #        0 1 0
            if index == "south":
                # print("Index 3")
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                y += 1
                did_Break = True
            if index == "west":
                # print("Index 4")
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                x -= 1
                did_Break = True
            if index == "east":
                # print("Index 2")
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                x += 1
                did_Break = True
            if index == "north":
                # print("Index 3")
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                y -= 1
                did_Break = True
        if did_Break:
            map[x][y] = False

    '''
    Sample Observation:
    {"DistanceTravelled":0,"TimeAlive":50,"MobsKilled":0,"PlayersKilled":0,"DamageTaken":0,"DamageDealt":0,
    "Life":20.0,"Score":0,"Food":20,"XP":0,"IsAlive":true,"Air":300,"Name":"Enemy","XPos":5.5,"YPos":47.0,
    "ZPos":5.5,"Pitch":50.0,"Yaw":180.0,"WorldTime":12000,"TotalTime":57}
    '''
    agent_score = 0
    #count = 0
    agent_ob = None
    enemy_ob = None
    # map[x][y] is True while the snow block at (x, y) is intact.
    # NOTE(review): local name `map` shadows the builtin — kept.
    map = [[True for i in range(0, int(map_size))] for j in range(0, int(map_size))]
    # for i in map:
    #     print(i)
    while True:
        #Scores should decrease with time and get a bonus if they win
        agent_score -= 1
        agent_state = agent_host1.peekWorldState()
        enemy_state = agent_host2.peekWorldState()
        if agent_state.number_of_observations_since_last_state > 0:
            agent_ob = json.loads(agent_state.observations[-1].text)
        if enemy_state.number_of_observations_since_last_state > 0:
            enemy_ob = json.loads(enemy_state.observations[-1].text)
        # Wait until both agents have produced at least one observation.
        if agent_ob is None or enemy_ob is None:
            continue
        if agent_state.is_mission_running == False:
            break
        agent_position = (agent_ob["XPos"], agent_ob["ZPos"])
        enemy_position = (enemy_ob["XPos"], enemy_ob["ZPos"])
        agent_grid = agent_ob.get(u'floor3x3F', 0)
        enemy_grid = enemy_ob.get(u'floor3x3F', 0)
        # Lava under an agent's feet decides the game.
        if "lava" in agent_grid:
            print("Enemy Won!")
            agent_score -= 100
            for i in map:
                print(i)
            return 0
            break  # unreachable after return — preserved from original
        if "lava" in enemy_grid:
            print("Agent Won!")
            agent_score += 100
            for i in map:
                print(i)
            return 1
            break  # unreachable after return — preserved from original
        agentMoveString, agentBreakIndex = agentAlgo(agent_host1, agent_position, enemy_position, agent_grid, map)
        enemyMoveString, enemyBreakIndex = enemyAlgo(agent_host2, enemy_position, agent_position, enemy_grid, map)
        # #Agent Turn to Break
        attack(agent_host1, agentBreakIndex, agent_position, map)
        # #Enemy Turn to Move
        pos = movement(agent_host2, enemyMoveString, enemy_position)
        # #Enemy Turn to Break
        attack(agent_host2, enemyBreakIndex, pos, map, enemy=True)
        # #Agent Turn to Move
        movement(agent_host1, agentMoveString, agent_position)
    # Mission ended without a lava touch: draw.
    for i in map:
        print(i)
    return 2
class ThesisEnvExperiment(gym.Env):
    """Capture-the-flag Malmo environment for two DQN agents (Tom, Jerry)
    plus a third observer agent (Skye).

    The class wires three Malmo ``AgentHost`` instances to one shared mission,
    exposes a Gym-style action/observation space built from the mission's
    allowed commands, and records per-episode statistics for evaluation plots.

    Fixes applied in this revision:
    - ``step_generating``: ``done2`` was derived from ``world_state1`` instead
      of ``world_state2``.
    - ``reset_world``: the wait-for-start loop exited as soon as agent 1 had
      begun (mis-parenthesized negation), and the error loop iterated only one
      agent's error list (``a and b`` evaluates to ``b`` for non-empty lists).
    - ``save_*`` helpers now close ``results.txt`` via context managers.
    """

    metadata = {'render.modes': ['human']}

    # Agent 01: Tom (command-line parsing gives each host CLI permissions)
    agent_host1 = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host1)
    # Agent 02: Jerry
    agent_host2 = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host2)
    # Agent 03: Skye (spectator)
    agent_host3 = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host3)

    # Global flags remembering whether somebody already captured the flag.
    flag_captured_tom = flag_captured_jerry = False
    fetched_cell_tom = fetched_cell_jerry = cell_now_tom = cell_now_jerry = 0
    time_stamp_start_for_distance = 0
    too_close_counter = 0
    time_step_tom_won = None
    time_step_jerry_won = None
    time_step_tom_captured_the_flag = None
    time_step_jerry_captured_the_flag = None
    winner_agent = "-"
    time_step_agents_ran_into_each_other = None
    steps_tom = 0
    steps_jerry = 0
    episode_counter = 0

    # Collected data for evaluation plots (one entry per finished episode).
    evaluation_episode_counter = []
    evaluation_too_close_counter = []
    evaluation_episode_time = []
    evaluation_flag_captured_tom = []
    evaluation_flag_captured_jerry = []
    evaluation_agents_ran_into_each_other = []
    evaluation_game_won_timestamp = []
    evaluation_winner_agent = []
    evaluation_reward_tom = []
    evaluation_reward_jerry = []
    evaluation_steps_tom = []
    evaluation_steps_jerry = []

    def __init__(self):
        """Load the mission XML and prepare the default local client pool."""
        super(ThesisEnvExperiment, self).__init__()
        mission_file = 'capture_the_flag_xml_mission_DQL.xml'
        self.load_mission_file(mission_file)
        print("Mission loaded: Capture the Flag")
        # Three local Minecraft clients, one per agent.
        self.client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001),
                            ('127.0.0.1', 10002)]
        self.mc_process = None
        self.mission_end = False

    def init(self, client_pool=None, start_minecraft=None,
             continuous_discrete=True, add_noop_command=None,
             max_retries=90, retry_sleep=10, step_sleep=0.001, skip_steps=0,
             videoResolution=None, videoWithDepth=None,
             observeRecentCommands=None, observeHotBar=None,
             observeFullInventory=None, observeGrid=None,
             observeDistance=None, observeChat=None,
             allowContinuousMovement=None, allowDiscreteMovement=None,
             allowAbsoluteMovement=None, recordDestination=None,
             recordObservations=None, recordRewards=None, recordCommands=None,
             recordMP4=None, gameMode=None, forceWorldReset=None):
        """Configure the mission spec, spaces, recording and game mode.

        Mirrors the classic malmo-gym ``init``: every keyword toggles one
        optional Malmo feature; ``client_pool`` is converted from a list of
        ``(ip, port)`` tuples into a ``MalmoPython.ClientPool``.
        """
        self.max_retries = max_retries
        self.retry_sleep = retry_sleep
        self.step_sleep = step_sleep
        self.skip_steps = skip_steps
        self.forceWorldReset = forceWorldReset
        self.continuous_discrete = continuous_discrete
        self.add_noop_command = add_noop_command
        self.client_pool = client_pool

        if videoResolution:
            if videoWithDepth:
                self.mission_spec.requestVideoWithDepth(*videoResolution)
            else:
                self.mission_spec.requestVideo(*videoResolution)

        if observeRecentCommands:
            self.mission_spec.observeRecentCommands()
        if observeHotBar:
            self.mission_spec.observeHotBar()
        if observeFullInventory:
            self.mission_spec.observeFullInventory()
        if observeGrid:
            self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
        if observeDistance:
            self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
        if observeChat:
            self.mission_spec.observeChat()

        if allowDiscreteMovement:
            # If there are any parameters, remove current command handlers first.
            self.mission_spec.removeAllCommandHandlers()
            if allowDiscreteMovement is True:
                self.mission_spec.allowAllDiscreteMovementCommands()
            elif isinstance(allowDiscreteMovement, list):
                for cmd in allowDiscreteMovement:
                    self.mission_spec.allowDiscreteMovementCommand(cmd)

        if start_minecraft:
            # Start Minecraft process, assigning the port dynamically.
            self.mc_process, port = minecraft_py.start()
            logger.info(
                "Started Minecraft on port %d, overriding client_pool.", port)
            client_pool = [('127.0.0.1', port)]

        # Make client_pool usable for Malmo: convert tuple list to struct.
        if client_pool:
            if not isinstance(client_pool, list):
                raise ValueError(
                    "client_pool must be list of tuples of (IP-address, port)")
            self.client_pool = MalmoPython.ClientPool()
            for client in client_pool:
                self.client_pool.add(MalmoPython.ClientInfo(*client))

        # Initialize video parameters for video processing.
        self.video_height = self.mission_spec.getVideoHeight(0)
        self.video_width = self.mission_spec.getVideoWidth(0)
        self.video_depth = self.mission_spec.getVideoChannels(0)
        self.observation_space = spaces.Box(
            low=0, high=255,
            shape=(self.video_height, self.video_width, self.video_depth))
        # Dummy images, returned until the first real frame arrives.
        self.last_image1 = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.float32)
        self.last_image2 = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.float32)
        self.create_action_space()

        # Mission recording (records nothing unless configured below).
        self.mission_record_spec = MalmoPython.MissionRecordSpec()
        if recordDestination:
            self.mission_record_spec.setDestination(recordDestination)
        if recordRewards:
            self.mission_record_spec.recordRewards()
        if recordCommands:
            self.mission_record_spec.recordCommands()
        if recordMP4:
            self.mission_record_spec.recordMP4(*recordMP4)

        # Game mode.
        if gameMode:
            if gameMode == "spectator":
                self.mission_spec.setModeToSpectator()
            elif gameMode == "creative":
                self.mission_spec.setModeToCreative()
            elif gameMode == "survival":
                logger.warn(
                    "Cannot force survival mode, assuming it is the default.")
            else:
                assert False, "Unknown game mode: " + gameMode

    def create_action_space(self):
        """Build a Discrete Gym action space from the mission's allowed commands.

        Only "movenorth", "movesouth", "moveeast", "movewest", "attack" and
        "turn" are kept (each with arguments " 1" and " -1"); every other
        allowed command is collected into ``unused_actions``.
        """
        unused_actions = []
        discrete_actions = []
        chs = self.mission_spec.getListOfCommandHandlers(0)
        for ch in chs:
            cmds = self.mission_spec.getAllowedCommands(0, ch)
            for command in cmds:
                logger.debug(ch + ":" + command)
                if command in [
                        "movenorth", "movesouth", "moveeast", "movewest",
                        "attack", "turn"
                ]:
                    discrete_actions.append(command + " 1")
                    discrete_actions.append(command + " -1")
                else:
                    unused_actions.append(command)
        # Turn action lists into action spaces.
        self.action_names = []
        self.action_spaces = []
        if len(discrete_actions) > 0:
            self.action_spaces.append(spaces.Discrete(len(discrete_actions)))
            self.action_names.append(discrete_actions)
        if len(self.action_spaces) == 1:
            self.action_space = self.action_spaces[0]
        else:
            self.action_space = spaces.Tuple(self.action_spaces)
        logger.debug(self.action_space)

    def load_mission_file(self, mission_file):
        """Load an XML mission from disk and hand it to ``load_mission_xml``."""
        logger.info("Loading mission from " + mission_file)
        mission_xml = open(mission_file, 'r').read()
        self.load_mission_xml(mission_xml)

    def load_mission_xml(self, mission_xml):
        """Parse mission XML (validated) into ``self.mission_spec``."""
        self.mission_spec = MalmoPython.MissionSpec(mission_xml, True)
        logger.info("Loaded mission: " + self.mission_spec.getSummary())

    def clip_action_filter(self, a):
        """Clip a continuous action into the Box action space's bounds."""
        return np.clip(a, self.action_space.low, self.action_space.high)

    def dqn_q_values_and_neuronal_net(self, args, action_space, obs_size,
                                      obs_space):
        """Build Q-function, optimizer, replay buffer and explorer for DQN.

        Continuous (Box) action spaces get NAF + Ornstein-Uhlenbeck
        exploration; discrete spaces get an FC Q-function with epsilon-greedy.

        Returns:
            tuple: ``(q_func, opt, rbuf, explorer)``
        """
        if isinstance(action_space, spaces.Box):
            action_size = action_space.low.size
            # Use NAF to apply DQN to continuous action spaces.
            q_func = q_functions.FCQuadraticStateQFunction(
                obs_size,
                action_size,
                n_hidden_channels=args.n_hidden_channels,
                n_hidden_layers=args.n_hidden_layers,
                action_space=action_space)
            # Use the Ornstein-Uhlenbeck process for exploration.
            ou_sigma = (action_space.high - action_space.low) * 0.2
            explorer = explorers.AdditiveOU(sigma=ou_sigma)
        else:
            n_actions = action_space.n
            q_func = q_functions.FCStateQFunctionWithDiscreteAction(
                obs_size,
                n_actions,
                n_hidden_channels=args.n_hidden_channels,
                n_hidden_layers=args.n_hidden_layers)
            # Use epsilon-greedy for exploration.
            explorer = explorers.LinearDecayEpsilonGreedy(
                args.start_epsilon, args.end_epsilon,
                args.final_exploration_steps, action_space.sample)

        if args.noisy_net_sigma is not None:
            links.to_factorized_noisy(q_func, sigma_scale=args.noisy_net_sigma)
            # Noisy nets handle exploration themselves: turn off the explorer.
            explorer = explorers.Greedy()

        chainerrl.misc.draw_computational_graph(
            [q_func(np.zeros_like(obs_space.low, dtype=np.float32)[None])],
            os.path.join(args.outdir, 'model'))

        opt = optimizers.Adam()
        opt.setup(q_func)
        rbuf_capacity = 5 * 10**5
        if args.minibatch_size is None:
            args.minibatch_size = 32
        if args.prioritized_replay:
            betasteps = (args.steps - args.replay_start_size) \
                // args.update_interval
            rbuf = replay_buffer.PrioritizedReplayBuffer(rbuf_capacity,
                                                         betasteps=betasteps)
        else:
            rbuf = replay_buffer.ReplayBuffer(rbuf_capacity)
        return q_func, opt, rbuf, explorer

    def step_generating(self, action, agent_num):
        """Execute one environment step for the given agent (1 = Tom, 2 = Jerry).

        Sends the action if that agent's mission is still running, sums the
        rewards that arrived since the last state, and collects bookkeeping
        info from the world state.

        Returns:
            tuple: ``(image, reward, done, info)`` for the requested agent.
        """
        reward1 = 0
        reward2 = 0
        world_state1 = self.agent_host1.peekWorldState()
        world_state2 = self.agent_host2.peekWorldState()
        if agent_num == 1:
            if world_state1.is_mission_running:
                # Take action, then wait for the new state.
                self.do_action(action, agent_num)
                world_state1 = self.agent_host1.getWorldState()
        else:
            if world_state2.is_mission_running:
                # Take action, then wait for the new state.
                self.do_action(action, agent_num)
                world_state2 = self.agent_host2.getWorldState()

        # Calculate reward of the current state.
        if agent_num == 1:
            for r in world_state1.rewards:
                reward1 += r.getValue()
        else:
            for r in world_state2.rewards:
                reward2 += r.getValue()

        # Take the last frame from the world state; 'done' indicates whether
        # the mission has ended.
        if agent_num == 1:
            image1 = self.get_video_frame(world_state1, 1)
            done1 = not world_state1.is_mission_running
        else:
            image2 = self.get_video_frame(world_state2, 2)
            # BUGFIX: was derived from world_state1, so Jerry's 'done' tracked
            # Tom's mission state.
            done2 = not world_state2.is_mission_running

        # Collected information during the run.
        if agent_num == 1:
            info1 = {}
            info1['has_mission_begun'] = world_state1.has_mission_begun
            info1['is_mission_running'] = world_state1.is_mission_running
            info1['number_of_video_frames_since_last_state'] = \
                world_state1.number_of_video_frames_since_last_state
            info1['number_of_rewards_since_last_state'] = \
                world_state1.number_of_rewards_since_last_state
            info1['number_of_observations_since_last_state'] = \
                world_state1.number_of_observations_since_last_state
            info1['mission_control_messages'] = [
                msg.text for msg in world_state1.mission_control_messages
            ]
            info1['observation'] = self.get_observation(world_state1)
            return image1, reward1, done1, info1
        else:
            info2 = {}
            info2['has_mission_begun'] = world_state2.has_mission_begun
            info2['is_mission_running'] = world_state2.is_mission_running
            info2['number_of_video_frames_since_last_state'] = \
                world_state2.number_of_video_frames_since_last_state
            info2['number_of_rewards_since_last_state'] = \
                world_state2.number_of_rewards_since_last_state
            info2['number_of_observations_since_last_state'] = \
                world_state2.number_of_observations_since_last_state
            info2['mission_control_messages'] = [
                msg.text for msg in world_state2.mission_control_messages
            ]
            info2['observation'] = self.get_observation(world_state2)
            return image2, reward2, done2, info2

    def reset_world(self, experiment_ID):
        """Reset the arena and start the mission for all three agents.

        The 6-second sleeps are required because the clients need a long time
        to set up the mission the first time; later missions start faster.

        Returns:
            tuple: first video frames for Tom and Jerry.
        """
        print("force world reset........")
        self.flag_captured_tom = False
        self.flag_captured_jerry = False
        time.sleep(0.1)
        print(self.client_pool)
        for retry in range(self.max_retries + 1):
            try:
                # Start missions for every client.
                print("\nstarting mission for agent #1")
                time.sleep(6)
                self.agent_host1.startMission(self.mission_spec,
                                              self.client_pool,
                                              self.mission_record_spec, 0,
                                              experiment_ID)
                print("starting mission for agent #2")
                time.sleep(6)
                self.agent_host2.startMission(self.mission_spec,
                                              self.client_pool,
                                              self.mission_record_spec, 1,
                                              experiment_ID)
                print("starting mission for agent #3")
                time.sleep(6)
                self.agent_host3.startMission(self.mission_spec,
                                              self.client_pool,
                                              self.mission_record_spec, 2,
                                              experiment_ID)
                print("\nmissions successfully started.....\n")
                break
            except RuntimeError as e:
                if retry == self.max_retries:
                    logger.error("Error starting mission: " + str(e))
                    raise
                else:
                    logger.warn("Error starting mission: " + str(e))
                    logger.info("Sleeping for %d seconds...", self.retry_sleep)
                    time.sleep(self.retry_sleep)

        logger.info("Waiting for the mission to start.")
        world_state1 = self.agent_host1.getWorldState()
        world_state2 = self.agent_host2.getWorldState()
        # BUGFIX: wait until BOTH missions have begun. The original condition
        # `not ws1.has_mission_begun and ws2.has_mission_begun` exited as soon
        # as agent 1 had begun, regardless of agent 2.
        while not (world_state1.has_mission_begun
                   and world_state2.has_mission_begun):
            time.sleep(0.1)
            world_state1 = self.agent_host1.getWorldState()
            world_state2 = self.agent_host2.getWorldState()
        # BUGFIX: `ws1.errors and ws2.errors` evaluated to a single list;
        # report errors from both agents.
        for error in world_state1.errors:
            logger.warn(error.text)
        for error in world_state2.errors:
            logger.warn(error.text)
        logger.info("Mission running")
        return self.get_video_frame(world_state1,
                                    1), self.get_video_frame(world_state2, 2)

    def do_action(self, actions, agent_num):
        """Translate a Gym action into Malmo commands for the given agent."""
        if len(self.action_spaces) == 1:
            actions = [actions]
        if agent_num == 1:
            self.steps_tom += 1
        else:
            self.steps_jerry += 1
        for spc, cmds, acts in zip(self.action_spaces, self.action_names,
                                   actions):
            if isinstance(spc, spaces.Discrete):
                logger.debug(cmds[acts])
                if agent_num == 1:
                    print("Tom's next action: ", cmds[acts])
                    self.agent_host1.sendCommand(cmds[acts])
                else:
                    print("Jerry's next action: ", cmds[acts])
                    self.agent_host2.sendCommand(cmds[acts])
            elif isinstance(spc, spaces.Box):
                for cmd, val in zip(cmds, acts):
                    logger.debug(cmd + " " + str(val))
                    if agent_num == 1:
                        self.agent_host1.sendCommand(cmd + " " + str(val))
                    else:
                        self.agent_host2.sendCommand(cmd + " " + str(val))
            elif isinstance(spc, spaces.MultiDiscrete):
                for cmd, val in zip(cmds, acts):
                    logger.debug(cmd + " " + str(val))
                    if agent_num == 1:
                        self.agent_host1.sendCommand(cmd + " " + str(val))
                    else:
                        self.agent_host2.sendCommand(cmd + " " + str(val))
            else:
                logger.warn("Unknown action space for %s, ignoring." % cmds)

    def get_video_frame(self, world_state, agent_num):
        """Convert the newest video frame into a float32 H x W x C image.

        Falls back to the last cached image when the mission ends before a
        frame arrived, to avoid exceptions at episode boundaries.
        """
        if world_state.number_of_video_frames_since_last_state > 0:
            assert len(world_state.video_frames) == 1
            frame = world_state.video_frames[0]
            size = self.video_height * self.video_width * self.video_depth
            # int8 pixel buffer -> float32 flat copy. (Originally a Python
            # loop over a hard-coded 360000 = 300*400*3 elements; vectorized
            # and generalized to the configured resolution.)
            raw = np.frombuffer(frame.pixels, dtype=np.int8)
            reshaped = raw[:size].astype(np.float32)
            image = reshaped.reshape(
                (frame.height, frame.width, frame.channels))
            if agent_num == 1:
                self.last_image1 = image
            else:
                self.last_image2 = image
        else:
            # Mission ended before we got a frame: reuse the previous image.
            if agent_num == 1:
                image = self.last_image1
            else:
                image = self.last_image2
        return image

    def get_observation(self, world_state):
        """Return the newest observation dict, or None if nothing arrived.

        Also logs how many observations were dropped since the last state.
        """
        if world_state.number_of_observations_since_last_state > 0:
            missed = world_state.number_of_observations_since_last_state \
                - len(world_state.observations) - self.skip_steps
            if missed > 0:
                logger.warn("Agent missed %d observation(s).", missed)
            assert len(world_state.observations) == 1
            return json.loads(world_state.observations[0].text)
        else:
            return None

    def save_new_round(self, t):
        """Append the round number to results.txt."""
        with open('results.txt', 'a') as datei:
            datei.write("-------------- ROUND %i --------------\n" % (t))

    def append_save_file_with_flag(self, time_step, name):
        """Append the flag holder to results.txt."""
        with open('results.txt', 'a') as datei:
            datei.write("%s captured the flag after %i seconds.\n" %
                        (name, time_step))

    def append_save_file_with_fail(self):
        """Append a generic mission failure to results.txt."""
        with open('results.txt', 'a') as datei:
            datei.write("X the mission failed X.\n")

    def append_save_file_with_agents_fail(self):
        """Append an agents-collided/stranded failure to results.txt."""
        with open('results.txt', 'a') as datei:
            datei.write(
                "X the mission failed: the agents ran into each other or got stranded in the field X.\n"
            )

    def append_save_file_with_finish(self, time_step, name):
        """Append the winner to results.txt."""
        with open('results.txt', 'a') as datei:
            datei.write("%s won the game after %i seconds.\n" %
                        (name, time_step))

    def save_results(self, overall_reward_agent_Tom,
                     overall_reward_agent_Jerry, time_step):
        """Append the episode's summary statistics to results.txt."""
        with open('results.txt', 'a') as datei:
            datei.write("The agents were %i times very close to each other.\n"
                        % (self.too_close_counter))
            datei.write("Reward Tom: %i, Reward Jerry: %i , Time: %f \n\n" %
                        (overall_reward_agent_Tom, overall_reward_agent_Jerry,
                         time_step))

    def get_cell_agents(self):
        """Fetch and cache both agents' current grid cell (every 20 seconds)."""
        world_state1 = self.agent_host1.peekWorldState()
        world_state2 = self.agent_host2.peekWorldState()
        msg1 = world_state1.observations[-1].text
        msg2 = world_state2.observations[-1].text
        ob1 = json.loads(msg1)
        ob2 = json.loads(msg2)
        if "cell" in ob1 and "cell" in ob2:
            self.fetched_cell_tom = ob1.get(u'cell', 0)
            self.fetched_cell_jerry = ob2.get(u'cell', 0)
            print("fetched cell tom: ", self.fetched_cell_tom)
            print("fetched cell jerry: ", self.fetched_cell_jerry)

    def get_current_cell_agents(self):
        """Fetch both agents' grid cell at the current state."""
        world_state1 = self.agent_host1.peekWorldState()
        world_state2 = self.agent_host2.peekWorldState()
        msg1 = world_state1.observations[-1].text
        msg2 = world_state2.observations[-1].text
        ob1 = json.loads(msg1)
        ob2 = json.loads(msg2)
        if "cell" in ob1 and "cell" in ob2:
            self.cell_now_tom = ob1.get(u'cell', 0)
            self.cell_now_jerry = ob2.get(u'cell', 0)
            print("current cell tom: ", self.cell_now_tom)
            print("current cell jerry: ", self.cell_now_jerry)

    def get_position_in_arena(self, world_state, time_step):
        """Return the agent's (x, y, z) position from the newest observation.

        Every 20 seconds the agents' cells are re-fetched; if both agents sit
        in the same cells ~18 seconds later, they are assumed to have crashed
        into each other and the mission is flagged as failed. If no
        observation arrives within ~5 retries, the mission is also failed.
        """
        x = y = z = t = 0
        while world_state:
            if len(world_state.observations) >= 1:
                msg = world_state.observations[-1].text
                ob = json.loads(msg)
                time_now = time.time()
                if time_now - self.time_stamp_start_for_distance > 20:
                    # Fetch cells every 20 seconds.
                    self.get_cell_agents()
                    self.time_stamp_start_for_distance = time.time()
                seconds = time_now - self.time_stamp_start_for_distance
                if int(seconds) == 18:
                    self.get_current_cell_agents()
                    if self.fetched_cell_tom == self.cell_now_tom and \
                            self.fetched_cell_jerry == self.cell_now_jerry:
                        print("They ran into each other again.")
                        self.append_save_file_with_agents_fail()
                        self.mission_end = True
                if "XPos" in ob and "ZPos" in ob and "YPos" in ob:
                    x = ob[u'XPos']
                    y = ob[u'YPos']
                    z = ob[u'ZPos']
                return x, y, z
            else:
                if t == 5:
                    self.append_save_file_with_fail()
                    self.time_step_agents_ran_into_each_other = time_step
                    self.mission_end = True
                    return x, y, z
                else:
                    time.sleep(1)
                    t += 1
                    print(t)
def distance(self, time_step): """ check if agents are to near to eachother move apart if so """ x1 = y1 = z1 = x2 = y2 = z2 = 0 """ checks, if world_state is read corrctly, if not, trys again""" while (x1 == y1 == z1 == 0) or (x2 == y2 == z2 == 0): world_state1 = self.agent_host1.peekWorldState() world_state2 = self.agent_host2.peekWorldState() x1, y1, z1 = self.get_position_in_arena(world_state1, time_step) x2, y2, z2 = self.get_position_in_arena(world_state2, time_step) print("...") # print(" \tTom \tJerry \nX: \t %i\t %i \nY: \t %i\t %i \nZ: \t %i\t %i" % (x1, x2, y1, y2, z1, z2)) """(x2 == x1+2 and z1 == z1+2) or (x2 == x1+1 and z2 == z1+2) or (x2 == x1 and z2 == z1+2) or \ (x2 == x1-1 and z2 == z1+2) or (x2 == x1-2 and z2 == z1+2) or (x1 == x2+2 and z1 == z2-2) or \ (x1 == x2+1 and z1 == z2-2) or (x1 == x2 and z1 == z2-2) or (x1 == x2-1 and z1 == z2-2) or \ (x1 == x2-2 and z1 == z2-2) or """ if (x1 == x2 and z1 == z2) or (x2 == x1 + 1 and z2 == z1 + 1) or (x2 == x1 and z2 == z1 + 1) or \ (x2 == x1 - 1 and z2 == z1 + 1) or (x1 == x2 + 1 and z1 == z2 - 1) or (x1 == x2 and z1 == z2 - 1) or \ (x1 == x2 - 1 and z1 == z2 - 1): print( "---------------------------------------------------- stop!! agents too close!" ) self.too_close_counter += 1 self.agent_host1.sendCommand("movenorth 1") self.agent_host2.sendCommand("movesouth 1") """(x2 == x1 + 2 and z2 == z1 + 1) or (x2 == x1 + 2 and z2 == z1) or (x2 == x1 + 2 and z2 == z1 - 1) or (x1 == x2-2 and z1 == z2+1) or (x1 == x2-2 and z1 == z2) or (x1 == x2-2 and z1 == z2-1) or """ if (x2 == x1 + 1 and z2 == z1) or (x1 == x2 - 1 and z1 == z2): print( "---------------------------------------------------- stop!! agents too close!" 
) self.too_close_counter += 1 self.agent_host1.sendCommand("movewest 1") self.agent_host2.sendCommand("moveeast 1") """(x2 == x1 - 2 and z2 == z1 + 1) or (x2 == x1 - 2 and z2 == z1) or (x2 == x1 - 2 and z2 == z1 - 1) or (x1 == x2+2 and z1 == z2+1) or (x1 == x2+2 and z1 == z2) or \ (x1 == x2+2 and z1 == z2-1) or """ if (x2 == x1 - 1 and z2 == z1) or (x1 == x2 + 1 and z1 == z2): print( "---------------------------------------------------- stop!! agents too close!" ) self.too_close_counter += 1 self.agent_host1.sendCommand("moveeast 1") self.agent_host2.sendCommand("movewest 1") """(x2 == x1 + 2 and z1 == z1 - 2) or (x2 == x1 + 1 and z2 == z1 - 2) or (x2 == x1 and z2 == z1 - 2) or \ (x2 == x1 - 1 and z2 == z1 - 2) or (x2 == x1 - 2 and z2 == z1 - 2) or (x1 == x2+2 and z1 == z2+2) or \ (x1 == x2+1 and z1 == z2+2) or (x1 == x2 and z1 == z2+2) or (x1 == x2-1 and z1 == z2+2) or \ (x1 == x2-2 and z1 == z2+2) or """ if (x2 == x1 + 1 and z2 == z1 - 1) or (x2 == x1 and z2 == z1 - 1) or (x2 == x1 - 1 and z2 == z1 - 1) or \ (x1 == x2 + 1 and z1 == z2 + 1) or (x1 == x2 and z1 == z2 + 1) or (x1 == x2 - 1 and z1 == z2 + 1): print( "---------------------------------------------------- stop!! agents too close!" 
) self.too_close_counter += 1 self.agent_host1.sendCommand("movesouth 1") self.agent_host2.sendCommand("movennorth 1") def check_inventory(self, time_step): """ checks, if the agent got the flag in his inventory """ world_state1 = 0 world_state2 = 0 x1 = y1 = z1 = x2 = y2 = z2 = 0 while world_state1 == 0 and world_state2 == 0: world_state1 = self.agent_host1.peekWorldState() world_state2 = self.agent_host2.peekWorldState() print("..") while not len(world_state1.observations) >= 1 and not len( world_state2.observations) >= 1: world_state1 = self.agent_host1.peekWorldState() world_state2 = self.agent_host2.peekWorldState() print("..") if json.dumps(world_state1.observations[-1].text) and json.dumps( world_state2.observations[-1].text): msg1 = world_state1.observations[-1].text msg2 = world_state2.observations[-1].text obs1 = json.loads(msg1) obs2 = json.loads(msg2) """ checks, if world_state is read corrctly, if not, trys again""" while (x1 == y1 == z1 == 0) or (x2 == y2 == z2 == 0): world_state1 = self.agent_host1.peekWorldState() world_state2 = self.agent_host2.peekWorldState() x1, y1, z1 = self.get_position_in_arena( world_state1, time_step) x2, y2, z2 = self.get_position_in_arena( world_state2, time_step) print("..") #if u'inventory' in obs1: self.get_current_cell_agents() if self.flag_captured_tom and ( 12 <= x1 <= 15 and 0 <= z1 <= 4): # "(11,0)" <= self.cell_now_tom < "(14,5)" """ if agent reached the target area: look down, set block, jump on it to reach wanted position and win the game """ self.agent_host1.sendCommand("chat I won the game!") self.append_save_file_with_finish(time_step, "Tom") self.time_step_tom_won = time_step self.winner_agent = "Tom" self.agent_host1.sendCommand("look 1") time.sleep(0.2) self.agent_host1.sendCommand("use 1") time.sleep(0.2) self.agent_host1.sendCommand("jumpmove 1") time.sleep(0.2) self.agent_host1.sendCommand("look -1") self.mission_end = True else: if self.flag_captured_tom: print("[INFO] Tom holds the flag.") else: 
last_inventory_tom = obs1[u'inventory'] inventory_string_tom = json.dumps(last_inventory_tom) # print("Toms last inventory: ", inventory_string_tom) if (inventory_string_tom.find('quartz') != -1): """ tauscht quartz mit log, sodass quartz zurück gelegt werden kann""" if (json.dumps(last_inventory_tom[1]).find('quartz') != -1): self.agent_host1.sendCommand( "swapInventoryItems 0 1") self.agent_host1.sendCommand( "chat Wrong flag, I'll put it back!") self.agent_host1.sendCommand("use") self.agent_host1.sendCommand("swapInventoryItems 0 1") if (inventory_string_tom.find('log') != -1): self.flag_captured_tom = True self.time_step_tom_captured_the_flag = time_step self.append_save_file_with_flag(time_step, "Tom") print( "----------------------------------------------------------------Tom captured the flag after %i seconds!" % (time_step)) #if u'inventory' in obs2: if self.flag_captured_jerry and (0 <= x2 <= 4 and 11 <= z2 <= 15): """ if agent reached the target area: look down, set block, jump on it to reach wanted position and win the game """ self.agent_host2.sendCommand("chat I won the game!") self.append_save_file_with_finish(time_step, "Jerry") self.time_step_jerry_won = time_step self.winner_agent = "Jerry" self.agent_host2.sendCommand("look 1") time.sleep(0.2) self.agent_host2.sendCommand("use 1") time.sleep(0.2) self.agent_host2.sendCommand("jumpmove 1") time.sleep(0.2) self.agent_host2.sendCommand("look -1") self.mission_end = True else: if self.flag_captured_jerry: print("[INFO] Jerry holds the flag.") else: last_inventory_jerry = obs2[u'inventory'] inventory_string_jerry = json.dumps(last_inventory_jerry) # print("Jerrys last inventory: ", inventory_string_jerry) if (inventory_string_jerry.find('log') != -1): """ tauscht quartz mit log, sodass log zurück gelegt werden kann""" if (json.dumps(last_inventory_jerry[1]).find('log') != -1): self.agent_host2.sendCommand("swapInventoryItems 0 1") self.agent_host2.sendCommand( "chat Wrong flag, I'll put it back!") 
self.agent_host2.sendCommand("use") self.agent_host1.sendCommand("swapInventoryItems 0 1") if (inventory_string_jerry.find('quartz') != -1): self.flag_captured_jerry = True self.time_step_jerry_captured_the_flag = time_step self.append_save_file_with_flag(time_step, "Jerry") print( "----------------------------------------------------------------Jerry captured the flag after %i seconds!" % (time_step)) def sending_mission_quit_commands(self, overall_reward_agent_Tom, overall_reward_agent_Jerry, time_step, obs1, r1, obs2, r2, outdir, t, tom, jerry, experiment_ID): self.agent_host1.sendCommand("quit") self.agent_host2.sendCommand("quit") self.agent_host3.sendCommand("quit") dirname = os.path.join(outdir, 'plots') print("dirname: ", dirname) """ save and show results of reward calculations """ self.save_results(overall_reward_agent_Tom, overall_reward_agent_Jerry, time_step) print("Final Reward Tom: ", overall_reward_agent_Tom) print("Final Reward Jerry: ", overall_reward_agent_Jerry) """ end episode, save results """ tom.stop_episode_and_train(obs1, r1, done=True) jerry.stop_episode_and_train(obs2, r2, done=True) print("outdir: %s step: %s " % (outdir, t)) print("Tom's statistics: ", tom.get_statistics()) print("Jerry's statistics: ", jerry.get_statistics()) """ save the final model and results """ save_agent(tom, t, outdir, logger, suffix='_finish_01') save_agent(jerry, t, outdir, logger, suffix='_finish_02') """ save all the collected data for evaluation graphs """ self.save_data_for_evaluation_plots(t, time_step, overall_reward_agent_Tom, overall_reward_agent_Jerry, dirname) time.sleep(2) """ initialisation for the next episode, reset parameters, build new world """ t += 1 self.episode_counter += 1 r1 = r2 = 0 done1 = done2 = self.mission_end = False overall_reward_agent_Jerry = overall_reward_agent_Tom = 0 self.save_new_round(t) obs1, obs2 = self.reset_world(experiment_ID) self.too_close_counter = 0 self.winner_agent = "-" self.time_step_tom_won = 
self.time_step_jerry_won = None self.time_step_tom_captured_the_flag = self.time_step_jerry_captured_the_flag = None self.time_step_agents_ran_into_each_other = None self.steps_tom = 0 self.steps_jerry = 0 """ recover """ """if evaluator1 and evaluator2 is not None: evaluator1.evaluate_if_necessary( t=t, episodes=episode_idx + 1) evaluator2.evaluate_if_necessary( t=t, episodes=episode_idx + 1) if (successful_score is not None and evaluator1.max_score >= successful_score and evaluator2.max_score >= successful_score): break""" return t, obs1, obs2, r1, r2, done1, done2, overall_reward_agent_Jerry, overall_reward_agent_Tom def save_data_for_evaluation_plots(self, t, time_step, overall_reward_agent_Tom, overall_reward_agent_Jerry, dirname): """ t: number of episode time_step: duration of the episode too_close_counter: how often agents came too close overall_reward_agent_Tom, overall_reward_agent_Jerry: reward of the agents winner_agent: agent's name who won the episode, if there is no "-" time_step_tom_won: timestep, Tom won the game, if not: 0 time_step_jerry_won: timestep, Jerry won the game, if not: 0 time_step_tom_captured_the_flag : timestep, Tom captured the flag, if not: 0 time_step_jerry_captured_the_flag : timestep, Jerry captured the flag, if not: 0 time_step_agents_ran_into_each_other: timestep; the agents ran into each other and the mission ends """ print("t : ", self.episode_counter) if self.episode_counter > 0: """ Episode 0 is skipped, because there just starts the initialisation of the world, they do nothing. 
""" self.evaluation_agents_ran_into_each_other.append( self.time_step_agents_ran_into_each_other) print(self.time_step_agents_ran_into_each_other) if self.time_step_agents_ran_into_each_other is None: self.evaluation_episode_counter.append(self.episode_counter) self.evaluation_episode_time.append(time_step) self.evaluation_too_close_counter.append( self.too_close_counter) self.evaluation_reward_tom.append(overall_reward_agent_Tom) self.evaluation_reward_jerry.append(overall_reward_agent_Jerry) self.evaluation_winner_agent.append(self.winner_agent) if self.winner_agent == "Tom": self.evaluation_game_won_timestamp.append( self.time_step_tom_won) if self.winner_agent == "Jerry": self.evaluation_game_won_timestamp.append( self.time_step_jerry_won) self.evaluation_flag_captured_tom.append( self.time_step_tom_captured_the_flag) self.evaluation_flag_captured_jerry.append( self.time_step_jerry_captured_the_flag) self.evaluation_steps_tom.append(self.steps_tom) self.evaluation_steps_jerry.append(self.steps_jerry) """ evaluate and print the plots """ thesis_evaluation_experiment.evaluate( t, self.evaluation_episode_counter, self.evaluation_episode_time, self.evaluation_too_close_counter, self.evaluation_reward_tom, self.evaluation_reward_jerry, self.evaluation_winner_agent, self.evaluation_game_won_timestamp, self.evaluation_flag_captured_tom, self.evaluation_flag_captured_jerry, self.evaluation_agents_ran_into_each_other, dirname, self.evaluation_steps_tom, self.evaluation_steps_jerry)
import random
import math
import errno
import uuid
from collections import defaultdict, deque
from timeit import default_timer as timer

# NOTE(review): fragment — this script's `__main__` body is truncated at the
# bottom (the per-game loop has no visible body).
if __name__ == '__main__':
    # -- set up mission agent -- #
    agent_host_player = MalmoPython.AgentHost()
    client_pool = MalmoPython.ClientPool()
    # Single local Malmo client on the default Malmo port 10000.
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    # Use agent_host_player for parsing the command-line options.
    malmoutils.parse_command_line(agent_host_player)
    repeats = 1
    win_counter = 0
    # NOTE(review): `agent_host_debug` is not defined anywhere in this chunk —
    # only `agent_host_player` is. As written this line raises NameError;
    # confirm whether it should read `agent_host_player` (or whether
    # `agent_host_debug` is created in a part of the file not shown here).
    qlearner = Minesweeper_Agent.Qlearner(agent_host_debug)
    # Pre-train the Q-learner on simulated games before any Malmo mission is
    # started (semantics of train2/grid parameters live in Minesweeper_Agent).
    qlearner.train2(num_simulations=10000000, grid_size=5, reward=1, game_size=5, num_mines=5)
    qmap = qlearner.get_qmap()
    for game_counter in range(repeats):
        # -- set up the game -- #
        # Minesweeper(size, num_mines)
# NOTE(review): fragment — this chunk begins inside a wait-for-mission-start
# polling loop whose header is not visible, and ends inside a triple-quoted
# mission-XML string that continues past this chunk. Indentation of the loop
# tail below is reconstructed; verify against the full file.
        exit(1)
    time.sleep(0.1)
    print(".", end=' ')
    # Bail out if the mission did not begin within `time_out` seconds.
    if time.time() - start_time >= time_out:
        print("Timed out while waiting for mission to start - bailing.")
        exit(1)
print()
print("Mission has started.")

# -- set up two agent hosts --
agent_host_simeon = MalmoPython.AgentHost()
agent_host_fred = MalmoPython.AgentHost()

# Use simeon's agenthost to hold the command-line options:
malmoutils.parse_command_line(agent_host_simeon)

# -- set up the mission --
xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <About>
        <Summary/>
    </About>
    <ModSettings>
        <MsPerTick>10</MsPerTick>   <!-- Because it's pretty boring watching Fred build steps for five minutes... -->
    </ModSettings>
    <ServerSection>
        <ServerInitialConditions>
            <Time>
                <StartTime>0</StartTime>
            </Time>
def main():
    """Run a Malmo mission, record the session, and grab video frames.

    Starts one agent on a mission loaded from an XML file, records rewards /
    observations / commands / MP4 to a .tgz in ``recordingsDirectory``, and
    converts each received video frame to a PIL image. No return value; exits
    the process on unrecoverable mission-start failure.
    """
    # Observation grid extents, relative to the agent, in blocks.
    sight = {'x': (-30, 30), 'z': (-30, 30), 'y': (-1, 1)}
    range_x = abs(sight['x'][1] - sight['x'][0]) + 1
    range_y = abs(sight['y'][1] - sight['y'][0]) + 1
    range_z = abs(sight['z'][1] - sight['z'][0]) + 1
    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host)
    recordingsDirectory = malmoutils.get_recordings_directory(agent_host)
    # NOTE(review): the command-line-derived directory above is immediately
    # overwritten with a hard-coded path — the previous line is dead code;
    # confirm this override is intentional.
    recordingsDirectory = "../human_trajectories"
    if (not os.path.exists(recordingsDirectory)):
        os.mkdir(recordingsDirectory)
    logging.basicConfig(level=logging.INFO)
    # pdb.set_trace()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # set to INFO if you want fewer messages
    video_width = 640
    video_height = 480
    # NOTE(review): bare `sys.argv` expression is a no-op — leftover debug?
    sys.argv
    mission_xml_path = "../custom_xmls/usar.xml"
    validate = True
    # my_mission = MalmoPython.MissionSpec(missionXML, validate)
    my_mission = MalmoPython.MissionSpec(getMissionXML(mission_xml_path), validate)
    # ObservationFromGrid: request block observations over the `sight` volume
    # under the name 'relative_view'.
    my_mission.observeGrid(sight['x'][0], sight['y'][0], sight['z'][0],
                           sight['x'][1], sight['y'][1], sight['z'][1],
                           'relative_view')
    # agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)
    # NOTE(review): num_reps is set here but not used in the visible part of
    # this function (the loop below iterates range(1)).
    if agent_host.receivedArgument("test"):
        num_reps = 1
    else:
        num_reps = 30000
    my_mission_record = MalmoPython.MissionRecordSpec()
    if recordingsDirectory:
        my_mission_record.recordRewards()
        my_mission_record.recordObservations()
        my_mission_record.recordCommands()
        # if agent_host.receivedArgument("record_video"):
        #     my_mission_record.recordMP4(24,2000000)
        # NOTE(review): MP4 recording is enabled unconditionally even though
        # the commented-out guard suggests it was meant to be opt-in.
        my_mission_record.recordMP4(24, 2000000)
    # Timestamped base name for this session's recording archive.
    recording_name = datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")
    for iRepeat in range(1):
        # NOTE(review): destination uses the same recording_name every
        # iteration, so multiple repeats would overwrite the same .tgz.
        my_mission_record.setDestination(
            os.path.join(recordingsDirectory, recording_name + ".tgz"))
        max_retries = 3
        # Retry mission start a few times; Malmo clients can be slow to accept.
        for retry in range(max_retries):
            try:
                agent_host.startMission(my_mission, my_mission_record)
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    logger.error("Error starting mission: %s" % e)
                    exit(1)
                else:
                    time.sleep(2)
        logger.info('Mission %s', iRepeat)
        logger.info("Waiting for the mission to start")
        world_state = agent_host.getWorldState()
        # Poll until the mission actually begins.
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
        print()
        img_counter = 0
        # print('observations', world_state.observations)
        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()
            # Observations
            # msg = observe(agent_host)
            # if msg is not None:
            #     print('timestamp: ', msg['timestamp'])
            # NOTE : Nothing recorded in world state. Uncomment to test it out.
            # if world_state.number_of_observations_since_last_state > 0:
            #     timestamp = world_state.observations[-1].timestamp
            #     msg = world_state.observations[-1].text
            #     obs = json.loads(msg)
            #     print("{'timestamp': timestamp, 'observations': obs}")
            # Video Frames: block until at least one frame arrives (or the
            # mission ends while we wait).
            while world_state.number_of_video_frames_since_last_state < 1 and world_state.is_mission_running:
                logger.info("Waiting for frames...")
                time.sleep(0.05)
                world_state = agent_host.getWorldState()
            logger.info("Got frame!")
            # import ipdb; ipdb.set_trace
            # print('observations', world_state.observations)
            # world_state.observations
            if world_state.is_mission_running:
                # timestamp = world_state.observations[-1].timestamp
                # msg = world_state.observations[-1].text
                # print(timestamp)
                # print(msg)
                # Convert the newest raw RGB frame into a PIL image. The
                # 640x480 size here is hard-coded to match video_width/height
                # above — presumably also the mission XML's VideoProducer size;
                # TODO confirm.
                frame = world_state.video_frames[-1]
                img = Image.frombytes('RGB', (640, 480), bytes(frame.pixels))
                # imageio.imsave("./tmp_imgs/{}.png".format(img_counter), img)
                img_counter += 1
        logger.info("Mission has stopped.")
        time.sleep(1)  # let the Mod recover