Пример #1
0
    def run(self):
        """Run the game with all registered agents.

        Raises:
            :class:`jason_malmo.exceptions.NoAgentsException`: no agents
                are registered in the game. Register an agent before
                running the game::

                    game.register('/path/to/file.asl')
                    game.run()
        """
        self._client_pool = MalmoPython.ClientPool()

        if not self._agents:
            raise NoAgentsException

        # One local Malmo client per agent (range allocates one extra port).
        for client_port in range(10000, 10000 + len(self._agents) + 1):
            self._client_pool.add(
                MalmoPython.ClientInfo('127.0.0.1', client_port))

        self._my_mission = MalmoPython.MissionSpec(self._get_mission_xml(),
                                                   True)

        # Start the mission once per agent, each on its own role index.
        for role, agent in enumerate(self._agents):
            malmoutils.parse_command_line(agent.malmo_agent)
            recording = malmoutils.get_default_recording_object(
                agent.malmo_agent, "saved_data")
            self._safe_start_mission(agent.malmo_agent, self._my_mission,
                                     self._client_pool, recording, role, '')
        self._safe_wait_for_start(
            [agent.malmo_agent for agent in self._agents])

        # Run every agent's reasoning cycle on its own thread.
        workers = []
        for agent in self._agents:
            worker = threading.Thread(target=self._jason_env.run_agent,
                                      args=(agent, ),
                                      kwargs={})
            worker.start()
            workers.append(worker)

        # TODO while mission is running
        while True:
            for agent in self._agents:
                for belief, value in agent.beliefs.items():
                    if belief[0] != 'tasks':
                        continue
                    pending = [task for task in list(value)[0].args[0]]
                    self.tasks.handle(agent, pending)
            time.sleep(0.05)
    def StartServer(self, names, ip='127.0.0.1'):
        """Start the stored mission for one agent per entry in *names*.

        A Malmo client (ports 10000, 10001, ...) and a ``MultiAgent`` are
        created per name; command-line options are parsed via the first
        agent's host, then every agent's mission is started.
        """
        for idx, name in enumerate(names):
            port = 10000 + idx
            self.clientPool.add(MalmoPython.ClientInfo(ip, port))

            self.agents.append(MultiAgent(name, self.missionXML, idx))

        # The first agent's host carries the shared command-line options.
        malmoutils.parse_command_line(self.agents[0].host)

        for agent in self.agents:
            agent.StartMission(self.clientPool)

        self.safeWaitForStart(self.agents)
Пример #3
0
import random
import sys
import time
import json
import random
import errno
import math
import malmoutils
import numpy as np

import agentMC

malmoutils.fix_print()

# NOTE(review): MalmoPython is referenced here but does not appear in this
# snippet's import list above — confirm the import exists in the full file.
agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
recordingsDirectory = malmoutils.get_recordings_directory(agent_host)
# Request a 860x480 video producer only when "record_video" was passed on
# the command line; otherwise leave the mission XML fragment empty.
video_requirements = '<VideoProducer><Width>860</Width><Height>480</Height></VideoProducer>' if agent_host.receivedArgument(
    "record_video") else ''

# Task parameters:
MAX_DISTANCE = 40
MAX_ZOMBIES = 16
####### SPEED OF GAME #######
SPEED = 8
# Square arena: both dimensions equal MAX_DISTANCE.
ARENA_WIDTH = MAX_DISTANCE
ARENA_BREADTH = MAX_DISTANCE


def getCorner(index, top, left, expand=0, y=0):
    ''' Return part of the XML string that defines the requested corner'''
Пример #4
0
def run(argv=None):
    """Run a short demo mission: move forward while turning randomly.

    A single agent connects to a Minecraft client on 127.0.0.1:10000,
    runs a 10-second mission with video, and prints world-state
    statistics every half second.  ``restart_minecraft`` is invoked when
    the mission fails to begin, or when the world state stops changing
    for longer than ``max_response_time`` seconds.

    Args:
        argv: command-line argument list forwarded to
            ``malmoutils.parse_command_line``; ``None`` means the
            previous default ``['']`` (no extra options).
    """
    # Use a None sentinel instead of the original mutable default
    # argument ``argv=['']`` (shared-list pitfall); behavior unchanged.
    if argv is None:
        argv = ['']

    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds( 10 )
    my_mission.requestVideo( 320, 240 )
    my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Starting a mission can fail transiently; retry a few times.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:",error.text)
    print()

    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand( "move 1" )
        agent_host.sendCommand( "turn " + str(random.random()*2-1) )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",world_state.number_of_video_frames_since_last_state,world_state.number_of_observations_since_last_state,world_state.number_of_rewards_since_last_state)
        # Any new frame/observation/reward counts as progress; otherwise
        # restart Minecraft once the stall exceeds max_response_time.
        if (world_state.number_of_video_frames_since_last_state > 0 or
           world_state.number_of_observations_since_last_state > 0 or
           world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:",reward.getValue())
        for error in world_state.errors:
            print("Error:",error.text)
        for frame in world_state.video_frames:
            print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
Пример #5
0
def main():
    """Play a song with multiple Malmo agents striking note blocks."""

    # Hardcode the number of agents that play the song.
    num_agents = 4

    # Obtain the song csv and solve per-agent note assignments.
    #freq_list = mt.create_note_list("Twinkle_Twinkle_Little_Star.csv",120,7000,-.08) #1 Agent
    #freq_list = mt.create_note_list("Chopsticks.csv",120,4000,-.15,.03) #2 Agents
    #freq_list = mt.create_note_list("Grenade_120BPM.csv",120,1500,-.08,.03) #4 Agents
    freq_list = mt.create_note_list("Bad_Apple.csv", 120, 3000, -.08,
                                    .03)  #2 Agents
    freq_list = mt.number_converter(freq_list)
    solutions = cs.get_solutions(freq_list, num_agents)
    print(solutions)

    # Mission XML is needed for the teleport positions.
    missionXML = getMissionXML(num_agents)

    # One Musician per agent, each with its own teleport positions.
    musicians = [
        Musician(generateAgentTeleportPositions(note_positions, idx))
        for idx in range(num_agents)
    ]
    '''
    MALMO
    '''
    print('Starting...', flush=True)

    # One AgentHost per agent; the first one holds the shared options.
    agent_hosts = [MalmoPython.AgentHost() for _ in range(num_agents)]

    malmoutils.parse_command_line(agent_hosts[0])

    # Build the mission and allow the chat commands used for teleporting.
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.allowAllChatCommands()

    # One local client per agent, on consecutive ports from 10000.
    my_client_pool = MalmoPython.ClientPool()
    for offset in range(num_agents):
        my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000 + offset))

    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)

    # Start the mission once per agent, each on its own role index.
    for role in range(num_agents):
        recording = malmoutils.get_default_recording_object(
            agent_hosts[0], "agent_" + str(role + 1) + "_viewpoint_discrete")
        startMission(agent_hosts[role], my_mission, my_client_pool, recording,
                     role, '')

    # Wait for all missions to begin.
    waitForStart(agent_hosts)

    # Pause for the simulation to begin.
    time.sleep(1)
    '''
    SIMULATION BEGINS HERE
    '''

    for step in range(len(solutions[0])):

        # Teleport each agent to its note for this step.
        for idx, musician in enumerate(musicians):
            musician.teleport_to_noteblock(agent_hosts[idx],
                                           solutions[idx][step])

        # Strike each playable note.
        for idx, musician in enumerate(musicians):
            if musician.can_play:
                agent_hosts[idx].sendCommand("attack 1")

        time.sleep(0.001)

        # Release the attack key and reset playability for the next step.
        for idx, musician in enumerate(musicians):
            if musician.can_play:
                agent_hosts[idx].sendCommand("attack 0")
            musician.can_play = False

        # Timing between consecutive note hits.
        time.sleep(0.2)
Пример #6
0
import MalmoPython
import os
import random
import sys
import time
import json
import copy
import errno
import xml.etree.ElementTree
from collections import deque
import malmoutils

malmoutils.fix_print()

# Single agent host; command-line options configure it and determine the
# recordings directory.
agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
recordingsDirectory = malmoutils.get_recordings_directory(agent_host)

# Set up some palettes: each is a list of six Minecraft block types (some
# with colour variants appended after a space).
colourful=["stained_glass", "diamond_block", "lapis_block", "gold_block", "redstone_block", "obsidian"]
fiery=["stained_glass WHITE", "stained_glass PINK", "stained_glass ORANGE", "stained_glass RED", "wool BLACK", "glowstone"]
oresome=["gold_ore", "lapis_ore", "iron_ore", "emerald_ore", "redstone_ore", "quartz_ore"]
frilly=["skull", "stained_glass WHITE", "wool PINK", "wool WHITE", "stained_hardened_clay PINK", "stained_hardened_clay WHITE"]
icepalace=["ice", "stained_glass", "stained_glass", "stained_glass", "stained_glass", "snow"]
volatile=["tnt", "stained_glass", "stained_glass", "redstone_block", "stained_glass", "stained_glass"]
oak=["planks", "planks", "planks", "planks", "lapis_block", "lapis_block"]
sponge=["sponge", "glass", "sponge", "glass", "sponge", "glass"]
# NOTE(review): "palletes" is misspelled but presumably referenced later in
# the file — keep the name as-is.
palletes = [colourful, fiery, oresome, frilly, icepalace, volatile, oak, sponge]

# dimensions of the test structure:
SIZE_X = 21
Пример #7
0
def run(argv=None):
    """Run a short demo mission: move forward while turning randomly.

    A single agent connects to a Minecraft client on 127.0.0.1:10000,
    runs a 10-second mission with video, and prints world-state
    statistics every half second.  ``restart_minecraft`` is invoked when
    the mission fails to begin, or when the world state stops changing
    for longer than ``max_response_time`` seconds.

    Args:
        argv: command-line argument list forwarded to
            ``malmoutils.parse_command_line``; ``None`` means the
            previous default ``['']`` (no extra options).
    """
    # Use a None sentinel instead of the original mutable default
    # argument ``argv=['']`` (shared-list pitfall); behavior unchanged.
    if argv is None:
        argv = ['']

    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds( 10 )
    my_mission.requestVideo( 320, 240 )
    my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Starting a mission can fail transiently; retry a few times.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:",error.text)
    print()

    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand( "move 1" )
        agent_host.sendCommand( "turn " + str(random.random()*2-1) )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",world_state.number_of_video_frames_since_last_state,world_state.number_of_observations_since_last_state,world_state.number_of_rewards_since_last_state)
        # Any new frame/observation/reward counts as progress; otherwise
        # restart Minecraft once the stall exceeds max_response_time.
        if (world_state.number_of_video_frames_since_last_state > 0 or
           world_state.number_of_observations_since_last_state > 0 or
           world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:",reward.getValue())
        for error in world_state.errors:
            print("Error:",error.text)
        for frame in world_state.video_frames:
            print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
Пример #8
0
            print("Bailing now.")
            exit(1)
        time.sleep(0.1)
        print(".", end=' ')
    if time.time() - start_time >= time_out:
        print("Timed out while waiting for mission to start - bailing.")
        exit(1)
    print()
    print("Mission has started.")

# -- set up two agent hosts --
# NOTE(review): presumably one host per player in a two-agent mission
# (the mission XML follows below) — confirm against the full file.
agent_host_simeon = MalmoPython.AgentHost()
agent_host_fred = MalmoPython.AgentHost()

# Use simeon's agenthost to hold the command-line options:
malmoutils.parse_command_line(agent_host_simeon)

# -- set up the mission --
xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <About>
    <Summary/>
  </About>
  <ModSettings>
    <MsPerTick>10</MsPerTick>   <!-- Because it's pretty boring watching Fred build steps for five minutes... -->
  </ModSettings>
  <ServerSection>
    <ServerInitialConditions>
      <Time>
        <StartTime>0</StartTime>
      </Time>
Пример #9
0
def main(agent_host):
    """Train a DQN agent in Malmo, optionally collecting vision data.

    Runs episodes until ``Hyperparameters.MAX_GLOBAL_STEPS`` environment
    steps have been taken.  Each step the agent picks an action via
    ``get_action`` using the current epsilon, stores the transition in a
    replay buffer, and periodically trains ``q_network`` against a
    target network.  When ``GET_VISION_DATA`` is set, per-frame image
    labels are collected and saved to ``images/image_labels`` at the end.

    Args:
        agent_host: a Malmo ``AgentHost``; re-initialised each episode
            via ``init_malmo``.
    """
    device = torch.device("cpu")
    if VISION_ENABLED:
        eyes = Eyes()
    if GET_VISION_DATA:
        clear_images()
    malmoutils.fix_print()
    malmoutils.parse_command_line(agent_host)
    recordingsDirectory = malmoutils.get_recordings_directory(agent_host)

    # Online network and a target network of identical shape:
    # 2-channel OBS_SIZE x OBS_SIZE input, one output per action.
    q_network = QNetwork((2, Hyperparameters.OBS_SIZE, Hyperparameters.OBS_SIZE), len(Hyperparameters.ACTION_DICT))
    target_network = QNetwork((2, Hyperparameters.OBS_SIZE, Hyperparameters.OBS_SIZE), len(Hyperparameters.ACTION_DICT))
    target_network.load_state_dict(q_network.state_dict())

    optim = torch.optim.Adam(q_network.parameters(), lr= Hyperparameters.LEARNING_RATE)

    # Bounded FIFO buffer of (obs, action_idx, next_obs, reward, done).
    replay_buffer = deque(maxlen=Hyperparameters.REPLAY_BUFFER_SIZE)

    global_step = 0
    num_episode = 0
    epsilon = 1  # exploration rate; decayed during training below
    start_time = time.time()
    returns = []
    steps = []
    loss_array = []

    loop = tqdm(total=Hyperparameters.MAX_GLOBAL_STEPS, position=0, leave=False)

    result_dataset = []

    print("Global Step", Hyperparameters.MAX_GLOBAL_STEPS)
    while global_step < Hyperparameters.MAX_GLOBAL_STEPS:
        episode_step = 0
        episode_return = 0
        episode_loss = 0
        done = False
        

        #Initialize a fresh mission and block until it has begun.
        agent_host = init_malmo(agent_host,recordingsDirectory, video_width,video_height)
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            #for error in world_state.errors:
                #print("\nError:",error.text)
        obs = get_observation(world_state, agent_host)


        #Testing  
        agent_host.sendCommand( "move 1" )

        while world_state.is_mission_running:
            #Depth Implementation
            # Wait for at least one video frame before reading pixels.
            while world_state.number_of_video_frames_since_last_state < 1 and world_state.is_mission_running:
                time.sleep(0.05)
                world_state = agent_host.getWorldState()

            if world_state.is_mission_running:
                frame = world_state.video_frames[0].pixels
                processFrame(frame)

                if GET_VISION_DATA:
                    try:
                        result_dataset.append(view_surrounding(video_height, video_width, frame, global_step))
                    except:
                        print("Error in getting image for training data.")
                
                elif VISION_ENABLED:
                    input_img_temp = get_img(world_state,frame,agent_host,eyes,device,video_width,video_height)
                        
                print("Yaw Delta ", current_yaw_delta_from_depth)  

                # Steer according to the yaw delta derived from the depth
                # frame (global updated by processFrame — presumably; confirm).
                if current_yaw_delta_from_depth > 0:
                    agent_host.sendCommand(Hyperparameters.ACTION_DICT[1])
                else:
                    agent_host.sendCommand(Hyperparameters.ACTION_DICT[2])
        

            action_idx = get_action(obs, q_network, epsilon)
            command = Hyperparameters.ACTION_DICT[action_idx]

            agent_host.sendCommand(command)
            #agent_host.sendCommand( "turn " + str(current_yaw_delta_from_depth) )

            #time.sleep(.3)

            episode_step += 1
            # End the episode at the step cap, or when the grid cell just
            # south of center reads -1 while moving south (assumed to mark
            # a terminal condition — TODO confirm).
            if episode_step >= Hyperparameters.MAX_EPISODE_STEPS or \
                    (obs[0, int(Hyperparameters.OBS_SIZE/2)+1, int(Hyperparameters.OBS_SIZE/2)] == -1 and \
                    command == 'movesouth 1'):
                done = True
                time.sleep(2)  

            world_state = agent_host.getWorldState()            
                        
            for error in world_state.errors:
                print("Error:", error.text)
            
            next_obs = get_observation(world_state, agent_host) 
        
            # Sum all rewards delivered since the last world-state poll.
            reward = 0
            for r in world_state.rewards:
                reward += r.getValue()
            episode_return += reward

            replay_buffer.append((obs, action_idx, next_obs, reward, done))
            obs = next_obs

            global_step += 1
            #print(global_step)
            if global_step == Hyperparameters.MAX_GLOBAL_STEPS:
                break

            # Train every LEARN_FREQUENCY steps once warm-up is complete.
            if global_step > Hyperparameters.START_TRAINING and global_step % Hyperparameters.LEARN_FREQUENCY == 0:
                batch = prepare_batch(replay_buffer)
                loss = learn(batch, optim, q_network, target_network)
                episode_loss += loss

                if epsilon > Hyperparameters.MIN_EPSILON:
                    epsilon *= Hyperparameters.EPSILON_DECAY

                # Periodically sync the target network with the online one.
                if global_step % Hyperparameters.TARGET_UPDATE == 0:
                    target_network.load_state_dict(q_network.state_dict())



        # Episode bookkeeping: record return/loss and refresh the progress bar.
        num_episode += 1
        returns.append(episode_return)
        loss_array.append(episode_loss)
        steps.append(global_step)
        avg_return = sum(returns[-min(len(returns), 10):]) / min(len(returns), 10)
        loop.update(episode_step)
        loop.set_description('Episode: {} Steps: {} Time: {:.2f} Loss: {:.2f} Last Return: {:.2f} Avg Return: {:.2f}'.format(
            num_episode, global_step, (time.time() - start_time) / 60, episode_loss, episode_return, avg_return))

        if num_episode > 0 and num_episode % 10 == 0:
            log_returns(steps, loss_array)
            #print()

    #print(len(result_dataset))
    np.save("images/image_labels",np.array(result_dataset))
Пример #10
0
def run(argv=None):
    """Wander a default-generated world, jumping over obstacles.

    A single agent walks forward along a weaving path for up to five
    minutes.  Each tick it reads the 3x3x4 block grid around itself
    ("all_the_blocks", y = -1..2 per the mission XML), derives its
    compass facing from the yaw, and jumps when the row of blocks ahead
    at body height is not air.  Restarts Minecraft when the mission
    fails to begin or the world state stops changing.

    Args:
        argv: command-line argument list forwarded to
            ``malmoutils.parse_command_line``; ``None`` means the
            previous default ``['']`` (no extra options).
    """
    # Use a None sentinel instead of the original mutable default
    # argument ``argv=['']`` (shared-list pitfall); behavior unchanged.
    if argv is None:
        argv = ['']

    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    #forceReset="true"
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
            <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            
              <About>
                <Summary>Hello world!</Summary>
              </About>
              
              <ServerSection>
                <ServerHandlers>
                  <DefaultWorldGenerator forceReset="true" />
                  <ServerQuitFromTimeUp timeLimitMs="30000"/>
                  <ServerQuitWhenAnyAgentFinishes/>
                </ServerHandlers>
              </ServerSection>
              
              
              <AgentSection mode="Survival">
                <Name>MalmoTutorialBot</Name>
                <AgentStart>
                    <Inventory>
                        <InventoryItem slot="8" type="diamond_pickaxe"/>
                    </Inventory>
                </AgentStart>
                <AgentHandlers>
                    <ObservationFromFullStats/>
                    <ObservationFromGrid>
                        <Grid name="all_the_blocks" >
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="2" z="1"/>
                        </Grid>
                    </ObservationFromGrid>
                    <ContinuousMovementCommands turnSpeedDegs="180"/>
                </AgentHandlers>
              </AgentSection>
            </Mission>'''

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.timeLimitInSeconds(300)
    my_mission.requestVideo(640, 480)

    #my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Starting a mission can fail transiently; retry a few times.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    last_delta = time.time()

    # main loop:
    #agent_host.sendCommand( "jump 1")
    TURN = 0    # phase of the sinusoidal weaving path
    TURN2 = 0   # countdown of forced "turn 1" ticks after hitting a wall
    JUMP = 0    # countdown of ticks the jump key stays held
    while world_state.is_mission_running:
        print("New Iteration")

        # Hold the jump key for JUMP ticks, then release it exactly once.
        if JUMP > 0:
            JUMP = JUMP - 1
        if JUMP == 0:
            agent_host.sendCommand("jump 0")
            JUMP = JUMP - 1
        agent_host.sendCommand("move 1")
        # Weave: alternate between a gentle right and a sharper left turn.
        if math.sin(TURN) / 3 >= 0:
            agent_host.sendCommand("turn 0.15")
        else:
            agent_host.sendCommand("turn -0.2")
        print(TURN, " ", math.sin(TURN))
        TURN = TURN + 0.3

        #agent_host.sendCommand( "jump 1" )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        # NOTE(review): assumes at least one observation is pending; raises
        # IndexError otherwise — confirm this holds in practice.
        y = json.loads(world_state.observations[-1].text)

        #print(y["all_the_blocks"])
        # Derive the compass facing from the yaw angle.
        # (Renamed from "dir", which shadowed the builtin.)
        facing = ""
        if y["Yaw"] + 180 < 90:
            facing = "S"
            print("Facing South")
        elif y["Yaw"] < 180:
            facing = "W"
            print("Facing West")
        elif y["Yaw"] < 270:
            facing = "N"
            print("Facing North")
        else:
            facing = "E"
            print("Facing East")

        # Split the flat 3x3x4 grid observation into its four y-layers of
        # 9 blocks each (y = -1..2 around the agent, per the mission XML).
        blocks = [[], [], [], []]
        for i, block in enumerate(y["all_the_blocks"]):
            blocks[i // 9].append(block)

        # Jump when the body-height row of blocks ahead is not air.
        if facing == "S":
            willjump = False

            for j in range(0, 3):
                if blocks[1][j] != "air":
                    willjump = True
                print(j, blocks[1][j], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif facing == "W":
            willjump = False

            for j in range(0, 3):
                if blocks[1][j * 3] != "air":
                    willjump = True
                print(j * 3, blocks[1][j * 3], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif facing == "E":
            willjump = False

            for j in range(1, 4):
                if blocks[1][j * 3 - 1] != "air":
                    willjump = True
                print(j * 3 - 1, blocks[1][j * 3 - 1], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif facing == "N":
            willjump = False

            # NOTE(review): tests blocks[1][j] but prints blocks[1][j + 6];
            # the north-facing row check looks inconsistent — confirm which
            # indices were intended before changing it.
            for j in range(0, 3):
                if blocks[1][j] != "air":
                    willjump = True
                print(j, blocks[1][j + 6], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")

        # Blocked at both body and head height: force a spin.  (The original
        # condition repeated both disjuncts verbatim; the duplicates were
        # removed — the logic is unchanged.)
        if (blocks[1][2] != "air" and blocks[2][2] != "air"
                or blocks[1][4] != "air" and blocks[2][4] != "air"):
            TURN2 = 2

        # NOTE(review): ">= 0" makes this fire on the very first tick
        # (TURN2 starts at 0) and for three ticks after each reset to 2;
        # "> 0" may have been intended — kept as-is to preserve behavior.
        if TURN2 >= 0:
            agent_host.sendCommand("turn 1")
            TURN2 = TURN2 - 1
        '''if blocks[1][5] != "air" or  blocks[1][5] != "grass" or blocks[1][5] != "tallgrass" :
            JUMP = 2
            agent_host.sendCommand( "jump 1" )
            print()
            print(blocks[1][5])'''

        #print(len(blocks))
        #print(blocks)

        # Any new frame/observation/reward counts as progress; otherwise
        # restart Minecraft once the stall exceeds max_response_time.
        if (world_state.number_of_video_frames_since_last_state > 0
                or world_state.number_of_observations_since_last_state > 0
                or world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print()
            #print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
Пример #11
0
def run(size, algo1, algo2):
    """Run a two-agent Malmo "break the floor" duel and report the outcome.

    Two agents spawn on opposite corners of a square snow platform that
    floats over lava.  Each loop iteration the Agent breaks one adjacent
    floor block and the Enemy moves, then the Enemy breaks and the Agent
    moves.  An agent loses as soon as lava shows up in the 3x3 floor grid
    underneath it.

    Args:
        size:  edge length of the square arena (anything ``int()`` accepts).
        algo1: algorithm-table key selecting the Agent's policy.
        algo2: algorithm-table key selecting the Enemy's policy.

    Returns:
        0 if the Enemy won, 1 if the Agent won, 2 if the mission ended
        with no winner (e.g. the 30 s server time limit expired).
    """
    # Policies selectable by name for either player.
    algorithms = {
        "reflex": reflex.reflex,
        'random': randomagent.randommove,
        'smartrandom': smartrandomagent.randommove,
        'astarreflex': AStarReflex.search,
        "minimax": minimax.minmax
    }

    malmoutils.fix_print()

    # -- set up two agent hosts --
    agent_host1 = MalmoPython.AgentHost()
    agent_host2 = MalmoPython.AgentHost()
    map_size = int(size)
    map_minus = str(map_size - 1)
    agentAlgo = algorithms[algo1]
    enemyAlgo = algorithms[algo2]

    # Use agent_host1 for parsing the command-line options.
    # (This is why agent_host1 is passed in to all the subsequent malmoutils calls, even for
    # agent 2's setup.)
    malmoutils.parse_command_line(agent_host1)

    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
                <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
                
                  <About>
                    <Summary>Hello world!</Summary>
                  </About>
                  
                  <ServerSection>
                    <ServerInitialConditions>
                      <Time>
                        <StartTime>12000</StartTime>
                        <AllowPassageOfTime>false</AllowPassageOfTime>
                      </Time>
                    </ServerInitialConditions>
                    <ServerHandlers>
                      <FlatWorldGenerator generatorString="3;7,220*1,5*3,2;3;,biome_1"/>
                      <DrawingDecorator>
                        <!-- coordinates for cuboid are inclusive -->
                        <DrawCuboid x1="0" y1="45" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="300" z2=''' + '"' + map_minus + '"' + ''' type="air" />            <!-- limits of our arena -->
                        <DrawCuboid x1="0" y1="40" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="44" z2=''' + '"' + map_minus + '"' + ''' type="lava" />           <!-- lava floor -->
                        <DrawCuboid x1="0"  y1="46" z1="0"  x2=''' + '"' + map_minus + '"' + ''' y2="46" z2=''' + '"' + map_minus + '"' + ''' type="snow" />
                      </DrawingDecorator>
                      <ServerQuitFromTimeUp timeLimitMs="30000"/>
                      
                    </ServerHandlers>
                  </ServerSection>
                  
                  <AgentSection mode="Survival">
                    <Name>Agent</Name>
                    <AgentStart>
                        <Inventory>
                            <InventoryItem slot="0" type="diamond_shovel"/>
                        </Inventory>
                        <Placement x="0.5" y="47.0" z="0.5" pitch="50" yaw="0"/>
                    </AgentStart>
                    <AgentHandlers>
                      <ObservationFromFullStats/>
                      <ObservationFromGrid>
                          <Grid name="floor3x3W">
                            <min x="-1" y="0" z="-1"/>
                            <max x="1" y="0" z="1"/>
                          </Grid>
                          <Grid name="floor3x3F">
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="-1" z="1"/>
                          </Grid>
                      </ObservationFromGrid>
                      <DiscreteMovementCommands/>
                    </AgentHandlers>
                  </AgentSection>
                  
                  <AgentSection mode="Survival">
                    <Name>Enemy</Name>
                    <AgentStart>
                        <Inventory>
                            <InventoryItem slot="0" type="diamond_shovel"/>
                        </Inventory>
                        <Placement x=''' + '"' + str(
        float(map_size) - 0.5) + '"' + ''' y="47.0" z=''' + '"' + str(
            float(map_size) - 0.5) + '"' + ''' pitch="50" yaw="180"/>
                    </AgentStart>
                    
                    <AgentHandlers>
                      <ObservationFromFullStats/>
                      <DiscreteMovementCommands/>
                      <ObservationFromGrid>
                          <Grid name="floor3x3W">
                            <min x="-1" y="0" z="-1"/>
                            <max x="1" y="0" z="1"/>
                          </Grid>
                          <Grid name="floor3x3F">
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="-1" z="1"/>
                          </Grid>
                      </ObservationFromGrid>
                      <RewardForTouchingBlockType>
                        <Block reward="-100.0" type="lava" behaviour="onceOnly"/>
                      </RewardForTouchingBlockType>
                      <AgentQuitFromTouchingBlockType>
                        <Block type="lava" />
                      </AgentQuitFromTouchingBlockType>
                    </AgentHandlers>
                  </AgentSection>
                </Mission>'''

    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(missionXML, True)

    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    my_mission_record = MalmoPython.MissionRecordSpec()

    def safeStartMission(agent_host, mission, client_pool, recording, role,
                         experimentId):
        """Start the mission for *role*, retrying on recoverable errors."""
        used_attempts = 0
        max_attempts = 5
        print("Calling startMission for role", role)
        while True:
            try:
                agent_host.startMission(mission, client_pool, recording, role,
                                        experimentId)
                break
            except MalmoPython.MissionException as e:
                errorCode = e.details.errorCode
                if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
                    print("Server not quite ready yet - waiting...")
                    time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_INSUFFICIENT_CLIENTS_AVAILABLE:
                    print("Not enough available Minecraft instances running.")
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait in case they are starting up.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_NOT_FOUND:
                    print(
                        "Server not found - has the mission with role 0 been started yet?"
                    )
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait and retry.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                else:
                    print("Other error:", e.message)
                    print("Waiting will not help here - bailing immediately.")
                    exit(1)
            if used_attempts == max_attempts:
                print("All chances used up - bailing now.")
                exit(1)
        print("startMission called okay.")

    def safeWaitForStart(agent_hosts):
        """Block until every host reports its mission has begun (2 min cap)."""
        print("Waiting for the mission to start", end=' ')
        start_flags = [False for a in agent_hosts]
        start_time = time.time()
        time_out = 120  # Allow two minutes for mission to start.
        while not all(start_flags) and time.time() - start_time < time_out:
            states = [a.peekWorldState() for a in agent_hosts]
            start_flags = [w.has_mission_begun for w in states]
            errors = [e for w in states for e in w.errors]
            if len(errors) > 0:
                print("Errors waiting for mission start:")
                for e in errors:
                    print(e.text)
                print("Bailing now.")
                exit(1)
            time.sleep(0.1)
            print(".", end=' ')
        print()
        if time.time() - start_time >= time_out:
            print("Timed out waiting for mission to begin. Bailing.")
            exit(1)
        print("Mission has started.")

    safeStartMission(agent_host1, my_mission, client_pool, my_mission_record,
                     0, '')
    safeStartMission(agent_host2, my_mission, client_pool, my_mission_record,
                     1, '')
    safeWaitForStart([agent_host1, agent_host2])

    def movement(ah, direction, pos):
        """Send one discrete move command and return the new (x, z) cell."""
        if direction == "north":
            ah.sendCommand("movenorth 1")
            position = (pos[0], pos[1] - 1)
        elif direction == "south":
            ah.sendCommand("movesouth 1")
            position = (pos[0], pos[1] + 1)
        elif direction == "west":
            ah.sendCommand("movewest 1")
            position = (pos[0] - 1, pos[1])
        elif direction == "east":
            ah.sendCommand("moveeast 1")
            position = (pos[0] + 1, pos[1])
        else:
            position = (pos[0], pos[1])
        time.sleep(0.1)
        return position

    def attack(ah, index, pos, arena_map, enemy=False):
        """Break the floor block adjacent to *pos* in compass direction *index*.

        Agents may only break the blocks immediately around them; turning
        commands orient the agent toward the target before attacking and
        back afterwards.  Enemy starts facing north, Agent facing south,
        hence the mirrored turn sequences.  Marks the broken cell False
        in *arena_map*.
        """
        x, y = math.floor(pos[0]), math.floor(pos[1])
        did_Break = False
        if enemy:
            # Enemy: 0 1 0
            #        4 X 2
            #        0 3 0
            if index == "north":
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                y -= 1
                did_Break = True
            if index == "east":
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                x += 1
                did_Break = True
            if index == "west":
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                x -= 1
                did_Break = True
            if index == "south":
                # Behind the agent: two quarter turns each way.
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                y += 1
                did_Break = True
        else:
            # Agent: 0 3 0
            #        2 X 4
            #        0 1 0
            if index == "south":
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                y += 1
                did_Break = True
            if index == "west":
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                x -= 1
                did_Break = True
            if index == "east":
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                x += 1
                did_Break = True
            if index == "north":
                # Behind the agent: two quarter turns each way.
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                y -= 1
                did_Break = True
        if did_Break:
            arena_map[x][y] = False

    '''
    Sample Observation:
    {"DistanceTravelled":0,"TimeAlive":50,"MobsKilled":0,"PlayersKilled":0,"DamageTaken":0,"DamageDealt":0,
    "Life":20.0,"Score":0,"Food":20,"XP":0,"IsAlive":true,"Air":300,"Name":"Enemy","XPos":5.5,"YPos":47.0,
    "ZPos":5.5,"Pitch":50.0,"Yaw":180.0,"WorldTime":12000,"TotalTime":57}

    '''

    agent_score = 0
    agent_ob = None
    enemy_ob = None

    # arena_map[x][y] is True while the floor block at that cell is intact.
    # (Renamed from `map` so the builtin is no longer shadowed.)
    arena_map = [[True for i in range(0, map_size)]
                 for j in range(0, map_size)]

    while True:
        # Scores should decrease with time and get a bonus if they win.
        agent_score -= 1
        agent_state = agent_host1.peekWorldState()
        enemy_state = agent_host2.peekWorldState()
        if agent_state.number_of_observations_since_last_state > 0:
            agent_ob = json.loads(agent_state.observations[-1].text)
        if enemy_state.number_of_observations_since_last_state > 0:
            enemy_ob = json.loads(enemy_state.observations[-1].text)
        # Bug fix: test for mission end BEFORE waiting on observations.
        # Previously the None-observation `continue` came first, so the loop
        # spun forever if the mission ended before any observation arrived.
        if not agent_state.is_mission_running:
            break
        if agent_ob is None or enemy_ob is None:
            continue
        agent_position = (agent_ob["XPos"], agent_ob["ZPos"])
        enemy_position = (enemy_ob["XPos"], enemy_ob["ZPos"])

        agent_grid = agent_ob.get(u'floor3x3F', 0)
        enemy_grid = enemy_ob.get(u'floor3x3F', 0)

        # Lava under an agent's feet decides the game.
        if "lava" in agent_grid:
            print("Enemy Won!")
            agent_score -= 100
            for i in arena_map:
                print(i)
            return 0
        if "lava" in enemy_grid:
            print("Agent Won!")
            agent_score += 100
            for i in arena_map:
                print(i)
            return 1

        agentMoveString, agentBreakIndex = agentAlgo(agent_host1,
                                                     agent_position,
                                                     enemy_position,
                                                     agent_grid, arena_map)
        enemyMoveString, enemyBreakIndex = enemyAlgo(agent_host2,
                                                     enemy_position,
                                                     agent_position,
                                                     enemy_grid, arena_map)

        # Agent turn to break.
        attack(agent_host1, agentBreakIndex, agent_position, arena_map)
        # Enemy turn to move.
        pos = movement(agent_host2, enemyMoveString, enemy_position)
        # Enemy turn to break (from its post-move position).
        attack(agent_host2, enemyBreakIndex, pos, arena_map, enemy=True)
        # Agent turn to move.
        movement(agent_host1, agentMoveString, agent_position)
    for i in arena_map:
        print(i)
    return 2
class ThesisEnvExperiment(gym.Env):
    """
    Gym environment wrapping a three-agent Malmo "capture the flag" mission.

    The three Malmo agent hosts are created at class-definition time and
    each is given command-line permissions via malmoutils.  All the
    attributes below are class-level, i.e. shared by every instance.
    """
    metadata = {'render.modes': ['human']}
    """ Agent 01: Tom """
    agent_host1 = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host1)
    """ Agent 02: Jerry """
    agent_host2 = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host2)
    """ Agent 03: Skye """
    agent_host3 = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host3)
    """global variables to remember, if somebody already catched the flag"""
    # Per-episode flag-capture state for the two competing agents.
    flag_captured_tom = flag_captured_jerry = False
    # Grid-cell bookkeeping (last fetched cell and current cell per agent).
    fetched_cell_tom = fetched_cell_jerry = cell_now_tom = cell_now_jerry = 0
    time_stamp_start_for_distance = 0
    # Counts how often the agents got too close to each other.
    too_close_counter = 0
    # Timestamps of decisive events; None until the event happens.
    time_step_tom_won = None
    time_step_jerry_won = None
    time_step_tom_captured_the_flag = None
    time_step_jerry_captured_the_flag = None
    winner_agent = "-"
    time_step_agents_ran_into_each_other = None
    # Step counters per agent and the running episode index.
    steps_tom = 0
    steps_jerry = 0
    episode_counter = 0
    """ collected data for evaluation """
    # One entry is appended per finished episode to each list below.
    evaluation_episode_counter = []
    evaluation_too_close_counter = []
    evaluation_episode_time = []
    evaluation_flag_captured_tom = []
    evaluation_flag_captured_jerry = []
    evaluation_agents_ran_into_each_other = []
    evaluation_game_won_timestamp = []
    evaluation_winner_agent = []
    evaluation_reward_tom = []
    evaluation_reward_jerry = []
    evaluation_steps_tom = []
    evaluation_steps_jerry = []
    def __init__(self):
        """Load the capture-the-flag mission XML and set up default state."""
        super(ThesisEnvExperiment, self).__init__()
        # The mission description lives in an external XML file.
        mission_file = 'capture_the_flag_xml_mission_DQL.xml'
        self.load_mission_file(mission_file)
        print("Mission loaded: Capture the Flag")
        # One local Minecraft client per agent (three agents in total).
        self.client_pool = [('127.0.0.1', port)
                            for port in (10000, 10001, 10002)]
        self.mc_process = None
        self.mission_end = False

    def init(self,
             client_pool=None,
             start_minecraft=None,
             continuous_discrete=True,
             add_noop_command=None,
             max_retries=90,
             retry_sleep=10,
             step_sleep=0.001,
             skip_steps=0,
             videoResolution=None,
             videoWithDepth=None,
             observeRecentCommands=None,
             observeHotBar=None,
             observeFullInventory=None,
             observeGrid=None,
             observeDistance=None,
             observeChat=None,
             allowContinuousMovement=None,
             allowDiscreteMovement=None,
             allowAbsoluteMovement=None,
             recordDestination=None,
             recordObservations=None,
             recordRewards=None,
             recordCommands=None,
             recordMP4=None,
             gameMode=None,
             forceWorldReset=None):
        """
        Configure the loaded mission spec and the Malmo session plumbing.

        Stores the retry/step parameters on self, applies the requested
        video/observation/movement options to self.mission_spec, optionally
        starts a Minecraft process, converts client_pool into a
        MalmoPython.ClientPool, derives the observation space from the
        mission's video settings, builds the action space, and sets up
        mission recording and the game mode.

        NOTE(review): some parameters (add_noop_command, skip_steps,
        allowContinuousMovement, allowAbsoluteMovement, recordObservations)
        are only stored or ignored here — presumably consumed elsewhere or
        left over from the original gym-malmo wrapper; confirm before
        relying on them.
        """

        self.max_retries = max_retries
        self.retry_sleep = retry_sleep
        self.step_sleep = step_sleep
        self.skip_steps = skip_steps
        self.forceWorldReset = forceWorldReset
        self.continuous_discrete = continuous_discrete
        self.add_noop_command = add_noop_command
        self.client_pool = client_pool

        # Video must be requested before the observation space is derived
        # from the mission's video height/width/channels further below.
        if videoResolution:
            if videoWithDepth:
                self.mission_spec.requestVideoWithDepth(*videoResolution)
            else:
                self.mission_spec.requestVideo(*videoResolution)

        if observeRecentCommands:
            self.mission_spec.observeRecentCommands()
        if observeHotBar:
            self.mission_spec.observeHotBar()
        if observeFullInventory:
            self.mission_spec.observeFullInventory()
        if observeGrid:
            self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
        if observeDistance:
            self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
        if observeChat:
            self.mission_spec.observeChat()

        if allowDiscreteMovement:
            # if there are any parameters, remove current command handlers first
            self.mission_spec.removeAllCommandHandlers()

            if allowDiscreteMovement is True:
                self.mission_spec.allowAllDiscreteMovementCommands()
            elif isinstance(allowDiscreteMovement, list):
                for cmd in allowDiscreteMovement:
                    self.mission_spec.allowDiscreteMovementCommand(cmd)

        if start_minecraft:
            # start Minecraft process assigning port dynamically
            self.mc_process, port = minecraft_py.start()
            logger.info(
                "Started Minecraft on port %d, overriding client_pool.", port)
            client_pool = [('127.0.0.1', port)]
        """ 
        make client_pool usable for Malmo: change format of the client_pool to struct 
        """
        if client_pool:
            if not isinstance(client_pool, list):
                raise ValueError(
                    "client_pool must be list of tuples of (IP-address, port)")
            self.client_pool = MalmoPython.ClientPool()
            for client in client_pool:
                self.client_pool.add(MalmoPython.ClientInfo(*client))
        """
        initialize video parameters for video processing
        """
        self.video_height = self.mission_spec.getVideoHeight(0)
        self.video_width = self.mission_spec.getVideoWidth(0)
        self.video_depth = self.mission_spec.getVideoChannels(0)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.video_height,
                                                   self.video_width,
                                                   self.video_depth))
        """
        dummy image just for the first observation
        """
        self.last_image1 = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.float32)
        self.last_image2 = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.float32)
        self.create_action_space()
        """ 
        mission recording 
        """
        self.mission_record_spec = MalmoPython.MissionRecordSpec(
        )  # record nothing
        if recordDestination:
            self.mission_record_spec.setDestination(recordDestination)
        if recordRewards:
            self.mission_record_spec.recordRewards()
        if recordCommands:
            self.mission_record_spec.recordCommands()
        if recordMP4:
            self.mission_record_spec.recordMP4(*recordMP4)
        """ 
        game mode
        """
        if gameMode:
            if gameMode == "spectator":
                self.mission_spec.setModeToSpectator()
            elif gameMode == "creative":
                self.mission_spec.setModeToCreative()
            elif gameMode == "survival":
                logger.warn(
                    "Cannot force survival mode, assuming it is the default.")
            else:
                assert False, "Unknown game mode: " + gameMode

    def create_action_space(self):
        """
        Build the Discrete action space from the mission's allowed commands.

        Walks every command handler of agent 0 in the mission spec and keeps
        the discrete movement/attack/turn commands, turning each into a pair
        of actions ("<cmd> 1" and "<cmd> -1").  Everything else is collected
        as unused.  The resulting list becomes a spaces.Discrete (or a
        spaces.Tuple if several spaces ever get appended).
        """
        unused_actions = []
        discrete_actions = []
        wanted = ("movenorth", "movesouth", "moveeast", "movewest", "attack",
                  "turn")
        for handler in self.mission_spec.getListOfCommandHandlers(0):
            for cmd in self.mission_spec.getAllowedCommands(0, handler):
                logger.debug(handler + ":" + cmd)
                if cmd in wanted:
                    # Each discrete command comes in a +1 / -1 flavour.
                    discrete_actions.extend([cmd + " 1", cmd + " -1"])
                else:
                    unused_actions.append(cmd)
        """ turn action lists into action spaces """
        self.action_names = []
        self.action_spaces = []
        if discrete_actions:
            self.action_spaces.append(spaces.Discrete(len(discrete_actions)))
            self.action_names.append(discrete_actions)

        if len(self.action_spaces) == 1:
            self.action_space = self.action_spaces[0]
        else:
            self.action_space = spaces.Tuple(self.action_spaces)
        logger.debug(self.action_space)

    def load_mission_file(self, mission_file):
        """
        Read the XML mission definition from *mission_file* and parse it.

        Uses a context manager so the file handle is closed even if reading
        or parsing raises (the previous version leaked the handle).
        """
        logger.info("Loading mission from " + mission_file)
        with open(mission_file, 'r') as f:
            mission_xml = f.read()
        self.load_mission_xml(mission_xml)

    def load_mission_xml(self, mission_xml):
        """Parse (and validate) a mission XML string into self.mission_spec."""
        spec = MalmoPython.MissionSpec(mission_xml, True)
        self.mission_spec = spec
        logger.info("Loaded mission: " + spec.getSummary())

    def clip_action_filter(self, a):
        """Clamp *a* element-wise into the Box action-space bounds."""
        bounds = self.action_space
        return np.clip(a, bounds.low, bounds.high)

    def dqn_q_values_and_neuronal_net(self, args, action_space, obs_size,
                                      obs_space):
        """
        Build the DQN learning components for the given action space.

        Continuous (Box) spaces get a NAF-style quadratic Q-function with
        Ornstein-Uhlenbeck exploration; discrete spaces get a feed-forward
        Q-function with linearly decaying epsilon-greedy exploration.
        Optionally converts the network to a factorized noisy net (which
        disables the explorer), draws the computational graph to
        args.outdir, and creates the Adam optimizer plus a (possibly
        prioritized) replay buffer.

        Returns: (q_func, opt, rbuf, explorer)
        """

        if isinstance(action_space, spaces.Box):
            action_size = action_space.low.size
            # Use NAF to apply DQN to continuous action spaces
            q_func = q_functions.FCQuadraticStateQFunction(
                obs_size,
                action_size,
                n_hidden_channels=args.n_hidden_channels,
                n_hidden_layers=args.n_hidden_layers,
                action_space=action_space)
            # Use the Ornstein-Uhlenbeck process for exploration
            ou_sigma = (action_space.high - action_space.low) * 0.2
            explorer = explorers.AdditiveOU(sigma=ou_sigma)
        else:
            n_actions = action_space.n
            # print("n_actions: ", n_actions)
            q_func = q_functions.FCStateQFunctionWithDiscreteAction(
                obs_size,
                n_actions,
                n_hidden_channels=args.n_hidden_channels,
                n_hidden_layers=args.n_hidden_layers)
            # print("q_func ", q_func)
            # Use epsilon-greedy for exploration
            explorer = explorers.LinearDecayEpsilonGreedy(
                args.start_epsilon, args.end_epsilon,
                args.final_exploration_steps, action_space.sample)
            # print("explorer: ", explorer)

        if args.noisy_net_sigma is not None:
            links.to_factorized_noisy(q_func, sigma_scale=args.noisy_net_sigma)
            # Turn off explorer
            explorer = explorers.Greedy()
        # print("obs_space.low : ", obs_space.shape)
        # Dump the network graph once so the run directory documents the model.
        chainerrl.misc.draw_computational_graph(
            [q_func(np.zeros_like(obs_space.low, dtype=np.float32)[None])],
            os.path.join(args.outdir, 'model'))

        opt = optimizers.Adam()
        opt.setup(q_func)

        # Replay buffer capacity: 5e5 transitions.
        rbuf_capacity = 5 * 10**5
        if args.minibatch_size is None:
            args.minibatch_size = 32
        if args.prioritized_replay:
            # Anneal the prioritization bias correction over the remaining steps.
            betasteps = (args.steps - args.replay_start_size) \
                        // args.update_interval
            rbuf = replay_buffer.PrioritizedReplayBuffer(rbuf_capacity,
                                                         betasteps=betasteps)
        else:
            rbuf = replay_buffer.ReplayBuffer(rbuf_capacity)

        return q_func, opt, rbuf, explorer

    def step_generating(self, action, agent_num):
        """
        Execute one environment step for agent *agent_num* (1 or 2).

        The action is only sent while that agent's mission is still running;
        afterwards the fresh world state is fetched, the step reward summed,
        the latest video frame extracted, and run metadata collected.

        RETURN: image, reward, done, info
        """
        reward1 = 0
        reward2 = 0

        world_state1 = self.agent_host1.peekWorldState()
        world_state2 = self.agent_host2.peekWorldState()
        if agent_num == 1:
            if world_state1.is_mission_running:
                # take action
                self.do_action(action, agent_num)
            # wait for the new state
            world_state1 = self.agent_host1.getWorldState()
        else:
            if world_state2.is_mission_running:
                # take action
                self.do_action(action, agent_num)
            # wait for the new state
            world_state2 = self.agent_host2.getWorldState()

        # calculate reward of current state
        if agent_num == 1:
            for r in world_state1.rewards:
                reward1 += r.getValue()
        else:
            for r in world_state2.rewards:
                reward2 += r.getValue()

        # take the last frame from the world state; the 'done' flag indicates
        # whether the mission is still running
        if agent_num == 1:
            image1 = self.get_video_frame(world_state1, 1)
            done1 = not world_state1.is_mission_running
        else:
            image2 = self.get_video_frame(world_state2, 2)
            # Bug fix: this previously read world_state1, so agent 2 never
            # saw its own mission end.
            done2 = not world_state2.is_mission_running

        # collected information during the run
        if agent_num == 1:
            info1 = {}
            info1['has_mission_begun'] = world_state1.has_mission_begun
            info1['is_mission_running'] = world_state1.is_mission_running
            info1[
                'number_of_video_frames_since_last_state'] = world_state1.number_of_video_frames_since_last_state
            info1[
                'number_of_rewards_since_last_state'] = world_state1.number_of_rewards_since_last_state
            info1[
                'number_of_observations_since_last_state'] = world_state1.number_of_observations_since_last_state
            info1['mission_control_messages'] = [
                msg.text for msg in world_state1.mission_control_messages
            ]
            info1['observation'] = self.get_observation(world_state1)
            return image1, reward1, done1, info1
        else:
            info2 = {}
            info2['has_mission_begun'] = world_state2.has_mission_begun
            info2['is_mission_running'] = world_state2.is_mission_running
            info2[
                'number_of_video_frames_since_last_state'] = world_state2.number_of_video_frames_since_last_state
            info2[
                'number_of_rewards_since_last_state'] = world_state2.number_of_rewards_since_last_state
            info2[
                'number_of_observations_since_last_state'] = world_state2.number_of_observations_since_last_state
            info2['mission_control_messages'] = [
                msg.text for msg in world_state2.mission_control_messages
            ]
            info2['observation'] = self.get_observation(world_state2)
            return image2, reward2, done2, info2

    def reset_world(self, experiment_ID):
        """
        Reset the arena and (re)start the missions for all three agent hosts.

        The 6 second sleeps are required because the Minecraft client needs
        far too much time to set up the mission the first time; all following
        missions start faster.

        :param experiment_ID: unique id passed to startMission so all
            clients join the same experiment
        :return: tuple (frame_agent1, frame_agent2) - the first video frames
            of agents 1 and 2
        :raises RuntimeError: when a mission could not be started after
            self.max_retries attempts
        """
        print("force world reset........")
        self.flag_captured_tom = False
        self.flag_captured_jerry = False

        time.sleep(0.1)

        print(self.client_pool)

        for retry in range(self.max_retries + 1):
            try:
                # start missions for every client (role indices 0..2)
                print("\nstarting mission for agent #1")
                time.sleep(6)
                self.agent_host1.startMission(self.mission_spec,
                                              self.client_pool,
                                              self.mission_record_spec, 0,
                                              experiment_ID)

                print("starting mission for agent #2")
                time.sleep(6)
                self.agent_host2.startMission(self.mission_spec,
                                              self.client_pool,
                                              self.mission_record_spec, 1,
                                              experiment_ID)

                print("starting mission for agent #3")
                time.sleep(6)
                self.agent_host3.startMission(self.mission_spec,
                                              self.client_pool,
                                              self.mission_record_spec, 2,
                                              experiment_ID)
                print("\nmissions successfully started.....\n")
                break
            except RuntimeError as e:
                if retry == self.max_retries:
                    logger.error("Error starting mission: " + str(e))
                    raise
                else:
                    logger.warn("Error starting mission: " + str(e))
                    logger.info("Sleeping for %d seconds...", self.retry_sleep)
                    time.sleep(self.retry_sleep)

        logger.info("Waiting for the mission to start.")
        world_state1 = self.agent_host1.getWorldState()
        world_state2 = self.agent_host2.getWorldState()
        # BUG FIX: wait until BOTH missions have begun.  The previous
        # condition `not ws1.has_mission_begun and ws2.has_mission_begun`
        # parsed as `(not ws1...) and ws2...` and therefore never actually
        # waited for agent 2.
        while not (world_state1.has_mission_begun
                   and world_state2.has_mission_begun):
            time.sleep(0.1)
            world_state1 = self.agent_host1.getWorldState()
            world_state2 = self.agent_host2.getWorldState()
            # BUG FIX: `ws1.errors and ws2.errors` evaluated to just one of
            # the two error lists; report the errors of both world states.
            for world_state in (world_state1, world_state2):
                for error in world_state.errors:
                    logger.warn(error.text)

        logger.info("Mission running")

        return self.get_video_frame(world_state1,
                                    1), self.get_video_frame(world_state2, 2)

    def do_action(self, actions, agent_num):
        """
        Translate the chosen action(s) into Malmo commands and send them to
        the agent host of the given agent.

        :param actions: a single action (when there is only one action
            space) or a list of actions, one per action space
        :param agent_num: 1 -> Tom (agent_host1), otherwise Jerry
            (agent_host2)
        """
        if len(self.action_spaces) == 1:
            actions = [actions]

        # book-keeping: count the steps each agent takes this episode
        if agent_num == 1:
            self.steps_tom += 1
        else:
            self.steps_jerry += 1

        for spc, cmds, acts in zip(self.action_spaces, self.action_names,
                                   actions):
            if isinstance(spc, spaces.Discrete):
                logger.debug(cmds[acts])
                if agent_num == 1:
                    print("Tom's next action: ", cmds[acts])
                    self.agent_host1.sendCommand(cmds[acts])
                else:
                    print("Jerry's next action: ", cmds[acts])
                    self.agent_host2.sendCommand(cmds[acts])
            elif isinstance(spc, (spaces.Box, spaces.MultiDiscrete)):
                # CONSISTENCY: the original Box and MultiDiscrete branches
                # were byte-identical duplicates - both send one
                # "<command> <value>" message per sub-action.
                for cmd, val in zip(cmds, acts):
                    logger.debug(cmd + " " + str(val))
                    if agent_num == 1:
                        self.agent_host1.sendCommand(cmd + " " + str(val))
                    else:
                        self.agent_host2.sendCommand(cmd + " " + str(val))
            else:
                logger.warn("Unknown action space for %s, ignoring." % cmds)

    def get_video_frame(self, world_state, agent_num):
        """
        Convert the most recent Malmo video frame into a float32 numpy image.

        :param world_state: world state whose video_frames are inspected
        :param agent_num: 1 -> result cached in self.last_image1,
            otherwise in self.last_image2
        :return: numpy float32 array of shape (height, width, channels);
            if no new frame arrived (e.g. the mission ended) the last
            cached image for that agent is returned instead
        """

        if world_state.number_of_video_frames_since_last_state > 0:
            assert len(world_state.video_frames) == 1
            frame = world_state.video_frames[0]
            # BUG FIX / cleanup: the original copied the pixels one by one
            # in a Python loop over a hard-coded 360000 entries (breaking
            # for any other frame size) and contained a dead
            # `np.frombuffer(..., dtype=np.float32)` statement whose result
            # was immediately overwritten.  A vectorised int8 -> float32
            # conversion is equivalent and works for any frame size.
            image = (np.frombuffer(frame.pixels, dtype=np.int8)
                     .astype(np.float32)
                     .reshape((frame.height, frame.width, frame.channels)))

            if agent_num == 1:
                self.last_image1 = image
            else:
                self.last_image2 = image
        else:
            # if the mission ends before we got a frame, just take the last
            # frame to reduce exceptions
            if agent_num == 1:
                image = self.last_image1
            else:
                image = self.last_image2

        return image

    def get_observation(self, world_state):
        """
        Parse the latest observation of the given world state.

        Logs a warning when observations were dropped between two calls
        (beyond the configured self.skip_steps tolerance).

        :param world_state: world state to read the observation from
        :return: the decoded observation dict, or None when no new
            observation arrived since the last state
        """
        if world_state.number_of_observations_since_last_state <= 0:
            return None

        missed = (world_state.number_of_observations_since_last_state
                  - len(world_state.observations) - self.skip_steps)
        if missed > 0:
            logger.warn("Agent missed %d observation(s).", missed)
        assert len(world_state.observations) == 1
        return json.loads(world_state.observations[0].text)

    def save_new_round(self, t):
        """
        Append a header line for round `t` to results.txt.

        :param t: the round/episode number
        """
        # `with` guarantees the file handle is closed even if write() raises
        with open('results.txt', 'a') as datei:
            datei.write("-------------- ROUND %i --------------\n" % (t))

    def append_save_file_with_flag(self, time_step, name):
        """
        Append a line to results.txt recording the flag capture.

        :param time_step: seconds into the episode when the flag was taken
        :param name: name of the capturing agent ("Tom" or "Jerry")
        """
        # `with` guarantees the file handle is closed even if write() raises
        with open('results.txt', 'a') as datei:
            datei.write("%s captured the flag after %i seconds.\n" %
                        (name, time_step))

    def append_save_file_with_fail(self):
        """Append a generic mission-failure marker to results.txt."""
        # `with` guarantees the file handle is closed even if write() raises
        with open('results.txt', 'a') as datei:
            datei.write("X the mission failed X.\n")

    def append_save_file_with_agents_fail(self):
        """Append a failure marker to results.txt for the case that the
        agents collided or got stuck in the field."""
        # `with` guarantees the file handle is closed even if write() raises
        with open('results.txt', 'a') as datei:
            datei.write(
                "X the mission failed: the agents ran into each other or got stranded in the field X.\n"
            )

    def append_save_file_with_finish(self, time_step, name):
        """
        Append a line to results.txt recording the episode winner.

        :param time_step: seconds into the episode when the game was won
        :param name: name of the winning agent ("Tom" or "Jerry")
        """
        # `with` guarantees the file handle is closed even if write() raises
        with open('results.txt', 'a') as datei:
            datei.write("%s won the game after %i seconds.\n" % (name, time_step))

    def save_results(self, overall_reward_agent_Tom,
                     overall_reward_agent_Jerry, time_step):
        """
        Append the episode summary (proximity count, rewards, duration) to
        results.txt.

        :param overall_reward_agent_Tom: accumulated episode reward of Tom
        :param overall_reward_agent_Jerry: accumulated episode reward of Jerry
        :param time_step: duration of the episode in seconds
        """
        # `with` guarantees the file handle is closed even if write() raises
        with open('results.txt', 'a') as datei:
            datei.write("The agents were %i times very close to each other.\n" %
                        (self.too_close_counter))
            datei.write(
                "Reward Tom: %i, Reward Jerry: %i , Time: %f \n\n" %
                (overall_reward_agent_Tom, overall_reward_agent_Jerry,
                 time_step))

    def get_cell_agents(self):
        """
        Snapshot the current grid cell of both agents (polled every 20
        seconds) so it can later be compared with their positions to detect
        stuck agents.
        """
        parsed = []
        for host in (self.agent_host1, self.agent_host2):
            state = host.peekWorldState()
            parsed.append(json.loads(state.observations[-1].text))
        ob_tom, ob_jerry = parsed

        if "cell" in ob_tom and "cell" in ob_jerry:
            self.fetched_cell_tom = ob_tom.get(u'cell', 0)
            self.fetched_cell_jerry = ob_jerry.get(u'cell', 0)
            print("fetched cell tom: ", self.fetched_cell_tom)
            print("fetched cell jerry: ", self.fetched_cell_jerry)

    def get_current_cell_agents(self):
        """
        Read the current grid cell of both agents from their latest
        observation and remember them for the stuck-detection comparison.
        """
        state_tom = self.agent_host1.peekWorldState()
        state_jerry = self.agent_host2.peekWorldState()
        ob_tom = json.loads(state_tom.observations[-1].text)
        ob_jerry = json.loads(state_jerry.observations[-1].text)

        if "cell" in ob_tom and "cell" in ob_jerry:
            self.cell_now_tom = ob_tom.get(u'cell', 0)
            self.cell_now_jerry = ob_jerry.get(u'cell', 0)
            print("current cell tom: ", self.cell_now_tom)
            print("current cell jerry: ", self.cell_now_jerry)

    def get_position_in_arena(self, world_state, time_step):
        """
        Return the (x, y, z) position of the agent owning `world_state`.

        Also drives the stuck-detection: every 20 seconds the agents' grid
        cells are snapshotted (get_cell_agents) and, roughly 18 seconds into
        each window, compared with the current cells
        (get_current_cell_agents).  If neither agent changed its cell, it is
        assumed they crashed into each other and the mission is flagged as
        failed (self.mission_end = True).

        If no observation is available, the call retries once per second for
        up to 5 seconds before declaring the mission failed.

        :param world_state: peeked world state of one agent
        :param time_step: episode time, recorded when the mission fails here
        :return: tuple (x, y, z); stays (0, 0, 0) when the position could
            not be read
        """

        x = y = z = t = 0
        while world_state:
            if len(world_state.observations) >= 1:
                msg = world_state.observations[-1].text
                ob = json.loads(msg)
                time_now = time.time()

                if time_now - self.time_stamp_start_for_distance > 20:
                    """ fetch cell every 20 seconds """
                    self.get_cell_agents()
                    self.time_stamp_start_for_distance = time.time()

                seconds = time_now - self.time_stamp_start_for_distance
                # print("seconds: ", int(seconds))
                if int(seconds) == 18:
                    # ~18s after the last snapshot: if both agents still sit
                    # in their snapshotted cells they are considered stuck.
                    self.get_current_cell_agents()
                    if self.fetched_cell_tom == self.cell_now_tom and self.fetched_cell_jerry == self.cell_now_jerry:
                        print("They ran into each other again.")
                        self.append_save_file_with_agents_fail()
                        self.mission_end = True

                if "XPos" in ob and "ZPos" in ob and "YPos" in ob:
                    x = ob[u'XPos']
                    y = ob[u'YPos']
                    z = ob[u'ZPos']
                return x, y, z
            else:
                # no observation yet: retry up to 5 times, one second apart,
                # then give up and mark the mission as failed
                if t == 5:
                    self.append_save_file_with_fail()
                    self.time_step_agents_ran_into_each_other = time_step
                    self.mission_end = True
                    return x, y, z
                else:
                    time.sleep(1)
                    t += 1
                    print(t)

    def distance(self, time_step):
        """
        Detect when the agents are within one block of each other and push
        them apart with opposing move commands.

        Every close encounter increments self.too_close_counter.  (Checks
        for a two-block distance existed in the original source but were
        disabled by the author; they have been removed here.)

        :param time_step: episode time, forwarded to get_position_in_arena
        """

        x1 = y1 = z1 = x2 = y2 = z2 = 0
        # checks if world_state is read correctly; if not, tries again
        while (x1 == y1 == z1 == 0) or (x2 == y2 == z2 == 0):
            world_state1 = self.agent_host1.peekWorldState()
            world_state2 = self.agent_host2.peekWorldState()

            x1, y1, z1 = self.get_position_in_arena(world_state1, time_step)
            x2, y2, z2 = self.get_position_in_arena(world_state2, time_step)
            print("...")

        # same cell or contact with agent 2 one step ahead in z
        if (x1 == x2 and z1 == z2) or (x2 == x1 + 1 and z2 == z1 + 1) or (x2 == x1 and z2 == z1 + 1) or \
                (x2 == x1 - 1 and z2 == z1 + 1) or (x1 == x2 + 1 and z1 == z2 - 1) or (x1 == x2 and z1 == z2 - 1) or \
                (x1 == x2 - 1 and z1 == z2 - 1):
            print(
                "---------------------------------------------------- stop!! agents too close!"
            )
            self.too_close_counter += 1
            self.agent_host1.sendCommand("movenorth 1")
            self.agent_host2.sendCommand("movesouth 1")

        # contact with agent 2 one step ahead in x
        if (x2 == x1 + 1 and z2 == z1) or (x1 == x2 - 1 and z1 == z2):
            print(
                "---------------------------------------------------- stop!! agents too close!"
            )
            self.too_close_counter += 1
            self.agent_host1.sendCommand("movewest 1")
            self.agent_host2.sendCommand("moveeast 1")

        # contact with agent 2 one step behind in x
        if (x2 == x1 - 1 and z2 == z1) or (x1 == x2 + 1 and z1 == z2):
            print(
                "---------------------------------------------------- stop!! agents too close!"
            )
            self.too_close_counter += 1
            self.agent_host1.sendCommand("moveeast 1")
            self.agent_host2.sendCommand("movewest 1")

        # contact with agent 2 one step behind in z
        if (x2 == x1 + 1 and z2 == z1 - 1) or (x2 == x1 and z2 == z1 - 1) or (x2 == x1 - 1 and z2 == z1 - 1) or \
                (x1 == x2 + 1 and z1 == z2 + 1) or (x1 == x2 and z1 == z2 + 1) or (x1 == x2 - 1 and z1 == z2 + 1):
            print(
                "---------------------------------------------------- stop!! agents too close!"
            )
            self.too_close_counter += 1
            self.agent_host1.sendCommand("movesouth 1")
            # BUG FIX: was "movennorth 1" (typo) - presumably not a valid
            # Malmo command, so Jerry never moved away in this case.
            self.agent_host2.sendCommand("movenorth 1")

    def check_inventory(self, time_step):
        """
        Check whether the agents carry a flag and handle capture/win logic.

        Tom wins by carrying the 'log' flag into the area 12<=x<=15,
        0<=z<=4; Jerry wins by carrying the 'quartz' flag into 0<=x<=4,
        11<=z<=15.  A wrongly picked flag is put back.  Winning triggers a
        look-down/place-block/jump sequence and ends the mission
        (self.mission_end = True).

        :param time_step: episode time used for logging and bookkeeping
        """
        world_state1 = 0
        world_state2 = 0

        x1 = y1 = z1 = x2 = y2 = z2 = 0
        # wait until both hosts returned a world state object
        while world_state1 == 0 and world_state2 == 0:
            world_state1 = self.agent_host1.peekWorldState()
            world_state2 = self.agent_host2.peekWorldState()
            print("..")

        # wait until at least one agent has an observation
        while not len(world_state1.observations) >= 1 and not len(
                world_state2.observations) >= 1:
            world_state1 = self.agent_host1.peekWorldState()
            world_state2 = self.agent_host2.peekWorldState()
            print("..")

        if json.dumps(world_state1.observations[-1].text) and json.dumps(
                world_state2.observations[-1].text):

            msg1 = world_state1.observations[-1].text
            msg2 = world_state2.observations[-1].text
            obs1 = json.loads(msg1)
            obs2 = json.loads(msg2)
            # checks if world_state is read correctly; if not, tries again
            while (x1 == y1 == z1 == 0) or (x2 == y2 == z2 == 0):
                world_state1 = self.agent_host1.peekWorldState()
                world_state2 = self.agent_host2.peekWorldState()

                x1, y1, z1 = self.get_position_in_arena(
                    world_state1, time_step)
                x2, y2, z2 = self.get_position_in_arena(
                    world_state2, time_step)
                print("..")

            self.get_current_cell_agents()

            if self.flag_captured_tom and (
                    12 <= x1 <= 15 and
                    0 <= z1 <= 4):
                # agent reached the target area:
                # look down, set block, jump on it to reach the wanted
                # position and win the game
                self.agent_host1.sendCommand("chat I won the game!")
                self.append_save_file_with_finish(time_step, "Tom")
                self.time_step_tom_won = time_step
                self.winner_agent = "Tom"

                self.agent_host1.sendCommand("look 1")
                time.sleep(0.2)
                self.agent_host1.sendCommand("use 1")
                time.sleep(0.2)
                self.agent_host1.sendCommand("jumpmove 1")
                time.sleep(0.2)
                self.agent_host1.sendCommand("look -1")
                self.mission_end = True
            else:
                if self.flag_captured_tom:
                    print("[INFO] Tom holds the flag.")
                else:
                    last_inventory_tom = obs1[u'inventory']
                    inventory_string_tom = json.dumps(last_inventory_tom)
                    if (inventory_string_tom.find('quartz') != -1):
                        # swap quartz with log so the wrong flag (quartz)
                        # can be put back
                        if (json.dumps(last_inventory_tom[1]).find('quartz') !=
                                -1):
                            self.agent_host1.sendCommand(
                                "swapInventoryItems 0 1")
                        self.agent_host1.sendCommand(
                            "chat Wrong flag, I'll put it back!")
                        self.agent_host1.sendCommand("use")
                        self.agent_host1.sendCommand("swapInventoryItems 0 1")
                    if (inventory_string_tom.find('log') != -1):
                        self.flag_captured_tom = True
                        self.time_step_tom_captured_the_flag = time_step
                        self.append_save_file_with_flag(time_step, "Tom")
                        print(
                            "----------------------------------------------------------------Tom captured the flag after %i seconds!"
                            % (time_step))

        # NOTE(review): obs2/x2/z2 are only bound inside the json.dumps()
        # guard above; if that guard were ever False this branch would raise
        # NameError - confirm the guard is always true in practice.
        if self.flag_captured_jerry and (0 <= x2 <= 4 and 11 <= z2 <= 15):
            # agent reached the target area:
            # look down, set block, jump on it to reach the wanted position
            # and win the game
            self.agent_host2.sendCommand("chat I won the game!")
            self.append_save_file_with_finish(time_step, "Jerry")
            self.time_step_jerry_won = time_step
            self.winner_agent = "Jerry"
            self.agent_host2.sendCommand("look 1")
            time.sleep(0.2)
            self.agent_host2.sendCommand("use 1")
            time.sleep(0.2)
            self.agent_host2.sendCommand("jumpmove 1")
            time.sleep(0.2)
            self.agent_host2.sendCommand("look -1")
            self.mission_end = True
        else:
            if self.flag_captured_jerry:
                print("[INFO] Jerry holds the flag.")
            else:
                last_inventory_jerry = obs2[u'inventory']
                inventory_string_jerry = json.dumps(last_inventory_jerry)
                if (inventory_string_jerry.find('log') != -1):
                    # swap quartz with log so the wrong flag (log) can be
                    # put back
                    if (json.dumps(last_inventory_jerry[1]).find('log') != -1):
                        self.agent_host2.sendCommand("swapInventoryItems 0 1")
                    self.agent_host2.sendCommand(
                        "chat Wrong flag, I'll put it back!")
                    self.agent_host2.sendCommand("use")
                    # BUG FIX: was agent_host1 (copy-paste from Tom's branch)
                    # - Jerry must swap his own inventory after putting the
                    # flag back, mirroring Tom's code above.
                    self.agent_host2.sendCommand("swapInventoryItems 0 1")
                if (inventory_string_jerry.find('quartz') != -1):
                    self.flag_captured_jerry = True
                    self.time_step_jerry_captured_the_flag = time_step
                    self.append_save_file_with_flag(time_step, "Jerry")
                    print(
                        "----------------------------------------------------------------Jerry captured the flag after %i seconds!"
                        % (time_step))

    def sending_mission_quit_commands(self, overall_reward_agent_Tom,
                                      overall_reward_agent_Jerry, time_step,
                                      obs1, r1, obs2, r2, outdir, t, tom,
                                      jerry, experiment_ID):
        """
        End the current episode and prepare the next one.

        Sends "quit" to all three agent hosts, persists results and both
        agents' models, stores the evaluation data, resets the per-episode
        bookkeeping and restarts the world via reset_world().

        :param overall_reward_agent_Tom: accumulated episode reward of Tom
        :param overall_reward_agent_Jerry: accumulated episode reward of Jerry
        :param time_step: duration of the finished episode
        :param obs1: last observation of Tom (used to close his episode)
        :param r1: last reward of Tom
        :param obs2: last observation of Jerry
        :param r2: last reward of Jerry
        :param outdir: output directory for saved agents and plots
        :param t: episode counter (incremented here)
        :param tom: Tom's learning agent
        :param jerry: Jerry's learning agent
        :param experiment_ID: id forwarded to reset_world
        :return: tuple (t, obs1, obs2, r1, r2, done1, done2,
            overall_reward_agent_Jerry, overall_reward_agent_Tom) holding
            the reset values for the next episode
        """

        self.agent_host1.sendCommand("quit")
        self.agent_host2.sendCommand("quit")
        self.agent_host3.sendCommand("quit")

        dirname = os.path.join(outdir, 'plots')
        print("dirname: ", dirname)
        """ save and show results of reward calculations """
        self.save_results(overall_reward_agent_Tom, overall_reward_agent_Jerry,
                          time_step)
        print("Final Reward Tom:   ", overall_reward_agent_Tom)
        print("Final Reward Jerry: ", overall_reward_agent_Jerry)
        """ end episode, save results """
        tom.stop_episode_and_train(obs1, r1, done=True)
        jerry.stop_episode_and_train(obs2, r2, done=True)
        print("outdir: %s step: %s " % (outdir, t))
        print("Tom's statistics:   ", tom.get_statistics())
        print("Jerry's statistics: ", jerry.get_statistics())
        """ save the final model and results """
        save_agent(tom, t, outdir, logger, suffix='_finish_01')
        save_agent(jerry, t, outdir, logger, suffix='_finish_02')
        """ save all the collected data for evaluation graphs """
        self.save_data_for_evaluation_plots(t, time_step,
                                            overall_reward_agent_Tom,
                                            overall_reward_agent_Jerry,
                                            dirname)
        time.sleep(2)
        """ initialisation for the next episode, reset parameters, build new world """
        t += 1
        self.episode_counter += 1
        r1 = r2 = 0
        done1 = done2 = self.mission_end = False
        overall_reward_agent_Jerry = overall_reward_agent_Tom = 0
        self.save_new_round(t)
        obs1, obs2 = self.reset_world(experiment_ID)
        self.too_close_counter = 0
        self.winner_agent = "-"
        self.time_step_tom_won = self.time_step_jerry_won = None
        self.time_step_tom_captured_the_flag = self.time_step_jerry_captured_the_flag = None
        self.time_step_agents_ran_into_each_other = None
        self.steps_tom = 0
        self.steps_jerry = 0
        """ recover """
        """if evaluator1 and evaluator2 is not None:
            evaluator1.evaluate_if_necessary(
                t=t, episodes=episode_idx + 1)
            evaluator2.evaluate_if_necessary(
                t=t, episodes=episode_idx + 1)
            if (successful_score is not None and
                    evaluator1.max_score >= successful_score and evaluator2.max_score >= successful_score):
                break"""
        return t, obs1, obs2, r1, r2, done1, done2, overall_reward_agent_Jerry, overall_reward_agent_Tom

    def save_data_for_evaluation_plots(self, t, time_step,
                                       overall_reward_agent_Tom,
                                       overall_reward_agent_Jerry, dirname):
        """
        Collect per-episode statistics and hand them to the evaluation
        plotter.

        Episode 0 is skipped entirely because it only initialises the world.
        Episodes in which the agents ran into each other are recorded in
        evaluation_agents_ran_into_each_other but excluded from the other
        per-episode statistics.

        :param t: episode number passed on to the plotter
        :param time_step: duration of the episode
        :param overall_reward_agent_Tom: accumulated reward of Tom
        :param overall_reward_agent_Jerry: accumulated reward of Jerry
        :param dirname: directory the plots are written to
        """
        print("t : ", self.episode_counter)
        if self.episode_counter <= 0:
            # episode 0 only sets up the world - nothing worth recording
            return

        crash_time = self.time_step_agents_ran_into_each_other
        self.evaluation_agents_ran_into_each_other.append(crash_time)
        print(crash_time)

        if crash_time is None:
            self.evaluation_episode_counter.append(self.episode_counter)
            self.evaluation_episode_time.append(time_step)
            self.evaluation_too_close_counter.append(self.too_close_counter)
            self.evaluation_reward_tom.append(overall_reward_agent_Tom)
            self.evaluation_reward_jerry.append(overall_reward_agent_Jerry)
            self.evaluation_winner_agent.append(self.winner_agent)

            win_times = {"Tom": self.time_step_tom_won,
                         "Jerry": self.time_step_jerry_won}
            if self.winner_agent in win_times:
                self.evaluation_game_won_timestamp.append(
                    win_times[self.winner_agent])

            self.evaluation_flag_captured_tom.append(
                self.time_step_tom_captured_the_flag)
            self.evaluation_flag_captured_jerry.append(
                self.time_step_jerry_captured_the_flag)
            self.evaluation_steps_tom.append(self.steps_tom)
            self.evaluation_steps_jerry.append(self.steps_jerry)

        # evaluate and print the plots
        thesis_evaluation_experiment.evaluate(
            t, self.evaluation_episode_counter,
            self.evaluation_episode_time,
            self.evaluation_too_close_counter, self.evaluation_reward_tom,
            self.evaluation_reward_jerry, self.evaluation_winner_agent,
            self.evaluation_game_won_timestamp,
            self.evaluation_flag_captured_tom,
            self.evaluation_flag_captured_jerry,
            self.evaluation_agents_ran_into_each_other, dirname,
            self.evaluation_steps_tom, self.evaluation_steps_jerry)
Пример #13
0
import random
import math
import errno
import uuid
from collections import defaultdict, deque
from timeit import default_timer as timer

if __name__ == '__main__':

    # -- set up mission agent -- #
    agent_host_player = MalmoPython.AgentHost()
    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    # Use agent_host_player for parsing the command-line options.
    malmoutils.parse_command_line(agent_host_player)

    repeats = 1
    win_counter = 0
    qlearner = Minesweeper_Agent.Qlearner(agent_host_debug)
    qlearner.train2(num_simulations=10000000,
                    grid_size=5,
                    reward=1,
                    game_size=5,
                    num_mines=5)
    qmap = qlearner.get_qmap()

    for game_counter in range(repeats):

        # -- set up the game -- #
        # Minesweeper(size, num_mines)
Пример #14
0
            exit(1)
        time.sleep(0.1)
        print(".", end=' ')
    if time.time() - start_time >= time_out:
        print("Timed out while waiting for mission to start - bailing.")
        exit(1)
    print()
    print("Mission has started.")


# -- set up two agent hosts --
# Two AgentHost instances because this mission runs two agents
# (Simeon and Fred) in the same world.
agent_host_simeon = MalmoPython.AgentHost()
agent_host_fred = MalmoPython.AgentHost()

# Use simeon's agenthost to hold the command-line options:
malmoutils.parse_command_line(agent_host_simeon)

# -- set up the mission --
xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <About>
    <Summary/>
  </About>
  <ModSettings>
    <MsPerTick>10</MsPerTick>   <!-- Because it's pretty boring watching Fred build steps for five minutes... -->
  </ModSettings>
  <ServerSection>
    <ServerInitialConditions>
      <Time>
        <StartTime>0</StartTime>
      </Time>
def main():
    """Run a recorded Malmo mission, polling video frames until it ends.

    Loads the mission from ``../custom_xmls/usar.xml``, attaches a grid
    observation around the agent, records rewards/observations/commands
    and an MP4 to a timestamped ``.tgz`` under ``../human_trajectories``,
    then loops pulling the latest video frame while the mission runs.
    """
    # Observation grid extent, in blocks relative to the agent.
    sight = {'x': (-30, 30), 'z': (-30, 30), 'y': (-1, 1)}

    # NOTE(review): range_x/range_y/range_z are computed but never used in
    # the visible body — candidates for removal, or used further below.
    range_x = abs(sight['x'][1] - sight['x'][0]) + 1
    range_y = abs(sight['y'][1] - sight['y'][0]) + 1
    range_z = abs(sight['z'][1] - sight['z'][0]) + 1

    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host)
    # NOTE(review): the command-line recordings directory is immediately
    # overwritten by the hard-coded path on the next line — the first
    # assignment is dead; confirm which source of truth is intended.
    recordingsDirectory = malmoutils.get_recordings_directory(agent_host)
    recordingsDirectory = "../human_trajectories"
    if (not os.path.exists(recordingsDirectory)):
        os.mkdir(recordingsDirectory)
    logging.basicConfig(level=logging.INFO)
    # pdb.set_trace()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # set to INFO if you want fewer messages

    # Expected frame size; must match the mission's video producer settings.
    video_width = 640
    video_height = 480
    # NOTE(review): bare `sys.argv` is a no-op expression statement — dead code.
    sys.argv

    mission_xml_path = "../custom_xmls/usar.xml"
    validate = True  # ask MissionSpec to validate the XML against the schema
    # my_mission = MalmoPython.MissionSpec(missionXML, validate)
    my_mission = MalmoPython.MissionSpec(getMissionXML(mission_xml_path),
                                         validate)

    # ObservationFromGrid
    # Observe the block grid named 'relative_view' spanning `sight` around the agent.
    my_mission.observeGrid(sight['x'][0], sight['y'][0], sight['z'][0],
                           sight['x'][1], sight['y'][1], sight['z'][1],
                           'relative_view')

    # agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    # Keep only the newest frame; older frames are dropped rather than queued.
    agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

    # NOTE(review): num_reps is set here but the loop below iterates
    # range(1) — confirm whether the loop should use num_reps.
    if agent_host.receivedArgument("test"):
        num_reps = 1
    else:
        num_reps = 30000

    my_mission_record = MalmoPython.MissionRecordSpec()
    if recordingsDirectory:
        my_mission_record.recordRewards()
        my_mission_record.recordObservations()
        my_mission_record.recordCommands()
        # if agent_host.receivedArgument("record_video"): # my_mission_record.recordMP4(24,2000000)
        # Record video at 24 fps, 2 Mbit/s.
        my_mission_record.recordMP4(24, 2000000)
    # Timestamped archive name, e.g. "2024-01-31_05-10-42_PM".
    recording_name = datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")
    for iRepeat in range(1):
        my_mission_record.setDestination(
            os.path.join(recordingsDirectory, recording_name + ".tgz"))
        # Starting a mission can fail transiently (client busy); retry a
        # few times with a pause before giving up.
        max_retries = 3
        for retry in range(max_retries):
            try:
                agent_host.startMission(my_mission, my_mission_record)
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    logger.error("Error starting mission: %s" % e)
                    exit(1)
                else:
                    time.sleep(2)

        logger.info('Mission %s', iRepeat)
        logger.info("Waiting for the mission to start")
        # Poll until the Mod reports the mission has begun.
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
        print()

        img_counter = 0
        # print('observations', world_state.observations)
        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()

            # Observations
            # msg = observe(agent_host)
            # if msg is not None:
            #     print('timestamp: ', msg['timestamp'])

            # NOTE : Nothing recorded in world state. Uncomment to test it out.

            # if world_state.number_of_observations_since_last_state > 0:
            #     timestamp = world_state.observations[-1].timestamp
            #     msg = world_state.observations[-1].text
            #     obs = json.loads(msg)
            #     print("{'timestamp': timestamp, 'observations': obs}")

            # Video Frames
            # Block until at least one new frame arrives (or the mission ends).
            while world_state.number_of_video_frames_since_last_state < 1 and world_state.is_mission_running:
                logger.info("Waiting for frames...")
                time.sleep(0.05)
                world_state = agent_host.getWorldState()

            logger.info("Got frame!")
            # import ipdb; ipdb.set_trace
            # print('observations', world_state.observations)
            # world_state.observations
            if world_state.is_mission_running:
                # timestamp = world_state.observations[-1].timestamp
                # msg = world_state.observations[-1].text
                # print(timestamp)
                # print(msg)
                # Decode the newest frame's raw pixel bytes into a PIL image.
                frame = world_state.video_frames[-1]
                img = Image.frombytes('RGB', (640, 480), bytes(frame.pixels))
                # imageio.imsave("./tmp_imgs/{}.png".format(img_counter), img)
                img_counter += 1
        logger.info("Mission has stopped.")
        time.sleep(1)  # let the Mod recover