示例#1
0
    def __init__(self, mission_xml, num_agents):
        """Set up a Malmo mission: spec, record spec, client pool, agent hosts.

        Args:
            mission_xml: mission description XML string (validated by Malmo).
            num_agents: number of agents participating in the mission.
        """
        self.mission = MalmoPython.MissionSpec(mission_xml, True)
        self.mission_record = MalmoPython.MissionRecordSpec()
        self.num_agents = num_agents
        self.experiment_ID = str(uuid.uuid4())
        self.client_pool = MalmoPython.ClientPool()
        # BUGFIX: use the num_agents constructor argument instead of the
        # NUM_AGENTS module global, so the pool always matches this instance.
        # The +1 keeps the original behavior of registering one spare port.
        for port in range(10000, 10000 + self.num_agents + 1):
            self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

        # Create one agent host for parsing the command line.
        self.agent_hosts = [MalmoPython.AgentHost()]

        try:
            self.agent_hosts[0].parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_hosts[0].getUsage())
            exit(1)
        if self.agent_hosts[0].receivedArgument("help"):
            print(self.agent_hosts[0].getUsage())
            exit(0)

        # Create the rest of the agent hosts (BUGFIX: sized from
        # self.num_agents, not the NUM_AGENTS global).
        if self.num_agents > 1:
            self.agent_hosts += [
                MalmoPython.AgentHost() for _ in range(self.num_agents - 1)
            ]
 def get_client_pool(self):
     """Return a ClientPool containing the single local Minecraft client."""
     pool = MalmoPython.ClientPool()
     pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
     # Register more ClientInfo entries here (e.g. ports 10002, 10003)
     # as additional Minecraft instances come online.
     return pool
示例#3
0
 def __init__(self, missionXML, serverIp='127.0.0.1'):
     """Create agent hosts, optional recording spec and the client pool.

     Args:
         missionXML: mission description object exposing `agentSections`.
         serverIp: address of the Minecraft client(s) to connect to.
     """
     self.missionDesc = None
     self.mission = None
     self.mission_record = None
     self.setMissionXML(missionXML)
     nAgents = len(missionXML.agentSections)
     self.agent_hosts = []
     self.agent_hosts += [MalmoPython.AgentHost() for n in range(nAgents)]
     self.agent_hosts[0].parse(sys.argv)
     # BUGFIX: receivedArgument is an AgentHost method (used correctly for
     # "record_video" below), not a method of this class.
     if self.agent_hosts[0].receivedArgument('recording_dir'):
         recordingsDirectory = malmoutils.get_recordings_directory(
             self.agent_hosts[0])
         # BUGFIX: guard against AttributeError on None — mission_record is
         # None above unless setMissionXML happened to create one.
         if self.mission_record is None:
             self.mission_record = MalmoPython.MissionRecordSpec()
         self.mission_record.recordRewards()
         self.mission_record.recordObservations()
         self.mission_record.recordCommands()
         self.mission_record.setDestination(recordingsDirectory + "//" +
                                            "lastRecording.tgz")
         if self.agent_hosts[0].receivedArgument("record_video"):
             self.mission_record.recordMP4(24, 2000000)
     self.client_pool = MalmoPython.ClientPool()
     # One client per agent on consecutive ports starting at 10000.
     for x in range(10000, 10000 + nAgents):
         self.client_pool.add(MalmoPython.ClientInfo(serverIp, x))
     # Per-agent state, indexed by agent number.
     self.worldStates = [None] * nAgents
     self.observe = [None] * nAgents
     self.isAlive = [True] * nAgents
     self.frames = [None] * nAgents
     self.segmentation_frames = [None] * nAgents
示例#4
0
def run_mission(rambo_steve, episode):
    """Start a Malmo mission and run the RamboSteve agent for one episode.

    Args:
        rambo_steve: agent object exposing run(agent_host, episode).
        episode: episode index forwarded to the agent.
    """
    agent_host = MalmoPython.AgentHost()

    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument('help'):
        print(agent_host.getUsage())
        # BUGFIX: stop after printing usage instead of falling through and
        # starting a mission anyway (matches the other examples' behavior).
        exit(0)

    my_mission = MalmoPython.MissionSpec(world.getMissionXML(), True)
    # adding the recordedFileName into MissionRecordSpec
    my_mission_record = MalmoPython.MissionRecordSpec()
    # my_mission = malmoutils.get_default_recording_object(agent_host, "Mission")
    # adding the spec for adding the recording of the video
    # my_mission.requestVideo(1280, 720)
    # my_mission_record.recordMP4(30, 2000000)

    # Set up client pool: five consecutive ports starting at the
    # configured mission-control port.
    my_clients = MalmoPython.ClientPool()
    for i in range(5):
        my_clients.add(
            MalmoPython.ClientInfo('127.0.0.1', c.MISSION_CONTROL_PORT + i))

    # Attempt to start a mission, retrying on transient failures:
    print('Attempting to start mission...')
    max_retries = 5
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    0, "RamboSteve")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print('Error starting mission:', e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print('Waiting for the mission to start ', end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print('.', end='')
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print('Error:', error.text)

    print()
    print('Mission running ', end=' ')

    rambo_steve.run(agent_host, episode)

    print()
    print('Mission ended')
    time.sleep(2)
示例#5
0
 def get_client_pool(self):
     """
     Malmo specific function: create the client pool used to connect to
     the Minecraft server (a single local instance on port 10000).
     """
     pool = MalmoPython.ClientPool()
     pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
     # Add further ClientInfo entries (e.g. ports 10002, 10003) here as
     # more Minecraft instances become available.
     return pool
示例#6
0
def initalizeMinecraftMap(xml):
    """Build the Malmo objects needed to launch a mission from `xml`.

    Returns:
        Tuple (mission, agent_host, client_pool, mission_record).
    NOTE: the function name keeps its original spelling so existing
    callers keep working.
    """
    agent_host = MalmoPython.AgentHost()

    mission = MalmoPython.MissionSpec(xml, True)
    # Video-capture setup -- remove the recording lines to skip capture.
    recording_file = recordPath.format("final_take0_bad.tgz")
    mission_record = MalmoPython.MissionRecordSpec(recording_file)
    mission.requestVideo(1200, 720)
    mission_record.recordMP4(30, 2000000)
    mission.setViewpoint(1)

    # Register additional Minecraft machines here as available.
    clients = MalmoPython.ClientPool()
    clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    return (mission, agent_host, clients, mission_record)
示例#7
0
        def __init__(self, port=None, existing=False):
            """Launch a new Minecraft client process, or attach to one.

            Args:
                port: Malmo port for the client; auto-selected when
                    launching a new process and no port is given.
                existing: when True, do not spawn a process -- attach to
                    an already-running client on `port` (required then).
            """
            self.existing = existing

            if not existing:
                if not port:
                    port = InstanceManager._get_valid_port()
                # Build the launch command from class-level configuration.
                cmd = InstanceManager.MC_COMMAND
                if InstanceManager.headless:
                    cmd += " -headless "
                cmd += " -port " + str(port)
                logger.info("Starting Minecraft process: " + cmd)

                args = shlex.split(cmd)
                proc = subprocess.Popen(
                    args,
                    cwd=InstanceManager.MINECRAFT_DIR,
                    # pipe entire output (stderr merged into stdout)
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    # use process group, see http://stackoverflow.com/a/4791612/18576
                    preexec_fn=os.setsid)
                # Block until Minecraft has printed "CLIENT enter state:
                # DORMANT", i.e. it is ready to accept missions.
                while True:
                    line = proc.stdout.readline()
                    logger.debug(line)
                    if not line:
                        raise EOFError(
                            "Minecraft process finished unexpectedly")
                    if b"CLIENT enter state: DORMANT" in line:
                        break
                logger.info("Minecraft process ready")
                # Suppress further output handling, otherwise the subprocess
                # would block once the stdout pipe fills up.
                # NB! there will be still logs under Malmo/Minecraft/run/logs
                # NOTE(review): rebinding proc.stdout on the Python side does
                # not redirect the child's output; the original OS pipe can
                # still fill -- confirm this is intentional.
                # FNULL = open(os.devnull, 'w')
                FMINE = open('./minecraft.log', 'w')
                proc.stdout = FMINE
                self.proc = proc
            else:
                assert port is not None, "No existing port specified."

            self.ip = InstanceManager.DEFAULT_IP
            self.port = port
            # NOTE(review): self.existing was already set at the top of the
            # method; this re-assignment is redundant but harmless.
            self.existing = existing
            self.locked = False

            # Creating client pool for this single instance.
            logger.info("Creating client pool for {}".format(self))
            self.client_pool = MalmoPython.ClientPool()
            self.client_pool.add(MalmoPython.ClientInfo(self.ip, self.port))
示例#8
0
def create_malmo_components():
  """Create the Malmo client pool, agent host, mission and record spec.

  Ports are taken from the comma-separated opts.malmo_ports string.

  Returns:
      Tuple (client_pool, agent_host, mission, mission_record).
  """
  # setup client pool
  client_pool = MalmoPython.ClientPool()
  for port in map(int, opts.malmo_ports.split(",")):
    # BUGFIX: use the Python 3 print() function -- the original
    # `print >>sys.stderr, ...` Python 2 statement is a SyntaxError on
    # Python 3, which the rest of this file targets.
    print("adding client with port %d" % port, file=sys.stderr)
    client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))
  # setup agent host
  malmo = MalmoPython.AgentHost()
  # can't do this without more complex caching of world state vid frames
  #malmo.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
  # load mission spec
  mission = MalmoPython.MissionSpec(specs.classroom(opts, overclock_tick_ms), True)
  mission_record = MalmoPython.MissionRecordSpec()
  # return all
  return client_pool, malmo, mission, mission_record
示例#9
0
    def run(self):
        """Runs the game with the registered agents

        Raises:
            :class:`jason_malmo.exceptions.NoAgentsException`: There are not registered agents in the game.\n
                Register an agent before running the game::

                    game.register('/path/to/file.asl')
                    game.run()
        """
        self._client_pool = MalmoPython.ClientPool()

        if not len(self._agents):
            raise NoAgentsException

        # One port per agent starting at 10000 (the +1 upper bound
        # registers one spare client slot beyond the agent count).
        for port in range(10000, 10000 + len(self._agents) + 1):
            self._client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

        self._my_mission = MalmoPython.MissionSpec(self._get_mission_xml(),
                                                   True)

        # Start one mission role per agent; the role index must match the
        # agent's position in the generated mission XML.
        for (index, agent) in enumerate(self._agents):
            malmoutils.parse_command_line(agent.malmo_agent)
            self._safe_start_mission(
                agent.malmo_agent, self._my_mission, self._client_pool,
                malmoutils.get_default_recording_object(
                    agent.malmo_agent, "saved_data"), index, '')
        self._safe_wait_for_start(
            [agent.malmo_agent for agent in self._agents])

        # Run each agent's Jason reasoning loop on its own thread.
        threads = []
        for agent in self._agents:
            thr = threading.Thread(target=self._jason_env.run_agent,
                                   args=(agent, ),
                                   kwargs={})
            thr.start()
            threads.append(thr)

        # TODO while mission is running
        # Poll every agent's beliefs and dispatch any 'tasks' beliefs to
        # the task handler. NOTE(review): this loops forever -- there is no
        # mission-over or thread-join condition; confirm intended lifecycle.
        while True:
            for agent in self._agents:
                for (belief, value) in agent.beliefs.items():
                    if belief[0] == 'tasks':
                        tasks = []
                        for task in list(value)[0].args[0]:
                            tasks.append(task)
                        self.tasks.handle(agent, tasks)
            time.sleep(0.05)
示例#10
0
    def __init__(self, _):
        """Configure action/observation spaces, Malmo hosts and flags."""
        # Bookkeeping for graphing episode returns.
        self.log_frequency = 1
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []

        # Six discrete actions:
        #   0 attack | 1 switch to sword | 2 switch to axe
        #   3 use gapple | 4 use shield (1 second) | 5 idle
        self.action_space = Discrete(6)

        # Five continuous observations, each in [0, 1]:
        #   enemy-in-range flag, own health (normalized), enemy health
        #   (normalized), enemy weapon on an offensive-to-defensive scale
        #   (axe=1, sword=0.75, gapple=0.25, shield=0), and the distance
        #   between the two agents.
        self.observation_space = Box(0, 1, shape=(5, ), dtype=np.float32)

        ###################################
        # Malmo: two agent hosts plus a pool of the two local clients.
        self.agent_hosts = [Malmo.AgentHost() for _ in range(2)]
        self.client_pool = Malmo.ClientPool()
        for port in (10000, 10001):
            self.client_pool.add(Malmo.ClientInfo("127.0.0.1", port))

        ###################################
        # Custom parameters
        self.mission_index = 0

        ###################################
        # Self-play parameters (opponent checkpoint loading disabled).
        #self.opponent_policy = load_trained_agent(CURRENT_CHECKPOINT)
        self.use_self_play = False

        self.first_reset = True
示例#11
0
    def start(self):
        """Start a new Malmo mission on this object's agent_host.

        Builds the client pool, mission spec and record spec, retries
        startMission up to 10 times on transient errors, then blocks until
        the mission has begun before stamping the start time.
        """

        self.malmo_client_pool = MalmoPython.ClientPool()
        self.malmo_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))  # 10000 in use - try 10001

        self.malmo_mission = MalmoPython.MissionSpec(self.missionXML, True)
        self.malmo_mission.forceWorldReset()

        self.malmo_mission_record = MalmoPython.MissionRecordSpec()

        self.malmo_mission.requestVideo(800, 500)
        self.malmo_mission.setViewpoint(1)

        # Attempt to start a mission:
        # NOTE(review): startMission is invoked with the two-argument
        # (mission, record) overload, so malmo_client_pool built above is
        # never actually used -- confirm whether the pool should be passed.
        max_retries = 10
        for retry in range(max_retries):
            try:
                self.agent_host.startMission(self.malmo_mission, self.malmo_mission_record )
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission:",e)
                    exit(1)
                else:
                    time.sleep(2)

        # Loop until mission starts:
        print("Waiting for the mission to start ")
        self.world_state = self.agent_host.getWorldState()

        while not self.world_state.has_mission_begun:
            sys.stdout.write(".")
            time.sleep(0.1)
            self.world_state = self.agent_host.getWorldState()
            for error in self.world_state.errors:
                print("Error:", error.text)

        print(" ")
        print("Mission running ")

        # Bookkeeping: mission counter and wall-clock start time.
        self.number += 1
        self.start_time = time.time()
        self.end_time = None
示例#12
0
File: ALE_HAC.py  Project: Miej/malmo
def startGame():
    """Start one recorded ALE mission run and prime the Tk canvas.

    Relies on module globals defined elsewhere in the file: agent_host,
    my_mission, recordingsDirectory, want_own_display, canvas, gameNum,
    iterations and rom_file.
    """
    #Find filename for the recording:
    # Probe saved_data<N>.tar.gz until an unused N is found; the loop
    # exits with fileRecording holding the first non-existent path.
    filenum = 0
    fileRecording = ''
    while fileRecording == '':
        fileRecording = recordingsDirectory + '/saved_data' + str(
            filenum) + '.tar.gz'
        if os.path.isfile(fileRecording):
            filenum = filenum + 1
            fileRecording = ''

    my_mission_record = MalmoPython.MissionRecordSpec(fileRecording)
    my_mission_record.recordCommands()
    my_mission_record.recordMP4(20, 400000)
    my_mission_record.recordRewards()
    my_mission_record.recordObservations()

    try:
        # display_gui: 1 shows the built-in display, 0 when the user
        # supplies their own. NOTE(review): it is passed where the
        # role/experiment-id arguments normally go -- semantics depend on
        # this ALE-flavored Malmo build; confirm against its API.
        display_gui = 1
        if want_own_display:
            display_gui = 0
        agent_host.startMission(my_mission, MalmoPython.ClientPool(),
                                my_mission_record, display_gui, rom_file)
    except RuntimeError as e:
        print("Error starting mission:", e)
        exit(1)

    print("Waiting for the mission to start", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    gamestats = "Go " + str(gameNum + 1) + " out of " + str(iterations) + "\n"
    canvas.delete("all")
    canvas.create_text(
        80, 105, text=gamestats + "Click to begin!\nEscape to end"
    )  # The window needs keyboard focus or no way to control game.
示例#13
0
    def init_malmo(self):
        """
        Initialize new Malmo mission.

        Loads ./mission.xml, starts the mission on the local client
        (retrying on transient errors), waits for it to begin, calls
        self.initialize() and returns the first world state.
        """
        mission_file = './mission.xml'
        with open(mission_file, 'r') as f:
            print("Loading mission from %s" % mission_file)
            my_mission = MalmoPython.MissionSpec(f.read(), True)
            my_mission_record = MalmoPython.MissionRecordSpec()
            my_mission.requestVideo(800, 500)
            my_mission.setViewpoint(1)

        # add Minecraft machines here as available
        pool = MalmoPython.ClientPool()
        pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

        # Attempt to start Malmo, retrying transient failures.
        max_retries = 3
        for attempt in range(1, max_retries + 1):
            try:
                self.agent_host.startMission(my_mission, pool,
                                             my_mission_record, 0, 'Agent')
                break
            except RuntimeError as e:
                if attempt == max_retries:
                    print("Error starting mission:", e)
                    exit(1)
                time.sleep(2)

        # Block until the mission reports that it has begun.
        world_state = self.agent_host.getWorldState()
        while not world_state.has_mission_begun:
            time.sleep(0.1)
            world_state = self.agent_host.getWorldState()
            for error in world_state.errors:
                print("\nError:", error.text)
        self.initialize()
        return world_state
示例#14
0
    def __init__(self, _):
        """Configure spaces, Malmo hosts/clients and self-play state."""
        # Per-step reward bookkeeping for graphing.
        self.step_rewards = []

        # Six discrete actions:
        #   0 attack | 1 switch to sword | 2 switch to axe
        #   3 use gapple | 4 use shield (1 second) | 5 idle
        self.action_space = Discrete(6)

        # Four continuous observations, each in [0, 1]:
        #   enemy-in-range flag, own health (normalized), enemy health
        #   (normalized), and enemy weapon on an offensive-to-defensive
        #   scale (axe=1, sword=0.75, gapple=0.25, shield=0).
        self.observation_space = Box(0, 1, shape=(4, ), dtype=np.float32)

        ###################################
        # Malmo: two agent hosts plus a pool of the two local clients.
        self.agent_hosts = [Malmo.AgentHost() for _ in range(2)]
        self.client_pool = Malmo.ClientPool()
        for port in (10001, 10002):
            self.client_pool.add(Malmo.ClientInfo("127.0.0.1", port))
        self.mission_index = 0
        self.old_checkpoint = -1

        ###################################
        # Self-play: preload the opponent policy from the latest checkpoint.
        self.opponent_policy = load_trained_agent(get_current_checkpoint())
        self.use_self_play = False
        self.last_load = 0
        self.first_reset = True
示例#15
0
def run(argv=None):
    """Run a simple random-walk Malmo mission, restarting on stalls.

    Args:
        argv: command-line arguments for the agent host; defaults to a
            single empty string (same behavior as before).
    """
    # BUGFIX: avoid a mutable default argument ([''] shared across calls);
    # use the None sentinel and build the default per call.
    if argv is None:
        argv = ['']

    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds( 10 )
    my_mission.requestVideo( 320, 240 )
    my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Retry startMission on transient failures.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        # Restart Minecraft if the mission never begins in time.
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:",error.text)
    print()

    last_delta = time.time()
    # main loop: wander randomly, watching for a stalled world state.
    while world_state.is_mission_running:
        agent_host.sendCommand( "move 1" )
        agent_host.sendCommand( "turn " + str(random.random()*2-1) )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",world_state.number_of_video_frames_since_last_state,world_state.number_of_observations_since_last_state,world_state.number_of_rewards_since_last_state)
        if (world_state.number_of_video_frames_since_last_state > 0 or
           world_state.number_of_observations_since_last_state > 0 or
           world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            # No new data for too long -- assume the client hung.
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:",reward.getValue())
        for error in world_state.errors:
            print("Error:",error.text)
        for frame in world_state.video_frames:
            print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
示例#16
0
def run(size, algo1, algo2):
    #algorithms = {"reflex": reflex.reflex, "hiddenMarkov": hiddenMarkov.hiddenMarkov, "minimax":minimax.minimax, "expectimax": expectimax.expectimax}
    algorithms = {
        "reflex": reflex.reflex,
        'random': randomagent.randommove,
        'smartrandom': smartrandomagent.randommove,
        'astarreflex': AStarReflex.search,
        "minimax": minimax.minmax
    }
    #assert len(sys.argv) == 4, "Wrong number of arguments, the form is: mapSize, agent algorithm, enemy alogrithm"

    malmoutils.fix_print()

    # -- set up two agent hosts --
    agent_host1 = MalmoPython.AgentHost()
    agent_host2 = MalmoPython.AgentHost()
    #map_size = str(sys.argv[1])
    map_size = int(size)
    map_minus = str(map_size - 1)
    agentAlgo = algorithms[algo1]
    enemyAlgo = algorithms[algo2]
    #agentAlgo =  algorithms[sys.argv[2]]
    #enemyAlgo = algorithms[sys.argv[3]]

    # Use agent_host1 for parsing the command-line options.
    # (This is why agent_host1 is passed in to all the subsequent malmoutils calls, even for
    # agent 2's setup.)
    malmoutils.parse_command_line(agent_host1)

    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
                <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
                
                  <About>
                    <Summary>Hello world!</Summary>
                  </About>
                  
                  <ServerSection>
                    <ServerInitialConditions>
                      <Time>
                        <StartTime>12000</StartTime>
                        <AllowPassageOfTime>false</AllowPassageOfTime>
                      </Time>
                    </ServerInitialConditions>
                    <ServerHandlers>
                      <FlatWorldGenerator generatorString="3;7,220*1,5*3,2;3;,biome_1"/>
                      <DrawingDecorator>
                        <!-- coordinates for cuboid are inclusive -->
                        <DrawCuboid x1="0" y1="45" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="300" z2=''' + '"' + map_minus + '"' + ''' type="air" />            <!-- limits of our arena -->
                        <DrawCuboid x1="0" y1="40" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="44" z2=''' + '"' + map_minus + '"' + ''' type="lava" />           <!-- lava floor -->
                        <DrawCuboid x1="0"  y1="46" z1="0"  x2=''' + '"' + map_minus + '"' + ''' y2="46" z2=''' + '"' + map_minus + '"' + ''' type="snow" />
                      </DrawingDecorator>
                      <ServerQuitFromTimeUp timeLimitMs="30000"/>
                      
                    </ServerHandlers>
                  </ServerSection>
                  
                  <AgentSection mode="Survival">
                    <Name>Agent</Name>
                    <AgentStart>
                        <Inventory>
                            <InventoryItem slot="0" type="diamond_shovel"/>
                        </Inventory>
                        <Placement x="0.5" y="47.0" z="0.5" pitch="50" yaw="0"/>
                    </AgentStart>
                    <AgentHandlers>
                      <ObservationFromFullStats/>
                      <ObservationFromGrid>
                          <Grid name="floor3x3W">
                            <min x="-1" y="0" z="-1"/>
                            <max x="1" y="0" z="1"/>
                          </Grid>
                          <Grid name="floor3x3F">
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="-1" z="1"/>
                          </Grid>
                      </ObservationFromGrid>
                      <DiscreteMovementCommands/>
                    </AgentHandlers>
                  </AgentSection>
                  
                  <AgentSection mode="Survival">
                    <Name>Enemy</Name>
                    <AgentStart>
                        <Inventory>
                            <InventoryItem slot="0" type="diamond_shovel"/>
                        </Inventory>
                        <Placement x=''' + '"' + str(
        float(map_size) - 0.5) + '"' + ''' y="47.0" z=''' + '"' + str(
            float(map_size) - 0.5) + '"' + ''' pitch="50" yaw="180"/>
                    </AgentStart>
                    
                    <AgentHandlers>
                      <ObservationFromFullStats/>
                      <DiscreteMovementCommands/>
                      <ObservationFromGrid>
                          <Grid name="floor3x3W">
                            <min x="-1" y="0" z="-1"/>
                            <max x="1" y="0" z="1"/>
                          </Grid>
                          <Grid name="floor3x3F">
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="-1" z="1"/>
                          </Grid>
                      </ObservationFromGrid>
                      <RewardForTouchingBlockType>
                        <Block reward="-100.0" type="lava" behaviour="onceOnly"/>
                      </RewardForTouchingBlockType>
                      <AgentQuitFromTouchingBlockType>
                        <Block type="lava" />
                      </AgentQuitFromTouchingBlockType>
                    </AgentHandlers>
                  </AgentSection>
                </Mission>'''

    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(missionXML, True)

    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    my_mission_record = MalmoPython.MissionRecordSpec()

    def safeStartMission(agent_host, mission, client_pool, recording, role,
                         experimentId):
        used_attempts = 0
        max_attempts = 5
        print("Calling startMission for role", role)
        while True:
            try:
                agent_host.startMission(mission, client_pool, recording, role,
                                        experimentId)
                break
            except MalmoPython.MissionException as e:
                errorCode = e.details.errorCode
                if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
                    print("Server not quite ready yet - waiting...")
                    time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_INSUFFICIENT_CLIENTS_AVAILABLE:
                    print("Not enough available Minecraft instances running.")
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait in case they are starting up.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_NOT_FOUND:
                    print(
                        "Server not found - has the mission with role 0 been started yet?"
                    )
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait and retry.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                else:
                    print("Other error:", e.message)
                    print("Waiting will not help here - bailing immediately.")
                    exit(1)
            if used_attempts == max_attempts:
                print("All chances used up - bailing now.")
                exit(1)
        print("startMission called okay.")

    def safeWaitForStart(agent_hosts):
        print("Waiting for the mission to start", end=' ')
        start_flags = [False for a in agent_hosts]
        start_time = time.time()
        time_out = 120  # Allow two minutes for mission to start.
        while not all(start_flags) and time.time() - start_time < time_out:
            states = [a.peekWorldState() for a in agent_hosts]
            start_flags = [w.has_mission_begun for w in states]
            errors = [e for w in states for e in w.errors]
            if len(errors) > 0:
                print("Errors waiting for mission start:")
                for e in errors:
                    print(e.text)
                print("Bailing now.")
                exit(1)
            time.sleep(0.1)
            print(".", end=' ')
        print()
        if time.time() - start_time >= time_out:
            print("Timed out waiting for mission to begin. Bailing.")
            exit(1)
        print("Mission has started.")

    safeStartMission(agent_host1, my_mission, client_pool, my_mission_record,
                     0, '')
    safeStartMission(agent_host2, my_mission, client_pool, my_mission_record,
                     1, '')
    safeWaitForStart([agent_host1, agent_host2])

    def movement(ah, direction, pos):
        if direction == "north":
            ah.sendCommand("movenorth 1")
            position = (pos[0], pos[1] - 1)
        elif direction == "south":
            ah.sendCommand("movesouth 1")
            position = (pos[0], pos[1] + 1)
        elif direction == "west":
            ah.sendCommand("movewest 1")
            position = (pos[0] - 1, pos[1])
        elif direction == "east":
            ah.sendCommand("moveeast 1")
            position = (pos[0] + 1, pos[1])
        else:
            position = (pos[0], pos[1])
        time.sleep(0.1)
        return position

    def attack(ah, index, pos, map, enemy=False):
        """Break the block adjacent to *pos* in direction *index* and mark it broken.

        A player may only break one of the four blocks immediately around it.
        The enemy starts facing north and the agent starts facing south, so the
        turn sequence needed to reach each direction differs between the two:

          Enemy: 0 1 0  Agent: 0 3 0
                 4 X 2         2 X 4
                 0 3 0         0 1 0

        Each Malmo command is followed by a short pause so it can take effect.
        """
        # Per-direction command scripts: turns to face the target block, the
        # attack itself, turns back — plus the (dx, dy) of the broken block.
        if enemy:
            scripts = {
                "north": (["attack 1"], 0, -1),
                "east": (["turn 1", "attack 1", "turn -1"], 1, 0),
                "west": (["turn -1", "attack 1", "turn 1"], -1, 0),
                "south": (["turn 1", "turn 1", "attack 1",
                           "turn -1", "turn -1"], 0, 1),
            }
        else:
            scripts = {
                "south": (["attack 1"], 0, 1),
                "west": (["turn 1", "attack 1", "turn -1"], -1, 0),
                "east": (["turn -1", "attack 1", "turn 1"], 1, 0),
                "north": (["turn 1", "turn 1", "attack 1",
                           "turn -1", "turn -1"], 0, -1),
            }
        script = scripts.get(index)
        if script is None:
            # Unrecognised direction: nothing was broken, map stays untouched.
            return
        commands, dx, dy = script
        for command in commands:
            ah.sendCommand(command)
            time.sleep(0.1)
        # Record the destroyed block in the shared floor map.
        map[math.floor(pos[0]) + dx][math.floor(pos[1]) + dy] = False

    '''
    Sample Observation:
    {"DistanceTravelled":0,"TimeAlive":50,"MobsKilled":0,"PlayersKilled":0,"DamageTaken":0,"DamageDealt":0,
    "Life":20.0,"Score":0,"Food":20,"XP":0,"IsAlive":true,"Air":300,"Name":"Enemy","XPos":5.5,"YPos":47.0,
    "ZPos":5.5,"Pitch":50.0,"Yaw":180.0,"WorldTime":12000,"TotalTime":57}

    '''

    agent_score = 0  # running score: decays each tick, +/-100 on loss/win
    #count = 0
    agent_ob = None  # most recent parsed observation for the agent
    enemy_ob = None  # most recent parsed observation for the enemy

    # Floor map of still-standing blocks: True = intact, False = broken.
    # NOTE(review): the name `map` shadows the builtin `map`.
    map = [[True for i in range(0, int(map_size))]
           for j in range(0, int(map_size))]
    # for i in map:
    # print(i)

    while True:
        #Scores should decrease with time and get a bonus if they win
        agent_score -= 1
        agent_state = agent_host1.peekWorldState()
        enemy_state = agent_host2.peekWorldState()
        if agent_state.number_of_observations_since_last_state > 0:
            agent_ob = json.loads(agent_state.observations[-1].text)

        if enemy_state.number_of_observations_since_last_state > 0:
            enemy_ob = json.loads(enemy_state.observations[-1].text)
        # Busy-wait until both agents have produced at least one observation.
        if agent_ob is None or enemy_ob is None:
            continue
        if agent_state.is_mission_running == False:
            break
        agent_position = (agent_ob["XPos"], agent_ob["ZPos"])
        enemy_position = (enemy_ob["XPos"], enemy_ob["ZPos"])

        # Floor grid under each player; a player standing over lava has lost.
        agent_grid = agent_ob.get(u'floor3x3F', 0)
        enemy_grid = enemy_ob.get(u'floor3x3F', 0)

        if "lava" in agent_grid:
            print("Enemy Won!")
            agent_score -= 100
            for i in map:
                print(i)
            return 0
            break  # NOTE(review): unreachable — follows the return above
        if "lava" in enemy_grid:
            print("Agent Won!")
            agent_score += 100
            for i in map:
                print(i)
            return 1
            break  # NOTE(review): unreachable — follows the return above

        # Ask each controller for its move direction and the direction to break.
        agentMoveString, agentBreakIndex = agentAlgo(agent_host1,
                                                     agent_position,
                                                     enemy_position,
                                                     agent_grid, map)
        enemyMoveString, enemyBreakIndex = enemyAlgo(agent_host2,
                                                     enemy_position,
                                                     agent_position,
                                                     enemy_grid, map)

        # #Agent Turn to Break
        attack(agent_host1, agentBreakIndex, agent_position, map)
        # #Enemy Turn to Move
        pos = movement(agent_host2, enemyMoveString, enemy_position)

        # #Enemy Turn to Break
        attack(agent_host2, enemyBreakIndex, pos, map, enemy=True)
        # #Agent Turn to Move
        movement(agent_host1, agentMoveString, agent_position)
    # Mission ended without a lava verdict: dump the map and report a draw (2).
    for i in map:
        print(i)
    return 2
示例#17
0
    def __init__(self,
                 world_def,
                 video_dim=(32, 32),
                 num_parallel=1,
                 time_limit=20,
                 reset=True,
                 discrete_actions=False,
                 vision_observation=False,
                 depth=False,
                 num_frames=1,
                 grayscale=True):
        """Build a Malmo mission environment around *world_def*.

        Args:
            world_def: world definition object; must provide
                generate_mission(), x_bounds and z_bounds.
            video_dim: (width, height) of the requested video frames.
            num_parallel: number of Malmo clients to pool (ports 10000+).
            time_limit: mission time limit in seconds.
            reset: forwarded to world_def.generate_mission().
            discrete_actions: use a Discrete action space instead of a Box.
            vision_observation: observe raw frames instead of state features.
            depth: depth flag (stored; not used within this constructor).
            num_frames: number of stacked frames per vision observation.
            grayscale: single-channel frames (bounds [0, 1]) instead of RGB.
        """
        self.video_width, self.video_height = video_dim
        self.image_width, self.image_height = video_dim
        self.discrete_actions = discrete_actions
        self.vision_observation = vision_observation
        self.depth = depth
        self.num_parallel = num_parallel

        # Build the mission from the supplied world definition.
        self.world_def = world_def
        self.mission = self.world_def.generate_mission(reset=reset)
        #self.XGoalPos, self.YGoalPos = self.world_def.goal_pos[0], self.world_def.goal_pos[2]

        # NOTE(review): Malmo's MissionSpec.requestVideo takes (width, height);
        # height is passed first here — harmless while frames are square (the
        # default 32x32), confirm before using non-square dimensions.
        self.mission.requestVideo(self.video_height, self.video_width)
        self.mission.observeRecentCommands()
        self.mission.allowAllContinuousMovementCommands()
        self.mission.timeLimitInSeconds(time_limit)

        # Pool one Minecraft client per parallel instance.
        if self.num_parallel > 1:
            self.client_pool = MalmoPython.ClientPool()
            for i in range(num_parallel):
                port = 10000 + i
                self.client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))

        self.agent_host = MalmoPython.AgentHost()
        self.agent_host.setObservationsPolicy(
            MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
        # self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

        #self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.KEEP_ALL_FRAMES)
        self.agent_host.setVideoPolicy(
            MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

        self.mission_record_spec = MalmoPython.MissionRecordSpec()

        # Action space: either a small discrete command set, or a continuous
        # Box built from (command, (low, high)) pairs.
        if discrete_actions:
            self._action_set = {
                0: "move 1",
                1: "turn 0.5",
                2: "turn -0.5",
                3: None  # presumably "no command" — confirm in the step logic
            }
            self.action_space = Discrete(n=len(self._action_set))
        else:
            self._action_set = [
                ("move", (-1, 1)),
                ("turn", (-1, 1)),
                ("pitch", (-1, 1)),
                ("use", (0, 1)),
                ("jump", (0, 1)),
            ]
            # self._action_set = [("move", (0, 1)),
            #                     ("move", (-1, 0)),
            #                     ("turn", (0, 1)),
            #                     ("turn", (-1, 0))]

            lower_bound = np.asarray([x[1][0] for x in self._action_set])
            upper_bound = np.asarray([x[1][1] for x in self._action_set])
            self.action_space = Box(lower_bound, upper_bound)

        # Frame stacking / colour configuration for vision observations.
        self.num_frames = num_frames
        self.grayscale = grayscale
        if self.grayscale:
            self.num_frame_channels = 1
            high = 1  # upper bound (grayscale presumably normalised to [0, 1])
        else:
            self.num_frame_channels = 3
            high = 255  # raw RGB byte values

        # Obs keys and bounds
        x_bounds = self.world_def.x_bounds
        z_bounds = self.world_def.z_bounds
        # Largest possible goal distance; bounds the distanceFromGoal feature.
        self.max_dist = np.linalg.norm((x_bounds[-1], z_bounds[-1]))
        self.minDistanceFromGoal = None
        if self.vision_observation:
            # Stacked frames, channels-last: (H, W, num_frames * channels).
            self.observation_space = Box(
                low=0,
                high=high,
                shape=(self.image_height, self.image_width,
                       self.num_frames * self.num_frame_channels))
        else:

            # Feature observation: (key, (low, high)) pairs read from Malmo's
            # JSON observations.
            self.obs_keys = [
                (u'XPos', x_bounds),
                (u'YPos', (200, 300)),
                (u'ZPos', z_bounds),
                (u'yaw', (0, 360)),
                (u'pitch', (0, 180)),
                #(u'XGoalPos', x_bounds),
                #(u'YGoalPos', z_bounds),
                (u'DistanceTravelled', (0, 30)),
                (u'distanceFromGoal', (0, self.max_dist))
            ]
            l_bounds = [key[1][0] for key in self.obs_keys]
            u_bounds = [key[1][1] for key in self.obs_keys]
            self.observation_space = Box(np.array(l_bounds),
                                         np.array(u_bounds))

        # Per-episode bookkeeping.
        self.last_obs = None
        self.cum_reward = 0
        self.distance_travelled = 0
        self.terminal = False
        self.jump = 0
示例#18
0
    def init(self,
             client_pool=None,
             start_minecraft=None,
             continuous_discrete=True,
             add_noop_command=None,
             max_retries=90,
             retry_sleep=10,
             step_sleep=0.001,
             skip_steps=0,
             videoResolution=None,
             videoWithDepth=None,
             observeRecentCommands=None,
             observeHotBar=None,
             observeFullInventory=None,
             observeGrid=None,
             observeDistance=None,
             observeChat=None,
             allowContinuousMovement=None,
             allowDiscreteMovement=None,
             allowAbsoluteMovement=None,
             recordDestination=None,
             recordObservations=None,
             recordRewards=None,
             recordCommands=None,
             recordMP4=None,
             gameMode=None,
             forceWorldReset=None):
        """Configure the environment's mission, spaces and recording options.

        Args:
            client_pool: list of (ip, port) tuples of running Minecraft
                clients; ignored when start_minecraft is set.
            start_minecraft: launch a Minecraft process on a dynamic port,
                overriding client_pool.
            continuous_discrete: discretise continuous movement commands.
            add_noop_command: include a no-op in the generated action set.
            max_retries / retry_sleep: mission start retry policy.
            step_sleep: delay between environment steps.
            skip_steps: number of initial steps to skip.
            videoResolution / videoWithDepth: request (w, h) video frames,
                optionally with a depth channel.
            observe*: enable the corresponding Malmo observation producers
                (observeGrid and observeDistance take their producer's
                positional arguments as a list).
            allow*Movement: True to allow all commands of that kind, or a
                list of individual command names.
            record*: mission-record options (destination, observations,
                rewards, commands, MP4 settings).
            gameMode: "spectator", "creative" or "survival".
            forceWorldReset: force a full world reset between missions.

        Raises:
            ValueError: if client_pool is not a list, or gameMode is unknown.
        """
        self.max_retries = max_retries
        self.retry_sleep = retry_sleep
        self.step_sleep = step_sleep
        self.skip_steps = skip_steps
        self.forceWorldReset = forceWorldReset
        self.continuous_discrete = continuous_discrete
        self.add_noop_command = add_noop_command

        if videoResolution:
            if videoWithDepth:
                self.mission_spec.requestVideoWithDepth(*videoResolution)
            else:
                self.mission_spec.requestVideo(*videoResolution)

        if observeRecentCommands:
            self.mission_spec.observeRecentCommands()
        if observeHotBar:
            self.mission_spec.observeHotBar()
        if observeFullInventory:
            self.mission_spec.observeFullInventory()
        if observeGrid:
            self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
        if observeDistance:
            self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
        if observeChat:
            self.mission_spec.observeChat()

        if allowContinuousMovement or allowDiscreteMovement or allowAbsoluteMovement:
            # if there are any parameters, remove current command handlers first
            self.mission_spec.removeAllCommandHandlers()

            if allowContinuousMovement is True:
                self.mission_spec.allowAllContinuousMovementCommands()
            elif isinstance(allowContinuousMovement, list):
                for cmd in allowContinuousMovement:
                    self.mission_spec.allowContinuousMovementCommand(cmd)

            if allowDiscreteMovement is True:
                self.mission_spec.allowAllDiscreteMovementCommands()
            elif isinstance(allowDiscreteMovement, list):
                for cmd in allowDiscreteMovement:
                    self.mission_spec.allowDiscreteMovementCommand(cmd)

            if allowAbsoluteMovement is True:
                self.mission_spec.allowAllAbsoluteMovementCommands()
            elif isinstance(allowAbsoluteMovement, list):
                for cmd in allowAbsoluteMovement:
                    self.mission_spec.allowAbsoluteMovementCommand(cmd)

        if start_minecraft:
            # start Minecraft process assigning port dynamically
            self.mc_process, port = minecraft_py.start()
            logger.info(
                "Started Minecraft on port %d, overriding client_pool.", port)
            client_pool = [('127.0.0.1', port)]

        if client_pool:
            if not isinstance(client_pool, list):
                raise ValueError(
                    "client_pool must be list of tuples of (IP-address, port)")
            self.client_pool = MalmoPython.ClientPool()
            for client in client_pool:
                self.client_pool.add(MalmoPython.ClientInfo(*client))

        # TODO: produce observation space dynamically based on requested features

        self.video_height = self.mission_spec.getVideoHeight(0)
        self.video_width = self.mission_spec.getVideoWidth(0)
        self.video_depth = self.mission_spec.getVideoChannels(0)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.video_height,
                                                   self.video_width,
                                                   self.video_depth))
        # dummy image just for the first observation
        self.last_image = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.uint8)

        self._create_action_space()

        # mission recording
        self.mission_record_spec = MalmoPython.MissionRecordSpec(
        )  # record nothing
        if recordDestination:
            self.mission_record_spec.setDestination(recordDestination)
        if recordObservations:
            # Previously this flag was accepted but silently ignored; honour
            # it like the other record* flags.
            self.mission_record_spec.recordObservations()
        if recordRewards:
            self.mission_record_spec.recordRewards()
        if recordCommands:
            self.mission_record_spec.recordCommands()
        if recordMP4:
            self.mission_record_spec.recordMP4(*recordMP4)

        if gameMode:
            if gameMode == "spectator":
                self.mission_spec.setModeToSpectator()
            elif gameMode == "creative":
                self.mission_spec.setModeToCreative()
            elif gameMode == "survival":
                # logger.warn is a deprecated alias of logger.warning
                logger.warning(
                    "Cannot force survival mode, assuming it is the default.")
            else:
                # Raise instead of `assert False`: asserts are stripped
                # under `python -O` and would silently accept bad input.
                raise ValueError("Unknown game mode: " + gameMode)
示例#19
0
def main():
    """Drive a team of Malmo agents that play a song on note blocks."""
    # The arrangement below is written for a fixed number of performers.
    num_agents = 4

    # Parse the song CSV into note frequencies and assign notes to agents.
    #freq_list = mt.create_note_list("Twinkle_Twinkle_Little_Star.csv",120,7000,-.08) #1 Agent
    #freq_list = mt.create_note_list("Chopsticks.csv",120,4000,-.15,.03) #2 Agents
    notes = mt.create_note_list("Bad_Apple.csv", 120, 3000, -.08, .03)
    #freq_list = mt.create_note_list("Grenade_120BPM.csv",120,1500,-.08,.03) #4 Agents
    notes = mt.number_converter(notes)
    solutions = cs.get_solutions(notes, num_agents)
    print(solutions)

    # The mission XML is needed up front for the note-block teleport positions.
    missionXML = getMissionXML(num_agents)

    # One Musician per agent, each with its own teleport targets.
    musicians = [
        Musician(generateAgentTeleportPositions(note_positions, idx))
        for idx in range(num_agents)
    ]
    '''
    MALMO
    '''
    print('Starting...', flush=True)

    # One agent host per performer; only the first parses the command line.
    agent_hosts = [MalmoPython.AgentHost() for _ in range(num_agents)]
    malmoutils.parse_command_line(agent_hosts[0])

    # Teleporting is done via chat commands, so those must be allowed.
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.allowAllChatCommands()

    # Reserve one Malmo client port per agent.
    my_client_pool = MalmoPython.ClientPool()
    for idx in range(num_agents):
        my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000 + idx))

    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)

    # Launch the mission once per agent role.
    for idx in range(num_agents):
        recording = malmoutils.get_default_recording_object(
            agent_hosts[0], "agent_" + str(idx + 1) + "_viewpoint_discrete")
        startMission(agent_hosts[idx], my_mission, my_client_pool, recording,
                     idx, '')

    # Block until every agent reports that the mission has begun.
    waitForStart(agent_hosts)

    # Small pause so the world settles before the performance starts.
    time.sleep(1)
    '''
    SIMULATION BEGINS HERE
    '''

    # Performance loop: one iteration per beat of the song.
    for beat in range(len(solutions[0])):

        # Teleport each musician to the note it must play on this beat.
        for idx, musician in enumerate(musicians):
            musician.teleport_to_noteblock(agent_hosts[idx],
                                           solutions[idx][beat])

        # Strike the note blocks.
        for idx, musician in enumerate(musicians):
            if musician.can_play:
                agent_hosts[idx].sendCommand("attack 1")

        time.sleep(0.001)

        # Release the attack and reset each musician for the next beat.
        for idx, musician in enumerate(musicians):
            if musician.can_play:
                agent_hosts[idx].sendCommand("attack 0")
            musician.can_play = False

        # Spacing between consecutive notes.
        time.sleep(0.2)
示例#20
0
def _avoidance_commands(agent_host, obs, jump_ticks, turn_ticks):
    """Issue jump/turn commands based on one observation.

    Args:
        agent_host: Malmo AgentHost used to send movement commands.
        obs: parsed JSON observation with "Yaw" and the "all_the_blocks" grid.
        jump_ticks: ticks remaining until the jump key is released.
        turn_ticks: ticks remaining of forced turning.

    Returns:
        The updated (jump_ticks, turn_ticks) pair.
    """
    # Translate yaw into a compass facing (first branch prints "Facing South").
    facing = ""
    if obs["Yaw"] + 180 < 90:
        facing = "S"
        print("Facing South")
    elif obs["Yaw"] < 180:
        facing = "W"
        print("Facing West")
    elif obs["Yaw"] < 270:
        facing = "N"
        print("Facing North")
    else:
        facing = "E"
        print("Facing East")

    # Split the 3x3x4 observation grid into four 3x3 horizontal layers
    # (y = -1..2 around the agent); blocks[1] is the layer at foot level.
    blocks = [[], [], [], []]
    for i, block in enumerate(obs["all_the_blocks"]):
        blocks[i // 9].append(block)

    # Jump when a foot-level block in the walking direction is not air.
    if facing == "S":
        willjump = False
        for j in range(0, 3):
            if blocks[1][j] != "air":
                willjump = True
            print(j, blocks[1][j], willjump)
        if willjump:
            jump_ticks = 2
            agent_host.sendCommand("jump 1")
    elif facing == "W":
        willjump = False
        for j in range(0, 3):
            if blocks[1][j * 3] != "air":
                willjump = True
            print(j * 3, blocks[1][j * 3], willjump)
        if willjump:
            jump_ticks = 2
            agent_host.sendCommand("jump 1")
    elif facing == "E":
        willjump = False
        for j in range(1, 4):
            if blocks[1][j * 3 - 1] != "air":
                willjump = True
            print(j * 3 - 1, blocks[1][j * 3 - 1], willjump)
        if willjump:
            jump_ticks = 2
            agent_host.sendCommand("jump 1")
    elif facing == "N":
        willjump = False
        # NOTE(review): this branch checks indices 0-2 but prints indices
        # 6-8 — the printed (northern) indices were probably the intended
        # check. Behaviour preserved as-is.
        for j in range(0, 3):
            if blocks[1][j] != "air":
                willjump = True
            print(j, blocks[1][j + 6], willjump)
        if willjump:
            jump_ticks = 2
            agent_host.sendCommand("jump 1")

    # Start a two-tick turn when blocked at both foot and head height.
    # (The original condition repeated the same two clauses twice; the
    # duplicates were removed without changing the result.)
    if (blocks[1][2] != "air" and blocks[2][2] != "air"
            or blocks[1][4] != "air" and blocks[2][4] != "air"):
        turn_ticks = 2

    # NOTE(review): `>= 0` fires once at start-up too, since the counter
    # begins at 0 — `> 0` may have been intended. Behaviour kept as-is.
    if turn_ticks >= 0:
        agent_host.sendCommand("turn 1")
        turn_ticks = turn_ticks - 1

    return jump_ticks, turn_ticks


def run(argv=None):
    """Wander a Malmo agent around a default world, jumping over obstacles.

    Builds a survival mission with a 3x3x4 block-grid observation, starts it
    on a local Minecraft client, then walks the agent on a weaving path,
    jumping or turning when the observed blocks show an obstacle.

    Args:
        argv: optional command-line argument list forwarded to Malmo's
            parser. Defaults to [''] (no extra arguments).
    """
    # None default instead of a mutable list shared between calls.
    if argv is None:
        argv = ['']
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
            <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

              <About>
                <Summary>Hello world!</Summary>
              </About>

              <ServerSection>
                <ServerHandlers>
                  <DefaultWorldGenerator forceReset="true" />
                  <ServerQuitFromTimeUp timeLimitMs="30000"/>
                  <ServerQuitWhenAnyAgentFinishes/>
                </ServerHandlers>
              </ServerSection>


              <AgentSection mode="Survival">
                <Name>MalmoTutorialBot</Name>
                <AgentStart>
                    <Inventory>
                        <InventoryItem slot="8" type="diamond_pickaxe"/>
                    </Inventory>
                </AgentStart>
                <AgentHandlers>
                    <ObservationFromFullStats/>
                    <ObservationFromGrid>
                        <Grid name="all_the_blocks" >
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="2" z="1"/>
                        </Grid>
                    </ObservationFromGrid>
                    <ContinuousMovementCommands turnSpeedDegs="180"/>
                </AgentHandlers>
              </AgentSection>
            </Mission>'''

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.timeLimitInSeconds(300)
    my_mission.requestVideo(640, 480)

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")

    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Mission start can fail transiently while the client initialises;
    # retry a few times before giving up.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    last_delta = time.time()

    # Main loop. JUMP and TURN2 are tick counters that keep a jump or a
    # forced turn active across iterations; TURN drives the weaving path.
    TURN = 0
    TURN2 = 0
    JUMP = 0
    while world_state.is_mission_running:
        print("New Iteration")

        # Count the jump down and release the key when it expires.
        if JUMP > 0:
            JUMP = JUMP - 1
        if JUMP == 0:
            agent_host.sendCommand("jump 0")
            JUMP = JUMP - 1
        agent_host.sendCommand("move 1")
        # Weave left/right following a sine wave so the agent explores.
        if math.sin(TURN) / 3 >= 0:
            agent_host.sendCommand("turn 0.15")
        else:
            agent_host.sendCommand("turn -0.2")
        print(TURN, " ", math.sin(TURN))
        TURN = TURN + 0.3

        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        if world_state.observations:
            obs = json.loads(world_state.observations[-1].text)
            JUMP, TURN2 = _avoidance_commands(agent_host, obs, JUMP, TURN2)
        # (An empty observation list used to crash on observations[-1];
        # the avoidance step is now simply skipped for that tick.)

        # Watchdog: restart Minecraft when no data arrives for too long.
        if (world_state.number_of_video_frames_since_last_state > 0
                or world_state.number_of_observations_since_last_state > 0
                or world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print()
    print("Mission has stopped.")
示例#21
0
文件: malmo.py 项目: avisingh599/icnn
    def __init__(self,
                 maze_def,
                 reset,
                 video_dim=(32, 32),
                 num_parallel=1,
                 time_limit=30,
                 discrete_actions=False,
                 vision_observation=True,
                 depth=False,
                 num_frames=1,
                 grayscale=True):
        """Build a maze-navigation Malmo environment.

        Args:
            maze_def: maze definition forwarded to create_maze().
            reset: forwarded to MissionGen.generate_mission().
            video_dim: (width, height) of the requested video frames.
            num_parallel: number of Malmo clients to pool (ports 10000+).
            time_limit: mission time limit in seconds.
                NOTE(review): currently unused — the timeLimitInSeconds call
                below is commented out.
            discrete_actions: use a Discrete action space instead of a Box.
            vision_observation: observe raw frames instead of state features.
            depth: depth flag (stored; not used within this constructor).
            num_frames: number of stacked frames per vision observation.
            grayscale: single-channel frames (bounds [0, 1]) instead of RGB.
        """
        self.video_width, self.video_height = video_dim
        self.image_width, self.image_height = video_dim
        self.discrete_actions = discrete_actions
        self.vision_observation = vision_observation
        self.depth = depth
        self.num_parallel = num_parallel

        # Generate the maze mission and remember the goal coordinates
        # (goal_pos indices 0 and 2).
        maze = create_maze(maze_def)
        self.mission_gen = MissionGen()
        self.mission = self.mission_gen.generate_mission(
            maze.create_maze_array(), reset=reset)
        self.XGoalPos, self.YGoalPos = self.mission_gen.goal_pos[
            0], self.mission_gen.goal_pos[2]

        # with open(mission_file, 'r') as f:
        #     print("Loading mission from %s" % mission_file)
        #     mission_xml = f.read()
        #     self.mission = MalmoPython.MissionSpec(mission_xml, True)
        # NOTE(review): Malmo's MissionSpec.requestVideo takes (width, height);
        # height is passed first here — harmless while frames are square (the
        # default 32x32), confirm before using non-square dimensions.
        self.mission.requestVideo(self.video_height, self.video_width)
        self.mission.observeRecentCommands()
        self.mission.allowAllContinuousMovementCommands()
        # self.mission.timeLimitInSeconds(time_limit)

        # Pool one Minecraft client per parallel instance.
        if self.num_parallel > 1:
            self.client_pool = MalmoPython.ClientPool()
            for i in range(num_parallel):
                port = 10000 + i
                self.client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))

        self.agent_host = MalmoPython.AgentHost()
        self.agent_host.setObservationsPolicy(
            MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
        # self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
        self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.KEEP_ALL_FRAMES)
        # self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

        self.mission_record_spec = MalmoPython.MissionRecordSpec()

        # Action space: small discrete command set, or a continuous Box built
        # from (command, (low, high)) pairs.
        if discrete_actions:
            self._action_set = {0: "move 1", 1: "turn 1", 2: "turn -1"}
            self.action_space = Discrete(n=len(self._action_set))
        else:
            # self._action_set = ["move", "turn", "pitch"]
            # self.action_space = Box(np.array([0, -.5, -.5]), np.array([1, .5, .5]))
            self._action_set = [("move", (-1, 1)), ("turn", (-0.5, 0.5))]
            #("jump", (-1, 1))]
            lower_bound = np.asarray([x[1][0] for x in self._action_set])
            upper_bound = np.asarray([x[1][1] for x in self._action_set])
            self.action_space = Box(lower_bound, upper_bound)

        # Frame stacking / colour configuration for vision observations.
        self.num_frames = num_frames
        self.grayscale = grayscale
        if self.grayscale:
            self.num_frame_channels = 1
            high = 1  # upper bound (grayscale presumably normalised to [0, 1])
        else:
            self.num_frame_channels = 3
            high = 255  # raw RGB byte values

        # Obs keys and bounds
        x_bounds = self.mission_gen.x_bounds
        z_bounds = self.mission_gen.z_bounds
        # Largest possible goal distance; bounds the distanceFromGoal feature.
        self.max_dist = np.linalg.norm((x_bounds[-1], z_bounds[-1]))
        self.minDistanceFromGoal = None
        if self.vision_observation:
            # Stacked frames, channels-first: (num_frames * channels, H, W).
            self.observation_space = Box(
                low=0,
                high=high,
                shape=(self.num_frames * self.num_frame_channels,
                       self.image_height, self.image_width))
        else:

            # Feature observation: (key, (low, high)) pairs read from Malmo's
            # JSON observations.
            self.obs_keys = [(u'XPos', x_bounds), (u'ZPos', z_bounds),
                             (u'yaw', (0, 360)), (u'XGoalPos', x_bounds),
                             (u'YGoalPos', z_bounds),
                             (u'DistanceTravelled', (0, 30)),
                             (u'distanceFromGoal', (0, self.max_dist))]
            l_bounds = [key[1][0] for key in self.obs_keys]
            u_bounds = [key[1][1] for key in self.obs_keys]
            self.observation_space = Box(np.array(l_bounds),
                                         np.array(u_bounds))
        # self._horizon = env.spec.timestep_limit
        # Per-episode bookkeeping.
        self.last_obs = None
        self.cum_reward = 0
        self.distance_travelled = 0
        self.terminal = False
        self.jump = 0
示例#22
0
            and AGENT_COOLDOWNS[1] <= 0 and not AGENT_IS_SHIELDING[1]:
        agent.sendCommand("attack 1")
        agent.sendCommand("attack 0")

        AGENT_COOLDOWNS[1] = ATTACK_COOLDOWNS[AGENT_WEAPONS[1]]


if __name__ == "__main__":
    # Flush immediately
    print = functools.partial(print, flush=True)

    # One agent host per participant.
    agent_hosts = [Malmo.AgentHost() for _ in range(AGENT_COUNT)]

    # Client pool: one running Minecraft client port per agent.
    client_pool = Malmo.ClientPool()
    client_pool.add(Malmo.ClientInfo("127.0.0.1", 10000))
    client_pool.add(Malmo.ClientInfo("127.0.0.1", 10002))

    for mission_index in range(MISSION_COUNT):
        print(f"Running mission #{mission_index}...")
        # Create missions
        mission = Malmo.MissionSpec(get_mission_xml(), True)
        mission_id = str(uuid.uuid4())

        # Start the mission once per agent role. (The inner loop previously
        # reused the outer loop variable `a`, shadowing the mission index.)
        for role in range(AGENT_COUNT):
            start_mission(agent_hosts[role], mission, client_pool,
                          Malmo.MissionRecordSpec(), role, mission_id)

        wait_for_start(agent_hosts)
def deep_q_learning_run(sess,
                        agent_host,
                        q_estimator,
                        state_processor,
                        experiment_dir,
                        epsilon_start=1.0,
                        epsilon_end=0.1,
                        epsilon_decay_steps=8000):
    """
    Run a single Malmo mission episode using a restored model checkpoint.

    The mission XML is loaded from the agent host's 'mission_file' argument,
    a local Minecraft client (127.0.0.1:10000) is used, and the model under
    <experiment_dir>/checkpoints/model is restored into `sess`.

    NOTE(review): although an epsilon-greedy policy is built below, the
    action loop picks actions uniformly at random via randint(0, 3) and
    never consults the policy — confirm whether this is intentional
    (e.g. a random baseline run).

    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost used to start and drive the mission
        q_estimator: Estimator object used for the q values (only used to
          build the — currently unused — policy)
        state_processor: A StateProcessor object used to preprocess frames
        experiment_dir: Directory containing the "checkpoints" subdirectory
        epsilon_start: Start value of the epsilon decay schedule
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
    Returns:
        None. The mission runs to completion for its side effects only.
    """
    # Load and configure the mission from the file given on the command line.
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    print("Checkpoint dir is:", checkpoint_dir)
    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    # latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    # print("~~~~~~~~~~~~~~", latest_checkpoint)
    # exit(0)
    # NOTE(review): os.path.join always returns a non-empty string, so the
    # `if` below is always true and restore is attempted unconditionally;
    # saver.restore raises if no checkpoint files exist on disk.
    latest_checkpoint = os.path.join(checkpoint_dir, "model")
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "save_%s-rep" % (expID))

    # Start the mission, retrying a few times if the client is busy.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    agentID, "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    # Block until the mission has actually begun, reporting any errors.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Tilt the camera down two steps before acting.
    agent_host.sendCommand("look -1")
    agent_host.sendCommand("look -1")

    # Busy-wait until a non-empty observation arrives.
    while world_state.is_mission_running and all(
            e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()
    # Populate the replay memory with initial experience

    while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    # Initial state: one processed frame stacked 4 deep along the channel axis.
    state = gridProcess(
        world_state
    )  # MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)

    stepNum = 0
    while world_state.is_mission_running:

        # Uniform random action in [0, 3]; see NOTE(review) in the docstring.
        action = randint(0, 3)
        print("actions:", action)
        # next_state, reward, done, _ = env.step(actionSet[action]) # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        print("Step %s" % stepNum)
        stepNum += 1
        agent_host.sendCommand(actionSet[action])

        world_state = agent_host.peekWorldState()

        num_frames_seen = world_state.number_of_video_frames_since_last_state

        # Wait for at least one new video frame after taking the action.
        while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            # Getting the reward from taking a step
            while world_state.number_of_observations_since_last_state <= 0:
                time.sleep(0.1)
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            # Slide the 4-frame window: drop the oldest frame, append newest.
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:],
                                   np.expand_dims(next_state, 2),
                                   axis=2)
            state = next_state
        # time.sleep(1)

    return None
示例#24
0
# Smoke tests: with no recording options given, malmoutils should report
# empty defaults for video XML and the recordings directory.
# See if we can parse our extended command line.
malmoutils.parse_command_line(agentHost)

# As we are not recording our video xml should be an empty string.
assert malmoutils.get_video_xml(agentHost) == ''

# Test that we can get a default recording spec.
assert type(malmoutils.get_default_recording_object(
    agentHost, "test")) == MalmoPython.MissionRecordSpec

# Default recordings directory is ''.
assert malmoutils.get_recordings_directory(agentHost) == ''


def clientInfos(cp):
    """Return an (ip_address, control_port, command_port) tuple for each client in the pool, in order."""
    infos = []
    for client in cp.clients:
        infos.append((client.ip_address, client.control_port, client.command_port))
    return infos


# Test adding some client infos to a client pool.
# Each client info is an (ip_address, control_port, command_port) triple;
# the pool is expected to preserve insertion order.
clientPool = MalmoPython.ClientPool()
assert len(clientPool.clients) == 0
c1 = ("localhost", 10000, 0)
client1 = MalmoPython.ClientInfo(*c1)
clientPool.add(client1)
assert clientInfos(clientPool) == [c1]
c2 = ("127.0.0.1", 10001, 20001)
client2 = MalmoPython.ClientInfo(*c2)
clientPool.add(client2)
assert clientInfos(clientPool) == [c1, c2]
def cwc_run_mission(args):
    """
    Run one CWC building mission end to end.

    Depending on args["create_target_structures"], either:
      * normal mode: starts an oracle, a builder, an architect and
        args["num_fixed_viewers"] fixed-viewer agents, polls the builder's
        observations until the mission ends, and logs them to
        "raw-observations.json"; or
      * target-creation mode: starts a single builder agent and writes the
        built structure out as a gold-configuration XML file.

    Args:
        args: dict of mission settings — IP addresses/ports, player ids,
          config file paths, and mode flags (see the keys read below).
    """
    print("Calling cwc_run_mission with args:", args, "\n")
    start_time = time.time()

    # Unpack network endpoints for the three agent roles.
    builder_ip, builder_port = args["builder_ip_addr"], args["builder_port"]
    architect_ip, architect_port = args["architect_ip_addr"], args[
        "architect_port"]
    fixed_viewer_ip, fixed_viewer_port, num_fixed_viewers = args[
        "fixed_viewer_ip_addr"], args["fixed_viewer_port"], args[
            "num_fixed_viewers"]

    draw_inventory_blocks = args["draw_inventory_blocks"]
    existing_is_gold = args["existing_is_gold"]
    create_target_structures = args["create_target_structures"]
    # In target-creation mode the builder is the only (index 0) agent;
    # otherwise index 0 is the oracle and index 1 is the builder.
    builder_idx = 0 if create_target_structures else 1

    # Refuse to overwrite an existing gold-configuration file.
    if create_target_structures and os.path.isfile(args["gold_config"]):
        print(
            "ERROR: attempting to create target structure",
            args["gold_config"],
            "but it already exists! Please update the configs_csv file to include file paths for NEW target structures only."
        )
        sys.exit(0)

    # Create agent hosts:
    agent_hosts = []
    for i in range((3 +
                    num_fixed_viewers) if not create_target_structures else 1):
        agent_hosts.append(MalmoPython.AgentHost())

    # Set observation policy for builder
    agent_hosts[builder_idx].setObservationsPolicy(
        MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)

    # Set up a client pool
    client_pool = MalmoPython.ClientPool()

    if not args["lan"]:
        print("Starting in local mode.")
        client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

        if not create_target_structures:
            client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))
            client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10002))

            for i in range(num_fixed_viewers):
                client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10003 + i))
    else:
        print("Builder IP: " + builder_ip, "\tPort:", builder_port)
        print("Architect IP:", architect_ip, "\tPort:", architect_port)
        print("FixedViewer IP:", fixed_viewer_ip, "\tPort:", fixed_viewer_port,
              "\tNumber of clients:", num_fixed_viewers, "\n")

        if not create_target_structures:
            client_pool.add(
                MalmoPython.ClientInfo(architect_ip, architect_port + 1))
            client_pool.add(MalmoPython.ClientInfo(builder_ip, builder_port))
            client_pool.add(
                MalmoPython.ClientInfo(architect_ip, architect_port))

            for i in range(num_fixed_viewers):
                client_pool.add(
                    MalmoPython.ClientInfo(fixed_viewer_ip,
                                           fixed_viewer_port + i))
        else:
            client_pool.add(MalmoPython.ClientInfo(builder_ip, builder_port))

    # experiment ID: "B<builder>-A<architect>-<config>-<epoch millis>"
    player_ids = "B" + args["builder_id"] + "-A" + args["architect_id"]
    config_id = os.path.basename(args["gold_config"]).replace(".xml", "")
    experiment_time = str(int(round(time.time() * 1000)))
    experiment_id = player_ids + "-" + config_id + "-" + experiment_time

    # obtain xml substrings
    gold_config_xml_substring = io_utils.readXMLSubstringFromFile(
        args["gold_config"], False) if not create_target_structures else ""
    existing_config_xml_substring = io_utils.readXMLSubstringFromFile(
        args["existing_config"], existing_is_gold)

    # construct mission xml
    missionXML = generateMissionXML(experiment_id,
                                    existing_config_xml_substring,
                                    num_fixed_viewers, draw_inventory_blocks,
                                    create_target_structures)
    missionXML_oracle = generateOracleXML(experiment_id,
                                          gold_config_xml_substring)

    if not create_target_structures:
        # oracle
        my_mission_oracle = MalmoPython.MissionSpec(missionXML_oracle, True)
        mission_utils.safeStartMission(agent_hosts[0], my_mission_oracle,
                                       client_pool,
                                       MalmoPython.MissionRecordSpec(), 0,
                                       "cwc_dummy_mission_oracle")

    # builder, architect
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    mission_utils.safeStartMission(agent_hosts[builder_idx],
                                   my_mission, client_pool,
                                   MalmoPython.MissionRecordSpec(), 0,
                                   "cwc_dummy_mission")

    if not create_target_structures:
        mission_utils.safeStartMission(agent_hosts[2], my_mission, client_pool,
                                       MalmoPython.MissionRecordSpec(), 1,
                                       "cwc_dummy_mission")

        # fixed viewers
        for i in range(num_fixed_viewers):
            mission_utils.safeStartMission(agent_hosts[3 + i], my_mission,
                                           client_pool,
                                           MalmoPython.MissionRecordSpec(),
                                           2 + i, "cwc_dummy_mission")

    mission_utils.safeWaitForStart(agent_hosts)

    # poll for observations
    # Loop once per second until any agent reports the mission has ended;
    # only the builder's observations are collected.
    timed_out = False
    all_observations = []
    while not timed_out:
        for i in range((
                3 + num_fixed_viewers) if not create_target_structures else 1):
            ah = agent_hosts[i]
            world_state = ah.getWorldState()

            if not world_state.is_mission_running:
                timed_out = True

            elif i == builder_idx and world_state.number_of_observations_since_last_state > 0:
                total_elements = 0
                for observation in world_state.observations:
                    total_elements += len(json.loads(observation.text))

                print("Received", len(world_state.observations),
                      "observations. Total number of elements:",
                      total_elements)
                for observation in world_state.observations:
                    print("Processing observation:", )
                    debug_utils.printObservationElements(
                        json.loads(observation.text))
                    all_observations.append(observation)

                print("-----")

        time.sleep(1)

    time_elapsed = time.time() - start_time

    agent_hosts[0].sendCommand("chat /kill")

    print("Mission has been quit. All world states:\n")

    # Convert the raw Malmo observations to plain dicts with a timestamp.
    all_world_states = []
    for observation in all_observations:
        world_state = json.loads(observation.text)
        world_state["Timestamp"] = observation.timestamp.replace(
            microsecond=0).isoformat(' ')
        debug_utils.prettyPrintObservation(world_state)
        all_world_states.append(world_state)

    raw_observations = {
        "WorldStates": all_world_states,
        "TimeElapsed": time_elapsed,
        "NumFixedViewers": num_fixed_viewers
    }

    if not create_target_structures:
        io_utils.writeJSONtoLog(experiment_id, "raw-observations.json",
                                raw_observations)
    else:
        # Target-creation mode: post-process the observations into a gold
        # configuration and write it to both configured locations.
        reformatted = reformatObservations(raw_observations.get("WorldStates"))
        merged = mergeObservations(reformatted)
        _ = postprocess(merged, False)
        time_elapsed = raw_observations.get("TimeElapsed")
        # NOTE(review): m/s/h computed here are overwritten by the divmod
        # pair below before being used — this pair appears redundant.
        m, s = divmod(time_elapsed, 60)
        h, m = divmod(m, 60)
        raw_observations["WorldStates"] = merged

        print(json.dumps(raw_observations, indent=4))
        xml_str = get_gold_config_xml(raw_observations)

        if len(xml_str) > 0:
            with open(args['gold_config'], 'w') as f:
                f.write(xml_str)
            with open(
                    os.path.join('../../../../cwc-minecraft-models/data',
                                 args['gold_config']), 'w') as f:
                f.write(xml_str)
            print("Wrote gold configuration to", args["gold_config"],
                  " (" + str(len(xml_str.split('\n')) - 1) + ' blocks)')
        else:
            print(
                "WARNING: creating target structures: created structure was empty. Configuration",
                args["gold_config"], "not saved.")

    # Report total wall-clock mission time as H:MM:SS.
    m, s = divmod(time_elapsed, 60)
    h, m = divmod(m, 60)
    print("Done! Mission time elapsed: %d:%02d:%02d (%.2fs)\n" %
          (h, m, s, time_elapsed))

    print("Waiting for mission to end...")
    # Mission should have ended already, but we want to wait until all the various agent hosts
    # have had a chance to respond to their mission ended message.
    hasEnded = False
    while not hasEnded:
        hasEnded = True  # assume all good
        sys.stdout.write('.')
        time.sleep(0.1)
        for ah in agent_hosts[1:3]:
            world_state = ah.getWorldState()
            if world_state.is_mission_running:
                hasEnded = False  # all not good

    print("Mission ended")
    # Mission has ended.

    time.sleep(2)
示例#26
0
                    <Mob type="Skeleton" reward="1"/>
                </RewardForDamagingEntity>
                <ObservationFromNearbyEntities>
                    <Range name="entities" xrange="''' + str(
                              ARENA_WIDTH) + '''" yrange="2" zrange="''' + str(
                                  ARENA_BREADTH) + '''" />
                </ObservationFromNearbyEntities>
                <ObservationFromFullStats/>''' + video_requirements + '''
            </AgentHandlers>
        </AgentSection>

    </Mission>'''


validate = True
# Single local Minecraft client on the default Malmo port.
my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))

episode_reward = 0
# One repetition in test mode, otherwise (effectively) run indefinitely.
if agent_host.receivedArgument("test"):
    num_reps = 1
else:
    num_reps = 10000
# NOTE(review): results file is opened without a context manager; confirm
# it is closed (or at least flushed) after the episode loop below.
fout = open('results.csv', 'w')
# Set up the agent
agent = agentMC.agentMC(agent_host, MAX_ZOMBIES, MAX_DISTANCE, 20)
for i in range(num_reps):
    print('episode:', i)
    for iRepeat in range(1, MAX_ZOMBIES):
        #########################################
        #       Set up the enviornment          #
 def __init__(self, xml):
     """Initialize mission bookkeeping: timers, agent list, Malmo client pool, and mission XML."""
     self.missionXML = xml
     self.clientPool = MalmoPython.ClientPool()
     self.agents = []
     self.start_time = 0
     self.timer = 0
示例#28
0
def deep_q_learning(sess,
                    agent_host,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=500000,
                    replay_memory_init_size=50000,
                    update_target_estimator_every=10000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=50000,
                    batch_size=32,
                    record_video_every=100):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.
    Runs as a generator: yields (total_t, EpisodeStats) after every episode.
    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost used in place of an OpenAI environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sample when initializing
          the replay memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the
          target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
          Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes (currently only
          referenced from commented-out Monitor code below)
    Yields:
        (total_t, EpisodeStats) where EpisodeStats holds the numpy arrays of
        episode_lengths and episode_rewards accumulated so far.
    """
    # Load and configure the Malmo mission from the command-line mission file.
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'



    Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(
        q_estimator,
        len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(agent_host,
                                                                "./save_%s-rep" % (expID))


    # Start the mission, retrying in case the Minecraft client is busy.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" %(expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    # Block until the mission has actually begun, reporting any errors.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # world_state = agent_host.peekWorldState()
    # Busy-wait until the first non-empty observation arrives.
    while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()

    world_state = agent_host.getWorldState()

    # Populate the replay memory with initial experience
    print("Populating replay memory...")

    while world_state.number_of_observations_since_last_state <= 0:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    # Initial state: one processed frame stacked 4 deep along the channel axis.
    state = gridProcess(world_state) #MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)

    for i in range(replay_memory_init_size):
        print("%s th replay memory" %i)

        # Epsilon-greedy action under the current decay schedule.
        action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        # next_state, reward, done, _ = env.step(actionSet[action]) # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        agent_host.sendCommand(actionSet[action])
        #checking if the mission is done
        world_state = agent_host.peekWorldState()
        #Getting the reward from taking a step
        if world_state.number_of_rewards_since_last_state > 0:
            reward = world_state.rewards[-1].getValue()
            print("Just received the reward: %s on action: %s "%(reward, actionSet[action]))
        else:
            print("No reward")
            reward = 0
        #getting the next state
        while world_state.number_of_observations_since_last_state <=0 and world_state.is_mission_running:
            print("Sleeping")
            time.sleep(0.1)
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
            done = not world_state.is_mission_running
            replay_memory.append(Transition(state, action, reward, next_state, done))
            state = next_state
        else:
            # Mission ended mid-fill: restart it and keep collecting.
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
            world_state = agent_host.peekWorldState()
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            world_state = agent_host.getWorldState()
            if not world_state.is_mission_running:
                print("Breaking")
                break
            state = gridProcess(world_state) # Malmo GetworldState? / env.reset()
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)

    print("Finished populating memory")

    # Record videos
    # Use the gym env Monitor wrapper
    # env = Monitor(env,
    #               directory=monitor_path,
    #               resume=True,
    #               video_callable=lambda count: count % record_video_every ==0)

    # NEED TO RECORD THE VIDEO AND SAVE TO THE SPECIFIED DIRECTORY

    for i_episode in range(num_episodes):
        print("%s-th episode"%i_episode)
        # Restart the mission for every episode after the first (the first
        # episode reuses the mission left running by the fill loop above).
        if i_episode != 0:
            mission_file = agent_host.getStringArgument('mission_file')
            with open(mission_file, 'r') as f:
                print("Loading mission from %s" % mission_file)
                mission_xml = f.read()
                my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission.removeAllCommandHandlers()
            my_mission.allowAllDiscreteMovementCommands()
            # my_mission.requestVideo(320, 240)
            my_mission.forceWorldReset()
            my_mission.setViewpoint(2)
            my_clients = MalmoPython.ClientPool()
            my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

            max_retries = 3
            agentID = 0
            expID = 'Deep_q_learning '

            # NOTE(review): `i` below is the stale index left over from the
            # replay-memory fill loop above; `i_episode` was likely intended
            # both here and in the startMission experiment id.
            my_mission_record = malmoutils.get_default_recording_object(agent_host,
                                                                        "./save_%s-rep%d" % (expID, i))

            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s-%d" % (expID, i))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            print("Waiting for the mission to start", end=' ')
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
                for error in world_state.errors:
                    print("Error:", error.text)

        # Save the current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)
        # world_state = agent_host.getWorldState()
        # Reset the environment
        # world_state = agent_host.peekWorldState()
        while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
            # print("Sleeping!!!")
            world_state = agent_host.peekWorldState()
        world_state = agent_host.getWorldState()
        state = gridProcess(world_state)  #MalmoGetWorldState?
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)
        loss = None

        # One step in the environment
        for t in itertools.count():

            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print("\nCopied model parameters to target network.")

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                    t, total_t, i_episode + 1, num_episodes, loss), end="")
            sys.stdout.flush()

            # Take a step
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
            # next_state, reward, done, _ = env.step(actionSet[action]) # Malmo AgentHost send command?
            # print("Sending command: ", actionSet[action])
            agent_host.sendCommand(actionSet[action])

            world_state = agent_host.peekWorldState()

            if world_state.number_of_rewards_since_last_state > 0:
                reward = world_state.rewards[-1].getValue()
                print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
            else:
                print("No reward")
                reward = 0
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                # print("Sleeping!!!")
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            # if not world_state.is_mission_running:
            #     print("Breaking")
            #     break
            done = not world_state.is_mission_running
            print(" IS MISSION FINISHED? ", done)
            # if done:
            #     print("Breaking before updating last reward")
            #     break

            # Slide the 4-frame window: drop the oldest frame, append newest.
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)



            # If our replay memory is full, pop the first element
            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)

            # Save transition to replay memory
            replay_memory.append(Transition(state, action, reward, next_state, done))

            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] = t

            # Sample a minibatch from the replay memory
            samples = random.sample(replay_memory, batch_size)
            states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))

            # Calculate q values and targets (Double DQN)
            q_values_next = q_estimator.predict(sess, next_states_batch)
            best_actions = np.argmax(q_values_next, axis=1)
            q_values_next_target = target_estimator.predict(sess, next_states_batch)
            targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
                discount_factor * q_values_next_target[np.arange(batch_size), best_actions]

            # Perform gradient descent update
            states_batch = np.array(states_batch)
            loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)

            if done:
                print("End of Episode")
                break

            state = next_state
            total_t += 1

        # Add summaries to tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], node_name="episode_reward", tag="episode_reward")
        episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], node_name="episode_length", tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        # Yield running statistics after every episode (generator protocol).
        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode+1],
            episode_rewards=stats.episode_rewards[:i_episode+1])

    # env.monitor.close()
    return stats
示例#29
0
		</Mission>
		'''
		return missionXML.format(src=seedfile, limit=timelimit, xcoord=random.randint(0,300), zcoord=random.randint(100, 350), tlimit=eptime)
# Screenshot-collection driver: keep launching missions until `numphotos`
# images have been captured. `counter` starts non-zero, apparently resuming
# a previous partial run.
agent_id = 10001
counter = 9019
while counter < numphotos:
    agent_host = MalmoPython.AgentHost()

    # Build a fresh randomized mission for this iteration.
    # NOTE(review): if XML generation fails, my_mission / my_mission_record
    # stay stale (or undefined on the first iteration) yet execution
    # continues to startMission below.
    try:
        missionXML = generateXMLbySeed()
        my_mission = MalmoPython.MissionSpec(missionXML, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
    except Exception as e:
        print("open mission ERROR: ", e)

    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000)) # add Minecraft machines here as available
    # NOTE(review): agent_id is incremented but startMission below always
    # uses role 0 — confirm agent_id is consumed later in the loop body.
    agent_id += 1
    # Attempt to start a mission:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, 0, "IMGCOLLECTOR")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
示例#30
0
def main(get_agent0_action, get_agent1_action):
    """Run a two-agent cookie-pickup mission and report the winner.

    Starts the mission described by ./hw2.xml on two local Minecraft
    clients (ports 10000/10001), then alternates turns: each agent reads
    its latest observation, picks an action via the supplied policy
    callback, and sends the corresponding commands. The mission loop ends
    when it stops running or the combined cookie count reaches
    ``num_cookies`` (a global defined elsewhere in this file).

    NOTE: Python 2 code (print statements). Relies on module-level
    ``extract_observation``, ``PickupState``, ``actions`` and
    ``num_cookies`` defined elsewhere in the file.

    Args:
        get_agent0_action: callable(PickupState) -> action key for agent 0.
        get_agent1_action: callable(PickupState) -> action key for agent 1.
    """
    # One client pool entry per agent; both Minecraft instances are local.
    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

    # Keep only the newest observation so each turn acts on fresh state.
    agent_host0 = MalmoPython.AgentHost()
    agent_host0.setObservationsPolicy(
        MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

    agent_host1 = MalmoPython.AgentHost()
    agent_host1.setObservationsPolicy(
        MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

    # Load and validate the mission XML from disk.
    mission_file = './hw2.xml'
    my_mission = None
    with open(mission_file, 'r') as f:
        print "Loading mission from %s" % mission_file
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)

    # Attempt to start a mission:
    # Agent 0 (role 0) starts first; it hosts the multi-agent mission.
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host0.startMission(my_mission, client_pool,
                                     MalmoPython.MissionRecordSpec(), 0, '')
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print "Error starting mission:", e
                exit(1)
            else:
                time.sleep(2)

    # Give the host client time to stand the mission up before the second
    # agent tries to join.
    time.sleep(10)

    # Agent 1 (role 1) joins; joining can fail repeatedly while the host
    # initializes, hence the larger retry budget.
    max_retries = 30
    for retry in range(max_retries):
        try:
            agent_host1.startMission(my_mission, client_pool,
                                     MalmoPython.MissionRecordSpec(), 1, '')
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print "Error starting mission:", e
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print "Waiting for the mission to start ",
    world_state0 = agent_host0.peekWorldState()
    while not world_state0.is_mission_running:
        sys.stdout.write(".")
        time.sleep(0.1)
        world_state0 = agent_host0.peekWorldState()
        for error in world_state0.errors:
            print "Error:", error.text

    print
    print "Mission running ",
    print

    # cookie_counts[i] is agent i's current score, refreshed each turn
    # from its observation via extract_observation.
    cookie_counts = [0, 0]
    while world_state0.is_mission_running:
        # Fixed 2-second turn cadence for agent 0.
        time.sleep(2.0)

        world_state0 = agent_host0.getWorldState()
        if world_state0.is_mission_running:
            # Stop early once all cookies are accounted for.
            if cookie_counts[0] + cookie_counts[1] >= num_cookies:
                break

            # NOTE(review): observations[-1] raises IndexError if no
            # observation has arrived yet — presumably the 2s sleep makes
            # that unlikely; confirm.
            msg = world_state0.observations[-1].text
            grid, count = extract_observation(msg)

            # Ask the policy for agent 0's move and replay its command
            # sequence with small gaps so Minecraft registers each one.
            s = PickupState(grid, 0, cookie_counts)
            action = get_agent0_action(s)
            print 'Agent0 taking action: {0}'.format(action)
            for command in actions[action]:
                agent_host0.sendCommand(command)
                time.sleep(0.1)

            # Re-read state after moving to refresh agent 0's score.
            world_state0 = agent_host0.peekWorldState()
            msg = world_state0.observations[-1].text
            grid, count = extract_observation(msg)
            cookie_counts[0] = count

        # Fixed 2-second turn cadence for agent 1.
        time.sleep(2.0)

        world_state1 = agent_host1.getWorldState()
        if world_state1.is_mission_running:
            if cookie_counts[0] + cookie_counts[1] >= num_cookies:
                break

            msg = world_state1.observations[-1].text
            grid, count = extract_observation(msg)
            cookie_counts[1] = count

            print cookie_counts
            # Re-check after refreshing agent 1's count — it may have just
            # collected the last cookie.
            if cookie_counts[0] + cookie_counts[1] >= num_cookies:
                break

            s = PickupState(grid, 1, cookie_counts)
            action = get_agent1_action(s)
            print 'Agent1 taking action: {0}'.format(action)
            for command in actions[action]:
                agent_host1.sendCommand(command)
                time.sleep(0.1)

            # Refresh agent 1's score after its move.
            world_state1 = agent_host1.peekWorldState()
            msg = world_state1.observations[-1].text
            grid, count = extract_observation(msg)
            cookie_counts[1] = count

    # Final scoreboard: the three branches cover >, ==, <.
    if cookie_counts[0] > cookie_counts[1]:
        print "Agent0 wins with a score of {0} - {1}".format(
            cookie_counts[0], cookie_counts[1])
    elif cookie_counts[0] == cookie_counts[1]:
        print "Tie with a score of {0} - {1}".format(cookie_counts[0],
                                                     cookie_counts[1])
    elif cookie_counts[0] < cookie_counts[1]:
        print "Agent1 wins with a score of {0} - {1}".format(
            cookie_counts[0], cookie_counts[1])