Example No. 1
    def init_malmo(self, recordings_directory=DEFAULT_RECORDINGS_DIR):
        if self.is_malmo_initialized:
            return

        launch_minecraft_in_background('/app/MalmoPlatform/Minecraft',
                                       ports=[10000, 10001])

        # Set up two agent hosts
        self.agent_host_bot = MalmoPython.AgentHost()
        self.agent_host_camera = MalmoPython.AgentHost()

        # Build the list of Minecraft clients to attach to. The clients must
        # already have been launched (which init_malmo() does above) before
        # record_malmo_video is called.
        self.client_pool = MalmoPython.ClientPool()
        self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
        self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

        # Use the bot's AgentHost to hold the command-line options
        malmoutils.parse_command_line(
            self.agent_host_bot,
            ['--record_video', '--recording_dir', recordings_directory])

        self.is_malmo_initialized = True
Example No. 2
    def __init__(
        self, missionXML, validate, setup_mission=None, ip="127.0.0.1", port=10000
    ):
        super().__init__()
        self.agent_host = MalmoPython.AgentHost()
        self.clientPool = MalmoPython.ClientPool()
        self.clientPool.add(MalmoPython.ClientInfo(ip, port))
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print("ERROR:", e)
            print(self.agent_host.getUsage())
            exit(1)
        if self.agent_host.receivedArgument("help"):
            print(self.agent_host.getUsage())
            exit(0)

        self.mission = MalmoPython.MissionSpec(missionXML, validate)
        self.mission_record = MalmoPython.MissionRecordSpec()
        if setup_mission is not None:
            setup_mission(self.mission)
Example No. 3
def create_mission(ind, agent_host):
    my_mission = MalmoPython.MissionSpec(GetMissionXML(), True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission.requestVideo(800, 500)
    my_mission.setViewpoint(1)
    # Attempt to start a mission:
    max_retries = 3
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available
    for i in range(len(legalPosList)):
        if i == itemPosId:
            my_mission.drawBlock(legalPosList[itemPosId][0], 1,
                                 legalPosList[itemPosId][1], "emerald_block")
        elif i == destPosId:
            my_mission.drawBlock(legalPosList[destPosId][0], 1,
                                 legalPosList[destPosId][1], "diamond_block")
        else:
            my_mission.drawBlock(legalPosList[i][0], 1, legalPosList[i][1],
                                 "stone")
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    0, "%s-%d" % ('Herobrine', ind))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.peekWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    time.sleep(1)
    return my_mission
Example No. 4
    def init_malmo(self):
        """
            Initialize new malmo mission.
        """
        my_mission = MalmoPython.MissionSpec(self.getMissonXML(), True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(1)

        max_retries = 3
        my_clients = MalmoPython.ClientPool()
        my_clients.add(MalmoPython.ClientInfo(
            '127.0.0.1', 10000))  # add Minecraft machines here as available

        for retry in range(max_retries):
            try:
                self.agent_host.startMission(my_mission, my_clients,
                                             my_mission_record, 0,
                                             'MineFarm Farmer')
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission:", e)
                    exit(1)
                else:
                    time.sleep(2)

        world_state = self.agent_host.getWorldState()
        while not world_state.has_mission_begun:
            time.sleep(0.1)
            world_state = self.agent_host.getWorldState()
            for error in world_state.errors:
                print("\nError:", error.text)
        self.agent_host.sendCommand(
            "chat /give @p diamond_sword 1 0 {ench:[{id:16,lvl:20}]}")

        return world_state
Example No. 5
#     exit(1)
# mission_file = os.path.abspath(schema_dir)
# if not os.path.exists(mission_file):
#     logger.info("Could not find Maze.xml under MALMO_XSD_PATH")
#     exit(1)
# add some args
agent_host.addOptionalStringArgument('mission_file',
                                     'Path/to/file from which to load the mission.', mission_file)
agent_host.addOptionalFlag('load_model', 'Load initial model from model_file.')
agent_host.addOptionalStringArgument('model_file', 'Path to the initial model file', '')
agent_host.addOptionalFlag('debug', 'Turn on debugging.')
agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.LATEST_REWARD_ONLY)
agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
malmoutils.parse_command_line(agent_host)
my_clients = MalmoPython.ClientPool()
my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
agentID = 0
################################################

############################### prepare training
action_list = ["movenorth 1", "movesouth 1", "movewest 1", "moveeast 1"]
ckpt_dir = os.path.abspath("./checkpoints")
ckpt_save_rate = 50
if os.path.exists(ckpt_dir):
    shutil.rmtree(ckpt_dir)
os.makedirs(ckpt_dir)
bs = 2000
update_rate=10
dqn = DQN(batch_size=bs, update_rate=update_rate, lr=2e-4)
memory = []
mem_size = 50000
Example No. 6
    num_repeats = 1
else:
    num_repeats = 10

for i in range(num_repeats):
    size = int(6 + 0.5 * i)
    print("Size of maze:", size)
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML("0", 0.4 + float(i / 20.0), size), True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission.requestVideo(800, 500)
    my_mission.setViewpoint(1)
    # Attempt to start a mission:
    max_retries = 3
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available

    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    0, "%s-%d" % ('Moshe', i))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission", (i + 1), ":", e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print(
Example No. 7
def main():
    # Start mission
    # Create default Malmo objects:
    global agent_host
    global matrix2dOriginal
    global maze_map
    global actionHistCounter
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    # Set how many times the agent should repeat the mission.
    num_repeats = 50

    esFile = open("Eval_Stats.txt", "w+")
    esFile.write("\n")
    esFile.close()

    esFile = open("Eval_Stats.txt", "a")

    trFile = open("training_result.txt", "w+")
    trFile.write("\n")
    trFile.close()

    trFile = open("training_result.txt", "a")

    for i in range(num_repeats):
        esFile.write("Run #" + str(i + 1) + "\n")
        actionHistCounter = i + 1
        # size = int(6 + 0.5*i)
        print("Size of maze:", size_of_maze)
        #my_mission = MalmoPython.MissionSpec(get_mission_xml("0", 0.4 + float(i/20.0), size_of_maze, 0), True)
        randomDif = random.uniform(-0.2, 0.2)

        print("Parameters of the mission:", str(i), "next:", 0.4 + randomDif,
              "size:", size_of_maze)
        my_mission = MalmoPython.MissionSpec(
            get_mission_xml(str(i), 0.4 + randomDif, size_of_maze, 0), True)
        # my_mission = MalmoPython.MissionSpec(get_mission_xml(), True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(1)
        # Attempt to start a mission:
        max_retries = 3
        my_clients = MalmoPython.ClientPool()
        my_clients.add(MalmoPython.ClientInfo(
            '127.0.0.1', 10000))  # add Minecraft machines here as available

        for retry in range(max_retries):
            try:
                agent_host.startMission(my_mission, my_clients,
                                        my_mission_record, 0,
                                        "%s-%d" % ('Moshe', i))
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission", (i + 1), ":", e)
                    exit(1)
                else:
                    time.sleep(2)

        # Loop until mission starts:
        print(
            "Waiting for the mission",
            (i + 1),
            "to start ",
        )
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            #sys.stdout.write(".")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)

        print()
        print("Mission", (i + 1), "running.")

        grid = load_grid(world_state, agent_host)
        # print("World State Grid:", grid)
        print("Size of actual map:", len(grid))

        maze_map = get_maze_map(grid)
        print("maze map:", len(maze_map))
        #print(maze_map[244])
        #The maze construction
        matrix2dOriginal = maze_to_2dMatrix(maze_map, size_of_maze)
        matrix2d = maze_to_2dMatrix_reversed(maze_map, size_of_maze)
        print("the matrix 2d: ", matrix2d)
        matrixArray = matrix2d.flatten()

        start_and_end_positions_in_actual_map = find_start_end(grid)

        print("size of maze map:", len(maze_map))
        print("first position in actual map:", first_block_index_in_actual_map)
        print("last position in actual map:", last_block_index_in_actual_map)

        global agent_current_position_xy_in_maze, agent_current_position_index_in_grid

        agent_current_position_xy_in_maze = get_xy_position_of_maze_map_by_position_of_actual_map(
            start_and_end_positions_in_actual_map[0], grid)

        print("Started: agent current position (xy in maze):",
              agent_current_position_xy_in_maze)

        agent_current_position_index_in_grid = get_position_of_actual_map_by_xy_position_of_maze_map(
            agent_current_position_xy_in_maze, grid)

        print("Started: agent current position (index in grid):",
              agent_current_position_index_in_grid,
              "compared with real position:", start_and_end_positions_in_actual_map[0])

        index_of_yaw = my_mission.getAsXML(True).index("yaw")
        yaw_of_agent = int(
            re.compile(r"(\d+)").match(
                my_mission.getAsXML(True)[index_of_yaw + 5:index_of_yaw + 8]).group(1))
        sync_agent_direction_with_yaw(yaw_of_agent)

        print("Started: agent current yaw (facing direction):", yaw_of_agent)

        # go_to_goal_and_finish_mission(grid, start_and_end_positions_in_actual_map[0], \
        #     start_and_end_positions_in_actual_map[1], world_state, agent_host, i)

        print("Started: number of walkable blocks in front of the agent (facing", agent_current_direction, "):",
              get_num_of_walkable_blocks_in_front_of_agent(agent_current_position_xy_in_maze, size_of_maze, grid))

        # test_moving(agent_host, [3, 3, 0, 3, 3, 0, 3])

        positionTransition(grid, matrixArray, yaw_of_agent, size_of_maze)

        trainingStart = time.time()

        trainingProcess = Process(target=missionTrainingStart,
                                  args=(actionHistCounter, ))
        trainingProcess.start()

        stringList = []
        is_complete_action_history = False

        curr_action_counter = 0

        while True:
            if not is_complete_action_history:

                # Poll until the training process has written the action history file.
                actionHistFile = None
                while True:
                    try:
                        actionHistFile = open(
                            "action_history_" + str(actionHistCounter) + "_.txt", "r")
                        stringList = actionHistFile.readlines()
                        if len(stringList) != 0:
                            break
                    except (IOError, OSError):
                        # File not written yet; keep polling.
                        continue

                #actionHistFile = open("action_history_"+str(actionHistCounter)+"_.txt", "r")

                #stringList = actionHistFile.readlines()
                print("Reading action history file, get string: ", stringList)
                curr_action_list = stringList[0].split(' ')
                actionHistFile.close()

            print("Here is the list length:", len(curr_action_list),
                  curr_action_counter + 1)

            try:
                if (len(curr_action_list) >= curr_action_counter + 1):
                    action = curr_action_list[curr_action_counter]
                    convertAction = directionConvert(int(action[0]))
                    test_moving(agent_host, [convertAction], grid)
                    curr_action_counter += 1
            except ValueError:
                # The last index of action is a newline character
                break

            if (stringList[len(stringList) - 1] == "END"):
                is_complete_action_history = True

            if (is_complete_action_history
                    and len(curr_action_list) == curr_action_counter - 1):
                break

        trainingProcess.join()
        trainingEnd = time.time()
        trainingElapsed = trainingEnd - trainingStart
        esFile.write("Training Time: " + str(trainingElapsed) + " ")

        #actionHistFile.close()
        '''
        print(stringList)
        actionCollection = []
        positionCollection = []
        for n in range(0, len(stringList)-1):
            tmp = stringList[n].split(' ')
            for m in range(0,len(tmp)-1):
                L = tmp[m].split(',')
                actionCollection.append(L[0])
                positionCollection.append([L[1],L[2]])

        print('The original: ',actionCollection)
        print(positionCollection)
        '''
        """
        del stringList[-1]
        for string in stringList:
            actionCollection = string.split(' ')
            del actionCollection[-1]
            for aindex in range(len(actionCollection)):
                converted = directionConvert(int(actionCollection[aindex]))
                actionCollection[aindex] = converted
            actionList.append(actionCollection)
        """
        """
        for testingset in actionList:
            #check if it's reachable

            test_moving(agent_host, testingset)
        """
        '''
        actionList = []
        
        for index in range(len(actionCollection)):
            row,col = positionCollection[index][0], positionCollection[index][1]
            action = actionCollection[index]
            print(matrix2d[int(row)][int(col)])
            if matrix2d[int(row)][int(col)] == 0:
                convertAction = directionConvert(int(action))
                actionList.append(convertAction)
        #print('THIS IS THE ACTION: ',len(actionList), actionList)
        
        print('The list:', actionList)
        #raise('STOP HERE')
        test_moving(agent_host, actionList, grid)
        '''

        print(
            "Training complete. Training result can be found in training_result.txt."
        )

        travelStart = time.time()
        go_to_goal_and_finish_mission(grid, agent_current_position_index_in_grid,
                                      start_and_end_positions_in_actual_map[1],
                                      world_state, agent_host, i)
        travelEnd = time.time()
        travelElapsed = travelEnd - travelStart
        esFile.write("Agent Travel Time: " + str(travelElapsed) + "\n\n")

        print("Aiku did it!")

    trFile.close()
    esFile.close()
                  </AgentSection>

                </Mission>'''
    return missionXML


# Set up a client pool.
# IMPORTANT: If ANY of the clients will be on a different machine, then you MUST
# make sure that any client which can be the server has an IP address that is
# reachable from other machines - ie DO NOT SIMPLY USE 127.0.0.1!!!!
# The IP address used in the client pool will be broadcast to other agents who
# are attempting to find the server - so this will fail for any agents on a
# different machine.
client_pool = MalmoPython.ClientPool()
for x in range(10000, 10000 + NUM_AGENTS + 1):
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', x))
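
# A hedged sketch, added for illustration (not part of the original sample): the
# same pool built for a multi-machine setup. The LAN addresses below are
# hypothetical placeholders; the point of the warning above is that every entry
# must be an address the other machines can actually reach, never 127.0.0.1.
lan_client_pool = MalmoPython.ClientPool()
lan_client_pool.add(MalmoPython.ClientInfo('192.168.0.10', 10000))  # Minecraft client on machine A
lan_client_pool.add(MalmoPython.ClientInfo('192.168.0.11', 10000))  # Minecraft client on machine B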

num_missions = 5 if INTEGRATION_TEST_MODE else 30000
for mission_no in range(1, num_missions + 1):
    print("Running mission #" + str(mission_no))
    # Create mission xml - use forcereset if this is the first mission.
    my_mission = MalmoPython.MissionSpec(
        getXML("true" if mission_no == 1 else "false"), True)

    # Generate an experiment ID for this mission.
    # This is used to make sure the right clients join the right servers -
    # if the experiment IDs don't match, the startMission request will be rejected.
    # In practice, if the client pool is only being used by one researcher, there
    # should be little danger of clients joining the wrong experiments, so a static
    # ID would probably suffice, though changing the ID on each mission also catches
    # potential problems with clients and servers getting out of step.
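
# A hedged sketch, added for illustration (not part of the original sample): one
# way to generate a fresh experiment ID per mission and pass it to every agent's
# startMission call, so that all clients join the same mission instance. The
# helper name and its arguments are hypothetical.
import uuid

def start_multi_agent_mission(agent_hosts, mission, pool):
    experiment_id = str(uuid.uuid4())  # unique per mission; must match across all agents
    for role, host in enumerate(agent_hosts):
        host.startMission(mission, pool, MalmoPython.MissionRecordSpec(), role, experiment_id)
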
    def init(self,
             client_pool=None,
             role=0,
             continuous_discrete=True,
             add_noop_command=None,
             max_retries=30,
             retry_sleep=3,
             step_sleep=0.001,
             skip_steps=0,
             videoResolution=None,
             videoWithDepth=None,
             observeRecentCommands=None,
             observeHotBar=None,
             observeFullInventory=None,
             observeGrid=None,
             observeDistance=None,
             observeChat=None,
             allowContinuousMovement=None,
             allowDiscreteMovement=None,
             allowAbsoluteMovement=None,
             recordDestination=None,
             recordObservations=None,
             recordRewards=None,
             recordCommands=None,
             recordMP4=None,
             gameMode=None,
             forceWorldReset=None,
             turn_based=False,
             experiment_id="experimentid"):

        self.role = role
        self.max_retries = max_retries
        self.retry_sleep = retry_sleep
        self.step_sleep = step_sleep
        self.skip_steps = skip_steps
        self.forceWorldReset = forceWorldReset
        self.continuous_discrete = continuous_discrete
        self.add_noop_command = add_noop_command
        self.experiment_id = experiment_id
        if turn_based:
            self._turn = TurnState()

        if videoResolution:
            if videoWithDepth:
                self.mission_spec.requestVideoWithDepth(*videoResolution)
            else:
                self.mission_spec.requestVideo(*videoResolution)

        if observeRecentCommands:
            self.mission_spec.observeRecentCommands()
        if observeHotBar:
            self.mission_spec.observeHotBar()
        if observeFullInventory:
            self.mission_spec.observeFullInventory()
        if observeGrid:
            self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
        if observeDistance:
            self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
        if observeChat:
            self.mission_spec.observeChat()

        if allowContinuousMovement or allowDiscreteMovement or allowAbsoluteMovement:
            # if there are any parameters, remove current command handlers first
            self.mission_spec.removeAllCommandHandlers()

            if allowContinuousMovement is True:
                self.mission_spec.allowAllContinuousMovementCommands()
            elif isinstance(allowContinuousMovement, list):
                for cmd in allowContinuousMovement:
                    self.mission_spec.allowContinuousMovementCommand(cmd)

            if allowDiscreteMovement is True:
                self.mission_spec.allowAllDiscreteMovementCommands()
            elif isinstance(allowDiscreteMovement, list):
                for cmd in allowDiscreteMovement:
                    self.mission_spec.allowDiscreteMovementCommand(cmd)

            if allowAbsoluteMovement is True:
                self.mission_spec.allowAllAbsoluteMovementCommands()
            elif isinstance(allowAbsoluteMovement, list):
                for cmd in allowAbsoluteMovement:
                    self.mission_spec.allowAbsoluteMovementCommand(cmd)

        if client_pool:
            if not isinstance(client_pool, list):
                raise ValueError(
                    "client_pool must be list of tuples of (IP-address, port)")
            self.client_pool = MalmoPython.ClientPool()
            for client in client_pool:
                self.client_pool.add(MalmoPython.ClientInfo(*client))

        # TODO: produce observation space dynamically based on requested features

        self.video_height = self.mission_spec.getVideoHeight(0)
        self.video_width = self.mission_spec.getVideoWidth(0)
        self.video_depth = self.mission_spec.getVideoChannels(0)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.video_height,
                                                   self.video_width,
                                                   self.video_depth),
                                            dtype=np.uint8)
        # dummy image just for the first observation
        # self.last_image = np.zeros((self.video_height, self.video_width, self.video_depth), dtype=np.uint8)
        self.last_image = np.zeros(
            (self.video_height * self.video_width * self.video_depth),
            dtype=np.uint8)
        self._create_action_space()

        # mission recording
        self.mission_record_spec = MalmoPython.MissionRecordSpec(
        )  # record nothing
        if recordDestination:
            self.mission_record_spec.setDestination(recordDestination)
        if recordRewards:
            self.mission_record_spec.recordRewards()
        if recordCommands:
            self.mission_record_spec.recordCommands()
        if recordMP4:
            self.mission_record_spec.recordMP4(*recordMP4)

        if gameMode:
            if gameMode == "spectator":
                self.mission_spec.setModeToSpectator()
            elif gameMode == "creative":
                self.mission_spec.setModeToCreative()
            elif gameMode == "survival":
                logger.warn(
                    "Cannot force survival mode, assuming it is the default.")
            else:
                assert False, "Unknown game mode: " + gameMode
Example No. 10
 def _add_default_client(self):
     self.my_client_pool = MalmoPython.ClientPool()
     self.my_client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
Example No. 11
                if tau == T - 1:
                    while len(S) > 1:
                        tau = tau + 1
                        self.update_q_table(tau, S, A, R, T)
                    done_update = True
                    break


if __name__ == '__main__':
    random.seed(0)
    #sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # flush print output immediately
    print('Starting...', flush=True)

    expected_reward = 3390
    my_client_pool = MalmoPython.ClientPool()
    my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))

    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    num_reps = 30000
    n = 1
    odie = Odie(n=n)
Example No. 12
def main(model=None, mode='train', start_episode=0):
    my_xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      <About>
        <Summary>Hill Descent.</Summary>
      </About>
      <ModSettings>
        <MsPerTick>20</MsPerTick>
      </ModSettings>
      <ServerSection>

        <ServerInitialConditions>

            <Time><StartTime>1</StartTime></Time>
        </ServerInitialConditions>
        <ServerHandlers>

          <DefaultWorldGenerator seed="-999595225643433963" forceReset="false" destroyAfterUse="false" />

          <ServerQuitFromTimeUp timeLimitMs="100000000"/>
          <ServerQuitWhenAnyAgentFinishes/>
        </ServerHandlers>
      </ServerSection>
      <AgentSection mode="Survival">
        <Name>Bob</Name>
        <AgentStart>
          <Placement x="28.5" y="87" z="330.5" pitch="-90" yaw="0"/>
        </AgentStart>
        <AgentHandlers>
          <DiscreteMovementCommands/>
          <MissionQuitCommands quitDescription="done"/>
          <ChatCommands/>
          <ObservationFromFullStats/>
          <ObservationFromGrid>
              <Grid name="sight">
                  <min x="{}" y="{}" z="{}"/>
                  <max x="{}" y="{}" z="{}"/>
              </Grid>
              <Grid name="feet">
                  <min x="0" y="-1" z="0"/>
                  <max x="0" y="-1" z="0"/>
              </Grid>
          </ObservationFromGrid>
          <AgentQuitFromTouchingBlockType>
              <Block type="cobblestone" />
          </AgentQuitFromTouchingBlockType>
        </AgentHandlers>
      </AgentSection>
    </Mission>

    '''.format(-(grid_width - 1) // 2, -grid_height, -(grid_width - 1) // 2,
               (grid_width - 1) // 2, grid_height, (grid_width - 1) // 2)

    batch_size = 100
    agent = DQNAgent(state_size, action_size, learning_rate, discount_rate,
                     epsilon, epsilon_min, epsilon_decay)
    if model != None:
        agent.load(model)
        if mode == 'test':
            agent.epsilon = 0.0
        print('loaded model: {}'.format(model))
    else:
        clear_csv('./data/results.csv')
        clear_csv('./data/moves.csv')

    my_client_pool = MalmoPython.ClientPool()
    my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))
    agent_host = MalmoPython.AgentHost()

    for e in range(start_episode + 1, episodes + 1):
        my_mission = MalmoPython.MissionSpec(my_xml, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(2)
        print("Waiting for the mission to start", end=' ')
        agent_host.startMission(
            my_mission,
            my_mission_record,
        )
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)
        print()
        agent_host.sendCommand('chat /kill @e[type=Chicken]')
        agent_host.sendCommand('chat /kill @e[type=Pig]')
        agent_host.sendCommand('chat /kill @e[type=Cow]')
        moves = 0
        episode_reward = 0

        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()
            if world_state.number_of_observations_since_last_state > 0:
                try:
                    obvsText = world_state.observations[-1].text
                    data = json.loads(obvsText)
                except:
                    print("Error when getting state")
                    continue

                state = get_state(data)

                prev_x = data.get(u'XPos', 0)
                prev_y = data.get(u'YPos', 0)
                prev_z = data.get(u'ZPos', 0)

                useful_state = [state[2], state[6], state[7], state[8],
                                state[10], state[11], state[13],
                                state[14], state[16], state[17],
                                state[18], state[22]]

                action = agent.act(useful_state)

                if ((action == 0 and state[grid_center - grid_width] == 0)
                        or (action == 1 and state[grid_center + 1] == 0) or
                    (action == 2 and state[grid_center + grid_width] == 0)
                        or (action == 3 and state[grid_center - 1] == 0)):
                    agent_host.sendCommand(jump_directions[action])
                else:
                    agent_host.sendCommand(directions[action])
                time.sleep(0.25)
                #print("North:", state[grid_center - grid_width], \
                #      "  East:", state[grid_center + 1], \
                #      "  South:", state[grid_center + grid_width], \
                #      "  West:", state[grid_center - 1])

                try:
                    world_state = wait_world_state(agent_host, world_state)
                    obvsText = world_state.observations[-1].text
                    data = json.loads(obvsText)
                except:
                    print("Error when getting state")
                    continue

                current_x = data.get(u'XPos', 0)
                current_y = data.get(u'YPos', 0)
                current_z = data.get(u'ZPos', 0)
                damage_taken = calculate_damage(prev_y, current_y)
                next_state = get_state(data)

                # Build the successor state from the fresh observation (next_state),
                # mirroring the indices used for useful_state above.
                useful_next_state = [next_state[2], next_state[6], next_state[7], next_state[8],
                                     next_state[10], next_state[11], next_state[13],
                                     next_state[14], next_state[16], next_state[17],
                                     next_state[18], next_state[22]]

                # print("previous and current y", prev_y, current_y)
                # print("damage taken", damage_taken)
                #print("X:", prev_x, current_x, "\n", \
                #      "Y:", prev_y, current_y, "\n", \
                #      "Z:", prev_z, current_z, "\n")
                reward = (2 * (prev_y - current_y) - 50 * damage_taken - 1
                          if prev_x != current_x or prev_y != current_y or prev_z != current_z
                          else -1000)
                episode_reward += reward
                done = (current_y <= goal_height
                        or not world_state.is_mission_running
                        or data['Life'] <= 0)

                agent.remember(useful_state, action, reward, useful_next_state,
                               done)
                if ((action == 0 and state[grid_center - grid_width] == 0)
                        or (action == 1 and state[grid_center + 1] == 0) or
                    (action == 2 and state[grid_center + grid_width] == 0)
                        or (action == 3 and state[grid_center - 1] == 0)):
                    print(
                        'episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                        .format(e, episodes, jump_directions[action], reward,
                                agent.epsilon, moves, done))
                else:
                    print(
                        'episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                        .format(e, episodes, directions[action], reward,
                                agent.epsilon, moves, done))
                moves += 1

                if mode == 'train' or model == None:
                    write_to_csv('./data/moves.csv',
                                 [e, current_x, current_y, current_z, reward])

                if e > batch_size:
                    agent.replay(batch_size)

                if done or moves > max_moves:
                    agent_host.sendCommand("quit")

        if (mode == 'train'
                or model == None) and (e in checkpoints
                                       or agent.epsilon <= epsilon_min):
            print('saving model at episode {}'.format(e))
            agent.save('./models/model_{}'.format(e))
            if agent.epsilon <= epsilon_min:
                break

        time.sleep(1)
        # my_mission.forceWorldReset()
        if mode == 'train' or model == None:
            write_to_csv('./data/results.csv',
                         [e, episode_reward, moves,
                          int(episode_reward > 0)])
# Initialize two agent hosts
agent_host1 = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host1)
recordingsDirectory1 = malmoutils.get_recordings_directory(agent_host1)

agent_host2 = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host2)
recordingsDirectory2 = malmoutils.get_recordings_directory(agent_host2)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# client pool
client_pool = MalmoPython.ClientPool()
client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_INFO)

malmoutils.parse_command_line(agent_host1)
malmoutils.parse_command_line(agent_host2)

# for video processing
current_yaw_delta_from_depth = 0
video_width = 700
video_height = 500

# Python 2 compatibility shim (no longer needed)
if sys.version_info[0] == 2:
    import Tkinter as tk
    def init(self,
             client_pool=None,
             start_minecraft=None,
             continuous_discrete=True,
             add_noop_command=None,
             max_retries=90,
             retry_sleep=10,
             step_sleep=0.001,
             skip_steps=0,
             videoResolution=None,
             videoWithDepth=None,
             observeRecentCommands=None,
             observeHotBar=None,
             observeFullInventory=None,
             observeGrid=None,
             observeDistance=None,
             observeChat=None,
             allowContinuousMovement=None,
             allowDiscreteMovement=None,
             allowAbsoluteMovement=None,
             recordDestination=None,
             recordObservations=None,
             recordRewards=None,
             recordCommands=None,
             recordMP4=None,
             gameMode=None,
             forceWorldReset=None):

        self.max_retries = max_retries
        self.retry_sleep = retry_sleep
        self.step_sleep = step_sleep
        self.skip_steps = skip_steps
        self.forceWorldReset = forceWorldReset
        self.continuous_discrete = continuous_discrete
        self.add_noop_command = add_noop_command
        self.client_pool = client_pool

        if videoResolution:
            if videoWithDepth:
                self.mission_spec.requestVideoWithDepth(*videoResolution)
            else:
                self.mission_spec.requestVideo(*videoResolution)

        if observeRecentCommands:
            self.mission_spec.observeRecentCommands()
        if observeHotBar:
            self.mission_spec.observeHotBar()
        if observeFullInventory:
            self.mission_spec.observeFullInventory()
        if observeGrid:
            self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
        if observeDistance:
            self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
        if observeChat:
            self.mission_spec.observeChat()

        if allowDiscreteMovement:
            # if there are any parameters, remove current command handlers first
            self.mission_spec.removeAllCommandHandlers()

            if allowDiscreteMovement is True:
                self.mission_spec.allowAllDiscreteMovementCommands()
            elif isinstance(allowDiscreteMovement, list):
                for cmd in allowDiscreteMovement:
                    self.mission_spec.allowDiscreteMovementCommand(cmd)

        if start_minecraft:
            # start Minecraft process assigning port dynamically
            self.mc_process, port = minecraft_py.start()
            logger.info(
                "Started Minecraft on port %d, overriding client_pool.", port)
            client_pool = [('127.0.0.1', port)]
        """ 
        make client_pool usable for Malmo: change format of the client_pool to struct 
        """
        if client_pool:
            if not isinstance(client_pool, list):
                raise ValueError(
                    "client_pool must be list of tuples of (IP-address, port)")
            self.client_pool = MalmoPython.ClientPool()
            for client in client_pool:
                self.client_pool.add(MalmoPython.ClientInfo(*client))
        """
        initialize video parameters for video processing
        """
        self.video_height = self.mission_spec.getVideoHeight(0)
        self.video_width = self.mission_spec.getVideoWidth(0)
        self.video_depth = self.mission_spec.getVideoChannels(0)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.video_height,
                                                   self.video_width,
                                                   self.video_depth))
        """
        dummy image just for the first observation
        """
        self.last_image1 = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.float32)
        self.last_image2 = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.float32)
        self.create_action_space()
        """ 
        mission recording 
        """
        self.mission_record_spec = MalmoPython.MissionRecordSpec(
        )  # record nothing
        if recordDestination:
            self.mission_record_spec.setDestination(recordDestination)
        if recordRewards:
            self.mission_record_spec.recordRewards()
        if recordCommands:
            self.mission_record_spec.recordCommands()
        if recordMP4:
            self.mission_record_spec.recordMP4(*recordMP4)
        """ 
        game mode
        """
        if gameMode:
            if gameMode == "spectator":
                self.mission_spec.setModeToSpectator()
            elif gameMode == "creative":
                self.mission_spec.setModeToCreative()
            elif gameMode == "survival":
                logger.warn(
                    "Cannot force survival mode, assuming it is the default.")
            else:
                assert False, "Unknown game mode: " + gameMode