Example #1
 def __init__(self, env_config):
     self.size = 21
     self.obs_size = 21
     self.max_episode_steps = 100
     self.log_frequency = 10
     self.action_dict = {
         0: 'move 1',
         1: 'move -1',
         2: 'turn 1',
         3: 'turn 0',
         4: 'attack 1'
     }
     # self.action_space = Box(-1, 1, shape=(3,), dtype=np.float32)
     self.action_space = Discrete(len(self.action_dict))
     self.observation_space = Box(0,
                                  1,
                                  shape=(np.prod(
                                      [1, self.obs_size, self.obs_size]), ),
                                  dtype=np.int32)
     self.agent_host = MalmoPython.AgentHost()
     self.agentinf = agent()
     try:
         self.agent_host.parse(sys.argv)
     except RuntimeError as e:
         print('ERROR:', e)
         print(self.agent_host.getUsage())
         exit(1)
     self.obs = None
     self.episode_step = 0
     self.episode_return = 0
     self.returns = []
     self.steps = []
Example #2
 def __init__(self):
     self.agentHost = MalmoPython.AgentHost()
     try:
         self.agentHost.parse(sys.argv)
     except RuntimeError as e:
         print('ERROR:', e)
         print(self.agentHost.getUsage())
         sys.exit(1)
Example #3
    def __init__(self, env_config):  
        # Static Parameters
        self.size_y = 25
        self.size_x = 10
        self.size_z = 20
        # self.reward_density = .1
        # self.penalty_density = .02
        # self.obs_size = 5
        # self.obs_size_x = 5
        # self.obs_size_y = 5
        # self.obs_size_z = 5
        self.max_episode_steps = 100 #100
        self.log_frequency = 1 #10
        self.num_ghasts = 1

        self.action_dict = {
            0: 'movewest 1',  # Move one block west
            1: 'moveeast 1',  # Move one block east
            2: 'attack 1',  # Attack / destroy block
        }

        # Rllib Parameters
        # continuous
        self.action_space = Box(low=np.array([-1, 0]), high=np.array([1, 1])) # [move attack]

        # discrete
        # self.action_space = Discrete(len(self.action_dict))

        self.observation_space = Box(-50, 50, shape=(self.num_ghasts * 1 * 3, ), dtype=np.float32)

        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse( sys.argv )
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # GhastKiller Parameters
        self.obs = None
        self.allow_break_action = False
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []
        self.ghasts = collections.defaultdict(dict)
        self.fireballs = collections.defaultdict(dict)

        self.agentState = {"pos": (0.5, 21, -9.5), "life": 20, "prevLife": 20}
        
        self.step_dodge = 0
        self.episode_dodge = set()
        self.step_hitback = 0
        self.episode_hitback = set()
        self.step_kill = 0
Example #4
 def two_agent_init(self):
     # Create default Malmo objects:
     agent1 = MalmoPython.AgentHost()
     agent2 = MalmoPython.AgentHost()
     try:
         agent1.parse(sys.argv)
         agent2.parse(sys.argv)
     except RuntimeError as e:
         print('ERROR:', e)
         print(agent1.getUsage())
         print(agent2.getUsage())
         exit(1)
     if agent1.receivedArgument("help"):
         print(agent1.getUsage())
         exit(0)
     if agent2.receivedArgument("help"):
         print(agent2.getUsage())
         exit(0)
     return (agent1, agent2)
Example #5
    def __init__(self):
        super(MinecraftEnv, self).__init__()

        self.agent_host = MalmoPython.AgentHost()
        self.client_pool = None
        self.mc_process = None
        self.screen = None
        self.experiment_id = None
        self._turn = None
        self.load_mission_xml()
Example #6
 def one_agent_init(self):
     agent = MalmoPython.AgentHost()
     try:
         agent.parse(sys.argv)
     except RuntimeError as e:
         print('ERROR:', e)
         print(agent.getUsage())
         exit(1)
     if agent.receivedArgument("help"):
         print(agent.getUsage())
         exit(0)
     return agent
Example #7
def create_malmo_obj():
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse( sys.argv )
    except RuntimeError as e:
        print('ERROR:',e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)
    return agent_host
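
The helper above only builds and parses the AgentHost. As a minimal follow-up sketch (not part of the original example), this is how the returned object is typically used to launch a mission; missionXML is assumed to be a valid mission document defined elsewhere, and the import path depends on how Malmo was installed.

import time

import MalmoPython  # or: from malmo import MalmoPython, depending on the install

agent_host = create_malmo_obj()

# missionXML is assumed to hold a valid <Mission> XML string (not shown here).
my_mission = MalmoPython.MissionSpec(missionXML, True)
my_mission_record = MalmoPython.MissionRecordSpec()
agent_host.startMission(my_mission, my_mission_record)

# Block until the mission actually begins, printing any startup errors.
world_state = agent_host.getWorldState()
while not world_state.has_mission_begun:
    time.sleep(0.1)
    world_state = agent_host.getWorldState()
    for error in world_state.errors:
        print("Error:", error.text)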
Example #8
 def __init__(self, env_config):  
     # Static Parameters
     self.size = 35
     self.mobCount = 5   #amount of mobs per mob type
     # Malmo Parameters
     self.agent_host = MalmoPython.AgentHost()
     world_state = self.init_malmo()
     try:
         self.agent_host.parse( sys.argv )
     except RuntimeError as e:
         print('ERROR:', e)
         print(self.agent_host.getUsage())
         exit(1)
Example #9
    def init_malmo(self, recordings_directory=DEFAULT_RECORDINGS_DIR):
        if self.is_malmo_initialized:
            return

        launch_minecraft_in_background('/app/MalmoPlatform/Minecraft',
                                       ports=[10000, 10001])

        # Set up two agent hosts
        self.agent_host_bot = MalmoPython.AgentHost()
        self.agent_host_camera = MalmoPython.AgentHost()

        # Create list of Minecraft clients to attach to. The agents must
        # have been launched before calling record_malmo_video using
        # init_malmo()
        self.client_pool = MalmoPython.ClientPool()
        self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
        self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

        # Use bot's agenthost to hold the command-line options
        malmoutils.parse_command_line(
            self.agent_host_bot,
            ['--record_video', '--recording_dir', recordings_directory])

        self.is_malmo_initialized = True
Example #10
    def __init__(self, env_config):
        # Static Parameters
        self.size = 50
        self.reward_density = .1
        self.penalty_density = .02
        self.obs_size = 5

        # todo: 100 steps
        self.max_episode_steps = 100
        # todo: 10 frequency
        self.log_frequency = 10
        self.action_dict = {
            0: 'move 1',  # Move one block forward
            1: 'turn 1',  # Turn 90 degrees to the right
            2: 'turn -1',  # Turn 90 degrees to the left
            3: 'attack 1'  # Destroy block
        }

        # Rllib Parameters
        self.action_space = Box(low=-1, high=1, shape=(3, ))

        # self.action_space = Discrete(len(self.action_dict))

        self.observation_space = Box(0,
                                     1,
                                     shape=(2 * self.obs_size *
                                            self.obs_size, ),
                                     dtype=np.float32)

        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # DiamondCollector Parameters
        self.obs = None
        self.allow_break_action = False
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []

        self.pbar = tqdm(total=50000)
Example #11
    def __init__(self, missionXML, validate, setup_mission=None):
        super()
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)
        if self.agent_host.receivedArgument("help"):
            print(self.agent_host.getUsage())
            exit(0)

        self.mission = MalmoPython.MissionSpec(missionXML, validate)
        self.mission_record = MalmoPython.MissionRecordSpec()
        if (setup_mission is not None):
            setup_mission(self.mission)
Example #12
    def __init__(self, env_config):
        # Static Parameters
        self.size = 50
        self.reward_density = .1
        self.penalty_density = .02
        self.obs_size = 4  #changed from 5
        self.max_episode_steps = 125
        self.log_frequency = 10
        self.action_dict = {
            0: 'move 1',  # Move one block forward
            1: 'turn 1',  # Turn 90 degrees to the right
            2: 'turn -1',  # Turn 90 degrees to the left
            3: 'attack 1'  # Destroy block
        }

        # Rllib Parameters
        self.action_space = Box(
            low=-1.0, high=1.0, shape=(2, ),
            dtype=np.float32)  #CHANGED FROM DISCRETE TO BOX
        self.observation_space = Box(
            -1000, 1000, shape=(self.obs_size, ),
            dtype=np.float32)  # Not sure what to do here

        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # Agent Parameters
        self.obs = np.zeros(self.obs_size)
        self.last_obs = np.zeros(self.obs_size)
        self.zstart = 81.5
        self.zend = 96.5
        self.xleft = 662.5
        self.xright = 648.5
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []
Example #13
    def __init__(self, env_config):
        # Static Parameters
        self.size = 50
        self.reward_density = .1
        self.penalty_density = .02
        self.obs_size = 3
        self.depth = 30
        self.max_episode_steps = 30
        self.log_frequency = 10
        self.num_episode = 0
        self.xz_coordinate = 2.5, 2.5
        self.action_dict = {
            0: 'move 1',  # Move one block forward
            1: 'strafe -1',  # Moves left
            2: 'strafe 1',  # Moves right
            3: 'move -1',  # Moves back
            4: 'move 0'  # Stop
        }
        self.landInWater = False
        # Rllib Parameters
        self.action_space = Discrete(len(self.action_dict))
        self.observation_space = Box(
            0,
            1,
            shape=(np.prod([self.depth, self.obs_size, self.obs_size]), ),
            dtype=np.int32)

        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # DiamondCollector Parameters
        self.obs = None
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []
Example #14
    def __init__(self, env_config):
        # Static Parameters
        self.penalty_density = .02
        self.obs_size = 5
        self.log_frequency = 10
        self.max_episode_steps = 400
        self.action_dict = {
            0: 'move 0',  # Stop
            1: 'strafe 1',  # Strafe one block to the right
            2: 'strafe -1',  # Strafe one block to the left
            3: 'attack 1',
        }

        # Rllib Parameters
        self.action_space = Discrete(len(self.action_dict))
        self.observation_space = Box(0,
                                     1,
                                     shape=(2 * self.obs_size *
                                            self.obs_size, ),
                                     dtype=np.float32)

        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # CallMePro Parameters
        self.obs = None
        self.face_brick_move = False
        self.face_gold_move = False
        self.face_diamond_move = False
        self.face_stone_move = False
        self.face_diamondblock_move = False
        self.face_coal_move = False
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []
Example #15
    def __init__(
        self, missionXML, validate, setup_mission=None, ip="127.0.0.1", port=10000
    ):
        super()
        self.agent_host = MalmoPython.AgentHost()
        self.clientPool = MalmoPython.ClientPool()
        self.clientPool.add(MalmoPython.ClientInfo(ip, port))
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print("ERROR:", e)
            print(self.agent_host.getUsage())
            exit(1)
        if self.agent_host.receivedArgument("help"):
            print(self.agent_host.getUsage())
            exit(0)

        self.mission = MalmoPython.MissionSpec(missionXML, validate)
        self.mission_record = MalmoPython.MissionRecordSpec()
        if setup_mission is not None:
            setup_mission(self.mission)
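
A hedged sketch of how a wrapper like this could start its stored mission against its stored client pool. The start_mission helper name, retry count, and experiment id below are illustrative additions, not part of the original class; the five-argument startMission call mirrors the pattern used in the later examples.

import time

def start_mission(env, max_retries=3, role=0, experiment_id="experiment_0"):
    """Illustrative helper: launch env.mission on env.clientPool with retries."""
    for retry in range(max_retries):
        try:
            env.agent_host.startMission(env.mission, env.clientPool,
                                        env.mission_record, role, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                raise
            time.sleep(2)

    # Wait until the mission reports that it has begun.
    world_state = env.agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.1)
        world_state = env.agent_host.getWorldState()
    return world_state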
Example #16
    def __init__(self, env_config):  
        # Static Parameters
        self.size = 50
        self.reward_density = .1
        self.penalty_density = .02
        self.obs_size = 5
        self.max_episode_steps = 100
        self.log_frequency = 10
        self.action_dict = {
            0: 'move 1',  # Move one block forward
            1: 'strafe -1',  # strafe to the left
            2: 'strafe 1',  # strafe to the right
            #3: 'turn -1',  # Turn 90 degrees to the left
        }

        # Rllib Parameters
        self.action_space = Discrete(len(self.action_dict)) #BACK TO DISCRETE FOR FINAL REPORT
        #self.observation_space = Box(-1000, 1000, shape=(self.obs_size,), dtype=np.float32) # Not sure what to do here
        self.observation_space = Box(0, 1, shape=(self.obs_size * self.obs_size, ), dtype=np.float32)

        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse( sys.argv )
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # Agent Parameters
        self.obs = np.zeros(self.obs_size)
        self.last_obs = np.zeros(self.obs_size)
        self.zstart = 82.5
        self.zend = 96.5
        self.xstart = 655.5
        self.xend = 648.5
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []
Example #17
    def __init__(self, env_config):
        # Static Parameters
        self.size = 50
        self.reward_density = .1
        self.penalty_density = .02
        self.obs_size = 6
        self.max_episode_steps = 400
        self.log_frequency = 1
        self.episode_num = 0
        self.quit = False
        self.reached = False

        # Rllib Parameters
        self.action_space = Box(-1, 1, shape=(3, ), dtype=np.float32)
        self.observation_space = Box(
            0,
            1,
            shape=(np.prod([15, self.obs_size + 1, self.obs_size + 1]), ),
            dtype=np.int32)
        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # DiamondCollector Parameters
        self.obs = None
        self.cur_pos = (0, 0, 0)
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []

        # ADDED
        self.prev_pos = (0, 0, 0)
        self.temp_pos = (0, 0, 0)
Example #18
    def __init__(self, size, obs_size, num_entities=5, episodes=100):
        self.size = size
        self.obs_size = obs_size
        self.num_entities = num_entities
        self.num_entities_copy = num_entities  #copy it to make sure each mission has the right number of zombies in the beginning of each episode
        self.episodes = episodes

        # Create default Malmo objects:

        #the agent
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)
        if self.agent_host.receivedArgument("help"):
            print(self.agent_host.getUsage())
            exit(0)

        self.world_state = self.agent_host.getWorldState()
        self.is_mission_running = self.world_state.is_mission_running
        self.cobblestone_wall = 0
Example #19
    def __init__(self, env_config):
        # Static Parameters
        self.max_episode_steps = 100
        self.log_frequency = 10
        self.obs_size = 5  # not set in the original snippet; 5 assumed (matching the other examples) so observation_space below is well-defined

        # Rllib Parameters
        #self.action_space = Discrete(len(self.action_dict))
        self.action_space = Box(-1, 1, shape=(3, ), dtype=np.float32)
        self.observation_space = Box(0,
                                     1,
                                     shape=(2 * self.obs_size *
                                            self.obs_size, ),
                                     dtype=np.float32)

        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()

        # DiamondCollector Parameters
        self.obs = None
        self.allow_break_action = False
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []
Example #20
                                    <AgentHandlers>
                                      <ObservationFromFullStats/>
                                      <DiscreteMovementCommands/>
                                      <AbsoluteMovementCommands/>
                                      <AgentQuitFromTouchingBlockType>
                                        <Block type="torch"/>
                                      </AgentQuitFromTouchingBlockType>
                                      <ChatCommands/>
                                      <MissionQuitCommands/>
                                    </AgentHandlers>
                              </AgentSection>
                            </Mission>'''

        # Create default Malmo objects:

        agent_host = MalmoPython.AgentHost()
        try:
            agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(agent_host.getUsage())
            exit(1)
        if agent_host.receivedArgument("help"):
            print(agent_host.getUsage())
            exit(0)

        my_mission = MalmoPython.MissionSpec(missionXML, True)
        my_mission_record = MalmoPython.MissionRecordSpec()

        # Attempt to start a mission:
        max_retries = 3
Example #21
def setupMinecraft():
    '''
    Setup the Minecraft environment
    NOTE: action space relies heavily on the coordinate system and minecraft has a weird coord system
    '''
    # 0: stop, 1: forward, 2: backward, 3: strafe left, 4: strafe right
    action_space = ["move 0", "move 1", "move -1", "strafe -1", "strafe 1"]

    # Create default Malmo objects:
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    # Set up the mission
    mission_file = './mission.xml'
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)

    # Force reset of the environment, generate a brand new world every episode
    my_mission.forceWorldReset()

    # Python code for alterations to the environment
    my_mission.drawCuboid(-1, 106, -1, GRID_SIZE, 106, GRID_SIZE, "lava")
    my_mission.drawCuboid(-1, 107, -1, GRID_SIZE, 107, GRID_SIZE, "obsidian")
    # my_mission.drawCuboid(0, 108, 0, GRID_SIZE-1, 110, GRID_SIZE-1, "air") # makes steve move

    # Generating the map
    gridmap = reset_map(GRID_SIZE, MAP_PATH)
    for j in range(GRID_SIZE):
        for i in range(GRID_SIZE):
            if gridmap[j][i] == '1':
                my_mission.drawBlock(i, 108, j, "stone")
                my_mission.drawBlock(i, 109, j, "stone")

    for j in range(GRID_SIZE):
        for i in range(GRID_SIZE):
            if gridmap[j][i] == '2':
                my_mission.drawBlock(i, 107, j, "lava")
                pass

    # Placing diamonds on map
    diamond_spots = [(4, 6), (0, 0), (5, 1), (9, 2), (7, 8), (0, 9), (7, 4),
                     (8, 0), (1, 6), (8, 6)]

    for d in diamond_spots:
        my_mission.drawItem(d[0], 109, d[1], "diamond")

    # Extra aesthetics
    my_mission.drawBlock(-1, 111, -1, "torch")
    my_mission.drawBlock(-1, 111, GRID_SIZE, "torch")
    my_mission.drawBlock(GRID_SIZE, 111, -1, "torch")
    my_mission.drawBlock(GRID_SIZE, 111, GRID_SIZE, "torch")

    # Idea for another mission
    # my_mission.drawLine(0, 107, 8, 15, 107, 8, "netherrack")
    # my_mission.drawBlock(8, 108, 8, "fire")

    # Can't add a door properly, only adding half a door?
    # my_mission.drawBlock(11, 108, 6, "wooden_door")
    # my_mission.drawBlock(11, 109, 6, "wooden_door")

    # Placing Steve in the map
    x = np.random.randint(0, 9) + 0.5
    z = np.random.randint(0, 9) + 0.5
    # my_mission.startAt(x, 108, z)
    my_mission.startAt(4.5, 108, 3.5)

    my_mission_record = MalmoPython.MissionRecordSpec()

    print(my_mission.getSummary())

    return agent_host, my_mission, my_mission_record, action_space
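
A short usage sketch (not in the original script) showing how the four values returned by setupMinecraft() could drive one episode; the random-action policy and sleep interval are placeholders, not anything the source prescribes.

import random
import time

agent_host, my_mission, my_mission_record, action_space = setupMinecraft()
agent_host.startMission(my_mission, my_mission_record)

# Wait for the mission to begin.
world_state = agent_host.getWorldState()
while not world_state.has_mission_begun:
    time.sleep(0.1)
    world_state = agent_host.getWorldState()

# Issue commands drawn from the returned action_space until the mission ends.
while world_state.is_mission_running:
    agent_host.sendCommand(random.choice(action_space))
    time.sleep(0.25)
    world_state = agent_host.getWorldState()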
Example #22
    if command == 2:
        return "go there"

    if command == 3:
        return "follow"

    if command == 4:
        return "sit"

    if command == 5:
        return -1


if __name__ == '__main__':
    # Create default Malmo objects:
    agent_host = MalmoPython.AgentHost()
    #scout_ai = ScoutAI(agent_host)
    scout_ai = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host)
    commandQueue = CommandQueue()
    prev_command = 0
    counter = 0

    my_mission = MalmoPython.MissionSpec(buildEnvironment(), True)
    my_mission.allowAllChatCommands()
    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
    # client_pool.add(MalmoPython.ClientInfo( "127.0.0.1", 10001) )
    agent_host_record = MalmoPython.MissionRecordSpec()
    # scout_record = MalmoPython.MissionRecordSpec()
Example #23
import sys
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from keras.optimizers import Adam

from past.utils import old_div
from malmo import MalmoPython

import logging
from malmo import malmoutils
import traceback

malmoutils.fix_print()

# initialize two agents
agent_host1 = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host1)
recordingsDirectory1 = malmoutils.get_recordings_directory(agent_host1)

agent_host2 = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host2)
recordingsDirectory2 = malmoutils.get_recordings_directory(agent_host2)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# client pool
client_pool = MalmoPython.ClientPool()
client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))
Example #24
def main():
    # Start mission
    # Create default Malmo objects:
    global agent_host
    global matrix2dOriginal
    global maze_map
    global actionHistCounter
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    # Number of times to repeat the mission
    num_repeats = 50

    esFile = open("Eval_Stats.txt", "w+")
    esFile.write("\n")
    esFile.close()

    esFile = open("Eval_Stats.txt", "a")

    trFile = open("training_result.txt", "w+")
    trFile.write("\n")
    trFile.close()

    trFile = open("training_result.txt", "a")

    for i in range(num_repeats):
        esFile.write("Run #" + str(i + 1) + "\n")
        actionHistCounter = i + 1
        # size = int(6 + 0.5*i)
        print("Size of maze:", size_of_maze)
        #my_mission = MalmoPython.MissionSpec(get_mission_xml("0", 0.4 + float(i/20.0), size_of_maze, 0), True)
        randomDif = random.uniform(-0.2, 0.2)

        print("Parameters of the mission:", str(i), "next:", 0.4 + randomDif,
              "size:", size_of_maze)
        my_mission = MalmoPython.MissionSpec(
            get_mission_xml(str(i), 0.4 + randomDif, size_of_maze, 0), True)
        # my_mission = MalmoPython.MissionSpec(get_mission_xml(), True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(1)
        # Attempt to start a mission:
        max_retries = 3
        my_clients = MalmoPython.ClientPool()
        my_clients.add(MalmoPython.ClientInfo(
            '127.0.0.1', 10000))  # add Minecraft machines here as available

        for retry in range(max_retries):
            try:
                agent_host.startMission(my_mission, my_clients,
                                        my_mission_record, 0,
                                        "%s-%d" % ('Moshe', i))
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission", (i + 1), ":", e)
                    exit(1)
                else:
                    time.sleep(2)

        # Loop until mission starts:
        print(
            "Waiting for the mission",
            (i + 1),
            "to start ",
        )
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            #sys.stdout.write(".")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)

        print()
        print("Mission", (i + 1), "running.")

        grid = load_grid(world_state, agent_host)
        # print("World State Grid:", grid)
        print("Size of actual map:", len(grid))

        maze_map = get_maze_map(grid)
        print("maze map:", len(maze_map))
        #print(maze_map[244])
        #The maze construction
        matrix2dOriginal = maze_to_2dMatrix(maze_map, size_of_maze)
        matrix2d = maze_to_2dMatrix_reversed(maze_map, size_of_maze)
        print("the matrix 2d: ", matrix2d)
        matrixArray = matrix2d.flatten()

        start_and_end_positions_in_actual_map = find_start_end(grid)

        print("size of maze map:", len(maze_map))
        print("first position in actual map:", first_block_index_in_actual_map)
        print("last position in actual map:", last_block_index_in_actual_map)

        global agent_current_position_xy_in_maze, agent_current_position_index_in_grid

        agent_current_position_xy_in_maze = get_xy_position_of_maze_map_by_position_of_actual_map(\
            start_and_end_positions_in_actual_map[0], \
                grid)

        print("Started: agent current position(xy in maze):",
              agent_current_position_xy_in_maze)

        agent_current_position_index_in_grid = get_position_of_actual_map_by_xy_position_of_maze_map(\
            agent_current_position_xy_in_maze, grid)

        print("Started: agent current position(index in grid):", agent_current_position_index_in_grid \
            , "compared with real position:", start_and_end_positions_in_actual_map[0])

        index_of_yaw = my_mission.getAsXML(True).index("yaw")
        yaw_of_agent = int(
            re.compile(r"(\d+)").match(
                my_mission.getAsXML(True)[index_of_yaw + 5:index_of_yaw +
                                          8]).group(1))
        sync_agent_direction_with_yaw(yaw_of_agent)

        print("Started: agent current yaw(face to where):", yaw_of_agent)

        # go_to_goal_and_finish_mission(grid, start_and_end_positions_in_actual_map[0], \
        #     start_and_end_positions_in_actual_map[1], world_state, agent_host, i)

        print("Started: How many walkable blocks in front of agent's direction:", agent_current_direction, "is walk able? Answer:", \
            get_num_of_walkable_blocks_in_front_of_agent(agent_current_position_xy_in_maze, size_of_maze, grid))

        # test_moving(agent_host, [3, 3, 0, 3, 3, 0, 3])

        positionTransition(grid, matrixArray, yaw_of_agent, size_of_maze)

        trainingStart = time.time()

        trainingProcess = Process(target=missionTrainingStart,
                                  args=(actionHistCounter, ))
        trainingProcess.start()

        stringList = []
        is_complete_action_history = False

        curr_action_counter = 0

        while True:
            if not is_complete_action_history:

                actionHistFile = None
                while True:
                    try:
                        actionHistFile = open(
                            "action_history_" + str(actionHistCounter) +
                            "_.txt", "r")
                        stringList = actionHistFile.readlines()
                        if len(stringList) != 0:
                            break
                    except:
                        continue

                #actionHistFile = open("action_history_"+str(actionHistCounter)+"_.txt", "r")

                #stringList = actionHistFile.readlines()
                print("Reading action history file, get string: ", stringList)
                curr_action_list = stringList[0].split(' ')
                actionHistFile.close()

            print("Here is the list length:", len(curr_action_list),
                  curr_action_counter + 1)

            try:
                if (len(curr_action_list) >= curr_action_counter + 1):
                    action = curr_action_list[curr_action_counter]
                    convertAction = directionConvert(int(action[0]))
                    test_moving(agent_host, [convertAction], grid)
                    curr_action_counter += 1
            except ValueError:
                # The last index of action is a newline character
                break

            if (stringList[len(stringList) - 1] == "END"):
                is_complete_action_history = True

            if (is_complete_action_history
                    and len(curr_action_list) == curr_action_counter - 1):
                break

        trainingProcess.join()
        trainingEnd = time.time()
        trainingElapsed = trainingEnd - trainingStart
        esFile.write("Training Time: " + str(trainingElapsed) + " ")

        #actionHistFile.close()
        '''
        print(stringList)
        actionCollection = []
        positionCollection = []
        for n in range(0, len(stringList)-1):
            tmp = stringList[n].split(' ')
            for m in range(0,len(tmp)-1):
                L = tmp[m].split(',')
                actionCollection.append(L[0])
                positionCollection.append([L[1],L[2]])

        print('The original: ',actionCollection)
        print(positionCollection)
        '''
        """
        del stringList[-1]
        for string in stringList:
            actionCollection = string.split(' ')
            del actionCollection[-1]
            for aindex in range(len(actionCollection)):
                converted = directionConvert(int(actionCollection[aindex]))
                actionCollection[aindex] = converted
            actionList.append(actionCollection)
        """
        """
        for testingset in actionList:
            #check if it's reachable

            test_moving(agent_host, testingset)
        """
        '''
        actionList = []
        
        for index in range(len(actionCollection)):
            row,col = positionCollection[index][0], positionCollection[index][1]
            action = actionCollection[index]
            print(matrix2d[int(row)][int(col)])
            if matrix2d[int(row)][int(col)] == 0:
                convertAction = directionConvert(int(action))
                actionList.append(convertAction)
        #print('THIS IS THE ACTION: ',len(actionList), actionList)
        
        print('The list:', actionList)
        #raise('STOP HERE')
        test_moving(agent_host, actionList, grid)
        '''

        print(
            "Training complete. Training result can be found in training_result.txt."
        )

        travelStart = time.time()
        go_to_goal_and_finish_mission(grid, agent_current_position_index_in_grid, \
             start_and_end_positions_in_actual_map[1], world_state, agent_host, i)
        travelEnd = time.time()
        travelElapsed = travelEnd - travelStart
        esFile.write("Agent Travel Time: " + str(travelElapsed) + "\n\n")

        print("Aiku did it!")

    trFile.close()
    esFile.close()
Example #25
import json
import logging
import math
import os
import random
import sys
import time
import re
import uuid
from collections import namedtuple
from operator import add

EntityInfo = namedtuple('EntityInfo', 'x, y, z, name')

# Create one agent host for parsing:
agent_hosts = [MalmoPython.AgentHost()]

# Parse the command-line options:
agent_hosts[0].addOptionalFlag("debug,d", "Display debug information.")
agent_hosts[0].addOptionalIntArgument(
    "agents,n", "Number of agents to use, including observer.", 4)

PICKAXE_POS = [292, 436]
D_TOOL_POS = [290, 436]
try:
    agent_hosts[0].parse(sys.argv)
except RuntimeError as e:
    print('ERROR:', e)
    print(agent_hosts[0].getUsage())
    exit(1)
if agent_hosts[0].receivedArgument("help"):
    print(agent_hosts[0].getUsage())
    exit(0)
Example #26
def main(model=None, mode='train', start_episode=0):
    my_xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      <About>
        <Summary>Hill Descent.</Summary>
      </About>
      <ModSettings>
        <MsPerTick>20</MsPerTick>
      </ModSettings>
      <ServerSection>

        <ServerInitialConditions>

            <Time><StartTime>1</StartTime></Time>
        </ServerInitialConditions>
        <ServerHandlers>

          <DefaultWorldGenerator seed="-999595225643433963" forceReset="false" destroyAfterUse="false" />

          <ServerQuitFromTimeUp timeLimitMs="100000000"/>
          <ServerQuitWhenAnyAgentFinishes/>
        </ServerHandlers>
      </ServerSection>
      <AgentSection mode="Survival">
        <Name>Bob</Name>
        <AgentStart>
          <Placement x="28.5" y="87" z="330.5" pitch="-90" yaw="0"/>
        </AgentStart>
        <AgentHandlers>
          <DiscreteMovementCommands/>
          <MissionQuitCommands quitDescription="done"/>
          <ChatCommands/>
          <ObservationFromFullStats/>
          <ObservationFromGrid>
              <Grid name="sight">
                  <min x="{}" y="{}" z="{}"/>
                  <max x="{}" y="{}" z="{}"/>
              </Grid>
              <Grid name="feet">
                  <min x="0" y="-1" z="0"/>
                  <max x="0" y="-1" z="0"/>
              </Grid>
          </ObservationFromGrid>
          <AgentQuitFromTouchingBlockType>
              <Block type="cobblestone" />
          </AgentQuitFromTouchingBlockType>
        </AgentHandlers>
      </AgentSection>
    </Mission>

    '''.format(-(grid_width - 1) // 2, -grid_height, -(grid_width - 1) // 2,
               (grid_width - 1) // 2, grid_height, (grid_width - 1) // 2)

    batch_size = 100
    agent = DQNAgent(state_size, action_size, learning_rate, discount_rate,
                     epsilon, epsilon_min, epsilon_decay)
    if model != None:
        agent.load(model)
        if mode == 'test':
            agent.epsilon = 0.0
        print('loaded model: {}'.format(model))
    else:
        clear_csv('./data/results.csv')
        clear_csv('./data/moves.csv')

    my_client_pool = MalmoPython.ClientPool()
    my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))
    agent_host = MalmoPython.AgentHost()

    for e in range(start_episode + 1, episodes + 1):
        my_mission = MalmoPython.MissionSpec(my_xml, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(2)
        print("Waiting for the mission to start", end=' ')
        agent_host.startMission(
            my_mission,
            my_mission_record,
        )
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)
        print()
        agent_host.sendCommand('chat /kill @e[type=Chicken]')
        agent_host.sendCommand('chat /kill @e[type=Pig]')
        agent_host.sendCommand('chat /kill @e[type=Cow]')
        moves = 0
        episode_reward = 0

        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()
            if world_state.number_of_observations_since_last_state > 0:
                try:
                    obvsText = world_state.observations[-1].text
                    data = json.loads(obvsText)
                except:
                    print("Error when getting state")
                    continue

                state = get_state(data)

                prev_x = data.get(u'XPos', 0)
                prev_y = data.get(u'YPos', 0)
                prev_z = data.get(u'ZPos', 0)

                useful_state = [state[2], state[6], state[7], state[8], \
                    state[10], state[11], state[13], \
                    state[14], state[16], state[17], \
                    state[18], state[22]]

                action = agent.act(useful_state)

                if ((action == 0 and state[grid_center - grid_width] == 0)
                        or (action == 1 and state[grid_center + 1] == 0) or
                    (action == 2 and state[grid_center + grid_width] == 0)
                        or (action == 3 and state[grid_center - 1] == 0)):
                    agent_host.sendCommand(jump_directions[action])
                else:
                    agent_host.sendCommand(directions[action])
                time.sleep(0.25)
                #print("North:", state[grid_center - grid_width], \
                #      "  East:", state[grid_center + 1], \
                #      "  South:", state[grid_center + grid_width], \
                #      "  West:", state[grid_center - 1])

                try:
                    world_state = wait_world_state(agent_host, world_state)
                    obvsText = world_state.observations[-1].text
                    data = json.loads(obvsText)
                except:
                    print("Error when getting state")
                    continue

                current_x = data.get(u'XPos', 0)
                current_y = data.get(u'YPos', 0)
                current_z = data.get(u'ZPos', 0)
                damage_taken = calculate_damage(prev_y, current_y)
                next_state = get_state(data)

                useful_next_state = [next_state[2], next_state[6], next_state[7], next_state[8], \
                    next_state[10], next_state[11], next_state[13], \
                    next_state[14], next_state[16], next_state[17], \
                    next_state[18], next_state[22]]

                # print("previous and current y", prev_y, current_y)
                # print("damage taken", damage_taken)
                #print("X:", prev_x, current_x, "\n", \
                #      "Y:", prev_y, current_y, "\n", \
                #      "Z:", prev_z, current_z, "\n")
                moved = (prev_x != current_x or prev_y != current_y
                         or prev_z != current_z)
                reward = (2 * (prev_y - current_y)
                          - 50 * damage_taken - 1) if moved else -1000
                episode_reward += reward
                done = (current_y <= goal_height
                        or not world_state.is_mission_running
                        or data['Life'] <= 0)

                agent.remember(useful_state, action, reward, useful_next_state,
                               done)
                if ((action == 0 and state[grid_center - grid_width] == 0)
                        or (action == 1 and state[grid_center + 1] == 0) or
                    (action == 2 and state[grid_center + grid_width] == 0)
                        or (action == 3 and state[grid_center - 1] == 0)):
                    print(
                        'episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                        .format(e, episodes, jump_directions[action], reward,
                                agent.epsilon, moves, done))
                else:
                    print(
                        'episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                        .format(e, episodes, directions[action], reward,
                                agent.epsilon, moves, done))
                moves += 1

                if mode == 'train' or model == None:
                    write_to_csv('./data/moves.csv',
                                 [e, current_x, current_y, current_z, reward])

                if e > batch_size:
                    agent.replay(batch_size)

                if done or moves > max_moves:
                    agent_host.sendCommand("quit")

        if (mode == 'train'
                or model == None) and (e in checkpoints
                                       or agent.epsilon <= epsilon_min):
            print('saving model at episode {}'.format(e))
            agent.save('./models/model_{}'.format(e))
            if agent.epsilon <= epsilon_min:
                break

        time.sleep(1)
        # my_mission.forceWorldReset()
        if mode == 'train' or model == None:
            write_to_csv('./data/results.csv',
                         [e, episode_reward, moves,
                          int(episode_reward > 0)])
Example #27
    def __init__(self, env_config):
        # Static Parameters
        self.size = 10
        self.reward_density = .1
        self.penalty_density = .02
        self.obs_size = 5
        self.max_global_steps = (self.size * 2)**2
        self.log_frequency = 10
        self.action_dict = {
            0: 'move 1',  # Move one block forward
            1: 'turn 1',  # Turn 90 degrees to the right
            2: 'turn -1',  # Turn 90 degrees to the left
            3: 'attack 1',  # Destroy block
            4: 'jumpmove 1'  # Jump up and move forward 1 block
        }
        self.blocks_dict = {
            "redstone_ore": 1,
            "coal_ore": 2,
            "emerald_ore": 3,
            "iron_ore": 4,
            "gold_ore": 5,
            "diamond_ore": 6,
            "lava": -1,
            "flowing_lava": -1
        }

        # Rllib Parameters
        self.action_space = Discrete(len(self.action_dict))
        self.observation_space = Box(-1,
                                     6,
                                     shape=(np.prod(
                                         [2, self.obs_size, self.obs_size]), ),
                                     dtype=np.int32)

        # Malmo Parameters
        self.agent_host = MalmoPython.AgentHost()
        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # ResourceCollector Parameters
        self.obs = None
        self.obsdict = None  # Stores last json loaded observation
        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.resources_collected = {
            "diamond": [0],
            "redstone": [0],
            "coal": [0],
            "emerald": [0],
            "iron_ore": [0],
            "gold_ore": [0]
        }
        self.deaths = []
        self.death_occurred = False
        self.steps = []
        self.episode_start = time.time()
        self.episode_end = time.time()
Example #28
from utility import safeStartMission, reload, updateWorldState
from basicAgent import basic_agent
import os
import sys
import time

NUM_OF_GAMES = 100

if sys.version_info[0] == 2:
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w',
                           0)  # flush print output immediately
else:
    import functools
    print = functools.partial(print, flush=True)
# Create default Malmo objects:
agent_host = MalmoPython.AgentHost()
opponent_host = MalmoPython.AgentHost()
spectator = MalmoPython.AgentHost()

try:
    spectator.parse(sys.argv)
except RuntimeError as e:
    print('ERROR:', e)
    print(spectator.getUsage())
    exit(1)
if spectator.receivedArgument("help"):
    print(spectator.getUsage())
    exit(0)
mission_file = './simple_arena.xml'
with open(mission_file, 'r') as f:
    print("Loading mission from %s" % mission_file)
Example #29
def doXML(area):
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
                <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
                  <About>
                    <Summary>Hello world!</Summary>
                  </About>
                  <ServerSection>
                    <ServerHandlers>
                      <FlatWorldGenerator generatorString="3;7,0,5*3,2;3;,biome_1" forceReset="true"/>
                      <DrawingDecorator>
                        ''' + placeBottom(area) + '''
                      </DrawingDecorator>
                    </ServerHandlers>
                  </ServerSection>
                  <AgentSection mode="Creative">
                    <Name>SketchyAI</Name>
                    <AgentStart>
                        ''' + '<Placement x="{0}" y="{1}" z="{2}" yaw="0"/>'.format(.5, 30, .5) + '''
                    </AgentStart>
                    <AgentHandlers>
                      <ObservationFromFullStats/>
                      <ContinuousMovementCommands turnSpeedDegs="180"/>
                    </AgentHandlers>
                  </AgentSection>
                </Mission>'''
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission.setModeToSpectator()

    # Attempt to start a mission:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print ("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print("Waiting for the mission to start ")
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        sys.stdout.write(".")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print ("Error:", error.text)

    print()
    print ("Mission running ")

    # Loop until mission ends:
    while world_state.is_mission_running:
        sys.stdout.write(".")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print ("Error:", error.text)

    print()
    print ("Mission ended")
Example #30
    def __init__(self,
                 missionXML,
                 n_games=500,
                 max_retries=3,
                 starting_zombies=1,
                 XSize=10,
                 ZSize=10,
                 aggregate_episode_every=5,
                 agent_search_resolution=30,
                 load_model=False):
        # keras attributes
        self.n_games = n_games

        self._init_logger()

        # keras
        self.n_actions = 4
        self.agent = Agent(gamma=0.99,
                           epsilon=1.0,
                           alpha=0.0005,
                           input_dims=7,
                           n_actions=self.n_actions,
                           mem_size=1000000,
                           batch_size=64,
                           epsilon_end=0.01)
        self._load_dqn_model(load_model)

        self.scores = []
        self.eps_history = []
        self.aggregate_episode_every = aggregate_episode_every

        # qtable
        self.Qtb = {}
        self._load_qtable(load_model)
        self.epsilon = 0.01  # chance of taking a random action instead of the best

        # agent
        self.agent_host = MalmoPython.AgentHost()

        try:
            self.agent_host.parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_host.getUsage())
            exit(1)

        # mission
        self.missionXML = missionXML
        # self._validate_mission()

        self.max_retries = max_retries

        #adding clients
        self.my_client_pool = None
        # self._add_starters()
        self._add_default_client()

        self.world_state = None

        # mission generator
        self.mission_generator = MissionGenerator(self.missionXML)
        self.starting_zombies = starting_zombies
        self.num_zombies = starting_zombies
        self.zombie_difference = 0  # for reward calculation
        self.XSize = XSize
        self.ZSize = ZSize

        # canvas
        self.visual = Visualizer(arena_width=self.XSize,
                                 arena_breadth=self.ZSize)

        # direction learner variables
        self.agent_search_resolution = agent_search_resolution
        self.agent_stepsize = 1
        self.agent_turn_weight = 100
        self.agent_edge_weight = -100
        self.agent_mob_weight = -10
        self.agent_turn_weight = 0  # Negative values to penalise turning, positive to encourage.
        self.turning_diff = 0

        # for visualization
        self.flash = False
        self.current_life = 0

        # main loop variables
        self.self_x = 0
        self.self_z = 0
        self.current_yaw = 0
        self.ob = None
        self.all_zombies_dead = False
        self.num_heals = 0
        self.life_decrease_penalty = 0
        self.TimeAlive = 0
        self.time_rewards = 0
        self.heal_rewards = 0
        self.move_backwards_reward = 0