Example #1
0
    def run(self):
        """Runs the game with the registered agents.

        Starts one local Malmo client per agent, launches the shared
        mission, runs each agent's reasoning loop on its own thread, and
        then polls agent beliefs forever, forwarding any ``tasks`` belief
        to the task handler.

        Raises:
            :class:`jason_malmo.exceptions.NoAgentsException`: There are not registered agents in the game.\n
                Register an agent before running the game::

                    game.register('/path/to/file.asl')
                    game.run()
        """
        self._client_pool = MalmoPython.ClientPool()

        if not len(self._agents):
            raise NoAgentsException

        # One local Malmo client per agent, on consecutive ports from 10000.
        # NOTE(review): the range yields len(self._agents) + 1 ports — one more
        # client slot than there are agents; confirm the extra slot is intended.
        for port in range(10000, 10000 + len(self._agents) + 1):
            self._client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

        self._my_mission = MalmoPython.MissionSpec(self._get_mission_xml(),
                                                   True)

        # Start the mission for every agent, then block until all have begun.
        for (index, agent) in enumerate(self._agents):
            malmoutils.parse_command_line(agent.malmo_agent)
            self._safe_start_mission(
                agent.malmo_agent, self._my_mission, self._client_pool,
                malmoutils.get_default_recording_object(
                    agent.malmo_agent, "saved_data"), index, '')
        self._safe_wait_for_start(
            [agent.malmo_agent for agent in self._agents])

        # Run each agent on its own worker thread.
        threads = []
        for agent in self._agents:
            thr = threading.Thread(target=self._jason_env.run_agent,
                                   args=(agent, ),
                                   kwargs={})
            thr.start()
            threads.append(thr)

        # TODO while mission is running
        # Poll loop: forward each agent's 'tasks' belief to the task handler.
        while True:
            for agent in self._agents:
                for (belief, value) in agent.beliefs.items():
                    if belief[0] == 'tasks':
                        tasks = []
                        # NOTE(review): assumes the belief value is a collection
                        # whose first element exposes .args[0] as an iterable of
                        # tasks — confirm against the belief representation.
                        for task in list(value)[0].args[0]:
                            tasks.append(task)
                        self.tasks.handle(agent, tasks)
            time.sleep(0.05)
Example #2
0
def startMission(agent_host, xml):
    """Build a mission from *xml* and start it on *agent_host*, retrying on failure.

    Blocks until the mission has begun. Exits the process if the mission
    cannot be started after three attempts, or if the world state reports
    errors while waiting for the mission to begin.
    """
    mission = MalmoPython.MissionSpec(xml, True)
    record = malmoutils.get_default_recording_object(agent_host, "teleport_results")

    attempts = 3
    for attempt in range(attempts):
        try:
            agent_host.startMission(mission, record)
        except RuntimeError as e:
            if attempt < attempts - 1:
                # Minecraft may still be loading; wait and try again.
                time.sleep(2)
                continue
            print("Error starting mission", e)
            print("Is the game running?")
            exit(1)
        else:
            break

    # Poll until the mission reports that it has begun, surfacing any errors.
    state = agent_host.peekWorldState()
    while not state.has_mission_begun:
        time.sleep(0.1)
        state = agent_host.peekWorldState()
        for err in state.errors:
            print("Error:", err.text)
        if len(state.errors) > 0:
            exit(1)
Example #3
0
# NOTE(review): fragment of a larger script — agent_host, getMissionXML,
# MOB_TYPE, validate and my_client_pool are defined outside this excerpt.
if agent_host.receivedArgument("test"):
    num_reps = 1  # single repetition when run under the integration-test flag
else:
    num_reps = 30000

current_yaw = 0
best_yaw = 0
current_life = 0

for iRepeat in range(num_reps):
    mission_xml = getMissionXML(MOB_TYPE + " Apocalypse #" + str(iRepeat))
    my_mission = MalmoPython.MissionSpec(mission_xml, validate)
    max_retries = 3
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "Mission_" + str(iRepeat))
    # Retry mission start: the Minecraft client may still be loading.
    for retry in range(max_retries):
        try:
            # Attempt to start the mission:
            agent_host.startMission(my_mission, my_client_pool,
                                    my_mission_record, 0, "predatorExperiment")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission", e)
                print("Is the game running?")
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.getWorldState()  # (loop body continues beyond this excerpt)
Example #4
0
frameTest = []
wallclockTimes = []
distances = []

agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

print("WELCOME TO THE OVERCLOCK TEST")
print("=============================")
print("This will run the same simple mission with " + str(len(tickLengths)) + " different tick lengths.")
print("(Each test should run faster than the previous one.)")

for iRepeat in range(len(tickLengths)):
    msPerTick = tickLengths[iRepeat]
    my_mission = MalmoPython.MissionSpec(GetMissionXML(str(msPerTick)),validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "Overclock_Test_" + str(msPerTick) + "ms_per_tick");
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission( my_mission, my_mission_record )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.1)
Example #5
0
# NOTE(review): DDQN training-script fragment — MemoryD, nn_model, Agent, Adam,
# mean_huber_loss, mapblock, GetMissionXML, validate and agent_host come from
# outside this excerpt.
num_reps = 300

#=======core part initialization====================================
#input size 5*5, you can change the size here
memory = MemoryD(5)
network_model, q_values_func = nn_model(input_shape=[5, 5])

agent = Agent(network_model, q_values_func, memory, 'train', 'ddqn')
#set learning rate to be 0.00025
agent.do_compile(optimizer=Adam(lr=0.00025), loss_func=mean_huber_loss)
agent.memoryD.clear()
#===================================================================

for iRepeat in range(num_reps):
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "./Mission_{}".format(iRepeat + 1))
    #my_mission_record = MalmoPython.MissionRecordSpec('./' + "Mission_" + str(iRepeat) + ".tgz")
    #my_mission_record.recordRewards()
    #my_mission_record.recordMP4(24,400000)
    #my_mission_record.recordObservations()
    my_mission = MalmoPython.MissionSpec(GetMissionXML(mapblock, agent_host),
                                         validate)

    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                # NOTE(review): excerpt truncated here — the exit/retry-sleep
                # branch of this handler is missing.
# NOTE(review): fragment (its "Example #6" separator was lost) — agent_host,
# GetMissionXML, validate, NUM_REPEATS/WAIT_TIME/STOP/PATH_LENGTH defaults come
# from outside this excerpt.
if agent_host.receivedArgument("test"):
    print("Using test settings (overrides other command-line arguments).")
    NUM_REPEATS = 1
    WAIT_TIME = 0.2
    STOP = True
    PATH_LENGTH = 20

agent_host.setObservationsPolicy(
    MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.KEEP_ALL_REWARDS)

for iRepeat in range(NUM_REPEATS):
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML(iRepeat, malmoutils.get_video_xml(agent_host)), validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "QuitFromReachingPosition_Test" + str(iRepeat))
    max_retries = 3
    # Retry mission start: the Minecraft client may still be loading.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.getWorldState()
    # (waiting loop continues beyond this excerpt)
    while not world_state.has_mission_begun:
        time.sleep(0.01)
Example #7
0
# NOTE(review): render-speed benchmark fragment — sizes, num_pixels, old_div,
# GetMissionXML and validate are defined outside this excerpt.
datarate_onscreen=[]

agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

print("WELCOME TO THE RENDER SPEED TEST")
print("================================")
print("This will run the same simple mission with " + str(len(sizes)) + " different frame sizes.")

# Each frame size is run twice: once prioritising offscreen rendering, once not.
for iRepeat in range(len(sizes) * 2):
    prioritiseOffscreen = "true" if iRepeat % 2 else "false"
    width,height = sizes[old_div(iRepeat,2)]
    if iRepeat % 2:
        num_pixels.append(width*height)
    my_mission = MalmoPython.MissionSpec(GetMissionXML(str(width), str(height), prioritiseOffscreen), validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "RenderSpeed_Test" + str(iRepeat));
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission( my_mission, my_mission_record )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.getWorldState()
    # (waiting loop continues beyond this excerpt)
    while not world_state.has_mission_begun:
        time.sleep(0.1)
Example #8
0
# NOTE(review): autopep8-formatted variant of the overclock benchmark above —
# tickLengths, GetMissionXML and validate come from outside this excerpt.
agent_host.setObservationsPolicy(
    MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

print("WELCOME TO THE OVERCLOCK TEST")
print("=============================")
print("This will run the same simple mission with " + str(len(tickLengths)) +
      " different tick lengths.")
print("(Each test should run faster than the previous one.)")

for iRepeat in range(len(tickLengths)):
    msPerTick = tickLengths[iRepeat]
    my_mission = MalmoPython.MissionSpec(GetMissionXML(str(msPerTick)),
                                         validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "Overclock_Test_" + str(msPerTick) + "ms_per_tick")
    max_retries = 3
    # Retry mission start: the Minecraft client may still be loading.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.getWorldState()
    # (waiting loop continues beyond this excerpt)
    while not world_state.has_mission_begun:
        time.sleep(0.1)
Example #9
0
def run(argv=None):
    """Run a short demo mission, restarting Minecraft if it stops responding.

    Builds a 10-second mission with video and a position reward, starts it on
    a single local client (port 10000), then drives the agent forward with
    random turns while logging rewards, errors and video frames.  If no
    world-state change is observed for ``max_response_time`` seconds,
    ``restart_minecraft`` is invoked.

    Args:
        argv: Command-line argument list forwarded to
            ``malmoutils.parse_command_line``.  Defaults to ``['']``.

    Returns:
        None.  Returns early when ``MALMO_XSD_PATH`` is unset; exits the
        process when the mission cannot be started.
    """
    # Fix: avoid a mutable default argument (argv=['']) — use a None sentinel.
    # Behaviour is unchanged: the effective default is still [''].
    if argv is None:
        argv = ['']

    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    # 10-second mission with 320x240 video and a reward for reaching the goal.
    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds( 10 )
    my_mission.requestVideo( 320, 240 )
    my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # The Minecraft client may still be loading; retry before giving up.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        # Restart Minecraft if the mission takes too long to begin.
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:",error.text)
    print()

    last_delta = time.time()
    # main loop: walk forward with random turns until the mission ends.
    while world_state.is_mission_running:
        agent_host.sendCommand( "move 1" )
        agent_host.sendCommand( "turn " + str(random.random()*2-1) )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",world_state.number_of_video_frames_since_last_state,world_state.number_of_observations_since_last_state,world_state.number_of_rewards_since_last_state)
        if (world_state.number_of_video_frames_since_last_state > 0 or
           world_state.number_of_observations_since_last_state > 0 or
           world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            # No new data for too long: assume Minecraft hung and restart it.
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:",reward.getValue())
        for error in world_state.errors:
            print("Error:",error.text)
        for frame in world_state.video_frames:
            print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
Example #10
0
# NOTE(review): fragment — agent_host and my_mission come from outside this
# excerpt; the start-wait loop is truncated at the end.
# -- test each action set in turn --
max_retries = 3
action_sets = ['discrete_absolute', 'discrete_relative', 'teleport']
for action_set in action_sets:

    # NOTE(review): 'discrete_absolute' and 'discrete_relative' enable the same
    # command set — confirm this duplication is intended.
    if action_set == 'discrete_absolute':
        my_mission.allowAllDiscreteMovementCommands()
    elif action_set == 'discrete_relative':
        my_mission.allowAllDiscreteMovementCommands()
    elif action_set == 'teleport':
        my_mission.allowAllAbsoluteMovementCommands()
    else:
        print('ERROR: Unsupported action set:', action_set)
        exit(1)

    my_mission_recording = malmoutils.get_default_recording_object(
        agent_host, action_set)
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_recording)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    print("Waiting for the mission to start", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
Example #11
0
# NOTE(review): unformatted variant of the action-set fragment above; same
# external dependencies (agent_host, my_mission) and same truncation.
# -- test each action set in turn --
max_retries = 3
action_sets = ['discrete_absolute','discrete_relative', 'teleport']
for action_set in action_sets:

    # NOTE(review): the first two branches enable the same command set —
    # confirm this duplication is intended.
    if action_set == 'discrete_absolute':
        my_mission.allowAllDiscreteMovementCommands()
    elif action_set == 'discrete_relative':
        my_mission.allowAllDiscreteMovementCommands()
    elif action_set == 'teleport':
        my_mission.allowAllAbsoluteMovementCommands()
    else:
        print('ERROR: Unsupported action set:',action_set)
        exit(1)

    my_mission_recording = malmoutils.get_default_recording_object(agent_host, action_set)
    for retry in range(max_retries):
        try:
            agent_host.startMission( my_mission, my_mission_recording )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2.5)

    print("Waiting for the mission to start", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
Example #12
0
# NOTE(review): smoke-test script for malmoutils; requires a built MalmoPython
# extension module on the path.
'''Tests malmoutils.'''

import MalmoPython
import malmoutils

agentHost = MalmoPython.AgentHost()

# See if we can parse our extended command line.
malmoutils.parse_command_line(agentHost)

# As we are not recording our video xml should be an empty string.
assert malmoutils.get_video_xml(agentHost) == ''

# Test that we can get a default recording spec.
assert type(malmoutils.get_default_recording_object(agentHost, "test")) == MalmoPython.MissionRecordSpec

# Default recordings directory is ''.
assert malmoutils.get_recordings_directory(agentHost) == ''

def clientInfos(cp):
    """Return the (ip_address, control_port, command_port) triple of every client in pool *cp*."""
    infos = []
    for client in cp.clients:
        infos.append((client.ip_address, client.control_port, client.command_port))
    return infos

# Test adding some client infos to a client pool.
clientPool = MalmoPython.ClientPool()
assert len(clientPool.clients) == 0
c1 = ("localhost", 10000, 0)  # (ip, control_port, command_port)
client1 = MalmoPython.ClientInfo(*c1)
clientPool.add(client1)
assert clientInfos(clientPool) == [c1]
c2 = ("127.0.0.1", 10001, 20001)
# NOTE(review): excerpt truncated here — the checks for c2 are missing.
Example #13
0
        # NOTE(review): excerpt begins mid-function — x, z, imap, my_mission and
        # agent_host are defined outside this view.
        my_mission.drawBlock( x,45,z,"lava")

    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000)) # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'tabular_q_learning'

    num_repeats = 150
    cumulative_rewards = []
    for i in range(num_repeats):

        print("\nMap %d - Mission %d of %d:" % ( imap, i+1, num_repeats ))

        my_mission_record = malmoutils.get_default_recording_object(agent_host, "./save_%s-map%d-rep%d" % (expID, imap, i))

        # Retry mission start: the Minecraft client may still be loading.
        for retry in range(max_retries):
            try:
                agent_host.startMission( my_mission, my_clients, my_mission_record, agentID, "%s-%d" % (expID, i) )
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission:",e)
                    exit(1)
                else:
                    time.sleep(2.5)

        print("Waiting for the mission to start", end=' ')
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
Example #14
0
        # NOTE(review): excerpt begins inside a mission-start wait helper —
        # states, start_time and time_out come from the missing part of the
        # function.
        errors = [e for w in states for e in w.errors]
        if len(errors) > 0:
            print("Errors waiting for mission start:")
            for e in errors:
                print(e.text)
            print("Bailing now.")
            exit(1)
        time.sleep(0.1)
        print(".", end=' ')
    print()
    if time.time() - start_time >= time_out:
        print("Timed out waiting for mission to begin. Bailing.")
        exit(1)
    print("Mission has started.")

# NOTE(review): the second recording object below is created from agent_host1
# even though the mission is started for agent_host2 — possible copy-paste
# slip; confirm it is intended.
safeStartMission(agent_host1, my_mission, client_pool, malmoutils.get_default_recording_object(agent_host1, "agent_1_viewpoint_discrete"), 0, '' )
safeStartMission(agent_host2, my_mission, client_pool, malmoutils.get_default_recording_object(agent_host1, "agent_2_viewpoint_discrete"), 1, '' )
safeWaitForStart([agent_host1, agent_host2])

# perform a few actions
reps = 3
time.sleep(1)
for i in range(reps):
    agent_host1.sendCommand('attack 1')
    agent_host2.sendCommand('attack 1')
    time.sleep(1)
    agent_host1.sendCommand('use 1')
    agent_host2.sendCommand('use 1')
    time.sleep(1)

# wait for the missions to end
# NOTE(review): a new snippet appears to begin here (its separator was lost).
NUM_REPEATS = 10

if agent_host.receivedArgument("test"):
    print("Using test settings (overrides other command-line arguments).")
    NUM_REPEATS = 1
    WAIT_TIME = 0.2
    STOP = True
    PATH_LENGTH = 20

agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.KEEP_ALL_REWARDS)

for iRepeat in range(NUM_REPEATS):
    my_mission = MalmoPython.MissionSpec(GetMissionXML(iRepeat, malmoutils.get_video_xml(agent_host)), validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "QuitFromReachingPosition_Test" + str(iRepeat));
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission( my_mission, my_mission_record )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.getWorldState()
    # (waiting loop continues beyond this excerpt)
    while not world_state.has_mission_begun:
        time.sleep(0.01)
Example #16
0
# ------------------------------------------------------------------------------------------------
# NOTE(review): formatted variant of the malmoutils smoke test; requires a
# built MalmoPython extension module on the path.
'''Tests malmoutils.'''

import MalmoPython
import malmoutils

agentHost = MalmoPython.AgentHost()

# See if we can parse our extended command line.
malmoutils.parse_command_line(agentHost)

# As we are not recording our video xml should be an empty string.
assert malmoutils.get_video_xml(agentHost) == ''

# Test that we can get a default recording spec.
assert type(malmoutils.get_default_recording_object(
    agentHost, "test")) == MalmoPython.MissionRecordSpec

# Default recordings directory is ''.
assert malmoutils.get_recordings_directory(agentHost) == ''


def clientInfos(cp):
    """List each client of pool *cp* as an (ip_address, control_port, command_port) tuple."""
    def _triple(client):
        # Project the three identifying fields of one client.
        return (client.ip_address, client.control_port, client.command_port)

    return [_triple(member) for member in cp.clients]


# Test adding some client infos to a client pool.
clientPool = MalmoPython.ClientPool()
assert len(clientPool.clients) == 0
c1 = ("localhost", 10000, 0)  # (ip, control_port, command_port)
client1 = MalmoPython.ClientInfo(*c1)
clientPool.add(client1)
# NOTE(review): excerpt truncated here — the assertions on the pool contents
# are missing from this view.
Example #17
0
                <ArenaBounds>
                   <min x="-50" y="40" z="-50"/>
                   <max x="50" y="60" z="50"/>
                </ArenaBounds>
                <StartPos x="-3" y="50" z="0"/>
                <Seed>random</Seed>
                <UpdateSpeed>3</UpdateSpeed>
                <PermeableBlocks type="air obsidian"/>
                <BlockType type="beacon"/>
            </MovingTargetDecorator>'''
    else:
        return ""

# NOTE(review): the lines above are the tail of a get_mission_xml() helper
# whose beginning lies outside this excerpt.
# Code for telling Malmo what to do:
my_mission = MalmoPython.MissionSpec(get_mission_xml(), True)
my_mission_record = malmoutils.get_default_recording_object(agent_host, "braitenberg_test")

my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002))

# Attempt to start a mission:
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission( my_mission, my_client_pool, my_mission_record, 0, "braitenberg")
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission:",e)
            exit(1)
        else:
            # NOTE(review): excerpt truncated — the retry delay is missing here.
# NOTE(review): a new snippet appears to begin here (its separator and the
# first half of this comment were lost); it sets up a four-client pool.
# is using these ports):
my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10003))

if agent_host.receivedArgument("test"):
    num_reps = 1
else:
    num_reps = 30000

for iRepeat in range(num_reps):
    my_mission = MalmoPython.MissionSpec(GetMissionXML("Let them eat fish/cookies #" + str(iRepeat + 1), malmoutils.get_video_xml(agent_host)),validate)
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "Mission_{}".format(iRepeat + 1))
    max_retries = 3
    # Retry mission start: the Minecraft client may still be loading.
    for retry in range(max_retries):
        try:
            # Attempt to start the mission:
            agent_host.startMission( my_mission, my_client_pool, my_mission_record, 0, "itemDiscardTestExperiment" )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission",e)
                print("Is the game running?")
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.getWorldState()  # (loop body continues beyond this excerpt)
Example #19
0
                <ArenaBounds>
                   <min x="-50" y="40" z="-50"/>
                   <max x="50" y="60" z="50"/>
                </ArenaBounds>
                <StartPos x="-3" y="50" z="0"/>
                <Seed>random</Seed>
                <UpdateSpeed>3</UpdateSpeed>
                <PermeableBlocks type="air obsidian"/>
                <BlockType type="beacon"/>
            </MovingTargetDecorator>'''
    else:
        return ""

# NOTE(review): the lines above are the tail of a get_mission_xml() helper
# whose beginning lies outside this excerpt.
# Code for telling Malmo what to do:
my_mission = MalmoPython.MissionSpec(get_mission_xml(), True)
my_mission_record = malmoutils.get_default_recording_object(agent_host, "braitenberg_test")

# Attempt to start a mission:
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission( my_mission, my_mission_record )
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission:",e)
            exit(1)
        else:
            time.sleep(2)

# Loop until mission starts:
Example #20
0
            # NOTE(review): excerpt begins inside a mission-start wait helper —
            # errors, start_time and time_out come from the missing part of the
            # function.
            for e in errors:
                print(e.text)
            print("Bailing now.")
            exit(1)
        time.sleep(0.1)
        print(".", end=' ')
    print()
    if time.time() - start_time >= time_out:
        print("Timed out waiting for mission to begin. Bailing.")
        exit(1)
    print("Mission has started.")


safeStartMission(
    agent_host1, my_mission, client_pool,
    malmoutils.get_default_recording_object(agent_host1,
                                            "agent_1_viewpoint_discrete"), 0,
    '')
# NOTE(review): the recording object below is created from agent_host1 even
# though the mission is started for agent_host2 — possible copy-paste slip;
# confirm it is intended.
safeStartMission(
    agent_host2, my_mission, client_pool,
    malmoutils.get_default_recording_object(agent_host1,
                                            "agent_2_viewpoint_discrete"), 1,
    '')
safeWaitForStart([agent_host1, agent_host2])

# perform a few actions
time.sleep(1)
agent_host1.sendCommand('attack 1')
agent_host2.sendCommand('attack 1')
time.sleep(2)
agent_host1.sendCommand('attack 0')
agent_host2.sendCommand('attack 0')
Example #21
0
# NOTE(review): script fragment — MalmoPython and time are imported outside
# this excerpt.
import malmoutils

malmoutils.fix_print()

agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)

print(agent_host)

# Build a short demo mission: 10 s limit, 320x240 video, and a reward for
# reaching the target position.
my_mission = MalmoPython.MissionSpec()
my_mission.timeLimitInSeconds( 10 )
my_mission.requestVideo( 320, 240 )
my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )
print(my_mission)

my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")
print(my_mission_record)

# Retry mission start: the Minecraft client may still be loading.
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission( my_mission, my_mission_record )
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission:",e)
            exit(1)
        else:
            time.sleep(2)

print("Waiting for the mission to start", end=' ')
Example #22
0
def deep_q_learning(sess,
                    agent_host,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=500000,
                    replay_memory_init_size=50000,
                    update_target_estimator_every=10000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=50000,
                    batch_size=32,
                    record_video_every=100):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.
    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost used to drive the Minecraft environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sample when
          initializing the replay memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the
          target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
          Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes (currently unused)
    Yields:
        (total_t, EpisodeStats) after every episode; EpisodeStats holds numpy
        arrays of the episode_lengths and episode_rewards accumulated so far.
    """
    # Load the mission XML and restrict the agent to discrete movement only.
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(
        q_estimator,
        len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(agent_host,
                                                                "./save_%s-rep" % (expID))

    # Start the very first mission, retrying on transient failures.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" %(expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    # Block until the mission has actually begun.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Wait for a non-empty observation to arrive.
    while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()

    world_state = agent_host.getWorldState()

    # Populate the replay memory with initial experience
    print("Populating replay memory...")

    while world_state.number_of_observations_since_last_state <= 0:
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    # Initial state: the same processed frame stacked 4 deep.
    state = gridProcess(world_state)  # Malmo observation grid plays the role of env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)

    for i in range(replay_memory_init_size):
        print("%s th replay memory" %i)

        # Epsilon-greedy action, executed through the Malmo command interface.
        action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        agent_host.sendCommand(actionSet[action])
        # Checking if the mission is done
        world_state = agent_host.peekWorldState()
        # Getting the reward from taking a step
        if world_state.number_of_rewards_since_last_state > 0:
            reward = world_state.rewards[-1].getValue()
            print("Just received the reward: %s on action: %s "%(reward, actionSet[action]))
        else:
            print("No reward")
            reward = 0
        # Getting the next state
        while world_state.number_of_observations_since_last_state <=0 and world_state.is_mission_running:
            print("Sleeping")
            time.sleep(0.1)
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
            done = not world_state.is_mission_running
            replay_memory.append(Transition(state, action, reward, next_state, done))
            state = next_state
        else:
            # Mission ended mid-population: restart it and resume collecting.
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
            world_state = agent_host.peekWorldState()
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            world_state = agent_host.getWorldState()
            if not world_state.is_mission_running:
                print("Breaking")
                break
            state = gridProcess(world_state)
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)

    print("Finished populating memory")

    # NOTE: record_video_every is accepted for API compatibility but video
    # recording (the gym Monitor equivalent) is not wired up here.

    for i_episode in range(num_episodes):
        print("%s-th episode"%i_episode)
        if i_episode != 0:
            # Rebuild and force-reset the mission for every episode after the first.
            mission_file = agent_host.getStringArgument('mission_file')
            with open(mission_file, 'r') as f:
                print("Loading mission from %s" % mission_file)
                mission_xml = f.read()
                my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission.removeAllCommandHandlers()
            my_mission.allowAllDiscreteMovementCommands()
            my_mission.forceWorldReset()
            my_mission.setViewpoint(2)
            my_clients = MalmoPython.ClientPool()
            my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

            max_retries = 3
            agentID = 0
            expID = 'Deep_q_learning '

            # BUGFIX: index the recording path and experiment id by i_episode;
            # the original used the stale replay-population index `i`, so every
            # episode overwrote the same recording.
            my_mission_record = malmoutils.get_default_recording_object(agent_host,
                                                                        "./save_%s-rep%d" % (expID, i_episode))

            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s-%d" % (expID, i_episode))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            print("Waiting for the mission to start", end=' ')
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
                for error in world_state.errors:
                    print("Error:", error.text)

        # Save the current checkpoint (use the explicit session for consistency
        # with every other sess.run/saver call in this function).
        saver.save(sess, checkpoint_path)
        # Wait for the first real observation of the episode.
        while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
            world_state = agent_host.peekWorldState()
        world_state = agent_host.getWorldState()
        state = gridProcess(world_state)
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)
        loss = None

        # One step in the environment
        for t in itertools.count():

            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print("\nCopied model parameters to target network.")

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                    t, total_t, i_episode + 1, num_episodes, loss), end="")
            sys.stdout.flush()

            # Take a step
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
            agent_host.sendCommand(actionSet[action])

            world_state = agent_host.peekWorldState()

            if world_state.number_of_rewards_since_last_state > 0:
                reward = world_state.rewards[-1].getValue()
                print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
            else:
                print("No reward")
                reward = 0
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            done = not world_state.is_mission_running
            print(" IS MISSION FINISHED? ", done)

            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)

            # If our replay memory is full, pop the first element
            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)

            # Save transition to replay memory
            replay_memory.append(Transition(state, action, reward, next_state, done))

            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] = t

            # Sample a minibatch from the replay memory
            samples = random.sample(replay_memory, batch_size)
            states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))

            # Calculate q values and targets (Double DQN)
            q_values_next = q_estimator.predict(sess, next_states_batch)
            best_actions = np.argmax(q_values_next, axis=1)
            q_values_next_target = target_estimator.predict(sess, next_states_batch)
            targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
                discount_factor * q_values_next_target[np.arange(batch_size), best_actions]

            # Perform gradient descent update
            states_batch = np.array(states_batch)
            loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)

            if done:
                print("End of Episode")
                break

            state = next_state
            total_t += 1

        # Add summaries to tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], node_name="episode_reward", tag="episode_reward")
        episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], node_name="episode_length", tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode+1],
            episode_rewards=stats.episode_rewards[:i_episode+1])

    return stats
                <AgentHandlers>
                  <ObservationFromFullStats/>
                  <ContinuousMovementCommands turnSpeedDegs="180"/>
                  <ChatCommands />
                  <MissionQuitCommands quitDescription="give_up"/>
                  <RewardForMissionEnd>
                    <Reward description="give_up" reward="-1000"/>
                  </RewardForMissionEnd>''' + malmoutils.get_video_xml(agent_host) + '''
                </AgentHandlers>
              </AgentSection>
            </Mission>'''

# Create default Malmo objects:

my_mission = MalmoPython.MissionSpec(missionXML, True)
my_mission_record = malmoutils.get_default_recording_object(agent_host, "Mission_1")

# Attempt to start the mission, pausing between attempts; only the final
# attempt's failure is fatal.
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission(my_mission, my_mission_record)
    except RuntimeError as e:
        if retry < max_retries - 1:
            time.sleep(2)
        else:
            print("Error starting mission:",e)
            exit(1)
    else:
        break

# Loop until mission starts:
Пример #24
0
                    <Item reward="-1" type="stained_glass"/>
                </RewardForCollectingItem>
                <RewardForDiscardingItem>
                    <Item reward="1" type="stained_glass"/>
                </RewardForDiscardingItem>''' + malmoutils.get_video_xml(
    agent_host) + '''
            </AgentHandlers>
        </AgentSection>

    </Mission>'''

# Build the mission once and re-run it many times.
my_mission = MalmoPython.MissionSpec(missionXML, True)
# Far fewer repeats when launched with the "test" command-line flag.
num_missions = 10 if agent_host.receivedArgument("test") else 30000
for mission_no in range(num_missions):
    # Alternates 0/1 each mission — presumably toggles item-merging behaviour
    # for the glass collect/discard task; confirm against the mission XML.
    merges_allowed = mission_no % 2
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "Mission_{}".format(mission_no + 1))
    # Retry the start a few times before concluding Minecraft is not running.
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            print(e)
            if retry == max_retries - 1:
                print("Error starting mission", e)
                print("Is the game running?")
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.peekWorldState()
Пример #25
0
def run(argv=['']):
    """Run a 30-second 'wander and jump over obstacles' demo mission.

    Builds a default-world mission with a 3x4x3 block-observation grid
    around the agent, starts it on a local client, then walks the agent
    forward on a weaving path, jumping whenever the observed blocks ahead
    (in the facing direction) are not air.

    argv: command-line arguments forwarded to malmoutils.parse_command_line.
        NOTE(review): mutable default argument — harmless here since it is
        only read, but a None default would be more conventional.
    """
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    #forceReset="true"
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
            <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            
              <About>
                <Summary>Hello world!</Summary>
              </About>
              
              <ServerSection>
                <ServerHandlers>
                  <DefaultWorldGenerator forceReset="true" />
                  <ServerQuitFromTimeUp timeLimitMs="30000"/>
                  <ServerQuitWhenAnyAgentFinishes/>
                </ServerHandlers>
              </ServerSection>
              
              
              <AgentSection mode="Survival">
                <Name>MalmoTutorialBot</Name>
                <AgentStart>
                    <Inventory>
                        <InventoryItem slot="8" type="diamond_pickaxe"/>
                    </Inventory>
                </AgentStart>
                <AgentHandlers>
                    <ObservationFromFullStats/>
                    <ObservationFromGrid>
                        <Grid name="all_the_blocks" >
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="2" z="1"/>
                        </Grid>
                    </ObservationFromGrid>
                    <ContinuousMovementCommands turnSpeedDegs="180"/>
                </AgentHandlers>
              </AgentSection>
            </Mission>'''

    malmoutils.fix_print()

    #agent_host = MalmoPython.AgentHost()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.timeLimitInSeconds(300)
    my_mission.requestVideo(640, 480)

    #my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")

    # Single local Minecraft client on the default Malmo port.
    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Start the mission, retrying on transient failures.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    # Wait for the mission to begin, restarting Minecraft if it takes too long.
    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    last_delta = time.time()

    # main loop:
    # TURN drives the weaving sine-wave steering, TURN2 counts down forced
    # "turn 1" ticks after hitting a wall, JUMP counts down ticks until the
    # jump key is released.
    #agent_host.sendCommand( "jump 1")
    TURN = 0
    TURN2 = 0
    JUMP = 0
    while world_state.is_mission_running:
        print("New Iteration")

        # Release the jump key one tick after it was pressed.
        if JUMP > 0:
            JUMP = JUMP - 1
        if JUMP == 0:
            agent_host.sendCommand("jump 0")
            JUMP = JUMP - 1
        # Always move forward, steering on a sine wave to weave around.
        agent_host.sendCommand("move 1")
        if math.sin(TURN) / 3 >= 0:
            agent_host.sendCommand("turn 0.15")
        else:
            agent_host.sendCommand("turn -0.2")
        print(TURN, " ", math.sin(TURN))
        TURN = TURN + 0.3

        #agent_host.sendCommand( "jump 1" )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        # NOTE(review): assumes at least one observation arrived in the last
        # 0.5 s — an empty observations list would raise IndexError here.
        y = json.loads(world_state.observations[-1].text)

        # Convert Malmo's yaw into a rough compass facing.
        # NOTE(review): `dir` shadows the builtin; the first branch compares
        # Yaw+180 while the others use raw Yaw — confirm the intended ranges.
        #print(y["all_the_blocks"])
        dir = ""
        if y["Yaw"] + 180 < 90:
            dir = "S"
            print("Facing South")
        elif y["Yaw"] < 180:
            dir = "W"
            print("Facing West")
        elif y["Yaw"] < 270:
            dir = "N"
            print("Facing North")
        else:
            dir = "E"
            print("Facing East")

        # Split the 3x4x3 observation grid into 4 horizontal layers of 9
        # blocks each (y = -1..2 around the agent).
        blocks = [[], [], [], []]
        i = 0
        for x in y["all_the_blocks"]:
            blocks[math.floor(i / 9)].append(x)
            i = i + 1

        # Jump if any foot-level block in the facing direction is solid.
        # The index patterns pick the row/column of layer 1 (feet level)
        # that lies ahead for each compass direction.
        if dir == "S":
            willjump = False

            for j in range(0, 3):
                if blocks[1][j] != "air":
                    willjump = True
                print(j, blocks[1][j], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "W":
            willjump = False

            for j in range(0, 3):
                if blocks[1][j * 3] != "air":
                    willjump = True
                print(j * 3, blocks[1][j * 3], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "E":
            willjump = False

            for j in range(1, 4):
                if blocks[1][j * 3 - 1] != "air":
                    willjump = True
                print(j * 3 - 1, blocks[1][j * 3 - 1], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "N":
            willjump = False

            # NOTE(review): tests blocks[1][j] but prints blocks[1][j + 6] —
            # one of the two indices is presumably wrong; confirm which row
            # faces north.
            for j in range(0, 3):
                if blocks[1][j] != "air":
                    willjump = True
                print(j, blocks[1][j + 6], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")

        # If blocked at both feet and head height, force a turn for 2 ticks.
        # NOTE(review): the 3rd/4th conditions duplicate the 1st/2nd exactly.
        if (blocks[1][2] != "air" and blocks[2][2] != "air"
                or blocks[1][4] != "air" and blocks[2][4] != "air"
                or blocks[1][2] != "air" and blocks[2][2] != "air"
                or blocks[1][4] != "air" and blocks[2][4] != "air"):
            TURN2 = 2

        # NOTE(review): `>= 0` makes this fire on the very first pass (TURN2
        # starts at 0) and once more after each countdown — likely meant `> 0`.
        if TURN2 >= 0:
            agent_host.sendCommand("turn 1")
            TURN2 = TURN2 - 1
        '''if blocks[1][5] != "air" or  blocks[1][5] != "grass" or blocks[1][5] != "tallgrass" :
            JUMP = 2
            agent_host.sendCommand( "jump 1" )
            print()
            print(blocks[1][5])'''

        #print(len(blocks))
        #print(blocks)

        # Watchdog: if nothing has arrived from the client for too long,
        # restart Minecraft rather than hang forever.
        if (world_state.number_of_video_frames_since_last_state > 0
                or world_state.number_of_observations_since_last_state > 0
                or world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print()
            #print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
Пример #26
0
def main():
    """Play a song on Minecraft note blocks with a team of Malmo agents.

    Loads a note list from a song CSV, partitions the notes among the
    agents, starts one mission with all agents in a shared client pool,
    then repeatedly teleports each agent to its next note block and has it
    attack (strike the block) in time with the song.
    """
    #Hardcode number of agents to play song
    num_agents = 4

    # Obtain song csv and get per-agent note assignments.
    # NOTE(review): the active line's comment says "2 Agents" while
    # num_agents is 4 — confirm the CSV/agent-count pairing.
    #freq_list = mt.create_note_list("Twinkle_Twinkle_Little_Star.csv",120,7000,-.08) #1 Agent
    #freq_list = mt.create_note_list("Chopsticks.csv",120,4000,-.15,.03) #2 Agents
    freq_list = mt.create_note_list("Bad_Apple.csv", 120, 3000, -.08,
                                    .03)  #2 Agents
    #freq_list = mt.create_note_list("Grenade_120BPM.csv",120,1500,-.08,.03) #4 Agents
    freq_list = mt.number_converter(freq_list)
    solutions = cs.get_solutions(freq_list, num_agents)
    print(solutions)
    #print(solutions)

    #Get Mission. Needed for teleport positions.
    missionXML = getMissionXML(num_agents)

    #Create musician for each agent and pass teleport positions.
    musicians = []
    for i in range(num_agents):
        agent_positions = generateAgentTeleportPositions(note_positions, i)
        musicians.append(Musician(agent_positions))
    '''
    MALMO
    '''
    print('Starting...', flush=True)

    #Create agents.
    agent_hosts = []
    for i in range(num_agents):
        agent_hosts.append(MalmoPython.AgentHost())

    # Only the first host parses the command line; its settings are reused
    # for every agent's recording object below.
    malmoutils.parse_command_line(agent_hosts[0])

    #Get mission and allow commands for teleport.
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.allowAllChatCommands()

    #Add client for each agent needed.
    my_client_pool = MalmoPython.ClientPool()
    for i in range(num_agents):
        my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000 + i))

    # Silence Malmo's own logging.
    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)

    #Start mission for each agent
    for i in range(num_agents):
        startMission(
            agent_hosts[i], my_mission, my_client_pool,
            malmoutils.get_default_recording_object(
                agent_hosts[0], "agent_" + str(i + 1) + "_viewpoint_discrete"),
            i, '')

    #Wait for all missions to begin.
    waitForStart(agent_hosts)

    #Pause for simulation to begin.
    time.sleep(1)
    '''
    SIMULATION BEGINS HERE
    '''

    # One iteration per beat: move every agent to its note, strike, release.
    for i in range(len(solutions[0])):

        #teleport each agent to the corresponding note.
        for j in range(len(musicians)):
            musicians[j].teleport_to_noteblock(agent_hosts[j], solutions[j][i])

        # play each note: brief attack pulse for agents that have a note.
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 1")

        time.sleep(0.001)

        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 0")
            musicians[k].can_play = False

        #modifies the timing between each note hit.
        time.sleep(0.2)
def deep_q_learning_run(sess,
                        agent_host,
                        q_estimator,
                        state_processor,
                        experiment_dir,
                        epsilon_start=1.0,
                        epsilon_end=0.1,
                        epsilon_decay_steps=8000):
    """Run one Malmo mission for a (previously trained) DQN agent.

    Loads the mission XML named by the agent's ``mission_file`` command-line
    argument, restores the model checkpoint from ``experiment_dir``, starts
    the mission against a local Minecraft client, and steps until the
    mission ends.

    NOTE(review): although an epsilon-greedy ``policy`` and an ``epsilons``
    decay schedule are built below, the action actually sent each step is
    ``randint(0, 3)`` — both are computed but never used. Confirm whether
    this is an intentional random baseline or a leftover from the training
    loop.

    Args:
        sess: TensorFlow Session object.
        agent_host: Malmo ``AgentHost`` (already parsed by malmoutils) that
            supplies the ``mission_file`` string argument.
        q_estimator: Estimator object for Q-values (only feeds the unused
            policy; see note above).
        state_processor: StateProcessor used to preprocess each observation.
        experiment_dir: Directory whose ``checkpoints/model`` checkpoint is
            restored into ``sess``.
        epsilon_start: Start value of the (unused) epsilon decay schedule.
        epsilon_end: Final value of the (unused) epsilon decay schedule.
        epsilon_decay_steps: Length of the (unused) epsilon decay schedule.

    Returns:
        None.
    """
    # Load and configure the mission: strip all command handlers, then
    # re-enable only discrete movement; viewpoint 2 is third-person.
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    print("Checkpoint dir is:", checkpoint_dir)
    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    # latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    # print("~~~~~~~~~~~~~~", latest_checkpoint)
    # exit(0)
    # NOTE(review): this joined path is always non-empty, so the guard
    # below is always True and restore() runs unconditionally — it will
    # raise if the checkpoint file does not exist. The commented-out
    # latest_checkpoint() lookup above was probably the intended guard.
    latest_checkpoint = os.path.join(checkpoint_dir, "model")
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "save_%s-rep" % (expID))

    # Start the mission, retrying a few times while the client warms up.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    agentID, "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    # Block until the mission has begun, surfacing any startup errors.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Tilt the camera down twice before acting.
    agent_host.sendCommand("look -1")
    agent_host.sendCommand("look -1")

    # Busy-wait until a non-empty observation arrives.
    while world_state.is_mission_running and all(
            e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()
    # Populate the replay memory with initial experience

    while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    # Build the initial stacked state: the processed frame repeated 4
    # times along the channel axis (standard DQN frame stacking).
    state = gridProcess(
        world_state
    )  # MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)

    stepNum = 0
    while world_state.is_mission_running:

        # See NOTE(review) in the docstring: the action is random, not
        # drawn from the epsilon-greedy policy built above.
        action = randint(0, 3)
        print("actions:", action)
        # next_state, reward, done, _ = env.step(actionSet[action]) # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        print("Step %s" % stepNum)
        stepNum += 1
        agent_host.sendCommand(actionSet[action])

        world_state = agent_host.peekWorldState()

        # Wait for at least one new video frame before reading the result
        # of the action.
        num_frames_seen = world_state.number_of_video_frames_since_last_state

        while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            # Getting the reward from taking a step
            while world_state.number_of_observations_since_last_state <= 0:
                time.sleep(0.1)
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            # Slide the 4-frame window: drop the oldest channel, append
            # the newly processed frame.
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:],
                                   np.expand_dims(next_state, 2),
                                   axis=2)
            state = next_state
        # time.sleep(1)

    return None
Пример #28
0
def run(argv=None):
    """Run a single random-walk Malmo mission with stall watchdogs.

    Builds a programmatic 10-second mission (320x240 video, a reward for
    reaching position (19.5, 0, 19.5)), starts it against a Minecraft
    client on 127.0.0.1:10000, then moves forward while turning randomly
    until the mission ends.  If the mission fails to begin, or the world
    state stops changing, for longer than ``max_response_time`` seconds,
    ``restart_minecraft`` is invoked.

    Args:
        argv: Optional command-line argument list forwarded to
            ``malmoutils.parse_command_line``.  Defaults to ``['']``.

    Returns:
        None.  Returns early if ``MALMO_XSD_PATH`` is not set; exits the
        process if the mission cannot be started after three attempts.
    """
    # Fix: the original signature used a mutable default (argv=['']),
    # which is shared across calls; use the None-sentinel idiom instead.
    if argv is None:
        argv = ['']

    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    # Programmatic mission spec: short time limit, video stream, and a
    # 100-point reward sphere (radius 1.1) at (19.5, 0, 19.5).
    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds( 10 )
    my_mission.requestVideo( 320, 240 )
    my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    # A unique experiment id disambiguates concurrent copies of this mission.
    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Retry startMission a few times: the client may still be warming up.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    # Wait for the mission to begin; restart Minecraft if it takes too long.
    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:",error.text)
    print()

    last_delta = time.time()
    # Main loop: wander randomly; the watchdog fires when no frames,
    # observations, or rewards have arrived for max_response_time seconds.
    while world_state.is_mission_running:
        agent_host.sendCommand( "move 1" )
        agent_host.sendCommand( "turn " + str(random.random()*2-1) )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",world_state.number_of_video_frames_since_last_state,world_state.number_of_observations_since_last_state,world_state.number_of_rewards_since_last_state)
        if (world_state.number_of_video_frames_since_last_state > 0 or
           world_state.number_of_observations_since_last_state > 0 or
           world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:",reward.getValue())
        for error in world_state.errors:
            print("Error:",error.text)
        for frame in world_state.video_frames:
            print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
Пример #29
0
# In "test" mode run a single episode; otherwise run the full batch.
num_reps = 1 if agent_host.receivedArgument("test") else 30000

# Per-episode trackers — presumably heading and health; verify against
# the mission loop below.
current_yaw = 0
best_yaw = 0
current_life = 0
for iRepeat in range(num_reps):
    mission_xml = getMissionXML(MOB_TYPE + " Apocalypse #" + str(iRepeat))
    my_mission = MalmoPython.MissionSpec(mission_xml,validate)
    max_retries = 3
    # Set up a recording
    my_mission_record = malmoutils.get_default_recording_object(agent_host, "Mission_" + str(iRepeat))
    for retry in range(max_retries):
        try:
            # Attempt to start the mission:
            agent_host.startMission( my_mission, my_client_pool, my_mission_record, 0, "predatorExperiment" )
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission",e)
                print("Is the game running?")
                exit(1)
            else:
                time.sleep(2)

    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
                  <ObservationFromFullStats/>
                  <ContinuousMovementCommands turnSpeedDegs="180"/>
                  <ChatCommands />
                  <MissionQuitCommands quitDescription="give_up"/>
                  <RewardForMissionEnd>
                    <Reward description="give_up" reward="-1000"/>
                  </RewardForMissionEnd>''' + malmoutils.get_video_xml(
    agent_host) + '''
                </AgentHandlers>
              </AgentSection>
            </Mission>'''

# Build the mission from the (validated) XML and a recording spec for this run.
my_mission = MalmoPython.MissionSpec(missionXML, True)
my_mission_record = malmoutils.get_default_recording_object(
    agent_host, "Mission_1")

# Launch the mission, retrying a few times while the client warms up;
# give up and exit after max_retries failed attempts.
max_retries = 3
attempt = 0
while True:
    try:
        agent_host.startMission(my_mission, my_mission_record)
        break
    except RuntimeError as e:
        attempt += 1
        if attempt >= max_retries:
            print("Error starting mission:", e)
            exit(1)
        time.sleep(2)

# Loop until mission starts:
Пример #31
0
num_steps_since_last_chat = 0
cumulative_rewards = []
for iRepeat in range(num_episodes):

    xorg = (iRepeat % 64) * 32
    zorg = ((old_div(iRepeat, 64)) % 64) * 32
    yorg = 200 + ((old_div(iRepeat, (64 * 64))) % 64) * 8

    print("Mission " + str(iRepeat) + " --- starting at " + str(xorg) + ", " +
          str(yorg) + ", " + str(zorg))

    validate = True
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML(iRepeat, xorg, yorg, zorg), validate)

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "episode_{}_role_{}".format(iRepeat + 1, role))
    unique_experiment_id = genExperimentID(
        iRepeat
    )  # used to disambiguate multiple running copies of the same mission

    max_retries = 3
    retry = 0
    while True:
        try:
            print("Calling startMission...")
            agent_host.startMission(my_mission, client_pool, my_mission_record,
                                    role, unique_experiment_id)
            #agent_host.startMission( my_mission, client_pool )
            break
        except MalmoPython.MissionException as e:
            errorCode = e.details.errorCode
Пример #32
0
    num_repeats = itersNum
    cumulative_rewards = []
    rolling_avg = []

    # Trying to load existing q-table.
    agent.loadModel(q_tableFile)

    # Activate evaluation mode:
    if EVALUATE:
        agent.evaluate()

    for i in range(num_repeats):

        print("\nMap %d - Mission %d of %d:" % (imap, i + 1, num_repeats))

        my_mission_record = malmoutils.get_default_recording_object(
            agent_host, "./save_%s-map%d-rep%d" % (expID, imap, i))

        for retry in range(max_retries):
            try:
                agent_host.startMission(my_mission, my_clients,
                                        my_mission_record, agentID,
                                        "%s-%d" % (expID, i))
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission:", e)
                    exit(1)
                else:
                    time.sleep(2.5)

        print("Waiting for the mission to start", end=' ')
Пример #33
0
chat_frequency = 30 # if we send chat messages too frequently the agent will be disconnected for spamming
num_steps_since_last_chat = 0

for iRepeat in range(num_episodes):

    xorg = (iRepeat % 64) * 32
    zorg = ((old_div(iRepeat, 64)) % 64) * 32
    yorg = 200 + ((old_div(iRepeat, (64*64))) % 64) * 8

    print("Mission " + str(iRepeat) + " --- starting at " + str(xorg) + ", " + str(yorg) + ", " + str(zorg))
    
    validate = True
    my_mission = MalmoPython.MissionSpec(GetMissionXML(iRepeat, xorg, yorg, zorg), validate)

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "episode_{}_role_{}".format(iRepeat + 1, role))
    unique_experiment_id = genExperimentID(iRepeat) # used to disambiguate multiple running copies of the same mission
 
    max_retries = 3
    retry = 0
    while True:
        try:
            print("Calling startMission...")
            agent_host.startMission( my_mission, client_pool, my_mission_record, role, unique_experiment_id )
            break
        except MalmoPython.MissionException as e:
            errorCode = e.details.errorCode
            if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
                print("Server not online yet - will keep waiting as long as needed.")
                time.sleep(1)
            elif errorCode in [MalmoPython.MissionErrorCode.MISSION_INSUFFICIENT_CLIENTS_AVAILABLE,