示例#1
0
def main():
    """Play one random episode of Beam Rider and list candidate "lives" RAM cells.

    This script is used to try and find what RAM index stores the number of
    lives for a game.  The emulator RAM is captured before every step of a
    random-action episode.  Afterwards every RAM column that ever held a value
    greater than 3 is zeroed, the remaining traces are plotted, and the
    indices of the columns that are still non-zero are printed.
    """
    import time
    import numpy as np
    from learningALE.libs.ale_python_interface import ALEInterface

    emulator = ALEInterface(True)
    emulator.loadROM(b'D:\\_code\\beam_rider.bin')

    # The screen size is queried but not needed for the RAM analysis below.
    (screen_width, screen_height) = emulator.getScreenDims()
    actions = emulator.getLegalActionSet()

    steps = 0
    ram_history = []
    started = time.time()
    for episode in range(1):
        score = 0.0
        while not emulator.game_over():
            # Pick a uniformly random legal action for this step.
            choice = actions[np.random.randint(actions.size)]
            # Snapshot RAM before the action is applied.
            ram_history.append(emulator.getRAM())
            score += emulator.act(choice)
            steps += 1
        print("Episode {} ended with score: {}".format(episode, score))
        emulator.reset_game()
    finished = time.time()
    # Elapsed seconds and emulator steps per second.
    print(finished - started, steps / (finished - started))

    import matplotlib.pyplot as plt
    ram_trace = np.asarray(ram_history)
    # Column indices of every cell that exceeded 3 at some point in the run.
    noisy_cols = np.where(ram_trace > 3)[1]
    ram_trace[:, noisy_cols] = 0
    plt.plot(ram_trace)

    # Columns that survived the filter and are non-zero at least once.
    surviving_cols = np.where(ram_trace != 0)[1]
    print(np.unique(surviving_cols))
示例#2
0
def main():
    """Log RAM over one random Beam Rider episode to locate the lives counter.

    This script is used to try and find what RAM index stores the number of
    lives for a game.  One RAM snapshot is taken per step while random legal
    actions are played.  RAM columns that ever exceed 3 are cleared, the rest
    are plotted, and the indices of the columns that remain non-zero are
    printed.
    """
    import time
    import numpy as np
    from learningALE.libs.ale_python_interface import ALEInterface

    game = ALEInterface(True)
    game.loadROM(b'D:\\_code\\beam_rider.bin')

    # Screen dimensions are fetched here but play no part in the analysis.
    (screen_width, screen_height) = game.getScreenDims()
    action_set = game.getLegalActionSet()

    n_frames = 0
    snapshots = []
    t_start = time.time()
    for episode in range(1):
        episode_score = 0.0
        while not game.game_over():
            # Uniformly random choice among the legal actions.
            picked = action_set[np.random.randint(action_set.size)]
            # Record RAM as it is before this step's action.
            snapshots.append(game.getRAM())
            episode_score += game.act(picked)
            n_frames += 1
        print("Episode {} ended with score: {}".format(episode, episode_score))
        game.reset_game()
    t_end = time.time()
    # Wall-clock seconds for the run and the resulting steps per second.
    print(t_end - t_start, n_frames / (t_end - t_start))

    import matplotlib.pyplot as plt
    ram_matrix = np.asarray(snapshots)
    # Second element of np.where on a 2-D array: the column of each hit.
    over_three = np.where(ram_matrix > 3)[1]
    ram_matrix[:, over_three] = 0
    plt.plot(ram_matrix)

    # Unique column indices that still carry a non-zero value.
    remaining = np.where(ram_matrix != 0)[1]
    print(np.unique(remaining))
示例#3
0
def main():
    """Evaluate a saved AlloEggoCnn on Breakout for 100 episodes and plot scores.

    Loads network parameters from ``saves/cnnbestalloego.pkl`` under the
    current working directory, then repeatedly plays Breakout.  Every
    ``skipFrame`` emulator frames are downscaled, stacked and fed to the
    network, whose output is handed to the action handler to choose the next
    action.  Per-episode scores are printed together with timing information
    and finally plotted.
    """
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    # NOTE(review): scipy.misc.imresize was deprecated in SciPy 1.0 and removed
    # in SciPy 1.3, so this import requires an older SciPy.
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4

    # NOTE(review): the meaning of the (0.1, 0.1, 2) tuple is defined by
    # ActionHandler, which is not visible here - confirm before changing.
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2),
                                  legal_actions)
    scoreList = list()

    cnn = AlloEggoCnn()
    # Build the path with os.path.join rather than a '\s...\c...' literal:
    # those backslash pairs are invalid escape sequences and the hard-coded
    # separator only works on Windows.
    params_path = os.path.join(os.getcwd(), 'saves', 'cnnbestalloego.pkl')
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load parameter files from a trusted source.
    with open(params_path, 'rb') as fin:
        parms = pickle.load(fin)

    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # Keep channel 0 of the (height, width, 4) byte view, halve
                # the resolution and scale the pixel values by 1/255.
                processedImg = np.asarray(imresize(gamescreen.view(
                    np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0],
                                                   0.5,
                                                   interp='nearest'),
                                          dtype=dtype) / 255

                frames.append(processedImg)

                # Repeat the most recently chosen action on every skipped frame.
                performedAction, _ = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                # Any positive emulator reward counts as +1.
                if rew > 0:
                    reward += 1

                # RAM byte 57 is compared against the tracked life count; a
                # change is penalised with -1 (presumably a lost life - verify
                # the RAM index for this ROM).
                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)

            # The network takes a (1, skipFrame, 105, 80) batch.
            actionVect = cnn.get_output(frames.reshape(
                (1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += 1 * skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)

        # A fresh game starts with the initial life count again.
        lives = 5

        print("Episode " + str(episode) + " ended with score: " +
              str(total_reward))

        et = time.time()
        print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:',
              frameCount / (et - st))

    plt.clf()
    plt.plot(scoreList, '.')
    plt.pause(0.01)
    plt.ioff()
示例#4
0
# Play a single episode with uniformly random actions and time the run.
frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]

        # get gamescreen and keep channel 0 of its (Height x Width x 4) byte
        # view as a float array.  The builtin ``float`` is used because the
        # ``np.float`` alias for it has been removed from NumPy and raises
        # AttributeError there.
        gamescreen = ale.getScreenRGB()
        gamescreen = np.asarray(gamescreen.view(np.uint8).reshape(
            screen_height, screen_width, 4)[:, :, 0],
                                dtype=float)

        # get ram
        ram = ale.getRAM()

        # take the action and get the reward
        reward = ale.act(a)
        total_reward += reward

        frameCount += 1

    print("Episode " + str(episode) + " ended with score: " +
          str(total_reward))
    # game over man game over, reset
    ale.reset_game()

# end time count and print total time and FPS
et = time.time()
print(et - st, frameCount / (et - st))
        reward += ale.act(a)
    # NOTE(review): this is a fragment; the enclosing loop header and the end
    # of the ``while`` body below are not visible here.

    # Accumulate the reward and record it alongside a downscaled copy of the
    # frames (scale factor 0.525, nearest-neighbour interpolation).
    total_reward += reward
    currrewards.append(reward)
    currstates.append(imresize(np.asarray(frames), 0.525, interp='nearest'))

    # Swap axes 0 and 2 before building the surface (presumably to match the
    # axis order pygame expects - TODO confirm).
    frames = np.swapaxes(np.asarray(frames),0,2)

    from pygame import surfarray

    # Draw the frame at twice the emulator resolution in the top-left corner.
    frames = surfarray.make_surface(frames)
    screen.blit(pygame.transform.scale(frames, (screen_width*2, screen_height*2)),(0,0))

    # Get RAM: allocate a uint8 buffer of the emulator's RAM size and pass it
    # to getRAM; the code below reads the bytes back from this same buffer.
    ram_size = ale.getRAMSize()
    ram = np.zeros((ram_size),dtype=np.uint8)
    ale.getRAM(ram)


    # Display RAM bytes: first a "RAM: " heading at (330, 10).
    font = pygame.font.SysFont("Ubuntu Mono",32)
    text = font.render("RAM: " ,1,(255,208,208))
    screen.blit(text,(330,10))

    # Smaller font for the hex dump; 1.2x the font height is kept in
    # ``height`` (presumably the row spacing - its use is not visible here).
    font = pygame.font.SysFont("Ubuntu Mono",25)
    height = font.get_height()*1.2

    line_pos = 40
    ram_pos = 0
    # Render the first 128 RAM bytes as two-digit hex, up to 16 per row.
    while(ram_pos < 128):
        ram_string = ''.join(["%02X "%ram[x] for x in range(ram_pos,min(ram_pos+16,128))])
        text = font.render(ram_string,1,(255,255,255))
示例#6
0
    # NOTE(review): this is a fragment; the enclosing loop header and the end
    # of the ``while`` body below are not visible here.

    # Record the reward alongside a downscaled copy of the frames (scale
    # factor 0.525, nearest-neighbour interpolation).
    currrewards.append(reward)
    currstates.append(imresize(np.asarray(frames), 0.525, interp='nearest'))

    # Swap axes 0 and 2 before building the surface (presumably to match the
    # axis order pygame expects - TODO confirm).
    frames = np.swapaxes(np.asarray(frames), 0, 2)

    from pygame import surfarray

    # Draw the frame at twice the emulator resolution in the top-left corner.
    frames = surfarray.make_surface(frames)
    screen.blit(
        pygame.transform.scale(frames, (screen_width * 2, screen_height * 2)),
        (0, 0))

    # Get RAM: allocate a uint8 buffer of the emulator's RAM size and pass it
    # to getRAM; the code below reads the bytes back from this same buffer.
    ram_size = ale.getRAMSize()
    ram = np.zeros((ram_size), dtype=np.uint8)
    ale.getRAM(ram)

    # Display RAM bytes: first a "RAM: " heading at (330, 10).
    font = pygame.font.SysFont("Ubuntu Mono", 32)
    text = font.render("RAM: ", 1, (255, 208, 208))
    screen.blit(text, (330, 10))

    # Smaller font for the hex dump; 1.2x the font height is kept in
    # ``height`` (presumably the row spacing - its use is not visible here).
    font = pygame.font.SysFont("Ubuntu Mono", 25)
    height = font.get_height() * 1.2

    line_pos = 40
    ram_pos = 0
    # Render the first 128 RAM bytes as two-digit hex, up to 16 per row.
    while (ram_pos < 128):
        ram_string = ''.join(
            ["%02X " % ram[x] for x in range(ram_pos, min(ram_pos + 16, 128))])
        text = font.render(ram_string, 1, (255, 255, 255))
def main():
    """Evaluate a saved AlloEggoCnn on Breakout for 100 episodes and plot scores.

    Loads network parameters from ``saves/cnnbestalloego.pkl`` under the
    current working directory, then repeatedly plays Breakout.  Every
    ``skipFrame`` emulator frames are downscaled, stacked and fed to the
    network, whose output is handed to the action handler to choose the next
    action.  Per-episode scores are printed together with timing information
    and finally plotted.
    """
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    # NOTE(review): scipy.misc.imresize was deprecated in SciPy 1.0 and removed
    # in SciPy 1.3, so this import requires an older SciPy.
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4

    # NOTE(review): the meaning of the (0.1, 0.1, 2) tuple is defined by
    # ActionHandler, which is not visible here - confirm before changing.
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2), legal_actions)
    scoreList = list()

    cnn = AlloEggoCnn()
    # Build the path with os.path.join rather than a '\s...\c...' literal:
    # those backslash pairs are invalid escape sequences and the hard-coded
    # separator only works on Windows.
    params_path = os.path.join(os.getcwd(), 'saves', 'cnnbestalloego.pkl')
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load parameter files from a trusted source.
    with open(params_path, 'rb') as fin:
        parms = pickle.load(fin)

    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # Keep channel 0 of the (height, width, 4) byte view, halve
                # the resolution and scale the pixel values by 1/255.
                channel0 = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0]
                processedImg = np.asarray(
                    imresize(channel0, 0.5, interp='nearest'),
                    dtype=dtype)/255

                frames.append(processedImg)

                # Repeat the most recently chosen action on every skipped frame.
                performedAction, _ = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                # Any positive emulator reward counts as +1.
                if rew > 0:
                    reward += 1

                # RAM byte 57 is compared against the tracked life count; a
                # change is penalised with -1 (presumably a lost life - verify
                # the RAM index for this ROM).
                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)

            # The network takes a (1, skipFrame, 105, 80) batch.
            actionVect = cnn.get_output(frames.reshape((1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += 1*skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)

        # A fresh game starts with the initial life count again.
        lives = 5

        print("Episode " + str(episode) + " ended with score: " + str(total_reward))

        et = time.time()
        print('Total Time:', et-st, 'Frame Count:', frameCount, 'FPS:',frameCount/(et-st))

    plt.clf()
    plt.plot(scoreList, '.')
    plt.pause(0.01)
    plt.ioff()
示例#8
0
# Play a single episode with uniformly random legal actions and time the run.
legal_actions = ale.getLegalActionSet()

frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]

        # get gamescreen and keep channel 0 of its (Height x Width x 4) byte
        # view as a float array.  The builtin ``float`` is used because the
        # ``np.float`` alias for it has been removed from NumPy and raises
        # AttributeError there.
        gamescreen = ale.getScreenRGB()
        gamescreen = np.asarray(gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0], dtype=float)

        # get ram
        ram = ale.getRAM()

        # take the action and get the reward
        reward = ale.act(a)
        total_reward += reward

        frameCount += 1

    print("Episode " + str(episode) + " ended with score: " + str(total_reward))
    # game over man game over, reset
    ale.reset_game()

# end time count and print total time and FPS
et = time.time()
print(et-st, frameCount/(et-st))