Exemplos de ActionHandler.getLastAction em Python

Linguagem de programação: Python

Namespace / nome do pacote: learningALE.handlers.actionhandler

Classe / Tipo: ActionHandler

Método / Função: getLastAction

Exemplos em hotexamples.com: 2

ActionHandler.getLastAction em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de learningALE.handlers.actionhandler.ActionHandler.getLastAction em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

ActionHandler(9)

action_vect_to_game_action(7)

set_legal_actions(7)

game_action_to_action_ind(4)

anneal(3)

get_action(2)

getLastAction(1)

get_random(1)

setAction(1)

Métodos Frequentes

ActionHandler (9)

action_vect_to_game_action (7)

set_legal_actions (7)

game_action_to_action_ind (4)

anneal (3)

get_action (2)

getLastAction (1)

get_random (1)

setAction (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: view_learner_vals.py Projeto: hearthstoneboss/learningALE

def main():
    """Evaluate a saved CNN policy on Space Invaders and plot episode rewards.

    Loads network parameters from ``datasets/spccnn.pkl`` under the current
    working directory, plays 100 episodes through the ALE interface, and
    accumulates a per-episode reward in which every positive step reward is
    clipped to 1. Each episode total is printed as it finishes; afterwards
    the mean/std/max/min and the histogram of totals are printed and the
    totals are plotted with matplotlib.
    """
    import os
    import pickle
    import matplotlib.pyplot as plt
    from learningALE.learners.nns import CNN
    # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; this import
    # only works with an older SciPy (plus Pillow) installed.
    from scipy.misc import imresize
    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface
    import lasagne
    import numpy as np

    # plt.ion()
    skip_frame = 3
    cnn = CNN((None, skip_frame, 86, 80), 6, .1, stride=(4, 2))

    # Build the path with os.path.join: the former literal '\datasets\\...'
    # relied on the invalid escape sequence '\d' and a hard-coded separator.
    params_path = os.path.join(os.getcwd(), 'datasets', 'spccnn.pkl')
    # SECURITY: pickle.load executes arbitrary code from the file; only load
    # parameter files that come from a trusted source.
    with open(params_path, 'rb') as infile:
        parms = pickle.load(infile)
    lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'

    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()

    # Human-readable names for the actions, restricted to the legal ones.
    # Only needed by the optional action-value plot further down.
    labels = ['noop', 'fire', 'up', 'right', 'left', 'down', 'upright',
              'upleft', 'downright', 'downleft', 'upfire', 'rightfire',
              'leftfire', 'downfire', 'uprightfire', 'upleftfire',
              'downrightfire', 'downleftfire']
    labels = np.asarray(labels)[legal_actions]

    # NOTE(review): the meaning of the (.1, .1, 2) policy parameters is
    # defined by ActionHandler and is not visible here - confirm.
    action_handler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2),
                                   legal_actions)
    rew_list = []
    for ep in range(100):
        total_reward = 0.0
        ale.reset_game()
        while not ale.game_over():
            # Collect skip_frame consecutive frames, repeating the handler's
            # last action on every one of them.
            frames = []
            reward = 0
            for _ in range(skip_frame):
                gamescreen = ale.getScreenRGB()
                # View the screen as 4 bytes per pixel, keep channel 0 and
                # drop the top 25 and bottom 12 rows.
                processed_img = np.asarray(
                    gamescreen.view(np.uint8).reshape(
                        screen_height, screen_width, 4)[25:-12, :, 0],
                    dtype=np.float32)
                # Binarize: every value above 1 becomes 255.
                processed_img[processed_img > 1] = 255
                # Halve the resolution, then divide by 255.
                processed_img = imresize(processed_img, 0.5,
                                         interp='nearest') / 255
                frames.append(processed_img)

                performed_action, _action_ind = action_handler.getLastAction()
                rew = ale.act(performed_action)
                if rew > 0:
                    rew = 1  # clip positive rewards to 1
                reward += rew
            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)

            # Feed the stacked frames as a batch of one and take its output.
            action_vect = cnn.get_output(
                frames.reshape((1, skip_frame, frames.shape[1], 80)))[0]
            action_handler.setAction(action_vect)

            # Debug aid: uncomment to plot the network output per action.
            # plt.plot(action_vect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)
        rew_list.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rew_list), np.std(rew_list), np.max(rew_list),
          np.min(rew_list))
    print(np.unique(rew_list, return_counts=True))
    plt.plot(rew_list)
    plt.show()

Exemplo n.º 2

0

Exibir arquivo

def main():
    """Evaluate a saved CNN policy on Space Invaders and plot episode rewards.

    Loads network parameters from ``datasets/spccnn.pkl`` under the current
    working directory, plays 100 episodes through the ALE interface, and
    accumulates a per-episode reward in which every positive step reward is
    clipped to 1. Each episode total is printed as it finishes; afterwards
    the mean/std/max/min and the histogram of totals are printed and the
    totals are plotted with matplotlib.
    """
    import os
    import pickle
    import matplotlib.pyplot as plt
    from learningALE.learners.nns import CNN
    # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; this import
    # only works with an older SciPy (plus Pillow) installed.
    from scipy.misc import imresize
    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface
    import lasagne
    import numpy as np

    # plt.ion()
    skip_frame = 3
    cnn = CNN((None, skip_frame, 86, 80), 6, .1, stride=(4, 2))

    # Build the path with os.path.join: the former literal '\datasets\\...'
    # relied on the invalid escape sequence '\d' and a hard-coded separator.
    params_path = os.path.join(os.getcwd(), 'datasets', 'spccnn.pkl')
    # SECURITY: pickle.load executes arbitrary code from the file; only load
    # parameter files that come from a trusted source.
    with open(params_path, 'rb') as infile:
        parms = pickle.load(infile)
    lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'

    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()

    # Human-readable names for the actions, restricted to the legal ones.
    # Only needed by the optional action-value plot further down.
    labels = [
        'noop', 'fire', 'up', 'right', 'left', 'down', 'upright', 'upleft',
        'downright', 'downleft', 'upfire', 'rightfire', 'leftfire',
        'downfire', 'uprightfire', 'upleftfire', 'downrightfire',
        'downleftfire',
    ]
    labels = np.asarray(labels)[legal_actions]

    # NOTE(review): the meaning of the (.1, .1, 2) policy parameters is
    # defined by ActionHandler and is not visible here - confirm.
    action_handler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2),
                                   legal_actions)
    rew_list = []
    for ep in range(100):
        total_reward = 0.0
        ale.reset_game()
        while not ale.game_over():
            # Collect skip_frame consecutive frames, repeating the handler's
            # last action on every one of them.
            frames = []
            reward = 0
            for _ in range(skip_frame):
                gamescreen = ale.getScreenRGB()
                # View the screen as 4 bytes per pixel, keep channel 0 and
                # drop the top 25 and bottom 12 rows.
                processed_img = np.asarray(
                    gamescreen.view(np.uint8).reshape(
                        screen_height, screen_width, 4)[25:-12, :, 0],
                    dtype=np.float32)
                # Binarize: every value above 1 becomes 255.
                processed_img[processed_img > 1] = 255
                # Halve the resolution, then divide by 255.
                processed_img = imresize(processed_img, 0.5,
                                         interp='nearest') / 255
                frames.append(processed_img)

                performed_action, _action_ind = action_handler.getLastAction()
                rew = ale.act(performed_action)
                if rew > 0:
                    rew = 1  # clip positive rewards to 1
                reward += rew
            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)

            # Feed the stacked frames as a batch of one and take its output.
            action_vect = cnn.get_output(
                frames.reshape((1, skip_frame, frames.shape[1], 80)))[0]
            action_handler.setAction(action_vect)

            # Debug aid: uncomment to plot the network output per action.
            # plt.plot(action_vect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)
        rew_list.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rew_list), np.std(rew_list), np.max(rew_list),
          np.min(rew_list))
    print(np.unique(rew_list, return_counts=True))
    plt.plot(rew_list)
    plt.show()