class test2048:
    def __init__(self, manual_input=True, random=True, steps=10, sleep=0):
        self.gamegrid = GameGrid(manual_input=manual_input)
        self.random = random
        self.steps = steps
        self.sleep = sleep

    def run(self, input_value=None):
        if self.random:
            self.run_random()

    def run_random(self):
        for k in range(self.steps):
            num = randint(0, 3)
            event_rn.char = chr(num)
            self.gamegrid.key_down(event_rn)
            if game_state(self.gamegrid.matrix) == 'win' \
                    or game_state(self.gamegrid.matrix) == 'lose':
                # time.sleep(1)
                return
            time.sleep(self.sleep)

    def get_status(self):
        return self.gamegrid.matrix

    def take_step(self, inp):
        event_rn.char = chr(inp)
        self.gamegrid.key_down(event_rn)
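# A minimal usage sketch for test2048, assuming the surrounding module already
# imports/defines GameGrid, game_state, randint, time, and the shared event_rn
# dummy-event object that the class above relies on.
if __name__ == '__main__':
    tester = test2048(manual_input=False, steps=50, sleep=0.1)
    tester.run()                # plays up to 50 random moves
    print(tester.get_status())  # final board matrix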
def dialog1():
    game_grid = GameGrid(window)
    game_grid.tkraise()
    username = entry1.get()
    password = entry2.get()
    # check if the credentials are valid
    r = subprocess.run(['rp_user_validator', username, password])
    print(r.returncode)
    if r.returncode == 0:
        box.showinfo('info', 'Correct Login')
    else:
        box.showinfo('info', 'Username or Password incorrect')
def main():
    gamegrid = GameGrid()
    gamegrid.hide()
    agent = DNNAgent(None, waitTime=0, trainDataPickle="ULRD_train_2000_20.pickle")
    gamegrid.setAgent(agent)
    with open('ULRD_trained_model_20_game_layers_64.pickle', 'wb') as f:
        pickle.dump(agent, f)
        print("Trained model stored in {}".format(f.name))
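# Hedged counterpart for later evaluation: reloading the pickled agent. The
# filename matches the dump above; the helper itself is an assumption, not
# part of the original script.
def load_trained_agent(path='ULRD_trained_model_20_game_layers_64.pickle'):
    with open(path, 'rb') as f:
        return pickle.load(f)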
# Do not edit this #
game_logic = {
    'make_new_game': make_new_game,
    'game_status': game_status,
    'get_score': get_score,
    'get_matrix': get_matrix,
    'up': up,
    'down': down,
    'left': left,
    'right': right,
    'undo': lambda state: (state, False)
}

# UNCOMMENT THE FOLLOWING LINE TO START THE GAME (WITHOUT UNDO)
gamegrid = GameGrid(game_logic)

#################
# Optional Task #
#################

###########
# Task 5i #
###########

def make_new_record(mat, increment):
    "Your answer here"
def trainNetwork(s, readout, h_fc1, sess):
    # define the cost function
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # open up a game state to communicate with the emulator
    # game_state = showscreen.game()  # superseded by the GameGrid below
    game_state = GameGrid()

    # store the previous observations in replay memory
    D = deque()

    # printing
    a_file = open("logs_" + GAME + "/readout.txt", 'w')
    h_file = open("logs_" + GAME + "/hidden.txt", 'w')

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, score, terminal = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    # ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    # start training
    epsilon = INITIAL_EPSILON
    t = 0
    while "flappy bird" != "angry bird":
        # choose an action epsilon-greedily
        readout_t = readout.eval(feed_dict={s: [s_t]})[0]
        a_t = np.zeros([ACTIONS])
        action_index = 0
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else:
                action_index = np.argmax(readout_t)
                a_t[action_index] = 1
        else:
            a_t[0] = 1  # do nothing

        # scale down epsilon
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe next state and reward
        x_t1_colored, r_t, score, terminal = game_state.frame_step(a_t, score)
        x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)
        # ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
        x_t1 = np.reshape(x_t1, (80, 80, 1))
        # s_t1 = np.append(x_t1, s_t[:, :, 1:], axis=2)
        s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)

        # store the transition in D
        D.append((s_t, a_t, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing
        if t > OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            # get the batch variables
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            r_batch = [d[2] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            y_batch = []
            readout_j1_batch = readout.eval(feed_dict={s: s_j1_batch})
            for i in range(0, len(minibatch)):
                terminal = minibatch[i][4]
                # if terminal, only equals reward
                if terminal:
                    y_batch.append(r_batch[i])
                else:
                    y_batch.append(r_batch[i] + GAMMA * np.max(readout_j1_batch[i]))

            # perform gradient step
            train_step.run(feed_dict={y: y_batch, a: a_batch, s: s_j_batch})

        # update the old values
        s_t = s_t1
        t += 1

        # save progress every 10000 iterations
        if t % 10000 == 0:
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step=t)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state,
              "/ EPSILON", epsilon, "/ ACTION", action_index,
              "/ REWARD", r_t, "/ Q_MAX %e" % np.max(readout_t))
        # write info to files
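# The constants referenced in trainNetwork (GAME, ACTIONS, GAMMA, OBSERVE,
# EXPLORE, FINAL_EPSILON, INITIAL_EPSILON, REPLAY_MEMORY, BATCH,
# FRAME_PER_ACTION) are not defined in this snippet. A hedged sketch of
# plausible values, in the style of the DeepLearningFlappyBird DQN example
# this loop appears adapted from -- all of these values are assumptions and
# must be defined before trainNetwork is called:
GAME = '2048'            # name used for the logs_* and checkpoint files
ACTIONS = 4              # up / down / left / right
GAMMA = 0.99             # discount factor on future rewards
OBSERVE = 10000          # steps spent filling replay memory before training
EXPLORE = 2000000        # steps over which epsilon is annealed
FINAL_EPSILON = 0.0001   # final exploration rate
INITIAL_EPSILON = 0.1    # starting exploration rate
REPLAY_MEMORY = 50000    # max transitions kept in the deque D
BATCH = 32               # minibatch size
FRAME_PER_ACTION = 1     # act on every frame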
epoches = []
ddqn_scores = []
ddqn_biggest_tiles = []
epsilon_history = []  # to see how much the score improves as epsilon decreases
avg_scores = []

# save output
# will overwrite anything in the folder -> use different directories for different models
# env = wrappers.Monitor(env, 'tmp/lunar-lander', video_callable=lambda episode_id: True, force=True)

# TODO: by this point one game has already been played, so fix this so the
# first game happens here and not earlier
for i in range(n_games):
    score = 0
    gamegrid = GameGrid(agent=ddqn_agent)
    print('\nScore: ', gamegrid.score, ' Biggest tile: ', gamegrid.biggest_tile,
          ' Epsilon: ', gamegrid.ddqn_agent.epsilon)
    ddqn_scores.append(gamegrid.score)
    ddqn_biggest_tiles.append(gamegrid.biggest_tile)
    epsilon_history.append(gamegrid.ddqn_agent.epsilon)

    # to check whether the agent improves and learns, print the average score
    # of the last 100 games
    avg_score = np.mean(ddqn_scores[max(0, i - 100):(i + 1)])
    avg_scores.append(avg_score)
    print('\nepisode: ', i, 'score %.2f' % gamegrid.score,
          'average score %.2f' % avg_score)
    print('\n---------\n')

    if i % 5 == 0 and i > 0:
        print('------------------- here')
        # gamegrid.ddqn_agent.save_model()
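# Hedged follow-up sketch: plotting the curves collected above with
# matplotlib. The filename and figure layout are assumptions, not part of
# the original script.
import matplotlib.pyplot as plt

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(ddqn_scores, label='score')
ax1.plot(avg_scores, label='avg score (last 100)')
ax1.set_ylabel('score')
ax1.legend()
ax2.plot(epsilon_history, color='gray')
ax2.set_xlabel('episode')
ax2.set_ylabel('epsilon')
fig.savefig('ddqn_2048_learning_curve.png')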
class Runner:
    def __init__(self, numEpochs, numItterations, agentCode):
        self._numEpochs = numEpochs
        self._numItterations = numItterations
        self._agentCode = agentCode
        self._gamegrid = GameGrid()
        self._gamegrid.hide()
        self._trainingRecord = []

    def createAgent(self, gameSessionFile=None, trainName=None, trainData=None,
                    trainDataPickle=None, existingAgent=None):
        if self._agentCode == 0:
            self._gamegrid.setAgent(RandomAgent(None, waitTime=0))
        elif self._agentCode == 1:
            self._gamegrid.setAgent(PatternAgentULRD(None, waitTime=0))
        elif self._agentCode == 2:
            self._gamegrid.setAgent(PatternAgentLURD(None, waitTime=0))
        elif self._agentCode == 3:
            self._gamegrid.setAgent(ManualAgent(None, waitTime=0))
        elif self._agentCode == 4:
            self._gamegrid.setAgent(
                DNNAgent(None, waitTime=0, gameSessionFile=gameSessionFile,
                         trainName=trainName, trainData=trainData,
                         trainDataPickle=trainDataPickle))
        self._gamegrid.getAgent().setGameGrid(self._gamegrid)

    def refreshGameGrid(self):
        agent = self._gamegrid.getAgent()
        self._gamegrid = GameGrid()
        self._gamegrid.hide()
        agent.reset()
        self._gamegrid.setAgent(agent)
        self._gamegrid.getAgent().setGameGrid(self._gamegrid)

    def runTraining(self):
        for epochNum in range(0, self._numEpochs):
            for itterNum in range(0, self._numItterations):
                self.refreshGameGrid()
                # refreshGameGrid re-attaches the agent, so fetch it from the
                # grid rather than from an attribute that is never set
                agent = self._gamegrid.getAgent()
                print("Epoch: ", epochNum, " Iteration: ", itterNum)
                self._gamegrid.mainloop()
                # print(gamegrid.matrix)
                print("Score: ", self._gamegrid.scoreMatrix())
                agent.setScore(self._gamegrid.scoreMatrix())
                # The current code running AI games needs to know the current
                # epochNum for encoding the filename
                (boards, moves, score) = agent.getGameRecord()
                # agent.pikPakGame()
                self._trainingRecord.append(
                    (epochNum, itterNum, boards, moves, score))
        return self._trainingRecord
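# Hedged usage sketch for Runner. agentCode=1 selects the Up-Left-Right-Down
# pattern agent per createAgent above; the epoch/iteration counts here are
# illustrative, not taken from the original.
runner = Runner(numEpochs=5, numItterations=20, agentCode=1)
runner.createAgent()
record = runner.runTraining()
print("recorded", len(record), "games")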
parser.add_argument('--replay_memory_length', default=40960, type=int)
args = parser.parse_args()

if __name__ == '__main__':
    policy, target = DQN(4).to(device), DQN(4).to(device)
    try:
        policy.load_state_dict(torch.load('my_policy.pt'))
        target.load_state_dict(torch.load('my_target.pt'))
    except FileNotFoundError:
        print('Exception Raised: Files not found...')
    rm = ReplayMemory(args.replay_memory_length)
    optimizer = optim.RMSprop(policy.parameters(), eps=1e-5)
    try:
        gamegrid = GameGrid(rm, policy, target, optimizer, args.epsilon,
                            args.min_epsilon, args.eps_decay_rate,
                            args.update_every, args.n_train, args.batch_size,
                            args.gamma)
    except KeyboardInterrupt:
        print('\nKeyboard Interrupt!!!')
    # save the networks whether training finished or was interrupted
    try:
        print('Saving...')
        torch.save(policy.state_dict(), 'my_policy.pt')
        torch.save(target.state_dict(), 'my_target.pt')
    except Exception as e:
        print('Error: {}'.format(e))
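# Hedged sketch of a DQN module compatible with the DQN(4) calls above: a
# small fully connected network mapping a flattened 4x4 board to 4 action
# values. The layer sizes are assumptions; the original architecture is not
# shown in this snippet.
import torch
import torch.nn as nn

class DQN(nn.Module):
    def __init__(self, n_actions):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(16, 256),          # 4x4 board, flattened
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, n_actions),   # one Q-value per move
        )

    def forward(self, x):
        # accepts a batch of boards shaped (N, 4, 4) or (N, 16)
        return self.net(x.view(x.size(0), -1).float())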
from agent import Agent
from puzzle import GameGrid
import sys
import numpy as np

# normalize input values
episodes = 61234

if len(sys.argv) == 2:
    MODE = sys.argv[1]
else:
    MODE = 'train'

environment = GameGrid()
bot = Agent(MODE, episodes)

# a five-tuple is (state, action, state_after, reward, terminal)
for episode in range(episodes):
    if MODE != "play":
        # periodically copy the online weights into the target network
        if episode % 75 == 0 and episode != 0:
            bot.target_model.set_weights(bot.model.get_weights())
    still_playing = True
    state_before_action = environment.give_recent_state()
    step = 0
    while still_playing:
        action = bot.decide_move(state_before_action)
class run2048:
    def __init__(self, manual_input=True, random=True, steps=10, sleep=0, log=False):
        self.gamegrid = GameGrid(manual_input)
        if log:
            print('Initial GameGrid')
            print(self.gamegrid.matrix)
        self.random = random
        self.steps = steps
        self.sleep = sleep
        self.log = log
        self.gamegrid.win_status = False
        self.step = 0
        self.old_matrix = []
        self.check_value = 0

    def run(self, input_value=None):
        self.old_matrix = self.gamegrid.matrix
        if self.random and input_value is None:
            input_value = randint(0, 3)
        assert input_value in range(4)
        event_rn.char = chr(input_value)
        self.take_step(event_rn)
        if self.log:
            print(self.gamegrid.matrix)
        time.sleep(self.sleep)
        self.step += 1

    def get_status(self):
        # Need to figure out the 'lose' state from check_matrix
        return self.gamegrid.matrix, int(self.check_matrix())

    def take_step(self, inp):
        self.gamegrid.key_down(inp)

    def check_matrix(self):
        # counts how many consecutive moves have left the board unchanged
        if self.old_matrix == self.gamegrid.matrix:
            self.check_value += 1
        else:
            self.check_value = 0
        return self.check_value
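# Hedged usage sketch for run2048: step the game until the board has stopped
# changing for several consecutive moves (the threshold of 10 is an
# assumption, not from the original).
game = run2048(manual_input=False, random=True, sleep=0, log=True)
while True:
    game.run()
    matrix, stuck_count = game.get_status()
    if stuck_count >= 10:  # board unchanged for 10 moves in a row
        break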
def main():
    existingAgent1 = None
    with open("TrainingPartialCountRunner_100_20_4_5.pickle", 'rb') as f:
        existingAgent0 = pickle.load(f)
    with open("ULRD_trained_model_20_game_layers_32_16.pickle", 'rb') as f:
        existingAgent1 = pickle.load(f)
    with open("ULRD_trained_model_20_game_layers_64_16.pickle", 'rb') as f:
        existingAgent2 = pickle.load(f)
    with open("ULRD_trained_model_20_game_layers_64_16_8.pickle", 'rb') as f:
        existingAgent3 = pickle.load(f)
    with open("ULRD_trained_model_20_game_layers_64_32_8.pickle", 'rb') as f:
        existingAgent4 = pickle.load(f)
    with open("ULRD_trained_model_20_game_layers_64.pickle", 'rb') as f:
        existingAgent5 = pickle.load(f)

    # keys 5-8 now map to the agents matching agentDescription below
    agentDict = {
        1: RandomAgent(None, waitTime=0),
        2: PatternAgentULRD(None, waitTime=0),
        0: existingAgent0,
        3: DNNAgent(None, waitTime=0, trainName="ULRD_train.pickle"),
        4: existingAgent1,
        5: existingAgent2,
        6: existingAgent3,
        7: existingAgent4,
        8: existingAgent5
    }
    agentDescription = {
        1: "Random",
        2: "Up-Left-Right-Down",
        0: "Online learning NN",
        3: "DNN Agent",
        4: "DNN Agent with layers [32, 16]",
        5: "DNN Agent with layers [64, 16]",
        6: "DNN Agent with layers [64, 16, 8]",
        7: "DNN Agent with layers [64, 32, 8]",
        8: "DNN Agent with layers [64]"
    }
    agentScoreDict = {key: [] for key in agentDict}
    agentColors = {
        1: "b",
        2: "r",
        0: "#1f004d",
        3: "g",
        4: "c",
        5: "m",
        6: "y",
        7: "k",
        8: "#3CFE6E"
    }

    gameIDs = []
    for i in range(0, 15):
        gameIDs.append(i)
        random.seed(i)
        for (agentKey, agent) in agentDict.items():
            gamegrid = GameGrid()
            gamegrid.hide()
            gamegrid.setAgent(agent)
            agent.setGameGrid(gamegrid)
            gamegrid.mainloop()
            agentScoreDict[agentKey].append(sumScoreMatrix(gamegrid.matrix))
            print(agentScoreDict[agentKey])
            agent.reset()

    plotTrainingRecord(gameIDs, agentDict, agentDescription, agentScoreDict,
                       agentColors)
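# Hedged sketches of the two helpers called above but not defined in this
# snippet. sumScoreMatrix is assumed to score a final board by summing its
# tiles, and plotTrainingRecord to draw one score line per agent; both are
# assumptions inferred from the call sites.
import matplotlib.pyplot as plt

def sumScoreMatrix(matrix):
    # score a final board as the sum of all tile values
    return sum(sum(row) for row in matrix)

def plotTrainingRecord(gameIDs, agentDict, agentDescription, agentScoreDict,
                       agentColors):
    for agentKey in agentDict:
        plt.plot(gameIDs, agentScoreDict[agentKey],
                 color=agentColors[agentKey],
                 label=agentDescription[agentKey])
    plt.xlabel("game ID (random seed)")
    plt.ylabel("final board score")
    plt.legend()
    plt.show()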