def run_game():
    """Drive a ``BrainDQN`` agent against a ``Game`` environment.

    The agent is primed with a freshly reset map, then repeatedly picks an
    action, applies it to the game and is fed the resulting transition.
    Whenever the game reports ``done`` the map is reset and the agent is
    primed again in place.

    NOTE(review): the inner loop contains no ``break``, so as written this
    function never returns and the outer ``for`` never reaches a second
    episode -- confirm whether that is intended.
    """
    game = Game()
    RL = BrainDQN(actions=8)
    step = 0  # never read or updated anywhere below
    for episode in range(500):
        # Hand the initial observation of a fresh map to the agent.
        RL.setInitState(game.reset_map())

        while True:
            # Agent picks an action; the game answers with the transition.
            action = RL.getAction()
            observation_, reward, done = game.step(action)
            RL.setPerception(observation_, action, reward, done)

            if not done:
                continue

            # Game over: start again from a fresh map, same agent.
            RL.setInitState(game.reset_map())
# break while loop when end of this episode if is_done: print("moves:", moves, "score:", score) scores.append(score) break step += 1 import matplotlib.pyplot as plt plt.plot(np.arange(len(scores)), scores) plt.ylabel('Scores') plt.xlabel('training steps') plt.show() if __name__ == "__main__": game = Game() # print(game.init_map()) # print(game.init_map()) RL = DeepQNetwork(n_actions=9, map_w=12, map_h=12, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9, replace_target_iter=300, memory_size=2000, e_greedy_increment=0.2, output_graph=False) train_game() RL.plot_cost() # print(game.init_map())
def main():
    """Build a ``Game`` from (800, 450, "Week 6 Solution Template") and run it."""
    Game(800, 450, "Week 6 Solution Template").run()
# Players: two DQN agents built from the same settings (only the name
# differs) plus one random player.
p1 = DeepQLearningAgent(
    board_size=board_size,
    buffer_size=buffer_size,
    gamma=gamma,
    n_actions=n_actions,
    use_target_net=use_target_net,
    epsilon=epsilon,
    version=version,
    name='dqn1',
)
p2 = DeepQLearningAgent(
    board_size=board_size,
    buffer_size=buffer_size,
    gamma=gamma,
    n_actions=n_actions,
    use_target_net=use_target_net,
    epsilon=epsilon,
    version=version,
    name='dqn2',
)
p_random = RandomPlayer(board_size=board_size)

# g: agent against agent; g2: first agent against the random player.
g = Game(player1=p1, player2=p2, board_size=board_size)
g2 = Game(player1=p1, player2=p_random, board_size=board_size)

# Show the network of the first agent.
print("Model architecture")
p1._model.summary()

# Settings for the DQN run.
reward_type = 'current'
sample_actions = False
decay = 0.85
epsilon_end = 0.1
n_games_buffer = 300
n_games_train = 10
episodes = 10**5
log_frequency = 500
def main():
    """Build a ``Game`` sized (1280, 800) titled "Week 06: Paddles" and run it."""
    Game((1280, 800), "Week 06: Paddles").run()
def main():
    """Build a ``Game`` sized (1280, 800) titled "Week 05: Pong" and run it."""
    Game((1280, 800), "Week 05: Pong").run()
from game_env import (StateEnv, Game, StateEnvBitBoard, StateConverter, StateEnvBitBoardC) from players import RandomPlayer import numpy as np import time from tqdm import tqdm board_size = 8 # initialize classes p1 = RandomPlayer(board_size=board_size) p2 = RandomPlayer(board_size=board_size) g = Game(player1=p1, player2=p2, board_size=board_size) def convert_boards(s, m=None): """Convert the board state from bitboard to ndarray Parameters ---------- s : list contains black, white bitboards and current player m : int (64 bit), default None bitboard for legal moves Returns ------- s : list contains black, white board arrays and current player m : ndarray legal moves array
def main():
    """Run a (1024, 768) "Week 03" ``Game``, printing a line before and after."""
    size = (1024, 768)
    title = "Week 03"

    print("entered main()")
    Game(size, title).run()
    print("exited main()")
def bin_pool(self, results):
    """Group results by feature bin, keeping the lowest-rms entry per bin.

    Parameters
    ----------
    results : iterable
        Each item unpacks as ``(rms, state, allA, Anumber)``.

    Returns
    -------
    dict
        Maps ``str([bin indices...])`` to ``[rms, state, allA]``. The key has
        7 bin indices when ``self.calculus_mode == 'scalar'`` and 10
        otherwise. A bin is created only by a result with
        ``rms < config.minrms``; an existing bin is overwritten only by a
        strictly smaller rms.

    NOTE(review): the ``bin_*`` locals are not reset per result. If a value
    matches no interval (e.g. a negative count) the bin keeps whatever the
    previous iteration left in it, or is unbound on the first iteration.
    """
    results_by_bin = {}

    # work out, for every result, which bin it belongs to
    for oneresult in results:
        rms, state, allA, Anumber = oneresult
        game = Game(self.voc, state)
        # the feature tuple is longer outside scalar mode (dot/norm/cross counts)
        if self.calculus_mode == 'scalar':
            L, function_number, mytargetnumber, firstder_number, depth, varnumber = game.get_features()
        else:
            L, function_number, mytargetnumber, firstder_number, depth, varnumber, dotnumber, normnumber, crossnumber = game.get_features()

        # Anumber: capped at self.maxa, otherwise index of the unit-wide
        # interval [i, i + 1) containing it
        if Anumber >= self.maxa:
            bin_a = self.maxa
        else:
            bins_for_a = np.linspace(0, self.maxa, num=self.maxa+1)
            for i in range(len(bins_for_a) -1):
                if Anumber >= bins_for_a[i] and Anumber < bins_for_a[i + 1]:
                    bin_a = i

        # L: same scheme, capped at self.maxl
        if L >= self.maxl:
            bin_l = self.maxl
        else:
            bins_for_l = np.linspace(0, self.maxl, num=self.maxl+1)
            for i in range(len(bins_for_l) - 1):
                if L >= bins_for_l[i] and L < bins_for_l[i + 1]:
                    bin_l = i

        # function_number: same scheme, capped at self.maxf
        if function_number >= self.maxf:
            bin_f = self.maxf
        else:
            bins_for_f = np.linspace(0, self.maxf, num = self.maxf+1)
            for i in range(len(bins_for_f) - 1):
                if function_number >= bins_for_f[i] and function_number < bins_for_f[i + 1]:
                    bin_f = i

        if function_number ==0:  # whether the function is present or not
            bin_fzero = 0
        else:
            bin_fzero = 1
        if varnumber == 0:  # whether the variable is present or not
            bin_var = 0
        else:
            bin_var = 1
        if firstder_number ==0:  # same for the first derivative
            bin_fone = 0
        else:
            bin_fone = 1

        # small grid: collapse the presence flags into a single bin
        # NOTE(review): the original indentation was lost; `bin_d = 0` is
        # assumed to belong to this branch, but it may instead have been an
        # unconditional default for the depth loop below -- confirm.
        if config.smallgrid :
            bin_fzero = 0
            bin_var = 0
            bin_fone = 0
            bin_d = 0

        # depth: MAX_DEPTH + 2 edges over [0, MAX_DEPTH]; the last interval is
        # half-open, so depth >= MAX_DEPTH matches nothing and leaves bin_d
        # untouched
        bin_for_d = np.linspace(0, config.MAX_DEPTH, num=config.MAX_DEPTH + 2)
        for i in range(len(bin_for_d) - 1):
            if depth >= bin_for_d[i] and depth < bin_for_d[i + 1]:
                bin_d = i

        # NOTE(review): features are unpacked for every non-scalar mode, but
        # these three bins are only computed for 'vectorial' -- confirm no
        # other mode exists.
        if self.calculus_mode == 'vectorial':
            if dotnumber >= self.maxdot:
                bin_dot = self.maxdot
            else:
                bins_for_dot = np.linspace(0, self.maxdot, num=self.maxdot + 1)
                for i in range(len(bins_for_dot) - 1):
                    if dotnumber >= bins_for_dot[i] and dotnumber < bins_for_dot[i + 1]:
                        bin_dot = i

            if normnumber >= self.maxnorm:
                bin_norm = self.maxnorm
            else:
                bins_for_norm = np.linspace(0, self.maxnorm, num=self.maxnorm + 1)
                for i in range(len(bins_for_norm) - 1):
                    if normnumber >= bins_for_norm[i] and normnumber < bins_for_norm[i + 1]:
                        bin_norm = i

            if crossnumber >= self.maxcross:
                bin_cross = self.maxcross
            else:
                bins_for_cross = np.linspace(0, self.maxcross, num=self.maxcross + 1)
                for i in range(len(bins_for_cross) - 1):
                    if crossnumber >= bins_for_cross[i] and crossnumber < bins_for_cross[i + 1]:
                        bin_cross = i

        # store the result: a new bin needs rms below config.minrms, an
        # occupied bin is replaced only by a strictly better rms
        if self.calculus_mode =='scalar':
            if str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var]) not in results_by_bin:
                if rms <config.minrms:
                    results_by_bin.update({str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var]): [rms, state, allA]})
            else:
                prev_rms = results_by_bin[str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var])][0]
                if rms < prev_rms:
                    results_by_bin.update({str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var]): [rms, state, allA]})
        else:
            if str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var, bin_dot, bin_norm, bin_cross]) not in results_by_bin:
                if rms <config.minrms:
                    results_by_bin.update({str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var,bin_dot, bin_norm, bin_cross]): [rms, state, allA]})
            else:
                prev_rms = results_by_bin[str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var,bin_dot, bin_norm, bin_cross])][0]
                if rms < prev_rms:
                    results_by_bin.update({str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var,bin_dot, bin_norm, bin_cross]): [rms, state, allA]})

    return results_by_bin
from game_env import Game
import numpy as np

# Build a game, initialise its map and show it.
game = Game()
game.init_map()
print(game.map)

# Apply move 0 twice, then show the map again.
game.move(0)
game.move(0)
print(game.map)

# Convert the map with the game's own helper, show it, then show the same
# data reshaped to [6, 6, 4].
new_map = game.transform_to_3dim(game.map)
print(new_map)

new_map = np.reshape(new_map, [6, 6, 4])
print(new_map)
def main():
    """Build a ``Game`` from (800, 450, "Week 6 Starter Template") and run it."""
    Game(800, 450, "Week 6 Starter Template").run()
def main():
    """Build a ``Game`` sized (1280, 800) titled "Week 03" and run it."""
    size = (1280, 800)
    title = "Week 03"
    Game(size, title).run()
# Settings passed to the agent constructor below.
buffer_size = 10000
gamma = 0.99
n_actions = 64
use_target_net = True
epsilon = 0.9
version = 'v1'

# Further settings; not referenced in the visible part of the script.
batch_size = 512
supervised = False
agent_type = 'DeepQLearningAgent'

# Players and game: a DQN agent against a random player.
p1 = DeepQLearningAgent(
    board_size=board_size,
    buffer_size=buffer_size,
    gamma=gamma,
    n_actions=n_actions,
    use_target_net=use_target_net,
    epsilon=epsilon,
    version=version,
)
p2 = RandomPlayer(board_size=board_size)
g = Game(player1=p1, player2=p2, board_size=board_size)

# Show the network of the agent.
print("Model architecture")
p1._model.summary()

# Settings for the DQN run.
reward_type = 'current'
sample_actions = False
decay = 0.85
epsilon_end = 0.1
n_games_buffer = 300
n_games_train = 30
episodes = 30 * 10**4
log_frequency = 30 * 500

win_list_random = []
def vectorial_delete_one_subtree(self, state):
    """Try to replace one operator node of the equation by one of its operands.

    Works on a deep copy of ``state``. Up to 10 random nodes are drawn among
    the RPN symbols ``>= 2`` (a trailing ``1``, the halt symbol, is ignored);
    a draw is accepted when the node has a parent and the last symbol of its
    RPN is in ``self.voc.arity2symbols``. The accepted node is then replaced,
    inside its parent, by one of its two children, chosen from the children's
    vec numbers (second value of ``from_rpn_to_critical_info``):

    * ``[0, 1]`` keeps child 1, ``[1, 0]`` keeps child 0;
    * ``[0, 0]``, or ``[1, 1]`` when the operator is not
      ``self.voc.dot_number``, keeps either child with probability 0.5;
    * anything else (the dot-product case) is rejected.

    Returns ``(False, copy_of_state)`` when nothing could be deleted and
    ``(True, new_state)`` otherwise.
    """
    backup = copy.deepcopy(state)
    game = Game(self.voc, backup)
    tree = game.convert_to_ast()
    tokens = backup.reversepolish

    # Drop the trailing halt symbol before looking for candidate nodes.
    body = np.asarray(tokens[:-1] if tokens[-1] == 1 else tokens)

    # Every symbol >= 2 is the top of a candidate subtree.
    candidates = np.where(body >= 2)[0]
    if candidates.size == 0:
        # Nothing to delete.
        return False, backup

    for _attempt in range(10):
        pick = np.random.choice(candidates)
        operatornode = tree.from_ast_get_node(tree.topnode, pick + 1)[0]
        operator_rpn = tree.from_ast_to_rpn(operatornode)
        operator_state = State(self.voc, operator_rpn, self.calculus_mode)
        operator_game = Game(self.voc, operator_state)
        # Result is not used here; the call itself is kept as in the original flow.
        _, _ignored, _ = operator_game.from_rpn_to_critical_info()
        holder = operatornode.parent
        if operator_rpn[-1] in self.voc.arity2symbols and holder is not None:
            break
    else:
        # No acceptable node within the allowed number of draws.
        return False, backup

    # Position of the chosen node among its parent's children (last match wins).
    for position, sibling in enumerate(holder.children):
        if sibling == operatornode:
            slot = position

    # Vec number of each operand of the chosen node.
    operand_vecs = []
    for operand in operatornode.children:
        operand_rpn = tree.from_ast_to_rpn(operand)
        operand_state = State(self.voc, operand_rpn, self.calculus_mode)
        operand_game = Game(self.voc, operand_state)
        _, vec_count, _ = operand_game.from_rpn_to_critical_info()
        operand_vecs.append(vec_count)

    if operand_vecs == [0, 1]:
        survivor = operatornode.children[1]
    elif operand_vecs == [1, 0]:
        survivor = operatornode.children[0]
    elif operand_vecs == [0, 0] or (
            operand_vecs == [1, 1]
            and operator_rpn[-1] != self.voc.dot_number):
        # Both operands are acceptable: pick one at random.
        survivor = operatornode.children[0 if random.random() < 0.5 else 1]
    else:
        # Dot-product case: not handled.
        return False, backup

    holder.children[slot] = survivor

    # Rebuild the state from the modified tree.
    return True, State(self.voc, tree.from_ast_to_rpn(tree.topnode),
                       self.calculus_mode)
def vectorial_crossover(self, state1, state2):
    """Try to swap one randomly chosen subtree between two equations.

    Both inputs are deep-copied first; the copies are converted to ASTs and
    are what is handed back whenever the crossover is rejected.

    One node is drawn per equation among the RPN symbols ``>= 2``, excluding
    the last such position (the top node) and ignoring a trailing ``1``
    (halt). The swap is only performed when both subtrees report the same
    vec number (second value of ``from_rpn_to_critical_info``).

    Returns
    -------
    tuple
        ``(False, copy1, copy2)`` when an equation has no eligible subtree,
        when the vec numbers differ, or when a resulting RPN is longer than
        ``self.maximal_size``.
        ``(True, s1, s2)`` otherwise; each ``s`` is the new state (simplified
        when ``self.usesimplif``), or the copy of the input if the new state
        contains ``self.voc.infinite_number`` or has more than
        ``config.MAX_DEPTH`` functions.
    """
    backup1 = copy.deepcopy(state1)
    backup2 = copy.deepcopy(state2)
    rejected = (False, backup1, backup2)

    game1 = Game(self.voc, backup1)
    game2 = Game(self.voc, backup2)
    tree1 = game1.convert_to_ast()
    tree2 = game2.convert_to_ast()
    tokens1 = backup1.reversepolish
    tokens2 = backup2.reversepolish

    # Drop the trailing halt symbol before looking for candidate nodes.
    body1 = np.asarray(tokens1[:-1] if tokens1[-1] == 1 else tokens1)
    body2 = np.asarray(tokens2[:-1] if tokens2[-1] == 1 else tokens2)

    # Symbols >= 2 head a subtree (leaves included); the final [:-1] removes
    # the top node so that only proper subtrees can be picked.
    first_symbol = 2
    candidates1 = np.where(body1 >= first_symbol)[0][:-1]
    candidates2 = np.where(body2 >= first_symbol)[0][:-1]
    if candidates1.size == 0 or candidates2.size == 0:
        return rejected

    pick1 = np.random.choice(candidates1)
    pick2 = np.random.choice(candidates2)
    subtree1 = tree1.from_ast_get_node(tree1.topnode, pick1 + 1)[0]
    subtree2 = tree2.from_ast_get_node(tree2.topnode, pick2 + 1)[0]

    # Vec number of each picked subtree, evaluated on its own.
    sub_rpn1 = tree1.from_ast_to_rpn(subtree1)
    sub_rpn2 = tree2.from_ast_to_rpn(subtree2)
    sub_state1 = State(self.voc, sub_rpn1, self.calculus_mode)
    sub_state2 = State(self.voc, sub_rpn2, self.calculus_mode)
    sub_game1 = Game(self.voc, sub_state1)
    sub_game2 = Game(self.voc, sub_state2)
    _, vec_count1, _ = sub_game1.from_rpn_to_critical_info()
    _, vec_count2, _ = sub_game2.from_rpn_to_critical_info()
    if vec_count1 != vec_count2:
        # A vector subtree cannot be exchanged with a scalar one.
        return rejected

    # Exchange the subtrees by rewiring each parent's child slot.
    parent1 = subtree1.parent
    for slot, child in enumerate(parent1.children):
        if child == subtree1:
            parent1.children[slot] = subtree2
    parent2 = subtree2.parent
    for slot, child in enumerate(parent2.children):
        if child == subtree2:
            parent2.children[slot] = subtree1

    new_rpn1 = tree1.from_ast_to_rpn(tree1.topnode)
    new_rpn2 = tree2.from_ast_to_rpn(tree2.topnode)
    # Give up entirely if either offspring exceeds the size limit.
    if len(new_rpn1) > self.maximal_size or len(new_rpn2) > self.maximal_size:
        return rejected

    child1 = State(self.voc, new_rpn1, self.calculus_mode)
    child2 = State(self.voc, new_rpn2, self.calculus_mode)
    if self.usesimplif:
        child1 = game_env.simplif_eq(self.voc, child1)
        child2 = game_env.simplif_eq(self.voc, child2)
    check1 = Game(self.voc, child1)
    check2 = Game(self.voc, child2)

    # An offspring with a true division by zero, or with too many nested
    # functions, is replaced by the copy of its parent.
    outcome = []
    for fallback, offspring, checker in ((backup1, child1, check1),
                                         (backup2, child2, check2)):
        unusable = (
            self.voc.infinite_number in offspring.reversepolish
            or checker.getnumberoffunctions() > config.MAX_DEPTH
        )
        outcome.append(fallback if unusable else offspring)

    return True, outcome[0], outcome[1]