def test():
    pygame.init()
    agent = DQNAgent(output_dim=3)
    agent.model.load_weights('weights.hdf5')
    # while counter_games < 150:
    # Initialize classes
    game = Game(440, 440)
    player1 = game.player
    field0 = game.field
    # Perform first move
    record = 0
    initialize_game(player1, game, field0, agent)
    if display_option:
        display(player1, field0, game, record)
    while not game.crash:
        # get old state
        state_old = agent.get_state(game, player1, field0)
        # predict action based on the old state
        prediction = agent.model.predict(state_old)
        final_move = np.argmax(prediction[0])
        print("move {} with prediction: {}".format(final_move, prediction))
        # perform new move and get new state
        player1.do_move(final_move, field0, game)
        record = get_record(game.score, record)
        if display_option:
            pygame.time.wait(speed)
            display(player1, field0, game, record)
def train(display_on, speed, params):
    pygame.init()
    pygame.font.init()
    agent = DQNAgent(params)
    counter_games = 0
    high_score = 0
    score_plot = []
    counter_plot = []
    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)
        if display_on:
            game.update_display()
        while not game.crash:
            if handle_game_event(game):
                return
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            state = game.get_state()
            move = agent.get_move(state)
            game.do_move(move)
            new_state = game.get_state()
            reward = get_reward(game)
            # train short memory based on the new action and state
            agent.train_short_memory(state, move, reward, new_state, game.crash)
            agent.remember(state, move, reward, new_state, game.crash)
            if display_on:
                game.update_display()
                pygame.time.wait(speed)
        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        high_score = game.high_score
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        agent.replay_memory(params['batch_size'])
    agent.model.save_weights(params['weights_path'])
    pygame.quit()
    plot_seaborn(counter_plot, score_plot)
def script():
    agents = [BetterRandomAgent(), BetterPathAgent(), DQNAgent()]
    field_sizes = [8, 20]
    n_games = 1000
    for size in field_sizes:
        for agent in agents:
            run_n_logs(agent, n_games, size)
def __init__(self, env_name='harvest', num_agents=1):
    self.env_name = env_name
    if env_name == 'harvest':
        print('Initializing Harvest environment')
        self.env = HarvestEnv(ascii_map=HARVEST_MAP_CPR, num_agents=num_agents, render=True)
    elif env_name == 'cleanup':
        print('Initializing Cleanup environment')
        self.env = CleanupEnv(num_agents=num_agents, render=True)
    else:
        print('Error! Not a valid environment type')
        return

    self.num_agents = num_agents
    self.agent_policies = []
    self.agents = list(self.env.agents.values())
    # print(agents[0].action_space)
    self.action_dim = self.agents[0].action_space.n
    for _ in range(num_agents):
        # TODO right now only using 1 frame, update later to look back x (e.g. 4) frames. Later RNN/LSTM
        neural_net = ConvFC(
            conv_in_channels=3,  # harvest-specific: input is 15x15x3 (HARVEST_VIEW_SIZE = 7)
            conv_out_channels=3,
            input_size=15,
            hidden_size=64,
            output_size=self.action_dim)
        self.agent_policies.append(DQNAgent(0, self.action_dim - 1, neural_net))
    self.env.reset()
def __init__(self):
    self.amountOfSimulations = 0
    self.maxSpeed = 4
    self.score = 0
    self.previousScore = 0
    self.highscore = 0
    self.highscoreTime = 0
    # self.delayCounter = 0
    # self.delay = randint(5, 10)
    self.keepRunning = True
    self.agent = DQNAgent()
    self.mysystem = chrono.ChSystemNSC()
    self.ground = theBattleground.theBattleground(self.mysystem)
    self.createRobot(self.mysystem)
    self.createApplication()
    self.run()
def train(epoch=10):
    pygame.init()
    agent = DQNAgent(output_dim=5)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < epoch:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player1
        player2 = game.player2
        field0 = game.field
        # Perform first move
        initialize_game(player1, player2, game, field0, agent)
        if display_option:
            display(player1, player2, field0, game, record)
        game_epoch = 0
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 50 - game_epoch
            train_each_epoch(agent, game, field0, player1, [player2], game_epoch)
            train_each_epoch(agent, game, field0, player2, [player1], game_epoch)
            record = get_record(game.player1.score, game.player2.score, record)
            if display_option:
                display(player1, player2, field0, game, record)
                pygame.time.wait(speed)
            game_epoch += 1
            game.crash = not (game.player1.display or game.player2.display)
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.player1.score, game.player2.score)
        score_plot.append(game.player1.score)
        counter_plot.append(counter_games)
        print(counter_plot)
    agent.model.save_weights('weights_multi.hdf5')
    plot_seaborn(counter_plot, score_plot)
def test():
    global dqn_save, checkFirst, dqn_agent
    if checkFirst:
        dqn_agent = DQNAgent(num_state, num_action, num_hidden_node)
        checkFirst = False
    else:
        dqn_agent = dqn_save
    dqn_save = dqn_agent
    return dqn_agent
def __init__(self, env_name, state_size, frame_size, agent="DQN",
             render=True, train=True, load_path="checkpoint.pth"):
    self.env_name = env_name
    self.frame_size = frame_size
    self.render = render
    self.train = train
    self.env = gym.make(self.env_name)
    if agent == "DQN":
        self.agent = DQNAgent(state_size, frame_size, self.env.action_space.n, load_path)
    else:
        raise Exception("Agent not found")
    self.initialized = False
    self.epoch = 0
def play(display_on, speed, params):
    pygame.init()
    pygame.font.init()
    agent = DQNAgent(params)
    agent.epsilon = 0
    counter_games = 0
    high_score = 0
    score_plot = []
    counter_plot = []
    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)
        if display_on:
            game.update_display()
        while not game.crash:
            if handle_game_event(game):
                return
            state = game.get_state()
            move = agent.get_move(state)
            game.do_move(move)
            if display_on:
                game.update_display()
                pygame.time.wait(speed)
        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        high_score = game.high_score
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    pygame.quit()
    plot_seaborn(counter_plot, score_plot)
def __init__(self):
    self.rows = 0
    self.cols = 0
    self.path_for_my_units = None
    self.units_points = np.zeros(shape=(9,), dtype=int)
    self.epsilon = 0
    self.agent = DQNAgent()
    self.picker = Picker()
    self.state_old = np.zeros(121)
    self.new_state = []
    self.picked = np.zeros(9, dtype=int)
    self.prediction = [[]]
class QLearningBehaviour(Behaviour):
    def __init__(self, visualRange=3):
        self.agent = DQNAgent(visualRange)
        self.age = 0
        self.current_move = None
        self.current_input = None
        # def on_init(self, visualRange):

    def decide(self, input):
        self.current_input = input
        final_move = None
        epsilon = 150 - self.age
        if epsilon < 15:
            epsilon = 15
        if randint(0, 100) > epsilon:
            final_move = randint(0, 5)
        else:
            # get old state
            # state_old = agent.get_state(game, player1, food1)
            state_old = np.asarray(input)
            prediction = self.agent.model.predict(input.reshape((1, -1)))
            # final_move = to_categorical(np.argmax(prediction[0]), num_classes=5)
            final_move = np.argmax(prediction[0])
        self.current_move = final_move
        self.age += 1
        print(final_move)
        return final_move
        # perform new move and get new state
        # self.do_move(final_move, self.x, self.y, agent)
        # state_new = agent.get_state(game, player1, food1)

    def feedback(self, reward, state):
        # set reward for the new state
        # reward = agent.set_reward(input, move, reward)
        state_new = np.asarray(state)
        # train short memory based on the new action and state
        state_old = self.current_input
        final_move = self.current_move
        # final_move = to_categorical(self.current_move, num_classes=6)
        self.agent.train_short_memory(state_old, final_move, reward, state_new)
        # store the new data into a long term memory
        self.agent.remember(state_old, final_move, reward, state_new)
        # record = get_record(game.score, record)
        # if display_option:
        #     display(player1, food1, game, record)
        #     pygame.time.wait(speed)
        self.agent.replay_new(self.agent.memory)  # ???
def train():
    env = gym.make('CartPole-v0')
    agent = DQNAgent(env=env)
    num_episodes = 200
    for i_episode in range(num_episodes):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_reward += reward
            update_array = [state, action, reward, next_state, done]
            agent.update(update_array)
            state = next_state
            if done:
                print("Episode ", i_episode, ": ", total_reward, " epsilon: ", agent.epsilon)
                break
    agent.save('myClassModel')
    env.close()
def play(display_on, speed, params):
    pygame.init()
    pygame.font.init()
    agent = DQNAgent(params)
    counter_games = 0
    high_score = 0
    # score_plot = []
    # counter_plot = []
    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)
        if display_on:
            game.update_display()
        while not game.crash:
            if handle_game_event(game):
                return
            state = game.get_state()
            prediction = agent.model.predict(state.reshape((1, 11)))
            move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            game.do_move(move)
            if display_on:
                game.update_display()
                pygame.time.wait(speed)
        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        high_score = game.high_score
    pygame.quit()
def train(epoch=10):
    pygame.init()
    agent = DQNAgent(output_dim=3)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < epoch:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        field0 = game.field
        # Perform first move
        initialize_game(player1, game, field0, agent)
        if display_option:
            display(player1, field0, game, record)
        game_epoch = 0
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 50 - game_epoch
            # get old state
            state_old = agent.get_state(game, player1, field0)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 100) < agent.epsilon:
                final_move = randint(0, 2)
                # print("random with prob {}".format(agent.epsilon))
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old)
                final_move = np.argmax(prediction[0])
                print("prediction: {}".format(prediction))
            # print("move: {} to position ({}, {})".format(final_move, player1.x, player1.y))
            # perform new move and get new state
            player1.do_move(final_move, field0, game)
            if game_epoch >= 19:
                # get new state
                state_new = agent.get_state(game, player1, field0)
                # set reward for the new state
                reward = agent.set_reward(player1, game.crash, final_move)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into a long term memory
                if_remember = False
                if game.crash:
                    agent.remember(state_old, final_move, reward, state_new, game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move == 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new, game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move != 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new, game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                print("actual move {} to ({}, {}) gets reward {} - remember {}".format(
                    final_move, player1.x, player1.y, reward, if_remember))
                # explore other moves
                if final_move == 0:  # no move
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1, max(0, player1.x - 1), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2, min(player1.x + 1, 21), player1.y)
                elif final_move == 1:  # left
                    # 0 no move
                    explore_moves(game, field0, agent, player1, state_old, 0, min(player1.x + 1, 21), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2, min(player1.x + 2, 21), player1.y)
                elif final_move == 2:  # right
                    # 0 no move
                    explore_moves(game, field0, agent, player1, state_old, 0, max(0, player1.x - 1), player1.y)
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1, max(0, player1.x - 2), player1.y)
            record = get_record(game.score, record)
            if display_option:
                display(player1, field0, game, record)
                pygame.time.wait(speed)
            game_epoch += 1
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        if game.score >= record:
            agent.model.save_weights(modelFile + '/weights.hdf5')
    agent.model.save_weights(modelFile + '/weightsFinal.hdf5')
    plot_seaborn(counter_plot, score_plot)
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(), weight_decay=0, lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)
        while not game.crash:
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # predict action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(state_old.reshape((1, 11)),
                                                    dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(prediction.detach().cpu().numpy()[0])]
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
import numpy as np
import gym
from DQN import DQNAgent
from utils import plot_learning

env = gym.make('LunarLander-v2')
lr = 0.001
n_games = 500
agent = DQNAgent(gamma=0.99, epsilon=1.0, lr=lr,
                 input_dims=env.observation_space.shape,
                 n_actions=env.action_space.n,
                 mem_size=1000000, batch_size=64, epsilon_end=0.01)
scores = []
eps_history = []
for i in range(n_games):
    done = False
    score = 0
    observation = env.reset()
    while not done:
        # env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, _ = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        observation = observation_
con_3 = Convolution1D(32, 3)(con_2)
con_4 = Convolution1D(32, 3)(con_3)
flt_1 = Flatten()(con_4)
den_1 = Dense(32, activation='sigmoid')(flt_1)
den_2 = Dense(32, activation='sigmoid')(den_1)
den_3 = Dense(16, activation='sigmoid')(den_2)
den_4 = Dense(16, activation='sigmoid')(den_3)
predictions = Dense(8, activation="linear")(den_4)
model = Model(inputs=inputs, outputs=predictions)
model.compile(loss='mean_squared_error', optimizer='sgd')

input_test = list()
for i in range(0, 10):
    input_test.append([i for i in range(0, 85)])
print(model.predict(np.array(input_test).reshape(1, 85, -1)))

trainer = Trainer(model)
pool = ThreadPoolExecutor(max_workers=255)
for i in range(0, 255):
    pool.submit(DQNAgent(20, reward, model=trainer).run_bot_join)
DQNAgent(20, reward, model=trainer).run_bot_join()
pool.shutdown(wait=True)
print("Shut down!")
model.save('trained_model.h5')
def run(display_option, speed, params):
    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)
        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 1) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)
def run():
    agent = DQNAgent(size)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < games:
        # Initialize classes
        game = Game(size, size)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = (games * 0.4) - counter_games
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, games) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, agent.size)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        agent.replay_new(agent.memory)
        counter_games += 1
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        print('Game', counter_games, ' Score:', game.score, 'Last 10 Avg:', np.mean(score_plot[-10:]))
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
# #################################################
# Start timer
start = timeit.default_timer()

if DDQN is True:
    # Name of weights + initialize DDQN class + run training + evaluate
    name_of_weights_DDQN = 'weights_DDQN.h5'
    name_of_plot = 'train_plot_DDQN'
    pre_agent = DDQNAgent()
    aft_agent = run(pre_agent, name_of_weights_DDQN, name_of_plot)
    evaluate_network(aft_agent, name_of_weights_DDQN)
else:
    # Name of weights + initialize DQN (target) class + run training + evaluate
    if target:
        name_of_weights_DQN = 'weights_DQN_target.h5'
        name_of_plot = 'train_plot_DQN_target'
    else:
        name_of_weights_DQN = 'weights_DQN_not_target_2.h5'
        name_of_plot = 'train_plot_DQN_not_target_2'
    pre_agent = DQNAgent(target)
    aft_agent = run(pre_agent, name_of_weights_DQN, name_of_plot)
    evaluate_network(aft_agent, name_of_weights_DQN)

stop = timeit.default_timer()
print('Total run time: %i min' % ((stop - start) / 60))
        diff = target_q_values - q_values
        self.memory.update_priorities(indices, diff.detach().squeeze().abs().cpu().numpy().tolist())
        return loss


if __name__ == "__main__":
    log_dir = "./dqn-p/1"
    if not osp.exists(log_dir):
        os.makedirs(log_dir)
    env_id = "Pong-v0"
    env = gym.make(env_id)
    # env = gym.wrappers.Monitor(env, osp.join(log_dir, "record"))
    env = WrapPytorch(env)
    agent = DQNAgent(env, log_dir=log_dir)
    # agent.load(log_dir)
    episode_rewards = []
    ep = 0
    obs = env.reset()
    episode_reward = 0
    for frame in range(Config.MAX_FRAMES):
        # print("frame", frame)
        # env.render()
        epsilon = Config.epsilon_by_frame(frame)
        action = agent.get_action(obs, epsilon)
        prev_obs = obs
        obs, reward, done, _ = env.step(action)
        episode_reward += reward
        agent.update(prev_obs, action, reward, obs, frame)
attacks["spsa"] = { "epsilon": 0.05, "delta": 0.005, "num_steps": 5, "spsa_iters": 5, "spsa_samples": 2, "y_target": 0, "is_targeted": True } #, "early_stop_loss_threshold": -1.} tf.reset_default_graph() sess = tf.Session() env = gym.make('PongNoFrameskip-v4') statsFolder = "../experiments/pong/" dqn = DQNAgent(env, sess, "../ckpts/dqn/pong_final/dqn_final.ckpt") attackModel = AttackModel(dqn) stats = testAttacks(dqn, attackModel, attacks, gamesNum, attackProbs, actionProbThr, attDetThr, statsFolder) #adv training test tf.reset_default_graph() sess = tf.Session() env = gym.make('PongNoFrameskip-v4') statsFolder = "../experiments/pong_adv_training_0.015/" dqn = DQNAgent(env, sess, "../ckpts/dqn/pong_adv_training/0.015/dqn_final.ckpt") attackModel = AttackModel(dqn)
def run_game():
    env = Tetris()
    episodes = 2000
    max_steps = None
    discount = 0.98
    replay_mem_size = 20000
    minibatch_size = 512
    epsilon = 1
    epsilon_min = 0
    epsilon_stop_episode = 1500
    learning_rate = 5e-3
    epochs = 1
    show_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    hidden_dims = [64, 64]
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     discount=discount,
                     replay_mem_size=replay_mem_size,
                     minibatch_size=minibatch_size,
                     epsilon=epsilon,
                     # epsilon_decay=epsilon_decay,
                     epsilon_min=epsilon_min,
                     epsilon_stop_episode=epsilon_stop_episode,
                     learning_rate=learning_rate,
                     hidden_dims=hidden_dims,
                     activations=activations,
                     replay_start_size=replay_start_size)

    log_dir = f'log/tetris-{datetime.now().strftime("%Y%m%d-%H%M%S")}-nn={str(hidden_dims)}-mem={replay_mem_size}-bs={minibatch_size}-discount={discount}'
    log = ModifiedTensorBoard(log_dir=log_dir)
    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset_game()
        done = False
        step = 0
        log.step = episode
        if show_every and episode % show_every == 0:
            show = True
        else:
            show = False

        # Run the game until either game over or we've hit the max number of steps
        while not done and (not max_steps or step < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())
            best_action = None
            # action is (x, i), state is [lines_cleared, holes, total_bumpiness, sum_height]
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break
            # reward is the score, done is gameover status
            reward, done = env.play_game(best_action[0], best_action[1], show=show)
            if show:
                env.show()
            agent.update_replay_memory(current_state, best_action, next_states[best_action], reward, done)
            # move to next timestep
            current_state = next_states[best_action]
            step += 1

        if show:
            # After game is completed, collect the final score
            print("Episode %d score: %d epsilon: %.2f" % (episode, env.get_game_score(), agent.epsilon))
        scores.append(env.get_game_score())
        agent.train(epochs=epochs)

        if log_every and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            log.update_stats(avg_score=avg_score, min_score=min_score, max_score=max_score)

        if env.get_game_score() >= MIN_SCORE:
            if not os.path.exists('models/'):
                os.makedirs('models/')
            agent.model.save(
                f'models/eps_{str(episode)}nn_{str(hidden_dims)}__bs{minibatch_size}__score_{env.get_game_score()}__{int(time.time())}.h5'
            )
def test():
    env = gym.make('CartPole-v0')
    my_test_agent = DQNAgent(env, model='myClassModel')
    avg_reward, max_reward = my_test_agent.test_agent()
    print("average reward: ", avg_reward, " maximum reward: ", max_reward)
from flask import Flask, jsonify, request
from DQN import DQNAgent
import os

app = Flask(__name__)

num_state = 6
num_action = 2
# If you want to change the number of nodes in the hidden layers, you must change it
# both here and in build_model() in DQN.py.
num_hidden_node = [120, 120]
dqn_agent = DQNAgent(num_state, num_action, num_hidden_node)


@app.route('/model', methods=['GET'])
def get_model():
    return jsonify(dqn_agent.get_model())


@app.route('/update', methods=['POST'])
def update():
    dqn_agent.run(request.json)
    print("finish run")
    return jsonify(dqn_agent.get_model())


if __name__ == '__main__':
    app.run(debug=False)
trainingEpisodes = 2
testingEpisodes = 2
frames = 1000
name = "DQN"

run = wandb.init(project="test",
                 config={
                     "trainingEpisodes": trainingEpisodes,
                     "testingEpisodes": testingEpisodes,
                     "frames": frames,
                     "epsilon": epsilon,
                     "deepLayers": deepLayers,
                     "layerSize": layerSize,
                     "layerSizeMult": layerSizeMult,
                     "learningRate": learningRate,
                     "gamma": gamma,
                     "epsilonDecay": epsilonDecay,
                     "epsilonMin": epsilonMin,
                     "batchSize": batchSize,
                     "memory": memory,
                     "name": name,
                     "replay_step_size": replayStepSize,
                 },
                 name=name,
                 allow_val_change=True)
config = wandb.config

agent = DQNAgent(env, config)
trainDQN(agent)
target_net = NeurosmashAgent()
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

# Init empty memories
memory = ReplayMemory(max_size=1024)
victory_memory = ReplayMemory(max_size=1024)

# Init lists
R = np.zeros(n_episodes)
reward = 0
losses = []
epses = []

# Init DQN agent
agent = DQNAgent(target_net, policy_net, memory)
if torch.cuda.is_available():
    print("Running on GPU")
    agent.target_net.cuda()
    agent.policy_net.cuda()
    torch.cuda.empty_cache()

# Catch KeyboardInterrupts and save model
# i = -1
# try:
#     # Reinforcement loop
#     # for i in tqdm.trange(n_episodes):
#     while True:
#         i += 1
for i in range(n_episodes):
    info, reward, state = env.reset(
import random
import time
import pygame
from DQN import DQNAgent
import numpy as np

pygame.init()
pygame.font.init()
pygame.display.set_caption('Snake Game')

window_width = 440
window_height = 480
clock = pygame.time.Clock()
max_score = 0
num_games = 0
game_speed = 10
epsilon = 1
agent = DQNAgent()


class Game:
    def __init__(self, window_width, window_height):
        self.window_width = 440
        self.window_height = 480
        self.screen = pygame.display.set_mode((window_width, window_height))
        self.background_image = pygame.image.load("images/background.png")
        self.score = 0
        self.increase_length = False
        self.game_over = False


class Player:
    def __init__(self, game, x, y, direction=4):
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 60:
        game = Game(width, height)
        pipeU = game.upper_pipe
        pipeD = game.lower_pipe
        player = game.player
        between_pipes = False
        while not game.crash:
            agent.epsilon = 20 - counter_games
            state_old = agent.get_state(game, player, pipeU, pipeD)
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 1), num_classes=2)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 8)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=2)
            player.Move(game, final_move)
            move_pipes(pipeU, pipeD, 160)
            check_collision(player, pipeU, pipeD, game)
            check_score(pipeU, game)
            between_pipes = check_pipes(pipeU, pipeD, player)
            reward = agent.set_reward(pipeU, game.crash, between_pipes)
            state_new = agent.get_state(game, player, pipeU, pipeD)
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            display(game, player, pipeU, pipeD, record)
            if game.score == 50:
                game.crash = True
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
    pygame.quit()
# Exploration settings
epsilon = 1  # not a constant, going to be decayed
# epsilon = 0.00
EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.001

# For more repetitive results
random.seed(1)
np.random.seed(1)
tf.random.set_seed(1)

MIN_REWARD = -200  # For model save
MEMORY_FRACTION = 0.20

agent = DQNAgent()
env = BlobEnv()

# Create models folder
if not os.path.isdir('models'):
    os.makedirs('models')

# Iterate over episodes
for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):
    # Update tensorboard step every episode
    agent.tensorboard.step = episode
    # Restarting episode - reset episode reward and step number
    episode_reward = 0
    step = 1