# --- Actor-critic training loop for CartPole ---
# This fragment sits inside a loop over i_episode (not shown); score and
# score_history are assumed to be initialised before it runs.
observation = env.reset()
for t in range(1000):
    # env.render()
    # First pass through the network: map the current observation to an action.
    action = agent.choose_action(observation)
    observation_, reward, done, info = env.step(action)
    # Record the reward for this step.
    agent.store_rewards(reward)
    score += reward
    agent.learn(observation, reward, observation_, done)
    observation = observation_
    if done:
        score_history.append(score)
        print("==========================================")
        print("Episode: ", i_episode)
        print("Reward: ", score)
        break

if i_episode % 100 == 0:
    agent.save_model()
    print("Model Saved")

# After training, plot the learning curve and close the environment.
plotLearning(score_history, filename="cartpole.png", window=10)
env.close()
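# The plotLearning helper used throughout these scripts is imported from a
# utility module that is not part of this section. A minimal sketch that is
# consistent with the call above (scores, filename, window) might look like
# the following; the actual helper may differ.
import matplotlib.pyplot as plt
import numpy as np


def plotLearning(scores, filename, window=5):
    # Running average of the previous `window` scores for each episode.
    running_avg = np.empty(len(scores))
    for t in range(len(scores)):
        running_avg[t] = np.mean(scores[max(0, t - window):(t + 1)])
    plt.plot(running_avg)
    plt.title('Running average of previous %d scores' % window)
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.savefig(filename)
    plt.close()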
# --- DDPG training loop for LunarLander ---
# Tail of the Agent constructor call; the earlier arguments are not shown in
# this fragment.
              layer1_size=400, layer2_size=300, n_actions=2)
# agent.load_models()
np.random.seed(0)

score_history = []
for i in range(1000):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
        # env.render()
    score_history.append(score)

    if i % 25 == 0:
        agent.save_models()

    print('episode ', i, 'score %.2f' % score,
          'trailing 100 games avg %.3f' % np.mean(score_history[-100:]))

filename = 'LunarLander-alpha000025-beta00025-400-300.png'
plotLearning(score_history, filename, window=100)
# --- DQN training loop for LunarLander ---
# Tail of the Agent constructor call; the earlier arguments are not shown in
# this fragment.
              input_dims=[8], lr=0.001)
scores, eps_history = [], []
n_games = 500

for i in range(n_games):
    score = 0
    done = False
    observation = env.reset()
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        agent.learn()
        observation = observation_
        # env.render()
    scores.append(score)
    eps_history.append(agent.epsilon)

    avg_score = np.mean(scores[-100:])
    print('episode ', i, 'score %.2f' % score,
          'average score %.2f' % avg_score,
          'epsilon %.2f' % agent.epsilon)

x = [i + 1 for i in range(n_games)]
filename = 'lunar_lander_dqn_2.png'
plotLearning(x, scores, eps_history, filename)
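# Unlike the keyword-style call in the CartPole script, this script passes four
# positional arguments (x, scores, eps_history, filename), i.e. a variant that
# overlays the epsilon schedule on the score curve. A sketch consistent with
# that signature, again an assumption rather than the original utility:
import matplotlib.pyplot as plt
import numpy as np


def plotLearning(x, scores, epsilons, filename):
    fig, ax_eps = plt.subplots()
    # Epsilon schedule on the left axis.
    ax_eps.plot(x, epsilons, color='C0')
    ax_eps.set_xlabel('Episode')
    ax_eps.set_ylabel('Epsilon', color='C0')
    # Running average of the previous 100 scores on the right axis.
    running_avg = [np.mean(scores[max(0, t - 100):(t + 1)]) for t in range(len(scores))]
    ax_score = ax_eps.twinx()
    ax_score.plot(x, running_avg, color='C1')
    ax_score.set_ylabel('Score (100-episode running average)', color='C1')
    fig.savefig(filename)
    plt.close(fig)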
# --- DDPG training loop for Pendulum ---
# Tail of the Agent constructor call; the earlier arguments are not shown in
# this fragment.
              tau=0.001, env=env, batch_size=64,
              layer1_size=400, layer2_size=300, n_actions=1)
np.random.seed(0)

score_history = []
num_episodes = 2500
for i in range(num_episodes):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        action = agent.choose_action(obs)
        obs_, reward, done, info = env.step(action)
        agent.remember(obs, action, reward, obs_, done)
        agent.learn()
        score += reward
        obs = obs_
        env.render()
    score_history.append(score)
    print('episode ', i, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))

filename = 'pendulum.png'
x = [i for i in range(num_episodes)]
# score_history is passed twice: DDPG has no epsilon schedule, so it stands in
# for the epsilon history expected by the plotting helper.
plotLearning(x, score_history, score_history, filename)
# --- A2C training loop for the ns3-gym TCP environment ---
# Tail of the per-step loop inside a try block; the code that accumulates
# score, throughput_sum and rtt_sum each step is not shown in this fragment.
            if done:
                stepIdx = 0
                if currIt + 1 < iterationNum:
                    env.reset()
                break

        throughput_history.append(throughput_sum)
        rtt_history.append(rtt_sum)
        score_history.append(score)
        print('episode: ', i, 'score: %.2f' % score)

except KeyboardInterrupt:
    print("Ctrl-C -> Exit")
finally:
    env.close()

    # Plot the learning curves and dump the per-episode rewards to CSV.
    filename = 'TCP_A2C_20_step.png'
    plotLearning(score_history, filename=filename, window=10)

    with open('reward_20_step.csv', mode='w') as reward_file:
        csv_writer = csv.writer(reward_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(score_history)

    throughput_plot_filename = 'throughput_plot_20_step.png'
    plotLearning(throughput_history, filename=throughput_plot_filename, window=10)

    rtt_plot_filename = 'rtt_plot_20_step.png'
    plotLearning(rtt_history, filename=rtt_plot_filename, window=10)
# --- Actor-critic training for the RAN slicing simulator ---
def main(alpha, beta, gamma):
    sim_param = SimParam()
    no_of_slices = sim_param.no_of_slices
    no_of_users_per_slice = sim_param.no_of_users_per_slice
    no_of_rb = len(sim_param.RB_pool)
    no_of_timeslots = int(sim_param.T_C / sim_param.T_S)

    # State space:
    # n_states = no_of_slices * no_of_users_per_slice * no_of_rb * no_of_timeslots
    n_states = no_of_slices

    # Action space: no_of_slices ** no_of_rb
    n_actions = no_of_slices ** no_of_rb

    agent = Agent(alpha=alpha, beta=beta, input_dims=[n_states], gamma=gamma,
                  n_actions=n_actions, layer1_size=32, layer2_size=32)

    env = gym.make('ransim-v1')

    # Run the baseline algorithm once for comparison.
    baseline_score = 0
    done = False
    observation = env.reset()
    while not done:
        action = 'baseline'
        observation_, reward, done, info = env.step(action)
        if done:
            env.plot()
        observation = observation_
        baseline_score += reward
    print('baseline score: %.3f' % baseline_score)

    score_history = []
    score = 0
    num_episodes = 250
    t0 = time.time()
    for i in range(num_episodes):
        t_tmp = time.time()
        done = False
        score = 0

        # Per-episode simulation parameters.
        class Parameters:
            pass
        parameters = Parameters()
        parameters.SEED_IAT = 0
        parameters.SEED_SHADOWING = 0

        # Enable simulator logging every 50th episode.
        if i % 50 == 0:
            NO_logging = 0
        else:
            NO_logging = 1

        observation = env.reset(parameters, NO_logging)
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            if done and (i % 50 == 0):
                env.plot()
            agent.learn(observation, reward, observation_, done)
            observation = observation_
            score += reward  # .sum()
        score_history.append(score)
        elapsed_time = time.time() - t_tmp
        print('episode: ', i, 'score: %.3f time: %d' % (score, elapsed_time))

    print(time.time() - t0)

    filename = 'results/result_alpha_%.4f_beta_%.4f_gamma_%.2f.png' % (alpha, beta, gamma)
    # filename = 'cartpole-discrete-actor-critic-alpha0001-beta0005-32x32fc-1500games.png'
    plotLearning(score_history, filename=filename, window=10)
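# main() takes the two learning rates (alpha, beta) and the discount factor
# gamma. A hypothetical entry point is sketched below; the commented-out
# filename above hints at alpha=0.0001 and beta=0.0005, but all three values
# here are assumptions rather than part of the original script.
if __name__ == '__main__':
    main(alpha=0.0001, beta=0.0005, gamma=0.99)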
# --- DQN training loop for an Atari (ALE) game ---
# Body of a loop over numGames (not shown); scores, epsHistory, brain and
# batch_size are assumed to be initialised before it runs.
    epsHistory.append(brain.EPSILON)
    done = False
    observation = env.reset()
    # Crop roughly to the playing field and collapse the RGB channels.
    frames = [np.sum(observation[15:200, 30:125], axis=2)]
    score = 0
    lastAction = 0
    while not done:
        # Choose a new action once three frames have been collected; otherwise
        # repeat the previous action.
        if len(frames) == 3:
            action = brain.chooseAction(frames)
            frames = []
        else:
            action = lastAction
        observation_, reward, done, info = env.step(action)
        score += reward
        frames.append(np.sum(observation_[15:200, 30:125], axis=2))
        # Penalise losing the last life.
        if done and info['ale.lives'] == 0:
            reward = -100
        # Note: transitions are stored with np.mean over the channels, whereas
        # the frame stack above uses np.sum.
        brain.storeTransition(np.mean(observation[15:200, 30:125], axis=2),
                              action, reward,
                              np.mean(observation_[15:200, 30:125], axis=2))
        observation = observation_
        brain.learn(batch_size)
        lastAction = action
        # env.render()
    scores.append(score)
    print('score:', score)

x = [i + 1 for i in range(numGames)]
fileName = str(numGames) + 'Games' + 'Gamma' + str(brain.GAMMA) + \
           'Alpha' + str(brain.ALPHA) + 'Memory' + str(brain.memSize) + '.png'
plotLearning(x, scores, epsHistory, fileName)
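# The repeated crop-and-collapse expressions above could be factored into a
# small preprocessing helper. This is a hypothetical refactor, not part of the
# original script; the `reducer` parameter captures the fact that the script
# uses np.sum for the frame stack but np.mean for stored transitions.
import numpy as np


def preprocess(frame, reducer=np.sum):
    # Crop roughly to the playing field and collapse the RGB channels into a
    # single 2-D plane.
    return reducer(frame[15:200, 30:125], axis=2)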