def set_environment(state_size, action_size):
    # Rebuild the module-level environment/agent pair with the sizes the
    # caller passes in; both objects are shared via globals with the
    # wrapper functions defined later in this file.
    state_count = int(state_size)
    action_count_b = int(action_size)
    global env
    global agent
    env = SON_environment(random_state=seed, state_size=state_count,
                          action_size=action_count_b)
    agent = QLearner(seed=seed, state_size=state_count,
                     action_size=action_count_b, batch_size=batch_size)
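# A minimal usage sketch (illustrative only): set_environment rebuilds the
# module-level env/agent pair, after which the wrappers defined further down
# can be called. The values 3 and 5 mirror the state_count/action_count_b
# constants used below; they are not mandated by this function.
set_environment(3, 5)
state = env_reset_wrapper()                  # fresh initial state
eps = agent_get_exploration_rate_wrapper()   # current exploration rate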
########################################################################################
radio_frame = 20
seeds = np.arange(100).tolist()
seeds = [98]  # overrides the list above: run a single seed

start_time = time.time()
for seed in seeds:
    random.seed(seed)
    np.random.seed(seed)
    env = radio_environment(seed=seed)
    agent = QLearner(seed=seed)
    run_agent_fpa(env)
    # run_agent_tabular(env)
    # run_agent_deep(env)
    # run_agent_optimal(env)
########################################################################################
end_time = time.time()

duration = 1000. * (end_time - start_time)
print('Execution time: {:4f} ms.\n'.format(duration))
filename = 'figures/timing_M={}.txt'.format(env.M_ULA)
with open(filename, 'w') as file:  # context manager closes the file
    file.write('Execution time: {:4f} ms.\n'.format(duration))
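# time.time() can lose precision at the millisecond scale measured above; the
# standard library's time.perf_counter() is the usual high-resolution choice.
# A minimal sketch of the same timing pattern (run_agent_fpa and env as above):
import time

def timed_run_ms(fn, *args):
    t0 = time.perf_counter()
    fn(*args)
    return 1000. * (time.perf_counter() - t0)  # duration in ms

# e.g.: duration = timed_run_ms(run_agent_fpa, env)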
    # (tail of the per-episode action plot helper)
    plt.step(np.arange(len(actions)), actions, color='b', label='Actions')
    plt.savefig('figures/actions_episode_{}_seed{}.pdf'.format(
        episode_index, seed), format="pdf")
    # plt.show(block=True)
    plt.close(fig)
    return

########################################################################################
radio_frame = 10
seeds = np.arange(500).astype(int).tolist()
# seeds = [0]  # for optimal

for seed in seeds:
    print('Now running seed: {}'.format(seed))
    random.seed(seed)
    np.random.seed(seed)
    env = radio_environment(seed=seed)
    # run_agent_optimal(env)
    agent = QLearner(seed=seed)  # only for the deep agent
    run_agent_deep(env)
    # K.clear_session()  # free up GPU memory
    del agent
########################################################################################
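# The commented-out K.clear_session() above hints at per-seed cleanup. A
# minimal sketch of that pattern, assuming the agent's network is built with
# tf.keras (the original may import keras directly instead):
from tensorflow.keras import backend as K
import gc

def cleanup_after_seed(agent):
    del agent           # drop the Python reference to the network
    K.clear_session()   # reset Keras' global state, freeing GPU memory
    gc.collect()        # prompt collection of any lingering tensors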
import random
import numpy as np

from SON_environment import SON_environment  # module path assumed
from DQNLearningAgent import DQNLearningAgent as QLearner

seed = 3  # change in Top_File.m also
random.seed(seed)
np.random.seed(seed)

batch_size = 32
state_count = 3
action_count_b = 5

env = SON_environment(random_state=seed, state_size=state_count,
                      action_size=action_count_b)
agent = QLearner(seed=seed, state_size=state_count,
                 action_size=action_count_b, batch_size=batch_size)

# This is the entry point to the simulation
def env_reset_wrapper():
    global env
    global agent
    state = env.reset()
    return state

def agent_get_exploration_rate_wrapper():
    global env
    global agent
    return agent.exploration_rate
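# The wrappers above expose reset() and the exploration rate to an external
# caller (the Top_File.m comment suggests a MATLAB bridge). A hypothetical
# companion wrapper in the same style, assuming the agent exposes an
# act(state) method as typical DQN agents do (not confirmed by this file):
def agent_act_wrapper(state):
    global env
    global agent
    action = agent.act(state)  # hypothetical: epsilon-greedy action choice
    return int(action)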
ax.set_ylabel(r'$Q$')
ax_sec.set_ylabel(r'$L$')
plt.legend([plot1, plot2], [r'Average $Q$', r'Average loss'],
           bbox_to_anchor=(0.1, 0.0, 0.80, 1),
           bbox_transform=fig.transFigure,
           loc='lower center', ncol=3, mode="expand", borderaxespad=0.)
plt.tight_layout()
plt.savefig('output.pdf', format='pdf')
plt.show()
plt.close(fig)

seeds = np.arange(1).tolist()
for seed in seeds:
    env = radio_environment(random_state=seed)
    agent = QLearner(random_state=seed)
    start_time = time.time()
    run_agent_q(env)
    end_time = time.time()
    print('Simulation took {:.2f} minutes.'.format(
        (end_time - start_time) / 60.))
########################################################################################
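# The handles ax, ax_sec, plot1, and plot2 come from earlier in the plotting
# routine. A minimal self-contained sketch of the assumed twin-axis setup
# (variable names mirror the fragment; the data arrays are placeholders):
import matplotlib.pyplot as plt
import numpy as np

q_avg = np.random.rand(50)             # placeholder: average Q per episode
loss_avg = np.random.rand(50)          # placeholder: average loss per episode

fig, ax = plt.subplots()
ax_sec = ax.twinx()                    # secondary y-axis sharing the x-axis
plot1, = ax.plot(q_avg, 'b-')          # Q on the left axis
plot2, = ax_sec.plot(loss_avg, 'r--')  # loss on the right axis
ax.set_xlabel('Episode')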