def set_environment(state_size, action_size):
    # Rebuild the global environment/agent pair from the sizes the caller
    # passes in; `seed` is assumed to be a module-level global.
    state_count = int(state_size)
    action_count_b = int(action_size)
    global env
    global agent

    env = SON_environment(seed=seed,
                          state_size=state_count,
                          action_size=action_count_b)
    agent = QLearner(seed=seed,
                     state_size=state_count,
                     action_size=action_count_b)
Example #2
def set_environment(state_size, action_size):
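    # Rebuild the global environment/agent pair, forwarding the sizes to both
    # constructors; `seed` and `batch_size` are module-level globals.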
    state_count = int(state_size)
    action_count_b = int(action_size)
    global env
    global agent

    env = SON_environment(random_state=seed,
                          state_size=state_count,
                          action_size=action_count_b)
    agent = QLearner(seed=seed,
                     state_size=state_count,
                     action_size=action_count_b,
                     batch_size=batch_size)
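
# A minimal, hypothetical invocation (the sizes match Example #5's setup):
# set_environment(3, 5)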
########################################################################################

radio_frame = 20
# seeds = np.arange(100).tolist()  # full 100-seed sweep
seeds = [98]  # single-seed run

start_time = time.time()

for seed in seeds:

    random.seed(seed)
    np.random.seed(seed)

    env = radio_environment(seed=seed)
    agent = QLearner(seed=seed)

    # Exactly one of the following runners should be active per run:
    run_agent_fpa(env)
#    run_agent_tabular(env)
#    run_agent_deep(env)
#    run_agent_optimal(env)

########################################################################################

end_time = time.time()

duration = 1000. * (end_time - start_time)
print('Execution time: {:.4f} ms.\n'.format(duration))

# Record the timing alongside the figures; the with-block closes the file.
filename = 'figures/timing_M={}.txt'.format(env.M_ULA)
with open(filename, 'w') as timing_file:
    timing_file.write('Execution time: {:.4f} ms.\n'.format(duration))
Example #4
    plt.step(np.arange(len(actions)), actions, color='b', label='Actions')
    plt.savefig('figures/actions_episode_{}_seed{}.pdf'.format(
        episode_index, seed),
                format="pdf")
    #    plt.show(block=True)
    plt.close(fig)  # avoid accumulating open figures across episodes
    return


########################################################################################

radio_frame = 10
seeds = np.arange(500).astype(int).tolist()
#seeds = [0] # for optimal

for seed in seeds:
    print('Now running seed: {}'.format(seed))
    random.seed(seed)
    np.random.seed(seed)

    env = radio_environment(seed=seed)

    #    run_agent_optimal(env)

    agent = QLearner(seed=seed)  # only needed for the deep agent
    run_agent_deep(env)
    #    K.clear_session() # free up GPU memory
    del agent
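    # Enabling the K.clear_session() call above (to free GPU memory between
    # seeds) also requires the backend import, assuming a Keras/TF backend:
    # from keras import backend as K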

########################################################################################
Example #5
import random

import numpy as np

# SON_environment is assumed to be defined or imported elsewhere in this file.
from DQNLearningAgent import DQNLearningAgent as QLearner

seed = 3  # keep this value in sync with Top_File.m

random.seed(seed)
np.random.seed(seed)

batch_size = 32
state_count = 3
action_count_b = 5

env = SON_environment(random_state=seed,
                      state_size=state_count,
                      action_size=action_count_b)
agent = QLearner(seed=seed,
                 state_size=state_count,
                 action_size=action_count_b,
                 batch_size=batch_size)


# Entry points to the simulation: thin wrappers around the global env/agent,
# called from the external driver (Top_File.m).
def env_reset_wrapper():
    # Reset the shared environment and hand the initial state to the driver.
    state = env.reset()
    return state


def agent_get_exploration_rate_wrapper():
    # Expose the agent's current exploration rate (epsilon) to the driver.
    return agent.exploration_rate
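
# A hedged sketch of the companion step wrapper such a driver would also need;
# the return signature of env.step() is an assumption, not shown in this
# snippet:
#
# def env_step_wrapper(action):
#     next_state, reward, done = env.step(action)
#     return next_state, reward, done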
Example #6
    ax.set_ylabel(r'$Q$')
    ax_sec.set_ylabel(r'$L$')
    plt.legend([plot1, plot2], [r'Average $Q$', r'Average loss'],
               bbox_to_anchor=(0.1, 0.0, 0.80, 1),
               bbox_transform=fig.transFigure,
               loc='lower center',
               ncol=3,
               mode="expand",
               borderaxespad=0.)

    plt.tight_layout()
    plt.savefig('output.pdf', format='pdf')
    plt.show()
    plt.close(fig)


seeds = np.arange(1).tolist()

for seed in seeds:

    env = radio_environment(random_state=seed)
    agent = QLearner(random_state=seed)
    start_time = time.time()
    run_agent_q(env)
    end_time = time.time()

    print('Simulation took {:.2f} minutes.'.format(
        (end_time - start_time) / 60.))

########################################################################################