Example #1
def combine_model(path_to_dqn='model_cpu'):
    # Load the trained DQN weights.
    model2 = DQN()
    model2.load_state_dict(torch.load(path_to_dqn))

    # Load the tabular Q-function from CSV.
    model1 = QModel()
    model1.q_function = np.loadtxt('model.csv', delimiter=',')

    # Start a new game and grab the first frame.
    new_s = game.GameState()
    start = time.time()
    reward = 0
    speed = 0.0
    t = 0
    image_data, reward, terminate, (x, y), up, red, _, pedes = new_s.frame_step(0)
    last_screen = get_roi(x, y, up)
    current_screen = get_roi(x, y, up)

    # The DQN state is the difference between consecutive screen crops.
    state2 = current_screen - last_screen
    while not terminate:
        # Discrete state for the tabular model, frame-difference state for the DQN.
        state1 = get_state(image_data, x, y, up, red, pedes)

        # Let the two models jointly select the next action.
        action = select_action(model1, model2, state1, state2, algorithm=3)

        image_data, r, terminate, (x, y), up, red, sp, pedes = new_s.frame_step(action)

        # Refresh the frame-difference state for the next step.
        last_screen = current_screen
        current_screen = get_roi(x, y, up)
        state2 = current_screen - last_screen

        # Accumulate episode statistics.
        reward += r
        speed += sp
        t += 1

    cur = time.time()

    print('The game lasted for {} frames'.format(t))
    print('The game lasted for {} seconds'.format(cur - start))
    print('The total reward: {}'.format(reward))
    print('The average speed is: {}'.format(speed / t))
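
A minimal invocation, assuming the saved DQN weights ('model_cpu') and the tabular Q-values ('model.csv') referenced above are present in the working directory:

# Run one full game with the combined policy and print the episode statistics.
combine_model(path_to_dqn='model_cpu')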
Example #2
# Exploration schedule for the epsilon-greedy policy
# (cosine annealing of epsilon, restarted over 5 cycles)
cos_ann = True
ann_cyc = 5

schedule = Schedule(t0, t1, e0, e1, decay_fun, cosine_annealing=cos_ann, annealing_cycles=ann_cyc)

# Policy
policy = EpsilonGreedyPolicy(schedule=schedule, value_function=Q)

# Reward Function
reward_fun = rf_info2d_pos

# Action pre-/post-processing (map discrete agent actions to continuous ones)
act_fun = act_disc2cont

# Agent
lr = 1e-4
gamma = 0.99
doubleQ = True # Run doubleQ-DQN, sampling from Q_target and bootstrapping from Q
rb = False
rb_max_size = 1e6
rb_batch_size = 64
tau = 0.1

agent = DQN(policy, act_fun, Q, Q_target, state_dim, action_dim, gamma, doubleQ, reward_fun=reward_fun,
            replay_buffer=rb, max_buffer_size=rb_max_size, batch_size=rb_batch_size, tau=tau, lr=lr)

# Training
show = False

train_agent(agent, desc, file_name, runs, episodes, time_steps, test_episodes, init_state, init_noise, show=show)
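
For reference, a minimal sketch of what a cosine-annealed epsilon schedule like the one configured above could compute; the names t0, t1, e0, e1 mirror the snippet, but this is only an assumption about Schedule's behaviour, not its actual implementation:

import numpy as np

def cosine_annealed_eps(t, t0, t1, e0, e1, cycles=5):
    # Epsilon follows a half-cosine from e0 down to e1 within each cycle,
    # restarts at the next cycle, and stays at e1 once t reaches t1.
    if t >= t1:
        return e1
    cycle_len = (t1 - t0) / cycles
    phase = ((t - t0) % cycle_len) / cycle_len
    return e1 + 0.5 * (e0 - e1) * (1.0 + np.cos(np.pi * phase))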
Example #3
import sys
sys.path.append('../src/')
from random_graph import RandGraph
from deep_q_learning import DQN
import matplotlib.pyplot as plt
import pylab

# Two identical random graphs: g will be driven by random actions, h by the trained DQN.
g = RandGraph(graph_type='simple', actors=1000)
h = RandGraph(graph_type='simple', actors=1000)
dqn_g = DQN(g)
dqn_h = DQN(h)
dqn_h.load()  # restore previously trained weights

plt.style.use('seaborn-talk')

for i in range(200):

    pylab.clf()

    # Left panel: a randomly sampled action applied to graph g.
    plt.subplot(121)
    plt.title('Random action')
    rand_act = dqn_g.action_space.sample()
    a1 = dqn_g.action_space.get_nodes(rand_act)
    g.action(a1)
    g.plot()

    # Right panel: the action predicted by the trained DQN applied to graph h.
    plt.subplot(122)
    plt.title('Deep Q Learning')
    state = h.get_loading()
    a2 = dqn_h.predict(state)
    h.action(a2)