# NOTE: this function assumes torch, numpy (np), time, the `game` module, and the
# project helpers DQN, QModel, get_roi, get_state and select_action are
# imported/defined elsewhere in this file.
def combine_model(path_to_dqn='model_cpu'):
    # Load the trained DQN (model2) and the tabular Q-model (model1).
    model2 = DQN()
    model2.load_state_dict(torch.load(path_to_dqn))
    model1 = QModel()
    model1.q_function = np.loadtxt('model.csv', delimiter=',')

    # Start a fresh game and take a no-op first step to get the initial observation.
    new_s = game.GameState()
    start = time.time()
    reward = 0
    speed = 0.0
    t = 0
    image_data, reward, terminate, (x, y), up, red, _, pedes = new_s.frame_step(0)
    last_screen = get_roi(x, y, up)
    current_screen = get_roi(x, y, up)
    state2 = current_screen - last_screen

    # Play until the game terminates, combining both models to select actions.
    while not terminate:
        state1 = get_state(image_data, x, y, up, red, pedes)
        action = select_action(model1, model2, state1, state2, algorithm=3)
        image_data, r, terminate, (x, y), up, red, sp, pedes = new_s.frame_step(action)
        last_screen = current_screen
        current_screen = get_roi(x, y, up)
        state2 = current_screen - last_screen
        reward += r
        speed += sp
        t += 1

    cur = time.time()
    print('The game lasted for {} frames'.format(t))
    print('The game lasted for {} seconds'.format(cur - start))
    print('The total reward: {}'.format(reward))
    print('The average speed is: {}'.format(speed / t))
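# A minimal usage sketch (assumption: this file is run directly as a script and
# 'model_cpu' is the checkpoint loaded above; this guard is not in the original source).
if __name__ == '__main__':
    combine_model(path_to_dqn='model_cpu')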
cos_ann = True
ann_cyc = 5
schedule = Schedule(t0, t1, e0, e1, decay_fun,
                    cosine_annealing=cos_ann, annealing_cycles=ann_cyc)

# Policy
policy = EpsilonGreedyPolicy(schedule=schedule, value_function=Q)

# Reward Function
reward_fun = rf_info2d_pos

# Action Pre/Post-Processing
act_fun = act_disc2cont

# Agent
lr = 1e-4
gamma = 0.99
doubleQ = True  # Run double DQN, sampling from Q_target and bootstrapping from Q
rb = False      # Replay buffer disabled
rb_max_size = 1e6
rb_batch_size = 64
tau = 0.1
agent = DQN(policy, act_fun, Q, Q_target, state_dim, action_dim, gamma, doubleQ,
            reward_fun=reward_fun, replay_buffer=rb, max_buffer_size=rb_max_size,
            batch_size=rb_batch_size, tau=tau, lr=lr)

# Training
show = False
train_agent(agent, desc, file_name, runs, episodes, time_steps, test_episodes,
            init_state, init_noise, show=show)
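# The schedule above enables cosine annealing with 5 cycles. As an illustration
# only (this is NOT the Schedule/EpsilonGreedyPolicy API, just the standard
# SGDR-style warm-restart formula such a flag usually refers to; eps_start/eps_end
# presumably correspond to e0/e1 above):
import math

def cosine_annealed_epsilon(step, total_steps, eps_start=1.0, eps_end=0.05, cycles=5):
    """Epsilon restarts at eps_start at each cycle and decays to eps_end along a half cosine."""
    cycle_len = total_steps / cycles
    t = step % cycle_len
    return eps_end + 0.5 * (eps_start - eps_end) * (1.0 + math.cos(math.pi * t / cycle_len))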
import sys
sys.path.append('../src/')

from random_graph import RandGraph
from deep_q_learning import DQN
import matplotlib.pyplot as plt
import pylab

# Two identical random graphs: one driven by random actions, one by the trained DQN.
g = RandGraph(graph_type='simple', actors=1000)
h = RandGraph(graph_type='simple', actors=1000)
dqn_g = DQN(g)
dqn_h = DQN(h)
dqn_h.load()  # Load the trained weights for the DQN-controlled graph

plt.style.use('seaborn-talk')

for i in range(200):
    pylab.clf()

    # Left panel: apply a randomly sampled action to graph g.
    plt.subplot(121)
    plt.title('Random action')
    rand_act = dqn_g.action_space.sample()
    a1 = dqn_g.action_space.get_nodes(rand_act)
    g.action(a1)
    g.plot()

    # Right panel: apply the action predicted by the trained DQN to graph h.
    plt.subplot(122)
    plt.title('Deep Q Learning')
    state = h.get_loading()
    a2 = dqn_h.predict(state)
    h.action(a2)
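# The loop above never forces a redraw, so depending on the backend nothing may
# appear on screen while it runs. A hedged sketch of one common matplotlib pattern
# for animating it, e.g. by calling refresh_panels(h) at the end of each iteration;
# the right-panel plot call and the pause length are assumptions, not part of the
# original script:
def refresh_panels(graph, pause_s=0.05):
    """Hypothetical helper: draw the DQN-controlled graph and let the figure update."""
    graph.plot()        # mirror the left panel's g.plot() for the right panel
    plt.draw()          # flush pending plotting commands to the canvas
    plt.pause(pause_s)  # brief pause so interactive backends repaint the window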