from my_dueling_dqn import DuelingDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import tkinter as tk
# freq_env was used below without an import; it is assumed to live in a
# local module of the same name.
from freq_env import freq_env

root = tk.Tk()
root.title("matplotlib in TK")
f = Figure(figsize=(6, 6), dpi=100)
canvas = FigureCanvasTkAgg(f, master=root)
canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

env = freq_env()

MEMORY_SIZE = 5000
ACTION_SPACE = 10

sess = tf.Session()
with tf.variable_scope('natural'):
    natural_DQN = DuelingDQN(
        n_actions=ACTION_SPACE, n_features=160, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, sess=sess, dueling=False)

with tf.variable_scope('dueling'):
    # The original line was cut off mid-call; the remaining arguments are
    # filled in by symmetry with the 'natural' network, with dueling=True.
    dueling_DQN = DuelingDQN(
        n_actions=ACTION_SPACE, n_features=160, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, sess=sess, dueling=True)

sess.run(tf.global_variables_initializer())
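# The setup above builds the two networks but never runs them. The helper
# below is a minimal, hedged sketch of how each network could be trained
# against env; it assumes my_dueling_dqn exposes the common tutorial-style
# API -- choose_action(observation), store_transition(s, a, r, s_) and
# learn() -- and that observations flatten to the n_features vector the
# networks expect. None of that is confirmed by this file.
def train(agent, n_steps=10000, learn_start=200):
    """Hypothetical helper: one epsilon-greedy rollout-and-learn loop."""
    observation = env.waterfall_reset()
    for step_count in range(n_steps):
        # The network takes a flat feature vector; the env works on the raw
        # waterfall array, so keep both forms around.
        action = agent.choose_action(observation.flatten())       # assumed API
        observation_, reward, terminal = env.step(action, observation)
        agent.store_transition(observation.flatten(), action,     # assumed API
                               reward, observation_.flatten())
        if step_count > learn_start:
            agent.learn()                                         # assumed API
        observation = env.waterfall_reset() if terminal else observation_

# Usage sketch: train(natural_DQN), then train(dueling_DQN), to compare the
# plain and dueling variants under the same environment.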
# Agent, preprocess, TRAIN, NUM_EPISODES and NUM_EPISODES_AT_TEST are not
# defined in this file; they are assumed to come from the surrounding
# project (e.g. its DQN agent module).
def main():
    # env = gym.make(ENV_NAME)
    env = freq_env()
    agent = Agent(num_actions=env.actions)

    root = tk.Tk()
    root.title("matplotlib in TK")
    f = Figure(figsize=(6, 6), dpi=100)
    canvas = FigureCanvasTkAgg(f, master=root)
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)
    waterfall_figure = f.add_subplot(111)

    # The four env modes differ only in which reset/step pair they use, so
    # dispatch on env.mode instead of repeating the same loop four times.
    resets = {
        0: env.waterfall_reset,
        1: env.waterfall_reset_combo,
        2: env.waterfall_reset_dynamic,
        3: env.waterfall_reset_intelligent,
    }
    steps = {
        0: env.step,
        1: env.step_combo,
        2: env.step_dynamic,
        3: env.step_intelligent,
    }
    reset, step = resets[env.mode], steps[env.mode]

    if TRAIN:  # Train mode
        for _ in range(NUM_EPISODES):
            terminal = False
            observation = reset()
            last_observation = observation
            state = agent.get_initial_state(last_observation)
            while not terminal:
                last_observation = observation
                action = agent.get_action(state)
                observation, reward, terminal = step(action, last_observation)
                # waterfall_figure.imshow(observation[0, :, :, 0])
                # canvas.draw()
                processed_observation = preprocess(observation)
                # print(observation == last_observation)
                state = agent.run(state, action, reward, terminal,
                                  processed_observation)
    else:  # Test mode
        for _ in range(NUM_EPISODES_AT_TEST):
            terminal = False
            observation = reset()
            """
            for _ in range(random.randint(1, NO_OP_STEPS)):
                last_observation = observation
                observation, _, _, _ = env.step(0)  # Do nothing
            """
            last_observation = observation
            state = agent.get_initial_state(last_observation)
            while not terminal:
                last_observation = observation
                action = agent.get_action_at_test(state)
                observation, reward, terminal = step(action, last_observation)
                waterfall_figure.imshow(observation[0, :, :, 0])
                canvas.draw()
                processed_observation = preprocess(observation)
                state = processed_observation
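# preprocess() is used in main() but never defined or imported above. The
# stand-in below is a minimal sketch only -- it assumes the observation is a
# (1, H, W, 1) float array and that preprocessing just clips the newest
# frame and drops the channel axis. If the project already provides a real
# preprocess (e.g. in its DQN module), import that instead.
def preprocess(observation):
    """Hypothetical stand-in: clip to [0, 1] and return a (1, H, W) frame."""
    frame = np.clip(observation[0, :, :, 0], 0.0, 1.0)
    return frame[np.newaxis, :, :]


if __name__ == '__main__':
    main()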