model.add(Dropout(dropout)) model.add(Dense(output_states)) sgd = Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) model.compile(loss='mean_squared_error', optimizer=sgd) return model q_nn = build_network(4, 6, 1, 32, "relu", 0.0) target_nn = build_network(4, 6, 1, 32, "relu", 0.0) target_nn.set_weights(q_nn.get_weights()) replay1 = replay_memory_agent(4, 10000) dqn_controller = deep_q_agent(action_value_model=q_nn, target_model=target_nn, states_len=4, replay_memory=replay1) env = gym.make("CartPole-v0") check_save = -100 # Book keeping avg_reward_episodes = [] # Global time step gt = 0 epsilon = 1.0 for episodes in range(0, 8000): # Initial State state = env.reset() done_1 = False
import matplotlib.pyplot as plt

np.random.seed(42)

import gym

# Make the environment
env = gym.make("CartPole-v0")

# TensorFlow session (TF1-style graph execution; `network` presumably builds
# its model against this session — TODO confirm against its definition).
sess = tf.Session()

# Initialize the action-value and target networks (4 state dims, 2 actions).
ac_function = network(4, 2, sess)
target_function = network(4, 2, sess)
# Make sure both networks start from the same weights
target_function.set_weights(ac_function)

# Replay memory: state length 4, capacity 10000.
replay = replay_memory_agent(4, 10000)

# Deep Q-learning agent
prof_x = deep_q_agent(ac_function, target_function, 4,
                      replay.replay_memory, epsi_greedy)

state = env.reset()

# Book keeping
done = False
episodes = 0
reward_episode = []
reward_track = []
epsilon = 1.0

# Training loop (body continues past this chunk).
while episodes < 8000:
    # Pick action: epsilon-greedy over the online network's Q-values.
    state = np.asarray(state)
    state = state.reshape(1, 4)
    q_values = ac_function.predict_on_batch(state)
    action = epsi_greedy([0, 1], q_values, epsilon)

    # Implement action
    state_new, reward, done, _ = env.step(action)