def create_and_train_agent(num_kernels, kernel_size, lr, history_length, batch_size, num_episodes,
                           epsilon, discount_factor, tau, stride=1, model_dir="./models_carracing"):
    env = gym.make('CarRacing-v0').unwrapped

    state_dim = env.observation_space.shape
    state_dim = (state_dim[0], state_dim[1], history_length + 1)

    # only use straight, left, right, accelerate and brake
    num_actions = 5

    # create Q network
    Q = CNN(state_dim, num_actions, num_kernels, kernel_size, lr, stride)
    # create target network
    Q_target = CNNTargetNetwork(state_dim, num_actions, num_kernels, kernel_size, lr, tau, stride)

    print("Creating agent now ..")
    # create dqn_agent
    dqn_agent = DQNAgent(Q, Q_target, num_actions, discount_factor, batch_size, epsilon)

    # reload already trained agent for further training
    # dqn_agent.load('./models_carracing/dqn_agent.ckpt')

    start_time = time.time()
    train_online(env, dqn_agent, num_episodes, history_length, model_dir)
    end_time = time.time()
    print("Time needed for training:", (end_time - start_time) / 60, "min")
def make_pacman_agent(name, model_path, lr, discount_factor, batch_size, epsilon, epsilon_decay,
                      boltzmann, tau, double_q, buffer_capacity, history_length, diff_history,
                      big=False, save_hypers=False):
    hypers = locals()
    num_actions = 5

    # save hypers into folder -- used for reconstructing model at test time
    if save_hypers:
        with open(os.path.join(model_path, "hypers.json"), "w") as fh:
            json.dump(hypers, fh)

    # using -1 for unused parameters. fix later.
    Q_current = CNN(num_actions=num_actions, lr=lr, history_length=history_length,
                    diff_history=diff_history, big=big)
    Q_target = CNNTargetNetwork(num_actions=num_actions, lr=lr, tau=tau, history_length=history_length,
                                diff_history=diff_history, big=big)

    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(name, Q_current, Q_target, num_actions, discount_factor, batch_size,
                     epsilon, epsilon_decay, boltzmann, double_q, buffer_capacity)
    return agent
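# A hedged sketch (not part of the source) of how the saved hypers.json could be used to
# rebuild an identical agent at test time, as the comment in make_pacman_agent suggests.
# It assumes every entry in hypers.json corresponds to a make_pacman_agent() argument.
def load_pacman_agent(model_path):
    with open(os.path.join(model_path, "hypers.json")) as fh:
        hypers = json.load(fh)
    hypers["save_hypers"] = False  # do not overwrite the stored hyperparameters
    return make_pacman_agent(**hypers)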
def create_and_train_agent(lr, epsilon, discount_factor, bs, tau, num_episodes, hidden=20):
    # create environment
    env = gym.make("CartPole-v0").unwrapped

    # get state space and number of actions
    state_dim = 4    # env.observation_space.shape[0]
    num_actions = 2  # env.action_space.n

    # create neural networks
    Q = NeuralNetwork(state_dim=state_dim, num_actions=num_actions, hidden=hidden, lr=lr)
    Q_target = TargetNetwork(state_dim=state_dim, num_actions=num_actions, hidden=hidden, lr=lr, tau=tau)

    # create agent
    agent = DQNAgent(Q, Q_target, num_actions, discount_factor=discount_factor, batch_size=bs, epsilon=epsilon)

    # train agent
    train_online(env, agent, num_episodes=num_episodes)

    # get some final values to compare different networks
    rewards = []
    for i in range(10):
        stats_det = run_episode(env, agent, deterministic=True, do_training=False)
        rewards.append(stats_det.episode_reward)
    return np.mean(rewards)
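# A hypothetical usage sketch (hyperparameter values chosen purely for illustration):
# the mean deterministic reward returned above can be used to compare configurations.
if __name__ == "__main__":
    for lr in [1e-3, 1e-4]:
        mean_reward = create_and_train_agent(lr=lr, epsilon=0.1, discount_factor=0.99,
                                             bs=32, tau=0.01, num_episodes=500)
        print("lr=%g -> mean deterministic reward: %.1f" % (lr, mean_reward))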
Q = ConvolutionNeuralNetwork(state_dim=state_dim, num_actions=num_actions, history_length=history_length,
                             hidden=300, lr=3e-4)
Q_target = CNNTargetNetwork(state_dim=state_dim, num_actions=num_actions, history_length=history_length,
                            hidden=300, lr=3e-4)
agent = DQNAgent(Q, Q_target, num_actions, method=method, discount_factor=0.95, batch_size=64,
                 epsilon=epsilon, epsilon_decay=epsilon_decay, explore_type=explore_type, game=game,
                 tau=tau, epsilon_min=epsilon_min)
train_online(env, agent, skip_frames=skip_frames, num_episodes=1200, max_timesteps=1000,
             history_length=history_length, model_dir="./models_carracing")
os.system('python test_carracing.py')
Q = CNN(state_dim, nr_actions, hidden=300, lr=0.0003, history_length=history_length)
Q_target = CNNTargetNetwork(state_dim, nr_actions, hidden=300, lr=0.0003, history_length=history_length)
agent = DQNAgent(Q, Q_target, nr_actions, discount_factor=0.95, batch_size=batch_size, epsilon=0.05,
                 epsilon_decay=0.95, epsilon_min=0.05, tau=0.5, game='carracing',
                 exploration="boltzmann", history_length=history_length)
train_online(env, agent, nr_episodes, history_length=history_length, skip_frames=skip_frames,
             model_dir="./models_carracing")
np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # get state space and number of actions
    state_dim = 4    # env.observation_space.shape[0]
    num_actions = 2  # env.action_space.n

    # create neural networks
    Q = NeuralNetwork(state_dim=state_dim, num_actions=num_actions, hidden=16)
    Q_target = TargetNetwork(state_dim=state_dim, num_actions=num_actions, hidden=16)

    # create agent
    agent = DQNAgent(Q, Q_target, num_actions, discount_factor=0.9)
    agent.load('./models_cartpole/dqn_agent.ckpt')

    n_test_episodes = 15
    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
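    # A minimal sketch (not in the original snippet) of the write-out step the comment
    # above describes; the results path and file name are assumptions, and json is
    # assumed to be imported.
    with open("./results/cartpole_results_dqn.json", "w") as fh:
        json.dump(results, fh)
    env.close()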
# Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0
# over 100 consecutive trials.
env = gym.make("CartPole-v0").unwrapped

# TODO:
# 1. init Q network and target network (see dqn/networks.py)
# 2. init DQNAgent (see dqn/dqn_agent.py)
# 3. train DQN agent with train_online(...)
nr_states = env.observation_space.shape[0]
nr_actions = env.action_space.n
batch_size = 32
nr_episodes = 1000
print(nr_states, nr_actions)

Q = NeuralNetwork(state_dim=nr_states, num_actions=nr_actions, hidden=20, lr=0.001)
Q_target = TargetNetwork(state_dim=nr_states, num_actions=nr_actions, hidden=20, lr=0.001)
agent = DQNAgent(Q, Q_target, nr_actions, discount_factor=0.99, batch_size=batch_size, epsilon=0.05)
train_online(env, agent, nr_episodes)
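# A small helper sketch added for the hint above (not in the original code); it assumes
# numpy is imported as np. CartPole-v0 counts as solved once the mean reward over 100
# consecutive episodes reaches 195.0.
def is_solved(episode_rewards, window=100, threshold=195.0):
    if len(episode_rewards) < window:
        return False
    return float(np.mean(episode_rewards[-window:])) >= threshold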
import numpy as np

np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    # 1. init Q network and target network (see dqn/networks.py)
    Q = NeuralNetwork(state_dim=4, num_actions=2)
    Q_target = TargetNetwork(state_dim=4, num_actions=2)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q, Q_target, 2)

    n_test_episodes = 50
    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
import numpy as np

np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    state_dim = env.observation_space.shape[0]
    num_actions = env.action_space.n

    Q = NeuralNetwork(state_dim, num_actions)
    Q_target = TargetNetwork(state_dim, num_actions)
    agent = DQNAgent(Q, Q_target, num_actions)
    agent.load(os.path.join("./models_cartpole/", "dqn_agent.ckpt"))

    n_test_episodes = 15
    episode_rewards = []
    for i in range(n_test_episodes):
        print(i)
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    num_actions = 2
    state_dim = 4
    method = "DQL"
    game = "cartpole"
    epsilon = 0.5
    epsilon_decay = 0.95
    explore_type = "epsilon_greedy"

    Q = NeuralNetwork(state_dim=state_dim, num_actions=num_actions, hidden=200, lr=1e-4)
    Q_target = TargetNetwork(state_dim=state_dim, num_actions=num_actions, hidden=200, lr=1e-4)
    agent = DQNAgent(Q, Q_target, num_actions, method=method, discount_factor=0.6, batch_size=64,
                     epsilon=epsilon, epsilon_decay=epsilon_decay, explore_type=explore_type, game=game)
    agent.load("./models_cartpole/dqn_agent.ckpt")

    n_test_episodes = 15
    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
Q = NeuralNetwork(state_dim=state_dim, num_actions=num_actions, hidden=300, lr=1e-3)
Q_target = TargetNetwork(state_dim=state_dim, num_actions=num_actions, hidden=300, lr=1e-3)
agent = DQNAgent(Q, Q_target, num_actions, game=game, exploration=exploration, discount_factor=0.95,
                 batch_size=32, epsilon=0.5, epsilon_decay=0.99, epsilon_min=0.05)
agent.load("models_cartpole/dqn_agent.ckpt")

n_test_episodes = 15
episode_rewards = []
for i in range(n_test_episodes):
    stats = run_episode(env, agent, deterministic=True, do_training=False,
                        rendering=True)
np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    nr_states = env.observation_space.shape[0]
    nr_actions = env.action_space.n

    Q = NeuralNetwork(nr_states, nr_actions)
    Q_target = TargetNetwork(nr_states, nr_actions)
    agent = DQNAgent(Q, Q_target, nr_actions)
    agent.load("./models_cartpole/dqn_agent__.ckpt")

    n_test_episodes = 15
    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
# Load movie dict
# db_dict = pickle.load(open(DICT_FILE_PATH, 'rb'), encoding='latin1')
db_dict = json.load(open(DICT_FILE_PATH, encoding='utf-8'))[0]

# Load goal file
# user_goals = pickle.load(open(USER_GOALS_FILE_PATH, 'rb'), encoding='latin1')
user_goals = json.load(open(USER_GOALS_FILE_PATH, encoding='utf-8'))

# Init. Objects
if USE_USERSIM:
    user = UserSimulator(user_goals, constants, database)
else:
    user = User(constants)
emc = ErrorModelController(db_dict, constants)
state_tracker = StateTracker(database, constants)
dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)


def test_run():
    """
    Runs the loop that tests the agent.

    Tests the agent on the goal-oriented chatbot task. Only for evaluating a trained agent.
    Terminates when the episode reaches NUM_EP_TEST.
    """
    print('Testing Started...')
    episode = 0
    while episode < NUM_EP_TEST:
        episode_reset()
# get state space and number of actions
state_dim = env.observation_space.shape
state_dim = (state_dim[0], state_dim[1], history_length + 1)
num_actions = 5

Q = CNN(state_dim, num_actions, num_kernels, kernel_size, stride=stride)
# create target network
Q_target = CNNTargetNetwork(state_dim, num_actions, num_kernels, kernel_size, 1e-4, tau, stride=stride)

# create dqn_agent
agent = DQNAgent(Q, Q_target, num_actions, df)
agent.load('./models_carracing/dqn_agent.ckpt')

n_test_episodes = 15
episode_rewards = []
for i in range(n_test_episodes):
    stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True,
                        history_length=history_length)
    episode_rewards.append(stats.episode_reward)

# save results in a dictionary and write them into a .json file
"a_1": stats_training.get_action_usage(1) }) # TODO: evaluate your agent once in a while for some episodes using run_episode(env, agent, deterministic=True, do_training=False) to # check its performance with greedy actions only. You can also use tensorboard to plot the mean episode reward. # ... # store model every 100 episodes and in the end. if i % 100 == 0 or i >= (num_episodes - 1): agent.saver.save(agent.sess, os.path.join(model_dir, "dqn_agent.ckpt")) tensorboard.close_session() if __name__ == "__main__": environment = "CartPole-v0" # environment = "MountainCar-v0" env = gym.make(environment).unwrapped state_dim = env.observation_space.shape[0] num_actions = env.action_space.n num_episodes = 100 Q = NeuralNetwork(state_dim, num_actions) Q_target = TargetNetwork(state_dim, num_actions) agent = DQNAgent(Q, Q_target, num_actions) train_online(env, agent, num_episodes)
def process_conversation_POST(state_tracker_id, message):
    now = datetime.now()
    date_time = now.strftime("%m_%d_%Y_%H_%M_%S")
    dict_investigate = {}
    state_tracker = None
    # print('tracker_id', state_tracker_id)
    dict_investigate['time'] = date_time
    dict_investigate['state_tracker_id'] = state_tracker_id

    if state_tracker_id in StateTracker_Container.keys():
        state_tracker = StateTracker_Container[state_tracker_id][0]
        confirm_obj = StateTracker_Container[state_tracker_id][1]
    else:
        # print("---------------------------------in model")
        state_tracker = StateTracker(database, constants)
        confirm_obj = None
        StateTracker_Container[state_tracker_id] = (state_tracker, confirm_obj)

    user_action, new_confirm_obj = get_user_request(message, state_tracker)
    print("-------------user action-----------")
    print(user_action, new_confirm_obj)
    print('-----------------------------------')
    dict_investigate['user_action'] = user_action

    if user_action['request_slots'] != {}:
        state_tracker.reset()
        confirm_obj = None
    if new_confirm_obj != None:
        confirm_obj = new_confirm_obj

    # try:
    if user_action['intent'] not in ["hello", "other", "done"]:
        dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)
        agent_act = get_agent_action(state_tracker, dqn_agent, user_action)
        print('========================')
        print('agent action', agent_act)
        print('========================')
        StateTracker_Container[state_tracker_id] = (state_tracker, confirm_obj)
        # print('state_tracker.current_request_slots[0]', state_tracker.current_request_slots[0])
        agent_message = response_craft(agent_act, state_tracker, confirm_obj)
    else:
        # to prevent key error
        agent_act = {'intent': user_action['intent'], 'request_slots': [], 'inform_slots': []}
        # print('========================')
        # print('agent action', agent_act)
        # print('========================')
        agent_message = random.choice(response_to_user_free_style[user_action['intent']])

    # if the intent is "done", reset the tracker and set confirm_obj back to None
    if user_action['intent'] == "done":
        state_tracker.reset()
        StateTracker_Container[state_tracker_id] = (state_tracker, None)

    dict_investigate['agent_action'] = agent_act
    dict_investigate['fail_pattern'] = 'success'
    return agent_message, agent_act
Q = CNN(state_dim, nr_actions, hidden=300, lr=0.0003, history_length=history_length)
Q_target = CNNTargetNetwork(state_dim, nr_actions, hidden=300, lr=0.0003, history_length=history_length)
agent = DQNAgent(Q, Q_target, nr_actions, discount_factor=0.99, batch_size=batch_size, epsilon=0.05,
                 epsilon_decay=0.95, epsilon_min=0.05, tau=0.5, game='carracing',
                 exploration="boltzmann", history_length=history_length)
agent.load("./models_carracing/dqn_agent_1000.ckpt")
# agent.load("/home/singhs/Downloads/exercise4_R_NR/models_carracing/dqn_agent_600.ckpt")

n_test_episodes = 15
episode_rewards = []
for i in range(n_test_episodes):
    # stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
    stats, frames = run_episode(env,
exploration = "greedy"
batch_size = 64

Q = NeuralNetwork(state_dim=state_dim, num_actions=num_actions, hidden=400, lr=3e-4)
Q_target = TargetNetwork(state_dim=state_dim, num_actions=num_actions, hidden=400, lr=3e-4)
agent = DQNAgent(Q, Q_target, num_actions, game=game, exploration=exploration, discount_factor=0.95,
                 batch_size=64, epsilon=0.5, epsilon_decay=0.99, epsilon_min=0.05)
train_online(env, agent, 1000)

# TODO:
# 1. init Q network and target network (see dqn/networks.py)
# 2. init DQNAgent (see dqn/dqn_agent.py)
# 3. train DQN agent with train_online(...)
state_dim = (96, 96)
history_length = 2
num_actions = 5
skip_frames = 2
method = "DQL"
# method = "CQL"
game = "carracing"
epsilon = 0.2
epsilon_decay = 0.999
epsilon_min = 0.05
explore_type = "epsilon_greedy"
tau = 0.5

Q = ConvolutionNeuralNetwork(state_dim=state_dim, num_actions=num_actions, history_length=history_length,
                             hidden=300, lr=3e-4)
Q_target = CNNTargetNetwork(state_dim=state_dim, num_actions=num_actions, history_length=history_length,
                            hidden=300, lr=3e-4)
agent = DQNAgent(Q, Q_target, num_actions, method=method, discount_factor=0.98, batch_size=64,
                 epsilon=epsilon, epsilon_decay=epsilon_decay, explore_type=explore_type, game=game,
                 tau=tau, epsilon_min=epsilon_min)
# agent = DQNAgent(Q, Q_target, num_actions, method=method, discount_factor=0.6, batch_size=64,
#                  epsilon=epsilon, epsilon_decay=epsilon_decay, explore_type=explore_type, game=game, tau=tau)
agent.load("./models_carracing/dqn_agent.ckpt")

n_test_episodes = 15
episode_rewards = []
for i in range(n_test_episodes):
    stats = run_episode(env, agent, history_length=history_length, deterministic=True,
                        do_training=False, rendering=True)
    episode_rewards.append(stats.episode_reward)

# save results in a dictionary and write them into a .json file
results = dict()
results["episode_rewards"] = episode_rewards
results["mean"] = np.array(episode_rewards).mean()
results["std"] = np.array(episode_rewards).std()