Example #1
def create_and_train_agent(num_kernels,
                           kernel_size,
                           lr,
                           history_length,
                           batch_size,
                           num_episodes,
                           epsilon,
                           discount_factor,
                           tau,
                           stride=1,
                           model_dir="./models_carracing"):

    env = gym.make('CarRacing-v0').unwrapped

    state_dim = env.observation_space.shape
    state_dim = (state_dim[0], state_dim[1], history_length + 1)
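    # the network input stacks the current frame with history_length previous
    # frames along the channel axis, hence history_length + 1 channels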
    # only use straight, left, right, accelerate and brake
    num_actions = 5

    # create Q network
    Q = CNN(state_dim, num_actions, num_kernels, kernel_size, lr, stride)
    # create target network
    Q_target = CNNTargetNetwork(state_dim, num_actions, num_kernels,
                                kernel_size, lr, tau, stride)
    print("Creating agent now ..")
    # create dqn_agent
    dqn_agent = DQNAgent(Q, Q_target, num_actions, discount_factor, batch_size,
                         epsilon)
    # reload already trained agent for further training
    # dqn_agent.load('./models_carracing/dqn_agent.ckpt')

    start_time = time.time()
    train_online(env, dqn_agent, num_episodes, history_length, model_dir)
    end_time = time.time()
    print("Time needed for training:", (end_time - start_time) / 60, "min")
Example #2
def make_pacman_agent(name, model_path, lr, discount_factor,
                       batch_size, epsilon, epsilon_decay, boltzmann, tau,
                       double_q, buffer_capacity, history_length, diff_history, big=False,
                       save_hypers=False):

    hypers = locals()
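    # locals() above captures every argument of this function as a dict of
    # hyperparameters; that dict is what gets written to hypers.json below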

    num_actions = 5

    # save hypers into folder -- used for reconstructing model at test time
    if save_hypers:
        with open(os.path.join(model_path, "hypers.json"), "w") as fh:
            json.dump(hypers, fh)

    # using -1 for unused parameters. fix later.
    Q_current = CNN(
        num_actions=num_actions,
        lr=lr,
        history_length=history_length,
        diff_history=diff_history,
        big=big)

    Q_target = CNNTargetNetwork(
        num_actions=num_actions,
        lr=lr,
        tau=tau,
        history_length=history_length,
        diff_history=diff_history,
        big=big)

    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(
        name,
        Q_current,
        Q_target,
        num_actions,
        discount_factor,
        batch_size,
        epsilon,
        epsilon_decay,
        boltzmann,
        double_q,
        buffer_capacity)

    return agent
Example #3
def create_and_train_agent(lr,
                           epsilon,
                           discount_factor,
                           bs,
                           tau,
                           num_episodes,
                           hidden=20):

    # create environment
    env = gym.make("CartPole-v0").unwrapped

    # get state space and number of actions
    state_dim = 4  # env.observation_space.shape[0]
    num_actions = 2  # env.action_space.n

    # create neural networks
    Q = NeuralNetwork(state_dim=state_dim,
                      num_actions=num_actions,
                      hidden=hidden,
                      lr=lr)
    Q_target = TargetNetwork(state_dim=state_dim,
                             num_actions=num_actions,
                             hidden=hidden,
                             lr=lr,
                             tau=tau)
    # create agent
    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     discount_factor=discount_factor,
                     batch_size=bs,
                     epsilon=epsilon)
    # train agent
    train_online(env, agent, num_episodes=num_episodes)

    # get some final values to compare different networks
    rewards = []
    for i in range(10):
        stats_det = run_episode(env,
                                agent,
                                deterministic=True,
                                do_training=False)
        rewards.append(stats_det.episode_reward)

    return np.mean(rewards)
Example #4
    Q = ConvolutionNeuralNetwork(state_dim=state_dim,
                                 num_actions=num_actions,
                                 history_length=history_length,
                                 hidden=300,
                                 lr=3e-4)
    Q_target = CNNTargetNetwork(state_dim=state_dim,
                                num_actions=num_actions,
                                history_length=history_length,
                                hidden=300,
                                lr=3e-4)
    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     method=method,
                     discount_factor=0.95,
                     batch_size=64,
                     epsilon=epsilon,
                     epsilon_decay=epsilon_decay,
                     explore_type=explore_type,
                     game=game,
                     tau=tau,
                     epsilon_min=epsilon_min)
    train_online(env,
                 agent,
                 skip_frames=skip_frames,
                 num_episodes=1200,
                 max_timesteps=1000,
                 history_length=history_length,
                 model_dir="./models_carracing")

    os.system('python test_carracing.py')
    """
Example #5
    Q = CNN(state_dim,
            nr_actions,
            hidden=300,
            lr=0.0003,
            history_length=history_length)
    Q_target = CNNTargetNetwork(state_dim,
                                nr_actions,
                                hidden=300,
                                lr=0.0003,
                                history_length=history_length)
    agent = DQNAgent(Q,
                     Q_target,
                     nr_actions,
                     discount_factor=0.95,
                     batch_size=batch_size,
                     epsilon=0.05,
                     epsilon_decay=0.95,
                     epsilon_min=0.05,
                     tau=0.5,
                     game='carracing',
                     exploration="boltzmann",
                     history_length=history_length)

    train_online(env,
                 agent,
                 nr_episodes,
                 history_length=history_length,
                 skip_frames=skip_frames,
                 model_dir="./models_carracing")
Example #6
np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # get state space and number of actions
    state_dim = 4  # env.observation_space.shape[0]
    num_actions = 2  # env.action_space.n

    # create neural networks
    Q = NeuralNetwork(state_dim=state_dim, num_actions=num_actions, hidden=16)
    Q_target = TargetNetwork(state_dim=state_dim, num_actions=num_actions, hidden=16)
    # create agent
    agent = DQNAgent(Q, Q_target, num_actions, discount_factor=0.9)
    agent.load('./models_cartpole/dqn_agent.ckpt')

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
    # Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.
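    # A quick, approximate check of that criterion using the episodes above
    # (only n_test_episodes runs here, not 100 consecutive trials):
    print("solved (approx.):", results["mean"] >= 195.0)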

    env = gym.make("CartPole-v0").unwrapped

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    # 3. train DQN agent with train_online(...)

    nr_states = env.observation_space.shape[0]
    nr_actions = env.action_space.n
    batch_size = 32
    nr_episodes = 1000
    print(nr_states, nr_actions)

    Q = NeuralNetwork(state_dim=nr_states,
                      num_actions=nr_actions,
                      hidden=20,
                      lr=0.001)
    Q_target = TargetNetwork(state_dim=nr_states,
                             num_actions=nr_actions,
                             hidden=20,
                             lr=0.001)
    agent = DQNAgent(Q,
                     Q_target,
                     nr_actions,
                     discount_factor=0.99,
                     batch_size=batch_size,
                     epsilon=0.05)
    train_online(env, agent, nr_episodes)
import numpy as np

np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...

    # 1. init Q network and target network (see dqn/networks.py)
    Q = NeuralNetwork(state_dim=4, num_actions=2)
    Q_target = TargetNetwork(state_dim=4, num_actions=2)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q, Q_target, 2)
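    # Note: the TODO above is left unfilled, so no checkpoint is loaded and the
    # test episodes below evaluate whatever weights the freshly built networks start with.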

    n_test_episodes = 50

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
import numpy as np

np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    state_dim = env.observation_space.shape[0]
    num_actions = env.action_space.n

    Q = NeuralNetwork(state_dim, num_actions)
    Q_target = TargetNetwork(state_dim, num_actions)
    agent = DQNAgent(Q, Q_target, num_actions)
    agent.load(os.path.join("./models_cartpole/", "dqn_agent.ckpt"))

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        print(i)
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
Example #10
if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    num_actions = 2
    state_dim = 4
    method = "DQL"
    game = "cartpole"
    epsilon = 0.5
    epsilon_decay = 0.95
    explore_type = "epsilon_greedy"
    Q = NeuralNetwork(state_dim=state_dim, num_actions=num_actions, hidden=200, lr=1e-4)
    Q_target = TargetNetwork(state_dim=state_dim, num_actions=num_actions, hidden=200, lr=1e-4)
    agent = DQNAgent(Q, Q_target, num_actions, method=method, discount_factor=0.6, batch_size=64, epsilon=epsilon, epsilon_decay=epsilon_decay, explore_type=explore_type, game=game)

    agent.load("./models_cartpole/dqn_agent.ckpt")
 
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
Example #11
    Q = NeuralNetwork(state_dim=state_dim,
                      num_actions=num_actions,
                      hidden=300,
                      lr=1e-3)

    Q_target = TargetNetwork(state_dim=state_dim,
                             num_actions=num_actions,
                             hidden=300,
                             lr=1e-3)

    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     game=game,
                     exploration=exploration,
                     discount_factor=0.95,
                     batch_size=32,
                     epsilon=0.5,
                     epsilon_decay=0.99,
                     epsilon_min=0.05)

    agent.load("models_cartpole/dqn_agent.ckpt")

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True)
        episode_rewards.append(stats.episode_reward)

np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...

    nr_states = env.observation_space.shape[0]
    nr_actions = env.action_space.n

    Q = NeuralNetwork(nr_states, nr_actions)
    Q_target = TargetNetwork(nr_states, nr_actions)

    agent = DQNAgent(Q, Q_target, nr_actions)

    agent.load("./models_cartpole/dqn_agent__.ckpt")

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
Example #13
    # Load movie dict
    # db_dict = pickle.load(open(DICT_FILE_PATH, 'rb'), encoding='latin1')
    db_dict = json.load(open(DICT_FILE_PATH, encoding='utf-8'))[0]

    # Load goal file
    # user_goals = pickle.load(open(USER_GOALS_FILE_PATH, 'rb'), encoding='latin1')
    user_goals = json.load(open(USER_GOALS_FILE_PATH, encoding='utf-8'))

    # Init. Objects
    if USE_USERSIM:
        user = UserSimulator(user_goals, constants, database)
    else:
        user = User(constants)
    emc = ErrorModelController(db_dict, constants)
    state_tracker = StateTracker(database, constants)
    dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)


def test_run():
    """
    Runs the loop that tests the agent.

    Tests the agent on the goal-oriented chatbot task. Only for evaluating a trained agent.
    Terminates when the episode count reaches NUM_EP_TEST.

    """

    print('Testing Started...')
    episode = 0
    while episode < NUM_EP_TEST:
        episode_reset()
Example #14
    # get state space and number of actions
    state_dim = env.observation_space.shape
    state_dim = (state_dim[0], state_dim[1], history_length + 1)
    num_actions = 5

    Q = CNN(state_dim, num_actions, num_kernels, kernel_size, stride=stride)
    # create target network
    Q_target = CNNTargetNetwork(state_dim,
                                num_actions,
                                num_kernels,
                                kernel_size,
                                1e-4,
                                tau,
                                stride=stride)
    # create dqn_agent
    agent = DQNAgent(Q, Q_target, num_actions, df)
    agent.load('./models_carracing/dqn_agent.ckpt')

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True,
                            history_length=history_length)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
                    "a_1": stats_training.get_action_usage(1)
                })

        # TODO: evaluate your agent once in a while for some episodes using run_episode(env, agent, deterministic=True, do_training=False) to
        # check its performance with greedy actions only. You can also use tensorboard to plot the mean episode reward.
        # ...
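        # A minimal evaluation sketch (the interval of 20 and the 5 greedy episodes
        # are illustrative choices, not values from the original code):
        if i % 20 == 0:
            eval_rewards = [run_episode(env, agent, deterministic=True,
                                        do_training=False).episode_reward
                            for _ in range(5)]
            print("greedy eval, mean reward:", sum(eval_rewards) / len(eval_rewards))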

        # store model every 100 episodes and in the end.
        if i % 100 == 0 or i >= (num_episodes - 1):
            agent.saver.save(agent.sess,
                             os.path.join(model_dir, "dqn_agent.ckpt"))

    tensorboard.close_session()


if __name__ == "__main__":

    environment = "CartPole-v0"
    # environment = "MountainCar-v0"
    env = gym.make(environment).unwrapped

    state_dim = env.observation_space.shape[0]
    num_actions = env.action_space.n
    num_episodes = 100

    Q = NeuralNetwork(state_dim, num_actions)
    Q_target = TargetNetwork(state_dim, num_actions)
    agent = DQNAgent(Q, Q_target, num_actions)

    train_online(env, agent, num_episodes)
Example #16
def process_conversation_POST(state_tracker_id, message):
    now = datetime.now()
    date_time = now.strftime("%m_%d_%Y_%H_%M_%S")
    dict_investigate = {}
    state_tracker = None
    # print('tracker_id',state_tracker_id)

    dict_investigate['time'] = date_time
    dict_investigate['state_tracker_id'] = state_tracker_id


    if state_tracker_id in StateTracker_Container.keys():
        state_tracker = StateTracker_Container[state_tracker_id][0]
        confirm_obj = StateTracker_Container[state_tracker_id][1]
    else:
        # print("---------------------------------in model")
        state_tracker = StateTracker(database, constants)
        confirm_obj = None
        StateTracker_Container[state_tracker_id] = (state_tracker, confirm_obj)

    user_action, new_confirm_obj = get_user_request(message, state_tracker)
    print("-------------user action-----------")
    print(user_action, new_confirm_obj)
    print('-----------------------------------')
    dict_investigate['user_action'] = user_action

    if user_action['request_slots'] != {}:
        state_tracker.reset()
        confirm_obj = None

    if new_confirm_obj is not None:
        confirm_obj = new_confirm_obj

    # try:
    if user_action['intent'] not in ["hello", "other", "done"]:
        dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)
        
        agent_act = get_agent_action(state_tracker, dqn_agent, user_action)
        print('========================')
        print('agent action',agent_act)
        print('========================')

        StateTracker_Container[state_tracker_id] = (state_tracker, confirm_obj)
        # print('state_tracker.current_request_slots[0]',state_tracker.current_request_slots[0])
        agent_message = response_craft(agent_act, state_tracker, confirm_obj)
    else:
        # to prevent key error
        agent_act = {'intent': user_action['intent'], 'request_slots': [], 'inform_slots': []}
        # print('========================')
        # print('agent action',agent_act)
        # print('========================')
        agent_message = random.choice(response_to_user_free_style[user_action['intent']])
        # if the intent is "done", reset the tracker and set the cached confirm object back to None
        if user_action['intent'] == "done":
            state_tracker.reset()
            StateTracker_Container[state_tracker_id] = (state_tracker, None)

    dict_investigate['agent_action'] = agent_act
    dict_investigate['fail_pattern'] = 'success'


    return agent_message, agent_act
Example #17
    Q = CNN(state_dim,
            nr_actions,
            hidden=300,
            lr=0.0003,
            history_length=history_length)
    Q_target = CNNTargetNetwork(state_dim,
                                nr_actions,
                                hidden=300,
                                lr=0.0003,
                                history_length=history_length)
    agent = DQNAgent(Q,
                     Q_target,
                     nr_actions,
                     discount_factor=0.99,
                     batch_size=batch_size,
                     epsilon=0.05,
                     epsilon_decay=0.95,
                     epsilon_min=0.05,
                     tau=0.5,
                     game='carracing',
                     exploration="boltzmann",
                     history_length=history_length)

    agent.load("./models_carracing/dqn_agent_1000.ckpt")
    #agent.load("/home/singhs/Downloads/exercise4_R_NR/models_carracing/dqn_agent_600.ckpt")

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        #stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        stats, frames = run_episode(env,
Example #18
    exploration = "greedy"
    batch_size = 64

    Q = NeuralNetwork(state_dim=state_dim,
                      num_actions=num_actions,
                      hidden=400,
                      lr=3e-4)

    Q_target = TargetNetwork(state_dim=state_dim,
                             num_actions=num_actions,
                             hidden=400,
                             lr=3e-4)

    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     game=game,
                     exploration=exploration,
                     discount_factor=0.95,
                     batch_size=64,
                     epsilon=0.5,
                     epsilon_decay=0.99,
                     epsilon_min=0.05)

    train_online(env, agent, 1000)

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    # 3. train DQN agent with train_online(...)
Example #19
    state_dim = (96, 96)
    history_length = 2
    num_actions = 5
    skip_frames = 2
    method = "DQL"
    # method = "CQL"
    game = "carracing"
    epsilon = 0.2
    epsilon_decay = 0.999
    epsilon_min = 0.05
    explore_type = "epsilon_greedy"
    tau = 0.5
    
    Q = ConvolutionNeuralNetwork(state_dim=state_dim, num_actions=num_actions, history_length=history_length, hidden=300, lr=3e-4)
    Q_target = CNNTargetNetwork(state_dim=state_dim, num_actions=num_actions, history_length=history_length, hidden=300, lr=3e-4)
    agent = DQNAgent(Q, Q_target, num_actions, method=method, discount_factor=0.98, batch_size=64, epsilon=epsilon, epsilon_decay=epsilon_decay, explore_type=explore_type, game=game, tau=tau, epsilon_min=epsilon_min)
    # agent = DQNAgent(Q, Q_target, num_actions, method=method, discount_factor=0.6, batch_size=64, epsilon=epsilon, epsilon_decay=epsilon_decay, explore_type=explore_type, game=game, tau=tau)
    agent.load("./models_carracing/dqn_agent.ckpt")
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, history_length=history_length, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()