Example #1
from typing import Any, Dict, List

import gym

# Project-local helpers (UserGoal, UserSimulator, ErrorModelController,
# StateTracker, AGENT_ACTIONS, map_index_to_action) are assumed to be
# importable from the surrounding codebase.


class DialogEnv(gym.Env):
    def __init__(
        self,
        user_goals: List[UserGoal],
        emc_params: Dict,
        max_round_num: int,
        database: Dict,
        slot2values: Dict[str, List[Any]],
    ) -> None:

        self.user = UserSimulator(user_goals, max_round_num)
        self.emc = ErrorModelController(slot2values, emc_params)
        self.state_tracker = StateTracker(database, max_round_num)

        self.action_space = gym.spaces.Discrete(len(AGENT_ACTIONS))
        self.observation_space = gym.spaces.MultiBinary(
            self.state_tracker.get_state_size())

    def step(self, agent_action_index: int):
        agent_action = map_index_to_action(agent_action_index)
        self.state_tracker.update_state_agent(agent_action)
        user_action, reward, done, success = self.user.step(agent_action)
        if not done:
            self.emc.infuse_error(user_action)
        self.state_tracker.update_state_user(user_action)
        next_state = self.state_tracker.get_state(done)
        return next_state, reward, done, success

    def reset(self):
        self.state_tracker.reset()
        init_user_action = self.user.reset()
        self.emc.infuse_error(init_user_action)
        self.state_tracker.update_state_user(init_user_action)
        return self.state_tracker.get_state()
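
A hedged sketch of driving this environment with the usual gym loop; the goal list, EMC params, database, and slot2values are assumed to be loaded elsewhere, and max_round_num=20 is an arbitrary choice:

env = DialogEnv(user_goals, emc_params, max_round_num=20,
                database=database, slot2values=slot2values)
state = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # stand-in for a learned policy
    state, reward, done, success = env.step(action)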
Example #2
def test():
    test_env = UserSimulator(disease_symptom_path,
                             disease_symptom_mapping_path)
    total_rewards = 0
    # Accumulate prediction counts over all 25 test episodes (initializing
    # them inside the loop would discard all but the last episode).
    top_10_predictions = 0
    top_5_predictions = 0
    for i_episode in range(25):
        state = test_env.reset()
        for t in count():
            action = select_action(np.expand_dims(state, axis=0))
            next_state, reward, done, _ = test_env.step(action.item())

            state = next_state

            if done:
                total_rewards += reward
                with torch.no_grad():
                    # policy_net expects a batched float tensor, not a raw
                    # numpy array, so convert before the forward pass
                    state_tensor = torch.as_tensor(
                        state, dtype=torch.float32).unsqueeze(0)
                    q_values = policy_net(state_tensor).squeeze(0)
                    diagnosis_q_values = q_values[test_env.num_symptom:]
                # # print (diagnosis_q_values.shape)
                top_10_disease, top_5_disease = test_env.get_top_diseases(
                    diagnosis_q_values)
                if test_env.goal in top_10_disease:
                    top_10_predictions += 1
                if test_env.goal in top_5_disease:
                    top_5_predictions += 1
                break

    # print ('Test Rewards : ', total_rewards/25.)
    return total_rewards / 25., top_10_predictions / 25., top_5_predictions / 25.
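
The loop above relies on a select_action helper and a trained policy_net that are not shown; a minimal greedy version consistent with how it is called here (no exploration at test time) might look like this:

def select_action(state_batch):
    # Pick the action with the highest predicted Q-value for the batch.
    with torch.no_grad():
        q_values = policy_net(torch.as_tensor(state_batch, dtype=torch.float32))
        return q_values.argmax(dim=1)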
Example #3
from state_tracker import StateTracker
from user_simulator import UserSimulator
from agent_dqn import AgentDQN

params = {
    'experience_replay_pool_size': 10000,
    'dqn_hidden_size': 60,
    'gamma': 0.9,
    'predict_mode': True,
    'max_turn': 40,
    'trained_model_path': 'data/saved_model.p'
}

state_tracker = StateTracker()
usersim = UserSimulator(3)
agent = AgentDQN(params)


def run_episode(count):
    for i in range(count):
        print("dialog:", i)
        episode_over = False
        turn = 0
        state_tracker.initialize_episode()
        agent_action = {
            'diaact': 'greeting',
            'inform_slots': {},
            'request_slots': {}
        }
        state_tracker.update(agent_action=agent_action)
        print("sys:", agent_action)
Example #4
    # Note: If you get an unpickling error here then run 'pickle_converter.py' and it should fix it
    database = pickle.load(open(DATABASE_FILE_PATH, 'rb'), encoding='latin1')

    # Clean DB
    remove_empty_slots(database)

    # Load movie dict
    db_dict = pickle.load(open(DICT_FILE_PATH, 'rb'), encoding='latin1')

    # Load goal File
    user_goals = pickle.load(open(USER_GOALS_FILE_PATH, 'rb'),
                             encoding='latin1')

    # Init. Objects
    if USE_USERSIM:
        user = UserSimulator(user_goals, constants, database)
    else:
        user = User(constants)
    emc = ErrorModelController(db_dict, constants)
    state_tracker = StateTracker(database, constants)
    sarsa_agent = SARSAgent(state_tracker.get_state_size(), constants)
    #dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)


def run_round(state, warmup=False):
    # 1) Agent takes action given state tracker's representation of dialogue (state)
    agent_action_index, agent_action = sarsa_agent.get_action(state,
                                                              use_rule=warmup)
    # 2) Update state tracker with the agent's action
    state_tracker.update_state_agent(agent_action)
    # 3) User takes action given agent action
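    # --- The snippet is truncated here. A hedged sketch of how the round
    # --- typically continues, mirroring the calls shown in Example #1's
    # --- step(); not this file's verbatim code:
    user_action, reward, done, success = user.step(agent_action)
    # 4) Infuse error into the user action at the semantic-frame level
    if not done:
        emc.infuse_error(user_action)
    # 5) Update state tracker with the user's action
    state_tracker.update_state_user(user_action)
    # 6) Return the next state along with reward/done/success
    next_state = state_tracker.get_state(done)
    return next_state, reward, done, success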
Example #5
from itertools import count
from PIL import Image
from user_simulator import UserSimulator
import pickle

import matplotlib  # needed for matplotlib.get_backend() below
import numpy as np  # used by test()/select_action elsewhere in this file

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torch.autograd import Variable

disease_symptom_path = 'disease_symptom.json'
disease_symptom_mapping_path = 'disease_symptom_mapping.json'

env = UserSimulator(disease_symptom_path, disease_symptom_mapping_path)
# env = gym.make('CartPole-v0').unwrapped

is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

######################################################################
# Replay Memory
# -------------
#
# We'll be using experience replay memory for training our DQN. It stores
# the transitions that the agent observes, allowing us to reuse this data
# later. Sampling from it randomly decorrelates the transitions that build up
# a batch, which stabilizes and improves the DQN training procedure.
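
The snippet ends before the class itself; the buffer that typically accompanies this comment block is a small ring buffer of transitions. A sketch, assuming the usual (state, action, next_state, reward) tuple layout:

from collections import namedtuple, deque
import random

# One stored transition per environment step.
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory(object):
    """Fixed-capacity buffer that stores transitions and samples random batches."""

    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)  # oldest transitions are dropped

    def push(self, *args):
        """Save a transition."""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        """Return a random, decorrelated batch of transitions."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)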