def __init__(self, env_space, cmdl):
        """Build an evaluation-only agent around a freshly created estimator.

        Args:
            env_space: Indexable whose first element is stored as the action
                space; its ``n`` attribute is stored as ``action_no``.
            cmdl: Configuration object. ``agent_type`` selects the branch;
                ``estimator``, ``hist_len``, ``hidden_size`` and ``cuda`` are
                read while building the model, plus ``atoms_no`` for the
                ``"categorical"`` agent type.

        Note:
            Only ``"dqn"`` and ``"categorical"`` are handled. Any other
            ``agent_type`` falls through, leaving ``policy`` and
            ``policy_evaluation`` unset.
        """
        # Plain bookkeeping attributes that do not depend on the model.
        self.name = "Evaluation"
        self.cmdl = cmdl
        self.epsilon = 0.05
        self.max_q = -1000

        self.actions = env_space[0]
        n_actions = self.actions.n
        self.action_no = n_actions

        agent_kind = cmdl.agent_type
        if agent_kind == "categorical":
            # The output spec is the (action_no, atoms_no) pair.
            net = get_model(cmdl.estimator, 1, cmdl.hist_len,
                            (n_actions, cmdl.atoms_no),
                            hidden_size=cmdl.hidden_size)
            self.policy = net
            if self.cmdl.cuda:
                self.policy.cuda()
            self.policy_evaluation = CategoricalPolicyEvaluation(net, cmdl)
        elif agent_kind == "dqn":
            net = get_model(cmdl.estimator, 1, cmdl.hist_len,
                            n_actions, cmdl.hidden_size)
            self.policy = net
            if self.cmdl.cuda:
                self.policy.cuda()
            self.policy_evaluation = DeterministicPolicy(net)

        print("[%s]  Evaluating %s agent." % (self.name, agent_kind))
    def __init__(self, action_space, cmdl, is_training=True):
        """Initialise the categorical agent on top of ``DQNAgent``.

        Runs ``DQNAgent.__init__`` first, then replaces ``policy``,
        ``target``, ``policy_evaluation`` and ``policy_improvement`` with
        categorical versions whose model output spec is
        ``(action_no, cmdl.atoms_no)``.

        Args:
            action_space: Forwarded unchanged to ``DQNAgent.__init__``.
            cmdl: Configuration object; reads ``estimator``, ``hist_len``,
                ``atoms_no``, ``hidden_size`` and ``cuda``.
            is_training: Forwarded unchanged to ``DQNAgent.__init__``.
        """
        DQNAgent.__init__(self, action_space, cmdl, is_training)
        self.name = "Categorical_agent"
        self.cmdl = cmdl

        def make_net():
            # Both networks are built with exactly the same arguments.
            return get_model(cmdl.estimator, 1, cmdl.hist_len,
                             (self.action_no, cmdl.atoms_no),
                             hidden_size=cmdl.hidden_size)

        # Policy network first, target second (same order as construction
        # has always had).
        policy_net = make_net()
        self.policy = policy_net
        target_net = make_net()
        self.target = target_net

        if self.cmdl.cuda:
            for net in (self.policy, self.target):
                net.cuda()

        self.policy_evaluation = CategoricalPolicyEvaluation(policy_net, cmdl)
        self.policy_improvement = CategoricalPolicyImprovement(
            policy_net, target_net, cmdl)
# Example #3
# 0
    def __init__(self, action_space, cmdl, is_training=True):
        """Set up a categorical agent, reusing the ``DQNAgent`` initialiser.

        After ``DQNAgent.__init__`` has run, the ``policy`` and ``target``
        models are rebuilt with an ``(action_no, atoms_no)`` output spec and
        the evaluation / improvement helpers are replaced by their
        categorical counterparts.

        Args:
            action_space: Passed straight through to ``DQNAgent.__init__``.
            cmdl: Configuration object; ``estimator``, ``hist_len``,
                ``atoms_no``, ``hidden_size`` and ``cuda`` are read here.
            is_training: Passed straight through to ``DQNAgent.__init__``.
        """
        DQNAgent.__init__(self, action_space, cmdl, is_training)
        self.name = "Categorical_agent"
        self.cmdl = cmdl

        # Positional arguments shared by both get_model calls.
        output_spec = (self.action_no, cmdl.atoms_no)
        shared_args = (cmdl.estimator, 1, cmdl.hist_len, output_spec)

        policy_net = get_model(*shared_args, hidden_size=cmdl.hidden_size)
        self.policy = policy_net
        target_net = get_model(*shared_args, hidden_size=cmdl.hidden_size)
        self.target = target_net

        if self.cmdl.cuda:
            self.policy.cuda()
            self.target.cuda()

        evaluator = CategoricalPolicyEvaluation(policy_net, cmdl)
        self.policy_evaluation = evaluator
        improver = CategoricalPolicyImprovement(policy_net, target_net, cmdl)
        self.policy_improvement = improver
# Example #4
# 0
    def __init__(self, action_space, cmdl, is_training=True):
        """Initialise the DQN agent: models, helpers, exploration, memory.

        Args:
            action_space: Forwarded to ``BaseAgent.__init__``.
            cmdl: Configuration object; reads ``estimator``, ``hist_len``,
                ``hidden_size``, ``cuda``, ``epsilon``, ``epsilon_steps`` and
                ``experience_replay``.
            is_training: Forwarded to ``BaseAgent.__init__``.
        """
        BaseAgent.__init__(self, action_space, is_training)
        self.name = "DQN_agent"
        self.cmdl = cmdl
        self.max_q = -1000

        # Two estimators built with identical arguments: policy, then target.
        policy_net = get_model(cmdl.estimator, 1, cmdl.hist_len,
                               self.action_no, cmdl.hidden_size)
        self.policy = policy_net
        target_net = get_model(cmdl.estimator, 1, cmdl.hist_len,
                               self.action_no, cmdl.hidden_size)
        self.target = target_net

        if self.cmdl.cuda:
            for net in (self.policy, self.target):
                net.cuda()

        self.policy_evaluation = DQNEvaluation(policy_net)
        self.policy_improvement = DQNImprovement(policy_net, target_net, cmdl)

        # "linear" schedule; arguments are presumably (start, end, steps) --
        # confirm against get_epsilon_schedule.
        self.exploration = get_epsilon_schedule(
            "linear", self.cmdl.epsilon, 0.05, self.cmdl.epsilon_steps)
        self.replay_memory = ReplayMemory(capacity=cmdl.experience_replay)

        self.dtype = TorchTypes(cmdl.cuda)
# Example #5
# 0
    def __init__(self, action_space, cmdl, is_training=True):
        """Construct the DQN agent and everything it needs to act and learn.

        Creates the ``policy`` and ``target`` estimators with the same
        arguments, optionally moves both to CUDA, then wires up the
        evaluation / improvement helpers, the epsilon schedule and the replay
        memory.

        Args:
            action_space: Passed on to ``BaseAgent.__init__``.
            cmdl: Configuration object; ``estimator``, ``hist_len``,
                ``hidden_size``, ``cuda``, ``epsilon``, ``epsilon_steps`` and
                ``experience_replay`` are read here.
            is_training: Passed on to ``BaseAgent.__init__``.
        """
        BaseAgent.__init__(self, action_space, is_training)
        self.name = "DQN_agent"
        self.cmdl = cmdl
        start_eps = self.cmdl.epsilon
        schedule_steps = self.cmdl.epsilon_steps

        # Same positional arguments for both networks.
        model_args = (cmdl.estimator, 1, cmdl.hist_len,
                      self.action_no, cmdl.hidden_size)
        policy_net = get_model(*model_args)
        self.policy = policy_net
        target_net = get_model(*model_args)
        self.target = target_net

        use_cuda = self.cmdl.cuda
        if use_cuda:
            self.policy.cuda()
            self.target.cuda()

        self.policy_evaluation = DQNEvaluation(policy_net)
        self.policy_improvement = DQNImprovement(policy_net, target_net, cmdl)
        self.exploration = get_epsilon_schedule(
            "linear", start_eps, 0.05, schedule_steps)
        self.replay_memory = ReplayMemory(capacity=cmdl.experience_replay)

        self.dtype = TorchTypes(cmdl.cuda)
        self.max_q = -1000