Example #1
    def __init__(self, env, **kwargs):
        def index(r, t):
            # UCB index: empirical mean plus an exploration bonus.
            return np.mean(r) + np.sqrt(np.log(t**2) / (2 * len(r)))

        IndexAgent.__init__(self, env, index, **kwargs)
        self.env = WriterWrapper(
            self.env, self.writer, write_scalar="action_and_reward"
        )
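
This index is the classic UCB rule: the empirical mean of the arm plus an exploration bonus that grows slowly with time t and shrinks with the number of pulls. A minimal standalone sketch, using only NumPy and made-up reward arrays (ucb_index is just a local helper for illustration, not part of the library):

import numpy as np

def ucb_index(rewards, t):
    # Same formula as above: empirical mean plus an exploration bonus
    # that grows with time t and shrinks with the number of pulls.
    return np.mean(rewards) + np.sqrt(np.log(t**2) / (2 * len(rewards)))

t = 100
few_pulls = np.array([1.0, 0.0, 1.0])                             # arm pulled 3 times
many_pulls = np.random.default_rng(0).binomial(1, 0.5, size=50)   # arm pulled 50 times
print(ucb_index(few_pulls, t))    # large bonus: the arm stays attractive
print(ucb_index(many_pulls, t))   # small bonus: close to the empirical mean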
Example #2
    def __init__(self, env, **kwargs):
        def index(r, t):
            # A (number of arms) and T (horizon) are assumed to be defined
            # at module level, outside this snippet.
            Na = len(r)
            return np.mean(r) + np.sqrt(A / Na * max(0, np.log(T / (A * Na))))

        IndexAgent.__init__(self, env, index, **kwargs)
        self.env = WriterWrapper(
            self.env, self.writer, write_scalar="action_and_reward"
        )
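
Here A and T are assumed to be module-level constants (the number of arms and the horizon) defined outside the snippet; the formula looks like a MOSS-style, horizon-aware index. A self-contained sketch under that assumption, with toy values:

import numpy as np

A = 4     # number of arms, assumed to live at module level in the original
T = 2000  # horizon, same assumption

def moss_like_index(rewards):
    Na = len(rewards)
    return np.mean(rewards) + np.sqrt(A / Na * max(0, np.log(T / (A * Na))))

rewards = np.array([1.0, 0.0, 1.0, 1.0, 0.0])
print(moss_like_index(rewards))  # mean 0.6 plus a horizon-dependent bonus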
Example #3
    def __init__(self, env, m=20, **kwargs):
        def index(r, t):
            # A (the number of arms) is assumed to be defined at module level.
            if t < m * A:
                index = -len(r)  # select the least-pulled action
            else:
                index = np.mean(r, axis=0)
            return index

        IndexAgent.__init__(self, env, index, **kwargs)
        self.env = WriterWrapper(
            self.env, self.writer, write_scalar="action_and_reward"
        )
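
This is an explore-then-commit scheme: during the first m pulls per arm the index simply favors the least-pulled arm, then it switches to the empirical mean. A standalone sketch, again assuming A is a module-level arm count:

import numpy as np

A = 3   # number of arms, assumed to live at module level in the original
m = 20  # exploration budget per arm

def etc_index(rewards, t):
    if t < m * A:
        return -len(rewards)   # exploration phase: favor the least-pulled arm
    return np.mean(rewards)    # commit phase: favor the best empirical mean

print(etc_index(np.array([1.0, 0.0]), t=10))   # -2, still exploring
print(etc_index(np.array([1.0, 0.0]), t=100))  # 0.5, committed to the mean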
Example #4
    def __init__(self, env, B=1, **kwargs):
        def stat_function(stat, Na, action, reward):
            # The statistic is the empirical mean. We compute it recursively.
            if stat is None:
                stat = np.zeros(len(Na))
            stat[action] = (
                (Na[action] - 1) / Na[action] * stat[action]
                + reward / Na[action]
            )
            return stat

        def index(stat, Na, t):
            return stat + B * np.sqrt(2 * np.log(t**2) / Na)

        RecursiveIndexAgent.__init__(self, env, stat_function, index, **kwargs)
        self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
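
The recursive statistic avoids storing the full reward history by updating the running mean of each arm in place. A quick standalone check (pure NumPy, names local to this sketch) that the recursion reproduces the batch mean:

import numpy as np

def update_mean(stat, Na, action, reward):
    # Same recursion as stat_function above.
    if stat is None:
        stat = np.zeros(len(Na))
    stat[action] = (Na[action] - 1) / Na[action] * stat[action] + reward / Na[action]
    return stat

rng = np.random.default_rng(0)
rewards = rng.binomial(1, 0.7, size=100).astype(float)

stat, Na = None, np.zeros(1)
for r in rewards:
    Na[0] += 1
    stat = update_mean(stat, Na, action=0, reward=r)

print(stat[0], rewards.mean())  # the two values coincide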
Example #5
    def __init__(self, env, **kwargs):
        def index(r, p, t):
            # Importance-weighted estimate of the cumulative reward.
            return np.sum(1 - (1 - r) / p)

        def prob(indices, t):
            # Exponential weights mixed with uniform exploration.
            eta = np.minimum(np.sqrt(self.n_arms * np.log(self.n_arms) / (t + 1)), 1.0)
            w = np.exp(eta * indices)
            w /= w.sum()
            return (1 - eta) * w + eta * np.ones(self.n_arms) / self.n_arms

        RandomizedAgent.__init__(self, env, index, prob, **kwargs)
        self.env = WriterWrapper(
            self.env, self.writer, write_scalar="action_and_reward"
        )
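
The randomized agent samples actions from a distribution built from the indices: exponential weights with a decaying learning rate eta, mixed with a uniform distribution for forced exploration (an EXP3-style rule). A standalone sketch of that probability computation, with made-up index values:

import numpy as np

n_arms = 3

def exp3_probabilities(indices, t):
    # Exponential weights mixed with a uniform distribution, as in prob above.
    eta = np.minimum(np.sqrt(n_arms * np.log(n_arms) / (t + 1)), 1.0)
    w = np.exp(eta * indices)
    w /= w.sum()
    return (1 - eta) * w + eta * np.ones(n_arms) / n_arms

indices = np.array([5.0, 2.0, 0.5])  # toy cumulative reward estimates
p = exp3_probabilities(indices, t=100)
print(p, p.sum())  # a valid probability vector favoring the first arm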
Example #6
    def __init__(self, env, **kwargs):
        UCBVIAgent.__init__(self, env, **kwargs)
        self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
Example #7
    def __init__(self, env, **kwargs):
        TSAgent.__init__(self, env, "beta", **kwargs)
        self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
Example #8
    def __init__(self, env, **kwargs):
        UCBVIAgent.__init__(self, env, horizon=50, **kwargs)
        self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
Example #9
    def __init__(self, env, **kwargs):
        # The default index is UCB for Bernoulli rewards.
        RecursiveIndexAgent.__init__(self, env, **kwargs)
        self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
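
The last four examples only wire existing agents (UCBVI, Thompson sampling with a Beta prior, and the default recursive index) to the WriterWrapper so that actions or rewards get logged. As a self-contained illustration of the bandit loop these agents run, here is a plain-NumPy simulation of the UCB index from Example #1 on a made-up Bernoulli bandit (no library code involved):

import numpy as np

rng = np.random.default_rng(0)
means = np.array([0.2, 0.5, 0.8])   # made-up Bernoulli arm means
T = 1000
rewards = [[] for _ in means]       # reward history per arm

for t in range(1, T + 1):
    if t <= len(means):
        a = t - 1                   # pull each arm once to initialize
    else:
        idx = [np.mean(r) + np.sqrt(np.log(t**2) / (2 * len(r))) for r in rewards]
        a = int(np.argmax(idx))     # UCB index from Example #1
    rewards[a].append(rng.binomial(1, means[a]))

print([len(r) for r in rewards])    # most pulls go to the best arm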