Exemplo n.º 1
0
def actor_boundary(env, actor, epsoides=1000, steps=100):
    """Sample actor rollouts and report the bounds of the visited states.

    Runs ``epsoides`` episodes of at most ``steps`` steps each. The reset
    state and every post-step state are passed to ``metrics.find_boundary``,
    and the accumulated bounds are printed once sampling is finished.

    Args:
        env: environment exposing ``state_dim``, ``reset()`` and ``step(a)``;
            ``step`` must return a ``(state, _, terminal)`` triple.
        actor: policy exposing ``predict``, ``s_dim`` and ``a_dim``.
        epsoides (int, optional): number of episodes to sample.
        steps (int, optional): maximum number of steps per episode.

    Returns:
        tuple: ``(max_boundary, min_boundary)`` as accumulated by
        ``metrics.find_boundary``.
    """
    # NOTE(review): both bounds start at zero, so if find_boundary keeps an
    # elementwise max/min the reported range always contains 0 -- confirm
    # that this is intended.
    max_boundary = np.zeros([env.state_dim, 1])
    min_boundary = np.zeros([env.state_dim, 1])

    # range() rather than the Python-2-only xrange(), so this runs on 2 and 3.
    for _episode in range(epsoides):
        s = env.reset()
        max_boundary, min_boundary = metrics.find_boundary(
            s, max_boundary, min_boundary)
        for _step in range(steps):
            # The state is fed to the actor as a (1, s_dim) batch; no
            # exploration noise is added to the predicted action.
            a = actor.predict(np.reshape(np.array(s), (1, actor.s_dim)))
            s, _, terminal = env.step(a.reshape(actor.a_dim, 1))
            max_boundary, min_boundary = metrics.find_boundary(
                s, max_boundary, min_boundary)
            if terminal:
                break

    print('max_boundary:\n{}\nmin_boundary:\n{}'.format(
        max_boundary, min_boundary))
    # Hand the bounds back as well, so callers do not have to parse stdout.
    return max_boundary, min_boundary
Exemplo n.º 2
0
    def shield_boundary(self, sample_ep=500, sample_step=100):
        """Sample shield rollouts to find the state bounds of the shield.

        Runs ``sample_ep`` episodes of ``sample_step`` steps each, choosing
        every action with ``self.call_shield``. The reset state and every
        post-step state are passed to ``metrics.find_boundary``, and the
        accumulated bounds are printed once sampling is finished.

        Args:
            sample_ep (int, optional): number of episodes to sample.
            sample_step (int, optional): number of steps in each episode.

        Returns:
            tuple: ``(max_boundary, min_boundary)`` as accumulated by
            ``metrics.find_boundary``.
        """
        # NOTE(review): both bounds start at zero, so if find_boundary keeps
        # an elementwise max/min the reported range always contains 0 --
        # confirm that this is intended.
        max_boundary = np.zeros([self.env.state_dim, 1])
        min_boundary = np.zeros([self.env.state_dim, 1])

        # range() rather than the Python-2-only xrange(), so this runs on
        # both Python 2 and 3.
        for _episode in range(sample_ep):
            x = self.env.reset()
            max_boundary, min_boundary = metrics.find_boundary(
                x, max_boundary, min_boundary)
            for _step in range(sample_step):
                u = self.call_shield(x)
                # The terminal flag returned by step() is not acted on here:
                # sampling always runs for the full sample_step steps.
                x, _, _ = self.env.step(u)
                # Record the state *after* stepping, so the state reached by
                # the last step of the episode is included in the bounds.
                max_boundary, min_boundary = metrics.find_boundary(
                    x, max_boundary, min_boundary)

        print('max_boundary:\n{}\nmin_boundary:\n{}'.format(
            max_boundary, min_boundary))
        # Hand the bounds back as well, so callers do not have to parse
        # stdout.
        return max_boundary, min_boundary