def actor_boundary(env, actor, epsoides=1000, steps=100):
    """Sample actor-driven rollouts and print the observed state bounds.

    Runs ``epsoides`` episodes of at most ``steps`` steps each. The state
    returned by ``env.reset()`` and every state returned by ``env.step()``
    are folded into running bounds through ``metrics.find_boundary``. An
    episode ends early as soon as ``env.step`` reports ``terminal``. The
    final bounds are printed; nothing is returned.

    Args:
        env: environment exposing ``state_dim``, ``reset()`` and
            ``step(action)``; ``step`` returns a 3-tuple whose first item
            is the next state and whose last item is the terminal flag.
        actor: policy exposing ``s_dim``, ``a_dim`` and ``predict(states)``.
        epsoides (int, optional): number of episodes to sample (the
            spelling is kept so existing keyword callers keep working).
        steps (int, optional): maximum number of steps per episode.
    """
    # NOTE(review): both bounds start at zero, so the reported range
    # presumably always contains the origin -- confirm against
    # metrics.find_boundary that this is intended.
    max_boundary = np.zeros([env.state_dim, 1])
    min_boundary = np.zeros([env.state_dim, 1])

    # `range` instead of the Python-2-only `xrange`, so the function runs
    # on both Python 2 and Python 3.
    for _ in range(epsoides):
        s = env.reset()
        max_boundary, min_boundary = metrics.find_boundary(
            s, max_boundary, min_boundary)

        for _ in range(steps):
            # The actor's prediction is used as-is; no exploration noise
            # is added while sampling the boundary.
            a = actor.predict(np.reshape(np.array(s), (1, actor.s_dim)))
            s, _, terminal = env.step(a.reshape(actor.a_dim, 1))
            max_boundary, min_boundary = metrics.find_boundary(
                s, max_boundary, min_boundary)
            if terminal:
                break

    print('max_boundary:\n{}\nmin_boundary:\n{}'.format(
        max_boundary, min_boundary))
def shield_boundary(self, sample_ep=500, sample_step=100):
    """Sample shield-driven rollouts and print the observed state bounds.

    Runs ``sample_ep`` episodes of ``sample_step`` steps each, choosing
    every action with ``self.call_shield``. Every visited state, including
    the one returned by the last step of an episode, is folded into
    running bounds through ``metrics.find_boundary``. The final bounds are
    printed; nothing is returned.

    Args:
        sample_ep (int, optional): number of episodes to sample.
        sample_step (int, optional): number of steps in each episode.
    """
    # NOTE(review): both bounds start at zero, so the reported range
    # presumably always contains the origin -- confirm against
    # metrics.find_boundary that this is intended.
    max_boundary = np.zeros([self.env.state_dim, 1])
    min_boundary = np.zeros([self.env.state_dim, 1])

    # `range` instead of the Python-2-only `xrange`, so the method runs
    # on both Python 2 and Python 3.
    for _ in range(sample_ep):
        x = self.env.reset()

        for _ in range(sample_step):
            u = self.call_shield(x)
            max_boundary, min_boundary = metrics.find_boundary(
                x, max_boundary, min_boundary)
            # NOTE(review): the terminal flag returned by env.step is not
            # used, so every episode runs the full `sample_step` steps --
            # confirm that this is intended.
            x, _, _ = self.env.step(u)

        # The state produced by the last step (or by reset() when
        # sample_step is 0) has not been recorded yet; include it so the
        # bounds cover every state the rollout actually visited.
        max_boundary, min_boundary = metrics.find_boundary(
            x, max_boundary, min_boundary)

    print('max_boundary:\n{}\nmin_boundary:\n{}'.format(
        max_boundary, min_boundary))