def run(count=10, **kwargs):
    """Call ``_run`` ``count`` times, rebuilding the environment after each call.

    Args:
        count: Number of consecutive runs to perform (default 10).
        **kwargs: Keyword arguments forwarded to ``_run``. The ``'xh'`` entry
            is overwritten with the zero-based run index before every call.
    """
    force.init()
    for run_index in range(count):
        # Tag this run with its index, then hand everything to ``_run``.
        kwargs.update(xh=run_index)
        _run(**kwargs)

        # Construct a new environment and network, and install a new force
        # generator, before the next run starts.
        # NOTE(review): ``env`` and ``RL`` are plain locals here (there is no
        # ``global`` statement) and ``RL`` is never read afterwards; if
        # ``_run`` relies on module-level objects with these names, they are
        # not refreshed by these assignments -- confirm against the full file.
        env = SingleCartPoleEnv().unwrapped
        action_count = env.action_space.n
        feature_count = env.observation_space.shape[0]
        RL = DeepQNetwork(n_actions=action_count, n_features=feature_count)
        force.force_generator = force.ForceGenerator(0.0, 0.0, 0.0, 1.01)
def run(count=10, **kwargs):
    """Run the ``PolicyGradients`` training loop ``count`` times.

    For each run a new environment and network are created and
    ``net.run(**options)`` is called repeatedly. The inner loop ends when the
    mode is ``'noreset'`` or when ``net.run`` returns a falsy value; otherwise
    the environment and network are re-created and ``net.run`` is called
    again. After each run a new ``force.ForceGenerator`` is installed.

    Args:
        count: Number of runs to perform (default 10).
        **kwargs: Options forwarded to ``PolicyGradients.run``. Keys that are
            not supplied fall back to ``mode='reset'``,
            ``maxepochcount=1500`` and ``complexunit=100.0``. ``'xh'`` is
            always set to the zero-based run index.
    """
    force.init()
    for i in range(count):
        # Start from the built-in defaults and let caller-supplied options
        # override them; previously the caller's kwargs were discarded here.
        options = {
            'mode': 'reset',
            'xh': i,
            'maxepochcount': 1500,
            'complexunit': 100.0,
        }
        options.update(kwargs)
        options['xh'] = i  # the run index always wins, as in the loop contract

        # NOTE(review): ``env`` is a plain local (no ``global`` statement) and
        # is not passed to ``PolicyGradients()``; if the network reads a
        # module-level ``env``, these assignments do not refresh it -- confirm
        # against the full file.
        env = SingleCartPoleEnv().unwrapped
        net = PolicyGradients()
        while True:
            result = net.run(**options)
            # 'noreset' mode does a single call; a falsy result ends the run.
            if options['mode'] == 'noreset' or not result:
                break
            # Otherwise start over with a new environment and network.
            env = SingleCartPoleEnv().unwrapped
            net = PolicyGradients()

        force.force_generator = force.ForceGenerator(0.0, 0.0, 0.0, 1.01)
# NOTE(review): the statements down to the lone ``sess.close()`` are the tail
# of a function whose header is outside this view. SOURCE had everything on a
# single line, so the indentation below is reconstructed from statement order
# only -- confirm against the full file before relying on it.
            # Give up when nothing changed, no new complexity was produced, or
            # the new complexity equals the last entry of ``complexes_list``.
            if not changed or newcomplex is None or newcomplex == complexes_list[-1]:
                sess.close()
                return False
                # NOTE(review): as laid out here this ``break`` follows
                # ``return False`` and cannot execute.
                break  # complexity has already reached its maximum; stop
            # Report the new complexity together with k, w, f and sigma.
            print('新的环境复杂度=%.3f,k=%.2f,w=%.2f,f=%.2f,sigma=%.2f' % (newcomplex, k, w, f, sigma))
            # In 'reset' mode, close the session and return True to the caller.
            if mode == 'reset':
                sess.close()
                return True
            # Otherwise reset the episode counter and keep going.
            episode_number = 0
        sess.close()


# Script entry point: perform ten runs, each starting with a new environment
# and a new PolicyGradients network.
if __name__ == '__main__':
    force.init()
    for i in range(10):
        # Options passed to ``net.run``; 'xh' carries the run index.
        kwargs = {'mode': 'reset', 'xh':i,'maxepochcount' : 1500,'complexunit':100.0}
        env = SingleCartPoleEnv().unwrapped
        net = PolicyGradients()
        while 1:
            result = net.run(**kwargs)
            # 'noreset' mode makes exactly one call to ``net.run``.
            if kwargs['mode'] == 'noreset':
                break
            # A falsy result ends this run.
            if not result:
                break
            # Otherwise create a new environment and network and call
            # ``net.run`` again.
            env = SingleCartPoleEnv().unwrapped
            net = PolicyGradients()
        # Install a new force generator before the next run.
        # NOTE(review): the meaning of the four arguments is not visible here.
        force.force_generator = force.ForceGenerator(0.0, 0.0, 0.0, 1.01)