#NEORL imports used throughout these tests
from neorl import ACER, ACKTR, DQN, PPO2
from neorl import MlpPolicy, DQNPolicy
from neorl import RLLogger
from neorl import CreateEnvironment

def test_acer():

    def Sphere(individual):
        """Sphere test objective function.
        F(x) = sum_{i=1}^d xi^2
        d=1,2,3,...
        Range: [-100,100]
        Minima: 0
        """
        return sum(x**2 for x in individual)

    nx = 5
    bounds = {}
    for i in range(1, nx + 1):
        bounds['x' + str(i)] = ['int', -100, 100]

    #create an environment class
    env = CreateEnvironment(method='acer', fit=Sphere, bounds=bounds,
                            mode='min', episode_length=50)
    #create a callback function to log data
    cb = RLLogger(check_freq=1, mode='min')
    #create an acer object based on the env object
    acer = ACER(MlpPolicy, env=env, n_steps=25, q_coef=0.55, ent_coef=0.02)
    #optimise the environment class
    acer.learn(total_timesteps=2000, callback=cb)
    #print the best results
    print('--------------- ACER results ---------------')
    print('The best value of x found:', cb.xbest)
    print('The best value of y found:', cb.rbest)
    return
def test_acer():
    #create an object from the class
    env = IntegerSphere()
    #create a callback function to log data
    cb = RLLogger(check_freq=1)
    #create an acer object based on the env object
    acer = ACER(MlpPolicy, env=env, n_steps=25, q_coef=0.55, ent_coef=0.02)
    #optimise the environment class
    acer.learn(total_timesteps=2000, callback=cb)
    #print the best results
    print('--------------- ACER results ---------------')
    print('The best value of x found:', cb.xbest)
    print('The best value of y found:', cb.rbest)
    return
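# The IntegerSphere environment used above is not defined in this section. Below is a
# minimal sketch of what such a class could look like; the MultiDiscrete action
# encoding, the 5-variable size, and the [-100, 100] range are illustrative
# assumptions, not the original NEORL definition.
import gym
from gym.spaces import Box, MultiDiscrete
import numpy as np

class IntegerSphere(gym.Env):
    #An integer-valued gym environment for the Sphere function (a sketch)
    def __init__(self):
        lb, ub = -100, 100
        self.nx = 5
        #each action component selects an integer offset in [0, ub - lb]
        self.action_space = MultiDiscrete([ub - lb + 1] * self.nx)
        self.observation_space = Box(low=lb, high=ub, shape=(self.nx,), dtype=np.float32)
        self.lb = lb
        self.done = False

    def fit(self, individual):
        #sphere value; -1 converts minimization into a reward to maximize
        return -sum(x**2 for x in individual)

    def step(self, action):
        individual = np.array(action) + self.lb   #map offsets back to [-100, 100]
        reward = self.fit(individual)
        return individual, reward, True, {}

    def reset(self):
        self.done = False
        return np.array(self.action_space.sample()) + self.lb

    def render(self, mode='human'):
        pass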
def test_ppo():
    #create an object from the class
    env = Sphere()
    #create a callback function to log data
    cb = RLLogger(check_freq=1)
    #create a ppo object based on the env object
    ppo = PPO2(MlpPolicy, env=env, n_steps=12)
    #optimise the environment class
    ppo.learn(total_timesteps=2000, callback=cb)
    #print the best results
    print('--------------- PPO results ---------------')
    print('The best value of x found:', cb.xbest)
    print('The best value of y found:', cb.rbest)
    return
def test_dqn():
    #create an object from the class
    env = IntegerSphere()
    #create a callback function to log data
    cb = RLLogger(check_freq=1)
    #create a dqn object based on the env object
    dqn = DQN(DQNPolicy, env=env)
    #optimise the environment class
    dqn.learn(total_timesteps=2000, callback=cb)
    #print the best results
    print('--------------- DQN results ---------------')
    print('The best value of x found:', cb.xbest)
    print('The best value of y found:', cb.rbest)
    return
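# The DQN test can also be expressed with NEORL's CreateEnvironment helper instead of a
# hand-written gym class, mirroring the ACER test at the top of this section. This is a
# sketch: the function name test_dqn_create_env is hypothetical, and method='dqn' is
# assumed here to be an accepted CreateEnvironment option.
def test_dqn_create_env():

    def Sphere(individual):
        """Sphere test objective function."""
        return sum(x**2 for x in individual)

    #5 integer decision variables in [-100, 100]
    bounds = {'x' + str(i): ['int', -100, 100] for i in range(1, 6)}

    env = CreateEnvironment(method='dqn', fit=Sphere, bounds=bounds,
                            mode='min', episode_length=50)
    cb = RLLogger(check_freq=1, mode='min')
    dqn = DQN(DQNPolicy, env=env)
    dqn.learn(total_timesteps=2000, callback=cb)
    print('The best value of x found:', cb.xbest)
    print('The best value of y found:', cb.rbest)
    return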
    def fit(self, individual):
        """Sphere test objective function.
        F(x) = sum_{i=1}^d xi^2
        d=1,2,3,...
        Range: [-100,100]
        Minima: 0
        """
        #-1 is used to convert minimization to maximization
        return -sum(x**2 for x in individual)

    def reset(self):
        self.done = False
        return self.action_space.sample()

    def render(self, mode='human'):
        pass

#--------------------------------------------------------
# RL Optimisation
#--------------------------------------------------------

#create an object from the class
env = Sphere()
#create a callback function to log data
cb = RLLogger(check_freq=1)
#create an acktr object based on the env object
acktr = ACKTR(MlpPolicy, env=env, n_steps=12)
#optimise the environment class
acktr.learn(total_timesteps=2500, callback=cb)
#print the best results
print('--------------- ACKTR results ---------------')
print('The best value of x found:', cb.xbest)
print('The best value of y found:', cb.rbest)
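# The class header, constructor, and step() of the Sphere environment were truncated
# above. Below is a minimal, self-contained sketch of what the full class could look
# like, assuming a continuous Box action/observation space over [-100, 100] with 5
# variables and single-step episodes; these details are illustrative assumptions, not
# the original definition. The fit, reset, and render methods restate those shown above.
import gym
from gym.spaces import Box
import numpy as np

class Sphere(gym.Env):
    #A continuous gym environment for the Sphere function (a sketch)
    def __init__(self):
        lb, ub = -100, 100
        self.nx = 5
        self.action_space = Box(low=lb, high=ub, shape=(self.nx,), dtype=np.float32)
        self.observation_space = Box(low=lb, high=ub, shape=(self.nx,), dtype=np.float32)
        self.done = False

    def fit(self, individual):
        #-1 converts minimization of the sphere value into reward maximization
        return -sum(x**2 for x in individual)

    def step(self, action):
        #reward is the negated sphere value of the proposed action
        reward = self.fit(individual=action)
        return action, reward, True, {}

    def reset(self):
        self.done = False
        return self.action_space.sample()

    def render(self, mode='human'):
        pass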
def Sphere(individual):
    """Sphere test objective function.
    F(x) = sum_{i=1}^d xi^2
    d=1,2,3,...
    Range: [-100,100]
    Minima: 0
    """
    return sum(x**2 for x in individual)

nx = 5
bounds = {}
for i in range(1, nx + 1):
    bounds['x' + str(i)] = ['float', -10, 10]

#create an environment class
env = CreateEnvironment(method='acktr', fit=Sphere, bounds=bounds,
                        mode='min', episode_length=50)
#create a callback function to log data
cb = RLLogger(check_freq=1, mode='min')
#create an acktr object based on the env object
acktr = ACKTR(MlpPolicy, env=env, n_steps=12, seed=1)
#optimise the environment class
acktr.learn(total_timesteps=2000, callback=cb)
#print the best results
print('--------------- ACKTR results ---------------')
print('The best value of x found:', cb.xbest)
print('The best value of y found:', cb.rbest)
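# Optional sanity check (a sketch, not part of the original script): re-evaluate the
# logged best solution with the raw Sphere function, assuming cb.xbest holds the best
# decision vector found by the callback.
print('Re-evaluated fitness of xbest:', Sphere(cb.xbest))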