def test_maml_compilation(self):
    """Test whether a MAMLTrainer can be built with all frameworks."""
    config = maml.DEFAULT_CONFIG.copy()
    config["num_workers"] = 1
    config["horizon"] = 200
    num_iterations = 1

    # Test for both frameworks; the cartpole env is skipped under tf.
    for fw in framework_iterator(config, frameworks=("tf", "torch")):
        for env in [
                "pendulum_mass.PendulumMassEnv",
                "cartpole_mass.CartPoleMassEnv",
        ]:
            if fw == "tf" and env.startswith("cartpole"):
                continue
            print("env={}".format(env))
            env_ = "ray.rllib.examples.env.{}".format(env)
            trainer = maml.MAMLTrainer(config=config, env=env_)
            for i in range(num_iterations):
                results = trainer.train()
                check_train_results(results)
                print(results)
            check_compute_single_action(
                trainer, include_prev_action_reward=True)
            trainer.stop()
def test_maml_compilation(self):
    """Test whether a MAMLTrainer can be built with all frameworks."""
    config = maml.DEFAULT_CONFIG.copy()
    config["num_workers"] = 1
    config["horizon"] = 200
    num_iterations = 1

    # Test for the tf framework only (torch not implemented yet).
    # Note: ("tf") is just the string "tf"; a one-element tuple needs
    # the trailing comma.
    for _ in framework_iterator(config, frameworks=("tf",)):
        trainer = maml.MAMLTrainer(
            config=config,
            env="ray.rllib.examples.env.pendulum_mass.PendulumMassEnv")
        for i in range(num_iterations):
            trainer.train()
        check_compute_single_action(
            trainer, include_prev_action_reward=True)
        trainer.stop()
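# A minimal sketch of the harness the two compilation tests above would
# typically live in; the class name "TestMAML", the CPU budget, and the
# pytest entry point are assumptions, not taken from the snippets themselves.
import sys
import unittest

import ray
import ray.rllib.agents.maml as maml
from ray.rllib.utils.test_utils import (check_compute_single_action,
                                        check_train_results,
                                        framework_iterator)


class TestMAML(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        ray.init(num_cpus=4)  # Assumed resource budget for the test.

    @classmethod
    def tearDownClass(cls):
        ray.shutdown()


if __name__ == "__main__":
    import pytest
    sys.exit(pytest.main(["-v", __file__]))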
def test_and_print_results(agent_folder, iteration, start_date, end_date,
                           title, curr_path, sc_volt_test, train_test_real,
                           diff_days, GT_hour):
    train_test_real_orig = train_test_real
    train_test_real = 'test' if train_test_real == 'train' else train_test_real
    path = glob.glob(
        agent_folder + '/checkpoint_' + str(iteration) + '/checkpoint-' +
        str(iteration),
        recursive=True)

    config = maml.DEFAULT_CONFIG.copy()
    config["observation_filter"] = 'MeanStdFilter'
    config["batch_mode"] = "complete_episodes"
    config["num_workers"] = 1
    config["explore"] = False
    config["env_config"] = {
        "settings": settings,
        "main_path": curr_path,
        "train/test": train_test_real,
        "start_test": start_date,
        "end_test": end_date,
        "sc_volt_start_test": sc_volt_test,
        "diff_days": diff_days,
        "GT_hour_start": GT_hour,
        "resume_from_iter": iteration,
    }
    # Horizon, i.e. the number of steps in an episode: 24 steps per day.
    config["horizon"] = 24
    config["rollout_fragment_length"] = 240  # 200
    # config["num_envs_per_worker"] = 5
    # Number of inner adaptation steps for the MAML algorithm.
    config["inner_adaptation_steps"] = 1
    # Number of MAML steps per meta-update iteration (PPO steps).
    config["maml_optimizer_steps"] = 1
    config["inner_lr"] = 1e-4  # Inner adaptation step size.
    # config["num_sgd_iter"] = 5  # SGD iterations per outer loop (default 30).
    # "gamma": 0.99,
    # "lambda": 1.0,
    # "lr": 0.001,
    # "vf_loss_coeff": 0.5,
    # "clip_param": 0.3,
    # "kl_target": 0.01,
    # "kl_coeff": 0.0005,
    # config["num_workers"] = 1
    # config["inner_lr"] = 0.01  # Inner adaptation step size.
    # config["lr"] = 1e-4  # Step size of SGD.
    # config["clip_actions"] = False
    # config["model"] = {
    #     "fcnet_hiddens": [64, 64],
    #     "free_log_std": True,
    #     "fcnet_activation": "tanh",
    # }

    agent = maml.MAMLTrainer(config=config, env="simplePible")
    agent.restore(path[0])

    env = SimplePible(config["env_config"])
    # env.set_task(0)
    obs = env.reset()
    tot_rew = 0
    energy_used_tot = 0
    energy_prod_tot = 0
    print("initial observations: ", obs)

    while True:
        learned_action = agent.compute_action(observation=obs)
        obs, reward, done, info = env.step(learned_action)
        print("observations: ", obs)
        print("action, rew, thpl_tot_events: ", learned_action, reward,
              info["thpl_tot_events"])
        energy_used_tot += float(info["energy_used"])
        energy_prod_tot += float(info["energy_prod"])
        tot_rew += reward

        if done:
            obs = env.reset()
            start_date = start_date + datetime.timedelta(days=episode_lenght)
            if start_date >= end_date:
                print("done")
                break

    print("tot reward", round(tot_rew, 3))
    print("Energy Prod per day: ", energy_prod_tot / episode_lenght,
          "Energy Used: ", energy_used_tot / episode_lenght)
    print("Detected events averaged per day: ",
          (int(info["PIR_events_detect"]) + int(info["thpl_events_detect"])) /
          episode_lenght)
    print("Tot events averaged per day: ",
          (int(info["PIR_tot_events"]) + int(info["thpl_tot_events"])) /
          episode_lenght)

    accuracy = RL_func.calc_accuracy(info)
    print("Accuracy: ", accuracy)

    if train_test_real_orig == "test" or train_test_real_orig == "train":
        env.render(tot_rew, title, energy_used_tot, accuracy)

    return path, info["SC_volt"], int(info["GT_hours_start"])
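# A hypothetical driver for test_and_print_results() above; every concrete
# value (folder name, iteration, dates, voltage) is illustrative only and
# not taken from the source.
import datetime

path, sc_volt, gt_hour = test_and_print_results(
    agent_folder="Agents_Saved",              # hypothetical checkpoint folder
    iteration=100,                            # checkpoint index to evaluate
    start_date=datetime.datetime(2020, 1, 1),
    end_date=datetime.datetime(2020, 1, 8),
    title="evaluation run",
    curr_path=".",
    sc_volt_test=3.2,                         # hypothetical start voltage
    train_test_real="test",
    diff_days=0,
    GT_hour=0,
)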
def training(start_train_date, end_train_date, resume, diff_days):
    config = maml.DEFAULT_CONFIG.copy()
    config["observation_filter"] = 'MeanStdFilter'
    config["batch_mode"] = "complete_episodes"
    config["num_workers"] = num_cores
    config["lr"] = 1e-4  # Step size of SGD.
    config["env_config"] = {
        "settings": settings,
        "main_path": curr_path,
        "start_train": start_train_date,
        "end_train": end_train_date,
        "train/test": "train",
        "sc_volt_start_train": sc_volt_train,
        "diff_days": diff_days,
        "GT_hour_start": 0,
    }
    # config["scheduler"] = pbt
    # Horizon, i.e. the number of steps in an episode: 24 steps per day.
    config["horizon"] = 24
    # Size of the batches collected from each worker.
    config["rollout_fragment_length"] = 240  # 200
    # config["num_envs_per_worker"] = 5
    # Number of inner adaptation steps for the MAML algorithm.
    config["inner_adaptation_steps"] = 1
    # Number of MAML steps per meta-update iteration (PPO steps).
    config["maml_optimizer_steps"] = 1
    # config["num_sgd_iter"] = 5  # SGD iterations per outer loop (default 30).
    # "gamma": 0.99,
    # "lambda": 1.0,
    # "lr": 0.001,
    # "vf_loss_coeff": 0.5,
    # "clip_param": 0.3,
    # "kl_target": 0.01,
    # "kl_coeff": 0.0005,
    config["inner_lr"] = 1e-4  # Inner adaptation step size.
    # config["explore"] = True
    # config["clip_actions"] = False
    # config["num_gpus"] = 0
    # config["model"] = {
    #     "fcnet_hiddens": [64, 64],
    #     "free_log_std": True,
    #     "fcnet_activation": "tanh",
    # }

    trainer = maml.MAMLTrainer(config=config, env="simplePible")
    if resume != "":
        print("Restoring checkpoint: ", resume)
        sleep(5)
        # Optionally load a previously saved checkpoint.
        trainer.restore(resume)

    global prev_res
    prev_res = []
    for i in range(0, int(settings[0]["training_iterations"])):
        result = trainer.train()
        print(pretty_print(result))
        if int(result["training_iteration"]) % 10 == 0:
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)
            checkp_split = checkpoint.split('/')
            parent_dir = '/'.join(checkp_split[0:-2])
        curr_res = float(result["episode_reward_mean"])
        # Early-stopping check over the last 5 results (currently disabled):
        '''
        if len(prev_res) >= 5 and curr_res != 0.0:
            avg_res = sum(prev_res) / len(prev_res)
            print(curr_res, avg_res)
            diff_perc = ((curr_res - avg_res) / curr_res) * 100
            print("\nDiff Percentage: ", diff_perc)
            if diff_perc < 3 and diff_perc > -3:
                print("Converged!")
                sleep(2)
                break

        if len(prev_res) >= 5:
            prev_res = np.roll(prev_res, 1)
            prev_res[0] = curr_res
        else:
            prev_res.append(curr_res)
        '''

    # Remove previous agents and save the new agent into Agents_Saved.
    RL_func.rm_old_save_new_agent(parent_dir, save_agent_folder)
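# A self-contained sketch of the convergence test that is commented out in
# training() above: stop once the newest episode_reward_mean deviates by less
# than +/-3% from the average of the last five results. The function name and
# its default arguments are assumptions, not part of the original code.
def has_converged(prev_res, curr_res, window=5, threshold_pct=3.0):
    """Return True once curr_res is within +/-threshold_pct of the rolling
    average of the last `window` results."""
    if len(prev_res) < window or curr_res == 0.0:
        return False
    avg_res = sum(prev_res) / len(prev_res)
    diff_perc = ((curr_res - avg_res) / curr_res) * 100
    return -threshold_pct < diff_perc < threshold_pct


# Usage inside the training loop would then reduce to, e.g.:
#     if has_converged(prev_res, curr_res):
#         break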