def main(): """ # Example with a simple Dummy vec env env = gym.envs.make('panda-ip-reach-v0', renders= True) env = DummyVecEnv([lambda: env]) """ print("Env created !") env = PandaReachGymEnv(renders=True) env.render(mode='rgb_array') model = DDPG.load("ddpg_panda_reach") print("model loaded !") while True: obs, done = env.reset(), False print("===================================") print("obs") print(obs) episode_rew = 0 while not done: env.render(mode='rgb_array') action, _states = model.predict(obs) obs, rew, done, info = env.step(action) episode_rew += rew print("Episode reward", episode_rew)
def train_stable_baselines(submodule, flags):
    """Train policies using PPO or DDPG from stable-baselines3."""
    from stable_baselines3.common.vec_env import DummyVecEnv

    flow_params = submodule.flow_params
    # Path to the saved files
    exp_tag = flow_params['exp_tag']
    result_name = '{}/{}'.format(exp_tag, strftime("%Y-%m-%d-%H:%M:%S"))

    # Perform training.
    start_time = timeit.default_timer()
    # print experiment.json information
    print("=========================================")
    print('Beginning training.')
    print('Algorithm :', flags.algorithm)
    model = run_model_stablebaseline(flow_params, flags.num_cpus,
                                     flags.rollout_size, flags.num_steps,
                                     flags.algorithm, flags.exp_config)
    stop_time = timeit.default_timer()
    run_time = stop_time - start_time
    print("Training is finished.")
    print("total runtime: ", run_time)

    # Save the model to a desired folder and then delete it to demonstrate
    # loading.
    print('Saving the trained model!')
    path = os.path.realpath(os.path.expanduser('~/baseline_results'))
    ensure_dir(path)
    save_path = os.path.join(path, result_name)
    model.save(save_path)

    # dump the flow params
    with open(os.path.join(path, result_name) + '.json', 'w') as outfile:
        json.dump(flow_params, outfile, cls=FlowParamsEncoder,
                  sort_keys=True, indent=4)

    # Replay the result by loading the model
    print('Loading the trained model and testing it out!')
    if flags.exp_config.lower() == "ppo":
        from stable_baselines3 import PPO
        model = PPO.load(save_path)
    elif flags.exp_config.lower() == "ddpg":
        from stable_baselines3 import DDPG
        model = DDPG.load(save_path)
    flow_params = get_flow_params(os.path.join(path, result_name) + '.json')
    flow_params['sim'].render = True
    env = env_constructor(params=flow_params, version=0)()

    # The algorithms require a vectorized environment to run
    eval_env = DummyVecEnv([lambda: env])
    obs = eval_env.reset()
    reward = 0
    for _ in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    print('the final reward is {}'.format(reward))
def load_model(env, algorithm, filename):
    if algorithm == "ddpg":
        return DDPG.load(filename, env=env)
    elif algorithm == "td3":
        return TD3.load(filename, env=env)
    elif algorithm == "sac":
        return SAC.load(filename, env=env)
    else:
        raise Exception("--> Alican's LOG: Unknown agent type!")
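A minimal usage sketch of the dispatcher above; the environment id and the checkpoint filename are illustrative assumptions, not taken from the original project:

import gym
from stable_baselines3 import DDPG, TD3, SAC

env = gym.make("Pendulum-v1")
# "sac_pendulum.zip" is a hypothetical checkpoint path
model = load_model(env, "sac", "sac_pendulum.zip")
obs = env.reset()
action, _ = model.predict(obs, deterministic=True)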
def test(MODEL_TEST):
    log_dir = "model_save/" + MODEL_PATH + "/" + MODEL_PATH + MODEL_TEST
    env = ENV(util='test', par=PARAM, dt=DT)
    env.render = True
    env = Monitor(env, log_dir)
    if PARAM['algo'] == 'td3':
        model = TD3.load(log_dir)
    elif PARAM['algo'] == 'ddpg':
        model = DDPG.load(log_dir)
    elif PARAM['algo'] == 'ppo':
        model = PPO.load(log_dir)
    # plot_results(f"model_save/")
    trade_dt = pd.DataFrame([])   # trade_dt: trading data for all stocks
    result_dt = pd.DataFrame([])  # result_dt: one-year test results for all stocks
    for i in range(TEST_STOCK_NUM):
        state = env.reset()
        stock_bh_id = 'stock_bh_' + str(i)          # buy-and-hold record for each stock
        stock_port_id = 'stock_port_' + str(i)      # portfolio record for each stock
        stock_action_id = 'stock_action_' + str(i)  # action record for each stock
        flow_L_id = 'stock_flow_' + str(i)          # cash-flow record for each stock
        stock_bh_dt, stock_port_dt, action_policy_dt, flow_L_dt = [], [], [], []
        day = 0
        while True:
            action = model.predict(state)
            next_state, reward, done, info = env.step(action[0])
            state = next_state
            # print("trying:", day, "reward:", reward, "now profit:", env.profit)
            # record the trading policy at every step
            stock_bh_dt.append(env.buy_hold)
            stock_port_dt.append(env.Portfolio_unit)
            action_policy_dt.append(action[0][0])  # record the policy
            flow_L_dt.append(env.flow)
            day += 1
            if done:
                print('stock: {}, total profit: {:.2f}%, buy hold: {:.2f}%, sp: {:.4f}, mdd: {:.2f}%, romad: {:.4f}'
                      .format(i, env.profit * 100, env.buy_hold * 100, env.sp, env.mdd * 100, env.romad))
                # after the episode, record: stock ID, profit (%), buy-and-hold (%),
                # Sharpe ratio, maximum drawdown (%), and RoMaD
                result = pd.DataFrame([[i, env.profit * 100, env.buy_hold * 100, env.sp, env.mdd * 100, env.romad]])
                break
        trade_dt_stock = pd.DataFrame({stock_port_id: stock_port_dt,
                                       stock_bh_id: stock_bh_dt,
                                       stock_action_id: action_policy_dt,
                                       flow_L_id: flow_L_dt})  # trading data for one stock
        trade_dt = pd.concat([trade_dt, trade_dt_stock], axis=1)  # merge trading data for all stocks (as columns)
        result_dt = pd.concat([result_dt, result], axis=0)        # merge result data for all stocks (as rows)
    result_dt.columns = ['stock_id', 'profit(100%)', 'buy_hold(100%)', 'sp', 'mdd(100%)', 'romad']
    trade_dt.to_csv('out_dt/trade_' + MODEL_PATH + '.csv', index=False)
    result_dt.to_csv('out_dt/result_' + MODEL_PATH + '.csv', index=False)
def main():
    args = parse_arguments()
    load_path = os.path.join("logs", args.env, args.agent, "best_model.zip")
    stats_path = os.path.join(args.log_dir, args.env, args.agent, "vec_normalize.pkl")

    if args.agent == 'ddpg':
        from stable_baselines3 import DDPG
        model = DDPG.load(load_path)
    elif args.agent == 'td3':
        from stable_baselines3 import TD3
        model = TD3.load(load_path)
    elif args.agent == 'ppo':
        from stable_baselines3 import PPO
        model = PPO.load(load_path)

    env = make_vec_env(args.env, n_envs=1)
    env = VecNormalize.load(stats_path, env)
    # do not update the normalization statistics at test time
    env.training = False
    # reward normalization is not needed at test time
    env.norm_reward = False
    # env = gym.make(args.env)

    img = []
    if args.render:
        env.render('human')
    done = False
    obs = env.reset()
    action = model.predict(obs)
    if args.gif:
        img.append(env.render('rgb_array'))
    if args.timesteps is None:
        while not done:
            action, _ = model.predict(obs)
            obs, reward, done, info = env.step(action)
            if args.gif:
                img.append(env.render('rgb_array'))
            else:
                env.render()
    else:
        for i in range(args.timesteps):
            action, _ = model.predict(obs)
            obs, reward, done, info = env.step(action)
            if args.gif:
                img.append(env.render('rgb_array'))
            else:
                env.render()
    if args.gif:
        imageio.mimsave(os.path.join("logs", args.env, args.agent, "recording.gif"),
                        [np.array(img) for i, img in enumerate(img) if i % 2 == 0],
                        fps=29)
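For the VecNormalize-based replay above to work, the normalization statistics must have been saved at training time. A minimal sketch of that training-side counterpart, with an assumed environment id and directory layout:

from stable_baselines3 import DDPG
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize

# Hypothetical environment id and paths; adjust to the actual experiment.
env = make_vec_env("Pendulum-v1", n_envs=1)
env = VecNormalize(env, norm_obs=True, norm_reward=True)
model = DDPG("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10_000)
model.save("logs/Pendulum-v1/ddpg/best_model.zip")
# statistics restored later with VecNormalize.load(stats_path, env)
env.save("logs/Pendulum-v1/ddpg/vec_normalize.pkl")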
def test_ddpg(): log_dir = f"model_save/best_model_ddpg_cnn" env = ENV(istest=True) env.render = True env = Monitor(env, log_dir) model = DDPG.load(log_dir) plot_results(f"model_save/") for i in range(10): state = env.reset() while True: action = model.predict(state) next_state, reward, done, info = env.step(action[0]) state = next_state # print("trying:",i,"action:", action,"now profit:",env.profit) if done: print('stock',i,' total profit=',env.profit,' buy hold=',env.buy_hold) break
def run(env, algname, filename):
    if algname == "TD3":
        model = TD3.load(f"{algname}_pkl")
    elif algname == "SAC":
        if filename:
            model = SAC.load(f"{filename}")
        else:
            model = SAC.load(f"{algname}_pkl")
    elif algname == "DDPG":
        model = DDPG.load(f"{algname}_pkl")
    else:
        # raising a bare string is invalid in Python 3; raise an exception instance
        raise ValueError("Wrong algorithm name provided.")

    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        env.render()
        if done:
            break
def test_ddpg(): log_dir = f"model_save/best_model_ddpg_sp2" env = ENV(istest=True) env.render = True env = Monitor(env, log_dir) model = DDPG.load(log_dir) plot_results(f"model_save/") for i in range(10): state = env.reset() day = 0 while True: action = model.predict(state) next_state, reward, done, info = env.step(action[0]) state = next_state # print("trying:",day,"reward:", reward,"now profit:",env.profit) day += 1 if done: print( 'stock: {}, total profit: {:.2f}%, buy hold: {:.2f}%, sp: {:.4f}, mdd: {:.2f}%, romad: {:.4f}' .format(i, env.profit * 100, env.buy_hold * 100, env.sp, env.mdd * 100, env.romad)) break
import os
import time
import pdb
import math
import numpy as np
import pybullet as p
import gym
from gym import error, spaces, utils
from gym.utils import seeding

from stable_baselines3 import DDPG
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy

from gym_pybullet_drones.envs.RLTetherAviary import RLTetherAviary

if __name__ == "__main__":
    #### Check the environment's spaces ########################################
    env = RLTetherAviary(gui=1, record=1)
    # env.USE_GUI_RPM = True

    model_name = "<path-to-model>"
    model = DDPG.load(model_name)

    obs = env.reset()
    for i in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        # env.render()
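evaluate_policy is imported above but never called; a short sketch of how it could be applied to the loaded model (the episode count is an arbitrary choice, not from the original script):

    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
    print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")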
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=float(0.5) * np.ones(n_actions))

# model = DDPG(MlpPolicy, env, verbose=1, param_noise=param_noise, action_noise=action_noise)
model = DDPG(MlpPolicy, env, verbose=1, action_noise=action_noise)
model.learn(total_timesteps=400000)
model.save(
    "/home/nightmareforev/git/bullet_stuff/multi_kuka_sim/kukakr5Arc/envs/saved_policies/kukakr5Arc_reacher"
)
print('Saving model.... Model saved')

del model  # remove to demonstrate saving and loading

model = DDPG.load(
    "/home/nightmareforev/git/bullet_stuff/multi_kuka_sim/kukakr5Arc/envs/saved_policies/kukakr5Arc_reacher",
    env=env)
print('Loading model..... Model loaded')

# env.render() goes before env.reset() for the render to work
# env.render()
# obs = env.reset()
# while True:
#     action, _states = model.predict(obs)
#     obs, rewards, dones, info = env.step(action)
#     env.render()

while True:
    print("running saved policy")
    obs = env.reset()
env = gym.make('gym_spm:spm-v0')
# env = make_vec_env(env_id, n_envs=1, seed=0)
# env = VecCheckNan(env, raise_exception=True)
# env = check_env(env)

# The noise objects for DDPG
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=25.67 * np.ones(n_actions))

# model = TD3(MlpPolicy, env, action_noise=action_noise, verbose=1, tensorboard_log="./TD3_spm_v2_SOC_point5_two_state/")
model = DDPG(MlpPolicy, env, action_noise=action_noise, verbose=1,
             tensorboard_log="./DDPG_spm_v2_SOC_point5_two_state/")

model.learn(total_timesteps=25000, tb_log_name='DDPG_test_run_3_SOCpoint5_two_state')
model.save('DDPG_test_3_SOC_point5_two_states')
# DDPG.load is a classmethod that returns a new model; rebind it rather than
# calling load() on the existing instance, which would discard the result
model = DDPG.load('DDPG_test_2_SOC_point5_two_states', env=env)

mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print("Mean Reward = ", mean_reward)

epsi_sp_list = []
action_list = []
soc_list = []
Concentration_list = []
Concentration_list1 = []

obs = env.reset()
for _ in range(3600):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, done, info = env.step(action)
def load_weights(self, weights_file):
    """Load the model from a zip archive."""
    logger.info(f"load weights from file: {weights_file}")
    self.model = DDPG.load(weights_file, env=self.env)
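A matching save counterpart, sketched under the assumption that self.model is a stable_baselines3 DDPG instance (the method name save_weights is hypothetical):

def save_weights(self, weights_file):
    """Save the model to a zip archive."""
    logger.info(f"save weights to file: {weights_file}")
    self.model.save(weights_file)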
# Instantiate Model
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=25.67 * np.ones(n_actions))
model = DDPG(MlpPolicy, env, action_noise=action_noise, verbose=1, tensorboard_log=log_dir)

# Train OR Load Model
if train_model:
    model.learn(total_timesteps=25000, tb_log_name=details)
    model.save(model_dir_description)
else:
    # DDPG.load is a classmethod that returns a new model; rebind it rather than
    # calling load() on the existing instance, which would discard the result
    model = DDPG.load(model_dir_description, env=env)

mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print("Mean Reward = ", mean_reward)

epsi_sp_list = []
action_list = []
soc_list = []
Concentration_list = []
Concentration_list1 = []

obs = env.reset()
for _ in range(3600):
# If the model is stored in a bucket, download it first
if 'gs://' in args.model:
    # Download from the given bucket (gcloud configured with privileges)
    client = gcloud.init_storage_client()
    bucket_name = args.model.split('/')[2]
    model_path = args.model.split(bucket_name + '/')[-1]
    gcloud.read_from_bucket(client, bucket_name, model_path)
    model_path = './' + model_path
else:
    model_path = args.model

model = None
if args.algorithm == 'DQN':
    model = DQN.load(model_path)
elif args.algorithm == 'DDPG':
    model = DDPG.load(model_path)
elif args.algorithm == 'A2C':
    model = A2C.load(model_path)
elif args.algorithm == 'PPO':
    model = PPO.load(model_path)
elif args.algorithm == 'SAC':
    model = SAC.load(model_path)
elif args.algorithm == 'TD3':
    model = TD3.load(model_path)
else:
    raise RuntimeError('Algorithm specified is not registered.')

# ---------------------------------------------------------------------------- #
#                             Execute loaded agent                              #
# ---------------------------------------------------------------------------- #
for i in range(args.episodes):
import gym
import numpy as np

from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

env = gym.make('MountainCarContinuous-v0')

# The noise objects for DDPG
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1)
model.learn(total_timesteps=1000000, log_interval=10)
model.save("ddpg_pendulum")
env = model.get_env()

del model  # remove to demonstrate saving and loading

model = DDPG.load("ddpg_pendulum")

obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
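OrnsteinUhlenbeckActionNoise is imported above but unused; a sketch of swapping it in as a drop-in replacement for the Gaussian noise (the sigma value is an arbitrary choice):

action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))
model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1)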
# input_path = "./Model/DDGP_1.pt"
policy_path = "./Model/Policy.pt"
# value_func_path = "./Model/Value_Func.pt"
#
# n_actions = env.action_space.shape[-1]
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=.75 * np.ones(n_actions))
#
# model = DDPG(MlpPolicy, env, action_noise=action_noise, verbose=1)
#
# model.load(input_path)
model = DDPG.load("./Model/DDGP_1.pt")
# torch.save(model, "./Model/Full_Model.pt")
model.policy.save(policy_path)

dict_thing = model.policy.state_dict()

x = torch.randn(2)
print(x.shape)
print(x)

print(model.actor)
# print(dict_thing)
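A sketch of reloading the policy saved above, assuming the model was trained with the default DDPG MlpPolicy (class and path are carried over from the snippet, the reload itself is an assumption):

from stable_baselines3.ddpg.policies import MlpPolicy

policy = MlpPolicy.load(policy_path)
print(policy)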
#!/usr/bin/env python
import gym
from stable_baselines3 import PPO
from pathlib import Path
from util import Evaluate, plot2, plot3, plot_picture

env = gym.make("reference_environment:reference-environment-v0")
# agent = PPO.load("MODEL_0.zip")
from stable_baselines3 import DDPG
agent = DDPG.load("MODEL_ALPHA_GENERATION.zip")

evaluate = Evaluate(env, agent)
seeds = evaluate.read_seeds(fname="seeds.csv")
# Add your agent to the Evaluate class and call it here, e.g. evaluate.my_agent(seeds)
# mean_reward = evaluate.RL_agent(seeds)
# mean_reward = evaluate.matching_agent(seeds)
# mean_reward = evaluate.min_agent(seeds)
mean_reward = evaluate.transformed_agent(seeds, H=7, transform="Standard")

# ### Plot the last episode
# plot2(env.state, "fixed_policy")
# plot3(env.state, "fixed_policy.png")
#
# ## Plot the last episode
# plot2(env.state, "fixed_policy_h=4")
# plot3(env.state, "fixed_policy_h=4.png")
plot_picture(env.state, "fixed_policy")
model_path = ''
if 'gs://' in args.model:
    # Download from the given bucket (gcloud configured with privileges)
    client = gcloud.init_storage_client()
    bucket_name = args.model.split('/')[2]
    model_path = args.model.split(bucket_name + '/')[-1]
    gcloud.read_from_bucket(client, bucket_name, model_path)
    model_path = './' + model_path
else:
    model_path = args.model

model = None
if args.algorithm == 'DQN':
    model = DQN.load(model_path, tensorboard_log=args.tensorboard)
elif args.algorithm == 'DDPG':
    model = DDPG.load(model_path, tensorboard_log=args.tensorboard)
elif args.algorithm == 'A2C':
    model = A2C.load(model_path, tensorboard_log=args.tensorboard)
elif args.algorithm == 'PPO':
    model = PPO.load(model_path, tensorboard_log=args.tensorboard)
elif args.algorithm == 'SAC':
    model = SAC.load(model_path, tensorboard_log=args.tensorboard)
elif args.algorithm == 'TD3':
    model = TD3.load(model_path, tensorboard_log=args.tensorboard)
else:
    raise RuntimeError('Algorithm specified is not registered.')

model.set_env(env)

# ---------------------------------------------------------------------------- #
#      Calculating total training timesteps based on number of episodes        #
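The if/elif chain above can be collapsed into a table lookup; a sketch only, assuming the same stable_baselines3 classes and argparse fields are in scope:

from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3

algorithms = {'DQN': DQN, 'DDPG': DDPG, 'A2C': A2C, 'PPO': PPO, 'SAC': SAC, 'TD3': TD3}
if args.algorithm not in algorithms:
    raise RuntimeError('Algorithm specified is not registered.')
model = algorithms[args.algorithm].load(model_path, tensorboard_log=args.tensorboard)
model.set_env(env)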
client = Client(remote_base)
env_id = "reference-environment-v0"
seed = int(os.getenv("RANGL_SEED", 123456))
instance_id = client.env_create(env_id, seed)
client.env_monitor_start(
    instance_id,
    directory=f"monitor/{instance_id}",
    force=True,
    resume=False,
    video_callable=False,
)

model = DDPG.load("MODEL_ALPHA_GENERATION.zip")

observation = client.env_reset(instance_id)
print(observation)

import numpy as np


def ObservationTransform(obs, H, transform, steps_per_episode=int(96)):
    step_count, generator_1_level, generator_2_level = obs[:3]
    agent_prediction = np.array(obs[3:])
    agent_horizon_prediction = agent_prediction[-1] * np.ones(steps_per_episode)
    agent_horizon_prediction[:int(steps_per_episode -
if os.path.isfile(ARGS.exp + '/success_model.zip'):
    path = ARGS.exp + '/success_model.zip'
elif os.path.isfile(ARGS.exp + '/best_model.zip'):
    path = ARGS.exp + '/best_model.zip'
else:
    print("[ERROR]: no model under the specified path", ARGS.exp)

if algo == 'a2c':
    model = A2C.load(path)
if algo == 'ppo':
    model = PPO.load(path)
if algo == 'sac':
    model = SAC.load(path)
if algo == 'td3':
    model = TD3.load(path)
if algo == 'ddpg':
    model = DDPG.load(path)

#### Parameters to recreate the environment ################
env_name = ARGS.exp.split("-")[1] + "-aviary-v0"
OBS = ObservationType.KIN if ARGS.exp.split("-")[3] == 'kin' else ObservationType.RGB
if ARGS.exp.split("-")[4] == 'rpm':
    ACT = ActionType.RPM
elif ARGS.exp.split("-")[4] == 'dyn':
    ACT = ActionType.DYN
elif ARGS.exp.split("-")[4] == 'pid':
    ACT = ActionType.PID
elif ARGS.exp.split("-")[4] == 'vel':
    ACT = ActionType.VEL
elif ARGS.exp.split("-")[4] == 'tun':
    ACT = ActionType.TUN
train = True

env = gym.make('gym_spm:spm-v0')

# The noise objects for DDPG
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=25.67 * np.ones(n_actions))

model = DDPG(MlpPolicy, env, action_noise=action_noise, verbose=1,
             tensorboard_log="./DDPG_spm_v2_SOC_point5_two_state/")
# model = TD3(MlpPolicy, env, action_noise=action_noise, verbose=1, tensorboard_log="./TD3_spm_v2_SOC_point5_two_state/")

if train:
    model.learn(total_timesteps=2500000, tb_log_name='test_run_3_SOCpoint5_two_state')
    model.save('TD3_test_3_SOC_point5_two_states')
else:
    # DDPG.load is a classmethod that returns a new model; rebind it rather than
    # calling load() on the existing instance, which would discard the result
    model = DDPG.load('TD3_test_2_SOC_point5_two_states', env=env)

mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print("Mean Reward = ", mean_reward)

epsi_sp_list = []
action_list = []
soc_list = []
Concentration_list = []
Concentration_list1 = []

obs = env.reset()
for _ in range(3600):
    action, _states = model.predict(obs, deterministic=True)