def test_simulated_carracing(self):
    """ Test simulated Car Racing.

    Rolls a Brownian action sequence through the simulated environment,
    rendering each frame, until the episode terminates or the sequence
    is exhausted.
    """
    env = SimulatedCarracing('logs/exp0')
    env.reset()
    seq_len = 1000
    # One action per simulated frame, sampled as Brownian motion.
    actions = sample_continuous_policy(env.action_space, seq_len, 1. / FPS)
    for action in actions:
        next_obs, reward, terminal = env.step(action)
        env.render()
        print(next_obs.shape, reward)
        if terminal:
            break
def test_simulated_carracing(self):
    """ Test simulated Car Racing.

    NOTE(review): this is a duplicate of the preceding
    test_simulated_carracing definition; the later one shadows the
    earlier. Consider removing one of them.
    """
    env = SimulatedCarracing('logs/exp0')
    env.reset()
    seq_len = 1000
    dt = 1. / FPS
    actions = sample_continuous_policy(env.action_space, seq_len, dt)
    for step in range(seq_len):
        obs, rew, term = env.step(actions[step])
        env.render()
        print(obs.shape, rew)
        if term:
            break
def generate_data(rollouts, data_dir, noise_type):  # pylint: disable=R0914
    """Generate random-policy rollouts in the Robonyan repositioning env.

    Each rollout is saved to ``data_dir/rollout_<i>.npz`` with keys
    observations, images, oneDstates, rewards, actions, terminals.

    :args rollouts: number of rollouts to record
    :args data_dir: existing directory where the .npz files are written
    :args noise_type: 'white' (i.i.d. action samples) or 'brown'
        (Brownian-motion action sequence)

    :raises ValueError: if noise_type is neither 'white' nor 'brown'
    """
    assert exists(data_dir), "The data directory does not exist..."
    env = RobonyanRepositioningGymEnv(renders=True, isDiscrete=False)
    seq_len = 1000
    for i in range(rollouts):
        env.render(mode='human')
        env.reset()
        # Pump the GUI event queue so the viewer window stays responsive.
        env.env.viewer.window.dispatch_events()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len,
                                                 1. / 50)
        else:
            # BUG FIX: previously fell through and raised a confusing
            # NameError on a_rollout below.
            raise ValueError("Unknown noise type: {}".format(noise_type))
        s_rollout = []
        img_rollout = []
        oneDs_rollout = []  # fixed 'oneDs_rolloout' typo (local name only)
        r_rollout = []
        d_rollout = []
        t = 0
        while True:
            action = a_rollout[t]
            t += 1
            s, r, done, _ = env.step(action)
            # The observation is a pair: s[0] image frame, s[1] 1-D state.
            img = s[0]
            oneDs = s[1]
            env.env.viewer.window.dispatch_events()
            s_rollout += [s]
            img_rollout += [img]
            oneDs_rollout += [oneDs]
            r_rollout += [r]
            d_rollout += [done]
            if done:
                print("> End of rollout {}, {} frames...".format(
                    i, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         images=np.array(img_rollout),
                         oneDstates=np.array(oneDs_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break
def generate_data(data_dir, noise_type):  # pylint: disable=R0914
    """ Generates data.

    Records random-policy CarRacing-v0 rollouts into compressed .npz
    files under data_dir, keyed observations/rewards/actions/terminals.

    NOTE(review): the nested loops below run MAX_TRIALS * MAX_FRAMES
    full rollouts (one per inner iteration), which looks unintended --
    the file naming `i + j * 1000` suggests the inner bound was meant to
    be a rollout count, not MAX_FRAMES; confirm against the caller.
    """
    assert exists(data_dir), "The data directory does not exist..."
    env = gym.make("CarRacing-v0")
    #seq_len = 1000
    for j in range(MAX_TRIALS):
        for i in range(MAX_FRAMES):
            env.reset()
            # Pump the GUI event queue so the viewer window stays alive.
            env.env.viewer.window.dispatch_events()
            if noise_type == 'white':
                # i.i.d. action samples.
                a_rollout = [
                    env.action_space.sample() for _ in range(MAX_FRAMES)
                ]
            elif noise_type == 'brown':
                # Brownian-motion action sequence; any other noise_type
                # falls through and raises NameError on a_rollout below.
                a_rollout = sample_continuous_policy(env.action_space,
                                                     MAX_FRAMES, 1. / 50)
            s_rollout = []
            r_rollout = []
            d_rollout = []
            t = 0
            while True:
                action = a_rollout[t]
                t += 1
                s, r, done, _ = env.step(action)
                env.render("rgb_array")
                env.env.viewer.window.dispatch_events()
                s_rollout += [s]
                r_rollout += [r]
                d_rollout += [done]
                if done:
                    print("> End of rollout {}, {} frames...".format(
                        i, len(s_rollout)))
                    # NOTE(review): hard-coded 1000 here presumably
                    # mirrors MAX_FRAMES -- verify they stay in sync.
                    np.savez_compressed(join(
                        data_dir, 'rollout_{}'.format(i + j * 1000)),
                        observations=np.array(s_rollout),
                        rewards=np.array(r_rollout),
                        actions=np.array(a_rollout),
                        terminals=np.array(d_rollout))
                    break
def generate_data(num_rollouts, rollout_len, data_dir, noise_type,
                  **carnav_kwargs):  # pylint: disable=R0914
    """Generate random-policy rollouts in the CarNav environment.

    Each rollout is saved to ``data_dir/rollout_<i>.npz`` with keys
    observations, rewards, actions (only the ones used), terminals.

    :args num_rollouts: number of rollouts to record
    :args rollout_len: maximum number of steps per rollout
    :args data_dir: output directory, created (with parents) if missing
    :args noise_type: 'white' (i.i.d. samples) or 'brown' (Brownian motion)
    :args carnav_kwargs: forwarded verbatim to the CarNav constructor

    :raises ValueError: if noise_type is neither 'white' nor 'brown'
    """
    data_dir = Path(data_dir)
    # BUG FIX: mkdir(555, True, True) passed the *decimal* mode 555
    # (a garbled, unwritable permission set), so the np.savez below
    # could fail; use the default mode instead.
    data_dir.mkdir(parents=True, exist_ok=True)
    env = CarNav(**carnav_kwargs)
    for i in range(num_rollouts):
        env.reset()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample()
                         for _ in range(rollout_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space,
                                                 rollout_len, 1. / 50)
        else:
            # BUG FIX: previously fell through to a NameError on a_rollout.
            raise ValueError("Unknown noise type: {}".format(noise_type))
        s_rollout = []
        r_rollout = []
        d_rollout = []
        t = 0
        done = False
        while not done and t < rollout_len:
            action = a_rollout[t]
            t += 1
            s, r, done, _ = env.step(action)
            s_rollout += [s]
            r_rollout += [r]
            d_rollout += [done]
            # BUG FIX: the previous `t == rollout_len - 1` check cut every
            # full-length rollout one step short (and dropped its save on
            # some paths); save when done or when all actions are used.
            if done or t == rollout_len:
                print("> End of rollout {}, {} frames...".format(
                    i, len(s_rollout)))
                np.savez(
                    join(data_dir, 'rollout_{}'.format(i)),
                    observations=np.array(s_rollout),
                    rewards=np.array(r_rollout),
                    actions=np.array(a_rollout[:t]),  # just save actions used
                    terminals=np.array(d_rollout))
                break
def generate_data(rollouts, data_dir, noise_type):  # pylint: disable=R0914
    """ Generates data.

    Records `rollouts` random-policy CarRacing-v0 episodes, saving each
    one to data_dir/rollout_<i>.npz.
    """
    assert exists(data_dir), "The data directory does not exist..."
    env = gym.make("CarRacing-v0")
    seq_len = 1000
    for rollout_idx in range(rollouts):
        env.reset()
        # Pump the GUI event queue so the viewer window stays responsive.
        env.env.viewer.window.dispatch_events()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len,
                                                 1. / 50)
        s_rollout, r_rollout, d_rollout = [], [], []
        # CarRacing-v0 enforces a 1000-step episode limit (see
        # env.spec.max_episode_steps), so with a random policy `done`
        # is virtually always reached exactly as the actions run out.
        for action in a_rollout:
            s, r, done, _ = env.step(action)
            env.env.viewer.window.dispatch_events()
            s_rollout.append(s)
            r_rollout.append(r)
            d_rollout.append(done)
            if done:
                print("> End of rollout {}, {} frames...".format(
                    rollout_idx, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(rollout_idx)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break
def generate_data(rollouts, data_dir, noise_type, iteration_num):  # pylint: disable=R0914
    """Generate random-policy rollouts on BipedalWalkerHardcore-v2.

    Rendered frames (downscaled to 64x64) are stored as observations;
    each rollout is saved to ``data_dir/rollout_<i>.npz``.

    :args rollouts: number of rollouts to record
    :args data_dir: existing directory where the .npz files are written
    :args noise_type: 'white' (i.i.d. samples) or 'brown' (Brownian motion)
    :args iteration_num: unused in this function -- TODO confirm callers
        still need it before removing

    :raises ValueError: if noise_type is neither 'white' nor 'brown'
    """
    assert exists(data_dir), "The data directory does not exist..."
    env = gym.make("BipedalWalkerHardcore-v2")
    # Monitor wrapper records videos of the random policy as a side effect.
    env = wrappers.Monitor(env, "./videos/random_policy/")
    seq_len = 1000
    for i in range(rollouts):
        env.reset()
        if noise_type == "white":
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == "brown":
            a_rollout = sample_continuous_policy(env.action_space, seq_len,
                                                 1.0 / 50)
        else:
            # BUG FIX: previously fell through to a NameError on a_rollout.
            raise ValueError("Unknown noise type: {}".format(noise_type))
        s_rollout = []
        r_rollout = []
        d_rollout = []
        t = 0
        while True:
            action = a_rollout[t]
            t += 1
            s, r, done, _ = env.step(action)
            # Store the rendered RGB frame (not the raw state vector).
            im_frame = env.render(mode="rgb_array")
            img = PIL.Image.fromarray(im_frame)
            img = img.resize((64, 64))
            s_rollout += [np.array(img)]
            r_rollout += [r]
            d_rollout += [done]
            # BUG FIX: the episode can outlast the seq_len sampled actions
            # (the hardcore walker's step limit exceeds 1000), in which
            # case the original `while True` indexed past the end of
            # a_rollout; end the rollout when the actions are exhausted.
            if done or t == seq_len:
                print("> End of rollout {}, {} frames...".format(
                    i, len(s_rollout)))
                np.savez(
                    join(data_dir, "rollout_{}".format(i)),
                    observations=np.array(s_rollout),
                    rewards=np.array(r_rollout),
                    actions=np.array(a_rollout),
                    terminals=np.array(d_rollout),
                )
                break
def generate_data(rollouts, data_dir, noise_type):  # pylint: disable=R0914
    """ Generates data.

    Plays random-policy CarRacing-v0 episodes and writes each finished
    rollout to data_dir/rollout_<i>.npz.
    """
    assert exists(data_dir), "The data directory does not exist..."

    def _dump(idx, states, rewards, acts, dones):
        # Persist one finished rollout as an .npz archive.
        print("> End of rollout {}, {} frames...".format(idx, len(states)))
        np.savez(join(data_dir, 'rollout_{}'.format(idx)),
                 observations=np.array(states),
                 rewards=np.array(rewards),
                 actions=np.array(acts),
                 terminals=np.array(dones))

    env = gym.make("CarRacing-v0")
    seq_len = 1000
    for i in range(rollouts):
        env.reset()
        # Pump the GUI event queue so the viewer window stays responsive.
        env.env.viewer.window.dispatch_events()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len,
                                                 1. / 50)
        s_rollout = []
        r_rollout = []
        d_rollout = []
        t = 0
        while True:
            s, r, done, _ = env.step(a_rollout[t])
            t += 1
            env.env.viewer.window.dispatch_events()
            s_rollout.append(s)
            r_rollout.append(r)
            d_rollout.append(done)
            if done:
                _dump(i, s_rollout, r_rollout, a_rollout, d_rollout)
                break
def generate_data(rollouts, data_dir, noise_type):  # pylint: disable=R0914
    """ Generates data.

    Builds a FinRL stock-trading environment (AAPL, 2009-2021, technical
    indicators + turbulence features) and records random-policy rollouts
    into data_dir/rollout_<i>.npz (observations/rewards/actions/terminals).
    """
    assert exists(data_dir), "The data directory does not exist..."
    # Download daily OHLCV data for the configured date range.
    df = YahooDownloader(start_date = '2009-01-01',
                         end_date = '2021-01-01',
                         ticker_list = ['AAPL']).fetch_data()
    # NOTE(review): sort_values is not assigned back or given
    # inplace=True, so this line is a no-op -- confirm intent.
    df.sort_values(['date','tic'],ignore_index=True)
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature = False)
    processed = fe.preprocess_data(df)
    # Build the full (date, ticker) grid so every trading day has a row
    # for every ticker, then left-join the processed features onto it.
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),
                                   processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date,list_ticker))
    processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(
        processed,on=["date","tic"],how="left")
    # Keep only real trading days and fill feature gaps with zeros.
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date','tic'])
    processed_full = processed_full.fillna(0)
    # NOTE(review): another non-assigned sort_values -- no-op as written.
    processed_full.sort_values(['date','tic'],ignore_index=True)
    train = data_split(processed_full, '2009-01-01','2019-01-01')
    # NOTE(review): `trade` is computed but never used below.
    trade = data_split(processed_full, '2019-01-01','2021-01-01')
    stock_dimension = len(train.tic.unique())
    # State: cash + (price, holdings) per stock + indicators per stock.
    state_space = 1 + 2*stock_dimension + \
        len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension
    env_kwargs = {
        "hmax": 100,                        # max shares per trade
        "initial_amount": 1000000,          # starting cash
        # "buy_cost_pct": 0.001i,
        # "sell_cost_pct": 0.001,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df = train, **env_kwargs)
    # Wrap in a stable-baselines-compatible vectorized env.
    env_train, _ = e_train_gym.get_sb_env()
    env = env_train
    # env = gym.make("CarRacing-v0")
    seq_len = 10000
    for i in range(rollouts):
        env.reset()
        # env.env.viewer.window.dispatch_events()
        if noise_type == 'white':
            # i.i.d. action samples.
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            # Brownian-motion action sequence; any other noise_type falls
            # through and raises NameError on a_rollout below.
            a_rollout = sample_continuous_policy(env.action_space, seq_len,
                                                 1. / 50)
        s_rollout = []
        r_rollout = []
        d_rollout = []
        t = 0
        while True:
            action = a_rollout[t]
            t += 1
            s, r, done, _ = env.step(action)
            # env.env.viewer.window.dispatch_events()
            s_rollout += [s]
            r_rollout += [r]
            d_rollout += [done]
            if done:
                print("> End of rollout {}, {} frames...".format(
                    i, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break