def __init__(self):
    """Load cached rows and encodings, filter, split and scale them.

    Reads ``cache/encoded_rows.pkl`` and ``cache/unscaled_data.npy``, keeps
    only the rows whose ``Ticker`` is in ``self.trader.quotes.valid_tickers``,
    drops the first 40% of what remains and splits the rest 60/40 into
    ``self.df`` / ``self.encoded`` and ``self.df_test`` / ``self.encoded_test``.
    A ``MinMaxScaler`` is fitted on the first part only, applied to both
    parts, and dumped to ``cache/dqn_scaler.gz``.
    """
    print("setting up trading env")
    df = pd.read_pickle("cache/encoded_rows.pkl")
    encoded = np.load("cache/unscaled_data.npy").astype(np.float32)
    self.trader = Trader()
    self.current_step = 1
    valid_tickers = self.trader.quotes.valid_tickers

    # filter valid tickers
    # Select by row *position*: the encodings array is positional, so using
    # the DataFrame index label would mis-pair rows on a non-default index.
    keep = [
        pos
        for pos, ticker in enumerate(df["Ticker"])
        if ticker in valid_tickers
    ]
    df = df.iloc[keep]
    encoded = encoded[keep]

    # only use subset of data
    split = int(0.4 * len(encoded))
    df, encoded = df.iloc[split:], encoded[split:]
    split = int(0.6 * len(encoded))
    encoded, encoded_test = encoded[:split], encoded[split:]
    self.df, self.df_test = df.iloc[:split], df.iloc[split:]
    self.day = arrow.get(self.df["Time"].iloc[0].format("YYYY-MM-DD"))

    # scale
    # The scaler is fitted on the first split only and reused for the second.
    scaler = MinMaxScaler()
    scaler.fit(encoded)
    self.encoded = scaler.transform(encoded)
    self.encoded_test = scaler.transform(encoded_test)
    joblib.dump(scaler, "cache/dqn_scaler.gz")
def clustering(encodings, df, method, params):
    """Fit a clustering model and return the cluster with the highest return.

    The model ``method(**params)`` is fitted on ``encodings`` and dumped to
    ``clustering.joblib``. For every cluster index a fresh ``Trader`` walks
    ``df`` in order, calls ``eod`` whenever the day advances, and issues a
    ``"BULLISH"`` signal for each row assigned to that cluster. The cluster
    whose trader ends with the largest ``current_reward`` wins.

    Args:
        encodings: Samples passed to ``fit``; row ``k`` must correspond to
            the ``k``-th row of ``df``.
        df: Frame with ``Time``, ``Spot``, ``Expiry`` and ``Ticker`` columns.
        method: Clustering class (or factory) exposing ``fit`` / ``labels_``.
        params: Keyword arguments forwarded to ``method``.

    Returns:
        Index of the best-scoring cluster (0 when there are no clusters).
    """
    clusters = method(**params).fit(encodings)
    dump(clusters, 'clustering.joblib')
    labels = clusters.labels_

    # check each cluster for profitability
    n_clusters = len(set(labels))
    # noise labeled as -1
    if -1 in labels:
        n_clusters -= 1

    best = 0
    # Start below any real score so the true maximum wins even when every
    # cluster loses money.
    best_score = float("-inf")
    print(f"{n_clusters} number of clusters")
    for i in range(n_clusters):
        trader = Trader()
        day = arrow.get(df["Time"].iloc[0].format("YYYY-MM-DD"))
        # Labels are positional, so index them by row position rather than
        # by the DataFrame index label.
        for pos, (_, row) in enumerate(df.iterrows()):
            current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))
            # new day, check expiries
            if current_day > day:
                trader.eod(day.format("YYYY-MM-DD"))
                day = current_day

            # if target cluster, buy
            if labels[pos] == i:
                current_price = row["Spot"]
                expiry = row["Expiry"].format("YYYY-MM-DD")
                ticker = row["Ticker"]
                trader.trade_on_signal(ticker, "BULLISH", current_price, expiry)

        reward = trader.current_reward
        print(f"cluster {i}\treturn: {reward:.2f}%")
        if reward > best_score:
            best_score = reward
            best = i

    print(f"best {best}")
    return best
def clustering(encodings, df, method, params, topn=1):
    """Fit a clustering model and return the ``topn`` best-returning clusters.

    The model ``method(**params)`` is fitted on ``encodings`` and dumped to
    ``clustering.joblib``. For every cluster index a fresh ``Trader`` walks
    ``df`` in order, calls ``eod`` whenever the day advances, and issues a
    ``"BULLISH"`` signal for each row assigned to that cluster; the trader's
    final ``current_reward`` is that cluster's score.

    Args:
        encodings: Samples passed to ``fit``; row ``k`` must correspond to
            the ``k``-th row of ``df``.
        df: Frame with ``Time``, ``Spot``, ``Expiry`` and ``Ticker`` columns.
        method: Clustering class (or factory) exposing ``fit`` / ``labels_``.
        params: Keyword arguments forwarded to ``method``.
        topn: How many cluster indices to return.

    Returns:
        List of distinct cluster indices ordered by descending score; ties
        keep ascending index order.
    """
    clusters = method(**params).fit(encodings)
    dump(clusters, "clustering.joblib")
    labels = clusters.labels_

    # check each cluster for profitability
    n_clusters = len(set(labels))
    # noise labeled as -1
    if -1 in labels:
        n_clusters -= 1

    scores = []
    print(f"{n_clusters} number of clusters")
    for i in range(n_clusters):
        trader = Trader()
        day = arrow.get(df["Time"].iloc[0].format("YYYY-MM-DD"))
        # Labels are positional, so index them by row position rather than
        # by the DataFrame index label.
        for pos, (_, row) in enumerate(df.iterrows()):
            current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))
            # new day, check expiries
            if current_day > day:
                trader.eod(day.format("YYYY-MM-DD"))
                day = current_day

            # if target cluster, buy
            if labels[pos] == i:
                current_price = row["Spot"]
                expiry = row["Expiry"].format("YYYY-MM-DD")
                ticker = row["Ticker"]
                trader.trade_on_signal(ticker, "BULLISH", current_price, expiry)

        scores.append(trader.current_reward)
        print(f"cluster {i}\treturn: {scores[-1]:.2f}%")

    # Rank the indices themselves: looking scores up with ``list.index``
    # would return the same cluster repeatedly whenever scores are tied.
    ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    return ranked[:topn]
def test(encodings, df, target_cluster):
    """Replay ``df`` buying every row that falls in ``target_cluster``.

    Loads the model saved in ``clustering.joblib``, walks ``df`` in order
    with a fresh ``Trader``, calls ``eod`` whenever the day advances, and
    issues a ``"BULLISH"`` signal for each row assigned to
    ``target_cluster``. The start day, end day and final
    ``current_reward`` are printed.

    Args:
        encodings: Samples aligned row-for-row with ``df``; used to assign
            clusters when the loaded model offers ``predict``.
        df: Frame with ``Time``, ``Spot``, ``Expiry`` and ``Ticker`` columns.
        target_cluster: Cluster index to trade on.
    """
    clusters = load('clustering.joblib')
    # ``labels_`` describes the data the model was fitted on, not the data
    # passed here, so assign clusters to ``encodings`` when the model can.
    # Models without ``predict`` fall back to the stored labels.
    if hasattr(clusters, "predict"):
        labels = clusters.predict(encodings)
    else:
        labels = clusters.labels_

    trader = Trader()
    day = arrow.get(df["Time"].iloc[0].format("YYYY-MM-DD"))
    print(f"start {day}")
    # Labels are positional, so index them by row position rather than by
    # the DataFrame index label.
    for pos, (_, row) in enumerate(df.iterrows()):
        current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))
        # new day, check expiries
        if current_day > day:
            trader.eod(day.format("YYYY-MM-DD"))
            day = current_day

        # if target cluster, buy
        if labels[pos] == target_cluster:
            current_price = row["Spot"]
            expiry = row["Expiry"].format("YYYY-MM-DD")
            ticker = row["Ticker"]
            trader.trade_on_signal(ticker, "BULLISH", current_price, expiry)

    print(f"end {current_day}")
    print(f"cluster {target_cluster}\treturn: {trader.current_reward:.2f}%")
def reset(self):
    """Start a new episode and return the first encoded observation.

    Replaces ``self.trader`` with a fresh ``Trader``, sets
    ``self.current_step`` back to 1 and ``self.day`` to the day of the
    first row of ``self.df``.
    """
    self.trader = Trader()
    self.current_step = 1

    # Truncate the first timestamp to a calendar day.
    first_timestamp = self.df["Time"].iloc[0]
    first_day = first_timestamp.format("YYYY-MM-DD")
    self.day = arrow.get(first_day)

    initial_state = self.encoded[0]
    return initial_state
class TraderEnv(object):
    """Episode-style environment that replays cached rows through a Trader.

    Observations are the scaled encodings in ``self.encoded``; action ``0``
    issues a ``"BULLISH"`` signal for the row the observation belongs to,
    and any other action does nothing.
    """

    def __init__(self):
        """Load cached rows and encodings, filter, split and scale them.

        Reads ``cache/encoded_rows.pkl`` and ``cache/unscaled_data.npy``,
        keeps only the rows whose ``Ticker`` is in
        ``self.trader.quotes.valid_tickers``, drops the first 40% of what
        remains and splits the rest 60/40 into ``self.df`` /
        ``self.encoded`` and ``self.df_test`` / ``self.encoded_test``. A
        ``MinMaxScaler`` is fitted on the first part only, applied to both
        parts, and dumped to ``cache/dqn_scaler.gz``.
        """
        print("setting up trading env")
        df = pd.read_pickle("cache/encoded_rows.pkl")
        encoded = np.load("cache/unscaled_data.npy").astype(np.float32)
        self.trader = Trader()
        self.current_step = 1
        valid_tickers = self.trader.quotes.valid_tickers

        # filter valid tickers
        # Select by row *position*: the encodings array is positional, so
        # using the DataFrame index label would mis-pair rows on a
        # non-default index.
        keep = [
            pos
            for pos, ticker in enumerate(df["Ticker"])
            if ticker in valid_tickers
        ]
        df = df.iloc[keep]
        encoded = encoded[keep]

        # only use subset of data
        split = int(0.4 * len(encoded))
        df, encoded = df.iloc[split:], encoded[split:]
        split = int(0.6 * len(encoded))
        encoded, encoded_test = encoded[:split], encoded[split:]
        self.df, self.df_test = df.iloc[:split], df.iloc[split:]
        self.day = arrow.get(self.df["Time"].iloc[0].format("YYYY-MM-DD"))

        # scale
        scaler = MinMaxScaler()
        scaler.fit(encoded)
        self.encoded = scaler.transform(encoded)
        self.encoded_test = scaler.transform(encoded_test)
        joblib.dump(scaler, "cache/dqn_scaler.gz")

    def step(self, action):
        """Apply ``action`` to the currently observed row and advance.

        Args:
            action: ``0`` trades the observed row; anything else is a no-op.

        Returns:
            Tuple ``(next_state, reward, done)`` where ``next_state`` is the
            next encoded row, ``reward`` is ``self.trader.current_reward``
            and ``done`` is true once the reward falls below -50 or the
            encodings are exhausted.
        """
        # The observation handed out last (by ``reset`` or the previous
        # ``step``) was ``encoded[current_step - 1]``, so the action must be
        # applied to that same row, not the one after it.
        row = self.df.iloc[self.current_step - 1]

        # new day, check expiries
        current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))
        if current_day != self.day:
            self.trader.eod(self.day.format("YYYY-MM-DD"))
            self.day = current_day

        if action == 0:
            current_price = row["Spot"]
            expiry = row["Expiry"].format("YYYY-MM-DD")
            ticker = row["Ticker"]
            self.trader.trade_on_signal(ticker, "BULLISH", current_price, expiry)

        next_state = self.encoded[self.current_step]
        self.current_step += 1
        reward = self.trader.current_reward
        done = reward < -50 or self.current_step == len(self.encoded)
        return next_state, reward, done

    def reset(self):
        """Start a new episode and return the first encoded observation."""
        self.trader = Trader()
        self.current_step = 1
        self.day = arrow.get(self.df["Time"].iloc[0].format("YYYY-MM-DD"))
        return self.encoded[0]
print(f"end {current_day}") print(f"cluster {target_cluster}\treturn: {trader.current_reward:.2f}%") def main(encodings, df): return clustering(encodings, df, KMeans, {"n_clusters":100}) if __name__ == "__main__": df = pd.read_pickle("cache/encoded_rows.pkl") print(df.head()) encoded = np.load("cache/unscaled_data.npy").astype(np.float32) assert len(encoded) == len(df) trader = Trader() valid_tickers = trader.quotes.valid_tickers # filter valid tickers valid_rows, valid_x = [], [] for idx, row in df.iterrows(): if row["Ticker"] in valid_tickers: valid_rows.append(row) valid_x.append(encoded[idx]) print(encoded.shape) df = pd.DataFrame(valid_rows) encoded = np.array(valid_x) assert len(encoded) == len(df) split = int(0.6 * len(encoded))
def main(
    df,
    num_episodes=50000,
    max_timesteps=500,
    actor_hidden_dim=32,
    critic_hidden_dim=256,
    minibatch_size=64,
    lr=0.0005,
    betas=(0.9, 0.999),
    lam=0.95,
    gamma=0.99,
    eps_clip=0.2,
    value_clip=0.4,
    beta_s=0.01,
    update_timesteps=5000,
    num_policy_updates_per_aux=32,
    epochs=1,
    epochs_aux=6,
    seed=None,
    render=False,
    render_every_eps=250,
    save_every=1000,
    load=False,
    monitor=False,
):
    """Train a PPG agent by replaying ``df`` once per episode.

    Each step feeds the row's ``encoding`` to the actor and critic, samples
    an action, forwards the matching entry of ``signals`` to a fresh
    per-episode ``Trader`` and stores the transition. Every
    ``update_timesteps`` steps the agent learns from the collected
    memories; every ``num_policy_updates_per_aux`` policy updates the
    auxiliary phase runs.

    Args:
        df: Frame with ``encoding``, ``symbol`` and ``datetime`` columns.
        num_episodes: Number of passes over ``df``.
        seed: When given, seeds torch and numpy before the agent is built.
        save_every: The agent is saved on every episode index divisible by
            this value.
        load: When true, ``agent.load()`` is called before training.
        max_timesteps, render, render_every_eps, monitor: Accepted for
            interface compatibility; not used by this function.
        Remaining keyword arguments are forwarded to ``PPG``.
    """
    # Seed before constructing the agent so that any random initialisation
    # performed while building it is reproducible as well.
    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    state_dim = len(df["encoding"].iloc[0])
    num_actions = len(signals)

    memories = deque([])
    aux_memories = deque([])

    agent = PPG(
        state_dim,
        num_actions,
        actor_hidden_dim,
        critic_hidden_dim,
        epochs,
        epochs_aux,
        minibatch_size,
        lr,
        betas,
        lam,
        gamma,
        beta_s,
        eps_clip,
        value_clip,
    )

    if load:
        agent.load()

    time = 0
    num_policy_updates = 0
    # Defined up front so the per-episode report works even when ``df`` has
    # fewer than two rows and the inner loop never runs.
    reward = 0.0

    for eps in tqdm(range(num_episodes), desc="episodes"):
        trader = Trader()
        for idx in range(len(df) - 1):
            time += 1
            state = np.array(df["encoding"].iloc[idx]).astype(np.float32)
            state = torch.from_numpy(state).to(device)
            action_probs, _ = agent.actor(state)
            value = agent.critic(state)

            dist = Categorical(action_probs)
            action = dist.sample()
            action_log_prob = dist.log_prob(action)
            action = action.item()

            trader.trade_on_signal(df["symbol"].iloc[idx], signals[action],
                                   df["datetime"].iloc[idx])
            reward = trader.reward(df["datetime"].iloc[idx])
            next_state = np.array(df["encoding"].iloc[idx + 1]).astype(
                np.float32)

            memory = Memory(state, action, action_log_prob, reward, False,
                            value)
            memories.append(memory)

            if time % update_timesteps == 0:
                agent.learn(memories, aux_memories, next_state)
                num_policy_updates += 1
                memories.clear()

                # NOTE(review): the auxiliary phase is assumed to be checked
                # only right after a policy update — confirm against the
                # original layout.
                if num_policy_updates % num_policy_updates_per_aux == 0:
                    agent.learn_aux(aux_memories)
                    aux_memories.clear()

        print(f"reward after episode: {reward:.2f}%")

        if eps % save_every == 0:
            agent.save()
def main(
    encodings,
    df,
    num_episodes=1000,
    actor_hidden_dim=32,
    critic_hidden_dim=256,
    minibatch_size=64,
    lr=0.0005,
    betas=(0.9, 0.999),
    lam=0.95,
    gamma=0.99,
    eps_clip=0.2,
    value_clip=0.4,
    beta_s=0.01,
    update_timesteps=10000,
    num_policy_updates_per_aux=32,
    epochs=1,
    epochs_aux=6,
    seed=None,
    save_every=5,
    load=False,
):
    """Train a PPG agent by replaying ``encodings`` / ``df`` each episode.

    For each row the agent samples an action from the actor; action ``0``
    issues a ``"BULLISH"`` signal to a fresh per-episode ``Trader``, any
    other action does nothing. ``trader.eod`` is called whenever the row's
    day differs from the previous one. An episode ends early once the
    trader's ``current_reward`` drops below -60. The reward at the end of
    each episode is written to a ``SummaryWriter`` under the tag
    ``"reward"``.

    Args:
        encodings: 2-D array; row ``k`` is the state for the ``k``-th row
            of ``df``.
        df: Frame with ``Time``, ``Spot``, ``Expiry`` and ``Ticker`` columns.
        num_episodes: Number of passes over the data.
        update_timesteps: Policy update interval, in environment steps.
        num_policy_updates_per_aux: Policy updates between auxiliary phases.
        seed: When given, seeds torch and numpy before the agent is built.
        save_every: The agent is saved on every episode index divisible by
            this value.
        load: When true, ``agent.load()`` is called before training.
        Remaining keyword arguments are forwarded to ``PPG``.
    """
    # Seed before constructing the agent so that any random initialisation
    # performed while building it is reproducible as well.
    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    state_dim = encodings.shape[1]
    num_actions = len(signals)

    memories = deque([])
    aux_memories = deque([])

    agent = PPG(
        state_dim,
        num_actions,
        actor_hidden_dim,
        critic_hidden_dim,
        epochs,
        epochs_aux,
        minibatch_size,
        lr,
        betas,
        lam,
        gamma,
        beta_s,
        eps_clip,
        value_clip,
    )

    if load:
        agent.load()

    time = 0
    num_policy_updates = 0
    reward = 0

    writer = SummaryWriter()
    # Close the writer even if training is interrupted by an exception.
    try:
        for eps in tqdm(range(num_episodes), desc="episodes"):
            trader = Trader()
            day = arrow.get(df["Time"].iloc[0].format("YYYY-MM-DD"))
            done = False
            # Let tqdm take the total from the range itself; the last row
            # is only ever used as a next state, so there are len - 1 steps.
            pbar = tqdm(range(len(encodings) - 1))
            for idx in pbar:
                # get row for price and date
                row = df.iloc[idx]
                current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))
                time += 1

                # new day, check expiries
                if current_day != day:
                    trader.eod(day.format("YYYY-MM-DD"))
                    day = current_day

                state = encodings[idx]
                state = torch.from_numpy(state).to(device)
                action_probs, _ = agent.actor(state)
                value = agent.critic(state)

                dist = Categorical(action_probs)
                action = dist.sample()
                action_log_prob = dist.log_prob(action)
                action = action.item()

                if action == 0:
                    current_price = row["Spot"]
                    expiry = row["Expiry"].format("YYYY-MM-DD")
                    ticker = row["Ticker"]
                    trader.trade_on_signal(ticker, "BULLISH", current_price,
                                           expiry)

                reward = trader.current_reward
                if idx % 1000 == 0:
                    pbar.set_description(f"current return {reward:.2f}%")
                if reward < -60:
                    done = True
                next_state = encodings[idx + 1]

                memory = Memory(state, action, action_log_prob, reward, done,
                                value)
                memories.append(memory)

                if time % update_timesteps == 0:
                    agent.learn(memories, aux_memories, next_state)
                    num_policy_updates += 1
                    memories.clear()

                    # NOTE(review): the auxiliary phase is assumed to be
                    # checked only right after a policy update — confirm
                    # against the original layout.
                    if num_policy_updates % num_policy_updates_per_aux == 0:
                        agent.learn_aux(aux_memories)
                        aux_memories.clear()

                if done:
                    break

            if eps % save_every == 0:
                agent.save()

            # NOTE(review): logging is assumed to happen once per episode —
            # confirm against the original layout.
            writer.add_scalar("reward", reward, eps)
            writer.flush()
    finally:
        writer.close()