示例#1
0
    def __init__(self):
        """Load the cached dataset, keep tradable tickers, and build splits.

        Steps performed:

        1. Read ``cache/encoded_rows.pkl`` (rows) and
           ``cache/unscaled_data.npy`` (encodings, cast to float32).
        2. Keep only rows whose ``Ticker`` is in
           ``self.trader.quotes.valid_tickers``.
        3. Drop the first 40% of the remaining rows, then split the rest
           60/40 into train (``self.df`` / ``self.encoded``) and test
           (``self.df_test`` / ``self.encoded_test``).
        4. Fit a ``MinMaxScaler`` on the train encodings only, apply it to
           both partitions and save it to ``cache/dqn_scaler.gz``.

        Raises:
            ValueError: if the two cache files have different row counts or
                no training rows remain after filtering.
        """
        print("setting up trading env")
        df = pd.read_pickle("cache/encoded_rows.pkl")
        encoded = np.load("cache/unscaled_data.npy").astype(np.float32)
        if len(df) != len(encoded):
            raise ValueError(
                f"row/encoding count mismatch: {len(df)} rows vs "
                f"{len(encoded)} encodings")

        self.trader = Trader()
        self.current_step = 1
        valid_tickers = self.trader.quotes.valid_tickers

        # filter valid tickers
        # A positional boolean mask is used instead of indexing `encoded`
        # with the DataFrame's index labels: labels only coincide with row
        # positions for a default RangeIndex. Masking also keeps the column
        # dtypes, which rebuilding the frame from iterrows() rows does not.
        keep = np.fromiter(
            (ticker in valid_tickers for ticker in df["Ticker"]),
            dtype=bool,
            count=len(df),
        )
        df, encoded = df[keep], encoded[keep]

        # only use subset of data (skip the first 40% of the rows)
        split = int(0.4 * len(encoded))
        df, encoded = df.iloc[split:], encoded[split:]

        # 60/40 train/test split of what is left
        split = int(0.6 * len(encoded))
        encoded, encoded_test = encoded[:split], encoded[split:]
        self.df, self.df_test = df.iloc[:split], df.iloc[split:]
        if len(self.df) == 0:
            raise ValueError("no training rows left after filtering tickers")
        # Day of the first training row, truncated to a calendar date.
        self.day = arrow.get(self.df["Time"].iloc[0].format("YYYY-MM-DD"))

        # scale: fit on the training partition only, then apply to both
        scaler = MinMaxScaler()
        scaler.fit(encoded)
        self.encoded, self.encoded_test = scaler.transform(
            encoded), scaler.transform(encoded_test)
        joblib.dump(scaler, "cache/dqn_scaler.gz")
def clustering(encodings, df, method, params):
    """Fit a clusterer and return the label of the most profitable cluster.

    The fitted estimator is saved to ``clustering.joblib``.  For every
    cluster label ``0 .. n_clusters - 1`` a fresh ``Trader`` replays ``df``
    in order, placing a ``"BULLISH"`` trade on each row assigned to that
    cluster and calling ``trader.eod`` whenever the calendar day advances.
    The cluster with the highest ``trader.current_reward`` wins.

    Args:
        encodings: feature matrix passed to ``method(**params).fit``; row
            ``k`` must describe the ``k``-th row of ``df``.
        df: DataFrame with ``Time``, ``Spot``, ``Expiry`` and ``Ticker``
            columns (``Time`` / ``Expiry`` values must support
            ``.format("YYYY-MM-DD")``).
        method: estimator class exposing ``fit`` and ``labels_``.
        params: keyword arguments for ``method``.

    Returns:
        The label of the best-scoring cluster (``0`` if there are none).
    """
    clusters = method(**params).fit(encodings)
    dump(clusters, 'clustering.joblib')

    labels = clusters.labels_

    # check each cluster for profitability
    n_clusters = len(set(labels))

    # noise labeled as -1
    if -1 in labels:
        n_clusters -= 1

    best = 0
    # Start below any real score so that the best cluster is still reported
    # when every cluster loses money.
    best_score = float("-inf")

    print(f"{n_clusters} number of clusters")
    for i in range(n_clusters):
        trader = Trader()
        day = arrow.get(df["Time"].iloc[0].format("YYYY-MM-DD"))

        # `labels` is positional, so track the row position explicitly: the
        # index label yielded by iterrows() is not a position once `df` has
        # been filtered or sliced.
        for pos, (_, row) in enumerate(df.iterrows()):
            current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))

            # new day, check expiries
            if current_day > day:
                trader.eod(day.format("YYYY-MM-DD"))
                day = current_day

            # if target cluster, buy
            if labels[pos] == i:
                current_price = row["Spot"]
                expiry = row["Expiry"].format("YYYY-MM-DD")
                ticker = row["Ticker"]
                trader.trade_on_signal(ticker, "BULLISH", current_price, expiry)

        reward = trader.current_reward
        print(f"cluster {i}\treturn: {reward:.2f}%")

        if reward > best_score:
            best_score = reward
            best = i

    print(f"best {best}")
    return best
def clustering(encodings, df, method, params, topn=1):
    """Fit a clusterer and return the labels of the ``topn`` best clusters.

    The fitted estimator is saved to ``clustering.joblib``.  For every
    cluster label ``0 .. n_clusters - 1`` a fresh ``Trader`` replays ``df``
    in order, placing a ``"BULLISH"`` trade on each row assigned to that
    cluster and calling ``trader.eod`` whenever the calendar day advances.
    Clusters are then ranked by ``trader.current_reward``.

    Args:
        encodings: feature matrix passed to ``method(**params).fit``; row
            ``k`` must describe the ``k``-th row of ``df``.
        df: DataFrame with ``Time``, ``Spot``, ``Expiry`` and ``Ticker``
            columns (``Time`` / ``Expiry`` values must support
            ``.format("YYYY-MM-DD")``).
        method: estimator class exposing ``fit`` and ``labels_``.
        params: keyword arguments for ``method``.
        topn: how many cluster labels to return.

    Returns:
        A list of distinct cluster labels, best score first; ties keep the
        lower label first.
    """
    clusters = method(**params).fit(encodings)
    dump(clusters, "clustering.joblib")

    labels = clusters.labels_

    # check each cluster for profitability
    n_clusters = len(set(labels))

    # noise labeled as -1
    if -1 in labels:
        n_clusters -= 1

    scores = []

    print(f"{n_clusters} number of clusters")
    for i in range(n_clusters):
        trader = Trader()
        day = arrow.get(df["Time"].iloc[0].format("YYYY-MM-DD"))

        # `labels` is positional, so track the row position explicitly: the
        # index label yielded by iterrows() is not a position once `df` has
        # been filtered or sliced.
        for pos, (_, row) in enumerate(df.iterrows()):
            current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))

            # new day, check expiries
            if current_day > day:
                trader.eod(day.format("YYYY-MM-DD"))
                day = current_day

            # if target cluster, buy
            if labels[pos] == i:
                current_price = row["Spot"]
                expiry = row["Expiry"].format("YYYY-MM-DD")
                ticker = row["Ticker"]
                trader.trade_on_signal(ticker, "BULLISH", current_price,
                                       expiry)

        scores.append(trader.current_reward)
        print(f"cluster {i}\treturn: {scores[-1]:.2f}%")

    # Rank the cluster indices themselves. Looking each top score up with
    # scores.index() would return the same cluster several times whenever
    # two clusters share a score.
    ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    return ranked[:topn]
def test(encodings, df, target_cluster):
    """Replay ``df`` buying only rows that fall in ``target_cluster``.

    Loads the estimator saved in ``clustering.joblib``, assigns a cluster to
    every row, and lets a fresh ``Trader`` place a ``"BULLISH"`` trade on
    each row belonging to ``target_cluster``.  ``trader.eod`` is called each
    time the calendar day advances.  Start day, end day and the final
    ``trader.current_reward`` are printed; nothing is returned.

    Args:
        encodings: feature matrix whose ``k``-th row describes the ``k``-th
            row of ``df``.
        df: DataFrame with ``Time``, ``Spot``, ``Expiry`` and ``Ticker``
            columns (``Time`` / ``Expiry`` values must support
            ``.format("YYYY-MM-DD")``).
        target_cluster: cluster label to trade on.
    """
    clusters = load('clustering.joblib')

    # Label the rows that were actually passed in. `labels_` only describes
    # the data the estimator was fitted on, so it is used solely as a
    # fallback for estimators that cannot predict on new data.
    if hasattr(clusters, "predict"):
        labels = clusters.predict(encodings)
    else:
        labels = clusters.labels_

    trader = Trader()
    day = arrow.get(df["Time"].iloc[0].format("YYYY-MM-DD"))
    print(f"start {day}")

    # `labels` is positional; the index label yielded by iterrows() is not a
    # position once `df` has been filtered or sliced.
    for pos, (_, row) in enumerate(df.iterrows()):
        current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))

        # new day, check expiries
        if current_day > day:
            trader.eod(day.format("YYYY-MM-DD"))
            day = current_day

        # if target cluster, buy
        if labels[pos] == target_cluster:
            current_price = row["Spot"]
            expiry = row["Expiry"].format("YYYY-MM-DD")
            ticker = row["Ticker"]
            trader.trade_on_signal(ticker, "BULLISH", current_price, expiry)

    print(f"end {current_day}")
    print(f"cluster {target_cluster}\treturn: {trader.current_reward:.2f}%")
示例#5
0
 def reset(self):
     """Begin a new episode and hand back the first observation.

     Replaces the trader with a fresh ``Trader``, rewinds the step counter
     to 1, resets ``self.day`` to the calendar day of the first row of
     ``self.df`` and returns ``self.encoded[0]``.
     """
     self.trader, self.current_step = Trader(), 1
     first_timestamp = self.df["Time"].iloc[0]
     self.day = arrow.get(first_timestamp.format("YYYY-MM-DD"))
     initial_state = self.encoded[0]
     return initial_state
示例#6
0
class TraderEnv(object):
    """Step-through trading environment backed by cached rows.

    Observations are scaled encodings (``self.encoded``); the matching rows
    (``self.df``) supply ``Ticker``, ``Spot``, ``Expiry`` and ``Time`` for
    the trade itself.  Action ``0`` places a ``"BULLISH"`` trade, any other
    action does nothing.
    """

    def __init__(self):
        """Load the cached dataset, keep tradable tickers, and build splits.

        Steps performed:

        1. Read ``cache/encoded_rows.pkl`` (rows) and
           ``cache/unscaled_data.npy`` (encodings, cast to float32).
        2. Keep only rows whose ``Ticker`` is in
           ``self.trader.quotes.valid_tickers``.
        3. Drop the first 40% of the remaining rows, then split the rest
           60/40 into train (``self.df`` / ``self.encoded``) and test
           (``self.df_test`` / ``self.encoded_test``).
        4. Fit a ``MinMaxScaler`` on the train encodings only, apply it to
           both partitions and save it to ``cache/dqn_scaler.gz``.

        Raises:
            ValueError: if the two cache files have different row counts or
                no training rows remain after filtering.
        """
        print("setting up trading env")
        df = pd.read_pickle("cache/encoded_rows.pkl")
        encoded = np.load("cache/unscaled_data.npy").astype(np.float32)
        if len(df) != len(encoded):
            raise ValueError(
                f"row/encoding count mismatch: {len(df)} rows vs "
                f"{len(encoded)} encodings")

        self.trader = Trader()
        self.current_step = 1
        valid_tickers = self.trader.quotes.valid_tickers

        # filter valid tickers
        # A positional boolean mask is used instead of indexing `encoded`
        # with the DataFrame's index labels: labels only coincide with row
        # positions for a default RangeIndex. Masking also keeps the column
        # dtypes, which rebuilding the frame from iterrows() rows does not.
        keep = np.fromiter(
            (ticker in valid_tickers for ticker in df["Ticker"]),
            dtype=bool,
            count=len(df),
        )
        df, encoded = df[keep], encoded[keep]

        # only use subset of data (skip the first 40% of the rows)
        split = int(0.4 * len(encoded))
        df, encoded = df.iloc[split:], encoded[split:]

        # 60/40 train/test split of what is left
        split = int(0.6 * len(encoded))
        encoded, encoded_test = encoded[:split], encoded[split:]
        self.df, self.df_test = df.iloc[:split], df.iloc[split:]
        if len(self.df) == 0:
            raise ValueError("no training rows left after filtering tickers")
        # Day of the first training row, truncated to a calendar date.
        self.day = arrow.get(self.df["Time"].iloc[0].format("YYYY-MM-DD"))

        # scale: fit on the training partition only, then apply to both
        scaler = MinMaxScaler()
        scaler.fit(encoded)
        self.encoded, self.encoded_test = scaler.transform(
            encoded), scaler.transform(encoded_test)
        joblib.dump(scaler, "cache/dqn_scaler.gz")

    def step(self, action):
        """Apply ``action`` to the row just observed and advance one row.

        Args:
            action: ``0`` places a ``"BULLISH"`` trade on the observed row;
                any other value places no trade.

        Returns:
            ``(next_state, reward, done)`` where ``next_state`` is the next
            scaled encoding, ``reward`` is ``self.trader.current_reward``
            and ``done`` is true once the reward drops below -50 or the
            training rows are exhausted.
        """
        # The observation the caller acted on is the one handed out by the
        # previous reset()/step(), i.e. the row just before `current_step`
        # (reset() returns encoded[0] with current_step == 1). Trading on
        # `current_step` itself would execute the decision against a row
        # the agent has not seen yet.
        row = self.df.iloc[self.current_step - 1]

        # new day, check expiries
        current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))
        if current_day != self.day:
            self.trader.eod(self.day.format("YYYY-MM-DD"))
            self.day = current_day

        if action == 0:
            current_price = row["Spot"]
            expiry = row["Expiry"].format("YYYY-MM-DD")
            ticker = row["Ticker"]
            self.trader.trade_on_signal(ticker, "BULLISH", current_price,
                                        expiry)

        next_state = self.encoded[self.current_step]
        self.current_step += 1
        reward = self.trader.current_reward
        done = reward < -50 or self.current_step == len(self.encoded)

        return next_state, reward, done

    def reset(self):
        """Start a new episode and return the first observation.

        Creates a fresh ``Trader``, rewinds the step counter to 1, resets
        ``self.day`` to the day of the first training row and returns
        ``self.encoded[0]``.
        """
        self.trader = Trader()
        self.current_step = 1
        self.day = arrow.get(self.df["Time"].iloc[0].format("YYYY-MM-DD"))
        return self.encoded[0]
    print(f"end {current_day}")
    print(f"cluster {target_cluster}\treturn: {trader.current_reward:.2f}%")


def main(encodings, df):
    """Run ``clustering`` with ``KMeans`` set to 100 clusters.

    Returns whatever ``clustering`` returns for the given encodings and rows.
    """
    kmeans_params = dict(n_clusters=100)
    result = clustering(encodings, df, KMeans, kmeans_params)
    return result

# Script entry point: load the cached rows and encodings, drop the rows whose
# ticker is not in the trader's valid tickers, then compute a 60% split index.
if __name__ == "__main__":
    df = pd.read_pickle("cache/encoded_rows.pkl")
    print(df.head())

    encoded = np.load("cache/unscaled_data.npy").astype(np.float32)
    # rows and encodings are matched one-to-one below
    assert len(encoded) == len(df)

    trader = Trader()
    valid_tickers = trader.quotes.valid_tickers

    # filter valid tickers
    # NOTE(review): `idx` is the DataFrame index label but is used as a
    # position into `encoded`; this presumably relies on the pickled frame
    # having a default 0..n-1 index - confirm.
    valid_rows, valid_x = [], []
    for idx, row in df.iterrows():
        if row["Ticker"] in valid_tickers:
            valid_rows.append(row)
            valid_x.append(encoded[idx])

    # this prints the pre-filter shape; `encoded` is only replaced below
    print(encoded.shape)
    # the rebuilt frame keeps the original index labels of the kept rows
    df = pd.DataFrame(valid_rows)
    encoded = np.array(valid_x)
    assert len(encoded) == len(df)

    # position at 60% of the filtered rows
    split = int(0.6 * len(encoded))
示例#8
0
def main(
    df,
    num_episodes=50000,
    max_timesteps=500,
    actor_hidden_dim=32,
    critic_hidden_dim=256,
    minibatch_size=64,
    lr=0.0005,
    betas=(0.9, 0.999),
    lam=0.95,
    gamma=0.99,
    eps_clip=0.2,
    value_clip=0.4,
    beta_s=0.01,
    update_timesteps=5000,
    num_policy_updates_per_aux=32,
    epochs=1,
    epochs_aux=6,
    seed=None,
    render=False,
    render_every_eps=250,
    save_every=1000,
    load=False,
    monitor=False,
):
    """Train a ``PPG`` agent by replaying ``df`` once per episode.

    Each episode creates a fresh ``Trader`` and walks every row but the last.
    The row's ``encoding`` is the state; the sampled action indexes
    ``signals`` and is passed to ``trader.trade_on_signal`` together with
    the row's ``symbol`` and ``datetime``; ``trader.reward(datetime)`` is
    the reward.  Transitions are stored as ``Memory`` tuples and the agent
    learns every ``update_timesteps`` steps (counted across episodes), with
    an auxiliary phase every ``num_policy_updates_per_aux`` policy updates.

    Args:
        df: DataFrame with ``encoding``, ``symbol`` and ``datetime`` columns.
        num_episodes: number of passes over ``df``.
        actor_hidden_dim, critic_hidden_dim, epochs, epochs_aux,
        minibatch_size, lr, betas, lam, gamma, beta_s, eps_clip, value_clip:
            forwarded positionally to ``PPG``.
        update_timesteps: steps between calls to ``agent.learn``.
        num_policy_updates_per_aux: policy updates between calls to
            ``agent.learn_aux``.
        seed: if not ``None``, seeds torch and numpy before the agent is
            built.
        save_every: ``agent.save()`` runs when ``eps % save_every == 0``.
        load: call ``agent.load()`` before training.
        max_timesteps, render, render_every_eps, monitor: accepted for
            interface compatibility; not used by this function.
    """
    # Seed before constructing the agent so that its weight initialisation
    # is reproducible as well, not only the action sampling.
    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    state_dim = len(df["encoding"].iloc[0])
    num_actions = len(signals)

    memories = deque([])
    aux_memories = deque([])

    agent = PPG(
        state_dim,
        num_actions,
        actor_hidden_dim,
        critic_hidden_dim,
        epochs,
        epochs_aux,
        minibatch_size,
        lr,
        betas,
        lam,
        gamma,
        beta_s,
        eps_clip,
        value_clip,
    )

    if load:
        agent.load()

    # Global step counter across all episodes; drives the update schedule.
    timestep = 0
    num_policy_updates = 0

    for eps in tqdm(range(num_episodes), desc="episodes"):
        trader = Trader()
        # Defined up front so the summary print below cannot hit an unbound
        # name when `df` has a single row and the inner loop never runs.
        reward = 0.0

        for idx in range(len(df) - 1):
            timestep += 1

            state = np.array(df["encoding"].iloc[idx]).astype(np.float32)
            state = torch.from_numpy(state).to(device)
            action_probs, _ = agent.actor(state)
            value = agent.critic(state)

            dist = Categorical(action_probs)
            action = dist.sample()
            action_log_prob = dist.log_prob(action)
            action = action.item()

            trader.trade_on_signal(df["symbol"].iloc[idx], signals[action],
                                   df["datetime"].iloc[idx])
            reward = trader.reward(df["datetime"].iloc[idx])

            # Only needed as the bootstrap state for agent.learn; the next
            # iteration rebuilds its own state from the frame.
            next_state = np.array(df["encoding"].iloc[idx + 1]).astype(
                np.float32)
            # Episodes never terminate early here, hence done=False.
            memory = Memory(state, action, action_log_prob, reward, False,
                            value)
            memories.append(memory)

            if timestep % update_timesteps == 0:
                agent.learn(memories, aux_memories, next_state)
                num_policy_updates += 1
                memories.clear()

                if num_policy_updates % num_policy_updates_per_aux == 0:
                    agent.learn_aux(aux_memories)
                    aux_memories.clear()

        print(f"reward after episode: {reward:.2f}%")

        if eps % save_every == 0:
            agent.save()
示例#9
0
def main(
    encodings,
    df,
    num_episodes=1000,
    actor_hidden_dim=32,
    critic_hidden_dim=256,
    minibatch_size=64,
    lr=0.0005,
    betas=(0.9, 0.999),
    lam=0.95,
    gamma=0.99,
    eps_clip=0.2,
    value_clip=0.4,
    beta_s=0.01,
    update_timesteps=10000,
    num_policy_updates_per_aux=32,
    epochs=1,
    epochs_aux=6,
    seed=None,
    save_every=5,
    load=False,
):
    """Train a ``PPG`` agent by replaying ``encodings`` / ``df`` per episode.

    Each episode creates a fresh ``Trader`` and walks every row but the last.
    ``encodings[idx]`` is the state and ``df.iloc[idx]`` supplies the trade
    details.  Action ``0`` places a ``"BULLISH"`` trade; other actions do
    nothing.  ``trader.eod`` is called whenever the calendar day changes.
    The reward is ``trader.current_reward`` and an episode ends early once
    it falls below -60.  The final reward of every episode is logged to a
    ``SummaryWriter`` under the tag ``"reward"``.

    Args:
        encodings: 2-D array (``shape[1]`` is the state size) whose rows are
            accepted by ``torch.from_numpy``; row ``k`` describes
            ``df.iloc[k]``.
        df: DataFrame with ``Time``, ``Spot``, ``Expiry`` and ``Ticker``
            columns (``Time`` / ``Expiry`` values must support
            ``.format("YYYY-MM-DD")``).
        num_episodes: number of passes over the data.
        actor_hidden_dim, critic_hidden_dim, epochs, epochs_aux,
        minibatch_size, lr, betas, lam, gamma, beta_s, eps_clip, value_clip:
            forwarded positionally to ``PPG``.
        update_timesteps: steps (counted across episodes) between calls to
            ``agent.learn``.
        num_policy_updates_per_aux: policy updates between calls to
            ``agent.learn_aux``.
        seed: if not ``None``, seeds torch and numpy before the agent is
            built.
        save_every: ``agent.save()`` runs when ``eps % save_every == 0``.
        load: call ``agent.load()`` before training.
    """
    # Seed before constructing the agent so that its weight initialisation
    # is reproducible as well, not only the action sampling.
    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    state_dim = encodings.shape[1]
    num_actions = len(signals)

    memories = deque([])
    aux_memories = deque([])

    agent = PPG(
        state_dim,
        num_actions,
        actor_hidden_dim,
        critic_hidden_dim,
        epochs,
        epochs_aux,
        minibatch_size,
        lr,
        betas,
        lam,
        gamma,
        beta_s,
        eps_clip,
        value_clip,
    )

    if load:
        agent.load()

    # Global step counter across all episodes; drives the update schedule.
    timestep = 0
    num_policy_updates = 0
    reward = 0
    writer = SummaryWriter()

    # try/finally so the event file is flushed and closed even when training
    # is interrupted or raises.
    try:
        for eps in tqdm(range(num_episodes), desc="episodes"):
            trader = Trader()
            day = arrow.get(df["Time"].iloc[0].format("YYYY-MM-DD"))
            done = False

            # Let tqdm take the total from the range itself; a hand-written
            # total of len(encodings) is one more than the steps taken.
            pbar = tqdm(range(len(encodings) - 1))
            for idx in pbar:
                # get row for price and date
                row = df.iloc[idx]
                current_day = arrow.get(row["Time"].format("YYYY-MM-DD"))
                timestep += 1

                # new day, check expiries
                if current_day != day:
                    trader.eod(day.format("YYYY-MM-DD"))
                    day = current_day

                state = encodings[idx]
                state = torch.from_numpy(state).to(device)
                action_probs, _ = agent.actor(state)
                value = agent.critic(state)

                dist = Categorical(action_probs)
                action = dist.sample()
                action_log_prob = dist.log_prob(action)
                action = action.item()

                if action == 0:
                    current_price = row["Spot"]
                    expiry = row["Expiry"].format("YYYY-MM-DD")
                    ticker = row["Ticker"]
                    trader.trade_on_signal(ticker, "BULLISH", current_price,
                                           expiry)

                reward = trader.current_reward

                if idx % 1000 == 0:
                    pbar.set_description(f"current return {reward:.2f}%")

                # stop the episode once losses exceed 60%
                if reward < -60:
                    done = True

                # Only needed as the bootstrap state for agent.learn; the
                # next iteration reads its own state from `encodings`.
                next_state = encodings[idx + 1]
                memory = Memory(state, action, action_log_prob, reward, done,
                                value)

                memories.append(memory)

                if timestep % update_timesteps == 0:
                    agent.learn(memories, aux_memories, next_state)
                    num_policy_updates += 1
                    memories.clear()

                    if num_policy_updates % num_policy_updates_per_aux == 0:
                        agent.learn_aux(aux_memories)
                        aux_memories.clear()

                if done:
                    break

            if eps % save_every == 0:
                agent.save()

            writer.add_scalar("reward", reward, eps)
    finally:
        writer.flush()
        writer.close()