def approximation():
    """Replay the stored linear-model policy on every path and export stats.

    For each horizon K in (50, 100, 200) the pickled model at
    ``approximation/{K}/linear_models.pkl`` is loaded, every path returned by
    ``get_paths(K)`` is simulated from ``get_initial_state()``, and one row
    produced by ``calculate_stats`` per path is written to
    ``approximation/results/{K}.csv``.
    """
    # Best-effort directory creation: any failure (including "already
    # exists") is only reported, never raised.
    try:
        os.mkdir("approximation/results/")
    except Exception as err:
        print(err)

    for K in [50, 100, 200]:
        paths = get_paths(K)
        with open(f"approximation/{K}/linear_models.pkl", "rb") as handle:
            model = joblib.load(handle)

        rows = []
        for path_name in paths:
            current = get_initial_state()
            # The trajectory starts with the initial state and a zero cost.
            visited = [current]
            incurred = [0]
            for step, tick in enumerate(paths[path_name]):
                chosen = get_linear_parametrized_action(
                    model, current.copy(), step)
                # Copies are passed so the helpers cannot alter `current`.
                successor, step_cost = transition_and_cost(
                    current.copy(), chosen, tick)
                visited.append(successor)
                incurred.append(step_cost)
                current = successor
            rows.append(calculate_stats(visited, incurred, path_name))

        frame = pd.DataFrame(rows, columns=COLS)
        frame.to_csv(f"approximation/results/{K}.csv", index=False)
def back_recursion():
    """Replay the stored backward-recursion policy on every path and export stats.

    For each horizon K in (50, 100, 200) the pickled table at
    ``back_recursion/{K}/back_recursion.pkl`` is loaded. At step ``i`` the
    action is looked up as ``model[i]['U'][tuple(state)]``. One row produced
    by ``calculate_stats`` per path is written to
    ``back_recursion/results/{K}.csv``.
    """
    # Best-effort directory creation: any failure (including "already
    # exists") is only reported, never raised.
    try:
        os.mkdir("back_recursion/results/")
    except Exception as err:
        print(err)

    for K in [50, 100, 200]:
        paths = get_paths(K)
        with open(f"back_recursion/{K}/back_recursion.pkl", "rb") as handle:
            model = joblib.load(handle)

        rows = []
        for path_name in paths:
            current = get_initial_state()
            # The trajectory starts with the initial state and a zero cost.
            visited = [current]
            incurred = [0]
            for step, tick in enumerate(paths[path_name]):
                # Tabulated action for this stage and state.
                chosen = model[step]['U'][tuple(current)]
                successor, step_cost = transition_and_cost(
                    current.copy(), chosen, tick)
                visited.append(successor)
                incurred.append(step_cost)
                current = successor
            rows.append(calculate_stats(visited, incurred, path_name))

        frame = pd.DataFrame(rows, columns=COLS)
        frame.to_csv(f"back_recursion/results/{K}.csv", index=False)
def compute_policy(get_action, K, path_name):
    """Run a policy along one stored path and log two rows per step.

    The path is loaded from ``paths/{K}/{path_name}.npy``. For each of the K
    steps the action from ``get_action(step, state, K)`` is logged once
    before the tick is applied (price increment 0, realized increment 0) and
    once after ``transition_and_cost`` has been applied with the tick.

    Args:
        get_action: callable invoked as ``get_action(step, state, K)``; the
            result is indexed 0..3 (bid price, bid volume, ask price, ask
            volume, judging by the lists they are logged to).
        K: number of steps to simulate; also selects the path directory.
        path_name: file stem of the path to load.

    Returns:
        Tuple ``(bid_prices, bid_volumes, ask_prices, ask_volumes,
        unrealized_pnls, realized_pnls, net_position, cprices)`` where
        ``realized_pnls`` and ``cprices`` are cumulative sums (NumPy arrays)
        and the remaining entries are plain lists.
    """
    path = np.load(f"paths/{K}/{path_name}.npy")
    state = get_initial_state()

    bid_prices, bid_volumes = [], []
    ask_prices, ask_volumes = [], []
    unrealized_pnls, realized_pnls, net_position = [], [], []
    prices = []

    for step in range(K):
        print("Step", step)
        action = get_action(step, state, K)

        # Row 1: the quotes as posted, before the price moves.
        prices.append(0)
        level = np.cumsum(prices)[-1]
        bid_prices.append(action[0] + level)
        bid_volumes.append(action[1])
        ask_prices.append(action[2] + level)
        ask_volumes.append(action[3])
        unrealized_pnls.append(state[1])
        realized_pnls.append(0)
        net_position.append(state[0])

        tick = path[step]
        state, cost = transition_and_cost(state, action, tick)

        # Row 2: same quotes after the tick; subtracting the tick keeps the
        # quote levels where they were posted while the price series moves.
        prices.append(tick)
        level = np.cumsum(prices)[-1]
        bid_prices.append(action[0] + level - tick)
        bid_volumes.append(action[1])
        ask_prices.append(action[2] + level - tick)
        ask_volumes.append(action[3])
        unrealized_pnls.append(state[1])
        realized_pnls.append(cost)
        net_position.append(state[0])

    return (
        bid_prices,
        bid_volumes,
        ask_prices,
        ask_volumes,
        unrealized_pnls,
        np.cumsum(realized_pnls),
        net_position,
        np.cumsum(prices),
    )
def deeper(state, total_cost, weight, k):
    """Recursively sample two further steps and record weighted path costs.

    At depth ``k == 2`` the value ``total_cost * weight`` is appended to the
    module-level list ``costs``. Otherwise, for every action returned by
    ``get_action_subset(state)``, four ticks are drawn with probabilities
    taken from the ``coocc`` row selected by ``state[-1] + TICK_LIMIT``, and
    the function recurses on each resulting state with the cost accumulated
    and the weight multiplied by the drawn tick's probability.

    Args:
        state: current state; its last entry selects the ``coocc`` row.
        total_cost: cost accumulated along this branch so far.
        weight: product of tick probabilities along this branch so far.
        k: current recursion depth (recursion stops at 2).
    """
    global costs

    if k == 2:
        costs.append(total_cost * weight)
        return

    candidates = get_action_subset(state)
    tick_probs = coocc[state[-1] + TICK_LIMIT, :]
    for action in candidates:
        sampled_ticks = np.random.choice(TICKS, p=tick_probs, size=4)
        for tick in sampled_ticks:
            successor, step_cost = transition_and_cost(
                state.copy(), action, tick)
            deeper(
                successor,
                total_cost + step_cost,
                tick_probs[tick + TICK_LIMIT] * weight,
                k + 1,
            )
def get_linear_parametrized_action(k, state, K):
    """Pick the action with the highest expected one-step value at stage k.

    For every action from ``get_possible_actions(state)`` the expectation
    over all ``TICKS`` of (immediate cost + prediction of
    ``linear_models[k + 1]`` on the next state) is computed, weighted by the
    ``coocc`` entry for (last state component, tick).

    Args:
        k: current stage; the model for stage ``k + 1`` scores next states.
        state: current state; copied before each simulated transition.
        K: unused in the body; kept for the caller-facing signature.

    Returns:
        The element of the action list with the largest expected value
        (the first one on ties, per ``np.argmax``).
    """
    actions = get_possible_actions(state)

    expected_values = []
    for action in actions:
        expected = 0
        for tick in TICKS:
            prob = coocc[state[-1] + TICK_LIMIT, tick + TICK_LIMIT]
            successor, step_cost = transition_and_cost(
                state.copy(), action, tick)
            # The model takes a 2-D input, so wrap the single state in a batch.
            features = np.array([successor])
            value_to_go = linear_models[k + 1].predict(features)[0]
            expected += prob * (step_cost + value_to_go)
        expected_values.append(expected)

    best = np.argmax(expected_values)
    return actions[best]
def solve(K_):
    """Solve the K_-stage problem by exact backward recursion and save it.

    Starting from ``terminal_cost`` at stage ``K_``, every earlier stage is
    filled in by maximising, over ``get_possible_actions(state)``, the
    expectation over ``TICKS`` of (immediate cost + next-stage value),
    weighted by ``coocc``. The per-stage tables ``{"J": values, "U":
    actions}`` are pickled to ``back_recursion/{K_}/back_recursion.pkl`` and
    the elapsed wall-clock time is recorded in ``timers/timer_dict.pkl``
    under ``["back_recursion"][K_]``.

    Args:
        K_: number of stages (horizon) to solve for.
    """
    # Create the output directory (and parents) up front; an existing
    # directory is fine, other OS errors are reported but not raised so the
    # run stays best-effort as before.
    try:
        os.makedirs(f"back_recursion/{K_}", exist_ok=True)
    except OSError as e:
        print(e)

    start = time.time()
    states = state_generator(K_)

    # Terminal stage: value is the terminal cost, no action is taken.
    MEMORY = {}
    J_next = {tuple(state): terminal_cost(state) for state in states[K_]}
    MEMORY[K_] = {"J": J_next, "U": {}}

    # Walk the stages backwards: K_ - 1, ..., 0.
    for stage in range(K_ - 1, -1, -1):
        print("Starting Stage", stage)
        J_stage, U_stage = {}, {}
        for state in states[stage]:
            actions = get_possible_actions(state)
            expected_costs = []
            for action in actions:
                avg_cost = 0
                for tick in TICKS:
                    new_state, cost = transition_and_cost(
                        state.copy(), action, tick)
                    p = coocc[state[-1] + TICK_LIMIT, tick + TICK_LIMIT]
                    avg_cost += p * (cost + J_next[tuple(new_state)])
                expected_costs.append(avg_cost)
            best = np.argmax(expected_costs)
            key = tuple(state)
            J_stage[key] = expected_costs[best]
            U_stage[key] = actions[best]
        J_next = J_stage
        MEMORY[stage] = {"J": J_stage, "U": U_stage}
        gc.collect()

    # Write into the directory created above for this K_, not one derived
    # from a module-level command-line value.
    with open(f"back_recursion/{K_}/back_recursion.pkl", "wb") as file:
        joblib.dump(MEMORY, file)

    end = time.time()
    elapsed = end - start

    # Merge the timing into the shared timer file. If it cannot be read or
    # updated for any reason, fall back to starting a fresh file.
    try:
        with open("timers/timer_dict.pkl", "rb") as file:
            timer_dict = joblib.load(file)
        if not timer_dict.get("back_recursion"):
            timer_dict["back_recursion"] = {}
        timer_dict["back_recursion"][K_] = elapsed
        with open("timers/timer_dict.pkl", "wb") as file:
            joblib.dump(timer_dict, file)
    except Exception as e:
        print(e)
        with open("timers/timer_dict.pkl", "wb") as file:
            timer_dict = {"back_recursion": {K_: elapsed}}
            joblib.dump(timer_dict, file)
def rollout(K_, path_name):
    """Run a one-step-lookahead rollout policy along one stored path.

    At every stage each candidate action is scored by the expectation over
    ``TICKS`` of (immediate cost + mean of the values that ``deeper``
    appends to the module-level ``costs`` list), weighted by ``coocc``. The
    best action is then applied with the real tick from
    ``paths/{K_}/{path_name}.npy``. The visited state pairs, chosen actions
    and rewards are pickled to ``rollout/{K_}/{path_name}_policy.pkl`` and
    the elapsed time is stored in ``timers/timer_dict.pkl`` under
    ``["rollout"][K_]``.

    Args:
        K_: number of stages to simulate; also selects the directories.
        path_name: file stem of the path to load.
    """
    # `deeper` reports its samples through this module-level list.
    global costs

    # Best-effort directory creation: failures are only reported.
    try:
        os.mkdir(f"rollout/{K_}/")
    except Exception as err:
        print(err)

    path = np.load(f"paths/{K_}/{path_name}.npy")

    start = time.time()
    state = get_initial_state()
    states, policy, rewards = [], [], []

    # Only one step of exact lookahead; the tail is estimated by sampling.
    for stage in range(K_):
        print("Stage", stage)
        actions = get_possible_actions(state)

        cost_to_gos = []
        for action in actions:
            expected = 0
            for tick in TICKS:
                successor, step_cost = transition_and_cost(
                    state.copy(), action, tick)
                prob = coocc[state[-1] + TICK_LIMIT, tick + TICK_LIMIT]
                # Reset the shared buffer, let `deeper` fill it, then average.
                costs = []
                deeper(successor, 0, 1, 0)
                expected += (np.mean(costs) + step_cost) * prob
            cost_to_gos.append(expected)

        best_action = actions[np.argmax(cost_to_gos)]
        next_state, reward = transition_and_cost(
            state.copy(), best_action, path[stage])
        states.append([state, next_state])
        policy.append(best_action)
        rewards.append(reward)
        state = next_state
    end = time.time()

    objs = {"states": states, "policy": policy, "rewards": rewards}
    with open(f'rollout/{K_}/{path_name}_policy.pkl', 'wb') as handle:
        joblib.dump(objs, handle)

    # Merge the timing into the shared timer file; on any failure start a
    # fresh file holding only this entry.
    try:
        with open("timers/timer_dict.pkl", "rb") as handle:
            timer_dict = joblib.load(handle)
        existing = timer_dict.get("rollout", None)
        if not existing:
            timer_dict["rollout"] = {}
        timer_dict["rollout"][K_] = end - start
        with open("timers/timer_dict.pkl", "wb") as handle:
            joblib.dump(timer_dict, handle)
    except Exception as err:
        print(err)
        with open("timers/timer_dict.pkl", "wb") as handle:
            timer_dict = {"rollout": {}}
            timer_dict["rollout"][K_] = end - start
            joblib.dump(timer_dict, handle)
def approx(K_):
    """Fit one linear value model per stage by backward recursion and save them.

    Stage ``K_`` is fitted on ``terminal_cost``. Each earlier stage is
    fitted on the best expected (immediate cost + next-stage model
    prediction) over ``get_possible_actions``. Every state from the
    module-level ``states[stage]`` mapping is repeated ``int(count / 4)``
    times in the training set. The models are pickled to
    ``approximation/{K_}/linear_models.pkl`` and the elapsed time is stored
    in ``timers/timer_dict.pkl`` under ``["approximation"][K_]``.

    Args:
        K_: index of the terminal stage; models are fitted for K_, ..., 0.
    """
    # Best-effort directory creation: failures are only reported.
    try:
        os.mkdir(f"approximation/{K_}")
    except Exception as err:
        print(err)

    start = time.time()
    np.random.seed(72)
    models = {}

    # Terminal stage: the regression target is the terminal cost.
    features, targets = [], []
    for state, count in states[K_].items():
        target = terminal_cost(state)
        row = list(state)
        repeats = int(count / 4)
        features.extend([row] * repeats)
        targets.extend([target] * repeats)
    model = LinearRegression().fit(np.array(features), np.array(targets))
    models[K_] = model
    print(f"Stage {K_} Model Fitted")

    # Earlier stages, from K_ - 1 down to 0; `model` always holds the fit of
    # the stage after the one being processed.
    for stage in range(K_ - 1, -1, -1):
        features, targets = [], []
        for state, count in states[stage].items():
            row = list(state)
            repeats = int(count / 4)
            action_values = []
            for action in get_possible_actions(row):
                probs, step_costs, successors = [], [], []
                for tick in TICKS:
                    prob = coocc[row[-1] + TICK_LIMIT, tick + TICK_LIMIT]
                    successor, step_cost = transition_and_cost(
                        row.copy(), action, tick)
                    probs.append(prob)
                    step_costs.append(step_cost)
                    successors.append(successor)
                step_costs = np.array(step_costs)
                probs = np.array(probs)
                # One batched prediction for all ticks of this action.
                value_to_go = model.predict(np.array(successors)).reshape(-1)
                action_values.append(((step_costs + value_to_go) * probs).sum())
            best = np.argmax(action_values)
            features.extend([row] * repeats)
            targets.extend([action_values[best]] * repeats)
        model = LinearRegression().fit(np.array(features), np.array(targets))
        models[stage] = model
        print(f"Stage {stage} Model Fitted")
    end = time.time()

    with open(f'approximation/{K_}/linear_models.pkl', 'wb') as handle:
        joblib.dump(models, handle)

    # Merge the timing into the shared timer file; on any failure start a
    # fresh file holding only this entry.
    try:
        with open("timers/timer_dict.pkl", "rb") as handle:
            timer_dict = joblib.load(handle)
        existing = timer_dict.get("approximation", None)
        if not existing:
            timer_dict["approximation"] = {}
        timer_dict["approximation"][K_] = end - start
        with open("timers/timer_dict.pkl", "wb") as handle:
            joblib.dump(timer_dict, handle)
    except Exception as err:
        print(err)
        with open("timers/timer_dict.pkl", "wb") as handle:
            timer_dict = {"approximation": {}}
            timer_dict["approximation"][K_] = end - start
            joblib.dump(timer_dict, handle)
i : {} for i in range(LENGTH) } start = time.time() for i, path in enumerate(paths): print(f"Progress: {(i + 1 ) / len(paths) * 100}%") state = get_initial_state() for i, v in enumerate(path): if i+1 == LENGTH: break actions = get_possible_actions(state) for action in actions: next_state, cost = transition_and_cost(state.copy(), action, v) next_state = tuple(next_state) try: state_at_step[i+1][next_state] += 1 except: state_at_step[i+1][next_state] = 1 idx = np.random.randint(0, len(actions)) next_state, cost = transition_and_cost(state.copy(), actions[idx], v) state = next_state end = time.time() print("Total Computation Time", end - start) with open(f'states/states_{NUM_PATHS}.pkl', 'wb') as file: joblib.dump(state_at_step, file)