示例#1
0
def approximation():
    """Evaluate the fitted linear-model policy on the stored paths.

    For each horizon ``K`` in ``[50, 100, 200]`` this loads
    ``approximation/{K}/linear_models.pkl``, replays every path returned by
    ``get_paths(K)`` starting from ``get_initial_state()``, picking each
    action with ``get_linear_parametrized_action``, and writes one
    ``calculate_stats`` row per path to ``approximation/results/{K}.csv``.
    """
    # exist_ok=True keeps re-runs quiet when the directory is already there;
    # any other OS-level failure is still only reported, as before.
    try:
        os.makedirs("approximation/results/", exist_ok=True)
    except OSError as e:
        print(e)

    for K in [50, 100, 200]:

        stats = []
        paths = get_paths(K)
        with open(f"approximation/{K}/linear_models.pkl", "rb") as file:
            model = joblib.load(file)

        for path_name in paths:

            # Trajectory bookkeeping: the initial state carries a zero cost.
            state = get_initial_state()
            states, costs = [state], [0]

            for i, tick in enumerate(paths[path_name]):
                # Copies are passed so the helpers cannot alter the state
                # that is stored in ``states``.
                action = get_linear_parametrized_action(model, state.copy(), i)
                next_state, cost = transition_and_cost(state.copy(), action,
                                                       tick)
                states.append(next_state)
                costs.append(cost)
                state = next_state

            stats.append(calculate_stats(states, costs, path_name))

        df = pd.DataFrame(stats, columns=COLS)
        df.to_csv(f"approximation/results/{K}.csv", index=False)
示例#2
0
def back_recursion():
    """Evaluate the stored backward-recursion policy on the stored paths.

    For each horizon ``K`` in ``[50, 100, 200]`` this loads
    ``back_recursion/{K}/back_recursion.pkl``, replays every path returned
    by ``get_paths(K)`` starting from ``get_initial_state()``, and writes one
    ``calculate_stats`` row per path to ``back_recursion/results/{K}.csv``.
    The action at step ``i`` is looked up as ``model[i]['U'][tuple(state)]``.
    """
    # exist_ok=True keeps re-runs quiet when the directory is already there;
    # any other OS-level failure is still only reported, as before.
    try:
        os.makedirs("back_recursion/results/", exist_ok=True)
    except OSError as e:
        print(e)

    for K in [50, 100, 200]:

        stats = []
        paths = get_paths(K)
        with open(f"back_recursion/{K}/back_recursion.pkl", "rb") as file:
            model = joblib.load(file)

        for path_name in paths:

            # Trajectory bookkeeping: the initial state carries a zero cost.
            state = get_initial_state()
            states, costs = [state], [0]

            for i, tick in enumerate(paths[path_name]):
                # The stored policy table is keyed by the state as a tuple.
                action = model[i]['U'][tuple(state)]
                next_state, cost = transition_and_cost(
                    state.copy(), action, tick)
                states.append(next_state)
                costs.append(cost)
                state = next_state

            stats.append(calculate_stats(states, costs, path_name))

        df = pd.DataFrame(stats, columns=COLS)
        df.to_csv(f"back_recursion/results/{K}.csv", index=False)
示例#3
0
def compute_policy(get_action, K, path_name):
    """Run ``get_action`` along one stored path and log quotes, PnL and position.

    The path is read from ``paths/{K}/{path_name}.npy``. Every step adds two
    entries to each log: one taken before the tick is applied and one taken
    after it, so each returned sequence holds ``2 * K`` entries.

    Returns the tuple ``(bid_prices, bid_volumes, ask_prices, ask_volumes,
    unrealized_pnls, realized_pnls, net_position, cprices)`` where
    ``realized_pnls`` and ``cprices`` are cumulative sums.
    """
    bid_prices, bid_volumes = [], []
    ask_prices, ask_volumes = [], []
    prices, cprices = [], []
    unrealized_pnls, realized_pnls, net_position = [], [], []

    path = np.load(f"paths/{K}/{path_name}.npy")
    state = get_initial_state()

    for step in range(K):

        print("Step", step)
        action = get_action(step, state, K)

        # --- entry logged before the jump: a zero price increment ---
        prices.append(0)
        cprices = np.cumsum(prices)
        level = cprices[-1]

        bid_prices.append(action[0] + level)
        ask_prices.append(action[2] + level)
        bid_volumes.append(action[1])
        ask_volumes.append(action[3])

        net_position.append(state[0])
        unrealized_pnls.append(state[1])
        realized_pnls.append(0)

        tick = path[step]
        state, cost = transition_and_cost(state, action, tick)

        # --- entry logged after the jump: the tick is subtracted again so
        # the quotes are plotted at the level they were placed at ---
        prices.append(tick)
        cprices = np.cumsum(prices)
        level = cprices[-1]

        bid_prices.append(action[0] + level - tick)
        ask_prices.append(action[2] + level - tick)
        bid_volumes.append(action[1])
        ask_volumes.append(action[3])

        net_position.append(state[0])
        unrealized_pnls.append(state[1])
        realized_pnls.append(cost)

    realized_pnls = np.cumsum(realized_pnls)
    cprices = np.cumsum(prices)

    return (bid_prices, bid_volumes, ask_prices, ask_volumes,
            unrealized_pnls, realized_pnls, net_position, cprices)
示例#4
0
def deeper(state, total_cost, weight, k):
    """Recursively sample look-ahead trajectories and record their weighted cost.

    When ``k`` equals 2 the value ``total_cost * weight`` is appended to the
    module-level ``costs`` list and the recursion stops. Otherwise, for every
    action from ``get_action_subset(state)``, four ticks are drawn with the
    probabilities in the ``coocc`` row selected by ``state[-1]``, and the
    function recurses on each resulting state with the accumulated cost and
    the weight scaled by the drawn tick's probability.
    """
    global costs

    # Leaf of the look-ahead tree: store the result and stop.
    if k == 2:
        costs.append(total_cost * weight)
        return

    candidate_actions = get_action_subset(state)
    tick_probs = coocc[state[-1] + TICK_LIMIT, :]

    for candidate in candidate_actions:
        sampled_ticks = np.random.choice(TICKS, p=tick_probs, size=4)
        for tick in sampled_ticks:
            successor, step_cost = transition_and_cost(
                state.copy(), candidate, tick)
            deeper(successor,
                   total_cost + step_cost,
                   tick_probs[tick + TICK_LIMIT] * weight,
                   k + 1)
示例#5
0
def get_linear_parametrized_action(k, state, K):
    """Return the action with the largest expected one-step value at stage ``k``.

    For every action from ``get_possible_actions(state)`` the expectation over
    all ``TICKS`` of (immediate cost + ``linear_models[k + 1]`` prediction for
    the resulting state) is computed, weighted by the ``coocc`` entry for the
    current last state component and the tick. ``K`` is accepted but not used.
    """
    expected_values = []
    candidates = get_possible_actions(state)

    for candidate in candidates:

        expectation = 0

        for tick in TICKS:
            prob = coocc[state[-1] + TICK_LIMIT, tick + TICK_LIMIT]
            successor, step_cost = transition_and_cost(
                state.copy(), candidate, tick)
            # The model expects a 2-D batch, so wrap the single state.
            future_value = linear_models[k + 1].predict(
                np.array([successor]))[0]
            expectation += prob * (step_cost + future_value)

        expected_values.append(expectation)

    return candidates[np.argmax(expected_values)]
示例#6
0
def solve(K_):
    """Solve the ``K_``-stage problem by backward recursion and save the result.

    Starting from ``terminal_cost`` on the stage-``K_`` states produced by
    ``state_generator``, each earlier stage gets, for every state, the action
    from ``get_possible_actions`` with the largest expected value (immediate
    cost plus next-stage value, weighted by ``coocc``). The per-stage tables
    ``{"J": values, "U": actions}`` are dumped to
    ``back_recursion/{K_}/back_recursion.pkl`` and the elapsed wall time is
    recorded under ``timer_dict["back_recursion"][K_]`` in
    ``timers/timer_dict.pkl``.
    """
    # makedirs also creates a missing parent directory, so the final dump
    # does not fail after the whole computation has already run.
    try:
        os.makedirs(f"back_recursion/{K_}", exist_ok=True)
    except OSError as e:
        print(e)

    stage = K_
    start = time.time()
    states = state_generator(stage)

    ###################################################################################################

    ### N-Step: terminal values; there is no action at the last stage.
    MEMORY = {}
    J_next = {}
    for state in states[stage]:
        J_next[tuple(state)] = terminal_cost(state)
    MEMORY[stage] = {"J": J_next, "U": {}}

    ## K-Steps: walk backwards from stage K_ - 1 down to stage 0.
    while stage > 0:

        stage -= 1
        print("Starting Stage", stage)

        J_K, U_K = {}, {}
        for state in states[stage]:

            actions = get_possible_actions(state)
            costs = []

            for action in actions:

                avg_cost = 0

                for tick in TICKS:
                    new_state, cost = transition_and_cost(
                        state.copy(), action, tick)
                    p = coocc[state[-1] + TICK_LIMIT, tick + TICK_LIMIT]
                    avg_cost += p * (cost + J_next[tuple(new_state)])

                costs.append(avg_cost)

            idx = np.argmax(costs)
            key = tuple(state)
            J_K[key] = costs[idx]
            U_K[key] = actions[idx]

        J_next = J_K
        MEMORY[stage] = {"J": J_K, "U": U_K}

        gc.collect()

    # Write next to the directory created above. The original used the
    # unrelated global ``args.K`` here, which breaks whenever the function
    # is called with a different horizon or without parsed CLI arguments.
    with open(f"back_recursion/{K_}/back_recursion.pkl", "wb") as file:
        joblib.dump(MEMORY, file)

    ###################################################################################################

    end = time.time()
    elapsed = end - start

    try:
        os.makedirs("timers", exist_ok=True)
        with open("timers/timer_dict.pkl", "rb") as file:
            timer_dict = joblib.load(file)
        if not timer_dict.get("back_recursion", None):
            timer_dict["back_recursion"] = {}
        timer_dict["back_recursion"][K_] = elapsed
    except Exception as e:
        # Missing or unusable timer file: start a fresh record.
        print(e)
        timer_dict = {"back_recursion": {K_: elapsed}}

    with open("timers/timer_dict.pkl", "wb") as file:
        joblib.dump(timer_dict, file)
示例#7
0
def rollout(K_, path_name):
    """Run the one-step look-ahead rollout policy along one stored path.

    At each of the ``K_`` stages every action from ``get_possible_actions``
    is scored by its expected immediate cost plus the mean of the sampled
    look-ahead costs collected by ``deeper`` (through the module-level
    ``costs`` list), weighted by ``coocc``. The best-scoring action is then
    applied with the actual tick ``path[k]``.

    The visited state pairs, chosen actions and rewards are dumped to
    ``rollout/{K_}/{path_name}_policy.pkl`` and the elapsed wall time is
    recorded under ``timer_dict["rollout"][K_]`` in ``timers/timer_dict.pkl``.
    """
    global costs

    # makedirs also creates a missing parent directory, so the final dump
    # does not fail after the whole computation has already run.
    try:
        os.makedirs(f"rollout/{K_}/", exist_ok=True)
    except OSError as e:
        print(e)

    path = np.load(f"paths/{K_}/{path_name}.npy")
    ###################################################################################################

    start = time.time()

    state = get_initial_state()
    states, policy, rewards = [], [], []

    ## Only 1 step ahead
    for k in range(K_):

        print("Stage", k)

        actions = get_possible_actions(state)
        cost_to_gos = []

        for action in actions:
            avg_cost = 0
            for tick in TICKS:
                next_state, cost = transition_and_cost(state.copy(), action,
                                                       tick)
                p = coocc[state[-1] + TICK_LIMIT, tick + TICK_LIMIT]
                ## Approximate Cost To Go Function: ``deeper`` appends its
                ## samples to the global list, so reset it before each call.
                costs = []
                deeper(next_state, 0, 1, 0)
                avg_cost += (np.mean(costs) + cost) * p
            cost_to_gos.append(avg_cost)

        idx = np.argmax(cost_to_gos)
        best_action = actions[idx]
        next_state, reward = transition_and_cost(state.copy(), best_action,
                                                 path[k])

        states.append([state, next_state])
        policy.append(best_action)
        rewards.append(reward)

        state = next_state

    end = time.time()
    elapsed = end - start

    ###################################################################################################

    objs = {"states": states, "policy": policy, "rewards": rewards}
    with open(f'rollout/{K_}/{path_name}_policy.pkl', 'wb') as file:
        joblib.dump(objs, file)

    try:
        os.makedirs("timers", exist_ok=True)
        with open("timers/timer_dict.pkl", "rb") as file:
            timer_dict = joblib.load(file)
        if not timer_dict.get("rollout", None):
            timer_dict["rollout"] = {}
        timer_dict["rollout"][K_] = elapsed
    except Exception as e:
        # Missing or unusable timer file: start a fresh record.
        print(e)
        timer_dict = {"rollout": {K_: elapsed}}

    with open("timers/timer_dict.pkl", "wb") as file:
        joblib.dump(timer_dict, file)
def approx(K_):
    """Fit one linear value-function model per stage by backward recursion.

    Reads the module-level ``states`` mapping (stage -> ``{state: count}``).
    The stage-``K_`` model is fitted to ``terminal_cost``; each earlier stage
    is fitted to the best expected value over ``get_possible_actions``, using
    the next stage's model as the continuation value. Every state is repeated
    ``int(count / 4)`` times in the training set.

    The ``{stage: model}`` dict is dumped to
    ``approximation/{K_}/linear_models.pkl`` and the elapsed wall time is
    recorded under ``timer_dict["approximation"][K_]`` in
    ``timers/timer_dict.pkl``.
    """
    # makedirs also creates a missing parent directory, so the final dump
    # does not fail after the whole computation has already run.
    try:
        os.makedirs(f"approximation/{K_}", exist_ok=True)
    except OSError as e:
        print(e)

    start = time.time()
    np.random.seed(72)
    models = {}
    K = K_

    # Terminal stage: regress the terminal cost on the state vector.
    X, y = [], []
    for state, count in states[K].items():

        cost = terminal_cost(state)

        state = list(state)
        count = int(count / 4)

        X.extend([state] * count)
        y.extend([cost] * count)

    X, y = np.array(X), np.array(y)

    model = LinearRegression().fit(X, y)
    models[K] = model
    print(f"Stage {K} Model Fitted")

    K -= 1
    while (K >= 0):

        X = []
        y = []

        for state, count in states[K].items():

            state = list(state)
            count = int(count / 4)

            actions = get_possible_actions(state)
            costs = []

            for action in actions:

                tick_costs = []
                next_states = []
                ps = []

                for tick in TICKS:

                    p = coocc[state[-1] + TICK_LIMIT, tick + TICK_LIMIT]
                    next_state, cost = transition_and_cost(
                        state.copy(), action, tick)

                    ps.append(p)
                    tick_costs.append(cost)
                    next_states.append(next_state)

                tick_costs = np.array(tick_costs)
                ps = np.array(ps)

                # ``model`` is still the stage K + 1 fit at this point; one
                # batched predict covers every tick outcome of this action.
                next_states = np.array(next_states)
                J_k_1 = model.predict(next_states).reshape(-1)
                costs.append(((tick_costs + J_k_1) * ps).sum())

            idx = np.argmax(costs)
            X.extend([state] * count)
            y.extend([costs[idx]] * count)

        X = np.array(X)
        y = np.array(y)

        model = LinearRegression().fit(X, y)
        models[K] = model
        print(f"Stage {K} Model Fitted")
        K -= 1

    end = time.time()
    elapsed = end - start

    with open(f'approximation/{K_}/linear_models.pkl', 'wb') as file:
        joblib.dump(models, file)

    ###################################################################################################

    try:
        os.makedirs("timers", exist_ok=True)
        with open("timers/timer_dict.pkl", "rb") as file:
            timer_dict = joblib.load(file)
        if not timer_dict.get("approximation", None):
            timer_dict["approximation"] = {}
        timer_dict["approximation"][K_] = elapsed
    except Exception as e:
        # Missing or unusable timer file: start a fresh record.
        print(e)
        timer_dict = {"approximation": {K_: elapsed}}

    with open("timers/timer_dict.pkl", "wb") as file:
        joblib.dump(timer_dict, file)
示例#9
0
    i : {} for i in range(LENGTH)
}
# Count, for every step, how often each state is reached in one transition
# from the states visited by a uniformly random policy along each path.
start = time.time()
for i, path in enumerate(paths):

    print(f"Progress: {(i + 1 ) / len(paths) * 100}%")
    state = get_initial_state()

    # A separate name for the step index keeps the path index ``i`` intact.
    for step, v in enumerate(path):

        if step + 1 == LENGTH:
            break
        actions = get_possible_actions(state)

        for action in actions:

            next_state, cost = transition_and_cost(state.copy(), action, v)
            next_state = tuple(next_state)

            # dict.get replaces the bare ``except:``, which would also have
            # hidden any unrelated error raised by the increment.
            seen = state_at_step[step + 1]
            seen[next_state] = seen.get(next_state, 0) + 1

        # Continue the walk with one uniformly chosen action.
        idx = np.random.randint(0, len(actions))
        next_state, cost = transition_and_cost(state.copy(), actions[idx], v)
        state = next_state
end = time.time()

print("Total Computation Time", end - start)
with open(f'states/states_{NUM_PATHS}.pkl', 'wb') as file:
    joblib.dump(state_at_step, file)