Example #1
def optimize(self, env, t):
    # Solve the single-period problem at time t; arms are either whole
    # 2-cycles (as sorted vertex tuples) or the flat set of matched vertices.
    if self.method == "cycles":
        pulled_arms = optimal(env, t, t)["matched_cycles"][t]
        pulled_arms = [tuple(sorted(x)) for x in pulled_arms]
    else:
        pulled_arms = optimal(env, t, t)["matched"][t]
    return pulled_arms
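Note: across the examples on this page, optimal(env, t_begin, t_end) returns a dict whose keys include "matched", "matched_cycles", "matched_pairs", and "obj". A minimal sketch of inspecting it (environment setup borrowed from Example #12; the constructor arguments are illustrative):

from matching.environment.abo_environment import ABOKidneyExchange
from matching.solver.kidney_solver2 import optimal

env = ABOKidneyExchange(4, .1, 10, seed=0)
sol = optimal(env, 0, 0)          # solve period 0 only
print(sol["matched"][0])          # vertex ids matched at t = 0
print(sol["matched_cycles"][0])   # the chosen 2-cycles at t = 0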
Example #2
def get_features(env):
    opt = optimal(env)

    features = []
    labels = []

    for t in range(env.time_length):
        liv = np.array(env.get_living(t))
        A = env.A(t)
        # Keep only vertices that belong to at least one 2-cycle.
        has_cycle = np.diag(A @ A) > 0
        liv = liv[has_cycle]

        m = opt["matched"][t]

        # Label a vertex 1 if the offline optimum matches it at time t.
        Y = np.zeros(len(liv))
        Y[np.isin(liv, list(m))] = 1
        labels.append(Y)

        if len(liv) > 0:
            X = env.X(t)[has_cycle]
            subg = env.subgraph(liv)
            E = run_node2vec(subg)
            features.append(np.hstack([X, E]))

        # Remove the optimally matched vertices before moving to t + 1.
        env.removed_container[t].update(m)

    return np.vstack(features), np.hstack(labels)
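The stacked output can feed a standard classifier; the estimator below is an illustrative assumption, not part of the original snippet:

from sklearn.linear_model import LogisticRegression  # illustrative choice

X, y = get_features(env)                 # features and 0/1 "was matched" labels
clf = LogisticRegression(max_iter=1000).fit(X, y)
p_matched = clf.predict_proba(X)[:, 1]   # estimated match probability per vertex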
Example #3
def get_cost(self, v: np.ndarray):
    # v is a 0/1 indicator over self.cycles; flatten the chosen cycles
    # into one list of vertices.
    cycle = list(
        chain(*[self.cycles[i] for i, x in enumerate(v) if x == 1]))
    snap = snapshot(self.env, self.t)
    snap.removed_container[self.t].update(cycle)
    # Simulate h additional periods on the snapshot and score the result.
    snap.populate(self.t + 1, self.t + self.h + 1, seed=clock_seed())
    reward = optimal(snap, self.t, self.h + 1)["obj"] + len(cycle)
    return -(reward / self.h)
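get_cost evaluates a 0/1 indicator vector over candidate cycles; a hedged sketch of scoring a single arm, assuming a bandit object algo that carries cycles, env, t, and h as the snippet implies:

import numpy as np

v = np.zeros(len(algo.cycles), dtype=int)
v[0] = 1                 # take only the first candidate cycle
cost = algo.get_cost(v)  # negated per-period simulated reward; lower is better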
Example #4
def count_matched_today(env, t_begin, t_end, taken, n_times=10):
    snap = snapshot(env, t_begin)
    matched_today = 0

    for i in range(n_times):
        # Resample the future arrivals, then check whether the offline
        # optimum matches the candidate cycle `taken` at t_begin.
        if t_begin + 1 < t_end:
            snap.populate(t_begin + 1, t_end)
        opt_mt = optimal(snap, t_begin, t_end)["matched_cycles"][t_begin]
        matched_today += taken in opt_mt

    return matched_today / n_times
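Because populate re-randomizes future arrivals on each pass, the return value estimates how often the offline optimum would match the candidate today. A sketch scoring every current 2-cycle (the 10-period lookahead is illustrative):

cycles = two_cycles(env, t)  # candidate 2-cycles at time t, as in Example #12
scores = [count_matched_today(env, t, t + 10, taken) for taken in cycles]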
Example #5
def get_actions(env, t, n_times=100):
    sols = []

    for _ in range(n_times):
        opt_m = optimal(env, t, t)["matched_pairs"]
        if len(opt_m) == 0:
            break
        sols.append(tuple(sorted(opt_m)))

    # Deduplicate, and always offer the empty action ().
    return list(set(sols)) + [()]
Example #6
def get_cycles(env, t, n_times):
    cycles = set()

    for _ in range(n_times):
        # Repeated resolves can return different optima, so the set
        # accumulates distinct cycles across calls.
        cs = optimal(env, t, t)["matched_cycles"][t]
        cycles.update([tuple(cyc) for cyc in cs])

    cycles = list(cycles)
    shuffle(cycles)

    return cycles
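get_actions and get_cycles both rely on repeated resolves returning different optimal solutions (ties presumably broken at random), so the accumulating set enumerates distinct optima. For example:

actions = get_actions(env, t)            # distinct optimal matchings, plus the empty action ()
cycles = get_cycles(env, t, n_times=20)  # distinct cycles seen over 20 resolves, shuffled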
Example #7
def best_cycle(net, env, t, thres=0):
    # Map living vertex ids to their row index in the policy output.
    liv_idx = {j: i for i, j in enumerate(env.get_living(t))}
    if len(liv_idx) == 0:
        return None
    a_probs = evaluate_policy(net, env, t)
    cycles = list(map(tuple, optimal(env, t, t)["matched_cycles"][t]))
    if len(cycles) == 0:
        return None
    # Score each 2-cycle by the mean policy probability of its two vertices.
    np_probs = np.zeros(len(cycles))
    for q, (i, j) in enumerate(cycles):
        np_probs[q] = np.mean((a_probs[liv_idx[i]], a_probs[liv_idx[j]]))
    selected = np.argmax(np_probs)
    if np_probs[selected] >= thres:
        return cycles[selected]
    return None
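A sketch of committing the selected cycle, following the removed_container pattern used in Example #12 (the threshold value is illustrative):

cyc = best_cycle(net, env, t, thres=0.5)
if cyc is not None:
    env.removed_container[t].update(cyc)  # commit the chosen 2-cycle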
Example #8
def rollout(env, t_begin, t_end, taken, gamma):
    snap = snapshot(env, t_begin)
    snap.populate(t_begin + 1, t_end, seed=clock_seed())
    snap.removed_container[t_begin].update(taken)

    # Value of the horizon if we take `taken` now and the offline
    # optimum plays from t_begin + 1 onward.
    opt = optimal(snap, t_begin + 1, t_end)
    opt_matched = get_n_matched(opt["matched"], t_begin, t_end)
    opt_matched[0] = len(taken)
    opt_value = disc_mean(opt_matched, gamma)

    # Disabled greedy baseline (would turn the return into an advantage):
    # g = greedy(snap, t_begin + 1, t_end)
    # g_matched = get_n_matched(g["matched"], t_begin, t_end)
    # g_matched[0] = len(taken)
    # g_value = disc_mean(g_matched, gamma)

    return opt_value  # - g_value
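A single rollout is noisy, so in practice it would be averaged over several sampled futures; a hedged sketch for some candidate cycle cyc (horizon and discount values are illustrative):

import numpy as np

values = [rollout(env, t, t + 20, taken=cyc, gamma=0.9) for _ in range(10)]
value_estimate = np.mean(values)  # Monte Carlo estimate of taking cyc at t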
Example #9
def get_features(env):
    opt = optimal(env)

    labels = []
    pair_features = []
    networkx_features = []
    node2vec_features = []

    for t in trange(env.time_length):
        try:
            liv = np.array(env.get_living(t))
            A_full = env.A(t)
            has_cycle = np.einsum("ij,ji->i", A_full, A_full) > 0  # diag(A_full @ A_full) without forming the product
            if not np.any(has_cycle):
                continue

            X = env.X(t)[has_cycle]
            A = A_full[has_cycle, :][:, has_cycle]
            E = run_node2vec(A)
            G = get_additional_regressors(env, t, dtype="numpy")[has_cycle]

            liv_and_cycle = liv[has_cycle]
            m = opt["matched"][t]

            Y = np.isin(liv_and_cycle, list(m)).astype(int)
            labels.append(Y)

            env.removed_container[t].update(m)

            assert G.shape[0] == E.shape[0] == X.shape[0]
            pair_features.append(X)
            networkx_features.append(G)
            node2vec_features.append(E)

        except Exception as e:
            # Development aid: drop into the debugger on any failure.
            print(e)
            import pdb
            pdb.set_trace()

    return pair_features, networkx_features, node2vec_features, labels
Example #10
    horizon = 10
    newseed = str(np.random.randint(1e8))
    train = True
    disc = 0.1

    net = torch.load("results/RNN_50-1-abo_4386504")

    #%%

    for k in [2]:

        print("Creating environment")
        env = ABOKidneyExchange(entry_rate, death_rate, time_length, seed=k)

        print("Solving environment")
        opt = optimal(env)
        gre = greedy(env)

        o = get_n_matched(opt["matched"], 0, env.time_length)
        g = get_n_matched(gre["matched"], 0, env.time_length)

        rewards = []
        actions = []
        t = -1
        print("Beginning")
        #%%
        for t in range(env.time_length):

            living = np.array(env.get_living(t))
            if len(living) == 1:
                continue  # a single vertex cannot form a 2-cycle
Example #11
        max_time = 200
        n_sims = 100  # choice([1, 5, 10, 20, 50, 100, 500, 1000, 2000])
        n_prior = 50  # choice([1, 5, 10, 20, 50])
        seed = 123456  # clock_seed()

        print("Opening file", file)
        try:
            net = torch.load("results/" + file)
        except Exception as e:
            print(str(e))
            continue

        env_type = "abo"
        env = ABOKidneyExchange(entry_rate, death_rate, max_time, seed=seed)

        opt = optimal(env)
        gre = greedy(env)

        o = get_n_matched(opt["matched"], 0, env.time_length)
        g = get_n_matched(gre["matched"], 0, env.time_length)

        rewards = np.zeros(env.time_length)

        #%%
        np.random.seed(clock_seed())
        for t in range(env.time_length):
            probs, count = evaluate_policy(net, env, t, dtype="numpy")

            for i in range(count):
                probs, _ = evaluate_policy(net, env, t, dtype="numpy")
                cycles = two_cycles(env, t)
Example #12
from random import choice
import numpy as np
from tqdm import trange

from matching.utils.env_utils import two_cycles
from matching.solver.kidney_solver2 import optimal, greedy
from matching.utils.data_utils import clock_seed, get_n_matched

from matching.environment.abo_environment import ABOKidneyExchange
from matching.bandits.combinatorial import CombinatorialBandit

env = ABOKidneyExchange(4, .1, 101)

rewards = np.zeros(env.time_length)

opt = optimal(env)
gre = greedy(env)
o = get_n_matched(opt["matched"], 0, env.time_length)
g = get_n_matched(gre["matched"], 0, env.time_length)

for t in trange(env.time_length):

    cycles = two_cycles(env, t)
    print("len(cycles): ", len(cycles))
    if len(cycles) > 0:
        algo = CombinatorialBandit(env, t, iters_per_arm=10, max_match=4)
        algo.simulate()
        best = algo.choose()
        env.removed_container[t].update(best)
        rewards[t] = len(best)
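After the loop, the bandit's per-period rewards can be compared against the offline optimal and greedy totals computed above:

print("bandit: ", rewards.sum())
print("greedy: ", g.sum())
print("optimal:", o.sum())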
Example #13
def greedy_actions(env, t, n_repeats):
    # Distinct single-period optimal matchings across repeated resolves.
    return {tuple(optimal(env, t, t)["matched"][t])
            for _ in range(n_repeats)}
Example #14
    net.opt = torch.optim.Adam(net.parameters(), lr=0.0001)

    #%%

    for epoch in range(5):

        for seed in [0, 1, 2] * 5:

            print("Creating environment")
            env = OPTNKidneyExchange(entry_rate,
                                     death_rate,
                                     time_length,
                                     seed=seed)

            print("Solving environment")
            opt = optimal(env)
            gre = greedy(env)

            o = get_n_matched(opt["matched"], 0, env.time_length)
            g = get_n_matched(gre["matched"], 0, env.time_length)

            rewards = []
            actions = []
            t = 0
            print("Beginning")

            for t in range((2 * horizon + 1) * (epoch + 1)):

                print("Getting living")
                living = np.array(env.get_living(t))
                if len(living) > 1:
Example #15
seed = clock_seed()

args = {'entry_rate': entry_rate,
        'death_rate': death_rate,
        'time_length': 1001,
        'seed': seed}

if envtype == "abo":
    env = ABOKidneyExchange(**args)
elif envtype == "saidman":
    env = SaidmanKidneyExchange(**args)
elif envtype == "optn":
    env = OPTNKidneyExchange(**args)

opt = optimal(env)
gre = greedy(env)

g = get_n_matched(gre["matched"], 0, env.time_length)
o = get_n_matched(opt["matched"], 0, env.time_length)

#%%
rewards = np.zeros(env.time_length)
for t in trange(env.time_length):
    
    liv = np.array(env.get_living(t))
    A = env.A(t)
    has_cycle = np.diag(A @ A) > 0
    liv_and_cycle = liv[has_cycle]
    yhat_full = np.zeros(len(liv), dtype=bool)