def rollout(env, t_begin, t_end, taken, gamma=0.97):
    """Return the discounted mean of greedy match counts for one simulated rollout.

    A snapshot of ``env`` is taken at ``t_begin`` and populated for the
    steps ``t_begin + 1`` through ``t_end`` using a seed from
    ``clock_seed()``.  The items in ``taken`` are added to the snapshot's
    ``removed_container`` entry for ``t_begin``, the greedy solver is run
    from ``t_begin + 1`` to ``t_end``, and the per-step match counts are
    reduced with ``disc_mean``.

    Args:
        env: Environment handed to ``snapshot``.
        t_begin: Step at which the snapshot is taken and ``taken`` is removed.
        t_end: Last step passed to ``populate``, ``greedy`` and
            ``get_n_matched``.
        taken: Collection removed at ``t_begin``; its length becomes the
            first entry of the match counts.
        gamma: Discount argument forwarded to ``disc_mean``.

    Returns:
        Whatever ``disc_mean`` returns for the match counts and ``gamma``.
    """
    first_future_step = t_begin + 1

    sim = snapshot(env, t_begin)
    sim.populate(first_future_step, t_end, seed=clock_seed())
    sim.removed_container[t_begin].update(taken)

    greedy_result = greedy(sim, first_future_step, t_end)
    counts = get_n_matched(greedy_result["matched"], t_begin, t_end)
    # The first slot is overwritten with the size of the action under evaluation.
    counts[0] = len(taken)
    return disc_mean(counts, gamma)
def rollout(env, t_begin, t_end, taken, gamma=0.97):
    """Score the action ``taken`` at ``t_begin`` with one greedy rollout.

    A snapshot of ``env`` is taken at ``t_begin`` and populated for the
    steps ``t_begin + 1`` through ``t_end`` with a seed from
    ``clock_seed()``.  ``taken`` is added to the snapshot's
    ``removed_container`` entry for ``t_begin``, the greedy solver is run
    over the remaining steps, and the resulting match counts are reduced
    with ``disc_mean``.

    The reward is the greedy value alone; a comparison against the
    optimal solver used to be sketched here but was disabled.

    NOTE: this file defines ``rollout`` twice and this later definition
    is the one that stays bound.  ``gamma`` defaults to 0.97 to match the
    earlier definition's signature, so callers that omit it keep working.

    Args:
        env: Environment handed to ``snapshot``.
        t_begin: Step at which the snapshot is taken and ``taken`` is removed.
        t_end: Last step passed to ``populate``, ``greedy`` and
            ``get_n_matched``.
        taken: Collection removed at ``t_begin``; its length becomes the
            first entry of the match counts.
        gamma: Discount argument forwarded to ``disc_mean``.

    Returns:
        Whatever ``disc_mean`` returns for the greedy match counts.
    """
    snap = snapshot(env, t_begin)
    snap.populate(t_begin + 1, t_end, seed=clock_seed())
    snap.removed_container[t_begin].update(taken)

    g = greedy(snap, t_begin + 1, t_end)
    g_matched = get_n_matched(g["matched"], t_begin, t_end)
    # The first slot is overwritten with the size of the action under evaluation.
    g_matched[0] = len(taken)
    return disc_mean(g_matched, gamma)
# Experiment script: loads a saved network, builds an exchange environment
# and solves it with the optimal and greedy solvers before stepping through
# time.

# Random integer below 1e8, rendered as a string.
# NOTE(review): presumably tags this run's outputs — its use is not visible here.
newseed = str(np.random.randint(1e8))
train = True  # NOTE(review): consumer of this flag is outside this view
disc = 0.1  # NOTE(review): looks like a discount setting — confirm where it is read
# Load a previously serialized object from disk.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
net = torch.load("results/RNN_50-1-abo_4386504")

#%%
# Only seed 2 is run; the seed is passed to the environment constructor.
for k in [2]:
    print("Creating environment")
    # entry_rate, death_rate and time_length are defined outside this view.
    env = ABOKidneyExchange(entry_rate, death_rate, time_length, seed=k)

    print("Solving environment")
    # Solve the full environment with both solvers.
    opt = optimal(env)
    gre = greedy(env)

    # Match counts over [0, env.time_length] for each solver's "matched" result.
    o = get_n_matched(opt["matched"], 0, env.time_length)
    g = get_n_matched(gre["matched"], 0, env.time_length)

    rewards = []
    actions = []
    t = -1  # overwritten by the loop below as soon as it starts iterating
    print("Beginning")
    #%%
    for t in range(env.time_length):
        living = np.array(env.get_living(t))
        # Skip steps where exactly one entry is returned by get_living.
        # NOTE(review): presumably a single living node leaves nothing to
        # match — confirm; an empty result is NOT skipped by this check.
        if len(living) == 1:
            continue