def small_case():
    """Set up a 101 x 101 grid problem, solve it three ways and plot runs.

    The state space is every integer pair in ``[0, N] x [0, N]`` with
    ``N = 100``; the action set is ``{U, D, L, R}``. Forbidden cells come
    from three calls to ``random_red_areas_rectangulares`` (a central
    rectangle and two corner rectangles), and the target is the 2 x 2 block
    of cells with both coordinates in ``{N - 2, N - 1}``.

    The model is solved with value iteration, policy iteration and linear
    programming, the recorded computing times are printed, and each policy
    is passed to ``simulate_and_plot`` with ``n=10``.
    """
    load_timer = Timer('cargando', verbose=True)
    load_timer.start()

    N = 100
    axis = tuple(range(N + 1))
    S = list(product(axis, axis))
    A = {U, D, L, R}

    # Central rectangle, then the two off-diagonal corners.
    mid_lo, mid_hi = int(0.3 * N), int(0.7 * N)
    forbidden = random_red_areas_rectangulares(
        (mid_lo, mid_hi), (mid_lo, mid_hi), p=0.03, N=N)
    forbidden += random_red_areas_rectangulares(
        (int(0.8 * N), N), (0, int(0.2 * N)), p=0.01, N=N)
    forbidden += random_red_areas_rectangulares(
        (0, int(0.2 * N)), (int(0.8 * N), N), p=0.01, N=N)

    goal_axis = tuple(range(N - 2, N))
    target = list(product(goal_axis, goal_axis))

    M = 2000
    p_space = pplaneSpace(A, S, target, forbidden, M)

    _lambda = 0.9
    p_mdp = infiniteTime(p_space, _lambda, sparse=True, sense=MDP.minimize,
                         verbose=False)
    load_timer.stop()

    # Only the policies are used below; the value functions are discarded.
    polVI, _ = p_mdp.solve(method=MDP.ValueIteration)
    polPI, _ = p_mdp.solve(method=MDP.PolicyIteration)
    polLP, _ = p_mdp.solve(method=MDP.LinearPrograming)

    print(p_mdp.computing_times)

    for policy in (polPI, polVI, polLP):
        simulate_and_plot(p_space, policy, n=10)
def lower_bound(self, samples, alpha=0.01):
    """Estimate a statistical lower bound from independently solved samples.

    Each element of ``samples`` is installed as ``self.second_stage_sps``,
    the problem is re-solved with ``reset=True`` and ``self.objVal`` is
    recorded. The bound is ``mu + z_alpha * sigma / sqrt(M)``, where ``mu``
    and ``sigma`` are the sample mean and the (M - 1)-denominator sample
    standard deviation of the recorded objective values and ``z_alpha`` is
    the standard normal ``alpha``-quantile.

    Side effects: ``self.second_stage_sps`` is left pointing at the last
    sample, and the timer is stored in
    ``self.computing_times['lower_bound']``.

    Parameters
    ----------
    samples : sequence
        Second-stage samples, one per replication. At least two are needed
        for the sample standard deviation to be defined.
    alpha : float, optional
        Tail probability passed to ``sts.norm.ppf``.

    Returns
    -------
    tuple
        ``(vals, mu, sigma, lower_bound)``.

    Raises
    ------
    ValueError
        If fewer than two samples are supplied.
    """
    M = len(samples)
    if M < 2:
        raise ValueError(
            f'lower_bound needs at least 2 samples to estimate a standard '
            f'deviation, got {M}')

    # NOTE(review): the original statement that emitted these messages was
    # lost (only the message and a closing parenthesis survived). Route them
    # through self.logger.info when the instance has a logger, otherwise
    # print -- confirm which one the class actually uses.
    emit = getattr(getattr(self, 'logger', None), 'info', print)

    emit(f'{"-" * 100}\nStarted computing lower bound\n{"-" * 100}')
    t = Timer('lower bound')
    self.computing_times['lower_bound'] = t
    t.start()

    vals = []
    for s in samples:
        self.second_stage_sps = s
        self.solve(reset=True)
        vals.append(self.objVal)

    mu = sum(vals) / M
    sigma = np.sqrt(sum((v - mu)**2 for v in vals) / (M - 1))
    lower_bound = mu + sts.norm.ppf(alpha) * sigma / np.sqrt(M)
    t.stop()

    emit(f'Finish computing lower bound. Total time was {t.total_time} seconds.')
    return vals, mu, sigma, lower_bound
def simulate_and_plot(p_space, pol, start=(0, 0), n=10, verbose=False, plot=True):
    """Simulate a spaceship under policy ``pol`` and optionally draw the result.

    A ``Spaceship`` is created from ``p_space``, ``start`` and ``pol``, placed
    in a ``TheSpace`` and simulated via ``simulate(n, start)``. The whole
    run, including the optional ``paint_space()`` call, is timed.

    Parameters
    ----------
    p_space
        Space object handed to both ``Spaceship`` and ``TheSpace``.
    pol
        Policy handed to ``Spaceship``.
    start : tuple, optional
        Starting position, forwarded to ``Spaceship`` and ``simulate``.
    n : int, optional
        First argument of ``TheSpace.simulate``.
    verbose : bool, optional
        Forwarded to the timer and to ``TheSpace``.
    plot : bool, optional
        When true, ``paint_space()`` is called after the simulation.

    Returns
    -------
    Spaceship
        The spaceship instance that was simulated.
    """
    timer = Timer("Simulatoin", verbose=verbose)
    timer.start()

    ship = Spaceship(p_space, start, pol)
    world = TheSpace(p_space, ship, verbose=verbose)
    world.simulate(n, start)

    if plot:
        world.paint_space()

    timer.stop()
    return ship
def cont_experiments():
    """Run the continuous two-stage experiment and plot both bound samples.

    Steps, each wrapped in its own ``Timer``:

    1. build the first stage with ``create_FS(G)``;
    2. build ``n`` second-stage problems with ``create_SS_c(n, G, P)`` and
       the ``TwoStageSP`` model, then solve it with ``multi_cut=False``;
    3. build one sample of size ``N`` for the upper bound;
    4. build ``M`` samples of size ``n`` for the lower bound.

    Finally the upper and lower bounds are computed at level ``alpha``,
    printed as ``mean, sigma, bound`` and their values drawn with
    ``sns.displot``. The timers are collected in a local dict that is not
    returned.
    """
    G = 3
    P = 3
    n, M, N = 5000, 100, 15000
    alpha = 0.01
    timers = {}

    fs_timer = Timer('first stage creation')
    fs_timer.start()
    fs = create_FS(G)
    fs_timer.stop()
    timers['fs'] = fs_timer

    ss_timer = Timer('second stage creation', verbose=True)
    ss_timer.start()
    ssps = create_SS_c(n, G, P)
    tssp = TwoStageSP(fs, ssps, verbose=True)
    ss_timer.stop()
    timers['ss'] = ss_timer

    tssp.solve(multi_cut=False)
    # Disabled: print(f'L sol:\nx:\t{tssp.x_hat}\ntheta:\t{tssp.theta_hat}')

    ub_timer = Timer('ub ss creation', verbose=True)
    ub_timer.start()
    ub_sstages = create_SS_c(N, new=False)
    ub_timer.stop()
    timers['ub_ss'] = ub_timer

    lb_timer = Timer('lb ss creation', verbose=True)
    lb_timer.start()
    lb_samples = [create_SS_c(n, new=False) for _ in range(M)]
    lb_timer.stop()
    timers['lb_ss'] = lb_timer

    # Disabled: tssp.confidence_interval(lower_bound_samples=lb_samples,
    #                                    upper_bound_sample=ub_sstages, alpha=alpha)

    ub_vals, ub_mean, ub_sigma, ub = tssp.upper_bound(ub_sstages, alpha=alpha)
    print(ub_mean, ub_sigma, ub)
    sns.displot(data=ub_vals, kde=True)
    plt.xlabel(r"$Q(\hat{x}, \xi)$")

    lb_vals, lb_mean, lb_sigma, lb = tssp.lower_bound(lb_samples, alpha=alpha)
    print(lb_mean, lb_sigma, lb)
    sns.displot(data=lb_vals, kde=True)
    plt.xlabel(r"$\hat{f}_n$")
def read_objects(N, _lambda):
    """Rebuild the size-``N`` model from pickles under ``examples/data``.

    The space is created with ``build_space(N)``; its ``F`` attribute, the
    Q / r / a tensors and the LP matrix and vectors are read from
    ``examples/data/<name><N>.pickle``. The model is then constructed with
    ``load_from_files=True`` and fed the loaded tensors and LP data. The
    whole load is timed, and the space is painted before returning.

    Parameters
    ----------
    N
        Problem size; passed to ``build_space`` and embedded in file names.
    _lambda
        Second positional argument of ``infiniteTime``.

    Returns
    -------
    The model returned by ``infiniteTime`` after loading tensors and LP data.
    """

    def unpickle(stem):
        # One pickle per artefact, named <stem><N>.pickle.
        # NOTE: pickle.load executes arbitrary code; only use trusted files.
        with open(f'examples/data/{stem}{N}.pickle', 'rb') as handle:
            return pickle.load(handle)

    load_timer = Timer('Cargando', verbose=True)
    load_timer.start()

    p_space, space = build_space(N)

    p_space.F = unpickle('Forbidden')
    Q_tensor = unpickle('Q_tensor')
    r_tensor = unpickle('r_tensor')
    a_tensors = unpickle('a_tensor')
    A_matrix = unpickle('A_matrix')
    b_vector = unpickle('b_vector')
    c_vector = unpickle('c_vector')

    p_mdp = infiniteTime(p_space, _lambda, sparse=True, sense=MDP.minimize,
                         verbose=True, load_from_files=True)
    p_mdp.load_tensors(Q_tensor, r_tensor, a_tensors)
    p_mdp.build_LP(A_matrix, b_vector, c_vector)

    load_timer.stop()
    space.paint_space()
    return p_mdp
def optimal_value(self, method, **kwargs):
    """Solve the problem with the requested method and store the result.

    Nothing is returned: results are written to instance state
    (``self.policy`` and, for linear programming, ``self.v``), and the timer
    for the run is stored in ``self.computing_times``.

    Parameters
    ----------
    method
        One of ``MDP.ValueIteration``, ``MDP.PolicyIteration`` or
        ``MDP.LinearPrograming``. Any other value only logs a warning.
    **kwargs
        Optional settings read through ``check_kwargs``:

        * value iteration: ``v_0`` (default: a double copy of ``self.v``),
          ``epsilon`` (default ``1E-1``), ``improvement_method``
          (default ``'GS'``);
        * policy iteration: ``initial_policy`` (default ``self.policy``);
        * linear programming: ``alpha`` (default: uniform vector
          ``1 / self.l_S`` of length ``self.l_S``).

    Returns
    -------
    None
    """
    if method == MDP.ValueIteration:
        # self.v may expose either .clone() or .copy(); try both.
        try:
            v_0 = check_kwargs('v_0', self.v.clone().double(), kwargs)
        except AttributeError:
            v_0 = check_kwargs('v_0', self.v.copy().double(), kwargs)
        epsilon = check_kwargs('epsilon', 1E-1, kwargs)
        improvement_method = check_kwargs('improvement_method', 'GS', kwargs)

        # NOTE(review): the original dictionary key was lost
        # (`self.computing_times[] = t`). The timer's own label is used as
        # the key here and in the branches below -- confirm against callers
        # that read computing_times.
        label = f'{MDP.ValueIteration}_{improvement_method}'
        t = Timer(label, verbose=self.verbose)
        self.computing_times[label] = t
        t.start()
        self._value_iteration(v_0, epsilon, improvement_method)
        self.policy.add_policy(self.a_policy)
        t.stop()

    elif method == MDP.PolicyIteration:
        initial_policy = check_kwargs('initial_policy', self.policy, kwargs)
        if 'initial_policy' in kwargs:
            initial_policy = kwargs['initial_policy']
        elif self.policy.matrix is None:
            # No policy supplied and none stored yet: start from a random one.
            self.policy.create_random_policy()

        label = f'{MDP.PolicyIteration}'
        t = Timer(label)
        self.computing_times[label] = t
        t.start()
        self._policy_iteration(initial_policy)
        t.stop()

    elif method == MDP.LinearPrograming:
        alpha = check_kwargs('alpha', 1 / self.l_S * np.ones(shape=self.l_S),
                             kwargs)

        label = f'{MDP.LinearPrograming}'
        t = Timer(label)
        self.computing_times[label] = t
        t.start()
        x = self._linear_programing(alpha)
        t.stop()

        if x is not None:
            self.policy = self._policy_from_lp(x)
            self.v = self.policy_valuation(self.policy)
        else:
            self.logger.warning(
                "Something went wrong. Infeasible model...")

    else:
        # Previously an unknown method was a silent no-op.
        self.logger.warning(f'Unknown solution method {method!r}; nothing was solved.')
def upper_bound(self, sample, alpha=0.01):
    """Estimate a statistical upper bound at the current first-stage point.

    For every element of ``sample`` the dual is built at ``self.x_hat`` and
    solved; the third value returned by ``solve_dual()`` is recorded. The
    bound is ``c @ x_hat + mu + z_{1-alpha} * sigma / sqrt(N)``, where ``mu``
    and ``sigma`` are the sample mean and the (N - 1)-denominator sample
    standard deviation of the recorded values, ``c`` is
    ``self.first_stage.c`` and ``z_{1-alpha}`` is the standard normal
    ``1 - alpha`` quantile.

    The timer is stored in ``self.computing_times['upper_bound']``.

    Parameters
    ----------
    sample : sequence
        Second-stage problems exposing ``build_dual`` and ``solve_dual``.
        At least two are needed for the sample standard deviation.
    alpha : float, optional
        Tail probability; the quantile used is ``1 - alpha``.

    Returns
    -------
    tuple
        ``(vals, mu, sigma, upper_bound[0][0])`` -- the bound expression is
        indexed twice, so it is expected to be (at least) two-dimensional.

    Raises
    ------
    ValueError
        If fewer than two sample elements are supplied.
    """
    N = len(sample)
    if N < 2:
        raise ValueError(
            f'upper_bound needs at least 2 sample elements to estimate a '
            f'standard deviation, got {N}')

    # NOTE(review): the original statement that emitted these messages was
    # lost (only the message and a closing parenthesis survived). Route them
    # through self.logger.info when the instance has a logger, otherwise
    # print -- confirm which one the class actually uses.
    emit = getattr(getattr(self, 'logger', None), 'info', print)

    emit(f'\n{"-" * 100}\nStarted computing upper bound\n{"-" * 100}')
    t = Timer('upper bound')
    self.computing_times['upper_bound'] = t
    t.start()

    vals = []
    for s in sample:
        s.build_dual(self.x_hat)
        # Only the third component is used for the estimate.
        _, _, wk = s.solve_dual()
        vals.append(wk)

    mu = sum(vals) / N
    sigma = np.sqrt(sum((v - mu)**2 for v in vals) / (N - 1))
    upper_bound = self.first_stage.c @ self.x_hat + mu + sts.norm.ppf(
        1 - alpha) * sigma / np.sqrt(N)
    t.stop()

    emit(
        f'{"-" * 100}\nFinish computing upper bound. Total time was {t.total_time} seconds.\n{"-" * 100}'
    )
    return vals, mu, sigma, upper_bound[0][0]
def create_and_pickle(N):
    """Build the size-``N`` model and pickle its data under ``examples/data``.

    The space is created with ``build_space(N)`` and painted, the model is
    built with ``infiniteTime`` (this part is timed), and seven artefacts
    are written to ``examples/data/<name><N>.pickle``: the space's ``F``
    attribute, the Q / r / a tensors, and the LP matrix and vectors. These
    are the same files ``read_objects`` loads.

    Parameters
    ----------
    N
        Problem size; passed to ``build_space`` and embedded in file names.
    """
    tc = Timer('cargando', verbose=True)
    tc.start()
    p_space, space = build_space(N)
    space.paint_space()
    # NOTE(review): `_lambda` is not a parameter of this function, so it must
    # be a module-level name -- confirm it is defined before this is called.
    p_mdp = infiniteTime(p_space, _lambda, sparse=True, sense=MDP.minimize,
                         verbose=False)
    tc.stop()

    # The object for 'Forbidden' was missing in the original call
    # (`pickle.dump(, file)`); read_objects loads that file into p_space.F,
    # so that is what is written here.
    artefacts = (
        ('Forbidden', p_space.F),
        ('Q_tensor', p_mdp.Q_tensor),
        ('r_tensor', p_mdp.r_tensor),
        ('a_tensor', p_mdp.a_tensors),
        ('A_matrix', p_mdp.LP.LP_A),
        ('b_vector', p_mdp.LP.LP_b),
        ('c_vector', p_mdp.LP.LP_c),
    )
    for stem, obj in artefacts:
        path = f'examples/data/{stem}{N}.pickle'
        with open(path, 'wb') as file:
            pickle.dump(obj, file)
def _lambda_to_1(lb=0.9, ub=0.999):
    """Sweep the discount factor over ``[lb, ub]`` and plot how the solution moves.

    For each of 30 evenly spaced discount factors the inventory model is
    solved twice with ``_value_iteration``: once with ``lambda_e_1=True``
    (timed by the 'Con truco' timer) and once with ``lambda_e_1=False``
    (timed by 'Sin truco'). The second model overwrites the first in the
    local dict, so every plot uses the ``lambda_e_1=False`` result.

    Figures produced:

    * ax1: value function ``v`` per state, one line per discount factor;
    * ax2: ``(1 - lambda) * v`` per state;
    * ax3: one line per distinct policy found, labelled with the interval
      of discount factors over which it was in force;
    * ax4: ``computing_times['GS'].total_time`` against the discount factor.

    Finally the difference between the two accumulated timers is printed.

    NOTE(review): ``inv_space`` and ``K`` are not defined in this function
    and must exist at module level -- confirm.

    Parameters
    ----------
    lb, ub : float, optional
        End points of the discount-factor sweep.
    """
    cmap = cm.get_cmap('coolwarm', 256)
    _Lambda = np.linspace(lb, ub, 30)

    def f(s):
        return 10 * s

    def c(a):
        return 3 * a - 0.01 * a**2

    def h(s):
        return s

    def colour(value):
        # Colour for a discount factor according to its position in [lb, ub].
        return cmap((value - lb) / (ub - lb))

    def plot_policy(policy, states, mid, label):
        # Draw `policy` up to five states past the last state whose action is
        # positive, coloured by the midpoint `mid` of its interval.
        i_0 = 0
        for i in states:
            if policy[i] > 0:
                i_0 = i
        i_0 += 5
        ax3.plot(range(i_0), list(policy.values())[:i_0], '-o',
                 c=colour(mid), label=label)

    MDPS = dict()
    _, ax1, ax1c = cmap_plot()
    _, ax2, ax2c = cmap_plot()
    _, ax3, ax3c = cmap_plot()
    T_T = Timer('Con truco')
    T_F = Timer('Sin truco')

    c_pol = None  # policy currently in force
    c_l = lb      # discount factor at which c_pol started
    for lam in _Lambda:
        print(lam)

        T_T.start()
        inv_reward = inventoryReward(inv_space, f, c, h, K, lam, lambda_e_1=True)
        MDPS[lam] = infiniteTime(inv_space, inv_reward, lam)
        MDPS[lam]._value_iteration()
        T_T.stop()

        T_F.start()
        inv_reward = inventoryReward(inv_space, f, c, h, K, lam, lambda_e_1=False)
        MDPS[lam] = infiniteTime(inv_space, inv_reward, lam)
        MDPS[lam]._value_iteration()
        T_F.stop()

        tag = r'$\lambda = $' + str(round(lam, 4))
        ax1.plot(MDPS[lam].S, MDPS[lam].v, c=colour(lam), label=tag)
        ax2.plot(MDPS[lam].S, MDPS[lam].v * (1 - lam), c=colour(lam), label=tag)

        if c_pol is None:
            # First discount factor of the sweep (equal to lb).
            c_pol = MDPS[lam].a_policy
            c_l = lam

        if MDPS[lam].a_policy != c_pol:
            # The policy changed at `lam`: close the interval [c_l, lam).
            c_u = lam
            plot_policy(
                c_pol, MDPS[lam].space.S, (c_u + c_l) / 2,
                r'$\lambda \in$ ' + f'[{round(c_l, 3)}, {round(c_u, 3)})')
            c_pol = MDPS[lam].a_policy
            c_l = c_u

    # Last policy, in force on [c_l, ub]. After any change c_l equals the
    # last change point; using it also covers the case where the policy never
    # changed, which previously raised NameError on the unbound c_u.
    plot_policy(
        c_pol, MDPS[lam].space.S, (c_l + ub) / 2,
        r'$\lambda \in$ ' + f'[{round(c_l, 3)}, {round(ub, 3)}]')

    norm = mpl.colors.Normalize(vmin=lb, vmax=ub)
    mpl.colorbar.ColorbarBase(ax1c, cmap=cmap, norm=norm)
    mpl.colorbar.ColorbarBase(ax2c, cmap=cmap, norm=norm)
    mpl.colorbar.ColorbarBase(ax3c, cmap=cmap, norm=norm)

    ax1.set_xlabel('Estados')
    ax2.set_xlabel('Estados')
    ax3.set_xlabel('Estados')
    # ax1 shows v and ax2 shows (1 - lambda) * v; the labels were swapped.
    ax1.set_ylabel(r'$v^*_\lambda$')
    ax2.set_ylabel(r'$(1 - \lambda) v^*_\lambda$')
    ax3.set_ylabel('Acción')
    ax3.legend()

    _, ax4 = plt.subplots()
    ax4.plot(_Lambda,
             [MDPS[lam].computing_times['GS'].total_time for lam in _Lambda])
    ax4.set_xlabel(r'$\lambda$')
    ax4.set_ylabel(r'Tiempo de cómputo (s)')

    saved = T_F.total_time - T_T.total_time
    print(
        f'El tiempo total que se ahorra uno es {saved}, en porcentages {saved / T_T.total_time}'
    )