def __init__(self, space: finiteTimeSpace, _lambda: float, **kwargs):
    """
    Initialise the model and, unless loading from files, build its tensors.

    Parameters
    ----------
    space: finiteTimeSpace
        Problem space; forwarded to the parent initialiser and to
        ``DMSPolicy``.
    _lambda: float
        Stored as ``self.lambda_``.
    **kwargs
        Forwarded to the parent initialiser. Two entries are also read here:
        ``load_from_files`` (when equal to ``True`` nothing is built and the
        tensor/LP attributes are left as ``None``) and ``lp_solver`` (looked
        up through ``check_kwargs``; presumably overrides the default
        ``gurobiMDPSolver`` -- confirm against ``check_kwargs``).
    """
    super().__init__(space, **kwargs)
    self.S = list(self.S)
    self.policy = DMSPolicy(space)
    self.v = pt.zeros((len(self.S), 1))
    self.lambda_ = _lambda
    self.a_policy = {}

    # Deferred mode: the caller supplies tensors and the LP afterwards.
    if ('load_from_files', True) in kwargs.items():
        self.logger.info('No tensors created, remember to add the later.')
        self.Q_tensor = None
        self.r_tensor = None
        self.a_tensors = None
        self.LP = None
        return

    self.logger.info('Starting to create tensors')
    preprocess_timer = Timer('Preprocess', verbose=self.verbose)
    self.computing_times['preprocess'] = preprocess_timer
    preprocess_timer.start()

    # Placeholder LP object: its indexers are needed while building the matrix.
    self.LP = MDPLPSolver(self)

    # Tensors and matrices required by the solution methods.
    built = self._build_tensors()
    self.Q_tensor, self.r_tensor, self.a_tensors, LP_matrix, lp_cost = built

    for state in self.S:
        self._build_r_P(state)
    preprocess_timer.stop()

    # The default solver is constructed before the kwargs lookup, exactly as a
    # positional default argument would be.
    default_solver = gurobiMDPSolver(self, LP_matrix, lp_cost)
    self.LP = check_kwargs('lp_solver', default_solver, kwargs)
def policy_valuation(self, policy: DMSPolicy):
    """
    Value a policy by solving the linear system ``(I - lambda * P_d) v = r_d``.

    ``P_d`` and ``r_d`` come from ``self._build_P_mu(policy)``. The sparse
    branch solves with ``scipy.sparse.linalg.spsolve`` on a CSC matrix; the
    dense branch converts ``P_d`` with ``.numpy()`` and uses
    ``numpy.linalg.solve``.

    Parameters
    ----------
    policy: DMSPolicy
        Policy to value.

    Returns
    -------
    The solution ``v`` of the linear system, as returned by the solver used.
    """
    timers = self.computing_times
    if 'policy_valuation' not in timers:
        timers['policy_valuation'] = Timer('policy_valuation',
                                           verbose=self.verbose)
    timers['policy_valuation'].start()

    Pd, rd = self._build_P_mu(policy)
    if not self.sparse:
        system = np.identity(self.l_S) - self.lambda_ * Pd.numpy()
        value = np.linalg.solve(system, rd)
    else:
        system = (sp.identity(self.l_S) - self.lambda_ * Pd).tocsc()
        value = sp.linalg.spsolve(system, rd)

    timers['policy_valuation'].stop()
    return value
def _build_P_mu(self, policy=None):
    """
    Build the transition matrix and reward vector induced by a policy.

    Parameters
    ----------
    policy
        Policy to use; ``self.policy`` when ``None``.

    Returns
    -------
    tuple
        ``(Ps, rd)``. In sparse mode ``Ps`` is the sum over actions of
        ``self.a_tensors[a].dot(policy.a_matrices()[a])`` and ``rd`` is a
        transposed ``csr_matrix``. In dense mode ``Ps`` is a ``pt.einsum``
        contraction of the densified ``Q_tensor`` with the policy matrix and
        ``rd`` is the dense form of the same transposed matrix.
    """
    timers = self.computing_times
    if 'building_P_Mu' not in timers:
        timers['building_P_Mu'] = Timer('building_P_Mu', verbose=self.verbose)
    timers['building_P_Mu'].start()

    if policy is None:
        policy = self.policy

    rew = self.r_tensor.numpy()
    if self.sparse:
        pol_tensors = policy.a_matrices()
        for position, a in enumerate(self.A):
            term = self.a_tensors[a].dot(pol_tensors[a])
            if position == 0:
                Ps = term
            else:
                Ps += term
        # NOTE(review): the second positional argument of np.nan_to_num is
        # `copy`, not `nan`; NaNs still become 0.0 through the default.
        # NOTE(review): policy.matrix is used untransposed here but as
        # `.numpy().T` in the dense branch -- confirm both orientations.
        weighted = np.nan_to_num((rew * policy.matrix), 0)
        rd = sp.csr_matrix(weighted.sum(axis=1)).T
    else:
        Ps = pt.einsum("saj,as->sj", self.Q_tensor.to_dense(),
                       policy.matrix.type(pt.double))
        weighted = np.nan_to_num((rew * policy.matrix.numpy().T), 0)
        rd = sp.csr_matrix(weighted.sum(axis=1)).T.todense()

    timers['building_P_Mu'].stop()
    return Ps, rd
def small_case():
    """
    Build a 101 x 101 grid example, solve it three ways and simulate each policy.

    The model is solved with value iteration, policy iteration and linear
    programming (in that order); the recorded computing times are printed and
    every resulting policy is passed to ``simulate_and_plot`` with ``n=10``.
    """
    setup_timer = Timer('cargando', verbose=True)
    setup_timer.start()

    N = 100
    axis = tuple(range(N + 1))
    S = list(product(axis, axis))
    A = {U, D, L, R}

    # Rectangle bounds handed to the random forbidden-area generator.
    centre = (int(0.3 * N), int(0.7 * N))
    low = (0, int(0.2 * N))
    high = (int(0.8 * N), N)
    forbidden = random_red_areas_rectangulares(centre, centre, p=0.03, N=N)
    forbidden += random_red_areas_rectangulares(high, low, p=0.01, N=N)
    forbidden += random_red_areas_rectangulares(low, high, p=0.01, N=N)

    corner = tuple(range(N - 2, N))
    target = list(product(corner, corner))
    M = 2000
    p_space = pplaneSpace(A, S, target, forbidden, M)

    _lambda = 0.9
    p_mdp = infiniteTime(p_space,
                         _lambda,
                         sparse=True,
                         sense=MDP.minimize,
                         verbose=False)
    setup_timer.stop()

    polVI, vVI = p_mdp.solve(method=MDP.ValueIteration)
    polPI, vPI = p_mdp.solve(method=MDP.PolicyIteration)
    polLP, vLP = p_mdp.solve(method=MDP.LinearPrograming)
    print(p_mdp.computing_times)

    for solved_policy in (polPI, polVI, polLP):
        simulate_and_plot(p_space, solved_policy, n=10)
def lower_bound(self, samples, alpha=0.01):
    """
    Estimate a statistical lower bound by re-solving the problem per sample.

    For each element of ``samples`` the second-stage scenario set is replaced
    by that element, the problem is solved with ``reset=True`` and
    ``self.objVal`` is recorded. ``self.second_stage_sps`` is left pointing at
    the last sample afterwards.

    Parameters
    ----------
    samples
        Sequence of scenario sets (must be non-empty and indexable).
    alpha: float
        Level passed to ``sts.norm.ppf`` for the one-sided bound.

    Returns
    -------
    tuple
        ``(vals, mu, sigma, lower_bound)``: recorded objective values, their
        mean, their sample standard deviation (``M - 1`` denominator) and
        ``mu + ppf(alpha) * sigma / sqrt(M)``.
    """
    objective_values = []
    M = len(samples)
    # Not used below; kept so that empty or unsized input fails as before.
    n = len(samples[0])

    banner = "-" * 100
    self.outputLog.info(f'{banner}\nStarted computing lower bound\n{banner}')
    timer = Timer('lower bound')
    self.computing_times['lower_bound'] = timer
    timer.start()

    for scenario_set in samples:
        self.second_stage_sps = scenario_set
        self.solve(reset=True)
        objective_values.append(self.objVal)

    mu = sum(objective_values) / M
    squared_deviation = sum((v - mu)**2 for v in objective_values)
    sigma = np.sqrt(squared_deviation / (M - 1))
    lower_bound = mu + sts.norm.ppf(alpha) * sigma / np.sqrt(M)
    timer.stop()

    self.outputLog.info(
        f'Finish computing lower bound. Total time was {timer.total_time} seconds.'
    )
    return objective_values, mu, sigma, lower_bound
def _improvement_GS(self, v):
    """
    One Gauss-Seidel style improvement sweep over all states.

    States are visited in the order of ``self.S``. Once a state has been
    processed its new value is written to ``v_r`` and its entry in the working
    copy of ``v`` is zeroed, so ``v_r + v`` always mixes already-updated
    values with the old values of the states still pending.

    Parameters
    ----------
    v
        Current value vector; copied with ``clone()`` (or ``copy()`` when
        ``clone`` is not available), so the argument is not modified.

    Returns
    -------
    tuple
        ``(policy, v_r)``: a ``DMSPolicy`` built from the selected
        state/action indicator matrix, and the updated value tensor.
    """
    timer_key = 'policy_improvement_GS'
    if timer_key not in self.computing_times:
        self.computing_times[timer_key] = Timer('policy_improvement_GS',
                                                verbose=self.verbose)
    self.computing_times[timer_key].start()

    v_r = pt.zeros(v.shape, dtype=pt.double)
    try:
        v = v.clone()
    except AttributeError:
        v = v.copy()

    chosen_action = {}
    row_idx, col_idx, ones = [], [], []
    for s in self.S:
        i = self.S_int[s]
        rs, Ps = self._build_r_P(s)
        to_max = rs + self.lambda_ * Ps.matmul(v_r + v)
        # Each candidate is (action, [value]); max() keeps the first maximiser.
        best_action, best_value = max(zip(self.A, to_max.tolist()),
                                      key=lambda candidate: candidate[1])
        chosen_action[s] = best_action
        row_idx.append(i)
        col_idx.append(self.A_int[best_action])
        ones.append(1)
        v_r[i] = best_value[0]
        v[i] = 0
        if i % 10000 == 0:
            self.logger.info(f'Policy improvment iteration {i}')

    indicator = sp.coo_matrix((ones, (row_idx, col_idx)),
                              shape=(self.space.l_S, self.space.l_A))
    matrix = np.asarray(indicator.todense())
    policy = DMSPolicy(self.space, matrix, from_matrix=True)
    policy.policy = chosen_action

    self.computing_times[timer_key].stop()
    return policy, v_r
def simulate_and_plot(p_space,
                      pol,
                      start=(0, 0),
                      n=10,
                      verbose=False,
                      plot=True):
    """
    Simulate a policy on the given space and optionally paint the result.

    Parameters
    ----------
    p_space
        Space handed to both ``Spaceship`` and ``TheSpace``.
    pol
        Policy followed by the spaceship.
    start
        Start position, passed to ``Spaceship`` and to ``simulate``.
    n
        First argument of ``TheSpace.simulate``.
    verbose: bool
        Forwarded to the timer and to ``TheSpace``.
    plot: bool
        When true, ``paint_space()`` is called after the simulation.

    Returns
    -------
    Spaceship
        The spaceship object used in the simulation.
    """
    sim_timer = Timer("Simulatoin", verbose=verbose)
    sim_timer.start()

    spaceship = Spaceship(p_space, start, pol)
    world = TheSpace(p_space, spaceship, verbose=verbose)
    world.simulate(n, start)
    if plot:
        world.paint_space()

    sim_timer.stop()
    return spaceship
def read_objects(N, _lambda):
    """
    Rebuild an ``infiniteTime`` model from the pickles under ``examples/data``.

    Parameters
    ----------
    N
        Size passed to ``build_space`` and embedded in every file name.
    _lambda
        Second positional argument of ``infiniteTime``.

    Returns
    -------
    infiniteTime
        Model created with ``load_from_files=True`` and then populated through
        ``load_tensors`` and ``build_LP``.
    """
    load_timer = Timer('Cargando', verbose=True)
    load_timer.start()
    p_space, space = build_space(N)

    def _unpickle(stem):
        # NOTE: pickle.load can execute arbitrary code -- only read files
        # produced by a trusted run.
        with open(f'examples/data/{stem}{N}.pickle', 'rb') as file:
            return pickle.load(file)

    p_space.F = _unpickle('Forbidden')
    Q_tensor = _unpickle('Q_tensor')
    r_tensor = _unpickle('r_tensor')
    a_tensors = _unpickle('a_tensor')
    A_matrix = _unpickle('A_matrix')
    b_vector = _unpickle('b_vector')
    c_vector = _unpickle('c_vector')

    p_mdp = infiniteTime(p_space,
                         _lambda,
                         sparse=True,
                         sense=MDP.minimize,
                         verbose=True,
                         load_from_files=True)
    p_mdp.load_tensors(Q_tensor, r_tensor, a_tensors)
    p_mdp.build_LP(A_matrix, b_vector, c_vector)
    load_timer.stop()

    space.paint_space()
    return p_mdp
def upper_bound(self, sample, alpha=0.01):
    """
    Estimate a statistical upper bound at the current first-stage solution.

    For every sub-problem in ``sample`` the dual is built at ``self.x_hat``
    and solved; only the third element returned by ``solve_dual`` is kept.

    Parameters
    ----------
    sample
        Sized iterable of sub-problems exposing ``build_dual`` and
        ``solve_dual``.
    alpha: float
        ``1 - alpha`` is the level passed to ``sts.norm.ppf``.

    Returns
    -------
    tuple
        ``(vals, mu, sigma, upper_bound)``: the recorded values, their mean,
        their sample standard deviation (``N - 1`` denominator) and the
        ``[0][0]`` entry of
        ``c @ x_hat + mu + ppf(1 - alpha) * sigma / sqrt(N)``.
    """
    recourse_values = []
    N = len(sample)

    banner = "-" * 100
    self.outputLog.info(f'\n{banner}\nStarted computing upper bound\n{banner}')
    timer = Timer('upper bound')
    self.computing_times['upper_bound'] = timer
    timer.start()

    for subproblem in sample:
        subproblem.build_dual(self.x_hat)
        _, _, wk = subproblem.solve_dual()
        recourse_values.append(wk)

    mu = sum(recourse_values) / N
    squared_deviation = sum((v - mu)**2 for v in recourse_values)
    sigma = np.sqrt(squared_deviation / (N - 1))
    margin = sts.norm.ppf(1 - alpha) * sigma / np.sqrt(N)
    upper_bound = self.first_stage.c @ self.x_hat + mu + margin
    timer.stop()

    self.outputLog.info(
        f'{banner}\nFinish computing upper bound. Total time was {timer.total_time} seconds.\n{banner}'
    )
    return recourse_values, mu, sigma, upper_bound[0][0]
def create_and_pickle(N):
    """
    Build the size-``N`` model and pickle its pieces under ``examples/data``.

    Seven files are written, in this order: ``Forbidden``, ``Q_tensor``,
    ``r_tensor``, ``a_tensor``, ``A_matrix``, ``b_vector`` and ``c_vector``,
    each named ``<stem><N>.pickle``.

    NOTE(review): ``_lambda`` is not a parameter of this function, so it must
    be resolvable from an enclosing or module scope -- confirm where it is
    defined.

    Parameters
    ----------
    N
        Size passed to ``build_space`` and embedded in every file name.
    """
    build_timer = Timer('cargando', verbose=True)
    build_timer.start()
    p_space, space = build_space(N)
    space.paint_space()
    p_mdp = infiniteTime(p_space,
                         _lambda,
                         sparse=True,
                         sense=MDP.minimize,
                         verbose=False)
    build_timer.stop()

    # Each attribute is read only after its file has been opened, so the
    # order of side effects matches a hand-unrolled sequence of dumps.
    artefacts = (
        ('Forbidden', lambda: p_mdp.space.F),
        ('Q_tensor', lambda: p_mdp.Q_tensor),
        ('r_tensor', lambda: p_mdp.r_tensor),
        ('a_tensor', lambda: p_mdp.a_tensors),
        ('A_matrix', lambda: p_mdp.LP.LP_A),
        ('b_vector', lambda: p_mdp.LP.LP_b),
        ('c_vector', lambda: p_mdp.LP.LP_c),
    )
    for stem, fetch in artefacts:
        with open(f'examples/data/{stem}{N}.pickle', 'wb') as file:
            pickle.dump(fetch(), file)
def optimal_value(self, method, **kwargs):
    """
    Solve the problem with the requested method, updating the instance in place.

    Parameters
    ----------
    method
        One of ``MDP.ValueIteration``, ``MDP.PolicyIteration`` or
        ``MDP.LinearPrograming``. Any other value does nothing.
    **kwargs
        Options looked up through ``check_kwargs``:

        * value iteration: ``v_0`` (default: a double copy of ``self.v``),
          ``epsilon`` (default ``1E-1``) and ``improvement_method``
          (default ``'GS'``);
        * policy iteration: ``initial_policy`` (default ``self.policy``, which
          receives a random policy first when its matrix is ``None``);
        * linear programming: ``alpha`` (default: uniform weights
          ``1 / l_S``).

    Returns
    -------
    None
        Results are stored on the instance. A ``Timer`` named after the method
        is stored in ``self.computing_times``. In the linear-programming
        branch ``self.policy`` and ``self.v`` are replaced when a solution is
        returned; otherwise a warning is logged.
    """
    if method == MDP.ValueIteration:
        try:
            default_v0 = self.v.clone().double()
        except AttributeError:
            # self.v is not a torch tensor here -- the linear-programming
            # branch below stores the NumPy result of policy_valuation in it.
            # NumPy arrays have no .double(), so convert explicitly and
            # restore the (n, 1) column layout used when self.v is created.
            default_v0 = pt.as_tensor(np.asarray(self.v),
                                      dtype=pt.double).reshape(-1, 1)
        v_0 = check_kwargs('v_0', default_v0, kwargs)
        epsilon = check_kwargs('epsilon', 1E-1, kwargs)
        improvement_method = check_kwargs('improvement_method', 'GS', kwargs)

        t = Timer(f'{MDP.ValueIteration}_{improvement_method}',
                  verbose=self.verbose)
        self.computing_times[t.name] = t
        t.start()
        self._value_iteration(v_0, epsilon, improvement_method)
        self.policy.add_policy(self.a_policy)
        t.stop()

    elif method == MDP.PolicyIteration:
        initial_policy = check_kwargs('initial_policy', self.policy, kwargs)
        if 'initial_policy' in kwargs:
            initial_policy = kwargs['initial_policy']
        elif self.policy.matrix is None:
            # Falling back to self.policy: make sure it holds a policy.
            self.policy.create_random_policy()

        t = Timer(f'{MDP.PolicyIteration}')
        self.computing_times[t.name] = t
        t.start()
        self._policy_iteration(initial_policy)
        t.stop()

    elif method == MDP.LinearPrograming:
        alpha = check_kwargs('alpha', 1 / self.l_S * np.ones(shape=self.l_S),
                             kwargs)
        t = Timer(f'{MDP.LinearPrograming}')
        self.computing_times[t.name] = t
        t.start()
        x = self._linear_programing(alpha)
        t.stop()

        if x is not None:
            self.policy = self._policy_from_lp(x)
            self.v = self.policy_valuation(self.policy)
        else:
            self.logger.warning(
                "Something went wrong. Infeasible model...")
def _lambda_to_1(lb=0.9, ub=0.999):
    """
    Plot how the solution changes as lambda sweeps 30 values in ``[lb, ub]``.

    For every lambda the model is solved twice, with ``lambda_e_1=True`` and
    then ``lambda_e_1=False``; both runs are timed in two cumulative timers
    and the second run is the one kept in ``MDPS`` and plotted. Four figures
    are produced: the value function, the value function scaled by
    ``1 - lambda``, the policy on each lambda interval over which it stays
    unchanged, and the ``'GS'`` computing time per lambda.

    NOTE(review): the loop nesting was reconstructed from whitespace-collapsed
    source; confirm it against the original file.

    Parameters
    ----------
    lb: float
        Smallest lambda.
    ub: float
        Largest lambda.
    """
    cmap = cm.get_cmap('coolwarm', 256)
    _Lambda = np.linspace(lb, ub, 30)

    def f(s):
        return 10 * s

    def c(a):
        return 3 * a - 0.01 * a**2

    def h(s):
        return s

    def colour(value):
        # Map a lambda in [lb, ub] onto the colormap's [0, 1] range.
        return cmap((value - lb) / (ub - lb))

    def plot_policy(policy, states, mid_lambda, label):
        # Plot the policy up to five states past the last state whose action
        # is positive.
        i_0 = 0
        for i in states:
            if policy[i] > 0:
                i_0 = i
        i_0 += 5
        ax3.plot(range(i_0),
                 list(policy.values())[:i_0],
                 '-o',
                 c=colour(mid_lambda),
                 label=label)

    MDPS = dict()
    f1, ax1, ax1c = cmap_plot()
    f2, ax2, ax2c = cmap_plot()
    f3, ax3, ax3c = cmap_plot()
    T_T = Timer('Con truco')
    T_F = Timer('Sin truco')

    # Defined up front so the final plot works even if the policy never
    # changes across the sweep (c_u was previously unbound in that case).
    c_u = lb
    for lam in _Lambda:
        print(lam)
        T_T.start()
        inv_reward = inventoryReward(inv_space, f, c, h, K, lam,
                                     lambda_e_1=True)
        MDPS[lam] = infiniteTime(inv_space, inv_reward, lam)
        MDPS[lam]._value_iteration()
        T_T.stop()

        T_F.start()
        inv_reward = inventoryReward(inv_space, f, c, h, K, lam,
                                     lambda_e_1=False)
        MDPS[lam] = infiniteTime(inv_space, inv_reward, lam)
        MDPS[lam]._value_iteration()
        T_F.stop()

        lam_label = r'$\lambda = $' + str(round(lam, 4))
        ax1.plot(MDPS[lam].S, MDPS[lam].v, c=colour(lam), label=lam_label)
        ax2.plot(MDPS[lam].S,
                 MDPS[lam].v * (1 - lam),
                 c=colour(lam),
                 label=lam_label)

        if lam == lb:
            c_pol = MDPS[lam].a_policy
            c_l = lam
        if MDPS[lam].a_policy != c_pol:
            # The policy changed: plot the one valid on [c_l, c_u) and start
            # a new interval at this lambda.
            c_u = lam
            plot_policy(c_pol, MDPS[lam].space.S, (c_u + c_l) / 2,
                        r'$\lambda \in$ ' +
                        f'[{round(c_l, 3)}, {round(c_u, 3)})')
            c_pol = MDPS[lam].a_policy
            c_l = c_u

    # Policy of the last interval, which runs up to ub inclusive.
    plot_policy(c_pol, MDPS[lam].space.S, (c_u + ub) / 2,
                r'$\lambda \in$ ' + f'[{round(c_l, 3)}, {round(ub, 3)}]')

    norm = mpl.colors.Normalize(vmin=lb, vmax=ub)
    mpl.colorbar.ColorbarBase(ax1c, cmap=cmap, norm=norm)
    mpl.colorbar.ColorbarBase(ax2c, cmap=cmap, norm=norm)
    mpl.colorbar.ColorbarBase(ax3c, cmap=cmap, norm=norm)
    ax1.set_xlabel('Estados')
    ax2.set_xlabel('Estados')
    ax3.set_xlabel('Estados')
    # ax1 shows v and ax2 shows (1 - lambda) * v; the labels used to be swapped.
    ax1.set_ylabel(r'$v^*_\lambda$')
    ax2.set_ylabel(r'$(1 - \lambda) v^*_\lambda$')
    ax3.set_ylabel('Acción')
    ax3.legend()

    f4, ax4 = plt.subplots()
    ax4.plot(_Lambda,
             [MDPS[lam].computing_times['GS'].total_time for lam in _Lambda])
    ax4.set_xlabel(r'$\lambda$')
    ax4.set_ylabel(r'Tiempo de cómputo (s)')
    print(
        f'El tiempo total que se ahorra uno es {(T_F.total_time - T_T.total_time) }, en porcentages {(T_F.total_time - T_T.total_time) / T_T.total_time}'
    )
    plt.show()
def cont_experiments():
    """
    Run the continuous-scenario experiment and plot both bound estimates.

    Steps: create the first stage, create ``n`` second-stage problems and
    solve the two-stage program with ``multi_cut=False``, create one sample of
    size ``N`` for the upper bound and ``M`` samples of size ``n`` for the
    lower bound, then compute each bound and show a histogram of its values.
    """
    G = 3
    P = 3
    n, M, N = 5000, 100, 15000
    alpha = 0.01
    timers = {}

    fs_timer = Timer('first stage creation')
    fs_timer.start()
    fs = create_FS(G)
    fs_timer.stop()
    timers['fs'] = fs_timer

    ss_timer = Timer('second stage creation', verbose=True)
    ss_timer.start()
    ssps = create_SS_c(n, G, P)
    tssp = TwoStageSP(fs, ssps, verbose=True)
    ss_timer.stop()
    timers['ss'] = ss_timer

    tssp.solve(multi_cut=False)

    ub_timer = Timer('ub ss creation', verbose=True)
    ub_timer.start()
    ub_sstages = create_SS_c(N, new=False)
    ub_timer.stop()
    timers['ub_ss'] = ub_timer

    lb_timer = Timer('lb ss creation', verbose=True)
    lb_timer.start()
    lb_samples = [create_SS_c(n, new=False) for _ in range(M)]
    lb_timer.stop()
    timers['lb_ss'] = lb_timer

    # Upper bound: distribution of the second-stage values at x_hat.
    vals, mean, sigma, ub = tssp.upper_bound(ub_sstages, alpha=alpha)
    print(mean, sigma, ub)
    sns.displot(data=vals, kde=True)
    plt.xlabel(r"$Q(\hat{x}, \xi)$")
    plt.show()

    # Lower bound: distribution of the optimal values of the sampled problems.
    vals, mean, sigma, lb = tssp.lower_bound(lb_samples, alpha=alpha)
    print(mean, sigma, lb)
    sns.displot(data=vals, kde=True)
    plt.xlabel(r"$\hat{f}_n$")
    plt.show()
def l_shaped(self, reset=False):
    """
    Run the single-cut L-shaped loop on the two-stage problem.

    Each iteration solves the first stage, then builds and solves the dual of
    every second-stage sub-problem at the current ``x_hat``. The first
    infeasible sub-problem adds one feasibility cut and ends the scan;
    otherwise one probability-weighted optimality cut is added. The loop ends
    when ``theta_hat`` is at least the expected recourse value ``w`` (both
    rounded to 4 decimals).

    NOTE(review): the block nesting was reconstructed from
    whitespace-collapsed source. In particular the bound-logging ``if`` is
    placed inside ``if not infeasible`` because ``w`` is only assigned
    there -- confirm against the original file.

    Parameters
    ----------
    reset: bool
        When true, ``self.first_stage.reset_model()`` is called first.

    Returns
    -------
    tuple
        ``(r, s, v)``: the ``TallyCounter`` objects for feasibility cuts,
        optimality cuts and iterations.

    Side effects: sets ``self.x_hat``, ``self.theta_hat``, ``self.objVal`` and
    ``self.solved`` and stores a timer under
    ``self.computing_times['L_shaped']``.
    """
    if reset:
        self.first_stage.reset_model()
    # The single-cut variant works with exactly one theta variable.
    assert self.first_stage.theta.shape[0] == 1
    self.computing_times['L_shaped'] = Timer('L_shaped')
    # Loop flag: set once the optimality condition holds.
    stop = False
    # Counters for feasibility cuts (r), optimality cuts (s) and iterations (v).
    r, s, v = TallyCounter('Feasibility'), TallyCounter(
        'Optimality'), TallyCounter('Iterations')
    self.computing_times['L_shaped'].start()
    self.infoLog.info(f'\n{"-" * 100}\nL-Shaped' + \
                      f'\nConstraints: {len(self.first_stage.model.getConstrs())}' + \
                      f'\tVariables: {len(self.first_stage.model.getVars())}\n{"-" * 100}')
    self.outputLog.info(f'Lower bound\tUpper Bound\tGAP')
    while not stop:
        # Solve the first stage and read the current x_hat and theta_hat.
        x_hat, theta_hat = self.first_stage.solve()
        # Accumulators for the aggregated optimality cut E x >= e - theta.
        E = np.zeros(shape=(1, self.first_stage.n))
        e = 0
        infeasible = False
        for sp in self.second_stage_sps:
            sp.build_dual(x_hat)
            feasibility, pi_sigma, wk = sp.solve_dual()
            if feasibility:
                # pi_sigma holds the dual solution used for the optimality cut.
                pi = pi_sigma
                E += sp.prob * pi.T @ sp.T_k
                e += sp.prob * pi.T @ sp.h_k
            else:
                infeasible = True
                # pi_sigma is used here to build the feasibility cut D x >= d.
                sigma = pi_sigma
                D = sigma.T @ sp.T_k
                d = sigma.T @ sp.h_k
                # Add the feasibility cut, named after the current count.
                self.first_stage.model.addConstr(
                    D @ self.first_stage.x >= d, name=f'FC_{r.count_}')
                self.outputLog.info(f'-inf\tinf\t-\tFC')
                r.count()
                # Stop scanning sub-problems; the first stage is re-solved.
                break
        if not infeasible:
            # Add the optimality cut built from all sub-problems.
            self.first_stage.model.addConstr(
                E @ self.first_stage.x >= e - self.first_stage.theta,
                name=f'OC_{s.count_}')
            if not self.first_stage.consider_theta:
                # First optimality cut: theta enters the objective from now on.
                self.first_stage.consider_theta = True
                self.first_stage.model.setObjective(
                    self.first_stage.c @ self.first_stage.x +
                    self.first_stage.theta)
            s.count()
            # Optimality check: compare theta_hat with the recourse value w.
            w = e - E @ x_hat
            if round(theta_hat[0], 4) >= round(w[0][0], 4):
                stop = True
            if theta_hat[0] > -np.inf:
                # Record and log the current lower/upper bound pair.
                base = (self.first_stage.c @ x_hat)[0][0]
                lb, ub = ((base + theta_hat)[0], (base + w[0][0]))
                v.add((lb, ub))
                self.outputLog.info(
                    f'{round(lb, 4)}\t{round(ub, 4)}\t{round((ub - lb) / ub, 4)}'
                )
            else:
                # theta_hat is still -inf: count the iteration without bounds.
                v.count()
    self.computing_times['L_shaped'].stop()
    # Final solve to store the solution on the instance.
    self.x_hat, self.theta_hat = self.first_stage.solve()
    self.objVal = self.first_stage.model.objVal
    self.solved = True
    self.outputLog.info(f'\nL-shaped converged\n{"-" * 100}' + \
                        f'\nTime:\t{self.computing_times["L_shaped"].total_time} seconds' + \
                        f'\nIterations:\t{v.count_}' + \
                        f'\nOptimal value:\t{self.objVal}' + \
                        f'\nTotal cuts:\t{r.count_ + s.count_}' + \
                        f'\n\tfeasibility:\t{r.count_}({100 * round(r.count_ / (r.count_ + s.count_), 3)}%)' + \
                        f'\n\toptimality:\t{s.count_}' + \
                        f'({round(100 * s.count_ / (r.count_ + s.count_), 3)}%)')
    return r, s, v
def multi_cut_l_shaped(self, reset=False):
    """
    Run the multi-cut L-shaped loop on the two-stage problem.

    Each iteration solves the first stage, then builds and solves the dual of
    every second-stage sub-problem at the current ``x_hat``. A feasible
    sub-problem ``k`` whose ``theta_hat[k]`` is below its value ``wk`` (both
    rounded to 4 decimals) adds its own optimality cut; the first infeasible
    sub-problem adds a feasibility cut and ends the scan. The loop ends after
    an iteration with no infeasibility and no new optimality cut.

    NOTE(review): the block nesting was reconstructed from
    whitespace-collapsed source. In particular the bound-logging ``if`` is
    placed at loop level, so it runs on every iteration -- confirm against
    the original file.

    Parameters
    ----------
    reset: bool
        When true, ``self.first_stage.reset_model(True, self.num_scenarios)``
        is called first.

    Returns
    -------
    tuple
        ``(r, s, v)``: the feasibility ``TallyCounter``, a list with one
        optimality ``TallyCounter`` per scenario, and the iterations
        ``TallyCounter``.

    Side effects: sets ``self.x_hat``, ``self.theta_hat``, ``self.objVal`` and
    ``self.solved`` and stores a timer under
    ``self.computing_times['multi_cut']``.
    """
    if reset:
        self.first_stage.reset_model(True, self.num_scenarios)
    # The multi-cut variant needs more than one theta variable.
    assert self.first_stage.theta.shape[0] > 1
    self.computing_times['multi_cut'] = Timer('multi_cut')
    # Loop flag: set once an iteration is feasible and adds no cut.
    stop = False
    # Counters: feasibility cuts (r), optimality cuts per scenario (s),
    # iterations (v).
    r = TallyCounter('Feasibility')
    s = [
        TallyCounter(f'Optimality_{k}') for k in range(self.num_scenarios)
    ]
    v = TallyCounter('Iterations')
    self.computing_times['multi_cut'].start()
    # Tracks which scenarios already have at least one optimality cut.
    consider_theta = [False] * self.num_scenarios
    self.outputLog.info(f'\n{"-" * 100}\nMulti-cut L-Shaped' + \
                        f'\nConstraints: {len(self.first_stage.model.getConstrs())}' + \
                        f'\tVariables: {len(self.first_stage.model.getVars())}\n{"-" * 100}')
    self.outputLog.info(f'Lower bound\tUpper Bound\tGAP')
    while not stop:
        # Solve the first stage and read the current x_hat and theta_hat.
        x_hat, theta_hat = self.first_stage.solve()
        # Scan the sub-problems; w accumulates the expected recourse value.
        infeasible = False
        optimal = True
        w = 0
        for k, sp in enumerate(self.second_stage_sps):
            sp.build_dual(x_hat)
            feasibility, pi_sigma, wk = sp.solve_dual()
            w += sp.prob * wk
            if feasibility:
                pi = pi_sigma
                if round(theta_hat[k], 4) < round(wk, 4):
                    # theta_k underestimates scenario k: add its optimality cut.
                    optimal = False
                    E = pi.T @ sp.T_k
                    e = pi.T @ sp.h_k
                    self.first_stage.model.addConstr(
                        E @ self.first_stage.x + self.first_stage.theta[k] >= e,
                        name=f'OC_{k}{s[k].count_}')
                    s[k].count()
                    consider_theta[k] = True
            else:
                infeasible = True
                # pi_sigma is used here to build the feasibility cut D x >= d.
                sigma = pi_sigma
                D = sigma.T @ sp.T_k
                d = sigma.T @ sp.h_k
                # Add the feasibility cut.
                self.first_stage.model.addConstr(
                    D @ self.first_stage.x >= d, name=f'FC_{k}{r.count_}')
                self.outputLog.info(f'-inf\tinf\t-\tFC')
                r.count()
                # Stop scanning; w is only partially accumulated here.
                break
        if not infeasible and optimal:
            stop = True
        elif not infeasible and not optimal:
            if all(consider_theta) and not self.first_stage.consider_theta:
                # Every scenario has a cut: theta enters the objective,
                # weighted by the scenario probabilities.
                self.first_stage.consider_theta = True
                self.first_stage.model.setObjective(
                    self.first_stage.c @ self.first_stage.x +
                    self.probabilities.T @ self.first_stage.theta)
        if sum(theta_hat) > -np.inf:
            # Record and log the current lower/upper bound pair.
            base = (self.first_stage.c @ x_hat)[0][0]
            lb, ub = (base + self.probabilities.T @ theta_hat, base + w)
            v.add((lb, ub))
            self.outputLog.info(
                f'{round(lb, 4)}\t{round(ub, 4)}\t{round((ub - lb) / ub, 4)}'
            )
        else:
            # Some theta_hat is still -inf: count the iteration without bounds.
            v.count()
    self.computing_times['multi_cut'].stop()
    # Final solve to store the solution on the instance.
    self.x_hat, self.theta_hat = self.first_stage.solve()
    self.objVal = self.first_stage.model.objVal
    self.solved = True
    self.outputLog.info(f'\nMulti-cut L-shaped converged\n{"-" * 100}' + \
                        f'\nTime:\t{self.computing_times["multi_cut"].total_time} seconds' + \
                        f'\nIterations:\t{v.count_}' + \
                        f'\nOptimal value:\t{self.first_stage.model.objVal}' + \
                        f'\nTotal cuts:\t{r.count_ + sum(o.count_ for o in s)}' + \
                        f'\n\tfeasibility:\t{r.count_}({100 * round(r.count_ / (r.count_ + sum(o.count_ for o in s)), 3)}%)' + \
                        f'\n\toptimality:\t{sum(o.count_ for o in s)}' + \
                        f'({round(100 * sum(o.count_ for o in s) / (r.count_ + sum(o.count_ for o in s)), 3)}%)')
    return r, s, v