def small_case():
    tc = Timer('loading', verbose=True)
    tc.start()
    N = 100

    # State space: all integer grid points (i, j) with 0 <= i, j <= N.
    S = list(product(tuple(range(N + 1)), tuple(range(N + 1))))
    # Action set: the four moves (U, D, L, R are assumed to be defined by the surrounding module).
    A = {U, D, L, R}

    # Random rectangular forbidden areas: one block in the middle of the grid
    # and one near each of two opposite corners.
    forbidden = random_red_areas_rectangulares((int(0.3 * N), int(0.7 * N)), (int(0.3 * N), int(0.7 * N)), p=0.03, N=N)
    forbidden += random_red_areas_rectangulares((int(0.8 * N), N), (0, int(0.2 * N)), p=0.01, N=N)
    forbidden += random_red_areas_rectangulares((0, int(0.2 * N)), (int(0.8 * N), N), p=0.01, N=N)

    target = list(product(tuple(range(N - 2, N)), tuple(range(N - 2, N))))

    M = 2000
    p_space = pplaneSpace(A, S, target, forbidden, M)

    _lambda = 0.9

    p_mdp = infiniteTime(p_space, _lambda, sparse=True, sense=MDP.minimize, verbose=False)
    tc.stop()

    polVI, vVI = p_mdp.solve(method=MDP.ValueIteration)
    polPI, vPI = p_mdp.solve(method=MDP.PolicyIteration)
    polLP, vLP = p_mdp.solve(method=MDP.LinearPrograming)

    print(p_mdp.computing_times)
    simulate_and_plot(p_space, polPI, n=10)
    simulate_and_plot(p_space, polVI, n=10)
    simulate_and_plot(p_space, polLP, n=10)
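This example builds an (N + 1) x (N + 1) grid MDP with randomly placed forbidden areas, solves it by value iteration, policy iteration, and linear programming, and simulates ten trajectories under each resulting policy. All three methods solve the same discounted problem, so the three value functions should agree up to the value-iteration tolerance, even if the policies differ on ties.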
Example No. 2
    def lower_bound(self, samples, alpha=0.01):
        vals = []
        M = len(samples)
        n = len(samples[0])
        self.outputLog.info(
            f'{"-" * 100}\nStarted computing lower bound\n{"-" * 100}')
        t = Timer('lower bound')
        self.computing_times['lower_bound'] = t
        t.start()

        for s in samples:
            self.second_stage_sps = s
            self.solve(reset=True)
            vals.append(self.objVal)

        # Sample mean and unbiased sample standard deviation of the M optimal
        # values.
        mu = sum(vals) / M
        sigma = np.sqrt(sum((v - mu)**2 for v in vals) / (M - 1))

        # One-sided lower bound: sts.norm.ppf(alpha) is negative for small
        # alpha, so the sample mean is shifted down.
        lower_bound = mu + sts.norm.ppf(alpha) * sigma / np.sqrt(M)
        t.stop()
        self.outputLog.info(
            f'Finished computing lower bound. Total time was {t.total_time} seconds.'
        )

        return vals, mu, sigma, lower_bound
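The statistic above is the standard sample-average-approximation (SAA) lower bound: the mean of the M sampled optimal values, shifted down by a one-sided normal quantile. A minimal self-contained sketch of the same computation, assuming vals already holds the M optimal values of the sampled problems:

import numpy as np
import scipy.stats as sts

def saa_lower_bound(vals, alpha=0.01):
    # Sample mean and unbiased sample standard deviation.
    M = len(vals)
    mu = sum(vals) / M
    sigma = np.sqrt(sum((v - mu) ** 2 for v in vals) / (M - 1))
    # sts.norm.ppf(alpha) is negative for small alpha, so this shifts mu down.
    return mu + sts.norm.ppf(alpha) * sigma / np.sqrt(M)

Example No. 3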
def simulate_and_plot(p_space, pol, start=(0, 0), n=10, verbose=False, plot=True):
    t = Timer("Simulatoin", verbose=verbose)
    t.start()
    spaceship = Spaceship(p_space, start, pol)
    the_space = TheSpace(p_space, spaceship, verbose=verbose)
    the_space.simulate(n, start)
    if plot:
        the_space.paint_space()
    t.stop()
    return spaceship
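Typical usage appears in small_case above: given a solved policy, simulate_and_plot runs n simulated trajectories of the spaceship from start and, if plot is set, paints the resulting space.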
Example No. 4
def cont_experiments():
    G = 3
    P = 3

    n, M, N = 5000, 100, 15000
    alpha = 0.01
    timers = dict()

    t = Timer('first stage creation')

    t.start()
    fs = create_FS(G)
    t.stop()
    timers['fs'] = t

    t = Timer('second stage creation', verbose=True)
    t.start()
    ssps = create_SS_c(n, G, P)
    tssp = TwoStageSP(fs, ssps, verbose=True)
    t.stop()
    timers['ss'] = t

    tssp.solve(multi_cut=False)
    # print(f'L sol:\nx:\t{tssp.x_hat}\ntheta:\t{tssp.theta_hat}')

    t = Timer('ub ss creation', verbose=True)
    t.start()
    ub_sstages = create_SS_c(N, new=False)
    t.stop()
    timers['ub_ss'] = t

    t = Timer('lb ss creation', verbose=True)
    t.start()
    lb_samples = []
    for i in range(M):
        sample = create_SS_c(n, new=False)
        lb_samples.append(sample)

    t.stop()
    timers['lb_ss'] = t

    # tssp.confidence_interval(lower_bound_samples=lb_samples, upper_bound_sample=ub_sstages, alpha=alpha)

    vals, mean, sigma, ub = tssp.upper_bound(ub_sstages, alpha=alpha)
    print(mean, sigma, ub)
    sns.displot(data=vals, kde=True)
    plt.xlabel(r"$Q(\hat{x}, \xi)$")
    plt.show()
    vals, mean, sigma, lb = tssp.lower_bound(lb_samples, alpha=alpha)
    print(mean, sigma, lb)
    sns.displot(data=vals, kde=True)
    plt.xlabel(r"$\hat{f}_n$")
    plt.show()
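The commented-out tssp.confidence_interval call suggests the two bounds are meant to be combined; a hedged sketch of that combination, reusing the same calls as above:

# Sketch: combine the one-sided bounds into an approximate
# (1 - 2 * alpha) confidence interval on the true optimal value.
vals, mean, sigma, ub = tssp.upper_bound(ub_sstages, alpha=alpha)
vals, mean, sigma, lb = tssp.lower_bound(lb_samples, alpha=alpha)
print(f'Approximate {1 - 2 * alpha:.0%} confidence interval: [{lb}, {ub}]')

Example No. 5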
def read_objects(N, _lambda):
    tc = Timer('Loading', verbose=True)
    tc.start()

    p_space, space = build_space(N)

    # Load the pre-built objects pickled by create_and_pickle below.
    path = f'examples/data/Forbidden{N}.pickle'
    with open(path, 'rb') as file:
        p_space.F = pickle.load(file)

    path = f'examples/data/Q_tensor{N}.pickle'
    with open(path, 'rb') as file:
        Q_tensor = pickle.load(file)

    path = f'examples/data/r_tensor{N}.pickle'
    with open(path, 'rb') as file:
        r_tensor = pickle.load(file)

    path = f'examples/data/a_tensor{N}.pickle'
    with open(path, 'rb') as file:
        a_tensors = pickle.load(file)

    path = f'examples/data/A_matrix{N}.pickle'
    with open(path, 'rb') as file:
        A_matrix = pickle.load(file)

    path = f'examples/data/b_vector{N}.pickle'
    with open(path, 'rb') as file:
        b_vector = pickle.load(file)

    path = f'examples/data/c_vector{N}.pickle'
    with open(path, 'rb') as file:
        c_vector = pickle.load(file)

    p_mdp = infiniteTime(p_space, _lambda, sparse=True, sense=MDP.minimize, verbose=True, load_from_files=True)

    p_mdp.load_tensors(Q_tensor, r_tensor, a_tensors)
    p_mdp.build_LP(A_matrix, b_vector, c_vector)
    tc.stop()
    space.paint_space()

    return p_mdp
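read_objects is the counterpart of create_and_pickle below: it reloads the pickled forbidden set, the Q/r/a tensors, and the LP matrices for a grid of size N, so the MDP can be rebuilt without recomputing them.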
Example No. 6
    def optimal_value(self, method, **kwargs):
        """
        Abstract method that computes the value function for the problem and creates the optimal policy.

        Arguments
        _________
        method:
            the method ttha
        Returns
        -------
        float
            The value function for the given time and state.

        """
        if method == MDP.ValueIteration:
            try:
                v_0 = check_kwargs('v_0', self.v.clone().double(), kwargs)
            except AttributeError:
                v_0 = check_kwargs('v_0', self.v.copy().double(), kwargs)
            epsilon = check_kwargs('epsilon', 1E-1, kwargs)
            improvement_method = check_kwargs('improvement_method', 'GS',
                                              kwargs)
            t = Timer(f'{MDP.ValueIteration}_{improvement_method}',
                      verbose=self.verbose)
            self.computing_times[t.name] = t
            t.start()
            self._value_iteration(v_0, epsilon, improvement_method)
            self.policy.add_policy(self.a_policy)
            t.stop()

        elif method == MDP.PolicyIteration:
            initial_policy = check_kwargs('initial_policy', self.policy,
                                          kwargs)
            # If no initial policy was supplied and none exists yet, start
            # policy iteration from a random policy.
            if 'initial_policy' not in kwargs and self.policy.matrix is None:
                self.policy.create_random_policy()
            t = Timer(f'{MDP.PolicyIteration}')
            self.computing_times[t.name] = t
            t.start()
            self._policy_iteration(initial_policy)
            t.stop()

        elif method == MDP.LinearPrograming:
            alpha = check_kwargs('alpha',
                                 1 / self.l_S * np.ones(shape=self.l_S),
                                 kwargs)
            t = Timer(f'{MDP.LinearPrograming}')
            self.computing_times[t.name] = t
            t.start()
            x = self._linear_programing(alpha)
            t.stop()
            if x is not None:
                self.policy = self._policy_from_lp(x)
                self.v = self.policy_valuation(self.policy)
            else:
                self.logger.warning(
                    "Something went wrong. Infeasible model...")
Example No. 7
    def upper_bound(self, sample, alpha=0.01):
        vals = []
        N = len(sample)
        self.outputLog.info(
            f'\n{"-" * 100}\nStarted computing upper bound\n{"-" * 100}')
        t = Timer('upper bound')
        self.computing_times['upper_bound'] = t
        t.start()
        for s in sample:
            s.build_dual(self.x_hat)
            feasibility, pi_sigma, wk = s.solve_dual()
            vals.append(wk)

        # Sample mean and unbiased sample standard deviation of the N dual
        # objective values.
        mu = sum(vals) / N
        sigma = np.sqrt(sum((v - mu)**2 for v in vals) / (N - 1))

        # One-sided upper bound: deterministic first-stage cost plus the
        # sample mean shifted up by the (1 - alpha) normal quantile.
        upper_bound = self.first_stage.c @ self.x_hat + mu + sts.norm.ppf(
            1 - alpha) * sigma / np.sqrt(N)
        t.stop()
        self.outputLog.info(
            f'{"-" * 100}\nFinish computing upper bound. Total time was {t.total_time} seconds.\n{"-" * 100}'
        )
        return vals, mu, sigma, upper_bound[0][0]
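Note the symmetry with lower_bound above: here the quantile at 1 - alpha shifts the sample mean upwards, and the deterministic first-stage cost self.first_stage.c @ self.x_hat is added because each dual solve only values the second stage.

Example No. 8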
def create_and_pickle(N):
    tc = Timer('loading', verbose=True)
    tc.start()

    p_space, space = build_space(N)
    space.paint_space()
    # _lambda is assumed to be defined at module level (e.g. the 0.9 discount
    # factor used in small_case above).
    p_mdp = infiniteTime(p_space, _lambda, sparse=True, sense=MDP.minimize, verbose=False)
    tc.stop()

    path = f'examples/data/Forbidden{N}.pickle'
    with open(path, 'wb') as file:
        pickle.dump(p_mdp.space.F, file)

    path = f'examples/data/Q_tensor{N}.pickle'
    with open(path, 'wb') as file:
        pickle.dump(p_mdp.Q_tensor, file)

    path = f'examples/data/r_tensor{N}.pickle'
    with open(path, 'wb') as file:
        pickle.dump(p_mdp.r_tensor, file)

    path = f'examples/data/a_tensor{N}.pickle'
    with open(path, 'wb') as file:
        pickle.dump(p_mdp.a_tensors, file)

    path = f'examples/data/A_matrix{N}.pickle'
    with open(path, 'wb') as file:
        pickle.dump(p_mdp.LP.LP_A, file)

    path = f'examples/data/b_vector{N}.pickle'
    with open(path, 'wb') as file:
        pickle.dump(p_mdp.LP.LP_b, file)

    path = f'examples/data/c_vector{N}.pickle'
    with open(path, 'wb') as file:
        pickle.dump(p_mdp.LP.LP_c, file)
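The seven pickle paths written here match the ones loaded by read_objects above, so the two functions form a save/restore pair for the expensive tensor and LP construction.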
Example No. 9
def _lambda_to_1(lb=0.9, ub=0.999):

    cmap = cm.get_cmap('coolwarm', 256)
    _Lambda = np.linspace(lb, ub, 30)

    def f(s):
        return 10 * s

    def c(a):
        return 3 * a - 0.01 * a**2

    def h(s):
        return s

    MDPS = dict()

    f1, ax1, ax1c = cmap_plot()
    f2, ax2, ax2c = cmap_plot()
    f3, ax3, ax3c = cmap_plot()

    # Compare value iteration with (T_T) and without (T_F) the lambda_e_1
    # trick; inv_space and K are assumed to be defined at module level.
    T_T = Timer('with trick')
    T_F = Timer('without trick')
    for l in _Lambda:
        print(l)
        T_T.start()
        inv_reward = inventoryReward(inv_space, f, c, h, K, l, lambda_e_1=True)
        MDPS[l] = infiniteTime(inv_space, inv_reward, l)
        MDPS[l]._value_iteration()
        T_T.stop()
        T_F.start()
        inv_reward = inventoryReward(inv_space,
                                     f,
                                     c,
                                     h,
                                     K,
                                     l,
                                     lambda_e_1=False)
        MDPS[l] = infiniteTime(inv_space, inv_reward, l)
        MDPS[l]._value_iteration()
        T_F.stop()

        ax1.plot(MDPS[l].S,
                 MDPS[l].v,
                 c=cmap((l - lb) / (ub - lb)),
                 label=r'$\lambda = $' + str(round(l, 4)))
        ax2.plot(MDPS[l].S,
                 MDPS[l].v * (1 - l),
                 c=cmap((l - lb) / (ub - lb)),
                 label=r'$\lambda = $' + str(round(l, 4)))

        if l == lb:
            c_pol = MDPS[l].a_policy
            c_l = l

        # When the optimal policy changes, plot the outgoing policy over the
        # lambda-interval [c_l, c_u) on which it was optimal.
        if MDPS[l].a_policy != c_pol:
            c_u = l
            i_0 = 0
            for i in MDPS[l].space.S:
                if c_pol[i] > 0:
                    i_0 = i

            i_0 += 5
            ax3.plot(range(i_0),
                     list(c_pol.values())[:i_0],
                     '-o',
                     c=cmap(((c_u + c_l) / 2 - lb) / (ub - lb)),
                     label=r'$\lambda \in$ ' +
                     f'[{round(c_l, 3)}, {round(c_u, 3)})')

            c_pol = MDPS[l].a_policy
            c_l = c_u

    # Plot the last policy, optimal on the closing interval [c_l, ub].
    i_0 = 0
    for i in MDPS[l].space.S:
        if c_pol[i] > 0:
            i_0 = i

    i_0 += 5
    ax3.plot(range(i_0),
             list(c_pol.values())[:i_0],
             '-o',
             c=cmap(((c_u + ub) / 2 - lb) / (ub - lb)),
             label=r'$\lambda \in$ ' + f'[{round(c_l, 3)}, {round(ub, 3)}]')

    norm = mpl.colors.Normalize(vmin=lb, vmax=ub)
    mpl.colorbar.ColorbarBase(ax1c, cmap=cmap, norm=norm)
    mpl.colorbar.ColorbarBase(ax2c, cmap=cmap, norm=norm)
    mpl.colorbar.ColorbarBase(ax3c, cmap=cmap, norm=norm)

    ax1.set_xlabel('States')
    ax2.set_xlabel('States')
    ax3.set_xlabel('States')

    # ax1 plots v and ax2 plots (1 - lambda) * v, so the labels follow the data.
    ax1.set_ylabel(r'$v^*_\lambda$')
    ax2.set_ylabel(r'$(1 - \lambda) v^*_\lambda$')
    ax3.set_ylabel('Action')
    ax3.legend()

    f4, ax4 = plt.subplots()
    ax4.plot(_Lambda,
             [MDPS[l].computing_times['GS'].total_time for l in _Lambda])
    ax4.set_xlabel(r'$\lambda$')
    ax4.set_ylabel('Computation time (s)')

    print(
        f'Total time saved by the trick is {T_F.total_time - T_T.total_time} seconds, '
        f'or {(T_F.total_time - T_T.total_time) / T_T.total_time:.1%} of the with-trick time'
    )

    plt.show()
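The four figures show, in order: the optimal value function v*_lambda across states for each lambda, its normalized version (1 - lambda) v*_lambda, the optimal policy together with the lambda-intervals on which it stays constant, and the 'GS' (presumably Gauss-Seidel) value-iteration time as a function of lambda.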