Пример #1
0
    def vmax_continuous(self, Value, s, xij, i, j):
        """Iterate on the continuous action ``xij`` and return the resulting value.

        Runs at most ``self.options.maxitncp`` iterations.  Each iteration
        evaluates ``self.__Bellman_rhs`` at the current ``xij``, asks
        ``lcpstep`` for a step given the bounds from ``self.bounds(s, i, j)``,
        and adds that step to ``xij``.  The loop stops early once the largest
        absolute entry of the residual returned by ``lcpstep`` falls below
        ``self.options.tol``.

        Args:
            Value: Passed through unchanged to ``self.__Bellman_rhs``.
            s: Passed to ``self.bounds`` and ``self.__Bellman_rhs``
                (presumably the state nodes -- confirm against callers).
            xij: Array holding the continuous action.  It is updated
                **in place**, so the caller sees the final iterate.
            i: Passed through to ``self.bounds`` / ``self.__Bellman_rhs``
                (presumably a discrete-state index -- confirm).
            j: Passed through to ``self.bounds`` / ``self.__Bellman_rhs``
                (presumably a discrete-action index -- confirm).

        Returns:
            ``self.__Bellman_rhs(Value, s, xij, i, j)[0][0]`` evaluated at the
            final ``xij``.
        """
        xl, xu = self.bounds(s, i, j)

        for _ in range(self.options.maxitncp):
            # Only the second and third outputs feed the step computation
            # (presumably first and second derivatives w.r.t. xij -- confirm).
            _, vx, vxx = self.__Bellman_rhs(Value, s, xij, i, j)

            # Compute Newton step, update continuous action, check convergence
            vx, delx = lcpstep(self.options.ncpmethod, xij, xl, xu, vx, vxx)
            xij[:] += delx

            # np.inf (lowercase) is the supported spelling: the np.Inf alias
            # was removed in NumPy 2.0 and raises AttributeError there.
            if np.linalg.norm(vx.flatten(), np.inf) < self.options.tol:
                break

        return self.__Bellman_rhs(Value, s, xij, i, j)[0][0]
Пример #2
0
    def vmax_continuous_restricted(self, Value, s, xij, i, j):
        """Iterate on ``xij`` subject to ``self.restrictions`` and return the value.

        Works like an unconstrained Newton-type iteration on ``xij`` but
        augments the unknowns with one multiplier row per restriction:

        * ``dr`` (the number of restrictions) is read from the first axis of
          ``self.restrictions(s, xij, i, j)``.
        * The multipliers ``lij`` start at one and are bounded to
          ``[0, +inf)``; these bounds are stacked below the bounds on ``xij``
          returned by ``self.bounds(s, i, j)``.
        * On every iteration the derivative outputs of ``self.__Bellman_rhs``
          are adjusted by the multiplier-weighted restriction derivatives,
          then stacked with ``-h`` / ``hx`` into one system handed to
          ``lcpstep``.
        * The resulting step is split at row ``self.dims.dx``: the first part
          updates ``xij``, the remainder updates ``lij``.

        At most ``self.options.maxitncp`` iterations are run; the loop stops
        early once the largest absolute entry of the residual returned by
        ``lcpstep`` falls below ``self.options.tol``.  One progress line is
        printed to stdout per iteration.

        Args:
            Value: Passed through unchanged to ``self.__Bellman_rhs``.
            s: Passed to ``self.bounds``, ``self.restrictions`` and
                ``self.__Bellman_rhs``; its last axis length is used as the
                number of columns ``ns``.
            xij: Array holding the continuous action.  It is updated
                **in place**, so the caller sees the final iterate.
            i: Passed through to the helper methods
                (presumably a discrete-state index -- confirm).
            j: Passed through to the helper methods
                (presumably a discrete-action index -- confirm).

        Returns:
            ``self.__Bellman_rhs(Value, s, xij, i, j)[0][0]`` evaluated at the
            final ``xij``.
        """
        ns = s.shape[-1]
        dx = self.dims.dx
        xl, xu = self.bounds(s, i, j)

        # One multiplier row per restriction, initialised to one and
        # constrained to be non-negative and unbounded above.
        dr = self.restrictions(s, xij, i, j).shape[0]
        lij = np.ones((dr, ns))
        xl = np.vstack((xl, np.zeros((dr, ns))))
        xu = np.vstack((xu, np.tile(np.inf, (dr, ns))))
        ZEROS = np.zeros((dr, dr, ns))

        for it in range(self.options.maxitncp):
            _, vx, vxx = self.__Bellman_rhs(Value, s, xij, i, j)

            # Adjust derivative to add the Lagrange multiplier term
            h, hx, hxx = self.restrictions(s, xij, i, j, True)

            for ir in range(dr):
                vx += lij[ir] * hx[ir]
                vxx += lij[ir] * hxx[ir]

            # Augment the system with the restriction rows.
            vx = np.vstack((vx, -h))
            vxx = np.vstack((np.hstack((vxx, np.swapaxes(hx, 0, 1))),
                             np.hstack((-hx, ZEROS))))

            #  Compute Newton step, update continuous action, check convergence
            vx, delxl = lcpstep(self.options.ncpmethod,
                                np.vstack((xij, lij)),
                                xl, xu, vx, vxx)
            delx, dell = np.split(delxl, [dx])
            xij[:] += delx
            lij[:] += dell

            # Compute the residual norm once and reuse it for both the
            # progress line and the stopping test.  np.inf (lowercase) is the
            # supported spelling: the np.Inf alias was removed in NumPy 2.0.
            change = np.linalg.norm(vx.flatten(), np.inf)
            print('it = ', it, '\tchange = ', change)
            if change < self.options.tol:
                break

        return self.__Bellman_rhs(Value, s, xij, i, j)[0][0]