def vmax_continuous(self, Value, s, xij, i, j):
    """Iterate on the continuous action ``xij`` and return the resulting value.

    Runs at most ``self.options.maxitncp`` iterations. Each iteration
    evaluates ``self.__Bellman_rhs`` at the current ``xij``, hands the
    result to ``lcpstep`` together with the bounds from ``self.bounds``,
    and adds the returned step to ``xij``. Iteration stops early once the
    max-norm of the residual returned by ``lcpstep`` falls below
    ``self.options.tol``.

    Args:
        Value: Forwarded unchanged to ``self.__Bellman_rhs``.
        s: Forwarded to ``self.bounds`` and ``self.__Bellman_rhs``
            (presumably the state nodes -- confirm against callers).
        xij: Continuous action array. **Updated in place** with the
            accumulated steps.
        i, j: Forwarded to ``self.bounds`` and ``self.__Bellman_rhs``
            (presumably discrete state / discrete action indices --
            confirm against callers).

    Returns:
        Element ``[0][0]`` of ``self.__Bellman_rhs`` evaluated at the final
        ``xij``.
    """
    xl, xu = self.bounds(s, i, j)

    for _ in range(self.options.maxitncp):
        # vx / vxx are presumably the first and second derivatives of the
        # Bellman right-hand side w.r.t. the action; the value itself is not
        # needed inside the loop.
        _, vx, vxx = self.__Bellman_rhs(Value, s, xij, i, j)

        # Compute Newton step, update continuous action, check convergence
        vx, delx = lcpstep(self.options.ncpmethod, xij, xl, xu, vx, vxx)
        xij[:] += delx

        # np.inf (lowercase) is the supported spelling; the np.Inf alias was
        # removed in NumPy 2.0.
        if np.linalg.norm(vx.flatten(), np.inf) < self.options.tol:
            break

    # Re-evaluate because xij changed after the last evaluation in the loop.
    return self.__Bellman_rhs(Value, s, xij, i, j)[0][0]
def vmax_continuous_restricted(self, Value, s, xij, i, j):
    """Iterate on ``xij`` subject to ``self.restrictions`` and return the value.

    Works like the unrestricted solver but stacks one multiplier row per
    restriction underneath the action. The multipliers start at 1 and are
    bounded to ``[0, inf)``; the action bounds come from ``self.bounds``.
    Each iteration augments the derivatives returned by
    ``self.__Bellman_rhs`` with the restriction terms, calls ``lcpstep`` on
    the stacked system, and adds the resulting step to both the action and
    the multipliers. Iteration stops after ``self.options.maxitncp`` passes
    or once the max-norm of the residual returned by ``lcpstep`` is below
    ``self.options.tol``.

    Args:
        Value: Forwarded unchanged to ``self.__Bellman_rhs``.
        s: Array whose last axis length is used as the number of columns of
            the multiplier array (presumably the state nodes -- confirm).
        xij: Continuous action array. **Updated in place.** Its first axis
            is assumed to have length ``self.dims.dx``, since the stacked
            step is split at that row -- confirm against callers.
        i, j: Forwarded to ``self.bounds``, ``self.restrictions`` and
            ``self.__Bellman_rhs``.

    Returns:
        Element ``[0][0]`` of ``self.__Bellman_rhs`` evaluated at the final
        ``xij``.
    """
    ns = s.shape[-1]
    dx = self.dims.dx
    xl, xu = self.bounds(s, i, j)
    # Number of restrictions = leading dimension of the restriction values.
    dr = self.restrictions(s, xij, i, j).shape[0]

    # Multipliers: initial guess of 1, constrained to be non-negative.
    lij = np.ones((dr, ns))
    xl = np.vstack((xl, np.zeros((dr, ns))))
    xu = np.vstack((xu, np.full((dr, ns), np.inf)))
    ZEROS = np.zeros((dr, dr, ns))

    for it in range(self.options.maxitncp):
        _, vx, vxx = self.__Bellman_rhs(Value, s, xij, i, j)

        # Adjust derivative to add the Lagrange multiplier term
        h, hx, hxx = self.restrictions(s, xij, i, j, True)
        for ir in range(dr):
            vx += lij[ir] * hx[ir]
            vxx += lij[ir] * hxx[ir]

        # Stack the restriction block under / beside the action block.
        vx = np.vstack((vx, -h))
        vxx = np.vstack((
            np.hstack((vxx, np.swapaxes(hx, 0, 1))),
            np.hstack((-hx, ZEROS)),
        ))

        # Compute Newton step, update continuous action, check convergence
        vx, delxl = lcpstep(
            self.options.ncpmethod, np.vstack((xij, lij)), xl, xu, vx, vxx
        )
        # First dx rows belong to the action, the remainder to the multipliers.
        delx, dell = np.split(delxl, [dx])
        xij[:] += delx
        lij[:] += dell

        # np.inf (lowercase) is the supported spelling; the np.Inf alias was
        # removed in NumPy 2.0. Compute the residual once for both uses.
        residual = np.linalg.norm(vx.flatten(), np.inf)
        # NOTE(review): unconditional per-iteration trace on stdout; the
        # unrestricted solver prints nothing -- confirm this is still wanted.
        print('it = ', it, '\tchange = ', residual)
        if residual < self.options.tol:
            break

    # Re-evaluate because xij changed after the last evaluation in the loop.
    return self.__Bellman_rhs(Value, s, xij, i, j)[0][0]