xmin = 0                        # minimum action
xmax = 6                        # maximum action
m = 100                         # number of actions
X = np.linspace(xmin, xmax, m)  # vector of actions

# Reward Function
f = np.full((m, n), -np.inf)
for k in range(m):
    f[k, S >= X[k]] = (X[k] ** (1 - gamma)) / (1 - gamma) - cost * X[k]

# State Transition Function
g = np.zeros_like(f)
for k in range(m):
    snext = alpha * (S - X[k]) - 0.5 * beta * (S - X[k]) ** 2
    g[k] = getindex(snext, S)

# Model Structure
model = DDPmodel(f, g, delta)
model.solve()

## Analysis

# Plot Optimal Policy
demo.figure('Optimal Harvest Policy', 'Stock', 'Harvest')
plt.plot(S, X[model.policy])

# Plot Value Function
demo.figure('Optimal Value Function', 'Stock', 'Value')
plt.plot(S, model.value)
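# To see the long-run behavior of the optimal policy, the solved model can be
# simulated forward. A minimal sketch, assuming model.simulate takes an
# initial state and a horizon and returns index paths into S and X, as in the
# simulation code later in this section; the horizon nyrs is illustrative.
sinit = S.max()
nyrs = 20
t = np.arange(nyrs + 1)
spath, xpath = model.simulate(sinit, nyrs)
demo.figure('Simulated Stock and Harvest', 'Year', 'Level')
plt.plot(t, S[spath], label='stock')
plt.plot(t, X[xpath], label='harvest')
plt.legend()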
# Action Space
X = np.arange(3) + 1   # vector of actions
m = X.size             # number of actions

# Reward Function
f = np.zeros((m, n))

# State Transition Probability Matrix
P = np.zeros((m, n, n))
for k in range(m):
    P[k, 0, 0] = 1
    i = range(1, n)

    # does not survive predation
    snext = 0
    j = getindex(snext, S)
    P[k, i, j] += 1 - p[k]

    # survives predation, finds food
    snext = S[i] - 1 + e[k]
    j = getindex(snext, S)
    P[k, i, j] += p[k] * q[k]

    # survives predation, finds no food
    snext = S[i] - 1
    j = getindex(snext, S)
    P[k, i, j] += p[k] * (1 - q[k])

# Terminal Value Function
vterm = np.ones(n)     # terminal value: survive
vterm[0] = 0           # terminal value: death
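# Sanity check on the construction above: each P[k] must be a proper
# stochastic matrix, i.e. every row of transition probabilities sums to one.
assert np.allclose(P.sum(axis=2), 1), 'rows of each P[k] must sum to 1'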
# Action Space
X = np.arange(sbar + 1)  # vector of actions
m = X.size               # number of actions

# Reward Function
f = np.full((m, n), -np.inf)
for c, s in enumerate(S):
    for r, x in enumerate(X):
        if x <= s:
            f[r, c] = price * x - (x ** 2) / (1 + s)

# State Transition Function
g = np.empty_like(f)
for r, x in enumerate(X):
    snext = S - x
    g[r] = getindex(snext, S)

# Model Structure
model = DDPmodel(f, g, delta)
model.solve()

## Analysis

# Simulate Model
sinit = S.max()
nyrs = 15
t = np.arange(nyrs + 1)
spath, xpath = model.simulate(sinit, nyrs)

# Plot Optimal Policy
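# The excerpt above ends at the policy plot heading; a minimal sketch of the
# plot itself, mirroring the harvest example (the figure title and axis
# labels here are assumptions):
demo.figure('Optimal Extraction Policy', 'Stock', 'Extraction')
plt.plot(S, X[model.policy])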
# Action Space
X = np.arange(1 + maxcap)  # vector of actions
m = X.size                 # number of actions

# Reward Function
f = np.full((m, n), -np.inf)
for k in range(m):
    f[k, k:] = alpha1 * X[k] ** beta1 + alpha2 * (S[k:] - X[k]) ** beta2

# State Transition Probability Matrix
P = np.zeros((m, n, n))
for k in range(m):
    for i in range(n):
        for j in range(r.size):
            snext = min(S[i] - X[k] + r[j], maxcap)
            inext = getindex(snext, S)
            P[k, i, inext] += p[j]

# Model Structure
model = DDPmodel(f, P, delta)
model.solve()

## Analysis

# Plot Optimal Policy
demo.figure('Optimal Irrigation Policy', 'Water Level', 'Irrigation', [-1, 31], [0, 6])
plt.plot(S, X[model.policy], '*')

# Plot Value Function
demo.figure('Optimal Value Function', 'Water Level', 'Value')
plt.plot(S, model.value)
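# As with the mine model, the solved irrigation policy can be simulated
# forward; a sketch assuming model.simulate behaves as above, with an
# illustrative initial water level and horizon.
sinit = S.max()
nyrs = 30
t = np.arange(nyrs + 1)
spath, xpath = model.simulate(sinit, nyrs)
demo.figure('Simulated Water Level', 'Year', 'Water Level')
plt.plot(t, S[spath])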
# Action Space
X = ['no action', 'service', 'replace']  # vector of actions
m = len(X)                               # number of actions

# Reward Function
f = np.zeros((m, n))
q = 50 - 2.5 * S1 - 2.5 * S1 ** 2
f[0] = q * np.minimum(1, 1 - (S1 - S2) / maxage)
f[1] = q * np.minimum(1, 1 - (S1 - S2 - 1) / maxage) - mancost
f[2] = 50 - repcost

# State Transition Function
g = np.empty_like(f)
g[0] = getindex(np.c_[S1 + 1, S2], S)
g[1] = getindex(np.c_[S1 + 1, S2 + 1], S)
g[2] = getindex(np.c_[1, 0], S)

# Model Structure
model = DDPmodel(f, g, delta)
model.solve()

## Analysis

# Simulate Model
sinit = 0
nyrs = 12
t = np.arange(nyrs + 1)
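# The excerpt stops at the simulation setup; a sketch of the remaining step,
# following the pattern of the mine model (the plotted quantity and axis
# labels are assumptions):
spath, xpath = model.simulate(sinit, nyrs)
demo.figure('Simulated Asset Age', 'Year', 'Age')
plt.plot(t, S1[spath])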