def get_vf_func_approx(
    self,
    t: int,
    features: Sequence[Callable[[NonTerminal[float]], float]],
    reg_coeff: float
) -> LinearFunctionApprox[NonTerminal[float]]:
    # Regularized linear approximation of the time-t value function,
    # fit in closed form (direct_solve) rather than by gradient descent.
    return LinearFunctionApprox.create(
        feature_functions=features,
        regularization_coeff=reg_coeff,
        direct_solve=True
    )
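# Usage sketch (not from the source): 'obj' stands in for an instance of
# the enclosing class, and the two features (bias + raw state value) are
# illustrative assumptions. With direct_solve=True, the returned
# approximation is typically fit in one shot via solve() on
# (state, target) pairs.
example_features = [lambda _: 1., lambda s: s.state]
vf = obj.get_vf_func_approx(t=0, features=example_features, reg_coeff=1e-3)
vf = vf.solve([(NonTerminal(100.0), 5.2), (NonTerminal(110.0), 3.1)])
print(vf.evaluate([NonTerminal(105.0)]))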
def get_linear_model() -> LinearFunctionApprox[Triple]:
    ffs = feature_functions()
    ag = adam_gradient()
    return LinearFunctionApprox.create(
        feature_functions=ffs,
        adam_gradient=ag,
        regularization_coeff=0.,
        direct_solve=True
    )
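# Usage sketch (not from the source): assumes Triple is a 3-tuple of
# floats; the training pairs below are made-up data.
model = get_linear_model()
model = model.solve([((1.0, 2.0, 3.0), 0.5), ((0.0, 1.0, 2.0), -0.2)])
print(model.evaluate([(1.0, 1.5, 2.5)]))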
def least_squares_tdq(
    transitions: Iterable[TransitionStep[S, A]],
    feature_functions: Sequence[Callable[[Tuple[NonTerminal[S], A]], float]],
    target_policy: DeterministicPolicy[S, A],
    γ: float,
    ε: float
) -> LinearFunctionApprox[Tuple[NonTerminal[S], A]]:
    '''transitions is a finite iterable'''
    num_features: int = len(feature_functions)
    a_inv: np.ndarray = np.eye(num_features) / ε
    b_vec: np.ndarray = np.zeros(num_features)
    for tr in transitions:
        phi1: np.ndarray = np.array(
            [f((tr.state, tr.action)) for f in feature_functions]
        )
        if isinstance(tr.next_state, NonTerminal):
            phi2 = phi1 - γ * np.array([
                f((tr.next_state, target_policy.action_for(
                    tr.next_state.state
                ))) for f in feature_functions
            ])
        else:
            phi2 = phi1
        # Sherman-Morrison update of A^{-1}, where A accumulates
        # outer(phi1, phi2) over the transitions (initialized to ε·I).
        temp: np.ndarray = a_inv.T.dot(phi2)
        a_inv = a_inv - np.outer(a_inv.dot(phi1), temp) / (1 + phi1.dot(temp))
        b_vec += phi1 * tr.reward
    opt_wts: np.ndarray = a_inv.dot(b_vec)
    return LinearFunctionApprox.create(
        feature_functions=feature_functions,
        weights=Weights.create(opt_wts)
    )
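# Usage sketch (not from the source): a toy two-transition batch with
# int states and int actions; the features and the greedy target policy
# are illustrative assumptions.
toy_transitions = [
    TransitionStep(state=NonTerminal(0), action=1,
                   next_state=NonTerminal(1), reward=1.0),
    TransitionStep(state=NonTerminal(1), action=0,
                   next_state=Terminal(2), reward=0.0)
]
toy_features = [lambda sa: float(sa[0].state), lambda sa: float(sa[1])]
greedy = DeterministicPolicy(action_for=lambda s: 0)
q = least_squares_tdq(
    transitions=toy_transitions,
    feature_functions=toy_features,
    target_policy=greedy,
    γ=0.9,
    ε=1e-4
)
print(q.evaluate([(NonTerminal(0), 1)]))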
def linear_func_approx(
    self,
    features: Sequence[Callable[[Tuple[float, float]], float]],
    reg: float
) -> LinearFunctionApprox[Tuple[float, float]]:
    return LinearFunctionApprox.create(
        feature_functions=features,
        adam_gradient=self.adam_gradient(),
        regularization_coeff=reg,
    )
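# Usage sketch (not from the source): 'obj' stands in for an instance of
# the enclosing class; the (time, price) -> value pairs are made up.
# Without direct_solve, update() takes one Adam gradient step.
features = [lambda t_s: 1., lambda t_s: t_s[0], lambda t_s: t_s[1]]
fa = obj.linear_func_approx(features=features, reg=1e-2)
fa = fa.update([((0.0, 100.0), 4.8), ((0.5, 95.0), 6.1)])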
def fitted_lspi_put_option(
    expiry: float,
    num_steps: int,
    num_paths: int,
    spot_price: float,
    spot_price_frac: float,
    rate: float,
    vol: float,
    strike: float,
    training_iters: int
) -> LinearFunctionApprox[Tuple[float, float]]:
    num_laguerre: int = 4
    epsilon: float = 1e-3
    ident: np.ndarray = np.eye(num_laguerre)
    # Features of (time, price): a constant, weighted Laguerre polynomials
    # of the strike-normalized price, and three functions of time.
    features: List[Callable[[Tuple[float, float]], float]] = [lambda _: 1.]
    features += [(lambda t_s, i=i: np.exp(-t_s[1] / (2 * strike)) *
                  lagval(t_s[1] / strike, ident[i]))
                 for i in range(num_laguerre)]
    features += [
        lambda t_s: np.cos(-t_s[0] * np.pi / (2 * expiry)),
        lambda t_s: np.log(expiry - t_s[0]) if t_s[0] != expiry else 0.,
        lambda t_s: (t_s[0] / expiry) ** 2
    ]
    training_data: Sequence[TrainingDataType] = training_sim_data(
        expiry=expiry,
        num_steps=num_steps,
        num_paths=num_paths,
        spot_price=spot_price,
        spot_price_frac=spot_price_frac,
        rate=rate,
        vol=vol
    )
    dt: float = expiry / num_steps
    gamma: float = np.exp(-rate * dt)
    num_features: int = len(features)
    states: Sequence[Tuple[float, float]] = \
        [(i * dt, s) for i, s, _ in training_data]
    next_states: Sequence[Tuple[float, float]] = \
        [((i + 1) * dt, s1) for i, _, s1 in training_data]
    feature_vals: np.ndarray = np.array(
        [[f(x) for f in features] for x in states]
    )
    next_feature_vals: np.ndarray = np.array(
        [[f(x) for f in features] for x in next_states]
    )
    non_terminal: np.ndarray = np.array(
        [i < num_steps - 1 for i, _, _ in training_data]
    )
    exer: np.ndarray = np.array(
        [max(strike - s1, 0) for _, s1 in next_states]
    )
    wts: np.ndarray = np.zeros(num_features)
    # Each LSPI iteration solves a least-squares system via incremental
    # Sherman-Morrison updates of A^{-1}, under the exercise/continue
    # decision implied by the current weights.
    for _ in range(training_iters):
        a_inv: np.ndarray = np.eye(num_features) / epsilon
        b_vec: np.ndarray = np.zeros(num_features)
        cont: np.ndarray = np.dot(next_feature_vals, wts)
        cont_cond: np.ndarray = non_terminal * (cont > exer)
        for i in range(len(training_data)):
            phi1: np.ndarray = feature_vals[i]
            phi2: np.ndarray = phi1 - \
                cont_cond[i] * gamma * next_feature_vals[i]
            temp: np.ndarray = a_inv.T.dot(phi2)
            a_inv -= np.outer(a_inv.dot(phi1), temp) / (1 + phi1.dot(temp))
            b_vec += phi1 * (1 - cont_cond[i]) * exer[i] * gamma
        wts = a_inv.dot(b_vec)
    return LinearFunctionApprox.create(
        feature_functions=features,
        weights=Weights.create(wts)
    )
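# Usage sketch (not from the source): all parameter values below are
# illustrative. The returned approximation maps a (time, price) pair to
# the continuation value of the American put.
lspi_fa = fitted_lspi_put_option(
    expiry=1.0, num_steps=10, num_paths=5000,
    spot_price=100.0, spot_price_frac=0.1,
    rate=0.05, vol=0.25, strike=100.0,
    training_iters=8
)
print(lspi_fa.evaluate([(0.0, 100.0)]))  # continuation value at t=0, S=100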
exponent: float = 0.5
# One indicator feature per non-terminal state (s=s binds each state
# at definition time, avoiding the late-binding closure pitfall).
ffs: Sequence[Callable[[InventoryState], float]] = \
    [(lambda x, s=s: float(x == s)) for s in nt_states]
mc_ag: AdamGradient = AdamGradient(
    learning_rate=0.05,
    decay1=0.9,
    decay2=0.999
)
td_ag: AdamGradient = AdamGradient(
    learning_rate=0.003,
    decay1=0.9,
    decay2=0.999
)
mc_func_approx: LinearFunctionApprox[InventoryState] = \
    LinearFunctionApprox.create(
        feature_functions=ffs,
        adam_gradient=mc_ag
    )
td_func_approx: LinearFunctionApprox[InventoryState] = \
    LinearFunctionApprox.create(
        feature_functions=ffs,
        adam_gradient=td_ag
    )
it_mc: Iterable[FunctionApprox[InventoryState]] = mc_prediction_learning_rate(
    mrp=si_mrp,
    start_state_distribution=Choose(set(nt_states)),
    gamma=gamma,
    tolerance=mc_episode_length_tol,
    initial_func_approx=mc_func_approx,
)
it_td: Iterable[FunctionApprox[InventoryState]] = td_prediction_learning_rate(
    mrp=si_mrp,
class InitialDistrib(SampledDistribution[Tuple[int, float]]):
    μ: float
    σ: float
    expiry: int

    def __init__(
        self,
        μ: float,
        σ: float,
        expiry: int,
        expectation_samples: int = 10000
    ):
        self.μ = μ
        self.σ = σ
        self.expiry = expiry
        super().__init__(
            sampler=lambda: (np.random.randint(expiry + 1),
                             np.random.normal(loc=self.μ, scale=self.σ)),
            expectation_samples=expectation_samples
        )


nt_states_distribution = InitialDistrib(strike, sigma, expiry_val)
ag = AdamGradient(learning_rate=0.5, decay1=0.9, decay2=0.999)
ffs = [lambda x: x[0], lambda x: x[1]]
lfa = LinearFunctionApprox.create(
    feature_functions=ffs,
    adam_gradient=ag,
    regularization_coeff=0.001,
    direct_solve=True
)
solution_2 = value_iteration(mdp, 1, lfa, nt_states_distribution, 100)
"""
for i in solution_2:
    print(i)
"""
# This second method does not really work
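# Usage sketch (not from the source): draw a few (time, price) start
# states from the distribution; the constructor arguments are
# illustrative.
d = InitialDistrib(μ=100.0, σ=20.0, expiry=10)
print(d.sample_n(3))  # three (time, price) samples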
mc_ag: AdamGradient = AdamGradient(
    learning_rate=0.05,
    decay1=0.9,
    decay2=0.999
)
td_ag: AdamGradient = AdamGradient(
    learning_rate=0.003,
    decay1=0.9,
    decay2=0.999
)
mc_func_approx: LinearFunctionApprox[InventoryState] = \
    LinearFunctionApprox.create(
        feature_functions=ffs,
        adam_gradient=mc_ag
    )
td_func_approx: LinearFunctionApprox[InventoryState] = \
    LinearFunctionApprox.create(
        feature_functions=ffs,
        adam_gradient=td_ag
    )
it_mc: Iterable[FunctionApprox[InventoryState]] = \
    mc_prediction_learning_rate(
        mrp=si_mrp,
        start_state_distribution=Choose(set(nt_states)),
        gamma=gamma,
        episode_length_tolerance=mc_episode_length_tol,
        initial_func_approx=mc_func_approx
    )
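# Usage sketch (not from the source): consume a fixed number of updates
# from the MC prediction stream and inspect the final value estimates;
# the budget of 1000 updates is an assumption.
import itertools
final_mc = list(itertools.islice(it_mc, 1000))[-1]
print({s: final_mc(s) for s in nt_states})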
init_price_stdev: float = 10.0
num_shares: int = 100
num_time_steps: int = 5
alpha: float = 0.03
beta: float = 0.05
# Temporary price impact of an execution and (deterministic) permanent
# price dynamics, one function per time step.
price_diff = [lambda p_s: beta * p_s.shares for _ in range(num_time_steps)]
dynamics = [
    lambda p_s: Gaussian(μ=p_s.price - alpha * p_s.shares, σ=0.)
    for _ in range(num_time_steps)
]
ffs = [
    lambda p_s: p_s.state.price * p_s.state.shares,
    lambda p_s: float(p_s.state.shares * p_s.state.shares)
]
fa: FunctionApprox = LinearFunctionApprox.create(feature_functions=ffs)
init_price_distrib: Gaussian = Gaussian(
    μ=init_price_mean,
    σ=init_price_stdev
)
ooe: OptimalOrderExecution = OptimalOrderExecution(
    shares=num_shares,
    time_steps=num_time_steps,
    avg_exec_price_diff=price_diff,
    price_dynamics=dynamics,
    utility_func=lambda x: x,
    discount_factor=1,
    func_approx=fa,
    initial_price_distribution=init_price_distrib
)
it_vf: Iterator[Tuple[ValueFunctionApprox[PriceAndShares],
                      DeterministicPolicy[PriceAndShares, int]]] = \
    ooe.backward_induction_vf_and_pi()
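# Usage sketch (not from the source): walk the backward-induction output
# and print, at each time step, the value and optimal sale quantity for
# an assumed state of price 100 with all shares still unsold.
for t, (vf, pol) in enumerate(it_vf):
    s = PriceAndShares(price=100.0, shares=num_shares)
    print(f"t={t}: vf={vf(NonTerminal(s)):.2f}, sell={pol.action_for(s)}")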