Example #1
0
 def get_vf_func_approx(
         self, t: int, features: Sequence[Callable[[NonTerminal[float]],
                                                   float]],
         reg_coeff: float) -> LinearFunctionApprox[NonTerminal[float]]:
     """
     Build a regularized linear value-function approximation over the
     given state features, fitted by a direct (least-squares) solve
     rather than gradient descent.  `t` is accepted for interface
     symmetry but not used here.
     """
     approx = LinearFunctionApprox.create(
         feature_functions=features,
         regularization_coeff=reg_coeff,
         direct_solve=True
     )
     return approx
Example #2
0
def get_linear_model() -> LinearFunctionApprox[Triple]:
    """
    Assemble the linear model over Triple inputs: the module's feature
    functions, its Adam gradient settings, zero regularization, and a
    direct (least-squares) solve.
    """
    return LinearFunctionApprox.create(
        feature_functions=feature_functions(),
        adam_gradient=adam_gradient(),
        regularization_coeff=0.,
        direct_solve=True
    )
Example #3
0
File: td.py  Project: shenoy1/RL-book
def least_squares_tdq(
        transitions: Iterable[TransitionStep[S, A]],
        feature_functions: Sequence[Callable[[Tuple[NonTerminal[S], A]],
                                             float]],
        target_policy: DeterministicPolicy[S, A], γ: float,
        ε: float) -> LinearFunctionApprox[Tuple[NonTerminal[S], A]]:
    '''transitions is a finite iterable'''
    # LSTD-Q: maintain A^{-1} incrementally via Sherman-Morrison
    # rank-1 updates (seeded with I/ε) plus the b vector, then solve
    # for the weights as A^{-1} b.
    n: int = len(feature_functions)
    inv_a: np.ndarray = np.eye(n) / ε
    b: np.ndarray = np.zeros(n)
    for step in transitions:
        features1: np.ndarray = np.array(
            [f((step.state, step.action)) for f in feature_functions])
        if isinstance(step.next_state, NonTerminal):
            # Subtract discounted features of (s', π(s')) under the
            # target policy.
            features2: np.ndarray = features1 - γ * np.array([
                f((step.next_state,
                   target_policy.action_for(step.next_state.state)))
                for f in feature_functions])
        else:
            features2 = features1
        u: np.ndarray = inv_a.T.dot(features2)
        inv_a = inv_a - \
            np.outer(inv_a.dot(features1), u) / (1 + features1.dot(u))
        b = b + features1 * step.reward
    return LinearFunctionApprox.create(
        feature_functions=feature_functions,
        weights=Weights.create(inv_a.dot(b)))
 def linear_func_approx(
         self, features: Sequence[Callable[[Tuple[float, float]], float]],
         reg: float) -> LinearFunctionApprox[Tuple[float, float]]:
     """
     Linear approximation over (float, float) feature functions using
     this object's Adam gradient settings and the given regularization
     coefficient (no direct solve).
     """
     return LinearFunctionApprox.create(feature_functions=features,
                                        adam_gradient=self.adam_gradient(),
                                        regularization_coeff=reg)
def fitted_lspi_put_option(
        expiry: float, num_steps: int, num_paths: int, spot_price: float,
        spot_price_frac: float, rate: float, vol: float, strike: float,
        training_iters: int) -> LinearFunctionApprox[Tuple[float, float]]:
    """
    Fit a linear continuation-value approximation for an American put
    (payoff max(strike - price, 0)) by LSPI over simulated price paths.

    States are (time, price) pairs.  Each of the `training_iters`
    iterations rebuilds a least-squares system from the fixed training
    data, using Sherman-Morrison incremental inversion, and re-solves
    for the weights.  Returns the fitted LinearFunctionApprox.
    """

    num_laguerre: int = 4
    epsilon: float = 1e-3  # seeds A^{-1} as I/epsilon (ridge-like term)

    # Feature basis: a constant, `num_laguerre` exponentially-weighted
    # Laguerre polynomials in price/strike (i=i binds each index at
    # definition time), and three functions of time.
    ident: np.ndarray = np.eye(num_laguerre)
    features: List[Callable[[Tuple[float, float]], float]] = [lambda _: 1.]
    features += [(lambda t_s, i=i: np.exp(-t_s[1] / (2 * strike)) * lagval(
        t_s[1] / strike, ident[i])) for i in range(num_laguerre)]
    features += [
        lambda t_s: np.cos(-t_s[0] * np.pi / (2 * expiry)),
        lambda t_s: np.log(expiry - t_s[0])
        if t_s[0] != expiry else 0., lambda t_s: (t_s[0] / expiry)**2
    ]

    # Simulated triples (step index, price, next price).
    training_data: Sequence[TrainingDataType] = training_sim_data(
        expiry=expiry,
        num_steps=num_steps,
        num_paths=num_paths,
        spot_price=spot_price,
        spot_price_frac=spot_price_frac,
        rate=rate,
        vol=vol)

    dt: float = expiry / num_steps
    gamma: float = np.exp(-rate * dt)  # per-time-step discount factor
    num_features: int = len(features)
    # Current and successor (time, price) states for every data point.
    states: Sequence[Tuple[float,
                           float]] = [(i * dt, s) for i, s, _ in training_data]
    next_states: Sequence[Tuple[float, float]] = \
        [((i + 1) * dt, s1) for i, _, s1 in training_data]
    feature_vals: np.ndarray = np.array([[f(x) for f in features]
                                         for x in states])
    next_feature_vals: np.ndarray = np.array([[f(x) for f in features]
                                              for x in next_states])
    # True where the successor state is before the final time step.
    non_terminal: np.ndarray = np.array(
        [i < num_steps - 1 for i, _, _ in training_data])
    # Immediate-exercise (put payoff) value at each successor state.
    exer: np.ndarray = np.array([max(strike - s1, 0) for _, s1 in next_states])
    wts: np.ndarray = np.zeros(num_features)
    for _ in range(training_iters):
        a_inv: np.ndarray = np.eye(num_features) / epsilon
        b_vec: np.ndarray = np.zeros(num_features)
        # Continuation values implied by the current weights; continue
        # only where the state is non-terminal and continuing beats
        # immediate exercise.
        cont: np.ndarray = np.dot(next_feature_vals, wts)
        cont_cond: np.ndarray = non_terminal * (cont > exer)
        for i in range(len(training_data)):
            phi1: np.ndarray = feature_vals[i]
            phi2: np.ndarray = phi1 - \
                cont_cond[i] * gamma * next_feature_vals[i]
            # Sherman-Morrison rank-1 update of A^{-1}.
            temp: np.ndarray = a_inv.T.dot(phi2)
            a_inv -= np.outer(a_inv.dot(phi1), temp) / (1 + phi1.dot(temp))
            # Reward contribution: discounted payoff when exercising.
            b_vec += phi1 * (1 - cont_cond[i]) * exer[i] * gamma
        wts = a_inv.dot(b_vec)

    return LinearFunctionApprox.create(feature_functions=features,
                                       weights=Weights.create(wts))
Example #6
0
exponent: float = 0.5

# One indicator feature per non-terminal state (tabular-equivalent
# basis); `s=s` binds each state at lambda-definition time.
ffs: Sequence[Callable[[InventoryState], float]] = [
    (lambda x, s=s: float(x == s)) for s in nt_states
]

# Adam settings for the Monte-Carlo learner.
mc_ag: AdamGradient = AdamGradient(
    learning_rate=0.05,
    decay1=0.9,
    decay2=0.999
)

# Adam settings for the TD learner (smaller learning rate).
td_ag: AdamGradient = AdamGradient(
    learning_rate=0.003,
    decay1=0.9,
    decay2=0.999
)

mc_func_approx: LinearFunctionApprox[InventoryState] = \
    LinearFunctionApprox.create(
        feature_functions=ffs,
        adam_gradient=mc_ag
    )

td_func_approx: LinearFunctionApprox[InventoryState] = \
    LinearFunctionApprox.create(
        feature_functions=ffs,
        adam_gradient=td_ag
    )

# Iterator of successively-updated MC value-function approximations,
# starting episodes from a uniform choice over non-terminal states.
it_mc: Iterable[FunctionApprox[InventoryState]] = mc_prediction_learning_rate(
    mrp=si_mrp,
    start_state_distribution=Choose(set(nt_states)),
    gamma=gamma,
    tolerance=mc_episode_length_tol,
    initial_func_approx=mc_func_approx
)

it_td: Iterable[FunctionApprox[InventoryState]] = td_prediction_learning_rate(
    mrp=si_mrp,
Example #7
0
        # Parameters of the sampling distribution (see __init__'s
        # sampler): mean and std-dev of the normal price draw, and the
        # integer horizon for the uniform time draw.
        μ: float
        σ: float
        expiry: int

        def __init__(self,
                     μ: float,
                     σ: float,
                     expiry: int,
                     expectation_samples: int = 10000):
            """
            The sampler draws a pair: a uniform-random integer time
            step in [0, expiry] and a normally-distributed value with
            mean μ and std-dev σ.
            """
            self.μ = μ
            self.σ = σ
            # Fix: the class annotates `expiry` but it was never stored
            # on the instance, so reading it raised AttributeError.
            self.expiry = expiry
            super().__init__(sampler=lambda:
                             (np.random.randint(expiry + 1),
                              np.random.normal(loc=self.μ, scale=self.σ)),
                             expectation_samples=expectation_samples)

    # Second approach: approximate value iteration with a two-feature
    # linear approximation.
    nt_states_distribution = InitialDistrib(strike, sigma, expiry_val)
    ag = AdamGradient(learning_rate=0.5, decay1=0.9, decay2=0.999)
    # Raw components of the sampled state tuple — presumably
    # (time, price) given the sampler above; verify against the MDP.
    ffs = [lambda x: x[0], lambda x: x[1]]

    lfa = LinearFunctionApprox.create(feature_functions=ffs,
                                      adam_gradient=ag,
                                      regularization_coeff=0.001,
                                      direct_solve=True)
    solution_2 = value_iteration(mdp, 1, lfa, nt_states_distribution, 100)
    """
    for i in solution_2:
        print(i)
    """
    # NOTE: this second method does not really work
# Adam gradient settings for the Monte-Carlo learner.
mc_ag: AdamGradient = AdamGradient(learning_rate=0.05,
                                   decay1=0.9,
                                   decay2=0.999)

# Adam gradient settings for the TD learner (smaller learning rate).
td_ag: AdamGradient = AdamGradient(learning_rate=0.003,
                                   decay1=0.9,
                                   decay2=0.999)

# Linear approximations over the shared feature functions, one per
# learner, differing only in their Adam settings.
mc_func_approx: LinearFunctionApprox[InventoryState] = \
    LinearFunctionApprox.create(feature_functions=ffs,
                                adam_gradient=mc_ag)

td_func_approx: LinearFunctionApprox[InventoryState] = \
    LinearFunctionApprox.create(feature_functions=ffs,
                                adam_gradient=td_ag)

it_mc: Iterable[FunctionApprox[InventoryState]] = \
    mc_prediction_learning_rate(
        mrp=si_mrp,
        start_state_distribution=Choose(set(nt_states)),
        gamma=gamma,
        episode_length_tolerance=mc_episode_length_tol,
        initial_func_approx=mc_func_approx
Example #9
0
    init_price_stdev: float = 10.0
    num_shares: int = 100
    num_time_steps: int = 5
    # Linear price-impact coefficients: alpha lowers the next price by
    # alpha * shares traded; beta * shares is the average execution-
    # price difference (see price_diff / dynamics below).
    alpha: float = 0.03
    beta: float = 0.05

    # Same impact function at every time step (beta is a constant, so
    # lambda late binding is harmless here).
    price_diff = [lambda p_s: beta * p_s.shares for _ in range(num_time_steps)]
    # Deterministic dynamics (σ=0): next price is the current price
    # minus alpha * shares traded.
    dynamics = [
        lambda p_s: Gaussian(μ=p_s.price - alpha * p_s.shares, σ=0.)
        for _ in range(num_time_steps)
    ]
    # Feature basis in (price, shares): price*shares and shares².
    ffs = [
        lambda p_s: p_s.state.price * p_s.state.shares,
        lambda p_s: float(p_s.state.shares * p_s.state.shares)
    ]
    fa: FunctionApprox = LinearFunctionApprox.create(feature_functions=ffs)
    init_price_distrib: Gaussian = Gaussian(μ=init_price_mean,
                                            σ=init_price_stdev)

    ooe: OptimalOrderExecution = OptimalOrderExecution(
        shares=num_shares,
        time_steps=num_time_steps,
        avg_exec_price_diff=price_diff,
        price_dynamics=dynamics,
        utility_func=lambda x: x,  # identity utility (risk-neutral)
        discount_factor=1,
        func_approx=fa,
        initial_price_distribution=init_price_distrib)
    # Backward induction yields, per time step, the optimal value-
    # function approximation and the deterministic optimal policy.
    it_vf: Iterator[Tuple[ValueFunctionApprox[PriceAndShares],
                          DeterministicPolicy[PriceAndShares, int]]] = \
        ooe.backward_induction_vf_and_pi()