def _build_rental_return_distributions(self):
    """Populate the car counts and the demand and return distributions.

    Both distributions cover 0..max_cars inclusive, using the Poisson
    probability of each count.  Whatever probability is still missing from a
    total of 1.0 is added onto the max_cars entry before the distribution is
    enabled.
    """
    top = self._max_cars

    def truncated_poisson(rate):
        # Poisson probability for every count up to and including the cap
        distribution = Multinoulli({count: self._poisson(rate, count) for count in range(top + 1)})
        # fold the remaining tail probability into the last entry
        distribution[top] += 1.0 - sum(distribution.values())
        distribution.enable()
        return distribution

    self._car_count = list(range(top + 1))
    self._demand_distribution = truncated_poisson(self._rental_rate)
    self._return_distribution = truncated_poisson(self._return_rate)
def __init__(self, environment: Environment, environment_parameters: EnvironmentParameters):
    """Initialise the base class, then keep typed references and the card attributes.

    Args:
        environment: passed to the base class and stored on this instance.
        environment_parameters: passed to the base class and stored on this instance.
    """
    super().__init__(environment, environment_parameters)

    # typed references to the two constructor arguments
    self._environment_parameters: EnvironmentParameters = environment_parameters
    self._environment: Environment = environment

    # created empty here; nothing is added to it in this method
    self._card_distribution: Multinoulli[int] = Multinoulli()
    # highest card value: 10, J, Q or K combined
    self._max_card: int = 10
def _build_summaries(self):
    """Derive the per-starting_cars summaries from self.outcome_distributions.

    For each starting_cars this stores:
      expected_cars_rented: Sum over outcomes of cars_rented * probability
      ending_cars_distribution: total probability of each ending_cars
      cars_rented_x_probability_by_ending_cars: Sum of cars_rented * probability per ending_cars
      expected_cars_rented_by_ending_cars: that sum divided by the ending_cars probability
    """
    for starting_cars, outcome_distribution in self.outcome_distributions.items():
        total_rented: float = 0.0
        ending_distribution: Multinoulli[int] = Multinoulli()
        weighted_rented_by_ending: DictZero[int, float] = DictZero()

        # single pass over the joint outcomes, accumulating all three totals
        for outcome, probability in outcome_distribution.items():
            weighted_rented = outcome.cars_rented * probability
            total_rented += weighted_rented
            ending_distribution[outcome.ending_cars] += probability
            weighted_rented_by_ending[outcome.ending_cars] += weighted_rented
        ending_distribution.enable()

        conditional_rented_by_ending: DictZero[int, float] = DictZero()
        for ending_cars, ending_probability in ending_distribution.items():
            # E[r|s,a,s'] = Sum_over_r( p(r,s'|s,a).r ) / p(s'|s,a)
            conditional_rented_by_ending[ending_cars] = (
                weighted_rented_by_ending[ending_cars] / ending_probability
            )

        self.expected_cars_rented[starting_cars] = total_rented
        self.ending_cars_distribution[starting_cars] = ending_distribution
        self.cars_rented_x_probability_by_ending_cars[starting_cars] = weighted_rented_by_ending
        self.expected_cars_rented_by_ending_cars[starting_cars] = conditional_rented_by_ending
def get_all_outcomes(self, state: State, action: Action) -> Multinoulli[Response]:
    """
    distribution over every (reward, next state) response for a single state and action

    workable for one (state, action) in theory, but there are too many outcomes
    to do this for all states and actions so potentially not useful in practice
    """
    self._calc_start_of_day(state, action)
    first_location = self._location_1
    second_location = self._location_2
    first_outcomes: Multinoulli[LocationOutcome] = first_location.get_outcome_distribution(
        self._starting_cars_1)
    second_outcomes: Multinoulli[LocationOutcome] = second_location.get_outcome_distribution(
        self._starting_cars_2)

    # collate probability by (reward, s'); the two location outcomes are
    # combined by multiplying their probabilities
    responses: Multinoulli[Response] = Multinoulli()
    for first, first_probability in first_outcomes.items():
        for second, second_probability in second_outcomes.items():
            next_state = State(is_terminal=False,
                               ending_cars_1=first.ending_cars,
                               ending_cars_2=second.ending_cars)
            joint_probability = first_probability * second_probability
            reward = self._calc_reward(first.cars_rented + second.cars_rented)
            responses[reward, next_state] += joint_probability
    responses.enable()
    return responses
def get_summary_outcomes(self, state: State, action: Action) -> Multinoulli[Response]:
    """
    distribution of possible responses for a single state and action
    with the reward calculated from the expected cars rented given the ending cars
    in place of the reward of each individual outcome

    Args:
        state: state at the start of the step, passed to _calc_start_of_day
        action: action taken, passed to _calc_start_of_day

    Returns:
        enabled distribution keyed by (reward, next state)
    """
    self._calc_start_of_day(state, action)
    l1 = self._location_1
    l2 = self._location_2
    starting_cars_1 = self._starting_cars_1
    starting_cars_2 = self._starting_cars_2
    ending_cars_dist1: Multinoulli[int] = l1.get_ending_cars_distribution(starting_cars_1)
    ending_cars_dist2: Multinoulli[int] = l2.get_ending_cars_distribution(starting_cars_2)

    # The location-2 lookup depends only on ending_cars2, so do it once per
    # ending_cars2 here instead of once per (ending_cars1, ending_cars2) pair.
    location_2_terms = [
        (ending_cars2,
         probability2,
         l2.get_expected_cars_rented_given_ending_cars(starting_cars_2, ending_cars2))
        for ending_cars2, probability2 in ending_cars_dist2.items()
    ]

    response_distribution: Multinoulli[Response] = Multinoulli()
    for ending_cars1, probability1 in ending_cars_dist1.items():
        cars_rented1 = l1.get_expected_cars_rented_given_ending_cars(starting_cars_1, ending_cars1)
        for ending_cars2, probability2, cars_rented2 in location_2_terms:
            cars_rented = cars_rented1 + cars_rented2
            new_state = State(is_terminal=False,
                              ending_cars_1=ending_cars1,
                              ending_cars_2=ending_cars2)
            # the two locations are combined by multiplying their probabilities
            probability = probability1 * probability2
            reward = self._calc_reward(cars_rented)
            response_distribution[reward, new_state] += probability
    response_distribution.enable()
    return response_distribution
def __init__(self, max_cars: int, rental_rate: float, return_rate: float, excess_parking_cost: float):
    """Store the location parameters and create the containers, all initially empty.

    Args:
        max_cars: stored as _max_cars.
        rental_rate: stored as _rental_rate.
        return_rate: stored as _return_rate.
        excess_parking_cost: stored as _excess_parking_cost.
    """
    # parameters, kept exactly as passed in
    self._excess_parking_cost: float = excess_parking_cost
    self._return_rate: float = return_rate
    self._rental_rate: float = rental_rate
    self._max_cars: int = max_cars

    # car counts and the two distributions over them, all empty for now
    self._car_count: list[int] = []
    self._return_distribution: Multinoulli[int] = Multinoulli()
    self._demand_distribution: Multinoulli[int] = Multinoulli()

    # possible outcomes for each starting_cars
    # dict[starting_cars, dict[LocationOutcome, probability]]
    self.outcome_distributions: dict[int, Multinoulli[LocationOutcome]] = {}
    self._counter: int = 0

    # summaries, each keyed by starting_cars
    # dict[starting_cars, cars_rented * probability]
    self.expected_cars_rented: dict[int, float] = {}
    # dict[starting_cars, dict[ending_cars, probability]]
    self.ending_cars_distribution: dict[int, Multinoulli[int]] = {}
    # dict[starting_cars, dict[ending_cars, cars_rented_x_probability]]
    self.cars_rented_x_probability_by_ending_cars: dict[int, dict[int, float]] = {}
    # dict[starting_cars, dict[ending_cars, expected_cars_rented]]
    self.expected_cars_rented_by_ending_cars: dict[int, dict[int, float]] = {}
def _build_outcome_distribution(self, starting_cars: int):
    """Build and store the distribution of LocationOutcome for one starting_cars.

    Every (car_demand, cars_returned) pair is combined; the probabilities of
    the two are multiplied and accumulated against the resulting
    LocationOutcome(ending_cars, cars_rented).  Pairs whose probability is not
    above zero are left out.  The enabled result is stored in
    self.outcome_distributions[starting_cars].
    """
    outcomes: Multinoulli[LocationOutcome] = Multinoulli()
    for car_demand, demand_probability in self._demand_distribution.items():
        # cars_rented depends on the demand only, so it is found once per demand
        cars_rented = self._get_cars_rented(starting_cars, car_demand)
        for cars_returned, return_probability in self._return_distribution.items():
            ending_cars = self._get_ending_cars(starting_cars, cars_rented, cars_returned)
            joint_probability = demand_probability * return_probability
            if not (joint_probability > 0.0):
                continue
            outcomes[LocationOutcome(ending_cars, cars_rented)] += joint_probability
    outcomes.enable()
    self.outcome_distributions[starting_cars] = outcomes
def get_state_transition_distribution(self, state: State, action: Action) -> Multinoulli[State]:
    """
    dict[ s', p(s'|s,a) ]
    distribution of next states for a (state, action)

    Heads adds action.stake to state.capital and tails subtracts it. The next
    state is terminal when the new capital is 0 or equals max_capital.

    Args:
        state: current state; its capital is read
        action: action taken; its stake is read

    Returns:
        distribution of next states, enabled with do_self_check=False
    """
    distribution: Multinoulli[State] = Multinoulli()
    for toss in (Toss.HEADS, Toss.TAILS):
        probability = self._toss_distribution[toss]
        if toss == Toss.HEADS:
            new_capital = state.capital + action.stake
        else:
            new_capital = state.capital - action.stake
        is_terminal: bool = (new_capital == 0 or
                             new_capital == self._environment_parameters.max_capital)
        next_state = State(is_terminal=is_terminal, capital=new_capital)
        # Accumulate rather than assign: if both tosses give the same next
        # state (a stake of 0) a plain assignment would keep only the last
        # toss's probability and drop the other.
        # NOTE(review): assumes equal State values are treated as the same key - confirm.
        distribution[next_state] += probability
    distribution.enable(do_self_check=False)
    return distribution
def _calc_next_state_distribution(self, state: State, action: Action) -> Multinoulli[State]:
    """
    dict[ s', p(s'|s,a) ]
    next-state distribution for a (state, action), formed by pairing every
    ending_cars of location 1 with every ending_cars of location 2
    """
    self._calc_start_of_day(state, action)
    first_endings = self._location_1.get_ending_cars_distribution(self._starting_cars_1)
    second_endings = self._location_2.get_ending_cars_distribution(self._starting_cars_2)

    result: Multinoulli[State] = Multinoulli()
    for first_cars, first_probability in first_endings.items():
        for second_cars, second_probability in second_endings.items():
            paired_state = State(is_terminal=False,
                                 ending_cars_1=first_cars,
                                 ending_cars_2=second_cars)
            # probability of the pair is the product of the two probabilities
            result[paired_state] = first_probability * second_probability
    result.enable()
    return result
from __future__ import annotations

from mdp.common import Multinoulli
from mdp.task.jacks.model.state import State

# Scratch script: exercise a Multinoulli keyed by State.
x: Multinoulli[State] = Multinoulli()

# three states that differ only in ending_cars_1
my_state, my_state2, my_state3 = (
    State(is_terminal=False, ending_cars_1=cars, ending_cars_2=6)
    for cars in (5, 7, 9)
)

x[my_state] = 0.8
# in-place add on a key that has not been assigned yet
x[my_state2] += 0.2

# my_state3 is never assigned a probability; it is only read
for _state in (my_state, my_state2, my_state3):
    print(x[_state])

x.enable()
print(x.draw_one())
class Location:
    """Demand, return and end-of-day car distributions for one location.

    Demand and returns each use Poisson probabilities (stats.poisson.pmf) over
    0..max_cars, with the remaining probability added to the max_cars entry.
    __init__ only creates empty containers: call build() before using the
    get_* accessors or draw_outcome().
    """

    # parking_costs() charges the excess cost only when more cars than this remain
    _MAX_FREE_PARKING = 10

    def __init__(self, max_cars: int, rental_rate: float, return_rate: float, excess_parking_cost: float):
        """Store the parameters and create the (initially empty) containers.

        Args:
            max_cars: largest car count; ending cars are capped at this value.
            rental_rate: Poisson rate used for the demand distribution.
            return_rate: Poisson rate used for the return distribution.
            excess_parking_cost: value returned by parking_costs() above the limit.
        """
        self._max_cars: int = max_cars
        self._rental_rate: float = rental_rate
        self._return_rate: float = return_rate
        self._excess_parking_cost: float = excess_parking_cost

        self._car_count: list[int] = []
        self._demand_distribution: Multinoulli[int] = Multinoulli()
        self._return_distribution: Multinoulli[int] = Multinoulli()

        # for each starting_cars find possible outcomes
        # dict[starting_cars, dict[LocationOutcome, probability]]
        self.outcome_distributions: dict[int, Multinoulli[LocationOutcome]] = {}
        # number of (starting_cars, outcome) entries found by build()
        self._counter: int = 0

        # summaries
        # dict[starting_cars, Sum of cars_rented * probability]
        self.expected_cars_rented: dict[int, float] = {}
        # dict[starting_cars, dict[ending_cars, probability]]
        self.ending_cars_distribution: dict[int, Multinoulli[int]] = {}
        # dict[starting_cars, dict[ending_cars, cars_rented_x_probability]]
        self.cars_rented_x_probability_by_ending_cars: dict[int, dict[int, float]] = {}
        # dict[starting_cars, dict[ending_cars, expected_cars_rented]]
        self.expected_cars_rented_by_ending_cars: dict[int, dict[int, float]] = {}

    def build(self):
        """Build the distributions, count the outcomes and build the summaries."""
        self._build_rental_return_distributions()
        self._build_outcome_distributions()
        # Recount from scratch rather than adding to the previous value, so
        # calling build() more than once does not double count.
        self._counter = sum(
            1
            for distribution in self.outcome_distributions.values()
            for _ in distribution.keys()
        )
        self._build_summaries()

    def _build_rental_return_distributions(self):
        """Fill in the car counts and the demand and return distributions."""
        self._car_count = list(range(self._max_cars + 1))
        self._demand_distribution = self._build_truncated_poisson(self._rental_rate)
        self._return_distribution = self._build_truncated_poisson(self._return_rate)

    def _build_truncated_poisson(self, rate: float) -> Multinoulli[int]:
        """Return an enabled Poisson distribution over 0..max_cars for the given rate."""
        distribution = Multinoulli({c: self._poisson(rate, c) for c in range(self._max_cars + 1)})
        # whatever is missing from a total of 1.0 goes onto the max_cars entry
        distribution[self._max_cars] += 1.0 - sum(distribution.values())
        distribution.enable()
        return distribution

    def _poisson(self, lambda_: float, n: int) -> float:
        """Poisson probability mass at n for mean lambda_."""
        return stats.poisson.pmf(k=n, mu=lambda_)

    def _build_outcome_distributions(self):
        """Build the outcome distribution for every car count."""
        for starting_cars in self._car_count:
            self._build_outcome_distribution(starting_cars)

    def _build_outcome_distribution(self, starting_cars: int):
        """Build and store the LocationOutcome distribution for one starting_cars.

        Each (car_demand, cars_returned) pair contributes the product of its two
        probabilities to LocationOutcome(ending_cars, cars_rented); pairs whose
        probability is not above zero are left out.
        """
        outcome_distribution: Multinoulli[LocationOutcome] = Multinoulli()
        for car_demand, demand_probability in self._demand_distribution.items():
            cars_rented = self._get_cars_rented(starting_cars, car_demand)
            for cars_returned, return_probability in self._return_distribution.items():
                ending_cars = self._get_ending_cars(starting_cars, cars_rented, cars_returned)
                probability = demand_probability * return_probability
                if probability > 0.0:
                    location_outcome = LocationOutcome(ending_cars, cars_rented)
                    outcome_distribution[location_outcome] += probability
        outcome_distribution.enable()
        self.outcome_distributions[starting_cars] = outcome_distribution

    def _get_cars_rented(self, starting_cars: int, car_demand: int) -> int:
        """Cars rented: the demand, limited to the cars available."""
        return min(starting_cars, car_demand)

    def _get_ending_cars(self, starting_cars: int, cars_rented: int, cars_returned: int) -> int:
        """Cars left after rentals and returns, capped at max_cars."""
        return min(starting_cars - cars_rented + cars_returned, self._max_cars)

    def _build_summaries(self):
        """Derive the per-starting_cars summaries from the outcome distributions."""
        for starting_cars, outcome_distribution in self.outcome_distributions.items():
            expected_cars_rented: float = 0.0
            ending_cars_distribution: Multinoulli[int] = Multinoulli()
            cars_rented_x_probability_by_ending_cars: DictZero[int, float] = DictZero()
            for outcome, probability in outcome_distribution.items():
                cars_rented_x_probability = outcome.cars_rented * probability
                expected_cars_rented += cars_rented_x_probability
                ending_cars_distribution[outcome.ending_cars] += probability
                cars_rented_x_probability_by_ending_cars[outcome.ending_cars] += cars_rented_x_probability
            ending_cars_distribution.enable()

            expected_cars_rented_by_ending_cars: DictZero[int, float] = DictZero()
            for ending_cars, ending_cars_probability in ending_cars_distribution.items():
                cars_rented_x_probability = cars_rented_x_probability_by_ending_cars[ending_cars]
                # E[r|s,a,s'] = Sum_over_r( p(r,s'|s,a).r ) / p(s'|s,a)
                # (only outcomes with probability > 0 were accumulated, so the
                # divisor is a sum of positive terms)
                expected_cars_rented_by_ending_cars[ending_cars] = (
                    cars_rented_x_probability / ending_cars_probability
                )

            self.expected_cars_rented[starting_cars] = expected_cars_rented
            self.ending_cars_distribution[starting_cars] = ending_cars_distribution
            self.cars_rented_x_probability_by_ending_cars[starting_cars] = \
                cars_rented_x_probability_by_ending_cars
            self.expected_cars_rented_by_ending_cars[starting_cars] = expected_cars_rented_by_ending_cars

    def get_outcome_distribution(self, starting_cars: int) -> Multinoulli[LocationOutcome]:
        """Distribution of LocationOutcome for the given starting_cars."""
        return self.outcome_distributions[starting_cars]

    def get_ending_cars_distribution(self, starting_cars: int) -> Multinoulli[int]:
        """Distribution of ending_cars for the given starting_cars."""
        return self.ending_cars_distribution[starting_cars]

    def get_transition_probability(self, starting_cars: int, ending_cars: int) -> float:
        """Probability of ending_cars given starting_cars."""
        return self.ending_cars_distribution[starting_cars][ending_cars]

    def get_expected_cars_rented_given_ending_cars(self, starting_cars: int, ending_cars: int) -> float:
        """Expected cars rented given both starting_cars and ending_cars."""
        return self.expected_cars_rented_by_ending_cars[starting_cars][ending_cars]

    def draw_outcome(self, starting_cars: int) -> LocationOutcome:
        """Draw one LocationOutcome from the distribution for starting_cars."""
        return self.outcome_distributions[starting_cars].draw_one()

    def parking_costs(self, end_cars: int) -> float:
        """Return excess_parking_cost if end_cars exceeds the free limit, else 0.0."""
        if end_cars > self._MAX_FREE_PARKING:
            return self._excess_parking_cost
        return 0.0
from __future__ import annotations

# from typing import TYPE_CHECKING
from scipy import stats

from mdp.common import Multinoulli

# Scratch script: build a Poisson demand distribution over 0.._max_cars and
# print it before and after the top entry is adjusted.
_max_cars = 5


def _poisson(lambda_: float, n: int) -> float:
    """Poisson probability mass at n for mean lambda_."""
    return stats.poisson.pmf(k=n, mu=lambda_)


_car_count = list(range(_max_cars + 1))
print(_car_count)

_demand_distribution = Multinoulli({count: _poisson(4.0, count) for count in range(_max_cars + 1)})
print(_demand_distribution)

# add whatever is missing from a total of 1.0 onto the _max_cars entry
_demand_distribution[_max_cars] += 1.0 - sum(_demand_distribution.dict.values())
print(_demand_distribution)

_demand_distribution.enable()