def __init__(self, initial_wealth=25.0, edge_prior_alpha=7, edge_prior_beta=3,
             max_wealth_alpha=5.0, max_wealth_m=200.0, max_rounds_mean=300.0,
             max_rounds_sd=25.0, reseed=True):
    """Set up a Kelly-coinflip game whose edge, wealth cap and round cap are
    drawn from priors rather than fixed, plus the sufficient statistics the
    agent can use to infer them during play."""
    # Remember the hyper-parameters so a reset can call __init__ again and the
    # next game is governed by the same priors, as the user expects.
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initial_wealth = float(initial_wealth)
    self.edge_prior_alpha = edge_prior_alpha
    self.edge_prior_beta = edge_prior_beta
    self.max_wealth_alpha = max_wealth_alpha
    self.max_wealth_m = max_wealth_m
    self.max_rounds_mean = max_rounds_mean
    self.max_rounds_sd = max_rounds_sd

    # Reseed unless asked to keep an already-existing RNG.
    if reseed or not hasattr(self, 'np_random'):
        self.seed()

    # Sample this game's latent parameters from the priors.
    sampled_edge = self.np_random.beta(edge_prior_alpha, edge_prior_beta)
    # NOTE(review): max_wealth_m is passed as genpareto's second positional
    # argument, which scipy interprets as *loc*, not *scale* — this mirrors
    # the original code; confirm that is the intended parameterization.
    wealth_cap = round(genpareto.rvs(max_wealth_alpha, max_wealth_m,
                                     random_state=self.np_random))
    round_cap = int(round(self.np_random.normal(max_rounds_mean,
                                                max_rounds_sd)))

    # Sufficient statistic for the Pareto prior over the wealth cap: alpha is
    # fixed, and x_m is simply the highest wealth seen so far.
    self.max_ever_wealth = float(self.initial_wealth)
    # For the coinflip edge, the statistic is total wins/losses so far.
    self.wins = 0
    self.losses = 0
    # For the round cap, we only need how many rounds have been played.
    self.rounds_elapsed = 0

    # The rest proceeds as in the fixed-parameter version.
    self.action_space = spaces.Discrete(int(wealth_cap * 100))
    self.observation_space = spaces.Tuple((
        spaces.Box(0, wealth_cap, shape=[1], dtype=np.float32),  # current wealth
        spaces.Discrete(round_cap + 1),   # rounds elapsed
        spaces.Discrete(round_cap + 1),   # wins
        spaces.Discrete(round_cap + 1),   # losses
        spaces.Box(0, wealth_cap, [1], dtype=np.float32)))  # maximum observed wealth
    self.reward_range = (0, wealth_cap)

    self.edge = sampled_edge
    self.wealth = self.initial_wealth
    self.max_rounds = round_cap
    self.rounds = self.max_rounds
    self.max_wealth = wealth_cap
def __init__(self, n=5, slip=0.2, small=2, large=10):
    """n-Chain: move forward toward a large end-of-chain payout, or jump
    back to the start for a small immediate one."""
    self.n = n
    self.slip = slip    # probability an action 'slips' (is inverted)
    self.small = small  # payout for the 'backwards' action
    self.large = large  # payout at the end of the chain for 'forwards'
    self.state = 0      # begin at the head of the chain

    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Discrete(n)
    self.seed()
def __init__(self, nS, nA, P, isd):
    """Tabular environment: nS states, nA actions, transition table P, and
    initial state distribution isd."""
    self.P = P
    self.isd = isd
    self.lastaction = None  # kept only so rendering can show the last move
    self.nS = nS
    self.nA = nA

    self.action_space = spaces.Discrete(nA)
    self.observation_space = spaces.Discrete(nS)

    self.seed()
    self.reset()
def __init__(self, natural=False):
    """Blackjack: observation is (player sum, dealer's showing card, usable ace)."""
    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Tuple((spaces.Discrete(32),
                                           spaces.Discrete(11),
                                           spaces.Discrete(2)))
    self.seed()

    # Flag to pay out 1.5x on a "natural" blackjack win, like casino rules.
    # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
    self.natural = natural

    # Deal the first game.
    self.reset()
def __init__(self):
    """Build the lunar-lander world; the action space depends on whether the
    (class-level) ``continuous`` flag is set."""
    EzPickle.__init__(self)
    self.seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    # Observations are unbounded; the useful range is roughly -1..+1, but
    # spikes beyond it can occur.
    self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8, ),
                                        dtype=np.float32)

    if self.continuous:
        # Two floats: [main engine, left-right engines].
        # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power
        # (the engine cannot run below 50% power).
        # Left-right: -1.0..-0.5 fires the left engine, +0.5..+1.0 fires the
        # right one, -0.5..0.5 is off.
        self.action_space = spaces.Box(-1, +1, (2, ), dtype=np.float32)
    else:
        # Discrete: nop, fire left engine, main engine, right engine.
        self.action_space = spaces.Discrete(4)

    self.reset()
def __init__(self):
    """Classic cart-pole balancing task."""
    # Physical constants.
    self.gravity = 9.8
    self.masscart = 1.0
    self.masspole = 0.1
    self.total_mass = self.masspole + self.masscart
    self.length = 0.5  # actually half the pole's length
    self.polemass_length = self.masspole * self.length
    self.force_mag = 10.0
    self.tau = 0.02  # seconds between state updates
    self.kinematics_integrator = 'euler'

    # The episode fails when the pole tips past this angle...
    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    # ...or the cart leaves this x range.
    self.x_threshold = 2.4

    # Limits are set to twice the failure thresholds so a failing
    # observation is still within the observation-space bounds.
    limit = np.array([self.x_threshold * 2,
                      np.finfo(np.float32).max,
                      self.theta_threshold_radians * 2,
                      np.finfo(np.float32).max])

    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Box(-limit, limit, dtype=np.float32)

    self.seed()
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    """Acrobot: observation is (cos/sin of both joint angles, both angular
    velocities), bounded by the class-level velocity limits."""
    self.viewer = None
    bound = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
    self.observation_space = spaces.Box(low=-bound, high=bound,
                                        dtype=np.float32)
    self.action_space = spaces.Discrete(3)
    self.state = None
    self.seed()
def __init__(self):
    """RGB pixel-grid environment with three discrete actions."""
    self.seed()
    self.viewer = None
    # Raw pixels: one uint8 per channel over the (module-level) field size.
    self.observation_space = spaces.Box(0, 255, (FIELD_H, FIELD_W, 3),
                                        dtype=np.uint8)
    self.action_space = spaces.Discrete(3)
    self.reset()
def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5),
             repeat_action_probability=0., full_action_space=False):
    """Wrap an Arcade Learning Environment game.

    ``frameskip`` should be either a tuple (indicating a random range to
    choose from, with the top value exclude), or an int.
    """
    utils.EzPickle.__init__(self, game, obs_type, frameskip,
                            repeat_action_probability)
    assert obs_type in ('ram', 'image')

    self.game_path = atari_py.get_game_path(game)
    if not os.path.exists(self.game_path):
        raise IOError('You asked for game %s but path %s does not exist' %
                      (game, self.game_path))
    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None

    # Tune (or disable) ALE's own action repeat:
    # https://github.com/openai/gym_wmgds/issues/349
    assert isinstance(repeat_action_probability, (float, int)), \
        "Invalid repeat_action_probability: {!r}".format(
            repeat_action_probability)
    self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                      repeat_action_probability)

    self.seed()

    self._action_set = (self.ale.getLegalActionSet() if full_action_space
                        else self.ale.getMinimalActionSet())
    self.action_space = spaces.Discrete(len(self._action_set))

    (screen_w, screen_h) = self.ale.getScreenDims()
    if self._obs_type == 'ram':
        self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8,
                                            shape=(128, ))
    elif self._obs_type == 'image':
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(screen_h, screen_w, 3),
                                            dtype=np.uint8)
    else:
        raise error.Error('Unrecognized observation type: {}'.format(
            self._obs_type))
def __init__(self, initial_wealth=25.0, edge=0.6, max_wealth=250.0,
             max_rounds=300):
    """Kelly coinflip game with fixed, known parameters."""
    # Bets are placed in penny increments, hence 100 actions per dollar.
    self.action_space = spaces.Discrete(int(max_wealth * 100))
    # Observation is the pair (current wealth, rounds elapsed).
    self.observation_space = spaces.Tuple((
        spaces.Box(0, max_wealth, [1], dtype=np.float32),
        spaces.Discrete(max_rounds + 1)))
    self.reward_range = (0, max_wealth)

    self.edge = edge
    self.wealth = initial_wealth
    self.initial_wealth = initial_wealth
    self.max_rounds = max_rounds
    self.max_wealth = max_wealth

    self.np_random = None
    self.rounds = None
    self.seed()
    self.reset()
def __init__(self):
    """Pixel-grid environment with ten digit actions; pre-renders a fake 6x6
    'MNIST' glyph for each digit from the module-level ASCII art."""
    self.seed()
    self.viewer = None
    self.observation_space = spaces.Box(0, 255, (FIELD_H, FIELD_W, 3),
                                        dtype=np.uint8)
    self.action_space = spaces.Discrete(10)

    # Rasterize the ASCII glyphs into a (digit, row, col) array of
    # character codes.
    self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8)
    for digit in range(10):
        for row in range(6):
            self.bogus_mnist[digit, row, :] = [
                ord(ch) for ch in bogus_mnist[digit][row]
            ]

    self.reset()
def __init__(self):
    """Number-guessing game: the target lies within +/- ``range`` and the
    agent guesses real numbers within +/- ``bounds``."""
    self.range = 1000    # randomly selected number is within +/- this value
    self.bounds = 10000

    self.action_space = spaces.Box(low=np.array([-self.bounds]),
                                   high=np.array([self.bounds]),
                                   dtype=np.float32)
    self.observation_space = spaces.Discrete(4)

    self.number = 0
    self.guess_count = 0
    self.guess_max = 200
    self.observation = 0

    self.seed()
    self.reset()
def __init__(self):
    """Hotter/colder-style guessing game over a bounded real action space."""
    self.range = 1000   # the target lies within +/- this value
    self.bounds = 2000  # action space bounds

    self.action_space = spaces.Box(low=np.array([-self.bounds]),
                                   high=np.array([self.bounds]),
                                   dtype=np.float32)
    self.observation_space = spaces.Discrete(4)

    self.number = 0
    self.guess_count = 0
    self.guess_max = 200
    self.observation = 0

    self.seed()
    self.reset()
def __init__(self):
    """Discrete-action mountain-car task."""
    self.min_position = -1.2
    self.max_position = 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5

    # Observation is (position, velocity).
    self.low = np.array([self.min_position, -self.max_speed])
    self.high = np.array([self.max_position, self.max_speed])

    self.viewer = None
    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(self.low, self.high,
                                        dtype=np.float32)

    self.seed()
    self.reset()
def __init__(self, spots=37):
    """Roulette wheel with ``spots`` numbers plus one extra action
    (hence ``spots + 1`` actions in total)."""
    self.n = spots + 1
    self.action_space = spaces.Discrete(self.n)
    self.observation_space = spaces.Discrete(1)
    self.seed()