def __init__(self, actions, mapname, colormap, name="PlaceCellEnvironment",
             imgsize=(1.0, 1.0), dx=0.01, placedev=0.1, num_places=None):
    """Initialize environment variables.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name, action_vector)
    :param mapname: name of file describing environment map
    :param colormap: dict mapping pixel colours to labels
    :param name: name for environment
    :param imgsize: width of space represented by the map image
    :param dx: distance agent moves each timestep
    :param placedev: standard deviation of gaussian place cell activations
    :param num_places: number of placecells to use (if None it will
        attempt to fill the space)
    """

    EnvironmentTemplate.__init__(self, name, 2, actions)

    # parameters
    self.colormap = colormap
    self.num_actions = len(actions)
    self.imgsize = list(map(float, imgsize))
    self.dx = dx
    self.placedev = placedev
    self.num_places = num_places
    self.optimal_move = None
    self.defaultreward = -0.075

    # number of timesteps spent in reward so far
    self.rewardamount = 0
    # number of timesteps to spend in reward before the agent is reset
    # note: convenient to express this as time_in_reward / dt
    self.rewardresetamount = 0.6 / 0.001

    # load environment
    self.map = ImageIO.read(File(HRLutils.datafile(mapname)))

    # generate place cells
    self.gen_placecells(min_spread=1.0 * placedev)

    # initial conditions
    self.state = self.random_location(avoid=["wall", "target"])
    self.place_activations = [0] * len(self.placecells)

    self.create_origin("place", lambda: self.place_activations)

    # note: keeping the value small so that the noise node will give us
    # some random exploration as well
    self.create_origin(
        "optimal_move",
        lambda: [0.1 if self.optimal_move == a[0] else 0.0
                 for a in self.actions])
def __init__(self, stateD, actions, filename, name="BoxWorld", delay=0.1,
             cellwidth=1.0, dx=0.01, cartesian=False):
    """Initializes environment variables.

    :param stateD: dimension of state
    :param actions: actions available to the system
    :type actions: list of tuples (action_name, action_vector)
    :param filename: name of file containing map description
    :param name: name for environment
    :param delay: time to wait between action updates
    :param cellwidth: physical distance represented by each character in
        map file
    :param dx: distance agent moves in one timestep
    :param cartesian: if True, represent the agent's location in x,y
        cartesian space (0,0 in centre); if False, agent's location is in
        matrix space (0,0 in top left)
    """

    EnvironmentTemplate.__init__(self, name, stateD, actions)

    self.wallboxes = []
    self.targetboxes = []
    self.mudboxes = []
    self.worldbox = None
    self.delay = delay
    self.update_time = 0.5  # the time to perform the next action update
    self.learntime = [-1, -1]
    # reset right at the beginning to set things up
    self.resettime = [0.05, 0.1]
    # time agent will spend in reward location before being reset
    self.rewardresettime = 0.6
    self.num_actions = len(actions)
    self.chosen_action = None
    self.cellwidth = cellwidth
    self.dx = dx

    # read the map description, one row per line; try/finally guarantees
    # the file handle is released even if reading raises
    f = open(filename)
    try:
        data = [line[:-1] if line.endswith("\n") else line for line in f]
    finally:
        f.close()

    if cartesian:
        # modify all the coordinates so that they lie in the standard
        # cartesian space rather than the matrix row/column numbering
        # system.  floor division (//) keeps the original integer-offset
        # behaviour regardless of Python 2/3 division semantics.
        self.yoffset = (len(data) // 2) * self.cellwidth
        self.yscale = -1
        self.xoffset = (len(data[0]) // 2) * self.cellwidth
    else:
        self.yoffset = 0
        self.yscale = 1
        self.xoffset = 0

    self.load_boxes(data)

    # internal state is the location in terms of the internal coordinate
    # system (i.e. 0,0 in the top left)
    self.i_state = self.random_location()
    # this is the state in terms of the agent's coordinate system
    # (usually cartesian where 0,0 is the middle)
    self.state = self.transform_point(self.i_state)

    self.create_origin("learn", lambda: [1.0 if self.t > self.learntime[0]
                                         and self.t < self.learntime[1]
                                         else 0.0])
    self.create_origin("reset", lambda: [1.0 if self.t > self.resettime[0]
                                         and self.t < self.resettime[1]
                                         else 0.0])
def __init__(self, actions, mapname, colormap, name="PlaceCellEnvironment",
             imgsize=(1.0, 1.0), dx=0.01, placedev=0.1, num_places=None):
    """Initialize environment variables.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name, action_vector)
    :param mapname: name of file describing environment map
    :param colormap: dict mapping pixel colours to labels
    :param name: name for environment
    :param imgsize: width of space represented by the map image
    :param dx: distance agent moves each timestep
    :param placedev: standard deviation of gaussian place cell activations
    :param num_places: number of placecells to use (if None it will
        attempt to fill the space)
    """

    EnvironmentTemplate.__init__(self, name, 2, actions)

    # parameters
    self.colormap = colormap
    self.rewardamount = 0  # number of timesteps spent in reward
    # number of timesteps to spend in reward before agent is reset
    # note: convenient to express this as time_in_reward / dt
    self.rewardresetamount = 0.6 / 0.001
    self.num_actions = len(actions)
    self.imgsize = [float(x) for x in imgsize]
    self.dx = dx
    self.placedev = placedev
    self.num_places = num_places
    self.optimal_move = None
    # reward received on every timestep that no other reward applies
    self.defaultreward = -0.075

    # load environment (map image is read through the Java ImageIO API)
    self.map = ImageIO.read(File(HRLutils.datafile(mapname)))

    # generate place cells
    self.gen_placecells(min_spread=1.0 * placedev)

    # initial conditions
    self.state = self.random_location(avoid=["wall", "target"])
    self.place_activations = [0 for _ in self.placecells]

    self.create_origin("place", lambda: self.place_activations)

    # note: making the value small, so that the noise node will give us
    # some random exploration as well
    self.create_origin("optimal_move",
                       lambda: [0.1 if self.optimal_move == a[0] else 0.0
                                for a in self.actions])
def __init__(self, flat=False):
    """Set up task parameters.

    :param flat: if True, no hierarchical relationship between stimuli
        and reward; if False, stimuli-response rewards will be dependent
        on colour
    """

    self.rewardval = 1.5

    # actions correspond to three different button presses
    actions = [("left", [1, 0, 0]), ("middle", [0, 1, 0]),
               ("right", [0, 0, 1])]

    # number of instances of each attribute (stimuli formed through
    # different combinations of attribute instances)
    self.num_orientations = 3
    self.num_shapes = 3
    self.num_colours = 2

    self.presentationtime = 0.5  # length of time to present each stimuli
    self.rewardtime = 0.1  # length of reward period

    # next presentation interval
    self.presentationperiod = [0, self.presentationtime]
    # next reward interval
    self.rewardperiod = [self.presentationtime,
                         self.presentationtime + self.rewardtime]

    self.answer = random.choice(actions)[0]  # answer selected by agent

    self.stateD = (self.num_orientations + self.num_shapes +
                   self.num_colours)

    # rolling record of correct responses over the last 20 trials
    self.correct = [0] * 20

    EnvironmentTemplate.__init__(self, "BadreEnvironment", self.stateD,
                                 actions)

    self.answers = self.gen_answers(flat)

    self.create_origin("optimal_move",
                       lambda: [a[1] for a in actions
                                if a[0] == self.answer][0])

    # note: float() forces true division; with Python 2 integer division
    # the score would read 0 until every entry in self.correct was 1
    self.create_origin("score",
                       lambda: [float(sum(self.correct)) /
                                len(self.correct)])
def __init__(self, actions, playernum=0):
    """Initialize environment variables.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name, action_vector)
    :param playernum: player index (presumably selects which paddle this
        agent controls -- confirm against the game interface)
    """
    EnvironmentTemplate.__init__(self, "PongEnvironment", 2, actions)

    # task parameters
    self.state_radius = 0.9
    self.max_y = 480
    self.playernum = playernum
    self.place_dev = 0.2
    self.rewardscale = 5
    self.optimal_move = 0

    # map action names onto movement directions
    self.mapping = {"up": -1, "stay": 0, "down": 1}

    # counters exposed through the "stats" origin
    self.stats = [0, 0, 0, 0]

    self.reader = None
    self.error = None

    # place cell encoding of the 2-D environment state
    self.placecells = self.gen_placecells(min_spread=self.place_dev * 0.5)
    self.place_activations = [0] * len(self.placecells)

    self.create_origin("place", lambda: self.place_activations)
    self.create_origin("stats", lambda: self.stats)