Exemplo n.º 1
0
class ChemotaxisEnv(Environment):
	"""
	Chemotaxis task: a seeker has to find a food item by following its chemical signal.

	Adapted from "Evolving Dynamical Neural Networks for Adaptive Behavior"
	(Beer & Gallagher, 1992).

	Background (the experiment this class is modelled on):
	A seeker is enclosed in a square box together with a food item.
	The food emits a chemical signal whose intensity falls off with the
	distance from the food. The seeker should reach the food and stay close
	to it, starting from arbitrary locations and orientations.
	The seeker has a circular body, two chemical sensors placed symmetrically
	about the center line of the body, and two effectors on opposite sides of
	the body that push it forward and rotate it. In the simplified physics of
	this environment, velocity is proportional to the applied force.

	NOTE(review): the paper (and the earlier documentation of this class)
	describe an inverse-square falloff, but calcSignal() as implemented returns
	1/distance. Confirm which of the two is intended.

	State space (continuous):
		food		location of the food item; reset() draws each coordinate
					uniformly from [0, SCREEN_SIZE)
		seeker.loc	location of the center of the seeker
		seeker.dir	angle of the seeker in radians from the positive x-axis, i.e. east=0
	Action space (continuous):
		l			output signal of the left effector neuron
		r			output signal of the right effector neuron
	"""

	def __init__(self):
		""" Opens the pygame window, creates the food and seeker sprites,
			draws the first frame and randomizes the initial state via reset().
		"""
		# --- pygame setup ---
		self.screen = pygame.display.set_mode((SCREEN_SIZE, SCREEN_SIZE))
		pygame.mouse.set_visible(0)

		# plain white surface, the size of the window, used to clear each frame
		canvas = pygame.Surface(self.screen.get_size()).convert()
		canvas.fill((255, 255, 255))
		self.background = canvas

		# each sprite gets its own render group
		self.food = Food()
		self.food_sprite = pygame.sprite.RenderPlain(self.food)

		self.seeker = Seeker()
		self.seeker_sprite = pygame.sprite.RenderPlain(self.seeker)

		self._draw()

		# --- pybrain setup ---
		self.action = [0.0, 0.0]

		self.reset()

	def _draw(self):
		""" Renders one frame: background, then food, then seeker, then flips the display. """
		self.screen.blit(self.background, (0, 0))

		self.food.update()
		self.food_sprite.draw(self.screen)

		# the seeker sprite group is not drawn here; per the original author's
		# note the seeker is drawn with pymunk, so only update() is called
		self.seeker.update(self.screen)

		pygame.display.flip()

	def _calcDistance(self, loc1, loc2):
		""" Returns the Euclidean distance between the coordinate pairs loc1 and loc2. """
		import math
		dx = loc2[0] - loc1[0]
		dy = loc2[1] - loc1[1]
		return math.sqrt(dx ** 2 + dy ** 2)

	def calcSignal(self, loc):
		""" Returns the chemical signal strength at loc.

			The value is the reciprocal of the distance between loc and the food
			(1/dist); when loc coincides with the food, 1 is returned instead of
			dividing by zero.

			NOTE(review): the experiment description speaks of an inverse-square
			law, which this is not - confirm the intended falloff.
		"""
		dist = self._calcDistance(self.food.loc, loc)
		# open question left by the original author:
		# why does changing the reward magnitude change the sensor-delta magnitude?
		return 1 if dist == 0 else 1/dist

	def getSensors(self):
		""" Returns the currently visible state of the world as a two-element
			list: the signal strength at the seeker's left sensor followed by
			the signal strength at its right sensor.

			Sensor positions are obtained from the seeker and passed through
			toPygame() before being handed to calcSignal().
		"""
		lx, ly, rx, ry = self.seeker.calcAbsoluteSensorPositions()

		left_signal = self.calcSignal(toPygame((lx, ly)))
		right_signal = self.calcSignal(toPygame((rx, ry)))
		return [left_signal, right_signal]

	def performAction(self, action):
		""" Applies an action to the world, records the seeker's new position
			and redraws the scene.

			:key action: indexable pair of effector outputs;
				action[0] is the left motor/effector neuron output,
				action[1] is the right one
		"""
		left_output = action[0]
		right_output = action[1]
		self.seeker.move_body(left_output, right_output)

		# remember where the seeker is now, converted with toPygame()
		position = toPygame(self.seeker.body.position)
		self.movement_tracker.append(position)

		self._draw()

	def reset(self):
		""" Starts a fresh episode: clears the movement history, moves the food
			to a uniformly random point with both coordinates in
			[0, SCREEN_SIZE), and resets the seeker.
		"""
		import random
		self.movement_tracker = []

		food_x = random.random()*SCREEN_SIZE
		food_y = random.random()*SCREEN_SIZE
		self.food.setLocation((food_x, food_y))

		self.seeker.reset()