Пример #1
0
	def agent_init(self,taskSpec):
		"""Set up the agent for a new experiment from the task specification.

		Resets the episode/step counters, creates the Action object that is
		stored on the agent, parses ``taskSpec`` for the observation and
		action ranges, and builds the Q-function approximator and the
		e-greedy action selector.

		Args:
			taskSpec: task specification handed to ``TaskSpecParser``.
		"""
		self.episode = 0
		self.steps   = 1

		# Single-argument print(...) calls emit the same text whether they are
		# parsed as Python 2 print statements or Python 3 function calls.
		print("TaskSpec:")
		print(taskSpec)
		print("")

		self.action = Action()
		self.action_types = []
		self.action.intArray = []
		self.action.doubleArray = []
		self.action.numInts  = 1

		# Fixed seed (presumably so that runs are repeatable).
		rnd.seed(0)
		spec = TaskSpecParser(taskSpec)

		self.input_ranges = spec.getDoubleObservations()
		print("observations ranges %s" % (self.input_ranges,))

		# parse action: only the first integer action entry is used
		self.action_ranges = spec.getIntActions()[0]
		print("action ranges %s" % (self.action_ranges,))

		# The upper bound is inclusive. list() guarantees a real list on
		# Python 3 as well, where range() is lazy and would print as range(...).
		low, high = self.action_ranges[0], self.action_ranges[1]
		self.actionlist    = list(range(low, high + 1))
		self.nactions      = len(self.actionlist)
		print("action list %s" % (self.actionlist,))

		# build a nearest neighbor function approximator
		# NOTE(review): alpha / lm look like learning rate and trace decay;
		# confirm against the NNQ implementation.
		self.Q = NNQ(nactions=self.nactions,
			input_ranges=self.input_ranges,
			nelemns=[7,7,3,3],
			alpha=0.5,
			lm=0.8)

		# set epsilon for e-greedy exploration
		self.SelectAction = e_greedy_selection(epsilon=0.1)
		self.SelectAction.parent = self

		# discount factor: hard-coded to 1.0; reading it from the task spec
		# via getDiscountFactor() was disabled in the original code.
		self.gamma   = 1.0

		# NOTE(review): self.proving is not assigned in this method, so it
		# must already exist (class attribute or __init__) or this line
		# raises AttributeError -- confirm where it is initialised.
		self.proving+= 1
Пример #2
0
    def agent_init(self, taskSpec):
        """Set up the agent for a new experiment from the task specification.

        Resets the episode/step counters, creates the Action object that is
        stored on the agent, parses ``taskSpec`` for the observation and
        action ranges, and builds the Q-function approximator and the
        e-greedy action selector.

        Args:
            taskSpec: task specification handed to ``TaskSpecParser``.
        """
        self.episode = 0
        self.steps = 1

        # Single-argument print(...) calls emit the same text whether they are
        # parsed as Python 2 print statements or Python 3 function calls.
        print("TaskSpec:")
        print(taskSpec)
        print("")

        self.action = Action()
        self.action_types = []
        self.action.intArray = []
        self.action.doubleArray = []
        self.action.numInts = 1

        # Fixed seed (presumably so that runs are repeatable).
        rnd.seed(0)
        spec = TaskSpecParser(taskSpec)

        self.input_ranges = spec.getDoubleObservations()
        print("observations ranges %s" % (self.input_ranges,))

        # parse action: only the first integer action entry is used
        self.action_ranges = spec.getIntActions()[0]
        print("action ranges %s" % (self.action_ranges,))

        # The upper bound is inclusive. list() guarantees a real list on
        # Python 3 as well, where range() is lazy and would print as range(...).
        low, high = self.action_ranges[0], self.action_ranges[1]
        self.actionlist = list(range(low, high + 1))
        self.nactions = len(self.actionlist)
        print("action list %s" % (self.actionlist,))

        # build a nearest neighbor function approximator
        # NOTE(review): alpha / lm look like learning rate and trace decay;
        # confirm against the NNQ implementation.
        self.Q = NNQ(nactions=self.nactions,
                     input_ranges=self.input_ranges,
                     nelemns=[7, 7, 3, 3],
                     alpha=0.5,
                     lm=0.8)

        # set epsilon for e-greedy exploration
        self.SelectAction = e_greedy_selection(epsilon=0.1)
        self.SelectAction.parent = self

        # discount factor: hard-coded to 1.0; reading it from the task spec
        # via getDiscountFactor() was disabled in the original code.
        self.gamma = 1.0

        # NOTE(review): self.proving is not assigned in this method, so it
        # must already exist (class attribute or __init__) or this line
        # raises AttributeError -- confirm where it is initialised.
        self.proving += 1