Пример #1
0
 def policy_observation(self, policy):
   """Return the element-wise feature sum gathered while running `policy`.

   The helicopter is reset, the features of the initial observation seed the
   running total, and then `policy` is stepped until `self.heli.terminal`
   becomes true. After every step the features of the new state are added
   element-wise to the total.

   Args:
     policy: object providing `propagate(observation, 1)`, whose result is
       passed to `self.heli.update` as the action.

   Returns:
     The accumulated feature sum. This is a list once at least one step
     has run; if the episode is terminal right after the reset it is
     whatever `self.features_from_state` returned for the initial
     observation.
   """
   # reset() yields a pair; only the observation is needed here.
   obs, _ = self.heli.reset()
   totals = self.features_from_state(obs)
   while not self.heli.terminal:
     # TODO (from the original author): update() could perhaps return q too,
     # so that the first observation carries the same information.
     obs, _ = self.heli.update(policy.propagate(obs, 1))
     # extract_sa (from the evaluator) splits the observation into three
     # parts; the third one is not used.
     base, q, _ = extract_sa(obs)
     # NOTE(review): `base + q` is presumably a sequence concatenation, with
     # the last element dropped before feature extraction -- confirm.
     step_features = self.features_from_state((base + q)[:-1])
     # Pair up the running totals with the new features and add each pair.
     totals = [sum(pair) for pair in zip(totals, step_features)]
   return totals
Пример #2
0
 def hoverIRL(policy, thet):
   """Helicopter alternative evaluation function (IRL variant).

   Resets the module-level `heli`, then steps `policy` until `heli.terminal`
   is true, accumulating the error reported by `heli.update`. At every step
   the linear estimate `numpy.dot(thet, feat)` is computed from the state
   features and printed next to the reported error for comparison.

   Args:
     policy: object providing `propagate(state, 1)`, whose result is passed
       to `heli.update` as the action.
     thet: weight vector combined with the state features via `numpy.dot`.

   Returns:
     `1 / math.log(sum_error)`, where `sum_error` is the value returned by
     `heli.reset()` plus the error of every step.

   Raises:
     ValueError: from `math.log` if the accumulated error is not positive.
     ZeroDivisionError: if the accumulated error is exactly 1 (log is 0).
   """
   state, sum_error = heli.reset()
   while not heli.terminal:
     action = policy.propagate(state, 1)
     # TODO (from the original author): update() could perhaps return q too,
     # so that the first state carries the same information.
     state, error = heli.update(action)
     # extract_sa splits the state into three parts; the third is unused.
     st, q, _ = extract_sa(state)
     # NOTE(review): `st + q` is presumably a sequence concatenation, with the
     # last element dropped before feature extraction -- confirm.
     feat = featuresFromState((st + q)[:-1])
     # Use the weights passed in as `thet`. The previous body read the name
     # `theta`, which is not defined in this function, and never used the
     # parameter.
     errorTest = numpy.dot(thet, feat)
     # The previous if/else added `errorTest` when it equalled `error` and
     # `error` otherwise, i.e. a value equal to `error` in both branches.
     sum_error += error
     # Diagnostic output; the parenthesised form is valid on Python 2 and 3.
     print("-----------")
     print(errorTest)
     print(error)
     print("-----------")
   return 1 / math.log(sum_error)
Пример #3
0
 def hoverIRL(policy, thet):
     """Helicopter alternative evaluation function (IRL variant).

     Resets the module-level `heli`, then steps `policy` until
     `heli.terminal` is true, accumulating the error reported by
     `heli.update`. At every step the linear estimate
     `numpy.dot(thet, feat)` is computed from the state features and printed
     next to the reported error for comparison.

     Args:
         policy: object providing `propagate(state, 1)`, whose result is
             passed to `heli.update` as the action.
         thet: weight vector combined with the state features via
             `numpy.dot`.

     Returns:
         `1 / math.log(sum_error)`, where `sum_error` is the value returned
         by `heli.reset()` plus the error of every step.

     Raises:
         ValueError: from `math.log` if the accumulated error is not
             positive.
         ZeroDivisionError: if the accumulated error is exactly 1 (log is 0).
     """
     state, sum_error = heli.reset()
     while not heli.terminal:
         action = policy.propagate(state, 1)
         # TODO (from the original author): update() could perhaps return q
         # too, so that the first state carries the same information.
         state, error = heli.update(action)
         # extract_sa splits the state into three parts; the third is unused.
         st, q, _ = extract_sa(state)
         # NOTE(review): `st + q` is presumably a sequence concatenation,
         # with the last element dropped before feature extraction -- confirm.
         feat = featuresFromState((st + q)[:-1])
         # Use the weights passed in as `thet`. The previous body read the
         # name `theta`, which is not defined in this function, and never
         # used the parameter.
         errorTest = numpy.dot(thet, feat)
         # The previous if/else added `errorTest` when it equalled `error`
         # and `error` otherwise, i.e. a value equal to `error` either way.
         sum_error += error
         # Diagnostic output; the parenthesised form is valid on Python 2
         # and Python 3.
         print("-----------")
         print(errorTest)
         print(error)
         print("-----------")
     return 1 / math.log(sum_error)