def policy_observation(self, policy):
    """Roll out *policy* on the helicopter simulator and return the summed
    feature vector of the visited states (feature expectations for IRL).

    Parameters
    ----------
    policy : object exposing propagate(observation, n) -> action
    """
    # Fresh episode; reset() also yields an error term that is unused here.
    obs, _ = self.heli.reset()
    totals = self.features_from_state(obs)
    while not self.heli.terminal:
        act = policy.propagate(obs, 1)
        obs, _err = self.heli.update(act)
        # NOTE(review): extract_sa comes from the evaluator module; it
        # appears to split the observation into state, q and action parts
        # (maybe q should be returned by update() as well, so that each
        # step starts with the same info) -- confirm against evaluator.
        s, q, _a = extract_sa(obs)
        s = s + q
        step_feat = self.features_from_state(s[:-1])
        # Element-wise accumulation of the per-step features.
        totals = [left + right for left, right in zip(totals, step_feat)]
    return totals
def hoverIRL(policy, thet):
    """Helicopter alternative evaluation function (IRL variant).

    Rolls out *policy* on the global `heli` simulator; at every step the
    simulator's reported error is compared against the linear reward
    model dot(thet, features) and accumulated.

    Parameters
    ----------
    policy : object exposing propagate(state, n) -> action
    thet : 1-D weight vector of the linear reward model.

    Returns
    -------
    float : 1 / log(sum of per-step errors), so a lower accumulated
        error maps to a higher score.
    """
    state, sum_error = heli.reset()
    while not heli.terminal:
        action = policy.propagate(state, 1)
        state, error = heli.update(action)
        # Maybe q should be returned by update() as well, so that each
        # step starts with the same info.
        st, q, a = extract_sa(state)
        st = st + q
        feat = featuresFromState(st[:-1])
        # BUG FIX: the original read the undefined/global name `theta`,
        # silently ignoring the `thet` argument supplied by the caller.
        errorTest = numpy.dot(thet, feat)
        # Both branches of the original `if` added the same value (when
        # errorTest == error they are interchangeable), so the branch only
        # gated the diagnostic printout. Accumulate unconditionally and
        # keep the printout for the mismatching case.
        sum_error += error
        if errorTest != error:
            print("-----------")
            print(errorTest)
            print(error)
            print("-----------")
    return 1 / math.log(sum_error)
def hoverIRL(policy, thet):
    """Helicopter alternative evaluation function (IRL variant).

    NOTE(review): this is a near-verbatim duplicate of the earlier
    hoverIRL definition in this file; at import time this later
    definition shadows the earlier one. Consider deleting one of them.

    Rolls out *policy* on the global `heli` simulator; at every step the
    simulator's reported error is compared against the linear reward
    model dot(thet, features) and accumulated.

    Parameters
    ----------
    policy : object exposing propagate(state, n) -> action
    thet : 1-D weight vector of the linear reward model.

    Returns
    -------
    float : 1 / log(sum of per-step errors), so a lower accumulated
        error maps to a higher score.
    """
    state, sum_error = heli.reset()
    while not heli.terminal:
        action = policy.propagate(state, 1)
        state, error = heli.update(action)
        # Maybe q should be returned by update() as well, so that each
        # step starts with the same info.
        st, q, a = extract_sa(state)
        st = st + q
        feat = featuresFromState(st[:-1])
        # BUG FIX: the original read the undefined/global name `theta`,
        # silently ignoring the `thet` argument supplied by the caller.
        errorTest = numpy.dot(thet, feat)
        # The original `if` added the same value on both branches; it only
        # gated the diagnostic printout. Accumulate unconditionally and
        # print diagnostics only when model and simulator disagree.
        sum_error += error
        if errorTest != error:
            print("-----------")
            print(errorTest)
            print(error)
            print("-----------")
    return 1 / math.log(sum_error)