import math

def abf(direction, x_, y_, s, level):
    # Abstraction score: the fraction of still-unseen cells inside a
    # rectangular window ahead of (x_, y_) in the given direction.
    # The window grows exponentially with the abstraction level.
    score = 0.0
    w = int(2 * math.pow(3, level) + 1)       # window width
    l = int(math.pow(3, level + 1) + 1)       # window length
    size = w * l
    if direction == (1, 0):
        for x in range(x_, x_ + l + 1):
            for y in range(y_ - w / 2, y_ + w / 2 + 1):
                if s.check_boundaries(Location(x, y)) and not s.seen[x][y]:
                    score += 1.
    elif direction == (-1, 0):
        for x in range(x_, x_ - l - 1, -1):
            for y in range(y_ - w / 2, y_ + w / 2 + 1):
                if s.check_boundaries(Location(x, y)) and not s.seen[x][y]:
                    score += 1.
    elif direction == (0, 1):
        for y in range(y_, y_ + l + 1):
            for x in range(x_ - w / 2, x_ + w / 2 + 1):
                if s.check_boundaries(Location(x, y)) and not s.seen[x][y]:
                    score += 1.
    elif direction == (0, -1):
        for y in range(y_, y_ - l - 1, -1):
            for x in range(x_ - w / 2, x_ + w / 2 + 1):
                if s.check_boundaries(Location(x, y)) and not s.seen[x][y]:
                    score += 1.
    # Normalise to a coarse 0-10 integer score.
    return int(score / float(size) * 10)
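# A minimal usage sketch (not part of the original code): rank the four
# cardinal directions by their abf() score to find the most unexplored
# heading. `agent` and `state` are hypothetical stand-ins for the agent
# and environment-data objects used elsewhere in this file.
def most_unexplored_direction(agent, state, level):
    directions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
    return max(directions, key=lambda d: abf(d, agent.x, agent.y, state, level))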
def execute(self, action_, environment_data_):
    # Apply the transition for action_; move only if the target cell is
    # inside the map, then re-measure from the (possibly unchanged) pose.
    (x, y) = self.get_transition(action_, self.x, self.y)
    if environment_data_.check_boundaries(Location(x, y)):
        (self.x, self.y) = (x, y)
    self.recalculate_measurement_space()
    self.measure(environment_data_, False)
    environment_data_.update_agent_location((self.x, self.y))
def execute(self, action_, environment_data_):
    # action_[1] holds the index of the move; resolve it to a unit step
    # once instead of recomputing the transition four times.
    dx = get_transition_x(int(action_[1]))
    dy = get_transition_y(int(action_[1]))
    if environment_data_.check_boundaries(Location(self.x + dx, self.y + dy)):
        self.x += dx
        self.y += dy
    self.recalculate_measurement_space()
    self.measure(environment_data_, False)
    environment_data_.update_agent_location((self.x, self.y))
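# get_transition_x/get_transition_y are defined elsewhere in the repository;
# below is a plausible sketch consistent with how they are called above.
# The index order is an assumption, not the repository's actual encoding.
TRANSITIONS = [(1, 0), (-1, 0), (0, 1), (0, -1)]  # assumed action-index order

def get_transition_x(action_index):
    return TRANSITIONS[action_index][0]

def get_transition_y(action_index):
    return TRANSITIONS[action_index][1]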
def simulate(self, action_, s):
    # One-step simulation: apply action_ to state s and return the state
    # together with the reward gained by that step.
    base_reward = s.get_reward()
    (x, y) = self.get_transition(action_, self.x, self.y)
    if s.check_boundaries(Location(x, y)):
        (self.x, self.y) = (x, y)
    s.update_agent_location((self.x, self.y))
    self.recalculate_measurement_space()
    self.measure(s, True)
    return (s, s.get_reward() - base_reward)
def simulate(self, action_, s):
    # One-step simulation using the indexed action encoding; returns the
    # state and the reward delta produced by the step.
    base_reward = s.get_reward()
    dx = get_transition_x(int(action_[1]))
    dy = get_transition_y(int(action_[1]))
    if s.check_boundaries(Location(self.x + dx, self.y + dy)):
        self.x += dx
        self.y += dy
    s.update_agent_location((self.x, self.y))
    self.recalculate_measurement_space()
    self.measure(s, True)
    return (s, s.get_reward() - base_reward)
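# Hedged usage sketch (not from the original code): a greedy one-step
# lookahead built on simulate(). It assumes the state object can be
# deep-copied, and it restores the agent's pose between trials because
# simulate() mutates self.x / self.y.
import copy

def greedy_action(agent, s, actions):
    best_action, best_delta = None, float('-inf')
    for a in actions:
        saved = (agent.x, agent.y)
        _, delta = agent.simulate(a, copy.deepcopy(s))
        (agent.x, agent.y) = saved
        if delta > best_delta:
            best_action, best_delta = a, delta
    return best_action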
def execute(self, action_, environment_data_):
    # Move if the target cell is valid, re-measure, and recharge the
    # battery when the agent sits on the charging cell at the map middle.
    (x, y) = self.get_transition(action_, self.x, self.y)
    if environment_data_.check_boundaries(Location(x, y)):
        (self.x, self.y) = (x, y)
    self.recalculate_measurement_space()
    self.measure(environment_data_, False)
    if (self.x, self.y) == environment_data_.middle:
        print "charging"
        self.battery += 5
        if self.battery > 100:
            self.battery = 100
    environment_data_.update_agent_location((self.x, self.y))
def simulate_full(self, policy, s):
    # Roll the policy forward for policy.num_steps steps on state s.
    # Returns the state, the gain in the primary reward, and the drop in
    # the secondary reward over the whole rollout.
    base_reward = s.get_reward()
    base_reward2 = s.get_reward2()
    for i in range(policy.num_steps):
        action = policy.get_next_action(self.x, self.y, s)[0]
        (x, y) = self.get_transition(action, self.x, self.y)
        if s.check_boundaries(Location(x, y)):
            (self.x, self.y) = (x, y)
        self.recalculate_measurement_space()
        self.measure(s, True)
        if (self.x, self.y) == s.middle:
            self.battery += 1
            if self.battery > 100:
                self.battery = 100
    return (s, s.get_reward() - base_reward, -s.get_reward2() + base_reward2)
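# Hedged usage sketch: comparing candidate policies with simulate_full()
# on deep copies of the state. Assumes each policy exposes num_steps and
# get_next_action exactly as called above, and restores the mutated agent
# fields between rollouts.
import copy

def best_policy(agent, s, policies):
    def rollout_gain(p):
        saved = (agent.x, agent.y, agent.battery)
        _, gain, _ = agent.simulate_full(p, copy.deepcopy(s))
        (agent.x, agent.y, agent.battery) = saved
        return gain
    return max(policies, key=rollout_gain)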
def recalculate_measurement_space(self):
    # The measurement space is the 3x3 block of cells centred on the agent.
    self.measurement_space = []
    for i in range(self.x - 1, self.x + 2):
        for j in range(self.y - 1, self.y + 2):
            self.measurement_space.append(Location(i, j))
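# Location is defined elsewhere in the repository; a minimal assumed sketch
# covering only what this file needs (an (x, y) container passed to
# check_boundaries and stored in the measurement space):
class Location(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y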