Пример #1
0
    def reward(prev_state, new_state):
        """Return a fixed reward based on how the learner-leader gap changed.

        Each state is indexed so that element 0 is the learner's location and
        element 1 is the leader's location; both are handed to ``distance``.

        Returns:
            -600 if the new gap is below ``crashdistance`` (collision),
            10 if the gap shrank, -5 if it grew, and 0 if it is unchanged.
        """
        # Gap between learner and leader before the step.
        distance_old = distance(prev_state[0], prev_state[1])

        # Gap after the step.  The original read ``newState`` here, which is
        # not a parameter of this function; the argument is ``new_state``.
        distance_new = distance(new_state[0], new_state[1])

        # A crash dominates every other outcome.
        if distance_new < crashdistance:
            return -600
        # Closing in on the leader is rewarded ...
        if distance_old > distance_new:
            return 10
        # ... drifting away is mildly punished.
        if distance_old < distance_new:
            return -5
        # Gap unchanged: neutral.
        return 0
Пример #2
0
    def isfollowing(state, newState):
        """Summarise how well the learner tracked the leader over one step.

        Element 0 of each state is the learner, element 1 the leader; index 2
        of each of those entries is read as a heading angle.
        NOTE(review): angles are presumably in degrees -- confirm with caller.

        Returns a tuple ``(follow, crash, stay_follow, angleDif)``:
            follow      -- 1 if the gap to the leader did not grow, else 0
            crash       -- 1 if the new gap is below ``crashdistance - 2``
            stay_follow -- 1 if the new gap lies in
                           (crashdistance - 2, crashdistance + 10]
            angleDif    -- absolute difference between the folded headings
        """

        def fold(angle):
            # Values above 180 are reflected back into the 0..180 range.
            if angle > 180:
                return 180 - (angle % 180)
            return angle

        gap_before = distance(state[0], state[1])
        gap_after = distance(newState[0], newState[1])

        # Did the step keep or reduce the gap to the leader?
        follow = int(gap_before >= gap_after)

        # Heading alignment: the learner's heading is offset by 90 first.
        boid_heading = fold((newState[0][2] + 90) % 360)
        leader_heading = fold(newState[1][2])
        angleDif = math.fabs(boid_heading - leader_heading)

        # Collision with the leader.
        crash = int(gap_after < crashdistance - 2)

        # Inside the acceptable following band around the leader.
        stay_follow = int(crashdistance - 2 < gap_after <= crashdistance + 10)

        return (follow, crash, stay_follow, angleDif)
Пример #3
0
    def reward(prev_state, new_state):
        """Return a distance-scaled reward for the learner's latest move.

        Each state is indexed so that element 0 is the learner's location and
        element 1 is the leader's location; both are handed to ``distance``.

        Returns:
            -1000 when ``allGood`` rejects the learner's new location;
            otherwise ``-2 / d`` if the new gap ``d`` is below
            ``crashdistance``, ``d / 8`` if the gap shrank, ``-d / 2`` if it
            grew, and 0 if it is unchanged.
        """
        # Gap between learner and leader before the step.
        distance_old = distance(prev_state[0], prev_state[1])

        # Gap after the step.  The original read ``newState`` here, which is
        # not a parameter of this function; the argument is ``new_state``.
        new_learner_loc = new_state[0]
        distance_new = distance(new_learner_loc, new_state[1])

        reward = 0
        if distance_new < crashdistance:
            # Float literal keeps this a true division even if ``distance``
            # returns an int under Python 2.
            # NOTE(review): distance_new == 0 raises ZeroDivisionError here;
            # confirm whether ``distance`` can return exactly 0.
            reward = -(2.0 / distance_new)
        elif distance_old > distance_new:
            reward = distance_new / float(8)
        elif distance_old < distance_new:
            reward = -distance_new / float(2)

        # An invalid learner position overrides the distance-based reward.
        # NOTE(review): the two values passed are presumably x and y -- confirm.
        if not allGood(new_learner_loc[0], new_learner_loc[1]):
            return -1000

        return reward
Пример #4
0
    def reward(prev_state, new_state):
        """Score one step of the learner boid from the flock geometry.

        Both states are unpacked as ``(boid, leader, birds, velocity)``.  The
        boid, the leader and every item of ``birds`` expose ``.x`` and ``.y``;
        the velocity entry is not used here.

        Returns:
            -20 if exactly one neighbour (leader or bird) is closer than 30
            after the step, -40 if more than one is.  Otherwise the sum of
            +2 when the boid did not move away from the flock centroid and
            +5 when it did not move away from the leader (-1 if it did).
        """
        # Neighbours nearer than this after the step count as "too close".
        too_close = 30

        def flock_centroid(head, flock):
            # Mean position of the leader together with every other bird.
            count = float(len(flock) + 1)
            sum_x = sum((member.x for member in flock), head.x)
            sum_y = sum((member.y for member in flock), head.y)
            return (sum_x / count, sum_y / count)

        boid, leader, birds, _ = prev_state
        update_boid, update_leader, update_birds, _ = new_state

        # Geometry before the step.
        dist_leader = distanceObj(boid, leader)
        dist_center = distance((boid.x, boid.y), flock_centroid(leader, birds))

        # Geometry after the step.
        updated_distance = distanceObj(update_boid, update_leader)

        # Count every neighbour (leader first, then the flock) that ended up
        # too close to the boid.
        neighbour_gaps = [updated_distance]
        neighbour_gaps.extend(
            distanceObj(update_boid, bird) for bird in update_birds)
        crowded = sum(1 for gap in neighbour_gaps if gap < too_close)

        # Crowding overrides everything else; the penalty doubles when more
        # than one neighbour is too close.
        if crowded > 0:
            return -40 if crowded > 1 else -20

        # The updated centroid is averaged over the *updated* flock.  The
        # original divided by len(birds) from the previous state, which is
        # wrong whenever the flock size changes between states.
        updated_dist_center = distance(
            (update_boid.x, update_boid.y),
            flock_centroid(update_leader, update_birds))

        score = 0

        # Small bonus for not drifting away from the centroid.
        if updated_dist_center <= dist_center:
            score += 2

        # Tracking the leader matters more than tracking the centroid.
        if dist_leader >= updated_distance:
            score += 5
        else:
            score -= 1

        return score
Пример #5
0
 def isfollowing(state):
     """Report whether the learner is inside the following band.

     ``state[0]`` and ``state[1]`` are passed to ``distance``; the result
     must lie in the half-open interval
     (crashdistance - 5, crashdistance + 15] for this to return True.
     """
     gap = distance(state[0], state[1])
     return crashdistance - 5 < gap <= crashdistance + 15