Example #1
    def get_best_action(world):
        # Go to the best filtered neighbour, or to the depot if the battery
        # inventory is running low
        if world.state.vehicle.battery_inventory > BATTERY_INVENTORY * 0.1:
            next_location: classes.Location = decision.neighbour_filtering.filtering_neighbours(
                world.state,
                number_of_neighbours=1,
            )[0]
        else:
            next_location = world.state.depots[0]

        if world.state.is_at_depot():
            swappable_scooters_ids = []
            number_of_scooters_to_swap = 0
        else:
            # Find all scooters that can be swapped here
            swappable_scooters_ids = [
                scooter.id for scooter in
                world.state.current_location.get_swappable_scooters()
            ]

            # Calculate how many scooters can be swapped
            number_of_scooters_to_swap = world.state.get_max_number_of_swaps(
                world.state.current_location)

        # Return an action with no re-balancing, only scooter swapping
        return classes.Action(
            battery_swaps=swappable_scooters_ids[:number_of_scooters_to_swap],
            pick_ups=[],
            delivery_scooters=[],
            next_location=next_location.id,
        )
Example #2
 def test_next_state_from_action(self):
     value_function = decision.value_functions.LinearValueFunction(
         *self.value_function_args
     )
     value_function.setup(self.world.state)
     # Record current state
     vehicle = self.world.state.vehicles[0]
     # Scooters in the current cluster
     scooters = self.world.state.clusters[
         vehicle.current_location.id
     ].get_swappable_scooters()
     deliver_scooter = random.choice(
         random.choice(
             [
                 cluster
                 for cluster in self.world.state.clusters
                 if cluster.id != vehicle.current_location.id
             ]
         ).scooters
     )
     vehicle.pick_up(deliver_scooter)
     # Action that does a bit of everything
     action = classes.Action(
         [scooter.id for scooter in scooters[:3]],
         [scooter.id for scooter in scooters[3:10]],
         [deliver_scooter.id],
         random.choice(
             [
                 cluster.id
                 for cluster in self.world.state.clusters
                 if cluster.id != vehicle.current_location.id
             ]
         ),
     )
     function_next_state_features = value_function.convert_next_state_features(
         self.world.state, vehicle, action
     )
     self.world.state.do_action(action, vehicle, self.world.time)
     next_state_features = value_function.convert_state_to_features(
         self.world.state, vehicle
     )
     self.assertEqual(len(function_next_state_features), len(next_state_features))
     for i, value in enumerate(function_next_state_features):
         self.assertAlmostEqual(
             value,
             next_state_features[i],
             msg=f"not equal at {i}",
         )
Example #3
    def get_best_action(self, world, vehicle):
        # Swap batteries here if possible, then head for the depot or the
        # cluster most in need of battery swaps

        if vehicle.is_at_depot():
            swappable_scooters_ids = []
            number_of_scooters_to_swap = 0
        else:
            # Find all scooters that can be swapped here
            swappable_scooters_ids = [
                scooter.id
                for scooter in vehicle.current_location.get_swappable_scooters(
                    battery_limit=70
                )
            ]

            # Calculate how many scooters can be swapped
            number_of_scooters_to_swap = vehicle.get_max_number_of_swaps()

        if (
            vehicle.battery_inventory - number_of_scooters_to_swap
            < vehicle.battery_inventory_capacity * 0.1
        ) and not vehicle.is_at_depot():
            next_location = world.state.depots[0]
        else:
            # Otherwise, go to the cluster with the largest share of scooters
            # that appear to need a battery swap
            next_location = sorted(
                [
                    cluster
                    for cluster in world.state.clusters
                    if cluster.id != vehicle.current_location.id
                    and cluster.id not in world.tabu_list
                ],
                key=lambda cluster: (
                    len(cluster.scooters) - cluster.get_current_state()
                )
                / (len(cluster.scooters) + 1),
                reverse=True,
            )[0]

        # Return an action with no re-balancing, only scooter swapping
        return classes.Action(
            battery_swaps=swappable_scooters_ids[:number_of_scooters_to_swap],
            pick_ups=[],
            delivery_scooters=[],
            next_location=next_location.id,
        )
Example #4
#root.mainline['11'] = c.Weapon()
#root.mainline['12'] = c.Gear()
#root.mainline['13'] = c.Food()
#root.mainline['14'] = c.Magic()
#root.mainline['15'] = c.Spell()
#root.mainline['16'] = c.Skill()
#root.mainline['17'] = c.Quest()
#root.mainline['18'] = c.Encounter()
#root.mainline['19'] = c.Location()
#root.mainline['20'] = c.Building()
#root.mainline['21'] = c.Lodging()

root.structure['1'] = c.Root()  # structural model
root.structure['2'] = c.Actor()
root.structure['3'] = c.Item()
root.structure['4'] = c.Action()
root.structure['5'] = c.Place()
root.structure['6'] = c.Player()
root.structure['7'] = c.NonPlayer()
root.structure['8'] = c.Monster()
root.structure['9'] = c.Animal()
root.structure['10'] = c.Armour()
root.structure['11'] = c.Weapon()
root.structure['12'] = c.Gear()
root.structure['13'] = c.Food()
root.structure['14'] = c.Magic()
root.structure['15'] = c.Spell()
root.structure['16'] = c.Skill()
root.structure['17'] = c.Quest()
root.structure['18'] = c.Encounter()
root.structure['19'] = c.Location()
Example #5
    def ann_learning(self, value_function):
        value_function.setup(self.world.state)
        self.world.LOST_TRIP_REWARD = -1

        # Creating a list of states with associated negative reward
        simulation_state = copy.deepcopy(self.world.state)
        vehicle = simulation_state.vehicles[0]
        system_simulated_states = []
        i = 0
        # simulating to provoke lost demand
        while len(system_simulated_states) < 10:
            _, _, lost_demand = system_simulation.scripts.system_simulate(
                simulation_state
            )
            # recording state and lost reward if there was lost demand after simulation
            if len(lost_demand) > 0:
                system_simulated_states.append(
                    (
                        value_function.get_state_features(
                            simulation_state,
                            vehicle,
                            i * globals.ITERATION_LENGTH_MINUTES,
                        ),
                        sum(demand for demand, _ in lost_demand)
                        * self.world.LOST_TRIP_REWARD,
                    )
                )

            i += 1

        # simulating actions that yield positive reward
        # (swap battery in clusters with available scooters less than ideal state)
        unsimulated_world = copy.deepcopy(self.world)
        accumulated_action_time = 0
        unsimulated_states = []
        # recording clusters with available scooters less than ideal state
        deficient_cluster = [
            cluster
            for cluster in unsimulated_world.state.clusters
            if len(cluster.get_available_scooters()) < cluster.ideal_state
        ]
        counter = 0
        vehicle = unsimulated_world.state.vehicles[0]
        # safety break if the internal break doesn't apply; stop one short of
        # the end because the action below targets the next deficient cluster
        while counter < len(deficient_cluster) - 1 and len(unsimulated_states) < 10:
            # swapping batteries on the n-th cluster in deficient cluster list
            cluster = deficient_cluster[counter]
            vehicle.battery_inventory = vehicle.battery_inventory_capacity
            vehicle.current_location = cluster
            # creating an action to swap all batteries and recording the state and reward
            action = classes.Action(
                [scooter.id for scooter in cluster.get_swappable_scooters()][
                    : vehicle.battery_inventory
                ],
                [],
                [],
                deficient_cluster[counter + 1].id,
            )
            reward = action.get_reward(
                vehicle,
                0,
                self.world.DEPOT_REWARD,
                self.world.VEHICLE_INVENTORY_STEP_SIZE,
                self.world.PICK_UP_REWARD,
            )
            unsimulated_states.append(
                (
                    value_function.get_state_features(
                        unsimulated_world.state, vehicle, accumulated_action_time
                    ),
                    reward,
                )
            )
            # calculating action distance and action time so it can be used when getting state features
            # (unnecessary, but have to use a time when creating state features)
            action_distance = unsimulated_world.state.get_distance(
                vehicle.current_location.id, action.next_location
            )
            accumulated_action_time += unsimulated_world.state.do_action(
                action, vehicle, accumulated_action_time
            ) + action.get_action_time(action_distance)

            counter += 1

        # training two times on the positive and negative rewarded states
        for _ in range(2):
            for i in range(len(system_simulated_states) - 1):
                state_features, reward = system_simulated_states[i]
                next_state_features = system_simulated_states[i + 1][0]
                update_value_function(
                    value_function, state_features, next_state_features, reward
                )

            for i in range(len(unsimulated_states) - 1):
                state_features, reward = unsimulated_states[i]
                next_state_features = unsimulated_states[i + 1][0]
                update_value_function(
                    value_function, state_features, next_state_features, reward
                )

        # check if the ANN predicts a higher value for the positively rewarded state than the negative one
        self.assertGreater(
            value_function.estimate_value_from_state_features(unsimulated_states[0][0]),
            value_function.estimate_value_from_state_features(
                system_simulated_states[0][0]
            ),
        )
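
The helper `update_value_function` is not shown in these examples. A minimal sketch of the TD(0)-style update it presumably applies to a linear value function (the `weights` attribute, learning rate `alpha`, and discount factor `gamma` are assumptions, not part of the original code):

import numpy as np

def update_value_function(value_function, state_features, next_state_features, reward,
                          alpha=0.01, gamma=0.95):
    # TD(0) target: immediate reward plus the discounted estimate of the next state
    target = reward + gamma * float(np.dot(value_function.weights, next_state_features))
    # TD error: gap between the target and the current estimate of this state
    td_error = target - float(np.dot(value_function.weights, state_features))
    # For a linear approximator, the gradient w.r.t. the weights is the feature vector
    value_function.weights = value_function.weights + alpha * td_error * np.asarray(state_features)
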
Example #6
    def get_best_action(self, world, vehicle):
        # Find all possible actions
        actions = world.state.get_possible_actions(
            vehicle,
            divide=self.get_possible_actions_divide,
            exclude=world.tabu_list,
            time=world.time,
            number_of_neighbours=self.number_of_neighbors,
        )
        state = world.state
        cache = EpsilonGreedyValueFunctionPolicy.get_cache(state)
        # Get state representation of current state
        state_features = self.value_function.get_state_features(
            world.state, vehicle, cache
        )

        # Epsilon greedy choose an action based on value function
        if self.epsilon > random.rand():
            best_action = random.choice(actions)
        else:
            # Create list containing all actions and their rewards and values (action, reward, value_function_value)
            action_info = [
                (
                    classes.Action([], [], [], random.choice(world.state.locations).id),
                    -1000,
                    [],
                )  # Sentinel guarding against an empty `actions` list
            ]
            reward = 0
            for action in actions:
                # look one action ahead
                forward_state: classes.State = copy.deepcopy(state)
                forward_vehicle: classes.Vehicle = forward_state.get_vehicle_by_id(
                    vehicle.id
                )
                # perform action
                forward_state.do_action(action, forward_vehicle, world.time)
                # Simulate the system to generate potential lost trips
                _, _, lost_demands = system_simulation.scripts.system_simulate(
                    forward_state
                )
                # Record lost trip rewards
                reward = (
                    sum(map(lambda lost_trips: lost_trips[0], lost_demands))
                    if len(lost_demands) > 0
                    else 0
                )
                # Find all actions after taking the action moving the state to s_{t+1}
                next_action_actions = forward_state.get_possible_actions(
                    forward_vehicle,
                    divide=self.get_possible_actions_divide,
                    exclude=world.tabu_list + [action.next_location],
                    time=world.time
                    + action.get_action_time(
                        state.get_distance(
                            vehicle.current_location.id,
                            forward_vehicle.current_location.id,
                        )
                    ),
                    number_of_neighbours=self.number_of_neighbors,
                )
                cache = EpsilonGreedyValueFunctionPolicy.get_cache(forward_state)
                forward_action_info = []
                for next_state_action in next_action_actions:
                    # Generate the features for this new state after the action
                    next_state_features = self.value_function.get_next_state_features(
                        forward_state,
                        forward_vehicle,
                        next_state_action,
                        cache,
                    )
                    # Calculate the expected future reward of being in this new state
                    next_state_value = (
                        self.value_function.estimate_value_from_state_features(
                            next_state_features
                        )
                    )
                    # Add the transition to a list for later evaluation
                    forward_action_info.append(
                        (next_state_action, next_state_value, next_state_features)
                    )

                # find the greedy best next action
                best_next_action, next_state_value, next_state_features = max(
                    forward_action_info, key=lambda pair: pair[1]
                )
                # Add this transition for later evaluation
                action_info.append(
                    (
                        action,
                        next_state_value + reward * world.LOST_TRIP_REWARD,
                        next_state_features,
                    )
                )
            # Choose the action with the highest value and reward
            best_action, next_state_value, next_state_features = max(
                action_info, key=lambda pair: pair[1]
            )
            if not world.disable_training:
                if self.value_function.use_replay_buffer():
                    self.value_function.train(world.REPLAY_BUFFER_SIZE)
                else:
                    self.value_function.train(
                        (
                            state_features,
                            reward * world.LOST_TRIP_REWARD,
                            next_state_features,
                        )
                    )
        return best_action, state_features
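
Stripped of the deep copies and system simulation, the selection rule above is plain epsilon-greedy. A self-contained sketch, where the `estimate` callable is a hypothetical stand-in for the value function's evaluation of an action:

import random

def epsilon_greedy(actions, estimate, epsilon):
    # Explore with probability epsilon, otherwise exploit the value estimates
    if random.random() < epsilon:
        return random.choice(actions)
    return max(actions, key=estimate)

# Usage: picks the largest number roughly 90% of the time
print(epsilon_greedy([1, 5, 3], estimate=lambda a: a, epsilon=0.1))
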
Example #7
    def get_best_action(self, world, vehicle):
        vehicle_has_scooter_inventory = len(vehicle.scooter_inventory) > 0
        if vehicle.is_at_depot():
            scooters_to_deliver = []
            scooters_to_pickup = []
            number_of_scooters_to_pick_up = 0
            number_of_scooters_to_swap = 0
            scooters_to_swap = []
        else:
            # If vehicle has scooter inventory, deliver all scooters and swap all swappable scooters
            if vehicle_has_scooter_inventory:
                # Deliver all scooters in scooter inventory, and don't pick up any new scooters
                scooters_to_deliver = [
                    scooter.id for scooter in vehicle.scooter_inventory
                ]
                scooters_to_pickup = []
                number_of_scooters_to_pick_up = 0
                # Swap as many scooters as possible as this cluster most likely needs it
                swappable_scooters = vehicle.current_location.get_swappable_scooters()
                number_of_scooters_to_swap = min(
                    vehicle.battery_inventory, len(swappable_scooters)
                )
                scooters_to_swap = [scooter.id for scooter in swappable_scooters][
                    :number_of_scooters_to_swap
                ]
            else:
                # Pick up as many scooters as possible, the min(scooter capacity, deviation from ideal state)
                number_of_scooters_to_pick_up = max(
                    min(
                        vehicle.scooter_inventory_capacity
                        - len(vehicle.scooter_inventory),
                        vehicle.battery_inventory,
                        len(vehicle.current_location.scooters)
                        - vehicle.current_location.ideal_state,
                    ),
                    0,
                )
                scooters_to_pickup = [
                    scooter.id for scooter in vehicle.current_location.scooters
                ][:number_of_scooters_to_pick_up]
                # Do not swap any scooters in a cluster with a lot of scooters
                scooters_to_swap = []
                number_of_scooters_to_swap = 0
                # There are no scooters to deliver due to empty inventory
                scooters_to_deliver = []

        def get_next_location_id(is_finding_positive_deviation):
            return sorted(
                [
                    cluster
                    for cluster in world.state.clusters
                    if cluster.id != vehicle.current_location.id
                    and cluster.id not in world.tabu_list
                ],
                key=lambda cluster: len(cluster.get_available_scooters())
                - cluster.ideal_state,
                reverse=is_finding_positive_deviation,
            )[0].id

        # If the vehicle has under 10% battery inventory, go to the depot.
        if (
            vehicle.battery_inventory
            - number_of_scooters_to_swap
            - number_of_scooters_to_pick_up
            < vehicle.battery_inventory_capacity * 0.1
        ) and not vehicle.is_at_depot():
            next_location_id = world.state.depots[0].id
        else:
            """
            If the vehicle has scooter inventory upon arrival,
            go to a new positive-deviation cluster to pick up more scooters.
            If there is no scooter inventory, go to a cluster where the scooters
            picked up in this cluster can be dropped off, ergo a negative-deviation cluster.
            If, however, you are in the depot, you should do the opposite as the depot does not
            change the scooter inventory.
            """
            visit_positive_deviation_cluster_next = (
                vehicle_has_scooter_inventory
                if not vehicle.is_at_depot()
                else not vehicle_has_scooter_inventory
            )
            next_location_id = get_next_location_id(
                visit_positive_deviation_cluster_next
            )

        return classes.Action(
            scooters_to_swap,
            scooters_to_pickup,
            scooters_to_deliver,
            next_location_id,
        )
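
The `get_next_location_id` helper above ranks clusters by their deviation from the ideal state. The same idea in isolation, using a hypothetical `Cluster` dataclass as a stand-in for the project's class:

from dataclasses import dataclass

@dataclass
class Cluster:
    id: int
    available_scooters: int
    ideal_state: int

def pick_next_cluster(clusters, exclude_ids, find_positive_deviation):
    # Positive deviation means a surplus of scooters (good for pick-ups),
    # negative deviation means a deficit (good for deliveries)
    candidates = [c for c in clusters if c.id not in exclude_ids]
    deviation = lambda c: c.available_scooters - c.ideal_state
    if find_positive_deviation:
        return max(candidates, key=deviation)
    return min(candidates, key=deviation)
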
Example #8
 def get_best_action(self, world, vehicle):
     # Baseline do-nothing policy: no swaps, pick-ups or deliveries,
     # and always move to location 0
     return classes.Action([], [], [], 0)