def apply(self):
    """Split ``self.trips[self.trip]`` in two at ``self.longitude_to_split``.

    Rows whose longitude is below the split value replace the original entry
    in ``self.trips``; the remaining rows get a new trip id (largest existing
    id + 1) and are appended as an additional trip. Both parts are ordered by
    descending latitude. ``self.first_trip_percentage`` is set to the fraction
    of rows that stay in the first part.

    When ``self.VERIFY_COST_DELTA`` is set, the real cost change is compared
    against ``self.cost_delta()``.
    """
    whole = self.trips[self.trip]
    if self.VERIFY_COST_DELTA:
        cost_before = utils.weighted_trip_length(whole[:, utils.LOCATION], whole[:, utils.WEIGHT])

    longitudes = whole[:, utils.LON]
    lower_part = whole[longitudes < self.longitude_to_split]
    upper_part = whole[longitudes >= self.longitude_to_split]

    # argsort()[::-1] gives the row order for descending latitude
    lower_part = lower_part[lower_part[:, utils.LAT].argsort()[::-1]]
    upper_part = upper_part[upper_part[:, utils.LAT].argsort()[::-1]]

    self.first_trip_percentage = len(lower_part) / len(whole)

    # the ids are read from the trip list as it is before the split is stored
    highest_trip_id = np.max([t[0, utils.TRIP] for t in self.trips])
    upper_part[:, utils.TRIP] = highest_trip_id + 1

    self.trips[self.trip] = lower_part
    self.trips.append(upper_part)

    if self.VERIFY_COST_DELTA:
        cost_after = sum(
            utils.weighted_trip_length(part[:, utils.LOCATION], part[:, utils.WEIGHT])
            for part in (lower_part, upper_part))
        utils.verify_costs_are_equal(self.cost_delta(), cost_after - cost_before)
def apply(self):
    """Dissolve ``self.trip_to_merge`` by inserting its gifts into other trips.

    Only logs a warning and returns when ``self.trip_to_merge`` is ``None``.
    Otherwise each ``(gift, trip_index, index_in_trip)`` entry of
    ``self.gift_insertions`` is inserted into ``self.trips[trip_index]`` at
    ``index_in_trip`` after the gift's trip id has been set to that of the
    receiving trip, and finally the merged trip is removed from ``self.trips``.

    ``self.trip_to_merge``, ``self.trip_assignments_for_gifts`` and
    ``self.gift_insertions`` are presumably populated by ``cost_delta()``
    beforehand -- verify against the caller.

    When ``self.VERIFY_COST_DELTA`` is set, the real cost change is compared
    against ``self.cost_delta()``.
    """
    if self.trip_to_merge is None:
        Neighbor.log.warning("Not applying trip merge because no valid merge was found")
        return

    if self.VERIFY_COST_DELTA:
        trip = self.trips[self.trip_to_merge]
        old = utils.weighted_trip_length(trip[:, utils.LOCATION], trip[:, utils.WEIGHT])
        for trip_index in self.trip_assignments_for_gifts:
            old += utils.weighted_trip_length(self.trips[trip_index][:, utils.LOCATION],
                                              self.trips[trip_index][:, utils.WEIGHT])

    for gift, trip_index, index_in_trip in self.gift_insertions:
        receiving_trip = self.trips[trip_index]
        # use the named column constant instead of a hard-coded column index
        gift[utils.TRIP] = receiving_trip[0, utils.TRIP]
        self.trips[trip_index] = np.insert(receiving_trip, index_in_trip, gift, axis=0)

    if self.VERIFY_COST_DELTA:
        new = 0
        for trip_index in self.trip_assignments_for_gifts:
            new += utils.weighted_trip_length(self.trips[trip_index][:, utils.LOCATION],
                                              self.trips[trip_index][:, utils.WEIGHT])
        utils.verify_costs_are_equal(self.cost_delta(), new - old)

    # only delete the row afterwards to not mess up the indexes for the cost calculation
    del self.trips[self.trip_to_merge]
def apply(self):
    """Move the gift at ``self.gift_to_move`` from one trip to another.

    The gift row is taken from ``self.trips[self.trip]``, gets the trip id of
    ``self.trips[self.destination_trip]``, is inserted there at
    ``self.destination_insertion_index`` and removed from its old trip.

    When ``self.VERIFY_COST_DELTA`` is set, the real cost change of both trips
    is compared against ``self.cost_delta()``.
    """
    source_trip = self.trips[self.trip]
    target_trip = self.trips[self.destination_trip]

    if self.VERIFY_COST_DELTA:
        cost_before = sum(
            utils.weighted_trip_length(t[:, utils.LOCATION], t[:, utils.WEIGHT])
            for t in (source_trip, target_trip))

    # NOTE: This apparently can be index-out-of-bounds!
    moved_gift = source_trip[self.gift_to_move]
    # written through to the row of the source array before it is copied over
    moved_gift[utils.TRIP] = target_trip[0, utils.TRIP]

    # destination first, then source -- same order as the trips are stored back
    target_trip = np.insert(target_trip, self.destination_insertion_index, moved_gift, axis=0)
    self.trips[self.destination_trip] = target_trip

    source_trip = np.delete(source_trip, self.gift_to_move, axis=0)
    self.trips[self.trip] = source_trip

    if self.VERIFY_COST_DELTA:
        cost_after = sum(
            utils.weighted_trip_length(t[:, utils.LOCATION], t[:, utils.WEIGHT])
            for t in (source_trip, target_trip))
        utils.verify_costs_are_equal(self.cost_delta(), cost_after - cost_before)
def apply(self):
    """Replace the trip list with the precomputed merge result.

    Only logs a warning and returns when ``self.trip_to_merge`` is ``None``.
    Otherwise the contents of ``self.trips`` are replaced in place by
    ``self.trips_with_applied_merge`` and the entry at ``self.trip_to_merge``
    is deleted afterwards.

    When ``self.VERIFY_COST_DELTA`` is set, the cost of the merged trip plus
    the trips listed in ``self.modified_trips`` before the replacement is
    compared with the cost of the modified trips afterwards, and the
    difference is checked against ``self.cost_delta()``.
    """
    if self.trip_to_merge is None:
        Neighbor.log.warning(
            "Not applying trip merge because no valid merge was found")
        return

    merged_trip = self.trips[self.trip_to_merge]

    if self.VERIFY_COST_DELTA:
        cost_before = sum(
            (utils.weighted_trip_length(
                self.trips[index][:, utils.LOCATION],
                self.trips[index][:, utils.WEIGHT])
             for index in self.modified_trips),
            utils.weighted_trip_length(merged_trip[:, utils.LOCATION],
                                       merged_trip[:, utils.WEIGHT]))

    # swap the contents in place so that the list object itself stays the same
    self.trips.clear()
    self.trips.extend(self.trips_with_applied_merge)

    if self.VERIFY_COST_DELTA:
        cost_after = sum(
            utils.weighted_trip_length(
                self.trips[index][:, utils.LOCATION],
                self.trips[index][:, utils.WEIGHT])
            for index in self.modified_trips)
        utils.verify_costs_are_equal(self.cost_delta(), cost_after - cost_before)

    # only delete the row afterwards to not mess up the indexes for the cost calculation
    del self.trips[self.trip_to_merge]
def apply(self):
    """Swap the rows ``self.first_gift`` and ``self.second_gift`` of ``self.trip``.

    The swap is done in place on ``self.trip``. When
    ``self.VERIFY_COST_DELTA`` is set, the real cost change is compared
    against ``self.cost_delta()``.
    """
    if self.VERIFY_COST_DELTA:
        cost_before = utils.weighted_trip_length(self.trip[:, utils.LOCATION],
                                                 self.trip[:, utils.WEIGHT])

    first, second = self.first_gift, self.second_gift
    # indexing with a list produces a copy, so the rows can be written back crosswise
    rows_in_swapped_order = self.trip[[second, first]]
    self.trip[[first, second]] = rows_in_swapped_order

    if self.VERIFY_COST_DELTA:
        cost_after = utils.weighted_trip_length(self.trip[:, utils.LOCATION],
                                                self.trip[:, utils.WEIGHT])
        utils.verify_costs_are_equal(self.cost_delta(), cost_after - cost_before)
def print_stats(file_name=None, df=None, plots=False):
    """Print score and per-trip statistics of a solution.

    Args:
        file_name: CSV file to read; its rows are merged with the module-level
            ``gifts`` frame on ``GiftId``. Takes precedence over ``df``.
        df: Already merged frame; only used when ``file_name`` is ``None``.
            The code reads the columns ``TripId``, ``Weight``, ``Latitude``
            and ``Longitude``.
        plots: When true, additionally draw histograms of weights, costs,
            efficiencies and trip sizes on a 2x2 grid.

    Returns:
        None. If neither ``file_name`` nor ``df`` is given, a hint is printed
        and nothing else happens.
    """
    if file_name is not None:
        df = pd.read_csv(file_name).merge(gifts, on="GiftId")
    if df is None:
        print("Need to specify either file name or df")
        # nothing to analyse -- continuing would only crash further down
        return

    score = utils.weighted_reindeer_weariness(df)
    trip_sizes = df.groupby("TripId").size()

    trips = df.TripId.unique()
    # select the rows of every trip once and reuse them for weights and costs
    rows_per_trip = [df[df.TripId == trip] for trip in trips]
    weights = np.array([rows.Weight.sum() for rows in rows_per_trip])
    # (latitude, longitude) column order, matching the DataFrame-based cost
    # computation in evaluate_trips
    costs = np.array([utils.weighted_trip_length(rows[["Latitude", "Longitude"]], rows.Weight)
                      for rows in rows_per_trip])
    efficiencies = weights / costs

    print("Score: {:.5f}B for {} trips".format(score / 1e9, len(trip_sizes)))
    # NOTE(review): the value after "+-" is the squared standard deviation
    # (i.e. the variance) in all four lines below -- confirm this is intended
    print("Trip sizes: min/median/max:\t\t{:>6.2f}\t{:>6.2f}\t{:>7.2f};\t{:>6.2f}+-{:>9.2f}".format(
        trip_sizes.min(), trip_sizes.median(), trip_sizes.max(), trip_sizes.mean(), trip_sizes.std()**2))
    print("Costs per trip: min/median/max [M]:\t{:>6.2f}\t{:>6.2f}\t{:>7.2f};\t{:>6.2f}+-{:>9.2f}".format(
        costs.min()/1e6, np.median(costs)/1e6, costs.max()/1e6, costs.mean()/1e6, (costs.std()/1e6)**2))
    print("Weights per trip: min/median/max:\t{:>6.2f}\t{:>6.2f}\t{:>7.2f};\t{:>6.2f}+-{:>9.2f}".format(
        weights.min(), np.median(weights), weights.max(), weights.mean(), (weights.std())**2))
    print("Efficiencies per trip: min/median/max:\t{:>6.2f}\t{:>6.2f}\t{:>7.2f};\t{:>6.2f}+-{:>9.2f}".format(
        efficiencies.min()*1e6, np.median(efficiencies)*1e6, efficiencies.max()*1e6, efficiencies.mean()*1e6,
        (efficiencies.std()*1e6)**2))

    if plots:
        fig, axes = plt.subplots(2, 2)
        axes[0, 0].hist(weights, bins=100)
        axes[0, 0].set_title("Weights")
        axes[0, 1].hist(costs, bins=100)
        axes[0, 1].set_title("Costs")
        axes[1, 0].hist(efficiencies, bins=100)
        axes[1, 0].set_title("Efficiencies")
        axes[1, 1].hist(trip_sizes, bins=100)
        axes[1, 1].set_title("Trip sizes")
        if file_name is not None:
            fig.suptitle("Stats for {}".format(file_name))
def _find_best_split_index(self, trip): minimum_cost = np.finfo(np.float64).max best_index = None # don't split before first item for i in range(1, len(trip)): first_trip = trip[:i] second_trip = trip[i:] cost_first_trip = utils.weighted_trip_length( first_trip[:, utils.LOCATION], first_trip[:, utils.WEIGHT]) cost_second_trip = utils.weighted_trip_length( second_trip[:, utils.LOCATION], second_trip[:, utils.WEIGHT]) current_cost = cost_first_trip + cost_second_trip if current_cost < minimum_cost: minimum_cost = current_cost best_index = i return best_index, minimum_cost
def apply(self):
    """Cut ``self.trips[self.trip]`` in two at ``self.index_to_split``.

    The rows before the index stay in place; the rows from the index onwards
    get a new trip id (largest existing id + 1) and are appended to
    ``self.trips`` as an additional trip.

    When ``self.VERIFY_COST_DELTA`` is set, the real cost change is compared
    against ``self.cost_delta()``.
    """
    whole = self.trips[self.trip]
    if self.VERIFY_COST_DELTA:
        cost_before = utils.weighted_trip_length(whole[:, utils.LOCATION], whole[:, utils.WEIGHT])

    tail = whole[self.index_to_split:]
    head = whole[:self.index_to_split]

    # the ids are read from the trip list as it is before the split is stored
    highest_trip_id = np.max([t[0, utils.TRIP] for t in self.trips])
    tail[:, utils.TRIP] = highest_trip_id + 1

    self.trips[self.trip] = head
    self.trips.append(tail)

    if self.VERIFY_COST_DELTA:
        cost_after = sum(
            utils.weighted_trip_length(part[:, utils.LOCATION], part[:, utils.WEIGHT])
            for part in (head, tail))
        utils.verify_costs_are_equal(self.cost_delta(), cost_after - cost_before)
def apply(self):
    """Exchange one gift between two trips.

    The row ``self.first_gift`` of ``self.trips[self.first_trip]`` is inserted
    into the second trip at ``self.second_trip_insertion_index``, and the row
    ``self.second_gift`` of ``self.trips[self.second_trip]`` is inserted into
    the first trip at ``self.first_trip_insertion_index``. Each gift is removed
    from the trip it came from and both updated trips are stored back into
    ``self.trips``.

    When ``self.VERIFY_COST_DELTA`` is set, the real cost change of both trips
    is compared against ``self.cost_delta()``.
    """
    # self.log.debug("Applying {}".format(self))
    first_trip = self.trips[self.first_trip]
    second_trip = self.trips[self.second_trip]
    if self.VERIFY_COST_DELTA:
        old = utils.weighted_trip_length(first_trip[:, utils.LOCATION], first_trip[:, utils.WEIGHT]) + \
              utils.weighted_trip_length(second_trip[:, utils.LOCATION], second_trip[:, utils.WEIGHT])

    # extract insertees now (before they're removed) and update their trip assignment
    # NOTE(review): assuming the trips are numpy arrays, the two rows below are
    # views, so the trip id is written into the original arrays as well. The id
    # of the first trip is therefore read before the first row is modified
    # (it would otherwise be overwritten when first_gift is row 0).
    first_gift_row = first_trip[self.first_gift]
    trip_id_for_second_gift = first_trip[0, utils.TRIP]
    first_gift_row[utils.TRIP] = second_trip[0, utils.TRIP]
    second_gift_row = second_trip[self.second_gift]
    second_gift_row[utils.TRIP] = trip_id_for_second_gift

    # update first trip
    first_trip = np.insert(first_trip, self.first_trip_insertion_index, second_gift_row, axis=0)
    # the insertion shifted the outgoing gift by one row unless it sits before the insertion point
    index_to_remove = self.first_gift if self.first_gift < self.first_trip_insertion_index else self.first_gift + 1
    first_trip = np.delete(first_trip, index_to_remove, axis=0)
    self.trips[self.first_trip] = first_trip

    # update second trip
    second_trip = np.insert(second_trip, self.second_trip_insertion_index, first_gift_row, axis=0)
    # same index correction as for the first trip
    index_to_remove = self.second_gift if self.second_gift < self.second_trip_insertion_index else self.second_gift + 1
    second_trip = np.delete(second_trip, index_to_remove, axis=0)
    self.trips[self.second_trip] = second_trip

    if self.VERIFY_COST_DELTA:
        # compare the measured change of both trips with the predicted delta
        new = utils.weighted_trip_length(first_trip[:, utils.LOCATION], first_trip[:, utils.WEIGHT]) + \
              utils.weighted_trip_length(second_trip[:, utils.LOCATION], second_trip[:, utils.WEIGHT])
        utils.verify_costs_are_equal(self.cost_delta(), new - old)
def apply(self):
    """Move the gift at ``self.gift_index`` to ``self.new_index`` in its trip.

    The row is removed from ``self.trips[self.trip]`` and re-inserted at
    ``self.new_index`` of the shortened array; the result is stored back into
    ``self.trips``.

    When ``self.VERIFY_COST_DELTA`` is set, the real cost change is compared
    against ``self.cost_delta()``.
    """
    route = self.trips[self.trip]
    if self.VERIFY_COST_DELTA:
        cost_before = utils.weighted_trip_length(route[:, utils.LOCATION], route[:, utils.WEIGHT])

    moved_row = route[self.gift_index]
    without_gift = np.delete(route, self.gift_index, axis=0)

    # NOTE(review): the insertion index is new_index in every case -- no shift
    # is applied for targets behind the removed row. Confirm that cost_delta()
    # interprets new_index relative to the trip with the gift already removed.
    route = np.insert(without_gift, self.new_index, moved_row, axis=0)
    self.trips[self.trip] = route

    if self.VERIFY_COST_DELTA:
        cost_after = utils.weighted_trip_length(route[:, utils.LOCATION], route[:, utils.WEIGHT])
        utils.verify_costs_are_equal(self.cost_delta(), cost_after - cost_before)
def cost_delta(self):
    """Return the cost change of splitting ``self.trips[self.trip]`` at its best index.

    The result is computed once and memoised in ``self.cost``; the chosen
    index is stored in ``self.index_to_split`` as a side effect.
    """
    if self.cost is None:
        whole = self.trips[self.trip]
        unsplit_cost = utils.weighted_trip_length(whole[:, utils.LOCATION], whole[:, utils.WEIGHT])

        # find split index with minimum cost
        self.index_to_split, split_cost = self._find_best_split_index(whole)
        self.cost = split_cost - unsplit_cost

    return self.cost
def cost_delta(self):
    """Return the cost change of the cheapest longitude split of the trip.

    Candidate split values are the longitudes in the middle third of the
    sorted longitudes of ``self.trips[self.trip]``. For each candidate the
    trip is divided into rows below and rows at/above that longitude, both
    parts are ordered by descending latitude, and their weighted lengths are
    added up. The cheapest candidate is remembered in
    ``self.longitude_to_split``.

    The result is memoised in ``self.cost``. If no candidate produces two
    non-empty parts, ``self.longitude_to_split`` is not assigned and the
    returned value is based on the largest float64.
    """
    if self.cost is not None:
        return self.cost

    whole = self.trips[self.trip]
    unsplit_cost = utils.weighted_trip_length(whole[:, utils.LOCATION], whole[:, utils.WEIGHT])

    # check splitting in the middle third of longitudes
    longitudes = whole[:, utils.LON]
    candidates = np.sort(longitudes)[int(len(whole) / 3.0):int(len(whole) * 2.0 / 3)]

    minimum_cost = np.finfo(np.float64).max
    for split_longitude in candidates:
        lower_part = whole[longitudes < split_longitude]
        upper_part = whole[longitudes >= split_longitude]
        if len(lower_part) == 0 or len(upper_part) == 0:
            # don't split here if one of the resulting trips is empty
            continue

        # order both parts by descending latitude
        lower_part = lower_part[lower_part[:, utils.LAT].argsort()[::-1]]
        upper_part = upper_part[upper_part[:, utils.LAT].argsort()[::-1]]

        lower_cost = utils.weighted_trip_length(lower_part[:, utils.LOCATION], lower_part[:, utils.WEIGHT])
        upper_cost = utils.weighted_trip_length(upper_part[:, utils.LOCATION], upper_part[:, utils.WEIGHT])
        split_cost = lower_cost + upper_cost
        if split_cost < minimum_cost:
            minimum_cost = split_cost
            self.longitude_to_split = split_longitude

    self.cost = minimum_cost - unsplit_cost
    return self.cost
def evaluate_trips(self):
    """Log the score and per-trip statistics of the current solution.

    ``self.trips`` is merged with ``self.gifts`` on ``GiftId``; the total
    score is logged relative to ``self.current_score`` and
    ``self.current_best``, followed by min/max/avg/std of the per-trip
    weights, costs and stop counts, and finally the distance cache
    statistics reported by ``utils.get_cache_info()``.

    Returns:
        None; all output goes to ``self.log``.
    """
    unique_trips = self.trips.TripId.unique()
    merged = self.trips.merge(self.gifts, on="GiftId")
    trips = [merged[merged.TripId == t] for t in unique_trips]

    score = utils.weighted_reindeer_weariness(merged)
    utils.log_success_or_error(
        self.log, score < self.current_score,
        "Cost of the {} trips: {:.5f}B ({:.5f}M with {} trips)".format(
            unique_trips.shape[0], score / 1e9,
            (score - self.current_score) / 1e6, self.current_trip_count))
    utils.log_success_or_error(
        self.log, score < self.current_best,
        "Compared to best: {:.5f}M".format(
            (score - self.current_best) / 1e6))

    weights = np.asarray([trip.Weight.sum() for trip in trips])
    self.log.info(
        "Sleigh utilization: min {:.2f}, max {:.2f}, avg {:.2f}, std {:.2f}"
        .format(weights.min(), weights.max(), weights.mean(), weights.std()))

    costs = np.asarray([
        utils.weighted_trip_length(trip[["Latitude", "Longitude"]], trip.Weight)
        for trip in trips
    ])
    self.log.info(
        "Trip costs: min {:.2f}M, max {:.2f}M, avg {:.2f}M, std {:.2f}k".
        format(costs.min() / 1e6, costs.max() / 1e6, costs.mean() / 1e6,
               costs.std() / 1e3))

    stops = np.asarray([trip.shape[0] for trip in trips])
    self.log.info(
        "Stops per trip: min {}, max {}, avg {:.2f}, std {:.2f}".format(
            stops.min(), stops.max(), stops.mean(), stops.std()))

    cache_info = utils.get_cache_info()
    lookups = cache_info.hits + cache_info.misses
    # an unused cache has no lookups at all -- report 0% instead of dividing by zero
    hit_percentage = 100.0 * cache_info.hits / lookups if lookups else 0.0
    self.log.info("Distance cache info: {} ({:.2f}% hits)".format(
        cache_info, hit_percentage))
def cost_delta(self):
    """Return the cost change of dissolving one trip into other trips.

    ``self._find_trip_to_merge()`` selects the trip to dissolve and, per
    receiving trip index, the gift to insert there. For every such pair the
    best insertion position is looked up with
    ``Neighbor.find_best_insertion_index`` and recorded in
    ``self.gift_insertions`` as ``(gift, trip_index, index_in_trip)``.
    The delta is the sum of the insertion costs minus the weighted length of
    the dissolved trip.

    The result is memoised in ``self.cost``. This includes the case that no
    trip to merge was found (delta 0), so that repeated calls neither repeat
    the search nor change ``self.trip_to_merge`` after a value was reported.
    """
    if self.cost is not None:
        return self.cost

    self.trip_to_merge, self.trip_assignments_for_gifts = self._find_trip_to_merge()
    if self.trip_to_merge is None:
        # remember the negative outcome as well
        self.cost = 0
        return self.cost

    trip = self.trips[self.trip_to_merge]

    self.gift_insertions = []
    cost_of_insertions = 0
    for trip_index, gift in self.trip_assignments_for_gifts.items():
        index_in_trip, cost = Neighbor.find_best_insertion_index(self.trips[trip_index], gift)
        self.gift_insertions.append((gift, trip_index, index_in_trip))
        cost_of_insertions += cost

    self.cost = cost_of_insertions - utils.weighted_trip_length(trip[:, utils.LOCATION], trip[:, utils.WEIGHT])
    return self.cost