def _filter_similar_journeys(journey_pairs_pool, request, similar_journey_generator):
    """
    Compare journeys 2 by 2 and mark the worst journey of every similar pair as dead.

    The given generator tells which part of the journeys are compared.
    In case of similar journeys, the function '_get_worst_similar' decides which one to delete.

    :param journey_pairs_pool: iterable yielding (journey, journey) pairs to compare
    :param request: object exposing ``get``; its 'debug' entry (default False) is forwarded
        to mark_as_dead and the request itself is forwarded to _get_worst_similar
    :param similar_journey_generator: passed as-is to compare() for each pair
    :return: None, journeys are flagged through mark_as_dead
    """
    logger = logging.getLogger(__name__)
    is_debug = request.get('debug', False)

    for j1, j2 in journey_pairs_pool:
        # A journey already flagged for deletion must not eliminate another one
        if to_be_deleted(j1) or to_be_deleted(j2):
            continue
        if not compare(j1, j2, similar_journey_generator):
            continue

        # After comparison, if the 2 journeys are similar, the worst one must be eliminated
        worst = _get_worst_similar(j1, j2, request)
        # Lazy %-style arguments: the message is only rendered when DEBUG is enabled
        logger.debug(
            "the journeys %s, %s are similar, we delete %s",
            j1.internal_id,
            j2.internal_id,
            worst.internal_id,
        )
        # The reason attached to the dead journey references the journey that is kept
        kept = j1 if worst == j2 else j2
        mark_as_dead(
            worst,
            is_debug,
            'duplicate_journey',
            'similar_to_{other}'.format(other=kept.internal_id),
        )
def _filter_similar_journeys(journeys, request, similar_journey_generator):
    """
    Filter similar journeys.

    Every unordered pair of journeys is compared; the given generator tells which part
    of the journeys are compared. In case of similar journeys, '_get_worst_similar'
    decides which one to delete.

    :param journeys: journeys to compare pairwise (consumed by itertools.combinations)
    :param request: forwarded untouched to _get_worst_similar
    :param similar_journey_generator: passed as-is to compare() for each pair
    :return: None, journeys are flagged through mark_as_dead
    """
    logger = logging.getLogger(__name__)

    for j1, j2 in itertools.combinations(journeys, 2):
        # A journey already flagged for deletion must not eliminate another one
        if to_be_deleted(j1) or to_be_deleted(j2):
            continue
        if not compare(j1, j2, similar_journey_generator):
            continue

        # Similar pair: pick the journey to drop
        worst = _get_worst_similar(j1, j2, request)
        # Lazy %-style arguments: the message is only rendered when DEBUG is enabled
        logger.debug(
            "the journeys %s, %s are similar, we delete %s",
            j1.internal_id,
            j2.internal_id,
            worst.internal_id,
        )
        # The reason attached to the dead journey references the journey that is kept
        kept = j1 if worst == j2 else j2
        mark_as_dead(worst, 'duplicate_journey', 'similar_to_{other}'.format(other=kept.internal_id))
def _filter_similar_journeys(journeys, request):
    """
    For the moment a very simple filter: every unordered pair of journeys is compared
    with the module-level 'similar_journeys_generator'.

    In case of similar journeys, '_get_worst_similar_vjs' decides which one to delete.

    :param journeys: journeys to compare pairwise (consumed by itertools.combinations)
    :param request: forwarded untouched to _get_worst_similar_vjs
    :return: None, journeys are flagged through _mark_as_dead
    """
    logger = logging.getLogger(__name__)

    for j1, j2 in itertools.combinations(journeys, 2):
        # A journey already flagged for deletion must not eliminate another one
        if _to_be_deleted(j1) or _to_be_deleted(j2):
            continue
        if not compare(j1, j2, similar_journeys_generator):
            continue

        # Similar pair: pick the journey to drop
        worst = _get_worst_similar_vjs(j1, j2, request)
        # Lazy %-style arguments: the message is only rendered when DEBUG is enabled
        logger.debug(
            "the journeys %s, %s are similar, we delete %s",
            j1.internal_id,
            j2.internal_id,
            worst.internal_id,
        )
        # The reason attached to the dead journey references the journey that is kept
        kept = j1 if worst == j2 else j2
        _mark_as_dead(worst, 'duplicate_journey', 'similar_to_{other}'.format(other=kept.internal_id))
def _remove_extra_journeys(self, journeys, max_nb_journeys, clockwise, timezone):
    """
    Remove redundant and surplus journeys from ``journeys``, in place (destineo filter).

    - we want at most 'max_nb_journeys' journeys; journeys of type 'non_pt_walk' are
      neither counted nor removed by that limit
    - we don't want 2 alternatives using the same vehicle journeys on the same local
      departure date (e.g. same buses but different boarding stations)

    For two similar journeys the one removed is chosen as follows:
    - clockwise: the one arriving later; on equal arrival, the one leaving earlier
    - not clockwise: the one leaving earlier; on equal departure, the one arriving later
    - if still tied: the one with the longer helpers.walking_duration
      (the second journey when the durations are not strictly greater for the first)

    :param journeys: mutable, index-addressable sequence of journeys; items are deleted from it
    :param max_nb_journeys: maximum number of counted journeys to keep; a falsy value disables the limit
    :param clockwise: selects the order of the arrival/departure tie-breaks above
    :param timezone: timezone name given to pytz.timezone to compute the local departure date
    :return: None
    """
    logger = logging.getLogger(__name__)

    def same_vjs(j):
        # Comparison key: local departure date followed by the vehicle journey of each section
        journey_dt = datetime.utcfromtimestamp(j.departure_date_time)
        journey_date = pytz.utc.localize(journey_dt).astimezone(pytz.timezone(timezone)).date()
        yield journey_date
        for s in j.sections:
            yield s.uris.vehicle_journey

    def get_journey_to_remove(idx_j1, j1, idx_j2, j2):
        # Returns the index of the journey to drop among two similar journeys
        if clockwise:
            if j1.arrival_date_time != j2.arrival_date_time:
                return idx_j1 if j1.arrival_date_time > j2.arrival_date_time else idx_j2
            if j1.departure_date_time != j2.departure_date_time:
                return idx_j1 if j1.departure_date_time < j2.departure_date_time else idx_j2
        else:
            if j1.departure_date_time != j2.departure_date_time:
                return idx_j1 if j1.departure_date_time < j2.departure_date_time else idx_j2
            if j1.arrival_date_time != j2.arrival_date_time:
                return idx_j1 if j1.arrival_date_time > j2.arrival_date_time else idx_j2
        # Same schedule: fall back on the walking duration
        return idx_j1 if helpers.walking_duration(j1) > helpers.walking_duration(j2) else idx_j2

    # A set keeps the membership tests below O(1) inside the pairwise loop
    to_delete = set()

    for (idx1, j1), (idx2, j2) in itertools.combinations(enumerate(journeys), 2):
        if idx1 in to_delete or idx2 in to_delete:
            continue
        if not compare(j1, j2, same_vjs):
            continue
        to_delete.add(get_journey_to_remove(idx1, j1, idx2, j2))

    if max_nb_journeys:
        count = 0
        for idx, journey in enumerate(journeys):
            if idx in to_delete:
                continue
            if journey.type == 'non_pt_walk':
                continue
            if count >= max_nb_journeys:
                to_delete.add(idx)
            count += 1

    # Delete from the highest index down so that the remaining indexes stay valid
    ordered_indexes = sorted(to_delete, reverse=True)
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            'remove %s extra journeys: %s',
            len(ordered_indexes),
            [journeys[i].type for i in ordered_indexes],
        )
    for idx in ordered_indexes:
        del journeys[idx]