def add_persons_from_trips(
    population: core.Population,
    trips: Optional[pd.DataFrame] = None,
):
    """
    Add persons found in the trips table to households already in the population.

    Trips are grouped by 'hid' and then by 'pid'. A person is only created when its pid is
    not yet in ``household.people``; its home area is the 'hzone' value of the person's first
    trip row (None if there is no such column). Households that cannot be found in
    ``population.households`` are logged and skipped.

    :param population: core.Population, modified in place
    :param trips: DataFrame, ignored if None or if it has no 'hid' column
    :return: None
    """
    logger = logging.getLogger(__name__)

    has_household_ids = trips is not None and 'hid' in trips.columns
    if not has_household_ids:
        return None

    logger.info("Adding persons from trips")

    for hid, household_trips in trips.groupby('hid'):
        household = population.households.get(hid)
        if household is None:
            logger.warning(
                f"Failed to find household {hid} in population - unable to add person."
            )
            continue

        for pid, person_trips in household_trips.groupby('pid'):
            if pid not in household.people:
                first_row = person_trips.iloc[0].to_dict()
                new_person = core.Person(
                    pid,
                    home_area=first_row.get("hzone"),
                )
                household.add(new_person)
def add_persons_from_persons_attributes(
    population: core.Population,
    persons_attributes: Optional[pd.DataFrame] = None,
):
    """
    Add persons listed in the persons attributes table to households in the population.

    Rows are grouped by 'hid'; person ids are taken from the table index. For each pid not yet
    in ``household.people`` the row ``persons_attributes.loc[pid]`` becomes the person's
    attributes, with 'hzone' and 'freq' removed from the attributes and passed as home_area
    and freq instead. Households that ``population.get`` cannot find are logged and skipped.

    :param population: core.Population, modified in place
    :param persons_attributes: DataFrame, ignored if None or if it has no 'hid' column
    :return: None
    """
    logger = logging.getLogger(__name__)

    if persons_attributes is None:
        return None
    if 'hid' not in persons_attributes.columns:
        return None

    logger.info("Adding persons from persons_attributes")

    for hid, household_rows in persons_attributes.groupby('hid'):
        household = population.get(hid)
        if household is None:
            logger.warning(
                f"Failed to find household {hid} in population - unable to add person."
            )
            continue

        for pid in household_rows.index:
            if pid in household.people:
                continue

            attributes = persons_attributes.loc[pid].to_dict()
            # 'hzone' and 'freq' are removed from the attributes before the person is built
            home_area = attributes.pop('hzone', None)
            freq = attributes.pop('freq', None)
            household.add(
                core.Person(
                    pid,
                    attributes=attributes,
                    home_area=home_area,
                    freq=freq,
                ))
def read_matsim(
    plans_path,
    attributes_path=None,
    weight: int = 100,
    version: int = 11,
    household_key: Union[str, None] = None,
    simplify_pt_trips: bool = False,
    autocomplete: bool = True,
    crop: bool = True,
):
    """
    Load a MATSim format population into core population format.

    Households can be kept together by naming a household uid attribute, ie:
    <attribute class="java.lang.String" name="hid">hh_0001</attribute>
    Persons without that attribute (or when no household_key is given) are each placed in
    their own household keyed by the person id.

    :param plans_path: path to matsim format plans xml
    :param attributes_path: path to matsim format attributes xml, only valid with version 11
    :param weight: int, passed as freq to every person and every newly created household
    :param version: int {11,12}, default = 11; with 12 attributes are read from the plans file
    :param household_key: {str, None}, name of the person attribute holding the household id
    :param simplify_pt_trips: bool, if True call plan.simplify_pt_trips() on each plan
    :param autocomplete: bool, if True call plan.autocomplete_matsim() on each plan
    :param crop: bool, if True call plan.crop() on each plan
    :return: Population
    :raises UserWarning: if attributes_path is given with version 12, or version is not 11 or 12
    """
    logger = logging.getLogger(__name__)
    population = core.Population()

    if attributes_path is not None and version == 12:
        raise UserWarning(
            " You have provided an attributes_path and enables matsim version 12, but"
            " v12 does not require an attributes input:"
            " Either remove the attributes_path arg, or enable version 11. "
        )
    if version not in [11, 12]:
        raise UserWarning("Version must be set to 11 or 12.")

    if version == 12:
        attributes_map = load_attributes_map_from_v12(plans_path)
    elif attributes_path:
        attributes_map = load_attributes_map(attributes_path)
    else:
        attributes_map = {}

    for person_id, plan in selected_plans(plans_path):
        attributes = attributes_map.get(person_id, {})
        person = core.Person(person_id, attributes=attributes, freq=weight)

        act_seq, leg_seq = 0, 0
        arrival_dt = datetime(1900, 1, 1)
        departure_dt = None

        # Walk the plan stages in order, advancing the clock and extracting attributes.
        for stage in plan:
            tag = stage.tag

            if tag in ['act', 'activity']:
                act_seq += 1
                act_type = stage.get('type')

                x, y = stage.get('x'), stage.get('y')
                loc = Point(int(float(x)), int(float(y))) if x and y else None

                if act_type == 'pt interaction':
                    # todo this seems to be the case in matsim for pt interactions
                    departure_dt = arrival_dt + timedelta(seconds=0.)
                else:
                    departure_dt = utils.safe_strptime(
                        stage.get('end_time', '23:59:59'))

                if departure_dt < arrival_dt:
                    logger.warning(
                        f"Negative duration activity found at pid={person_id}")

                person.add(
                    activity.Activity(
                        seq=act_seq,
                        act=act_type,
                        loc=loc,
                        link=stage.get('link'),
                        area=None,  # todo
                        start_time=arrival_dt,
                        end_time=departure_dt,
                    ))

            elif tag == 'leg':
                route, mode, network_route, transit_route = extract_route_attributes(
                    stage, version)
                leg_seq += 1

                trav_time = stage.get('trav_time')
                if trav_time:
                    h, m, s = trav_time.split(":")
                    arrival_dt = departure_dt + timedelta(
                        hours=int(h), minutes=int(m), seconds=int(s))
                else:
                    # todo this assumes 0 duration unless already known
                    arrival_dt = departure_dt

                distance = route.get("distance")
                if distance is not None:
                    distance = float(distance)

                person.add(
                    activity.Leg(
                        seq=leg_seq,
                        mode=mode,
                        start_link=route.get('start_link'),
                        end_link=route.get('end_link'),
                        start_time=departure_dt,
                        end_time=arrival_dt,
                        distance=distance,
                        service_id=transit_route.get("transitLineId"),
                        route_id=transit_route.get("transitRouteId"),
                        o_stop=transit_route.get("accessFacilityId"),
                        d_stop=transit_route.get("egressFacilityId"),
                        network_route=network_route,
                    ))

        if simplify_pt_trips:
            person.plan.simplify_pt_trips()
        person.plan.set_leg_purposes()
        if crop:
            person.plan.crop()
        if autocomplete:
            person.plan.autocomplete_matsim()

        # Place the person in an existing household, a new household, or (when not using
        # households) a dummy household keyed by the person id.
        hid = attributes.get(household_key) if household_key else None
        household = population.get(hid) if hid else None
        if household:
            household.add(person)
        else:
            household = core.Household(hid if hid else person_id, freq=weight)
            household.add(person)
            population.add(household)

    return population
def from_to_travel_diary_read(
    trips: pd.DataFrame,
    persons_attributes: Union[pd.DataFrame, None] = None,
    hhs_attributes: Union[pd.DataFrame, None] = None,
    include_loc=False,
    sort_by_seq: Union[bool, None] = False,
):
    """
    Build a core population from a travel diary that states origin and destination activities.

    No activity inference or home location is needed: each trip row gives the activity at its
    origin ('oact') and at its destination ('dact'). The plan starts with the first trip's
    'oact' at its 'ozone'; every trip then adds a leg followed by the 'dact' activity at its
    'dzone'. A warning is logged when the first activity is not 'home'.

    :param trips: DataFrame
    :param persons_attributes: DataFrame indexed by pid, a 'freq' column becomes the person freq
    :param hhs_attributes: DataFrame indexed by hid, a 'freq' column becomes the household freq
    :param include_loc: bool, optionally include location data as shapely Point geometries
        ('start_loc' and 'end_loc' columns)
    :param sort_by_seq: default False; True sorts each person's trips by 'seq', None sorts
        only if trips has a 'seq' column
    :return: core.Population
    """
    # TODO check for required col headers and give useful error?
    logger = logging.getLogger(__name__)

    if sort_by_seq is None and 'seq' in trips.columns:
        sort_by_seq = True

    population = core.Population()

    for hid, household_trips in trips.groupby('hid'):
        hh_attributes = {} if hhs_attributes is None else hhs_attributes.loc[hid].to_dict()
        hh_freq = hh_attributes.pop('freq', None)
        household = core.Household(hid, attributes=hh_attributes, freq=hh_freq)

        for pid, person_trips in household_trips.groupby('pid'):
            if sort_by_seq:
                person_trips = person_trips.sort_values('seq')

            if persons_attributes is None:
                person_attributes = {}
            else:
                person_attributes = persons_attributes.loc[pid].to_dict()
            person_freq = person_attributes.pop('freq', None)
            person = core.Person(
                pid,
                attributes=person_attributes,
                freq=person_freq,
            )

            first_act = person_trips.iloc[0].oact.lower()
            if first_act != "home":
                logger.warning(
                    f" Person pid:{pid} hid:{hid} plan does not start with 'home' activity"
                )

            loc = person_trips.start_loc.iloc[0] if include_loc else None
            opening_activity = activity.Activity(
                seq=0,
                act=first_act,
                area=person_trips.iloc[0].ozone,
                loc=loc,
                start_time=utils.parse_time(0),
            )
            person.add(opening_activity)

            for n in range(len(person_trips)):
                trip = person_trips.iloc[n]

                if include_loc:
                    start_loc, end_loc = trip.start_loc, trip.end_loc
                else:
                    start_loc = end_loc = None

                purpose = trip.dact.lower()

                leg = activity.Leg(
                    seq=n,
                    purp=purpose,
                    mode=trip['mode'].lower(),
                    start_area=trip.ozone,
                    end_area=trip.dzone,
                    start_loc=start_loc,
                    end_loc=end_loc,
                    start_time=utils.parse_time(trip.tst),
                    end_time=utils.parse_time(trip.tet),
                )
                person.add(leg)

                destination = activity.Activity(
                    seq=n + 1,
                    act=purpose,
                    area=trip.dzone,
                    loc=end_loc,
                    start_time=utils.parse_time(trip.tet),
                )
                person.add(destination)

            person.plan.finalise_activity_end_times()
            household.add(person)

        population.add(household)

    return population
def tour_based_travel_diary_read(
    trips: pd.DataFrame,
    persons_attributes: Union[pd.DataFrame, None] = None,
    hhs_attributes: Union[pd.DataFrame, None] = None,
    include_loc=False,
    sort_by_seq: Union[bool, None] = None,
):
    """
    Complex travel diary reader that leaves activity types to be inferred from tours.

    All activities are created with act=None; once a person's legs and activities are
    loaded, plan.infer_activities_from_tour_purpose() and plan.set_leg_purposes() are called
    on the plan. The person's home area is the 'hzone' of their first trip.

    :param trips: DataFrame
    :param persons_attributes: DataFrame indexed by pid, a 'freq' column becomes the person freq
    :param hhs_attributes: DataFrame indexed by hid, a 'freq' column becomes the household freq
    :param include_loc: bool, optionally include location data as shapely Point geometries
        ('start_loc' and 'end_loc' columns)
    :param sort_by_seq: optionally force trip sorting as True or False; None (default) sorts
        only if trips has a 'seq' column
    :return: core.Population
    """
    population = core.Population()

    if sort_by_seq is None and 'seq' in trips.columns:
        sort_by_seq = True

    for hid, household_trips in trips.groupby('hid'):
        hh_attributes = {} if hhs_attributes is None else hhs_attributes.loc[hid].to_dict()
        hh_freq = hh_attributes.pop('freq', None)
        household = core.Household(hid, attributes=hh_attributes, freq=hh_freq)

        for pid, person_trips in household_trips.groupby('pid'):
            if sort_by_seq:
                person_trips = person_trips.sort_values('seq')

            if persons_attributes is None:
                person_attributes = {}
            else:
                person_attributes = persons_attributes.loc[pid].to_dict()

            home_area = person_trips.hzone.iloc[0]
            person_freq = person_attributes.pop('freq', None)
            person = core.Person(
                pid,
                attributes=person_attributes,
                home_area=home_area,
                freq=person_freq,
            )

            loc = person_trips.start_loc.iloc[0] if include_loc else None
            opening_activity = activity.Activity(
                seq=0,
                act=None,
                area=person_trips.ozone.iloc[0],
                loc=loc,
                start_time=utils.parse_time(0),
            )
            person.add(opening_activity)

            for n in range(len(person_trips)):
                trip = person_trips.iloc[n]

                if include_loc:
                    start_loc, end_loc = trip.start_loc, trip.end_loc
                else:
                    start_loc = end_loc = None

                leg = activity.Leg(
                    seq=n,
                    purp=trip.purp.lower(),
                    mode=trip['mode'].lower(),
                    start_area=trip.ozone,
                    end_area=trip.dzone,
                    start_loc=start_loc,
                    end_loc=end_loc,
                    start_time=utils.parse_time(trip.tst),
                    end_time=utils.parse_time(trip.tet),
                    freq=trip.freq,
                )
                person.add(leg)

                destination = activity.Activity(
                    seq=n + 1,
                    act=None,
                    area=trip.dzone,
                    loc=end_loc,
                    start_time=utils.parse_time(trip.tet),
                )
                person.add(destination)

            plan = person.plan
            plan.finalise_activity_end_times()
            plan.infer_activities_from_tour_purpose()
            plan.set_leg_purposes()

            household.add(person)

        population.add(household)

    return population
def basic_travel_diary_read(trips_df, attributes_df):
    """
    Build a core population from a trips table, guessing activities from trip purposes.

    Trips are grouped by 'hid' and 'pid' and ordered by 'seq'. Each plan starts with a 'home'
    activity if the first origin zone equals the home zone, otherwise with 'work'. Every trip
    adds a leg and then an activity at the destination zone: if the trip purpose has been seen
    before and the destination zone already has a recorded activity, that recorded activity is
    reused (treated as a return trip); otherwise the lower-cased purpose is used and recorded
    as the zone's activity if the zone has none yet.

    :param trips_df: DataFrame, read columns are hid, pid, seq, hzone, ozone, dzone, purp, mode,
        tst, tet and freq
    :param attributes_df: DataFrame indexed by pid (required, looked up with .loc[pid])
    :return: core.Population
    """
    population = core.Population()

    for hid, household_data in trips_df.groupby('hid'):
        household = core.Household(hid)

        for pid, person_data in household_data.groupby('pid'):
            trips = person_data.sort_values('seq')
            home_area = trips.hzone.iloc[0]
            origin_area = trips.ozone.iloc[0]
            # zone -> first activity recorded there; seeded so returns to the home zone are 'home'
            activity_map = {home_area: 'home'}
            # purposes seen so far
            activities = ['home', 'work']

            person = core.Person(pid,
                                 freq=person_data.freq.iloc[0],
                                 attributes=attributes_df.loc[pid].to_dict(),
                                 home_area=home_area)

            person.add(
                activity.Activity(
                    seq=0,
                    act='home' if home_area == origin_area else 'work',
                    area=origin_area,
                    start_time=utils.minutes_to_datetime(0),
                ))

            for n in range(len(trips)):
                trip = trips.iloc[n]
                # NOTE(review): the raw purpose is compared against the lower-case entries of
                # `activities`, while legs and activities use trip.purp.lower() - confirm
                # whether mixed-case purposes are expected here.
                destination_activity = trip.purp

                person.add(
                    activity.Leg(seq=n,
                                 mode=trip['mode'].lower(),
                                 purp=trip.purp.lower(),
                                 start_area=trip.ozone,
                                 end_area=trip.dzone,
                                 start_time=utils.minutes_to_datetime(
                                     trip.tst),
                                 end_time=utils.minutes_to_datetime(trip.tet)))

                if destination_activity in activities and activity_map.get(
                        trip.dzone):  # assume return trip to this activity
                    person.add(
                        activity.Activity(
                            seq=n + 1,
                            act=activity_map[trip.dzone],
                            area=trip.dzone,
                            start_time=utils.minutes_to_datetime(trip.tet),
                        ))
                else:
                    person.add(
                        activity.Activity(
                            seq=n + 1,
                            act=trip.purp.lower(),
                            area=trip.dzone,
                            start_time=utils.minutes_to_datetime(trip.tet),
                        ))
                    if trip.dzone not in activity_map:  # update history
                        # only keeping first activity at each location to ensure returns home
                        activity_map[trip.dzone] = trip.purp.lower()
                    # NOTE(review): nesting of this append (else level vs. under the
                    # "not in activity_map" check above) should be confirmed against history.
                    activities.append(destination_activity)

            person.plan.finalise()
            household.add(person)

        population.add(household)

    return population
def read_matsim(plans_path,
                attributes_path=None,
                weight=1000,
                household_key=None,
                simplify_pt_trips=False,
                autocomplete=True,
                crop=True):
    """
    Load a MATSim format population into core population format.

    Households can be kept together by naming a household uid attribute, ie:
    <attribute class="java.lang.String" name="hid">hh_0001</attribute>
    Persons without that attribute (or when no household_key is given) are each placed in
    their own household keyed by the person id.

    :param plans_path: path to matsim format plans xml
    :param attributes_path: path to matsim format attributes xml, optional
    :param weight: int, passed as freq to every person
    :param household_key: {str, None}, name of the person attribute holding the household id
    :param simplify_pt_trips: bool, if True call plan.simplify_pt_trips() on each plan
    :param autocomplete: bool, if True call plan.autocomplete_matsim() on each plan
    :param crop: bool, if True call plan.crop() on each plan
    :return: Population
    """
    # NOTE(review): if this module also holds the version-aware read_matsim defined earlier,
    # this later definition is the one bound to the name at import time - confirm which one
    # is meant to survive.
    logger = logging.getLogger(__name__)
    population = core.Population()

    attributes_map = load_attributes_map(attributes_path) if attributes_path else {}

    for person_id, plan in selected_plans(plans_path):
        # Persons missing from the attributes file get empty attributes instead of raising
        # KeyError, matching the version-aware reader.
        attributes = attributes_map.get(person_id, {})
        person = core.Person(person_id, attributes=attributes, freq=weight)

        act_seq = 0
        leg_seq = 0
        arrival_dt = datetime(1900, 1, 1)
        departure_dt = None

        # Walk the plan stages in order, advancing the clock and extracting attributes.
        for stage in plan:

            if stage.tag in ['act', 'activity']:
                act_seq += 1
                act_type = stage.get('type')

                loc = None
                x, y = stage.get('x'), stage.get('y')
                if x and y:
                    loc = Point(int(float(x)), int(float(y)))

                if act_type == 'pt interaction':
                    # todo this seems to be the case in matsim for pt interactions
                    departure_dt = arrival_dt + timedelta(seconds=0.)
                else:
                    departure_dt = utils.safe_strptime(
                        stage.get('end_time', '23:59:59'))

                if departure_dt < arrival_dt:
                    logger.warning(
                        f"Negative duration activity found at pid={person_id}")

                person.add(
                    activity.Activity(
                        seq=act_seq,
                        act=act_type,
                        loc=loc,
                        link=stage.get('link'),
                        area=None,  # todo
                        start_time=arrival_dt,
                        end_time=departure_dt,
                    ))

            if stage.tag == 'leg':
                leg_seq += 1

                trav_time = stage.get('trav_time')
                if trav_time:
                    h, m, s = trav_time.split(":")
                    leg_duration = timedelta(hours=int(h),
                                             minutes=int(m),
                                             seconds=int(s))
                    arrival_dt = departure_dt + leg_duration
                else:
                    # todo this assumes 0 duration unless already known
                    arrival_dt = departure_dt

                person.add(
                    activity.Leg(
                        seq=leg_seq,
                        mode=stage.get('mode'),
                        start_loc=None,
                        end_loc=None,
                        start_link=stage.get('start_link'),
                        end_link=stage.get('end_link'),
                        start_area=None,
                        end_area=None,
                        start_time=departure_dt,
                        end_time=arrival_dt,
                    ))

        if simplify_pt_trips:
            person.plan.simplify_pt_trips()
        if crop:
            person.plan.crop()
        if autocomplete:
            person.plan.autocomplete_matsim()

        # Place the person in an existing household, a new household, or (when not using
        # households) a dummy household keyed by the person id. The household is looked up
        # once rather than once for the test and again for the use.
        hid = attributes.get(household_key) if household_key else None
        household = population.get(hid) if hid else None
        if household:
            household.add(person)
        else:
            household = core.Household(hid if hid else person_id)
            household.add(person)
            population.add(household)

    return population
def load_activity_plan(
    trips: pd.DataFrame,
    person_attributes: Union[pd.DataFrame, None] = None,
    hh_attributes: Union[pd.DataFrame, None] = None,
    sample_perc: Union[float, None] = None,
):
    """
    Build a core population from activity plan tables (derived from travel survey data).

    Unlike the travel diary readers no activity inference is done: the trips table carries an
    'activity' column, used as the activity type at each trip destination, instead of a
    purpose. Every plan is started with a 'home' activity at the first origin zone; a warning
    is logged when that zone differs from the person's home zone.

    :param trips: DataFrame
    :param person_attributes: DataFrame indexed by pid
    :param hh_attributes: DataFrame indexed by hid
    :param sample_perc: Float. If not None, trips are first passed through sample_population
        with this percentage and weight_col='freq'.
    :return: core.Population
    :raises UserWarning: if an input that is given is not a DataFrame
    """
    # TODO check for required col headers and give useful error?
    logger = logging.getLogger(__name__)

    if not isinstance(trips, pd.DataFrame):
        raise UserWarning("Unrecognised input for population travel diaries")

    bad_person_attributes = (person_attributes is not None
                             and not isinstance(person_attributes, pd.DataFrame))
    if bad_person_attributes:
        raise UserWarning(
            "Unrecognised input for population person attributes")

    bad_hh_attributes = (hh_attributes is not None
                         and not isinstance(hh_attributes, pd.DataFrame))
    if bad_hh_attributes:
        raise UserWarning(
            "Unrecognised input for population household attributes")

    if sample_perc is not None:
        # sample the travel population
        trips = sample_population(trips, sample_perc, weight_col='freq')

    population = core.Population()

    for hid, household_data in trips.groupby('hid'):
        hh_attribute_dict = None
        if hh_attributes is not None:
            hh_attribute_dict = hh_attributes.loc[hid].to_dict()
        household = core.Household(hid, attributes=hh_attribute_dict)

        for pid, person_data in household_data.groupby('pid'):
            person_trips = person_data.sort_values('seq')
            home_area = person_trips.hzone.iloc[0]
            origin_area = person_trips.ozone.iloc[0]

            if origin_area != home_area:
                logger.warning(
                    f" Person pid:{pid} plan does not start with 'home' activity"
                )

            person_attribute_dict = None
            if person_attributes is not None:
                person_attribute_dict = person_attributes.loc[pid].to_dict()

            person = core.Person(
                pid,
                freq=person_data.freq.iloc[0],
                attributes=person_attribute_dict,
            )

            person.add(
                activity.Activity(
                    seq=0,
                    act='home',
                    area=origin_area,
                    start_time=utils.minutes_to_datetime(0),
                ))

            for n in range(len(person_trips)):
                trip = person_trips.iloc[n]

                leg = activity.Leg(
                    seq=n,
                    mode=trip['mode'].lower(),
                    start_area=trip.ozone,
                    end_area=trip.dzone,
                    start_time=utils.minutes_to_datetime(trip.tst),
                    end_time=utils.minutes_to_datetime(trip.tet),
                )
                person.add(leg)

                destination = activity.Activity(
                    seq=n + 1,
                    act=trip.activity.lower(),
                    area=trip.dzone,
                    start_time=utils.minutes_to_datetime(trip.tet),
                )
                person.add(destination)

            person.plan.finalise()
            household.add(person)

        population.add(household)

    return population
def complex_travel_diary_read(trips,
                              all_person_attributes,
                              all_hh_attributes,
                              include_loc=False):
    """
    Build a core population from a trips table, leaving activity types to be inferred.

    Trips are grouped by 'hid' and 'pid' and ordered by 'seq'. All activities are created with
    act=None; once a person's legs and activities are loaded, plan.finalise() and
    plan.infer_activities_from_leg_purpose() are called on the plan.

    :param trips: DataFrame
    :param all_person_attributes: DataFrame indexed by pid, or None
    :param all_hh_attributes: DataFrame indexed by hid, or None
    :param include_loc: bool, optionally include location data from the 'start_loc' and
        'end_loc' columns
    :return: core.Population
    """
    population = core.Population()

    for hid, household_data in trips.groupby('hid'):
        hh_attributes = None
        if all_hh_attributes is not None:
            hh_attributes = all_hh_attributes.loc[hid].to_dict()
        household = core.Household(hid, attributes=hh_attributes)

        for pid, person_data in household_data.groupby('pid'):
            person_trips = person_data.sort_values('seq')

            person_attributes = None
            if all_person_attributes is not None:
                person_attributes = all_person_attributes.loc[pid].to_dict()

            person = core.Person(
                pid,
                freq=person_data.freq.iloc[0],
                attributes=person_attributes,
                home_area=person_trips.hzone.iloc[0],
            )

            loc = person_trips.start_loc.iloc[0] if include_loc else None
            opening_activity = activity.Activity(
                seq=0,
                act=None,
                area=person_trips.ozone.iloc[0],
                loc=loc,
                start_time=utils.minutes_to_datetime(0),
            )
            person.add(opening_activity)

            for n in range(len(person_trips)):
                trip = person_trips.iloc[n]

                if include_loc:
                    start_loc, end_loc = trip.start_loc, trip.end_loc
                else:
                    start_loc = end_loc = None

                leg = activity.Leg(
                    seq=n,
                    purp=trip.purp.lower(),
                    mode=trip['mode'].lower(),
                    start_area=trip.ozone,
                    end_area=trip.dzone,
                    start_loc=start_loc,
                    end_loc=end_loc,
                    start_time=utils.minutes_to_datetime(trip.tst),
                    end_time=utils.minutes_to_datetime(trip.tet),
                )
                person.add(leg)

                destination = activity.Activity(
                    seq=n + 1,
                    act=None,
                    area=trip.dzone,
                    loc=end_loc,
                    start_time=utils.minutes_to_datetime(trip.tet),
                )
                person.add(destination)

            person.plan.finalise()
            person.plan.infer_activities_from_leg_purpose()

            household.add(person)

        population.add(household)

    return population
def trip_based_travel_diary_read(
    trips: pd.DataFrame,
    persons_attributes: Union[pd.DataFrame, None] = None,
    hhs_attributes: Union[pd.DataFrame, None] = None,
    include_loc=False,
    sort_by_seq: Union[bool, None] = None,
):
    """
    Build a core population from a trip based travel diary (derived from travel survey data).

    Households and persons come from build_population. Persons with no trips are told to
    stay_at_home(). Every other person is rebuilt from persons_attributes and given a plan
    that starts with a 'home' activity at the first origin zone, followed by one leg and one
    activity per trip; the lower-cased trip purpose ('purp') is used both as the leg purpose
    and as the type of the destination activity. A warning is logged when the first origin
    zone differs from the person's home zone.

    :param trips: DataFrame
    :param persons_attributes: DataFrame indexed by pid, a 'freq' column becomes the person freq
    :param hhs_attributes: DataFrame, passed to build_population
    :param include_loc: bool, optionally include location data as shapely Point geometries
        ('start_loc' and 'end_loc' columns)
    :param sort_by_seq: optionally force trip sorting as True or False; None (default) sorts
        only if trips has a 'seq' column
    :return: core.Population
    """
    logger = logging.getLogger(__name__)

    population = build_population(trips=trips,
                                  persons_attributes=persons_attributes,
                                  hhs_attributes=hhs_attributes)

    if sort_by_seq is None and 'seq' in trips.columns:
        sort_by_seq = True

    # Index the trips by (hid, pid) once, instead of scanning the whole table with a boolean
    # mask for every person. Row order within each group is the order in `trips`.
    trips_by_person = {
        key: person_trips
        for key, person_trips in trips.groupby(['hid', 'pid'], sort=False)
    }

    # Iterate over snapshots: the loop bodies re-add to the containers being iterated.
    for hid, household in list(population):
        for pid, person in list(household):
            person_trips = trips_by_person.get((hid, pid))
            if person_trips is None or not len(person_trips):
                person.stay_at_home()
                continue

            if sort_by_seq:
                person_trips = person_trips.sort_values('seq')

            home_area = person_trips.hzone.iloc[0]
            origin_area = person_trips.ozone.iloc[0]
            if not origin_area == home_area:
                logger.warning(
                    f" Person pid:{pid} plan does not start with 'home' activity"
                )

            if persons_attributes is not None:
                person_attributes = persons_attributes.loc[pid].to_dict()
            else:
                person_attributes = {}

            # The person from build_population is replaced by a fresh one under the same pid.
            person = core.Person(
                pid,
                attributes=person_attributes,
                freq=person_attributes.pop('freq', None),
            )

            loc = None
            if include_loc:
                loc = person_trips.start_loc.iloc[0]

            person.add(
                activity.Activity(
                    seq=0,
                    act='home',
                    area=origin_area,
                    loc=loc,
                    start_time=utils.parse_time(0),
                ))

            for n in range(len(person_trips)):
                trip = person_trips.iloc[n]

                start_loc = None
                end_loc = None
                if include_loc:
                    start_loc = trip.start_loc
                    end_loc = trip.end_loc

                purpose = trip.purp.lower()

                person.add(
                    activity.Leg(
                        seq=n,
                        purp=purpose,
                        mode=trip['mode'].lower(),
                        start_area=trip.ozone,
                        end_area=trip.dzone,
                        start_loc=start_loc,
                        end_loc=end_loc,
                        start_time=utils.parse_time(trip.tst),
                        end_time=utils.parse_time(trip.tet),
                    ))

                person.add(
                    activity.Activity(
                        seq=n + 1,
                        act=purpose,
                        area=trip.dzone,
                        loc=end_loc,
                        start_time=utils.parse_time(trip.tet),
                    ))

            person.plan.finalise_activity_end_times()
            household.add(person)

        population.add(household)

    return population