import geopandas


def livnehIDsAndAreas(df: geopandas.GeoDataFrame, crs: str = '4326') -> geopandas.GeoDataFrame:
    # Clipped data: deduplicate grid cells by ID and sort
    df = df.drop_duplicates(['id']).sort_values(['id'], axis=0)
    df = df.to_crs(epsg=crs)
    # Area of each clipped cell; the CRS must be projected for these
    # values to actually be in square meters
    df['area_m2'] = df['geometry'].area
    df = df.filter(items=['coordinates', 'lat', 'lon', 'id', 'area_m2'])
    # Expand the cell center points to full grid cell polygons
    df = __points2grids(df, crs=crs)
    df = df.to_crs(epsg=crs)
    df['total_area_m2'] = df['geometry'].area
    return df
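# A minimal sketch (separate from the function above) of why the crs argument
# matters for the area columns: geopandas computes areas in the units of the
# coordinate reference system, so geographic EPSG:4326 yields square degrees,
# while a projected CRS such as a UTM zone yields square meters. The polygon
# below is a made-up example.
import geopandas
from shapely.geometry import Polygon

square = geopandas.GeoDataFrame(
    geometry=[Polygon([(-120, 39), (-120, 39.01), (-119.99, 39.01), (-119.99, 39)])],
    crs="EPSG:4326",
)
print(square.geometry.area.iloc[0])                     # ~1e-4 (square degrees)
print(square.to_crs(epsg=32611).geometry.area.iloc[0])  # ~9.6e5 (square meters)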
from typing import List

import geopandas as gp
import numpy as np
import pandas as pd
import shapely.geometry as sg

from . import helpers as hp


def compute_screen_line_counts(
    feed: "Feed", screen_lines: gp.GeoDataFrame, dates: List[str]
) -> pd.DataFrame:
    """
    Find all the Feed trips active on the given YYYYMMDD dates whose shapes
    intersect the given GeoDataFrame of screen lines, that is, of straight
    WGS84 LineStrings.
    Compute the intersection times and directions for each trip.

    Return a DataFrame with the columns

    - ``'date'``
    - ``'trip_id'``
    - ``'route_id'``
    - ``'route_short_name'``
    - ``'shape_id'``: shape ID of the trip
    - ``'screen_line_id'``: ID of the screen line as specified in
      ``screen_lines`` or as assigned after the fact
    - ``'crossing_distance'``: distance along the trip shape of the
      screen line intersection
    - ``'crossing_time'``: time that the trip's vehicle crosses the screen
      line; one trip could cross multiple times
    - ``'crossing_direction'``: 1 or -1; 1 indicates trip travel from the
      left side to the right side of the screen line; -1 indicates trip
      travel in the opposite direction

    Notes:

    - Assume the Feed's stop times DataFrame has an accurate
      ``shape_dist_traveled`` column.
    - Assume that trips travel in the same direction as their shapes,
      an assumption that is part of the GTFS.
    - Assume that the screen line is straight and simple.
    - Probably does not give correct results for trips with
      self-intersecting shapes.
    - The algorithm works as follows:

      1. Find the trip shapes that intersect the screen lines.
      2. For each such shape and screen line, compute the intersection
         points, the distance of each point along the shape, and the
         orientation of the screen line relative to the shape.
      3. For each given date, restrict to trips active on the date and
         interpolate a stop time for the intersection point using the
         ``shape_dist_traveled`` column.
      4. Use that interpolated time as the crossing time of the trip
         vehicle.
    """
    dates = feed.subset_dates(dates)
    if not dates:
        return pd.DataFrame()

    # Get shapes as a GeoDataFrame
    shapes_g = feed.geometrize_shapes(use_utm=True)

    # Convert screen lines to UTM
    crs = shapes_g.crs
    screen_lines = screen_lines.to_crs(crs)

    # Create screen line IDs if necessary
    n = screen_lines.shape[0]
    if "screen_line_id" not in screen_lines.columns:
        screen_lines["screen_line_id"] = hp.make_ids(n, "sl")

    # Make a vector in the direction of each screen line to calculate
    # crossing orientation. Does not work in case of a bent screen line.
    p1 = screen_lines.geometry.map(lambda x: np.array(x.coords[0]))
    p2 = screen_lines.geometry.map(lambda x: np.array(x.coords[-1]))
    screen_lines["screen_line_vector"] = p2 - p1

    # Get intersection points of shapes and screen lines
    g0 = (
        # Only keep shapes that intersect screen lines to reduce computations
        gp.sjoin(shapes_g, screen_lines.filter(["screen_line_id", "geometry"]))
        .merge(screen_lines, on="screen_line_id")
        # Compute intersection points
        .assign(
            int_point=lambda x: gp.GeoSeries(x.geometry_x, crs=crs).intersection(
                gp.GeoSeries(x.geometry_y, crs=crs)
            )
        )
    )

    # Unpack multipoint intersections to yield a new GeoDataFrame
    records = []
    for row in g0.itertuples(index=False):
        if isinstance(row.int_point, sg.Point):
            intersections = [row.int_point]
        else:
            intersections = row.int_point
        for int_point in intersections:
            records.append(
                {
                    "shape_id": row.shape_id,
                    "screen_line_id": row.screen_line_id,
                    "geometry": row.geometry_x,
                    "int_point": int_point,
                    "screen_line_vector": row.screen_line_vector,
                }
            )
    g = gp.GeoDataFrame.from_records(records)
    g.crs = crs

    # Get distance (in meters) of each intersection point along its shape
    g["crossing_dist"] = g.apply(lambda x: x.geometry.project(x.int_point), axis=1)

    # Build a tiny vector along each shape at the intersection point
    p2 = g.apply(lambda x: x.geometry.interpolate(x.crossing_dist + 1), axis=1).map(
        lambda x: np.array(x.coords[0])
    )
    p1 = g.int_point.map(lambda x: np.array(x.coords[0]))
    g["shape_vector"] = p2 - p1

    # Compute crossing direction by taking the vector cross product of
    # the shape vector and the screen line vector
    det = g.apply(
        lambda x: np.linalg.det(np.array([x.shape_vector, x.screen_line_vector])),
        axis=1,
    )
    g["crossing_direction"] = det.map(lambda x: 1 if x >= 0 else -1)

    # Convert to feed distance units
    converter = hp.get_convert_dist("m", feed.dist_units)
    g["crossing_dist"] = g["crossing_dist"].map(converter)

    # Summarize work so far into a lookup table
    h = (
        g.filter(["shape_id", "screen_line_id", "crossing_direction", "crossing_dist"])
        .set_index("shape_id")
        # Sorting is needed for the interpolation below to work
        .sort_values(["shape_id", "crossing_dist"])
    )

    # Get stop times of trips whose shapes lie in h
    st = (
        feed.trips.loc[lambda x: x.shape_id.isin(h.index)]
        # Merge in route short names and stop times
        .merge(feed.routes[["route_id", "route_short_name"]])
        .merge(feed.stop_times)
        # Keep only non-NaN departure times
        .loc[lambda x: x.departure_time.notna()]
        # Convert to seconds past midnight
        .assign(departure_time=lambda x: x.departure_time.map(hp.timestr_to_seconds))
    )

    # Compute crossing times by date
    records = []
    ta = feed.compute_trip_activity(dates)
    for date in dates:
        # Subset to trips active on the date
        ids = ta.loc[lambda x: x[date] == 1, "trip_id"]
        # Sorting is needed for the interpolation below to work
        f = st.loc[lambda x: x.trip_id.isin(ids)].sort_values(
            ["trip_id", "shape_dist_traveled"]
        )

        # Get crossing times for each trip
        for tid, group in f.groupby("trip_id"):
            sid = group["shape_id"].iat[0]
            rid = group["route_id"].iat[0]
            rsn = group["route_short_name"].iat[0]
            dists = group["shape_dist_traveled"].values
            times = group["departure_time"].values
            crossing_dists = h.loc[[sid], "crossing_dist"].values
            crossing_times = np.interp(crossing_dists, dists, times)
            for i, row in enumerate(h.loc[[sid]].itertuples(index=False)):
                records.append(
                    {
                        "date": date,
                        "trip_id": tid,
                        "route_id": rid,
                        "route_short_name": rsn,
                        "shape_id": sid,
                        "screen_line_id": row.screen_line_id,
                        "crossing_direction": row.crossing_direction,
                        "crossing_distance": row.crossing_dist,
                        "crossing_time": crossing_times[i],
                    }
                )

    # Convert crossing times back to time strings
    result = pd.DataFrame.from_records(records).assign(
        crossing_time=lambda x: x.crossing_time.map(
            lambda t: hp.timestr_to_seconds(t, inverse=True)
        )
    )

    return result
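# A minimal, self-contained sketch (not part of the function above) of the
# crossing-direction test that compute_screen_line_counts applies: the sign of
# the 2D cross product of the shape vector and the screen line vector, computed
# as a determinant, says whether the trip travels from the screen line's left
# side to its right side (+1) or the opposite way (-1). The vectors below are
# made-up examples.
import numpy as np

shape_vector = np.array([1.0, 0.0])        # trip heading due east
screen_line_vector = np.array([0.0, 1.0])  # screen line pointing north
det = np.linalg.det(np.array([shape_vector, screen_line_vector]))
crossing_direction = 1 if det >= 0 else -1
print(crossing_direction)  # 1: the eastbound trip crosses left to right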
import pandas as pd
import geopandas as gpd


def clean_acs(
        fd: gpd.GeoDataFrame,
        returns=False,
        groups=True,
        home=True,
        reduced=True,
        error=True) -> gpd.GeoDataFrame:
    r"""
    Clean up and organize ACS flow data.

    American Community Survey (ACS) data has information on many modes of
    transportation and their error margins. This function provides various
    options to simplify this data and to reduce and combine various modes
    of transportation.

    Parameters
    ----------
    returns : bool, defaults to False
        Add duplicate data with switched origin and destination codes.
    groups : bool, defaults to True
        Create an active transportation group (`walk` and `bike`), a
        transit group (`bus`, `streetcar`, `subway`, `railroad`, and
        `ferry`), and a carpool group (`car_2p`, `car_3p`, `car_4p`,
        `car_5p`, and `car_7p`).
    home : bool, defaults to True
        People working from home do not travel. Subtract `home` from
        `all`.
    reduced : bool, defaults to True
        Only keep the `all`, `home`, `walk`, `bike`, and `sov` columns,
        plus the group columns if `groups` is True.
    error : bool, defaults to True
        Keep the error data.

    Returns
    -------
    geopandas.GeoDataFrame
        Cleaned-up GeoDataFrame with origin-destination data broken down
        by mode.

    See Also
    --------
    ~stplanpy.acs.read_acs

    Examples
    --------
    An example data file, "`od_data.csv`_", can be downloaded from github.

    .. code-block:: python

        from stplanpy import acs

        flow_data = acs.read_acs("od_data.csv")
        flow_data = flow_data.clean_acs()
    """
    if returns:
        # Add return data for commute trips per day
        df = fd.copy()
        df = df[[
            "dest_taz", "orig_taz", "all", "all_error", "sov", "sov_error",
            "car_2p", "car_2p_error", "car_3p", "car_3p_error",
            "car_4p", "car_4p_error", "car_5p", "car_5p_error",
            "car_7p", "car_7p_error", "bus", "bus_error",
            "streetcar", "streetcar_error", "subway", "subway_error",
            "railroad", "railroad_error", "ferry", "ferry_error",
            "bike", "bike_error", "walk", "walk_error",
            "taxi", "taxi_error", "motorcycle", "motorcycle_error",
            "other", "other_error", "home", "home_error",
            "auto", "auto_error"]]
        df.rename(columns={
            "dest_taz": "orig_taz",
            "orig_taz": "dest_taz"}, inplace=True)
        fd = pd.concat([fd, df], ignore_index=True)

    if groups:
        # Define some groups. Margins of error add in quadrature.
        fd["active"] = fd["walk"] + fd["bike"]
        fd["active_error"] = (
            fd["walk_error"]**2
            + fd["bike_error"]**2)**(1/2)
        fd["transit"] = (
            fd["bus"]
            + fd["streetcar"]
            + fd["subway"]
            + fd["railroad"]
            + fd["ferry"])
        fd["transit_error"] = (
            fd["bus_error"]**2
            + fd["streetcar_error"]**2
            + fd["subway_error"]**2
            + fd["railroad_error"]**2
            + fd["ferry_error"]**2)**(1/2)
        fd["carpool"] = (
            fd["car_2p"]
            + fd["car_3p"]
            + fd["car_4p"]
            + fd["car_5p"]
            + fd["car_7p"])
        fd["carpool_error"] = (
            fd["car_2p_error"]**2
            + fd["car_3p_error"]**2
            + fd["car_4p_error"]**2
            + fd["car_5p_error"]**2
            + fd["car_7p_error"]**2)**(1/2)

    if home:
        # People working from home do not travel
        fd["all"] = fd["all"] - fd["home"]

    if reduced and groups:
        # Columns to keep
        fd = fd[[
            "orig_taz", "dest_taz", "all", "all_error",
            "home", "home_error", "walk", "walk_error",
            "bike", "bike_error", "sov", "sov_error",
            "active", "active_error", "transit", "transit_error",
            "carpool", "carpool_error", fd.geometry.name]]
    elif reduced and not groups:
        # Columns to keep
        fd = fd[[
            "orig_taz", "dest_taz", "all", "all_error",
            "home", "home_error", "walk", "walk_error",
            "bike", "bike_error", "sov", "sov_error",
            fd.geometry.name]]

    if not error:
        # Drop the margin-of-error columns
        fd = fd[fd.columns.drop(list(fd.filter(regex="_error")))]

    # Fix index
    fd = fd.reset_index(drop=True)

    return fd
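# A minimal sketch (the numbers are made up) of the margin-of-error arithmetic
# clean_acs uses for its mode groups: following the Census Bureau's guidance
# for derived ACS estimates, the margin of error of a sum is approximated by
# adding the individual margins of error in quadrature, which is smaller than
# the naive sum of the errors.
walk_error, bike_error = 12.0, 5.0
active_error = (walk_error**2 + bike_error**2)**(1/2)
print(active_error)  # 13.0, versus the naive sum 17.0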