def get_all_of_time(data: pd.DataFrame,
                    start: Union[str, pd.Timestamp],
                    end: Union[str, pd.Timestamp],
                    time_name: str = 'Time') -> pd.DataFrame:
    """
    Restrict data to rows whose time lies between start and end (inclusive).

    Each bound is normalised on its own: a bound that is not already a
    pd.Timestamp has to be a string accepted by validate_matches_time_format
    and is converted to pd.Timestamp before filtering. Mixing one Timestamp
    with one string is therefore supported.

    Args:
        data: data containing time column for conditioning
        start: start time
        end: end time
        time_name: name of column containing time

    Returns:
        restricted dataset with the index reset to 0..n-1.

    Raises:
        Whatever the validate_* helpers raise on invalid input
        (validate_data_is_type is expected to raise TypeError).
    """
    # Only bounds that still need parsing are validated; Timestamps pass through.
    raw_bounds = [bound for bound in (start, end)
                  if not isinstance(bound, pd.Timestamp)]
    # Type-check every raw bound before any format check, so a wrong type is
    # reported ahead of a malformed string.
    for bound in raw_bounds:
        validate_data_is_type(bound, str)
    for bound in raw_bounds:
        validate_matches_time_format(bound)
    start, end = pd.Timestamp(start), pd.Timestamp(end)

    validate_data_is_type(data, pd.DataFrame)
    validate_data_is_time_column(data[time_name])

    in_range = (start <= data[time_name]) & (data[time_name] <= end)
    return data[in_range].reset_index(drop=True)
def get_all_incidents(data: pd.DataFrame, speed_limit: int) -> pd.DataFrame:
    """
    Get all speed incidents for all buses.

    The data is processed line by line and, within each line, brigade by
    brigade; every (line, brigade) subset is passed to
    get_speed_incidents_for_bus. The caller's DataFrame is not modified.

    Args:
        data: data regarding all buses activity
        speed_limit: maximum speed limit we treat as acceptable (km/hour).

    Returns:
        All speed incidents in the format based on _report_incident, with the
        columns 'Lines', 'Speed', 'Lat', 'Lon', 'Time' placed first and the
        index reset. An empty frame with those columns if nothing was found.
    """
    validate_if_contains_columns(data,
                                 ['Lines', 'Brigade', 'Lon', 'Lat', 'Time'])
    validate_data_is_type(speed_limit, int)

    # Work on a sorted copy instead of sorting the caller's frame in place.
    data = data.sort_values(by='Time', ascending=True).reset_index(drop=True)

    expected_columns = ['Lines', 'Speed', 'Lat', 'Lon', 'Time']
    incident_frames = []
    for line in data['Lines'].unique():
        per_line = get_all_of_line(data, line)
        for brigade in per_line['Brigade'].unique():
            per_brigade = get_all_of_brigade(per_line, brigade)
            incidents = get_speed_incidents_for_bus(per_brigade, speed_limit)
            if len(incidents) > 0:
                incidents['Lines'] = line
                incident_frames.append(incidents)

    if not incident_frames:
        return pd.DataFrame(columns=expected_columns).reset_index(drop=True)

    # DataFrame.append was removed in pandas 2.0; concatenating once is also
    # linear instead of re-copying the report on every iteration.
    report = pd.concat(incident_frames, sort=False)
    extra_columns = [name for name in report.columns
                     if name not in expected_columns]
    report = report.reindex(columns=expected_columns + extra_columns)
    return report.reset_index(drop=True)
def remove_duplicates(data: pd.DataFrame) -> pd.DataFrame:
    """
    Drop repeated rows from a data frame and renumber the index.

    Args:
        data: any data frame

    Returns:
        data frame without duplicated rows, indexed from zero.
    """
    validate_data_is_type(data, pd.DataFrame)
    unique_rows = data.drop_duplicates()
    return unique_rows.reset_index(drop=True)
def test_validate_multiple_params():
    """Test for bwaw.utils.validation.validate_multiple_params"""
    # One pytest.raises block per failing call: inside a shared block the
    # first raise ends the block and the later calls would never execute.
    with pytest.raises(TypeError):
        validate_multiple_params(['a', 5],
                                 lambda x: validate_data_is_type(x, str))
    with pytest.raises(TypeError):
        validate_multiple_params([5., 3.],
                                 lambda x: validate_data_is_type(x, int))

    # Correctly typed parameters must pass without raising.
    validate_multiple_params([1, 2],
                             lambda x: validate_data_is_type(x, int))
    validate_multiple_params([2., 3.],
                             lambda x: validate_data_is_type(x, float))
def convert_response_list_to_dataframe(response_list: List) -> pd.DataFrame:
    """
    Build a pandas data frame out of a response list.

    Args:
        response_list: list of values or dicts

    Returns:
        the same data as a pandas data frame
    """
    validate_data_is_type(response_list, list)
    frame = pd.DataFrame(response_list)
    return frame
def get_active_buses(api_key: str) -> List:
    """
    Fetch the list of all currently active buses.

    Args:
        api_key: API key provided by UMWaw

    Returns:
        list of metadata of all currently active buses
    """
    validate_data_is_type(api_key, str)
    buses_request = _create_active_buses_request(api_key)
    raw_response = _get_resource_from_request(resource_request=buses_request)
    return _format_active_bus_response(raw_response)
def test_validate_data_is_type():
    """Test for bwaw.utils.validation.validate_data_is_type"""
    # One pytest.raises block per failing call: inside a shared block the
    # first raise ends the block and the second call would never execute.
    with pytest.raises(TypeError):
        validate_data_is_type(5, str)
    with pytest.raises(TypeError):
        validate_data_is_type('a', int)

    # Matching types must pass without raising.
    validate_data_is_type(5, int)
    validate_data_is_type(5., float)
def convert_dataframe_to_response_list(data: pd.DataFrame) -> List:
    """
    Turn a pandas data frame back into a response list.

    A single-column frame becomes a flat list of that column's values; a
    frame with several columns becomes a list with one dict per row.

    Args:
        data: data in the format of pandas data frame

    Returns:
        list of values or dicts

    Raises:
        ValueError: if the data frame has no columns.
    """
    validate_data_is_type(data, pd.DataFrame)

    column_count = len(data.columns)
    if column_count == 0:
        raise ValueError('Empty data frame.')

    if column_count == 1:
        only_column = data.columns[0]
        return list(data[only_column])

    records = []
    for _, row in data.iterrows():
        records.append(dict(row))
    return records
def get_timetable_for_line_on_bus_stop(api_key: str, bus_stop_id: str,
                                       bus_stop_nr: str, line: str) -> List:
    """
    Fetch the timetable of a single line on a single bus stop.

    Args:
        api_key: API key provided by UMWaw
        bus_stop_id: bus stop identifier
        bus_stop_nr: bus stop number (eg. 01, 02, etc.)
        line: bus line number

    Returns:
        list of line timetable on bus stop.
    """
    validate_multiple_params([api_key, bus_stop_id, bus_stop_nr, line],
                             lambda x: validate_data_is_type(x, str))

    query = {
        PARAMETER.RESOURCE_ID2: RESOURCE_ID.TIMETABLE_FOR_LINE,
        PARAMETER.API_KEY: api_key,
        PARAMETER.BUS_STOP_ID: bus_stop_id,
        PARAMETER.BUS_STOP_NR: bus_stop_nr,
        PARAMETER.LINE_NR: line
    }
    timetable_request = _create_request(table_name=TABLE.TIMETABLES,
                                        parameters=query)
    raw_response = _get_resource_from_request(
        resource_request=timetable_request)
    return _format_timetable_on_stop_response(raw_response)
def _adjust_date(column: pd.Series, start_from: pd.Timestamp):
    """
    Re-date a time column relative to a starting timestamp.

    Entries whose time of day is at or after the time of start_from get the
    date of start_from; entries with an earlier time of day get the following
    date. The time of day of every entry is kept.

    Args:
        column: column of datetime values
        start_from: timestamp providing the reference date and time of day

    Returns:
        series with the re-dated values; the earlier-than-start entries come
        first, followed by the remaining ones (original index labels kept).
    """
    validate_data_is_type(column, pd.Series)
    validate_data_is_time_column(column)

    cutoff = start_from.time()
    time_of_day = column.dt.time
    before_cutoff = column[time_of_day < cutoff]
    from_cutoff = column[time_of_day >= cutoff]

    same_day = start_from.date()
    next_day = same_day + pd.Timedelta(days=1)

    from_cutoff = pd.to_datetime(
        from_cutoff.apply(lambda stamp: f'{same_day} {stamp.time()}'))
    before_cutoff = pd.to_datetime(
        before_cutoff.apply(lambda stamp: f'{next_day} {stamp.time()}'))

    output = pd.concat((before_cutoff, from_cutoff))
    # Sanity check: every input entry must land in exactly one partition.
    assert output.shape == column.shape
    return output
def _create_active_buses_request(api_key: str) -> request.Request:
    """
    Build the request asking for the list of active buses.

    Args:
        api_key: API key provided by UMWaw

    Returns:
        request for list of active buses
    """
    validate_data_is_type(api_key, str)
    query = {
        PARAMETER.RESOURCE_ID1: RESOURCE_ID.BUSES_ACTIVE,
        PARAMETER.API_KEY: api_key,
        PARAMETER.TYPE: CONSTANTS.ACTIVE_BUS_STATIC_TYPE
    }
    return _create_request(table_name=TABLE.BUSES, parameters=query)
def get_bus_stops_coordinates(api_key: str) -> List:
    """
    Fetch the coordinates of all bus stops.

    Args:
        api_key: API key provided by UMWaw

    Returns:
        list of all bus stops' coordinates
    """
    validate_data_is_type(api_key, str)
    query = {
        PARAMETER.RESOURCE_ID2: RESOURCE_ID.BUS_STOP_COORDINATE,
        PARAMETER.API_KEY: api_key
    }
    stops_request = _create_request(table_name=TABLE.STOPS, parameters=query)
    raw_response = _get_resource_from_request(resource_request=stops_request)
    return _format_all_coordinates_response(raw_response)
def column_str_to_datetime(column: pd.Series,
                           time_only: bool = False) -> pd.Series:
    """
    Parse a column of time strings into datetime values.

    Every entry is checked with validate_matches_time_format before parsing.

    Args:
        column: given column
        time_only: if only time is provided in string (parsed as %H:%M:%S)

    Returns:
        formatted column
    """
    validate_data_is_type(column, pd.Series)
    for entry in column:
        validate_matches_time_format(entry)

    # format=None is the pd.to_datetime default, i.e. let pandas infer it.
    time_format = '%H:%M:%S' if time_only else None
    return pd.to_datetime(column, format=time_format)
def get_active_buses_over_time(api_key: str,
                               no_of_requests: int = 1,
                               interval_btwn_requests: int = 1,
                               keep_partial_if_fail: bool = True) -> List:
    """
    Fetch the active buses repeatedly and merge all answers into one list.

    Args:
        api_key: API key provided by UMWaw
        no_of_requests: number of calls to UMWaw
        interval_btwn_requests: time [minutes] between calls to UMWaw
        keep_partial_if_fail: if partial results should be stored if call fails

    Returns:
        list of metadata of all currently active buses aggregated from
        whole period
    """
    validate_data_is_type(api_key, str)

    buses_request = _create_active_buses_request(api_key)
    responses = _get_resource_over_time(
        resource_request=buses_request,
        no_of_requests=no_of_requests,
        interval_btwn_requests=interval_btwn_requests,
        keep_partial_if_fail=keep_partial_if_fail)

    # Flatten: each response contributes its formatted entries in order.
    aggregated = []
    for single_response in responses:
        aggregated.extend(_format_active_bus_response(single_response))
    return aggregated
def get_speed_incidents_for_bus(data: pd.DataFrame, speed_limit: int,
                                max_plausible_speed: int = 150
                                ) -> pd.DataFrame:
    """
    Get all speed incidents for a single bus.

    Consecutive records (ordered by 'Time') are compared pairwise; the speed
    between them is derived from the distance and the time difference. A pair
    is reported when its speed is strictly above speed_limit and strictly
    below max_plausible_speed. Pairs with a zero time difference are skipped.
    The caller's DataFrame is not modified.

    Args:
        data: data regarding bus activity
        speed_limit: maximum speed limit we treat as acceptable (km/hour).
        max_plausible_speed: speeds at or above this value are ignored
            (presumably measurement glitches); same unit as speed_limit.

    Returns:
        All speed incidents in the format based on _report_incident

    Raises:
        ValueError: if data holds more than one line or more than one brigade.
    """
    validate_if_contains_columns(data,
                                 ['Lon', 'Lat', 'Time', 'Lines', 'Brigade'])
    validate_data_is_type(speed_limit, int)
    validate_data_is_type(max_plausible_speed, int)
    if len(data['Lines'].unique()) > 1 or len(data['Brigade'].unique()) > 1:
        raise ValueError(
            'Data does not consist of information from single bus/brigade.')

    # Sorted copy with a 0..n-1 index: keeps the input untouched and makes
    # the positional .at[i] / .at[i + 1] lookups below valid.
    data = data.sort_values(by='Time', ascending=True).reset_index(drop=True)

    report = []
    for i in range(len(data) - 1):
        distance = _calculate_distance_km(lon_x=data.at[i, 'Lon'],
                                          lat_x=data.at[i, 'Lat'],
                                          lon_y=data.at[i + 1, 'Lon'],
                                          lat_y=data.at[i + 1, 'Lat'])
        elapsed = _calculate_time_difference_hours(data.at[i, 'Time'],
                                                   data.at[i + 1, 'Time'])
        if not elapsed:
            # Zero elapsed time: speed is undefined, skip the pair.
            continue
        speed = _calculate_speed(distance, elapsed)
        if max_plausible_speed > speed > speed_limit:
            report.append(_report_incident(data.iloc[[i, i + 1]], speed))
    return pd.DataFrame(report)
def get_bus_stops_ids_by_name(api_key: str, name: str) -> List:
    """
    Fetch the ids of all bus stops carrying the given name.

    Args:
        api_key: API key provided by UMWaw
        name: bus stop name

    Returns:
        list of all bus stops' ids by bus stop name
    """
    validate_multiple_params([api_key, name],
                             lambda x: validate_data_is_type(x, str))
    query = {
        PARAMETER.RESOURCE_ID2: RESOURCE_ID.BUS_STOP_BY_NAME,
        PARAMETER.API_KEY: api_key,
        PARAMETER.BUS_STOP_NAME: name
    }
    ids_request = _create_request(table_name=TABLE.TIMETABLES,
                                  parameters=query)
    raw_response = _get_resource_from_request(resource_request=ids_request)
    return _format_bus_stop_id_response(raw_response)
def get_all_lines_on_bus_stop(api_key: str, bus_stop_id: str,
                              bus_stop_nr: str) -> List:
    """
    Fetch all bus lines that serve the given bus stop.

    Args:
        api_key: API key provided by UMWaw
        bus_stop_id: bus stop identifier
        bus_stop_nr: bus stop number (eg. 01, 02, etc.)

    Returns:
        list of all bus lines on given bus stop
    """
    validate_multiple_params([api_key, bus_stop_nr, bus_stop_id],
                             lambda x: validate_data_is_type(x, str))
    query = {
        PARAMETER.RESOURCE_ID2: RESOURCE_ID.BUSES_ON_STOP,
        PARAMETER.API_KEY: api_key,
        PARAMETER.BUS_STOP_ID: bus_stop_id,
        PARAMETER.BUS_STOP_NR: bus_stop_nr
    }
    lines_request = _create_request(table_name=TABLE.TIMETABLES,
                                    parameters=query)
    raw_response = _get_resource_from_request(resource_request=lines_request)
    return _format_all_lines_on_stop_response(raw_response)
def get_punctuality_list_for_bus(bus_coordinates: pd.DataFrame,
                                 stops_coordinates: pd.DataFrame,
                                 api_key: str = None,
                                 path: Path = None,
                                 proximity: int = 10,
                                 time: int = 1,
                                 verbosity: bool = False) -> List:
    """
    Generate punctuality record for single bus.

    For every bus record that lies close enough to a bus stop, the timetable
    of that stop is processed and compared with the record's time. Records
    with no stop nearby, or whose timetable processing raises ValueError,
    contribute nothing to the result.

    Args:
        bus_coordinates: array of active buses for single bus
        stops_coordinates: array of bus stops coordinates
        api_key: UMWaw API key if timetables are processed online
        path: path to directory containing .csv files if timetables are
            already downloaded
        proximity: proximity error regarding closeness between bus and a bus
            stop (in meters)
        time: minimum time meaning punctuality incident (in minutes)
        verbosity: if progress bar of timetables processing should be shown

    Returns:
        list with True - punctuality incident, False - bus on time
    """
    validate_multiple_params([bus_coordinates, stops_coordinates],
                             lambda x: validate_data_is_type(x, pd.DataFrame))
    validate_multiple_params([proximity, time],
                             lambda x: validate_data_is_type(x, int))
    if api_key is not None:
        validate_data_is_type(api_key, str)
    if path is not None:
        validate_data_is_type(path, Path)
    validate_data_is_type(verbosity, bool)

    # Nothing to inspect; also keeps the positional lookup below safe.
    if len(bus_coordinates) == 0:
        return []

    tolerance = _proximity_to_tolerance(proximity)
    min_delay_seconds = time * 60

    # Loop invariants: the frame describes a single bus, so its line and its
    # earliest timestamp are the same for every row. Positional access does
    # not depend on the frame having an index label 0.
    bus_line = bus_coordinates['Lines'].iloc[0]
    earliest_time = bus_coordinates['Time'].min()

    progress_bar = tqdm(total=len(bus_coordinates)) if verbosity else None
    punctuality = []
    try:
        for brigade in bus_coordinates["Brigade"].unique():
            brigade_rows = get_all_of_brigade(bus_coordinates, brigade)
            for _, row in brigade_rows.iterrows():
                found_bus_stops = stops_coordinates[
                    (abs(stops_coordinates['Latitude'] - row['Lat'])
                     < tolerance)
                    & (abs(stops_coordinates['Longitude'] - row['Lon'])
                       < tolerance)]
                if len(found_bus_stops) > 0:
                    # Only the first matching stop is considered.
                    stop = found_bus_stops[["ID", "Number"]].to_dict(
                        orient="records")[0]
                    try:
                        timetable = _process_timetable(
                            bus_stop_id=stop['ID'],
                            bus_stop_nr=stop['Number'],
                            bus_line=bus_line,
                            brigade=brigade,
                            start_time_adjust=earliest_time,
                            api_key=api_key,
                            path=path)
                        # NOTE(review): min() of signed differences picks the
                        # most negative one when timetable entries precede
                        # the record - confirm this is the intended metric.
                        time_diff = (timetable['Time']
                                     - row['Time']).min().total_seconds()
                        punctuality.append(time_diff >= min_delay_seconds)
                    except ValueError:
                        # Deliberate best effort: the record is skipped, but
                        # the progress bar below must still advance.
                        pass
                if progress_bar is not None:
                    progress_bar.update(1)
    finally:
        if progress_bar is not None:
            progress_bar.close()
    return punctuality
def _get_all_of_value(data: pd.DataFrame, name: str,
                      value: Union[str, int, float]) -> pd.DataFrame:
    """
    Select the rows whose column `name` equals `value`.

    Args:
        data: any data frame containing column `name`
        name: name of the column to compare
        value: value the column has to be equal to

    Returns:
        matching rows with the index reset to 0..n-1.
    """
    validate_data_is_type(data, pd.DataFrame)
    validate_if_contains_columns(data, [name])
    matching_rows = data[name] == value
    return data[matching_rows].reset_index(drop=True)