示例#1
0
def get_all_of_time(data: pd.DataFrame,
                    start: Union[str, pd.Timestamp],
                    end: Union[str, pd.Timestamp],
                    time_name: str = 'Time') -> pd.DataFrame:
    """
    Restrict data to rows whose time lies between start and end (inclusive).

    Each bound may independently be a string or a ``pd.Timestamp``. String
    bounds are type- and format-validated and converted to ``pd.Timestamp``;
    bounds that already are timestamps are used as given.
    Args:
        data: data containing time column for conditioning
        start: start time (inclusive)
        end: end time (inclusive)
        time_name: name of column containing time

    Returns:
        rows with start <= time <= end, with the index reset.
    """
    bounds = (start, end)
    # Bounds are handled independently: a mixed (Timestamp, str) pair must
    # still have its string side validated and converted.
    raw_bounds = [b for b in bounds if not isinstance(b, pd.Timestamp)]
    if raw_bounds:
        validate_multiple_params(raw_bounds,
                                 lambda x: validate_data_is_type(x, str))
        for bound in raw_bounds:
            validate_matches_time_format(bound)
    start, end = (b if isinstance(b, pd.Timestamp) else pd.Timestamp(b)
                  for b in bounds)

    validate_data_is_type(data, pd.DataFrame)
    validate_data_is_time_column(data[time_name])

    in_range = (start <= data[time_name]) & (data[time_name] <= end)
    return data[in_range].reset_index(drop=True)
示例#2
0
def get_all_incidents(data: pd.DataFrame, speed_limit: int) -> pd.DataFrame:
    """
    Get all speed incidents for all buses.

    The data is split per line and then per brigade, and incidents are
    collected for each such group with get_speed_incidents_for_bus.

    Note: ``data`` is sorted by 'Time' and re-indexed in place.
    Args:
        data: data regarding all buses activity
        speed_limit: maximum speed limit we treat as acceptable (km/hour).

    Returns:
        All speed incidents in the format based on _report_incident. The
        columns 'Lines', 'Speed', 'Lat', 'Lon', 'Time' come first; the frame
        is empty (with these columns) when no incident is found.
    """
    validate_if_contains_columns(data,
                                 ['Lines', 'Brigade', 'Lon', 'Lat', 'Time'])
    validate_data_is_type(speed_limit, int)
    data.sort_values(by='Time', ascending=True, inplace=True)
    data.reset_index(drop=True, inplace=True)
    report_columns = ['Lines', 'Speed', 'Lat', 'Lon', 'Time']

    # Collect per-bus frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole report on every call.
    collected = []
    for line in data['Lines'].unique():
        per_line = get_all_of_line(data, line)
        for brigade in per_line['Brigade'].unique():
            per_brigade = get_all_of_brigade(per_line, brigade)
            incidents = get_speed_incidents_for_bus(per_brigade, speed_limit)
            if len(incidents) > 0:
                incidents['Lines'] = line
                collected.append(incidents)

    if not collected:
        return pd.DataFrame(columns=report_columns)

    report = pd.concat(collected, ignore_index=True)
    # Keep the expected columns first, followed by any extra ones that the
    # incident frames carry.
    extra_columns = [c for c in report.columns if c not in report_columns]
    return report.reindex(columns=report_columns + extra_columns)
示例#3
0
def remove_duplicates(data: pd.DataFrame) -> pd.DataFrame:
    """
    Drop duplicated rows and renumber the index from zero.
    Args:
        data: any data frame

    Returns:
        new data frame without duplicated rows and with a fresh index.
    """
    validate_data_is_type(data, pd.DataFrame)
    unique_rows = data.drop_duplicates()
    return unique_rows.reset_index(drop=True)
示例#4
0
def test_validate_multiple_params():
    """Test for bwaw.utils.validation.validate_multiple_params"""
    # One pytest.raises block per failing call: the context exits at the
    # first exception, so a second call in the same block would never run.
    with pytest.raises(TypeError):
        validate_multiple_params(['a', 5],
                                 lambda x: validate_data_is_type(x, str))
    with pytest.raises(TypeError):
        validate_multiple_params([5., 3.],
                                 lambda x: validate_data_is_type(x, int))

    # Matching types must pass without raising.
    validate_multiple_params([1, 2], lambda x: validate_data_is_type(x, int))
    validate_multiple_params([2., 3.],
                             lambda x: validate_data_is_type(x, float))
示例#5
0
def convert_response_list_to_dataframe(response_list: List) -> pd.DataFrame:
    """
    Build a pandas data frame out of a response list.
    Args:
        response_list: list of values or dicts

    Returns:
        pandas data frame constructed from the list
    """
    validate_data_is_type(response_list, list)
    frame = pd.DataFrame(response_list)
    return frame
示例#6
0
def get_active_buses(api_key: str) -> List:
    """
    Fetch the list of all currently active buses.
    Args:
        api_key: API key provided by UMWaw

    Returns:
        list of metadata of all currently active buses
    """
    validate_data_is_type(api_key, str)
    buses_request = _create_active_buses_request(api_key)
    raw_response = _get_resource_from_request(resource_request=buses_request)
    return _format_active_bus_response(raw_response)
示例#7
0
def test_validate_data_is_type():
    """Test for bwaw.utils.validation.validate_data_is_type"""
    # One pytest.raises block per failing call: the context exits at the
    # first exception, so a second call in the same block would never run.
    with pytest.raises(TypeError):
        validate_data_is_type(5, str)
    with pytest.raises(TypeError):
        validate_data_is_type('a', int)

    # Matching types must pass without raising.
    validate_data_is_type(5, int)
    validate_data_is_type(5., float)
示例#8
0
def convert_dataframe_to_response_list(data: pd.DataFrame) -> List:
    """
    Turn a pandas data frame back into a response list.
    Args:
        data: data in the format of pandas data frame

    Returns:
        list of the column's values for a single-column frame, otherwise
        list of dicts (one per row)

    Raises:
        ValueError: if the data frame has no columns
    """
    validate_data_is_type(data, pd.DataFrame)
    column_count = len(data.columns)

    if column_count == 0:
        raise ValueError('Empty data frame.')

    if column_count == 1:
        only_column = data.columns[0]
        return list(data[only_column])

    records = []
    for _, row in data.iterrows():
        records.append(dict(row))
    return records
示例#9
0
def get_timetable_for_line_on_bus_stop(api_key: str, bus_stop_id: str,
                                       bus_stop_nr: str, line: str) -> List:
    """
    Fetch the timetable of a single line on a single bus stop.
    Args:
        api_key: API key provided by UMWaw
        bus_stop_id: bus stop identifier
        bus_stop_nr: bus stop number (eg. 01, 02, etc.)
        line: bus line number

    Returns:
        list of line timetable on bus stop.
    """
    string_params = [api_key, bus_stop_id, bus_stop_nr, line]
    validate_multiple_params(string_params,
                             lambda x: validate_data_is_type(x, str))

    query = {
        PARAMETER.RESOURCE_ID2: RESOURCE_ID.TIMETABLE_FOR_LINE,
        PARAMETER.API_KEY: api_key,
        PARAMETER.BUS_STOP_ID: bus_stop_id,
        PARAMETER.BUS_STOP_NR: bus_stop_nr,
        PARAMETER.LINE_NR: line,
    }
    timetable_request = _create_request(table_name=TABLE.TIMETABLES,
                                        parameters=query)
    raw_response = _get_resource_from_request(
        resource_request=timetable_request)
    return _format_timetable_on_stop_response(raw_response)
示例#10
0
def _adjust_date(column: pd.Series, start_from: pd.Timestamp):
    """
    Re-date a time column relative to start_from, keeping the time of day.

    Values whose time of day is at or after start_from's time get
    start_from's date; values with an earlier time of day get the following
    day. The result lists the "following day" group first, then the
    "same day" group, each keeping its original index labels.
    Args:
        column: time column to adjust
        start_from: timestamp providing the reference date and time of day

    Returns:
        series of adjusted datetimes with the same shape as column
    """
    validate_data_is_type(column, pd.Series)
    validate_data_is_time_column(column)

    boundary = start_from.time()
    times_of_day = column.dt.time
    first_day = start_from.date()
    second_day = first_day + pd.Timedelta(days=1)

    def _rebase(values, day):
        # Rebuild every value as "<day> <time of day>" and parse it back.
        return pd.to_datetime(values.apply(lambda x: f'{day} {x.time()}'))

    same_day = _rebase(column[times_of_day >= boundary], first_day)
    next_day = _rebase(column[times_of_day < boundary], second_day)

    output = pd.concat((next_day, same_day))
    assert output.shape == column.shape

    return output
示例#11
0
def _create_active_buses_request(api_key: str) -> request.Request:
    """
    Build the request asking for the list of active buses.
    Args:
        api_key: API key provided by UMWaw

    Returns:
        request for list of active buses
    """
    validate_data_is_type(api_key, str)
    query = {
        PARAMETER.RESOURCE_ID1: RESOURCE_ID.BUSES_ACTIVE,
        PARAMETER.API_KEY: api_key,
        PARAMETER.TYPE: CONSTANTS.ACTIVE_BUS_STATIC_TYPE,
    }
    return _create_request(table_name=TABLE.BUSES, parameters=query)
示例#12
0
def get_bus_stops_coordinates(api_key: str) -> List:
    """
    Fetch the coordinates of all bus stops.
    Args:
        api_key: API key provided by UMWaw

    Returns:
        list of all bus stops' coordinates
    """
    validate_data_is_type(api_key, str)
    query = {
        PARAMETER.RESOURCE_ID2: RESOURCE_ID.BUS_STOP_COORDINATE,
        PARAMETER.API_KEY: api_key,
    }
    stops_request = _create_request(table_name=TABLE.STOPS, parameters=query)
    raw_response = _get_resource_from_request(resource_request=stops_request)
    return _format_all_coordinates_response(raw_response)
示例#13
0
def column_str_to_datetime(column: pd.Series,
                           time_only: bool = False) -> pd.Series:
    """
    Parse a column of time strings into datetimes.

    Every entry is checked with validate_matches_time_format before parsing.
    Args:
        column: given column
        time_only: if only time is provided in string (parsed as '%H:%M:%S')

    Returns:
        formatted column
    """
    validate_data_is_type(column, pd.Series)
    for entry in column:
        validate_matches_time_format(entry)

    # An explicit format is passed only for time-only strings.
    parse_options = {'format': '%H:%M:%S'} if time_only else {}
    return pd.to_datetime(column, **parse_options)
示例#14
0
def get_active_buses_over_time(api_key: str,
                               no_of_requests: int = 1,
                               interval_btwn_requests: int = 1,
                               keep_partial_if_fail: bool = True) -> List:
    """
    Fetch the list of active buses repeatedly over some period.
    Args:
        api_key: API key provided by UMWaw
        no_of_requests: number of calls to UMWaw
        interval_btwn_requests: time [minutes] between calls to UMWaw
        keep_partial_if_fail: if partial results should be stored if call fails

    Returns:
        list of metadata of all currently active buses aggregated from whole period
    """
    validate_data_is_type(api_key, str)
    buses_request = _create_active_buses_request(api_key)
    responses = _get_resource_over_time(
        resource_request=buses_request,
        no_of_requests=no_of_requests,
        interval_btwn_requests=interval_btwn_requests,
        keep_partial_if_fail=keep_partial_if_fail)

    # Flatten the per-call results into a single list, in call order.
    aggregated = []
    for single_response in responses:
        aggregated.extend(_format_active_bus_response(single_response))
    return aggregated
示例#15
0
def get_speed_incidents_for_bus(data: pd.DataFrame,
                                speed_limit: int,
                                max_plausible_speed: int = 150
                                ) -> pd.DataFrame:
    """
    Get all speed incidents for a single bus.

    Speed is computed between each pair of consecutive (time-ordered)
    records; a pair is reported when its speed lies strictly between
    speed_limit and max_plausible_speed.

    Note: ``data`` is sorted by 'Time' and re-indexed in place.
    Args:
        data: data regarding bus activity
        speed_limit: maximum speed limit we treat as acceptable (km/hour).
        max_plausible_speed: speeds at or above this value are not reported
            (presumably treated as measurement errors); defaults to 150,
            the previously hard-coded bound.

    Returns:
        All speed incidents in the format based on _report_incident

    Raises:
        ValueError: if data covers more than one line or more than one brigade
    """
    validate_if_contains_columns(data,
                                 ['Lon', 'Lat', 'Time', 'Lines', 'Brigade'])
    validate_data_is_type(speed_limit, int)
    if len(data['Lines'].unique()) > 1 or len(data['Brigade'].unique()) > 1:
        raise ValueError(
            'Data does not consist of information from single bus/brigade.')

    data.sort_values(by='Time', ascending=True, inplace=True)
    data.reset_index(drop=True, inplace=True)
    report = []

    # After the index reset the labels are 0..len-1, so label-based `.at`
    # lookups on i and i + 1 address consecutive records.
    for i in range(len(data) - 1):
        distance = _calculate_distance_km(lon_x=data.at[i, 'Lon'],
                                          lat_x=data.at[i, 'Lat'],
                                          lon_y=data.at[i + 1, 'Lon'],
                                          lat_y=data.at[i + 1, 'Lat'])
        time = _calculate_time_difference_hours(data.at[i, 'Time'],
                                                data.at[i + 1, 'Time'])
        # A falsy time difference (e.g. zero) is skipped, so no speed is
        # computed for it.
        if not time:
            continue

        speed = _calculate_speed(distance, time)
        if max_plausible_speed > speed > speed_limit:
            report.append(_report_incident(data.iloc[[i, i + 1]], speed))

    return pd.DataFrame(report)
示例#16
0
def get_bus_stops_ids_by_name(api_key: str, name: str) -> List:
    """
    Fetch the ids of all bus stops carrying the given name.
    Args:
        api_key: API key provided by UMWaw
        name: bus stop name

    Returns:
        list of all bus stops' ids by bus stop name
    """
    string_params = [api_key, name]
    validate_multiple_params(string_params,
                             lambda x: validate_data_is_type(x, str))
    query = {
        PARAMETER.RESOURCE_ID2: RESOURCE_ID.BUS_STOP_BY_NAME,
        PARAMETER.API_KEY: api_key,
        PARAMETER.BUS_STOP_NAME: name,
    }
    ids_request = _create_request(table_name=TABLE.TIMETABLES,
                                  parameters=query)
    raw_response = _get_resource_from_request(resource_request=ids_request)
    return _format_bus_stop_id_response(raw_response)
示例#17
0
def get_all_lines_on_bus_stop(api_key: str, bus_stop_id: str,
                              bus_stop_nr: str) -> List:
    """
    Fetch all bus lines that serve the given bus stop.
    Args:
        api_key: API key provided by UMWaw
        bus_stop_id: bus stop identifier
        bus_stop_nr: bus stop number (eg. 01, 02, etc.)

    Returns:
        list of all bus lines on given bus stop
    """
    string_params = [api_key, bus_stop_nr, bus_stop_id]
    validate_multiple_params(string_params,
                             lambda x: validate_data_is_type(x, str))
    query = {
        PARAMETER.RESOURCE_ID2: RESOURCE_ID.BUSES_ON_STOP,
        PARAMETER.API_KEY: api_key,
        PARAMETER.BUS_STOP_ID: bus_stop_id,
        PARAMETER.BUS_STOP_NR: bus_stop_nr,
    }
    lines_request = _create_request(table_name=TABLE.TIMETABLES,
                                    parameters=query)
    raw_response = _get_resource_from_request(resource_request=lines_request)
    return _format_all_lines_on_stop_response(raw_response)
示例#18
0
def get_punctuality_list_for_bus(bus_coordinates: pd.DataFrame,
                                 stops_coordinates: pd.DataFrame,
                                 api_key: str = None,
                                 path: Path = None,
                                 proximity: int = 10,
                                 time: int = 1,
                                 verbosity: bool = False) -> List:
    """
    Generate punctuality record for single bus.

    For every bus record that lies within the proximity tolerance of some
    bus stop, the timetable of the first matching stop is processed and the
    smallest difference between timetable times and the record's time is
    compared against the threshold. Records for which timetable processing
    raises ValueError contribute no entry.
    Args:
        bus_coordinates: array of active buses for single bus
        stops_coordinates: array of bus stops coordinates
        api_key: UMWaw API key if timetables are processed online
        path: path to directory containing .csv files if timetables are already downloaded
        proximity: proximity error regarding closeness between bus and a bus stop (in meters)
        time: minimum time meaning punctuality incident (in minutes)
        verbosity: if progress bar of timetables processing should be shown

    Returns:
        list with True - punctuality incident, False - bus on time
    """
    validate_multiple_params([bus_coordinates, stops_coordinates],
                             lambda x: validate_data_is_type(x, pd.DataFrame))
    validate_multiple_params([proximity, time],
                             lambda x: validate_data_is_type(x, int))
    if api_key:
        validate_data_is_type(api_key, str)
    if path:
        validate_data_is_type(path, Path)
    validate_data_is_type(verbosity, bool)

    progress_bar = tqdm(total=len(bus_coordinates)) if verbosity else None
    proximity = _proximity_to_tolerance(proximity)
    time *= 60  # threshold in seconds, to match total_seconds() below
    punctuality = []
    try:
        for brigade in bus_coordinates["Brigade"].unique():
            for _, row in get_all_of_brigade(bus_coordinates,
                                             brigade).iterrows():
                found_bus_stops = stops_coordinates[
                    (abs(stops_coordinates['Latitude'] - row['Lat'])
                     < proximity)
                    &
                    (abs(stops_coordinates['Longitude'] - row['Lon'])
                     < proximity)]

                if len(found_bus_stops) > 0:
                    # Only the first matching bus stop is considered.
                    res = found_bus_stops[["ID", "Number"]].to_dict(
                        orient="records")[0]
                    try:
                        res = _process_timetable(
                            bus_stop_id=res['ID'],
                            bus_stop_nr=res['Number'],
                            bus_line=bus_coordinates.at[0, 'Lines'],
                            brigade=brigade,
                            start_time_adjust=bus_coordinates['Time'].min(),
                            api_key=api_key,
                            path=path)
                        time_diff = (res['Time'] -
                                     row['Time']).min().total_seconds()
                        punctuality.append(time_diff >= time)
                    except ValueError:
                        # Deliberate best-effort: a record whose timetable
                        # cannot be processed is skipped. No `continue`
                        # here, so the progress bar still advances.
                        pass

                if progress_bar is not None:
                    progress_bar.update(1)
    finally:
        # Release the progress bar even if processing fails midway.
        if progress_bar is not None:
            progress_bar.close()

    return punctuality
示例#19
0
def _get_all_of_value(data: pd.DataFrame, name: str,
                      value: Union[str, int, float]) -> pd.DataFrame:
    """
    Select the rows whose column ``name`` equals ``value``.
    Args:
        data: data frame to filter
        name: name of the column to compare
        value: value the column has to be equal to

    Returns:
        matching rows with the index reset.
    """
    validate_data_is_type(data, pd.DataFrame)
    validate_if_contains_columns(data, [name])
    matches = data[name] == value
    return data[matches].reset_index(drop=True)