Exemplo n.º 1
0
    def filter_by_bbox(self, left: float, bottom: float, right: float,
                       top: float) -> StationsResult:
        """
        Filter the station listing by a bounding box.

        Stations located exactly on a border are kept, i.e. both ends of the
        latitude and longitude ranges are inclusive.

        :param left: left longitude as float
        :param bottom: bottom latitude as float
        :param right: right longitude as float
        :param top: top latitude as float
        :return: StationsResult with the stations inside the bounding box
        :raises ValueError: if left is not smaller than right or bottom is not
            smaller than top (this includes NaN borders)
        """
        left, bottom, right, top = float(left), float(bottom), float(
            right), float(top)

        # Written as "not a < b" so that NaN borders are rejected here as well
        # instead of silently producing an empty selection.
        if not left < right:
            raise ValueError("bbox left border should be smaller then right")

        if not bottom < top:
            raise ValueError("bbox bottom border should be smaller then top")

        df = self.all().df

        # Vectorised inclusive range checks; rows with missing coordinates
        # evaluate to False and are dropped.
        in_lat_range = df[Columns.LATITUDE.value].between(bottom, top)
        in_lon_range = df[Columns.LONGITUDE.value].between(left, right)

        df = df.loc[in_lat_range & in_lon_range, :]

        return StationsResult(stations=self, df=df.reset_index(drop=True))
Exemplo n.º 2
0
    def all(self) -> StationsResult:
        """
        Build the full station listing on top of the ``_all`` method.

        The frame returned by ``_all`` is reindexed to ``self._base_columns`` and
        passed through ``_coerce_meta_fields``. Date filtering is currently not
        applied (see the TODO below).

        :return: StationsResult wrapping the prepared station DataFrame
        """
        stations_df = self._coerce_meta_fields(
            self._all().reindex(columns=self._base_columns)
        )

        # TODO: exchange with more forceful filtering if user wants, i.e. when
        #  self.start_date is set keep only rows with FROM_DATE <= start_date
        #  and when self.end_date is set keep only rows with TO_DATE >= end_date

        return StationsResult(self, stations_df.copy().reset_index(drop=True))
Exemplo n.º 3
0
def test_format_geojson():
    """Check that the GeoJSON export lists the expected station name."""
    geojson_text = StationsResult(df=df_station, stations=None).to_geojson()

    features = json.loads(geojson_text)["features"]

    # Collect the name property of every exported feature
    exported_names = {feature["properties"]["name"] for feature in features}

    assert "Freyung vorm Wald" in exported_names
Exemplo n.º 4
0
    def filter_by_rank(
        self,
        latitude: float,
        longitude: float,
        rank: int,
    ) -> StationsResult:
        """
        Select the stations nearest to a coordinate, limited by number.

        The neighbours are looked up with ``derive_nearest_neighbours`` and the
        returned distances are multiplied by ``EARTH_RADIUS_KM`` and stored in
        the distance column of the result.

        :param latitude: latitude in degrees
        :param longitude: longitude in degrees
        :param rank: number of stations to be returned, greater 0
        :return: StationsResult with the selected stations and their distance
        :raises ValueError: if rank is smaller than 1
        """
        rank = int(rank)
        if rank <= 0:
            raise ValueError("'num_stations_nearby' has to be at least 1.")

        target = Coordinates(np.array(latitude), np.array(longitude))

        stations_df = self.all().df.reset_index(drop=True)

        raw_distances, raw_indices = derive_nearest_neighbours(
            stations_df[Columns.LATITUDE.value].values,
            stations_df[Columns.LONGITUDE.value].values,
            target,
            rank,
        )

        # Requesting more neighbours than there are stations yields surplus
        # distances and indices, so both are cut to the usable length
        usable = min(stations_df.shape[0], rank)
        nearest_distances = pd.Series(raw_distances)[:usable]
        nearest_indices = pd.Series(raw_indices)[:usable]

        distances_km = np.array(nearest_distances * EARTH_RADIUS_KM)

        nearest_df = stations_df.iloc[nearest_indices, :].reset_index(drop=True)
        nearest_df[Columns.DISTANCE.value] = distances_km

        if nearest_df.empty:
            log.warning(
                f"No weather stations were found for coordinate "
                f"{latitude}°N and {longitude}°E and number {rank}"
            )

        return StationsResult(self, nearest_df.reset_index(drop=True))
Exemplo n.º 5
0
def test_format_geojson():
    """Check that the GeoJSON export lists the expected station name."""
    geojson_text = StationsResult(df=df_station, stations=None).to_geojson()

    features = json.loads(geojson_text)["features"]

    # Collect the name property of every exported feature
    exported_names = [feature["properties"]["name"] for feature in features]

    assert "Freyung vorm Wald" in exported_names
Exemplo n.º 6
0
    def all(self) -> StationsResult:
        """
        Build the full station listing on top of the ``_all`` method.

        A copy of the frame returned by ``_all`` is reindexed to
        ``self._base_columns`` and passed through ``_coerce_meta_fields``.

        :return: StationsResult wrapping the prepared station DataFrame
        """
        # Work on a copy so the frame handed out by _all stays untouched
        raw_df = self._all().copy().reset_index(drop=True)

        prepared_df = self._coerce_meta_fields(
            raw_df.reindex(columns=self._base_columns))

        return StationsResult(self, prepared_df.reset_index(drop=True))
Exemplo n.º 7
0
    def filter_by_station_id(
            self, station_id: Tuple[str, ...]) -> StationsResult:
        """
        Restrict the station listing to the requested station ids.

        The given ids are normalised with ``_parse_station_id`` before they are
        matched against the station id column.

        :param station_id: station ids that are requested
        :return: StationsResult with the matching stations
        """
        stations_df = self.all().df

        requested_ids = self._parse_station_id(pd.Series(station_id))

        log.info(f"Filtering for station_id={list(requested_ids)}")

        is_requested = stations_df[Columns.STATION_ID.value].isin(
            requested_ids)

        return StationsResult(self, stations_df[is_requested])
Exemplo n.º 8
0
    def filter_by_rank(
        self,
        latitude: float,
        longitude: float,
        rank: int,
    ) -> StationsResult:
        """
        Select the stations nearest to a coordinate, limited by number.

        At most as many neighbours as there are stations are requested from
        ``derive_nearest_neighbours``. The returned distances are multiplied by
        ``EARTH_RADIUS_KM`` and stored in the distance column of the result.

        :param latitude: latitude in degrees
        :param longitude: longitude in degrees
        :param rank: number of stations to be returned, greater 0
        :return: StationsResult with the selected stations and their distance
        :raises ValueError: if rank is smaller than 1
        """
        rank = int(rank)
        if rank <= 0:
            raise ValueError("'num_stations_nearby' has to be at least 1.")

        target = Coordinates(np.array(latitude), np.array(longitude))

        stations_df = self.all().df.reset_index(drop=True)

        # Never ask for more neighbours than stations are available
        neighbour_count = min(rank, stations_df.shape[0])

        raw_distances, raw_indices = derive_nearest_neighbours(
            stations_df[Columns.LATITUDE.value].values,
            stations_df[Columns.LONGITUDE.value].values,
            target,
            neighbour_count,
        )

        nearest_df = stations_df.iloc[raw_indices.flatten(), :]
        nearest_df = nearest_df.reset_index(drop=True)

        distances_km = pd.Series(raw_distances.flatten() * EARTH_RADIUS_KM,
                                 dtype=float)
        nearest_df[Columns.DISTANCE.value] = distances_km

        if nearest_df.empty:
            log.warning(
                f"No weather stations_result were found for coordinate "
                f"{latitude}°N and {longitude}°E and number {rank}")

        return StationsResult(self, nearest_df.reset_index(drop=True))
Exemplo n.º 9
0
    def filter_by_name(
        self, name: str, first: bool = True, threshold: int = 90
    ) -> StationsResult:
        """
        Look up stations by name using fuzzy string matching.

        Station names are scored against ``name`` with ``fuzz.token_set_ratio``
        and ``threshold`` is passed on as the score cutoff.

        :param name: name of looked up station
        :param first: boolean if only first station is returned
        :param threshold: threshold for string match 0...100
        :return: StationsResult with the matched station(s); it holds an empty
            frame with the original columns if nothing matched
        :raises ValueError: if threshold is negative
        """
        extract_fun = process.extractOne if first else process.extract

        threshold = int(threshold)
        if threshold < 0:
            raise ValueError("threshold must be ge 0")

        stations_df = self.all().df

        matches = extract_fun(
            query=name,
            choices=stations_df[Columns.NAME.value],
            scorer=fuzz.token_set_ratio,
            score_cutoff=threshold,
        )

        if not matches:
            # Nothing matched: hand back an empty frame with the same columns
            empty_df = pd.DataFrame().reindex(columns=stations_df.columns)
            return StationsResult(stations=self, df=empty_df)

        if first:
            # extractOne yields a single match, wrap it to treat both cases alike
            matches = [matches]

        # The station name is the first element of every match
        matched_names = [match[0] for match in matches]

        is_matched = stations_df[Columns.NAME.value].isin(matched_names)
        matched_df = stations_df[is_matched].reset_index(drop=True)

        return StationsResult(stations=self, df=matched_df)
Exemplo n.º 10
0
    def filter_by_sql(self, sql: str) -> StationsResult:
        """
        Filter the station listing with a SQL statement run through DuckDB.

        The station DataFrame is handed to ``duckdb.query_df`` under the name
        ``data``. The from/to date columns of the query result are localized
        to ``self.tz`` afterwards.

        :param sql: SQL statement to run against the station listing
        :return: StationsResult with the rows returned by the query
        """
        import duckdb

        stations_df = self.all().df

        queried_df: pd.DataFrame = duckdb.query_df(stations_df, "data",
                                                   sql).df()

        # Attach the timezone to both date columns of the query result
        for date_column in (Columns.FROM_DATE.value, Columns.TO_DATE.value):
            localized = queried_df.loc[:, date_column].dt.tz_localize(self.tz)
            queried_df.loc[:, date_column] = localized

        return StationsResult(stations=self,
                              df=queried_df.reset_index(drop=True))
Exemplo n.º 11
0
    def filter_by_distance(
        self, latitude: float, longitude: float, distance: float, unit: str = "km"
    ) -> StationsResult:
        """
        Select all stations within a given distance of a coordinate.

        The distance is converted to kilometres based on ``unit``. All stations
        are then ranked via ``filter_by_rank`` and those farther away than the
        converted distance are dropped.

        :param latitude: latitude in degrees
        :param longitude: longitude in degrees
        :param distance: maximum distance, given in ``unit``
        :param unit: unit string for conversion
        :return: StationsResult with the stations within the given distance
        :raises ValueError: if distance is negative
        """
        distance = float(distance)

        # A distance of exactly 0 km is a valid request, only negatives fail
        if distance < 0:
            raise ValueError("'distance' has to be at least 0.0")

        distance_in_km = guess(distance, unit.strip(), [Distance]).km

        # TODO: replace the repeating call to self.all()
        station_count = self.all().df.shape[0]
        ranked_df = self.filter_by_rank(latitude, longitude, station_count).df

        within_reach = ranked_df[Columns.DISTANCE.value] <= distance_in_km
        nearby_df = ranked_df[within_reach]

        if nearby_df.empty:
            log.warning(
                f"No weather stations were found for coordinate "
                f"{latitude}°N and {longitude}°E and distance {distance_in_km}km"
            )

        return StationsResult(stations=self, df=nearby_df.reset_index(drop=True))