Пример #1
0
    def _colored_map_country(self, country, variable, title, date, **kwargs):
        """
        Draw a colored map of one country, showing the values of each province.

        Args:
            country (str): country name
            variable (str): name of the column to show
            title (str): title of the figure
            date (str or None): date of the records, or None to use the last record of each province
            kwargs: keyword arguments forwarded to self._colored_map()
                (i.e. arguments of covsirphy.ColoredMap() and covsirphy.ColoredMap.plot())

        Raises:
            UnExpectedValueError: @variable is not a column of the cleaned dataset
            SubsetNotFoundError: no province level records were found for the country
        """
        records = self._cleaned_df.copy()
        alias = self.ensure_country_name(country)
        # The variable must be one of the columns of the cleaned dataset
        if variable not in records.columns:
            selectable = [col for col in records.columns if col not in self.AREA_ABBR_COLS]
            raise UnExpectedValueError(name="variable", value=variable, candidates=selectable)
        # Keep the rows of the country whose province is not self.UNKNOWN
        self._ensure_dataframe(records, name="cleaned dataset", columns=[self.COUNTRY, self.PROVINCE])
        in_country = records[self.COUNTRY] == alias
        at_province_level = records[self.PROVINCE] != self.UNKNOWN
        records = records.loc[in_country & at_province_level]
        if records.empty:
            raise SubsetNotFoundError(
                country=country, country_alias=alias, message="at province level")
        # Restrict to the specified date when one was given
        if date is not None:
            self._ensure_dataframe(records, name="cleaned dataset", columns=[self.DATE])
            on_date = records[self.DATE] == pd.to_datetime(date)
            records = records.loc[on_date]
        # One row per province: the last record of each group
        latest = records.groupby(self.PROVINCE).last().reset_index()
        # Hand the data to the plotting helper, with the target column named "Value"
        latest[self.COUNTRY] = alias
        latest = latest.rename(columns={variable: "Value"})
        self._colored_map(title=title, data=latest, level=self.PROVINCE, **kwargs)
Пример #2
0
    def _colored_map_global(self, variable, title, date, **kwargs):
        """
        Create global colored map to show the values at country level.

        Args:
            variable (str): variable name to show
            title (str): title of the figure
            date (str or None): date of the records or None (the last value)
            kwargs: arguments of ColoredMap() and ColoredMap.plot(), forwarded to self._colored_map()

        Raises:
            UnExpectedValueError: @variable is not a column of the cleaned dataset

        Note:
            When the dataset has a province column, records of the province "Greenland" are
            re-labelled as records of the country "Greenland" (ISO3 code: GRL) before the
            country level records are selected.
        """
        df = self._cleaned_df
        # Check variable name
        if variable not in df.columns:
            candidates = [col for col in df.columns if col not in self.AREA_ABBR_COLS]
            raise UnExpectedValueError(name="variable", value=variable, candidates=candidates)
        # Remove cruise ships.
        # The explicit copy keeps self._cleaned_df untouched and avoids SettingWithCopyWarning
        # when columns of the filtered frame are overwritten below.
        df = df.loc[df[self.COUNTRY] != self.OTHERS].copy()
        if self.PROVINCE in df.columns:
            # Recognize province as a region/country
            for (col, label) in ((self.ISO3, "GRL"), (self.COUNTRY, "Greenland")):
                # add_categories() raises ValueError for a category which already exists,
                # so register only the missing ones
                if label not in df[col].cat.categories:
                    df[col] = df[col].cat.add_categories([label])
            df.loc[df[self.PROVINCE] == "Greenland", self.AREA_ABBR_COLS] = ["GRL", "Greenland", self.UNKNOWN]
            # Select country level data
            df = df.loc[df[self.PROVINCE] == self.UNKNOWN]
        # Select date
        if date is not None:
            self._ensure_dataframe(df, name="cleaned dataset", columns=[self.DATE])
            df = df.loc[df[self.DATE] == pd.to_datetime(date)]
        # Group by plain strings (not categories); copy first because @df may be a filtered slice
        df = df.copy()
        df[self.COUNTRY] = df[self.COUNTRY].astype(str)
        df = df.groupby(self.COUNTRY).last().reset_index()
        # Plotting
        df = df.rename(columns={variable: "Value"})
        self._colored_map(title=title, data=df, level=self.COUNTRY, **kwargs)
Пример #3
0
    def _register_extras(self, extras):
        """
        Validate the extra datasets and register them to self._data_dict.

        Args:
            extras (list[covsirphy.CleaningBase]): extra datasets

        Raises:
            TypeError: non-data cleaning instance was included as an extra dataset
                (expected to be raised by self._ensure_instance())
            UnExpectedValueError: instance of un-expected data cleaning class was included as an extra dataset

        Note:
            All datasets are validated before any of them is registered.
        """
        self._ensure_list(extras, name="extras")
        # First pass: reject the whole list when any dataset is not acceptable
        for (i, dataset) in enumerate(extras, start=1):
            statement = f"{self.num2str(i)} extra dataset"
            # Must be a data cleaning instance
            self._ensure_instance(dataset, CleaningBase, name=statement)
            # Must be an instance of one of the classes listed in EXTRA_DICT
            if not isinstance(dataset, tuple(self.EXTRA_DICT.values())):
                raise UnExpectedValueError(name=statement,
                                           value=type(dataset),
                                           candidates=list(self.EXTRA_DICT.keys()))
        # Second pass: store each dataset under every name whose class it matches
        for dataset in extras:
            for (name, data_class) in self.EXTRA_DICT.items():
                if isinstance(dataset, data_class):
                    self._data_dict[name] = dataset
Пример #4
0
    def _ensure_selectable(self, target, candidates, name="target"):
        """
        Confirm that the target is one of the candidates.

        Args:
            target (object): target to check
            candidates (list[object]): list of candidates
            name (str): name of the target, used in the error

        Raises:
            UnExpectedValueError: the target is not included in the candidates

        Returns:
            object: the target itself
        """
        self._ensure_list(candidates, name="candidates")
        if target not in candidates:
            raise UnExpectedValueError(name=name, value=target, candidates=candidates)
        return target
Пример #5
0
    def score(self, metric=None, metrics="RMSLE"):
        """
        Return the score of the registered true/predicted values with the selected metric.

        Args:
            metric (str or None): ME, MAE, MSE, MSLE, MAPE, RMSE, RMSLE, R2 or None (use @metrics)
            metrics (str): alias of @metric

        Raises:
            UnExpectedValueError: un-expected metric was applied
            ValueError: the metric function raised ValueError,
                e.g. ME was selected as metric when the targets have multiple columns

        Returns:
            float: score with the metric

        Note:
            ME: maximum residual error
            MAE: mean absolute error
            MSE: mean square error
            MSLE: mean squared logarithmic error
            MAPE: mean absolute percentage error
            RMSE: root mean squared error
            RMSLE: root mean squared logarithmic error
            R2: the coefficient of determination

        Note:
            The metric name is case-insensitive.
            When @metric is None, @metrics will be used as @metric. Default value is "RMSLE".
        """
        metric = (metric or metrics).upper()
        registry = self._METRICS_DICT
        # Only registered metric names are acceptable
        if metric not in registry:
            raise UnExpectedValueError("metric", metric, candidates=list(registry.keys()))
        # The first element of each registry entry is the scoring function
        scorer = registry[metric][0]
        try:
            value = float(scorer(self._true, self._pred))
        except ValueError:
            # Multioutput not supported
            raise ValueError(
                f"When the targets have multiple columns, we cannot select {metric}."
            ) from None
        return value
Пример #6
0
    def smaller_is_better(cls, metric=None, metrics="RMSLE"):
        """
        Tell whether a smaller value of the metric means a better result.

        Args:
            metric (str or None): ME, MAE, MSE, MSLE, MAPE, RMSE, RMSLE, R2 or None (use @metrics)
            metrics (str): alias of @metric

        Raises:
            UnExpectedValueError: un-expected metric was applied

        Returns:
            bool: whether smaller value is better or not

        Note:
            The metric name is case-insensitive. When @metric is None, @metrics is used.
        """
        metric = (metric or metrics).upper()
        registry = cls._METRICS_DICT
        if metric in registry:
            # The second element of each registry entry is the flag
            return registry[metric][1]
        raise UnExpectedValueError("metric", metric, candidates=list(registry.keys()))