示例#1
0
    def history(self, target, y0_dict=None, show_figure=True, filename=None):
        """
        Compare scenarios by the time course of one variable or parameter.

        Args:
            target (str): column of the tracking dataframe to compare (Rt etc.)
            y0_dict (dict or None): initial values forwarded to .track(), or None
                - key (str): variable name
                - value (float): initial value
            show_figure (bool): if True, draw the pivoted values as a line plot
            filename (str): filename forwarded to line_plot(), or None

        Raises:
            KeyError: @target is not a column of the tracking dataframe

        Returns:
            pandas.DataFrame: values of @target with dates as the index and
                scenarios as the columns (the last value of each pair is used)
        """
        tracked = self.track(y0_dict=y0_dict)
        if target not in tracked.columns:
            col_str = ", ".join(list(tracked.columns))
            raise KeyError(
                f"@target must be selected from {col_str}, but {target} was applied."
            )
        pivoted = tracked.pivot_table(
            values=target,
            index=self.DATE,
            columns=self.SERIES,
            aggfunc="last",
        )
        if not show_figure:
            return pivoted
        # Vertical lines: start dates of every phase of "Main" except the first
        main_series = self._series_dict["Main"]
        change_dates = [unit.start_date for unit in main_series][1:]
        title = f"{self.area}: {self.RT_FULL if target == self.RT else target} over time"
        # A horizontal line at 1.0 is drawn only for the reproduction number
        if target == self.RT:
            unity_line = 1.0
        else:
            unity_line = None
        line_plot(
            pivoted,
            title,
            ylabel=target,
            h=unity_line,
            v=change_dates,
            math_scale=False,
            filename=filename,
        )
        return pivoted
示例#2
0
    def records(self, show_figure=True, filename=None):
        """
        Return the records of the area, optionally showing them as a line plot.

        Args:
            show_figure (bool): if True, show the records as a line-plot.
            filename (str): filename forwarded to line_plot(), or None

        Returns:
            (pandas.DataFrame): the output of
                jhu_data.subset(country=..., province=...), unchanged.
                According to the previous documentation of this method:
                Index:
                    reset index
                Columns:
                    - Date (pd.TimeStamp): Observation date
                    - Confirmed (int): the number of confirmed cases
                    - Infected (int): the number of currently infected cases
                    - Fatal (int): the number of fatal cases
                    - Recovered (int): the number of recovered cases (> 0)

        Notes:
            The figure omits the column named by self.C; the returned
            dataframe keeps every column.
        """
        records_df = self.jhu_data.subset(
            country=self.country, province=self.province)
        if show_figure:
            # Plot against the date and leave out the self.C column
            plot_df = records_df.set_index(self.DATE).drop(self.C, axis=1)
            line_plot(
                plot_df,
                f"{self.area}: Cases over time",
                y_integer=True,
                filename=filename,
            )
        return records_df
示例#3
0
 def param_history(self, targets=None, box_plot=True, **kwargs):
     """
     Show the ratio of parameter values to those of the 1st phase as a figure.
     @targets <list[str] or str>: parameters to show (including Rt etc.).
         If None, all columns of self.param_df. "R0" is read as "Rt".
     @box_plot <bool>: if True, bar plot. if False, line plot.
     @kwargs: keyword arguments of pd.DataFrame.plot.bar() or line_plot()
     """
     # NOTE(review): presumably refreshes self.param_df; return value is unused
     _ = self.show_parameters()
     targets = self.param_df.columns if targets is None else targets
     targets = [targets] if isinstance(targets, str) else targets
     if "R0" in targets:
         targets = [t.replace("R0", "Rt") for t in targets]
     df = self.param_df.loc[:, targets]
     # Label each phase as "<start>-<end>" ('-' in the end date becomes 'today').
     # The two columns are zipped instead of reading row[0]/row[1]: integer
     # keys on rows labelled by column name are deprecated in pandas.
     df.index = [
         f"{start}-{end.replace('-', 'today')}"
         for (start, end) in zip(
             self.param_df["start_date"], self.param_df["end_date"])
     ]
     # Every row is expressed relative to the first phase
     df = df / df.iloc[0]
     if box_plot:
         df.plot.bar(title="Ratio to 1st parameters", **kwargs)
         plt.xticks(rotation=0)
         plt.legend(bbox_to_anchor=(1.02, 0),
                    loc="lower left", borderaxespad=0)
         plt.show()
     else:
         # Use 1-based phase numbers as the x-axis of the line plot
         _df = df.reset_index(drop=True)
         _df.index = _df.index + 1
         line_plot(
             _df, title="Ratio to 1st parameters",
             xlabel="Phase", ylabel=str(), math_scale=False,
             **kwargs
         )
示例#4
0
    def param_history(self,
                      targets=None,
                      name="Main",
                      divide_by_first=True,
                      show_figure=True,
                      filename=None,
                      show_box_plot=True,
                      **kwargs):
        """
        Return the parameter values of each phase and optionally plot them.

        Args:
            targets (list[str] or str or None): parameters to show (Rt etc.),
                forwarded to self._param_history()
            name (str): phase series name
            divide_by_first (bool): if True, divide the values by 1st phase's values
            show_box_plot (bool): if True, box plot. if False, line plot
            show_figure (bool): If True, show the result as a figure
            filename (str): filename of the figure, or None (show figure)
            kwargs: only inspected to reject the retired 'box_plot' keyword

        Raises:
            KeyError: 'box_plot' was passed, or @name is not registered

        Returns:
            (pandas.DataFrame): values returned by self._param_history(),
                divided by the first row when @divide_by_first is True

        Notes:
            If 'Main' was used as @name, main PhaseSeries will be used.
        """
        # Check arguments
        if "box_plot" in kwargs:
            raise KeyError("Please use 'show_box_plot', not 'box_plot'")
        name = self.MAIN if name == "Main" else name
        if name not in self.series_dict:
            raise KeyError(f"@name {name} scenario has not been registered.")
        # Select target to show
        df = self._param_history(targets, name)
        # Divide by the first phase parameters
        if divide_by_first:
            df = df / df.iloc[0, :]
            title = f"{self.area}: Ratio to 1st phase parameters ({name} scenario)"
        else:
            title = f"{self.area}: History of parameter values ({name} scenario)"
        if not show_figure:
            return df
        if show_box_plot:
            # A reference line at 1.0 is useful for ratios and for Rt.
            # @targets may be None (membership test would raise TypeError)
            # or a single string (membership test would match substrings).
            show_unity = divide_by_first
            if not show_unity:
                if targets is None:
                    show_unity = self.RT in df.columns
                else:
                    target_list = [targets] if isinstance(targets, str) else targets
                    show_unity = self.RT in target_list
            h_values = [1.0] if show_unity else None
            box_plot(df, title, h=h_values, filename=filename)
            return df
        # Line plot: use 1-based phase numbers as the x-axis
        _df = df.reset_index(drop=True)
        _df.index = _df.index + 1
        h = 1.0 if divide_by_first else None
        line_plot(_df,
                  title=title,
                  xlabel="Phase",
                  ylabel=str(),
                  math_scale=False,
                  h=h,
                  filename=filename)
        return df
示例#5
0
    def param_history(self,
                      param,
                      roll_window=None,
                      show_figure=True,
                      filename=None,
                      **kwargs):
        """
        Return subset of summary and show a figure to show the history in each country.

        Args:
            param (str): parameter to show
            roll_window (int or None): rolling average window if necessary
            show_figure (bool): If True, show the result as a figure
            filename (str): filename of the figure, or None (show figure)
            kwargs: keyword arguments of line_plot()

        Raises:
            TypeError: self.model is None (estimation has not been done)
            KeyError: @param is not a parameter, day parameter or Rt of the model

        Returns:
            pandas.DataFrame:
                Index: dates covered by the phases
                Columns: (str) country names
                Values: parameter values (rolling mean if @roll_window is set)
        """
        if self.model is None:
            raise TypeError(
                "PolicyMeasures.estimate(model) must be done in advance.")
        selectable_params = [
            *self.model.PARAMETERS, *self.model.DAY_PARAMETERS, self.RT
        ]
        if param not in selectable_params:
            sel_param_str = ', '.join(selectable_params)
            raise KeyError(
                f"@param must be selected from {sel_param_str}, but {param} was applied."
            )
        # Get the parameter value of each date
        df = self.summary().reset_index()
        df[self.START] = pd.to_datetime(df[self.START],
                                        format=self.DATE_FORMAT)
        df[self.END] = pd.to_datetime(df[self.END], format=self.DATE_FORMAT)
        # Expand every phase to one row per day. Rows are read by column
        # label: integer keys on label-indexed rows are deprecated in pandas.
        df[self.DATE] = df[[self.START, self.END]].apply(
            lambda row: pd.date_range(
                row[self.START], row[self.END]).tolist(),
            axis=1)
        df = df.explode(self.DATE)
        df = df.pivot_table(values=param,
                            index=self.DATE,
                            columns=self.COUNTRY)
        # Rolling mean
        if roll_window is not None:
            roll_window = self.ensure_natural_int(roll_window,
                                                  name="roll_window")
            df = df.rolling(window=roll_window).mean()
        # Show figure
        if not show_figure:
            return df
        # @filename and @kwargs are part of the signature and must reach
        # line_plot(); previously they were silently dropped.
        line_plot(df,
                  title=f"History of {param} in each country",
                  ylabel=param,
                  h=1 if param == self.RT else None,
                  filename=filename,
                  **kwargs)
        return df
示例#6
0
    def simulate(self,
                 name="Main",
                 y0_dict=None,
                 show_figure=True,
                 filename=None):
        """
        Run the simulation of a scenario and optionally show it as a figure.

        Args:
            name (str): phase series name. If 'Main', main PhaseSeries will be used
            y0_dict (dict or None): initial values forwarded to self._simulate()
                - key (str): variable name
                - value (float): initial value
            show_figure (bool):
                - if True, show the result as a figure.
            filename (str): filename of the figure, or None (show figure)

        Raises:
            KeyError: no phase of the scenario is marked as a future phase

        Returns:
            (pandas.DataFrame)
                Index:
                    reset index
                Columns:
                    - Date (str): date formatted with self.DATE_FORMAT
                    - the remaining simulated columns, daily means cast to int64
        """
        name = self.MAIN if name == "Main" else name
        summary_df = self.series_dict[name].summary()
        # Future phases must be added in advance
        if self.FUTURE not in summary_df[self.TENSE].unique():
            raise KeyError(
                f"Future phases of {name} scenario must be registered by Scenario.add_phase() in advance."
            )
        # Simulation, aggregated to one integer record per day
        raw_df, start_objects = self._simulate(name=name, y0_dict=y0_dict)
        daily_df = raw_df.set_index(
            self.DATE).resample("D").mean().astype(np.int64)
        variables = daily_df.columns.tolist()
        # Returned table: formatted date string first, then the variables
        result_df = daily_df.reset_index(drop=True)
        result_df[self.DATE] = daily_df.index.strftime(self.DATE_FORMAT)
        result_df = result_df.loc[:, [self.DATE, *variables]]
        if show_figure:
            # Keep the order of self.FIG_COLUMNS for the columns that exist
            available = set(daily_df.columns) & set(self.FIG_COLUMNS)
            fig_cols = [col for col in self.FIG_COLUMNS if col in available]
            line_plot(
                daily_df[fig_cols],
                title=f"{self.area}: Predicted number of cases",
                filename=filename,
                y_integer=True,
                v=start_objects[1:],
            )
        return result_df
示例#7
0
 def show_record(self):
     """
     Plot the records over time and return them.
     The "Confirmed" column is left out of the figure only;
     self.record_df is returned as it is.
     """
     # x-axis: "Date", lines: every remaining column except "Confirmed"
     plot_df = self.record_df.drop("Confirmed", axis=1).set_index("Date")
     title = f"{self.name}: Cases over time"
     line_plot(plot_df, title, y_integer=True)
     return self.record_df
示例#8
0
    def history(self,
                param,
                roll_window=None,
                show_figure=True,
                filename=None,
                **kwargs):
        """
        Return the time course of one tracked column for all countries.

        Args:
            param (str): column of the tracking dataframe to show
                (parameter/day parameter/Rt/OxCGRT score)
            roll_window (int or None): rolling average window if necessary
            show_figure (bool): If True, show the result as a figure
            filename (str): filename of the figure, or None (show figure)
            kwargs: keyword arguments of line_plot()

        Raises:
            KeyError: @param is not a column of the tracking dataframe

        Returns:
            pandas.DataFrame:
                Index: Date (pd.TimeStamp) date
                Columns: (str) country names
                Values: parameter values (rolling mean if @roll_window is set)
        """
        tracked = self.track()
        # Select the param
        if param not in tracked.columns:
            sel_param_str = ', '.join(tracked.columns.tolist())
            raise KeyError(
                f"@param must be selected from {sel_param_str}, but {param} was applied."
            )
        history_df = tracked.pivot_table(
            values=param,
            index=self.DATE,
            columns=self.COUNTRY,
            aggfunc="last",
        )
        # Rolling mean
        if roll_window is not None:
            roll_window = self.ensure_natural_int(
                roll_window, name="roll_window")
            history_df = history_df.rolling(window=roll_window).mean()
        if show_figure:
            # A horizontal line at 1 is drawn only for the reproduction number
            unity_line = 1 if param == self.RT else None
            line_plot(
                history_df,
                title=f"History of {param} in each country",
                ylabel=param,
                h=unity_line,
                filename=filename,
                **kwargs
            )
        return history_df
示例#9
0
    def line_plot(self, df, show_figure=True, filename=None, **kwargs):
        """
        Display or save a line plot of the dataframe, depending on the mode.

        Args:
            df (pandas.DataFrame): data forwarded to the line_plot() function
            show_figure (bool): whether show figure when interactive mode or not
            filename (str or None): filename of the figure or None (not save) when script mode
            kwargs: the other keyword arguments of the line_plot() function

        Returns:
            the return value of the line_plot() function,
            or None when nothing was plotted

        Note:
            When interactive mode and @show_figure is True, display the figure.
            When script mode and filename is not None, save the figure.
            In every other case nothing is done.
            When using interactive shell, we can change the modes by Scenario.interactive = True/False.
        """
        if self._interactive:
            # Interactive mode: display only, the filename is never used
            if show_figure:
                return line_plot(df=df, filename=None, **kwargs)
            return None
        # Script mode: a figure is produced only when it can be saved
        if filename is not None:
            return line_plot(df=df, filename=filename, **kwargs)
        return None
示例#10
0
    def simulate(self,
                 name="Main",
                 y0_dict=None,
                 show_figure=True,
                 filename=None):
        """
        Simulate the scenario with the set parameter values and optionally plot it.

        Args:
            name (str): phase series name. If 'Main', main PhaseSeries will be used
            y0_dict (dict or None): dictionary of initial values or None
                - key (str): variable name
                - value (float): initial value
            show_figure (bool):
                - if True, show the result as a figure.
            filename (str): filename of the figure, or None (show figure)

        Returns:
            (pandas.DataFrame): the output of the series' simulate(), unchanged.
                According to the previous documentation of this method:
                Index:
                    reset index
                Columns:
                    - Date (pd.TimeStamp): Observation date
                    - Country (str): country/region name
                    - Province (str): province/prefecture/state name
                    - Variables of the model and dataset (int): Confirmed etc.
        """
        series = self._ensure_name(name)
        sim_df = series.simulate(record_df=self.record_df, y0_dict=y0_dict)
        if show_figure:
            dated_df = sim_df.set_index(self.DATE)
            # Keep the order of self.FIG_COLUMNS for the columns that exist
            available = set(dated_df.columns) & set(self.FIG_COLUMNS)
            fig_cols = [col for col in self.FIG_COLUMNS if col in available]
            # Vertical lines: start dates of every phase except the first
            change_dates = [unit.start_date for unit in series][1:]
            line_plot(
                dated_df[fig_cols],
                title=f"{self.area}: Predicted number of cases ({name} scenario)",
                filename=filename,
                y_integer=True,
                v=change_dates,
            )
        return sim_df
示例#11
0
 def restore_graph(self, drop_cols=None, min_infected=1, **kwargs):
     """
     Show the dimensional simulated data as a figure.
     @drop_cols <list[str]>: the columns not to be shown
     @min_infected <int>: forwarded to self.restore_df()
         (if Infected < min_infected, the records will not be used)
     @kwargs: keyword arguments of line_plot() function
     """
     plot_df = self.restore_df(min_infected=min_infected)
     if drop_cols is not None:
         plot_df = plot_df.drop(drop_cols, axis=1)
     # Midnight of the current day
     today = datetime.now().replace(
         hour=0, minute=0, second=0, microsecond=0)
     # With a single registered line, today is put in front of it;
     # otherwise a copy of the registered lines is used as it is
     if len(self.axvlines) == 1:
         axvlines = [today, *self.axvlines]
     else:
         axvlines = self.axvlines[:]
     title = f"{self.name}: {', '.join(self.title_list)}"
     # The last vertical line is never drawn
     line_plot(
         plot_df,
         title=title,
         v=axvlines[:-1],
         h=self.total_population,
         **kwargs
     )
示例#12
0
 def predict_graph(self, step_n, name=None, excluded_cols=None):
     """
     Predict the values in the future and create a figure.
     @step_n <int>: the number of steps
     @name <str>: name of the area, used only when self.name is None
         (an empty string is used if both are None)
     @excluded_cols <list[str]>: the excluded columns in the figure
     """
     # self.name takes priority over the argument
     if self.name is not None:
         name = self.name
     elif name is None:
         name = str()
     predicted = self.predict_df(step_n=step_n)
     if excluded_cols is not None:
         predicted = predicted.drop(excluded_cols, axis=1)
     r0 = self.param_dict["R0"]
     title = f"Prediction in {name} with {self.model.NAME} model: R0 = {r0}"
     # Vertical line at midnight of the current day
     today = datetime.now().replace(
         hour=0, minute=0, second=0, microsecond=0)
     line_plot(predicted, title, v=today, h=self.total_population)
示例#13
0
    def history_rate(self,
                     params=None,
                     name="Main",
                     show_figure=True,
                     filename=None):
        """
        Show change rates of parameter values in one figure.
        We can find the parameters which increased/decreased significantly.

        Args:
            params (list[str] or None): parameters to show. If none of them
                is a tracked model parameter, all model parameters are shown
            name (str): phase series name
            show_figure (bool): If True, show the result as a figure
            filename (str): filename of the figure, or None (show figure)

        Raises:
            TypeError: @params is neither None, a list nor a set

        Returns:
            pandas.DataFrame: tracked parameter values divided by the values
                of the first row; columns follow the order of model.PARAMETERS
        """
        df = self._track_param(name=name)
        series = self._series_dict[name]
        model = series.unit("last").model
        # Keep the order declared by the model. Intersecting sets made the
        # column (and legend) order change from run to run.
        tracked_cols = set(df.columns)
        cols = [
            param for param in dict.fromkeys(model.PARAMETERS)
            if param in tracked_cols
        ]
        if params is not None:
            if not isinstance(params, (list, set)):
                raise TypeError(
                    f"@params must be a list of parameters, but {params} were applied."
                )
            requested = set(params)
            # Fall back to all parameters when nothing requested is available
            cols = [col for col in cols if col in requested] or cols
        # Change rate: every row relative to the first tracked row
        df = df.loc[:, cols] / df.loc[df.index[0], cols]
        if show_figure:
            # Vertical lines: start dates of every phase except the first
            change_dates = [unit.start_date for unit in series][1:]
            f_date = df.index[0].strftime(self.DATE_FORMAT)
            title = f"{self.area}: {model.NAME} parameter change rates over time (1.0 on {f_date})"
            ylabel = f"Value per that on {f_date}"
            line_plot(df,
                      title,
                      ylabel=ylabel,
                      v=change_dates,
                      math_scale=False,
                      filename=filename)
        return df
示例#14
0
 def compare_estimated_numbers(self, phases=None):
     """
     Compare the observed number of confirmed cases with the numbers
     estimated with the parameters of each phase, and show graphs
     (one figure per selected phase).
     @phases <list[str]>: phase to show (if None, all)
     """
     phases = list(self.phase_dict.keys()) if phases is None else phases
     # Observed
     df = pd.DataFrame(self.record_df.set_index("Date")["Confirmed"])
     # Estimated
     for (num, estimator) in self.estimator_dict.items():
         model, info_dict, param_dict = estimator.info()
         # Number of days from the estimator's start time to today, plus one
         diff = (datetime.today() - info_dict["start_time"]).total_seconds()
         day_n = int(diff / 60 / 60 / 24 + 1)
         predicter = Predicter(**info_dict)
         predicter.add(model, end_day_n=day_n, **param_dict)
         # Calculate the number of confirmed cases
         # (sum of all restored columns except "Susceptible")
         new_df = predicter.restore_df().drop(
             "Susceptible", axis=1
         ).sum(axis=1)
         # Keep the last value of each day
         new_df = new_df.resample("D").last()
         df = pd.concat([df, new_df], axis=1)
     # Show graph
     df = df.fillna(0).astype(np.int64)
     # NOTE(review): the renaming assumes one estimator per phase, in the same
     # order as self.phase_dict -- confirm against the caller
     df.columns = ["Observed"] + \
         [f"{phase}_param" for phase in self.phase_dict.keys()]
     # From the start of the 1st phase to the last observed date
     df = df.loc[
         self.phase_dict["1st"]["start_date"]: self.record_df["Date"].max(), :]
     for col in df.columns[1:]:
         # The text before the first "_" of the column name is the phase name
         if col[:col.find("_")] not in phases:
             continue
         # Zeros (including the filled missing values) are plotted as NaN
         line_plot(
             df.replace(0, np.nan)[["Observed", col]],
             f"Confirmed cases over time: Actual and predicted with {col}",
             y_integer=True
         )
示例#15
0
    def positive_rate(self,
                      country,
                      province=None,
                      window=7,
                      show_figure=True,
                      filename=None):
        """
        Return the PCR rate of a country as a dataframe.

        Args:
            country(str): country name or ISO3 code
            province(str or None): province name
            window (int): window of moving average, >= 1
            show_figure (bool): if True, show the records as a line-plot.
            filename (str): filename of the figure, or None (display figure)

        Raises:
            covsirphy.PCRIncorrectPreconditionError: the dataset has too many missing values

        Returns:
            pandas.DataFrame
                Index
                    reset index
                Columns
                    - Date (pandas.TimeStamp): Observation date
                    - Tests (int): the number of total tests performed
                    - Confirmed (int): the number of confirmed cases
                    - Tests_diff (int): daily tests performed
                    - Confirmed_diff (int): daily confirmed cases
                    - Test_positive_rate (float): positive rate (%) of the daily cases over the total daily tests performed

        Note:
            If non monotonic records were found for either confirmed cases or tests,
            "with partially complemented tests data" will be added to the title of the figure.
            The rate is 0 on the dates where the daily tests are not more
            than self.min_pcr_tests.
        """
        window = self._ensure_natural_int(window, name="window")
        # Subset with area
        country_alias = self.ensure_country_name(country)
        province = province or self.UNKNOWN
        try:
            subset_df = self._subset_select(country_alias, province)
        except PCRIncorrectPreconditionError:
            # Re-raise with the names given by the caller and a clear message
            raise PCRIncorrectPreconditionError(
                country=country,
                province=province,
                message="Too many missing Tests records") from None
        # Process PCR data
        df, is_complemented = self._pcr_processing(subset_df, window)
        # Calculate PCR values column-wise. The former row-wise lambda read
        # row[0]/row[1], an integer lookup on label-indexed rows that pandas
        # deprecates; the vectorized form gives the same values.
        daily_tests = df[self.T_DIFF]
        rate = df[self.C_DIFF] / daily_tests * 100
        df[self.PCR_RATE] = rate.where(daily_tests > self.min_pcr_tests, 0)
        if not show_figure:
            return df
        # Create figure
        area = self.area_name(country, province=province)
        comp_status = "\nwith partially complemented tests data" if is_complemented else ""
        line_plot(
            df.set_index(self.DATE)[self.PCR_RATE],
            title=f"{area}: Test positive rate (%) over time {comp_status}",
            ylabel="Test positive rate (%)",
            y_integer=True,
            filename=filename,
            show_legend=False,
        )
        return df