Exemplo n.º 1
0
    def _curve_fitting(self, phase, start_date, end_date):
        """
        Fit the S-R trend of one phase and label the result with the phase name.

        Args:
            phase (str): phase name ("0th" is replaced with self.INITIAL)
            start_date (str): start date of the phase
            end_date (str): end date of the phase

        Returns:
            tuple
                (pandas.DataFrame): Result of curve fitting, i.e. the dataframe
                    returned by Trend.run() with three columns renamed
                    Columns:
                        - (phase name)(self.P): predicted value of Susceptible,
                            filled with None when RMSLE exceeds self.max_rmsle
                        - (phase name)(self.A): actual value of Susceptible
                        - (phase name)_(self.R): Recovered
                (int): minimum value of R, which is the change point of the curve
        """
        # Keep only the records inside the phase (both ends inclusive)
        records = self.sr_df.copy()
        first_day = self.date_obj(start_date)
        last_day = self.date_obj(end_date)
        within_phase = (records.index >= first_day) & (records.index <= last_day)
        records = records.loc[within_phase, :]
        # Fit the curve; drop the prediction when the fit is too poor
        trend = Trend(records)
        fitted = trend.run()
        if trend.rmsle() > self.max_rmsle:
            fitted[f"{self.S}{self.P}"] = None
        # Minimum R is handed back so the caller can draw a vertical line there
        min_recovered = int(fitted[self.R].min())
        # Prefix the columns with the phase label
        label = phase
        if phase == "0th":
            label = self.INITIAL
        renames = (
            (f"{self.S}{self.P}", f"{label}{self.P}"),
            (f"{self.S}{self.A}", f"{label}{self.A}"),
            (f"{self.R}", f"{label}_{self.R}"),
        )
        for old_name, new_name in renames:
            fitted = fitted.rename({old_name: new_name}, axis=1)
        return (fitted, min_recovered)
Exemplo n.º 2
0
    def show(self, area, change_dates=None, **kwargs):
        """
        Show the S-R trend in a figure.

        Args:
            area (str): area name, used as the prefix of the figure title
            change_dates (list[str] or None): list of change points
            kwargs: keyword arguments of covsirphy.trend_plot(), forwarded
                to Trend.show_with_many()

        Note:
            @change_dates must be specified if ChangeFinder.run() was not done.
        """
        # Curve fitting, one phase per (start date, end date) pair
        start_dates, end_dates = self.date_range(change_dates)
        fits = []
        for num, period in enumerate(zip(start_dates, end_dates)):
            start_date, end_date = period
            fits.append(
                self._curve_fitting(self.num2str(num), start_date, end_date))
        # Split the (dataframe, minimum R) pairs into two tuples
        frames, r_minimums = zip(*fits)

        def _as_number(column):
            # Non-numeric values (e.g. None predictions) become NaN
            return pd.to_numeric(column, errors="coerce", downcast="integer")

        # Put the raw S-R records and the fitted phases side by side
        merged = pd.concat([self.sr_df, *frames], axis=1)
        merged = merged.rename({self.S: f"{self.S}{self.A}"}, axis=1)
        merged = merged.apply(_as_number, axis=0)
        # Show figure
        predicted = []
        for name in merged.columns:
            if name.endswith(self.P):
                predicted.append(name)
        if len(predicted) != 1:
            dates = self._change_dates[:]
            # Six dates per title line
            rows = []
            for head in range(0, len(dates), 6):
                rows.append(", ".join(dates[head: head + 6]))
            change_str = ",\n".join(rows)
            title = f"{area}: S-R trend changed on\n{change_str}"
        else:
            title = f"{area}: S-R trend without change points"
        # The first minimum R is not drawn as a vertical line
        Trend.show_with_many(
            result_df=merged,
            predicted_cols=predicted,
            title=title,
            v=r_minimums[1:],
            **kwargs
        )
Exemplo n.º 3
0
    def show(self, show_figure=True, filename=None):
        """
        Show the result as a figure and return the phase series.

        Args:
            show_figure (bool): if True, show the result as a figure;
                if False, return without curve fitting or plotting
            filename (str or None): filename of the figure, or None (display figure)

        Returns:
            (covsirphy.PhaseSeries): the object returned by self._create_phases()
        """
        # Create phase series
        phase_series = self._create_phases()
        # The fitted curves are only used to draw the figure, so skip the
        # (potentially expensive) curve fitting when no figure is requested.
        if not show_figure:
            return phase_series
        phase_dict = phase_series.to_dict()
        # Curve fitting: one (dataframe, minimum R) pair per phase
        nested = [
            self._curve_fitting(phase, info)
            for (phase, info) in phase_dict.items()
        ]
        df_list, vlines = zip(*nested)
        # The dataframe of the first phase is left out of the figure
        comp_df = pd.concat(df_list[1:], axis=1)
        # Total R / actual S: sum over the per-phase columns, NaN counted as 0
        comp_df[self.R] = comp_df.fillna(0).loc[
            :, comp_df.columns.str.endswith(self.R)
        ].sum(axis=1)
        comp_df[f"{self.S}{self.A}"] = comp_df.fillna(0).loc[
            :, comp_df.columns.str.endswith(self.A)
        ].sum(axis=1)
        # Non-numeric values (e.g. None predictions) become NaN
        comp_df = comp_df.apply(
            lambda x: pd.to_numeric(x, errors="coerce", downcast="integer"),
            axis=0
        )
        # Show figure
        pred_cols = comp_df.loc[
            :, comp_df.columns.str.endswith(self.P)
        ].columns.tolist()
        if len(pred_cols) == 1:
            title = f"{self.area}: S-R trend without change points"
        else:
            _list = self.change_dates[:]
            # Six dates per title line
            strings = [
                ", ".join(_list[i: i + 6]) for i in range(0, len(_list), 6)
            ]
            change_str = ",\n".join(strings)
            title = f"{self.area}: S-R trend changed on\n{change_str}"
        Trend.show_with_many(
            result_df=comp_df,
            predicted_cols=pred_cols,
            title=title,
            vlines=vlines[2:],
            filename=filename
        )
        return phase_series
Exemplo n.º 4
0
 def error_f(self, start_dates, end_dates):
     """
     Error score to minimize in the study.
     This is the weighted average of the RMSLE scores of the phases,
     with weights 1, 2, 3, ... in the order the phases are given.
     @start_dates <list[str]>: list of start date of phases (candidates)
     @end_dates <list[str]>: list of end date of phases (candidates)
     @return <float> : score of the error function to minimize
     """
     def _phase_rmsle(start_date, end_date):
         # Population is looked up by the start date of the phase
         trend = Trend(
             self.clean_df,
             self.pop_dict[start_date],
             self.country,
             province=self.province,
             start_date=start_date,
             end_date=end_date,
         )
         trend.analyse()
         return trend.rmsle()

     scores = [
         _phase_rmsle(start_date, end_date)
         for (start_date, end_date) in zip(start_dates, end_dates)
     ]
     weights = range(1, len(scores) + 1)
     return np.average(scores, weights=weights)
Exemplo n.º 5
0
    def _curve_fitting(self, phase, info):
        """
        Fit the S-R trend of one phase and label the result with the phase name.

        Args:
            phase (str): phase name ("0th" is replaced with self.INITIAL)
            info (dict[str]): start date, end date and population,
                read with the keys self.START, self.END and self.N

        Returns:
            (tuple)
                (pandas.DataFrame): Result of curve fitting, i.e. the dataframe
                    returned by Trend.result() with three columns renamed
                    Columns:
                        - (phase name)(self.P): predicted value of Susceptible,
                            filled with None when RMSLE exceeds self.max_rmsle
                        - (phase name)(self.A): actual value of Susceptible
                        - (phase name)_(self.R): Recovered
                (int): minimum value of R, which is the change point of the curve
        """
        # Unpack the phase information
        period = {"start_date": info[self.START], "end_date": info[self.END]}
        population = info[self.N]
        # Fit the curve for the phase
        trend = Trend(
            self.jhu_data,
            population,
            self.country,
            province=self.province,
            **period
        )
        trend.analyse()
        result = trend.result()
        # Drop the prediction when the fit is too poor
        if trend.rmsle() > self.max_rmsle:
            result[f"{self.S}{self.P}"] = None
        # Minimum R is handed back so the caller can draw a vertical line there
        min_recovered = int(result[self.R].min())
        # Prefix the columns with the phase label
        label = phase
        if phase == "0th":
            label = self.INITIAL
        result = result.rename(
            {f"{self.S}{self.P}": f"{label}{self.P}"}, axis=1
        ).rename(
            {f"{self.S}{self.A}": f"{label}{self.A}"}, axis=1
        ).rename(
            {f"{self.R}": f"{label}_{self.R}"}, axis=1
        )
        return (result, min_recovered)
Exemplo n.º 6
0
    def show(self, show_figure=True, filename=None):
        """
        Show the result as a figure and return the phase series.

        Args:
            show_figure (bool): if True, show the result as a figure;
                if False, return without curve fitting or plotting
            filename (str or None): filename of the figure, or None (show figure)

        Returns:
            (covsirphy.PhaseSeries): the object returned by self._create_phases()
        """
        # Create phase series
        phase_series = self._create_phases()
        # The fitted curves are only used to draw the figure, so skip the
        # (potentially expensive) curve fitting when no figure is requested.
        if not show_figure:
            return phase_series
        phase_dict = phase_series.to_dict()
        # Curve fitting
        df_list = []
        vlines = []
        for (phase, info) in phase_dict.items():
            start_date = info[self.START]
            end_date = info[self.END]
            population = info[self.N]
            trend = Trend(self.clean_df,
                          population,
                          self.country,
                          province=self.province,
                          start_date=start_date,
                          end_date=end_date)
            trend.analyse()
            df = trend.result()
            # Get min value for vline
            vlines.append(df[self.R].min())
            # Rename the columns. Compare the whole name: a substring
            # replacement of "0th" would also corrupt "10th", "20th", ...
            phase = self.INITIAL if phase == "0th" else phase
            df = df.rename(
                columns={
                    f"{self.S}{self.P}": f"{phase}{self.P}",
                    f"{self.S}{self.A}": f"{phase}{self.A}",
                    f"{self.R}": f"{phase}_{self.R}",
                }
            )
            df_list.append(df)
        # Without change points every dataframe is plotted; otherwise the
        # dataframe of the first phase is left out of the figure.
        frames = df_list if self.n_points == 0 else df_list[1:]
        comp_df = pd.concat(frames, axis=1)
        # Total R / actual S: sum over the per-phase columns, NaN counted as 0
        comp_df[self.R] = comp_df.fillna(
            0).loc[:, comp_df.columns.str.endswith(self.R)].sum(axis=1)
        comp_df[f"{self.S}{self.A}"] = comp_df.fillna(
            0).loc[:, comp_df.columns.str.endswith(self.A)].sum(axis=1)
        # Non-numeric values become NaN
        comp_df = comp_df.apply(
            lambda x: pd.to_numeric(x, errors="coerce", downcast="integer"),
            axis=0)
        # Show figure
        pred_cols = comp_df.loc[
            :, comp_df.columns.str.endswith(self.P)
        ].columns.tolist()
        if len(pred_cols) == 1:
            title = f"{self.area}: S-R trend without change points"
        else:
            change_str = ", ".join(self.change_dates)
            title = f"{self.area}: S-R trend changed on {change_str}"
        Trend.show_with_many(result_df=comp_df,
                             predicted_cols=pred_cols,
                             title=title,
                             vlines=vlines[2:],
                             filename=filename)
        return phase_series