Пример #1
0
    def delete_all(self):
        """
        Discard every registered phase by replacing the phase series with a new one.

        The new series is built from the first/last dates of the current series
        and the population value of its last phase unit.

        Returns:
            covsirphy.PhaseSeries: the newly created phase series
        """
        current = self._series
        self._series = PhaseSeries(
            first_date=current.first_date,
            last_date=current.last_date,
            population=current.unit("last").population,
        )
        return self._series
Пример #2
0
 def __init__(self, jhu_data, population_data, country, province=None):
     """
     Set up the area information, the record range and the empty registries.

     Args:
         jhu_data: object validated as a JHUData instance
         population_data: object validated as a PopulationData instance
         country: country name passed to the datasets
         province: province name or None (self.UNKNOWN is stored instead)
     """
     # Population of the area
     population_data = self.validate_instance(
         population_data, PopulationData, name="population_data")
     self.population = population_data.value(country, province=province)
     # Records
     self.jhu_data = self.validate_instance(
         jhu_data, JHUData, name="jhu_data")
     # Area name
     self.country = country
     self.province = province or self.UNKNOWN
     self.area = JHUData.area_name(country, province)
     # First/last date of the records of the area
     area_df = jhu_data.subset(country=country, province=self.province)
     date_series = area_df[self.DATE]
     self.first_date = date_series.min().strftime(self.DATE_FORMAT)
     self.last_date = date_series.max().strftime(self.DATE_FORMAT)
     # Tau value is not determined yet
     self.tau = None
     # {model_name: model_class}
     self.model_dict = {}
     # {scenario_name: PhaseSeries}, the main scenario is always registered
     self.series_dict = {
         self.MAIN: PhaseSeries(
             self.first_date, self.last_date, self.population),
     }
     # {scenario: {phase: Estimator}}
     self.estimator_dict = {}
Пример #3
0
    def _create_phases(self):
        """
        Build a phase series from the change dates.

        Each (start date, end date) pair given by self._phase_range() is
        registered with the population value found in self.pop_dict for
        the start date.

        Returns:
            (covsirphy.PhaseSeries)
        """
        start_dates, end_dates = self._phase_range(self._change_dates)
        # Populations are looked up before the series is created, as a whole
        populations = [self.pop_dict[start] for start in start_dates]
        series = PhaseSeries(
            self.dates[0],
            self.dates[-1],
            self.population,
            use_0th=self._use_0th,
        )
        for start, end, pop in zip(start_dates, end_dates, populations):
            series.add(start_date=start, end_date=end, population=pop)
        return series
Пример #4
0
 def _init_phase_series(self):
     """
     Create the series dictionary and extract the records of the area.

     The dictionary holds one new PhaseSeries under the self.MAIN key, and
     self.record_df is the subset of self.jhu_data for the same date range.
     """
     main_series = PhaseSeries(
         self._first_date, self._last_date, self.population)
     self._series_dict = {self.MAIN: main_series}
     # Records restricted to the area and the first/last dates
     subset_kwargs = {
         "country": self.country,
         "province": self.province,
         "start_date": self._first_date,
         "end_date": self._last_date,
         "population": self.population,
     }
     self.record_df = self.jhu_data.subset(**subset_kwargs)
Пример #5
0
    def _create_phases(self):
        """
        Create a phase series from the change dates.

        Every (start date, end date) pair given by self._phase_range() is
        registered except the first one, with the population value found
        in self.pop_dict for the start date.

        Returns:
            (covsirphy.PhaseSeries)
        """
        start_dates, end_dates = self._phase_range(self.change_dates)
        pop_list = [self.pop_dict[date] for date in start_dates]
        phase_series = PhaseSeries(
            self.dates[0], self.dates[-1], self.population
        )
        phase_args = list(zip(start_dates, end_dates, pop_list))
        # The first range is not registered
        # NOTE(review): presumably because it is the initial (0th) phase - confirm
        for (start_date, end_date, population) in phase_args[1:]:
            phase_series.add(
                start_date=start_date,
                end_date=end_date,
                population=population
            )
        return phase_series
Пример #6
0
    def create_series(first_date, last_date, population):
        """
        Return a new PhaseSeries built from the given date range and population.

        Args:
            first_date (str): the first date of the records
            last_date (str): the last date of the records
            population (int): population value

        Returns:
            covsirphy.PhaseSeries
        """
        series_kwargs = {
            "first_date": first_date,
            "last_date": last_date,
            "population": population,
        }
        return PhaseSeries(**series_kwargs)
Пример #7
0
class ParamTracker(Term):
    """
    Split records with S-R trend analysis and estimate parameter values of the phases.

    Args:
        record_df (pandas.DataFrame): records

            Index
                reset index
            Columns
                - Date (pandas.TimeStamp): Observation date
                - Confirmed (int): the number of confirmed cases
                - Infected (int): the number of currently infected cases
                - Fatal (int): the number of fatal cases
                - Recovered (int): the number of recovered cases
                - Susceptible (int): the number of susceptible cases
        phase_series (covsirphy.PhaseSeries): phase series object with first/last dates and population
        area (str or None): area name, like Japan/Tokyo, or empty string
        tau (int or None): tau value [min]
    """
    @deprecate("", new="ODEHandler", version="2.19.1-zeta-fu1")
    def __init__(self, record_df, phase_series, area=None, tau=None):
        # Phase series
        self._series = self._ensure_instance(phase_series,
                                             PhaseSeries,
                                             name="phase_series")
        # Records: keep only the rows between the first/last dates of the series
        self._ensure_dataframe(record_df,
                               name="record_df",
                               columns=self.SUB_COLUMNS)
        df = record_df.loc[
            record_df[self.DATE] >= self._ensure_date(phase_series.first_date)]
        self.record_df = df.loc[
            df[self.DATE] <= self._ensure_date(phase_series.last_date)]
        # Area name
        self.area = area or ""
        # Tau value
        self.tau = self._ensure_tau(tau)

    def __len__(self):
        return len(self._series)

    @staticmethod
    def create_series(first_date, last_date, population):
        """
        Create PhaseSeries instance.

        Args:
            first_date (str): the first date of the records
            last_date (str): the last date of the records
            population (int): population value

        Returns:
            covsirphy.PhaseSeries
        """
        return PhaseSeries(first_date=first_date,
                           last_date=last_date,
                           population=population)

    @property
    def series(self):
        """
        covsirphy.PhaseSeries: phase series object (series of covsirphy.PhaseUnit)
        """
        return self._series

    @property
    def last_model(self):
        """
        covsirphy.ModelBase: ODE model of the last phase
        """
        return self._series.unit(phase="last").model

    def trend(self, force=True, show_figure=False, **kwargs):
        """
        Split the records with trend analysis.

        Args:
            force (bool): if True, change points will be over-written
            show_figure (bool): if True, show the result as a figure
            kwargs: keyword arguments of covsirphy.TrendDetector(), .TrendDetector.sr() and .trend_plot()

        Returns:
            covsirphy.PhaseSeries
        """
        detector = TrendDetector(data=self.record_df,
                                 area=self.area,
                                 **find_args(TrendDetector, **kwargs))
        # Perform S-R trend analysis
        detector.sr(**find_args(TrendDetector.sr, **kwargs))
        # Register phases (always when forced, otherwise only if nothing is registered)
        if force or not self._series:
            self._series.clear(include_past=True)
            _, end_dates = detector.dates()
            for end_date in end_dates:
                self._series.add(end_date=end_date)
        # Show S-R plane
        if show_figure:
            detector.show(**find_args(trend_plot, **kwargs))
        return self._series

    def _ensure_phase_setting(self):
        """
        Ensure that phases were set.

        Raises:
            UnExecutedError: the phase series is falsy, i.e. no phases were registered
        """
        if not self._series:
            raise UnExecutedError(".trend() or .add()")

    def find_phase(self, date):
        """
        Find the name of the phase which has the date.

        Args:
            date (str): date, like 01Jan2020

        Raises:
            IndexError: no registered phase includes the date

        Returns:
            tuple(str, covsirphy.PhaseUnit):
                str: phase name, like 1st, 2nd,...
                covsirphy.PhaseUnit: phase unit
        """
        self._ensure_phase_setting()
        # Called only to validate the date; the returned value is not used
        self._ensure_date(date)
        phase_nest = [(self.num2str(i), unit)
                      for (i, unit) in enumerate(self._series) if date in unit]
        try:
            return phase_nest[0]
        except IndexError:
            raise IndexError(f"Phase on {date} is not registered.") from None

    def change_dates(self):
        """
        Return the list of changed dates (start dates of phases since 1st phase).

        Returns:
            list[str]: list of change dates
        """
        return [unit.start_date for unit in self._series][1:]

    def near_change_dates(self):
        """
        Show the list of dates which are yesterday/tomorrow of the start/end dates.

        Returns:
            list[str]: list of dates (the start/end dates themselves are included)
        """
        base_dates = [
            date for ph in self._series
            for date in [ph.start_date, ph.end_date]
        ]
        return [
            date for base_date in base_dates for date in
            [self.yesterday(base_date), base_date,
             self.tomorrow(base_date)]
        ]

    def all_phases(self):
        """
        Return the names of all enabled phases.

        Returns:
            list[str]: the names of all enabled phases
        """
        return [
            self.num2str(num) for (num, unit) in enumerate(self._series)
            if unit
        ]

    def disable(self, phases):
        """
        The phases will be disabled.

        Args:
            phases (list[str] or None): phase names or None (all enabled phases)

        Returns:
            covsirphy.PhaseSeries
        """
        phases = self._ensure_list(phases or self.all_phases(),
                                   candidates=None,
                                   name="phases")
        for phase in phases:
            self._series.disable(phase)
        return self._series

    def enable(self, phases):
        """
        The phases will be enabled.

        Args:
            phases (list[str] or None): phase names or None (all disabled phases)

        Returns:
            covsirphy.PhaseSeries
        """
        all_dis_phases = [
            self.num2str(num) for (num, unit) in enumerate(self._series)
            if not unit
        ]
        phases = self._ensure_list(phases or all_dis_phases,
                                   candidates=None,
                                   name="phases")
        for phase in phases:
            self._series.enable(phase)
        return self._series

    def add(self,
            end_date=None,
            days=None,
            population=None,
            model=None,
            **kwargs):
        """
        Add a new phase.
        The start date will be the next date of the last registered phase.

        Args:
            end_date (str): end date of the new phase
            days (int): the number of days to add
            population (int or None): population value of the start date
            model (covsirphy.ModelBase or None): ODE model
            kwargs: keyword arguments of ODE model parameters, not including tau value

        Raises:
            ValueError: the phase series rejected the new phase with ValueError

        Returns:
            covsirphy.PhaseSeries

        Note:
            - If the phases series has not been registered, new phase series will be created.
            - Either @end_date or @days must be specified.
            - If @end_date and @days are None, the end date will be the last date of the records.
            - If both of @end_date and @days were specified, @end_date will be used.
            - If @population is None, initial value will be used.
            - If @model is None, the model of the last phase will be used.
            - Tau will be fixed as the last phase's value.
            - kwargs: Default values are the parameter values of the last phase.
        """
        if end_date is not None:
            self._ensure_date(end_date, name="end_date")
        try:
            self._series.add(end_date=end_date,
                             days=days,
                             population=population,
                             model=model,
                             tau=self.tau,
                             **kwargs)
        except ValueError:
            # Replace the error with a message which shows the current last date
            last_date = self._series.unit("last").end_date
            raise ValueError(
                f'@end_date must be over {last_date}. However, {end_date} was applied.'
            ) from None
        return self._series

    def delete_all(self):
        """
        Delete all phases.

        The phase series is replaced with a new one which has the same
        first/last dates and the population value of the last phase.

        Returns:
            covsirphy.PhaseSeries
        """
        arg_dict = {
            "first_date": self._series.first_date,
            "last_date": self._series.last_date,
            "population": self._series.unit("last").population
        }
        self._series = PhaseSeries(**arg_dict)
        return self._series

    def delete(self, phases):
        """
        Delete selected phases.

        Args:
            phases (list[str]): phases names to delete

        Returns:
            covsirphy.PhaseSeries
        """
        all_phases = self.all_phases()
        if "last" in set(phases):
            # "last" is an alias of the last enabled phase
            phases = [ph for ph in phases if ph != "last"] + [all_phases[-1]]
        self._ensure_list(phases, candidates=all_phases, name="phases")
        # Delete from the latest phase and remove duplicated names
        phases = sorted(set(phases), key=self.str2num, reverse=True)
        for ph in phases:
            self._series.delete(ph)
        return self._series

    def combine(self, phases, population=None, **kwargs):
        """
        Combine the sequential phases as one phase.
        New phase name will be automatically determined.

        Args:
            phases (list[str]): list of phases ("last" can be included)
            population (int): population value of the start date
            kwargs: keyword arguments to save as phase information

        Raises:
            TypeError: @phases is not a list

        Returns:
            covsirphy.PhaseSeries

        Note:
            @phases is not modified by this method.
        """
        all_phases = self.all_phases()
        has_last = "last" in set(phases)
        # Build a new sorted list so that the list of the caller is never mutated
        phases = sorted(
            (ph for ph in phases if ph != "last"),
            key=self.str2num, reverse=False)
        last_phase = "last" if has_last else phases[-1]
        self._ensure_list(phases, candidates=all_phases, name="phases")
        # Setting of the new phase
        start_date = self._series.unit(phases[0]).start_date
        end_date = self._series.unit(last_phase).end_date
        population = population or self._series.unit(last_phase).population
        new_unit = PhaseUnit(start_date, end_date, population)
        new_unit.set_ode(**kwargs)
        # Phases to keep: units which are out of the range of the new phase
        kept_units = [
            unit for unit in self.series
            if unit < start_date or unit > end_date
        ]
        # Replace units
        self._series.replaces(phase=None,
                              new_list=kept_units + [new_unit],
                              keep_old=False)
        return self._series

    def separate(self, date, population=None, **kwargs):
        """
        Create a new phase with the change point.
        New phase name will be automatically determined.

        Args:
            date (str): change point, i.e. start date of the new phase
            population (int): population value of the change point
            kwargs: keyword arguments of PhaseUnit.set_ode() if update is necessary

        Raises:
            ValueError: @date is a start/end date of a phase or next to one of them

        Returns:
            covsirphy.PhaseSeries
        """
        phase, old = self.find_phase(date)
        if date in self.near_change_dates():
            raise ValueError(
                f"Cannot be separated on {date} because this date is too close to registered change dates."
            )
        # The former part: from the start date of the old phase to the day before @date
        new_pre = PhaseUnit(old.start_date, self.yesterday(date),
                            old.population)
        setting_dict = old.to_dict()
        setting_dict.update(kwargs)
        new_pre.set_ode(**setting_dict)
        # The latter part: from @date to the end date of the old phase
        new_fol = PhaseUnit(date, old.end_date, population or old.population)
        new_fol.set_ode(model=old.model, **setting_dict)
        self._series.replaces(phase, [new_pre, new_fol])
        return self._series

    def past_phases(self, phases=None):
        """
        Return names and phase units of the past phases.

        Args:
            phases (tuple/list[str]): list of phase names, like 1st, 2nd...

        Returns:
            tuple(list[str], list[covsirphy.PhaseUnit]):
                list[str]: list of phase names
                list[covsirphy.PhaseUnit]: list of phase units

        Note:
            If @phases is None, return the all past phases.
            If @phases is not None, intersection will be selected.
            If no enabled past phases exist, two empty lists will be returned.
        """
        self._ensure_phase_setting()
        # List of past phases
        last_date = self.record_df[self.DATE].max().strftime(self.DATE_FORMAT)
        past_nest = [[self.num2str(num), unit]
                     for (num, unit) in enumerate(self._series)
                     if unit and unit <= last_date]
        if not past_nest:
            # zip(*[]) yields nothing and cannot be unpacked into two names
            return ([], [])
        past_phases, _ = zip(*past_nest)
        # Select phases to use
        selected_phases = self._ensure_list(phases or past_phases,
                                            candidates=past_phases,
                                            name="phases")
        selected_set = set(selected_phases)
        final_phases = [ph for ph in past_phases if ph in selected_set]
        # Convert phase names to phase units
        return (final_phases, [self._series.unit(ph) for ph in final_phases])

    def future_phases(self):
        """
        Return names and phase units of the future phases.

        Returns:
            tuple(list[str], list[covsirphy.PhaseUnit]):
                list[str]: list of phase names
                list[covsirphy.PhaseUnit]: list of phase units
        """
        self._ensure_phase_setting()
        # All phases
        all_nest = [[self.num2str(num), unit]
                    for (num, unit) in enumerate(self._series) if unit]
        # Past phases
        past_phases, _ = self.past_phases(phases=None)
        # Future phases
        future_nest = [[ph, unit] for (ph, unit) in all_nest
                       if ph not in past_phases]
        if not future_nest:
            return ([], [])
        return tuple(zip(*future_nest))

    def estimate(self, model, phases=None, n_jobs=-1, **kwargs):
        """
        Perform parameter estimation for each phases.

        Args:
            model (covsirphy.ModelBase): ODE model
            phases (list[str]): list of phase names, like 1st, 2nd...
            n_jobs (int): the number of parallel jobs or -1 (CPU count)
            kwargs: keyword arguments of model parameters and covsirphy.Estimator.run()

        Raises:
            IndexError: no target phases are left for parameter estimation

        Returns:
            tuple(int, covsirphy.PhaseSeries): tau value [min] and phase series

        Note:
            - If @phases is None, all past phase will be used.
            - Phases with estimated parameter values will be ignored.
            - In kwargs, tau value cannot be included.
        """
        self._ensure_phase_setting()
        model = self._ensure_subclass(model, ModelBase, "model")
        # Only the units which do not have ID (id_dict is None) are the targets
        units = [
            unit.set_id(phase=phase)
            for (phase, unit) in zip(*self.past_phases(phases=phases))
            if unit.id_dict is None
        ]
        if not units:
            raise IndexError("All phases have completed parameter estimation.")
        # Parameter estimation
        mp_estimator = MPEstimator(record_df=self.record_df,
                                   model=model,
                                   tau=self.tau,
                                   **kwargs)
        mp_estimator.add(units)
        results = mp_estimator.run(n_jobs=n_jobs, **kwargs)
        self.tau = mp_estimator.tau
        # Register the results
        self._series.replaces(phase=None, new_list=results, keep_old=True)
        return (self.tau, self._series)

    def simulate(self, y0_dict=None):
        """
        Simulate ODE models with set/estimated parameter values.

        Args:
            y0_dict(dict[str, float] or None): dictionary of initial values of variables

        Raises:
            UnExecutedError: the simulation of the phase series raised NameError

        Returns:
            pandas.DataFrame
                Index
                    reset index
                Columns
                    - Date (pd.Timestamp): Observation date
                    - Country (str): country/region name
                    - Province (str): province/prefecture/state name
                    - Variables of the model and dataset (int): Confirmed etc.
        """
        self._ensure_phase_setting()
        try:
            return self._series.simulate(record_df=self.record_df,
                                         y0_dict=y0_dict)
        except NameError:
            raise UnExecutedError(".estimate()") from None

    def _compare_with_actual(self, variables, y0_dict=None):
        """
        Compare actual/simulated number of cases.

        Args:
            variables (list[str]): variables to use in calculation
            y0_dict(dict[str, float] or None): dictionary of initial values of variables

        Returns:
            tuple(pandas.DataFrame, pandas.DataFrame):
                - actual (pandas.DataFrame):
                    Index Date (pd.Timestamp)
                    Columns variables defined by @variables
                - simulated (pandas.DataFrame):
                    Index Date (pd.Timestamp)
                    Columns variables defined by @variables
        """
        record_df = self.record_df.copy().set_index(self.DATE)
        simulated_df = self.simulate(y0_dict=y0_dict).set_index(self.DATE)
        # Simulated columns get "_sim" suffix; only the dates in both tables are kept
        df = record_df.join(simulated_df, how="inner", rsuffix="_sim").dropna()
        rec_df = df.loc[:, variables]
        sim_df = df.loc[:, [f"{col}_sim" for col in variables]]
        sim_df.columns = variables
        return (rec_df, sim_df)

    def score(self, variables=None, phases=None, y0_dict=None, **kwargs):
        """
        Evaluate accuracy of phase setting and parameter estimation of selected enabled phases.

        Args:
            variables (list[str] or None): variables to use in calculation
            phases (list[str] or None): phases to use in calculation
            y0_dict(dict[str, float] or None): dictionary of initial values of variables
            kwargs: keyword arguments of covsirphy.Evaluator.score()

        Returns:
            float: score with the specified metrics

        Note:
            If @variables is None, ["Infected", "Fatal", "Recovered"] will be used.
            "Confirmed", "Infected", "Fatal" and "Recovered" can be used in @variables.
            If @phases is None, all phases will be used.
            The non-target phases are disabled temporarily and enabled again,
            even when the calculation fails.
        """
        # Arguments
        variables = variables or [self.CI, self.F, self.R]
        variables = self._ensure_list(variables,
                                      self.VALUE_COLUMNS,
                                      name="variables")
        # Disable the non-target phases
        all_phases, _ = self.past_phases(phases=None)
        target_phases, _ = self.past_phases(phases=phases)
        ignored_phases = list(set(all_phases) - set(target_phases))
        if ignored_phases:
            self.disable(ignored_phases)
        try:
            # Get the number of cases
            rec_df, sim_df = self._compare_with_actual(variables=variables,
                                                       y0_dict=y0_dict)
            # Calculate score
            evaluator = Evaluator(rec_df, sim_df)
            return evaluator.score(**find_args(Evaluator.score, **kwargs))
        finally:
            # Enable the disabled non-target phases, also when an error was raised
            if ignored_phases:
                self.enable(ignored_phases)

    def last_end_date(self):
        """
        Return the last end date of the series.

        Returns:
            str: the last end date
        """
        return self._series.unit(phase="last").end_date