Пример #1
0
    def records_main(self):
        """
        Return records of the main dataset for the selected area and dates.

        The records are requested from the registered JHUData instance using
        the area, date range, population and complement settings held by this
        object. Two attributes are updated as side effects:
        self._complemented (second value returned by JHUData.records()) and
        self._main_cols (names of the returned columns except for the date).

        Raises:
            NotRegisteredMainError: JHUData was not registered
            SubsetNotFoundError: failed in subsetting because of lack of data
                (not raised here directly; presumably propagated from JHUData.records())

        Returns:
            pandas.DataFrame: the dataframe returned by JHUData.records()
                Index
                    reset index
                Columns:
                    - Date (pd.Timestamp): Observation date
                    - Confirmed (int): the number of confirmed cases
                    - Infected (int): the number of currently infected cases
                    - Fatal (int): the number of fatal cases
                    - Recovered (int): the number of recovered cases ( > 0)
                    - Susceptible (int): the number of susceptible cases
        """
        main_data = self._data_dict[self.__NAME_JHU]
        if main_data is None:
            # The main dataset is mandatory for subsetting
            raise NotRegisteredMainError(".register(jhu_data)")
        # Keyword arguments are passed directly so that a duplicated key is
        # reported by the call itself
        subset_result = main_data.records(
            **self._area_dict,
            start_date=self._first_date,
            end_date=self._last_date,
            population=self._population,
            **self._complement_dict,
        )
        records_df, self._complemented = subset_result
        # Remember the main column names (the date column is shared with the
        # extra datasets, so it is left out)
        non_date_cols = set(records_df.columns) - {self.DATE}
        self._main_cols = list(non_date_cols)
        return records_df
Пример #2
0
    def population(self):
        """
        int: population value stored in this object

        Raises:
            NotRegisteredMainError: the population value has not been set (it is None)
        """
        value = self._population
        if value is not None:
            return value
        raise NotRegisteredMainError(".register(jhu_data)")
Пример #3
0
    def complemented(self):
        """
        bool or str: whether the records were complemented or not and the details

        Raises:
            NotRegisteredMainError: the complement status has not been set (it is None)
        """
        status = self._complemented
        if status is not None:
            return status
        raise NotRegisteredMainError(".register(jhu_data)")
Пример #4
0
    def records_extras(self):
        """
        Return records of the extra datasets as a dataframe.

        Every registered dataset whose name is not in self.MAIN_DICT is subset
        for the selected area. The subsets are outer-joined on the date column
        (a column name already taken by an earlier dataset is not added again),
        columns of the main dataset are dropped, and the result is resampled to
        daily frequency, forward-filled, zero-filled and cut to the date range
        of this object.

        Raises:
            NotRegisteredMainError: a registered dataset is still None
                (either JHUData or PopulationData was not registered)
            NotRegisteredExtraError: no extra datasets were registered

        Returns:
            pandas.DataFrame:
                Index
                    reset index
                Columns:
                    - Date(pd.Timestamp): Observation date
                    - columns defined in the extra datasets

        Note:
            Datasets which raise SubsetNotFoundError for the area are skipped,
            including when the error comes from the country-only retry.
            self._main_cols is read here; in the visible code it is assigned by
            .records_main().
        """
        if None in self._data_dict.values():
            raise NotRegisteredMainError(
                ".register(jhu_data, population_data)")
        if not set(self._data_dict) - set(self.MAIN_DICT):
            raise NotRegisteredExtraError(
                ".register(jhu_data, population_data, extras=[...])",
                message="with extra datasets")
        # Get all subset
        df = pd.DataFrame(columns=[self.DATE])
        for (name, data) in self._data_dict.items():
            if name in self.MAIN_DICT:
                continue
            # The outer handler covers both attempts: an exception raised inside
            # an except clause is not seen by its sibling clauses
            try:
                try:
                    subset_df = data.subset(**self._area_dict)
                except TypeError:
                    # Presumably the dataset accepts only the country key
                    subset_df = data.subset(country=self._area_dict["country"])
            except SubsetNotFoundError:
                continue
            # Keep the date and the columns which were not registered yet
            new_cols = (set(subset_df) - set(df.columns)) | {self.DATE}
            subset_df = subset_df.loc[:, subset_df.columns.isin(new_cols)]
            df = df.merge(subset_df, how="outer", on=self.DATE)
        # Make the date dtype explicit: when every dataset was skipped the
        # column is still the empty object-dtype placeholder created above
        df[self.DATE] = pd.to_datetime(df[self.DATE])
        # Remove columns which is included in the main datasets
        df = df.loc[:, ~df.columns.isin(self._main_cols)]
        # Data cleaning (.ffill() replaces the deprecated fillna(method="ffill"))
        df = df.set_index(self.DATE).resample("D").last()
        df = df.ffill().fillna(0)
        # Subsetting by dates
        first_date = pd.to_datetime(self._first_date)
        last_date = pd.to_datetime(self._last_date)
        df = df.loc[first_date:last_date]
        # Convert float values to integer if values will not be changed
        for col in df.columns:
            converted2int = df[col].astype(np.int64)
            if np.array_equal(converted2int, df[col]):
                df[col] = converted2int
        return df.reset_index()
Пример #5
0
    def recovery_period(self):
        """
        Return the recovery period held by the registered JHUData instance.

        Raises:
            NotRegisteredMainError: JHUData was not registered

        Returns:
            int: recovery period [days], i.e. the value of JHUData.recovery_period
        """
        registered = self._data_dict[self.__NAME_JHU]
        if registered is not None:
            return registered.recovery_period
        raise NotRegisteredMainError(".register(jhu_data)")
Пример #6
0
    def show_complement(self):
        """
        Show the details of complement that was (or will be) performed for the records.

        The call is delegated to JHUData.show_complement() with the date range,
        the area and the complement settings of this object. The
        "auto_complement" setting is not forwarded; the stored settings are not
        modified, and a missing "auto_complement" key is tolerated.

        Raises:
            NotRegisteredMainError: JHUData was not registered

        Returns:
            pandas.DataFrame: as the same as JHUData.show_complement()

        Note:
            Keyword arguments of JHUData.subset_complement() can be specified with DataHandler.switch_complement().
        """
        jhu_data = self._data_dict[self.__NAME_JHU]
        if jhu_data is None:
            raise NotRegisteredMainError(".register(jhu_data)")
        # Filter instead of pop(): no KeyError when the flag is absent and the
        # stored dictionary is left untouched
        comp_dict = {
            key: value for (key, value) in self._complement_dict.items()
            if key != "auto_complement"
        }
        return jhu_data.show_complement(start_date=self._first_date,
                                        end_date=self._last_date,
                                        **self._area_dict,
                                        **comp_dict)