def _scrape(self, refresh=False, **kwargs):
        """Build one series of statewide and NH Black case/death counts.

        Statewide totals are read from the first record returned by
        ``self.CASE_DATA_URL``; the NH Black counts come from the first
        record of ``self.RACE_DATA_URL`` whose ``hisp_race`` field is
        ``'NH Black'``.

        Arguments:
          refresh: accepted for interface compatibility; not used here.
          **kwargs: accepted for interface compatibility; not used here.

        Returns a single-element list holding the result of
        ``self._make_series``.

        Raises AssertionError when either feed is empty or any of the
        required counts is missing/falsy.

        """
        _logger.debug('Get case totals data')
        totals_feed = get_json(self.CASE_DATA_URL)
        assert totals_feed, 'Error finding total cases and deaths'

        # The first record is taken to be the most recent report.
        latest = totals_feed[0]
        # dict.get yields None when a key is not available, so missing
        # counts are caught by the assertions below.
        timestamp = latest.get('date')
        report_date = datetime.strptime(
            timestamp, '%Y-%m-%dT%H:%M:%S.%f').date()
        raw_total_cases = latest.get('confirmedcases')
        raw_total_deaths = latest.get('confirmeddeaths')

        assert raw_total_cases, 'Error finding total cases'
        assert raw_total_deaths, 'Error finding total deaths'

        # The feed supplies the counts as strings; convert to int.
        total_cases = int(raw_total_cases)
        total_deaths = int(raw_total_deaths)

        _logger.debug('Get race data')
        race_feed = get_json(self.RACE_DATA_URL)
        assert race_feed, 'Error getting race cases and deaths json'

        def _is_nh_black(record):
            return record['hisp_race'] == 'NH Black'

        nh_black = pydash.find(race_feed, _is_nh_black)
        assert nh_black, 'Error finding total NH Black entry'
        raw_aa_cases = nh_black.get('case_tot')
        raw_aa_deaths = nh_black.get('deaths')

        assert raw_aa_cases, 'Error finding total NH Black cases'
        assert raw_aa_deaths, 'Error finding total NH Black deaths'

        # Same string-to-int conversion as for the statewide totals.
        aa_cases = int(raw_aa_cases)
        aa_deaths = int(raw_aa_deaths)

        series = self._make_series(
            date=report_date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=to_percentage(aa_cases, total_cases),
            pct_aa_deaths=to_percentage(aa_deaths, total_deaths),
            pct_includes_unknown_race=True,
            pct_includes_hispanic_black=False)
        return [series]
    def _scrape(self, **kwargs):
        """Build one series from the 'Race' sheet of the Excel data file.

        The report date is the date part of ``result.last_modified`` in
        the JSON served at ``self.METADATA_URL``. Totals are the column
        sums of ``COVID_COUNT`` and ``COVID_DEATHS`` over every row of
        the sheet; the Black or African American counts and percentages
        are read directly from that row.

        Arguments:
          **kwargs: accepted for interface compatibility; not used here.

        Returns a single-element list holding the result of
        ``self._make_series``.

        """
        _logger.debug('Find the update date')
        last_modified = get_json(self.METADATA_URL)['result']['last_modified']
        date = datetime.datetime.fromisoformat(last_modified).date()
        _logger.info(f'Processing data for {date}')

        _logger.debug('Read in the file')
        race_sheet = pd.read_excel(self.DATA_URL, sheet_name='Race')
        # Index by race label so individual rows can be looked up by name.
        by_race = race_sheet.set_index('RACE')

        aa_label = 'Black or African American'
        cases_column = 'COVID_COUNT'
        deaths_column = 'COVID_DEATHS'

        series = self._make_series(
            date=date,
            cases=by_race[cases_column].sum(),
            deaths=by_race[deaths_column].sum(),
            aa_cases=by_race.loc[aa_label, cases_column],
            aa_deaths=by_race.loc[aa_label, deaths_column],
            # Percentages are taken as published in the sheet rather
            # than recomputed from the counts.
            pct_aa_cases=by_race.loc[aa_label, 'COVID_COUNT_PCT'],
            pct_aa_deaths=by_race.loc[aa_label, 'COVID_DEATHS_PCT'],
            pct_includes_unknown_race=True,
            pct_includes_hispanic_black=True,
        )
        return [series]
    def _scrape(self, **kwargs):
        """Build one series from the JSON document at ``self.DATA_URL``.

        Statewide figures are read from the entry at path
        ``state_testing_results.values.-1`` (presumably the most recent
        entry -- confirm against the feed). The Black figures come from
        the first ``demographics.race`` entry whose ``description`` is
        ``'Black'``.

        Arguments:
          **kwargs: accepted for interface compatibility; not used here.

        Returns a single-element list holding the result of
        ``self._make_series``.

        Raises AssertionError when a required section or count is
        missing/falsy.

        """
        # Named `payload` rather than `json` to avoid shadowing the
        # standard-library module name.
        payload = get_json(self.DATA_URL)

        state_info = pydash.get(payload, 'state_testing_results.values.-1')
        demographics_data = pydash.get(payload, 'demographics.race')

        # Validate the sections before dereferencing them, so a changed
        # feed layout fails with a descriptive message instead of a
        # TypeError/AttributeError on None.
        assert state_info, 'Could not find state testing results'
        assert demographics_data, 'Could not find race demographics'

        aa_data = pydash.find(
            demographics_data, lambda data: data['description'] == 'Black')
        assert aa_data, 'Could not find Black demographics entry'

        date = datetime.strptime(state_info['testDate'], '%m/%d/%Y').date()
        cases = state_info.get('confirmed_cases')
        deaths = state_info.get('deaths')
        aa_cases = aa_data.get('count')
        aa_deaths = aa_data.get('deaths')

        assert cases, 'Could not find number of confirmed cases'
        assert deaths, 'Could not find number of deaths'
        assert aa_cases, 'Could not find number of AA cases'
        assert aa_deaths, 'Could not find number of AA deaths'

        pct_aa_cases = to_percentage(aa_cases, cases)
        pct_aa_deaths = to_percentage(aa_deaths, deaths)

        return [
            self._make_series(date=date,
                              cases=cases,
                              deaths=deaths,
                              aa_cases=aa_cases,
                              aa_deaths=aa_deaths,
                              pct_aa_cases=pct_aa_cases,
                              pct_aa_deaths=pct_aa_deaths,
                              pct_includes_unknown_race=True,
                              pct_includes_hispanic_black=False)
        ]
示例#4
0
    def _get_group_names(url, group):
        """Since requesting a variable group returns many variables, it is not
        convenient to pass in column names to use for each of them.
        Instead, we can use the group definition (retrieved via the
        Census API discovery interface) to construct descriptive (if
        verbose) column names for each variable.

        Each column name has the form ``<concept>_<label>``. The concept
        is taken from the group's variables; if several variables carry
        a non-empty ``concept``, the last one encountered is used for
        every column.

        Arguments:
          url: the API dataset endpoint to query.
          group: a variable group ID.

        Returns a dict mapping variable IDs to descriptive column
        names. An empty group definition yields an empty dict.

        Raises ValueError if the group has variables but none of them
        carries a ``concept``.

        """
        resp = get_json(url + f'/groups/{group}.json', force_cache=True)
        variables = resp['variables']
        if not variables:
            return {}

        # Start from None so that a group without any concept is reported
        # explicitly instead of failing on an unbound local below.
        concept = None
        for val in variables.values():
            if val.get('concept'):
                concept = val['concept']
        if concept is None:
            raise ValueError(
                f'No concept found for variable group {group} at {url}')

        return {
            key: f'{concept}_{value["label"]}'
            for key, value in variables.items()
        }
示例#5
0
    def _scrape(self, **kwargs):
        """Build one series from the statewide data service.

        The service base URL is not fixed: it is extracted from the text
        served at ``self.UTILS_URL`` (the ``"prod"`` value following
        ``serviceUrl``) and substituted into ``self.DATA_URL_TEMPLATE``.

        Arguments:
          **kwargs: accepted for interface compatibility; not used here.

        Returns a single-element list holding the result of
        ``self._make_series``. Death counts by race are not available
        from this source, so the AA death fields are NaN.

        Raises ValueError if the service URL cannot be located or the
        service reports a status other than ``'ok'``.

        """
        utils_source = get_content(self.UTILS_URL).decode('utf-8')
        service_match = re.search(
            r'serviceUrl(.|\n)*?"prod": "(.*?)"', utils_source, re.M)
        if service_match is None:
            raise ValueError(f'Unable to find service URL in {self.UTILS_URL}')
        # Group 2 is the quoted "prod" value; group 1 only spans the
        # text between `serviceUrl` and that value.
        service = service_match.group(2)
        response = get_json(self.DATA_URL_TEMPLATE.format(service=service))
        # sample data: {
        #   "status": "ok",
        #   "message": "",
        #   "data": {
        #     "cvDataId": 154,
        #     "created": 1592520572106,
        #     "updated": 1592520572106,
        #     "archived": null,
        #     "cases": 10153,
        #     "tests": 275897,
        #     "totalHospitalizations": 1726,
        #     "currentHospitalizations": 157,
        #     "deaths": 456,
        #     "recovered": 4439,
        #     "male": 5045,
        #     "female": 4979,
        #     "genderNR": 97,
        #     "0-9": 444,
        #     "10-19": 882,
        #     "20-29": 1672,
        #     "30-39": 1835,
        #     "40-49": 1577,
        #     "50-59": 1482,
        #     "60-69": 1145,
        #     "70-79": 546,
        #     "80-89": 371,
        #     "90+": 167,
        #     "ageNR": 0,
        #     "amInd": 5419,
        #     "asian": 65,
        #     "black": 203,
        #     "hawaiian": 18,
        #     "unknown": 454,
        #     "other": 55,
        #     "white": 1157,
        #     "hispanic": 2750
        #   }
        # }
        status = response['status']
        if status != 'ok':
            raise ValueError(
                f'/GetPublicStatewideData failed with status: {status}')

        stats = response['data']
        # `updated` is divided by 1000 before conversion, i.e. it is
        # treated as a millisecond timestamp (see the sample above).
        date = datetime.date.fromtimestamp(stats['updated'] / 1000)
        _logger.info(f'Processing data for {date}')

        # Statewide totals
        total_cases = stats['cases']
        total_deaths = stats['deaths']

        # AA cases and their share of all cases
        aa_cases = stats['black']
        aa_cases_pct = to_percentage(aa_cases, total_cases)

        # No AA death data
        # TODO: find AA death data
        aa_deaths_pct = float('nan')
        aa_deaths = float('nan')

        series = self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=True,
            pct_includes_hispanic_black=False,
        )
        return [series]