def _scrape(self, **kwargs):
        """Collect total and African American case/death counts.

        Issues four geoservice queries in order (``self.CASES``,
        ``self.CASES_BY_RACE``, ``self.DEATHS``, ``self.DEATHS_BY_RACE``).
        The date returned by the first query is the one reported.

        Returns:
            A one-element list holding the result of ``self._make_series``.
        """
        date, cases_table = query_geoservice(**self.CASES)
        total_cases = cases_table.iloc[0, 0]

        _, race_cases = query_geoservice(**self.CASES_BY_RACE)
        race_cases = race_cases.set_index('Race')
        # Cases with a reported race: overall total minus 'Not Reported'.
        unreported_cases = race_cases.loc['Not Reported', 'value']
        known_cases = total_cases - unreported_cases
        aa_cases = race_cases.loc['African American/Black', 'value']
        pct_aa_cases = to_percentage(aa_cases, known_cases)

        _, deaths_table = query_geoservice(**self.DEATHS)
        total_deaths = deaths_table.iloc[0, 0]

        _, race_deaths = query_geoservice(**self.DEATHS_BY_RACE)
        race_deaths = race_deaths.set_index('Race')
        # Unlike cases, known deaths are summed over the per-race rows, and
        # an absent 'Not Reported' row is tolerated.
        reported_deaths = race_deaths.drop('Not Reported', errors='ignore')
        known_deaths = reported_deaths.sum()['value']
        aa_deaths = race_deaths.loc['African American', 'value']
        pct_aa_deaths = to_percentage(aa_deaths, known_deaths)

        record = dict(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [self._make_series(**record)]
示例#2
0
    def _scrape(self, **kwargs):
        """Build the summary record from the single ``self.DEMOG`` query.

        All counts are read from the row labelled ``0`` of the query result.

        Returns:
            A one-element list holding the result of ``self._make_series``.
        """
        date, demog = query_geoservice(**self.DEMOG)
        _logger.info(f'Processing data for {date}')

        def first_row(column):
            # Every figure lives in the row labelled 0 of the result table.
            return demog.loc[0, column]

        # Cases: known-race count is the total minus the unknown-race count.
        total_cases = first_row('positives')
        known_cases = total_cases - first_row('unk_race')
        aa_cases = first_row('black')
        pct_aa_cases = to_percentage(aa_cases, known_cases)

        # Deaths: same derivation, using the 'd_'-prefixed columns.
        total_deaths = first_row('deaths')
        known_deaths = total_deaths - first_row('d_unk_race')
        aa_deaths = first_row('d_black')
        pct_aa_deaths = to_percentage(aa_deaths, known_deaths)

        series = self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [series]
    def _scrape(self, **kwargs):
        """Build the summary record from the single ``self.DATA`` query.

        Totals, unknown-race counts and African American counts are all read
        from the row labelled ``0`` of the query result.

        Returns:
            A one-element list holding the result of ``self._make_series``.
        """
        date, data = query_geoservice(**self.DATA)
        _logger.info(f'Processing data for {date}')

        # Totals and unknown-race counts, in the order they are looked up.
        total_cases, total_deaths, unknown_cases, unknown_deaths = (
            data.loc[0, column]
            for column in ('POSITIVE', 'DEATHS', 'POS_UNK', 'DTH_UNK')
        )
        known_cases = total_cases - unknown_cases
        known_deaths = total_deaths - unknown_deaths

        aa_cases, aa_deaths = (
            data.loc[0, column] for column in ('POS_BLK', 'DTH_BLK')
        )

        aa_cases_pct = to_percentage(aa_cases, known_cases)
        aa_deaths_pct = to_percentage(aa_deaths, known_deaths)

        record = dict(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [self._make_series(**record)]
        def _scrape(self, **kwargs):
            """Build the summary record from the ``self.DEMOG`` query.

            Only cases are broken down by race here; the death-related race
            fields are all reported as ``nan``.

            Returns:
                A one-element list holding the result of
                ``self._make_series``.
            """
            date, data = query_geoservice(**self.DEMOG)
            _logger.info(f'Processing data for {date}')

            total_cases = data.loc[0, 'CasesAll']
            unknown_race_cases = data.loc[0, 'C_RaceUnknown']
            known_cases = total_cases - unknown_race_cases
            aa_cases = data.loc[0, 'C_RaceBlack']
            pct_aa_cases = to_percentage(aa_cases, known_cases)

            total_deaths = data.loc[0, 'Deaths']
            # Deaths carry no demographic breakdown, so every race-specific
            # death figure is a placeholder.
            known_deaths = aa_deaths = pct_aa_deaths = nan

            series = self._make_series(
                date=date,
                cases=total_cases,
                deaths=total_deaths,
                aa_cases=aa_cases,
                aa_deaths=aa_deaths,
                pct_aa_cases=pct_aa_cases,
                pct_aa_deaths=pct_aa_deaths,
                pct_includes_unknown_race=False,
                pct_includes_hispanic_black=True,
                known_race_cases=known_cases,
                known_race_deaths=known_deaths,
            )
            return [series]
示例#5
0
    def _scrape(self, **kwargs):
        """Build the summary record from the per-race case and death tables.

        Totals are the sums of the ``value`` column of the ``self.CASES`` and
        ``self.DEATHS`` query results, both indexed by ``Race_Eth``. The
        known-race counts exclude the ``'Not Reported'`` row.

        Returns:
            A one-element list holding the result of ``self._make_series``.

        Raises:
            ValueError: If the case table has no ``'Black Alone'`` row.
            KeyError: If the case table has no ``'Not Reported'`` row.
        """
        # The publication date comes from the case query.
        date_published, cases_df = query_geoservice(**self.CASES)
        _logger.info(f'Processing data for {date_published}')
        cases_df = cases_df.set_index('Race_Eth')
        cases = cases_df['value'].sum()
        cases_unknown = cases_df.loc['Not Reported', 'value']
        known_cases = cases - cases_unknown

        _, deaths_df = query_geoservice(**self.DEATHS)
        deaths_df = deaths_df.set_index('Race_Eth')
        deaths = deaths_df['value'].sum()
        # A category may be absent from the death table; treat it as zero.
        if 'Not Reported' in deaths_df.index:
            deaths_unknown = deaths_df.loc['Not Reported', 'value']
        else:
            deaths_unknown = 0
        known_deaths = deaths - deaths_unknown

        # Label-based ``.loc`` signals a missing row with KeyError, so that
        # is the exception to translate into the descriptive ValueError.
        try:
            cases_aa = cases_df.loc['Black Alone', 'value']
        except KeyError as err:
            raise ValueError('Case counts for Black Alone not found') from err
        pct_cases_aa = to_percentage(cases_aa, known_cases)

        # The membership test already covers a missing row, so no exception
        # handling is needed around this lookup.
        if 'Black Alone' in deaths_df.index:
            deaths_aa = deaths_df.loc['Black Alone', 'value']
        else:
            deaths_aa = 0
        pct_deaths_aa = to_percentage(deaths_aa, known_deaths)

        return [self._make_series(
            date=date_published,
            cases=cases,
            deaths=deaths,
            aa_cases=cases_aa,
            aa_deaths=deaths_aa,
            pct_aa_cases=pct_cases_aa,
            pct_aa_deaths=pct_deaths_aa,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )]
示例#6
0
    def _scrape(self, **kwargs):
        """Build the summary record from four separate geoservice queries.

        The date is read from the ``LAB_REPORT_DATE`` field of the
        ``self.DATE`` query rather than from the query metadata. Totals come
        from ``self.TOTAL_CASES`` / ``self.TOTAL_DEATHS`` and the race
        breakdown from the ``race_``-prefixed columns of ``self.DEMOG``.

        Returns:
            A one-element list holding the result of ``self._make_series``.
        """
        # NE does not version data, so the update date is null.
        # The date has to be queried from one of the tables instead.
        _, date_df = query_geoservice(**self.DATE)
        report_timestamp = date_df.loc[0, 'LAB_REPORT_DATE']
        date = report_timestamp.date()
        _logger.info(f'Processing data for {date}')

        _, total_cases_df = query_geoservice(**self.TOTAL_CASES)
        total_cases = total_cases_df.loc[0, 'value']

        _, total_deaths_df = query_geoservice(**self.TOTAL_DEATHS)
        total_deaths = total_deaths_df.loc[0, 'value']

        _, demog_df = query_geoservice(**self.DEMOG)
        demog_df = demog_df.set_index('Category')
        # Keep only the per-race columns.
        race_columns = [
            column for column in demog_df.columns
            if column.startswith('race_')
        ]
        demog_df = demog_df[race_columns]

        # Row-wise totals over every race column except the unknown one.
        known_by_category = demog_df.drop(columns=['race_Unknown']).sum(axis=1)

        aa_cases = demog_df.loc['PositiveCases', 'race_AfricanAmerican']
        aa_deaths = demog_df.loc['Deaths', 'race_AfricanAmerican']
        known_cases = known_by_category['PositiveCases']
        known_deaths = known_by_category['Deaths']
        aa_cases_pct = to_percentage(aa_cases, known_cases)
        aa_deaths_pct = to_percentage(aa_deaths, known_deaths)

        record = dict(
            date=date,
            cases=int(total_cases),
            deaths=int(total_deaths),
            aa_cases=int(aa_cases),
            aa_deaths=int(aa_deaths),
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [self._make_series(**record)]
示例#7
0
    def _scrape(self, **kwargs):
        """Build the summary record from totals plus per-race tables.

        Totals come from ``self.TOTALS``; the race breakdowns come from
        ``self.RACE_CASE`` (column ``Cases``) and ``self.RACE_DEATH``
        (column ``Deaths``), both indexed by ``Race``.

        Returns:
            A one-element list holding the result of ``self._make_series``.

        Raises:
            KeyError: If the case table lacks the ``'Unknown'`` or
                ``'Black or African American'`` row, or an expected column
                is missing.
        """
        # Download the metadata
        date, totals = query_geoservice(**self.TOTALS)
        _logger.info(f'Processing data for {date}')

        # Extract total case and death data
        total_cases = totals.loc[0, 'Cases']
        total_deaths = totals.loc[0, 'Deaths']

        # Download and extract AA case data
        _, cases = query_geoservice(**self.RACE_CASE)
        cases = cases.set_index('Race')
        aa_cases_cnt = cases.loc['Black or African American', 'Cases']
        known_cases = cases.drop('Unknown').sum()['Cases']
        aa_cases_pct = to_percentage(aa_cases_cnt, known_cases)

        # Download and extract AA death data
        _, deaths = query_geoservice(**self.RACE_DEATH)
        deaths = deaths.set_index('Race')
        known_deaths = deaths.drop('Unknown', errors='ignore').sum()['Deaths']
        # Only an absent row means "no deaths recorded". The count is read
        # from the same 'Deaths' column used for the known-race total, and a
        # missing column is allowed to surface as KeyError instead of being
        # reported as zero.
        if 'Black or African American' in deaths.index:
            aa_deaths_cnt = deaths.loc['Black or African American', 'Deaths']
            aa_deaths_pct = to_percentage(aa_deaths_cnt, known_deaths)
        else:
            aa_deaths_cnt = 0
            aa_deaths_pct = 0

        return [
            self._make_series(
                date=date,
                cases=total_cases,
                deaths=total_deaths,
                aa_cases=aa_cases_cnt,
                aa_deaths=aa_deaths_cnt,
                pct_aa_cases=aa_cases_pct,
                pct_aa_deaths=aa_deaths_pct,
                pct_includes_unknown_race=False,
                pct_includes_hispanic_black=True,
                known_race_cases=known_cases,
                known_race_deaths=known_deaths,
            )
        ]
示例#8
0
    def _scrape(self, **kwargs):
        """Build the summary record from totals plus a race/ethnicity table.

        Totals come from ``self.TOTALS``; the breakdown comes from
        ``self.RACE``, indexed by ``RaceEthnicity``.

        Returns:
            A one-element list holding the result of ``self._make_series``.

        Raises:
            ValueError: If the totals table has no row ``0`` (or lacks the
                ``Cases``/``Deaths`` columns), or the breakdown has no
                ``'Black'`` row.
        """
        # Get the cumulative case and death counts
        date_published, total = query_geoservice(**self.TOTALS)
        _logger.info(f'Processing data for {date_published}')

        # Label-based ``.loc`` raises KeyError for a missing row or column;
        # IndexError is still accepted for backward compatibility.
        try:
            cases = total.loc[0, 'Cases']
            deaths = total.loc[0, 'Deaths']
        except (KeyError, IndexError) as err:
            raise ValueError('Total count data not found') from err

        # And finally the race/ethnicity breakdowns
        _, data = query_geoservice(**self.RACE)
        data = data.set_index('RaceEthnicity')

        if 'Black' not in data.index:
            raise ValueError('No data found for Black RaceEthnicity category')

        # NOTE(review): the "known" totals sum every row of the table; this
        # presumably relies on the query not returning an unknown-race row --
        # confirm against self.RACE.
        known = data.sum()
        cases_aa = data.loc['Black', 'Cases']
        deaths_aa = data.loc['Black', 'Deaths']
        known_cases = known['Cases']
        known_deaths = known['Deaths']
        pct_cases_aa = to_percentage(cases_aa, known_cases)
        pct_deaths_aa = to_percentage(deaths_aa, known_deaths)

        return [self._make_series(
            date=date_published,
            cases=cases,
            deaths=deaths,
            aa_cases=cases_aa,
            aa_deaths=deaths_aa,
            pct_aa_cases=pct_cases_aa,
            pct_aa_deaths=pct_deaths_aa,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )]
    def _scrape(self, **kwargs):
        """Build the summary record from totals plus per-race tables.

        Totals come from ``self.TOTAL``. The race tables from
        ``self.RACE_CASE`` and ``self.RACE_DEATH`` are indexed by ``RACE``,
        stripped of rows containing missing values and cast to ``int``
        before any counts are read.

        Returns:
            A one-element list holding the result of ``self._make_series``.
        """
        # Rows that do not identify a race; excluded from known-race totals.
        unknown_labels = ['REFUSED TO ANSWER RACE', 'UNKNOWN RACE']

        # Overall totals (the date reported comes from this query).
        date, totals = query_geoservice(**self.TOTAL)
        _logger.info(f'Processing data for {date}')
        total_cases = totals.loc[0, 'Cases']
        total_deaths = totals.loc[0, 'Deaths']

        # Cases by race
        _, cases_race = query_geoservice(**self.RACE_CASE)
        cases_race = cases_race.set_index('RACE').dropna().astype(int)
        identified_cases = cases_race.drop(unknown_labels, errors='ignore')
        known_cases = identified_cases.sum()['Cases']
        aa_cases = cases_race.loc['BLACK', 'Cases']
        aa_cases_pct = to_percentage(aa_cases, known_cases)

        # Deaths by race
        _, deaths_race = query_geoservice(**self.RACE_DEATH)
        deaths_race = deaths_race.set_index('RACE').dropna().astype(int)
        identified_deaths = deaths_race.drop(unknown_labels, errors='ignore')
        known_deaths = identified_deaths.sum()['Deaths']
        aa_deaths = deaths_race.loc['BLACK', 'Deaths']
        aa_deaths_pct = to_percentage(aa_deaths, known_deaths)

        record = dict(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [self._make_series(**record)]
示例#10
0
    def _scrape(self, **kwargs):
        """Build the summary record from totals plus a per-race aggregate.

        Totals come from ``self.TOTAL_CASES_QUERY`` and
        ``self.TOTAL_DEATHS_QUERY``. The ``self.RACE_QUERY`` result is
        grouped by ``Race`` with ``Cases`` and ``Deaths`` summed; its date is
        the one reported.

        Returns:
            A one-element list holding the result of ``self._make_series``.

        Raises:
            AssertionError: If either totals table does not have exactly one
                row, or the grouped race table does not have exactly 7 rows.
        """
        _, total_cases_df = query_geoservice(**self.TOTAL_CASES_QUERY)
        _, total_deaths_df = query_geoservice(**self.TOTAL_DEATHS_QUERY)
        date, raw_race_df = query_geoservice(**self.RACE_QUERY)
        race_df = raw_race_df.groupby('Race').agg({
            'Cases': 'sum',
            'Deaths': 'sum'
        })

        # Explicit raises rather than ``assert`` so these sanity checks are
        # not stripped under ``python -O``. AssertionError is kept so callers
        # see the same exception type as before.
        if len(total_cases_df) != 1:
            raise AssertionError(
                'total_cases_df has unexpected number of rows')
        if len(total_deaths_df) != 1:
            raise AssertionError(
                'total_deaths_df has unexpected number of rows')
        if len(race_df) != 7:
            raise AssertionError('race_df has unexpected number of rows')

        cases = total_cases_df.iloc[0]['value']
        deaths = total_deaths_df.iloc[0]['value']
        aa_cases = race_df.loc['Black', 'Cases']
        aa_deaths = race_df.loc['Black', 'Deaths']
        # Known-race totals exclude the 'Unknown' group.
        known_race_df = race_df.drop('Unknown')
        known_race_cases = known_race_df['Cases'].sum()
        known_race_deaths = known_race_df['Deaths'].sum()
        pct_aa_cases = to_percentage(aa_cases, known_race_cases)
        pct_aa_deaths = to_percentage(aa_deaths, known_race_deaths)

        return [
            self._make_series(date=date,
                              cases=cases,
                              deaths=deaths,
                              aa_cases=aa_cases,
                              aa_deaths=aa_deaths,
                              pct_aa_cases=pct_aa_cases,
                              pct_aa_deaths=pct_aa_deaths,
                              pct_includes_unknown_race=False,
                              pct_includes_hispanic_black=True,
                              known_race_cases=known_race_cases,
                              known_race_deaths=known_race_deaths)
        ]
示例#11
0
    def _scrape(self, **kwargs):
        """Build the summary record from per-race case and death tables.

        Both ``self.CASES`` and ``self.DEATHS`` results are indexed by
        ``Racecat``. Totals are the sum of the ``value`` column and the
        known-race totals are that sum without the ``'Unknown'`` row.

        Returns:
            A one-element list holding the result of ``self._make_series``.
        """
        # Cases: the date reported comes from this query.
        date, cases = query_geoservice(**self.CASES)
        _logger.info(f'Processing data for {date}')
        cases = cases.set_index('Racecat')

        case_counts = cases.loc[:, 'value']
        total_cases = case_counts.sum()
        total_known_cases = case_counts.drop('Unknown').sum()
        aa_cases_cnt = cases.loc['Black', 'value']
        aa_cases_pct = to_percentage(aa_cases_cnt, total_known_cases)

        # Deaths: same derivation on the second table.
        _, deaths = query_geoservice(**self.DEATHS)
        deaths = deaths.set_index('Racecat')

        death_counts = deaths.loc[:, 'value']
        total_deaths = death_counts.sum()
        total_known_deaths = death_counts.drop('Unknown').sum()
        aa_deaths_cnt = deaths.loc['Black', 'value']
        aa_deaths_pct = to_percentage(aa_deaths_cnt, total_known_deaths)

        record = dict(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases_cnt,
            aa_deaths=aa_deaths_cnt,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
            known_race_cases=total_known_cases,
            known_race_deaths=total_known_deaths,
        )
        return [self._make_series(**record)]
    def _scrape(self, **kwargs):
        """Build the summary record from a geoservice query and a Tableau CSV.

        The date and total cases come from the ``self.TOTALS`` query. The
        death figures come from a CSV downloaded in two HTTP requests: the
        first (``self.RACE_MAIN_PAGE``) yields an ``x-session-id`` response
        header, which is substituted into ``self.DEATHS_RACE_URL_TEMPLATE``
        for the second. African American case figures are reported as NaN.

        Returns:
            A one-element list holding the result of ``self._make_series``.

        Raises:
            ValueError: If the second CSV column name contains no digits, or
                no row of ``Race/Ethnicity1`` contains ``'Black'``.
            requests.HTTPError: If either HTTP request fails
                (via ``raise_for_status``).
        """
        # Get totals date
        # NOTE(review): self.TOTALS is passed positionally here rather than
        # unpacked with ``**`` -- confirm this matches query_geoservice.
        date, totals = query_geoservice(self.TOTALS)
        _logger.info(f'Processing data for {date}')
        total_cases = totals.loc[0, 'TOTAL_CASES']
        # Replaced further down by the figure parsed from the CSV header.
        total_deaths = totals.loc[0, 'TOTAL_DEATHS']

        # No AA case data
        # TODO: find AA case data
        aa_cases = float('nan')
        aa_cases_pct = float('nan')

        # Download the deaths data; the session is closed once both requests
        # have completed.
        with requests.Session() as s:
            # First load the main page to get a session ID
            r = s.get(self.RACE_MAIN_PAGE)
            r.raise_for_status()
            session_id = r.headers['x-session-id']
            _logger.info(f'Tableau session ID is {session_id}')
            # Prepare the data download URL and retrieve
            r = s.get(
                self.DEATHS_RACE_URL_TEMPLATE.format(session_id=session_id))
            r.raise_for_status()
            csv_bytes = r.content
        # Load the data into a DataFrame
        deaths = pd.read_csv(BytesIO(csv_bytes))

        # The total death count is the first run of digits in the name of
        # the second column.
        n_col = deaths.columns[1]
        count_match = re.search(r'\d+', n_col)
        if count_match is None:
            raise ValueError(
                f'No death count found in column name {n_col!r}')
        total_deaths = int(count_match.group(0))

        # ``na=False`` keeps rows with a missing label out of the mask.
        is_black = deaths['Race/Ethnicity1'].str.contains('Black', na=False)
        aa_rows = deaths[is_black]
        if aa_rows.empty:
            raise ValueError('Death counts for Black not found')
        # Use the first matching row and reduce to scalars, so the record
        # holds plain numbers rather than one-element Series.
        aa_deaths = aa_rows[n_col].iloc[0]
        aa_deaths_pct = round(100 * aa_rows['Percent (Race)'].iloc[0], 2)

        return [
            self._make_series(
                date=date,
                cases=total_cases,
                deaths=total_deaths,
                aa_cases=aa_cases,
                aa_deaths=aa_deaths,
                pct_aa_cases=aa_cases_pct,
                pct_aa_deaths=aa_deaths_pct,
                pct_includes_unknown_race=False,
                pct_includes_hispanic_black=False,
            )
        ]
示例#13
0
    def _scrape(self, **kwargs):
        """Build the summary record from the single ``self.DATA`` query.

        The result is indexed by ``Demographic`` and reindexed to a fixed
        list of race labels. Two summary rows (``'Grand Total'`` and
        ``'Known Race'``) are appended to the table and the reported figures
        are read back from it.

        Returns:
            A one-element list holding the result of ``self._make_series``.
        """
        race_labels = [
            'White', 'Black', 'AI/AN', 'Asian', 'NHOPI', 'Multiple', 'Other',
            'Unknown Race'
        ]

        date, data = query_geoservice(**self.DATA)
        _logger.info(f'Processing data for {date}')

        # Index by demographic label and keep only the race rows.
        data = data.set_index('Demographic').reindex(race_labels)

        # Append the summary rows; 'Known Race' leaves out both the grand
        # total just added and the unknown-race row.
        data.loc['Grand Total', :] = data.sum()
        known_rows = data.drop(['Grand Total', 'Unknown Race'])
        data.loc['Known Race', :] = known_rows.sum()

        # Case figures
        total_cases = data.loc['Grand Total', 'Cases']
        known_cases = data.loc['Known Race', 'Cases']
        aa_cases_cnt = data.loc['Black', 'Cases']
        aa_cases_pct = to_percentage(aa_cases_cnt, known_cases, 2)

        # Death figures
        total_deaths = data.loc['Grand Total', 'Deaths']
        known_deaths = data.loc['Known Race', 'Deaths']
        aa_deaths_cnt = data.loc['Black', 'Deaths']
        aa_deaths_pct = to_percentage(aa_deaths_cnt, known_deaths, 2)

        record = dict(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases_cnt,
            aa_deaths=aa_deaths_cnt,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [self._make_series(**record)]
    def _scrape(self, **kwargs):
        """Build the summary record from a geoservice query and a web session.

        Total cases and deaths come from ``self.SUMMARY_QUERY``. A webdriver
        run visits ``self.RACE_URL`` to capture the ``'race_cases'`` request
        and then ``self.MAIN_PAGE_URL`` for the page source, which is passed
        to ``self.get_date``. No death breakdown by race is reported.

        Returns:
            A one-element list holding the result of ``self._make_series``.
        """
        _, summary = arcgis.query_geoservice(**self.SUMMARY_QUERY)
        cases = summary.loc[0, 'Cases']
        deaths = summary.loc[0, 'Deaths']

        driver = WebdriverRunner()
        steps = (
            WebdriverSteps()
            .go_to_url(self.RACE_URL)
            .wait_for_number_of_elements((By.XPATH, '//canvas'), 14)
            .find_request('race_cases', find_by=tableau.find_tableau_request)
            .go_to_url(self.MAIN_PAGE_URL)
            .get_page_source()
        )
        results = driver.run(steps)
        date = self.get_date(results.page_source)

        # Parse the captured request and keep the 'Case %' rows, one per race.
        race_request = results.requests['race_cases']
        census_df = tableau.TableauParser(
            request=race_request).get_dataframe_from_key('Census')
        is_case_pct = census_df['Measure Names'] == 'Case %'
        cases_by_race = census_df[is_case_pct].set_index('Race')

        aa_cases = cases_by_race.loc['Black', 'SUM(Case Count)']
        known_race_cases = cases_by_race['SUM(Case Count)'].sum()
        pct_aa_cases = misc.to_percentage(aa_cases, known_race_cases)

        record = dict(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            pct_aa_cases=pct_aa_cases,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_race_cases,
        )
        return [self._make_series(**record)]