Exemplo n.º 1
0
class Vermont(ScraperBase):
    """Vermont reports COVID-19 demographic breakdowns of cases and deaths
    on their ArcGIS dashboard. The dashboard is at
    https://experience.arcgis.com/experience/85f43bd849e743cb957993a545d17170

    Three geoservice queries are issued: the most recent daily totals
    row, case counts grouped by race, and death counts grouped by race.
    """

    # Services are under https://services1.arcgis.com/BkFxaEFNwHqX3tAw

    # Most recent row of the daily count layer (ordered by date
    # descending, limited to one row), with the totals aliased to
    # 'Cases' and 'Deaths'.
    TOTALS = dict(
        flc_id='94479a6d67fc406999c9b66dec7d4adb',
        layer_name='V_EPI_DailyCount_PUBLIC',
        out_fields=[
            'date',
            'cumulative_positives as Cases',
            'total_deaths as Deaths',
        ],
        order_by='date desc',
        limit=1,
    )

    # Record counts per 'Race' value, reported in a 'Cases' column.
    RACE_CASE = dict(flc_id='0e6f8a6aeb084acaa5f7973e556cf708',
                     layer_name='V_EPI_PositiveCases_PUBLIC',
                     group_by='Race',
                     stats=[
                         make_geoservice_stat('count', 'OBJECTID_2', 'Cases'),
                     ])
    # Same layer restricted to Death='Yes'; counts are reported in a
    # 'Deaths' column.
    RACE_DEATH = dict(flc_id='0e6f8a6aeb084acaa5f7973e556cf708',
                      layer_name='V_EPI_PositiveCases_PUBLIC',
                      where="Death='Yes'",
                      group_by='Race',
                      stats=[
                          make_geoservice_stat('count', 'OBJECTID_2',
                                               'Deaths'),
                      ])

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _scrape(self, **kwargs):
        """Query the Vermont services and build a single result series.

        Returns:
            A one-element list holding the value returned by
            ``self._make_series``.

        Raises:
            KeyError: if the case data has no 'Unknown' or
                'Black or African American' row (the case side is
                intentionally strict, as in the original code).
        """
        aa_label = 'Black or African American'

        # Download the metadata
        date, totals = query_geoservice(**self.TOTALS)
        _logger.info(f'Processing data for {date}')

        # Extract total case and death data
        total_cases = totals.loc[0, 'Cases']
        total_deaths = totals.loc[0, 'Deaths']

        # Download and extract AA case data
        _, cases = query_geoservice(**self.RACE_CASE)
        cases = cases.set_index('Race')
        aa_cases_cnt = cases.loc[aa_label, 'Cases']
        known_cases = cases.drop('Unknown').sum()['Cases']
        aa_cases_pct = to_percentage(aa_cases_cnt, known_cases)

        # Download and extract AA death data
        _, deaths = query_geoservice(**self.RACE_DEATH)
        deaths = deaths.set_index('Race')
        known_deaths = deaths.drop('Unknown', errors='ignore').sum()['Deaths']
        # The stat column is named 'Deaths' (see RACE_DEATH). Reading a
        # nonexistent 'value' column here used to raise KeyError on every
        # run, which silently forced the AA death count to 0.
        if aa_label in deaths.index:
            aa_deaths_cnt = deaths.loc[aa_label, 'Deaths']
            aa_deaths_pct = to_percentage(aa_deaths_cnt, known_deaths)
        else:
            # No row for this race among the death records: report zero.
            aa_deaths_cnt = 0
            aa_deaths_pct = 0

        return [
            self._make_series(
                date=date,
                cases=total_cases,
                deaths=total_deaths,
                aa_cases=aa_cases_cnt,
                aa_deaths=aa_deaths_cnt,
                pct_aa_cases=aa_cases_pct,
                pct_aa_deaths=aa_deaths_pct,
                pct_includes_unknown_race=False,
                pct_includes_hispanic_black=True,
                known_race_cases=known_cases,
                known_race_deaths=known_deaths,
            )
        ]
Exemplo n.º 2
0
class WisconsinMilwaukee(ScraperBase):
    """Milwaukee publishes COVID-19 demographic disaggregations on their
    ArcGIS dashboard at:
    https://www.arcgis.com/apps/opsdashboard/index.html#/018eedbe075046779b8062b5fe1055bf

    We retrieve the data from their FeatureServers.
    """

    # The services are at https://services5.arcgis.com/8Q02ELWlq5TYUASS

    # Record counts per 'Race_Eth' value, reported in a 'value' column.
    CASES = dict(
        flc_id='73e2e7131f954bb6a1b0fbbd9dd53f5b',
        layer_name='Cases',
        group_by='Race_Eth',
        stats=[make_geoservice_stat('count', 'ObjectId', 'value')],
    )

    # Same grouping and output column as CASES, on the deaths layer.
    DEATHS = dict(
        flc_id='02f3b03e877e480ca5c2eb750dcbbc8c',
        layer_name='Deaths',
        group_by='Race_Eth',
        stats=[make_geoservice_stat('count', 'ObjectId', 'value')],
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def name(self):
        """Return the display name used for this scraper."""
        return 'Wisconsin -- Milwaukee'

    @classmethod
    def is_beta(cls):
        """Return the class's ``BETA_SCRAPER`` attribute, or True if the
        class does not define one."""
        return getattr(cls, 'BETA_SCRAPER', True)

    def _get_aa_pop_stats(self):
        """Return ``get_aa_pop_stats`` for Milwaukee county, Wisconsin,
        using ``self.census_api``."""
        return get_aa_pop_stats(self.census_api, 'Wisconsin',
                                county='Milwaukee')

    def _scrape(self, **kwargs):
        """Query the case and death layers and build one result series.

        Returns:
            A one-element list holding the value returned by
            ``self._make_series``.

        Raises:
            ValueError: if the case data has no 'Black Alone' row.
            KeyError: if the case data has no 'Not Reported' row.
        """
        # Get the timestamp
        date_published, cases_df = query_geoservice(**self.CASES)
        _logger.info(f'Processing data for {date_published}')
        cases_df = cases_df.set_index('Race_Eth')
        cases = cases_df['value'].sum()
        cases_unknown = cases_df.loc['Not Reported', 'value']
        known_cases = cases - cases_unknown

        _, deaths_df = query_geoservice(**self.DEATHS)
        deaths_df = deaths_df.set_index('Race_Eth')
        deaths = deaths_df['value'].sum()
        # A category with no deaths may be absent from the grouped result.
        if 'Not Reported' in deaths_df.index:
            deaths_unknown = deaths_df.loc['Not Reported', 'value']
        else:
            deaths_unknown = 0
        known_deaths = deaths - deaths_unknown

        # A missing label makes .loc raise KeyError (not IndexError), so
        # that is the exception to translate into the descriptive error.
        try:
            cases_aa = cases_df.loc['Black Alone', 'value']
        except KeyError as err:
            raise ValueError('Case counts for Black Alone not found') from err
        pct_cases_aa = to_percentage(cases_aa, known_cases)

        # The membership check already covers a missing row, so no
        # exception handling is needed on the death side.
        if 'Black Alone' in deaths_df.index:
            deaths_aa = deaths_df.loc['Black Alone', 'value']
        else:
            deaths_aa = 0
        pct_deaths_aa = to_percentage(deaths_aa, known_deaths)

        return [self._make_series(
            date=date_published,
            cases=cases,
            deaths=deaths,
            aa_cases=cases_aa,
            aa_deaths=deaths_aa,
            pct_aa_cases=pct_cases_aa,
            pct_aa_deaths=pct_deaths_aa,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )]

    def _format_error(self, e):
        """Return an error message for exception ``e``.

        ``OverflowError`` and ``requests.RequestException`` get dedicated
        messages; anything else is delegated to the parent class.
        """
        if isinstance(e, OverflowError):
            return f'{ERROR} ... processing last update timstamp: {repr(e)}'
        elif isinstance(e, requests.RequestException):
            # The exception's request may be None; avoid raising a
            # secondary AttributeError while formatting the message.
            url = getattr(e.request, 'url', None)
            return f'{ERROR} ... retrieving URL {url}: {repr(e)}'
        else:
            return super()._format_error(e)
Exemplo n.º 3
0
class Louisiana(ScraperBase):
    """Louisiana data is pulled from the services behind an ArcGIS
    dashboard.

    The service URL for each query is noted in the comment above its
    parameters.
    """

    # https://services5.arcgis.com/O5K6bb5dZVZcTo5M/ArcGIS/rest/services/Cases_and_Deaths_by_Race_by_Region/FeatureServer/layers
    RACE_QUERY = {
        'flc_id': '0ee12fc733d143e0ab35a33bb0f93406',
        'layer_name': 'Sheet3',
        'out_fields': ['Race', 'Deaths', 'Cases'],
    }

    # https://services5.arcgis.com/O5K6bb5dZVZcTo5M/ArcGIS/rest/services/Combined_COVID_Reporting/FeatureServer/layers
    TOTAL_CASES_QUERY = {
        'flc_id': '63aa4507396e4aa1ba90ee3eb5b8f05a',
        'layer_name': 'Sheet1',
        'where': "Measure='Case Count' AND Group_Num<>38",
        'stats': [make_geoservice_stat('sum', 'Value', 'value')],
    }

    # https://services5.arcgis.com/O5K6bb5dZVZcTo5M/ArcGIS/rest/services/Combined_COVID_Reporting/FeatureServer/layers
    TOTAL_DEATHS_QUERY = {
        'flc_id': '63aa4507396e4aa1ba90ee3eb5b8f05a',
        'layer_name': 'Sheet1',
        'where': "Measure='Deaths' AND Group_Num<>38",
        'stats': [make_geoservice_stat('sum', 'Value', 'value')],
    }

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _scrape(self, **kwargs):
        """Fetch totals and by-race counts and build one result series.

        The by-race rows are summed per 'Race' value before use. The
        shapes of all three results are sanity-checked with assertions.

        Returns:
            A one-element list holding the value returned by
            ``self._make_series``.
        """
        _, total_cases_df = query_geoservice(**self.TOTAL_CASES_QUERY)
        _, total_deaths_df = query_geoservice(**self.TOTAL_DEATHS_QUERY)
        date, raw_race_df = query_geoservice(**self.RACE_QUERY)

        # Collapse the raw rows to one row per race.
        aggregations = {'Cases': 'sum', 'Deaths': 'sum'}
        race_df = raw_race_df.groupby('Race').agg(aggregations)

        # Shape checks: one row per total, seven race categories.
        assert len(total_cases_df) == 1, \
            'total_cases_df has unexpected number of rows'
        assert len(total_deaths_df) == 1, \
            'total_deaths_df has unexepected number of rows'
        assert len(race_df) == 7, 'race_df has unexpected number of rows'

        cases = total_cases_df.iloc[0]['value']
        deaths = total_deaths_df.iloc[0]['value']

        black_row = race_df.loc['Black']
        aa_cases = black_row['Cases']
        aa_deaths = black_row['Deaths']

        # Totals over every race category except 'Unknown'.
        known_df = race_df.drop('Unknown')
        known_race_cases = known_df['Cases'].sum()
        known_race_deaths = known_df['Deaths'].sum()

        series = self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=to_percentage(aa_cases, known_race_cases),
            pct_aa_deaths=to_percentage(aa_deaths, known_race_deaths),
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_race_cases,
            known_race_deaths=known_race_deaths,
        )
        return [series]
Exemplo n.º 4
0
class Nebraska(ScraperBase):
    """Nebraska publishes totals and by-race breakdowns on their ArcGIS
    dashboard at
    https://experience.arcgis.com/experience/ece0db09da4d4ca68252c3967aa1e9dd

    Nebraska runs ArcGIS Enterprise, and its portal's search-by-ID
    features require authentication, so the queries below point
    directly at the MapServer that feeds the dashboard.

    """
    # Latest lab report date: newest row only.
    DATE = dict(
        flc_url=_NE_SERVER_URL,
        layer_name='DHHS_GIS.dhhs.covid19_hot_accumulative_lab_dt',
        out_fields=['LAB_REPORT_DATE'],
        order_by='LAB_REPORT_DATE desc',
        limit=1,
    )

    # Count of records matching the positive-lab filter.
    TOTAL_CASES = dict(
        flc_url=_NE_SERVER_URL,
        layer_name='COVID19_COLD',
        where="lab_status='Positive' AND NE_JURIS='yes'",
        stats=[make_geoservice_stat('count', 'ID', 'value')],
    )

    # Count of records matching all three death-filter conditions.
    TOTAL_DEATHS = dict(
        flc_url=_NE_SERVER_URL,
        layer_name='COVID19_CASE_COLD',
        where=' AND '.join((
            "case_status='Confirmed'",
            "NE_JURIS='yes'",
            "Did_Pat_Die_From_Illness='Y'",
        )),
        stats=[make_geoservice_stat('count', 'ID', 'value')],
    )

    # Demographic table; rows are identified by 'Category'.
    DEMOG = dict(
        flc_url=_NE_SERVER_URL,
        layer_name='DHHS_GIS.DHHS.COVID19_RE_HORIZONTAL',
        order_by='Category desc',
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _scrape(self, **kwargs):
        """Query the Nebraska MapServer and build one result series.

        Returns:
            A one-element list holding the value returned by
            ``self._make_series``.
        """
        # NE does not version its data, so the update date returned with
        # a query is null; read the date out of a table instead.
        _, date_df = query_geoservice(**self.DATE)
        date = date_df.loc[0, 'LAB_REPORT_DATE'].date()
        _logger.info(f'Processing data for {date}')

        _, cases_total_df = query_geoservice(**self.TOTAL_CASES)
        total_cases = cases_total_df.loc[0, 'value']

        _, deaths_total_df = query_geoservice(**self.TOTAL_DEATHS)
        total_deaths = deaths_total_df.loc[0, 'value']

        _, demog_df = query_geoservice(**self.DEMOG)
        demog_df = demog_df.set_index('Category')
        # Keep only the per-race columns.
        race_columns = [
            column for column in demog_df.columns
            if column.startswith('race_')
        ]
        demog_df = demog_df[race_columns]

        # Per-category totals over every race column except unknown.
        known_by_category = demog_df.drop(columns=['race_Unknown']).sum(axis=1)
        known_cases = known_by_category['PositiveCases']
        known_deaths = known_by_category['Deaths']

        aa_cases = demog_df.loc['PositiveCases', 'race_AfricanAmerican']
        aa_deaths = demog_df.loc['Deaths', 'race_AfricanAmerican']

        series = self._make_series(
            date=date,
            cases=int(total_cases),
            deaths=int(total_deaths),
            aa_cases=int(aa_cases),
            aa_deaths=int(aa_deaths),
            pct_aa_cases=to_percentage(aa_cases, known_cases),
            pct_aa_deaths=to_percentage(aa_deaths, known_deaths),
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [series]
class Pennsylvania(ScraperBase):
    """Pennsylvania has an ArcGIS dashboard at
    https://experience.arcgis.com/experience/cfb3803eb93d42f7ab1c2cfccca78bf7

    The data is read from the FeatureServers behind that dashboard.

    """
    # Sum of the 'Confirmed' field over the dashboard layer.
    CASES = {
        'flc_url': 'https://services2.arcgis.com/xtuWQvb2YQnp0z3F/arcgis/rest/services/Pennsylvania_Public_COVID19_Dashboard_Data/FeatureServer',
        'layer_name': 'Public Health Dashboard',
        'stats': [make_geoservice_stat('sum', 'Confirmed', 'value')],
    }

    # Sum of the 'Deaths' field over the dashboard layer.
    DEATHS = {
        'flc_url': 'https://services2.arcgis.com/xtuWQvb2YQnp0z3F/arcgis/rest/services/Pennsylvania_Public_COVID19_Dashboard_Data/FeatureServer',
        'layer_name': 'Public Health Dashboard',
        'stats': [make_geoservice_stat('sum', 'Deaths', 'value')],
    }

    # Sum of 'Positive_Cases' per 'Race' value.
    CASES_BY_RACE = {
        'flc_url': 'https://services2.arcgis.com/xtuWQvb2YQnp0z3F/arcgis/rest/services/Pennsylvania_Public_COVID19_Dashboard_Data/FeatureServer',
        'layer_name': 'Race_Data',
        'group_by': 'Race',
        'stats': [make_geoservice_stat('sum', 'Positive_Cases', 'value')],
    }

    # Sum of 'Deaths' per 'Race' value, from a separate service.
    DEATHS_BY_RACE = {
        'flc_url': 'https://services2.arcgis.com/xtuWQvb2YQnp0z3F/arcgis/rest/services/deathrace/FeatureServer',
        'layer_name': 'deathrace',
        'group_by': 'Race',
        'stats': [make_geoservice_stat('sum', 'Deaths', 'value')],
    }

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _scrape(self, **kwargs):
        """Query totals and by-race counts and build one result series.

        Known-race cases are the total minus the 'Not Reported' row;
        known-race deaths are the sum of all rows other than
        'Not Reported'.

        Returns:
            A one-element list holding the value returned by
            ``self._make_series``.
        """
        # Cases: overall total first, then the per-race breakdown.
        date, cases_df = query_geoservice(**self.CASES)
        total_cases = cases_df.iloc[0, 0]

        _, race_cases_df = query_geoservice(**self.CASES_BY_RACE)
        race_cases_df = race_cases_df.set_index('Race')
        unreported_cases = race_cases_df.loc['Not Reported', 'value']
        known_cases = total_cases - unreported_cases
        aa_cases = race_cases_df.loc['African American/Black', 'value']
        pct_aa_cases = to_percentage(aa_cases, known_cases)

        # Deaths: overall total first, then the per-race breakdown.
        _, deaths_df = query_geoservice(**self.DEATHS)
        total_deaths = deaths_df.iloc[0, 0]

        _, race_deaths_df = query_geoservice(**self.DEATHS_BY_RACE)
        race_deaths_df = race_deaths_df.set_index('Race')
        reported_deaths_df = race_deaths_df.drop('Not Reported',
                                                 errors='ignore')
        known_deaths = reported_deaths_df.sum()['value']
        # The deaths service labels this row differently from the cases one.
        aa_deaths = race_deaths_df.loc['African American', 'value']
        pct_aa_deaths = to_percentage(aa_deaths, known_deaths)

        series = self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [series]
class Missouri(ScraperBase):
    """Missouri's ArcGIS dashboard includes demographic breakdowns of
    confirmed cases and deaths. The FeatureServer calls that populate
    it were identified, and this scraper invokes them directly.

    The dashboard is at:
    http://mophep.maps.arcgis.com/apps/MapSeries/index.html?appid=8e01a5d8d8bd4b4f85add006f9e14a9d
    """

    # Services are at https://services6.arcgis.com/Bd4MACzvEukoZ9mR

    # Sums of the 'Cases' and 'Deaths' fields over the county layer.
    TOTAL = {
        'flc_url': 'https://services6.arcgis.com/Bd4MACzvEukoZ9mR/ArcGIS/rest/services/county_MOHSIS_map/FeatureServer',
        'layer_name': 'county_mohsis_map_temp',
        'stats': [
            make_geoservice_stat('sum', 'Cases', 'Cases'),
            make_geoservice_stat('sum', 'Deaths', 'Deaths'),
        ],
    }

    # Per-race case frequencies, aliased to a 'Cases' column.
    RACE_CASE = {
        'flc_url': 'https://services6.arcgis.com/Bd4MACzvEukoZ9mR/arcgis/rest/services/Case_by_Race_Automated/FeatureServer',
        'layer_name': 'CasesByRace_Temp',
        'out_fields': ['RACE', 'Frequency as Cases'],
    }

    # Per-race death frequencies, aliased to a 'Deaths' column.
    RACE_DEATH = {
        'flc_url': 'https://services6.arcgis.com/Bd4MACzvEukoZ9mR/ArcGIS/rest/services/Death_by_Race_Automated/FeatureServer',
        'layer_name': 'DeathByRace_Temp',
        'out_fields': ['RACE', 'Frequency as Deaths'],
    }

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _scrape(self, **kwargs):
        """Query totals and by-race counts and build one result series.

        Rows with missing values are dropped from the by-race tables
        and the remaining values are cast to int. Known-race totals
        exclude the refused and unknown rows when those are present.

        Returns:
            A one-element list holding the value returned by
            ``self._make_series``.
        """
        # Rows excluded from the known-race totals.
        unknown_labels = ['REFUSED TO ANSWER RACE', 'UNKNOWN RACE']

        # Statewide totals
        date, totals_df = query_geoservice(**self.TOTAL)
        _logger.info(f'Processing data for {date}')
        total_cases = totals_df.loc[0, 'Cases']
        total_deaths = totals_df.loc[0, 'Deaths']

        # Cases by race
        _, race_cases_df = query_geoservice(**self.RACE_CASE)
        race_cases_df = race_cases_df.set_index('RACE').dropna().astype(int)
        known_cases_df = race_cases_df.drop(unknown_labels, errors='ignore')
        known_cases = known_cases_df.sum()['Cases']
        aa_cases = race_cases_df.loc['BLACK', 'Cases']
        aa_cases_pct = to_percentage(aa_cases, known_cases)

        # Deaths by race
        _, race_deaths_df = query_geoservice(**self.RACE_DEATH)
        race_deaths_df = race_deaths_df.set_index('RACE').dropna().astype(int)
        known_deaths_df = race_deaths_df.drop(unknown_labels, errors='ignore')
        known_deaths = known_deaths_df.sum()['Deaths']
        aa_deaths = race_deaths_df.loc['BLACK', 'Deaths']
        aa_deaths_pct = to_percentage(aa_deaths, known_deaths)

        series = self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
        return [series]
class Hawaii(ScraperBase):
    """Hawaii's data is assembled from three sources:
        date: the main page
        cases and deaths: arcgis api
        cases by race: tableau dashboard.

    When this was implemented, Hawaii did not report deaths by race.
    """
    # Sums of 'cases' and 'deaths' over the county counts layer,
    # reported as 'Cases' and 'Deaths'.
    SUMMARY_QUERY = {
        'flc_id': '20126c66ea9c479f9a4279722f418f05',
        'layer_name': 'covid_county_counts',
        'stats': [
            arcgis.make_geoservice_stat('sum', 'cases', 'Cases'),
            arcgis.make_geoservice_stat('sum', 'deaths', 'Deaths'),
        ],
    }

    RACE_URL = 'https://public.tableau.com/views/HawaiiCOVID-19-RaceChart/ChartDash?:showVizHome=no'
    MAIN_PAGE_URL = 'https://health.hawaii.gov/coronavirusdisease2019/what-you-should-know/current-situation-in-hawaii/'

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_date(self, soup):
        """Extract the update date from the main page.

        Finds the text node matching 'Updated daily', then parses the
        first 'Month D, YYYY'-style date in its parent element's text.

        Args:
            soup: object exposing ``find(string=...)``; presumably the
                parsed main page (confirm against WebdriverRunner).

        Returns:
            The parsed ``datetime.date``.
        """
        marker = soup.find(string=re.compile('Updated daily'))
        container_text = marker.parent.text
        found = re.search(r'\w+ \d{1,2}, \d{2,4}', container_text)
        parsed = datetime.strptime(found.group(), '%B %d, %Y')
        return parsed.date()

    def _scrape(self, **kwargs):
        """Collect totals, by-race cases and the date into one series.

        Returns:
            A one-element list holding the value returned by
            ``self._make_series``. No death-by-race fields are passed.
        """
        # Statewide totals from the ArcGIS service.
        _, summary_df = arcgis.query_geoservice(**self.SUMMARY_QUERY)
        cases = summary_df.loc[0, 'Cases']
        deaths = summary_df.loc[0, 'Deaths']

        # One browser session: capture the Tableau request from the race
        # chart, then fetch the main page source for the date.
        runner = WebdriverRunner()
        steps = (
            WebdriverSteps()
            .go_to_url(self.RACE_URL)
            .wait_for_number_of_elements((By.XPATH, '//canvas'), 14)
            .find_request('race_cases', find_by=tableau.find_tableau_request)
            .go_to_url(self.MAIN_PAGE_URL)
            .get_page_source()
        )
        results = runner.run(steps)
        date = self.get_date(results.page_source)

        # Cases by race from the captured Tableau request.
        parser = tableau.TableauParser(request=results.requests['race_cases'])
        census_df = parser.get_dataframe_from_key('Census')
        is_case_pct = census_df['Measure Names'] == 'Case %'
        cases_df = census_df[is_case_pct].set_index('Race')
        aa_cases = cases_df.loc['Black', 'SUM(Case Count)']
        known_race_cases = cases_df['SUM(Case Count)'].sum()

        pct_aa_cases = misc.to_percentage(aa_cases, known_race_cases)

        series = self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            pct_aa_cases=pct_aa_cases,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_race_cases,
        )
        return [series]