def _scrape(self, **kwargs):
    """Scrape total and by-race COVID-19 cases/deaths from the geoservice.

    Returns a single-element list containing the assembled data series.
    """
    # Total case count; this query also supplies the publication date.
    date, cases_df = query_geoservice(**self.CASES)
    total_cases = cases_df.iloc[0, 0]

    # Case counts by race: the known-race denominator excludes the
    # 'Not Reported' bucket.
    _, race_cases = query_geoservice(**self.CASES_BY_RACE)
    race_cases = race_cases.set_index('Race')
    known_cases = total_cases - race_cases.loc['Not Reported', 'value']
    aa_cases = race_cases.loc['African American/Black', 'value']
    pct_aa_cases = to_percentage(aa_cases, known_cases)

    # Total death count.
    _, deaths_df = query_geoservice(**self.DEATHS)
    total_deaths = deaths_df.iloc[0, 0]

    # Death counts by race: sum every category except 'Not Reported'
    # to form the known-race denominator.
    _, race_deaths = query_geoservice(**self.DEATHS_BY_RACE)
    race_deaths = race_deaths.set_index('Race')
    known_deaths = race_deaths.drop(
        'Not Reported', errors='ignore').sum()['value']
    aa_deaths = race_deaths.loc['African American', 'value']
    pct_aa_deaths = to_percentage(aa_deaths, known_deaths)

    return [self._make_series(
        date=date,
        cases=total_cases,
        deaths=total_deaths,
        aa_cases=aa_cases,
        aa_deaths=aa_deaths,
        pct_aa_cases=pct_aa_cases,
        pct_aa_deaths=pct_aa_deaths,
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=True,
        known_race_cases=known_cases,
        known_race_deaths=known_deaths,
    )]
def _scrape(self, **kwargs):
    """Scrape the single-row demographics table for totals and Black counts."""
    date, demog = query_geoservice(**self.DEMOG)
    _logger.info(f'Processing data for {date}')

    # All values live on the first (only) row of the demographics frame.
    row = demog.loc[0]

    # Cases: known-race denominator excludes the unknown-race bucket.
    total_cases = row['positives']
    known_cases = total_cases - row['unk_race']
    aa_cases = row['black']
    pct_aa_cases = to_percentage(aa_cases, known_cases)

    # Deaths: same computation using the death-specific columns.
    total_deaths = row['deaths']
    known_deaths = total_deaths - row['d_unk_race']
    aa_deaths = row['d_black']
    pct_aa_deaths = to_percentage(aa_deaths, known_deaths)

    return [
        self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape totals and Black case/death counts from the single-row table."""
    date, data = query_geoservice(**self.DATA)
    _logger.info(f'Processing data for {date}')

    # Everything is on row 0 of the returned frame.
    row = data.loc[0]

    total_cases = row['POSITIVE']
    total_deaths = row['DEATHS']

    # Subtract unknown-race counts to get the known-race denominators.
    known_cases = total_cases - row['POS_UNK']
    known_deaths = total_deaths - row['DTH_UNK']

    aa_cases = row['POS_BLK']
    aa_deaths = row['DTH_BLK']
    aa_cases_pct = to_percentage(aa_cases, known_cases)
    aa_deaths_pct = to_percentage(aa_deaths, known_deaths)

    return [
        self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape case demographics; this source has no race breakdown of deaths."""
    date, demog = query_geoservice(**self.DEMOG)
    _logger.info(f'Processing data for {date}')

    row = demog.loc[0]

    # Cases: known-race denominator excludes the unknown-race count.
    total_cases = row['CasesAll']
    known_cases = total_cases - row['C_RaceUnknown']
    aa_cases = row['C_RaceBlack']
    pct_aa_cases = to_percentage(aa_cases, known_cases)

    total_deaths = row['Deaths']
    # Does not include demographic breakdown of deaths
    known_deaths = nan
    aa_deaths = nan
    pct_aa_deaths = nan

    return [
        self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape case/death counts by race ('Black Alone', non-Hispanic).

    Raises:
        ValueError: if the 'Black Alone' category cannot be read from
            the case or death data.
    """
    # Get the timestamp
    date_published, cases_df = query_geoservice(**self.CASES)
    _logger.info(f'Processing data for {date_published}')
    cases_df = cases_df.set_index('Race_Eth')

    cases = cases_df['value'].sum()
    cases_unknown = cases_df.loc['Not Reported', 'value']
    known_cases = cases - cases_unknown

    _, deaths_df = query_geoservice(**self.DEATHS)
    deaths_df = deaths_df.set_index('Race_Eth')
    deaths = deaths_df['value'].sum()
    # 'Not Reported' may be absent from the deaths table early on.
    if 'Not Reported' in deaths_df.index:
        deaths_unknown = deaths_df.loc['Not Reported', 'value']
    else:
        deaths_unknown = 0
    known_deaths = deaths - deaths_unknown

    # BUG FIX: pandas .loc raises KeyError (not IndexError) for a
    # missing label, so the original `except IndexError` could never
    # fire and a missing category leaked an unhandled KeyError.
    try:
        cases_aa = cases_df.loc['Black Alone', 'value']
        pct_cases_aa = to_percentage(cases_aa, known_cases)
    except KeyError:
        raise ValueError('Case counts for Black Alone not found')

    try:
        if 'Black Alone' in deaths_df.index:
            deaths_aa = deaths_df.loc['Black Alone', 'value']
        else:
            deaths_aa = 0
        pct_deaths_aa = to_percentage(deaths_aa, known_deaths)
    except KeyError:
        raise ValueError('Death counts for Black Alone not found')

    return [self._make_series(
        date=date_published,
        cases=cases,
        deaths=deaths,
        aa_cases=cases_aa,
        aa_deaths=deaths_aa,
        pct_aa_cases=pct_cases_aa,
        pct_aa_deaths=pct_deaths_aa,
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=False,
        known_race_cases=known_cases,
        known_race_deaths=known_deaths,
    )]
def _scrape(self, **kwargs):
    """Scrape NE totals and by-race demographics.

    NE does not version data, so the update date is null; the report
    date is queried from one of the tables instead.
    """
    _, date_df = query_geoservice(**self.DATE)
    date = date_df.loc[0, 'LAB_REPORT_DATE'].date()
    _logger.info(f'Processing data for {date}')

    _, total_cases_df = query_geoservice(**self.TOTAL_CASES)
    total_cases = total_cases_df.loc[0, 'value']

    _, total_deaths_df = query_geoservice(**self.TOTAL_DEATHS)
    total_deaths = total_deaths_df.loc[0, 'value']

    _, demog_df = query_geoservice(**self.DEMOG)
    demog_df = demog_df.set_index('Category')
    # Keep only the race_* columns; everything else is irrelevant here.
    race_cols = [col for col in demog_df.columns if col.startswith('race_')]
    demog_df = demog_df[race_cols]

    # Known-race totals per category row (cases/deaths), excluding the
    # unknown-race column.
    known_df = demog_df.drop(columns=['race_Unknown']).sum(axis=1)

    aa_cases = demog_df.loc['PositiveCases', 'race_AfricanAmerican']
    aa_deaths = demog_df.loc['Deaths', 'race_AfricanAmerican']
    known_cases = known_df['PositiveCases']
    known_deaths = known_df['Deaths']
    aa_cases_pct = to_percentage(aa_cases, known_cases)
    aa_deaths_pct = to_percentage(aa_deaths, known_deaths)

    return [
        self._make_series(
            date=date,
            cases=int(total_cases),
            deaths=int(total_deaths),
            aa_cases=int(aa_cases),
            aa_deaths=int(aa_deaths),
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape total and by-race case/death counts from the geoservice.

    Returns a single-element list containing the assembled data series.
    """
    # Download the metadata
    date, totals = query_geoservice(**self.TOTALS)
    _logger.info(f'Processing data for {date}')

    # Download and extract total case and death data
    total_cases = totals.loc[0, 'Cases']
    total_deaths = totals.loc[0, 'Deaths']

    # Download and extract AA case and death data
    _, cases = query_geoservice(**self.RACE_CASE)
    cases = cases.set_index('Race')
    aa_cases_cnt = cases.loc['Black or African American', 'Cases']
    known_cases = cases.drop('Unknown').sum()['Cases']
    aa_cases_pct = to_percentage(aa_cases_cnt, known_cases)

    _, deaths = query_geoservice(**self.RACE_DEATH)
    deaths = deaths.set_index('Race')
    known_deaths = deaths.drop('Unknown', errors='ignore').sum()['Deaths']
    try:
        # BUG FIX: this frame exposes a 'Deaths' column (used above for
        # known_deaths), not 'value'. Reading 'value' always raised
        # KeyError, so AA deaths were silently reported as 0.
        aa_deaths_cnt = deaths.loc['Black or African American', 'Deaths']
        aa_deaths_pct = to_percentage(aa_deaths_cnt, known_deaths)
    except KeyError:
        # Row genuinely absent (e.g. no AA deaths reported yet).
        aa_deaths_cnt = 0
        aa_deaths_pct = 0

    return [
        self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases_cnt,
            aa_deaths=aa_deaths_cnt,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape cumulative totals and the race/ethnicity breakdown.

    Raises:
        ValueError: if the totals row or the 'Black' race/ethnicity
            category is missing from the service response.
    """
    # Next get the cumulative case and death counts
    date_published, total = query_geoservice(**self.TOTALS)
    _logger.info(f'Processing data for {date_published}')

    # BUG FIX (both handlers): pandas .loc raises KeyError for a
    # missing row/column label, so `except IndexError` alone could
    # never convert the failure into the intended ValueError.
    try:
        cases = total.loc[0, 'Cases']
        deaths = total.loc[0, 'Deaths']
    except (IndexError, KeyError):
        raise ValueError('Total count data not found')

    # And finally the race/ethnicity breakdowns
    _, data = query_geoservice(**self.RACE)
    data = data.set_index('RaceEthnicity')
    try:
        known = data.sum()
        cases_aa = data.loc['Black', 'Cases']
        deaths_aa = data.loc['Black', 'Deaths']
        known_cases = known['Cases']
        known_deaths = known['Deaths']
        pct_cases_aa = to_percentage(cases_aa, known_cases)
        pct_deaths_aa = to_percentage(deaths_aa, known_deaths)
    except (IndexError, KeyError):
        raise ValueError('No data found for Black RaceEthnicity category')

    return [self._make_series(
        date=date_published,
        cases=cases,
        deaths=deaths,
        aa_cases=cases_aa,
        aa_deaths=deaths_aa,
        pct_aa_cases=pct_cases_aa,
        pct_aa_deaths=pct_deaths_aa,
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=False,
        known_race_cases=known_cases,
        known_race_deaths=known_deaths,
    )]
def _scrape(self, **kwargs):
    """Scrape totals plus by-race case and death tables."""
    # Download and extract the case and death totals
    date, totals = query_geoservice(**self.TOTAL)
    _logger.info(f'Processing data for {date}')
    total_cases = totals.loc[0, 'Cases']
    total_deaths = totals.loc[0, 'Deaths']

    # Rows with missing counts are dropped before casting to int.
    # Refused/unknown race rows are excluded from the known-race sums.
    unknown_labels = ['REFUSED TO ANSWER RACE', 'UNKNOWN RACE']

    # Extract by-race data
    _, by_race_cases = query_geoservice(**self.RACE_CASE)
    by_race_cases = by_race_cases.set_index('RACE').dropna().astype(int)
    known_cases = by_race_cases.drop(
        unknown_labels, errors='ignore').sum()['Cases']
    aa_cases = by_race_cases.loc['BLACK', 'Cases']
    aa_cases_pct = to_percentage(aa_cases, known_cases)

    _, by_race_deaths = query_geoservice(**self.RACE_DEATH)
    by_race_deaths = by_race_deaths.set_index('RACE').dropna().astype(int)
    known_deaths = by_race_deaths.drop(
        unknown_labels, errors='ignore').sum()['Deaths']
    aa_deaths = by_race_deaths.loc['BLACK', 'Deaths']
    aa_deaths_pct = to_percentage(aa_deaths, known_deaths)

    return [
        self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape totals and a by-race breakdown, aggregating race rows.

    Raises:
        AssertionError: if any of the service responses has an
            unexpected number of rows.
    """
    _, total_cases_df = query_geoservice(**self.TOTAL_CASES_QUERY)
    _, total_deaths_df = query_geoservice(**self.TOTAL_DEATHS_QUERY)
    date, raw_race_df = query_geoservice(**self.RACE_QUERY)

    # The race table may contain multiple rows per race; collapse them.
    race_df = raw_race_df.groupby('Race').agg({
        'Cases': 'sum',
        'Deaths': 'sum'
    })

    assert len(total_cases_df
               ) == 1, 'total_cases_df has unexpected number of rows'
    # BUG FIX: corrected typo 'unexepected' in the assertion message.
    assert len(total_deaths_df
               ) == 1, 'total_deaths_df has unexpected number of rows'
    assert len(race_df) == 7, 'race_df has unexpected number of rows'

    cases = total_cases_df.iloc[0]['value']
    deaths = total_deaths_df.iloc[0]['value']

    aa_cases = race_df.loc['Black']['Cases']
    aa_deaths = race_df.loc['Black']['Deaths']
    # Known-race denominators exclude the 'Unknown' race bucket.
    known_race_cases = race_df.drop('Unknown')['Cases'].sum()
    known_race_deaths = race_df.drop('Unknown')['Deaths'].sum()
    pct_aa_cases = to_percentage(aa_cases, known_race_cases)
    pct_aa_deaths = to_percentage(aa_deaths, known_race_deaths)

    return [
        self._make_series(date=date,
                          cases=cases,
                          deaths=deaths,
                          aa_cases=aa_cases,
                          aa_deaths=aa_deaths,
                          pct_aa_cases=pct_aa_cases,
                          pct_aa_deaths=pct_aa_deaths,
                          pct_includes_unknown_race=False,
                          pct_includes_hispanic_black=True,
                          known_race_cases=known_race_cases,
                          known_race_deaths=known_race_deaths)
    ]
def _scrape(self, **kwargs):
    """Scrape by-race case and death tables keyed on 'Racecat'."""
    # Download the case data
    date, case_tbl = query_geoservice(**self.CASES)
    _logger.info(f'Processing data for {date}')
    case_tbl = case_tbl.set_index('Racecat')

    # Extract/calculate case info; 'Unknown' is excluded from the
    # known-race denominator.
    case_values = case_tbl.loc[:, 'value']
    total_cases = case_values.sum()
    total_known_cases = case_values.drop('Unknown').sum()
    aa_cases_cnt = case_tbl.loc['Black', 'value']
    aa_cases_pct = to_percentage(aa_cases_cnt, total_known_cases)

    # Download the deaths data
    _, death_tbl = query_geoservice(**self.DEATHS)
    death_tbl = death_tbl.set_index('Racecat')

    # Extract/calculate deaths info, mirroring the case computation.
    death_values = death_tbl.loc[:, 'value']
    total_deaths = death_values.sum()
    total_known_deaths = death_values.drop('Unknown').sum()
    aa_deaths_cnt = death_tbl.loc['Black', 'value']
    aa_deaths_pct = to_percentage(aa_deaths_cnt, total_known_deaths)

    return [
        self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases_cnt,
            aa_deaths=aa_deaths_cnt,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
            known_race_cases=total_known_cases,
            known_race_deaths=total_known_deaths,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape totals from the geoservice and by-race deaths from Tableau.

    No AA case breakdown is published, so aa_cases/aa_cases_pct are NaN.
    """
    # Get totals date
    # BUG FIX: query_geoservice takes keyword arguments (every sibling
    # scraper unpacks the query dict with **); positional call was wrong.
    date, totals = query_geoservice(**self.TOTALS)
    _logger.info(f'Processing data for {date}')
    total_cases = totals.loc[0, 'TOTAL_CASES']
    total_deaths = totals.loc[0, 'TOTAL_DEATHS']

    # No AA case data
    # TODO: find AA case data
    aa_cases = float('nan')
    aa_cases_pct = float('nan')

    # Download the deaths data
    # First load the main page to get a session ID
    s = requests.Session()
    r = s.get(self.RACE_MAIN_PAGE)
    r.raise_for_status()
    _logger.info(f'Tableau session ID is {r.headers["x-session-id"]}')

    # Prepare the data download URL and retrieve
    r = s.get(
        self.DEATHS_RACE_URL_TEMPLATE.format(
            session_id=r.headers['x-session-id']))
    r.raise_for_status()

    # Load the data into a DataFrame
    deaths = pd.read_csv(BytesIO(r.content))
    n_col = deaths.columns[1]
    # The death total is embedded in the count column's header; this
    # supersedes the geoservice value read above.
    total_deaths = int(re.search(r'\d+', n_col).group(0))
    # BUG FIX: pandas Series.str has no `search` method; use
    # `contains` for the substring/regex match.
    aa_rows = deaths[deaths['Race/Ethnicity1'].str.contains('Black')]
    # BUG FIX: take the first matching row so scalars (not
    # single-element Series) are passed to _make_series.
    aa_row = aa_rows.iloc[0]
    aa_deaths = aa_row[n_col]
    aa_deaths_pct = round(100 * aa_row['Percent (Race)'], 2)

    return [
        self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape a combined demographics table and derive totals by race."""
    # Download the metadata
    date, data = query_geoservice(**self.DATA)
    _logger.info(f'Processing data for {date}')
    data = data.set_index('Demographic')

    # Discard non-race rows
    race_rows = [
        'White', 'Black', 'AI/AN', 'Asian', 'NHOPI', 'Multiple', 'Other',
        'Unknown Race'
    ]
    data = data.reindex(race_rows)

    # Add total rows
    data.loc['Grand Total', :] = data.sum()
    data.loc['Known Race', :] = data.drop(['Grand Total',
                                           'Unknown Race']).sum()

    # Extract/calculate case info
    total_cases = data.loc['Grand Total', 'Cases']
    known_cases = data.loc['Known Race', 'Cases']
    aa_cases_cnt = data.loc['Black', 'Cases']
    aa_cases_pct = to_percentage(aa_cases_cnt, known_cases, 2)

    # Extract/calculate death info
    total_deaths = data.loc['Grand Total', 'Deaths']
    known_deaths = data.loc['Known Race', 'Deaths']
    aa_deaths_cnt = data.loc['Black', 'Deaths']
    aa_deaths_pct = to_percentage(aa_deaths_cnt, known_deaths, 2)

    return [
        self._make_series(
            date=date,
            cases=total_cases,
            deaths=total_deaths,
            aa_cases=aa_cases_cnt,
            aa_deaths=aa_deaths_cnt,
            pct_aa_cases=aa_cases_pct,
            pct_aa_deaths=aa_deaths_pct,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_cases,
            known_race_deaths=known_deaths,
        )
    ]
def _scrape(self, **kwargs):
    """Scrape totals from ArcGIS and case-by-race data from a Tableau dashboard.

    Death demographics are not available, so only case fields are filled in.
    """
    _, summary_df = arcgis.query_geoservice(**self.SUMMARY_QUERY)
    cases = summary_df.loc[0, 'Cases']
    deaths = summary_df.loc[0, 'Deaths']

    # Drive a headless browser: capture the Tableau data request from the
    # race dashboard, then fetch the main page for the report date.
    runner = WebdriverRunner()
    steps = WebdriverSteps()
    steps = steps.go_to_url(self.RACE_URL)
    steps = steps.wait_for_number_of_elements((By.XPATH, '//canvas'), 14)
    steps = steps.find_request('race_cases',
                               find_by=tableau.find_tableau_request)
    steps = steps.go_to_url(self.MAIN_PAGE_URL)
    steps = steps.get_page_source()
    results = runner.run(steps)

    date = self.get_date(results.page_source)

    # Extract the case-percentage rows of the Census sheet, keyed by race.
    parser = tableau.TableauParser(request=results.requests['race_cases'])
    cases_df = parser.get_dataframe_from_key('Census')
    cases_df = cases_df[cases_df['Measure Names'] == 'Case %'].set_index(
        'Race')

    aa_cases = cases_df.loc['Black', 'SUM(Case Count)']
    known_race_cases = cases_df['SUM(Case Count)'].sum()
    pct_aa_cases = misc.to_percentage(aa_cases, known_race_cases)

    return [
        self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            pct_aa_cases=pct_aa_cases,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_race_cases,
        )
    ]