Python extract_arcgis_attributes示例，fetcher.utils.extract_arcgis_attributes Python示例

示例#1

0

显示文件

def handle_wa(res, mapping):
    """Build the WA values from one ArcGIS result and a workbook.

    ``res[0]`` is handed to ``extract_arcgis_attributes``. ``res[1]`` is
    indexed by sheet name and the last row of each sheet supplies the value
    (presumably a dict of DataFrames -- confirm against the caller).

    Returns the tagged values keyed by field name.
    """
    tagged = {}
    for arcgis_result in res[:1]:
        tagged = extract_arcgis_attributes(arcgis_result, mapping, 'WA')

    # Sheet name (also the column read from that sheet) -> output field.
    sheet_fields = (
        ('Cases', Fields.POSITIVE),
        ('Hospitalizations', Fields.HOSP),
    )
    for sheet, field in sheet_fields:
        latest_row = res[1][sheet].iloc[-1]
        tagged[field.name] = latest_row[sheet]

    return tagged

示例#2

0

显示文件

def handle_vt(res, mapping, queries):
    """Combine the VT ArcGIS records with a cumulative table built from res[1].

    Every record extracted from ``res[0]`` is stamped with the DATE_USED
    constant of the first query. ``res[1]`` is renamed through ``mapping``,
    indexed by DATE, sorted, cumulatively summed, and appended as records.

    Returns the combined list of records.
    """
    records = extract_arcgis_attributes(res[0], mapping, 'VT')
    for record in records:
        record[DATE_USED] = queries[0].constants[DATE_USED]

    renamed = res[1].rename(columns=mapping)
    by_date = renamed.set_index(DATE).sort_index()
    running_totals = by_date.cumsum()
    add_query_constants(running_totals, queries[1])

    # Timestamp column: the index truncated to midnight, timezone dropped.
    day_index = running_totals.index.normalize()
    running_totals[TS] = day_index.tz_localize(None)
    records.extend(running_totals.to_dict(orient='records'))

    return records

示例#3

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_ne(res, mapping):
    """Merge the NE ArcGIS results, then read test totals from the last one.

    All responses except the last go through ``extract_arcgis_attributes``.
    The last response is read by hand: the attributes of its first feature
    supply the positive / negative / inconclusive / total values.
    """
    tagged = {}
    for arcgis_result in res[:-1]:
        tagged.update(extract_arcgis_attributes(arcgis_result, mapping, 'NE'))

    # This is where mapping and the lack of "as" support break down, so the
    # attribute names are spelled out explicitly.
    first_feature = res[-1].get('features', [{}])[0]
    stats = first_feature.get('attributes')
    explicit_keys = (
        (Fields.POSITIVE, 'TotalPositiveAsOfThisDate'),
        (Fields.NEGATIVE, 'TotalNotDetectedAsOfThisDate'),
        (Fields.INCONCLUSIVE, 'TotalInconclusiveAsOfThisDate'),
        (Fields.TOTAL, 'AllTestsAsOfThisDate'),
    )
    for field, source_key in explicit_keys:
        tagged[field.name] = stats[source_key]
    return tagged

示例#4

0

显示文件

文件： backfill.py 项目： COVID19Tracking/covid19-datafetcher

def handle_ak(res, mapping, queries):
    """Assemble the AK backfill records from three responses.

    * ``res[0]``: extracted, passed through ``make_cumsum_df`` and tagged
      with the constants of ``queries[0]``.
    * ``res[1]``: feature attributes are cumulatively summed per timestamp,
      resampled to one row per day (forward filled) and tagged with the
      constants of ``queries[1]``.
    * ``res[2]``: extracted as-is and stamped with the DATE_USED constant of
      ``queries[2]``.

    Returns one flat list of record dicts.
    """
    # tests
    test_rows = extract_arcgis_attributes(res[0], mapping)
    tests_frame = make_cumsum_df(test_rows)
    add_query_constants(tests_frame, queries[0])
    records = tests_frame.to_dict(orient='records')

    # cases
    case_attributes = [feature['attributes'] for feature in res[1]['features']]
    case_frame = pd.DataFrame(case_attributes).rename(columns=mapping)
    case_frame[TS] = pd.to_datetime(case_frame[TS], unit='ms')
    case_frame = case_frame.set_index(TS).sort_index()
    case_frame = case_frame.cumsum().resample('1d').ffill()
    case_frame[TS] = case_frame.index
    add_query_constants(case_frame, queries[1])
    records.extend(case_frame.to_dict(orient='records'))

    # last item: already cumulative
    cumulative_rows = extract_arcgis_attributes(res[2], mapping)
    for row in cumulative_rows:
        row[DATE_USED] = queries[2].constants[DATE_USED]
    records.extend(cumulative_rows)

    return records

示例#5

0

显示文件

def handle_ut(res, mapping):
    """Collect UT values from an ArcGIS result and two parsed HTML pages.

    Args:
        res: sequence of responses. ``res[0]`` is handed to
            ``extract_arcgis_attributes``; ``res[1]`` and ``res[2]`` are
            searched with ``find``/``find_all`` (presumably BeautifulSoup
            documents -- confirm against the caller).
        mapping: dict from source identifier (element id, cell text or
            table caption) to output tag name.

    Returns:
        dict of tag name to value.
    """
    tagged = {}
    soup_start = 1
    for result in res[:soup_start]:
        # The label used to be 'NJ' (copy-paste slip); this handler is UT.
        partial = extract_arcgis_attributes(result, mapping, 'UT')
        tagged.update(partial)

    # Elements on the stats page are looked up by id; the number sits in a
    # child with class "value" or, failing that, "value-output".
    stats = res[1]
    for element_id, tag in mapping.items():
        element = stats.find(id=element_id)
        if not element:
            continue
        value_item = element.find(class_='value')
        if not value_item:
            value_item = element.find(class_='value-output')
        if not value_item:
            continue
        tagged[tag] = atoi(value_item.get_text(strip=True))

    # inverse mapping: tag name -> source key (last key wins on duplicates)
    revmap = {v: k for k, v in mapping.items()}
    hosp = res[2]
    tables = hosp.find_all('table')

    # Current hospitalizations: sum every sibling cell of each matching row.
    curr_hosp_table = tables[0]
    tds = curr_hosp_table.find_all('td',
                                   string=re.compile(
                                       revmap[Fields.CURR_HOSP.name]))
    curr_hosp = 0
    for td in tds:
        for sibling in td.next_siblings:
            if sibling.name == 'td':
                curr_hosp += atoi(sibling.get_text(strip=True))
    tagged[Fields.CURR_HOSP.name] = curr_hosp

    # TODO: code here can be improved, combined with top part
    # Current ICU: the last sibling cell of the first matching row wins.
    td = curr_hosp_table.find('td',
                              string=re.compile(revmap[Fields.CURR_ICU.name]))
    for sibling in td.next_siblings:
        if sibling.name == 'td':
            tagged[Fields.CURR_ICU.name] = atoi(sibling.get_text(strip=True))

    # Remaining tables are matched by caption; the second cell is the value.
    for table in tables[1:]:
        caption = table.caption.get_text(strip=True)
        if caption in mapping:
            second_cell = table.find_all('td', limit=2)[1]
            tagged[mapping[caption]] = atoi(second_cell.get_text(strip=True))

    return tagged

示例#6

0

显示文件

文件： states.py 项目： mgithub46/covid19-datafetcher

def handle_wi(res, mapping):
    """Merge the WI ArcGIS results and add the PCR testing-encounter total.

    The last response is filtered on its 'Measure Names' column and the
    'Totals' of the matching rows are summed (presumably a DataFrame --
    confirm against the caller).
    """
    tagged = {}
    for arcgis_result in res[:-1]:
        tagged.update(extract_arcgis_attributes(arcgis_result, mapping, 'WI'))

    # testing encounters: first source key mapped to PCR_TEST_ENCOUNTERS
    testing = res[-1]
    encounter_keys = [
        key for key, target in mapping.items()
        if target == Fields.PCR_TEST_ENCOUNTERS.name
    ]
    encounters = encounter_keys[0]
    matching_rows = testing[testing['Measure Names'] == encounters]
    tagged[mapping[encounters]] = matching_rows['Totals'].sum()
    return tagged

示例#7

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_fl(res, mapping):
    """Combine the FL responses into one mapped dict.

    ``res[0]`` goes through ``map_attributes``, the middle responses through
    ``extract_arcgis_attributes``, and the attribute values of the first
    feature of the last response are summed into PCR_TEST_ENCOUNTERS.

    NOTE(review): the original note says non-FL residents must be added to
    the death and hosp totals; nothing visible here does that explicitly --
    confirm it is handled by the mapping.
    """
    mapped = map_attributes(res[0], mapping, 'FL')

    for arcgis_result in res[1:-1]:
        mapped.update(extract_arcgis_attributes(arcgis_result, mapping, 'FL'))

    # pcr encounters
    features = res[-1].get('features', [{}])
    encounter_counts = features[0].get('attributes')
    mapped[Fields.PCR_TEST_ENCOUNTERS.name] = sum(encounter_counts.values())
    return mapped

示例#8

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_sc(res, mapping):
    """Merge the SC ArcGIS results with a flattened testing table.

    The last response is unstacked and its index levels are joined with "-"
    before being passed to ``map_attributes``.
    """
    tagged = {}
    for arcgis_result in res[:-1]:
        tagged.update(
            extract_arcgis_attributes(arcgis_result, mapping,
                                      debug_state='SC'))

    # testing
    flattened = res[-1].unstack()
    flattened.index = flattened.index.map("-".join)
    tagged.update(map_attributes(flattened, mapping))

    return tagged

示例#9

0

显示文件

def handle_la(res, mapping):
    """Collect the LA values.

    The first two responses carry one feature per measure; their
    'Measure' / 'SUM_Value' attributes are folded into a dict and mapped
    with ``map_attributes``. Every later response goes through
    ``extract_arcgis_attributes``.
    """
    tagged = {}
    for stats in res[:2]:
        if 'features' not in stats or len(stats['features']) <= 0:
            continue
        measures = {}
        for feature in stats['features']:
            attrs = feature.get('attributes', {})
            measures[attrs.get('Measure')] = attrs.get('SUM_Value')
        tagged.update(map_attributes(measures, mapping, 'LA'))

    # everything else
    for arcgis_result in res[2:]:
        tagged.update(extract_arcgis_attributes(arcgis_result, mapping, 'LA'))

    return tagged

示例#10

0

显示文件

文件： states.py 项目： mgithub46/covid19-datafetcher

def handle_tx(res, mapping):
    """Merge the TX ArcGIS results, then add positive PCR and current ICU.

    * ``res[-2]``: attribute values of the first feature are summed into
      SPECIMENS_POS.
    * ``res[-1]``: rows whose first column equals 'Total' are selected and
      the last such value of the last column becomes CURR_ICU.
    """
    tagged = {}
    for arcgis_result in res[:-2]:
        tagged.update(
            extract_arcgis_attributes(arcgis_result, mapping,
                                      debug_state='TX'))

    # positive pcr
    pcr_attributes = res[-2]['features'][0]['attributes']
    tagged[Fields.SPECIMENS_POS.name] = sum(pcr_attributes.values())

    # last item is the current ICU DataFrame
    icu_frame = res[-1]
    first_col = icu_frame.columns[0]
    total_rows = icu_frame.loc[icu_frame[first_col] == 'Total']
    tagged[Fields.CURR_ICU.name] = total_rows[icu_frame.columns[-1]].iloc[-1]
    return tagged

示例#11

0

显示文件

文件： backfill.py 项目： COVID19Tracking/covid19-datafetcher

def handle_al(res, mapping, queries):
    """Assemble the AL backfill records.

    Every response but the last is extracted and each record is updated
    with the constants of the query at the same position. The last response
    is turned into a DataFrame, its 'DATE' strings are prefixed with '20',
    and the result is passed through ``_yet_another_prep_cumsum_df``.

    Returns one flat list of record dicts.
    """
    tagged = []
    for position, response in enumerate(res[:-1]):
        rows = extract_arcgis_attributes(response, mapping)
        constants = queries[position].constants
        for row in rows:
            row.update(constants)
        tagged.extend(rows)

    test_attributes = [feature['attributes'] for feature in res[-1]['features']]
    tests = pd.DataFrame(test_attributes).rename(columns=mapping)
    # NOTE(review): presumably the dates come with a two-digit year -- confirm.
    tests['DATE'] = '20' + tests['DATE']
    cumulative = _yet_another_prep_cumsum_df(tests)
    add_query_constants(cumulative, queries[-1])
    tagged.extend(cumulative.to_dict(orient='records'))
    return tagged

示例#12

0

显示文件

文件： states.py 项目： baajur/covid19-datafetcher

def handle_ne(res, mapping):
    """Merge the NE ArcGIS results and add per-lab-status counts.

    Each feature of the last response is expected to look like
    ``{attributes: {lab_status: NAME, COUNT_EXPR0: VALUE}}``; a value is
    kept only when NAME is a key of ``mapping``.
    """
    tagged = {}
    for arcgis_result in res[:-1]:
        tagged.update(extract_arcgis_attributes(arcgis_result, mapping, 'NE'))

    stats = res[-1]
    if 'features' not in stats or len(stats['features']) <= 0:
        return tagged

    for feature in stats['features']:
        attrs = feature['attributes']
        lab_status = attrs['lab_status']
        count = attrs['COUNT_EXPR0']
        if lab_status in mapping:
            tagged[mapping[lab_status]] = count

    return tagged

示例#13

0

显示文件

文件： states.py 项目： baajur/covid19-datafetcher

def handle_mo(res, mapping):
    """Merge the MO ArcGIS results and scan a table for hosp/vent rows.

    The row titles are recovered from ``mapping`` by value: the last key
    mapped to CURR_HOSP and the last key mapped to CURR_VENT. A row matches
    when its first cell is a string starting with that title; the second
    cell is taken as the value.
    """
    tagged = {}
    for arcgis_result in res[:-1]:
        tagged.update(
            extract_arcgis_attributes(arcgis_result, mapping,
                                      debug_state='MO'))

    hosp = res[-1]
    # kinda funny, yes: reverse lookup of the titles through the mapping
    hosp_titles = [key for key, target in mapping.items()
                   if target == Fields.CURR_HOSP.name]
    hosp_title = hosp_titles[-1]
    vent_titles = [key for key, target in mapping.items()
                   if target == Fields.CURR_VENT.name]
    vent_title = vent_titles[-1]

    for row in hosp.itertuples():
        label = row[1]
        if not isinstance(label, str):
            continue
        if label.startswith(hosp_title):
            tagged[Fields.CURR_HOSP.name] = row[2]
        if label.startswith(vent_title):
            tagged[Fields.CURR_VENT.name] = row[2]

    return tagged

示例#14

0

显示文件

def handle_fl(res, mapping):
    """Combine the FL responses and add current hospitalizations.

    ``res[0]`` goes through ``map_attributes``, the middle responses through
    ``extract_arcgis_attributes``. The last response is iterated row by row
    and rows whose 'County' is 'All' supply CURR_HOSP (the last one wins).

    NOTE(review): the original note says non-FL residents must be added to
    the death and hosp totals; nothing visible here does that explicitly --
    confirm it is handled by the mapping.
    """
    mapped = map_attributes(res[0], mapping, 'FL')

    for arcgis_result in res[1:-1]:
        mapped.update(extract_arcgis_attributes(arcgis_result, mapping, 'FL'))

    # Current hosp csv
    for row in res[-1]:
        if row.get('County') != 'All':
            continue
        mapped[Fields.CURR_HOSP.name] = atoi(row.get('COVID Hospitalizations'))

    return mapped

示例#15

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_ga(res, mapping):
    """Merge the GA ArcGIS results with the last rows of two zipped CSVs.

    Args:
        res: sequence of responses; all but the last go through
            ``extract_arcgis_attributes``. The last one is passed to
            ``zipContextManager``, which yields a directory path.
        mapping: dict from source key to output tag name.

    Returns:
        dict of tag name to value.

    Raises:
        KeyError: if CURR_HOSP or 'CURR_HOSP_PUI' is missing after the
            ArcGIS results are merged.
    """
    tagged = {}
    for result in res[:-1]:
        partial = extract_arcgis_attributes(result, mapping, debug_state='GA')
        tagged.update(partial)
    # Fold the 'CURR_HOSP_PUI' value into CURR_HOSP and drop the extra key.
    tagged[Fields.CURR_HOSP.name] += tagged.pop('CURR_HOSP_PUI')

    # last item is zip
    files = ["total_testing.csv", "summary_totals.csv"]
    with zipContextManager(res[-1]) as zipdir:
        for filename in files:
            # Close each CSV as soon as it is read; the reader used to be
            # given a file object that was never closed.
            with open(os.path.join(zipdir, filename), 'r') as handle:
                rows = list(csv.DictReader(handle))
            # Only the last row of each file is mapped.
            partial = map_attributes(rows[-1], mapping, 'GA')
            tagged.update(partial)

    return tagged

示例#16

0

显示文件

文件： source_utils.py 项目： mgithub46/covid19-datafetcher

def process_source_responses(source, results):
    """Turn raw query results for one source into aggregated data.

    When ``source.extras`` is truthy it is called with all results and the
    source mapping. Otherwise each result is processed according to the type
    of the query at the same position. The processed results are then passed
    to ``_aggregate_state_results``.
    """
    if source.extras:
        processed = source.extras(results, source.mapping)
    else:
        processed = []
        for position, result in enumerate(results):
            query = source.queries[position]
            if query.type != 'arcgis':
                # This is a guess; getting an unknown top level object
                partial = extract_attributes(result, query.data_path,
                                             source.mapping, source.name)
            else:
                partial = extract_arcgis_attributes(result, source.mapping,
                                                    source.name)
            processed.append(partial)

    return _aggregate_state_results(source, processed)

示例#17

0

显示文件

def handle_nj(res, mapping):
    """Merge all NJ ArcGIS results and apply two fixed offsets.

    Everything is parsed the same way; past recoveries are added here
    because the author did not know how to add a constant to the ArcGIS
    query itself.
    """
    mapped = {}
    for arcgis_result in res:
        mapped.update(extract_arcgis_attributes(arcgis_result, mapping, 'NJ'))

    # Not a magic value: it comes from an existing query, but it is always
    # the same.
    recovered_key = Fields.RECOVERED.name
    mapped[recovered_key] += 15642

    # This magic number math happens on the dashboard
    probable_key = Fields.PROBABLE.name
    mapped[probable_key] += 58

    return mapped

示例#18

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_wa(res, mapping):
    """Build the WA values from an ArcGIS result, a workbook and a test table.

    * ``res[0]``: handed to ``extract_arcgis_attributes``.
    * ``res[1]``: indexed by sheet name; the last row of each sheet supplies
      the value.
    * ``res[-1]``: columns are renamed through ``mapping``, columns sharing a
      name are summed together, and only totals whose name contains
      'SPECIMEN' are copied into the output.
    """
    tagged = {}
    for arcgis_result in res[:1]:
        tagged = extract_arcgis_attributes(arcgis_result, mapping, 'WA')

    # Sheet name (also the column read from that sheet) -> output field.
    sheet_fields = (
        ('Cases', Fields.POSITIVE),
        ('Hospitalizations', Fields.HOSP),
    )
    for sheet, field in sheet_fields:
        latest_row = res[1][sheet].iloc[-1]
        tagged[field.name] = latest_row[sheet]

    # tests
    renamed = res[-1].rename(columns=mapping)
    grouped = renamed.groupby(renamed.columns.values, axis=1)
    totals = grouped.sum().sum()
    specimen_totals = totals.filter(like='SPECIMEN')
    tagged.update(specimen_totals.to_dict())

    return tagged

示例#19

0

显示文件

文件： states.py 项目： baajur/covid19-datafetcher

def handle_ar(res, mapping):
    """Merge the AR ArcGIS results with values scraped from an HTML table.

    The body of the last table in the last response is read row by row: the
    first cell is the name, the second the value. Only names present in
    ``mapping`` are kept.
    """
    tagged = {}
    for arcgis_result in res[:-1]:
        tagged.update(extract_arcgis_attributes(arcgis_result, mapping, 'AR'))

    page = res[-1]
    last_table = page.find_all("table")[-1]
    body = last_table.find("tbody")
    for row in body.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) >= 2:
            label = cells[0].get_text(strip=True)
            raw_value = cells[1].get_text(strip=True)
            if label in mapping:
                tagged[mapping[label]] = atoi(raw_value)

    return tagged

示例#20

0

显示文件

def handle_or(res, mapping):
    """Merge the OR ArcGIS results with values scraped from an HTML page.

    Args:
        res: sequence of responses; all but the last go through
            ``extract_arcgis_attributes``. The last one is searched with
            ``find``/``find_all`` (presumably a BeautifulSoup document --
            confirm against the caller).
        mapping: dict from source name (row label) to output tag name.

    Returns:
        dict of tag name to value.
    """
    mapped = {}
    for result in res[:-1]:
        # The label used to be 'NJ' (copy-paste slip); this handler is OR.
        partial = extract_arcgis_attributes(result, mapping, 'OR')
        mapped.update(partial)

    # The last item is the page that needs to be scraped
    page = res[-1]
    # main stats: the table following the "Overview" heading
    h4 = page.find('h4', string=re.compile("Overview"))
    main_table = h4.find_next_sibling('table')
    for row in main_table.find_all('tr'):
        tds = row.find_all('td')
        if len(tds) < 2:
            continue
        name = tds[0].get_text(strip=True)
        value = tds[1].get_text(strip=True)
        if name in mapping:
            try:
                mapped[mapping[name]] = atoi(value)
            except Exception:
                # Best effort: an unparsable cell is logged and skipped.
                logging.warning("OR: failed to parse {} for {}".format(
                    value, name))

    # The tables are addressed by their position on the page.
    tables = page.find_all('table')
    hosp = tables[4]
    curr_hosp = tables[6]

    # Second cell of the hospitalization table holds the total.
    td = hosp.find_all("td", limit=2)
    mapped[Fields.HOSP.name] = atoi(td[1].get_text(strip=True))

    # TODO: Unify this code (data tables)
    for tr in curr_hosp.find_all("tr"):
        tds = tr.find_all('td')
        if len(tds) < 2:
            continue
        name = tds[0].get_text(strip=True)
        value = tds[1].get_text(strip=True)
        if name in mapping:
            mapped[mapping[name]] = atoi(value)

    return mapped

示例#21

0

显示文件

文件： states.py 项目： mgithub46/covid19-datafetcher

def handle_hi(res, mapping):
    """Collect the HI values from four responses.

    * ``res[0]``: handed to ``extract_arcgis_attributes``.
    * ``res[1]``: rows; the last row with Region 'State' and a truthy
      'Cases_Tot' is mapped through ``mapping``.
    * ``res[2]``: column sums (except 'Date') are mapped through ``mapping``.
    * ``res[3]``: page whose table after the heading with id 'probables' is
      summed over the "Total Probable Cases" column.
    """
    tagged = {}
    for arcgis_result in res[:1]:
        tagged.update(
            extract_arcgis_attributes(arcgis_result, mapping,
                                      debug_state='HI'))

    # Rows are expected oldest -> newest, so the last matching row is the
    # newest one with values.
    latest_state_row = {}
    for row in res[1]:
        if row['Region'] == 'State' and row.get('Cases_Tot'):
            latest_state_row = row
    for column, value in latest_state_row.items():
        if column in mapping:
            tagged[mapping[column]] = value

    for column, total in res[2].sum().items():
        # need to ignore date
        if column != 'Date' and column in mapping:
            tagged[mapping[column]] = total

    heading = res[3].find('h3', id='probables')
    table = heading.find_next('table')
    header_cells = table.find('thead').find_all('th')
    # Position of the first header mentioning the probable total, or -1.
    probables_index = next(
        (position for position, th in enumerate(header_cells)
         if "Total Probable Cases" in th.get_text(strip=True)),
        -1)

    probables_val = 0
    if probables_index >= 0:
        probables_val = sum(
            atoi(tr.find_all('td')[probables_index].get_text(strip=True))
            for tr in table.find('tbody').find_all('tr'))

    tagged[Fields.PROBABLE.name] = probables_val

    return tagged

示例#22

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_nj(res, mapping):
    """Merge the NJ ArcGIS results and apply the recovery/probable offsets.

    Everything but the last response is parsed the same way; past recoveries
    are added here because the author did not know how to add a constant to
    the ArcGIS query itself. The probable offset is read from the widget at
    position 17 of the last response.
    """
    mapped = {}
    for arcgis_result in res[:-1]:
        mapped.update(extract_arcgis_attributes(arcgis_result, mapping, 'NJ'))

    # Not a magic value: it comes from an existing query, but it is always
    # the same.
    recovered_key = Fields.RECOVERED.name
    mapped[recovered_key] += 15642

    # Find the magic number added to probables
    probables_widget = res[-1]['widgets'][17]
    conversion = probables_widget.get('valueConversion', {})
    offset = atoi(conversion.get('offset', 0))
    mapped[Fields.PROBABLE.name] += offset

    return mapped

示例#23

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_mn(res, mapping):
    """Merge the MN ArcGIS result with values scraped from an HTML page.

    Args:
        res: sequence of responses. ``res[0]`` goes through
            ``extract_arcgis_attributes``; ``res[-1]`` is searched with
            ``find``/``find_all`` (presumably a BeautifulSoup document --
            confirm against the caller).
        mapping: dict from row header text to output tag name.

    Returns:
        dict of tag name to value.
    """
    mapped = {}
    for result in res[:1]:
        # The label used to be 'NJ' (copy-paste slip); this handler is MN.
        partial = extract_arcgis_attributes(result, mapping, 'MN')
        mapped.update(partial)

    # testing: up to two tables following each heading of interest
    soup = res[-1]
    sections = ('Testing', 'Deaths', 'Hospitalizations', 'Daily Update')
    for heading in soup.find_all(['h2', 'h3']):
        heading_text = heading.get_text(strip=True).strip().strip(":")
        if heading_text not in sections:
            continue
        for table in heading.find_next_siblings('table', limit=2):
            for tr in table.find_all('tr'):
                th = tr.find('th')
                td = tr.find('td')
                if th is None or td is None:
                    # Rows without both a header and a data cell carry no
                    # label/value pair; skip them instead of crashing.
                    continue
                label = th.get_text(strip=True).strip()
                value = td.get_text(strip=True).strip()
                if label in mapping:
                    mapped[mapping[label]] = atoi(value)

    return mapped

示例#24

0

显示文件

文件： states.py 项目： baajur/covid19-datafetcher

def handle_al(res, mapping):
    """Merge the AL ArcGIS results and read two dashboard widgets.

    Widgets 6 (hospitalizations) and 29 (recoveries) of the last response
    are used only when their description contains "STATEWIDE" and their
    text starts like a number.
    """
    tagged = {}
    for arcgis_result in res[:-1]:
        tagged.update(
            extract_arcgis_attributes(arcgis_result, mapping,
                                      debug_state='AL'))

    widgets = res[-1].get('widgets', {})
    # 6 = hospitalizations
    # 29 = recoveries
    candidates = [(widgets[6], Fields.HOSP.name),
                  (widgets[29], Fields.RECOVERED.name)]

    for widget, field in candidates:
        description = widget.get('defaultSettings', {}).get('description', "")
        if description.find("STATEWIDE") < 0:
            continue
        # now check that it's a numeric value
        settings = widget['defaultSettings']
        text = settings['middleSection']['textInfo']['text'].strip()
        if re.match("[1-9][0-9,]*", text) is None:
            continue
        tagged[field] = atoi(text)

    return tagged

示例#25

0

显示文件

文件： backfill.py 项目： COVID19Tracking/covid19-datafetcher

def handle_fl(res, mapping, queries):
    """Assemble the FL backfill records.

    Args:
        res: sequence of responses. All but the last two are cumulative-sum
            tables; the last two hold aggregated case-line data with
            'EXPR_1'/'EXPR_2'/'EXPR_3' year/month/day columns.
        mapping: dict from source column to output tag name.
        queries: query objects; ``queries[i]`` supplies the constants for
            ``res[i]`` and ``queries[-1]`` those of the case-line data.

    Returns:
        list of record dicts. A failure while processing the case-line data
        is logged and those records are simply left out.
    """
    # simply a cumsum table
    tagged = []
    for i, data in enumerate(res[:-2]):
        rows = extract_arcgis_attributes(data, mapping)
        cumsum_df = make_cumsum_df(rows,
                                   convert_to_num=mapping.values(),
                                   fill_na_val=0)
        add_query_constants(cumsum_df, queries[i])
        tagged.extend(cumsum_df.to_dict(orient='records'))

    # The last items are the aggregated case-line data
    try:
        dfs = []
        for data in res[-2:]:
            df = pd.DataFrame([x['attributes'] for x in data['features']])
            df = df.rename(
                columns={
                    **{
                        'EXPR_1': 'Year',
                        'EXPR_2': 'Month',
                        'EXPR_3': 'Day'
                    },
                    **mapping
                })
            df[DATE] = pd.to_datetime(df[['Year', 'Month', 'Day']])
            dfs.append(df.set_index(DATE).sort_index())

        # Combine once, after every frame is collected, rather than
        # rebuilding the concatenation on each pass: sum the POSITIVE
        # columns per date, then accumulate.
        combined = pd.concat(
            dfs,
            axis=1).filter(like='POSITIVE').fillna(0).sum(axis=1).cumsum()
        combined = combined.to_frame()
        add_query_constants(combined, queries[-1])
        combined[TS] = combined.index
        tagged.extend(combined.to_dict(orient='records'))
    except Exception as e:
        # Deliberate best effort: keep the records gathered so far.
        logger.warning(str(e))

    return tagged

示例#26

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_hi(res, mapping):
    """Merge the HI ArcGIS results and add the probable-case total.

    The last response is a page; the table after the heading with id
    'probables' is summed over its "Total Probable Cases" column. When no
    such column exists the total is 0.
    """
    tagged = {}
    for arcgis_result in res[:-1]:
        tagged.update(
            extract_arcgis_attributes(arcgis_result, mapping,
                                      debug_state='HI'))

    heading = res[-1].find('h3', id='probables')
    table = heading.find_next('table')
    header_cells = table.find('thead').find_all('th')
    # Position of the first header mentioning the probable total, or -1.
    probables_index = next(
        (position for position, th in enumerate(header_cells)
         if "Total Probable Cases" in th.get_text(strip=True)),
        -1)

    probables_val = 0
    if probables_index >= 0:
        probables_val = sum(
            atoi(tr.find_all('td')[probables_index].get_text(strip=True))
            for tr in table.find('tbody').find_all('tr'))

    tagged[Fields.PROBABLE.name] = probables_val

    return tagged

示例#27

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_ut(res, mapping):
    """Collect UT values from ArcGIS, two HTML pages and a zipped download.

    Args:
        res: sequence of responses. ``res[0]`` goes through
            ``extract_arcgis_attributes``; ``res[1]`` and ``res[2]`` are
            searched with ``find``/``find_all`` (presumably BeautifulSoup
            documents -- confirm against the caller); ``res[-1]`` is passed
            to ``zipContextManager``, which yields a directory path.
        mapping: dict from source identifier (element id, cell text or
            table caption) to output tag name.

    Returns:
        dict of tag name to value.
    """
    tagged = {}
    soup_start = 1
    for result in res[:soup_start]:
        partial = extract_arcgis_attributes(result, mapping, 'UT')
        tagged.update(partial)

    # Elements on the stats page are looked up by id; the number sits in a
    # child with class "value" or, failing that, "value-output".
    stats = res[1]
    for k, v in mapping.items():
        x = stats.find(id=k)
        if x:
            value_item = x.find(class_='value')
            if not value_item:
                value_item = x.find(class_='value-output')
            if not value_item:
                continue
            value = atoi(value_item.get_text(strip=True))
            tagged[v] = value

    # inverse mapping: tag name -> source key (last key wins on duplicates)
    revmap = {v: k for k, v in mapping.items()}
    hosp = res[2]
    tables = hosp.find_all('table')

    # Current hospitalizations: sum every sibling cell of each matching row.
    curr_hosp_table = tables[0]
    tds = curr_hosp_table.find_all('td',
                                   string=re.compile(
                                       revmap[Fields.CURR_HOSP.name]))
    curr_hosp = 0
    for td in tds:
        for x in td.next_siblings:
            if x.name == 'td':
                curr_hosp += atoi(x.get_text(strip=True))
    tagged[Fields.CURR_HOSP.name] = curr_hosp

    # TODO: code here can be improved, combined with top part
    # Current ICU: the last sibling cell of the first matching row wins.
    td = curr_hosp_table.find('td',
                              string=re.compile(revmap[Fields.CURR_ICU.name]))
    for x in td.next_siblings:
        if x.name == 'td':
            val = atoi(x.get_text(strip=True))
            tagged[Fields.CURR_ICU.name] = val

    # Remaining tables are matched by caption; the second cell is the value.
    for t in tables[1:]:
        caption = t.caption.get_text(strip=True)
        if caption in mapping:
            td = t.find_all('td', limit=2)[1]
            tagged[mapping[caption]] = atoi(td.get_text(strip=True))

    # Downloadable file
    zipurl = res[-1]
    # Sometimes there are files for multiple dates, we need the most recent.
    # Each "*_latest" name starts as the bare prefix and is replaced by every
    # accepted file name, so a name sorting lower than one already processed
    # is skipped.
    specimens_file_prefix = 'Overview_Total Tests by'
    specimens_file_latest = specimens_file_prefix
    recovered_file = 'Overview_Cumulative COVID-19 Cases'
    recovered_file_latest = recovered_file
    people_tested_file = 'Overview_Number of People Tested by'
    people_tested_latest = people_tested_file
    test_type = ['PCR/amplification', 'Antigen by DFA/IF']
    result = ['POSITIVE', 'NEGATIVE']
    with zipContextManager(zipurl) as zipdir:
        with os.scandir(zipdir) as it:
            for entry in it:
                fields = []
                # is_file is a method: without the call the test was always
                # false and non-file entries were never skipped.
                if not entry.is_file():
                    # just in case
                    continue
                if entry.name.startswith(specimens_file_prefix):
                    if entry.name < specimens_file_latest:
                        continue
                    # specimens
                    fields = [
                        Fields.SPECIMENS_POS, Fields.SPECIMENS_NEG,
                        Fields.ANTIGEN_POS, Fields.ANTIGEN_NEG
                    ]
                    specimens_file_latest = entry.name
                elif entry.name.startswith(people_tested_file):
                    if entry.name < people_tested_latest:
                        continue
                    # people tested
                    fields = [
                        Fields.CONFIRMED, Fields.NEGATIVE,
                        Fields.ANTIGEN_POS_PEOPLE, Fields.ANTIGEN_NEG_PEOPLE,
                        Fields.TOTAL, Fields.ANTIGEN_TOTAL_PEOPLE
                    ]
                    people_tested_latest = entry.name
                elif entry.name.startswith(recovered_file):
                    if entry.name < recovered_file_latest:
                        continue
                    # recoveries
                    fields = [Fields.RECOVERED]
                    recovered_file_latest = entry.name
                if fields and entry.name.startswith(recovered_file):
                    # Recoveries: the last row of the estimate column.
                    df = pd.read_csv(os.path.join(zipdir, entry.name))
                    last = df['Estimated Recovered *'].iloc[-1]
                    if Fields.RECOVERED in fields:
                        tagged[Fields.RECOVERED.name] = last
                elif fields and not entry.name.startswith(recovered_file):
                    df = pd.read_csv(os.path.join(zipdir, entry.name))
                    summed = df.groupby(['Test Type', 'Result']).sum()
                    # The first four fields line up with the
                    # (test type, result) pairs in iteration order.
                    i = 0
                    for tt in test_type:
                        for rr in result:
                            tag = fields[i]
                            tag = tag if isinstance(tag, str) else tag.name
                            value = summed.loc[tt, rr]['Count']
                            tagged[tag] = value
                            i += 1
                    # handle totals: positives plus negatives of the first
                    # test type
                    if Fields.CONFIRMED in fields:
                        tagged[Fields.TOTAL.name] = sum([
                            summed.loc[test_type[0], rr]['Count']
                            for rr in result
                        ])
    return tagged

示例#28

0

显示文件

文件： states.py 项目： griffindvs/covid19-datafetcher

def handle_mi(res, mapping):
    """Collect MI values from ArcGIS, three HTML pages and linked workbooks.

    Args:
        res: sequence of responses. ``res[:2]`` go through
            ``extract_arcgis_attributes``; ``res[-3]`` (recoveries),
            ``res[-2]`` (hospitalization) and ``res[-1]`` (data links) are
            searched with ``find``/``find_all`` (presumably BeautifulSoup
            documents -- confirm against the caller).
        mapping: dict from source key to output tag name.

    Returns:
        dict of tag name to value. Each of the three workbook downloads is
        best effort: a failure is logged and its values are left out.
    """
    tagged = {}
    for result in res[:2]:
        partial = extract_arcgis_attributes(result, mapping, 'MI')
        tagged.update(partial)

    # Recoveries soup
    recovered_page = res[-3]
    recover_p = recovered_page.find('div', 'fullContent')
    span = recover_p.find('span').get_text(strip=True)
    tagged[Fields.RECOVERED.name] = atoi(span)

    # Hospitalization soup
    hospitalization_page = res[-2]
    tables = hospitalization_page.find_all('table')
    vent = 0
    icu = 0
    hosp = 0
    for t in tables:
        caption = t.find('caption').get_text(strip=True)
        if not caption.startswith('COVID-19 Metrics'):
            continue
        for row in t.find_all('tr'):
            th = row.find('th')
            if not th:
                continue
            label = th.get_text(strip=True)
            # In every case the value is the last cell of the row.
            if label.startswith(('Total Hospitalized Adult',
                                 'Hospitalized Peds')):
                hosp += atoi(row.find_all('td')[-1].get_text(strip=True))
            elif label.startswith('Adult ICU Confirmed/Suspected'):
                icu += atoi(row.find_all('td')[-1].get_text(strip=True))
            elif label.startswith('Hospitalized and Ventilated'):
                vent += atoi(row.find_all('td')[-1].get_text(strip=True))

    tagged[Fields.CURR_VENT.name] = atoi(vent)
    tagged[Fields.CURR_HOSP.name] = atoi(hosp)
    tagged[Fields.CURR_ICU.name] = atoi(icu)

    # TODO: Can use the reverse mapping
    # The workbooks are found by link position in the list under the
    # "data" heading.
    soup = res[-1]
    h = soup.find("h5", string=re.compile('[dD][aA][tT][aA]'))
    parent = h.find_parent("ul")
    links = parent.find_all("a")

    base_url = 'https://www.michigan.gov'
    cases_url = base_url + links[0]['href']
    tests_url = base_url + links[3]['href']
    results_url = base_url + links[4]['href']

    try:
        df = pd.read_excel(cases_url, engine='xlrd')
        filter_col = 'CASE_STATUS'
        summed = df.groupby(filter_col).sum()
        for m in ['Cases', 'Deaths']:
            for t in ['Confirmed', 'Probable']:
                tagged[mapping[m + t]] = summed[m][t]
    except Exception as e:
        # The exception needs a %s placeholder; without one the logging
        # module reports a formatting error instead of this message.
        logging.warning("Exception getting cases by status: %s", e)

    try:
        df = pd.read_excel(tests_url, engine='xlrd')
        filter_col = 'TestType'
        summed = df.groupby(filter_col).sum()
        for m in ['Diagnostic', 'Serology']:
            tagged[mapping[m]] = summed['Count'][m]
    except Exception:
        logging.warning("[MI] failed to fetch test results")

    try:
        df = pd.read_excel(results_url, engine='xlrd')
        fields = ['Negative', 'Positive']
        summed = df[fields].sum()
        for x in fields:
            tagged[mapping[x]] = summed[x]
    except Exception:
        logging.warning("[MI] Failed to fetch test results")

    return tagged

示例#29

0

显示文件

def handle_ky(res, mapping):
    """Merge the KY ArcGIS results with values scraped from info cards.

    Args:
        res: sequence of responses; all but the last go through
            ``extract_arcgis_attributes``. The last one is searched with
            ``find``/``find_all`` (presumably a BeautifulSoup document --
            confirm against the caller).
        mapping: dict from source key to output tag name.

    Returns:
        dict of tag name to value. When the "Coronavirus Monitoring" heading
        is missing, only the ArcGIS values are returned.
    """
    tagged = {}
    for result in res[:-1]:
        partial = extract_arcgis_attributes(result, mapping, 'KY')
        tagged.update(partial)

    # soup time
    soup = res[-1]
    h3 = soup.find("h3", string=re.compile("Coronavirus Monitoring"))
    if not h3:
        # quick fail
        return tagged

    # The cards live in the second "row" div following the heading.
    datadiv = h3.find_next_siblings("div", "row")[1]
    # "<label>: <number>" pairs inside the "probable" spans.
    pattern = "([a-zA-Z ]*): ?([0-9,]*)"
    for item in datadiv.find_all("div", "info-card"):
        title = item.find("span", "title")
        value = item.find("span", "number")
        if not value:
            continue

        probable_spans = item.find_all("span", "probable")

        # class = title, number, probable
        title = title.get_text(strip=True).lower()
        value = value.get_text(strip=True)

        breakdown_text = " ".join(
            span.get_text(strip=True) for span in probable_spans)
        # Always a list of (label, number) pairs. A whitespace-only string
        # used to be left in place and broke the tuple unpacking below.
        if breakdown_text.strip():
            breakdown = re.findall(pattern, breakdown_text)
        else:
            breakdown = []

        if "total test" in title:
            for (k, v) in breakdown:
                label = k.lower()
                if "pcr" in label:
                    tagged[Fields.SPECIMENS.name] = atoi(v)
                elif "serology" in label:
                    tagged[Fields.ANTIBODY_TOTAL.name] = atoi(v)
                elif 'antigen' in label:
                    tagged[Fields.ANTIGEN_TOTAL.name] = atoi(v)
        elif "positive" in title:
            tagged[Fields.POSITIVE.name] = atoi(value)
            for (k, v) in breakdown:
                label = k.lower()
                if "probable" in label:
                    tagged[Fields.PROBABLE.name] = atoi(v)
                elif "confirm" in label:
                    tagged[Fields.CONFIRMED.name] = atoi(v)
        elif "death" in title:
            tagged[Fields.DEATH.name] = atoi(value)
            for (k, v) in breakdown:
                label = k.lower()
                if "probable" in label:
                    tagged[Fields.DEATH_PROBABLE.name] = atoi(v)
                elif "confirm" in label:
                    tagged[Fields.DEATH_CONFIRMED.name] = atoi(v)
        elif "recover" in title:
            tagged[Fields.RECOVERED.name] = atoi(value)

    # The paragraph following the heading is stored verbatim as the date.
    updated = h3.find_next_sibling("p").get_text(strip=True)
    tagged[Fields.DATE.name] = updated

    return tagged

示例#30

0

显示文件

def handle_ar(res, mapping):
    """Return the AR backfill records: a plain cumulative-sum table.

    ``res[0]`` is extracted with ``extract_arcgis_attributes``, passed
    through ``make_cumsum_df`` and converted to a list of record dicts.
    """
    rows = extract_arcgis_attributes(res[0], mapping)
    return make_cumsum_df(rows).to_dict(orient='records')