Example #1
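# Fetch the country-level JSON feed and write one CSV row per area
# (code, name, timestamp, cumulative confirmed cases and deaths).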
def collect_country():
    country_data = fetchhelper.Updater(url_countries, ext='country.json')
    country_data.check_fetch(rawfile=args.rawfile[0], compressed=True)
    if not country_data.rawdata.strip():
        if datetime.date.today().isoweekday() == 7:
            # They seem to turn the servers off on Sundays these days.
            pass
        else:
            print("Empty country.json")
        return

    jdat = json.loads(country_data.rawdata)

    parse = fetchhelper.ParseData(country_data, 'countries')
    parse.parsedtime = datatime
    with open(parse.parsedfile, 'w') as f:
        cw = csv.writer(f)
        header = ['Code', 'Country', 'Timestamp', 'Confirmed', 'Deaths']
        cw.writerow(header)
        for data in sorted(jdat['data'], key=(lambda d: d['areaCode'])):
            code = data['areaCode']
            name = data['areaName']
            confirmed = data['cumCasesByPublishDate']
            deaths = data['cumDeaths28DaysByPublishDate']
            cw.writerow([code, name, datatime, confirmed, deaths])
    parse.deploy_timestamp()
    return parse
Example #2
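# Parse the v1 page layout: extract the confirmed and deceased counts.
# The recovered figure has disappeared from the page, so that block stays disabled.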
def parse_v1(parses, html):
    parse_c = fetchhelper.ParseData(update, 'confirmed')
    parse_counts(parse_c, html, "Best.*tigte F.*lle")
    parses.append(parse_c)

    # Seems to be removed for good
    #parse_r = fetchhelper.ParseData(update, 'recovered')
    #try:
    #    parse_counts(parse_r, infobox, "Genesene Personen")
    #    parses.append(parse_r)
    #except AttributeError as err:
    #    # It seems to be removed, we ignore it
    #    print(err)

    parse_d = fetchhelper.ParseData(update, 'deceased')
    parse_counts(parse_d, html, "Todesf.*lle")
    parses.append(parse_d)
Example #3
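# Parse the v2 layout: a single summary table with one row per metric
# (confirmed, deceased, recovered, hospital, intensive care, tests) and one
# column per Bundesland; each row carries its own 'Stand' timestamp that
# becomes the parsed time for that metric.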
def parse_v2(parses, html):
    table = fetchhelper.text_table(html.find('table'))
    ths = table[0]
    assert ('Bundesland' in ths[0])
    assert ('gesamt' in ths[-1])
    trs = table[1:]
    assert ('tigte' in trs[0][0])
    assert ('Todesf' in trs[1][0])
    assert ('Genesen' in trs[2][0])
    assert ('Hospital' in trs[3][0])
    assert ('Intensiv' in trs[4][0])
    assert ('Testungen' in trs[5][0])
    parse = [
        fetchhelper.ParseData(update, 'confirmed'),
        fetchhelper.ParseData(update, 'deaths'),
        fetchhelper.ParseData(update, 'recovered'),
        fetchhelper.ParseData(update, 'hospital'),
        fetchhelper.ParseData(update, 'intensivecare'),
        fetchhelper.ParseData(update, 'tests'),
    ]
    labels = [
        'confirmed', 'deceased', 'recovered', 'hospital', 'intensivecare',
        'tests'
    ]

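    # Map the abbreviated column headers to full Bundesland names
    # (both 'Kt.' and 'Ktn.' have appeared for Kärnten).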
    areas = {
        'Bgld.': 'Burgenland',
        'Kt.': 'Kärnten',
        'Ktn.': 'Kärnten',
        'NÖ': 'Niederösterreich',
        'OÖ': 'Oberösterreich',
        'Sbg.': 'Salzburg',
        'Stmk.': 'Steiermark',
        'T': 'Tirol',
        'Vbg.': 'Vorarlberg',
        'W': 'Wien'
    }

    for i, tds in enumerate(trs):
        assert (len(ths) == len(tds))
        mo = re.search(r'Stand (\d\d.\d\d.\d\d\d\d), *(\d\d:\d\d) ?Uhr',
                       tds[0])
        if mo is None:
            print("cannot parse date")
            sys.exit(1)
        parse = fetchhelper.ParseData(update, labels[i])
        datadate = parse.parsedtime = datetime.strptime(
            mo.group(1) + ' ' + mo.group(2),
            '%d.%m.%Y %H:%M').replace(tzinfo=datatz)
        with open(parse.parsedfile, 'w') as f:
            cw = csv.writer(f)
            cw.writerow(['Area', 'Date', 'Value'])
            for col in range(1, len(tds) - 1):
                area = areas[strip_footnote(ths[col])]
                count = cleannum(tds[col])
                cw.writerow([area, datadate.isoformat(), count])
        parse.deploy_timestamp()
        parses.append(parse)
Example #4
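# Fetch the UTLA-level JSON feed. When the publish-date figures are missing,
# fall back to the specimen-date / death-date values and flag the fallback in
# the 'Backdated' column.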
def collect_utla():
    utla_data = fetchhelper.Updater(url_utlas, ext='utla.json')
    utla_data.check_fetch(rawfile=args.rawfile[1], compressed=True)
    if not utla_data.rawdata.strip():
        if datetime.date.today().isoweekday() == 7:
            pass
        else:
            print("Empty utla.json")
        return

    jdat = json.loads(utla_data.rawdata)

    parse = fetchhelper.ParseData(utla_data, 'utla')
    parse.parsedtime = datatime
    with open(parse.parsedfile, 'w') as f:
        cw = csv.writer(f)
        header = [
            'Code', 'UTLA', 'Region', 'Timestamp', 'Confirmed', 'Deaths',
            'Backdated'
        ]
        cw.writerow(header)
        for data in sorted(jdat['data'], key=(lambda d: d['areaCode'])):
            code = data['areaCode']
            name = data['areaName']
            confirmed = data['cumCasesByPublishDate']
            fallback = ''
            if confirmed is None:
                confirmed = data['cumCasesBySpecimenDate']
                if confirmed is not None:
                    fallback += 'C'
            deaths = data['cumDeaths28DaysByPublishDate']
            if deaths is None:
                deaths = data['cumDeaths28DaysByDeathDate']
                if deaths is not None:
                    fallback += 'D'
            cw.writerow([
                code, name, (regions[code][1] if code[0] == 'E' else None),
                datatime, confirmed, deaths, fallback
            ])
    parse.deploy_timestamp()
    return parse
Example #5
)
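# Fetch the HTML page, locate the notification-region table and take the
# publication date from the preceding <h4> heading.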
update.check_fetch(rawfile=args.rawfile)
if args.only_changed:
    if not update.raw_changed():
        print("downloaded raw data unchanged")
        exit(0)

html = BeautifulSoup(update.rawdata, 'html.parser')

tab = html.find(
    string=re.compile('R.*gion de notification')).find_parent('table')

datestr = tab.find_previous('h4').get_text()
mo = re.search(r'(\d\d/\d\d/\d\d\d\d) à (\d\d)h', datestr)

parse = fetchhelper.ParseData(update, 'data')
parse.parsedtime = datetime.strptime(
    mo.group(1) + ' ' + mo.group(2), '%d/%m/%Y %H').replace(tzinfo=datatz)

with open(parse.parsedfile, 'w') as outf:
    cw = csv.writer(outf)
    cw.writerow(['Area', 'Region', 'Date', 'Confirmed'])

    group = 'Métropole'
    for tr in tab.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        area = tds[0].get_text()
        counttxt = tds[1].get_text()
        if '**' in counttxt:
            continue
        count = cleannum(tds[1].get_text())
Example #6
            cols.append(datatime.isoformat())
            if tab_combined:
                cols += [
                    clean_num(tds[1].get_text()),
                    clean_num(tds[n_deaths].get_text())
                ]
            else:
                cols.append(clean_num(tds[1].get_text()))
                if deaths:
                    cols.append(deaths.get(lk, 0))
            cout.writerow(cols)

    # If the current day is later than the content time, we assume the
    # content time is a mistake and adjust it to the current day.
    # (This problem has happened before.)
    #if parse.update.rawtime.date() > parse.parsedtime.date():
    #    if parse.parseddiff.changed and not parse.parseddiff.first:
    #        print("Adjust date", parse.parsedtime, "->", parse.update.rawtime)
    #        parse.parsedtime = parse.update.rawtime

    parse.deploy_timestamp()


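# Parse both administrative levels (Regierungsbezirk and Landkreis) from the
# same page, then commit both results together.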
rparse = fetchhelper.ParseData(update, 'regierungsbezirk')
parse_table(rparse, html, 'regierungsbezirk')

lparse = fetchhelper.ParseData(update, 'landkreis')
parse_table(lparse, html, 'landkreis')

fetchhelper.git_commit([rparse, lparse], args)
Example #7
import datetime, re, csv, os
import json
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Berlin')

# Bundesländer
url_bl = 'https://services7.arcgis.com/mOBPykOjAyBO2ZKk/ArcGIS/rest/services/Coronaf%c3%a4lle_in_den_Bundesl%c3%a4ndern/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=LAN_ew_GEN%2CAktualisierung%2CFallzahl%2CDeath%2CLAN_ew_AGS&returnGeometry=false&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token='

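# Fetch the feature list and write one row per Bundesland; the newest
# 'Aktualisierung' timestamp becomes the parsed time.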
updatebl = fetchhelper.Updater(url_bl, ext='bl.json')
updatebl.check_fetch(rawfile=args.rawfile[0])

jdat = json.loads(updatebl.rawdata)

parsebl = fetchhelper.ParseData(updatebl, 'data')
parsebl.parsedtime = None
with open(parsebl.parsedfile, 'w') as outf:
    cout = csv.writer(outf)
    cout.writerow(['Bundesland', 'AGS', 'Timestamp', 'EConfirmed', 'EDeaths'])
    for jfeat in sorted(jdat['features'], key=(lambda f: f['attributes']['LAN_ew_GEN'])):
        ts = datetime.datetime.fromtimestamp(jfeat['attributes']['Aktualisierung']/1000, tz=datatz)
        if parsebl.parsedtime is None or ts > parsebl.parsedtime:
            parsebl.parsedtime = ts
        cout.writerow([
            jfeat['attributes']['LAN_ew_GEN'],
            jfeat['attributes']['LAN_ew_AGS'],
            ts.isoformat(),
            jfeat['attributes']['Fallzahl'],
            jfeat['attributes']['Death'],
        ])
Example #8
url_cases = 'https://atlas.jifo.co/api/connectors/41be7d71-7260-497f-a60b-adce5aa9445d'
url_recovered = 'https://atlas.jifo.co/api/connectors/2adaf217-e526-492a-bcad-5ed6ec6ad3ad'

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(url_cases, ext='cases.json')
update.check_fetch(rawfile=rawfiles[0])
jdat = json.loads(update.rawdata)

header = jdat['data'][0][0]
i_kom = header.index("Ort")
i_con = header.index("Gesamtzahl seit Ausbruch")

parses = []

parse = fetchhelper.ParseData(update, 'data')
parse.parsedtime = datetime.datetime.fromtimestamp(jdat['refreshed'] / 1000,
                                                   tz=datatz)
with open(parse.parsedfile, 'w') as f:
    cw = csv.writer(f)
    cw.writerow(['Kommune', 'Timestamp', 'Confirmed'])
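    # Skip aggregate and placeholder rows and normalize the Pfaffenhofen
    # spelling so the area name stays consistent.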
    for jrow in jdat['data'][0][1:]:
        kom = jrow[i_kom]
        if kom in ('Zuordnung fehlt', 'Gesamt', ''):
            continue
        if kom.startswith('Stand vom'):
            continue
        if jrow[i_kom] == 'Pfaffenhofen a.d.Glonn':
            jrow[i_kom] = 'Pfaffenhofen a.d. Glonn'
        cw.writerow([jrow[i_kom], parse.parsedtime.isoformat(), jrow[i_con]])
Example #9
fetchhelper.add_arguments(ap)
args = ap.parse_args()

import subprocess, csv
from datetime import datetime, timedelta
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Berlin')

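# Download the xlsx table, convert it to CSV with xlsx2csv and start parsing
# at the row that begins with 'Stadt-/Landkreis'.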
update = fetchhelper.Updater(
    'https://sozialministerium.baden-wuerttemberg.de/fileadmin/redaktion/m-sm/intern/downloads/Downloads_Gesundheitsschutz/Tabelle_Coronavirus-Faelle-BW.xlsx',
    ext='xlsx')

update.check_fetch(args.rawfile, binary=True)

parse = fetchhelper.ParseData(update, 'timeline')

proc = subprocess.Popen(['xlsx2csv', update.rawfile],
                        stdout=subprocess.PIPE,
                        encoding='utf-8')
cr = csv.reader(proc.stdout)

with open(parse.parsedfile, 'w') as pf:
    cpf = csv.writer(pf)
    start = False
    dates = None
    for row in cr:
        if not start:
            if row and row[0] == 'Stadt-/Landkreis':
                start = True
            continue
Example #10
    if not row[0]:
        continue

    # There is no consistent date for these numbers, but we assume they are published at the end of the day.
    timestamp = datetime.datetime.strptime(
        row[0] + ' 23:59', '%m-%d-%y %H:%M').replace(tzinfo=datatz)
    for n in [3, 4, 5, 6]:
        country = clean_label(header[n])
        if row[n] == '':
            continue
        deaths = clean_num(row[n])
        countrydata[timestamp][country].deaths = deaths

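# Write one CSV per timestamp; the Deaths column is only included when at
# least one country has a death count for that snapshot.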
parses = []
for timestamp, tsdata in sorted(countrydata.items()):
    parse = fetchhelper.ParseData(update, 'countries')
    parse.parsedtime = timestamp

    has_deaths = any(cdata.deaths is not None for cdata in tsdata.values())

    with open(parse.parsedfile, 'w') as f:
        cw = csv.writer(f)
        header = ['Code', 'Country', 'Timestamp', 'Confirmed']
        if has_deaths:
            header.append('Deaths')
        cw.writerow(header)
        for _, cdata in sorted(tsdata.items()):
            row = [
                cdata.code, cdata.name,
                cdata.timestamp.isoformat(), cdata.confirmed
            ]
Example #11
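# args.rawfile may contain several comma-separated paths, one per feed.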
if args.rawfile is not None:
    args.rawfile = args.rawfile.split(',')
else:
    args.rawfile = [None, None]

countrydata = {}

country_data = fetchhelper.Updater(url_countries, ext='country.json')
country_data.check_fetch(rawfile=args.rawfile[1])
jdat = json.loads(country_data.rawdata)

datatime = datetime.datetime.fromisoformat(
    jdat['metadata']['lastUpdatedAt'].rstrip('Z')).astimezone(
        datetime.timezone.utc)
parses = []
parse = fetchhelper.ParseData(country_data, 'countries')
parse.parsedtime = datatime
with open(parse.parsedfile, 'w') as f:
    cw = csv.writer(f)
    header = ['Code', 'Country', 'Timestamp', 'Confirmed', 'Deaths']
    cw.writerow(header)
    for (code, data) in jdat.items():
        if code == 'metadata':
            continue
        name = data['name']['value']
        confirmed = int(data['totalCases']['value'])
        deaths = int(data['deaths']['value'])
        cw.writerow([code, name, datatime, confirmed, deaths])
parse.deploy_timestamp()
parses.append(parse)
Example #12
    'Västernorrland': 0,
    'Västmanland': 0,
    'Västra_Götaland': 0,
    'Örebro': 0,
    'Östergötland': 0,
}

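# Sum the daily per-area values across all dated features; the latest
# 'Statistikdatum' (with the time pinned to 11:30) becomes the parsed time.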
parses = []
datatime = None
features = [f for f in jd['features'] if f['attributes']['Statistikdatum'] is not None]
for feat in sorted(features, key=(lambda f: f['attributes']['Statistikdatum'])):
    attrs = feat['attributes']
    datatime = datetime.datetime.utcfromtimestamp(attrs['Statistikdatum']/1000).replace(hour=11, minute=30, tzinfo=datatz)
    for attr, value in attrs.items():
        if attr in areasum:
            areasum[attr] += value

parse = fetchhelper.ParseData(update, 'data', variant=datatime.isoformat())
parse.parsedtime = datatime
with open(parse.parsedfile, 'w') as outf:
    cw = csv.writer(outf)
    header = ['Area', 'Timestamp', 'Confirmed']
    cw.writerow(header)
    for area, count in sorted(areasum.items()):
        cw.writerow([area.replace('_', ' '), datatime.isoformat(), count])

parse.deploy_timestamp()
parses.append(parse)

fetchhelper.git_commit(parses, args)
Example #13
# on 10.09.2020 there were additional empty cells
for tr in table:
    if tr[-1] == '':
        tr.pop()
ths = table[0]
assert ('Bundesland' in ths[0])
assert ('gesamt' in ths[-1])
trs = table[1:]
assert ('tigte' in trs[0][0])
assert ('Todesf' in trs[1][0])
assert ('Genesen' in trs[2][0])
assert ('Hospital' in trs[3][0])
assert ('Intensiv' in trs[4][0])
assert ('Testungen' in trs[5][0])
parse = [
    fetchhelper.ParseData(update, 'confirmed'),
    fetchhelper.ParseData(update, 'deaths'),
    fetchhelper.ParseData(update, 'recovered'),
    fetchhelper.ParseData(update, 'hospital'),
    fetchhelper.ParseData(update, 'intensivecare'),
    fetchhelper.ParseData(update, 'tests'),
]
labels = [
    'confirmed', 'deceased', 'recovered', 'hospital', 'intensivecare', 'tests'
]

areas = {
    'Bgld.': 'Burgenland',
    'Kt.': 'Kärnten',
    'Ktn.': 'Kärnten',
    'NÖ': 'Niederösterreich',