示例#1
0
def get_sh_url_from_json(url):
    """Return the absolute sh.ch URL of the file described by a JSON document.

    The JSON document at ``url`` is fetched with ``sc.jsondownload``. Its
    ``data_filemeta`` entry is itself a JSON-encoded string; that string is
    decoded and its ``url`` entry (a site-relative path) is appended to
    ``https://sh.ch``.

    Raises ``KeyError`` if ``data_filemeta`` or its ``url`` entry is missing.
    """
    # Sample response, captured 2020-04-24:
    #
    # {
    #     data_filetype: "xlsx",
    #     data_shareInAreaPage: "[]",
    #     data_kachellabel: "Fallzahlen Corona Kanton Schaffhausen.xlsx",
    #     data_areaPage_repositoryid: "3275",
    #     data_custom_author: "Gesundheitsamt Kanton Schaffhausen",
    #     data_tagarea: "[]",
    #     data_shareInDomain: "[]",
    #     data_zielgruppen: "",
    #     data_publication_date: "23.04.2020",
    #     data_idpath: "/1752/8540/1753/1765/1755/1763/2733/2747/3275/3666465",
    #     data_custom_publication_date_date: "23.04.2020",
    #     data_shareArticleProfileId: "",
    #     data_file_name: "Fallzahlen Corona Kanton Schaffhausen.xlsx",
    #     data_author: "MWETT",
    #     data_file_copyrights: "",
    #     data_custom_publication_timed: "[]",
    #     data_published: "published",
    #     data_addmodules: "",
    #     data_listlabel: "Fallzahlen Corona Kanton Schaffhausen.xlsx",
    #     data_tags: "",
    #     data_widget_data: "[]",
    #     data_filemeta: "{"uploaded":1,"fileName":"d4ffb019-a2ef-4782-87be-0aafb4b43558","key":"TEMPUPLOADFILES","url":"/CMS/get/file/d4ffb019-a2ef-4782-87be-0aafb4b43558","originalname":"Fallzahlen Corona Kanton Schaffhausen.xlsx","fileid":"d4ffb019-a2ef-4782-87be-0aafb4b43558","category":"null","title":"null","filesize":12286}",
    #     data_shareInGlobal: "[]",
    #     data_verbande: "",
    #     data_file_description: "",
    #     data_custom_publication_date_time: "09:31",
    #     data_galleries: "[]",
    #     data_sharepaths: "",
    #     data_permalink: "/Webseite/Kanton-Schaffhausen/Beh-rde/Verwaltung/Departement-des-Innern/Gesundheitsamt-3666465-DE.html",
    #     data_schlagworte: "",
    #     data_approvedpaths: "["/1752/8540/1753/1765/1755/1763/2733/2747/3275/3666465"]",
    #     contentid: "3666465",
    #     domainid: "1753",
    #     contenttypeid: "101",
    #     transactiontime: "23.04 09:09",
    #     author: "dande",
    #     language: "DE",
    #     activated_languages: [
    #             "DE"
    #             ],
    #             sliderimages: [ ],
    #             genericimages: { }
    # }
    response = sc.jsondownload(url, silent=True)

    # `data_filemeta` holds JSON text, not a nested object, so decode it.
    file_meta = json.loads(response['data_filemeta'])
    return f"https://sh.ch{file_meta['url']}"
示例#2
0
import datetime
import re
from bs4 import BeautifulSoup
import scrape_common as sc
import scrape_sh_common as shc

# Step 1: download the Gesundheitsamt landing page and pull out the content id
# that the page's JavaScript declares as `var contentid = '<digits>';`.
url = 'https://sh.ch/CMS/Webseite/Kanton-Schaffhausen/Beh-rde/Verwaltung/Departement-des-Innern/Gesundheitsamt-3209198-DE.html'
d = sc.download(url, silent=True)
content_id = sc.find(r"var contentid = '(\d+)';", d)
# Abort early if the id could not be extracted; every later URL depends on it.
assert content_id

# Step 2: fetch the landing page's content as JSON, addressed by that id.
url = f'https://sh.ch/CMS/content.jsp?contentid={content_id}&language=DE'
d = sc.jsondownload(url, silent=True)

# Step 3: the HTML body is stored under `data_post_content`; collect the
# `contentid` attribute of every link whose text mentions "Lagebericht".
soup = BeautifulSoup(d['data_post_content'], 'html.parser')
# `string=` is the current name of the text filter; the older `text=` spelling
# is deprecated in Beautiful Soup.
links = soup.find_all('a', string=re.compile(r'Lagebericht'))
content_ids = [link.get('contentid') for link in links]

# Step 4: resolve each Lagebericht content id to its PDF URL and download it.
for content_id in content_ids:
    url = f'https://sh.ch/CMS/content.jsp?contentid={content_id}&language=DE'
    pdf_url = shc.get_sh_url_from_json(url)
    pdf = sc.download_content(pdf_url, silent=True)

    td = sc.TestData(canton='SH', url=pdf_url)
示例#3
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import datetime
import json
import scrape_common as sc

# JSON content that the following page loads via JavaScript:
# https://sh.ch/CMS/Webseite/Kanton-Schaffhausen/Beh-rde/Verwaltung/Departement-des-Innern/Gesundheitsamt-3209198-DE.html
m = sc.jsondownload(
    'https://sh.ch/CMS/content.jsp?contentid=3666465&language=DE',
    silent=True,
)

# 2020-04-24
"""
{
    data_filetype: "xlsx",
    data_shareInAreaPage: "[]",
    data_kachellabel: "Fallzahlen Corona Kanton Schaffhausen.xlsx",
    data_areaPage_repositoryid: "3275",
    data_custom_author: "Gesundheitsamt Kanton Schaffhausen",
    data_tagarea: "[]",
    data_shareInDomain: "[]",
    data_zielgruppen: "",
    data_publication_date: "23.04.2020",
    data_idpath: "/1752/8540/1753/1765/1755/1763/2733/2747/3275/3666465",
    data_custom_publication_date_date: "23.04.2020",
    data_shareArticleProfileId: "",
    data_file_name: "Fallzahlen Corona Kanton Schaffhausen.xlsx",
    data_author: "MWETT",
    data_file_copyrights: "",
    data_custom_publication_timed: "[]",
    data_published: "published",
    data_addmodules: "",
示例#4
0
#!/usr/bin/env python3

import datetime
import scrape_common as sc

# ArcGIS FeatureServer query URL, split into one query parameter per line for
# readability; adjacent literals are concatenated into a single string.
json_url = (
    'https://services1.arcgis.com/YAuo6vcW85VPu7OE/arcgis/rest/services/'
    'Fallzahlen_Total_Kanton/FeatureServer/0/query'
    '?where=1%3D1'
    '&objectIds='
    '&time='
    '&resultType=none'
    '&outFields=*'
    '&returnHiddenFields=false'
    '&returnIdsOnly=false'
    '&returnUniqueIdsOnly=false'
    '&returnCountOnly=false'
    '&returnDistinctValues=false'
    '&cacheHint=false'
    '&orderByFields=Eingangs_Datum'
    '&groupByFieldsForStatistics='
    '&outStatistics='
    '&having='
    '&resultOffset='
    '&resultRecordCount='
    '&sqlFormat=standard'
    '&f=pjson'
)
# Download and decode the query result.
data = sc.jsondownload(json_url, silent=True)

# 2020-04-02
"""
features: [
{
    attributes: {
            Eingangs_Datum: 1582675200000,
            Anzahl_Fälle_total__kumuliert_: 2,
            Neue_Faelle: 2,
            Neue_aktive_Fälle: 2,
            Anzahl_aktive_Fälle_total: 2,
            Anzahl_Personen_in_Isolation: 0,
            Anzahl_Personen_in_Quarantäne: 0,
            Verstorbene: 0,
            Verstorbene__kumuliert_: 0,
            Neue_Hospitalisierungen: 0,
            Hospitalisiert_Total: 0,
            Neu_Pflege: 0,
            Hospitalisiert_Pflege: 0,
            Neu_IPS: 0,
            Hospialisiert_IPS: 0,
            Neu_IPS_beatmet: 0,
            Hospitalisiert_IPS_beatmet: 0,
            FID: 1