示例#1
0
def date_from_text(date_str):
    """Turn a textual date into a ``datetime.date``.

    The text is first normalised by ``parse_date`` (defined elsewhere in
    this project). Everything from the first ``"T"`` onwards is dropped
    and the remainder is read as ``year-month-day``.

    NOTE(review): assumes ``parse_date`` returns a string shaped like
    ``YYYY-MM-DD`` or ``YYYY-MM-DDThh:mm`` -- confirm against its
    implementation.
    """
    iso_text = parse_date(date_str)
    # Only the calendar part matters here; any time-of-day suffix is ignored.
    date_part = iso_text.partition("T")[0]
    fields = date_part.split('-', 2)
    year, month, dom = int(fields[0]), int(fields[1]), int(fields[2])
    return datetime.date(year, month, dom)
示例#2
0
        'district_id': '1909',
        'population': 47926,
    },
    'Zurzach': {
        'pattern': r'^Z.+zach.*',
        'district_id': '1911',
        'population': 34650,
    },
}

# Fetch the Aargau situation-bulletin page, find the "incidence per 100'000
# inhabitants by district" chart, download it and binarise it.
data_url = 'https://www.ag.ch/de/themen_1/coronavirus_2/lagebulletins/lagebulletins_1.jsp'
d = sc.download(data_url, silent=True)
soup = BeautifulSoup(d, 'html.parser')

# The chart is located through its caption text; the same caption carries the
# "(Stand: <date>)" stamp that is used as the date of the figures.
img_caption = soup.find(string=re.compile(r".*Inzidenz pro 100'000 Einwohner nach Bezirke.*"))
# presumably sc.find returns the first capture group -- verify against sc
img_date = sc.find(r'\(Stand:?\s+(.*\d{4})', img_caption.string)
# Keep only the text before "T" of parse_date's result before converting.
img_date = datetime.datetime.fromisoformat(parse_date(img_date).split('T', 1)[0])

# NOTE(review): the URL scraped from the page is immediately replaced by a
# hard-coded one, so the scraped value and the prefix check below currently
# have no effect on which image is downloaded. Kept as-is in case this is a
# deliberate temporary override.
img_url = img_caption.find_previous('img')['src']
img_url = 'https://www.ag.ch/media/kanton_aargau/themen_1/coronavirus_1/bilder_11/daten/Inzidenz_pro_100K_Einwohner_content_large.jpg'
if not img_url.startswith('http'):
    img_url = f'https://www.ag.ch{img_url}'

# download the image to a temporary file
# Reserve the path through a context manager so the underlying file handle is
# closed right away; delete=False keeps the file on disk for the download.
with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
    path = tmp_file.name
sc.download_file(img_url, path)

# convert to binary image
img = cv2.imread(path)
# NOTE(review): cv2.imread yields BGR channel order, so COLOR_RGB2GRAY applies
# the red/blue weights swapped. Left unchanged because later processing may be
# tuned to this output -- confirm before switching to COLOR_BGR2GRAY.
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# cv2.threshold returns (threshold value, thresholded image); only the image
# is needed afterwards.
_otsu_level, img_bin = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# Invert the binary image (black <-> white).
gray = cv2.bitwise_not(img_bin)
示例#3
0
        v = v.strip()

        # Ignore k or v, if v is "None"
        if v == "None":
            print(f'WARNING: {k} is None', file=sys.stderr)
            warns.append(f"{k} is None")
            continue

        if k == "Downloading":
            url_sources.append(v)
            continue
        if k == "Scraped at":
            scrape_time = v
            continue
        if k == "Date and time":
            new_date = parse_date(v)
            parts = new_date.split("T", 1)
            day = parts[0].split('-', 2)
            day = datetime.date(int(day[0]), int(day[1]), int(day[2]))

            if parts[1] == '24:00':
                day = day + datetime.timedelta(days=1)
                new_date = f"{day.isoformat()}T00:00"

            now = datetime.date.today()
            if day > now:
                print(f"Parsed date/time must not be in the future: parsed: {day}: now: {now}", file=sys.stderr)
                errs.append(f"Date {day} in the future")
            # In case there are multiple "Date and time", use first one,
            # or the one which is more specific (includes time).
            if date is None or len(new_date) > len(date):
示例#4
0
    },
    'Zurzach': {
        'pattern': r'^Z.+zach.*',
        'district_id': '1911',
        'population': 34650,
    },
}

# Fetch the Aargau situation-bulletin page, find the "incidence per 100'000
# inhabitants by district" chart, download it and binarise it.
data_url = 'https://www.ag.ch/de/themen_1/coronavirus_2/lagebulletins/lagebulletins_1.jsp'
d = sc.download(data_url, silent=True)
soup = BeautifulSoup(d, 'html.parser')

# The chart is located through its caption text; the same caption carries the
# "(Stand: <date>)" stamp that is used as the date of the figures.
img_caption = soup.find(
    string=re.compile(r".*Inzidenz pro 100'000 Einwohner nach Bezirke.*"))
# presumably sc.find returns the first capture group -- verify against sc
img_date = sc.find(r'\(Stand:?\s+(.*\d{4})', img_caption.string)
# Keep only the text before "T" of parse_date's result before converting.
img_date = datetime.datetime.fromisoformat(
    parse_date(img_date).split('T', 1)[0])

# NOTE(review): the URL scraped from the page is immediately replaced by a
# hard-coded one, so the scraped value and the prefix check below currently
# have no effect on which image is downloaded. Kept as-is in case this is a
# deliberate temporary override.
img_url = img_caption.find_previous('img')['src']
img_url = 'https://www.ag.ch/media/kanton_aargau/themen_1/coronavirus_1/bilder_11/daten/Inzidenz_pro_100K_Einwohner_content_large.jpg'
if not img_url.startswith('http'):
    img_url = f'https://www.ag.ch{img_url}'

# download the image to a temporary file
# Reserve the path through a context manager so the underlying file handle is
# closed right away; delete=False keeps the file on disk for the download.
with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
    path = tmp_file.name
sc.download_file(img_url, path)

# convert to binary image
img = cv2.imread(path)
# NOTE(review): cv2.imread yields BGR channel order, so COLOR_RGB2GRAY applies
# the red/blue weights swapped. Left unchanged because later processing may be
# tuned to this output -- confirm before switching to COLOR_BGR2GRAY.
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# cv2.threshold returns (threshold value, thresholded image); only the image
# is needed afterwards.
_otsu_level, img_bin = cv2.threshold(gray, 0, 255,
                                     cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# Invert the binary image (black <-> white).
gray = cv2.bitwise_not(img_bin)