from bs4 import BeautifulSoup

import scrape


def getPage(url, parsed):
    # Fetch the page; return a BeautifulSoup tree if parsed, else the raw text.
    req = scrape.request_url(url)
    text = req.text
    if parsed:
        text = BeautifulSoup(text, "html.parser")
    # text = text.decode("UTF-8").encode("UTF-8")  # only on some installations ;-)
    return text
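
# Hypothetical usage sketch (an illustration, not part of the original file):
# getPage returns the raw response text, or a BeautifulSoup tree when parsed
# is truthy.
#
# raw_html = getPage("https://example.org", parsed=False)
# soup = getPage("https://example.org", parsed=True)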
from bs4 import BeautifulSoup

import requests
import datetime
import re

import locale
locale.setlocale(locale.LC_TIME, "de_DE.utf-8")

import scrape
import helper
from database_interface import *

main_url = "https://www.kreis-kleve.de/de/fachbereich5/coronavirus/"

req = scrape.request_url(main_url)
bs = BeautifulSoup(req.text, "html.parser")

# The case count appears in the page text as "insgesamt <n> bestätigte ...".
cases_pattern = "insgesamt [0-9]+ bestätigte"

text = bs.getText()

# The report date appears as "Stand ...)".
status_raw = re.findall(r"Stand .*?\)", text)[0]
status = helper.get_status(status_raw)

cases_raw = re.findall(cases_pattern, text)[0]
cases = int(re.findall(r'[0-9]+', cases_raw)[0])

add_to_database("05154", status, cases, "Kreis Kleve")
import logging
import locale
import re

from bs4 import BeautifulSoup

import scrape

if scrape.SCRAPER_DEBUG:
    logging.basicConfig(level=logging.DEBUG)
    import pprint
logger = logging.getLogger(__name__)

from database_interface import *

DISTRICT_UID = "05974"


main_url = "https://www.presse-service.de/rss.aspx?v=2&p=551"

req = scrape.request_url(
    main_url,
    headers=scrape.RANDOM_CLIENT_HEADERS,
    options={'debug': scrape.SCRAPER_DEBUG, 'forceEncoding': 'utf8'},
)
# req.encoding = 'utf8'
bs = BeautifulSoup(req.text, "html.parser")
news_list = bs.findAll("item")
for item in news_list:
    status_pattern = "(.*) Das Referat .*"
    cases_pattern = "([0-9]+) .* Corona-Fälle"

    # Skip items whose title does not report a case count.
    cases_raw = re.search(cases_pattern, item.title.text)
    if cases_raw is None:
        continue
    cases = int(cases_raw.group(1))
    logger.info("\n")
    logger.debug('%s', item.title.text)
    logger.debug('%s', item.guid.text)
    logger.debug('%s', item.pubdate.text)
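
    # Hypothetical continuation sketch (not the original code): the unused
    # status_pattern above suggests the loop goes on to pull the report date
    # out of the item description before writing a row to the database, e.g.:
    #
    #     status_match = re.search(status_pattern, item.description.text)
    #     if status_match is not None:
    #         add_to_database(DISTRICT_UID, status_match.group(1), cases)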
from bs4 import BeautifulSoup

import requests
import datetime
import re

import locale
locale.setlocale(locale.LC_TIME, "de_DE.utf-8")

import scrape
import helper
from database_interface import *

main_url = "https://www.kreis-reutlingen.de/de/Aktuelles/Landkreis-aktuell/Landkreis-aktuell?view=publish&item=article&id=1923"

req = scrape.request_url(main_url, options={'cookies':{'skipEntranceUrl':'1'}})
bs = BeautifulSoup(req.text, "html.parser")

cases_pattern = "Gesamtzahl laborbestätigter Fälle: [0-9]+"

text = bs.getText()

status_raw = re.findall("Stand: .*?2020", text)[0]
status = helper.get_status(status_raw)

cases_raw = re.findall(cases_pattern, text)[0]
cases = int(re.findall(r'[0-9]+', cases_raw)[0])

add_to_database("08415", status, cases, "Kreis Reutlingen")
from bs4 import BeautifulSoup

import requests
import datetime
import re

import scrape
from helper import *
from database_interface import *

# The page text reads e.g. "(Labordiagnostisch bestätigt, Stand 21.03.2020)",
# i.e. laboratory-confirmed cases as of the given date.

url = "https://www.segeberg.de/Quicknavigation/Startseite"

req = scrape.request_url(url)
bs = BeautifulSoup(req.text, "html.parser")

prefix = "FÄLLE IM KREIS SEGEBERG: \d+"
prefix_date = "Stand.*\)"

text = bs.getText()

status_raw = re.findall(prefix_date, text)[0].replace(")", "")
status = get_status(status_raw)

cases_raw = re.findall(prefix, text)[0]
cases = get_number_only(cases_raw)

add_to_database("01060", status, cases, name="Kreis Segeberg")