Пример #1
0
def parse_html(html):
    """Turn the parking overview table into the common lot structure.

    The timestamp is read from the first ``<strong>`` inside the first
    ``<tr>``. Every following row is either a region header (it contains
    ``<th>`` cells), the "Gesamt" row (skipped), or a lot row with two or
    four ``<td>`` cells. Rows with any other cell count are ignored.

    Returns a dict with the keys "last_updated", "data_source" and "lots".
    """
    soup = BeautifulSoup(html, "html.parser")

    stamp_text = soup.find("tr").find("strong").text
    result = {
        "last_updated": convert_date(stamp_text, "Stand: %d.%m.%Y, %H:%M Uhr"),
        "data_source": data_source,
        "lots": []
    }

    current_region = ""

    # The first row was already consumed for the timestamp above.
    for row in soup.find_all("tr")[1:]:
        if row.find_all("th"):
            # Header row: remember its caption for the lot rows that follow.
            current_region = row.find("th", {"class": "head1"}).text
            continue

        if row.find("td").text == "Gesamt":
            continue

        cells = row.find_all("td")
        cell_count = len(cells)
        if cell_count not in (2, 4):
            continue

        parsed_name = process_name(cells[0].text)
        lot_name = parsed_name[1]
        lot_type = parsed_name[0]

        if cell_count == 2:
            # Two cells: name plus a state text, no numbers on the page.
            total = get_most_lots_from_known_data("Lübeck", lot_name)
            free = 0
            state = process_state_map.get(cells[1].text, "")
        else:
            # Four cells: the second and third hold total and free counts.
            total = int(cells[1].text)
            free = int(cells[2].text)
            state = "open"

        result["lots"].append({
            "name": lot_name,
            "type": lot_type,
            "total": total,
            "free": free,
            "region": current_region,
            "state": state,
            "coords": geodata.coords(lot_name),
            "id": generate_id(__file__, lot_name),
            "forecast": False
        })

    return result
Пример #2
0
def parse_html(html):
    """Extract lots from the first ``table[cellpadding=5]`` on the page.

    The first row of that table is skipped, the last row supplies the
    timestamp text, and every row in between is read as one lot
    (cell 0: name, cell 1: free count, cell 2: state text).

    Returns a dict with the keys "last_updated", "data_source" and "lots".
    """
    soup = BeautifulSoup(html, "html.parser")

    lot_table = soup.select("table[cellpadding=5]")[0]
    table_rows = lot_table.find_all("tr")

    stamp_text = table_rows[-1].text.strip()
    lots = []
    result = {
        "last_updated": convert_date(stamp_text, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": lots
    }

    for row in table_rows[1:-1]:
        cells = row.find_all("td")
        raw_name = cells[0].text
        parsed_name = process_name(raw_name)
        lot_name = parsed_name[1]
        lot_type = parsed_name[0]

        lots.append({
            "name": lot_name,
            "type": lot_type,
            "free": int(cells[1].text),
            # Totals are keyed by the unprocessed cell text, not the parsed name.
            "total": total_number_map.get(raw_name, 0),
            "state": state_map.get(cells[2].text, ""),
            "coords": geodata.coords(lot_name),
            "id": generate_id(__file__, lot_name),
            "forecast": False
        })

    return result
Пример #3
0
def parse_html(xml_data):
    """Convert a parking feed into the common lot structure.

    Args:
        xml_data: Raw feed document, passed straight to ``feedparser.parse``.

    Returns:
        A dict with "lots", "last_updated" (the first entry's ``updated``
        value with every "Z" removed) and "data_source".

    Raises:
        IndexError: If the feed has no entries, because the timestamp is
            read from the first one.
    """
    feed = feedparser.parse(xml_data)

    last_updated = feed["entries"][0]["updated"]
    data = {
        "lots": [],
        # remove trailing timezone for consistency
        "last_updated": last_updated.replace("Z", ""),
        "data_source": data_source
    }

    for entry in feed["entries"]:
        # parse_summary yields (state, free); parse_title yields (name, address).
        summary = parse_summary(entry["summary"])
        title = parse_title(entry["title"])

        # The entry's own id is deliberately not used: lot ids are derived
        # from the name via generate_id, so an id without "=" must not
        # abort parsing.
        data["lots"].append({
            "name": title[0],
            "address": title[1],
            "id": generate_id(__file__, title[0]),
            "state": summary[0],
            "free": summary[1],
            "total": total_number_map.get(title[0], 0),
            "coords": geodata.coords(title[0]),
            "forecast": False,
        })

    return data
Пример #4
0
def parse_html(html):
    """Parse the Bonn parking page into the common lot structure.

    The page is expected to contain exactly six ``<td class="stell">`` cells
    with the free counts; the n-th cell is matched to ``lot_map`` entry n.
    The timestamp comes from the first ``<td class="stand">`` cell.

    Returns:
        A dict with "last_updated", "data_source" and "lots".

    Raises:
        AssertionError: If the page does not contain exactly six free-count
            cells.
    """
    soup = BeautifulSoup(html, "html.parser")

    free_lots = soup.find_all("td", {"class": "stell"})
    if len(free_lots) != 6:
        # Explicit raise instead of `assert` so the check is not stripped
        # when Python runs with -O; the exception type is kept for callers.
        raise AssertionError(
            "Expect to find 6 lots in Bonn, got: %d" % len(free_lots))
    # Not named `time`, to avoid shadowing the standard module of that name.
    updated_text = soup.find("td", {"class": "stand"}).text.strip()

    lots = []
    for idx, free in enumerate(free_lots):
        # Cells carry no name; their position is the key into lot_map.
        lot = lot_map.get(idx)
        lots.append({
            "name": lot.name,
            "coords": geodata.coords(lot.name),
            "free": int(free.text),
            "address": lot.address,
            "total": lot.total,
            "state": "nodata",
            "id": generate_id(__file__, lot.name),
            "forecast": False
        })

    return {
        "last_updated": convert_date(updated_text, "%d.%m.%y %H:%M:%S"),
        "data_source": data_source,
        "lots": lots
    }
Пример #5
0
def parse_html(xml_data):
    """Convert a parking feed into the common lot structure.

    Args:
        xml_data: Raw feed document, passed straight to ``feedparser.parse``.

    Returns:
        A dict with "lots", "last_updated" (the first entry's ``updated``
        value with every "Z" removed) and "data_source".

    Raises:
        IndexError: If the feed has no entries, because the timestamp is
            read from the first one.
    """
    feed = feedparser.parse(xml_data)

    last_updated = feed["entries"][0]["updated"]
    data = {
        "lots": [],
        # remove trailing timezone for consistency
        "last_updated": last_updated.replace("Z", ""),
        "data_source": data_source
    }

    for entry in feed["entries"]:
        # parse_summary yields (state, free); parse_title yields (name, address).
        summary = parse_summary(entry["summary"])
        title = parse_title(entry["title"])

        # The entry's own id is deliberately not used: lot ids are derived
        # from the name via generate_id, so an id without "=" must not
        # abort parsing.
        data["lots"].append({
            "name": title[0],
            "address": title[1],
            "id": generate_id(__file__, title[0]),
            "state": summary[0],
            "free": summary[1],
            "total": total_number_map.get(title[0], 0),
            "coords": geodata.coords(title[0]),
            "forecast": False,
        })

    return data
Пример #6
0
def parse_html(html):
    """Read one lot per ``<tr>`` and enrich it with static data from ``data_map``.

    The timestamp is the string of the first ``<p>`` element. For every
    table row, cell 0 is the lot name and cell 1 the free count; total, type
    and address are looked up in ``data_map`` by name.

    Returns a dict with the keys "last_updated", "data_source" and "lots".
    """
    soup = BeautifulSoup(html, "html.parser")

    lots = []
    result = {
        "last_updated": convert_date(soup.p.string, "(%d.%m.%Y, %H.%M Uhr)"),
        "data_source": data_source,
        "lots": lots
    }

    for row in soup.find_all("tr"):
        cells = row.find_all("td")
        name = cells[0].text
        # A single lookup serves all three static fields below.
        static_info = data_map.get(name)

        lots.append({
            "name": name,
            "free": int(cells[1].text),
            "total": static_info["total"],
            "type": static_info["type"],
            "address": static_info["address"],
            "coords": geodata.coords(name),
            "state": "nodata",
            "id": generate_id(__file__, name),
            "forecast": False
        })

    return result
Пример #7
0
def parse_html(html):
    """Parse the Bonn parking page into the common lot structure.

    Exactly six ``<td class="stell">`` cells (free counts) are expected; each
    one is paired with the ``lot_map`` entry at the same position. The
    timestamp is taken from the first ``<td class="stand">`` cell.

    Returns:
        A dict with "last_updated", "data_source" and "lots".

    Raises:
        AssertionError: If the number of free-count cells is not six.
    """
    soup = BeautifulSoup(html, "html.parser")

    free_lots = soup.find_all("td", {"class": "stell"})
    if len(free_lots) != 6:
        # Raised explicitly (not via `assert`) so the validation still runs
        # under `python -O`; AssertionError is preserved for existing callers.
        raise AssertionError("Expect to find 6 lots in Bonn, got: %d" % len(free_lots))
    # Avoid the name `time`, which would shadow the standard module.
    updated_text = soup.find("td", {"class": "stand"}).text.strip()

    lots = []
    for idx, free in enumerate(free_lots):
        # The cell position is the only link between the page and lot_map.
        lot = lot_map.get(idx)
        lots.append({
            "name": lot.name,
            "coords": geodata.coords(lot.name),
            "free": int(free.text),
            "address": lot.address,
            "total": lot.total,
            "state": "nodata",
            "id": generate_id(__file__, lot.name),
            "forecast": False
        })

    return {
        "last_updated": convert_date(updated_text, "%d.%m.%y %H:%M:%S"),
        "data_source": data_source,
        "lots": lots
    }
Пример #8
0
def parse_html(html):
    """Parse the Dresden parking page into the common lot structure.

    Every ``<table>`` with a non-empty ``summary`` attribute is treated as a
    region (the attribute value is the region name). Rows containing a
    ``<th>`` are skipped; every other row is read as one lot.

    Returns:
        A dict with "lots", "data_source" and "last_updated".
    """
    soup = BeautifulSoup(html, "html.parser")
    data = {
        "lots": [],
        "data_source": data_source,
        "last_updated": convert_date(soup.find(id="P1_LAST_UPDATE").text, "%d.%m.%Y %H:%M:%S")
    }

    for table in soup.find_all("table"):
        # Tag.get() returns None for a missing attribute, whereas
        # table["summary"] would raise KeyError on any unrelated table.
        region = table.get("summary")
        if not region:
            continue

        for lot_row in table.find_all("tr"):
            if lot_row.find("th") is not None:
                continue

            state_classes = lot_row.find("div")["class"]
            # green / yellow / red all mean the lot is open; they are checked
            # before "park-closed", as in the original precedence.
            if any(colour in state_classes for colour in ("green", "yellow", "red")):
                state = "open"
            elif "park-closed" in state_classes:
                state = "closed"
            else:
                state = "nodata"

            lot_name = lot_row.find("td", {"headers": "BEZEICHNUNG"}).text

            try:
                free = int(lot_row.find("td", {"headers": "FREI"}).text)
            except ValueError:
                free = 0

            try:
                total = int(lot_row.find("td", {"headers": "KAPAZITAET"}).text)
            except ValueError:
                # No usable capacity on the page: fall back to known data.
                total = get_most_lots_from_known_data("Dresden", lot_name)

            # Named lot_id so the builtin id() is not shadowed.
            lot_id = generate_id(__file__, lot_name)
            forecast = os.path.isfile("forecast_data/" + lot_id + ".csv")

            data["lots"].append({
                "coords": geodata.coords(lot_name),
                "name": lot_name,
                "total": total,
                "free": free,
                "state": state,
                "id": lot_id,
                "lot_type": type_map.get(lot_name, ""),
                "address": address_map.get(lot_name, ""),
                "forecast": forecast,
                "region": region
            })

    return data
Пример #9
0
def parse_html(html):
    """Collect lots from every ``<div class="listing">`` table on the page.

    The timestamp is parsed from the last ``p > strong`` element; if that
    text does not match the expected format, the second-to-last one is used.
    Within each listing, every ``<tr>`` that follows another ``<tr>`` is a
    lot row: the first link text is the name and the first ``td + td`` cell
    holds the free count.

    Returns a dict with the keys "last_updated", "data_source" and "lots".
    """
    soup = BeautifulSoup(html, "html.parser")

    # last update time (UTC)
    stamp_format = "Stand: %d.%m.%Y - %H:%M:%S"
    strong_tags = soup.select('p > strong')
    try:
        update_time = convert_date(strong_tags[-1].text, stamp_format)
    except ValueError:
        update_time = convert_date(strong_tags[-2].text, stamp_format)

    lots = []
    result = {
        "last_updated": update_time,
        "data_source": data_source,
        "lots": lots
    }

    for listing in soup.find_all("div", {"class": "listing"}):
        # 'tr + tr' skips the first row of each listing table.
        for row in listing.select('tr + tr'):
            name = row.select('a')[0].text
            free_cell = row.select('td + td')[0]

            try:
                free = int(free_cell.text)
            except ValueError:
                # No number in the cell: report the lot without data.
                free = 0
                state = "nodata"
            else:
                state = "open" if "green" in str(free_cell) else "closed"

            lots.append({
                "name": name,
                "free": free,
                "total": total_number_map.get(name, 0),
                "coords": geodata.coords(name),
                "state": state,
                "id": generate_id(__file__, name),
                "forecast": False
            })

    return result
Пример #10
0
def parse_html(html):
    """Template parser: turn a parking page into the common lot structure.

    Expects a ``<p id="last_updated">`` element with the timestamp and one
    ``<tr>`` per lot whose ``<td>`` cells carry the classes ``lot_name``,
    ``lot_free``, ``lot_total``, ``lot_address``, ``lot_type`` and
    ``lot_state``. Cell texts are passed through unchanged.

    Returns:
        A dict with "last_updated", "data_source" and "lots".
    """

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    # The parser is named explicitly: without it bs4 picks whichever parser
    # happens to be installed (and warns), so results could differ per machine.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = soup.select("p#last_updated")[0].text

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": []
    }

    for tr in soup.find_all("tr"):
        lot_name = tr.find("td", {"class": "lot_name"}).text
        lot_free = tr.find("td", {"class": "lot_free"}).text
        lot_total = tr.find("td", {"class": "lot_total"}).text
        lot_address = tr.find("td", {"class": "lot_address"}).text
        lot_type = tr.find("td", {"class": "lot_type"}).text

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = tr.find("td", {"class": "lot_state"}).text

        data["lots"].append({
            "name": lot_name,
            "free": lot_free,
            "total": lot_total,
            "address": lot_address,
            "coords": geodata.coords(lot_name),
            "state": state,
            "type": lot_type,
            # use the utility function generate_id to generate an ID for this lot
            # it takes this file path and the lot's name as params
            "id": generate_id(__file__, lot_name),
            "forecast": False,
        })

    return data
Пример #11
0
def parse_html(html):
    """Template parser: turn a parking page into the common lot structure.

    The timestamp is read from ``<p id="last_updated">``. Each ``<tr>`` is
    one lot and must contain ``<td>`` cells with the classes ``lot_name``,
    ``lot_free``, ``lot_total``, ``lot_address``, ``lot_type`` and
    ``lot_state``; their texts are copied into the result as-is.

    Returns:
        A dict with "last_updated", "data_source" and "lots".
    """

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    # Always name the parser: otherwise bs4 chooses the "best" one installed
    # on the machine and emits a warning, making the output environment-dependent.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = soup.select("p#last_updated")[0].text

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": []
    }

    for tr in soup.find_all("tr"):
        lot_name = tr.find("td", {"class": "lot_name"}).text
        lot_free = tr.find("td", {"class": "lot_free"}).text
        lot_total = tr.find("td", {"class": "lot_total"}).text
        lot_address = tr.find("td", {"class": "lot_address"}).text
        lot_type = tr.find("td", {"class": "lot_type"}).text

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = tr.find("td", {"class": "lot_state"}).text

        data["lots"].append({
            "name": lot_name,
            "free": lot_free,
            "total": lot_total,
            "address": lot_address,
            "coords": geodata.coords(lot_name),
            "state": state,
            "type": lot_type,
            # use the utility function generate_id to generate an ID for this lot
            # it takes this file path and the lot's name as params
            "id": generate_id(__file__, lot_name),
            "forecast": False,
        })

    return data
Пример #12
0
def parse_html(html):
    """Parse the Oldenburg parking page into the common lot structure.

    The timestamp is cut out of the page body right after the text
    "Letzte Aktualisierung:". Every ``<tr>`` that has a ``<td>`` is one lot:
    cell 0 holds the name, cell 1 the free count and cell 3 the state text.
    Totals and addresses come from a static table inside this function.

    Returns:
        A dict with "last_updated", "data_source" and "lots".

    Raises:
        ValueError: If the "Letzte Aktualisierung:" marker is missing.
        KeyError: If the page lists a lot that is not in the static table.
    """
    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated
    marker = "Letzte Aktualisierung:"
    body_text = str(soup.select("body"))
    marker_pos = body_text.find(marker)
    if marker_pos == -1:
        # str.find() signals "not found" with -1; adding the offset to it
        # would silently slice unrelated markup instead of the timestamp.
        raise ValueError("Could not find '%s' on the page" % marker)
    # Skip the marker plus the single character that follows it, then take
    # the 16 characters of "dd.mm.yyyy HH:MM".
    start = marker_pos + len(marker) + 1
    last_updated = body_text[start:start + 16] + ' Uhr'

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": []
    }

    status_map = {
        "Offen": "open",
        "Geschlossen": "closed"
    }

    # Oldenburg does not publish the totals on its website,
    # so we take the values from a PDF dated 2011:
    # http://www.oldenburg.de/fileadmin/oldenburg/Benutzer/PDF/41/414/Parkplatz_Uebersicht2.pdf
    # and http://gis4oldenburg.oldenburg.de/?es=C12S77
    # what could possibly go wrong ¯\_(ツ)_/¯
    lots_map = {
        "Waffenplatz": [650, "Waffenplatz 3"],
        "City": [440, "Staulinie 10"],
        "Galeria Kaufhof": [326, "Ritterstraße"],
        "Pferdemarkt": [401, "Pferdemarkt 13"],
        # Only the combined total of 420 is known for CCO 1 & 2; the split below is an estimate
        "CCO Parkdeck 1": [190, "Heiligengeiststraße 4"],
        "CCO Parkdeck 2": [230, "Heiligengeiststraße 4"],
        "Hbf/ZOB": [358, "Karlstraße"],
        "Theaterwall": [125, "Theaterwall 4"],
        "Theatergarage": [107, "Roonstraße"],
        "Heiligengeist-Höfe": [275, "Georgstraße"],
        "Schlosshöfe": [430, "Mühlenstraße"],
    }

    for tr in soup.find_all("tr"):
        if tr.td is None:
            continue
        # find_all is the current spelling; findAll is a deprecated alias.
        td = tr.find_all('td')
        lot_name = td[0].b.string
        lot_free = int(td[1].b.text)

        # Look up the static values from the map above. An unknown lot name
        # raises KeyError on purpose, so that a new lot gets noticed.
        lot_total = lots_map[lot_name][0]
        lot_address = lots_map[lot_name][1]

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = status_map.get(td[3].text, "nodata")

        data["lots"].append({
            # use the utility function generate_id to generate an ID for this lot
            # it takes this file path and the lot's name as params
            "id": generate_id(__file__, lot_name),
            "name": lot_name,
            "free": lot_free,
            "state": state,
            "total": lot_total,
            "address": lot_address,
            "coords": geodata.coords(lot_name),
            "forecast": False
        })
    return data
Пример #13
0
def parse_html(html):
    """Parse the Dresden parking page into the common lot structure.

    Tables whose ``summary`` attribute is missing or empty are ignored; for
    the others the attribute value is used as the region name. Inside a
    region table, rows with a ``<th>`` are skipped and every remaining row
    is read as one lot.

    Returns:
        A dict with "lots", "data_source" and "last_updated".
    """
    soup = BeautifulSoup(html, "html.parser")
    data = {
        "lots": [],
        "data_source": data_source,
        "last_updated": convert_date(
            soup.find(id="P1_LAST_UPDATE").text, "%d.%m.%Y %H:%M:%S")
    }

    for table in soup.find_all("table"):
        # Subscripting a tag raises KeyError when the attribute is absent;
        # .get() returns None instead, so unrelated tables are just skipped.
        region = table.get("summary")
        if not region:
            continue

        for lot_row in table.find_all("tr"):
            if lot_row.find("th") is not None:
                continue

            state_classes = lot_row.find("div")["class"]
            # Any of the three colours means "open"; they take precedence
            # over "park-closed", matching the original check order.
            if any(colour in state_classes
                   for colour in ("green", "yellow", "red")):
                state = "open"
            elif "park-closed" in state_classes:
                state = "closed"
            else:
                state = "nodata"

            lot_name = lot_row.find("td", {"headers": "BEZEICHNUNG"}).text

            try:
                free = int(lot_row.find("td", {"headers": "FREI"}).text)
            except ValueError:
                free = 0

            try:
                total = int(
                    lot_row.find("td", {"headers": "KAPAZITAET"}).text)
            except ValueError:
                # Capacity cell is not a number: use the known-data fallback.
                total = get_most_lots_from_known_data("Dresden", lot_name)

            # lot_id rather than id, which would shadow the builtin.
            lot_id = generate_id(__file__, lot_name)
            forecast = os.path.isfile("forecast_data/" + lot_id + ".csv")

            data["lots"].append({
                "coords": geodata.coords(lot_name),
                "name": lot_name,
                "total": total,
                "free": free,
                "state": state,
                "id": lot_id,
                "lot_type": type_map.get(lot_name, ""),
                "address": address_map.get(lot_name, ""),
                "forecast": forecast,
                "region": region
            })

    return data
Пример #14
0
def parse_html(html):
    """Parse the Oldenburg parking page into the common lot structure.

    The timestamp is the 16 characters that follow "Letzte Aktualisierung:"
    (plus one separator character) in the page body. Each ``<tr>`` with a
    ``<td>`` describes one lot: cell 0 is the name, cell 1 the free count and
    cell 3 the state text. Totals and addresses are taken from a static
    table defined in this function.

    Returns:
        A dict with "last_updated", "data_source" and "lots".

    Raises:
        ValueError: If the "Letzte Aktualisierung:" marker is missing.
        KeyError: If the page lists a lot that is not in the static table.
    """
    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated
    marker = "Letzte Aktualisierung:"
    body_text = str(soup.select("body"))
    marker_pos = body_text.find(marker)
    if marker_pos == -1:
        # Without this check the -1 from str.find() would be turned into a
        # positive offset and an arbitrary piece of markup would be parsed.
        raise ValueError("Could not find '%s' on the page" % marker)
    # Marker length plus one separator character, then "dd.mm.yyyy HH:MM".
    start = marker_pos + len(marker) + 1
    last_updated = body_text[start : start + 16] + " Uhr"

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": [],
    }

    status_map = {"Offen": "open", "Geschlossen": "closed"}

    # Oldenburg does not publish the totals on its website,
    # so we take the values from a PDF dated 2011:
    # http://www.oldenburg.de/fileadmin/oldenburg/Benutzer/PDF/41/414/Parkplatz_Uebersicht2.pdf
    # and http://gis4oldenburg.oldenburg.de/?es=C12S77
    # what could possibly go wrong ¯\_(ツ)_/¯
    lots_map = {
        "Waffenplatz": [650, "Waffenplatz 3"],
        "City": [440, "Staulinie 10"],
        "Galeria Kaufhof": [326, "Ritterstraße"],
        "Pferdemarkt": [401, "Pferdemarkt 13"],
        # Only the combined total of 420 is known for CCO 1 & 2; the split below is an estimate
        "CCO Parkdeck 1": [190, "Heiligengeiststraße 4"],
        "CCO Parkdeck 2": [230, "Heiligengeiststraße 4"],
        "Hbf/ZOB": [358, "Karlstraße"],
        "Theaterwall": [125, "Theaterwall 4"],
        "Theatergarage": [107, "Roonstraße"],
        "Heiligengeist-Höfe": [275, "Georgstraße"],
        "Schlosshöfe": [430, "Mühlenstraße"],
    }

    for tr in soup.find_all("tr"):
        if tr.td is None:
            continue
        # Use find_all consistently; findAll is a deprecated alias.
        td = tr.find_all("td")
        lot_name = td[0].b.string
        lot_free = int(td[1].b.text)

        # Static values come from the map above. A lot name that is not in
        # the map raises KeyError on purpose, so that new lots get noticed.
        lot_total = lots_map[lot_name][0]
        lot_address = lots_map[lot_name][1]

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = status_map.get(td[3].text, "nodata")

        data["lots"].append(
            {
                # use the utility function generate_id to generate an ID for this lot
                # it takes this file path and the lot's name as params
                "id": generate_id(__file__, lot_name),
                "name": lot_name,
                "free": lot_free,
                "state": state,
                "total": lot_total,
                "address": lot_address,
                "coords": geodata.coords(lot_name),
                "forecast": False,
            }
        )
    return data