def parse_html(html):
    """Parse the Lübeck parking table into the common lot structure.

    The timestamp is read from the ``<strong>`` inside the first ``<tr>``.
    Every following row is either a header row (contains ``<th>``; its
    ``th.head1`` text becomes the region of the rows below), a summary row
    whose first cell reads "Gesamt" (ignored), or a lot row with two or four
    cells. Rows with any other number of cells are ignored.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.
    """
    soup = BeautifulSoup(html, "html.parser")

    timestamp_text = soup.find("tr").find("strong").text
    lots = []
    result = {
        "last_updated": convert_date(timestamp_text, "Stand: %d.%m.%Y, %H:%M Uhr"),
        "data_source": data_source,
        "lots": lots,
    }

    current_region = ""
    # The first row is the one the timestamp was read from, so it is skipped.
    for row in soup.find_all("tr")[1:]:
        if row.find_all("th"):
            # Header row: remember the region for the lot rows that follow.
            current_region = row.find("th", {"class": "head1"}).text
            continue

        if row.find("td").text == "Gesamt":
            continue

        cells = row.find_all("td")
        cell_count = len(cells)
        if cell_count not in (2, 4):
            continue

        parsed_name = process_name(cells[0].text)
        lot_name = parsed_name[1]
        lot_type = parsed_name[0]

        if cell_count == 2:
            # Two cells: name plus a textual state, no numbers on the page.
            total = get_most_lots_from_known_data("Lübeck", lot_name)
            free = 0
            state = process_state_map.get(cells[1].text, "")
        else:
            # Four cells: name, total, free (the fourth cell is not read).
            total = int(cells[1].text)
            free = int(cells[2].text)
            state = "open"

        lots.append({
            "name": lot_name,
            "type": lot_type,
            "total": total,
            "free": free,
            "region": current_region,
            "state": state,
            "coords": geodata.coords(lot_name),
            "id": generate_id(__file__, lot_name),
            "forecast": False,
        })

    return result
def parse_html(html):
    """Parse the lot table (``table[cellpadding=5]``) into the common structure.

    The first row of the table is skipped and the last row supplies the
    timestamp; every row in between is treated as one parking lot.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.
    """
    soup = BeautifulSoup(html, "html.parser")
    table_rows = soup.select("table[cellpadding=5]")[0].find_all("tr")

    timestamp_text = table_rows[-1].text.strip()
    lots = []
    result = {
        "last_updated": convert_date(timestamp_text, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": lots,
    }

    for row in table_rows[1:-1]:
        cells = row.find_all("td")
        raw_name = cells[0].text
        parsed_name = process_name(raw_name)
        lot_name = parsed_name[1]
        lots.append({
            "name": lot_name,
            "type": parsed_name[0],
            "free": int(cells[1].text),
            # The total is looked up by the raw cell text, not the processed name.
            "total": total_number_map.get(raw_name, 0),
            "state": state_map.get(cells[2].text, ""),
            "coords": geodata.coords(lot_name),
            "id": generate_id(__file__, lot_name),
            "forecast": False,
        })

    return result
def parse_html(xml_data):
    """Parse a feed (via ``feedparser``) into the common lot structure.

    Each feed entry becomes one lot: ``parse_title`` yields the name and
    address, ``parse_summary`` yields the state and the number of free spaces.
    The ``updated`` field of the first entry is used as the timestamp for the
    whole result.

    Args:
        xml_data: Raw feed document handed to ``feedparser.parse``.

    Returns:
        A dict with the keys ``lots``, ``last_updated`` and ``data_source``.

    Raises:
        IndexError: If the feed contains no entries (no timestamp available).
    """
    feed = feedparser.parse(xml_data)
    entries = feed["entries"]

    data = {
        "lots": [],
        # remove the trailing timezone marker for consistency
        "last_updated": entries[0]["updated"].replace("Z", ""),
        "data_source": data_source,
    }

    for entry in entries:
        summary = parse_summary(entry["summary"])
        title = parse_title(entry["title"])
        lot_name = title[0]

        # The entry id is deliberately not parsed: its value was never used,
        # and splitting it on "=" could fail on ids of a different shape.
        data["lots"].append({
            "name": lot_name,
            "address": title[1],
            "id": generate_id(__file__, lot_name),
            "state": summary[0],
            "free": summary[1],
            "total": total_number_map.get(lot_name, 0),
            "coords": geodata.coords(lot_name),
            "forecast": False,
        })

    return data
def parse_html(html):
    """Parse the Bonn parking page into the common lot structure.

    The page is expected to contain exactly six ``td.stell`` cells, one per
    lot; static lot details come from ``lot_map``, keyed by the position of
    the cell on the page. The timestamp is read from ``td.stand``.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.
        Every lot is reported with state ``"nodata"``.

    Raises:
        AssertionError: If the page does not contain exactly six ``td.stell``
            cells.
    """
    soup = BeautifulSoup(html, "html.parser")

    free_cells = soup.find_all("td", {"class": "stell"})
    if len(free_cells) != 6:
        # Raised explicitly rather than through ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(
            "Expect to find 6 lots in Bonn, got: %d" % len(free_cells))

    updated_text = soup.find("td", {"class": "stand"}).text.strip()

    lots = []
    for idx, free_cell in enumerate(free_cells):
        lot = lot_map.get(idx)
        lots.append({
            "name": lot.name,
            "coords": geodata.coords(lot.name),
            "free": int(free_cell.text),
            "address": lot.address,
            "total": lot.total,
            "state": "nodata",
            "id": generate_id(__file__, lot.name),
            "forecast": False,
        })

    return {
        "last_updated": convert_date(updated_text, "%d.%m.%y %H:%M:%S"),
        "data_source": data_source,
        "lots": lots,
    }
def parse_html(html):
    """Parse a page whose every ``<tr>`` is one lot into the common structure.

    The timestamp is taken from the first ``<p>`` of the document. For each
    table row the first cell is the lot name and the second the number of
    free spaces; total, type and address come from ``data_map``.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.
        Every lot is reported with state ``"nodata"``.
    """
    soup = BeautifulSoup(html, "html.parser")

    lots = []
    result = {
        "last_updated": convert_date(soup.p.string, "(%d.%m.%Y, %H.%M Uhr)"),
        "data_source": data_source,
        "lots": lots,
    }

    for row in soup.find_all("tr"):
        cells = row.find_all("td")
        name = cells[0].text
        free = int(cells[1].text)
        # Static details for this lot; subscripting fails if the name is unknown.
        details = data_map.get(name)
        lots.append({
            "name": name,
            "free": free,
            "total": details["total"],
            "type": details["type"],
            "address": details["address"],
            "coords": geodata.coords(name),
            "state": "nodata",
            "id": generate_id(__file__, name),
            "forecast": False,
        })

    return result
def parse_html(html):
    """Parse the Bonn parking page into the common lot structure.

    Exactly six ``td.stell`` cells are expected, each holding the number of
    free spaces of one lot. The position of a cell selects the matching
    entry in ``lot_map``, which provides name, address and total. The
    timestamp comes from ``td.stand``.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.
        Every lot is reported with state ``"nodata"``.

    Raises:
        AssertionError: If the number of ``td.stell`` cells is not six.
    """
    soup = BeautifulSoup(html, "html.parser")

    stell_cells = soup.find_all("td", {"class": "stell"})
    found = len(stell_cells)
    if found != 6:
        # An ``assert`` statement would vanish under ``python -O``; raising
        # the same exception type explicitly keeps the check always active.
        raise AssertionError("Expect to find 6 lots in Bonn, got: %d" % found)

    stand_text = soup.find("td", {"class": "stand"}).text.strip()

    lots = []
    for position, cell in enumerate(stell_cells):
        known = lot_map.get(position)
        lots.append({
            "name": known.name,
            "coords": geodata.coords(known.name),
            "free": int(cell.text),
            "address": known.address,
            "total": known.total,
            "state": "nodata",
            "id": generate_id(__file__, known.name),
            "forecast": False,
        })

    return {
        "last_updated": convert_date(stand_text, "%d.%m.%y %H:%M:%S"),
        "data_source": data_source,
        "lots": lots,
    }
def parse_html(html):
    """Parse the Dresden parking page into the common lot structure.

    Every ``<table>`` with a non-empty ``summary`` attribute is a region; its
    rows without a ``<th>`` are lots. The state is derived from the CSS
    classes of the first ``<div>`` in the row. Free and total counts fall
    back to ``0`` and to ``get_most_lots_from_known_data`` respectively when
    the cell text is not an integer.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``lots``, ``data_source`` and ``last_updated``.
    """
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "lots": [],
        "data_source": data_source,
        "last_updated": convert_date(soup.find(id="P1_LAST_UPDATE").text,
                                     "%d.%m.%Y %H:%M:%S"),
    }

    # All three colour classes mean the lot is open.
    open_classes = ("green", "yellow", "red")

    for table in soup.find_all("table"):
        # ``.get`` so that a table without a ``summary`` attribute is skipped
        # like one with an empty summary instead of raising KeyError.
        region = table.get("summary", "")
        if region == "":
            continue

        for lot_row in table.find_all("tr"):
            if lot_row.find("th") is not None:
                continue

            state_classes = lot_row.find("div")["class"]
            if any(css in state_classes for css in open_classes):
                state = "open"
            elif "park-closed" in state_classes:
                state = "closed"
            else:
                state = "nodata"

            lot_name = lot_row.find("td", {"headers": "BEZEICHNUNG"}).text

            try:
                free = int(lot_row.find("td", {"headers": "FREI"}).text)
            except ValueError:
                free = 0

            try:
                total = int(lot_row.find("td", {"headers": "KAPAZITAET"}).text)
            except ValueError:
                total = get_most_lots_from_known_data("Dresden", lot_name)

            lot_id = generate_id(__file__, lot_name)
            # A forecast is advertised only if a matching CSV file exists.
            forecast = os.path.isfile("forecast_data/" + lot_id + ".csv")

            data["lots"].append({
                "coords": geodata.coords(lot_name),
                "name": lot_name,
                "total": total,
                "free": free,
                "state": state,
                "id": lot_id,
                "lot_type": type_map.get(lot_name, ""),
                "address": address_map.get(lot_name, ""),
                "forecast": forecast,
                "region": region,
            })

    return data
def parse_html(html):
    """Parse a page with ``div.listing`` lot tables into the common structure.

    The timestamp is taken from the last ``p > strong`` element; if that text
    does not match the expected format, the second to last one is used.
    Inside every ``div.listing`` each ``tr + tr`` row is one lot.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.
    """
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): the original comment labels this time as UTC - confirm.
    stamp_format = "Stand: %d.%m.%Y - %H:%M:%S"
    strong_tags = soup.select("p > strong")
    try:
        update_time = convert_date(strong_tags[-1].text, stamp_format)
    except ValueError:
        update_time = convert_date(strong_tags[-2].text, stamp_format)

    lots = []
    result = {
        "last_updated": update_time,
        "data_source": data_source,
        "lots": lots,
    }

    for listing in soup.find_all("div", {"class": "listing"}):
        for row in listing.select("tr + tr"):
            name = row.select("a")[0].text
            free_cell = row.select("td + td")[0]
            try:
                free = int(free_cell.text)
            except ValueError:
                # No usable number in the cell: report the lot without data.
                free = 0
                state = "nodata"
            else:
                # "green" anywhere in the cell's markup marks an open lot.
                state = "open" if "green" in str(free_cell) else "closed"

            lots.append({
                "name": name,
                "free": free,
                "total": total_number_map.get(name, 0),
                "coords": geodata.coords(name),
                "state": state,
                "id": generate_id(__file__, name),
                "forecast": False,
            })

    return result
def parse_html(html):
    """Parse the sample page into the common lot structure.

    The timestamp is read from ``p#last_updated``; every ``<tr>`` is treated
    as one lot whose values come from cells with the classes ``lot_name``,
    ``lot_free``, ``lot_total``, ``lot_address``, ``lot_type`` and
    ``lot_state``.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.
    """
    # BeautifulSoup is a great and easy way to parse the html and find the
    # bits and pieces we're looking for. The parser is named explicitly so the
    # result does not depend on which optional parsers happen to be installed.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated,
    # it should be listed on most pages
    last_updated = soup.select("p#last_updated")[0].text

    data = {
        # convert_date is a utility function you can use to turn this date
        # into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": [],
    }

    for tr in soup.find_all("tr"):
        lot_name = tr.find("td", {"class": "lot_name"}).text
        # NOTE(review): free and total are passed on as text, not as integers
        # - confirm whether consumers expect numbers here.
        lot_free = tr.find("td", {"class": "lot_free"}).text
        lot_total = tr.find("td", {"class": "lot_total"}).text
        lot_address = tr.find("td", {"class": "lot_address"}).text
        lot_type = tr.find("td", {"class": "lot_type"}).text

        # please be careful about the state only being allowed to contain
        # either open, closed or nodata; should the page list other states,
        # please map these into the three listed possibilities
        state = tr.find("td", {"class": "lot_state"}).text

        data["lots"].append({
            "name": lot_name,
            "free": lot_free,
            "total": lot_total,
            "address": lot_address,
            "coords": geodata.coords(lot_name),
            "state": state,
            "type": lot_type,
            # use the utility function generate_id to generate an ID for this
            # lot; it takes this file path and the lot's name as params
            "id": generate_id(__file__, lot_name),
            "forecast": False,
        })

    return data
def parse_html(html):
    """Parse the Oldenburg parking page into the common lot structure.

    The timestamp is cut out of the page body right after the text
    "Letzte Aktualisierung:". Every ``<tr>`` that contains a ``<td>`` is one
    lot: cell 0 holds the name, cell 1 the free spaces and cell 3 the state.
    Totals and addresses are not on the page and come from a built-in table.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.

    Raises:
        KeyError: If the page lists a lot that is missing from the built-in
            table of totals and addresses.
    """
    soup = BeautifulSoup(html, "html.parser")

    # The timestamp is the 16 characters that start 23 characters after the
    # beginning of the marker (22 marker characters plus one separator).
    body_text = str(soup.select("body"))
    stamp_start = body_text.find("Letzte Aktualisierung:") + 23
    stamp_text = body_text[stamp_start:stamp_start + 16] + ' Uhr'

    lots = []
    result = {
        "last_updated": convert_date(stamp_text, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": lots,
    }

    state_by_label = {
        "Offen": "open",
        "Geschlossen": "closed"
    }

    # Oldenburg does not publish the totals on its website, so the values
    # below were taken from a PDF dating from 2011:
    # http://www.oldenburg.de/fileadmin/oldenburg/Benutzer/PDF/41/414/Parkplatz_Uebersicht2.pdf
    # and from http://gis4oldenburg.oldenburg.de/?es=C12S77
    known_lots = {
        "Waffenplatz": [650, "Waffenplatz 3"],
        "City": [440, "Staulinie 10"],
        "Galeria Kaufhof": [326, "Ritterstraße"],
        "Pferdemarkt": [401, "Pferdemarkt 13"],
        # Only the combined total (420) of CCO 1 & 2 is known; the split
        # between the two decks is an estimate.
        "CCO Parkdeck 1": [190, "Heiligengeiststraße 4"],
        "CCO Parkdeck 2": [230, "Heiligengeiststraße 4"],
        "Hbf/ZOB": [358, "Karlstraße"],
        "Theaterwall": [125, "Theaterwall 4"],
        "Theatergarage": [107, "Roonstraße"],
        "Heiligengeist-Höfe": [275, "Georgstraße"],
        "Schlosshöfe": [430, "Mühlenstraße"],
    }

    for row in soup.find_all("tr"):
        # Rows without any data cell are not lots.
        if row.td is None:
            continue

        cells = row.find_all('td')
        name = cells[0].b.string
        free = int(cells[1].b.text)

        # An unknown lot name raises here on purpose instead of yielding zero.
        known = known_lots[name]
        total = known[0]
        address = known[1]

        # Only open, closed and nodata are valid states; anything the page
        # reports beyond the two mapped labels becomes nodata.
        state = state_by_label.get(cells[3].text, "nodata")

        lots.append({
            "id": generate_id(__file__, name),
            "name": name,
            "free": free,
            "state": state,
            "total": total,
            "address": address,
            "coords": geodata.coords(name),
            "forecast": False
        })

    return result
def parse_html(html):
    """Parse the Dresden parking page into the common lot structure.

    Tables carrying a non-empty ``summary`` attribute describe one region
    each; every row of such a table that has no ``<th>`` is a lot. The lot
    state is read from the CSS classes of the row's first ``<div>``. When the
    free or total cell does not hold an integer, free falls back to ``0`` and
    total to ``get_most_lots_from_known_data``.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``lots``, ``data_source`` and ``last_updated``.
    """
    soup = BeautifulSoup(html, "html.parser")

    updated_text = soup.find(id="P1_LAST_UPDATE").text
    data = {
        "lots": [],
        "data_source": data_source,
        "last_updated": convert_date(updated_text, "%d.%m.%Y %H:%M:%S"),
    }

    # Any of these colour classes marks the lot as open.
    open_markers = ("green", "yellow", "red")

    for table in soup.find_all("table"):
        # Tables lacking the attribute are skipped just like tables with an
        # empty summary; plain indexing would raise KeyError for them.
        region = table.get("summary", "")
        if region == "":
            continue

        for lot_row in table.find_all("tr"):
            if lot_row.find("th") is not None:
                continue

            div_classes = lot_row.find("div")["class"]
            if any(marker in div_classes for marker in open_markers):
                state = "open"
            elif "park-closed" in div_classes:
                state = "closed"
            else:
                state = "nodata"

            lot_name = lot_row.find("td", {"headers": "BEZEICHNUNG"}).text

            try:
                free = int(lot_row.find("td", {"headers": "FREI"}).text)
            except ValueError:
                free = 0

            try:
                total = int(lot_row.find("td", {"headers": "KAPAZITAET"}).text)
            except ValueError:
                total = get_most_lots_from_known_data("Dresden", lot_name)

            lot_id = generate_id(__file__, lot_name)
            # Forecasts are offered only for lots with a stored CSV file.
            forecast = os.path.isfile("forecast_data/" + lot_id + ".csv")

            data["lots"].append({
                "coords": geodata.coords(lot_name),
                "name": lot_name,
                "total": total,
                "free": free,
                "state": state,
                "id": lot_id,
                "lot_type": type_map.get(lot_name, ""),
                "address": address_map.get(lot_name, ""),
                "forecast": forecast,
                "region": region,
            })

    return data
def parse_html(html):
    """Parse the Oldenburg parking page into the common lot structure.

    The update time is sliced from the stringified page body, directly after
    the text "Letzte Aktualisierung:". Each ``<tr>`` with at least one
    ``<td>`` describes a lot (cell 0: name, cell 1: free spaces, cell 3:
    state). Totals and addresses are taken from a table built into this
    function because the page does not provide them.

    Args:
        html: Raw HTML of the page.

    Returns:
        A dict with the keys ``last_updated``, ``data_source`` and ``lots``.

    Raises:
        KeyError: If a lot on the page is not in the built-in table.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Slice 16 characters of date and time, starting 23 characters after the
    # start of the marker text.
    page_body = str(soup.select("body"))
    offset = page_body.find("Letzte Aktualisierung:") + 23
    updated_text = page_body[offset : offset + 16] + " Uhr"

    data = {
        "last_updated": convert_date(updated_text, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": [],
    }

    status_map = {"Offen": "open", "Geschlossen": "closed"}

    # The website does not list totals; these values come from a 2011 PDF:
    # http://www.oldenburg.de/fileadmin/oldenburg/Benutzer/PDF/41/414/Parkplatz_Uebersicht2.pdf
    # and from http://gis4oldenburg.oldenburg.de/?es=C12S77
    lots_map = {
        "Waffenplatz": [650, "Waffenplatz 3"],
        "City": [440, "Staulinie 10"],
        "Galeria Kaufhof": [326, "Ritterstraße"],
        "Pferdemarkt": [401, "Pferdemarkt 13"],
        # CCO 1 & 2 are only documented with a combined total of 420; the
        # per-deck numbers are an approximation.
        "CCO Parkdeck 1": [190, "Heiligengeiststraße 4"],
        "CCO Parkdeck 2": [230, "Heiligengeiststraße 4"],
        "Hbf/ZOB": [358, "Karlstraße"],
        "Theaterwall": [125, "Theaterwall 4"],
        "Theatergarage": [107, "Roonstraße"],
        "Heiligengeist-Höfe": [275, "Georgstraße"],
        "Schlosshöfe": [430, "Mühlenstraße"],
    }

    data_rows = (tr for tr in soup.find_all("tr") if tr.td is not None)
    for tr in data_rows:
        columns = tr.find_all("td")
        lot_name = columns[0].b.string
        lot_free = int(columns[1].b.text)

        # Unknown lot names raise KeyError rather than defaulting to zero.
        lot_total, lot_address = lots_map[lot_name]

        # Labels other than the two mapped ones are reported as nodata, since
        # only open, closed and nodata are allowed states.
        state = status_map.get(columns[3].text, "nodata")

        data["lots"].append(
            {
                "id": generate_id(__file__, lot_name),
                "name": lot_name,
                "free": lot_free,
                "state": state,
                "total": lot_total,
                "address": lot_address,
                "coords": geodata.coords(lot_name),
                "forecast": False,
            }
        )

    return data