Example #1
def parse_html(xml_data):
    feed = feedparser.parse(xml_data)

    try:
        last_updated = feed["entries"][0]["updated"]
    except (KeyError, IndexError):  # an empty "entries" list raises IndexError, not KeyError
        last_updated = utc_now()

    data = {
        "lots": [],
        # remove the trailing timezone marker for consistency
        "last_updated": last_updated.replace("Z", "")
    }

    for entry in feed["entries"]:
        summary = parse_summary(entry["summary"])
        title_elements = parse_title(entry["title"])

        lot_identifier = (title_elements[2] + " " + title_elements[0]).strip()
        lot = geodata.lot(lot_identifier)

        data["lots"].append({
            "name": title_elements[0],
            "address": title_elements[1],
            "id": lot.id,
            "state": summary[0],
            "free": summary[1],
            "total": lot.total,
            "coords": lot.coords,
            "forecast": False,
            "lot_type": title_elements[2]
        })

    return data
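
Note: every snippet on this page leans on a module-level utc_now() helper from ParkAPI's util module. A minimal sketch of the contract the snippets appear to assume (an assumption; the real helper may differ in detail): it returns the current UTC time as an ISO 8601 string, which is why the fallback value survives string operations like the .replace("Z", "") above and json.dumps() in the DB example further down.

from datetime import datetime, timezone

def utc_now():
    # Sketch only (assumed behavior): a naive ISO 8601 string without
    # microseconds, suitable as a "last_updated" fallback value.
    return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None).isoformat()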
Example #2
File: Bonn.py Project: henningvs/ParkAPI
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")
    
    lots = []
    
    for row in soup.find_all("div", class_='vc_row wpb_row section vc_row-fluid parking-lots grid_section'):
      for column in row.find_all("div", class_='vc_col-sm-3 wpb_column vc_column_container '):
        h3 = column.find_all("h3")
        if h3[0].a is not None:
          name = h3[0].a.string
          lot = geodata.lot(name)
          lots.append({
            "name": name,
            "coords": lot.coords,
            "free": int(h3[1].span.strong.get_text()),
            "address": lot.address,
            "total": lot.total,
            "state": "nodata",
            "id": lot.id,
            "forecast": False
          })
    
    return {
        "last_updated": utc_now(),
        "lots": lots
    }
Example #3
def parse_html(xml_data):
    feed = feedparser.parse(xml_data)

    try:
        last_updated = feed["entries"][0]["updated"]
        last_updated = datetime.strptime(last_updated[5:25], "%d %b %Y %H:%M:%S").isoformat()
    except (KeyError, IndexError):  # an empty "entries" list raises IndexError, not KeyError
        last_updated = utc_now()

    data = {
        "lots": [],
        "last_updated": last_updated
    }

    for entry in feed["entries"]:
        summary = parse_summary(entry["summary"])
        title_elements = parse_title(entry["title"])

        lot_identifier = html.unescape((title_elements[2] + " " + title_elements[0]).strip())
        lot = geodata.lot(lot_identifier)

        data["lots"].append({
            "name": html.unescape(title_elements[0]),
            "address": lot.address,
            "id": html.unescape(lot.id),
            "state": "open",
            "free": summary[1],
            "total": lot.total,
            "coords": lot.coords,
            "forecast": False,
            "lot_type": title_elements[2]
        })
    return data
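
Note: the slice last_updated[5:25] assumes an RFC 822 style feed date whose first five characters are the weekday prefix ("Thu, "). A quick check of that assumption with a made-up feed value:

from datetime import datetime

raw = "Thu, 04 Jul 2019 11:03:00 GMT"  # hypothetical feed timestamp
print(datetime.strptime(raw[5:25], "%d %b %Y %H:%M:%S").isoformat())
# -> 2019-07-04T11:03:00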
Example #4
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    lots = []

    for row in soup.find_all("div", class_='parking-lots'):
        entity_wrapper_class = 'wpb_column vc_column_container vc_col-sm-3'
        for column in row.find_all("div", class_=entity_wrapper_class):
            h3 = column.find_all("h3")
            if h3[0].a is not None and len(h3) > 1:
                name = h3[0].a.string
                free = 0
                for heading in h3:
                    for heading_element in heading.find_all("span"):
                        if heading_element.find("strong") is not None:
                            free = int(heading_element.strong.get_text())
                lot = geodata.lot(name)
                ltype = None
                for p in [pt for pt in ["Parkhaus", "Parkplatz"] if pt in name]:
                    ltype = p
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": free,
                    "address": lot.address,
                    "total": lot.total,
                    "state": "unknown",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })

            else:
                name = h3[0].string
                ltype = None
                if "Parkhaus" in name:
                    ltype = "Parkhaus"
                elif "Parkplatz" in name:
                    ltype = "Parkplatz"
                lot = geodata.lot(name)
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": 0,
                    "address": lot.address,
                    "total": lot.total,
                    "state": "nodata",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })

    return {
        "last_updated": utc_now(),
        "lots": lots
    }
Example #5
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    lots = []

    for row in soup.find_all("div", class_='parking-lots'):
        entity_wrapper_class = 'wpb_column vc_column_container vc_col-sm-3'
        for column in row.find_all("div", class_=entity_wrapper_class):
            h3 = column.find_all("h3")
            if h3[0].a is not None and len(h3) > 1:
                name = h3[0].a.string
                free = 0
                for heading in h3:
                    for heading_element in heading.find_all("span"):
                        if heading_element.find("strong") is not None:
                            free = int(heading_element.strong.get_text())
                lot = geodata.lot(name)
                ltype = None
                for p in [
                        pt for pt in ["Parkhaus", "Parkplatz"] if pt in name
                ]:
                    ltype = p
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": free,
                    "address": lot.address,
                    "total": lot.total,
                    "state": "unknown",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })

            elif h3[0].string:
                name = h3[0].string
                ltype = None
                if "Parkhaus" in name:
                    ltype = "Parkhaus"
                elif "Parkplatz" in name:
                    ltype = "Parkplatz"
                lot = geodata.lot(name)
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": 0,
                    "address": lot.address,
                    "total": lot.total,
                    "state": "nodata",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })

    return {"last_updated": utc_now(), "lots": lots}
Example #6
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": '',  # filled in below
        "lots": []
    }

    try:
        # <div class="container-fluid"
        parking_data = soup.find('div', class_='container-fluid')
        # Letzte Aktualisierung: 04.07.2019 11:03:00
        last_updated = convert_date(
            parking_data.find('h5').text,
            'Letzte Aktualisierung: %d.%m.%Y %H:%M:%S')
        data["last_updated"] = last_updated
    except Exception:
        # if the service is unavailable (did happen in one of my tests):
        data["last_updated"] = utc_now()
        # return data

    parking_lots = parking_data.find_all('div', class_='well')
    for one_parking_lot in parking_lots:
        parking_name = one_parking_lot.find('b').text.strip()
        lot = geodata.lot(parking_name)
        parking_free = 0
        try:
            parking_status = 'open'
            parking_free = int(
                one_parking_lot.find_all(
                    'div', role='progressbar')[1].find('b').text.strip())
        except Exception:
            parking_status = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_status,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False
        })

    return data
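
Note: convert_date is another ParkAPI utility. A rough sketch of what the call above seems to expect from it (an assumption; the real helper presumably also normalizes time zones):

from datetime import datetime

def convert_date(date_string, date_format):
    # Sketch only: the format string carries the literal
    # "Letzte Aktualisierung: " prefix, so strptime consumes it directly.
    return datetime.strptime(date_string.strip(), date_format).isoformat()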
Example #7
File: Bonn.py Project: DASTOBI/ParkAPI
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")
    
    lots = []
    
    for row in soup.find_all("div", class_='parking-lots'):
      for column in row.find_all("div", class_='wpb_column vc_column_container vc_col-sm-3'):
        h3 = column.find_all("h3")
        if h3[0].a is not None:
          name = h3[0].a.string
          lot = geodata.lot(name)
          ltype = None
          for p in [pt for pt in ["Parkhaus", "Parkplatz"] if pt in name]:
            ltype = p
          lots.append({
            "name": name,
            "coords": lot.coords,
            "free": int(h3[1].span.strong.get_text()),
            "address": lot.address,
            "total": lot.total,
            "state": "unknown",
            "id": lot.id,
            "lot_type": ltype,
            "forecast": False
          })
        else:
          name = h3[0].string
          ltype = None
          if "Parkhaus" in name:
            ltype = "Parkhaus"
          elif "Parkplatz" in name:
            ltype="Parkplatz"
          lot = geodata.lot(name)
          lots.append({
            "name": name,
            "coords": lot.coords,
            "free": 0,
            "address": lot.address,
            "total": lot.total,
            "state": "nodata",
            "id": lot.id,
            "lot_type": ltype,
            "forecast": False
          })
    
    return {
        "last_updated": utc_now(),
        "lots": lots
    }
Example #8
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    lots = []

    for row in soup.find_all("div", class_='parking-lots'):
        for column in row.find_all(
                "div", class_='wpb_column vc_column_container vc_col-sm-3'):
            h3 = column.find_all("h3")
            if h3[0].a is not None:
                name = h3[0].a.string
                lot = geodata.lot(name)
                ltype = None
                for p in [
                        pt for pt in ["Parkhaus", "Parkplatz"] if pt in name
                ]:
                    ltype = p
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": int(h3[1].span.strong.get_text()),
                    "address": lot.address,
                    "total": lot.total,
                    "state": "unknown",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })
            else:
                name = h3[0].string
                ltype = None
                if "Parkhaus" in name:
                    ltype = "Parkhaus"
                elif "Parkplatz" in name:
                    ltype = "Parkplatz"
                lot = geodata.lot(name)
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": 0,
                    "address": lot.address,
                    "total": lot.total,
                    "state": "nodata",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })

    return {"last_updated": utc_now(), "lots": lots}
Example #9
File: scraper.py Project: sibbl/ParkAPI
def save_data_to_db(cursor, parking_data, city):
    """Save the data given into the Postgres DB."""
    timestamp_updated = parking_data["last_updated"]
    timestamp_downloaded = util.utc_now()
    json_data = json.dumps(parking_data)
    sql_string = "INSERT INTO parkapi(timestamp_updated, timestamp_downloaded, city, data) " \
                 "VALUES (%(updated)s, %(downloaded)s, %(city)s, %(data)s) RETURNING id;"
    cursor.execute(sql_string, {
        "updated": timestamp_updated,
        "downloaded": timestamp_downloaded,
        "city": city,
        "data": json_data
    })

    print("Saved " + city + " to DB.")
Example #10
def parse_html(xml):
    soup = BeautifulSoup(xml, "html.parser")

    # last_updated is the date when the data on the page was last updated;
    # it should be listed on most pages
    try:
        last_updated = soup.select("zeitstempel")[0].text
        last_updated = datetime.strptime(last_updated[0:16], "%d.%m.%Y %H:%M").isoformat()
    except IndexError:
        # select() returns a list, so a missing tag surfaces as an empty list
        last_updated = utc_now()

    data = {
        "last_updated": last_updated,
        "lots": []
    }

    for ph in soup.find_all("parkhaus"):
        lot_name = ph.find("name").text
        lot_actual = int(ph.find("aktuell").text)
        lot_total = int(ph.find("gesamt").text)
        lot_free = lot_total - lot_actual

        # the state may only contain "open", "closed" or "nodata";
        # any other state the page lists must be mapped onto those three
        # translate the German state to English
        state_german = ph.find("status").text
        if state_german == "Offen":
            state = "open"
        elif state_german == "Geschlossen":
            state = "closed"
        else:
            state = "nodata"

        lot = geodata.lot(lot_name)
        data["lots"].append({
            "name": lot.name,
            "free": lot_free,
            "total": lot_total,
            "address": lot.address,
            "coords": lot.coords,
            "state": state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
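
Note: for reference, the input shape this parser expects, inferred from the tags it queries (values are made up, and geodata must know the lot):

sample = """
<zeitstempel>04.07.2019 11:03 Uhr</zeitstempel>
<parkhaus>
  <name>Altmarkt</name>
  <aktuell>280</aktuell>
  <gesamt>400</gesamt>
  <status>Offen</status>
</parkhaus>
"""
print(parse_html(sample))  # 400 - 280 = 120 free, state "open"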
Example #11
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": utc_now(),  # not found on site, so we use the current time
        "lots": []
    }
    # for handling duplicate entries
    seen_names = set()

    # find all entries:
    # suche <div class="houses">
    parking_houses = soup.find_all('div', class_='houses')
    for parking_group in parking_houses:
        parking_lots = parking_group.find_all('li')
        for one_lot in parking_lots:
            parking_name = one_lot.find('a').text
            if parking_name not in seen_names:
                seen_names.add(parking_name)  # remember it so duplicates are skipped
                lot = geodata.lot(parking_name)

                parking_state = 'open'
                parking_free = 0
                try:
                    parking_free = int(
                        one_lot.find('span',
                                     class_='free-text').text.split()[0])
                except Exception:
                    parking_state = 'nodata'

                data["lots"].append({
                    "name": parking_name,
                    "free": parking_free,
                    "total": lot.total,
                    "address": lot.address,
                    "coords": lot.coords,
                    "state": parking_state,
                    "lot_type": lot.type,
                    "id": lot.id,
                    "forecast": False
                })

    return data
Example #12
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    # last update time (UTC)
    # Konstanz does not support the last_updated yet. I hope they will inform me when it's added
    # as the data seems accurate I will return the current time and date
    data = {"last_updated": utc_now(), "lots": []}

    # get all tables with lots
    parken = soup.find_all("table", class_="parken")

    # get all lots
    for park_lot in parken:
        td = park_lot.find_all("td")
        parking_name = td[0].text.strip()
        if parking_name == "Parkmöglichkeit":
            continue
        # work-around for the umlaut problem: ugly but working
        if 'Marktst' in parking_name:
            parking_name = 'Marktstätte'
        elif 'bele' in parking_name:
            parking_name = 'Döbele'
        # get the data
        lot = geodata.lot(parking_name)
        # look for free lots
        parking_state = 'open'
        parking_free = 0
        try:
            parking_free = int(td[1].text)
        except Exception:
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False
        })

    return data
Example #13
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # no usable timestamp is listed on the page, so fall back to the
    # current time, like the Konstanz scraper does
    data = {
        "last_updated": utc_now(),
        "lots": []
    }

    table = soup.find('table', id='haupttabelle')
    table2 = table.find('table', width='790')
    rows = table2.find_all('tr')
    for row in rows[3:12]:
        parking_data = row.find_all('td')
        parking_name = parking_data[0].text
        lot = geodata.lot(parking_name)
        try:
            parking_state = 'open'
            parking_free = int(parking_data[2].text)
        except Exception:
            parking_free = 0
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
Example #14
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last update time (UTC)
    # Karlsruhe does not support last_updated yet.
    # as the data seems accurate I will return the current time and date
    data = {"last_updated": utc_now(), "lots": []}

    lots = soup.find_all('div', class_='parkhaus')
    for parking_lot in lots:
        parking_name = parking_lot.find('a').text
        lot = geodata.lot(parking_name)

        parking_state = 'open'
        parking_free = 0
        parking_fuellstand = parking_lot.find('div', class_='fuellstand')
        try:
            if parking_fuellstand is None:
                parking_state = 'nodata'
            else:
                temp = parking_fuellstand.text.split()
                parking_free = int(temp[0])
        except Exception:
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
Example #15
def parse_html(html):

    data = {
        "last_updated": utc_now(),  # not found on site, so we use the current time
        "lots": []
    }

    parsed_lots = json.loads(html)
    # iterate over all parking lots
    for parking_lot in parsed_lots:
        parking_name = parking_lot['title']
        if parking_name != 'Reserve':
            lot = geodata.lot(parking_name)
            try:
                parking_free = 0
                if not parking_lot['isOpened']:
                    parking_status = 'closed'
                else:
                    parking_status = 'open'
                    parking_free = int(parking_lot['free'])
            except Exception:
                parking_status = 'nodata'
            data["lots"].append({
                "name": parking_name,
                "free": parking_free,
                "total": parking_lot['parkings'],
                "address": lot.address,
                "coords": lot.coords,
                "state": parking_status,
                "lot_type": lot.type,
                "id": lot.id,
                "forecast": False
            })

    return data
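
Note: despite the shared name, this parse_html variant consumes JSON rather than HTML. A minimal invocation sketch, with the payload shape inferred from the keys the code reads (values are made up, and geodata must know the lot):

import json

sample = json.dumps([
    {"title": "Hauptbahnhof", "isOpened": True, "free": 42, "parkings": 120},
    {"title": "Reserve", "isOpened": False, "free": 0, "parkings": 0},  # skipped by the filter
])
print(parse_html(sample))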
Example #16
def get_api_status():
    return jsonify({
        "status": "online",
        "server_time": util.utc_now(),
        "load": getloadavg()
    })
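
Note: jsonify points to Flask here. Wiring the handler into an app presumably looks something like this (route and app setup are assumptions):

from flask import Flask

app = Flask(__name__)
app.add_url_rule("/status", view_func=get_api_status)  # hypothetical route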
Example #17
def add_metadata(data):
    """Adds metadata to a scraped output dict"""
    data["last_downloaded"] = util.utc_now()
    return data
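
Note: a small usage sketch; the output of any parse_html above gets stamped before it is stored (variable names are illustrative):

data = parse_html(downloaded_page)  # output of any scraper above
data = add_metadata(data)           # adds "last_downloaded" next to "last_updated"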