Example #1
import re
import requests
import traceback

# Helpers such as to_soup, as_dict, table_into_json, retrieve_meta, as_int,
# as_float, as_date, one_of and parse_parking_places are assumed to be defined
# elsewhere in the same module.
def parse_details_html(url):
    try:
        soup = to_soup(url)

        basic_data = as_dict(
            table_into_json(soup.find_all('section', {'id': 'szczegoly-oferty'})[0].table))
        extended_data_sections = soup.find_all('section', {'id': 'dodatkowe-oplaty'})
        extended_data = {}
        if extended_data_sections:
            extended_data = as_dict(table_into_json(extended_data_sections[0].table))

        return {
            u"Link": url,
            u"Region": retrieve_meta(soup, 'dimension-region'),
            u"Ulica": retrieve_meta(soup, "streetAddress", "itemprop").lower().strip("ul.").strip(),
            u"Cena mieszkania": as_int(retrieve_meta(soup, 'dimension-price')),
            u"Cena za metr": as_int(retrieve_meta(soup, 'dimension-price-m2')),
            u"Powierzchnia": as_float(retrieve_meta(soup, 'dimension-area')),
            u"Pokoje": as_int(retrieve_meta(soup, 'dimension-rooms')),
            u'Cena parkingu': parse_parking_places(basic_data['Miejsca postojowe:'][1]),
            u'Piętro': as_int(retrieve_meta(soup, 'dimension-floor')),
            u"Koszty dodatkowe": sum([as_int(value) for value in extended_data.values()]) or None,
            u"Długosć geograficzna": as_float(retrieve_meta(soup, 'longitude', 'itemprop')),
            u"Szerokość geograficzna": as_float(retrieve_meta(soup, 'latitude', 'itemprop')),
            u"Termin": as_date(
                    one_of(basic_data, ['Realizacja inwestycji:', u'Realizacja nieruchomości:'])[
                    -16:-6])
        }
    except requests.exceptions.ConnectionError:
        return url
    except Exception as e:
        raise Exception("Failed to fetch %s; %s" % (url, traceback.format_exc()), e)
Example #2
import datetime

# to_soup, as_int and stringify_child are assumed to be helpers defined
# elsewhere in the same module.
def get_public_transport_time(gps_from, gps_to, time=None):
    # a default of datetime.datetime.now() would be evaluated only once, at
    # definition time, so resolve "now" inside the function instead
    if time is None:
        time = datetime.datetime.now()
    # map strftime("%w") (Sunday=0 .. Saturday=6) onto the day parameter used in
    # the query: Monday=0 .. Friday=4, with both Saturday and Sunday mapping to 5
    weekday = (int(time.strftime("%w")) + 5) % 6

    url = (
        "http://www.m.rozkladzik.pl/krakow/wyszukiwarka_polaczen.html?"
        "from={from_x};{from_y}|c|{from_x}|{from_y}&"
        "to={to_x};{to_y}|c|{to_x}|{to_y}&profile=opt&maxWalkChange=400&minChangeTime=2&time={time}&day={day}".format(
            from_x=gps_from[0],
            from_y=gps_from[1],
            to_x=gps_to[0],
            to_y=gps_to[1],
            time=time.strftime("%H:%M"),
            day=weekday,
        )
    )

    soup = to_soup(url)

    times = []
    for sum_row in soup.find_all("div", {"class": "route_sum_row"}):
        time_td = sum_row.find_all("td", {"class": "time"})[0]
        times.append(as_int(stringify_child(time_td)[1]))
    return min(times)
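
A minimal usage sketch (an assumption, not part of the scraped example): the coordinates below are hypothetical Krakow GPS points in the order expected by the URL template, and the returned value is the smallest connection time parsed from the route summary rows (presumably minutes).

if __name__ == "__main__":
    travel_time = get_public_transport_time(
        (50.0647, 19.9450),  # hypothetical origin coordinates
        (50.0900, 19.9800),  # hypothetical destination coordinates
    )
    print("Fastest connection: %d" % travel_time)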