def _strip_street_prefix(street):
    """Return *street* lower-cased, without a leading "ul." and outer whitespace."""
    street = street.lower().strip()
    # str.strip("ul.") would remove any of the characters 'u', 'l', '.' from
    # BOTH ends (mangling e.g. "lubicz" -> "bicz"), so drop the prefix explicitly.
    if street.startswith("ul."):
        street = street[len("ul."):]
    return street.strip()


def parse_details_html(url):
    """Scrape one offer page and return its details as a flat dict.

    The page is loaded with ``to_soup``. Values come from the page's meta
    entries (via ``retrieve_meta``), from the table in the
    ``szczegoly-oferty`` section and, when present, from the table in the
    ``dodatkowe-oplaty`` section.

    Args:
        url: Address of the offer details page.

    Returns:
        A dict keyed by the (Polish) column names below. If the page cannot
        be reached (``requests.exceptions.ConnectionError``), the ``url``
        itself is returned instead of a dict, so callers must check the type
        of the result.

    Raises:
        Exception: for any other failure; the message contains the URL and
            the formatted traceback, and the original exception is passed as
            the second argument.
    """
    try:
        soup = to_soup(url)
        # The details section is required; a missing one raises IndexError,
        # which is reported through the generic handler below.
        basic_data = as_dict(
            table_into_json(
                soup.find_all('section', {'id': 'szczegoly-oferty'})[0].table))

        # The additional-fees section is optional.
        extended_data_sections = soup.find_all(
            'section', {'id': 'dodatkowe-oplaty'})
        extended_data = {}
        if extended_data_sections:
            extended_data = as_dict(
                table_into_json(extended_data_sections[0].table))

        return {
            u"Link": url,
            u"Region": retrieve_meta(soup, 'dimension-region'),
            u"Ulica": _strip_street_prefix(
                retrieve_meta(soup, "streetAddress", "itemprop")),
            u"Cena mieszkania": as_int(retrieve_meta(soup, 'dimension-price')),
            u"Cena za metr": as_int(retrieve_meta(soup, 'dimension-price-m2')),
            u"Powierzchnia": as_float(retrieve_meta(soup, 'dimension-area')),
            u"Pokoje": as_int(retrieve_meta(soup, 'dimension-rooms')),
            u'Cena parkingu': parse_parking_places(
                basic_data['Miejsca postojowe:'][1]),
            u'Piętro': as_int(retrieve_meta(soup, 'dimension-floor')),
            # A zero total is reported as None rather than 0.
            u"Koszty dodatkowe": sum(
                [as_int(value) for value in extended_data.values()]) or None,
            u"Długosć geograficzna": as_float(
                retrieve_meta(soup, 'longitude', 'itemprop')),
            u"Szerokość geograficzna": as_float(
                retrieve_meta(soup, 'latitude', 'itemprop')),
            # The date is taken from a fixed character window of the cell
            # value. NOTE(review): presumably a 10-character date followed by
            # a 6-character suffix -- confirm against the page markup.
            u"Termin": as_date(
                one_of(basic_data,
                       ['Realizacja inwestycji:',
                        u'Realizacja nieruchomości:'])[-16:-6]),
        }
    except requests.exceptions.ConnectionError:
        # Hand the URL back unchanged when the page could not be reached.
        return url
    except Exception as e:
        raise Exception(
            "Failed to fetch %s; %s" % (url, traceback.format_exc()), e)
def get_public_transport_time(gps_from, gps_to, time=None):
    """Return the shortest public-transport travel time between two points.

    Queries the rozkladzik.pl connection search for Krakow and returns the
    smallest value parsed from the ``td.time`` cell of each
    ``div.route_sum_row`` element in the response.

    Args:
        gps_from: Indexable pair; elements 0 and 1 are substituted into the
            ``from`` part of the query.
        gps_to: Indexable pair; elements 0 and 1 are substituted into the
            ``to`` part of the query.
        time: ``datetime.datetime`` of departure. Defaults to the current
            time, evaluated on every call.

    Returns:
        The minimum of the parsed route times.
        NOTE(review): the unit is presumably minutes -- confirm against the
        service output.

    Raises:
        ValueError: if the response contains no route summary rows.
    """
    # Resolve the default per call; a ``datetime.now()`` default in the
    # signature would be frozen at import time.
    if time is None:
        time = datetime.datetime.now()

    # "%w" yields 0 (Sunday) .. 6 (Saturday). This formula maps Monday-Friday
    # to 0-4 and BOTH Saturday and Sunday to 5.
    # NOTE(review): confirm this matches the service's ``day`` parameter;
    # (w + 6) % 7 would give distinct values for Saturday and Sunday.
    weekday = (int(time.strftime("%w")) + 5) % 6

    url_template = (
        "http://www.m.rozkladzik.pl/krakow/wyszukiwarka_polaczen.html?"
        "from={from_x};{from_y}|c|{from_x}|{from_y}&"
        "to={to_x};{to_y}|c|{to_x}|{to_y}"
        "&profile=opt&maxWalkChange=400&minChangeTime=2"
        "&time={time}&day={day}"
    )
    url = url_template.format(
        from_x=gps_from[0],
        from_y=gps_from[1],
        to_x=gps_to[0],
        to_y=gps_to[1],
        time=time.strftime("%H:%M"),
        day=weekday,
    )

    soup = to_soup(url)
    times = [
        as_int(stringify_child(
            sum_row.find_all("td", {"class": "time"})[0])[1])
        for sum_row in soup.find_all("div", {"class": "route_sum_row"})
    ]
    if not times:
        # Same exception type min() would raise, but with a useful message.
        raise ValueError("No routes found in response from %s" % url)
    return min(times)