def get_sights(reload=False):
    """
    Load the sights data stored in 'sights.yaml'.

    When ``reload`` is true, every country/city returned by
    ``get_links_to_sight_index()`` is visited:

    * a city that has no sights registered yet gets one empty entry per
      sight URL parsed from its ``<index_url>/sights`` page;
    * every sight whose stored info is still empty is parsed with
      ``lpparser.parse_sight``.

    The file is rewritten only if at least one sight was parsed.

    Returns the nested mapping country -> city -> sight URL -> sight info.
    """
    filename = 'sights.yaml'
    sights_data = load_from(filename)
    if not reload:
        return sights_data

    dirty = False
    for country, cities in get_links_to_sight_index().iteritems():
        by_city = sights_data.setdefault(country, OrderedDict())
        for city, index_url in cities.iteritems():
            by_sight = by_city.setdefault(city, OrderedDict())

            # Register the sight URLs of a city seen for the first time.
            if not by_sight:
                listing = lpparser.parse_sight_index('%s/sights' % index_url)
                for relative_url in listing:
                    absolute_url = '%s%s' % (
                        __conf__.get('website'), relative_url,
                    )
                    by_sight.setdefault(absolute_url, OrderedDict())

            # Fill in every sight that has no information yet; only
            # existing keys are reassigned, so the mapping size is stable
            # while it is being iterated.
            for sight_url, stored_info in by_sight.iteritems():
                if stored_info:
                    continue
                print(u'Downloading %s' % sight_url)
                parsed_info = lpparser.parse_sight(sight_url)
                dirty = True
                by_sight[sight_url] = parsed_info

    if dirty:
        save_to(sights_data, filename)
    return sights_data
def get_links_to_sight_index():
    """
    Return a dictionary with the links where the sight indexes can be found.

    The mapping (country -> city -> index URL) is loaded from 'cities.yaml'.
    Each index URL built by ``flat_to_url`` from the configured 'pages' and
    'website' that is not stored yet is resolved with
    ``lpparser.get_country_city``; when a country is returned, the URL is
    recorded under ``cities_data[country][city]``.

    The file is rewritten only if at least one new URL was recorded.
    """
    filename = 'cities.yaml'
    cities_data = load_from(filename)

    # Build the lookup once as a set: membership is tested for every index
    # URL below, and a set keeps that test constant-time. It also stays
    # reusable should ``flatten`` hand back a one-shot iterator.
    # NOTE(review): assumes the stored URLs are hashable (strings).
    loaded_urls = set(flatten(
        [city.values() for city in cities_data.values()]
    ))

    save = False
    index_urls = flat_to_url(__conf__.get('pages'), __conf__.get('website'))
    for url in index_urls:
        if url in loaded_urls:
            continue
        country, city = lpparser.get_country_city(url)
        if country:
            country_level = cities_data.setdefault(country, OrderedDict())
            country_level[city] = url
            save = True

    if save:
        save_to(cities_data, filename)
    return cities_data
def get_buildings_info(buildings, update=False):
    """
    Get the information from all the buildings.

    The information is read from 'info.yaml'. If nothing is stored, it is
    built by calling ``ohmparser.get_building_info`` on every URL of
    ``buildings`` (a title -> URL mapping) and an update pass is forced.

    During an update pass each building gets a 'title' entry if it lacks
    one and, if it has no 'Latitude', its 'location' is geocoded to fill
    'Latitude' and 'Longitude'. Buildings without a 'location' or whose
    geocoding raises ``GeocoderError`` are left as they are. The result is
    then written back to 'info.yaml'.

    Returns the title -> building info mapping.
    """
    filename = 'info.yaml'
    info = load_from(filename)

    if not info:
        info = {}
        for name, url in buildings.iteritems():
            info[name] = ohmparser.get_building_info(url)
        update = True

    if not update:
        return info

    for name, details in info.iteritems():
        if 'title' not in details:
            details['title'] = name
        if 'Latitude' in details:
            continue
        try:
            matches = Geocoder.geocode(details['location'])
            position = matches[0].coordinates
            details['Latitude'] = position[0]
            details['Longitude'] = position[1]
        except (KeyError, GeocoderError):
            # Best effort: keep the building without coordinates.
            pass

    save_to(info, filename)
    return info
def geolocate(sights_data, filename):
    """
    Geolocate all the sights.

    For every sight in ``sights_data`` (country -> city -> sight -> info)
    the geo related fields already stored in ``filename`` are copied over
    the sight info first. Sights that end up with neither 'locations' nor
    'no_location' are looked up with ``get_sight_location``: a truthy
    result is stored under 'locations', otherwise 'no_location' is set to
    True.

    ``sights_data`` is written to ``filename`` only if at least one lookup
    was attempted. Returns ``sights_data`` (modified in place).
    """
    # Fields carried over from the previously saved geo data.
    geo_fields = (
        'locations',
        'custom_locator',
        'no_location',
        'exclude',
    )
    previous_geo = load_from(filename)
    dirty = False

    for country, cities in sights_data.iteritems():
        for city, sights in cities.iteritems():
            for sight, sight_info in sights.iteritems():
                # Copy geo fields before overwriting
                try:
                    stored = previous_geo[country][city][sight]
                    for name in geo_fields:
                        if name in stored:
                            sight_info[name] = stored[name]
                except KeyError:
                    # Sight not present in the saved geo data.
                    pass

                if 'locations' in sight_info or 'no_location' in sight_info:
                    continue

                dirty = True
                found = get_sight_location(sight_info)
                if not found:
                    sight_info['no_location'] = True
                    try:
                        print(
                            'Could not find location for {title}, {city}, '
                            '{country}'.decode('utf8').format(**sight_info)
                        )
                    except UnicodeDecodeError:
                        print('Unicode error')
                    continue

                sight_info['locations'] = found
                print(u'Location for {city}, {country}, '
                      u'{locations}'.format(**sight_info))

    if dirty:
        save_to(sights_data, filename)
    return sights_data
def get_buildings():
    """
    Return a dictionary with all the buildings offered in Open House
    Melbourne.

    The buildings saved in 'buildings.yaml' are returned when there are
    any. Otherwise the list is obtained with ``ohmparser.get_building_list``
    from the configured 'root_url' and 'building_list_page', saved to that
    file and returned.
    """
    filename = 'buildings.yaml'

    saved = load_from(filename)
    if saved:
        return saved

    root_url = __conf__.get('root_url')
    list_page = __conf__.get('building_list_page')
    fetched = ohmparser.get_building_list(root_url, list_page)
    save_to(fetched, filename)
    return fetched
def post_process_sights(sights_data, filename):
    """
    Apply diverse post-processing functions over the data.

    ``extract_price``, ``remove_new_lines``, ``fix_image_src`` and
    ``download_image`` are called, in that order, on the info of every
    sight in ``sights_data`` (country -> city -> sight -> info). Their
    return values are OR-ed together and, when the result is truthy,
    ``sights_data`` is written to ``filename``.

    Returns ``sights_data``.
    """
    pipeline = (
        extract_price,
        remove_new_lines,
        fix_image_src,
        download_image,
    )

    changed = False
    for cities in sights_data.itervalues():
        for sights in cities.itervalues():
            for sight_info in sights.itervalues():
                for step in pipeline:
                    # Bitwise OR on purpose: every step must run, so no
                    # short-circuiting ``or`` here.
                    changed |= step(sight_info)

    if changed:
        save_to(sights_data, filename)
    return sights_data