Пример #1
0
def get_sights(reload=False):
    """
    Return the sights data stored in 'sights.yaml'

    With ``reload`` left false the stored data is returned as loaded.
    With ``reload`` true, every country/city reported by
    get_links_to_sight_index() is visited: cities that have no sights yet
    get their sight URLs from the '<index_url>/sights' page, and every sight
    whose info is still empty is downloaded with lpparser.parse_sight().
    The file is rewritten only if at least one sight was downloaded.
    """
    filename = 'sights.yaml'
    sights_data = load_from(filename)

    if not reload:
        return sights_data

    downloaded = False
    index_links = get_links_to_sight_index()

    for country, cities in index_links.iteritems():
        by_city = sights_data.setdefault(country, OrderedDict())

        for city, index_url in cities.iteritems():
            sights = by_city.setdefault(city, OrderedDict())

            # A city without any sight yet: discover its sight URLs first.
            if not sights:
                listing = lpparser.parse_sight_index('%s/sights' % index_url)
                for url in listing:
                    full_url = '%s%s' % (__conf__.get('website'), url)
                    sights.setdefault(full_url, OrderedDict())

            for sight_url, details in sights.iteritems():
                if details:
                    # Already downloaded on a previous run.
                    continue

                print(u'Downloading %s' % sight_url)
                # Replacing the value of an existing key keeps the mapping
                # the same size, so this is fine while iterating over it.
                sights[sight_url] = lpparser.parse_sight(sight_url)
                downloaded = True

    if downloaded:
        save_to(sights_data, filename)

    return sights_data
Пример #2
0
def get_links_to_sight_index():
    """
    Return a dictionary with the links where the sight indexes can be found

    The data is loaded from 'cities.yaml' and is nested as
    country -> city -> index URL. Every URL built by flat_to_url() from the
    configured 'pages' and 'website' that is not stored yet is resolved to
    its (country, city) with lpparser.get_country_city() and added. URLs for
    which no country is returned are skipped. The file is rewritten only
    when something was added.
    """
    filename = 'cities.yaml'
    cities_data = load_from(filename)
    # A set gives constant-time membership tests and can be probed any
    # number of times, whatever kind of iterable flatten() returns.
    # Assumes the stored URLs are hashable (plain strings).
    loaded_urls = set(
        flatten([city.values() for city in cities_data.values()])
    )
    save = False
    index_urls = flat_to_url(__conf__.get('pages'), __conf__.get('website'))

    for url in index_urls:
        if url in loaded_urls:
            continue

        country, city = lpparser.get_country_city(url)
        if country:
            country_level = cities_data.setdefault(country, OrderedDict())
            country_level[city] = url
            # Remember the URL so that a duplicate later in index_urls is
            # not resolved a second time.
            loaded_urls.add(url)
            save = True

    if save:
        save_to(cities_data, filename)

    return cities_data
Пример #3
0
def get_buildings_info(buildings, update=False):
    """
    Get the information from all the buildings

    The information is loaded from 'info.yaml'. When nothing is stored yet,
    it is fetched for every entry of ``buildings`` (a mapping of
    title -> url) with ohmparser.get_building_info() and an update pass is
    forced.

    The update pass, also run when ``update`` is true, fills in a missing
    'title' and geocodes building['location'] for every building that has
    no 'Latitude' yet, then saves the result back to 'info.yaml'.

    Returns the mapping of title -> building information.
    """
    filename = 'info.yaml'
    info = load_from(filename)

    if not info:
        info = {
            title: ohmparser.get_building_info(url)
            for title, url in buildings.iteritems()
        }
        update = True

    if update:
        for title, building in info.iteritems():
            if 'title' not in building:
                building['title'] = title
            if 'Latitude' not in building:
                try:
                    results = Geocoder.geocode(building['location'])
                    coordinates = results[0].coordinates
                    # Read both values before storing either one, so an
                    # entry never ends up with only half of its position.
                    latitude, longitude = coordinates[0], coordinates[1]
                except (KeyError, GeocoderError):
                    # Best effort: a building without a 'location' or one
                    # the geocoder rejects is left without coordinates.
                    pass
                else:
                    building['Latitude'] = latitude
                    building['Longitude'] = longitude

        save_to(info, filename)

    return info
Пример #4
0
def geolocate(sights_data, filename):
    """
    Geolocate all the sights

    ``sights_data`` is nested as country -> city -> sight -> sight info and
    is modified in place. ``filename`` is loaded to obtain the geo data
    saved by a previous run; its geo fields are copied onto the matching
    sight info first, so they are not lost when the file is overwritten.

    Every sight that then has neither 'locations' nor 'no_location' is
    looked up with get_sight_location(): a truthy result is stored under
    'locations', otherwise the sight is flagged with 'no_location'. If any
    lookup was attempted, ``sights_data`` is saved to ``filename``.

    Returns ``sights_data``.
    """
    sights_geo = load_from(filename)

    # Fields carried over from the previously saved geo data.
    geo_fields = (
        'locations',
        'custom_locator',
        'no_location',
        'exclude',
    )

    save = False
    for country, cities in sights_data.iteritems():
        for city, sights in cities.iteritems():
            for sight, sight_info in sights.iteritems():
                try:
                    geo_sight = sights_geo[country][city][sight]
                except KeyError:
                    # Sight was not present in the saved geo data.
                    pass
                else:
                    for field in geo_fields:
                        if field in geo_sight:
                            sight_info[field] = geo_sight[field]

                if 'locations' in sight_info or 'no_location' in sight_info:
                    # Already resolved, or already known to be unresolvable.
                    continue

                save = True
                locations = get_sight_location(sight_info)
                if locations:
                    sight_info['locations'] = locations
                    print(u'Location for {city}, {country}, '
                          u'{locations}'.format(**sight_info))
                else:
                    sight_info['no_location'] = True
                    try:
                        print(
                            u'Could not find location for {title}, {city}, '
                            u'{country}'.format(**sight_info)
                        )
                    except UnicodeDecodeError:
                        # Formatting byte strings into the unicode template
                        # can fail; the message is only informational.
                        print('Unicode error')
    if save:
        save_to(sights_data, filename)

    return sights_data
Пример #5
0
def get_buildings():
    """
    Return a dictionary with all the buildings offered in Open House Melbourne

    The buildings saved in 'buildings.yaml' are used when there are any.
    Otherwise the list is fetched with ohmparser.get_building_list(), using
    the configured 'root_url' and 'building_list_page', and saved to that
    file before being returned.
    """
    filename = 'buildings.yaml'

    saved = load_from(filename)
    if saved:
        return saved

    fetched = ohmparser.get_building_list(
        __conf__.get('root_url'),
        __conf__.get('building_list_page'),
    )
    save_to(fetched, filename)
    return fetched
Пример #6
0
def post_process_sights(sights_data, filename):
    """
    Run every post-processing step over each sight and save if needed

    ``sights_data`` is nested as country -> city -> sight -> sight info.
    Each step is called with the sight info and its return value is OR-ed
    into a flag; when the flag ends up truthy the data is saved to
    ``filename``. Presumably a step returns True when it changed the sight
    info (to be confirmed against the step functions).

    Returns ``sights_data``.
    """
    steps = (
        extract_price,
        remove_new_lines,
        fix_image_src,
        download_image,
    )

    changed = False
    for cities in sights_data.itervalues():
        for sights in cities.itervalues():
            for sight_info in sights.itervalues():
                for step in steps:
                    # "|=" rather than "or": every step must run even after
                    # an earlier one has already reported a change.
                    changed |= step(sight_info)

    if changed:
        save_to(sights_data, filename)

    return sights_data