Example #1
import json
import sys

from data import freebase     # project-local Freebase API wrapper
from data import cia          # assumed: CIA World Factbook helper
from data import spreadsheet  # assumed: manual-overrides helper

# extract_freebase_metadata is presumably defined elsewhere in this module.


def run():
    freebase_api = freebase.Freebase()

    output = {}
    gj = json.load(open("comparea/static/data/comparea.geo.json"))
    wiki_url_prefix = 'http://en.wikipedia.org/wiki/'  # unused here; see the sketch below

    for feature in gj['features']:
        key = feature['id']
        props = feature['properties']
        url = props['wikipedia_url']
        title = freebase.wiki_url_to_title(url)
        if not title:
            # `title` is empty at this point, so report the feature key instead.
            sys.stderr.write('ERROR %s has invalid wiki URL: %s\n' %
                             (key, url))
            continue

        try:
            d = freebase_api.get_topic_json(title)
        except IOError:
            sys.stderr.write('ERROR unable to fetch %s\n' % title)
            continue

        if 'error' in d:
            # Distinguish an API-level error from the IOError case above.
            sys.stderr.write('ERROR bad Freebase response for %s\n' % title)
            continue

        md = extract_freebase_metadata(key, title, d)
        try:
            md.update(cia.get_country_data(key))
        except KeyError:
            pass  # no CIA data
        try:
            md.update(spreadsheet.get_feature_data(key))
        except KeyError:
            pass  # no manual data
        output[key] = md

    print(json.dumps(output, indent=2, sort_keys=True))
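freebase.wiki_url_to_title is not shown in these examples; the unused wiki_url_prefix above suggests it strips the English Wikipedia prefix. A minimal sketch under that assumption (the unquote call and the None-on-mismatch behavior are guesses, not the project's confirmed implementation):

import urllib  # Python 2; use urllib.parse.unquote on Python 3

def wiki_url_to_title(url, prefix='http://en.wikipedia.org/wiki/'):
    """Return the article title for an English Wikipedia URL, else None."""
    if not url or not url.startswith(prefix):
        return None
    # Undo percent-encoding in the URL path.
    return urllib.unquote(url[len(prefix):])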
Example #2
def setUp(self):
    self.freebase = freebase.Freebase(api_key=1234, use_cache=False)
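This fixture presumably sits inside a unittest.TestCase; a minimal sketch of the surrounding class, with a hypothetical test method (the expected title and the test body are assumptions, not from the original suite):

import unittest

from data import freebase

class FreebaseTest(unittest.TestCase):
    def setUp(self):
        # Fake API key, no on-disk cache: keeps the test hermetic.
        self.freebase = freebase.Freebase(api_key=1234, use_cache=False)

    def test_wiki_url_to_title(self):
        # wiki_url_to_title is module-level (see Example #1).
        self.assertEqual(
            'Florida',
            freebase.wiki_url_to_title('http://en.wikipedia.org/wiki/Florida'))

if __name__ == '__main__':
    unittest.main()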
Example #3
import csv
import json
import os
import sys

import fetch_metadata
from data import freebase, osm  # project-local modules

# is_english_wikititle, get_feature_props, has_polygon and COUNTY_WHITELIST
# are presumably defined elsewhere in this module.


def main(args):
    assert len(args) == 2, 'Usage: %s input.csv' % args[0]
    csv_reader = csv.reader(open(args[1], 'rb'))  # use the checked args, not sys.argv
    fb = freebase.Freebase()
    osm_fetcher = osm.OSM()

    for idx, row in enumerate(csv_reader):
        osm_type, osm_id, wiki_title, name = row
        if not is_english_wikititle(wiki_title):
            continue
        if wiki_title.startswith("en:"):
            wiki_title = wiki_title[3:]

        path = os.path.join(osm_fetcher.cache_dir,
                            '%s%s.xml.json' % (osm_type, osm_id))
        try:
            d = json.load(open(path))
        except (ValueError, IOError):
            continue

        props = get_feature_props(d, osm_type, osm_id)
        if not props:
            continue

        if not has_polygon(d):
            continue
        freebase_data = fb._get_from_cache(wiki_title)
        if not freebase_data:
            continue

        try:
            total_languages = len(
                freebase_data['property']
                ['/common/topic/topic_equivalent_webpage']['values'])
        except KeyError:
            total_languages = 0

        pass_condition = None
        if props.get('leisure') == 'park':
            if total_languages >= 10:
                pass_condition = ['park', str(total_languages)]
        admin_level = props.get('admin_level')
        if admin_level:
            admin_level = int(admin_level)
            if admin_level <= 4:
                continue  # no states, countries -- other sources cover these.
            # Counties are generally not that interesting, with a few
            # exceptions like NYC boroughs and Los Angeles.
            if 'County' in name:
                if name not in COUNTY_WHITELIST:
                    continue

            d = fetch_metadata.extract_freebase_metadata(
                '', wiki_title, freebase_data)
            if d.get('population', 0) >= 100000:
                pass_condition = [
                    'admin_pop',
                    str(admin_level),
                    str(d.get('population'))
                ]

        if pass_condition:
            # Emit the original row plus the reason it passed, tab-separated.
            print '\t'.join(row + pass_condition)
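is_english_wikititle is not shown here. Since titles with an "en:" prefix pass the check and get the prefix stripped afterwards, a plausible sketch is to reject only titles carrying a different language prefix (the regex is an assumption):

import re

def is_english_wikititle(wiki_title):
    """True for bare titles and ones explicitly prefixed with "en:"."""
    m = re.match(r'^([a-z]{2,3}):', wiki_title)
    return m is None or m.group(1) == 'en'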
Example #4
'''Warm the local Freebase cache from a CSV whose rows look like:

    osm_type,osm_id,wikipedia_title,name

This doesn't output anything; it just populates the local freebase cache.
'''

import csv
import sys
import time
import urllib2
import os

from data import freebase

if __name__ == '__main__':
    csv_reader = csv.reader(open(sys.argv[1], 'rb'))  # don't shadow the csv module
    fb = freebase.Freebase()

    for idx, row in enumerate(csv_reader):
        osm_type, osm_id, wiki_title, name = row[:4]
        if wiki_title.startswith("en:"):
            wiki_title = wiki_title[3:]
        if '://' in wiki_title:
            continue

        try:
            fb.get_topic_json(wiki_title)
        except urllib2.HTTPError:
            sys.stderr.write("Failed to fetch fb data for %s\n" % wiki_title)
        except Exception as e:
            # Keep going; one bad row shouldn't abort the cache-warming run.
            sys.stderr.write("Other error on %s: %s\n" % (wiki_title, e))
Example #5
import fileinput
import json
import os
import sys

import fetch_metadata
import geojson_util
import only_polygons
import osm_filter
from data import freebase, osm  # project-local modules

# make_comparea_id, apply_monkey_patches and adjust_name are presumably
# defined elsewhere in this module.


def main(args):
    osm_fetcher = osm.OSM()
    fb = freebase.Freebase()
    features_out = []
    for line in fileinput.input():
        osm_type, osm_id, wiki_title, name = line.strip().split('\t')[:4]
        if wiki_title.startswith("en:"):
            wiki_title = wiki_title[3:]

        # GeoJSON data.
        path = os.path.join(osm_fetcher.cache_dir,
                            '%s%s.xml.json' % (osm_type, osm_id))
        try:
            d = json.load(open(path))
        except (ValueError, IOError):
            continue

        props = osm_filter.get_feature_props(d, osm_type, osm_id)
        if not props:
            continue
        freebase_data = fb._get_from_cache(wiki_title)
        assert freebase_data, 'no cached Freebase data for %s' % wiki_title

        # Extracted once here; merged into props below.
        freebase_extract = fetch_metadata.extract_freebase_metadata(
            '', wiki_title, freebase_data)

        #land_json = path.replace('.xml.json', '.xml.land.simple.json')
        land_json = path.replace('.xml.json', '.xml.land.json')
        if os.path.exists(land_json):
            try:
                d = json.load(open(land_json))
            except (ValueError, IOError):
                continue
        else:
            # Fall back to the full geometry already loaded above.
            sys.stderr.write('Could not find %s\n' % land_json)

        d['id'] = make_comparea_id(osm_type, osm_id)
        props = {
            'population': 0,
            'population_date': '',
            'population_source': '',
            'population_source_url': '',
            'area_km2_source_url': '#'
        }
        props.update(freebase_extract)
        if 'area_km2' in props:
            # Drop Freebase's area figure; it is recomputed from the
            # geometry below.
            del props['area_km2']
        props['name'] = wiki_title.replace('_', ' ')

        only_polygons.remove_non_polygons(d)
        geojson_util.make_polygons_clockwise(d)

        apply_monkey_patches(d)
        area_km2 = geojson_util.get_area_of_feature(d) / 1e6
        if area_km2 == 0:
            sys.stderr.write('Discarding %s (no area)\n' % wiki_title)
            continue
        c_lon, c_lat = geojson_util.centroid_of_feature(d)
        props.update({
            'area_km2': area_km2,
            'area_km2_source': 'calculated',
            'centroid_lon': c_lon,
            'centroid_lat': c_lat
        })
        d['properties'] = props
        adjust_name(d, freebase_data)

        features_out.append(d)

    geojson_out = {'type': 'FeatureCollection', 'features': features_out}
    print json.dumps(geojson_out, indent=2)
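geojson_util.get_area_of_feature evidently returns square meters, since the result is divided by 1e6 to get km². The helper itself is not shown in these examples; a sketch of how a per-ring area might be computed, using the spherical-excess approximation of Chamberlain & Duquette:

import math

EARTH_RADIUS_M = 6371000.0

def ring_area_m2(ring):
    """Approximate area in m^2 of a closed [lon, lat] ring on a sphere."""
    area = 0.0
    n = len(ring)
    for i in range(n):
        lon1, lat1 = ring[i]
        lon2, lat2 = ring[(i + 1) % n]
        area += math.radians(lon2 - lon1) * (
            2 + math.sin(math.radians(lat1)) + math.sin(math.radians(lat2)))
    return abs(area) * EARTH_RADIUS_M ** 2 / 2.0

# Dividing the summed ring areas by 1e6 converts m^2 to km^2, matching
# the area_km2 computation above.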