Пример #1
0
def text_to_location(address_text, seed_location=None, allow_numberless=True):
    '''
    Resolve a raw address string into a Location.

    The text is stored in the address field of a Location, which is then
    passed to resolve_location (documented as using the Google Geocoding
    API).

    address_text -- raw address text to resolve.
    seed_location -- optional Location whose completed fields serve as hints
        for the resolution (described as more forgiving than the hints used
        in the Resolve API). A deep copy is taken, so the caller's object is
        left untouched. The seed's own address field is overwritten by
        address_text and therefore has no bearing on the result.
    allow_numberless -- forwarded to resolve_location. When False, the
        resolved location is documented to be more specific than a
        neighborhood.

    Returns the result of resolve_location, which is documented to be None
    when resolution was not possible.
    '''
    # Work on a private copy of the seed so its remaining fields act as
    # hints; with no seed, start from an empty Location.
    query = copy.deepcopy(seed_location) if seed_location else Location()
    query.address = address_text
    return resolve_location(query, allow_numberless)
Пример #2
0
def text_to_location(address_text, seed_location=None, allow_numberless=True):
    '''
    Try to turn a raw text address into a Location by delegating to
    resolve_location (documented as using the Google Geocoding API).

    When seed_location is given, its completed fields are used as hints to
    help the resolution. These hints are described as more forgiving than
    those used in the Resolve API. The seed's address field is replaced by
    address_text, so it never influences the result, and the seed object
    itself is not modified because a deep copy is used.

    When allow_numberless is False, the resolved location is documented to
    be more specific than a neighborhood.

    Returns the value of resolve_location; documented as None when
    resolution was not possible.
    '''
    if not seed_location:
        # no hints available: resolve from the raw text alone
        target = Location()
    else:
        # keep every hint field of the seed, but never mutate the caller's
        # object
        target = copy.deepcopy(seed_location)

    # the raw text always takes over the address field
    target.address = address_text
    resolved = resolve_location(target, allow_numberless)
    return resolved
Пример #3
0
def _geocode_result_to_location(result):
    '''
    Build a Location from a geocoding result object.

    The result's accessor methods supply the street address, postal code,
    town, state and country (each stripped of surrounding whitespace), and
    get_geocoding() supplies a sequence whose first item is used as the
    latitude and second item as the longitude.
    '''
    coords = result.get_geocoding()
    fields = dict(
        address=result.get_street_address().strip(),
        postcode=result.get_postalcode().strip(),
        town=result.get_town().strip(),
        state=result.get_state().strip(),
        country=result.get_country().strip(),
        latitude=coords[0],
        longitude=coords[1])
    return Location(**fields)
Пример #4
0
def _resolve_result_to_place(result):
    '''
    Build a Place (wrapping a new Location) from a single Resolve result.

    result is read with .get() using the keys 'name', 'address', 'locality',
    'region', 'postcode', 'latitude' and 'longitude'. Text fields default to
    '' when missing and are stripped of surrounding whitespace; the country
    is always set to 'US'.

    Returns the Place only. No GUID is returned alongside it.
    '''
    def _text(key):
        # a missing key becomes '' so strip() always has a string to work on
        return result.get(key, '').strip()

    resolved_loc = Location(
        country='US',
        address=_text('address'),
        town=_text('locality'),
        state=_text('region'),
        postcode=_text('postcode'),
        latitude=result.get('latitude'),
        longitude=result.get('longitude'))

    place_name = result.get('name')
    return Place(name=place_name, location=resolved_loc)
Пример #5
0
def fbloc_to_loc(fbloc):
    '''
    Converts a dict of fields composing a Facebook location to a Location.

    fbloc is read with .get() using the keys 'street', 'city', 'state',
    'postcode' / 'zip', 'latitude' and 'longitude'. Text fields default to
    '' when missing and are stripped of surrounding whitespace.

    A state value that is not two characters long is looked up in
    state_name_to_abbrev; if it is not found there, the state is left blank.

    Returns a new Location.
    '''
    state = fbloc.get('state', '').strip()
    # State entry is often the full state name: map it to its abbreviation,
    # and blank it out when the name is not in the lookup table.
    if len(state) != 2 and state != '':
        state = state_name_to_abbrev.get(state, '')

    # Facebook's Graph API exposes the postal code under 'zip'. A 'postcode'
    # key is still honoured first so callers that already pass it see no
    # change; 'zip' is only used when 'postcode' is missing or empty.
    postcode = fbloc.get('postcode') or fbloc.get('zip') or ''

    return Location(address=fbloc.get('street', '').strip(),
                    town=fbloc.get('city', '').strip(),
                    state=state,
                    postcode=postcode.strip(),
                    latitude=fbloc.get('latitude'),
                    longitude=fbloc.get('longitude'))
Пример #6
0
 def _seeded_resolve(name=None,address=None,postcode=None,town=None,state=None):
     '''
     Resolve a Place built from the given fields on top of the seed location.

     seed_location is not a parameter; it is read from the surrounding
     scope. When it is truthy a deep copy of it is used as the starting
     Location, otherwise an empty Location is created. Every argument that
     is truthy overrides the attribute of the same name on that Location.

     Returns the result of resolve_place for a Place carrying the given name
     and the assembled Location.
     '''
     loc = copy.deepcopy(seed_location) if seed_location else Location()

     # only truthy values override what the seed already provides
     overrides = (('name', name),
                  ('address', address),
                  ('postcode', postcode),
                  ('town', town),
                  ('state', state))
     for field, value in overrides:
         if value:
             setattr(loc, field, value)

     return resolve_place(Place(name=name, location=loc))
Пример #7
0
 def _seeded_resolve(name=None,
                     address=None,
                     postcode=None,
                     town=None,
                     state=None):
     '''
     Run resolve_place on a Place whose Location starts from the seed
     location and is overridden by the given fields.

     seed_location comes from the enclosing scope rather than from the
     arguments. A truthy seed is deep-copied (so it is never mutated); a
     falsy one is replaced by an empty Location. Arguments that are falsy
     leave the corresponding attribute as it was.
     '''
     if not seed_location:
         base = Location()
     else:
         base = copy.deepcopy(seed_location)

     if name:
         base.name = name
     if address:
         base.address = address
     if postcode:
         base.postcode = postcode
     if town:
         base.town = town
     if state:
         base.state = state

     candidate = Place(name=name, location=base)
     return resolve_place(candidate)
Пример #8
0
def run():
    '''
    Wipe the place-related tables and re-import everything from 'obid.csv',
    which is expected to sit in the same directory as this module.

    Steps, in order:
      1. Delete all rows of Location, PlaceMeta, Place, Organization,
         ExternalPlaceSource, FacebookPage and FacebookOrgRecord.
      2. For every CSV row with a Facebook id, pull the page info and store
         it as a FacebookPage; rows whose pull failed lose their fb_id.
      3. For every CSV row: resolve its address to a Location, obtain or
         create the Organization and the Place (importing from Facebook
         when the row has an fb_id), and store url / phone as PlaceMeta.
      4. Query Google Places for the place and print the tags mapped from
         the returned types.

    Progress, warnings and errors are printed to stdout; the final line is
    the number of Google Places hits and misses. Returns None.
    '''
    in_filename = os.path.join(os.path.dirname(__file__), 'obid.csv')

    #clear all tables
    Location.objects.all().delete()
    PlaceMeta.objects.all().delete()
    Place.objects.all().delete()
    Organization.objects.all().delete()
    ExternalPlaceSource.objects.all().delete()
    FacebookPage.objects.all().delete()
    FacebookOrgRecord.objects.all().delete()

    gplaces_category_map = load_category_map('google_places')
    gp_hits, gp_misses = 0, 0

    rows = OBIDRow.rows_from_csv(in_filename)

    # cycle through each row with a facebook reference and store a reference
    page_mgr = PageImportManager()
    fb_rows = [row for row in rows if row.fb_id]
    # pull_page_info yields, per requested id, either a dict of page info or
    # a non-dict value that is reported below as the exception for that page
    for row, info in zip(
            fb_rows, page_mgr.pull_page_info([row.fb_id for row in fb_rows])):
        if isinstance(info, dict):
            info.pop('metadata',
                     None)  # don't need to store metadata if it exists
            FacebookPage.objects.get_or_create(
                fb_id=info['id'],
                defaults=dict(pageinfo_json=json.dumps(info)))
            row.fb_id = info['id']  # ensure a numeric id
        else:
            print 'ERROR: Pulling fb page %s resulted in the following exception: "%s"' % (
                str(row.fb_id), str(info))
            # drop the id so the row is imported without Facebook data
            row.fb_id = ''

    # cycle through all rows and store everything
    for i, row in enumerate(rows):
        if not row.place:
            # NOTE(review): the row is still processed after this message;
            # confirm whether it should be skipped instead.
            print 'ERROR: no place for entry %d' % i

        # resolve the location (the postcode hint is hard-coded)
        location = resolve_location(
            Location(address=row.address, postcode='15213'))

        if location:
            # hack to get around Google Geocoding appending the university onto all addresses
            # NOTE(review): the first clause requires the row address NOT to
            # start with the prefix, the second requires it to start with
            # it; confirm the second clause is not missing a `not`.
            if ( location.address.startswith('University') and not row.address.lower().startswith('univ') ) or \
               ( location.address.startswith('Carnegie Mellon') and row.address.lower().startswith('carnegie mellon') ):
                location.address = ','.join(location.address.split(',')[1:])

            try:
                # if exact match exists, use it instead of the newly found one
                location = Location.objects.get(address=location.address,
                                                postcode=location.postcode)
            except Location.DoesNotExist:
                location.save()
        else:
            print 'WARNING: Geocoding failed for entry %d ("%s")' % (i,
                                                                     row.place)

        diff_org = row.org != row.place
        org, place = None, None

        # import org
        # if the row has a fb id, we'll try to import the Org from Facebook
        # only import Org from Facebook if it's the same as the Place (fb id relates to place only)
        if row.fb_id and not diff_org:
            try:
                # NOTE(review): this binds the FacebookOrgRecord itself,
                # while the other branches bind an imported model instance
                # or an Organization; confirm whether the record's linked
                # organization should be used here instead.
                org = FacebookOrgRecord.objects.get(fb_id=row.fb_id)
            except FacebookOrgRecord.DoesNotExist:
                report = page_mgr.import_org(row.fb_id)
                if report.model_instance:
                    org = report.model_instance
                else:
                    print 'WARNING: Organization FB import failed for entry %d (fbid %s)' % (
                        i, str(row.fb_id))

        if not org:
            org, created = Organization.objects.get_or_create(name=row.org)

        # import place
        if row.fb_id:
            try:
                # The lookup returns the ExternalPlaceSource record; the
                # Place it refers to is its `place` relation (the same
                # relation queried further down to print the linked page).
                place = ExternalPlaceSource.facebook.get(uid=row.fb_id).place
            except ExternalPlaceSource.DoesNotExist:
                report = page_mgr.import_place(row.fb_id, import_owners=False)
                if report.model_instance:
                    place = report.model_instance
                    if not place.owner:  # no owner is created automatically, so set it if not created
                        place.owner = org
                        place.save()
                else:
                    print 'WARNING: Place FB import failed for entry %d (fbid %s)' % (
                        i, str(row.fb_id))

        if not place:
            place, created = Place.objects.get_or_create(name=row.place,
                                                         location=location,
                                                         owner=org)

        if row.url:
            PlaceMeta.objects.create(place=place,
                                     meta_key='url',
                                     meta_value=row.url)
            if not diff_org:  # also save the url as the org's url if they're the same
                org.url = row.url
                org.save()

        if row.phone:
            PlaceMeta.objects.create(place=place,
                                     meta_key='phone',
                                     meta_value=row.phone)

        print 'Imported %s' % row.place
        try:
            print '  (linked to FB page %s)' % ExternalPlaceSource.facebook.get(
                place=place).uid
        except ExternalPlaceSource.DoesNotExist:
            pass

        # look up tags via Google Places
        # NOTE(review): the tags are only printed below, never saved.
        if location and \
            location.latitude is not None and location.longitude is not None:
            coords = (location.latitude, location.longitude)
            radius = 1000
        else:
            # no usable coordinates: search a wider radius around a
            # hard-coded fallback point
            coords = (40.4425, -79.9575)
            radius = 5000

        response = gplaces_client.search_request(coords,
                                                 radius,
                                                 keyword=row.place)

        # only the first search result is considered
        if len(response) > 0 and 'reference' in response[0]:
            details = gplaces_client.details_request(response[0]['reference'])
            all_tags = set()
            for typ in details.get('types', []):
                if typ in gplaces_category_map:
                    all_tags.update(gplaces_category_map[typ])
                else:
                    print 'WARNING: Unknown Google Places type: "%s"' % typ
            if len(all_tags) > 0:
                print '  Tags:',
                for t in all_tags:
                    print '%s,' % t,
                print
            gp_hits += 1
        else:
            print '  WARNING: Failure querying Google Places for "%s" within %dm of (%f,%f)' % (
                row.place, radius, coords[0], coords[1])
            gp_misses += 1
    print gp_hits, gp_misses