Example No. 1
def import_into_xapian():
    client = Client(settings.XAPIAN_BASE_URL, settings.XAPIAN_SPECIES_DB)
    client.newdb([{
        'field_name': 'common_name',
        'store': True,
        'freetext': {'language': 'en'} # language used for stemming
    }, {
        'field_name': 'scientific_name',
        'store': True,
        'freetext': {'language': 'en'} # Remove when stemming bug is fixed
    }, {
        'field_name': 'freebase_id',
        'store': True,
        'freetext': {'language': 'en'}
    }], overwrite=True) # replaces existing index if there is one
    # We have a database!
    
    # Now we create documents
    queue = []
    count = 0
    for row in import_from_file():
        if not row['scientific_name']:
            continue
        count += 1
        doc = Document()
        # doc.id = 'X' overrides the auto ID /AND/ replaces any existing doc
        doc.extend([
            ('common_name', row['name']),
            ('scientific_name', row['scientific_name']),
            ('freebase_id', row['id']),
        ])
        # client.add(doc) - would work here
        queue.append(doc)
        if len(queue) >= 1000:
            client.bulkadd(queue)
            queue = []
            print "Imported %d" % count
    # Catch the remainder
    if queue:
        client.bulkadd(queue)
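
The importer above leans on an import_from_file() helper that isn't shown: it just needs to yield one dict per source row. A minimal sketch, assuming the source is a CSV export whose column headers match the keys used above ('name', 'scientific_name', 'id'); the filename is a placeholder, not from the original code:

import csv

def import_from_file(path='species.csv'):
    # Hypothetical reconstruction: yield one dict per source row,
    # keyed by the CSV column headers.
    with open(path) as f:
        for row in csv.DictReader(f):
            yield row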
Example No. 2
def search_split(request, what, near):
    if near.lower() == 'me':
        (current_location, (lat, lon)) = location_from_request(request)
        if current_location:
            near = current_location
    # Look up location using Google geocoder (more likely to return results)
    name, (lat, lon) = google_geocode(near)
    if not name:
        # Treat as a lat/long pair instead, see if that works
        c = Client(settings.XAPIAN_BASE_URL, settings.XAPIAN_PERSONAL_PREFIX)
        result = c.parse_latlong(near)
        if result['ok']:
            lat, lon = result['latitude'], result['longitude']
        else:
            return search_single(
                request, '%s near %s' % (what, near), bypass=True
            )
    
    results, results_info, results_corrected_q = search_places(
        what or SEARCH_ALL, details=True, latlon=(lat, lon), num=PER_PAGE
    )
    species_results, species_results_info, species_results_corrected_q = \
        search_known_species(
            what, details=True, default_op=Query.OP_OR,
        )
    users_results, users_results_info, users_results_corrected_q = \
        search_users(
            what, details=True, default_op=Query.OP_OR,
        )
    
    results = [
        place for place in results if not place.is_unlisted
    ]
    
    for result in results:
        result.species_list = result.get_species()
        for species in result.species_list:
            if species in species_results:
                species.matches_search = True
        # If we got back a distance, bung that on there too
        try:
            result.distance = ([
                d for d in results_info['items'] 
                if d['id'] == 'places.Place:%s' % result.id
            ][0]['geo_distance']['latlon'] / 1609.344)  # metres -> miles
        except (KeyError, IndexError):
            pass
    
    return render(request, 'search/search_split.html', {
        'what': what,
        'near': near,
        'location_name': name,
        'results': results,
        'results_info': pformat(results_info),
        'results_corrected_q': results_corrected_q,
        'species_results': (species_results or [])[:5],
        'species_results_info': pformat(species_results_info),
        'species_results_corrected_q': species_results_corrected_q,
        'species_results_more': len(species_results or []) > 5,
        'users_results': users_results,
        'users_results_info': pformat(users_results_info),
        'users_results_corrected_q': users_results_corrected_q,
    })
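
The distance lookup in the loop above pulls the raw geo_distance (reported in metres) out of results_info and divides by 1609.344 to get miles. Pulled out into a helper, the intent is easier to see; a sketch, where the helper name is ours and the results_info shape is only what the view already assumes:

METRES_PER_MILE = 1609.344

def distance_in_miles(results_info, place_id):
    # Find the search hit for this place and convert its geo distance
    # from metres to miles; return None if no distance came back.
    for item in results_info.get('items', []):
        if item['id'] == 'places.Place:%s' % place_id:
            return item['geo_distance']['latlon'] / METRES_PER_MILE
    return None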
Example No. 3
def import_into_xapian():
    client = Client(
        settings.XAPIAN_BASE_URL, settings.XAPIAN_LOCATION_DB
    )
    try:
        client.deldb()
    except Exception:  # the DB may not exist yet; narrowed from a bare except
        pass
    client.newdb([{
        'field_name': 'place_name',
        'store': True,
        'freetext': {'language': 'en'} # language used for stemming
    }, {
        'field_name': 'county', # Maps to admin_name2
        'store': True,
        'freetext': {}
    }, {
        'field_name': 'country_code',
        'store': True,
        'freetext': {} # TODO: Use exact match here, not yet implemented
    }, {
        'field_name': 'postal_code',
        'store': True,
        'freetext': {} # TODO: Can we do prefix search only?
    }, {
        'field_name': 'description',
        'store': True, # stored but not indexed
    }, {
        'field_name': 'latlon',
        'store': True,
        'type': 'geo',
        'geo': {}, # no options yet
    }])
    # We have a database!
    
    # We throw away anything that results in a description that we have 
    # already used for something else. There are only 213 (out of 27,000)
    # where a duplicate description has more than one lat/lon pair - so 
    # we've chosen to just discard those.
    seen_descriptions = set()
    
    # Now we create documents
    queue = []
    count = 0
    for row in import_from_file():
        # Some (3) of them don't have lat or lon - ignore those
        if not (row['latitude'] and row['longitude']):
            continue
        description = make_description(row)
        if description in seen_descriptions:
            continue
        seen_descriptions.add(description)
        count += 1
        doc = Document()
        # doc.id = 'X' overrides the auto ID /AND/ replaces any existing doc
        doc.extend([
            ('place_name', row['place_name']),
            ('county', row['admin_name2']),
            ('postal_code', row['postal_code']),
            ('country_code', row['country_code']),
            ('description', description),
            ('latlon', '%s %s' % (
                row['latitude'], row['longitude'],
            )),
            # TODO: Ignoring accuracy field for the moment
        ])
        # client.add(doc) - would work here
        queue.append(doc)
        if len(queue) >= 1000:
            client.bulkadd(queue)
            queue = []
            print "Imported %d" % count
    # Catch the remainder
    if queue:
        client.bulkadd(queue)
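
make_description() isn't shown here. Given the deduplication comment above, it presumably builds one human-readable string from the row's location fields; a plausible sketch, where the exact fields and separator are assumptions:

def make_description(row):
    # Hypothetical reconstruction: join the readable location fields
    # into a single display string, skipping any that are empty.
    parts = [row['place_name'], row['admin_name2'], row['country_code']]
    return ', '.join(part for part in parts if part)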