def geolocate(tag_sentence, username):
    """
    Pull a location out of a POS-tagged sentence and geocode it.

    The sentence is first scanned for a proper noun (``NNP``) that directly
    follows one of the words 'in', 'to' or 'from' (compared case-insensitively
    so a sentence-initial "In" also counts). If no such bigram exists,
    trigrams of the form noun / preposition (``IN``) / proper noun are tried.
    In both scans the last match in the sentence wins. The matched word is
    stemmed and passed to the `geonames_api` to obtain coordinates.

    Inputs
    ------
    tag_sentence : sentence with POS tags as generated by the NLTK function
                   `pos_tag()`. List of (word, tag) tuples.

    username : username for geonames.org. String.

    Returns
    -------
    lat : latitude coordinate as returned by `geonames_api.get_lat_lon`, or
          'NA' if no location was found or the lookup raised IndexError.

    lon : longitude coordinate, or 'NA' under the same conditions.
    """
    # NOTE(review): a second `geolocate(trigrams, username)` appears later in
    # this file; if both are module-level, the later one replaces this one.
    #TODO: What about two word cities? Baton Rouge, New Orleans, etc.

    #Words that indicate a location
    keep = ('in', 'to', 'from')
    loc = None

    #Bigram pass: location word followed by a proper noun. No break, so the
    #last matching pair in the sentence is the one that is kept.
    for (w1, _t1), (w2, t2) in nk.bigrams(tag_sentence):
        if w1.lower() in keep and t2 == 'NNP':
            loc = w2

    #If the above didn't work, try trigrams: noun, preposition, proper noun
    if not loc:
        for (_w1, t1), (_w2, t2), (w3, t3) in nk.trigrams(tag_sentence):
            if t1.startswith('N') and t2 == 'IN' and t3 == 'NNP':
                loc = w3

    #If a location hasn't been found, return 'NA' for lat, lon
    if not loc:
        return 'NA', 'NA'

    #Create parameters to pass to the geonames_api
    # NOTE(review): Porter stemming is designed for common words and can alter
    # place names before they are queried - confirm this is intended.
    loc = nk.stem.PorterStemmer().stem(loc)
    params = geonames_api.make_params({'q': loc})

    #Try to obtain coordinates from geonames, but if something went wrong
    #(IndexError from the lookup), return 'NA' for lat, lon
    try:
        lat, lon = geonames_api.get_lat_lon(params, username)
    except IndexError:
        return 'NA', 'NA'
    return lat, lon
def geolocate(trigrams, username):
    """
    Pull a location out of a sentence's trigrams and geocode it.

    Each trigram is checked for a proper noun (``NNP``) that follows one of
    the words 'in', 'to' or 'from' (compared case-insensitively so a
    sentence-initial "In" also counts): either as the first two items of the
    trigram, or as the last two items when the first item has a noun tag.
    The last match wins. The matched word is stemmed and passed to the
    `geonames_api` to obtain latitude and longitude.

    Parameters
    ------
    trigrams: Iterable of 3-tuples of (word, tag) pairs; presumably the
              output of `nltk.trigrams` over a `pos_tag()` sentence.

    username: String. Username for geonames.org.

    Returns
    -------
    lat: latitude coordinate as returned by `geonames_api.get_lat_lon`, or
         the string 'NA' if no location was found or the lookup raised
         IndexError.

    lon: longitude coordinate, or 'NA' under the same conditions.
    """
    #TODO: What about two word cities? Baton Rouge, New Orleans, etc.

    #Words that indicate a location
    keep = ('in', 'to', 'from')
    loc = None

    #No break in the loop, so a later match overrides an earlier one.
    for (w1, t1), (w2, t2), (w3, t3) in trigrams:
        if w1.lower() in keep and t2 == 'NNP':
            #location word directly followed by a proper noun
            loc = w2
        elif t1.startswith('N') and w2.lower() in keep and t3 == 'NNP':
            #noun, location word, proper noun
            loc = w3

    #If a location hasn't been found, return 'NA' for lat, lon
    if not loc:
        return 'NA', 'NA'

    #Create parameters to pass to the geonames_api
    # NOTE(review): Porter stemming is designed for common words and can alter
    # place names before they are queried - confirm this is intended.
    loc = nltk.stem.PorterStemmer().stem(loc)
    params = geonames_api.make_params({'q': loc})

    #Try to obtain coordinates from geonames, but if something went wrong
    #(IndexError from the lookup), return 'NA' for lat, lon
    try:
        lat, lon = geonames_api.get_lat_lon(params, username)
    except IndexError:
        return 'NA', 'NA'
    return lat, lon