def test_address_normalization_nbsp(self):
    # BeautifulSoup leaves the non-breaking space (U+00A0) in its output and
    # that confuses Google, so normalize_address is expected to remove it.
    # Per the expected value below, the text in front of the non-breaking
    # space is dropped as well and ', Ann Arbor, MI' is appended.
    raw_location = u'WEST HALL ARCH \xa0 1085 SOUTH UNIVERSITY'
    self.assertEqual(
        u'1085 SOUTH UNIVERSITY, Ann Arbor, MI',
        parser.normalize_address(raw_location),
    )
with open(file, 'r') as f: data.extend(parser.parse_page(f.read())) with open('.database', 'r') as f: connection = sqlite3.connect(f.read().strip()) # Keep track of requests made, if we have made 2000 - stop because google will # start denying at 2500 reqs = 0 # Sort and group by location so we can reduce the number of requests made to # the database and insert in bulk data = sorted(data, key=lambda x: x[2]) for location, group in groupby(data, key=lambda x: x[2]): group = list(group) location = parser.normalize_address(location) print location, len(group) # Test to see if the location is already in the database, if it is use that # data, else make a request to google and save the response query = 'SELECT Latitude, Longitude FROM Locations WHERE Location = ?' cur = connection.execute(query, (location, )) rv = cur.fetchall() if rv: lat, lng = float(rv[0][0]), float(rv[0][1]) else: url = 'http://maps.googleapis.com/maps/api/geocode/json?sensor=false' r = requests.get(url, params={'address': location}) results = r.json()['results'] reqs += 1 print "reqs: ", reqs
def test_empty_address(self):
    # An empty location is expected to normalize to just the city and state.
    normalized = parser.normalize_address(u'')
    self.assertEqual('Ann Arbor, MI', normalized)
def test_address_normalization(self):
    # A plain street address is expected to come back unchanged apart from
    # the ', Ann Arbor, MI' suffix.
    street = u'1300 BLOCK CATHERINE STREET'
    wanted = u'1300 BLOCK CATHERINE STREET, Ann Arbor, MI'
    self.assertEqual(wanted, parser.normalize_address(street))
with open(file, 'r') as f: data.extend(parser.parse_page(f.read())) with open('.database', 'r') as f: connection = sqlite3.connect(f.read().strip()) # Keep track of requests made, if we have made 2000 - stop because google will # start denying at 2500 reqs = 0 # Sort and group by location so we can reduce the number of requests made to # the database and insert in bulk data = sorted(data, key=lambda x: x[2]) for location, group in groupby(data, key=lambda x: x[2]): group = list(group) location = parser.normalize_address(location) print location, len(group) # Test to see if the location is already in the database, if it is use that # data, else make a request to google and save the response query = 'SELECT Latitude, Longitude FROM Locations WHERE Location = ?' cur = connection.execute(query, (location,)) rv = cur.fetchall() if rv: lat, lng = float(rv[0][0]), float(rv[0][1]) else: url = 'http://maps.googleapis.com/maps/api/geocode/json?sensor=false' r = requests.get(url, params={'address': location}) results = r.json()['results'] reqs += 1 print "reqs: ", reqs