def calculate_distances(self, location_list):
    '''
    Index every zip code by the other zip codes that fall within each search radius.

    We allow users to look for tickets that are within x miles of their location. Every user and every ticket
    will have an associated Location record, and every Location record has a latitude and a longitude. This
    function calculates the distance between every pair of location records it is given and indexes them based
    on whether they are within each distance defined in TICKET_DISTANCE_CHOICES.

    The resulting index has the shape {zip_code: {radius: [nearby_zip_code, ...]}} and is passed to
    self.write_json_file() together with self.output_file.

    :param location_list: iterable of records that expose a zip_code attribute and are accepted by
                          calculate_distance_between_zip_codes().
    :return: None. If a distance calculation raises ValueError the whole run is abandoned and nothing is
             written.
    '''
    from itertools import combinations

    distance_dict = {}

    # combinations() yields every unordered pair exactly once, so we don't do every comparison twice. Obviously
    # the distance between x and y is the same as the distance between y and x.
    for location1, location2 in combinations(location_list, 2):
        try:
            distance = calculate_distance_between_zip_codes(location1, location2)
        except ValueError:
            logging.error('Could not calculate the distance between zip code {} and {}. Aborting.'.format(
                location1.zip_code, location2.zip_code))
            return

        for radius in TICKET_DISTANCE_CHOICES:
            if distance <= radius:
                # setdefault() only creates the missing level. Assigning a fresh {radius: [...]} dict here would
                # throw away the radii that were already recorded for this zip code.
                distance_dict.setdefault(location1.zip_code, {}).setdefault(radius, []).append(location2.zip_code)
                distance_dict.setdefault(location2.zip_code, {}).setdefault(radius, []).append(location1.zip_code)

        logging.debug('Evaluating zip code {} with {}'.format(location1.zip_code, location2.zip_code))

    self.write_json_file(distance_dict, self.output_file)
def read_zip_codes(self):
    '''
    Parse the zip code CSV into Location and Alias records.

    The CSV is read from self.input_file. If that file cannot be opened, the data is fetched through
    self.open_s3() and parsed in memory instead. The first line is treated as the column header. A row is
    skipped when it does not have exactly 16 columns, is outside of the US, is a military zip code, has a state
    that is not in states_abbreviation_list, or has no timezone.

    Every row that shares its (primary_city, state) with an earlier row, has different coordinates and is more
    than 10 units away from it (as reported by calculate_distance_between_zip_codes()) is reported with
    logging.critical(), closest pair first.

    :return: tuple (location_list, alias_list) holding Location and Alias namedtuples respectively.
    '''
    # Note that these shadow the imported Alias and Location objects from within this function scope!
    Location = namedtuple('Location', ['line_no', 'zip_code', 'latitude', 'longitude', 'primary_city', 'state',
                                       'estimated_population', 'timezone'])
    Alias = namedtuple('Alias', ['zip_code', 'name', 'state', 'estimated_population'])

    location_list = []
    alias_list = []
    same_city_state = []
    city_state_combos = {}

    try:
        # newline='' leaves newline handling to the csv module, as its documentation asks for.
        csv_source = open(self.input_file, newline='')
    except (OSError, TypeError):
        # If the file isn't available locally (or no usable path was configured), get it from s3.
        _, key = self.open_s3()
        key.key = 'static_root/locations/csv/zip_code_database.csv'
        # Handle everything in memory. get_contents_as_string() is actually returning a bytes object, and
        # csv.reader cannot take a string! It needs a file object.
        csv_source = StringIO(key.get_contents_as_string().decode("utf-8"))

    try:
        reader = csv.reader(csv_source)
        for row in reader:
            # Skip the line containing the column definitions
            if reader.line_num == 1:
                continue

            try:
                (zip_code, mail_type, primary_city, aliases, unacceptable_cities, state, county, timezone,
                 area_codes, latitude, longitude, world_region, country, decommissioned, estimated_population,
                 notes) = row
            except ValueError:
                # Blank or truncated lines should not abort the whole import.
                logging.error('line #{}: expected 16 columns but found {}.'.format(reader.line_num, len(row)))
                continue

            # Do not handle any zip codes outside of the US
            if country != 'US':
                logging.debug('line #{}: country equal to {}'.format(reader.line_num, country))
                continue

            # Do not handle military zip codes
            if mail_type == 'MILITARY':
                logging.debug('line #{}: mail_type equal to {}'.format(reader.line_num, mail_type))
                continue

            if state not in states_abbreviation_list:
                logging.debug('line #{}: state equal to {}'.format(reader.line_num, state))
                continue
            # All strings in the database are lowercase. Let's work with lowercase from the beginning.
            state = state.lower()

            # NOTE(review): a row with a missing latitude, longitude, zip code or primary city is only logged.
            # It is still recorded below with the empty value in place - confirm that this is intended.
            if latitude:
                latitude = float(latitude)
            else:
                logging.error('line #{}: Latitude not present.'.format(reader.line_num))

            if longitude:
                longitude = float(longitude)
            else:
                logging.error('line #{}: longitude not present.'.format(reader.line_num))

            if not zip_code:
                logging.error('line #{}: zipcode not present.'.format(reader.line_num))

            if primary_city:
                # All strings in the database are lowercase. Let's work with lowercase from the beginning.
                primary_city = primary_city.strip().lower()
            else:
                logging.error('line #{}: primary_city not present.'.format(reader.line_num))

            if not county:
                logging.debug('line #{}: county not present.'.format(reader.line_num))

            if not timezone:
                logging.error('line #{}: timezone not present for zipcode {}'.format(reader.line_num, zip_code))
                continue

            if estimated_population:
                estimated_population = int(estimated_population)
            else:
                logging.debug('line #{}: estimated populated not present. Marking as 0'.format(reader.line_num))
                estimated_population = 0

            if aliases:
                for name in aliases.split(','):
                    alias_list.append(Alias(zip_code, name.strip().lower(), state,
                                            estimated_population=estimated_population))

            location = Location(reader.line_num, zip_code, latitude, longitude, primary_city, state,
                                estimated_population, timezone)
            location_list.append(location)

            city_state = (primary_city, state)
            if city_state not in city_state_combos:
                # The first zip code seen for a city is the reference point for all later ones.
                city_state_combos[city_state] = location
                continue

            first_seen = city_state_combos[city_state]
            if (first_seen.latitude, first_seen.longitude) != (location.latitude, location.longitude):
                distance_apart = calculate_distance_between_zip_codes(first_seen, location)
                if distance_apart > 10:
                    same_city_state.append((distance_apart, first_seen, location))
    finally:
        # The reader is consumed lazily, so the source may only be closed once the loop is done.
        csv_source.close()

    # Report same-named cities that are suspiciously far apart, closest pair first.
    same_city_state.sort(key=lambda match: match[0])
    for distance_apart, first_seen, later in same_city_state:
        logging.critical('{distance:.2f}\n {loc1}\n {loc2}\n\n'.format(distance=distance_apart, loc1=first_seen,
                                                                    loc2=later))

    return location_list, alias_list