def test_get_or_create_location_creates_new_locations():
    """A row that matches no stored location results in exactly one new Location."""
    usa = mommy.make(
        'references.RefCountryCode', country_code='USA', _fill_optional=True)

    # The same canonicalization step also runs during load_submission.
    canonical_row = canonicalize_location_dict({
        'vendorcountrycode': 'USA',
        'zipcode': '12345-6789',
        'streetaddress': 'Addy1',
        'streetaddress2': 'Addy2',
        'streetaddress3': None,
        'vendor_state_code': 'ST',
        'city': 'My Town',
    })

    # No location is stored before the call, so nothing can be found.
    assert Location.objects.count() == 0

    helpers.get_or_create_location(
        canonical_row, load_usaspending_contracts.location_mapper_vendor)

    # Exactly one location was created by the call above.
    assert Location.objects.count() == 1

    created = Location.objects.all().first()
    assert created.location_country_code == usa
    assert created.zip5 == '12345'
    assert created.zip_last4 == '6789'
    assert created.address_line1 == 'ADDY1'
    assert created.address_line2 == 'ADDY2'
    assert created.address_line3 is None
    assert created.state_code == 'ST'
    assert created.city_name == 'MY TOWN'
def get_or_create_location(row, mapper):
    """
    Return the Location described by a row, creating it when none exists.

    Input parameters:
        - row: the row of data currently being loaded
        - mapper: callable that converts ``row`` into a dict of location
          field values; the dict must contain "location_country_code"

    Lookup order is: the location cache, then the database, then creation
    of a new record. Whatever is found or created is cached before being
    returned.
    """
    location_dict = mapper(row)

    country_code = fetch_country_code(location_dict["location_country_code"])
    location_dict["location_country_code"] = country_code

    # Country-specific adjustments
    if country_code and country_code.country_code == "USA":
        # A missing or None zip is handled like an empty one rather than
        # raising when it is sliced.
        zip_code = location_dict.pop("location_zip", None) or ""
        location_dict.update(zip5=zip_code[:5], zip_last4=zip_code[5:])
    else:
        # Non-USA (or unknown country) rows use the foreign_* fields.
        location_dict.update(
            foreign_postal_code=location_dict.pop("location_zip", None),
            foreign_province=location_dict.pop("state_code", None))
        if "city_name" in location_dict:
            location_dict['foreign_city_name'] = location_dict.pop("city_name")

    location_dict = canonicalize_location_dict(location_dict)

    # The cache key is the full set of (field, value) pairs.
    location_tup = tuple(location_dict.items())
    location = location_cache.get(location_tup)
    if location:
        return location

    location = Location.objects.filter(**location_dict).first()
    if not location:
        location = Location.objects.create(**location_dict)

    # Cache database hits as well as new records so repeated rows for the
    # same location do not query the database again.
    location_cache.set(location_tup, location)
    return location
def test_get_or_create_location_creates_new_locations():
    """A row that matches no stored location results in exactly one new Location."""
    # The same canonicalization step also runs during load_submission.
    canonical_row = canonicalize_location_dict({
        'vendorcountrycode': 'USA',
        'zipcode': '12345-6789',
        'streetaddress': 'Addy1',
        'streetaddress2': 'Addy2',
        'streetaddress3': None,
        'vendor_state_code': 'ST',
        'city': 'My Town',
    })

    # No location is stored before the call, so nothing can be found.
    assert Location.objects.count() == 0

    helpers.get_or_create_location(
        canonical_row, load_usaspending_contracts.location_mapper_vendor)

    # Exactly one location was created by the call above.
    assert Location.objects.count() == 1

    created = Location.objects.all().first()
    assert created.location_country_code == 'USA'
    assert created.zip5 == '12345'
    assert created.zip_last4 == '6789'
    assert created.address_line1 == 'ADDY1'
    assert created.address_line2 == 'ADDY2'
    assert created.address_line3 is None
    assert created.state_code == 'ST'
    assert created.city_name == 'MY TOWN'
def create_location(location_map, row, location_value_map=None):
    """
    Build a location from a row of data and store it as a new record.

    Input parameters:
        - location_map: a dictionary with key = field name on the location
          model and value = corresponding field name on the current row
          of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary handed to
          load_data_into_model as ``value_map``; an empty one is used
          when omitted

    Returns the newly created Location.
    """
    value_map = {} if location_value_map is None else location_value_map
    canonical_row = canonicalize_location_dict(row)

    # Collect the field values as a dict without saving anything yet.
    field_values = load_data_into_model(
        Location(),
        canonical_row,
        value_map=value_map,
        field_map=location_map,
        as_dict=True,
        save=False)

    return Location.objects.create(**field_values)
def get_or_create_location(location_map, row, location_value_map=None):
    """
    Retrieve or create a location object

    Input parameters:
        - location_map: a dictionary with key = field name on the location
          model and value = corresponding field name on the current row
          of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary of values handed to
          load_data_into_model as ``value_map``; it is copied, so the
          caller's dictionary is never modified

    Returns a ``(location_object, created)`` tuple, or ``(None, None)`` when
    the row produced no location data at all.
    """
    # Work on a private copy. The previous ``{}`` default was one dict shared
    # by every call, so values set for one row (foreign_province, state_code,
    # country) leaked into later rows.
    location_value_map = dict(location_value_map) if location_value_map else {}

    location_country = RefCountryCode.objects.filter(
        country_code=row[location_map.get('location_country_code')]).first()

    # temporary fix until broker is patched: remove later
    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Fix for procurement data foreign provinces stored as state_code
        if location_country and location_country.country_code != "USA":
            location_value_map.update({'foreign_province': state_code})
            location_value_map.update({'state_code': None})
        else:
            # Strip periods from the state code.
            location_value_map.update(
                {'state_code': state_code.replace('.', '')})
    # end of temporary fix

    if location_country:
        location_value_map.update({
            'location_country_code': location_country,
            'country_name': location_country.country_name
        })
    else:
        # no country found for this code
        location_value_map.update({
            'location_country_code': None,
            'country_name': None
        })

    row = canonicalize_location_dict(row)

    location_data = load_data_into_model(
        Location(),
        row,
        value_map=location_value_map,
        field_map=location_map,
        as_dict=True)

    # hacky way to ensure we don't create a series of empty location records:
    # drop data_source so a row with no real location data yields an empty dict
    del location_data['data_source']

    if not location_data:
        # record had no location information at all
        return None, None

    try:
        location_object, created = Location.objects.get_or_create(
            **location_data, defaults={'data_source': 'DBR'})
    except MultipleObjectsReturned:
        # incoming location data is so sparse that comparing it to existing
        # locations yielded multiple records. create a new location with this
        # limited info.
        # note: this will need to be fixed up to prevent duplicate location
        # records with the same sparse data
        location_object = Location.objects.create(**location_data)
        created = True
    return location_object, created
def test_canonicalize_location_dict():
    """Canonicalizing ``raw`` must produce exactly ``desired``."""
    canonical = h.canonicalize_location_dict(raw)
    assert canonical == desired
def load_locations(self, fabs_broker_data, total_rows, pop_flag=False):
    """
    Build a Location for every FABS broker row, then bulk create them.

    Input parameters:
        - fabs_broker_data: iterable of rows to load
        - total_rows: row count, used only in progress log messages
        - pop_flag: when True rows are read through pop_field_map and
          collected in pop_bulk (place of performance); otherwise
          le_field_map and lel_bulk are used (recipient)
    """
    start_time = datetime.now()

    # Both selections depend only on pop_flag, so make them once.
    field_map = pop_field_map if pop_flag else le_field_map
    bulk_target = pop_bulk if pop_flag else lel_bulk

    for index, row in enumerate(fabs_broker_data, 1):
        if index % 10000 == 0:
            logger.info('Locations: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        # A fresh value map per row: it is filled in below.
        if pop_flag:
            location_value_map = {"place_of_performance_flag": True}
        else:
            location_value_map = {'recipient_flag': True}

        row = canonicalize_location_dict(row)

        country_code = row[field_map.get('location_country_code')]
        pop_code = row[field_map.get('performance_code')] if pop_flag else None

        # We can assume that if the country code is blank and the place of
        # performance code is NOT '00FORGN', then the country code is USA
        if pop_flag and not country_code and pop_code != '00FORGN':
            row[field_map.get('location_country_code')] = 'USA'

        # Get country code obj
        country_obj = self.country_code_map.get(
            row[field_map.get('location_country_code')])

        # Fix state code periods
        state_code = row.get(field_map.get('state_code'))
        if state_code is not None:
            location_value_map['state_code'] = state_code.replace('.', '')

        if country_obj:
            location_value_map['location_country_code'] = country_obj
            location_value_map['country_name'] = country_obj.country_name
            if country_obj.country_code != 'USA':
                # State fields are cleared for non-USA countries.
                location_value_map['state_code'] = None
                location_value_map['state_name'] = None
        else:
            # no country found for this code
            location_value_map['location_country_code'] = None
            location_value_map['country_name'] = None

        field_values = load_data_into_model(
            Location(),
            row,
            value_map=location_value_map,
            field_map=field_map,
            as_dict=True)

        location = Location(**field_values)
        location.load_city_county_data()
        location.fill_missing_state_data()
        location.fill_missing_zip5()

        bulk_target.append(location)

    if pop_flag:
        logger.info('Bulk creating POP Locations (batch_size: {})...'.format(BATCH_SIZE))
        Location.objects.bulk_create(pop_bulk, batch_size=BATCH_SIZE)
    else:
        logger.info('Bulk creating LE Locations (batch_size: {})...'.format(BATCH_SIZE))
        Location.objects.bulk_create(lel_bulk, batch_size=BATCH_SIZE)
def get_or_create_location(location_map, row, location_value_map=None, empty_location=None, d_file=False, save=True):
    """
    Build a location object from a row of data, saving it when requested.

    Input parameters:
        - location_map: a dictionary with key = field name on the location
          model and value = corresponding field name on the current row
          of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary handed to
          load_data_into_model as ``value_map``; it is updated in place
        - empty_location, d_file: not read by this function
        - save: when truthy the location is built with ``save=True``

    Returns a ``(location_object, created)`` tuple:
        - ``(None, None)`` when the row holds no location data
        - ``(None, False)`` when the only data is a truthy
          place_of_performance_flag
        - ``(location, False)`` when ``save`` is truthy
        - ``(location, True)`` otherwise
    """
    if location_value_map is None:
        location_value_map = {}

    row = canonicalize_location_dict(row)

    # For only FABS
    if "place_of_performance_code" in row:
        country = row[location_map.get('location_country_code')]
        is_recipient = location_value_map.get('recipient_flag')
        is_pop = location_value_map.get('place_of_performance_flag')

        # If the recipient's location country code is empty or it's 'UNITED STATES'
        # OR the place of performance location country code is empty and either
        #    there is no performance code or the performance code isn't 00FORGN
        # OR the country code is a US territory
        # THEN we can assume that the location country code is 'USA'
        assume_usa = (
            (is_recipient and (country is None or country == 'UNITED STATES'))
            or (is_pop and country is None
                and ("performance_code" not in location_map
                     or row[location_map["performance_code"]] != '00FORGN'))
            or country in territory_country_codes
        )
        if assume_usa:
            row[location_map["location_country_code"]] = 'USA'

    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Remove . in state names (i.e. D.C.)
        location_value_map['state_code'] = state_code.replace('.', '')

    # NOTE(review): state_code is reset to None here unconditionally, so the
    # cleaned value stored just above is discarded; confirm this is intended.
    location_value_map.update(
        location_country_code=location_map.get('location_country_code'),
        country_name=location_map.get('location_country_name'),
        state_code=None,  # expired
        state_name=None,
    )

    location_data = load_data_into_model(
        Location(),
        row,
        value_map=location_value_map,
        field_map=location_map,
        as_dict=True)

    # hacky way to ensure we don't create a series of empty location records
    del location_data['data_source']

    if not location_data:
        # record had no location information at all
        return None, None

    if len(location_data) == 1 and location_data.get("place_of_performance_flag"):
        # Nothing but the place of performance flag: no location is built.
        return None, False

    build_kwargs = {
        'value_map': location_value_map,
        'field_map': location_map,
        'as_dict': False,
    }
    if save:
        build_kwargs['save'] = True

    location_object = load_data_into_model(Location(), row, **build_kwargs)
    # created is reported as False for the saved path and True otherwise.
    return location_object, not save
def load_locations(self, fpds_broker_data, total_rows, pop_flag=False):
    """
    Build a Location for every FPDS broker row, then bulk create them.

    Input parameters:
        - fpds_broker_data: iterable of rows to load
        - total_rows: row count, used only in progress log messages
        - pop_flag: when True rows are read through pop_field_map and
          collected in pop_bulk (place of performance); otherwise
          le_field_map and lel_bulk are used (recipient)
    """
    start_time = datetime.now()

    # Both selections depend only on pop_flag, so make them once.
    field_map = pop_field_map if pop_flag else le_field_map
    bulk_target = pop_bulk if pop_flag else lel_bulk

    for index, row in enumerate(fpds_broker_data, 1):
        if index % 10000 == 0:
            logger.info('Locations: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        # A fresh value map per row: it is filled in below.
        if pop_flag:
            location_value_map = {"place_of_performance_flag": True}
        else:
            location_value_map = {'recipient_flag': True}

        row = canonicalize_location_dict(row)

        # A blank country code is NOT defaulted to USA here: that assumption
        # relies on the place of performance code ('00FORGN'), which FPDS
        # does not have.

        # Get country code obj
        country_obj = self.country_code_map.get(
            row[field_map.get('location_country_code')])

        # Fix state code periods
        state_code = row.get(field_map.get('state_code'))
        if state_code is not None:
            location_value_map['state_code'] = state_code.replace('.', '')

        if country_obj:
            location_value_map['location_country_code'] = country_obj
            location_value_map['country_name'] = country_obj.country_name
            if country_obj.country_code != 'USA':
                # State fields are cleared for non-USA countries.
                location_value_map['state_code'] = None
                location_value_map['state_name'] = None
        else:
            # no country found for this code
            location_value_map['location_country_code'] = None
            location_value_map['country_name'] = None

        field_values = load_data_into_model(
            Location(),
            row,
            value_map=location_value_map,
            field_map=field_map,
            as_dict=True)

        location = Location(**field_values)
        location.load_city_county_data()
        location.fill_missing_state_data()
        location.fill_missing_zip5()

        bulk_target.append(location)

    if pop_flag:
        logger.info('Bulk creating POP Locations (batch_size: {})...'.format(BATCH_SIZE))
        Location.objects.bulk_create(pop_bulk, batch_size=BATCH_SIZE)
    else:
        logger.info('Bulk creating LE Locations (batch_size: {})...'.format(BATCH_SIZE))
        Location.objects.bulk_create(lel_bulk, batch_size=BATCH_SIZE)
def get_or_create_location(location_map, row, location_value_map=None, empty_location=None, d_file=False, save=True):
    """
    Turn a row of data into a location object, optionally saving it.

    Input parameters:
        - location_map: a dictionary with key = field name on the location
          model and value = corresponding field name on the current row
          of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary handed to
          load_data_into_model as ``value_map``; it is updated in place
        - empty_location, d_file: not read by this function
        - save: when truthy the location is built with ``save=True``

    Returns a ``(location_object, created)`` tuple:
        - ``(None, None)`` when the row holds no location data
        - ``(None, False)`` when the only data is a truthy
          place_of_performance_flag
        - ``(location, False)`` when ``save`` is truthy
        - ``(location, True)`` otherwise
    """
    if location_value_map is None:
        location_value_map = {}

    row = canonicalize_location_dict(row)

    # For only FABS
    if "place_of_performance_code" in row:
        country = row[location_map.get('location_country_code')]
        recipient = location_value_map.get('recipient_flag')
        place_of_performance = location_value_map.get('place_of_performance_flag')

        # The location country code is assumed to be 'USA' when any of these hold:
        #  - recipient location whose country code is empty or 'UNITED STATES'
        #  - place of performance with an empty country code and either no
        #    performance code mapping or a performance code other than 00FORGN
        #  - the country code is a US territory
        recipient_in_usa = recipient and (
            country is None or country == 'UNITED STATES')
        pop_in_usa = place_of_performance and country is None and (
            "performance_code" not in location_map
            or row[location_map["performance_code"]] != '00FORGN')

        if recipient_in_usa or pop_in_usa or country in territory_country_codes:
            row[location_map["location_country_code"]] = 'USA'

    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Remove . in state names (i.e. D.C.)
        location_value_map['state_code'] = state_code.replace('.', '')

    # NOTE(review): state_code is reset to None here unconditionally, so the
    # cleaned value stored just above is discarded; confirm this is intended.
    location_value_map.update(
        location_country_code=location_map.get('location_country_code'),
        country_name=location_map.get('location_country_name'),
        state_code=None,  # expired
        state_name=None,
    )

    location_data = load_data_into_model(
        Location(),
        row,
        value_map=location_value_map,
        field_map=location_map,
        as_dict=True)

    # hacky way to ensure we don't create a series of empty location records
    del location_data['data_source']

    if not location_data:
        # record had no location information at all
        return None, None

    if len(location_data) == 1 and location_data.get("place_of_performance_flag"):
        # Nothing but the place of performance flag: no location is built.
        return None, False

    if save:
        saved_location = load_data_into_model(
            Location(),
            row,
            value_map=location_value_map,
            field_map=location_map,
            as_dict=False,
            save=True)
        return saved_location, False

    unsaved_location = load_data_into_model(
        Location(),
        row,
        value_map=location_value_map,
        field_map=location_map,
        as_dict=False)
    return unsaved_location, True