def handle(self, *args, **options):
    """Replace every stored City with one row per entry of ``cities``.

    All existing City rows are deleted first; then each name in the
    ``cities`` sequence (defined elsewhere in this file) is saved as a new
    City, with a progress line printed after each save.
    """
    print('Load cities')

    # Start from an empty table so reruns do not accumulate duplicates.
    City.objects.all().delete()

    for city_name in cities:
        record = City(name=city_name)
        record.save()
        print('Saving...%s' % city_name)
def proccess_file(filename):
    """Bulk-create City rows from column A of the 'Hoja 1' worksheet.

    The workbook is opened with ``data_only=True`` so formula cells yield
    their cached values.  Reading starts at row 2 (row 1 is presumably a
    header -- TODO confirm).  For each non-empty cell in the first column a
    City is built whose ``code`` is the stripped cell text and whose ``name``
    is that text capitalized; all of them are inserted with one
    ``bulk_create`` call.

    Args:
        filename: path of the .xlsx workbook to read.

    Raises:
        KeyError: the workbook has no sheet named 'Hoja 1'.
    """
    document = openpyxl.load_workbook(filename, data_only=True)
    # Index access replaces the deprecated Workbook.get_sheet_by_name().
    sheet = document['Hoja 1']

    cities = []
    for row in sheet.iter_rows(min_row=2):
        raw_value = row[0].value
        if raw_value is None:
            # Blank rows (common at the end of a sheet) carry no city.
            continue
        code = raw_value.strip()
        if not code:
            # Whitespace-only cell: skip rather than create a nameless City.
            continue
        cities.append(City(name=code.capitalize(), code=code))

    City.objects.bulk_create(cities)
def read_csv(source):
    """Import Ann Arbor rental-permit rows from the CSV file at *source*.

    Looks up the City tagged "ann_arbor" (creating it when missing), then
    walks every data row of the CSV.  Rows whose permit type is "RENTAL" and
    whose status is neither "EXPIRED" nor "CLOSED" are passed through
    ``Geo.lookup`` and collected as ``Location`` objects keyed by the
    upper-cased address.  That collection is written to ``<city tag>.json``
    in the same directory as *source* after each such row, and is finally
    handed to ``save_results``.

    Args:
        source: path of the CSV file to read; it is opened as UTF-8.

    Raises:
        ValueError: a row has a permit type other than "RENTAL" or
            "MECHANICAL", or a city that is neither empty nor "ann arbor"
            (compared case-insensitively).
        IndexError: a row has fewer than 12 columns.
    """
    city_options = City.objects.filter(tag="ann_arbor")
    print(len(city_options))
    if len(city_options):
        city = city_options[0]
    else:
        city = City()
        city.name = "Ann Arbor"
        city.tag = to_tag(city.name)
        city.save()

    print(city)

    # TODO:
    # setup FeedInfo item
    # and also create a Source item

    # Collected for informational purposes only (to see what data exists).
    permit_sub_types = []
    status_types = []
    building_nums = []
    applicants = []
    managers = []

    # Applicant names containing any of these look like management companies.
    # The dots in "L.L.C." are escaped so they only match literal periods.
    manager_pattern = re.compile(
        r'MGMT|REALTY|PROPERTIES|MANAGEMENT|GROUP|LLC|L\.L\.C\.|INC')

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source), cache_file)

    # Keep a local copy of data we've processed so subsequent runs don't need
    # to repeat calls to remote geolocation APIs.
    local_cache = load_json(cache_destination, create=True)
    if 'buildings' not in local_cache:
        local_cache['buildings'] = {}
    if 'parcels' not in local_cache:
        local_cache['parcels'] = {}

    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    # geocoder helper:
    geo = Geo()

    with codecs.open(source, 'rb', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)

        # The first row is only printed, never processed as data.
        print('>, <'.join(next(reader)))

        for row in reader:
            print(row)

            # Column layout of the export.  Several fields are not used yet;
            # they are still read to document the layout, and reading index
            # 11 makes a short row fail immediately with IndexError.
            permit_id = row[0]
            permit_type = row[1]
            # type of building (eg: sf attached, duplex, etc)
            sub_type = row[2]
            # used to filter out obsolete entries
            status = row[3]
            parcel_id = row[4]
            address = row[5]
            # Named row_city so the City instance above is not overwritten.
            row_city = row[6]
            sqft = row[7]
            number_of_buildings = row[8]
            applicant_name = row[9]
            number_of_stories = row[10]
            number_of_units = row[11]

            if permit_type not in ("RENTAL", "MECHANICAL"):
                raise ValueError(
                    "Unexpected permit type: %s in row: %s" % (
                        permit_type, row))

            # should be fixed per source; an empty city is tolerated
            if row_city and row_city.lower() != 'ann arbor':
                raise ValueError("Unexpected city: %s" % (row_city))

            if status not in ('EXPIRED', 'CLOSED') and permit_type == 'RENTAL':
                address_key = address.upper()

                if address_key in locations:
                    location = locations[address_key]
                else:
                    location = Location()

                # do some geocoding, as needed:
                search = "%s, Ann Arbor MI" % address_key

                # NOTE(review): this also runs for locations restored from
                # the cache; presumably geo.lookup skips sources that already
                # have a result -- confirm.
                # Named geo_source so the `source` argument is not overwritten.
                for geo_source in location.sources:
                    geo.lookup(search, geo_source, location)

                location.address_alt = search
                locations[address_key] = location

                # Placeholder: the Building is constructed but not saved yet.
                # TODO: check whether a previous building object exists in
                # the db before creating a new one.
                bldg = Building()
                bldg.type = sub_type

                # Back everything up after each processed row so an
                # interrupted run keeps the lookups done so far.
                local_cache['buildings'] = {}
                for key, value in locations.items():
                    local_cache['buildings'][key] = value.to_dict()

                save_json(cache_destination, local_cache)

            # THE FOLLOWING ARE FOR INFORMATIONAL PURPOSES ONLY
            # (to see what data is available)
            if status not in status_types:
                status_types.append(status)

            if sub_type not in permit_sub_types:
                permit_sub_types.append(sub_type)

            building_num = row[8]
            if building_num not in building_nums:
                building_nums.append(building_num)

            applicant = row[9]
            if manager_pattern.search(applicant):
                if applicant not in managers:
                    managers.append(applicant)
            else:
                if applicant not in applicants:
                    applicants.append(applicant)

    # NOTE(review): printing permit_sub_types appears to have been commented
    # out in the original; it is left disabled here -- confirm.
    # print(permit_sub_types)
    print(status_types)
    print(building_nums)

    save_results(locations)
def read_csv(source):
    """Import Ann Arbor rental-permit rows from the CSV file at *source*.

    Looks up the City tagged "ann_arbor" (creating it when missing), then
    walks every data row of the CSV.  Rows whose permit type is "RENTAL" and
    whose status is neither "EXPIRED" nor "CLOSED" are passed through
    ``Geo.lookup`` and collected as ``Location`` objects keyed by the
    upper-cased address.  That collection is written to ``<city tag>.json``
    in the same directory as *source* after each such row, and is finally
    handed to ``save_results``.

    Args:
        source: path of the CSV file to read; it is opened as UTF-8.

    Raises:
        ValueError: a row has a permit type other than "RENTAL" or
            "MECHANICAL", or a city that is neither empty nor "ann arbor"
            (compared case-insensitively).
        IndexError: a row has fewer than 12 columns.
    """
    city_options = City.objects.filter(tag="ann_arbor")
    print(len(city_options))
    if len(city_options):
        city = city_options[0]
    else:
        city = City()
        city.name = "Ann Arbor"
        city.tag = to_tag(city.name)
        city.save()

    print(city)

    # TODO:
    # setup FeedInfo item
    # and also create a Source item

    # Collected for informational purposes only (to see what data exists).
    permit_sub_types = []
    status_types = []
    building_nums = []
    applicants = []
    managers = []

    # Applicant names containing any of these look like management companies.
    # The dots in "L.L.C." are escaped so they only match literal periods.
    manager_pattern = re.compile(
        r'MGMT|REALTY|PROPERTIES|MANAGEMENT|GROUP|LLC|L\.L\.C\.|INC')

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source), cache_file)

    # Keep a local copy of data we've processed so subsequent runs don't need
    # to repeat calls to remote geolocation APIs.
    local_cache = load_json(cache_destination, create=True)
    if 'buildings' not in local_cache:
        local_cache['buildings'] = {}
    if 'parcels' not in local_cache:
        local_cache['parcels'] = {}

    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    # geocoder helper:
    geo = Geo()

    with codecs.open(source, 'rb', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)

        # The first row is only printed, never processed as data.
        print('>, <'.join(next(reader)))

        for row in reader:
            print(row)

            # Column layout of the export.  Several fields are not used yet;
            # they are still read to document the layout, and reading index
            # 11 makes a short row fail immediately with IndexError.
            permit_id = row[0]
            permit_type = row[1]
            # type of building (eg: sf attached, duplex, etc)
            sub_type = row[2]
            # used to filter out obsolete entries
            status = row[3]
            parcel_id = row[4]
            address = row[5]
            # Named row_city so the City instance above is not overwritten.
            row_city = row[6]
            sqft = row[7]
            number_of_buildings = row[8]
            applicant_name = row[9]
            number_of_stories = row[10]
            number_of_units = row[11]

            if permit_type not in ("RENTAL", "MECHANICAL"):
                raise ValueError(
                    "Unexpected permit type: %s in row: %s" % (
                        permit_type, row))

            # should be fixed per source; an empty city is tolerated
            if row_city and row_city.lower() != 'ann arbor':
                raise ValueError("Unexpected city: %s" % (row_city))

            if status not in ('EXPIRED', 'CLOSED') and permit_type == 'RENTAL':
                address_key = address.upper()

                if address_key in locations:
                    location = locations[address_key]
                else:
                    location = Location()

                # do some geocoding, as needed:
                search = "%s, Ann Arbor MI" % address_key

                # NOTE(review): this also runs for locations restored from
                # the cache; presumably geo.lookup skips sources that already
                # have a result -- confirm.
                # Named geo_source so the `source` argument is not overwritten.
                for geo_source in location.sources:
                    geo.lookup(search, geo_source, location)

                location.address_alt = search
                locations[address_key] = location

                # Placeholder: the Building is constructed but not saved yet.
                # TODO: check whether a previous building object exists in
                # the db before creating a new one.
                bldg = Building()
                bldg.type = sub_type

                # Back everything up after each processed row so an
                # interrupted run keeps the lookups done so far.
                local_cache['buildings'] = {}
                for key, value in locations.items():
                    local_cache['buildings'][key] = value.to_dict()

                save_json(cache_destination, local_cache)

            # THE FOLLOWING ARE FOR INFORMATIONAL PURPOSES ONLY
            # (to see what data is available)
            if status not in status_types:
                status_types.append(status)

            if sub_type not in permit_sub_types:
                permit_sub_types.append(sub_type)

            building_num = row[8]
            if building_num not in building_nums:
                building_nums.append(building_num)

            applicant = row[9]
            if manager_pattern.search(applicant):
                if applicant not in managers:
                    managers.append(applicant)
            else:
                if applicant not in applicants:
                    applicants.append(applicant)

    # NOTE(review): printing permit_sub_types appears to have been commented
    # out in the original; it is left disabled here -- confirm.
    # print(permit_sub_types)
    print(status_types)
    print(building_nums)

    save_results(locations)
['Berkeley', 'CA', '', ''], ['Evanston', 'IL', '', ''], ['Fayetteville', 'AR', '', ''], ['Dearborn', 'MI', '', ''], ['Oklahoma City', 'OK', '', ''], ] for city_simple in cities: city_name = city_simple[0] city_state = city_simple[1] city_tag = to_tag("%s_%s" % (city_name, city_state)) city_options = City.objects.filter(tag=city_tag) print "Number of cities available: %s" % len(city_options) if not len(city_options): city = City() else: city = city_options[0] city.name = city_name city.tag = city_tag city.state = city_state if saved_cities.has_key(city_tag) and saved_cities[city_tag][ 'lat'] and saved_cities[city_tag]['lng']: city_dict = saved_cities[city_tag] print city_dict city.latitude = city_dict['lat'] city.longitude = city_dict['lng'] else:
['Berkeley', 'CA', '', ''], ['Evanston', 'IL', '', ''], ['Fayetteville', 'AR', '', ''], ['Dearborn', 'MI', '', ''], ['Oklahoma City', 'OK', '', ''], ] for city_simple in cities: city_name = city_simple[0] city_state = city_simple[1] city_tag = to_tag("%s_%s" % (city_name, city_state)) city_options = City.objects.filter(tag=city_tag) print "Number of cities available: %s" % len(city_options) if not len(city_options): city = City() else: city = city_options[0] city.name = city_name city.tag = city_tag city.state = city_state if saved_cities.has_key(city_tag) and saved_cities[city_tag]['lat'] and saved_cities[city_tag]['lng']: city_dict = saved_cities[city_tag] print city_dict city.latitude = city_dict['lat'] city.longitude = city_dict['lng'] else: