def _import_data(self, server, database,
                 max_facilities_to_import=DEFAULT_FACILITIES_TO_IMPORT,
                 max_mdg_to_import=DEFAULT_MDG_TO_IMPORT):
    """Load location entities from the 'NIMS Data Deux' Google spreadsheet.

    Opens ``database`` on ``server`` through DatabaseManager, registers the
    base data dictionary types (cgs, geo_id, name, mdg), reads the registry
    ids listed in the country and state worksheets, and creates one Country
    entity for every distinct country met in the 'Nigeria LGAs ALL'
    worksheet.

    Arguments:
    server -- forwarded to DatabaseManager as ``server``.
    database -- forwarded to DatabaseManager as ``database``.
    max_facilities_to_import -- not referenced in this part of the routine.
    max_mdg_to_import -- not referenced in this part of the routine.
    """
    from mangrove.datastore.database import DatabaseManager
    from mangrove.datastore.entity import Entity, get_entities_by_value
    from mangrove.datastore.datadict import DataDictType, get_datadict_type, create_datadict_type
    from mangrove.utils import GoogleSpreadsheetsClient
    from mangrove.utils.google_spreadsheets import get_string, get_number, get_boolean, get_list
    from mangrove.utils.spreadsheets import CsvReader
    from mangrove.utils.helpers import slugify
    from mangrove.georegistry.api import get_feature_by_id
    import os
    import datetime
    import json
    from pytz import UTC

    # Single-argument print(...) prints the same text as the print statement
    # on Python 2 and is also valid Python 3 syntax.
    print("Loading 'NIMS Data'...")
    print("\tServer: %s" % server)
    print("\tDatabase: %s" % database)

    dbm = DatabaseManager(server=server, database=database)
    user_spreadsheets = GoogleSpreadsheetsClient(settings.GMAIL_USERNAME,
                                                 settings.GMAIL_PASSWORD)
    nims_data = user_spreadsheets['NIMS Data Deux']

    # Switches for the individual import sections.
    load_population = True
    load_other = True
    load_mdg = True
    load_health = True
    load_water = True
    load_education = True

    countries = {}   # country name -> entity id
    states = {}
    locations = {}   # location path tuple -> result of Entity.save()
    num_cgs = 0
    datadict_types = {}  # slug -> datadict type id
    geo_id_dict = {}

    cgs_type = create_datadict_type(
        dbm, slug='cgs', name='CGS', primitive_type='boolean'
    )
    datadict_types['cgs'] = cgs_type.id

    geo_id_type = create_datadict_type(
        dbm, slug='geo_id', name='Geographic ID', primitive_type='string'
    )
    datadict_types['geo_id'] = geo_id_type.id

    name_type = create_datadict_type(
        dbm, slug='name', name='Name', primitive_type='string'
    )
    datadict_types['name'] = name_type.id

    mdg_type = create_datadict_type(
        dbm, slug='mdg', name='MDG', primitive_type='string'
    )
    datadict_types['mdg'] = mdg_type.id

    # Map names to the 'grid' column, later used as the id handed to
    # get_feature_by_id.
    country_geo_id = {}
    for row in nims_data['Nigeria Country ALL']:
        country_geo_id[row['name']] = row['grid']

    state_geo_ids = {}
    for row in nims_data['Nigeria States ALL']:
        state_geo_ids[row['name']] = row['grid']

    num_rows = 0
    print("Importing location entities from 'Nigeria LGAs ALL' worksheet")
    for row in nims_data['Nigeria LGAs ALL']:
        country = get_string('country', row)
        state = get_string('state', row)
        lga = get_string('lga', row)
        cgs = get_boolean('cgs', row)
        geo_id = get_string('geoid', row)
        lga_gr_id = get_string('grid', row)
        location = (country, state, lga)

        # Create each country only once, the first time a row mentions it.
        if country not in countries:
            gr_id = country_geo_id[country]
            feature = get_feature_by_id(gr_id)
            # The centroid is stored as a JSON string inside the properties.
            centroid = json.loads(feature['properties']['geometry_centroid'])
            e = Entity(dbm, entity_type=["Location", "Country"],
                       location=[country], centroid=centroid, gr_id=gr_id)
            locations[(country,)] = e.save()
            countries[country] = e.id
            data = [(name_type.slug, country, name_type)]
            # Plain decimal month/day: the former 03/01 spelling is octal
            # notation on Python 2 and a syntax error on Python 3.
            e.add_data(data,
                       event_time=datetime.datetime(2011, 3, 1, tzinfo=UTC))
            num_rows += 1
            print("[%s]...(%s) -- %s" % (num_rows, country, e.id))
def _import_data(self, server, database,
                 max_facilities_to_import=DEFAULT_FACILITIES_TO_IMPORT,
                 max_mdg_to_import=DEFAULT_MDG_TO_IMPORT):
    """Import country entities listed in the 'NIMS Data Deux' spreadsheet.

    The routine connects to ``database`` on ``server``, creates the cgs,
    geo_id, name and mdg data dictionary types, collects the 'grid' ids from
    the country and state worksheets, and then walks the
    'Nigeria LGAs ALL' worksheet, saving a Country entity the first time
    each country name appears.

    Arguments:
    server -- handed to DatabaseManager as ``server``.
    database -- handed to DatabaseManager as ``database``.
    max_facilities_to_import -- not referenced in this part of the routine.
    max_mdg_to_import -- not referenced in this part of the routine.
    """
    from mangrove.datastore.database import DatabaseManager
    from mangrove.datastore.entity import Entity, get_entities_by_value
    from mangrove.datastore.datadict import DataDictType, get_datadict_type, create_datadict_type
    from mangrove.utils import GoogleSpreadsheetsClient
    from mangrove.utils.google_spreadsheets import get_string, get_number, get_boolean, get_list
    from mangrove.utils.spreadsheets import CsvReader
    from mangrove.utils.helpers import slugify
    from mangrove.georegistry.api import get_feature_by_id
    import os
    import datetime
    import json
    from pytz import UTC

    # print(...) with one argument is equivalent to the print statement on
    # Python 2 and keeps the code parseable on Python 3.
    print("Loading 'NIMS Data'...")
    print("\tServer: %s" % server)
    print("\tDatabase: %s" % database)

    dbm = DatabaseManager(server=server, database=database)
    user_spreadsheets = GoogleSpreadsheetsClient(settings.GMAIL_USERNAME,
                                                 settings.GMAIL_PASSWORD)
    nims_data = user_spreadsheets['NIMS Data Deux']

    # Per-section import toggles.
    load_population = True
    load_other = True
    load_mdg = True
    load_health = True
    load_water = True
    load_education = True

    countries = {}   # country name -> entity id
    states = {}
    locations = {}   # location path tuple -> result of Entity.save()
    num_cgs = 0
    datadict_types = {}  # slug -> datadict type id
    geo_id_dict = {}

    cgs_type = create_datadict_type(dbm, slug='cgs', name='CGS',
                                    primitive_type='boolean')
    datadict_types['cgs'] = cgs_type.id
    geo_id_type = create_datadict_type(dbm, slug='geo_id',
                                       name='Geographic ID',
                                       primitive_type='string')
    datadict_types['geo_id'] = geo_id_type.id
    name_type = create_datadict_type(dbm, slug='name', name='Name',
                                     primitive_type='string')
    datadict_types['name'] = name_type.id
    mdg_type = create_datadict_type(dbm, slug='mdg', name='MDG',
                                    primitive_type='string')
    datadict_types['mdg'] = mdg_type.id

    # name -> 'grid' value; the country value is later passed to
    # get_feature_by_id.
    country_geo_id = {}
    for row in nims_data['Nigeria Country ALL']:
        country_geo_id[row['name']] = row['grid']
    state_geo_ids = {}
    for row in nims_data['Nigeria States ALL']:
        state_geo_ids[row['name']] = row['grid']

    num_rows = 0
    print("Importing location entities from 'Nigeria LGAs ALL' worksheet")
    for row in nims_data['Nigeria LGAs ALL']:
        country = get_string('country', row)
        state = get_string('state', row)
        lga = get_string('lga', row)
        cgs = get_boolean('cgs', row)
        geo_id = get_string('geoid', row)
        lga_gr_id = get_string('grid', row)
        location = (country, state, lga)

        # Only the first row naming a country creates its entity.
        if country not in countries:
            gr_id = country_geo_id[country]
            feature = get_feature_by_id(gr_id)
            # 'geometry_centroid' holds a JSON-encoded value.
            centroid = json.loads(
                feature['properties']['geometry_centroid'])
            e = Entity(dbm,
                       entity_type=["Location", "Country"],
                       location=[country],
                       centroid=centroid,
                       gr_id=gr_id)
            locations[(country, )] = e.save()
            countries[country] = e.id
            data = [(name_type.slug, country, name_type)]
            # Month and day are written as 3 and 1: the earlier 03/01 form
            # is octal notation on Python 2 and rejected by Python 3.
            e.add_data(data,
                       event_time=datetime.datetime(2011, 3, 1, tzinfo=UTC))
            num_rows += 1
            print("[%s]...(%s) -- %s" % (num_rows, country, e.id))
# NOTE(review): this is a mid-function fragment whose line breaks and
# indentation were stripped, so the nesting (for example, which statements
# belong under `if cgs:`) cannot be confirmed from here -- verify against the
# original file before reformatting.
# What the statements do, in order: bump `num_cgs` and record the cgs value on
# entity `e`; print the country, state, LGA and total location counts (the LGA
# count is len(locations) minus countries and states); reset `lga_loaded` and
# `lga_failed`; then, when `load_population` is set, create a datadict type
# for every 'Population Variables' row whose slug is not yet in
# `datadict_types`, and start iterating over the 'Population Data' rows.
if cgs: num_cgs += 1 e.add_data(data=[(cgs_type.slug, cgs, cgs_type)]) print "Countries (%d)" % len(countries) print "States (%d)" % len(states) print "LGAs (%d) (%d as CGS)" % ((len(locations) - len(countries) - len(states)), num_cgs) print "Total locations (%d)" % len(locations) lga_loaded = [] lga_failed = [] if load_population: print "Adding data from 'Population Data' worksheet" for row in nims_data['Population Variables']: slug = get_string('slug', row) name = get_string('name', row) primitive_type = get_string('primitivetype', row) tags = get_list('tags', row) if not slug in datadict_types: dd_type = create_datadict_type( dbm, slug=slug, name=name, primitive_type=primitive_type, tags=tags ) datadict_types[slug] = dd_type.id for row in nims_data['Population Data']: state = get_string('state', row)
# NOTE(review): mid-function fragment with line breaks and indentation
# stripped; the condition guarding the first statements is not visible here,
# so the block nesting must be checked against the original file.
# Statement order: increment `num_cgs` and store the cgs value on entity `e`;
# print the country, state, LGA and total location counts (LGAs are computed
# as len(locations) minus countries and states); reset `lga_loaded` and
# `lga_failed`; when `load_population` is set, create a datadict type for each
# 'Population Variables' row whose slug is missing from `datadict_types`, then
# begin reading state and lga from the 'Population Data' rows.
num_cgs += 1 e.add_data(data=[(cgs_type.slug, cgs, cgs_type)]) print "Countries (%d)" % len(countries) print "States (%d)" % len(states) print "LGAs (%d) (%d as CGS)" % ( (len(locations) - len(countries) - len(states)), num_cgs) print "Total locations (%d)" % len(locations) lga_loaded = [] lga_failed = [] if load_population: print "Adding data from 'Population Data' worksheet" for row in nims_data['Population Variables']: slug = get_string('slug', row) name = get_string('name', row) primitive_type = get_string('primitivetype', row) tags = get_list('tags', row) if not slug in datadict_types: dd_type = create_datadict_type( dbm, slug=slug, name=name, primitive_type=primitive_type, tags=tags) datadict_types[slug] = dd_type.id for row in nims_data['Population Data']: state = get_string('state', row) lga = get_string('lga', row)