# set up projection transformation # thanks to John A. Stevenson: http://all-geo.org/volcan01010/2012/11/change-coordinates-with-pyproj/ wgs84 = pyproj.Proj( "+init=EPSG:4326" ) # LatLon with WGS84 datum used by GPS units and Google Earth osgb36 = pyproj.Proj("+init=EPSG:27700") # UK Ordnance Survey, 1936 datum # raw files download # True if specified --download, otherwise False FILES = {RAW_FILE_NAME_REPD: URL_REPD, RAW_FILE_NAME_DUKES: URL_DUKES} DOWNLOAD_FILES = pw.download( u"UK Renewable Energy Planning Database and DUKES", FILES) # set up fuel type thesaurus fuel_thesaurus = pw.make_fuel_thesaurus() # set up country name thesaurus country_thesaurus = pw.make_country_names_thesaurus() # create dictionary for power plant objects plants_dictionary = {} # load GEO and CARMA for matching coordinates geo_database = pw.load_database(GEO_DATABASE_FILE) print("Loaded {0} plants from GEO database.".format(len(geo_database))) carma_database = pw.load_database(CARMA_DATABASE_FILE) print("Loaded {0} plants from CARMA database.".format(len(carma_database))) # read in plant matches file with open(PLANT_MATCHES, "rbU") as f:
def country_summary(db_conn, country, iso_code): """ Get a country-level summary of the database. Parameters ---------- db_conn : sqlite3.Connection Open database connection. country : str Standard country name used in the database. iso_code : str 3 character country code. Returns ------- Dict holding the summarized metrics for the country. """ summary = {'country': country, 'iso_code': iso_code} c = db_conn.cursor() # count number of powerplants stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=?)''' query = c.execute(stmt, (iso_code, )) summary['count'], = query.fetchone() # skip rest of summary if there aren't any powerplants if not summary['count']: return summary # compute total capacity stmt = '''SELECT SUM(capacity_mw) FROM powerplants WHERE (country=?)''' query = c.execute(stmt, (iso_code, )) total_capacity_mw, = query.fetchone() summary['total_capacity_gw'] = total_capacity_mw / 1000 # compute maximum single capacity stmt = '''SELECT MAX(capacity_mw) FROM powerplants WHERE (country=?)''' query = c.execute(stmt, (iso_code, )) summary['max_capacity_mw'], = query.fetchone() # count distinct fuel types stmt = '''SELECT COUNT(*) FROM ( SELECT DISTINCT(primary_fuel) from powerplants WHERE (country="{0}" AND primary_fuel IS NOT NULL) UNION SELECT DISTINCT(other_fuel1) from powerplants WHERE (country="{0}" AND other_fuel1 IS NOT NULL) UNION SELECT DISTINCT(other_fuel2) from powerplants WHERE (country="{0}" AND other_fuel2 IS NOT NULL) UNION SELECT DISTINCT(other_fuel3) from powerplants WHERE (country="{0}" AND other_fuel3 IS NOT NULL) ) AS temp'''.format(iso_code) query = c.execute(stmt) summary['count_distinct_fuel'], = query.fetchone() # fuel-specific summaries fuel_list = pw.make_fuel_thesaurus().keys() for fuel in fuel_list: fuel_column_name = '_'.join(fuel.lower().split()) stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=? AND (primary_fuel="{fuel}" OR other_fuel1="{fuel}" OR other_fuel2="{fuel}" OR other_fuel3="{fuel}"))'''.format(fuel=fuel) query = c.execute(stmt, (iso_code, )) summary['count_fuel_{0}'.format(fuel_column_name)], = query.fetchone() stmt = '''SELECT SUM(capacity_mw) FROM powerplants WHERE (country=? AND (primary_fuel="{fuel}" OR other_fuel1="{fuel}" OR other_fuel2="{fuel}" OR other_fuel3="{fuel}"))'''.format(fuel=fuel) query = c.execute(stmt, (iso_code, )) fuel_capacity_mw, = query.fetchone() summary_name = 'capacity_gw_fuel_{0}'.format(fuel_column_name) if fuel_capacity_mw is None: summary[summary_name] = 0 else: summary[summary_name] = fuel_capacity_mw / 1000 # count distinct fields count_distinct_list = ['name', 'owner', 'source'] for field in count_distinct_list: stmt = '''SELECT COUNT(DISTINCT({field})) FROM powerplants WHERE (country=? AND {field} IS NOT NULL)'''.format(field=field) query = c.execute(stmt, (iso_code, )) summary['count_distinct_{0}'.format(field)], = query.fetchone() # count null fields count_null_list = [ 'name', 'gppd_idnr', 'capacity_mw', 'year_of_capacity_data', 'owner', 'source', 'url', 'latitude', 'longitude' ] for field in count_null_list: stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=? AND {field} IS NULL)'''.format(field=field) query = c.execute(stmt, (iso_code, )) summary['count_null_{0}'.format(field)], = query.fetchone() # count null fuel occurrences stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=? AND primary_fuel IS NULL AND other_fuel1 IS NULL AND other_fuel2 IS NULL AND other_fuel3 is NULL)''' query = c.execute(stmt, (iso_code, )) summary['count_null_fuel'], = query.fetchone() # count plants with a wepp_id stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=? AND wepp_id IS NOT NULL)''' query = c.execute(stmt, (iso_code, )) summary['count_wepp_id'], = query.fetchone() # count plants with a wepp_id stmt = '''SELECT SUM(capacity_mw) FROM powerplants WHERE (country=? AND wepp_id IS NOT NULL)''' query = c.execute(stmt, (iso_code, )) wepp_id_mw, = query.fetchone() if wepp_id_mw is None: summary['capacity_gw_wepp_id'] = 0 else: summary['capacity_gw_wepp_id'] = wepp_id_mw / 1000. # count null generation data for all years stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=? AND generation_gwh_2015 IS NULL AND generation_gwh_2016 IS NULL AND generation_gwh_2017 IS NULL AND estimated_generation_gwh IS NULL )''' query = c.execute(stmt, (iso_code, )) summary['count_null_generation_gwh_all'], = query.fetchone() # count non-null generation years for year in range(2013, 2018): field = 'generation_gwh_{0}'.format(year) stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=? AND {field} IS NOT NULL)'''.format(field=field) query = c.execute(stmt, (iso_code, )) summary['count_{0}'.format(field)], = query.fetchone() # count non-null generation sources stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=? AND generation_data_source IS NOT NULL)''' query = c.execute(stmt, (iso_code, )) summary['count_generation_data_source'], = query.fetchone() # count null estimated generation field = 'estimated_generation_gwh' stmt = '''SELECT COUNT(*) FROM powerplants WHERE (country=? AND {field} IS NOT NULL)'''.format(field=field) query = c.execute(stmt, (iso_code, )) summary['count_{0}'.format(field)], = query.fetchone() return summary