def create_consolidated_tables():
    """Build a consolidated "short" SEDS table for each study year.

    For every year in config.STUDY_YEARS, collapses the detailed EIA SEDS
    table (<EIA_SCHEMA>.seds_us_<year>) into <EIA_SCHEMA>.seds_short_<year>:
    renewable sources are merged under 'RE', electricity sources under 'ES',
    and the electricity sectors under 'EI'.  Price is recomputed as total
    expenditure over total btu for each consolidated (source, sector) cell.
    """
    # every source we keep: fossil fuels, electricity, nuclear, renewables
    allsources = eia.fossilfuels + eia.elec_sources + \
        eia.nuclear + eia.renewables
    # total/transport/commercial/industrial/residential consumption plus
    # the electricity sectors; rows outside this list are dropped
    allsectors = ["TC", "AC", "CC", "IC", "RC"] + eia.elec_sectors
    for year in config.STUDY_YEARS:
        # SQL fragments substituted into the statements below;
        # sqlhelper.set_repr renders a python sequence as a SQL IN-list
        strings = {
            "renewables": sqlhelper.set_repr(eia.renewables),
            "elec_sources": sqlhelper.set_repr(eia.elec_sources),
            "elec_sectors": sqlhelper.set_repr(eia.elec_sectors),
            "allsources": sqlhelper.set_repr(allsources),
            "allsectors": sqlhelper.set_repr(allsectors),
            "from_table": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "tablename": "%s.seds_short_%d" % (config.EIA_SCHEMA, year),
        }
        db.execute("DROP TABLE IF EXISTS %(tablename)s CASCADE" % strings)
        # price = sum(ex) / sum(use_btu), with a zero guard for cells
        # that have no btu use at all
        db.execute("""
            SELECT source, sector,
                   case when sum(use_btu) = 0 then 0
                        else sum(ex) / sum(use_btu) end as price,
                   sum(use_btu) as use_btu,
                   sum(ex) as ex
              INTO %(tablename)s
              FROM (SELECT case when source in %(renewables)s then 'RE'
                                when source in %(elec_sources)s then 'ES'
                                else source end as source,
                           case when sector in %(elec_sectors)s then 'EI'
                                else sector end as sector,
                           price, use_btu, ex
                      FROM %(from_table)s
                     WHERE source in %(allsources)s
                       AND sector in %(allsectors)s) e
             GROUP by source, sector
             order by source, sector""" % strings)
def create_views():
    """Create the nipa_groups test view and per-year nipa_pce views.

    The test view unions each year's nipa_codes table, tagged with its
    year.  The per-year nipa_pce view aggregates the PCE bridge by
    (commodity, pce group), splitting the petroleum commodity between
    the transportation/non-transportation commodities created elsewhere.
    """
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA
    subqueries = []
    for year in config.STUDY_YEARS:
        subqueries.append(
            """SELECT %d AS year, nipa_group, description
                 FROM %s.nipa_codes_%d""" % (year, config.IO_SCHEMA, year))
    db.execute("CREATE OR REPLACE VIEW %s AS %s"
               % (test_view, "\n UNION\n".join(subqueries)))
    for year in config.STUDY_YEARS:
        strings = {
            "view_name": "%s.nipa_pce_%d" % (config.IO_SCHEMA, year),
            "bridge_table": "%s.pcebridge_%d" % (config.IO_SCHEMA, year),
            "code_table": "%s.nipa_codes_%d" % (config.IO_SCHEMA, year),
            "groups_table": "%s.pce_codes" % config.NIPA_SCHEMA,
            "pa_naics": eia.source_naics_map['PA'][year],
            "pa_trans_naics": eia.source_naics_map['PA-trans'][year],
            "pa_nontrans_naics": eia.source_naics_map['PA-nontrans'][year],
            "tourism_adjustment": bea.tourism_adjustment_codes[year],
        }
        # make sure gasoline/motor fuel purchases are allocated
        # among the split commodities we made for petroleum
        #
        # fortunately the group description for gasoline in all years
        # all start with "Gasoline" or "GASOLINE"
        # (note '%%' below survives the python %-substitution as the SQL
        # LIKE wildcard '%')
        sql = """
            CREATE OR REPLACE VIEW %(view_name)s AS
            SELECT commodity, pcegroup,
                   cast(sum(prod_val) as float) as value
              FROM (SELECT CASE WHEN UPPER(codes.description)
                                         LIKE 'GASOLINE%%'
                                     AND commodity = '%(pa_naics)s'
                                THEN '%(pa_trans_naics)s'
                                WHEN commodity = '%(pa_naics)s'
                                THEN '%(pa_nontrans_naics)s'
                                ELSE bridge.commodity
                            END AS commodity,
                           bridge.prod_val,
                           groups.description as pcegroup
                      FROM %(bridge_table)s bridge,
                           %(code_table)s codes,
                           %(groups_table)s groups
                     WHERE bridge.pce_code = codes.pce_code
                       AND bridge.commodity <> '%(tourism_adjustment)s'
                       AND codes.nipa_group = groups.code) a
             GROUP BY commodity, pcegroup""" % strings
        db.execute(sql)
def create_views():
    """Create the nipa_groups test view and per-year nipa_pce views.

    NOTE(review): this definition is byte-identical to an earlier
    create_views in this file; at import time only the last binding of
    the name survives.  The duplicate should probably be removed or the
    two functions given distinct names — confirm against callers.
    """
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA
    subqueries = []
    for year in config.STUDY_YEARS:
        subqueries.append(
            """SELECT %d AS year, nipa_group, description
                 FROM %s.nipa_codes_%d""" % (year, config.IO_SCHEMA, year))
    db.execute("CREATE OR REPLACE VIEW %s AS %s"
               % (test_view, "\n UNION\n".join(subqueries)))
    for year in config.STUDY_YEARS:
        strings = {
            "view_name": "%s.nipa_pce_%d" % (config.IO_SCHEMA, year),
            "bridge_table": "%s.pcebridge_%d" % (config.IO_SCHEMA, year),
            "code_table": "%s.nipa_codes_%d" % (config.IO_SCHEMA, year),
            "groups_table": "%s.pce_codes" % config.NIPA_SCHEMA,
            "pa_naics": eia.source_naics_map['PA'][year],
            "pa_trans_naics": eia.source_naics_map['PA-trans'][year],
            "pa_nontrans_naics": eia.source_naics_map['PA-nontrans'][year],
            "tourism_adjustment": bea.tourism_adjustment_codes[year],
        }
        # make sure gasoline/motor fuel purchases are allocated
        # among the split commodities we made for petroleum
        #
        # fortunately the group description for gasoline in all years
        # all start with "Gasoline" or "GASOLINE"
        sql = """
            CREATE OR REPLACE VIEW %(view_name)s AS
            SELECT commodity, pcegroup,
                   cast(sum(prod_val) as float) as value
              FROM (SELECT CASE WHEN UPPER(codes.description)
                                         LIKE 'GASOLINE%%'
                                     AND commodity = '%(pa_naics)s'
                                THEN '%(pa_trans_naics)s'
                                WHEN commodity = '%(pa_naics)s'
                                THEN '%(pa_nontrans_naics)s'
                                ELSE bridge.commodity
                            END AS commodity,
                           bridge.prod_val,
                           groups.description as pcegroup
                      FROM %(bridge_table)s bridge,
                           %(code_table)s codes,
                           %(groups_table)s groups
                     WHERE bridge.pce_code = codes.pce_code
                       AND bridge.commodity <> '%(tourism_adjustment)s'
                       AND codes.nipa_group = groups.code) a
             GROUP BY commodity, pcegroup""" % strings
        db.execute(sql)
def create_views():
    """Create per-year ixi views over the UK ixi tables.

    Each view re-keys uk.ixi_<year> from-sectors through the distinct
    (from_code, to_code) pairs of <SCHEMA>.code_map, summing values by
    the mapped from-sector and the original to-sector.
    """
    for study_year in config.STUDY_YEARS:
        # substitutions for the view statement below
        subs = {
            "year": study_year,
            "schema": config.SCHEMA,
        }
        statement = """CREATE OR REPLACE VIEW %(schema)s.ixi_view_%(year)d AS
            SELECT codes.to_code as from_sector,
                   io.to_sector,
                   sum(io.value) as value
              FROM uk.ixi_%(year)d io,
                   (SELECT DISTINCT from_code, to_code
                      FROM %(schema)s.code_map
                     WHERE from_code is not null
                       AND to_code is not null) codes
             WHERE io.from_sector = codes.from_code
             GROUP BY codes.to_code, io.to_sector""" % subs
        db.execute(statement)
def create_views():
    """Build commodity-by-commodity (cxc) tables for every cached year.

    Scans the io-annual cache for annual make tables, then for each year
    allocates the use table across commodities using each industry's
    share of commodity output from the make table.  Industries with no
    make-table row (other than the 'TIO' total row) pass through with
    their use values unallocated.
    """
    # discover the available years from the cached annual make files;
    # is_make returns a truthy year token for make files, falsy otherwise
    years = []
    files = fileutils.getcachecontents("io-annual")
    for filename in files:
        year = is_make(filename)
        if year:
            years.append(year)
    for year in years:
        strings = {
            "make_table": "%s.annual_make_%s" % (config.IO_SCHEMA, year),
            "use_table": "%s.annual_use_%s" % (config.IO_SCHEMA, year),
            "cxc_table": "%s.annual_cxc_%s" % (config.IO_SCHEMA, year),
        }
        # IF EXISTS so a fresh database (no prior cxc table) doesn't abort
        # the whole run; matches the drop style used elsewhere in this file
        db.execute("DROP TABLE IF EXISTS %(cxc_table)s" % strings)
        # indshare: each industry's share of a commodity's total output;
        # the UNION branch passes through use rows for industries absent
        # from the make table (except the 'TIO' total row)
        db.execute("""SELECT from_sector, to_sector, SUM(value) AS value
              INTO %(cxc_table)s
              FROM (SELECT use.commodity AS from_sector,
                           indshare.commodity AS to_sector,
                           use.value * indshare.output_share AS value
                      FROM (SELECT make.industry, make.commodity,
                                   make.value / indtotal.value AS output_share
                              FROM %(make_table)s make,
                                   (SELECT industry, SUM(value) AS value
                                      FROM %(make_table)s
                                     GROUP BY industry) indtotal
                             WHERE make.industry = indtotal.industry) indshare,
                           %(use_table)s use
                     WHERE indshare.industry = use.industry
                     UNION
                    SELECT use.commodity AS from_sector,
                           use.industry AS to_sector,
                           use.value AS value
                      FROM %(use_table)s use
                     WHERE industry NOT IN (SELECT industry
                                              FROM %(make_table)s make
                                             WHERE commodity = 'TIO')
                   ) allocations
             GROUP BY from_sector, to_sector""" % strings)
def trade_sector_stats(countries, is_export):
    """Print per-industry trade values and shares for a country group.

    Builds (or replaces) export_view/import_view over trade_results,
    restricted to `countries` minus the bad-data blacklist, then prints
    each industry's value and its percentage of the group total for the
    first and last study years via print_result.
    """
    first_year = min(config.STUDY_YEARS)
    last_year = max(config.STUDY_YEARS)
    params = {
        "minyear": first_year,
        "maxyear": last_year,
        "schema": config.WIOD_SCHEMA,
        "is_export": is_export,
        "countries": sqlhelper.set_repr(countries),
        "blacklist": sqlhelper.set_repr(config.bad_data_blacklist),
    }
    # direction selects both the view name and the SQL boolean literal
    params["view"] = "export_view" if is_export else "import_view"
    params["is_export_str"] = "true" if is_export else "false"
    db.execute("""CREATE OR REPLACE VIEW %(view)s AS
        SELECT year, industry, sum(value) as value
          FROM trade_results
         WHERE is_export is %(is_export_str)s
           AND country IN %(countries)s
           AND country NOT IN %(blacklist)s
         GROUP BY year, industry""" % params)
    # each industry's value alongside its share of the yearly total
    query = db.prepare("""
        SELECT a.year, a.industry, a.value, a.value / b.value * 100
          FROM %(view)s a,
               (SELECT year, sum(value) as value
                  FROM %(view)s
                 GROUP BY year) b
         WHERE a.year in (%(minyear)d, %(maxyear)d)
           AND a.year = b.year""" % params)
    print()
    print(countries)
    print()
    print_result(query(), first_year, last_year)
def create_hybrid_tables():
    """Build hybrid_transactions_<year> tables mixing btu and dollars.

    For each study year, creates a hybrid transactions table where rows
    from energy sources carry btu allocated by expenditure shares, and
    all other rows carry the original io f.o.b. dollar values.  Petroleum
    is split into transportation and non-transportation commodities, with
    jet fuel/aviation gasoline assigned entirely to air transportation.
    Intermediate views are created per source and dropped (in reverse
    dependency order) before moving on.
    """
    for year in config.STUDY_YEARS:
        strings = {
            # table names
            "eia_table": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "io_table": "%s.transact_view_%d" % (config.IO_SCHEMA, year),
            "map_table": "%s.eia_code_map_%d" % (config.IO_SCHEMA, year),
            "hybrid_table": "hybrid_transactions_%d" % year,
            # selected sector codes
            "pa_trans_code": eia.source_naics_map['PA-trans'][year],
            "pa_nontrans_code": eia.source_naics_map['PA-nontrans'][year],
        }
        runsql("DROP TABLE IF EXISTS %s CASCADE" % strings["hybrid_table"])
        runsql("""
            CREATE TABLE %(hybrid_table)s (
                from_sector varchar(6),
                to_sector varchar(6),
                expenditure float
            )""" % strings)
        for source in eia.sources:
            strings["shares_view"] = "%s_ex_proportions_%s" % (source.lower(),
                                                               year)
            strings["source"] = source
            strings["source_naics"] = eia.source_naics_map[source][year]
            # seds prices are c.i.f., but when allocating btu among user
            # industries it is unfair to assign based on c.i.f. since what
            # they end up using is f.o.b.
            # subquery: total f.o.b. expenditure per eia sector for this
            # source, used as the denominator of each buyer's share
            subquery = """
                SELECT codes.eia_sector, sum(io.fob) as fob
                  FROM %(io_table)s io, %(map_table)s codes
                 WHERE codes.eia_source = '%(source)s'
                   AND io.from_sector = '%(source_naics)s'
                   AND io.to_sector = codes.io_sector
                 GROUP BY codes.eia_sector""" % strings
            strings["subquery"] = subquery
            # the price each industry ends up actually paying for energy
            # should be the f.o.b. price which is (fob / cif) of the seds
            # price
            runsql("""
                CREATE OR REPLACE VIEW %(shares_view)s AS
                SELECT io.to_sector, codes.eia_sector,
                       cast(io.fob as float) / cast(io.cif as float)
                           as fob_share,
                       cast(io.fob as float) / cast(totals.fob as float)
                           as ex_share
                  FROM %(io_table)s io, %(map_table)s codes,
                       (%(subquery)s) totals
                 WHERE codes.eia_source = '%(source)s'
                   AND io.from_sector = '%(source_naics)s'
                   AND io.to_sector = codes.io_sector
                   AND totals.eia_sector = codes.eia_sector""" % strings)
            # split petroleum
            if source == 'PA':
                strings["aviation_pa"] = sqlhelper.set_repr(
                    eia.aviation_petroleum)
                strings["other_pa"] = sqlhelper.set_repr(eia.other_petroleum)
                strings["all_pa"] = sqlhelper.set_repr(eia.aviation_petroleum +
                                                       eia.other_petroleum)
                strings["pa_nontrans_view"] = "pa_nontrans_%d" % year
                strings["pa_trans_view"] = "pa_trans_%d" % year
                strings[
                    "pa_trans_shares_view"] = "pa_trans_proportions_%d" % year
                strings["aviation_code"] = eia.air_transportation_codes[year]
                # non transportation petroleum use
                runsql("""
                    CREATE OR REPLACE VIEW %(pa_nontrans_view)s AS
                    SELECT shares.to_sector, shares.eia_sector,
                           sum(shares.ex_share * eia.use_btu) as btu,
                           sum(shares.ex_share * eia.use_btu
                               * eia.price * shares.fob_share) as ex
                      FROM %(shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source in %(all_pa)s
                       AND shares.eia_sector = eia.sector
                       -- these two below are double counted
                       AND eia.source || eia.sector not in ('DFEI', 'PCIC')
                       AND eia.sector <> 'AC'
                     GROUP BY shares.to_sector, shares.eia_sector""" % strings)
                # petroleum use for transportation other than air
                runsql("""
                    CREATE OR REPLACE VIEW %(pa_trans_view)s AS
                    SELECT io.to_sector, io.fob,
                           io.fob - nontrans.ex as remaining
                      FROM %(io_table)s io, %(pa_nontrans_view)s nontrans
                     WHERE io.from_sector = '%(source_naics)s'
                       AND io.to_sector = nontrans.to_sector
                       -- remaining is negative for IC and EI
                       AND nontrans.eia_sector in ('CC', 'RC')
                     UNION
                    SELECT io.to_sector, io.fob,
                           cast(io.fob as float) as remaining
                      FROM %(io_table)s io, %(map_table)s codes
                     WHERE io.from_sector = '%(source_naics)s'
                       AND io.to_sector = codes.io_sector
                       AND codes.eia_source = 'PA'
                       AND codes.eia_sector = 'AC'
                       AND io.to_sector <> '%(aviation_code)s'
                    """ % strings)
                # proportions for petroleum allocated to transportation
                runsql("""
                    CREATE OR REPLACE VIEW %(pa_trans_shares_view)s AS
                    SELECT use.to_sector,
                           use.remaining / total.total as ex_share
                      FROM %(pa_trans_view)s use,
                           (SELECT sum(remaining) as total
                              FROM %(pa_trans_view)s) total""" % strings)
                # allocate all of JF and AV to air transportation
                runsql("""
                    INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_trans_code)s', io.to_sector, sum(eia.use_btu)
                      FROM %(io_table)s io, %(eia_table)s eia
                     WHERE eia.source in %(aviation_pa)s
                       and eia.sector = 'AC'
                       and io.from_sector = '%(source_naics)s'
                       and io.to_sector = '%(aviation_code)s'
                     GROUP BY io.to_sector
                    """ % strings)
                # allocate all other transportation
                runsql("""
                    INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_trans_code)s', shares.to_sector,
                           sum(shares.ex_share * eia.use_btu)
                      FROM %(pa_trans_shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source in %(other_pa)s
                       AND eia.sector = 'AC'
                     GROUP BY shares.to_sector""" % strings)
                # allocate non-transportation petroleum use
                runsql("""
                    INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_nontrans_code)s', to_sector, btu
                      FROM %(pa_nontrans_view)s""" % strings)
                #WHERE eia_sector in ('IC', 'EI')"""
                # dependencies in reverse order
                runsql("DROP VIEW %s" % strings["pa_trans_shares_view"])
                runsql("DROP VIEW %s" % strings["pa_trans_view"])
                runsql("DROP VIEW %s" % strings["pa_nontrans_view"])
                runsql("DROP VIEW %s" % strings["shares_view"])
            else:
                # non-petroleum sources: allocate btu by expenditure share
                runsql("""
                    INSERT INTO %(hybrid_table)s
                    SELECT '%(source_naics)s', shares.to_sector,
                           shares.ex_share * eia.use_btu
                      FROM %(shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source = '%(source)s'
                       AND shares.eia_sector = eia.sector""" % strings)
                runsql("DROP VIEW %s" % strings["shares_view"])
        # insert remainder of standard io table
        energy_sectors = []
        for source in eia.sources:
            energy_sectors.append(eia.source_naics_map[source][year])
        strings["sectors"] = ", ".join(["'%s'" % s for s in energy_sectors])
        db.execute("""
            INSERT INTO %(hybrid_table)s
            SELECT from_sector, to_sector, fob
              FROM %(io_table)s
             WHERE from_sector not in (%(sectors)s)""" % strings)
        # split petroleum column proportional to trans and nontrans uses
        stmt = db.prepare("""
            SELECT trans.use_btu / total.use_btu as trans_share
              FROM (SELECT use_btu FROM %(eia_table)s
                     WHERE source = 'PA' AND sector = 'AC') trans,
                   (SELECT use_btu FROM %(eia_table)s
                     WHERE source = 'PA' AND sector = 'TC') total""" % strings)
        result = stmt()
        # only split if the AC/TC share query produced a value
        if len(result) and len(result[0]):
            strings["pa_naics"] = eia.source_naics_map['PA'][year]
            strings["trans_share"] = result[0][0]
            strings["nontrans_share"] = 1 - result[0][0]
            # transportation petroleum use column
            runsql("""
                INSERT INTO %(hybrid_table)s
                SELECT from_sector, '%(pa_trans_code)s',
                       %(trans_share).4f * expenditure
                  FROM %(hybrid_table)s
                 WHERE to_sector = '%(pa_naics)s'
                """ % strings)
            # non-transportation petroleum use column
            # (rewrites the remaining PA rows in place)
            runsql("""
                UPDATE %(hybrid_table)s
                   SET expenditure = %(nontrans_share).4f * expenditure,
                       to_sector = '%(pa_nontrans_code)s'
                 WHERE to_sector = '%(pa_naics)s'
                """ % strings)
def runsql(sql):
    """Echo a SQL statement to stdout, then execute it against the db."""
    print(sql)
    db.execute(sql)
def create_views():
    """Create WIOD industry-by-industry, co2-intensity, commodity-share
    views, and per-country io_import tables for each study year.

    The io_import tables allocate each country's imported use across
    exporting industries via the exporter's commodity output shares.
    """
    va_sectors = set(config.va_sectors.values())
    fd_sectors = set(config.fd_sectors.values())
    for year in config.STUDY_YEARS:
        strings = {
            "test_schema": common.config.TEST_SCHEMA,
            "schema": config.WIOD_SCHEMA,
            "extra_schema": "wiod_plus",
            "year": year,
            "fd_sectors": sqlhelper.set_repr(fd_sectors),
            "va_sectors": sqlhelper.set_repr(va_sectors),
            "margins": sqlhelper.set_repr(config.margin_sectors)
        }
        ### indbyind tables ignoring imports
        # imports are collapsed into a single 'IMP' row per from-industry
        db.execute("""CREATE OR REPLACE VIEW %(schema)s.indbyind_%(year)d AS
            SELECT country, from_ind, to_ind, value
              FROM %(schema)s.niot_%(year)d
             WHERE NOT is_import
             UNION
            SELECT country, from_ind, 'IMP', sum(value)
              FROM %(schema)s.niot_%(year)d
             WHERE is_import
             GROUP BY country, from_ind""" % strings)
        #continue
        # co2 intensity views
        # put in test since we're just checking results
        sql = """CREATE OR REPLACE VIEW %(test_schema)s.co2_intensity_%(year)d
            AS SELECT a.country, CAST(a.gdp as int) gdp,
                      CAST(b.emissions as int) emissions,
                      b.emissions / a.gdp AS intensity
                 FROM (SELECT country, sum(value) AS gdp
                         FROM %(schema)s.indbyind_%(year)d
                        WHERE from_ind not in %(va_sectors)s
                          AND to_ind in %(fd_sectors)s
                        GROUP BY country) a,
                      (SELECT country, value AS emissions
                         FROM %(schema)s.env_%(year)d
                        where industry = 'total'
                          AND measurement = 'CO2') b
                WHERE a.country = b.country
                ORDER BY country""" % strings
        db.execute(sql)
        # commodity output proportions tables for all countries
        # use_share: each industry's share of a commodity's total output
        sql = """CREATE OR REPLACE VIEW %(schema)s.comshare_%(year)d AS
            SELECT make.country, make.commodity, make.industry,
                   make.value / totals.value AS use_share
              FROM (SELECT country, commodity, industry, value
                      FROM wiod.int_make_%(year)d
                     WHERE commodity not in %(va_sectors)s
                       AND industry not in %(margins)s) make,
                   (SELECT country, commodity, sum(value) as value
                      FROM wiod.int_make_%(year)d
                     WHERE commodity not in %(va_sectors)s
                       AND industry not in %(margins)s
                     GROUP BY country, commodity) totals
             WHERE make.country = totals.country
               AND make.commodity = totals.commodity""" % strings
        db.execute(sql)
        for country in config.countries:
            # table name uses the lowercased country code; the prepared
            # statement below is still invoked with the original code
            strings["country"] = country.lower()
            table = "%(extra_schema)s.%(country)s_io_import_%(year)d" % strings
            strings["io_import_table"] = table
            sql = "DROP TABLE IF EXISTS %(io_import_table)s" % strings
            db.execute(sql)
            # $1 is bound to the country when the prepared stmt is run
            sql = """SELECT comshare.country,
                            comshare.industry AS from_sector,
                            use.industry AS to_sector,
                            sum(use.value * comshare.use_share) AS value
                       INTO %(io_import_table)s
                       FROM %(schema)s.comshare_%(year)d comshare,
                            (SELECT from_country, industry, commodity, value
                               FROM %(schema)s.int_use_%(year)d
                              WHERE to_country = $1
                                AND from_country <> $1) use
                      WHERE comshare.country = use.from_country
                        AND comshare.commodity = use.commodity
                      GROUP BY comshare.country, comshare.industry,
                               use.industry""" % strings
            print(sql)
            stmt = db.prepare(sql)
            stmt(country)
def parse_nipa_data():
    """Parse BEA NIPA PCE source files into database tables.

    Builds, in order:
      - <NIPA_SCHEMA>.pce_codes: the PCE category hierarchy parsed from
        underlying detail table 2.4.5U.
      - <NIPA_SCHEMA>.implicit_price_deflators: GDP and PCE deflators
        from table 1.1.9.
      - per-year <IO_SCHEMA>.pcebridge_<year> and nipa_codes_<year>
        tables from the PCE bridge files, whose formats changed several
        times between 1967 and 2002 (xls, fixed-width text, delimited
        text), handled by the local IONIPAStateTracker class.

    Fixes over the previous revision: regexes are raw strings, the
    mutable default argument in get_file_handle is gone, and parse_text
    no longer references the undefined names `filename` and `this`.
    """
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA
    db.execute("DROP VIEW IF EXISTS %s" % test_view)

    # get table for pce category harmonization: maps raw descriptions to
    # harmonized ones, stripping a trailing parenthesized qualifier
    trailing_pat = re.compile(r'(.+) \(.*\d.*\)$')
    nipa_code_map = {}
    filename = fileutils.getdatapath("nipa_code_map.csv", "usa")
    fh = open(filename)
    csvf = csv.reader(fh)
    for row in csvf:
        if len(row) == 2:
            harmonized = row[0]
            trailing = trailing_pat.match(harmonized)
            if trailing:
                harmonized = trailing.group(1)
            nipa_code_map[row[1]] = harmonized
    fh.close()

    # get nipa series codes from underlying detail tables
    tracker = TableStateTracker()
    tracker.create_table("%s.pce_codes" % config.NIPA_SCHEMA,
                         ["code", "parent", "description"],
                         ["char(7)", "char(7)", "text"], True)
    number_pat = re.compile(r'^\d+$')
    trailing_pat = re.compile(r'(.+) \(.*\d.*\)$')
    filename = fileutils.getcache("Section2All_underlying.csv", "bea", "nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False
    # stacks tracking the current position in the category hierarchy;
    # indentation of the title column encodes the nesting depth
    code_stack = [None]
    indent_stack = [-1]
    # the code mapping has been done such that each item is at least at
    # three levels of disaggregation below the top, i.e. there is always
    # an ancestor at the second level. we only want to keep track of the
    # ancestor at the third level (root is zero)
    # the first level below root has goods and services
    # the second level has durable goods, nondurable goods, and services.
    reverse_code_dict = {}
    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 2.4.5U"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 2.4.5U"):
                    # we only need to go through one instance of this table
                    break
                else:
                    if number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()
                        # these are duplicate codes
                        if title.startswith("Market-based PCE"):
                            continue
                        code = row[2]
                        current_indent = len(row[1]) - len(title)
                        # pop back up to this row's parent level
                        while current_indent <= indent_stack[-1]:
                            indent_stack.pop()
                            code_stack.pop()
                        indent_stack.append(current_indent)
                        code_stack.append(code)
                        if len(code_stack) > 1:
                            parent = code_stack[-2]
                        else:
                            parent = None
                        title = title.strip()
                        trailing = trailing_pat.match(title)
                        if trailing:
                            title = trailing.group(1)
                        # deeper items map back to their third-level ancestor
                        if len(code_stack) > 4:
                            reverse_code_dict[title] = code_stack[3]
                        else:
                            reverse_code_dict[title] = code
                        tracker.insert_row((code, parent, title))
    tracker.flush()
    fh.close()

    # table for price deflators
    tracker.create_table("%s.implicit_price_deflators" % config.NIPA_SCHEMA,
                         ["year", "gdp", "pce"],
                         ["int", "float", "float"])
    filename = fileutils.getcache("Section1all_csv.csv", "bea/nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False
    data = {}
    # we need to parse two rows before we can populate
    years = {}
    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 1.1.9"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 1.1.9"):
                    # this is seasonally adjusted version of the same table
                    break
                else:
                    if row[0] == "Line":
                        # header row: remember which column holds each year
                        for i in range(len(row)):
                            if number_pat.match(row[i]):
                                year = int(row[i])
                                years[year] = i
                                data[year] = {}
                    elif number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()
                        if title == "Gross domestic product":
                            column = "gdp"
                        elif title == "Personal consumption expenditures":
                            column = "pce"
                        else:
                            continue
                        for (year, colindex) in years.items():
                            data[year][column] = float(row[colindex])
    for (year, results) in data.items():
        tracker.insert_row([year, results["gdp"], results["pce"]])
    tracker.flush()
    fh.close()

    # parse pce bridge
    class IONIPAStateTracker(TableStateTracker):
        """State tracker that also manages the current bridge file."""

        def flush(self):
            TableStateTracker.flush(self)
            # close any file left open by get_file_handle
            if self.fh is not None and not self.fh.closed:
                self.fh.close()

        def __init__(self):
            TableStateTracker.__init__(self)
            self.fh = None
            self.code_dict = None
            # bridge value columns, in table column order
            self.value_columns = [
                "prod_val", "rail_margin", "truck_margin", "water_margin",
                "air_margin", "pipe_margin", "gaspipe_margin",
                "wholesale_margin", "retail_margin", "purchase_val"]
            # maps pre-1987 xls column headers to our column names;
            # several tax headers fold into the matching margin column
            self.old_style_field_map = {
                "Producers' Value": "prod_val",
                "MfgExciseTax": "prod_val",
                "RailMargin": "rail_margin",
                "TruckMargin": "truck_margin",
                "WaterMargin": "water_margin",
                "AirMargin": "air_margin",
                "PipeMargin": "pipe_margin",
                "WholesaleMargin": "wholesale_margin",
                "WholesaleTax": "wholesale_margin",
                "RetailMargin": "retail_margin",
                "RetailSalesTax": "retail_margin",
                "OtherRetailTax": "retail_margin",
                "Purchasers' Value": "purchase_val",
            }

        def set_filename(self, filename):
            # resolve against the cache directory for the current year
            path = fileutils.getcache(filename, str(self.year))
            self.filename = path

        def set_year(self, year):
            # flush any pending work, then start the year's bridge table
            self.flush()
            self.year = year
            tablename = "%s.pcebridge_%d" % (config.IO_SCHEMA, year)
            fields = ["pce_code", "commodity"] + self.value_columns
            types = ["varchar(6)", "varchar(6)"] + \
                ["bigint"] * len(self.value_columns)
            self.create_table(tablename, fields, types)

        def setup_for_codes(self):
            self.code_dict = {}

        def flush_codes(self):
            # write the collected pce codes for the current year, joining
            # descriptions to nipa groups through the harmonization maps
            if self.code_dict is not None:
                tablename = "%s.nipa_codes_%d" % (config.IO_SCHEMA, self.year)
                self.create_table(tablename,
                                  ["pce_code", "nipa_group", "description"],
                                  ["varchar(6)", "char(7)", "text"])
                for (code, raw_desc) in self.code_dict.items():
                    desc = raw_desc
                    # strip durable/service suffixes before matching
                    if desc.endswith('(s.)') or desc.endswith('(d.)'):
                        desc = desc[:-4].strip()
                    elif desc.endswith('(n.d.)'):
                        desc = desc[:-6].strip()
                    if desc in nipa_code_map:
                        desc = nipa_code_map[desc]
                    if desc in reverse_code_dict:
                        nipa_code = reverse_code_dict[desc]
                    else:
                        nipa_code = None
                    #self.current_stmt(code, nipa_code, raw_desc)
                    self.table.insert([code, nipa_code, raw_desc])
                self.code_dict = None
            self.flush()

        def insert_code_row(self, code, desc):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(code) is float:
                code = int(code)
            self.code_dict[str(code)] = desc.strip()

        def insert_row(self, pce_code, commod, dollar_values, factor=1):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(pce_code) is float:
                pce_code = int(pce_code)
            values = [str(pce_code).strip(), commod.strip()]
            for column in self.value_columns:
                if column in dollar_values:
                    if factor == 1:
                        values.append(dollar_values[column])
                    else:
                        values.append(
                            int(float(dollar_values[column]) * factor))
                else:
                    values.append(None)
            #self.current_stmt(*values)
            self.table.insert(values)

        def parse_old_style_xls(self, year):
            # pre-1987 workbooks: bridge data and code descriptions live
            # in two worksheets of <year>_PCE_Commodity.xls
            self.set_year(year)
            self.set_filename("%d_PCE_Commodity.xls" % self.year)
            wb = xlrd.open_workbook(self.filename)
            # parse pce bridge data
            sheet = wb.sheet_by_name("%d PCE Workfile - Commodity"
                                     % self.year)
            field_indexes = {}
            pce_code_idx = 0
            commod_idx = 2
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    if "PCE Category" in row:
                        # header row: record where each column lives
                        pce_code_idx = row.index("PCE Category")
                        if "Commodity" in row:
                            commod_idx = row.index("Commodity")
                        for i in range(len(row)):
                            xls_col = row[i]
                            if xls_col in self.old_style_field_map:
                                colname = self.old_style_field_map[xls_col]
                                if colname not in field_indexes:
                                    field_indexes[colname] = []
                                field_indexes[colname].append(i)
                    elif len(field_indexes):
                        pce_code = row[pce_code_idx]
                        commod = str(int(row[commod_idx])).rjust(6, "0")
                        values = {}
                        for (field, columns) in field_indexes.items():
                            # documentation says units are in 100,000
                            # dollars but the orders of magnitude don't
                            # match up with later years if we use 100
                            components = [int(float(row[column] * 1000))
                                          for column in columns]
                            value = 0
                            for component in components:
                                value += component
                            values[field] = value
                        self.insert_row(pce_code, commod, values)
            # parse codes from neighboring worksheet
            self.setup_for_codes()
            sheet = wb.sheet_by_name("%d PCE Category Descriptions"
                                     % self.year)
            code_idx = None
            desc_idx = None
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    # header naming varies: sometimes suffixed with year
                    codetab = "PCE Category Code"
                    codetab2 = "%s - %d" % (codetab, self.year)
                    if codetab in row or codetab2 in row:
                        if codetab in row:
                            code_idx = row.index(codetab)
                        else:
                            code_idx = row.index(codetab2)
                        desctab = "PCE Category Description - %d" % self.year
                        if desctab in row:
                            desc_idx = row.index(desctab)
                        else:
                            desctab = "PCE Category Description"
                            if desctab in row:
                                desc_idx = row.index(desctab)
                    elif code_idx is not None and desc_idx is not None:
                        code = row[code_idx]
                        desc = str(row[desc_idx])
                        self.insert_code_row(code, desc)
            self.flush_codes()

        def get_file_handle(self, filetype, options=None):
            # options defaulted to a mutable {} before; use None sentinel
            if options is None:
                options = {}
            if filetype == "txt":
                self.fh = open(self.filename)
                return self.fh
            elif filetype == "csv":
                self.fh = open(self.filename)
                if "delim" in options:
                    csvf = csv.reader(self.fh, delimiter=options["delim"])
                else:
                    csvf = csv.reader(self.fh)
                return csvf
            elif filetype == "xls":
                wb = xlrd.open_workbook(self.filename)
                return wb

        def parse_text(self, rowcallback):
            # fixed: previously referenced the undefined names `filename`
            # and `this`; set_filename has already resolved the cache path
            f = open(self.filename)
            for line in f:
                rowcallback(line, self)
            f.close()

    tracker = IONIPAStateTracker()
    # pre-1987 years share one xls layout
    tracker.parse_old_style_xls(1967)
    tracker.parse_old_style_xls(1972)
    tracker.parse_old_style_xls(1977)
    tracker.parse_old_style_xls(1982)

    # 1987: fixed-width text; column positions are byte offsets
    tracker.set_year(1987)
    tracker.set_filename("tbld-87.dat")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 103:
            continue
        commod = line[0:6]
        pce_code = line[14:18]
        values = {
            "prod_val": line[21:30],
            "rail_margin": line[30:39],
            "truck_margin": line[39:48],
            "water_margin": line[48:57],
            "air_margin": line[57:66],
            "pipe_margin": line[66:75],
            "wholesale_margin": line[75:84],
            "retail_margin": line[84:93],
            "purchase_val": line[93:102],
        }
        tracker.insert_row(pce_code, commod, values, 1000)
    tracker.setup_for_codes()
    tracker.set_filename("io-nipa.doc")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 27:
            continue
        code = line[0:4].strip()
        desc = line[26:].strip()
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    # 1992: tab-delimited
    tracker.set_year(1992)
    tracker.set_filename("TabD.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        values = {
            "prod_val": row[4],
            "rail_margin": row[5],
            "truck_margin": row[6],
            "water_margin": row[7],
            "air_margin": row[8],
            "pipe_margin": row[9],
            "gaspipe_margin": row[10],
            "wholesale_margin": row[11],
            "retail_margin": row[12],
            "purchase_val": row[13],
        }
        tracker.insert_row(row[2], row[0], values, 1000)
    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        code = row[0]
        desc = row[4]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    # 1997: comma-delimited
    tracker.set_year(1997)
    tracker.set_filename("AppendixC_Detail.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        values = {
            "prod_val": row[3],
            "rail_margin": row[4],
            "truck_margin": row[5],
            "water_margin": row[6],
            "air_margin": row[7],
            "pipe_margin": row[8],
            "gaspipe_margin": row[9],
            "wholesale_margin": row[10],
            "retail_margin": row[11],
            "purchase_val": row[12],
        }
        tracker.insert_row(row[1], row[0], values, 1000)
    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA_PCE.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        code = row[1]
        desc = row[2]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    tracker.set_year(2002)
    tracker.setup_for_codes()
    # do this simultaneously since it's all one file
    tracker.set_filename("2002_PCE_Bridge.xls")
    wb = tracker.get_file_handle("xls")
    naics_pat = re.compile(r'[A-Z0-9]{6}')
    sheet = wb.sheet_by_name("PCE_Bridge_Detail")
    # 2002 has no pce codes; synthesize them from unique descriptions
    pce_codes = []
    for rowindex in range(sheet.nrows):
        row = sheet.row_values(rowindex)
        if len(row) == 13 and naics_pat.match(row[1]):
            pce_desc = row[0]
            # we don't need the distinction between households and
            # nonprofit institutions serving households
            parts = pce_desc.split('-')
            if len(parts) > 1:
                lastpart = parts[-1].strip()
                if lastpart == 'HH' or lastpart == 'NPISH':
                    pce_desc = '-'.join(parts[:-1])
            pce_desc = pce_desc.strip()
            if pce_desc in pce_codes:
                pce_code = pce_codes.index(pce_desc)
            else:
                pce_code = len(pce_codes)
                pce_codes.append(pce_desc)
                tracker.insert_code_row(str(pce_code), pce_desc)
            values = {
                "prod_val": row[3],
                "rail_margin": row[4],
                "truck_margin": row[5],
                "water_margin": row[6],
                "air_margin": row[7],
                "pipe_margin": row[8],
                "gaspipe_margin": row[9],
                "wholesale_margin": row[10],
                "retail_margin": row[11],
                "purchase_val": row[12],
            }
            tracker.insert_row(str(pce_code), row[1], values, 1000)
    tracker.flush_codes()
def dopostparse():
    """Build per-year EIA-to-IO code maps and cross-check views.

    For each study year, creates <IO_SCHEMA>.eia_code_map_<year> mapping
    io sector codes to (eia source, eia sector) via the per-year NAICS
    condition maps, then creates per-source io views and two test-schema
    views comparing EIA totals against the implied IO totals.
    """
    for year in config.STUDY_YEARS:
        tablename = config.IO_SCHEMA + ".eia_code_map_" + str(year)
        codetable = config.IO_SCHEMA + ".codes_" + str(year)
        db.execute("DROP TABLE IF EXISTS %s CASCADE" % tablename)
        db.execute("""
            CREATE TABLE %s (
                io_sector char(6),
                eia_source char(2),
                eia_sector char(2)
            );""" % tablename)
        # older years use the pre-NAICS sector condition map
        if year <= 1992:
            conditions = eia.sector_naics_map_old
        else:
            conditions = eia.sector_naics_map
        for source in eia.sources:
            # AC prices for natural gas are zero for years prior to 1990
            # but except for petroleum, only the use_btu column is looked at
            # so this mapping still works for NG/AC before 1990.
            cases = [
                "WHEN %s THEN '%s'" % (conditions[sector],
                                       eia.valid_sectors_by_source[source][
                                           sector])
                for sector in ("ind", "res", "elec", "com", "trans")
            ]
            # sectors not matched by any condition default to 'IC'
            db.execute("""
                INSERT INTO %s
                SELECT code, '%s', CASE %s ELSE 'IC' END
                  FROM %s;
                """ % (tablename, source, "\n ".join(cases), codetable))
        # create views
        checktotal_parts = []
        checksector_parts = []
        strings = {
            "iotable": "%s.transact_view_%d" % (config.IO_SCHEMA, year),
            "eiatable": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "maptable": "%s.eia_code_map_%d" % (config.IO_SCHEMA, year),
        }
        for (eia_source, io_sectors) in eia.source_naics_map.items():
            if eia_source not in eia.sources:
                # make sure we don't create tables for modified sources
                continue
            strings["io_view"] = "io_%s_%d" % (eia_source.lower(), year)
            strings["from_sector"] = io_sectors[year]
            strings["eia_source"] = eia_source
            # per-source io view with the seds price rescaled to f.o.b.
            runsql("""
                CREATE OR REPLACE VIEW %(io_view)s AS
                SELECT io.to_sector, energy.sector, io.fob, io.cif,
                       energy.price * (cast(io.fob as float) /
                                       cast(io.cif as float)) as price
                  FROM %(iotable)s io, %(maptable)s codes,
                       %(eiatable)s energy
                 WHERE io.from_sector = '%(from_sector)s'
                   AND energy.source = '%(eia_source)s'
                   AND codes.eia_source = '%(eia_source)s'
                   AND io.to_sector = codes.io_sector
                   AND energy.sector = codes.eia_sector;
                """ % strings)
            # comparison against the EIA total-consumption ('TC') row
            checktotal_parts.append("""
                SELECT eia.source, CAST(eia.ex as int) AS eia_ex,
                       SUM(io.cif) AS io_ex,
                       CAST(eia.use_btu as int) AS eia_btu,
                       CAST(SUM(io.fob / io.price) as int) AS io_btu
                  FROM %(eiatable)s eia, %(io_view)s io
                 WHERE eia.source = '%(eia_source)s'
                   AND eia.sector = 'TC'
                 GROUP BY eia.source, eia.ex, eia.use_btu
                """ % strings)
            # same comparison broken out by eia sector
            checksector_parts.append("""
                SELECT eia.source, eia.sector,
                       CAST(eia.ex as int) AS eia_ex,
                       SUM(io.cif) AS io_ex,
                       CAST(eia.use_btu as int) AS eia_btu,
                       CAST(SUM(io.fob / io.price) as int) AS io_btu
                  FROM %(eiatable)s eia, %(io_view)s io
                 WHERE eia.source = '%(eia_source)s'
                   AND eia.sector = io.sector
                   AND eia.sector <> 'TC'
                 GROUP BY eia.source, eia.sector, eia.price,
                          eia.ex, eia.use_btu
                """ % strings)
        checktotal_view = "%s.eia_io_totals_%d" % (common.config.TEST_SCHEMA,
                                                   year)
        checksector_view = "%s.eia_io_sectors_%d" % (common.config.TEST_SCHEMA,
                                                     year)
        runsql("CREATE OR REPLACE VIEW %s AS %s" % \
                   (checktotal_view, "UNION".join(checktotal_parts)))
        runsql("CREATE OR REPLACE VIEW %s AS %s" % \
                   (checksector_view, "UNION".join(checksector_parts)))
def create_views():
    """Create WIOD-derived views and per-country import tables.

    For each year in config.STUDY_YEARS this creates:
      * <WIOD_SCHEMA>.indbyind_<year>: industry-by-industry flows with
        all import rows collapsed into a single 'IMP' destination;
      * <TEST_SCHEMA>.co2_intensity_<year>: CO2 emissions per unit GDP
        per country (placed in the test schema — results-check only);
      * <WIOD_SCHEMA>.comshare_<year>: each industry's share of total
        output of each commodity;
      * wiod_plus.<country>_io_import_<year>: imported use allocated to
        industries via the commodity shares (one table per country).

    NOTE(review): a different `create_views` with the same name exists
    earlier in this file — presumably these are concatenated modules;
    confirm they live in separate namespaces.
    """
    va_sectors = set(config.va_sectors.values())
    fd_sectors = set(config.fd_sectors.values())
    for year in config.STUDY_YEARS:
        strings = {
            "test_schema": common.config.TEST_SCHEMA,
            "schema": config.WIOD_SCHEMA,
            "extra_schema": "wiod_plus",
            "year": year,
            "fd_sectors": sqlhelper.set_repr(fd_sectors),
            "va_sectors": sqlhelper.set_repr(va_sectors),
            "margins": sqlhelper.set_repr(config.margin_sectors)
        }

        ### indbyind tables ignoring imports
        db.execute(
            """CREATE OR REPLACE VIEW %(schema)s.indbyind_%(year)d AS
               SELECT country, from_ind, to_ind, value
                 FROM %(schema)s.niot_%(year)d
                WHERE NOT is_import
                UNION
               SELECT country, from_ind, 'IMP', sum(value)
                 FROM %(schema)s.niot_%(year)d
                WHERE is_import
                GROUP BY country, from_ind""" % strings)

        #continue

        # co2 intensity views
        # put in test since we're just checking results
        sql = """CREATE OR REPLACE VIEW %(test_schema)s.co2_intensity_%(year)d AS
            SELECT a.country, CAST(a.gdp as int) gdp,
                   CAST(b.emissions as int) emissions,
                   b.emissions / a.gdp AS intensity
              FROM (SELECT country, sum(value) AS gdp
                      FROM %(schema)s.indbyind_%(year)d
                     WHERE from_ind not in %(va_sectors)s
                       AND to_ind in %(fd_sectors)s
                     GROUP BY country) a,
                   (SELECT country, value AS emissions
                      FROM %(schema)s.env_%(year)d
                     where industry = 'total'
                       AND measurement = 'CO2') b
             WHERE a.country = b.country
             ORDER BY country""" % strings
        db.execute(sql)

        # commodity output proportions tables for all countries
        sql = """CREATE OR REPLACE VIEW %(schema)s.comshare_%(year)d AS
            SELECT make.country, make.commodity, make.industry,
                   make.value / totals.value AS use_share
              FROM (SELECT country, commodity, industry, value
                      FROM wiod.int_make_%(year)d
                     WHERE commodity not in %(va_sectors)s
                       AND industry not in %(margins)s) make,
                   (SELECT country, commodity, sum(value) as value
                      FROM wiod.int_make_%(year)d
                     WHERE commodity not in %(va_sectors)s
                       AND industry not in %(margins)s
                     GROUP BY country, commodity) totals
             WHERE make.country = totals.country
               AND make.commodity = totals.commodity""" % strings
        db.execute(sql)

        for country in config.countries:
            strings["country"] = country.lower()
            table = "%(extra_schema)s.%(country)s_io_import_%(year)d" % strings
            strings["io_import_table"] = table
            sql = "DROP TABLE IF EXISTS %(io_import_table)s" % strings
            db.execute(sql)
            # $1 is a driver-level positional parameter (db.prepare),
            # bound to `country` when the prepared statement is executed.
            sql = """SELECT comshare.country, comshare.industry AS from_sector,
                            use.industry AS to_sector,
                            sum(use.value * comshare.use_share) AS value
                       INTO %(io_import_table)s
                       FROM %(schema)s.comshare_%(year)d comshare,
                            (SELECT from_country, industry, commodity, value
                               FROM %(schema)s.int_use_%(year)d
                              WHERE to_country = $1
                                AND from_country <> $1) use
                      WHERE comshare.country = use.from_country
                        AND comshare.commodity = use.commodity
                      GROUP BY comshare.country, comshare.industry,
                            use.industry""" % strings
            print(sql)
            stmt = db.prepare(sql)
            stmt(country)
def parse_nipa_data():
    """Parse BEA NIPA underlying-detail CSVs and IO PCE bridge files.

    Populates:
      * <NIPA_SCHEMA>.pce_codes — PCE category code hierarchy from
        NIPA Table 2.4.5U;
      * <NIPA_SCHEMA>.implicit_price_deflators — GDP/PCE deflators
        from NIPA Table 1.1.9;
      * <IO_SCHEMA>.pcebridge_<year> and <IO_SCHEMA>.nipa_codes_<year>
        — PCE bridge values and code descriptions for benchmark years
        1967–2002, whose file formats differ per year.
    """
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA
    db.execute("DROP VIEW IF EXISTS %s" % test_view)

    # get table for pce category harmonization
    # pattern strips a trailing parenthesized note containing a digit,
    # e.g. "Food (2)" -> "Food"
    trailing_pat = re.compile('(.+) \(.*\d.*\)$')
    nipa_code_map = {}
    filename = fileutils.getdatapath("nipa_code_map.csv", "usa")
    fh = open(filename)
    csvf = csv.reader(fh)
    for row in csvf:
        if len(row) == 2:
            harmonized = row[0]
            trailing = trailing_pat.match(harmonized)
            if trailing:
                harmonized = trailing.group(1)
            nipa_code_map[row[1]] = harmonized
    fh.close()

    # get nipa series codes from underlying detail tables
    tracker = TableStateTracker()
    tracker.create_table("%s.pce_codes" % config.NIPA_SCHEMA,
                         ["code", "parent", "description"],
                         ["char(7)", "char(7)", "text"], True)
    number_pat = re.compile('^\d+$')
    trailing_pat = re.compile('(.+) \(.*\d.*\)$')

    filename = fileutils.getcache("Section2All_underlying.csv", "bea", "nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False
    # stacks track the current position in the indentation-encoded
    # category hierarchy; sentinel entries make the root level explicit
    code_stack = [None]
    indent_stack = [-1]
    # the code mapping has been done such that each item is at least at
    # three levels of disaggregation below the top, i.e. there is always
    # an ancestor at the second level. we only want to keep track of the
    # ancestor at the third level (root is zero)
    # the first level below root has goods and services
    # the second level has durable goods, nondurable goods, and services.
    reverse_code_dict = {}
    second_level_nodes = []  # NOTE(review): never populated or read
    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 2.4.5U"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 2.4.5U"):
                    # we only need to go through one instance of this table
                    break
                else:
                    if number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()
                        # these are duplicate codes
                        if title.startswith("Market-based PCE"):
                            continue
                        code = row[2]
                        # leading spaces in the title column encode depth
                        current_indent = len(row[1]) - len(title)
                        while current_indent <= indent_stack[-1]:
                            indent_stack.pop()
                            code_stack.pop()
                        indent_stack.append(current_indent)
                        code_stack.append(code)
                        if len(code_stack) > 1:
                            parent = code_stack[-2]
                        else:
                            parent = None
                        title = title.strip()
                        trailing = trailing_pat.match(title)
                        if trailing:
                            title = trailing.group(1)
                        # deep items map back to their third-level ancestor
                        if len(code_stack) > 4:
                            reverse_code_dict[title] = code_stack[3]
                        else:
                            reverse_code_dict[title] = code
                        tracker.insert_row((code, parent, title))
    tracker.flush()
    fh.close()

    # table for price deflators
    tracker.create_table("%s.implicit_price_deflators" % config.NIPA_SCHEMA,
                         ["year", "gdp", "pce"], ["int", "float", "float"])
    filename = fileutils.getcache("Section1all_csv.csv", "bea/nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False
    data = {}
    # we need to parse two rows before we can populate
    years = {}
    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 1.1.9"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 1.1.9"):
                    # this is seasonally adjusted version of the same table
                    break
                else:
                    if row[0] == "Line":
                        # header row: record which column holds each year
                        for i in range(len(row)):
                            if number_pat.match(row[i]):
                                year = int(row[i])
                                years[year] = i
                                data[year] = {}
                    elif number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()
                        if title == "Gross domestic product":
                            column = "gdp"
                        elif title == "Personal consumption expenditures":
                            column = "pce"
                        else:
                            continue
                        for (year, colindex) in years.items():
                            data[year][column] = float(row[colindex])
    for (year, results) in data.items():
        tracker.insert_row([year, results["gdp"], results["pce"]])
    tracker.flush()
    fh.close()

    # parse pce bridge
    class IONIPAStateTracker(TableStateTracker):
        """Tracker specialized for PCE bridge files; closes over
        nipa_code_map and reverse_code_dict built above."""

        def flush(self):
            TableStateTracker.flush(self)
            if self.fh is not None and not self.fh.closed:
                self.fh.close()

        def __init__(self):
            TableStateTracker.__init__(self)
            self.fh = None            # currently-open source file, if any
            self.code_dict = None     # pce_code -> raw description buffer
            self.value_columns = [
                "prod_val", "rail_margin", "truck_margin", "water_margin",
                "air_margin", "pipe_margin", "gaspipe_margin",
                "wholesale_margin", "retail_margin", "purchase_val"]
            # maps column headers in pre-1987 workbooks to our columns;
            # several tax columns fold into the adjacent margin column
            self.old_style_field_map = {
                "Producers' Value": "prod_val",
                "MfgExciseTax": "prod_val",
                "RailMargin": "rail_margin",
                "TruckMargin": "truck_margin",
                "WaterMargin": "water_margin",
                "AirMargin": "air_margin",
                "PipeMargin": "pipe_margin",
                "WholesaleMargin": "wholesale_margin",
                "WholesaleTax": "wholesale_margin",
                "RetailMargin": "retail_margin",
                "RetailSalesTax": "retail_margin",
                "OtherRetailTax": "retail_margin",
                "Purchasers' Value": "purchase_val",
            }

        def set_filename(self, filename):
            path = fileutils.getcache(filename, str(self.year))
            self.filename = path

        def set_year(self, year):
            self.flush()
            self.year = year
            tablename = "%s.pcebridge_%d" % (config.IO_SCHEMA, year)
            fields = ["pce_code", "commodity"] + self.value_columns
            types = ["varchar(6)", "varchar(6)"] + \
                    ["bigint"]*len(self.value_columns)
            self.create_table(tablename, fields, types)

        def setup_for_codes(self):
            self.code_dict = {}

        def flush_codes(self):
            """Write buffered code descriptions to nipa_codes_<year>,
            harmonizing descriptions and resolving NIPA group codes."""
            if self.code_dict is not None:
                tablename = "%s.nipa_codes_%d" % (config.IO_SCHEMA, self.year)
                self.create_table(tablename,
                                  ["pce_code", "nipa_group", "description"],
                                  ["varchar(6)", "char(7)", "text"])
                for (code, raw_desc) in self.code_dict.items():
                    desc = raw_desc
                    # strip durability suffixes: (s.), (d.), (n.d.)
                    if desc.endswith('(s.)') or desc.endswith('(d.)'):
                        desc = desc[:-4].strip()
                    elif desc.endswith('(n.d.)'):
                        desc = desc[:-6].strip()
                    if desc in nipa_code_map:
                        desc = nipa_code_map[desc]
                    if desc in reverse_code_dict:
                        nipa_code = reverse_code_dict[desc]
                    else:
                        nipa_code = None
                    #self.current_stmt(code, nipa_code, raw_desc)
                    self.table.insert([code, nipa_code, raw_desc])
                self.code_dict = None
                self.flush()

        def insert_code_row(self, code, desc):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(code) is float:
                code = int(code)
            self.code_dict[str(code)] = desc.strip()

        def insert_row(self, pce_code, commod, dollar_values, factor=1):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(pce_code) is float:
                pce_code = int(pce_code)
            values = [str(pce_code).strip(), commod.strip()]
            for column in self.value_columns:
                if column in dollar_values:
                    if factor == 1:
                        values.append(dollar_values[column])
                    else:
                        values.append(
                            int(float(dollar_values[column]) * factor))
                else:
                    values.append(None)
            #self.current_stmt(*values)
            self.table.insert(values)

        def parse_old_style_xls(self, year):
            """Parse a pre-1987 benchmark-year PCE workbook (bridge
            sheet plus category-description sheet)."""
            self.set_year(year)
            self.set_filename("%d_PCE_Commodity.xls" % self.year)
            wb = xlrd.open_workbook(self.filename)
            # parse pce bridge data
            sheet = wb.sheet_by_name("%d PCE Workfile - Commodity" % self.year)
            field_indexes = {}
            pce_code_idx = 0
            commod_idx = 2
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    if "PCE Category" in row:
                        # header row: learn the column layout
                        pce_code_idx = row.index("PCE Category")
                        if "Commodity" in row:
                            commod_idx = row.index("Commodity")
                        for i in range(len(row)):
                            xls_col = row[i]
                            if xls_col in self.old_style_field_map:
                                colname = self.old_style_field_map[xls_col]
                                if colname not in field_indexes:
                                    field_indexes[colname] = []
                                field_indexes[colname].append(i)
                    elif len(field_indexes):
                        # data row (only once the header has been seen)
                        pce_code = row[pce_code_idx]
                        commod = str(int(row[commod_idx])).rjust(6, "0")
                        values = {}
                        for (field, columns) in field_indexes.items():
                            # documentation says units are in 100,000 dollars
                            # but the orders of magnitude don't match up with
                            # later years if we use 100
                            components = [int(float(row[column] * 1000))
                                          for column in columns]
                            value = 0
                            for component in components:
                                value += component
                            values[field] = value
                        self.insert_row(pce_code, commod, values)

            # parse codes from neighboring worksheet
            self.setup_for_codes()
            sheet = wb.sheet_by_name("%d PCE Category Descriptions" % self.year)
            code_idx = None
            desc_idx = None
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    codetab = "PCE Category Code"
                    codetab2 = "%s - %d" % (codetab, self.year)
                    if codetab in row or codetab2 in row:
                        # header row: column titles vary across years
                        if codetab in row:
                            code_idx = row.index(codetab)
                        else:
                            code_idx = row.index(codetab2)
                        desctab = "PCE Category Description - %d" % self.year
                        if desctab in row:
                            desc_idx = row.index(desctab)
                        else:
                            desctab = "PCE Category Description"
                            if desctab in row:
                                desc_idx = row.index(desctab)
                    elif code_idx is not None and desc_idx is not None:
                        code = row[code_idx]
                        desc = str(row[desc_idx])
                        self.insert_code_row(code, desc)
            self.flush_codes()

        def get_file_handle(self, filetype, options={}):
            # NOTE(review): mutable default argument `options={}` — safe
            # here only because options is never mutated; verify.
            if filetype == "txt":
                self.fh = open(self.filename)
                return self.fh
            elif filetype == "csv":
                self.fh = open(self.filename)
                if "delim" in options:
                    csvf = csv.reader(self.fh, delimiter=options["delim"])
                else:
                    csvf = csv.reader(self.fh)
                return csvf
            elif filetype == "xls":
                wb = xlrd.open_workbook(self.filename)
                return wb

        def parse_text(self, rowcallback):
            # NOTE(review): appears dead/broken — `filename` is undefined
            # in this scope (presumably self.filename was intended) and
            # `this` is not a Python name (presumably self). No caller in
            # this file uses it.
            path = fileutils.getcache(filename, str(self.year))
            f = open(path)
            for line in f:
                rowcallback(line, this)
            f.close()

    tracker = IONIPAStateTracker()
    # pre-1987 benchmark years share the old workbook layout
    tracker.parse_old_style_xls(1967)
    tracker.parse_old_style_xls(1972)
    tracker.parse_old_style_xls(1977)
    tracker.parse_old_style_xls(1982)

    # 1987: fixed-width text file
    tracker.set_year(1987)
    tracker.set_filename("tbld-87.dat")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 103:
            continue
        commod = line[0:6]
        pce_code = line[14:18]
        values = {
            "prod_val": line[21:30],
            "rail_margin": line[30:39],
            "truck_margin": line[39:48],
            "water_margin": line[48:57],
            "air_margin": line[57:66],
            "pipe_margin": line[66:75],
            "wholesale_margin": line[75:84],
            "retail_margin": line[84:93],
            "purchase_val": line[93:102],
        }
        tracker.insert_row(pce_code, commod, values, 1000)
    tracker.setup_for_codes()
    tracker.set_filename("io-nipa.doc")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 27:
            continue
        code = line[0:4].strip()
        desc = line[26:].strip()
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    # 1992: tab-delimited files
    tracker.set_year(1992)
    tracker.set_filename("TabD.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        values = {
            "prod_val": row[4],
            "rail_margin": row[5],
            "truck_margin": row[6],
            "water_margin": row[7],
            "air_margin": row[8],
            "pipe_margin": row[9],
            "gaspipe_margin": row[10],
            "wholesale_margin": row[11],
            "retail_margin": row[12],
            "purchase_val": row[13],
        }
        tracker.insert_row(row[2], row[0], values, 1000)
    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        code = row[0]
        desc = row[4]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    # 1997: comma-delimited files
    tracker.set_year(1997)
    tracker.set_filename("AppendixC_Detail.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        values = {
            "prod_val": row[3],
            "rail_margin": row[4],
            "truck_margin": row[5],
            "water_margin": row[6],
            "air_margin": row[7],
            "pipe_margin": row[8],
            "gaspipe_margin": row[9],
            "wholesale_margin": row[10],
            "retail_margin": row[11],
            "purchase_val": row[12],
        }
        tracker.insert_row(row[1], row[0], values, 1000)
    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA_PCE.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        code = row[1]
        desc = row[2]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    # 2002: single workbook holding both values and descriptions;
    # PCE codes are synthesized from the order descriptions appear
    tracker.set_year(2002)
    tracker.setup_for_codes()
    # do this simultaneously since it's all one file
    tracker.set_filename("2002_PCE_Bridge.xls")
    wb = tracker.get_file_handle("xls")
    naics_pat = re.compile('[A-Z0-9]{6}')
    sheet = wb.sheet_by_name("PCE_Bridge_Detail")
    pce_codes = []
    for rowindex in range(sheet.nrows):
        row = sheet.row_values(rowindex)
        if len(row) == 13 and naics_pat.match(row[1]):
            pce_desc = row[0]
            # we don't need the distinction between households and
            # nonprofit institutions service households
            parts = pce_desc.split('-')
            if len(parts) > 1:
                lastpart = parts[-1].strip()
                if lastpart == 'HH' or lastpart == 'NPISH':
                    pce_desc = '-'.join(parts[:-1])
            pce_desc = pce_desc.strip()
            if pce_desc in pce_codes:
                pce_code = pce_codes.index(pce_desc)
            else:
                pce_code = len(pce_codes)
                pce_codes.append(pce_desc)
                tracker.insert_code_row(str(pce_code), pce_desc)
            values = {
                "prod_val": row[3],
                "rail_margin": row[4],
                "truck_margin": row[5],
                "water_margin": row[6],
                "air_margin": row[7],
                "pipe_margin": row[8],
                "gaspipe_margin": row[9],
                "wholesale_margin": row[10],
                "retail_margin": row[11],
                "purchase_val": row[12],
            }
            tracker.insert_row(str(pce_code), row[1], values, 1000)
    tracker.flush_codes()
def dopostparse():
    """Build EIA-to-IO sector code maps and consistency-check views.

    NOTE(review): this is a byte-for-byte duplicate of the
    `dopostparse` defined earlier in this file — presumably an
    artifact of concatenated modules; confirm and deduplicate.

    For each year in config.STUDY_YEARS it creates
    <IO_SCHEMA>.eia_code_map_<year>, per-source IO/EIA join views, and
    two TEST_SCHEMA comparison views (totals and per-sector).
    """
    for year in config.STUDY_YEARS:
        tablename = config.IO_SCHEMA + ".eia_code_map_" + str(year)
        codetable = config.IO_SCHEMA + ".codes_" + str(year)
        db.execute("DROP TABLE IF EXISTS %s CASCADE" % tablename)
        db.execute("""
            CREATE TABLE %s (
                io_sector char(6),
                eia_source char(2),
                eia_sector char(2)
            );""" % tablename)
        # pre-1997-style code tables use the old sector condition map
        if year <= 1992:
            conditions = eia.sector_naics_map_old
        else:
            conditions = eia.sector_naics_map
        for source in eia.sources:
            # AC prices for natural gas are zero for years prior to 1990
            # but except for petroleum, only the use_btu column is looked at
            # so this mapping still works for NG/AC before 1990.
            cases = ["WHEN %s THEN '%s'" % (conditions[sector],
                                            eia.valid_sectors_by_source[source][sector])
                     for sector in ("ind", "res", "elec", "com", "trans")]
            # unmatched IO codes fall back to 'IC'
            db.execute("""
                INSERT INTO %s
                SELECT code, '%s', CASE %s ELSE 'IC' END
                FROM %s;
                """ % (tablename, source, "\n ".join(cases), codetable))

        # create views
        checktotal_parts = []
        checksector_parts = []
        strings = {
            "iotable": "%s.transact_view_%d" % (config.IO_SCHEMA, year),
            "eiatable": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "maptable": "%s.eia_code_map_%d" % (config.IO_SCHEMA, year),
        }
        for (eia_source, io_sectors) in eia.source_naics_map.items():
            if eia_source not in eia.sources:
                # make sure we don't create tables for modified sources
                continue
            strings["io_view"] = "io_%s_%d" % (eia_source.lower(), year)
            strings["from_sector"] = io_sectors[year]
            strings["eia_source"] = eia_source
            # SEDS (c.i.f.) price scaled by fob/cif = effective f.o.b. price
            runsql("""
                CREATE OR REPLACE VIEW %(io_view)s AS
                SELECT io.to_sector, energy.sector, io.fob, io.cif,
                       energy.price * (cast(io.fob as float) /
                                       cast(io.cif as float)) as price
                  FROM %(iotable)s io, %(maptable)s codes, %(eiatable)s energy
                 WHERE io.from_sector = '%(from_sector)s'
                   AND energy.source = '%(eia_source)s'
                   AND codes.eia_source = '%(eia_source)s'
                   AND io.to_sector = codes.io_sector
                   AND energy.sector = codes.eia_sector;
                """ % strings)
            # totals comparison against the EIA 'TC' (total consumption) row
            checktotal_parts.append("""
                SELECT eia.source, CAST(eia.ex as int) AS eia_ex,
                       SUM(io.cif) AS io_ex,
                       CAST(eia.use_btu as int) AS eia_btu,
                       CAST(SUM(io.fob / io.price) as int) AS io_btu
                  FROM %(eiatable)s eia, %(io_view)s io
                 WHERE eia.source = '%(eia_source)s' AND eia.sector = 'TC'
                 GROUP BY eia.source, eia.ex, eia.use_btu
                """ % strings)
            # per-sector comparison, excluding 'TC'
            checksector_parts.append("""
                SELECT eia.source, eia.sector, CAST(eia.ex as int) AS eia_ex,
                       SUM(io.cif) AS io_ex,
                       CAST(eia.use_btu as int) AS eia_btu,
                       CAST(SUM(io.fob / io.price) as int) AS io_btu
                  FROM %(eiatable)s eia, %(io_view)s io
                 WHERE eia.source = '%(eia_source)s'
                   AND eia.sector = io.sector AND eia.sector <> 'TC'
                 GROUP BY eia.source, eia.sector, eia.price, eia.ex, eia.use_btu
                """ % strings)

        checktotal_view = "%s.eia_io_totals_%d" % (common.config.TEST_SCHEMA, year)
        checksector_view = "%s.eia_io_sectors_%d" % (common.config.TEST_SCHEMA, year)
        runsql("CREATE OR REPLACE VIEW %s AS %s" % \
               (checktotal_view, "UNION".join(checktotal_parts)))
        runsql("CREATE OR REPLACE VIEW %s AS %s" % \
               (checksector_view, "UNION".join(checksector_parts)))
def parse_io(): io_files = { 1996: "410281134571.xls", 1999: "4102715414971.xls", 2001: "4122111363671.xls", 2004: "611239581071.xls", 2006: "9121414285971.xls", 2007: "1139203871.xls", 2008: "1139204871.xls", 2009: "11229101502.xls", 2010: "1122910141371.xls", } for (year, filename) in io_files.items(): tablename = "%s.io_%d" % (config.SCHEMA, year) # millions are in NTD table = SQLTable(tablename, ["from_sector", "to_sector", "millions"], ["varchar(255)", "varchar(255)", "float"]) table.create() table.truncate() path = fileutils.getcache(filename, "tw/%d" % year) wb = xlrd.open_workbook(path) sheet = wb.sheets()[0] to_codes = sheet.row_values(0) to_names = sheet.row_values(1) for rowindex in range(2, sheet.nrows): row = sheet.row_values(rowindex) from_code = row[0].strip() from_name = row[1].strip() for i in range(2, len(to_names)): to_name = to_names[i].strip() value = row[i] table.insert([from_name, to_name, value]) if year == 2010: strings = { "viewname": "%s.io_view_%d" % (config.SCHEMA, year), "tablename": tablename, "maptable": "%s.sector_map_%d" % (config.SCHEMA, year), "to_blacklist": sqlhelper.set_repr(config.to_blacklists[year]), "from_blacklist": sqlhelper.set_repr(config.from_blacklists[year]), } sql = """CREATE OR REPLACE VIEW %(viewname)s AS SELECT from_map.io_sector AS from_sector, to_map.io_sector as to_sector, sum(millions) as millions FROM %(tablename)s io, (SELECT DISTINCT io_sector, io_commod FROM %(maptable)s) from_map, (SELECT DISTINCT io_sector, io_ind FROM %(maptable)s) to_map WHERE io.to_sector NOT IN %(to_blacklist)s AND io.from_sector NOT IN %(from_blacklist)s AND from_map.io_commod = io.from_sector AND to_map.io_ind = io.to_sector GROUP BY from_map.io_sector, to_map.io_sector""" % strings print(sql) db.execute(sql)
def create_hybrid_tables():
    """Build hybrid (btu-for-energy, dollars-otherwise) transaction tables.

    For each study year, creates hybrid_transactions_<year> in which
    rows from energy sectors carry EIA btu quantities allocated to
    purchasing industries by their expenditure shares, and all other
    rows carry the original IO f.o.b. expenditure. Petroleum (PA) is
    split into transportation and non-transportation sectors on both
    the row and column side.
    """
    for year in config.STUDY_YEARS:
        strings = {
            # table names
            "eia_table": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "io_table": "%s.transact_view_%d" % (config.IO_SCHEMA, year),
            "map_table": "%s.eia_code_map_%d" % (config.IO_SCHEMA, year),
            "hybrid_table": "hybrid_transactions_%d" % year,
            # selected sector codes
            "pa_trans_code": eia.source_naics_map["PA-trans"][year],
            "pa_nontrans_code": eia.source_naics_map["PA-nontrans"][year],
        }
        runsql("DROP TABLE IF EXISTS %s CASCADE" % strings["hybrid_table"])
        runsql("""
            CREATE TABLE %(hybrid_table)s (
                from_sector varchar(6),
                to_sector varchar(6),
                expenditure float
            )""" % strings)

        for source in eia.sources:
            strings["shares_view"] = "%s_ex_proportions_%s" % (source.lower(), year)
            strings["source"] = source
            strings["source_naics"] = eia.source_naics_map[source][year]

            # seds prices are c.i.f., but when allocating btu among user
            # industries it is unfair to assign based on c.i.f. since what
            # they end up using is f.o.b.
            subquery = """
                SELECT codes.eia_sector, sum(io.fob) as fob
                  FROM %(io_table)s io, %(map_table)s codes
                 WHERE codes.eia_source = '%(source)s'
                   AND io.from_sector = '%(source_naics)s'
                   AND io.to_sector = codes.io_sector
                 GROUP BY codes.eia_sector""" % strings
            strings["subquery"] = subquery

            # the price each industry ends up actually paying for energy
            # should be the f.o.b. price which is (fob / cif) of the seds
            # price
            runsql("""
                CREATE OR REPLACE VIEW %(shares_view)s AS
                SELECT io.to_sector, codes.eia_sector,
                       cast(io.fob as float) / cast(io.cif as float) as fob_share,
                       cast(io.fob as float) / cast(totals.fob as float) as ex_share
                  FROM %(io_table)s io, %(map_table)s codes,
                       (%(subquery)s) totals
                 WHERE codes.eia_source = '%(source)s'
                   AND io.from_sector = '%(source_naics)s'
                   AND io.to_sector = codes.io_sector
                   AND totals.eia_sector = codes.eia_sector""" % strings)

            # split petroleum
            if source == "PA":
                strings["aviation_pa"] = sqlhelper.set_repr(eia.aviation_petroleum)
                strings["other_pa"] = sqlhelper.set_repr(eia.other_petroleum)
                strings["all_pa"] = sqlhelper.set_repr(
                    eia.aviation_petroleum + eia.other_petroleum)
                strings["pa_nontrans_view"] = "pa_nontrans_%d" % year
                strings["pa_trans_view"] = "pa_trans_%d" % year
                strings["pa_trans_shares_view"] = "pa_trans_proportions_%d" % year
                strings["aviation_code"] = eia.air_transportation_codes[year]

                # non transportation petroleum use
                runsql("""
                    CREATE OR REPLACE VIEW %(pa_nontrans_view)s AS
                    SELECT shares.to_sector, shares.eia_sector,
                           sum(shares.ex_share * eia.use_btu) as btu,
                           sum(shares.ex_share * eia.use_btu * eia.price
                               * shares.fob_share) as ex
                      FROM %(shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source in %(all_pa)s
                       AND shares.eia_sector = eia.sector
                       -- these two below are double counted
                       AND eia.source || eia.sector not in ('DFEI', 'PCIC')
                       AND eia.sector <> 'AC'
                     GROUP BY shares.to_sector, shares.eia_sector""" % strings)

                # petroleum use for transportation other than air
                runsql("""
                    CREATE OR REPLACE VIEW %(pa_trans_view)s AS
                    SELECT io.to_sector, io.fob,
                           io.fob - nontrans.ex as remaining
                      FROM %(io_table)s io, %(pa_nontrans_view)s nontrans
                     WHERE io.from_sector = '%(source_naics)s'
                       AND io.to_sector = nontrans.to_sector
                       -- remaining is negative for IC and EI
                       AND nontrans.eia_sector in ('CC', 'RC')
                     UNION
                    SELECT io.to_sector, io.fob,
                           cast(io.fob as float) as remaining
                      FROM %(io_table)s io, %(map_table)s codes
                     WHERE io.from_sector = '%(source_naics)s'
                       AND io.to_sector = codes.io_sector
                       AND codes.eia_source = 'PA'
                       AND codes.eia_sector = 'AC'
                       AND io.to_sector <> '%(aviation_code)s'
                    """ % strings)

                # proportions for petroleum allocated to transportation
                runsql("""
                    CREATE OR REPLACE VIEW %(pa_trans_shares_view)s AS
                    SELECT use.to_sector,
                           use.remaining / total.total as ex_share
                      FROM %(pa_trans_view)s use,
                           (SELECT sum(remaining) as total
                              FROM %(pa_trans_view)s) total""" % strings)

                # allocate all of JF and AV to air transportation
                runsql("""
                    INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_trans_code)s', io.to_sector, sum(eia.use_btu)
                      FROM %(io_table)s io, %(eia_table)s eia
                     WHERE eia.source in %(aviation_pa)s
                       and eia.sector = 'AC'
                       and io.from_sector = '%(source_naics)s'
                       and io.to_sector = '%(aviation_code)s'
                     GROUP BY io.to_sector
                    """ % strings)

                # allocate all other transportation
                runsql("""
                    INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_trans_code)s', shares.to_sector,
                           sum(shares.ex_share * eia.use_btu)
                      FROM %(pa_trans_shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source in %(other_pa)s
                       AND eia.sector = 'AC'
                     GROUP BY shares.to_sector""" % strings)

                # allocate non-transportation petroleum use
                runsql("""
                    INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_nontrans_code)s', to_sector, btu
                      FROM %(pa_nontrans_view)s""" % strings)
                # WHERE eia_sector in ('IC', 'EI')"""

                # dependencies in reverse order
                runsql("DROP VIEW %s" % strings["pa_trans_shares_view"])
                runsql("DROP VIEW %s" % strings["pa_trans_view"])
                runsql("DROP VIEW %s" % strings["pa_nontrans_view"])
                runsql("DROP VIEW %s" % strings["shares_view"])
            else:
                # non-petroleum sources: allocate btu by expenditure share
                runsql("""
                    INSERT INTO %(hybrid_table)s
                    SELECT '%(source_naics)s', shares.to_sector,
                           shares.ex_share * eia.use_btu
                      FROM %(shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source = '%(source)s'
                       AND shares.eia_sector = eia.sector""" % strings)
                runsql("DROP VIEW %s" % strings["shares_view"])

        # insert remainder of standard io table
        energy_sectors = []
        for source in eia.sources:
            energy_sectors.append(eia.source_naics_map[source][year])
        strings["sectors"] = ", ".join(["'%s'" % s for s in energy_sectors])
        db.execute("""
            INSERT INTO %(hybrid_table)s
            SELECT from_sector, to_sector, fob
              FROM %(io_table)s
             WHERE from_sector not in (%(sectors)s)""" % strings)

        # split petroleum column proportional to trans and nontrans uses
        stmt = db.prepare("""
            SELECT trans.use_btu / total.use_btu as trans_share
              FROM (SELECT use_btu FROM %(eia_table)s
                     WHERE source = 'PA' AND sector = 'AC') trans,
                   (SELECT use_btu FROM %(eia_table)s
                     WHERE source = 'PA' AND sector = 'TC') total""" % strings)
        result = stmt()
        if len(result) and len(result[0]):
            strings["pa_naics"] = eia.source_naics_map["PA"][year]
            strings["trans_share"] = result[0][0]
            strings["nontrans_share"] = 1 - result[0][0]
            # transportation petroleum use column: copy the PA column
            # scaled by the transportation share
            runsql("""
                INSERT INTO %(hybrid_table)s
                SELECT from_sector, '%(pa_trans_code)s',
                       %(trans_share).4f * expenditure
                  FROM %(hybrid_table)s
                 WHERE to_sector = '%(pa_naics)s' """ % strings)
            # non-transportation petroleum use column: rewrite the
            # original PA column in place with the remaining share
            runsql("""
                UPDATE %(hybrid_table)s
                   SET expenditure = %(nontrans_share).4f * expenditure,
                       to_sector = '%(pa_nontrans_code)s'
                 WHERE to_sector = '%(pa_naics)s' """ % strings)
def runsql(self, sql): if common.config.DEBUG_MODE: print(sql) db.execute(sql)