def perform_computation(self, df, ameco_df):
    """Compute the UVGDH and KNP series, store them and export step 6.

    ``UVGDH.1.0.0.0`` is read from ``ameco_df`` and ratio-spliced forward with
    the same code read from ``df``. If a ``KeyError`` is raised while doing
    so, the plain ``UVGDH`` series from ``df`` is used instead.
    ``KNP.1.0.212.0`` is copied from ``ameco_df`` unchanged. Each series is
    prefixed with its metadata and appended to ``self.result`` as a new row.

    Args:
        df: data frame passed to ``self.get_data`` for the splice series.
        ameco_df: data frame passed to ``self.get_data`` for the base series.

    Returns:
        ``self.result`` indexed by ('Country Ameco', 'Variable Code'), after
        ``self.apply_scale()`` and the Excel export have run.
    """
    uvgdh, uvgdh_1, knp = 'UVGDH', 'UVGDH.1.0.0.0', 'KNP.1.0.212.0'
    series_meta = self.get_meta(uvgdh)
    splicer = Splicer()
    try:
        series_data = self.get_data(ameco_df, uvgdh_1)
        # The splice direction is passed as ``kind`` by every other
        # ratio_splice call in this file; the former ``type=`` keyword was
        # not covered by the KeyError fallback below.
        series_data = splicer.ratio_splice(
            series_data, self.get_data(df, uvgdh_1), kind='forward')
    except KeyError:
        series_data = self.get_data(df, uvgdh)
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result = self.result.append(series, ignore_index=True, sort=True)

    series_meta = self.get_meta(knp)
    series_data = self.get_data(ameco_df, knp)
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result = self.result.append(series, ignore_index=True, sort=True)

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=6, country=self.country)
    return self.result
def _sum_and_splice(self, addends, df, ameco_h_df, splice=True):
    """Sum the source series of each target variable and splice the total.

    Args:
        addends: mapping of target variable code to an iterable of source
            codes; a source prefixed with ``'-'`` is subtracted.
        df: data frame tried first for every source series.
        ameco_h_df: data frame read for the base series of each target.
        splice: when ``False`` the summed series is stored without splicing.

    Each resulting series is prefixed with its metadata and appended to
    ``self.result``; nothing is returned.
    """
    splicer = Splicer()
    for variable, sources in addends.items():
        series_meta = self.get_meta(variable)
        # Overwritten by get_scale() below as soon as there is a source.
        expected_scale = series_meta.get('Scale')
        try:
            history = self.get_data(ameco_h_df, variable)
        except KeyError:
            history = None

        total = pd.Series()
        for source in sources:
            multiplier = 1
            if source.startswith('-'):
                source, multiplier = source[1:], -1
            src_scale = self.get_scale(source, dataframe=df)
            expected_scale = self.get_scale(variable)
            if src_scale != expected_scale:
                # Convert between scales in powers of 1000.
                exponent = self.codes[src_scale] - self.codes[expected_scale]
                multiplier = multiplier * pow(1000, exponent)
            try:
                term = multiplier * self.get_data(df, source)
            except KeyError:
                # Source not in df: fall back to an already computed series.
                term = multiplier * self.get_data(self.result, source)
            total = total.add(term, fill_value=0)

        if history is not None and splice is not False:
            series_data = splicer.butt_splice(history, total, kind='forward')
        else:
            series_data = total

        # NOTE(review): nesting reconstructed from a collapsed source line;
        # this override is assumed to sit at loop level -- confirm.
        if self.country == 'JP' and variable in ['UUTG.1.0.0.0', 'URTG.1.0.0.0']:
            if variable == 'URTG.1.0.0.0':
                total = (self.get_data(self.result, 'UUTG.1.0.0.0')
                         + self.get_data(self.result, 'UBLG.1.0.0.0'))
            series_data = splicer.ratio_splice(history, total, kind='forward')

        row = pd.Series(series_meta).append(series_data)
        self.result = self.result.append(row, ignore_index=True, sort=True)
def perform_computation(self, df, ameco_df):
    """Compute the labour-market series and export them as step 11.

    Builds, in order: FETD9/FWTD9, the H*W and R*C series derived from
    UWCD/UWWD/UWSC, a set of ratio series (RVGDE, RVGEW, ZATN9, ZETN9, ZUTN9),
    the FETD9 percent change, PLCD/QLCD, and the ``.6.0.0.0`` percent changes
    of the series computed before. Every series is prefixed with its metadata
    and appended to ``self.result``.

    Args:
        df: data frame passed to ``self.get_data`` for the input series.
        ameco_df: data frame passed to ``self.get_data`` for the base series
            used when splicing.

    Returns:
        ``self.result`` indexed by ('Country Ameco', 'Variable Code'), after
        ``self.apply_scale()`` and the Excel export have run.
    """
    operators = Operators()
    splicer = Splicer()

    def store(meta, data):
        # Prefix the data with its metadata and append it as a new result row.
        row = pd.Series(meta).append(data)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    fetd9_code, fwtd9_code = 'FETD9.1.0.0.0', 'FWTD9.1.0.0.0'
    if self.country in FCRIF:
        try:
            fetd9 = self.get_data(df, 'FETD.1.0.0.0')
            fwtd9 = self.get_data(df, 'FWTD.1.0.0.0')
        except KeyError:
            fetd9 = self.get_data(df, 'NETD.1.0.0.0')
            fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
    elif self.country == 'US':
        fetd9 = self.get_data(df, 'NETD.1.0.0.0')
        fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
    else:
        fetd9 = splicer.ratio_splice(
            self.get_data(ameco_df, fetd9_code), self.get_data(df, 'NETD'), kind='forward')
        # NOTE(review): FWTD9 is spliced onto the FETD9 base series, exactly
        # as in the original code; confirm this is not meant to be FWTD9.
        fwtd9 = splicer.ratio_splice(
            self.get_data(ameco_df, fetd9_code), self.get_data(df, 'NWTD'), kind='forward')
    store(self.get_meta(fetd9_code), fetd9.copy())
    store(self.get_meta(fwtd9_code), fwtd9.copy())

    variables = ['UWCD', 'UWWD', 'UWSC']
    variables_1 = [variable + '.1.0.0.0' for variable in variables]
    variables_h1 = [
        re.sub('^U', 'H', variable) + 'W.1.0.0.0' for variable in variables
    ]
    variables_r1 = [
        re.sub('^U', 'R', variable) + 'C.3.1.0.0' for variable in variables
    ]
    private_consumption_u = 'UCPH.1.0.0.0'
    private_consumption_o = 'OCPH.1.0.0.0'
    for source, h_code, r_code in zip(variables_1, variables_h1, variables_r1):
        series_meta = self.get_meta(h_code)
        store(series_meta, self.get_data(df, source) / fwtd9)

        series_meta = self.get_meta(r_code)
        # NOTE(review): divides by UCPH and then by OCPH (not by their
        # ratio); kept as in the original -- confirm the intended deflator.
        series_data = operators.rebase(
            self.get_data(df, source) / fwtd9
            / self.get_data(df, private_consumption_u)
            / self.get_data(df, private_consumption_o),
            base_period=BASE_PERIOD)
        store(series_meta, series_data)

    # NOTE(review): 'RVGEW.1.0.0.0' is listed twice with different inputs,
    # as in the original.
    variables = [
        'RVGDE.1.0.0.0', 'RVGEW.1.0.0.0', 'RVGEW.1.0.0.0', 'ZATN9.1.0.0.0',
        'ZETN9.1.0.0.0', 'ZUTN9.1.0.0.0'
    ]
    numerators = [
        'OVGD.1.0.0.0', 'OVGE.1.0.0.0', 'OVGD.1.0.0.0', 'NLTN.1.0.0.0',
        'NETN.1.0.0.0', 'NUTN.1.0.0.0'
    ]
    denominators = [
        'FETD9.1.0.0.0', 'FETD9.1.0.0.0', 'NETD.1.0.0.0', 'NPAN1.1.0.0.0',
        'NPAN1.1.0.0.0', 'NLTN.1.0.0.0'
    ]
    for variable, numerator, denominator in zip(variables, numerators, denominators):
        series_meta = self.get_meta(variable)
        if denominator == 'FETD9.1.0.0.0':
            # FETD9 was computed above and is not read from df.
            denominator_series = fetd9
        else:
            denominator_series = self.get_data(df, denominator)
        series_data = self.get_data(df, numerator) / denominator_series
        if variable in ['ZATN9.1.0.0.0', 'ZETN9.1.0.0.0', 'ZUTN9.1.0.0.0']:
            series_data = series_data * 100
        store(series_meta, series_data)

    variable = 'FETD9.6.0.0.0'
    series_meta = self.get_meta(variable)
    store(series_meta, fetd9.pct_change() * 100)

    # NOTE(review): the ZUTN series built below is never appended to
    # self.result; the statements are kept because their lookups can raise.
    variable = 'ZUTN.1.0.0.0'
    if self.country in EU:
        # ZUTN based on NUTN.1.0.0.0 and NETN.1.0.0.0 (18/01/2017) is commented out in FDMS+
        last_observation = self.get_data(ameco_df, variable).last_valid_index()
        series_meta = self.get_meta(variable)
        series_data = round(
            self.get_data(df, 'NUTN')
            / (self.get_data(df, 'NUTN') + self.get_data(df, 'NETN')) * 100, 1) + round(
            self.get_data(ameco_df, 'NUTN.1.0.0.0')[last_observation]
            - self.get_data(df, 'NUTN')
            / (self.get_data(df, 'NUTN')[last_observation]
               + self.get_data(df, 'NETN')[last_observation]), 1)
        series_data = splicer.butt_splice(
            self.get_data(ameco_df, variable), self.get_data(ameco_df, variable),
            kind='forward')
    else:
        try:
            netn1 = self.get_data(df, 'NETN.1.0.0.0')
        except KeyError:
            netn1 = self.get_data(df, 'NETN')
        # netn1 is already a series; it used to be passed back into
        # get_data() as if it were a variable code.
        series_data = splicer.level_splice(
            self.get_data(ameco_df, variable),
            self.get_data(df, 'NUTN.1.0.0.0')
            / (self.get_data(df, 'NUTN.1.0.0.0') + netn1) * 100)
    # NUTN ratiospliced (18/01/2017) is commented out in FDMS+

    variables = ['PLCD.3.1.0.0', 'QLCD.3.1.0.0']
    numerators = ['HWCDW.1.0.0.0', 'PLCD.3.1.0.0']
    denominators = ['RVGDE.1.0.0.0', 'PVGD.3.1.0.0']
    for variable, numerator, denominator in zip(variables, numerators, denominators):
        series_meta = self.get_meta(variable)
        if denominator == 'PVGD.3.1.0.0':
            denominator_series = self.get_data(df, denominator)
        else:
            denominator_series = self.get_data(self.result, denominator)
        series_data = operators.rebase(
            self.get_data(self.result, numerator) / denominator_series,
            base_period=BASE_PERIOD)
        store(series_meta, series_data)

    variables = [
        'RWCDC.3.1.0.0', 'PLCD.3.1.0.0', 'QLCD.3.1.0.0', 'HWCDW.1.0.0.0',
        'HWSCW.1.0.0.0', 'HWWDW.1.0.0.0', 'RVGDE.1.0.0.0', 'RVGEW.1.0.0.0'
    ]
    for variable in variables:
        # Swap the trailing code for '.6.0.0.0' to name the percent change.
        variable_6 = re.sub('.....0.0$', '.6.0.0.0', variable)
        series_meta = self.get_meta(variable_6)
        store(series_meta, self.get_data(self.result, variable).pct_change() * 100)

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=11, country=self.country)
    return self.result
def rebase(self, series, base_period):
    """Ratio-splice ``series`` in both directions onto a base of 100.

    Args:
        series: the series to rebase.
        base_period: index label at which the one-point base series is 100.

    Returns:
        Whatever ``Splicer.ratio_splice`` returns for ``kind='both'``.
    """
    anchor = pd.Series({base_period: 100})
    return Splicer().ratio_splice(anchor, series, kind='both')
def perform_computation(self, df, ameco_df):
    """Compute the national accounts (volume) series and export step 4.

    Sections, in order: the ``.1.0.0.0`` volume series for every code in
    ``NA_VO``; trade aggregates via ``self._get_data``; net exports,
    investments and domestic-demand aggregates; rebasing, percent change and
    contribution to GDP change for every ``NA_VO`` code; per-capita GDP;
    terms of trade; and the OVGD / ``T_VO`` copies under other codes.
    Results are appended to ``self.result``.

    Args:
        df: data frame passed to ``self.get_data`` for the input series.
        ameco_df: data frame passed to ``self.get_data`` for the base series.

    Returns:
        Tuple ``(self.result, ovgd1)``: the result frame indexed by
        ('Country Ameco', 'Variable Code') and the OVGD.1.0.0.0 series.
    """
    for variable in NA_VO:
        new_variable = variable + '.1.0.0.0'
        u_variable = re.sub('^.', 'U', variable)
        variable11 = variable + '.1.1.0.0'
        if self.country in FCWVACP:
            try:
                new_data = self.splicer.ratio_splice(
                    self.get_data(ameco_df, u_variable),
                    self.get_data(df, variable), kind='forward')
            except KeyError:
                logger.error(
                    'Failed to calculate {} (national accounts volume).'.format(variable))
                continue
        else:
            try:
                series = self.get_data(df, variable)
                u_series = self.get_data(df, u_variable)
            except KeyError:
                logger.error(
                    'Failed to calculate {} (national accounts volume).'.format(variable))
                continue
            # Reset so that a missing AMECO series cannot reuse the base of a
            # previous variable (or be unbound on the first iteration).
            # NOTE(review): assumes splice_and_level_forward accepts a None base.
            series11 = None
            try:
                series11 = self.get_data(ameco_df, variable11)
                # NOTE(review): hard-coded year, kept as in the original.
                series11[2019] = pd.np.nan
            except KeyError:
                logger.warning(
                    'Missing Ameco data for variable {} (national accounts volume). Using data '
                    'from country desk forecast'.format(variable11))
            splice_series = (series / u_series.shift(1) - 1) * 100
            # RatioSplice(base, level(series)) = base * (1 + 0,01 * series)
            new_data = self.splicer.splice_and_level_forward(series11, splice_series)
        new_meta = pd.Series(self.get_meta(new_variable))
        new_series = new_meta.append(new_data)
        self.result = self.result.append(new_series, ignore_index=True)

    # Imports / exports of goods and services
    omgs, oxgs, obgn, obsn, oigp = (
        'OMGS.1.0.0.0', 'OXGS.1.0.0.0', 'OBGN.1.0.0.0', 'OBSN.1.0.0.0', 'OIGP.1.0.0.0')
    # NOTE(review): the OBSN entry takes its new/u imports from the goods
    # codes (OMGN/UMGN) and the OIGP entry uses OIGG for both sides; kept as
    # in the original -- confirm against self._get_data.
    variables = {
        omgs: {
            'ameco': 'OMGS.1.1.0.0', 'goods': 'OMGN', 'services': 'OMSN',
            'u_goods': 'UMGN', 'u_services': 'UMSN'
        },
        oxgs: {
            'ameco': 'OXGS.1.1.0.0', 'goods': 'OXGN', 'services': 'OXSN',
            'u_goods': 'UXGN', 'u_services': 'UXSN'
        },
        obgn: {
            'exports': 'OXGN.1.1.0.0', 'imports': 'OMGN.1.1.0.0',
            'new_exports': 'OXGN', 'u_exports': 'UXGN',
            'new_imports': 'OMGN', 'u_imports': 'UMGN'
        },
        obsn: {
            'exports': 'OXSN.1.1.0.0', 'imports': 'OMSN.1.1.0.0',
            'new_exports': 'OXSN', 'u_exports': 'UXSN',
            'new_imports': 'OMGN', 'u_imports': 'UMGN'
        },
        oigp: {
            'exports': 'OIGT.1.1.0.0', 'imports': 'OIGG.1.1.0.0',
            'new_exports': 'OIGG', 'u_exports': 'UIGG',
            'new_imports': 'OIGG', 'u_imports': 'UIGG'
        },
    }
    for variable in variables:
        try:
            base_series, splice_series_1, splice_series_2 = self._get_data(
                variable, variables[variable], df, ameco_df)
        except TypeError:
            logger.error(
                'Missing data for variable {} in national accounts volume'.format(variable))
            # Nothing was unpacked: skip instead of updating the result with
            # the splice series of the previous variable.
            continue
        self._update_result(variable, base_series, splice_series_1, splice_series_2)

    # Net exports goods and services
    var = 'OBGS.1.0.0.0'
    ameco_exports = 'OXGS.1.1.0.0'
    ameco_imports = 'OMGS.1.1.0.0'
    goods_exports = 'OXGN'
    services_exports = 'OXSN'
    goods_imports = 'OMGN'
    services_imports = 'OMSN'
    u_goods_exports = 'UXGN'
    u_services_exports = 'UXSN'
    u_goods_imports = 'UMGN'
    u_services_imports = 'UMSN'
    export_series = self.get_data(df, goods_exports) + self.get_data(df, services_exports)
    import_series = self.get_data(df, goods_imports) + self.get_data(df, services_imports)
    u_exports = self.get_data(df, u_goods_exports) + self.get_data(df, u_services_exports)
    u_imports = self.get_data(df, u_goods_imports) + self.get_data(df, u_services_imports)
    base_series = self.get_data(ameco_df, ameco_exports) - self.get_data(ameco_df, ameco_imports)
    splice_series_1 = export_series - import_series
    splice_series_2 = ((export_series - import_series)
                       / (u_exports - u_imports).shift(1) - 1) * 100
    self._update_result(var, base_series, splice_series_1, splice_series_2)

    # Investments
    var = 'OIGNR.1.0.0.0'
    ameco_1 = 'OIGCO.1.1.0.0'
    ameco_2 = 'OIGDW.1.1.0.0'
    investments_1 = 'OIGCO'
    investments_2 = 'OIGDW'
    u_investments_1 = 'UIGCO'
    u_investments_2 = 'UIGDW'
    net_series = self.get_data(df, investments_1) - self.get_data(df, investments_2)
    u_net_series = self.get_data(df, u_investments_1) - self.get_data(df, u_investments_2)
    base_series = self.get_data(ameco_df, ameco_1) - self.get_data(ameco_df, ameco_2)
    splice_series_1 = net_series.copy()
    splice_series_2 = (net_series / u_net_series.shift(1) - 1) * 100
    self._update_result(var, base_series, splice_series_1, splice_series_2)

    # Domestic demand
    var = 'OUNF.1.0.0.0'
    private_consumption = 'OCPH.1.1.0.0'
    government_consumption = 'OCTG.1.1.0.0'
    use_ameco = 'OIGT.1.1.0.0'
    new_private_consumption = 'OCPH'
    new_government_consumption = 'OCTG'
    new_use = 'OIGT'
    u_new_private_consumption = 'UCPH'
    u_new_government_consumption = 'UCTG'
    u_new_use = 'UIGT'
    u_series = (self.get_data(df, u_new_private_consumption)
                + self.get_data(df, u_new_government_consumption)
                + self.get_data(df, u_new_use))
    base_series = (self.get_data(ameco_df, private_consumption)
                   + self.get_data(ameco_df, government_consumption)
                   + self.get_data(ameco_df, use_ameco))
    splice_series_1 = (self.get_data(df, new_private_consumption)
                       + self.get_data(df, new_government_consumption)
                       + self.get_data(df, new_use))
    splice_series_2 = (splice_series_1 / u_series.shift(1) - 1) * 100
    self._update_result(var, base_series, splice_series_1, splice_series_2)

    # Domestic demand
    variables = {
        'OUNT.1.0.0.0': ['OUNT.1.1.0.0', 'OCPH', 'OCTG', 'OIGT', 'OIST'],
        'OUTT.1.0.0.0': ['OUTT.1.1.0.0', 'OCPH', 'OCTG', 'OIGT', 'OIST', 'OXGN', 'OXSN'],
        'OITT.1.0.0.0': ['OITT.1.0.0.0', 'OIGT', 'OIST']
    }
    for var, new_vars in variables.items():
        base_series = None
        splice_series_1 = sum([self.get_data(df, v) for v in new_vars[1:]])
        try:
            # NOTE(review): the base is read from df, not ameco_df, as in the
            # original -- confirm.
            base_series = self.get_data(df, new_vars[0])
        except KeyError:
            logger.warning(
                'No historical data for {} to level_splice, country {}, using country forecast '
                'data.'.format(new_vars[0], self.country))
        if self.country not in FCWVACP:
            u_new_vars = [re.sub('^.', 'U', v) for v in new_vars[1:]]
            try:
                # Sum the U-series built above (the O-series were summed a
                # second time before) and parenthesise the growth rate like
                # the parallel formulas: (x / u(-1) - 1) * 100.
                sum_u_series = sum(self.get_data(df, v) for v in u_new_vars)
                splice_series_2 = (splice_series_1.copy() / sum_u_series.shift(1) - 1) * 100
                self._update_result(var, base_series, splice_series_1, splice_series_2)
            except KeyError:
                logger.error(
                    'Missing data for variable {} in national accounts volume (172)'
                    .format(var))
        else:
            self._update_result(var, base_series, splice_series_1, None)

    # Volume, rebase to baseperiod, percent change, contribution to percent change in GDP
    for var in NA_VO:
        new_variable = var + '.1.0.0.0'
        u1_variable = re.sub('^.', 'U', var) + '.1.0.0.0'
        # TODO: Review this
        new_vars = ['OXGS.1.0.0.0', 'OVGE.1.0.0.0']
        if new_variable in self.result['Variable Code'].values.tolist() + new_vars:
            if new_variable not in new_vars:
                result_series_index = self.get_index(new_variable)
                series_orig = self.result.loc[result_series_index]
                data_orig = pd.to_numeric(
                    series_orig.filter(regex=r'[0-9]{4}'), errors='coerce')
            else:
                # NOTE(review): data_orig / result_series_index are not
                # refreshed on this path, yet they are used below -- as in
                # the original; nesting reconstructed from collapsed source.
                logger.error(
                    'Missing data for variable {} in national accounts volume'
                    .format(u1_variable))

            # Rebase to baseperiod
            if u1_variable in df.index.get_level_values('Variable Code'):
                series_meta = self.get_meta(new_variable)
                u1_series = self.get_data(df, u1_variable)
                value_to_rebase = data_orig[BASE_PERIOD] / u1_series[BASE_PERIOD]
                series_data = data_orig * value_to_rebase
                series = pd.Series(series_meta)
                series = series.append(series_data)
                self.result.iloc[result_series_index] = series
            else:
                logger.error(
                    'Missing data for variable {} in national accounts volume'
                    .format(u1_variable))

            # Percent change
            variable_6 = var + '.6.0.0.0'
            series_meta = self.get_meta(variable_6)
            series_data = data_orig.pct_change() * 100
            series = pd.Series(series_meta)
            series = series.append(series_data)
            self.result = self.result.append(series, ignore_index=True, sort=True)

            # Contribution to percent change in GDP
            variable_c1 = re.sub('^.', 'C', var) + '.1.0.0.0'
            variable_x = new_variable if self.country in ['MT', 'TR'] else u1_variable
            series_6_index = self.get_index(variable_6)
            data_6 = self.get_data(self.result, variable_6)
            xvgd = 'OVGD.1.0.0.0' if self.country in ['MT', 'TR'] else 'UVGD.1.0.0.0'
            series_meta = self.get_meta(variable_c1)
            data_x = self.get_data(df, variable_x).shift(1)
            data_xvgd = self.get_data(df, xvgd).shift(1)
            if variable_c1 not in ['CBGN.1.0.0.0']:
                # Back-fill the 1996 values from AMECO in both lagged series.
                try:
                    data_x[1996] = self.get_data(ameco_df, variable_x)[1996]
                except KeyError:
                    pass
                try:
                    # The GDP value belongs in the denominator series; it
                    # used to overwrite data_x[1996].
                    data_xvgd[1996] = self.get_data(ameco_df, xvgd)[1996]
                except KeyError:
                    pass
            try:
                series_data = data_6 * data_x / data_xvgd
            except KeyError:
                logger.error(
                    'Missing data for variable {} in national accounts volume'
                    .format(new_variable))
                continue
            series = pd.Series(series_meta)
            series = series.append(series_data)
            self.result = self.result.append(series, ignore_index=True, sort=True)
        else:
            logger.error(
                'Missing data for variable {} in national accounts volume'.format(new_variable))
        if new_variable == 'OVGD.1.0.0.0':
            ovgd1 = self.get_data(self.result, 'OVGD.1.0.0.0')

    # Contribution to percent change in GDP (calculation for additional variables)
    var = 'CMGS.1.0.0.0'
    series_meta = self.get_meta(var)
    series_data = -self.get_data(self.result, var)
    index = self.get_index(var)
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result.iloc[index] = series

    var = 'CBGS.1.0.0.0'
    exports = 'CXGS.1.0.0.0'
    imports = 'CMGS.1.0.0.0'
    series_meta = self.get_meta(var)
    series_meta['Variable Code'] = var
    series_data = self.get_data(self.result, exports) + self.get_data(self.result, imports)
    index = self.get_index(var)
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result = self.result.append(series, ignore_index=True, sort=True)

    # TODO: If Country in group 'Forecast: Countries with volumes at constant prices' line 202 country calc
    # Per-capita GDP
    # TODO: fix scale, frequency and country everywhere
    # TODO: fix this
    new_variable = 'RVGDP.1.0.0.0'
    ameco_variable = 'RVGDP.1.1.0.0'
    variable_6 = re.sub('.1.0.0.0', '.6.0.0.0', new_variable)
    total_population = 'NPTD.1.0.0.0'
    series_meta = self.get_meta(new_variable)
    series_6_meta = self.get_meta(variable_6)
    ameco_series = self.get_data(ameco_df, ameco_variable)
    splice_series = ovgd1 / self.get_data(df, total_population)
    splicer = Splicer()
    series_data = splicer.ratio_splice(ameco_series, splice_series, kind='forward')
    series_6_data = series_data.pct_change() * 100
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result = self.result.append(series, ignore_index=True, sort=True)
    series_6 = pd.Series(series_6_meta)
    series_6 = series_6.append(series_6_data)
    self.result = self.result.append(series_6, ignore_index=True, sort=True)

    # TODO: Do not add series if they're already there, i.e. df.loc['BE','UMGS'] is repeated
    # Terms of trade
    variables = ['APGN.3.0.0.0', 'APSN.3.0.0.0', 'APGS.3.0.0.0']
    exports_1 = ['UXGN.1.0.0.0', 'UXSN.1.0.0.0', 'UXGS.1.0.0.0']
    exports_2 = ['OXGN.1.0.0.0', 'OXSN.1.0.0.0', 'OXGS.1.0.0.0']
    imports_1 = ['UMGN.1.0.0.0', 'UMSN.1.0.0.0', 'UMGS.1.0.0.0']
    imports_2 = ['OMGN.1.0.0.0', 'OMSN.1.0.0.0', 'OMGS.1.0.0.0']
    for index, variable in enumerate(variables):
        series_meta = self.get_meta(variable)
        series_data = (
            self.get_data(df, exports_1[index])
            / self.get_data(self.result, exports_2[index])
            / (self.get_data(df, imports_1[index])
               / self.get_data(self.result, imports_2[index]))) * 100
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

        variable_6 = re.sub('3', '6', variable)
        series_meta = self.get_meta(variable_6)
        series_data = series_data.pct_change() * 100
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

    # Set up OVGD.6.1.212.0 for World GDP volume table
    variable = 'OVGD.6.1.212.0'
    series_meta = self.get_meta(variable)
    series_data = self.get_data(self.result, 'OVGD.6.0.0.0')
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result = self.result.append(series, ignore_index=True, sort=True)

    # Convert percent change of trade variables (volume) from national currency to USD
    for variable in T_VO:
        new_variable = variable + '.6.0.30.0'
        variable_6 = variable + '.6.0.0.0'
        series_meta = self.get_meta(new_variable)
        series_data = self.get_data(self.result, variable_6)
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

    series_meta = self.get_meta('OVGD.1.0.0.0')
    series = pd.Series(series_meta)
    # TODO: This shouldn't be needed... Check what's going on
    series = series.append(ovgd1)
    self.result = self.result.append(series, ignore_index=True, sort=True)

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=4, country=self.country)
    return self.result, ovgd1
def perform_computation(self, ameco_db_df, xr_df, ameco_xne_us_df):
    """Compute the exchange-rate and interest-rate series and export step 10.

    Builds XNE, ILN/ISN, XNEF/XNEB, XNU and a list of PLCDQ/XUNNQ/XUNRQ
    series (plus their percent changes), appending each to ``self.result``.
    Codes that could not be read are written to ``errors_step_10.txt``.

    Args:
        ameco_db_df: data frame read for the base series.
        xr_df: data frame read for the series spliced onto the base.
        ameco_xne_us_df: data frame read for the US XNE base series.

    Returns:
        ``self.result`` indexed by ('Country Ameco', 'Variable Code'), after
        ``self.apply_scale()`` and the Excel export have run.
    """
    splicer = Splicer()
    xne_code = 'XNE.1.0.99.0'
    xnef_code = 'XNEF.1.0.99.0'
    xneb_code = 'XNEB.1.0.99.0'

    def store(meta, data):
        # Prefix the data with its metadata and append it as a new result row.
        row = pd.Series(meta)
        row = row.append(data)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    xne = self.get_data(ameco_db_df, xne_code)
    try:
        xr_xne = self.get_data(xr_df, xne_code)
    except KeyError:
        pass
    else:
        # Blank the base after the first valid year of the xr_df series,
        # then ratio-splice that series forward onto it.
        cutoff = xr_xne.first_valid_index()
        for year in range(cutoff + 1, LAST_YEAR + 1):
            xne[year] = pd.np.nan
        xne = splicer.ratio_splice(xne.copy(), xr_xne, kind='forward')
    store(self.get_meta(xne_code), xne)

    null_dates = list(range(int(datetime.datetime.now().year) - 1, LAST_YEAR))
    rate_pairs = zip(['ILN.1.0.0.0', 'ISN.1.0.0.0'], ['ILN.1.1.0.0', 'ISN.1.1.0.0'])
    for target, source in rate_pairs:
        rate_meta = self.get_meta(target)
        rate = self.get_data(ameco_db_df, source, null_dates=null_dates)
        rate = splicer.butt_splice(rate, self.get_data(xr_df, source), kind='forward')
        store(rate_meta, rate)

    if self.country in EA:
        membership_date = get_membership_date(self.country)
        for year in range(membership_date, LAST_YEAR + 1):
            self.result.loc[self.result['Variable Code'] == 'XNE.1.0.99.0', year] = 1

        xnef_meta = self.get_meta(xnef_code)
        xnef = self.get_data(ameco_db_df, xne_code)
        final_year = xnef.last_valid_index()
        if final_year < LAST_YEAR:
            # Carry the last valid value forward up to LAST_YEAR.
            for year in range(final_year + 1, LAST_YEAR + 1):
                xnef[year] = xnef[final_year]
        store(xnef_meta, xnef)

        xneb_meta = self.get_meta(xneb_code)
        xneb = self.get_data(self.result, xne_code) * self.get_data(self.result, xnef_code)
        for year in range(membership_date, LAST_YEAR + 1):
            self.result.loc[self.result['Variable Code'] == 'XNEF.1.0.99.0', year] = pd.np.nan
        store(xneb_meta, xneb)
    else:
        xneb_meta = self.get_meta(xneb_code)
        xneb = self.get_data(self.result, xne_code).copy()
        store(xneb_meta, xneb)

    us_splice = self.get_data(xr_df, xne_code, country='US')
    us_cutoff = us_splice.first_valid_index()
    us_base = self.get_data(ameco_xne_us_df, xne_code, country='US')
    for year in range(us_cutoff + 1, LAST_YEAR + 1):
        us_base[year] = pd.np.nan
    xnu_meta = self.get_meta('XNU.1.0.30.0')
    store(xnu_meta, splicer.ratio_splice(us_base, us_splice, kind='forward'))

    # Effective exchange rates and relative unit labour costs, currently not calculated in FDMS+
    level_codes = [
        'PLCDQ.3.0.0.437', 'PLCDQ.3.0.30.437', 'XUNNQ.3.0.30.437',
        'XUNRQ.3.0.30.437', 'PLCDQ.3.0.0.414', 'PLCDQ.3.0.0.415',
        'PLCDQ.3.0.0.417', 'PLCDQ.3.0.0.424', 'PLCDQ.3.0.0.427',
        'PLCDQ.3.0.0.435', 'PLCDQ.3.0.0.436', 'PLCDQ.3.0.30.414',
        'PLCDQ.3.0.30.415', 'PLCDQ.3.0.30.417', 'PLCDQ.3.0.30.424',
        'PLCDQ.3.0.30.427', 'PLCDQ.3.0.30.435', 'PLCDQ.3.0.30.436',
        'XUNNQ.3.0.30.414', 'XUNNQ.3.0.30.415', 'XUNNQ.3.0.30.417',
        'XUNNQ.3.0.30.423', 'XUNNQ.3.0.30.424', 'XUNNQ.3.0.30.427',
        'XUNNQ.3.0.30.435', 'XUNNQ.3.0.30.436', 'XUNNQ.3.0.30.441',
        'XUNRQ.3.0.30.414', 'XUNRQ.3.0.30.415', 'XUNRQ.3.0.30.417',
        'XUNRQ.3.0.30.424', 'XUNRQ.3.0.30.427', 'XUNRQ.3.0.30.435',
        'XUNRQ.3.0.30.436'
    ]
    missing_vars = []
    for code in level_codes:
        level_meta = self.get_meta(code)
        try:
            level = self.get_data(ameco_db_df, code)
        except KeyError:
            missing_vars.append(code)
        else:
            store(level_meta, level)

    # Percent changes of series stored above, processed in the original order.
    change_codes = [
        'PLCDQ.6.0.0.437', 'PLCDQ.6.0.0.435', 'PLCDQ.6.0.0.436',
        'XUNNQ.6.0.30.437', 'XUNRQ.6.0.30.437', 'XUNNQ.6.0.30.435',
        'XUNNQ.6.0.30.436', 'XUNRQ.6.0.30.435', 'XUNRQ.6.0.30.436'
    ]
    change_sources = [
        'PLCDQ.3.0.0.437', 'PLCDQ.3.0.0.435', 'PLCDQ.3.0.0.436',
        'XUNNQ.3.0.30.437', 'XUNRQ.3.0.30.437', 'XUNNQ.3.0.30.435',
        'XUNNQ.3.0.30.436', 'XUNRQ.3.0.30.435', 'XUNRQ.3.0.30.436'
    ]
    for code, source in zip(change_codes, change_sources):
        change_meta = self.get_meta(code)
        try:
            change = self.get_data(self.result, source).copy().pct_change() * 100
        except (KeyError, IndexError):
            missing_vars.append(code)
        else:
            store(change_meta, change)

    # TODO: is it OK? these are missing in ameco_db: PLCDQ.3.0.0.414 PLCDQ.3.0.0.435 PLCDQ.3.0.0.436
    # PLCDQ.3.0.30.414 PLCDQ.3.0.30.435 PLCDQ.3.0.30.436 XUNNQ.3.0.30.414 XUNNQ.3.0.30.423 XUNNQ.3.0.30.435
    # XUNNQ.3.0.30.436 XUNNQ.3.0.30.441 XUNRQ.3.0.30.414 XUNRQ.3.0.30.435 XUNRQ.3.0.30.436 PLCDQ.6.0.0.435
    # PLCDQ.6.0.0.436
    with open('errors_step_10.txt', 'w') as f:
        f.write('\n'.join(missing_vars))

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=10, country=self.country)
    return self.result
def perform_computation(self, df, ameco_df):
    """Splice transfer-matrix variables onto their AMECO series; export step 1.

    For every row of ``df`` whose variable code (second element of the index
    tuple) is in ``TM`` but not in ``NA_VO``, two rows are appended to
    ``self.result``: the combined ``<code>.1.0.0.0`` series and the
    unmodified series from ``df``. How the two inputs are combined depends on
    membership of ``TM_TBBO`` / ``TM_TBM``.

    Args:
        df: data frame whose rows are iterated and read via ``self.get_data``.
        ameco_df: data frame read for the base series.

    Returns:
        ``self.result`` indexed by ('Country Ameco', 'Variable Code'), after
        ``self.apply_scale()`` and the Excel export have run.
    """
    for row_key, _ in df.iterrows():
        variable = row_key[1]
        # Convert all transfer matrix variables to 1.0.0.0 (except National Account (volume)) and splice in
        # country desk forecast
        if variable not in TM or variable in NA_VO:
            continue

        splicer = Splicer()
        operators = Operators()
        meta = self.get_meta(variable)
        new_variable = variable + '.1.0.0.0'
        meta1000 = self.get_meta(new_variable)
        meta['Variable Code'] = variable
        meta1000['Variable Code'] = new_variable

        forecast = self.get_data(df, variable)
        try:
            history = self.get_data(ameco_df, new_variable)
        except KeyError:
            history = None
            logger.warning(
                'Missing Ameco data for variable {} (transfer matrix)'
                .format(new_variable))

        untouched = forecast.copy()
        untouched.name = None
        untouched = pd.Series(meta).append(untouched)

        if variable in TM_TBBO:
            combined = splicer.butt_splice(history, forecast, kind='forward')
        elif variable in TM_TBM:
            combined = operators.merge(pd.DataFrame([forecast, history]))
        else:
            ratio_spliced = splicer.ratio_splice(history, forecast, kind='forward')
            combined = splicer.butt_splice(ratio_spliced, forecast, kind='forward')
        combined.name = None
        combined = pd.Series(meta1000).append(combined)

        self.result = self.result.append(combined, ignore_index=True)
        self.result = self.result.append(untouched, ignore_index=True)

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=1, country=self.country)
    return self.result
def perform_computation(self, df, ameco_df, ameco_db_df):
    '''Capital Stock and Total Factor Productivity

    Appends OIGT, OVGD, UIGT, UKCT, OKCT, OINT, OKND and ZVGDFA3 rows to
    ``self.result``, extends the OKCT/OINT/UKCT rows year by year in place,
    then indexes the result, applies the scale and exports step 8.

    Args:
        df: data frame read via ``self.get_data`` for the input series.
        ameco_df: data frame read for the UKCT base series.
        ameco_db_df: data frame read for the series spliced backward and for
            the OVGD/OIGT/OINT inputs of the capital-stock recursion.

    Returns:
        ``self.result`` indexed by ('Country Ameco', 'Variable Code').
    '''
    # ameco_db_df should have data till 1960
    variables = ['OIGT.1.0.0.0', 'OVGD.1.0.0.0', 'UIGT.1.0.0.0']
    splicer = Splicer()
    for variable in variables:
        try:
            series_data = self.get_data(df, variable)
        except KeyError:
            logger.warning(
                'Missing data for variable {} (Capital Stock)'.format(
                    variable))
            continue
        if series_data is not None:
            # Splice backward with the ameco_db_df series and keep only YEARS.
            series_data = splicer.ratio_splice(series_data, self.get_data(
                ameco_db_df, variable), kind='backward', variable=variable)[YEARS]
        # NOTE(review): nesting reconstructed from a collapsed source line;
        # the append is assumed to sit at loop level -- confirm.
        series_meta = self.get_meta(variable)
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

    # TODO: The AMECO_H.TXT only has data till 2017, we might need to update it
    variable = 'UKCT.1.0.0.0'
    try:
        ameco_data = self.get_data(ameco_df, variable)
    except KeyError:
        # No series in ameco_df: use the ameco_db_df series as is.
        series_data = self.get_data(ameco_db_df, variable)[YEARS]
    else:
        series_data = splicer.ratio_splice(ameco_data, self.get_data(
            ameco_db_df, variable)[YEARS], kind='backward')
    series_meta = self.get_meta(variable)
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result = self.result.append(series, ignore_index=True, sort=True)

    # OKCT = UKCT / (UIGT / OIGT)
    variable = 'OKCT.1.0.0.0'
    series_meta = self.get_meta(variable)
    series_data = self.get_data(
        self.result, 'UKCT.1.0.0.0') / (self.get_data(df, 'UIGT.1.0.0.0') /
                                        self.get_data(df, 'OIGT.1.0.0.0'))
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result = self.result.append(series, ignore_index=True, sort=True)

    # OINT = OIGT - OKCT
    variable = 'OINT.1.0.0.0'
    series_meta = self.get_meta(variable)
    series_data = self.get_data(df, 'OIGT.1.0.0.0') - self.get_data(
        self.result, 'OKCT.1.0.0.0')
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result = self.result.append(series, ignore_index=True, sort=True)

    variable = 'OKND.1.0.0.0'
    series_meta = self.get_meta(variable)
    series_1 = self.get_data(ameco_db_df, 'OVGD.1.0.0.0')
    series_2 = self.get_data(ameco_db_df, 'OIGT.1.0.0.0')
    # Choose the start year of the recursion from the first valid years of
    # the two ameco_db_df series.
    if series_1.first_valid_index() + 1 < series_2.first_valid_index():
        last_observation = series_2.first_valid_index() - 1
    else:
        last_observation = series_1.first_valid_index()
    new_series = pd.Series(series_meta)
    oint_1 = self.get_data(ameco_db_df, 'OINT.1.0.0.0').copy()
    oigt_1 = self.get_data(self.result, 'OIGT.1.0.0.0').copy()
    new_data = pd.Series({
        year: pd.np.nan
        for year in range(last_observation, LAST_YEAR + 1)
    })
    # Seed with three times the OVGD value of the start year, then add the
    # ameco_db_df OINT value of each following year.
    new_data[last_observation] = 3 * series_1[last_observation]
    # This first pass stops before LAST_YEAR; the loop further down goes up
    # to LAST_YEAR inclusive.
    for year in range(last_observation + 1, LAST_YEAR):
        new_data[year] = new_data[year - 1] + oint_1[year]

    # last_valid_index() is taken on the whole OKCT result row, so it is only
    # used when it is a plain int; otherwise 1993 is assumed.
    last_observation = self.result[
        self.result['Variable Code'] == 'OKCT.1.0.0.0'].iloc[-1].last_valid_index()
    if type(last_observation) != int:
        last_observation = 1993
    # Up until now we were discarding data before 1993, however here we need it if we want the same results
    # We need to pass all_data=True to read_ameco_db_xls and get the right ameco_db_df
    for year in range(last_observation + 1, LAST_YEAR + 1):
        # The statements below depend on each other and on the previous
        # year's values; their order must be kept.
        # OKCT[year] = new_data[year - 1] * OKCT[year - 1] / new_data[year - 2]
        self.result.loc[self.result['Variable Code'] == 'OKCT.1.0.0.0',
                        [year]] = (new_data[year - 1] * self.result.loc[
                            self.result['Variable Code'] == 'OKCT.1.0.0.0',
                            [year - 1]] / new_data[year - 2]).iloc[0, 0]
        # new_data[year] = new_data[year - 1] + OIGT[year] - OKCT[year]
        new_data[year] = (
            new_data[year - 1] + oigt_1[year] -
            self.result.loc[self.result['Variable Code'] == 'OKCT.1.0.0.0',
                            [year]]).iloc[0, 0]
        # OINT[year] = OIGT[year] - OKCT[year]
        self.result.loc[self.result['Variable Code'] == 'OINT.1.0.0.0',
                        [year]] = (oigt_1[year] - self.result.loc[
                            self.result['Variable Code'] == 'OKCT.1.0.0.0',
                            [year]]).iloc[0, 0]
        # UKCT[year] = OKCT[year] * UIGT[year] / OIGT[year]
        self.result.loc[
            self.result['Variable Code'] == 'UKCT.1.0.0.0',
            [year]] = (self.result.loc[self.result['Variable Code'] == 'OKCT.1.0.0.0',
                                       [year]] *
                       self.get_data(self.result, 'UIGT.1.0.0.0')[year] /
                       oigt_1[year]).iloc[0, 0]
    # Store the recursion result under the OKND metadata, restricted to YEARS.
    new_series = new_series.append(new_data[YEARS].copy())
    self.result = self.result.append(new_series, ignore_index=True, sort=True)

    # TODO: Fix this one, we get -6.897824 instead of -2.41 but it's because NLHT9.1.0.0.0 scale is wrong
    variable = 'ZVGDFA3.3.0.0.0'
    series_meta = self.get_meta(variable)
    series_3 = self.get_data(df, 'NLHT9.1.0.0.0')
    ovgd_1 = self.get_data(self.result, 'OVGD.1.0.0.0')
    # log(OVGD / ((NLHT9 * 1000) ** 0.65 * new_data ** 0.35))
    series_data = pd.np.log(
        ovgd_1 / (pow(series_3 * 1000, 0.65) * pow(new_data, 0.35)))
    series = pd.Series(series_meta)
    series = series.append(series_data[YEARS].copy())
    self.result = self.result.append(series, ignore_index=True, sort=True)

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=8, country=self.country)
    return self.result
def perform_computation(self, df, ameco_df):
    """Compute the labour-market series and append them to ``self.result``.

    Rows are appended in this order:

    * ``NLTN.1.0.0.0``   - total labour force (unemployed + employed)
    * ``NSTD.1.0.0.0``   - self employed (employed - wage and salary earners)
    * ``NETD.1.0.414.0`` - employed as a percentage of ``NPAN1.1.0.0.0``
    * ``NECN.1.0.0.0``   - civilian employment
    * ``NLHT.1.0.0.0``   - total annual hours worked
    * ``NLHT9.1.0.0.0``  - total annual hours worked (internal use only)
    * ``NLCN.1.0.0.0``   - civilian labour force

    Every series except ``NETD.1.0.414.0`` is produced by
    ``Splicer.ratio_splice(base, new, kind='forward')``, where ``base`` is
    read from ``ameco_df`` and ``new`` is derived from ``df``.

    Args:
        df: frame the new series are read from via ``self.get_data``
            (the country desk forecast, going by the log message below).
        ameco_df: frame the Ameco base series are read from.

    Returns:
        ``self.result`` after it has been indexed by
        ``['Country Ameco', 'Variable Code']``, passed through
        ``self.apply_scale()`` and exported with ``export_to_excel``
        (``step=2``).

    Raises:
        KeyError: presumably, whenever ``self.get_data`` cannot find a
            series. Only the Ameco lookups for ``NSTD.1.0.0.0`` and
            ``NLCN.1.0.0.0`` are guarded; every other lookup propagates.
    """
    splicer = Splicer()

    def add_row(meta, data):
        # One result row = metadata fields followed by the yearly values.
        row = pd.Series(meta).append(data)
        self.result = self.result.append(row, ignore_index=True)

    def optional_ameco_series(code):
        # Return the Ameco series for `code`, or None when it is missing.
        # Returning None explicitly guarantees a failed lookup can never
        # leave a base series from a previous section in use.
        try:
            return self.get_data(ameco_df, code)
        except KeyError:
            logger.warning(
                'Missing Ameco data for variable {} (population). Using data '
                'from country desk forecast'.format(code))
            return None

    def total_hours():
        # Employed persons times average hours worked, both from `df`.
        return (self.get_data(df, 'NETD.1.0.0.0')
                * self.get_data(df, 'NLHA.1.0.0.0'))

    # Total labour force (unemployed + employed)
    variable = 'NLTN.1.0.0.0'
    base_series = self.get_data(ameco_df, variable)
    splice_series = (self.get_data(df, 'NUTN.1.0.0.0')
                     + self.get_data(df, 'NETN.1.0.0.0'))
    nltn_meta = self.get_meta(variable)
    nltn_data = splicer.ratio_splice(base_series, splice_series,
                                     kind='forward')
    add_row(nltn_meta, nltn_data)

    # Self employed (employed - wage and salary earners)
    variable = 'NSTD.1.0.0.0'
    base_series = optional_ameco_series(variable)
    splice_series = (self.get_data(df, 'NETN.1.0.0.0')
                     - self.get_data(df, 'NWTD.1.0.0.0'))
    nstd_meta = self.get_meta(variable)
    nstd_data = splicer.ratio_splice(base_series, splice_series,
                                     kind='forward', variable=variable)
    add_row(nstd_meta, nstd_data)

    # Percentage employed (total employed / population of working age
    # (15-64)); computed directly, no splicing involved.
    variable = 'NETD.1.0.414.0'
    netd_pct_meta = self.get_meta(variable)
    netd_pct_data = (self.get_data(df, 'NETD.1.0.0.0')
                     / self.get_data(df, 'NPAN1.1.0.0.0') * 100)
    add_row(netd_pct_meta, netd_pct_data)

    # Civilian employment
    # NOTE(review): the forecast series is looked up by the short code
    # 'NETN' rather than 'NETN.1.0.0.0' - confirm get_data resolves it.
    variable = 'NECN.1.0.0.0'
    necn_meta = self.get_meta(variable)
    necn_data = splicer.ratio_splice(self.get_data(ameco_df, variable),
                                     self.get_data(df, 'NETN'),
                                     kind='forward')
    add_row(necn_meta, necn_data)

    # Total annual hours worked
    variable = 'NLHT.1.0.0.0'
    total_hours_data = total_hours()
    nlht_meta = self.get_meta(variable)
    nlht_data = splicer.ratio_splice(self.get_data(ameco_df, variable),
                                     total_hours_data, kind='forward')
    add_row(nlht_meta, nlht_data)

    # Total annual hours worked; total economy. for internal use only.
    # Same inputs as NLHT, rebuilt so this splice gets its own series.
    variable = 'NLHT9.1.0.0.0'
    total_hours_data = total_hours()
    nlht9_meta = self.get_meta(variable)
    nlht9_data = splicer.ratio_splice(self.get_data(ameco_df, variable),
                                      total_hours_data, kind='forward')
    add_row(nlht9_meta, nlht9_data)

    # Civilian labour force (spliced civilian employment + unemployed)
    variable = 'NLCN.1.0.0.0'
    nlcn_meta = self.get_meta(variable)
    # Fix: previously a missing Ameco series left `base_series` pointing at
    # the NSTD base from above; now it is None in that case.
    base_series = optional_ameco_series(variable)
    nlcn_data = splicer.ratio_splice(
        base_series,
        necn_data + self.get_data(df, 'NUTN.1.0.0.0'),
        kind='forward', variable=variable)
    add_row(nlcn_meta, nlcn_data)

    self.result.set_index(['Country Ameco', 'Variable Code'],
                          drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=2, country=self.country)
    return self.result