def perform_computation(self, df, ameco_df):
    """Build the UVGDH and KNP rows, then scale and export the result (step 6).

    ``UVGDH`` is the ``UVGDH.1.0.0.0`` series from ``ameco_df`` ratio-spliced
    forward with the same code from ``df``.  When either lookup raises
    ``KeyError`` the unsuffixed ``UVGDH`` series from ``df`` is used as is.
    ``KNP.1.0.212.0`` is copied unchanged from ``ameco_df``.

    Args:
        df: input frame queried through ``self.get_data``.
        ameco_df: AMECO frame queried through ``self.get_data``.

    Returns:
        ``self.result`` indexed by ``['Country Ameco', 'Variable Code']``
        after ``self.apply_scale()`` has run.
    """
    def add_row(meta, values):
        # One output row = metadata fields followed by the yearly values.
        row = pd.Series(meta)
        row = row.append(values)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    uvgdh, uvgdh_1, knp = 'UVGDH', 'UVGDH.1.0.0.0', 'KNP.1.0.212.0'

    series_meta = self.get_meta(uvgdh)
    splicer = Splicer()
    try:
        series_data = self.get_data(ameco_df, uvgdh_1)
        # The keyword is ``kind`` (as in every other ratio_splice call of
        # this code base); it used to be passed as ``type`` here.
        series_data = splicer.ratio_splice(
            series_data, self.get_data(df, uvgdh_1), kind='forward')
    except KeyError:
        series_data = self.get_data(df, uvgdh)
    add_row(series_meta, series_data)

    series_meta = self.get_meta(knp)
    series_data = self.get_data(ameco_df, knp)
    add_row(series_meta, series_data)

    self.result.set_index(['Country Ameco', 'Variable Code'],
                          drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=6, country=self.country)
    return self.result
def perform_computation(self, output_gap_df):
    """Copy the output-gap series and derive OVGDP growth (step 9).

    Five series are taken unchanged from ``output_gap_df``; afterwards
    ``OVGDP.6.0.0.0`` is added as the percent change of the
    ``OVGDP.1.0.0.0`` row that was just stored in ``self.result``.

    Returns:
        ``self.result`` indexed by ``['Country Ameco', 'Variable Code']``
        after scaling; the frame is also handed to ``export_to_excel``.
    """
    def store(meta, values):
        # Metadata fields first, yearly values after, appended as a new row.
        row = pd.Series(meta)
        row = row.append(values)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    copied_codes = (
        'ZNAWRU.1.0.0.0',
        'AVGDGP.1.0.0.0',
        'AVGDGT.1.0.0.0',
        'OVGDP.1.0.0.0',
        'OVGDT.1.0.0.0',
    )
    for code in copied_codes:
        meta = self.get_meta(code)
        store(meta, self.get_data(output_gap_df, code))

    growth_meta = self.get_meta('OVGDP.6.0.0.0')
    level = self.get_data(self.result, 'OVGDP.1.0.0.0')
    store(growth_meta, level.pct_change() * 100)

    index_columns = ['Country Ameco', 'Variable Code']
    self.result.set_index(index_columns, drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=9, country=self.country)
    return self.result
def perform_computation(self, df):
    """Compute consumer prices, price deflators and GNI at GDP deflator (step 7).

    Rows added to ``self.result``:

    * ``ZCPIH.6.0.0.0``: the ``ZCPIH`` series from ``df``, or ``ZCPIN`` when
      ``ZCPIH`` raises ``KeyError``.
    * every code in ``PD``: the ``U…1.0.0.0`` series divided by the
      ``O…1.0.0.0`` series, rebased to ``BASE_PERIOD``.
    * ``OVGN.1.0.0.0``: ``UVGN.1.0.0.0`` divided by the freshly computed
      ``PVGD.3.1.0.0`` deflator, times 100.
    * ``OVGN.6.0.0.0``: percent change of ``OVGN.1.0.0.0``.

    Args:
        df: input frame queried through ``self.get_data``.

    Returns:
        ``self.result`` indexed by ``['Country Ameco', 'Variable Code']``
        after ``self.apply_scale()`` has run.
    """
    def add_row(meta, values):
        # One output row = metadata fields followed by the yearly values.
        row = pd.Series(meta)
        row = row.append(values)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    zcpih, zcpih_6, zcpin = 'ZCPIH', 'ZCPIH.6.0.0.0', 'ZCPIN'
    series_meta = self.get_meta(zcpih_6)
    try:
        series_data = self.get_data(df, zcpih)
    except KeyError:
        series_data = self.get_data(df, zcpin)
    add_row(series_meta, series_data)

    operators = Operators()
    for variable in PD:
        # Dots are escaped so that only the literal '.3.1.0.0' suffix is
        # rewritten (an unescaped '.' matches any character).
        base_code = re.sub(r'\.3\.1\.0\.0', '.1.0.0.0', variable)
        variable_u1 = re.sub('^P', 'U', base_code)
        variable_o1 = re.sub('^P', 'O', base_code)
        series_meta = self.get_meta(variable)
        series_data = operators.rebase(
            self.get_data(df, variable_u1) / self.get_data(df, variable_o1),
            BASE_PERIOD)
        add_row(series_meta, series_data)

    # GNI (GDP deflator)
    variable = 'OVGN.1.0.0.0'
    variable_6 = 'OVGN.6.0.0.0'
    gross_income = 'UVGN.1.0.0.0'
    gross_domestic_product = 'PVGD.3.1.0.0'

    series_meta = self.get_meta(variable)
    gni_data = (self.get_data(df, gross_income)
                / self.get_data(self.result, gross_domestic_product) * 100)
    add_row(series_meta, gni_data)

    series_meta = self.get_meta(variable_6)
    # pct_change returns a new series, so no defensive copy is needed.
    add_row(series_meta, gni_data.pct_change() * 100)

    self.result.set_index(['Country Ameco', 'Variable Code'],
                          drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=7, country=self.country)
    return self.result
def perform_computation(self, df, ameco_db_df, ovgd1):
    """Compute the national-accounts value series (step 5).

    The sums in the first mapping are delegated to ``_sum_and_splice``
    (without splicing).  ``UWCDA.1.0.0.0`` is then added as
    ``NETD * UWCD / NWTD``, followed by one ``CC…`` row per entry of
    ``NA_IS_VA``.  For those rows the source series is read from
    ``self.result`` and, when that lookup raises ``IndexError`` or
    ``KeyError``, from ``df``.

    Args:
        df: input frame queried through ``self.get_data``.
        ameco_db_df: accepted for interface compatibility; not read here.
        ovgd1: series used as divisor before the percent change is taken.

    Returns:
        ``self.result`` indexed by ``['Country Ameco', 'Variable Code']``
        after scaling; the frame is also handed to ``export_to_excel``.
    """
    def store(meta, values):
        # Metadata fields first, yearly values after, appended as a new row.
        row = pd.Series(meta)
        row = row.append(values)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    sums = {
        'UVGN.1.0.0.0': ['UVGD.1.0.0.0', 'UBRA.1.0.0.0'],
        'UOGD.1.0.0.0': [
            'UVGD.1.0.0.0', 'UYVG.1.0.0.0', 'UYEU.1.0.0.0',
            '-UWCD.1.0.0.0', '-UTVG.1.0.0.0', '-UTEU.1.0.0.0',
        ],
        'UTVNBP.1.0.0.0': ['UTVTBP.1.0.0.0', '-UYVTBP.1.0.0.0'],
        'UVGE.1.0.0.0': ['UVGD.1.0.0.0', '-UTVNBP.1.0.0.0'],
        'UWSC.1.0.0.0': ['UWCD.1.0.0.0', '-UWWD'],
    }
    self._sum_and_splice(sums, df, df, splice=False)

    uwcda_meta = self.get_meta('UWCDA.1.0.0.0')
    employment = self.get_data(df, 'NETD.1.0.0.0')
    compensation = self.get_data(df, 'UWCD.1.0.0.0')
    employees = self.get_data(df, 'NWTD.1.0.0.0')
    store(uwcda_meta, employment * compensation / employees)

    lookup_errors = (IndexError, KeyError)
    gdp_code = 'UVGD.1.0.0.0'
    for stem in NA_IS_VA:
        code_1 = stem + '.1.0.0.0'
        meta = self.get_meta(re.sub('^U', 'CC', code_1))

        # Percent change of the series expressed relative to ovgd1.
        try:
            growth = self.get_data(self.result, code_1) / ovgd1
        except lookup_errors:
            growth = self.get_data(df, code_1) / ovgd1
        growth = growth.pct_change() * 100

        # Share of the series in UVGD, weighted by that percent change.
        try:
            weighted = (self.get_data(self.result, code_1)
                        / self.get_data(df, gdp_code) * growth)
        except lookup_errors:
            weighted = (self.get_data(df, code_1)
                        / self.get_data(df, gdp_code) * growth)
        store(meta, weighted)

    index_columns = ['Country Ameco', 'Variable Code']
    self.result.set_index(index_columns, drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=5, country=self.country)
    return self.result
def perform_computation(self, df, ameco_df):
    """Compute the labour-market series (step 11).

    Rows added to ``self.result``, in this order:

    * ``FETD9.1.0.0.0`` / ``FWTD9.1.0.0.0`` (employment and employees);
    * ``H…W.1.0.0.0`` and ``R…C.3.1.0.0`` for UWCD, UWWD and UWSC;
    * the ratios RVGDE, RVGEW, ZATN9, ZETN9 and ZUTN9;
    * ``FETD9.6.0.0.0`` (percent change of FETD9);
    * ``PLCD.3.1.0.0`` and ``QLCD.3.1.0.0``;
    * the ``.6.0.0.0`` percent changes of eight of the rows above.

    Args:
        df: input frame queried through ``self.get_data``.
        ameco_df: AMECO frame queried through ``self.get_data``.

    Returns:
        ``self.result`` indexed by ``['Country Ameco', 'Variable Code']``
        after ``self.apply_scale()`` has run.
    """
    operators = Operators()
    splicer = Splicer()

    def add_row(meta, values):
        # One output row = metadata fields followed by the yearly values.
        row = pd.Series(meta)
        row = row.append(values)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    # --- Employment (FETD9) and employees (FWTD9) -------------------------
    variables = ['FETD9.1.0.0.0', 'FWTD9.1.0.0.0']
    if self.country in FCRIF:
        try:
            fetd9 = self.get_data(df, 'FETD.1.0.0.0')
            fwtd9 = self.get_data(df, 'FWTD.1.0.0.0')
        except KeyError:
            fetd9 = self.get_data(df, 'NETD.1.0.0.0')
            fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
    elif self.country == 'US':
        fetd9 = self.get_data(df, 'NETD.1.0.0.0')
        fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
    else:
        fetd9 = splicer.ratio_splice(
            self.get_data(ameco_df, variables[0]),
            self.get_data(df, 'NETD'), kind='forward')
        # FWTD9 is spliced onto its own AMECO history; it used to be
        # spliced onto the FETD9 history (copy-paste of variables[0]).
        fwtd9 = splicer.ratio_splice(
            self.get_data(ameco_df, variables[1]),
            self.get_data(df, 'NWTD'), kind='forward')
    add_row(self.get_meta(variables[0]), fetd9.copy())
    add_row(self.get_meta(variables[1]), fwtd9.copy())

    # --- Compensation / wages per employee, nominal and real --------------
    private_consumption_u = 'UCPH.1.0.0.0'
    private_consumption_o = 'OCPH.1.0.0.0'
    for variable in ['UWCD', 'UWWD', 'UWSC']:
        variable_1 = variable + '.1.0.0.0'
        variable_h1 = re.sub('^U', 'H', variable) + 'W.1.0.0.0'
        variable_r1 = re.sub('^U', 'R', variable) + 'C.3.1.0.0'

        series_meta = self.get_meta(variable_h1)
        per_employee = self.get_data(df, variable_1) / fwtd9
        add_row(series_meta, per_employee)

        series_meta = self.get_meta(variable_r1)
        # NOTE(review): this divides by UCPH and by OCPH in turn; if the
        # intent is to deflate by the UCPH/OCPH price index the formula
        # would be ``/ (UCPH / OCPH)`` - confirm against the specification.
        series_data = operators.rebase(
            per_employee
            / self.get_data(df, private_consumption_u)
            / self.get_data(df, private_consumption_o),
            base_period=BASE_PERIOD)
        add_row(series_meta, series_data)

    # --- Productivity and participation / employment / unemployment rates -
    # NOTE(review): RVGEW.1.0.0.0 appears twice with different inputs, so
    # two rows with the same code are produced - confirm the second code.
    ratios = [
        ('RVGDE.1.0.0.0', 'OVGD.1.0.0.0', 'FETD9.1.0.0.0'),
        ('RVGEW.1.0.0.0', 'OVGE.1.0.0.0', 'FETD9.1.0.0.0'),
        ('RVGEW.1.0.0.0', 'OVGD.1.0.0.0', 'NETD.1.0.0.0'),
        ('ZATN9.1.0.0.0', 'NLTN.1.0.0.0', 'NPAN1.1.0.0.0'),
        ('ZETN9.1.0.0.0', 'NETN.1.0.0.0', 'NPAN1.1.0.0.0'),
        ('ZUTN9.1.0.0.0', 'NUTN.1.0.0.0', 'NLTN.1.0.0.0'),
    ]
    percentages = {'ZATN9.1.0.0.0', 'ZETN9.1.0.0.0', 'ZUTN9.1.0.0.0'}
    for variable, numerator, denominator in ratios:
        series_meta = self.get_meta(variable)
        if denominator == 'FETD9.1.0.0.0':
            # FETD9 was computed above and is not part of ``df``.
            denominator_series = fetd9
        else:
            denominator_series = self.get_data(df, denominator)
        series_data = self.get_data(df, numerator) / denominator_series
        if variable in percentages:
            series_data = series_data * 100
        add_row(series_meta, series_data)

    add_row(self.get_meta('FETD9.6.0.0.0'), fetd9.pct_change() * 100)

    # --- Unemployment rate (ZUTN) ------------------------------------------
    # NOTE(review): the spliced ZUTN series is computed but never appended
    # to self.result, exactly as before this change - confirm whether a
    # ZUTN.1.0.0.0 row is expected in the output of this step.
    variable = 'ZUTN.1.0.0.0'
    if self.country in EU:
        # ZUTN based on NUTN.1.0.0.0 and NETN.1.0.0.0 (18/01/2017) is
        # commented out in FDMS+; the AMECO series is used on its own.
        zutn_data = splicer.butt_splice(
            self.get_data(ameco_df, variable),
            self.get_data(ameco_df, variable), kind='forward')
    else:
        try:
            netn1 = self.get_data(df, 'NETN.1.0.0.0')
        except KeyError:
            netn1 = self.get_data(df, 'NETN')
        nutn1 = self.get_data(df, 'NUTN.1.0.0.0')
        # ``netn1`` already holds the data; it used to be passed to
        # get_data again as if it were a variable code.
        zutn_data = splicer.level_splice(
            self.get_data(ameco_df, variable),
            nutn1 / (nutn1 + netn1) * 100)
        # NUTN ratiospliced (18/01/2017) is commented out in FDMS+

    # --- Unit labour costs, nominal (PLCD) and real (QLCD) -----------------
    unit_labour_costs = [
        ('PLCD.3.1.0.0', 'HWCDW.1.0.0.0', 'RVGDE.1.0.0.0'),
        ('QLCD.3.1.0.0', 'PLCD.3.1.0.0', 'PVGD.3.1.0.0'),
    ]
    for variable, numerator, denominator in unit_labour_costs:
        series_meta = self.get_meta(variable)
        if denominator == 'PVGD.3.1.0.0':
            # The GDP deflator comes from the input frame; everything else
            # was produced earlier in this method.
            denominator_series = self.get_data(df, denominator)
        else:
            denominator_series = self.get_data(self.result, denominator)
        series_data = operators.rebase(
            self.get_data(self.result, numerator) / denominator_series,
            base_period=BASE_PERIOD)
        add_row(series_meta, series_data)

    # --- Percent changes ----------------------------------------------------
    variables = [
        'RWCDC.3.1.0.0', 'PLCD.3.1.0.0', 'QLCD.3.1.0.0', 'HWCDW.1.0.0.0',
        'HWSCW.1.0.0.0', 'HWWDW.1.0.0.0', 'RVGDE.1.0.0.0', 'RVGEW.1.0.0.0'
    ]
    for variable in variables:
        variable_6 = variable.split('.')[0] + '.6.0.0.0'
        series_meta = self.get_meta(variable_6)
        series_data = self.get_data(self.result, variable).pct_change() * 100
        add_row(series_meta, series_data)

    self.result.set_index(['Country Ameco', 'Variable Code'],
                          drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=11, country=self.country)
    return self.result
def perform_computation(self, result_1, result_7, ameco_h_df):
    """Compute the household-sector series (step 14).

    The sums UYOH, UVGH, UVGHA, USGH and UBLH are produced first through
    ``_sum_and_splice`` (from SumAndSpliceMixin); sums that depend on an
    earlier sum read it from a frame rebuilt out of ``self.result`` and
    ``result_1``.  ``OVGHA.3.0.0.0`` and ``ASGH.1.0.0.0`` are derived last.

    Args:
        result_1: frame with the input series of the sums.
        result_7: frame providing ``PCPH.3.1.0.0``.
        ameco_h_df: frame providing ``ASGH.1.0.0.0`` and passed on to
            ``_sum_and_splice``.

    Returns:
        ``self.result`` indexed by ``['Country Ameco', 'Variable Code']``
        after scaling; the frame is also handed to ``export_to_excel``.
    """
    # TODO: Check the scales of the output variables
    index_columns = ['Country Ameco', 'Variable Code']
    splicer = Splicer()
    operators = Operators()

    def combined_inputs():
        # Rows computed so far, indexed like result_1, stacked on result_1.
        computed = self.result.set_index(index_columns, drop=True)
        return pd.concat([computed, result_1], sort=True)

    def store(meta, values):
        # Metadata fields first, yearly values after, appended as a new row.
        row = pd.Series(meta)
        row = row.append(values)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    self._sum_and_splice(
        {'UYOH.1.0.0.0': ['UOGH.1.0.0.0', 'UYNH.1.0.0.0']},
        result_1, ameco_h_df, splice=False)

    new_input_df = combined_inputs()
    self._sum_and_splice(
        {
            'UVGH.1.0.0.0': [
                'UWCH.1.0.0.0', 'UYOH.1.0.0.0', 'UCTRH.1.0.0.0',
                '-UTYH.1.0.0.0', '-UCTPH.1.0.0.0',
            ],
        },
        new_input_df, ameco_h_df, splice=False)

    new_input_df = combined_inputs()
    self._sum_and_splice(
        {'UVGHA.1.0.0.0': ['UVGH.1.0.0.0', 'UEHH.1.0.0.0']},
        new_input_df, ameco_h_df, splice=False)
    self._sum_and_splice(
        {
            'USGH.1.0.0.0': [
                'UWCH.1.0.0.0', 'UOGH.1.0.0.0', 'UYNH.1.0.0.0',
                'UCTRH.1.0.0.0', '-UTYH.1.0.0.0', '-UCTPH.1.0.0.0',
                'UEHH.1.0.0.0', '-UCPH0.1.0.0.0',
            ],
        },
        new_input_df, ameco_h_df, splice=False)

    new_input_df = combined_inputs()
    # This formula uses *ignoremissingsubtract* rather than
    # *ignoremissingsum*, hence the minus sign on every term but the first.
    self._sum_and_splice(
        {'UBLH.1.0.0.0': ['USGH.1.0.0.0', '-UITH.1.0.0.0', '-UKOH.1.0.0.0']},
        new_input_df, ameco_h_df, splice=False)

    # OVGHA: UVGHA deflated by PCPH, rebased, then scaled by the
    # base-period value of UVGHA.
    nominal = self.get_data(new_input_df, 'UVGHA.1.0.0.0')
    deflator = self.get_data(result_7, 'PCPH.3.1.0.0')
    nominal_at_base = nominal.loc[BASE_PERIOD]
    real = operators.rebase(nominal / deflator, BASE_PERIOD) / 100 * nominal_at_base
    store(self.get_meta('OVGHA.3.0.0.0'), real)

    # ASGH: USGH as a percentage of UVGHA, butt-spliced onto the history.
    saving = self.get_data(new_input_df, 'USGH.1.0.0.0')
    income = self.get_data(new_input_df, 'UVGHA.1.0.0.0')
    history = self.get_data(ameco_h_df, 'ASGH.1.0.0.0')
    saving_rate = splicer.butt_splice(history, saving / income * 100)
    store(self.get_meta('ASGH.1.0.0.0'), saving_rate)

    self.result.set_index(index_columns, drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=14, country=self.country)
    return self.result
def test_country_calculation_BE(self):
    """Run steps 2-14 for ``self.country`` and report diverging series.

    After each step that has a list of expected variable codes, the test
    asserts that all of them are present in the step result.  The step
    results are then concatenated, de-duplicated, rescaled and exported.
    Every series that differs from ``self.dfexp`` in at least one year of
    ``YEARS`` is finally passed to ``report_diff``.
    """
    def assert_present(frame, codes):
        # Fail with the list of codes missing from the country's rows.
        present = set(frame.loc[self.country].index)
        missing_vars = [v for v in codes if v not in present]
        self.assertFalse(missing_vars)

    # STEP 2
    step_2 = Population(scales=self.scales, country=self.country)
    step_2_vars = [
        'NUTN.1.0.0.0', 'NETN.1.0.0.0', 'NWTD.1.0.0.0', 'NETD.1.0.0.0',
        'NPAN1.1.0.0.0', 'NETN', 'NLHA.1.0.0.0'
    ]
    # NECN.1.0.0.0 is calculated and used in step_2
    step_2_df = self.result_1.loc[self.result_1.index.isin(
        step_2_vars, level='Variable Code')].copy()
    result_2 = step_2.perform_computation(step_2_df, self.ameco_df)
    assert_present(result_2, [
        'NLTN.1.0.0.0', 'NETD.1.0.414.0', 'NECN.1.0.0.0', 'NLHT.1.0.0.0',
        'NLHT9.1.0.0.0', 'NLCN.1.0.0.0', 'NSTD.1.0.0.0'
    ])

    # STEP 3
    step_3 = GDPComponents(scales=self.scales, country=self.country)
    result_3 = step_3.perform_computation(self.result_1, self.ameco_df)
    step_3_codes = [
        'UMGS', 'UXGS', 'UBGN', 'UBSN', 'UBGS', 'UIGG', 'UIGP', 'UIGNR',
        'UUNF', 'UUNT', 'UUTT', 'UITT'
    ]
    assert_present(
        result_3,
        step_3_codes + [code + '.1.0.0.0' for code in step_3_codes])

    # STEP 4
    step_4 = NationalAccountsVolume(scales=self.scales,
                                    country=self.country)
    df_input = self.df.copy()
    for year in (1993, 1994, 1995):
        df_input[year] = float('nan')
    step_4_df = pd.concat([df_input, self.result_1, result_3], sort=True)
    result_4, ovgd1 = step_4.perform_computation(step_4_df, self.ameco_df)

    # STEP 5
    step_5 = NationalAccountsValue(scales=self.scales,
                                   country=self.country)
    step_5_df = self.result_1.copy()
    result_5 = step_5.perform_computation(step_5_df, self.ameco_db_df,
                                          ovgd1)
    assert_present(result_5, [
        'UVGN.1.0.0.0', 'UOGD.1.0.0.0', 'UTVNBP.1.0.0.0', 'UVGE.1.0.0.0',
        'UWCDA.1.0.0.0', 'UWSC.1.0.0.0'
    ])

    # STEP 6
    ameco_vars = ['UVGDH.1.0.0.0', 'KNP.1.0.212.0']
    ameco_df = self._get_ameco_df(ameco_vars)
    step_6 = RecalculateUvgdh(scales=self.scales, country=self.country)
    result_6 = step_6.perform_computation(self.df, ameco_df)

    # STEP 7
    step_7 = Prices(scales=self.scales, country=self.country)
    step_7_df = pd.concat([self.result_1, result_3, result_4, result_5],
                          sort=True)
    result_7 = step_7.perform_computation(step_7_df)
    # Price deflators expected from step 7 (kept local so that no
    # module-level name is shadowed).
    price_deflators = [
        'PCPH.3.1.0.0', 'PCTG.3.1.0.0', 'PIGT.3.1.0.0', 'PIGCO.3.1.0.0',
        'PIGDW.3.1.0.0', 'PIGNR.3.1.0.0', 'PIGEQ.3.1.0.0', 'PIGOT.3.1.0.0',
        'PUNF.3.1.0.0', 'PUNT.3.1.0.0', 'PUTT.3.1.0.0', 'PVGD.3.1.0.0',
        'PXGS.3.1.0.0', 'PMGS.3.1.0.0', 'PXGN.3.1.0.0', 'PXSN.3.1.0.0',
        'PMGN.3.1.0.0', 'PMSN.3.1.0.0', 'PIGP.3.1.0.0', 'PIST.3.1.0.0',
        'PVGE.3.1.0.0'
    ]
    assert_present(result_7, price_deflators)

    # STEP 8
    step_8 = CapitalStock(scales=self.scales, country=self.country)
    step_8_df = pd.concat(
        [self.result_1, result_2, result_3, result_4, result_5], sort=True)
    result_8 = step_8.perform_computation(step_8_df, self.ameco_df,
                                          self.ameco_db_df_all_data)

    # STEP 9
    step_9 = OutputGap(scales=self.scales, country=self.country)
    result_9 = step_9.perform_computation(read_output_gap_xls())

    # STEP 10
    step_10 = ExchangeRates(scales=self.scales, country=self.country)
    result_10 = step_10.perform_computation(self.ameco_db_df,
                                            read_xr_ir_xls(),
                                            read_ameco_xne_us_xls())

    # STEP 11
    step_11 = LabourMarket(scales=self.scales, country=self.country)
    step_11_df = pd.concat(
        [self.result_1, result_2, result_4, result_5, result_7], sort=True)
    result_11 = step_11.perform_computation(step_11_df, self.ameco_df)
    assert_present(result_11, [
        'FETD9.1.0.0.0', 'FWTD9.1.0.0.0', 'HWCDW.1.0.0.0', 'RWCDC.3.1.0.0',
        'HWWDW.1.0.0.0', 'RWWDC.3.1.0.0', 'HWSCW.1.0.0.0', 'RWSCC.3.1.0.0',
        'RVGDE.1.0.0.0', 'RVGEW.1.0.0.0', 'ZATN9.1.0.0.0', 'ZETN9.1.0.0.0',
        'ZUTN9.1.0.0.0', 'FETD9.6.0.0.0', 'PLCD.3.1.0.0', 'QLCD.3.1.0.0',
        'RWCDC.6.0.0.0', 'PLCD.6.0.0.0', 'QLCD.6.0.0.0', 'HWCDW.6.0.0.0',
        'HWSCW.6.0.0.0', 'HWWDW.6.0.0.0', 'RVGDE.6.0.0.0', 'RVGEW.6.0.0.0'
    ])

    # STEP 12
    step_12 = FiscalSector(scales=self.scales, country=self.country)
    result_12 = step_12.perform_computation(self.result_1, self.ameco_df)

    # STEP 13
    step_13 = CorporateSector(scales=self.scales, country=self.country)
    result_13 = step_13.perform_computation(self.result_1, self.ameco_df)
    assert_present(result_13, ['USGC.1.0.0.0', 'UOGC.1.0.0.0'])

    # STEP 14
    step_14 = HouseholdSector(scales=self.scales, country=self.country)
    result_14 = step_14.perform_computation(self.result_1, result_7,
                                            self.ameco_df)
    assert_present(result_14, [
        'UYOH.1.0.0.0', 'UVGH.1.0.0.0', 'UVGHA.1.0.0.0', 'OVGHA.3.0.0.0',
        'USGH.1.0.0.0', 'ASGH.1.0.0.0', 'UBLH.1.0.0.0'
    ])

    # TODO: Fix all scales
    result = pd.concat([
        self.result_1, result_2, result_3, result_4, result_5, result_6,
        result_7, result_8, result_9, result_10, result_11, result_12,
        result_13, result_14
    ], sort=True)
    result = remove_duplicates(result)
    fix_scales(result, self.country)
    export_to_excel(result,
                    'output/{}/outputall.txt'.format(self.country),
                    'output/{}/outputall.xlsx'.format(self.country))

    # Compare against the expected values, aligned on the same rows and
    # columns as the computed result.
    res = result.copy()
    self.dfexp['Frequency'] = 'Annual'
    exp = self.dfexp[res.columns].reindex(result.index.tolist())

    tolerance = 5e-6
    wrong_series = set()
    # Every row is checked, including the first one.
    for position in range(res.shape[0]):
        series, expected = res.iloc[position], exp.iloc[position]
        for year in YEARS:
            p, q = series[year], expected[year]
            if pd.isna(p) and pd.isna(q):
                continue
            # A series is wrong when the values are NOT within tolerance.
            # The negated form also flags a value missing on one side
            # only, because comparisons with NaN are always False.
            if not abs(p - q) < tolerance:
                wrong_series.add(series.name)
                break

    wrong_names = sorted(wrong_series)
    res_wrong = res.loc[wrong_names].copy()
    exp_wrong = exp.loc[wrong_names].copy()
    report_diff(res_wrong, exp_wrong, country=self.country)
def perform_computation(self, df, ameco_df):
    """Compute the national-accounts volume series (step 4).

    Sections, in order:

    1. one ``.1.0.0.0`` volume row per entry of ``NA_VO``;
    2. imports / exports of goods and services, balances and private
       investment, through ``_get_data`` and ``_update_result``;
    3. net exports, non-residential investment and domestic demand
       aggregates through ``_update_result``;
    4. for every ``NA_VO`` entry: rebasing, percent change (``.6.0.0.0``)
       and contribution to the change in GDP (``C….1.0.0.0``);
    5. CMGS sign flip and CBGS, per-capita GDP, terms of trade, the
       ``OVGD.6.1.212.0`` copy and the ``.6.0.30.0`` copies for ``T_VO``.

    Args:
        df: input frame queried through ``self.get_data``; must carry a
            ``'Variable Code'`` index level.
        ameco_df: AMECO frame queried through ``self.get_data``.

    Returns:
        tuple ``(self.result, ovgd1)``: the result frame indexed by
        ``['Country Ameco', 'Variable Code']`` after scaling, and the
        ``OVGD.1.0.0.0`` series read back from the result.
    """
    def add_row(meta, values, sort=True):
        # One output row = metadata fields followed by the yearly values.
        row = pd.Series(meta)
        row = row.append(values)
        if sort:
            self.result = self.result.append(row, ignore_index=True,
                                             sort=True)
        else:
            self.result = self.result.append(row, ignore_index=True)

    # --- 1. Volume rows for every NA_VO variable ---------------------------
    for variable in NA_VO:
        new_variable = variable + '.1.0.0.0'
        u_variable = re.sub('^.', 'U', variable)
        variable11 = variable + '.1.1.0.0'
        if self.country in FCWVACP:
            try:
                new_data = self.splicer.ratio_splice(
                    self.get_data(ameco_df, u_variable),
                    self.get_data(df, variable), kind='forward')
            except KeyError:
                logger.error(
                    'Failed to calculate {} (national accounts volume).'.
                    format(variable))
                continue
        else:
            try:
                series = self.get_data(df, variable)
                u_series = self.get_data(df, u_variable)
            except KeyError:
                logger.error(
                    'Failed to calculate {} (national accounts volume).'.
                    format(variable))
                continue
            # Reset for every variable so that a missing AMECO series can
            # never silently reuse the series of the previous variable.
            # NOTE(review): None mirrors the ``base_series = None``
            # convention used with _update_result below; confirm that
            # splice_and_level_forward accepts a missing base series.
            series11 = None
            try:
                series11 = self.get_data(ameco_df, variable11)
                series11[2019] = float('nan')
            except KeyError:
                logger.warning(
                    'Missing Ameco data for variable {} (national accounts volume). Using data '
                    'from country desk forecast'.format(variable11))
            splice_series = (series / u_series.shift(1) - 1) * 100
            # RatioSplice(base, level(series)) = base * (1 + 0,01 * series)
            new_data = self.splicer.splice_and_level_forward(
                series11, splice_series)
        add_row(self.get_meta(new_variable), new_data, sort=False)

    # --- 2. Imports / exports of goods and services -------------------------
    omgs, oxgs, obgn, obsn, oigp = (
        'OMGS.1.0.0.0', 'OXGS.1.0.0.0', 'OBGN.1.0.0.0', 'OBSN.1.0.0.0',
        'OIGP.1.0.0.0')
    variables = {
        omgs: {
            'ameco': 'OMGS.1.1.0.0',
            'goods': 'OMGN',
            'services': 'OMSN',
            'u_goods': 'UMGN',
            'u_services': 'UMSN'
        },
        oxgs: {
            'ameco': 'OXGS.1.1.0.0',
            'goods': 'OXGN',
            'services': 'OXSN',
            'u_goods': 'UXGN',
            'u_services': 'UXSN'
        },
        obgn: {
            'exports': 'OXGN.1.1.0.0',
            'imports': 'OMGN.1.1.0.0',
            'new_exports': 'OXGN',
            'u_exports': 'UXGN',
            'new_imports': 'OMGN',
            'u_imports': 'UMGN'
        },
        # Balance of services: imports are the *services* series (they
        # used to be the goods series OMGN / UMGN).
        obsn: {
            'exports': 'OXSN.1.1.0.0',
            'imports': 'OMSN.1.1.0.0',
            'new_exports': 'OXSN',
            'u_exports': 'UXSN',
            'new_imports': 'OMSN',
            'u_imports': 'UMSN'
        },
        # OIGT minus OIGG, following the same key pattern as the balances
        # above (both terms used to be OIGG / UIGG).
        oigp: {
            'exports': 'OIGT.1.1.0.0',
            'imports': 'OIGG.1.1.0.0',
            'new_exports': 'OIGT',
            'u_exports': 'UIGT',
            'new_imports': 'OIGG',
            'u_imports': 'UIGG'
        },
    }
    for variable, sources in variables.items():
        try:
            base_series, splice_series_1, splice_series_2 = self._get_data(
                variable, sources, df, ameco_df)
        except TypeError:
            logger.error(
                'Missing data for variable {} in national accounts volume'.
                format(variable))
            # Nothing to store: the splice series are not available.
            continue
        self._update_result(variable, base_series, splice_series_1,
                            splice_series_2)

    # --- 3a. Net exports goods and services ---------------------------------
    var = 'OBGS.1.0.0.0'
    export_series = self.get_data(df, 'OXGN') + self.get_data(df, 'OXSN')
    import_series = self.get_data(df, 'OMGN') + self.get_data(df, 'OMSN')
    u_exports = self.get_data(df, 'UXGN') + self.get_data(df, 'UXSN')
    u_imports = self.get_data(df, 'UMGN') + self.get_data(df, 'UMSN')
    base_series = (self.get_data(ameco_df, 'OXGS.1.1.0.0')
                   - self.get_data(ameco_df, 'OMGS.1.1.0.0'))
    splice_series_1 = export_series - import_series
    splice_series_2 = ((export_series - import_series) /
                       (u_exports - u_imports).shift(1) - 1) * 100
    self._update_result(var, base_series, splice_series_1, splice_series_2)

    # --- 3b. Investments -----------------------------------------------------
    var = 'OIGNR.1.0.0.0'
    net_series = self.get_data(df, 'OIGCO') - self.get_data(df, 'OIGDW')
    u_net_series = self.get_data(df, 'UIGCO') - self.get_data(df, 'UIGDW')
    base_series = (self.get_data(ameco_df, 'OIGCO.1.1.0.0')
                   - self.get_data(ameco_df, 'OIGDW.1.1.0.0'))
    splice_series_1 = net_series.copy()
    splice_series_2 = (net_series / u_net_series.shift(1) - 1) * 100
    self._update_result(var, base_series, splice_series_1, splice_series_2)

    # --- 3c. Domestic demand ---------------------------------------------------
    var = 'OUNF.1.0.0.0'
    u_series = (self.get_data(df, 'UCPH') + self.get_data(df, 'UCTG')
                + self.get_data(df, 'UIGT'))
    base_series = (self.get_data(ameco_df, 'OCPH.1.1.0.0')
                   + self.get_data(ameco_df, 'OCTG.1.1.0.0')
                   + self.get_data(ameco_df, 'OIGT.1.1.0.0'))
    splice_series_1 = (self.get_data(df, 'OCPH') + self.get_data(df, 'OCTG')
                       + self.get_data(df, 'OIGT'))
    splice_series_2 = (splice_series_1 / u_series.shift(1) - 1) * 100
    self._update_result(var, base_series, splice_series_1, splice_series_2)

    # --- 3d. Domestic demand aggregates ----------------------------------------
    variables = {
        'OUNT.1.0.0.0': ['OUNT.1.1.0.0', 'OCPH', 'OCTG', 'OIGT', 'OIST'],
        'OUTT.1.0.0.0':
        ['OUTT.1.1.0.0', 'OCPH', 'OCTG', 'OIGT', 'OIST', 'OXGN', 'OXSN'],
        'OITT.1.0.0.0': ['OITT.1.0.0.0', 'OIGT', 'OIST']
    }
    for var, new_vars in variables.items():
        base_series = None
        splice_series_1 = sum([self.get_data(df, v) for v in new_vars[1:]])
        # NOTE(review): the base series is looked up in ``df`` although the
        # sections above read their ``.1.1.0.0`` base from ``ameco_df`` -
        # confirm which frame is intended.
        try:
            base_series = self.get_data(df, new_vars[0])
        except KeyError:
            logger.warning(
                'No historical data for {} to level_splice, country {}, using country forecast '
                'data.'.format(new_vars[0], self.country))
        if self.country not in FCWVACP:
            u_new_vars = [re.sub('^.', 'U', v) for v in new_vars[1:]]
            try:
                # Divide by the lagged sum of the *U* series, as in the
                # sections above (the O series used to be summed again).
                sum_u_series = sum(self.get_data(df, v) for v in u_new_vars)
                # Parenthesised like the other growth formulas; without
                # the parentheses ``1 * 100`` was subtracted instead.
                splice_series_2 = (
                    splice_series_1 / sum_u_series.shift(1) - 1) * 100
                self._update_result(var, base_series, splice_series_1,
                                    splice_series_2)
            except KeyError:
                logger.error(
                    'Missing data for variable {} in national accounts volume (172)'
                    .format(var))
        else:
            self._update_result(var, base_series, splice_series_1, None)

    # --- 4. Rebase to baseperiod, percent change, contribution to GDP ---------
    for var in NA_VO:
        new_variable = var + '.1.0.0.0'
        u1_variable = re.sub('^.', 'U', var) + '.1.0.0.0'
        # TODO: Review this
        new_vars = ['OXGS.1.0.0.0', 'OVGE.1.0.0.0']
        if new_variable in self.result['Variable Code'].values.tolist(
        ) + new_vars:
            if new_variable not in new_vars:
                result_series_index = self.get_index(new_variable)
                series_orig = self.result.loc[result_series_index]
                data_orig = pd.to_numeric(
                    series_orig.filter(regex=r'[0-9]{4}'), errors='coerce')
            else:
                # NOTE(review): for these two codes ``data_orig`` and
                # ``result_series_index`` keep the values of the previous
                # variable (see the TODO above) - behaviour left unchanged.
                logger.error(
                    'Missing data for variable {} in national accounts volume'
                    .format(u1_variable))

            # Rebase to baseperiod
            if u1_variable in df.index.get_level_values('Variable Code'):
                series_meta = self.get_meta(new_variable)
                u1_series = self.get_data(df, u1_variable)
                # NOTE(review): the factor is O[base] / U[base]; confirm
                # that this is the intended direction of the rebasing.
                value_to_rebase = data_orig[BASE_PERIOD] / u1_series[
                    BASE_PERIOD]
                series_data = data_orig * value_to_rebase
                series = pd.Series(series_meta)
                series = series.append(series_data)
                self.result.iloc[result_series_index] = series
            else:
                logger.error(
                    'Missing data for variable {} in national accounts volume'
                    .format(u1_variable))

            # Percent change
            variable_6 = var + '.6.0.0.0'
            series_meta = self.get_meta(variable_6)
            add_row(series_meta, data_orig.pct_change() * 100)

            # Contribution to percent change in GDP
            variable_c1 = re.sub('^.', 'C', var) + '.1.0.0.0'
            volume_based = self.country in ['MT', 'TR']
            variable_x = new_variable if volume_based else u1_variable
            xvgd = 'OVGD.1.0.0.0' if volume_based else 'UVGD.1.0.0.0'
            data_6 = self.get_data(self.result, variable_6)
            series_meta = self.get_meta(variable_c1)
            data_x = self.get_data(df, variable_x).shift(1)
            data_xvgd = self.get_data(df, xvgd).shift(1)
            # Fill the 1996 entries of the lagged series from AMECO where
            # available; the numerator patch is skipped for CBGN.
            if variable_c1 not in ['CBGN.1.0.0.0']:
                try:
                    data_x[1996] = self.get_data(ameco_df,
                                                 variable_x)[1996]
                except KeyError:
                    pass
            try:
                # Patch the GDP series itself; this value used to be
                # written into data_x, overwriting the line above.
                data_xvgd[1996] = self.get_data(ameco_df, xvgd)[1996]
            except KeyError:
                pass
            try:
                series_data = data_6 * data_x / data_xvgd
            except KeyError:
                logger.error(
                    'Missing data for variable {} in national accounts volume'
                    .format(new_variable))
                continue
            add_row(series_meta, series_data)
        else:
            logger.error(
                'Missing data for variable {} in national accounts volume'.
                format(new_variable))
        if new_variable == 'OVGD.1.0.0.0':
            ovgd1 = self.get_data(self.result, 'OVGD.1.0.0.0')

    # --- 5a. Contribution to percent change in GDP (additional variables) -----
    # Imports enter the GDP contribution with a negative sign.
    var = 'CMGS.1.0.0.0'
    series_meta = self.get_meta(var)
    series_data = -self.get_data(self.result, var)
    index = self.get_index(var)
    series = pd.Series(series_meta)
    series = series.append(series_data)
    self.result.iloc[index] = series

    var = 'CBGS.1.0.0.0'
    series_meta = self.get_meta(var)
    series_meta['Variable Code'] = var
    series_data = (self.get_data(self.result, 'CXGS.1.0.0.0')
                   + self.get_data(self.result, 'CMGS.1.0.0.0'))
    add_row(series_meta, series_data)

    # TODO: If Country in group 'Forecast: Countries with volumes at constant prices' line 202 country calc

    # --- 5b. Per-capita GDP -----------------------------------------------------
    # TODO: fix scale, frequency and country everywhere
    # TODO: fix this
    new_variable = 'RVGDP.1.0.0.0'
    ameco_variable = 'RVGDP.1.1.0.0'
    variable_6 = 'RVGDP.6.0.0.0'
    total_population = 'NPTD.1.0.0.0'
    series_meta = self.get_meta(new_variable)
    series_6_meta = self.get_meta(variable_6)
    ameco_series = self.get_data(ameco_df, ameco_variable)
    splice_series = ovgd1 / self.get_data(df, total_population)
    splicer = Splicer()
    series_data = splicer.ratio_splice(ameco_series, splice_series,
                                       kind='forward')
    series_6_data = series_data.pct_change() * 100
    add_row(series_meta, series_data)
    add_row(series_6_meta, series_6_data)

    # TODO: Do not add series if they're already there, i.e. df.loc['BE','UMGS'] is repeated

    # --- 5c. Terms of trade -------------------------------------------------------
    terms_of_trade = [
        ('APGN.3.0.0.0', 'UXGN.1.0.0.0', 'OXGN.1.0.0.0', 'UMGN.1.0.0.0',
         'OMGN.1.0.0.0'),
        ('APSN.3.0.0.0', 'UXSN.1.0.0.0', 'OXSN.1.0.0.0', 'UMSN.1.0.0.0',
         'OMSN.1.0.0.0'),
        ('APGS.3.0.0.0', 'UXGS.1.0.0.0', 'OXGS.1.0.0.0', 'UMGS.1.0.0.0',
         'OMGS.1.0.0.0'),
    ]
    for variable, export_u, export_o, import_u, import_o in terms_of_trade:
        series_meta = self.get_meta(variable)
        # Export price (value / volume) relative to the import price.
        series_data = (
            self.get_data(df, export_u) /
            self.get_data(self.result, export_o) /
            (self.get_data(df, import_u) /
             self.get_data(self.result, import_o))) * 100
        add_row(series_meta, series_data)

        variable_6 = variable.replace('3', '6')
        series_meta = self.get_meta(variable_6)
        add_row(series_meta, series_data.pct_change() * 100)

    # --- 5d. Set up OVGD.6.1.212.0 for World GDP volume table --------------------
    series_meta = self.get_meta('OVGD.6.1.212.0')
    add_row(series_meta, self.get_data(self.result, 'OVGD.6.0.0.0'))

    # --- 5e. Percent change of trade variables (volume), USD codes ---------------
    # The national-currency percent change is stored again under the
    # ``.6.0.30.0`` code; no conversion is applied here.
    for variable in T_VO:
        series_meta = self.get_meta(variable + '.6.0.30.0')
        add_row(series_meta,
                self.get_data(self.result, variable + '.6.0.0.0'))

    # TODO: This shouldn't be needed... Check what's going on
    series_meta = self.get_meta('OVGD.1.0.0.0')
    add_row(series_meta, ovgd1)

    self.result.set_index(['Country Ameco', 'Variable Code'],
                          drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=4, country=self.country)
    return self.result, ovgd1
def perform_computation(self, ameco_db_df, xr_df, ameco_xne_us_df):
    """Assemble the exchange-rate and interest-rate rows and export step 10.

    NOTE(review): this definition was stored with its line breaks collapsed,
    so the block nesting below is reconstructed from the statement order.
    Confirm against version control, in particular that the ``XNU.1.0.30.0``
    section runs for every country and not only in the non-EA branch.

    Rows appended to ``self.result``, in order:

    * ``XNE.1.0.99.0``: ``ameco_db_df`` data, ratio-spliced forward with the
      ``xr_df`` data when ``xr_df`` contains the variable.
    * ``ILN.1.0.0.0`` / ``ISN.1.0.0.0``: ``ameco_db_df`` data for
      ``ILN.1.1.0.0`` / ``ISN.1.1.0.0`` butt-spliced forward with ``xr_df``.
    * ``XNEF.1.0.99.0`` (only when ``self.country`` is in ``EA``) and
      ``XNEB.1.0.99.0``.
    * ``XNU.1.0.30.0``: ratio splice of the ``country='US'`` series.
    * Every listed effective-exchange-rate / unit-labour-cost code found in
      ``ameco_db_df``, plus percent changes of a subset of them.

    Codes that could not be read are written to ``errors_step_10.txt``.

    Args:
        ameco_db_df: frame read through ``self.get_data`` for the base series.
        xr_df: frame read through ``self.get_data`` for the splice series.
        ameco_xne_us_df: frame holding the ``country='US'`` base series.

    Returns:
        ``self.result`` after it has been re-indexed on
        ``['Country Ameco', 'Variable Code']``, ``self.apply_scale()`` has
        run and ``export_to_excel`` has been called with ``step=10``.
    """
    # ``pd.np`` is a deprecated alias of numpy; ``float('nan')`` is the very
    # same value as ``pd.np.nan`` and needs no alias.
    nan = float('nan')
    splicer = Splicer()
    missing_vars = []

    def add_row(meta, data):
        # One result row = metadata fields followed by the yearly values.
        row = pd.Series(meta).append(data)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    xne = 'XNE.1.0.99.0'
    series_data = self.get_data(ameco_db_df, xne)
    try:
        xr_data = self.get_data(xr_df, xne)
    except KeyError:
        # No splice series available: the AMECO data is used unchanged.
        pass
    else:
        # Blank everything after the first valid splice observation, then
        # let the splice fill those years.
        first_splice_year = xr_data.first_valid_index()
        for year in range(first_splice_year + 1, LAST_YEAR + 1):
            series_data[year] = nan
        series_data = splicer.ratio_splice(series_data.copy(), xr_data,
                                           kind='forward')
    add_row(self.get_meta(xne), series_data)

    null_dates = list(range(datetime.datetime.now().year - 1, LAST_YEAR))
    interest_rates = zip(['ILN.1.0.0.0', 'ISN.1.0.0.0'],
                         ['ILN.1.1.0.0', 'ISN.1.1.0.0'])
    for target, origin in interest_rates:
        meta = self.get_meta(target)
        base = self.get_data(ameco_db_df, origin, null_dates=null_dates)
        spliced = splicer.butt_splice(base, self.get_data(xr_df, origin),
                                      kind='forward')
        add_row(meta, spliced)

    xnef = 'XNEF.1.0.99.0'
    xneb = 'XNEB.1.0.99.0'
    if self.country in EA:
        membership_date = get_membership_date(self.country)
        # From the membership year onwards XNE is set to 1.
        is_xne = self.result['Variable Code'] == xne
        for year in range(membership_date, LAST_YEAR + 1):
            self.result.loc[is_xne, year] = 1

        # XNEF: AMECO XNE with its last valid value carried to LAST_YEAR.
        meta = self.get_meta(xnef)
        fixed_rate = self.get_data(ameco_db_df, xne)
        last_valid = fixed_rate.last_valid_index()
        if last_valid < LAST_YEAR:
            for year in range(last_valid + 1, LAST_YEAR + 1):
                fixed_rate[year] = fixed_rate[last_valid]
        add_row(meta, fixed_rate)

        # XNEB = XNE * XNEF, computed before XNEF is blanked from the
        # membership year onwards.
        meta = self.get_meta(xneb)
        product = self.get_data(self.result, xne) * self.get_data(
            self.result, xnef)
        is_xnef = self.result['Variable Code'] == xnef
        for year in range(membership_date, LAST_YEAR + 1):
            self.result.loc[is_xnef, year] = nan
        add_row(meta, product)
    else:
        meta = self.get_meta(xneb)
        add_row(meta, self.get_data(self.result, xne).copy())

    xne_us = self.get_data(xr_df, xne, country='US')
    first_us_year = xne_us.first_valid_index()
    new_xne_us = self.get_data(ameco_xne_us_df, xne, country='US')
    for year in range(first_us_year + 1, LAST_YEAR + 1):
        new_xne_us[year] = nan
    meta = self.get_meta('XNU.1.0.30.0')
    add_row(meta, splicer.ratio_splice(new_xne_us, xne_us, kind='forward'))

    # Effective exchange rates and relative unit labour costs, currently not
    # calculated in FDMS+
    variables = [
        'PLCDQ.3.0.0.437', 'PLCDQ.3.0.30.437', 'XUNNQ.3.0.30.437',
        'XUNRQ.3.0.30.437', 'PLCDQ.3.0.0.414', 'PLCDQ.3.0.0.415',
        'PLCDQ.3.0.0.417', 'PLCDQ.3.0.0.424', 'PLCDQ.3.0.0.427',
        'PLCDQ.3.0.0.435', 'PLCDQ.3.0.0.436', 'PLCDQ.3.0.30.414',
        'PLCDQ.3.0.30.415', 'PLCDQ.3.0.30.417', 'PLCDQ.3.0.30.424',
        'PLCDQ.3.0.30.427', 'PLCDQ.3.0.30.435', 'PLCDQ.3.0.30.436',
        'XUNNQ.3.0.30.414', 'XUNNQ.3.0.30.415', 'XUNNQ.3.0.30.417',
        'XUNNQ.3.0.30.423', 'XUNNQ.3.0.30.424', 'XUNNQ.3.0.30.427',
        'XUNNQ.3.0.30.435', 'XUNNQ.3.0.30.436', 'XUNNQ.3.0.30.441',
        'XUNRQ.3.0.30.414', 'XUNRQ.3.0.30.415', 'XUNRQ.3.0.30.417',
        'XUNRQ.3.0.30.424', 'XUNRQ.3.0.30.427', 'XUNRQ.3.0.30.435',
        'XUNRQ.3.0.30.436'
    ]
    for variable in variables:
        meta = self.get_meta(variable)
        try:
            series_data = self.get_data(ameco_db_df, variable)
        except KeyError:
            missing_vars.append(variable)
        else:
            add_row(meta, series_data)

    # Percent change of the index series appended just above; the order of
    # the pairs is the order in which the rows are appended.
    percent_changes = [
        ('PLCDQ.6.0.0.437', 'PLCDQ.3.0.0.437'),
        ('PLCDQ.6.0.0.435', 'PLCDQ.3.0.0.435'),
        ('PLCDQ.6.0.0.436', 'PLCDQ.3.0.0.436'),
        ('XUNNQ.6.0.30.437', 'XUNNQ.3.0.30.437'),
        ('XUNRQ.6.0.30.437', 'XUNRQ.3.0.30.437'),
        ('XUNNQ.6.0.30.435', 'XUNNQ.3.0.30.435'),
        ('XUNNQ.6.0.30.436', 'XUNNQ.3.0.30.436'),
        ('XUNRQ.6.0.30.435', 'XUNRQ.3.0.30.435'),
        ('XUNRQ.6.0.30.436', 'XUNRQ.3.0.30.436'),
    ]
    for target, origin in percent_changes:
        meta = self.get_meta(target)
        try:
            series_data = self.get_data(
                self.result, origin).copy().pct_change() * 100
        except (KeyError, IndexError):
            missing_vars.append(target)
        else:
            add_row(meta, series_data)

    # TODO: is it OK? these are missing in ameco_db: PLCDQ.3.0.0.414
    # PLCDQ.3.0.0.435 PLCDQ.3.0.0.436 PLCDQ.3.0.30.414 PLCDQ.3.0.30.435
    # PLCDQ.3.0.30.436 XUNNQ.3.0.30.414 XUNNQ.3.0.30.423 XUNNQ.3.0.30.435
    # XUNNQ.3.0.30.436 XUNNQ.3.0.30.441 XUNRQ.3.0.30.414 XUNRQ.3.0.30.435
    # XUNRQ.3.0.30.436 PLCDQ.6.0.0.435 PLCDQ.6.0.0.436
    with open('errors_step_10.txt', 'w') as f:
        f.write('\n'.join(missing_vars))

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True,
                          inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=10, country=self.country)
    return self.result
# Fiscal-sector aggregates; the result is exported with step=12.
#
# NOTE(review): this definition is stored with its line breaks collapsed, so
# the nesting of the statements (in particular how far the bodies of
# `if self.country not in EU:` / `if self.country != 'MK':` extend) cannot be
# read off the text. The summary below lists the statements in order only.
#
#   * A first `addends` mapping (target code -> list of component codes; one
#     component, '-UUTG.1.0.0.0', carries a leading '-') is handed to
#     self._sum_and_splice together with df and ameco_h_df. That helper is
#     defined elsewhere; presumably it sums the components and splices the
#     sum with ameco_h_df -- confirm.
#   * For country 'JP' the first component of 'UUCG.1.0.0.0' is replaced by
#     'UCTG.1.0.0.0' and the 'UTOG.1.0.0.0' entry is removed beforehand.
#   * Under the EU / 'MK' conditions, 'UBLGE.1.0.0.0' is appended as a copy of
#     'UBLG.1.0.0.0' read from self.result, and 'UYIGE.1.0.0.0' as a copy of
#     'UYIG.1.0.0.0' read from df.
#   * A second `addends` mapping is handed to self._sum_and_splice.
#   * 'UDGG.1.0.0.0' is appended as a copy of 'UDGGL.1.0.0.0' from self.result.
#   * self.result is re-indexed on ['Country Ameco', 'Variable Code'],
#     self.apply_scale() runs, and self.result is returned.
#
# `splicer` is only referenced by the commented-out code.
def perform_computation(self, df, ameco_h_df): splicer = Splicer() addends = { 'UTOG.1.0.0.0': ['UROG.1.0.0.0', 'UPOMN.1.0.0.0'], 'UUCG.1.0.0.0': [ 'UWCG.1.0.0.0', 'UYTGH.1.0.0.0', 'UYIG.1.0.0.0', 'UYVG.1.0.0.0', 'UUOG.1.0.0.0', 'UCTGI.1.0.0.0', 'UYTGM.1.0.0.0' ], 'URCG.1.0.0.0': ['UTVG.1.0.0.0', 'UTYG.1.0.0.0', 'UTSG.1.0.0.0', 'UTOG.1.0.0.0'], 'UUTG.1.0.0.0': ['UUCG.1.0.0.0', 'UIGG0.1.0.0.0', 'UKOG.1.0.0.0'], 'URTG.1.0.0.0': ['URCG.1.0.0.0', 'UKTTG.1.0.0.0'], 'UBLG.1.0.0.0': ['URTG.1.0.0.0', '-UUTG.1.0.0.0'], } # if country == JP: addends['UUCG.1.0.0.0'][0] = 'UCTG.1.0.0.0'; del(addends['UTOG.1.0.0.0']) if self.country == 'JP': addends['UUCG.1.0.0.0'][0] = 'UCTG.1.0.0.0' del addends['UTOG.1.0.0.0'] self._sum_and_splice(addends, df, ameco_h_df) # variable = 'UBLG.1.0.0.0' # sources = {variable: ['URTG.1.0.0.0', 'UUTG.1.0.0.0']} # series_meta = self.get_meta(variable) # splice_series = self.get_data(df, sources[variable][0]).subtract(self.get_data( # df, sources[variable][1], fill_value=0)) # if self.country == 'JP': # series_data = splice_series.copy() # else: # base_series = self.get_data(ameco_h_df, variable) # series_data = splicer.butt_splice(base_series, splice_series, kind='forward') # series = pd.Series(series_meta) # series = series.append(series_data) # self.result = self.result.append(series, ignore_index=True, sort=True) if self.country not in EU: if self.country != 'MK': variable = 'UBLGE.1.0.0.0' series_meta = self.get_meta(variable) series_data = self.get_data(self.result, 'UBLG.1.0.0.0') series = pd.Series(series_meta) series = series.append(series_data) self.result = self.result.append(series, ignore_index=True, sort=True) variable = 'UYIGE.1.0.0.0' series_meta = self.get_meta(variable) series_data = self.get_data(df, 'UYIG.1.0.0.0') series = pd.Series(series_meta) series = series.append(series_data) self.result = self.result.append(series, ignore_index=True, sort=True) addends = { 'UBLGI.1.0.0.0': ['UBLG.1.0.0.0', 'UYIG.1.0.0.0'], 'UBLGIE.1.0.0.0': 
['UBLGE.1.0.0.0', 'UYIGE.1.0.0.0'], 'UTAT.1.0.0.0': [ 'UTVG.1.0.0.0', 'UTYG.1.0.0.0', 'UTAG.1.0.0.0', 'UTKG.1.0.0.0', 'UTEU.1.0.0.0' ], 'UOOMS.1.0.0.0': ['UOOMSR.1.0.0.0', 'UOOMSE.1.0.0.0'], 'UTTG.1.0.0.0': ['UTVG.1.0.0.0', 'UTEU.1.0.0.0'], 'UDGGL.1.0.0.0': [ 'UDGG.1.0.0.0', ] } self._sum_and_splice(addends, df, ameco_h_df) variable = 'UDGG.1.0.0.0' series_meta = self.get_meta(variable) series_data = self.get_data(self.result, 'UDGGL.1.0.0.0') series = pd.Series(series_meta) series = series.append(series_data) self.result = self.result.append(series, ignore_index=True, sort=True) # TODO: copy EATTG, EATYG and EATSG from cyclical adjustment database # for variable in ['EATTG', 'EATYG', 'EATSG']: # series_meta = self.get_meta(variable) # series_data = self.get_data(self.result, 'UDGGL.1.0.0.0') # series = pd.Series(series_meta) # series = series.append(series_data) # self.result = self.result.append(series, ignore_index=True, sort=True) self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True) self.apply_scale() export_to_excel(self.result, step=12, country=self.country) return self.result
def perform_computation(self, df, ameco_df):
    """Convert transfer-matrix variables to ``.1.0.0.0`` and export step 1.

    For every index entry of ``df`` whose second level (the variable code) is
    in ``TM`` but not in ``NA_VO``, two rows are appended to ``self.result``:

    * ``<variable>.1.0.0.0``: the ``df`` series combined with the ``ameco_df``
      series of the same ``.1.0.0.0`` code. The combination is a forward butt
      splice for codes in ``TM_TBBO``, ``Operators.merge`` for codes in
      ``TM_TBM``, and otherwise a forward ratio splice followed by a forward
      butt splice.
    * ``<variable>``: an unchanged copy of the ``df`` series.

    When ``ameco_df`` lacks the ``.1.0.0.0`` code a warning is logged and
    ``None`` is passed on as the base series.
    NOTE(review): whether the splicer and ``Operators.merge`` accept a
    ``None`` base is not visible here -- confirm.

    NOTE(review): this definition was stored with its line breaks collapsed;
    the nesting below is reconstructed from the statement order.

    Args:
        df: frame whose index entries carry the variable code at position 1.
        ameco_df: frame read through ``self.get_data`` for the base series.

    Returns:
        ``self.result`` after it has been re-indexed on
        ``['Country Ameco', 'Variable Code']``, ``self.apply_scale()`` has
        run and ``export_to_excel`` has been called with ``step=1``.
    """
    # Built once: the original re-created both helpers for every row.
    splicer = Splicer()
    operators = Operators()

    # Only the index is needed, so the rows are not materialised.
    for index in df.index:
        variable = index[1]
        # National Account (volume) variables are not converted in this step.
        if variable not in TM or variable in NA_VO:
            continue

        new_variable = variable + '.1.0.0.0'
        meta = self.get_meta(variable)
        meta1000 = self.get_meta(new_variable)
        meta['Variable Code'] = variable
        meta1000['Variable Code'] = new_variable

        splice_series = self.get_data(df, variable)
        try:
            base_series = self.get_data(ameco_df, new_variable)
        except KeyError:
            base_series = None
            logger.warning(
                'Missing Ameco data for variable {} (transfer matrix)'
                .format(new_variable))

        # Snapshot the country desk series before any splicing touches it.
        orig_series = splice_series.copy()
        orig_series.name = None
        orig_series = pd.Series(meta).append(orig_series)

        if variable in TM_TBBO:
            new_series = splicer.butt_splice(base_series, splice_series,
                                             kind='forward')
        elif variable in TM_TBM:
            new_series = operators.merge(
                pd.DataFrame([splice_series, base_series]))
        else:
            ratio_spliced = splicer.ratio_splice(base_series, splice_series,
                                                 kind='forward')
            new_series = splicer.butt_splice(ratio_spliced, splice_series,
                                             kind='forward')
        new_series.name = None
        new_series = pd.Series(meta1000).append(new_series)

        self.result = self.result.append(new_series, ignore_index=True)
        self.result = self.result.append(orig_series, ignore_index=True)

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True,
                          inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=1, country=self.country)
    return self.result
# Capital stock and total factor productivity; exported with step=8.
#
# NOTE(review): this definition is stored with its line breaks collapsed, so
# the nesting of the statements (loop and `if` extents) cannot be read off the
# text. The summary below lists what the statements compute, in order.
#
# Rows appended to self.result:
#   * 'OIGT.1.0.0.0', 'OVGD.1.0.0.0', 'UIGT.1.0.0.0': data from df,
#     ratio-spliced backward with the ameco_db_df series and restricted to
#     YEARS. A variable missing from df is logged and skipped.
#   * 'UKCT.1.0.0.0': ameco_df data ratio-spliced backward with the
#     ameco_db_df series; when ameco_df lacks the variable, the ameco_db_df
#     series restricted to YEARS is used on its own.
#   * 'OKCT.1.0.0.0' = UKCT (from self.result) / (UIGT / OIGT) (from df).
#   * 'OINT.1.0.0.0' = OIGT (from df) - OKCT (from self.result).
#   * 'OKND.1.0.0.0': accumulated in `new_data`, seeded with 3 * OVGD (from
#     ameco_db_df) at the starting year and extended with the ameco_db_df
#     OINT series. The later year loop overwrites the OKCT, OINT and UKCT
#     cells of self.result and extends `new_data` up to LAST_YEAR.
#   * 'ZVGDFA3.3.0.0.0' = log(OVGD / ((NLHT9 * 1000) ** 0.65 * new_data ** 0.35)),
#     restricted to YEARS.
#
# `last_observation` falls back to 1993 whenever last_valid_index() does not
# return a plain Python int (the check is `type(...) != int`).
# Finally self.result is re-indexed on ['Country Ameco', 'Variable Code'],
# self.apply_scale() runs, and self.result is returned.
def perform_computation(self, df, ameco_df, ameco_db_df): '''Capital Stock and Total Factor Productivity''' # ameco_db_df should have data till 1960 variables = ['OIGT.1.0.0.0', 'OVGD.1.0.0.0', 'UIGT.1.0.0.0'] splicer = Splicer() for variable in variables: try: series_data = self.get_data(df, variable) except KeyError: logger.warning( 'Missing data for variable {} (Capital Stock)'.format( variable)) continue if series_data is not None: series_data = splicer.ratio_splice(series_data, self.get_data( ameco_db_df, variable), kind='backward', variable=variable)[YEARS] series_meta = self.get_meta(variable) series = pd.Series(series_meta) series = series.append(series_data) self.result = self.result.append(series, ignore_index=True, sort=True) # TODO: The AMECO_H.TXT only has data till 2017, we might need to update it variable = 'UKCT.1.0.0.0' try: ameco_data = self.get_data(ameco_df, variable) except KeyError: series_data = self.get_data(ameco_db_df, variable)[YEARS] else: series_data = splicer.ratio_splice(ameco_data, self.get_data( ameco_db_df, variable)[YEARS], kind='backward') series_meta = self.get_meta(variable) series = pd.Series(series_meta) series = series.append(series_data) self.result = self.result.append(series, ignore_index=True, sort=True) variable = 'OKCT.1.0.0.0' series_meta = self.get_meta(variable) series_data = self.get_data( self.result, 'UKCT.1.0.0.0') / (self.get_data(df, 'UIGT.1.0.0.0') / self.get_data(df, 'OIGT.1.0.0.0')) series = pd.Series(series_meta) series = series.append(series_data) self.result = self.result.append(series, ignore_index=True, sort=True) variable = 'OINT.1.0.0.0' series_meta = self.get_meta(variable) series_data = self.get_data(df, 'OIGT.1.0.0.0') - self.get_data( self.result, 'OKCT.1.0.0.0') series = pd.Series(series_meta) series = series.append(series_data) self.result = self.result.append(series, ignore_index=True, sort=True) variable = 'OKND.1.0.0.0' series_meta = self.get_meta(variable) series_1 = 
self.get_data(ameco_db_df, 'OVGD.1.0.0.0') series_2 = self.get_data(ameco_db_df, 'OIGT.1.0.0.0') if series_1.first_valid_index() + 1 < series_2.first_valid_index(): last_observation = series_2.first_valid_index() - 1 else: last_observation = series_1.first_valid_index() new_series = pd.Series(series_meta) oint_1 = self.get_data(ameco_db_df, 'OINT.1.0.0.0').copy() oigt_1 = self.get_data(self.result, 'OIGT.1.0.0.0').copy() new_data = pd.Series({ year: pd.np.nan for year in range(last_observation, LAST_YEAR + 1) }) new_data[last_observation] = 3 * series_1[last_observation] for year in range(last_observation + 1, LAST_YEAR): new_data[year] = new_data[year - 1] + oint_1[year] last_observation = self.result[ self.result['Variable Code'] == 'OKCT.1.0.0.0'].iloc[-1].last_valid_index() if type(last_observation) != int: last_observation = 1993 # Up until now we were discarding data before 1993, however here we need it if we want the same results # We need to pass all_data=True to read_ameco_db_xls and get the right ameco_db_df for year in range(last_observation + 1, LAST_YEAR + 1): self.result.loc[self.result['Variable Code'] == 'OKCT.1.0.0.0', [year]] = (new_data[year - 1] * self.result.loc[ self.result['Variable Code'] == 'OKCT.1.0.0.0', [year - 1]] / new_data[year - 2]).iloc[0, 0] new_data[year] = ( new_data[year - 1] + oigt_1[year] - self.result.loc[self.result['Variable Code'] == 'OKCT.1.0.0.0', [year]]).iloc[0, 0] self.result.loc[self.result['Variable Code'] == 'OINT.1.0.0.0', [year]] = (oigt_1[year] - self.result.loc[ self.result['Variable Code'] == 'OKCT.1.0.0.0', [year]]).iloc[0, 0] self.result.loc[ self.result['Variable Code'] == 'UKCT.1.0.0.0', [year]] = (self.result.loc[self.result['Variable Code'] == 'OKCT.1.0.0.0', [year]] * self.get_data(self.result, 'UIGT.1.0.0.0')[year] / oigt_1[year]).iloc[0, 0] new_series = new_series.append(new_data[YEARS].copy()) self.result = self.result.append(new_series, ignore_index=True, sort=True) # TODO: Fix this one, we get 
-6.897824 instead of -2.41 but it's because NLHT9.1.0.0.0 scale is wrong variable = 'ZVGDFA3.3.0.0.0' series_meta = self.get_meta(variable) series_3 = self.get_data(df, 'NLHT9.1.0.0.0') ovgd_1 = self.get_data(self.result, 'OVGD.1.0.0.0') series_data = pd.np.log( ovgd_1 / (pow(series_3 * 1000, 0.65) * pow(new_data, 0.35))) series = pd.Series(series_meta) series = series.append(series_data[YEARS].copy()) self.result = self.result.append(series, ignore_index=True, sort=True) self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True) self.apply_scale() export_to_excel(self.result, step=8, country=self.country) return self.result
def perform_computation(self, df, ameco_h_df):
    """Derive current-price national-accounts aggregates and export step 3.

    Every row appended to ``self.result`` is computed from series read out of
    ``df`` (never from rows added earlier in this method), in this order:

    1. Imports/exports of goods and services: goods + services.
    2. General-government gross fixed capital formation: copy of ``UIGG0``.
    3. Differences: net exports (exports - imports) and ``UIGP`` / ``UIGNR``.
    4. Domestic demand excluding stocks, including stocks, final demand and
       gross capital formation, as sums of their components.

    Compared with the previous version the dead locals (``country = 'BE'``
    and an unused ``Splicer()``) are gone and the seven copy-pasted loops
    over index-aligned lists are driven by tables.

    NOTE(review): this definition was stored with its line breaks collapsed;
    the nesting below is reconstructed from the statement order.

    Args:
        df: frame read through ``self.get_data``.
        ameco_h_df: unused; kept so the call signature stays unchanged.

    Returns:
        ``self.result`` after it has been re-indexed on
        ``['Country Ameco', 'Variable Code']``, ``self.apply_scale()`` has
        run and ``export_to_excel`` has been called with ``step=3``.
    """
    suffix = '.1.0.0.0'

    def add_row(meta, data):
        # One result row = metadata fields followed by the yearly values.
        row = pd.Series(meta).append(data)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    def add_sum(target, components):
        # Components are added left to right, as in the written-out sums.
        meta = self.get_meta(target)
        total = self.get_data(df, components[0])
        for code in components[1:]:
            total = total + self.get_data(df, code)
        add_row(meta, total)

    def add_difference(target, minuend, subtrahend):
        meta = self.get_meta(target)
        left = self.get_data(df, minuend)
        right = self.get_data(df, subtrahend)
        # Clear the right-hand name when the two names differ in type.
        if not isinstance(left.name, type(right.name)):
            right.name = None
        add_row(meta, left - right)

    # Imports and exports of goods and services at current prices (National
    # accounts), then gross fixed capital formation at current prices:
    # general government.
    leading_sums = [
        ('UMGS', ['UMGN', 'UMSN']),
        ('UXGS', ['UXGN', 'UXSN']),
        ('UMGS' + suffix, ['UMGN' + suffix, 'UMSN' + suffix]),
        ('UXGS' + suffix, ['UXGN' + suffix, 'UXSN' + suffix]),
        ('UIGG', ['UIGG0']),
        ('UIGG' + suffix, ['UIGG0' + suffix]),
    ]
    for target, components in leading_sums:
        add_sum(target, components)

    # Net exports of goods, services, and goods & services at current prices
    # (National accounts), followed by UIGP and UIGNR.
    differences = [
        ('UBGN', 'UXGN', 'UMGN'),
        ('UBSN', 'UXSN', 'UMSN'),
        ('UBGS', 'UXGS', 'UMGS'),
        # TODO: Check that this entry is correct -- unlike its neighbours it
        # reads the codes without the '.1.0.0.0' suffix.
        ('UBGN' + suffix, 'UXGN', 'UMGN'),
        ('UBSN' + suffix, 'UXSN' + suffix, 'UMSN' + suffix),
        ('UBGS' + suffix, 'UXGS' + suffix, 'UMGS' + suffix),
        ('UIGP', 'UIGT', 'UIGG'),
        ('UIGNR', 'UIGCO', 'UIGDW'),
        ('UIGP' + suffix, 'UIGT' + suffix, 'UIGG' + suffix),
        ('UIGNR' + suffix, 'UIGCO' + suffix, 'UIGDW' + suffix),
    ]
    for target, minuend, subtrahend in differences:
        add_difference(target, minuend, subtrahend)

    trailing_sums = [
        # Domestic demand excluding stocks at current prices
        ('UUNF', ['UCPH', 'UIGT', 'UCTG']),
        # Domestic demand including stocks at current prices
        ('UUNT', ['UCPH', 'UIGT', 'UCTG', 'UIST']),
        # Final demand at current prices
        ('UUTT', ['UCPH', 'UIGT', 'UCTG', 'UIST', 'UXGN', 'UXSN']),
        # Gross capital formation at current prices: total economy
        ('UITT', ['UIGT', 'UIST']),
    ]
    for target, components in trailing_sums:
        for tail in ('', suffix):
            add_sum(target + tail, [code + tail for code in components])

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True,
                          inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=3, country=self.country)
    return self.result
def perform_computation(self, df, ameco_df):
    """Compute the population / labour-force rows and export step 2.

    Rows appended to ``self.result``, in order:

    * ``NLTN.1.0.0.0``: AMECO series ratio-spliced forward with
      ``NUTN.1.0.0.0 + NETN.1.0.0.0`` from ``df``.
    * ``NSTD.1.0.0.0``: AMECO series (``None`` when missing) ratio-spliced
      forward with ``NETN.1.0.0.0 - NWTD.1.0.0.0`` from ``df``.
    * ``NETD.1.0.414.0``: ``NETD.1.0.0.0 / NPAN1.1.0.0.0 * 100`` from ``df``.
    * ``NECN.1.0.0.0``: AMECO series ratio-spliced forward with ``NETN``.
    * ``NLHT.1.0.0.0`` and ``NLHT9.1.0.0.0``: AMECO series ratio-spliced
      forward with ``NETD.1.0.0.0 * NLHA.1.0.0.0`` from ``df``.
    * ``NLCN.1.0.0.0``: AMECO series (``None`` when missing) ratio-spliced
      forward with the NECN data computed above plus ``NUTN.1.0.0.0``.

    Bug fixed: when AMECO had no ``NLCN.1.0.0.0`` the previous code logged
    "Using data from country desk forecast" but still passed the base series
    left over from the ``NSTD.1.0.0.0`` section to the splice. The base is now
    ``None`` in that case, exactly as in the NSTD section.

    NOTE(review): this definition was stored with its line breaks collapsed;
    the nesting below is reconstructed from the statement order.

    Args:
        df: frame read through ``self.get_data`` for the splice series.
        ameco_df: frame read through ``self.get_data`` for the base series.

    Returns:
        ``self.result`` after it has been re-indexed on
        ``['Country Ameco', 'Variable Code']``, ``self.apply_scale()`` has
        run and ``export_to_excel`` has been called with ``step=2``.

    Raises:
        KeyError: presumably raised by ``self.get_data`` when a series other
            than the AMECO NSTD / NLCN base is missing (the code only guards
            those two lookups) -- confirm against ``get_data``.
    """
    splicer = Splicer()

    def add_row(meta, data):
        # One result row = metadata fields followed by the yearly values.
        row = pd.Series(meta).append(data)
        self.result = self.result.append(row, ignore_index=True)

    def ameco_or_none(code):
        # Optional AMECO base: a missing series is logged and becomes None.
        try:
            return self.get_data(ameco_df, code)
        except KeyError:
            logger.warning(
                'Missing Ameco data for variable {} (population). Using data '
                'from country desk forecast'.format(code))
            return None

    # Total labour force (unemployed + employed)
    variable = 'NLTN.1.0.0.0'
    base_series = self.get_data(ameco_df, variable)
    splice_series = self.get_data(df, 'NUTN.1.0.0.0') + self.get_data(
        df, 'NETN.1.0.0.0')
    meta = self.get_meta(variable)
    add_row(meta, splicer.ratio_splice(base_series, splice_series,
                                       kind='forward'))

    # Self employed (employed - wage and salary earners)
    variable = 'NSTD.1.0.0.0'
    base_series = ameco_or_none(variable)
    splice_series = self.get_data(df, 'NETN.1.0.0.0') - self.get_data(
        df, 'NWTD.1.0.0.0')
    meta = self.get_meta(variable)
    add_row(meta, splicer.ratio_splice(base_series, splice_series,
                                       kind='forward', variable=variable))

    # Percentage employed (total employed / population of working age 15-64)
    meta = self.get_meta('NETD.1.0.414.0')
    add_row(meta, self.get_data(df, 'NETD.1.0.0.0') / self.get_data(
        df, 'NPAN1.1.0.0.0') * 100)

    # Civilian employment
    # NOTE(review): 'NETN' is read without the '.1.0.0.0' suffix used in the
    # other sections -- confirm this is intended.
    variable = 'NECN.1.0.0.0'
    meta = self.get_meta(variable)
    necn_data = splicer.ratio_splice(self.get_data(ameco_df, variable),
                                     self.get_data(df, 'NETN'),
                                     kind='forward')
    add_row(meta, necn_data)

    # Total annual hours worked, and the same for the total economy (for
    # internal use only): both use employed * average hours.
    for variable in ('NLHT.1.0.0.0', 'NLHT9.1.0.0.0'):
        total_hours_data = self.get_data(df, 'NETD.1.0.0.0') * self.get_data(
            df, 'NLHA.1.0.0.0')
        meta = self.get_meta(variable)
        add_row(meta, splicer.ratio_splice(self.get_data(ameco_df, variable),
                                           total_hours_data, kind='forward'))

    # Civilian labour force
    variable = 'NLCN.1.0.0.0'
    meta = self.get_meta(variable)
    base_series = ameco_or_none(variable)
    add_row(meta, splicer.ratio_splice(
        base_series, necn_data + self.get_data(df, 'NUTN.1.0.0.0'),
        kind='forward', variable=variable))

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True,
                          inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=2, country=self.country)
    return self.result