def _sum_and_splice(self, addends, df, ameco_h_df, splice=True):
    """Build each target series as a signed sum of sources and store it.

    For every ``target -> sources`` entry in *addends* the source series are
    added together (missing values count as 0).  A source code prefixed with
    ``'-'`` is subtracted instead.  When a source's scale differs from the
    target's, the source is multiplied by a power of 1000 derived from
    ``self.codes``.  Unless *splice* is ``False`` or no base series exists in
    *ameco_h_df*, the sum is butt-spliced forward onto that base series.
    One row (metadata followed by data) is appended to ``self.result`` per
    target.

    For country ``'JP'`` the targets ``UUTG.1.0.0.0`` and ``URTG.1.0.0.0``
    are ratio-spliced onto the base series instead; ``URTG.1.0.0.0`` uses
    ``UUTG.1.0.0.0 + UBLG.1.0.0.0`` read from ``self.result`` as its input.

    Args:
        addends: Mapping of target variable code to a list of source
            variable codes, each optionally prefixed with ``'-'``.
        df: Frame searched first for each source; on ``KeyError`` the
            source is read from ``self.result`` instead.
        ameco_h_df: Frame from which the base series of each target is read.
        splice: When ``False`` the plain sum is stored without butt-splicing.

    Returns:
        None. ``self.result`` is rebound to a frame containing the new rows.
    """
    splicer = Splicer()
    for variable, sources in addends.items():
        series_meta = self.get_meta(variable)
        try:
            base_series = self.get_data(ameco_h_df, variable)
        except KeyError:
            base_series = None

        # The target scale does not depend on the source: look it up once
        # instead of once per addend.
        expected_scale = self.get_scale(variable)

        # Explicit dtype: an empty Series created without one emits a pandas
        # deprecation warning and its default dtype differs across versions.
        splice_series = pd.Series(dtype='float64')
        for source in sources:
            factor = 1
            if source.startswith('-'):
                source = source[1:]
                factor = -1
            src_scale = self.get_scale(source, dataframe=df)
            if src_scale != expected_scale:
                factor *= pow(
                    1000, self.codes[src_scale] - self.codes[expected_scale])
            try:
                source_data = factor * self.get_data(df, source)
            except KeyError:
                # Sources computed earlier in this step live in self.result.
                source_data = factor * self.get_data(self.result, source)
            splice_series = splice_series.add(source_data, fill_value=0)

        if base_series is None or splice is False:
            series_data = splice_series
        else:
            series_data = splicer.butt_splice(
                base_series, splice_series, kind='forward')

        if self.country == 'JP' and variable in ['UUTG.1.0.0.0', 'URTG.1.0.0.0']:
            if variable == 'URTG.1.0.0.0':
                splice_series = (
                    self.get_data(self.result, 'UUTG.1.0.0.0')
                    + self.get_data(self.result, 'UBLG.1.0.0.0')
                )
            # NOTE(review): this runs even when base_series is None or
            # splice is False -- confirm Splicer.ratio_splice copes with that.
            series_data = splicer.ratio_splice(
                base_series, splice_series, kind='forward')

        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)
def perform_computation(self, df, ameco_df):
    """Compute the step 11 series and return the indexed, scaled result.

    Rows are appended to ``self.result`` for, in order:

    * ``FETD9.1.0.0.0`` and ``FWTD9.1.0.0.0``;
    * for each of ``UWCD``/``UWWD``/``UWSC``: the series divided by
      ``FWTD9`` (``H..W.1.0.0.0``) and a rebased variant (``R..C.3.1.0.0``);
    * the ratio series ``RVGDE``, ``RVGEW`` (twice), ``ZATN9``, ``ZETN9``
      and ``ZUTN9`` (the ``Z`` series are multiplied by 100);
    * ``FETD9.6.0.0.0`` (percentage change of ``FETD9``);
    * ``PLCD.3.1.0.0`` and ``QLCD.3.1.0.0`` (rebased ratios);
    * the ``.6.0.0.0`` percentage changes of eight previously stored series.

    Afterwards ``self.result`` is indexed by country and variable code,
    ``self.apply_scale()`` is applied and the frame is exported to Excel.

    Args:
        df: Frame from which the input series are read.
        ameco_df: Frame providing the base series used for splicing.

    Returns:
        ``self.result`` after indexing and scaling.
    """
    operators = Operators()
    splicer = Splicer()

    def store(meta, data):
        """Append one row (metadata fields followed by data) to the result."""
        row = pd.Series(meta).append(data)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    # --- FETD9 / FWTD9 ----------------------------------------------------
    fetd9_code = 'FETD9.1.0.0.0'
    fwtd9_code = 'FWTD9.1.0.0.0'
    if self.country in FCRIF:
        try:
            fetd9 = self.get_data(df, 'FETD.1.0.0.0')
            fwtd9 = self.get_data(df, 'FWTD.1.0.0.0')
        except KeyError:
            fetd9 = self.get_data(df, 'NETD.1.0.0.0')
            fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
    elif self.country == 'US':
        fetd9 = self.get_data(df, 'NETD.1.0.0.0')
        fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
    else:
        fetd9 = splicer.ratio_splice(
            self.get_data(ameco_df, fetd9_code),
            self.get_data(df, 'NETD'), kind='forward')
        # Splice FWTD9 onto its own base series; the previous code reused
        # the FETD9 base here, which looks like a copy-paste slip.
        fwtd9 = splicer.ratio_splice(
            self.get_data(ameco_df, fwtd9_code),
            self.get_data(df, 'NWTD'), kind='forward')
    store(self.get_meta(fetd9_code), fetd9.copy())
    store(self.get_meta(fwtd9_code), fwtd9.copy())

    # --- Series per FWTD9 and their rebased variants ----------------------
    private_consumption_u = 'UCPH.1.0.0.0'
    private_consumption_o = 'OCPH.1.0.0.0'
    for stem in ['UWCD', 'UWWD', 'UWSC']:
        source_code = stem + '.1.0.0.0'
        per_head_code = re.sub('^U', 'H', stem) + 'W.1.0.0.0'
        rebased_code = re.sub('^U', 'R', stem) + 'C.3.1.0.0'

        meta = self.get_meta(per_head_code)
        store(meta, self.get_data(df, source_code) / fwtd9)

        meta = self.get_meta(rebased_code)
        # NOTE(review): this divides by both UCPH and OCPH; confirm that is
        # the intended formula (kept exactly as it was).
        rebased = operators.rebase(
            self.get_data(df, source_code) / fwtd9
            / self.get_data(df, private_consumption_u)
            / self.get_data(df, private_consumption_o),
            base_period=BASE_PERIOD)
        store(meta, rebased)

    # --- Simple ratios: (target, numerator, denominator) ------------------
    ratio_specs = [
        ('RVGDE.1.0.0.0', 'OVGD.1.0.0.0', 'FETD9.1.0.0.0'),
        ('RVGEW.1.0.0.0', 'OVGE.1.0.0.0', 'FETD9.1.0.0.0'),
        ('RVGEW.1.0.0.0', 'OVGD.1.0.0.0', 'NETD.1.0.0.0'),
        ('ZATN9.1.0.0.0', 'NLTN.1.0.0.0', 'NPAN1.1.0.0.0'),
        ('ZETN9.1.0.0.0', 'NETN.1.0.0.0', 'NPAN1.1.0.0.0'),
        ('ZUTN9.1.0.0.0', 'NUTN.1.0.0.0', 'NLTN.1.0.0.0'),
    ]
    percent_codes = ['ZATN9.1.0.0.0', 'ZETN9.1.0.0.0', 'ZUTN9.1.0.0.0']
    for target, numerator, denominator in ratio_specs:
        meta = self.get_meta(target)
        if denominator == 'FETD9.1.0.0.0':
            # FETD9 was computed above and is not part of df.
            denominator_series = fetd9
        else:
            denominator_series = self.get_data(df, denominator)
        ratio = self.get_data(df, numerator) / denominator_series
        if target in percent_codes:
            ratio = ratio * 100
        store(meta, ratio)

    meta = self.get_meta('FETD9.6.0.0.0')
    store(meta, fetd9.pct_change() * 100)

    # --- ZUTN -------------------------------------------------------------
    # NOTE(review): the ZUTN series below is computed but never appended to
    # self.result; that behaviour is kept unchanged here.
    zutn_code = 'ZUTN.1.0.0.0'
    if self.country in EU:
        # ZUTN based on NUTN.1.0.0.0 and NETN.1.0.0.0 (18/01/2017) is
        # commented out in FDMS+
        last_observation = self.get_data(ameco_df, zutn_code).last_valid_index()
        nutn = self.get_data(df, 'NUTN')
        netn = self.get_data(df, 'NETN')
        zutn_data = round(nutn / (nutn + netn) * 100, 1) + round(
            self.get_data(ameco_df, 'NUTN.1.0.0.0')[last_observation]
            - nutn / (nutn[last_observation] + netn[last_observation]), 1)
        # The value computed above is immediately replaced by the base
        # series spliced with itself.
        zutn_data = splicer.butt_splice(
            self.get_data(ameco_df, zutn_code),
            self.get_data(ameco_df, zutn_code), kind='forward')
    else:
        try:
            netn1 = self.get_data(df, 'NETN.1.0.0.0')
        except KeyError:
            netn1 = self.get_data(df, 'NETN')
        nutn1 = self.get_data(df, 'NUTN.1.0.0.0')
        # netn1 is already the data series: use it directly rather than
        # passing it to get_data() as if it were a variable code.
        zutn_data = splicer.level_splice(
            self.get_data(ameco_df, zutn_code),
            nutn1 / (nutn1 + netn1) * 100)
    # NUTN ratiospliced (18/01/2017) is commented out in FDMS+

    # --- PLCD / QLCD: (target, numerator, denominator) --------------------
    cost_specs = [
        ('PLCD.3.1.0.0', 'HWCDW.1.0.0.0', 'RVGDE.1.0.0.0'),
        ('QLCD.3.1.0.0', 'PLCD.3.1.0.0', 'PVGD.3.1.0.0'),
    ]
    for target, numerator, denominator in cost_specs:
        meta = self.get_meta(target)
        # PVGD comes from the input frame; the other denominator was
        # produced earlier in this step and lives in self.result.
        if denominator == 'PVGD.3.1.0.0':
            denominator_series = self.get_data(df, denominator)
        else:
            denominator_series = self.get_data(self.result, denominator)
        rebased = operators.rebase(
            self.get_data(self.result, numerator) / denominator_series,
            base_period=BASE_PERIOD)
        store(meta, rebased)

    # --- Percentage changes of stored series ------------------------------
    level_codes = [
        'RWCDC.3.1.0.0', 'PLCD.3.1.0.0', 'QLCD.3.1.0.0', 'HWCDW.1.0.0.0',
        'HWSCW.1.0.0.0', 'HWWDW.1.0.0.0', 'RVGDE.1.0.0.0', 'RVGEW.1.0.0.0'
    ]
    for level_code in level_codes:
        # Swap the trailing eight-character suffix for '.6.0.0.0'.
        growth_code = re.sub('.....0.0$', '.6.0.0.0', level_code)
        meta = self.get_meta(growth_code)
        store(meta, self.get_data(self.result, level_code).pct_change() * 100)

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=11, country=self.country)
    return self.result
def perform_computation(self, result_1, result_7, ameco_h_df):
    """Compute the step 14 series and return the indexed, scaled result.

    Five sums (``UYOH``, ``UVGH``, ``UVGHA``, ``USGH``, ``UBLH``) are built
    through ``self._sum_and_splice`` with splicing disabled; between some of
    them the working input frame is rebuilt from ``self.result`` plus
    *result_1* so later sums can read earlier ones.  ``OVGHA.3.0.0.0`` and
    ``ASGH.1.0.0.0`` are then derived and appended, after which the result
    is indexed, scaled via ``self.apply_scale()`` and exported to Excel.

    Args:
        result_1: Frame used as the first input and concatenated into every
            rebuilt input frame.
        result_7: Frame from which ``PCPH.3.1.0.0`` is read.
        ameco_h_df: Frame passed to ``_sum_and_splice`` and from which the
            ``ASGH.1.0.0.0`` base series is read.

    Returns:
        ``self.result`` after indexing and scaling.
    """
    # TODO: Check the scales of the output variables
    splicer = Splicer()
    operators = Operators()
    index_columns = ['Country Ameco', 'Variable Code']

    # Each stage is (addends, refresh): when refresh is true the input frame
    # is rebuilt afterwards so the next stage can see the new rows.
    # UBLH uses *ignoremissingsubtract* rather than *ignoremissingsum*, hence
    # the minus sign on every addend but the first.
    stages = [
        ({'UYOH.1.0.0.0': ['UOGH.1.0.0.0', 'UYNH.1.0.0.0']}, True),
        ({
            'UVGH.1.0.0.0': [
                'UWCH.1.0.0.0', 'UYOH.1.0.0.0', 'UCTRH.1.0.0.0',
                '-UTYH.1.0.0.0', '-UCTPH.1.0.0.0'
            ]
        }, True),
        ({'UVGHA.1.0.0.0': ['UVGH.1.0.0.0', 'UEHH.1.0.0.0']}, False),
        ({
            'USGH.1.0.0.0': [
                'UWCH.1.0.0.0', 'UOGH.1.0.0.0', 'UYNH.1.0.0.0',
                'UCTRH.1.0.0.0', '-UTYH.1.0.0.0', '-UCTPH.1.0.0.0',
                'UEHH.1.0.0.0', '-UCPH0.1.0.0.0'
            ]
        }, True),
        ({
            'UBLH.1.0.0.0': ['USGH.1.0.0.0', '-UITH.1.0.0.0', '-UKOH.1.0.0.0']
        }, False),
    ]

    new_input_df = result_1
    for addends, refresh in stages:
        self._sum_and_splice(addends, new_input_df, ameco_h_df, splice=False)
        if refresh:
            indexed_result = self.result.set_index(index_columns, drop=True)
            new_input_df = pd.concat([indexed_result, result_1], sort=True)

    # OVGHA: UVGHA / PCPH, rebased, then rescaled by the base-period UVGHA.
    uvgha_data = self.get_data(new_input_df, 'UVGHA.1.0.0.0')
    pcph_data = self.get_data(result_7, 'PCPH.3.1.0.0')
    uvgha_base_period = uvgha_data.loc[BASE_PERIOD]
    rebased = operators.rebase(uvgha_data / pcph_data, BASE_PERIOD)
    ovgha_data = rebased / 100 * uvgha_base_period
    ovgha_row = pd.Series(self.get_meta('OVGHA.3.0.0.0')).append(ovgha_data)
    self.result = self.result.append(ovgha_row, ignore_index=True, sort=True)

    # ASGH: USGH as a percentage of UVGHA, butt-spliced onto the base series.
    usgh_data = self.get_data(new_input_df, 'USGH.1.0.0.0')
    uvgha_data = self.get_data(new_input_df, 'UVGHA.1.0.0.0')
    asgh_ameco_h = self.get_data(ameco_h_df, 'ASGH.1.0.0.0')
    asgh_data = splicer.butt_splice(asgh_ameco_h, usgh_data / uvgha_data * 100)
    asgh_row = pd.Series(self.get_meta('ASGH.1.0.0.0')).append(asgh_data)
    self.result = self.result.append(asgh_row, ignore_index=True, sort=True)

    self.result.set_index(index_columns, drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=14, country=self.country)
    return self.result
def perform_computation(self, ameco_db_df, xr_df, ameco_xne_us_df):
    """Compute the step 10 series and return the indexed, scaled result.

    Rows are appended to ``self.result`` for ``XNE.1.0.99.0``, the two
    series ``ILN.1.0.0.0``/``ISN.1.0.0.0``, ``XNEF``/``XNEB`` (depending on
    whether ``self.country`` is in ``EA``), ``XNU.1.0.30.0``, a list of
    series copied straight from *ameco_db_df* and the percentage changes of
    some of them.  Codes whose data cannot be found are written to
    ``errors_step_10.txt``.  The result is then indexed, scaled via
    ``self.apply_scale()`` and exported to Excel.

    Args:
        ameco_db_df: Frame from which the base series are read.
        xr_df: Frame from which the series spliced onto the base are read.
        ameco_xne_us_df: Frame from which the US ``XNE.1.0.99.0`` base
            series is read.

    Returns:
        ``self.result`` after indexing and scaling.
    """
    splicer = Splicer()
    xne_code = 'XNE.1.0.99.0'

    def store(meta, data):
        """Append one row (metadata fields followed by data) to the result."""
        row = pd.Series(meta).append(data)
        self.result = self.result.append(row, ignore_index=True, sort=True)

    # --- XNE --------------------------------------------------------------
    xne_data = self.get_data(ameco_db_df, xne_code)
    try:
        xr_data = self.get_data(xr_df, xne_code)
    except KeyError:
        # Nothing to splice: the base series is stored unchanged.
        pass
    else:
        # Blank the base series after the first valid year of xr_data
        # before ratio-splicing the two.
        first_valid = xr_data.first_valid_index()
        for year in range(first_valid + 1, LAST_YEAR + 1):
            xne_data[year] = pd.np.nan
        xne_data = splicer.ratio_splice(xne_data.copy(), xr_data, kind='forward')
    store(self.get_meta(xne_code), xne_data)

    # --- ILN / ISN: (target, source) --------------------------------------
    null_dates = list(range(datetime.datetime.now().year - 1, LAST_YEAR))
    for target, source in zip(['ILN.1.0.0.0', 'ISN.1.0.0.0'],
                              ['ILN.1.1.0.0', 'ISN.1.1.0.0']):
        meta = self.get_meta(target)
        base = self.get_data(ameco_db_df, source, null_dates=null_dates)
        spliced = splicer.butt_splice(
            base, self.get_data(xr_df, source), kind='forward')
        store(meta, spliced)

    # --- XNEF / XNEB ------------------------------------------------------
    if self.country in EA:
        membership_date = get_membership_date(self.country)
        member_years = range(membership_date, LAST_YEAR + 1)

        # From the membership year onwards the stored XNE row is set to 1.
        for year in member_years:
            self.result.loc[self.result['Variable Code'] == 'XNE.1.0.99.0', year] = 1

        meta = self.get_meta('XNEF.1.0.99.0')
        xnef_data = self.get_data(ameco_db_df, 'XNE.1.0.99.0')
        last_valid = xnef_data.last_valid_index()
        if last_valid < LAST_YEAR:
            # Carry the last valid value forward up to LAST_YEAR.
            for year in range(last_valid + 1, LAST_YEAR + 1):
                xnef_data[year] = xnef_data[last_valid]
        store(meta, xnef_data)

        meta = self.get_meta('XNEB.1.0.99.0')
        xneb_data = (self.get_data(self.result, 'XNE.1.0.99.0')
                     * self.get_data(self.result, 'XNEF.1.0.99.0'))
        # XNEF is blanked for the membership years once XNEB is computed.
        for year in member_years:
            self.result.loc[self.result['Variable Code'] == 'XNEF.1.0.99.0', year] = pd.np.nan
        store(meta, xneb_data)
    else:
        meta = self.get_meta('XNEB.1.0.99.0')
        store(meta, self.get_data(self.result, 'XNE.1.0.99.0').copy())

    # --- XNU --------------------------------------------------------------
    xne_us = self.get_data(xr_df, 'XNE.1.0.99.0', country='US')
    last_observation = xne_us.first_valid_index()
    new_xne_us = self.get_data(ameco_xne_us_df, 'XNE.1.0.99.0', country='US')
    for year in range(last_observation + 1, LAST_YEAR + 1):
        new_xne_us[year] = pd.np.nan
    meta = self.get_meta('XNU.1.0.30.0')
    store(meta, splicer.ratio_splice(new_xne_us, xne_us, kind='forward'))

    # Effective exchange rates and relative unit labour costs, currently not
    # calculated in FDMS+
    passthrough_codes = [
        'PLCDQ.3.0.0.437', 'PLCDQ.3.0.30.437', 'XUNNQ.3.0.30.437',
        'XUNRQ.3.0.30.437', 'PLCDQ.3.0.0.414', 'PLCDQ.3.0.0.415',
        'PLCDQ.3.0.0.417', 'PLCDQ.3.0.0.424', 'PLCDQ.3.0.0.427',
        'PLCDQ.3.0.0.435', 'PLCDQ.3.0.0.436', 'PLCDQ.3.0.30.414',
        'PLCDQ.3.0.30.415', 'PLCDQ.3.0.30.417', 'PLCDQ.3.0.30.424',
        'PLCDQ.3.0.30.427', 'PLCDQ.3.0.30.435', 'PLCDQ.3.0.30.436',
        'XUNNQ.3.0.30.414', 'XUNNQ.3.0.30.415', 'XUNNQ.3.0.30.417',
        'XUNNQ.3.0.30.423', 'XUNNQ.3.0.30.424', 'XUNNQ.3.0.30.427',
        'XUNNQ.3.0.30.435', 'XUNNQ.3.0.30.436', 'XUNNQ.3.0.30.441',
        'XUNRQ.3.0.30.414', 'XUNRQ.3.0.30.415', 'XUNRQ.3.0.30.417',
        'XUNRQ.3.0.30.424', 'XUNRQ.3.0.30.427', 'XUNRQ.3.0.30.435',
        'XUNRQ.3.0.30.436'
    ]
    missing_vars = []
    for code in passthrough_codes:
        meta = self.get_meta(code)
        try:
            data = self.get_data(ameco_db_df, code)
        except KeyError:
            missing_vars.append(code)
        else:
            store(meta, data)

    # Percentage changes of series stored above: (target, source).
    growth_pairs = [
        ('PLCDQ.6.0.0.437', 'PLCDQ.3.0.0.437'),
        ('PLCDQ.6.0.0.435', 'PLCDQ.3.0.0.435'),
        ('PLCDQ.6.0.0.436', 'PLCDQ.3.0.0.436'),
        ('XUNNQ.6.0.30.437', 'XUNNQ.3.0.30.437'),
        ('XUNRQ.6.0.30.437', 'XUNRQ.3.0.30.437'),
        ('XUNNQ.6.0.30.435', 'XUNNQ.3.0.30.435'),
        ('XUNNQ.6.0.30.436', 'XUNNQ.3.0.30.436'),
        ('XUNRQ.6.0.30.435', 'XUNRQ.3.0.30.435'),
        ('XUNRQ.6.0.30.436', 'XUNRQ.3.0.30.436'),
    ]
    for target, source in growth_pairs:
        meta = self.get_meta(target)
        try:
            growth = self.get_data(self.result, source).copy().pct_change() * 100
        except (KeyError, IndexError):
            missing_vars.append(target)
        else:
            store(meta, growth)

    # TODO: is it OK? these are missing in ameco_db: PLCDQ.3.0.0.414
    # PLCDQ.3.0.0.435 PLCDQ.3.0.0.436 PLCDQ.3.0.30.414 PLCDQ.3.0.30.435
    # PLCDQ.3.0.30.436 XUNNQ.3.0.30.414 XUNNQ.3.0.30.423 XUNNQ.3.0.30.435
    # XUNNQ.3.0.30.436 XUNNQ.3.0.30.441 XUNRQ.3.0.30.414 XUNRQ.3.0.30.435
    # XUNRQ.3.0.30.436 PLCDQ.6.0.0.435 PLCDQ.6.0.0.436
    with open('errors_step_10.txt', 'w') as f:
        f.write('\n'.join(missing_vars))

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=10, country=self.country)
    return self.result
def perform_computation(self, df, ameco_df):
    """Compute the step 1 series and return the indexed, scaled result.

    For every row of *df* whose variable code (second index level) is in
    ``TM`` but not in ``NA_VO``, two rows are appended to ``self.result``:
    a ``<code>.1.0.0.0`` series combining the base series from *ameco_df*
    with the series from *df*, followed by the original series from *df*.
    How the two are combined depends on the code:

    * in ``TM_TBBO``: forward butt-splice;
    * in ``TM_TBM``: ``Operators.merge`` of the two series;
    * otherwise: forward ratio-splice followed by a forward butt-splice.

    The result is then indexed, scaled via ``self.apply_scale()`` and
    exported to Excel.

    Args:
        df: Frame whose rows are scanned and from which the series to
            splice are read.
        ameco_df: Frame from which the ``<code>.1.0.0.0`` base series are
            read; a missing base series is logged and treated as ``None``.

    Returns:
        ``self.result`` after indexing and scaling.
    """
    for row_key in df.index:
        variable = row_key[1]
        # Convert all transfer matrix variables to 1.0.0.0 (except National
        # Account (volume)) and splice in country desk forecast
        if variable not in TM or variable in NA_VO:
            continue

        splicer = Splicer()
        operators = Operators()
        meta = self.get_meta(variable)
        new_variable = variable + '.1.0.0.0'
        meta1000 = self.get_meta(new_variable)
        meta['Variable Code'] = variable
        meta1000['Variable Code'] = new_variable

        splice_series = self.get_data(df, variable)
        try:
            base_series = self.get_data(ameco_df, new_variable)
        except KeyError:
            base_series = None
            logger.warning(
                'Missing Ameco data for variable {} (transfer matrix)'
                .format(new_variable))

        # Row carrying the untouched input series under its original code.
        orig_series = splice_series.copy()
        orig_series.name = None
        orig_series = pd.Series(meta).append(orig_series)

        if variable in TM_TBBO:
            new_series = splicer.butt_splice(
                base_series, splice_series, kind='forward')
        elif variable in TM_TBM:
            new_series = operators.merge(
                pd.DataFrame([splice_series, base_series]))
        else:
            ratio_spliced = splicer.ratio_splice(
                base_series, splice_series, kind='forward')
            new_series = splicer.butt_splice(
                ratio_spliced, splice_series, kind='forward')

        # The name is cleared before the series is joined to its metadata.
        new_series.name = None
        new_series = pd.Series(meta1000).append(new_series)
        self.result = self.result.append(new_series, ignore_index=True)
        self.result = self.result.append(orig_series, ignore_index=True)

    self.result.set_index(['Country Ameco', 'Variable Code'], drop=True, inplace=True)
    self.apply_scale()
    export_to_excel(self.result, step=1, country=self.country)
    return self.result