示例#1
0
文件: mixins.py 项目: e/FDMS
    def _sum_and_splice(self, addends, df, ameco_h_df, splice=True):
        """Build each target variable as a signed, scale-adjusted sum of sources.

        For every ``variable -> sources`` entry the source series are added
        together, optionally spliced onto the matching base series, and the
        outcome is appended to ``self.result`` as one row (metadata followed
        by the data values).

        Args:
            addends: Mapping of target variable code to a list of source
                variable codes. A source code prefixed with ``'-'`` is
                subtracted instead of added.
            df: Frame the source series are read from first. A source that
                raises ``KeyError`` there is read from ``self.result``.
            ameco_h_df: Frame the base series of each target variable is read
                from. When the lookup raises ``KeyError`` the variable is
                stored without splicing.
            splice: When ``False`` the summed series is stored unspliced;
                otherwise it is butt-spliced forward onto the base series.
        """
        splicer = Splicer()
        for variable, sources in addends.items():
            series_meta = self.get_meta(variable)
            try:
                base_series = self.get_data(ameco_h_df, variable)
            except KeyError:
                base_series = None

            # The target scale does not depend on the source, so it is looked
            # up once per variable rather than once per source.
            expected_scale = self.get_scale(variable)
            splice_series = pd.Series()
            for source in sources:
                factor = 1
                if source.startswith('-'):
                    source = source[1:]
                    factor = -1
                src_scale = self.get_scale(source, dataframe=df)
                if src_scale != expected_scale:
                    # Convert the source to the target scale in powers of 1000.
                    factor = factor * pow(
                        1000,
                        self.codes[src_scale] - self.codes[expected_scale])
                try:
                    source_data = factor * self.get_data(df, source)
                except KeyError:
                    source_data = factor * self.get_data(self.result, source)
                # fill_value=0 lets a source contribute where the running sum
                # has no value yet.
                splice_series = splice_series.add(source_data, fill_value=0)

            if base_series is None or splice is False:
                series_data = splice_series
            else:
                series_data = splicer.butt_splice(base_series, splice_series, kind='forward')

            # Special case for JP: these two variables are always ratio-spliced,
            # regardless of the ``splice`` flag.
            if self.country == 'JP' and variable in ['UUTG.1.0.0.0', 'URTG.1.0.0.0']:
                if variable == 'URTG.1.0.0.0':
                    # URTG is rebuilt from two series already in self.result.
                    new_sources = ['UUTG.1.0.0.0', 'UBLG.1.0.0.0']
                    splice_series = self.get_data(
                        self.result, new_sources[0]) + self.get_data(
                        self.result, new_sources[1]
                    )
                # NOTE(review): base_series may be None here if the variable is
                # missing from ameco_h_df -- confirm ratio_splice accepts that.
                series_data = splicer.ratio_splice(base_series, splice_series, kind='forward')

            series = pd.Series(series_meta)
            series = series.append(series_data)
            self.result = self.result.append(series, ignore_index=True, sort=True)
示例#2
0
文件: labour_market.py 项目: e/FDMS
    def perform_computation(self, df, ameco_df):
        """Compute the step-11 series and return the indexed result frame.

        Every computed series is appended to ``self.result`` as one row
        (metadata from ``self.get_meta`` followed by the data values). At the
        end the result is indexed by ``['Country Ameco', 'Variable Code']``,
        ``self.apply_scale()`` is called and the frame is exported with
        ``export_to_excel(..., step=11)``.

        Args:
            df: Frame the input series are read from.
            ameco_df: Frame the AMECO base series used for splicing are read
                from.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        operators = Operators()
        splicer = Splicer()

        def store(code, data):
            # Append one output row: metadata for ``code`` followed by ``data``.
            row = pd.Series(self.get_meta(code)).append(data)
            self.result = self.result.append(row, ignore_index=True, sort=True)

        # --- FETD9 / FWTD9 -------------------------------------------------
        variables = ['FETD9.1.0.0.0', 'FWTD9.1.0.0.0']
        if self.country in FCRIF:
            try:
                fetd9 = self.get_data(df, 'FETD.1.0.0.0')
                fwtd9 = self.get_data(df, 'FWTD.1.0.0.0')
            except KeyError:
                fetd9 = self.get_data(df, 'NETD.1.0.0.0')
                fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
        elif self.country == 'US':
            fetd9 = self.get_data(df, 'NETD.1.0.0.0')
            fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
        else:
            fetd9 = splicer.ratio_splice(
                self.get_data(ameco_df, variables[0]),
                self.get_data(df, 'NETD'),
                kind='forward')
            # Fixed: FWTD9 was spliced onto the AMECO base of variables[0]
            # (FETD9); it now uses its own AMECO series.
            fwtd9 = splicer.ratio_splice(
                self.get_data(ameco_df, variables[1]),
                self.get_data(df, 'NWTD'),
                kind='forward')
        store(variables[0], fetd9.copy())
        store(variables[1], fwtd9.copy())

        # --- H*W (source / FWTD9) and R*C (rebased) series -------------------
        variables = ['UWCD', 'UWWD', 'UWSC']
        variables_1 = [variable + '.1.0.0.0' for variable in variables]
        variables_h1 = [
            re.sub('^U', 'H', variable) + 'W.1.0.0.0' for variable in variables
        ]
        variables_r1 = [
            re.sub('^U', 'R', variable) + 'C.3.1.0.0' for variable in variables
        ]
        private_consumption_u = 'UCPH.1.0.0.0'
        private_consumption_o = 'OCPH.1.0.0.0'
        for source, code_h, code_r in zip(variables_1, variables_h1,
                                          variables_r1):
            store(code_h, self.get_data(df, source) / fwtd9)

            # NOTE(review): this divides by UCPH and then by OCPH, i.e. by
            # their product. If the ratio UCPH / OCPH was intended as the
            # divisor, parentheses are missing -- confirm against the spec.
            store(
                code_r,
                operators.rebase(
                    self.get_data(df, source) / fwtd9 /
                    self.get_data(df, private_consumption_u) /
                    self.get_data(df, private_consumption_o),
                    base_period=BASE_PERIOD))

        # --- Simple ratios (numerator / denominator) -------------------------
        # NOTE(review): 'RVGEW.1.0.0.0' is listed twice with different
        # numerator/denominator pairs, so two rows with that code are appended
        # -- confirm the intended variable codes.
        variables = [
            'RVGDE.1.0.0.0', 'RVGEW.1.0.0.0', 'RVGEW.1.0.0.0', 'ZATN9.1.0.0.0',
            'ZETN9.1.0.0.0', 'ZUTN9.1.0.0.0'
        ]
        numerators = [
            'OVGD.1.0.0.0', 'OVGE.1.0.0.0', 'OVGD.1.0.0.0', 'NLTN.1.0.0.0',
            'NETN.1.0.0.0', 'NUTN.1.0.0.0'
        ]
        denominators = [
            'FETD9.1.0.0.0', 'FETD9.1.0.0.0', 'NETD.1.0.0.0', 'NPAN1.1.0.0.0',
            'NPAN1.1.0.0.0', 'NLTN.1.0.0.0'
        ]
        percentage_codes = ['ZATN9.1.0.0.0', 'ZETN9.1.0.0.0', 'ZUTN9.1.0.0.0']
        for code, numerator, denominator in zip(variables, numerators,
                                                denominators):
            if denominator == 'FETD9.1.0.0.0':
                # FETD9 was computed above and is not available in ``df``.
                denominator_series = fetd9
            else:
                denominator_series = self.get_data(df, denominator)
            series_data = self.get_data(df, numerator) / denominator_series
            if code in percentage_codes:
                series_data = series_data * 100
            store(code, series_data)

        store('FETD9.6.0.0.0', fetd9.pct_change() * 100)

        # --- ZUTN -----------------------------------------------------------
        # NOTE(review): the ZUTN series computed in this section is never
        # appended to self.result -- confirm whether that is intentional.
        variable = 'ZUTN.1.0.0.0'
        if self.country in EU:
            # ZUTN based on NUTN.1.0.0.0 and NETN.1.0.0.0 (18/01/2017) is commented out in FDMS+
            last_observation = self.get_data(ameco_df,
                                             variable).last_valid_index()
            series_meta = self.get_meta(variable)
            series_data = round(
                self.get_data(df, 'NUTN') /
                (self.get_data(df, 'NUTN') + self.get_data(df, 'NETN')) * 100,
                1) + round(
                    self.get_data(ameco_df, 'NUTN.1.0.0.0')[last_observation] -
                    self.get_data(df, 'NUTN') /
                    (self.get_data(df, 'NUTN')[last_observation] +
                     self.get_data(df, 'NETN')[last_observation]), 1)
            # The formula above is immediately replaced by the AMECO series
            # spliced onto itself.
            series_data = splicer.butt_splice(
                self.get_data(ameco_df, variable),
                self.get_data(ameco_df, variable),
                kind='forward')
        else:
            try:
                netn1 = self.get_data(df, 'NETN.1.0.0.0')
            except KeyError:
                netn1 = self.get_data(df, 'NETN')
            # Fixed: ``netn1`` already holds the series; it used to be passed
            # to ``self.get_data`` as if it were a variable code.
            series_data = splicer.level_splice(
                self.get_data(ameco_df, variable),
                self.get_data(df, 'NUTN.1.0.0.0') /
                (self.get_data(df, 'NUTN.1.0.0.0') + netn1) * 100)

        # NUTN ratiospliced (18/01/2017) is commented out in FDMS+

        # --- PLCD / QLCD (rebased ratios of previously stored series) ---------
        variables = ['PLCD.3.1.0.0', 'QLCD.3.1.0.0']
        numerators = ['HWCDW.1.0.0.0', 'PLCD.3.1.0.0']
        denominators = ['RVGDE.1.0.0.0', 'PVGD.3.1.0.0']
        for code, numerator, denominator in zip(variables, numerators,
                                                denominators):
            if denominator == 'PVGD.3.1.0.0':
                # PVGD is an input series; the other denominator was computed
                # earlier in this method.
                denominator_series = self.get_data(df, denominator)
            else:
                denominator_series = self.get_data(self.result, denominator)
            store(
                code,
                operators.rebase(
                    self.get_data(self.result, numerator) / denominator_series,
                    base_period=BASE_PERIOD))

        # --- Percentage changes of stored series ('.6.0.0.0' codes) ----------
        variables = [
            'RWCDC.3.1.0.0', 'PLCD.3.1.0.0', 'QLCD.3.1.0.0', 'HWCDW.1.0.0.0',
            'HWSCW.1.0.0.0', 'HWWDW.1.0.0.0', 'RVGDE.1.0.0.0', 'RVGEW.1.0.0.0'
        ]
        for code in variables:
            growth_code = re.sub('.....0.0$', '.6.0.0.0', code)
            store(growth_code,
                  self.get_data(self.result, code).pct_change() * 100)

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=11, country=self.country)
        return self.result
示例#3
0
    def perform_computation(self, result_1, result_7, ameco_h_df):
        """Compute the step-14 series and return the indexed result frame.

        A chain of ``_sum_and_splice`` calls (all with ``splice=False``)
        builds UYOH, UVGH, UVGHA, USGH and UBLH; OVGHA.3.0.0.0 and
        ASGH.1.0.0.0 are then derived and appended to ``self.result``.
        Finally the result is indexed by ``['Country Ameco', 'Variable Code']``,
        scaled with ``self.apply_scale()`` and exported with
        ``export_to_excel(..., step=14)``.

        Args:
            result_1: Frame of input series; also concatenated below the
                intermediate results so later sums can read both.
            result_7: Frame that PCPH.3.1.0.0 is read from.
            ameco_h_df: Frame the base series (and ASGH.1.0.0.0) are read from.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        # TODO: Check the scales of the output variables
        splicer = Splicer()
        operators = Operators()

        def current_inputs():
            # Results computed so far (re-indexed) stacked on top of result_1.
            indexed = self.result.set_index(
                ['Country Ameco', 'Variable Code'], drop=True)
            return pd.concat([indexed, result_1], sort=True)

        def append_row(code, values):
            # Append one output row: metadata for ``code`` followed by values.
            row = pd.Series(self.get_meta(code))
            row = row.append(values)
            self.result = self.result.append(row, ignore_index=True, sort=True)

        # First we will calculate ASGH.1.0.0.0 and OVGHA.3.0.0.0, and then we will use the _sum_and_splice method
        # From SumAndSpliceMixin to calculate all the rest
        self._sum_and_splice(
            {'UYOH.1.0.0.0': ['UOGH.1.0.0.0', 'UYNH.1.0.0.0']},
            result_1, ameco_h_df, splice=False)

        new_input_df = current_inputs()
        self._sum_and_splice(
            {
                'UVGH.1.0.0.0': [
                    'UWCH.1.0.0.0', 'UYOH.1.0.0.0', 'UCTRH.1.0.0.0',
                    '-UTYH.1.0.0.0', '-UCTPH.1.0.0.0'
                ]
            },
            new_input_df, ameco_h_df, splice=False)

        new_input_df = current_inputs()
        self._sum_and_splice(
            {'UVGHA.1.0.0.0': ['UVGH.1.0.0.0', 'UEHH.1.0.0.0']},
            new_input_df, ameco_h_df, splice=False)

        # USGH reuses the inputs built for UVGHA (no refresh in between).
        self._sum_and_splice(
            {
                'USGH.1.0.0.0': [
                    'UWCH.1.0.0.0', 'UOGH.1.0.0.0', 'UYNH.1.0.0.0',
                    'UCTRH.1.0.0.0', '-UTYH.1.0.0.0', '-UCTPH.1.0.0.0',
                    'UEHH.1.0.0.0', '-UCPH0.1.0.0.0'
                ]
            },
            new_input_df, ameco_h_df, splice=False)

        new_input_df = current_inputs()
        # Since this formula is using *ignoremissingsubtract* instead of *ignoremissingsum*, we change the sign of all
        # but the first variables in the list
        self._sum_and_splice(
            {'UBLH.1.0.0.0': ['USGH.1.0.0.0', '-UITH.1.0.0.0', '-UKOH.1.0.0.0']},
            new_input_df, ameco_h_df, splice=False)

        # OVGHA: UVGHA / PCPH, rebased, then scaled by UVGHA's base-period value.
        uvgha_data = self.get_data(new_input_df, 'UVGHA.1.0.0.0')
        pcph_data = self.get_data(result_7, 'PCPH.3.1.0.0')
        uvgha_base_period = uvgha_data.loc[BASE_PERIOD]
        rebased = operators.rebase(uvgha_data / pcph_data, BASE_PERIOD)
        append_row('OVGHA.3.0.0.0', rebased / 100 * uvgha_base_period)

        # ASGH: USGH as a percentage of UVGHA, butt-spliced onto the AMECO series.
        usgh_data = self.get_data(new_input_df, 'USGH.1.0.0.0')
        uvgha_data = self.get_data(new_input_df, 'UVGHA.1.0.0.0')
        asgh_ameco_h = self.get_data(ameco_h_df, 'ASGH.1.0.0.0')
        append_row(
            'ASGH.1.0.0.0',
            splicer.butt_splice(asgh_ameco_h, usgh_data / uvgha_data * 100))

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=14, country=self.country)
        return self.result
示例#4
0
文件: exchange_rates.py 项目: e/FDMS
    def perform_computation(self, ameco_db_df, xr_df, ameco_xne_us_df):
        """Compute the step-10 series and return the indexed result frame.

        Appends XNE, ILN, ISN, XNEF (EA countries only), XNEB and XNU rows to
        ``self.result``, then copies a list of PLCDQ/XUNNQ/XUNRQ series from
        ``ameco_db_df`` and derives their percentage changes. Codes that
        cannot be produced are written to ``errors_step_10.txt``. Finally the
        result is indexed by ``['Country Ameco', 'Variable Code']``, scaled
        with ``self.apply_scale()`` and exported with
        ``export_to_excel(..., step=10)``.

        Args:
            ameco_db_df: Frame the AMECO series are read from.
            xr_df: Frame the series spliced onto the AMECO data are read from.
            ameco_xne_us_df: Frame the US XNE.1.0.99.0 base series is read from.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        splicer = Splicer()

        def push(meta, values):
            # Append one output row: metadata followed by the data values.
            row = pd.Series(meta)
            row = row.append(values)
            self.result = self.result.append(row, ignore_index=True, sort=True)

        # --- XNE: AMECO series, ratio-spliced with xr_df when available -------
        xne_data = self.get_data(ameco_db_df, 'XNE.1.0.99.0')
        try:
            xr_data = self.get_data(xr_df, 'XNE.1.0.99.0')
        except KeyError:
            pass
        else:
            # Blank the base series after the first valid xr_df observation.
            splice_year = xr_data.first_valid_index()
            for year in range(splice_year + 1, LAST_YEAR + 1):
                xne_data[year] = pd.np.nan
            xne_data = splicer.ratio_splice(xne_data.copy(),
                                            xr_data,
                                            kind='forward')
        push(self.get_meta('XNE.1.0.99.0'), xne_data)

        # --- ILN / ISN: AMECO '.1.1.0.0' sources butt-spliced with xr_df ------
        null_dates = list(
            range(int(datetime.datetime.now().year) - 1, LAST_YEAR))
        rate_pairs = [('ILN.1.0.0.0', 'ILN.1.1.0.0'),
                      ('ISN.1.0.0.0', 'ISN.1.1.0.0')]
        for variable, source in rate_pairs:
            meta = self.get_meta(variable)
            base = self.get_data(ameco_db_df, source, null_dates=null_dates)
            spliced = splicer.butt_splice(base,
                                          self.get_data(xr_df, source),
                                          kind='forward')
            push(meta, spliced)

        # --- XNEF / XNEB ------------------------------------------------------
        if self.country in EA:
            membership_date = get_membership_date(self.country)
            # From the membership year onwards the stored XNE row is set to 1.
            for year in range(membership_date, LAST_YEAR + 1):
                self.result.loc[self.result['Variable Code'] == 'XNE.1.0.99.0',
                                year] = 1

            meta = self.get_meta('XNEF.1.0.99.0')
            xnef_data = self.get_data(ameco_db_df, 'XNE.1.0.99.0')
            last_valid = xnef_data.last_valid_index()
            if last_valid < LAST_YEAR:
                # Carry the last valid value forward through LAST_YEAR.
                for year in range(last_valid + 1, LAST_YEAR + 1):
                    xnef_data[year] = xnef_data[last_valid]
            push(meta, xnef_data)

            meta = self.get_meta('XNEB.1.0.99.0')
            xne_stored = self.get_data(self.result, 'XNE.1.0.99.0')
            xnef_stored = self.get_data(self.result, 'XNEF.1.0.99.0')
            xneb_data = xne_stored * xnef_stored
            # XNEF is blanked from the membership year onwards, after it has
            # been used for XNEB.
            for year in range(membership_date, LAST_YEAR + 1):
                self.result.loc[self.result['Variable Code'] ==
                                'XNEF.1.0.99.0', year] = pd.np.nan
            push(meta, xneb_data)
        else:
            meta = self.get_meta('XNEB.1.0.99.0')
            push(meta, self.get_data(self.result, 'XNE.1.0.99.0').copy())

        # --- XNU: US XNE base ratio-spliced with the xr_df US series -----------
        xne_us = self.get_data(xr_df, 'XNE.1.0.99.0', country='US')
        us_splice_year = xne_us.first_valid_index()
        new_xne_us = self.get_data(ameco_xne_us_df,
                                   'XNE.1.0.99.0',
                                   country='US')
        for year in range(us_splice_year + 1, LAST_YEAR + 1):
            new_xne_us[year] = pd.np.nan
        meta = self.get_meta('XNU.1.0.30.0')
        push(meta, splicer.ratio_splice(new_xne_us, xne_us, kind='forward'))

        # Effective exchange rates and relative unit labour costs, currently not calculated in FDMS+
        copied_codes = [
            'PLCDQ.3.0.0.437', 'PLCDQ.3.0.30.437', 'XUNNQ.3.0.30.437',
            'XUNRQ.3.0.30.437', 'PLCDQ.3.0.0.414', 'PLCDQ.3.0.0.415',
            'PLCDQ.3.0.0.417', 'PLCDQ.3.0.0.424', 'PLCDQ.3.0.0.427',
            'PLCDQ.3.0.0.435', 'PLCDQ.3.0.0.436', 'PLCDQ.3.0.30.414',
            'PLCDQ.3.0.30.415', 'PLCDQ.3.0.30.417', 'PLCDQ.3.0.30.424',
            'PLCDQ.3.0.30.427', 'PLCDQ.3.0.30.435', 'PLCDQ.3.0.30.436',
            'XUNNQ.3.0.30.414', 'XUNNQ.3.0.30.415', 'XUNNQ.3.0.30.417',
            'XUNNQ.3.0.30.423', 'XUNNQ.3.0.30.424', 'XUNNQ.3.0.30.427',
            'XUNNQ.3.0.30.435', 'XUNNQ.3.0.30.436', 'XUNNQ.3.0.30.441',
            'XUNRQ.3.0.30.414', 'XUNRQ.3.0.30.415', 'XUNRQ.3.0.30.417',
            'XUNRQ.3.0.30.424', 'XUNRQ.3.0.30.427', 'XUNRQ.3.0.30.435',
            'XUNRQ.3.0.30.436'
        ]
        missing_vars = []
        for variable in copied_codes:
            meta = self.get_meta(variable)
            try:
                values = self.get_data(ameco_db_df, variable)
            except KeyError:
                missing_vars.append(variable)
            else:
                push(meta, values)

        # Percentage changes ('.6.' codes) of the '.3.' series stored above,
        # listed as (target, source) in the order they are appended.
        growth_pairs = [
            ('PLCDQ.6.0.0.437', 'PLCDQ.3.0.0.437'),
            ('PLCDQ.6.0.0.435', 'PLCDQ.3.0.0.435'),
            ('PLCDQ.6.0.0.436', 'PLCDQ.3.0.0.436'),
            ('XUNNQ.6.0.30.437', 'XUNNQ.3.0.30.437'),
            ('XUNRQ.6.0.30.437', 'XUNRQ.3.0.30.437'),
            ('XUNNQ.6.0.30.435', 'XUNNQ.3.0.30.435'),
            ('XUNNQ.6.0.30.436', 'XUNNQ.3.0.30.436'),
            ('XUNRQ.6.0.30.435', 'XUNRQ.3.0.30.435'),
            ('XUNRQ.6.0.30.436', 'XUNRQ.3.0.30.436'),
        ]
        for variable, source in growth_pairs:
            meta = self.get_meta(variable)
            try:
                values = self.get_data(
                    self.result, source).copy().pct_change() * 100
            except (KeyError, IndexError):
                missing_vars.append(variable)
            else:
                push(meta, values)

        # TODO: is it OK? these are missing in ameco_db: PLCDQ.3.0.0.414 PLCDQ.3.0.0.435 PLCDQ.3.0.0.436
        # PLCDQ.3.0.30.414 PLCDQ.3.0.30.435 PLCDQ.3.0.30.436 XUNNQ.3.0.30.414 XUNNQ.3.0.30.423 XUNNQ.3.0.30.435
        # XUNNQ.3.0.30.436 XUNNQ.3.0.30.441 XUNRQ.3.0.30.414 XUNRQ.3.0.30.435 XUNRQ.3.0.30.436 PLCDQ.6.0.0.435
        # PLCDQ.6.0.0.436
        with open('errors_step_10.txt', 'w') as f:
            f.write('\n'.join(missing_vars))

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=10, country=self.country)
        return self.result
示例#5
0
文件: transfer_matrix.py 项目: e/FDMS
    def perform_computation(self, df, ameco_df):
        """Compute the step-1 series and return the indexed result frame.

        For each row of ``df`` whose variable (second element of the row
        index) is in ``TM`` and not in ``NA_VO``, two rows are appended to
        ``self.result``: the combined ``<variable>.1.0.0.0`` series and then
        the original series. How the series is combined depends on the
        variable:

        * in ``TM_TBBO``: forward butt splice of the AMECO base and the
          ``df`` series;
        * in ``TM_TBM``: ``Operators.merge`` of the two series;
        * otherwise: forward ratio splice followed by a forward butt splice.

        Finally the result is indexed by ``['Country Ameco', 'Variable Code']``,
        scaled with ``self.apply_scale()`` and exported with
        ``export_to_excel(..., step=1)``.

        Args:
            df: Frame of input series, indexed so that ``index[1]`` is the
                variable code.
            ameco_df: Frame the ``.1.0.0.0`` base series are read from; a
                missing base is logged as a warning and treated as ``None``.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        for row_key, _ in df.iterrows():
            variable = row_key[1]
            # Convert all transfer matrix variables to 1.0.0.0 (except National Account (volume)) and splice in
            # country desk forecast
            if variable not in TM or variable in NA_VO:
                continue

            splicer = Splicer()
            operators = Operators()
            meta = self.get_meta(variable)
            new_variable = variable + '.1.0.0.0'
            meta1000 = self.get_meta(new_variable)
            meta['Variable Code'] = variable
            meta1000['Variable Code'] = new_variable

            splice_series = self.get_data(df, variable)
            try:
                base_series = self.get_data(ameco_df, new_variable)
            except KeyError:
                base_series = None
                logger.warning(
                    'Missing Ameco data for variable {} (transfer matrix)'
                    .format(new_variable))

            # Keep an untouched copy of the input series before any splicing.
            orig_series = splice_series.copy()
            orig_series.name = None
            orig_series = pd.Series(meta).append(orig_series)

            if variable in TM_TBBO:
                new_series = splicer.butt_splice(base_series,
                                                 splice_series,
                                                 kind='forward')
            elif variable in TM_TBM:
                df_to_be_merged = pd.DataFrame([splice_series, base_series])
                new_series = operators.merge(df_to_be_merged)
            else:
                ratio_spliced = splicer.ratio_splice(base_series,
                                                     splice_series,
                                                     kind='forward')
                new_series = splicer.butt_splice(ratio_spliced,
                                                 splice_series,
                                                 kind='forward')

            # Shared tail of all three branches: store the combined series
            # first, then the original one.
            new_series.name = None
            new_series = pd.Series(meta1000).append(new_series)
            self.result = self.result.append(new_series, ignore_index=True)
            self.result = self.result.append(orig_series, ignore_index=True)

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=1, country=self.country)
        return self.result