示例#1
0
    def get_people(self, year, geogs):
        """Return population counts by gender and age for one year.

        The geography codes are split by country and each country is served
        from whichever source covers ``year``:

        * ``year`` before ``self.snpp.min_year(country)``: ``self.mye.aggregate``
        * ``year`` up to and including ``self.snpp.max_year(country)``:
          ``self.snpp.aggregate``
        * any later year: ``self.snpp.extrapolagg`` using ``self.npp`` (a notice
          is printed in this case)

        Args:
            year: the year to fetch.
            geogs: a single geography code (str) or an iterable of codes.

        Returns:
            A DataFrame with the per-country results stacked under a fresh
            index, ``OBS_VALUE`` renamed to ``PEOPLE`` and the
            ``PROJECTED_YEAR_NAME`` column removed. An empty DataFrame is
            returned when no country has any codes.
        """
        if isinstance(geogs, str):
            geogs = [geogs]

        geogs = ukpoputils.split_by_country(geogs)
        categories = ["GENDER", "C_AGE"]

        # Collect the per-country frames and concatenate once: DataFrame.append
        # was removed in pandas 2.0 and re-copied the accumulated data each time.
        frames = []
        for country in geogs:
            # TODO variants...
            codes = geogs[country]
            if not codes:
                continue
            if year < self.snpp.min_year(country):
                data = self.mye.aggregate(categories, codes, year)
            elif year <= self.snpp.max_year(country):
                data = self.snpp.aggregate(categories, codes, year)
            else:
                print("%d population for %s is extrapolated" % (year, country))
                data = self.snpp.extrapolagg(categories, self.npp, codes, year)
            frames.append(data)

        if not frames:
            # Nothing to look up; mirror get_households and hand back an empty frame
            # rather than failing on the missing PROJECTED_YEAR_NAME column.
            return pd.DataFrame()

        alldata = pd.concat(frames, ignore_index=True, sort=False)
        return alldata.rename({
            "OBS_VALUE": "PEOPLE"
        }, axis=1).drop("PROJECTED_YEAR_NAME", axis=1)
示例#2
0
    def test_utils(self):
        """Exercise the range, country-code and integerisation helpers in utils."""
        # split_range: the range is cut at self.snpp.max_year(utils.EN); the
        # expected values below pin that cut between 2029 and 2030.
        year_range = range(2018, 2050)
        (in_range, ex_range) = utils.split_range(year_range, self.snpp.max_year(utils.EN))
        self.assertEqual(min(in_range), min(year_range))
        self.assertEqual(max(in_range), 2029)
        self.assertEqual(min(ex_range), 2030)
        self.assertEqual(max(ex_range), max(year_range))

        # trim_range: scalars, floats, lists, arrays and ranges all yield a list
        # restricted to the inclusive [min, max] window.
        self.assertEqual(utils.trim_range(2011, 1991, 2016), [2011])
        self.assertEqual(utils.trim_range(2011.0, 1991, 2016), [2011])
        self.assertEqual(utils.trim_range([2011], 1991, 2016), [2011])
        self.assertEqual(utils.trim_range([2011.0], 1991, 2016), [2011])
        self.assertEqual(utils.trim_range(np.array([1995, 2005, 2019]), 2001, 2011), [2005])
        self.assertEqual(utils.trim_range([1969, 2111], 1991, 2016), [])
        self.assertEqual(utils.trim_range(range(1969, 2111), 2011, 2016), list(range(2011, 2017)))

        # country: assertEqual (rather than assertTrue(a == b)) so that a failure
        # reports both the actual and the expected list.
        codes = "E09000001"
        self.assertEqual(utils.country(codes), ["en"])
        codes = ['E06000002', 'E09000001']
        self.assertEqual(utils.country(codes), ["en"])
        codes = ['E06000002', 'N09000001', 'S12000033', 'W06000011']
        self.assertEqual(utils.country(codes), ['en', 'ni', 'sc', 'wa'])
        codes = ['E06000001', 'E06000002', 'N09000001', 'S12000033', 'W06000011']
        self.assertEqual(utils.country(codes), ['en', 'ni', 'sc', 'wa'])
        # unrecognised prefixes are ignored
        codes = ['E06000001', 'W06000011', 'X06000002', 'Y09000001', 'Z12000033']
        self.assertEqual(utils.country(codes), ["en", "wa"])
        codes = 'A06000001'
        self.assertEqual(utils.country(codes), [])

        # split_by_country: codes are grouped under their country key
        codes = ['E06000001', 'E06000002', 'N09000001', 'S12000033', 'W06000011']
        split = utils.split_by_country(codes)
        self.assertEqual(split[utils.EN], ['E06000001', 'E06000002'])
        self.assertEqual(split[utils.WA], ['W06000011'])
        self.assertEqual(split[utils.SC], ['S12000033'])
        self.assertEqual(split[utils.NI], ['N09000001'])

        # naively, each element would be rounded down, making the total 10
        fractional = np.array([0.1, 0.2, 0.3, 0.4]) * 11
        integral = utils.integerise(fractional)
        self.assertTrue(np.array_equal(integral, [1, 2, 3, 5]))

        # 1.51 is NOT increased because 4.5 has a larger fractional part when total is rescaled to 17 from 16.91
        fractional = np.array([1.1, 3.9, 4.5, 5.9, 1.51])
        integral = utils.integerise(fractional)
        self.assertTrue(np.array_equal(integral, [1, 4, 5, 6, 1]))

        # another example that preserves sum: 100 values of 1.01 must total 101
        # (compared against the integer rather than the float product 1.01 * 100)
        fractional = np.array([1.01] * 100)
        integral = utils.integerise(fractional)
        self.assertEqual(sum(integral), 101)
        self.assertTrue(np.array_equal(np.unique(integral), [1, 2]))
示例#3
0
    def get_households(self, year, geogs):
        """Return household counts for one year.

        The geography codes are split by country. For a year up to and
        including ``self.snhp.max_year(country)`` the values come straight from
        ``self.snhp.aggregate``. For a later year a notice is printed and the
        count is extrapolated linearly from the last two available years:
        ``last + (last - previous) * (year - max_year)``.

        Args:
            year: the year to fetch.
            geogs: a single geography code (str) or an iterable of codes.

        Returns:
            A DataFrame with the per-country results stacked under a fresh
            index and the counts in a ``HOUSEHOLDS`` column. An empty DataFrame
            is returned when no country has any codes.
        """
        # Accept a bare code as well as a collection, as get_people does;
        # iterating a str would otherwise treat each character as a code.
        if isinstance(geogs, str):
            geogs = [geogs]

        geogs = ukpoputils.split_by_country(geogs)

        # Collect the per-country frames and concatenate once: DataFrame.append
        # was removed in pandas 2.0.
        frames = []
        for country in geogs:
            codes = geogs[country]
            if not codes:
                continue
            max_year = self.snhp.max_year(country)

            if year <= max_year:
                snhp = self.snhp.aggregate(codes, year).rename(
                    {"OBS_VALUE": "HOUSEHOLDS"}, axis=1)
            else:
                print("%d households for %s is extrapolated" % (year, country))
                # Merging the last two years side by side gives the *_x columns
                # for max_year - 1 and the *_y columns for max_year.
                previous = self.snhp.aggregate(codes, max_year - 1)
                latest = self.snhp.aggregate(codes, max_year)
                snhp = previous.merge(latest,
                                      left_on="GEOGRAPHY_CODE",
                                      right_on="GEOGRAPHY_CODE")
                yearly_change = snhp.OBS_VALUE_y - snhp.OBS_VALUE_x
                snhp["HOUSEHOLDS"] = snhp.OBS_VALUE_y + yearly_change * (year - max_year)
                snhp["PROJECTED_YEAR_NAME"] = year
                snhp = snhp.drop([
                    "PROJECTED_YEAR_NAME_x", "OBS_VALUE_x",
                    "PROJECTED_YEAR_NAME_y", "OBS_VALUE_y"
                ], axis=1)

            # aggregate census-merged LADs 'E06000053' 'E09000001': the combined
            # total is written onto the partner code's row.
            # NOTE(review): the rows for the absorbed codes are left in place,
            # exactly as before - confirm whether they should be removed.
            for kept, absorbed in (("E09000033", "E09000001"),
                                   ("E06000052", "E06000053")):
                combined = snhp[snhp.GEOGRAPHY_CODE.isin(
                    [absorbed, kept])].HOUSEHOLDS.sum()
                snhp.loc[snhp.GEOGRAPHY_CODE == kept, "HOUSEHOLDS"] = combined
            frames.append(snhp)

        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True, sort=False)
示例#4
0
    def extrapolate(self, npp, geog_codes, year_range):
        """Return data for ``year_range``, extrapolating past the last available year.

        The geography codes are split by country. For each country,
        ``utils.split_range`` divides ``year_range`` at ``self.max_year(country)``:
        in-range years are read directly with ``self.filter``, and each
        remaining year is produced by scaling the ``max_year`` data by
        ``npp.year_ratio("ppp", country, max_year, year)``, matched on
        ``GENDER`` and ``C_AGE``.

        Args:
            npp: object providing ``year_ratio(variant, country, from_year, to_year)``.
            geog_codes: a single geography code (str) or an iterable of codes.
            year_range: the years required.

        Returns:
            A DataFrame with every country and year stacked under a fresh
            index; empty when no country has any codes.
        """
        if isinstance(geog_codes, str):
            geog_codes = [geog_codes]

        geog_codes = utils.split_by_country(geog_codes)

        # Gather every piece and concatenate once at the end: DataFrame.append
        # was removed in pandas 2.0 and, inside nested loops, re-copied the
        # accumulated data on every iteration.
        frames = []

        for country in geog_codes:
            codes = geog_codes[country]
            if not codes:
                continue

            max_year = self.max_year(country)
            last_year = self.filter(codes, max_year)

            (in_range, ex_range) = utils.split_range(year_range, max_year)
            # years that dont need to be extrapolated
            if in_range:
                frames.append(self.filter(codes, in_range))

            for year in ex_range:
                scaling = npp.year_ratio("ppp", country, max_year, year)
                # merge returns a new frame, so last_year itself is never modified;
                # OBS_VALUE_x is the base value and OBS_VALUE_y the scaling ratio
                data = last_year.merge(scaling[["GENDER", "C_AGE", "OBS_VALUE"]],
                                       on=["GENDER", "C_AGE"])
                data["OBS_VALUE"] = data.OBS_VALUE_x * data.OBS_VALUE_y
                # item assignment always targets the column; attribute assignment
                # would silently set a plain attribute if the column were absent
                data["PROJECTED_YEAR_NAME"] = year
                frames.append(data.drop(["OBS_VALUE_x", "OBS_VALUE_y"], axis=1))

        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True, sort=False)