示例#1
0
 def test_utils(self):
     """Check that utils.split_range divides a year range at snpp.max_year(utils.EN)."""
     years = range(2018, 2050)
     horizon = self.snpp.max_year(utils.EN)
     covered, beyond = utils.split_range(years, horizon)

     # the first part starts where the requested range starts and stops at 2027
     self.assertEqual(min(covered), years[0])
     self.assertEqual(max(covered), 2027)

     # the second part picks up at 2028 and runs to the end of the requested range
     self.assertEqual(min(beyond), 2028)
     self.assertEqual(max(beyond), years[-1])
示例#2
0
    def test_utils(self):
        """
        Exercise the helper functions in ``utils``: ``split_range``, ``trim_range``,
        ``country``, ``split_by_country`` and ``integerise``.

        Equality checks use ``assertEqual`` (rather than ``assertTrue(a == b)``) so that
        a failure reports both the actual and the expected value.
        """
        # split_range: the requested range is divided at snpp.max_year(utils.EN)
        year_range = range(2018, 2050)
        (in_range, ex_range) = utils.split_range(year_range, self.snpp.max_year(utils.EN))
        self.assertEqual(min(in_range), min(year_range))
        self.assertEqual(max(in_range), 2029)
        self.assertEqual(min(ex_range), 2030)
        self.assertEqual(max(ex_range), max(year_range))

        # trim_range: scalar, list, array and range inputs all yield a list holding
        # only the values that lie within the (inclusive) lower and upper bounds
        self.assertEqual(utils.trim_range(2011, 1991, 2016), [2011])
        self.assertEqual(utils.trim_range(2011.0, 1991, 2016), [2011])
        self.assertEqual(utils.trim_range([2011], 1991, 2016), [2011])
        self.assertEqual(utils.trim_range([2011.0], 1991, 2016), [2011])
        self.assertEqual(utils.trim_range(np.array([1995, 2005, 2019]), 2001, 2011), [2005])
        self.assertEqual(utils.trim_range([1969, 2111], 1991, 2016), [])
        self.assertEqual(utils.trim_range(range(1969, 2111), 2011, 2016), list(range(2011, 2017)))

        # country: accepts a single code or a list of codes; each country appears once
        # in the result and codes with an unrecognised leading letter are ignored
        codes = "E09000001"
        self.assertEqual(utils.country(codes), ["en"])
        codes = ['E06000002', 'E09000001']
        self.assertEqual(utils.country(codes), ["en"])
        codes = ['E06000002', 'N09000001', 'S12000033', 'W06000011']
        self.assertEqual(utils.country(codes), ['en', 'ni', 'sc', 'wa'])
        codes = ['E06000001', 'E06000002', 'N09000001', 'S12000033', 'W06000011']
        self.assertEqual(utils.country(codes), ['en', 'ni', 'sc', 'wa'])
        codes = ['E06000001', 'W06000011', 'X06000002', 'Y09000001', 'Z12000033']
        self.assertEqual(utils.country(codes), ["en", "wa"])
        codes = 'A06000001'
        self.assertEqual(utils.country(codes), [])

        # split_by_country: groups the supplied codes under their country key
        codes = ['E06000001', 'E06000002', 'N09000001', 'S12000033', 'W06000011']
        split = utils.split_by_country(codes)
        self.assertEqual(split[utils.EN], ['E06000001', 'E06000002'])
        self.assertEqual(split[utils.WA], ['W06000011'])
        self.assertEqual(split[utils.SC], ['S12000033'])
        self.assertEqual(split[utils.NI], ['N09000001'])

        # integerise: naively, each element would be rounded down, making the total 10
        fractional = np.array([0.1, 0.2, 0.3, 0.4]) * 11
        integral = utils.integerise(fractional)
        self.assertTrue(np.array_equal(integral, [1, 2, 3, 5]))

        # 1.51 is NOT increased because 4.5 has a larger fractional part when total is rescaled to 17 from 16.91
        fractional = np.array([1.1, 3.9, 4.5, 5.9, 1.51])
        integral = utils.integerise(fractional)
        self.assertTrue(np.array_equal(integral, [1, 4, 5, 6, 1]))

        # another example that preserves sum
        fractional = np.array([1.01] * 100)
        integral = utils.integerise(fractional)
        self.assertEqual(sum(integral), 1.01 * 100)
        self.assertTrue(np.array_equal(np.unique(integral), [1, 2]))
示例#3
0
  def extrapolate(self, npp, geog_code, year_range):
    """
    Return data for geog_code over year_range, extending past the final available year.

    Years up to self.max_year(geog_code) are returned from self.filter unchanged. Every
    later year is built from the final-year data, with OBS_VALUE multiplied by the
    OBS_VALUE of npp.year_ratio("ppp", country, final_year, year).

    Raises AssertionError if the final-year data and the scaling data differ in length.
    """
    import pandas as pd  # local import: the rest of this block does not reference pandas

    max_year = self.max_year(geog_code)
    (in_range, ex_range) = utils.split_range(year_range, max_year)

    all_years = self.filter(geog_code, in_range)

    ex_years = list(ex_range)
    if not ex_years:
      # nothing to extrapolate: hand back the filtered data untouched
      return all_years

    # these do not depend on the target year, so compute them once
    final_year_data = self.filter([geog_code], [max_year])
    country = utils.country(geog_code)

    frames = [all_years]
    for year in ex_years:
      # work on a copy so that each year is scaled from the unmodified final-year data
      data = final_year_data.copy()
      scaling = npp.year_ratio("ppp", country, max_year, year)
      assert len(data) == len(scaling), \
        "data has {} rows but scaling has {}".format(len(data), len(scaling))
      # NOTE(review): this product aligns rows on index labels, not on age/gender
      # columns - confirm that data and scaling share the same index
      data.OBS_VALUE = data.OBS_VALUE * scaling.OBS_VALUE
      data.PROJECTED_YEAR_NAME = year
      frames.append(data)

    # a single concat replaces repeated DataFrame.append (removed in pandas 2.0)
    return pd.concat(frames, ignore_index=True)
示例#4
0
    def create_variant(self, variant_name, npp, geog_codes, year_range):
        """
        Apply NPP variant to SNPP: SNPP(v) = SNPP(0) * sum(a,g) [ NPP(v) / NPP(0) ]
        Preserves age-gender structure of SNPP data

        geog_codes may be a single code (str) or a list of codes. Years in year_range
        before npp.min_year() are returned from self.filter unscaled (a warning is
        printed); the remaining years come from self.extrapolate with OBS_VALUE
        multiplied by the OBS_VALUE of npp.variant_ratio.

        Returns one DataFrame holding, for each geography in turn, the unscaled
        pre-NPP rows followed by the scaled rows (empty DataFrame if there is nothing).
        Raises AssertionError if the data and scaling frames differ in length.
        """
        if isinstance(geog_codes, str):
            geog_codes = [geog_codes]

        # split out any years prior to the NPP data (currently SNPP is 2014 based but NPP is 2016)
        # the split does not depend on the geography, so it is done once up front
        (pre_range, in_range) = utils.split_range(year_range, npp.min_year() - 1)

        sort_keys = ["C_AGE", "GENDER", "PROJECTED_YEAR_NAME"]
        frames = []

        for geog_code in geog_codes:

            # for any years prior to NPP we just use the SNPP data as-is (i.e. "ppp")
            if pre_range:
                pre_data = self.filter(geog_code, pre_range)
                if len(pre_data) > 0:
                    print(
                        "WARNING: variant {} not applied for years {} that predate the NPP data"
                        .format(variant_name, pre_range))
                # always keep the pre-NPP rows, including when nothing falls in the
                # NPP range (previously the result of append() was discarded there)
                frames.append(pre_data)

            # move on if there's nothing in the NPP range
            if not in_range:
                continue

            data = self.extrapolate(npp, geog_code, in_range).sort_values(
                sort_keys).reset_index(drop=True)

            # re-index after sorting so the product below pairs rows by sorted
            # position, matching data, rather than by pre-sort index labels
            scaling = npp.variant_ratio(
                variant_name, utils.country(geog_code),
                year_range).reset_index().sort_values(sort_keys).reset_index(drop=True)

            assert len(data) == len(scaling), \
                "data has {} rows but scaling has {}".format(len(data), len(scaling))
            data.OBS_VALUE = data.OBS_VALUE * scaling.OBS_VALUE

            frames.append(data)

        # a single concat replaces repeated DataFrame.append (removed in pandas 2.0)
        return pd.concat(frames) if frames else pd.DataFrame()
示例#5
0
    def extrapolate(self, npp, geog_codes, year_range):
        """
        Return data for geog_codes over year_range, extending past the final available year.

        geog_codes may be a single code (str) or a list of codes; they are grouped with
        utils.split_by_country and each country is processed against its own
        self.max_year(country). Years up to that year are returned from self.filter
        unchanged. Every later year is built from the final-year data, with OBS_VALUE
        multiplied by the matching (GENDER, C_AGE) OBS_VALUE of
        npp.year_ratio("ppp", country, max_year, year).

        Returns a single DataFrame with a fresh index (empty if nothing was selected).
        """
        if isinstance(geog_codes, str):
            geog_codes = [geog_codes]

        codes_by_country = utils.split_by_country(geog_codes)

        # collect the pieces and concatenate once at the end; repeated
        # DataFrame.append copies on every call and was removed in pandas 2.0
        frames = []

        for country in codes_by_country:
            codes = codes_by_country[country]
            if not codes:
                continue

            max_year = self.max_year(country)
            last_year = self.filter(codes, max_year)

            (in_range, ex_range) = utils.split_range(year_range, max_year)
            # years that dont need to be extrapolated
            if in_range:
                frames.append(self.filter(codes, in_range))

            for year in ex_range:
                scaling = npp.year_ratio("ppp", country, max_year, year)
                # merge returns a new frame, so last_year itself is never modified;
                # the two OBS_VALUE columns come back suffixed _x (data) and _y (scaling)
                data = last_year.merge(scaling[["GENDER", "C_AGE", "OBS_VALUE"]],
                                       on=["GENDER", "C_AGE"])
                data["OBS_VALUE"] = data.OBS_VALUE_x * data.OBS_VALUE_y
                data["PROJECTED_YEAR_NAME"] = year
                frames.append(data.drop(["OBS_VALUE_x", "OBS_VALUE_y"], axis=1))

        if not frames:
            return pd.DataFrame()

        return pd.concat(frames, ignore_index=True, sort=False)
import ukpopulation.myedata as MYEData
import ukpopulation.nppdata as NPPData
import ukpopulation.snppdata as SNPPData
import ukpopulation.utils as utils

# Example script: plot the population of a single local authority over 2011-2066,
# combining three sources - MYE data, SNPP data, and SNPP data extrapolated using NPP.
# NOTE(review): `plt` is used below but no import for it is visible in this part of
# the file - presumably `import matplotlib.pyplot as plt` is required; confirm.

# initialise the population modules
mye = MYEData.MYEData()
npp = NPPData.NPPData()
snpp = SNPPData.SNPPData()

lad = "E07000041"  # Exeter

# years 2011 to 2066 inclusive (roughly a 50 year projection horizon)
years = range(2011, 2067)

# split the requested years by source: first at the last MYE year, then the
# remainder at the last SNPP year for this local authority
(mye_years, proj_years) = utils.split_range(years, mye.max_year())
(snpp_years, npp_years) = utils.split_range(proj_years, snpp.max_year(lad))

# population for the years covered by the MYE data
# (presumably totalled over the GENDER and C_AGE categories - confirm against aggregate)
pop_mye = mye.aggregate(["GENDER", "C_AGE"], lad, mye_years)
# get the total projected population for pop up to the SNPP horizon
# (given as 2039 in the original comment - TODO confirm against snpp.max_year(lad))
pop = snpp.aggregate(["GENDER", "C_AGE"], lad, snpp_years)
# extrapolate beyond the SNPP horizon using the NPP data, for the remaining years
pop_ex = snpp.extrapolagg(["GENDER", "C_AGE"], npp, lad, npp_years)

# plot the data: one series per source, each with its own marker format and legend label
plt.plot(pop_mye.PROJECTED_YEAR_NAME, pop_mye.OBS_VALUE, "b^", label="MYE")
plt.plot(pop.PROJECTED_YEAR_NAME, pop.OBS_VALUE, "bo", label="SNPP")
plt.plot(pop_ex.PROJECTED_YEAR_NAME, pop_ex.OBS_VALUE, "b.", label="ext NPP")
plt.xlabel("Year")
plt.ylabel("Persons")
plt.legend()