def get_people(self, year, geogs):
    """Return population by gender and age for the given geographies in one year.

    The data source is chosen separately for each country, according to where
    ``year`` falls relative to the range reported by ``self.snpp``:

    * before ``self.snpp.min_year(country)``: ``self.mye.aggregate``;
    * up to and including ``self.snpp.max_year(country)``:
      ``self.snpp.aggregate``;
    * any later year: ``self.snpp.extrapolagg`` driven by ``self.npp``
      (a notice is printed to stdout).

    Args:
        year: the year for which population figures are wanted.
        geogs: a single geography code, or a list of codes. They are grouped
            by country using ``ukpoputils.split_by_country``.

    Returns:
        A DataFrame with the per-country results stacked (index reset), the
        ``OBS_VALUE`` column renamed to ``PEOPLE`` and the
        ``PROJECTED_YEAR_NAME`` column removed. If no country ends up with
        any geography codes, an empty DataFrame is returned.
    """
    if isinstance(geogs, str):
        geogs = [geogs]

    codes_by_country = ukpoputils.split_by_country(geogs)
    categories = ["GENDER", "C_AGE"]

    # Collect the per-country frames and concatenate once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and copied the
    # accumulated frame on every iteration.
    frames = []
    for country in codes_by_country:
        # TODO variants...
        codes = codes_by_country[country]
        if not codes:
            continue
        if year < self.snpp.min_year(country):
            data = self.mye.aggregate(categories, codes, year)
        elif year <= self.snpp.max_year(country):
            data = self.snpp.aggregate(categories, codes, year)
        else:
            print("%d population for %s is extrapolated" % (year, country))
            data = self.snpp.extrapolagg(categories, self.npp, codes, year)
        frames.append(data)

    if not frames:
        # Nothing was selected, so there are no columns to rename or drop.
        return pd.DataFrame()

    alldata = pd.concat(frames, ignore_index=True, sort=False)
    return alldata.rename({"OBS_VALUE": "PEOPLE"}, axis=1).drop(
        "PROJECTED_YEAR_NAME", axis=1)
def test_utils(self):
    """Exercise the helper functions in ``utils``.

    Covers ``split_range``, ``trim_range``, ``country``, ``split_by_country``
    and ``integerise``. Equality checks use ``assertEqual`` so that a failure
    reports both the actual and the expected value.
    """
    # split_range: years up to the projection's last year vs. years beyond it
    year_range = range(2018, 2050)
    (in_range, ex_range) = utils.split_range(year_range, self.snpp.max_year(utils.EN))
    self.assertEqual(min(in_range), min(year_range))
    self.assertEqual(max(in_range), 2029)
    self.assertEqual(min(ex_range), 2030)
    self.assertEqual(max(ex_range), max(year_range))

    # trim_range: scalar, float, list, array and range inputs
    self.assertEqual(utils.trim_range(2011, 1991, 2016), [2011])
    self.assertEqual(utils.trim_range(2011.0, 1991, 2016), [2011])
    self.assertEqual(utils.trim_range([2011], 1991, 2016), [2011])
    self.assertEqual(utils.trim_range([2011.0], 1991, 2016), [2011])
    self.assertEqual(utils.trim_range(np.array([1995, 2005, 2019]), 2001, 2011), [2005])
    self.assertEqual(utils.trim_range([1969, 2111], 1991, 2016), [])
    self.assertEqual(utils.trim_range(range(1969, 2111), 2011, 2016), list(range(2011, 2017)))

    # country: single code, duplicates within a country, unrecognised prefixes
    codes = "E09000001"
    self.assertEqual(utils.country(codes), ["en"])
    codes = ['E06000002', 'E09000001']
    self.assertEqual(utils.country(codes), ["en"])
    codes = ['E06000002', 'N09000001', 'S12000033', 'W06000011']
    self.assertEqual(utils.country(codes), ['en', 'ni', 'sc', 'wa'])
    codes = ['E06000001', 'E06000002', 'N09000001', 'S12000033', 'W06000011']
    self.assertEqual(utils.country(codes), ['en', 'ni', 'sc', 'wa'])
    codes = ['E06000001', 'W06000011', 'X06000002', 'Y09000001', 'Z12000033']
    self.assertEqual(utils.country(codes), ["en", "wa"])
    codes = 'A06000001'
    self.assertEqual(utils.country(codes), [])

    # split_by_country: codes grouped under their country key
    codes = ['E06000001', 'E06000002', 'N09000001', 'S12000033', 'W06000011']
    split = utils.split_by_country(codes)
    self.assertEqual(split[utils.EN], ['E06000001', 'E06000002'])
    self.assertEqual(split[utils.WA], ['W06000011'])
    self.assertEqual(split[utils.SC], ['S12000033'])
    self.assertEqual(split[utils.NI], ['N09000001'])

    # naively, each element would be rounded down, making the total 10
    fractional = np.array([0.1, 0.2, 0.3, 0.4]) * 11
    integral = utils.integerise(fractional)
    self.assertTrue(np.array_equal(integral, [1, 2, 3, 5]))

    # 1.51 is NOT increased because 4.5 has a larger fractional part when
    # the total is rescaled to 17 from 16.91
    fractional = np.array([1.1, 3.9, 4.5, 5.9, 1.51])
    integral = utils.integerise(fractional)
    self.assertTrue(np.array_equal(integral, [1, 4, 5, 6, 1]))

    # another example that preserves sum: 100 values of 1.01 total 101, so
    # exactly one element has to be rounded up to 2
    fractional = np.array([1.01] * 100)
    integral = utils.integerise(fractional)
    self.assertEqual(sum(integral), 101)
    self.assertTrue(np.array_equal(np.unique(integral), [1, 2]))
def get_households(self, year, geogs):
    """Return household counts for the given geographies in one year.

    For each country, if ``year`` does not exceed ``self.snhp.max_year(country)``
    the figures come straight from ``self.snhp.aggregate``. Otherwise they are
    extrapolated linearly from the last two available years (``max_year - 1``
    and ``max_year``) and a notice is printed to stdout.

    Args:
        year: the year for which household figures are wanted.
        geogs: a single geography code, or a list of codes. They are grouped
            by country using ``ukpoputils.split_by_country``.

    Returns:
        A DataFrame with the per-country results stacked (index reset) and the
        count held in a ``HOUSEHOLDS`` column. An empty DataFrame is returned
        if no country ends up with any geography codes.
    """
    # Accept a bare code as well as a list, as the sibling population
    # accessors do.
    if isinstance(geogs, str):
        geogs = [geogs]

    codes_by_country = ukpoputils.split_by_country(geogs)

    # Census-merged LADs as (code that receives the total, code folded into it).
    merged_lads = (
        ("E09000033", "E09000001"),
        ("E06000052", "E06000053"),
    )

    # Collect the per-country frames and concatenate once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0.
    frames = []
    for country in codes_by_country:
        codes = codes_by_country[country]
        if not codes:
            continue

        max_year = self.snhp.max_year(country)
        if year <= max_year:
            snhp = self.snhp.aggregate(codes, year).rename(
                {"OBS_VALUE": "HOUSEHOLDS"}, axis=1)
        else:
            print("%d households for %s is extrapolated" % (year, country))
            # Merge the final two years side by side: the "_x" columns hold
            # max_year - 1 and the "_y" columns hold max_year.
            snhp = self.snhp.aggregate(codes, max_year - 1).merge(
                self.snhp.aggregate(codes, max_year),
                left_on="GEOGRAPHY_CODE", right_on="GEOGRAPHY_CODE")
            # Carry the last observed annual change forward to the target year.
            annual_change = snhp["OBS_VALUE_y"] - snhp["OBS_VALUE_x"]
            snhp["HOUSEHOLDS"] = snhp["OBS_VALUE_y"] + annual_change * (year - max_year)
            snhp["PROJECTED_YEAR_NAME"] = year
            snhp = snhp.drop(
                ["PROJECTED_YEAR_NAME_x", "OBS_VALUE_x",
                 "PROJECTED_YEAR_NAME_y", "OBS_VALUE_y"],
                axis=1)

        # aggregate census-merged LADs 'E06000053' 'E09000001': the receiving
        # row gets the combined total, the absorbed row itself is left as is.
        for receiving, absorbed in merged_lads:
            in_pair = snhp["GEOGRAPHY_CODE"].isin([absorbed, receiving])
            combined = snhp.loc[in_pair, "HOUSEHOLDS"].sum()
            snhp.loc[snhp["GEOGRAPHY_CODE"] == receiving, "HOUSEHOLDS"] = combined

        frames.append(snhp)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True, sort=False)
def extrapolate(self, npp, geog_codes, year_range):
    """Return data for ``year_range``, extrapolating past the last available year.

    For each country, ``utils.split_range`` divides ``year_range`` at
    ``self.max_year(country)``. Years in the first part are taken directly
    from ``self.filter``. For each year in the second part, the data for the
    final available year is scaled by the ratio returned from
    ``npp.year_ratio("ppp", country, max_year, year)``, matched on ``GENDER``
    and ``C_AGE``.

    Args:
        npp: object providing ``year_ratio(variant, country, from_year, to_year)``,
            whose result has ``GENDER``, ``C_AGE`` and ``OBS_VALUE`` columns.
        geog_codes: a single geography code, or a list of codes. They are
            grouped by country using ``utils.split_by_country``.
        year_range: the years required.

    Returns:
        A DataFrame with all countries and years stacked (index reset), or an
        empty DataFrame if nothing was selected.
    """
    if isinstance(geog_codes, str):
        geog_codes = [geog_codes]

    codes_by_country = utils.split_by_country(geog_codes)

    # Gather every piece and concatenate once: DataFrame.append no longer
    # exists in pandas >= 2.0 and recopied the growing frame on every call.
    frames = []
    for country in codes_by_country:
        codes = codes_by_country[country]
        if not codes:
            continue

        max_year = self.max_year(country)
        last_year = self.filter(codes, max_year)

        (in_range, ex_range) = utils.split_range(year_range, max_year)

        # years that don't need to be extrapolated
        if in_range:
            frames.append(self.filter(codes, in_range))

        for year in ex_range:
            scaling = npp.year_ratio("ppp", country, max_year, year)
            # merge() returns a new frame, so last_year itself is not modified;
            # "_x" is the final-year value and "_y" is the scaling ratio.
            data = last_year.merge(
                scaling[["GENDER", "C_AGE", "OBS_VALUE"]],
                on=["GENDER", "C_AGE"])
            data["OBS_VALUE"] = data["OBS_VALUE_x"] * data["OBS_VALUE_y"]
            data["PROJECTED_YEAR_NAME"] = year
            frames.append(data.drop(["OBS_VALUE_x", "OBS_VALUE_y"], axis=1))

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True, sort=False)