def __init__(self, cache_dir=None):
    """Eagerly load the data for England, Wales, Scotland and Northern Ireland.

    Args:
        cache_dir: directory stored on the instance as ``cache_dir``. When
            omitted (``None``) it falls back to ``utils.default_cache_dir()``,
            which is resolved on each call instead of once when the function
            is defined.
    """
    if cache_dir is None:
        cache_dir = utils.default_cache_dir()
    self.cache_dir = cache_dir

    # one entry per country code, each filled by its own loader
    self.data = {}
    self.data[utils.EN] = self.__do_england()
    self.data[utils.WA] = self.__do_wales()
    self.data[utils.SC] = self.__do_scotland()
    self.data[utils.NI] = self.__do_nireland()
def __init__(self, cache_dir=None):
    """Set up the cache directory, the Nomisweb API handle and an empty store.

    Args:
        cache_dir: directory to use for caching; ``None`` selects
            ``utils.default_cache_dir()``.
    """
    self.cache_dir = utils.default_cache_dir() if cache_dir is None else cache_dir
    self.data_api = Api.Nomisweb(self.cache_dir)

    # dictionary keyed by year; starts empty (lazy retrieval)
    self.data = {}
def __do_wales(self):
    """Return the SNPP data for Wales as a DataFrame, downloading if not cached.

    The processed data is cached as ``snpp_w.csv`` inside ``self.cache_dir``.
    When that file exists it is read back and returned unchanged. Otherwise
    the StatsWales OData endpoint is queried page by page, the result is
    reduced to the columns GEOGRAPHY_CODE, PROJECTED_YEAR_NAME, OBS_VALUE,
    GENDER and C_AGE, written to the cache file and returned.

    Raises:
        requests.HTTPError: if a page request is answered with an error
            status.
    """
    print("Collating SNPP data for Wales...")

    # use the cache directory the instance was constructed with
    wales_raw = os.path.join(self.cache_dir, "snpp_w.csv")
    if os.path.isfile(wales_raw):
        return pd.read_csv(wales_raw)

    fields = [
        "Area_AltCode1", "Year_Code", "Data", "Gender_Code", "Age_Code",
        "Area_Hierarchy", "Variant_Code",
    ]
    # StatsWales is an OData endpoint, so select fields of interest
    url = "http://open.statswales.gov.wales/dataset/popu6010?$select={}".format(
        ",".join(fields))
    # use OData syntax to filter P (persons), AllAges (all ages), Area_Hierarchy 691 (LADs)
    url += "&$filter=Gender_Code ne 'P' and Area_Hierarchy gt 690 and Area_Hierarchy lt 694 and Variant_Code eq 'Principal'"

    # the response is paginated: keep following "odata.nextLink" until absent
    data = []
    while True:
        print(url)
        r = requests.get(url)
        # fail on an HTTP error instead of trying to parse an error page
        r.raise_for_status()
        r_data = r.json()
        data += r_data["value"]
        if "odata.nextLink" not in r_data:
            break
        url = r_data["odata.nextLink"]
    snpp_w = pd.DataFrame(data)

    # Remove unwanted and rename wanted columns
    snpp_w = snpp_w.drop(["Area_Hierarchy", "Variant_Code"], axis=1)
    snpp_w = snpp_w.rename(
        columns={
            "Age_Code": "C_AGE",
            "Area_AltCode1": "GEOGRAPHY_CODE",
            "Data": "OBS_VALUE",
            "Gender_Code": "GENDER",
            "Year_Code": "PROJECTED_YEAR_NAME",
        })

    # Remove all but SYOA and make numeric; copy so the assignments below
    # act on an independent frame rather than a slice of the original
    non_syoa_ages = ["AllAges", "00To15", "16To64", "65Plus"]
    snpp_w = snpp_w[~snpp_w.C_AGE.isin(non_syoa_ages)].copy()
    snpp_w.loc[snpp_w.C_AGE == "90Plus", "C_AGE"] = "90"
    snpp_w["C_AGE"] = pd.to_numeric(snpp_w.C_AGE)

    # convert gender to census convention 1=M, 2=F
    snpp_w["GENDER"] = snpp_w.GENDER.map({"M": 1, "F": 2})

    # expected size (not enforced): 26*2*91*22 rows,
    # i.e. 22 LADs x 91 ages x 2 genders x 26 years
    print(wales_raw)
    snpp_w.to_csv(wales_raw, index=False)
    return snpp_w
def __init__(self, cache_dir = None):
    """Prepare the cache directory and API handle, then load the "ppp" data.

    Args:
        cache_dir: directory to use for caching; ``None`` selects
            ``utils.default_cache_dir()``.
    """
    resolved_dir = cache_dir if cache_dir is not None else utils.default_cache_dir()
    self.cache_dir = resolved_dir
    self.data_api = Api.Nomisweb(resolved_dir)

    # pandas dataframes keyed by variant code
    self.data = {}
    # the principal variant is loaded up front rather than on demand
    self.data["ppp"] = self.__download_ppp()
def __init__(self, cache_dir=None):
    """Create the API handle and load the data for all four country codes.

    Args:
        cache_dir: directory to use for caching; ``None`` selects
            ``utils.default_cache_dir()``.
    """
    self.cache_dir = utils.default_cache_dir() if cache_dir is None else cache_dir
    self.data_api = Api.Nomisweb(self.cache_dir)

    self.data = {}
    # loaders run in this order: England, Wales, Scotland, Northern Ireland
    country_loaders = (
        (utils.EN, self.__do_england),
        (utils.WA, self.__do_wales),
        (utils.SC, self.__do_scotland),
        (utils.NI, self.__do_nireland),
    )
    for country, load in country_loaders:
        self.data[country] = load()
def register_custom_projection(name, data, cache_dir=None):
    """Validate a custom SNPP dataset and write it out as a CSV file.

    Args:
        name: projection name, passed to ``_custom_snpp_filename`` to build
            the output path.
        data: DataFrame holding the projection. It must contain the columns
            GEOGRAPHY_CODE, OBS_VALUE, GENDER, C_AGE and PROJECTED_YEAR_NAME.
        cache_dir: directory passed to ``_custom_snpp_filename``. When omitted
            (``None``) it falls back to ``utils.default_cache_dir()``, which
            is resolved on each call instead of once when the function is
            defined.

    Raises:
        ValueError: if a required column is missing, if the distinct GENDER
            values are not exactly 1 and 2, or if the smallest and largest
            C_AGE values are not 0 and 90.
    """
    # check data is compatible
    required_colnames = [
        "GEOGRAPHY_CODE", "OBS_VALUE", "GENDER", "C_AGE", "PROJECTED_YEAR_NAME"
    ]
    for col in required_colnames:
        if col not in data.columns:
            raise ValueError("Custom SNPP dataset must contain a %s column" % col)

    # compare as a set: unique() lists values in order of appearance, so an
    # element-wise comparison against [1, 2] would reject valid data whose
    # first row happens to be gender 2
    if set(data.GENDER.unique()) != {1, 2}:
        raise ValueError(
            "GENDER column must only contain 1 (male) and 2 (female)")

    ages = data.C_AGE.unique()
    if min(ages) != 0 or max(ages) != 90:
        raise ValueError("C_AGE column must range from 0 to 90 (inclusive)")

    # resolve the default only once the data is known to be valid
    if cache_dir is None:
        cache_dir = utils.default_cache_dir()

    filename = _custom_snpp_filename(name, cache_dir)
    print("Writing custom SNPP %s to %s" % (name, filename))
    data.to_csv(filename, index=False)
def __init__(self, name, cache_dir=None):
    """Load the custom projection called ``name`` from its CSV file.

    Args:
        name: projection name, stored on the instance and passed to
            ``_custom_snpp_filename`` to locate the file.
        cache_dir: directory passed to ``_custom_snpp_filename``. When omitted
            (``None``) it falls back to ``utils.default_cache_dir()``, which
            is resolved on each call instead of once when the function is
            defined.
    """
    if cache_dir is None:
        cache_dir = utils.default_cache_dir()
    self.name = name
    self.cache_dir = cache_dir

    filename = _custom_snpp_filename(name, self.cache_dir)
    self.data = pd.read_csv(filename)
def list_custom_projections(cache_dir=None):
    """Return the names of the custom SNPP files found in ``cache_dir``.

    A file counts when its name matches ``ukpopulation_custom_snpp_*.csv``;
    the returned name is the part matched by ``*``. Names come back in the
    order ``glob.glob`` yields the files.

    Args:
        cache_dir: directory to search. When omitted (``None``) it falls back
            to ``utils.default_cache_dir()``, which is resolved on each call
            instead of once when the function is defined.
    """
    if cache_dir is None:
        cache_dir = utils.default_cache_dir()

    prefix = "ukpopulation_custom_snpp_"
    suffix = ".csv"
    files = glob.glob(os.path.join(cache_dir, prefix + "*" + suffix))
    # strip the fixed prefix and extension to leave just the projection name
    return [os.path.basename(path)[len(prefix):-len(suffix)] for path in files]