Exemplo n.º 1
0
  def __init__(self, cache_dir=None):
    """
    Record the cache directory and eagerly build the per-country datasets.

    cache_dir: directory handed to the instance as its cache location. When
      None (the default) it is resolved with utils.default_cache_dir() at call
      time, rather than once when the method is defined.
    """
    if cache_dir is None:
      cache_dir = utils.default_cache_dir()
    self.cache_dir = cache_dir

    # one entry per country key (utils.EN/WA/SC/NI), each built by its own private loader
    self.data = {}
    self.data[utils.EN] = self.__do_england()
    self.data[utils.WA] = self.__do_wales()
    self.data[utils.SC] = self.__do_scotland()
    self.data[utils.NI] = self.__do_nireland()
Exemplo n.º 2
0
  def __init__(self, cache_dir=None):
    """
    Set up the cache directory, the Nomisweb API wrapper and an empty data store.

    cache_dir: cache location; falls back to utils.default_cache_dir() when None.
    """
    self.cache_dir = utils.default_cache_dir() if cache_dir is None else cache_dir
    self.data_api = Api.Nomisweb(self.cache_dir)

    # nothing is loaded up front: entries are added to this dictionary, keyed by year, on retrieval
    self.data = dict()
Exemplo n.º 3
0
    def __do_wales(self):
        """
        Return the SNPP dataframe for Wales.

        If a cached "snpp_w.csv" exists in the default cache directory it is read
        and returned as-is. Otherwise the data is paged down from the StatsWales
        OData endpoint, reshaped (columns renamed to C_AGE, GEOGRAPHY_CODE,
        OBS_VALUE, GENDER, PROJECTED_YEAR_NAME; aggregate age bands dropped;
        "90Plus" recoded to 90; gender mapped to 1=M, 2=F), written to the cache
        file and returned.

        Raises requests.HTTPError if any page request returns an error status.
        """
        print("Collating SNPP data for Wales...")
        # NOTE(review): this always uses the default cache dir, not any cache_dir
        # held on the instance - confirm that is intended
        cache_dir = utils.default_cache_dir()

        wales_raw = os.path.join(cache_dir, "snpp_w.csv")
        if os.path.isfile(wales_raw):
            return pd.read_csv(wales_raw)

        fields = [
            'Area_AltCode1', 'Year_Code', 'Data', 'Gender_Code',
            'Age_Code', 'Area_Hierarchy', 'Variant_Code'
        ]
        # StatsWales is an OData endpoint, so select fields of interest
        url = "http://open.statswales.gov.wales/dataset/popu6010?$select={}".format(
            ",".join(fields))
        # OData filter: drop the persons total (Gender_Code 'P'), keep Area_Hierarchy
        # strictly between 690 and 694, and keep the 'Principal' variant only
        url += "&$filter=Gender_Code ne 'P' and Area_Hierarchy gt 690 and Area_Hierarchy lt 694 and Variant_Code eq 'Principal'"

        # results are paged: keep following "odata.nextLink" until it is absent
        data = []
        while True:
            print(url)
            r = requests.get(url)
            # fail with a clear HTTP error rather than a confusing JSON/KeyError
            # when the server returns an error page
            r.raise_for_status()
            r_data = r.json()
            data.extend(r_data['value'])
            if "odata.nextLink" in r_data:
                url = r_data["odata.nextLink"]
            else:
                break
        snpp_w = pd.DataFrame(data)

        # Remove unwanted and rename wanted columns
        snpp_w = snpp_w.drop(["Area_Hierarchy", "Variant_Code"], axis=1)
        snpp_w = snpp_w.rename(
            columns={
                "Age_Code": "C_AGE",
                "Area_AltCode1": "GEOGRAPHY_CODE",
                "Data": "OBS_VALUE",
                "Gender_Code": "GENDER",
                "Year_Code": "PROJECTED_YEAR_NAME"
            })

        # Remove all but single-year-of-age rows; take an explicit copy so the
        # assignments below act on an independent frame, not a view of a slice
        aggregate_ages = ["AllAges", "00To15", "16To64", "65Plus"]
        snpp_w = snpp_w[~snpp_w.C_AGE.isin(aggregate_ages)].copy()
        # the open-ended top band is stored as age 90, then everything is made numeric
        snpp_w.loc[snpp_w.C_AGE == "90Plus", "C_AGE"] = "90"
        snpp_w["C_AGE"] = pd.to_numeric(snpp_w["C_AGE"])

        # convert gender to census convention 1=M, 2=F
        snpp_w["GENDER"] = snpp_w["GENDER"].map({"M": 1, "F": 2})

        # assert(len(snpp_w) == 26*2*91*22) # 22 LADs x 91 ages x 2 genders x 26 years
        print(wales_raw)
        snpp_w.to_csv(wales_raw, index=False)

        return snpp_w
Exemplo n.º 4
0
  def __init__(self, cache_dir = None):
    """
    Set up the cache directory and Nomisweb API wrapper, then load the principal variant.

    cache_dir: cache location; falls back to utils.default_cache_dir() when None.
    """
    self.cache_dir = utils.default_cache_dir() if cache_dir is None else cache_dir
    self.data_api = Api.Nomisweb(self.cache_dir)

    # pandas dataframes keyed by variant code; created empty before any download runs
    self.data = {}

    # the principal variant ("ppp") is fetched immediately
    principal = self.__download_ppp()
    self.data["ppp"] = principal
Exemplo n.º 5
0
  def __init__(self, cache_dir=None):
    """
    Set up the cache directory and Nomisweb API wrapper, then build every country's dataset.

    cache_dir: cache location; falls back to utils.default_cache_dir() when None.
    """
    self.cache_dir = utils.default_cache_dir() if cache_dir is None else cache_dir
    self.data_api = Api.Nomisweb(self.cache_dir)

    # country key -> private loader, run in this order
    loaders = (
      (utils.EN, self.__do_england),
      (utils.WA, self.__do_wales),
      (utils.SC, self.__do_scotland),
      (utils.NI, self.__do_nireland),
    )
    self.data = {}
    for country, load in loaders:
      self.data[country] = load()
Exemplo n.º 6
0
def register_custom_projection(name, data, cache_dir=None):
    """
    Validate a custom SNPP dataset and write it to the cache directory as csv.

    name: identifier of the projection, used to build the output filename.
    data: pandas DataFrame. It must contain the columns GEOGRAPHY_CODE,
        OBS_VALUE, GENDER, C_AGE and PROJECTED_YEAR_NAME; GENDER must take
        exactly the values 1 and 2 (in any row order); the smallest C_AGE must
        be 0 and the largest 90.
    cache_dir: directory to write to. When None (the default) it is resolved
        with utils.default_cache_dir() at call time rather than at definition
        time.

    Raises ValueError if any of the checks on data fails.
    """
    # check data is compatible
    required_colnames = [
        "GEOGRAPHY_CODE", "OBS_VALUE", "GENDER", "C_AGE", "PROJECTED_YEAR_NAME"
    ]
    for col in required_colnames:
        if col not in data.columns:
            raise ValueError("Custom SNPP dataset must contain a %s column" %
                             col)

    # compare as a set: unique() returns values in order of first appearance, so
    # an element-wise comparison against [1, 2] would wrongly reject data whose
    # first row is female, and would compare mismatched shapes for other codes
    if set(data.GENDER.unique()) != {1, 2}:
        raise ValueError(
            "GENDER column must only contain 1 (male) and 2 (female)")

    ages = data.C_AGE.unique()
    if min(ages) != 0 or max(ages) != 90:
        raise ValueError("C_AGE column must range from 0 to 90 (inclusive)")

    if cache_dir is None:
        cache_dir = utils.default_cache_dir()
    filename = _custom_snpp_filename(name, cache_dir)

    print("Writing custom SNPP %s to %s" % (name, filename))
    data.to_csv(filename, index=False)
Exemplo n.º 7
0
    def __init__(self, name, cache_dir=None):
        """
        Load a previously saved custom projection from the cache directory.

        name: identifier of the custom projection; used to build the csv filename.
        cache_dir: directory holding the csv. When None (the default) it is
            resolved with utils.default_cache_dir() at call time rather than
            once when the method is defined.
        """
        if cache_dir is None:
            cache_dir = utils.default_cache_dir()
        self.name = name
        self.cache_dir = cache_dir

        # the filename is derived from the name and cache dir by the module-level helper
        filename = _custom_snpp_filename(name, self.cache_dir)
        self.data = pd.read_csv(filename)
Exemplo n.º 8
0
def list_custom_projections(cache_dir=None):
    """
    Return the names of the custom projection csv files found in cache_dir.

    A file counts as a custom projection when its name matches
    "ukpopulation_custom_snpp_*.csv"; the returned name is the part matched by
    the wildcard. The order is whatever glob yields.

    cache_dir: directory to search. When None (the default) it is resolved with
        utils.default_cache_dir() at call time rather than at definition time.
    """
    if cache_dir is None:
        cache_dir = utils.default_cache_dir()

    # NOTE(review): presumably mirrors the naming used when the files are
    # written - verify against _custom_snpp_filename
    prefix = "ukpopulation_custom_snpp_"
    suffix = ".csv"
    paths = glob.glob(os.path.join(cache_dir, prefix + "*" + suffix))
    # strip the fixed prefix and extension to recover each projection name
    return [os.path.basename(path)[len(prefix):-len(suffix)] for path in paths]