示例#1
0
    def __init__(self):
        """Build the joint distribution of log weekly rent and log income.

        Reads the interview data through ``ds.EHSinterview()``, keeps the rows
        with a positive ``rentwkx`` and bins ``log(rentwkx)`` against the log
        of the ``self.incomeField`` column in a 40 x 30 two-dimensional
        histogram.

        Attributes set on the instance:
            renterData: rows with ``rentwkx > 0``, restricted to the
                ``self.incomeField``, ``rentwkx`` and ``bedrqx`` columns.
            population, xbins, ybins: counts and bin edges returned by
                ``np.histogram2d`` (x = log rent, y = log income).
            xaxis, yaxis: mid-points of the bins along each axis.
            popdf: DataFrame with one row per bin and the columns
                ``'rental price'`` and ``'income'`` (bin mid-points, in log
                space) and ``'p'`` (bin count divided by the total count).
                Rows are ordered with the rent bin varying fastest.
        """
        rawData = ds.EHSinterview()

        # Filter for fields of interest
        incomeRent = rawData.loc[:, [self.incomeField, "rentwkx", "bedrqx"]]
        # Filter out non renters
        self.renterData = incomeRent[incomeRent["rentwkx"] > 0]
        # Disabled filter, kept for reference (only consider one-bed):
        #     self.renterData = self.renterData[self.renterData["bedrqx"]==4]

        # Split the data into 2D histogram data
        self.population, self.xbins, self.ybins = np.histogram2d(
            np.log(self.renterData["rentwkx"].values),
            np.log(self.renterData[self.incomeField].values),
            bins=[40, 30])

        # The edges are already ndarrays, so the mid-points follow directly.
        self.xaxis = (self.xbins[1:] + self.xbins[:-1]) / 2.0
        self.yaxis = (self.ybins[1:] + self.ybins[:-1]) / 2.0

        # Flatten the grid into long format in one vectorised step. The income
        # bin is the slow index and the rent bin the fast one, hence the
        # repeat/tile pairing and the transpose before ravel().
        num_x = len(self.xaxis)
        num_y = len(self.yaxis)
        totalPop = self.population.sum()
        self.popdf = pd.DataFrame(
            {
                'rental price': np.tile(self.xaxis, num_y),
                'income': np.repeat(self.yaxis, num_x),
                'p': self.population.T.ravel() * 1.0 / totalPop,
            },
            columns=['rental price', 'income', 'p'])
  def __init__(self):
      """Histogram log weekly rent against log income for renting households.

      Sets ``self.renterData`` (rows of the interview data with a positive
      ``rentwkx``) and ``self.population``, ``self.xbins``, ``self.ybins``
      (counts and bin edges of a 40 x 30 ``np.histogram2d`` with log rent on
      x and log income on y).
      """
      survey = ds.EHSinterview()

      # Restrict the survey to the columns used below
      wanted_columns = [self.incomeField, "rentwkx", "bedrqx"]
      subset = survey.loc[:, wanted_columns]

      # Rows without a positive weekly rent are treated as non renters
      pays_rent = subset["rentwkx"] > 0
      self.renterData = subset[pays_rent]
      # Disabled filter, kept for reference (only consider one-bed):
      #     self.renterData = self.renterData[self.renterData["bedrqx"]==4]

      # Bin both quantities in log space
      log_rent = np.log(self.renterData["rentwkx"].values)
      log_income = np.log(self.renterData[self.incomeField].values)
      counts, rent_edges, income_edges = np.histogram2d(
          log_rent, log_income, bins=[40, 30])
      self.population = counts
      self.xbins = rent_edges
      self.ybins = income_edges
示例#3
0
 def __init__(self):
     """Histogram log income against log weekly rent for renting households.

     Reads the English Housing Survey interview data through
     ``ds.EHSinterview()``, cleans the ``rentwkx`` column and bins the data
     with ``np.histogram2d`` using 30 x 30 bins in log space.

     Attributes set on the instance:
         renterData: rows whose ``rentwkx`` is not a string and lies strictly
             between 0 and 50000, with ``rentwkx`` cast to ``np.float64``.
         population: transposed histogram counts, so the first index runs
             over rent bins and the second over income bins.
         xbins, ybins: bin edges for log income and log rent respectively.
     """
     # Bring raw data from Datasets class reader for the English Housing Survey data
     raw_data = ds.EHSinterview()
     # Filter for field rentwkx, total weekly rent payable (rent plus housing benefit)
     income_rent = raw_data.loc[:, [self.incomeField, "rentwkx"]]
     # Drop string entries in rentwkx BEFORE any numeric comparison: comparing a
     # str with an int raises TypeError on Python 3, so the range filter below
     # would fail on a mixed-type column.
     not_text = income_rent["rentwkx"].apply(
         lambda x: not isinstance(x, str))
     renters = income_rent[not_text]
     # Cast rentwkx column values as numpy float64 type
     renters = renters.astype({"rentwkx": np.float64})
     # Filter out non renters and unreasonably large weekly rent values
     self.renterData = renters[(renters["rentwkx"] > 0)
                               & (renters["rentwkx"] < 50000)]
     # Split the data into a 2D histogram with logarithmic bins (no normalisation here as we want column
     # normalisation, to be introduced when plotting)
     self.population, self.xbins, self.ybins = np.histogram2d(
         np.log(self.renterData[self.incomeField].values),
         np.log(self.renterData["rentwkx"].values),
         bins=[30, 30])
     # histogram2d indexes its result as [x bin, y bin]; transpose so that rows
     # correspond to rent (y) bins and columns to income (x) bins
     self.population = self.population.T
示例#4
0
# -*- coding: utf-8 -*-
"""
Class to study households' saving behaviour based on English Housing Survey data.

@author: daniel, Adrian Carro
"""

import Datasets as ds
import pandas as pd
import matplotlib.pyplot as plt

# Load the English Housing Survey interview dataset
derived = ds.EHSinterview()
# Load the English Housing Survey income dataset
income = ds.EHSincome()
# Inner-join the two DataFrames on the 'aacode' column (it must exist in both), keeping only keys found in both
joined = pd.merge(left=derived, right=income, how='inner', on='aacode')
# Keep only HYEARGRx and AmtSvng1b columns:
# - HYEARGRx: household gross annual income (inc. income from all adult household members). An extension of the gross
#   income of the HRP and any partner. This variable represents the household gross income of ALL adults living within
#   the household.
# - AmtSvng1b: Amount of savings/money invested.
#   Value = 1.0 Label = under £1,000
# 	Value = 2.0	Label = £1,000 to £2,999
# 	Value = 3.0	Label = £3,000 to £4,999
# 	Value = 4.0	Label = £5,000 to £5,999
# 	Value = 5.0	Label = £6,000 to £6,999
# 	Value = 6.0	Label = £7,000 to £7,999
# 	Value = 7.0	Label = £8,000 to £11,999
# 	Value = 8.0	Label = £12,000 to £15,999
# 	Value = 9.0	Label = £16,000 to £19,999
示例#5
0
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 29 13:29:06 2015

@author: daniel
"""

import Datasets as ds
import numpy as np
import matplotlib.pyplot as plt


# Load the English Housing Survey interview data.
rawData = ds.EHSinterview()

# Keep the rows whose 'Prevten' code lies strictly between 0 and 9, restricted
# to the columns used below. The explicit copy makes the column assignments
# that follow act on an independent frame rather than on a possible view.
data = rawData.loc[
    (rawData['Prevten'] > 0) & (rawData['Prevten'] < 9),
    ['Prevten', 'agehrpx', 'lenresb', 'tenure4', 'HYEARGRx'],
].copy()

# 'agehrpx' minus 'lenresb'.
# NOTE(review): presumably age of the household reference person minus length
# of residence, i.e. age at the time of the move -- confirm against the EHS
# variable documentation.
data["ageAtMove"] = data['agehrpx'] - data['lenresb']

# Encode the tenure transition as a single number: the tens digit is the
# 'Prevten' band (1 -> 0, 2-4 -> 10, 5-6 -> 20, 7-8 -> 30) and the units are
# the 'tenure4' code.
data['tenChange'] = (
    ((data['Prevten'] < 5) & (data['Prevten'] > 1)) * 10.0
    + ((data['Prevten'] > 4) & (data['Prevten'] < 7)) * 20.0
    + (data['Prevten'] > 6) * 30.0
    + data['tenure4']
)

# 'ageAtMove' for selected transition codes.
formationData = data[['ageAtMove']][data['tenChange'] < 10]
formationRentData = data[['ageAtMove']][data['tenChange'] == 3]
moverData = data[['ageAtMove']][(data['tenChange'] > 10) & (data['tenChange'] < 20)]
rentownData = data[['ageAtMove']][data['tenChange'] == 31]
fluxes = data[['tenChange']]

# 'HYEARGRx' for transition codes 3, 2 and 1 (lowest 'Prevten' band combined
# with 'tenure4' equal to 3, 2 and 1 respectively).
incomeFormRent = data[['HYEARGRx']][data['tenChange'] == 3]
incomeFormSoc = data[['HYEARGRx']][data['tenChange'] == 2]
incomeFormOwn = data[['HYEARGRx']][data['tenChange'] == 1]

# Alternative plots, currently disabled:
#hist, bins = np.histogram(ftbData)
#plt.hist(fluxes.values, bins=33)
#plt.hist(incomeFormRent.values, bins=20)

# Stacked histogram of the three 'HYEARGRx' samples. The datasets must be
# passed as one list, and each single-column frame is flattened to 1-D because
# a list of 2-D arrays is rejected by matplotlib.
plt.hist(
    [
        incomeFormSoc.values.ravel(),
        incomeFormRent.values.ravel(),
        incomeFormOwn.values.ravel(),
    ],
    bins=20,
    stacked=True,
)
#plt.hist(incomeFormOwn.values, bins=20)
#plt.hist(rentownData.values, bins=20)