# -*- coding: utf-8 -*-
"""
Script to explore Zoopla data.

Reads from the ZooplaMatchedAggregated and ZooplaRawCollated datasets and
prints the raw listings whose "LATEST SOLD" value is present and differs from
their "CREATED" value.

@author: Adrian Carro
"""

# Needed by the date-handling code further down this file (datetime.strptime).
from datetime import datetime

import pandas as pd
import Datasets as ds
import numpy as np

decision = ds.ZooplaMatchedAggregated().read(100)

# Do not truncate columns when printing data frames.
pd.set_option('display.max_columns', None)

# data = ds.ZooplaMatchedDaily()
data = ds.ZooplaRawCollated()
chunk = data.read(500)

# Keep listings that have a "LATEST SOLD" value which differs from "CREATED".
createdDiffersFromSold = chunk["CREATED"] != chunk["LATEST SOLD"]
hasLatestSold = pd.notnull(chunk["LATEST SOLD"])
filteredChunk = chunk[createdDiffersFromSold & hasLatestSold]
# filteredChunk = chunk[np.invert(pd.isnull(chunk["LATEST SOLD"]))]
# filtered_chunk = chunk[chunk["MARKET"]=="SALE"][['LISTING ID','DAY','PRICE']][chunk['PRICE']>0]

# Do not truncate rows either ('display.max_columns' is already set above).
pd.set_option('display.max_rows', None)

# print decision[['LISTING ID', 'CREATED', 'DELETED']]
# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(len(filteredChunk))
print(filteredChunk)
def markup(row): return (row['INITIAL PRICE'] / backProjectedPrice( datetime.strptime(row['CREATED'], "%Y-%m-%d"), datetime.strptime(row['LATEST SOLD'], "%Y-%m-%d"), row['PRICE'])) def averageDaysOnMarket(data, date): dom = [(datetime.strptime(row[1], "%Y-%m-%d") - datetime.strptime(row[0], "%Y-%m-%d")).days for row in data[data['LATEST SOLD'] == date] [['CREATED', 'LATEST SOLD']].values] return (sum(dom) / len(dom)) data = ds.ZooplaRawCollated() # 2008-11-06 #data = ds.ZooplaRawCollated(2000000) # 2009-09-30 #data = ds.ZooplaRawCollated(3900000) # 2010-04-27 #data = ds.ZooplaRawCollated(4000000) # 2010-05-07? chunk = data.read(200000) #filteredchunk = chunk[(chunk["MARKET"]=="SALE") & (chunk['INITIAL PRICE'].values>0) & (chunk['INITIAL PRICE'].values<10000000)][['LAND REGISTRY UID','CREATED','INITIAL PRICE','LATEST SOLD']] filteredchunk = chunk[(chunk["MARKET"] == "SALE") & (chunk['INITIAL PRICE'].values > 0) & (chunk['INITIAL PRICE'].values < 10000000)][[ 'CREATED', 'INITIAL PRICE', 'LATEST SOLD', 'PRICE' ]] date = datetime.strptime("2008-10-11", "%Y-%m-%d") refdate = datetime.strptime("1900-01-01", "%Y-%m-%d") soldListings = chunk[(chunk["MARKET"] == "SALE") & (chunk['INITIAL PRICE'].values > 0) &