def get_price_history(model, apartments):
    """
    Predict the price of several apartments on the first day of every month.

    The months covered are January 2013 through December 2021. For each of
    them, the 'soldDate' entry of a copy of every apartment is replaced by
    the corresponding time stamp before the model is asked for a price.

    Parameters
    ----------
    model : Model_tf
        Must provide ``attributes['features']`` and ``predict``.
    apartments : list
        Sized iterable of apartment feature lists (each needs ``copy()``).

    Returns
    -------
    times : ndarray(T)
        Time stamps, one per month.
    prices : ndarray(T, len(apartments))
        Predicted price per time stamp (rows) and apartment (columns).
    """
    feature_names = model.attributes['features']
    sold_date_column = np.where(feature_names == 'soldDate')[0][0]

    # Flatten the year/month grid so one index addresses each time step.
    first_of_months = [(year, month)
                       for year in range(2013, 2022)
                       for month in range(1, 13)]

    times = np.zeros(len(first_of_months))
    prices = np.zeros((len(first_of_months), len(apartments)))

    for row, (year, month) in enumerate(first_of_months):
        stamp = time_stuff.get_time_stamp(year, month, 1)
        times[row] = stamp
        for column, apartment in enumerate(apartments):
            # Work on a copy so the caller's apartment keeps its sold date.
            moved_in_time = apartment.copy()
            moved_in_time[sold_date_column] = stamp
            prices[row, column] = model.predict(moved_in_time)

    return times, prices
def visualize_data(x_raw, y_raw, features, y_label, normal_apartment_indices):
    """
    Print a quick inspection of the 'floor' feature and show histograms.

    Shown, in order: a histogram of the output, a histogram of the natural
    logarithm of the output, and one histogram per feature. For the
    'soldDate' feature the x-axis ticks are placed at the start of each year.

    Parameters
    ----------
    x_raw : array
        Feature matrix, indexed as ``x_raw[feature_index, :]``.
    y_raw : array
        Output values.
    features : array
        Feature names, compared element-wise against strings.
    y_label : str
        Axis label used for the output histograms.
    normal_apartment_indices : array
        Indexed with the boolean "above cut-off" mask to report which
        apartments are located on unusually high floors.
    """
    # Inspect one feature in detail
    inspected = 'floor'
    floor_limit = 30
    inspected_row = np.where(features == inspected)[0][0]
    above_limit = x_raw[inspected_row, :] > floor_limit
    print('Inspect feature: ' + inspected)
    print('Highest apartments are on these floors:')
    print(x_raw[inspected_row, above_limit])
    print('{:d} apartments are above floor {:d}'.format(np.sum(above_limit),
                                                        floor_limit))
    print('Apartment file indices:')
    print(normal_apartment_indices[above_limit])
    print()

    # Output distribution, linear scale
    plt.figure()
    plt.hist(y_raw, bins=50)
    plt.xlabel(y_label)
    plt.ylabel('apartment distribution')
    plt.show()

    # Output distribution, logarithmic scale
    plt.figure()
    plt.hist(np.log(y_raw), bins=50)
    plt.xlabel('log(' + y_label + ')')
    plt.ylabel('apartment distribution')
    plt.show()

    # One histogram per feature
    for index, name in enumerate(features):
        values = x_raw[index, :]
        lowest = np.min(values)
        highest = np.max(values)
        print('min and max of', name, ':', lowest, highest)
        plt.figure()
        plt.hist(values, bins=30)
        plt.ylabel('apartment distribution')
        if name == 'soldDate':
            # Label the time axis with calendar years instead of time stamps.
            first_year, last_year = [datetime.fromtimestamp(stamp).year
                                     for stamp in (lowest, highest)]
            tick_years = range(first_year, last_year + 1)
            tick_positions = [time_stuff.get_time_stamp(year, 1, 1)
                              for year in tick_years]
            plt.xticks(tick_positions, tick_years)
            plt.xlabel('sold date (year)')
        else:
            plt.xlabel(name)
        plt.show()
        print()
def plot_price_change_over_time(model, apartments):
    """
    Plot predicted price and price per living area as functions of time.

    The upper panel shows the price in Msek, the lower panel the price
    divided by the living area in ksek. A black dot marks the prediction at
    the current time for each apartment. The figure is saved as
    'figures/time_evolve_new.pdf' and '.png' and then shown.

    Parameters
    ----------
    model : Model_tf
        Must provide ``attributes['features']`` and ``predict``.
    apartments : dict
        Maps a label (used in the legend) to an apartment feature list.
    """
    feature_names = model.attributes['features']
    times, prices = get_price_history(model, apartments.values())
    sold_date_column = np.where(feature_names == 'soldDate')[0][0]
    living_area_column = np.where(feature_names == 'livingArea')[0][0]

    fig, (price_ax, density_ax) = plt.subplots(nrows=2, sharex=True)

    for column, (label, apartment) in enumerate(apartments.items()):
        history = prices[:, column]
        price_ax.plot(times, history / 10**6, '-', label=label)
        density_ax.plot(times,
                        history / (apartment[living_area_column] * 10**3),
                        '-',
                        label=label)

        # Mark the prediction for "now" with a black dot.
        current = apartment.copy()
        now_stamp = datetime.now().timestamp()
        current[sold_date_column] = now_stamp
        current_price = model.predict(current)
        price_ax.plot(now_stamp, current_price / 10**6, 'o', color='k')
        density_ax.plot(now_stamp,
                        current_price /
                        (current[living_area_column] * 10**3),
                        'o',
                        color='k')

    density_ax.set_xlabel('time')
    price_ax.set_ylabel('price (Msek)')
    density_ax.set_ylabel('price/livingArea (ksek/$m^2$)')

    # Put one tick at the start of every year spanned by the history.
    first_year = datetime.fromtimestamp(np.min(times)).year
    last_year = datetime.fromtimestamp(np.max(times)).year
    tick_years = range(first_year, last_year + 1)
    for ax in (price_ax, density_ax):
        ax.set_xticks(
            [time_stuff.get_time_stamp(year, 1, 1) for year in tick_years])
        ax.set_xticklabels(tick_years)
        ax.grid()
        ax.legend()

    plt.tight_layout()
    for target in ('figures/time_evolve_new.pdf',
                   'figures/time_evolve_new.png'):
        plt.savefig(target)
    plt.show()
def setup_data(apartments, labels, features, y_label):
    """
    Return input and output data for final price prediction.

    Apartments with implausible values (size, price, price change relative
    to the list price, rent, number of rooms, floor) are excluded. Missing
    values in the feature matrix are afterwards handled in place by
    ``replace_missing_values`` (according to the original documentation:
    replaced with the average of that feature).

    Parameters
    ----------
    apartments : array(M,N)
        M is the number of apartment examples.
        N is the number of apartment properties.
    labels : array(N)
        Descriptions of all apartment properties. Must contain
        'latitude', 'longitude', 'livingArea', 'soldPrice', 'listPrice',
        'rent', 'rooms' and 'floor'.
    features : array(K)
        Descriptions of all the features in the return matrix x.
        Each feature must either be one of the labels or be
        'distance2SthlmCenter'.
    y_label : str
        Description of the output property in the return vector y.

    Returns
    -------
    x : array(K,P)
        Feature matrix for the P accepted apartments.
    y : array(P)
        Output vector.
    normal_apartments : array(P)
        Row indices into `apartments` of the accepted apartments.

    Raises
    ------
    Exception
        If a feature is neither a label nor 'distance2SthlmCenter'.
    IndexError
        If a required label is missing.
    """

    def column_of(label):
        """Return the column index of `label` in `apartments`."""
        return np.where(labels == label)[0][0]

    # Column lookups do not depend on the apartment: do them once.
    k_lati = column_of('latitude')
    k_long = column_of('longitude')
    k_area = column_of('livingArea')
    k_sold = column_of('soldPrice')
    k_list = column_of('listPrice')
    k_rent = column_of('rent')
    k_rooms = column_of('rooms')
    k_floor = column_of('floor')

    # For each feature: the source column, or None when it is derived from
    # latitude and longitude.
    feature_columns = []
    for feature in features:
        if feature in labels:
            feature_columns.append(column_of(feature))
        elif feature == 'distance2SthlmCenter':
            feature_columns.append(None)
        else:
            raise Exception('Feature ' + feature + ' does not exist...')

    x = []
    normal_apartments = []
    for i, row in enumerate(apartments):
        # Reasonable values: exclude apartments that are weird.
        # The comparisons are written with < and > on purpose: a missing
        # value (NaN) fails both tests and is therefore kept, to be filled
        # in later.
        living_area = float(row[k_area])
        if living_area < 10 or living_area > 150:
            # Too small or too big
            continue
        sold_price = float(row[k_sold])
        if sold_price < 0.5 * 10**6 or sold_price > 20 * 10**6:
            # Too cheap or too expensive
            continue
        list_price = float(row[k_list])
        if list_price == 0:
            # Zero list price is a bit weird...
            continue
        price_ratio = sold_price / list_price
        if price_ratio < 0.6 or price_ratio > 2.5:
            # Too big decrease or increase from the starting price
            continue
        if float(row[k_rent]) > 20000:
            # Too high rent
            continue
        if float(row[k_rooms]) > 15:
            # Too many rooms
            continue
        if float(row[k_floor]) > 36:
            # Too high floor.
            # According to wiki, the most number of floors in Stockholm
            # is at the moment (2019) 36 floors.
            continue

        # np.float was removed from NumPy; the builtin float is equivalent.
        apartment = np.zeros(len(features), dtype=float)
        for j, (feature, k) in enumerate(zip(features, feature_columns)):
            if k is None:
                apartment[j] = location.distance_2_sthlm_center(
                    float(row[k_lati]), float(row[k_long]))
            elif feature == 'soldDate':
                # The date is parsed by position, i.e. a 'YYYY-MM-DD'
                # layout is assumed. The day occupies two characters
                # (8 and 9), so the slice has to end at 10.
                date_text = row[k]
                year = int(date_text[:4])
                month = int(date_text[5:7])
                day = int(date_text[8:10])
                apartment[j] = time_stuff.get_time_stamp(year, month, day)
            else:
                apartment[j] = float(row[k])

        # An apartment reaching this point is considered normal
        normal_apartments.append(i)
        x.append(apartment)

    # An explicit integer dtype keeps the array usable as an index even
    # when no apartment was accepted.
    normal_apartments = np.array(normal_apartments, dtype=int)
    # The explicit shape keeps x two-dimensional even when it is empty.
    x = np.array(x, dtype=float).reshape(len(x), len(features))

    # Output vector
    y_label_index = column_of(y_label)
    y = np.array(apartments[normal_apartments, y_label_index], dtype=float)

    print('{:d} apartments are un-normal and are excluded.'.format(
        len(apartments)-len(normal_apartments)))

    # Transpose for later convenience: one row per feature
    x = x.T
    replace_missing_values(x)
    return x, y, normal_apartments
def main():
    """
    Load a trained price model and visualise its predictions for Stockholm.

    Steps:
    1) Load the neural-network model from file.
    2) Describe a few apartments as feature vectors.
    3) Print the predictions and plot
       - price as a function of time,
       - price per living area as a function of living area,
       - a contour map of price per m^2 over a latitude/longitude grid,
       - a contour map of the distance to the city centre.

    All figures are saved in the 'figures/' directory and shown on screen.
    """
    # 1) Load machine learning (ML) model
    # Alternative model files:
    #   'models/sthlm_layers9_3_1_sigmoid.h5'
    #   'models/sthlm_layers9_20_10_10_10_5_5_5_5_5_5_5_1_sigmoid.h5'
    #   'models/sthlm_layers9_40_30_20_10_10_1_sigmoid.h5'
    filename_nn = 'models/sthlm_layers9_30_30_60_30_20_10_10_10_10_10_1_sigmoid.h5'
    model = load_nn_model_from_file(filename_nn)
    # List of features expected as input by the model
    features = model.attributes['features']
    print('Input features:')
    print(features)

    def feature_index(name):
        """Return the position of `name` in the model's feature vector."""
        return np.where(features == name)[0][0]

    # 2) Provide basic apartment information
    apartments = {}

    label = 'Sankt Göransgatan 96'
    position = location.get_location_info(label)
    print('Location:', position)
    sankt_goransgatan_dict = {
        'soldDate': time_stuff.get_time_stamp(2019, 5, 31),
        'livingArea': 67,
        'rooms': 3,
        'rent': 3370,
        'floor': 4,
        'constructionYear': 1996,
        'latitude': position.latitude,
        'longitude': position.longitude,
        'distance2SthlmCenter':
        location.distance_2_sthlm_center(position.latitude,
                                         position.longitude),
        'ocean': 2564,
    }
    apartments[label] = [
        sankt_goransgatan_dict[feature] for feature in features
    ]

    label = 'Blekingegatan 27'
    position = location.get_location_info(label)
    print('Location:', position)
    blekingegatan_dict = {
        'soldDate': time_stuff.get_time_stamp(2019, 4, 1),
        'livingArea': 44,
        'rooms': 2,
        'rent': 2800,
        'floor': 1.5,
        'constructionYear': 1927,
        'latitude': position.latitude,
        'longitude': position.longitude,
        'distance2SthlmCenter':
        location.distance_2_sthlm_center(position.latitude,
                                         position.longitude),
        'ocean': float('nan'),
    }
    apartments[label] = [blekingegatan_dict[feature] for feature in features]

    # Median apartment in Stockholm
    print('Median apartment in Stockholm')
    label = 'median apartment'
    median_apartment_dict = {
        'soldDate': time_stuff.get_time_stamp(2016, 11, 1),
        'livingArea': 58.5,
        'rooms': 2,
        'rent': 3091,
        'floor': 2.0,
        'constructionYear': 1952,
        'latitude': 59.33,
        'longitude': 18.04,
        'distance2SthlmCenter':
        location.distance_2_sthlm_center(59.33, 18.04),
        'ocean': float('nan'),
    }
    apartments[label] = [
        median_apartment_dict[feature] for feature in features
    ]

    # 3) Estimate prices in Stockholm!
    # 3.1) Analyze specific addresses

    # Print apartment info and predicted prices
    for label, apartment in apartments.items():
        print(label)
        disp.apartment_into(features, apartment, model)

    # Time evolve apartments: predict the price on the first day of every
    # month, with everything but the sold date kept fixed.
    time_index = feature_index('soldDate')
    years = range(2013, 2022)
    months = range(1, 13)
    n_times = len(years) * len(months)
    times = np.zeros(n_times)
    prices = np.zeros((n_times, len(apartments)))
    time_counter = 0
    for year in years:
        for month in months:
            time_stamp = time_stuff.get_time_stamp(year, month, 1)
            times[time_counter] = time_stamp
            for j, apartment in enumerate(apartments.values()):
                tmp = apartment.copy()
                tmp[time_index] = time_stamp
                prices[time_counter, j] = model.predict(tmp)
            time_counter += 1

    # Plot prices
    plt.figure()
    # One common "now" so that all current-price markers share the same
    # time coordinate.
    time_stamp = datetime.now().timestamp()
    for j, (label, apartment) in enumerate(apartments.items()):
        plt.plot(times, prices[:, j] / 10**6, '-', label=label)
        # Plot current price
        tmp = apartment.copy()
        tmp[time_index] = time_stamp
        plt.plot(time_stamp, model.predict(tmp) / 10**6, 'o', color='k')
    plt.xlabel('time')
    plt.ylabel('price (Msek)')
    plt.xticks([time_stuff.get_time_stamp(year, 1, 1) for year in years],
               years)
    plt.grid()
    plt.legend()
    plt.tight_layout()
    plt.savefig('figures/time_evolve_new.pdf')
    plt.savefig('figures/time_evolve_new.png')
    plt.show()

    # Price/m^2 as function of m^2
    feature = 'livingArea'
    area_index = feature_index(feature)
    areas = np.linspace(20, 150, 300)
    price_density = np.zeros((len(areas), len(apartments)))
    time_stamp = datetime.now().timestamp()
    for j, apartment in enumerate(apartments.values()):
        # Change to current time
        tmp = apartment.copy()
        tmp[time_index] = time_stamp
        for k, area in enumerate(areas):
            # Change area
            tmp[area_index] = area
            price_density[k, j] = model.predict(tmp) / area

    # Plot price density
    plt.figure()
    for j, (label, apartment) in enumerate(apartments.items()):
        plt.plot(areas, price_density[:, j] / 1000, '-', label=label)
        # Plot price density for actual area size, at current time
        tmp = apartment.copy()
        tmp[time_index] = time_stamp
        plt.plot(tmp[area_index],
                 model.predict(tmp) / (tmp[area_index] * 1000),
                 'o',
                 color='k')
    plt.xlabel(feature + ' ($m^2$)')
    plt.ylabel('price/livingArea (ksek/$m^2$)')
    plt.grid()
    plt.legend()
    plt.tight_layout()
    plt.savefig('figures/price_density_new.pdf')
    plt.savefig('figures/price_density_new.png')
    plt.show()

    # 3.2) Create contour color-map of Stockholm
    # Model the apartment price on a grid of geographical positions.
    # Keep all parameters fixed except for the position related features
    # (such as latitude and longitude, and distance to Stockholm's center).
    # Examples of possibly interesting parameter values are:
    # - Median apartment in Stockholm, at the present/current time

    # Change to current time
    reference_label = 'median apartment, current time'
    apartments[reference_label] = apartments['median apartment'].copy()
    apartments[reference_label][time_index] = datetime.now().timestamp()

    # Calculate the price for a latitude and longitude mesh
    latitude_lim = [59.233, 59.45]
    longitude_lim = [17.82, 18.19]
    latitudes = np.linspace(latitude_lim[0], latitude_lim[1], 310)
    longitudes = np.linspace(longitude_lim[0], longitude_lim[1], 300)
    longitude_grid, latitude_grid = np.meshgrid(longitudes, latitudes)
    # np.float was removed from NumPy; the builtin float is equivalent.
    price_grid = np.zeros_like(longitude_grid, dtype=float)
    # The feature positions are the same for every grid point.
    latitude_index = feature_index('latitude')
    longitude_index = feature_index('longitude')
    distance_index = feature_index('distance2SthlmCenter')
    for i, lat in enumerate(latitudes):
        for j, lon in enumerate(longitudes):
            tmp = apartments[reference_label].copy()
            tmp[latitude_index] = lat
            tmp[longitude_index] = lon
            tmp[distance_index] = location.distance_2_sthlm_center(lat, lon)
            price_grid[i, j] = model.predict(tmp)
    # Negative predictions are masked out of the contour plot.
    price_grid[price_grid < 0] = np.nan

    # Plot map and apartment prices
    fig = plt.figure(figsize=(8, 8))
    # map rendering quality. 6 is very bad, 10 is ok, 12 is good,
    # 13 is very good, 14 excellent
    map_quality = 12
    plot.plot_map(longitude_lim, latitude_lim, map_quality)
    # Plot the price per living area
    plot.plot_contours(
        fig,
        longitude_grid,
        latitude_grid,
        price_grid / (apartments[reference_label][area_index] * 10**3),
        colorbarlabel=r'price/$m^2$ (ksek)')
    # Plot landmarks of Stockholm
    plot.plot_sthlm_landmarks()
    # Plot design
    plt.legend(loc=0)
    plt.xlabel('longitude')
    plt.ylabel('latitude')
    plt.savefig('figures/sthlm_new.pdf')
    plt.savefig('figures/sthlm_new.png')
    plt.show()

    # Plot figure with distance to Stockholm center
    d2c_grid = np.zeros_like(longitude_grid, dtype=float)
    for i, lat in enumerate(latitudes):
        for j, lon in enumerate(longitudes):
            d2c_grid[i, j] = location.distance_2_sthlm_center(lat, lon)
    fig = plt.figure(figsize=(8, 8))
    # map rendering quality. 6 is very bad, 10 is ok, 12 is good,
    # 13 is very good, 14 excellent
    map_quality = 12
    plot.plot_map(longitude_lim, latitude_lim, map_quality)
    # Plot distance
    plot.plot_contours(fig,
                       longitude_grid,
                       latitude_grid,
                       d2c_grid,
                       colorbarlabel=r'distance to center (km)')
    # Plot landmarks of Stockholm
    plot.plot_sthlm_landmarks()
    # Plot design
    plt.legend(loc=0)
    plt.xlabel('longitude')
    plt.ylabel('latitude')
    plt.savefig('figures/sthlm_d2c_new.pdf')
    plt.savefig('figures/sthlm_d2c_new.png')
    plt.show()
def main(ai_name, verbose):
    """
    Predict and visualise apartment prices with the model named `ai_name`.

    Two apartments are described by hand, their predicted prices are
    printed, and the price is plotted as a function of time, size, floor and
    building year. Finally maps (and videos of maps) are produced for
    'Sankt Göransgatan 96', together with a map of the distance to the
    city centre.

    Parameters
    ----------
    ai_name : str
        Passed on to ``Model_tf``.
    verbose
        Not used by this function.
    """
    model = Model_tf(ai_name)
    features = model.attributes['features']
    # Read but not used further in this function.
    y_label = model.attributes['y_label']
    print('Input features:', features)

    def as_feature_list(properties):
        """Order the property values the way the model expects them."""
        return [properties[feature] for feature in features]

    # Describe the apartments of interest
    apartments = {}

    address = 'Sankt Göransgatan 96'
    position = location.get_location_info(address)
    print('Location:', position)
    sold = time_stuff.get_time_stamp(2019, 5, 31)
    apartments[address] = as_feature_list({
        'soldDate': sold,
        'livingArea': 67,
        'rooms': 3,
        'rent': 3370,
        'floor': 4,
        'constructionYear': 1996,
        'latitude': position.latitude,
        'longitude': position.longitude,
        'distance2SthlmCenter':
        location.distance_2_sthlm_center(position.latitude,
                                         position.longitude),
        'ocean': 2564,
    })

    address = 'Blekingegatan 27'
    position = location.get_location_info(address)
    print('Location:', position)
    sold = time_stuff.get_time_stamp(2019, 4, 1)
    apartments[address] = as_feature_list({
        'soldDate': sold,
        'livingArea': 44,
        'rooms': 2,
        'rent': 2800,
        'floor': 1.5,
        'constructionYear': 1927,
        'latitude': position.latitude,
        'longitude': position.longitude,
        'distance2SthlmCenter':
        location.distance_2_sthlm_center(position.latitude,
                                         position.longitude),
        'ocean': float('nan'),
    })

    # Show the apartment information together with the predicted price
    for address, apartment in apartments.items():
        print(address)
        disp.apartment_into(features, apartment, model)

    # One-dimensional scans: vary a single property, keep the rest fixed
    plot_price_change_over_time(model, apartments)
    plot_price_change_with_size(model, apartments)
    plot_price_change_with_floor(model, apartments)
    plot_price_change_with_building_year(model, apartments)

    # Contour colour-map of Stockholm:
    # the price is modelled on a grid of geographical positions, with only
    # the position-related features (latitude, longitude and distance to
    # Stockholm's center) changing between grid points.
    latitude_lim = [59.233, 59.45]
    longitude_lim = [17.82, 18.19]
    latitudes = np.linspace(latitude_lim[0], latitude_lim[1], 301)
    longitudes = np.linspace(longitude_lim[0], longitude_lim[1], 300)
    reference_apartment = apartments['Sankt Göransgatan 96']
    plot_price_on_map(model, reference_apartment, latitudes, longitudes)

    # Videos of how the prices on the map vary over time
    videos_on_map(model, reference_apartment, latitudes, longitudes)

    plot_distance_to_ceneter_on_map(latitudes, longitudes)
def videos_on_map(model, apartment, latitudes, longitudes, x=None):
    """
    Make videos of how the price map changes with the sale date.

    The price grid is evaluated on the 1st and 15th of every month from
    2013 through 2020. Two sequences are handed to ``make_video_on_map``:
    the price per living area (ksek/m^2), and the percentage change of that
    quantity relative to the first time step.

    Parameters
    ----------
    model : Model_tf
        Must provide ``attributes['features']``.
    apartment : list
        Reference apartment; only a copy is modified.
    latitudes : ndarray(N)
    longitudes : ndarray(M)
    x : None or ndarray(K,L)
        If not None, represents K features for L different apartments.
        Passed on unchanged to ``make_video_on_map``.
    """
    feature_names = model.attributes['features']
    sold_date_column = np.where(feature_names == 'soldDate')[0][0]

    # Every sale date for which one video frame is produced.
    sale_dates = [(year, month, day)
                  for year in range(2013, 2021)
                  for month in range(1, 13)
                  for day in (1, 15)]

    times = np.zeros(len(sale_dates))
    frames = []
    for frame_number, (year, month, day) in enumerate(sale_dates):
        stamp = time_stuff.get_time_stamp(year, month, day)
        times[frame_number] = stamp

        # Move a copy of the apartment to this point in time
        apartment_reference = apartment.copy()
        apartment_reference[sold_date_column] = stamp

        grid, longitude_grid, latitude_grid = get_price_on_grid(
            model, apartment_reference, latitudes, longitudes, feature_names)
        # Negative predictions are masked out.
        grid[grid < 0] = np.nan
        frames.append(grid)

    prices = np.array(frames)

    # Convert to price per m^2 (ksek)
    living_area_column = np.where(feature_names == 'livingArea')[0][0]
    prices /= apartment_reference[living_area_column] * 10**3

    # Relative change, in percent, compared to the first time step
    price_change = 100 * ((prices / prices[0]) - 1)

    make_video_on_map(times,
                      prices,
                      longitude_grid,
                      latitude_grid,
                      feature_names,
                      x,
                      filename_keyword="sthlm_new",
                      colorbarlabel=r'price/$m^2$ (ksek)')
    make_video_on_map(times,
                      price_change,
                      longitude_grid,
                      latitude_grid,
                      feature_names,
                      x,
                      filename_keyword="sthlm_change_new",
                      colorbarlabel=r'price change (%)')