示例#1
0
def get_price_history(model, apartments):
    """
    Predict monthly prices for several apartments.

    Every apartment is evaluated on the first day of each month from
    January 2013 up to and including December 2021, with all its other
    features left untouched.

    Parameters
    ----------
    model : Model_tf
        Must provide ``attributes['features']`` (compared element-wise
        with 'soldDate', so presumably an array of str) and ``predict``.
    apartments : list
        Sized iterable of apartments. Each apartment has to support
        ``copy()`` and item assignment.

    Returns
    -------
    times : ndarray(T)
        Time stamps, one per month.
    prices : ndarray(T, len(apartments))
        ``prices[t, j]`` is the prediction for apartment j at times[t].

    """
    sold_date_col = np.where(
        model.attributes['features'] == 'soldDate')[0][0]
    first_of_months = [(year, month)
                       for year in range(2013, 2022)
                       for month in range(1, 13)]
    n_times = len(first_of_months)
    times = np.zeros(n_times)
    prices = np.zeros((n_times, len(apartments)))
    for row, (year, month) in enumerate(first_of_months):
        stamp = time_stuff.get_time_stamp(year, month, 1)
        times[row] = stamp
        for col, apartment in enumerate(apartments):
            # Work on a copy so the caller's apartment keeps its sold date
            shifted = apartment.copy()
            shifted[sold_date_col] = stamp
            prices[row, col] = model.predict(shifted)
    return times, prices
示例#2
0
def visualize_data(x_raw, y_raw, features, y_label, normal_apartment_indices):
    """
    Print a short inspection of the data and show histograms of it.

    Parameters
    ----------
    x_raw : array(K,P)
        Feature matrix, one row per feature.
    y_raw : array(P)
        Output values. The logarithm of them is also plotted.
    features : array(K)
        Feature descriptions. Compared element-wise with a string,
        hence presumably a numpy array of str. Has to contain 'floor'.
    y_label : str
        Description of the output, used as axis label.
    normal_apartment_indices : array(P)
        Indexed with a boolean mask over the apartments.

    """
    # Inspect one feature: which apartments lie above the cut-off?
    inspected = 'floor'
    threshold = 30
    row = np.where(features == inspected)[0][0]
    above = x_raw[row, :] > threshold
    print('Inspect feature: ' + inspected)
    print('Highest apartments are on these floors:')
    print(x_raw[row, above])
    print('{:d} apartments are above floor {:d}'.format(np.sum(above), threshold))
    print('Apartment file indices:')
    print(normal_apartment_indices[above])
    print()

    # Histograms of the output: first as it is, then its logarithm
    for take_log in (False, True):
        plt.figure()
        plt.hist(np.log(y_raw) if take_log else y_raw, bins=50)
        plt.xlabel('log(' + y_label + ')' if take_log else y_label)
        plt.ylabel('apartment distribution')
        plt.show()

    # One histogram per feature
    for k, name in enumerate(features):
        values = x_raw[k, :]
        lowest, highest = np.min(values), np.max(values)
        print('min and max of', name, ':', lowest, highest)
        plt.figure()
        plt.hist(values, bins=30)
        plt.ylabel('apartment distribution')
        if name != 'soldDate':
            plt.xlabel(name)
        else:
            # Time stamps are hard to read: put a tick at the start of
            # every year covered by the data instead.
            first_year = datetime.fromtimestamp(lowest).year
            last_year = datetime.fromtimestamp(highest).year
            tick_years = range(first_year, last_year + 1)
            tick_positions = [
                time_stuff.get_time_stamp(year, 1, 1) for year in tick_years
            ]
            plt.xticks(tick_positions, tick_years)
            plt.xlabel('sold date (year)')
        plt.show()
    print()
示例#3
0
def plot_price_change_over_time(model, apartments):
    """
    Plot how the predicted prices of apartments change with sale time.

    The upper panel shows the price, the lower panel the price divided
    by the living area. A black dot marks the prediction for the moment
    the function is executed. The figure is saved as
    'figures/time_evolve_new.pdf' and 'figures/time_evolve_new.png'
    and then shown.

    Parameters
    ----------
    model : Model_tf
        Must provide ``attributes['features']`` and ``predict``.
    apartments : dict
        Maps a legend label to an apartment (feature values ordered
        like the model features).

    """
    features = model.attributes['features']
    times, prices = get_price_history(model, apartments.values())
    sold_date_col = np.where(features == 'soldDate')[0][0]
    area_col = np.where(features == 'livingArea')[0][0]
    _fig, (ax_price, ax_density) = plt.subplots(nrows=2, sharex=True)
    for col, (name, apartment) in enumerate(apartments.items()):
        history = prices[:, col]
        ax_price.plot(times, history / 10**6, '-', label=name)
        ax_density.plot(times,
                        history / (apartment[area_col] * 10**3),
                        '-',
                        label=name)
        # Mark the prediction for right now
        current = apartment.copy()
        now = datetime.now().timestamp()
        current[sold_date_col] = now
        price_now = model.predict(current)
        ax_price.plot(now, price_now / 10**6, 'o', color='k')
        ax_density.plot(now,
                        price_now / (current[area_col] * 10**3),
                        'o',
                        color='k')
    ax_density.set_xlabel('time')
    ax_price.set_ylabel('price (Msek)')
    ax_density.set_ylabel('price/livingArea (ksek/$m^2$)')
    # One tick per year spanned by the time axis
    first_year = datetime.fromtimestamp(np.min(times)).year
    last_year = datetime.fromtimestamp(np.max(times)).year
    tick_years = range(first_year, last_year + 1)
    for ax in (ax_price, ax_density):
        ax.set_xticks(
            [time_stuff.get_time_stamp(year, 1, 1) for year in tick_years])
        ax.set_xticklabels(tick_years)
        ax.grid()
        ax.legend()
    plt.tight_layout()
    for target in ('figures/time_evolve_new.pdf',
                   'figures/time_evolve_new.png'):
        plt.savefig(target)
    plt.show()
示例#4
0
def setup_data(apartments, labels, features, y_label):
    """
    Return input and output data for final price prediction.

    Apartments with unreasonable values are excluded. The remaining
    feature matrix is finally handed to ``replace_missing_values``
    (defined elsewhere; its return value is ignored, so it is expected
    to fill in missing values in place).

    Parameters
    ----------
    apartments : array(M,N)
        M is the number of apartment examples.
        N is the number of apartment properties.
        Values must be convertible with ``float``, except 'soldDate'
        which is sliced as a 'YYYY-MM-DD...' string.
    labels : array(N)
        Descriptions of all apartment properties. Has to contain
        'latitude', 'longitude', 'livingArea', 'soldPrice',
        'listPrice', 'rent', 'rooms' and 'floor'.
    features : array(K)
        Descriptions of all the features in the return matrix x.
    y_label : str
        Descriptions of the output property in the return vector y.

    Returns
    -------
    x : array(K,P)
        Feature matrix.
    y : array(P)
        Output vector.
    normal_apartments : array(P)
        Row indices (into `apartments`) of the apartments that were kept.

    Raises
    ------
    IndexError
        If a required label is missing from `labels`.
    Exception
        If a feature is neither a label nor 'distance2SthlmCenter'.

    """
    def column(label):
        # Index of the first property described by `label`
        return np.where(labels == label)[0][0]

    # Resolve all columns once, instead of searching for every apartment
    k_lati = column('latitude')
    k_long = column('longitude')
    k_area = column('livingArea')
    k_sold = column('soldPrice')
    k_list = column('listPrice')
    k_rent = column('rent')
    k_rooms = column('rooms')
    k_floor = column('floor')
    # Column of each feature that is stored directly in `apartments`.
    # None marks features that must be derived (or are unknown).
    feature_columns = [
        column(feature) if feature in labels else None
        for feature in features
    ]

    # Feature data
    x = []
    normal_apartments = []
    # Loop over all apartments
    for i in range(np.shape(apartments)[0]):
        # Reasonable values: exclude apartments that are weird.
        # E.g. apartments that are either too small, too big,
        # too cheap, too expensive,
        # have too big price change from the starting price,
        # too high rent
        # or are strange in some other way.
        living_area = float(apartments[i, k_area])
        if living_area < 10 or living_area > 150:
            # Skip this apartment, too small or too big
            continue
        sold_price = float(apartments[i, k_sold])
        if sold_price < 0.5*10**6 or sold_price > 20*10**6:
            # Skip this apartment, too cheap or too expensive
            continue
        list_price = float(apartments[i, k_list])
        if list_price == 0:
            # Skip this apartment, zero list price is a bit weird...
            continue
        price_ratio = sold_price / list_price
        if price_ratio < 0.6 or price_ratio > 2.5:
            # Skip this apartment, too big decrease or increase
            continue
        if float(apartments[i, k_rent]) > 20000:
            # Skip this apartment, too high rent
            continue
        if float(apartments[i, k_rooms]) > 15:
            # Skip this apartment, too many rooms
            continue
        if float(apartments[i, k_floor]) > 36:
            # Skip this apartment, too high floor.
            # According to wiki, the most number of floors in Stockholm
            # is at the moment (2019) 36 floors.
            continue
        # float instead of np.float: the alias no longer exists in
        # recent NumPy versions.
        apartment = np.zeros(len(features), dtype=float)
        for j, (feature, k) in enumerate(zip(features, feature_columns)):
            if k is not None:
                if feature == 'soldDate':
                    sold_date = apartments[i, k]
                    year = int(sold_date[:4])
                    month = int(sold_date[5:7])
                    # Both digits of the day (characters 8 and 9)
                    day = int(sold_date[8:10])
                    apartment[j] = time_stuff.get_time_stamp(year, month, day)
                else:
                    apartment[j] = float(apartments[i, k])
            elif feature == 'distance2SthlmCenter':
                apartment[j] = location.distance_2_sthlm_center(
                    float(apartments[i, k_lati]), float(apartments[i, k_long]))
            else:
                raise Exception('Feature ' + feature + ' does not exist...')
        # An apartment reaching this point is considered normal
        normal_apartments.append(i)
        x.append(apartment)

    # Integer dtype keeps the array usable as an index even when empty
    normal_apartments = np.array(normal_apartments, dtype=int)
    x = np.array(x)
    # Output index
    y_label_index = column(y_label)
    # Output vector
    y = np.array(apartments[normal_apartments, y_label_index], dtype=float)
    print('{:d} apartments are un-normal and are excluded.'.format(
        len(apartments)-len(normal_apartments)))

    # Transpose for later convenience
    x = x.T
    replace_missing_values(x)
    return x, y, normal_apartments
示例#5
0
def main():
    """
    Load a stored price model and visualize its predictions for Stockholm.

    The function
    1) loads a neural network model from a hard-coded file,
    2) describes three apartments (two street addresses and a
       "median apartment") as feature lists ordered like the model's
       ``features`` attribute,
    3) hands each apartment to ``disp.apartment_into`` (presumably
       printing its info and predicted price), plots the predicted
       price versus sale time and the price per living area versus
       living area, and draws two maps: predicted price per living
       area and distance to the city center.

    All figures are written to the ``figures/`` directory (pdf and png)
    and shown on screen. Nothing is returned.

    """
    # 1) Load machine learning (ML) model
    # Alternative model files:
    #filename_nn = 'models/sthlm_layers9_3_1_sigmoid.h5'
    #filename_nn = 'models/sthlm_layers9_20_10_10_10_5_5_5_5_5_5_5_1_sigmoid.h5'
    #filename_nn = 'models/sthlm_layers9_40_30_20_10_10_1_sigmoid.h5'
    filename_nn = 'models/sthlm_layers9_30_30_60_30_20_10_10_10_10_10_1_sigmoid.h5'
    model = load_nn_model_from_file(filename_nn)
    # List of features expected as input by the model
    features = model.attributes['features']
    print('Input features:')
    print(features)

    # 2) Provide basic apartment information
    apartments = {}
    label = 'Sankt Göransgatan 96'
    position = location.get_location_info(label)
    print('Location:', position)
    sankt_goransgatan_dict = {
        'soldDate': time_stuff.get_time_stamp(2019, 5, 31),
        'livingArea': 67,
        'rooms': 3,
        'rent': 3370,
        'floor': 4,
        'constructionYear': 1996,
        'latitude': position.latitude,
        'longitude': position.longitude,
        'distance2SthlmCenter': location.distance_2_sthlm_center(
            position.latitude, position.longitude),
        'ocean': 2564,
    }
    apartments[label] = [
        sankt_goransgatan_dict[feature] for feature in features
    ]

    label = 'Blekingegatan 27'
    position = location.get_location_info(label)
    print('Location:', position)
    blekingegatan_dict = {
        'soldDate': time_stuff.get_time_stamp(2019, 4, 1),
        'livingArea': 44,
        'rooms': 2,
        'rent': 2800,
        'floor': 1.5,
        'constructionYear': 1927,
        'latitude': position.latitude,
        'longitude': position.longitude,
        'distance2SthlmCenter': location.distance_2_sthlm_center(
            position.latitude, position.longitude),
        'ocean': float('nan'),
    }
    apartments[label] = [blekingegatan_dict[feature] for feature in features]

    # Median apartment in Stockholm
    print('Median apartment in Stockholm')
    label = 'median apartment'
    median_apartment_dict = {
        'soldDate': time_stuff.get_time_stamp(2016, 11, 1),
        'livingArea': 58.5,
        'rooms': 2,
        'rent': 3091,
        'floor': 2.0,
        'constructionYear': 1952,
        'latitude': 59.33,
        'longitude': 18.04,
        'distance2SthlmCenter': location.distance_2_sthlm_center(59.33, 18.04),
        'ocean': float('nan'),
    }
    apartments[label] = [
        median_apartment_dict[feature] for feature in features
    ]

    # 3) Estimate prices in Stockholm!
    # 3.1) Analyze specific addresses

    # Hand apartment info and model to the display helper
    for label, apartment in apartments.items():
        print(label)
        disp.apartment_into(features, apartment, model)

    # Time evolve apartments: predict the price on the first day of
    # every month, keeping all other features fixed.
    time_index = np.where(features == 'soldDate')[0][0]
    years = range(2013, 2022)
    months = range(1, 13)
    times = np.zeros(len(years) * len(months))
    prices = np.zeros((len(years) * len(months), len(apartments)))
    time_counter = 0
    for year in years:
        for month in months:
            time_stamp = time_stuff.get_time_stamp(year, month, 1)
            times[time_counter] = time_stamp
            for j, apartment in enumerate(apartments.values()):
                tmp = apartment.copy()
                tmp[time_index] = time_stamp
                prices[time_counter, j] = model.predict(tmp)
            time_counter += 1
    # Plot prices
    plt.figure()
    for j, (label, apartment) in enumerate(apartments.items()):
        plt.plot(times, prices[:, j] / 10**6, '-', label=label)
        # Plot current price
        tmp = apartment.copy()
        time_stamp = datetime.now().timestamp()
        tmp[time_index] = time_stamp
        plt.plot(time_stamp, model.predict(tmp) / 10**6, 'o', color='k')
    plt.xlabel('time')
    plt.ylabel('price (Msek)')
    plt.xticks([time_stuff.get_time_stamp(year, 1, 1) for year in years],
               years)
    plt.grid()
    plt.legend()
    plt.tight_layout()
    plt.savefig('figures/time_evolve_new.pdf')
    plt.savefig('figures/time_evolve_new.png')
    plt.show()

    # Price/m^2 as function of m^2
    feature = 'livingArea'
    area_index = np.where(features == feature)[0][0]
    areas = np.linspace(20, 150, 300)
    price_density = np.zeros((len(areas), len(apartments)))
    time_stamp = datetime.now().timestamp()
    for j, apartment in enumerate(apartments.values()):
        # Change to current time
        tmp = apartment.copy()
        tmp[time_index] = time_stamp
        for k, area in enumerate(areas):
            # Change area
            tmp[area_index] = area
            price_density[k, j] = model.predict(tmp) / area
    # Plot price density
    plt.figure()
    for j, (label, apartment) in enumerate(apartments.items()):
        plt.plot(areas, price_density[:, j] / 1000, '-', label=label)
        # Plot price density for actual area size, at current time
        tmp = apartment.copy()
        tmp[time_index] = time_stamp
        plt.plot(tmp[area_index],
                 model.predict(tmp) / (tmp[area_index] * 1000),
                 'o',
                 color='k')
    plt.xlabel(feature + '  ($m^2$)')
    plt.ylabel('price/livingArea (ksek/$m^2$)')
    plt.grid()
    plt.legend()
    plt.tight_layout()
    plt.savefig('figures/price_density_new.pdf')
    plt.savefig('figures/price_density_new.png')
    plt.show()

    # 3.2) Create contour color-map of Stockholm
    # Model the apartment price on a grid of geographical positions.
    # Keep all parameters fixed except for the position related features
    # (such as latitude and longitude, and distance to Stockholm's center).
    # The reference used here is the median apartment in Stockholm,
    # at the present/current time.

    # Change to current time
    apartments['median apartment, current time'] = apartments[
        'median apartment'].copy()
    apartments['median apartment, current time'][time_index] = (
        datetime.now().timestamp())
    reference = apartments['median apartment, current time']
    # Calculate the price for a latitude and longitude mesh
    latitude_lim = [59.233, 59.45]
    longitude_lim = [17.82, 18.19]
    latitudes = np.linspace(latitude_lim[0], latitude_lim[1], 310)
    longitudes = np.linspace(longitude_lim[0], longitude_lim[1], 300)
    longitude_grid, latitude_grid = np.meshgrid(longitudes, latitudes)
    # float instead of np.float: the alias no longer exists in
    # recent NumPy versions.
    price_grid = np.zeros_like(longitude_grid, dtype=float)
    # Feature positions do not depend on the grid point: look them up once
    lat_index = np.where(features == 'latitude')[0][0]
    long_index = np.where(features == 'longitude')[0][0]
    d2c_index = np.where(features == 'distance2SthlmCenter')[0][0]
    for row, lat in enumerate(latitudes):
        for col, lon in enumerate(longitudes):
            tmp = reference.copy()
            tmp[lat_index] = lat
            tmp[long_index] = lon
            tmp[d2c_index] = location.distance_2_sthlm_center(lat, lon)
            price_grid[row, col] = model.predict(tmp)
    # Negative predictions are not meaningful prices: leave them blank
    price_grid[price_grid < 0] = np.nan
    # Plot map and apartment prices
    fig = plt.figure(figsize=(8, 8))
    # map rendering quality. 6 is very bad, 10 is ok, 12 is good, 13 is very good, 14 excellent
    map_quality = 12
    plot.plot_map(longitude_lim, latitude_lim, map_quality)
    # Plot the price per living area
    plot.plot_contours(
        fig,
        longitude_grid,
        latitude_grid,
        price_grid / (reference[area_index] * 10**3),
        colorbarlabel=r'price/$m^2$ (ksek)')
    # Plot landmarks of Stockholm
    plot.plot_sthlm_landmarks()
    # Plot design
    plt.legend(loc=0)
    plt.xlabel('longitude')
    plt.ylabel('latitude')
    plt.savefig('figures/sthlm_new.pdf')
    plt.savefig('figures/sthlm_new.png')
    plt.show()

    # Plot figure with distance to Stockholm center
    d2c_grid = np.zeros_like(longitude_grid, dtype=float)
    for row, lat in enumerate(latitudes):
        for col, lon in enumerate(longitudes):
            d2c_grid[row, col] = location.distance_2_sthlm_center(lat, lon)
    fig = plt.figure(figsize=(8, 8))
    # map rendering quality. 6 is very bad, 10 is ok, 12 is good, 13 is very good, 14 excellent
    map_quality = 12
    plot.plot_map(longitude_lim, latitude_lim, map_quality)
    # Plot distance
    plot.plot_contours(fig,
                       longitude_grid,
                       latitude_grid,
                       d2c_grid,
                       colorbarlabel=r'distance to center  (km)')
    # Plot landmarks of Stockholm
    plot.plot_sthlm_landmarks()
    # Plot design
    plt.legend(loc=0)
    plt.xlabel('longitude')
    plt.ylabel('latitude')
    plt.savefig('figures/sthlm_d2c_new.pdf')
    plt.savefig('figures/sthlm_d2c_new.png')
    plt.show()
示例#6
0
def main(ai_name, verbose):
    """
    Analyze the price predictions of a stored model for two addresses.

    Builds feature lists for two apartments, passes them to the
    display helper and to the plotting functions (price versus time,
    size, floor and building year), and finally produces the map
    figures and videos with 'Sankt Göransgatan 96' as reference.

    Parameters
    ----------
    ai_name :
        Passed unchanged to ``Model_tf``.
    verbose :
        Not used by this function.

    """
    model = Model_tf(ai_name)
    features = model.attributes['features']
    # Read from the model attributes; not used further in this function.
    y_label = model.attributes['y_label']
    print('Input features:', features)

    def as_feature_list(properties):
        # Order the property values like the model's input features
        return [properties[feature] for feature in features]

    # Provide basic apartment information
    apartments = {}

    address = 'Sankt Göransgatan 96'
    position = location.get_location_info(address)
    print('Location:', position)
    apartments[address] = as_feature_list({
        'soldDate': time_stuff.get_time_stamp(2019, 5, 31),
        'livingArea': 67,
        'rooms': 3,
        'rent': 3370,
        'floor': 4,
        'constructionYear': 1996,
        'latitude': position.latitude,
        'longitude': position.longitude,
        'distance2SthlmCenter': location.distance_2_sthlm_center(
            position.latitude, position.longitude),
        'ocean': 2564,
    })

    address = 'Blekingegatan 27'
    position = location.get_location_info(address)
    print('Location:', position)
    apartments[address] = as_feature_list({
        'soldDate': time_stuff.get_time_stamp(2019, 4, 1),
        'livingArea': 44,
        'rooms': 2,
        'rent': 2800,
        'floor': 1.5,
        'constructionYear': 1927,
        'latitude': position.latitude,
        'longitude': position.longitude,
        'distance2SthlmCenter': location.distance_2_sthlm_center(
            position.latitude, position.longitude),
        'ocean': float('nan'),
    })

    # Estimate prices in Stockholm!

    # Hand every apartment to the display helper
    for name, apartment in apartments.items():
        print(name)
        disp.apartment_into(features, apartment, model)

    # Vary one property at a time
    plot_price_change_over_time(model, apartments)
    plot_price_change_with_size(model, apartments)
    plot_price_change_with_floor(model, apartments)
    plot_price_change_with_building_year(model, apartments)

    # Maps of Stockholm: evaluate on a grid of geographical positions,
    # using one apartment as reference for the remaining properties.
    latitude_lim = [59.233, 59.45]
    longitude_lim = [17.82, 18.19]
    latitudes = np.linspace(latitude_lim[0], latitude_lim[1], 301)
    longitudes = np.linspace(longitude_lim[0], longitude_lim[1], 300)
    reference = apartments['Sankt Göransgatan 96']
    plot_price_on_map(model, reference, latitudes, longitudes)

    # Movie about how prices on map vary over time
    videos_on_map(model, reference, latitudes, longitudes)

    plot_distance_to_ceneter_on_map(latitudes, longitudes)
示例#7
0
def videos_on_map(model, apartment, latitudes, longitudes, x=None):
    """
    Make videos of price maps evolving in time.

    The price of the reference apartment is evaluated on the
    latitude/longitude grid for the 1st and the 15th of every month
    from 2013 up to and including 2020. Two videos are requested from
    ``make_video_on_map``: the price per living area, and the change
    in percent relative to the first time step.

    Parameters
    ----------
    model : Model_tf
        Must provide ``attributes['features']``.
    apartment : list
        Reference apartment. Only its sold date is varied here.
    latitudes : ndarray(N)

    longitudes : ndarray(M)

    x : None or ndarray(K,L)
        If not None, represents K features for L different apartments.
        Forwarded unchanged to ``make_video_on_map``.

    """
    features = model.attributes['features']
    sold_date_col = np.where(features == 'soldDate')[0][0]

    dates = [(year, month, day)
             for year in range(2013, 2021)
             for month in range(1, 13)
             for day in (1, 15)]
    times = np.zeros(len(dates))
    frames = []
    for frame, (year, month, day) in enumerate(dates):
        stamp = time_stuff.get_time_stamp(year, month, day)
        times[frame] = stamp
        # Same apartment, different sale time
        apartment_reference = apartment.copy()
        apartment_reference[sold_date_col] = stamp
        price_grid, longitude_grid, latitude_grid = get_price_on_grid(
            model, apartment_reference, latitudes, longitudes, features)
        # Blank out negative predictions
        price_grid[price_grid < 0] = np.nan
        frames.append(price_grid)
    prices = np.array(frames)

    # Convert to price per m^2 (ksek)
    area_col = np.where(features == 'livingArea')[0][0]
    prices /= apartment_reference[area_col] * 10**3
    # Change in percent relative to the first frame
    price_change = 100 * ((prices / prices[0]) - 1)

    videos = (
        (prices, "sthlm_new", r'price/$m^2$ (ksek)'),
        (price_change, "sthlm_change_new", r'price change  (%)'),
    )
    for data, keyword, bar_label in videos:
        make_video_on_map(times,
                          data,
                          longitude_grid,
                          latitude_grid,
                          features,
                          x,
                          filename_keyword=keyword,
                          colorbarlabel=bar_label)