def main(): # read data for train train = pd.read_csv('data_for_model/new_with_price_per_sqm/training_data.csv') LB = LabelBinarizer() train['town'] = LB.fit_transform(train['town']) train['flat_model'] = LB.fit_transform(train['flat_model']) labels = train.iloc[:,20:].values total_price = train.iloc[:,19:20].values features = train.iloc[:,:19].values floor_area = np.asarray(train['floor_area_sqm'].values).reshape(len(labels),1) # preprocess training data X_train = preprocessing_X(features) scaler_y_train, y_train = preprocessing_Y(labels) # read in test data test = pd.read_csv('data_for_model/new_with_price_per_sqm/test_data.csv') test['town'] = LB.fit_transform(test['town']) test['flat_model'] = LB.fit_transform(test['flat_model']) labels_test = test.iloc[:,20:].values total_price_test = test.iloc[:,19:20].values features_test = test.iloc[:,:19].values floor_area_test = np.asarray(test['floor_area_sqm'].values).reshape(len(labels_test),1) # preprocess test data X_test = preprocessing_X(features_test) scaler_y_test, y_test = preprocessing_Y(labels_test) # fine_tune # fine_tune(X_train, y_train, scaler_y, floor_area, total_price) # train on all training data with best hyper-params #model = build_model() #result = model.fit(X_train, y_train, epochs=300, batch_size = int(len(X_train)/256), verbose=1, shuffle=False) # train on all training data with best hyper-params with pdp model = KerasRegressor(build_model, epochs=300, batch_size = int(len(X_train)/256), verbose=1, shuffle=False) model._estimator_type = "regressor" # Cheap workaround for keras NN to work with plot_partial_dependence model.dummy_ = "dummy" # Cheap workaround for keras NN to work with plot_partial_dependence model.fit(X_train, y_train) print('Computing partial dependence plots...') tic = time() pdp_features = [10] # remaining_lease, dist_nearest_mrt, dist_nearest_supermarkets, dist_nearest_sports_facilities display = plot_partial_dependence(estimator=model, X=X_train, features=pdp_features, kind='both', subsample=500, random_state=0, verbose=10) print(f"done in {time() - tic:.3f}s") display.figure_.suptitle( 'Plot' ) display.figure_.subplots_adjust(hspace=0.3) plt.show() # get score for validation #score = get_score(scaler_y_train.inverse_transform(model.predict(X_train)) * floor_area, total_price) #print('score on validation = {}'.format(score)) # predict y values for test data #val_res = scaler_y_test.inverse_transform(model.predict(X_test)) # get performance score on test data #score = get_score(val_res * floor_area_test, total_price_test) # score = get_score(val_res, total_price_test) #print('score on test = {}'.format(score)) '''