def main():
    """Grid-search two classifiers on the training CSV and write each model's
    test-set predictions to a file whose name carries its best CV score."""
    # Columns holding categorical data that the helper must encode.
    categorical = [1, 3, 5, 6, 7, 8, 9, 13]

    boosting = GradientBoostingClassifier()
    boosting_grid = {
        'loss': ['exponential'],
        'learning_rate': np.linspace(0.01, 1.0, 10),
        'max_depth': [3, 4, 5],
    }

    svc = LinearSVC(penalty='l2', loss='squared_hinge', dual=False)
    svc_grid = {'C': [10**p for p in range(-7, 8)]}  # 1e-7 … 1e7

    boost_search, boost_pred = function.learning_best_classifier(
        'data/train.csv', 'data/test.csv', boosting, boosting_grid,
        encode_columns=categorical, n_jobs=-1)
    svc_search, svc_pred = function.learning_best_classifier(
        'data/train.csv', 'data/test.csv', svc, svc_grid,
        encode_columns=categorical, n_jobs=-1)

    function.save(f'data/gradBoosting{boost_search.best_score_:.4f}',
                  boost_pred, boost_search, fmt='%i')
    function.save(f'data/linearSVC{svc_search.best_score_:.4f}',
                  svc_pred, svc_search, fmt='%i')
# NOTE(review): this fragment begins mid-branch — the `if choice == ...` that
# owns the `elif` arms below is outside this view, so the code is kept
# byte-identical rather than reformatted (no valid standalone reconstruction
# exists without the missing `if` header).
# BUG(review): the second input loop's condition tests `row == 0` where it
# almost certainly means `colm == 0`, and `type(row)/type(colm) != int` can
# never be true after `int()` succeeds — confirm and fix at the source.
# NOTE(review): the bare `except:` clauses swallow KeyboardInterrupt too;
# `except ValueError:` would be the targeted form — verify before changing.
row = 21 while row > length or type(row) != int or row == 0: try: row = abs(int(input("Enter a Row: "))) except: print("Try Again!") colm = 21 while colm > length or type(colm) != int or row == 0: try: colm = abs(int(input("Enter a Column: "))) except: print("Try Again") UserBoard = function.flag(row - 1,colm - 1,UserBoard) elif choice == 'S': function.save(UserBoard,MasterBoard, length) elif choice == 'Q': print("Good Bye") quit(0) elif choice == 'A': printBoard(MasterBoard) if flag == True: printBoard(MasterBoard) quit(0)
# Cast the predicted zip code back to a string column.
string_features = ['pre_zip_code']
prediction_f2 = function.convertColumn(prediction_f2, string_features, StringType())
print('prediction_final')

# Growth rate (%) of the predicted price over the previous price per sq ft.
prediction_f2 = prediction_f2.withColumn(
    "price_growth_rate",
    ((f.col('prediction') - f.col('pre_price_square_feet')) * 100
     / f.col('pre_price_square_feet')))
prediction_f2.printSchema()
# prediction_f2.show()

# Historical rows carry no growth rate; pad with zero so the union lines up.
sale = sale.withColumn('price_growth_rate', f.lit(0))

# Treat the prediction as the year-2020 price per square foot.
prediction_f = prediction_f2.withColumn('Country', f.lit(2020)).select(
    'pre_zip_code', 'prediction', 'Country', 'price_growth_rate')
prediction_f = (prediction_f
                .withColumnRenamed('Country', 'sale_year')
                .withColumnRenamed('prediction', 'price_square_feet')
                .withColumnRenamed('pre_zip_code', 'zip_code'))
sale2 = sale.select('zip_code', 'sale_year', 'price_square_feet',
                    'price_growth_rate')
# BUG FIX: Spark's unionAll resolves columns BY POSITION, not by name.
# After the renames, prediction_f was ordered (zip_code, price_square_feet,
# sale_year, price_growth_rate) while sale2 is (zip_code, sale_year,
# price_square_feet, price_growth_rate), so years and prices were being
# swapped in the unioned rows. Align the column order before the union.
prediction_f = prediction_f.select('zip_code', 'sale_year',
                                   'price_square_feet', 'price_growth_rate')
prediction_f = prediction_f.unionAll(sale2)
# prediction_f.show()

# Persist the combined history + 2020 forecast to postgresql.
table = 'price_all_predict2'
function.save(prediction_f, table)
# NOTE(review): this fragment references `epoch`, `day_loader`, `nig_loader`,
# `generator`, `discriminator`, the optimizers, and `device` from an enclosing
# epoch loop / setup outside this view; formatting reconstructed from the
# flattened source, tokens unchanged. The indentation of the final two `save`
# calls is ambiguous in the flattened original — placed at the outermost level
# here (presumably a final checkpoint after training); confirm against the
# original file.

# Per-epoch loss accumulators.
d_loss = 0
g_loss = 0
generator.train()
start_time = time.time()
# Pair one batch of day images with one batch of night images per step.
for idx, ((images_day, _), (images_nig, _)) in enumerate(
        zip(day_loader, nig_loader)):
    batch_size_idx = images_day.shape[0]
    images_day, images_nig = images_day.to(device), images_nig.to(
        device)
    # Discriminator step, then generator step, accumulating each loss.
    d_loss += discriminator_train_step(generator, images_day, images_nig,
                                       discriminator, batch_size_idx,
                                       minimax_loss, d_optimizer)
    g_loss += generator_train_step(generator, images_nig, discriminator,
                                   batch_size_idx, minimax_loss, g_optimizer)
# Visualization disabled; body intentionally left as `pass`.
if epoch % 3 == 0:
    pass
    # visualize(generator, images_nig)
print(
    "Epoch: {}| Generator loss:{:5f}| Discriminator:{:5f}| time elapsed:{:2f}"
    .format(epoch, g_loss, d_loss, time.time() - start_time))
# Periodic checkpoint every 200 epochs.
if epoch % 200 == 0:
    save(generator, G_PATH)
    save(discriminator, D_PATH)
save(generator, G_PATH)
save(discriminator, D_PATH)
import function
from pyspark import SparkConf, SparkContext
import pyspark.sql.functions as f

# NOTE(review): script continues past this view — later bindings such as
# `csv_bike`, `sale`, and `path_population` are presumably consumed below.
sc = SparkContext()
sc.setLogLevel("ERROR")

# Table ny_subway_station: load, geocode, and derive routes.
path_sub = 'processed/subway_station.csv'
csv_sub = function.load(path_sub)
# Transfer longitude/latitude to zip code.
csv_sub1 = function.zipcode(csv_sub, sc)
csv_sub3 = function.subway_route(csv_sub1)
# Save the table into postgresql.
table_subway = 'subway'
function.save(csv_sub3, table_subway)

# Table ny_citibike_station: geocode stations and count per zip code.
path_bike = 'processed/citybike_station'
csv = function.load(path_bike)
csv_bike1 = function.zipcode(csv, sc)
csv_bike = function.citybike_station_count(csv_bike1)

# Load rolling sale and crime data.
path_sale_crime = 'processed/prepared.csv'
csv_sale_crime = function.load(path_sale_crime)
# Average sale price per square foot from rolling-sale data after 2013.
sale = function.avg_price_per_square_feet(csv_sale_crime)

# Load population dataset to calculate the crime index (used past this view).
path_population = 'processed/census_population'