# Example #1 — file: main.py, project: Kuddus1924/labs3
def main():
    """Grid-search two classifiers and persist their test-set predictions.

    Fits a GradientBoostingClassifier and a LinearSVC on data/train.csv,
    predicts on data/test.csv (via the project's ``function`` helpers), and
    saves each prediction to a filename embedding the best CV score.
    """
    # Indices of categorical columns that need encoding in both datasets.
    categorical_cols = [1, 3, 5, 6, 7, 8, 9, 13]

    boosting_grid = {
        'loss': ['exponential'],
        'learning_rate': np.linspace(0.01, 1.0, 10),
        'max_depth': [3, 4, 5],
    }
    svc_grid = {'C': [10 ** power for power in range(-7, 8)]}

    boost_search, boost_pred = function.learning_best_classifier(
        'data/train.csv',
        'data/test.csv',
        GradientBoostingClassifier(),
        boosting_grid,
        encode_columns=categorical_cols,
        n_jobs=-1)
    svc_search, svc_pred = function.learning_best_classifier(
        'data/train.csv',
        'data/test.csv',
        LinearSVC(penalty='l2', loss='squared_hinge', dual=False),
        svc_grid,
        encode_columns=categorical_cols,
        n_jobs=-1)

    # File names carry the best cross-validation score for easy comparison.
    function.save(f'data/gradBoosting{boost_search.best_score_:.4f}',
                  boost_pred,
                  boost_search,
                  fmt='%i')
    function.save(f'data/linearSVC{svc_search.best_score_:.4f}',
                  svc_pred,
                  svc_search,
                  fmt='%i')
        # NOTE(review): fragment — the `if choice == ...` branch these
        # `elif`s belong to, and the enclosing loop, are not visible here;
        # `length`, `UserBoard`, `MasterBoard`, `choice`, and `flag` are
        # defined outside this view.
        # Sentinel 21 forces the validation loop to run at least once
        # (presumably the board is at most 20x20 — TODO confirm).
        row = 21
        # NOTE(review): `type(row) != int` is always False once `row` holds
        # an int, and the bare `except` hides the actual error; the loop
        # repeats until an in-range, non-zero row is entered.
        while row > length or type(row) != int or row == 0:
            try:
                row = abs(int(input("Enter a Row: ")))
            except:
                print("Try Again!")
                
        colm = 21
        # NOTE(review): this condition re-tests `row == 0` instead of
        # `colm == 0` — looks like a copy-paste bug; confirm intent.
        while colm > length or type(colm) != int or row == 0:
            try:
                colm = abs(int(input("Enter a Column: ")))
            except:
                print("Try Again")

        # Toggle a flag on the user's board (inputs are 1-based, board 0-based).
        UserBoard = function.flag(row - 1,colm - 1,UserBoard)

    elif choice == 'S':
        # Persist both boards via the project helper.
        function.save(UserBoard,MasterBoard, length)

    elif choice == 'Q':
        print("Good Bye")
        quit(0)

    elif choice == 'A':
        # Reveal the full answer board.
        printBoard(MasterBoard)

    # Game over: show the solution and exit.
    if flag == True:
        printBoard(MasterBoard)
        quit(0)
    
# Cast pre_zip_code to string via the project helper — presumably so it
# matches the string-typed zip_code column used in the union below; confirm.
string_features = ['pre_zip_code']
prediction_f2 = function.convertColumn(prediction_f2, string_features, StringType())

print('prediction_final')

# Calculate price growth rate based on the predicted price:
# (prediction - previous price/sq ft) / previous price/sq ft, as a percentage.
prediction_f2 = prediction_f2.withColumn("price_growth_rate", ((f.col('prediction')-f.col('pre_price_square_feet'))*100/f.col('pre_price_square_feet')))
prediction_f2.printSchema()
# prediction_f2.show()


# Join prediction with previous data: historical rows get a growth rate of 0
# so the union below has a matching column set.
sale = sale.withColumn('price_growth_rate',f.lit(0))



# Tag predictions with year 2020 as their sale year.
# NOTE(review): the literal 2020 is first added under the column name
# 'Country' and only then renamed to 'sale_year' — the intermediate name
# looks accidental; behavior is correct but confusing.
prediction_f = prediction_f2.withColumn('Country',f.lit(2020)).select('pre_zip_code', 'prediction','Country','price_growth_rate')
prediction_f = prediction_f.withColumnRenamed('Country', 'sale_year').withColumnRenamed('prediction', 'price_square_feet').withColumnRenamed('pre_zip_code', 'zip_code')
sale2 = sale.select('zip_code','sale_year','price_square_feet','price_growth_rate')
# Stack predicted 2020 rows on top of the historical rows (unionAll keeps
# duplicates; columns are matched by position, hence the select above).
prediction_f = prediction_f.unionAll(sale2)
# prediction_f.show()

# Store the combined table to PostgreSQL via the project helper.
table = 'price_all_predict2'
function.save(prediction_f, table)




# Example #4
        # NOTE(review): fragment — the enclosing `for epoch in ...` header is
        # outside this view. Reset per-epoch loss accumulators.
        d_loss = 0
        g_loss = 0
        generator.train()
        start_time = time.time()
        # Draw paired batches from the two domains; zip() stops at the
        # shorter loader, so an epoch is min(len(day), len(nig)) batches.
        for idx, ((images_day, _),
                  (images_nig, _)) in enumerate(zip(day_loader, nig_loader)):
            # Actual batch size — the final batch may be smaller.
            batch_size_idx = images_day.shape[0]
            images_day, images_nig = images_day.to(device), images_nig.to(
                device)

            # One discriminator update on this batch pair; the helper
            # presumably returns a scalar loss value — confirm.
            d_loss += discriminator_train_step(generator, images_day,
                                               images_nig, discriminator,
                                               batch_size_idx, minimax_loss,
                                               d_optimizer)

            # One generator update on the same night-domain batch.
            g_loss += generator_train_step(generator, images_nig,
                                           discriminator, batch_size_idx,
                                           minimax_loss, g_optimizer)
        # Visualization intentionally disabled; original call kept for reference.
        if epoch % 3 == 0: pass  # visualize(generator, images_nig)
        # NOTE(review): g_loss/d_loss are sums over the epoch, not per-batch
        # averages.
        print(
            "Epoch: {}| Generator loss:{:5f}| Discriminator:{:5f}| time elapsed:{:2f}"
            .format(epoch, g_loss, d_loss,
                    time.time() - start_time))
        # Periodic checkpoint every 200 epochs.
        if epoch % 200 == 0:
            save(generator, G_PATH)
            save(discriminator, D_PATH)

    # Final checkpoint after the training loop completes.
    save(generator, G_PATH)
    save(discriminator, D_PATH)
# Example #5
# ETL driver: enrich NYC subway / citibike / rolling-sale data with zip codes
# and persist results to PostgreSQL via the project's `function` helpers.
import function
from pyspark import SparkConf, SparkContext
import pyspark.sql.functions as f
sc = SparkContext()
sc.setLogLevel("ERROR")

# For table ny_subway_station:
path_sub = 'processed/subway_station.csv'
csv_sub = function.load(path_sub)
# Transfer longitude/latitude to zip code (project helper).
csv_sub1 = function.zipcode(csv_sub, sc)
# Aggregate route info per station — presumably one row per stop; confirm
# against the helper's implementation.
csv_sub3 = function.subway_route(csv_sub1)
# Save the table into PostgreSQL.
table_subway = 'subway'
function.save(csv_sub3, table_subway)

# Process table ny_citibike_station (same zip-code enrichment, then a
# per-zip station count):
path_bike = 'processed/citybike_station'
csv = function.load(path_bike)
csv_bike1 = function.zipcode(csv, sc)
csv_bike = function.citybike_station_count(csv_bike1)

# Loading rolling sale and crime data:
path_sale_crime = 'processed/prepared.csv'
csv_sale_crime = function.load(path_sale_crime)

# Calculate average sale price per square foot from rolling-sale data after 2013.
sale = function.avg_price_per_square_feet(csv_sale_crime)

# Load population dataset to calculate crime index (continues past this view).
path_population = 'processed/census_population'