# Train a rental-count model on the rows of dbo.rental_data with Year < 2015,
# serialize it for real-time scoring, write the serialized bytes to disk and
# insert them into the rental_models table.
#
# Set model_type to "linear" (rx_lin_mod) or "dtree" (rx_dtree).
model_type = "linear"

conn_str = 'Driver=SQL Server;Server=<Server Name>;Database=MLDB;Uid=<User Name>;Pwd=<Password>;'
cnxn = pyodbc.connect(conn_str)
try:
    inputsql = 'select "RentalCount", "Year", "Month", "Day", "WeekDay", "Snow", "Holiday", "FWeekDay" from dbo.rental_data where Year < 2015'
    rental_train_data = pd.read_sql(inputsql, cnxn)

    # Used to sample data
    # train = rental_train_data.sample(frac=0.8, random_state=1)
    # test = rental_train_data.loc[~ rental_train_data.index.isin(train.index)]
    # print("Train {0} / test {1}".format(len(train), len(test)))

    # Convert these columns to pandas categoricals before they are used in
    # the model formula.
    for column in ("Holiday", "Snow", "WeekDay"):
        rental_train_data[column] = rental_train_data[column].astype("category")

    if model_type == "linear":
        linmod_model = rx_lin_mod("RentalCount ~ Month + Day + WeekDay + Snow + Holiday", data=rental_train_data)
        trained_model = rx_serialize_model(linmod_model, realtime_scoring_only=True)
    elif model_type == "dtree":
        dtree_model = rx_dtree("RentalCount ~ Month + Day + WeekDay + Snow + Holiday", data=rental_train_data)
        trained_model = rx_serialize_model(dtree_model, realtime_scoring_only=True)
    else:
        # Without this branch an unknown model_type would leave trained_model
        # undefined and only fail later with a NameError.
        raise ValueError(
            "Unsupported model_type {0!r}; expected 'linear' or 'dtree'".format(model_type)
        )

    print(rx_summary("~ Month + Day + WeekDay + Snow + Holiday", rental_train_data))

    # Dump learned model to file. Despite the file name, the bytes written are
    # whatever rx_serialize_model returned.
    with open(r'c:\temp\trained_model.pickle', mode='wb') as f:
        f.write(trained_model)

    # Store the serialized model; values are passed as query parameters.
    cursor = cnxn.cursor()
    cursor.execute(
        "INSERT INTO rental_models(model_name, lang, native_model) VALUES(?, ?, ?)",
        (model_type + "_model", "Python", trained_model),
    )
    cnxn.commit()
finally:
    # Release the database connection even if training or the insert fails.
    cnxn.close()
TM['Occupation'].cat.reorder_categories( ["Manual", "Clerical", "Skilled Manual", "Professional", "Management"], inplace=True) # Make integers from ordinals TM['EducationInt'] = TM['Education'].cat.codes TM['CommuteDistanceInt'] = TM['CommuteDistance'].cat.codes TM['OccupationInt'] = TM['Occupation'].cat.codes # Check the distribution # TM['OccupationInt'].value_counts().sort_index() # TM['Occupation'].value_counts().sort_index() # Create a linear model from revoscalepy import rx_lin_mod, rx_predict linmod = rx_lin_mod( """NumberCarsOwned ~ TotalChildren + OccupationInt + NumberChildrenAtHome + EducationInt + CommuteDistanceInt + BikeBuyer""", data=TM) TMPredict = rx_predict(linmod, data=TM, output_data=TM) TMPredict[["NumberCarsOwned", "NumberCarsOwned_Pred"]].head(5) TMPredict[["NumberCarsOwned", "NumberCarsOwned_Pred"]].head(20).plot(kind="area", color=('green', 'orange')) plt.show() # Naive Bayes from sklearn.metrics import accuracy_score from sklearn.naive_bayes import GaussianNB # Arrange the data - feature matrix and target vector # Split the data Xtrain = TM.loc[TM.TrainTest == 1, [
# SQL Server data
# Importing data from SQL Server using pyodbc
# Using the revoscalepy library
import numpy as np
import pandas as pd
import pyodbc
from revoscalepy import rx_lin_mod, rx_predict, rx_summary

# Connecting and reading the data
# NOTE(review): the connection string embeds a user name and password in
# source; consider moving the credentials out of the script.
con = pyodbc.connect('DSN=AWDW;UID=RUser;PWD=Pa$$w0rd')
try:
    query = """SELECT CustomerKey, Age, YearlyIncome, TotalChildren, NumberCarsOwned FROM dbo.vTargetMail;"""
    TM = pd.read_sql(query, con)
finally:
    # The connection is only needed for the read above; close it so it is
    # not left open for the rest of the script.
    con.close()

# Quick look at the data. These bare expressions only display a result when
# the script is run statement by statement in an interactive session.
TM.head(5)
TM.shape

# Check the summary of the NumberCarsOwned
summary = rx_summary("NumberCarsOwned", TM)
print(summary)

# Create a linear model
linmod = rx_lin_mod("NumberCarsOwned ~ YearlyIncome + Age + TotalChildren", data=TM)

# Score the same rows the model was fitted on; TM is passed both as the input
# data and as output_data.
predmod = rx_predict(linmod, data=TM, output_data=TM)
predmod.head(10)
# End of script
"Month" : { "type" : "integer" }, "Day" : { "type" : "integer" }, "RentalCount" : { "type" : "integer" }, "WeekDay" : { "type" : "factor", "levels" : ["1", "2", "3", "4", "5", "6", "7"] }, "Holiday" : { "type" : "factor", "levels" : ["1", "0"] }, "Snow" : { "type" : "factor", "levels" : ["1", "0"] } } data_source = RxSqlServerData(sql_query=inputsql, connection_string=conn_str, column_info=column_info) linmod_model = rx_lin_mod("RentalCount ~ Month + Day + WeekDay + Snow + Holiday", data = data_source, computeContext = cc) trained_model = rx_serialize_model(linmod_model, realtime_scoring_only = True) with open(r'c:\temp\trained_model.pickle', mode='wb') as f: f.write(trained_model) print(rx_summary("RentalCount ~ Month + Day + WeekDay + Snow + Holiday", data_source)) cnxn = pyodbc.connect(conn_str) cursor=cnxn.cursor() cursor.execute("INSERT INTO rental_models(model_name, lang, native_model) VALUES(?, ?, ?)", ("linear_model", "Python", trained_model)) cnxn.commit()