return self.model.predict(test1).tolist() def save(self, f_name): dump(self.normalizer, f_name + '_normalizer.joblib') dump(self.model, f_name + '_model.joblib') def load_model(self, f_name): self.normalizer = load(f_name + '_normalizer.joblib') self.model = load(f_name + '_model.joblib') if __name__ == '__main__': #read train data from train.csv train_df = pd.read_csv('../data/train.csv', usecols=[ 'STAT_CAUSE_DESCR', 'LATITUDE', 'LONGITUDE', 'DISCOVERY_DATE', 'FIRE_SIZE' ]) y = pd.DataFrame() y['STAT_CAUSE_DESCR'] = train_df['STAT_CAUSE_DESCR'] #create labels by grouping the causes y = labels.createLabel(y) y = y['STAT_CAUSE_DESCR'].astype(int) train_df = train_df.drop(columns=['STAT_CAUSE_DESCR']) #train and save the model model = LRModel() model.trainModel(train_df, y) model.save("../models/lrmodel")
from collections import Counter

from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

from LogisticRegression import LRModel

if __name__ == '__main__':
    label_column = 'STAT_CAUSE_DESCR'

    # Read only the label column and the four feature columns from test.csv.
    features = pd.read_csv(
        '../data/test.csv',
        usecols=[
            label_column,
            'LATITUDE',
            'LONGITUDE',
            'DISCOVERY_DATE',
            'FIRE_SIZE',
        ],
    )

    # Separate the labels from the features.
    expected = pd.DataFrame()
    expected[label_column] = features[label_column]
    features = features.drop(columns=[label_column])

    # Convert the raw cause descriptions into the class labels produced by
    # labels.createLabel, cast to int.
    expected = labels.createLabel(expected)
    expected = expected[label_column].astype(int)

    # Predict with the previously saved Logistic Regression model.
    classifier = LRModel()
    classifier.load_model("../models/lrmodel")
    predicted = classifier.predict(features)

    # Evaluate: each metric is computed right before it is printed.
    print("Logistic Regression Model")
    print("------------------------------------------------------------------------------------------")
    macro_f1 = f1_score(expected, predicted, average='macro')
    print("F1-score: %f" % (macro_f1))
    accuracy = accuracy_score(expected, predicted)
    print("Accuracy Score: %f" % (accuracy))
    print(confusion_matrix(expected, predicted))