def loaddata():
    """Load the train and test sets and carve a validation split off the train set.

    The labelled set returned by ``preprocess.loadTrainSet()`` is split by
    position: the first 6000 samples (and their labels) form the validation
    set, everything after that is used for training.  The unlabelled test set
    comes from ``preprocess.loadTestSet()``.

    Returns:
        A 5-tuple ``(train_data, train_label, val_data, val_label, test_data)``.
    """
    # Single-argument call form prints the same text on Python 2 and Python 3.
    print("loading data...")

    # Labelled data (presumably read from train.csv -- see preprocess).
    data, label = preprocess.loadTrainSet()

    # Hold out the leading 6000 samples for validation.
    val_data = data[0:6000]
    val_label = label[0:6000]

    # The remaining samples are the actual training set.
    train_data = data[6000:]
    train_label = label[6000:]

    # Unlabelled data to predict on (presumably read from test.csv).
    test_data = preprocess.loadTestSet()

    return train_data, train_label, val_data, val_label, test_data
def loaddata():
    """Return the training, validation and test data used by the classifiers.

    ``preprocess.loadTrainSet()`` supplies the labelled samples; the first
    6000 of them are reserved as a validation set and the rest are kept for
    training.  ``preprocess.loadTestSet()`` supplies the unlabelled test
    samples.

    Returns:
        tuple: ``(train_data, train_label, val_data, val_label, test_data)``.
    """
    # print() with one argument is valid (and equivalent) on Python 2 and 3.
    print("loading data...")

    # Labelled samples, split positionally into validation / training parts.
    data, label = preprocess.loadTrainSet()
    val_data = data[0:6000]
    val_label = label[0:6000]
    train_data = data[6000:]
    train_label = label[6000:]

    # Unlabelled samples for the final predictions.
    test_data = preprocess.loadTestSet()

    return train_data, train_label, val_data, val_label, test_data
def loaddata():
    """Load the data sets and split the labelled one into train / validation.

    Returns:
        tuple: ``(train_data, train_label, val_data, val_label, test_data)``
        where the validation parts are the first 6000 labelled samples and
        the training parts are all labelled samples after them.
    """
    print("loading data...")

    # Load the labelled data set (train.csv per the original comments) and
    # split it into training data and validation data.
    samples, targets = preprocess.loadTrainSet()
    holdout = 6000  # number of leading samples reserved for validation

    # Validation data and the classes of those validation samples.
    val_data, val_label = samples[:holdout], targets[:holdout]

    # Training data and the classes of those training samples.
    train_data, train_label = samples[holdout:], targets[holdout:]

    # Load the test data set (test.csv per the original comments).
    test_data = preprocess.loadTestSet()

    return train_data, train_label, val_data, val_label, test_data
def loaddata():
    """Load the labelled and unlabelled data and build a validation split.

    The labelled set comes from ``preprocess.loadTrainSet()`` and the
    unlabelled set from ``preprocess.loadTestSet()``.  The first 6000 labelled
    samples are held out for validation; the remainder is the training set.

    Returns:
        tuple: ``(train_data, train_label, val_data, val_label, test_data)``.
    """
    ##### Load the labelled data set (presumably from train.csv)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("loading data...")
    data, label = preprocess.loadTrainSet()

    ##### Divide the labelled data into validation data and train data
    val_data = data[0:6000]
    val_label = label[0:6000]
    train_data = data[6000:]
    train_label = label[6000:]

    ##### Load the unlabelled data set (presumably from test.csv)
    test_data = preprocess.loadTestSet()

    return train_data, train_label, val_data, val_label, test_data
def loaddata():
    """Load the labelled and unlabelled data and build a validation split.

    Returns:
        tuple: ``(train_data, train_label, val_data, val_label, test_data)``.
        The validation parts are the first 6000 samples returned by
        ``preprocess.loadTrainSet()``; the training parts are the rest.
        ``test_data`` is whatever ``preprocess.loadTestSet()`` returns.
    """
    #### Load the labelled data set (presumably from train.csv)
    print("loading data...")
    data, label = preprocess.loadTrainSet()

    #### Divide the loaded data set into validation data and train data
    val_data = data[0:6000]
    val_label = label[0:6000]
    train_data = data[6000:]
    train_label = label[6000:]

    #### Load the unlabelled data set (presumably from test.csv)
    test_data = preprocess.loadTestSet()

    return train_data, train_label, val_data, val_label, test_data


def knn(train_data, train_label, val_data, val_label, test_data,
        name="knn_submission.csv"):
    """Fit a 20-nearest-neighbours classifier, score it, and save test predictions.

    The classifier is fitted on the training split.  Its class probabilities
    for the validation split are passed to ``preprocess.evaluation`` and the
    result is printed as the validation log loss.  Class probabilities for
    the test set are then handed to ``preprocess.saveResult``.

    Args:
        train_data: Training samples.
        train_label: Labels of the training samples.
        val_data: Validation samples.
        val_label: Labels of the validation samples.
        test_data: Unlabelled samples to predict.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    print("Start training KNN Classifier...")

    #### Fit on the training split
    knnClf = KNeighborsClassifier(n_neighbors=20)
    knnClf.fit(train_data, train_label)

    #### Evaluate on the validation split
    val_pred_proba = knnClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_proba)
    # str.format keeps the text identical on Python 2 and 3; a plain
    # print("a", b) would print a tuple under Python 2.
    print("logloss of validation set: {}".format(logloss))

    #### Predict class probabilities for the test set and save them
    print("Start classify test set...")
    test_pred_proba = knnClf.predict_proba(test_data)
    preprocess.saveResult(test_pred_proba, filename=name)


if __name__ == "__main__":
    # Time the complete load / train / predict run.
    t1 = time.time()
    train_data, train_label, val_data, val_label, test_data = loaddata()
    knn(train_data, train_label, val_data, val_label, test_data)
    t2 = time.time()
    print("Done! It cost {} s".format(t2 - t1))