def main():
    """Train on the training CSV and save class probabilities for the test CSV.

    Loads and cleans the training data, shuffles its rows, fits a model,
    predicts probabilities for the cleaned test data and passes the
    resulting table (an ``Id`` column followed by one column per class) to
    ``dm.saveResults``.
    """
    trainData = pd.read_csv('CrimeClassification/Dataset/train-2.csv')
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() prints its summary itself and returns None, so it is
    # called directly rather than wrapped in print (which only added "None").
    trainData.info()
    print(classesMap)

    cleanedTrainData, normalizationValues = dm.cleanTrainData(trainData, classesMap)
    cleanedTrainData.info()

    # permutation() returns a row-shuffled copy of the array. Shuffling the
    # temporary produced by ``astype`` (as was done before) left the training
    # rows in their original order.
    data = np.random.permutation(cleanedTrainData.values)
    # Column 0 holds the label, the remaining columns are the features.
    Ytrain = binarizeLabels(data[:, 0])
    Xtrain = data[:, 1:]
    model = trainModel(Xtrain, Ytrain)

    # NOTE(review): the result of this call was always overwritten below.
    # The call is kept in case testProbaModel reports something about the
    # training set; drop it if it has no side effects.
    testProbaModel(model, Xtrain)

    testData = pd.read_csv('CrimeClassification/Dataset/test-2.csv')
    cleanedTestData = dm.cleanTestData(testData, normalizationValues)
    cleanedTestData.info()

    # Column 0 of the cleaned test data is used as the row Id; the rest is
    # fed to the model.
    testValues = cleanedTestData.values
    output = testProbaModel(model, testValues[:, 1:])
    result = np.c_[testValues[:, 0].astype(int), output]

    # Header: 'Id' followed by the keys of classesMap ordered by mapped value.
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, 'Id')
    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # np.c_ yields a single-dtype array, so the Id column is cast back to int.
    dataFrameResults['Id'] = dataFrameResults['Id'].astype(int)
    dm.saveResults(dataFrameResults)
def main():
    """Load the training CSV, print summaries and run the plotting helpers.

    The cleaned training data is passed, together with the class map, to the
    heat-map and histogram helpers defined elsewhere in this module.
    """
    trainData = pd.read_csv("CrimeClassification/Dataset/train-2.csv")
    # info() prints its own summary.
    trainData.info()
    classesMap = dm.mapClasses(trainData)
    cleanedTrainData = dm.cleanData(trainData, classesMap)
    # describe() only returns the statistics table; without print the result
    # was discarded and nothing was shown when running as a script.
    print(cleanedTrainData.describe())

    heatMapXY(cleanedTrainData, "Global heatMap of crimes")
    heatMapPerCategory(cleanedTrainData, classesMap)
    histogramOfCategories(cleanedTrainData, classesMap)
    textHistogram(cleanedTrainData, classesMap)
def main():
    """Fit a model on a split of the training CSV and report its evaluation.

    Cleans the training data, splits it with ``splitData``, trains on the
    training part, predicts the held-out part, then plots the confusion
    matrix and prints the F1 score computed by the ``da`` helpers.
    """
    trainData = pd.read_csv('CrimeClassification/Dataset/train-2.csv')
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() prints its summary itself and returns None, so it is
    # called directly; this also avoids the Python-2-only print statement.
    trainData.info()
    print(classesMap)

    # The second value returned by cleanTrainData is not used in this function.
    cleanedTrainData, _ = dm.cleanTrainData(trainData, classesMap)
    cleanedTrainData.info()

    Xtrain, Ytrain, Xtest, Ytest = splitData(cleanedTrainData.values)
    model = trainModel(Xtrain, Ytrain)
    Ypred = testModel(model, Xtest)

    confMatrix = da.confusionMatrix(Ypred, Ytest)
    titleCM = da.orderClassesMapKeys(classesMap)
    da.plotConfusionMatrix(confMatrix, titleCM)
    print(da.f1Score(Ypred, Ytest))
def main():
    """Train on the training CSV and save class probabilities for the test CSV.

    The saved table has an ``Id`` column followed by one column per key of
    the class map, ordered by mapped value.
    """
    trainData = pd.read_csv("CrimeClassification/Dataset/train-2.csv")
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() prints its summary itself and returns None, so it is
    # called directly; this also avoids the Python-2-only print statement.
    trainData.info()
    print(classesMap)

    cleanedTrainData = dm.cleanData(trainData, classesMap)
    testData = pd.read_csv("CrimeClassification/Dataset/test-2.csv")
    cleanedTestData = dm.cleanTestData(testData)
    cleanedTrainData.info()

    model = trainModel(cleanedTrainData.values)
    # `output` used to be read below without ever being assigned, which
    # raised NameError before any result could be saved.
    # NOTE(review): assumes a testProbaModel(model, X) helper returning the
    # per-class probabilities is available in this module -- confirm.
    output = testProbaModel(model, cleanedTestData.values)

    # Column 0 of the cleaned test data is used as the row Id.
    result = np.c_[cleanedTestData.values[:, 0].astype(int), output]
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, "Id")
    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # np.c_ yields a single-dtype array, so the Id column is cast back to int.
    dataFrameResults["Id"] = dataFrameResults["Id"].astype(int)
    dm.saveResults(dataFrameResults)
def main():
    """Train on the tsc training CSV and build the test probability table.

    The table has an ``Id`` column followed by one column per key of the
    class map, ordered by mapped value. Within this function the table is
    only built; it is neither saved nor returned.
    """
    pd.set_option("display.precision", 3)

    rawTrain = pd.read_csv('CrimeClassification/Dataset/train01-tsc.csv')
    labelIndex = dm.mapClasses(rawTrain)
    print(labelIndex)
    trainFrame = dm.cleanData(rawTrain, labelIndex)

    rawTest = pd.read_csv('CrimeClassification/Dataset/test-tsc.csv')
    testFrame = dm.cleanTestData(rawTest)

    print(trainFrame.info())
    fitted = trainModel(trainFrame.values)

    print(testFrame.info())
    testMatrix = testFrame.values
    probabilities = testProbaModel(fitted, testMatrix)

    # First column of the test matrix becomes the integer Id column.
    ids = testMatrix[:, 0].astype(int)
    table = np.c_[ids, probabilities]

    header = ['Id'] + sorted(labelIndex, key=lambda name: labelIndex[name])
    submission = pd.DataFrame(table, columns=header)
    submission['Id'] = submission['Id'].astype(int)