def computeStatistics(self, inRaster, inShape, inField):
    try:
        rasterized = rasterize(inRaster, inShape, inField)
        Yp, Yt = dataraster.get_samples_from_roi(inRaster, rasterized)
        CONF = ai.CONFUSION_MATRIX()
        CONF.compute_confusion_matrix(Yp, Yt)
        self.confusion_matrix = CONF.confusion_matrix
        self.Kappa = CONF.Kappa
        self.OA = CONF.OA
    except BaseException:
        pushFeedback('Error during statistics calculation')
def computeStatistics(self, inRaster, inShape, inField):
    progress = progressBar('Computing statistics...', 0)
    try:
        rasterized = self.rasterize(inRaster, inShape, inField)
        Yp, Yt = dataraster.get_samples_from_roi(inRaster, rasterized)
        CONF = ai.CONFUSION_MATRIX()
        CONF.compute_confusion_matrix(Yp, Yt)
        self.confusion_matrix = CONF.confusion_matrix
        self.Kappa = CONF.Kappa
        self.OA = CONF.OA
    except:
        QgsMessageLog.logMessage('Error during statistics calculation')

    progress.reset()
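# The computeStatistics variants above all delegate to a rasterize() helper that burns the
# training polygons into a temporary raster before sampling it. Below is a minimal sketch of
# such a helper, inferred from the inline GDAL rasterization used further down in this file;
# the actual dzetsaka helper may differ in naming, temporary-file handling and error reporting.
import os
import tempfile

from osgeo import gdal, ogr


def rasterize(inRaster, inVector, inField):
    """Burn the vector's `inField` attribute into a temporary byte raster aligned
    on `inRaster`, and return the path of that temporary file."""
    filename = os.path.join(tempfile.mkdtemp(), 'roi.tif')
    data = gdal.Open(inRaster, gdal.GA_ReadOnly)
    shp = ogr.Open(inVector)
    lyr = shp.GetLayer()

    driver = gdal.GetDriverByName('GTiff')
    dst_ds = driver.Create(filename, data.RasterXSize, data.RasterYSize, 1, gdal.GDT_Byte)
    dst_ds.SetGeoTransform(data.GetGeoTransform())
    dst_ds.SetProjection(data.GetProjection())
    gdal.RasterizeLayer(dst_ds, [1], lyr, None, options=['ATTRIBUTE=' + inField])

    data, dst_ds, shp, lyr = None, None, None, None  # flush and close the datasets
    return filename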
""" """ inShape = '/mnt/DATA/demo/train.shp' inField = 'Class' number = 50 percent = True outValidation = '/tmp/valid1.shp' outTrain ='/tmp/train.shp' randomInSubset(inShape,inField,outValidation,outTrain,number,percent) """ import function_dataraster function_dataraster.rasterize(inRaster, inVector, inField, '/tmp/roi.tif') X, Y, coords = function_dataraster.get_samples_from_roi(inRaster, '/tmp/roi.tif', getCoords=True) distanceArray = distMatrix(coords) rawCV = distanceCV(distanceArray, Y, 32, minTrain=-1, SLOO=True) #rawCV = distanceCV(distanceArray,label,distanceThresold=distance,minTrain=minTrain,SLOO=SLOO,maxIter=maxIter,verbose=False,stats=True) for tr, vl in rawCV: print(tr.shape) print(vl.shape) # randomInSubset('/tmp/valid.shp','level3','/tmp/processingd62a83be114a482aaa14ca317e640586/f99783a424984860ac9998b5027be604/OUTPUTVALIDATION.shp','/tmp/processingd62a83be114a482aaa14ca317e640586/1822187d819e450fa9ad9995d6757e09/OUTPUTTRAIN.shp',50,True)
def __init__(self, inRaster, inVector, inField='Class', outModel=None, inSplit=1,
             inSeed=0, outMatrix=None, inClassifier='GMM'):
    learningProgress = progressBar('Learning model...', 6)

    # Convert vector to raster
    try:
        try:
            temp_folder = tempfile.mkdtemp()
            filename = os.path.join(temp_folder, 'temp.tif')

            data = gdal.Open(inRaster, gdal.GA_ReadOnly)
            shp = ogr.Open(inVector)
            lyr = shp.GetLayer()
        except:
            QgsMessageLog.logMessage("Problem with making tempfile or opening raster or vector")

        # Create temporary data set
        try:
            driver = gdal.GetDriverByName('GTiff')
            dst_ds = driver.Create(filename, data.RasterXSize, data.RasterYSize, 1, gdal.GDT_Byte)
            dst_ds.SetGeoTransform(data.GetGeoTransform())
            dst_ds.SetProjection(data.GetProjection())
            OPTIONS = 'ATTRIBUTE=' + inField
            gdal.RasterizeLayer(dst_ds, [1], lyr, None, options=[OPTIONS])
            data, dst_ds, shp, lyr = None, None, None, None
        except:
            QgsMessageLog.logMessage("Cannot create temporary data set")

        # Load training set
        try:
            X, Y = dataraster.get_samples_from_roi(inRaster, filename)
        except:
            QgsMessageLog.logMessage("Problem while getting samples from ROI with " + inRaster)
            QgsMessageLog.logMessage(
                "Are you sure you have only integer values in your " + str(inField) + " column?")

        [n, d] = X.shape
        C = int(Y.max())
        SPLIT = inSplit
        os.remove(filename)
        os.rmdir(temp_folder)

        # Scale the data
        X, M, m = self.scale(X)

        learningProgress.addStep()  # Add Step to ProgressBar

        # The learning process takes a split of the ground-truth pixels for training
        # and keeps the remaining ones for testing
        try:
            if SPLIT < 1:
                # Random selection of the sample
                x = sp.array([]).reshape(0, d)
                y = sp.array([]).reshape(0, 1)
                xt = sp.array([]).reshape(0, d)
                yt = sp.array([]).reshape(0, 1)

                sp.random.seed(inSeed)  # Set the random generator state
                for i in range(C):
                    t = sp.where((i + 1) == Y)[0]
                    nc = t.size
                    ns = int(nc * SPLIT)
                    rp = sp.random.permutation(nc)
                    x = sp.concatenate((X[t[rp[0:ns]], :], x))
                    xt = sp.concatenate((X[t[rp[ns:]], :], xt))
                    y = sp.concatenate((Y[t[rp[0:ns]]], y))
                    yt = sp.concatenate((Y[t[rp[ns:]]], yt))
            else:
                x, y = X, Y
        except:
            QgsMessageLog.logMessage("Problem while learning if SPLIT <1")

        learningProgress.addStep()  # Add Step to ProgressBar

        # Train Classifier
        if inClassifier == 'GMM':
            try:
                # tau=10.0**sp.arange(-8,8,0.5)
                model = gmmr.GMMR()
                model.learn(x, y)
                # htau,err = model.cross_validation(x,y,tau)
                # model.tau = htau
            except:
                QgsMessageLog.logMessage("Cannot train with GMM")
        else:
            try:
                from sklearn import neighbors
                from sklearn.svm import SVC
                from sklearn.ensemble import RandomForestClassifier
                try:
                    model_selection = True
                    from sklearn.model_selection import StratifiedKFold
                    from sklearn.model_selection import GridSearchCV
                except:
                    model_selection = False
                    from sklearn.cross_validation import StratifiedKFold
                    from sklearn.grid_search import GridSearchCV

                try:
                    # As QGIS on Windows doesn't manage multiprocessing, force the use of
                    # one thread on non-Linux systems
                    if os.name == 'posix':
                        n_jobs = -1
                    else:
                        n_jobs = 1

                    #
                    if inClassifier == 'RF':
                        param_grid_rf = dict(n_estimators=3**sp.arange(1, 5),
                                             max_features=sp.arange(1, 4))
                        y.shape = (y.size,)
                        if model_selection:
                            cv = StratifiedKFold(n_splits=3).split(x, y)
                            # cv = cv.get_n_splits(y)
                        else:
                            cv = StratifiedKFold(y, n_folds=3)
                        grid = GridSearchCV(RandomForestClassifier(), param_grid=param_grid_rf,
                                            cv=cv, n_jobs=n_jobs)
                        grid.fit(x, y)
                        model = grid.best_estimator_
                        model.fit(x, y)
                    elif inClassifier == 'SVM':
                        param_grid_svm = dict(gamma=2.0**sp.arange(-4, 4),
                                              C=10.0**sp.arange(-2, 5))
                        y.shape = (y.size,)
                        if model_selection:
                            cv = StratifiedKFold(n_splits=5).split(x, y)
                        else:
                            cv = StratifiedKFold(y, n_folds=5)
                        grid = GridSearchCV(SVC(), param_grid=param_grid_svm,
                                            cv=cv, n_jobs=n_jobs)
                        grid.fit(x, y)
                        model = grid.best_estimator_
                        model.fit(x, y)
                    elif inClassifier == 'KNN':
                        param_grid_knn = dict(n_neighbors=sp.arange(1, 20, 4))
                        y.shape = (y.size,)
                        if model_selection:
                            cv = StratifiedKFold(n_splits=3).split(x, y)
                        else:
                            cv = StratifiedKFold(y, n_folds=3)
                        grid = GridSearchCV(neighbors.KNeighborsClassifier(),
                                            param_grid=param_grid_knn, cv=cv, n_jobs=n_jobs)
                        grid.fit(x, y)
                        model = grid.best_estimator_
                        model.fit(x, y)
                except:
                    QgsMessageLog.logMessage("Cannot train with classifier " + inClassifier)
            except:
                QgsMessageLog.logMessage(
                    "You must have sklearn dependencies on your computer. "
                    "Please consult the documentation for installation.")

        learningProgress.prgBar.setValue(5)  # Add Step to ProgressBar

        # Assess the quality of the model
        if SPLIT < 1:
            # if inClassifier == 'GMM':
            #     yp = model.predict(xt)[0]
            # else:
            yp = model.predict(xt)
            CONF = ai.CONFUSION_MATRIX()
            CONF.compute_confusion_matrix(yp, yt)
            sp.savetxt(outMatrix, CONF.confusion_matrix, delimiter=',', fmt='%1.4d')

        # Save Tree model
        if outModel is not None:
            output = open(outModel, 'wb')
            pickle.dump([model, M, m], output)
            output.close()

        learningProgress.addStep()  # Add Step to ProgressBar

        # Close progressBar
        learningProgress.reset()
        learningProgress = None
    except:
        learningProgress.reset()
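# Why the model_selection try/except above: scikit-learn 0.18 moved StratifiedKFold and
# GridSearchCV from sklearn.cross_validation / sklearn.grid_search to sklearn.model_selection,
# and changed StratifiedKFold from taking the labels up front to a splitter configured with
# n_splits. A compact illustration of the two call styles handled above (x and y are assumed
# to be the training arrays used in that code):
try:
    from sklearn.model_selection import StratifiedKFold, GridSearchCV
    cv = StratifiedKFold(n_splits=3)      # new API: a splitter object (or its .split(x, y))
except ImportError:
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.grid_search import GridSearchCV
    cv = StratifiedKFold(y, n_folds=3)    # old API: labels given at construction time
# Either form can then be passed to GridSearchCV(..., cv=cv).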
def __init__(self, inRaster, inVector, inField='Class', outModel=None, inSplit=100,
             inSeed=0, outMatrix=None, inClassifier='GMM', extraParam=False, feedback=None):
    """!@brief Learn model with a shapefile and a raster image.

    Parameters
    ----------
    inRaster : str
        Filtered image name ('sample_filtered.tif').
    inVector : str
        Name of the training shapefile ('training.shp').
    inField : str
        Column name where the class numbers are stored.
    inSplit : int or str
        An integer percentage, or 'SLOO' or 'STAND'.
        If 'STAND', extraParam['SLOO'] defaults to False and extraParam['maxIter'] to 5.
        If 'SLOO', extraParam['distance'] must be given; extraParam['maxIter'] defaults to
        False and extraParam['minTrain'] to 0.5 (i.e. 50%).
        Please specify extraParam['saveDir'] to save the results/confusion matrices.
    inSeed : int
    outModel : str
        Name of the model to save; compulsory for the third step (classifying).
    outMatrix : str
        Defaults to inRaster (minus the extension) + '_inClassifier_inSeed_confu.csv'.
    inClassifier : str
        'GMM', 'KNN', 'SVM' or 'RF'.

    Output
    ------
    Model file.
    Confusion matrix.
    """
    # Convert vector to raster
    needXY = True

    pushFeedback('Learning model...', feedback=feedback)
    pushFeedback(0, feedback=feedback)
    total = 100 / 10

    SPLIT = inSplit

    if feedback == 'gui':
        progress = pB.progressBar('Loading...', 6)

    try:
        if isinstance(inRaster, np.ndarray):
            needXY = False
            X = inRaster
            if isinstance(inVector, np.ndarray):
                Y = inVector
            else:
                msg = 'You have to give an array of labels when using an array for the raster'
                pushFeedback(msg, feedback=feedback)

        if extraParam:
            if 'readROIFromVector' in extraParam.keys():
                if extraParam['readROIFromVector'] is not False:
                    try:
                        from function_vector import readROIFromVector
                        X, Y = readROIFromVector(
                            inVector, extraParam['readROIFromVector'], inField)
                        needXY = False
                    except BaseException:
                        msg = 'Problem when importing readROIFromVector from function_vector in dzetsaka'
                        pushFeedback(msg, feedback=feedback)

            if 'saveDir' in extraParam.keys():
                saveDir = extraParam['saveDir']
                if not os.path.exists(saveDir):
                    os.makedirs(saveDir)
                if not os.path.exists(os.path.join(saveDir, 'matrix/')):
                    os.makedirs(os.path.join(saveDir, 'matrix/'))

        inVectorTest = False
        if isinstance(SPLIT, str):
            if SPLIT.endswith(('.shp', '.sqlite')):
                inVectorTest = SPLIT

        if needXY:
            ROI = rasterize(inRaster, inVector, inField)

        if inVectorTest:
            ROIt = rasterize(inRaster, inVectorTest, inField)
            X, Y = dataraster.get_samples_from_roi(inRaster, ROI)
            Xt, yt = dataraster.get_samples_from_roi(inRaster, ROIt)
            xt, N, n = self.scale(Xt)
            # x,y = dataraster.get_samples_from_roi(inRaster,ROI,getCoords=True,convertTo4326=True)
            y = Y

        # Create temporary data set
        if SPLIT == 'SLOO':
            from sklearn.metrics import confusion_matrix
            try:
                from function_vector import distanceCV, distMatrix
            except BaseException:
                from .function_vector import distanceCV, distMatrix
            from sklearn.metrics import cohen_kappa_score, accuracy_score, f1_score
            """
            distanceFile = os.path.splitext(inVector)[0]+'_'+str(inField)+'_distMatrix.npy'
            if os.path.exists(distanceFile):
                print('Distance array loaded')
                distanceArray = np.load(distanceFile)
                X,Y = dataraster.get_samples_from_roi(inRaster,ROI)
            else:
                print('Generate distance array')
            """
            if 'readROIFromVector' in extraParam.keys():
                if extraParam['readROIFromVector'] is not False:
                    try:
                        coords = extraParam['coords']
                    except BaseException:
                        pushFeedback("Can't read coords array", feedback=feedback)
                else:
                    X, Y, coords = dataraster.get_samples_from_roi(
                        inRaster, ROI, getCoords=True)
            try:
                coords = extraParam['coords']
            except BaseException:
                X, Y, coords = dataraster.get_samples_from_roi(
                    inRaster, ROI, getCoords=True)

            distanceArray = distMatrix(coords)
            # np.save(os.path.splitext(distanceFile)[0],distanceArray)
        else:
            if SPLIT == 'STAND':
                from sklearn.metrics import confusion_matrix
                try:
                    from .function_vector import standCV  # ,readFieldVector
                except BaseException:
                    from function_vector import standCV  # ,readFieldVector
                try:
                    from sklearn.metrics import cohen_kappa_score, accuracy_score, f1_score
                except BaseException:
                    pass
                if 'inStand' in extraParam.keys():
                    inStand = extraParam['inStand']
                else:
                    inStand = 'stand'
                STAND = rasterize(inRaster, inVector, inStand)
                X, Y, STDs = dataraster.get_samples_from_roi(inRaster, ROI, STAND)
                # ROIStand = rasterize(inRaster,inVector,inStand)
                # temp, STDs = dataraster.get_samples_from_roi(inRaster,ROIStand)
                # FIDs,STDs,srs = readFieldVector(inVector,inField,inStand,getFeatures=False)
            elif needXY:
                X, Y = dataraster.get_samples_from_roi(inRaster, ROI)

    except BaseException:
        msg = ("Problem with getting samples from ROI.\n"
               "Are you sure you have only integer values in your " + str(inField) + " field?\n")
        pushFeedback(msg, feedback=feedback)

    [n, d] = X.shape
    C = int(Y.max())
    SPLIT = inSplit

    try:
        # pushFeedback(str(ROI),feedback=feedback)
        os.remove(ROI)
    except BaseException:
        pass
    # os.remove(filename)
    # os.rmdir(temp_folder)

    # Scale the data
    X, M, m = self.scale(X)

    pushFeedback(int(1 * total))
    if feedback == 'gui':
        progress.addStep()  # Add Step to ProgressBar

    # The learning process takes a split of the ground-truth pixels for training
    # and keeps the remaining ones for testing
    try:
        if isinstance(SPLIT, (int, float)):
            if SPLIT < 100:
                # Random selection of the sample
                x = np.array([]).reshape(0, d)
                y = np.array([]).reshape(0, 1)
                xt = np.array([]).reshape(0, d)
                yt = np.array([]).reshape(0, 1)

                np.random.seed(inSeed)  # Set the random generator state
                for i in range(C):
                    t = np.where((i + 1) == Y)[0]
                    nc = t.size
                    ns = int(nc * (SPLIT / float(100)))
                    rp = np.random.permutation(nc)
                    x = np.concatenate((X[t[rp[0:ns]], :], x))
                    xt = np.concatenate((X[t[rp[ns:]], :], xt))
                    y = np.concatenate((Y[t[rp[0:ns]]], y))
                    yt = np.concatenate((Y[t[rp[ns:]]], yt))
            else:
                x, y = X, Y
            self.x = x
            self.y = y
        else:
            x, y = X, Y
            self.x = x
            self.y = y
    except BaseException:
        pushFeedback("Problem while learning if SPLIT < 100", feedback=feedback)

    pushFeedback(int(2 * total), feedback=feedback)
    if feedback == 'gui':
        progress.addStep()

    pushFeedback('Learning process...', feedback=feedback)
    pushFeedback('This step could take a lot of time... So be patient, even if the '
                 'progress bar is stuck at 20% :)', feedback=feedback)

    if feedback == 'gui':
        progress.addStep()  # Add Step to ProgressBar

    # Train Classifier
    if inClassifier == 'GMM':
        try:
            from . import gmm_ridge as gmmr
        except BaseException:
            import gmm_ridge as gmmr
        try:
            # tau=10.0**sp.arange(-8,8,0.5)
            model = gmmr.GMMR()
            model.learn(x, y)
            # htau,err = model.cross_validation(x,y,tau)
            # model.tau = htau
        except BaseException:
            pushFeedback("Cannot train with GMM", feedback=feedback)
    else:
        # from sklearn import neighbors
        # from sklearn.svm import SVC
        # from sklearn.ensemble import RandomForestClassifier
        # model_selection = True
        from sklearn.model_selection import StratifiedKFold
        from sklearn.model_selection import GridSearchCV

        try:
            if extraParam:
                if 'param_algo' in extraParam.keys():
                    param_algo = extraParam['param_algo']

            # As QGIS on Windows doesn't manage multiprocessing, force the use of
            # one thread on non-Linux systems
            if SPLIT == 'STAND':
                label = np.copy(Y)

                if extraParam:
                    if 'SLOO' in extraParam.keys():
                        SLOO = extraParam['SLOO']
                    else:
                        SLOO = False
                    if 'maxIter' in extraParam.keys():
                        maxIter = extraParam['maxIter']
                    else:
                        maxIter = 5
                else:
                    SLOO = False
                    maxIter = 5

                rawCV = standCV(label, STDs, maxIter, SLOO, seed=inSeed)
                print(rawCV)
                cvDistance = []
                for tr, vl in rawCV:
                    # sts.append(stat)
                    cvDistance.append((tr, vl))

            if SPLIT == 'SLOO':
                # Compute CV for learning later
                label = np.copy(Y)

                if extraParam:
                    if 'distance' in extraParam.keys():
                        distance = extraParam['distance']
                    else:
                        pushFeedback('You need distance in extraParam', feedback=feedback)
                    if 'minTrain' in extraParam.keys():
                        minTrain = float(extraParam['minTrain'])
                    else:
                        minTrain = -1
                    if 'SLOO' in extraParam.keys():
                        SLOO = extraParam['SLOO']
                    else:
                        SLOO = True
                    if 'maxIter' in extraParam.keys():
                        maxIter = extraParam['maxIter']
                    else:
                        maxIter = False
                    if 'otherLevel' in extraParam.keys():
                        otherLevel = extraParam['otherLevel']
                    else:
                        otherLevel = False

                # sts = []
                cvDistance = []

                """
                rawCV = distanceCV(distanceArray,label,distanceThresold=distance,minTrain=minTrain,SLOO=SLOO,maxIter=maxIter,verbose=False,stats=False)
                """
                # feedback.setProgressText('distance is '+str(extraParam['distance']))
                pushFeedback('label is ' + str(label.shape), feedback=feedback)
                pushFeedback('distance array shape is ' + str(distanceArray.shape),
                             feedback=feedback)
                pushFeedback('minTrain is ' + str(minTrain), feedback=feedback)
                pushFeedback('SLOO is ' + str(SLOO), feedback=feedback)
                pushFeedback('maxIter is ' + str(maxIter), feedback=feedback)

                rawCV = distanceCV(distanceArray, label, distanceThresold=distance,
                                   minTrain=minTrain, SLOO=SLOO, maxIter=maxIter, stats=False)

                pushFeedback('Computing SLOO Cross Validation', feedback=feedback)
                for tr, vl in rawCV:
                    pushFeedback('Training size is ' + str(tr.shape), feedback=feedback)
                    pushFeedback('Validation size is ' + str(vl.shape), feedback=feedback)
                    # sts.append(stat)
                    cvDistance.append((tr, vl))
                """
                for tr,vl,stat in rawCV :
                    sts.append(stat)
                    cvDistance.append((tr,vl))
                """

            #
            if inClassifier == 'RF':
                from sklearn.ensemble import RandomForestClassifier
                param_grid = dict(n_estimators=3**np.arange(1, 5),
                                  max_features=range(1, x.shape[1], int(x.shape[1] / 3)))
                if 'param_algo' in locals():
                    classifier = RandomForestClassifier(**param_algo)
                else:
                    classifier = RandomForestClassifier()
                n_splits = 5
            elif inClassifier == 'SVM':
                from sklearn.svm import SVC
                param_grid = dict(gamma=2.0**np.arange(-4, 4), C=10.0**np.arange(-2, 5))
                if 'param_algo' in locals():
                    classifier = SVC(probability=True, **param_algo)
                    print('Found param algo : ' + str(param_algo))
                else:
                    classifier = SVC(probability=True, kernel="rbf")
                n_splits = 5
            elif inClassifier == 'KNN':
                from sklearn import neighbors
                param_grid = dict(n_neighbors=np.arange(1, 20, 4))
                if 'param_algo' in locals():
                    classifier = neighbors.KNeighborsClassifier(**param_algo)
                else:
                    classifier = neighbors.KNeighborsClassifier()
                n_splits = 3

        except BaseException:
            pushFeedback("Cannot train with classifier " + inClassifier, feedback=feedback)

        if feedback == 'gui':
            progress.prgBar.setValue(5)  # Add Step to ProgressBar

        if isinstance(SPLIT, int):
            cv = StratifiedKFold(n_splits=n_splits)  # .split(x,y)
        else:
            cv = cvDistance

        y.shape = (y.size,)

        if extraParam:
            if 'param_grid' in extraParam.keys():
                param_grid = extraParam['param_grid']
                pushFeedback('Custom param for Grid Search CV has been found : '
                             + str(param_grid), feedback=feedback)

        grid = GridSearchCV(classifier, param_grid=param_grid, cv=cv, n_jobs=1)
        grid.fit(x, y)
        model = grid.best_estimator_
        model.fit(x, y)

        if isinstance(SPLIT, str):
            CM = []
            testIndex = []
            for train_index, test_index in cv:
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                model.fit(X_train, y_train)
                X_pred = model.predict(X_test)
                CM.append(confusion_matrix(y_test, X_pred))
                testIndex.append(test_index)

            for i, j in enumerate(CM):
                if SPLIT == 'SLOO':
                    # np.savetxt((saveDir+'matrix/'+str(distance)+'_'+str(inField)+'_'+str(minTrain)+'_'+str(i)+'.csv'),CM[i],delimiter=',',fmt='%.d')
                    np.savetxt(
                        os.path.join(saveDir, 'matrix/' + str(distance) + '_' + str(inField)
                                     + '_' + str(minTrain) + '_' + str(i) + '.csv'),
                        CM[i], delimiter=',', fmt='%.d')

                    if otherLevel is not False:
                        otherLevelFolder = os.path.join(saveDir, 'matrix/level3/')
                        if not os.path.exists(otherLevelFolder):
                            os.makedirs(otherLevelFolder)
                        bigCM = np.zeros([14, 14], dtype=np.byte)
                        arr = CM[i]
                        curLevel = otherLevel[testIndex[i]]
                        curLevel = np.sort(curLevel, axis=0)
                        for lvl in range(curLevel.shape[0]):
                            bigCM[curLevel.astype(int) - 1,
                                  curLevel[lvl].astype(int) - 1] = arr[:, lvl].reshape(-1, 1)
                        np.savetxt(
                            os.path.join(otherLevelFolder, str(distance) + '_' + str(inField)
                                         + '_' + str(minTrain) + '_' + str(i) + '.csv'),
                            bigCM, delimiter=',', fmt='%.d')
                elif SPLIT == 'STAND':
                    # np.savetxt((saveDir+'matrix/stand_'+str(inField)+'_'+str(i)+'.csv'),CM[i],delimiter=',',fmt='%.d')
                    np.savetxt(
                        os.path.join(saveDir, 'matrix/stand_' + str(inField) + '_'
                                     + str(i) + '.csv'),
                        CM[i], delimiter=',', fmt='%.d')

    pushFeedback(int(9 * total), feedback=feedback)

    # Assess the quality of the model
    if feedback == 'gui':
        progress.prgBar.setValue(90)

    if inVectorTest or isinstance(SPLIT, int):
        if SPLIT != 100 or inVectorTest:
            # from sklearn.metrics import cohen_kappa_score,accuracy_score,f1_score
            # if inClassifier == 'GMM':
            #     yp = model.predict(xt)[0]
            # else:
            yp = model.predict(xt)
            CONF = ai.CONFUSION_MATRIX()
            CONF.compute_confusion_matrix(yp, yt)

            if outMatrix is not None:
                if not os.path.exists(os.path.dirname(outMatrix)):
                    os.makedirs(os.path.dirname(outMatrix))
                np.savetxt(outMatrix, CONF.confusion_matrix, delimiter=',',
                           header='Columns=prediction,Lines=reference.', fmt='%1.4d')

            if inClassifier != 'GMM':
                for key in param_grid.keys():
                    message = 'best ' + key + ' : ' + str(grid.best_params_[key])
                    if feedback == 'gui':
                        QgsMessageLog.logMessage(message)
                    elif feedback:
                        feedback.setProgressText(message)
                    else:
                        print(message)
            """
            self.kappa = cohen_kappa_score(yp,yt)
            self.f1 = f1_score(yp,yt,average='micro')
            self.oa = accuracy_score(yp,yt)
            """
            res = {'Overall Accuracy': CONF.OA, 'Kappa': CONF.Kappa, 'f1': CONF.F1mean}
            for estim in res:
                pushFeedback(estim + ' : ' + str(res[estim]), feedback=feedback)

    # Save Tree model
    self.model = model
    self.M = M
    self.m = m

    if outModel is not None:
        output = open(outModel, 'wb')
        pickle.dump([model, M, m, inClassifier], output)
        output.close()

    pushFeedback(int(10 * total), feedback=feedback)

    if feedback == 'gui':
        progress.reset()
        progress = None
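# A minimal usage sketch for the constructor documented above, assuming it belongs to a class
# named learnModel (as in dzetsaka's mainfunction module); the class name, paths, distance and
# parameter values below are illustrative only and should be adapted to your own data.
if __name__ == '__main__':
    extraParam = {
        'distance': 100,          # mandatory for 'SLOO' (spatial leave-one-out), in map units
        'minTrain': -1,           # same default the code falls back to when not given
        'maxIter': False,         # no cap on the number of folds
        'saveDir': '/tmp/sloo/',  # per-fold confusion matrices land in saveDir/matrix/
    }
    learnModel('/tmp/sample_filtered.tif', '/tmp/training.shp', inField='Class',
               outModel='/tmp/model', inSplit='SLOO', inSeed=0, outMatrix=None,
               inClassifier='RF', extraParam=extraParam, feedback=None)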
def __init__(self, inRaster, inVector, inField='Class', inSplit=0.5, inSeed=0,
             outModel=None, outMatrix=None, inClassifier='GMM'):
    learningProgress = progressBar('Learning model...', 6)

    # Convert vector to raster
    try:
        try:
            temp_folder = tempfile.mkdtemp()
            filename = os.path.join(temp_folder, 'temp.tif')

            data = gdal.Open(inRaster, gdal.GA_ReadOnly)
            shp = ogr.Open(inVector)
            lyr = shp.GetLayer()
        except:
            QgsMessageLog.logMessage("Problem with making tempfile or opening raster or vector")

        # Create temporary data set
        try:
            driver = gdal.GetDriverByName('GTiff')
            dst_ds = driver.Create(filename, data.RasterXSize, data.RasterYSize, 1, gdal.GDT_Byte)
            dst_ds.SetGeoTransform(data.GetGeoTransform())
            dst_ds.SetProjection(data.GetProjection())
            OPTIONS = 'ATTRIBUTE=' + inField
            gdal.RasterizeLayer(dst_ds, [1], lyr, None, options=[OPTIONS])
            data, dst_ds, shp, lyr = None, None, None, None
        except:
            QgsMessageLog.logMessage("Cannot create temporary data set")

        # Load training set
        try:
            X, Y = dataraster.get_samples_from_roi(inRaster, filename)
        except:
            QgsMessageLog.logMessage("Problem while getting samples from ROI with " + inRaster)

        [n, d] = X.shape
        C = int(Y.max())
        SPLIT = inSplit
        os.remove(filename)
        os.rmdir(temp_folder)

        # Scale the data
        X, M, m = self.scale(X)

        learningProgress.addStep()  # Add Step to ProgressBar

        # The learning process takes a split of the ground-truth pixels for training
        # and keeps the remaining ones for testing
        try:
            if SPLIT < 1:
                # progressBar, set Max to C
                # Random selection of the sample
                x = sp.array([]).reshape(0, d)
                y = sp.array([]).reshape(0, 1)
                xt = sp.array([]).reshape(0, d)
                yt = sp.array([]).reshape(0, 1)

                sp.random.seed(inSeed)  # Set the random generator state
                for i in range(C):
                    t = sp.where((i + 1) == Y)[0]
                    nc = t.size
                    ns = int(nc * SPLIT)
                    rp = sp.random.permutation(nc)
                    x = sp.concatenate((X[t[rp[0:ns]], :], x))
                    xt = sp.concatenate((X[t[rp[ns:]], :], xt))
                    y = sp.concatenate((Y[t[rp[0:ns]]], y))
                    yt = sp.concatenate((Y[t[rp[ns:]]], yt))
                # Add Pb
            else:
                x, y = X, Y
        except:
            QgsMessageLog.logMessage("Problem while learning if SPLIT <1")

        learningProgress.addStep()  # Add Step to ProgressBar

        # Train Classifier
        if inClassifier == 'GMM':
            try:
                # tau=10.0**sp.arange(-8,8,0.5)
                model = gmmr.GMMR()
                model.learn(x, y)
                # htau,err = model.cross_validation(x,y,tau)
                # model.tau = htau
            except:
                QgsMessageLog.logMessage("Cannot train with GMM")
        else:
            try:
                from sklearn import neighbors
                from sklearn.svm import SVC
                from sklearn.ensemble import RandomForestClassifier
                from sklearn.cross_validation import StratifiedKFold
                from sklearn.grid_search import GridSearchCV
            except:
                QgsMessageLog.logMessage("You must have sklearn dependencies on your computer. "
                                         "Please consult the documentation")

            try:
                # As QGIS on Windows doesn't manage multiprocessing, force the use of
                # one thread on non-Linux systems
                if os.name == 'posix':
                    n_jobs = -1
                else:
                    n_jobs = 1

                #
                if inClassifier == 'RF':
                    param_grid_rf = dict(n_estimators=3**sp.arange(1, 5),
                                         max_features=sp.arange(1, 4))
                    y.shape = (y.size,)
                    cv = StratifiedKFold(y, n_folds=3)
                    grid = GridSearchCV(RandomForestClassifier(), param_grid=param_grid_rf,
                                        cv=cv, n_jobs=n_jobs)
                    grid.fit(x, y)
                    model = grid.best_estimator_
                    model.fit(x, y)
                elif inClassifier == 'SVM':
                    param_grid_svm = dict(gamma=2.0**sp.arange(-4, 4), C=10.0**sp.arange(-2, 5))
                    y.shape = (y.size,)
                    cv = StratifiedKFold(y, n_folds=5)
                    grid = GridSearchCV(SVC(), param_grid=param_grid_svm, cv=cv, n_jobs=n_jobs)
                    grid.fit(x, y)
                    model = grid.best_estimator_
                    model.fit(x, y)
                elif inClassifier == 'KNN':
                    param_grid_knn = dict(n_neighbors=sp.arange(1, 20, 4))
                    y.shape = (y.size,)
                    cv = StratifiedKFold(y, n_folds=3)
                    grid = GridSearchCV(neighbors.KNeighborsClassifier(),
                                        param_grid=param_grid_knn, cv=cv, n_jobs=n_jobs)
                    grid.fit(x, y)
                    model = grid.best_estimator_
                    model.fit(x, y)
            except:
                print('Cannot train with Classifier ' + inClassifier)
                QgsMessageLog.logMessage("Cannot train with Classifier " + inClassifier)

        learningProgress.prgBar.setValue(5)  # Add Step to ProgressBar

        # Assess the quality of the model
        if SPLIT < 1:
            # if inClassifier == 'GMM':
            #     yp = model.predict(xt)[0]
            # else:
            yp = model.predict(xt)
            CONF = ai.CONFUSION_MATRIX()
            CONF.compute_confusion_matrix(yp, yt)
            sp.savetxt(outMatrix, CONF.confusion_matrix, delimiter=',', fmt='%1.4d')

        # Save Tree model
        if outModel is not None:
            output = open(outModel, 'wb')
            pickle.dump([model, M, m], output)
            output.close()

        learningProgress.addStep()  # Add Step to ProgressBar

        # Close progressBar
        learningProgress.reset()
        learningProgress = None
    except:
        learningProgress.reset()