def sim(self): """ Perform one iteration of the simulation. """ # TODO: eliminate AreaAnalyst.getChangeMap() from the process transition = self.crosstable.getCrosstable() prediction = self.getPrediction() state = self.getState() new_state = state.getBand(1).copy() # New states (the result of simulation) will be stored there. analyst = AreaAnalyst(state, prediction) classes = analyst.classes changes = analyst.getChangeMap().getBand(1) # Make transitions between classes according to # the number of moved pixels in the crosstable self.rangeChanged.emit(self.tr("Simulation process %p%"), len(classes) ** 2 - len(classes)) for initClass in classes: for finalClass in classes: if initClass == finalClass: continue # TODO: Calculate number of pixels to be moved via TransitionMatrix and state raster n = transition.getTransition( initClass, finalClass ) # Number of pixels to be moved (constant count now). # Find n appropriate places for the transition initClass -> finalClass class_code = analyst.encode(initClass, finalClass) places = changes == class_code # Array of places where transitions initClass -> finalClass occur placesCount = np.sum(places) if placesCount < n: self.logMessage.emit( self.tr("There are more transitions in the transition matrix than the model has found") ) n = placesCount confidence = self.getConfidence().getBand(1) confidence = ( confidence * places ) # The higher the number in a cell, the higher the probability of transition in that cell indices = [] for i in range(n): index = np.unravel_index( confidence.argmax(), confidence.shape ) # Select the cell with the biggest probability indices.append(index) confidence[index] = -1 # Mark the cell to prevent it from being selected again # Now "indices" contains indices of the appropriate places, # make the transition initClass -> finalClass for index in indices: new_state[index] = finalClass self.updateProgress.emit() result = Raster() result.create([new_state], state.getGeodata()) self.state = result self.updatePrediction(result) self.processFinished.emit()
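# A minimal, standalone sketch (not part of the plugin) of the cell-selection step used in
# sim() above: greedily pick the n most confident cells for a transition by repeatedly taking
# argmax and marking the chosen cell so it cannot be selected twice. The array values are made up.
import numpy as np

confidence = np.array([[0.9, 0.1, 0.4],
                       [0.2, 0.8, 0.3],
                       [0.5, 0.6, 0.7]])
places = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 1]])  # Cells where the transition initClass -> finalClass may occur
conf = confidence * places      # Zero out cells where the transition cannot happen
n = 3
indices = []
for _ in range(n):
    index = np.unravel_index(conf.argmax(), conf.shape)  # Cell with the biggest probability
    indices.append(index)
    conf[index] = -1            # Mark the cell to prevent a second selection
print(indices)                  # [(0, 0), (2, 2), (2, 1)]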
class TestSimulator(unittest.TestCase): def setUp(self): # Raster1: # ~ [1, 1, 3,], # ~ [3, 2, 1,], # ~ [0, 3, 1,] self.raster1 = Raster("../../examples/multifact.tif") self.raster1.resetMask([0]) self.X = np.array([[1, 2, 3], [3, 2, 1], [0, 1, 1]]) self.X = np.ma.array(self.X, mask=(self.X == 0)) self.raster2 = Raster() self.raster2.create([self.X], self.raster1.getGeodata()) self.aa = AreaAnalyst(self.raster1, self.raster2) self.crosstab = CrossTableManager(self.raster1, self.raster2) # Simple model self.model = Model(state=self.raster1) def test_compute_table(self): # print self.crosstab.getCrosstable().getCrosstable() # CrossTab: # [[ 3. 1. 0.] # [ 0. 1. 0.] # [ 1. 0. 2.]] prediction = self.model.getPrediction(self.raster1) # print prediction.getBand(1) # prediction = [[1.0 1.0 6.0] # [6.0 5.0 1.0] # [-- 6.0 1.0]] # confidence = self.model.getConfidence() # print confidence.getBand(1) # confidence = [[1.0 0.5 0.33] # [0.5 0.33 0.25] # [-- 0.25 0.2]] result = np.array([[2.0, 1.0, 3.0], [1.0, 2.0, 1.0], [0, 3.0, 1.0]]) result = np.ma.array(result, mask=(result == 0)) simulator = Simulator( state=self.raster1, factors=None, model=self.model, crosstable=self.crosstab ) # The model does't use factors simulator.setIterationCount(1) simulator.simN() state = simulator.getState().getBand(1) assert_array_equal(result, state) result = np.array([[2.0, 1.0, 1.0], [2.0, 2.0, 1.0], [0, 3.0, 1.0]]) result = np.ma.array(result, mask=(result == 0)) simulator = Simulator(self.raster1, None, self.model, self.crosstab) simulator.setIterationCount(2) simulator.simN() state = simulator.getState().getBand(1) assert_array_equal(result, state)
def train(self): """ Train the model """ self.transitionPotentials = {} try: iterCount = len(self.codes) * len(self.factors) self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount) changeMap = self.changeMap.getBand(1) for code in self.codes: sites = binaryzation(changeMap, [code]) # Reclass factors (continuous factor -> ordinal factor) wMap = np.ma.zeros(changeMap.shape) # The map of summary weight of all the factors self.weights[code] = {} # Dictionary for storing weights of every raster's band for k in xrange(len(self.factors)): fact = self.factors[k] self.weights[code][k] = {} # Weights of the factor factorW = self.weights[code][k] if self.bins: # Get bins of the factor bin = self.bins[k] if (bin != None) and fact.getBandsCount() != len(bin): raise WoeManagerError("Count of bins list for multiband factor isn't equal to band count!") else: bin = None for i in range(1, fact.getBandsCount() + 1): band = fact.getBand(i) if bin and bin[i - 1]: # band = reclass(band, bin[i - 1]) band, sites = masks_identity(band, sites, dtype=np.uint8) # Combine masks of the rasters woeRes = woe( band, sites, self.unit_cell ) # WoE for the 'code' (initState->finalState) transition and current 'factor'. weights = woeRes["map"] wMap = wMap + weights factorW[i] = woeRes["weights"] self.updateProgress.emit() # Reclassification finished => set WoE coefficients self.woe[code] = wMap # WoE for all factors and the transition code. # Potentials are the WoE map rescaled to 0--100 percent band = (sigmoid(wMap) * 100).astype(np.uint8) p = Raster() p.create([band], self.geodata) self.transitionPotentials[code] = p gc.collect() except MemoryError: self.errorReport.emit("The system is out of memory during WoE training") raise except: self.errorReport.emit(self.tr("An unknown error occurred during WoE training")) raise finally: self.processFinished.emit()
def errorMap(self, answer): ''' Create a map of correct and incorrect predictions. This function compares the known answer with the result of the prediction procedure; a correct pixel is marked as 0. ''' state = self.getState() b = state.getBand(1) a = answer.getBand(1) diff = (a - b).astype(np.int16) result = Raster() result.create([diff], state.getGeodata()) return result
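# A tiny illustration (not part of the plugin) of what errorMap() produces: the difference
# between the known answer and the predicted state, so zero marks a correctly predicted pixel.
import numpy as np

answer    = np.ma.array([[1, 2], [3, 1]])
predicted = np.ma.array([[1, 3], [3, 2]])
diff = (answer - predicted).astype(np.int16)
print(diff)   # [[0 -1] [0 -1]] -- nonzero cells are prediction errors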
def test_WoeManager(self): aa = AreaAnalyst(self.sites, self.sites) w1 = WoeManager([self.factor], aa) p = w1.getPrediction(self.sites).getBand(1) assert_array_equal(p, self.sites.getBand(1)) initState = Raster("../../examples/data.tif") finalState = Raster("../../examples/data1.tif") aa = AreaAnalyst(initState, finalState) w = WoeManager([initState], aa) p = w.getPrediction(initState).getBand(1) # Calculate by hands: # 1->1 transition raster: r11 = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] # 1->2 raster: r12 = [[0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] # 1->3 raster: r13 = [[0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] # 2->1 r21 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] # 2->2 r22 = [[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0]] # 2->3 r23 = [[0, 0, 0, 0], [0, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]] # 3->1 r31 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0]] # 3->2 r32 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]] # 3->3 r33 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0]] geodata = initState.getGeodata() sites = {"11": r11, "12": r12, "13": r13, "21": r21, "22": r22, "23": r23, "31": r31, "32": r32, "33": r33} woeDict = {} # WoE of transitions for k in sites.keys(): # if k != "21": # !!! r21 is zero x = Raster() x.create([np.ma.array(data=sites[k])], geodata) sites[k] = x woeDict[k] = woe(initState.getBand(1), x.getBand(1)) # w1max = np.maximum(woeDict['11'], woeDict['12'], woeDict['13']) # w2max = np.maximum(woeDict['22'], woeDict['23']) # w3max = np.maximum(woeDict['31'], woeDict['32'], woeDict['33']) # Answer is index of finalClass that maximizes weights of transiotion initClass -> finalClass answer = [[1, 1, 1, 1], [1, 1, 3, 3], [3, 3, 3, 3], [1, 1, 1, 1]] assert_array_equal(p, answer) w = WoeManager([initState], aa, bins={0: [[2]]}) p = w.getPrediction(initState).getBand(1)
def makeChangeMap(self): f, s = self.first, self.second rows, cols = self.geodata['ySize'], self.geodata['xSize'] band = np.zeros([rows, cols]) self.rangeChanged.emit(self.tr("Creating change map %p%"), rows) for i in xrange(rows): for j in xrange(cols): if not f.mask[i,j]: r = f[i,j] c = s[i,j] band[i, j] = self.encode(r, c) self.updateProgress.emit() bands = [np.ma.array(data = band, mask = f.mask)] raster = Raster() raster.create(bands, self.geodata) self.processFinished.emit(raster) self.changeMap = raster
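# A rough, hypothetical alternative (not the plugin's implementation) to the per-pixel loop in
# makeChangeMap() above: since encode() is called with scalar (initClass, finalClass) pairs, it
# could be wrapped with np.vectorize and applied to whole bands at once. The encode() stand-in
# below is illustrative only; the real formula lives in AreaAnalyst.encode.
import numpy as np

def encode(init_cat, final_cat, n_categories=4):   # Hypothetical stand-in for AreaAnalyst.encode
    return init_cat * n_categories + final_cat

first  = np.ma.array([[1, 2], [3, 1]], mask=[[0, 0], [0, 1]])
second = np.ma.array([[1, 3], [3, 2]], mask=[[0, 0], [0, 1]])
codes = np.vectorize(encode)(np.ma.filled(first, 0), np.ma.filled(second, 0))
change_map = np.ma.array(codes, mask=first.mask)   # Re-apply the mask of the first raster
print(change_map)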
class Model(object): """ Simple predicting model for Simulator tests """ def __init__(self, state): self._predict(state) def getConfidence(self): return self.confidence def getPrediction(self, state, factors=None, calcTransitions=False): self._predict(state, factors) return self.prediction def _predict(self, state, factors=None, calcTransitions=False): geodata = state.getGeodata() band = state.getBand(1) rows, cols = geodata["ySize"], geodata["xSize"] # Let the new state be: 1 -> 2, 2 -> 3, 3 -> 1; then # the prediction is 1->1, 2->5, 3->6 predicted_band = np.copy(band) predicted_band[band == 1] = 1.0 predicted_band[band == 2] = 5.0 predicted_band[band == 3] = 6.0 # Let the confidence be 1/(1+row+col), where row and col are the row and column numbers of the cell. confidence_band = np.zeros([rows, cols]) for i in xrange(rows): for j in xrange(cols): confidence_band[i, j] = 1.0 / (1 + i + j) predicted_bands = [np.ma.array(data=predicted_band, mask=band.mask)] confidence_bands = [np.ma.array(data=confidence_band, mask=band.mask)] self.prediction = Raster() self.prediction.create(predicted_bands, state.geodata) self.confidence = Raster() self.confidence.create(confidence_bands, state.geodata)
class Model(object): ''' Simple predicting model for Simulator tests ''' def __init__(self, state): self.state = state self._predict(state) def getConfidence(self): return self.confidence def getPrediction(self, state, factors=None): self._predict(state, factors) return self.prediction def _predict(self, state, factors = None): geodata = self.state.getGeodata() band = state.getBand(1) rows, cols = geodata['ySize'], geodata['xSize'] # Let the prediction be: 1 -> 2, 2 -> 3, 3 -> 1 predicted_band = np.copy(band) predicted_band[band == 1] = 2 predicted_band[band == 2] = 3 predicted_band[band == 3] = 1 # Let the confidence be 1/(1+row+col), where row and col are the row and column numbers of the cell. confidence_band = np.zeros([rows, cols]) for i in xrange(rows): for j in xrange(cols): confidence_band[i,j] = 1.0/(1+i+j) predicted_bands = [np.ma.array(data = predicted_band, mask = band.mask)] confidence_bands = [np.ma.array(data = confidence_band, mask = band.mask)] self.prediction = Raster() self.prediction.create(predicted_bands, state.geodata) self.confidence = Raster() self.confidence.create(confidence_bands, state.geodata)
def makeChangeMap(self): rows, cols = self.geodata['ySize'], self.geodata['xSize'] band = np.zeros([rows, cols], dtype=np.int16) f, s = self.first, self.second if self.initRaster == None: checkPersistent = False else: checkPersistent = True t = self.initRaster.getBand(1) raster = None try: self.rangeChanged.emit(self.tr("Creating change map %p%"), rows) for i in xrange(rows): for j in xrange(cols): if (f.mask.shape == ()) or (not f.mask[i,j]): r = f[i,j] c = s[i,j] # A persistent category is a category that is constant across all three rasters if checkPersistent and (r==c) and (r==t[i,j]): band[i, j] = self.persistentCategoryCode else: band[i, j] = self.encode(r, c) self.updateProgress.emit() bands = [np.ma.array(data = band, mask = f.mask, dtype=np.int16)] raster = Raster() raster.create(bands, self.geodata) self.changeMap = raster except MemoryError: self.errorReport.emit(self.tr("The system is out of memory during change map creation")) raise except: self.errorReport.emit(self.tr("An unknown error occurred during change map creation")) raise finally: self.processFinished.emit(raster)
def test_create(self): raster = Raster() raster.create([self.data1], geodata=self.r1.getGeodata()) self.assertTrue(raster.geoDataMatch(self.r1)) self.assertEqual(raster.getBandsCount(), 1) self.assertEqual(set(raster.getBandGradation(1)), set([0, 1, 2, 3]))
class LR(object): """ Implements Logistic Regression model definition and calibration (maximum likelihood parameter estimation). """ def __init__(self, ns=0, logreg=None): from sklearn import linear_model as lm if logreg: self.logreg = logreg else: self.logreg = lm.LogisticRegression() self.ns = ns # Neighbourhood size of training rasters. self.data = None # Training data self.classlist = None # List of unique output values of the output raster # Results of the LR prediction self.prediction = None # Raster of the LR prediction results self.confidence = None # Raster of the LR results confidence def getCoef(self): return self.logreg.coef_ def getConfidence(self): return self.confidence def getIntercept(self): return self.logreg.intercept_ def getPrediction(self, state, factors): self._predict(state, factors) return self.prediction def _outputConfidence(self, input): ''' Return confidence (difference between the 2 biggest probabilities) of the LR output. ''' out_scl = self.logreg.predict_proba(input)[0] # Calculate the confidence: out_scl.sort() return out_scl[-1] - out_scl[-2] def _predict(self, state, factors): ''' Calculate output and confidence rasters using the LR model and input rasters @param state Raster of the current state (classes) values. @param factors List of the factor rasters (predicting variables). ''' geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] for r in factors: if not state.geoDataMatch(r): raise LRError('Geometries of the input rasters are different!') # Normalize factors before prediction: for f in factors: f.normalize(mode = 'mean') predicted_band = np.zeros([rows, cols]) confidence_band = np.zeros([rows, cols]) sampler = Sampler(state, factors, ns=self.ns) mask = state.getBand(1).mask.copy() for i in xrange(rows): for j in xrange(cols): if not mask[i,j]: input = sampler.get_inputs(state, factors, i,j) if input != None: out = self.logreg.predict(input) predicted_band[i,j] = out confidence = self._outputConfidence(input) confidence_band[i, j] = confidence else: # Input sample is incomplete => mask this pixel mask[i, j] = True predicted_bands = [np.ma.array(data = predicted_band, mask = mask)] confidence_bands = [np.ma.array(data = confidence_band, mask = mask)] self.prediction = Raster() self.prediction.create(predicted_bands, geodata) self.confidence = Raster() self.confidence.create(confidence_bands, geodata) def read(self): pass def save(self): pass def setTrainingData(self, state, factors, output, mode='All', samples=None): ''' @param state Raster of the current state (classes) values. @param factors List of the factor rasters (predicting variables). @param output Raster that contains classes to predict. @param mode Type of sampling method: All Get all pixels Normal Get samples. Count of samples in the data=samples. Balanced Undersampling of major classes and/or oversampling of minor classes. @param samples Sample count of the training data (not used in 'All' mode). 
''' if not self.logreg: raise LRError('You must create a Logistic Regression model before!') # Normalize factors before sampling: for f in factors: f.normalize(mode = 'mean') sampler = Sampler(state, factors, output, ns=self.ns) sampler.setTrainingData(state, factors, output, shuffle=False, mode=mode, samples=samples) outputVecLen = sampler.outputVecLen stateVecLen = sampler.stateVecLen factorVectLen = sampler.factorVectLen size = len(sampler.data) self.data = sampler.data def train(self): X = np.column_stack( (self.data['state'], self.data['factors']) ) Y = self.data['output'] self.logreg.fit(X, Y)
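# A minimal usage sketch for the scikit-learn based LR wrapper above. It assumes that
# state_raster, factor_rasters and output_raster are Raster objects prepared elsewhere;
# the sample count and mode below are illustrative, not taken from the project.
lr = LR(ns=1)                                    # Neighbourhood size used when sampling
lr.setTrainingData(state_raster, factor_rasters, output_raster,
                   mode='Normal', samples=1000)
lr.train()
coefficients = lr.getCoef()                      # Fitted regression coefficients
intercept = lr.getIntercept()
prediction = lr.getPrediction(state_raster, factor_rasters)   # Raster of predicted classes
confidence = lr.getConfidence()                  # Raster of prediction confidence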
def test_WoeManager(self): aa = AreaAnalyst(self.sites, self.sites) w1 = WoeManager([self.factor], aa) w1.train() p = w1.getPrediction(self.sites).getBand(1) answer = [[0, 3, 0], [0, 3, 0], [9, 0, 3]] answer = ma.array(data=answer, mask=self.mask) assert_array_equal(p, answer) initState = Raster('../../examples/data.tif') #~ [1,1,1,1], #~ [1,1,2,2], #~ [2,2,2,2], #~ [3,3,3,3] finalState = Raster('../../examples/data1.tif') #~ [1,1,2,3], #~ [3,1,2,3], #~ [3,3,3,3], #~ [1,1,3,2] aa = AreaAnalyst(initState, finalState) w = WoeManager([initState], aa) w.train() #print w.woe p = w.getPrediction(initState).getBand(1) self.assertEquals(p.dtype, np.uint8) # Calculate by hands: #1->1 transition raster: r11 = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] #1->2 raster: r12 = [[0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] #1->3 raster: r13 = [[0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] # 2->1 r21 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] # 2->2 r22 = [[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0]] # 2->3 r23 = [[0, 0, 0, 0], [0, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]] # 3->1 r31 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0]] # 3->2 r32 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]] # 3->3 r33 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0]] geodata = initState.getGeodata() sites = { '11': r11, '12': r12, '13': r13, '21': r21, '22': r22, '23': r23, '31': r31, '32': r32, '33': r33 } woeDict = {} # WoE of transitions for k in sites.keys(): # if k != '21': # !!! r21 is zero x = Raster() x.create([np.ma.array(data=sites[k])], geodata) sites[k] = x woeDict[k] = woe(initState.getBand(1), x.getBand(1)) #w1max = np.maximum(woeDict['11'], woeDict['12'], woeDict['13']) #w2max = np.maximum(woeDict['22'], woeDict['23']) #w3max = np.maximum(woeDict['31'], woeDict['32'], woeDict['33']) # Answer is a transition code with max weight answer = [[0, 0, 0, 0], [0, 0, 5, 5], [5, 5, 5, 5], [6, 6, 6, 6]] assert_array_equal(p, answer) w = WoeManager([initState], aa, bins={ 0: [ [2], ], }) w.train() p = w.getPrediction(initState).getBand(1) self.assertEquals(p.dtype, np.uint8) c = w.getConfidence().getBand(1) self.assertEquals(c.dtype, np.uint8)
class WoeManager(QObject): '''This class gets the data extracted from the UI and passes it to the woe function, then gets and stores the result. ''' rangeChanged = pyqtSignal(str, int) updateProgress = pyqtSignal() processFinished = pyqtSignal() logMessage = pyqtSignal(str) errorReport = pyqtSignal(str) def __init__(self, factors, areaAnalyst, unit_cell=1, bins = None): ''' @param factors List of the pattern rasters used for prediction of point objects (sites). @param areaAnalyst AreaAnalyst that contains the map of the changes, encodes and decodes category numbers. @param unit_cell Method parameter, pixelsize of resampled rasters. @param bins Dictionary of bins. Bins are binning boundaries that are used to reduce the number of categories. For example if factors = [f0, f1], then bins could be (for example) {0:[bins for f0], 1:[bins for f1]} = {0:[[10, 100, 250]],1:[[0.2, 1, 1.5, 4]]}. A list of lists is used because a factor can be a multiband raster; we need to get a list of bins for every band. For example: factors = [f0, 2-band-factor], bins= {0: [[10, 100, 250]], 1:[[0.2, 1, 1.5, 4], [3, 4, 7]] } ''' QObject.__init__(self) self.factors = factors self.analyst = areaAnalyst self.changeMap = areaAnalyst.getChangeMap() self.bins = bins self.unit_cell = unit_cell self.prediction = None # Raster of the prediction results self.confidence = None # Raster of the results confidence (1 = the maximum confidence, 0 = the least confidence) if (bins != None) and (len(self.factors) != len(bins.keys())): raise WoeManagerError('Lengths of bins and factors are different!') for r in self.factors: if not self.changeMap.geoDataMatch(r): raise WoeManagerError('Geometries of the input rasters are different!') if self.changeMap.getBandsCount() != 1: raise WoeManagerError('Change map must have one band!') self.geodata = self.changeMap.getGeodata() # Denormalize factors if they are normalized for r in self.factors: r.denormalize() # Get list of codes from the changeMap raster categories = self.changeMap.getBandGradation(1) self.codes = [int(c) for c in categories] # Codes of transitions initState->finalState (see AreaAnalyst.encode) self.woe = {} # Maps of WoE results of every transition code self.weights = {} # Weights of WoE (of raster band code) #{ # The format is: {Transition_code: {factorNumber1: [list of the weights], factorNumber2: [list of the weights]}, ...} # # for example: # 0: {0: {1: [...]}, 1: {1: [...]}}, # 1: {0: {1: [...]}, 1: {1: [...]}}, # 2: {0: {1: [...]}, 1: {1: [...]}}, # ... #} # self.transitionPotentials = None # Dictionary of transition potential maps: {category1: map1, category2: map2, ...} def checkBins(self): """ Check if bins are applicable to the factors """ if self.bins != None: for i, factor in enumerate(self.factors): factor.denormalize() bin = self.bins[i] if (bin != None) and (bin != [None]): for j in range(factor.getBandsCount()): b = bin[j] tmp = b[:] tmp.sort() if b!=tmp: # Must be sorted return False b0, bMax = b[0], b[len(b)-1] bandStat = factor.getBandStat(j+1) if bandStat['min'] >b0 or bandStat['max']<bMax: return False return True def getConfidence(self): return self.confidence def getPrediction(self, state, factors=None, calcTransitions=False): ''' Most of the models use factors for prediction, but WoE takes the list of factors only once (during initialization). 
''' self._predict(state, calcTransitions) return self.prediction def getTransitionPotentials(self): return self.transitionPotentials def getWoe(self): return self.woe def _predict(self, state, calcTransitions=False): ''' Predict the changes. ''' try: self.rangeChanged.emit(self.tr("Initialize model %p%"), 1) rows, cols = self.geodata['ySize'], self.geodata['xSize'] if not self.changeMap.geoDataMatch(state): raise WoeManagerError('Geometries of the state and changeMap rasters are different!') prediction = np.zeros((rows,cols), dtype=np.uint8) confidence = np.zeros((rows,cols), dtype=np.uint8) mask = np.zeros((rows,cols), dtype=np.byte) stateBand = state.getBand(1) self.updateProgress.emit() self.rangeChanged.emit(self.tr("Prediction %p%"), rows) for r in xrange(rows): for c in xrange(cols): oldMax, currMax = -1000, -1000 # Small numbers indexMax = -1 # Index of Max weight initCat = stateBand[r,c] # Init category (state before transition) try: codes = self.analyst.codes(initCat) # Possible final states for code in codes: try: # Not all possible transitions may be present in the changeMap map = self.woe[code] # Get WoE map of transition 'code' except KeyError: continue w = map[r,c] # The weight in the (r,c)-pixel if w > currMax: indexMax, oldMax, currMax = code, currMax, w prediction[r,c] = indexMax confidence[r,c] = int(100*(sigmoid(currMax) - sigmoid(oldMax))) except ValueError: mask[r,c] = 1 self.updateProgress.emit() predicted_band = np.ma.array(data=prediction, mask=mask, dtype=np.uint8) self.prediction = Raster() self.prediction.create([predicted_band], self.geodata) confidence_band = np.ma.array(data=confidence, mask=mask, dtype=np.uint8) self.confidence = Raster() self.confidence.create([confidence_band], self.geodata) except MemoryError: self.errorReport.emit(self.tr("The system is out of memory during WoE prediction")) raise except: self.errorReport.emit(self.tr("An unknown error occurred during WoE prediction")) raise finally: self.processFinished.emit() def train(self): ''' Train the model ''' self.transitionPotentials = {} try: iterCount = len(self.codes)*len(self.factors) self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount) changeMap = self.changeMap.getBand(1) for code in self.codes: sites = binaryzation(changeMap, [code]) # Reclass factors (continuous factor -> ordinal factor) wMap = np.ma.zeros(changeMap.shape) # The map of summary weight of all the factors self.weights[code] = {} # Dictionary for storing weights of every raster's band for k in xrange(len(self.factors)): fact = self.factors[k] self.weights[code][k] = {} # Weights of the factor factorW = self.weights[code][k] if self.bins: # Get bins of the factor bin = self.bins[k] if (bin != None) and fact.getBandsCount() != len(bin): raise WoeManagerError("Count of bins list for multiband factor isn't equal to band count!") else: bin = None for i in range(1, fact.getBandsCount()+1): band = fact.getBand(i) if bin and bin[i-1]: # band = reclass(band, bin[i-1]) band, sites = masks_identity(band, sites, dtype=np.uint8) # Combine masks of the rasters woeRes = woe(band, sites, self.unit_cell) # WoE for the 'code' (initState->finalState) transition and current 'factor'. weights = woeRes['map'] wMap = wMap + weights factorW[i] = woeRes['weights'] self.updateProgress.emit() # Reclassification finished => set WoE coefficients self.woe[code]=wMap # WoE for all factors and the transition code. 
# Potentials are the WoE map rescaled to 0--100 percent band = (sigmoid(wMap)*100).astype(np.uint8) p = Raster() p.create([band], self.geodata) self.transitionPotentials[code] = p gc.collect() except MemoryError: self.errorReport.emit('The system is out of memory during WoE training') raise except: self.errorReport.emit(self.tr("An unknown error occurred during WoE training")) raise finally: self.processFinished.emit() def weightsToText(self): ''' Format self.weights as a text report. ''' if self.weights == {}: return u"" text = u"" for code in self.codes: (initClass, finalClass) = self.analyst.decode(code) text = text + self.tr("Transition %s -> %s\n" % (int(initClass), int(finalClass))) try: factorW = self.weights[code] for factNum, factDict in factorW.iteritems(): name = self.factors[factNum].getFileName() name = basename(name) text = text + self.tr("\t factor: %s \n" % (name,) ) for bandNum, bandWeights in factDict.iteritems(): weights = ["%f" % (w,) for w in bandWeights] text = text + self.tr("\t\t Weights of band %s: %s \n" % (bandNum, ", ".join(weights)) ) except: text = text + self.tr('W for code %s (%s -> %s) caused an error' % (code, initClass, finalClass)) raise return text
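# A minimal usage sketch for the WoeManager above. It assumes factor1 and factor2 are factor
# Raster objects and analyst is an AreaAnalyst built from the initial and final state rasters;
# the bin boundaries below are purely illustrative.
manager = WoeManager([factor1, factor2], analyst,
                     bins={0: [[10, 100, 250]], 1: [[0.2, 1, 1.5, 4]]})
manager.train()                                    # Computes WoE weights and transition potentials
prediction = manager.getPrediction(initial_state)  # Raster of the most probable transition codes
confidence = manager.getConfidence()               # 0--100 confidence raster
potentials = manager.getTransitionPotentials()     # {transition code: potential Raster}
print(manager.weightsToText())                     # Human-readable report of the WoE weights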
class LR(QObject): """ Implements Logistic Regression model definition and calibration (maximum likelihood parameter estimation). """ rangeChanged = pyqtSignal(str, int) updateProgress = pyqtSignal() processFinished = pyqtSignal() samplingFinished = pyqtSignal() finished = pyqtSignal() logMessage = pyqtSignal(str) errorReport = pyqtSignal(str) def __init__(self, ns=0, logreg=None): QObject.__init__(self) if logreg: self.logreg = logreg else: self.logreg = mlr.MLR() self.state = None self.factors = None self.output = None self.mode = "All" self.samples = None self.catlist = None self.ns = ns # Neighbourhood size of training rasters. self.data = None # Training data self.maxiter = 100 # Maximum number of fitting iterations self.sampler = None # Sampler # Results of the LR prediction self.prediction = None # Raster of the LR prediction results self.confidence = None # Raster of the LR results confidence (1 = the maximum confidence, 0 = the least confidence) self.Kappa = 0 # Kappa value self.pseudoR = 0 # Pseudo R-squared (Count) (http://www.ats.ucla.edu/stat/mult_pkg/faq/general/Psuedo_RSquareds.htm) self.transitionPotentials = None # Dictionary of transition potential maps: {category1: map1, category2: map2, ...} def getCoef(self): return self.logreg.get_weights().T def getConfidence(self): return self.confidence def getIntercept(self): return self.logreg.get_intercept() def getKappa(self): return self.Kappa def getStdErrIntercept(self): X = np.column_stack( (self.data['state'], self.data['factors']) ) return self.logreg.get_stderr_intercept(X) def getStdErrWeights(self): X = np.column_stack( (self.data['state'], self.data['factors']) ) return self.logreg.get_stderr_weights(X).T def get_PvalIntercept(self): X = np.column_stack( (self.data['state'], self.data['factors']) ) return self.logreg.get_pval_intercept(X) def get_PvalWeights(self): X = np.column_stack( (self.data['state'], self.data['factors']) ) return self.logreg.get_pval_weights(X).T def getPrediction(self, state, factors, calcTransitions=False): self._predict(state, factors, calcTransitions) return self.prediction def getPseudoR(self): return self.pseudoR def getTransitionPotentials(self): return self.transitionPotentials def _outputConfidence(self, input): ''' Return confidence (difference between the 2 biggest probabilities) of the LR output. 1 = the maximum confidence, 0 = the least confidence ''' out_scl = self.logreg.predict_proba(input)[0] # Calculate the confidence: out_scl.sort() return int(100 * (out_scl[-1] - out_scl[-2]) ) def outputTransitions(self, input): ''' Return transition potentials of the outputs ''' out_scl = self.logreg.predict_proba(input)[0] out_scl = [int(100 * x) for x in out_scl] result = {} for r, v in enumerate(out_scl): cat = self.catlist[r] result[cat] = v return result def _predict(self, state, factors, calcTransitions=False): ''' Calculate output and confidence rasters using the LR model and input rasters @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). 
''' try: self.rangeChanged.emit(self.tr("Initialize model %p%"), 1) geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] for r in factors: if not state.geoDataMatch(r): raise LRError('Geometries of the input rasters are different!') self.transitionPotentials = None # Reset tr.potentials if they exist # Normalize factors before prediction: for f in factors: f.normalize(mode = 'mean') predicted_band = np.zeros([rows, cols], dtype=np.uint8) confidence_band = np.zeros([rows, cols], dtype=np.uint8) if calcTransitions: self.transitionPotentials = {} for cat in self.catlist: self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8) self.sampler = Sampler(state, factors, ns=self.ns) mask = state.getBand(1).mask.copy() if mask.shape == (): mask = np.zeros([rows, cols], dtype=np.bool) self.updateProgress.emit() self.rangeChanged.emit(self.tr("Prediction %p%"), rows) for i in xrange(rows): for j in xrange(cols): if not mask[i,j]: input = self.sampler.get_inputs(state, i,j) if input != None: input = np.array([input]) out = self.logreg.predict(input) predicted_band[i,j] = out confidence = self._outputConfidence(input) confidence_band[i, j] = confidence if calcTransitions: potentials = self.outputTransitions(input) for cat in self.catlist: map = self.transitionPotentials[cat] map[i, j] = potentials[cat] else: # Input sample is incomplete => mask this pixel mask[i, j] = True self.updateProgress.emit() predicted_bands = [np.ma.array(data = predicted_band, mask = mask, dtype=np.uint8)] confidence_bands = [np.ma.array(data = confidence_band, mask = mask, dtype=np.uint8)] self.prediction = Raster() self.prediction.create(predicted_bands, geodata) self.confidence = Raster() self.confidence.create(confidence_bands, geodata) if calcTransitions: for cat in self.catlist: band = [np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8)] self.transitionPotentials[cat] = Raster() self.transitionPotentials[cat].create(band, geodata) except MemoryError: self.errorReport.emit(self.tr("The system out of memory during LR prediction")) raise except: self.errorReport.emit(self.tr("An unknown error occurs during LR prediction")) raise finally: self.processFinished.emit() def __propagateSamplerSignals(self): self.sampler.rangeChanged.connect(self.__samplerProgressRangeChanged) self.sampler.updateProgress.connect(self.__samplerProgressChanged) self.sampler.samplingFinished.connect(self.__samplerFinished) def __samplerFinished(self): self.sampler.rangeChanged.disconnect(self.__samplerProgressRangeChanged) self.sampler.updateProgress.disconnect(self.__samplerProgressChanged) self.sampler.samplingFinished.disconnect(self.__samplerFinished) self.samplingFinished.emit() def __samplerProgressRangeChanged(self, message, maxValue): self.rangeChanged.emit(message, maxValue) def __samplerProgressChanged(self): self.updateProgress.emit() def save(self): pass def saveSamples(self, fileName): self.sampler.saveSamples(fileName) def setMaxIter(self, maxiter): self.maxiter = maxiter def setTrainingData(self): state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples if not self.logreg: raise LRError('You must create a Logistic Regression model before!') # Normalize factors before sampling: for f in factors: f.normalize(mode = 'mean') self.sampler = Sampler(state, factors, output, ns=self.ns) self.__propagateSamplerSignals() self.sampler.setTrainingData(state, output, shuffle=False, mode=mode, samples=samples) outputVecLen = 
self.sampler.outputVecLen stateVecLen = self.sampler.stateVecLen factorVectLen = self.sampler.factorVectLen size = len(self.sampler.data) self.data = self.sampler.data self.catlist = np.unique(self.data['output']) def train(self): X = np.column_stack( (self.data['state'], self.data['factors']) ) Y = self.data['output'] self.labelCodes = np.unique(Y) self.logreg.fit(X, Y, maxiter=self.maxiter) out = self.logreg.predict(X) depCoef = DependenceCoef(np.ma.array(out), np.ma.array(Y), expand=True) self.Kappa = depCoef.kappa(mode=None) self.pseudoR = depCoef.correctness(percent = False) def setState(self, state): self.state = state def setFactors(self, factors): self.factors = factors def setOutput(self, output): self.output = output def setMode(self, mode): self.mode = mode def setSamples(self, samples): self.samples = samples def startTrain(self): try: self.setTrainingData() self.train() except MemoryError: self.errorReport.emit(self.tr("The system is out of memory during LR training")) raise except: self.errorReport.emit(self.tr("An unknown error occurred during LR training")) raise finally: self.finished.emit()
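# A small illustration (made-up numbers) of the training-data layout used in train() above: the
# state neighbourhood columns and the factor columns are stacked side by side into one design
# matrix X, and the output category vector is Y.
import numpy as np

state_cols  = np.array([[1, 1], [2, 1], [3, 2]])   # e.g. flattened state neighbourhood per sample
factor_cols = np.array([[0.1], [0.7], [0.3]])      # e.g. one factor value per sample
X = np.column_stack((state_cols, factor_cols))     # Shape (3, 3): 2 state columns + 1 factor column
Y = np.array([1, 3, 3])                            # Categories to predict
print(X)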
def __sim(self): ''' One iteration of the simulation. ''' transition = self.crosstable.getCrosstable() self.updatePrediction(self.state) changes = self.getPrediction().getBand(1) # Predicted change map changes = changes + 1 # Filling nodata as 0 can be ambiguous: changes = np.ma.filled(changes, 0) # (cat_code can be 0; to avoid mixing it with no-data, add 1) state = self.getState() new_state = state.getBand(1).copy().astype(np.uint8) # New states (the result of simulation) will be stored there. self.rangeChanged.emit(self.tr("Area Change Analysis %p%"), 2) self.updateProgress.emit() QCoreApplication.processEvents() analyst = AreaAnalyst(state, second = None) self.updateProgress.emit() QCoreApplication.processEvents() categories = state.getBandGradation(1) # Make transitions between categories according to # the number of moved pixels in the crosstable self.rangeChanged.emit(self.tr("Simulation process %p%"), len(categories)**2 - len(categories)) QCoreApplication.processEvents() for initClass in categories: for finalClass in categories: if initClass == finalClass: continue # TODO: Calculate number of pixels to be moved via TransitionMatrix and state raster n = transition.getTransition(initClass, finalClass) # Number of pixels that have to # change categories # (use TransitionMatrix only). if n==0: continue # Find n appropriate places for the transition initClass -> finalClass cat_code = analyst.encode(initClass, finalClass) # Array of places where transitions initClass -> finalClass occur places = (changes==cat_code+1) # cat_code can be 0, do not mix it with no-data in 'changes' variable placesCount = np.sum(places) # print "cat_code, placesCount, n", cat_code, placesCount if placesCount < n: self.logMessage.emit(self.tr("There are more transitions in the transition matrix than the model has found")) # print "There are more transitions in the transition matrix than the model has found" # print "cat_code, placesCount, n", cat_code, placesCount, n QCoreApplication.processEvents() n = placesCount if n >0: confidence = self.getConfidence().getBand(1) # Add some random value rnd = np.random.sample(size=confidence.shape)/1000 # A small random value confidence = np.ma.filled(confidence, 0) + rnd confidence = confidence * places # The higher the number in a cell, the higher the probability of transition in that cell. # Ensure n is not bigger than the number of nonzero-confidence cells placesCount = np.sum(confidence>0) if placesCount < n: # Confidence is zero in some places where transitions have to appear. The transition count will be cropped. # print "Some confidence is zero. cat_code, nonzeroConf, wantedPixels", cat_code, placesCount, n n = placesCount ind = confidence.argsort(axis=None)[-n:] indices = [np.unravel_index(i, confidence.shape) for i in ind] # Now "indices" contains indices of the appropriate places, # make the transition initClass -> finalClass for index in indices: new_state[index] = finalClass self.updateProgress.emit() QCoreApplication.processEvents() result = Raster() result.create([new_state], state.getGeodata()) self.state = result
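# A standalone illustration (not part of the plugin) of the "+1" trick used in __sim() above:
# transition code 0 is a valid category code, so the predicted change map is shifted by one and
# no-data cells are filled with 0, which keeps the two cases distinguishable.
import numpy as np

changes = np.ma.array([[0, 1], [2, 0]], mask=[[0, 0], [0, 1]])  # 0 is a real transition code here
shifted = np.ma.filled(changes + 1, 0)   # no-data -> 0, transition code c -> c + 1
cat_code = 0
places = (shifted == cat_code + 1)       # True only where code 0 really occurs, not at no-data
print(places)                            # [[ True False] [False False]]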
class WoeManager(object): '''This class gets the data extracted from the UI and passes it to the woe function, then gets and stores the result. ''' def __init__(self, factors, areaAnalyst, unit_cell=1, bins = None): ''' @param factors List of the pattern rasters used for prediction of point objects (sites). @param areaAnalyst AreaAnalyst that contains the map of the changes, encodes and decodes class numbers. @param unit_cell Method parameter, pixelsize of resampled rasters. @param bins Dictionary of bins. Bins are binning boundaries that are used to reduce the number of classes. For example if factors = [f0, f1], then bins could be (for example) {0:[bins for f0], 1:[bins for f1]} = {0:[[10, 100, 250]],1:[[0.2, 1, 1.5, 4]]}. A list of lists is used because a factor can be a multiband raster; we need to get a list of bins for every band. For example: factors = [f0, 2-band-factor], bins= {0: [[10, 100, 250]], 1:[[0.2, 1, 1.5, 4], [3, 4, 7]] } ''' self.factors = factors self.analyst = areaAnalyst self.changeMap = areaAnalyst.getChangeMap() self.prediction = None self.confidence = None if (bins != None) and (len(factors) != len(bins.keys())): raise WoeManagerError('Lengths of bins and factors are different!') for r in self.factors: if not self.changeMap.geoDataMatch(r): raise WoeManagerError('Geometries of the input rasters are different!') if self.changeMap.getBandsCount() != 1: raise WoeManagerError('Change map must have one band!') # Get list of codes from the changeMap raster classes = self.changeMap.getBandStat(1)['gradation'] cMap = self.changeMap.getBand(1) self.codes = [int(c) for c in classes] # Codes of transitions initState->finalState (see AreaAnalyst.encode) self.woe = {} for code in self.codes: sites = binaryzation(cMap, [code]) # TODO: reclass factors (continuous factor -> ordinal factor) wMap = np.ma.zeros(cMap.shape) for k in xrange(len(factors)): fact = factors[k] if bins: # Get bins of the factor bin = bins[k] if (bin != None) and fact.getBandsCount() != len(bin): raise WoeManagerError("Count of bins list for multiband factor isn't equal to band count!") else: bin = None for i in range(1, fact.getBandsCount()+1): band = fact.getBand(i) if bin: band = reclass(band, bin[i-1]) band, sites = masks_identity(band, sites) # Combine masks of the rasters weights = woe(band, sites, unit_cell) # WoE for the 'code' (initState->finalState) transition and current 'factor'. wMap = wMap + weights self.woe[code]=wMap # WoE for all factors and the transition. def getConfidence(self): return self.confidence def getPrediction(self, state, factors=None): ''' Most of the models use factors for prediction, but WoE takes the list of factors only once (during initialization). ''' self._predict(state) return self.prediction def getWoe(self): return self.woe def _predict(self, state): ''' Predict the changes. 
''' geodata = self.changeMap.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] if not self.changeMap.geoDataMatch(state): raise WoeManagerError('Geometries of the state and changeMap rasters are different!') prediction = np.zeros((rows,cols)) confidence = np.zeros((rows,cols)) mask = np.zeros((rows,cols)) woe = self.getWoe() stateBand = state.getBand(1) for r in xrange(rows): for c in xrange(cols): oldMax, currMax = -1000, -1000 # Small numbers indexMax = -1 # Index of Max weight initClass = stateBand[r,c] # Init class (state before transition) try: codes = self.analyst.codes(initClass) # Possible final states for code in codes: try: # If not all possible transitions are presented in the changeMap map = woe[code] # Get WoE map of transition 'code' except KeyError: continue w = map[r,c] # The weight in the (r,c)-pixel if w > currMax: indexMax, oldMax, currMax = code, currMax, w decode = self.analyst.decode(indexMax) # Get init & final classes (initState, finalState) prediction[r,c] = decode[1] # final class confidence[r,c] = sigmoid(currMax) - sigmoid(oldMax) except ValueError: mask[r,c] = 1 predicted_band = np.ma.array(data=prediction, mask=mask) self.prediction = Raster() self.prediction.create([predicted_band], geodata) confidence_band = np.ma.array(data=confidence, mask=mask) self.confidence = Raster() self.confidence.create([confidence_band], geodata)
class MCE(object): randomConsistencyIndex = { 2: 0, 3: 0.58, 4: 0.90, 5: 1.12, 6: 1.24, 7: 1.32, 8: 1.41, 9: 1.45, 10: 1.49, 11: 1.51, 12: 1.48, 13: 1.56, 14: 1.57, 15: 1.59, 16: 1.60, 17: 1.61, 18: 1.62, 19: 1.63, 20: 1.63, 21: 1.64, 22: 1.65, 23: 1.65, 24: 1.66, 25: 1.66, 26: 1.67, 27: 1.67, 28: 1.67, 29: 1.68, 30: 1.68, 31: 1.68, 32: 1.69, 33: 1.69, 34: 1.69, 35: 1.69, 36: 1.70, 37: 1.70, 38: 1.70, 39: 1.70 } def __init__(self, factors, wMatr, initStateNum, finalStateNum): ''' Multicriteria evaluation based on Saaty method. It defines transition probability of two classes (initStateNum, finalStateNum). @param factors List of the factor rasters used for prediction. @param wMatr List of lists -- NxN comparison matrix. @param initStateNum Number of initial state (the state before transition). @param finalStateNum Number of final state (the state after transition). ''' self.factors = factors self.initStateNum = initStateNum self.finalStateNum = finalStateNum # Check matrix dimension and factor count, apply normalization self.dim = 0 for f in factors: self.dim = self.dim + f.getBandsCount() f.normalize(mode = 'maxmin') if self.dim != len(wMatr): raise MCEError('Matrix size is different from the number of variables!') # Check if the matrix is valid for i in xrange(self.dim): if len(wMatr[i]) != self.dim: raise MCEError('The weight matrix is not NxN!') EPSILON = 0.000001 # A small number for i in xrange(self.dim): if wMatr[i][i] != 1: raise MCEError('w[i,i] not equal 1 !') for j in xrange(i+1, self.dim): if abs(wMatr[i][j] * wMatr[j][i] - 1) > EPSILON: raise MCEError('w[i,j] * w[j,i] not equal 1 !') self.wMatr = np.array(wMatr) self.weights = None # Weights of the factors, calculated using wMatr # It's a list, the length is self.dim # first element is the weight of first band of the first factor and so on: # [W_f1, ... weights of 1-st factors ... , W_f2, ... weights of 1-st factors..., W_fn, ...] self.consistency =None # Consistency ratio of the comparison matrix. self.prediction = None self.confidence = None def getConsistency(self): if self.consistency == None: self.setWeights() return self.consistency def getConfidence(self): return self.confidence def getPrediction(self, state, factors=None): ''' Most of the models use factors for prediction, but WoE takes list of factors only once (during the initialization). ''' self._predict(state) return self.prediction def getWeights(self): if self.weights == None: self.setWeights() return self.weights def _predict(self, state): ''' Predict the changes. 
''' geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] # Get locations where self.initStateNum occurs band = state.getBand(1) initStateMask = binaryzation(band, [self.initStateNum]) mask = band.mask # Calculate the summary map of factor weights # Confidence: # confidence is the summary map of factors, if current state = self.initState # confidence is 0, if current state != self.initState # Prediction: # predicted value is a constant = self.finalStateNum, if current state = self.initState # predicted value is current state, if current state != self.initState confidence = np.zeros((rows,cols)) weights = self.getWeights() weightNum = 0 # Number of processed weights for f in self.factors: if not f.geoDataMatch(state): raise MCEError('Geometries of the state and factor rasters are different!') f.normalize(mode = 'maxmin') for i in xrange(f.getBandsCount()): band = f.getBand(i+1) confidence = confidence + band*weights[weightNum] mask = np.ma.mask_or(mask, band.mask) weightNum = weightNum + 1 confidence = confidence*initStateMask prediction = np.copy(state.getBand(1)) prediction = np.logical_not(initStateMask) * prediction prediction = prediction + initStateMask*self.finalStateNum predicted_band = np.ma.array(data=prediction, mask=mask) self.prediction = Raster() self.prediction.create([predicted_band], geodata) confidence_band = np.ma.array(data=confidence, mask=mask) self.confidence = Raster() self.confidence.create([confidence_band], geodata) def setWeights(self): ''' Calculate the weights and the consistency ratio. ''' # Weights w, v = np.linalg.eig(self.wMatr) maxW = np.max(w) maxInd = list(w).index(maxW) # Index of the biggest eigenvalue maxW = maxW.real v = v[:,maxInd] # The eigenvector self.weights = [x.real for x in v] # Matrix v can be complex self.weights = self.weights/sum(self.weights) # Consistency ratio if self.dim > 2: ci = (maxW - self.dim)/(self.dim - 1) try: ri = self.randomConsistencyIndex[self.dim] self.consistency = ci/ri except KeyError: self.consistency = -1 else: self.consistency = 0
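# A worked, standalone example (made-up comparison matrix) of the Saaty procedure implemented in
# setWeights() above: the weights are the normalized principal eigenvector of the pairwise
# comparison matrix, and the consistency ratio is CI / RI with RI taken from randomConsistencyIndex.
import numpy as np

wMatr = np.array([[1.0,   3.0,   5.0],
                  [1/3.0, 1.0,   3.0],
                  [1/5.0, 1/3.0, 1.0]])
w, v = np.linalg.eig(wMatr)
maxInd = np.argmax(w.real)               # Index of the biggest eigenvalue
maxW = w[maxInd].real
weights = v[:, maxInd].real
weights = weights / weights.sum()        # Approximately [0.64, 0.26, 0.10]
ci = (maxW - 3) / (3 - 1)                # Consistency index
ri = 0.58                                # Random consistency index for a 3x3 matrix (see the table above)
print(weights)
print(ci / ri)                           # Consistency ratio, well below 0.1 for this matrix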
class MlpManager(QObject): '''This class gets the data extracted from the UI and pass it to multi-layer perceptron, then gets and stores the result. ''' updateGraph = pyqtSignal(float, float) # Train error, val. error updateMinValErr = pyqtSignal(float) # Min validation error updateDeltaRMS = pyqtSignal(float) # Delta of RMS: min(valError) - currentValError processFinished = pyqtSignal() logMessage = pyqtSignal(str) def __init__(self, ns=0, MLP=None): QObject.__init__(self) self.MLP = MLP self.layers = None if self.MLP: self.layers = self.getMlpTopology() self.ns = ns # Neighbourhood size of training rasters. self.data = None # Training data self.classlist = None # List of unique output values of the output raster self.train_error = None # Error on training set self.val_error = None # Error on validation set self.minValError = None # The minimum error that is achieved on the validation set # Results of the MLP prediction self.prediction = None # Raster of the MLP prediction results self.confidence = None # Raster of the MLP results confidence # Outputs of the activation function for small and big numbers self.sigmax, self.sigmin = sigmoid(100), sigmoid(-100) # Max and Min of the sigmoid function self.sigrange = self.sigmax - self.sigmin # Range of the sigmoid def computeMlpError(self, sample): '''Get MLP error on the sample''' input = np.hstack( (sample['state'], sample['factors']) ) out = self.getOutput( input ) err = ((sample['output'] - out)**2).sum()/len(out) return err def computePerformance(self, train_indexes, val_ind): '''Check errors of training and validation sets @param train_indexes Tuple that contains indexes of the first and last elements of the training set. @param val_ind Tuple that contains indexes of the first and last elements of the validation set. ''' train_error = 0 train_sampl = train_indexes[1] - train_indexes[0] # Count of training samples for i in range(train_indexes[0], train_indexes[1]): train_error = train_error + self.computeMlpError(sample = self.data[i]) self.setTrainError(train_error/train_sampl) if val_ind: val_error = 0 val_sampl = val_ind[1] - val_ind[0] for i in xrange(val_ind[0], val_ind[1]): val_error = val_error + self.computeMlpError(sample = self.data[i]) self.setValError(val_error/val_sampl) def copyWeights(self): '''Deep copy of the MLP weights''' return copy.deepcopy(self.MLP.weights) def createMlp(self, state, factors, output, hidden_layers): ''' @param state Raster of the current state (classes) values. @param factors List of the factor rasters (predicting variables). @param hidden_layers List of neuron counts in hidden layers. @param ns Neighbourhood size. 
''' if output.getBandsCount() != 1: raise MlpManagerError('Output layer must have one band!') input_neurons = 0 for raster in [state] + factors: input_neurons = input_neurons + raster.getNeighbourhoodSize(self.ns) # Output class (neuron) count band = output.getBand(1) self.classlist = np.unique(band.compressed()) classes = len(self.classlist) # set neuron counts in the MLP layers self.layers = hidden_layers self.layers.insert(0, input_neurons) self.layers.append(classes) self.MLP = MLP(*self.layers) def getConfidence(self): return self.confidence def getInputVectLen(self): '''Length of the input data vector of the MLP''' shape = self.getMlpTopology() return shape[0] def getOutput(self, input_vector): out = self.MLP.propagate_forward( input_vector ) return out def getOutputVectLen(self): '''Length of the output data vector of the MLP''' shape = self.getMlpTopology() return shape[-1] def getOutputVector(self, val): '''Convert a number val into a vector; for example, let self.classlist = [1, 3, 4] then if val = 1, result = [ 1, -1, -1] if val = 3, result = [-1, 1, -1] if val = 4, result = [-1, -1, 1] where -1 is the minimum of the sigmoid and 1 is the maximum of the sigmoid ''' size = self.getOutputVectLen() res = np.ones(size) * (self.sigmin) ind = np.where(self.classlist==val) res[ind] = self.sigmax return res def getMinValError(self): return self.minValError def getMlpTopology(self): return self.MLP.shape def getPrediction(self, state, factors): self._predict(state, factors) return self.prediction def getTrainError(self): return self.train_error def getValError(self): return self.val_error def outputConfidence(self, output): ''' Return confidence (difference between the 2 biggest values) of the MLP output. ''' # Scale the output to range [0,1] out_scl = 1.0 * (output - self.sigmin) / self.sigrange # Calculate the confidence: out_scl.sort() return out_scl[-1] - out_scl[-2] def _predict(self, state, factors): ''' Calculate output and confidence rasters using the MLP model and input rasters @param state Raster of the current state (classes) values. @param factors List of the factor rasters (predicting variables). 
''' geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] for r in factors: if not state.geoDataMatch(r): raise MlpManagerError('Geometries of the input rasters are different!') # Normalize factors before prediction: for f in factors: f.normalize(mode = 'mean') predicted_band = np.zeros([rows, cols]) confidence_band = np.zeros([rows, cols]) sampler = Sampler(state, factors, ns=self.ns) mask = state.getBand(1).mask.copy() for i in xrange(rows): for j in xrange(cols): if not mask[i,j]: input = sampler.get_inputs(state, factors, i,j) if input != None: out = self.getOutput(input) # Get index of the biggest output value as the result biggest = max(out) res = list(out).index(biggest) predicted_band[i, j] = self.classlist[res] confidence = self.outputConfidence(out) confidence_band[i, j] = confidence else: # Input sample is incomplete => mask this pixel mask[i, j] = True predicted_bands = [np.ma.array(data = predicted_band, mask = mask)] confidence_bands = [np.ma.array(data = confidence_band, mask = mask)] self.prediction = Raster() self.prediction.create(predicted_bands, geodata) self.confidence = Raster() self.confidence.create(confidence_bands, geodata) def readMlp(self): pass def resetErrors(self): self.val_error = np.finfo(np.float).max self.train_error = np.finfo(np.float).max def resetMlp(self): self.MLP.reset() self.resetErrors() def saveMlp(self): pass def setMlpWeights(self, w): '''Set weights of the MLP''' self.MLP.weights = w def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None): ''' @param state Raster of the current state (classes) values. @param factors List of the factor rasters (predicting variables). @param output Raster that contains classes to predict. @param shuffle Perform random shuffle. @param mode Type of sampling method: All Get all pixels Normal Get samples. Count of samples in the data=samples. Balanced Undersampling of major classes and/or oversampling of minor classes. @samples Sample count of the training data (doesn't used in 'All' mode). ''' if not self.MLP: raise MlpManagerError('You must create a MLP before!') # Normalize factors before sampling: for f in factors: f.normalize(mode = 'mean') sampler = Sampler(state, factors, output, self.ns) sampler.setTrainingData(state, factors, output, shuffle, mode, samples) outputVecLen = self.getOutputVectLen() stateVecLen = sampler.stateVecLen factorVectLen = sampler.factorVectLen size = len(sampler.data) self.data = np.zeros(size, dtype=[('state', float, stateVecLen), ('factors', float, factorVectLen), ('output', float, outputVecLen)]) self.data['state'] = sampler.data['state'] self.data['factors'] = sampler.data['factors'] self.data['output'] = [self.getOutputVector(sample['output']) for sample in sampler.data] def setTrainError(self, error): self.train_error = error def setValError(self, error): self.val_error = error def setEpochs(self, epochs): self.epochs = epochs def setValPercent(self, value=20): self.valPercent = value def setLRate(self, value=0.1): self.lrate = value def setMomentum(self, value=0.01): self.momentum = value def setContinueTrain(self, value=False): self.continueTrain = value def startTrain(self): self.train(self.epochs, self.valPercent, self.lrate, self.momentum, self.continueTrain) def train(self, epochs, valPercent=20, lrate=0.1, momentum=0.01, continue_train=False): '''Perform the training procedure on the MLP and save the best neural net @param epoch Max iteration count. @param valPercent Percent of the validation set. 
@param lrate Learning rate. @param momentum Learning momentum. @param continue_train If False then it is new training cycle, reset weights training and validation error. If True, then continue training. ''' samples_count = len(self.data) val_sampl_count = samples_count*valPercent/100 apply_validation = True if val_sampl_count>0 else False # Use or not use validation set train_sampl_count = samples_count - val_sampl_count # Set first train_sampl_count as training set, the other as validation set train_indexes = (0, train_sampl_count) val_indexes = (train_sampl_count, samples_count) if apply_validation else None if not continue_train: self.resetMlp() self.minValError = self.getValError() # The minimum error that is achieved on the validation set last_train_err = self.getTrainError() best_weights = self.copyWeights() # The MLP weights when minimum error that is achieved on the validation set for epoch in range(epochs): self.trainEpoch(train_indexes, lrate, momentum) self.computePerformance(train_indexes, val_indexes) self.updateGraph.emit(self.getTrainError(), self.getValError()) self.updateDeltaRMS.emit(self.getMinValError() - self.getValError()) last_train_err = self.getTrainError() self.setTrainError(last_train_err) if apply_validation and (self.getValError() < self.getMinValError()): self.minValError = self.getValError() best_weights = self.copyWeights() self.updateMinValErr.emit(self.getMinValError()) self.setMlpWeights(best_weights) self.processFinished.emit() def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01): '''Perform a training epoch on the MLP @param train_ind Tuple of the min&max indexes of training samples in the samples data. @param val_ind Tuple of the min&max indexes of validation samples in the samples data. @param lrate Learning rate. @param momentum Learning momentum. ''' train_sampl = train_indexes[1] - train_indexes[0] for i in range(train_sampl): n = np.random.randint( *train_indexes ) sample = self.data[n] input = np.hstack( (sample['state'],sample['factors']) ) self.getOutput( input ) self.MLP.propagate_backward( sample['output'], lrate, momentum )
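# --- Usage sketch (not part of the original source) -------------------------------
# A minimal example of how the MlpManager above is typically driven: create the
# network, sample training data, train, then predict. The rasters are assumed to be
# prepared elsewhere; only methods defined in the class above are called, and the
# hidden-layer size and hyperparameters are illustrative.
def run_mlp_example(state, factors, output):
    manager = MlpManager(ns=1)                        # 3x3 neighbourhood around each pixel
    manager.createMlp(state, factors, output, [10])   # one hidden layer with 10 neurons
    manager.setTrainingData(state, factors, output, shuffle=True, mode='All')
    manager.setEpochs(100)
    manager.setValPercent(20)
    manager.setLRate(0.1)
    manager.setMomentum(0.01)
    manager.setContinueTrain(False)
    manager.startTrain()                              # emits processFinished when done
    prediction = manager.getPrediction(state, factors)
    confidence = manager.getConfidence()
    return prediction, confidence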
class LR(QObject): """ Implements Logistic Regression model definition and calibration (maximum liklihood parameter estimation). """ rangeChanged = pyqtSignal(str, int) updateProgress = pyqtSignal() processFinished = pyqtSignal() samplingFinished = pyqtSignal() finished = pyqtSignal() logMessage = pyqtSignal(str) errorReport = pyqtSignal(str) def __init__(self, ns=0, logreg=None): QObject.__init__(self) if logreg: self.logreg = logreg else: self.logreg = mlr.MLR() self.state = None self.factors = None self.output = None self.mode = "All" self.samples = None self.catlist = None self.ns = ns # Neighbourhood size of training rasters. self.data = None # Training data self.maxiter = 100 # Maximum of fitting iterations self.sampler = None # Sampler # Results of the LR prediction self.prediction = None # Raster of the LR prediction results self.confidence = None # Raster of the LR results confidence (1 = the maximum confidence, 0 = the least confidence) self.Kappa = 0 # Kappa value self.pseudoR = 0 # Pseudo R-squared (Count) (http://www.ats.ucla.edu/stat/mult_pkg/faq/general/Psuedo_RSquareds.htm) self.transitionPotentials = None # Dictionary of transition potencial maps: {category1: map1, category2: map2, ...} def getCoef(self): return self.logreg.get_weights().T def getConfidence(self): return self.confidence def getIntercept(self): return self.logreg.get_intercept() def getKappa(self): return self.Kappa def getStdErrIntercept(self): X = np.column_stack((self.data['state'], self.data['factors'])) return self.logreg.get_stderr_intercept(X) def getStdErrWeights(self): X = np.column_stack((self.data['state'], self.data['factors'])) return self.logreg.get_stderr_weights(X).T def get_PvalIntercept(self): X = np.column_stack((self.data['state'], self.data['factors'])) return self.logreg.get_pval_intercept(X) def get_PvalWeights(self): X = np.column_stack((self.data['state'], self.data['factors'])) return self.logreg.get_pval_weights(X).T def getPrediction(self, state, factors, calcTransitions=False): self._predict(state, factors, calcTransitions) return self.prediction def getPseudoR(self): return self.pseudoR def getTransitionPotentials(self): return self.transitionPotentials def _outputConfidence(self, input): ''' Return confidence (difference between 2 biggest probabilities) of the LR output. 1 = the maximum confidence, 0 = the least confidence ''' out_scl = self.logreg.predict_proba(input)[0] # Calculate the confidence: out_scl.sort() return int(100 * (out_scl[-1] - out_scl[-2])) def outputTransitions(self, input): ''' Return transition potential of the outputs ''' out_scl = self.logreg.predict_proba(input)[0] out_scl = [int(100 * x) for x in out_scl] result = {} for r, v in enumerate(out_scl): cat = self.catlist[r] result[cat] = v return result def _predict(self, state, factors, calcTransitions=False): ''' Calculate output and confidence rasters using LR model and input rasters @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). 
''' try: self.rangeChanged.emit(self.tr("Initialize model %p%"), 1) geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] for r in factors: if not state.geoDataMatch(r): raise LRError( 'Geometries of the input rasters are different!') self.transitionPotentials = None # Reset tr.potentials if they exist # Normalize factors before prediction: for f in factors: f.normalize(mode='mean') predicted_band = np.zeros([rows, cols], dtype=np.uint8) confidence_band = np.zeros([rows, cols], dtype=np.uint8) if calcTransitions: self.transitionPotentials = {} for cat in self.catlist: self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8) self.sampler = Sampler(state, factors, ns=self.ns) mask = state.getBand(1).mask.copy() if mask.shape == (): mask = np.zeros([rows, cols], dtype=np.bool) self.updateProgress.emit() self.rangeChanged.emit(self.tr("Prediction %p%"), rows) for i in xrange(rows): for j in xrange(cols): if not mask[i, j]: input = self.sampler.get_inputs(state, i, j) if input != None: input = np.array([input]) out = self.logreg.predict(input) predicted_band[i, j] = out confidence = self._outputConfidence(input) confidence_band[i, j] = confidence if calcTransitions: potentials = self.outputTransitions(input) for cat in self.catlist: map = self.transitionPotentials[cat] map[i, j] = potentials[cat] else: # Input sample is incomplete => mask this pixel mask[i, j] = True self.updateProgress.emit() predicted_bands = [ np.ma.array(data=predicted_band, mask=mask, dtype=np.uint8) ] confidence_bands = [ np.ma.array(data=confidence_band, mask=mask, dtype=np.uint8) ] self.prediction = Raster() self.prediction.create(predicted_bands, geodata) self.confidence = Raster() self.confidence.create(confidence_bands, geodata) if calcTransitions: for cat in self.catlist: band = [ np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8) ] self.transitionPotentials[cat] = Raster() self.transitionPotentials[cat].create(band, geodata) except MemoryError: self.errorReport.emit( self.tr("The system out of memory during LR prediction")) raise except: self.errorReport.emit( self.tr("An unknown error occurs during LR prediction")) raise finally: self.processFinished.emit() def __propagateSamplerSignals(self): self.sampler.rangeChanged.connect(self.__samplerProgressRangeChanged) self.sampler.updateProgress.connect(self.__samplerProgressChanged) self.sampler.samplingFinished.connect(self.__samplerFinished) def __samplerFinished(self): self.sampler.rangeChanged.disconnect( self.__samplerProgressRangeChanged) self.sampler.updateProgress.disconnect(self.__samplerProgressChanged) self.sampler.samplingFinished.disconnect(self.__samplerFinished) self.samplingFinished.emit() def __samplerProgressRangeChanged(self, message, maxValue): self.rangeChanged.emit(message, maxValue) def __samplerProgressChanged(self): self.updateProgress.emit() def save(self): pass def saveSamples(self, fileName): self.sampler.saveSamples(fileName) def setMaxIter(self, maxiter): self.maxiter = maxiter def setTrainingData(self): state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples if not self.logreg: raise LRError( 'You must create a Logistic Regression model before!') # Normalize factors before sampling: for f in factors: f.normalize(mode='mean') self.sampler = Sampler(state, factors, output, ns=self.ns) self.__propagateSamplerSignals() self.sampler.setTrainingData(state, output, shuffle=False, mode=mode, samples=samples) outputVecLen = 
self.sampler.outputVecLen stateVecLen = self.sampler.stateVecLen factorVectLen = self.sampler.factorVectLen size = len(self.sampler.data) self.data = self.sampler.data self.catlist = np.unique(self.data['output']) def train(self): X = np.column_stack((self.data['state'], self.data['factors'])) Y = self.data['output'] self.labelCodes = np.unique(Y) self.logreg.fit(X, Y, maxiter=self.maxiter) out = self.logreg.predict(X) depCoef = DependenceCoef(np.ma.array(out), np.ma.array(Y), expand=True) self.Kappa = depCoef.kappa(mode=None) self.pseudoR = depCoef.correctness(percent=False) def setState(self, state): self.state = state def setFactors(self, factors): self.factors = factors def setOutput(self, output): self.output = output def setMode(self, mode): self.mode = mode def setSamples(self, samples): self.samples = samples def startTrain(self): try: self.setTrainingData() self.train() except MemoryError: self.errorReport.emit( self.tr("The system is out of memory during LR training")) raise except: self.errorReport.emit( self.tr("An unknown error occurred during LR training")) raise finally: self.finished.emit()
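# --- Usage sketch (not part of the original source) -------------------------------
# A plausible calibration/prediction cycle for the LR wrapper above. The state,
# factor and output rasters are assumed to be loaded elsewhere; only methods defined
# in the class above are used, and the parameter values are illustrative.
def run_lr_example(state, factors, output):
    lr = LR(ns=0)                      # pixel-wise model, no neighbourhood
    lr.setState(state)
    lr.setFactors(factors)
    lr.setOutput(output)
    lr.setMode('All')                  # sample every unmasked pixel
    lr.setMaxIter(100)
    lr.startTrain()                    # samples data, fits the MLR model, computes Kappa
    prediction = lr.getPrediction(state, factors, calcTransitions=True)
    return prediction, lr.getConfidence(), lr.getTransitionPotentials()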
class MlpManager(QObject): '''This class gets the data extracted from the UI and pass it to multi-layer perceptron, then gets and stores the result. ''' updateGraph = pyqtSignal(float, float) # Train error, val. error updateMinValErr = pyqtSignal(float) # Min validation error updateDeltaRMS = pyqtSignal(float) # Delta of RMS: min(valError) - currentValError updateKappa = pyqtSignal(float) # Kappa value processFinished = pyqtSignal() processInterrupted = pyqtSignal() logMessage = pyqtSignal(str) errorReport = pyqtSignal(str) rangeChanged = pyqtSignal(str, int) updateProgress = pyqtSignal() def __init__(self, ns=0, MLP=None): QObject.__init__(self) self.MLP = MLP self.interrupted = False self.layers = None if self.MLP: self.layers = self.getMlpTopology() self.ns = ns # Neighbourhood size of training rasters. self.data = None # Training data self.catlist = None # List of unique output values of the output raster self.train_error = None # Error on training set self.val_error = None # Error on validation set self.minValError = None # The minimum error that is achieved on the validation set self.valKappa = 0 # Kappa on on the validation set self.sampler = None # Sampler # Results of the MLP prediction self.prediction = None # Raster of the MLP prediction results self.confidence = None # Raster of the MLP results confidence (1 = the maximum confidence, 0 = the least confidence) self.transitionPotentials = None # Dictionary of transition potencial maps: {category1: map1, category2: map2, ...} # Outputs of the activation function for small and big numbers self.sigmax, self.sigmin = sigmoid(100), sigmoid(-100) # Max and Min of the sigmoid function self.sigrange = self.sigmax - self.sigmin # Range of the sigmoid def computeMlpError(self, sample): '''Get MLP error on the sample''' input = np.hstack( (sample['state'], sample['factors']) ) out = self.getOutput( input ) err = ((sample['output'] - out)**2).sum()/len(out) return err def computePerformance(self, train_indexes, val_ind): '''Check errors of training and validation sets @param train_indexes Tuple that contains indexes of the first and last elements of the training set. @param val_ind Tuple that contains indexes of the first and last elements of the validation set. ''' train_error = 0 train_sampl = train_indexes[1] - train_indexes[0] # Count of training samples for i in range(train_indexes[0], train_indexes[1]): train_error = train_error + self.computeMlpError(sample = self.data[i]) self.setTrainError(train_error/train_sampl) if val_ind: val_error = 0 val_sampl = val_ind[1] - val_ind[0] answers = np.ma.zeros(val_sampl) out = np.ma.zeros(val_sampl) for i in xrange(val_ind[0], val_ind[1]): sample = self.data[i] val_error = val_error + self.computeMlpError(sample = self.data[i]) input = np.hstack( (sample['state'],sample['factors']) ) output = self.getOutput(input) out[i-val_ind[0]] = self.outCategory(output) answers[i-val_ind[0]] = self.outCategory(sample['output']) self.setValError(val_error/val_sampl) depCoef = DependenceCoef(out, answers, expand=True) self.valKappa = depCoef.kappa(mode=None) def copyWeights(self): '''Deep copy of the MLP weights''' return copy.deepcopy(self.MLP.weights) def createMlp(self, state, factors, output, hidden_layers): ''' @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). @param hidden_layers List of neuron counts in hidden layers. @param ns Neighbourhood size. 
''' if output.getBandsCount() != 1: raise MlpManagerError('Output layer must have one band!') input_neurons = 0 for raster in factors: input_neurons = input_neurons+ raster.getNeighbourhoodSize(self.ns) # state raster contains categories. We need use n-1 dummy variables (where n = number of categories) input_neurons = input_neurons + (len(state.getBandGradation(1))-1) * state.getNeighbourhoodSize(self.ns) # Output category's (neuron) list and count self.catlist = output.getBandGradation(1) categories = len(self.catlist) # set neuron counts in the MLP layers self.layers = hidden_layers self.layers.insert(0, input_neurons) self.layers.append(categories) self.MLP = MLP(*self.layers) def getConfidence(self): return self.confidence def getInputVectLen(self): '''Length of input data vector of the MLP''' shape = self.getMlpTopology() return shape[0] def getOutput(self, input_vector): out = self.MLP.propagate_forward( input_vector ) return out def getOutputVectLen(self): '''Length of input data vector of the MLP''' shape = self.getMlpTopology() return shape[-1] def getOutputVector(self, val): '''Convert a number val into vector, for example, let self.catlist = [1, 3, 4] then if val = 1, result = [ 1, -1, -1] if val = 3, result = [-1, 1, -1] if val = 4, result = [-1, -1, 1] where -1 is minimum of the sigmoid, 1 is max of the sigmoid ''' size = self.getOutputVectLen() res = np.ones(size) * (self.sigmin) ind = np.where(self.catlist==val) res[ind] = self.sigmax return res def getMinValError(self): return self.minValError def getMlpTopology(self): return self.MLP.shape def getKappa(self): return self.valKappa def getPrediction(self, state, factors, calcTransitions=False): self._predict(state, factors, calcTransitions) return self.prediction def getTrainError(self): return self.train_error def getTransitionPotentials(self): return self.transitionPotentials def getValError(self): return self.val_error def outCategory(self, out_vector): # Get index of the biggest output value as the result biggest = max(out_vector) res = list(out_vector).index(biggest) res = self.catlist[res] return res def outputConfidence(self, output, scale=True): ''' Return confidence (difference between 2 biggest values) of the MLP output. @param output: The confidence @param scale: If True, then scale the confidence to int [0, 1, ..., 100] percent ''' out_scl = self.scaleOutput(output, percent=scale) out_scl.sort() return out_scl[-1] - out_scl[-2] def outputTransitions(self, output, scale=True): ''' Return transition potencial of the outputs scaled to [0,1] or 1-100 @param output: The output of MLP @param scale: If True, then scale the transitions to int ([0, 1, ..., 100]) percent ''' out_scl = self.scaleOutput(output, percent=scale) result = {} for r, v in enumerate(out_scl): cat = self.catlist[r] result[cat] = v return result def scaleOutput(self, output, percent=True): ''' Scale the output to range [0,1] or 1-100 @param output: Output of a MLP @param percent: If True, then scale the output to int [0, 1, ..., 100] percent ''' res = 1.0 * (output - self.sigmin) / self.sigrange if percent: res = [ int(100 * x) for x in res] return res def _predict(self, state, factors, calcTransitions=False): ''' Calculate output and confidence rasters using MLP model and input rasters @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). 
''' try: self.rangeChanged.emit(self.tr("Initialize model %p%"), 1) geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] for r in factors: if not state.geoDataMatch(r): raise MlpManagerError('Geometries of the input rasters are different!') self.transitionPotentials = None # Reset tr.potentials if they exist # Normalize factors before prediction: for f in factors: f.normalize(mode = 'mean') predicted_band = np.zeros([rows, cols], dtype=np.uint8) confidence_band = np.zeros([rows, cols], dtype=np.uint8) if calcTransitions: self.transitionPotentials = {} for cat in self.catlist: self.transitionPotentials[cat] = np.zeros([rows, cols], dtype=np.uint8) self.sampler = Sampler(state, factors, ns=self.ns) mask = state.getBand(1).mask.copy() if mask.shape == (): mask = np.zeros([rows, cols], dtype=np.bool) self.updateProgress.emit() self.rangeChanged.emit(self.tr("Prediction %p%"), rows) for i in xrange(rows): for j in xrange(cols): if not mask[i,j]: input = self.sampler.get_inputs(state, i,j) if input != None: out = self.getOutput(input) res = self.outCategory(out) predicted_band[i, j] = res confidence = self.outputConfidence(out) confidence_band[i, j] = confidence if calcTransitions: potentials = self.outputTransitions(out) for cat in self.catlist: map = self.transitionPotentials[cat] map[i, j] = potentials[cat] else: # Input sample is incomplete => mask this pixel mask[i, j] = True self.updateProgress.emit() predicted_bands = [np.ma.array(data = predicted_band, mask = mask, dtype=np.uint8)] confidence_bands = [np.ma.array(data = confidence_band, mask = mask, dtype=np.uint8)] self.prediction = Raster() self.prediction.create(predicted_bands, geodata) self.confidence = Raster() self.confidence.create(confidence_bands, geodata) if calcTransitions: for cat in self.catlist: band = [np.ma.array(data=self.transitionPotentials[cat], mask=mask, dtype=np.uint8)] self.transitionPotentials[cat] = Raster() self.transitionPotentials[cat].create(band, geodata) except MemoryError: self.errorReport.emit(self.tr("The system out of memory during ANN prediction")) raise except: self.errorReport.emit(self.tr("An unknown error occurs during ANN prediction")) raise def readMlp(self): pass def resetErrors(self): self.val_error = np.finfo(np.float).max self.train_error = np.finfo(np.float).max def resetMlp(self): self.MLP.reset() self.resetErrors() def saveMlp(self): pass def saveSamples(self, fileName): self.sampler.saveSamples(fileName) def setMlpWeights(self, w): '''Set weights of the MLP''' self.MLP.weights = w def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None): ''' @param state Raster of the current state (categories) values. @param factors List of the factor rasters (predicting variables). @param output Raster that contains categories to predict. @param shuffle Perform random shuffle. @param mode Type of sampling method: All Get all pixels Random Get samples. Count of samples in the data=samples. Stratified Undersampling of major categories and/or oversampling of minor categories. @samples Sample count of the training data (doesn't used in 'All' mode). 
''' if not self.MLP: raise MlpManagerError('You must create a MLP before!') # Normalize factors before sampling: for f in factors: f.normalize(mode = 'mean') self.sampler = Sampler(state, factors, output, self.ns) self.sampler.setTrainingData(state=state, output=output, shuffle=shuffle, mode=mode, samples=samples) outputVecLen = self.getOutputVectLen() stateVecLen = self.sampler.stateVecLen factorVectLen = self.sampler.factorVectLen size = len(self.sampler.data) self.data = np.zeros(size, dtype=[('coords', float, 2), ('state', float, stateVecLen), ('factors', float, factorVectLen), ('output', float, outputVecLen)]) self.data['coords'] = self.sampler.data['coords'] self.data['state'] = self.sampler.data['state'] self.data['factors'] = self.sampler.data['factors'] self.data['output'] = [self.getOutputVector(sample['output']) for sample in self.sampler.data] def setTrainError(self, error): self.train_error = error def setValError(self, error): self.val_error = error def setEpochs(self, epochs): self.epochs = epochs def setValPercent(self, value=20): self.valPercent = value def setLRate(self, value=0.1): self.lrate = value def setMomentum(self, value=0.01): self.momentum = value def setContinueTrain(self, value=False): self.continueTrain = value def startTrain(self): self.train(self.epochs, self.valPercent, self.lrate, self.momentum, self.continueTrain) def stopTrain(self): self.interrupted = True def train(self, epochs, valPercent=20, lrate=0.1, momentum=0.01, continue_train=False): '''Perform the training procedure on the MLP and save the best neural net @param epoch Max iteration count. @param valPercent Percent of the validation set. @param lrate Learning rate. @param momentum Learning momentum. @param continue_train If False then it is new training cycle, reset weights training and validation error. If True, then continue training. 
''' try: samples_count = len(self.data) val_sampl_count = samples_count*valPercent/100 apply_validation = True if val_sampl_count>0 else False # Use or not use validation set train_sampl_count = samples_count - val_sampl_count # Set first train_sampl_count as training set, the other as validation set train_indexes = (0, train_sampl_count) val_indexes = (train_sampl_count, samples_count) if apply_validation else None if not continue_train: self.resetMlp() self.minValError = self.getValError() # The minimum error that is achieved on the validation set last_train_err = self.getTrainError() best_weights = self.copyWeights() # The MLP weights when minimum error that is achieved on the validation set self.rangeChanged.emit(self.tr("Train model %p%"), epochs) for epoch in range(epochs): self.trainEpoch(train_indexes, lrate, momentum) self.computePerformance(train_indexes, val_indexes) self.updateGraph.emit(self.getTrainError(), self.getValError()) self.updateDeltaRMS.emit(self.getMinValError() - self.getValError()) self.updateKappa.emit(self.getKappa()) QCoreApplication.processEvents() if self.interrupted: self.processInterrupted.emit() break last_train_err = self.getTrainError() self.setTrainError(last_train_err) if apply_validation and (self.getValError() < self.getMinValError()): self.minValError = self.getValError() best_weights = self.copyWeights() self.updateMinValErr.emit(self.getMinValError()) self.updateProgress.emit() self.setMlpWeights(best_weights) except MemoryError: self.errorReport.emit(self.tr("The system out of memory during ANN training")) raise except: self.errorReport.emit(self.tr("An unknown error occurs during ANN trainig")) raise finally: self.processFinished.emit() def trainEpoch(self, train_indexes, lrate=0.1, momentum=0.01): '''Perform a training epoch on the MLP @param train_ind Tuple of the min&max indexes of training samples in the samples data. @param val_ind Tuple of the min&max indexes of validation samples in the samples data. @param lrate Learning rate. @param momentum Learning momentum. ''' train_sampl = train_indexes[1] - train_indexes[0] for i in range(train_sampl): n = np.random.randint( *train_indexes ) sample = self.data[n] input = np.hstack( (sample['state'],sample['factors']) ) self.getOutput( input ) # Forward propagation self.MLP.propagate_backward( sample['output'], lrate, momentum )
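# --- Illustration (not from the original source) ----------------------------------
# How getOutputVector() and outCategory() above map a category value to a target
# vector and back. Plain NumPy stand-ins are used for the sigmoid bounds; the
# category list [1, 3, 4] is the hypothetical example from the docstring.
import numpy as np

def encode_category(catlist, val, sigmin=-1.0, sigmax=1.0):
    """Category value -> target vector: sigmax at the category's index, sigmin elsewhere."""
    res = np.ones(len(catlist)) * sigmin
    res[np.where(catlist == val)] = sigmax
    return res

def decode_category(catlist, out_vector):
    """Output vector -> category value: the category whose output neuron is largest."""
    return catlist[int(np.argmax(out_vector))]

catlist = np.array([1, 3, 4])
target = encode_category(catlist, 3)           # -> [-1.,  1., -1.]
assert decode_category(catlist, target) == 3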
class WoeManager(QObject): """This class gets the data extracted from the UI and pass it to woe function, then gets and stores the result. """ rangeChanged = pyqtSignal(str, int) updateProgress = pyqtSignal() processFinished = pyqtSignal() logMessage = pyqtSignal(str) errorReport = pyqtSignal(str) def __init__(self, factors, areaAnalyst, unit_cell=1, bins=None): """ @param factors List of the pattern rasters used for prediction of point objects (sites). @param areaAnalyst AreaAnalyst that contains map of the changes, encodes and decodes category numbers. @param unit_cell Method parameter, pixelsize of resampled rasters. @param bins Dictionary of bins. Bins are binning boundaries that used for reduce count of categories. For example if factors = [f0, f1], then bins could be (for example) {0:[bins for f0], 1:[bins for f1]} = {0:[[10, 100, 250]],1:[[0.2, 1, 1.5, 4]]}. List of list used because a factor can be a multiband raster, we need get a list of bins for every band. For example: factors = [f0, 2-band-factor], bins= {0: [[10, 100, 250]], 1:[[0.2, 1, 1.5, 4], [3, 4, 7]] } """ QObject.__init__(self) self.factors = factors self.analyst = areaAnalyst self.changeMap = areaAnalyst.getChangeMap() self.bins = bins self.unit_cell = unit_cell self.prediction = None # Raster of the prediction results self.confidence = None # Raster of the results confidence(1 = the maximum confidence, 0 = the least confidence) if (bins != None) and (len(self.factors) != len(bins.keys())): raise WoeManagerError("Lengths of bins and factors are different!") for r in self.factors: if not self.changeMap.geoDataMatch(r): raise WoeManagerError("Geometries of the input rasters are different!") if self.changeMap.getBandsCount() != 1: raise WoeManagerError("Change map must have one band!") self.geodata = self.changeMap.getGeodata() # Denormalize factors if they are normalized for r in self.factors: r.denormalize() # Get list of codes from the changeMap raster categories = self.changeMap.getBandGradation(1) self.codes = [int(c) for c in categories] # Codes of transitions initState->finalState (see AreaAnalyst.encode) self.woe = {} # Maps of WoE results of every transition code self.weights = {} # Weights of WoE (of raster band code) # { # The format is: {Transition_code: {factorNumber1: [list of the weights], factorNumber2: [list of the weights]}, ...} # # for example: # 0: {0: {1: [...]}, 1: {1: [...]}}, # 1: {0: {1: [...]}, 1: {1: [...]}}, # 2: {0: {1: [...]}, 1: {1: [...]}}, # ... # } # self.transitionPotentials = ( None ) # Dictionary of transition potencial maps: {category1: map1, category2: map2, ...} def checkBins(self): """ Check if bins are applicable to the factors """ if self.bins != None: for i, factor in enumerate(self.factors): factor.denormalize() bin = self.bins[i] if (bin != None) and (bin != [None]): for j in range(factor.getBandsCount()): b = bin[j] tmp = b[:] tmp.sort() if b != tmp: # Mast be sorted return False b0, bMax = b[0], b[len(b) - 1] bandStat = factor.getBandStat(j + 1) if bandStat["min"] > b0 or bandStat["max"] < bMax: return False return True def getConfidence(self): return self.confidence def getPrediction(self, state, factors=None, calcTransitions=False): """ Most of the models use factors for prediction, but WoE takes list of factors only once (during the initialization). 
""" self._predict(state, calcTransitions) return self.prediction def getTransitionPotentials(self): return self.transitionPotentials def getWoe(self): return self.woe def _predict(self, state, calcTransitions=False): """ Predict the changes. """ try: self.rangeChanged.emit(self.tr("Initialize model %p%"), 1) rows, cols = self.geodata["ySize"], self.geodata["xSize"] if not self.changeMap.geoDataMatch(state): raise WoeManagerError("Geometries of the state and changeMap rasters are different!") prediction = np.zeros((rows, cols), dtype=np.uint8) confidence = np.zeros((rows, cols), dtype=np.uint8) mask = np.zeros((rows, cols), dtype=np.byte) stateBand = state.getBand(1) self.updateProgress.emit() self.rangeChanged.emit(self.tr("Prediction %p%"), rows) for r in xrange(rows): for c in xrange(cols): oldMax, currMax = -1000, -1000 # Small numbers indexMax = -1 # Index of Max weight initCat = stateBand[r, c] # Init category (state before transition) try: codes = self.analyst.codes(initCat) # Possible final states for code in codes: try: # If not all possible transitions are presented in the changeMap map = self.woe[code] # Get WoE map of transition 'code' except KeyError: continue w = map[r, c] # The weight in the (r,c)-pixel if w > currMax: indexMax, oldMax, currMax = code, currMax, w prediction[r, c] = indexMax confidence[r, c] = int(100 * (sigmoid(currMax) - sigmoid(oldMax))) except ValueError: mask[r, c] = 1 self.updateProgress.emit() predicted_band = np.ma.array(data=prediction, mask=mask, dtype=np.uint8) self.prediction = Raster() self.prediction.create([predicted_band], self.geodata) confidence_band = np.ma.array(data=confidence, mask=mask, dtype=np.uint8) self.confidence = Raster() self.confidence.create([confidence_band], self.geodata) except MemoryError: self.errorReport.emit(self.tr("The system out of memory during WOE prediction")) raise except: self.errorReport.emit(self.tr("An unknown error occurs during WoE prediction")) raise finally: self.processFinished.emit() def train(self): """ Train the model """ self.transitionPotentials = {} try: iterCount = len(self.codes) * len(self.factors) self.rangeChanged.emit(self.tr("Training WoE... %p%"), iterCount) changeMap = self.changeMap.getBand(1) for code in self.codes: sites = binaryzation(changeMap, [code]) # Reclass factors (continuous factor -> ordinal factor) wMap = np.ma.zeros(changeMap.shape) # The map of summary weight of the all factors self.weights[code] = {} # Dictionary for storing wheights of every raster's band for k in xrange(len(self.factors)): fact = self.factors[k] self.weights[code][k] = {} # Weights of the factor factorW = self.weights[code][k] if self.bins: # Get bins of the factor bin = self.bins[k] if (bin != None) and fact.getBandsCount() != len(bin): raise WoeManagerError("Count of bins list for multiband factor is't equal to band count!") else: bin = None for i in range(1, fact.getBandsCount() + 1): band = fact.getBand(i) if bin and bin[i - 1]: # band = reclass(band, bin[i - 1]) band, sites = masks_identity(band, sites, dtype=np.uint8) # Combine masks of the rasters woeRes = woe( band, sites, self.unit_cell ) # WoE for the 'code' (initState->finalState) transition and current 'factor'. weights = woeRes["map"] wMap = wMap + weights factorW[i] = woeRes["weights"] self.updateProgress.emit() # Reclassification finished => set WoE coefficients self.woe[code] = wMap # WoE for all factors and the transition code. 
# Potentials are WoE map rescaled to 0-100 percent band = (sigmoid(wMap) * 100).astype(np.uint8) p = Raster() p.create([band], self.geodata) self.transitionPotentials[code] = p gc.collect() except MemoryError: self.errorReport.emit(self.tr("The system is out of memory during WoE training")) raise except: self.errorReport.emit(self.tr("An unknown error occurred during WoE training")) raise finally: self.processFinished.emit() def weightsToText(self): """ Format self.weights as text report. """ if self.weights == {}: return u"" text = u"" for code in self.codes: (initClass, finalClass) = self.analyst.decode(code) text = text + self.tr("Transition %s -> %s\n" % (int(initClass), int(finalClass))) try: factorW = self.weights[code] for factNum, factDict in factorW.iteritems(): name = self.factors[factNum].getFileName() name = basename(name) text = text + self.tr("\t factor: %s \n" % (name,)) for bandNum, bandWeights in factDict.iteritems(): weights = ["%f" % (w,) for w in bandWeights] text = text + self.tr("\t\t Weights of band %s: %s \n" % (bandNum, ", ".join(weights))) except: text = text + self.tr("W for code %s (%s -> %s) caused an error" % (code, initClass, finalClass)) raise return text
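# --- Background sketch (not the plugin's woe() implementation) ---------------------
# The statistic that train() above accumulates per factor band is the
# weight-of-evidence contrast W+ - W-. A minimal NumPy version for one integer-coded
# factor band and a boolean mask of change sites, written only for illustration:
import numpy as np

def woe_contrast(band, sites):
    """Return {factor_class: W+ - W-} computed from simple pixel counts."""
    sites = sites.astype(bool)
    n_sites = float(sites.sum())
    n_nonsites = float((~sites).sum())
    eps = 1e-9                                            # guard against log(0)
    weights = {}
    for cls in np.unique(band):
        in_class = (band == cls)
        p_b_d = (in_class & sites).sum() / n_sites        # P(class | change)
        p_b_nd = (in_class & ~sites).sum() / n_nonsites   # P(class | no change)
        w_plus = np.log((p_b_d + eps) / (p_b_nd + eps))
        w_minus = np.log((1.0 - p_b_d + eps) / (1.0 - p_b_nd + eps))
        weights[int(cls)] = w_plus - w_minus
    return weights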
class MCE(QObject): logMessage = pyqtSignal(str) errorReport = pyqtSignal(str) randomConsistencyIndex = { 2: 0, 3: 0.58, 4: 0.90, 5: 1.12, 6: 1.24, 7: 1.32, 8: 1.41, 9: 1.45, 10: 1.49, 11: 1.51, 12: 1.48, 13: 1.56, 14: 1.57, 15: 1.59, 16: 1.60, 17: 1.61, 18: 1.62, 19: 1.63, 20: 1.63, 21: 1.64, 22: 1.65, 23: 1.65, 24: 1.66, 25: 1.66, 26: 1.67, 27: 1.67, 28: 1.67, 29: 1.68, 30: 1.68, 31: 1.68, 32: 1.69, 33: 1.69, 34: 1.69, 35: 1.69, 36: 1.70, 37: 1.70, 38: 1.70, 39: 1.70 } def __init__(self, factors, wMatr, initStateNum, finalStateNum, areaAnalyst): ''' Multicriteria evaluation based on Saaty method. It defines transition probability of two categories (initStateNum, finalStateNum). @param factors List of the factor rasters used for prediction. @param wMatr List of lists -- NxN comparison matrix. @param initStateNum Number of initial state (the state before transition). @param finalStateNum Number of final state (the state after transition). ''' QObject.__init__(self) self.factors = factors self.initStateNum = initStateNum self.finalStateNum = finalStateNum self.areaAnalyst = areaAnalyst # Check matrix dimension and factor count, apply normalization self.dim = 0 for f in factors: self.dim = self.dim + f.getBandsCount() f.normalize(mode = 'maxmin') if self.dim != len(wMatr): raise MCEError('Matrix size is different from the number of variables!') # Check if the matrix is valid for i in xrange(self.dim): if len(wMatr[i]) != self.dim: raise MCEError('The weight matrix is not NxN!') EPSILON = 0.000001 # A small number for i in xrange(self.dim): if wMatr[i][i] != 1: raise MCEError('w[i,i] not equal 1 !') for j in xrange(i+1, self.dim): if abs(wMatr[i][j] * wMatr[j][i] - 1) > EPSILON: raise MCEError('w[i,j] * w[j,i] not equal 1 !') self.wMatr = np.array(wMatr) self.weights = None # Weights of the factors, calculated using wMatr # It's a list, the length is self.dim # first element is the weight of first band of the first factor and so on: # [W_f1, ... weights of 1-st factors ... , W_f2, ... weights of 2-nd factors..., W_fn, ...] self.consistency =None # Consistency ratio of the comparison matrix. self.prediction = None # Raster of the prediction results self.confidence = None # Raster of the results confidence(1 = the maximum confidence, 0 = the least confidence) self.transitionPotentials = None # Dictionary of transition potencial maps: {category1: map1, category2: map2, ...} def getConsistency(self): if self.consistency == None: self.setWeights() return self.consistency def getConfidence(self): return self.confidence def getTransitionPotentials(self): return self.transitionPotentials def getPrediction(self, state, factors=None, calcTransitions=False): ''' Most of the models use factors for prediction, but MCE takes list of factors only once (during the initialization). ''' self._predict(state, calcTransitions) return self.prediction def getWeights(self): if self.weights == None: self.setWeights() return self.weights def _predict(self, state, calcTransitions=False): ''' Predict the changes. 
''' try: geodata = state.getGeodata() rows, cols = geodata['ySize'], geodata['xSize'] self.transitionPotentials = None # Reset tr.potentials if they exist # Get locations where self.initStateNum is occurs band = state.getBand(1) initStateMask = binaryzation(band, [self.initStateNum]) mask = band.mask # Calculate summary map of factors weights # Transition potentials: # current implementation: potential and confidence are equal (two-class implementation) # Confidence: # confidence is summary map of factors scaled to 0-100, if current state = self.initState # confidence is 0, if current state != self.initState # Prediction: # predicted value is a constant = areaAnalyst.encode(initStateNum, finalStateNum), if current state = self.initState # predicted value is the transition code current_state -> current_state, if current state != self.initState confidence = np.zeros((rows,cols), dtype=np.uint8) weights = self.getWeights() weightNum = 0 # Number of processed weights for f in self.factors: if not f.geoDataMatch(state): raise MCEError('Geometries of the state and factor rasters are different!') f.normalize(mode = 'maxmin') for i in xrange(f.getBandsCount()): band = f.getBand(i+1) confidence = confidence + (band*weights[weightNum]*100).astype(np.uint8) mask = np.ma.mask_or(mask, band.mask) weightNum = weightNum + 1 confidence = confidence*initStateMask prediction = np.copy(state.getBand(1)) for code in self.areaAnalyst.categories: if code != self.initStateNum: prediction[prediction==code] = self.areaAnalyst.encode(code, code) else: prediction[prediction==code] = self.areaAnalyst.encode(self.initStateNum, self.finalStateNum) predicted_band = np.ma.array(data=prediction, mask=mask, dtype=np.uint8) self.prediction = Raster() self.prediction.create([predicted_band], geodata) confidence_band = np.ma.array(data=confidence, mask=mask, dtype=np.uint8) self.confidence = Raster() self.confidence.create([confidence_band], geodata) code = self.areaAnalyst.encode(self.initStateNum, self.finalStateNum) self.transitionPotentials = {code: self.confidence} except MemoryError: self.errorReport.emit(self.tr("The system out of memory during MCE prediction")) raise except: self.errorReport.emit(self.tr("An unknown error occurs during MCE prediction")) raise def setWeights(self): ''' Calculate the weigths and consistency ratio. ''' # Weights w, v = np.linalg.eig(self.wMatr) maxW = np.max(w) maxInd = list(w).index(maxW) # Index of the biggest eigenvalue maxW = maxW.real v = v[:,maxInd] # The eigen vector self.weights = [x.real for x in v] # Maxtix v can be complex self.weights = self.weights/sum(self.weights) # Consistency ratio if self.dim > 2: ci = (maxW - self.dim)/(self.dim - 1) try: ri = self.randomConsistencyIndex[self.dim] self.consistency = ci/ri except KeyError: self.consistency = -1 else: self.consistency = 0
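# --- Worked example (not from the original source) ---------------------------------
# The procedure implemented by setWeights() above on a toy 3x3 comparison matrix:
# weights come from the principal eigenvector, the consistency ratio is CI / RI.
# The matrix values below are made up for illustration.
import numpy as np

SAATY_RANDOM_INDEX = {2: 0.0, 3: 0.58, 4: 0.90, 5: 1.12}   # subset of the table above

def saaty_weights(w_matr):
    """Return (weights, consistency_ratio) for a reciprocal pairwise-comparison matrix."""
    w_matr = np.array(w_matr, dtype=float)
    eigvals, eigvecs = np.linalg.eig(w_matr)
    i = int(np.argmax(eigvals.real))
    lam = eigvals[i].real                       # principal eigenvalue
    weights = np.abs(eigvecs[:, i].real)        # principal eigenvector (sign-normalized)
    weights = weights / weights.sum()
    n = w_matr.shape[0]
    cr = 0.0
    if n > 2:
        ci = (lam - n) / (n - 1)                # consistency index
        cr = ci / SAATY_RANDOM_INDEX[n]         # consistency ratio
    return weights, cr

# "Factor 1 is 3x as important as factor 2 and 5x as important as factor 3"
matrix = [[1.0, 3.0, 5.0],
          [1.0 / 3, 1.0, 2.0],
          [1.0 / 5, 1.0 / 2, 1.0]]
weights, cr = saaty_weights(matrix)             # cr < 0.1 usually means acceptable consistency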
def __sim(self): ''' Perform one iteration of the simulation. ''' transition = self.crosstable.getCrosstable() self.updatePrediction(self.state) changes = self.getPrediction().getBand(1) # Predicted change map changes = changes + 1 # Filling nodata as 0 can be ambiguous: changes = np.ma.filled( changes, 0) # (cat_code can be 0; to avoid mixing it with no-data, add 1) state = self.getState() new_state = state.getBand(1).copy().astype( np.uint8 ) # New states (the result of simulation) will be stored there. self.rangeChanged.emit(self.tr("Area Change Analysis %p%"), 2) self.updateProgress.emit() QCoreApplication.processEvents() analyst = AreaAnalyst(state, second=None) self.updateProgress.emit() QCoreApplication.processEvents() categories = state.getBandGradation(1) # Make transitions between categories according to # the number of moved pixels in the crosstable self.rangeChanged.emit(self.tr("Simulation process %p%"), len(categories)**2 - len(categories)) QCoreApplication.processEvents() for initClass in categories: for finalClass in categories: if initClass == finalClass: continue # TODO: Calculate number of pixels to be moved via TransitionMatrix and state raster n = transition.getTransition( initClass, finalClass) # Number of pixels that have to change category (based on the transition matrix only). if n == 0: continue # Find n appropriate places for transition initClass -> finalClass cat_code = analyst.encode(initClass, finalClass) # Array of places where transitions initClass -> finalClass occur places = ( changes == cat_code + 1 ) # cat_code can be 0, do not confuse it with no-data in the 'changes' variable placesCount = np.sum(places) if placesCount < n: self.logMessage.emit( self.tr("There are more transitions in the transition matrix than the model has found" )) QCoreApplication.processEvents() n = placesCount if n > 0: confidence = self.getConfidence().getBand(1) # Add a small random value to break ties rnd = np.random.sample( size=confidence.shape) / 1000 confidence = np.ma.filled(confidence, 0) + rnd confidence = confidence * places # The higher the number in a cell, the higher the probability of transition in that cell. # Ensure n does not exceed the count of nonzero-confidence cells placesCount = np.sum(confidence > 0) if placesCount < n: # Some cells where transitions should appear have zero confidence; the transition count will be reduced. n = placesCount ind = confidence.argsort(axis=None)[-n:] indices = [ np.unravel_index(i, confidence.shape) for i in ind ] # Now "indices" contains indices of the appropriate places, # make transition initClass -> finalClass for index in indices: new_state[index] = finalClass self.updateProgress.emit() QCoreApplication.processEvents() result = Raster() result.create([new_state], state.getGeodata()) self.state = result
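# --- Illustration (not from the original source) ----------------------------------
# The allocation step inside __sim() above, isolated as a small helper: flip the n
# candidate pixels with the highest confidence to finalClass. The function and its
# argument names are hypothetical; the indexing mirrors the argsort/unravel_index
# logic used above.
import numpy as np

def allocate_transition(new_state, confidence, places, n, final_class):
    """Assign final_class to the n most confident pixels among the candidate places."""
    conf = np.ma.filled(confidence, 0) * places       # zero-out non-candidate pixels
    n = min(n, int(np.sum(conf > 0)))                 # never allocate more than available
    if n == 0:
        return new_state
    flat_order = conf.argsort(axis=None)[-n:]         # flat indices of the n largest values
    for flat_index in flat_order:
        new_state[np.unravel_index(flat_index, conf.shape)] = final_class
    return new_state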