def test_get_state(self):
    """Check Sampler.get_state for ns=0, ns=1 and a masked input raster."""
    sampler = Sampler(self.state, self.factors, self.output, ns=0)
    assert_array_equal(sampler.get_state(self.state, 1, 1), [0, 0])
    assert_array_equal(sampler.get_state(self.state, 0, 0), [0, 1])

    # ns=1: the state vector covers the whole 3x3 neighbourhood of the pixel.
    sampler = Sampler(self.state, self.factors, self.output, ns=1)
    expected = [
        0, 1, 0,
        0, 0, 1,
        0, 1, 0,
        0, 0, 1,
        1, 0, 0,
        1, 0, 0,
    ]
    assert_array_equal(sampler.get_state(self.state, 1, 1), expected)

    # With one category masked out the per-pixel vector shrinks to length 1.
    masked = Raster('../../examples/sites.tif')
    masked.resetMask([0])
    sampler = Sampler(masked, self.factors, self.output, ns=0)
    assert_array_equal(sampler.get_state(self.state, 1, 1), [0])
    assert_array_equal(sampler.get_state(self.state, 0, 0), [1])
def test_cat2vect(self):
    """Check Sampler.cat2vect category-to-vector encoding."""
    sampler = Sampler(self.state, self.factors, self.output, ns=0)
    for category, expected in ((0, [1, 0]), (1, [0, 1]), (2, [0, 0])):
        assert_array_equal(sampler.cat2vect(category), expected)

    # A raster with a masked category encodes into a shorter vector.
    masked = Raster('../../examples/sites.tif')
    masked.resetMask([0])
    sampler = Sampler(masked, self.factors, self.output, ns=0)
    for category, expected in ((1, [1]), (2, [0])):
        assert_array_equal(sampler.cat2vect(category), expected)
def setTrainingData(self):
    '''
    Sample the training data from the manager's state/factor/output
    rasters (without shuffling) and store it in self.data.

    Also stores the list of output categories found in the sample
    (self.catlist).

    @raise LRError if the logistic regression model is not created yet.
    '''
    state, factors, output, mode, samples = self.state, self.factors, self.output, self.mode, self.samples
    if not self.logreg:
        raise LRError(
            'You must create a Logistic Regression model before!')

    # Normalize factors before sampling:
    for f in factors:
        f.normalize(mode='mean')

    self.sampler = Sampler(state, factors, output, ns=self.ns)
    self.__propagateSamplerSignals()
    self.sampler.setTrainingData(state,
                                 output,
                                 shuffle=False,
                                 mode=mode,
                                 samples=samples)

    # Keep a reference to the sampled records; the LR model consumes them
    # directly (unlike the MLP manager, no output re-encoding is needed).
    self.data = self.sampler.data
    self.catlist = np.unique(self.data['output'])
def setTrainingData(self, state, factors, output, shuffle=True, mode='All', samples=None):
    '''Sample training data and re-encode it for the MLP.

    @param state    Raster of the current state (categories) values.
    @param factors  List of the factor rasters (predicting variables).
    @param output   Raster that contains categories to predict.
    @param shuffle  Perform random shuffle.
    @param mode     Type of sampling method:
                        All         Get all pixels
                        Random      Get samples. Count of samples in the
                                    data=samples.
                        Stratified  Undersampling of major categories
                                    and/or oversampling of minor categories.
    @samples Sample count of the training data (doesn't used in 'All' mode).
    '''
    if not self.MLP:
        raise MlpManagerError('You must create a MLP before!')

    # Normalize factors before sampling:
    for factor in factors:
        factor.normalize(mode='mean')

    self.sampler = Sampler(state, factors, output, self.ns)
    self.sampler.setTrainingData(state=state,
                                 output=output,
                                 shuffle=shuffle,
                                 mode=mode,
                                 samples=samples)

    out_len = self.getOutputVectLen()
    state_len = self.sampler.stateVecLen
    factors_len = self.sampler.factorVectLen
    count = len(self.sampler.data)

    # Re-pack the sampler's records, replacing the raw output category
    # with the MLP output vector encoding.
    self.data = np.zeros(count,
                         dtype=[('coords', float, 2),
                                ('state', float, state_len),
                                ('factors', float, factors_len),
                                ('output', float, out_len)])
    for field in ('coords', 'state', 'factors'):
        self.data[field] = self.sampler.data[field]
    self.data['output'] = [
        self.getOutputVector(record['output'])
        for record in self.sampler.data
    ]
def test_setTrainingData(self):
    # ns=0, no shuffle: every pixel is sampled in scan order, so the
    # resulting structured array can be compared field by field.
    smp = Sampler(self.state, self.factors, self.output, ns=0)
    smp.setTrainingData(self.state, self.output, shuffle=False)
    data = np.array([
        ([0, 3], [0, 1], 1.0, 1.0),
        ([1, 3], [0, 0], 1.0, 2.0),
        ([2, 3], [0, 1], 3.0, 1.0),
        ([0, 2], [0, 1], 3.0, 1.0),
        ([1, 2], [0, 0], 2.0, 2.0),
        ([2, 2], [0, 1], 1.0, 1.0),
        ([0, 1], [1, 0], 0.0, 0.0),
        ([1, 1], [0, 1], 3.0, 1.0),
        ([2, 1], [0, 0], 1.0, 2.0),
    ],
                    dtype=[('coords', float, 2), ('state', float, (2, )),
                           ('factors', float, (1, )), ('output', float, 1)])
    for i in range(len(data)):
        assert_array_equal(data[i]['coords'], smp.data[i]['coords'])
        assert_array_equal(data[i]['factors'], smp.data[i]['factors'])
        assert_array_equal(data[i]['output'], smp.data[i]['output'])
        assert_array_equal(data[i]['state'], smp.data[i]['state'])

    # two factor_rasters
    # ns=1 leaves only one complete 3x3 neighbourhood, so one sample.
    smp = Sampler(self.state, self.factors2, self.output, ns=1)
    smp.setTrainingData(self.state, self.output)
    # Check all except coords
    data = np.array(
        [(  #State categories: [1,2,1, 1,2,1, 0,1,2],
            [0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0],
            # Factors:
            [
                1., 1., 3., 3., 2., 1., 0., 3., 1., 1., 1., 3., 3., 2.,
                1., 0., 3., 1.
            ],
            # Output:
            2.0)],
        dtype=[('state', float, (18, )), ('factors', float, (18, )),
               ('output', float, 1)])
    assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
    assert_array_equal(data[0]['output'], smp.data[0]['output'])
    assert_array_equal(data[0]['state'], smp.data[0]['state'])

    # Multiband factors
    smp = Sampler(self.state, self.factors3, self.output, ns=1)
    smp.setTrainingData(self.state, self.output)
    # Check all except coords
    data = np.array(
        [([0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0], [
            1., 2., 1., 1., 2., 1., 0., 1., 2., 1., 1., 3., 3., 2., 1.,
            0., 3., 1.
        ], 2.0)],
        dtype=[('state', float, (18, )), ('factors', float, (18, )),
               ('output', float, 1)])
    assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
    assert_array_equal(data[0]['output'], smp.data[0]['output'])
    assert_array_equal(data[0]['state'], smp.data[0]['state'])

    # Several factor bands, several factor rasters
    # (factor vector grows to 18 + 9 = 27 components)
    smp = Sampler(self.state, self.factors4, self.output, ns=1)
    smp.setTrainingData(self.state, self.output)
    # Check all except coords
    data = np.array([
        ([0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0], [
            1., 2., 1., 1., 2., 1., 0., 1., 2., 1., 1., 3., 3., 2., 1.,
            0., 3., 1., 1., 1., 3., 3., 2., 1., 0., 3., 1.
        ], 2.0)
    ],
                    dtype=[('state', float, (18, )),
                           ('factors', float, (27, )),
                           ('output', float, 1)])
    assert_array_equal(data[0]['factors'], smp.data[0]['factors'])
    assert_array_equal(data[0]['output'], smp.data[0]['output'])
    assert_array_equal(data[0]['state'], smp.data[0]['state'])

    # Mode = Random
    # As the Multiband factors example, but 10 samples:
    # only one pixel is eligible, so every drawn sample must equal it.
    data = np.array(
        [([0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0], [
            1., 2., 1., 1., 2., 1., 0., 1., 2., 1., 1., 3., 3., 2., 1.,
            0., 3., 1.
        ], 2.0)],
        dtype=[('state', float, (18, )), ('factors', float, (18, )),
               ('output', float, 1)])
    smp = Sampler(self.state, self.factors3, self.output, ns=1)
    smp.setTrainingData(self.state, self.output, mode='Random', samples=10)
    for i in range(10):
        assert_array_equal(data[0]['factors'], smp.data[i]['factors'])
        assert_array_equal(data[0]['output'], smp.data[i]['output'])
        assert_array_equal(data[0]['state'], smp.data[i]['state'])

    # Mode = Stratified
    # Check the category counts of the sampled outputs (order-independent:
    # the outputs are sorted first): 5 of category 0, 5 of 1, rest 2.
    smp = Sampler(self.state, self.factors, self.output, ns=0)
    smp.setTrainingData(self.state,
                        self.output,
                        mode='Stratified',
                        samples=15)
    out = smp.data['output']
    out.sort()
    self.assertEqual(out[0], 0)
    self.assertEqual(out[4], 0)
    self.assertEqual(out[5], 1)
    self.assertEqual(out[9], 1)
    self.assertEqual(out[10], 2)
def _predict(self, state, factors, calcTransitions=False):
    '''
    Calculate output and confidence rasters using LR model and input rasters

    @param state            Raster of the current state (categories) values.
    @param factors          List of the factor rasters (predicting variables).
    @param calcTransitions  If True, also build per-category rasters of
                            transition potentials in self.transitionPotentials.
    '''
    try:
        self.rangeChanged.emit(self.tr("Initialize model %p%"), 1)
        geodata = state.getGeodata()
        rows, cols = geodata['ySize'], geodata['xSize']
        for r in factors:
            if not state.geoDataMatch(r):
                raise LRError(
                    'Geometries of the input rasters are different!')

        self.transitionPotentials = None  # Reset tr.potentials if they exist

        # Normalize factors before prediction:
        for f in factors:
            f.normalize(mode='mean')

        predicted_band = np.zeros([rows, cols], dtype=np.uint8)
        confidence_band = np.zeros([rows, cols], dtype=np.uint8)
        if calcTransitions:
            self.transitionPotentials = {}
            for cat in self.catlist:
                self.transitionPotentials[cat] = np.zeros([rows, cols],
                                                          dtype=np.uint8)

        self.sampler = Sampler(state, factors, ns=self.ns)
        mask = state.getBand(1).mask.copy()
        # A scalar (shapeless) mask means "nothing masked": expand it to a
        # full-size boolean band so it can be indexed per pixel.
        # (plain `bool` instead of the removed `np.bool` alias)
        if mask.shape == ():
            mask = np.zeros([rows, cols], dtype=bool)
        self.updateProgress.emit()
        self.rangeChanged.emit(self.tr("Prediction %p%"), rows)
        for i in range(rows):
            for j in range(cols):
                if not mask[i, j]:
                    sample = self.sampler.get_inputs(state, i, j)
                    # `is not None` — `!= None` triggers numpy's elementwise
                    # comparison when get_inputs returns an array.
                    if sample is not None:
                        sample = np.array([sample])
                        out = self.logreg.predict(sample)
                        predicted_band[i, j] = out
                        confidence = self._outputConfidence(sample)
                        confidence_band[i, j] = confidence
                        if calcTransitions:
                            potentials = self.outputTransitions(sample)
                            for cat in self.catlist:
                                potential_map = self.transitionPotentials[cat]
                                potential_map[i, j] = potentials[cat]
                    else:
                        # Input sample is incomplete => mask this pixel
                        mask[i, j] = True
            self.updateProgress.emit()

        predicted_bands = [
            np.ma.array(data=predicted_band, mask=mask, dtype=np.uint8)
        ]
        confidence_bands = [
            np.ma.array(data=confidence_band, mask=mask, dtype=np.uint8)
        ]

        self.prediction = Raster()
        self.prediction.create(predicted_bands, geodata)
        self.confidence = Raster()
        self.confidence.create(confidence_bands, geodata)

        if calcTransitions:
            # Wrap every potentials array into a Raster (masked like the
            # prediction itself).
            for cat in self.catlist:
                band = [
                    np.ma.array(data=self.transitionPotentials[cat],
                                mask=mask,
                                dtype=np.uint8)
                ]
                self.transitionPotentials[cat] = Raster()
                self.transitionPotentials[cat].create(band, geodata)
    except MemoryError:
        self.errorReport.emit(
            self.tr("The system out of memory during LR prediction"))
        raise
    except:
        # Bare except kept deliberately: report, then re-raise unchanged.
        self.errorReport.emit(
            self.tr("An unknown error occurs during LR prediction"))
        raise
    finally:
        self.processFinished.emit()