Exemplo n.º 1
0
"""

##############################################################################
# Import libraries
# -------------------------------------------

from museotoolbox.cross_validation import SpatialLeaveAsideOut
from museotoolbox import datasets, raster_tools, vector_tools

##############################################################################
# Load HistoricalMap dataset
# -------------------------------------------

# Request the low-resolution variant (low_res=True) of the HistoricalMap
# example data; the call is unpacked into a raster and a vector.
raster, vector = datasets.historicalMap(low_res=True)
field = 'Class'  # field name handed to getSamplesFromROI for the labels
# With three arguments the call is unpacked into exactly two values, X and y.
X, y = raster_tools.getSamplesFromROI(raster, vector, field)
# NOTE(review): presumably the pairwise distances between the sampled ROI
# points -- confirm against the getDistanceMatrix documentation.
distanceMatrix = vector_tools.getDistanceMatrix(raster, vector)

##############################################################################
# Create CV
# -------------------------------------------
# n_splits will be the number of the least populated class

# Configure the cross-validation object with the distance matrix computed
# above; n_splits is given explicitly (2) in this call.
# NOTE(review): valid_size=1 / 3 presumably sets the share of samples held
# out for validation -- confirm in the SpatialLeaveAsideOut documentation.
SLOPO = SpatialLeaveAsideOut(valid_size=1 / 3,
                             n_splits=2,
                             distanceMatrix=distanceMatrix,
                             random_state=2)

# Print the value returned by get_n_splits for these samples.
print(SLOPO.get_n_splits(X, y))

###############################################################################
##############################################################################
# Import libraries
# -------------------------------------------

from museotoolbox.cross_validation import LeaveOneSubGroupOut
from museotoolbox.raster_tools import getSamplesFromROI
from museotoolbox import datasets

##############################################################################
# Load HistoricalMap dataset
# -------------------------------------------

# Default call: no low_res flag is passed in this snippet.
raster,vector = datasets.historicalMap()
field = 'Class'
group = 'uniquefid'
# Passing a fourth argument (the group field name) makes the call unpack
# into three values; `s` presumably holds one group value per sample
# -- confirm against the getSamplesFromROI documentation.
X,y,s = getSamplesFromROI(raster,vector,field,group)

##############################################################################
# Create CV
# -------------------------------------------
# If n_splits is False (default), the number of splits will be the smallest
# number of subgroups among all labels.

# NOTE(review): valid_size is assigned but not passed to
# LeaveOneSubGroupOut in this snippet -- confirm whether it is still needed.
valid_size = 0.5 # Means 50%
LOSGO = LeaveOneSubGroupOut(verbose=False,random_state=12) # n_splits is not given here

###############################################################################
# .. note::
#    Split is made to generate each fold

# The return value of get_n_splits is neither stored nor printed here.
LOSGO.get_n_splits(X,y,s)
from museotoolbox.cross_validation import SpatialLeaveOneSubGroupOut
from museotoolbox import datasets, raster_tools, vector_tools
##############################################################################
# Load HistoricalMap dataset
# -------------------------------------------

# With centroid=True the call is unpacked into three values; `centroid` is
# the input later given to getDistanceMatrix instead of `vector`.
raster, vector, centroid = datasets.historicalMap(low_res=True, centroid=True)

field = 'Class'  # field name handed to getSamplesFromROI for the labels

##############################################################################
# Extract label ('Class' field) and groups ('uniquefid' field)
# Compute distanceMatrix with centroid (one point per group)

# The fourth argument names the group field, so three values come back:
# X, y and `groups` (read from the 'uniquefid' field).
X, y, groups = raster_tools.getSamplesFromROI(raster, vector, field,
                                              'uniquefid')
# With a field name as third argument the call is unpacked into two values.
# NOTE(review): distanceLabel presumably gives the 'uniquefid' value of each
# row/column of distanceMatrix -- confirm in the getDistanceMatrix docs.
distanceMatrix, distanceLabel = vector_tools.getDistanceMatrix(
    raster, centroid, 'uniquefid')

##############################################################################
# Create CV
# -------------------------------------------
# n_splits will be the number of the least populated class

# Build the spatial cross-validation object from the centroid-based distance
# matrix and its labels computed above.
# NOTE(review): the keyword is spelled `distanceThresold` in this call; it
# has to match the library's parameter name, so check the
# SpatialLeaveOneSubGroupOut signature before "correcting" the spelling.
SLOSGO = SpatialLeaveOneSubGroupOut(distanceThresold=100,
                                    distanceMatrix=distanceMatrix,
                                    distanceLabel=distanceLabel,
                                    random_state=12)

###############################################################################
# .. note::
Exemplo n.º 4
0
field = 'Class'  # passed to getSamplesFromROI below as the label field name
group = 'uniquefid'  # passed to getSamplesFromROI below as the group field name

##############################################################################
# Create CV
# -------------------------------------------
valid_size = 0.5  # Means 50%; forwarded to LeavePSubGroupOut on the next line
LPSGO = LeavePSubGroupOut(valid_size=valid_size,
                          random_state=12,  # fixed value; presumably seeds the fold selection -- confirm
                          verbose=False)

###############################################################################
# Extract X,y and group.
# -------------------------------------------

# `g` is the third value returned when a group field name is supplied
# (presumably one group id per sample -- confirm); it is passed to
# LPSGO.split further down.
X, y, g = raster_tools.getSamplesFromROI(raster, vector, field, group)

###############################################################################
# .. note::
#    Split is made to generate each fold

# Each iteration yields a pair (tr, vl); their shapes are printed per fold.
# NOTE(review): tr is used to index y below, so tr/vl are presumably index
# arrays for the training and validation samples -- confirm.
for tr, vl in LPSGO.split(X, y, g):
    print(tr.shape, vl.shape)

# These prints run after the loop, so `tr` refers to the last fold only.
# NOTE(review): `np` is not imported in the visible part of this file --
# confirm that `import numpy as np` exists above.
print('y label with number of samples')
print(np.unique(y[tr], return_counts=True))
##############################################################################
# Differences with scikit-learn
# -------------------------------------------
from sklearn.model_selection import LeavePGroupsOut
# You need to specify the number of groups
Exemplo n.º 5
0
# sklearn will compute different metrics, but will keep best results from kappa (refit='kappa')
# NOTE(review): the learnFromVector/learnFromRaster calls further down pass
# refit='f1_mean', not 'kappa' -- confirm which metric this comment should name.
LAP = learnAndPredict(n_jobs=1,verbose=1)

##############################################################################
# Create or use custom function

def reduceBands(X, bandToKeep=(0, 2)):
    """Keep only the selected band columns of a sample array.

    Parameters
    ----------
    X : numpy.ndarray
        Samples with one column per band; must support ``X[:, columns]``
        indexing and ``reshape``.
    bandToKeep : sequence of int, default (0, 2)
        Column indices to keep. The default selects the first and the third
        band, i.e. the first and last band of a 3-band image.

    Returns
    -------
    numpy.ndarray
        Array reshaped to ``(-1, len(bandToKeep))`` holding only the
        requested columns.
    """
    # The default is an immutable tuple so no list object is shared between
    # calls; converting to a list keeps the column (fancy) indexing exactly
    # as it was with the original list default.
    bands = list(bandToKeep)
    return X[:, bands].reshape(-1, len(bands))

# Register reduceBands on the learnAndPredict instance through customizeX.
# NOTE(review): presumably applied to X before learning/predicting -- confirm
# in the learnAndPredict documentation.
LAP.customizeX(reduceBands)

# Variant 1: extract the samples first, then learn from the X, y arrays and
# refit according to 'f1_mean'. `classifier` and `scoring` are defined
# outside the visible part of this file.
X,y = getSamplesFromROI(raster,vector,field)
LAP.learnFromVector(X,y,cv=2,classifier=classifier,param_grid=dict(n_estimators=[10]),
                    scoring=scoring,refit='f1_mean')

# Variant 2: learn directly from the raster, the vector and the field name,
# with the same cv, classifier, parameter grid, scoring and refit settings.
LAP.learnFromRaster(raster,vector,field,cv=2,classifier=classifier,param_grid=dict(n_estimators=[10]),
                    scoring=scoring,refit='f1_mean')


##############################################################################
# Read the model
# -------------------
print(LAP.model)  # the model object held by the learnAndPredict instance
# NOTE(review): cv_results_ and best_score_ are read off LAP.model; this
# looks like a scikit-learn search object (e.g. GridSearchCV) -- confirm.
print(LAP.model.cv_results_)
print(LAP.model.best_score_)