Example #1
import numpy as np
from numpy.testing import assert_array_equal

from sklearn.datasets import fetch_species_distributions
from sklearn.datasets.species_distributions import construct_grids


def test_construct_grids():
    batch = fetch_species_distributions(data_home=None,
                                        download_if_missing=True)
    keep = construct_grids(batch)

    xmin = batch.x_left_lower_corner + batch.grid_size
    xmax = xmin + (batch.Nx * batch.grid_size)

    ymin = batch.y_left_lower_corner + batch.grid_size
    ymax = ymin + (batch.Ny * batch.grid_size)

    xgrid = np.arange(xmin, xmax, batch.grid_size)
    ygrid = np.arange(ymin, ymax, batch.grid_size)

    assert_array_equal(keep[0], xgrid)
    assert_array_equal(keep[1], ygrid)
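For reference, the grid construction the test mirrors is only a few lines; a sketch close to the scikit-learn implementation (the helper name here is ours):

def construct_grids_sketch(batch):
    # coordinates of the lower-left corner cell
    xmin = batch.x_left_lower_corner + batch.grid_size
    xmax = xmin + (batch.Nx * batch.grid_size)
    ymin = batch.y_left_lower_corner + batch.grid_size
    ymax = ymin + (batch.Ny * batch.grid_size)
    # one coordinate per grid cell along each axis
    xgrid = np.arange(xmin, xmax, batch.grid_size)
    ygrid = np.arange(ymin, ymax, batch.grid_size)
    return (xgrid, ygrid)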
Example #2
import numpy as np

from sklearn.datasets import fetch_species_distributions

# load the species observation data used throughout this snippet
data = fetch_species_distributions()

latlon = np.vstack([data.train['dd lat'], data.train['dd long']]).T
species = np.array(
    [d.decode('ascii').startswith('micro') for d in data.train['species']],
    dtype='int')

# Work around basemap failing to locate the PROJ data directory in some
# conda environments by pointing PROJ_LIB at conda's share/proj folder.
import os
import conda

conda_file_dir = conda.__file__
conda_dir = conda_file_dir.split('lib')[0]
proj_lib = os.path.join(os.path.join(conda_dir, 'share'), 'proj')
os.environ["PROJ_LIB"] = proj_lib

from mpl_toolkits.basemap import Basemap
from sklearn.datasets.species_distributions import construct_grids
xgrid, ygrid = construct_grids(data)
# plot coastlines with Basemap
m = Basemap(projection='cyl',
            resolution='c',
            llcrnrlat=ygrid.min(),
            urcrnrlat=ygrid.max(),
            llcrnrlon=xgrid.min(),
            urcrnrlon=xgrid.max())
m.drawmapboundary(fill_color='#DDEEFF')
m.fillcontinents(color='#FFEEDD')
m.drawcoastlines(color='gray', zorder=2)
m.drawcountries(color='gray', zorder=2)
# plot locations
m.scatter(latlon[:, 1],
          latlon[:, 0],
          zorder=3,
          c=species)  # the call was truncated here; coloring by species is a guess
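The listing stops after the scatter call; to actually display the map, something like the following would presumably follow (our addition, not part of the original snippet):

import matplotlib.pyplot as plt

plt.title('Species observations over South America')
plt.show()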
def plot_species_distribution(species=("bradypus_variegatus_0",
                                       "microryzomys_minutus_0")):
    """
    Plot the species distribution.
    """
    if len(species) > 2:
        print("Note: when more than two species are provided,"
              " only the first two will be used")

    t0 = time()

    # Load the compressed data
    data = fetch_species_distributions()

    # Set up the data grid
    xgrid, ygrid = construct_grids(data)

    # The grid in x,y coordinates
    X, Y = np.meshgrid(xgrid, ygrid[::-1])

    # create a bunch for each species
    BV_bunch = create_species_bunch(species[0], data.train, data.test,
                                    data.coverages, xgrid, ygrid)
    MM_bunch = create_species_bunch(species[1], data.train, data.test,
                                    data.coverages, xgrid, ygrid)

    # background points (grid coordinates) for evaluation
    np.random.seed(13)
    background_points = np.c_[
        np.random.randint(low=0, high=data.Ny, size=10000),
        np.random.randint(low=0, high=data.Nx, size=10000)].T

    # We'll make use of the fact that coverages[6] has measurements at all
    # land points.  This will help us decide between land and water.
    land_reference = data.coverages[6]

    # Fit, predict, and plot for each species.
    for i, species in enumerate([BV_bunch, MM_bunch]):
        print("_" * 80)
        print("Modeling distribution of species '%s'" % species.name)

        # Standardize features
        mean = species.cov_train.mean(axis=0)
        std = species.cov_train.std(axis=0)
        train_cover_std = (species.cov_train - mean) / std

        # Fit OneClassSVM
        print(" - fit OneClassSVM ... ", end='')
        clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5)
        clf.fit(train_cover_std)
        print("done.")

        # Plot map of South America
        plt.subplot(1, 2, i + 1)
        if basemap:
            print(" - plot coastlines using basemap")
            m = Basemap(projection='cyl',
                        llcrnrlat=Y.min(),
                        urcrnrlat=Y.max(),
                        llcrnrlon=X.min(),
                        urcrnrlon=X.max(),
                        resolution='c')
            m.drawcoastlines()
            m.drawcountries()
        else:
            print(" - plot coastlines from coverage")
            plt.contour(X,
                        Y,
                        land_reference,
                        levels=[-9998],
                        colors="k",
                        linestyles="solid")
            plt.xticks([])
            plt.yticks([])

        print(" - predict species distribution")

        # Predict species distribution using the training data
        Z = np.ones((data.Ny, data.Nx), dtype=np.float64)

        # We'll predict only for the land points.
        idx = np.where(land_reference > -9999)
        coverages_land = data.coverages[:, idx[0], idx[1]].T

        pred = clf.decision_function((coverages_land - mean) / std)
        Z *= pred.min()
        Z[idx[0], idx[1]] = pred

        levels = np.linspace(Z.min(), Z.max(), 25)
        Z[land_reference == -9999] = -9999

        # plot contours of the prediction
        plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
        plt.colorbar(format='%.2f')

        # scatter training/testing points
        plt.scatter(species.pts_train['dd long'],
                    species.pts_train['dd lat'],
                    s=2**2,
                    c='black',
                    marker='^',
                    label='train')
        plt.scatter(species.pts_test['dd long'],
                    species.pts_test['dd lat'],
                    s=2**2,
                    c='black',
                    marker='x',
                    label='test')
        plt.legend()
        plt.title(species.name)
        plt.axis('equal')

        # Compute AUC with regard to the background points
        pred_background = Z[background_points[0], background_points[1]]
        pred_test = clf.decision_function((species.cov_test - mean) / std)
        scores = np.r_[pred_test, pred_background]
        y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)]
        fpr, tpr, thresholds = metrics.roc_curve(y, scores)
        roc_auc = metrics.auc(fpr, tpr)
        plt.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right")
        print("\n Area under the ROC curve : %f" % roc_auc)

    print("\ntime elapsed: %.2fs" % (time() - t0))
Example #4
def test2():
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.datasets import fetch_species_distributions
    from sklearn.datasets.species_distributions import construct_grids
    from sklearn.neighbors import KernelDensity

    # if basemap is available, we'll use it.
    # otherwise, we'll improvise later...
    try:
        from mpl_toolkits.basemap import Basemap
        basemap = True
    except ImportError:
        basemap = False

    # Get matrices/arrays of species IDs and locations
    data = fetch_species_distributions()
    species_names = ['Bradypus Variegatus', 'Microryzomys Minutus']

    Xtrain = np.vstack([data['train']['dd lat'],
                        data['train']['dd long']]).T
    ytrain = np.array([d.decode('ascii').startswith('micro')
                       for d in data['train']['species']], dtype='int')
    Xtrain *= np.pi / 180.  # Convert lat/long to radians

    # Set up the data grid for the contour plot
    xgrid, ygrid = construct_grids(data)
    X, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])
    land_reference = data.coverages[6][::5, ::5]
    land_mask = (land_reference > -9999).ravel()

    xy = np.vstack([Y.ravel(), X.ravel()]).T
    xy = xy[land_mask]
    xy *= np.pi / 180.

    # Plot map of South America with distributions of each species
    fig = plt.figure()
    fig.subplots_adjust(left=0.05, right=0.95, wspace=0.05)

    for i in range(2):
        plt.subplot(1, 2, i + 1)

        # construct a kernel density estimate of the distribution
        print(" - computing KDE in spherical coordinates")
        kde = KernelDensity(bandwidth=0.04, metric='haversine',
                            kernel='gaussian', algorithm='ball_tree')
        print(Xtrain[ytrain == i].shape)
        kde.fit(Xtrain[ytrain == i])

        # evaluate only on the land: -9999 indicates ocean
        Z = -9999 + np.zeros(land_mask.shape[0])
        Z[land_mask] = np.exp(kde.score_samples(xy))
        Z = Z.reshape(X.shape)

        # plot contours of the density
        levels = np.linspace(0, Z.max(), 25)
        print([a.shape for a in (X, Y, Z)])
        plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)

        if basemap:
            print(" - plot coastlines using basemap")
            m = Basemap(projection='cyl', llcrnrlat=Y.min(),
                        urcrnrlat=Y.max(), llcrnrlon=X.min(),
                        urcrnrlon=X.max(), resolution='c')
            m.drawcoastlines()
            m.drawcountries()
        else:
            print(" - plot coastlines from coverage")
            plt.contour(X, Y, land_reference,
                        levels=[-9999], colors="k",
                        linestyles="solid")
            plt.xticks([])
            plt.yticks([])

        plt.title(species_names[i])

    plt.show()
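Once fitted, the same KDE can score arbitrary coordinates; a small usage sketch (the coordinates are illustrative, and with metric='haversine' the model expects [lat, long] pairs in radians):

point = np.radians([[-3.1, -60.0]])      # roughly Manaus, in radians
log_density = kde.score_samples(point)   # log of the estimated density
density = np.exp(log_density)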
Example #5
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_species_distributions
from sklearn.datasets.species_distributions import construct_grids
from sklearn.neighbors import KernelDensity

# if basemap is available, we'll use it.
# otherwise, we'll improvise later...
try:
    from mpl_toolkits.basemap import Basemap
    basemap = True
except ImportError:
    basemap = False

# Get matrices/arrays of species IDs and locations
data = fetch_species_distributions()
species_names = ['Bradypus Variegatus', 'Microryzomys Minutus']

Xtrain = np.vstack([data['train']['dd lat'],
                    data['train']['dd long']]).T
ytrain = np.array([d.decode('ascii').startswith('micro')
                  for d in data['train']['species']], dtype='int')
Xtrain *= np.pi / 180.  # Convert lat/long to radians

# Set up the data grid for the contour plot
xgrid, ygrid = construct_grids(data)
X, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])
land_reference = data.coverages[6][::5, ::5]
land_mask = (land_reference > -9999).ravel()

xy = np.vstack([Y.ravel(), X.ravel()]).T
xy = xy[land_mask]
xy *= np.pi / 180.

# Plot map of South America with distributions of each species
fig = plt.figure()
fig.subplots_adjust(left=0.05, right=0.95, wspace=0.05)

for i in range(2):
    plt.subplot(1, 2, i + 1)
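The listing is cut off inside the loop; in the matching scikit-learn KDE example (compare Example #4) the body continues roughly as follows:

    # construct a kernel density estimate of the distribution
    kde = KernelDensity(bandwidth=0.04, metric='haversine',
                        kernel='gaussian', algorithm='ball_tree')
    kde.fit(Xtrain[ytrain == i])

    # evaluate only on the land: -9999 indicates ocean
    Z = np.full(land_mask.shape[0], -9999, dtype='float64')
    Z[land_mask] = np.exp(kde.score_samples(xy))
    Z = Z.reshape(X.shape)

    # plot contours of the density
    levels = np.linspace(0, Z.max(), 25)
    plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
    plt.title(species_names[i])

plt.show()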
Example #6
def plot_species_distribution(species=("bradypus_variegatus_0",
                                       "microryzomys_minutus_0")):
    """
    Plot the species distribution.
    """
    if len(species) > 2:
        print("Note: when more than two species are provided,"
              " only the first two will be used")

    t0 = time()

    # Load the compressed data
    data = fetch_species_distributions()

    # Set up the data grid
    xgrid, ygrid = construct_grids(data)

    # The grid in x,y coordinates
    X, Y = np.meshgrid(xgrid, ygrid[::-1])

    # create a bunch for each species
    BV_bunch = create_species_bunch(species[0],
                                    data.train, data.test,
                                    data.coverages, xgrid, ygrid)
    MM_bunch = create_species_bunch(species[1],
                                    data.train, data.test,
                                    data.coverages, xgrid, ygrid)

    # background points (grid coordinates) for evaluation
    np.random.seed(13)
    background_points = np.c_[np.random.randint(low=0, high=data.Ny,
                                                size=10000),
                              np.random.randint(low=0, high=data.Nx,
                                                size=10000)].T

    # We'll make use of the fact that coverages[6] has measurements at all
    # land points.  This will help us decide between land and water.
    land_reference = data.coverages[6]

    # Fit, predict, and plot for each species.
    for i, species in enumerate([BV_bunch, MM_bunch]):
        print("_" * 80)
        print("Modeling distribution of species '%s'" % species.name)

        # Standardize features
        mean = species.cov_train.mean(axis=0)
        std = species.cov_train.std(axis=0)
        train_cover_std = (species.cov_train - mean) / std

        # Fit OneClassSVM
        print(" - fit OneClassSVM ... ", end='')
        print(train_cover_std.shape)
        clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5)
        clf.fit(train_cover_std)
        print("done.")

        # Plot map of South America
        plt.subplot(1, 2, i + 1)
        if basemap:
            print(" - plot coastlines using basemap")
            m = Basemap(projection='cyl', llcrnrlat=Y.min(),
                        urcrnrlat=Y.max(), llcrnrlon=X.min(),
                        urcrnrlon=X.max(), resolution='c')
            m.drawcoastlines()
            m.drawcountries()
        else:
            print(" - plot coastlines from coverage")
            plt.contour(X, Y, land_reference,
                        levels=[-9999], colors="k",
                        linestyles="solid")
            plt.xticks([])
            plt.yticks([])

        print(" - predict species distribution")

        # Predict species distribution using the training data
        Z = np.ones((data.Ny, data.Nx), dtype=np.float64)

        # We'll predict only for the land points.
        idx = np.where(land_reference > -9999)
        coverages_land = data.coverages[:, idx[0], idx[1]].T

        pred = clf.decision_function((coverages_land - mean) / std)[:, 0]
        Z *= pred.min()
        Z[idx[0], idx[1]] = pred

        levels = np.linspace(Z.min(), Z.max(), 25)
        Z[land_reference == -9999] = -9999

        # plot contours of the prediction
        plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
        plt.colorbar(format='%.2f')

        # scatter training/testing points
        plt.scatter(species.pts_train['dd long'], species.pts_train['dd lat'],
                    s=2 ** 2, c='black',
                    marker='^', label='train')
        plt.scatter(species.pts_test['dd long'], species.pts_test['dd lat'],
                    s=2 ** 2, c='black',
                    marker='x', label='test')
        plt.legend()
        plt.title(species.name)
        plt.axis('equal')

        # Compute AUC with regard to the background points
        pred_background = Z[background_points[0], background_points[1]]
        pred_test = clf.decision_function((species.cov_test - mean)
                                          / std)[:, 0]
        scores = np.r_[pred_test, pred_background]
        y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)]
        fpr, tpr, thresholds = metrics.roc_curve(y, scores)
        roc_auc = metrics.auc(fpr, tpr)
        plt.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right")
        print("\n Area under the ROC curve : %f" % roc_auc)

    print("\ntime elapsed: %.2fs" % (time() - t0))
Example #7
    # Part of a QGIS Processing algorithm class; numpy, gdal, sklearn's svm
    # and metrics, and the Processing framework are imported at module level.
    def processAlgorithm(self, progress):
        # Set up the data as an sklearn Bunch (basically just a dictionary
        # with attribute access)
        data = Bunch()

        # Vector layer
        vector = self.getParameterValue(self.SPECIES)
        v = Processing.getObject(vector)
        v_crs = v.crs()

        # Environmental layers
        envlayers = self.getParameterValue(self.ENV)
        if func.unificationNecessary(envlayers.split(";")):
            raise GeoAlgorithmExecutionException(
                "All input environmental layers need to have the same resolution and extent. Use the Unify tool beforehand"
            )
            #TODO: Enable option to do this automatically

        progress.setConsoleInfo("Loading Coverage Data")

        # Check Projection and Cellsize
        for lay in envlayers.split(";"):
            r = Processing.getObject(lay)  # QgsRasterLayer object
            if r.crs() != v_crs:
                raise GeoAlgorithmExecutionException(
                    "All input layers need to have the same projection")
            if round(r.rasterUnitsPerPixelX()) != round(
                    r.rasterUnitsPerPixelY()):
                raise GeoAlgorithmExecutionException(
                    "Grid Cell size values are not equal. Please be sure that grid cells are squares."
                )

        # Set coverage parameters
        r = Processing.getObject(
            envlayers.split(";")[0])  # QgsRasterLayer object
        ex = r.extent()
        data["grid_size"] = r.rasterUnitsPerPixelX()
        data["Nx"] = r.width()
        data["Ny"] = r.height()
        data["x_left_lower_corner"] = ex.xMinimum()
        data["y_left_lower_corner"] = ex.yMinimum()

        # Load in Coverage values
        coverage = []
        for lay in envlayers.split(";"):
            raster = gdal.Open(str(lay))
            if raster.RasterCount > 1:
                progress.setConsoleInfo(
                    "Warning: Multiple bands for layer detected. Using only first band."
                )
            array = raster.GetRasterBand(1).ReadAsArray()
            NA = raster.GetRasterBand(1).GetNoDataValue()
            if NA is None:
                raise GeoAlgorithmExecutionException(
                    "Warning: Raster layer has no no-data value. Please specify a no-data value for this dataset."
                )
            else:
                array[array ==
                      NA] = -9999  # Replace nodata-values of array with -9999
            coverage.append(array)
        data["coverages"] = numpy.array(
            coverage)  # Load all the coverage values into the bunch

        # Setup parameters for output prediction
        a = gdal.Open(envlayers.split(";")[0])
        columns = a.RasterXSize
        rows = a.RasterYSize
        driver = a.GetDriver()
        NA = -9999
        gt = a.GetGeoTransform()
        proj = a.GetProjection()
        output = self.getOutputValue(self.OUT_PRED)

        # Set up the data grid
        xgrid, ygrid = construct_grids(data)

        # The grid in x,y coordinates
        X, Y = numpy.meshgrid(xgrid, ygrid[::-1])

        # background points (grid coordinates) for evaluation
        numpy.random.seed(100)
        background_points = numpy.c_[
            numpy.random.randint(low=0, high=data.Ny, size=10000),
            numpy.random.randint(low=0, high=data.Nx, size=10000)].T

        # We'll make use of the fact that coverages[6] has measurements at all
        # land points.  This will help us decide between land and water.
        # FIXME: Assuming that all predictors have a similar distribution. Might be violated
        land_reference = data.coverages[0]

        progress.setConsoleInfo("Loading Occurence Data and coverage")
        # Creating response
        train = []
        for feature in v.getFeatures():
            geom = feature.geometry().asPoint()
            mx = geom.x()
            my = geom.y()
            train.append((mx, my))
        data["train"] = numpy.array(train)  # Add to bunch as training dataset

        # create species bunch
        sp_Bunch = Bunch(name="Species")
        points = dict(train=data.train)
        for label, pts in points.items():
            # determine coverage values for each of the training points
            ix = numpy.searchsorted(xgrid, pts[:, 0])
            iy = numpy.searchsorted(ygrid, pts[:, 1])
            sp_Bunch['cov_%s' % label] = data.coverages[:, -iy, ix].T

        progress.setConsoleInfo(
            "Finished loading coverage data of environmental layers")

        # Starting modelling
        progress.setConsoleInfo("Finished preparing the data for the analysis")
        progress.setConsoleInfo("----")
        progress.setConsoleInfo("Starting Modelling with support of sklearn")

        # Standardize features
        #TODO: Enable different or no Standardization methods
        mean = sp_Bunch.cov_train.mean(axis=0)
        std = sp_Bunch.cov_train.std(axis=0)
        train_cover_std = (sp_Bunch.cov_train - mean) / std

        # Fit OneClassSVM
        progress.setConsoleInfo("Fitting Support Vector Machine")
        # TODO: Allow the user to vary the input
        clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5)
        clf.fit(train_cover_std)
        progress.setConsoleInfo("Fitting done")

        # Predict species distribution using the training data
        Z = numpy.ones((data.Ny, data.Nx), dtype=numpy.float64)

        # We'll predict only for the land points.
        idx = numpy.where(land_reference > -9999)
        coverages_land = data.coverages[:, idx[0], idx[1]].T

        pred = clf.decision_function((coverages_land - mean) / std)[:, 0]
        Z *= pred.min()
        Z[idx[0], idx[1]] = pred

        levels = numpy.linspace(Z.min(), Z.max(), 25)
        Z[land_reference == -9999] = -9999

        result = Z  # save the final results scores

        # Compute AUC w.r.t. background points: score the standardized
        # training presences against random background cells (there is no
        # held-out test set in this tool)
        pred_background = Z[background_points[0], background_points[1]]
        pred_test = clf.decision_function(train_cover_std)[:, 0]
        scores = numpy.r_[pred_test, pred_background]
        y = numpy.r_[numpy.ones(pred_test.shape),
                     numpy.zeros(pred_background.shape)]
        fpr, tpr, thresholds = metrics.roc_curve(y, scores)
        roc_auc = metrics.auc(fpr, tpr)  #  Area under the ROC curve
        # TODO: Evaluate the availability of other metrics to compute on (average mean error, etc.. )

        # Create Output Prediction File
        output = self.getOutputValue(self.OUT_PRED_RES)
        titles = ['AUC']
        res_pred = [roc_auc]
        # Save Output
        func.saveToCSV(res_pred, titles, output)

        # Create Output for resulting prediction
        metadata = driver.GetMetadata()
        if metadata.get(gdal.DCAP_CREATE) != "YES":
            progress.setConsoleInfo(
                "Output creation for the input file format is not supported "
                "by gdal. Creating GTiff by default.")
            driver = gdal.GetDriverByName("GTiff")

        # driver.Create expects a GDAL type code, not a NumPy dtype
        data_type = gdal.GDT_Float64
        try:
            outData = driver.Create(output, columns, rows, 1, data_type)
        except Exception:
            ProcessingLog.addToLog(ProcessingLog.LOG_ERROR,
                                   "Output file could not be created!")