Python get_basename示例，madmex.util.get_basename Python示例

示例#1

0

显示文件

    def preprocess(self):
        '''
        Compute the top of atmosphere (TOA) product, persist it and build a
        cloud mask with the configured algorithm.

        The TOA raster is written as ``<basename>_toa.tif`` inside the
        directory ``create_filename(get_parent(self.path), 'TOA')`` and the
        cloud mask as ``<basename>_cloud.tif`` in the same directory, where
        ``<basename>`` is taken from ``self.get_files()[2]``. Both rasters are
        created as float32 using ``self.file_dictionary[_IMAGE]`` as the
        reference image.

        Raises:
            ValueError: if ``self.algorithm`` is neither ``ANOMALY_DETECTION``
                nor ``TIME_SERIES``. The TOA file has already been written at
                that point.
        '''
        parser = self.get_sensor().parser
        solar_zenith = parser.get_attribute(rapideye.SOLAR_ZENITH)
        data_acquisition_date = parser.get_attribute(rapideye.ACQUISITION_DATE)
        solar_azimuth = parser.get_attribute(rapideye.SOLAR_AZIMUTH)
        geotransform = self.get_raster().get_attribute(raster.GEOTRANSFORM)
        data = self.get_raster().read_data_file_as_array()

        sun_earth_distance = calculate_distance_sun_earth(
            data_acquisition_date)
        top_of_atmosphere_data = calculate_toa_rapideye(
            calculate_rad_rapideye(data), sun_earth_distance, solar_zenith)
        top_of_atmosphere_directory = create_filename(get_parent(self.path),
                                                      'TOA')
        create_directory_path(top_of_atmosphere_directory)

        # Both products share the same basename and GDAL data type, so they
        # are resolved only once.
        # TODO: change [2] in self.get_files()[2]
        image_basename = get_basename(self.get_files()[2])
        float32_type = NumericTypeCodeToGDALTypeCode(numpy.float32)

        output_file = create_filename(top_of_atmosphere_directory,
                                      image_basename + '_toa.tif')
        create_raster_from_reference(output_file,
                                     top_of_atmosphere_data,
                                     self.file_dictionary[_IMAGE],
                                     data_type=float32_type)
        LOGGER.debug('Top of atmosphere file was created.')

        cloud_output_file = create_filename(top_of_atmosphere_directory,
                                            image_basename + '_cloud.tif')
        if self.algorithm == ANOMALY_DETECTION:
            LOGGER.debug('Cloud mask by anomaly detection process.')
            clouds = self.anomaly_detection_cloud_mask(top_of_atmosphere_data,
                                                       cloud_output_file,
                                                       solar_zenith,
                                                       solar_azimuth,
                                                       geotransform)
        elif self.algorithm == TIME_SERIES:
            LOGGER.debug('Cloud mask by reference with time series process.')
            tile_id = self.get_sensor().get_attribute(TILE_ID)
            clouds = self.masking_with_time_series(data, cloud_output_file,
                                                   solar_zenith, solar_azimuth,
                                                   geotransform, tile_id)
        else:
            # Without this branch ``clouds`` stays unbound and the failure
            # surfaces later as an unrelated UnboundLocalError.
            raise ValueError(
                'Unknown cloud mask algorithm: %s' % self.algorithm)

        create_raster_from_reference(cloud_output_file,
                                     clouds,
                                     self.file_dictionary[_IMAGE],
                                     data_type=float32_type)
        LOGGER.info('Cloud mask was created.')

示例#2

0

显示文件

文件： aggregate.py 项目： makeling/antares

    def handle(self, **options):
        '''
        Run ``self.aggregate_by_block`` over every input image.

        The output directory is read from ``options['output'][0]``. For each
        entry of ``options['path']`` the result is written to
        ``<basename>_ipcc.tif`` in that directory, using the
        ``INITIAL_IPCC_2015`` and ``FINAL_IPCC_2015`` tables. The target name
        and the elapsed time of every image are logged.
        '''
        output_directory = options['output'][0]

        for image_path in options['path']:
            print(image_path)
            target_name = '%s_ipcc.tif' % get_basename(image_path)
            LOGGER.info(target_name)
            target = create_filename(output_directory, target_name)

            # Time only the aggregation itself.
            started = time.time()
            self.aggregate_by_block(image_path, target, INITIAL_IPCC_2015,
                                    FINAL_IPCC_2015)
            elapsed = time.time() - started

            LOGGER.info("--- %s seconds ---" % elapsed)
            LOGGER.info('Dataset was written.')

示例#3

0

显示文件

文件： split.py 项目： makeling/antares

def get_convex_hull(shape_name, destination_directory):
    '''
    Write the convex hull of all geometries of a shape file into a new shape
    file.

    Every feature geometry of the first layer of ``shape_name`` is collected
    and the convex hull of the collection is stored as a single polygon
    feature (field ``id`` = 1) in ``<basename>-hull.shp`` inside
    ``destination_directory``. An existing output file is replaced.

    Args:
        shape_name: path of the input shape file.
        destination_directory: directory that receives the hull shape file.

    Raises:
        IOError: if the input cannot be opened or the output cannot be
            created.
    '''
    driver = ogr.GetDriverByName(str('ESRI Shapefile'))
    shape = driver.Open(shape_name, 0)
    # Without OGR exceptions enabled, Open reports failure by returning None.
    if shape is None:
        raise IOError('Unable to open shape file: %s' % shape_name)

    datasource = None
    try:
        layer = shape.GetLayer()
        spatial_reference = layer.GetSpatialRef()
        output_name = create_filename(
            destination_directory, '%s-hull.shp' % get_basename(shape_name))

        geometries = ogr.Geometry(ogr.wkbGeometryCollection)
        for feature in layer:
            geometry = feature.GetGeometryRef()
            # Features without geometry contribute nothing to the hull.
            if geometry is not None:
                geometries.AddGeometry(geometry)

        if os.path.exists(output_name):
            driver.DeleteDataSource(output_name)
        datasource = driver.CreateDataSource(output_name)
        if datasource is None:
            raise IOError('Unable to create shape file: %s' % output_name)

        out_layer = datasource.CreateLayer(str('states_convexhull'),
                                           spatial_reference,
                                           geom_type=ogr.wkbPolygon)
        out_layer.CreateField(ogr.FieldDefn(str('id'), ogr.OFTInteger))

        hull_feature = ogr.Feature(out_layer.GetLayerDefn())
        hull_feature.SetGeometry(geometries.ConvexHull())
        hull_feature.SetField(str('id'), 1)
        out_layer.CreateFeature(hull_feature)
    finally:
        # Release both handles even when something above failed, so the
        # output is flushed and no file stays locked.
        shape.Destroy()
        if datasource is not None:
            datasource.Destroy()

示例#4

0

显示文件

文件： segment.py 项目： makeling/antares

    def handle(self, **options):
        '''
        Segment a raster stack and persist the resulting shapes in the
        database.

        The stack path is read from ``options['path'][0]`` and handed to
        ``bis.Model().predict``. The shapes and metadata it returns are turned
        into a query by ``persist_database``; the query is executed on the
        database connection and the transaction is committed. The time spent
        building the query and the time spent executing it are printed.
        '''
        stack = options['path'][0]

        segmentation = bis.Model()
        # The transform returned by predict is not needed here.
        shapes, _transform, meta = segmentation.predict(stack)

        start_time = time.time()
        print('about to start query')
        query = persist_database(shapes, meta)
        print('done')
        print(time.time() - start_time)

        start_time = time.time()
        with connection.cursor() as cursor:
            cursor.execute(query)
        transaction.commit()
        print(time.time() - start_time)

示例#5

0

显示文件

文件： usemodel.py 项目： makeling/antares

    def handle(self, **options):
        '''
        Classify every input raster with each requested model, block by block.

        Options read:
            output: directory that receives the classification rasters.
            modelname: names of the models to apply.
            modeldir: directory holding the persisted models; the PCA model is
                loaded from its ``pca`` entry and every model from the entry
                named after it.
            path: rasters to classify.

        Each raster is processed in blocks of 500 x 500 pixels: the block is
        flattened to one row per pixel, transformed with the PCA model and
        passed to the model's ``predict``. The prediction is written as a
        LZW compressed byte raster named ``<basename[:7]>-<model>.tif``.
        '''
        import time

        output = options['output'][0]
        models = options['modelname']
        model_directory = options['modeldir'][0]

        pca_model = pca.Model(5)
        pca_model.load(create_filename(model_directory, 'pca'))

        block_size = 500
        for model_name in models:
            persistence_directory = create_filename(model_directory,
                                                    model_name)
            model = load_model(model_name)
            model_instance = model.Model(persistence_directory)
            model_instance.load(persistence_directory)
            for path in options['path']:
                image_array = open_handle(path)
                # The array is indexed as (band, row, column) below.
                band_count = image_array.shape[0]
                y_size = image_array.shape[1]
                x_size = image_array.shape[2]
                basename = get_basename(path)[:7]
                warnings.filterwarnings('ignore')
                # Rows first: the buffer is filled with [row, column] slices,
                # so it has to be (y_size, x_size) to work for rasters that
                # are not square.
                final = numpy.zeros((y_size, x_size))
                start_time = time.time()
                for i in range(0, y_size, block_size):
                    rows = min(block_size, y_size - i)
                    for j in range(0, x_size, block_size):
                        cols = min(block_size, x_size - j)
                        step = image_array[:, i:i + rows, j:j + cols]
                        # One row per band, one column per pixel of the block.
                        step_ravel = step.reshape(band_count, -1)
                        prediction = model_instance.predict(
                            pca_model.transform(numpy.transpose(step_ravel)))
                        final[i:i + rows, j:j + cols] = prediction.reshape(
                            (rows, cols))
                print("--- %s seconds ---" % (time.time() - start_time))
                create_directory_path(output)
                classification = create_filename(
                    output, '%s-%s.tif' % (basename, model_name))
                create_raster_from_reference(classification,
                                             final,
                                             path,
                                             data_type=gdal.GDT_Byte,
                                             creating_options=['COMPRESS=LZW'])

示例#6

0

显示文件

    def get_thumbnail_with_path(self, thumbnail_path):
        '''
        Convert the browse image of the scene into a JPEG thumbnail.

        The file registered under ``_BROWSE`` in ``self.file_dictionary`` is
        converted with ``gdal_translate`` and stored as ``<basename>.jpg``
        inside a ``thumbnail`` directory below ``thumbnail_path``. The
        directory is created first; the exit status of the command is not
        checked.
        '''
        import subprocess

        target_directory = create_filename(thumbnail_path, 'thumbnail')
        create_directory_path(target_directory)

        browse_image = self.file_dictionary[_BROWSE]
        jpeg_name = '%s.jpg' % get_basename(browse_image)
        jpeg_path = create_filename(target_directory, jpeg_name)

        # NOTE: the executable path is specific to the macOS GDAL framework.
        subprocess.call([
            '/Library/Frameworks/GDAL.framework/Programs/gdal_translate',
            browse_image,
            '-of',
            'JPEG',
            jpeg_path,
        ])

示例#7

0

显示文件

 def handle(self, **options):
     '''
     Extract a zip archive and tile every raster found in its
     ``trend_tiles`` folder.

     The archive ``options['path'][0]`` is extracted into the staging
     directory ``<output>/aux_<basename>``. Each entry of its
     ``trend_tiles`` folder is passed to ``tile_map`` with the arguments
     4000, 4000, 2, 2 and a target of the same name in
     ``<output>/<basename>``. The staging directory is removed at the end.
     '''
     output = options['output'][0]
     archive_path = options['path'][0]
     basename = get_basename(archive_path)

     staging_directory = create_filename(output, 'aux_%s' % basename)
     tiles_directory = create_filename(output, basename)

     with ZipFile(archive_path, 'r') as archive:
         archive.extractall(staging_directory)

     source_directory = create_filename(staging_directory, 'trend_tiles')
     tif_names = get_contents_from_folder(source_directory)
     create_directory_path(tiles_directory)

     for tif_name in tif_names:
         tile_map(create_filename(source_directory, tif_name),
                  create_filename(tiles_directory, tif_name),
                  4000, 4000, 2, 2)

     remove_directory(staging_directory)

示例#8

0

显示文件

文件： split.py 项目： makeling/antares

 def get_year_from_path(self, path):
     '''
     Return characters 19 to 22 of the basename of ``path``.

     Presumably the file naming convention places the four digit year at
     that position; confirm against the callers' inputs.
     '''
     year_position = slice(19, 23)
     basename = get_basename(path)
     return basename[year_position]

示例#9

0

显示文件

文件： split.py 项目： makeling/antares

    def handle(self, **options):
        '''
        Compute per-year cover areas and year-to-year change statistics for
        every feature of a shape file and store them as JSON.

        Options read:
            shape: shape file that is split into one shape file per feature.
            dest: destination directory for the split shapes, the clipped
                rasters (``raster/<year>/``) and the JSON files
                (``cover_stats/``).
            path: rasters to analyse; the year of each one is taken from its
                file name by ``self.get_year_from_path``.

        For every feature shape and every raster the raster is clipped with
        ``gdalwarp``, the pixels of each value are counted and converted with
        ``area`` into a data frame indexed by year. ``calculate_thh`` is then
        applied to every pair of consecutive years. The results are written
        to ``cover-stats.json`` and ``tth-stats.json``.
        '''
        shape_name = options['shape'][0]
        destination = options['dest'][0]
        paths = options['path']

        years = []
        for path in paths:
            years.append(self.get_year_from_path(path))

        # NOTE(review): a missing shape file is only visible through the
        # absence of this log line; processing continues either way.
        if os.path.exists(shape_name):
            LOGGER.info('The file %s was found.' % shape_name)
        shape_files = split_shape_into_features(shape_name, destination,
                                                str('id'), str('nombre'), '__')
        process_launcher = LocalProcessLauncher()

        import time
        start_time = time.time()

        # One JSON entry per feature is accumulated in each of these lists.
        cover_array = []
        tth_array = []
        cover_file = 'cover-stats.json'
        tth_name = 'tth-stats.json'
        json_directory = create_filename(destination, 'cover_stats')
        json_file = create_filename(json_directory, cover_file.lower())
        tth_file = create_filename(json_directory, tth_name.lower())

        for shape_file in shape_files:
            # The split files are named '<id>__<name>'; note that this
            # rebinds shape_name, which held the input path until here.
            shape_name = get_basename(shape_file).split('__')
            anp_id = shape_name[0]
            anp_name = shape_name[1]
            basename = '%s.tif' % anp_name

            # Rows are years, columns are the raster values 0 to 8.
            dataframe = DataFrame(index=years,
                                  columns=[0, 1, 2, 3, 4, 5, 6, 7, 8])

            create_directory_path(json_directory)

            print shape_file
            for path in paths:
                #pixel_resolution, dataset = pixel_info(path)

                year = self.get_year_from_path(path)
                raster_dir = create_filename(
                    create_filename(destination, 'raster'), year)
                create_directory_path(raster_dir)
                raster_file = create_filename(raster_dir, basename)

                # Clip the raster to the feature outline.
                # NOTE(review): the paths are interpolated unquoted into the
                # command string - confirm they never contain spaces.
                shell_command = 'gdalwarp -ot Byte -co COMPRESS=LZW -cutline %s -crop_to_cutline %s %s' % (
                    shape_file, path, raster_file)
                print shell_command
                print process_launcher.execute(shell_command)
                raster_array = open_handle(raster_file)

                # The pixel area is the product of the two pixel sizes stored
                # in the geotransform.
                ds = gdal.Open(raster_file)
                geotransform = ds.GetGeoTransform()
                x_resolution = geotransform[1]
                y_resolution = geotransform[5]
                pixel_area = abs(x_resolution * y_resolution)

                unique_values = numpy.unique(raster_array, return_counts=True)

                indexes = unique_values[0]
                counts = unique_values[1]

                # NOTE(review): DataFrame.set_value is deprecated in recent
                # pandas releases - confirm the pinned pandas version.
                for i in range(len(indexes)):
                    key = indexes[i]
                    dataframe.set_value(year, key,
                                        area(int(counts[i]), pixel_area))
            dataframe = dataframe.sort_index()
            columns = list(dataframe.columns.values)
            index = list(dataframe.index.values)

            print dataframe
            cover_array.append(
                self.dataframe_to_json(dataframe, anp_id, anp_name))

            tth_dataframe = DataFrame(columns=columns)

            # Compare every year with the following one; the label is
            # '<year>-<next year>' and the last argument is the year gap.
            # NOTE(review): .ix is deprecated in recent pandas releases -
            # confirm the pinned pandas version.
            for i in range(len(index) - 1):
                label = '%s-%s' % (index[i], index[i + 1])
                tth_column = calculate_thh(dataframe.ix[i],
                                           dataframe.ix[i + 1],
                                           int(index[i + 1]) - int(index[i]))
                for j in range(len(tth_column)):
                    tth_dataframe.set_value(label, j, tth_column[j])
            tth_dataframe = tth_dataframe.sort_index()

            print tth_dataframe

            tth_array.append(
                self.dataframe_to_json(tth_dataframe, anp_id, anp_name))

        self.json_to_file(json_file, cover_array)
        self.json_to_file(tth_file, tth_array)

        print("--- %s seconds ---" % (time.time() - start_time))

示例#10

0

显示文件

文件： test.py 项目： makeling/antares

    def handle(self, **options):
        '''
        Stack the bands and derived indexes of every input scene into one
        feature raster.

        For each entry of ``options['path']`` a bundle is obtained and the
        first five bands of its raster are stacked with the NDVI, red edge
        NDVI, GNDVI and NDRE arrays (each clamped to [-1, 1]) and the Sobel
        filter (sigma=2). The ten layer stack is written to
        ``<basename>_all_features.tif`` in ``options['output'][0]`` with
        ``BIGTIFF=YES``; the Sobel layer is also written on its own as
        ``sovel2.tif``. The shape of every layer is printed.
        '''

        def clamp_index(index_array):
            # Clamp in place so the values stay within [-1, 1].
            index_array[index_array <= -1] = -1
            index_array[index_array >= 1] = 1
            return index_array

        output = options['output'][0]
        for path in options['path']:
            bundle = _get_bundle_from_path(path)
            raster_file = bundle.get_raster_file()
            basename = get_basename(raster_file)

            sovel_file = create_filename(output, 'sovel2.tif')
            all_file = create_filename(output,
                                       '%s_all_features.tif' % basename)
            print(all_file)

            image_array = bundle.get_raster().read_data_file_as_array()

            ndvi_array = clamp_index(bundle.get_NDVI())
            red_edge_ndvi_array = clamp_index(bundle.get_red_edge_NDVI())
            gndvi_array = clamp_index(bundle.get_gndvi())
            ndre_array = clamp_index(bundle.get_ndre())
            # The Sobel output is deliberately left unclamped.
            sobel_filter_array = bundle.get_sobel_filter(sigma=2)

            create_raster_from_reference(sovel_file, sobel_filter_array,
                                         raster_file)

            layers = [
                image_array[0], image_array[1], image_array[2], image_array[3],
                image_array[4], ndvi_array, red_edge_ndvi_array, gndvi_array,
                ndre_array, sobel_filter_array
            ]
            all_features = numpy.array(layers)

            for layer in layers:
                print(layer.shape)
            print(all_features.shape)

            create_raster_from_reference(all_file,
                                         all_features,
                                         raster_file,
                                         creating_options=['BIGTIFF=YES'])

示例#11

0

显示文件

文件： usemodelobjects.py 项目： makeling/antares

    def handle(self, **options):
        '''
        Segment every input scene and classify its segments with each
        requested model.

        Options read:
            output: directory that receives the classified shape files.
            modelname: names of the models to apply.
            modeldir: directory holding one persisted model per name.
            region: value passed to ``gdal-segment`` as ``-region``.
            path: RapidEye scenes to process.

        For each scene the feature raster ``<basename>_all_features.tif`` is
        created in the ``helper`` folder of the ``TEMPORARY`` setting unless
        it already exists. The segmentation shape file is produced through
        the ``madmex/segment`` docker image, again only when missing. Every
        model predicts on the data table read from that shape file and the
        predictions are written with ``write_results`` to
        ``<name[0:32]>_classification.shp`` in the output directory.
        '''
        output = options['output'][0]
        models = options['modelname']
        model_directory = options['modeldir'][0]
        region = options['region'][0]

        # Constant lookup table from class index to category name; built once
        # instead of on every iteration.
        categories_dictionary = {
            0: "AGRICULTURA DE RIEGO",
            1: "AGRICULTURA DE TEMPORAL",
            2: "AGUA",
            3: "AREAS QUEMADAS",
            4: "ASENTAMIENTOS HUMANOS",
            5: "BOSQUE CULTIVADO",
            6: "BOSQUE DE AYARIN",
            7: "BOSQUE DE ENCINO",
            8: "BOSQUE DE ENCINO-PINO",
            9: "BOSQUE DE GALERIA",
            10: "BOSQUE DE MEZQUITE",
            11: "BOSQUE DE OYAMEL",
            12: "BOSQUE DE PINO",
            13: "BOSQUE DE PINO-ENCINO",
            14: "BOSQUE INDUCIDO",
            15: "BOSQUE MESOFILO DE MONTANA",
            16: "DESPROVISTO DE VEGETACION",
            17: "INDEFINIDO",
            18: "MANGLAR",
            19: "MATORRAL SUBTROPICAL",
            20: "MEZQUITAL",
            21: "NUBES",
            22: "PASTIZAL CULTIVADO",
            23: "PASTIZAL HALOFILO",
            24: "PASTIZAL INDUCIDO",
            25: "PASTIZAL NATURAL",
            26: "PRADERA DE ALTA MONTANA",
            27: "SABANOIDE",
            28: "SELVA ALTA PERENNIFOLIA",
            29: "SELVA ALTA SUBPERENNIFOLIA",
            30: "SELVA BAJA CADUCIFOLIA",
            31: "SELVA BAJA ESPINOSA CADUCIFOLIA",
            32: "SELVA BAJA SUBCADUCIFOLIA",
            33: "SELVA DE GALERIA",
            34: "SELVA MEDIANA CADUCIFOLIA",
            35: "SELVA MEDIANA SUBCADUCIFOLIA",
            36: "SELVA MEDIANA SUBPERENNIFOLIA",
            37: "SIN VEGETACION APARENTE",
            38: "SOMBRAS",
            39: "TULAR",
            40: "VEGETACION DE DUNAS COSTERAS",
            41: "VEGETACION HALOFILA HIDROFILA",
            42: "ZONA URBANA"
        }

        start_time = time.time()

        for path in options['path']:
            print(path)

            scene_bundle = rapideye.Bundle(path)
            directory = getattr(SETTINGS, 'TEMPORARY')
            directory_helper = create_filename(directory, 'helper')
            create_directory_path(directory_helper)

            basename = get_basename(scene_bundle.get_raster_file())
            all_file = create_filename(directory_helper,
                                       '%s_all_features.tif' % basename)

            # Do not recalculate the feature raster if it is already there.
            if not is_file(all_file):
                scene_bundle.get_feature_array(all_file)

            filename = get_basename(all_file)
            segments_file = create_filename(directory_helper,
                                            '%s.shp' % filename)

            # The segmentation is expensive, so an existing result is reused.
            if not is_file(segments_file):
                # NOTE(review): region is interpolated unescaped into a shell
                # command - confirm it only ever comes from a trusted operator.
                shell_string = 'docker run --rm -v %s:/data madmex/segment gdal-segment %s.tif -out helper/%s.shp -algo SLIC -region %s' % (
                    directory, filename, filename, region)
                launcher = LocalProcessLauncher()
                LOGGER.debug('Docker command: %s', shell_string)
                launcher.execute(shell_string)

            data = read_data_table(segments_file)

            results = {}
            for model_name in models:
                persistence_directory = create_filename(
                    model_directory, model_name)
                print(model_name)
                model = load_model(model_name)
                model_instance = model.Model(persistence_directory)
                model_instance.load(persistence_directory)
                results[model_name] = model_instance.predict(data)
            print(results)

            create_directory_path(output)
            write_results(
                segments_file,
                create_filename(output,
                                '%s_classification.shp' % filename[0:32]),
                results, categories_dictionary)

        LOGGER.info("--- %s seconds ---" % (time.time() - start_time))

示例#12

0

显示文件

    def copy_with_dictionary(self, output_directory, dictionary):
        '''
        Copy the layer into a new shape file, adding a mapped integer field
        for every original field.

        The output is ``<basename of self.image_path>_mapped.shp`` inside
        ``output_directory`` (created if needed; an existing file is
        replaced). It keeps the layer name and spatial reference of the
        source. Besides a copy of every source field it gets one integer
        field named ``<field>INTEGER`` holding
        ``dictionary[<value of that field>]``.

        Args:
            output_directory: directory that receives the output shape file.
            dictionary: mapping from field values to the values stored in the
                ``INTEGER`` fields.

        Returns:
            A list with one ``Data`` object per copied feature, all of them
            pointing to the same output file. This shape is kept for
            backward compatibility.

        Raises:
            KeyError: if a field value is missing from ``dictionary``.
        '''
        layer = self.get_layer()
        layer_name = layer.GetName()
        spatial_reference = layer.GetSpatialRef()
        in_feature = layer.GetNextFeature()
        in_layer_definition = layer.GetLayerDefn()
        field_count = in_layer_definition.GetFieldCount()

        name = get_basename(self.image_path)
        create_directory_path(output_directory)
        output_name = create_filename(output_directory, '%s_mapped.shp' % name)

        # A stale output has to be removed before the data source is created;
        # deleting it while it is being written discards the new content.
        if is_file(output_name):
            self.driver.DeleteDataSource(output_name)
        data_source = self.driver.CreateDataSource(output_name)
        out_layer = data_source.CreateLayer(layer_name, spatial_reference, geom_type=ogr.wkbPolygon)

        # The schema is defined once: first the copies of the source fields,
        # then one integer field per source field for the mapped value.
        for i in range(field_count):
            out_layer.CreateField(in_layer_definition.GetFieldDefn(i))
        mapped_names = []
        for i in range(field_count):
            mapped_name = str("%sINTEGER" % in_layer_definition.GetFieldDefn(i).GetNameRef())
            out_layer.CreateField(ogr.FieldDefn(mapped_name, ogr.OFTInteger))
            mapped_names.append(mapped_name)
        # NOTE(review): the shapefile format limits field names to 10
        # characters, so the '<field>INTEGER' names may be truncated by the
        # driver - confirm the lookups by name below still resolve.

        out_layer_definition = out_layer.GetLayerDefn()
        shape_files = []
        while in_feature:
            shape_files.append(output_name)
            out_feature = ogr.Feature(out_layer_definition)
            out_feature.SetGeometry(in_feature.GetGeometryRef())
            for i in range(field_count):
                value = in_feature.GetField(i)
                out_feature.SetField(out_layer_definition.GetFieldDefn(i).GetNameRef(), value)
                out_feature.SetField(mapped_names[i], str(dictionary[value]))
            out_layer.CreateFeature(out_feature)
            out_feature = None
            in_feature = layer.GetNextFeature()

        # Flush the output to disk before it is opened again through Data.
        out_layer = None
        data_source.Destroy()
        self.close()
        return [Data(filename) for filename in shape_files]

示例#13

0

显示文件

文件： createmodelobjects.py 项目： makeling/antares

    def handle(self, **options):
        '''
        In this example command, the values that come from the user input are
        added up and the result is printed in the screen.
        '''
        target_tag = 'DN'
        start_time_all = time.time()
        shape_name = options['shape'][0]
        raster_paths = options['path']
        destination = options['dest']
        models = options['model']
        dataframe_features = None
        temporary_directory = getattr(SETTINGS, 'TEMPORARY')
        create_directory_path(temporary_directory)
        # I read the training data in shape form
        training_shape = vector.Data(shape_name)
        training_dataframe = training_shape.to_dataframe()
        training_path = create_filename(temporary_directory,
                                        'training_raster.tif')
        categories_file = create_filename(temporary_directory,
                                          'categories.json')
        training_warped_path = create_filename(temporary_directory,
                                               'training_warped_raster.tif')
        pixel_size = 0.000462175996292

        if not is_file(training_warped_path):
            training_raster = vector_to_raster(
                training_shape, training_path, pixel_size, -pixel_size,
                ['ATTRIBUTE=OBJECTID', 'COMPRESS=LZW'])
            training_raster_warped = training_raster.reproject(
                training_warped_path, epgs=32617)
        else:
            training_raster_warped = raster.Data(training_warped_path)

        dem_file = getattr(SETTINGS, 'DEM')

        dem_raster = raster.Data(dem_file)
        print dem_raster.get_spatial_reference()
        print 'reproyecting raster'
        #dem_raster_warped = dem_raster.reproject(training_warped_path, epgs=32614)

        #training_raster_warped = raster.Data(training_path)

        aspect_file = getattr(SETTINGS, 'ASPECT')
        slope_file = getattr(SETTINGS, 'SLOPE')

        print dem_file, aspect_file, slope_file

        for raster_path in raster_paths:
            scene_bundle = rapideye.Bundle(raster_path)

            raster_mask = scene_bundle.get_raster()

            #example_path = create_filename(temporary_directory, 'mask')
            #create_directory_path(example_path)
            #raster_to_vector_mask(raster_mask, example_path)

            print scene_bundle.get_raster_file()

            basename = get_basename(scene_bundle.get_raster_file())
            all_file = create_filename(temporary_directory,
                                       '%s_all_features.tif' % basename)
            # Do not recalculate if the file is already there.
            if is_file(all_file):
                features_raster = raster.Data(all_file)
            else:
                features_raster = scene_bundle.get_feature_array(all_file)
            new_df = get_dataframe_from_raster(features_raster,
                                               training_raster_warped)
            if new_df is not None:
                if dataframe_features is not None:
                    dataframe_features = pandas.concat([
                        dataframe_features,
                        get_dataframe_from_raster(features_raster,
                                                  training_raster_warped)
                    ])
                else:
                    dataframe_features = get_dataframe_from_raster(
                        features_raster, training_raster_warped)

        features_size = len(list(dataframe_features))

        training_set = dataframe_features.set_index(0).join(
            training_dataframe.set_index('OBJECTID'))

        print training_set

        training_set['target'] = pandas.Categorical.from_array(
            training_set[target_tag]).labels
        categories_array = pandas.Categorical.from_array(
            training_set[target_tag]).categories
        create_categories_file(categories_file, categories_array)
        training_set = training_set[training_set['target'] != -1]
        #features_size includes 0 that is the index of the feature
        training_set_array = numpy.transpose(
            numpy.transpose(training_set.as_matrix([range(1, features_size)])))
        target_set_array = training_set.pop('target')

        print training_set_array.shape
        print target_set_array.shape

        X_train, X_test, y_train, y_test = train_test_split(training_set_array,
                                                            target_set_array,
                                                            train_size=0.8,
                                                            test_size=0.2)
        models_directory = create_filename(temporary_directory, 'models')
        create_directory_path(models_directory)

        for model_name in models:
            start_time = time.time()
            print numpy.unique(y_train)
            train_model(X_train, X_test, y_train, y_test, models_directory,
                        model_name)
            print "--- %s seconds training %s model---" % (
                (time.time() - start_time), model_name)