Exemplo n.º 1
0
def transform_aspect(aspect_raster, nodata, transform='both', out_dir=None):
    """Write cosine-transformed copies of an aspect raster.

    Each transform maps aspect (degrees) to ``100 * (cos(aspect - azimuth) + 1)``,
    i.e. a value in the range 0-200, stored as unsigned 8-bit with 255 as the
    fill value for cells equal to ``nodata``. Outputs are written with the
    GDAL 'envi' driver and a metadata file is created for each one.

    Parameters
    ----------
    aspect_raster : str
        Path of the input aspect raster.
    nodata : number
        Value in the input marking cells to exclude from the transform.
    transform : str
        'both' writes the NE/SW and NW/SE transforms, 'nesw' writes only
        NE/SW; any other value writes only NW/SE.
    out_dir : str, optional
        Directory to write outputs to. Defaults to the directory of
        ``aspect_raster``.
    """
    ds = gdal.Open(aspect_raster)
    ar = ds.ReadAsArray()
    tx = ds.GetGeoTransform()
    prj = ds.GetProjection()
    ds = None  # drop the dataset reference once everything has been read

    driver = gdal.GetDriverByName('envi')
    if not out_dir:
        out_dir = os.path.dirname(aspect_raster)

    # suffix -> (azimuth subtracted from aspect, wording used in the metadata)
    transforms = {
        'nesw': (225, 'northeast/southwest'),
        'nwse': (135, 'northwest/southeast'),
    }
    if transform == 'both':
        selected = ['nesw', 'nwse']
    elif transform == 'nesw':
        selected = ['nesw']
    else:
        # Any other value falls back to NW/SE only.
        selected = ['nwse']

    # Build output names from the file stem so that (a) out_dir is actually
    # honored and (b) a name without '.bsq' can never resolve to the input
    # path itself.
    stem_name, ext = os.path.splitext(os.path.basename(aspect_raster))
    mask = ar != nodata

    for suffix in selected:
        azimuth, label = transforms[suffix]
        transformed = np.full(ar.shape, 255, dtype=np.uint8)
        transformed[mask] = 100 * (np.cos(np.radians(ar - azimuth)) + 1)[mask]
        out_path = os.path.join(out_dir,
                                '%s_%s%s' % (stem_name, suffix, ext))
        array_to_raster(transformed,
                        tx,
                        prj,
                        driver,
                        out_path,
                        dtype=gdal.GDT_Byte,
                        silent=True)
        desc = ('Cosine transformed aspect representing %s-ness.\n'
                '\tCreated with the expression: '
                '100 * (np.cos(np.radians(ar - %s)) + 1)\n'
                '\tInput aspect raster: %s') % (label, azimuth, aspect_raster)
        createMetadata(sys.argv, out_path, description=desc)
Exemplo n.º 2
0
def scale_raster(in_raster, nodata, scale):
    """Multiply every non-nodata cell of a raster by ``scale``.

    The result is written back to ``in_raster`` itself (the input file is
    overwritten) as Int16 using the input dataset's own driver, and a
    metadata file describing the scaling is created for it.

    Parameters
    ----------
    in_raster : str
        Path of the raster to scale; also the output path.
    nodata : number
        Cells equal to this value are left untouched.
    scale : number
        Multiplier applied to all other cells.
    """
    source = gdal.Open(in_raster)
    geotransform = source.GetGeoTransform()
    projection = source.GetProjection()
    out_driver = source.GetDriver()
    values = source.ReadAsArray()
    source = None  # drop the dataset reference before rewriting the file

    # Scale only valid cells; the result is cast back to the array's dtype
    # on assignment.
    valid = values != nodata
    values[valid] = values[valid] * scale

    array_to_raster(values,
                    geotransform,
                    projection,
                    out_driver,
                    in_raster,
                    dtype=gdal.GDT_Int16,
                    silent=True)
    createMetadata(sys.argv,
                   in_raster,
                   description='%s scaled by %s.\n' % (in_raster, scale))
Exemplo n.º 3
0
def main(in_raster,
         snap_raster,
         in_nodata,
         out_nodata,
         out_path=None,
         mask_val=None,
         overwrite=False):
    """Snap a raster onto the grid/extent of another raster.

    The input array is copied into an array shaped like the snap raster
    (filled with ``out_nodata``), offset according to the two geotransforms.
    Input cells equal to ``in_nodata`` become ``out_nodata``. If a mask value
    is given, cells where the snap raster equals it are also set to
    ``out_nodata``.

    Parameters
    ----------
    in_raster, snap_raster : str
        Paths of the raster to snap and of the raster defining the grid.
    in_nodata, out_nodata : int or str
        Nodata values of the input and the output (converted with ``int``).
    out_path : str, optional
        If given, the result is written there (Byte when all values fit in
        0-255, otherwise Int16) together with a metadata file.
    mask_val : int or str, optional
        Snap-raster value to mask out. ``None``, ``False`` and ``''`` mean
        "no mask"; 0 is a valid mask value.
    overwrite : bool
        Allow replacing an existing ``out_path``.

    Returns
    -------
    The snapped array when ``out_path`` is not given, otherwise ``None``.

    Exits via ``sys.exit`` if ``out_path`` exists and ``overwrite`` is false.
    """
    t0 = time.time()
    in_nodata = int(in_nodata)
    out_nodata = int(out_nodata)

    # Fail fast: there is no point reading and snapping both rasters if the
    # result cannot be written.
    if out_path and os.path.exists(out_path) and not overwrite:
        sys.exit('out_path already exists')

    # Decide once whether masking is requested. A plain truthiness test would
    # treat a mask value of 0 as "no mask" (and, after int conversion, leave
    # the masking out of the metadata even though it was applied).
    use_mask = (mask_val is not None and mask_val is not False
                and mask_val != '')
    if use_mask:
        mask_val = int(mask_val)

    print('\nOpening datasets... ')
    t1 = time.time()
    ds_in = gdal.Open(in_raster)
    ar_in = ds_in.ReadAsArray()
    tx_in = ds_in.GetGeoTransform()
    ds_in = None

    ds_snap = gdal.Open(snap_raster)
    ar_snap = ds_snap.ReadAsArray()
    tx_snap = ds_snap.GetGeoTransform()
    prj = ds_snap.GetProjection()
    ds_snap = None
    print('%.1f seconds\n' % (time.time() - t1))

    print('Snapping input raster...')
    t1 = time.time()
    offset = calc_offset((tx_snap[0], tx_snap[3]), tx_in)
    snap_inds, in_inds = get_offset_array_indices(ar_snap.shape, ar_in.shape,
                                                  offset)
    ar = np.full(ar_snap.shape, out_nodata, dtype=ar_in.dtype)
    ar_in[ar_in == in_nodata] = out_nodata
    # Index tuples are (row_start, row_end, col_start, col_end).
    ar[snap_inds[0]:snap_inds[1],
       snap_inds[2]:snap_inds[3]] = ar_in[in_inds[0]:in_inds[1],
                                          in_inds[2]:in_inds[3]]

    if use_mask:
        ar[ar_snap == mask_val] = out_nodata

    print('%.1f seconds\n' % (time.time() - t1))

    if not out_path:
        return ar

    if ar.max() <= 255 and ar.min() >= 0:
        gdal_dtype = gdal.GDT_Byte
    else:
        gdal_dtype = gdal.GDT_Int16

    driver = get_gdal_driver(out_path)
    array_to_raster(ar, tx_snap, prj, driver, out_path, gdal_dtype,
                    out_nodata)

    # Write metadata
    desc = ('Input raster %s snapped to the extent of %s.') % (in_raster,
                                                               snap_raster)
    if use_mask:
        desc += ' Data were masked from snap raster with value %s.' % mask_val
    createMetadata(sys.argv, out_path, description=desc)

    print('\nTotal time to snap raster: %.1f seconds\n' % (time.time() - t0))
def main(raster, nodata, psu_shp, out_dir):
    """Split a raster into a test sample (text) and a training raster.

    For every PSU record read from ``psu_shp``, all non-nodata cells inside
    the PSU window are written to a tab-delimited text file
    (``<raster stem>_test.txt``) with their row/col, x/y, value and PSU name.
    Those same cells are then set to nodata in a copy of the raster that is
    written as ``<raster stem>_train.tif``, along with a metadata file.

    Parameters
    ----------
    raster : str
        Path of the input raster.
    nodata : int or str
        Nodata value of the raster (converted with ``int``).
    psu_shp : str
        Path of the PSU vector file; its attribute table must provide
        ``ul_x``, ``ul_y``, ``xmin``, ``xmax``, ``ymin``, ``ymax`` and
        ``name``.
    out_dir : str
        Directory receiving both outputs.
    """
    nodata = int(nodata)
    psus = attributes_to_df(psu_shp)
    ds = gdal.Open(raster)
    ar = ds.GetVirtualMemArray()  # read-only view used for sampling
    tx = ds.GetGeoTransform()
    prj = ds.GetProjection()
    driver = gdal.GetDriverByName('gtiff')

    # Just extract the test sample first
    test_sample_dfs = []
    print('\nGetting test samples for PSUs...')
    for _, psu in psus.iterrows():
        # Calc offsets
        row_off, col_off = calc_offset((tx[0], tx[3]), psu[['ul_x', 'ul_y']],
                                       tx)
        n_rows = abs(int((psu.ymax - psu.ymin) / tx[5]))
        n_cols = abs(int((psu.xmax - psu.xmin) / tx[1]))

        # Get values. Use the shape of the window actually returned: when a
        # PSU extends past the bottom/right raster edge the slice is smaller
        # than (n_rows, n_cols) and the index grids below must match it.
        window = ar[row_off:row_off + n_rows, col_off:col_off + n_cols]
        test_data = window.ravel()
        mask = test_data != nodata

        # Get row/col and x/y vals
        test_data = test_data[mask]
        row_inds, col_inds = np.indices(window.shape, dtype=np.uint32)
        row_inds = row_inds.ravel()[mask]
        col_inds = col_inds.ravel()[mask]
        y_coords = row_inds * tx[5] + psu.ul_y
        x_coords = col_inds * tx[1] + psu.ul_x

        test_sample_dfs.append(pd.DataFrame({
            'row': row_inds + row_off,
            'col': col_inds + col_off,
            'y': y_coords,
            'x': x_coords,
            'value': test_data,
            'tile_id': psu['name']
        }))

    test_sample = pd.concat(test_sample_dfs, ignore_index=True)

    # Derive output names from the file stem rather than replacing the last
    # four characters wherever they occur in the name.
    stem_name = os.path.splitext(os.path.basename(raster))[0]
    out_txt = os.path.join(out_dir, stem_name + '_test.txt')
    out_raster = os.path.join(out_dir, stem_name + '_train.tif')
    test_sample.to_csv(out_txt, sep='\t', index=False)

    # Read the raster as a write-able array and set all test samples to nodata
    print('\nAssigning nodata val to PSUs in training raster...\n')
    ar = ds.ReadAsArray()
    ar[test_sample.row, test_sample.col] = nodata
    array_to_raster(ar, tx, prj, driver, out_raster, nodata=nodata)

    desc = ('Training raster and test sample (text file with the same name '
            'but "_test" at the end) for making and evaluating STEM CONUS '
            'maps. Primary sampling units (PSUs) reserved for testing are '
            'assigned nodata.')
    desc += '\n\tInput raster: %s' % os.path.abspath(raster)
    desc += '\n\tNodata value: %s' % nodata
    desc += '\n\tPSU shapefile: %s' % os.path.abspath(psu_shp)
    desc += '\n\tOutput directory: %s\n' % os.path.abspath(out_dir)
    createMetadata(sys.argv, out_raster, description=desc)

    ds = None
Exemplo n.º 5
0
def main(in_raster,
         snap_raster,
         in_nodata,
         out_nodata,
         out_path=None,
         mask_val=None,
         overwrite=False,
         n_tiles=None):
    """Snap a raster onto the grid/extent of another, optionally by tile.

    With ``n_tiles`` the work is delegated to ``snap_by_tile``, which leaves
    pickled tile arrays in a temporary ``temp_tiles`` directory next to
    ``out_path``; they are stitched into one array and the directory is
    removed. Without ``n_tiles`` both rasters are read whole and passed to
    ``snap_array``.

    Parameters
    ----------
    in_raster, snap_raster : str
        Paths of the raster to snap and of the raster defining the grid.
    in_nodata, out_nodata : int or str
        Nodata values of the input and the output (converted with ``int``).
    out_path : str, optional
        If given, the result is written there (Byte when all values fit in
        0-255, otherwise Int16) together with a metadata file. Required when
        ``n_tiles`` is given.
    mask_val : optional
        Passed through to the snapping helpers; mentioned in the metadata
        when truthy.
    overwrite : bool
        Allow replacing an existing ``out_path``.
    n_tiles : optional
        Tiling specification handed to ``make_tiles``.

    Returns
    -------
    The snapped array when ``out_path`` is not given, otherwise ``None``.

    Raises
    ------
    IOError
        If ``n_tiles`` is given without ``out_path``.

    Exits via ``sys.exit`` if ``out_path`` exists and ``overwrite`` is false.
    """
    t0 = time.time()
    in_nodata = int(in_nodata)
    out_nodata = int(out_nodata)

    # Fail fast instead of discovering the conflict after all the work.
    if out_path and os.path.exists(out_path) and not overwrite:
        sys.exit('out_path already exists')

    print('\nOpening datasets... ')
    t1 = time.time()
    ds_in = gdal.Open(in_raster)
    tx_in = ds_in.GetGeoTransform()

    ds_snap = gdal.Open(snap_raster)
    snap_size = ds_snap.RasterYSize, ds_snap.RasterXSize
    tx_snap = ds_snap.GetGeoTransform()
    prj = ds_snap.GetProjection()
    print('%.1f seconds\n' % (time.time() - t1))

    if n_tiles:
        if not out_path:
            raise IOError('n_tiles was given, but no out_path specified')
        tiles = make_tiles(n_tiles, ds_snap)
        temp_dir = os.path.join(os.path.dirname(out_path), 'temp_tiles')
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)
        np_dtype = snap_by_tile(ds_in, ds_snap, tiles, tx_snap, tx_in,
                                in_nodata, out_nodata, temp_dir, mask_val)
        ar = np.full(snap_size, out_nodata, dtype=np_dtype)
        for _, coords in tiles.dropna(subset=['file']).iterrows():
            # NOTE(review): pickle is only safe on trusted files; these are
            # presumably the temp tiles written by snap_by_tile -- confirm.
            with open(coords.file, 'rb') as f:
                ar_tile = pickle.load(f)
            t_ysize, t_xsize = ar_tile.shape
            ul_r = coords.ul_r
            ul_c = coords.ul_c
            ar[ul_r:ul_r + t_ysize, ul_c:ul_c + t_xsize] = ar_tile
        shutil.rmtree(temp_dir)

    else:
        ar_in = ds_in.ReadAsArray()
        # Was misspelled 'ReasAsArray', which raised AttributeError on every
        # non-tiled run.
        ar_snap = ds_snap.ReadAsArray()
        ar = snap_array(ar_in, ar_snap, tx_in, tx_snap, in_nodata, out_nodata,
                        mask_val)
        print('%.1f minutes\n' % ((time.time() - t1) / 60))

    if not out_path:
        return ar

    if ar.max() <= 255 and ar.min() >= 0:
        gdal_dtype = gdal.GDT_Byte
    else:
        gdal_dtype = gdal.GDT_Int16

    driver = get_gdal_driver(out_path)
    array_to_raster(ar, tx_snap, prj, driver, out_path, gdal_dtype,
                    out_nodata)

    # Write metadata
    desc = ('Input raster %s snapped to the extent of %s.') % (in_raster,
                                                               snap_raster)
    if mask_val:
        desc += ' Data were masked from snap raster with value %s.' % mask_val
    createMetadata(sys.argv, out_path, description=desc)

    print('\nTotal time to snap raster: %.1f seconds\n' % (time.time() - t0))
Exemplo n.º 6
0
def main(model_dir, n_tiles, **kwargs):
    """Write one variable-importance map per variable for a STEM model.

    For each variable and each non-empty tile, the decision-tree models of
    the support sets overlapping the tile are unpickled, the variable's
    feature importance (scaled by 100 and truncated to int) is painted over
    the part of the tile each set covers, and the per-cell mean across sets
    is stored. Cells that are nodata in the model's vote raster, or covered
    by no set, get the nodata value. Maps are written as Byte GeoTIFFs to
    ``<model_dir>/importance_maps``.

    Parameters
    ----------
    model_dir : str
        Directory of the model; must contain the support-set table, the
        predict/train parameter files and the ``*_vote`` raster.
    n_tiles : str
        Comma-separated integers handed to ``stem.get_tiles``.
    **kwargs
        Optional ``variable_name=variable_id`` pairs selecting the variables
        to map. Without any, every variable occurring in the
        ``max_importance`` column of the support-set table is mapped.

    Raises
    ------
    IOError
        If ``model_dir`` is not an existing directory.
    """
    t0 = time.time()

    n_tiles = [int(n) for n in n_tiles.split(',')]
    if not os.path.isdir(model_dir):
        # Build a real message string (this used to be a tuple).
        message = ('model directory given does not exist or is not a '
                   'directory: %s' % model_dir)
        raise IOError(message)

    model = os.path.basename(model_dir)
    dt_dir = os.path.join(model_dir, 'decisiontree_models')
    set_txt = os.path.join(dt_dir, '%s_support_sets.txt' % model)
    df_sets = pd.read_csv(set_txt, sep='\t', index_col='set_id')

    pred_param_path = glob(os.path.join(model_dir,
                                        'predict_stem_*params.txt'))[0]
    predict_params, df_var = stem.read_params(pred_param_path)
    train_param_path = glob(os.path.join(model_dir,
                                         'train_stem_*params.txt'))[0]
    train_params, _ = stem.read_params(train_param_path)
    df_var.sort_index(inplace=True)

    nodata = int(predict_params['nodata'].replace('"', ''))
    if len(kwargs) == 0:
        var_ids = df_sets.max_importance.unique()
        var_names = df_var.ix[var_ids].index
        # Materialize so len(variables) works regardless of zip's return type.
        variables = list(zip(var_ids, var_names))
    else:
        # Iterating a dict directly yields only its keys; .items() is needed
        # to get the (name, id) pairs.
        variables = [(variable_id, variable_name)
                     for variable_name, variable_id in kwargs.items()]

    mask_path = os.path.join(model_dir, '%s_vote.bsq' % model)
    if not os.path.exists(mask_path):
        mask_path = mask_path.replace('.bsq', '.tif')
    mask_ds = gdal.Open(mask_path)
    mask_tx = mask_ds.GetGeoTransform()
    xsize = mask_ds.RasterXSize
    ysize = mask_ds.RasterYSize
    prj = mask_ds.GetProjection()
    df_tiles, df_tiles_rc, tile_size = stem.get_tiles(n_tiles, xsize, ysize,
                                                      mask_tx)
    total_tiles = len(df_tiles)
    df_tiles['tile'] = df_tiles.index

    # Find the tiles that have only nodata values
    t1 = time.time()
    print('\nFinding empty tiles...')
    mask = mask_ds.ReadAsArray() == nodata
    empty_tiles = stem.find_empty_tiles(df_tiles, ~mask, mask_tx)
    mask_ds = None
    print('%s empty tiles found of %s total tiles\n%.1f minutes\n' %
          (len(empty_tiles), total_tiles, (time.time() - t1) / 60))
    # Select only tiles that are not empty
    df_tiles = df_tiles.select(lambda x: x not in empty_tiles)
    total_tiles = len(df_tiles)

    support_size = [
        int(s)
        for s in train_params['support_size'].replace('"', '').split(',')
    ]
    # Support-set size in pixels, from its size in map units.
    set_size = [int(abs(s / mask_tx[1])) for s in support_size]

    out_dir = os.path.join(model_dir, 'importance_maps')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    print(variables)
    for vi, (v_id, v_name) in enumerate(variables):

        t1 = time.time()
        print('Making map for %s: %s of %s variables\n' % (v_name, vi + 1,
                                                           len(variables)))

        ar = np.full((ysize, xsize), nodata, dtype=np.uint8)

        for i, (t_ind, t_row) in enumerate(df_tiles.iterrows()):
            t2 = time.time()
            print('Aggregating for %s of %s tiles' % (i + 1, total_tiles))

            # Calculate the size of this tile in case it's at the edge where the
            #   tile size will be slightly different
            this_size = abs(t_row.lr_y - t_row.ul_y), abs(t_row.lr_x -
                                                          t_row.ul_x)
            df_these_sets = stem.get_overlapping_sets(df_sets, t_row,
                                                      this_size, support_size)

            # From here on this_size is the tile size in rows/cols.
            rc = df_tiles_rc.ix[t_ind]
            this_size = rc.lr_r - rc.ul_r, rc.lr_c - rc.ul_c
            n_sets = len(df_these_sets)

            tile_ul = t_row[['ul_x', 'ul_y']]

            print('%s  Overlapping sets' % n_sets)
            importance_bands = []
            importance_values = []
            for _, s_row in df_these_sets.iterrows():

                # Calculate offset and array/tile indices
                offset = stem.calc_offset(tile_ul, (s_row.ul_x, s_row.ul_y),
                                          mask_tx)
                tile_inds, _ = mosaic.get_offset_array_indices(
                    tile_size, set_size, offset)

                # Fill the part of the tile this set covers with the
                # variable's importance in the set's model. Best effort: a
                # set whose model cannot be loaded/used is reported and
                # skipped.
                try:
                    with open(s_row.dt_file, 'rb') as f:
                        dt_model = pickle.load(f)
                    importance_value = int(
                        dt_model.feature_importances_[v_id] * 100)
                    importance_values.append(importance_value)
                    import_band = np.full(this_size, np.nan, dtype=np.float16)
                    import_band[tile_inds[0]:tile_inds[1],
                                tile_inds[2]:tile_inds[3]] = importance_value
                    importance_bands.append(import_band)
                except Exception as e:
                    print(e)
                    continue

            if not importance_bands:
                # Nothing usable overlaps this tile; np.dstack would raise on
                # an empty list. The tile keeps the nodata fill.
                print('No usable sets for this tile; leaving it as nodata\n')
                continue

            print('Average importance for this tile: %.1f' % np.mean(
                importance_values))
            # Aggregate: per-cell mean across sets, ignoring uncovered (NaN)
            # cells.
            importance_stack = np.dstack(importance_bands)
            importance_tile = np.nanmean(importance_stack, axis=2)
            tile_mask = mask[rc.ul_r:rc.lr_r,
                             rc.ul_c:rc.lr_c] | np.isnan(importance_tile)
            importance_tile[tile_mask] = nodata
            ar[rc.ul_r:rc.lr_r,
               rc.ul_c:rc.lr_c] = np.round(importance_tile).astype(np.uint8)
            print('Aggregation time for this tile: %.1f minutes\n' % (
                (time.time() - t2) / 60))

        out_path = os.path.join(out_dir,
                                '%s_importance_%s.tif' % (model, v_name))
        try:
            array_to_raster(ar, mask_tx, prj, gdal.GetDriverByName('gtiff'),
                            out_path, gdal.GDT_Byte, nodata)
        except Exception as e:
            # NOTE(review): dropping into the debugger looks like a leftover
            # way to rescue `ar` after a failed write; kept as-is, but it
            # will block non-interactive runs.
            print(e)
            import pdb
            pdb.set_trace()
        print('Time for this variable: %.1f minutes\n' % (
            (time.time() - t1) / 60))

    print('\nTotal time for %s variables: %.1f hours\n' % (len(variables), (
        (time.time() - t0) / 3600)))
Exemplo n.º 7
0
def main(region_path,
         tile_path,
         reference_path,
         out_dir,
         id_field='region_id',
         ref_basename='nlcd'):
    """Create a directory of tile and reference data for each region.

    For every distinct value of ``id_field`` in the region vector file a
    ``region_<id>`` directory is made under ``out_dir`` containing:

    * a vector file of the tiles belonging to the region (if missing),
    * a reference map clipped from ``reference_path`` to the region's
      envelope, masked with the region's existing tile raster,
    * a raster of the tiles clipped to the region (if missing).

    Parameters
    ----------
    region_path : str
        Vector file of regions.
    tile_path : str
        Vector file of tiles; its attribute table must provide ``xmin``,
        ``xmax``, ``ymin``, ``ymax`` and ``id_field``.
    reference_path : str
        Reference raster; the first 4-digit run in the path is used as the
        year in the output name.
    out_dir : str
        Directory receiving the region directories.
    id_field : str
        Attribute holding the region id.
    ref_basename : str
        Prefix of the reference map file name.

    Raises
    ------
    IOError
        If the tile raster of a region cannot be opened when building the
        reference map.
    """
    df = attributes_to_df(region_path)
    tile_info = attributes_to_df(tile_path)
    tile_info['ul_x'] = tile_info.xmin
    tile_info['lr_x'] = tile_info.xmax
    tile_info['ul_y'] = tile_info.ymax
    tile_info['lr_y'] = tile_info.ymin

    _, vector_ext = os.path.splitext(region_path)
    region_ids = df[id_field].unique()
    n_regions = len(region_ids)

    region_ds = ogr.Open(region_path)
    region_lyr = region_ds.GetLayer()

    for i, r_id in enumerate(region_ids):
        print('Making region dir for %s (%s of %s)' % (r_id, i + 1,
                                                      n_regions))
        # Select on the configured id field (this used to be hard-coded to
        # 'region_id'); copy so the assignments below hit a real frame.
        df_r = df[df[id_field] == r_id].copy()
        id_str = ('0' + str(r_id))[-2:]

        fid = df_r.index[0]
        region_feature = region_lyr.GetFeature(fid)
        xmin, xmax, ymin, ymax = region_feature.GetGeometryRef().GetEnvelope()
        region_feature.Destroy()
        df_r['ul_x'] = xmin
        df_r['lr_x'] = xmax
        df_r['ul_y'] = ymax
        df_r['lr_y'] = ymin
        clip_coords = df_r.loc[fid, ['ul_x', 'lr_x', 'ul_y', 'lr_y']]

        region_dir = os.path.join(out_dir, 'region_%s' % id_str)
        if not os.path.exists(region_dir):
            os.mkdir(region_dir)

        # Make a shapefile of the tiles
        out_vector = os.path.join(region_dir,
                                  'tile_{0}{1}'.format(id_str, vector_ext))
        out_raster = out_vector.replace(vector_ext, '.tif')
        if not os.path.exists(out_vector):
            # TODO: switch to selection by min/max of coords
            region_tiles = tile_info[tile_info[id_field] == r_id]
            coords_to_shp(region_tiles, region_path, out_vector)

        # Make a map of reference NLCD, masked by the region's tile raster.
        # NOTE(review): the tile raster is only created further down, so it
        # must already exist from an earlier run -- confirm intended order.
        ds = gdal.Open(out_raster)
        if ds is None:
            raise IOError('could not open tile raster %s' % out_raster)
        mask = ds.ReadAsArray() == 255
        ds = None
        nlcd_year = re.search(
            r'\d\d\d\d',
            reference_path).group()  # finds the first one (potentially buggy)
        out_ref_map = os.path.join(
            region_dir, '%s_%s_%s.tif' % (ref_basename, nlcd_year, id_str))

        ref_ds = gdal.Open(reference_path)
        ref_tx = ref_ds.GetGeoTransform()
        ref_shape = ref_ds.RasterYSize, ref_ds.RasterXSize

        col_off = (ref_tx[0] - clip_coords.ul_x) / ref_tx[1]
        row_off = (ref_tx[3] - clip_coords.ul_y) / ref_tx[5]
        # Array shapes must be ints; rows are measured with the pixel
        # height (tx[5]), columns with the pixel width (tx[1]).
        n_cols = int(abs((clip_coords.ul_x - clip_coords.lr_x) / ref_tx[1]))
        n_rows = int(abs((clip_coords.ul_y - clip_coords.lr_y) / ref_tx[5]))

        ar_inds, ref_inds = get_offset_array_indices(
            (n_rows, n_cols), ref_shape, (row_off, col_off))
        # Index tuples are (row_start, row_end, col_start, col_end), as used
        # for ar_inds below; ReadAsArray takes (xoff, yoff, xsize, ysize).
        ref_n_rows = ref_inds[1] - ref_inds[0]
        ref_n_cols = ref_inds[3] - ref_inds[2]

        ar_ref = ref_ds.ReadAsArray(ref_inds[2], ref_inds[0], ref_n_cols,
                                    ref_n_rows)
        ar = np.full((n_rows, n_cols), 255)
        ar[ar_inds[0]:ar_inds[1], ar_inds[2]:ar_inds[3]] = ar_ref
        ar[mask] = 255

        tx = clip_coords.ul_x, 30, 0, clip_coords.ul_y, 0, -30
        prj = ref_ds.GetProjection()
        ref_ds = None
        driver = gdal.GetDriverByName('gtiff')
        array_to_raster(ar, tx, prj, driver, out_ref_map, nodata=255)

        # Make a clipped raster of the tiles
        if not os.path.exists(out_raster):
            tiles = ogr.Open(tile_path)
            tile_lyr = tiles.GetLayer()
            tx = clip_coords.ul_x, 30, 0, clip_coords.ul_y, 0, -30
            tile_array, _ = kernel_from_shp(tile_lyr,
                                            clip_coords,
                                            tx,
                                            255,
                                            val_field='name')
            # Blank out tiles wherever the reference map has no data.
            tile_array[ar == 255] = 255
            driver = gdal.GetDriverByName('gtiff')
            prj = tile_lyr.GetSpatialRef().ExportToWkt()
            array_to_raster(tile_array,
                            tx,
                            prj,
                            driver,
                            out_raster,
                            nodata=255)
            tiles.Destroy()