def run(self):
        """Build a forest mask from the global landcover raster and run a
            distance transform on it.

        The mask is written to "forest_mask.tif" inside OUTPUT_DIR and the
        distance transform result to FOREST_EDGE_DISTANCE_URI.
        """
        forest_lulc_codes = [1, 2, 3, 4, 5]
        mask_nodata = 2
        mask_uri = os.path.join(OUTPUT_DIR, "forest_mask.tif")

        lulc_nodata = raster_utils.get_nodata_from_uri(GLOBAL_LANDCOVER_URI)

        def mask_nonforest(lulc):
            """Classify a numpy array of landcover codes.

            Returns an int8 array of the same shape holding 0 where the
            landcover is one of the forest codes, 1 everywhere else, and
            `mask_nodata` where the landcover equals its nodata value."""
            is_forest = numpy.zeros(lulc.shape, dtype=bool)
            for forest_code in forest_lulc_codes:
                is_forest |= (lulc == forest_code)
            mask = numpy.ones(lulc.shape, dtype=numpy.int8)
            mask[is_forest] = 0
            # nodata is applied last so it wins over any other class
            mask[lulc == lulc_nodata] = mask_nodata
            return mask

        cell_size = raster_utils.get_cell_size_from_uri(GLOBAL_LANDCOVER_URI)
        raster_utils.vectorize_datasets(
            [GLOBAL_LANDCOVER_URI], mask_nonforest, mask_uri, gdal.GDT_Byte,
            mask_nodata, cell_size, 'intersection',
            dataset_to_align_index=0,
            dataset_to_bound_index=None,
            aoi_uri=None,
            assert_datasets_projected=True,
            process_pool=None,
            vectorize_op=False,
            datasets_are_pre_aligned=True)

        raster_utils.distance_transform_edt(mask_uri, FOREST_EDGE_DISTANCE_URI)
 def run(self):
     """Write GLOBAL_LANDCOVER_URI from the union landcover raster.

     UNION_LANDCOVER_URI and UNION_BIOMASS_URI are both handed to
     `vectorize_datasets` with the "intersection" bounding mode; only the
     landcover values are kept in the output."""
     def keep_landcover(landcover, _biomass):
         """Return the landcover array untouched; biomass is ignored."""
         return landcover

     landcover_nodata = raster_utils.get_nodata_from_uri(UNION_LANDCOVER_URI)
     landcover_cell_size = raster_utils.get_cell_size_from_uri(
         UNION_LANDCOVER_URI)
     input_uri_list = [UNION_LANDCOVER_URI, UNION_BIOMASS_URI]
     raster_utils.vectorize_datasets(
         input_uri_list, keep_landcover, GLOBAL_LANDCOVER_URI,
         gdal.GDT_Int16, landcover_nodata, landcover_cell_size,
         "intersection", dataset_to_align_index=0, vectorize_op=False)
def _align_raster_with_biomass(input_uri, output_uri):
    """Internal helper that writes a copy of `input_uri` to `output_uri`
        processed together with the GLOBAL_BIOMASS_URI raster.

        input_uri - uri of the raster whose values are kept
        output_uri - uri of the Float32 raster to create

        The output uses the cell size of GLOBAL_BIOMASS_URI and is bounded by
        that raster ("dataset" mode with dataset_to_bound_index=1).  If the
        input has no nodata value defined, -9999 is used for the output."""
    def keep_input(input_array, _biomass_array):
        """Pass the input raster's values straight through."""
        return input_array

    output_nodata = raster_utils.get_nodata_from_uri(input_uri)
    if output_nodata is None:
        output_nodata = -9999

    biomass_cell_size = raster_utils.get_cell_size_from_uri(
        GLOBAL_BIOMASS_URI)
    uri_list = [input_uri, GLOBAL_BIOMASS_URI]
    raster_utils.vectorize_datasets(
        uri_list, keep_input, output_uri, gdal.GDT_Float32, output_nodata,
        biomass_cell_size, "dataset", dataset_to_bound_index=1,
        vectorize_op=False)
    def run(self):
        """Merge every raster in `self.dataset_uri_list` into the single
            Int16 raster `self.dataset_out_uri`.

        Nodata and cell size are taken from the first raster in the list and
        the rasters are combined over their "union" bounds."""
        reference_uri = self.dataset_uri_list[0]
        nodata = raster_utils.get_nodata_from_uri(reference_uri)
        cell_size = raster_utils.get_cell_size_from_uri(reference_uri)

        def union_op(*array_list):
            """Collapse a stack of arrays into one.

            Starts from the first array and, for each following array,
            overwrites the running result wherever that array is not
            nodata; later arrays therefore take priority on overlaps."""
            merged = array_list[0]
            for layer in array_list[1:]:
                has_data = layer != nodata
                merged = numpy.where(has_data, layer, merged)
            return merged

        raster_utils.vectorize_datasets(
            list(self.dataset_uri_list), union_op, self.dataset_out_uri,
            gdal.GDT_Int16, nodata, cell_size, "union",
            dataset_to_align_index=0, vectorize_op=False)
	def run(self):
		"""Merge the three regional biomass rasters into one Int16 raster
			written to `self.output_uri`.

		Nodata and cell size come from the first raster in the list; the
		directory of the output is created before writing."""
		biomass_raster_list = [
			"C:/Users/rpsharp/Dropbox_stanford/Dropbox/forest_edge_carbon/af_biov2ct1.tif",
			"C:/Users/rpsharp/Dropbox_stanford/Dropbox/forest_edge_carbon/am_biov2ct1.tif",
			"C:/Users/rpsharp/Dropbox_stanford/Dropbox/forest_edge_carbon/as_biov2ct1.tif",
		]
		reference_uri = biomass_raster_list[0]
		nodata = raster_utils.get_nodata_from_uri(reference_uri)
		cell_size = raster_utils.get_cell_size_from_uri(reference_uri)

		def union_op(*biomass_array_list):
			"""Overlay the arrays in order; wherever a later array is not
				nodata its value replaces the running result."""
			merged = biomass_array_list[0]
			for layer in biomass_array_list[1:]:
				has_data = layer != nodata
				merged = numpy.where(has_data, layer, merged)
			return merged

		output_directory = os.path.dirname(self.output_uri)
		raster_utils.create_directories([output_directory])

		raster_utils.vectorize_datasets(
			biomass_raster_list, union_op, self.output_uri, gdal.GDT_Int16,
			nodata, cell_size, 'union', dataset_to_align_index=0,
			vectorize_op=False)
def process_ecoregion(prefix):
    """Run the forest edge / biomass pipeline for one `prefix`.

    prefix - string prepended to LULC_BASE and BIOMASS_BASE to locate the
        raw rasters in DATA_DIR; also used to name every output in
        OUTPUT_DIR.

    Steps, in order: align the raw landcover and biomass rasters, rasterize
    the ecoregion ids (attribute ECO_ID_U), build a forest mask, run the
    distance transform on the mask, and hand everything to
    `_aggregate_results` to write "<prefix>_biomass_stats.csv".
    """
    def output_path(template):
        """Fill `prefix` into `template` and place it in OUTPUT_DIR."""
        return os.path.join(OUTPUT_DIR, template % prefix)

    ecoregion_shapefile_uri = os.path.join(
        DATA_DIR, 'ecoregions', 'ecoregions_projected.shp')

    # table keyed by ecoregion id, plus a placeholder entry for nodata pixels
    ecoregion_nodata = -1
    ecoregion_lookup = raster_utils.extract_datasource_table_by_key(
        ecoregion_shapefile_uri, 'ECO_ID_U')
    ecoregion_lookup[ecoregion_nodata] = dict.fromkeys(
        ('ECO_NAME', 'ECODE_NAME', 'WWF_MHTNAM'), 'UNKNOWN')

    lulc_raw_uri = os.path.join(DATA_DIR, '%s%s' % (prefix, LULC_BASE))
    biomass_raw_uri = os.path.join(DATA_DIR, '%s%s' % (prefix, BIOMASS_BASE))
    cell_size = raster_utils.get_cell_size_from_uri(lulc_raw_uri)

    lulc_uri = output_path("%s_lulc_aligned.tif")
    biomass_uri = output_path("%s_biomass_aligned.tif")
    raster_utils.align_dataset_list(
        [lulc_raw_uri, biomass_raw_uri], [lulc_uri, biomass_uri],
        ['nearest', 'nearest'], cell_size, 'intersection', 0,
        dataset_to_bound_index=None, aoi_uri=None,
        assert_datasets_projected=True, process_pool=None)

    # ecoregion id raster on the same grid as the aligned landcover
    ecoregion_dataset_uri = output_path("%s_ecoregion_id.tif")
    raster_utils.new_raster_from_base_uri(
        lulc_uri, ecoregion_dataset_uri, 'GTiff', ecoregion_nodata,
        gdal.GDT_Int16)
    raster_utils.rasterize_layer_uri(
        ecoregion_dataset_uri, ecoregion_shapefile_uri,
        option_list=["ATTRIBUTE=ECO_ID_U"])

    lulc_nodata = raster_utils.get_nodata_from_uri(lulc_uri)
    forest_lulc_codes = [1, 2, 3, 4, 5]
    mask_nodata = 2
    mask_uri = output_path("%s_mask.tif")

    def mask_nonforest(lulc):
        """Return an int8 array: 0 on forest codes, 1 elsewhere, and
            `mask_nodata` where the landcover is nodata."""
        is_forest = numpy.zeros(lulc.shape, dtype=bool)
        for forest_code in forest_lulc_codes:
            is_forest |= (lulc == forest_code)
        mask = numpy.ones(lulc.shape, dtype=numpy.int8)
        mask[is_forest] = 0
        # nodata is applied last so it wins over any other class
        mask[lulc == lulc_nodata] = mask_nodata
        return mask

    raster_utils.vectorize_datasets(
        [lulc_uri], mask_nonforest, mask_uri, gdal.GDT_Byte,
        mask_nodata, cell_size, 'intersection',
        dataset_to_align_index=0,
        dataset_to_bound_index=None,
        aoi_uri=None,
        assert_datasets_projected=True,
        process_pool=None,
        vectorize_op=False,
        datasets_are_pre_aligned=True)

    forest_edge_distance_uri = output_path("%s_forest_edge.tif")
    raster_utils.distance_transform_edt(mask_uri, forest_edge_distance_uri)

    biomass_stats_uri = output_path("%s_biomass_stats.csv")
    _aggregate_results(
        forest_edge_distance_uri, biomass_uri, ecoregion_dataset_uri,
        ecoregion_lookup, biomass_stats_uri)
def _aggregate_results(forest_edge_distance_uri, biomass_uri, ecoregion_dataset_uri, ecoregion_lookup, biomass_stats_uri):
    """Write per-pixel edge distance / biomass records and grid lookup tables.

    forest_edge_distance_uri - uri of the distance transform raster
    biomass_uri - uri of the biomass raster; its block size, geotransform
        and projection drive the iteration and coordinate conversion
    ecoregion_dataset_uri - uri of the raster holding ecoregion ids
    ecoregion_lookup - dict mapping an ecoregion id to a dict with the keys
        'ECO_NAME', 'ECODE_NAME' and 'WWF_MHTNAM'
    biomass_stats_uri - path of the ';' separated text file to write

    One line is written for every pixel whose edge distance is not nodata
    and greater than 0 and whose biomass is not nodata.  Columns are:
    distance * cell size; biomass; lat; lng; ECO_NAME; ECODE_NAME;
    WWF_MHTNAM; row and column offset from GLOBAL_UPPER_LEFT_ROW/COL; then
    one 'grid_row-grid_col' id per grid resolution.

    For every grid resolution a companion file named
    "<biomass_stats_uri>_<resolution>.csv" lists each grid id encountered
    with a lat/lng coordinate for it.

    The three rasters are read with the same pixel windows, so they are
    assumed to share one grid -- TODO confirm against callers.

    returns nothing"""
    cell_size = raster_utils.get_cell_size_from_uri(forest_edge_distance_uri)

    forest_edge_nodata = raster_utils.get_nodata_from_uri(
        forest_edge_distance_uri)
    biomass_nodata = raster_utils.get_nodata_from_uri(biomass_uri)

    ecoregion_dataset = gdal.Open(ecoregion_dataset_uri)
    ecoregion_band = ecoregion_dataset.GetRasterBand(1)

    biomass_ds = gdal.Open(biomass_uri, gdal.GA_ReadOnly)
    biomass_band = biomass_ds.GetRasterBand(1)

    forest_edge_distance_ds = gdal.Open(forest_edge_distance_uri)
    forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1)

    n_rows, n_cols = raster_utils.get_row_col_from_uri(biomass_uri)

    base_srs = osr.SpatialReference(biomass_ds.GetProjection())
    lat_lng_srs = base_srs.CloneGeogCS()
    coord_transform = osr.CoordinateTransformation(base_srs, lat_lng_srs)
    gt = biomass_ds.GetGeoTransform()

    # grid resolutions are multiplied by 1000 before being compared with
    # projected coordinates
    grid_resolution_list = [25, 50, 100, 150, 200, 300, 400, 500]
    grid_coordinates = dict(
        (resolution, {}) for resolution in grid_resolution_list)

    block_col_size, block_row_size = biomass_band.GetBlockSize()
    n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size))
    n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size))

    last_time = time.time()
    # the context manager guarantees the file is closed even if a read or a
    # lookup raises part way through
    with open(biomass_stats_uri, 'w') as outfile:
        for global_block_row in xrange(n_global_block_rows):
            current_time = time.time()
            if current_time - last_time > 5.0:
                print("aggregation %.1f%% complete" % (
                    global_block_row / float(n_global_block_rows) * 100))
                last_time = current_time
            yoff = global_block_row * block_row_size
            win_ysize = min(block_row_size, n_rows - yoff)
            for global_block_col in xrange(n_global_block_cols):
                xoff = global_block_col * block_col_size
                win_xsize = min(block_col_size, n_cols - xoff)
                biomass_block = biomass_band.ReadAsArray(
                    xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                    win_ysize=win_ysize)
                forest_edge_distance_block = (
                    forest_edge_distance_band.ReadAsArray(
                        xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                        win_ysize=win_ysize))
                ecoregion_id_block = ecoregion_band.ReadAsArray(
                    xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                    win_ysize=win_ysize)

                for local_row in xrange(win_ysize):
                    row_coord = gt[3] + (yoff + local_row) * gt[5]
                    # normalize the coordinates so they don't go negative
                    global_grid_row = row_coord - GLOBAL_UPPER_LEFT_ROW
                    for local_col in xrange(win_xsize):
                        edge_distance = (
                            forest_edge_distance_block[local_row, local_col])
                        biomass = biomass_block[local_row, local_col]
                        if not (edge_distance != forest_edge_nodata and
                                edge_distance > 0.0 and
                                biomass != biomass_nodata):
                            continue

                        # coordinates are only computed for pixels that are
                        # actually written; the transform is costly per call
                        col_coord = gt[0] + (xoff + local_col) * gt[1]
                        global_grid_col = col_coord - GLOBAL_UPPER_LEFT_COL
                        lng_coord, lat_coord, _ = (
                            coord_transform.TransformPoint(
                                col_coord, row_coord))

                        ecoregion = ecoregion_lookup[
                            ecoregion_id_block[local_row, local_col]]
                        fields = [
                            "%f;%f;%f;%f;%s;%s;%s" % (
                                edge_distance * cell_size, biomass,
                                lat_coord, lng_coord,
                                ecoregion['ECO_NAME'],
                                ecoregion['ECODE_NAME'],
                                ecoregion['WWF_MHTNAM']),
                            ";%f;%f" % (global_grid_row, global_grid_col),
                        ]
                        for global_grid_resolution in grid_resolution_list:
                            # output a grid coordinate in the form
                            # 'grid_row-grid_col'
                            grid_size = global_grid_resolution * 1000
                            grid_row = int(global_grid_row / grid_size)
                            grid_col = int(global_grid_col / grid_size)
                            grid_id = str(grid_row) + '-' + str(grid_col)
                            fields.append(";%s" % grid_id)
                            known_grids = (
                                grid_coordinates[global_grid_resolution])
                            if grid_id not in known_grids:
                                # this is the grid cell's corner nearest the
                                # global origin, not its geometric center
                                grid_row_origin = (
                                    grid_row * grid_size +
                                    GLOBAL_UPPER_LEFT_ROW)
                                grid_col_origin = (
                                    grid_col * grid_size +
                                    GLOBAL_UPPER_LEFT_COL)
                                grid_lng_coord, grid_lat_coord, _ = (
                                    coord_transform.TransformPoint(
                                        grid_col_origin, grid_row_origin))
                                known_grids[grid_id] = (
                                    grid_lat_coord, grid_lng_coord)
                                print("%s %s" % (
                                    grid_lat_coord, grid_lng_coord))
                        # terminate the record with a real newline; the
                        # literal '/n' used previously ran all records
                        # together on one line
                        fields.append('\n')
                        outfile.write(''.join(fields))

    # The stats file is deliberately NOT reopened here: opening it again in
    # 'w' mode would truncate the records written above.
    output_dir, base_filename = os.path.split(biomass_stats_uri)
    for global_grid_resolution in grid_resolution_list:
        # the '.csv' of the stats file stays in the name, as before
        grid_output_uri = os.path.join(
            output_dir,
            base_filename + '_' + str(global_grid_resolution) + '.csv')
        with open(grid_output_uri, 'w') as grid_output_file:
            grid_output_file.write('grid id;lat_coord;lng_coord\n')
            for grid_id, (lat, lng) in (
                    grid_coordinates[global_grid_resolution].iteritems()):
                grid_output_file.write('%s;%s;%s\n' % (grid_id, lat, lng))
def _map_intensity(forest_edge_distance_uri, biomass_uri):
    """Write coarse "intensity" rasters derived from the biomass raster.

    forest_edge_distance_uri - uri of the forest edge distance raster; only
        used to exclude pixels where it is nodata
    biomass_uri - uri of the biomass raster; supplies the projection,
        geotransform and dimensions for the outputs

    For every grid resolution (multiplied by 1000 to get the output cell
    size) a Float32 GTiff named "<biomass filename>_intensity_<res>.tif"
    is created next to the biomass raster.  Each output cell covers a
    window of input pixels and holds the mean of the top 10% of valid
    biomass values divided by the mean of the bottom 10%.  Cells with 10 or
    fewer valid pixels, or whose lower mean is 0, are left at nodata (-1).

    Both rasters are read with the same pixel windows, so they are assumed
    to share one grid -- TODO confirm against callers.

    returns nothing"""
    grid_resolution_list = [25, 50, 100, 150, 200, 300, 400, 500]

    forest_edge_distance_ds = gdal.Open(forest_edge_distance_uri)
    forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1)
    forest_edge_distance_nodata = raster_utils.get_nodata_from_uri(
        forest_edge_distance_uri)

    biomass_ds = gdal.Open(biomass_uri)
    biomass_band = biomass_ds.GetRasterBand(1)
    biomass_nodata = raster_utils.get_nodata_from_uri(biomass_uri)

    n_rows = biomass_ds.RasterYSize
    n_cols = biomass_ds.RasterXSize

    projection = biomass_ds.GetProjection()
    driver = gdal.GetDriverByName('GTiff')

    gt = biomass_ds.GetGeoTransform()
    pixel_width = gt[1]
    # gt[5] is negated wherever it is used as a height
    pixel_height = -gt[5]

    output_dir, base_filename = os.path.split(biomass_uri)
    output_nodata = -1

    for grid_resolution in grid_resolution_list:
        grid_size = grid_resolution * 1000.0

        output_uri = os.path.join(
            output_dir,
            base_filename + '_intensity_' + str(grid_resolution) + '.tif')

        n_rows_grid = int(pixel_height * n_rows / grid_size)
        n_cols_grid = int(pixel_width * n_cols / grid_size)
        if n_rows_grid < 1 or n_cols_grid < 1:
            # the raster is smaller than a single grid cell at this
            # resolution; a zero sized dataset cannot be created
            continue

        new_geotransform = (
            gt[0], grid_size, gt[2],
            gt[3], gt[4], -grid_size)

        output_ds = driver.Create(
            output_uri.encode('utf-8'), n_cols_grid, n_rows_grid, 1,
            gdal.GDT_Float32)
        output_ds.SetProjection(projection)
        output_ds.SetGeoTransform(new_geotransform)
        output_band = output_ds.GetRasterBand(1)
        output_band.SetNoDataValue(output_nodata)
        output_band.Fill(output_nodata)

        # Window of input pixels under one output cell.  The height uses
        # the pixel height; it previously reused the pixel width, which is
        # only right for square pixels.
        win_xsize = int(grid_size / pixel_width)
        win_ysize = int(grid_size / pixel_height)

        last_time = time.time()
        for grid_row_index in xrange(n_rows_grid):
            current_time = time.time()
            if current_time - last_time > 5.0:
                print("magnitude %.1f%% complete" % (
                    grid_row_index / float(n_rows_grid) * 100))
                last_time = current_time
            yoff = int(grid_row_index * grid_size / pixel_height)
            for grid_col_index in xrange(n_cols_grid):
                xoff = int(grid_col_index * grid_size / pixel_width)

                biomass_block = biomass_band.ReadAsArray(
                    xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                    win_ysize=win_ysize)
                forest_edge_distance_block = (
                    forest_edge_distance_band.ReadAsArray(
                        xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                        win_ysize=win_ysize))

                valid_mask = (
                    (forest_edge_distance_block !=
                     forest_edge_distance_nodata) &
                    (biomass_block != biomass_nodata))
                flat_valid_biomass = biomass_block[valid_mask]

                n_elements = flat_valid_biomass.size
                if n_elements <= 10:
                    # too few samples for 10% tails; leave the cell nodata
                    continue

                # NOTE(review): values are ordered by biomass itself.  The
                # original local was named `sorted_forest_edge`, which
                # suggests ordering by edge distance may have been the
                # intent -- confirm before relying on these outputs.
                flat_biomass = numpy.sort(flat_valid_biomass)
                tail_size = int(n_elements * 0.1)
                lower_biomass = numpy.average(flat_biomass[0:tail_size])
                upper_biomass = numpy.average(
                    flat_biomass[int(n_elements * 0.9):n_elements])

                if lower_biomass == 0:
                    # avoid dividing by zero; leave the cell nodata
                    continue

                magnitude = upper_biomass / lower_biomass
                output_band.WriteArray(
                    numpy.array([[magnitude]]),
                    xoff=grid_col_index, yoff=grid_row_index)

        # flush and release so this raster is fully written before the
        # next one is started
        output_band.FlushCache()
        output_band = None
        output_ds = None
    def run(self):
        """Write per-pixel forest edge distance / biomass records to
            BIOMASS_STATS_URI.

        Reads FOREST_EDGE_DISTANCE_URI, GLOBAL_BIOMASS_URI and
        ECOREGION_DATASET_URI block by block, using the biomass raster's
        block size.  One ';' separated line is written for every pixel
        whose edge distance is not nodata and greater than 0 and whose
        biomass is not nodata.  Columns are: distance * cell size; biomass;
        lat; lng; ECO_NAME; ECODE_NAME; WWF_MHTNAM; then one
        'grid_row-grid_col' id per entry of GRID_RESOLUTION_LIST, measured
        from the biomass raster's origin.

        The three rasters are read with the same pixel windows, so they are
        assumed to share one grid -- TODO confirm."""
        ecoregion_lookup = raster_utils.extract_datasource_table_by_key(
            ECOREGION_SHAPEFILE_URI, 'ECO_ID_U')
        # placeholder names for pixels that carry the ecoregion nodata id
        ecoregion_nodata = -1
        ecoregion_lookup[ecoregion_nodata] = {
            'ECO_NAME': 'UNKNOWN',
            'ECODE_NAME': 'UNKNOWN',
            'WWF_MHTNAM': 'UNKNOWN',
            }
        cell_size = raster_utils.get_cell_size_from_uri(
            FOREST_EDGE_DISTANCE_URI)
        forest_edge_nodata = raster_utils.get_nodata_from_uri(
            FOREST_EDGE_DISTANCE_URI)
        biomass_nodata = raster_utils.get_nodata_from_uri(GLOBAL_BIOMASS_URI)

        ecoregion_dataset = gdal.Open(ECOREGION_DATASET_URI)
        ecoregion_band = ecoregion_dataset.GetRasterBand(1)

        biomass_ds = gdal.Open(GLOBAL_BIOMASS_URI, gdal.GA_ReadOnly)
        biomass_band = biomass_ds.GetRasterBand(1)

        forest_edge_distance_ds = gdal.Open(FOREST_EDGE_DISTANCE_URI)
        forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1)

        n_rows, n_cols = raster_utils.get_row_col_from_uri(GLOBAL_BIOMASS_URI)

        base_srs = osr.SpatialReference(biomass_ds.GetProjection())
        lat_lng_srs = base_srs.CloneGeogCS()
        coord_transform = osr.CoordinateTransformation(
            base_srs, lat_lng_srs)
        geo_trans = biomass_ds.GetGeoTransform()

        block_col_size, block_row_size = biomass_band.GetBlockSize()
        n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size))
        n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size))

        last_time = time.time()
        # the context manager guarantees the file is closed even if a read
        # or a lookup raises part way through
        with open(BIOMASS_STATS_URI, 'w') as outfile:
            for global_block_row in xrange(n_global_block_rows):
                current_time = time.time()
                if current_time - last_time > 5.0:
                    print (
                        "aggregation %.1f%% complete" %
                        (global_block_row / float(n_global_block_rows) * 100))
                    last_time = current_time
                yoff = global_block_row * block_row_size
                win_ysize = min(block_row_size, n_rows - yoff)
                for global_block_col in xrange(n_global_block_cols):
                    xoff = global_block_col * block_col_size
                    win_xsize = min(block_col_size, n_cols - xoff)
                    biomass_block = biomass_band.ReadAsArray(
                        xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                        win_ysize=win_ysize)
                    forest_edge_distance_block = (
                        forest_edge_distance_band.ReadAsArray(
                            xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                            win_ysize=win_ysize))
                    ecoregion_id_block = ecoregion_band.ReadAsArray(
                        xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                        win_ysize=win_ysize)

                    for local_row in xrange(win_ysize):
                        row_coord = (
                            geo_trans[3] + (yoff + local_row) * geo_trans[5])
                        for local_col in xrange(win_xsize):
                            edge_distance = (
                                forest_edge_distance_block
                                [local_row, local_col])
                            biomass = biomass_block[local_row, local_col]
                            if not (edge_distance != forest_edge_nodata and
                                    edge_distance > 0.0 and
                                    biomass != biomass_nodata):
                                continue

                            # coordinates are only computed for pixels that
                            # are actually written; the transform is costly
                            col_coord = (
                                geo_trans[0] +
                                (xoff + local_col) * geo_trans[1])
                            lng_coord, lat_coord, _ = (
                                coord_transform.TransformPoint(
                                    col_coord, row_coord))

                            ecoregion = ecoregion_lookup[
                                ecoregion_id_block[local_row, local_col]]
                            fields = ["%f;%f;%f;%f;%s;%s;%s" % (
                                edge_distance * cell_size, biomass,
                                lat_coord, lng_coord,
                                ecoregion['ECO_NAME'],
                                ecoregion['ECODE_NAME'],
                                ecoregion['WWF_MHTNAM'])]
                            for global_grid_resolution in GRID_RESOLUTION_LIST:
                                #output a grid coordinate in the form
                                #'grid_row-grid_col'
                                grid_row = (
                                    int((geo_trans[3] - row_coord) /
                                        (global_grid_resolution*1000)))
                                grid_col = (
                                    int((col_coord - geo_trans[0]) /
                                        (global_grid_resolution*1000)))
                                grid_id = str(grid_row) + '-' + str(grid_col)
                                fields.append(";%s" % grid_id)
                            fields.append('\n')
                            outfile.write(''.join(fields))