def snap_array(ds_in, ds_snap, tx_in, tx_snap, in_nodata, out_nodata, mask_val=None):
    """Return the data of ``ds_in`` placed on the grid of ``ds_snap``.

    The output array has shape ``(ds_snap.RasterYSize, ds_snap.RasterXSize)``
    and the dtype of the input array. Cells not covered by the input, and
    input cells equal to ``in_nodata``, hold ``out_nodata``.

    Parameters
    ----------
    ds_in, ds_snap : dataset objects exposing ``ReadAsArray()``; ``ds_snap``
        must also expose ``RasterYSize`` and ``RasterXSize``.
    tx_in, tx_snap : geotransforms of the input and snap datasets. Only
        elements 0 and 3 of ``tx_snap`` are read here; ``tx_in`` is passed
        whole to ``calc_offset``.
    in_nodata, out_nodata : nodata value to look for in the input and the
        value to write in its place.
    mask_val : optional; when not None it is converted with ``int()`` and
        every output cell where the snap array equals it is set to
        ``out_nodata``.
    """
    use_mask = mask_val is not None

    source = ds_in.ReadAsArray()
    if use_mask:
        # Only read the snap raster when it is actually needed for masking.
        snap_values = ds_snap.ReadAsArray()

    target_shape = (ds_snap.RasterYSize, ds_snap.RasterXSize)
    snap_origin = (tx_snap[0], tx_snap[3])
    shift = calc_offset(snap_origin, tx_in)
    dst_win, src_win = get_offset_array_indices(target_shape, source.shape, shift)

    snapped = np.full(target_shape, out_nodata, dtype=source.dtype)

    # Recode nodata in the source before copying the overlapping window.
    source[source == in_nodata] = out_nodata

    dst_r0, dst_r1, dst_c0, dst_c1 = dst_win
    src_r0, src_r1, src_c0, src_c1 = src_win
    snapped[dst_r0:dst_r1, dst_c0:dst_c1] = source[src_r0:src_r1, src_c0:src_c1]

    if use_mask:
        mask_val = int(mask_val)
        snapped[snap_values == mask_val] = out_nodata

    return snapped
def main(sample_txt, ref_raster, pred_raster, p_nodata, t_nodata, target_col, bins, out_txt, match=None, predict_col=None):
    """Compare a prediction raster with a reference raster via ``area_weighted_rmse``.

    Parameters
    ----------
    sample_txt : path of a tab-delimited sample file. It must contain
        ``target_col``; ``row`` and ``col`` columns are read when neither
        ``match`` nor ``predict_col`` is given.
    ref_raster, pred_raster : paths opened with ``gdal.Open``.
    p_nodata, t_nodata : nodata values of the prediction and reference
        rasters; converted with ``int()``.
    target_col : name of the sample column holding the target values.
    bins : passed through ``parse_bins`` before use.
    out_txt : forwarded to ``area_weighted_rmse``.
    match : when truthy, samples are obtained from ``get_samples``.
    predict_col : when truthy (and ``match`` is not), predicted samples are
        read from this column of the sample file.

    Returns
    -------
    Whatever ``area_weighted_rmse`` returns.

    Raises
    ------
    IndexError
        If ``target_col`` is not a column of the sample file.
    """
    p_nodata = int(p_nodata)
    t_nodata = int(t_nodata)

    ds_p = gdal.Open(pred_raster)
    ar_p = ds_p.ReadAsArray()
    ds_r = gdal.Open(ref_raster)
    ar_r = ds_r.ReadAsArray()

    r_xsize = ds_r.RasterXSize
    r_ysize = ds_r.RasterYSize
    p_xsize = ds_p.RasterXSize
    p_ysize = ds_p.RasterYSize
    tx_r = ds_r.GetGeoTransform()
    tx_p = ds_p.GetGeoTransform()

    # If the two rasters differ in size or georeferencing, make the
    # prediction array match the reference. The previous test
    # (`not xs_equal or ys_EQUAL or tx_differs`) skipped snapping when only
    # the heights differed and snapped needlessly whenever they matched.
    same_grid = (r_xsize == p_xsize and
                 r_ysize == p_ysize and
                 tx_r == tx_p)
    if not same_grid:
        warnings.warn('Prediction and reference rasters do not share the same extent. Snapping prediction raster to reference....')
        offset = mosaic.calc_offset((tx_r[0], tx_r[3]), tx_p)
        t_inds, p_inds = mosaic.get_offset_array_indices((r_ysize, r_xsize), (p_ysize, p_xsize), offset)
        ar_buf = np.full(ar_r.shape, p_nodata, dtype=ar_p.dtype)
        ar_buf[t_inds[0]:t_inds[1], t_inds[2]:t_inds[3]] = ar_p[p_inds[0]:p_inds[1], p_inds[2]:p_inds[3]]
        # Rebind rather than copy: the buffer is not used again, so a second
        # full-size array would only raise peak memory.
        ar_p = ar_buf
        del ar_buf

    bins = parse_bins(bins)

    sample = pd.read_csv(sample_txt, sep='\t')
    if target_col not in sample.columns:
        raise IndexError('target_col "%s" not in sample' % target_col)
    t_sample = sample[target_col]

    if match:
        t_sample, p_sample = get_samples(ar_p, ar_r, p_nodata, t_nodata, sample, match=match)
    elif predict_col:
        p_sample = sample[predict_col]
    else:
        # Fall back to reading both values straight from the rasters at the
        # sample locations.
        p_sample = ar_p[sample.row, sample.col]
        t_sample = ar_r[sample.row, sample.col]

    rmse = area_weighted_rmse(ar_p, ar_r, p_sample, t_sample, bins, p_nodata, out_txt=out_txt)

    return rmse
def main(params, ar_p=None, out_txt=None, inventory_txt=None, target_col=None, match=False, file_stamp=None):
    """Build an area-adjusted confusion matrix for a prediction vs. a truth raster.

    Every entry returned by ``read_params(params)`` becomes a local variable.
    ``bins``, ``p_nodata``, ``t_nodata``, ``sample_txt`` and ``t_path`` are
    required; ``p_path`` is optional and, when present, the prediction raster
    is read from it (overriding ``ar_p``).

    Parameters
    ----------
    params : path of the parameter file; also copied next to ``out_txt``.
    ar_p : prediction array, used only when the params do not define
        ``p_path``. It carries no georeferencing, so its last two dimensions
        must already match the truth raster.
    out_txt : optional output path forwarded to ``confusion_matrix_by_area``;
        its directory is created if missing.
    inventory_txt, file_stamp : when both are truthy, the accuracy and kappa
        are written to the row ``file_stamp`` of this tab-delimited inventory
        (indexed by ``stamp``) if the output directory name contains
        ``'vote'`` or ``'mean'``.
    target_col, match : forwarded to ``confusion_matrix_by_area``.

    Returns
    -------
    The second value returned by ``confusion_matrix_by_area`` (``df_smp``).

    Raises
    ------
    NameError
        If a required variable is missing from the param file.
    ValueError
        If neither ``p_path`` nor ``ar_p`` is available, or ``ar_p`` does not
        match the truth raster's shape.
    """
    # Read params and make variables from text.
    inputs = read_params(params)
    for i in inputs:
        # NOTE(review): this executes text from the param file as Python, so
        # the file must be trusted. Creating function locals this way relies
        # on the Python 2 `exec` statement; it will not work on Python 3.
        exec("{0} = str({1})".format(i, inputs[i]))

    # Check that the required variables were specified in params.
    try:
        bins = parse_bins(bins)
        p_nodata = int(p_nodata)
        t_nodata = int(t_nodata)
        # Referenced only so that a missing name raises NameError here.
        str_check = sample_txt
        str_check = t_path
    except NameError as e:
        print('')
        missing_var = str(e).split("'")[1]
        msg = "Variable '%s' not specified in param file:\n%s" % (missing_var, params)
        raise NameError(msg)

    if out_txt:
        out_dir = os.path.dirname(out_txt)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        shutil.copy2(params, out_dir)

    # If p_path was specified, this call of the function is coming from
    # outside predict_stem.py. Otherwise, ar_p should be given.
    ds_p = None
    if 'p_path' in locals():
        print('Reading in the prediction raster:%s\n' % p_path)
        ds_p = gdal.Open(p_path)
        ar_p = ds_p.ReadAsArray()
    elif ar_p is None:
        raise ValueError('Either p_path must be specified in the param file '
                         'or ar_p must be given')

    ds_t = gdal.Open(t_path)
    band = ds_t.GetRasterBand(1)
    ar_t = band.ReadAsArray()
    t_xsize = ds_t.RasterXSize
    t_ysize = ds_t.RasterYSize
    tx_t = ds_t.GetGeoTransform()

    if ds_p is None:
        # An in-memory prediction has no geotransform to snap with, so it can
        # only be used if it already lines up with the truth raster.
        if tuple(ar_p.shape[-2:]) != tuple(ar_t.shape):
            raise ValueError('ar_p has shape %s but the truth raster has shape %s'
                             % (ar_p.shape, ar_t.shape))
    else:
        p_xsize = ds_p.RasterXSize
        p_ysize = ds_p.RasterYSize
        tx_p = ds_p.GetGeoTransform()

        # If the two rasters differ in size or georeferencing, make the
        # prediction array match the reference. The previous test
        # (`not xs_equal or ys_EQUAL or tx_differs`) skipped snapping when
        # only the heights differed and snapped needlessly when they matched.
        same_grid = (t_xsize == p_xsize and
                     t_ysize == p_ysize and
                     tx_t == tx_p)
        if not same_grid:
            print('entered if statement')
            warnings.warn(
                'Prediction and reference rasters do not share the same extent. Snapping prediction raster to reference....'
            )
            offset = mosaic.calc_offset((tx_t[0], tx_t[3]), tx_p)
            t_inds, p_inds = mosaic.get_offset_array_indices(
                (t_ysize, t_xsize), (p_ysize, p_xsize), offset)
            print((t_inds, p_inds))
            ar_buf = np.full(ar_t.shape, p_nodata, dtype=ar_p.dtype)
            print(ar_buf.shape)
            ar_buf[t_inds[0]:t_inds[1], t_inds[2]:t_inds[3]] = ar_p[p_inds[0]:p_inds[1], p_inds[2]:p_inds[3]]
            # Rebind rather than copy; the buffer is not used again.
            ar_p = ar_buf
            del ar_buf

    mask = (ar_p == p_nodata) | (ar_t == t_nodata)

    samples = pd.read_csv(sample_txt, sep='\t', index_col='obs_id')
    print(samples)
    df_adj, df_smp = confusion_matrix_by_area(ar_p, ar_t, samples, p_nodata, t_nodata, mask=mask, bins=bins, out_txt=out_txt, target_col=target_col, match=match)

    # Drop the references to the large arrays before touching the inventory.
    ar_p = None
    ar_t = None
    mask = None

    # .loc replaces the label-based .ix indexer, which pandas has removed.
    accuracy = df_adj.loc['producer', 'user']
    kappa = df_adj.loc['producer', 'kappa']

    if inventory_txt and file_stamp:
        # out_dir is only bound when out_txt was given or the param file
        # defined it; don't lose the finished result over a NameError.
        try:
            out_name = os.path.basename(out_dir)
        except NameError:
            warnings.warn('Output directory unknown (no out_txt given); '
                          'inventory_txt was not updated.')
            out_name = ''

        df_inv = pd.read_csv(inventory_txt, sep='\t', index_col='stamp')
        if file_stamp in df_inv.index and 'vote' in out_name:
            cols = ['vote_accuracy', 'vote_kappa']
            df_inv.loc[file_stamp, cols] = accuracy, kappa
            df_inv.to_csv(inventory_txt, sep='\t')
            print('Vote scores written to inventory_txt:  %s' % inventory_txt)

        if file_stamp in df_inv.index and 'mean' in out_name:
            cols = ['mean_accuracy', 'mean_kappa']
            df_inv.loc[file_stamp, cols] = accuracy, kappa
            df_inv.to_csv(inventory_txt, sep='\t')

    return df_smp
def main(in_raster, snap_raster, in_nodata, out_nodata, out_path=None, mask_val=None, overwrite=False):
    """Snap ``in_raster`` to the grid of ``snap_raster``.

    The result has the shape of the snap raster's array and the dtype of the
    input array. Cells not covered by the input, input cells equal to
    ``in_nodata`` and (optionally) cells where the snap raster equals
    ``mask_val`` hold ``out_nodata``.

    Parameters
    ----------
    in_raster, snap_raster : paths opened with ``gdal.Open``.
    in_nodata, out_nodata : converted with ``int()``.
    out_path : when truthy, the result is written there with
        ``array_to_raster`` plus metadata via ``createMetadata`` and nothing
        is returned; otherwise the snapped array is returned.
    mask_val : snap-raster value to mask out; ``None`` or ``''`` disables
        masking. A value of 0 is honoured.
    overwrite : allow replacing an existing ``out_path``.

    Returns
    -------
    The snapped array when ``out_path`` is not given, otherwise ``None``.
    """
    t0 = time.time()

    in_nodata = int(in_nodata)
    out_nodata = int(out_nodata)

    # `if mask_val:` treated a mask value of 0 as "no mask"; test explicitly,
    # as snap_array and snap_by_tile do. '' is kept as "no mask" so callers
    # passing an empty argument still work.
    use_mask = mask_val is not None and mask_val != ''
    if use_mask:
        mask_val = int(mask_val)

    # Fail before doing any expensive reading if the output can't be written.
    if out_path and os.path.exists(out_path) and not overwrite:
        sys.exit('out_path already exists')

    print('\nOpening datasets... ')
    t1 = time.time()
    ds_in = gdal.Open(in_raster)
    ar_in = ds_in.ReadAsArray()
    tx_in = ds_in.GetGeoTransform()
    ds_in = None  # drop the dataset reference once the array is in memory

    ds_snap = gdal.Open(snap_raster)
    ar_snap = ds_snap.ReadAsArray()
    tx_snap = ds_snap.GetGeoTransform()
    prj = ds_snap.GetProjection()
    ds_snap = None
    print('%.1f seconds\n' % (time.time() - t1))

    print('Snapping input raster...')
    t1 = time.time()
    offset = calc_offset((tx_snap[0], tx_snap[3]), tx_in)
    snap_inds, in_inds = get_offset_array_indices(ar_snap.shape, ar_in.shape, offset)
    ar = np.full(ar_snap.shape, out_nodata, dtype=ar_in.dtype)
    ar_in[ar_in == in_nodata] = out_nodata
    ar[snap_inds[0]:snap_inds[1], snap_inds[2]:snap_inds[3]] = ar_in[in_inds[0]:in_inds[1], in_inds[2]:in_inds[3]]

    if use_mask:
        ar[ar_snap == mask_val] = out_nodata

    print('%.1f seconds\n' % (time.time() - t1))

    if not out_path:
        return ar

    # Use the smaller of the two supported output types that fits the data.
    # NOTE(review): anything outside 0-255 is written as Int16, including
    # values that may not fit in 16 bits - confirm inputs never exceed that.
    if ar.max() <= 255 and ar.min() >= 0:
        gdal_dtype = gdal.GDT_Byte
    else:
        gdal_dtype = gdal.GDT_Int16

    driver = get_gdal_driver(out_path)
    array_to_raster(ar, tx_snap, prj, driver, out_path, gdal_dtype, out_nodata)

    # Write metadata
    desc = ('Input raster %s snapped to the extent of %s.') % (in_raster, snap_raster)
    if use_mask:
        desc += ' Data were masked from snap raster with value %s.' % mask_val
    createMetadata(sys.argv, out_path, description=desc)

    print('\nTotal time to snap raster: %.1f seconds\n' % (time.time() - t0))
def snap_by_tile(ds_in, ds_snap, tiles, tx_snap, tx_in, in_nodata, out_nodata, out_dir, mask_val=None):
    """Snap ``ds_in`` to the snap grid tile by tile, pickling each tile.

    For every row of ``tiles`` the overlapping window of ``ds_in`` is read,
    placed in an array of the tile's size filled with ``out_nodata``, and
    pickled to ``<out_dir>/tile_<tile_id>.pkl``. Tiles that do not overlap
    the input, or whose window is entirely ``in_nodata``, are skipped and
    produce no file.

    Parameters
    ----------
    ds_in, ds_snap : dataset objects; ``ds_in`` is read window by window and
        ``ds_snap`` is read only when ``mask_val`` is given.
    tiles : table iterated with ``iterrows()`` whose rows expose ``ul_r``,
        ``ul_c``, ``lr_r`` and ``lr_c``. It is modified in place: the pickle
        path of each written tile is stored in its ``file`` column.
    tx_snap, tx_in : geotransforms of the snap and input datasets.
    in_nodata, out_nodata : nodata value to look for in the input and the
        value written in its place.
    out_dir : directory receiving the pickled tiles.
    mask_val : optional; converted with ``int()``. Output cells where the
        snap raster equals it are set to ``out_nodata``.

    Returns
    -------
    The result of ``get_min_numpy_dtype`` applied to the per-tile minima and
    maxima of the written tiles.
    """
    if mask_val is not None:
        mask_val = int(mask_val)

    row_off, col_off = calc_offset((tx_snap[0], tx_snap[3]), tx_in)
    in_size = ds_in.RasterYSize, ds_in.RasterXSize

    n_tiles = float(len(tiles))
    t1 = time.time()
    msg = '\rProccessing tile %d/%d (%.1f%%) || %.1f/~%.1f minutes'
    template = os.path.join(out_dir, 'tile_%s.pkl')

    mins = []
    maxs = []
    for i, (tile_id, coords) in enumerate(tiles.iterrows()):
        # Express the input raster's offset relative to this tile's corner.
        tile_off = row_off - coords.ul_r, col_off - coords.ul_c
        tile_size = coords.lr_r - coords.ul_r, coords.lr_c - coords.ul_c
        tile_inds, in_inds = get_offset_array_indices(tile_size, in_size, tile_off)

        in_ulr, in_lrr, in_ulc, in_lrc = in_inds
        in_xsize = in_lrc - in_ulc
        in_ysize = in_lrr - in_ulr
        if in_xsize <= 0 or in_ysize <= 0:  # They don't overlap
            continue

        ar_in = ds_in.ReadAsArray(in_ulc, in_ulr, in_xsize, in_ysize)
        if np.all(ar_in == in_nodata):
            continue

        ar_out = np.full(tile_size, out_nodata, dtype=ar_in.dtype)
        ar_out[tile_inds[0]:tile_inds[1], tile_inds[2]:tile_inds[3]] = ar_in
        ar_out[ar_out == in_nodata] = out_nodata

        if mask_val is not None:
            mask = ds_snap.ReadAsArray(coords.ul_c, coords.ul_r, tile_size[1], tile_size[0]) == mask_val
            ar_out[mask] = out_nodata

        out_path = template % tile_id
        with open(out_path, 'wb') as f:
            pickle.dump(ar_out, f, protocol=-1)

        mins.append(ar_out.min())
        maxs.append(ar_out.max())
        tiles.loc[tile_id, 'file'] = out_path

        cum_time = (time.time() - t1) / 60.
        # Estimate remaining time: i + 1 tiles are done at this point, so
        # n_tiles - (i + 1) are left (the old `n_tiles - i` counted the
        # current tile twice).
        est_time = cum_time / (i + 1) * (n_tiles - (i + 1))
        sys.stdout.write(msg % (i + 1, n_tiles, (i + 1) / n_tiles * 100, cum_time, est_time))
        sys.stdout.flush()

    # NOTE(review): if every tile was skipped, mins and maxs are empty here -
    # confirm get_min_numpy_dtype copes with an empty array.
    dtype = get_min_numpy_dtype(np.array(mins + maxs))

    return dtype