def extract_by_kernel(ar, rows, cols, data_type, col_name, nodata):
    """Extract one statistic per sample from the 3 x 3 kernel of pixels
    centered on that sample.

    Parameters
    ----------
    ar : 2D ndarray of the raster to sample from.
    rows, cols : parallel sequences of sample pixel coordinates.
    data_type, col_name : forwarded to extract.calc_row_stats(); `col_name`
        is also the column read back out of its result.
    nodata : value marking invalid pixels (used to pad the array edges).

    Returns
    -------
    pandas Series of int32, one value per sample.
    """
    # Offsets of the 9 kernel cells relative to the sample pixel, row-major:
    # rows -1,-1,-1,0,0,0,1,1,1 paired with cols -1,0,1 repeating.
    row_dirs = [dr for dr in (-1, 0, 1) for _ in range(3)]
    col_dirs = [-1, 0, 1] * 3

    # +1 shifts indices into the 1-pixel nodata border added below, so
    # kernels at the array edges never index out of bounds.
    kernel_rows = [r + dr + 1 for r in rows for dr in row_dirs]
    kernel_cols = [c + dc + 1 for c in cols for dc in col_dirs]

    buffered = np.full([dim + 2 for dim in ar.shape], nodata, dtype=np.int32)
    buffered[1:-1, 1:-1] = ar
    del ar  # drop the unbuffered reference to cut peak memory

    # One 9-value row of kernel pixels per sample.
    kernel_vals = buffered[kernel_rows, kernel_cols].reshape(
        len(rows), len(row_dirs))
    train_stats = pd.DataFrame(
        extract.calc_row_stats(kernel_vals, data_type, col_name, nodata))
    return train_stats[col_name].astype(np.int32)
def get_samples(ar_p, ar_t, p_nodata, t_nodata, samples=None, match='best_match'):
    """Pair prediction (`ar_p`) and target (`ar_t`) sample values.

    Parameters
    ----------
    ar_p, ar_t : 2D ndarrays of equal shape (prediction and target rasters).
    p_nodata, t_nodata : nodata values for each raster.
    samples : optional DataFrame with 'row' and 'col' columns. If not given
        (i.e., wall-to-wall eval), every pixel valid in both arrays is used.
    match : if the string contains 'best', each prediction value is paired
        with the closest target value within the 3 x 3 kernel around the
        sample; otherwise both kernels are summarized via calc_row_stats().

    Returns
    -------
    (t_samples, p_samples) : parallel 1D arrays of target and prediction
    values, with samples whose whole target kernel was nodata dropped.
    """
    t0 = time.time()

    # If samples weren't provided, sample every pixel that is non-nodata in
    # both arrays.
    if not isinstance(samples, pd.DataFrame):
        ar_rows, ar_cols = np.indices(ar_p.shape)
        mask = (ar_p != p_nodata) & (ar_t != t_nodata)
        samples = pd.DataFrame({'row': ar_rows[mask], 'col': ar_cols[mask]})
        del ar_rows, ar_cols, mask

    # Row/col indices of the 3 x 3 kernel surrounding all samples. +1 because
    # the array is padded with a 1-pixel nodata border below, so kernels at
    # the edges stay in bounds.
    row_dirs = [-1, -1, -1, 0, 0, 0, 1, 1, 1]
    col_dirs = [-1, 0, 1, -1, 0, 1, -1, 0, 1]
    kernel_rows = [row + d + 1 for row in samples.row for d in row_dirs]
    kernel_cols = [col + d + 1 for col in samples.col for d in col_dirs]

    # Reuse one padded buffer for both rasters to halve peak memory.
    ar_buf = np.full([dim + 2 for dim in ar_p.shape], p_nodata,
                     dtype=ar_p.dtype)
    ar_buf[1:-1, 1:-1] = ar_p
    p_kernel = ar_buf[kernel_rows, kernel_cols].reshape(
        len(samples), len(row_dirs))
    ar_buf[1:-1, 1:-1] = ar_t
    t_kernel = ar_buf[kernel_rows, kernel_cols].reshape(
        len(samples), len(row_dirs))
    del ar_buf

    if 'best' in match:
        print('Finding best match for each sample in a 3 x 3 kernel...')
        p_samples = ar_p[samples.row, samples.col]
        sample_mask = p_samples != p_nodata
        p_samples = p_samples[sample_mask]
        n_samples = p_samples.size
        t_kernel = t_kernel[sample_mask, :].reshape(
            n_samples, len(row_dirs)).astype(float)
        t_kernel[t_kernel == t_nodata] = np.nan

        # Absolute difference between each kernel pixel and the sampled
        # prediction value (broadcast across the kernel axis).
        dif = np.abs(t_kernel - p_samples[:, None])
        # Can't keep NaN: a row could be all-NaN, making argmin over NaN
        # undefined. inf is never chosen when any finite difference exists,
        # and an all-NaN kernel yields a NaN sample that is dropped below.
        dif[np.isnan(dif)] = np.inf

        # Pixel in each kernel with the lowest difference.
        pxl_ind = np.argmin(dif, axis=1)
        t_samples = t_kernel[np.arange(n_samples), pxl_ind]

        # Drop samples whose entire target kernel was nodata.
        sample_mask = ~np.isnan(t_samples)
        t_samples = t_samples[sample_mask]
        p_samples = p_samples[sample_mask]
        print('Time to get samples: %.1f seconds\n' % (time.time() - t0))
    else:
        print('Getting average prediction sample vals for 3 x 3 kernel... ')
        test_stats = pd.DataFrame(
            calc_row_stats(p_kernel, 'continuous', 'value', p_nodata))
        # Keep only samples where the kernel statistic is defined.
        sample_mask = ~test_stats.value.isnull().values
        p_samples = test_stats.value.values[sample_mask].astype(np.int32)
        del test_stats, p_kernel

        test_stats = pd.DataFrame(
            calc_row_stats(t_kernel, 'continuous', 'value', t_nodata))
        t_samples = test_stats.value.values[sample_mask].astype(np.int32)
        print('Time to get samples: %.1f seconds\n' % (time.time() - t0))

    return t_samples, p_samples