def test_get_valid_mask():
    """Check ``cyprep.get_valid_mask`` against a pure-NumPy reference.

    Random in-range data are generated for ``n_bands`` bands, ``n_mask``
    observations per band are overwritten with an out-of-range value, and
    the mask returned by ``cyprep.get_valid_mask`` is compared with the one
    obtained from plain NumPy comparisons.
    """
    n_bands, n_images, n_mask = 8, 500, 50

    data = np.random.randint(
        0, 10000, size=(n_bands, n_images)).astype(np.int32)

    # Add in bad data: a different random set of observations in each band
    _idx = np.arange(0, n_images)
    for b in range(n_bands):
        idx = np.random.choice(_idx, size=n_mask, replace=False)
        data[b, idx] = 16000

    mins = np.repeat(0, n_bands).astype(np.int16)
    maxes = np.repeat(10000, n_bands).astype(np.int16)

    # Reference: an observation is valid only if every band is within range
    truth = np.all(
        [((b >= _min) & (b <= _max))
         for b, _min, _max in zip(data, mins, maxes)],
        axis=0)

    # Builtin ``bool`` instead of the ``np.bool`` alias, which was removed
    # from NumPy in 1.24
    test = cyprep.get_valid_mask(data, mins, maxes).astype(bool)
    test_min = data[:, test].min(axis=1)
    test_max = data[:, test].max(axis=1)

    assert np.array_equal(truth, test)
    assert np.array_equal(data[:, truth].min(axis=1), test_min)
    assert np.array_equal(data[:, truth].max(axis=1), test_max)
    assert np.all(test_min >= mins)
    assert np.all(test_max <= maxes)
def line(ctx, config, job_number, total_jobs, resume, check_cache,
         do_not_run, verbose_yatsm):
    """Run the configured algorithm on every image line assigned to a job.

    Args:
        ctx: click context (not used by this function)
        config: configuration file, parsed with ``parse_config_file``
        job_number: ID of this job, passed to ``distribute_jobs``
        total_jobs: total number of jobs, passed to ``distribute_jobs``
        resume: if true, skip lines whose output file can already be loaded
        check_cache: accepted for interface compatibility (not used here)
        do_not_run: if true, read each line but skip model fitting
        verbose_yatsm: if true, set the algorithm logger to DEBUG

    Raises:
        click.Abort: if phenology is requested but unavailable, or if the
            output directory cannot be created or written to
    """
    import errno

    if verbose_yatsm:
        logger_algo.setLevel(logging.DEBUG)

    # Parse config
    cfg = parse_config_file(config)

    # The 'phenology' section is optional: evaluate it once and reuse the
    # answer instead of indexing cfg['phenology'] unconditionally later on
    do_pheno = bool('phenology' in cfg and cfg['phenology'].get('enable'))

    if do_pheno and not pheno:
        click.secho('Could not import yatsm.phenology but phenology metrics '
                    'are requested', fg='red')
        click.secho('Error: %s' % pheno_exception, fg='red')
        raise click.Abort()

    # Make sure output directory exists and is writable
    output_dir = cfg['dataset']['output']
    try:
        os.makedirs(output_dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Directory already exists
            pass
        elif e.errno == errno.EACCES:
            click.secho('Cannot create output directory %s' % output_dir,
                        fg='red')
            raise click.Abort()
        # Any other failure is caught by the write-access check below

    if not os.access(output_dir, os.W_OK):
        click.secho('Cannot write to output directory %s' % output_dir,
                    fg='red')
        raise click.Abort()

    # Test existence of cache directory
    read_cache, write_cache = test_cache(cfg['dataset'])

    logger.info('Job {i} of {n} - using config file {f}'.format(
        i=job_number, n=total_jobs, f=config))
    df = csvfile_to_dataframe(cfg['dataset']['input_file'],
                              cfg['dataset']['date_format'])
    df['image_ID'] = get_image_IDs(df['filename'])

    # Get attributes of one of the images
    nrow, ncol, nband, dtype = get_image_attribute(df['filename'][0])

    # Calculate the lines this job ID works on
    job_lines = distribute_jobs(job_number, total_jobs, nrow)
    logger.debug('Responsible for lines: {l}'.format(l=job_lines))

    # Calculate X feature input
    dates = np.asarray(df['date'])
    kws = {'x': dates}
    kws.update(df.to_dict())
    X = patsy.dmatrix(cfg['YATSM']['design_matrix'], kws)
    cfg['YATSM']['design'] = X.design_info.column_name_indexes

    reverse = cfg['YATSM']['reverse']
    if reverse:
        X = np.flipud(X)
        # Y is flipped along its image axis for every line below; the dates
        # must be reversed too or ``dates[valid]`` selects the wrong images
        dates = dates[::-1]

    # Create output metadata to save
    md = {
        'YATSM': cfg['YATSM'],
        cfg['YATSM']['algorithm']: cfg[cfg['YATSM']['algorithm']]
    }
    if do_pheno:
        md.update({'phenology': cfg['phenology']})

    # Values that do not change between lines or pixels
    idx_mask = cfg['dataset']['mask_band'] - 1
    min_values = cfg['dataset']['min_values']
    max_values = cfg['dataset']['max_values']
    mask_values = cfg['dataset']['mask_values']
    algo_cls = cfg['YATSM']['algorithm_cls']
    algo_cfg = cfg[cfg['YATSM']['algorithm']]

    # Begin process
    start_time_all = time.time()
    for row in job_lines:
        out = get_output_name(cfg['dataset'], row)

        if resume:
            try:
                np.load(out)
            except Exception:
                # Missing or unreadable result: (re)process this line
                pass
            else:
                logger.debug('Already processed line %s' % row)
                continue

        logger.debug('Running line %s' % row)
        start_time = time.time()

        Y = read_line(row, df['filename'], df['image_ID'], cfg['dataset'],
                      ncol, nband, dtype,
                      read_cache=read_cache, write_cache=write_cache,
                      validate_cache=False)
        if do_not_run:
            continue
        if reverse:
            Y = np.fliplr(Y)

        output = []
        for col in np.arange(Y.shape[-1]):
            _Y = Y.take(col, axis=2)

            # Mask out-of-range observations and flagged mask-band values
            valid = cyprep.get_valid_mask(
                _Y, min_values, max_values).astype(bool)
            valid *= np.in1d(_Y.take(idx_mask, axis=0), mask_values,
                             invert=True).astype(bool)

            _Y = np.delete(_Y, idx_mask, axis=0)[:, valid]
            _X = X[valid, :]
            _dates = dates[valid]

            # Run model
            model = algo_cls(lm=cfg['YATSM']['prediction_object'],
                             **algo_cfg.get('init', {}))
            model.px = col
            model.py = row

            try:
                model.fit(_X, _Y, _dates, **algo_cfg.get('fit', {}))
            except TSLengthException:
                continue

            if model.record is None or len(model.record) == 0:
                continue

            # Postprocess
            if cfg['YATSM'].get('commission_alpha'):
                model.record = postprocess.commission_test(
                    model, cfg['YATSM']['commission_alpha'])

            for prefix, lm in zip(cfg['YATSM']['refit']['prefix'],
                                  cfg['YATSM']['refit']['prediction_object']):
                model.record = postprocess.refit_record(
                    model, prefix, lm, keep_regularized=True)

            if do_pheno:
                pcfg = cfg['phenology']
                ltm = pheno.LongTermMeanPhenology(**pcfg.get('init', {}))
                model.record = ltm.fit(model, **pcfg.get('fit', {}))

            output.extend(model.record)

        logger.debug('    Saving YATSM output to %s' % out)
        np.savez(out,
                 record=np.array(output),
                 version=__version__,
                 metadata=md)

        run_time = time.time() - start_time
        logger.debug('Line %s took %ss to run' % (row, run_time))

    logger.info('Completed {n} lines in {m} minutes'.format(
        n=len(job_lines),
        m=round((time.time() - start_time_all) / 60.0, 2)))
def segment(ctx, config, job_number, total_jobs, seg_id):
    """Fit a YATSM model to the mean time series of one image segment.

    Args:
        ctx: click context (not used by this function)
        config: configuration file, parsed with
            ``yatsm.config_parser.parse_config_file``
        job_number: ID of this job, passed to ``distribute_jobs``
        total_jobs: total number of jobs, passed to ``distribute_jobs``
        seg_id: index into this job's segments of the segment to model/plot
    """
    # Parse config
    dataset_config, yatsm_config = \
        yatsm.config_parser.parse_config_file(config)

    # Read in segmentation image
    if not yatsm_config['segmentation']:
        logger.error('No segmentation image specified in configuration file.')
        sys.exit(1)

    seg_img = yatsm.reader.read_image(yatsm_config['segmentation'])[0]

    # Calculate segments for this job
    n_segment = seg_img.max()
    job_segments = yatsm.utils.distribute_jobs(job_number, total_jobs,
                                               n_segment, interlaced=False)
    job_segments += seg_img.min()

    # What lines are required?
    job_lines = yatsm.segment.segments_to_lines(seg_img, job_segments)

    # Read and store all required lines
    Y, ord_dates = read_data(dataset_config, job_lines, ravel=True)
    dates = np.array([dt.fromordinal(d) for d in ord_dates])

    # Create design matrix
    X = patsy.dmatrix(yatsm_config['design_matrix'], {'x': ord_dates})

    idx_mask = dataset_config['mask_band'] - 1

    # Preprocess timeseries for each pixel: True marks an observation to mask
    Y_mask = np.empty((Y.shape[0], Y.shape[2]), dtype=bool)
    # TODO: I'm sure there's a more efficient way...
    for pix in range(Y.shape[0]):
        # Cast before inverting: ``~`` on a non-boolean 0/1 array is a
        # bitwise NOT and yields only non-zero (truthy) values
        Y_mask[pix, :] = ~cyprep.get_valid_mask(
            Y[pix, :idx_mask, :],
            dataset_config['min_values'],
            dataset_config['max_values']).astype(bool)

    # Apply Fmask: mask an observation if it is out of range OR flagged
    Y_mask |= np.in1d(Y[:, idx_mask, :],
                      dataset_config['mask_values']).reshape(Y_mask.shape)

    # Mask Y, repeating the per-observation mask over every data band
    Y_mask = np.ones((Y.shape[0], idx_mask, Y.shape[2]), bool) \
        * Y_mask[:, np.newaxis, :]
    Y = np.ma.masked_array(Y[:, :idx_mask, :], Y_mask)

    # Preprocess segments
    n_seg = len(job_segments)
    Y_seg_n = np.zeros((n_seg, Y.shape[2]), int)
    Y_seg_mask = np.ones((n_seg, Y.shape[2]), bool)
    Y_seg_mean = np.empty((n_seg, Y.shape[1], Y.shape[2]))
    Y_seg_var = np.empty((n_seg, Y.shape[1], Y.shape[2]))
    Y_seg_std = np.empty((n_seg, Y.shape[1], Y.shape[2]))
    Y_seg_stderr = np.empty((n_seg, Y.shape[1], Y.shape[2]))

    for i, region_ID in enumerate(job_segments):
        reg_row, reg_col = np.where(seg_img == region_ID)
        reg_idx = np.ravel_multi_index((reg_row, reg_col), seg_img.shape)

        reg_Y = Y[reg_idx, :, :]

        # Number of unmasked pixels per date; the standard error below
        # needs this sample size, not the count of masked pixels
        Y_seg_n[i, :] = np.ma.count(reg_Y[:, 0, :], axis=0)
        # A date is usable for the segment if any pixel is unmasked
        Y_seg_mask[i, :] = Y_seg_n[i, :] > 0

        # Summary stats
        Y_seg_mean[i, :, :] = np.ma.mean(reg_Y, axis=0).data
        Y_seg_var[i, :, :] = np.ma.var(reg_Y, axis=0).data
        Y_seg_std[i, :, :] = np.ma.std(reg_Y, axis=0).data
        # Dates with no unmasked pixel divide by zero; they are excluded
        # through Y_seg_mask, so silence the warning
        with np.errstate(divide='ignore', invalid='ignore'):
            Y_seg_stderr[i, :, :] = (np.ma.std(reg_Y, axis=0).data /
                                     np.sqrt(Y_seg_n[i, :]))

    plot_idx = 4
    _temp_plot(dates, Y_seg_mean, Y_seg_std, Y_seg_stderr, Y_seg_mask,
               seg_id, plot_idx)

    # X and Y must cover the same observations: subset both
    usable = Y_seg_mask[seg_id, :]
    _yatsm = yatsm.yatsm.YATSM(
        X[usable],
        Y_seg_mean[seg_id][:, usable],
        consecutive=yatsm_config['consecutive'],
        threshold=yatsm_config['threshold'],
        min_obs=yatsm_config['min_obs'],
        min_rmse=yatsm_config['min_rmse'],
        test_indices=yatsm_config['test_indices'],
        retrain_time=yatsm_config['retrain_time'],
        screening=yatsm_config['screening'],
        screening_crit=yatsm_config['screening_crit'],
        green_band=dataset_config['green_band'] - 1,
        swir1_band=dataset_config['swir1_band'] - 1,
        remove_noise=yatsm_config['remove_noise'],
        dynamic_rmse=yatsm_config['dynamic_rmse'],
        design_info=X.design_info,
        lassocv=yatsm_config['lassocv'],
        px=seg_id,
        py=0,
        logger=logger)
    _yatsm.run()

    breakpoints = _yatsm.record['break'][_yatsm.record['break'] != 0]

    print('Found {n} breakpoints'.format(n=breakpoints.size))
    for i, bp in enumerate(breakpoints):
        print('Break {0}: {1}'.format(
            i, dt.fromordinal(bp).strftime('%Y-%m-%d')))

    _temp_plot(dates, Y_seg_mean, Y_seg_std, Y_seg_stderr, Y_seg_mask,
               seg_id, plot_idx, results=_yatsm)
def _write_stat_raster(in_ds, out_fn, bands):
    """Write ``(array, description)`` pairs as bands of an Int32 GeoTIFF.

    Projection and geotransform are copied from the open dataset ``in_ds``;
    every band gets a NoData value of 0.
    """
    n_rows, n_cols = bands[0][0].shape
    out_ds = gdal.GetDriverByName("GTiff").Create(
        out_fn,
        n_cols,  # x size
        n_rows,  # y size
        len(bands),  # number of bands
        gdal.GDT_Int32)
    out_ds.SetProjection(in_ds.GetProjection())
    out_ds.SetGeoTransform(in_ds.GetGeoTransform())
    for number, (arr, description) in enumerate(bands, start=1):
        band = out_ds.GetRasterBand(number)
        band.WriteArray(arr)
        band.SetNoDataValue(0)
        band.SetDescription(description)
    # Drop the references so GDAL flushes and closes the file
    band = None
    out_ds = None


def annual(row1, row2, pct):
    """Compute annual Tasseled Cap statistics and export them as rasters.

    For rows ``row1`` (inclusive) to ``row2`` (exclusive) of the hard-coded
    scene, each pixel time series is masked and screened, then the annual
    mean and the annual minimum/maximum (value and month) of the monthly
    maxima are computed for TC brightness, greenness and wetness.  Five
    3-band GeoTIFFs are written for each year in 1985-2015.

    Args:
        row1 (int): first image row to process
        row2 (int): row at which to stop (exclusive)
        pct: must be falsy; percentile clipping is not implemented

    Raises:
        NotImplementedError: if ``pct`` is truthy
    """
    if pct:
        # This branch used undefined percentile bounds and never produced
        # the values written out below, so it could only end in a
        # NameError after the imagery had been read.  Fail up front.
        raise NotImplementedError(
            'Percentile clipping (pct) is not implemented')

    NDV = -9999

    # EXAMPLE IMAGE for dimensions, map creation
    example_img_fn = '/projectnb/landsat/projects/Massachusetts/p012r031/images/example_img'

    # YATSM CONFIG FILE
    config_file = '/projectnb/landsat/projects/Massachusetts/p012r031/p012r031_config_pixel.yaml'

    WRS2 = 'p012r031'

    # Up front -- declare hard coded dataset attributes (for now)
    BAND_NAMES = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'therm',
                  'tcb', 'tcg', 'tcw', 'fmask']
    n_band = len(BAND_NAMES) - 1
    col_names = ['date', 'blue', 'green', 'red', 'nir', 'swir1', 'swir2',
                 'therm', 'tcb', 'tcg', 'tcw']
    dtype = np.int16
    years = range(1985, 2016, 1)
    # One slot per year; only indices produced by enumerate(years) are used
    length = len(years)

    # Read in example image
    example_img = read_image(example_img_fn)
    py_dim = example_img.shape[0]
    px_dim = example_img.shape[1]
    print('Shape of example image:')
    print(example_img.shape)

    # Read in and parse config file; safe_load builds plain Python objects
    # only, and the context manager closes the file
    with open(config_file) as config_fid:
        cfg = yaml.safe_load(config_fid)

    # List to np.ndarray so it works with cyprep.get_valid_mask
    cfg['dataset']['min_values'] = np.asarray(cfg['dataset']['min_values'])
    cfg['dataset']['max_values'] = np.asarray(cfg['dataset']['max_values'])

    # Get files list
    df = csvfile_to_dataframe(cfg['dataset']['input_file'],
                              date_format=cfg['dataset']['date_format'])

    # Get dates for image stack
    df['image_ID'] = get_image_IDs(df['filename'])
    df['x'] = df['date']
    dates = df['date'].values

    # Arrays for storing stats, addressed as stats[statistic][component]
    components = ('tcb', 'tcg', 'tcw')
    stat_names = ('mean', 'min_val', 'max_val', 'min_idx', 'max_idx')
    stats = {
        stat: {comp: np.zeros((py_dim, px_dim, length))
               for comp in components}
        for stat in stat_names
    }

    idx_mask = cfg['dataset']['mask_band'] - 1

    for py in range(row1, row2):  # row iterator
        print('Working on row {py}'.format(py=py))
        sys.stdout.flush()
        start_time = time.time()

        Y_row = read_line(py, df['filename'], df['image_ID'], cfg['dataset'],
                          px_dim, n_band + 1, dtype,  # +1 for now for Fmask
                          read_cache=False, write_cache=False,
                          validate_cache=False)

        for px in range(0, px_dim):  # column iterator
            Y = Y_row.take(px, axis=2)

            # Skip pixels where more than 30% of the first six bands'
            # values are NoData
            if (Y[0:6] == NDV).mean() > 0.3:
                continue

            # Mask based on physical constraints and Fmask
            valid = cyprep.get_valid_mask(
                Y,
                cfg['dataset']['min_values'],
                cfg['dataset']['max_values']).astype(bool)

            # Apply mask band
            valid *= np.in1d(Y.take(idx_mask, axis=0),
                             cfg['dataset']['mask_values'],
                             invert=True).astype(bool)
            if not valid.any():
                # Nothing left to summarize for this pixel
                continue

            # Mask time series using fmask result
            Y_fmask = np.delete(Y, idx_mask, axis=0)[:, valid]
            dates_fmask = dates[valid]

            # Apply multi-temporal mask (modified tmask)
            # Step 1. keep observations whose green is below the
            #         (fmasked) green mean + 3 standard deviations
            keep = np.where(
                Y_fmask[1] < (np.mean(Y_fmask[1]) + np.std(Y_fmask[1]) * 3))
            dates_fmask = dates_fmask[keep[0]]
            Y_fmask = Y_fmask[:, keep[0]]
            if Y_fmask.shape[1] == 0:
                continue

            # Step 2. keep observations whose SWIR1 is above the
            #         (fmasked) SWIR1 mean - 3 standard deviations
            keep = np.where(
                Y_fmask[4] > (np.mean(Y_fmask[4]) - np.std(Y_fmask[4]) * 3))
            dates_fmask = dates_fmask[keep[0]]
            Y_fmask = Y_fmask[:, keep[0]]
            if Y_fmask.shape[1] == 0:
                continue

            # Convert time from ordinal to dates
            dt_dates_fmask = np.array(
                [dt.datetime.fromordinal(d) for d in dates_fmask])

            # Create dataframe for analysis: one row per observation with
            # the date in the first column followed by the bands
            data_fmask = np.concatenate(
                [dt_dates_fmask.reshape(dt_dates_fmask.shape[0], 1),
                 np.transpose(Y_fmask)],
                axis=1)
            data_fmask_df = pd.DataFrame(data_fmask, columns=col_names)

            # Convert reflectance to numeric type
            data_fmask_df[BAND_NAMES[0:10]] = data_fmask_df[
                BAND_NAMES[0:10]].astype(int)

            # Group observations by year to generate annual TS
            year_group_fmask = data_fmask_df.groupby(
                data_fmask_df.date.dt.year)

            # Monthly maxima, regrouped by year
            month_group_fmask = data_fmask_df.groupby(
                [data_fmask_df.date.dt.year,
                 data_fmask_df.date.dt.month]).max()
            month_groups = month_group_fmask.groupby(
                month_group_fmask.date.dt.year)

            for comp in components:
                annual_mean = year_group_fmask[comp].mean()
                monthly = month_groups[comp]
                max_val = monthly.max()
                max_idx = monthly.idxmax()
                min_val = monthly.min()
                min_idx = monthly.idxmin()

                for index, year in enumerate(years):
                    if year not in annual_mean.index:
                        continue
                    stats['mean'][comp][py, px, index] = annual_mean[year]
                    stats['min_val'][comp][py, px, index] = min_val[year]
                    stats['max_val'][comp][py, px, index] = max_val[year]
                    # idxmin/idxmax give (year, month) labels: keep month
                    stats['min_idx'][comp][py, px, index] = min_idx[year][1]
                    stats['max_idx'][comp][py, px, index] = max_idx[year][1]

        run_time = time.time() - start_time
        print('Line {line} took {run_time}s to run'.format(
            line=py, run_time=run_time))
        sys.stdout.flush()

    print('Statistics complete')
    print('Writing results to raster...')
    start_time = time.time()

    # (output subdirectory, filename tag, statistic, description prefix),
    # in the order the rasters are written
    products = (
        ('mean', 'mean', 'mean', 'Mean'),
        ('min', 'min_val', 'min_val', 'Minimum'),
        ('max', 'max_val', 'max_val', 'Maximum'),
        ('min', 'min_mon', 'min_idx', 'Minimum'),
        ('max', 'max_mon', 'max_idx', 'Maximum'),
    )
    component_labels = (('tcb', 'Brightness'),
                        ('tcg', 'Greenness'),
                        ('tcw', 'Wetness'))

    # Output map for each year
    in_ds = gdal.Open(example_img_fn, gdal.GA_ReadOnly)

    for index, year in enumerate(years):
        for subdir, tag, stat, prefix in products:
            condition_fn = '/projectnb/landsat/users/valpasq/LCMS/dataviz/results/{WRS2}/{subdir}/{WRS2}_ST-BGW_{tag}_{year}_{row1}-{row2}.tif'.format(
                WRS2=WRS2, subdir=subdir, tag=tag, year=year,
                row1=row1, row2=row2)
            bands = [
                (stats[stat][comp][:, :, index],
                 '{0} Annual TC {1}'.format(prefix, label))
                for comp, label in component_labels
            ]
            _write_stat_raster(in_ds, condition_fn, bands)

    run_time = time.time() - start_time
    print('Rasters took {run_time}s to export'.format(run_time=run_time))
    sys.stdout.flush()
def _fetch_results_live(self):
    """ Run YATSM and get results

    Builds the design matrix from the first series, masks observations
    using the mask band and the min/max valid ranges, fits a ``CCDCesque``
    model and stores it, after optional postprocessing, in
    ``self.yatsm_model``.
    """
    logger.debug('Calculating YATSM results on the fly')
    # Setup design matrix, Y, and dates
    self.X = patsy.dmatrix(self._design, {
        'x': self.series[0].images['ordinal'],
        'sensor': self.series[0].sensor,
        'pr': self.series[0].pathrow
    })
    self._design_info = self.X.design_info
    self.Y = self.series[0].data.astype(np.int16)
    self.dates = np.asarray(self.series[0].images['ordinal'])

    idx_mask = self._mask_band[0] - 1
    mask = self.Y[idx_mask, :]
    Y_data = np.delete(self.Y, idx_mask, axis=0)

    # Mask out masked values
    clear = np.in1d(mask, self.mask_values, invert=True)
    valid = get_valid_mask(Y_data,
                           self._min_values,
                           self._max_values).astype(bool)
    clear *= valid

    # Setup parameters: start from the failsafe regressor and replace it
    # with a packaged one when available
    lm = sklearn.linear_model.Lasso(alpha=20)
    reg = self._regression_type
    logger.debug('Regression type: %s' % reg)
    if hasattr(yatsm.regression, 'packaged'):
        if reg in yatsm.regression.packaged.packaged_regressions:
            reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
            try:
                lm = jl.load(reg_fn)
            except Exception:
                logger.error('Cannot load regressor: %s' % reg)
            else:
                logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
        else:
            logger.error('Cannot use unknown regression %s' % reg)
    else:
        logger.warning('Using failsafe Lasso(lambda=20) from scikit-learn. '
                       'Upgrade to yatsm>=0.5.1 to access more regressors.')

    kwargs = dict(
        test_indices=self._test_indices,
        consecutive=self._consecutive,
        threshold=self._threshold,
        min_obs=self._min_obs,
        # Pass the minimum RMSE only when it is enabled (the previous
        # expression selected it only when the option was disabled)
        min_rmse=self._min_rmse if self._enable_min_rmse else None,
        screening_crit=self._screen_crit,
        remove_noise=self._remove_noise,
        dynamic_rmse=self._dynamic_rmse,
    )

    self.yatsm_model = CCDCesque(lm=lm, **kwargs)
    # Don't want to have DEBUG logging when we run YATSM
    log_level = logger.level
    logger.setLevel(logging.INFO)

    try:
        if self._reverse:
            # ``dates`` is 1-D, so reverse it by slicing: np.fliplr only
            # accepts arrays with at least two dimensions
            self.yatsm_model.fit(
                np.flipud(self.X[clear, :]),
                np.fliplr(Y_data[:, clear]),
                self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(
                self.X[clear, :],
                Y_data[:, clear],
                self.dates[clear])

        if self._commit_test:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self._commit_alpha)

        if self._calc_pheno:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology(self.yatsm_model)
            self.yatsm_model.record = ltm.fit()
    finally:
        # Restore log level, even if fitting or postprocessing failed
        logger.setLevel(log_level)
def run_pixel(X, Y, dataset_config, yatsm_config, px=0, py=0):
    """ Run a single pixel through YATSM

    Args:
        X (ndarray): 2D design matrix with one row per image; must expose
            a ``design_info`` attribute
        Y (ndarray): 2D (nband x nimage) image input
        dataset_config (dict): dict of dataset configuration options
        yatsm_config (dict): dict of YATSM algorithm options
        px (int, optional):       X (column) pixel reference
        py (int, optional):       Y (row) pixel reference

    Returns:
        model_result (ndarray): NumPy array of model results from YATSM

    Raises:
        TSLengthException: if fewer than half of the observations are
            within the valid data range

    """
    # Extract design info before X is subset below
    design_info = X.design_info

    idx_mask = dataset_config['mask_band'] - 1

    # Give up on this pixel if valid observations are less than 50% of
    # the dataset
    valid = cyprep.get_valid_mask(
        Y[:idx_mask, :],
        dataset_config['min_values'],
        dataset_config['max_values']
    )
    if valid.sum() < Y.shape[1] / 2.0:
        raise TSLengthException('Not enough valid observations')

    # Otherwise continue with masked values. Builtin ``bool`` replaces the
    # ``np.bool`` alias, which was removed from NumPy in 1.24
    valid = (valid * np.in1d(Y[idx_mask, :],
                             dataset_config['mask_values'],
                             invert=True)).astype(bool)

    Y = Y[:idx_mask, valid]
    X = X[valid, :]

    if yatsm_config['reverse']:
        # TODO: do this earlier
        X = np.flipud(X)
        Y = np.fliplr(Y)

    model = YATSM(X, Y,
                  consecutive=yatsm_config['consecutive'],
                  threshold=yatsm_config['threshold'],
                  min_obs=yatsm_config['min_obs'],
                  min_rmse=yatsm_config['min_rmse'],
                  test_indices=yatsm_config['test_indices'],
                  retrain_time=yatsm_config['retrain_time'],
                  screening=yatsm_config['screening'],
                  screening_crit=yatsm_config['screening_crit'],
                  green_band=dataset_config['green_band'] - 1,
                  swir1_band=dataset_config['swir1_band'] - 1,
                  remove_noise=yatsm_config['remove_noise'],
                  dynamic_rmse=yatsm_config['dynamic_rmse'],
                  slope_test=yatsm_config['slope_test'],
                  lassocv=yatsm_config['lassocv'],
                  design_info=design_info,
                  px=px,
                  py=py,
                  logger=logger)
    model.run()

    if yatsm_config['commission_alpha']:
        model.record = model.commission_test(
            yatsm_config['commission_alpha'])

    if yatsm_config['robust']:
        model.record = model.robust_record

    if yatsm_config['calc_pheno']:
        ltm = pheno.LongTermMeanPhenology(
            model,
            yatsm_config['red_index'], yatsm_config['nir_index'],
            yatsm_config['blue_index'], yatsm_config['scale'],
            yatsm_config['evi_index'], yatsm_config['evi_scale'])
        model.record = ltm.fit(year_interval=yatsm_config['year_interval'],
                               q_min=yatsm_config['q_min'],
                               q_max=yatsm_config['q_max'])

    return model.record
def pixel(ctx, config, px, py, band, plot, ylim, style, cmap, embed, seed,
          algo_kw):
    """Plot one pixel's time series before and after fitting the model.

    Args:
        ctx: click context (not used by this function)
        config: configuration file, parsed with ``parse_config_file``
        px: pixel column passed to ``read_pixel_timeseries``
        py: pixel row passed to ``read_pixel_timeseries``
        band: 1-based index of the band to plot
        plot: iterable of plot types ('TS', 'DOY' and/or 'VAL')
        ylim: y-axis limits for the plots, or a falsy value for automatic
        style: matplotlib style name, or 'xkcd'
        cmap: name of a colormap in ``palettable``
        embed: if true (and IPython is available), open an IPython shell
            for each plot
        seed: seed for NumPy's random number generator
        algo_kw: mapping of configuration keys to override values, given
            as YAML-formatted strings

    Raises:
        click.Abort: if ``cmap`` cannot be found in ``palettable``
    """
    # Set seed
    np.random.seed(seed)
    # Convert band to index
    band -= 1

    # Get colormap
    for _palettes in (palettable.colorbrewer,
                      palettable.cubehelix,
                      palettable.wesanderson):
        if hasattr(_palettes, cmap):
            mpl_cmap = getattr(_palettes, cmap).mpl_colormap
            break
    else:
        # click.Abort does not display a message of its own, so print the
        # reason first
        click.secho('Cannot find specified colormap in `palettable`',
                    fg='red')
        raise click.Abort()

    # Parse config
    cfg = parse_config_file(config)

    # Apply algorithm overrides
    revalidate = False
    for kw in algo_kw:
        for cfg_key in cfg:
            # Only mapping sections can hold an overridable option
            if not isinstance(cfg[cfg_key], dict):
                continue
            if kw in cfg[cfg_key]:
                # Parse as YAML for type conversions used in config parser;
                # safe_load because the value comes from the command line
                value = yaml.safe_load(algo_kw[kw])
                print('Overriding cfg[%s][%s]=%s with %s' %
                      (cfg_key, kw, cfg[cfg_key][kw], value))
                cfg[cfg_key][kw] = value
                revalidate = True

    if revalidate:
        cfg = convert_config(cfg)

    # Locate and fetch attributes from data
    df = csvfile_to_dataframe(cfg['dataset']['input_file'],
                              date_format=cfg['dataset']['date_format'])
    df['image_ID'] = get_image_IDs(df['filename'])

    # Setup X/Y
    kws = {'x': df['date']}
    kws.update(df.to_dict())
    X = patsy.dmatrix(cfg['YATSM']['design_matrix'], kws)
    design_info = X.design_info

    Y = read_pixel_timeseries(df['filename'], px, py)

    # Mask out of range data
    idx_mask = cfg['dataset']['mask_band'] - 1
    valid = cyprep.get_valid_mask(Y,
                                  cfg['dataset']['min_values'],
                                  cfg['dataset']['max_values']).astype(bool)
    valid *= np.in1d(Y[idx_mask, :], cfg['dataset']['mask_values'],
                     invert=True).astype(bool)

    # Apply mask
    Y = np.delete(Y, idx_mask, axis=0)[:, valid]
    X = X[valid, :]
    dates = np.array([dt.datetime.fromordinal(d) for d in df['date'][valid]])

    # Plot before fitting
    # NOTE: the plotting loops are kept inline (not factored into a helper)
    # so that IPython_embed() still exposes this function's local variables
    with plt.xkcd() if style == 'xkcd' else mpl.style.context(style):
        for _plot in plot:
            if _plot == 'TS':
                plot_TS(dates, Y[band, :])
            elif _plot == 'DOY':
                plot_DOY(dates, Y[band, :], mpl_cmap)
            elif _plot == 'VAL':
                plot_VAL(dates, Y[band, :], mpl_cmap)

            if ylim:
                plt.ylim(ylim)
            plt.title('Timeseries: px={px} py={py}'.format(px=px, py=py))
            plt.ylabel('Band {b}'.format(b=band + 1))

            if embed and has_embed:
                IPython_embed()

            plt.tight_layout()
            plt.show()

    # Eliminate config parameters not algorithm and fit model
    yatsm = cfg['YATSM']['algorithm_cls'](
        lm=cfg['YATSM']['prediction_object'],
        **cfg[cfg['YATSM']['algorithm']])
    yatsm.px = px
    yatsm.py = py
    yatsm.fit(X, Y, np.asarray(df['date'][valid]))

    # Plot after predictions
    with plt.xkcd() if style == 'xkcd' else mpl.style.context(style):
        for _plot in plot:
            if _plot == 'TS':
                plot_TS(dates, Y[band, :])
            elif _plot == 'DOY':
                plot_DOY(dates, Y[band, :], mpl_cmap)
            elif _plot == 'VAL':
                plot_VAL(dates, Y[band, :], mpl_cmap)

            if ylim:
                plt.ylim(ylim)
            plt.title('Timeseries: px={px} py={py}'.format(px=px, py=py))
            plt.ylabel('Band {b}'.format(b=band + 1))

            plot_results(band, cfg['YATSM'], yatsm, plot_type=_plot)

            if embed and has_embed:
                IPython_embed()

            plt.tight_layout()
            plt.show()
def run_pixel(X, Y, dataset_config, yatsm_config, px=0, py=0):
    """ Run a single pixel through YATSM

    Args:
        X (ndarray): 2D design matrix with one row per image; must expose
            a ``design_info`` attribute
        Y (ndarray): 2D (nband x nimage) image input
        dataset_config (dict): dict of dataset configuration options
        yatsm_config (dict): dict of YATSM algorithm options
        px (int, optional):       X (column) pixel reference
        py (int, optional):       Y (row) pixel reference

    Returns:
        model_result (ndarray): NumPy array of model results from YATSM

    Raises:
        TSLengthException: if fewer than half of the observations are
            within the valid data range

    """
    # Extract design info before X is subset below
    design_info = X.design_info

    # 0-based index of the mask band; the bands before it are the data
    mask_idx = dataset_config['mask_band'] - 1

    # Stop here if valid observations are less than 50% of the dataset
    in_range = cyprep.get_valid_mask(Y[:mask_idx, :],
                                     dataset_config['min_values'],
                                     dataset_config['max_values'])
    if in_range.sum() < Y.shape[1] / 2.0:
        raise TSLengthException('Not enough valid observations')

    # Otherwise also drop observations flagged in the mask band. Builtin
    # ``bool`` replaces the ``np.bool`` alias removed in NumPy 1.24
    not_flagged = np.in1d(Y[mask_idx, :],
                          dataset_config['mask_values'],
                          invert=True)
    valid = (in_range * not_flagged).astype(bool)

    Y = Y[:mask_idx, valid]
    X = X[valid, :]

    if yatsm_config['reverse']:
        # TODO: do this earlier
        X = np.flipud(X)
        Y = np.fliplr(Y)

    model = YATSM(X, Y,
                  consecutive=yatsm_config['consecutive'],
                  threshold=yatsm_config['threshold'],
                  min_obs=yatsm_config['min_obs'],
                  min_rmse=yatsm_config['min_rmse'],
                  test_indices=yatsm_config['test_indices'],
                  retrain_time=yatsm_config['retrain_time'],
                  screening=yatsm_config['screening'],
                  screening_crit=yatsm_config['screening_crit'],
                  green_band=dataset_config['green_band'] - 1,
                  swir1_band=dataset_config['swir1_band'] - 1,
                  remove_noise=yatsm_config['remove_noise'],
                  dynamic_rmse=yatsm_config['dynamic_rmse'],
                  slope_test=yatsm_config['slope_test'],
                  lassocv=yatsm_config['lassocv'],
                  design_info=design_info,
                  px=px,
                  py=py,
                  logger=logger)
    model.run()

    if yatsm_config['commission_alpha']:
        model.record = model.commission_test(
            yatsm_config['commission_alpha'])

    if yatsm_config['robust']:
        model.record = model.robust_record

    if yatsm_config['calc_pheno']:
        ltm = pheno.LongTermMeanPhenology(model,
                                          yatsm_config['red_index'],
                                          yatsm_config['nir_index'],
                                          yatsm_config['blue_index'],
                                          yatsm_config['scale'],
                                          yatsm_config['evi_index'],
                                          yatsm_config['evi_scale'])
        model.record = ltm.fit(year_interval=yatsm_config['year_interval'],
                               q_min=yatsm_config['q_min'],
                               q_max=yatsm_config['q_max'])

    return model.record
def _fetch_results_live(self):
    """ Run YATSM and get results

    Builds the design matrix from the first series, masks observations
    using the mask band and the min/max valid ranges, fits a ``CCDCesque``
    model and stores it, after optional postprocessing, in
    ``self.yatsm_model``.
    """
    logger.debug('Calculating YATSM results on the fly')
    # Setup design matrix, Y, and dates
    self.X = patsy.dmatrix(
        self.controls['design'].value, {
            'x': self.series[0].images['ordinal'],
            'sensor': self.series[0].sensor,
            'pr': self.series[0].pathrow
        })
    self._design_info = self.X.design_info.column_name_indexes
    self.Y = self.series[0].data.astype(np.int16)
    self.dates = np.asarray(self.series[0].images['ordinal'])

    idx_mask = self.config['mask_band'].value[0] - 1
    mask = self.Y[idx_mask, :]
    Y_data = np.delete(self.Y, idx_mask, axis=0)

    # Mask out masked values
    clear = np.in1d(mask, self.mask_values, invert=True)
    valid = get_valid_mask(Y_data,
                           self.config['min_values'].value,
                           self.config['max_values'].value).astype(bool)
    clear *= valid

    # Setup parameters: start from the failsafe regressor and replace it
    # with a packaged one when available
    estimator = sklearn.linear_model.Lasso(alpha=20)
    reg = self.controls['regression_type'].value
    if hasattr(yatsm.regression, 'packaged'):
        if reg in yatsm.regression.packaged.packaged_regressions:
            reg_fn = yatsm.regression.packaged.find_packaged_regressor(reg)
            try:
                estimator = jl.load(reg_fn)
            except Exception:
                logger.error('Cannot load regressor: %s' % reg)
            else:
                logger.debug('Loaded regressor %s from %s' % (reg, reg_fn))
        else:
            logger.error('Cannot use unknown regression %s' % reg)
    else:
        logger.warning(
            'Using failsafe Lasso(lambda=20) from scikit-learn. '
            'Upgrade to yatsm>=0.5.1 to access more regressors.')

    kwargs = dict(
        estimator=estimator,
        test_indices=self.controls['test_indices'].value,
        consecutive=self.controls['consecutive'].value,
        threshold=self.controls['threshold'].value,
        min_obs=self.controls['min_obs'].value,
        # Pass the minimum RMSE only when it is enabled (the previous
        # expression selected it only when the option was disabled)
        min_rmse=(self.controls['min_rmse'].value
                  if self.controls['enable_min_rmse'].value else None),
        screening_crit=self.controls['screen_crit'].value,
        remove_noise=self.controls['remove_noise'].value,
        dynamic_rmse=self.controls['dynamic_rmse'].value,
    )

    self.yatsm_model = CCDCesque(**version_kwargs(kwargs))
    # Don't want to have DEBUG logging when we run YATSM
    log_level = logger.level
    logger.setLevel(logging.INFO)

    try:
        if self.controls['reverse'].value:
            self.yatsm_model.fit(np.flipud(self.X[clear, :]),
                                 np.fliplr(Y_data[:, clear]),
                                 self.dates[clear][::-1])
        else:
            self.yatsm_model.fit(self.X[clear, :],
                                 Y_data[:, clear],
                                 self.dates[clear])

        if self.controls['commit_test'].value:
            self.yatsm_model.record = postprocess.commission_test(
                self.yatsm_model, self.controls['commit_alpha'].value)

        if self.config['calc_pheno'].value:
            # TODO: parameterize band indices & scale factor
            ltm = pheno.LongTermMeanPhenology()
            self.yatsm_model.record = ltm.fit(self.yatsm_model)
    finally:
        # Restore log level, even if fitting or postprocessing failed
        logger.setLevel(log_level)