def inverse(self, value):
    if not self.scaled():
        raise ValueError("Not invertible until scaled")
    vmin, vmax = self.vmin, self.vmax
    vin, cin = self.vin, self.cin
    if cbook.iterable(value):
        val = ma.asarray(value)
        ipos = (val > (0.5 + cin))
        ineg = (val < (0.5 - cin))
        izero = ~(ipos | ineg)
        result = ma.empty_like(val)
        result[izero] = (val[izero] - 0.5) * vin / cin
        result[ipos] = vin * pow((vmax / vin),
                                 (val[ipos] - (0.5 + cin)) / (0.5 - cin))
        result[ineg] = -vin * pow((-vmin / vin),
                                  ((0.5 - cin) - val[ineg]) / (0.5 - cin))
        r = result  # return the piecewise inverse computed above
    else:
        if value > 0.5 + cin:
            r = vin * pow((vmax / vin), (value - (0.5 + cin)) / (0.5 - cin))
        elif value < 0.5 - cin:
            r = -vin * pow((-vmin / vin), ((0.5 - cin) - value) / (0.5 - cin))
        else:
            r = (value - 0.5) * vin / cin
    return r
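# A minimal, self-contained sketch of the masked piecewise-fill pattern used
# above: carve the input into regions with boolean masks, allocate the output
# with ma.empty_like, and fill each region separately. The parameter values
# (vmin, vmax, vin, cin) and the input array are illustrative only.
import numpy as np
import numpy.ma as ma

vmin, vmax, vin, cin = -100.0, 100.0, 1.0, 0.1
val = ma.array([0.2, 0.5, 0.9, 0.7], mask=[False, False, False, True])

ipos = ma.filled(val > (0.5 + cin), False)
ineg = ma.filled(val < (0.5 - cin), False)
izero = ~(ipos | ineg)

result = ma.empty_like(val)  # uninitialised storage; every cell written below
result[izero] = (val[izero] - 0.5) * vin / cin
result[ipos] = vin * ma.power(vmax / vin,
                              (val[ipos] - (0.5 + cin)) / (0.5 - cin))
result[ineg] = -vin * ma.power(-vmin / vin,
                               ((0.5 - cin) - val[ineg]) / (0.5 - cin))
result.mask = ma.getmask(val)  # re-assert the input mask, as __call__ does
print(result)                  # the masked element stays masked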
def x_axes(self) -> Union[AxisFormatter, AxisFormatterArray]:
    """
    Return an AxisFormatter or AxisFormatterArray for the X-Axis or X-Axes of
    the wrapped Axes.
    """
    if not self._has_array:
        return AxesFormatter(self._axes).x_axis
    else:
        axes = empty_like(self._axes, dtype=AxisFormatter)
        if axes.ndim == 1:
            for i in range(self._axes.shape[0]):
                axes[i] = AxisFormatter(
                    axis=self._axes[i].xaxis,
                    direction='x',
                    axes=self._axes[i]
                )
        elif axes.ndim == 2:
            for i in range(axes.shape[0]):
                for j in range(axes.shape[1]):
                    axes[i, j] = AxisFormatter(
                        axis=self._axes[i, j].xaxis,
                        direction='x',
                        axes=self._axes[i, j]
                    )
        return AxisFormatterArray(axes)
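# A minimal sketch of the ndim dispatch above, using a stand-in Wrapper class
# in place of the project-specific AxisFormatter. np.empty_like with an object
# dtype gives an array of the same shape that can hold arbitrary objects.
import numpy as np
import matplotlib.pyplot as plt

class Wrapper:  # hypothetical stand-in for AxisFormatter
    def __init__(self, axis, direction, axes):
        self.axis, self.direction, self.axes = axis, direction, axes

fig, ax_grid = plt.subplots(2, 3)             # ax_grid is a 2-D ndarray of Axes
wrapped = np.empty_like(ax_grid, dtype=object)
for i in range(ax_grid.shape[0]):
    for j in range(ax_grid.shape[1]):
        wrapped[i, j] = Wrapper(axis=ax_grid[i, j].xaxis,
                                direction='x', axes=ax_grid[i, j])
print(wrapped.shape)                           # (2, 3)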
def loss_spatial(downscale, era5, name_array, path_figure):
    """Plot the loss (mean absolute error) as a spatial map

    Args:
        downscale: MultiSeries object
        era5: Era5 object
        name_array: array of String, length 2
        path_figure: where to save the figures
    """
    observed_data = downscale.forecaster.data
    downscale_array = [downscale, era5]
    loss_map_array = []  # store loss as a map, array of numpy matrix
    loss_min = math.inf  # for ensuring the colour bar is the same
    loss_max = 0  # for ensuring the colour bar is the same
    for downscale in downscale_array:
        loss_map = ma.empty_like(observed_data.rain[0])
        for forecaster_i, observed_rain_i in (zip(
                downscale.forecaster.generate_forecaster_no_memmap(),
                observed_data.generate_unmask_rain())):
            lat_i = forecaster_i.time_series.id[0]
            long_i = forecaster_i.time_series.id[1]
            loss_i = compound_poisson.forecast.loss.MeanAbsoluteError(
                downscale.forecaster.n_simulation)
            loss_i.add_data(forecaster_i, observed_rain_i)
            loss_bias_i = loss_i.get_bias_median_loss()
            loss_map[lat_i, long_i] = loss_bias_i
            if loss_bias_i < loss_min:
                loss_min = loss_bias_i
            if loss_bias_i > loss_max:
                loss_max = loss_bias_i
        loss_map_array.append(loss_map)

    angle_resolution = dataset.ANGLE_RESOLUTION
    longitude_grid = (observed_data.topography["longitude"]
                      - angle_resolution / 2)
    latitude_grid = (observed_data.topography["latitude"]
                     + angle_resolution / 2)

    # plot the losses
    for loss_map, downscale_name in zip(loss_map_array, name_array):
        plt.figure()
        ax = plt.axes(projection=crs.PlateCarree())
        im = ax.pcolor(longitude_grid, latitude_grid, loss_map,
                       vmin=loss_min, vmax=loss_max, cmap='Greys')
        ax.coastlines(resolution="50m")
        plt.colorbar(im)
        ax.set_aspect("auto", adjustable=None)
        plt.savefig(path.join(path_figure, downscale_name + "_mae_map.pdf"),
                    bbox_inches="tight")
        plt.close()
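# The running loss_min/loss_max above pin every map to one colour scale; a
# compact sketch of that idea with plain matplotlib (no cartopy) and made-up
# masked loss maps:
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

maps = [ma.masked_invalid(np.array([[0.2, np.nan], [0.5, 0.8]])),
        ma.masked_invalid(np.array([[0.1, 0.9], [np.nan, 0.4]]))]
vmin = min(float(m.min()) for m in maps)
vmax = max(float(m.max()) for m in maps)
for i, loss_map in enumerate(maps):
    plt.figure()
    im = plt.pcolor(loss_map, vmin=vmin, vmax=vmax, cmap='Greys')
    plt.colorbar(im)   # identical scale on every figure
    plt.savefig('demo_mae_map_%d.png' % i, bbox_inches='tight')
    plt.close()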
def _eval(self, ctx, window=None):
    # Compute partial order in which to evaluate rasters
    self.compute_order()
    df = {}
    for idx, level in enumerate(self._levels):
        if ctx.msgs:
            click.echo("Level %d" % idx)
        for name in level:
            if ctx.need(name):
                if ctx.msgs:
                    click.echo(" eval %s" % name)
                df[name] = self[name].eval(df, window)
        if idx == 0:
            namask = self.dropna(df)
    data = ma.empty_like(namask, dtype=np.float32)
    data.mask = namask
    data[~namask] = df[ctx.what]
    if False:  # debug: dump the full evaluation frame
        import pandas as pd
        dframe = pd.DataFrame(df)
        #import projections.pd_utils as pd_utils
        dframe.to_pickle('evaled.pyd')
    if False:  # debug: reflate every column and dump a slice to CSV
        import pandas as pd
        import projections.pd_utils as pd_utils
        df2 = {}
        df3 = {}
        for k in df.keys():
            tmp = ma.empty_like(namask, dtype=np.float32)
            tmp.mask = namask
            tmp[~namask] = df[k]
            df2[k] = tmp  # .reshape(-1)
            df3[k] = df2[k][75:135, 880]
        dframe = pd.DataFrame(df3)
        #pd_utils.save_pandas('evaled.pyd', dframe)
        dframe.to_csv('evaled.csv')
        import pdb
        pdb.set_trace()
        #import pandas as pd
        #dframe = pd.DataFrame(df2)
        #import pd_utils
        #pd_utils.save_pandas('1950.pyd', dframe)
    return data
def ljung_box_pierce(cross_correlation_array, length, n_lag):
    """Calculate Ljung-Box-Pierce statistics

    Args:
        cross_correlation_array: array of spatial maps of correlations, one
            for each lag
        length: length of the time series
        n_lag: integer, maximum temporal lag to sum

    Return:
        2d array, same shape as cross_correlation_array[0], contains lbp
            statistics
    """
    statistic = ma.empty_like(cross_correlation_array[0])
    statistic[np.logical_not(statistic.mask)] = 0
    for i in range(n_lag + 1):
        # accumulate rho_i**2 / (length - i); squaring directly (rather than
        # exponentiating 2*log(rho)) keeps cells with negative correlations
        # from being masked out
        statistic += ma.power(cross_correlation_array[i], 2) / (length - i)
    return statistic
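# A quick check, on made-up numbers, that the accumulation above reproduces
# the Ljung-Box-Pierce form sum_i rho_i**2 / (length - i) on masked grids.
import numpy as np
import numpy.ma as ma

length, n_lag = 100, 2
rho = ma.array(np.array([[[0.5, 0.3]], [[0.2, 0.1]], [[0.1, 0.05]]]),
               mask=[[[False, True]]] * 3)   # shape (lag, 1, 2)
expected = sum(rho[i] ** 2 / (length - i) for i in range(n_lag + 1))
print(expected)                               # second column stays masked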
def aggrade_front(self, grid, tstep, source_cells_Qs, elev, SL):
    # ensure Qs and tstep units match!
    self.total_sed_supplied_in_tstep = source_cells_Qs * tstep
    self.Qs_sort_order = np.argsort(source_cells_Qs)[::-1]  # descending order
    self.Qs_sort_order = self.Qs_sort_order[
        :np.count_nonzero(self.Qs_sort_order > 0)]
    for i in self.Qs_sort_order:
        subaerial_nodes = elev >= SL
        # mask must be passed by keyword; the second positional argument of
        # ma.array is dtype
        subsurface_elev_array = ma.array(elev, mask=subaerial_nodes)
        xy_tuple = (grid.node_x[i], grid.node_y[i])
        distance_map = grid.get_distances_of_nodes_to_point(xy_tuple)
        loop_number = 0
        closest_node_list = ma.argsort(ma.masked_array(
            distance_map, mask=subsurface_elev_array.mask))
        smooth_cone_elev_from_apex = (subsurface_elev_array[i]
                                      - distance_map * self.tan_repose_angle)
        while 1:
            filled_all_cells_flag = 0
            accom_space_at_controlling_node = SL - subsurface_elev_array[
                closest_node_list[loop_number]]
            new_max_cone_surface_elev = (smooth_cone_elev_from_apex
                                         + accom_space_at_controlling_node)
            # elementwise logical or; the Python `or` keyword raises on arrays
            subsurface_elev_array.mask = np.logical_or(
                elev >= SL, new_max_cone_surface_elev < elev)
            depth_of_accom_space = (new_max_cone_surface_elev
                                    - subsurface_elev_array)
            accom_depth_order = ma.argsort(depth_of_accom_space)[::-1]
            # Vectorised method to calc fill volumes:
            area_to_fill = ma.cumsum(grid.cellarea[accom_depth_order])
            differential_depths = ma.empty_like(depth_of_accom_space)
            differential_depths[:-1] = (
                depth_of_accom_space[accom_depth_order[:-1]]
                - depth_of_accom_space[accom_depth_order[1:]])
            differential_depths[-1] = depth_of_accom_space[
                accom_depth_order[-1]]
            incremental_volumes = ma.cumsum(differential_depths * area_to_fill)
            match_position_of_Qs_in = ma.searchsorted(
                incremental_volumes, self.total_sed_supplied_in_tstep[i])
            try:
                depths_to_add = (depth_of_accom_space
                                 - depth_of_accom_space[
                                     match_position_of_Qs_in])
            except IndexError:
                # supply exceeds all available accommodation space
                depths_to_add = (depth_of_accom_space
                                 - depth_of_accom_space[
                                     match_position_of_Qs_in - 1])
                filled_all_cells_flag = 1
            depths_to_add = depths_to_add[ma.where(depths_to_add >= 0)]
            if not filled_all_cells_flag:
                depths_to_add += ((self.total_sed_supplied_in_tstep[i]
                                   - incremental_volumes[
                                       match_position_of_Qs_in - 1])
                                  / area_to_fill[match_position_of_Qs_in - 1])
                subsurface_elev_array[
                    accom_depth_order[:len(depths_to_add)]] = depths_to_add
                self.total_sed_supplied_in_tstep[i] = 0
                break
            else:
                subsurface_elev_array[accom_depth_order] = depths_to_add
                self.total_sed_supplied_in_tstep[i] -= incremental_volumes[-1]
                loop_number += 1
    return elev
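# A small, self-contained sketch (made-up numbers) of the vectorised
# fill-volume trick above: sort cells by available accommodation depth, then
# the volume needed to fill the k deepest cells to a common level is a
# cumulative sum of (depth step) * (cumulative area), and searchsorted finds
# how far a given sediment supply reaches.
import numpy as np

depth = np.array([0.2, 1.0, 0.5, 0.1])   # accommodation depth per cell
area = np.array([1.0, 1.0, 2.0, 1.0])    # cell areas
order = np.argsort(depth)[::-1]           # deepest first

area_to_fill = np.cumsum(area[order])
diff = np.empty_like(depth)
diff[:-1] = depth[order[:-1]] - depth[order[1:]]
diff[-1] = depth[order[-1]]
incremental_volumes = np.cumsum(diff * area_to_fill)

supply = 1.2
k = np.searchsorted(incremental_volumes, supply)
# incremental_volumes is [0.5, 1.4, 1.8, 2.3], so k == 1: the supply fills the
# deepest cell and part of the next layer
print(incremental_volumes, k)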
def __call__(self, value, clip=None):
    if clip is None:
        clip = self.clip
    if cbook.iterable(value):
        vtype = 'array'
        val = ma.asarray(value).astype(float)
    else:
        vtype = 'scalar'
        val = ma.array([value]).astype(float)
    self.autoscale_None(val)
    vmin, vmax = self.vmin, self.vmax
    vin, cin = self.vin, self.cin
    if vmin > vmax:
        raise ValueError("minvalue must be less than or equal to maxvalue")
    elif vmin > 0:
        raise ValueError("minvalue must be less than or equal to 0")
    elif vmax < 0:
        raise ValueError("maxvalue must be greater than or equal to 0")
    elif vmin == vmax:
        result = 0.0 * val
    else:
        if clip:
            mask = ma.getmask(val)
            val = ma.array(np.clip(val.filled(vmax), vmin, vmax), mask=mask)
        ipos = (val > vin)
        ineg = (val < -vin)
        izero = ~(ipos | ineg)
        result = ma.empty_like(val)
        result[izero] = 0.5 + cin * val[izero] / vin
        result[ipos] = 0.5 + cin + (0.5 - cin) * \
            (ma.log(val[ipos]) - np.log(vin)) / (np.log(vmax) - np.log(vin))
        result[ineg] = 0.5 - cin - (0.5 - cin) * \
            (ma.log(-val[ineg]) - np.log(vin)) / (np.log(-vmin) - np.log(vin))
        result.mask = ma.getmask(val)
    if vtype == 'scalar':
        result = result[0]
    return result
def spatial_correlation(downscale, era5, name_array, path_figure):
    """Plot spatial correlations

    Plot spatial correlation as a spatial map for different temporal lags: a
    figure each for CP-MCMC, ERA5 and observed, all sharing the same colour
    bar. Spatial correlation is measured relative to the centre of mass for
    Wales (referred to as the reference). The median over all forecasts is
    taken first, followed by spatial correlation, i.e. shown is the spatial
    correlation of the median forecast, not the median spatial correlation of
    the forecasts.

    Plot Ljung-Box-Pierce statistics (as a spatial map for different lags),
    comparing the spatial correlation of CP-MCMC with the observed data.

    Plot Ljung-Box-Pierce statistics (as a histogram, averaging over space,
    for different lags), comparing the spatial correlation of CP-MCMC with
    the observed data.

    Args:
        downscale: MultiSeries object
        era5: Era5 object
        name_array: array of String, length 2
        path_figure: where to save the figures
    """
    test_set = downscale.forecaster.data
    time_length = len(test_set)
    angle_resolution = dataset.ANGLE_RESOLUTION
    longitude_grid = test_set.topography["longitude"] - angle_resolution / 2
    latitude_grid = test_set.topography["latitude"] + angle_resolution / 2

    reference = [10, 17]  # index for the centre of mass for Wales

    # time series at the centre of mass for cp-mcmc, era 5 and observed
    forecast_reference = None
    era5_reference = None
    test_set_reference = test_set.rain[:, reference[0], reference[1]]

    # retrieve the cp-mcmc forecast and era 5
    forecast_array = ma.empty_like(test_set.rain)
    era5_array = ma.empty_like(test_set.rain)
    for time_series_i in downscale.generate_unmask_time_series():
        forecast_array[:, time_series_i.id[0], time_series_i.id[1]] = (
            time_series_i.forecaster.forecast_median)
        if time_series_i.id == reference:
            forecast_reference = time_series_i.forecaster.forecast_median
    for time_series_i in era5.generate_unmask_time_series():
        era5_array[:, time_series_i.id[0], time_series_i.id[1]] = (
            time_series_i.forecaster.forecast_median)
        if time_series_i.id == reference:
            era5_reference = time_series_i.forecaster.forecast_median

    # array of spatial map of spatial correlation, one for each time lag
    forecast_cross_correlation_array = []
    era5_cross_correlation_array = []
    test_set_cross_correlation_array = []

    n_lag = 10
    # for each lag, calculate and plot spatial correlation
    for i_lag in range(n_lag):
        forecast_cross_correlation = spatial_cross_correlation(
            forecast_reference, forecast_array, i_lag)
        forecast_cross_correlation_array.append(forecast_cross_correlation)
        era5_cross_correlation = spatial_cross_correlation(
            era5_reference, era5_array, i_lag)
        era5_cross_correlation_array.append(era5_cross_correlation)
        test_set_cross_correlation = spatial_cross_correlation(
            test_set_reference, test_set.rain, i_lag)
        test_set_cross_correlation_array.append(test_set_cross_correlation)

        vmax = ma.max([
            forecast_cross_correlation.max(),
            era5_cross_correlation.max(),
            test_set_cross_correlation.max(),
        ])
        vmin = ma.min([
            forecast_cross_correlation.min(),
            era5_cross_correlation.min(),
            test_set_cross_correlation.min(),
        ])

        cross_correlation_array = [
            forecast_cross_correlation,
            era5_cross_correlation,
            test_set_cross_correlation,
        ]
        label_array = ["forecast", "era5", "observed"]
        for cross_correlation, label in zip(cross_correlation_array,
                                            label_array):
            plt.rcParams.update({'font.size': 18})
            plt.figure()
            ax = plt.axes(projection=crs.PlateCarree())
            im = ax.pcolor(longitude_grid, latitude_grid, cross_correlation,
                           cmap='Greys', vmin=vmin, vmax=vmax)
            plt.hlines(latitude_grid[reference[0], reference[1]]
                       - angle_resolution / 2,
                       longitude_grid.min(), longitude_grid.max(),
                       colors='k', linestyles='dashed')
            plt.vlines(longitude_grid[reference[0], reference[1]]
                       + angle_resolution / 2,
                       latitude_grid.min(), latitude_grid.max(),
                       colors='k', linestyles='dashed')
            ax.coastlines(resolution="50m")
            plt.colorbar(im)
            ax.set_aspect("auto", adjustable=None)
            plt.savefig(path.join(
                path_figure,
                "correlation_" + label + "_" + str(i_lag) + ".pdf"),
                bbox_inches="tight")
            plt.close()

    # ljung_box_pierce statistics, for comparing cp-mcmc with observed
    forecast_cross_correlation_array = ma.asarray(
        forecast_cross_correlation_array)
    test_set_cross_correlation_array = ma.asarray(
        test_set_cross_correlation_array)

    # array of numpy array, for each lag
    # each numpy array contains the lbp statistic for each location
    lbp_array = []
    for i_lag in range(n_lag):
        forecast_statistic = ljung_box_pierce(
            forecast_cross_correlation_array, time_length, i_lag)
        test_set_statistic = ljung_box_pierce(
            test_set_cross_correlation_array, time_length, i_lag)
        # ratio of the two statistics, computed in log space
        f_statistic = ma.log(forecast_statistic) - ma.log(test_set_statistic)
        f_statistic = ma.exp(f_statistic)

        # spatial plot
        plt.figure()
        ax = plt.axes(projection=crs.PlateCarree())
        im = ax.pcolor(longitude_grid, latitude_grid, f_statistic,
                       cmap='Greys')
        plt.hlines(latitude_grid[reference[0], reference[1]]
                   - angle_resolution / 2,
                   longitude_grid.min(), longitude_grid.max(),
                   colors='k', linestyles='dashed')
        plt.vlines(longitude_grid[reference[0], reference[1]]
                   + angle_resolution / 2,
                   latitude_grid.min(), latitude_grid.max(),
                   colors='k', linestyles='dashed')
        ax.coastlines(resolution="50m")
        plt.colorbar(im)
        ax.set_aspect("auto", adjustable=None)
        plt.savefig(path.join(path_figure,
                              "correlation_lbp_" + str(i_lag) + ".pdf"),
                    bbox_inches="tight")
        plt.close()

        f_statistic = f_statistic[np.logical_not(f_statistic.mask)]
        f_statistic = f_statistic.data
        lbp_array.append(f_statistic.flatten())

    # histogram of statistics
    plt.figure()
    plt.boxplot(lbp_array, positions=range(n_lag))
    plt.xlabel("lag")
    plt.ylabel("ratio of Ljung-Box")
    plt.savefig(path.join(path_figure, "correlation_lbp_ratio.pdf"),
                bbox_inches="tight")
    plt.close()
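# spatial_cross_correlation is defined elsewhere in this module; a plausible
# minimal version, for illustration only (the project's actual helper may
# differ): correlate a reference series against every grid cell's series at a
# given temporal lag, on masked 3-d rain data of shape (time, lat, long).
import numpy as np
import numpy.ma as ma

def spatial_cross_correlation_sketch(reference_series, value_map, lag):
    n_time = value_map.shape[0]
    x = reference_series[:n_time - lag]
    correlation = ma.empty_like(value_map[0])
    correlation.mask = ma.getmaskarray(value_map[0])
    for i in range(value_map.shape[1]):
        for j in range(value_map.shape[2]):
            if not correlation.mask[i, j]:
                y = value_map[lag:, i, j]
                correlation[i, j] = ma.corrcoef(x, y)[0, 1]
    return correlation

rain = ma.masked_invalid(np.random.rand(20, 2, 2))
print(spatial_cross_correlation_sketch(rain[:, 0, 0], rain, lag=1))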
import os
from copy import copy

from netCDF4 import Dataset
import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma

palette = copy(plt.cm.Greens)
palette.set_over('r', 1.0)
palette.set_under('y', 1.0)
palette.set_bad('k', 1.0)

static = Dataset('../../data/luh2_v2/staticData_quarterdeg.nc')
icwtr = static.variables['icwtr'][:, :]
fstnf = static.variables['fstnf'][:, :]

vars = [
    u'primf', u'primn', u'secdf', u'secdn', u'urban', u'c3ann', u'c4ann',
    u'c3per', u'c4per', u'c3nfx', u'pastr', u'range'
]
a = ma.empty_like(icwtr)
a.mask = np.where(icwtr == 1, True, False)
atol = 1e-5
scenarios = [
    'LUH2_v2f_beta_SSP1_RCP2.6_IMAGE',
    'LUH2_v2f_beta_SSP2_RCP4.5_MESSAGE-GLOBIOM',
    'LUH2_v2f_beta_SSP3_RCP7.0_AIM',
    'LUH2_v2f_beta_SSP4_RCP3.4_GCAM',
    'LUH2_v2f_beta_SSP4_RCP6.0_GCAM',
    'LUH2_v2f_beta_SSP5_RCP8.5_REMIND-MAGPIE',
    'historical'
]
file_list = [
    os.path.join('../../data/luh2_v2', x, 'states.nc') for x in scenarios
]
for fname in file_list:
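# The states files hold per-cell land-use fractions; a typical consistency
# check (sketch, made-up 2x2 grid) is that the fractions plus the permanent
# ice/water fraction sum to 1 within atol, using the same masking pattern as
# above.
import numpy as np
import numpy.ma as ma

icwtr_demo = np.array([[0.0, 1.0], [0.25, 0.0]])
fractions = [np.array([[0.6, 0.0], [0.50, 0.7]]),
             np.array([[0.4, 0.0], [0.25, 0.3]])]
total = ma.empty_like(icwtr_demo)
total.mask = icwtr_demo == 1            # ignore pure ice/water cells
total[~total.mask] = 0
for frac in fractions:
    total += frac
ok = np.isclose(ma.filled(total + icwtr_demo, 1.0), 1.0, atol=1e-5)
print(ok)                                # masked cells pass trivially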
def print_forecast(self):
    """Print figures for the forecasts

    The following figures are printed:
        -bias loss at each location (heat map), for both mean and median
        -median forecast, heat map for each day
        -for each location, everything in TimeSeries.print_forecast()
    """
    test_set = self.forecaster.data
    angle_resolution = dataset.ANGLE_RESOLUTION
    longitude_grid = (test_set.topography["longitude"]
                      - angle_resolution / 2)
    latitude_grid = test_set.topography["latitude"] + angle_resolution / 2
    rain_units = test_set.rain_units

    series_dir = path.join(self.directory, "series_forecast")
    if not path.isdir(series_dir):
        os.mkdir(series_dir)
    map_dir = path.join(self.directory, "map_forecast")
    if not path.isdir(map_dir):
        os.mkdir(map_dir)

    # forecast map, 3 dimensions, same as test_set.rain
    # prediction of precipitation for each point in space and time
    # 0th dimension is time, remaining is space
    forecast_map = ma.empty_like(test_set.rain)

    # array of dictionaries, one for each loss class
    # each element contains a dictionary of loss maps
    loss_map_array = []
    for i_loss in range(len(loss_segmentation.LOSS_CLASSES)):
        dic = {}
        dic["bias_mean"] = ma.empty_like(test_set.rain[0])
        dic["bias_median"] = ma.empty_like(test_set.rain[0])
        loss_map_array.append(dic)

    # get forecast (median) and losses for the maps
    for forecaster_i, observed_rain_i in (zip(
            self.forecaster.generate_time_series_forecaster(),
            test_set.generate_unmask_rain())):
        lat_i = forecaster_i.time_series.id[0]
        long_i = forecaster_i.time_series.id[1]
        forecast_map[:, lat_i, long_i] = forecaster_i.forecast_median
        # get the value for each loss (to produce a map)
        for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):
            loss_i = Loss(self.forecaster.n_simulation)
            loss_i.add_data(forecaster_i, observed_rain_i)
            loss_map_array[i_loss]["bias_mean"][lat_i, long_i] = (
                loss_i.get_bias_loss())
            loss_map_array[i_loss]["bias_median"][lat_i, long_i] = (
                loss_i.get_bias_median_loss())
        forecaster_i.del_memmap()

    # plot the losses map
    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):
        for metric, loss_map in loss_map_array[i_loss].items():
            plt.figure()
            ax = plt.axes(projection=crs.PlateCarree())
            im = ax.pcolor(longitude_grid, latitude_grid, loss_map,
                           cmap='Greys')
            ax.coastlines(resolution="50m")
            plt.colorbar(im)
            ax.set_aspect("auto", adjustable=None)
            plt.savefig(path.join(
                self.directory_assess,
                self.prefix + Loss.get_short_name() + "_" + metric
                + "_map.pdf"),
                bbox_inches="tight")
            plt.close()

    # plot the spatial forecast for each time (in parallel)
    message_array = []
    for i, time in enumerate(test_set.time_array):
        title = "precipitation (" + rain_units + ") : " + str(time)
        file_path = path.join(map_dir, str(i) + ".png")
        message = PrintForecastMapMessage(forecast_map[i],
                                          latitude_grid,
                                          longitude_grid,
                                          title,
                                          file_path)
        message_array.append(message)
    self.pool.map(PrintForecastMapMessage.print, message_array)

    # plot the forecast (time series) for each location (in parallel)
    message_array = []
    for forecaster_i, observed_rain_i in (zip(
            self.forecaster.generate_forecaster_no_memmap(),
            test_set.generate_unmask_rain())):
        message = PrintForecastSeriesMessage(series_dir,
                                             forecaster_i,
                                             observed_rain_i)
        message_array.append(message)
    self.pool.map(PrintForecastSeriesMessage.print, message_array)
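# The per-frame maps above are drawn in parallel by handing picklable
# "message" objects to a process pool; a minimal sketch of that pattern (the
# names here are stand-ins, not the project's classes):
from multiprocessing import Pool

class PlotMessage:
    def __init__(self, index):
        self.index = index

    def print(self):
        # in the real code this draws and saves one figure
        return "plotted frame %d" % self.index

if __name__ == '__main__':
    messages = [PlotMessage(i) for i in range(4)]
    with Pool(2) as pool:
        # the unbound method is picklable because the class lives at module
        # level, mirroring pool.map(PrintForecastMapMessage.print, ...)
        print(pool.map(PlotMessage.print, messages))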
#!/usr/bin/env python

# This script checks that secma is <= 1 when secdf + secdn == 0.

from netCDF4 import Dataset
import numpy as np
import numpy.ma as ma
import os

static = Dataset('../../data/luh2_v2/staticData_quarterdeg.nc')
icwtr = static.variables['icwtr'][:, :]
secd = ma.empty_like(icwtr)
secd.mask = np.where(icwtr == 1, True, False)
secma = ma.empty_like(icwtr)
secma.mask = secd.mask
atol = 1e-5
scenarios = [#'LUH2_v2f_beta_SSP1_RCP2.6_IMAGE',
             #'LUH2_v2f_beta_SSP2_RCP4.5_MESSAGE-GLOBIOM',
             #'LUH2_v2f_beta_SSP3_RCP7.0_AIM',
             #'LUH2_v2f_beta_SSP4_RCP3.4_GCAM',
             #'LUH2_v2f_beta_SSP4_RCP6.0_GCAM',
             #'LUH2_v2f_beta_SSP5_RCP8.5_REMIND-MAGPIE',
             'historical']
file_list = [os.path.join('../../data/luh2_v2', x, 'states.nc')
             for x in scenarios]
total = 0
for fname in file_list:
    print(fname)
    with Dataset(fname) as ds:
def reflate(self, namask, data):
    # Expand a flat vector of valid-cell values back into a full masked grid.
    arr = ma.empty_like(namask, dtype=np.float32)
    arr.mask = namask
    arr[~namask] = data
    return arr
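# Usage sketch of the reflate pattern: keep only valid cells as a flat
# vector, operate on it, then re-expand onto the grid (made-up 2x2 example).
import numpy as np
import numpy.ma as ma

namask = np.array([[False, True], [False, False]])   # True marks NA cells
flat = np.array([1.0, 2.0, 3.0], dtype=np.float32)   # one value per valid cell

grid = ma.empty_like(namask, dtype=np.float32)
grid.mask = namask
grid[~namask] = flat    # fills valid cells in row-major order
print(grid)             # [[1.0 --] [2.0 3.0]]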
scenarios = ['historical']
states = [os.path.join('../../data/luh2_v2', x, 'states.nc')
          for x in scenarios]
transitions = [os.path.join('../../data/luh2_v2', x, 'transitions.nc')
               for x in scenarios]
file_list = zip(states, transitions)
sidx = 0
for sname, tname in file_list:
    print(sname)
    print(tname)
    with Dataset(tname) as trans:
        with Dataset(sname) as state:
            shp = state.variables['secdf'].shape
            currf = ma.empty_like(icwtr)
            currf.mask = (icwtr == 1.0)
            currn = ma.empty_like(currf)
            secdf = ma.empty_like(currf)
            secdn = ma.empty_like(currf)
            # wrap the filters in tuple() so they can be printed and reused
            # under Python 3
            posf = tuple(filter(
                lambda x: re.search(r'(?!secdf)_to_secdf$|primf_harv$', x),
                trans.variables.keys()))
            posn = tuple(filter(
                lambda x: re.search(r'(?!secdn)_to_secdn$|primn_harv$', x),
                trans.variables.keys()))
            negf = tuple(filter(
                lambda x: re.match(r'secdf_to_(?!secdf)', x),
                trans.variables.keys()))
            negn = tuple(filter(
                lambda x: re.match(r'secdn_to_(?!secdn)', x),
                trans.variables.keys()))
            print("posf: ", posf, "\n")
            print("posn: ", posn, "\n")
            print("negf: ", negf, "\n")
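# How the transition-variable regexes above select layer names. The sample
# names are modelled on LUH2 transitions and are illustrative, not exhaustive:
import re

names = ['primf_to_secdf', 'c3ann_to_secdf', 'primf_harv',
         'secdf_to_urban', 'secdf_to_secdn', 'urban_to_c3ann']
posf = [x for x in names if re.search(r'(?!secdf)_to_secdf$|primf_harv$', x)]
negf = [x for x in names if re.match(r'secdf_to_(?!secdf)', x)]
print(posf)   # ['primf_to_secdf', 'c3ann_to_secdf', 'primf_harv']
print(negf)   # ['secdf_to_urban', 'secdf_to_secdn']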
def doit(scenario, outdir, start_index=0):
    static = Dataset(os.path.join(utils.luh2_dir(),
                                  'staticData_quarterdeg.nc'))
    icwtr = static.variables['icwtr'][:, :]
    atol = 5e-5
    variables = tuple([(x % fnf, 'f4', '1', -9999, 'time')
                       for fnf in ('f', 'n')
                       for x in ('secd%s%%s' % n for n in ('y', 'i', 'm'))] +
                      [('bins%s' % fnf, 'f4', '1', -9999, 'bins')
                       for fnf in ('f', 'n')])
    baselinef = None
    baselinen = None
    if scenario == 'all':
        # historical must be the first scenario processed
        scenarios = sorted(utils.luh2_scenarios())
    else:
        scenarios = [scenario]
    for scenario in scenarios:
        oname = os.path.join(outdir, 'secd-%s.nc' % scenario)
        tname = utils.luh2_transitions(scenario)
        sname = utils.luh2_states(scenario)
        if not (os.path.isfile(tname) and os.path.isfile(sname)):
            click.echo("skipping %s" % scenario)
            continue
        click.echo('%s -> %s' % (scenario, oname))
        with Dataset(oname, 'w') as out:
            click.echo(sname)
            click.echo(tname)
            with Dataset(tname) as trans:
                with Dataset(sname) as state:
                    _ = init_nc(out, state, variables)
                    if scenario == 'historical':
                        # Create a 3-D array to hold the last 50 years
                        # (plus 2)
                        valuesf = init_values(state, 'secdf', start_index,
                                              icwtr)
                        valuesn = init_values(state, 'secdn', start_index,
                                              icwtr)
                    elif baselinef is None or baselinen is None:
                        with Dataset(os.path.join(
                                outdir, 'secd-historical.nc')) as hist:
                            valuesf = hist.variables['binsf'][:]
                            valuesn = hist.variables['binsn'][:]
                    else:
                        valuesf = baselinef.copy()
                        valuesn = baselinen.copy()
                    # Write initial data to output.
                    valuesf[0].fill(0)
                    valuesn[0].fill(0)
                    write_data(out, 'f', start_index, valuesf)
                    write_data(out, 'n', start_index, valuesn)
                    remove = ma.empty_like(valuesf[0])
                    frac = ma.empty_like(valuesf[0])
                    posf = tuple(
                        filter(lambda x: re.match(pos_re('f'), x),
                               trans.variables.keys()))
                    posn = tuple(
                        filter(lambda x: re.match(pos_re('n'), x),
                               trans.variables.keys()))
                    negf = tuple(
                        filter(lambda x: re.match(neg_re('f'), x),
                               trans.variables.keys()))
                    negn = tuple(
                        filter(lambda x: re.match(neg_re('n'), x),
                               trans.variables.keys()))
                    click.echo(" " + ', '.join(posf))
                    click.echo(" " + ', '.join(negf))
                    click.echo(" " + ', '.join(posn))
                    click.echo(" " + ', '.join(negn))
                    for idx in range(start_index,
                                     trans.variables['time'].shape[0]):
                        click.echo(" year %d" % to_year(scenario, idx))
                        # Compute transitions from / to secondary.
                        sum_layers(trans, idx, negf, remove)
                        sum_layers(trans, idx, posf, valuesf[0])
                        # Adjust secondary history
                        dorem(valuesf, remove, frac)
                        # Repeat for non-forested
                        sum_layers(trans, idx, negn, remove)
                        sum_layers(trans, idx, posn, valuesn[0])
                        dorem(valuesn, remove, frac)
                        # Check consistency of data.
                        asserts(state, idx, 'secdf', valuesf, atol)
                        asserts(state, idx, 'secdn', valuesn, atol)
                        # Write data to output.
                        write_data(out, 'f', idx + 1, valuesf)
                        write_data(out, 'n', idx + 1, valuesn)
                        # Rotate the array.
                        valuesf = roll_values(valuesf)
                        valuesn = roll_values(valuesn)
                    if scenario == 'historical':
                        baselinef = write_bins(out, 'binsf', valuesf).copy()
                        baselinen = write_bins(out, 'binsn', valuesn).copy()
        start_index = 0
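# roll_values above rotates the secondary-age cohort axis by one year; its
# definition is not shown here, so this is an assumed, simplified semantic
# (sketch only): bin 0 holds this year's new secondary land and the oldest
# bin accumulates everything older than the window.
import numpy as np

def roll_values_sketch(values):
    values[-1] += values[-2]            # oldest bin absorbs the next-oldest
    values[1:-1] = values[:-2].copy()   # every cohort ages by one year
    values[0] = 0                       # fresh bin for next year's transitions
    return values

ages = np.arange(5, dtype=float).reshape(5, 1, 1)   # (bins, lat, lon)
print(roll_values_sketch(ages).ravel())             # [0. 0. 1. 2. 7.]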
def main():
    # parse command-line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ## targets to fit
    parser.add_argument('--name', type=str, default=None,
                        help='skim file name')
    parser.add_argument('--abs-beta', type=float, default=3.92,
                        help='absorption redshift scaling power')
    parser.add_argument('--abs-alpha', type=float, default=0.0018,
                        help='absorption coefficient')
    parser.add_argument('--forest-wave-ref', type=float, default=1185.0,
                        help='forest wave reference')
    args = parser.parse_args()

    # import data
    forest_skim = h5py.File(args.name + '-forest.hdf5', 'r')
    forest_flux = np.ma.MaskedArray(forest_skim['flux'][:],
                                    mask=forest_skim['mask'][:])
    forest_ivar = np.ma.MaskedArray(forest_skim['ivar'][:],
                                    mask=forest_skim['mask'][:])
    forest_loglam = forest_skim['loglam'][:]
    forest_wave = np.power(10.0, forest_loglam)
    forest_norm = forest_skim['norm'][:]

    quasar_redshifts = forest_skim['z'][:]
    redshift_order = np.argsort(quasar_redshifts)
    wave_lya = forest_skim.attrs['wave_lya']
    forest_pixel_redshifts = (
        1.0 + quasar_redshifts[:, np.newaxis]) * forest_wave / wave_lya - 1.0
    print('Input data shape: {}'.format(forest_pixel_redshifts.shape))

    #### Method 1, find which mean flux slice to use for which pixel
    ## uses: shifted_rows, shifted_cols, flux.shape, forest_flux,
    ## forest_weight, args.subsample_step, redshift_order,
    ## forest_pixel_redshifts

    print('Starting linear continuum fit ...')

    num_forests, num_forest_waves = forest_flux.shape

    print('Building model matrix...')

    log_forest_wave_ratio = np.log(forest_wave / args.forest_wave_ref)
    # raveled_weights = np.ones_like(forest_ivar).ravel()
    # np.sqrt(forest_ivar/(1.0+forest_ivar*0.055)).ravel()

    num_params = 2
    param_coefs = np.tile(
        np.vstack((np.ones(num_forest_waves), log_forest_wave_ratio)).reshape(
            (-1, ), order='F'), num_forests)
    param_rows = np.repeat(np.arange(num_forests * num_forest_waves),
                           num_params)
    param_cols = np.vstack(
        (np.repeat(np.arange(num_forests) * num_params, num_forest_waves),
         np.repeat(np.arange(num_forests) * num_params + 1,
                   num_forest_waves))).reshape((-1, ), order='F')

    # num_params = 1
    # param_coefs = np.tile(np.ones(num_forest_waves), num_forests)
    # param_rows = np.arange(num_forests*num_forest_waves)
    # param_cols = np.repeat(np.arange(num_forests), num_forest_waves)

    print('Param coef shapes: {} {} {}'.format(param_coefs.shape,
                                               param_rows.shape,
                                               param_cols.shape))

    #### Add continuum coefficients
    cont_coefs = np.tile(np.ones(num_forest_waves), num_forests)
    cont_rows = np.arange(num_forests * num_forest_waves)
    cont_cols = np.tile(np.arange(num_forest_waves), num_forests)

    print('Continuum coef shapes: {} {} {}'.format(cont_coefs.shape,
                                                   cont_rows.shape,
                                                   cont_cols.shape))

    #### Add absorption coefficients
    abs_coefs = args.abs_alpha * np.power(1 + forest_pixel_redshifts,
                                          args.abs_beta)

    # forest_min_z = 1.9
    # forest_max_z = 3.5
    # forest_dz = 0.1
    # num_z_bins = int((forest_max_z-forest_min_z)/forest_dz)
    # fz_zbin_indices = np.floor((forest_pixel_redshifts.ravel() - forest_min_z)/forest_dz).astype(int)
    #
    # print fz_zbin_indices.shape
    # print fz_zbin_indices

    # lo_coef = forest_pixel_redshifts - fz_zbin_indices*dz
    # hi_coef = forest_dz-lo_coef
    # abs_coefs = np.vstack((lo_coef,hi_coef)).reshape((-1,),order='F')
    # abs_cols = fz_zbin_indices
    # abs_rows = np.repeat(np.arange(num_forests*num_forest_waves), 2)

    # abs_coefs = np.ones(num_forest_waves*num_forests)
    # abs_rows = np.arange(num_forests*num_forest_waves)
    # abs_cols = fz_zbin_indices

    # print abs_coefs.shape

    model_coefs = np.concatenate((cont_coefs, param_coefs))
    model_rows = np.concatenate((cont_rows, param_rows))
    model_cols = np.concatenate((cont_cols, num_forest_waves + param_cols))

    print('Model coef shapes: {} {} {}'.format(model_coefs.shape,
                                               model_rows.shape,
                                               model_cols.shape))

    model_shape = (num_forests * num_forest_waves,
                   num_forest_waves + num_params * num_forests)
    model_matrix = scipy.sparse.csc_matrix(
        (model_coefs, (model_rows, model_cols)), shape=model_shape)

    print('Model matrix shape: {}'.format(model_matrix.shape))

    model_y = ma.log(ma.masked_where(forest_flux <= 0,
                                     forest_flux)) + abs_coefs

    print('y shape, num masked pixels: {} {}'.format(model_y.shape,
                                                     np.sum(model_y.mask)))

    # valid = ~model_y.mask.ravel()
    regr = linear_model.LinearRegression(fit_intercept=False)
    print('... performing fit using {} ...\n'.format(regr))
    # regr.fit(model_matrix[valid], model_y.ravel()[valid])
    regr.fit(model_matrix, model_y.ravel())

    soln = regr.coef_

    continuum = np.exp(soln[:num_forest_waves])
    # absorption = soln[num_forest_waves:2*num_forest_waves]
    params_a_slice = slice(num_forest_waves,
                           num_forest_waves + num_params * num_forests,
                           num_params)
    params_a = np.exp(soln[params_a_slice])
    params_b_slice = slice(num_forest_waves + 1,
                           num_forest_waves + num_params * num_forests,
                           num_params)
    params_b = soln[params_b_slice]
    # mean_transmission = np.exp(soln[num_forest_waves+num_params*num_forests:])

    print('Number of continuum params: {}'.format(continuum.shape))

    outfile = h5py.File(args.name + '-linear-continuum.hdf5', 'w')
    dataset_kwargs = {'compression': 'gzip'}

    # copy attributes from input file
    for attr_key in forest_skim.attrs:
        outfile.attrs[attr_key] = forest_skim.attrs[attr_key]
    # save args
    outfile.attrs['abs_alpha'] = args.abs_alpha
    outfile.attrs['abs_beta'] = args.abs_beta
    outfile.attrs['forest_wave_ref'] = args.forest_wave_ref
    # save fit results
    outfile.create_dataset('params_a', data=params_a, **dataset_kwargs)
    outfile.create_dataset('params_b', data=params_b, **dataset_kwargs)
    outfile.create_dataset('continuum', data=continuum, **dataset_kwargs)
    outfile.create_dataset('continuum_wave', data=forest_wave,
                           **dataset_kwargs)
    outfile.close()

    savefig_kwargs = {'dpi': 100, 'bbox_inches': 'tight'}
    hist_kwargs = {'histtype': 'stepfilled', 'alpha': 0.5}

    # plt.figure(figsize=(12,9))
    # plt.plot(np.linspace(forest_min_z, forest_max_z, num_z_bins), mean_transmission, c='k')
    # plt.ylabel(r'z')
    # plt.xlabel(r'Mean F(z)')
    # plt.grid()
    # plt.savefig(args.name+'-linear-mean-transmission.png', dpi=100, bbox_inches='tight')
    # plt.close()

    plt.figure(figsize=(12, 9))
    plt.step(forest_wave, continuum, c='k', where='mid')

    def draw_example(i, **kwargs):
        print(quasar_redshifts[i])
        plt.scatter(forest_wave, forest_norm[i] * forest_flux[i],
                    marker='+', **kwargs)
        plt.plot(
            forest_wave,
            forest_norm[i] * params_a[i]
            * np.exp(params_b[i] * log_forest_wave_ratio) * continuum,
            **kwargs)

    # draw_example(1, color='blue')
    # draw_example(10, color='green')
    # draw_example(100, color='red')

    plt.xlim(forest_wave[0], forest_wave[-1])
    plt.ylabel(r'Continuum (arb. units)')
    plt.xlabel(r'Rest Wavelength ($\AA$)')
    plt.grid(True)
    plt.savefig(args.name + '-linear-continuum.png', **savefig_kwargs)
    plt.close()

    plt.figure(figsize=(12, 9))
    plt.hist(params_a, bins=np.linspace(-0, 3, 51), **hist_kwargs)
    plt.xlabel('a')
    plt.grid(True)
    plt.savefig(args.name + '-linear-param-a-dist.png', **savefig_kwargs)
    plt.close()

    plt.figure(figsize=(12, 9))
    plt.hist(params_b, bins=np.linspace(-20, 20, 51), **hist_kwargs)
    plt.xlabel('b')
    plt.grid(True)
    plt.savefig(args.name + '-linear-param-b-dist.png', **savefig_kwargs)
    plt.close()

    plt.figure(figsize=(12, 9))
    plt.scatter(params_a, params_b, marker='+')
    plt.xlabel('a')
    plt.ylabel('b')
    plt.ylim(-20, 20)
    plt.xlim(0, 3)
    plt.grid(True)
    plt.savefig(args.name + '-linear-param-scatter.png', **savefig_kwargs)
    plt.close()

    # rest and obs refer to pixel grid
    print('Estimating deltas in forest frame...')
    model_flux = params_a[:, np.newaxis] * np.power(
        forest_wave / args.forest_wave_ref,
        params_b[:, np.newaxis]) * continuum * np.exp(-abs_coefs)
    delta_flux_rest = forest_flux / model_flux - 1.0
    delta_ivar_rest = forest_ivar * (model_flux * model_flux)

    print('Shifting deltas to observed frame...')
    shifted_rows = forest_skim['shifted_rows'][:]
    shifted_cols = forest_skim['shifted_cols'][:]
    shifted_loglam = forest_skim['shifted_loglam'][:]

    delta_flux_obs = ma.empty((num_forests, len(shifted_loglam)))
    delta_ivar_obs = ma.empty_like(delta_flux_obs)

    delta_flux_obs[shifted_rows, shifted_cols] = delta_flux_rest
    delta_ivar_obs[shifted_rows, shifted_cols] = delta_ivar_rest

    print('Plotting mean delta...')
    mask_params = ((params_a > .01) & (params_a < 100) &
                   (params_b > -20) & (params_b < 20))
    print('Number with okay params: {:d}'.format(np.sum(mask_params)))

    delta_flux_mean = ma.average(delta_flux_obs[mask_params], axis=0,
                                 weights=delta_ivar_obs[mask_params])

    plt.figure(figsize=(12, 9))
    plt.plot(np.power(10.0, shifted_loglam), delta_flux_mean)
    # plt.ylim(0.06*np.array([-1,1]))
    plt.xlabel(r'Observed Wavelength ($\AA$)')
    plt.ylabel(r'Delta Mean')
    plt.grid(True)
    plt.savefig(args.name + '-linear-delta-mean.png', **savefig_kwargs)
    plt.close()

    delta_flux_var = ma.average(
        (delta_flux_obs[mask_params] - delta_flux_mean)**2,
        axis=0, weights=delta_ivar_obs[mask_params])

    plt.figure(figsize=(12, 9))
    plt.plot(np.power(10.0, shifted_loglam), delta_flux_var)
    plt.ylim(0, 0.5)
    plt.xlabel(r'Observed Wavelength ($\AA$)')
    plt.ylabel(r'Delta Variance')
    plt.grid(True)
    plt.savefig(args.name + '-linear-delta-var.png', **savefig_kwargs)
    plt.close()
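# A toy version (3 pixels x 2 forests, made-up flux values) of the sparse
# design-matrix construction used in main(): one shared log-continuum value
# per wavelength plus per-forest (a, b) parameters, solved with an
# unregularised least-squares fit.
import numpy as np
import scipy.sparse
from sklearn import linear_model

num_forests, num_waves, num_params = 2, 3, 2
log_ratio = np.array([-0.1, 0.0, 0.1])   # log(wave / wave_ref), illustrative

cont_coefs = np.tile(np.ones(num_waves), num_forests)
cont_rows = np.arange(num_forests * num_waves)
cont_cols = np.tile(np.arange(num_waves), num_forests)

param_coefs = np.tile(np.vstack((np.ones(num_waves), log_ratio))
                      .reshape((-1,), order='F'), num_forests)
param_rows = np.repeat(np.arange(num_forests * num_waves), num_params)
param_cols = np.vstack(
    (np.repeat(np.arange(num_forests) * num_params, num_waves),
     np.repeat(np.arange(num_forests) * num_params + 1, num_waves))
).reshape((-1,), order='F')

coefs = np.concatenate((cont_coefs, param_coefs))
rows = np.concatenate((cont_rows, param_rows))
cols = np.concatenate((cont_cols, num_waves + param_cols))
matrix = scipy.sparse.csc_matrix(
    (coefs, (rows, cols)),
    shape=(num_forests * num_waves, num_waves + num_params * num_forests))

y = np.log(np.array([1.0, 1.1, 1.2, 0.9, 1.0, 1.1]))   # fake flux values
regr = linear_model.LinearRegression(fit_intercept=False)
regr.fit(matrix, y)
print(regr.coef_[:num_waves])   # fitted log-continuum per wavelength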