def data_for_variable(self, variable, min_depth=None, max_depth=None):
    """
    Yield the time series of every file of this station that holds
    `variable` within the requested depth range.

    The parallel arrays self.variables, self.depth_from, self.depth_to
    and self.filenames are walked in lockstep. A file is read (via
    readers.read_data) and yielded when its variable equals `variable`
    and its depth range lies inside [min_depth, max_depth].

    Parameters
    ----------
    variable : string
        variable to read, one of
            * 'soil moisture',
            * 'soil temperature',
            * 'soil suction',
            * 'precipitation',
            * 'air temperature',
            * 'field capacity',
            * 'permanent wilting point',
            * 'plant available water',
            * 'potential plant available water',
            * 'saturation',
            * 'silt fraction',
            * 'snow depth',
            * 'sand fraction',
            * 'clay fraction',
            * 'organic carbon',
            * 'snow water equivalent',
            * 'surface temperature',
            * 'surface temperature quality flag original'
    min_depth : float, optional
        depth_from of the variable has to be >= min_depth in order to be
        included. None (default) means no lower bound.
    max_depth : float, optional
        depth_to of the variable has to be <= max_depth in order to be
        included. None (default) means no upper bound.

    Returns
    -------
    time_series : iterator
        iterator over whatever readers.read_data returns for each
        matching file (documented upstream as ISMNTimeSeries objects
        carrying data and metadata)
    """
    # None is treated as "unbounded" instead of being replaced by
    # np.min/np.max of the station arrays: that reduction raises on a
    # station without files and adds nothing for a non-empty one.
    for var, d1, d2, filename in zip(self.variables, self.depth_from,
                                     self.depth_to, self.filenames):
        if var != variable:
            continue
        # written as "not >=" / "not <=" so a NaN depth is still rejected
        # whenever an explicit bound is given
        if min_depth is not None and not d1 >= min_depth:
            continue
        if max_depth is not None and not d2 <= max_depth:
            continue
        yield readers.read_data(filename)
def read_ts(self, idx):
    """
    Read one time series directly by its id.

    Parameters
    ----------
    idx : int
        id into self.metadata, best one of those returned
        from get_dataset_ids()

    Returns
    -------
    timeseries : pandas.DataFrame
        the `.data` attribute of the object readers.read_data returns
        for the file registered under `idx`
    """
    # the metadata record maps the id to the file that has to be read
    filename = self.metadata['filename'][idx]
    return readers.read_data(filename).data
def get_min_max_obs_timestamp(self, variable="soil moisture", min_depth=None,
                              max_depth=None):
    """
    Return the earliest first and the latest last observation date over
    all files of this station that match variable and depth range.

    Every matching file is read and only the first and the last entry of
    its time index are looked at, so the result is an approximate time
    coverage of the station. This is just an overview: gaps inside a
    series are not detected.

    Parameters
    ----------
    variable : string, optional
        one of
            * 'soil moisture',
            * 'soil temperature',
            * 'soil suction',
            * 'precipitation',
            * 'air temperature',
            * 'field capacity',
            * 'permanent wilting point',
            * 'plant available water',
            * 'potential plant available water',
            * 'saturation',
            * 'silt fraction',
            * 'snow depth',
            * 'sand fraction',
            * 'clay fraction',
            * 'organic carbon',
            * 'snow water equivalent',
            * 'surface temperature',
            * 'surface temperature quality flag original'
    min_depth : float, optional
        depth_from of the variable has to be >= min_depth in order to be
        included. None (default) means no lower bound.
    max_depth : float, optional
        depth_to of the variable has to be <= max_depth in order to be
        included. None (default) means no upper bound.

    Returns
    -------
    start_date : datetime or None
    end_date : datetime or None
        both are None when no matching file with observations exists
    """
    start_date = None
    end_date = None

    # None is treated as "unbounded" instead of being replaced by
    # np.min/np.max of the station arrays: that reduction raises on a
    # station without files and adds nothing for a non-empty one.
    for var, d1, d2, filename in zip(self.variables, self.depth_from,
                                     self.depth_to, self.filenames):
        if var != variable:
            continue
        # "not >=" / "not <=" keeps rejecting NaN depths when a bound is set
        if min_depth is not None and not d1 >= min_depth:
            continue
        if max_depth is not None and not d2 <= max_depth:
            continue

        data = readers.read_data(filename).data.index.to_pydatetime()
        if len(data) == 0:
            # a file without observations cannot extend the coverage
            continue

        sdate = data[0]
        edate = data[-1]
        if start_date is None or start_date > sdate:
            start_date = sdate
        if end_date is None or end_date < edate:
            end_date = edate

    return start_date, end_date
def read_variable(self, variable, depth_from=None, depth_to=None, sensor=None):
    """
    Read the one time series identified by variable, depth range and sensor.

    Parameters are required until any ambiguity is resolved.
    If there is only one depth for the given variable then only variable
    is required. If there are multiple depths at least depth_from is
    required. If there are multiple depth_to possibilities for one
    variable-depth_from combination also depth_to has to be specified.
    If 2 sensors are measuring the same variable in the same depth then
    also the sensor has to be specified.

    Parameters
    ----------
    variable : string
        variable to read, one of
            * 'soil moisture',
            * 'soil temperature',
            * 'soil suction',
            * 'precipitation',
            * 'air temperature',
            * 'field capacity',
            * 'permanent wilting point',
            * 'plant available water',
            * 'potential plant available water',
            * 'saturation',
            * 'silt fraction',
            * 'snow depth',
            * 'sand fraction',
            * 'clay fraction',
            * 'organic carbon',
            * 'snow water equivalent',
            * 'surface temperature',
            * 'surface temperature quality flag original'
    depth_from : float, optional
        shallower depth of layer the variable was measured at
    depth_to : float, optional
        deeper depth of layer the variable was measured at
    sensor : string, optional
        name of the sensor

    Returns
    -------
    data : readers.ISMNTimeSeries
        whatever readers.read_data returns for the selected file
        (documented upstream as an ISMNTimeSeries with metadata and a
        .data attribute pointing to a pandas.DataFrame)

    Raises
    ------
    ISMNError:
        if not all ambiguity was resolved by the given input parameters
        or if no data was found for the given input parameters
    """
    # Candidate depths are taken from the same parallel station arrays
    # the final lookup compares against, so a resolved value always
    # matches that lookup exactly.
    is_variable = self.variables == variable

    if depth_from is None:
        # np.unique: several sensors at one depth are a sensor ambiguity
        # (handled further down), not a depth ambiguity
        depth_f = np.unique(self.depth_from[is_variable])
        if depth_f.size > 1:
            raise ISMNError("there are multiple depths for this variable. "
                            "Please specify the one you want to read")
        if depth_f.size == 0:
            raise ISMNError("there are no depths for this variable. "
                            "Something went wrong")
        depth_from = depth_f[0]

    if depth_to is None:
        # only layers that start at the chosen depth_from can make
        # depth_to ambiguous
        same_start = is_variable & (self.depth_from == depth_from)
        depth_t = np.unique(self.depth_to[same_start])
        if depth_t.size > 1:
            raise ISMNError("there are multiple depths with the same "
                            "depth_from value. "
                            "Please specify the depth_to value you want to read")
        if depth_t.size == 0:
            raise ISMNError("there are no depths for this variable. "
                            "Something went wrong")
        depth_to = depth_t[0]

    if sensor is None:
        sensors = self.get_sensors(variable, depth_from, depth_to)
        if sensors.size > 1:
            raise ISMNError("there are multiple sensors for this combination of "
                            "variable, depth_to, depth_from. Please specify which one "
                            "you want to read")
        if sensors.size == 0:
            raise ISMNError("there are no sensors for this variable, depth_from, depth_to "
                            "combination. Please make sure you specified valid depths")
        sensor = sensors[0]

    index_filename = np.where(is_variable &
                              (depth_from == self.depth_from) &
                              (depth_to == self.depth_to) &
                              (sensor == self.sensors))[0]

    # exactly one file has to remain; zero and several matches are both
    # reported with the same error
    if index_filename.size != 1:
        raise ISMNError("There is no data for this combination of variable, depth_from, "
                        "depth_to and sensor. Please check.")

    return readers.read_data(self.filenames[index_filename[0]])
def optimise(params, timespan=('2009-01', '2009-12'), gpi=None,
             rescaling=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every observation of a resampled ASCAT time series and plot the
    resulting soil moisture against in-situ and GLDAS data.

    For each row of the time series `sig_sqr_diff` is minimised with the
    Nelder-Mead method, starting from (m_veg_x0, m_soil_x0). The
    optimised soil moisture is then temporally matched with the ISMN and
    GLDAS series read from the 'data' directory (relative to the current
    working directory), optionally rescaled, and shown as a time series
    figure with metric tables and as a scatter matrix.

    Parameters
    ----------
    params : dict
        Model parameters. Has to contain the start values 'm_veg_x0' and
        'm_soil_x0'; all remaining entries are handed to `sig_sqr_diff`
        unchanged. The dict passed in is not modified.
    timespan : tuple, optional
        (start, end) used to slice the time series.
    gpi : int, optional
        Grid point index. If specified, the time series is read with
        `read_resam(gpi)`, otherwise from 'data/2011528_2009.csv'.
    rescaling : string, optional
        Rescaling method handed to `scaling.scale`. If None (default) no
        rescaling is applied and no 1:1 lines are drawn.

    Returns
    -------
    df : pandas.DataFrame
        Optimised 'm_veg' and 'm_soil' per time stamp; NaN where the
        optimiser did not report success.
    """
    if gpi is None:
        ts_resam = pd.read_csv(os.path.join("data", "2011528_2009.csv"),
                               index_col=0, parse_dates=True)
    else:
        ts_resam = read_resam(gpi)
    ts_resam = ts_resam[timespan[0]:timespan[1]]

    # work on a copy: popping the start values from the caller's dict
    # would make a second call with the same dict fail with a KeyError
    model_params = dict(params)
    m_veg_x0 = model_params.pop('m_veg_x0')
    m_soil_x0 = model_params.pop('m_soil_x0')
    x0 = np.array([m_veg_x0, m_soil_x0])

    # float columns pre-filled with NaN; rows without a successful fit
    # stay NaN
    df = pd.DataFrame(index=ts_resam.index, columns=['m_veg', 'm_soil'],
                      dtype=float)

    # optimise m_soil and m_veg
    for index, row in ts_resam.iterrows():
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        # NOTE(review): the last argument is always an empty string -
        # presumably the name of an optional extra parameter; confirm
        # against sig_sqr_diff
        args = (ascat_inc, ascat_sig, model_params, '')
        res = minimize(sig_sqr_diff, x0, args=args, method='Nelder-Mead')

        if res['success']:
            # .loc writes into df itself; chained indexing
            # (df['m_veg'][index] = ...) may write into a temporary copy
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    ismn_file = os.path.join(
        'data',
        'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm')
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})

    gldas = pd.read_csv(os.path.join('data', 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas'])

    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        # NOTE(review): reference_index=1 presumably selects the insitu
        # column as scaling reference - confirm against scaling.scale
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # insertion order has to match row_labels below
    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['kendall'] = df_metrics.kendalltau(scaled)
    metrics['ubrmsd'] = df_metrics.ubrmsd(scaled)
    metrics['var_ratio'] = df_var_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])

    scaled.plot(subplots=True, title=ts_title, figsize=(18, 8))

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = ['bias', 'pearson R', 'kendall tau', 'unbiased RMSD',
                  'variance ratio']

    cell_text = []
    for metric_values in metrics.values():
        # Exact type check on purpose: some metrics come as a plain
        # tuple whose first element is the value namedtuple. isinstance
        # would also match the namedtuples themselves, which are tuple
        # subclasses.
        if type(metric_values) is tuple:
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append(["%.2f" % metric_values['ascat_and_insitu'],
                          "%.2f" % metric_values['ascat_and_gldas'],
                          "%.2f" % metric_values['insitu_and_gldas']])

    plt.table(cellText=cell_text,
              colLabels=columns,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=row_labels,
              loc='bottom',
              bbox=(0.2, -1.25, 0.5, 0.8))

    plt.table(cellText=[["%.2f" % tcol_error['ascat'],
                         "%.2f" % tcol_error['gldas'],
                         "%.2f" % tcol_error['insitu']]],
              colLabels=('ascat', 'gldas', 'insitu'),
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'],
              loc='bottom',
              bbox=(0.2, -1.65, 0.5, 0.3))

    # leave room below the axes for the two tables
    plt.subplots_adjust(left=0.08, bottom=0.35)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # only draw 1:1 line if scaling was applied
    if rescaling is not None:
        for j, ax in enumerate(axes.flatten()):
            # every 4th panel of the flattened 3 x 3 grid lies on the
            # diagonal and gets no 1:1 line
            if np.remainder(j + 1, 3 + 1) != 1:
                min_x, max_x = ax.get_xlim()
                min_y, max_y = ax.get_ylim()

                # find minimum lower left coordinate and maximum upper right
                min_ll = min([min_x, min_y])
                max_ur = max([max_x, max_y])

                ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df
def validate(params, timespan=('2009-01', '2009-12'), gpi=None,
             rescaling=None, y_axis_range=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every observation of a resampled ASCAT time series and plot the
    resulting soil moisture against in-situ and GLDAS data.

    For each row of the time series `sig_sqr_diff` is minimised with the
    Nelder-Mead method, starting from (m_veg_x0, m_soil_x0). The
    optimised soil moisture is then temporally matched with the ISMN and
    GLDAS series read from the 'data' directory next to this module,
    optionally rescaled, and shown as a time series figure (with metric
    tables and a text box listing the model parameters) and as a scatter
    matrix with 1:1 lines.

    Parameters
    ----------
    params : dict
        Model parameters. All of 'sand', 'clay', 'temp', 's_vol',
        'f_rms', 'm_veg_x0' and 'm_soil_x0' have to be present. The
        start values 'm_veg_x0' and 'm_soil_x0' are not handed to
        `sig_sqr_diff`. The dict passed in is not modified.
    timespan : tuple, optional
        timespan to analyze
    gpi : int, optional
        Grid point index. If specified, the time series is read with
        `read_resam(gpi)`, otherwise from the bundled
        'data/2011528_2009.csv'.
    rescaling : string, optional
        rescaling method, one of 'min_max', 'linreg', 'mean_std' and
        'lin_cdf_match'
        Default: None
        insitu is the reference to which is scaled
    y_axis_range : tuple, optional
        specify (min, max) of y axis; also applied to the axes of the
        scatter matrix

    Returns
    -------
    None
        The function only produces figures.
    """
    # raw strings: '\c' is not a valid escape sequence in a normal literal
    unit_dict = {'freq': 'GHz',
                 'sand': '',
                 'clay': '',
                 'temp': r'$^\circ$C',
                 'eps': '',
                 'theta': r'$^\circ$',
                 'f_rms': '',
                 'sig_bare': 'dB',
                 'm_soil': '%',
                 'm_veg': '%',
                 'm_soil_x0': '%',
                 'm_veg_x0': '%',
                 's_vol': '$m^3ha^{-1}$',
                 'sig_canopy': 'dB',
                 'sig_for': 'dB',
                 'sig_floor': 'dB',
                 'polarization': ''}

    param_should = ['sand', 'clay', 'temp', 's_vol',
                    'f_rms', 'm_veg_x0', 'm_soil_x0']

    missing = [param for param in param_should if param not in params]
    assert not missing, "missing model parameters: %s" % ', '.join(missing)

    # bundled example data lives in the 'data' directory beside this module
    data_dir = os.path.join(os.path.split(os.path.abspath(__file__))[0],
                            'data')

    if gpi is None:
        ts_resam = pd.read_csv(os.path.join(data_dir, '2011528_2009.csv'),
                               index_col=0, parse_dates=True)
    else:
        ts_resam = read_resam(gpi)
    ts_resam = ts_resam[timespan[0]:timespan[1]]

    # work on a copy so the caller's dict keeps its start values even
    # when something below raises
    model_params = dict(params)
    m_veg_x0 = model_params.pop('m_veg_x0')
    m_soil_x0 = model_params.pop('m_soil_x0')
    x0 = np.array([m_veg_x0, m_soil_x0])

    # float columns pre-filled with NaN; rows without a successful fit
    # stay NaN
    df = pd.DataFrame(index=ts_resam.index, columns=['m_veg', 'm_soil'],
                      dtype=float)

    # optimise m_soil and m_veg
    for index, row in ts_resam.iterrows():
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        # NOTE(review): the last argument is always an empty string -
        # presumably the name of an optional extra parameter; confirm
        # against sig_sqr_diff
        args = (ascat_inc, ascat_sig, model_params, '')
        res = minimize(sig_sqr_diff, x0, args=args, method='Nelder-Mead')

        if res['success']:
            # .loc writes into df itself; chained indexing
            # (df['m_veg'][index] = ...) may write into a temporary copy
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    ismn_file = os.path.join(
        data_dir,
        'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm')
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})

    gldas = pd.read_csv(os.path.join(data_dir, 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    # NOTE(review): the division presumably brings GLDAS to the units of
    # the other two series - confirm
    gldas = pd.DataFrame(gldas['gldas']) / 100.0

    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # insertion order has to match row_labels below
    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['spearman'] = df_metrics.spearmanr(scaled)
    metrics['ubrmsd'] = df_metrics.rmsd(scaled)
    metrics['std_ratio'] = df_std_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
        rmsd_title = 'unbiased RMSD'
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])
        rmsd_title = 'RMSD'

    axes = scaled.plot(title=ts_title, figsize=(18, 8))
    plt.legend()

    # these are matplotlib.patch.Patch properties
    props = dict(facecolor='white', alpha=0)

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = ['bias', 'pearson R', 'spearman rho', rmsd_title,
                  'stddev ratio']

    cell_text = []
    for metric_values in metrics.values():
        # Exact type check on purpose: some metrics come as a plain
        # tuple whose first element is the value namedtuple. isinstance
        # would also match the namedtuples themselves, which are tuple
        # subclasses.
        if type(metric_values) is tuple:
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append(["%.2f" % metric_values['ascat_and_insitu'],
                          "%.2f" % metric_values['ascat_and_gldas'],
                          "%.2f" % metric_values['insitu_and_gldas']])

    plt.table(cellText=cell_text,
              colLabels=columns,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=row_labels,
              loc='bottom',
              bbox=(0.2, -0.5, 0.5, 0.3))

    plt.table(cellText=[["%.2f" % tcol_error['ascat'],
                         "%.2f" % tcol_error['gldas'],
                         "%.2f" % tcol_error['insitu']]],
              colLabels=('ascat       ', 'gldas        ', 'insitu        '),
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'],
              loc='bottom',
              bbox=(0.2, -0.6, 0.5, 0.1))

    # leave room below the axes for the tables and right of them for
    # the parameter text box
    plt.subplots_adjust(left=0.08, bottom=0.35, right=0.85)
    plt.draw()

    if y_axis_range is not None:
        axes.set_ylim(y_axis_range)

    infotext = '\n'.join('%s = %s %s' % (label, params[label],
                                         unit_dict[label])
                         for label in sorted(param_should))

    # place a text box right of the upper right corner, in axes coords
    axes.text(1.03, 1, infotext, transform=axes.transAxes, fontsize=12,
              verticalalignment='top', bbox=props)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # the 1:1 line is drawn on every off-diagonal panel, with or without
    # rescaling
    for j, ax in enumerate(axes.flatten()):
        if y_axis_range is not None:
            ax.set_xlim(y_axis_range)

        # every 4th panel of the flattened 3 x 3 grid lies on the
        # diagonal and gets neither the y range nor a 1:1 line
        if np.remainder(j + 1, 3 + 1) != 1:
            if y_axis_range is not None:
                ax.set_ylim(y_axis_range)

            min_x, max_x = ax.get_xlim()
            min_y, max_y = ax.get_ylim()

            # find minimum lower left coordinate and maximum upper right
            min_ll = min([min_x, min_y])
            max_ur = max([max_x, max_y])

            ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')
def optimise(params, timespan=('2009-01', '2009-12'), gpi=None,
             rescaling=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every observation of a resampled ASCAT time series and plot the
    resulting soil moisture against in-situ and GLDAS data.

    For each row of the time series `sig_sqr_diff` is minimised with the
    Nelder-Mead method, starting from (m_veg_x0, m_soil_x0). The
    optimised soil moisture is then temporally matched with the ISMN and
    GLDAS series read from the 'data' directory (relative to the current
    working directory), optionally rescaled, and shown as a time series
    figure with metric tables and as a scatter matrix.

    Parameters
    ----------
    params : dict
        Model parameters. Has to contain the start values 'm_veg_x0' and
        'm_soil_x0'; all remaining entries are handed to `sig_sqr_diff`
        unchanged. The dict passed in is not modified.
    timespan : tuple, optional
        (start, end) used to slice the time series.
    gpi : int, optional
        Grid point index. If specified, the time series is read with
        `read_resam(gpi)`, otherwise from 'data/2011528_2009.csv'.
    rescaling : string, optional
        Rescaling method handed to `scaling.scale`. If None (default) no
        rescaling is applied and no 1:1 lines are drawn.

    Returns
    -------
    df : pandas.DataFrame
        Optimised 'm_veg' and 'm_soil' per time stamp; NaN where the
        optimiser did not report success.
    """
    if gpi is None:
        ts_resam = pd.read_csv(os.path.join("data", "2011528_2009.csv"),
                               index_col=0, parse_dates=True)
    else:
        ts_resam = read_resam(gpi)
    ts_resam = ts_resam[timespan[0]:timespan[1]]

    # work on a copy: popping the start values from the caller's dict
    # would make a second call with the same dict fail with a KeyError
    model_params = dict(params)
    m_veg_x0 = model_params.pop('m_veg_x0')
    m_soil_x0 = model_params.pop('m_soil_x0')
    x0 = np.array([m_veg_x0, m_soil_x0])

    # float columns pre-filled with NaN; rows without a successful fit
    # stay NaN
    df = pd.DataFrame(index=ts_resam.index, columns=['m_veg', 'm_soil'],
                      dtype=float)

    # optimise m_soil and m_veg
    for index, row in ts_resam.iterrows():
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        # NOTE(review): the last argument is always an empty string -
        # presumably the name of an optional extra parameter; confirm
        # against sig_sqr_diff
        args = (ascat_inc, ascat_sig, model_params, '')
        res = minimize(sig_sqr_diff, x0, args=args, method='Nelder-Mead')

        if res['success']:
            # .loc writes into df itself; chained indexing
            # (df['m_veg'][index] = ...) may write into a temporary copy
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    ismn_file = os.path.join(
        'data',
        'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm')
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})

    gldas = pd.read_csv(os.path.join('data', 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas'])

    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        # NOTE(review): reference_index=1 presumably selects the insitu
        # column as scaling reference - confirm against scaling.scale
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # insertion order has to match row_labels below
    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['kendall'] = df_metrics.kendalltau(scaled)
    metrics['ubrmsd'] = df_metrics.ubrmsd(scaled)
    metrics['var_ratio'] = df_var_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])

    scaled.plot(subplots=True, title=ts_title, figsize=(18, 8))

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = ['bias', 'pearson R', 'kendall tau', 'unbiased RMSD',
                  'variance ratio']

    cell_text = []
    for metric_values in metrics.values():
        # Exact type check on purpose: some metrics come as a plain
        # tuple whose first element is the value namedtuple. isinstance
        # would also match the namedtuples themselves, which are tuple
        # subclasses.
        if type(metric_values) is tuple:
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append(["%.2f" % metric_values['ascat_and_insitu'],
                          "%.2f" % metric_values['ascat_and_gldas'],
                          "%.2f" % metric_values['insitu_and_gldas']])

    plt.table(cellText=cell_text,
              colLabels=columns,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=row_labels,
              loc='bottom',
              bbox=(0.2, -1.25, 0.5, 0.8))

    plt.table(cellText=[["%.2f" % tcol_error['ascat'],
                         "%.2f" % tcol_error['gldas'],
                         "%.2f" % tcol_error['insitu']]],
              colLabels=('ascat', 'gldas', 'insitu'),
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'],
              loc='bottom',
              bbox=(0.2, -1.65, 0.5, 0.3))

    # leave room below the axes for the two tables
    plt.subplots_adjust(left=0.08, bottom=0.35)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # only draw 1:1 line if scaling was applied
    if rescaling is not None:
        for j, ax in enumerate(axes.flatten()):
            # every 4th panel of the flattened 3 x 3 grid lies on the
            # diagonal and gets no 1:1 line
            if np.remainder(j + 1, 3 + 1) != 1:
                min_x, max_x = ax.get_xlim()
                min_y, max_y = ax.get_ylim()

                # find minimum lower left coordinate and maximum upper right
                min_ll = min([min_x, min_y])
                max_ur = max([max_x, max_y])

                ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df