def generate_qartod(site, node, sensor, cut_off):
    """
    Load all FLORT data for a defined reference designator (using the site,
    node and sensor names to construct the reference designator) and collected
    via the different data delivery methods and combine them into a single
    data set from which QARTOD test limits for the gross range and climatology
    tests can be calculated.

    :param site: Site designator, extracted from the first part of the reference designator
    :param node: Node designator, extracted from the second part of the reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the QARTOD climatology range tables.
    """
    # load the combined data for the different sources of FLORT data
    data = combine_delivery_methods(site, node, sensor)

    # create boolean arrays of the data marked as "fail" by the quality checks and generate initial
    # HITL annotations that can be combined with system annotations to create a cleaned up data set
    # prior to calculating the QARTOD test values
    if node == 'WFP01':
        index = 10  # decimate the WFP data so we can process it
    else:
        index = 1

    chl_fail = data.estimated_chlorophyll_qc_summary_flag.where(
        data.estimated_chlorophyll_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(chl_fail[::index], [18, 72])
    chl_hitl = create_annotations(site, node, sensor, blocks)
    chl_hitl['parameters'] = [[22, 1141] for _ in chl_hitl['parameters']]

    cdom_fail = data.fluorometric_cdom_qc_summary_flag.where(
        data.fluorometric_cdom_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(cdom_fail[::index], [18, 72])
    cdom_hitl = create_annotations(site, node, sensor, blocks)
    cdom_hitl['parameters'] = [[23, 1143] for _ in cdom_hitl['parameters']]

    beta_fail = data.beta_700_qc_summary_flag.where(
        data.beta_700_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(beta_fail[::index], [18, 72], 24)
    beta_hitl = create_annotations(site, node, sensor, blocks)
    beta_hitl['parameters'] = [[24, 25, 1139] for _ in beta_hitl['parameters']]

    # combine the different dictionaries into a single HITL annotation dictionary for later use
    hitl = chl_hitl.copy()
    for d in (cdom_hitl, beta_hitl):
        for key, value in d.items():
            hitl[key] = hitl[key] + value

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations
    # (DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent)
    annotations = pd.concat([annotations, pd.DataFrame(hitl)], ignore_index=True, sort=False)

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, NaN-ing values that were marked as fail in the QC checks and/or identified as a block
    # of failed data, and then removing all records where the rollup annotation (every parameter fails) was
    # set to fail.
    data['estimated_chlorophyll'][chl_fail] = np.nan
    if 'fluorometric_chl_a_annotations_qc_results' in data.variables:
        m = data.fluorometric_chl_a_annotations_qc_results == 4
        data['estimated_chlorophyll'][m] = np.nan

    data['fluorometric_cdom'][cdom_fail] = np.nan
    if 'fluorometric_cdom_annotations_qc_results' in data.variables:
        m = data.fluorometric_cdom_annotations_qc_results == 4
        data['fluorometric_cdom'][m] = np.nan

    data['beta_700'][beta_fail] = np.nan
    if 'total_volume_scattering_coefficient_annotations_qc_results' in data.variables:
        m = data.total_volume_scattering_coefficient_annotations_qc_results == 4
        data['beta_700'][m] = np.nan
        data['bback'][m] = np.nan  # bback is derived from beta_700, so it is invalidated as well

    if 'rollup_annotations_qc_results' in data.variables:
        data = data.where(data.rollup_annotations_qc_results < 4)

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the gross range limits
    parameters = ['bback', 'estimated_chlorophyll', 'fluorometric_cdom']
    limits = [[0, 3], [0, 30], [0, 375]]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data, parameters, limits, site=site, node=node,
                                    sensor=sensor, stream='flort_sample')

    # add the stream name and the source comment
    gr_lookup['notes'] = ('User range based on data collected through {}.'.format(src_date))

    # based on the site and node, determine if we need a depth based climatology
    depth_bins = np.array([])
    if node in ['SP001', 'WFP01']:
        if site in ['CE01ISSP', 'CE02SHSP', 'CE06ISSP', 'CE07SHSP', 'CE09OSPM']:
            vocab = get_vocabulary(site, node, sensor)[0]
            max_depth = vocab['maxdepth']
            depth_bins = woa_standard_bins()
            m = depth_bins[:, 1] <= max_depth
            depth_bins = depth_bins[m, :]

    # create and format the climatology lookups and tables for the data
    clm_lookup, clm_table = process_climatology(data, parameters, limits, depth_bins=depth_bins,
                                                site=site, node=node, sensor=sensor,
                                                stream='flort_sample')

    # add the stream name
    clm_lookup['stream'] = 'flort_sample'

    return annotations, gr_lookup, clm_lookup, clm_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the pH data for a defined reference designator (using the
    site, node and sensor names to construct the reference designator)
    collected via the three data delivery methods of telemetered, recovered
    host and recovered instrument and combine them into a single data set from
    which QARTOD test limits for the gross range and climatology tests can be
    calculated.

    :param site: Site designator, extracted from the first part of the reference designator
    :param node: Node designator, extracted from the second part of the reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the QARTOD climatology range tables.
    """
    # load and combine all of the data sources for the pH sensor
    data = combine_delivery_methods(site, node, sensor)

    # create a boolean array of the data marked as "fail" by the pH quality checks and generate initial
    # HITL annotations that can be combined with system annotations and pH quality checks to create
    # a cleaned up data set prior to calculating the QARTOD test values
    fail = data.seawater_ph_quality_flag.where(data.seawater_ph_quality_flag == 4).notnull()
    blocks = identify_blocks(fail, [24, 24])
    hitl = create_annotations(site, node, sensor, blocks)

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations
    # (DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent)
    annotations = pd.concat([annotations, pd.DataFrame(hitl)], ignore_index=True, sort=False)

    # create a roll-up annotation flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, removing values that fail the pH quality checks or were marked as fail in the annotations
    data = data.where((data.seawater_ph_quality_flag != 4) & (data.rollup_annotations_qc_results != 4))

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice("2014-01-01T00:00:00", end_date))

    # create the initial gross range entry
    gr = process_gross_range(data, ['seawater_ph'], [6.9, 9.0], site=site, node=node, sensor=sensor)

    # re-work gross entry for the different streams and parameter names. Replicate the entry three
    # times with a fresh 0..2 index so each stream gets its own distinct row (appending without
    # ignore_index left duplicate index labels, so per-row assignments hit every copy), and use
    # .at for label-based cell assignment instead of chained indexing (broken under copy-on-write).
    gr_lookup = pd.concat([gr, gr, gr], ignore_index=True)
    gr_lookup.at[0, 'parameter'] = {'inp': 'phsen_abcdef_ph_seawater'}
    gr_lookup.at[0, 'stream'] = 'phsen_abcdef_dcl_instrument'
    gr_lookup.at[1, 'parameter'] = {'inp': 'phsen_abcdef_ph_seawater'}
    gr_lookup.at[1, 'stream'] = 'phsen_abcdef_dcl_instrument_recovered'
    gr_lookup.at[2, 'parameter'] = {'inp': 'phsen_abcdef_ph_seawater'}
    gr_lookup.at[2, 'stream'] = 'phsen_abcdef_instrument'
    gr_lookup['source'] = ('Sensor min/max based on the vendor standard calibration range. '
                           'The user min/max is the historical mean of all data collected '
                           'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology entry and table
    cll, clm_table = process_climatology(data, ['seawater_ph'], [6.9, 9.0],
                                         site=site, node=node, sensor=sensor)

    # re-work climatology entry for the different streams and parameter names
    clm_lookup = pd.concat([cll, cll, cll], ignore_index=True)
    clm_lookup.at[0, 'parameters'] = {'inp': 'phsen_abcdef_ph_seawater', 'tinp': 'time', 'zinp': 'None'}
    clm_lookup.at[0, 'stream'] = 'phsen_abcdef_dcl_instrument'
    clm_lookup.at[1, 'parameters'] = {'inp': 'phsen_abcdef_ph_seawater', 'tinp': 'time', 'zinp': 'None'}
    clm_lookup.at[1, 'stream'] = 'phsen_abcdef_dcl_instrument_recovered'
    clm_lookup.at[2, 'parameters'] = {'inp': 'phsen_abcdef_ph_seawater', 'tinp': 'time', 'zinp': 'None'}
    clm_lookup.at[2, 'stream'] = 'phsen_abcdef_instrument'

    return annotations, gr_lookup, clm_lookup, clm_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load the CTD data for a defined reference designator (using the site, node
    and sensor names to construct the reference designator) collected via the
    telemetered, recovered host and instrument methods and combine them into a
    single data set from which QARTOD test limits for the gross range and
    climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the reference designator
    :param node: Node designator, extracted from the second part of the reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the QARTOD climatology range table for
        the seafloor pressure and temperature.
    """
    # load the combined telemetered and recovered_host data for the data and water streams
    data = combine_delivery_methods(site, node, sensor)

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, removing values that were marked as fail either from the quality checks or in the
    # annotations, and all data collected after the cut off date
    data = data.where(data.rollup_annotations_qc_results < 4)

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the sensor range limits
    parameters = ['seawater_conductivity', 'seawater_temperature', 'seawater_pressure', 'practical_salinity']
    if site == 'CE09OSSM' and node == 'MFD37':
        plimit = [0, 600]   # 600 m strain gauge pressure sensor
    else:
        plimit = [0, 100]   # 100 m strain gauge pressure sensor

    limits = [[0, 9], [-5, 35], plimit, [0, 42]]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data, parameters, limits, site=site, node=node, sensor=sensor)

    # replicate it three times for the different streams
    gr_lookup = pd.concat([gr_lookup] * 3, ignore_index=True)

    # re-work the gross range entries for the different streams, resetting the variable names back to OOINet names
    streams = ['ctdbp_cdef_dcl_instrument', 'ctdbp_cdef_dcl_instrument_recovered',
               'ctdbp_cdef_instrument_recovered']
    variables = [
        ['conductivity', 'temp', 'pressure', 'practical_salinity'],
        ['conductivity', 'temp', 'pressure', 'practical_salinity'],
        ['ctdbp_seawater_conductivity', 'ctdbp_seawater_temperature',
         'ctdbp_seawater_pressure', 'practical_salinity']
    ]
    # use .at for label-based cell assignment; chained indexing (df[col][row] = x) is unreliable
    # under pandas copy-on-write and raises SettingWithCopyWarning
    idx = 0
    for num, stream in enumerate(streams):
        for j in range(4):
            gr_lookup.at[idx + j, 'parameter'] = {'inp': variables[num][j]}
            gr_lookup.at[idx + j, 'stream'] = stream
        idx += 4

    # set the default source string
    gr_lookup['source'] = ('Sensor min/max based on the vendor sensor specifications. '
                           'The user min/max is the historical mean of all data collected '
                           'up to {} +/- 3 standard deviations.'.format(src_date))

    # create the initial climatology lookup and tables for the data (temperature and salinity only)
    clm_lookup, clm_table = process_climatology(data, parameters[1:4:2], limits[1:4:2],
                                                site=site, node=node, sensor=sensor)

    # replicate the climatology lookup table three times for the different streams
    clm_lookup = pd.concat([clm_lookup] * 3, ignore_index=True)

    # re-work the climatology lookup table for the different streams, resetting the variable names back to OOINet names
    idx = 0
    for num, stream in enumerate(streams):
        for j in [1, 3]:
            clm_lookup.at[idx, 'parameters'] = {'inp': variables[num][j], 'tinp': 'time', 'zinp': 'None'}
            clm_lookup.at[idx, 'stream'] = stream
            idx += 1

    return annotations, gr_lookup, clm_lookup, clm_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the PRESF tide data for a defined reference designator (using
    the site, node and sensor names to construct the reference designator)
    collected via the telemetered, recovered host and recovered instrument
    methods and combine them into a single data set from which QARTOD test
    limits for the gross range and climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the reference designator
    :param node: Node designator, extracted from the second part of the reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the QARTOD climatology range table for
        the seafloor pressure and temperature.
    """
    # load the combined telemetered and recovered_host data for the data and water streams
    data = combine_delivery_methods(site, node, sensor)

    # basic quality check on the pressure record to eliminate on deck measurements
    qc_flag = data['time'].astype('int32') * 0 + 1  # default flag values, no errors
    out_of_range = data.abs_seafloor_pressure.mean() - (data.abs_seafloor_pressure.std() * 5)
    # NOTE(review): the threshold is derived from abs_seafloor_pressure alone but is also
    # applied to presf_tide_pressure -- confirm this is intentional
    m = (data.abs_seafloor_pressure < out_of_range) | (data.presf_tide_pressure < out_of_range)
    qc_flag[m] = 4

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, removing values that were marked as fail either from the quality checks or in the
    # annotations, and all data collected after the cut off date
    data = data.where((qc_flag != 4) & (data.rollup_annotations_qc_results < 4))

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the pressure limits
    parameters = ['seawater_temperature', 'abs_seafloor_pressure',
                  'seawater_temperature', 'abs_seafloor_pressure',
                  'presf_tide_temperature', 'presf_tide_pressure']
    if site in ['CE01ISSM', 'CE06ISSM']:
        plimit = [0, 70]    # 100 psia pressure sensor
    elif site == 'CE07SHSM':
        plimit = [0, 207]   # 300 psia pressure sensor
    else:
        plimit = [0, 689]   # 1000 psia pressure sensor

    limits = [[-5, 35], plimit, [-5, 35], plimit, [-5, 35], plimit]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data, parameters, limits, site=site, node=node, sensor=sensor)

    # re-work gross entry for the different streams; use .at for label-based cell assignment
    # (chained df[col][row] = x indexing is unreliable under pandas copy-on-write)
    gr_lookup.at[0, 'stream'] = 'presf_abc_dcl_tide_measurement'
    gr_lookup.at[1, 'stream'] = 'presf_abc_dcl_tide_measurement'
    gr_lookup.at[2, 'stream'] = 'presf_abc_dcl_tide_measurement_recovered'
    gr_lookup.at[3, 'stream'] = 'presf_abc_dcl_tide_measurement_recovered'
    gr_lookup.at[4, 'stream'] = 'presf_abc_tide_measurement_recovered'
    gr_lookup.at[5, 'stream'] = 'presf_abc_tide_measurement_recovered'
    gr_lookup['source'] = ('Sensor min/max based on the vendor sensor specifications. '
                           'The user min/max is the historical mean of all data collected '
                           'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology lookups and tables for the data
    clm_lookup, clm_table = process_climatology(data, parameters, limits,
                                                site=site, node=node, sensor=sensor)

    # re-work climatology entries for the different streams
    clm_lookup.at[0, 'stream'] = 'presf_abc_dcl_tide_measurement'
    clm_lookup.at[1, 'stream'] = 'presf_abc_dcl_tide_measurement'
    clm_lookup.at[2, 'stream'] = 'presf_abc_dcl_tide_measurement_recovered'
    clm_lookup.at[3, 'stream'] = 'presf_abc_dcl_tide_measurement_recovered'
    clm_lookup.at[4, 'stream'] = 'presf_abc_tide_measurement_recovered'
    clm_lookup.at[5, 'stream'] = 'presf_abc_tide_measurement_recovered'

    return annotations, gr_lookup, clm_lookup, clm_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the pCO2 data for a defined reference designator (using the
    site, node and sensor names to construct the reference designator)
    collected via the telemetered and recovered host methods and combine them
    into a single data set from which QARTOD test limits for the gross range
    and climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the reference designator
    :param node: Node designator, extracted from the second part of the reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the QARTOD climatology lookup tables.
    :return atm_table: CSV formatted strings to save to a csv file for the QARTOD climatology range table for
        the atmospheric pCO2.
    :return ssw_table: CSV formatted strings to save to a csv file for the QARTOD climatology range table for
        the surface seawater pCO2.
    """
    # load the combined telemetered and recovered_host data for the air and water streams
    air = combine_delivery_methods(site, node, sensor, 'air')
    water = combine_delivery_methods(site, node, sensor, 'water')

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # create an annotation-based quality flag for the atmospheric data
    air = add_annotation_qc_flags(air, annotations)

    # create an annotation-based quality flag for the surface seawater data
    water = add_annotation_qc_flags(water, annotations)

    # clean-up the air data, removing values that marked as suspect or fail in the annotations
    air = air.where((air.partial_pressure_co2_atm_annotations_qc_results < 3) &
                    (air.rollup_annotations_qc_results < 3))

    # clean-up the water data, removing values that marked as suspect or fail in the annotations
    water = water.where((water.partial_pressure_co2_ssw_annotations_qc_results < 3) &
                        (water.rollup_annotations_qc_results < 3))

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(air.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    air = air.sel(time=slice('2014-01-01T00:00:00', end_date))
    water = water.sel(time=slice('2014-01-01T00:00:00', end_date))

    # create the initial gross range entry
    data = xr.merge([air.partial_pressure_co2_atm, water.partial_pressure_co2_ssw])
    parameters = ['partial_pressure_co2_atm', 'partial_pressure_co2_ssw']
    limits = [[0, 1000], [0, 1000]]
    gr = process_gross_range(data, parameters, limits, site=site, node=node, sensor=sensor)

    # re-work gross entry for the different streams and parameter names. Replicate the entries
    # with a fresh 0..N index (DataFrame.append was removed in pandas 2.0, and appending without
    # ignore_index left duplicate labels) and use .at for label-based cell assignment instead of
    # chained indexing (broken under pandas copy-on-write).
    gr_lookup = pd.concat([gr, gr], ignore_index=True)
    gr_lookup.at[0, 'parameter'] = {'inp': 'partial_pressure_co2_atm'}
    gr_lookup.at[0, 'stream'] = 'pco2a_a_dcl_instrument_air'
    gr_lookup.at[1, 'parameter'] = {'inp': 'partial_pressure_co2_ssw'}
    gr_lookup.at[1, 'stream'] = 'pco2a_a_dcl_instrument_water'
    gr_lookup.at[2, 'parameter'] = {'inp': 'partial_pressure_co2_atm'}
    gr_lookup.at[2, 'stream'] = 'pco2a_a_dcl_instrument_air_recovered'
    gr_lookup.at[3, 'parameter'] = {'inp': 'partial_pressure_co2_ssw'}
    gr_lookup.at[3, 'stream'] = 'pco2a_a_dcl_instrument_water_recovered'
    gr_lookup['source'] = ('Sensor min/max based on the vendor standard calibration range. '
                           'The user min/max is the historical mean of all data collected '
                           'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology lookups and tables for the air and water streams
    atm, atm_table = process_climatology(data, ['partial_pressure_co2_atm'], [0, 1000],
                                         site=site, node=node, sensor=sensor)
    ssw, ssw_table = process_climatology(data, ['partial_pressure_co2_ssw'], [0, 1000],
                                         site=site, node=node, sensor=sensor)

    # re-work climatology entry for the different streams and parameter names
    atm_lookup = pd.concat([atm, atm], ignore_index=True)
    atm_lookup.at[0, 'parameters'] = {'inp': 'partial_pressure_co2_atm', 'tinp': 'time', 'zinp': 'None'}
    atm_lookup.at[0, 'stream'] = 'pco2a_a_dcl_instrument_air'
    atm_lookup.at[1, 'parameters'] = {'inp': 'partial_pressure_co2_atm', 'tinp': 'time', 'zinp': 'None'}
    atm_lookup.at[1, 'stream'] = 'pco2a_a_dcl_instrument_air_recovered'

    ssw_lookup = pd.concat([ssw, ssw], ignore_index=True)
    ssw_lookup.at[0, 'parameters'] = {'inp': 'partial_pressure_co2_ssw', 'tinp': 'time', 'zinp': 'None'}
    ssw_lookup.at[0, 'stream'] = 'pco2a_a_dcl_instrument_water'
    ssw_lookup.at[1, 'parameters'] = {'inp': 'partial_pressure_co2_ssw', 'tinp': 'time', 'zinp': 'None'}
    ssw_lookup.at[1, 'stream'] = 'pco2a_a_dcl_instrument_water_recovered'

    # combine the air and water climatology lookups into a single lookup table
    clm_lookup = pd.concat([atm_lookup, ssw_lookup])

    return annotations, gr_lookup, clm_lookup, atm_table, ssw_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all FLORT data for a defined reference designator (using the site,
    node and sensor names to construct the reference designator) and collected
    via the different data delivery methods and combine them into a single
    data set from which QARTOD test limits for the gross range and climatology
    tests can be calculated.

    :param site: Site designator, extracted from the first part of the reference designator
    :param node: Node designator, extracted from the second part of the reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the QARTOD climatology range tables.
    """
    # load the combined data for the different sources of FLORT data
    data = combine_delivery_methods(site, node, sensor)

    # create boolean arrays of the data marked as "fail" by the quality checks and generate initial
    # HITL annotations that can be combined with system annotations to create a cleaned up data set
    # prior to calculating the QARTOD test values
    chl_fail = data.estimated_chlorophyll_qc_summary_flag.where(
        data.estimated_chlorophyll_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(chl_fail, [18, 72])
    chl_hitl = create_annotations(site, node, sensor, blocks)
    chl_hitl['parameters'] = ['chl' for _ in chl_hitl['parameters']]

    cdom_fail = data.fluorometric_cdom_qc_summary_flag.where(
        data.fluorometric_cdom_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(cdom_fail, [18, 72])
    cdom_hitl = create_annotations(site, node, sensor, blocks)
    cdom_hitl['parameters'] = ['cdom' for _ in cdom_hitl['parameters']]

    beta_fail = data.beta_700_qc_summary_flag.where(
        data.beta_700_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(beta_fail, [18, 72])
    beta_hitl = create_annotations(site, node, sensor, blocks)
    beta_hitl['parameters'] = ['beta' for _ in beta_hitl['parameters']]

    bback_fail = data.bback_qc_summary_flag.where(
        data.bback_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(bback_fail, [18, 72])
    bback_hitl = create_annotations(site, node, sensor, blocks)
    bback_hitl['parameters'] = ['bback' for _ in bback_hitl['parameters']]

    # combine the different dictionaries into a single HITL annotation dictionary for later use
    hitl = chl_hitl.copy()
    for d in (cdom_hitl, beta_hitl, bback_hitl):
        for key, value in d.items():
            hitl[key] = hitl[key] + value

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations
    # (DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent)
    annotations = pd.concat([annotations, pd.DataFrame(hitl)], ignore_index=True, sort=False)

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, NaN-ing values that were marked as fail in the QC checks, and then removing
    # all records where the rollup annotation was set to fail
    data['estimated_chlorophyll'][chl_fail] = np.nan
    data['fluorometric_cdom'][cdom_fail] = np.nan
    data['beta_700'][beta_fail] = np.nan
    data['bback'][beta_fail] = np.nan   # bback is derived from beta_700, so a beta failure also invalidates bback
    data['bback'][bback_fail] = np.nan
    data = data.where(data.rollup_annotations_qc_results < 4)

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the gross range limits
    parameters = ['bback', 'estimated_chlorophyll', 'fluorometric_cdom']
    limits = [[0, 5], [0, 30], [0, 375]]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data, parameters, limits, site=site, node=node, sensor=sensor)

    # add the stream name and the source comment
    gr_lookup['stream'] = 'flort_sample'
    gr_lookup['source'] = ('Sensor min/max based on the vendor sensor specifications. '
                           'The user min/max is the historical mean of all data collected '
                           'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology lookups and tables for the data
    clm_lookup, clm_table = process_climatology(data, parameters, limits,
                                                site=site, node=node, sensor=sensor)

    # add the stream name
    clm_lookup['stream'] = 'flort_sample'

    return annotations, gr_lookup, clm_lookup, clm_table