Exemplo n.º 1
0
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all FLORT data for a defined reference designator (using the site,
    node and sensor names to construct the reference designator) and
    collected via the different data delivery methods and combine them into a
    single data set from which QARTOD test limits for the gross range and
    climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets. If empty/None, the data set's
        ``time_coverage_end`` attribute is used instead.
    :return annotations: pandas dataframe of the current system annotations
        with the auto-generated HITL annotations appended
    :return gr_lookup: gross range lookup entries (as returned by
        process_gross_range) to save to a csv file for the QARTOD gross range
        lookup tables.
    :return clm_lookup: climatology lookup entries (as returned by
        process_climatology) to save to a csv file for the QARTOD climatology
        lookup tables.
    :return clm_table: climatology range tables (as returned by
        process_climatology) to save to a csv file for the QARTOD climatology
        range tables.
    """
    # load the combined data for the different sources of FLORT data
    data = combine_delivery_methods(site, node, sensor)

    # create boolean arrays of the data marked as "fail" by the quality checks and generate initial
    # HITL annotations that can be combined with system annotations to create a cleaned up data set
    # prior to calculating the QARTOD test values. The WFP data is decimated (every 10th record) when
    # searching for blocks of failed data so it can be processed.
    index = 10 if node == 'WFP01' else 1

    chl_fail = data.estimated_chlorophyll_qc_summary_flag.where(
        data.estimated_chlorophyll_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(chl_fail[::index], [18, 72])
    chl_hitl = create_annotations(site, node, sensor, blocks)
    chl_hitl['parameters'] = [[22, 1141] for _ in chl_hitl['parameters']]

    cdom_fail = data.fluorometric_cdom_qc_summary_flag.where(
        data.fluorometric_cdom_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(cdom_fail[::index], [18, 72])
    cdom_hitl = create_annotations(site, node, sensor, blocks)
    cdom_hitl['parameters'] = [[23, 1143] for _ in cdom_hitl['parameters']]

    beta_fail = data.beta_700_qc_summary_flag.where(
        data.beta_700_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(beta_fail[::index], [18, 72], 24)
    beta_hitl = create_annotations(site, node, sensor, blocks)
    beta_hitl['parameters'] = [[24, 25, 1139] for _ in beta_hitl['parameters']]

    # combine the different dictionaries into a single HITL annotation dictionary for later use.
    # the values are concatenated into new objects, so the source dictionaries are left untouched.
    hitl = chl_hitl.copy()
    for extra in (cdom_hitl, beta_hitl):
        for key, value in extra.items():
            hitl[key] = hitl[key] + value

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations (DataFrame.append was removed
    # in pandas 2.0, pd.concat is the supported equivalent)
    annotations = pd.concat([annotations, pd.DataFrame(hitl)],
                            ignore_index=True,
                            sort=False)

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, NaN-ing values that were marked as fail in the QC checks and/or identified as a block
    # of failed data, and then removing all records where the rollup annotation (every parameter fails) was
    # set to fail.
    data['estimated_chlorophyll'][chl_fail] = np.nan
    if 'fluorometric_chl_a_annotations_qc_results' in data.variables:
        m = data.fluorometric_chl_a_annotations_qc_results == 4
        data['estimated_chlorophyll'][m] = np.nan

    data['fluorometric_cdom'][cdom_fail] = np.nan
    if 'fluorometric_cdom_annotations_qc_results' in data.variables:
        m = data.fluorometric_cdom_annotations_qc_results == 4
        data['fluorometric_cdom'][m] = np.nan

    # NOTE(review): bback is only NaN-ed via the annotation flag below, not via beta_fail,
    # even though bback (not beta_700) is the parameter used for the QARTOD limits -- confirm
    # this is intended.
    data['beta_700'][beta_fail] = np.nan
    if 'total_volume_scattering_coefficient_annotations_qc_results' in data.variables:
        m = data.total_volume_scattering_coefficient_annotations_qc_results == 4
        data['beta_700'][m] = np.nan
        data['bback'][m] = np.nan

    if 'rollup_annotations_qc_results' in data.variables:
        data = data.where(data.rollup_annotations_qc_results < 4)

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    # NOTE: astimezone() interprets a timezone-naive datetime as local time.
    cut = parser.parse(cut_off if cut_off else data.time_coverage_end)
    cut = cut.astimezone(pytz.utc)
    end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
    src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the gross range limits
    parameters = ['bback', 'estimated_chlorophyll', 'fluorometric_cdom']
    limits = [[0, 3], [0, 30], [0, 375]]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data,
                                    parameters,
                                    limits,
                                    site=site,
                                    node=node,
                                    sensor=sensor,
                                    stream='flort_sample')

    # add the source comment
    gr_lookup['notes'] = (
        'User range based on data collected through {}.'.format(src_date))

    # based on the site and node, determine if we need a depth based climatology. the depth
    # bins are limited to those whose lower edge does not exceed the sensor's maximum depth.
    depth_bins = np.array([])
    profiler_sites = ['CE01ISSP', 'CE02SHSP', 'CE06ISSP', 'CE07SHSP', 'CE09OSPM']
    if node in ['SP001', 'WFP01'] and site in profiler_sites:
        vocab = get_vocabulary(site, node, sensor)[0]
        max_depth = vocab['maxdepth']
        depth_bins = woa_standard_bins()
        m = depth_bins[:, 1] <= max_depth
        depth_bins = depth_bins[m, :]

    # create and format the climatology lookups and tables for the data
    clm_lookup, clm_table = process_climatology(data,
                                                parameters,
                                                limits,
                                                depth_bins=depth_bins,
                                                site=site,
                                                node=node,
                                                sensor=sensor,
                                                stream='flort_sample')

    # add the stream name
    clm_lookup['stream'] = 'flort_sample'

    return annotations, gr_lookup, clm_lookup, clm_table
Exemplo n.º 2
0
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the pH data for a defined reference designator (using the site,
    node and sensor names to construct the reference designator) collected via
    the three data delivery methods of telemetered, recovered host and
    recovered instrument and combine them into a single data set from which
    QARTOD test limits for the gross range and climatology tests can be
    calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets. If empty/None, the data set's
        ``time_coverage_end`` attribute is used instead.
    :return annotations: Current system annotations with the auto-generated
        HITL annotations appended, as a pandas dataframe
    :return gr_lookup: gross range lookup entries (one row per stream) to
        save to a csv file for the QARTOD gross range lookup tables.
    :return clm_lookup: climatology lookup entries (one row per stream) to
        save to a csv file for the QARTOD climatology lookup tables.
    :return clm_table: climatology range table (as returned by
        process_climatology) to save to a csv file for the QARTOD climatology
        range tables.
    """
    # load and combine all of the data sources for the pH sensor
    data = combine_delivery_methods(site, node, sensor)

    # create a boolean array of the data marked as "fail" by the pH quality checks and generate initial
    # HITL annotations that can be combined with system annotations and pH quality checks to create
    # a cleaned up data set prior to calculating the QARTOD test values
    fail = data.seawater_ph_quality_flag.where(
        data.seawater_ph_quality_flag == 4).notnull()
    blocks = identify_blocks(fail, [24, 24])
    hitl = create_annotations(site, node, sensor, blocks)

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations (DataFrame.append was removed
    # in pandas 2.0, pd.concat is the supported equivalent)
    annotations = pd.concat([annotations, pd.DataFrame(hitl)],
                            ignore_index=True,
                            sort=False)

    # create a roll-up annotation flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, removing values that fail the pH quality checks or were marked as fail in the annotations
    data = data.where((data.seawater_ph_quality_flag != 4)
                      & (data.rollup_annotations_qc_results != 4))

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    # NOTE: astimezone() interprets a timezone-naive datetime as local time.
    cut = parser.parse(cut_off if cut_off else data.time_coverage_end)
    cut = cut.astimezone(pytz.utc)
    end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
    src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice("2014-01-01T00:00:00", end_date))

    # the same pH parameter is reported in each of the three streams
    streams = [
        'phsen_abcdef_dcl_instrument',
        'phsen_abcdef_dcl_instrument_recovered',
        'phsen_abcdef_instrument',
    ]

    # create the initial gross range entry
    gr = process_gross_range(data, ['seawater_ph'], [6.9, 9.0],
                             site=site,
                             node=node,
                             sensor=sensor)

    # re-work gross entry for the different streams and parameter names. the entry is replicated
    # once per stream with a fresh 0..2 index, and each cell is set with .at so the write always
    # lands in the dataframe (chained df[col][row] assignment is not guaranteed to).
    gr_lookup = pd.concat([gr, gr, gr], ignore_index=True)
    for row, stream in enumerate(streams):
        gr_lookup.at[row, 'parameter'] = {'inp': 'phsen_abcdef_ph_seawater'}
        gr_lookup.at[row, 'stream'] = stream
    gr_lookup['source'] = (
        'Sensor min/max based on the vendor standard calibration range. '
        'The user min/max is the historical mean of all data collected '
        'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology entry and table
    cll, clm_table = process_climatology(data, ['seawater_ph'], [6.9, 9.0],
                                         site=site,
                                         node=node,
                                         sensor=sensor)

    # re-work climatology entry for the different streams and parameter names. ignore_index is
    # required here: without it every replicated row keeps the same label and the per-row
    # updates for rows 1 and 2 never reach the dataframe.
    clm_lookup = pd.concat([cll, cll, cll], ignore_index=True)
    for row, stream in enumerate(streams):
        clm_lookup.at[row, 'parameters'] = {
            'inp': 'phsen_abcdef_ph_seawater',
            'tinp': 'time',
            'zinp': 'None'
        }
        clm_lookup.at[row, 'stream'] = stream

    return annotations, gr_lookup, clm_lookup, clm_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load the CTD data for a defined reference designator (using the site, node
    and sensor names to construct the reference designator) collected via the
    telemetered, recovered host and instrument methods and combine them into a
    single data set from which QARTOD test limits for the gross range and
    climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets. If empty/None, the data set's
        ``time_coverage_end`` attribute is used instead.
    :return annotations: Current system annotations for the sensor as a
        pandas dataframe
    :return gr_lookup: gross range lookup entries (four parameters for each
        of the three streams) to save to a csv file for the QARTOD gross
        range lookup tables.
    :return clm_lookup: climatology lookup entries (temperature and practical
        salinity for each of the three streams) to save to a csv file for the
        QARTOD climatology lookup tables.
    :return clm_table: climatology range tables (as returned by
        process_climatology) for the temperature and practical salinity.
    """
    # load the combined data from the different data delivery methods
    data = combine_delivery_methods(site, node, sensor)

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, removing values that were marked as fail in the annotations
    data = data.where(data.rollup_annotations_qc_results < 4)

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    # NOTE: astimezone() interprets a timezone-naive datetime as local time.
    cut = parser.parse(cut_off if cut_off else data.time_coverage_end)
    cut = cut.astimezone(pytz.utc)
    end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
    src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the sensor range limits
    parameters = ['seawater_conductivity', 'seawater_temperature', 'seawater_pressure', 'practical_salinity']

    if site == 'CE09OSSM' and node == 'MFD37':
        plimit = [0, 600]   # 600 m strain gauge pressure sensor
    else:
        plimit = [0, 100]   # 100 m strain gauge pressure sensor

    limits = [[0, 9], [-5, 35], plimit, [0, 42]]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data, parameters, limits, site=site, node=node, sensor=sensor)

    # replicate it three times for the different streams
    gr_lookup = pd.concat([gr_lookup] * 3, ignore_index=True)

    # re-work the gross range entries for the different streams, resetting the variable names back to OOINet names
    streams = ['ctdbp_cdef_dcl_instrument', 'ctdbp_cdef_dcl_instrument_recovered', 'ctdbp_cdef_instrument_recovered']
    variables = [
        ['conductivity', 'temp', 'pressure', 'practical_salinity'],
        ['conductivity', 'temp', 'pressure', 'practical_salinity'],
        ['ctdbp_seawater_conductivity', 'ctdbp_seawater_temperature', 'ctdbp_seawater_pressure', 'practical_salinity']
    ]
    # each stream owns a consecutive run of four rows (one per parameter). cells are set with .at
    # so the write always lands in the dataframe (chained df[col][row] assignment is not guaranteed to).
    row = 0
    for names, stream in zip(variables, streams):
        for name in names:
            gr_lookup.at[row, 'parameter'] = {'inp': name}
            gr_lookup.at[row, 'stream'] = stream
            row += 1

    # set the default source string
    gr_lookup['source'] = ('Sensor min/max based on the vendor sensor specifications. '
                           'The user min/max is the historical mean of all data collected '
                           'up to {} +/- 3 standard deviations.'.format(src_date))

    # create the initial climatology lookup and tables for the data. the [1:4:2] slice selects
    # entries 1 and 3 of the lists above, i.e. the temperature and the practical salinity.
    clm_lookup, clm_table = process_climatology(data, parameters[1:4:2], limits[1:4:2],
                                                site=site, node=node, sensor=sensor)

    # replicate the climatology lookup table three times for the different streams
    clm_lookup = pd.concat([clm_lookup] * 3, ignore_index=True)

    # re-work the climatology lookup table for the different streams, resetting the variable names back to OOINet names
    row = 0
    for names, stream in zip(variables, streams):
        for j in (1, 3):  # temperature and practical salinity, matching the slice used above
            clm_lookup.at[row, 'parameters'] = {'inp': names[j], 'tinp': 'time', 'zinp': 'None'}
            clm_lookup.at[row, 'stream'] = stream
            row += 1

    return annotations, gr_lookup, clm_lookup, clm_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the seafloor pressure (PRESF) data for a defined reference
    designator (using the site, node and sensor names to construct the
    reference designator) collected via the different data delivery methods
    and combine them into a single data set from which QARTOD test limits for
    the gross range and climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets. If empty/None, the data set's
        ``time_coverage_end`` attribute is used instead.
    :return annotations: Current system annotations for the sensor as a
        pandas dataframe
    :return gr_lookup: gross range lookup entries (temperature and pressure
        for each of the three streams) to save to a csv file for the QARTOD
        gross range lookup tables.
    :return clm_lookup: climatology lookup entries to save to a csv file for
        the QARTOD climatology lookup tables.
    :return clm_table: climatology range tables (as returned by
        process_climatology) for the seafloor pressure and temperature.
    """
    # load the combined data from the different data delivery methods
    data = combine_delivery_methods(site, node, sensor)

    # basic quality check on the pressure record to eliminate on deck measurements: any pressure
    # more than 5 standard deviations below the mean absolute seafloor pressure is flagged as fail
    qc_flag = data['time'].astype(
        'int32') * 0 + 1  # default flag values, no errors
    out_of_range = data.abs_seafloor_pressure.mean() - (
        data.abs_seafloor_pressure.std() * 5)
    m = (data.abs_seafloor_pressure < out_of_range) | (data.presf_tide_pressure
                                                       < out_of_range)
    qc_flag[m] = 4

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, removing values that were marked as fail either from the quality checks or in the
    # annotations
    data = data.where((qc_flag != 4)
                      & (data.rollup_annotations_qc_results < 4))

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    # NOTE: astimezone() interprets a timezone-naive datetime as local time.
    cut = parser.parse(cut_off if cut_off else data.time_coverage_end)
    cut = cut.astimezone(pytz.utc)
    end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
    src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the pressure limits (a temperature/pressure pair per stream)
    parameters = [
        'seawater_temperature', 'abs_seafloor_pressure',
        'seawater_temperature', 'abs_seafloor_pressure',
        'presf_tide_temperature', 'presf_tide_pressure'
    ]
    if site in ['CE01ISSM', 'CE06ISSM']:
        plimit = [0, 70]  # 100 psia pressure sensor
    elif site == 'CE07SHSM':
        plimit = [0, 207]  # 300 psia pressure sensor
    else:
        plimit = [0, 689]  # 1000 psia pressure sensor

    limits = [[-5, 35], plimit, [-5, 35], plimit, [-5, 35], plimit]

    # stream name for each of the six rows: two consecutive rows (temperature, pressure) per stream
    streams = [
        'presf_abc_dcl_tide_measurement',
        'presf_abc_dcl_tide_measurement_recovered',
        'presf_abc_tide_measurement_recovered',
    ]
    row_streams = [stream for stream in streams for _ in range(2)]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data,
                                    parameters,
                                    limits,
                                    site=site,
                                    node=node,
                                    sensor=sensor)

    # re-work gross entry for the different streams. cells are set with .at so the write always
    # lands in the dataframe (chained df[col][row] assignment is not guaranteed to).
    for row, stream in enumerate(row_streams):
        gr_lookup.at[row, 'stream'] = stream
    gr_lookup['source'] = (
        'Sensor min/max based on the vendor sensor specifications. '
        'The user min/max is the historical mean of all data collected '
        'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology lookups and tables for the data
    clm_lookup, clm_table = process_climatology(data,
                                                parameters,
                                                limits,
                                                site=site,
                                                node=node,
                                                sensor=sensor)

    # re-work climatology entries for the different streams
    for row, stream in enumerate(row_streams):
        clm_lookup.at[row, 'stream'] = stream

    return annotations, gr_lookup, clm_lookup, clm_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the pCO2 data (air and water streams) for a defined reference
    designator (using the site, node and sensor names to construct the
    reference designator) collected via the different data delivery methods
    and combine them into a single data set from which QARTOD test limits for
    the gross range and climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets. If empty/None, the air data set's
        ``time_coverage_end`` attribute is used instead.
    :return annotations: Current system annotations for the sensor as a
        pandas dataframe
    :return gr_lookup: gross range lookup entries (atmospheric and surface
        seawater pCO2 for the telemetered and recovered streams) to save to a
        csv file for the QARTOD gross range lookup tables.
    :return clm_lookup: climatology lookup entries (atmospheric rows followed
        by surface seawater rows) to save to a csv file for the QARTOD
        climatology lookup tables.
    :return atm_table: climatology range table (as returned by
        process_climatology) for the atmospheric pCO2.
    :return ssw_table: climatology range table (as returned by
        process_climatology) for the surface seawater pCO2.
    """
    # load the combined data for the air and water streams
    air = combine_delivery_methods(site, node, sensor, 'air')
    water = combine_delivery_methods(site, node, sensor, 'water')

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # create an annotation-based quality flag for the atmospheric data
    air = add_annotation_qc_flags(air, annotations)

    # create an annotation-based quality flag for the surface seawater data
    water = add_annotation_qc_flags(water, annotations)

    # clean-up the air data, removing values that were marked as suspect or fail in the annotations
    air = air.where((air.partial_pressure_co2_atm_annotations_qc_results < 3)
                    & (air.rollup_annotations_qc_results < 3))

    # clean-up the water data, removing values that were marked as suspect or fail in the annotations
    water = water.where(
        (water.partial_pressure_co2_ssw_annotations_qc_results < 3)
        & (water.rollup_annotations_qc_results < 3))

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded (air) data.
    # NOTE: astimezone() interprets a timezone-naive datetime as local time.
    cut = parser.parse(cut_off if cut_off else air.time_coverage_end)
    cut = cut.astimezone(pytz.utc)
    end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
    src_date = cut.strftime('%Y-%m-%d')

    air = air.sel(time=slice('2014-01-01T00:00:00', end_date))
    water = water.sel(time=slice('2014-01-01T00:00:00', end_date))

    # create the initial gross range entry
    data = xr.merge(
        [air.partial_pressure_co2_atm, water.partial_pressure_co2_ssw])
    parameters = ['partial_pressure_co2_atm', 'partial_pressure_co2_ssw']
    limits = [[0, 1000], [0, 1000]]
    gr = process_gross_range(data,
                             parameters,
                             limits,
                             site=site,
                             node=node,
                             sensor=sensor)

    # re-work gross entry for the different streams and parameter names. the two-row entry is
    # replicated for the telemetered and recovered streams with a fresh 0..3 index, and each cell
    # is set with .at so the write always lands in the dataframe (chained df[col][row] assignment
    # is not guaranteed to).
    gr_lookup = pd.concat([gr, gr], ignore_index=True)
    gr_entries = [
        ('partial_pressure_co2_atm', 'pco2a_a_dcl_instrument_air'),
        ('partial_pressure_co2_ssw', 'pco2a_a_dcl_instrument_water'),
        ('partial_pressure_co2_atm', 'pco2a_a_dcl_instrument_air_recovered'),
        ('partial_pressure_co2_ssw', 'pco2a_a_dcl_instrument_water_recovered'),
    ]
    for row, (name, stream) in enumerate(gr_entries):
        gr_lookup.at[row, 'parameter'] = {'inp': name}
        gr_lookup.at[row, 'stream'] = stream
    gr_lookup['source'] = (
        'Sensor min/max based on the vendor standard calibration range. '
        'The user min/max is the historical mean of all data collected '
        'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology lookups and tables for the air and water streams
    atm, atm_table = process_climatology(data, ['partial_pressure_co2_atm'],
                                         [0, 1000],
                                         site=site,
                                         node=node,
                                         sensor=sensor)
    ssw, ssw_table = process_climatology(data, ['partial_pressure_co2_ssw'],
                                         [0, 1000],
                                         site=site,
                                         node=node,
                                         sensor=sensor)

    # re-work climatology entry for the different streams and parameter names. ignore_index is
    # required here: without it both replicated rows keep the same label and the update for the
    # recovered stream (row 1) never reaches the dataframe.
    atm_lookup = pd.concat([atm, atm], ignore_index=True)
    atm_streams = ['pco2a_a_dcl_instrument_air', 'pco2a_a_dcl_instrument_air_recovered']
    for row, stream in enumerate(atm_streams):
        atm_lookup.at[row, 'parameters'] = {
            'inp': 'partial_pressure_co2_atm',
            'tinp': 'time',
            'zinp': 'None'
        }
        atm_lookup.at[row, 'stream'] = stream

    ssw_lookup = pd.concat([ssw, ssw], ignore_index=True)
    ssw_streams = ['pco2a_a_dcl_instrument_water', 'pco2a_a_dcl_instrument_water_recovered']
    for row, stream in enumerate(ssw_streams):
        ssw_lookup.at[row, 'parameters'] = {
            'inp': 'partial_pressure_co2_ssw',
            'tinp': 'time',
            'zinp': 'None'
        }
        ssw_lookup.at[row, 'stream'] = stream

    # stack the atmospheric and surface seawater entries into a single lookup table
    clm_lookup = pd.concat([atm_lookup, ssw_lookup], ignore_index=True)

    return annotations, gr_lookup, clm_lookup, atm_table, ssw_table
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all FLORT data for a defined reference designator (using the site,
    node and sensor names to construct the reference designator) and
    collected via the different data delivery methods and combine them into a
    single data set from which QARTOD test limits for the gross range and
    climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets. If empty or None, the end of the downloaded
        data (the ``time_coverage_end`` attribute) is used instead.
    :return annotations: data frame with the system annotations for the
        sensor (if any) followed by the annotations generated here from the
        QC summary flags
    :return gr_lookup: gross range lookup table, as returned by
        process_gross_range, with the stream name and source comment added
    :return clm_lookup: climatology lookup table, as returned by
        process_climatology, with the stream name added
    :return clm_table: climatology range tables, as returned by
        process_climatology, for the backscatter, chlorophyll and CDOM
        parameters
    """
    # load the combined data for the different sources of FLORT data
    data = combine_delivery_methods(site, node, sensor)

    # create boolean arrays of the data marked as "fail" by the quality checks (QC summary flag
    # greater than 3) and generate initial HITL annotations that can be combined with system
    # annotations to create a cleaned up data set prior to calculating the QARTOD test values
    qc_flags = (
        ('chl', 'estimated_chlorophyll_qc_summary_flag'),
        ('cdom', 'fluorometric_cdom_qc_summary_flag'),
        ('beta', 'beta_700_qc_summary_flag'),
        ('bback', 'bback_qc_summary_flag'),
    )
    fail = {}    # boolean fail mask per parameter label
    hitl = None  # combined HITL annotation dictionary, built up below
    for label, flag_name in qc_flags:
        flag = data[flag_name]
        fail[label] = flag.where(flag > 3).notnull()
        blocks = identify_blocks(fail[label], [18, 72])
        entry = create_annotations(site, node, sensor, blocks)
        # tag every generated annotation with the parameter it applies to
        entry['parameters'] = [label for _ in entry['parameters']]

        # combine the different dictionaries into a single HITL annotation dictionary,
        # concatenating the per-key lists in the order the parameters are processed
        if hitl is None:
            hitl = entry.copy()
        else:
            for key, value in entry.items():
                hitl[key] = hitl[key] + value

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        # beginDT/endDT are converted from milliseconds to formatted date strings
        annotations['beginDate'] = pd.to_datetime(annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations. DataFrame.append was removed in
    # pandas 2.0; pd.concat with the same options is the direct equivalent.
    annotations = pd.concat([annotations, pd.DataFrame(hitl)], ignore_index=True, sort=False)

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, NaN-ing values that were marked as fail in the QC checks, and then removing
    # all records where the rollup annotation was set to fail
    data['estimated_chlorophyll'][fail['chl']] = np.nan
    data['fluorometric_cdom'][fail['cdom']] = np.nan
    data['beta_700'][fail['beta']] = np.nan
    # bback is masked wherever either the beta_700 or the bback QC summary flag failed
    data['bback'][fail['beta']] = np.nan
    data['bback'][fail['bback']] = np.nan
    data = data.where(data.rollup_annotations_qc_results < 4)

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    # NOTE(review): astimezone treats a date string without timezone information as local
    # time before converting to UTC -- confirm that is the intended interpretation.
    cut = parser.parse(cut_off if cut_off else data.time_coverage_end)
    cut = cut.astimezone(pytz.utc)
    end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
    src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the gross range limits (one [min, max] pair per parameter)
    parameters = ['bback', 'estimated_chlorophyll', 'fluorometric_cdom']
    limits = [[0, 5], [0, 30], [0, 375]]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data, parameters, limits, site=site, node=node, sensor=sensor)

    # add the stream name and the source comment
    gr_lookup['stream'] = 'flort_sample'
    gr_lookup['source'] = ('Sensor min/max based on the vendor sensor specifications. '
                           'The user min/max is the historical mean of all data collected '
                           'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology lookups and tables for the data
    clm_lookup, clm_table = process_climatology(data, parameters, limits, site=site, node=node, sensor=sensor)

    # add the stream name
    clm_lookup['stream'] = 'flort_sample'

    return annotations, gr_lookup, clm_lookup, clm_table