Exemplo n.º 1
0
    def compare_time_series_trends(self,
                                   var_name=None,
                                   comp_dataset=None,
                                   comp_var_name=None,
                                   time_match_threshhold=60,
                                   time_shift=60 * 60,
                                   time_step=None,
                                   time_qc_threshold=60 * 15):
        """
        Method to perform a time series comparison test between two Xarray Datasets
        to detect a shift in time based on two similar variables. This test will
        compare two similar measurements and look to see if there is a time shift
        forwards or backwards that makes the comparison better. If so assume the
        time has shifted.

        For every candidate shift the time coordinate of the variable is
        offset, the shifted data is matched to the comparison times with a
        nearest-neighbour reindex, and the sum of absolute differences is
        recorded. The shift with the smallest sum is taken as the detected
        time offset. A QC test is always added through
        ``self._obj.qcfilter.add_test`` with an 'Indeterminate' assessment;
        samples are only flagged when the detected offset exceeds
        ``time_qc_threshold``.

        This test is not 100% accurate. It may be fooled with noisy data. Use
        with your own discretion.

        Parameters
        ----------
        var_name : str
            Data variable name.
        comp_dataset : Xarray Dataset
            Dataset containing comparison data to use in test. If not
            provided the Dataset held in ``self._obj`` is used.
        comp_var_name : str
            Name of variable in comp_dataset to use in test. If not provided
            ``var_name`` is used.
        time_match_threshhold : int
            Number of seconds to use in tolerance with reindex() method
            to match time from self to comparison Dataset.
        time_shift : int
            Number of seconds to shift analysis window before and after
            the time in self Dataset time.
        time_step : int
            Time step in seconds for self Dataset time. If not provided
            will attempt to find the most common time step.
        time_qc_threshold : int
            The quality control threshold to use for setting test. If the
            calculated time shift is larger than this value will set all
            values in the QC variable to a tripped test value.

        """

        # If no comparison variable name given assume matches variable name
        if comp_var_name is None:
            comp_var_name = var_name

        # If no comparison Dataset given use the Dataset held in self._obj.
        # The variable lookup below needs the Dataset itself, which this
        # method reads from self._obj everywhere else.
        if comp_dataset is None:
            comp_dataset = self._obj

        # Extract copy of DataArray for work below. The comparison copy is
        # modified in place by the unit conversion, so it must not alias the
        # caller's data.
        self_da = copy.deepcopy(self._obj[var_name])
        comp_da = copy.deepcopy(comp_dataset[comp_var_name])

        # Convert comp data units to match
        comp_da.values = convert_units(comp_da.values, comp_da.attrs['units'],
                                       self_da.attrs['units'])
        comp_da.attrs['units'] = self_da.attrs['units']

        # Determine the step between candidate shifts
        if time_step is None:
            time_step = determine_time_delta(self._obj['time'].values)
        step = int(time_step)

        # Values that do not change between candidate shifts
        base_times = self_da.time.values.astype('datetime64[s]')
        comp_times = comp_da.time.values
        comp_values = comp_da.values
        tolerance = np.timedelta64(time_match_threshhold, 's')

        # Candidate shifts in seconds, from -time_shift through +time_shift
        shifts = range(-1 * time_shift, time_shift + step, step)

        # NOTE(review): np.nansum ignores samples that found no match within
        # the tolerance, so a shift with fewer matched samples can score
        # lower than one with more. Confirm this is acceptable for the data.
        sum_diff = []
        for tm_shift in shifts:
            shifted = self_da.assign_coords(time=base_times + tm_shift)
            shifted = shifted.reindex(time=comp_times,
                                      method='nearest',
                                      tolerance=tolerance)
            sum_diff.append(np.nansum(np.abs(shifted.values - comp_values)))

        # Shift producing the smallest total absolute difference
        time_diff = shifts[int(np.argmin(np.abs(sum_diff)))]

        # Flag every sample only when the detected shift is too large;
        # otherwise the test is still registered with no samples flagged.
        index = None
        if np.abs(time_diff) > time_qc_threshold:
            index = np.arange(0, self_da.size)
        meaning = (
            f"Time shift detected with Minimum Difference test. Comparison of "
            f"{var_name} with {comp_var_name} off by {time_diff} seconds "
            f"exceeding absolute threshold of {time_qc_threshold} seconds.")
        self._obj.qcfilter.add_test(var_name,
                                    index=index,
                                    test_meaning=meaning,
                                    test_assessment='Indeterminate')
Exemplo n.º 2
0
def fft_shading_test(obj,
                     variable='diffuse_hemisp_narrowband_filter4',
                     fft_window=30,
                     shad_freq_lower=[0.008, 0.017],
                     shad_freq_upper=[0.0105, 0.0195],
                     ratio_thresh=[3.15, 1.2],
                     time_interval=None,
                     smooth_window=5,
                     shading_thresh=0.4):
    """
    Function to test shadowband radiometer (MFRSR, RSS, etc) instruments
    for shading related problems.  Program was adapted by Adam Theisen
    from the method defined in Alexandrov et al 2007 to process on a
    point by point basis using a window of data around that point for
    the FFT analysis.

    For ARM data, testing has found that this works the best on narrowband
    filter4 for MFRSR data.

    Function has been tested and is in use by the ARM DQ Office for
    problem detection.  It is known to have some false positives at times.

    Need to run obj.clean.cleanup() ahead of time to ensure proper addition
    to QC variable

    Besides adding the QC test, the per-sample FFT and frequency results
    are stored on the object as the ``fft`` and ``fft_freq`` variables with
    dimensions (time, fft_window); rows shorter than 2 * fft_window are
    padded with NaN.

    Parameters
    ----------
    obj : xarray Dataset
        Data object. Must provide 'time', 'lat' and 'lon' in addition to
        the variable being tested.
    variable : string
        Name of variable to process
    fft_window : int
        Number of samples to use in the FFT window.  Default is +- 30 samples
        Note: this is +- so the full window will be double
    shad_freq_lower : list
        Lower frequency over which to look for peaks in FFT
    shad_freq_upper : list
        Upper frequency over which to look for peaks in FFT
    ratio_thresh : list
        Threshold for each freq window to flag data.  I.e. if the peak is 3.15 times
        greater than the surrounding area
    time_interval : float
        Sampling rate of the instrument. If not provided it is determined
        from the time values.
    smooth_window : int
        Number of samples to use in smoothing FFTs before analysis
    shading_thresh : float
        After smoothing, the value over which is considered a shading signal

    Returns
    -------
    obj : xarray Dataset
        Data object

    References
    ----------
    Alexandrov, Mikhail & Kiedron, Peter & Michalsky, Joseph & Hodges, Gary
    & Flynn, Connor & Lacis, Andrew. (2007). Optical depth measurements by
    shadow-band radiometers and their uncertainties. Applied optics. 46.
    8027-38. 10.1364/AO.46.008027.

    """

    # Get time and data from variable
    time = obj['time'].values
    data = obj[variable].values
    missing = obj[variable].attrs.get('missing_value', -9999.)

    # Get time interval between measurements
    dt = determine_time_delta(time) if time_interval is None else time_interval

    # Sun visibility for every time step, handed to each FFT task
    sun_up = is_sun_visible(latitude=obj['lat'].values,
                            longitude=obj['lon'].values,
                            date_time=time)

    # Compute the FFT for each point +- window samples
    n_times = len(time)
    task = []
    for t, stamp in enumerate(time):
        # Clip the window to the bounds of the data
        sind = max(t - fft_window, 0)
        eind = min(t + fft_window, n_times)

        # Get data and remove all nan/missing values. The NaN mask must be
        # element-wise; an identity check against True never filters.
        d = data[sind:eind]
        d = d[(d != missing) & ~np.isnan(d)]

        # Add to task for dask processing
        task.append(
            dask.delayed(fft_shading_test_process)(
                stamp,
                d,
                shad_freq_lower=shad_freq_lower,
                shad_freq_upper=shad_freq_upper,
                ratio_thresh=ratio_thresh,
                time_interval=dt,
                is_sunny=sun_up[t]))

    # Process using dask
    result = dask.compute(*task)

    # Run data through a rolling median to filter out singular
    # false positives
    shading = pd.Series([r['shading'] for r in result]).rolling(
        window=smooth_window, min_periods=1).median()

    # Find indices where shading is indicated
    index = np.where(np.asarray(shading) > shading_thresh)

    # Add test to QC Variable
    desc = 'FFT Shading Test'
    obj.qcfilter.add_test(variable, index=index, test_meaning=desc)

    # Prepare frequency and fft variables for adding to object. Rows are
    # NaN-padded because windows with removed samples are shorter.
    n_bins = fft_window * 2
    fft = np.full([n_times, n_bins], np.nan)
    freq = np.full([n_times, n_bins], np.nan)
    for i, r in enumerate(result):
        fft[i, 0:len(r['fft'])] = r['fft']
        freq[i, 0:len(r['freq'])] = r['freq']

    # Coordinate for the FFT bin dimension; kept under its own name so the
    # fft_window parameter is not overwritten.
    window_coord = xr.DataArray(range(n_bins),
                                dims=['fft_window'],
                                attrs={
                                    'long_name': 'FFT Window',
                                    'units': '1'
                                })

    # Copy the frequency lists so the stored attributes never alias the
    # function's default argument objects.
    fft_attrs = {
        'units': '',
        'long_name': 'FFT Results for Shading Test',
        'upper_freq': list(shad_freq_upper),
        'lower_freq': list(shad_freq_lower)
    }
    obj['fft'] = xr.DataArray(fft,
                              dims=['time', 'fft_window'],
                              attrs=fft_attrs,
                              coords=[obj['time'], window_coord])

    freq_attrs = {
        'units': '',
        'long_name': 'FFT Frequency Values for Shading Test'
    }
    obj['fft_freq'] = xr.DataArray(freq,
                                   dims=['time', 'fft_window'],
                                   attrs=freq_attrs,
                                   coords=[obj['time'], window_coord])

    return obj
Exemplo n.º 3
0
def fft_shading_test(obj,
                     variable='diffuse_hemisp_narrowband_filter4',
                     fft_window=30,
                     shad_freq_lower=[0.008, 0.017],
                     shad_freq_upper=[0.0105, 0.0195],
                     ratio_thresh=[3.15, 1.2],
                     time_interval=None,
                     smooth_window=5,
                     shading_thresh=0.4):
    """
    Function to test shadowband radiometer (MFRSR, RSS, etc) instruments
    for shading related problems.  Program was adapted by Adam Theisen
    from the method defined in Alexandrov et al 2007 to process on a
    point by point basis using a window of data around that point for
    the FFT analysis.

    For ARM data, testing has found that this works the best on narrowband
    filter4 for MFRSR data.

    Function has been tested and is in use by the ARM DQ Office for
    problem detection.  It is known to have some false positives at times.

    Need to run obj.clean.cleanup() ahead of time to ensure proper addition
    to QC variable

    Parameters
    ----------
    obj : xarray Dataset
        Data object. Must provide 'time', 'lat' and 'lon' in addition to
        the variable being tested.
    variable : string
        Name of variable to process
    fft_window : int
        Number of samples taken on each side of a point when building its
        analysis window, so a full window holds up to 2 * fft_window samples.
    shad_freq_lower : list
        Passed unchanged to fft_shading_test_process. Presumably the lower
        frequency bounds searched for peaks in the FFT.
    shad_freq_upper : list
        Passed unchanged to fft_shading_test_process. Presumably the upper
        frequency bounds searched for peaks in the FFT.
    ratio_thresh : list
        Passed unchanged to fft_shading_test_process. Presumably the peak
        ratio threshold for each frequency window.
    time_interval : float
        Time interval between measurements. If not provided it is
        determined from the time values.
    smooth_window : int
        Number of samples used by the rolling median applied to the
        per-sample results before thresholding.
    shading_thresh : float
        After smoothing, values above this are flagged by the test.

    Returns
    -------
    obj : xarray Dataset
        Data object

    References
    ----------
    Alexandrov, Mikhail & Kiedron, Peter & Michalsky, Joseph & Hodges, Gary
    & Flynn, Connor & Lacis, Andrew. (2007). Optical depth measurements by
    shadow-band radiometers and their uncertainties. Applied optics. 46.
    8027-38. 10.1364/AO.46.008027.

    """

    # Get time and data from variable
    time = obj['time'].values
    data = obj[variable].values
    missing = obj[variable].attrs.get('missing_value', -9999.)

    # Get time interval between measurements
    dt = determine_time_delta(time) if time_interval is None else time_interval

    # Location does not change between samples, so extract it once instead
    # of on every loop iteration.
    lat = obj['lat'].values
    lat = lat if isinstance(lat, list) else [lat]
    lon = obj['lon'].values
    lon = lon if isinstance(lon, list) else [lon]

    # Compute the FFT for each point +- window samples
    n_times = len(time)
    task = []
    for t, stamp in enumerate(time):
        # Clip the window to the bounds of the data
        sind = max(t - fft_window, 0)
        eind = min(t + fft_window, n_times)

        # Get data and remove all nan/missing values. The NaN mask must be
        # element-wise; an identity check against True never filters.
        d = data[sind:eind]
        d = d[(d != missing) & ~np.isnan(d)]

        # Add to task for dask processing
        task.append(
            dask.delayed(fft_shading_test_process)(
                stamp,
                lat[0],
                lon[0],
                d,
                shad_freq_lower=shad_freq_lower,
                shad_freq_upper=shad_freq_upper,
                ratio_thresh=ratio_thresh,
                time_interval=dt))

    # Process using dask
    result = dask.compute(*task)

    # Run data through a rolling median to filter out singular
    # false positives
    smoothed = pd.Series(result).rolling(window=smooth_window,
                                         min_periods=1).median()

    # Find indices where shading is indicated
    index = np.where(np.asarray(smoothed) > shading_thresh)

    # Add test to QC Variable
    desc = 'FFT Shading Test'
    obj.qcfilter.add_test(variable, index=index, test_meaning=desc)

    return obj