Example #1
def chan_smooth(mxds, vis, type='triang', size=3, gain=1.0, window=None):
    """
    Apply a smoothing kernel to the channel axis

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    type : str or tuple
        type of window function to use: 'boxcar', 'triang', 'hann', etc. Default is 'triang'. scipy.signal is used to generate the
        window weights; refer to https://docs.scipy.org/doc/scipy/reference/signal.windows.html#module-scipy.signal.windows for a
        complete list of supported windows. If your window choice requires additional parameters, pass a tuple, e.g. ('exponential', None, 0.6)
    size : int
        width of window (# of channels). Default is 3
    gain : float
        gain factor after convolution. Used to set weights. Default is unity gain (1.0)
    window : list of floats
        user-defined window weights to apply (all other options are ignored if this is supplied). Default is None
        
    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    import xarray
    import numpy as np
    from scipy.signal import get_window
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]
    
    if window is None:
        # build the kernel once, normalize to unit sum, then apply the gain factor
        weights = get_window(type, size, False)
        window = gain * weights / np.sum(weights)
    else:
        window = np.atleast_1d(window)
        
    window = xarray.DataArray(window, dims=['window'])
    
    # save names of coordinates, then reset them all to variables
    coords = [cc for cc in list(xds.coords) if cc not in xds.dims]
    new_xds = xds.reset_coords()
    
    # create rolling window view of dataset along channel dimension
    rolling_xds = new_xds.rolling(chan=size, min_periods=1, center=True).construct('window')
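    # e.g. with size=3, each channel position now holds [chan-1, chan, chan+1] along the
    # new 'window' dim (edges padded with NaN); the dot product below collapses that dim
    # back into a single weighted channel value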
    
    for dv in rolling_xds.data_vars:
        xda = rolling_xds.data_vars[dv]
    
        # apply chan smoothing to compatible variables
        if ('window' in xda.dims) and (new_xds[dv].dtype.type != np.str_) and (new_xds[dv].dtype.type != np.bool_):
            new_xds[dv] = xda.dot(window).astype(new_xds[dv].dtype)
        
    # return the appropriate variables to coordinates and stick attributes back in
    new_xds = new_xds.set_coords(coords).assign_attrs(xds.attrs)
    
    return mxds_copier(mxds, vis, new_xds)
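A minimal usage sketch, not part of the source: the zarr path 'mydata.vis.zarr' and partition name 'xds0' are hypothetical, and the cngi.dio.read_vis loader and cngi.vis import path are assumed.

from cngi.dio import read_vis
from cngi.vis import chan_smooth

mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store

# default: normalized triangular kernel spanning 3 channels
smoothed = chan_smooth(mxds, 'xds0')

# scipy windows that need extra parameters are passed as a tuple
smoothed = chan_smooth(mxds, 'xds0', type=('exponential', None, 0.6), size=5)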
Example #2
def chan_average(mxds, vis, width=1):
    """
    Average data across the channel axis

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    width : int
        number of adjacent channels to average. Default=1 (no change)

    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    from cngi._utils._io import mxds_copier
    
    xds = mxds.attrs[vis]
    
    # save names of coordinates, then reset them all to variables
    coords = [cc for cc in list(xds.coords) if cc not in xds.dims]
    xds = xds.reset_coords()

    # use remaining non-chan coordinates and attributes to initialize new return xds
    new_xds = xds[[cc for cc in list(xds.coords) if cc not in ['chan']]]
    
    for dv in xds.data_vars:
        xda = xds.data_vars[dv]
        
        # apply chan averaging to compatible variables
        if 'chan' in xda.dims:
            if (dv == 'DATA') and ('SIGMA_SPECTRUM' in xds.data_vars):
                # inverse-variance weighted average: sum(data/sigma^2) / sum(1/sigma^2)
                xda = (xds.DATA / xds.SIGMA_SPECTRUM**2).coarsen(chan=width, boundary='trim').sum()
                xda = xda / (1.0 / xds.SIGMA_SPECTRUM**2).coarsen(chan=width, boundary='trim').sum()
            elif (dv == 'CORRECTED_DATA') and ('WEIGHT_SPECTRUM' in xds.data_vars):
                xda = (xds.CORRECTED_DATA * xds.WEIGHT_SPECTRUM).coarsen(chan=width, boundary='trim').sum()
                xda = xda / xds.WEIGHT_SPECTRUM.coarsen(chan=width, boundary='trim').sum()
            else:
                # .mean() emits RuntimeWarnings (though it still works), so .sum() / width is used instead
                xda = (xda.coarsen(chan=width, boundary='trim').sum() / width).astype(xds.data_vars[dv].dtype)
        
        new_xds = new_xds.assign({dv: xda})

    # return the appropriate variables to coordinates
    new_xds = new_xds.set_coords(coords)

    return mxds_copier(mxds, vis, new_xds)
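A usage sketch under the same assumptions (hypothetical 'mydata.vis.zarr' store and 'xds0' partition):

from cngi.dio import read_vis
from cngi.vis import chan_average

mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store

# average every 4 adjacent channels; boundary='trim' drops any leftover channels
avg = chan_average(mxds, 'xds0', width=4)
print(avg.attrs['xds0'].dims['chan'])  # roughly the original channel count // 4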
Example #3
def apply_flags(mxds, vis, flags='FLAG'):
    """
    Apply flag variables to other data in Visibility Dataset

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    flags : list or str
        data var name or list of names to use as flags. Default 'FLAG' uses the FLAG field

    Returns
    -------
    xarray.core.dataset.Dataset
        output multi-xarray Dataset with global data
    """
    import numpy as np
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]

    flags = np.atleast_1d(flags)

    flagged_xds = xds.copy()

    # loop over each flag dimension
    # flag each data var with matching dimensions
    for fv in flags:
        for dv in xds.data_vars:
            if dv == fv: continue  # don't flag the flags
            if flagged_xds[dv].dims == flagged_xds[fv].dims:
                flagged_xds[dv] = flagged_xds[dv].where(flagged_xds[fv] == 0).astype(xds[dv].dtype)

    return mxds_copier(mxds, vis, flagged_xds)
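A usage sketch under the same assumptions; FLAG is the default flag variable, and FLAG_ROW is assumed to be present in the partition:

from cngi.dio import read_vis
from cngi.vis import apply_flags

mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store

# blank out data wherever FLAG is set (flagged values become NaN)
flagged = apply_flags(mxds, 'xds0', flags='FLAG')

# several flag variables can be applied in one pass; each one only affects
# data variables whose dimensions match its own
flagged = apply_flags(mxds, 'xds0', flags=['FLAG', 'FLAG_ROW'])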
Example #4
def uv_cont_fit(mxds,
                vis,
                source='DATA',
                target='CONTFIT',
                fitorder=1,
                excludechans=[]):
    """
    Fit a polynomial regression to source data and return model values to target

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    source : str
        data variable in the dataset on which to fit the regression. Default is 'DATA'
    target : str
        new data variable to place the fit result, overwrites if already present. Default is 'CONTFIT'
    fitorder : int
        polynomial order for the fit, must be >= 1; fit time grows rapidly with larger values. Default is 1
    excludechans : list of ints
        indices of channels to exclude from the fit.  Default is empty (include all channels)
    
    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    import numpy as np
    import xarray
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.impute import SimpleImputer
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]

    # selected channel bin values serve as our training data X
    # expanding out polynomial combinations allows us to use linear regression for non-linear higher order fits
    # see: https://scikit-learn.org/stable/modules/linear_model.html#polynomial-regression-extending-linear-models-with-basis-functions
    chans = np.arange(xds.dims['chan']).reshape(-1, 1)
    xx = PolynomialFeatures(fitorder).fit_transform(chans)
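    # e.g. with fitorder=2 each channel index c expands to [1, c, c**2], so an
    # ordinary linear fit over those columns yields the polynomial coefficients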

    # indices of channels to use for fitting
    includechans = np.setdiff1d(range(len(chans)), np.atleast_1d(excludechans))

    # define a function to fit a 1-D linear regression across the prescribed axis
    # see: https://scikit-learn.org/stable/modules/linear_model.html#ordinary-least-squares
    # with the dask='parallelized' option in apply_ufunc, this function receives a plain numpy array of chunk size
    # without triggering computation of the underlying task graph
    def lr1d(npa):
        # flatten to chans by (time * baseline * pol) features
        yy = npa.swapaxes(0, 2).reshape(len(xx), -1)
        # fill time/baseline/pol columns that are all nan with 0's
        yy[:, np.all(np.isnan(yy), axis=0)] = 0
        # impute any remaining nan's, then fit the real part
        yy_r = SimpleImputer(missing_values=np.nan, strategy='median').fit_transform(np.real(yy))
        model_r = LinearRegression(fit_intercept=False).fit(xx[includechans], yy_r[includechans])
        model_vals = model_r.predict(xx)  # compute model values
        if npa.dtype == 'complex128':
            # fit the imaginary part separately for complex data
            yy_i = SimpleImputer(missing_values=np.nan, strategy='median').fit_transform(np.imag(yy))
            model_i = LinearRegression(fit_intercept=False).fit(xx[includechans], yy_i[includechans])
            model_vals = model_vals + 1j * model_i.predict(xx)
        return model_vals.reshape(npa.swapaxes(0, 2).shape).swapaxes(0, 2)

    model_data = xarray.apply_ufunc(lr1d,
                                    xds[source].chunk({'chan': -1}),
                                    dask='parallelized',
                                    output_dtypes=[xds[source].dtype])

    new_xds = xds.assign({target: model_data}).unify_chunks()

    # compute some fit metrics to store in attributes section
    error = new_xds[target][:, :, includechans, :] - new_xds[source][:, :, includechans, :]
    abs_error = (error.real**2 + error.imag**2)**0.5
    rms_error = (error**2).mean()**0.5
    min_max_error = [abs_error.min(), abs_error.max()]
    bw_frac = len(includechans) / len(chans)
    freq_frac = (xds.chan[includechans].max() - xds.chan[includechans].min()) / (xds.chan.max() - xds.chan.min())

    new_xds = new_xds.assign_attrs({
        target + '_rms_error': rms_error,
        target + '_min_max_error': min_max_error,
        target + '_bw_frac': bw_frac,
        target + '_freq_frac': freq_frac
    })

    return mxds_copier(mxds, vis, new_xds)
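A usage sketch under the same assumptions; the excluded channel range marks a hypothetical spectral line:

from cngi.dio import read_vis
from cngi.vis import uv_cont_fit

mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store

# first-order continuum fit, excluding channels 60-89 from the regression
fit = uv_cont_fit(mxds, 'xds0', source='DATA', target='CONTFIT',
                  fitorder=1, excludechans=list(range(60, 90)))

# fit metrics are stored in the partition attributes
print(fit.attrs['xds0'].attrs['CONTFIT_rms_error'])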
Example #5
def time_average(mxds,
                 vis,
                 bin=1,
                 width=None,
                 span='state',
                 maxuvwdistance=None):
    """
    Average data across the time axis

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    bin : int
        number of adjacent times to average, used when width is None. Default=1 (no change)
    width : str
        resample to width frequency (e.g. '10s') and produce uniform time steps over the span. Ignores bin. Default None uses the bin value.
        see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html.
    span : str
        span of the binning. Allowed values are 'scan', 'state' or 'both'.  Default is 'state' (meaning all states in a scan)
    maxuvwdistance (future) : float
        NOT IMPLEMENTED. maximum separation of start-to-end baselines that can be included in an average. (meters)

    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    import numpy as np
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]
    # integer sentinel produced when NaN is cast to int32; used below to drop invalid groups
    intnan = np.full((1), np.nan, dtype=np.int32)[0]

    # drop vars that don't have time so they don't get stacked later on
    notime_vars = [
        cc for cc in list(xds.data_vars) if 'time' not in xds[cc].dims
    ]
    xds = xds.drop_vars(notime_vars)

    #######
    # mapped out over groups
    def timebin(gxds, stacked=True):
        if stacked: gxds = gxds.unstack('stb')

        # mean coarsen/resample everything but data and weight
        dvs = [
            dv for dv in gxds.data_vars if dv not in
            ['DATA', 'CORRECTED_DATA', 'DATA_WEIGHT', 'CORRECTED_DATA_WEIGHT']
        ] + list(gxds.coords)
        if width is None:
            nxds = gxds[dvs].coarsen(time=bin, boundary='pad').mean()
        else:
            nxds = gxds[dvs].resample(time=width).mean()

        # sum coarsen/resample weight
        for wt in ['DATA_WEIGHT', 'CORRECTED_DATA_WEIGHT']:
            if wt in gxds.data_vars:
                if width is None:
                    nxds[wt] = gxds[wt].coarsen(time=bin, boundary='pad').sum()
                else:
                    nxds[wt] = gxds[wt].resample(time=width).sum()

        # use weight in coarsening/resampling data cols
        for col in ['DATA', 'CORRECTED_DATA']:
            if (col in gxds.data_vars) and (col + '_WEIGHT' in gxds.data_vars):
                if width is None:
                    xda = (gxds[col] * gxds[col + '_WEIGHT']).coarsen(
                        time=bin, boundary='pad').sum()
                else:
                    xda = (gxds[col] *
                           gxds[col + '_WEIGHT']).resample(time=width).sum()
                nxds[col] = xda / nxds[col + '_WEIGHT']

        if stacked: nxds = nxds.stack({'stb': ('time', 'baseline')})
        return nxds

    #############
    # span across state by grouping on scans (keeps scans separate)
    if span == 'state':
        txds = xds.stack({'stb': ('time', 'baseline')})
        txds = txds.groupby('SCAN_NUMBER').map(timebin)
        txds = txds.where(txds.SCAN_NUMBER.notnull() &
                          (txds.SCAN_NUMBER > intnan),
                          drop=True).unstack('stb')
        txds = txds.transpose('time', 'baseline', 'chan', 'pol', 'uvw_index',
                              'spw_id', 'pol_id')

    # span across scans by grouping on states (keeps states separate)
    elif span == 'scan':
        txds = xds.stack({'stb': ('time', 'baseline')})
        txds = txds.groupby('STATE_ID').map(timebin)
        txds = txds.where(txds.STATE_ID.notnull() & (txds.STATE_ID > intnan),
                          drop=True).unstack('stb')
        txds = txds.transpose('time', 'baseline', 'chan', 'pol', 'uvw_index',
                              'spw_id', 'pol_id')

    # span across both
    else:
        txds = timebin(xds, stacked=False)

    # coarsen can change int/bool dtypes to float, so they need to be manually set back
    for dv in txds.data_vars:
        txds[dv] = txds[dv].astype(xds[dv].dtype)

    # put the attributes and dropped data vars back in
    txds = txds.assign_attrs(xds.attrs).assign({dv: mxds.attrs[vis][dv] for dv in notime_vars})

    return mxds_copier(mxds, vis, txds)
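A usage sketch under the same assumptions:

from cngi.dio import read_vis
from cngi.vis import time_average

mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store

# average pairs of adjacent integrations, keeping scans separate
avg = time_average(mxds, 'xds0', bin=2, span='state')

# or resample onto uniform 10-second steps, spanning scan and state boundaries
avg = time_average(mxds, 'xds0', width='10s', span='both')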