def chan_smooth(mxds, vis, type='triang', size=3, gain=1.0, window=None):
    """
    Apply a smoothing kernel to the channel axis

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    type : str or tuple
        type of window function to use: 'boxcar', 'triang', 'hann' etc. Default is 'triang'. Scipy.signal is used
        to generate the window weights, refer to
        https://docs.scipy.org/doc/scipy/reference/signal.windows.html#module-scipy.signal.windows
        for a complete list of supported windows. If your window choice requires additional parameters,
        use a tuple, e.g. ('exponential', None, 0.6)
    size : int
        width of window (# of channels). Default is 3
    gain : float
        gain factor after convolution. Used to set weights. Default is unity gain (1.0)
    window : list of floats
        user defined window weights to apply (all other options ignored if this is supplied). Default is None

    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    import xarray
    import numpy as np
    from scipy.signal import get_window
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]

    if window is None:
        window = gain * get_window(type, size, False) / np.sum(get_window(type, size, False))
    else:
        window = np.atleast_1d(window)
    window = xarray.DataArray(window, dims=['window'])

    # save names of coordinates, then reset them all to variables
    coords = [cc for cc in list(xds.coords) if cc not in xds.dims]
    new_xds = xds.reset_coords()

    # create rolling window view of dataset along channel dimension
    rolling_xds = new_xds.rolling(chan=size, min_periods=1, center=True).construct('window')

    for dv in rolling_xds.data_vars:
        xda = rolling_xds.data_vars[dv]

        # apply chan smoothing to compatible variables
        if ('window' in xda.dims) and (new_xds[dv].dtype.type != np.str_) and (new_xds[dv].dtype.type != np.bool_):
            new_xds[dv] = xda.dot(window).astype(new_xds[dv].dtype)

    # return the appropriate variables to coordinates and stick attributes back in
    new_xds = new_xds.set_coords(coords).assign_attrs(xds.attrs)

    return mxds_copier(mxds, vis, new_xds)
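
# Usage sketch for chan_smooth (illustrative, not part of the module API): assumes an
# mxds opened with cngi.dio.read_vis and containing a visibility partition named
# 'xds0'; the zarr path and partition name are hypothetical.
def _example_chan_smooth():
    from cngi.dio import read_vis
    mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store
    # Hanning-smooth each spectrum with a 5-channel window
    return chan_smooth(mxds, 'xds0', type='hann', size=5)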
def chan_average(mxds, vis, width=1):
    """
    Average data across the channel axis

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    width : int
        number of adjacent channels to average. Default=1 (no change)

    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]

    # save names of coordinates, then reset them all to variables
    coords = [cc for cc in list(xds.coords) if cc not in xds.dims]
    xds = xds.reset_coords()

    # use remaining non-chan coordinates and attributes to initialize new return xds
    new_xds = xds[[cc for cc in list(xds.coords) if cc not in ['chan']]]

    for dv in xds.data_vars:
        xda = xds.data_vars[dv]

        # apply chan averaging to compatible variables
        if 'chan' in xda.dims:
            if (dv == 'DATA') and ('SIGMA_SPECTRUM' in xds.data_vars):
                xda = (xds.DATA / xds.SIGMA_SPECTRUM**2).coarsen(chan=width, boundary='trim').sum()
                xda = xda * (xds.SIGMA_SPECTRUM**2).coarsen(chan=width, boundary='trim').sum()
            elif (dv == 'CORRECTED_DATA') and ('WEIGHT_SPECTRUM' in xds.data_vars):
                xda = (xds.CORRECTED_DATA * xds.WEIGHT_SPECTRUM).coarsen(chan=width, boundary='trim').sum()
                xda = xda / xds.WEIGHT_SPECTRUM.coarsen(chan=width, boundary='trim').sum()
            else:
                # .mean() produces runtimewarning errors (still works though), using .sum() / width is cleaner
                xda = (xda.coarsen(chan=width, boundary='trim').sum() / width).astype(xds.data_vars[dv].dtype)

        new_xds = new_xds.assign(dict([(dv, xda)]))

    # return the appropriate variables to coordinates
    new_xds = new_xds.set_coords(coords)

    return mxds_copier(mxds, vis, new_xds)
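
# Usage sketch for chan_average (illustrative, not part of the module API): bins every
# 4 adjacent channels; the zarr path and partition name 'xds0' are hypothetical.
def _example_chan_average():
    from cngi.dio import read_vis
    mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store
    # average every 4 adjacent channels; a trailing partial bin is trimmed (boundary='trim')
    return chan_average(mxds, 'xds0', width=4)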
def apply_flags(mxds, vis, flags='FLAG'):
    """
    Apply flag variables to other data in Visibility Dataset

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    flags : list or str
        data var name or list of names to use as flags. Default 'FLAG' uses the FLAG field

    Returns
    -------
    xarray.core.dataset.Dataset
        output multi-xarray Dataset with global data
    """
    import numpy as np
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]
    flags = np.atleast_1d(flags)

    flagged_xds = xds.copy()

    # loop over each flag variable and flag each data var with matching dimensions
    for fv in flags:
        for dv in xds.data_vars:
            if dv == fv:
                continue  # don't flag the flags
            if flagged_xds[dv].dims == flagged_xds[fv].dims:
                flagged_xds[dv] = flagged_xds[dv].where(flagged_xds[fv] == 0).astype(xds[dv].dtype)

    return mxds_copier(mxds, vis, flagged_xds)
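
# Usage sketch for apply_flags (illustrative, not part of the module API): the zarr
# path and partition name 'xds0' are hypothetical.
def _example_apply_flags():
    from cngi.dio import read_vis
    mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store
    # blank (NaN) every sample where FLAG is set, in each data var whose dims match FLAG
    return apply_flags(mxds, 'xds0', flags='FLAG')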
def uv_cont_fit(mxds, vis, source='DATA', target='CONTFIT', fitorder=1, excludechans=[]):
    """
    Fit a polynomial regression to source data and return model values to target

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    source : str
        data variable in the dataset on which to fit the regression. Default is 'DATA'
    target : str
        new data variable to place the fit result, overwrites if already present. Default is 'CONTFIT'
    fitorder : int
        polynomial order for the fit, must be >= 1, but values larger than 1 will slow down rapidly. Default is 1
    excludechans : list of ints
        indices of channels to exclude from the fit. Default is empty (include all channels)

    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    import numpy as np
    import xarray
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.impute import SimpleImputer
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]

    # selected channel bin values serve as our training data X
    # expanding out polynomial combinations allows us to use linear regression for non-linear higher order fits
    # see: https://scikit-learn.org/stable/modules/linear_model.html#polynomial-regression-extending-linear-models-with-basis-functions
    chans = np.arange(xds.dims['chan']).reshape(-1, 1)
    xx = PolynomialFeatures(fitorder).fit_transform(chans)

    # indices of channels to use for fitting
    includechans = np.setdiff1d(range(len(chans)), np.atleast_1d(excludechans))

    # define a function to fit a 1-D linear regression across the prescribed axis
    # see: https://scikit-learn.org/stable/modules/linear_model.html#ordinary-least-squares
    # with the dask='parallelized' option in apply_ufunc, this function receives a straight numpy array of chunk size
    # but does not compute the dag, which is nice
    def lr1d(npa):
        # flatten to chans by (time * baseline * pol) features
        yy = npa.swapaxes(0, 2).reshape(len(xx), -1)

        # fill baseline/time/pol cols that are all nan with 0's
        yy[:, np.all(np.isnan(yy), axis=0)] = 0

        # remove remaining nan's
        yy_r = SimpleImputer(missing_values=np.nan, strategy='median').fit_transform(np.real(yy))

        model_r = LinearRegression(fit_intercept=False).fit(xx[includechans], yy_r[includechans])
        model_vals = model_r.predict(xx)  # compute model values

        if npa.dtype == 'complex128':
            yy_i = SimpleImputer(missing_values=np.nan, strategy='median').fit_transform(np.imag(yy))
            model_i = LinearRegression(fit_intercept=False).fit(xx[includechans], yy_i[includechans])
            model_vals = model_vals + 1j * model_i.predict(xx)  # add imaginary model values

        return model_vals.reshape(npa.swapaxes(0, 2).shape).swapaxes(0, 2)

    model_data = xarray.apply_ufunc(lr1d, xds[source].chunk({'chan': -1}),
                                    dask='parallelized', output_dtypes=[xds[source].dtype])

    new_xds = xds.assign({target: model_data}).unify_chunks()

    # compute some fit metrics to store in attributes section
    error = new_xds[target][:, :, includechans, :] - new_xds[source][:, :, includechans, :]
    abs_error = (error.real**2 + error.imag**2)**0.5
    rms_error = (error**2).mean()**0.5
    min_max_error = [abs_error.min(), abs_error.max()]
    bw_frac = len(includechans) / len(chans)
    freq_frac = (xds.chan[includechans].max() - xds.chan[includechans].min()) / (xds.chan.max() - xds.chan.min())

    new_xds = new_xds.assign_attrs({target + '_rms_error': rms_error,
                                    target + '_min_max_error': min_max_error,
                                    target + '_bw_frac': bw_frac,
                                    target + '_freq_frac': freq_frac})

    return mxds_copier(mxds, vis, new_xds)
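
# Usage sketch for uv_cont_fit (illustrative, not part of the module API): fits a
# first-order continuum to DATA while excluding a hypothetical spectral line; the zarr
# path, partition name 'xds0', and excluded channel range are all assumptions.
def _example_uv_cont_fit():
    from cngi.dio import read_vis
    mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store
    # exclude channels 20-39 from the fit, then read fit metrics from the attributes
    return uv_cont_fit(mxds, 'xds0', source='DATA', target='CONTFIT',
                       fitorder=1, excludechans=list(range(20, 40)))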
def time_average(mxds, vis, bin=1, width=None, span='state', maxuvwdistance=None):
    """
    Average data across the time axis

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    bin : int
        number of adjacent times to average, used when width is None. Default=1 (no change)
    width : str
        resample to width freq (e.g. '10s') and produce uniform time steps over span. Ignores bin. Default None uses bin value.
        see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
    span : str
        span of the binning. Allowed values are 'scan', 'state' or 'both'. Default is 'state' (meaning all states in a scan)
    maxuvwdistance : float
        NOT IMPLEMENTED (future). maximum separation of start-to-end baselines that can be included in an average (meters)

    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    import numpy as np
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]
    intnan = np.full((1), np.nan, dtype=np.int32)[0]

    # drop vars that don't have time so they don't get stacked later on
    notime_vars = [cc for cc in list(xds.data_vars) if 'time' not in xds[cc].dims]
    xds = xds.drop_vars(notime_vars)

    #######
    # mapped out over groups
    def timebin(gxds, stacked=True):
        if stacked:
            gxds = gxds.unstack('stb')

        # mean coarsen/resample everything but data and weight
        dvs = [dv for dv in gxds.data_vars
               if dv not in ['DATA', 'CORRECTED_DATA', 'DATA_WEIGHT', 'CORRECTED_DATA_WEIGHT']] + list(gxds.coords)
        if width is None:
            nxds = gxds[dvs].coarsen(time=bin, boundary='pad').mean()
        else:
            nxds = gxds[dvs].resample(time=width).mean()

        # sum coarsen/resample weight
        for wt in ['DATA_WEIGHT', 'CORRECTED_DATA_WEIGHT']:
            if wt in gxds.data_vars:
                if width is None:
                    nxds[wt] = gxds[wt].coarsen(time=bin, boundary='pad').sum()
                else:
                    nxds[wt] = gxds[wt].resample(time=width).sum()

        # use weight in coarsening/resampling data cols
        for col in ['DATA', 'CORRECTED_DATA']:
            if (col in gxds.data_vars) and (col + '_WEIGHT' in gxds.data_vars):
                if width is None:
                    xda = (gxds[col] * gxds[col + '_WEIGHT']).coarsen(time=bin, boundary='pad').sum()
                else:
                    xda = (gxds[col] * gxds[col + '_WEIGHT']).resample(time=width).sum()
                nxds[col] = xda / nxds[col + '_WEIGHT']

        if stacked:
            nxds = nxds.stack({'stb': ('time', 'baseline')})
        return nxds

    #############
    # span across state by grouping on scans (keeps scans separate)
    if span == 'state':
        txds = xds.stack({'stb': ('time', 'baseline')})
        txds = txds.groupby('SCAN_NUMBER').map(timebin)
        txds = txds.where(txds.SCAN_NUMBER.notnull() & (txds.SCAN_NUMBER > intnan), drop=True).unstack('stb')
        txds = txds.transpose('time', 'baseline', 'chan', 'pol', 'uvw_index', 'spw_id', 'pol_id')

    # span across scans by grouping on states (keeps states separate)
    elif span == 'scan':
        txds = xds.stack({'stb': ('time', 'baseline')})
        txds = txds.groupby('STATE_ID').map(timebin)
        txds = txds.where(txds.STATE_ID.notnull() & (txds.STATE_ID > intnan), drop=True).unstack('stb')
        txds = txds.transpose('time', 'baseline', 'chan', 'pol', 'uvw_index', 'spw_id', 'pol_id')

    # span across both
    else:
        txds = timebin(xds, stacked=False)

    # coarsen can change int/bool dtypes to float, so they need to be manually set back
    for dv in txds.data_vars:
        txds[dv] = txds[dv].astype(xds[dv].dtype)

    # put the attributes and dropped data vars back in
    txds = txds.assign_attrs(xds.attrs).assign(dict([(dv, mxds.attrs[vis][dv]) for dv in notime_vars]))

    return mxds_copier(mxds, vis, txds)
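
# Usage sketch for time_average (illustrative, not part of the module API): the zarr
# path and partition name 'xds0' are hypothetical.
def _example_time_average():
    from cngi.dio import read_vis
    mxds = read_vis('mydata.vis.zarr')  # hypothetical zarr store
    # average 5 adjacent integrations, keeping scans separate (span='state')
    binned = time_average(mxds, 'xds0', bin=5, span='state')
    # alternatively, resample to uniform 10-second time steps instead of fixed bins
    resampled = time_average(mxds, 'xds0', width='10s', span='state')
    return binned, resampled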