def _load_a_common_dataset(self, name):
    ### load a common dataset from the first file
    if name == 'channo' and self._channel_select is not None:
        self.create_dataset(name, data=self._channel_select)
        memh5.copyattrs(self.infiles[0][name].attrs, self[name].attrs)
    else:
        super(RawTimestream, self)._load_a_common_dataset(name)
def _copy_a_common_dataset(self, name, other):
    ### copy a common dataset from `other` to self
    if name == 'channo' and other._subset_channel_select is not None:
        self.create_dataset(name, data=other._subset_channel_select)
        memh5.copyattrs(other[name].attrs, self[name].attrs)
    else:
        super(RawTimestream, self)._copy_a_common_dataset(name, other)
def _load_a_special_common_dataset(self, name, axis_name):
    ### load a common dataset that needs special care
    ### this dataset needs to be distributed along axis_name if axis_name
    ### is just self.main_data_dist_axis
    dset = self.infiles[0][name]
    axis = self.main_data_axes.index(axis_name)
    tmp = np.arange(dset.shape[0])
    sel = tmp[self.main_data_select[axis]].tolist()
    data = dset[sel]
    # if axis_name is just the distributed axis, load dataset distributed
    if axis == self.main_data_dist_axis:
        data = mpiarray.MPIArray.from_numpy_array(data, axis=self.main_axes_ordered_datasets[name].index(axis))
    self.create_dataset(name, data=data)
    # copy attrs of this dset
    memh5.copyattrs(dset.attrs, self[name].attrs)
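# Illustrative sketch (not part of the class above): how a selection along an
# axis is resolved into an explicit index list before reading from disk, as in
# _load_a_special_common_dataset. The dataset length and selection are made up.
#
#     import numpy as np
#
#     dset_len = 8                   # length of the on-disk dataset along this axis
#     selection = slice(1, 7, 2)     # hypothetical main_data_select entry
#
#     tmp = np.arange(dset_len)
#     sel = tmp[selection].tolist()  # -> [1, 3, 5]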
def stokes2lin(self):
    """Convert the Stokes polarized data to linear polarization."""
    try:
        pol = self.pol
    except KeyError:
        raise RuntimeError('Polarization of the data is unknown, can not convert')

    if pol.attrs['pol_type'] == 'linear' and pol.shape[0] == 4:
        warnings.warn('Data is already linear polarization, no need to convert')
        return

    if pol.attrs['pol_type'] == 'stokes' and pol.shape[0] == 4:
        # redistribute to 0 axis if polarization is the distributed axis
        original_dist_axis = self.main_data_dist_axis
        if 'polarization' == self.main_data_axes[self.main_data_dist_axis]:
            self.redistribute(0)

        pol = pol[:].tolist()
        p = self.pol_dict

        # create a new MPIArray to hold the new data
        md = mpiarray.MPIArray(self.main_data.shape, axis=self.main_data_dist_axis, comm=self.comm, dtype=self.main_data.dtype)
        # convert to linear xx, yy, xy, yx
        md.local_array[:, :, 0] = self.main_data.local_data[:, :, pol.index(p['I'])] + self.main_data.local_data[:, :, pol.index(p['Q'])] # xx
        md.local_array[:, :, 1] = self.main_data.local_data[:, :, pol.index(p['I'])] - self.main_data.local_data[:, :, pol.index(p['Q'])] # yy
        md.local_array[:, :, 2] = self.main_data.local_data[:, :, pol.index(p['U'])] + 1.0J * self.main_data.local_data[:, :, pol.index(p['V'])] # xy
        md.local_array[:, :, 3] = self.main_data.local_data[:, :, pol.index(p['U'])] - 1.0J * self.main_data.local_data[:, :, pol.index(p['V'])] # yx

        attr_dict = {} # temporarily save attrs of this dataset
        memh5.copyattrs(self.main_data.attrs, attr_dict)
        del self[self.main_data_name]
        # create main data
        self.create_dataset(self.main_data_name, shape=md.shape, dtype=md.dtype, data=md, distributed=True, distributed_axis=self.main_data_dist_axis)
        memh5.copyattrs(attr_dict, self.main_data.attrs)

        del self['pol']
        self.create_dataset('pol', data=np.array([p['xx'], p['yy'], p['xy'], p['yx']]), dtype='i4')
        self['pol'].attrs['pol_type'] = 'linear'

        # redistribute self to original axis
        self.redistribute(original_dist_axis)
    else:
        raise RuntimeError('Can not convert to linear polarization')
def stokes2lin(self):
    """Convert the Stokes polarized data to linear polarization."""
    try:
        pol = self.pol
    except KeyError:
        raise RuntimeError('Polarization of the data is unknown, can not convert')

    if pol.attrs['pol_type'] == 'linear' and pol.shape[0] == 4:
        warnings.warn('Data is already linear polarization, no need to convert')
        return

    if pol.attrs['pol_type'] == 'stokes' and pol.shape[0] == 4:
        pol = pol[:].tolist()

        # redistribute to 0 axis if polarization is the distributed axis
        original_dist_axis = self.main_data_dist_axis
        if 'polarization' == self.main_data_axes[self.main_data_dist_axis]:
            self.redistribute(0)

        # create a new MPIArray to hold the new data
        md = mpiarray.MPIArray(self.main_data.shape, axis=self.main_data_dist_axis, comm=self.comm, dtype=self.main_data.dtype)
        # convert to linear xx, yy, xy, yx
        md.local_array[:, :, 0] = self.main_data.local_data[:, :, pol.index('I')] + self.main_data.local_data[:, :, pol.index('Q')] # xx
        md.local_array[:, :, 1] = self.main_data.local_data[:, :, pol.index('I')] - self.main_data.local_data[:, :, pol.index('Q')] # yy
        md.local_array[:, :, 2] = self.main_data.local_data[:, :, pol.index('U')] + 1.0J * self.main_data.local_data[:, :, pol.index('V')] # xy
        md.local_array[:, :, 3] = self.main_data.local_data[:, :, pol.index('U')] - 1.0J * self.main_data.local_data[:, :, pol.index('V')] # yx

        attr_dict = {} # temporarily save attrs of this dataset
        memh5.copyattrs(self.main_data.attrs, attr_dict)
        del self[self.main_data_name]
        # create main data
        self.create_dataset(self.main_data_name, shape=md.shape, dtype=md.dtype, data=md, distributed=True, distributed_axis=self.main_data_dist_axis)
        memh5.copyattrs(attr_dict, self.main_data.attrs)

        del self['pol']
        self.create_dataset('pol', data=np.array(['xx', 'yy', 'xy', 'yx']))
        self['pol'].attrs['pol_type'] = 'linear'

        # redistribute self to original axis
        self.redistribute(original_dist_axis)
    else:
        raise RuntimeError('Can not convert to linear polarization')
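# Illustrative sketch (not part of the methods above): the Stokes -> linear
# mapping applied by stokes2lin, written with plain numpy arrays. The shapes and
# values here are hypothetical; the methods operate on the distributed main
# dataset with layout (time, frequency, polarization, baseline).
#
#     import numpy as np
#
#     ntime, nfreq, nbl = 2, 3, 4
#     I, Q, U, V = (np.random.standard_normal((ntime, nfreq, nbl)) for _ in range(4))
#
#     xx = I + Q
#     yy = I - Q
#     xy = U + 1.0J * V
#     yx = U - 1.0J * V
#
#     # stack along a new polarization axis: (time, freq, pol, baseline)
#     lin = np.stack([xx, yy, xy, yx], axis=2)   # shape (2, 3, 4, 4)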
def _load_a_common_dataset(self, name):
    ### load a common dataset from the first file
    if name in self.freq_ordered_datasets.keys():
        self._load_a_special_common_dataset(name, 'frequency')
    elif name in self.bl_ordered_datasets.keys():
        self._load_a_special_common_dataset(name, 'baseline')
    elif name == 'feedno' and self._feed_select is not None:
        self.create_dataset(name, data=self._feed_select)
        memh5.copyattrs(self.infiles[0][name].attrs, self[name].attrs)
    elif name in self.feed_ordered_datasets.keys() and self._feed_select is not None:
        fh = self.infiles[0]
        feedno = fh['feedno'][:].tolist()
        feed_inds = [ feedno.index(fd) for fd in self._feed_select ]
        feed_axis = self.feed_ordered_datasets[name].index(0)
        slc = [slice(0, None)] * (feed_axis + 1)
        slc[feed_axis] = feed_inds
        self.create_dataset(name, data=fh[name][tuple(slc)])
        memh5.copyattrs(self.infiles[0][name].attrs, self[name].attrs)
    else:
        super(TimestreamCommon, self)._load_a_common_dataset(name)
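# Illustrative sketch (not part of the method above): how the feed selection is
# turned into a slice tuple for a feed-ordered dataset, as in the feed-ordered
# branch of _load_a_common_dataset. The feed numbers and dataset are made up.
#
#     import numpy as np
#
#     feedno = [1, 2, 3, 4]          # feed numbers stored in the file
#     feed_select = [2, 4]           # hypothetical feed selection
#     feed_inds = [feedno.index(fd) for fd in feed_select]   # -> [1, 3]
#
#     dataset = np.arange(8).reshape(4, 2)   # a (feed, ...) ordered dataset
#     feed_axis = 0
#     slc = [slice(0, None)] * (feed_axis + 1)
#     slc[feed_axis] = feed_inds
#     dataset[tuple(slc)]            # rows 1 and 3 only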
def __init__(self, *args, **kwargs):

    # Pull out the values of needed arguments
    axes_from = kwargs.pop('axes_from', None)
    attrs_from = kwargs.pop('attrs_from', None)
    dist = kwargs.pop('distributed', True)
    comm = kwargs.pop('comm', None)

    # Run base initialiser
    memh5.BasicCont.__init__(self, distributed=dist, comm=comm)

    # Check to see if this call looks like it was called like
    # memh5.MemDiskGroup would have been. If it is, we're probably trying to
    # create a bare container, so don't initialise any datasets. This
    # behaviour is needed to support tod.concatenate
    if len(args) or 'data_group' in kwargs:
        return

    # Create axis entries
    for axis in self._axes:

        axis_map = None

        # Check if axis is specified in initialiser
        if axis in kwargs:

            # If axis is an integer, turn into an arange as a default definition
            if isinstance(kwargs[axis], int):
                axis_map = np.arange(kwargs[axis])
            else:
                axis_map = kwargs[axis]

        # If not set in the arguments copy from another object if set
        elif axes_from is not None and axis in axes_from.index_map:
            axis_map = axes_from.index_map[axis]

        # Set the index_map[axis] if we have a definition, otherwise throw an error
        if axis_map is not None:
            self.create_index_map(axis, axis_map)
        else:
            raise RuntimeError('No definition of axis %s supplied.' % axis)

    # Iterate over datasets and initialise any that specify it
    for name, spec in self._dataset_spec.items():
        if 'initialise' in spec and spec['initialise']:
            self.add_dataset(name)

    # Copy over attributes
    if attrs_from is not None:

        # Copy attributes from container root
        memh5.copyattrs(attrs_from.attrs, self.attrs)

        # Copy attributes over from any common datasets
        for name in self._dataset_spec.keys():
            if name in self.datasets and name in attrs_from.datasets:
                memh5.copyattrs(attrs_from.datasets[name].attrs, self.datasets[name].attrs)

    # Make sure that the __memh5_subclass attribute is accurate
    clspath = self.__class__.__module__ + '.' + self.__class__.__name__
    clsattr = self.attrs.get('__memh5_subclass', None)
    if clsattr and (clsattr != clspath):
        self.attrs['__memh5_subclass'] = clspath
def make_empty_corrdata(
    freq=None,
    input=None,
    time=None,
    axes_from=None,
    attrs_from=None,
    distributed=True,
    distributed_axis=0,
    comm=None,
):
    """Make an empty CorrData (i.e. timestream) container.

    Parameters
    ----------
    freq : np.ndarray, optional
        Frequency map to use.
    input : np.ndarray, optional
        Input map.
    time : np.ndarray, optional
        Time map.
    axes_from : BasicCont, optional
        Another container to copy any unspecified axes from.
    attrs_from : BasicCont, optional
        Another container to copy any unspecified attributes from.
    distributed : boolean, optional
        Whether to create the container in distributed mode.
    distributed_axis : int, optional
        Axis to distribute over.
    comm : MPI.Comm, optional
        MPI communicator to distribute over.

    Returns
    -------
    data : andata.CorrData
    """

    # Setup frequency axis
    if freq is None:
        if axes_from is not None and "freq" in axes_from.index_map:
            freq = axes_from.index_map["freq"]
        else:
            raise RuntimeError("No frequency axis defined.")

    # Setup input axis
    if input is None:
        if axes_from is not None and "input" in axes_from.index_map:
            input = axes_from.index_map["input"]
        else:
            raise RuntimeError("No input axis defined.")

    # Setup time axis
    if time is None:
        if axes_from is not None and "time" in axes_from.index_map:
            time = axes_from.index_map["time"]
        else:
            raise RuntimeError("No time axis defined.")

    # Create CorrData object and setup axes
    from ch_util import andata

    # Initialise distributed container
    data = andata.CorrData.__new__(andata.CorrData)
    memh5.BasicCont.__init__(data, distributed=True, comm=comm)

    # Copy over attributes
    if attrs_from is not None:
        memh5.copyattrs(attrs_from.attrs, data.attrs)

    # Create index map
    data.create_index_map("freq", freq)
    data.create_index_map("input", input)
    data.create_index_map("time", time)

    # Construct and create product map
    if axes_from is not None and "prod" in axes_from.index_map:
        prodmap = axes_from.index_map["prod"]
    else:
        nfeed = len(input)
        prodmap = np.array([[fi, fj] for fi in range(nfeed) for fj in range(fi, nfeed)])
    data.create_index_map("prod", prodmap)

    # Construct and create stack map
    if axes_from is not None and "stack" in axes_from.index_map:
        stackmap = axes_from.index_map["stack"]
        vis_shape = (data.nfreq, len(stackmap), data.ntime)
        vis_axis = np.array(["freq", "stack", "time"])
    else:
        stackmap = np.empty_like(prodmap, dtype=[("prod", "<u4"), ("conjugate", "u1")])
        stackmap["prod"][:] = np.arange(len(prodmap))
        stackmap["conjugate"] = 0
        vis_shape = (data.nfreq, data.nprod, data.ntime)
        vis_axis = np.array(["freq", "prod", "time"])
    data.create_index_map("stack", stackmap)

    # Construct and create reverse map stack
    if axes_from is not None and "stack" in axes_from.reverse_map:
        reverse_map_stack = axes_from.reverse_map["stack"]
        data.create_reverse_map("stack", reverse_map_stack)

    # Determine datatype for weights
    if ((axes_from is not None) and hasattr(axes_from, "flags")
            and ("vis_weight" in axes_from.flags)):
        weight_dtype = axes_from.flags["vis_weight"].dtype
    else:
        weight_dtype = np.float32

    # Create empty datasets, and add axis attributes to them
    dset = data.create_dataset(
        "vis",
        shape=vis_shape,
        dtype=np.complex64,
        distributed=distributed,
        distributed_axis=distributed_axis,
    )
    dset.attrs["axis"] = vis_axis
    dset[:] = 0.0

    dset = data.create_flag(
        "vis_weight",
        shape=vis_shape,
        dtype=weight_dtype,
        distributed=distributed,
        distributed_axis=distributed_axis,
    )
    dset.attrs["axis"] = vis_axis
    dset[:] = 0.0

    dset = data.create_flag(
        "inputs",
        shape=(data.ninput, data.ntime),
        dtype=np.float32,
        distributed=False,
        distributed_axis=None,
    )
    dset.attrs["axis"] = np.array(["input", "time"])
    dset[:] = 0.0

    dset = data.create_dataset(
        "gain",
        shape=(data.nfreq, data.ninput, data.ntime),
        dtype=np.complex64,
        distributed=distributed,
        distributed_axis=distributed_axis,
    )
    dset.attrs["axis"] = np.array(["freq", "input", "time"])
    dset[:] = 0.0

    return data
def __init__(self, *args, **kwargs):

    # Pull out the values of needed arguments
    axes_from = kwargs.pop("axes_from", None)
    attrs_from = kwargs.pop("attrs_from", None)
    dist = kwargs.pop("distributed", True)
    comm = kwargs.pop("comm", None)
    self.allow_chunked = kwargs.pop("allow_chunked", False)

    # Run base initialiser
    memh5.BasicCont.__init__(self, distributed=dist, comm=comm)

    # Check to see if this call looks like it was called like
    # memh5.MemDiskGroup would have been. If it is, we're probably trying to
    # create a bare container, so don't initialise any datasets. This
    # behaviour is needed to support tod.concatenate
    if len(args) or "data_group" in kwargs:
        return

    # Create axis entries
    for axis in self.axes:

        axis_map = None

        # Check if axis is specified in initialiser
        if axis in kwargs:

            # If axis is an integer, turn into an arange as a default definition
            if isinstance(kwargs[axis], int):
                axis_map = np.arange(kwargs[axis])
            else:
                axis_map = kwargs[axis]

        # If not set in the arguments copy from another object if set
        elif axes_from is not None and axis in axes_from.index_map:
            axis_map = axes_from.index_map[axis]

        # Set the index_map[axis] if we have a definition, otherwise throw an error
        if axis_map is not None:
            self.create_index_map(axis, axis_map)
        else:
            raise RuntimeError("No definition of axis %s supplied." % axis)

    reverse_map_stack = None

    # Create reverse map
    if "reverse_map_stack" in kwargs:

        # If axis is an integer, turn into an arange as a default definition
        if isinstance(kwargs["reverse_map_stack"], int):
            reverse_map_stack = np.arange(kwargs["reverse_map_stack"])
        else:
            reverse_map_stack = kwargs["reverse_map_stack"]

    # If not set in the arguments copy from another object if set
    elif axes_from is not None and "stack" in axes_from.reverse_map:
        reverse_map_stack = axes_from.reverse_map["stack"]

    # Set the reverse_map['stack'] if we have a definition,
    # otherwise do NOT throw an error, errors are thrown in
    # classes that actually need a reverse stack
    if reverse_map_stack is not None:
        self.create_reverse_map("stack", reverse_map_stack)

    # Iterate over datasets and initialise any that specify it
    for name, spec in self.dataset_spec.items():
        if "initialise" in spec and spec["initialise"]:
            self.add_dataset(name)

    # Copy over attributes
    if attrs_from is not None:

        # Copy attributes from container root
        memh5.copyattrs(attrs_from.attrs, self.attrs)

        # Copy attributes over from any common datasets
        for name in self.dataset_spec.keys():
            if name in self.datasets and name in attrs_from.datasets:
                memh5.copyattrs(
                    attrs_from.datasets[name].attrs, self.datasets[name].attrs
                )

    # Make sure that the __memh5_subclass attribute is accurate
    clspath = self.__class__.__module__ + "." + self.__class__.__name__
    clsattr = self.attrs.get("__memh5_subclass", None)

    if clsattr and (clsattr != clspath):
        self.attrs["__memh5_subclass"] = clspath
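# Illustrative sketch (not part of the class above): the axis-resolution rule
# used in __init__. An integer axis argument becomes an index map of that
# length via np.arange; anything else is taken as the index map itself. The
# values below are hypothetical.
#
#     import numpy as np
#
#     def resolve_axis(value):
#         return np.arange(value) if isinstance(value, int) else value
#
#     resolve_axis(4)                        # -> array([0, 1, 2, 3])
#     resolve_axis(np.array([400., 500.]))   # explicit map used as given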
def separate_pol_and_bl(self, keep_dist_axis=False):
    """Separate baseline axis to polarization and baseline.

    This will create and return a Timestream container holding the polarization
    and baseline separated data.

    Parameters
    ----------
    keep_dist_axis : bool, optional
        Whether to redistribute main data to the original dist axis if the
        dist axis has changed during the operation. Default False.

    """

    # if dist axis is baseline, redistribute it along time
    original_dist_axis = self.main_data_dist_axis
    if 'baseline' == self.main_data_axes[original_dist_axis]:
        keep_dist_axis = False # can not keep dist axis in this case
        self.redistribute(0)

    # create a Timestream container to hold the pol and bl separated data
    ts = timestream.Timestream(dist_axis=self.main_data_dist_axis, comm=self.comm)

    feedno = sorted(self['feedno'][:].tolist())
    xchans = [ self['channo'][feedno.index(fd)][0] for fd in feedno ]
    ychans = [ self['channo'][feedno.index(fd)][1] for fd in feedno ]

    nfeed = len(feedno)
    xx_pairs = [ (xchans[i], xchans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
    yy_pairs = [ (ychans[i], ychans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
    xy_pairs = [ (xchans[i], ychans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
    yx_pairs = [ (ychans[i], xchans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]

    blorder = [ tuple(bl) for bl in self['blorder'] ]
    conj_blorder = [ tuple(bl[::-1]) for bl in self['blorder'] ]

    def _get_ind(chp):
        try:
            return False, blorder.index(chp)
        except ValueError:
            return True, conj_blorder.index(chp)

    # xx
    xx_list = [ _get_ind(chp) for chp in xx_pairs ]
    xx_inds = [ ind for (cj, ind) in xx_list ]
    xx_conj = [ cj for (cj, ind) in xx_list ]
    # yy
    yy_list = [ _get_ind(chp) for chp in yy_pairs ]
    yy_inds = [ ind for (cj, ind) in yy_list ]
    yy_conj = [ cj for (cj, ind) in yy_list ]
    # xy
    xy_list = [ _get_ind(chp) for chp in xy_pairs ]
    xy_inds = [ ind for (cj, ind) in xy_list ]
    xy_conj = [ cj for (cj, ind) in xy_list ]
    # yx
    yx_list = [ _get_ind(chp) for chp in yx_pairs ]
    yx_inds = [ ind for (cj, ind) in yx_list ]
    yx_conj = [ cj for (cj, ind) in yx_list ]

    # create a MPIArray to hold the pol and bl separated vis
    rvis = self.main_data.local_data
    shp = rvis.shape[:2] + (4, len(xx_inds))
    vis = np.empty(shp, dtype=rvis.dtype)
    vis[:, :, 0] = np.where(xx_conj, rvis[:, :, xx_inds].conj(), rvis[:, :, xx_inds]) # xx
    vis[:, :, 1] = np.where(yy_conj, rvis[:, :, yy_inds].conj(), rvis[:, :, yy_inds]) # yy
    vis[:, :, 2] = np.where(xy_conj, rvis[:, :, xy_inds].conj(), rvis[:, :, xy_inds]) # xy
    vis[:, :, 3] = np.where(yx_conj, rvis[:, :, yx_inds].conj(), rvis[:, :, yx_inds]) # yx

    vis = mpiarray.MPIArray.wrap(vis, axis=self.main_data_dist_axis, comm=self.comm)

    # create main data
    ts.create_main_data(vis)
    # copy attrs from rt
    memh5.copyattrs(self.main_data.attrs, ts.main_data.attrs)
    # create attrs of this dataset
    ts.main_data.attrs['dimname'] = 'Time, Frequency, Polarization, Baseline'

    # create a MPIArray to hold the pol and bl separated vis_mask
    rvis_mask = self['vis_mask'].local_data
    shp = rvis_mask.shape[:2] + (4, len(xx_inds))
    vis_mask = np.empty(shp, dtype=rvis_mask.dtype)
    vis_mask[:, :, 0] = rvis_mask[:, :, xx_inds] # xx
    vis_mask[:, :, 1] = rvis_mask[:, :, yy_inds] # yy
    vis_mask[:, :, 2] = rvis_mask[:, :, xy_inds] # xy
    vis_mask[:, :, 3] = rvis_mask[:, :, yx_inds] # yx

    vis_mask = mpiarray.MPIArray.wrap(vis_mask, axis=self.main_data_dist_axis, comm=self.comm)

    # create vis_mask
    axis_order = ts.main_axes_ordered_datasets[ts.main_data_name]
    ts.create_main_axis_ordered_dataset(axis_order, 'vis_mask', vis_mask, axis_order)

    # create other datasets needed
    # pol ordered dataset
    p = self.pol_dict
    ts.create_pol_ordered_dataset('pol', data=np.array([p['xx'], p['yy'], p['xy'], p['yx']], dtype='i4'))
    ts['pol'].attrs['pol_type'] = 'linear'

    # bl ordered dataset
    blorder = np.array([ [feedno[i], feedno[j]] for i in xrange(nfeed) for j in xrange(i, nfeed) ])
    ts.create_bl_ordered_dataset('blorder', data=blorder)
    # copy attrs of this dset
    memh5.copyattrs(self['blorder'].attrs, ts['blorder'].attrs)
    # other bl ordered dataset
    if len(set(self.bl_ordered_datasets.keys()) - {'vis', 'vis_mask', 'blorder', 'true_blorder', 'bl_pol'}) > 0:
        raise RuntimeError('Should not have other bl_ordered_datasets %s' % (set(self.bl_ordered_datasets.keys()) - {'vis', 'vis_mask', 'blorder'}))

    # copy other attrs
    for attrs_name, attrs_value in self.attrs.iteritems():
        if attrs_name not in self.time_ordered_attrs:
            ts.attrs[attrs_name] = attrs_value

    # copy other datasets
    for dset_name, dset in self.iteritems():
        if dset_name == self.main_data_name or dset_name == 'vis_mask':
            # already created above
            continue
        elif dset_name in self.main_axes_ordered_datasets.keys():
            if dset_name in self.bl_ordered_datasets.keys():
                # already created above
                continue
            else:
                axis_order = self.main_axes_ordered_datasets[dset_name]
                axis = None
                for order in axis_order:
                    if isinstance(order, int):
                        axis = order
                if axis is None:
                    raise RuntimeError('Invalid axis order %s for dataset %s' % (axis_order, dset_name))
                ts.create_main_axis_ordered_dataset(axis, dset_name, dset.data, axis_order)
        elif dset_name in self.time_ordered_datasets.keys():
            axis_order = self.time_ordered_datasets[dset_name]
            ts.create_time_ordered_dataset(dset_name, dset.data, axis_order)
        elif dset_name in self.feed_ordered_datasets.keys():
            if dset_name == 'channo': # channo is not useful for Timestream
                continue
            else:
                axis_order = self.feed_ordered_datasets[dset_name]
                ts.create_feed_ordered_dataset(dset_name, dset.data, axis_order)
        else:
            if dset.common:
                ts.create_dataset(dset_name, data=dset)
            elif dset.distributed:
                ts.create_dataset(dset_name, data=dset.data, shape=dset.shape, dtype=dset.dtype, distributed=True, distributed_axis=dset.distributed_axis)

        # copy attrs of this dset
        memh5.copyattrs(dset.attrs, ts[dset_name].attrs)

    # redistribute self to original axis
    if keep_dist_axis:
        self.redistribute(original_dist_axis)

    return ts
def separate_pol_and_bl(self, keep_dist_axis=False):
    """Separate baseline axis to polarization and baseline.

    This will create and return a Timestream container holding the polarization
    and baseline separated data.

    Parameters
    ----------
    keep_dist_axis : bool, optional
        Whether to redistribute main data to the original dist axis if the
        dist axis has changed during the operation. Default False.

    """

    # if dist axis is baseline, redistribute it along time
    original_dist_axis = self.main_data_dist_axis
    if 'baseline' == self.main_data_axes[original_dist_axis]:
        keep_dist_axis = False # can not keep dist axis in this case
        self.redistribute(0)

    # create a Timestream container to hold the pol and bl separated data
    ts = timestream.Timestream(dist_axis=self.main_data_dist_axis, comm=self.comm)

    feedno = sorted(self['feedno'][:].tolist())
    xchans = [ self['channo'][feedno.index(fd)][0] for fd in feedno ]
    ychans = [ self['channo'][feedno.index(fd)][1] for fd in feedno ]

    nfeed = len(feedno)
    xx_pairs = [ (xchans[i], xchans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
    yy_pairs = [ (ychans[i], ychans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
    xy_pairs = [ (xchans[i], ychans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
    yx_pairs = [ (ychans[i], xchans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]

    blorder = [ tuple(bl) for bl in self['blorder'] ]
    conj_blorder = [ tuple(bl[::-1]) for bl in self['blorder'] ]

    def _get_ind(chp):
        try:
            return False, blorder.index(chp)
        except ValueError:
            return True, conj_blorder.index(chp)

    # xx
    xx_list = [ _get_ind(chp) for chp in xx_pairs ]
    xx_inds = [ ind for (cj, ind) in xx_list ]
    xx_conj = [ cj for (cj, ind) in xx_list ]
    # yy
    yy_list = [ _get_ind(chp) for chp in yy_pairs ]
    yy_inds = [ ind for (cj, ind) in yy_list ]
    yy_conj = [ cj for (cj, ind) in yy_list ]
    # xy
    xy_list = [ _get_ind(chp) for chp in xy_pairs ]
    xy_inds = [ ind for (cj, ind) in xy_list ]
    xy_conj = [ cj for (cj, ind) in xy_list ]
    # yx
    yx_list = [ _get_ind(chp) for chp in yx_pairs ]
    yx_inds = [ ind for (cj, ind) in yx_list ]
    yx_conj = [ cj for (cj, ind) in yx_list ]

    # create a MPIArray to hold the pol and bl separated vis
    rvis = self.main_data.local_data
    shp = rvis.shape[:2] + (4, len(xx_inds))
    vis = np.empty(shp, dtype=rvis.dtype)
    vis[:, :, 0] = np.where(xx_conj, rvis[:, :, xx_inds].conj(), rvis[:, :, xx_inds]) # xx
    vis[:, :, 1] = np.where(yy_conj, rvis[:, :, yy_inds].conj(), rvis[:, :, yy_inds]) # yy
    vis[:, :, 2] = np.where(xy_conj, rvis[:, :, xy_inds].conj(), rvis[:, :, xy_inds]) # xy
    vis[:, :, 3] = np.where(yx_conj, rvis[:, :, yx_inds].conj(), rvis[:, :, yx_inds]) # yx

    vis = mpiarray.MPIArray.wrap(vis, axis=self.main_data_dist_axis, comm=self.comm)

    # create main data
    ts.create_main_data(vis)
    # copy attrs from rt
    memh5.copyattrs(self.main_data.attrs, ts.main_data.attrs)
    # create attrs of this dataset
    ts.main_data.attrs['dimname'] = 'Time, Frequency, Polarization, Baseline'

    # create a MPIArray to hold the pol and bl separated vis_mask
    rvis_mask = self['vis_mask'].local_data
    shp = rvis_mask.shape[:2] + (4, len(xx_inds))
    vis_mask = np.empty(shp, dtype=rvis_mask.dtype)
    vis_mask[:, :, 0] = rvis_mask[:, :, xx_inds] # xx
    vis_mask[:, :, 1] = rvis_mask[:, :, yy_inds] # yy
    vis_mask[:, :, 2] = rvis_mask[:, :, xy_inds] # xy
    vis_mask[:, :, 3] = rvis_mask[:, :, yx_inds] # yx

    vis_mask = mpiarray.MPIArray.wrap(vis_mask, axis=self.main_data_dist_axis, comm=self.comm)

    # create vis_mask
    axis_order = ts.main_axes_ordered_datasets[ts.main_data_name]
    ts.create_main_axis_ordered_dataset(axis_order, 'vis_mask', vis_mask, axis_order)

    # create other datasets needed
    # pol ordered dataset
    ts.create_pol_ordered_dataset('pol', data=np.array(['xx', 'yy', 'xy', 'yx']))
    ts['pol'].attrs['pol_type'] = 'linear'

    # bl ordered dataset
    blorder = np.array([ [feedno[i], feedno[j]] for i in xrange(nfeed) for j in xrange(i, nfeed) ])
    ts.create_bl_ordered_dataset('blorder', data=blorder)
    # copy attrs of this dset
    memh5.copyattrs(self['blorder'].attrs, ts['blorder'].attrs)
    # other bl ordered dataset
    if len(set(self.bl_ordered_datasets.keys()) - {'vis', 'vis_mask', 'blorder'}) > 0:
        raise RuntimeError('Should not have other bl_ordered_datasets %s' % (set(self.bl_ordered_datasets.keys()) - {'vis', 'vis_mask', 'blorder'}))

    # copy other attrs
    for attrs_name, attrs_value in self.attrs.iteritems():
        if attrs_name not in self.time_ordered_attrs:
            ts.attrs[attrs_name] = attrs_value

    # copy other datasets
    for dset_name, dset in self.iteritems():
        if dset_name == self.main_data_name or dset_name == 'vis_mask':
            # already created above
            continue
        elif dset_name in self.main_axes_ordered_datasets.keys():
            if dset_name in self.bl_ordered_datasets.keys():
                # already created above
                continue
            else:
                axis_order = self.main_axes_ordered_datasets[dset_name]
                axis = None
                for order in axis_order:
                    if isinstance(order, int):
                        axis = order
                if axis is None:
                    raise RuntimeError('Invalid axis order %s for dataset %s' % (axis_order, dset_name))
                ts.create_main_axis_ordered_dataset(axis, dset_name, dset.data, axis_order)
        elif dset_name in self.time_ordered_datasets.keys():
            axis_order = self.time_ordered_datasets[dset_name]
            ts.create_time_ordered_dataset(dset_name, dset.data, axis_order)
        elif dset_name in self.feed_ordered_datasets.keys():
            if dset_name == 'channo': # channo is not useful for Timestream
                continue
            else:
                axis_order = self.feed_ordered_datasets[dset_name]
                ts.create_feed_ordered_dataset(dset_name, dset.data, axis_order)
        else:
            if dset.common:
                ts.create_dataset(dset_name, data=dset)
            elif dset.distributed:
                ts.create_dataset(dset_name, data=dset.data, shape=dset.shape, dtype=dset.dtype, distributed=True, distributed_axis=dset.distributed_axis)

        # copy attrs of this dset
        memh5.copyattrs(dset.attrs, ts[dset_name].attrs)

    # redistribute self to original axis
    if keep_dist_axis:
        self.redistribute(original_dist_axis)

    return ts
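# Illustrative sketch (not part of the methods above): how a channel pair is
# located in the baseline order, falling back to the conjugated pair, as in the
# _get_ind helper. The channel numbers and baseline order are made up.
#
#     blorder = [(1, 1), (1, 3), (3, 3)]
#     conj_blorder = [bl[::-1] for bl in blorder]
#
#     def get_ind(chp):
#         try:
#             return False, blorder.index(chp)
#         except ValueError:
#             return True, conj_blorder.index(chp)
#
#     get_ind((1, 3))   # -> (False, 1): found directly
#     get_ind((3, 1))   # -> (True, 1): found as the conjugate of (1, 3)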
def process_gated_data(data, only_off=False):
    """
    Processes fast gating data and turns it into gated form.

    Parameters
    ----------
    data : andata.CorrData
        Correlator data with noise source switched synchronously with the
        integration.
    only_off : boolean
        Only return the off dataset. Do not return gated datasets.

    Returns
    -------
    newdata : andata.CorrData
        Correlator data folded on the noise source.

    Comments
    --------
    For now the correlator only supports fast gating with one gate
    (gated_vis1) and 50% duty cycle. The vis dataset contains on+off and the
    gated_vis1 contains on-off. This function returns a new andata object with
    vis containing the off data only and gated_vis1 as in the original andata
    object. The attribute 'gpu.gpu_intergration_period' is divided by 2 since
    during an integration half of the frames have on data.
    """
    # Make sure we're distributed over something other than time
    data.redistribute("freq")

    # Get distribution parameters
    dist = isinstance(data.vis, memh5.MemDatasetDistributed)
    comm = data.vis.comm

    # Construct new CorrData object for gated dataset
    newdata = andata.CorrData.__new__(andata.CorrData)
    if dist:
        memh5.BasicCont.__init__(newdata, distributed=dist, comm=comm)
    else:
        memh5.BasicCont.__init__(newdata, distributed=dist)
    memh5.copyattrs(data.attrs, newdata.attrs)

    # Add index maps to newdata
    newdata.create_index_map("freq", data.index_map["freq"])
    newdata.create_index_map("prod", data.index_map["prod"])
    newdata.create_index_map("input", data.input)
    newdata.create_index_map("time", data.index_map["time"])

    # Add datasets (for noise OFF) to newdata
    # Extract the noise source off data
    vis_off = 0.5 * (
        data.vis[:].view(np.ndarray) - data["gated_vis1"][:].view(np.ndarray)
    )

    # Turn vis_off into MPIArray if we are distributed
    if dist:
        vis_off = mpiarray.MPIArray.wrap(vis_off, axis=0, comm=comm)

    # Add new visibility dataset
    vis_dset = newdata.create_dataset("vis", data=vis_off, distributed=dist)
    memh5.copyattrs(data.vis.attrs, vis_dset.attrs)

    # Add gain dataset (if it exists) for vis_off.
    # These will be the gains for both the noise ON and OFF data
    if "gain" in data:
        gain = data.gain[:].view(np.ndarray)
        # Turn gain into MPIArray if we are distributed
        if dist:
            gain = mpiarray.MPIArray.wrap(gain, axis=0, comm=comm)
        gain_dset = newdata.create_dataset("gain", data=gain, distributed=dist)
        memh5.copyattrs(data.gain.attrs, gain_dset.attrs)

    # Pull out weight dataset if it exists.
    # These will be the weights for both the noise ON and OFF data
    if "vis_weight" in data.flags:
        vis_weight = data.weight[:].view(np.ndarray)
        # Turn vis_weight into MPIArray if we are distributed
        if dist:
            vis_weight = mpiarray.MPIArray.wrap(vis_weight, axis=0, comm=comm)
        vis_weight_dset = newdata.create_flag(
            "vis_weight", data=vis_weight, distributed=dist
        )
        memh5.copyattrs(data.weight.attrs, vis_weight_dset.attrs)

    # Add gated dataset (only gated_vis1 currently supported by correlator
    # with 50% duty cycle)
    if not only_off:
        gated_vis1 = data["gated_vis1"][:].view(np.ndarray)
        # Turn gated_vis1 into MPIArray if we are distributed
        if dist:
            gated_vis1 = mpiarray.MPIArray.wrap(gated_vis1, axis=0, comm=comm)
        gate_dset = newdata.create_dataset(
            "gated_vis1", data=gated_vis1, distributed=dist
        )
        memh5.copyattrs(data["gated_vis1"].attrs, gate_dset.attrs)

    # The CHIME pipeline uses gpu.gpu_intergration_period to estimate the
    # integration period for both the on and off gates. That number has to be
    # changed (divided by 2) since with fast gating one integration period has
    # 1/2 of the data for the on gate and 1/2 for the off gate
    newdata.attrs["gpu.gpu_intergration_period"] = (
        data.attrs["gpu.gpu_intergration_period"] // 2
    )

    return newdata
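# Illustrative sketch (not part of the function above): with 50% duty-cycle fast
# gating, vis holds on + off and gated_vis1 holds on - off, so the off (sky) and
# on components are recovered as below. The arrays are made-up values.
#
#     import numpy as np
#
#     vis = np.array([3.0 + 0.0j, 5.0 + 1.0j])         # on + off
#     gated_vis1 = np.array([1.0 + 0.0j, 1.0 - 1.0j])  # on - off
#
#     vis_off = 0.5 * (vis - gated_vis1)   # sky only, as stored in the new 'vis'
#     vis_on = 0.5 * (vis + gated_vis1)    # noise-source on component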
def process_synced_data(data, ni_params=None, only_off=False):
    """Turn a synced noise source observation into gated form.

    This will decimate the visibility to only the noise source off bins, and
    will add 1 or more gated on-off dataset according to the specification in
    doclib:5.

    Parameters
    ----------
    data : andata.CorrData
        Correlator data with noise source switched synchronously with the
        integration.
    ni_params : dict
        Dictionary with the noise injection parameters. Optional for data
        after ctime=1435349183. ni_params has the following keys
        - ni_period: Noise injection period in GPU integrations. It is
        assumed to be the same for all the enabled noise sources
        - ni_on_bins: A list of lists, one per enabled noise source, with the
        corresponding ON gates (within a period). For each noise source, the
        list contains the indices of the time frames for which the source is
        ON.
        Example: For a 3 GPU integration period (3 gates: 0, 1, 2), two enabled
        noise sources, one ON during gate 0, the other ON during gate 1, and
        both OFF during gate 2, then
        ```
        ni_params = {'ni_period':3, 'ni_on_bins':[[0], [1]]}
        ```
    only_off : boolean
        Only return the off dataset. Do not return gated datasets.

    Returns
    -------
    newdata : andata.CorrData
        Correlator data folded on the noise source.

    Comments
    --------
    - The function assumes that the fpga frame counter, which is used to
      determine the noise injection gating parameters, is unwrapped.
    - For noise injection data before ctime=1435349183 (i.e. for noise
      injection data before 20150626T200540Z_pathfinder_corr) the noise
      injection information is not in the headers so this function cannot be
      used to determine the noise injection parameters. A different method is
      required. Although it is recommended to check the data directly in this
      case, the previous version of this function assumed that
      ni_params = {'ni_period':2, 'ni_on_bins':[[0],]}
      for noise injection data before ctime=1435349183. Although this is not
      always true, it is true for big old datasets like pass1g. Use the value
      of ni_params recommended above to reproduce the results of the old
      function with the main old datasets.
    - Data (visibility, gain and weight datasets) are averaged for all the off
      gates within the noise source period, and also for all the on gates of
      each noise source.
    - For the time index map, only one timestamp per noise period is kept
      (no averaging)
    """

    if ni_params is None:
        # ctime before which the noise injection information is not in the
        # headers so this function cannot be used to determine the noise
        # injection parameters.
        ctime_no_noise_inj_data = 1435349183
        if data.index_map["time"]["ctime"][0] > ctime_no_noise_inj_data:
            # All the data required to figure out the noise inj gating is in
            # the data header
            try:
                ni_params = _find_ni_params(data)
            except ValueError:
                warn_str = (
                    "There are no enabled noise sources for these data. "
                    "Returning input"
                )
                warnings.warn(warn_str)
                return data
        else:
            # This is data before ctime = 1435349183. Noise injection
            # parameters are not in the data header. Raise error
            t = datetime.datetime.utcfromtimestamp(ctime_no_noise_inj_data)
            t_str = t.strftime("%Y %b %d %H:%M:%S UTC")
            err_str = (
                "ni_params parameter is required for data before "
                "%s (ctime=%i)." % (t_str, ctime_no_noise_inj_data)
            )
            raise Exception(err_str)

    if len([s for s in data.datasets.keys() if "gated_vis" in s]):
        # If there are datasets with gated_vis in their names then assume
        # this is fast gating data, where the vis dataset has on+off and
        # the vis_gatedxx has onxx-off. Process them separately since in
        # this case the noise injection parameters are not in gpu
        # integration frames but in fpga frames and the gates are already
        # separated
        newdata = process_gated_data(data, only_off=only_off)
    else:
        # time bins with noise ON for each source (within a noise period)
        # This is a list of lists, each list corresponding to the ON time bins
        # for each noise source.
        ni_on_bins = ni_params["ni_on_bins"]

        # Number of enabled noise sources
        N_ni_sources = len(ni_on_bins)

        # Noise injection period (assume all sources have same period)
        ni_period = ni_params["ni_period"]

        # time bins with all noise sources off (within a noise period)
        ni_off_bins = np.delete(list(range(ni_period)), np.concatenate(ni_on_bins))

        # Find largest number of exact noise injection periods
        nt = ni_period * (data.ntime // ni_period)

        # Make sure we're distributed over something other than time
        data.redistribute("freq")

        # Get distribution parameters
        dist = isinstance(data.vis, memh5.MemDatasetDistributed)
        comm = data.vis.comm

        # Construct new CorrData object for gated dataset
        newdata = andata.CorrData.__new__(andata.CorrData)
        if dist:
            memh5.BasicCont.__init__(newdata, distributed=dist, comm=comm)
        else:
            memh5.BasicCont.__init__(newdata, distributed=dist)
        memh5.copyattrs(data.attrs, newdata.attrs)

        # Add index maps to newdata
        newdata.create_index_map("freq", data.index_map["freq"])
        newdata.create_index_map("prod", data.index_map["prod"])
        newdata.create_index_map("input", data.input)

        # Extract timestamps for OFF bins. Only one timestamp per noise period
        # is kept. These will be the timestamps for both the noise ON and OFF
        # data
        time = data.index_map["time"][ni_off_bins[0] : nt : ni_period]
        folding_period = time["ctime"][1] - time["ctime"][0]
        folding_start = time["ctime"][0]
        # Add index map for noise OFF timestamps.
        newdata.create_index_map("time", time)

        # Add datasets (for noise OFF) to newdata
        # Extract the noise source off data
        if len(ni_off_bins) > 1:
            # Average all time bins with noise OFF within a period
            vis_sky = [data.vis[..., gate:nt:ni_period] for gate in ni_off_bins]
            vis_sky = np.mean(vis_sky, axis=0)
        else:
            vis_sky = data.vis[..., ni_off_bins[0] : nt : ni_period]

        # Turn vis_sky into MPIArray if we are distributed
        if dist:
            vis_sky = mpiarray.MPIArray.wrap(vis_sky, axis=0, comm=comm)

        # Add new visibility dataset
        vis_dset = newdata.create_dataset("vis", data=vis_sky, distributed=dist)
        memh5.copyattrs(data.vis.attrs, vis_dset.attrs)

        # Add gain dataset (if it exists) for noise OFF data.
        # Gain dataset is also averaged (within a period).
        # These will be the gains for both the noise ON and OFF data
        if "gain" in data:
            if len(ni_off_bins) > 1:
                gain = [data.gain[..., gate:nt:ni_period] for gate in ni_off_bins]
                gain = np.mean(gain, axis=0)
            else:
                gain = data.gain[..., ni_off_bins[0] : nt : ni_period]

            # Turn gain into MPIArray if we are distributed
            if dist:
                gain = mpiarray.MPIArray.wrap(gain, axis=0, comm=comm)

            # Add new gain dataset
            gain_dset = newdata.create_dataset("gain", data=gain, distributed=dist)
            memh5.copyattrs(data.gain.attrs, gain_dset.attrs)

        # Pull out weight dataset if it exists.
        # vis_weight dataset is also averaged (within a period).
        # These will be the weights for both the noise ON and OFF data
        if "vis_weight" in data.flags:
            if len(ni_off_bins) > 1:
                vis_weight = [
                    data.weight[..., gate:nt:ni_period] for gate in ni_off_bins
                ]
                vis_weight = np.mean(vis_weight, axis=0)
            else:
                vis_weight = data.weight[..., ni_off_bins[0] : nt : ni_period]

            # Turn vis_weight into MPIArray if we are distributed
            if dist:
                vis_weight = mpiarray.MPIArray.wrap(vis_weight, axis=0, comm=comm)

            # Add new vis_weight dataset
            vis_weight_dset = newdata.create_flag(
                "vis_weight", data=vis_weight, distributed=dist
            )
            memh5.copyattrs(data.weight.attrs, vis_weight_dset.attrs)

        # Add gated datasets for each noise source:
        if not only_off:
            for i in range(N_ni_sources):
                # Construct the noise source only data
                vis_noise = [
                    data.vis[..., gate:nt:ni_period] for gate in ni_on_bins[i]
                ]
                vis_noise = np.mean(vis_noise, axis=0)  # Averaging
                vis_noise -= vis_sky  # Subtracting sky contribution

                # Turn vis_noise into MPIArray if we are distributed
                if dist:
                    vis_noise = mpiarray.MPIArray.wrap(vis_noise, axis=0, comm=comm)

                # Add noise source dataset
                gate_dset = newdata.create_dataset(
                    "gated_vis{0}".format(i + 1), data=vis_noise, distributed=dist
                )
                gate_dset.attrs["axis"] = np.array(
                    ["freq", "prod", "gated_time{0}".format(i + 1)]
                )
                gate_dset.attrs["folding_period"] = folding_period
                gate_dset.attrs["folding_start"] = folding_start

                # Construct array of gate weights (sum = 0)
                gw = np.zeros(ni_period, dtype=np.float64)
                gw[ni_off_bins] = -1.0 / len(ni_off_bins)
                gw[ni_on_bins[i]] = 1.0 / len(ni_on_bins[i])
                gate_dset.attrs["gate_weight"] = gw

    return newdata
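# Illustrative sketch (not part of the function above): averaging the off gates
# within each noise-injection period, as process_synced_data does for the sky
# data. The period, gate assignment and time stream below are made-up values.
#
#     import numpy as np
#
#     ni_period = 3
#     ni_on_bins = [[0], [1]]        # two sources, ON in gates 0 and 1
#     ni_off_bins = np.delete(np.arange(ni_period), np.concatenate(ni_on_bins))  # -> [2]
#
#     ntime = 7
#     nt = ni_period * (ntime // ni_period)   # largest whole number of periods -> 6
#     vis = np.arange(ntime, dtype=float)     # stand-in for the time axis of 'vis'
#
#     off = [vis[gate:nt:ni_period] for gate in ni_off_bins]
#     vis_sky = np.mean(off, axis=0)          # gate-2 samples: [2., 5.]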