Exemplo n.º 1
0
def _cut_non_chime(data, visi, chan_array, inputs=None):
    """
    Drop channels that are not powered-on CHIME inputs.

    Every channel index listed in `chan_array` whose correlator input is
    reported as "not CHIME and on" by `tools.is_chime_on` is removed, both
    from `chan_array` and from the matching row (axis 0) of `visi`.

    Parameters
    ----------
    data :
        Data object providing `input` (the channel-to-input map) and
        `index_map["time"]["ctime"]` (time stamps).
    visi :
        Visibility array; rows along axis 0 correspond to the entries of
        `chan_array`.
    chan_array :
        Array of channel indices present in `visi`.
    inputs : optional
        Correlator inputs. When None they are fetched with
        `tools.get_correlator_inputs` for the time stamp in the middle of
        the data.

    Returns
    -------
    visi, chan_array :
        The inputs with the rejected channels deleted.
    """
    channel_map = data.input
    ctimes = data.index_map["time"]["ctime"]

    # Use the time stamp halfway through the data for the layout lookup
    mid_index = int(len(ctimes) // 2)
    mid_datetime = ch_eph.unix_to_datetime(ctimes[mid_index])

    if inputs is None:
        inputs = tools.get_correlator_inputs(mid_datetime)

    # Put the inputs in the same order as the channel map (and the data)
    inputs = tools.reorder_correlator_inputs(channel_map, inputs)

    # One flag per input: attached to a CHIME antenna and powered on
    chime_on = tools.is_chime_on(inputs)

    for chan in range(len(inputs)):
        if chime_on[chan] or chan not in chan_array:
            continue

        # Position of this channel in the (shrinking) channel array
        row = np.where(chan_array == chan)[0][0]
        visi = np.delete(visi, row, axis=0)
        chan_array = np.delete(chan_array, row, axis=0)

    return visi, chan_array
Exemplo n.º 2
0
 def sat_phase(self, freq, offset=(0, 0)):
     """Return one phase term per baseline in ``self.bl``.

     ``self.layout`` is refreshed from the correlator inputs at ``self.dt``
     first. For each baseline the result is
     ``2*pi * dot(all_coords(offset), baseline_vector) * (freq*10**6) / c``.

     NOTE(review): the ``10**6`` factor suggests ``freq`` is in MHz --
     confirm against callers.
     """
     self.layout = array(tools.get_correlator_inputs(self.dt))

     def _phase(pair):
         # Projection of the pointing coordinates onto this baseline
         separation = self.get_bl(*pair)
         projection = dot(self.all_coords(offset), separation)
         return 2*constants.pi*projection*(freq*10**6)/constants.c

     return [_phase(pair) for pair in self.bl]
Exemplo n.º 3
0
    def set_metadata(self, tms, input_map):
        """Populate the layout-derived attributes of this object.

        Sets ``self.corr_inputs`` (correlator inputs at the middle time stamp
        of ``tms``, reordered to match ``input_map``), ``self.pwds`` (boolean
        array from ``tools.is_chime_on``) and ``self.pstns``, ``self.p1_idx``,
        ``self.p2_idx`` (the three values returned by ``self.get_pos_pol``).
        """
        from ch_util import tools

        # Look the layout up at the time stamp halfway through the data
        mid_stamp = tms[int(len(tms) // 2)]
        mid_datetime = ephemeris.unix_to_datetime(mid_stamp)

        db_inputs = tools.get_correlator_inputs(mid_datetime)
        self.corr_inputs = tools.reorder_correlator_inputs(input_map, db_inputs)

        # Which inputs are CHIME antennas that are switched on
        chime_on = tools.is_chime_on(self.corr_inputs)
        self.pwds = np.array(chime_on, dtype=bool)

        # Positions and the two polarization index sets
        pos_pol = self.get_pos_pol(self.corr_inputs, self.pwds)
        self.pstns, self.p1_idx, self.p2_idx = pos_pol
Exemplo n.º 4
0
def gen_inp(nfeed=256):
    """Generate input information for feeds.

    The feed indices ``0 .. nfeed-1`` are split into four consecutive
    quarters; quarters 1 and 3 are taken as x feeds, quarters 2 and 4 as
    y feeds.

    Parameters
    ----------
    nfeed : int
        Total number of feeds.

    Returns
    -------
    corrinput_real :
        Correlator inputs, rearranged by ``rearrange_list``.
    inpx : list
        Entries of ``corrinput_real`` for the x feeds.
    inpy : list
        Entries of ``corrinput_real`` for the y feeds.
    xcorrs : list
        ``misc.feed_map`` result for every x-feed pair (ii <= jj).
    ycorrs : list
        ``misc.feed_map`` result for every y-feed pair (ii <= jj).
    xfeeds : list of int
        Indices of the x feeds.
    yfeeds : list of int
        Indices of the y feeds.
    """
    # Floor division and explicit lists keep this working on Python 3, where
    # ``/`` yields a float and ``range`` objects cannot be concatenated.
    q1 = nfeed // 4
    q2 = 2 * nfeed // 4
    q3 = 3 * nfeed // 4
    q4 = 4 * nfeed // 4
    half = nfeed // 2

    # Assumes a standard layout for 128 feeds on each cyl
    xfeeds = list(range(q1)) + list(range(q2, q3))
    yfeeds = list(range(q1, q2)) + list(range(q3, q4))

    xcorrs = []
    ycorrs = []

    for ii in range(half):
        for jj in range(ii, half):
            xcorrs.append(misc.feed_map(xfeeds[ii], xfeeds[jj], nfeed))
            ycorrs.append(misc.feed_map(yfeeds[ii], yfeeds[jj], nfeed))

    corrinputs = tools.get_correlator_inputs(
        datetime.datetime(2015, 6, 1, 0, 0, 0), correlator='K7BP16-0004')

    # Need to rearrange to match order in the correlated data
    # NOTE(review): nfeeds is hard-coded to 256 here rather than following
    # ``nfeed`` -- confirm whether that is intentional.
    corrinput_real = rearrange_list(corrinputs, nfeeds=256)

    inpx = [corrinput_real[xfeeds[i]] for i in range(half)]
    inpy = [corrinput_real[yfeeds[i]] for i in range(half)]

    return corrinput_real, inpx, inpy, xcorrs, ycorrs, xfeeds, yfeeds
Exemplo n.º 5
0
def gen_inp(nfeed=256):
    """Generate input information for feeds.

    The feed indices ``0 .. nfeed-1`` are split into four consecutive
    quarters; quarters 1 and 3 are taken as x feeds, quarters 2 and 4 as
    y feeds.

    Parameters
    ----------
    nfeed : int
        Total number of feeds.

    Returns
    -------
    corrinput_real :
        Correlator inputs, rearranged by ``rearrange_list``.
    inpx : list
        Entries of ``corrinput_real`` for the x feeds.
    inpy : list
        Entries of ``corrinput_real`` for the y feeds.
    xcorrs : list
        ``misc.feed_map`` result for every x-feed pair (ii <= jj).
    ycorrs : list
        ``misc.feed_map`` result for every y-feed pair (ii <= jj).
    xfeeds : list of int
        Indices of the x feeds.
    yfeeds : list of int
        Indices of the y feeds.
    """
    # ``//`` and ``list(range(...))`` give the same values as the Python 2
    # ``/`` and list-returning ``range`` but also run on Python 3.
    half = nfeed // 2
    bounds = [0, nfeed // 4, 2 * nfeed // 4, 3 * nfeed // 4, 4 * nfeed // 4]

    # Assumes a standard layout for 128 feeds on each cyl
    xfeeds = (list(range(bounds[0], bounds[1])) +
              list(range(bounds[2], bounds[3])))
    yfeeds = (list(range(bounds[1], bounds[2])) +
              list(range(bounds[3], bounds[4])))

    xcorrs = []
    ycorrs = []

    for ii in range(half):
        for jj in range(ii, half):
            xcorrs.append(misc.feed_map(xfeeds[ii], xfeeds[jj], nfeed))
            ycorrs.append(misc.feed_map(yfeeds[ii], yfeeds[jj], nfeed))

    corrinputs = tools.get_correlator_inputs(
        datetime.datetime(2015, 6, 1, 0, 0, 0), correlator='K7BP16-0004')

    # Need to rearrange to match order in the correlated data
    # NOTE(review): nfeeds is hard-coded to 256 here rather than following
    # ``nfeed`` -- confirm whether that is intentional.
    corrinput_real = rearrange_list(corrinputs, nfeeds=256)

    inpx = [corrinput_real[xfeeds[i]] for i in range(half)]
    inpy = [corrinput_real[yfeeds[i]] for i in range(half)]

    return corrinput_real, inpx, inpy, xcorrs, ycorrs, xfeeds, yfeeds
Exemplo n.º 6
0
    def get_prod_sel(self, data):
        """Pick the products that pair a base channel with its partner group.

        The layout is looked up at the middle time stamp of ``data`` and
        reordered to match ``data.input``. Each of the four base channels
        (``self.bswp1``, ``self.bswp2``, ``self.bsep1``, ``self.bsep2``) is
        incremented in place until it points at an input flagged by
        ``tools.is_chime_on``. A product is selected when one of its two
        channels is a base channel and the other belongs to the group paired
        with that base: ``bswp1``/``echp1``, ``bswp2``/``echp2``,
        ``bsep1``/``wchp1``, ``bsep2``/``wchp2``.

        NOTE(review): the names suggest west/east cylinders and
        polarizations 1/2 -- confirm against ``get_cyl_pol``.

        Returns
        -------
        prod_sel : list of int
            Sorted indices into ``data.prod`` of the selected products.
        pwds :
            Result of ``tools.is_chime_on`` for the reordered inputs.
        """
        from ch_util import tools

        input_map = data.input
        tms = data.time

        mid_datetime = ephemeris.unix_to_datetime(tms[int(len(tms) // 2)])
        corr_inputs = tools.reorder_correlator_inputs(
            input_map, tools.get_correlator_inputs(mid_datetime))

        # Which inputs are CHIME ON antennas
        pwds = tools.is_chime_on(corr_inputs)

        wchp1, wchp2, echp1, echp2 = self.get_cyl_pol(corr_inputs, pwds)

        # Ensure base channels are CHIME and ON: step each one forward until
        # it lands on a flagged input
        for attr in ("bswp1", "bswp2", "bsep1", "bsep2"):
            while not pwds[np.where(
                    input_map["chan_id"] == getattr(self, attr))[0][0]]:
                setattr(self, attr, getattr(self, attr) + 1)

        # Each base channel together with the group it is correlated against
        pairings = (
            (self.bswp1, echp1),
            (self.bswp2, echp2),
            (self.bsep1, wchp1),
            (self.bsep2, wchp2),
        )

        def _is_selected(prod):
            first, second = prod[0], prod[1]
            return any((first == base and second in group) or
                       (second == base and first in group)
                       for base, group in pairings)

        prod_sel = [ii for ii, prod in enumerate(data.prod)
                    if _is_selected(prod)]
        prod_sel.sort()

        return prod_sel, pwds
Exemplo n.º 7
0
    def _load_layout(self):
        """Fetch the feed layout from the database into ``self._feeds``.

        Not normally called directly; use :method:`CHIME.from_layout` or
        configure from a YAML file instead.

        Raises
        ------
        Exception
            If ``self.layout`` is None, or (after the fetch) if
            ``self.skip_non_chime`` is set, which is not supported.
        """
        layout = self.layout
        if layout is None:
            raise Exception("Layout attributes not set.")

        # Query the layout database for the correlator inputs
        correlator_inputs = tools.get_correlator_inputs(layout,
                                                        self.correlator)

        # With more than one MPI process, take the copy held by rank 0
        running_parallel = mpiutil.size > 1
        if running_parallel:
            correlator_inputs = mpiutil.world.bcast(correlator_inputs, root=0)

        if self.skip_non_chime:
            raise Exception("Not supported.")

        self._feeds = correlator_inputs
Exemplo n.º 8
0
 def expected_phase(self):
     """Return the expected phase of every baseline at transit.

     ``self.layout`` is refreshed from the correlator inputs at ``self.dt``
     and, if ``self.set_up`` is false, ``transit_time()`` and
     ``read_transit_data()`` are run first.

     The baselines are those in ``self.bl``; when ``self.bl`` is None every
     pair ``(ii, jj)`` with ``0 <= ii <= jj < 256`` is used. For each
     baseline the result is
     ``2*pi * dot(transit_coords(), baseline_vector) * (freqs*10**6) / c``,
     where ``freqs`` holds the first field of each entry of
     ``self.data.freq``.

     Returns
     -------
     output : list
         One entry per baseline, in iteration order.
     """
     output = []
     self.layout = array(tools.get_correlator_inputs(self.dt))
     if not self.set_up:
         self.transit_time()
         self.read_transit_data()

     # Same for every baseline, so build it once. The loop variable is
     # named so it cannot be confused with the baseline indices below.
     freqs = array([entry[0] for entry in self.data.freq])

     if self.bl is not None:
         baselines = self.bl
     else:
         # Upper triangle (including autos) of the 256 x 256 input matrix
         baselines = ((ii, jj) for ii in range(256) for jj in range(ii, 256))

     for baseline in baselines:
         bl_vector = self.get_bl(*baseline)
         bdots = dot(self.transit_coords(), bl_vector)
         output.append(2*constants.pi*bdots*(freqs*10**6)/constants.c)
     return output
Exemplo n.º 9
0
    def next(self, ts):
        """Build the input description matching the given timestream.

        When caching is enabled and a previous result exists, that result is
        returned immediately. Otherwise rank 0 queries the layout at the
        midpoint between the first and last time samples, reorders it to
        match ``ts.index_map["input"]``, and the result is broadcast to all
        ranks.

        Parameters
        ----------
        ts : andata.CorrData
            Timestream container.

        Returns
        -------
        inputs : list of :class:`CorrInput`
            A list describing the inputs as they are in the file.
        """
        # Fetch from the cache if we can
        if self.cache and self._cached_inputs:
            self.log.debug("Using cached inputs.")
            return self._cached_inputs

        def _query_layout():
            # Datetime of the middle of the file
            midpoint = ephemeris.unix_to_datetime(
                0.5 * (ts.time[0] + ts.time[-1]))
            db_inputs = tools.get_correlator_inputs(midpoint)
            return tools.reorder_correlator_inputs(ts.index_map["input"],
                                                   db_inputs)

        # Only rank 0 performs the query; everyone else receives its result
        local_inputs = _query_layout() if mpiutil.rank0 else None
        shared_inputs = mpiutil.world.bcast(local_inputs, root=0)

        # Save into the cache for the next iteration
        if self.cache:
            self._cached_inputs = shared_inputs

        # Make sure all nodes have the description before returning
        mpiutil.world.Barrier()

        return shared_inputs
Exemplo n.º 10
0
def offline_point_source_calibration(file_list,
                                     source,
                                     inputmap=None,
                                     start=None,
                                     stop=None,
                                     physical_freq=None,
                                     tcorr=None,
                                     logging_params=DEFAULT_LOGGING,
                                     **kwargs):
    """Derive per-input gains from the transit of a point source.

    Reads the eigen-decomposition datasets ('erms', 'eval', 'evec') together
    with the visibilities and their weights from ``file_list``, builds the
    response of every input to ``source`` for each polarization, and then
    either returns that response (``config.return_response``) or evaluates /
    fits the gain and interpolates it over flagged entries.

    Parameters
    ----------
    file_list :
        Passed unchanged to ``andata.CorrData.from_acq_h5``.
    source :
        Key into ``FluxCatalog``; also used to look up a source-specific
        entry in ``config.dyn_rng_threshold``.
    inputmap : optional
        Correlator inputs. When None they are queried from the layout
        database (correlator 'chime') at the time of transit. Either way
        they are reordered to match the input axis of the data.
    start, stop : optional
        Time selection forwarded to ``from_acq_h5``.
    physical_freq : optional
        Iterable of frequencies; for each one the closest entry of the
        frequency axis of the data is processed. None processes all.
    tcorr : optional
        Object with a ``get_gain(freq, input, time)`` method whose result is
        applied to the response as a timing correction.
    logging_params :
        Passed to ``log.setup_logging``.
    **kwargs :
        Merged on top of a copy of ``DEFAULTS`` to form the configuration.
        'poly_deg_amp', 'poly_deg_phi' and 'poly_type' are additionally read
        directly, falling back to the defaults of the fitter function.

    Returns
    -------
    results : dict
        Always holds 'model', 'param', 'freq', 'input', 'eval', 'dir',
        'poly_deg_amp', 'poly_deg_phi', 'poly_type' and 'runtime'.
        With ``config.return_response`` it also holds 'response',
        'response_err', 'fit_flag', 'ha_axis' and 'ra'. Otherwise it holds
        'gain_eval', 'weight_eval', 'frac_gain_err', 'parameter',
        'parameter_err', 'index_eval', 'ndof', 'chisq', 'timing', 'flag',
        and the interpolated 'gain' and 'weight'.

    Raises
    ------
    ValueError
        If the eigenvector component of ``config.eigen_reference`` has a
        nonzero imaginary part.
    """
    # Load config
    config = DEFAULTS.deepcopy()
    config.merge(NameSpace(kwargs))

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    mlog.info("ephemeris file: %s" % ephemeris.__file__)

    # Set the model to use
    fitter_function = utils.fit_point_source_transit
    model_function = utils.model_point_source_transit

    # inspect.getargspec was removed in Python 3.11; prefer getfullargspec
    # when it exists and fall back to getargspec on Python 2.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    farg = getargspec(fitter_function)
    defaults = {
        key: val
        for key, val in zip(farg.args[-len(farg.defaults):], farg.defaults)
    }
    poly_deg_amp = kwargs.get('poly_deg_amp', defaults['poly_deg_amp'])
    poly_deg_phi = kwargs.get('poly_deg_phi', defaults['poly_deg_phi'])
    poly_type = kwargs.get('poly_type', defaults['poly_type'])

    param_name = ([
        '%s_poly_amp_coeff%d' % (poly_type, cc)
        for cc in range(poly_deg_amp + 1)
    ] + [
        '%s_poly_phi_coeff%d' % (poly_type, cc)
        for cc in range(poly_deg_phi + 1)
    ])

    model_kwargs = [('poly_deg_amp', poly_deg_amp),
                    ('poly_deg_phi', poly_deg_phi), ('poly_type', poly_type)]
    model_name = '.'.join(
        [getattr(model_function, key) for key in ['__module__', '__name__']])

    tval = {}

    # Set where to evaluate gain
    ha_eval_str = ['raw_transit']

    # ha_eval and fitslc are only needed (and only defined) for the
    # multi-sample fit further down
    if config.multi_sample:
        ha_eval_str += ['transit', 'peak']
        ha_eval = [0.0, None]
        fitslc = slice(1, 3)

    ind_eval = ha_eval_str.index(config.evaluate_gain_at)

    # Determine dimensions
    direction = ['amp', 'phi']
    nparam = len(param_name)
    ngain = len(ha_eval_str)
    ndir = len(direction)

    # Determine frequencies
    data = andata.CorrData.from_acq_h5(file_list,
                                       datasets=(),
                                       start=start,
                                       stop=stop)
    freq = data.freq

    if physical_freq is not None:
        index_freq = np.array(
            [np.argmin(np.abs(ff - freq)) for ff in physical_freq])
        freq_sel = utils.convert_to_slice(index_freq)
        freq = freq[index_freq]
    else:
        index_freq = np.arange(freq.size)
        freq_sel = None

    nfreq = freq.size

    # Compute flux of source
    inv_rt_flux_density = tools.invert_no_zero(
        np.sqrt(FluxCatalog[source].predict_flux(freq)))

    # Read in the eigenvalues for all frequencies
    data = andata.CorrData.from_acq_h5(file_list,
                                       datasets=['erms', 'eval'],
                                       freq_sel=freq_sel,
                                       start=start,
                                       stop=stop)

    # Determine source coordinates
    this_csd = np.floor(ephemeris.unix_to_csd(np.median(data.time)))
    timestamp0 = ephemeris.transit_times(FluxCatalog[source].skyfield,
                                         ephemeris.csd_to_unix(this_csd))[0]
    src_ra, src_dec = ephemeris.object_coords(FluxCatalog[source].skyfield,
                                              date=timestamp0,
                                              deg=True)

    # Hour angle, wrapped into [-180, 180] degrees and converted to radians
    ra = ephemeris.lsa(data.time)
    ha = ra - src_ra
    ha = ha - (ha > 180.0) * 360.0 + (ha < -180.0) * 360.0
    ha = np.radians(ha)

    itrans = np.argmin(np.abs(ha))

    # Samples beyond 75% of the largest hour angle count as off source
    window = 0.75 * np.max(np.abs(ha))

    off_source = np.abs(ha) > window

    mlog.info("CSD %d" % this_csd)
    mlog.info("Hour angle at transit (%d of %d):  %0.2f deg   " %
              (itrans, len(ha), np.degrees(ha[itrans])))
    mlog.info("Hour angle off source: %0.2f deg" %
              np.median(np.abs(np.degrees(ha[off_source]))))

    src_dec = np.radians(src_dec)
    lat = np.radians(ephemeris.CHIMELATITUDE)

    # Determine division of frequencies
    ninput = data.ninput
    ntime = data.ntime
    nblock_freq = int(np.ceil(nfreq / float(config.nfreq_per_block)))

    # Determine bad inputs from a subset (roughly every tenth) of the
    # frequencies that have a non-zero erms at all times
    eps = 10.0 * np.finfo(data['erms'].dtype).eps
    good_freq = np.flatnonzero(np.all(data['erms'][:] > eps, axis=-1))
    ind_sub_freq = good_freq[slice(0, good_freq.size,
                                   max(int(good_freq.size / 10), 1))]

    tmp_data = andata.CorrData.from_acq_h5(file_list,
                                           datasets=['evec'],
                                           freq_sel=ind_sub_freq,
                                           start=start,
                                           stop=stop)
    eps = 10.0 * np.finfo(tmp_data['evec'].dtype).eps
    bad_input = np.flatnonzero(
        np.all(np.abs(tmp_data['evec'][:, 0]) < eps, axis=(0, 2)))

    input_axis = tmp_data.input.copy()

    del tmp_data

    # Query layout database for correlator inputs
    if inputmap is None:
        inputmap = tools.get_correlator_inputs(
            datetime.datetime.utcfromtimestamp(data.time[itrans]),
            correlator='chime')

    inputmap = tools.reorder_correlator_inputs(input_axis, inputmap)

    tools.change_chime_location(rotation=config.telescope_rotation)

    # Determine x and y pol index
    xfeeds = np.array([
        idf for idf, inp in enumerate(inputmap)
        if (idf not in bad_input) and tools.is_array_x(inp)
    ])
    yfeeds = np.array([
        idf for idf, inp in enumerate(inputmap)
        if (idf not in bad_input) and tools.is_array_y(inp)
    ])

    pol = [yfeeds, xfeeds]
    polstr = ['Y', 'X']
    npol = len(pol)

    neigen = min(max(npol, config.neigen), data['eval'].shape[1])

    # Position of the phase reference within each polarization's feed list
    phase_ref = config.phase_reference_index
    phase_ref_by_pol = [
        pol[pp].tolist().index(phase_ref[pp]) for pp in range(npol)
    ]

    # Calculate dynamic range
    eval0_off_source = np.median(data['eval'][:, 0, off_source], axis=-1)

    dyn = data['eval'][:, 1, :] * tools.invert_no_zero(
        eval0_off_source[:, np.newaxis])

    # Determine frequencies to mask
    # (builtin bool: the np.bool alias was removed in NumPy 1.24)
    not_rfi = np.ones((nfreq, 1), dtype=bool)
    if config.mask_rfi is not None:
        for frng in config.mask_rfi:
            not_rfi[:, 0] &= ((freq < frng[0]) | (freq > frng[1]))

    mlog.info("%0.1f percent of frequencies available after masking RFI." %
              (100.0 * np.sum(not_rfi, dtype=np.float32) / float(nfreq), ))

    if source in config.dyn_rng_threshold:
        dyn_rng_threshold = config.dyn_rng_threshold[source]
    else:
        dyn_rng_threshold = config.dyn_rng_threshold.default

    mlog.info("Dynamic range threshold set to %0.1f." % dyn_rng_threshold)

    dyn_flg = dyn > dyn_rng_threshold

    # Calculate fit flag
    fit_flag = np.zeros((nfreq, npol, ntime), dtype=bool)
    for pp in range(npol):

        mlog.info("Dynamic Range Nsample, Pol %d:  %s" % (pp, ','.join([
            "%d" % xx for xx in np.percentile(np.sum(dyn_flg, axis=-1),
                                              [25, 50, 75, 100])
        ])))

        if config.nsigma1 is None:
            fit_flag[:, pp, :] = dyn_flg & not_rfi

        else:

            fit_window = config.nsigma1 * np.radians(
                utils.get_window(freq, pol=polstr[pp], dec=src_dec, deg=True))

            win_flg = np.abs(ha)[np.newaxis, :] <= fit_window[:, np.newaxis]

            fit_flag[:, pp, :] = (dyn_flg & win_flg & not_rfi)

    # Calculate base error
    base_err = data['erms'][:, np.newaxis, :]

    # Check for sign flips
    ref_resp = andata.CorrData.from_acq_h5(file_list,
                                           datasets=['evec'],
                                           input_sel=config.eigen_reference,
                                           freq_sel=freq_sel,
                                           start=start,
                                           stop=stop)['evec'][:, 0:neigen,
                                                              0, :]

    sign0 = 1.0 - 2.0 * (ref_resp.real < 0.0)

    # Check that we have the correct reference feed.  The exception used to
    # be constructed without being raised, so the check had no effect.
    if np.any(np.abs(ref_resp.imag) > 0.0):
        raise ValueError("Reference feed %d is incorrect." %
                         config.eigen_reference)

    del ref_resp

    # Save index_map
    results = {}
    results['model'] = model_name
    results['param'] = param_name
    results['freq'] = data.index_map['freq'][:]
    results['input'] = input_axis
    results['eval'] = ha_eval_str
    results['dir'] = direction

    for key, val in model_kwargs:
        results[key] = val

    # Initialize numpy arrays to hold results
    if config.return_response:

        results['response'] = np.zeros((nfreq, ninput, ntime),
                                       dtype=np.complex64)
        results['response_err'] = np.zeros((nfreq, ninput, ntime),
                                           dtype=np.float32)
        results['fit_flag'] = fit_flag
        results['ha_axis'] = ha
        results['ra'] = ra

    else:

        results['gain_eval'] = np.zeros((nfreq, ninput, ngain),
                                        dtype=np.complex64)
        results['weight_eval'] = np.zeros((nfreq, ninput, ngain),
                                          dtype=np.float32)
        results['frac_gain_err'] = np.zeros((nfreq, ninput, ngain, ndir),
                                            dtype=np.float32)

        results['parameter'] = np.zeros((nfreq, ninput, nparam),
                                        dtype=np.float32)
        results['parameter_err'] = np.zeros((nfreq, ninput, nparam),
                                            dtype=np.float32)

        # -1 marks entries for which no gain could be evaluated
        results['index_eval'] = np.full((nfreq, ninput), -1, dtype=np.int8)
        results['gain'] = np.zeros((nfreq, ninput), dtype=np.complex64)
        results['weight'] = np.zeros((nfreq, ninput), dtype=np.float32)

        results['ndof'] = np.zeros((nfreq, ninput, ndir), dtype=np.float32)
        results['chisq'] = np.zeros((nfreq, ninput, ndir), dtype=np.float32)

        results['timing'] = np.zeros((nfreq, ninput), dtype=np.complex64)

    # Initialize metric like variables
    results['runtime'] = np.zeros((nblock_freq, 2), dtype=np.float64)

    # Compute distances relative to the phase reference of each polarization
    dist = tools.get_feed_positions(inputmap)
    for pp, feeds in enumerate(pol):
        dist[feeds, :] -= dist[phase_ref[pp], np.newaxis, :]

    # Loop over frequency blocks
    for gg in range(nblock_freq):

        mlog.info("Frequency block %d of %d." % (gg, nblock_freq))

        fstart = gg * config.nfreq_per_block
        fstop = min((gg + 1) * config.nfreq_per_block, nfreq)
        findex = np.arange(fstart, fstop)
        ngroup = findex.size

        freq_sel = utils.convert_to_slice(index_freq[findex])

        timeit_start_gg = time.time()

        # Determine the range of time samples to load for this block
        if config.return_response:
            gstart = start
            gstop = stop

            tslc = slice(0, ntime)

        else:
            good_times = np.flatnonzero(np.any(fit_flag[findex], axis=(0, 1)))

            if good_times.size == 0:
                continue

            gstart = int(np.min(good_times))
            gstop = int(np.max(good_times)) + 1

            tslc = slice(gstart, gstop)

            # good_times indexes the [start, stop) selection loaded above,
            # so shift by ``start`` before handing the range to from_acq_h5.
            # A start of None means no offset (adding None raised TypeError).
            tstart = 0 if start is None else start
            gstart += tstart
            gstop += tstart

        hag = ha[tslc]
        itrans = np.argmin(np.abs(hag))

        # Load eigenvectors.
        nudata = andata.CorrData.from_acq_h5(
            file_list,
            datasets=['evec', 'vis', 'flags/vis_weight'],
            apply_gain=False,
            freq_sel=freq_sel,
            start=gstart,
            stop=gstop)

        # Save time to load data
        results['runtime'][gg, 0] = time.time() - timeit_start_gg
        timeit_start_gg = time.time()

        mlog.info("Time to load (per frequency):  %0.3f sec" %
                  (results['runtime'][gg, 0] / ngroup, ))

        # Loop over polarizations
        for pp, feeds in enumerate(pol):

            # Get timing correction, referenced to the phase reference input
            # and to its value at transit
            if tcorr is not None:
                tgain = tcorr.get_gain(nudata.freq, nudata.input[feeds],
                                       nudata.time)
                tgain *= tgain[:, phase_ref_by_pol[pp], np.newaxis, :].conj()

                tgain_transit = tgain[:, :, itrans].copy()
                tgain *= tgain_transit[:, :, np.newaxis].conj()

            # Create the polarization masking vector
            P = np.zeros((1, ninput, 1), dtype=np.float64)
            P[:, feeds, :] = 1.0

            # Loop over frequencies
            for gff, ff in enumerate(findex):

                flg = fit_flag[ff, pp, tslc]

                # Skip frequencies with too few flagged samples for the fit
                if (2 * int(np.sum(flg))) < (nparam +
                                             1) and not config.return_response:
                    continue

                # Normalize by eigenvalue and correct for pi phase flips in process.
                resp = (nudata['evec'][gff, 0:neigen, :, :] *
                        np.sqrt(data['eval'][ff, 0:neigen, np.newaxis, tslc]) *
                        sign0[ff, :, np.newaxis, tslc])

                # Rotate to single-pol response
                # Move time to first axis for the matrix multiplication
                invL = tools.invert_no_zero(
                    np.rollaxis(data['eval'][ff, 0:neigen, np.newaxis, tslc],
                                -1, 0))

                UT = np.rollaxis(resp, -1, 0)
                U = np.swapaxes(UT, -1, -2)

                mu, vp = np.linalg.eigh(np.matmul(UT.conj(), P * U))

                rsign0 = (1.0 - 2.0 * (vp[:, 0, np.newaxis, :].real < 0.0))

                resp = mu[:, np.newaxis, :] * np.matmul(U, rsign0 * vp * invL)

                # Extract feeds of this pol
                # Transpose so that time is back to last axis
                resp = resp[:, feeds, -1].T

                # Compute error on response
                dataflg = ((nudata.weight[gff, feeds, :] > 0.0)
                           & np.isfinite(nudata.weight[gff, feeds, :])).astype(
                               np.float32)

                resp_err = dataflg * base_err[ff, :, tslc] * np.sqrt(
                    nudata.vis[gff, feeds, :].real) * tools.invert_no_zero(
                        np.sqrt(mu[np.newaxis, :, -1]))

                # Reference to specific input
                resp *= np.exp(
                    -1.0J *
                    np.angle(resp[phase_ref_by_pol[pp], np.newaxis, :]))

                # Apply timing correction
                if tcorr is not None:
                    resp *= tgain[gff]

                    results['timing'][ff, feeds] = tgain_transit[gff]

                # Fringestop
                lmbda = scipy.constants.c * 1e-6 / nudata.freq[gff]

                resp *= tools.fringestop_phase(
                    hag[np.newaxis, :], lat, src_dec,
                    dist[feeds, 0, np.newaxis] / lmbda,
                    dist[feeds, 1, np.newaxis] / lmbda)

                # Normalize by source flux
                resp *= inv_rt_flux_density[ff]
                resp_err *= inv_rt_flux_density[ff]

                # If requested, reference phase to the median value
                if config.med_phase_ref:
                    phi0 = np.angle(resp[:, itrans, np.newaxis])
                    resp *= np.exp(-1.0J * phi0)
                    resp *= np.exp(
                        -1.0J *
                        np.median(np.angle(resp), axis=0, keepdims=True))
                    resp *= np.exp(1.0J * phi0)

                # Check if return_response flag was set by user
                if not config.return_response:

                    if config.multi_sample:
                        moving_window = config.nsigma2 and config.nsigma2 * np.radians(
                            utils.get_window(nudata.freq[gff],
                                             pol=polstr[pp],
                                             dec=src_dec,
                                             deg=True))

                    # Loop over inputs
                    for pii, ii in enumerate(feeds):

                        is_good = flg & (np.abs(resp[pii, :]) >
                                         0.0) & (resp_err[pii, :] > 0.0)

                        # Set the initial gains based on raw response at transit
                        if is_good[itrans]:
                            results['gain_eval'][ff, ii,
                                                 0] = tools.invert_no_zero(
                                                     resp[pii, itrans])
                            results['frac_gain_err'][ff, ii, 0, :] = (
                                resp_err[pii, itrans] * tools.invert_no_zero(
                                    np.abs(resp[pii, itrans])))
                            results['weight_eval'][ff, ii, 0] = 0.5 * (
                                np.abs(resp[pii, itrans])**2 *
                                tools.invert_no_zero(resp_err[pii, itrans]))**2

                            results['index_eval'][ff, ii] = 0
                            results['gain'][ff,
                                            ii] = results['gain_eval'][ff, ii,
                                                                       0]
                            results['weight'][ff,
                                              ii] = results['weight_eval'][ff,
                                                                           ii,
                                                                           0]

                        # Exit if not performing multi time sample fit
                        if not config.multi_sample:
                            continue

                        if (2 * int(np.sum(is_good))) < (nparam + 1):
                            continue

                        # A failed fit for one input is logged (if verbose)
                        # and skipped rather than aborting the whole run
                        try:
                            param, param_err, gain, gain_err, ndof, chisq, tval = fitter_function(
                                hag[is_good],
                                resp[pii, is_good],
                                resp_err[pii, is_good],
                                ha_eval,
                                window=moving_window,
                                tval=tval,
                                **config.fit)
                        except Exception as rex:
                            if config.verbose:
                                mlog.info(
                                    "Frequency %0.2f, Feed %d failed with error: %s"
                                    % (nudata.freq[gff], ii, rex))
                            continue

                        # Check for nan
                        wfit = (np.abs(gain) *
                                tools.invert_no_zero(np.abs(gain_err)))**2
                        if np.any(~np.isfinite(np.abs(gain))) or np.any(
                                ~np.isfinite(wfit)):
                            continue

                        # Save to results using the convention that you should *multiply* the visibilites by the gains
                        results['gain_eval'][
                            ff, ii, fitslc] = tools.invert_no_zero(gain)
                        results['frac_gain_err'][ff, ii, fitslc,
                                                 0] = gain_err.real
                        results['frac_gain_err'][ff, ii, fitslc,
                                                 1] = gain_err.imag
                        results['weight_eval'][ff, ii, fitslc] = wfit

                        results['parameter'][ff, ii, :] = param
                        results['parameter_err'][ff, ii, :] = param_err

                        results['ndof'][ff, ii, :] = ndof
                        results['chisq'][ff, ii, :] = chisq

                        # Check if the fit was successful and update the gain evaluation index appropriately
                        if np.all((chisq / ndof.astype(np.float32)
                                   ) <= config.chisq_per_dof_threshold):
                            results['index_eval'][ff, ii] = ind_eval
                            results['gain'][ff, ii] = results['gain_eval'][
                                ff, ii, ind_eval]
                            results['weight'][ff, ii] = results['weight_eval'][
                                ff, ii, ind_eval]

                else:

                    # Return response only (do not fit model)
                    results['response'][ff, feeds, :] = resp
                    results['response_err'][ff, feeds, :] = resp_err

        # Save time to fit data
        results['runtime'][gg, 1] = time.time() - timeit_start_gg

        mlog.info("Time to fit (per frequency):  %0.3f sec" %
                  (results['runtime'][gg, 1] / ngroup, ))

        # Clean up
        del nudata
        gc.collect()

    # Print total run time
    mlog.info("TOTAL TIME TO LOAD: %0.3f min" %
              (np.sum(results['runtime'][:, 0]) / 60.0, ))
    mlog.info("TOTAL TIME TO FIT:  %0.3f min" %
              (np.sum(results['runtime'][:, 1]) / 60.0, ))

    # Set the best estimate of the gain
    if not config.return_response:

        flag = results['index_eval'] >= 0
        gain = results['gain']

        # Compute amplitude
        amp = np.abs(gain)

        # Hard cutoffs on the amplitude
        med_amp = np.median(amp[flag])
        min_amp = med_amp * config.min_amp_scale_factor
        max_amp = med_amp * config.max_amp_scale_factor

        flag &= ((amp >= min_amp) & (amp <= max_amp))

        # Flag outliers in amplitude for each frequency
        for pp, feeds in enumerate(pol):

            med_amp_by_pol = np.zeros(nfreq, dtype=np.float32)
            sig_amp_by_pol = np.zeros(nfreq, dtype=np.float32)

            for ff in range(nfreq):

                this_flag = flag[ff, feeds]

                if np.any(this_flag):

                    med, slow, shigh = utils.estimate_directional_scale(
                        amp[ff, feeds[this_flag]])
                    lower = med - config.nsigma_outlier * slow
                    upper = med + config.nsigma_outlier * shigh

                    flag[ff, feeds] &= ((amp[ff, feeds] >= lower) &
                                        (amp[ff, feeds] <= upper))

                    med_amp_by_pol[ff] = med
                    sig_amp_by_pol[ff] = 0.5 * (shigh - slow) / np.sqrt(
                        np.sum(this_flag, dtype=np.float32))

            if config.nsigma_med_outlier:

                med_flag = med_amp_by_pol > 0.0

                not_outlier = flag_outliers(med_amp_by_pol,
                                            med_flag,
                                            window=config.window_med_outlier,
                                            nsigma=config.nsigma_med_outlier)
                flag[:, feeds] &= not_outlier[:, np.newaxis]

                # (builtin int: the np.int alias was removed in NumPy 1.24)
                mlog.info("Pol %s:  %d frequencies are outliers." %
                          (polstr[pp],
                           np.sum(~not_outlier & med_flag, dtype=int)))

        # Determine bad frequencies
        flag_freq = (np.sum(flag, axis=1, dtype=np.float32) /
                     float(ninput)) > config.threshold_good_freq
        good_freq = np.flatnonzero(flag_freq)

        # Determine bad inputs
        fraction_good = np.sum(flag[good_freq, :], axis=0,
                               dtype=np.float32) / float(good_freq.size)
        flag_input = fraction_good > config.threshold_good_input

        # Finalize flag
        flag &= (flag_freq[:, np.newaxis] & flag_input[np.newaxis, :])

        # Interpolate gains
        interp_gain, interp_weight = interpolate_gain(
            freq,
            gain,
            results['weight'],
            flag=flag,
            length_scale=config.interpolation_length_scale,
            mlog=mlog)
        # Save gains to object
        results['flag'] = flag
        results['gain'] = interp_gain
        results['weight'] = interp_weight

    # Return results
    return results
Exemplo n.º 11
0
def main(config_file=None, logging_params=DEFAULT_LOGGING):
    """Regress delay measurements against a configurable set of features.

    The routine loads the stability (delay) data and the temperature data,
    builds up to four groups of features (NS distance, temperature, cable
    monitor, timing) depending on which sections of the configuration are
    enabled, optionally subtracts the mean, and then, for every bootstrap
    realization, either fits the features to the delays or evaluates a set
    of previously saved coefficients.  Depending on ``config.output`` the
    statistics, coefficients and residuals are written to ``stat_*.h5``,
    ``coeff_*.h5`` and ``resid_*.h5`` files in the output directory.

    Parameters
    ----------
    config_file : str, optional
        Path to a YAML configuration file.  Its contents are merged on top
        of a deep copy of ``DEFAULTS``.  If None, the defaults are used.
    logging_params
        Forwarded unchanged to ``log.setup_logging``.

    Returns
    -------
    None
    """

    # Load config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        print(config_file)
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Setup logging
    log.setup_logging(logging_params)
    logger = log.get_logger(__name__)

    timer = Timer(logger)

    # Load data.  Relative filenames are resolved against config.directory.
    sfile = config.data.filename if os.path.isabs(
        config.data.filename) else os.path.join(config.directory,
                                                config.data.filename)
    sdata = StabilityData.from_file(sfile)

    ninput, ntime = sdata['tau'].shape

    # Load temperature data
    tfile = (config.temperature.filename
             if os.path.isabs(config.temperature.filename) else os.path.join(
                 config.directory, config.temperature.filename))

    tkeys = ['flag', 'data_flag', 'outlier']
    if config.temperature.load:
        tkeys += config.temperature.load

    tdata = TempData.from_acq_h5(tfile, datasets=tkeys)

    # Query layout database at the median time of the data
    inputmap = tools.get_correlator_inputs(ephemeris.unix_to_datetime(
        np.median(sdata.time[:])),
                                           correlator='chime')

    good_input = np.flatnonzero(np.any(sdata['flags']['tau'][:], axis=-1))
    pol = sutil.get_pol(sdata, inputmap)
    npol = len(pol)

    mezz_index, crate_index = sutil.get_mezz_and_crate(sdata, inputmap)

    # The phase reference is either every input on the requested mezzanine
    # (one group per polarisation) or the value given in the config.
    if config.mezz_ref.enable:
        phase_ref = [
            ipol[mezz_index[ipol] == iref]
            for ipol, iref in zip(pol, config.mezz_ref.mezz)
        ]
    else:
        phase_ref = config.data.phase_ref

    # Load timing
    if config.timing.enable:

        # Extract filenames from config.  The high-pass and low-pass
        # filtered versions live in 'hpf' and 'lpf' sub-directories next
        # to each timing file.
        timing_files = [
            tf if os.path.isabs(tf) else os.path.join(config.directory, tf)
            for tf in config.timing.files
        ]
        timing_files_hpf = [
            os.path.join(os.path.dirname(tf), 'hpf', os.path.basename(tf))
            for tf in timing_files
        ]
        timing_files_lpf = [
            os.path.join(os.path.dirname(tf), 'lpf', os.path.basename(tf))
            for tf in timing_files
        ]

        # If requested, add the timing data back into the delay data
        if config.timing.add.enable:

            timer.start("Adding timing data to delay measurements.")

            ns_tau, _, ns_flag, ns_inputs = sutil.get_timing_correction(
                sdata, timing_files, **config.timing.add.kwargs)

            index = timing.map_input_to_noise_source(sdata.index_map['input'],
                                                     ns_inputs)

            timing_tau = ns_tau[index, :]
            timing_flag = ns_flag[index, :]

            # Reference the timing correction to the same input that the
            # delay data are referenced to, polarisation by polarisation.
            for ipol, iref in zip(pol, config.data.phase_ref):
                timing_tau[ipol, :] = timing_tau[ipol, :] - timing_tau[
                    iref, np.newaxis, :]
                timing_flag[ipol, :] = timing_flag[ipol, :] & timing_flag[
                    iref, np.newaxis, :]

            sdata['tau'][:] = sdata['tau'][:] + timing_tau
            sdata['flags']['tau'][:] = sdata['flags']['tau'][:] & timing_flag

            timer.stop()

        # Extract the dependent variables from the timing dataset
        timer.start("Calculating timing dependence.")

        if config.timing.sep_delay:
            logger.info("Fitting HPF and LPF timing correction separately.")
            files = timing_files_hpf
            files2 = timing_files_lpf
        else:
            files2 = None
            if config.timing.hpf_delay:
                logger.info("Using HPF timing correction for delay.")
                files = timing_files_hpf
            elif config.timing.lpf_delay:
                logger.info("Using LPF timing correction for delay.")
                files = timing_files_lpf
            else:
                logger.info("Using full timing correction for delay.")
                files = timing_files

        kwargs = {}
        if config.timing.lpf_amp:
            logger.info("Using LPF timing correction for amplitude.")
            kwargs['afiles'] = timing_files_lpf
        elif config.timing.hpf_amp:
            logger.info("Using HPF timing correction for amplitude.")
            kwargs['afiles'] = timing_files_hpf
        else:
            logger.info("Using full timing correction for amplitude.")
            kwargs['afiles'] = timing_files

        # Forward optional settings only if they are present in the config
        for key in ['ns_ref', 'inter_cmn', 'fit_amp', 'ref_amp', 'cmn_amp']:
            if key in config.timing:
                kwargs[key] = config.timing[key]

        xtiming, xtiming_flag, xtiming_group = sutil.timing_dependence(
            sdata, files, inputmap, **kwargs)

        if files2 is not None:
            logger.info("Calculating second timing dependence.")
            # The amplitude is not fit a second time for the second file set
            kwargs['fit_amp'] = False
            xtiming2, xtiming2_flag, xtiming2_group = sutil.timing_dependence(
                sdata, files2, inputmap, **kwargs)

            xtiming = np.concatenate((xtiming, xtiming2), axis=-1)
            xtiming_flag = np.concatenate((xtiming_flag, xtiming2_flag),
                                          axis=-1)
            xtiming_group = np.concatenate((xtiming_group, xtiming2_group),
                                           axis=-1)

        timer.stop()

    else:
        xtiming = None
        xtiming_flag = None
        xtiming_group = None

    # Reference delay data to mezzanine
    if config.mezz_ref.enable:

        timer.start("Referencing delay measurements to mezzanine.")

        for ipol, iref in zip(pol, config.mezz_ref.mezz):

            this_mezz = ipol[mezz_index[ipol] == iref]

            # Flag-weighted average delay over the inputs on the reference
            # mezzanine, evaluated independently at each time sample.
            wmezz = sdata['flags']['tau'][this_mezz, :].astype(np.float32)

            norm = np.sum(wmezz, axis=0)

            taut_mezz = np.sum(wmezz * sdata['tau'][this_mezz, :],
                               axis=0) * tools.invert_no_zero(norm)
            flagt_mezz = norm > 0.0

            sdata['tau'][
                ipol, :] = sdata['tau'][ipol, :] - taut_mezz[np.newaxis, :]
            sdata['flags']['tau'][ipol, :] = sdata['flags']['tau'][
                ipol, :] & flagt_mezz[np.newaxis, :]

        timer.stop()

    # Load cable monitor
    if config.cable_monitor.enable:

        timer.start("Calculating cable monitor dependence.")

        cbl = timing.TimingCorrection.from_acq_h5(
            config.cable_monitor.filename)

        kwargs = {'include_diff': config.cable_monitor.include_diff}

        xcable, xcable_flag, xcable_group = sutil.cable_monitor_dependence(
            sdata, cbl, **kwargs)

        timer.stop()

    else:
        xcable = None
        xcable_flag = None
        xcable_group = None

    # Load NS distance
    if config.ns_distance.enable:

        timer.start("Calculating NS distance dependence.")

        kwargs = {}
        kwargs['phase_ref'] = phase_ref

        for key in [
                'sensor', 'temp_field', 'sep_cyl', 'sep_feed',
                'include_offset', 'include_ha'
        ]:
            if key in config.ns_distance:
                kwargs[key] = config.ns_distance[key]

        if config.ns_distance.use_cable_monitor:
            kwargs['is_cable_monitor'] = True
            kwargs['use_alpha'] = config.ns_distance.use_alpha
            nsx = timing.TimingCorrection.from_acq_h5(
                config.cable_monitor.filename)
        else:
            kwargs['is_cable_monitor'] = False
            nsx = tdata

        xdist, xdist_flag, xdist_group = sutil.ns_distance_dependence(
            sdata, nsx, inputmap, **kwargs)

        if (config.ns_distance.deriv
                is not None) and (config.ns_distance.deriv > 0):

            for dd in range(1, config.ns_distance.deriv + 1):

                # NOTE(review): the derivative terms are always computed
                # from tdata, even when use_cable_monitor selected the
                # cable monitor dataset (nsx) above and kwargs still
                # carries is_cable_monitor=True -- confirm this is intended.
                d_xdist, d_xdist_flag, d_xdist_group = sutil.ns_distance_dependence(
                    sdata, tdata, inputmap, deriv=dd, **kwargs)

                # Only the feature at index 1 of the derivative is appended
                tind = np.atleast_1d(1)
                xdist = np.concatenate((xdist, d_xdist[:, :, tind]), axis=-1)
                xdist_flag = np.concatenate(
                    (xdist_flag, d_xdist_flag[:, :, tind]), axis=-1)
                xdist_group = np.concatenate(
                    (xdist_group, d_xdist_group[:, tind]), axis=-1)

        timer.stop()

    else:
        xdist = None
        xdist_flag = None
        xdist_group = None

    # Load temperatures
    if config.temperature.enable:

        timer.start("Calculating temperature dependence.")

        xtemp, xtemp_flag, xtemp_group, xtemp_name = sutil.temperature_dependence(
            sdata,
            tdata,
            config.temperature.sensor,
            field=config.temperature.temp_field,
            inputmap=inputmap,
            phase_ref=phase_ref,
            check_hut=config.temperature.check_hut)

        if (config.temperature.deriv
                is not None) and (config.temperature.deriv > 0):

            for dd in range(1, config.temperature.deriv + 1):

                d_xtemp, d_xtemp_flag, d_xtemp_group, d_xtemp_name = sutil.temperature_dependence(
                    sdata,
                    tdata,
                    config.temperature.sensor,
                    field=config.temperature.temp_field,
                    deriv=dd,
                    inputmap=inputmap,
                    phase_ref=phase_ref,
                    check_hut=config.temperature.check_hut)

                xtemp = np.concatenate((xtemp, d_xtemp), axis=-1)
                xtemp_flag = np.concatenate((xtemp_flag, d_xtemp_flag),
                                            axis=-1)
                xtemp_group = np.concatenate((xtemp_group, d_xtemp_group),
                                             axis=-1)
                xtemp_name += d_xtemp_name

        timer.stop()

    else:
        xtemp = None
        xtemp_flag = None
        xtemp_group = None
        xtemp_name = None

    # Combine into single feature matrix
    x, coeff_name = _concatenate(xdist,
                                 xtemp,
                                 xcable,
                                 xtiming,
                                 name_xtemp=xtemp_name)

    x_group, _ = _concatenate(xdist_group, xtemp_group, xcable_group,
                              xtiming_group)

    # A sample is usable only if every feature and the delay itself is good
    x_flag, _ = _concatenate(xdist_flag, xtemp_flag, xcable_flag, xtiming_flag)
    x_flag = np.all(x_flag, axis=-1) & sdata.flags['tau'][:]

    nfeature = x.shape[-1]

    logger.info("Fitting %d features." % nfeature)

    # Save data
    if config.preliminary_save.enable:

        if config.preliminary_save.filename is not None:
            ofile = (config.preliminary_save.filename if os.path.isabs(
                config.preliminary_save.filename) else os.path.join(
                    config.directory, config.preliminary_save.filename))
        else:
            ofile = os.path.splitext(
                sfile)[0] + '_%s.h5' % config.preliminary_save.suffix

        sdata.save(ofile, mode='w')

    # Subtract mean
    if config.mean_subtract:
        timer.start("Subtracting mean value.")

        tau, mu_tau, mu_tau_flag = sutil.mean_subtract(sdata,
                                                       sdata['tau'][:],
                                                       x_flag,
                                                       use_calibrator=True)

        mu_x = np.zeros(mu_tau.shape + (nfeature, ), dtype=x.dtype)
        mu_x_flag = np.zeros(mu_tau.shape + (nfeature, ), dtype=bool)
        x_no_mu = x.copy()
        for ff in range(nfeature):
            (x_no_mu[..., ff], mu_x[..., ff],
             mu_x_flag[..., ff]) = sutil.mean_subtract(sdata,
                                                       x[:, :, ff],
                                                       x_flag,
                                                       use_calibrator=True)
        timer.stop()

    else:
        x_no_mu = x.copy()
        tau = sdata['tau'][:].copy()

    # Calculate unique days.  bmap gives, for every time sample, the index
    # of its CSD within csd_uniq.
    csd_uniq, bmap = np.unique(sdata['csd'][:], return_inverse=True)
    ncsd = csd_uniq.size

    # Prepare unique sources, labelled as "<calibrator>/<source>"
    classification = np.char.add(np.char.add(sdata['calibrator'][:], '/'),
                                 sdata['source'][:])

    # If requested, load existing coefficients
    if config.coeff is not None:
        coeff = andata.BaseData.from_acq_h5(config.coeff)
        evaluate_only = True
    else:
        evaluate_only = False

    # If requested, set up boot strapping
    if config.bootstrap.enable:

        nboot = config.bootstrap.number
        nchoices = ncsd if config.bootstrap.by_transit else ntime
        nsample = int(config.bootstrap.fraction * nchoices)

        # Builtin int replaces the np.int alias removed in NumPy 1.24
        bindex = np.zeros((nboot, nsample), dtype=int)
        for roll in range(nboot):
            bindex[roll, :] = np.sort(
                np.random.choice(nchoices,
                                 size=nsample,
                                 replace=config.bootstrap.replace))

    else:

        # A single "realization" that uses every time sample once
        nboot = 1
        bindex = np.arange(ntime, dtype=int)[np.newaxis, :]

    # Prepare output
    if config.output.directory is not None:
        output_dir = config.output.directory
    else:
        # NOTE(review): input paths above are resolved against
        # config.directory, whereas this fallback reads
        # config.data.directory -- confirm the config defines it.
        output_dir = config.data.directory

    if config.output.suffix is not None:
        output_suffix = config.output.suffix
    else:
        output_suffix = os.path.splitext(os.path.basename(
            config.data.filename))[0]

    # Perform joint fit
    for bb, bind in enumerate(bindex):

        # When bootstrapping by transit, bind holds indices into csd_uniq
        # and is expanded to all time samples belonging to those days.
        if config.bootstrap.enable and config.bootstrap.by_transit:
            tind = np.concatenate(
                tuple([np.flatnonzero(bmap == ii) for ii in bind]))
        else:
            tind = bind

        ntime = tind.size

        if config.jackknife.enable:
            # Values <= 1.0 are interpreted as a fraction of the number of
            # unique days, larger values as an index into the unique days.
            start = int(
                config.jackknife.start * ncsd
            ) if config.jackknife.start <= 1.0 else config.jackknife.start
            end = int(
                config.jackknife.end *
                ncsd) if config.jackknife.end <= 1.0 else config.jackknife.end

            # NOTE(review): this mask has the length of bmap, but it is
            # used below to index tind and is passed to a fitter built from
            # the tind subset; the lengths only agree when tind covers
            # every time sample once -- confirm the behaviour when the
            # jackknife is combined with bootstrapping.
            time_flag_fit = (bmap >= start) & (bmap < end)

            if config.jackknife.restrict_stat:
                time_flag_stat = np.logical_not(time_flag_fit)
            else:
                time_flag_stat = np.ones(ntime, dtype=bool)

        else:
            time_flag_fit = np.ones(ntime, dtype=bool)
            time_flag_stat = np.ones(ntime, dtype=bool)

        logger.info(
            "Fitting data between %s (CSD %d) and %s (CSD %d)" %
            (ephemeris.unix_to_datetime(np.min(
                sdata.time[tind[time_flag_fit]])).strftime("%Y-%m-%d"),
             np.min(sdata['csd'][:][tind[time_flag_fit]]),
             ephemeris.unix_to_datetime(np.max(
                 sdata.time[tind[time_flag_fit]])).strftime("%Y-%m-%d"),
             np.max(sdata['csd'][:][tind[time_flag_fit]])))

        logger.info(
            "Calculating statistics from data between %s (CSD %d) and %s (CSD %d)"
            % (ephemeris.unix_to_datetime(
                np.min(sdata.time[tind[time_flag_stat]])).strftime("%Y-%m-%d"),
               np.min(sdata['csd'][:][tind[time_flag_stat]]),
               ephemeris.unix_to_datetime(
                   np.max(
                       sdata.time[tind[time_flag_stat]])).strftime("%Y-%m-%d"),
               np.max(sdata['csd'][:][tind[time_flag_stat]])))

        if evaluate_only:
            timer.start("Evaluating coefficients provided.")
            fitter = sutil.JointTempEvaluation(
                x_no_mu[:, tind, :],
                tau[:, tind],
                coeff['coeff'][:],
                flag=x_flag[:, tind],
                coeff_name=coeff.index_map['feature'][:],
                feature_name=coeff_name,
                intercept=coeff['intercept'][:],
                intercept_name=coeff.index_map['classification'][:],
                classification=classification[tind])
            timer.stop()

        else:
            timer.start("Setting up fit.  Bootstrap %d of %d." %
                        (bb + 1, nboot))

            fitter = sutil.JointTempRegression(
                x_no_mu[:, tind, :],
                tau[:, tind],
                x_group,
                flag=x_flag[:, tind],
                classification=classification[tind],
                coeff_name=coeff_name)
            timer.stop()

            timer.start("Performing fit.  Bootstrap %d of %d." %
                        (bb + 1, nboot))
            fitter.fit_temp(time_flag=time_flag_fit, **config.fit_options)
            timer.stop()

        # If bootstrapping, append counter to filename and record which
        # samples made up this realization.
        if config.bootstrap.enable:
            output_suffix_bb = output_suffix + "_bootstrap_%04d" % (
                config.bootstrap.index_start + bb, )

            with open(
                    os.path.join(output_dir,
                                 "bootstrap_index_%s.json" % output_suffix_bb),
                    'w') as jhandler:
                json.dump({
                    "bind": bind.tolist(),
                    "tind": tind.tolist()
                }, jhandler)

        else:
            output_suffix_bb = output_suffix

        # Save statistics to file
        if config.output.stat:

            # If requested, break the model up into its various components for calculating statistics
            stat_key = ['data', 'model', 'resid']
            if config.refine_model.enable:
                stat_add = fitter.refine_model(config.refine_model.include)
                stat_key += stat_add

            # Redefine axes so that the container matches the tind subset
            bdata = StabilityData()
            for dset in ["source", "csd", "calibrator", "calibrator_time"]:
                bdata.create_dataset(dset, data=sdata[dset][tind])

            bdata.create_index_map("time", sdata.index_map["time"][tind])
            bdata.create_index_map("input", sdata.index_map["input"][:])
            bdata.attrs["calibrator"] = sdata.attrs.get("calibrator", "CYG_A")

            # Calculate statistics for every (statistic, attribute,
            # reference) combination.
            stat = {}
            for statistic in ['std', 'mad']:
                for attr in stat_key:
                    for ref, ref_common in zip(['mezz', 'cmn'], [False, True]):
                        stat[(statistic, attr, ref)] = sutil.short_long_stat(
                            bdata,
                            getattr(fitter, attr),
                            fitter._flag & time_flag_stat[np.newaxis, :],
                            stat=statistic,
                            ref_common=ref_common,
                            pol=pol)

            output_filename = os.path.join(output_dir,
                                           "stat_%s.h5" % output_suffix_bb)

            write_stat(bdata, stat, fitter, output_filename)

        # Save coefficients to file
        if config.output.coeff:
            output_filename = os.path.join(output_dir,
                                           "coeff_%s.h5" % output_suffix_bb)

            write_coeff(sdata, fitter, output_filename)

        # Save residuals to file
        if config.output.resid:
            output_filename = os.path.join(output_dir,
                                           "resid_%s.h5" % output_suffix_bb)

            write_resid(sdata, fitter, output_filename)

        # Release the fitter before the next realization is set up
        del fitter
        gc.collect()
Exemplo n.º 12
0
def main(config_file=None, logging_params=DEFAULT_LOGGING):
    """Solve for redundant-baseline gains from data taken while the sun is up.

    The routine finds all ``*.h5`` files of the configured acquisition,
    selects the time samples that fall between each solar rising and the
    following solar setting (called a "transit" in the code), and for every
    transit, polarisation product, frequency and time sample solves two
    weighted linear least-squares problems: one for the log-amplitude and
    one for the phase of the per-input gains and the per-unique-baseline
    sky visibilities.  The results of each transit are written to an HDF5
    file in ``config.output_dir``.

    Parameters
    ----------
    config_file : str, optional
        Path to a YAML configuration file.  Its contents are merged on top
        of a deep copy of ``DEFAULTS``.  If None, the defaults are used.
    logging_params
        Forwarded unchanged to ``log.setup_logging``.

    Returns
    -------
    None
    """

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    # Set config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Set niceness.  os.nice takes an increment, hence the difference.
    current_niceness = os.nice(0)
    os.nice(config.niceness - current_niceness)
    mlog.info('Changing process niceness from %d to %d.  Confirm:  %d' %
              (current_niceness, config.niceness, os.nice(0)))

    # Find acquisition files
    acq_files = sorted(glob(os.path.join(config.data_dir, config.acq, "*.h5")))
    nfiles = len(acq_files)

    # Determine time range of each file.  For every time sample of the
    # concatenated acquisition, findex is the index of the file that holds
    # it and tindex is its position within that file.
    findex = []
    tindex = []
    for ii, filename in enumerate(acq_files):
        subdata = andata.CorrData.from_acq_h5(filename, datasets=())

        findex += [ii] * subdata.ntime
        tindex += range(subdata.ntime)

    findex = np.array(findex)
    tindex = np.array(tindex)

    # Determine transits within these files
    transits = []

    data = andata.CorrData.from_acq_h5(acq_files, datasets=())

    # Start the search one day early so that a sunrise preceding the first
    # sample is also found.
    solar_rise = ephemeris.solar_rising(data.time[0] - 24.0 * 3600.0,
                                        end_time=data.time[-1])

    for rr in solar_rise:

        ss = ephemeris.solar_setting(rr)[0]

        solar_flag = np.flatnonzero((data.time >= rr) & (data.time <= ss))

        if solar_flag.size > 0:

            solar_flag = solar_flag[::config.downsample]

            tval = data.time[solar_flag]

            this_findex = findex[solar_flag]
            this_tindex = tindex[solar_flag]

            file_list, tindices = [], []

            # Group the selected samples by the file that contains them
            for ii in range(nfiles):

                this_file = np.flatnonzero(this_findex == ii)

                if this_file.size > 0:

                    file_list.append(acq_files[ii])
                    tindices.append(this_tindex[this_file])

            date = ephemeris.unix_to_datetime(rr).strftime('%Y%m%dT%H%M%SZ')
            transits.append((date, tval, file_list, tindices))

    # Create file prefix and suffix
    prefix = []

    prefix.append("redundant_calibration")

    if config.output_prefix is not None:
        prefix.append(config.output_prefix)

    prefix = '_'.join(prefix)

    suffix = []

    if config.include_auto:
        suffix.append("wauto")
    else:
        suffix.append("noauto")

    if config.include_intracyl:
        suffix.append("wintra")
    else:
        suffix.append("nointra")

    if config.fix_degen:
        suffix.append("fixed_degen")
    else:
        suffix.append("degen")

    suffix = '_'.join(suffix)

    # Loop over solar transits
    for date, timestamps, files, time_indices in transits:

        nfiles = len(files)

        mlog.info("%s (%d files) " % (date, nfiles))

        output_file = os.path.join(config.output_dir,
                                   "%s_SUN_%s_%s.h5" % (prefix, date, suffix))

        mlog.info("Saving to:  %s" % output_file)

        # Get info about this set of files
        data = andata.CorrData.from_acq_h5(files,
                                           datasets=['flags/inputs'],
                                           apply_gain=False,
                                           renormalize=False)

        coord = sun_coord(timestamps, deg=True)

        fstart = config.freq_start if config.freq_start is not None else 0
        fstop = config.freq_stop if config.freq_stop is not None else data.freq.size
        freq_index = range(fstart, fstop)

        freq = data.freq[freq_index]

        ntime = timestamps.size
        nfreq = freq.size

        # Determine bad inputs
        if config.bad_input_file is None or not os.path.isfile(
                config.bad_input_file):
            bad_input = np.flatnonzero(
                ~np.all(data.flags['inputs'][:], axis=-1))
        else:
            # Pickle files must be opened in binary mode under Python 3.
            # NOTE(security): unpickling can execute arbitrary code; only
            # point bad_input_file at files from a trusted source.
            with open(config.bad_input_file, 'rb') as handler:
                bad_input = pickle.load(handler)

        mlog.info("%d inputs flagged as bad." % bad_input.size)

        nant = data.ninput

        # Determine polarization product maps
        dbinputs = tools.get_correlator_inputs(ephemeris.unix_to_datetime(
            timestamps[0]),
                                               correlator='chime')

        dbinputs = tools.reorder_correlator_inputs(data.input, dbinputs)

        feedpos = tools.get_feed_positions(dbinputs)

        # For each polarisation key, the indices of the products that are
        # kept and the corresponding baseline vectors.
        prod = defaultdict(list)
        dist = defaultdict(list)

        for pp, this_prod in enumerate(data.prod):

            aa, bb = this_prod
            inp_aa = dbinputs[aa]
            inp_bb = dbinputs[bb]

            if (aa in bad_input) or (bb in bad_input):
                continue

            if not tools.is_chime(inp_aa) or not tools.is_chime(inp_bb):
                continue

            if not config.include_intracyl and (inp_aa.cyl == inp_bb.cyl):
                continue

            if not config.include_auto and (aa == bb):
                continue

            this_dist = list(feedpos[aa, :] - feedpos[bb, :])

            if tools.is_array_x(inp_aa) and tools.is_array_x(inp_bb):
                key = 'XX'

            elif tools.is_array_y(inp_aa) and tools.is_array_y(inp_bb):
                key = 'YY'

            elif not config.include_crosspol:
                continue

            elif tools.is_array_x(inp_aa) and tools.is_array_y(inp_bb):
                key = 'XY'

            elif tools.is_array_y(inp_aa) and tools.is_array_x(inp_bb):
                key = 'YX'

            else:
                raise RuntimeError("CHIME feeds not polarized.")

            prod[key].append(pp)
            dist[key].append(this_dist)

        # Build the list of unique baselines across all polarisations.
        # bmap[key] maps every kept product to its row in ubaseline.
        polstr = sorted(prod.keys())
        polcnt = 0
        pol_sky_id = []
        bmap = {}
        for key in polstr:
            prod[key] = np.array(prod[key])
            dist[key] = np.array(dist[key])

            p_bmap, p_ubaseline = generate_mapping(dist[key])
            nubase = p_ubaseline.shape[0]

            # Offset by the number of unique baselines already registered
            bmap[key] = p_bmap + polcnt

            if polcnt > 0:

                ubaseline = np.concatenate((ubaseline, p_ubaseline), axis=0)
                pol_sky_id += [key] * nubase

            else:

                ubaseline = p_ubaseline.copy()
                pol_sky_id = [key] * nubase

            polcnt += nubase
            mlog.info("%d unique baselines" % polcnt)

        nsky = ubaseline.shape[0]

        # Create arrays to hold the results
        ores = {}
        ores['freq'] = freq
        ores['input'] = data.input
        ores['time'] = timestamps
        ores['coord'] = coord
        # Store as byte strings: h5py cannot write NumPy unicode ('U')
        # arrays, which is what a list of str becomes under Python 3.
        ores['pol'] = np.array(pol_sky_id, dtype='S')
        ores['baseline'] = ubaseline

        # Create array to hold gain results.  The last axis of 'err' holds
        # the amplitude (0) and phase (1) parameter variances.  The builtin
        # complex/float replace the aliases removed in NumPy 1.24.
        ores['gain'] = np.zeros((nfreq, nant, ntime), dtype=complex)
        ores['sky'] = np.zeros((nfreq, nsky, ntime), dtype=complex)
        ores['err'] = np.zeros((nfreq, nant + nsky, ntime, 2), dtype=float)

        # Loop over polarisations
        for key in polstr:

            reverse_map = bmap[key]
            p_prod = prod[key]

            # Sort the products so that redundant ones are contiguous
            isort = np.argsort(reverse_map)

            p_prod = p_prod[isort]

            p_ant1 = data.prod['input_a'][p_prod]
            p_ant2 = data.prod['input_b'][p_prod]
            p_vismap = reverse_map[isort]

            # Find the redundant groups.  Group ee spans the sorted
            # products edges[ee]:edges[ee + 1].
            tmp = np.where(np.diff(p_vismap) != 0)[0]
            edges = np.zeros(2 + tmp.size, dtype='int')
            edges[0] = 0
            edges[1:-1] = tmp + 1
            edges[-1] = p_vismap.size

            kept_base = np.unique(p_vismap)

            # Determine the unique antennas.  antmap translates an input
            # index into its position within kept_ants (-1 if not kept).
            kept_ants = np.unique(np.concatenate([p_ant1, p_ant2]))
            antmap = np.zeros(kept_ants.max() + 1, dtype='int') - 1

            p_nant = kept_ants.size
            for i in range(p_nant):
                antmap[kept_ants[i]] = i

            p_ant1_use = antmap[p_ant1].copy()
            p_ant2_use = antmap[p_ant2].copy()

            # Create matrix.  The parameters are the p_nant antenna terms
            # followed by the nred redundant-group terms.
            p_nvis = p_prod.size
            nred = edges.size - 1

            npar = p_nant + nred

            # A is the design matrix for the log-amplitudes (both antennas
            # enter with +1); B is the design matrix for the phases (first
            # antenna +1, second antenna -1).
            A = np.zeros((p_nvis, npar), dtype=np.float32)
            B = np.zeros((p_nvis, npar), dtype=np.float32)

            for kk in range(p_nant):

                flag_ant1 = p_ant1_use == kk
                if np.any(flag_ant1):
                    A[flag_ant1, kk] = 1.0
                    B[flag_ant1, kk] = 1.0

                flag_ant2 = p_ant2_use == kk
                if np.any(flag_ant2):
                    A[flag_ant2, kk] = 1.0
                    B[flag_ant2, kk] = -1.0

            for ee in range(nred):

                A[edges[ee]:edges[ee + 1], p_nant + ee] = 1.0

                B[edges[ee]:edges[ee + 1], p_nant + ee] = 1.0

            # Add equations to break degeneracy: one extra row for the
            # amplitudes (sum over antennas) and three for the phases (sum
            # over antennas, and the sums weighted by the two feed position
            # coordinates).  The matching right-hand sides are zero.
            if config.fix_degen:
                A = np.concatenate((A, np.zeros((1, npar), dtype=np.float32)))
                A[-1, 0:p_nant] = 1.0

                B = np.concatenate((B, np.zeros((3, npar), dtype=np.float32)))
                B[-3, 0:p_nant] = 1.0
                B[-2, 0:p_nant] = feedpos[kept_ants, 0]
                B[-1, 0:p_nant] = feedpos[kept_ants, 1]

            # Loop over frequencies
            for ff, find in enumerate(freq_index):

                mlog.info("Freq %d of %d.  %0.2f MHz." %
                          (ff + 1, nfreq, freq[ff]))

                # Running index along the time axis of the output arrays
                cnt = 0

                # Loop over files
                for ii, (filename, tind) in enumerate(zip(files,
                                                          time_indices)):

                    ntind = len(tind)
                    mlog.info("Processing file %s (%d time samples)" %
                              (filename, ntind))

                    # Compute noise weight as the median over all the time
                    # samples of this file.
                    with h5py.File(filename, 'r') as hf:
                        wnoise = np.median(hf['flags/vis_weight'][find, :, :],
                                           axis=-1)

                    # Loop over times
                    for tt in tind:

                        t0 = time.time()

                        mlog.info("Time %d of %d.  %d index of current file." %
                                  (cnt + 1, ntime, tt))

                        # Load visibilities.  Products with zero weight or
                        # zero amplitude at this sample get zero weight.
                        with h5py.File(filename, 'r') as hf:

                            snap = hf['vis'][find, :, tt]
                            wsnap = wnoise * (
                                (hf['flags/vis_weight'][find, :, tt] > 0.0) &
                                (np.abs(snap) > 0.0)).astype(np.float32)

                        # Extract relevant products for this polarization
                        snap = snap[p_prod]
                        wsnap = wsnap[p_prod]

                        # Turn into amplitude and phase, avoiding NaN
                        mask = (wsnap > 0.0)

                        amp = np.where(mask, np.log(np.abs(snap)), 0.0)
                        phi = np.where(mask, np.angle(snap), 0.0)

                        # Deal with phase wrapping: within each redundant
                        # group, shift by 2 pi any phase that is more than
                        # pi away from the group's middle sorted value.
                        for aa, bb in zip(edges[:-1], edges[1:]):
                            dphi = phi[aa:bb] - np.sort(phi[aa:bb])[int(
                                (bb - aa) / 2)]
                            phi[aa:bb] += (2.0 * np.pi * (dphi < -np.pi) -
                                           2.0 * np.pi * (dphi > np.pi))

                        # Add elements to fix degeneracy
                        if config.fix_degen:
                            amp = np.concatenate((amp, np.zeros(1)))
                            phi = np.concatenate((phi, np.zeros(3)))

                        # Determine noise matrix
                        inv_diagC = wsnap * np.abs(snap)**2 * 2.0

                        if config.fix_degen:
                            inv_diagC = np.concatenate((inv_diagC, np.ones(1)))

                        # Amplitude estimate and covariance
                        amp_param_cov = np.linalg.inv(
                            np.dot(A.T, inv_diagC[:, np.newaxis] * A))
                        amp_param = np.dot(amp_param_cov,
                                           np.dot(A.T, inv_diagC * amp))

                        # Phase estimate and covariance.  B has two more
                        # degeneracy rows than A, so extend the weights.
                        if config.fix_degen:
                            inv_diagC = np.concatenate((inv_diagC, np.ones(2)))

                        phi_param_cov = np.linalg.inv(
                            np.dot(B.T, inv_diagC[:, np.newaxis] * B))
                        phi_param = np.dot(phi_param_cov,
                                           np.dot(B.T, inv_diagC * phi))

                        # Save to large array
                        ores['gain'][ff, kept_ants,
                                     cnt] = np.exp(amp_param[0:p_nant] +
                                                   1.0J * phi_param[0:p_nant])

                        ores['sky'][ff, kept_base,
                                    cnt] = np.exp(amp_param[p_nant:] +
                                                  1.0J * phi_param[p_nant:])

                        ores['err'][ff, kept_ants, cnt,
                                    0] = np.diag(amp_param_cov[0:p_nant,
                                                               0:p_nant])
                        ores['err'][ff, nant + kept_base, cnt,
                                    0] = np.diag(amp_param_cov[p_nant:,
                                                               p_nant:])

                        ores['err'][ff, kept_ants, cnt,
                                    1] = np.diag(phi_param_cov[0:p_nant,
                                                               0:p_nant])
                        ores['err'][ff, nant + kept_base, cnt,
                                    1] = np.diag(phi_param_cov[p_nant:,
                                                               p_nant:])

                        # Increment time counter
                        cnt += 1

                        # Print time elapsed
                        mlog.info("Took %0.1f seconds." % (time.time() - t0, ))

        # Save to HDF5 file
        with h5py.File(output_file, 'w') as handler:

            handler.attrs['date'] = date

            # dict.items() works on both Python 2 and 3 (iteritems is
            # Python 2 only).
            for key, val in ores.items():
                handler.create_dataset(key, data=val)
Exemplo n.º 13
0
def main(config_file=None, logging_params=DEFAULT_LOGGING):

    # Load config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Setup logging
    log.setup_logging(logging_params)
    logger = log.get_logger(__name__)

    ## Load data for flagging
    # Load fpga restarts
    time_fpga_restart = []
    if config.fpga_restart_file is not None:

        with open(config.fpga_restart_file, 'r') as handler:
            for line in handler:
                time_fpga_restart.append(
                    ephemeris.datetime_to_unix(
                        ephemeris.timestr_to_datetime(line.split('_')[0])))

    time_fpga_restart = np.array(time_fpga_restart)

    # Load housekeeping flag
    if config.housekeeping_file is not None:
        ftemp = TempData.from_acq_h5(config.housekeeping_file,
                                     datasets=["time_flag"])
    else:
        ftemp = None

    # Load jump data
    if config.jump_file is not None:
        with h5py.File(config.jump_file, 'r') as handler:
            jump_time = handler["time"][:]
            jump_size = handler["jump_size"][:]
    else:
        jump_time = None
        jump_size = None

    # Load rain data
    if config.rain_file is not None:
        with h5py.File(config.rain_file, 'r') as handler:
            rain_ranges = handler["time_range_conservative"][:]
    else:
        rain_ranges = []

    # Load data flags
    data_flags = {}
    if config.data_flags:
        finder.connect_database()
        flag_types = finder.DataFlagType.select()
        possible_data_flags = []
        for ft in flag_types:
            possible_data_flags.append(ft.name)
            if ft.name in config.data_flags:
                new_data_flags = finder.DataFlag.select().where(
                    finder.DataFlag.type == ft)
                data_flags[ft.name] = list(new_data_flags)

    # Set desired range of time
    start_time = (ephemeris.datetime_to_unix(
        datetime.datetime(
            *config.start_date)) if config.start_date is not None else None)
    end_time = (ephemeris.datetime_to_unix(datetime.datetime(
        *config.end_date)) if config.end_date is not None else None)

    ## Find gain files
    files = {}
    for src in config.sources:
        files[src] = sorted(
            glob.glob(
                os.path.join(config.directory, src.lower(),
                             "%s_%s_lsd_*.h5" % (
                                 config.prefix,
                                 src.lower(),
                             ))))
    csd = {}
    for src in config.sources:
        csd[src] = np.array(
            [int(os.path.splitext(ff)[0][-4:]) for ff in files[src]])

    for src in config.sources:
        logger.info("%s:  %d files" % (src, len(csd[src])))

    ## Remove files that occur during flag
    csd_flag = {}
    for src in config.sources:

        body = ephemeris.source_dictionary[src]

        csd_flag[src] = np.ones(csd[src].size, dtype=np.bool)

        for ii, cc in enumerate(csd[src][:]):

            ttrans = ephemeris.transit_times(body,
                                             ephemeris.csd_to_unix(cc))[0]

            if (start_time is not None) and (ttrans < start_time):
                csd_flag[src][ii] = False
                continue

            if (end_time is not None) and (ttrans > end_time):
                csd_flag[src][ii] = False
                continue

            # If requested, remove daytime transits
            if not config.include_daytime.get(
                    src, config.include_daytime.default) and daytime_flag(
                        ttrans)[0]:
                logger.info("%s CSD %d:  daytime transit" % (src, cc))
                csd_flag[src][ii] = False
                continue

            # Remove transits during HKP drop out
            if ftemp is not None:
                itemp = np.flatnonzero(
                    (ftemp.time[:] >= (ttrans - config.transit_window))
                    & (ftemp.time[:] <= (ttrans + config.transit_window)))
                tempflg = ftemp['time_flag'][itemp]
                if (tempflg.size == 0) or ((np.sum(tempflg, dtype=np.float32) /
                                            float(tempflg.size)) < 0.50):
                    logger.info("%s CSD %d:  no housekeeping" % (src, cc))
                    csd_flag[src][ii] = False
                    continue

            # Remove transits near jumps
            if jump_time is not None:
                njump = np.sum((jump_size > config.min_jump_size)
                               & (jump_time > (ttrans - config.jump_window))
                               & (jump_time < ttrans))
                if njump > config.max_njump:
                    logger.info("%s CSD %d:  %d jumps before" %
                                (src, cc, njump))
                    csd_flag[src][ii] = False
                    continue

            # Remove transits near rain
            for rng in rain_ranges:
                if (((ttrans - config.transit_window) <= rng[1])
                        and ((ttrans + config.transit_window) >= rng[0])):

                    logger.info("%s CSD %d:  during rain" % (src, cc))
                    csd_flag[src][ii] = False
                    break

            # Remove transits during data flag
            for name, flag_list in data_flags.items():

                if csd_flag[src][ii]:

                    for flg in flag_list:

                        if (((ttrans - config.transit_window) <=
                             flg.finish_time)
                                and ((ttrans + config.transit_window) >=
                                     flg.start_time)):

                            logger.info("%s CSD %d:  %s flag" %
                                        (src, cc, name))
                            csd_flag[src][ii] = False
                            break

    # Print number of files left after flagging
    for src in config.sources:
        logger.info("%s:  %d files (after flagging)" %
                    (src, np.sum(csd_flag[src])))

    ## Construct pair wise differences
    npair = len(config.diff_pair)
    shift = [nd * 24.0 * 3600.0 for nd in config.nday_shift]

    calmap = []
    calpair = []

    for (tsrc, csrc), sh in zip(config.diff_pair, shift):

        body_test = ephemeris.source_dictionary[tsrc]
        body_cal = ephemeris.source_dictionary[csrc]

        for ii, cc in enumerate(csd[tsrc]):

            if csd_flag[tsrc][ii]:

                test_transit = ephemeris.transit_times(
                    body_test, ephemeris.csd_to_unix(cc))[0]
                cal_transit = ephemeris.transit_times(body_cal,
                                                      test_transit + sh)[0]
                cal_csd = int(np.fix(ephemeris.unix_to_csd(cal_transit)))

                ttrans = np.sort([test_transit, cal_transit])

                if cal_csd in csd[csrc]:
                    jj = list(csd[csrc]).index(cal_csd)

                    if csd_flag[csrc][jj] and not np.any(
                        (time_fpga_restart >= ttrans[0])
                            & (time_fpga_restart <= ttrans[1])):
                        calmap.append([ii, jj])
                        calpair.append([tsrc, csrc])

    calmap = np.array(calmap)
    calpair = np.array(calpair)

    ntransit = calmap.shape[0]

    logger.info("%d total transit pairs" % ntransit)
    for ii in range(ntransit):

        t1 = ephemeris.transit_times(
            ephemeris.source_dictionary[calpair[ii, 0]],
            ephemeris.csd_to_unix(csd[calpair[ii, 0]][calmap[ii, 0]]))[0]
        t2 = ephemeris.transit_times(
            ephemeris.source_dictionary[calpair[ii, 1]],
            ephemeris.csd_to_unix(csd[calpair[ii, 1]][calmap[ii, 1]]))[0]

        logger.info("%s (%d) - %s (%d):  %0.1f hr" %
                    (calpair[ii, 0], csd_flag[calpair[ii, 0]][calmap[ii, 0]],
                     calpair[ii, 1], csd_flag[calpair[ii, 1]][calmap[ii, 1]],
                     (t1 - t2) / 3600.0))

    # Determine unique diff pairs
    diff_name = np.array(['%s/%s' % tuple(cp) for cp in calpair])
    uniq_diff, lbl_diff, cnt_diff = np.unique(diff_name,
                                              return_inverse=True,
                                              return_counts=True)
    ndiff = uniq_diff.size

    for ud, udcnt in zip(uniq_diff, cnt_diff):
        logger.info("%s:  %d transit pairs" % (ud, udcnt))

    ## Load gains
    inputmap = tools.get_correlator_inputs(datetime.datetime.utcnow(),
                                           correlator='chime')
    ninput = len(inputmap)
    nfreq = 1024

    # Set up gain arrays
    gain = np.zeros((2, nfreq, ninput, ntransit), dtype=np.complex64)
    weight = np.zeros((2, nfreq, ninput, ntransit), dtype=np.float32)
    input_sort = np.zeros((2, ninput, ntransit), dtype=np.int)

    kcsd = np.zeros((2, ntransit), dtype=np.float32)
    timestamp = np.zeros((2, ntransit), dtype=np.float64)
    is_daytime = np.zeros((2, ntransit), dtype=np.bool)

    for tt in range(ntransit):

        for kk, (src, ind) in enumerate(zip(calpair[tt], calmap[tt])):

            body = ephemeris.source_dictionary[src]
            filename = files[src][ind]

            logger.info("%s:  %s" % (src, filename))

            temp = containers.StaticGainData.from_file(filename)

            freq = temp.freq[:]
            inputs = temp.input[:]

            isort = reorder_inputs(inputmap, inputs)
            inputs = inputs[isort]

            gain[kk, :, :, tt] = temp.gain[:, isort]
            weight[kk, :, :, tt] = temp.weight[:, isort]
            input_sort[kk, :, tt] = isort

            kcsd[kk, tt] = temp.attrs['lsd']
            timestamp[kk, tt] = ephemeris.transit_times(
                body, ephemeris.csd_to_unix(kcsd[kk, tt]))[0]
            is_daytime[kk, tt] = daytime_flag(timestamp[kk, tt])[0]

            if np.any(isort != np.arange(isort.size)):
                logger.info("Input ordering has changed: %s" %
                            ephemeris.unix_to_datetime(
                                timestamp[kk, tt]).strftime("%Y-%m-%d"))

        logger.info("")

    inputs = np.array([(inp.id, inp.input_sn) for inp in inputmap],
                      dtype=[('chan_id', 'u2'), ('correlator_input', 'S32')])

    ## Load input flags
    inpflg = np.ones((2, ninput, ntransit), dtype=np.bool)

    min_flag_time = np.min(timestamp) - 7.0 * 24.0 * 60.0 * 60.0
    max_flag_time = np.max(timestamp) + 7.0 * 24.0 * 60.0 * 60.0

    flaginput_files = sorted(
        glob.glob(
            os.path.join(config.flaginput_dir, "*" + config.flaginput_suffix,
                         "*.h5")))

    if flaginput_files:
        logger.info("Found %d flaginput files." % len(flaginput_files))
        tmp = andata.FlagInputData.from_acq_h5(flaginput_files, datasets=())
        start, stop = [
            int(yy) for yy in np.percentile(
                np.flatnonzero((tmp.time[:] >= min_flag_time)
                               & (tmp.time[:] <= max_flag_time)), [0, 100])
        ]

        cont = andata.FlagInputData.from_acq_h5(flaginput_files,
                                                start=start,
                                                stop=stop,
                                                datasets=['flag'])

        for kk in range(2):
            inpflg[kk, :, :] = cont.resample('flag',
                                             timestamp[kk],
                                             transpose=True)

            logger.info("Flaginput time offsets in minutes (pair %d):" % kk)
            logger.info(
                str(
                    np.fix((cont.time[cont.search_update_time(timestamp[kk])] -
                            timestamp[kk]) / 60.0).astype(np.int)))

    # Sort flags so they are in same order
    for tt in range(ntransit):
        for kk in range(2):
            inpflg[kk, :, tt] = inpflg[kk, input_sort[kk, :, tt], tt]

    # Do not apply input flag to phase reference
    for ii in config.index_phase_ref:
        inpflg[:, ii, :] = True

    ## Flag out gains with high uncertainty and frequencies with large fraction of data flagged
    frac_err = tools.invert_no_zero(np.sqrt(weight) * np.abs(gain))

    flag = np.all((weight > 0.0) & (np.abs(gain) > 0.0) &
                  (frac_err < config.max_uncertainty),
                  axis=0)

    freq_flag = ((np.sum(flag, axis=(1, 2), dtype=np.float32) /
                  float(np.prod(flag.shape[1:]))) > config.freq_threshold)

    if config.apply_rfi_mask:
        freq_flag &= np.logical_not(rfi.frequency_mask(freq))

    flag = flag & freq_flag[:, np.newaxis, np.newaxis]

    good_freq = np.flatnonzero(freq_flag)

    logger.info("Number good frequencies %d" % good_freq.size)

    ## Generate flags with more conservative cuts on frequency
    c_flag = flag & np.all(frac_err < config.conservative.max_uncertainty,
                           axis=0)

    c_freq_flag = ((np.sum(c_flag, axis=(1, 2), dtype=np.float32) /
                    float(np.prod(c_flag.shape[1:]))) >
                   config.conservative.freq_threshold)

    if config.conservative.apply_rfi_mask:
        c_freq_flag &= np.logical_not(rfi.frequency_mask(freq))

    c_flag = c_flag & c_freq_flag[:, np.newaxis, np.newaxis]

    c_good_freq = np.flatnonzero(c_freq_flag)

    logger.info("Number good frequencies (conservative thresholds) %d" %
                c_good_freq.size)

    ## Apply input flags
    flag &= np.all(inpflg[:, np.newaxis, :, :], axis=0)

    ## Update flags based on beam flag
    if config.beam_flag_file is not None:

        dbeam = andata.BaseData.from_acq_h5(config.beam_flag_file)

        db_csd = np.floor(ephemeris.unix_to_csd(dbeam.index_map['time'][:]))

        for ii, name in enumerate(config.beam_flag_datasets):
            logger.info("Applying %s beam flag." % name)
            if not ii:
                db_flag = dbeam.flags[name][:]
            else:
                db_flag &= dbeam.flags[name][:]

        cnt = 0
        for ii, dbc in enumerate(db_csd):

            this_csd = np.flatnonzero(np.any(kcsd == dbc, axis=0))

            if this_csd.size > 0:

                logger.info("Beam flag for %d matches %s." %
                            (dbc, str(kcsd[:, this_csd])))

                flag[:, :, this_csd] &= db_flag[np.newaxis, :, ii, np.newaxis]

                cnt += 1

        logger.info("Applied %0.1f percent of the beam flags" %
                    (100.0 * cnt / float(db_csd.size), ))

    ## Flag inputs with large amount of missing data
    input_frac_flagged = (
        np.sum(flag[good_freq, :, :], axis=(0, 2), dtype=np.float32) /
        float(good_freq.size * ntransit))
    input_flag = input_frac_flagged > config.input_threshold

    for ii in config.index_phase_ref:
        logger.info("Phase reference %d has %0.3f fraction of data flagged." %
                    (ii, input_frac_flagged[ii]))
        input_flag[ii] = True

    good_input = np.flatnonzero(input_flag)

    flag = flag & input_flag[np.newaxis, :, np.newaxis]

    logger.info("Number good inputs %d" % good_input.size)

    ## Calibrate
    gaincal = gain[0] * tools.invert_no_zero(gain[1])

    frac_err_cal = np.sqrt(frac_err[0]**2 + frac_err[1]**2)

    count = np.sum(flag, axis=-1, dtype=np.int)
    stat_flag = count > config.min_num_transit

    ## Calculate phase
    amp = np.abs(gaincal)
    phi = np.angle(gaincal)

    ## Calculate polarisation groups
    pol_dict = {'E': 'X', 'S': 'Y'}
    cyl_dict = {2: 'A', 3: 'B', 4: 'C', 5: 'D'}

    if config.group_by_cyl:
        group_id = [
            (inp.pol,
             inp.cyl) if tools.is_chime(inp) and (ii in good_input) else None
            for ii, inp in enumerate(inputmap)
        ]
    else:
        group_id = [
            inp.pol if tools.is_chime(inp) and (ii in good_input) else None
            for ii, inp in enumerate(inputmap)
        ]

    ugroup_id = sorted([uidd for uidd in set(group_id) if uidd is not None])
    ngroup = len(ugroup_id)

    group_list_noref = [
        np.array([
            gg for gg, gid in enumerate(group_id)
            if (gid == ugid) and gg not in config.index_phase_ref
        ]) for ugid in ugroup_id
    ]

    group_list = [
        np.array([gg for gg, gid in enumerate(group_id) if gid == ugid])
        for ugid in ugroup_id
    ]

    if config.group_by_cyl:
        group_str = [
            "%s-%s" % (pol_dict[pol], cyl_dict[cyl]) for pol, cyl in ugroup_id
        ]
    else:
        group_str = [pol_dict[pol] for pol in ugroup_id]

    index_phase_ref = []
    for gstr, igroup in zip(group_str, group_list):
        candidate = [ii for ii in config.index_phase_ref if ii in igroup]
        if len(candidate) != 1:
            index_phase_ref.append(None)
        else:
            index_phase_ref.append(candidate[0])

    logger.info(
        "Phase reference: %s" %
        ', '.join(['%s = %s' % tpl
                   for tpl in zip(group_str, index_phase_ref)]))

    ## Apply thermal correction to amplitude
    if config.amp_thermal.enabled:

        logger.info("Applying thermal correction.")

        # Load the temperatures
        tdata = TempData.from_acq_h5(config.amp_thermal.filename)

        index = tdata.search_sensors(config.amp_thermal.sensor)[0]

        temp = tdata.datasets[config.amp_thermal.field][index]
        temp_func = scipy.interpolate.interp1d(tdata.time, temp,
                                               **config.amp_thermal.interp)

        itemp = temp_func(timestamp)
        dtemp = itemp[0] - itemp[1]

        flag_func = scipy.interpolate.interp1d(
            tdata.time, tdata.datasets['flag'][index].astype(np.float32),
            **config.amp_thermal.interp)

        dtemp_flag = np.all(flag_func(timestamp) == 1.0, axis=0)

        flag &= dtemp_flag[np.newaxis, np.newaxis, :]

        for gstr, igroup in zip(group_str, group_list):
            pstr = gstr[0]
            thermal_coeff = np.polyval(config.amp_thermal.coeff[pstr], freq)
            gthermal = 1.0 + thermal_coeff[:, np.newaxis, np.newaxis] * dtemp[
                np.newaxis, np.newaxis, :]

            amp[:, igroup, :] *= tools.invert_no_zero(gthermal)

    ## Compute common mode
    if config.subtract_common_mode_before:
        logger.info("Calculating common mode amplitude and phase.")
        cmn_amp, flag_cmn_amp = compute_common_mode(amp,
                                                    flag,
                                                    group_list_noref,
                                                    median=False)
        cmn_phi, flag_cmn_phi = compute_common_mode(phi,
                                                    flag,
                                                    group_list_noref,
                                                    median=False)

        # Subtract common mode (from phase only)
        logger.info("Subtracting common mode phase.")
        group_flag = np.zeros((ngroup, ninput), dtype=np.bool)
        for gg, igroup in enumerate(group_list):
            group_flag[gg, igroup] = True
            phi[:,
                igroup, :] = phi[:, igroup, :] - cmn_phi[:, gg, np.newaxis, :]

            for iref in index_phase_ref:
                if (iref is not None) and (iref in igroup):
                    flag[:, iref, :] = flag_cmn_phi[:, gg, :]

    ## If requested, determine and subtract a delay template
    if config.fit_delay_before:
        logger.info("Fitting delay template.")
        omega = timing.FREQ_TO_OMEGA * freq

        tau, tau_flag, _ = construct_delay_template(
            omega,
            phi,
            c_flag & flag,
            min_num_freq_for_delay_fit=config.min_num_freq_for_delay_fit)

        # Compute residuals
        logger.info("Subtracting delay template.")
        phi = phi - tau[np.newaxis, :, :] * omega[:, np.newaxis, np.newaxis]

    ## Normalize by median over time
    logger.info("Calculating median amplitude and phase.")
    med_amp = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype)
    med_phi = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)

    count_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=np.int)
    stat_flag_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=np.bool)

    def weighted_mean(yy, ww, axis=-1):
        """Return the weighted mean of `yy` along `axis`, using weights `ww`.

        The weighted sum ``sum(ww * yy)`` is multiplied by the output of
        ``tools.invert_no_zero`` applied to the summed weights, rather than
        being divided by the summed weights directly.
        """
        weighted_total = np.sum(ww * yy, axis=axis)
        # NOTE(review): presumably invert_no_zero returns 0 where the summed
        # weight is 0, so fully-flagged entries give 0 instead of NaN/inf —
        # confirm against ch_util.tools.
        inverse_norm = tools.invert_no_zero(np.sum(ww, axis=axis))
        return weighted_total * inverse_norm

    for dd in range(ndiff):

        this_diff = np.flatnonzero(lbl_diff == dd)

        this_flag = flag[:, :, this_diff]

        this_amp = amp[:, :, this_diff]
        this_amp_err = this_amp * frac_err_cal[:, :,
                                               this_diff] * this_flag.astype(
                                                   np.float32)

        this_phi = phi[:, :, this_diff]
        this_phi_err = frac_err_cal[:, :, this_diff] * this_flag.astype(
            np.float32)

        count_by_diff[:, :, dd] = np.sum(this_flag, axis=-1, dtype=np.int)
        stat_flag_by_diff[:, :,
                          dd] = count_by_diff[:, :,
                                              dd] > config.min_num_transit

        if config.weighted_mean == 2:
            logger.info("Calculating inverse variance weighted mean.")
            med_amp[:, :,
                    dd] = weighted_mean(this_amp,
                                        tools.invert_no_zero(this_amp_err**2),
                                        axis=-1)
            med_phi[:, :,
                    dd] = weighted_mean(this_phi,
                                        tools.invert_no_zero(this_phi_err**2),
                                        axis=-1)

        elif config.weighted_mean == 1:
            logger.info("Calculating uniform weighted mean.")
            med_amp[:, :, dd] = weighted_mean(this_amp,
                                              this_flag.astype(np.float32),
                                              axis=-1)
            med_phi[:, :, dd] = weighted_mean(this_phi,
                                              this_flag.astype(np.float32),
                                              axis=-1)

        else:
            logger.info("Calculating median value.")
            for ff in range(nfreq):
                for ii in range(ninput):
                    if np.any(this_flag[ff, ii, :]):
                        med_amp[ff, ii, dd] = wq.median(
                            this_amp[ff, ii, :],
                            this_flag[ff, ii, :].astype(np.float32))
                        med_phi[ff, ii, dd] = wq.median(
                            this_phi[ff, ii, :],
                            this_flag[ff, ii, :].astype(np.float32))

    damp = np.zeros_like(amp)
    dphi = np.zeros_like(phi)
    for dd in range(ndiff):
        this_diff = np.flatnonzero(lbl_diff == dd)
        damp[:, :, this_diff] = amp[:, :, this_diff] * tools.invert_no_zero(
            med_amp[:, :, dd, np.newaxis]) - 1.0
        dphi[:, :,
             this_diff] = phi[:, :, this_diff] - med_phi[:, :, dd, np.newaxis]

    # Compute common mode
    if not config.subtract_common_mode_before:
        logger.info("Calculating common mode amplitude and phase.")
        cmn_amp, flag_cmn_amp = compute_common_mode(damp,
                                                    flag,
                                                    group_list_noref,
                                                    median=True)
        cmn_phi, flag_cmn_phi = compute_common_mode(dphi,
                                                    flag,
                                                    group_list_noref,
                                                    median=True)

        # Subtract common mode (from phase only)
        logger.info("Subtracting common mode phase.")
        group_flag = np.zeros((ngroup, ninput), dtype=np.bool)
        for gg, igroup in enumerate(group_list):
            group_flag[gg, igroup] = True
            dphi[:, igroup, :] = dphi[:, igroup, :] - cmn_phi[:, gg,
                                                              np.newaxis, :]

            for iref in index_phase_ref:
                if (iref is not None) and (iref in igroup):
                    flag[:, iref, :] = flag_cmn_phi[:, gg, :]

    ## Compute RMS
    logger.info("Calculating RMS of amplitude and phase.")
    mad_amp = np.zeros((nfreq, ninput), dtype=amp.dtype)
    std_amp = np.zeros((nfreq, ninput), dtype=amp.dtype)

    mad_phi = np.zeros((nfreq, ninput), dtype=phi.dtype)
    std_phi = np.zeros((nfreq, ninput), dtype=phi.dtype)

    mad_amp_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype)
    std_amp_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype)

    mad_phi_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)
    std_phi_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)

    for ff in range(nfreq):
        for ii in range(ninput):
            this_flag = flag[ff, ii, :]
            if np.any(this_flag):
                std_amp[ff, ii] = np.std(damp[ff, ii, this_flag])
                std_phi[ff, ii] = np.std(dphi[ff, ii, this_flag])

                mad_amp[ff, ii] = 1.48625 * wq.median(
                    np.abs(damp[ff, ii, :]), this_flag.astype(np.float32))
                mad_phi[ff, ii] = 1.48625 * wq.median(
                    np.abs(dphi[ff, ii, :]), this_flag.astype(np.float32))

                for dd in range(ndiff):
                    this_diff = this_flag & (lbl_diff == dd)
                    if np.any(this_diff):

                        std_amp_by_diff[ff, ii, dd] = np.std(damp[ff, ii,
                                                                  this_diff])
                        std_phi_by_diff[ff, ii, dd] = np.std(dphi[ff, ii,
                                                                  this_diff])

                        mad_amp_by_diff[ff, ii, dd] = 1.48625 * wq.median(
                            np.abs(damp[ff, ii, :]),
                            this_diff.astype(np.float32))
                        mad_phi_by_diff[ff, ii, dd] = 1.48625 * wq.median(
                            np.abs(dphi[ff, ii, :]),
                            this_diff.astype(np.float32))

    ## Construct delay template
    if not config.fit_delay_before:
        logger.info("Fitting delay template.")
        omega = timing.FREQ_TO_OMEGA * freq

        tau, tau_flag, _ = construct_delay_template(
            omega,
            dphi,
            c_flag & flag,
            min_num_freq_for_delay_fit=config.min_num_freq_for_delay_fit)

        # Compute residuals
        logger.info("Subtracting delay template from phase.")
        resid = (dphi - tau[np.newaxis, :, :] *
                 omega[:, np.newaxis, np.newaxis]) * flag.astype(np.float32)

    else:
        resid = dphi

    tau_count = np.sum(tau_flag, axis=-1, dtype=np.int)
    tau_stat_flag = tau_count > config.min_num_transit

    tau_count_by_diff = np.zeros((ninput, ndiff), dtype=np.int)
    tau_stat_flag_by_diff = np.zeros((ninput, ndiff), dtype=np.bool)
    for dd in range(ndiff):
        this_diff = np.flatnonzero(lbl_diff == dd)
        tau_count_by_diff[:, dd] = np.sum(tau_flag[:, this_diff],
                                          axis=-1,
                                          dtype=np.int)
        tau_stat_flag_by_diff[:,
                              dd] = tau_count_by_diff[:,
                                                      dd] > config.min_num_transit

    ## Calculate statistics of residuals
    std_resid = np.zeros((nfreq, ninput), dtype=phi.dtype)
    mad_resid = np.zeros((nfreq, ninput), dtype=phi.dtype)

    std_resid_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)
    mad_resid_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)

    for ff in range(nfreq):
        for ii in range(ninput):
            this_flag = flag[ff, ii, :]
            if np.any(this_flag):
                std_resid[ff, ii] = np.std(resid[ff, ii, this_flag])
                mad_resid[ff, ii] = 1.48625 * wq.median(
                    np.abs(resid[ff, ii, :]), this_flag.astype(np.float32))

                for dd in range(ndiff):
                    this_diff = this_flag & (lbl_diff == dd)
                    if np.any(this_diff):
                        std_resid_by_diff[ff, ii,
                                          dd] = np.std(resid[ff, ii,
                                                             this_diff])
                        mad_resid_by_diff[ff, ii, dd] = 1.48625 * wq.median(
                            np.abs(resid[ff, ii, :]),
                            this_diff.astype(np.float32))

    ## Calculate statistics of delay template
    mad_tau = np.zeros((ninput, ), dtype=phi.dtype)
    std_tau = np.zeros((ninput, ), dtype=phi.dtype)

    mad_tau_by_diff = np.zeros((ninput, ndiff), dtype=phi.dtype)
    std_tau_by_diff = np.zeros((ninput, ndiff), dtype=phi.dtype)

    for ii in range(ninput):
        this_flag = tau_flag[ii]
        if np.any(this_flag):
            std_tau[ii] = np.std(tau[ii, this_flag])
            mad_tau[ii] = 1.48625 * wq.median(np.abs(tau[ii]),
                                              this_flag.astype(np.float32))

            for dd in range(ndiff):
                this_diff = this_flag & (lbl_diff == dd)
                if np.any(this_diff):
                    std_tau_by_diff[ii, dd] = np.std(tau[ii, this_diff])
                    mad_tau_by_diff[ii, dd] = 1.48625 * wq.median(
                        np.abs(tau[ii]), this_diff.astype(np.float32))

    ## Define output
    res = {
        "timestamp": {
            "data": timestamp,
            "axis": ["div", "time"]
        },
        "is_daytime": {
            "data": is_daytime,
            "axis": ["div", "time"]
        },
        "csd": {
            "data": kcsd,
            "axis": ["div", "time"]
        },
        "pair_map": {
            "data": lbl_diff,
            "axis": ["time"]
        },
        "pair_count": {
            "data": cnt_diff,
            "axis": ["pair"]
        },
        "gain": {
            "data": gaincal,
            "axis": ["freq", "input", "time"]
        },
        "frac_err": {
            "data": frac_err_cal,
            "axis": ["freq", "input", "time"]
        },
        "flags/gain": {
            "data": flag,
            "axis": ["freq", "input", "time"],
            "flag": True
        },
        "flags/gain_conservative": {
            "data": c_flag,
            "axis": ["freq", "input", "time"],
            "flag": True
        },
        "flags/count": {
            "data": count,
            "axis": ["freq", "input"],
            "flag": True
        },
        "flags/stat": {
            "data": stat_flag,
            "axis": ["freq", "input"],
            "flag": True
        },
        "flags/count_by_pair": {
            "data": count_by_diff,
            "axis": ["freq", "input", "pair"],
            "flag": True
        },
        "flags/stat_by_pair": {
            "data": stat_flag_by_diff,
            "axis": ["freq", "input", "pair"],
            "flag": True
        },
        "med_amp": {
            "data": med_amp,
            "axis": ["freq", "input", "pair"]
        },
        "med_phi": {
            "data": med_phi,
            "axis": ["freq", "input", "pair"]
        },
        "flags/group_flag": {
            "data": group_flag,
            "axis": ["group", "input"],
            "flag": True
        },
        "cmn_amp": {
            "data": cmn_amp,
            "axis": ["freq", "group", "time"]
        },
        "cmn_phi": {
            "data": cmn_phi,
            "axis": ["freq", "group", "time"]
        },
        "amp": {
            "data": damp,
            "axis": ["freq", "input", "time"]
        },
        "phi": {
            "data": dphi,
            "axis": ["freq", "input", "time"]
        },
        "std_amp": {
            "data": std_amp,
            "axis": ["freq", "input"]
        },
        "std_amp_by_pair": {
            "data": std_amp_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "mad_amp": {
            "data": mad_amp,
            "axis": ["freq", "input"]
        },
        "mad_amp_by_pair": {
            "data": mad_amp_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "std_phi": {
            "data": std_phi,
            "axis": ["freq", "input"]
        },
        "std_phi_by_pair": {
            "data": std_phi_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "mad_phi": {
            "data": mad_phi,
            "axis": ["freq", "input"]
        },
        "mad_phi_by_pair": {
            "data": mad_phi_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "tau": {
            "data": tau,
            "axis": ["input", "time"]
        },
        "flags/tau": {
            "data": tau_flag,
            "axis": ["input", "time"],
            "flag": True
        },
        "flags/tau_count": {
            "data": tau_count,
            "axis": ["input"],
            "flag": True
        },
        "flags/tau_stat": {
            "data": tau_stat_flag,
            "axis": ["input"],
            "flag": True
        },
        "flags/tau_count_by_pair": {
            "data": tau_count_by_diff,
            "axis": ["input", "pair"],
            "flag": True
        },
        "flags/tau_stat_by_pair": {
            "data": tau_stat_flag_by_diff,
            "axis": ["input", "pair"],
            "flag": True
        },
        "std_tau": {
            "data": std_tau,
            "axis": ["input"]
        },
        "std_tau_by_pair": {
            "data": std_tau_by_diff,
            "axis": ["input", "pair"]
        },
        "mad_tau": {
            "data": mad_tau,
            "axis": ["input"]
        },
        "mad_tau_by_pair": {
            "data": mad_tau_by_diff,
            "axis": ["input", "pair"]
        },
        "resid_phi": {
            "data": resid,
            "axis": ["freq", "input", "time"]
        },
        "std_resid_phi": {
            "data": std_resid,
            "axis": ["freq", "input"]
        },
        "std_resid_phi_by_pair": {
            "data": std_resid_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "mad_resid_phi": {
            "data": mad_resid,
            "axis": ["freq", "input"]
        },
        "mad_resid_phi_by_pair": {
            "data": mad_resid_by_diff,
            "axis": ["freq", "input", "pair"]
        },
    }

    ## Create the output container
    logger.info("Creating StabilityData container.")
    data = StabilityData()

    data.create_index_map(
        "div", np.array(["numerator", "denominator"], dtype=np.string_))
    data.create_index_map("pair", np.array(uniq_diff, dtype=np.string_))
    data.create_index_map("group", np.array(group_str, dtype=np.string_))

    data.create_index_map("freq", freq)
    data.create_index_map("input", inputs)
    data.create_index_map("time", timestamp[0, :])

    logger.info("Writing datsets to container.")
    for name, dct in res.iteritems():
        is_flag = dct.get('flag', False)
        if is_flag:
            dset = data.create_flag(name.split('/')[-1], data=dct['data'])
        else:
            dset = data.create_dataset(name, data=dct['data'])

        dset.attrs['axis'] = np.array(dct['axis'], dtype=np.string_)

    data.attrs['phase_ref'] = np.array(
        [iref for iref in index_phase_ref if iref is not None])

    # Determine the output filename and save results
    start_time, end_time = ephemeris.unix_to_datetime(
        np.percentile(timestamp, [0, 100]))
    tfmt = "%Y%m%d"
    night_str = 'night_' if not np.any(is_daytime) else ''
    output_file = os.path.join(
        config.output_dir, "%s_%s_%sraw_stability_data.h5" %
        (start_time.strftime(tfmt), end_time.strftime(tfmt), night_str))

    logger.info("Saving results to %s." % output_file)
    data.save(output_file)
Exemplo n.º 14
0
def main(config_file=None, logging_params=DEFAULT_LOGGING):
    """Run the offline point source calibration on every available transit.

    The routine builds its configuration from ``DEFAULTS`` (optionally
    overridden by a YAML file), selects calibrators from ``FluxCatalog``,
    registers the acquisition files found on disk with a transit tracker and
    then, for each transit returned by the tracker, calls
    ``offline_cal.offline_point_source_calibration`` and writes the result to
    a per-source ``PointSourceWriter`` as well as to an individual dump file.

    Parameters
    ----------
    config_file : str or None
        Path handed to ``load_yaml_config``.  Its contents are merged into a
        copy of ``DEFAULTS``.  If None, the defaults are used unchanged.
    logging_params : dict
        Passed to ``log.setup_logging`` and forwarded to
        ``offline_cal.offline_point_source_calibration``.

    Returns
    -------
    None
        Returns early (silently) if no acquisition file survives the
        modification-time and lock-file filters.

    Raises
    ------
    RuntimeError
        If no source passes the declination / flux / source-list selection.
    """

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    # Set config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Use the user supplied source list if there is one, otherwise fall back
    # to whatever FluxCatalog.sort() returns.
    source_list = (config.source_list
                   if config.source_list else FluxCatalog.sort())

    # Keep only sources that are far enough north, bright enough at the
    # nominal frequency, and present in the source list.
    cal_list = [
        name for name, obj in FluxCatalog.iteritems()
        if (obj.dec >= config.min_dec) and (
            obj.predict_flux(config.freq_nominal) >= config.min_flux) and (
                name in source_list)
    ]

    if not cal_list:
        raise RuntimeError("No calibrators found.")

    # Sort list by flux at nominal frequency
    cal_list.sort(
        key=lambda name: FluxCatalog[name].predict_flux(config.freq_nominal))

    # Add to transit tracker
    transit_tracker = containers.TransitTrackerOffline(
        nsigma=config.nsigma_source, extend_night=config.extend_night)
    for name in cal_list:
        transit_tracker[name] = FluxCatalog[name].skyfield

    mlog.info("Initializing offline point source processing.")

    # A missing/zero start time means "consider every file".
    search_time = config.start_time or 0

    # Find all calibration files
    all_files = sorted(
        glob.glob(
            os.path.join(config.acq_dir,
                         '*' + config.correlator + config.acq_suffix, '*.h5')))
    if not all_files:
        return

    # Remove files whose last modified time is before the time of the most recent update
    all_files = [
        ff for ff in all_files if (os.path.getmtime(ff) > search_time)
    ]
    if not all_files:
        return

    # Remove files that are currently locked (a sibling "<name>.lock" exists)
    all_files = [
        ff for ff in all_files
        if not os.path.isfile(os.path.splitext(ff)[0] + '.lock')
    ]
    if not all_files:
        return

    # Add files to transit tracker
    for ff in all_files:
        transit_tracker.add_file(ff)

    # Extract point source transits ready for analysis
    all_transits = transit_tracker.get_transits()

    # One writer per point source, created lazily on its first transit
    h5_psrc_fit = {}
    # The input map is loaded once (on the first processed transit) and
    # then reused for all subsequent transits.
    inputmap = None

    # Loop over transits
    for transit in all_transits:

        src, csd, is_day, files, start, stop = transit

        # Discard any point sources with unusual csd value
        if (csd < config.min_csd) or (csd > config.max_csd):
            continue

        # Discard any point sources transiting during the day
        # NOTE(review): the comparison is numeric (is_day > process_daytime);
        # presumably both are small integer/boolean codes -- confirm.
        if is_day > config.process_daytime:
            continue

        mlog.info(
            'Processing %s transit on CSD %d (%d files, %d time samples)' %
            (src, csd, len(files), stop - start + 1))

        # Load inputmap
        if inputmap is None:
            if config.inputmap is None:
                inputmap = tools.get_correlator_inputs(
                    ephemeris.unix_to_datetime(ephemeris.csd_to_unix(csd)),
                    correlator=config.correlator)
            else:
                # Pickle streams must be read from a file opened in binary
                # mode; text mode breaks on Python 3 and is not portable.
                # NOTE(review): unpickling executes arbitrary code -- only
                # point config.inputmap at trusted files.
                with open(config.inputmap, 'rb') as handler:
                    inputmap = pickle.load(handler)

        # Grab the timing correction for this transit
        tcorr = None
        if config.apply_timing:

            # First choice: extended timing solutions, if a glob was given
            if config.timing_glob is not None:

                mlog.info(
                    "Loading timing correction from extended timing solutions."
                )

                timing_files = sorted(glob.glob(config.timing_glob))

                if timing_files:

                    try:
                        tcorr = search_extended_timing_solutions(
                            timing_files, ephemeris.csd_to_unix(csd))

                    except Exception as e:
                        # Best effort: tcorr stays None so the fallback
                        # below is attempted.
                        mlog.error(
                            'search_extended_timing_solutions failed with error: %s'
                            % e)

                    else:
                        mlog.info(str(tcorr))

            # Fallback: derive the correction from the acquisition itself
            if tcorr is None:

                mlog.info(
                    "Loading timing correction from chimetiming acquisitions.")

                try:
                    tcorr = timing.load_timing_correction(
                        files,
                        start=start,
                        stop=stop,
                        window=config.timing_window,
                        instrument=config.correlator)
                except Exception as e:
                    # Processing continues without a timing correction.
                    mlog.error(
                        'timing.load_timing_correction failed with error: %s' %
                        e)
                    mlog.warning(
                        'No timing correction applied to %s transit on CSD %d.'
                        % (src, csd))
                else:
                    mlog.info(str(tcorr))

        # Call the main routine to process data
        try:
            outdct = offline_cal.offline_point_source_calibration(
                files,
                src,
                start=start,
                stop=stop,
                inputmap=inputmap,
                tcorr=tcorr,
                logging_params=logging_params,
                **config.analysis.as_dict())

        except Exception as e:
            # A failure on one transit is logged and must not abort the
            # processing of the remaining transits.
            msg = 'offline_cal.offline_point_source_calibration failed with error:  %s' % e
            mlog.error(msg)
            continue

        # Find existing gain files for this particular point source
        if src not in h5_psrc_fit:

            output_files = find_files(config, psrc=src)
            if output_files is not None:
                # Append to the last file returned by find_files
                output_files = output_files[-1]
                mlog.info('Writing %s transit on CSD %d to existing file %s.' %
                          (src, csd, output_files))

            h5_psrc_fit[src] = containers.PointSourceWriter(
                src,
                output_file=output_files,
                output_dir=config.output_dir,
                output_suffix=point_source_name_to_file_suffix(src),
                instrument=config.correlator,
                max_file_size=config.max_file_size,
                max_num=config.max_num_time,
                memory_size=0)

        # Associate this gain calibration to the transit time
        this_time = ephemeris.transit_times(FluxCatalog[src].skyfield,
                                            ephemeris.csd_to_unix(csd))[0]

        outdct['csd'] = csd
        outdct['is_daytime'] = is_day
        # The acquisition name is the directory that holds the first file
        outdct['acquisition'] = os.path.basename(os.path.dirname(files[0]))

        # Write to output file
        mlog.info('Writing to disk results from %s transit on CSD %d.' %
                  (src, csd))
        h5_psrc_fit[src].write(this_time, **outdct)

        # Dump an individual file for this point source transit
        mlog.info('Dumping to disk single file for %s transit on CSD %d.' %
                  (src, csd))
        dump_dir = os.path.join(config.output_dir, 'point_source_gains')
        containers.mkdir(dump_dir)

        dump_file = os.path.join(dump_dir, '%s_csd_%d.h5' % (src.lower(), csd))
        h5_psrc_fit[src].dump(dump_file,
                              datasets=[
                                  'csd', 'acquisition', 'is_daytime', 'gain',
                                  'weight', 'timing', 'model'
                              ])

        mlog.info('Finished analysis of %s transit on CSD %d.' % (src, csd))
Exemplo n.º 15
0
def main(config_file=None, logging_params=DEFAULT_LOGGING):
    """Search autocorrelations for jumps and write the candidates to disk.

    For every acquisition listed in ``config.acq`` the autocorrelations are
    loaded in chunks of files.  For each frequency and each selected input,
    the autocorrelation is normalised by its median over time, transformed
    with ``pywt.cwt``, and passed through ``mod_max_finder`` and
    ``finger_finder`` to obtain jump candidates.  Candidates that survive the
    cuts are accumulated and, if there are any, written to
    ``<config.output_dir>/<acq>_<output_suffix>.h5``.

    Parameters
    ----------
    config_file : str or None
        Path handed to ``load_yaml_config``.  Its contents are merged into a
        copy of ``DEFAULTS``.  If None, the defaults are used unchanged.
    logging_params : dict
        Passed to ``log.setup_logging``.

    Returns
    -------
    None
    """

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    # Set config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Set niceness.  os.nice takes an increment, so pass the difference
    # between the requested and the current value.
    current_niceness = os.nice(0)
    os.nice(config.niceness - current_niceness)
    mlog.info('Changing process niceness from %d to %d.  Confirm:  %d' %
                  (current_niceness, config.niceness, os.nice(0)))

    # Create output suffix
    output_suffix = config.output_suffix if config.output_suffix is not None else "jumps"

    # Calculate the wavelet transform for the following scales
    nwin = 2 * config.max_scale + 1
    nhwin = nwin // 2

    # The builtin int is used for the dtype: the np.int alias was deprecated
    # in NumPy 1.20 and removed in 1.24.
    if config.log_scale:
        mlog.info("Using log scale.")
        scale = np.logspace(np.log10(config.min_scale), np.log10(nwin),
                            num=config.num_points, dtype=int)
    else:
        mlog.info("Using linear scale.")
        scale = np.arange(config.min_scale, nwin, dtype=int)

    # Loop over acquisitions
    for acq in config.acq:

        # Find acquisition files
        all_data_files = sorted(glob(os.path.join(config.data_dir, acq, "*.h5")))
        nfiles = len(all_data_files)

        if nfiles == 0:
            continue

        mlog.info("Now processing acquisition %s (%d files)" % (acq, nfiles))

        # Determine list of feeds to examine
        dset = ['flags/inputs'] if config.use_input_flag else ()

        rdr = andata.CorrData.from_acq_h5(all_data_files, datasets=dset,
                                          apply_gain=False, renormalize=False)

        inputmap = tools.get_correlator_inputs(ephemeris.unix_to_datetime(rdr.time[0]),
                                               correlator='chime')

        # Extract good inputs
        if config.use_input_flag:
            # Keep inputs whose flag is set for more than input_threshold
            # (as a fraction) of the samples along the last axis.
            flagged_fraction = (np.sum(rdr.flags['inputs'][:], axis=-1, dtype=int) /
                                float(rdr.flags['inputs'].shape[-1]))
            ifeed = np.flatnonzero(flagged_fraction > config.input_threshold)
        else:
            ifeed = np.array([ii for ii, inp in enumerate(inputmap) if tools.is_chime(inp)])

        ninp = len(ifeed)

        mlog.info("Processing %d feeds." % ninp)

        # Create list of candidates (one entry per jump, accumulated over
        # all chunks, frequencies and inputs of this acquisition)
        cfreq, cinput, ctime, cindex = [], [], [], []
        jump_flag, jump_time, jump_auto = [], [], []
        ncandidate = 0

        # Determine number of files to process at once
        if config.max_num_file is None:
            chunk_size = nfiles
        else:
            chunk_size = min(config.max_num_file, nfiles)

        # Loop over chunks of files
        for chnk, data_files in enumerate(chunks(all_data_files, chunk_size)):

            mlog.info("Now processing chunk %d (%d files)" % (chnk, len(data_files)))

            # Determine selections along the various axes.  Only the index
            # maps are needed here, so no datasets are loaded.
            rdr = andata.CorrData.from_acq_h5(data_files, datasets=())

            # Autocorrelations are the products whose two inputs coincide
            auto_sel = np.array([ii for ii, pp in enumerate(rdr.prod) if pp[0] == pp[1]])
            auto_sel = andata._convert_to_slice(auto_sel)

            if config.time_start is None:
                ind_start = 0
            else:
                # Use the sample closest to the requested start time
                time_start = ephemeris.datetime_to_unix(datetime.datetime(*config.time_start))
                ind_start = int(np.argmin(np.abs(rdr.time - time_start)))

            if config.time_stop is None:
                ind_stop = rdr.ntime
            else:
                # Use the sample closest to the requested stop time
                time_stop = ephemeris.datetime_to_unix(datetime.datetime(*config.time_stop))
                ind_stop = int(np.argmin(np.abs(rdr.time - time_stop)))

            if config.freq_physical is not None:

                # Accept either a single frequency or an iterable of them
                if hasattr(config.freq_physical, '__iter__'):
                    freq_physical = config.freq_physical
                else:
                    freq_physical = [config.freq_physical]

                # Select the channel nearest to each requested frequency
                freq_sel = [np.argmin(np.abs(ff - rdr.freq)) for ff in freq_physical]
                freq_sel = andata._convert_to_slice(freq_sel)

            else:
                fstart = config.freq_start if config.freq_start is not None else 0
                fstop = config.freq_stop if config.freq_stop is not None else rdr.freq.size
                freq_sel = slice(fstart, fstop)

            # Load autocorrelations
            t0 = time.time()
            data = andata.CorrData.from_acq_h5(data_files, datasets=['vis'], start=ind_start, stop=ind_stop,
                                                           freq_sel=freq_sel, prod_sel=auto_sel,
                                                           apply_gain=False, renormalize=False)

            mlog.info("Took %0.1f seconds to load autocorrelations." % (time.time() - t0,))

            # If first chunk, save the frequencies that are being used
            if not chnk:
                all_freq = data.freq.copy()

            # If requested do not consider data during day or near bright source transits
            # (builtin bool replaces the removed np.bool alias)
            flag_quiet = np.ones(data.ntime, dtype=bool)
            if config.ignore_sun:
                flag_quiet &= ~transit_flag('sun', data.time, freq=np.min(data.freq), pol='X', nsig=1.0)

            if config.only_quiet:
                flag_quiet &= ~daytime_flag(data.time)
                for ss in ["CYG_A", "CAS_A", "TAU_A", "VIR_A"]:
                    flag_quiet &= ~transit_flag(ss, data.time, freq=np.min(data.freq), pol='X', nsig=1.0)

            # Loop over frequencies
            for ff, freq in enumerate(data.freq):

                print_cnt = 0
                mlog.info("FREQ %d (%0.2f MHz)" % (ff, freq))

                auto = data.vis[ff, :, :].real

                # Fractional deviation of each input from its median over time
                fractional_auto = auto * tools.invert_no_zero(np.median(auto, axis=-1, keepdims=True)) - 1.0

                # Loop over inputs
                for ii in ifeed:

                    # Timing messages are only emitted for every 100th input
                    print_cnt += 1
                    do_print = not (print_cnt % 100)

                    if do_print:
                        mlog.info("INPUT %d" % ii)
                    t0 = time.time()

                    signal = fractional_auto[ii, :]

                    # Perform wavelet transform
                    coef, freqs = pywt.cwt(signal, scale, config.wavelet_name)

                    if do_print:
                        mlog.info("Took %0.1f seconds to perform wavelet transform." % (time.time() - t0,))
                    t0 = time.time()

                    # Find local modulus maxima
                    flg_mod_max, mod_max = mod_max_finder(scale, coef, threshold=config.thresh, search_span=config.search_span)

                    if do_print:
                        mlog.info("Took %0.1f seconds to find modulus maxima." % (time.time() - t0,))
                    t0 = time.time()

                    # Find persistent modulus maxima across scales
                    candidates, cmm, pdrift, start, stop, lbl = finger_finder(scale, flg_mod_max, mod_max,
                                                                              istart=max(config.min_rise - config.min_scale, 0),
                                                                              do_fill=False)

                    if do_print:
                        mlog.info("Took %0.1f seconds to find fingers." % (time.time() - t0,))
                    t0 = time.time()

                    if candidates is None:
                        continue

                    # Cut bad candidates: keep those that reach max_scale,
                    # whose location at their starting scale is in a quiet
                    # period, and whose drift does not exceed psigma_max.
                    index_good_candidates = np.flatnonzero((scale[stop] >= config.max_scale) &
                                                            flag_quiet[candidates[start, np.arange(start.size)]] &
                                                            (pdrift <= config.psigma_max))

                    ngood = index_good_candidates.size

                    if ngood == 0:
                        continue

                    mlog.info("Input %d has %d jumps" % (ii, ngood))

                    # Add remaining candidates to list
                    ncandidate += ngood

                    cfreq += [freq] * ngood
                    cinput += [ii] * ngood

                    for igc in index_good_candidates:

                        # Time index of the candidate at its starting scale
                        icenter = candidates[start[igc], igc]

                        cindex.append(icenter)
                        ctime.append(data.time[icenter])

                        # Window of up to nwin samples centred on the jump,
                        # clipped to the range of the loaded data
                        aa = max(0, icenter - nhwin)
                        bb = min(data.ntime, icenter + nhwin + 1)

                        ncut = bb - aa

                        # Clipped windows are packed at the start of the
                        # fixed-length arrays; jump_flag marks valid samples.
                        temp_var = np.zeros(nwin, dtype=bool)
                        temp_var[0:ncut] = True
                        jump_flag.append(temp_var)

                        temp_var = np.zeros(nwin, dtype=data.time.dtype)
                        temp_var[0:ncut] = data.time[aa:bb].copy()
                        jump_time.append(temp_var)

                        temp_var = np.zeros(nwin, dtype=auto.dtype)
                        temp_var[0:ncut] = auto[ii, aa:bb].copy()
                        jump_auto.append(temp_var)


            # Garbage collect before the next chunk is loaded
            del data
            gc.collect()

        # If we found any jumps, write them to a file.
        if ncandidate > 0:

            output_file = os.path.join(config.output_dir, "%s_%s.h5" % (acq, output_suffix))

            mlog.info("Writing %d jumps to: %s" % (ncandidate, output_file))

            # Write to output file
            with h5py.File(output_file, 'w') as handler:

                handler.attrs['files'] = all_data_files
                handler.attrs['chan_id'] = ifeed
                handler.attrs['freq'] = all_freq

                index_map = handler.create_group('index_map')
                index_map.create_dataset('jump', data=np.arange(ncandidate))
                index_map.create_dataset('window', data=np.arange(nwin))

                # Datasets with one value per jump
                ax = np.array(['jump'])

                dset = handler.create_dataset('freq', data=np.array(cfreq))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('input', data=np.array(cinput))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('time', data=np.array(ctime))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('time_index', data=np.array(cindex))
                dset.attrs['axis'] = ax


                # Datasets with one window of samples per jump
                ax = np.array(['jump', 'window'])

                dset = handler.create_dataset('jump_flag', data=np.array(jump_flag))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('jump_time', data=np.array(jump_time))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('jump_auto', data=np.array(jump_auto))
                dset.attrs['axis'] = ax

        else:
            mlog.info("No jumps found for %s acquisition." % acq)