Example #1
def search_coarse_channel(data_dict,
                          find_doppler_instance,
                          logwriter=None,
                          filewriter=None):
    """ Run a turboseti search on a single coarse channel.

    This function is separate from the FindDoppler class to allow parallelization. This should not be called
    directly, but rather via the `FindDoppler.search()` or `FindDoppler.search_dask()` routines.

    Args:
        data_dict (dict): Describes one coarse channel of the file's waterfall data.
                        Required keys: {'filename','f_start', 'f_stop', 'coarse_chan', 'n_coarse_chan'}
        find_doppler_instance (FindDoppler): Instance of FindDoppler class (needed to access search params)
        logwriter (LogWriter): A LogWriter to write log output into. If None, one will be created.
        filewriter (FileWriter): A FileWriter to use to write the dat file. If None, one will be created.
    Returns:
        Success (bool): Returns True if successful (needed for dask).
    """
    d = data_dict
    fd = find_doppler_instance

    out_dir = fd.out_dir
    filename_in = fd.data_handle.filename
    header_in = fd.data_handle.header
    min_drift = fd.min_drift
    max_drift = fd.max_drift
    snr = fd.snr
    obs_info = fd.obs_info
    flagging = fd.flagging

    #logger.info("Start searching for coarse channel: %s" % d['coarse_chan'])
    data_obj = DATAH5(d['filename'],
                      f_start=d['f_start'],
                      f_stop=d['f_stop'],
                      coarse_chan=d['coarse_chan'],
                      n_coarse_chan=d['n_coarse_chan'])

    fileroot_out = filename_in.split('/')[-1].replace('.h5', '').replace(
        '.fits', '').replace('.fil', '')
    if logwriter is None:
        logwriter = LogWriter(
            '%s/%s_%i.log' %
            (out_dir.rstrip('/'), fileroot_out, d['coarse_chan']))
    if filewriter is None:
        filewriter = FileWriter(
            '%s/%s_%i.dat' %
            (out_dir.rstrip('/'), fileroot_out, d['coarse_chan']), header_in)

    spectra, drift_indices = data_obj.load_data()
    tsteps = data_obj.tsteps
    tsteps_valid = data_obj.tsteps_valid
    tdwidth = data_obj.tdwidth
    fftlen = data_obj.fftlen
    nframes = tsteps_valid
    shoulder_size = data_obj.shoulder_size

    if flagging:
        ##EE This flags the edges of the PFF for BL data (with 3 Hz resolution per channel).
        ##EE The PFF flat profile falls off after around 100k channels,
        ##EE but it falls slowly enough that 50-80k channels could be used instead.
        median_flag = np.median(spectra)
        #             spectra[:,:80000] = median_flag/float(tsteps)
        #             spectra[:,-80000:] = median_flag/float(tsteps)

        ##EE Flagging spikes in time series.
        time_series = spectra.sum(axis=1)
        time_series_median = np.median(time_series)

        # Flagging spikes > 10 in SNR
        mask = (time_series - time_series_median) / time_series.std() > 10
        if mask.any():
            logwriter.info("Found spikes in the time series. Removing ...")
            spectra[mask, :] = time_series_median / float(
                fftlen
            )  # So that the value is not the median in the time_series.

    else:
        median_flag = np.array([0])

    # allocate the findoppler tree array, initialized to zero
    # (or to the spectra median when flagging is enabled)
    tree_findoppler = np.zeros(tsteps * tdwidth,
                               dtype=np.float64) + median_flag

    # allocate array for holding original
    # Allocates array in a fast way (without initialize)
    tree_findoppler_original = np.empty_like(tree_findoppler)

    # allocate array for negative doppler rates
    tree_findoppler_flip = np.empty_like(tree_findoppler)

    # build index mask for in-place tree doppler correction
    ibrev = np.zeros(tsteps, dtype=np.int32)

    for i in range(0, tsteps):
        ibrev[i] = bitrev(i, int(np.log2(tsteps)))
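    # (taylor_flt leaves its output rows in bit-reversed order, so ibrev together with
    #  drift_indices maps each drift step back to the tree row that holds its
    #  drift-summed spectrum.)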

    ##EE: should double check if tdwidth is really better than fftlen here.
    max_val = max_vals()
    if max_val.maxsnr is None:
        max_val.maxsnr = np.zeros(tdwidth, dtype=np.float64)
    if max_val.maxdrift is None:
        max_val.maxdrift = np.zeros(tdwidth, dtype=np.float64)
    if max_val.maxsmooth is None:
        max_val.maxsmooth = np.zeros(tdwidth, dtype='uint8')
    if max_val.maxid is None:
        max_val.maxid = np.zeros(tdwidth, dtype='uint32')
    if max_val.total_n_hits is None:
        max_val.total_n_hits = 0

    # EE: Making "shoulders" to avoid "edge effects". Could do further testing.
    specstart = int(tsteps * shoulder_size / 2)
    specend = tdwidth - (tsteps * shoulder_size)

    # --------------------------------
    # Stats calc
    the_mean_val, the_stddev = comp_stats(spectra.sum(axis=0))
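    # (the_mean_val / the_stddev come from the time-summed spectrum and are reused below
    #  to put each drift-corrected spectrum into SNR-like units before hitsearch applies
    #  the snr threshold.)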

    # --------------------------------
    # Looping over drift_rate_nblock
    # --------------------------------
    drift_rate_nblock = int(
        np.floor(max_drift / (data_obj.drift_rate_resolution * tsteps_valid)))
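    # (Each pass of taylor_flt covers tsteps_valid drift steps of size drift_rate_resolution,
    #  so drift_rate_nblock is the number of such blocks needed to reach max_drift; the loop
    #  below walks them from -max_drift to +max_drift.)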

    ##EE-debuging        kk = 0

    for drift_block in range(-1 * drift_rate_nblock, drift_rate_nblock + 1):
        logger.debug("Drift_block %i" % drift_block)

        # ----------------------------------------------------------------------
        # Negative drift rates search.
        # ----------------------------------------------------------------------
        if drift_block <= 0:

            # Populates the find_doppler tree with the spectra
            populate_tree(spectra,
                          tree_findoppler,
                          nframes,
                          tdwidth,
                          tsteps,
                          fftlen,
                          shoulder_size,
                          roll=drift_block,
                          reverse=1)

            # populate original array
            np.copyto(tree_findoppler_original, tree_findoppler)

            # populate neg doppler array
            np.copyto(tree_findoppler_flip, tree_findoppler_original)

            # Flip matrix across X dimension to search negative doppler drift rates
            FlipX(tree_findoppler_flip, tdwidth, tsteps)
            logger.info("Doppler correcting reverse...")
            tt.taylor_flt(tree_findoppler_flip, tsteps * tdwidth, tsteps)
            logger.debug("done...")

            complete_drift_range = data_obj.drift_rate_resolution * np.array(
                range(-1 * tsteps_valid *
                      (np.abs(drift_block) + 1) + 1, -1 * tsteps_valid *
                      (np.abs(drift_block)) + 1))
            for k, drift_rate in enumerate(complete_drift_range[
                (complete_drift_range < min_drift)
                    & (complete_drift_range >= -1 * max_drift)]):
                # indx  = ibrev[drift_indices[::-1][k]] * tdwidth

                # DCP 2020.04 -- WAR to drift rate in flipped files
                if data_obj.header['DELTAF'] < 0:
                    drift_rate *= -1

                indx = ibrev[drift_indices[::-1][
                    (complete_drift_range < min_drift)
                    & (complete_drift_range >= -1 * max_drift)][k]] * tdwidth

                # SEARCH NEGATIVE DRIFT RATES
                spectrum = tree_findoppler_flip[indx:indx + tdwidth]

                # normalize
                spectrum -= the_mean_val
                spectrum /= the_stddev

                # Reverse spectrum back
                spectrum = spectrum[::-1]

                n_hits, max_val = hitsearch(spectrum, specstart, specend, snr,
                                            drift_rate, data_obj.header,
                                            tdwidth, max_val, 0)
                info_str = "Found %d hits at drift rate %15.15f\n" % (
                    n_hits, drift_rate)
                max_val.total_n_hits += n_hits
                logger.debug(info_str)
                logwriter.info(info_str)

        # ----------------------------------------------------------------------
        # Positive drift rates search.
        # ----------------------------------------------------------------------
        if drift_block >= 0:

            # Populates the find_doppler tree with the spectra
            populate_tree(spectra,
                          tree_findoppler,
                          nframes,
                          tdwidth,
                          tsteps,
                          fftlen,
                          shoulder_size,
                          roll=drift_block,
                          reverse=1)

            # populate original array
            np.copyto(tree_findoppler_original, tree_findoppler)

            logger.info("Doppler correcting forward...")
            tt.taylor_flt(tree_findoppler, tsteps * tdwidth, tsteps)
            logger.debug("done...")
            if (tree_findoppler == tree_findoppler_original).all():
                logger.error("taylor_flt has no effect?")
            else:
                logger.debug("tree_findoppler changed")

            ##EE: Calculates the range of drift rates for a full drift block.
            complete_drift_range = data_obj.drift_rate_resolution * np.array(
                range(tsteps_valid *
                      (drift_block), tsteps_valid * (drift_block + 1)))

            for k, drift_rate in enumerate(complete_drift_range[
                (complete_drift_range >= min_drift)
                    & (complete_drift_range <= max_drift)]):

                indx = ibrev[drift_indices[k]] * tdwidth

                # DCP 2020.04 -- WAR to drift rate in flipped files
                if data_obj.header['DELTAF'] < 0:
                    drift_rate *= -1

                # SEARCH POSITIVE DRIFT RATES
                spectrum = tree_findoppler[indx:indx + tdwidth]

                # normalize
                spectrum -= the_mean_val
                spectrum /= the_stddev

                n_hits, max_val = hitsearch(spectrum, specstart, specend, snr,
                                            drift_rate, data_obj.header,
                                            tdwidth, max_val, 0)
                info_str = "Found %d hits at drift rate %15.15f\n" % (
                    n_hits, drift_rate)
                max_val.total_n_hits += n_hits
                logger.debug(info_str)
                logwriter.info(info_str)

    # Writing the top hits to file.
    filewriter = tophitsearch(tree_findoppler_original,
                              max_val,
                              tsteps,
                              data_obj.header,
                              tdwidth,
                              fftlen,
                              max_drift,
                              data_obj.obs_length,
                              logwriter=logwriter,
                              filewriter=filewriter,
                              obs_info=obs_info)

    logger.info("Total number of candidates for coarse channel " +
                str(data_obj.header['coarse_chan']) +
                " is: %i" % max_val.total_n_hits)
    data_obj.close()
    filewriter.close()
    return True
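
For orientation only (not shown in the example above): in the simplest serial case,
`FindDoppler.search()` boils down to calling `search_coarse_channel()` once per coarse
channel. The sketch below is a hedged illustration; the import path, constructor
arguments, file name, and the `data_handle.data_list` attribute are assumptions rather
than details taken from this example.

# Hedged usage sketch -- names and parameters below are illustrative assumptions.
from turbo_seti.find_doppler.find_doppler import FindDoppler, search_coarse_channel

fd = FindDoppler('voyager.h5', max_drift=4.0, snr=10.0, out_dir='./')

for data_dict in fd.data_handle.data_list:   # assumed: one dict per coarse channel,
    search_coarse_channel(data_dict, fd)     # with the keys listed in the docstring;
                                             # writes <file>_<chan>.log and .dat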
Example #2
    def search_data(self, data_obj):
        '''
        Search a single coarse channel's spectra for Doppler-drifting signals
        and write any hits via the instance's logwriter and filewriter.
        '''

        try:
            logger.info("Start searching for coarse channel: %s"%data_obj.header[u'coarse_chan'])
            self.logwriter.info("Start searching for %s ; coarse channel: %i "%(data_obj.filename,data_obj.header[u'coarse_chan']))
        except KeyError:
            # Some files store header keys as bytes rather than unicode strings.
            logger.info("Start searching for coarse channel: %s"%data_obj.header[b'coarse_chan'])
            self.logwriter.info("Start searching for %s ; coarse channel: %i "%(data_obj.filename,data_obj.header[b'coarse_chan']))
        spectra, drift_indices = data_obj.load_data()
        tsteps = data_obj.tsteps
        tsteps_valid = data_obj.tsteps_valid
        tdwidth = data_obj.tdwidth
        fftlen = data_obj.fftlen
        nframes = tsteps_valid
        shoulder_size = data_obj.shoulder_size

        if self.flagging:
            ##EE This flags the edges of the PFF for BL data (with 3 Hz resolution per channel).
            ##EE The PFF flat profile falls off after around 100k channels,
            ##EE but it falls slowly enough that 50-80k channels could be used instead.
            median_flag = np.median(spectra)
#             spectra[:,:80000] = median_flag/float(tsteps)
#             spectra[:,-80000:] = median_flag/float(tsteps)

            ##EE Flagging spikes in time series.
            time_series=spectra.sum(axis=1)
            time_series_median = np.median(time_series)
            mask=(time_series-time_series_median)/time_series.std() > 10   #Flagging spikes > 10 in SNR

            if mask.any():
                self.logwriter.info("Found spikes in the time series. Removing ...")
                spectra[mask,:] = time_series_median/float(fftlen)  # So that the value is not the median in the time_series.

        else:
            median_flag = np.array([0])

        # allocate the findoppler tree array, initialized to zero
        # (or to the spectra median when flagging is enabled)
        tree_findoppler = np.zeros(tsteps * tdwidth,dtype=np.float64) + median_flag

        # allocate array for holding original
        # Allocates array in a fast way (without initialize)
        tree_findoppler_original = np.empty_like(tree_findoppler)

        #/* allocate array for negative doppler rates */
        tree_findoppler_flip = np.empty_like(tree_findoppler)

        #/* build index mask for in-place tree doppler correction */
        ibrev = np.zeros(tsteps, dtype=np.int32)

        for i in range(0, tsteps):
            ibrev[i] = bitrev(i, int(np.log2(tsteps)))

##EE: should double check if tdwidth is really better than fftlen here.
        max_val = max_vals()
        if max_val.maxsnr is None:
            max_val.maxsnr = np.zeros(tdwidth, dtype=np.float64)
        if max_val.maxdrift is None:
            max_val.maxdrift = np.zeros(tdwidth, dtype=np.float64)
        if max_val.maxsmooth is None:
            max_val.maxsmooth = np.zeros(tdwidth, dtype='uint8')
        if max_val.maxid is None:
            max_val.maxid = np.zeros(tdwidth, dtype='uint32')
        if max_val.total_n_hits is None:
            max_val.total_n_hits = 0

##EE-debuging
#         hist_val = hist_vals()
#         hist_len = int(np.ceil(2*(self.max_drift-self.min_drift)/data_obj.drift_rate_resolution))
#         if hist_val.histsnr == None:
#             hist_val.histsnr = np.zeros((hist_len,tdwidth), dtype=np.float64)
#         if hist_val.histdrift == None:
#             hist_val.histdrift = np.zeros((hist_len), dtype=np.float64)
#         if hist_val.histid == None:
#             hist_val.histid = np.zeros(tdwidth, dtype='uint32')

        #EE: Making "shoulders" to avoid "edge effects". Could do further testing.
        specstart = int(tsteps*shoulder_size/2)
        specend = tdwidth - (tsteps * shoulder_size)

        #--------------------------------
        #Stats calc
        self.the_mean_val, self.the_stddev = comp_stats(spectra.sum(axis=0))

        #--------------------------------
        #Looping over drift_rate_nblock
        #--------------------------------
        drift_rate_nblock = int(np.floor(self.max_drift / (data_obj.drift_rate_resolution*tsteps_valid)))

##EE-debuging        kk = 0

        for drift_block in range(-1*drift_rate_nblock,drift_rate_nblock+1):
            logger.debug( "Drift_block %i"%drift_block)

            #----------------------------------------------------------------------
            # Negative drift rates search.
            #----------------------------------------------------------------------
            if drift_block <= 0:

                #Populates the findoppler tree with the spectra
                populate_tree(spectra,tree_findoppler,nframes,tdwidth,tsteps,fftlen,shoulder_size,roll=drift_block,reverse=1)

                #/* populate original array */
                np.copyto(tree_findoppler_original, tree_findoppler)

                #/* populate neg doppler array */
                np.copyto(tree_findoppler_flip, tree_findoppler_original)
                
                #/* Flip matrix across X dimension to search negative doppler drift rates */
                FlipX(tree_findoppler_flip, tdwidth, tsteps)
                logger.info("Doppler correcting reverse...")
                tt.taylor_flt(tree_findoppler_flip, tsteps * tdwidth, tsteps)
                logger.debug( "done...")
                
                complete_drift_range = data_obj.drift_rate_resolution*np.array(range(-1*tsteps_valid*(np.abs(drift_block)+1)+1,-1*tsteps_valid*(np.abs(drift_block))+1))
                for k,drift_rate in enumerate(complete_drift_range[(complete_drift_range<self.min_drift) & (complete_drift_range>=-1*self.max_drift)]):
                    # indx  = ibrev[drift_indices[::-1][k]] * tdwidth
                    indx  = ibrev[drift_indices[::-1][(complete_drift_range<self.min_drift) & (complete_drift_range>=-1*self.max_drift)][k]] * tdwidth

                    #/* SEARCH NEGATIVE DRIFT RATES */
                    spectrum = tree_findoppler_flip[indx: indx + tdwidth]

                    #/* normalize */
                    spectrum -= self.the_mean_val
                    spectrum /= self.the_stddev

                    #Reverse spectrum back
                    spectrum = spectrum[::-1]

##EE old wrong use of reverse            n_hits, max_val = hitsearch(spectrum, specstart, specend, self.snr, drift_rate, data_obj.header, fftlen, tdwidth, channel, max_val, 1)
                    n_hits, max_val = hitsearch(spectrum, specstart, specend, self.snr, drift_rate, data_obj.header, fftlen, tdwidth, max_val, 0)
                    info_str = "Found %d hits at drift rate %15.15f\n"%(n_hits, drift_rate)
                    max_val.total_n_hits += n_hits
                    logger.debug(info_str)
                    self.logwriter.info(info_str)

##EE-debuging                    np.save(self.out_dir + '/spectrum_dr%f.npy'%(drift_rate),spectrum)

##EE-debuging                    hist_val.histsnr[kk] = spectrum
##EE-debuging                    hist_val.histdrift[kk] = drift_rate
##EE-debuging                    kk+=1

            #----------------------------------------------------------------------
            # Positive drift rates search.
            #----------------------------------------------------------------------
            if drift_block >= 0:

                #Populates the findoppler tree with the spectra
                populate_tree(spectra,tree_findoppler,nframes,tdwidth,tsteps,fftlen,shoulder_size,roll=drift_block,reverse=1)

                #/* populate original array */
                np.copyto(tree_findoppler_original, tree_findoppler)

                logger.info("Doppler correcting forward...")
                tt.taylor_flt(tree_findoppler, tsteps * tdwidth, tsteps)
                logger.debug( "done...")
                if (tree_findoppler == tree_findoppler_original).all():
                     logger.error("taylor_flt has no effect?")
                else:
                     logger.debug("tree_findoppler changed")

                ##EE: Calculates the range of drift rates for a full drift block.
                complete_drift_range = data_obj.drift_rate_resolution*np.array(range(tsteps_valid*(drift_block),tsteps_valid*(drift_block +1)))

                for k,drift_rate in enumerate(complete_drift_range[(complete_drift_range>=self.min_drift) & (complete_drift_range<=self.max_drift)]):

                    indx  = ibrev[drift_indices[k]] * tdwidth
                    #/* SEARCH POSITIVE DRIFT RATES */
                    spectrum = tree_findoppler[indx: indx+tdwidth]

                    #/* normalize */
                    spectrum -= self.the_mean_val
                    spectrum /= self.the_stddev

                    n_hits, max_val = hitsearch(spectrum, specstart, specend, self.snr, drift_rate, data_obj.header, fftlen, tdwidth, max_val, 0)
                    info_str = "Found %d hits at drift rate %15.15f\n"%(n_hits, drift_rate)
                    max_val.total_n_hits += n_hits
                    logger.debug(info_str)
                    self.logwriter.info(info_str)

                    #-------

##EE-debuging                    np.save(self.out_dir + '/spectrum_dr%f.npy'%(drift_rate),spectrum)

##EE-debuging                    hist_val.histsnr[kk] = spectrum
##EE-debuging                    hist_val.histdrift[kk] = drift_rate
##EE-debuging                    kk+=1
        #-------
##EE-debuging        np.save(self.out_dir + '/histsnr.npy', hist_val.histsnr)
##EE-debuging        np.save(self.out_dir + '/histdrift.npy', hist_val.histdrift)

        #----------------------------------------
        # Writing the top hits to file.

#         self.filewriter.report_coarse_channel(data_obj.header,max_val.total_n_hits)
        self.filewriter = tophitsearch(tree_findoppler_original, max_val, tsteps, nframes,
                                       data_obj.header, tdwidth, fftlen, self.max_drift,
                                       data_obj.obs_length, out_dir=self.out_dir,
                                       logwriter=self.logwriter, filewriter=self.filewriter,
                                       obs_info=self.obs_info)
        try:
            logger.info("Total number of candidates for coarse channel "+ str(data_obj.header[u'coarse_chan']) +" is: %i"%max_val.total_n_hits)
        except KeyError:
            logger.info("Total number of candidates for coarse channel "+ str(data_obj.header[b'coarse_chan']) +" is: %i"%max_val.total_n_hits)