Exemplo n.º 1
0
def myexecute(hotpotato):
    """Turn one time/frequency chunk of a filterbank file into a cleaned dynamic spectrum.

    Processing order: read the header, load the requested time range, put the
    frequency axis in ascending order, load or compute the median bandpass,
    clip the band edges, correct the bandpass shape, optionally apply an
    rfifind mask, optionally remove the zero-DM signal, smooth/downsample,
    subtract residual trends, clip masked channels at the band edges, plot
    the result, and optionally write it to disk.

    Parameters
    ----------
    hotpotato : dict
        Run configuration. Keys read by this function:
        'DATA_DIR', 'fil_file', 'start_time', 'end_time', 'pol',
        'bandpass_method' ('file' or 'compute'), 'BANDPASS_DIR',
        'bandpass_npz', 'freq_band_low', 'freq_band_high', 'apply_rfimask',
        'RFIMASK_DIR', 'rfimask', 'remove_zerodm', 'smoothing_method',
        'convolution_method', 'kernel_size_freq_chans',
        'kernel_size_time_samples', 'OUTPUT_DIR', 'basename', 'show_plot',
        'cmap', 'do_write', 'write_format'.
        Some keys are only read when the corresponding option is enabled.

    Returns
    -------
    data : array
        Processed dynamic spectrum, indexed as data[channel, time sample].
    freqs_GHz : array
        Frequencies (GHz) matching the first axis of ``data``.
    times : array
        Times (s) as returned by ``smooth_master``.

    Raises
    ------
    SystemExit
        If ``hotpotato['bandpass_method']`` is neither 'file' nor 'compute'.
    IndexError
        If a time or frequency selection matches no samples (the
        ``np.where(...)[0][-1]`` / ``[0][0]`` lookups fail on empty results).
    """
    print('Reading header of file: %s' % (hotpotato['fil_file']))
    fil_path = hotpotato['DATA_DIR'] + '/' + hotpotato['fil_file']
    hdr = Header(fil_path, file_type='filterbank')  # Returns a Header object
    tot_time_samples = hdr.ntsamples  # Total no. of time samples in entire dynamic spectrum.
    t_samp = hdr.t_samp  # Sampling time (s)
    chan_bw = hdr.chan_bw  # Channel bandwidth (MHz)
    nchans = hdr.nchans  # No. of channels
    npol = hdr.npol  # No. of polarizations
    n_bytes = hdr.primary['nbits'] / 8.0  # No. of bytes per data sample
    hdr_size = hdr.primary['hdr_size']  # Header size (bytes)
    times = np.arange(tot_time_samples) * t_samp  # 1D array of times (s)
    # Set up frequency array. Header values are in MHz; convert to GHz.
    freqs_GHz = (hdr.fch1 + np.arange(nchans) * chan_bw) * 1e-3
    print(hdr)

    # Slice time axis according to the start and end times specified.
    # Both bounds snap to the last sample at or before the requested time;
    # the upper bound is made exclusive by the +1.
    ind_time_low = np.where(times <= hotpotato['start_time'])[0][-1]
    ind_time_high = np.where(times <= hotpotato['end_time'])[0][-1] + 1
    times = times[ind_time_low:ind_time_high]

    # Read in a chunk of filterbank data. The context manager guarantees the
    # file handle is released even if loading fails.
    # NOTE(review): assumes load_fil_data returns an in-memory array that
    # stays valid after the file is closed -- confirm against its definition.
    print('Reading in data.')
    with open(fil_path, 'rb') as f:
        current_cursor_position = f.tell()
        data = load_fil_data(f, ind_time_low, ind_time_high, npol, nchans,
                             n_bytes, hdr_size, hotpotato['pol'],
                             current_cursor_position)

    # Flip frequency axis if chan_bw<0.
    if chan_bw < 0:
        print('Channel bandwidth is negative.')
        print('Flipping frequency axis of DS')
        data = np.flip(data, axis=0)
        freqs_GHz = np.flip(freqs_GHz)
        print('Frequencies rearranged in ascending order.')

    # Load/compute median bandpass.
    if hotpotato['bandpass_method'] == 'file':
        print('Loading median bandpass from %s' % (hotpotato['bandpass_npz']))
        # NOTE(review): allow_pickle=True executes pickled content on load;
        # only safe for bandpass files from a trusted source.
        median_bp = np.load(hotpotato['BANDPASS_DIR'] + '/' +
                            hotpotato['bandpass_npz'],
                            allow_pickle=True)['Median bandpass']
        print('Median bandpass loaded.')
    elif hotpotato['bandpass_method'] == 'compute':
        print('Computing median bandpass')
        median_bp = calc_median_bandpass(data)
    else:
        print('Unrecognized bandpass computation method. Quitting program..')
        sys.exit(1)

    # Chop bandpass edges: keep channels within [freq_band_low, freq_band_high].
    ind_band_low = np.where(freqs_GHz >= hotpotato['freq_band_low'])[0][0]
    ind_band_high = np.where(
        freqs_GHz <= hotpotato['freq_band_high'])[0][-1] + 1
    freqs_GHz = freqs_GHz[ind_band_low:ind_band_high]
    data = data[ind_band_low:ind_band_high]
    median_bp = median_bp[ind_band_low:ind_band_high]
    print('Bandpass edges clipped.')

    # Correct bandpass shape.
    print('Correcting data for bandpass shape')
    if 0 in median_bp:
        # Zero bandpass entries are replaced by the median of the non-zero
        # entries, and the matching data rows are set to that same value.
        print('Replacing zeros in bandpass shape with median values')
        indices_zero_bp = np.where(median_bp == 0)[0]
        replace_value = np.median(median_bp[np.where(median_bp != 0)[0]])
        median_bp[indices_zero_bp] = replace_value
        data[indices_zero_bp] = replace_value
    data = correct_bandpass(data, median_bp)

    # Read and apply rfifind mask.
    if hotpotato['apply_rfimask']:
        print('Reading rfifind mask %s' % (hotpotato['rfimask']))
        # The per-mask interval count and the list of fully zapped intervals
        # returned by read_rfimask are not needed below.
        (_, int_times, ptsperint, mask_zap_chans, _,
         mask_zap_chans_per_int) = read_rfimask(
             hotpotato['RFIMASK_DIR'] + '/' + hotpotato['rfimask'])
        mask_zap_chans, mask_zap_chans_per_int = modify_zapchans_bandpass(
            mask_zap_chans, mask_zap_chans_per_int, ind_band_low,
            ind_band_high)
        # Restrict the mask intervals to those overlapping the loaded chunk.
        idx1 = np.where(int_times <= times[0])[0][-1]
        idx2 = np.where(int_times <= times[-1])[0][-1] + 1
        nint = idx2 - idx1
        int_times = int_times[idx1:idx2]
        mask_zap_chans_per_int = mask_zap_chans_per_int[idx1:idx2]
        # Apply rfifind mask on data.
        boolean_rfimask = np.zeros(data.shape, dtype=bool)
        for i in range(nint):
            if i == nint - 1:
                # Last interval extends to the end of the loaded chunk.
                tstop_int = len(times)
            else:
                tstop_int = np.min(np.where(times >= int_times[i + 1])[0])
            tstart_int = np.min(np.where(times >= int_times[i])[0])
            boolean_rfimask[mask_zap_chans_per_int[i],
                            tstart_int:tstop_int] = True
        print('Applying RFI mask on data')
        data = np.ma.MaskedArray(data, mask=boolean_rfimask)
        # Replaced masked entries with mean value.
        print('Replacing masked entries with mean values')
        data = np.ma.filled(data, fill_value=np.nanmean(data))
        # Set up list of channels to mask in downsampled data: a downsampled
        # channel is masked only if every original channel that falls into
        # it (kernel_size_freq_chans of them) is zapped.
        downsampled_chan, n_zapped = np.unique(
            np.sort(mask_zap_chans) // hotpotato['kernel_size_freq_chans'],
            return_counts=True)
        # Force an integer dtype so that an empty selection is still a valid
        # index array (np.array([]) defaults to float64, which cannot index).
        mask_chans = downsampled_chan[
            n_zapped == hotpotato['kernel_size_freq_chans']].astype(int)
    else:
        mask_chans = None

    # Remove zerodm signal.
    if hotpotato['remove_zerodm']:
        data = remove_additive_time_noise(data)[0]

    # Smooth and/or downsample the data. If the chosen smoothing method is
    # not 'Blockavg2D', a second 'Blockavg2D' pass is run afterwards.
    data, freqs_GHz, times = smooth_master(
        data, hotpotato['smoothing_method'], hotpotato['convolution_method'],
        hotpotato['kernel_size_freq_chans'],
        hotpotato['kernel_size_time_samples'], freqs_GHz, times)
    if hotpotato['smoothing_method'] != 'Blockavg2D':
        data, freqs_GHz, times = smooth_master(
            data, 'Blockavg2D', hotpotato['convolution_method'],
            hotpotato['kernel_size_freq_chans'],
            hotpotato['kernel_size_time_samples'], freqs_GHz, times)

    # Remove residual spectral trend (per-channel median along time).
    print('Removing residual spectral trend')
    data = data - np.median(data, axis=1)[:, None]

    # Remove any residual temporal trend (per-sample median along frequency).
    if hotpotato['remove_zerodm']:
        data = data - np.median(data, axis=0)[None, :]
        print('Zerodm removal completed.')

    if mask_chans is not None:
        data[mask_chans] = 0.0

        # Clip off masked channels at edges of the frequency band.
        # Lowest channel not to be masked.
        # NOTE(review): the test looks at low_ch_index + 1, not low_ch_index,
        # so channel 0 itself is never examined -- confirm this is intended.
        low_ch_index = 0
        while low_ch_index + 1 in mask_chans:
            low_ch_index += 1
        # Highest channel not to be masked.
        high_ch_index = len(freqs_GHz) - 1
        while high_ch_index in mask_chans:
            high_ch_index -= 1
        freqs_GHz = freqs_GHz[low_ch_index:high_ch_index + 1]
        data = data[low_ch_index:high_ch_index + 1]
        # Modify channel mask to reflect properties of updated data range.
        mask_chans = np.delete(mask_chans, np.where(mask_chans < low_ch_index))
        mask_chans = np.delete(mask_chans,
                               np.where(mask_chans > high_ch_index))
        mask_chans = np.array(mask_chans - low_ch_index, dtype=int)

    # Produce imshow plot of data.
    if not os.path.isdir(hotpotato['OUTPUT_DIR']):
        os.makedirs(hotpotato['OUTPUT_DIR'])

    data_mean = np.mean(data)
    data_std = np.std(data)
    plot_ds(data,
            times,
            freqs_GHz,
            hotpotato['OUTPUT_DIR'] + '/' + hotpotato['basename'],
            show_plot=hotpotato['show_plot'],
            time_unit='s',
            freq_unit='GHz',
            flux_unit='arbitrary units',
            vmin=data_mean - 2 * data_std,
            vmax=data_mean + 5 * data_std,
            log_colorbar=False,
            cmap=hotpotato['cmap'],
            mask_chans=mask_chans)

    # Update header to reflect data properties (frequencies back in MHz).
    hdr.primary.pop('hdr_size', None)
    hdr.primary['fch1'] = freqs_GHz[0] * 1e3
    hdr.primary['foff'] = (freqs_GHz[1] - freqs_GHz[0]) * 1e3
    hdr.primary['nchans'] = len(freqs_GHz)
    hdr.primary['nifs'] = 1
    hdr.primary['tsamp'] = times[1] - times[0]
    hdr.primary['nbits'] = 32  # Cast data to np.float32 type.

    # Write data to either .npz file or a filterbank file.
    if hotpotato['do_write']:
        if hotpotato['write_format'] == 'npz':
            write_npz(data, freqs_GHz, times, mask_chans, hotpotato)
        elif hotpotato['write_format'] in ('fil', 'filterbank'):
            write_fil(data, times, freqs_GHz, hdr, hotpotato)
        else:
            # NOTE(review): despite the message, execution is not terminated
            # here; the processed arrays are still returned to the caller.
            print(
                'File write format not recognized. Terminating program execution.'
            )

    return data, freqs_GHz, times
Exemplo n.º 2
0
def myexecute(cand_index, cand_DMs, cand_sigma, cand_dedisp_times, downfact,
              metadata, int_times, mask_zap_chans, mask_zap_chans_per_int,
              freqs_GHz, tot_time_samples, t_samp, chan_bw, npol, nchans,
              n_bytes, hdr_size, hotpotato, f, rank):
    """Build the verification plot (and optional .npz dump) for one single-pulse candidate.

    A chunk of filterbank data around the candidate time is loaded from the
    already-open file ``f``, cleaned (bandpass correction, optional rfifind
    mask, optional zero-DM removal), smoothed/downsampled, dedispersed at the
    candidate DM, and handed to ``spcand_verification_plot``.

    Parameters
    ----------
    cand_index : int
        Index of the candidate within the ``cand_*`` sequences.
    cand_DMs, cand_sigma, cand_dedisp_times : sequence
        Per-candidate DM (pc/cc), significance and time (s). ``cand_sigma``
        is only forwarded to the plotting routine.
    downfact : number
        Width (samples) of the boxcar filter applied to the dedispersed data
        when ``hotpotato['do_smooth_dedisp']`` is set.
    metadata : object
        Forwarded unchanged to ``spcand_verification_plot``.
    int_times, mask_zap_chans, mask_zap_chans_per_int : sequence
        rfifind mask description; used only when
        ``hotpotato['apply_rfimask']`` is set.
    freqs_GHz : array
        Frequencies (GHz) of the band-clipped, un-smoothed data.
    tot_time_samples : int
        Total number of time samples in the file; upper bound for the chunk.
    t_samp : float
        Sampling time (s).
    chan_bw : float
        Channel bandwidth; a negative value triggers a frequency-axis flip.
    npol, nchans, n_bytes, hdr_size
        File layout values forwarded to ``load_fil_data``.
    hotpotato : dict
        Run configuration. NOTE: ``hotpotato['median_bp']`` is written when
        ``bandpass_method == 'compute'`` and has its zero entries overwritten
        in place, so the caller's dictionary is modified.
    f : file object
        Open filterbank file; it is read from but not closed here.
    rank : int
        Process rank, used only as a prefix in log messages.

    Returns
    -------
    None
    """
    print('RANK %d: Working on candidate at index %d' % (rank, cand_index))
    DM = cand_DMs[cand_index]  # DM (pc/cc) of single pulse candidate
    cand_time = cand_dedisp_times[cand_index]  # Candidate time (s)
    # Extra time of data to be loaded around the candidate time.
    t_ex = calc_tDM(freqs_GHz[0], DM, freqs_GHz[-1])
    if DM < 15.0:
        # Enforce a minimum padding of 0.2 s for low-DM candidates.
        t_ex = np.max([0.2, t_ex])
    # Index of start time of data chunk to be loaded (never below 0).
    if hotpotato['t_before'] is not None:
        tstart = np.max(
            [0., np.floor(
                (cand_time - hotpotato['t_before']) / t_samp)]).astype(int)
    else:
        tstart = np.max([0, np.floor((cand_time - t_ex) / t_samp)]).astype(int)
    # Index of stop time of data chunk to be loaded (never past end of file).
    if hotpotato['t_after'] is not None:
        tstop = np.min([
            tot_time_samples,
            np.floor((cand_time + hotpotato['t_after']) / t_samp)
        ]).astype(int)
    else:
        tstop = np.min(
            [tot_time_samples,
             np.floor((cand_time + 2 * t_ex) / t_samp)]).astype(int)
    # 1D array of times (s)
    times = np.arange(tstart, tstop) * t_samp
    # Data chunk to load
    data = load_fil_data(f, tstart, tstop, npol, nchans, n_bytes, hdr_size,
                         hotpotato['pol'], f.tell())

    # Flip frequency axis of DS if channel bandwidth is negative.
    if chan_bw < 0:
        print('RANK %d: Flipping frequency axis of DS' % (rank))
        data = np.flip(data, axis=0)
    # Clip bandpass edges.
    data = data[hotpotato['ind_band_low']:hotpotato['ind_band_high']]

    # Compute bandpass if needed.
    if hotpotato['bandpass_method'] == 'compute':
        hotpotato['median_bp'] = calc_median_bandpass(data)
    # Correct data for bandpass shape.
    print('RANK %d: Correcting data for bandpass shape' % (rank))
    median_bp = hotpotato['median_bp']
    if 0 in median_bp:
        # Zero bandpass entries are replaced (in place) by the median of the
        # non-zero entries; matching data rows are set to the same value.
        indices_zero_bp = np.where(median_bp == 0)[0]
        replace_value = np.median(median_bp[np.where(median_bp != 0)[0]])
        median_bp[indices_zero_bp] = replace_value
        data[indices_zero_bp] = replace_value
    data = correct_bandpass(data, median_bp)

    # Apply rfifind mask on data.
    if hotpotato['apply_rfimask']:
        # Restrict the mask intervals to those overlapping the loaded chunk.
        idx1 = np.where(int_times <= times[0])[0][-1]
        idx2 = np.where(int_times < times[-1])[0][-1] + 1
        cand_nint = idx2 - idx1
        cand_int_times = int_times[idx1:idx2]
        cand_mask_zap_chans_per_int = mask_zap_chans_per_int[idx1:idx2]
        # Boolean rfifind mask
        boolean_rfimask = np.zeros(data.shape, dtype=bool)
        for i in range(cand_nint):
            if i == cand_nint - 1:
                # Last interval runs to the end of the chunk. The chunk
                # length is used rather than the absolute sample index
                # `tstop`, which only worked through slice clipping.
                tstop_int = data.shape[1]
            else:
                tstop_int = np.min(np.where(times >= cand_int_times[i + 1])[0])
            tstart_int = np.min(np.where(times >= cand_int_times[i])[0])
            boolean_rfimask[cand_mask_zap_chans_per_int[i],
                            tstart_int:tstop_int] = True
        print('RANK %d: Applying RFI mask on data' % (rank))
        data = np.ma.MaskedArray(data, mask=boolean_rfimask)
        # Replaced masked entries with mean value.
        print('RANK %d: Replacing masked entries with mean values' % (rank))
        data = np.ma.filled(data, fill_value=np.nanmean(data))
        # Set up list of channels to mask in downsampled data: a downsampled
        # channel is masked only if every original channel that falls into
        # it (kernel_size_freq_chans of them) is zapped.
        downsampled_chan, n_zapped = np.unique(
            np.sort(mask_zap_chans) // hotpotato['kernel_size_freq_chans'],
            return_counts=True)
        # Force an integer dtype so that an empty selection is still a valid
        # index array (np.array([]) defaults to float64, which cannot index).
        mask_chans = downsampled_chan[
            n_zapped == hotpotato['kernel_size_freq_chans']].astype(int)
    else:
        mask_chans = None

    # Remove zerodm signal.
    if hotpotato['remove_zerodm']:
        data = remove_additive_time_noise(data)[0]
        print('RANK %d: Zerodm removal completed.' % (rank))

    # Smooth and/or downsample the data. The time kernel is taken from the
    # highest DM category whose lower bound does not exceed the candidate DM.
    kernel_size_time_samples = hotpotato['downsamp_time'][np.where(
        np.array(hotpotato['low_dm_cats']) <= DM)[0][-1]]
    data, freqs_GHz_smoothed, times = smooth_master(
        data, hotpotato['smoothing_method'], hotpotato['convolution_method'],
        hotpotato['kernel_size_freq_chans'], kernel_size_time_samples,
        freqs_GHz, times)
    if hotpotato['smoothing_method'] != 'Blockavg2D':
        data, freqs_GHz_smoothed, times = smooth_master(
            data, 'Blockavg2D', hotpotato['convolution_method'],
            hotpotato['kernel_size_freq_chans'], kernel_size_time_samples,
            freqs_GHz_smoothed, times)

    # Remove residual spectral trend (per-channel median along time).
    print('RANK %d: Residual spectral trend subtracted.' % (rank))
    data = data - np.median(data, axis=1)[:, None]

    # Remove any residual temporal trend (per-sample median along frequency).
    if hotpotato['remove_zerodm']:
        data = data - np.median(data, axis=0)[None, :]
        # NOTE(review): masked channels are zeroed only when zero-DM removal
        # is enabled -- confirm this coupling is intended.
        if mask_chans is not None:
            data[mask_chans] = 0.0
        print('RANK %d: Zerodm removal completed.' % (rank))

    # Clip off masked channels at edges of the frequency band. This must act
    # on the smoothed frequency axis: `data` and `mask_chans` are both in
    # downsampled channel indices, and everything below pairs `data` with
    # `freqs_GHz_smoothed`.
    if mask_chans is not None:
        # Lowest channel not to be masked.
        # NOTE(review): the test looks at low_ch_index + 1, not low_ch_index,
        # so channel 0 itself is never examined -- confirm this is intended.
        low_ch_index = 0
        while low_ch_index + 1 in mask_chans:
            low_ch_index += 1
        # Highest channel not to be masked.
        high_ch_index = len(freqs_GHz_smoothed) - 1
        while high_ch_index in mask_chans:
            high_ch_index -= 1
        freqs_GHz_smoothed = freqs_GHz_smoothed[low_ch_index:high_ch_index + 1]
        data = data[low_ch_index:high_ch_index + 1]
        # Modify channel mask to reflect properties of updated data range.
        mask_chans = np.delete(mask_chans, np.where(mask_chans < low_ch_index))
        mask_chans = np.delete(mask_chans,
                               np.where(mask_chans > high_ch_index))
        mask_chans = np.array(mask_chans - low_ch_index, dtype=int)

    # Dedisperse the data at DM of candidate detection.
    dedisp_ds, dedisp_times, dedisp_timeseries = dedisperse_ds(
        data, freqs_GHz_smoothed, DM, freqs_GHz_smoothed[-1],
        freqs_GHz_smoothed[0], times[1] - times[0], times[0])

    # Smooth dedispersed dynamic spectrum and dedispersed time series using a
    # boxcar matched-filter of size "downfact" samples.
    if hotpotato['do_smooth_dedisp']:
        # A uniform (boxcar) filter with a width equal to downfact,
        # normalized to unit integral.
        boxcar_kernel = boxcar(int(downfact))
        boxcar_kernel = boxcar_kernel / np.sum(boxcar_kernel)
        print(
            'RANK %d: Convolving dedispersed dynamic spectrum along time with a Boxcar matched filter of width %d bins'
            % (rank, downfact))
        # Smoothed dedispersed dynamic spectrum
        dedisp_ds = convolve1d(dedisp_ds, boxcar_kernel, axis=-1)
        dedisp_timeseries = np.sum(dedisp_ds, axis=0)

    # Candidate verification plot
    data_mean = np.mean(data)
    data_std = np.std(data)
    spcand_verification_plot(cand_index,
                             cand_dedisp_times,
                             cand_DMs,
                             cand_sigma,
                             metadata,
                             data,
                             times,
                             freqs_GHz_smoothed,
                             dedisp_ds,
                             dedisp_timeseries,
                             dedisp_times,
                             SAVE_DIR=hotpotato['OUTPUT_DIR'],
                             output_formats=hotpotato['output_formats'],
                             show_plot=hotpotato['show_plot'],
                             low_DM_cand=hotpotato['low_DM_cand'],
                             high_DM_cand=hotpotato['high_DM_cand'],
                             mask_chans=mask_chans,
                             vmin=data_mean - 2 * data_std,
                             vmax=data_mean + 5 * data_std,
                             cmap=hotpotato['cmap'],
                             do_smooth_dedisp=hotpotato['do_smooth_dedisp'],
                             filter_width=int(downfact))

    # Write smoothed dynamic spectrum to disk as .npz file.
    if hotpotato['write_npz']:
        npz_filename = hotpotato['OUTPUT_DIR'] + '/' + hotpotato[
            'basename'] + '_t%.2f_DM%.1f' % (cand_time, DM)
        write_npz_data(data, freqs_GHz_smoothed, times, mask_chans,
                       npz_filename)
# Script fragment: clip the band edges, compute and plot the median bandpass,
# correct and smooth the dynamic spectrum, then derive colour-scale limits.
# NOTE(review): `dict`, `freqs_GHz`, `data`, `times`, `freq_unit` and
# `flux_unit` are defined outside this view. `dict` is used as a settings
# mapping here and shadows the builtin of the same name.

# Keep only channels within [freq_band_low, freq_band_high]; the upper index
# is inclusive, hence the +1 in the slices below.
ind_band_low = np.where(freqs_GHz >= dict['freq_band_low'])[0][0]
ind_band_high = np.where(freqs_GHz <= dict['freq_band_high'])[0][-1]
freqs_GHz = freqs_GHz[ind_band_low:ind_band_high + 1]
data = data[ind_band_low:ind_band_high + 1]

# Calculate median bandpass: one median per entry along the first axis of data.
print('Calculating median bandpass...')
median_bp = np.zeros(len(data))
for i in range(len(data)):
    median_bp[i] = np.median(data[i])
print('Calculation completed.')
plot_bandpass(freqs_GHz, median_bp, freq_unit, flux_unit, dict['basename'],
              dict['BANDPASS_DIR'], dict['show_plot'])

# Correct data for bandpass shape.
data = correct_bandpass(data, median_bp)
# Smooth the dynamic spectrum using specified parameters.
data, freqs_GHz, times = smooth_master(data, dict['smoothing_method'],
                                       dict['convolution_method'],
                                       dict['kernel_size_freq_chans'],
                                       dict['kernel_size_time_samples'],
                                       freqs_GHz, times)
# Time resolution after smoothing, taken from the spacing of the first two
# entries of the returned time array.
t_samp_smoothed = times[1] - times[0]
# vmin: NaN-ignoring percentile of the data when a percentile is configured,
# otherwise None.
if (dict['vmin_percentile'] != None):
    vmin = np.nanpercentile(data, dict['vmin_percentile'])
else:
    vmin = None
# vmax: NaN-ignoring percentile of the data when a percentile is configured.
if (dict['vmax_percentile'] != None):
    vmax = np.nanpercentile(data, dict['vmax_percentile'])