def read_singlepulse_files(infiles, threshold, T_start, T_end):
    """Read previously written .singlepulse candidate files.

    For every entry of *infiles* the matching ".inf" file (same name with a
    trailing ".singlepulse" removed) is loaded through infodata.infodata, and
    the candidate rows of the file itself are read with Num.loadtxt.

    Parameters:
        infiles: sequence of file names, normally ending in ".singlepulse".
        threshold: a row is kept only when its second column is >= this value.
        T_start: rows whose third column is below this value are skipped.
        T_end: reading of a file stops at the first row whose third column
            exceeds this value (rows are therefore expected in increasing
            order of that column).

    Returns:
        (info0, DMs, candlist, num_v_DMstr) where info0 is the infodata object
        of the first file, DMs is the sorted list of info.DM values, candlist
        holds one candidate(*row) per accepted row, and num_v_DMstr maps
        "%.2f" % DM to the number of accepted rows for that DM.

    Raises:
        UnboundLocalError: if *infiles* is empty (info0 is never assigned).
    """
    DMs = []
    candlist = []
    num_v_DMstr = {}
    for ii, infile in enumerate(infiles):
        if infile.endswith(".singlepulse"):
            filenmbase = infile[:infile.rfind(".singlepulse")]
        else:
            filenmbase = infile
        info = infodata.infodata(filenmbase + ".inf")
        DMstr = "%.2f" % info.DM
        DMs.append(info.DM)
        num_v_DMstr[DMstr] = 0
        if ii == 0:
            info0 = info
        # Zero-length files cannot contain candidates; skip parsing them
        if os.path.getsize(infile):
            try:
                cands = Num.loadtxt(infile)
                # A file with a single row comes back 1-D: promote it to 2-D
                if len(cands.shape) == 1:
                    cands = Num.asarray([cands])
                for cand in cands:
                    if cand[2] < T_start:
                        continue
                    if cand[2] > T_end:
                        break
                    if cand[1] >= threshold:
                        candlist.append(candidate(*cand))
                        num_v_DMstr[DMstr] += 1
            except (IndexError, ValueError):
                # No candidates in the file (e.g. header only, which yields an
                # empty row and an IndexError on cand[2]) or unparsable text.
                # Only these are ignored; anything else is a real error and
                # is allowed to propagate instead of being silently dropped.
                pass
    DMs.sort()
    return info0, DMs, candlist, num_v_DMstr
def __init__(self, filename):
    """Set up the object from the name of one "_rfifind.*" result file.

    The base name is *filename* cut just after "_rfifind" (the index of
    "_rfifind." plus the 8 characters of "_rfifind").  The ".inf" file for
    that base name is loaded, then the remaining loaders are run in order.

    NOTE(review): when "_rfifind." does not occur in *filename*, find()
    returns -1 and the base name becomes the first 7 characters of
    *filename* -- confirm callers always pass an rfifind product.
    """
    stem_end = filename.find("_rfifind.") + 8
    self.basename = filename[:stem_end]
    self.idata = infodata.infodata(self.basename + ".inf")
    # Run the loaders one after another, in the original sequence
    for step_name in ("read_stats", "read_mask", "get_bandpass",
                      "get_median_bandpass", "determine_padvals"):
        getattr(self, step_name)()
def get_obs_info(inffile):
    """Summarise an observation from its .inf file.

    The file is parsed with infodata.infodata and a dictionary is returned
    with these keys:

        'T'         -- inf.dt * inf.N (total observation time, s)
        'RA'        -- inf.RA
        'dec'       -- inf.DEC
        'src'       -- inf.object
        'MJD'       -- inf.epoch
        'telescope' -- inf.telescope
        'freq'      -- centre frequency, computed from inf.numchan,
                       inf.chan_width and inf.lofreq
    """
    inf = infodata.infodata(inffile)
    obs = {}
    obs['T'] = inf.dt * inf.N  # total observation time (s)
    obs['RA'] = inf.RA
    obs['dec'] = inf.DEC
    obs['src'] = inf.object
    obs['MJD'] = inf.epoch
    obs['telescope'] = inf.telescope
    # Offset (in channels) of the band centre from the lowest channel
    centre_chan = inf.numchan / 2 - 0.5
    obs['freq'] = centre_chan * inf.chan_width + inf.lofreq  # center freq
    return obs
#!/usr/bin/env python from __future__ import (print_function, division) import presto.psr_utils as pu import sys from presto.infodata import infodata if len(sys.argv) != 2: print("chooseN <file.inf|numpoints>") print( " Prints a good value for fast FFTs to be used for -numout in prepdata/prepsubband" ) sys.exit(1) if sys.argv[1].endswith('.inf'): inf = infodata(sys.argv[1]) n = inf.N else: try: n = int(sys.argv[1]) except: print("chooseN <file.inf|numpoints>") print( " Prints a good value for fast FFTs to be used for -numout in prepdata/prepsubband" ) sys.exit(2) print(pu.choose_N(n))
def main():
    """Split a time series into overlapping chunks and accel/jerk-search each.

    The first positional argument names the data file (with or without the
    ".dat" suffix); it is looked up one directory above the working
    directory ("../").  For each chunk of --number million points the
    routine writes a chunk .inf file, extracts the samples with dd, FFTs
    them, zaps the birdies listed in the first "../*.birds" file and runs
    accelsearch, copying the result files to --outdir.

    Exits with status 1 when --number is >= 1000000 but not a multiple of
    1000000, or when no "../*.birds" file exists.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage)
    parser.add_option("-n", "--number", type="int", dest="nM", default=40,
                      help="Number of points in each chunk (millions)")
    parser.add_option("-o", "--outdir", type="string", dest="outdir",
                      default=".",
                      help="Output directory to store results")
    parser.add_option("-d", "--workdir", type="string", dest="workdir",
                      default=".",
                      help="Working directory for search")
    parser.add_option("-l", "--flo", type="float", dest="flo", default=10.0,
                      help="Low frequency (Hz) to search")
    parser.add_option("-f", "--frac", type="float", dest="frac", default=0.5,
                      help="Fraction to overlap")
    parser.add_option("-x", "--fhi", type="float", dest="fhi",
                      default=10000.0,
                      help="High frequency (Hz) to search")
    parser.add_option("-z", "--zmax", type="int", dest="zmax", default=160,
                      help="Maximum fourier drift (bins) to search")
    parser.add_option("-w", "--wmax", type="int", dest="wmax", default=0,
                      help="Maximum fourier drift deriv (bins) to search")
    parser.add_option("-a", "--numharm", type="int", dest="numharm",
                      default=4,
                      help="Number of harmonics to sum when searching")
    parser.add_option("-s", "--sigma", type="float", dest="sigma",
                      default=2.0,
                      help="Cutoff sigma to consider a candidate")
    (options, args) = parser.parse_args()
    if not args:
        parser.error("no input data file given")
    # Use the parsed positional argument rather than argv[1], which is an
    # option string whenever options are given before the file name.
    datafile = args[0]
    if options.outdir[-1] != "/":
        options.outdir = options.outdir + "/"
    if options.workdir != '.':
        chdir(options.workdir)
    # --number is given in millions unless it is already >= 1000000
    if options.nM >= 1000000:
        if options.nM % 1000000:
            print(
                "If you specify --num nM to be > 1000000, it must be divisible by 1000000."
            )
            exit(1)
    else:
        options.nM *= 1000000

    # The basename of the data files
    if datafile.endswith(".dat"):
        basename = "../" + datafile[:-4]
    else:
        basename = "../" + datafile

    # Get the bird file (the first birdie file in the directory!)
    birdname = glob("../*.birds")
    if not birdname:
        # Without this check the loop below fails with a TypeError only
        # after dd and realfft have already been run on the first chunk.
        print("No '../*.birds' file found: cannot make a zaplist.")
        exit(1)
    birdname = birdname[0]

    inf = read_inffile(basename)
    idata = infodata.infodata(basename + ".inf")
    N = inf.N
    # Epoch of the full observation; every chunk epoch is derived from it
    t0i = inf.mjd_i
    t0f = inf.mjd_f
    num = 0
    point = 0
    T = options.nM * inf.dt / 86400.0  # chunk duration in days
    baryv = get_baryv(idata.RA, idata.DEC, idata.epoch, T, obs='GB')
    print("Baryv = ", baryv)
    inf.N = options.nM
    inf.numonoff = 0
    nM = options.nM // 1000000
    while point + options.nM < N:
        pM = point // 1000000
        outname = basename[3:] + '_%03dM' % nM + '_%02d' % num
        stdout.write('\n' + outname + '\n\n')
        inf.name = outname
        # Start time of this chunk, measured from the original epoch.
        # (Adding the offset to the already-updated inf.mjd_f would make
        # the offsets accumulate from one chunk to the next.)
        tstart = t0f + num * T * options.frac
        whole_days = int(tstart)
        inf.mjd_i = t0i + whole_days
        inf.mjd_f = tstart - whole_days
        writeinf(inf)
        # bs=4000000 bytes, so skip/count are in units of 10^6 4-byte samples
        myexecute('dd if=' + basename + '.dat of=' + outname +
                  '.dat bs=4000000 skip=' + repr(pM) + ' count=' + repr(nM))
        myexecute('realfft ' + outname + '.dat')
        myexecute('rm -f ' + outname + '.dat')
        myexecute('cp ' + birdname + ' ' + outname + '.birds')
        myexecute('makezaplist.py ' + outname + '.birds')
        myexecute('rm -f ' + outname + '.birds')
        myexecute('zapbirds -zap -zapfile ' + outname +
                  '.zaplist -baryv %g ' % baryv + outname + '.fft')
        myexecute('rm -f ' + outname + '.zaplist')
        if options.wmax > 0:
            myexecute(
                'accelsearch -sigma %.2f -zmax %d -wmax %d -numharm %d -flo %f -fhi %f '
                % (options.sigma, options.zmax, options.wmax,
                   options.numharm, options.flo, options.fhi) +
                outname + '.fft')
            myexecute('rm ' + outname + '.fft ' + outname +
                      '_JERK_%d.txtcand' % options.wmax)
            myexecute('cp ' + outname + '_JERK_%d ' % options.wmax +
                      options.outdir)
            myexecute('cp ' + outname + '_JERK_%d.cand ' % options.wmax +
                      options.outdir)
        else:
            myexecute(
                'accelsearch -sigma %.2f -zmax %d -numharm %d -flo %f -fhi %f '
                % (options.sigma, options.zmax, options.numharm,
                   options.flo, options.fhi) + outname + '.fft')
            myexecute('rm ' + outname + '.fft ' + outname +
                      '_ACCEL_%d.txtcand' % options.zmax)
            myexecute('cp ' + outname + '_ACCEL_%d ' % options.zmax +
                      options.outdir)
            myexecute('cp ' + outname + '_ACCEL_%d.cand ' % options.zmax +
                      options.outdir)
        myexecute('cp ' + outname + '.inf ' + options.outdir)
        num = num + 1
        point = point + int(options.nM * options.frac)
def main():
    """Search de-dispersed time series for single pulses and plot the results.

    Input files come from the positional arguments or, when there are none,
    from the --glob expression(s).  If the first file ends in ".singlepulse"
    no search is done: the existing candidate files are read back with
    read_singlepulse_files.  Otherwise each ".dat" file is de-trended in
    blocks, normalised, searched at full resolution and after boxcar
    smoothing with each down-sampling factor, and the surviving candidates
    are written to "<base>.singlepulse".  Unless --noplot is given a PGPLOT
    summary (S/N histogram, DM histogram, S/N vs DM, DM vs time) is made.

    Exits with status 0 after printing the usage when no files and no glob
    are given, and with status 1 when the glob matches nothing.
    """
    parser = OptionParser(usage)
    parser.add_option(
        "-x", "--xwin", action="store_true", dest="xwin", default=False,
        help="Don't make a postscript plot, just use an X-window")
    parser.add_option("-p", "--noplot", action="store_false",
                      dest="makeplot", default=True,
                      help="Look for pulses but do not generate a plot")
    parser.add_option(
        "-m", "--maxwidth", type="float", dest="maxwidth", default=0.0,
        help="Set the max downsampling in sec (see below for default)")
    parser.add_option("-t", "--threshold", type="float", dest="threshold",
                      default=5.0,
                      help="Set a different threshold SNR (default=5.0)")
    parser.add_option("-s", "--start", type="float", dest="T_start",
                      default=0.0,
                      help="Only plot events occuring after this time (s)")
    parser.add_option("-e", "--end", type="float", dest="T_end",
                      default=1e9,
                      help="Only plot events occuring before this time (s)")
    parser.add_option("-g", "--glob", type="string", dest="globexp",
                      default=None,
                      help="Process the files from this glob expression")
    parser.add_option("-f", "--fast", action="store_true", dest="fast",
                      default=False,
                      help="Use a faster method of de-trending (2x speedup)")
    parser.add_option(
        "-b", "--nobadblocks", action="store_false", dest="badblocks",
        default=True,
        help="Don't check for bad-blocks (may save strong pulses)")
    parser.add_option("-d", "--detrendlen", type="int", dest="detrendfact",
                      default=1,
                      help="Chunksize for detrending (pow-of-2 in 1000s)")
    (opts, args) = parser.parse_args()
    if len(args) == 0:
        if opts.globexp is None:
            print(full_usage)
            sys.exit(0)
        else:
            args = []
            for globexp in opts.globexp.split():
                args += glob.glob(globexp)
            if not args:
                # args[0] is used below; fail clearly instead of IndexError
                print("No files match the glob expression '%s'" %
                      opts.globexp)
                sys.exit(1)
    useffts = True
    dosearch = True
    if opts.xwin:
        pgplot_device = "/XWIN"
    else:
        pgplot_device = ""

    fftlen = 8192  # Should be a power-of-two for best speed
    chunklen = 8000  # Must be at least max_downfact less than fftlen
    assert (opts.detrendfact in [1, 2, 4, 8, 16, 32])
    detrendlen = opts.detrendfact * 1000
    if (detrendlen > chunklen):
        chunklen = detrendlen
        fftlen = int(next2_to_n(chunklen))
    blocks_per_chunk = chunklen // detrendlen
    overlap = (fftlen - chunklen) // 2
    worklen = chunklen + 2 * overlap  # currently it is fftlen...

    max_downfact = 30
    default_downfacts = [
        2, 3, 4, 6, 9, 14, 20, 30, 45, 70, 100, 150, 220, 300
    ]

    # The type of the first file decides between "search" and "plot only"
    if args[0].endswith(".singlepulse"):
        filenmbase = args[0][:args[0].rfind(".singlepulse")]
        dosearch = False
    elif args[0].endswith(".dat"):
        filenmbase = args[0][:args[0].rfind(".dat")]
    else:
        filenmbase = args[0]

    # Don't do a search, just read results and plot
    if not dosearch:
        info, DMs, candlist, num_v_DMstr = \
            read_singlepulse_files(args, opts.threshold, opts.T_start,
                                   opts.T_end)
        orig_N, orig_dt = int(info.N), info.dt
        obstime = orig_N * orig_dt
    else:
        DMs = []
        candlist = []
        num_v_DMstr = {}

        # Loop over the input files
        for filenm in args:
            if filenm.endswith(".dat"):
                filenmbase = filenm[:filenm.rfind(".dat")]
            else:
                filenmbase = filenm
            info = infodata.infodata(filenmbase + ".inf")
            DMstr = "%.2f" % info.DM
            DMs.append(info.DM)
            N, dt = int(info.N), info.dt
            obstime = N * dt
            # Choose the maximum width to search based on time instead
            # of bins. This helps prevent increased S/N when the downsampling
            # changes as the DM gets larger.
            if opts.maxwidth > 0.0:
                downfacts = [
                    x for x in default_downfacts if x * dt <= opts.maxwidth
                ]
            else:
                downfacts = [
                    x for x in default_downfacts if x <= max_downfact
                ]
            if len(downfacts) == 0:
                downfacts = [default_downfacts[0]]
            if (filenm == args[0]):
                orig_N = N
                orig_dt = dt
                if useffts:
                    fftd_kerns = make_fftd_kerns(default_downfacts, fftlen)
            if info.breaks:
                offregions = list(
                    zip([x[1] for x in info.onoff[:-1]],
                        [x[0] for x in info.onoff[1:]]))

                # If last break spans to end of file, don't read it in (its just padding)
                if offregions[-1][1] == N - 1:
                    N = offregions[-1][0] + 1

            outfile = open(filenmbase + '.singlepulse', mode='w')

            # Compute the file length in detrendlens
            roundN = N // detrendlen * detrendlen
            numchunks = roundN // chunklen
            # Read in the file
            print('Reading "%s"...' % filenm)
            timeseries = Num.fromfile(filenm, dtype=Num.float32,
                                      count=roundN)
            # Split the timeseries into chunks for detrending
            numblocks = roundN // detrendlen
            timeseries.shape = (numblocks, detrendlen)
            stds = Num.zeros(numblocks, dtype=Num.float64)
            # de-trend the data one chunk at a time
            print(' De-trending the data and computing statistics...')
            for ii, chunk in enumerate(timeseries):
                if opts.fast:  # use median removal instead of detrending (2x speedup)
                    tmpchunk = chunk.copy()
                    tmpchunk.sort()
                    med = tmpchunk[detrendlen // 2]
                    chunk -= med
                    tmpchunk -= med
                else:
                    # The detrend calls are the most expensive in the program
                    timeseries[ii] = scipy.signal.detrend(chunk,
                                                          type='linear')
                    tmpchunk = timeseries[ii].copy()
                    tmpchunk.sort()
                # The following gets rid of (hopefully) most of the
                # outlying values (i.e. power dropouts and single pulses)
                # If you throw out 5% (2.5% at bottom and 2.5% at top)
                # of random gaussian deviates, the measured stdev is ~0.871
                # of the true stdev. Thus the 1.0/0.871=1.148 correction below.
                # The following is roughly .std() since we already removed the median
                stds[ii] = Num.sqrt(
                    (tmpchunk[detrendlen // 40:-detrendlen // 40]**2.0).sum()
                    / (0.95 * detrendlen))
            stds *= 1.148
            # sort the standard deviations and separate those with
            # very low or very high values
            sort_stds = stds.copy()
            sort_stds.sort()
            # identify the differences with the larges values (this
            # will split off the chunks with very low and very high stds
            locut = (sort_stds[1:numblocks // 2 + 1] -
                     sort_stds[:numblocks // 2]).argmax() + 1
            hicut = (sort_stds[numblocks // 2 + 1:] -
                     sort_stds[numblocks // 2:-1]
                     ).argmax() + numblocks // 2 - 2
            # Num.std replaces scipy.std, an alias of the NumPy function
            # that newer SciPy releases no longer provide.
            std_stds = Num.std(sort_stds[locut:hicut])
            median_stds = sort_stds[(locut + hicut) // 2]
            print(" pseudo-median block standard deviation = %.2f" %
                  (median_stds))
            if (opts.badblocks):
                lo_std = median_stds - 4.0 * std_stds
                hi_std = median_stds + 4.0 * std_stds
                # Determine a list of "bad" chunks. We will not search these.
                bad_blocks = Num.nonzero((stds < lo_std) |
                                         (stds > hi_std))[0]
                print(" identified %d bad blocks out of %d (i.e. %.2f%%)" % \
                      (len(bad_blocks), len(stds),
                       100.0*float(len(bad_blocks))/float(len(stds))))
                stds[bad_blocks] = median_stds
            else:
                bad_blocks = []
            print(" Now searching...")

            # Now normalize all of the data and reshape it to 1-D
            timeseries /= stds[:, Num.newaxis]
            timeseries.shape = (roundN, )
            # And set the data in the bad blocks to zeros
            # Even though we don't search these parts, it is important
            # because of the overlaps for the convolutions
            for bad_block in bad_blocks:
                loind, hiind = bad_block * detrendlen, (bad_block +
                                                        1) * detrendlen
                timeseries[loind:hiind] = 0.0
            # Convert to a set for faster lookups below
            bad_blocks = set(bad_blocks)

            # Step through the data
            dm_candlist = []
            for chunknum in range(numchunks):
                loind = chunknum * chunklen - overlap
                hiind = (chunknum + 1) * chunklen + overlap
                # Take care of beginning and end of file overlap issues
                if (chunknum == 0):  # Beginning of file
                    chunk = Num.zeros(worklen, dtype=Num.float32)
                    chunk[overlap:] = timeseries[loind + overlap:hiind]
                elif (chunknum == numchunks - 1):  # end of the timeseries
                    chunk = Num.zeros(worklen, dtype=Num.float32)
                    chunk[:-overlap] = timeseries[loind:hiind - overlap]
                else:
                    chunk = timeseries[loind:hiind]

                # Make a set with the current block numbers
                lowblock = blocks_per_chunk * chunknum
                currentblocks = set(Num.arange(blocks_per_chunk) + lowblock)
                localgoodblocks = Num.asarray(
                    list(currentblocks - bad_blocks)) - lowblock
                # Search this chunk if it is not all bad
                if len(localgoodblocks):
                    # This is the good part of the data (end effects removed)
                    goodchunk = chunk[overlap:-overlap]

                    # need to pass blocks/chunklen, localgoodblocks
                    # dm_candlist, dt, opts.threshold to cython routine

                    # Search non-downsampled data first
                    # NOTE: these nonzero() calls are some of the most
                    # expensive calls in the program. Best bet would
                    # probably be to simply iterate over the goodchunk
                    # in C and append to the candlist there.
                    hibins = Num.flatnonzero(goodchunk > opts.threshold)
                    hivals = goodchunk[hibins]
                    hibins += chunknum * chunklen
                    hiblocks = hibins // detrendlen
                    # Add the candidates (which are sorted by bin)
                    for bin, val, block in zip(hibins, hivals, hiblocks):
                        if block not in bad_blocks:
                            time = bin * dt
                            dm_candlist.append(
                                candidate(info.DM, val, time, bin, 1))

                    # Prepare our data for the convolution
                    if useffts:
                        fftd_chunk = rfft(chunk, -1)

                    # Now do the downsampling...
                    for ii, downfact in enumerate(downfacts):
                        if useffts:
                            # Note: FFT convolution is faster for _all_ downfacts, even 2
                            goodchunk = fft_convolve(fftd_chunk,
                                                     fftd_kerns[ii],
                                                     overlap, -overlap)
                        else:
                            # The normalization of this kernel keeps the post-smoothing RMS = 1
                            kernel = Num.ones(downfact, dtype=Num.float32) / \
                                Num.sqrt(downfact)
                            smoothed_chunk = scipy.signal.convolve(
                                chunk, kernel, 1)
                            goodchunk = smoothed_chunk[overlap:-overlap]
                        hibins = Num.flatnonzero(goodchunk > opts.threshold)
                        hivals = goodchunk[hibins]
                        hibins += chunknum * chunklen
                        hibins = hibins.tolist()
                        hivals = hivals.tolist()
                        # Now walk through the new candidates and remove those
                        # that are not the highest but are within downfact/2
                        # bins of a higher signal pulse
                        hibins, hivals = prune_related1(
                            hibins, hivals, downfact)
                        # Insert the new candidates into the candlist, but
                        # keep it sorted...
                        for bin, val in zip(hibins, hivals):
                            # The block number must be derived from the
                            # surviving bin: a block list computed before
                            # pruning no longer lines up with hibins.
                            block = bin // detrendlen
                            if block not in bad_blocks:
                                time = bin * dt
                                bisect.insort(
                                    dm_candlist,
                                    candidate(info.DM, val, time, bin,
                                              downfact))

            # Now walk through the dm_candlist and remove the ones that
            # are within the downsample proximity of a higher
            # signal-to-noise pulse
            dm_candlist = prune_related2(dm_candlist, downfacts)
            print(" Found %d pulse candidates" % len(dm_candlist))

            # Get rid of those near padding regions
            if info.breaks:
                prune_border_cases(dm_candlist, offregions)

            # Write the pulses to an ASCII output file
            if len(dm_candlist):
                outfile.write(
                    "# DM Sigma Time (s) Sample Downfact\n")
                for cand in dm_candlist:
                    outfile.write(str(cand))
            outfile.close()

            # Add these candidates to the overall candidate list
            for cand in dm_candlist:
                candlist.append(cand)
            num_v_DMstr[DMstr] = len(dm_candlist)

    if (opts.makeplot):

        # Step through the candidates to make a SNR list
        DMs.sort()
        snrs = []
        for cand in candlist:
            if not Num.isinf(cand.sigma):
                snrs.append(cand.sigma)
        if snrs:
            maxsnr = max(int(max(snrs)), int(opts.threshold)) + 3
        else:
            maxsnr = int(opts.threshold) + 3

        # Generate the SNR histogram
        snrs = Num.asarray(snrs)
        (num_v_snr, edges) = Num.histogram(
            snrs, int(maxsnr - opts.threshold + 1),
            [opts.threshold, maxsnr])
        snrs = edges[:-1] + 0.5 * (edges[1] - edges[0])
        num_v_snr = num_v_snr.astype(Num.float32)
        # Avoid log10(0) when the histogram is plotted on a log axis
        num_v_snr[num_v_snr == 0.0] = 0.001

        # Generate the DM histogram
        num_v_DM = Num.zeros(len(DMs))
        for ii, DM in enumerate(DMs):
            num_v_DM[ii] = num_v_DMstr["%.2f" % DM]
        DMs = Num.asarray(DMs)

        # open the plot device
        short_filenmbase = filenmbase[:filenmbase.find("_DM")]
        if opts.T_end > obstime:
            opts.T_end = obstime
        if pgplot_device:
            ppgplot.pgopen(pgplot_device)
        else:
            if (opts.T_start > 0.0 or opts.T_end < obstime):
                ppgplot.pgopen(short_filenmbase +
                               '_%.0f-%.0fs_singlepulse.ps/VPS' %
                               (opts.T_start, opts.T_end))
            else:
                ppgplot.pgopen(short_filenmbase + '_singlepulse.ps/VPS')
        ppgplot.pgpap(7.5, 1.0)  # Width in inches, aspect

        # plot the SNR histogram
        ppgplot.pgsvp(0.06, 0.31, 0.6, 0.87)
        ppgplot.pgswin(opts.threshold, maxsnr, Num.log10(0.5),
                       Num.log10(2 * max(num_v_snr)))
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCLNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, "Signal-to-Noise")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Number of Pulses")
        ppgplot.pgsch(1.0)
        ppgplot.pgbin(snrs, Num.log10(num_v_snr), 1)

        # plot the DM histogram
        ppgplot.pgsvp(0.39, 0.64, 0.6, 0.87)
        # Use at least 1 for YMAX so that YMIN != YMAX when max(num_v_DM)==0.
        # (num_v_DM is an ndarray, so "num_v_DM + [1]" would add 1 to every
        # element instead of appending a value.)
        ppgplot.pgswin(
            min(DMs) - 0.5,
            max(DMs) + 0.5, 0.0, 1.1 * max(num_v_DM.max(), 1.0))
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, r"DM (pc cm\u-3\d)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Number of Pulses")
        ppgplot.pgsch(1.0)
        ppgplot.pgbin(DMs, num_v_DM, 1)

        # plot the SNR vs DM plot
        ppgplot.pgsvp(0.72, 0.97, 0.6, 0.87)
        ppgplot.pgswin(min(DMs) - 0.5, max(DMs) + 0.5, opts.threshold,
                       maxsnr)
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, r"DM (pc cm\u-3\d)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Signal-to-Noise")
        ppgplot.pgsch(1.0)
        cand_ts = Num.zeros(len(candlist), dtype=Num.float32)
        cand_SNRs = Num.zeros(len(candlist), dtype=Num.float32)
        cand_DMs = Num.zeros(len(candlist), dtype=Num.float32)
        for ii, cand in enumerate(candlist):
            cand_ts[ii], cand_SNRs[ii], cand_DMs[ii] = \
                cand.time, cand.sigma, cand.DM
        ppgplot.pgpt(cand_DMs, cand_SNRs, 20)

        # plot the DM vs Time plot
        ppgplot.pgsvp(0.06, 0.97, 0.08, 0.52)
        ppgplot.pgswin(opts.T_start, opts.T_end,
                       min(DMs) - 0.5,
                       max(DMs) + 0.5)
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, "Time (s)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, r"DM (pc cm\u-3\d)")
        # Circles are symbols 20-26 in increasing order
        snr_range = 12.0
        cand_symbols = (cand_SNRs -
                        opts.threshold) / snr_range * 6.0 + 20.5
        cand_symbols = cand_symbols.astype(Num.int32)
        cand_symbols[cand_symbols > 26] = 26
        for ii in [26, 25, 24, 23, 22, 21, 20]:
            inds = Num.nonzero(cand_symbols == ii)[0]
            ppgplot.pgpt(cand_ts[inds], cand_DMs[inds], ii)

        # Now fill the infomation area
        ppgplot.pgsvp(0.05, 0.95, 0.87, 0.97)
        ppgplot.pgsch(1.0)
        ppgplot.pgmtxt('T', 0.5, 0.0, 0.0,
                       "Single pulse results for '%s'" % short_filenmbase)
        ppgplot.pgsch(0.8)
        # first row
        ppgplot.pgmtxt('T', -1.1, 0.02, 0.0, 'Source: %s'%\
                       info.object)
        ppgplot.pgmtxt('T', -1.1, 0.33, 0.0, 'RA (J2000):')
        ppgplot.pgmtxt('T', -1.1, 0.5, 0.0, info.RA)
        ppgplot.pgmtxt('T', -1.1, 0.73, 0.0, 'N samples: %.0f' % orig_N)
        # second row
        ppgplot.pgmtxt('T', -2.4, 0.02, 0.0, 'Telescope: %s'%\
                       info.telescope)
        ppgplot.pgmtxt('T', -2.4, 0.33, 0.0, 'DEC (J2000):')
        ppgplot.pgmtxt('T', -2.4, 0.5, 0.0, info.DEC)
        # Raw string: "\g" is a PGPLOT escape, not a Python one
        ppgplot.pgmtxt('T', -2.4, 0.73, 0.0, r'Sampling time: %.2f \gms'%\
                       (orig_dt*1e6))
        # third row
        if info.instrument.find("pigot") >= 0:
            instrument = "Spigot"
        else:
            instrument = info.instrument
        ppgplot.pgmtxt('T', -3.7, 0.02, 0.0, 'Instrument: %s' % instrument)
        if (info.bary):
            ppgplot.pgmtxt('T', -3.7, 0.33, 0.0,
                           r'MJD\dbary\u: %.12f' % info.epoch)
        else:
            ppgplot.pgmtxt('T', -3.7, 0.33, 0.0,
                           r'MJD\dtopo\u: %.12f' % info.epoch)
        ppgplot.pgmtxt('T', -3.7, 0.73, 0.0, r'Freq\dctr\u: %.1f MHz'%\
                       ((info.numchan/2-0.5)*info.chan_width+info.lofreq))
        ppgplot.pgiden()
        ppgplot.pgend()
def __init__(self, filename):
    """Parse the binary fold file *filename* into attributes of self.

    The file is read sequentially: integer header (search-grid sizes and
    fold geometry), four length-prefixed strings, optional RA/DEC strings,
    timing and frequency doubles, the topo/bary/fold power and period
    terms, orbital parameters, the DM/period/pdot trial arrays, the folded
    profiles (npart x nsub x proflen) and one 7-double foldstats record per
    profile.  Derived quantities (sub-band frequencies, start/mid times,
    average profile, degrees of freedom, ...) are computed afterwards.

    A "<filename>.bestprof" file is loaded when present (self.bestprof is
    0 otherwise), and for "PSR_" candidates with avgvoverc == 0 a
    "<filename>.polycos" file is used to estimate the Doppler velocity.

    The data file is closed even when parsing fails part-way through.
    """
    self.pfd_filename = filename
    with open(filename, "rb") as infile:
        # See if the .bestprof file is around
        try:
            self.bestprof = bestprof(filename + ".bestprof")
        except IOError:
            self.bestprof = 0
        swapchar = '<'  # this is little-endian
        data = infile.read(5 * 4)
        testswap = struct.unpack(swapchar + "i" * 5, data)
        # This is a hack to try and test the endianness of the data.
        # None of the 5 values should be a large positive number.
        if (Num.fabs(Num.asarray(testswap))).max() > 100000:
            swapchar = '>'  # this is big-endian
        (self.numdms, self.numperiods, self.numpdots, self.nsub, self.npart) = \
            struct.unpack(swapchar+"i"*5, data)
        (self.proflen, self.numchan, self.pstep, self.pdstep, self.dmstep, \
         self.ndmfact, self.npfact) = struct.unpack(swapchar+"i"*7, infile.read(7*4))

        # Four strings, each preceded by its length as a 4-byte int.
        # filenm and pgdev are kept as bytes; candnm and telescope are
        # decoded to str.
        self.filenm = infile.read(
            struct.unpack(swapchar + "i", infile.read(4))[0])
        self.candnm = infile.read(
            struct.unpack(swapchar + "i",
                          infile.read(4))[0]).decode("utf-8")
        self.telescope = infile.read(
            struct.unpack(swapchar + "i",
                          infile.read(4))[0]).decode("utf-8")
        self.pgdev = infile.read(
            struct.unpack(swapchar + "i", infile.read(4))[0])

        test = infile.read(16)
        # NOTE(review): an 8-byte slice cannot equal the 7-byte literal
        # b"Unknown", so in practice only the b':' test decides here --
        # confirm whether test[:7] was intended.
        if not test[:8] == b"Unknown" and b':' in test:
            self.rastr = test[:test.find(b'\0')]
            test = infile.read(16)
            self.decstr = test[:test.find(b'\0')]
        else:
            self.rastr = "Unknown"
            self.decstr = "Unknown"
            # 'test' is bytes, so the membership test needs a bytes
            # operand (a str operand raises TypeError on Python 3).
            if b':' not in test:
                infile.seek(-16, 1)  # rewind the file before the bad read

        (self.dt, self.startT) = struct.unpack(swapchar + "dd",
                                               infile.read(2 * 8))
        (self.endT, self.tepoch, self.bepoch, self.avgvoverc, self.lofreq, \
         self.chan_wid, self.bestdm) = struct.unpack(swapchar+"d"*7, infile.read(7*8))
        # The following "fixes" (we think) the observing frequency of the Spigot
        # based on tests done by Ingrid on 0737 (comparing it to GASP)
        # The same sorts of corrections should be made to WAPP data as well...
        # The tepoch corrections are empirically determined timing corrections
        # Note that epoch is only double precision and so the floating
        # point accuracy is ~1 us!
        if self.telescope == 'GBT':
            # The absolute value is taken of the remainder itself (the
            # comparison used to sit inside the fabs() call).
            if (Num.fabs(Num.fmod(self.dt, 8.192e-05)) < 1e-12 and \
                ("spigot" in filename.lower() or "guppi" not in filename.lower()) and \
                (self.tepoch < 54832.0)):
                sys.stderr.write("Assuming SPIGOT data...\n")
                if self.chan_wid == 800.0 / 1024:  # Spigot 800 MHz mode 2
                    self.lofreq -= 0.5 * self.chan_wid
                    # values measured with 1713+0747 wrt BCPM2 on 13 Sept 2007
                    # (original values were 0.039334/86400.0)
                    if self.tepoch > 0.0:
                        self.tepoch += 0.039365 / 86400.0
                    if self.bestprof:
                        self.bestprof.epochf += 0.039365 / 86400.0
                elif self.chan_wid == 800.0 / 2048:
                    self.lofreq -= 0.5 * self.chan_wid
                    if self.tepoch < 53700.0:  # Spigot 800 MHz mode 16 (downsampled)
                        if self.tepoch > 0.0:
                            self.tepoch += 0.039352 / 86400.0
                        if self.bestprof:
                            self.bestprof.epochf += 0.039352 / 86400.0
                    else:  # Spigot 800 MHz mode 14
                        # values measured with 1713+0747 wrt BCPM2 on 13 Sept 2007
                        if self.tepoch > 0.0:
                            self.tepoch += 0.039365 / 86400.0
                        if self.bestprof:
                            self.bestprof.epochf += 0.039365 / 86400.0
                elif self.chan_wid == 50.0 / 1024 or self.chan_wid == 50.0 / 2048:  # Spigot 50 MHz modes
                    self.lofreq += 0.5 * self.chan_wid
                    # Note: the offset has _not_ been measured for the 2048-lag mode
                    if self.tepoch > 0.0:
                        self.tepoch += 0.039450 / 86400.0
                    if self.bestprof:
                        self.bestprof.epochf += 0.039450 / 86400.0
        # Each power is read as two floats; the second one is discarded
        (self.topo_pow, tmp) = struct.unpack(swapchar + "f" * 2,
                                             infile.read(2 * 4))
        (self.topo_p1, self.topo_p2, self.topo_p3) = struct.unpack(swapchar+"d"*3, \
                                                                   infile.read(3*8))
        (self.bary_pow, tmp) = struct.unpack(swapchar + "f" * 2,
                                             infile.read(2 * 4))
        (self.bary_p1, self.bary_p2, self.bary_p3) = struct.unpack(swapchar+"d"*3, \
                                                                   infile.read(3*8))
        (self.fold_pow, tmp) = struct.unpack(swapchar + "f" * 2,
                                             infile.read(2 * 4))
        (self.fold_p1, self.fold_p2, self.fold_p3) = struct.unpack(swapchar+"d"*3, \
                                                                   infile.read(3*8))
        # Save current p, pd, pdd
        # NOTE: Fold values are actually frequencies!
        self.curr_p1, self.curr_p2, self.curr_p3 = \
            psr_utils.p_to_f(self.fold_p1, self.fold_p2, self.fold_p3)
        self.pdelays_bins = Num.zeros(self.npart, dtype='d')
        (self.orb_p, self.orb_e, self.orb_x, self.orb_w, self.orb_t, self.orb_pd, \
         self.orb_wd) = struct.unpack(swapchar+"d"*7, infile.read(7*8))
        self.dms = Num.asarray(struct.unpack(swapchar+"d"*self.numdms, \
                                             infile.read(self.numdms*8)))
        # A single trial DM is stored as a scalar rather than an array
        if self.numdms == 1:
            self.dms = self.dms[0]
        self.periods = Num.asarray(struct.unpack(swapchar+"d"*self.numperiods, \
                                                 infile.read(self.numperiods*8)))
        self.pdots = Num.asarray(struct.unpack(swapchar+"d"*self.numpdots, \
                                               infile.read(self.numpdots*8)))
        self.numprofs = self.nsub * self.npart
        if (swapchar == '<'):  # little endian
            self.profs = Num.zeros((self.npart, self.nsub, self.proflen),
                                   dtype='d')
            for ii in range(self.npart):
                for jj in range(self.nsub):
                    self.profs[ii, jj, :] = Num.fromfile(
                        infile, Num.float64, self.proflen)
        else:
            self.profs = Num.asarray(struct.unpack(swapchar+"d"*self.numprofs*self.proflen, \
                                                   infile.read(self.numprofs*self.proflen*8)))
            self.profs = Num.reshape(
                self.profs, (self.npart, self.nsub, self.proflen))
        if (self.numchan == 1):
            # With a single channel, take DM and channel count from the
            # .inf file that sits next to the folded data file, if any
            try:
                idata = infodata.infodata(
                    self.filenm[:self.filenm.rfind(b'.')] + b".inf")
                try:
                    if idata.waveband == "Radio":
                        self.bestdm = idata.DM
                        self.numchan = idata.numchan
                except Exception:
                    # Best effort: fall back to defaults when the .inf
                    # object lacks the expected fields
                    self.bestdm = 0.0
                    self.numchan = 1
            except IOError:
                print("Warning! Can't open the .inf file for " + filename +
                      "!")
        self.binspersec = self.fold_p1 * self.proflen
        self.chanpersub = self.numchan // self.nsub
        self.subdeltafreq = self.chan_wid * self.chanpersub
        self.hifreq = self.lofreq + (self.numchan - 1) * self.chan_wid
        self.losubfreq = self.lofreq + self.subdeltafreq - self.chan_wid
        self.subfreqs = Num.arange(self.nsub, dtype='d')*self.subdeltafreq + \
            self.losubfreq
        self.subdelays_bins = Num.zeros(self.nsub, dtype='d')
        # Save current DM
        self.currdm = 0
        self.killed_subbands = []
        self.killed_intervals = []
        self.pts_per_fold = []
        # Note: a foldstats struct is read in as a group of 7 doubles
        # the correspond to, in order:
        # numdata, data_avg, data_var, numprof, prof_avg, prof_var, redchi
        self.stats = Num.zeros((self.npart, self.nsub, 7), dtype='d')
        for ii in range(self.npart):
            currentstats = self.stats[ii]
            for jj in range(self.nsub):
                if (swapchar == '<'):  # little endian
                    currentstats[jj] = Num.fromfile(infile, Num.float64, 7)
                else:
                    currentstats[jj] = Num.asarray(struct.unpack(swapchar+"d"*7, \
                                                                 infile.read(7*8)))
            self.pts_per_fold.append(
                self.stats[ii][0][0])  # numdata from foldstats
        self.start_secs = Num.add.accumulate(
            [0] + self.pts_per_fold[:-1]) * self.dt
        self.pts_per_fold = Num.asarray(self.pts_per_fold)
        self.mid_secs = self.start_secs + 0.5 * self.dt * self.pts_per_fold
        # MJD arrays are only defined when the matching epoch is non-zero
        if (not self.tepoch == 0.0):
            self.start_topo_MJDs = self.start_secs / 86400.0 + self.tepoch
            self.mid_topo_MJDs = self.mid_secs / 86400.0 + self.tepoch
        if (not self.bepoch == 0.0):
            self.start_bary_MJDs = self.start_secs / 86400.0 + self.bepoch
            self.mid_bary_MJDs = self.mid_secs / 86400.0 + self.bepoch
        self.Nfolded = Num.add.reduce(self.pts_per_fold)
        self.T = self.Nfolded * self.dt
        self.avgprof = (self.profs / self.proflen).sum()
        self.varprof = self.calc_varprof()
        # nominal number of degrees of freedom for reduced chi^2 calculation
        self.DOFnom = float(self.proflen) - 1.0
        # corrected number of degrees of freedom due to inter-bin correlations
        self.dt_per_bin = self.curr_p1 / self.proflen / self.dt
        self.DOFcor = self.DOFnom * self.DOF_corr()
    self.barysubfreqs = None
    if self.avgvoverc == 0:
        if self.candnm.startswith("PSR_"):
            # If this doesn't work, we should try to use the barycentering calcs
            # in the presto module.
            try:
                psrname = self.candnm[4:]
                self.polycos = polycos.polycos(psrname,
                                               filenm=self.pfd_filename +
                                               ".polycos")
                midMJD = self.tepoch + 0.5 * self.T / 86400.0
                self.avgvoverc = self.polycos.get_voverc(
                    int(midMJD), midMJD - int(midMJD))
                # Make the Doppler correction
                self.barysubfreqs = self.subfreqs * (1.0 + self.avgvoverc)
            except IOError:
                self.polycos = 0
    # Without a Doppler estimate the topocentric sub-band frequencies are used
    if self.barysubfreqs is None:
        self.barysubfreqs = self.subfreqs
def _build_option_parser():
    """Return the optparse parser that defines every rrattrap.py option."""
    parser = optparse.OptionParser(
        prog="rrattrap.py",
        version="Chen Karako, updated by Chitrang Patel(June 23, 2015)",
        usage="%prog --inffile <.inf file> [options] *.singlepulse",
        description="Group single pulse events and rank them based on the sigma behavior. \
                     Plot DM vs time with different colours for different ranks as follows:\
                     \t\tRank 1 (Other) : Grey\
                     \t\tRank 2 (RFI) : Red\
                     \t\tRank 3 (ok) : Cyan\
                     \t\tRank 4 (good) : dim blue\
                     \t\tRank 5 (very good) : dark blue\
                     \t\tRank 6 (excellent) : Magenta")

    parser.add_option('--CLOSE-DM', dest='close_dm', type='float',
                      help="DM to below which the signalis considered RFI(Default: 2",
                      default=2.0)
    parser.add_option('--use-configfile', dest='use_configfile', action='store_true',
                      help="If this flag is set - import the config file for selecting grouping"
                           "parameters.(Default: do not use a config file.)",
                      default=False)
    parser.add_option('--use-DMplan', dest='use_DMplan', action='store_true',
                      help="If this flag is set - Use the ddplan for selecting grouping"
                           "parameters. Make sure that you have a corresponding config file containing"
                           "the DDplan. (Default: do not use ddplan)",
                      default=False)
    parser.add_option('--min-group', dest='min_group', type='int',
                      help="minimum number of events in a group to no be considered noise."
                           "(Default: 45)",
                      default=45)
    parser.add_option('--dm-thresh', dest='dm_thresh', type='float',
                      help="DM threshold to use for nearest neighbour. Suggest a value greater "
                           " than the DM step size(Default: 0.5 pc/cm^3 - will not work if DM "
                           "step size is greater than 0.5)",
                      default=0.5)
    parser.add_option('--time-thresh', dest='time_thresh', type='float',
                      help="Time threshold to use for nearest neighbour. Suggest a value that "
                           " is a few times the max pulse width(Default: 0.1 s)",
                      default=0.1)
    parser.add_option('--min-sigma', dest='min_sigma', type='float',
                      help="minimum signal-to-noise above which the group is highly considered"
                           "to be astrophysical. (Default: 8.0)",
                      default=8.0)
    parser.add_option('--no-plot', dest='plot', action='store_false',
                      help="Do not plot the groups in the DM time plot."
                           "(Default: Make a plot)",
                      default=True)
    parser.add_option('--plottype', dest='plottype', type='string',
                      help="Make a plot using : 'matplotlib' or 'pgplot'.",
                      default='pgplot')
    parser.add_option('--min-rank-to-plot', dest='min_ranktoplot', type='int',
                      help="Only groups with rank upto this will plotted.(default: plot \
                            all except rank 1)",
                      default=0)
    parser.add_option('--min-rank-to-write', dest='min_ranktowrite', type='int',
                      help="Only info of the groups with rank upto this will written."
                           "(default: write all except rank 1)",
                      default=0)
    parser.add_option('--inffile', dest='inffile', type='string',
                      help="A .inf file. I suggest a .rfifind.inf file.",
                      default=None)
    parser.add_option('-o', dest='outbasenm', type='string',
                      help="outfile base name. .groups.txt will be added to the given name.",
                      default='')
    return parser


def _plot_dm_ranges(groups, ranks, inffile, plottype):
    """Make one DM-vs-time plot per overlapping DM interval with the chosen backend.

    `plottype` is matched case-insensitively against 'pgplot' and
    'matplotlib'; any other value prints a notice and plots nothing.
    """
    backend = plottype.lower()
    if backend == 'pgplot':
        plot_func, verb = plot_sp_rated_pgplot, "PGplotting"
    elif backend == 'matplotlib':
        plot_func, verb = plot_sp_rated_all, "plotting"
    else:
        print(
            "Plot type must be one of 'matplotlib' or 'pgplot'. Not plotting."
        )
        return

    # Overlapping DM intervals, plotted one figure per interval.
    dm_ranges = ((0, 30), (20, 110), (100, 310), (300, 1000), (1000, 10000))
    for lodm, hidm in dm_ranges:
        plot_func(groups, ranks, inffile, lodm, hidm)
        # Build the message from the actual range so it cannot drift from
        # the interval that was just plotted.
        print_debug("Finished %s DMs%d-%d " % (verb, lodm, hidm) +
                    strftime("%Y-%m-%d %H:%M:%S"))


def main():
    """Command-line entry point: group, rank, write out and plot single-pulse events.

    Parses the command line, reads the single-pulse files given as
    positional arguments, then runs the grouping / noise-flagging /
    RFI-flagging / ranking steps in sequence.  Rank counts are written to
    ``<outbasenm>groups.txt`` and ``<outbasenm>spsummary.txt``; the events
    of every group whose rank is selected for writing are appended to
    ``<outbasenm>groups.txt``.  Unless plotting is disabled, DM-vs-time
    plots are produced for a fixed set of DM intervals.

    Raises:
        ValueError: if ``--inffile`` is missing or does not end in ``.inf``.
    """
    parser = _build_option_parser()
    options, args = parser.parse_args()

    # optparse always creates the attribute (default None), so a hasattr()
    # test can never detect a missing --inffile; compare against None.
    if options.inffile is None:
        raise ValueError("You must supply a .inf file. I suggest .rfifind.inf")
    if not options.inffile.endswith(".inf"):
        raise ValueError("Cannot recognize file type from extension. "
                         " Only '.inf' types are supported.")

    if options.use_DMplan or options.use_configfile:
        import singlepulse.rrattrap_config as rrattrap_config

    RANKS = np.asarray([2, 0, 3, 4, 5, 6])

    # Grouping parameters come either from the config module or from the
    # command-line options.
    if options.use_configfile:
        CLOSE_DM = rrattrap_config.CLOSE_DM
        MIN_GROUP = rrattrap_config.MIN_GROUP
        TIME_THRESH = rrattrap_config.TIME_THRESH
        DM_THRESH = rrattrap_config.DM_THRESH
        MIN_SIGMA = rrattrap_config.MIN_SIGMA
        PLOT = rrattrap_config.PLOT
        PLOTTYPE = rrattrap_config.PLOTTYPE
        RANKS_TO_WRITE = rrattrap_config.RANKS_TO_WRITE
        RANKS_TO_PLOT = rrattrap_config.RANKS_TO_PLOT
    else:
        CLOSE_DM = options.close_dm
        MIN_GROUP = options.min_group
        TIME_THRESH = options.time_thresh
        DM_THRESH = options.dm_thresh
        MIN_SIGMA = options.min_sigma
        PLOT = options.plot
        PLOTTYPE = options.plottype
        RANKS_TO_WRITE = list(RANKS[RANKS > options.min_ranktowrite])
        RANKS_TO_PLOT = list(RANKS[RANKS > options.min_ranktoplot])

    inffile = options.inffile
    inf = infodata.infodata(inffile)
    print_debug("Beginning read_sp_files... " + strftime("%Y-%m-%d %H:%M:%S"))

    # NOTE(review): `args` holds only the positional arguments, so args[1:]
    # skips the first file given on the command line -- confirm that this
    # is intended before changing it.
    groups = spio.read_sp_files(args[1:])[0]
    print_debug("Finished read_sp_files, beginning create_groups... " +
                strftime("%Y-%m-%d %H:%M:%S"))
    print_debug("Number of single pulse events: %d " % len(groups))

    # ignore the last 10 seconds of the obs, for palfa
    groups = create_groups(groups,
                           inffile,
                           min_nearby=1,
                           ignore_obs_end=10,
                           time_thresh=TIME_THRESH,
                           dm_thresh=DM_THRESH,
                           use_dmplan=options.use_DMplan)
    print_debug("Number of groups: %d " % len(groups))
    print_debug("Finished create_groups, beginning grouping_sp_dmt... " +
                strftime("%Y-%m-%d %H:%M:%S"))

    grouping_sp_dmt(groups,
                    use_dmplan=options.use_DMplan,
                    time_thresh=TIME_THRESH,
                    dm_thresh=DM_THRESH)
    print_debug("Number of groups (after initial grouping): %d " % len(groups))
    print_debug("Finished grouping_sp_dmt, beginning flag_noise... " +
                strftime("%Y-%m-%d %H:%M:%S"))

    # do an initial coarse noise flagging and removal
    flag_noise(groups, use_dmplan=options.use_DMplan, min_group=MIN_GROUP)
    pop_by_rank(groups, 1)
    print_debug("Number of groups (after removed noise gps w <10 sps): %d " %
                len(groups))
    print_debug("Beginning grouping_sp_t... " + strftime("%Y-%m-%d %H:%M:%S"))

    # Regroup good groups based on proximity in time only (compensate for missing middles):
    groups = grouping_sp_t(groups,
                           use_dmplan=options.use_DMplan,
                           time_thresh=TIME_THRESH,
                           dm_thresh=DM_THRESH)
    print_debug("Finished grouping_sp_t. " + strftime("%Y-%m-%d %H:%M:%S"))

    # Flag RFI groups, noise
    flag_rfi(groups, close_dm=CLOSE_DM)

    # Rank groups and identify noise (<45/40/35/30 sp events) groups
    print_debug("Ranking groups...")
    rank_groups(groups,
                use_dmplan=options.use_DMplan,
                min_group=MIN_GROUP,
                min_sigma=MIN_SIGMA)

    # Remove noise groups
    print_debug("Before removing noise, len(groups): %s" % len(groups))
    pop_by_rank(groups, 1)
    print_debug("After removing noise, len(groups): %s" % len(groups))

    # Group rfi with very close groups
    print_debug("len(groups) before grouping_rfi: %s" % len(groups))
    print_debug("Beginning grouping_rfi... " + strftime("%Y-%m-%d %H:%M:%S"))
    grouping_rfi(groups,
                 use_dmplan=options.use_DMplan,
                 time_thresh=TIME_THRESH,
                 dm_thresh=DM_THRESH)
    print_debug("Finished grouping_rfi. " + strftime("%Y-%m-%d %H:%M:%S"))

    # Rank groups
    print_debug("Finished rank_groups, beginning DM span check... " +
                strftime("%Y-%m-%d %H:%M:%S"))

    # Remove groups that are likely RFI, based on their large span in DM
    print_debug("Beginning DM span check...")
    check_dmspan(groups, inf.dt, inf.lofreq, inf.lofreq + inf.BW)
    print_debug("Finished DM span check, beginning writing to outfile... " +
                strftime("%Y-%m-%d %H:%M:%S"))

    # Context managers guarantee both files are closed even if a write or
    # one of the helper calls raises part-way through.
    with open(options.outbasenm + 'groups.txt', 'w') as outfile:
        with open(options.outbasenm + 'spsummary.txt', 'w') as summaryfile:
            rank_dict = rank_occur(groups)
            for rank in sorted(ALL_RANKS_ORDERED):
                if rank != 1:
                    count_line = "Number of rank %d groups: %d \n" % (
                        rank, rank_dict.get(rank, 0))
                    outfile.write(count_line)
                    summaryfile.write(count_line)
        outfile.write("\n")

        # Reverse sort lists so good groups are written at the top of the file
        groups.sort(key=lambda x: ALL_RANKS_ORDERED.index(x.rank),
                    reverse=True)

        # write list of events in each group
        for grp in groups:
            if grp.rank in RANKS_TO_WRITE:
                outfile.write(str(grp) + '\n')  # print group summary
                outfile.write('\n')
                outfile.write(
                    "# DM Sigma Time (s) Sample Downfact \n")
                for sp in grp.singlepulses:
                    outfile.write("%7.2f %7.2f %13.6f %10d %3d \n" % sp)
                outfile.write('\n')

    print_debug("Finished writing to outfile, now plotting... " +
                strftime("%Y-%m-%d %H:%M:%S"))

    if PLOT:
        ranks = RANKS_TO_PLOT
        # Sort groups so better-ranked groups are plotted on top of worse groups
        groups.sort(key=lambda x: ALL_RANKS_ORDERED.index(x.rank))
        # create several DM vs t plots, splitting up DM in overlapping intervals
        # DMs 0-30, 20-110, 100-310, 300-1000, 1000-10000
        _plot_dm_ranges(groups, ranks, inffile, PLOTTYPE)
""") else: birds = read_birds(sys.argv[1]) bases, infilenms = group_infiles(sys.argv[2:]) lastsize = 0 lastT = 0 lastbase = bases[0] baryv = 0 for infilenm in infilenms: currsize = os.stat(infilenm).st_size with open(infilenm, "rb+") as infile: currbase = [x for x in bases if infilenm.startswith(x)][-1] if (currsize != lastsize) or (currbase != lastbase): fn, ext = os.path.splitext(infilenm) print(f"Reading file info from '{fn}.inf'") info = pi.infodata(fn + ".inf") currT = info.dt * info.N # Only re-compute baryv if we need to if baryv == 0 or (currbase != lastbase): baryv = mod_get_baryv( info.RA, info.DEC, info.epoch, currT, obs=scopes[info.telescope.lower()], bary=info.bary) # Only re-compute freqs to zap if the times are also different if (currT != lastT): zaplist = process_birds(birds, currT, baryv, info) # print(zaplist) # Now actually do the zapping