def on_rollback(conn):
    """An event to be executed when a transaction is rolled back.

        See SQLAlchemy for details about event triggers.
    """
    utils.print_debug("Rolling back database transaction.", 'database', \
                      stepsback=7)
def __prep_obslog_search(arf, tolerant=False):
    """Prepare some observation info for searching for observing log entries.

        Inputs:
            arf: ArchiveFile object.
            tolerant: Be tolerant with name matching. This is important
                for flux-cal observations. (Default: False)

        Outputs:
            obsdt_utc: The UTC datetime at the start of the observation.
            names: Object names to match.
    """
    # Be sure to use the original name recorded in the header
    names = (arf['origname'],)
    if tolerant and arf['origname'].endswith("_R") and \
            not (("_O" in arf['origname']) or
                 ("_N" in arf['origname']) or
                 ("_S" in arf['origname'])):
        # Use tolerant name matching
        base = arf['origname'][:-2]
        if base in utils.read_fluxcal_names():
            # Flux calibrator: also match the N/O/S scan variants
            names += (base + "_N_R", base + "_O_R", base + "_S_R")
        else:
            names += (base.lstrip('BJ') + '_R',)
    utils.print_debug("Will check for the following names for obs-log " \
                      "matching: %s" % ", ".join(names), 'correct')
    # Get date of observation
    obsdt_utc = rs.utils.mjd_to_datetime(arf['mjd'])
    obsdt_utc = UTC_TZ.localize(obsdt_utc)
    return obsdt_utc, names
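
# The name expansion above, shown in isolation as a hedged sketch. This helper
# is illustrative only and is not used by the pipeline; 'fluxcal_names' stands
# in for the list returned by utils.read_fluxcal_names().
def _demo_expand_names(origname, fluxcal_names):
    names = (origname,)
    if origname.endswith("_R") and \
            not any(tag in origname for tag in ("_O", "_N", "_S")):
        base = origname[:-2]
        if base in fluxcal_names:
            # Flux calibrators are observed as N/O/S scan variants
            names += (base + "_N_R", base + "_O_R", base + "_S_R")
        else:
            # Pulsars: also try the name with the leading 'B'/'J' stripped
            names += (base.lstrip('BJ') + '_R',)
    return names
# e.g. _demo_expand_names("3C286_R", ["3C286"]) returns
#      ("3C286_R", "3C286_N_R", "3C286_O_R", "3C286_S_R")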
def on_begin(conn):
    """An event to be executed when a transaction is opened.

        See SQLAlchemy for details about event triggers.
    """
    utils.print_debug("Opening database transaction.", 'database', \
                      stepsback=7)
def _clean(self, ar):
    nchan = ar.get_nchan()
    nsub = ar.get_nsubint()
    weights = (ar.get_weights() > 0)

    # Count channels/sub-ints that are already completely masked
    nchan_masked = np.sum(weights.sum(axis=0) == 0)
    nsub_masked = np.sum(weights.sum(axis=1) == 0)

    # Fraction of each sub-int/channel that is already masked
    sub_badfrac = 1 - weights.sum(axis=1) / float(nchan - nchan_masked)
    chan_badfrac = 1 - weights.sum(axis=0) / float(nsub - nsub_masked)

    sub_is_bad = np.argwhere(sub_badfrac > self.configs.badchantol)
    utils.print_debug('Number of subints to mask because too many '
                      'channels are already masked: %d (%.1f %%)' %
                      (sub_is_bad.size, 100.0 * sub_is_bad.size / nsub),
                      'clean')
    for isub in sub_is_bad:
        clean_utils.zero_weight_subint(ar, isub)

    chan_is_bad = np.argwhere(chan_badfrac > self.configs.badsubtol)
    utils.print_debug('Number of channels to mask because too many '
                      'subints are already masked: %d (%.1f %%)' %
                      (chan_is_bad.size, 100.0 * chan_is_bad.size / nchan),
                      'clean')
    for ichan in chan_is_bad:
        clean_utils.zero_weight_chan(ar, ichan)
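
# A minimal, self-contained sketch of the masking-fraction logic above, on a
# toy weights array (rows are sub-ints, columns are channels). The tolerance
# value is illustrative, not a pipeline default.
def _demo_badfrac(badchantol=0.5):
    import numpy as np
    weights = np.array([[1, 1, 0, 0],
                        [1, 1, 1, 0],
                        [0, 0, 0, 0]], dtype=bool)  # last sub-int fully masked
    nsub, nchan = weights.shape
    # Channels carrying no data at all don't count against individual sub-ints
    nchan_masked = np.sum(weights.sum(axis=0) == 0)
    sub_badfrac = 1 - weights.sum(axis=1) / float(nchan - nchan_masked)
    # Returns array([[2]]): only the fully masked sub-int exceeds the tolerance
    return np.argwhere(sub_badfrac > badchantol)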
def on_commit(conn):
    """An event to be executed when a transaction is committed.

        See SQLAlchemy for details about event triggers.
    """
    utils.print_debug("Committing database transaction.", 'database', \
                      stepsback=7)
def _clean(self, ar):
    reference = ar.clone()
    reference.pscrunch()
    if self.configs.fscrunchfirst:
        if ar.get_dedispersed():
            raise errors.CleanError('The "hotbins" cleaner\'s "fscrunchfirst" '
                                    'option can only be used on '
                                    'non-dedispersed data.')
        utils.print_debug('Determining hotbins based on f-scrunched data',
                          'clean')
        reference.set_dispersion_measure(0)
        reference.fscrunch()
    if self.configs.tscrunchfirst:
        utils.print_debug('Determining hotbins based on t-scrunched data',
                          'clean')
        reference.tscrunch()
    if self.configs.iscal:
        calbins = self.__locate_cal(ar)
        # Clean on-cal region
        self.__find_and_replace_hotbins(ar, reference, calbins)
        # Clean off-cal region
        self.__find_and_replace_hotbins(ar, reference, ~calbins)
    else:
        offbins = np.ones(ar.get_nbin(), dtype='bool')
        for lobin, hibin in self.configs.onpulse:
            offbins[lobin:hibin] = False
        self.__find_and_replace_hotbins(ar, reference, offbins)
def __remove_bad_channels(self, ar):
    """Zero-weight bad channels and channels containing bad frequencies.
        Since zero-weighting is used for zapping, the process is reversible.

        Inputs:
            ar: The psrchive archive object to clean.

        Outputs:
            None
    """
    if self.configs.badchans:
        nremoved = 0
        for tozap in self.configs.badchans:
            if type(tozap) is types.IntType:
                # A single bad channel to zap
                clean_utils.zero_weight_chan(ar, tozap)
                nremoved += 1
            else:
                # An (inclusive) interval of bad channels to zap
                lochan, hichan = tozap
                for xx in xrange(lochan, hichan+1):
                    clean_utils.zero_weight_chan(ar, xx)
                    nremoved += 1
        utils.print_debug("Removed %d channels due to bad chans " \
                          "(%s) in %s" % (nremoved, self.configs.badchans, \
                                          ar.get_filename()), 'clean')
    if self.configs.badfreqs:
        nremoved = 0
        # Get the frequency range of each channel
        nchan = ar.get_nchan()
        lofreqs = np.empty(nchan)
        hifreqs = np.empty(nchan)
        chanbw = ar.get_bandwidth()/nchan
        for ichan in xrange(nchan):
            prof = ar.get_Profile(0, 0, ichan)
            ctr = prof.get_centre_frequency()
            lofreqs[ichan] = ctr - chanbw/2.0
            hifreqs[ichan] = ctr + chanbw/2.0

        for tozap in self.configs.badfreqs:
            if type(tozap) is types.FloatType:
                # A single bad freq to zap
                for ichan in np.argwhere((lofreqs <= tozap) & (hifreqs > tozap)):
                    ichan = ichan.squeeze()
                    clean_utils.zero_weight_chan(ar, ichan)
                    nremoved += 1
            else:
                # An (inclusive) interval of bad freqs to zap
                flo, fhi = tozap
                for ichan in np.argwhere((hifreqs >= flo) & (lofreqs <= fhi)):
                    ichan = ichan.squeeze()
                    clean_utils.zero_weight_chan(ar, ichan)
                    nremoved += 1
        utils.print_debug("Removed %d channels due to bad freqs " \
                          "(%s) in %s" % (nremoved, self.configs.badfreqs, \
                                          ar.get_filename()), 'clean')
def before_cursor_execute(conn, cursor, statement, parameters, \
                          context, executemany):
    """An event to be executed before execution of SQL queries.

        See SQLAlchemy for details about event triggers.
    """
    # Step back 6 levels through the call stack to find
    # the function that called 'execute'
    msg = str(statement)
    if executemany and len(parameters) > 1:
        msg += "\n    Executing %d statements" % len(parameters)
    elif parameters:
        msg += "\n    Params: %s" % str(parameters)
    utils.print_debug(msg, "queries", stepsback=6)
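
# A hedged sketch of how the handlers above would be attached to an SQLAlchemy
# engine; the actual registration in this package may differ (the 'engine'
# argument is assumed to be created elsewhere, e.g. by database.Database).
def _register_db_events(engine):
    import sqlalchemy as sa
    sa.event.listen(engine, 'begin', on_begin)
    sa.event.listen(engine, 'commit', on_commit)
    sa.event.listen(engine, 'rollback', on_rollback)
    sa.event.listen(engine, 'before_cursor_execute', before_cursor_execute)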
def __find_and_replace_hotbins(self, ar, reference, offbins):
    nbins = ar.get_nbin()
    indices = np.arange(nbins)
    offbin_indices = indices[offbins]
    for isub in np.arange(reference.get_nsubint()):
        for ichan in np.arange(reference.get_nchan()):
            # Always use first polarization channel
            # (i.e. use total intensity - data are p-scrunched)
            prof = reference.get_Profile(int(isub), 0, int(ichan))
            data = prof.get_amps()
            offdata = data[offbins]
            med = np.median(offdata)
            mad = np.median(np.abs(offdata - med))
            std = mad * 1.4826  # This is the approximate relation between
                                # the standard deviation and the median
                                # absolute deviation (assuming normally
                                # distributed data).
            ioffbad = np.abs(offdata - med) > std * self.configs.threshold
            ibad = offbin_indices[ioffbad]
            igood = offbin_indices[~ioffbad]
            nbad = np.sum(ioffbad)
            utils.print_debug('isub: %d, ichan: %d, ipol: %d\n' \
                              '    med: %g, mad: %g\n' \
                              '    %d hotbins found (ibin: %s)' % \
                              (isub, ichan, 0, med, mad, nbad, ibad), 'clean')
            # Replace data in cleaned archive with noise
            if self.configs.fscrunchfirst:
                chans_to_clean = np.arange(ar.get_nchan())
            else:
                chans_to_clean = [int(ichan)]
            if self.configs.tscrunchfirst:
                subints_to_clean = np.arange(ar.get_nsubint())
            else:
                subints_to_clean = [int(isub)]
            # We always p-scrunch
            pols_to_clean = np.arange(ar.get_npol())

            for jsub in subints_to_clean:
                for jchan in chans_to_clean:
                    for jpol in pols_to_clean:
                        cleanedprof = ar.get_Profile(int(jsub), int(jpol),
                                                     int(jchan))
                        cleaneddata = cleanedprof.get_amps()
                        gooddata = cleaneddata[igood]
                        avg = gooddata.mean()
                        std = gooddata.std()
                        if std > 0:
                            noise = np.random.normal(avg, std,
                                                     size=nbad).astype('float32')
                            cleaneddata[ibad] = noise
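
# The MAD-to-sigma relation used above, in isolation: for normally distributed
# data, std ~= 1.4826 * MAD. A hedged, self-contained check (the values are
# illustrative; this helper is not used by the pipeline).
def _demo_mad_sigma():
    import numpy as np
    rng = np.random.RandomState(0)
    x = rng.normal(0.0, 2.0, size=100000)
    mad = np.median(np.abs(x - np.median(x)))
    return mad * 1.4826  # approximately 2.0, the true standard deviation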
def prune_band(infn, response=None):
    """Prune the edges of the band. This is useful for removing channels
        where there is no receiver response.
        The file is modified in-place. However, zero-weighting is used for
        pruning, so the process is reversible.

        Inputs:
            infn: The ArchiveFile object of the file to prune.
            response: A tuple specifying the range of frequencies
                outside of which should be de-weighted.

        Outputs:
            None
    """
    if response is None:
        response = config.cfg.rcvr_response_lims

    if response is None:
        utils.print_info("No freq range specified for band pruning. "
                         "Skipping...", 2)
    else:
        # Use absolute value in case band is flipped (BW < 0)
        lofreq = infn['freq'] - np.abs(0.5 * infn['bw'])
        hifreq = infn['freq'] + np.abs(0.5 * infn['bw'])
        utils.print_info("Pruning frequency band to (%g-%g MHz)" % response, 2)
        utils.print_debug("Archive's freq band (%g-%g MHz)" % \
                          (lofreq, hifreq), 'clean')
        pazcmd = 'paz -m %s ' % infn.fn
        runpaz = False  # Only run paz if either of the following clauses is True
        if response[0] > lofreq:
            # Part of archive's low freqs are outside rcvr's response
            pazcmd += '-F "%f %f" ' % (lofreq, response[0])
            runpaz = True
        if response[1] < hifreq:
            # Part of archive's high freqs are outside rcvr's response
            pazcmd += '-F "%f %f" ' % (response[1], hifreq)
            runpaz = True
        if runpaz:
            utils.execute(pazcmd)
        else:
            warnings.warn("Not pruning band edges! All data are " \
                          "within the receiver's response.", \
                          errors.CoastGuardWarning)
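
# A hedged sketch of the 'paz' command construction above: an archive spanning
# 1200-1600 MHz with a receiver response of 1290-1430 MHz gets both band edges
# de-weighted. All values here are illustrative, not real receiver limits.
def _demo_prune_cmd(fn='example.ar', lofreq=1200.0, hifreq=1600.0,
                    response=(1290.0, 1430.0)):
    pazcmd = 'paz -m %s ' % fn
    if response[0] > lofreq:
        # De-weight the low band edge below the response
        pazcmd += '-F "%f %f" ' % (lofreq, response[0])
    if response[1] < hifreq:
        # De-weight the high band edge above the response
        pazcmd += '-F "%f %f" ' % (response[1], hifreq)
    return pazcmd
# _demo_prune_cmd() ->
#   'paz -m example.ar -F "1200.000000 1290.000000" -F "1430.000000 1600.000000" '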
def get_correction_string(arfn, obsinfo=None, backend='asterix',
                          receiver=None, fixcoords=False):
    """Get the psredit command string that will correct the file header.

        Inputs:
            arfn: The name of the input archive file.
            obsinfo: A dictionary of observing log information to use.
                (Default: search observing logs for matching entry)
            backend: Override backend name with this value.
                (Default: asterix)
            receiver: Override receiver name with this value.
                (Default: determine receiver automatically)
            fixcoords: Force fixing of coordinates.
                (Default: don't bother if they seem to be correct)

        Outputs:
            corrstr: The parameter string of corrections used with psredit.
            note: A note about the header correction.
    """
    note = ""
    # Load archive
    arf = utils.ArchiveFile(arfn)
    if receiver is None:
        rcvr = determine_receiver(arf)
    elif receiver in ('P217-3', 'P200-3', 'S110-1', 'S60-2', 'S36-5'):
        rcvr = receiver
    else:
        raise ValueError("Receiver provided (%s) is not recognized." %
                         receiver)
    if arf['rcvr'] != rcvr:
        note += "Receiver is wrong (%s); setting to '%s'. " % \
                (arf['rcvr'], rcvr)
    corrstr = "%s,be:name=%s" % (RCVR_INFO[rcvr], backend)
    if fixcoords or (obsinfo is not None) or arf['name'].endswith('_R') or \
            arf['ra'].startswith('00:00:00'):
        try:
            if obsinfo is None:
                # Search for observing log entry
                obsinfo = get_obslog_entry(arf, tolerant=True)
                utils.print_debug("Information from matching observing log "
                                  "line:\n%s" % pprint.pformat(obsinfo),
                                  'correct')
            rastr, decstr = get_coordinates(arf, obsinfo)
        except errors.HeaderCorrectionError as exc:
            note += exc.get_message() + "\n(Could not correct coordinates)"
            raise
        else:
            corrstr += ",coord=%s%s" % (rastr, decstr)
    else:
        note += "No reason to correct coords."
    if obsinfo is not None:
        name = obsinfo['name']
        corrstr += ",name=%s" % obsinfo['name']
    else:
        name = arf['name']
    if name.endswith("_R"):
        # Calibration diode was fired.
        # Observation could be a pol-cal scan or a flux-cal scan.
        if any([name.startswith(fluxcal) for fluxcal
                in utils.read_fluxcal_names(config.fluxcal_cfg)]):
            # Flux calibrator
            if name.endswith("_S_R") or name.endswith("_N_R"):
                corrstr += ",type=FluxCal-Off"
            elif name.endswith("_O_R"):
                corrstr += ",type=FluxCal-On"
        else:
            # Polarization calibrator
            corrstr += ",type=PolnCal"
    else:
        corrstr += ",type=Pulsar"
    return corrstr, note
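
# The calibration-scan classification above, shown in isolation as a hedged
# sketch ('fluxcal_names' stands in for utils.read_fluxcal_names(); this
# helper is not used by the pipeline).
def _demo_obs_type(name, fluxcal_names):
    if not name.endswith("_R"):
        return "Pulsar"
    if any([name.startswith(fc) for fc in fluxcal_names]):
        # Flux calibrator: off-source (N/S) vs on-source (O) scans
        if name.endswith("_S_R") or name.endswith("_N_R"):
            return "FluxCal-Off"
        elif name.endswith("_O_R"):
            return "FluxCal-On"
        return None  # unrecognized suffix; no type assigned
    # Any other scan with the cal diode fired is a polarization calibrator
    return "PolnCal"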
def deep_clean(toclean, chanthresh=None, subintthresh=None, binthresh=None):
    import psrchive  # Temporarily, because python bindings
                     # are not available on all computers

    if chanthresh is None:
        chanthresh = config.cfg.clean_chanthresh
    if subintthresh is None:
        subintthresh = config.cfg.clean_subintthresh
    if binthresh is None:
        binthresh = config.cfg.clean_binthresh

    ar = toclean.clone()
    ar.pscrunch()
    ar.remove_baseline()
    ar.dedisperse()

    # Remove profile
    data = ar.get_data().squeeze()
    template = np.apply_over_axes(np.sum, data, (0, 1)).squeeze()
    clean_utils.remove_profile_inplace(ar, template, None)
    ar.dededisperse()

    # First clean channels
    chandata = clean_utils.get_chans(ar, remove_prof=True)
    chanweights = clean_utils.get_chan_weights(ar).astype(bool)
    chanmeans = clean_utils.scale_chans(chandata.mean(axis=1),
                                        chanweights=chanweights)
    chanmeans /= clean_utils.get_robust_std(chanmeans, chanweights)
    chanstds = clean_utils.scale_chans(chandata.std(axis=1),
                                       chanweights=chanweights)
    chanstds /= clean_utils.get_robust_std(chanstds, chanweights)

    badchans = np.concatenate((np.argwhere(np.abs(chanmeans) >= chanthresh), \
                               np.argwhere(np.abs(chanstds) >= chanthresh)))
    badchans = np.unique(badchans)
    utils.print_info("Number of channels to be de-weighted: %d" %
                     len(badchans), 2)
    for ichan in badchans:
        utils.print_info("De-weighting chan# %d" % ichan, 3)
        clean_utils.zero_weight_chan(ar, ichan)
        clean_utils.zero_weight_chan(toclean, ichan)

    # Next clean subints
    subintdata = clean_utils.get_subints(ar, remove_prof=True)
    subintweights = clean_utils.get_subint_weights(ar).astype(bool)
    subintmeans = clean_utils.scale_subints(subintdata.mean(axis=1), \
                                            subintweights=subintweights)
    subintmeans /= clean_utils.get_robust_std(subintmeans, subintweights)
    subintstds = clean_utils.scale_subints(subintdata.std(axis=1), \
                                           subintweights=subintweights)
    subintstds /= clean_utils.get_robust_std(subintstds, subintweights)

    badsubints = np.concatenate((np.argwhere(np.abs(subintmeans) >= subintthresh), \
                                 np.argwhere(np.abs(subintstds) >= subintthresh)))

    if config.debug.CLEAN:
        utils.print_debug("Making debug plot for deep_clean", 'clean')
        plt.subplots_adjust(hspace=0.4)
        chanax = plt.subplot(4, 1, 1)
        plt.plot(np.arange(len(chanmeans)), chanmeans, 'k-')
        plt.axhline(chanthresh, c='k', ls='--')
        plt.axhline(-chanthresh, c='k', ls='--')
        plt.xlabel('Channel Number', size='x-small')
        plt.ylabel('Average', size='x-small')

        plt.subplot(4, 1, 2, sharex=chanax)
        plt.plot(np.arange(len(chanstds)), chanstds, 'k-')
        plt.axhline(chanthresh, c='k', ls='--')
        plt.axhline(-chanthresh, c='k', ls='--')
        plt.xlabel('Channel Number', size='x-small')
        plt.ylabel('Standard Deviation', size='x-small')

        subintax = plt.subplot(4, 1, 3)
        plt.plot(np.arange(len(subintmeans)), subintmeans, 'k-')
        plt.axhline(subintthresh, c='k', ls='--')
        plt.axhline(-subintthresh, c='k', ls='--')
        plt.xlabel('Sub-int Number', size='x-small')
        plt.ylabel('Average', size='x-small')

        plt.subplot(4, 1, 4, sharex=subintax)
        plt.plot(np.arange(len(subintstds)), subintstds, 'k-')
        plt.axhline(subintthresh, c='k', ls='--')
        plt.axhline(-subintthresh, c='k', ls='--')
        plt.xlabel('Sub-int Number', size='x-small')
        plt.ylabel('Standard Deviation', size='x-small')
        plt.show()

    badsubints = np.unique(badsubints)
    utils.print_info("Number of sub-ints to be de-weighted: %d" %
                     len(badsubints), 2)
    for isub in badsubints:
        utils.print_info("De-weighting subint# %d" % isub, 3)
        clean_utils.zero_weight_subint(ar, isub)
        clean_utils.zero_weight_subint(toclean, isub)

    # Re-dedisperse the data
    ar.dedisperse()

    # Now replace hot bins
    utils.print_info("Will find and clean 'hot' bins", 2)
    clean_utils.clean_hot_bins(toclean, thresh=binthresh)
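
# A hedged, self-contained sketch of deep_clean's channel-rejection test:
# per-channel statistics are put into robust-sigma units and thresholded on
# their absolute value. The MAD-based estimate below stands in for
# clean_utils.get_robust_std; the threshold and data are illustrative.
def _demo_chan_reject(chanthresh=5.0):
    import numpy as np
    rng = np.random.RandomState(1)
    chanmeans = rng.normal(0.0, 1.0, size=128)
    chanmeans[[10, 70]] += 20.0  # two RFI-corrupted channels
    robust_std = 1.4826 * np.median(np.abs(chanmeans - np.median(chanmeans)))
    # Returns array([[10], [70]]): only the corrupted channels are flagged
    return np.argwhere(np.abs(chanmeans / robust_std) >= chanthresh)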
def group_subband_dirs(subdirs, maxspan=None, maxgap=None, \
                       tossfrac=None, filetype='subint'):
    """Based on file names, group sub-ints from different sub-bands.
        Each sub-band is assumed to be in a separate directory.

        Inputs:
            subdirs: List of sub-band directories.
            maxspan: Maximum span, in seconds, between first and last
                sub-int in a combined file.
            maxgap: Maximum gap, in seconds, permitted before starting
                a new output file.
            tossfrac: Fraction of sub-ints required for a sub-band to
                be combined. If a sub-band has fewer than
                tossfrac*N_subint sub-ints, it will be excluded.
            filetype: Type of files being grouped. Can be 'subint' or
                'single'. (Default: 'subint')

        Outputs:
            usedirs: List of directories to use when combining.
                (NOTE: This may be different than the input 'subdirs'
                because some directories may have too few sub-ints to
                be worth combining. This depends on the input value
                of 'tossfrac'.)
            groups: List of groups of files to be combined.
                (NOTE: These are the file names only (i.e. no path).
                Each file listed appears in each of 'usedirs'.)
    """
    if maxspan is None:
        maxspan = config.cfg.combine_maxspan
    if maxgap is None:
        maxgap = config.cfg.combine_maxgap
    if tossfrac is None:
        tossfrac = 1 - config.cfg.missing_subint_tolerance

    if filetype not in FILETYPE_SPECIFICS:
        raise errors.InputError("File type (%s) is not recognized. " \
                                "Possible values are: '%s'" % \
                                (filetype, "', '".join(FILETYPE_SPECIFICS.keys())))
    else:
        globpat, get_start = FILETYPE_SPECIFICS[filetype]

    # Ensure paths are absolute
    subdirs = [os.path.abspath(path) for path in subdirs]
    utils.print_debug("Grouping subints from %d sub-band directories" % \
                      len(subdirs), 'combine')

    nindirs = len(subdirs)
    nsubbands = len(subdirs)
    nperdir = collections.Counter()
    noccurs = collections.Counter()
    nintotal = 0
    for subdir in subdirs:
        fns = glob.glob(os.path.join(subdir, globpat))
        nn = len(fns)
        utils.print_debug("Found %d sub-int files in %s" % \
                          (nn, subdir), 'combine')
        nintotal += nn
        nperdir[subdir] = nn
        noccurs.update([os.path.basename(fn) for fn in fns])
    nsubints = len(noccurs)

    # Remove sub-bands that have too few sub-ints
    thresh = tossfrac * nsubints
    for ii in xrange(len(subdirs) - 1, -1, -1):
        subdir = subdirs[ii]
        if nperdir[subdir] < thresh:
            utils.print_info("Ignoring sub-ints from %s. " \
                             "It has too few sub-ints (%d < %d; tossfrac: %f)" % \
                             (subdir, nperdir[subdir], thresh, tossfrac), 2)
            subdirs.pop(ii)
            del nperdir[subdir]
            fns = glob.glob(os.path.join(subdir, globpat))
            noccurs.subtract([os.path.basename(fn) for fn in fns])
            nsubbands -= 1

    # Remove sub-ints that are no longer included in any sub-bands
    to_del = []
    for fn in noccurs:
        if not noccurs[fn]:
            to_del.append(fn)
    for fn in to_del:
        del noccurs[fn]

    # Now group the sub-ints
    lastsubint = datetime.datetime.min
    filestart = datetime.datetime.min
    groups = []
    if nsubbands:
        for subint in sorted(noccurs):
            if noccurs[subint] < nsubbands:
                utils.print_info("Ignoring sub-int (%s). It doesn't appear " \
                                 "in all subbands (only %d of %d)" % \
                                 (subint, noccurs[subint], nsubbands), 2)
                continue
            start = get_start(os.path.join(subdirs[0], subint))
            if ((start - filestart).total_seconds() > maxspan) or \
                    ((start - lastsubint).total_seconds() > maxgap):
                filestart = start
                utils.print_debug("Starting a new file at %s" % \
                                  filestart, 'combine')
                # Start a new file
                groups.append([])
            groups[-1].append(subint)
            lastsubint = start
    nused = sum([len(grp) for grp in groups])
    utils.print_info("Grouped %d files from %d directories into %d groups.\n" \
                     "(Threw out %d directories and %d files)" % \
                     (nintotal, nindirs, len(groups), nindirs-len(subdirs), \
                      nintotal-nused), 2)
    return subdirs, groups
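
# The span/gap grouping rule above, in isolation: a new group starts whenever
# a sub-int begins more than 'maxgap' seconds after the previous one, or more
# than 'maxspan' seconds after the group's first sub-int. A hedged sketch;
# the thresholds are illustrative, not the pipeline defaults.
def _demo_group_starts(start_times, maxspan=3600.0, maxgap=300.0):
    import datetime
    groups = []
    filestart = lastsubint = datetime.datetime.min
    for start in sorted(start_times):
        if ((start - filestart).total_seconds() > maxspan) or \
                ((start - lastsubint).total_seconds() > maxgap):
            filestart = start
            groups.append([])  # start a new group
        groups[-1].append(start)
        lastsubint = start
    return groups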
def run(self, ar):
    utils.print_info("Cleaning '%s' with %s" %
                     (ar.get_filename(), self.name), 1)
    utils.print_debug("Cleaning parameters: %s" % self.get_config_string(),
                      'clean')
    self._clean(ar)
def __obslog_db_match(obsdt_utc, names):
    """Find entries in the observing log database matching the given information.

        Inputs:
            obsdt_utc: The UTC datetime at the start of the observation.
            names: Object names to match.

        Outputs:
            logentries: Matching log entries.
    """
    db = database.Database('obslog')
    utcstart_col = sa.cast(db.obsinfo.c.obstimestamp, sa.DateTime)

    # Find entries within +/- 1 day of observation start time
    start = obsdt_utc - datetime.timedelta(days=1)
    end = obsdt_utc + datetime.timedelta(days=1)
    with db.transaction() as conn:
        select = db.select([db.obsinfo.c.object.label('name'),
                            (db.obsinfo.c.lst/3600.0).label('lststart'),
                            utcstart_col.label('utcstart'),
                            db.obsinfo.c.azim.label('az'),
                            db.obsinfo.c.elev.label('alt'),
                            db.obsinfo.c.scan.label('scannum'),
                            db.obsinfo.c.lon,
                            db.obsinfo.c.lat]).\
                    where(db.obsinfo.c.object.in_(names) &
                          (utcstart_col >= start) &
                          (utcstart_col <= end))
        result = conn.execute(select)
        rows = result.fetchall()
        result.close()
    utils.print_debug("Found %d matching obslog DB entries "
                      "(name: %s; UTC: %s)" %
                      (len(rows), ", ".join(names),
                       obsdt_utc.strftime("%c")), 'correct')
    logentries = []
    for row in rows:
        # Refine matching based on time
        utils.print_debug("%s" % row, 'correct')
        twentyfivesec = datetime.timedelta(seconds=25)
        logdt_utc = UTC_TZ.localize(row['utcstart'])
        if (logdt_utc - twentyfivesec) <= obsdt_utc <= (logdt_utc + twentyfivesec):
            # Compute a few values to be consistent with obslog file parsing
            utc_hrs = row['utcstart'].hour + \
                      (row['utcstart'].minute +
                       (row['utcstart'].second +
                        row['utcstart'].microsecond * 1e-6) / 60.0) / 60.0
            logdt_local = logdt_utc.astimezone(BERLIN_TZ)
            localdate = logdt_local.date()

            entry = dict(row)
            entry['scannum'] = str(row['scannum'])
            entry['utcstart'] = utc_hrs
            entry['utc'] = row['utcstart'].strftime('%c')
            entry['localdate'] = localdate
            entry['catalog_rastr'] = rs.utils.deg_to_hmsstr(row['lon'],
                                                            decpnts=3,
                                                            style='units')[0]
            entry['catalog_decstr'] = rs.utils.deg_to_dmsstr(row['lat'],
                                                             decpnts=3,
                                                             style='units')[0]
            logentries.append(entry)
    return logentries
def __obslog_file_match(obsdt_utc, names):
    """Find entries in observing log files matching the given information.

        Inputs:
            obsdt_utc: The UTC datetime at the start of the observation.
            names: Object names to match.

        Outputs:
            logentries: Matching log entries.
    """
    obsdt_local = obsdt_utc.astimezone(BERLIN_TZ)
    obsutc = obsdt_utc.time()
    obsdate = obsdt_local.date()  # NOTE: discrepancy between timezones for
                                  # time and date. This is a bad idea, but is
                                  # done to be consistent with what is used
                                  # in the observation log files.
    obsutc_hours = obsutc.hour + (obsutc.minute + obsutc.second / 60.0) / 60.0
    obsutc_hhmm = obsutc.hour + obsutc.minute / 60.0
    # Also allow a match against the neighbouring minute
    if obsutc.second > 30:
        delta = HOURS_PER_MIN
    else:
        delta = -HOURS_PER_MIN

    # Get the log files to search
    # NOTE: Date in file name is when the obslog was written out
    obslogfns = glob.glob(os.path.join(config.obslog_dir, "*.prot"))
    obslogfns.sort()

    tosearch = []
    for currfn in obslogfns:
        fndatetime = datetime.datetime.strptime(os.path.split(currfn)[-1], \
                                                '%y%m%d.prot')
        fndate = fndatetime.date()

        if fndate == obsdate:
            tosearch.append(currfn)
        elif fndate > obsdate:
            tosearch.append(currfn)
            break
    if not tosearch:
        raise errors.HeaderCorrectionError("Could not find an obslog file " \
                                           "for the obs date (%s)." %
                                           obsdate.strftime("%Y-%b-%d"))
    utils.print_debug('Searching obs log files:\n    %s' %
                      "\n    ".join(tosearch), 'correct')

    logentries = []
    check = False
    for obslogfn in tosearch:
        with open(obslogfn, 'r') as obslog:
            for line in obslog:
                try:
                    currinfo = parse_obslog_line(line)
                except errors.FormatError:
                    # Not a valid observation log entry
                    continue
                if check:
                    utils.print_debug("Checking obslog line:\n%s\n"
                                      "Obs date: %s, obs log date: %s, next date: %s\n"
                                      "Obs UTC: %f, obs log UTC: %f, next UTC: %f\n" %
                                      (prevline, obsdate, previnfo['localdate'],
                                       currinfo['localdate'], obsutc_hhmm,
                                       previnfo['utcstart'], currinfo['utcstart']),
                                      'correct')
                    if (obsdate >= previnfo['localdate']) and \
                            (obsdate <= currinfo['localdate']) and \
                            (is_close(obsutc_hhmm, previnfo['utcstart'], 1) or \
                             is_close(obsutc_hhmm + delta, previnfo['utcstart'], 1)):
                        # and (obsutc_hhmm <= currinfo['utcstart']): # Not needed anymore?
                        utils.print_debug("Matching observing log line:\n%s" %
                                          prevline, 'correct')
                        logentries.append(previnfo)
                # Check in next iteration if observation's source name
                # matches that of the current obslog entry
                check = (utils.get_prefname(currinfo['name']) in names)
                prevline = line
                previnfo = currinfo
    utils.print_debug("Found %d potentially matching obs-log entries" %
                      len(logentries), 'correct')
    return logentries
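
# A hedged sketch of the rounded-minute tolerance above: log start times are
# stored as HH:MM, so an observation starting at, say, 12:34:45 UTC is also
# checked against 12:35. The local 'is_close' here is an assumption standing
# in for the module's is_close (taken to compare times within N minutes,
# expressed in hours); it may not match the real helper exactly.
def _demo_minute_match(obsutc_hhmm, obsutc_second, log_hhmm,
                       hours_per_min=1/60.0):
    delta = hours_per_min if obsutc_second > 30 else -hours_per_min
    def is_close(a, b, nmin):
        return abs(a - b) <= nmin * hours_per_min
    return is_close(obsutc_hhmm, log_hhmm, 1) or \
           is_close(obsutc_hhmm + delta, log_hhmm, 1)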
def clean_hotbins(ar, thresh=None, fscrunchfirst=None, onpulse=[]):
    """Replace hot bins with white noise.

        Inputs:
            ar: The archive to be cleaned.
            thresh: The threshold (in number of sigmas) for a
                bin to be removed.
            fscrunchfirst: Determine which bins to remove by looking
                at frequency-scrunched data. Remove the hot bins
                in all frequency channels.
            onpulse: On-pulse regions to be ignored when computing
                profile statistics. A list of 2-tuples is expected.

        Outputs:
            None - The archive is cleaned in place.
    """
    if thresh is None:
        thresh = config.cfg.clean_hotbins_thresh
    if fscrunchfirst is None:
        fscrunchfirst = config.cfg.clean_hotbins_fscrunchfirst
    utils.print_debug("Cleaning hot bins (thresh: %g, " \
                      "on-pulse regions: %s)" % (thresh, onpulse), 'clean')
    nbins = ar.get_nbin()
    indices = np.arange(nbins)
    offbins = np.ones(nbins, dtype='bool')
    for lobin, hibin in onpulse:
        offbins[lobin:hibin] = False
    # NOTE: the off-pulse indices must be computed *after* the on-pulse
    # regions have been masked out
    offbin_indices = indices[offbins]
    if fscrunchfirst:
        utils.print_debug("Determining hotbins based on f-scrunched data",
                          'clean')
        reference = ar.clone()
        reference.set_dispersion_measure(0)
        reference.fscrunch()
    else:
        reference = ar
    nsub = reference.get_nsubint()
    for isub in np.arange(nsub):
        for ichan in np.arange(reference.get_nchan()):
            for ipol in np.arange(reference.get_npol()):
                prof = reference.get_Profile(int(isub), int(ipol), int(ichan))
                data = prof.get_amps()
                offdata = data[offbins]
                med = np.median(offdata)
                mad = np.median(np.abs(offdata - med))
                std = mad * 1.4826  # This is the approximate relation between
                                    # the standard deviation and the median
                                    # absolute deviation (assuming normally
                                    # distributed data).
                ioffbad = np.abs(offdata - med) > std * thresh
                ibad = offbin_indices[ioffbad]
                igood = offbin_indices[~ioffbad]
                nbad = np.sum(ioffbad)
                utils.print_debug('isub: %d, ichan: %d, ipol: %d\n' \
                                  '    med: %g, mad: %g\n' \
                                  '    %d hotbins found (ibin: %s)' % \
                                  (isub, ichan, ipol, med, mad, nbad, ibad),
                                  'clean')
                # Replace data in cleaned archive with noise
                if fscrunchfirst:
                    # We need to clean all frequency channels
                    for jchan in np.arange(ar.get_nchan()):
                        cleanedprof = ar.get_Profile(int(isub), int(ipol),
                                                     int(jchan))
                        cleaneddata = cleanedprof.get_amps()
                        gooddata = cleaneddata[igood]
                        avg = gooddata.mean()
                        std = gooddata.std()
                        if std > 0:
                            noise = np.random.normal(avg, std,
                                                     size=nbad).astype('float32')
                            cleaneddata[ibad] = noise
                else:
                    gooddata = data[igood]
                    avg = gooddata.mean()
                    std = gooddata.std()
                    if std > 0:
                        noise = np.random.normal(avg, std,
                                                 size=nbad).astype('float32')
                        data[ibad] = noise