def get_lines (path):
    try:
        with io.open (path, 'rt') as f:
            for line in f:
                yield line.strip ()
    except Exception as e:
        warn ('failed to read from "%s": %s', path, e)
def get_one_line (path):
    try:
        with io.open (path, 'rt') as f:
            return f.readline ().strip ()
    except Exception as e:
        warn ('failed to read a line from "%s": %s', path, e)
        return None
def get_sacct_info (jobid, itemname):
    try:
        with io.open (os.devnull, 'rb') as devnull:
            info = subprocess.check_output (['sacct', '-j', str (jobid) + '.batch',
                                             '-n', '-P', '-o', itemname],
                                            shell=False, stdin=devnull,
                                            close_fds=True)
        return info.splitlines ()
    except Exception as e:
        warn ('failed to get sacct item "%s" for job %s: %s', itemname, jobid, e)
        return None
def count_lines (path):
    try:
        n = 0
        with io.open (path, 'rt') as f:
            for line in f:
                n += 1
        return n
    except Exception as e:
        warn ('failed to count lines of "%s": %s', path, e)
        return 0
def getDelay (self, bp):
    from mirtask.util import POL_XX, POL_YY, POL_XY, POL_YX

    ant1, ant2, pol = bp2aap (bp)
    delays = self.vars['delays']

    if max (ant1, ant2) > delays.shape[0]:
        warn ('not enough antennas in delays array!')
        return 0

    if pol == POL_XX:
        pidx1, pidx2 = 0, 0
    elif pol == POL_YY:
        pidx1, pidx2 = 1, 1
    elif pol == POL_XY:
        pidx1, pidx2 = 0, 1
    elif pol == POL_YX:
        pidx1, pidx2 = 1, 0
    else:
        warn ('not sure what to do with this pol for delays')
        pidx1, pidx2 = 0, 0

    return delays[ant2-1,pidx2] - delays[ant1-1,pidx1]
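# Illustrative sketch, not part of the original module: the lookup convention
# used by getDelay above, assuming the 'delays' variable has shape (nants, 2)
# with column 0 holding X-feed delays and column 1 Y-feed delays, as written
# by _mir_transpose below. The function name and values here are invented.
def _demo_delay_lookup ():
    import numpy as np
    delays = np.array ([[1.0, 1.1],   # antenna 1: X, Y delays (made up)
                        [2.0, 2.2]])  # antenna 2: X, Y delays (made up)
    ant1, ant2 = 1, 2    # 1-based antenna numbers, as in getDelay
    pidx1, pidx2 = 0, 1  # an XY basepol: ant1's X feed, ant2's Y feed
    return delays[ant2-1,pidx2] - delays[ant1-1,pidx1]  # -> 2.2 - 1.0 = 1.2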
def get_max_worker_maxrss ():
    maxrss = 0

    for wjobid in get_lines ('worker-arraymasterids'):
        if not len (wjobid):
            continue

        lines = get_sacct_info (wjobid, 'MaxRSS')
        if lines is None:
            continue

        for line in lines:
            line = line.strip ()
            if not len (line):
                continue

            if line[-1] == 'K':
                maxrss = max (maxrss, int (line[:-1]))
            elif line[-1] == 'M':
                maxrss = max (maxrss, int (round (float (line[:-1]) * 1024)))
            else:
                warn ('unexpected sacct MaxRSS output for job %s: %r', wjobid, line)

    return maxrss
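# Illustrative sketch, not part of the original module: get_max_worker_maxrss
# normalizes sacct's MaxRSS strings, which carry a K or M suffix, into
# kibibytes. Just that parsing step, over invented sample values:
def _demo_parse_maxrss ():
    maxrss = 0
    for s in ['123456K', '1.5M']:  # hypothetical sacct MaxRSS outputs
        if s[-1] == 'K':
            maxrss = max (maxrss, int (s[:-1]))
        elif s[-1] == 'M':
            maxrss = max (maxrss, int (round (float (s[:-1]) * 1024)))
    return maxrss  # -> 123456 (1.5M is only 1536K)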
def on_error (func, path, exc_info):
    warn ('couldn\'t rmtree %s: in %s of %s: %s', self, func.__name__,
          path, exc_info[1])
def solve (self):
    if self.nants > self.nsamps:
        cli.warn ('not enough measurements to solve: %d ants, %d samples'
                  % (self.nants, self.nsamps))
        return

    # First solve for (log) amplitudes, which we can do as a classic
    # linear least squares problem (in log space). We're implicitly
    # modeling the source as 1+0j on all baselines, i.e., we're assuming a
    # point source and solving for amplitudes in units of the source flux
    # density.

    lna_A = np.zeros ((self.nsamps, self.nants))

    for i in range (self.nsamps):
        i1, i2 = self.blidxs[i]
        lna_A[i,i1] = 1
        lna_A[i,i2] = 1

    lna_b = np.log (np.abs (self.vis))
    lna_x, lna_chisq, lna_rank, lna_sing = np.linalg.lstsq (lna_A, lna_b)
    lna_chisq = lna_chisq[0]

    # We just solved for log values to model visibilities; to bring
    # visibilities into model domain, we need the inverses of these
    # values. We can then normalize the amplitudes of all of the observed
    # visibilities.

    amps = np.exp (-lna_x)
    normvis = self.vis.copy ()

    for i in range (self.nsamps):
        i1, i2 = self.blidxs[i]
        normvis[i] *= amps[i1] * amps[i2]

    # Now, solve for the phases with a bespoke (but simple) iterative
    # algorithm. For each antenna we just compute the phase of the summed
    # differences between it and the "model" and alter the phase by that.
    # Loosely modeled on MIRIAD gpcal PhaseSol().

    curphasors = np.ones (self.nants, dtype=np.complex)
    newphasors = np.empty (self.nants, dtype=np.complex)
    tol = 1e-5
    damping = 0.9

    for iter_num in range (100):
        newphasors.fill (0)

        for i, vis in enumerate (normvis):
            i1, i2 = self.blidxs[i]
            newphasors[i1] += curphasors[i2] * vis
            newphasors[i2] += curphasors[i1] * np.conj (vis)

        newphasors /= np.abs (newphasors)
        temp = curphasors + damping * (newphasors - curphasors)
        temp /= np.abs (temp)
        delta = (np.abs (temp - curphasors)**2).mean ()
        #print ('ZZ', iter_num, delta, np.angle (temp, deg=True))
        curphasors = temp

        if delta < tol:
            break

    # Calibrate out phases too
    np.conj (curphasors, curphasors)
    gains = amps * curphasors

    for i in range (self.nsamps):
        i1, i2 = self.blidxs[i]
        normvis[i] *= curphasors[i1] * np.conj (curphasors[i2])

    self.gains = gains
    self.normvis = normvis
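# Illustrative sketch, not part of the original module: the amplitude stage
# of solve() above is ordinary linear least squares in log space, since
# log|V_ij| = log g_i + log g_j for a unit point source. This toy builds the
# same design matrix for an invented 3-antenna / 3-baseline dataset; with
# noiseless data the per-antenna amplitudes are recovered exactly.
def _demo_amp_solve ():
    import numpy as np
    blidxs = [(0, 1), (0, 2), (1, 2)]  # hypothetical baseline -> antenna map
    g = np.array ([1.5, 0.8, 1.2])     # invented true antenna amplitudes
    vis = np.array ([g[i] * g[j] for i, j in blidxs])  # model source = 1+0j

    lna_A = np.zeros ((len (blidxs), g.size))
    for n, (i, j) in enumerate (blidxs):
        lna_A[n,i] = lna_A[n,j] = 1  # log|vis| = log g_i + log g_j

    lna_x = np.linalg.lstsq (lna_A, np.log (np.abs (vis)))[0]
    return np.exp (lna_x)  # -> approximately [1.5, 0.8, 1.2]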
def _ms_transpose (vpath, tpath, transpose_args, squash_time_gaps=False):
    from pwkit.environments.casa import util as casautil
    b = casautil.sanitize_unicode

    def vispath (*args):
        return b(os.path.join (vpath, *args))

    # TODO: I think that with ms.nrow() and ms.range() we can do this
    # while taking only one pass through the data.

    tb = casautil.tools.table ()
    ms = casautil.tools.ms ()
    print ('pass 1 ...')

    # Load polarization stuff we need

    tb.open (vispath ('DATA_DESCRIPTION'))
    ddid_to_pid = tb.getcol (b'POLARIZATION_ID')
    ddid_to_spwid = tb.getcol (b'SPECTRAL_WINDOW_ID')
    tb.close ()

    tb.open (vispath ('POLARIZATION'))
    numcorrs = tb.getcol (b'NUM_CORR')
    npids = numcorrs.size
    prodinfo = [None] * npids

    for i in range (npids):
        corrtypes = tb.getcell (b'CORR_TYPE', i)
        prodinfo[i] = [casautil.pol_to_miriad[c] for c in corrtypes]

    tb.close ()

    ddprods = [prodinfo[p] for p in ddid_to_pid]

    # Load spw configuration stuff we need. Don't grid the info yet
    # since many of the spws may be filtered out by the selection
    # setup.

    tb.open (vispath ('SPECTRAL_WINDOW'))
    nspws = tb.getcol (b'NUM_CHAN').size
    sfreqs = []

    for i in range (nspws):
        sfreqs.append (tb.getcell (b'CHAN_FREQ', i) * 1e-9)  # Hz -> GHz

    tb.close ()

    # Antenna info

    tb.open (vispath ('ANTENNA'))
    nants = tb.getcol (b'DISH_DIAMETER').size
    names = tb.getcol (b'NAME')
    stations = tb.getcol (b'STATION')
    fullnames = []
    maxnamelen = 0

    for i in range (nants):
        f = '%s@%s' % (names[i], stations[i])
        fullnames.append (f)
        maxnamelen = max (maxnamelen, len (f))

    antnames = np.zeros ((nants, maxnamelen), dtype=np.byte)

    for i in range (nants):
        f = fullnames[i]
        n = len (f)
        antnames[i,:n] = np.fromstring (f, dtype=np.byte)

    tb.close ()

    # Open and set up filtering. msselect() says it supports
    # 'polarization' as a field, but it doesn't seem to do anything?

    ms.open (vispath ())
    ms_selectors = frozenset ('array baseline field observation polarization '
                              'scan scanintent spw taql time uvdist'.split ())
    mssel = dict (kv for kv in iteritems(transpose_args)
                  if kv[0] in ms_selectors)
    # ms.selectinit () needed for selectpolarization() below
    ms.msselect (b(mssel))

    # Changes shape of 'data' column below. Disable for now since
    # I don't feel like debugging it.
    if 'polarization' in transpose_args:
        warn ('polarization selection not implemented for MS data')
        #ms.selectpolarization (transpose_args['polarization'].split (','))

    # Get table of times and basepols

    ms.iterinit (maxrows=65536)  # XXX semi-arbitrary constant
    ms.iterorigin ()
    colnames = b('time antenna1 antenna2 data_desc_id'.split ())
    nrecs = 0
    times = set ()
    pbps = set ()
    seenspws = set ()

    while True:
        cols = ms.getdata (items=colnames)
        # time is (chunksize)

        for i in range (cols['time'].size):
            t = cols['time'][i] / 86400. + 2400000.5  # CASA to miriad timesystems
            ddid = cols['data_desc_id'][i]
            pi = ddprods[ddid]
            a1 = cols['antenna1'][i] + 1  # 0-based -> 1-based
            a2 = cols['antenna2'][i] + 1
            seenspws.add (ddid_to_spwid[ddid])

            for j in range (len (pi)):
                nrecs += 1
                pbp = mtutil.bpToPBP32 (mtutil.aap2bp (a1, a2, pi[j]))
                times.add (t)
                pbps.add (pbp)

        if not ms.iternext ():
            break

    # Get the timestamps onto a nice even grid, checking that our
    # gridding is decent.

    datatimes = np.asarray (sorted (times), dtype=np.double)
    nt = datatimes.size
    time0 = datatimes[0]
    cadence = np.median (datatimes[1:] - datatimes[:-1])
    tidxs = (datatimes - time0) / cadence
    timemap = np.empty (nt, dtype=np.int)
    ntslot = int (round (tidxs[-1])) + 1
    tscale = ntslot * 1. / nt
    ntoff = 0

    if squash_time_gaps:
        slot_to_data = np.zeros (ntslot, dtype=np.int) - 1

    for i in range (nt):
        timemap[i] = int (round (tidxs[i]))
        if abs (tidxs[i] - timemap[i]) > 0.01:
            ntoff += 1
        if squash_time_gaps:
            slot_to_data[timemap[i]] = i

    if ntoff > 0:
        warn ('had %d timestamps (out of %d) with poor mapping onto the grid',
              ntoff, nt)

    if squash_time_gaps:
        # Re-index the data to remove time gaps. As a convenience we throw in
        # a small break between discrete observations.
        seen_any = False
        in_populated_run = False
        squashed_idx = 0
        new_gap_size = 1

        for i in range (ntslot):
            if slot_to_data[i] == -1:
                # There are no data for this slot.
                in_populated_run = False
            else:
                # There are data for this slot.
                if not in_populated_run and seen_any:
                    squashed_idx += new_gap_size
                timemap[slot_to_data[i]] = squashed_idx
                squashed_idx += 1
                seen_any = True
                in_populated_run = True

        ntslot = squashed_idx
        tscale = ntslot * 1. / nt

    if tscale > 1.05:
        warn ('data size increasing by factor of %.2f to get everything onto '
              'the time grid', tscale)

    nt = ntslot

    # Now do the same thing for the spectral windows that are actually used,
    # computing lookup info for fast mapping of DDID to our frequency grid.

    freqs = set ()
    for spwid in seenspws:
        freqs.update (sfreqs[spwid])

    datafreqs = np.asarray (sorted (freqs), dtype=np.double)
    nf = datafreqs.size
    freq0 = datafreqs[0]
    sdf = np.median (datafreqs[1:] - datafreqs[:-1])
    nfslot = int (round ((datafreqs[-1] - freq0) / sdf)) + 1
    fscale = nfslot * 1. / nf
    ddfreqmap = []
    nfoff = 0
    maxnchan = 0

    for i in range (len (ddid_to_spwid)):
        spwid = ddid_to_spwid[i]
        if spwid not in seenspws:
            ddfreqmap.append (None)
            continue

        # If more than one DDID shares a SPWID, we're recomputing this stuff.
        # Oh well.
        ddfreqs = sfreqs[spwid]
        ddidx0 = None
        ddprevidx = None

        if ddfreqs.size > 1 and ddfreqs[1] < ddfreqs[0]:
            ddstep = -1
        else:
            ddstep = 1

        for j in range (ddfreqs.size):
            trueidx = (ddfreqs[j] - freq0) / sdf
            ddidx = int (round (trueidx))
            if abs (ddidx - trueidx) > 0.01:
                nfoff += 1

            if j == 0:
                ddidx0 = ddidx
            elif ddidx != ddprevidx + ddstep:
                die ('cannot transpose: spw must map directly onto freq grid '
                     '(spw #%d, chan %d->%d, %d->%d)', spwid, j - 1, j,
                     ddprevidx, ddidx)

            ddprevidx = ddidx

        if ddstep == -1:
            ddidx0 = ddidx

        ddfreqmap.append ((ddidx0, ddfreqs.size, ddstep))
        maxnchan = max (maxnchan, ddfreqs.size)

    if nfoff > 0:
        warn ('had %d frequencies (out of %d) with poor mapping onto the grid',
              nfoff, nf)

    if fscale > 1.05:
        warn ('data size increasing by factor of %.2f to get everything onto '
              'the frequency grid', fscale)

    freqs = np.arange (nfslot) * sdf + freq0
    nf = nfslot

    # Compute offsets and record sizes for our output file, and write
    # the header. Write-then-seek seems to break if buffering is used???

    pbps = np.asarray (sorted (pbps), dtype=np.int32)
    nbp = pbps.size
    corr_bytes = 8 * nf
    uvww_bytes = 4 * 8
    flag_bytes = nf
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt
    data_offset = ((header.size + 7) // 8) * 8
    data_size = slice_bytes * nbp
    vars_offset = ((data_offset + data_size + 7) // 8) * 8

    def corr_offset (bpidx, tidx, fidx):
        return data_offset + bpidx * slice_bytes + corr_bytes * tidx + 8 * fidx

    def flag_offset (bpidx, tidx, fidx):
        return (data_offset + bpidx * slice_bytes + corr_bytes * nt
                + flag_bytes * tidx + fidx)

    def uvww_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes
                + (corr_bytes + flag_bytes) * nt + uvww_bytes * tidx)

    f = open (tpath, 'wb+', 0)
    f.truncate (vars_offset)  # hint how big the file will be
    f.write (header.pack (BYTE_ORDER_MARKER, FORMAT_VERSION, nbp, nt, nf,
                          freq0, sdf, time0, cadence, data_offset,
                          vars_offset))

    # Our little system for buffering/writing data. Given how the CASA Python
    # interface works, I don't think we can preallocate a huge buffer that
    # everything gets stuffed in. Which is sad. TODO: smarter data structure
    # that sorts the keys as we insert them.

    buffer_size = [0]  # hack so we can modify value in the funcs below
    buffer_info = {}
    buffer_data = np.empty (CACHE_SIZE, dtype=np.uint8)
    currec = 0

    def dump ():
        if not len (buffer_info):
            return

        pct = 100. * currec / nrecs
        msg = ' %3.1f%% (%d/%d) writing ...' % (pct, currec, nrecs)
        unbufout.write(msg.ljust (60).encode('utf8') + b'\r')

        offsets = sorted (iterkeys(buffer_info))
        curofs = None

        for offset in offsets:
            bofs, blen = buffer_info[offset]

            if curofs is None or offset != curofs:
                f.seek (offset)

            f.write (buffer_data[bofs:bofs+blen])
            curofs = offset + blen

        buffer_size[0] = 0
        buffer_info.clear ()

    def bufferview (offset, dtype, nelem):
        bofs = (buffer_size[0] + 7) & (~7)  # align for safety
        blen = dtype ().nbytes * nelem

        if bofs + blen > CACHE_SIZE:
            dump ()
            bofs = 0

        # if paranoid, check that offset not already in buffer_data
        buffer_size[0] = bofs + blen
        buffer_info[offset] = (bofs, blen)
        return buffer_data[bofs:bofs+blen].view (dtype)

    # Pass 2: write data. Set up some stuff for progress reporting.
    # NOTE: we're going to keep on rewriting uvw once for each spw

    print ('pass 2 ...')
    unbufout = os.fdopen (os.dup (1), 'wb', 0)
    tstart = time.time ()
    tlastprint = 0
    nvis = 0
    seenany = np.zeros (nbp, dtype=np.bool)
    meanuvw = np.zeros ((nbp, 3), dtype=np.double)
    muvwcounts = np.zeros (nbp, dtype=np.int)

    datacol = transpose_args.get ('datacol', 'data')
    colnames = b([datacol]
                 + 'time antenna1 antenna2 data_desc_id flag uvw sigma'.split ())
    maxrows = CACHE_SIZE // (2 * maxnchan * 16)  # 128 bits per viz.; factor of 2 safety margin
    ms.iterinit (maxrows=maxrows)
    ms.iterorigin ()

    while True:
        cols = ms.getdata (items=colnames)
        # flag and data are (npol, nchan, chunksize)
        # uvw is (3, chunksize)
        # sigma is (npol, chunksize)
        # rest are scalars, shape (chunksize)
        # data is complex128!!! converting is super slow and sad :-(

        data = cols[datacol]
        flags = cols['flag']

        for i in range (cols['time'].size):
            t = cols['time'][i] / 86400. + 2400000.5  # CASA to miriad timesystems
            tidx = timemap[datatimes.searchsorted (t)]
            ddid = cols['data_desc_id'][i]
            pi = ddprods[ddid]
            npol = len (pi)
            a1 = cols['antenna1'][i] + 1  # 0-based -> 1-based
            a2 = cols['antenna2'][i] + 1
            freqidx0, nchan, step = ddfreqmap[ddid]

            if currec % 100 == 0 and currec:
                now = time.time ()

                if now - tlastprint > 1:
                    pct = 100. * currec / nrecs
                    elapsed = now - tstart
                    total = 1. * elapsed * nrecs / currec
                    eta = total - elapsed
                    msg = ' %3.1f%% (%d/%d) elapsed %s ETA %s total %s' % \
                        (pct, currec, nrecs, _sfmt (elapsed), _sfmt (eta),
                         _sfmt (total))
                    unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
                    tlastprint = now

            nvis += npol * nchan

            for j in range (npol):
                currec += 1
                pbp = mtutil.bpToPBP32 (mtutil.aap2bp (a1, a2, pi[j]))
                bpidx = pbps.searchsorted (pbp)

                uvww = bufferview (uvww_offset (bpidx, tidx), np.double, 4)
                uvww[:3] = cols['uvw'][:,i] * casautil.INVERSE_C_MNS
                uvww[3] = cols['sigma'][j,i]**-2
                muvwcounts[bpidx] += 1
                meanuvw[bpidx] += uvww[:3]

                corrdata = bufferview (corr_offset (bpidx, tidx, freqidx0),
                                       np.complex64, nchan)
                corrdata[:] = data[j,::step,i]  # copy and convert

                flagdata = bufferview (flag_offset (bpidx, tidx, freqidx0),
                                       np.uint8, nchan)
                np.logical_not (flags[j,::step,i], flagdata)

                if flagdata.any ():
                    seenany[bpidx] = 1

        if not ms.iternext ():
            break

    dump ()
    tfinish = time.time ()
    elapsed = tfinish - tstart
    print (' 100%% (%d/%d) elapsed %s ETA 0s total %s '
           % (currec, nrecs, _sfmt (elapsed), _sfmt (elapsed)))
    unbufout.close ()

    # Finally, write out variables

    f.seek (vars_offset)
    savevariable (f, 'vispath', np.fromstring (b(vpath), dtype=np.byte))
    savevariable (f, 'basepols', pbps)
    savevariable (f, 'antnames', antnames)
    flaggedbps = pbps[np.where (seenany == 0)]
    savevariable (f, 'flaggedbps', flaggedbps)
    s = ' '.join ('%s=%s' % t for t in iteritems(transpose_args))
    savevariable (f, 'transargs', np.fromstring (b(s), dtype=np.byte))

    wbad = np.where (muvwcounts == 0)
    muvwcounts[wbad] = 1
    meanuvw[:,0] /= muvwcounts  # see _mir_transpose ()
    meanuvw[:,1] /= muvwcounts
    meanuvw[:,2] /= muvwcounts
    meanuvw[wbad] = 0
    meanuvw *= (freq0 + 0.5 * sdf * nf) / freq0
    savevariable (f, 'meanuvws', meanuvw)

    f.close ()
    ms.close ()
    return currec, nvis, data_size
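# Illustrative sketch, not part of the original module: the time-gridding
# step shared by both transpose passes above. Sorted timestamps are snapped
# onto an even grid whose spacing is the median sample-to-sample cadence;
# slots with no data are simply left empty (unless squash_time_gaps
# re-indexes them away). Sample values here are invented.
def _demo_time_grid ():
    import numpy as np
    datatimes = np.array ([0., 10., 20., 50., 60.])  # hypothetical timestamps
    cadence = np.median (datatimes[1:] - datatimes[:-1])  # -> 10.0
    tidxs = (datatimes - datatimes[0]) / cadence
    timemap = np.round (tidxs).astype (int)  # -> [0, 1, 2, 5, 6]
    ntslot = timemap[-1] + 1  # 7 grid slots; slots 3 and 4 hold no data
    return timemap, ntslot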
def _mir_transpose (vpath, tpath, unused_transpose_args):
    from miriad import VisData
    from mirtask.util import mir2pbp32
    from . import visobjs

    vis = VisData (vpath)

    # Pass 1: build up list of basepols, times

    first = True
    nrecs = 0
    delays = None
    window = None
    fc = visobjs.FreqConfig ()
    times = set ()
    pbps = set ()

    visgen = vis.readLowlevel ('3', False)
    print ('pass 1 ...')

    for inp, pream, data, flags in visgen:
        t = pream[3]
        pbp = mir2pbp32 (inp, pream)
        nrecs += 1

        if first:
            ftrack = fc.makeTracker (inp)
            first = False

        if ftrack.updated ():
            fc.fill (inp)

            if fc.numSpectralWindows () != 1:
                die ('cannot transpose: need exactly one spectral window')

            idents = list (fc.fundamentalWinIdents ())
            newwindow = fc.windowFromIdent (idents[0])

            if window is not None and newwindow != window:
                die ('cannot transpose: frequency config changes inside dataset')

            window = newwindow

        if delays is None:
            nants = inp.getVarInt ('nants')
            dinfo = inp.probeVar ('delay0')

            if dinfo is None:
                delays = False
            elif dinfo[1] == 2 * nants:
                # An ATA extension: one fixed delay per antpol. Reshape
                # to be a bit more sensible
                delays = inp.getVarFloat ('delay0', 2 * nants)
                delays = delays.reshape ((2, nants)).T
            else:
                delays = inp.getVarFloat ('delay0', nants)
                delays = np.vstack ((delays, delays)).T

        times.add (t)
        pbps.add (pbp)

    # Get the timestamps onto a nice even grid, checking that our
    # gridding is decent.

    datatimes = np.asarray (sorted (times), dtype=np.double)
    nt = datatimes.size
    time0 = datatimes[0]
    cadence = np.median (datatimes[1:] - datatimes[:-1])
    tidxs = (datatimes - time0) / cadence
    timemap = np.empty (nt, dtype=np.int)
    nslot = int (round (tidxs[-1])) + 1
    scale = nslot * 1. / nt
    noff = 0

    for i in range (nt):
        timemap[i] = int (round (tidxs[i]))
        if abs (tidxs[i] - timemap[i]) > 0.01:
            noff += 1

    if noff > 0:
        warn ('had %d timestamps (out of %d) with poor '
              'mapping onto the grid', noff, nt)

    if scale > 1.05:
        warn ('data size increasing by factor of %.2f '
              'to get everything onto the time grid', scale)

    times = np.arange (nslot) * cadence + time0
    nt = nslot

    # Compute a few other things

    pbps = np.asarray (sorted (pbps), dtype=np.int32)
    nbp = pbps.size

    # Without the int(), nchan is a numpy.int32, the type of which
    # propagates along to various byte counts and offsets which end up
    # overflowing for sufficiently large datasets and causing
    # exceptions on negative values getting passed to the various
    # system calls used below.
    nchan = int (window.nchan)

    sdf = window.width / nchan
    sfreq = window.cfreq - 0.5 * (window.width - sdf)

    corr_bytes = 8 * nchan
    uvww_bytes = 4 * 8
    flag_bytes = nchan
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt
    dump_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nbp
    nsimult = CACHE_SIZE // dump_bytes

    # Write out header info
    # Write-then-seek seems to break if buffering is used???

    data_offset = ((header.size + 7) // 8) * 8
    data_size = slice_bytes * nbp
    vars_offset = ((data_offset + data_size + 7) // 8) * 8

    f = open (tpath, 'wb+', 0)
    f.truncate (vars_offset)  # hint how big the file will be
    f.write (header.pack (BYTE_ORDER_MARKER, FORMAT_VERSION, nbp, nt, nchan,
                          sfreq, sdf, time0, cadence, data_offset,
                          vars_offset))

    # Pass 2: write data. Below we cast the tidx variables to ints for
    # the same reason as with nchan above.

    def corr_offset (bpidx, tidx):
        return data_offset + bpidx * slice_bytes + corr_bytes * int (tidx)

    def flag_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes + corr_bytes * nt
                + flag_bytes * int (tidx))

    def uvww_offset (bpidx, tidx):
        return (data_offset + bpidx * slice_bytes
                + (corr_bytes + flag_bytes) * nt + uvww_bytes * int (tidx))

    lsts = np.empty (nt, dtype=np.double)
    corrs = np.empty ((nsimult, nbp, nchan), dtype=np.complex64)
    flags = np.empty ((nsimult, nbp, nchan), dtype=np.int8)
    uvwws = np.empty ((nsimult, nbp, 4), dtype=np.double)
    seen = np.empty ((nsimult, nbp), dtype=np.bool)
    lstbuf = np.empty (nsimult, dtype=np.double)

    empty_corr = np.zeros (nchan, dtype=np.complex64)
    empty_flags = np.zeros (nchan, dtype=np.int8)
    empty_uvww = np.zeros (4, dtype=np.double)

    # Progress reporting:
    unbufout = os.fdopen (os.dup (1), 'wb', 0)
    currec = 0
    tstart = time.time ()
    tlastprint = 0

    def dump (curtimes):
        nbatch = len (curtimes)
        tidxs = np.empty (nbatch, dtype=np.int)

        for tval, sidx in iteritems(curtimes):
            tidxs[sidx] = timemap[datatimes.searchsorted (tval)]
            lsts[tidxs[sidx]] = lstbuf[sidx]

        info = np.empty ((nbatch, 3), dtype=np.int)
        info[:,0] = tidxs.argsort ()
        info[:,1] = tidxs[info[:,0]]
        info[0,2] = 1

        for i in range (1, nbatch):
            info[i,2] = (info[i,1] != info[i-1,1] + 1)

        for bpidx in range (nbp):
            for sidx, tidx, seek in info:
                if seek:
                    f.seek (corr_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (corrs[sidx,bpidx])
                else:
                    f.write (empty_corr)

            for sidx, tidx, seek in info:
                if seek:
                    f.seek (flag_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (flags[sidx,bpidx])
                else:
                    f.write (empty_flags)

            for sidx, tidx, seek in info:
                if seek:
                    f.seek (uvww_offset (bpidx, tidx))
                if seen[sidx,bpidx]:
                    f.write (uvwws[sidx,bpidx])
                else:
                    f.write (empty_uvww)

    newchunk = True
    curtimes = {}
    nrec = nvis = 0
    seenany = np.zeros (nbp, dtype=np.bool)
    meanuvw = np.zeros ((nbp, 3), dtype=np.double)
    muvwcounts = np.zeros (nbp, dtype=np.int)

    visgen = vis.readLowlevel ('3', False)
    print ('pass 2 ...')

    for inp, pream, data, recflags in visgen:
        uvw = pream[:3]
        t = pream[3]
        pbp = mir2pbp32 (inp, pream)
        var = inp.getVariance ()
        if var == 0:
            var = 1.
        weight = 1. / var

        if currec % 500 == 0 and currec:
            now = time.time ()

            if now - tlastprint > 1:
                pct = 100. * currec / nrecs
                elapsed = now - tstart
                total = 1. * elapsed * nrecs / currec
                eta = total - elapsed
                msg = ' %3.1f%% (%d/%d) elapsed %s ETA %s total %s' % \
                    (pct, currec, nrecs, _sfmt (elapsed), _sfmt (eta),
                     _sfmt (total))
                unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
                tlastprint = now

        currec += 1

        if t not in curtimes and len (curtimes) == nsimult:
            pct = 100. * currec / nrecs
            msg = ' %3.1f%% (%d/%d) writing ...' % (pct, currec, nrecs)
            unbufout.write(msg.ljust (60).encode('utf8') + b'\r\n')
            dump (curtimes)
            newchunk = True

        if newchunk:
            curtimes = {}
            newchunk = False

        sidx = curtimes.get (t)

        if sidx is None:
            sidx = len (curtimes)
            curtimes[t] = sidx
            seen[sidx].fill (False)

        bpidx = pbps.searchsorted (pbp)
        seen[sidx,bpidx] = True
        uvwws[sidx,bpidx,:3] = uvw
        uvwws[sidx,bpidx,3] = weight
        corrs[sidx,bpidx] = data
        flags[sidx,bpidx] = recflags.astype (np.int8)
        lstbuf[sidx] = inp.getVarDouble ('lst')

        muvwcounts[bpidx] += 1
        meanuvw[bpidx] += uvw

        if recflags.any ():
            seenany[bpidx] = 1

        nrec += 1
        nvis += data.size

    if len (curtimes):
        msg = ' 100%% (%d/%d) writing ...' % (currec, nrecs)
        unbufout.write(msg.ljust (60).encode('utf8') + b'\r')
        dump (curtimes)

    tfinish = time.time ()
    elapsed = tfinish - tstart
    print (' 100%% (%d/%d) elapsed %s ETA 0s total %s ' % \
        (currec, nrecs, _sfmt (elapsed), _sfmt (elapsed)))
    unbufout.close ()

    # Finally, write out variables

    f.seek (vars_offset)
    savevariable (f, 'vispath', np.fromstring (str (vis), dtype=np.byte))
    savevariable (f, 'basepols', pbps)
    if delays is not False:
        savevariable (f, 'delays', delays)
    flaggedbps = pbps[np.where (seenany == 0)]
    savevariable (f, 'flaggedbps', flaggedbps)
    savevariable (f, 'lsts', lsts)

    wbad = np.where (muvwcounts == 0)
    muvwcounts[wbad] = 1
    meanuvw[:,0] /= muvwcounts  # apparently broadcasting doesn't
    meanuvw[:,1] /= muvwcounts  # do what you'd want here. Not sure
    meanuvw[:,2] /= muvwcounts  # why, but it's only two extra lines.
    meanuvw[wbad] = 0
    # Take the mean across the spectral window, as well as in time:
    meanuvw *= window.cfreq / sfreq
    savevariable (f, 'meanuvws', meanuvw)

    f.close ()
    return nrec, nvis, data_size
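# Illustrative sketch, not part of the original module: the on-disk layout
# both transposers write. Per basepol, the file holds nt correlation spectra,
# then nt flag vectors, then nt UVWW records, packed back to back after the
# header; data_offset is taken as 0 here for simplicity, and the grid
# dimensions are invented.
def _demo_layout ():
    nt, nchan = 4, 8          # invented grid dimensions
    corr_bytes = 8 * nchan    # complex64 spectrum
    flag_bytes = nchan        # one flag byte per channel
    uvww_bytes = 4 * 8        # u, v, w, weight as doubles
    slice_bytes = (corr_bytes + flag_bytes + uvww_bytes) * nt

    def corr_offset (bpidx, tidx):
        return bpidx * slice_bytes + corr_bytes * tidx

    def flag_offset (bpidx, tidx):
        return bpidx * slice_bytes + corr_bytes * nt + flag_bytes * tidx

    def uvww_offset (bpidx, tidx):
        return (bpidx * slice_bytes + (corr_bytes + flag_bytes) * nt
                + uvww_bytes * tidx)

    return corr_offset (1, 2), flag_offset (1, 2), uvww_offset (1, 2)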
def main ():
    info = Holder ()
    info.jobname = get_one_line ('jobname.txt')
    info.jobid = get_one_line ('jobid')

    if info.jobid is None:
        info.jobid_fetch_failed = 1
        info.jobid = '?'
    else:
        line = get_sacct_first (info.jobid, 'ExitCode,MaxRSS,Elapsed,State')

        if line is None:
            info.sacct_fetch_failed = 1
            # Could theoretically fill in some of these from our various log
            # files but I can't imagine a situation where sacct will actually
            # fail on us.
            info.exitinfo = '?'
            info.mastermaxrss = '?'
            info.elapsed = '?'
            info.state = '?'
            info.success = -1
        else:
            info.exitinfo, info.mastermaxrss, info.elapsed, info.state = line.split ('|')
            info.success = 1 if info.exitinfo == '0:0' else 0

    info.workermaxrss = get_max_worker_maxrss ()

    tsubmit = get_one_line ('submit.wallclock')
    tstart = get_one_line ('start.wallclock')
    if tsubmit is not None and tstart is not None:
        info.startdelay = int (tstart) - int (tsubmit)

    try:
        info.ntasks = -1
        info.tot_nsuccess = -1
        info.tot_nfail = -1
        info.nleft = -1
        info.nattempts = -1
        info.cur_nsuccess = -1
        info.cur_nfail = -1

        natt = 0
        nsucc = 0
        nfail = 0

        info.ntasks = count_lines ('../tasks')
        info.tot_nsuccess = count_lines ('../success')
        info.tot_nfail = count_lines ('../failure')
        info.nleft = info.ntasks - info.tot_nsuccess - info.tot_nfail

        with io.open ('attempts.log', 'rt') as f:
            for line in f:
                pieces = line.strip ().split ()

                if pieces[1] == 'issued':
                    natt += 1
                elif pieces[1] == 'complete':
                    if pieces[-1] == '0':
                        nsucc += 1
                    else:
                        nfail += 1

        info.nattempts = natt
        info.cur_nsuccess = nsucc
        info.cur_nfail = nfail
    except Exception as e:
        warn ('couldn\'t summarize attempts: %s', e)

    with io.open ('postmortem.log', 'wt') as f:
        d = info.__dict__

        for k in sorted (d.keys ()):
            val = d[k]
            if val is not None:
                print ('%s=%s' % (k, val), file=f)

    try:
        with io.open (os.path.expanduser ('~/.robotinfo'), 'rt') as f:
            user = f.readline ().strip ()
            consumer_key = f.readline ().strip ()
            consumer_secret = f.readline ().strip ()
            access_token = f.readline ().strip ()
            access_secret = f.readline ().strip ()

        auth = tweepy.OAuthHandler (consumer_key, consumer_secret)
        auth.set_access_token (access_token, access_secret)
        api = tweepy.API (auth)
        t = ('@' + user + ' %(jobname)s %(state)s succ=%(success)d nt=%(ntasks)d '
             'ns=%(cur_nsuccess)d nf=%(cur_nfail)d nleft=%(nleft)d'
             % info.__dict__)
        api.update_status (status=t)
    except Exception as e:
        warn ('couldn\'t tweet: %s', e)
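# Illustrative sketch, not part of the original module: main() above implies
# the attempts.log record format only through its parsing code — the second
# whitespace-separated field is 'issued' or 'complete', and the last field of
# a 'complete' line is the exit code. The same counting logic over invented
# sample lines:
def _demo_count_attempts ():
    lines = ['17 issued node3',       # hypothetical log records
             '17 complete node3 0',
             '18 issued node4',
             '18 complete node4 1']
    natt = nsucc = nfail = 0
    for line in lines:
        pieces = line.strip ().split ()
        if pieces[1] == 'issued':
            natt += 1
        elif pieces[1] == 'complete':
            if pieces[-1] == '0':
                nsucc += 1
            else:
                nfail += 1
    return natt, nsucc, nfail  # -> (2, 1, 1)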