def test_multi_epoch_sub(sci_image_data_20200601, sci_image_data_20200604,
                         refimg_data_first2_imgs):
    outdir = os.path.dirname(sci_image_data_20200601.local_path)
    outname = os.path.join(outdir, f'{uuid.uuid4().hex}.fits')
    coadd = zuds.ScienceCoadd.from_images(
        [sci_image_data_20200604, sci_image_data_20200601], outname)
    coadd.binleft = '2020-06-01'
    coadd.binright = '2020-06-04'
    zuds.DBSession().add(coadd)

    for a in coadd.input_images:
        se_sub = zuds.SingleEpochSubtraction.from_images(
            a, refimg_data_first2_imgs
        )
        zuds.DBSession().add(se_sub)

    final = zuds.MultiEpochSubtraction.from_images(
        coadd, refimg_data_first2_imgs, force_map_subs=False,
    )
    zuds.DBSession().add(final)
    zuds.DBSession().commit()
def test_stack_input_images(sci_image_data_20200531, sci_image_data_20200601):
    images = [sci_image_data_20200531, sci_image_data_20200601]
    outdir = os.path.dirname(images[0].local_path)
    outname = os.path.join(outdir, f'{uuid.uuid4().hex}.fits')
    stack = zuds.ReferenceImage.from_images(images, outname)
    zuds.DBSession().add(stack)
    zuds.DBSession().commit()

    assert len(stack.input_images) == 2
    assert sci_image_data_20200601 in stack.input_images
    assert sci_image_data_20200531 in stack.input_images
def test_science_image_modified(science_image):
    db = zuds.DBSession()

    science_image.seeing = 2.3
    db.add(science_image)
    db.commit()
    modified = science_image.modified

    science_image.basename = 'abcd'
    db.add(science_image)
    db.commit()
    new_modified = science_image.modified

    assert new_modified > modified
def unphotometered_sources(image_id, footprint):
    poly = array(tuple(footprint.ravel()))

    jcond2 = sa.and_(
        zuds.ForcedPhotometry.image_id == image_id,
        zuds.ForcedPhotometry.source_id == zuds.Source.id
    )

    query = zuds.DBSession().query(
        zuds.Source.id,
        zuds.Source.ra,
        zuds.Source.dec
    ).outerjoin(
        zuds.ForcedPhotometry, jcond2
    ).filter(
        zuds.ForcedPhotometry.id == None
    ).filter(
        sa.func.q3c_poly_query(zuds.Source.ra, zuds.Source.dec, poly)
    )

    return query.all()
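# Example usage (a minimal sketch; the image id and the corner coordinates
# below are hypothetical, and `array` is assumed to be
# sqlalchemy.dialects.postgresql.array, so the flattened footprint becomes a
# Postgres array literal that q3c_poly_query can consume):
#
#   import numpy as np
#   corners = np.array([[210.01, 45.02],   # ra/dec of the image corners (deg)
#                       [210.91, 45.02],
#                       [210.91, 45.88],
#                       [210.01, 45.88]])
#   rows = unphotometered_sources(12345, corners)
#   for source_id, ra, dec in rows:
#       print(source_id, ra, dec)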
fn = f'/global/cfs/cdirs/m937/www/data/scratch/{s.field:06d}/' \
     f'c{s.ccdid:02d}/q{s.qid}/{zuds.fid_map[s.fid]}/{s.basename}'
shutil.copy(inpt, fn)
shutil.copy(inpt.replace('sciimg', 'mskimg'),
            fn.replace('sciimg', 'mskimg'))

# commits
try:
    detections, sub = dosub.do_one(fn, sciclass, subclass, refvers,
                                   tmpdir='tmp')
except (dosub.TooManyDetectionsError, OSError, ValueError) as e:
    zuds.DBSession().rollback()
    print(f'Error on {fn} sub: {e}')
    sci = zuds.ScienceImage.get_by_basename(os.path.basename(fn))
    ref = zuds.DBSession().query(zuds.ReferenceImage).filter(
        zuds.ReferenceImage.field == sci.field,
        zuds.ReferenceImage.ccdid == sci.ccdid,
        zuds.ReferenceImage.qid == sci.qid,
        zuds.ReferenceImage.fid == sci.fid,
        zuds.ReferenceImage.version == refvers
    ).first()
    blocker = zuds.FailedSubtraction(target_image=sci,
                                     reference_image=ref,
                                     reason=str(e))
    zuds.DBSession().add(blocker)
    zuds.DBSession().commit()
    continue
except dosub.PredecessorError as e:
import sys
import time

import zuds

zuds.init_db()
# db.DBSession().autoflush = False
# db.DBSession().get_bind().echo = True

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Make and send alerts for ZUDS.'

infile = sys.argv[1]  # file listing all the detections to make alerts for

my_work = zuds.get_my_share_of_work(infile)

alerts = []
for detid in my_work:
    start = time.time()
    d = zuds.DBSession().query(zuds.Detection).get(int(detid))

    if d.alert is not None:
        alert = d.alert
    else:
        alert = zuds.Alert.from_detection(d)
        zuds.DBSession().add(alert)
        zuds.DBSession().commit()

    stop = time.time()
    print(f'made alert for {detid} ({d.source.id}) in {stop-start:.2f} sec',
          flush=True)

    if not alert.sent:
        zuds.send_alert(alert)
        alert.sent = True
        print(
            f'sent alert for {alert.detection.id} '
            f'({alert.detection.source.id})',
            flush=True)
import sys
import time

import zuds

zuds.init_db()
zuds.DBSession().get_bind().echo = True

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Make the references for ZUDS.'

infile = sys.argv[1]  # file listing all the sources to process

# get the work
sources = zuds.get_my_share_of_work(infile)

for source_id in sources:
    tstart = time.time()
    sstart = time.time()

    source = zuds.DBSession().query(zuds.Source).get(source_id)
    bestdet = source.best_detection

    best_sub = bestdet.image
    best_new = best_sub.target_image
    best_ref = best_sub.reference_image

    field = f'{best_sub.field:06d}'
    ccdid = f'c{best_sub.ccdid:02d}'
    qid = f'q{best_sub.qid}'
    fid = f'{zuds.fid_map[best_sub.fid]}'
def do_one(fn, sciclass, subclass, refvers, tmpdir='/tmp'):

    tstart = time.time()
    sstart = time.time()

    sci = sciclass.get_by_basename(os.path.basename(fn))
    sci.map_to_local_file(fn)

    maskname = os.path.join(os.path.dirname(fn), sci.mask_image.basename)
    sci.mask_image.map_to_local_file(maskname)

    weightname = fn.replace('.fits', '.weight.fits')
    rmsname = fn.replace('.fits', '.rms.fits')

    if os.path.exists(weightname):
        sci._weightimg = zuds.FITSImage.from_file(weightname)
    elif os.path.exists(rmsname):
        sci._rmsimg = zuds.FITSImage.from_file(rmsname)
    else:
        if sciclass == zuds.ScienceImage:
            # use sextractor to make the rms map for the science image
            _ = sci.rms_image
        else:
            raise RuntimeError(f'Cannot produce a subtraction for {fn},'
                               f' the image has no weightmap or rms map.')

    sstop = time.time()
    print(f'sci: {sstop-sstart:.2f} sec to load {sci.basename}', flush=True)

    field = f'{sci.field:06d}'
    ccdid = f'c{sci.ccdid:02d}'
    qid = f'q{sci.qid}'
    fid = f'{zuds.fid_map[sci.fid]}'

    refname = f'/global/cfs/cdirs/m937/www/data/scratch/{field}/{ccdid}/{qid}/' \
              f'{fid}/ref.{field}_{ccdid}_{qid}_{fid}.{refvers}.fits'

    if not (zuds.ReferenceImage.get_by_basename(os.path.basename(refname))
            and os.path.exists(refname)):
        zuds.DBSession().rollback()
        raise RuntimeError(f'Ref {refname} does not exist. Skipping...')

    rstart = time.time()
    ref = zuds.ReferenceImage.get_by_basename(os.path.basename(refname))
    ref.map_to_local_file(refname)
    ref.mask_image.map_to_local_file(refname.replace('.fits', '.mask.fits'))
    ref._weightimg = zuds.FITSImage.from_file(
        refname.replace('.fits', '.weight.fits'))
    rstop = time.time()
    print(f'ref: {rstop-rstart:.2f} sec to load ref for {sci.basename}',
          flush=True)

    basename = zuds.sub_name(sci.basename, ref.basename)

    prev = subclass.get_by_basename(basename)
    #prev = None
    #if (prev is not None) and (prev.modified is not None) and \
    #   (prev.modified > datetime.now() - timedelta(hours=24)):
    #    db.DBSession().rollback()
    #    continue

    if prev is not None:
        raise PredecessorError(f'{basename} already has a predecessor')

    substart = time.time()
    sub = subclass.from_images(sci, ref, data_product=False, tmpdir=tmpdir,
                               refined=True)
    substop = time.time()
    print(f'sub: {substop-substart:.2f} sec to make {sub.basename}',
          flush=True)

    catstart = time.time()
    cat = zuds.PipelineFITSCatalog.from_image(sub)
    catstop = time.time()
    print(
        f'cat: {catstop-catstart:.2f} sec to make catalog for {sub.basename}',
        flush=True)

    dstart = time.time()
    detections = zuds.Detection.from_catalog(cat, filter=True)
    if len(detections) > MAX_DETS:
        raise TooManyDetectionsError(
            f'Error: {len(detections)} detections (>{MAX_DETS}) '
            f'on "{sub.basename}", something wrong with the image probably')
    dstop = time.time()
    print(f'det: {dstop-dstart:.2f} sec to make detections for {sub.basename}',
          flush=True)

    stampstart = time.time()

    if isinstance(sub, zuds.SingleEpochSubtraction):
        sub_target = sub.aligned_to(sub.reference_image)
    else:
        sub_target = sub
    if isinstance(sub.target_image, zuds.ScienceImage):
        new_target = sub.target_image.aligned_to(sub.reference_image)
    else:
        new_target = sub.target_image

    stamps = []
    for detection in detections:
        for i in [sub_target, new_target, sub.reference_image]:
            # make a stamp of each image for this detection
            stamp = zuds.Thumbnail.from_detection(detection, i)
            stamps.append(stamp)

    archstart = time.time()
    #subcopy = db.HTTPArchiveCopy.from_product(sub)
    #catcopy = db.HTTPArchiveCopy.from_product(cat)
    #mskcopy = db.HTTPArchiveCopy.from_product(sub.mask_image)
    zuds.DBSession().add(sub)
    #db.DBSession().add(cat)
    zuds.DBSession().add_all(detections)
    zuds.DBSession().add_all(stamps)
    #db.DBSession().add(mskcopy)
    #db.DBSession().add(catcopy)
    #db.DBSession().add(subcopy)
    #archive.archive(subcopy)
    #archive.archive(catcopy)
    #archive.archive(mskcopy)
    #db.DBSession().commit()
    archstop = time.time()
    print(
        f'archive: {archstop-archstart:.2f} sec to archive stuff for '
        f'{sub.basename}', flush=True)

    cleanstart = time.time()
    sci.unmap()
    cleanstop = time.time()

    tstop = time.time()
    print(
        f'clean: took {cleanstop - cleanstart} sec to clean '
        f'up after {sub.basename}', flush=True)
    print(f'took {tstop - tstart} sec to make "{sub.basename}"', flush=True)

    return detections, sub
if __name__ == '__main__':

    infile = sys.argv[1]  # file listing all the images to make subtractions of
    refvers = sys.argv[2]

    subclass = zuds.MultiEpochSubtraction
    sciclass = zuds.ScienceCoadd

    #subclass = db.SingleEpochSubtraction
    #sciclass = db.ScienceImage

    # get the work
    imgs = zuds.get_my_share_of_work(infile)

    for fn in imgs:
        try:
            detections, sub = do_one(fn, sciclass, subclass, refvers)
        except Exception as e:
            traceback.print_exception(*sys.exc_info())
            zuds.DBSession().rollback()
            continue
        else:
            zuds.DBSession().commit()
# set the stack window size
STACK_WINDOW = 7.  # days
STACK_INTERVAL = timedelta(days=STACK_WINDOW)

# create the date table
gs = sa.func.generate_series
timetype = sa.DateTime(timezone=False)
mindate = sa.cast('2017-01-03', timetype)
maxdate = sa.cast(sa.func.now(), timetype)

lcol = gs(mindate, maxdate - STACK_INTERVAL, STACK_INTERVAL).label('left')
rcol = gs(mindate + STACK_INTERVAL, maxdate, STACK_INTERVAL).label('right')

daterange = zuds.DBSession().query(lcol, rcol).subquery()

target = sa.func.array_agg(zuds.ScienceImage.id).label('target')
stacksize = sa.func.array_length(target, 1).label('stacksize')
stackcond = stacksize >= 2

jcond = sa.and_(zuds.ScienceImage.obsdate > daterange.c.left,
                zuds.ScienceImage.obsdate <= daterange.c.right)

res = zuds.DBSession().query(
    zuds.ScienceImage.field,
    zuds.ScienceImage.ccdid,
    zuds.ScienceImage.qid,
    zuds.ScienceImage.fid,
    daterange.c.left,
    daterange.c.right,
    target
).select_from(
    sa.join(
        zuds.SingleEpochSubtraction, zuds.ScienceImage.__table__,
        zuds.SingleEpochSubtraction.target_image_id == zuds.ScienceImage.id
    ).join(
import time

import zuds

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Make alerts for ZUDS.'

# get unalerted detections
unalerted = zuds.DBSession().query(
    zuds.Detection
).filter(
    zuds.Detection.source_id != None
).outerjoin(
    zuds.Alert
).filter(
    zuds.Alert.id == None
).all()

print(f'Need to make alerts for {len(unalerted)} detections')

alerts = []
for detection in unalerted:
    tstart = time.time()
    alert = zuds.Alert.from_detection(detection)
    alerts.append(alert)
    tstop = time.time()
    print(f'took {tstop - tstart:.2f} sec to make alert '
          f'for {detection.source_id}')
import sys
import time

import pandas as pd
import zuds

#db.init_db()
#db.DBSession().get_bind().echo = True

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Make the references for ZUDS.'

infile = sys.argv[1]  # csv file listing the coadd jobs to run

# get the work
jobs = zuds.get_my_share_of_work(infile, reader=pd.read_csv)

for _, job in jobs.iterrows():

    tstart = time.time()
    sstart = time.time()

    # the 'target' column holds a stringified list of science image ids
    images = zuds.DBSession().query(zuds.ZTFFile).filter(
        zuds.ZTFFile.id.in_(eval(job['target']))).all()

    zuds.ensure_images_have_the_same_properties(images, zuds.GROUP_PROPERTIES)

    field = f'{images[0].field:06d}'
    ccdid = f'c{images[0].ccdid:02d}'
    qid = f'q{images[0].qid}'
    fid = f'{zuds.fid_map[images[0].fid]}'

    for image in images:
        path = f'/global/cfs/cdirs/m937/www/data/scratch/{field}/{ccdid}/{qid}/' \
               f'{fid}/{image.basename}'
        image.map_to_local_file(path)
        image.mask_image.map_to_local_file(path.replace('sciimg', 'mskimg'))

    basename = f'{field}_{ccdid}_{qid}_{fid}_{job["left"]}_' \
               f'{job["right"]}.coadd.fits'
rank = comm.Get_rank()
size = comm.Get_size()

# avoid pandas to csv bottleneck using parallelism
df = pd.DataFrame(output)
df.to_csv(f'output_{rank:04d}.csv', index=False, header=rank == 0)

comm.Barrier()

if rank == 0:
    # concatenate the per-rank csvs into the final output file
    with open(outfile, 'w') as f:
        for fn in [f'output_{r:04d}.csv' for r in range(size)]:
            if os.path.exists(fn):
                with open(fn, 'r') as g:
                    f.write(g.read())
                os.remove(fn)

    jobid = os.getenv('SLURM_JOB_ID')
    if jobid is not None:
        job = zuds.DBSession().query(zuds.ForcePhotJob).filter(
            zuds.ForcePhotJob.slurm_id == jobid).first()
        job.status = 'ready_for_loading'
        zuds.DBSession().add(job)
        zuds.DBSession().commit()
    else:
        df = pd.DataFrame(output)
        df.to_csv(outfile, index=False)

stop = time.time()
zuds.print_time(start, stop, 0, 'start to finish')
        sci.mask_image.clear()
        sci.mask_image.map_to_local_file(maskname)

        c1 = min_date <= sci.obsdate <= max_date
        c2 = 1.7 < sci.seeing < 2.5
        c3 = 19.2 < sci.maglimit < 22.
        c4 = sci.infobits == 0

        if c1 and c2 and c3 and c4:
            ok.append(sci)

    # get the very best images
    top = sorted(ok, key=lambda i: i.maglimit, reverse=True)[:50]

    if len(top) == 0:
        print(f'No images passed the quality cuts to make a reference '
              f'for {d}. Skipping...')
        zuds.DBSession().rollback()
        continue

    coaddname = os.path.join(
        d, f'ref.{ok[0].field:06d}_c{ok[0].ccdid:02d}'
           f'_q{ok[0].qid}_{zuds.fid_map[ok[0].fid]}.{version}.fits')

    if len(top) < 14:
        print(f'Not enough images ({len(top)} < 14) to make reference '
              f'{coaddname}. Skipping...')
        zuds.DBSession().rollback()
        continue

    try:
        coadd = zuds.ReferenceImage.from_images(top, coaddname,
def retrieve_images(images_or_ids, job_script_destination='.',
                    frame_destination='.', log_destination='.',
                    preserve_dirs=False, n_jobs=14, tape=True, http=True,
                    ipac=True, archive_new=True):
    """Image whereclause should be a clause element on ZTFFile."""

    images_or_ids = np.atleast_1d(images_or_ids)
    ids = [int(i) if not hasattr(i, 'id') else i.id for i in images_or_ids]

    got = []

    if tape:
        jt = sa.join(zuds.ZTFFile, zuds.TapeCopy,
                     zuds.ZTFFile.id == zuds.TapeCopy.product_id)
        full_query = zuds.DBSession().query(
            zuds.ZTFFile, zuds.TapeCopy).select_from(jt).outerjoin(
            zuds.HTTPArchiveCopy,
            zuds.ZTFFile.id == zuds.HTTPArchiveCopy.product_id).filter(
            zuds.HTTPArchiveCopy.product_id == None)
        full_query = full_query.filter(
            zuds.ZTFFile.id.in_(ids),
            zuds.ZTFFile.fid != 3)  # dont use i-band from tape

        # this is the query to get the image paths
        metatable = pd.read_sql(full_query.statement,
                                zuds.DBSession().get_bind())

        df = metatable[['basename', 'archive_id']]
        df = df.rename({'archive_id': 'tarpath'}, axis='columns')
        tars = df['tarpath'].unique()

        got.extend(metatable['product_id'].tolist())

        # sort tarball retrieval by location on tape
        t = datetime.datetime.utcnow().isoformat().replace(' ', '_')
        hpss_in = Path(job_script_destination) / f'hpss_{t}.in'
        hpss_out = Path(job_script_destination) / f'hpss_{t}.out'

        with open(hpss_in, 'w') as f:
            f.write("\n".join([f'ls -P {tar}' for tar in tars]))
            f.write("\n")  # always end with a \n

        syscall = f'/usr/common/mss/bin/hsi -O {hpss_out} in {hpss_in}'

        # for some reason hsi writes in >> mode, so need to delete the output
        # file if it exists to prevent it from mixing with results of a
        # previous run
        if hpss_out.exists():
            os.remove(hpss_out)

        p = subprocess.Popen(syscall.split(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        while True:
            if p.poll() is not None:
                break
            else:
                time.sleep(0.01)

        retcode = p.returncode
        stderr, stdout = p.stderr, p.stdout

        # 64 means some of the files didnt exist, that's ok
        if retcode not in [0, 64]:
            raise subprocess.CalledProcessError(retcode, syscall,
                                                stderr=stderr.read())

        # filter out the lines that dont start with FILE
        with open(hpss_out, 'r') as f:
            lines = [line for line in f.readlines() if line.startswith('FILE')]
        stream = io.StringIO(''.join(lines))

        # read it into pandas
        ordered = pd.read_csv(stream, delim_whitespace=True, names=[
            'ignore-2', 'hpsspath', 'ignore-1', 'ignore0', 'position', 'tape',
            'ignore1', 'ignore2', 'ignore3', 'ignore4', 'ignore5', 'ignore6',
            'ignore7'
        ])

        ordered['tape'] = [t[:-2] for t in ordered['tape']]
        ordered['position'] = [t.split('+')[0] for t in ordered['position']]
        ordered = ordered.sort_values(['tape', 'position'])

        for column in ordered.copy().columns:
            if column.startswith('ignore'):
                del ordered[column]

        # submit the jobs based on which tape the tar files reside on
        # and in what order they are on the tape
        dependency_dict = {}
        for tape, group in ordered.groupby(
                np.arange(len(ordered)) // (len(ordered) // n_jobs)):

            # get the tarfiles
            tarnames = group['hpsspath'].tolist()
            images = [
                df[df['tarpath'] == tarname]['basename'].tolist()
                for tarname in tarnames
            ]

            jobid = submit_hpss_job(tarnames, images, job_script_destination,
                                    frame_destination, log_destination, tape,
                                    preserve_dirs)

            for image in df[[name in tarnames for name in
                             df['tarpath']]]['basename']:
                dependency_dict[image] = jobid

    if http:
        # now do the ones that are on disk
        jt = sa.join(zuds.ZTFFile, zuds.HTTPArchiveCopy,
                     zuds.ZTFFile.id == zuds.HTTPArchiveCopy.product_id)

        full_query = zuds.DBSession().query(
            zuds.ZTFFile, zuds.HTTPArchiveCopy).select_from(jt)
        full_query = full_query.filter(zuds.ZTFFile.id.in_(ids))

        # this is the query to get the image paths
        metatable2 = pd.read_sql(full_query.statement,
                                 zuds.DBSession().get_bind())

        got.extend(metatable2['product_id'].tolist())

        # copy each image over
        for _, row in metatable2.iterrows():
            path = row['archive_path']
            if preserve_dirs:
                target = Path(frame_destination) / os.path.join(
                    *path.split('/')[-5:])
            else:
                target = Path(frame_destination) / os.path.basename(path)

            if Path(target).absolute() == Path(path).absolute():
                # don't overwrite an already existing file
                continue

            target.parent.mkdir(exist_ok=True, parents=True)
            shutil.copy(path, target)

    # download the remaining images individually from IPAC
    if ipac:
        remaining = [int(i) for i in np.setdiff1d(ids, got)]
        remaining = zuds.DBSession().query(zuds.ZTFFile).filter(
            zuds.ZTFFile.id.in_(remaining)).all()

        cookie = zuds.ipac_authenticate()

        for i in remaining:
            if preserve_dirs:
                destination = Path(frame_destination) / i.relname
            else:
                destination = Path(frame_destination) / i.basename

            suffix = 'mskimg.fits' if isinstance(
                i, zuds.MaskImage) else 'sciimg.fits'

            try:
                if isinstance(i, zuds.MaskImage):
                    i.parent_image.download(suffix=suffix,
                                            destination=destination,
                                            cookie=cookie)
                else:
                    i.download(suffix=suffix, destination=destination,
                               cookie=cookie)
            except requests.RequestException:
                continue

            if archive_new:
                i.map_to_local_file(destination)

                # ensure the image header is written to the DB
                i.load_header()

                acopy = zuds.HTTPArchiveCopy.from_product(i, check=False)
                acopy.put()

                # and archive the file to disk
                zuds.DBSession().add(acopy)
                zuds.DBSession().commit()
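# Example invocation (a minimal sketch; the ids and the destination directory
# are hypothetical). Skipping the tape stage avoids queueing HPSS jobs and
# pulls frames only from the local HTTP archive and IPAC:
#
#   retrieve_images([101, 102, 103],
#                   frame_destination='./frames',
#                   tape=False, http=True, ipac=True,
#                   archive_new=False)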
import sys
import time

import zuds

# db.DBSession().autoflush = False
# db.DBSession().get_bind().echo = True

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Persist thumbnails for ZUDS.'

infile = sys.argv[1]  # file listing the thumbnail ids to persist

BATCH_SIZE = 50

my_work = zuds.get_my_share_of_work(infile)


def batch(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]


for thumbids in batch(my_work, n=BATCH_SIZE):
    start = time.time()
    thumbs = zuds.DBSession().query(zuds.Thumbnail).filter(
        zuds.Thumbnail.id.in_(thumbids.tolist()))
    for t in thumbs:
        t.persist()
    stop = time.time()
    zuds.print_time(start, stop, t, 'get and persist')

    start = time.time()
    zuds.DBSession().commit()
    stop = time.time()
    zuds.print_time(start, stop, t, 'commit')