def create_image(img_name, data, hdf_opts={}):
    """ Create HDF and FITS versions of a given image """
    output_dir_fits = 'fits_generated'
    output_dir_hdf = 'hdf_generated'

    idi_img = idi.IdiHdulist()
    idi_img.add_image_hdu(img_name, data=data)

    # Create all the filenames
    fits_filename = join(output_dir_fits, img_name + '.fits')
    hdf_filename = join(output_dir_hdf, img_name + '.h5')
    hdf_comp_filename = join(output_dir_hdf, img_name + '_comp.h5')
    gzip_comp_filename = join(output_dir_fits, img_name + '.fits.gz')
    fits_comp_filename = join(output_dir_fits, img_name + '.fits.fz')

    # Delete files that already exist
    file_list = [fits_filename, hdf_filename, fits_comp_filename,
                 hdf_comp_filename, gzip_comp_filename]
    for fname in file_list:
        if exists(fname):
            os.remove(fname)

    print("\nWriting %s to disk" % img_name)
    t1 = time.time()
    fitsio.export_fits(idi_img, fits_filename)
    t2 = time.time()
    hdfio.export_hdf(idi_img, hdf_filename)
    t3 = time.time()
    hdfio.export_hdf(idi_img, hdf_comp_filename, **hdf_opts)
    t4 = time.time()
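# Usage sketch (not part of the original listing): exercise create_image on a
# synthetic numpy image. Note the output directories must already exist for
# this early revision (later revisions call _mkdir), and whether export_hdf
# forwards h5py-style dataset options such as compression='gzip' depends on
# the installed fits2hdf version, so treat the hdf_opts contents here as a
# placeholder.
import numpy as np

example_data = np.random.normal(size=(1024, 1024)).astype('float32')
create_image('synthetic_noise', example_data, hdf_opts={'compression': 'gzip'})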
def _download_cutouts(args, url=None, filters=None, tmp_dir=None, auth=None):
    list_table, ids, batch_index = args

    session = requests.Session()
    session.auth = auth

    output_file = os.path.join(tmp_dir, 'batch_cutout_%d.hdf' % batch_index)

    # Check if the output cutout file exists; if it does, we are good
    # TODO: check that the file is ok, but it most probably is
    if os.path.isfile(output_file):
        print('Found cutout file for batch %d, skipping download' % batch_index)
        return output_file

    # Download batches for all bands
    output_paths = {}
    for filt in filters:
        print('Download filter %s for batch %d' % (filt, batch_index))
        list_table['filter'] = filt

        # Save the download list to the temporary folder
        filename = os.path.join(tmp_dir, 'batch_%s_%d' % (filt, batch_index))
        list_table.write(filename, format='ascii.tab')

        # Request download
        with open(filename, 'rb') as list_file:
            resp = session.post(url, files={'list': list_file}, stream=True)

        # Check that the request succeeded
        assert (resp.status_code == 200)
        tar_filename = resp.headers['Content-Disposition'].split('"')[-2]

        # Proceed to download the data
        with open(os.path.join(tmp_dir, tar_filename), 'wb') as f:
            for chunk in resp.iter_content(chunk_size=1024):
                f.write(chunk)

        # Untar the archive
        with tarfile.TarFile(os.path.join(tmp_dir, tar_filename), "r") as tarball:
            tarball.extractall(tmp_dir)

        # Remove the list file and the tar file after extraction
        os.remove(filename)
        os.remove(os.path.join(tmp_dir, tar_filename))

        # Recover path to output dir
        output_path = os.path.join(tmp_dir, tar_filename.split('.tar')[0])
        output_paths[filt] = output_path

        # Transform each file into HDFITS format, named based on the object ids
        fnames = glob.glob(output_path + '/*.fits')
        for fname in fnames:
            indx = int(fname.split(output_path + '/')[1].split('-')[0]) - 2
            output_filename = os.path.join(output_path, '%d.hdf' % ids[indx])
            a = read_fits(fname)
            export_hdf(a, output_filename)
            # Remove the converted file
            os.remove(fname)

    # At this stage all filters have been downloaded for this batch; now
    # aggregate all of them into a single HDF file
    with h5py.File(output_file, mode='w') as d:
        for obj_id in ids:
            for filt in filters:
                with h5py.File(os.path.join(output_paths[filt], '%d.hdf' % obj_id),
                               mode='r') as s:
                    d.copy(s, '%d/%s' % (obj_id, filt))

    # For good measure, remove all temporary directories
    for filt in filters:
        shutil.rmtree(output_paths[filt])

    return output_file
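# Usage sketch (not in the original listing): the single-tuple `args`
# signature suggests _download_cutouts is meant to be mapped over a process
# pool, one call per batch. download_batches, its arguments, and the pool
# size of 4 are assumptions for illustration; batch_tables would be one
# astropy Table per batch with matching lists of object ids.
from functools import partial
from multiprocessing import Pool

def download_batches(batch_tables, batch_ids, url, filters, tmp_dir, auth):
    """Download every batch in parallel; returns the per-batch HDF paths."""
    worker = partial(_download_cutouts, url=url, filters=filters,
                     tmp_dir=tmp_dir, auth=auth)
    jobs = [(table, ids, i)
            for i, (table, ids) in enumerate(zip(batch_tables, batch_ids))]
    with Pool(4) as pool:
        return pool.map(worker, jobs)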
def load_fits(file_name, hdf_opts={}):
    """ Load FITS file, create various things """
    output_dir_fits = 'fits_generated'
    output_dir_hdf = 'hdf_generated'

    idi_img = fitsio.read_fits(file_name)
    for hdu_name in idi_img:
        hdu = idi_img[hdu_name]
        if isinstance(hdu, idi.IdiTableHdu):
            for col in hdu.colnames:
                if hdu[col].dtype.type is np.float32:
                    #print("CONVERTING %s TO INT" % col)
                    hdu[col] = hdu[col].astype('int32')
                if col == 'FLUX':
                    print("FLUX")
                    hdu[col] = hdu[col].data / 16
                    hdu[col].dtype = 'int32'
                    #print(hdu[col].dtype)

    img_name = os.path.split(file_name)[1]
    img_name = os.path.splitext(img_name)[0]
    name = img_name
    #idi_img.add_image_hdu(img_name, data=data)

    # Create all the filenames
    fits_filename = join(output_dir_fits, img_name + '.fits')
    hdf_filename = join(output_dir_hdf, img_name + '.h5')
    hdf_comp_filename = join(output_dir_hdf, img_name + '_comp.h5')
    gzip_comp_filename = join(output_dir_fits, img_name + '.fits.gz')
    fits_comp_filename = join(output_dir_fits, img_name + '.fits.fz')

    # Delete files that already exist
    file_list = [fits_filename, hdf_filename, fits_comp_filename,
                 hdf_comp_filename, gzip_comp_filename]
    for fname in file_list:
        if exists(fname):
            os.remove(fname)

    print("\nWriting %s to disk" % img_name)
    t1 = time.time()
    fitsio.export_fits(idi_img, fits_filename)
    t2 = time.time()
    hdfio.export_hdf(idi_img, hdf_filename)
    t3 = time.time()
    hdfio.export_hdf(idi_img, hdf_comp_filename, **hdf_opts)
    t4 = time.time()
    os.system("./fpack -table %s" % fits_filename)
    t5 = time.time()
    os.system("gzip -c %s > %s.gz" % (fits_filename, fits_filename))
    t6 = time.time()

    dd = {
        'img_name': name,
        'fits_size': getsize(fits_filename),
        'hdf_size': getsize(hdf_filename),
        'hdf_comp_size': getsize(hdf_comp_filename),
        'fits_comp_size': getsize(fits_filename + '.fz'),
        'gzip_comp_size': getsize(fits_filename + '.gz'),
        'fits_time': (t2 - t1),
        'hdf_time': (t3 - t2),
        'hdf_comp_time': (t4 - t3),
        'fits_comp_time': (t5 - t4),
        'gzip_comp_time': (t6 - t5),
        'comp_fact_hdf': float(getsize(fits_filename)) / getsize(hdf_comp_filename),
        'comp_fact_fits': float(getsize(fits_filename)) / getsize(fits_comp_filename),
        'comp_fact_gzip': float(getsize(fits_filename)) / getsize(gzip_comp_filename)
    }

    rh = dd['comp_fact_gzip']
    th = dd['gzip_comp_time']
    dd["weissman_hdf"] = weissman_score(dd["comp_fact_hdf"], dd["hdf_comp_time"], rh, th)
    dd["weissman_fits"] = weissman_score(dd["comp_fact_fits"], dd["fits_comp_time"], rh, th)

    print("FITS file size: %sB" % dd['fits_size'])
    print("HDF file size: %sB" % dd['hdf_size'])
    print("FITS comp size: %sB" % dd['fits_comp_size'])
    print("HDF comp size: %sB" % dd['hdf_comp_size'])
    print("GZIP comp size: %sB" % dd['gzip_comp_size'])
    print("FITS creation time: %2.2fs" % dd['fits_time'])
    print("HDF creation time: %2.2fs" % dd['hdf_time'])
    print("FITS comp time: %2.2fs" % dd['fits_comp_time'])
    print("HDF comp time: %2.2fs" % dd['hdf_comp_time'])
    print("GZIP comp time: %2.2fs" % dd['gzip_comp_time'])
    print("FITS/FITS compression: %2.2fx" % dd['comp_fact_fits'])
    print("HDF/FITS compression: %2.2fx" % dd['comp_fact_hdf'])
    print("GZIP/FITS compression: %2.2fx" % dd['comp_fact_gzip'])
    print("FITS weissman score: %2.2f" % dd['weissman_fits'])
    print("HDF weissman score: %2.2f" % dd['weissman_hdf'])

    return dd
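# Hedged sketch of the weissman_score helper used above; it is not defined in
# this listing. The Weissman score as usually quoted is
#     W = alpha * (r / r_bar) * (log(T_bar) / log(T))
# where r is a compression ratio, T the time it took, and (r_bar, T_bar) the
# same quantities for a reference compressor (gzip above). alpha = 1 and
# natural logs are assumptions here; note the score misbehaves when a run
# finishes in under one second (log(T) <= 0).
import math

def weissman_score(r, t, r_bar, t_bar, alpha=1.0):
    """Score ratio r achieved in time t against a reference (r_bar, t_bar)."""
    return alpha * (r / r_bar) * (math.log(t_bar) / math.log(t))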
def create_image(name, data, hdf_opts={}):
    """ Create HDF and FITS versions of a given image """
    output_dir_fits = 'fits_generated'
    output_dir_hdf = 'hdf_generated'
    img_name = name  # the same name is used for the HDU and the output files

    idi_img = idi.IdiHdulist()
    idi_img.add_image_hdu(img_name, data=data)

    # Create all the filenames
    fits_filename = join(output_dir_fits, img_name + '.fits')
    hdf_filename = join(output_dir_hdf, img_name + '.h5')
    hdf_comp_filename = join(output_dir_hdf, img_name + '_comp.h5')
    gzip_comp_filename = join(output_dir_fits, img_name + '.fits.gz')
    fits_comp_filename = join(output_dir_fits, img_name + '.fits.fz')

    # Delete files that already exist
    file_list = [fits_filename, hdf_filename, fits_comp_filename,
                 hdf_comp_filename, gzip_comp_filename]
    for fname in file_list:
        if exists(fname):
            os.remove(fname)

    print("\nWriting %s to disk" % img_name)
    t1 = time.time()
    fitsio.export_fits(idi_img, fits_filename)
    t2 = time.time()
    hdfio.export_hdf(idi_img, hdf_filename)
    t3 = time.time()
    hdfio.export_hdf(idi_img, hdf_comp_filename, **hdf_opts)
    t4 = time.time()
    os.system("./fpack -table %s" % fits_filename)
    t5 = time.time()
    os.system("gzip -c %s > %s.gz" % (fits_filename, fits_filename))
    t6 = time.time()

    dd = {
        'img_name': name,
        'fits_size': getsize(fits_filename),
        'hdf_size': getsize(hdf_filename),
        'hdf_comp_size': getsize(hdf_comp_filename),
        'fits_comp_size': getsize(fits_filename + '.fz'),
        'gzip_comp_size': getsize(fits_filename + '.gz'),
        'fits_time': (t2 - t1),
        'hdf_time': (t3 - t2),
        'hdf_comp_time': (t4 - t3),
        'fits_comp_time': (t5 - t4),
        'gzip_comp_time': (t6 - t5),
        'comp_fact_hdf': float(getsize(fits_filename)) / getsize(hdf_comp_filename),
        'comp_fact_fits': float(getsize(fits_filename)) / getsize(fits_comp_filename),
        'comp_fact_gzip': float(getsize(fits_filename)) / getsize(gzip_comp_filename)
    }

    rh = dd['comp_fact_gzip']
    th = dd['gzip_comp_time']
    dd["weissman_hdf"] = weissman_score(dd["comp_fact_hdf"], dd["hdf_comp_time"], rh, th)
    dd["weissman_fits"] = weissman_score(dd["comp_fact_fits"], dd["fits_comp_time"], rh, th)

    print("FITS file size: %sB" % dd['fits_size'])
    print("HDF file size: %sB" % dd['hdf_size'])
    print("FITS comp size: %sB" % dd['fits_comp_size'])
    print("HDF comp size: %sB" % dd['hdf_comp_size'])
    print("GZIP comp size: %sB" % dd['gzip_comp_size'])
    print("FITS creation time: %2.2fs" % dd['fits_time'])
    print("HDF creation time: %2.2fs" % dd['hdf_time'])
    print("FITS comp time: %2.2fs" % dd['fits_comp_time'])
    print("HDF comp time: %2.2fs" % dd['hdf_comp_time'])
    print("GZIP comp time: %2.2fs" % dd['gzip_comp_time'])
    print("FITS/FITS compression: %2.2fx" % dd['comp_fact_fits'])
    print("HDF/FITS compression: %2.2fx" % dd['comp_fact_hdf'])
    print("GZIP/FITS compression: %2.2fx" % dd['comp_fact_gzip'])
    print("FITS weissman score: %2.2f" % dd['weissman_fits'])
    print("HDF weissman score: %2.2f" % dd['weissman_hdf'])

    return dd
from fits2hdf.io import fitsio, hdfio
import pylab as plt
import os
import glob

fits_file = 'test/fits/LWA1-2014-02-23T11H06M51.fitsidi'

print("Reading %s" % fits_file)
fits_a = fitsio.read_fits(fits_file)
print(fits_a)

if os.path.exists("test.hdf"):
    os.remove("test.hdf")

print("Writing %s copy" % fits_file)
hdfio.export_hdf(fits_a, "test.hdf")

try:
    fits_b = hdfio.read_hdf("test.hdf")
    print(fits_b)
except Exception:
    print("ERROR: can't read back test.hdf (exported from %s)" % fits_file)
    raise

##print(fits_a)
#print(fits_b)
def load_fits(file_name, hdf_opts={}):
    """ Load FITS file, create various things """
    output_dir_fits = 'fits_generated'
    output_dir_hdf = 'hdf_generated'
    _mkdir(output_dir_fits)
    _mkdir(output_dir_hdf)

    idi_img = fitsio.read_fits(file_name)
    for hdu_name in idi_img:
        hdu = idi_img[hdu_name]
        if isinstance(hdu, idi.IdiTableHdu):
            for col in hdu.colnames:
                if hdu[col].dtype.type is np.float32:
                    #print("CONVERTING %s TO INT" % col)
                    hdu[col] = hdu[col].astype('int32')
                if col == 'FLUX':
                    print("FLUX")
                    hdu[col] = hdu[col].data / 16
                    hdu[col].dtype = 'int32'
                    #print(hdu[col].dtype)

    img_name = os.path.split(file_name)[1]
    img_name = os.path.splitext(img_name)[0]
    name = img_name
    #idi_img.add_image_hdu(img_name, data=data)

    # Create all the filenames
    fits_filename = join(output_dir_fits, img_name + '.fits')
    hdf_filename = join(output_dir_hdf, img_name + '.h5')
    hdf_comp_filename = join(output_dir_hdf, img_name + '_comp.h5')
    gzip_comp_filename = join(output_dir_fits, img_name + '.fits.gz')
    fits_comp_filename = join(output_dir_fits, img_name + '.fits.fz')

    # Delete files that already exist
    file_list = [fits_filename, hdf_filename, fits_comp_filename,
                 hdf_comp_filename, gzip_comp_filename]
    for fname in file_list:
        if exists(fname):
            os.remove(fname)

    print("\nWriting {} to \n{}\n{}".format(img_name, fits_filename, hdf_filename))
    t1 = time.time()
    fitsio.export_fits(idi_img, fits_filename)
    t2 = time.time()
    hdfio.export_hdf(idi_img, hdf_filename)
    t3 = time.time()
    hdfio.export_hdf(idi_img, hdf_comp_filename, **hdf_opts)
    t4 = time.time()
    subprocess.check_call(['fpack', '-table', fits_filename])
    t5 = time.time()
    subprocess.check_call(['gzip', '-k', fits_filename])
    t6 = time.time()

    dd = {
        'img_name': name,
        'fits_size': getsize(fits_filename),
        'hdf_size': getsize(hdf_filename),
        'hdf_comp_size': getsize(hdf_comp_filename),
        'fits_comp_size': getsize(fits_filename + '.fz'),
        'gzip_comp_size': getsize(fits_filename + '.gz'),
        'fits_time': (t2 - t1),
        'hdf_time': (t3 - t2),
        'hdf_comp_time': (t4 - t3),
        'fits_comp_time': (t5 - t4),
        'gzip_comp_time': (t6 - t5),
        'comp_fact_hdf': float(getsize(fits_filename)) / getsize(hdf_comp_filename),
        'comp_fact_fits': float(getsize(fits_filename)) / getsize(fits_comp_filename),
        'comp_fact_gzip': float(getsize(fits_filename)) / getsize(gzip_comp_filename)
    }

    rh = dd['comp_fact_gzip']
    th = dd['gzip_comp_time']
    dd["weissman_hdf"] = weissman_score(dd["comp_fact_hdf"], dd["hdf_comp_time"], rh, th)
    dd["weissman_fits"] = weissman_score(dd["comp_fact_fits"], dd["fits_comp_time"], rh, th)

    print("FITS file size: %sB" % dd['fits_size'])
    print("HDF file size: %sB" % dd['hdf_size'])
    print("FITS comp size: %sB" % dd['fits_comp_size'])
    print("HDF comp size: %sB" % dd['hdf_comp_size'])
    print("GZIP comp size: %sB" % dd['gzip_comp_size'])
    print("FITS creation time: %2.2fs" % dd['fits_time'])
    print("HDF creation time: %2.2fs" % dd['hdf_time'])
    print("FITS comp time: %2.2fs" % dd['fits_comp_time'])
    print("HDF comp time: %2.2fs" % dd['hdf_comp_time'])
    print("GZIP comp time: %2.2fs" % dd['gzip_comp_time'])
    print("FITS/FITS compression: %2.2fx" % dd['comp_fact_fits'])
    print("HDF/FITS compression: %2.2fx" % dd['comp_fact_hdf'])
    print("GZIP/FITS compression: %2.2fx" % dd['comp_fact_gzip'])
    print("FITS weissman score: %2.2f" % dd['weissman_fits'])
    print("HDF weissman score: %2.2f" % dd['weissman_hdf'])

    return dd
def create_image(name, data, hdf_opts={}):
    """ Create HDF and FITS versions of a given image """
    output_dir_fits = 'fits_generated'
    output_dir_hdf = 'hdf_generated'
    _mkdir(output_dir_fits)
    _mkdir(output_dir_hdf)
    img_name = name  # the same name is used for the HDU and the output files

    idi_img = idi.IdiHdulist()
    idi_img.add_image_hdu(img_name, data=data)

    # Create all the filenames
    fits_filename = join(output_dir_fits, img_name + '.fits')
    hdf_filename = join(output_dir_hdf, img_name + '.h5')
    hdf_comp_filename = join(output_dir_hdf, img_name + '_comp.h5')
    gzip_comp_filename = join(output_dir_fits, img_name + '.fits.gz')
    fits_comp_filename = join(output_dir_fits, img_name + '.fits.fz')

    # Delete files that already exist
    file_list = [fits_filename, hdf_filename, fits_comp_filename,
                 hdf_comp_filename, gzip_comp_filename]
    for fname in file_list:
        if exists(fname):
            os.remove(fname)

    print("\nWriting {} to \n{}\n{}".format(img_name, fits_filename, hdf_filename))
    t1 = time.time()
    fitsio.export_fits(idi_img, fits_filename)
    assert exists(fits_filename)
    t2 = time.time()
    hdfio.export_hdf(idi_img, hdf_filename)
    t3 = time.time()
    hdfio.export_hdf(idi_img, hdf_comp_filename, **hdf_opts)
    t4 = time.time()
    subprocess.check_call(['fpack', '-table', fits_filename])
    t5 = time.time()
    subprocess.check_call(['gzip', '-k', fits_filename])
    t6 = time.time()

    dd = {
        'img_name': name,
        'fits_size': getsize(fits_filename),
        'hdf_size': getsize(hdf_filename),
        'hdf_comp_size': getsize(hdf_comp_filename),
        'fits_comp_size': getsize(fits_filename + '.fz'),
        'gzip_comp_size': getsize(fits_filename + '.gz'),
        'fits_time': (t2 - t1),
        'hdf_time': (t3 - t2),
        'hdf_comp_time': (t4 - t3),
        'fits_comp_time': (t5 - t4),
        'gzip_comp_time': (t6 - t5),
        'comp_fact_hdf': float(getsize(fits_filename)) / getsize(hdf_comp_filename),
        'comp_fact_fits': float(getsize(fits_filename)) / getsize(fits_comp_filename),
        'comp_fact_gzip': float(getsize(fits_filename)) / getsize(gzip_comp_filename)
    }

    rh = dd['comp_fact_gzip']
    th = dd['gzip_comp_time']
    dd["weissman_hdf"] = weissman_score(dd["comp_fact_hdf"], dd["hdf_comp_time"], rh, th)
    dd["weissman_fits"] = weissman_score(dd["comp_fact_fits"], dd["fits_comp_time"], rh, th)

    print("FITS file size: %sB" % dd['fits_size'])
    print("HDF file size: %sB" % dd['hdf_size'])
    print("FITS comp size: %sB" % dd['fits_comp_size'])
    print("HDF comp size: %sB" % dd['hdf_comp_size'])
    print("GZIP comp size: %sB" % dd['gzip_comp_size'])
    print("FITS creation time: %2.2fs" % dd['fits_time'])
    print("HDF creation time: %2.2fs" % dd['hdf_time'])
    print("FITS comp time: %2.2fs" % dd['fits_comp_time'])
    print("HDF comp time: %2.2fs" % dd['hdf_comp_time'])
    print("GZIP comp time: %2.2fs" % dd['gzip_comp_time'])
    print("FITS/FITS compression: %2.2fx" % dd['comp_fact_fits'])
    print("HDF/FITS compression: %2.2fx" % dd['comp_fact_hdf'])
    print("GZIP/FITS compression: %2.2fx" % dd['comp_fact_gzip'])
    print("FITS weissman score: %2.2f" % dd['weissman_fits'])
    print("HDF weissman score: %2.2f" % dd['weissman_hdf'])

    return dd
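# Hedged sketch of the _mkdir helper called above; it is not defined in this
# listing. Presumably it just creates an output directory when it is missing
# and is a no-op otherwise.
def _mkdir(path):
    """Create `path` if it does not exist (exist_ok requires Python 3)."""
    os.makedirs(path, exist_ok=True)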