def read_ms(infile, verbosity=1):
    """ Convert MS to a HDF file

    Opens the Measurement Set at ``infile``, converts its main table into a
    "MAIN" HDU, then converts every sub-table referenced from the main
    table's keywords (values of the form ``"Table: <path>"``) into an HDU
    stored under the keyword's name.

    :param infile: Measurement Set path
    :param verbosity: verbosity level passed to PrintLog, IdiHdulist and table2hdu
    :return: HDU version of Measurement Set
    """
    pp = PrintLog(verbosity=verbosity)

    # Both text types on Python 2 (str, unicode); collapses to (str, str) on
    # Python 3, where the bare name ``unicode`` would raise NameError.
    text_types = (type(""), type(u""))

    ms = pt.table(infile)
    try:
        # Create a HDU List for storing HDUs
        hdul = IdiHdulist(verbosity=verbosity)

        # Add each column to the main HDU. The table is kept open because its
        # keywords are read again below.
        hdu_main = table2hdu(ms, "MAIN", verbosity=verbosity, close_after=False)
        hdul["MAIN"] = hdu_main

        # Now look for other keyword tables
        for key, val in ms.getkeywords().items():
            pp.debug(val)
            if isinstance(val, text_types):
                if val.startswith("Table: "):
                    tblpath = val.strip().split("Table: ")[1]
                    pp.h2("Opening %s" % key)
                    t = pt.table(tblpath)
                    t_hdu = table2hdu(t, key, verbosity=verbosity)
                    hdul[key] = t_hdu
            else:
                # NOTE(review): this branch is taken for non-text keyword
                # values only -- confirm that pairing with the type check
                # (rather than with the "Table: " test) is what was intended.
                hdul["MAIN"].header.vals[key] = val
    finally:
        # Release the main table even if a sub-table fails to convert
        ms.close()

    return hdul
def table2hdu(table, hd, verbosity=1, close_after=True):
    """ Convert MS table to a Header-Data unit

    Every column of ``table`` is added to the HDU and every table keyword is
    copied into the HDU header. Columns whose read raises RuntimeError are
    skipped with a warning.

    :param table: open table object (must provide colnames(), getkeywords(),
                  getcol() and close())
    :param hd: header data unit, either a string (a new IdiTableHdu with
               that name is created) or an existing HDU to fill
    :param verbosity: verbosity level passed to PrintLog
    :param close_after: if True, close ``table`` before returning, also when
                        the conversion fails
    :return: HDU version of MS table
    """
    pp = PrintLog(verbosity=verbosity)

    try:
        # Accept both text types on Python 2 (str, unicode); on Python 3 this
        # is simply str.
        if isinstance(hd, (type(""), type(u""))):
            pp.h3("Creating %s HDU" % hd)
            hd = IdiTableHdu(name=hd)

        colnames = table.colnames()
        keywords = table.getkeywords()

        for colname in colnames:
            pp.debug("Reading col %s" % colname)
            try:
                hd.add_column(table.getcol(colname), name=colname)
            except RuntimeError:
                # This can be raised when no data is in the column
                pp.warn("Could not add %s" % colname)

        for key, val in keywords.items():
            hd.header.vals[key] = val
    finally:
        # Do not leak the table handle when a column or keyword copy fails
        if close_after:
            table.close()

    return hd
def create_dataset(hgroup, name, data, **kwargs):
    """ Create dataset from data, will attempt to compress

    Data whose dtype is one of the numpy types listed below is handed to
    create_compressed() together with the remaining keyword arguments. Any
    other dtype is written with a plain ``hgroup.create_dataset()`` call,
    which does not receive the keyword arguments.

    :param hgroup: h5py group in which to add dataset
    :param name: name of dataset
    :param data: data to write
    :param kwargs: 'verbosity' (default 0) is consumed here; everything else
                   is forwarded to create_compressed()
    :return: the created dataset
    """
    pp = PrintLog(kwargs.pop('verbosity', 0))

    # dtypes that are routed through the compressing writer
    compressible = {
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.int8, np.int16, np.int32, np.int64,
        np.float16, np.float32, np.float64,
        np.complex64, np.complex128,
        np.void,
    }

    if data.dtype.type not in compressible:
        pp.debug("Creating non-compressed %s" % name)
        # A TypeError from h5py propagates unchanged to the caller
        return hgroup.create_dataset(name, data=data)

    pp.debug("Creating compressed %s" % name)
    return create_compressed(hgroup, name, data, **kwargs)
def export_ms(hdf_file, ms_file, verbosity=1):
    """ Convert an HDF file to MS

    Reads ``hdf_file`` into an IdiHdulist, builds one casacore column
    description per column of its "MAIN" HDU and creates the table
    ``ms_file`` with that description and the same number of rows.

    :param hdf_file: Input HDF-MS filename
    :param ms_file: Output MS filename
    :param verbosity: verbosity level passed to PrintLog and IdiHdulist
    :raises KeyError: if a column's dtype has no entry in the
                      dtype -> casacore value type map below

    TODO: Get this working properly. Only the table layout is created here;
          no column data is written and only the "MAIN" HDU is used.
    """
    pp = PrintLog(verbosity=verbosity)

    hdul = IdiHdulist(verbosity=verbosity)
    hdul.read_hdf(hdf_file)

    main_hdu = hdul["MAIN"]

    # numpy dtype name -> casacore value type name
    vdict = {
        'float32': 'float',
        'float64': 'double',
        'complex64': 'complex',
        'complex128': 'dcomplex',
        'int32': 'int',
        'uint32': 'uint',
        'str': 'string',
        'bool': 'bool',
    }

    col_descs = []
    for col, cdata in main_hdu.data.items():
        col = str(col)
        pp.pp("%16s %s %s" % (col, cdata.shape, cdata.dtype))

        # Resolve the value type for every column. Doing this only for 1-D
        # columns left array columns with an unbound or stale value type.
        vt = vdict[str(cdata.dtype)]
        if cdata.ndim == 1:
            cdesc = pt.makescacoldesc(col, cdata[0], valuetype=vt)
        else:
            cdesc = pt.makearrcoldesc(col, cdata[0], valuetype=vt)
        col_descs.append(cdesc)

    tdesc = pt.maketabdesc(col_descs)
    t = pt.table(ms_file, tdesc, nrow=main_hdu.n_rows)
    # Close explicitly instead of relying on garbage collection of the handle
    t.close()
def export_ms(hdf_file, ms_file, verbosity=1):
    """ Convert an HDF file to MS

    Reads ``hdf_file`` into an IdiHdulist, builds one casacore column
    description per column of its "MAIN" HDU and creates the table
    ``ms_file`` with that description and the same number of rows.

    :param hdf_file: Input HDF-MS filename
    :param ms_file: Output MS filename
    :param verbosity: verbosity level passed to PrintLog and IdiHdulist
    :raises KeyError: if a column's dtype has no entry in the
                      dtype -> casacore value type map below

    TODO: Get this working properly. Only the table layout is created here;
          no column data is written and only the "MAIN" HDU is used.
    """
    pp = PrintLog(verbosity=verbosity)

    hdul = IdiHdulist(verbosity=verbosity)
    hdul.read_hdf(hdf_file)

    main_hdu = hdul["MAIN"]

    # numpy dtype name -> casacore value type name
    vdict = {
        'float32': 'float',
        'float64': 'double',
        'complex64': 'complex',
        'complex128': 'dcomplex',
        'int32': 'int',
        'uint32': 'uint',
        'str': 'string',
        'bool': 'bool',
    }

    col_descs = []
    for col, cdata in main_hdu.data.items():
        col = str(col)
        pp.pp("%16s %s %s" % (col, cdata.shape, cdata.dtype))

        # Resolve the value type for every column. Doing this only for 1-D
        # columns left array columns with an unbound or stale value type.
        vt = vdict[str(cdata.dtype)]
        if cdata.ndim == 1:
            cdesc = pt.makescacoldesc(col, cdata[0], valuetype=vt)
        else:
            cdesc = pt.makearrcoldesc(col, cdata[0], valuetype=vt)
        col_descs.append(cdesc)

    tdesc = pt.maketabdesc(col_descs)
    t = pt.table(ms_file, tdesc, nrow=main_hdu.n_rows)
    # Close explicitly instead of relying on garbage collection of the handle
    t.close()
def create_dataset(hgroup, name, data, **kwargs):
    """ Create dataset from data, will attempt to compress

    The dtype of ``data`` decides the writer: dtypes in the set below go
    through create_compressed() with the remaining keyword arguments, all
    other dtypes go through ``hgroup.create_dataset()`` without them.

    :param hgroup: h5py group in which to add dataset
    :param name: name of dataset
    :param data: data to write
    :param kwargs: 'verbosity' (default 0) is removed and used for logging;
                   the rest is passed on to create_compressed()
    :return: the created dataset
    """
    log_level = kwargs.pop('verbosity', 0)
    pp = PrintLog(log_level)

    # dtypes that are routed through the compressing writer
    compressible_types = frozenset((
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.int8, np.int16, np.int32, np.int64,
        np.float16, np.float32, np.float64,
        np.complex64, np.complex128,
        np.void,
    ))

    use_compression = data.dtype.type in compressible_types
    if use_compression:
        pp.debug("Creating compressed %s" % name)
        dset = create_compressed(hgroup, name, data, **kwargs)
    else:
        pp.debug("Creating non-compressed %s" % name)
        # Any TypeError raised here is left to propagate to the caller
        dset = hgroup.create_dataset(name, data=data)
    return dset
def convert_fits_to_hdf(args=None):
    """ Convert a FITS file to HDF5 in HDFITS format

    An input and output directory must be specified, and all files with a matching
    extension will be converted. Command line options set the compression algorithm
    and other run-time settings.

    :param args: list of command line argument strings to parse. If None,
                 argparse falls back to ``sys.argv[1:]``.
    """
    # Parse options and arguments
    parser = argparse.ArgumentParser(description='Convert FITS files to HDF5 files in HDFITS format.')
    parser.add_argument('-c', '--compression', dest='comp', type=str,
                        help='Data compression. Defaults to None, also lzf, bitshuffle, gzip')
    parser.add_argument('-x', '--extension', dest='ext', type=str, default='fits',
                        help='File extension of FITS files. Defaults to .fits')
    parser.add_argument('-v', '--verbosity', dest='verbosity', type=int, default=4,
                        help='verbosity level (default 4, up to 5)')
    parser.add_argument('-s', '--scaleoffset', dest='scale_offset', default=None,
                        help='Add scale offset')
    parser.add_argument('-S', '--shuffle', dest='shuffle', action='store_true', default=None,
                        help='Apply byte shuffle filter')
    parser.add_argument('-t', '--pytables', dest='table_type', action='store_true', default=None,
                        help='Set output tables to be PyTables TABLE class, instead of HDFITES DATA_GROUP')
    parser.add_argument('-C', '--checksum', dest='checksum', action='store_true', default=None,
                        help='Compute fletcher32 checksum on datasets.')
    parser.add_argument('dir_in', help='input directory')
    parser.add_argument('dir_out', help='output_directory')

    # Honour an explicitly supplied argument list instead of always reading sys.argv
    args = parser.parse_args(args)

    dir_in = args.dir_in
    dir_out = args.dir_out

    if not os.path.exists(dir_out):
        print("Creating directory %s" % dir_out)
        # makedirs also creates missing parent directories
        os.makedirs(dir_out)

    # Form a list of keyword arguments to pass to HDF5 export
    kwargs = {}
    if args.comp is not None:
        kwargs['compression'] = args.comp
    if args.scale_offset is not None:
        kwargs['scaleoffset'] = int(args.scale_offset)
    if args.shuffle is not None:
        kwargs['shuffle'] = args.shuffle
    if args.checksum is not None:
        kwargs['fletcher32'] = args.checksum
    if args.table_type is not None:
        kwargs['table_type'] = 'TABLE'
    else:
        kwargs['table_type'] = 'DATA_GROUP'

    pp = PrintLog(verbosity=args.verbosity)
    if args.verbosity == 0:
        warnings.simplefilter("ignore")

    pp.h1("FITS2HDF")
    pp.pa("Input directory:  %s" % dir_in)
    pp.pa("Output directory: %s" % dir_out)
    pp.pa("Dataset creation arguments:")
    for key, val in kwargs.items():
        pp.pa("%16s: %s" % (key, val))

    # Create list of files to process
    filelist = [fn for fn in os.listdir(dir_in) if fn.endswith(args.ext)]

    suffix = '.' + args.ext
    t_start = time.time()
    file_count = 0
    for filename in filelist:
        # Strip the extension only from the end of the name, so a name that
        # contains it elsewhere is not truncated at the first occurrence.
        stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename
        file_in = os.path.join(dir_in, filename)
        file_out = os.path.join(dir_out, stem + '.h5')

        try:
            pp.pp("\nReading  %s" % file_in)
            a = read_fits(file_in)
            pp.pp("Creating %s" % file_out)
            t1 = time.time()
            export_hdf(a, file_out, **kwargs)
            t2 = time.time()

            size_in = os.path.getsize(file_in)
            size_out = os.path.getsize(file_out)
            pp.pp("Input  filesize: %sB" % size_in)
            pp.pp("Output filesize: %sB" % size_out)
            compfact = float(size_in) / float(size_out)
            pp.pp("Compression:     %2.2fx" % compfact)
            pp.pp("Comp/write time: %2.2fs" % (t2 - t1))

            file_count += 1

        except IOError:
            # Report the failure and continue with the next file
            pp.err("ERROR: Cannot load %s" % file_in)

    pp.h1("\nSUMMARY")
    pp.pa("Files created: %i" % file_count)
    pp.pa("Time taken:    %2.2fs" % (time.time() - t_start))
def convert_hdf_to_fits(args=None):
    """ Convert a HDF5 (in HDFITS format) to a FITS file

    An input and output directory must be specified, and all files with a matching
    extension will be converted. Command line options set the run-time settings.

    :param args: list of command line argument strings to parse. If None,
                 argparse falls back to ``sys.argv[1:]``.
    """
    # Parse options and arguments
    parser = argparse.ArgumentParser(description='Convert HDF5 in HDFITS format FITS files.')
    parser.add_argument('-x', '--extension', dest='ext', type=str, default='h5',
                        help='File extension of HDFITS files. Defaults to .h5')
    parser.add_argument('-v', '--verbosity', dest='verbosity', type=int, default=4,
                        help='verbosity level (default 4, up to 5)')
    parser.add_argument('dir_in', help='input directory')
    parser.add_argument('dir_out', help='output_directory')

    # Honour an explicitly supplied argument list instead of always reading sys.argv
    args = parser.parse_args(args)

    dir_in = args.dir_in
    dir_out = args.dir_out

    if not os.path.exists(dir_out):
        print("Creating directory %s" % dir_out)
        # makedirs also creates missing parent directories
        os.makedirs(dir_out)

    # Form a list of keyword arguments to pass to FITS export
    # (no command line option currently fills it)
    kwargs = {}

    pp = PrintLog(verbosity=args.verbosity)
    if args.verbosity == 0:
        warnings.simplefilter("ignore")

    pp.h1("HDF2FITS")
    pp.pa("Input directory:  %s" % dir_in)
    pp.pa("Output directory: %s" % dir_out)
    pp.pa("Dataset creation arguments:")
    for key, val in kwargs.items():
        pp.pa("%16s: %s" % (key, val))

    # Create list of files to process
    filelist = [fn for fn in os.listdir(dir_in) if fn.endswith(args.ext)]

    suffix = '.' + args.ext
    t_start = time.time()
    file_count = 0
    for filename in filelist:
        # Strip the extension only from the end of the name, so a name that
        # contains it elsewhere is not truncated at the first occurrence.
        stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename
        file_in = os.path.join(dir_in, filename)
        file_out = os.path.join(dir_out, stem + '.fits')

        try:
            pp.pp("\nReading  %s" % file_in)
            a = read_hdf(file_in)
            pp.pp("Creating %s" % file_out)
            t1 = time.time()
            export_fits(a, file_out, **kwargs)
            t2 = time.time()

            size_in = os.path.getsize(file_in)
            size_out = os.path.getsize(file_out)
            pp.pp("Input  filesize: %sB" % size_in)
            pp.pp("Output filesize: %sB" % size_out)
            compfact = float(size_in) / float(size_out)
            pp.pp("Compression:     %2.2fx" % compfact)
            pp.pp("Comp/write time: %2.2fs" % (t2 - t1))

            file_count += 1

        except IOError:
            # Report the failure and continue with the next file
            pp.err("ERROR: Cannot load %s" % file_in)

    pp.h1("\nSUMMARY")
    pp.pa("Files created: %i" % file_count)
    pp.pa("Time taken:    %2.2fs" % (time.time() - t_start))