def write_table_fits(input, output, overwrite=False, append=False):
    """
    Write a Table object to a FITS file

    Parameters
    ----------
    input : Table
        The table to write out.
    output : str
        The filename to write the table to.
    overwrite : bool
        Whether to overwrite any existing file without warning.
    append : bool
        Whether to append the table to an existing file
    """
    # Encode any mixin columns into standard Columns.
    input = _encode_mixins(input)

    table_hdu = table_to_hdu(input, character_as_bytes=True)

    # Check if output file already exists
    if isinstance(output, str) and os.path.exists(output):
        if overwrite:
            os.remove(output)
        elif not append:
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    if append:
        # verify=False stops it reading and checking the existing file.
        fits_append(output, table_hdu.data, table_hdu.header, verify=False)
    else:
        table_hdu.writeto(output)
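
# Usage sketch (illustrative, not part of the module): write_table_fits is
# normally reached through the unified Table I/O interface rather than called
# directly.  The file name 'events.fits' is a placeholder.
from astropy.table import Table

t = Table({'a': [1, 2, 3], 'b': [4.0, 5.0, 6.0]})
t.write('events.fits', overwrite=True)   # replace any existing file
t.write('events.fits', append=True)      # append a new table HDU instead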
def write(table, output=None, format=None, Writer=None, fast_writer=True, *,
          overwrite=False, **kwargs):
    # Docstring inserted below

    _validate_read_write_kwargs('write', format=format, fast_writer=fast_writer,
                                overwrite=overwrite, **kwargs)

    if isinstance(output, (str, bytes, os.PathLike)):
        output = os.path.expanduser(output)
        if not overwrite and os.path.lexists(output):
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    if output is None:
        output = sys.stdout

    # Ensure that `table` is a Table subclass.
    names = kwargs.get('names')
    if isinstance(table, Table):
        # While we are only going to read data from columns, we may need to
        # adjust info attributes such as format, so we make a shallow copy.
        table = table.__class__(table, names=names, copy=False)
    else:
        # Otherwise, create a table from the input.
        table = Table(table, names=names, copy=False)

    table0 = table[:0].copy()
    core._apply_include_exclude_names(table0, kwargs.get('names'),
                                      kwargs.get('include_names'),
                                      kwargs.get('exclude_names'))
    diff_format_with_names = set(kwargs.get('formats', [])) - set(table0.colnames)

    if diff_format_with_names:
        warnings.warn(
            'The key(s) {} specified in the formats argument do not match a column name.'
            .format(diff_format_with_names), AstropyWarning)

    if table.has_mixin_columns:
        fast_writer = False

    Writer = _get_format_class(format, Writer, 'Writer')
    writer = get_writer(Writer=Writer, fast_writer=fast_writer, **kwargs)
    if writer._format_name in core.FAST_CLASSES:
        writer.write(table, output)
        return

    lines = writer.write(table)

    # Write the lines to output
    outstr = os.linesep.join(lines)
    if not hasattr(output, 'write'):
        # NOTE: we need to specify newline='', otherwise the default
        # behavior is for Python to translate \r\n (which we write because
        # of os.linesep) into \r\r\n. Specifying newline='' disables any
        # auto-translation.
        output = open(output, 'w', newline='')
        output.write(outstr)
        output.write(os.linesep)
        output.close()
    else:
        output.write(outstr)
        output.write(os.linesep)
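
# Usage sketch (illustrative, not part of the module): this writer can be
# called directly from astropy.io.ascii or through Table.write with an
# ``ascii.*`` format name.  'data.csv' is a placeholder file name.
from astropy.io import ascii
from astropy.table import Table

t = Table({'x': [1, 2], 'y': [3.5, 4.5]})
ascii.write(t, 'data.csv', format='csv', overwrite=True)  # write to a file
ascii.write(t)                                            # no output given -> print to sys.stdout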
def test_logging(capsys, tmp_path):

    # Run skypy with default verbosity and check log is empty
    config_filename = get_pkg_data_filename('data/test_config.yml')
    output_filename = str(tmp_path / 'logging.fits')
    skypy.main([config_filename, output_filename])
    out, err = capsys.readouterr()
    assert (not err)

    # Run again with increased verbosity and capture log. Force an exception by
    # not using the "--overwrite" flag when the output file already exists.
    with pytest.raises(SystemExit):
        skypy.main([config_filename, output_filename, '--verbose'])
    out, err = capsys.readouterr()

    # Determine all DAG jobs and function calls from config
    config = load_skypy_yaml(config_filename)
    cosmology = config.pop('cosmology', None)
    tables = config.pop('tables', {})
    config.update({k: v.pop('.init', Call(Table)) for k, v in tables.items()})
    columns = [f'{t}.{c}' for t, cols in tables.items() for c in cols]
    functions = [f for f in config.values() if isinstance(f, Call)]
    functions += [
        f for t, cols in tables.items()
        for f in cols.values() if isinstance(f, Call)
    ]

    # Check all jobs appear in the log
    for job in list(config) + list(tables) + columns:
        log_string = f"[INFO] skypy.pipeline: Generating {job}"
        assert (log_string in err)

    # Check all functions appear in the log
    for f in functions:
        log_string = f"[INFO] skypy.pipeline: Calling {f.function.__name__}"
        assert (log_string in err)

    # Check cosmology appears in the log
    if cosmology:
        assert ("[INFO] skypy.pipeline: Setting cosmology" in err)

    # Check writing output file is in the log
    assert (f"[INFO] skypy: Writing {output_filename}" in err)

    # Check error for existing output file is in the log
    try:
        # New error message introduced in astropy PR #12179
        from astropy.utils.misc import NOT_OVERWRITING_MSG
        error_string = NOT_OVERWRITING_MSG.format(output_filename)
    except ImportError:
        # Fallback on old error message from astropy v4.x
        error_string = f"[ERROR] skypy: File {output_filename!r} already exists."
    assert (error_string in err)

    # Run again with decreased verbosity and check the log is empty
    with pytest.raises(SystemExit):
        skypy.main([config_filename, output_filename, '-qq'])
    out, err = capsys.readouterr()
    assert (not err)
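
# Command-line sketch of what this test exercises (file names are placeholders,
# flags as used or referenced in the test above):
#
#   skypy config.yml output.fits              # default (quiet) verbosity
#   skypy config.yml output.fits --verbose    # INFO-level log; exits if output.fits exists
#   skypy config.yml output.fits --overwrite  # replace an existing output file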
def write_table_votable(input, output, table_id=None, overwrite=False,
                        tabledata_format=None):
    """
    Write a Table object to a VO table file

    Parameters
    ----------
    input : Table
        The table to write out.
    output : str
        The filename to write the table to.
    table_id : str, optional
        The table ID to use. If this is not specified, the 'ID' keyword in the
        ``meta`` object of the table will be used.
    overwrite : bool, optional
        Whether to overwrite any existing file without warning.
    tabledata_format : str, optional
        The format of table data to write.  Must be one of ``tabledata``
        (text representation), ``binary`` or ``binary2``.  Default is
        ``tabledata``.  See :ref:`astropy:votable-serialization`.
    """

    # Only those columns which are instances of BaseColumn or Quantity can be written
    unsupported_cols = input.columns.not_isinstance((BaseColumn, Quantity))
    if unsupported_cols:
        unsupported_names = [col.info.name for col in unsupported_cols]
        raise ValueError(
            'cannot write table with mixin column(s) {} to VOTable'.format(
                unsupported_names))

    # Check if output file already exists
    if isinstance(output, str) and os.path.exists(output):
        if overwrite:
            os.remove(output)
        else:
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    # Create a new VOTable file
    table_file = from_table(input, table_id=table_id)

    # Write out file
    table_file.to_xml(output, tabledata_format=tabledata_format)
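
# Usage sketch (illustrative, not part of the module): the VOTable writer above
# is registered with the unified Table I/O interface; 'catalog.xml' is a
# placeholder file name.
from astropy.table import Table

t = Table({'ra': [10.5, 11.2], 'dec': [-1.1, 0.3]})
t.write('catalog.xml', format='votable', tabledata_format='binary2', overwrite=True)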
def _overwrite_existing(self, overwrite, fileobj, closed):
    """Overwrite an existing file if ``overwrite`` is ``True``, otherwise
    raise an OSError.  The exact behavior of this method depends on the
    _File object state and is only meant for use within the ``_open_*``
    internal methods.
    """
    # The file will be overwritten...
    if ((self.file_like and hasattr(fileobj, 'len') and fileobj.len > 0) or
            (os.path.exists(self.name) and os.path.getsize(self.name) != 0)):
        if overwrite:
            if self.file_like and hasattr(fileobj, 'truncate'):
                fileobj.truncate(0)
            else:
                if not closed:
                    fileobj.close()
                os.remove(self.name)
        else:
            raise OSError(NOT_OVERWRITING_MSG.format(self.name))
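
# Usage sketch (illustrative): _overwrite_existing is internal to astropy.io.fits,
# but its overwrite behaviour is visible through the public writeto() calls.
# 'image.fits' is a placeholder file name.
import numpy as np
from astropy.io import fits

hdu = fits.PrimaryHDU(data=np.zeros((10, 10)))
hdu.writeto('image.fits', overwrite=True)   # truncates or removes an existing file
hdu.writeto('image.fits')                   # raises OSError because the file already exists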
def _pandas_write(fmt, tbl, filespec, overwrite=False, **kwargs):
    """Provide io Table connector to write table using pandas."""

    pandas_fmt = fmt[len(PANDAS_PREFIX):]  # chop the 'pandas.' in front

    # Get defaults and then override with user-supplied values
    write_kwargs = PANDAS_FMTS[pandas_fmt]['write'].copy()
    write_kwargs.update(kwargs)

    df = tbl.to_pandas()
    write_method = getattr(df, 'to_' + pandas_fmt)

    if not overwrite:
        try:  # filespec is not always a path-like
            exists = os.path.exists(filespec)
        except TypeError:  # skip invalid arguments
            pass
        else:
            if exists:  # only error if file already exists
                raise OSError(NOT_OVERWRITING_MSG.format(filespec))

    return write_method(filespec, **write_kwargs)
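
# Usage sketch (illustrative): the pandas connector is reached through
# Table.write with a 'pandas.*' format name; 'data.json' is a placeholder.
from astropy.table import Table

t = Table({'a': [1, 2, 3]})
t.write('data.json', format='pandas.json', overwrite=True)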
def write_table_hdf5(table, output, path=None, compression=False,
                     append=False, overwrite=False, serialize_meta=False,
                     **create_dataset_kwargs):
    """
    Write a Table object to an HDF5 file

    This requires `h5py <http://www.h5py.org/>`_ to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or :class:`h5py.File` or :class:`h5py.Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file.
        This should be relative to the input file or group.
        If not specified, defaults to ``__astropy_table__``.
    compression : bool or str or int
        Whether to compress the table inside the HDF5 file. If set to `True`,
        ``'gzip'`` compression is used. If a string is specified, it should be
        one of ``'gzip'``, ``'szip'``, or ``'lzf'``. If an integer is
        specified (in the range 0-9), ``'gzip'`` compression is used, and the
        integer denotes the compression level.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning.
        If ``append=True`` and ``overwrite=True`` then only the dataset will
        be replaced; the file/group will not be overwritten.
    serialize_meta : bool
        Whether to serialize rich table meta-data when writing the HDF5 file,
        in particular such data required to write and read back mixin columns
        like ``Time``, ``SkyCoord``, or ``Quantity`` to the file.
    **create_dataset_kwargs
        Additional keyword arguments are passed to
        ``h5py.File.create_dataset()`` or ``h5py.Group.create_dataset()``.
    """

    from astropy.table import meta

    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    if path is None:
        # table is just an arbitrary, hardcoded string here.
        path = '__astropy_table__'
    elif path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    if '/' in path:
        group, name = path.rsplit('/', 1)
    else:
        group, name = None, path

    if isinstance(output, (h5py.File, h5py.Group)):
        if len(list(output.keys())) > 0 and name == '__astropy_table__':
            raise ValueError("table path should always be set via the "
                             "path= argument when writing to existing "
                             "files")
        elif name == '__astropy_table__':
            warnings.warn("table path was not set via the path= argument; "
                          "using default path {}".format(path))

        if group:
            try:
                output_group = output[group]
            except (KeyError, ValueError):
                output_group = output.create_group(group)
        else:
            output_group = output

    elif isinstance(output, str):

        if os.path.exists(output) and not append:
            if overwrite and not append:
                os.remove(output)
            else:
                raise OSError(NOT_OVERWRITING_MSG.format(output))

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')

        # Recursively call the write function
        try:
            return write_table_hdf5(table, f, path=path,
                                    compression=compression, append=append,
                                    overwrite=overwrite,
                                    serialize_meta=serialize_meta)
        finally:
            f.close()

    else:
        raise TypeError('output should be a string or an h5py File or '
                        'Group object')

    # Check whether table already exists
    if name in output_group:
        if append and overwrite:
            # Delete only the dataset itself
            del output_group[name]
            if serialize_meta and name + '.__table_column_meta__' in output_group:
                del output_group[name + '.__table_column_meta__']
        else:
            raise OSError(f"Table {path} already exists")

    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Table with numpy unicode strings can't be written in HDF5 so
    # to write such a table a copy of table is made containing columns as
    # bytestrings.  Now this copy of the table can be written in HDF5.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    # Warn if information will be lost when serialize_meta=False.  This is
    # hardcoded to the set difference between column info attributes and what
    # HDF5 can store natively (name, dtype) with no meta.
    if serialize_meta is False:
        for col in table.itercols():
            for attr in ('unit', 'format', 'description', 'meta'):
                if getattr(col.info, attr, None) not in (None, {}):
                    warnings.warn(
                        "table contains column(s) with defined 'unit', 'format',"
                        " 'description', or 'meta' info attributes. These will"
                        " be dropped since serialize_meta=False.",
                        AstropyUserWarning)

    # Write the table to the file
    if compression:
        if compression is True:
            compression = 'gzip'
        dset = output_group.create_dataset(name, data=table.as_array(),
                                           compression=compression,
                                           **create_dataset_kwargs)
    else:
        dset = output_group.create_dataset(name, data=table.as_array(),
                                           **create_dataset_kwargs)

    if serialize_meta:
        header_yaml = meta.get_yaml_from_table(table)
        header_encoded = np.array([h.encode('utf-8') for h in header_yaml])
        output_group.create_dataset(meta_path(name), data=header_encoded)

    else:
        # Write the Table meta dict key:value pairs to the file as HDF5
        # attributes.  This works only for a limited set of scalar data types
        # like numbers, strings, etc., but not any complex types.  This path
        # also ignores column meta like unit or format.
        for key in table.meta:
            val = table.meta[key]

            try:
                dset.attrs[key] = val
            except TypeError:
                warnings.warn("Attribute `{}` of type {} cannot be written to "
                              "HDF5 files - skipping. (Consider specifying "
                              "serialize_meta=True to write all meta data)"
                              .format(key, type(val)), AstropyUserWarning)
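
# Usage sketch (illustrative): write_table_hdf5 is normally reached through
# Table.write with format='hdf5'; file and path names below are placeholders.
from astropy.table import Table

t = Table({'a': [1, 2, 3]})
t.write('data.hdf5', format='hdf5', path='observations/run1',
        serialize_meta=True, overwrite=True)
# With both append=True and overwrite=True, only the named dataset is replaced;
# the rest of the file is left untouched.
t.write('data.hdf5', format='hdf5', path='observations/run1',
        append=True, overwrite=True)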
def write_table_parquet(table, output, overwrite=False):
    """
    Write a Table object to a Parquet file

    This requires `pyarrow <https://arrow.apache.org/docs/python/>`_
    to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or path-like
        The filename to write the table to.
    overwrite : bool, optional
        Whether to overwrite any existing file without warning. Default `False`.
    """

    from astropy.table import meta, serialize
    from astropy.utils.data_info import serialize_context_as

    pa, parquet, writer_version = get_pyarrow()

    if not isinstance(output, (str, os.PathLike)):
        raise TypeError(
            f'`output` should be a string or path-like, not {output}')

    # Convert all compound columns into serialized column names, where
    # e.g. 'time' becomes ['time.jd1', 'time.jd2'].
    with serialize_context_as('parquet'):
        encode_table = serialize.represent_mixins_as_columns(table)

    # We store the encoded serialization metadata as a yaml string.
    meta_yaml = meta.get_yaml_from_table(encode_table)
    meta_yaml_str = '\n'.join(meta_yaml)

    metadata = {}
    for name, col in encode_table.columns.items():
        # Parquet will retain the datatypes of columns, but string and
        # byte column length is lost.  Therefore, we special-case these
        # types to record the length for precise round-tripping.
        if col.dtype.type is np.str_:
            metadata[f'table::len::{name}'] = str(col.dtype.itemsize // 4)
        elif col.dtype.type is np.bytes_:
            metadata[f'table::len::{name}'] = str(col.dtype.itemsize)

    metadata['table_meta_yaml'] = meta_yaml_str

    # Pyarrow stores all metadata as byte strings, so we explicitly encode
    # our unicode strings in metadata as UTF-8 byte strings here.
    metadata_encode = {
        k.encode('UTF-8'): v.encode('UTF-8') for k, v in metadata.items()}

    # Build the pyarrow schema by converting from the numpy dtype of each
    # column to an equivalent pyarrow type with from_numpy_dtype()
    type_list = [(name, pa.from_numpy_dtype(encode_table.dtype[name].type))
                 for name in encode_table.dtype.names]
    schema = pa.schema(type_list, metadata=metadata_encode)

    if os.path.exists(output):
        if overwrite:
            # We must remove the file prior to writing below.
            os.remove(output)
        else:
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    # We use version='2.0' for full support of datatypes including uint32.
    with parquet.ParquetWriter(output, schema, version=writer_version) as writer:
        # Convert each Table column to a pyarrow array
        arrays = [pa.array(col) for col in encode_table.itercols()]
        # Create a pyarrow table from the list of arrays and the schema
        pa_table = pa.Table.from_arrays(arrays, schema=schema)
        # Write the pyarrow table to a file
        writer.write_table(pa_table)
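
# Usage sketch (illustrative): the Parquet writer requires pyarrow and is
# reached through Table.write with format='parquet'; 'catalog.parquet' is a
# placeholder file name.
from astropy.table import Table

t = Table({'id': [1, 2, 3], 'name': ['a', 'b', 'c']})
t.write('catalog.parquet', format='parquet', overwrite=True)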