def test_different_table_rows(self):
    """
    Test tables that are otherwise identical but one has more rows than the
    other.

    The diff must report the differing row counts and must state in its
    report that no further data comparison was performed.
    """

    ca1 = Column('A', format='L', array=[True, False])
    cb1 = Column('B', format='L', array=[True, False])
    ca2 = Column('A', format='L', array=[True, False, True])
    cb2 = Column('B', format='L', array=[True, False, True])

    ta = BinTableHDU.from_columns([ca1, cb1])
    tb = BinTableHDU.from_columns([ca2, cb2])

    diff = TableDataDiff(ta.data, tb.data)

    assert not diff.identical
    assert diff.diff_column_count == ()
    assert len(diff.common_columns) == 2
    assert diff.diff_rows == (2, 3)
    assert diff.diff_values == []

    report = diff.report()

    assert 'Table rows differ' in report
    assert 'a: 2' in report
    assert 'b: 3' in report
    # This used to assert the bare string literal, which is always truthy and
    # therefore checked nothing; it has to be looked up in the report.
    assert 'No further data comparison performed.' in report
def test_different_table_field_counts(self):
    """
    Compare two tables that share one column but have a different total
    number of columns.
    """

    col_a, col_b, col_c = (
        Column(label, format='L', array=[True, False])
        for label in ('A', 'B', 'C')
    )

    narrow = BinTableHDU.from_columns([col_b])
    wide = BinTableHDU.from_columns([col_a, col_b, col_c])

    result = TableDataDiff(narrow.data, wide.data)

    assert not result.identical
    # One column on the left, three on the right, only 'B' in common.
    assert result.diff_column_count == (1, 3)
    assert len(result.common_columns) == 1
    assert result.common_column_names == {'b'}
    assert result.diff_column_names == ([], ['A', 'C'])
    # No data values differ in the common column.
    assert result.diff_ratio == 0
    assert result.diff_total == 0

    text = result.report()
    assert ' Tables have different number of columns:' in text
    assert ' a: 1\n b: 3' in text
def test_ignore_table_fields(self):
    """
    Tables whose 'B' and 'C' columns hold different data compare as
    identical when those two fields are passed in ``ignore_fields``.
    """

    shared = Column('A', format='L', array=[True, False])
    bits_a = Column('B', format='X', array=[[0], [1]])
    ints_a = Column('C', format='4I', dim='(2, 2)',
                    array=[[0, 1, 2, 3], [4, 5, 6, 7]])
    bits_b = Column('B', format='X', array=[[1], [0]])
    ints_b = Column('C', format='4I', dim='(2, 2)',
                    array=[[1, 2, 3, 4], [5, 6, 7, 8]])

    left = BinTableHDU.from_columns([shared, bits_a, ints_a])
    right = BinTableHDU.from_columns([shared, bits_b, ints_b])

    result = TableDataDiff(left.data, right.data, ignore_fields=['B', 'C'])

    assert result.identical

    # With 'B' and 'C' ignored, 'A' is the only column left to compare.
    assert len(result.common_columns) == 1
    assert result.common_column_names == {'a'}
    assert result.diff_ratio == 0
    assert result.diff_total == 0
def test_identical_tables(self):
    """
    Diff a ten-column table against a table built from copies of the same
    columns and check that no differences are reported.
    """

    source_columns = [
        Column('A', format='L', array=[True, False]),
        Column('B', format='X', array=[[0], [1]]),
        Column('C', format='4I', dim='(2, 2)',
               array=[[0, 1, 2, 3], [4, 5, 6, 7]]),
        Column('D', format='J', bscale=2.0, array=[0, 1]),
        Column('E', format='A3', array=['abc', 'def']),
        Column('F', format='E', unit='m', array=[0.0, 1.0]),
        Column('G', format='D', bzero=-0.1, array=[0.0, 1.0]),
        Column('H', format='C', array=[0.0 + 1.0j, 2.0 + 3.0j]),
        Column('I', format='M', array=[4.0 + 5.0j, 6.0 + 7.0j]),
        Column('J', format='PI(2)', array=[[0, 1], [2, 3]]),
    ]

    original = BinTableHDU.from_columns(source_columns)
    duplicate = BinTableHDU.from_columns(
        [col.copy() for col in source_columns])

    result = TableDataDiff(original.data, duplicate.data)

    assert result.identical
    assert len(result.common_columns) == 10
    assert result.common_column_names == set('abcdefghij')
    assert result.diff_ratio == 0
    assert result.diff_total == 0
def test_diff_empty_tables(self):
    """
    Regression test for https://aeon.stsci.edu/ssb/trac/pyfits/ticket/178

    Diffing HDU lists whose table has zero rows must complete without
    crashing and report the inputs as identical.
    """

    empty_columns = [Column('D', format='J'), Column('E', format='J')]
    table_hdu = BinTableHDU.from_columns(empty_columns, nrows=0)

    # Both HDU lists wrap the very same zero-row table HDU.
    first = fits.HDUList([table_hdu])
    second = fits.HDUList([table_hdu])

    result = FITSDiff(first, second)
    assert result.identical
def test_different_table_field_names(self):
    """
    Compare two two-column tables that share column 'A' but whose second
    column is named 'B' in one table and 'C' in the other.
    """

    col_a, col_b, col_c = (
        Column(label, format='L', array=[True, False])
        for label in ('A', 'B', 'C')
    )

    left = BinTableHDU.from_columns([col_a, col_b])
    right = BinTableHDU.from_columns([col_a, col_c])

    result = TableDataDiff(left.data, right.data)

    assert not result.identical
    assert len(result.common_columns) == 1
    assert result.common_column_names == {'a'}
    # 'B' exists only on the left side, 'C' only on the right side.
    assert result.diff_column_names == (['B'], ['C'])
    assert result.diff_ratio == 0
    assert result.diff_total == 0

    text = result.report()
    assert 'Extra column B of format L in a' in text
    assert 'Extra column C of format L in b' in text
def columns(self):
    """
    Return the column definitions for the group parameters and group data.

    When ``self._has_data`` is true and ``self.data`` carries a ``_coldefs``
    attribute, that object is returned unchanged.  Otherwise the
    definitions are built from the header: one column per ``PTYPEn``
    keyword (``n`` in ``1..PCOUNT``), scaled by the optional
    ``PSCALn``/``PZEROn`` keywords, followed by one data column scaled by
    the optional ``BSCALE``/``BZERO`` keywords.

    As a side effect of building from the header, ``self._data_field`` is
    set to the (uniquified) name of the data column.

    Returns
    -------
    ColDefs
        The column definitions.
    """

    if self._has_data and hasattr(self.data, '_coldefs'):
        return self.data._coldefs

    tform = self._bitpix2tform[self._header['BITPIX']]
    pcount = self._header['PCOUNT']
    parnames = []
    bscales = []
    bzeros = []

    for idx in range(pcount):
        suffix = str(idx + 1)
        bscales.append(self._header.get('PSCAL' + suffix, None))
        bzeros.append(self._header.get('PZERO' + suffix, None))
        parnames.append(self._header['PTYPE' + suffix])

    formats = [tform] * len(parnames)
    dims = [None] * len(parnames)

    # Now create columns from collected parameters, but first add the DATA
    # column too, to contain the group data.
    parnames.append('DATA')
    bscales.append(self._header.get('BSCALE'))
    # The FITS keyword is BZERO; the former lookup of 'BZEROS' could never
    # match, so the data column's zero offset was silently dropped.
    bzeros.append(self._header.get('BZERO'))
    data_shape = self.shape[:-1]
    formats.append(str(int(np.prod(data_shape))) + tform)
    dims.append(data_shape)
    parnames = _unique_parnames(parnames)

    # The data column is always last; its name may have been altered to
    # keep it distinct from the parameter names.
    self._data_field = parnames[-1]

    cols = [
        Column(name=name, format=fmt, bscale=bscale, bzero=bzero, dim=dim)
        for name, fmt, bscale, bzero, dim in zip(
            parnames, formats, bscales, bzeros, dims)
    ]

    return ColDefs(cols)
def __new__(cls, input=None, bitpix=None, pardata=None, parnames=None,
            bscale=None, bzero=None, parbscales=None, parbzeros=None):
    """
    Parameters
    ----------
    input : array or FITS_rec instance
        input data, either the group data itself (a
        `numpy.ndarray`) or a record array (`FITS_rec`) which will
        contain both group parameter info and the data.  The rest
        of the arguments are used only for the first case.

    bitpix : int
        data type as expressed in FITS ``BITPIX`` value (8, 16, 32,
        64, -32, or -64)

    pardata : sequence of arrays
        parameter data, as a list of (numeric) arrays.

    parnames : sequence of str, optional
        list of parameter names.  When omitted (or `None`), the names
        ``PAR1``, ``PAR2``, ... are generated, one per entry of
        ``pardata``.

    bscale : int
        ``BSCALE`` of the data

    bzero : int
        ``BZERO`` of the data

    parbscales : sequence of int
        list of bscales for the parameters

    parbzeros : sequence of int
        list of bzeros for the parameters

    Raises
    ------
    ValueError
        If the number of names in ``parnames`` differs from the number of
        arrays in ``pardata``.
    """

    if not isinstance(input, FITS_rec):
        if pardata is None:
            npars = 0
        else:
            npars = len(pardata)

        if parbscales is None:
            parbscales = [None] * npars
        if parbzeros is None:
            parbzeros = [None] * npars

        # The default used to be a shared mutable ``[]`` that ended up
        # stored on every instance as ``self.parnames``; use a ``None``
        # sentinel and always work on a private list instead.
        if parnames is None:
            parnames = ['PAR{}'.format(idx + 1) for idx in range(npars)]
        else:
            parnames = list(parnames)

        if len(parnames) != npars:
            raise ValueError('The number of parameter data arrays does '
                             'not match the number of parameters.')

        unique_parnames = _unique_parnames(parnames + ['DATA'])

        if bitpix is None:
            bitpix = DTYPE2BITPIX[input.dtype.name]

        fits_fmt = GroupsHDU._bitpix2tform[bitpix]  # -32 -> 'E'
        format = FITS2NUMPY[fits_fmt]  # 'E' -> 'f4'
        data_fmt = '{}{}'.format(str(input.shape[1:]), format)
        formats = ','.join(([format] * npars) + [data_fmt])
        gcount = input.shape[0]

        cols = [Column(name=unique_parnames[idx], format=fits_fmt,
                       bscale=parbscales[idx], bzero=parbzeros[idx])
                for idx in range(npars)]
        cols.append(Column(name=unique_parnames[-1], format=fits_fmt,
                           bscale=bscale, bzero=bzero))

        coldefs = ColDefs(cols)

        self = FITS_rec.__new__(cls,
                                np.rec.array(None,
                                             formats=formats,
                                             names=coldefs.names,
                                             shape=gcount))

        # By default the data field will just be 'DATA', but it may be
        # uniquified if 'DATA' is already used by one of the group names
        self._data_field = unique_parnames[-1]

        self._coldefs = coldefs
        self.parnames = parnames

        for idx, name in enumerate(unique_parnames[:-1]):
            column = coldefs[idx]
            # Note: _get_scale_factors is used here and in other cases
            # below to determine whether the column has non-default
            # scale/zero factors.
            # TODO: Find a better way to do this than using this interface
            scale, zero = self._get_scale_factors(column)[3:5]
            if scale or zero:
                self._cache_field(name, pardata[idx])
            else:
                np.rec.recarray.field(self, idx)[:] = pardata[idx]

        column = coldefs[self._data_field]
        scale, zero = self._get_scale_factors(column)[3:5]
        if scale or zero:
            self._cache_field(self._data_field, input)
        else:
            np.rec.recarray.field(self, npars)[:] = input
    else:
        self = FITS_rec.__new__(cls, input)
        self.parnames = None
    return self
def test_different_table_data(self):
    """
    Diff two tables with identical column definitions but differing values,
    covering columns of several data formats and dimensions.
    """

    columns_a = [
        Column('A', format='L', array=[True, False]),
        Column('B', format='X', array=[[0], [1]]),
        Column('C', format='4I', dim='(2, 2)',
               array=[[0, 1, 2, 3], [4, 5, 6, 7]]),
        Column('D', format='J', bscale=2.0, array=[0.0, 2.0]),
        Column('E', format='A3', array=['abc', 'def']),
        Column('F', format='E', unit='m', array=[0.0, 1.0]),
        Column('G', format='D', bzero=-0.1, array=[0.0, 1.0]),
        Column('H', format='C', array=[0.0 + 1.0j, 2.0 + 3.0j]),
        Column('I', format='M', array=[4.0 + 5.0j, 6.0 + 7.0j]),
        Column('J', format='PI(2)', array=[[0, 1], [2, 3]]),
    ]

    columns_b = [
        Column('A', format='L', array=[False, False]),
        Column('B', format='X', array=[[0], [0]]),
        Column('C', format='4I', dim='(2, 2)',
               array=[[0, 1, 2, 3], [5, 6, 7, 8]]),
        Column('D', format='J', bscale=2.0, array=[2.0, 2.0]),
        Column('E', format='A3', array=['abc', 'ghi']),
        Column('F', format='E', unit='m', array=[1.0, 2.0]),
        Column('G', format='D', bzero=-0.1, array=[2.0, 3.0]),
        Column('H', format='C', array=[1.0 + 1.0j, 2.0 + 3.0j]),
        Column('I', format='M', array=[5.0 + 5.0j, 6.0 + 7.0j]),
        Column('J', format='PI(2)', array=[[1, 2], [3, 4]]),
    ]

    table_a = BinTableHDU.from_columns(columns_a)
    table_b = BinTableHDU.from_columns(columns_b)

    result = TableDataDiff(table_a.data, table_b.data, numdiffs=20)
    assert not result.identical

    # The column definitions are the same, but not the column values
    assert result.diff_columns == ()

    found = result.diff_values
    assert found[0] == (('A', 0), (True, False))
    assert found[1] == (('B', 1), ([1], [0]))

    # Array-valued cells are compared element-wise.
    assert found[2][0] == ('C', 1)
    assert (found[2][1][0] == [[4, 5], [6, 7]]).all()
    assert (found[2][1][1] == [[5, 6], [7, 8]]).all()

    assert found[3] == (('D', 0), (0, 2.0))
    assert found[4] == (('E', 1), ('def', 'ghi'))
    assert found[5] == (('F', 0), (0.0, 1.0))
    assert found[6] == (('F', 1), (1.0, 2.0))
    assert found[7] == (('G', 0), (0.0, 2.0))
    assert found[8] == (('G', 1), (1.0, 3.0))
    assert found[9] == (('H', 0), (0.0 + 1.0j, 1.0 + 1.0j))
    assert found[10] == (('I', 0), (4.0 + 5.0j, 5.0 + 5.0j))

    assert found[11][0] == ('J', 0)
    assert (found[11][1][0] == [0, 1]).all()
    assert (found[11][1][1] == [1, 2]).all()
    assert found[12][0] == ('J', 1)
    assert (found[12][1][0] == [2, 3]).all()
    assert (found[12][1][1] == [3, 4]).all()

    assert result.diff_total == 13
    assert result.diff_ratio == 0.65

    text = result.report()
    assert ('Column A data differs in row 0:\n'
            ' a> True\n'
            ' b> False') in text
    assert ('...and at 1 more indices.\n'
            ' Column D data differs in row 0:') in text
    assert ('13 different table data element(s) found (65.00% different)'
            in text)
    assert text.count('more indices') == 1