示例#1
0
def test_table_info_attributes(table_types):
    """
    Test the info() method of printing a summary of table column attributes
    """
    a = np.array([1, 2, 3], dtype='int32')
    b = np.array([1, 2, 3], dtype='float32')
    c = np.array(['a', 'c', 'e'], dtype='|S1')
    t = table_types.Table([a, b, c], names=['a', 'b', 'c'])

    # Minimal output for a typical table
    tinfo = t.info(out=None)
    subcls = ['class'] if table_types.Table.__name__ == 'MyTable' else []
    assert tinfo.colnames == [
        'name', 'dtype', 'shape', 'unit', 'format', 'description', 'class',
        'n_bad', 'length'
    ]
    assert np.all(tinfo['name'] == ['a', 'b', 'c'])
    assert np.all(
        tinfo['dtype'] == ['int32', 'float32',
                           dtype_info_name('S1')])
    if subcls:
        assert np.all(tinfo['class'] == ['MyColumn'] * 3)

    # All output fields including a mixin column
    t['d'] = [1, 2, 3] * u.m
    t['d'].description = 'quantity'
    t['a'].format = '%02d'
    t['e'] = time.Time([1, 2, 3], format='mjd')
    t['e'].info.description = 'time'
    t['f'] = coordinates.SkyCoord([1, 2, 3], [1, 2, 3], unit='deg')
    t['f'].info.description = 'skycoord'

    tinfo = t.info(out=None)
    assert np.all(tinfo['name'] == 'a b c d e f'.split())
    assert np.all(tinfo['dtype'] == [
        'int32', 'float32',
        dtype_info_name('S1'), 'float64', 'object', 'object'
    ])
    assert np.all(tinfo['unit'] == ['', '', '', 'm', '', 'deg,deg'])
    assert np.all(tinfo['format'] == ['%02d', '', '', '', '', ''])
    assert np.all(
        tinfo['description'] == ['', '', '', 'quantity', 'time', 'skycoord'])
    cls = t.ColumnClass.__name__
    assert np.all(tinfo['class'] == [cls, cls, cls, cls, 'Time', 'SkyCoord'])

    # Test that repr(t.info) is same as t.info()
    out = StringIO()
    t.info(out=out)
    assert repr(t.info) == out.getvalue()
示例#2
0
def test_table_info_stats(table_types):
    """
    Test the info() method of printing a summary of table column statistics
    """
    a = np.array([1, 2, 1, 2], dtype='int32')
    b = np.array([1, 2, 1, 2], dtype='float32')
    c = np.array(['a', 'c', 'e', 'f'], dtype='|S1')
    d = time.Time([1, 2, 1, 2], format='mjd')
    t = table_types.Table([a, b, c, d], names=['a', 'b', 'c', 'd'])

    # option = 'stats'
    masked = 'masked=True ' if t.masked else ''
    out = StringIO()
    t.info('stats', out=out)
    table_header_line = f'<{t.__class__.__name__} {masked}length=4>'
    exp = [
        table_header_line, 'name mean std min max', '---- ---- --- --- ---',
        '   a  1.5 0.5   1   2', '   b  1.5 0.5 1.0 2.0',
        '   c   --  --  --  --', '   d   --  -- 1.0 2.0'
    ]
    assert out.getvalue().splitlines() == exp

    # option = ['attributes', 'stats']
    tinfo = t.info(['attributes', 'stats'], out=None)
    assert tinfo.colnames == [
        'name', 'dtype', 'shape', 'unit', 'format', 'description', 'class',
        'mean', 'std', 'min', 'max', 'n_bad', 'length'
    ]
    assert np.all(tinfo['mean'] == ['1.5', '1.5', '--', '--'])
    assert np.all(tinfo['std'] == ['0.5', '0.5', '--', '--'])
    assert np.all(tinfo['min'] == ['1', '1.0', '--', '1.0'])
    assert np.all(tinfo['max'] == ['2', '2.0', '--', '2.0'])

    out = StringIO()
    t.info('stats', out=out)
    exp = [
        table_header_line, 'name mean std min max', '---- ---- --- --- ---',
        '   a  1.5 0.5   1   2', '   b  1.5 0.5 1.0 2.0',
        '   c   --  --  --  --', '   d   --  -- 1.0 2.0'
    ]
    assert out.getvalue().splitlines() == exp

    # option = ['attributes', custom]
    custom = data_info_factory(names=['sum', 'first'],
                               funcs=[np.sum, lambda col: col[0]])
    out = StringIO()
    tinfo = t.info(['attributes', custom], out=None)
    assert tinfo.colnames == [
        'name', 'dtype', 'shape', 'unit', 'format', 'description', 'class',
        'sum', 'first', 'n_bad', 'length'
    ]
    assert np.all(tinfo['name'] == ['a', 'b', 'c', 'd'])
    assert np.all(tinfo['dtype'] ==
                  ['int32', 'float32',
                   dtype_info_name('S1'), 'object'])
    assert np.all(tinfo['sum'] == ['6', '6.0', '--', '--'])
    assert np.all(tinfo['first'] == ['1', '1.0', 'a', '1.0'])
示例#3
0
def test_table_info_stats(table_types):
    """
    Test the info() method of printing a summary of table column statistics
    """
    a = np.array([1, 2, 1, 2], dtype='int32')
    b = np.array([1, 2, 1, 2], dtype='float32')
    c = np.array(['a', 'c', 'e', 'f'], dtype='|S1')
    d = time.Time([1, 2, 1, 2], format='mjd')
    t = table_types.Table([a, b, c, d], names=['a', 'b', 'c', 'd'])

    # option = 'stats'
    masked = 'masked=True ' if t.masked else ''
    out = StringIO()
    t.info('stats', out=out)
    table_header_line = '<{0} {1}length=4>'.format(t.__class__.__name__, masked)
    exp = [table_header_line,
           'name mean std min max',
           '---- ---- --- --- ---',
           '   a  1.5 0.5   1   2',
           '   b  1.5 0.5 1.0 2.0',
           '   c   --  --  --  --',
           '   d   --  -- 1.0 2.0']
    assert out.getvalue().splitlines() == exp

    # option = ['attributes', 'stats']
    tinfo = t.info(['attributes', 'stats'], out=None)
    assert tinfo.colnames == ['name', 'dtype', 'shape', 'unit', 'format', 'description',
                              'class', 'mean', 'std', 'min', 'max', 'n_bad', 'length']
    assert np.all(tinfo['mean'] == ['1.5', '1.5', '--', '--'])
    assert np.all(tinfo['std'] == ['0.5', '0.5', '--', '--'])
    assert np.all(tinfo['min'] == ['1', '1.0', '--', '1.0'])
    assert np.all(tinfo['max'] == ['2', '2.0', '--', '2.0'])

    out = StringIO()
    t.info('stats', out=out)
    exp = [table_header_line,
           'name mean std min max',
           '---- ---- --- --- ---',
           '   a  1.5 0.5   1   2',
           '   b  1.5 0.5 1.0 2.0',
           '   c   --  --  --  --',
           '   d   --  -- 1.0 2.0']
    assert out.getvalue().splitlines() == exp

    # option = ['attributes', custom]
    custom = data_info_factory(names=['sum', 'first'],
                               funcs=[np.sum, lambda col: col[0]])
    out = StringIO()
    tinfo = t.info(['attributes', custom], out=None)
    assert tinfo.colnames == ['name', 'dtype', 'shape', 'unit', 'format', 'description',
                              'class', 'sum', 'first', 'n_bad', 'length']
    assert np.all(tinfo['name'] == ['a', 'b', 'c', 'd'])
    assert np.all(tinfo['dtype'] == ['int32', 'float32', dtype_info_name('S1'), 'object'])
    assert np.all(tinfo['sum'] == ['6', '6.0', '--', '--'])
    assert np.all(tinfo['first'] == ['1', '1.0', 'a', '1.0'])
示例#4
0
def test_table_info_attributes(table_types):
    """
    Test the info() method of printing a summary of table column attributes
    """
    a = np.array([1, 2, 3], dtype='int32')
    b = np.array([1, 2, 3], dtype='float32')
    c = np.array(['a', 'c', 'e'], dtype='|S1')
    t = table_types.Table([a, b, c], names=['a', 'b', 'c'])

    # Minimal output for a typical table
    tinfo = t.info(out=None)
    subcls = ['class'] if table_types.Table.__name__ == 'MyTable' else []
    assert tinfo.colnames == ['name', 'dtype', 'shape', 'unit', 'format',
                              'description', 'class', 'n_bad', 'length']
    assert np.all(tinfo['name'] == ['a', 'b', 'c'])
    assert np.all(tinfo['dtype'] == ['int32', 'float32', dtype_info_name('S1')])
    if subcls:
        assert np.all(tinfo['class'] == ['MyColumn'] * 3)

    # All output fields including a mixin column
    t['d'] = [1, 2, 3] * u.m
    t['d'].description = 'quantity'
    t['a'].format = '%02d'
    t['e'] = time.Time([1, 2, 3], format='mjd')
    t['e'].info.description = 'time'
    t['f'] = coordinates.SkyCoord([1, 2, 3], [1, 2, 3], unit='deg')
    t['f'].info.description = 'skycoord'

    tinfo = t.info(out=None)
    assert np.all(tinfo['name'] == 'a b c d e f'.split())
    assert np.all(tinfo['dtype'] == ['int32', 'float32', dtype_info_name('S1'), 'float64',
                                     'object', 'object'])
    assert np.all(tinfo['unit'] == ['', '', '', 'm', '', 'deg,deg'])
    assert np.all(tinfo['format'] == ['%02d', '', '', '', '', ''])
    assert np.all(tinfo['description'] == ['', '', '', 'quantity', 'time', 'skycoord'])
    cls = t.ColumnClass.__name__
    assert np.all(tinfo['class'] == [cls, cls, cls, cls, 'Time', 'SkyCoord'])

    # Test that repr(t.info) is same as t.info()
    out = StringIO()
    t.info(out=out)
    assert repr(t.info) == out.getvalue()
示例#5
0
def test_dtype_info_name(input, output):
    """
    Test that dtype_info_name is giving the expected output

    Here the available types::

      'b' boolean
      'i' (signed) integer
      'u' unsigned integer
      'f' floating-point
      'c' complex-floating point
      'O' (Python) objects
      'S', 'a' (byte-)string
      'U' Unicode
      'V' raw data (void)
    """
    assert dtype_info_name(input) == output
示例#6
0
    def _pformat_col_iter(self,
                          col,
                          max_lines,
                          show_name,
                          show_unit,
                          outs,
                          show_dtype=False,
                          show_length=None):
        """Iterator which yields formatted string representation of column values.

        Parameters
        ----------
        max_lines : int
            Maximum lines of output (header + data rows)

        show_name : bool
            Include column name. Default is True.

        show_unit : bool
            Include a header row for unit.  Default is to show a row
            for units only if one or more columns has a defined value
            for the unit.

        outs : dict
            Must be a dict which is used to pass back additional values
            defined within the iterator.

        show_dtype : bool
            Include column dtype. Default is False.

        show_length : bool
            Include column length at end.  Default is to show this only
            if the column is not shown completely.
        """
        max_lines, _ = self._get_pprint_size(max_lines, -1)
        dtype = getattr(col, 'dtype', None)
        multidims = getattr(col, 'shape', [0])[1:]
        if multidims:
            multidim0 = tuple(0 for n in multidims)
            multidim1 = tuple(n - 1 for n in multidims)
            trivial_multidims = np.prod(multidims) == 1

        i_dashes = None
        i_centers = []  # Line indexes where content should be centered
        n_header = 0
        if show_name:
            i_centers.append(n_header)
            # Get column name (or 'None' if not set)
            col_name = str(col.info.name)
            n_header += 1
            yield self._name_and_structure(col_name, dtype)
        if show_unit:
            i_centers.append(n_header)
            n_header += 1
            yield str(col.info.unit or '')
        if show_dtype:
            i_centers.append(n_header)
            n_header += 1
            if dtype is not None:
                col_dtype = dtype_info_name((dtype, multidims))
            else:
                col_dtype = col.__class__.__qualname__ or 'object'
            yield col_dtype
        if show_unit or show_name or show_dtype:
            i_dashes = n_header
            n_header += 1
            yield '---'

        max_lines -= n_header
        n_print2 = max_lines // 2
        n_rows = len(col)

        # This block of code is responsible for producing the function that
        # will format values for this column.  The ``format_func`` function
        # takes two args (col_format, val) and returns the string-formatted
        # version.  Some points to understand:
        #
        # - col_format could itself be the formatting function, so it will
        #    actually end up being called with itself as the first arg.  In
        #    this case the function is expected to ignore its first arg.
        #
        # - auto_format_func is a function that gets called on the first
        #    column value that is being formatted.  It then determines an
        #    appropriate formatting function given the actual value to be
        #    formatted.  This might be deterministic or it might involve
        #    try/except.  The latter allows for different string formatting
        #    options like %f or {:5.3f}.  When auto_format_func is called it:

        #    1. Caches the function in the _format_funcs dict so for subsequent
        #       values the right function is called right away.
        #    2. Returns the formatted value.
        #
        # - possible_string_format_functions is a function that yields a
        #    succession of functions that might successfully format the
        #    value.  There is a default, but Mixin methods can override this.
        #    See Quantity for an example.
        #
        # - get_auto_format_func() returns a wrapped version of auto_format_func
        #    with the column id and possible_string_format_functions as
        #    enclosed variables.
        col_format = col.info.format or getattr(col.info, 'default_format',
                                                None)
        pssf = (getattr(col.info, 'possible_string_format_functions', None)
                or _possible_string_format_functions)
        auto_format_func = get_auto_format_func(col, pssf)
        format_func = col.info._format_funcs.get(col_format, auto_format_func)

        if len(col) > max_lines:
            if show_length is None:
                show_length = True
            i0 = n_print2 - (1 if show_length else 0)
            i1 = n_rows - n_print2 - max_lines % 2
            indices = np.concatenate(
                [np.arange(0, i0 + 1),
                 np.arange(i1 + 1, len(col))])
        else:
            i0 = -1
            indices = np.arange(len(col))

        def format_col_str(idx):
            if multidims:
                # Prevents columns like Column(data=[[(1,)],[(2,)]], name='a')
                # with shape (n,1,...,1) from being printed as if there was
                # more than one element in a row
                if trivial_multidims:
                    return format_func(col_format, col[(idx, ) + multidim0])
                else:
                    left = format_func(col_format, col[(idx, ) + multidim0])
                    right = format_func(col_format, col[(idx, ) + multidim1])
                    return f'{left} .. {right}'
            else:
                return format_func(col_format, col[idx])

        # Add formatted values if within bounds allowed by max_lines
        for idx in indices:
            if idx == i0:
                yield '...'
            else:
                try:
                    yield format_col_str(idx)
                except ValueError:
                    raise ValueError(
                        'Unable to parse format string "{}" for entry "{}" '
                        'in column "{}"'.format(col_format, col[idx],
                                                col.info.name))

        outs['show_length'] = show_length
        outs['n_header'] = n_header
        outs['i_centers'] = i_centers
        outs['i_dashes'] = i_dashes