示例#1
0
def test_group_by_errors(T1):
    """
    Invalid ``group_by`` keys raise the expected exception type.
    """
    # Each invalid key is paired with the exception it must trigger
    bad_key_cases = (
        ('f', ValueError),               # bad column name as string
        (['f', 'g'], ValueError),        # bad column names in list
        (np.array([1, 2]), ValueError),  # wrong length array
        (None, TypeError),               # wrong type
    )
    for bad_key, expected_exc in bad_key_cases:
        with pytest.raises(expected_exc):
            T1.group_by(bad_key)

    # A key column holding a masked element is rejected as well
    masked_tbl = QTable(T1, masked=True)
    masked_tbl['a'].mask[4] = True
    with pytest.raises(ValueError):
        masked_tbl.group_by('a')
示例#2
0
def test_table_aggregate(T1):
    """
    Aggregate grouped tables with ``np.sum``.
    """
    # Restrict the table to columns that can be summed
    summable = T1['a', 'c', 'd']
    summed = summable.group_by('a').groups.aggregate(np.sum)
    expected_plain = [
        ' a   c    d ', '--- ---- ---', '  0  0.0   4', '  1  6.0  18',
        '  2 22.0   6'
    ]
    assert summed.pformat() == expected_plain
    # The aggregated table goes back to the default grouping
    assert np.all(summed.groups.indices == np.array([0, 3]))
    assert summed.groups.keys is None

    # Table and column metadata are carried through the aggregation
    assert summed.meta['ta'] == 1
    summed_c = summed['c']
    assert summed_c.meta['a'] == 1
    assert summed_c.description == 'column c'

    # Aggregate with np.sum when some elements are masked.  One group is
    # left with no elements, which gives a nan result and turns the 'd'
    # column into float.
    masked_full = QTable(T1, masked=True)
    for colname in ('c', 'd'):
        masked_full[colname].mask[4:6] = True
    grouped_full = masked_full.group_by('a')
    with pytest.warns(UserWarning, match="converting a masked element to nan"):
        summed_full = grouped_full.groups.aggregate(np.sum)

    expected_nan = [
        ' a   c    d    q  ', '               m  ', '--- ---- ---- ----',
        '  0  nan  nan  4.0', '  1  3.0 13.0 18.0', '  2 22.0  6.0  6.0'
    ]
    assert summed_full.pformat() == expected_nan

    # Aggregate with np.sum when some elements are masked but every group
    # keeps at least one unmasked element.  The int column then stays int.
    masked_summable = QTable(summable, masked=True)
    for colname in ('c', 'd'):
        masked_summable[colname].mask[5] = True
    summed_partial = masked_summable.group_by('a').groups.aggregate(np.sum)
    expected_partial = [
        ' a   c    d ', '--- ---- ---', '  0  0.0   4', '  1  3.0  13',
        '  2 22.0   6'
    ]
    assert summed_partial.pformat() == expected_partial

    # Aggregate when one column type cannot be supplied to the aggregating
    # function.  A warning is raised but the aggregation still works.
    grouped_all = T1.group_by('a')
    with pytest.warns(AstropyUserWarning, match="Cannot aggregate column"):
        summed_all = grouped_all.groups.aggregate(np.sum)
    expected_all = [
        ' a   c    d   q  ', '              m  ', '--- ---- --- ----',
        '  0  0.0   4  4.0', '  1  6.0  18 18.0', '  2 22.0   6  6.0'
    ]
    assert summed_all.pformat() == expected_all
示例#3
0
def test_group_by_masked(T1):
    """
    Group a masked table whose non-key columns hold masked elements.
    """
    masked_tbl = QTable(T1, masked=True)
    # Mask one element in each of two non-key columns
    for colname, row in (('c', 4), ('d', 5)):
        masked_tbl[colname].mask[row] = True

    expected_lines = [
        ' a   b   c   d   q ', '                 m ', '--- --- --- --- ---',
        '  0   a  --   4 4.0', '  1   b 3.0  -- 5.0', '  1   a 2.0   6 6.0',
        '  1   a 1.0   7 7.0', '  2   c 7.0   0 0.0', '  2   b 5.0   1 1.0',
        '  2   b 6.0   2 2.0', '  2   a 4.0   3 3.0'
    ]
    grouped = masked_tbl.group_by('a')
    assert grouped.pformat() == expected_lines
示例#4
0
def test_grouped_slicing(T1):
    """
    Slicing a grouped table discards the previous grouping.
    """
    for use_mask in (False, True):
        grouped = QTable(T1, masked=use_mask).group_by('a')

        # A regular slice comes back as one default group covering all rows
        sliced = grouped[3:5]
        default_indices = np.array([0, len(sliced)])
        assert np.all(sliced.groups.indices == default_indices)
        assert sliced.groups.keys is None
示例#5
0
def test_group_mixins():
    """
    Group a table by each of its mixin columns and check order and groups.
    """
    # Every mixin column is derived from the same four values
    idx = np.arange(4)
    x = np.array([3., 1., 2., 1.])
    lon = coordinates.Longitude(x * u.deg)
    lat = coordinates.Latitude(x * u.deg)
    # For Time use J2000.0 + few * 0.1 ns (this requires > 64 bit precision)
    epoch = time.Time(2000, format='jyear')
    offsets = time.TimeDelta(x * 1e-10, format='sec')
    structured = np.array([(3, 'c'), (1, 'a'), (2, 'b'), (1, 'a')],
                          dtype='<i4,|S1')

    # Insertion order of this dict fixes the column order of the table
    columns = {
        'idx': idx,
        'x': x,
        'q': x * u.m,
        'lon': lon,
        'lat': lat,
        'tm': epoch + offsets,
        'sc': coordinates.SkyCoord(ra=lon, dec=lat),
        'aw': table_helpers.ArrayWrapper(x),
        'nd': structured.view(NdarrayMixin),
    }
    qt = QTable(list(columns.values()), names=list(columns))

    # Every column except the plain index is used as a grouping key
    mixin_keys = [name for name in columns if name != 'idx']
    # SkyCoord is left out of the value checks because it has no equality
    comparable = [name for name in mixin_keys if name != 'sc']
    # Rows of the original table expected in groups 0, 1 and 2
    expected_rows = ([1, 3], [2], [0])

    for key in mixin_keys:
        qtg = qt.group_by(key)

        # The sort order must be right
        assert np.all(qtg['idx'] == [1, 3, 2, 0])

        # Each group must hold the expected rows
        for name in comparable:
            for group_no, rows in enumerate(expected_rows):
                assert np.all(qt[name][rows] == qtg.groups[group_no][name])

    # unique() does most of its work through group_by(), so it must work
    # with mixins as well.  This uses *every* mixin as key.
    uqt = unique(qt, keys=mixin_keys)
    assert len(uqt) == 3
    assert np.all(uqt['idx'] == [1, 2, 0])
    assert np.all(uqt['x'] == [1., 2., 3.])

    # Column group_by() with mixins as keys
    idx_grouped = qt['idx'].group_by(qt[mixin_keys])
    assert np.all(idx_grouped == [1, 3, 2, 0])
示例#6
0
def test_group_mixins():
    """
    Group a table by each of its mixin columns and check order and groups.
    """
    # Every mixin column is derived from the same four values
    idx = np.arange(4)
    x = np.array([3., 1., 2., 1.])
    lon = coordinates.Longitude(x * u.deg)
    lat = coordinates.Latitude(x * u.deg)
    # For Time use J2000.0 + few * 0.1 ns (this requires > 64 bit precision)
    epoch = time.Time(2000, format='jyear')
    offsets = time.TimeDelta(x * 1e-10, format='sec')
    structured = np.array([(3, 'c'), (1, 'a'), (2, 'b'), (1, 'a')],
                          dtype='<i4,|S1')

    # Insertion order of this dict fixes the column order of the table
    columns = {
        'idx': idx,
        'x': x,
        'q': x * u.m,
        'lon': lon,
        'lat': lat,
        'tm': epoch + offsets,
        'sc': coordinates.SkyCoord(ra=lon, dec=lat),
        'aw': table_helpers.ArrayWrapper(x),
        'nd': structured.view(NdarrayMixin),
    }
    qt = QTable(list(columns.values()), names=list(columns))

    # Every column except the plain index is used as a grouping key
    mixin_keys = [name for name in columns if name != 'idx']
    # SkyCoord is left out of the value checks because it has no equality
    comparable = [name for name in mixin_keys if name != 'sc']
    # Rows of the original table expected in groups 0, 1 and 2
    expected_rows = ([1, 3], [2], [0])

    for key in mixin_keys:
        qtg = qt.group_by(key)

        # The sort order must be right
        assert np.all(qtg['idx'] == [1, 3, 2, 0])

        # Each group must hold the expected rows
        for name in comparable:
            for group_no, rows in enumerate(expected_rows):
                assert np.all(qt[name][rows] == qtg.groups[group_no][name])

    # unique() does most of its work through group_by(), so it must work
    # with mixins as well.  This uses *every* mixin as key.
    uqt = unique(qt, keys=mixin_keys)
    assert len(uqt) == 3
    assert np.all(uqt['idx'] == [1, 2, 0])
    assert np.all(uqt['x'] == [1., 2., 3.])

    # Column group_by() with mixins as keys
    idx_grouped = qt['idx'].group_by(qt[mixin_keys])
    assert np.all(idx_grouped == [1, 3, 2, 0])
示例#7
0
def test_grouped_copy(T1):
    """
    Copies of grouped tables and columns keep the group information.
    """
    for use_mask in (False, True):
        tbl = QTable(T1, masked=use_mask)

        # Copy of a grouped table
        grouped = tbl.group_by('a')
        grouped_copy = grouped.copy()
        assert np.all(grouped_copy.groups.indices == grouped.groups.indices)
        assert np.all(grouped_copy.groups.keys == grouped.groups.keys)

        # Copy of a column taken from the grouped table
        key_col_copy = grouped['a'].copy()
        assert np.all(
            key_col_copy.groups.indices == grouped['a'].groups.indices)

        # Copy of a column that was grouped on its own
        col_grouped = tbl['a'].copy().group_by(tbl['a'])
        col_grouped_copy = col_grouped.copy()
        expected_indices = np.array([0, 1, 4, 8])
        assert np.all(col_grouped_copy.groups.indices == expected_indices)
示例#8
0
def test_mutable_operations(T1):
    """
    Adding or deleting a row should remove the grouping, while adding,
    removing or renaming a column should retain it.
    """
    # Row edits: each one must reset the table to the default grouping
    row_edits = (
        lambda tbl: tbl.add_row((0, 'a', 3.0, 4, 4 * u.m)),  # add row
        lambda tbl: tbl.remove_row(4),                       # remove row
    )
    # Column edits paired with a column that still exists afterwards and
    # whose groups are then checked
    column_edits = (
        (lambda tbl: tbl.remove_column('b'), 'a'),        # remove non-key column
        (lambda tbl: tbl.remove_column('a'), 'b'),        # remove key column
        (lambda tbl: tbl.rename_column('a', 'aa'), 'aa'),  # rename key column
    )

    for use_mask in (False, True):
        t1 = QTable(T1, masked=use_mask)

        for apply_edit in row_edits:
            tg = t1.group_by('a')
            apply_edit(tg)
            assert np.all(tg.groups.indices == np.array([0, len(tg)]))
            assert tg.groups.keys is None

        # Adding a column keeps the grouping and the new column shares it
        tg = t1.group_by('a')
        indices = tg.groups.indices.copy()
        new_col = Column(name='e', data=np.arange(len(tg)))
        tg.add_column(new_col)
        assert np.all(tg.groups.indices == indices)
        assert np.all(tg['e'].groups.indices == indices)
        assert np.all(tg['e'].groups.keys == tg.groups.keys)

        for apply_edit, probe_name in column_edits:
            tg = t1.group_by('a')
            apply_edit(tg)
            assert np.all(tg.groups.indices == indices)
            # The keys still carry the original key column name
            assert tg.groups.keys.dtype.names == ('a', )
            assert np.all(tg[probe_name].groups.indices == indices)
示例#9
0
def _matches_all_line_sets(source_table, columns_array):
    """Return True if every column set has a ``True`` entry for this source.

    For each sequence of column names in ``columns_array`` at least one of
    the named columns of ``source_table`` must contain ``True``.
    """
    return all(
        any(True in source_table[column] for column in columns)
        for columns in columns_array
    )


def match_lines(surveyTable, columns_array):
    """Select the sources that satisfy every set of column conditions.

    The table is grouped by ``'NED source name'``.  A source is kept when,
    for each entry of ``columns_array``, at least one of the listed columns
    contains ``True`` in any row of that source.

    Parameters
    ----------
    surveyTable : table
        Table providing ``group_by`` and the columns ``'NED source name'``,
        ``'ALMA RA'`` and ``'ALMA Dec'`` plus every column named in
        ``columns_array``.
    columns_array : iterable of iterables of str
        Sets of column names; the sets are AND-ed and the columns inside a
        set are OR-ed.
        NOTE(review): the columns are presumably boolean flags -- confirm
        against the caller.

    Returns
    -------
    selectedTable : table
        All rows of the selected sources, ordered by source name.  An empty
        ``QTable`` when no source qualifies.
    sourcelist : pandas.DataFrame
        One row per selected source with columns ``name``, ``RA`` and
        ``Dec`` (the values of the first row of that source).  An empty
        ``DataFrame`` without columns when no source qualifies.
    """
    grouped = surveyTable.group_by('NED source name')

    # Single pass over the groups: collect the matching groups together with
    # their name and first-row coordinates, so the table is neither
    # re-stacked on every hit nor regrouped and searched a second time.
    selected_groups = []
    names = []
    ras = []
    decs = []
    for source_table in grouped.groups:
        if not _matches_all_line_sets(source_table, columns_array):
            continue
        selected_groups.append(source_table)
        names.append(source_table['NED source name'][0])
        ras.append(source_table['ALMA RA'][0])
        decs.append(source_table['ALMA Dec'][0])

    if not selected_groups:
        print('No target found')
        return QTable(), pd.DataFrame()

    if len(selected_groups) == 1:
        # Copy so the result does not alias the grouped working table
        selectedTable = selected_groups[0].copy()
    else:
        selectedTable = vstack(selected_groups)

    sourcelist = pd.DataFrame({'name': names, 'RA': ras, 'Dec': decs})
    return selectedTable, sourcelist
示例#10
0
def test_table_group_by(T1):
    """
    Test basic table group_by functionality for possible key types and for
    masked/unmasked tables.

    The key types exercised are: a column name, a table column, a list or
    tuple of column names, a table, a structured array and a plain ndarray.
    """
    # Run every check for both an unmasked and a masked copy of the fixture
    for masked in (False, True):
        t1 = QTable(T1, masked=masked)
        # Group by a single column key specified by name
        tg = t1.group_by('a')
        assert np.all(tg.groups.indices == np.array([0, 1, 4, 8]))
        assert str(tg.groups) == "<TableGroups indices=[0 1 4 8]>"
        assert str(tg['a'].groups) == "<ColumnGroups indices=[0 1 4 8]>"

        # Sorted by 'a' and in original order for rest
        assert tg.pformat() == [
            ' a   b   c   d   q ', '                 m ',
            '--- --- --- --- ---', '  0   a 0.0   4 4.0',
            '  1   b 3.0   5 5.0', '  1   a 2.0   6 6.0',
            '  1   a 1.0   7 7.0', '  2   c 7.0   0 0.0',
            '  2   b 5.0   1 1.0', '  2   b 6.0   2 2.0', '  2   a 4.0   3 3.0'
        ]
        # Table and column metadata are kept on the grouped table
        assert tg.meta['ta'] == 1
        assert tg['c'].meta['a'] == 1
        assert tg['c'].description == 'column c'

        # Group by a table column
        tg2 = t1.group_by(t1['a'])
        assert tg.pformat() == tg2.pformat()

        # Group by two columns spec'd by name, given as a list and as a tuple
        for keys in (['a', 'b'], ('a', 'b')):
            tg = t1.group_by(keys)
            assert np.all(tg.groups.indices == np.array([0, 1, 3, 4, 5, 7, 8]))
            # Sorted by 'a', 'b' and in original order for rest
            assert tg.pformat() == [
                ' a   b   c   d   q ', '                 m ',
                '--- --- --- --- ---', '  0   a 0.0   4 4.0',
                '  1   a 2.0   6 6.0', '  1   a 1.0   7 7.0',
                '  1   b 3.0   5 5.0', '  2   a 4.0   3 3.0',
                '  2   b 5.0   1 1.0', '  2   b 6.0   2 2.0',
                '  2   c 7.0   0 0.0'
            ]

        # Group by a Table; ``tg`` is still the ('a', 'b') grouping left by
        # the last iteration of the loop above and serves as the reference
        tg2 = t1.group_by(t1['a', 'b'])
        assert tg.pformat() == tg2.pformat()

        # Group by a structured array
        tg2 = t1.group_by(t1['a', 'b'].as_array())
        assert tg.pformat() == tg2.pformat()

        # Group by a simple ndarray with one key value per table row
        tg = t1.group_by(np.array([0, 1, 0, 1, 2, 1, 0, 0]))
        assert np.all(tg.groups.indices == np.array([0, 4, 7, 8]))
        assert tg.pformat() == [
            ' a   b   c   d   q ', '                 m ',
            '--- --- --- --- ---', '  2   c 7.0   0 0.0',
            '  2   b 6.0   2 2.0', '  1   a 2.0   6 6.0',
            '  1   a 1.0   7 7.0', '  2   b 5.0   1 1.0',
            '  2   a 4.0   3 3.0', '  1   b 3.0   5 5.0', '  0   a 0.0   4 4.0'
        ]