def test_group_by_errors(T1): """ Appropriate errors get raised. """ # Bad column name as string with pytest.raises(ValueError): T1.group_by('f') # Bad column names in list with pytest.raises(ValueError): T1.group_by(['f', 'g']) # Wrong length array with pytest.raises(ValueError): T1.group_by(np.array([1, 2])) # Wrong type with pytest.raises(TypeError): T1.group_by(None) # Masked key column t1 = QTable(T1, masked=True) t1['a'].mask[4] = True with pytest.raises(ValueError): t1.group_by('a')
def test_table_aggregate(T1): """ Aggregate a table """ # Table with only summable cols t1 = T1['a', 'c', 'd'] tg = t1.group_by('a') tga = tg.groups.aggregate(np.sum) assert tga.pformat() == [ ' a c d ', '--- ---- ---', ' 0 0.0 4', ' 1 6.0 18', ' 2 22.0 6' ] # Reverts to default groups assert np.all(tga.groups.indices == np.array([0, 3])) assert tga.groups.keys is None # metadata survives assert tga.meta['ta'] == 1 assert tga['c'].meta['a'] == 1 assert tga['c'].description == 'column c' # Aggregate with np.sum with masked elements. This results # in one group with no elements, hence a nan result and conversion # to float for the 'd' column. t1m = QTable(T1, masked=True) t1m['c'].mask[4:6] = True t1m['d'].mask[4:6] = True tg = t1m.group_by('a') with pytest.warns(UserWarning, match="converting a masked element to nan"): tga = tg.groups.aggregate(np.sum) assert tga.pformat() == [ ' a c d q ', ' m ', '--- ---- ---- ----', ' 0 nan nan 4.0', ' 1 3.0 13.0 18.0', ' 2 22.0 6.0 6.0' ] # Aggregrate with np.sum with masked elements, but where every # group has at least one remaining (unmasked) element. Then # the int column stays as an int. t1m = QTable(t1, masked=True) t1m['c'].mask[5] = True t1m['d'].mask[5] = True tg = t1m.group_by('a') tga = tg.groups.aggregate(np.sum) assert tga.pformat() == [ ' a c d ', '--- ---- ---', ' 0 0.0 4', ' 1 3.0 13', ' 2 22.0 6' ] # Aggregate with a column type that cannot by supplied to the aggregating # function. This raises a warning but still works. tg = T1.group_by('a') with pytest.warns(AstropyUserWarning, match="Cannot aggregate column"): tga = tg.groups.aggregate(np.sum) assert tga.pformat() == [ ' a c d q ', ' m ', '--- ---- --- ----', ' 0 0.0 4 4.0', ' 1 6.0 18 18.0', ' 2 22.0 6 6.0' ]
def test_group_by_masked(T1): t1m = QTable(T1, masked=True) t1m['c'].mask[4] = True t1m['d'].mask[5] = True assert t1m.group_by('a').pformat() == [ ' a b c d q ', ' m ', '--- --- --- --- ---', ' 0 a -- 4 4.0', ' 1 b 3.0 -- 5.0', ' 1 a 2.0 6 6.0', ' 1 a 1.0 7 7.0', ' 2 c 7.0 0 0.0', ' 2 b 5.0 1 1.0', ' 2 b 6.0 2 2.0', ' 2 a 4.0 3 3.0' ]
def test_grouped_slicing(T1): """ Test that slicing a table removes previous grouping """ for masked in (False, True): t1 = QTable(T1, masked=masked) # Regular slice of a table tg = t1.group_by('a') tg2 = tg[3:5] assert np.all(tg2.groups.indices == np.array([0, len(tg2)])) assert tg2.groups.keys is None
def test_group_mixins(): """ Test grouping a table with mixin columns """ # Setup mixins idx = np.arange(4) x = np.array([3., 1., 2., 1.]) q = x * u.m lon = coordinates.Longitude(x * u.deg) lat = coordinates.Latitude(x * u.deg) # For Time do J2000.0 + few * 0.1 ns (this requires > 64 bit precision) tm = time.Time(2000, format='jyear') + time.TimeDelta(x * 1e-10, format='sec') sc = coordinates.SkyCoord(ra=lon, dec=lat) aw = table_helpers.ArrayWrapper(x) nd = np.array([(3, 'c'), (1, 'a'), (2, 'b'), (1, 'a')], dtype='<i4,|S1').view(NdarrayMixin) qt = QTable([idx, x, q, lon, lat, tm, sc, aw, nd], names=['idx', 'x', 'q', 'lon', 'lat', 'tm', 'sc', 'aw', 'nd']) # Test group_by with each supported mixin type mixin_keys = ['x', 'q', 'lon', 'lat', 'tm', 'sc', 'aw', 'nd'] for key in mixin_keys: qtg = qt.group_by(key) # Test that it got the sort order correct assert np.all(qtg['idx'] == [1, 3, 2, 0]) # Test that the groups are right # Note: skip testing SkyCoord column because that doesn't have equality for name in ['x', 'q', 'lon', 'lat', 'tm', 'aw', 'nd']: assert np.all(qt[name][[1, 3]] == qtg.groups[0][name]) assert np.all(qt[name][[2]] == qtg.groups[1][name]) assert np.all(qt[name][[0]] == qtg.groups[2][name]) # Test that unique also works with mixins since most of the work is # done with group_by(). This is using *every* mixin as key. uqt = unique(qt, keys=mixin_keys) assert len(uqt) == 3 assert np.all(uqt['idx'] == [1, 2, 0]) assert np.all(uqt['x'] == [1., 2., 3.]) # Column group_by() with mixins idxg = qt['idx'].group_by(qt[mixin_keys]) assert np.all(idxg == [1, 3, 2, 0])
def test_grouped_copy(T1): """ Test that copying a table or column copies the groups properly """ for masked in (False, True): t1 = QTable(T1, masked=masked) tg = t1.group_by('a') tgc = tg.copy() assert np.all(tgc.groups.indices == tg.groups.indices) assert np.all(tgc.groups.keys == tg.groups.keys) tac = tg['a'].copy() assert np.all(tac.groups.indices == tg['a'].groups.indices) c1 = t1['a'].copy() gc1 = c1.group_by(t1['a']) gc1c = gc1.copy() assert np.all(gc1c.groups.indices == np.array([0, 1, 4, 8]))
def test_mutable_operations(T1): """ Operations like adding or deleting a row should removing grouping, but adding or removing or renaming a column should retain grouping. """ for masked in (False, True): t1 = QTable(T1, masked=masked) # add row tg = t1.group_by('a') tg.add_row((0, 'a', 3.0, 4, 4 * u.m)) assert np.all(tg.groups.indices == np.array([0, len(tg)])) assert tg.groups.keys is None # remove row tg = t1.group_by('a') tg.remove_row(4) assert np.all(tg.groups.indices == np.array([0, len(tg)])) assert tg.groups.keys is None # add column tg = t1.group_by('a') indices = tg.groups.indices.copy() tg.add_column(Column(name='e', data=np.arange(len(tg)))) assert np.all(tg.groups.indices == indices) assert np.all(tg['e'].groups.indices == indices) assert np.all(tg['e'].groups.keys == tg.groups.keys) # remove column (not key column) tg = t1.group_by('a') tg.remove_column('b') assert np.all(tg.groups.indices == indices) # Still has original key col names assert tg.groups.keys.dtype.names == ('a', ) assert np.all(tg['a'].groups.indices == indices) # remove key column tg = t1.group_by('a') tg.remove_column('a') assert np.all(tg.groups.indices == indices) assert tg.groups.keys.dtype.names == ('a', ) assert np.all(tg['b'].groups.indices == indices) # rename key column tg = t1.group_by('a') tg.rename_column('a', 'aa') assert np.all(tg.groups.indices == indices) assert tg.groups.keys.dtype.names == ('a', ) assert np.all(tg['aa'].groups.indices == indices)
def match_lines(surveyTable, columns_array): surveyTable_group = surveyTable.group_by('NED source name') selectedTable = QTable() for source in surveyTable_group.groups.keys['NED source name']: mask = surveyTable_group.groups.keys['NED source name'] == source sourceTable = surveyTable_group.groups[mask] conditions = [] for columns in columns_array: conditions_temp = [] for column in columns: condition = (True in sourceTable[column]) conditions_temp.append(condition) conditions.append((any(conditions_temp))) if all(conditions): if len(selectedTable) == 0: selectedTable = sourceTable.copy() else: selectedTable = vstack([selectedTable, sourceTable]) sourcelist = pd.DataFrame() if len(selectedTable) == 0: print('No target found') else: names = selectedTable.group_by( 'NED source name').groups.keys['NED source name'] names = list(names) RAs = [] Decs = [] for source in names: mask = surveyTable_group.groups.keys['NED source name'] == source sourceTable = surveyTable_group.groups[mask] RAs.append(sourceTable['ALMA RA'][0]) Decs.append(sourceTable['ALMA Dec'][0]) sourcelist['name'] = names sourcelist['RA'] = RAs sourcelist['Dec'] = Decs return selectedTable, sourcelist
def test_table_group_by(T1): """ Test basic table group_by functionality for possible key types and for masked/unmasked tables. """ for masked in (False, True): t1 = QTable(T1, masked=masked) # Group by a single column key specified by name tg = t1.group_by('a') assert np.all(tg.groups.indices == np.array([0, 1, 4, 8])) assert str(tg.groups) == "<TableGroups indices=[0 1 4 8]>" assert str(tg['a'].groups) == "<ColumnGroups indices=[0 1 4 8]>" # Sorted by 'a' and in original order for rest assert tg.pformat() == [ ' a b c d q ', ' m ', '--- --- --- --- ---', ' 0 a 0.0 4 4.0', ' 1 b 3.0 5 5.0', ' 1 a 2.0 6 6.0', ' 1 a 1.0 7 7.0', ' 2 c 7.0 0 0.0', ' 2 b 5.0 1 1.0', ' 2 b 6.0 2 2.0', ' 2 a 4.0 3 3.0' ] assert tg.meta['ta'] == 1 assert tg['c'].meta['a'] == 1 assert tg['c'].description == 'column c' # Group by a table column tg2 = t1.group_by(t1['a']) assert tg.pformat() == tg2.pformat() # Group by two columns spec'd by name for keys in (['a', 'b'], ('a', 'b')): tg = t1.group_by(keys) assert np.all(tg.groups.indices == np.array([0, 1, 3, 4, 5, 7, 8])) # Sorted by 'a', 'b' and in original order for rest assert tg.pformat() == [ ' a b c d q ', ' m ', '--- --- --- --- ---', ' 0 a 0.0 4 4.0', ' 1 a 2.0 6 6.0', ' 1 a 1.0 7 7.0', ' 1 b 3.0 5 5.0', ' 2 a 4.0 3 3.0', ' 2 b 5.0 1 1.0', ' 2 b 6.0 2 2.0', ' 2 c 7.0 0 0.0' ] # Group by a Table tg2 = t1.group_by(t1['a', 'b']) assert tg.pformat() == tg2.pformat() # Group by a structured array tg2 = t1.group_by(t1['a', 'b'].as_array()) assert tg.pformat() == tg2.pformat() # Group by a simple ndarray tg = t1.group_by(np.array([0, 1, 0, 1, 2, 1, 0, 0])) assert np.all(tg.groups.indices == np.array([0, 4, 7, 8])) assert tg.pformat() == [ ' a b c d q ', ' m ', '--- --- --- --- ---', ' 2 c 7.0 0 0.0', ' 2 b 6.0 2 2.0', ' 1 a 2.0 6 6.0', ' 1 a 1.0 7 7.0', ' 2 b 5.0 1 1.0', ' 2 a 4.0 3 3.0', ' 1 b 3.0 5 5.0', ' 0 a 0.0 4 4.0' ]