def testUnderscoreColumnFormat(self):
    """Check the ``<band>_<column>`` naming of per-band output columns.

    The task is configured with ``camelCase = False`` and three output
    bands; each band must contribute a ``<band>_Fwhm`` column.
    """
    taskConfig = TransformObjectCatalogConfig()
    taskConfig.outputBands = ["g", "r", "i"]
    taskConfig.camelCase = False
    transformTask = TransformObjectCatalogTask(config=taskConfig)

    functors = {'Fwhm': HsmFwhm(dataset='meas')}
    result = transformTask.run(self.parq, funcs=functors,
                               dataId=self.dataId)

    self.assertIsInstance(result, pd.DataFrame)
    # One underscore-joined column is expected per configured band.
    expectedColumns = [band + '_Fwhm' for band in taskConfig.outputBands]
    for columnName in expectedColumns:
        self.assertIn(columnName, result.columns)
def testUnderscoreColumnFormat(self):
    """Check the ``<band>_<column>`` naming of per-band output columns.

    The task is configured with ``camelCase = False`` and an explicit
    ``filterMap``; each mapped value must contribute a ``<value>_Fwhm``
    column.
    """
    shortNameByFilter = {"HSC-G": "g", "HSC-R": "r", "HSC-I": "i"}

    taskConfig = TransformObjectCatalogConfig()
    taskConfig.filterMap = shortNameByFilter
    taskConfig.camelCase = False
    transformTask = TransformObjectCatalogTask(config=taskConfig)

    functors = {'Fwhm': HsmFwhm(dataset='meas')}
    result = transformTask.run(self.parq, funcs=functors,
                               dataId=self.dataId)

    self.assertIsInstance(result, pd.DataFrame)
    # The expected prefixes are the values of the map defined above.
    expectedColumns = [
        shortName + '_Fwhm' for shortName in shortNameByFilter.values()
    ]
    for columnName in expectedColumns:
        self.assertIn(columnName, result.columns)
def testNoOutputBands(self):
    """Run with multilevel output and no ``outputBands`` configured.

    All the input bands should go into the output, and nothing else:
    the result must have 'g', 'r' and 'i' sub-frames containing 'Fwhm',
    and must not contain 'HSC-G'.
    """
    taskConfig = TransformObjectCatalogConfig()
    taskConfig.multilevelOutput = True
    transformTask = TransformObjectCatalogTask(config=taskConfig)

    functors = {'Fwhm': HsmFwhm(dataset='meas')}
    result = transformTask.run(self.parq, funcs=functors,
                               dataId=self.dataId)

    self.assertIsInstance(result, pd.DataFrame)
    self.assertNotIn('HSC-G', result)
    for band in ('g', 'r', 'i'):
        # Selecting a top-level key must yield that band's own frame.
        self.assertIsInstance(result[band], pd.DataFrame)
        self.assertIn('Fwhm', result[band].columns)
def testMultilevelOutput(self):
    """Check the non-flattened result, which keeps a multilevel column
    index.

    Only 'r' and 'i' are requested as output bands, so 'g' must be
    absent and each requested band must hold a 'Fwhm' column.
    """
    taskConfig = TransformObjectCatalogConfig()
    taskConfig.outputBands = ["r", "i"]
    taskConfig.multilevelOutput = True
    transformTask = TransformObjectCatalogTask(config=taskConfig)

    functors = {'Fwhm': HsmFwhm(dataset='meas')}
    result = transformTask.run(self.parq, funcs=functors,
                               dataId=self.dataId)

    self.assertIsInstance(result, pd.DataFrame)
    self.assertNotIn('g', result)
    for band in taskConfig.outputBands:
        # Selecting a top-level key must yield that band's own frame.
        self.assertIsInstance(result[band], pd.DataFrame)
        self.assertIn('Fwhm', result[band].columns)
def setUp(self):
    """Load the test table and precompute the expected column names."""
    tablePath = os.path.join(ROOT, self.catFilename)
    self.parq = MultilevelParquetTable(tablePath)
    self.filters = self.parq.columnLevelNames['filter']

    self.task = TransformObjectCatalogTask()
    # Keep only the filterMap values whose key occurs in the table's
    # 'filter' column level.
    presentShortNames = []
    for filterName, shortName in self.task.config.filterMap.items():
        if filterName in self.filters:
            presentShortNames.append(shortName)
    self.shortFilters = presentShortNames

    # The functor file has to be configured before the functors are read.
    self.task.config.functorFile = self.yamlFile
    self.funcs = self.task.getFunctors()

    # Expected columns: one per functor plus the analysis default flags.
    columnNames = list(self.funcs.funcDict.keys())
    columnNames.extend(PostprocessAnalysis._defaultFlags)
    self.columnNames = columnNames

    # Functors flagged ``noDup`` are collected by name.
    noDupNames = []
    for functorName, functor in self.funcs.funcDict.items():
        if functor.noDup:
            noDupNames.append(functorName)
    self.noDupCols = noDupNames
def testNullFilter(self):
    """Check that columns are created for every requested band, even
    for a band that does not exist in the input data, and that the
    filled-in columns keep the expected values and dtypes.
    """
    taskConfig = TransformObjectCatalogConfig()
    taskConfig.camelCase = True
    # 'y' is requested although the input data lack it; 'g' is left out
    # although the input data contain it.
    taskConfig.outputBands = ["r", "i", "y"]
    # Arbitrarily pick one boolean flag column to be treated as "good".
    taskConfig.goodFlags = ['GoodFlagColumn']
    transformTask = TransformObjectCatalogTask(config=taskConfig)

    # One float column, one integer column, one good flag and one bad
    # flag. Which source columns are used does not matter, only that
    # each has the appropriate type.
    functors = {
        'FloatColumn': HsmFwhm(dataset='meas'),
        'IntColumn': Column('base_InputCount_value', dataset='meas'),
        'GoodFlagColumn': Column('slot_GaussianFlux_flag', dataset='meas'),
        'BadFlagColumn': Column('slot_Centroid_flag', dataset='meas'),
    }
    result = transformTask.run(self.parq, funcs=functors,
                               dataId=self.dataId)
    self.assertIsInstance(result, pd.DataFrame)

    columnSuffixes = ('FloatColumn', 'IntColumn',
                      'BadFlagColumn', 'GoodFlagColumn')
    for band in taskConfig.outputBands:
        for suffix in columnSuffixes:
            self.assertIn(band + suffix, result.columns)

    # Check that the default filling has worked: the missing 'y' band
    # is null / negative / False / True depending on the column kind,
    # while the 'i' band present in the input holds real values.
    self.assertNotIn('gFloatColumn', result.columns)
    self.assertTrue(result['yFloatColumn'].isnull().all())
    self.assertTrue(result['iFloatColumn'].notnull().all())
    self.assertTrue(np.all(result['iIntColumn'].values >= 0))
    self.assertTrue(np.all(result['yIntColumn'].values < 0))
    self.assertTrue(np.all(~result['yGoodFlagColumn'].values))
    self.assertTrue(np.all(result['yBadFlagColumn'].values))

    # Check that the datatypes are preserved for both a band that is
    # present ('i') and the one that was filled in ('y').
    expectedScalarTypes = (
        ('FloatColumn', np.float64),
        ('IntColumn', np.int64),
        ('GoodFlagColumn', np.bool_),
        ('BadFlagColumn', np.bool_),
    )
    for suffix, scalarType in expectedScalarTypes:
        for band in ('i', 'y'):
            self.assertEqual(result[band + suffix].dtype,
                             np.dtype(scalarType))
def testNullFilter(self):
    """Check that columns are created for every requested band, even
    for a band that does not exist in the input data.
    """
    taskConfig = TransformObjectCatalogConfig()
    # 'y' is requested although the input data lack it; 'g' is left out
    # although the input data contain it.
    taskConfig.outputBands = ["r", "i", "y"]
    transformTask = TransformObjectCatalogTask(config=taskConfig)

    functors = {'Fwhm': HsmFwhm(dataset='meas')}
    result = transformTask.run(self.parq, funcs=functors,
                               dataId=self.dataId)
    self.assertIsInstance(result, pd.DataFrame)

    availableColumns = result.columns
    for coordColumn in ('coord_ra', 'coord_dec'):
        self.assertIn(coordColumn, availableColumns)
    for band in taskConfig.outputBands:
        self.assertIn(band + 'Fwhm', availableColumns)

    # The excluded band is absent, the missing band is entirely null and
    # the band present in the input has no nulls.
    self.assertNotIn('gFwhm', availableColumns)
    self.assertTrue(result['yFwhm'].isnull().all())
    self.assertTrue(result['iFwhm'].notnull().all())
#!/usr/bin/env python
"""Script entry point that invokes ``TransformObjectCatalogTask.parseAndRun``."""
from lsst.pipe.tasks.postprocess import TransformObjectCatalogTask

if __name__ == "__main__":
    # Guarded so that importing this file (e.g. by tooling) does not start
    # the task as a side effect; running it as a script is unchanged.
    TransformObjectCatalogTask.parseAndRun()
class PostprocessTestCase(unittest.TestCase):
    """Run ``TransformObjectCatalogTask`` on a multilevel parquet table and
    check the columns of both the multilevel and the flattened output.
    """

    # File names of the input table (joined with ROOT) and of the functor
    # definition file handed to the task configuration.
    catFilename = 'multilevel_test.parq'
    yamlFile = 'testFunc.yaml'

    def setUp(self):
        """Load the test table and precompute the expected column names."""
        self.parq = MultilevelParquetTable(os.path.join(
            ROOT, self.catFilename))
        self.filters = self.parq.columnLevelNames['filter']
        self.task = TransformObjectCatalogTask()
        # filterMap values whose key occurs in the table's 'filter' level.
        self.shortFilters = [
            f for k, f in self.task.config.filterMap.items()
            if k in self.filters
        ]
        # The functor file has to be configured before the functors are read.
        self.task.config.functorFile = self.yamlFile
        self.funcs = self.task.getFunctors()
        self.columnNames = list(self.funcs.funcDict.keys())
        self.columnNames += list(PostprocessAnalysis._defaultFlags)
        self.noDupCols = [k for k, f in self.funcs.funcDict.items()
                          if f.noDup]

    def tearDown(self):
        """Drop the reference to the table loaded in `setUp`."""
        del self.parq

    def checkMultiLevelResults(self, df, dataId=None):
        """Assert that ``df`` has the expected two-level column index.

        Parameters
        ----------
        df : `pandas.DataFrame`
            Output of the task with ``multilevelOutput = True``.
        dataId : `dict`, optional
            If given, every key must be a column of each per-filter
            sub-frame.

        Raises
        ------
        AssertionError
            Raised if any expectation is not met.
        """
        self.assertIsInstance(df.columns, pd.MultiIndex)
        self.assertEqual(len(df.columns.levels), 2)  # Make sure two levels
        self.assertEqual(list(df.columns.names), ['filter', 'column'])

        # Make sure the correct columns are there
        for filt in self.filters:
            self.assertIn(filt, df.columns.levels[0])
        for col in self.columnNames:
            self.assertIn(col, df.columns.levels[1])

        if dataId is not None:
            for key in dataId:
                for filt in self.filters:
                    self.assertIn(key, df[filt].columns)

    def checkFlatResults(self, df, dataId=None):
        """Assert that ``df`` has the expected flat column index.

        Parameters
        ----------
        df : `pandas.DataFrame`
            Output of the task with ``multilevelOutput = False``.
        dataId : `dict`, optional
            If given, its keys are treated as additional non-duplicated
            columns that must be present in ``df``.

        Raises
        ------
        AssertionError
            Raised if any expectation is not met.
        """
        # Exact type check on purpose: MultiIndex is a subclass of Index,
        # so an isinstance test would also accept multilevel output.
        self.assertIs(type(df.columns), pd.core.indexes.base.Index)

        noDupCols = list(self.noDupCols)  # Copy
        if dataId is not None:
            noDupCols += list(dataId.keys())

        # Check the local list (not self.noDupCols) so that the dataId keys
        # are verified as well.
        for col in noDupCols:
            self.assertIn(col, df.columns)

        # Every other column must exist once per short filter name, as
        # '<filter>_<column>'.
        missing = []
        for filt, col in itertools.product(self.shortFilters,
                                           self.columnNames):
            if col in noDupCols:
                continue
            mungedCol = '{0}_{1}'.format(filt, col)
            if mungedCol not in df.columns:
                missing.append(mungedCol)
        self.assertEqual(missing, [])

    def testRun(self):
        """Run the task with default functors, explicit functors and a
        dataId, for both multilevel and flat output.
        """
        dataId = {'patch': '4,4'}

        # Test with multilevel output
        self.task.config.multilevelOutput = True
        df = self.task.run(self.parq)
        self.checkMultiLevelResults(df)
        df = self.task.run(self.parq, funcs=self.funcs)
        self.checkMultiLevelResults(df)
        df = self.task.run(self.parq, dataId=dataId)
        self.checkMultiLevelResults(df)

        # Test with flat output
        self.task.config.multilevelOutput = False
        df = self.task.run(self.parq)
        self.checkFlatResults(df)
        df = self.task.run(self.parq, funcs=self.funcs)
        self.checkFlatResults(df)
        df = self.task.run(self.parq, dataId=dataId)
        self.checkFlatResults(df)