Exemplo n.º 1
0
    def test_storeSources(self):
        """Store DiaSources and read them back by region and by object ID.

        DiaObjects are stored first, then the sources built from them. Both
        read-back methods are checked against the number of stored sources.
        """
        config = PpdbConfig(db_url="sqlite:///",
                            isolation_level="READ_UNCOMMITTED",
                            read_sources_months=12,
                            read_forced_sources_months=12)
        ppdb = Ppdb(config)
        ppdb.makeSchema()

        pixel_ranges = _makePixelRanges()
        visit_time = datetime.datetime.now()

        # have to store Objects first
        if self.use_pandas:
            objects = _makeObjectCatalogPandas(pixel_ranges)
            catalog, oids = _makeSourceCatalogPandas(objects)
        else:
            objects = _makeObjectCatalog(pixel_ranges)
            catalog, oids = _makeSourceCatalog(objects)

        # save the objects
        ppdb.storeDiaObjects(objects, visit_time)

        # save the sources
        ppdb.storeDiaSources(catalog)

        # read it back and check sizes; the flag is passed by keyword so the
        # call reads the same way as in the other tests
        res = ppdb.getDiaSourcesInRegion(pixel_ranges, visit_time,
                                         return_pandas=self.use_pandas)
        self._assertCatalog(res, len(catalog), type=self.data_type)

        # read it back using different method
        res = ppdb.getDiaSources(oids, visit_time, return_pandas=self.use_pandas)
        self._assertCatalog(res, len(catalog), type=self.data_type)
Exemplo n.º 2
0
 def test_makeSchema(self):
     """Build the Ppdb schema on an in-memory sqlite engine.

     The test passes as long as schema creation raises no exception.
     """
     # The default READ_COMMITTED isolation level is not supported by
     # sqlite, and an in-memory database has to use a connection pool.
     in_memory_config = PpdbConfig(db_url="sqlite://",
                                   isolation_level="READ_UNCOMMITTED")
     # No assertions: reaching the end without an exception is the check.
     Ppdb(in_memory_config).makeSchema()
 def setUp(self):
     """Create a Ppdb on an in-memory sqlite database with minimal afw schemas."""
     cfg = PpdbConfig()
     cfg.db_url = 'sqlite://'  # Create DB in memory.
     cfg.isolation_level = "READ_UNCOMMITTED"
     cfg.dia_object_index = "baseline"
     cfg.dia_object_columns = []
     self.ppdbCfg = cfg

     # Both tables use the minimal afw source schema.
     minimal_schemas = {
         "DiaObject": afwTable.SourceTable.makeMinimalSchema(),
         "DiaSource": afwTable.SourceTable.makeMinimalSchema(),
     }
     self.ppdb = Ppdb(config=self.ppdbCfg, afw_schemas=minimal_schemas)
     self.ppdb._schema.makeSchema()
class TestApVerifyQueries(unittest.TestCase):
    """Exercise countUnassociatedObjects and isVisitProcessed on an in-memory Ppdb."""

    def setUp(self):
        """Create a fresh Ppdb on an in-memory sqlite database."""
        cfg = PpdbConfig()
        cfg.db_url = 'sqlite://'  # Create DB in memory.
        cfg.isolation_level = "READ_UNCOMMITTED"
        cfg.dia_object_index = "baseline"
        cfg.dia_object_columns = []
        self.ppdbCfg = cfg

        # Both tables use the minimal afw source schema.
        minimal_schemas = {
            "DiaObject": afwTable.SourceTable.makeMinimalSchema(),
            "DiaSource": afwTable.SourceTable.makeMinimalSchema(),
        }
        self.ppdb = Ppdb(config=self.ppdbCfg, afw_schemas=minimal_schemas)
        self.ppdb._schema.makeSchema()

    def tearDown(self):
        """Drop the reference to the Ppdb created in setUp."""
        del self.ppdb

    def test_count_zero_objects(self):
        """An empty database reports zero unassociated objects."""
        self.assertEqual(countUnassociatedObjects(self.ppdb), 0)

    def test_count_objects(self):
        """Store objects, give the last one nDiaSources=2, and check the count.

        The expected number of unassociated objects is one less than the
        number of objects created.
        """
        num_objects = 5
        objects = createTestObjects(num_objects, {'nDiaSources': 'I'})
        objects[-1]['nDiaSources'] = 2

        # nsecs must be an integer, not 1.4e18
        timestamp = dafBase.DateTime(nsecs=1400000000 * 10**9)
        self.ppdb.storeDiaObjects(objects, timestamp.toPython())

        unassociated = countUnassociatedObjects(self.ppdb)
        self.assertEqual(num_objects - 1, unassociated)

    @staticmethod
    def _makeVisitInfo(exposureId):
        """Return a mock VisitInfo whose getExposureId() returns ``exposureId``."""
        # Real VisitInfo hard to create
        return unittest.mock.NonCallableMock(
            afwImage.VisitInfo,
            **{"getExposureId.return_value": exposureId}
        )

    def test_isExposureProcessed(self):
        """isVisitProcessed is true for a stored ccdVisitId, false for another."""
        num_sources = 5
        stored_visit_id = 2381
        sources = createTestObjects(num_sources, {'ccdVisitId': 'I'})
        for record in sources:
            record['ccdVisitId'] = stored_visit_id

        self.ppdb.storeDiaSources(sources)

        stored_visit = self._makeVisitInfo(stored_visit_id)
        self.assertTrue(isVisitProcessed(self.ppdb, stored_visit))
        other_visit = self._makeVisitInfo(42)
        self.assertFalse(isVisitProcessed(self.ppdb, other_visit))
Exemplo n.º 5
0
    def test_emptyGetsBaseline0months(self):
        """Test for getting data from empty database with source reading
        disabled.

        All get() methods should return empty results, only useful for
        checking that code is not broken. With both ``read_sources_months``
        and ``read_forced_sources_months`` set to 0 the source and forced
        source getters are expected to return `None`.
        """

        # set read_sources_months to 0 so that Forced/Sources are None
        config = PpdbConfig(db_url="sqlite:///",
                            isolation_level="READ_UNCOMMITTED",
                            read_sources_months=0,
                            read_forced_sources_months=0)
        ppdb = Ppdb(config)
        ppdb.makeSchema()

        pixel_ranges = _makePixelRanges()
        visit_time = datetime.datetime.now()

        # get objects by region
        res = ppdb.getDiaObjects(pixel_ranges, return_pandas=self.use_pandas)
        self._assertCatalog(res, 0, type=self.data_type)

        # get sources by region
        res = ppdb.getDiaSourcesInRegion(pixel_ranges, visit_time, return_pandas=self.use_pandas)
        self.assertIs(res, None)

        # get sources by object ID, empty object list; this result used to be
        # overwritten below without ever being checked
        res = ppdb.getDiaSources([], visit_time, return_pandas=self.use_pandas)
        self.assertIs(res, None)

        # get forced sources by object ID, empty object list
        res = ppdb.getDiaForcedSources([], visit_time, return_pandas=self.use_pandas)
        self.assertIs(res, None)
Exemplo n.º 6
0
    def test_storeObjectsLast(self):
        """Round-trip DiaObjects through the DiaObjectLast table."""
        # don't care about sources.
        ppdb = Ppdb(PpdbConfig(db_url="sqlite:///",
                               isolation_level="READ_UNCOMMITTED",
                               dia_object_index="last_object_table",
                               object_last_replace=True))
        ppdb.makeSchema()

        ranges = _makePixelRanges()
        now = datetime.datetime.now()

        # build the Objects catalog in the flavour selected by use_pandas
        make_catalog = _makeObjectCatalogPandas if self.use_pandas else _makeObjectCatalog
        objects = make_catalog(ranges)

        # write the catalog out
        ppdb.storeDiaObjects(objects, now)

        # fetch it again and compare sizes
        found = ppdb.getDiaObjects(ranges, return_pandas=self.use_pandas)
        self._assertCatalog(found, len(objects), type=self.data_type)
Exemplo n.º 7
0
    def test_emptyGetsObjectLast(self):
        """Read DiaObjects from an empty database via the DiaObjectLast table.

        The query is expected to return an empty result; the test only
        guards against the code path being broken.
        """

        # don't care about sources.
        ppdb = Ppdb(PpdbConfig(db_url="sqlite:///",
                               isolation_level="READ_UNCOMMITTED",
                               dia_object_index="last_object_table"))
        ppdb.makeSchema()

        # region query on the empty database
        ranges = _makePixelRanges()
        found = ppdb.getDiaObjects(ranges, return_pandas=self.use_pandas)
        self._assertCatalog(found, 0, type=self.data_type)
Exemplo n.º 8
0
def main():
    """Command-line entry point: create the Prompt Products Database schema.

    Parses the command line, configures logging, optionally loads a config
    file, and then asks `Ppdb` to make the schema with the requested options.
    """
    arg_parser = ArgumentParser(description='Create schema for Prompt Products Database.')
    arg_parser.add_argument('-v', '--verbose', action='count', default=0,
                            help='More verbose output, can use several times.')
    arg_parser.add_argument('--drop', action='store_true', default=False,
                            help='Drop existing schema first, this will delete '
                            'all data in the tables, use with extreme caution')
    arg_parser.add_argument('-c', '--config', default=None, metavar='PATH',
                            help='Name of the database config file (pex.config)')
    arg_parser.add_argument('-t', '--tablespace', default=None, metavar='TABLESPACE',
                            help='Name of the Oracle tablespace for new tables.')
    arg_parser.add_argument('-i', '--iot', default=False,
                            action='store_true',
                            help='Make index-organized DiaObjectLast table.')
    arg_parser.add_argument('-a', '--association', default=False,
                            action='store_true',
                            help='Use afw.table schema from ap_association')
    options = arg_parser.parse_args()

    # set up logging according to the requested verbosity
    _configLogger(options.verbose)

    config = L1dbprotoConfig()
    if options.config:
        config.load(options.config)

    # afw.table schemas are only supplied when --association is given
    if options.association:
        schemas = {"DiaObject": make_minimal_dia_object_schema(),
                   "DiaSource": make_minimal_dia_source_schema()}
    else:
        schemas = None

    # build the database interface and create the schema
    ppdb = Ppdb(config=config, afw_schemas=schemas)
    ppdb.makeSchema(drop=options.drop,
                    oracle_tablespace=options.tablespace,
                    oracle_iot=options.iot)
Exemplo n.º 9
0
    def _getPpdb(self, config):
        """Extract a Ppdb object from an arbitrary task config.

        Parameters
        ----------
        config : `lsst.pex.config.Config` or `None`
            A config that may contain a `lsst.dax.ppdb.PpdbConfig`.
            Behavior is undefined if there is more than one such member.

        Returns
        -------
        ppdb : `lsst.dax.ppdb.Ppdb`-like or `None`
            A `lsst.dax.ppdb.Ppdb` object or a drop-in replacement, or `None`
            if no `lsst.dax.ppdb.PpdbConfig` is present in ``config``.
        """
        if config is None:
            return None
        if isinstance(config, PpdbConfig):
            return Ppdb(config)

        for field in config.values():
            if isinstance(field, ConfigurableInstance):
                result = self._getPpdbFromConfigurableField(field)
                if result:
                    return result
            elif isinstance(field, ConfigChoiceField.instanceDictClass):
                try:
                    # can't test with hasattr because of non-standard getattr
                    field.names
                except FieldValidationError:
                    result = self._getPpdb(field.active)
                else:
                    result = self._getPpdbFromConfigIterable(field.active)
                if result:
                    return result
            elif isinstance(field, ConfigDictField.DictClass):
                result = self._getPpdbFromConfigIterable(field.values())
                if result:
                    return result
            elif isinstance(field, Config):
                # Can't test for `ConfigField` more directly than this
                result = self._getPpdb(field)
                if result:
                    return result
        return None
Exemplo n.º 10
0
    def run(self):
        """Run the whole simulation: set up, then process a sequence of visits.

        Steps, in order:

        1. Load the config file named by ``self.args.config`` if given; if
           ``self.args.dump_config`` is set, write the config to stdout and
           return.
        2. Instantiate `Ppdb`. When ``config.divide > 1`` and ``mp_mode`` is
           ``"mpi"``, check that the number of MPI processes equals
           ``divide**2``; every process with non-zero rank runs
           ``run_mpi_tile_loop`` and returns its result.
        3. Derive the starting visit ID and time from ``db.lastVisit()``, or
           from the config when there is no previous visit.
        4. Load variable sources from ``config.sources_file`` and bring
           ``self.lastObjectId`` / ``self.lastSourceId`` up to date with the
           last stored visit.
        5. For each visit: generate DIA sources, assign IDs to transients,
           process the visit (in-process, via ``os.fork`` per tile, or via MPI
           tiles), and save the visit unless ``self.args.no_update`` is set.
        6. In MPI mode, send a final batch of `None` items to the tile
           processes.

        Returns
        -------
        status : `int`
            0 on success or after dumping the config; 1 if
            ``self.lastObjectId`` is lower than the number of variable
            sources. For non-zero MPI ranks the value returned by
            ``run_mpi_tile_loop`` is passed through.

        Raises
        ------
        ValueError
            Raised in MPI mode if the number of MPI processes differs from
            the number of tiles (``divide**2``).
        """

        if self.args.config:
            self.config.load(self.args.config)

        if self.args.dump_config:
            self.config.saveToStream(sys.stdout)
            return 0

        # instantiate db interface
        db = Ppdb(self.config)

        if self.config.divide > 1:
            # check that we have reasonable MPI setup
            if self.config.mp_mode == "mpi":
                comm = MPI.COMM_WORLD
                num_proc = comm.Get_size()
                rank = comm.Get_rank()
                node = MPI.Get_processor_name()
                _LOG.info(COLOR_YELLOW + "MPI job rank=%d size=%d, node %s" + COLOR_RESET,
                          rank, num_proc, node)
                # one MPI process per tile of the divide x divide grid
                num_tiles = self.config.divide**2
                if num_proc != num_tiles:
                    raise ValueError(f"Number of MPI processes ({num_proc}) "
                                     f"does not match number of tiles ({num_tiles})")
                if rank != 0:
                    # run simple loop for all non-master processes
                    return self.run_mpi_tile_loop(db, comm)

        # Initialize starting values from database visits table
        last_visit = db.lastVisit()
        if last_visit is not None:
            # continue right after the last stored visit
            start_visit_id = last_visit.visitId + 1
            start_time = last_visit.visitTime + timedelta(seconds=self.config.interval)
        else:
            # no stored visit, start from configured values
            start_visit_id = self.config.start_visit_id
            start_time = self.config.start_time_dt

        if self.config.divide > 1:
            _LOG.info("Will divide FOV into %dx%d regions", self.config.divide, self.config.divide)
        _LOG.info("Max. number of ranges for pixelator: %d", self.config.htm_max_ranges)

        # read sources file
        _LOG.info("Start loading variable sources from %r", self.config.sources_file)
        var_sources = numpy.load(self.config.sources_file)
        _LOG.info("Finished loading variable sources, count = %s", len(var_sources))

        # diaObjectId for last new DIA object, for variable sources we use their
        # index as objectId, for transients we want to use ID outside that range
        if last_visit is not None and last_visit.lastObjectId is not None:
            self.lastObjectId = max(self.lastObjectId, last_visit.lastObjectId)
        if self.lastObjectId < len(var_sources):
            # transient IDs would collide with variable source indices
            _LOG.error('next object id is too low: %s', self.lastObjectId)
            return 1
        _LOG.debug("lastObjectId: %s", self.lastObjectId)

        # diaSourceId for last DIA source stored in database
        if last_visit is not None and last_visit.lastSourceId is not None:
            self.lastSourceId = max(self.lastSourceId, last_visit.lastSourceId)
        _LOG.info("lastSourceId: %s", self.lastSourceId)

        # loop over visits
        visitTimes = _visitTimes(start_time, self.config.interval, self.args.num_visits)
        for visit_id, dt in enumerate(visitTimes, start_visit_id):

            # "daily" job is triggered by visit count: every 1000th visit ID
            if visit_id % 1000 == 0:
                _LOG.info(COLOR_YELLOW + "+++ Start daily activities" + COLOR_RESET)
                db.dailyJob()
                _LOG.info(COLOR_YELLOW + "+++ Done with daily activities" + COLOR_RESET)

            _LOG.info(COLOR_GREEN + "+++ Start processing visit %s at %s" + COLOR_RESET, visit_id, dt)
            loop_timer = timer.Timer().start()

            with timer.Timer("DIA"):
                # point telescope in random southern direction
                pointing_xyz = generators.rand_sphere_xyz(1, -1)[0]
                pointing_v = UnitVector3d(pointing_xyz[0], pointing_xyz[1], pointing_xyz[2])
                ra = LonLat.longitudeOf(pointing_v).asDegrees()
                decl = LonLat.latitudeOf(pointing_v).asDegrees()

                # sphgeom.Circle opening angle is actually a half of opening angle
                region = Circle(pointing_v, Angle(self.config.FOV_rad/2))

                _LOG.info("Pointing ra, decl = %s, %s; xyz = %s", ra, decl, pointing_xyz)

                # Simulating difference image analysis
                dia = DIA.DIA(pointing_xyz, self.config.FOV_rad, var_sources,
                              self.config.false_per_visit + self.config.transient_per_visit)
                sources, indices = dia.makeSources()
                _LOG.info("DIA generated %s sources", len(sources))

                # assign IDs to transients: entries with a negative index get
                # the next free object ID
                for i in range(len(sources)):
                    if indices[i] < 0:
                        self.lastObjectId += 1
                        indices[i] = self.lastObjectId

            # print current database row counts, this takes long time
            # so only do it once in a while (every 200 visits up to visit
            # 10000, every 1000 visits after that)
            modu = 200 if visit_id <= 10000 else 1000
            if visit_id % modu == 0:
                counts = db.tableRowCount()
                for tbl, count in sorted(counts.items()):
                    _LOG.info('%s row count: %s', tbl, count)

            # numpy seems to do some multi-threaded stuff which "leaks" CPU cycles to the code below
            # and it gets counted as resource usage in timers, add a short delay here so that threads
            # finish and don't influence our timers below.
            time.sleep(0.1)

            # NOTE(review): when divide != 1 and mp_mode is neither "fork" nor
            # "mpi", no branch below processes the visit.
            if self.config.divide == 1:

                # do it in-process
                with timer.Timer("VisitProcessing"):
                    self.visit(db, visit_id, dt, region, sources, indices)

            else:

                if self.config.mp_mode == "fork":

                    tiles = geom.make_square_tiles(
                        self.config.FOV_rad, self.config.divide, self.config.divide, pointing_v)

                    with timer.Timer("VisitProcessing"):
                        # spawn subprocesses to handle individual tiles
                        children = []
                        # NOTE(review): this loop rebinds ``region``, replacing
                        # the FOV circle computed in the DIA step above.
                        for ix, iy, region in tiles:

                            # make sure lastSourceId is unique in each process
                            self.lastSourceId += len(sources)
                            tile = (ix, iy)

                            pid = os.fork()
                            if pid == 0:
                                # child

                                self.visit(db, visit_id, dt, region, sources, indices, tile)
                                # stop here, the child must not continue the
                                # visit loop
                                sys.exit(0)

                            else:
                                # parent: remember child PID to wait for it
                                _LOG.debug("Forked process %d for tile %s", pid, tile)
                                children.append(pid)

                        # wait until all children finish; failures are only
                        # logged, they do not stop the run
                        for pid in children:
                            try:
                                pid, status = os.waitpid(pid, 0)
                                if status != 0:
                                    _LOG.warning(COLOR_RED + "Child process PID=%s failed: %s" +
                                                 COLOR_RESET, pid, status)
                            except OSError as exc:
                                _LOG.warning(COLOR_RED + "wait failed for PID=%s: %s" +
                                             COLOR_RESET, pid, exc)

                elif self.config.mp_mode == "mpi":

                    tiles = geom.make_square_tiles(
                        self.config.FOV_rad, self.config.divide, self.config.divide, pointing_v, False)
                    _LOG.info("Split FOV into %d tiles for MPI", len(tiles))

                    # spawn subprocesses to handle individual tiles, special
                    # care needed for self.lastSourceId because it's
                    # propagated back from (0, 0)
                    lastSourceId = self.lastSourceId
                    tile_data = []
                    for ix, iy, region in tiles:
                        # make sure lastSourceId is unique in each process
                        lastSourceId += len(sources)
                        tile = (ix, iy)
                        tile_data += [(visit_id, dt, region, sources, indices, tile, lastSourceId)]

                    with timer.Timer("VisitProcessing"):
                        _LOG.info("Scatter sources to %d tile processes", len(tile_data))
                        self.run_mpi_tile(db, MPI.COMM_WORLD, tile_data)
                    # set the counter explicitly to the value past all tiles
                    self.lastSourceId = lastSourceId

            if not self.args.no_update:
                # store last visit info
                db.saveVisit(visit_id, dt)

            _LOG.info(COLOR_BLUE + "--- Finished processing visit %s, time: %s" +
                      COLOR_RESET, visit_id, loop_timer)

        # stop MPI slaves
        if self.config.divide > 1 and self.config.mp_mode == "mpi":
            _LOG.info("Stopping MPI tile processes")
            # one None per tile; presumably a None item tells a tile process
            # to leave its loop -- confirm in run_mpi_tile_loop
            tile_data = [None] * self.config.divide**2
            self.run_mpi_tile(db, MPI.COMM_WORLD, tile_data)

        return 0