Example #1
def init_db(options):
    from tkp.config import initialize_pipeline_config, get_database_config
    cfgfile = os.path.join(os.getcwd(), "pipeline.cfg")
    if os.path.exists(cfgfile):
        pipe_config = initialize_pipeline_config(cfgfile, "notset")
        dbconfig = get_database_config(pipe_config['database'], apply=False)
    else:
        dbconfig = get_database_config(None, apply=False)

    if 'engine' not in dbconfig or not dbconfig['engine']:
        dbconfig['engine'] = 'postgresql'

    if 'port' not in dbconfig or not dbconfig['port']:
        if dbconfig['engine'] == 'monetdb':
            dbconfig['port'] = 50000
        else:
            dbconfig['port'] = 5432

    if 'database' not in dbconfig or not dbconfig['database']:
        dbconfig['database'] = getpass.getuser()

    if 'user' not in dbconfig or not dbconfig['user']:
        dbconfig['user'] = dbconfig['database']

    if 'password' not in dbconfig or not dbconfig['password']:
        dbconfig['password'] = dbconfig['user']

    if 'host' not in dbconfig or not dbconfig['host']:
        dbconfig['host'] = 'localhost'

    dbconfig['yes'] = options.yes
    dbconfig['destroy'] = options.destroy

    populate(dbconfig)
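
The options argument above only needs yes and destroy attributes. A minimal driver sketch, assuming an argparse-based CLI (the -d/--destroy flag is suggested by the error text in the populate examples further down; the other flag names are placeholders):

import argparse

# Hypothetical command-line wrapper around init_db; init_db only reads
# options.yes and options.destroy.
parser = argparse.ArgumentParser(description="Initialise a TRAP database")
parser.add_argument("-y", "--yes", action="store_true",
                    help="don't ask for confirmation")
parser.add_argument("-d", "--destroy", action="store_true",
                    help="drop any existing tables first")
init_db(parser.parse_args())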
Example #2
def get_db_config():
    cfgfile = os.path.join(os.getcwd(), "pipeline.cfg")
    if os.path.exists(cfgfile):
        pipe_config = initialize_pipeline_config(cfgfile, "notset")
        dbconfig = get_database_config(pipe_config['database'], apply=False)
    else:
        dbconfig = get_database_config(None, apply=False)
    return dbconfig
Example #3
def init_db(options):
    from tkp.config import initialize_pipeline_config, get_database_config
    cfgfile = os.path.join(os.getcwd(), "pipeline.cfg")
    if os.path.exists(cfgfile):
        pipe_config = initialize_pipeline_config(cfgfile, "notset")
        dbconfig = get_database_config(pipe_config['database'], apply=False)
    else:
        dbconfig = get_database_config(None, apply=False)

    dbconfig['yes'] = options.yes
    dbconfig['destroy'] = options.destroy

    populate(dbconfig)
Example #4
File: main.py Project: jdswinbank/tkp
def setup(pipe_config, supplied_mon_coords=None):
    """
    Initialises the pipeline run.
    """
    if not supplied_mon_coords:
        supplied_mon_coords = []

    # Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_logging(log_dir,
                  debug=pipe_config.logging.debug,
                  use_colorlog=pipe_config.logging.colorlog)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    sync_rejectreasons(tkp.db.Database().Session())

    job_config, dataset_id = initialise_dataset(job_config,
                                                supplied_mon_coords)

    return job_dir, job_config, dataset_id
Example #5
def setup(pipe_config, supplied_mon_coords=None):
    """
    Initialises the pipeline run.
    """
    if not supplied_mon_coords:
        supplied_mon_coords = []

    # Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_logging(log_dir, debug=pipe_config.logging.debug,
                  use_colorlog=pipe_config.logging.colorlog)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    sync_rejectreasons(tkp.db.Database().Session())

    job_config, dataset_id = initialise_dataset(job_config, supplied_mon_coords)

    return job_dir, job_config, dataset_id
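
Both setup variants above take a pipe_config produced by initialize_pipeline_config. A minimal caller sketch, following the pipeline.cfg convention used elsewhere on this page ("my_job" is a placeholder job name):

import os
from tkp.config import initialize_pipeline_config

# Load ./pipeline.cfg for the (placeholder) job name, then run the setup step.
pipe_config = initialize_pipeline_config(
    os.path.join(os.getcwd(), "pipeline.cfg"), "my_job")
job_dir, job_config, dataset_id = setup(pipe_config)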
Example #6
 def test_use_username_as_default(self):
     # database name and password default to the username
     os.environ["TKP_DBUSER"] = DUMMY_VALUE
     os.environ["TKP_DBENGINE"] = "monetdb"
     os.environ["TKP_DBHOST"] = DUMMY_VALUE
     os.environ["TKP_DBPORT"] = DUMMY_INT
     db_config = get_database_config(self.pipeline_cfg['database'])
     self._test_for_dummy_values(db_config)
Example #7
 def test_env_vars(self):
     # Demonstrate that we correctly read the environment
     os.environ["TKP_DBENGINE"] = "monetdb"
     os.environ["TKP_DBNAME"] = DUMMY_VALUE
     os.environ["TKP_DBUSER"] = DUMMY_VALUE
     os.environ["TKP_DBPASSWORD"] = DUMMY_VALUE
     os.environ["TKP_DBHOST"] = DUMMY_VALUE
     os.environ["TKP_DBPORT"] = DUMMY_INT
     db_config = get_database_config(self.pipeline_cfg['database'])
     self._test_for_dummy_values(db_config)
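
test_env_vars passes a pipeline config yet still expects the dummy values back, so the TKP_DB* environment variables take precedence over the file-based settings; they can also be used on their own. A sketch with placeholder values (environment values must be strings, including the port):

import os
from tkp.config import get_database_config

# Placeholder connection settings; every value must be a string.
os.environ["TKP_DBENGINE"] = "postgresql"
os.environ["TKP_DBNAME"] = "trap"
os.environ["TKP_DBUSER"] = "trap"
os.environ["TKP_DBPASSWORD"] = "trap"
os.environ["TKP_DBHOST"] = "localhost"
os.environ["TKP_DBPORT"] = "5432"

db_config = get_database_config()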
Example #8
 def test_defaults_monetdb(self):
     # Demonstrate that we get the expected default values
     os.environ["TKP_DBENGINE"] = "monetdb"
     username = getpass.getuser()
     db_config = get_database_config()
     self.assertEqual(db_config['engine'], "monetdb")
     self.assertEqual(db_config['database'], username)
     self.assertEqual(db_config['user'], "monetdb")
     self.assertEqual(db_config['password'], "monetdb")
     self.assertEqual(db_config['host'], "localhost")
     self.assertEqual(db_config['port'], 50000)
Example #9
 def test_defaults_monetdb(self):
     # Demonstrate that we get the expected default values
     os.environ["TKP_DBENGINE"] = "monetdb"
     username = getpass.getuser()
     db_config = get_database_config()
     self.assertEqual(db_config['engine'], "monetdb")
     self.assertEqual(db_config['database'], username)
     self.assertEqual(db_config['user'], username)
     self.assertEqual(db_config['password'], username)
     self.assertEqual(db_config['host'], "localhost")
     self.assertEqual(db_config['port'], 50000)
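
A hypothetical companion test for the PostgreSQL engine, assuming its defaults mirror the fallbacks applied by init_db in Example #1 (port 5432, everything else derived from the username):

 def test_defaults_postgresql(self):
     # Hypothetical: assumes the postgresql defaults mirror the monetdb case,
     # apart from the engine-specific port 5432.
     os.environ["TKP_DBENGINE"] = "postgresql"
     username = getpass.getuser()
     db_config = get_database_config()
     self.assertEqual(db_config['engine'], "postgresql")
     self.assertEqual(db_config['database'], username)
     self.assertEqual(db_config['user'], username)
     self.assertEqual(db_config['password'], username)
     self.assertEqual(db_config['host'], "localhost")
     self.assertEqual(db_config['port'], 5432)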
Example #10
 def test_database_dump_monet(self):
     dbconfig = get_database_config()
     with NamedTemporaryFile() as dumpfile:
         dump_monetdb(dbconfig['host'], dbconfig['port'],
                      dbconfig['database'], dbconfig['user'],
                      dbconfig['password'], dumpfile.name)
         # Output should start with "START TRANSACTION;" and end with
         # "COMMIT;"
         dumpfile.seek(0)
         self.assertEqual(dumpfile.readline().strip(), "START TRANSACTION;")
         dumpfile.seek(-8, os.SEEK_END)
         self.assertEqual(dumpfile.readline().strip(), "COMMIT;")
Example #11
 def test_database_dump_monet(self):
     dbconfig = get_database_config()
     with NamedTemporaryFile() as dumpfile:
         dump_monetdb(
             dbconfig['host'], dbconfig['port'], dbconfig['database'],
             dbconfig['user'], dbconfig['password'], dumpfile.name
         )
         # Output should start with "START TRANSACTION;" and end with
         # "COMMIT;"
         dumpfile.seek(0)
         self.assertEqual(dumpfile.readline().strip(), "START TRANSACTION;")
         dumpfile.seek(-8, os.SEEK_END)
         self.assertEqual(dumpfile.readline().strip(), "COMMIT;")
Example #12
class TestDump(unittest.TestCase):
    @requires_database()
    @unittest.skipUnless(get_database_config()['engine'] == "postgresql",
                         "Postgres disabled")
    def test_database_dump_pg(self):
        dbconfig = get_database_config()
        with NamedTemporaryFile() as dumpfile:
            dump_pg(dbconfig['host'], dbconfig['port'], dbconfig['database'],
                    dbconfig['user'], dbconfig['password'], dumpfile.name)

            # Output should start with "--\n-- PostgreSQL database dump\n":
            dumpfile.seek(0)
            self.assertEqual(dumpfile.readline().strip(), "--")
            self.assertEqual(dumpfile.readline().strip(),
                             "-- PostgreSQL database dump")
            # And end with "-- PostgreSQL database dump complete\n--\n\n"
            dumpfile.seek(-41, os.SEEK_END)
            self.assertEqual(dumpfile.read().strip(),
                             "-- PostgreSQL database dump complete\n--")

    @requires_database()
    @unittest.skipUnless(get_database_config()['engine'] == "monetdb",
                         "Monet disabled")
    def test_database_dump_monet(self):
        dbconfig = get_database_config()
        with NamedTemporaryFile() as dumpfile:
            dump_monetdb(dbconfig['host'], dbconfig['port'],
                         dbconfig['database'], dbconfig['user'],
                         dbconfig['password'], dumpfile.name)
            # Output should start with "START TRANSACTION;" and end with
            # "COMMIT;"
            dumpfile.seek(0)
            self.assertEqual(dumpfile.readline().strip(), "START TRANSACTION;")
            dumpfile.seek(-8, os.SEEK_END)
            self.assertEqual(dumpfile.readline().strip(), "COMMIT;")

    def test_database_dump_unknown(self):
        self.assertRaises(NotImplementedError, dump_db, "dummy_engine", None,
                          None, None, None, None, None)
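
The last test shows dump_db taking the engine name first; a sketch of dumping whichever engine is configured, assuming dump_db forwards the remaining arguments in the same order as dump_pg / dump_monetdb ("backup.sql" is a placeholder path):

dbconfig = get_database_config()
# Dispatch on the configured engine; the output filename is a placeholder.
dump_db(dbconfig['engine'], dbconfig['host'], dbconfig['port'],
        dbconfig['database'], dbconfig['user'], dbconfig['password'],
        "backup.sql")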
Example #13
    def test_database_dump_pg(self):
        dbconfig = get_database_config()
        with NamedTemporaryFile() as dumpfile:
            dump_pg(dbconfig['host'], dbconfig['port'], dbconfig['database'],
                    dbconfig['user'], dbconfig['password'], dumpfile.name)

            # Output should start with "--\n-- PostgreSQL database dump\n":
            dumpfile.seek(0)
            self.assertEqual(dumpfile.readline().strip(), "--")
            self.assertEqual(dumpfile.readline().strip(),
                             "-- PostgreSQL database dump")
            # And end with "-- PostgreSQL database dump complete\n--\n\n"
            dumpfile.seek(-41, os.SEEK_END)
            self.assertEqual(dumpfile.read().strip(),
                             "-- PostgreSQL database dump complete\n--")
Example #14
    def test_database_dump_pg(self):
        dbconfig = get_database_config()
        with NamedTemporaryFile() as dumpfile:
            dump_pg(
                dbconfig['host'], dbconfig['port'], dbconfig['database'],
                dbconfig['user'], dbconfig['password'], dumpfile.name
            )

            # Output should start with "--\n-- PostgreSQL database dump\n":
            dumpfile.seek(0)
            self.assertEqual(dumpfile.readline().strip(), "--")
            self.assertEqual(dumpfile.readline().strip(), "-- PostgreSQL database dump")
            # And end with "-- PostgreSQL database dump complete\n--\n\n"
            dumpfile.seek(-41, os.SEEK_END)
            self.assertEqual(dumpfile.read().strip(), "-- PostgreSQL database dump complete\n--")
Example #15
def populate(dbconfig):
    """
    Populates a database with TRAP tables.

    args:
        dbconfig: a dict containing db connection settings

    raises an exception when one of the tables already exists.
    """

    if not dbconfig['yes']:
        verify(dbconfig)

    # configure the database before we do anything else
    get_database_config(dbconfig, apply=True)

    database = tkp.db.database.Database()
    database.connect(check=False)

    if dbconfig['destroy']:
        destroy(dbconfig)

    if dbconfig['engine'] == 'postgresql':
        # make sure plpgsql is enabled
        try:
            database.session.execute("CREATE LANGUAGE plpgsql;")
        except ProgrammingError:
            database.session.rollback()
    if dbconfig['engine'] == 'monetdb':
        set_monetdb_schema(database.session, dbconfig)
        # reconnect to switch to schema
        database.session.commit()
        database.reconnect()

    batch_file = os.path.join(sql_repo, 'batch')

    error = "\nproblem processing \"%s\".\nMaybe the DB is already populated. "\
            "Try -d/--destroy argument for initdb cmd.\n\n"

    tkp.db.model.Base.metadata.create_all(database.alchemy_engine)

    version = tkp.db.model.Version(name='revision',
                                   value=tkp.db.model.SCHEMA_VERSION)
    database.session.add(version)

    tkp.db.quality.sync_rejectreasons(database.session)

    for line in [l.strip() for l in open(batch_file) if not l.startswith("#")]:
        if not line:  # skip empty lines
            continue
        print("processing %s" % line)
        sql_file = os.path.join(sql_repo, line)
        with open(sql_file) as sql_handler:
            sql = sql_handler.read()
            dialected = dialectise(sql, dbconfig['engine']).strip()

            if not dialected:  # empty query, can happen
                continue
            try:
                database.session.execute(dialected)
            except Exception as e:
                sys.stderr.write(error % sql_file)
                raise

    database.session.commit()
    database.close()
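
populate expects the same dict shape that init_db assembles in Example #1: the settings returned by get_database_config plus the 'yes' and 'destroy' keys. A minimal sketch of calling it directly:

from tkp.config import get_database_config

# Assemble a dbconfig the way init_db does, then populate non-interactively.
dbconfig = get_database_config(None, apply=False)
dbconfig['yes'] = True       # skip the interactive verify() prompt
dbconfig['destroy'] = False  # keep any existing tables
populate(dbconfig)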
Example #16
File: main.py Project: hughbg/tkp
def run(job_name, mon_coords, local=False):
    setup_event_listening(celery_app)
    pipe_config = initialize_pipeline_config(
        os.path.join(os.getcwd(), "pipeline.cfg"),
        job_name)

    debug = pipe_config.logging.debug
    #Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_log_file(log_dir, debug)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    se_parset = job_config.source_extraction
    deruiter_radius = job_config.association.deruiter_radius

    all_images = imp.load_source('images_to_process',
                                 os.path.join(job_dir,
                                              'images_to_process.py')).images

    logger.info("dataset %s contains %s images" % (job_name, len(all_images)))

    logger.info("performing database consistency check")
    if not dbconsistency.check():
        logger.error("Inconsistent database found; aborting")
        return 1

    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db

    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    logger.info("performing persistence step")
    image_cache_params = pipe_config.image_cache
    imgs = [[img] for img in all_images]
    metadatas = runner(tasks.persistence_node_step, imgs, [image_cache_params],
                       local)
    metadatas = [m[0] for m in metadatas]

    logger.info("Storing images")
    image_ids = store_images(metadatas,
                             job_config.source_extraction.extraction_radius_pix,
                             dataset_id)

    db_images = [Image(id=image_id) for image_id in image_ids]

    logger.info("performing quality check")
    urls = [img.url for img in db_images]
    arguments = [job_config]
    rejecteds = runner(tasks.quality_reject_check, urls, arguments, local)

    good_images = []
    for image, rejected in zip(db_images, rejecteds):
        if rejected:
            reason, comment = rejected
            steps.quality.reject_image(image.id, reason, comment)
        else:
            good_images.append(image)

    if not good_images:
        logger.warn("No good images under these quality checking criteria")
        return

    grouped_images = group_per_timestep(good_images)
    timestep_num = len(grouped_images)
    for n, (timestep, images) in enumerate(grouped_images):
        msg = "processing %s images in timestep %s (%s/%s)"
        logger.info(msg % (len(images), timestep, n+1, timestep_num))

        logger.info("performing source extraction")
        urls = [img.url for img in images]
        arguments = [se_parset]
        extract_sources = runner(tasks.extract_sources, urls, arguments, local)

        logger.info("storing extracted to database")
        for image, sources in zip(images, extract_sources):
            dbgen.insert_extracted_sources(image.id, sources, 'blind')

        logger.info("performing database operations")
        for image in images:
            logger.info("performing DB operations for image %s" % image.id)

            logger.info("performing source association")
            dbass.associate_extracted_sources(image.id,
                                              deRuiter_r=deruiter_radius)
            logger.info("performing null detections")
            null_detections = dbnd.get_nulldetections(image.id)
            logger.info("Found %s null detections" % len(null_detections))
            # Only if we found null_detections the next steps are necessary
            if len(null_detections) > 0:
                logger.info("performing forced fits")
                ff_nd = forced_fits(image.url, null_detections, se_parset)
                dbgen.insert_extracted_sources(image.id, ff_nd, 'ff_nd')
                logger.info("adding null detections")
                dbnd.associate_nd(image.id)
            if len(mon_coords) > 0:
                logger.info("performing monitoringlist")
                ff_ms = forced_fits(image.url, mon_coords, se_parset)
                dbgen.insert_extracted_sources(image.id, ff_ms, 'ff_ms')
                logger.info("adding monitoring sources")
                dbmon.associate_ms(image.id)
            transients = search_transients(image.id,
                                           job_config['transient_search'])
        dbgen.update_dataset_process_end_ts(dataset_id)
Example #17
 def test_invalid_dbengine(self):
     # Should *not* raise; database_config does not sanity check.
     os.environ["TKP_DBENGINE"] = DUMMY_VALUE
     get_database_config()
Example #18
File: populate.py Project: ajstewart/tkp
def populate(dbconfig):
    """
    Populates a database with TRAP tables.

    args:
        dbconfig: a dict containing db connection settings

    raises an exception when one of the tables already exists.
    """

    if not dbconfig['yes']:
        verify(dbconfig)

    # configure the database before we do anything else
    get_database_config(dbconfig, apply=True)

    database = tkp.db.database.Database()
    database.connect(check=False)

    if dbconfig['destroy']:
        destroy(dbconfig)

    if dbconfig['engine'] == 'postgresql':
        # make sure plpgsql is enabled
        try:
            database.session.execute("CREATE LANGUAGE plpgsql;")
        except ProgrammingError:
            database.session.rollback()
    if dbconfig['engine'] == 'monetdb':
        set_monetdb_schema(database.session, dbconfig)
        # reconnect to switch to schema
        database.session.commit()
        database.reconnect()

    batch_file = os.path.join(sql_repo, 'batch')

    error = "\nproblem processing \"%s\".\nMaybe the DB is already populated. "\
            "Try -d/--destroy argument for initdb cmd.\n\n"

    tkp.db.model.Base.metadata.create_all(database.alchemy_engine)

    version = tkp.db.model.Version(name='revision',
                                   value=tkp.db.model.SCHEMA_VERSION)
    database.session.add(version)

    tkp.db.quality.sync_rejectreasons(database.session)

    for line in [l.strip() for l in open(batch_file) if not l.startswith("#")]:
        if not line:  # skip empty lines
            continue
        print("processing %s" % line)
        sql_file = os.path.join(sql_repo, line)
        with open(sql_file) as sql_handler:
            sql = sql_handler.read()
            dialected = dialectise(sql, dbconfig['engine']).strip()

            if not dialected:  # empty query, can happen
                continue
            try:
                database.session.execute(dialected)
            except Exception as e:
                sys.stderr.write(error % sql_file)
                raise

    database.session.commit()
    database.close()
Example #19
 def test_unconfigured(self):
     # Should *not* raise.
     get_database_config()
Example #20
File: main.py Project: mkuiack/tkp
def run(job_name, supplied_mon_coords=[]):
    pipe_config = initialize_pipeline_config(
        os.path.join(os.getcwd(), "pipeline.cfg"),
        job_name)

    # get parallelise props. Defaults to multiproc with autodetect num cores
    parallelise = pipe_config.get('parallelise', {})
    distributor = os.environ.get('TKP_PARALLELISE', parallelise.get('method',
                                                                    'multiproc'))
    runner = Runner(distributor=distributor,
                    cores=parallelise.get('cores', 0))

    debug = pipe_config.logging.debug
    #Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_log_file(log_dir, debug)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    se_parset = job_config.source_extraction
    deruiter_radius = job_config.association.deruiter_radius
    beamwidths_limit = job_config.association.beamwidths_limit
    new_src_sigma = job_config.transient_search.new_source_sigma_margin

    all_images = imp.load_source('images_to_process',
                                 os.path.join(job_dir,
                                              'images_to_process.py')).images

    logger.info("dataset %s contains %s images" % (job_name, len(all_images)))

    logger.info("performing database consistency check")
    if not dbconsistency.check():
        logger.error("Inconsistent database found; aborting")
        return 1

    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
        if supplied_mon_coords:
            dbgen.insert_monitor_positions(dataset_id,supplied_mon_coords)
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db
        if supplied_mon_coords:
            logger.warn("Monitor positions supplied will be ignored. "
                        "(Previous dataset specified)")

    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    logger.info("performing persistence step")
    image_cache_params = pipe_config.image_cache
    imgs = [[img] for img in all_images]

    rms_est_sigma = job_config.persistence.rms_est_sigma
    rms_est_fraction = job_config.persistence.rms_est_fraction
    metadatas = runner.map("persistence_node_step", imgs,
                           [image_cache_params, rms_est_sigma, rms_est_fraction])
    metadatas = [m[0] for m in metadatas if m]

    logger.info("Storing images")
    image_ids = store_images(metadatas,
                             job_config.source_extraction.extraction_radius_pix,
                             dataset_id)

    db_images = [Image(id=image_id) for image_id in image_ids]

    logger.info("performing quality check")
    urls = [img.url for img in db_images]
    arguments = [job_config]
    rejecteds = runner.map("quality_reject_check", urls, arguments)

    good_images = []
    for image, rejected in zip(db_images, rejecteds):
        if rejected:
            reason, comment = rejected
            steps.quality.reject_image(image.id, reason, comment)
        else:
            good_images.append(image)

    if not good_images:
        logger.warn("No good images under these quality checking criteria")
        return

    grouped_images = group_per_timestep(good_images)
    timestep_num = len(grouped_images)
    for n, (timestep, images) in enumerate(grouped_images):
        msg = "processing %s images in timestep %s (%s/%s)"
        logger.info(msg % (len(images), timestep, n+1, timestep_num))

        logger.info("performing source extraction")
        urls = [img.url for img in images]
        arguments = [se_parset]

        extraction_results = runner.map("extract_sources", urls, arguments)

        logger.info("storing extracted sources to database")
        # we also set the image max/min RMS values which are calculated
        # during source extraction
        for image, results in zip(images, extraction_results):
            image.update(rms_min=results.rms_min, rms_max=results.rms_max,
                detection_thresh=se_parset['detection_threshold'],
                analysis_thresh=se_parset['analysis_threshold'])
            dbgen.insert_extracted_sources(image.id, results.sources, 'blind')

        logger.info("performing database operations")

        for image in images:
            logger.info("performing DB operations for image %s" % image.id)

            logger.info("performing source association")
            dbass.associate_extracted_sources(image.id,
                                              deRuiter_r=deruiter_radius,
                                              new_source_sigma_margin=new_src_sigma)

            expiration = job_config.source_extraction.expiration
            all_fit_posns, all_fit_ids = steps_ff.get_forced_fit_requests(image,
                                                                          expiration)
            if all_fit_posns:
                successful_fits, successful_ids = steps_ff.perform_forced_fits(
                    all_fit_posns, all_fit_ids, image.url, se_parset)

                steps_ff.insert_and_associate_forced_fits(image.id,successful_fits,
                                                          successful_ids)


        dbgen.update_dataset_process_end_ts(dataset_id)

    logger.info("calculating variability metrics")
    execute_store_varmetric(dataset_id)
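
A sketch of invoking this run function; the job name is a placeholder, and the (RA, Dec) tuple format for monitor coordinates is an assumption since insert_monitor_positions is not shown here:

# Placeholder job name; coordinate format assumed to be (RA, Dec) in degrees.
run("my_job", supplied_mon_coords=[(123.45, 45.67)])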
Example #21
def run(job_name, supplied_mon_coords=[]):
    pipe_config = initialize_pipeline_config(
        os.path.join(os.getcwd(), "pipeline.cfg"),
        job_name)

    # get parallelise props. Defaults to multiproc with autodetect num cores
    parallelise = pipe_config.get('parallelise', {})
    distributor = os.environ.get('TKP_PARALLELISE', parallelise.get('method',
                                                                    'multiproc'))
    runner = Runner(distributor=distributor,
                    cores=parallelise.get('cores', 0))

    debug = pipe_config.logging.debug
    #Setup logfile before we do anything else
    log_dir = pipe_config.logging.log_dir
    setup_log_file(log_dir, debug)

    job_dir = pipe_config.DEFAULT.job_directory
    if not os.access(job_dir, os.X_OK):
        msg = "can't access job folder %s" % job_dir
        logger.error(msg)
        raise IOError(msg)
    logger.info("Job dir: %s", job_dir)

    db_config = get_database_config(pipe_config.database, apply=True)
    dump_database_backup(db_config, job_dir)

    job_config = load_job_config(pipe_config)
    se_parset = job_config.source_extraction
    deruiter_radius = job_config.association.deruiter_radius
    beamwidths_limit = job_config.association.beamwidths_limit
    new_src_sigma = job_config.transient_search.new_source_sigma_margin

    all_images = imp.load_source('images_to_process',
                                 os.path.join(job_dir,
                                              'images_to_process.py')).images

    logger.info("dataset %s contains %s images" % (job_name, len(all_images)))

    logger.info("performing database consistency check")
    if not dbconsistency.check():
        logger.error("Inconsistent database found; aborting")
        return 1

    dataset_id = create_dataset(job_config.persistence.dataset_id,
                                job_config.persistence.description)

    if job_config.persistence.dataset_id == -1:
        store_config(job_config, dataset_id)  # new data set
        if supplied_mon_coords:
            dbgen.insert_monitor_positions(dataset_id,supplied_mon_coords)
    else:
        job_config_from_db = fetch_config(dataset_id)  # existing data set
        if check_job_configs_match(job_config, job_config_from_db):
            logger.debug("Job configs from file / database match OK.")
        else:
            logger.warn("Job config file has changed since dataset was "
                        "first loaded into database. ")
            logger.warn("Using job config settings loaded from database, see "
                        "log dir for details")
        job_config = job_config_from_db
        if supplied_mon_coords:
            logger.warn("Monitor positions supplied will be ignored. "
                        "(Previous dataset specified)")

    dump_configs_to_logdir(log_dir, job_config, pipe_config)

    logger.info("performing persistence step")
    image_cache_params = pipe_config.image_cache
    imgs = [[img] for img in all_images]

    rms_est_sigma = job_config.persistence.rms_est_sigma
    rms_est_fraction = job_config.persistence.rms_est_fraction
    metadatas = runner.map("persistence_node_step", imgs,
                           [image_cache_params, rms_est_sigma, rms_est_fraction])
    metadatas = [m[0] for m in metadatas if m]

    logger.info("Storing images")
    image_ids = store_images(metadatas,
                             job_config.source_extraction.extraction_radius_pix,
                             dataset_id)

    db_images = [Image(id=image_id) for image_id in image_ids]

    logger.info("performing quality check")
    urls = [img.url for img in db_images]
    arguments = [job_config]
    rejecteds = runner.map("quality_reject_check", urls, arguments)

    good_images = []
    for image, rejected in zip(db_images, rejecteds):
        if rejected:
            reason, comment = rejected
            steps.quality.reject_image(image.id, reason, comment)
        else:
            good_images.append(image)

    if not good_images:
        logger.warn("No good images under these quality checking criteria")
        return

    grouped_images = group_per_timestep(good_images)
    timestep_num = len(grouped_images)
    for n, (timestep, images) in enumerate(grouped_images):
        msg = "processing %s images in timestep %s (%s/%s)"
        logger.info(msg % (len(images), timestep, n+1, timestep_num))

        logger.info("performing source extraction")
        urls = [img.url for img in images]
        arguments = [se_parset]

        extraction_results = runner.map("extract_sources", urls, arguments)

        logger.info("storing extracted sources to database")
        # we also set the image max/min RMS values which are calculated
        # during source extraction
        for image, results in zip(images, extraction_results):
            image.update(rms_min=results.rms_min, rms_max=results.rms_max,
                detection_thresh=se_parset['detection_threshold'],
                analysis_thresh=se_parset['analysis_threshold'])
            dbgen.insert_extracted_sources(image.id, results.sources, 'blind')

        logger.info("performing database operations")

        for image in images:
            logger.info("performing DB operations for image %s" % image.id)

            logger.info("performing source association")
            dbass.associate_extracted_sources(image.id,
                                              deRuiter_r=deruiter_radius,
                                              new_source_sigma_margin=new_src_sigma)

            all_fit_posns, all_fit_ids = steps_ff.get_forced_fit_requests(image)
            if all_fit_posns:
                successful_fits, successful_ids = steps_ff.perform_forced_fits(
                    all_fit_posns, all_fit_ids, image.url, se_parset)

                steps_ff.insert_and_associate_forced_fits(image.id,successful_fits,
                                                          successful_ids)


        dbgen.update_dataset_process_end_ts(dataset_id)