Exemplo n.º 1
0
def etransfer_send_output(job_id, dry_run=False, force=False):
    """Add the output of a job to e-transfer (entry point for scripts).

    Unless dry_run is set, the e-transfer configuration and disk space
    checks are performed first.  Unless force is set, the job is looked
    up in the database and a CommandError is raised if it is not in the
    PROCESSED state.  The actual work is delegated to the private
    function _etransfer_send.
    """

    logger.debug('Preparing to e-transfer output for job {0}'.format(job_id))

    # The machine, user and disk space checks only matter for a real run.
    if not dry_run:
        etransfer_check_config()
        _etransfer_check_space()

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    if not force:
        # Only query the job when its state has to be verified.
        current_state = db.get_job(id_=job_id).state

        if current_state != JSAProcState.PROCESSED:
            template = ('Job {0} cannot be e-transferred as it is in '
                        'state {1}')
            message = template.format(
                job_id, JSAProcState.get_name(current_state))
            logger.error(message)
            raise CommandError(message)

    _etransfer_send(job_id, dry_run=dry_run, db=db, force=force)

    logger.debug('Done adding output for job {0} to e-transfer'.format(job_id))
Exemplo n.º 2
0
def etransfer_send_output(job_id, dry_run=False, force=False):
    """Script-level function to put the output of a job into e-transfer.

    Performs the e-transfer configuration and disk space checks (skipped
    in dry run mode), connects to the database and, unless force is
    given, refuses with a CommandError any job which is not in the
    PROCESSED state.  It then calls the private function
    _etransfer_send to do the transfer.
    """

    logger.debug('Preparing to e-transfer output for job {0}'.format(job_id))

    # In dry run mode it does not matter where or as whom we are running.
    if not dry_run:
        etransfer_check_config()
        _etransfer_check_space()

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    if not force:
        job_state = db.get_job(id_=job_id).state

        if job_state != JSAProcState.PROCESSED:
            state_name = JSAProcState.get_name(job_state)
            message = ('Job {0} cannot be e-transferred as it is in '
                       'state {1}').format(job_id, state_name)
            logger.error(message)
            raise CommandError(message)

    _etransfer_send(job_id, dry_run=dry_run, db=db, force=force)

    logger.debug('Done adding output for job {0} to e-transfer'.format(job_id))
Exemplo n.º 3
0
def fetch_output(job_id=None,
                 location=None,
                 task=None,
                 dry_run=False,
                 force=False):
    """Fetch the output data of a job (entry point for scripts).

    If job_id is None, the database is searched for a single job in the
    INGEST_QUEUE state matching the given location and task, and the
    force option is switched off.  When no such job is found a warning
    is logged and the function returns without doing anything else.
    """

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    if job_id is None:
        # Forcing only applies to an explicitly requested job.
        force = False

        logger.debug('Looking for a job to fetch output data for')

        candidates = db.find_jobs(state=JSAProcState.INGEST_QUEUE,
                                  location=location,
                                  task=task,
                                  prioritize=True,
                                  number=1,
                                  sort=True)

        if not candidates:
            logger.warning('Did not find a job to fetch output data for!')
            return

        job_id = candidates[0].id

    _fetch_job_output(job_id, db=db, force=force, dry_run=dry_run)
Exemplo n.º 4
0
def run_job(job_id=None, db=None, force=False, task=None):
    """
    Run the JSA processing of a job.

    If no job_id is given, one job in state WAITING with location 'JAC'
    is requested from the database (with prioritization and sorting
    enabled, optionally restricted to the given task) and the force
    option is switched off.  If no such job exists, a warning is logged
    and the function returns.

    A database object may be supplied via db; otherwise the one
    returned by get_database is used.

    Before anything else, the free output and scratch space are compared
    with the run_min_output_space and run_min_scratch_space values from
    the disk_limit section of the configuration.  If either is too low,
    a warning is logged and the function returns without doing anything.
    """

    # Read both limits and both free space values up front.
    config = get_config()
    min_output = float(config.get('disk_limit', 'run_min_output_space'))
    min_scratch = float(config.get('disk_limit', 'run_min_scratch_space'))
    free_output = get_output_dir_space()
    free_scratch = get_scratch_dir_space()

    if free_output < min_output:
        logger.warning('Insufficient output disk space: %f / %f GiB required',
                       free_output, min_output)
        return

    if free_scratch < min_scratch:
        logger.warning('Insufficient scratch disk space: %f / %f GiB required',
                       free_scratch, min_scratch)
        return

    # Fall back to the configured database if none was supplied.
    db = db or get_database()

    if not job_id:
        # Forcing only makes sense for an explicitly requested job.
        force = False

        logger.debug('Looking for a job to run')

        candidates = db.find_jobs(state=JSAProcState.WAITING,
                                  location='JAC',
                                  prioritize=True,
                                  number=1,
                                  sort=True,
                                  task=task)

        if not candidates:
            logger.warning('Did not find a job to run!')
            return

        job_id = candidates[0].id

    run_a_job(job_id, db=db, force=force)
Exemplo n.º 5
0
def ingest_output(
        job_id, location=None, task=None, dry_run=False, force=False):
    """Ingest the output of jobs (entry point for scripts).

    If job_id is not None only that job is considered; otherwise all
    jobs in the INGESTION state matching location and task are
    retrieved.  Each job is moved to the INGESTING state and passed to
    _perform_ingestion, together with the command_ingest value of its
    task if there is one.  In dry run mode the jobs are only logged.
    """

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    if job_id is None:
        jobs = db.find_jobs(
            state=JSAProcState.INGESTION,
            location=location,
            task=task, prioritize=True)

    else:
        jobs = [db.get_job(id_=job_id)]

    # Task information for all tasks, looked up by task name below.
    task_info = db.get_task_info()

    # Unless forced, the state change requires the INGESTION state.
    required_state = None if force else JSAProcState.INGESTION

    for job in jobs:
        info = task_info.get(job.task)

        command_ingest = None if info is None else info.command_ingest

        if command_ingest is None:
            description = 'into CAOM-2'
        else:
            description = 'via custom process'

        if dry_run:
            logger.info(
                'Skipping ingestion %s of job %i (DRY RUN)',
                description, job.id)
            continue

        try:
            db.change_state(
                job.id, JSAProcState.INGESTING,
                'Job output is being ingested {}'.format(description),
                state_prev=required_state)

        except NoRowsError:
            # Several copies of this routine are routinely run at the same
            # time, so many jobs will already have been taken out of the
            # INGESTION state by another process.  This is logged at debug
            # level only, to avoid needless warnings in the cron job
            # monitor.
            logger.debug('Job %i can not be ingested as it is not ready',
                         job.id)
            continue

        _perform_ingestion(job_id=job.id, db=db, command_ingest=command_ingest)
Exemplo n.º 6
0
def reset_jobs(task,
               date_start,
               date_end,
               instrument=None,
               state=None,
               force=False,
               dry_run=False):
    """Put the selected jobs back into the UNKNOWN state.

    Jobs at location 'JAC' are selected by task and, optionally, by a
    range of UT dates, an instrument and a state name.  The start and
    end dates must be given together or not at all, otherwise a
    CommandError is raised.

    Jobs in an active state are skipped unless force is set.  If any
    were skipped, a CommandError is raised after all other jobs have
    been handled.  In dry run mode no state is changed.
    """

    db = get_database()

    obsquery = {}

    # A date range needs both ends: reject a half-specified range.
    have_start = date_start is not None
    have_end = date_end is not None

    if have_start != have_end:
        raise CommandError('only one of start and end date specified')

    if have_start:
        obsquery['utdate'] = Range(date_start, date_end)

    if instrument is not None:
        obsquery['instrument'] = instrument

    # Convert the state name into the corresponding state value.
    if state is not None:
        state = JSAProcState.lookup_name(state)

    skipped = 0

    selected = db.find_jobs(location='JAC',
                            task=task,
                            obsquery=obsquery,
                            state=state)

    for job in selected:
        info = JSAProcState.get_info(job.state)

        if info.active and not force:
            logger.warning('Skipping active job %i (%s)', job.id,
                           info.name)
            skipped += 1
            continue

        logger.info('Resetting status of job %i (was %s)', job.id,
                    info.name)

        if dry_run:
            continue

        db.change_state(job.id,
                        JSAProcState.UNKNOWN,
                        'Resetting job',
                        state_prev=job.state)

    if skipped:
        raise CommandError('Could not reset {0} active jobs'.format(skipped))
Exemplo n.º 7
0
def run_job(job_id=None, db=None, force=False, task=None):
    """
    Run the JSA processing of one job.

    With a job_id, that job is run.  Without one, the database is asked
    for a single job in state WAITING with location 'JAC' (prioritized,
    sorted and optionally limited to the given task); force is then
    ignored, and if there is no such job a warning is logged and the
    function returns.

    The db argument allows a database object to be supplied; by default
    the one returned by get_database is used.

    Processing is not attempted when the free output or scratch space is
    below the run_min_output_space or run_min_scratch_space value of the
    disk_limit configuration section: a warning is logged and the
    function returns.
    """

    # Gather the configured limits and the current free space.
    config = get_config()
    output_limit = float(config.get('disk_limit', 'run_min_output_space'))
    scratch_limit = float(config.get('disk_limit', 'run_min_scratch_space'))
    output_free = get_output_dir_space()
    scratch_free = get_scratch_dir_space()

    if output_free < output_limit:
        logger.warning('Insufficient output disk space: %f / %f GiB required',
                       output_free, output_limit)
        return

    if scratch_free < scratch_limit:
        logger.warning('Insufficient scratch disk space: %f / %f GiB required',
                       scratch_free, scratch_limit)
        return

    # Use the default database unless one was passed in.
    db = db or get_database()

    if not job_id:
        # An automatically selected job is never forced.
        force = False

        logger.debug('Looking for a job to run')

        found = db.find_jobs(state=JSAProcState.WAITING, location='JAC',
                             prioritize=True, number=1, sort=True, task=task)

        if not found:
            logger.warning('Did not find a job to run!')
            return

        job_id = found[0].id

    run_a_job(job_id, db=db, force=force)
Exemplo n.º 8
0
def fetch(job_id=None, db=None, force=False, replaceparent=False, task=None):
    """
    Assemble the files required to process a job.

    With a job_id, that job is fetched.  Without one, the database is
    asked for a single job in state MISSING with location 'JAC'
    (prioritized, sorted and optionally limited to the given task);
    force is then switched off, and if there is no such job a warning
    is logged and the function returns.

    The db argument allows a database object to be supplied; by default
    the one returned by get_database is used.

    Unless force is set, nothing is done (apart from logging a warning)
    when the free input space is below the fetch_min_space value of the
    disk_limit configuration section.

    The work itself is delegated to fetch_a_job.
    """

    # Compare the free input space with the configured minimum.
    free_space = get_input_dir_space()
    min_space = float(get_config().get('disk_limit', 'fetch_min_space'))

    if free_space < min_space and not force:
        logger.warning('Insufficient disk space: %f / %f GiB required',
                       free_space, min_space)
        return

    # Use the default database unless one was passed in.
    db = db or get_database()

    if not job_id:
        # An automatically selected job is never forced.
        force = False

        logger.debug('Looking for a job for which to fetch data')

        candidates = db.find_jobs(state=JSAProcState.MISSING,
                                  location='JAC',
                                  prioritize=True,
                                  number=1,
                                  sort=True,
                                  task=task)

        if not candidates:
            logger.warning('Did not find a job to fetch!')
            return

        job_id = candidates[0].id

    fetch_a_job(job_id, db=db, force=force, replaceparent=replaceparent)
Exemplo n.º 9
0
def _clean_job_directories(dir_function,
                           state,
                           task=None,
                           count=None,
                           clean_function=None,
                           dry_run=False,
                           clean_function_kwargs=None):
    """Generic directory deletion function.

    Jobs with location 'JAC' in the given state (and task, if given)
    are retrieved from the database, and dir_function is called with
    each job ID to obtain the directory to clean.  Directories which do
    not exist are skipped.

    If no clean_function is given, the whole directory is removed
    (in dry run mode it is only logged, but still counted).

    If a clean_function is given, it should return True when it is
    able to clean a directory and False otherwise.  It is called with
    the directory and the keyword arguments job_id, db and dry_run, plus
    the extra clean_function_kwargs keyword arguments (none by default).

    If count is not None, processing stops once that many directories
    have been cleaned.

    Errors raised while cleaning a directory are logged and the next
    job is then processed.
    """

    # Use a fresh dictionary rather than a shared mutable default.
    if clean_function_kwargs is None:
        clean_function_kwargs = {}

    db = get_database()
    jobs = db.find_jobs(location='JAC', state=state, task=task)

    n = 0
    for job in jobs:
        directory = dir_function(job.id)

        if not os.path.exists(directory):
            logger.debug('Directory for job %i does not exist', job.id)
            continue

        try:
            if clean_function is None:
                logger.info('Removing directory for job %i: %s', job.id,
                            directory)

                if not dry_run:
                    shutil.rmtree(directory)

                cleaned = True

            else:
                cleaned = clean_function(directory,
                                         job_id=job.id,
                                         db=db,
                                         dry_run=dry_run,
                                         **clean_function_kwargs)

        except Exception:
            # Catch ordinary errors only, so that KeyboardInterrupt and
            # SystemExit can still stop the clean-up.
            logger.exception('Error removing directory for job %i: %s', job.id,
                             directory)
            continue

        if cleaned:
            n += 1

        if (count is not None) and (n >= count):
            break
Exemplo n.º 10
0
def move_from_cadc(job_ids, dry_run=False):
    """Move each of the given jobs from CADC to JAC.

    A single database connection is opened and shared by all of the
    calls to _move_job_from, which handles the jobs one at a time in
    the order given.
    """

    db = get_database()

    logger.info('Starting move of jobs from CADC to JAC')

    for current_id in job_ids:
        _move_job_from(current_id, db=db, dry_run=dry_run)

    logger.info('Done moving jobs')
Exemplo n.º 11
0
def reset_jobs(task, date_start, date_end, instrument=None,
               state=None, force=False, dry_run=False):
    """Return the selected jobs to the UNKNOWN state.

    The jobs are those at location 'JAC' for the given task, optionally
    restricted to a range of UT dates, an instrument and a state name.
    Giving only one of the start and end dates raises a CommandError.

    Active jobs are left alone unless force is set; when some were left
    alone, a CommandError reporting how many is raised at the end.
    Nothing is changed in the database in dry run mode.
    """

    db = get_database()

    obsquery = {}

    # The two dates must be given as a pair (or both omitted).
    start_given = date_start is not None
    end_given = date_end is not None

    if start_given and end_given:
        obsquery['utdate'] = Range(date_start, date_end)
    elif start_given or end_given:
        raise CommandError('only one of start and end date specified')

    if instrument is not None:
        obsquery['instrument'] = instrument

    # Translate the state name into a state value for the query.
    if state is not None:
        state = JSAProcState.lookup_name(state)

    active_count = 0

    matching = db.find_jobs(location='JAC', task=task, obsquery=obsquery,
                            state=state)

    for job in matching:
        info = JSAProcState.get_info(job.state)

        if info.active and not force:
            logger.warning('Skipping active job %i (%s)',
                           job.id, info.name)
            active_count += 1
            continue

        logger.info('Resetting status of job %i (was %s)',
                    job.id, info.name)

        if dry_run:
            continue

        db.change_state(job.id, JSAProcState.UNKNOWN,
                        'Resetting job', state_prev=job.state)

    if active_count:
        raise CommandError(
            'Could not reset {0} active jobs'.format(active_count))
Exemplo n.º 12
0
def submit_one_coadd_job(tile, parenttask, mode, parameters, location,
                         exclude_pointing_jobs=False,
                         science_obs_only=False,
                         never_update=False,
                         dryrun=True, priority=0,
                         output_task=None, pointings_only=False):
    """
    Submit a single coadd job for a tile.

    The coadd task name is output_task if given, and is otherwise
    generated from parenttask.  The parent jobs are found with
    get_parents; if that raises NoRowsError the job is submitted with
    no parents.  Updating and deleting of an existing job are both
    disabled by never_update.

    Returns the value returned by add_upd_del_job.

    """
    # Determine the task name, then derive the tag and file filter.
    coadd_task = output_task or generate_hpx_coadd_task(parenttask)

    tag = generate_hpx_coadd_tag(tile, coadd_task)
    filt = create_hpx_filter(tile, parenttask)

    db = get_database()

    # Determine the current set of parent jobs.
    try:
        parent_jobs = get_parents(tile, parenttask,
                                  exclude_pointing_jobs=exclude_pointing_jobs,
                                  pointings_only=pointings_only,
                                  science_obs_only=science_obs_only)
    except NoRowsError:
        parent_jobs = []

    # Every parent job uses the same filter.
    filters = [filt] * len(parent_jobs)
    may_modify = not never_update

    return add_upd_del_job(
        db=db,
        tag=tag,
        location=location,
        mode=mode,
        parameters=parameters,
        task=coadd_task,
        priority=priority,
        parent_jobs=parent_jobs,
        filters=filters,
        tilelist=[tile],
        allow_upd=may_modify,
        allow_del=may_modify,
        description='coadd for tile {}'.format(tile),
        dry_run=dryrun)
Exemplo n.º 13
0
def etransfer_query_output(job_id):
    """Report the e-transfer status of each output file of a job.

    A table is printed with one row per output file, showing the result
    of etransfer_file_status and of check_cadc_files for that file.

    If any file has an e-transfer status of False, the user is asked
    whether to delete the corresponding files from the 'reject'
    directory under the configured transdir and, if they agree, whether
    to re-try the e-transfer of the job.
    """

    db = get_database()

    config = get_config()
    transdir = config.get('etransfer', 'transdir')

    files = db.get_output_files(job_id)

    rejected_paths = []

    row_format = '{0:110} {1:5} {2:12} {3:5}'
    print(row_format.format('File', 'ET', 'Directory', 'AD'))

    et_statuses = etransfer_file_status(files)
    ad_statuses = check_cadc_files(files)

    for (filename, et_status, ad_status) in zip(
            files, et_statuses, ad_statuses):
        # A status of None means there is nothing to report for the file.
        (ok, subdir) = (None, '') if et_status is None else et_status

        print(row_format.format(
            filename, repr(ok), subdir, repr(ad_status)))

        if ok is False:
            rejected_paths.append(
                os.path.join(transdir, 'reject', subdir, filename))

    if not rejected_paths:
        return

    if not yes_or_no_question(
            'Delete rejected files from e-transfer directories?'):
        return

    for path in rejected_paths:
        logger.debug('Deleting file %s', path)
        os.unlink(path)

    if yes_or_no_question('Re-try e-transfer?'):
        # The files were just removed from the e-transfer directories,
        # so the cache must be cleared before trying again.
        _etransfer_clear_cache()

        etransfer_send_output(job_id, dry_run=False, force=True)
Exemplo n.º 14
0
def etransfer_query_output(job_id):
    """Show the e-transfer status of the output files of a job.

    For each output file a row is printed giving its e-transfer status
    and directory (from etransfer_file_status) and the value from
    check_cadc_files.

    Files whose e-transfer status is False are collected; if there are
    any, the user is offered the deletion of those files from the
    'reject' directory of the configured transdir, followed by the
    option to re-try the e-transfer.
    """

    db = get_database()

    config = get_config()
    transdir = config.get('etransfer', 'transdir')

    files = db.get_output_files(job_id)

    to_delete = []

    table_row = '{0:110} {1:5} {2:12} {3:5}'
    print(table_row.format('File', 'ET', 'Directory', 'AD'))

    statuses = zip(files, etransfer_file_status(files),
                   check_cadc_files(files))

    for (filename, et_status, ad_status) in statuses:
        if et_status is not None:
            (ok, directory) = et_status
        else:
            (ok, directory) = (None, '')

        print(table_row.format(filename, repr(ok), directory,
                               repr(ad_status)))

        if ok is False:
            to_delete.append(
                os.path.join(transdir, 'reject', directory, filename))

    if not to_delete:
        return

    if not yes_or_no_question(
            'Delete rejected files from e-transfer directories?'):
        return

    for pathname in to_delete:
        logger.debug('Deleting file %s', pathname)
        os.unlink(pathname)

    if yes_or_no_question('Re-try e-transfer?'):
        # Clear the cache first, as it predates the removal of the
        # files from the e-transfer directories.
        _etransfer_clear_cache()

        etransfer_send_output(job_id, dry_run=False, force=True)
Exemplo n.º 15
0
def _clean_job_directories(dir_function, state, task=None, count=None,
                           clean_function=None, dry_run=False,
                           clean_function_kwargs=None):
    """Generic directory deletion function.

    Jobs with location 'JAC' in the given state (and task, if given)
    are retrieved from the database, and dir_function is called with
    each job ID to obtain the directory to clean.  Directories which do
    not exist are skipped.

    If no clean_function is given, the whole directory is removed
    (in dry run mode it is only logged, but still counted).

    If a clean_function is given, it should return True when it is
    able to clean a directory and False otherwise.  It is called with
    the directory and the keyword arguments job_id, db and dry_run, plus
    the extra clean_function_kwargs keyword arguments (none by default).

    If count is not None, processing stops once that many directories
    have been cleaned.

    Errors raised while cleaning a directory are logged and the next
    job is then processed.
    """

    # Use a fresh dictionary rather than a shared mutable default.
    if clean_function_kwargs is None:
        clean_function_kwargs = {}

    db = get_database()
    jobs = db.find_jobs(location='JAC', state=state, task=task)

    n = 0
    for job in jobs:
        directory = dir_function(job.id)

        if not os.path.exists(directory):
            logger.debug('Directory for job %i does not exist', job.id)
            continue

        try:
            if clean_function is None:
                logger.info('Removing directory for job %i: %s',
                            job.id, directory)

                if not dry_run:
                    shutil.rmtree(directory)

                cleaned = True

            else:
                cleaned = clean_function(directory, job_id=job.id, db=db,
                                         dry_run=dry_run,
                                         **clean_function_kwargs)

        except Exception:
            # Catch ordinary errors only, so that KeyboardInterrupt and
            # SystemExit can still stop the clean-up.
            logger.exception('Error removing directory for job %i: %s',
                             job.id, directory)
            continue

        if cleaned:
            n += 1

        if (count is not None) and (n >= count):
            break
Exemplo n.º 16
0
    def __call__(self, job_id, *args, **kwargs):
        """Call the wrapped function, recording any error against the job.

        The return value of the wrapped function is passed through.  If
        it raises an exception, the exception is logged and the job is
        put into the ERROR state with a message made from the exception
        arguments, using the database given by the 'db' keyword argument
        if there is one.  The state change is skipped when the 'dry_run'
        keyword argument is true.  The exception is always re-raised.
        """
        try:
            return self.function(job_id, *args, **kwargs)
        except Exception as error:
            logger.exception('Error caught running function %s',
                             self.function.__name__)

            if kwargs.get('dry_run', False):
                logger.info('Skipping inserting error into database (DRY RUN)')

            else:
                # Prefer the caller's database connection when one was given.
                db = kwargs.get('db')
                if db is None:
                    db = get_database()

                message = ' '.join(str(arg) for arg in error.args)
                db.change_state(job_id, JSAProcState.ERROR, message)

            raise
Exemplo n.º 17
0
def namecheck_output(task, outfile):
    """Check the names of the output files of the jobs in a task.

    Jobs with location 'JAC' which are in a post-run state or in the
    ERROR state are examined.  Each output file name is passed to
    check_file_name and those which fail are written, one per line,
    to the file named by outfile.
    """

    db = get_database()

    # Include the error state as well as the post-run states, since the
    # error may itself have come from namecheck.
    wanted_states = JSAProcState.STATE_POST_RUN.copy()
    wanted_states.add(JSAProcState.ERROR)

    with open(outfile, 'w') as report:
        for job in db.find_jobs(
                task=task, location='JAC', state=wanted_states):
            job_id = job.id
            logger.info('Considering job %i', job_id)

            try:
                for filename in db.get_output_files(job_id):
                    if not check_file_name(filename):
                        logger.warning(
                            'Job %i file %s FAILURE', job_id, filename)
                        print(filename, file=report)
                    else:
                        logger.debug('Job %i file %s OK', job_id, filename)

            except NoRowsError:
                # Jobs without any output files are simply skipped.
                pass
Exemplo n.º 18
0
def fetch_a_job(job_id, db=None, force=False, replaceparent=False):
    """
    Assemble the files required to process a job.

    Requires an integer job_id.  A database object may be given via db;
    otherwise the one returned by get_database is used.

    The job is first moved to the FETCHING state.  Unless force is set
    it must be in the MISSING state; if it is not, an error is logged
    and the function returns None without doing anything else.

    The input files of the job and the (filtered) output files of its
    parent jobs are then assembled.  The replaceparent option is passed
    to assemble_parent_data_for_job as force_new.  A ParentNotReadyError
    is raised if a parent job is not in a post-run state.

    Finally the list of input files is written out and the job is moved
    from FETCHING to the WAITING state.

    Returns the job_id on completion.

    """

    # Use the default database unless one was passed in.
    db = db or get_database()

    logger.info('About to fetch data for job %i', job_id)

    # Unless forced, the state change requires the MISSING state.
    required_state = None if force else JSAProcState.MISSING

    try:
        db.change_state(job_id,
                        JSAProcState.FETCHING,
                        'Data is being assembled',
                        state_prev=required_state)

    except NoRowsError:
        # A job which is not in the MISSING state is probably being fetched
        # by another process.  The error is trapped here so that the
        # ErrorDecorator does not put the job into the ERROR state, which
        # would stop the other process from setting the job to WAITING.
        logger.error('Job %i cannot be fetched because it is not missing',
                     job_id)
        return

    # Files listed as input files of the job itself.
    try:
        input_paths = assemble_input_data_for_job(
            job_id, db.get_input_files(job_id))
    except NoRowsError:
        input_paths = []

    # Files taken from the output of the parent jobs.
    try:
        parents = db.get_parents(job_id, with_state=True)
        parent_paths = []

        for (parent_id, file_filter, parent_state) in parents:
            if parent_state not in JSAProcState.STATE_POST_RUN:
                logger.error(
                    'Job %i cannot be fetch because its parent %i is not ready',
                    job_id, parent_id)
                raise ParentNotReadyError(
                    'Parent job {} is not ready'.format(parent_id))

            wanted_files = filter_file_list(
                db.get_output_files(parent_id), file_filter)
            parent_paths.extend(assemble_parent_data_for_job(
                job_id, parent_id, wanted_files, force_new=replaceparent))
    except NoRowsError:
        parent_paths = []

    # Write out the complete list of files, with their full paths.
    write_input_list(job_id, input_paths + parent_paths)

    # The job is now ready to be executed.
    db.change_state(
        job_id,
        JSAProcState.WAITING,
        'Data has been assembled for job and job can now be executed',
        state_prev=JSAProcState.FETCHING)

    logger.info('Done fetching data for job %i', job_id)

    return job_id
Exemplo n.º 19
0
def get_parents(tile, parenttask, exclude_pointing_jobs=False,
                science_obs_only=False, pointings_only=False):
    """
    Get the parent jobs to use for a coadd of the requested tile.

    Parameters:

    tile (int)
    Tile number to perform coadd on.

    parenttask (string)
    Input task name to look for jobs for.

    science_obs_only, pointings_only
    Restrict the observation type to science or to pointing
    respectively (pointings_only wins if both are given).

    exclude_pointing_jobs
    Raise an error if any pointing job falls on the tile.

    Returns the list of IDs of the parent jobs.

    Raises a JSAProcError if there are no parent jobs that fit, or if
    exclude_pointing_jobs is set and pointings fall on the tile.
    """
    logger.debug(
        'Finding all jobs in task %s that fall on tile %i',
        parenttask, tile)

    db = get_database()
    qa_state = [JSAQAState.GOOD,
                JSAQAState.QUESTIONABLE,
                JSAQAState.UNKNOWN]

    def find_tile_jobs(obsquery):
        # All queries look at the same tile and task, accept the same QA
        # states and ignore deleted jobs: only the obsquery differs.
        return db.find_jobs(tiles=[tile],
                            task=parenttask,
                            qa_state=qa_state,
                            state=Not([JSAProcState.DELETED]),
                            obsquery=obsquery)

    # Query for the parent jobs themselves.
    selection = {'omp_status': Not(list(OMPState.STATE_NO_COADD))}
    if science_obs_only:
        selection['obstype'] = {'science'}
    if pointings_only:
        selection['obstype'] = {'pointing'}

    parent_ids = [job.id for job in find_tile_jobs(selection)]

    # Further queries, to tell the user what is not being included.
    omp_excluded = find_tile_jobs({'omp_status': OMPState.STATE_NO_COADD})

    if science_obs_only or exclude_pointing_jobs:
        pointing_excluded = find_tile_jobs({
            'obstype': 'pointing',
            'omp_status': Not(list(OMPState.STATE_NO_COADD)),
        })

        # Give up entirely if it was requested to exclude any tile
        # containing a pointing.
        if exclude_pointing_jobs and pointing_excluded:
            logger.debug('Tile %i contains pointing obs.', tile)
            raise JSAProcError('Pointings fall on this tile.')

    # Log information about which jobs were excluded.
    # TODO: check what logger level is being used before going through for
    # loops.
    logger.debug(
        '%i jobs in task %s fall on tile %i with appropriate QA States'
        ', OMP States and obstype states', len(parent_ids), parenttask, tile)

    if omp_excluded:
        logger.debug(
            '%i jobs were excluded due to wrong OMP status',
            len(omp_excluded))
        for job in omp_excluded:
            omp_status = db.get_obs_info(job.id)[0].omp_status
            logger.debug(
                'Job %i NOT INCLUDED (omp status of %s)',
                job.id, OMPState.get_name(omp_status))

    if science_obs_only and pointing_excluded:
        logger.debug(
            '%i additional jobs were excluded as pointings',
            len(pointing_excluded))
        for job in pointing_excluded:
            logger.debug('Job %i NOT INCLUDED (pointing)', job.id)

    if not parent_ids:
        logger.debug('Tile %i has no acceptable parent jobs', tile)

        raise JSAProcError('No acceptable observations.')

    return parent_ids
Exemplo n.º 20
0
def etransfer_poll_output(dry_run):
    """High level polling function to use from scripts.

    Examines every job at location 'JAC' in the TRANSFERRING state which
    belongs to an e-transfer task and checks on its output files:

    * If any file is still present in the e-transfer directories and none
      has been rejected, the job is left alone until the next poll.
    * If any file was rejected, or a file is no longer in e-transfer but is
      either absent from CADC or has a different MD5 sum there, the job is
      moved to the ERROR state.
    * If every file is at CADC with a matching MD5 sum, the job is moved
      to the INGESTION state.

    Arguments:
        dry_run: if true, skip the e-transfer configuration check and do
            not change any job state in the database.

    Raises:
        CommandError: if the output file list could not be retrieved for
            at least one job.
    """

    logger.debug('Preparing to poll the e-transfer system for job output')

    # When not in dry run mode, check that etransfer is being
    # run on the correct machine by the correct user.
    if not dry_run:
        etransfer_check_config(any_user=True)

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    logger.debug('Retrieving task information from database')
    task_info = db.get_task_info()

    # Number of jobs for which the output file list was unavailable.
    n_err = 0

    for job in db.find_jobs(location='JAC', state=JSAProcState.TRANSFERRING):
        # Retrieve this job's task information.
        job_task_info = task_info.get(job.task)

        if ((job_task_info is None)
                or (job_task_info.command_xfer is not None)
                or (not job_task_info.etransfer)):
            # Job not from an e-transfer task: skip it.
            continue

        job_id = job.id
        logger.debug('Checking state of job %i', job_id)

        logger.debug('Retrieving list of output files')
        try:
            file_info = db.get_output_files(job_id, with_info=True)
            files = [x.filename for x in file_info]

        except NoRowsError:
            logger.error('Did not find output files for job %i', job_id)
            n_err += 1
            continue

        try:
            logger.debug('Checking if files are in the e-transfer directories')
            etransfer_status = etransfer_file_status(files)
            if any(etransfer_status):
                # Each non-None status is an (ok, detail) pair; when "ok"
                # is false the detail is reported as the rejection reason.
                rejection = []
                for (filename, status) in zip(files, etransfer_status):
                    if status is not None:
                        (ok, reason) = status
                        if not ok:
                            logger.error('File {0} was rejected, reason: {1}'.
                                         format(filename, reason))
                            rejection.append(
                                '{0} ({1})'.format(filename, reason))

                if rejection:
                    raise ETransferError('files rejected: {0}'.format(
                                         ', '.join(rejection)))

                # Otherwise we found files in the "in progress" directories
                # so proceed to the next job.
                continue

            logger.debug('Checking if all files are at CADC')
            lost = []
            for info in file_info:
                cadc_file_info = fetch_cadc_file_info(info.filename)

                if cadc_file_info is None:
                    logger.error('Job %i file %s gone from e-transfer '
                                 'but not at CADC', job_id, info.filename)
                    lost.append(info.filename)

                # Must be "elif": there is no CADC record to index when the
                # file is missing, and each file should be listed only once.
                elif cadc_file_info['content-md5'] != info.md5:
                    logger.error('Job %i file %s gone from e-transfer '
                                 'but MD5 sum does not match',
                                 job_id, info.filename)
                    lost.append(info.filename)

            if lost:
                raise ETransferError('files lost or corrupt: {0}'.format(
                                     ', '.join(lost)))
            else:
                # All files present and with correct MD5 sums.
                logger.info('Job %i appears to have all files at CADC',
                            job_id)
                if not dry_run:
                    db.change_state(job_id, JSAProcState.INGESTION,
                                    'Output files finished e-transfer',
                                    state_prev=JSAProcState.TRANSFERRING)

        except ETransferError as e:
            # NOTE(review): relies on ETransferError providing a "message"
            # attribute -- confirm if this code is run under Python 3.
            logger.error('Job %i failed e-transfer: %s', job_id, e.message)
            if not dry_run:
                db.change_state(
                    job_id, JSAProcState.ERROR,
                    'Job failed e-transfer: {0}'.format(e.message),
                    state_prev=JSAProcState.TRANSFERRING)

    logger.debug('Done polling the e-transfer system')

    if n_err:
        raise CommandError('Errors were encountered polling e-transfer')
Exemplo n.º 21
0
import logging
logger = logging.getLogger(__name__)
import numpy as np
from jsa_proc.config import get_database
from jsa_proc.state import JSAProcState
from jsa_proc.qastate import JSAQAState
from astropy.table import Table

# Connect to the JSA processing database.
db = get_database()

# Find all completed jobs
allcompleted = db.find_jobs(task=['hpx-s2-850-r2', 'hpx-s2-850-r1'],
                            state=JSAProcState.COMPLETE)

# Get observation info about them all
# (one entry per job, in the same order as "allcompleted").
obsinfo = []
for i in allcompleted:
    obs = db.get_obs_info(i.id)
    if len(obs) > 1:
        # More than one observation: warn and keep the whole result, so
        # this entry is a collection rather than a single observation.
        logger.warning('job {}:Multiple observations found!'.format(i.id))
        obsinfo.append(obs)
    else:
        # NOTE(review): obs[0] assumes at least one observation was
        # returned for the job -- confirm get_obs_info never returns an
        # empty result here.
        obsinfo.append(obs[0])

# Get the tile list of each job (again in the order of "allcompleted").
tiles = []
for i in allcompleted:
    tile = db.get_tilelist(i.id)
    tiles.append(list(tile))

# Convert the per-job tile lists to a NumPy array.
tiles = np.array(tiles)
Exemplo n.º 22
0
def search_log_files(
        pattern, filename_pattern, task,
        project=None, state=None, after_context=None):
    """Search one log file per job for a regular expression.

    Jobs are selected by task and state (and optionally project).  For
    each job only the first file name of its log directory, taken in
    reverse sorted order, which matches "filename_pattern" is read.  The
    first line matching "pattern" is logged at INFO level, followed by up
    to "after_context" subsequent lines.

    Arguments:
        pattern: regular expression searched for in each log line.
        filename_pattern: regular expression selecting log file names.
        task: task passed to the database job search.
        project: if given, restrict the search via an observation query
            on this project.
        state: state name to look up; JSAProcState.COMPLETE is used when
            this is None.
        after_context: number of lines to report after the matching
            line; None is treated as 0.
    """
    db = get_database()

    line_regex = re.compile(pattern)
    name_regex = re.compile(filename_pattern)

    state = (JSAProcState.COMPLETE if state is None
             else JSAProcState.lookup_name(state))

    n_context = 0 if after_context is None else after_context

    query = dict(task=task, state=state)
    if project is not None:
        query['obsquery'] = {'project': project}

    for job_id in [job.id for job in db.find_jobs(**query)]:
        logger.debug('Checking log files for job %i', job_id)

        log_dir = get_log_dir(job_id)

        # Only the first matching name in reverse sorted order is wanted,
        # so take a single item from a lazily filtered listing.
        candidates = (
            name for name in sorted(os.listdir(log_dir), reverse=True)
            if name_regex.search(name))
        filename = next(candidates, None)

        if filename is None:
            # This job has no log file with a matching name.
            continue

        logger.debug('Found log file for job %i: %s', job_id, filename)

        # Collect the first matching line and the lines following it; the
        # length of this list doubles as the count of collected lines.
        hits = []
        with open(os.path.join(log_dir, filename), 'r') as log_file:
            for text in log_file:
                if hits or line_regex.search(text):
                    hits.append(text.rstrip())

                if len(hits) > n_context:
                    break

        if hits:
            logger.info(
                'Found match for job %i: %s', job_id, hits[0])

            for extra in hits[1:]:
                logger.info(
                    '...    continuation %i: %s', job_id, extra)
Exemplo n.º 23
0
def create_web_app():
    """Function to prepare the Flask web application.

    Creates the Flask application object, registers the route handlers,
    template filters, template tests and a context processor on it, and
    returns it.  The database object and database name are obtained once
    here and shared by the handlers through closures.
    """

    home = get_home()
    db = get_database()
    database_name = get_config().get('database', 'database')

    # Static files and templates are located under the "web" directory
    # of the home directory.
    app = Flask(
        'jsa_proc',
        static_folder=os.path.join(home, 'web', 'static'),
        template_folder=os.path.join(home, 'web', 'templates'),
    )

    app.secret_key = get_config().get('web', 'key')

    # Web authorization -- mostly take from flask docs snippets 8
    # http://flask.pocoo.org/snippets/8
    def check_auth(password):
        """
        Check that the staff password has been used.

        (Note that we don't care what the username is).
        """
        return check_staff_password(password)

    def authenticate():
        """
        Send a 401 response so that we can log in.

        The response body is the rendered "logout.html" template, given
        the referring URL as its "redirect" value.
        """

        return Response(
            render_template('logout.html', redirect=request.referrer), 401,
            {'WWW-Authenticate': loginstring})

    def requires_auth(f):
        """
        A decorator to wrap functions that require authorization.

        If the request has no authorization information, or the password
        fails the check, the 401 response is returned instead of calling
        the wrapped function.
        """
        @wraps(f)
        def decorated(*args, **kwargs):
            auth = request.authorization
            if not auth or not check_auth(auth.password):
                return authenticate()
            return f(*args, **kwargs)

        return decorated

    # Route Handlers.

    @app.route('/')
    def home_page():
        """Redirect the root URL to the task summary page."""
        raise HTTPRedirect(url_for('task_summary'))

    @app.route('/job/')
    @templated('job_list.html')
    def job_list():
        """List jobs matching the query given in the request arguments."""
        # Prepare query arguments list: special parameters first.
        kwargs = {
            'state': request.args.getlist('state'),
            'mode': request.args.get('mode', 'JSAProc'),
            'tiles': request.args.get('tiles')
        }

        # Now add regular string parameters, including those from
        # jcmtobsinfo.
        params = [
            'location',
            'task',
            'date_min',
            'date_max',
            'qa_state',
            'sourcename',
            'obsnum',
            'project',
            'number',
            'tau_min',
            'tau_max',
        ]

        params.extend(ObsQueryDict.keys())

        for key in params:
            kwargs[key] = request.args.get(key, None)

        # Process empty strings used as null form parameters.
        for argname in kwargs:
            if kwargs[argname] == '':
                kwargs[argname] = None

        # Store the query in the session.
        # (The job info and QA pages read it back from there.)
        session['job_query'] = kwargs

        # Finally prepare the template context.
        return prepare_job_list(db,
                                page=request.args.get('page', None),
                                **kwargs)

    @app.route('/image/<task>/piechart')
    def summary_piechart(task='None'):
        """Return the summary pie chart for a task.

        The string 'None' in the URL is converted to None before the
        task is passed on.
        """
        if task == 'None':
            task = None
        obsquerydict = {}
        for key in ObsQueryDict.keys():
            obsquerydict[key] = request.args.get(key, None)
        date_min = request.args.get('date_min', None)
        date_max = request.args.get('date_max', None)
        return prepare_summary_piechart(db,
                                        task=task,
                                        obsquerydict=obsquerydict,
                                        date_min=date_min,
                                        date_max=date_max)

    @app.route('/summary/')
    @templated('task_summary.html')
    def task_summary():
        """Show the task summary page."""
        return prepare_task_summary(db)

    @app.route('/qa')
    @templated('task_qa_summary.html')
    def task_qa_summary():
        """Show the task QA summary page."""
        return prepare_task_qa_summary(db)

    @app.route('/job_summary/')
    @templated('job_summary.html')
    def job_summary():
        """Show the job summary, optionally limited by task and dates."""
        task = request.args.get('task', None)
        date_min = request.args.get('date_min', None)
        date_max = request.args.get('date_max', None)
        return prepare_job_summary(db,
                                   task=task,
                                   date_min=date_min,
                                   date_max=date_max)

    @app.route('/error_summary/')
    @templated('error_summary.html')
    def error_summary():
        """Show the error summary using filters from the request."""
        return prepare_error_summary(
            db,
            redirect_url=request.full_path,
            filtering=request.args.get('filtering', None),
            chosentask=request.args.get('chosentask', None),
            extrafilter=request.args.get('extrafilter', None),
            state_prev=request.args.get('state_prev', None),
            error_state=request.args.get('error_state', None),
            filter_done=('submit_filter' in request.args),
        )

    @app.route('/job/<int:job_id>', methods=['GET'])
    @templated('job_info.html')
    def job_info(job_id):
        """Show information about one job."""
        return prepare_job_info(db, job_id, session.get('job_query'))

    @app.route('/job/<int:job_id>/qa', methods=['GET'])
    @templated('job_qa.html')
    def job_qa(job_id):
        """Show QA information about one job."""
        return prepare_job_qa_info(db, job_id, session.get('job_query'))

    @app.route('/job/<int:job_id>/add_note', methods=['POST'])
    @requires_auth
    def job_add_note(job_id):
        """Add a note to a job (requires authorization)."""
        message = request.form['message']
        username = request.authorization['username']

        try:
            # Add the note.
            prepare_add_note(db, job_id, message, username)

            # Redirect back to the job info page.
            flash('The note has been saved.')
            raise HTTPRedirect(url_for('job_info', job_id=job_id))

        except ErrorPage as err:
            return error_page_response(err)

    @app.route('/job_change_state', methods=['POST'])
    @requires_auth
    def job_change_state():
        """Change the state of the posted jobs (requires authorization)."""

        # Get the variables from POST
        newstate = request.form['newstate']
        state_prev = request.form['state_prev']
        message = request.form['message']
        job_ids = request.form.getlist('job_id')
        url = request.form['url']
        username = request.authorization['username']

        try:
            # Change the state.
            prepare_change_state(db, job_ids, newstate, state_prev, message,
                                 username)

            # Redirect the page to correct info.
            flash('The status has been changed to %s.' %
                  JSAProcState.get_name(newstate))
            raise HTTPRedirect(url)

        except ErrorPage as err:
            return error_page_response(err)

    @app.route('/job_change_qa', methods=['POST'])
    @requires_auth
    def job_change_qa():
        """Change the QA state of the posted jobs (requires authorization).

        When the form contains "action_next" the redirect goes to the
        posted "url-next" value rather than "url".
        """

        # Get the variables from POST
        if "action_next" in request.form:
            url = request.form['url-next']
        else:
            url = request.form['url']
        qa_state = request.form['qa_state']
        message = request.form['message']
        job_ids = request.form.getlist('job_id')
        username = request.authorization['username']

        try:
            # Change the state.
            prepare_change_qa(
                db,
                job_ids,
                qa_state,
                message,
                username,
            )
            # Redirect the page to correct info.
            flash('The QA status of job %s has been changed to %s.' %
                  (str(' '.join(job_ids)), JSAQAState.get_name(qa_state)))
            raise HTTPRedirect(url)

        except ErrorPage as err:
            return error_page_response(err)

    # QA Nightly Summary pages
    @app.route('/qa-nightly')
    @templated('task_qa_summary_nightly.html')
    def qa_night_page():
        """
        By default, show the previous week.

        Note that prepare_task_qa_summary interprets dates as
        inclusive, so use 6 days for the time delta to get a week

        """
        # Missing or empty dates default to today (maximum) and
        # six days before today (minimum).
        date_min = request.args.get('date_min', None)
        if date_min is None or date_min == '':
            date_min = (datetime.date.today() -
                        datetime.timedelta(days=6)).strftime('%Y-%m-%d')

        date_max = request.args.get('date_max', None)
        if date_max is None or date_max == '':
            date_max = datetime.date.today().strftime('%Y-%m-%d')
        return prepare_task_qa_summary(db,
                                       date_min=date_min,
                                       date_max=date_max,
                                       task='jcmt-nightly',
                                       byDate=True)

    @app.route('/login')
    @requires_auth
    def login():
        """Require authorization, then redirect to the referring page."""
        raise HTTPRedirect(request.referrer)

    @app.route('/logout')
    def logout():
        """Send the same 401 response as a failed authorization."""
        return authenticate()

    # Image handling.
    @app.route('/job/<int:job_id>/preview/<preview>')
    def job_preview(job_id, preview):
        """Send a job preview file as a PNG image."""
        path = prepare_job_preview(job_id, preview)
        return send_file(path, mimetype='image/png')

    @app.route('/job/<int:job_id>/pdf/<preview>')
    def job_preview_pdf(job_id, preview):
        """Send a job preview file as a PDF document."""
        path = prepare_job_preview(job_id, preview, 'pdf')
        return send_file(path, mimetype='application/pdf')

    @app.route('/job/<int:job_id>/text/<text_file>')
    def job_text_file(job_id, text_file):
        """Send a job text file as plain text."""
        path = prepare_job_preview(job_id, text_file, 'txt')
        return send_file(path, mimetype='text/plain')

    @app.route('/job/<int:job_id>/log/<log>')
    def job_log_html(job_id, log):
        """Send a job log file with an HTML MIME type."""
        path = prepare_job_log(job_id, log)
        return send_file(path, mimetype='text/html')

    @app.route('/job/<int:job_id>/log_text/<log>')
    def job_log_text(job_id, log):
        """Send a job log file with a plain text MIME type."""
        path = prepare_job_log(job_id, log)
        return send_file(path, mimetype='text/plain')

    @app.route('/fop_summary', methods=['GET'])
    @templated('fop_summary.html')
    def fop_summary():
        """Summarize 'jcmt-nightly' jobs for a user's support projects.

        For each project the template receives a list of counts: total
        jobs, jobs in the ERROR state, then jobs with QA state BAD,
        QUESTIONABLE, UNKNOWN and GOOD.
        """
        userid = request.args.get('userid', None)
        semester = request.args.get('semester', None)
        projdict = {}
        if userid and semester:
            ompdb = get_omp_database(write_access=None)
            projects = ompdb.get_support_projects(str(userid), str(semester))
            for p in projects:
                jobs = db.find_jobs(obsquery={'project': str(p)},
                                    task='jcmt-nightly')
                projdict[str(p)] = [
                    len(jobs),
                    sum(1 for j in jobs if j.state == JSAProcState.ERROR),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.BAD),
                    sum(1 for j in jobs
                        if j.qa_state == JSAQAState.QUESTIONABLE),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.UNKNOWN),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.GOOD)
                ]
        else:
            # NOTE(review): this value is not used afterwards; the
            # (empty) "projdict" is what is given to the template.
            projects = None

        return {'userid': userid, 'semester': semester, 'projects': projdict}

    # Form handler which redirects to the FOP summary page.

    @app.route('/fop_summary_getres', methods=['POST'])
    def fop_summary_getres():
        """Redirect posted user ID and semester to the FOP summary page."""
        userid = request.form['userid']
        semester = request.form['semester']
        raise HTTPRedirect(
            url_for('fop_summary', userid=userid, semester=semester))

    # Filters and Tests.

    @app.template_filter('state_name')
    def state_name_filter(state):
        """Template filter giving the name of a job state."""
        return JSAProcState.get_name(state)

    @app.template_test('state_active')
    def state_active_test(state):
        """Template test returning the "active" value of a state's info."""
        return JSAProcState.get_info(state).active

    @app.template_filter('state_phase')
    def state_phase_filter(state):
        """Template filter giving a short name for the phase of a state.

        Raises HTTPError if the phase is not one of those handled here.
        """
        phase = JSAProcState.get_info(state).phase
        if phase == JSAProcState.PHASE_QUEUE:
            return 'queue'
        elif phase == JSAProcState.PHASE_FETCH:
            return 'fetch'
        elif phase == JSAProcState.PHASE_RUN:
            return 'run'
        elif phase == JSAProcState.PHASE_COMPLETE:
            return 'complete'
        elif phase == JSAProcState.PHASE_ERROR:
            return 'error'
        raise HTTPError('Unknown phase {0}'.format(phase))

    @app.template_filter('qa_state_name')
    def qa_state_name(qa_state):
        """Template filter giving the name of a QA state.

        The value 'total' (compared case-insensitively) is shown as
        'Total' instead of being looked up.
        """
        if qa_state.lower() != 'total':
            name = JSAQAState.get_name(qa_state)
        else:
            name = 'Total'
        return name

    @app.template_filter('omp_state_name')
    def omp_state_name_filter(ompstate):
        """Template filter giving the name of an OMP state."""
        return OMPState.get_name(ompstate)

    @app.template_filter('uniq')
    def uniq_filter(xs):
        """Template filter returning the set of the given values."""
        return set(xs)

    @app.template_filter('datetimeformat')
    def datetimeformat(value, format='%Y-%m-%d<br>%H:%M'):
        """Template filter formatting a value with its strftime method."""
        return value.strftime(format)

    @app.template_filter('replace0')
    def replace_zero(value):
        """Template filter replacing a value equal to zero with '-'."""
        if value == 0:
            return '-'
        else:
            return value

    @app.context_processor
    def add_to_context():
        """Make URL helper functions and the database name available to
        all templates."""
        return {
            'url_for_omp': url_for_omp,
            'url_for_omp_comment': url_for_omp_comment,
            'database_name': database_name,
        }

    # Return the Application.
    return app
Exemplo n.º 24
0
def run_a_job(job_id, db=None, force=False):
    """
    Run the JSA processing of the given job_id (integer).

    By default it will look in the database determined by the JSA_proc
    config. Optionally a database object can be given for testing
    purposes.

    The job is moved to the RUNNING state, its input files are checked,
    jsawrapdr is run, and the resulting output and log files (and, for
    HPX tasks, tiles) are recorded in the database before the job is
    moved to the PROCESSED state.

    Arguments:
        job_id: identifier of the job to run.
        db: database object; obtained from the configuration if not given.
        force: if true, do not require the job to be in the WAITING state
            before it is moved to RUNNING.

    Returns:
        The job_id, both when the job was processed and when it was moved
        to the MISSING state because of a missing input file.  None if
        the job could not be moved to the RUNNING state.

    Raises:
        JSAProcError: if the input file list does not exist, or if an
            input file is missing and the job has already been in the
            MISSING state more than twice.
    """

    if not db:
        # Get link to database
        db = get_database()

    logger.info('About to run job %i', job_id)

    try:
        # Change status of job to Running, raise an error if not currently in
        # WAITING state.
        db.change_state(job_id,
                        JSAProcState.RUNNING,
                        'Job is about to be run on host {0}'.format(
                            gethostname().partition('.')[0]),
                        state_prev=(None if force else JSAProcState.WAITING))

    except NoRowsError:
        # If the job was not in the WAITING state, it is likely that another
        # process is also trying to run it.  Trap the error so that the
        # ErrorDecorator does not put the job into the ERROR state as that
        # will cause the other process to fail to set the job to PROCESSED.
        logger.error('Job %i cannot be run because it is not waiting', job_id)
        return

    # Input file_list -- this should be better? or in jsawrapdr?

    input_dir = get_input_dir(job_id)
    input_file_list_path = os.path.join(input_dir, input_list_name)
    if not os.path.exists(input_file_list_path):
        raise JSAProcError('Input file list %s not found for job_id %i' %
                           (input_file_list_path, job_id))

    # Check every file on input_file list exists.  The "with" block makes
    # sure the list file is closed on every exit path, including the early
    # return and the exception below.
    with open(input_file_list_path, 'r') as inputfl:
        for input_file in inputfl:
            input_file = input_file.strip()
            if os.path.isfile(input_file):
                continue

            # If a file is missing, get log.
            logstring = 'Input file %s for job %i has gone missing' % (
                input_file, job_id)
            logger.error(logstring)
            logs = db.get_logs(job_id)
            states = [i.state_new for i in logs]

            # If it has only been in the state MISSING twice before, then try
            # again.
            if states.count(JSAProcState.MISSING) <= 2:
                logstring += ': moving to missing.'
                logger.warning(
                    'Moving job %i to state MISSING due to '
                    'missing file(s) %s', job_id, input_file)
                db.change_state(job_id,
                                JSAProcState.MISSING,
                                logstring,
                                state_prev=JSAProcState.RUNNING)
                return job_id

            # If it has been in the missing STATE more than two times,
            # give up and move it into ERROR state to be fixed manually.
            # (This function only raises the exception; the state change
            # itself is not performed here.)
            logger.info(
                'Moving job %s to state ERROR due to missing'
                ' file(s).', job_id)
            raise JSAProcError(
                'Input file %s for job %i has gone missing.' %
                (input_file, job_id))

    logger.debug('All input files found for job %s.', job_id)

    # Get the mode and drparameters of the job.
    job = db.get_job(id_=job_id)
    mode = job.mode
    drparameters = job.parameters

    # Get the starlink to be used from the task table.
    starpath = None
    version = None
    command_run = None
    raw_output = None
    log_ingest_command = None
    try:
        task_info = db.get_task_info(job.task)
        starpath = task_info.starlink_dir
        version = task_info.version
        command_run = task_info.command_run
        raw_output = task_info.raw_output
        log_ingest_command = task_info.log_ingest
    except NoRowsError:
        # If the task doesn't have task info, leave "starpath" as None
        # so that jsawrapdr_run uses the default value from the configuration
        # file.
        pass

    # Run the processing job.
    logger.debug('Launching jsawrapdr: mode=%s, parameters=%s', mode,
                 drparameters)

    # First of all remove the output files and log_files from the database.
    db.set_log_files(job_id, [])
    db.set_output_files(job_id, [])

    # The return value of jsawrapdr_run is not needed here.
    jsawrapdr_run(job_id,
                  input_file_list_path,
                  mode,
                  drparameters,
                  cleanup='cadc',
                  location='JAC',
                  starlink_dir=starpath,
                  persist=True,
                  version=version,
                  command_run=command_run,
                  raw_output=raw_output)

    # Create list of output files.
    logger.debug('Preparing list of output files')
    output_files = get_output_files(job_id)

    # write output files to table
    logger.debug('Storing list of output files')
    db.set_output_files(job_id, output_files)

    # Create list of output log files.
    logger.debug('Preparing list of output log files (log.*)')
    log_files = get_output_log_files(job_id)

    # Write output log files to table.
    logger.debug('Storing list of output log files')
    db.set_log_files(job_id, log_files)

    # If a log ingest command is set, run it here.
    if log_ingest_command:
        logger.debug('Will try and ingest log files')
        try:
            with open_log_file(job.id, 'ingest_log') as logingest_log:
                subprocess.check_call(
                    [log_ingest_command, str(job_id)],
                    shell=False,
                    cwd='/tmp',
                    stdout=logingest_log,
                    stderr=subprocess.STDOUT,
                    preexec_fn=restore_signals)
        except subprocess.CalledProcessError:
            logger.exception('Custom log ingest failed ' 'for job %i', job.id)
            # NOTE(review): processing continues after this state change,
            # so the later change to PROCESSED (which expects the previous
            # state to be RUNNING) is attempted with the job in ERROR --
            # confirm that this is the intended behavior.
            db.change_state(job.id,
                            JSAProcState.ERROR,
                            'Custom log ingestion failed',
                            state_prev=JSAProcState.RUNNING)

    # If task begins with hpx, get tiles from list of output_files
    # and write to tile table in db.
    if hpx_task.search(job.task):
        logger.debug('Storing list of output tiles for HPX job %s', job_id)
        tiles = hpx_tiles_from_filenames([x.filename for x in output_files])
        db.set_tilelist(job_id, tiles)
        logger.debug('Job %s produced output on tiles %s',
                     job_id, ', '.join(str(i) for i in tiles))

    # Change state of job.
    db.change_state(job_id,
                    JSAProcState.PROCESSED,
                    'Job has been successfully processed',
                    state_prev=JSAProcState.RUNNING)

    logger.info('Done running job %i', job_id)

    return job_id
Exemplo n.º 25
0
def search_log_files(pattern,
                     filename_pattern,
                     task,
                     project=None,
                     state=None,
                     after_context=None):
    """Report the first match of a pattern in one log file of each job.

    The jobs examined are those found in the database for the given task
    and state (COMPLETE when no state name is supplied), optionally
    restricted to one project.  Within a job's log directory, file names
    are considered in reverse sorted order and only the first one which
    matches "filename_pattern" is read.  The first line of that file
    matching "pattern", and up to "after_context" lines after it (none
    when "after_context" is None), are written to the log at INFO level.
    """
    db = get_database()

    content_re = re.compile(pattern)
    filename_re = re.compile(filename_pattern)

    if state is not None:
        state = JSAProcState.lookup_name(state)
    else:
        state = JSAProcState.COMPLETE

    if after_context is None:
        after_context = 0

    find_args = {'task': task, 'state': state}
    if project is not None:
        find_args['obsquery'] = {'project': project}

    def scan(pathname):
        """Return the first matching line and its trailing context."""
        found = []
        with open(pathname, 'r') as handle:
            for raw in handle:
                # Once a line has matched, every following line is kept
                # until the requested amount of context is reached.
                if found or content_re.search(raw):
                    found.append(raw.rstrip())

                if len(found) > after_context:
                    break
        return found

    job_ids = [entry.id for entry in db.find_jobs(**find_args)]

    for job_id in job_ids:
        logger.debug('Checking log files for job %i', job_id)

        log_dir = get_log_dir(job_id)

        # Stop at the first file name (in reverse sorted order) which
        # matches, whether or not a matching line is found inside it.
        for filename in sorted(os.listdir(log_dir), reverse=True):
            if filename_re.search(filename):
                logger.debug('Found log file for job %i: %s', job_id,
                             filename)

                lines = scan(os.path.join(log_dir, filename))

                if lines:
                    logger.info('Found match for job %i: %s', job_id,
                                lines[0])

                    for extra in lines[1:]:
                        logger.info('...    continuation %i: %s', job_id,
                                    extra)

                break
Exemplo n.º 26
0
def investigate_unauthorized_errors(location, check_at_cadc=True):
    """Interactively investigate jobs which failed as 'unauthorized'.

    Jobs in the error state at the given location are filtered with the
    'unauthorized' error filter.  Each remaining job which has observation
    information is placed in a category describing the first problem
    identified for it:

    * 'omp': an observation ID is not in the COMMON table.
    * 'release': an observation has a release date in the future.
    * 'junk': an observation has the OMP status JUNK.
    * 'missing': an input file is missing at CADC (only checked when
      "check_at_cadc" is true).
    * 'caom2': an observation is missing from CAOM-2.
    * 'unknown': none of the above checks identified a problem.

    For every non-empty category the number of jobs is printed and the
    user is asked whether to show the details and, if so, whether to
    resubmit the jobs by moving them from ERROR to QUEUED.

    Arguments:
        location: location passed to the database error log search.
        check_at_cadc: whether to check that input files are at CADC.
    """

    logger.debug('Starting to investigate unauthorized errors')

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    logger.debug('Connecting to OMP/JCMT database')
    ompdb = get_omp_database()

    logger.debug('Preparing CADC TAP object')
    caom2 = CADCTap()

    logger.debug('Fetching list of jobs in the error state')
    job_logs = db.find_errors_logs(location=location)

    logger.debug('Filtering for jobs with unauthorized errors')
    # NOTE(review): the filter's return value is not used, so it is
    # presumably expected to modify "job_logs" in place -- confirm.
    error_filter = JSAProcErrorFilter('unauthorized')
    error_filter(job_logs)

    now = datetime.now(UTC)
    category = {'unknown': []}
    job_info = {}

    for job_id in job_logs.keys():
        logger.debug('Checking job %i', job_id)

        # An exception is used to signal that the cause of the problem has
        # been identified, as this allows the remaining checks (including
        # the rest of the loop over observations) to be abandoned at once.
        try:
            # Find the observation IDs and use it to determine whether the
            # job uses any observations which are not yet public.

            logger.debug('Fetching observation info')
            obs_info = db.get_obs_info(job_id)
            job_info[job_id] = {'obs': obs_info}

            if not obs_info:
                # Leaving the "try" block via "continue" also skips the
                # "else" clause, so this job is not put in any category.
                logger.warning('No observation info available for this job')
                continue

            obsids = set(latin_1_encode(x.obsid)[0] for x in obs_info)

            for obsid in obsids:
                logger.debug('Fetching COMMON info for %s', obsid)

                common = ompdb.get_obsid_common(obsid)
                if common is None:
                    raise IdentifiedProblem('omp', 'obsid not in common table')
                release_date = common.release_date

                # Keep the last release date inspected in the info dictionary
                # so that if it's the one that causes a problem, we see
                # it in the output.
                job_info[job_id]['release'] = release_date

                if release_date > now:
                    raise IdentifiedProblem(
                        'release',
                        'future release date ' +
                        release_date.strftime('%Y-%m-%d'))

                logger.debug('Fetching OMP obslog status for %s', obsid)
                status = ompdb.get_obsid_status(obsid)

                if status is not None:
                    logger.debug('Got obslog status: %i', status)

                    if status == OMPState.JUNK:
                        raise IdentifiedProblem('junk', 'observation is junk')

            # Check whether all of the files are at CADC.
            if check_at_cadc:
                logger.debug('Retrieving input file list')
                files = db.get_input_files(job_id)

                logger.debug('Checking for files at CADC')
                found = check_cadc_files(files)

                if not all(found):
                    raise IdentifiedProblem(
                        'missing',
                        'file {0} missing at CADC'.format(
                            files[found.index(False)]))

            # Check whether all the observations are in CAOM-2.
            logger.debug('Checking for observatons in CAOM-2')
            obsid_list = list(obsids)
            found = caom2.check_obsids(obsid_list)

            if not all(found):
                raise IdentifiedProblem(
                    'caom2',
                    'observation {0} missing from CAOM-2'.format(
                        obsid_list[found.index(False)]))

        except IdentifiedProblem as problem:
            logger.info('Job {0}: {1}'.format(job_id, problem.message))
            category.setdefault(problem.category, []).append(job_id)

        else:
            logger.info('Job {0}: problem unknown'.format(job_id))
            category['unknown'].append(job_id)

    # Now go through the categories and output information about them.
    for (cat, jobs) in category.items():
        if jobs:
            print('Category {0}: {1} job(s)'.format(cat, len(jobs)))

            if yes_or_no_question('Show detail?', False):
                for job in jobs:
                    info = job_info[job]
                    first_obs = info['obs'][0]
                    # There is no release date if the problem was identified
                    # before one was stored (e.g. the first observation was
                    # not in the COMMON table), so do not assume the key
                    # is present.
                    print(job,
                          first_obs.instrument,
                          first_obs.utdate,
                          first_obs.obsnum,
                          first_obs.project,
                          first_obs.obstype,
                          first_obs.scanmode,
                          info.get('release'))

                # Resubmission is only offered after the details have
                # been shown.
                if yes_or_no_question('Resubmit jobs?', False):
                    for job in jobs:
                        db.change_state(
                            job, JSAProcState.QUEUED,
                            'Resubmitting job after unauthorized error',
                            state_prev=JSAProcState.ERROR)
Exemplo n.º 27
0
def etransfer_poll_output(dry_run):
    """High level polling function to use from scripts.

    Looks at every job at location 'JAC' in the TRANSFERRING state whose
    task is configured for e-transfer (task information present, no custom
    transfer command and the e-transfer flag set).  For each such job:

    * if any of its output files are still known to the e-transfer
      directories, the job is either left alone (transfer in progress) or
      failed (one or more files were rejected);
    * otherwise each output file is looked up at CADC and its MD5 sum
      compared with the stored value.  If everything matches the job is
      moved to the INGESTION state, otherwise it is moved to ERROR.

    :param dry_run: if true, skip the e-transfer configuration check and
        do not change any job state in the database.

    :raises CommandError: if the list of output files could not be
        retrieved for at least one job.
    """

    logger.debug('Preparing to poll the e-transfer system for job output')

    # When not in dry run mode, check that etransfer is being
    # run on the correct machine by the correct user.
    if not dry_run:
        etransfer_check_config(any_user=True)

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    logger.debug('Retrieving task information from database')
    task_info = db.get_task_info()

    n_err = 0

    for job in db.find_jobs(location='JAC', state=JSAProcState.TRANSFERRING):
        # Retrieve this job's task information.
        job_task_info = task_info.get(job.task)

        if ((job_task_info is None) or (job_task_info.command_xfer is not None)
                or (not job_task_info.etransfer)):
            # Job not from an e-transfer task: skip it.
            continue

        job_id = job.id
        logger.debug('Checking state of job %i', job_id)

        logger.debug('Retrieving list of output files')
        try:
            file_info = db.get_output_files(job_id, with_info=True)
            files = [x.filename for x in file_info]

        except NoRowsError:
            # Count the failure but keep polling the remaining jobs; the
            # error is reported once at the end of the run.
            logger.error('Did not find output files for job %i', job_id)
            n_err += 1
            continue

        try:
            logger.debug('Checking if files are in the e-transfer directories')
            etransfer_status = etransfer_file_status(files)
            if any(etransfer_status):
                # Each status is either None (file not found in the
                # e-transfer directories) or an (ok, directory) pair.
                rejection = []
                for (filename, status) in zip(files, etransfer_status):
                    if status is None:
                        continue

                    (ok, reason) = status
                    if not ok:
                        logger.error(
                            'File %s was rejected, reason: %s',
                            filename, reason)
                        rejection.append('{0} ({1})'.format(filename, reason))

                if rejection:
                    raise ETransferError('files rejected: {0}'.format(
                        ', '.join(rejection)))

                # Otherwise we found files in the "in progress" directories
                # so proceed to the next job.
                continue

            logger.debug('Checking if all files are at CADC')
            lost = []
            for info in file_info:
                cadc_file_info = fetch_cadc_file_info(info.filename)

                if cadc_file_info is None:
                    logger.error(
                        'Job %i file %s gone from e-transfer '
                        'but not at CADC', job_id, info.filename)
                    lost.append(info.filename)

                # Must be "elif": there is nothing to compare when the file
                # is absent, and indexing None would raise a TypeError which
                # is not handled below and would abort the whole poll.
                elif cadc_file_info['content-md5'] != info.md5:
                    logger.error(
                        'Job %i file %s gone from e-transfer '
                        'but MD5 sum does not match', job_id, info.filename)
                    lost.append(info.filename)

            if lost:
                raise ETransferError('files lost or corrupt: {0}'.format(
                    ', '.join(lost)))

            # All files present and with correct MD5 sums.
            logger.info('Job %i appears to have all files at CADC', job_id)
            if not dry_run:
                db.change_state(job_id,
                                JSAProcState.INGESTION,
                                'Output files finished e-transfer',
                                state_prev=JSAProcState.TRANSFERRING)

        except ETransferError as e:
            logger.error('Job %i failed e-transfer: %s', job_id, e.message)
            if not dry_run:
                db.change_state(job_id,
                                JSAProcState.ERROR,
                                'Job failed e-transfer: {0}'.format(e.message),
                                state_prev=JSAProcState.TRANSFERRING)

    logger.debug('Done polling the e-transfer system')

    if n_err:
        raise CommandError('Errors were encountered polling e-transfer')
Exemplo n.º 28
0
def create_web_app():
    """Function to prepare the Flask web application.

    Creates the Flask application object, using the 'web/static' and
    'web/templates' directories under the home directory, and registers
    the route handlers, template filters / tests and context processor
    on it.  All handlers share the single database object obtained here.

    Note that the names of the nested view functions are also the Flask
    endpoint names passed to url_for, so they should not be renamed
    without updating every url_for call which refers to them.

    Returns the Flask application object.
    """

    home = get_home()
    db = get_database()
    database_name = get_config().get('database', 'database')

    app = Flask(
        'jsa_proc',
        static_folder=os.path.join(home, 'web', 'static'),
        template_folder=os.path.join(home, 'web', 'templates'),
    )

    # Secret key from the 'web' section of the configuration; the
    # handlers below use the Flask session and flash messages.
    app.secret_key = get_config().get('web', 'key')

    # Web authorization -- mostly taken from flask docs snippets 8
    # http://flask.pocoo.org/snippets/8
    def check_auth(password):
        """
        Check that the staff password has been used.

        (Note that we don't care what the username is).
        """
        return check_staff_password(password)

    def authenticate():
        """
        Send a 401 response so that we can log in.

        The response body is the 'logout.html' template, which is given
        the referring URL as its 'redirect' value.
        """

        return Response(render_template('logout.html',
                                        redirect=request.referrer),
                        401, {'WWW-Authenticate': loginstring})

    def requires_auth(f):
        """
        A decorator to wrap functions that require authorization.

        The wrapped function is only called if the request carries
        authorization information whose password passes check_auth;
        otherwise the 401 response from authenticate is returned.
        """

        @wraps(f)
        def decorated(*args, **kwargs):
            auth = request.authorization
            if not auth or not check_auth(auth.password):
                return authenticate()
            return f(*args, **kwargs)

        return decorated

    # Route Handlers.

    @app.route('/')
    def home_page():
        """Redirect the root URL to the task summary page."""
        raise HTTPRedirect(url_for('task_summary'))

    @app.route('/job/')
    @templated('job_list.html')
    def job_list():
        """Show the job list matching the query string parameters."""
        # Prepare query arguments list: special parameters first.
        kwargs = {
            'state': request.args.getlist('state'),
            'mode': request.args.get('mode', 'JSAProc'),
            'tiles': request.args.get('tiles')
        }

        # Now add regular string parameters, including those from
        # jcmtobsinfo.
        params = [
            'location',
            'task',
            'date_min',
            'date_max',
            'qa_state',
            'sourcename',
            'obsnum',
            'project',
            'number',
            'tau_min',
            'tau_max',
        ]

        params.extend(ObsQueryDict.keys())

        for key in params:
            kwargs[key] = request.args.get(key, None)

        # Process empty strings used as null form parameters.
        for argname in kwargs:
            if kwargs[argname] == '':
                kwargs[argname] = None

        # Store the query in the session.
        # (It is read back by the job_info and job_qa handlers.)
        session['job_query'] = kwargs

        # Finally prepare the template context.
        return prepare_job_list(
            db,
            page=request.args.get('page', None),
            **kwargs)

    @app.route('/image/<task>/piechart')
    def summary_piechart(task='None'):
        """Return the summary pie chart for a task.

        The literal task name 'None' in the URL is passed on as None.
        """
        if task == 'None':
            task = None
        obsquerydict = {}
        for key in ObsQueryDict.keys():
            obsquerydict[key] = request.args.get(key, None)
        date_min = request.args.get('date_min', None)
        date_max = request.args.get('date_max', None)
        return prepare_summary_piechart(db, task=task,
                                        obsquerydict=obsquerydict,
                                        date_min=date_min, date_max=date_max)

    @app.route('/summary/')
    @templated('task_summary.html')
    def task_summary():
        """Show the task summary page."""
        return prepare_task_summary(db)

    @app.route('/qa')
    @templated('task_qa_summary.html')
    def task_qa_summary():
        """Show the task QA summary page."""
        return prepare_task_qa_summary(db)

    @app.route('/job_summary/')
    @templated('job_summary.html')
    def job_summary():
        """Show the job summary, optionally limited by task and dates."""
        task = request.args.get('task', None)
        date_min = request.args.get('date_min', None)
        date_max = request.args.get('date_max', None)
        return prepare_job_summary(db, task=task, date_min=date_min,
                                   date_max=date_max)

    @app.route('/error_summary/')
    @templated('error_summary.html')
    def error_summary():
        """Show the error summary using the filters in the query string."""
        return prepare_error_summary(
            db,
            redirect_url=request.full_path,
            filtering=request.args.get('filtering', None),
            chosentask=request.args.get('chosentask', None),
            extrafilter=request.args.get('extrafilter', None),
            state_prev=request.args.get('state_prev', None),
            error_state=request.args.get('error_state', None),
            # True when the 'submit_filter' parameter is present at all.
            filter_done=('submit_filter' in request.args),
        )

    @app.route('/job/<int:job_id>', methods=['GET'])
    @templated('job_info.html')
    def job_info(job_id):
        """Show information about one job.

        The job list query saved in the session (if any) is passed on.
        """
        return prepare_job_info(db, job_id, session.get('job_query'))

    @app.route('/job/<int:job_id>/qa', methods=['GET'])
    @templated('job_qa.html')
    def job_qa(job_id):
        """Show QA information about one job.

        The job list query saved in the session (if any) is passed on.
        """
        return prepare_job_qa_info(db, job_id, session.get('job_query'))

    @app.route('/job/<int:job_id>/add_note', methods=['POST'])
    @requires_auth
    def job_add_note(job_id):
        """Add a note to a job and redirect back to its info page."""
        message = request.form['message']
        username = request.authorization['username']

        try:
            # Add the note.
            prepare_add_note(db, job_id, message, username)

            # Redirect back to the job info page.
            flash('The note has been saved.')
            raise HTTPRedirect(url_for('job_info', job_id=job_id))

        except ErrorPage as err:
            return error_page_response(err)

    @app.route('/job_change_state', methods=['POST'])
    @requires_auth
    def job_change_state():
        """Change the state of the posted jobs, then redirect to 'url'."""

        # Get the variables from POST
        newstate = request.form['newstate']
        state_prev = request.form['state_prev']
        message = request.form['message']
        job_ids = request.form.getlist('job_id')
        url = request.form['url']
        username = request.authorization['username']

        try:
            # Change the state.
            prepare_change_state(db, job_ids,
                                 newstate,
                                 state_prev,
                                 message,
                                 username)

            # Redirect the page to correct info.
            flash('The status has been changed to %s.' % JSAProcState.get_name(
                newstate))
            raise HTTPRedirect(url)

        except ErrorPage as err:
            return error_page_response(err)

    @app.route('/job_change_qa', methods=['POST'])
    @requires_auth
    def job_change_qa():
        """Change the QA state of the posted jobs, then redirect.

        The redirect goes to the 'url-next' form value when the form
        contains 'action_next', and to the 'url' form value otherwise.
        """

        # Get the variables from POST
        if "action_next" in request.form:
            url = request.form['url-next']
        else:
            url = request.form['url']
        qa_state = request.form['qa_state']
        message = request.form['message']
        job_ids = request.form.getlist('job_id')
        username = request.authorization['username']

        try:
            # Change the state.
            prepare_change_qa(db, job_ids,
                              qa_state,
                              message,
                              username,
                              )
            # Redirect the page to correct info.
            flash(
                'The QA status of job %s has been changed to %s.' %
                (str(' '.join(job_ids)), JSAQAState.get_name(qa_state))
            )
            raise HTTPRedirect(url)

        except ErrorPage as err:
            return error_page_response(err)

    # QA Nightly Summary pages
    @app.route('/qa-nightly')
    @templated('task_qa_summary_nightly.html')
    def qa_night_page():
        """
        By default, show the previous week.

        Note that prepare_task_qa_summary interprets dates as
        inclusive, so use 6 days for the time delta to get a week

        """
        # A missing or empty date_min defaults to six days before today.
        date_min = request.args.get('date_min', None)
        if date_min is None or date_min == '':
            date_min = (
                datetime.date.today() - datetime.timedelta(days=6)
            ).strftime('%Y-%m-%d')

        # A missing or empty date_max defaults to today.
        date_max = request.args.get('date_max', None)
        if date_max is None or date_max == '':
            date_max = datetime.date.today().strftime('%Y-%m-%d')
        return prepare_task_qa_summary(db, date_min=date_min,
                                       date_max=date_max, task='jcmt-nightly',
                                       byDate=True)

    @app.route('/login')
    @requires_auth
    def login():
        """Require authorization, then redirect to the referring page."""
        raise HTTPRedirect(request.referrer)

    @app.route('/logout')
    def logout():
        """Send the same 401 response as an unauthorized request."""
        return authenticate()

    # Image handling.
    @app.route('/job/<int:job_id>/preview/<preview>')
    def job_preview(job_id, preview):
        """Send a job preview file as a PNG image."""
        path = prepare_job_preview(job_id, preview)
        return send_file(path, mimetype='image/png')

    @app.route('/job/<int:job_id>/pdf/<preview>')
    def job_preview_pdf(job_id, preview):
        """Send a job preview file as a PDF document."""
        path = prepare_job_preview(job_id, preview, 'pdf')
        return send_file(path, mimetype='application/pdf')

    @app.route('/job/<int:job_id>/text/<text_file>')
    def job_text_file(job_id, text_file):
        """Send a job text file as plain text."""
        path = prepare_job_preview(job_id, text_file, 'txt')
        return send_file(path, mimetype='text/plain')

    @app.route('/job/<int:job_id>/log/<log>')
    def job_log_html(job_id, log):
        """Send a job log file as HTML."""
        path = prepare_job_log(job_id, log)
        return send_file(path, mimetype='text/html')

    @app.route('/job/<int:job_id>/log_text/<log>')
    def job_log_text(job_id, log):
        """Send a job log file as plain text."""
        path = prepare_job_log(job_id, log)
        return send_file(path, mimetype='text/plain')

    @app.route('/fop_summary', methods=['GET'])
    @templated('fop_summary.html')
    def fop_summary():
        """Summarize 'jcmt-nightly' jobs for a user's support projects.

        When both 'userid' and 'semester' are given, each project
        returned by the OMP database is mapped to a list of counts:
        total jobs, jobs in the ERROR state, then jobs with QA state
        BAD, QUESTIONABLE, UNKNOWN and GOOD, in that order.
        """
        userid = request.args.get('userid', None)
        semester = request.args.get('semester', None)
        projdict = {}
        if userid and semester:
            ompdb = get_omp_database(write_access=None)
            projects = ompdb.get_support_projects(str(userid), str(semester))
            for p in projects:
                jobs = db.find_jobs(
                    obsquery={'project': p}, task='jcmt-nightly')
                projdict[p] = [
                    len(jobs),
                    sum(1 for j in jobs if j.state == JSAProcState.ERROR),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.BAD),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.QUESTIONABLE),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.UNKNOWN),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.GOOD)]
        else:
            # NOTE(review): this value is never read; the template
            # context below is given projdict under the 'projects' key.
            projects = None

        return {'userid': userid, 'semester': semester, 'projects': projdict}

    # POST target which turns the submitted form values into a GET
    # request for the fop_summary page.
    @app.route('/fop_summary_getres', methods=['POST'])
    def fop_summary_getres():
        userid = request.form['userid']
        semester = request.form['semester']
        raise HTTPRedirect(url_for(
            'fop_summary', userid=userid, semester=semester))

    # Filters and Tests.

    @app.template_filter('state_name')
    def state_name_filter(state):
        return JSAProcState.get_name(state)

    @app.template_test('state_active')
    def state_active_test(state):
        return JSAProcState.get_info(state).active

    @app.template_filter('state_phase')
    def state_phase_filter(state):
        # Map the phase of a state to a short lower case name, raising
        # HTTPError if the phase is not one of those listed here.
        phase = JSAProcState.get_info(state).phase
        if phase == JSAProcState.PHASE_QUEUE:
            return 'queue'
        elif phase == JSAProcState.PHASE_FETCH:
            return 'fetch'
        elif phase == JSAProcState.PHASE_RUN:
            return 'run'
        elif phase == JSAProcState.PHASE_COMPLETE:
            return 'complete'
        elif phase == JSAProcState.PHASE_ERROR:
            return 'error'
        raise HTTPError('Unknown phase {0}'.format(phase))

    @app.template_filter('qa_state_name')
    def qa_state_name(qa_state):
        # The special value 'total' (in any case) is not a QA state:
        # it is displayed as 'Total' rather than being looked up.
        if qa_state.lower() != 'total':
            name = JSAQAState.get_name(qa_state)
        else:
            name = 'Total'
        return name

    @app.template_filter('omp_state_name')
    def omp_state_name_filter(ompstate):
        return OMPState.get_name(ompstate)

    @app.template_filter('uniq')
    def uniq_filter(xs):
        # Returns a set, so the original ordering is not preserved.
        return set(xs)

    @app.template_filter('datetimeformat')
    def datetimeformat(value, format='%Y-%m-%d<br>%H:%M'):
        return value.strftime(format)

    @app.template_filter('replace0')
    def replace_zero(value):
        # Display a zero value as a dash.
        if value == 0:
            return '-'
        else:
            return value

    # Names made available to every template.
    @app.context_processor
    def add_to_context():
        return {
            'url_for_omp': url_for_omp,
            'url_for_omp_comment': url_for_omp_comment,
            'database_name': database_name,
        }

    # Return the Application.
    return app
Exemplo n.º 29
0
def run_a_job(job_id, db=None, force=False):
    """
    Run the JSA processing of the given job_id (integer).

    By default it will look in the database determined by the JSA_proc
    config. Optionally a database object can be given for testing
    purposes.

    The job is first moved to the RUNNING state.  Unless "force" is
    specified, this is only permitted if the job is currently in the
    WAITING state.  Each file named in the job's input file list is then
    checked for existence before jsawrapdr is launched.  Afterwards the
    list of output files (and, for HPX tasks, output tiles) is stored
    in the database and the job is moved to the PROCESSED state.

    Returns the job_id if the job was processed or was moved to the
    MISSING state because an input file was absent.  Returns None if the
    job could not be moved to the RUNNING state.

    Raises JSAProcError if the input file list does not exist, or if an
    input file is absent and the job has already been in the MISSING
    state more than twice.
    """

    if not db:
        # Get link to database
        db = get_database()

    logger.info('About to run job %i', job_id)

    try:
        # Change status of job to Running, raise an error if not currently in
        # WAITING state.
        db.change_state(job_id, JSAProcState.RUNNING,
                        'Job is about to be run on host {0}'.format(
                            gethostname().partition('.')[0]),
                        state_prev=(None if force else JSAProcState.WAITING))

    except NoRowsError:
        # If the job was not in the WAITING state, it is likely that another
        # process is also trying to run it.  Trap the error so that the
        # ErrorDecorator does not put the job into the ERROR state as that
        # will cause the other process to fail to set the job to PROCESSED.
        logger.error('Job %i cannot be run because it is not waiting',
                     job_id)
        return

    # Input file_list -- this should be better? or in jsawrapdr?

    input_dir = get_input_dir(job_id)
    input_file_list_path = os.path.join(input_dir, input_list_name)
    if not os.path.exists(input_file_list_path):
        raise JSAProcError('Input file list %s not found for job_id %i'
                           % (input_file_list_path, job_id))

    # Check every file on input_file list exists.  The "with" block ensures
    # that the list is closed on every exit path, including the early
    # return and any exception raised by the database calls.
    with open(input_file_list_path, 'r') as inputfl:
        for input_file in inputfl:
            input_file = input_file.strip()
            if os.path.isfile(input_file):
                continue

            # If a file is missing, get log.
            logstring = 'Input file %s for job %i has gone missing' % (
                input_file, job_id)
            logger.error(logstring)
            logs = db.get_logs(job_id)
            states = [i.state_new for i in logs]

            # If it has been in the state MISSING at most twice before,
            # then try again.
            if states.count(JSAProcState.MISSING) <= 2:
                logstring += ': moving to missing.'
                logger.warning('Moving job %i to state MISSING due to '
                               'missing file(s) %s',
                               job_id, input_file)
                db.change_state(job_id, JSAProcState.MISSING,
                                logstring, state_prev=JSAProcState.RUNNING)
                return job_id

            # If it has been in the missing STATE more than two times,
            # give up and raise an error so that it can be fixed manually.
            logger.info('Moving job %s to state ERROR due to missing'
                        ' file(s).', job_id)
            raise JSAProcError('Input file %s for job %i has gone missing.'
                               % (input_file, job_id))

    logger.debug('All input files found for job %s.', job_id)

    # Get the mode and drparameters of the job.
    job = db.get_job(id_=job_id)
    mode = job.mode
    drparameters = job.parameters

    # Get the starlink to be used from the task table.
    starpath = None
    version = None
    command_run = None
    raw_output = None
    try:
        task_info = db.get_task_info(job.task)
        starpath = task_info.starlink_dir
        version = task_info.version
        command_run = task_info.command_run
        raw_output = task_info.raw_output
    except NoRowsError:
        # If the task doesn't have task info, leave "starpath" as None
        # so that jsawrapdr_run uses the default value from the configuration
        # file.
        pass

    # Run the processing job.
    logger.debug('Launching jsawrapdr: mode=%s, parameters=%s',
                 mode, drparameters)
    # The value returned by jsawrapdr_run is not used here.
    jsawrapdr_run(
        job_id, input_file_list_path, mode, drparameters,
        cleanup='cadc', location='JAC', starlink_dir=starpath,
        persist=True, version=version, command_run=command_run,
        raw_output=raw_output)

    # Create list of output files.
    logger.debug('Preparing list of output files')
    output_files = get_output_files(job_id)

    # write output files to table
    logger.debug('Storing list of output files')
    db.set_output_files(job_id, output_files)

    # If the task name matches the HPX task pattern, get tiles from the
    # list of output_files and write to tile table in db.
    if hpx_task.search(job.task):
        logger.debug('Storing list of output tiles for HPX job %s', job_id)
        tiles = hpx_tiles_from_filenames([x.filename for x in output_files])
        db.set_tilelist(job_id, tiles)
        logger.debug('Job %s produced output on tiles %s',
                     job_id, ', '.join(str(i) for i in tiles))

    # Change state of job.
    db.change_state(
        job_id, JSAProcState.PROCESSED,
        'Job has been successfully processed',
        state_prev=JSAProcState.RUNNING)

    logger.info('Done running job %i', job_id)

    return job_id