Example #1
def _etransfer_find_files():
    """Find files in the e-transfer directories."""

    config = get_config()
    transdir = config.get('etransfer', 'transdir')

    filestatus = {}

    for (dirpath, dirnames, filenames) in os.walk(transdir):
        if not dirpath.startswith(transdir):
            raise Exception('os.walk returned dirpath outside transdir')

        dirs = dirpath[len(transdir) + 1:].split(os.path.sep)

        if not (filenames and dirs):
            continue

        if dirs[0] == 'reject':
            for file in filenames:
                filestatus[file] = (False, dirs[1])
        else:
            for file in filenames:
                filestatus[file] = (True, dirs[0])

    return filestatus
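
The returned mapping goes from file name to a (status, directory) pair: True
for files found in a normal transfer directory, False for rejected files
(where the directory is the rejection code). A minimal sketch of how a caller
might report on the result:

filestatus = _etransfer_find_files()
for filename, (ok, dirname) in filestatus.items():
    if ok:
        print('{0}: in transfer directory {1}'.format(filename, dirname))
    else:
        print('{0}: rejected with code {1}'.format(filename, dirname))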
Example #2
def ptransfer_put(proc_dir, filename, ad_stream, md5sum):
    """Attempt to put the given file into the archive at CADC.

    Retries settings are given by the configuration file entries
    etransfer.max_tries and etransfer.retry_delay (in seconds).
    """

    config = get_config()

    max_retries = int(config.get('etransfer', 'max_tries'))
    retry_delay = int(config.get('etransfer', 'retry_delay'))

    for i in range(0, max_retries):
        try:
            put_cadc_file(filename, proc_dir, ad_stream)

            return

        except JSAProcError:
            logger.exception('Failed to put file {0} (try {1} of {2})'
                             .format(filename, i + 1, max_retries))

        sleep(retry_delay)

    raise PTransferFailure('Transfer failed')
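
Note that ptransfer_put raises PTransferFailure only after exhausting all
max_tries attempts (and it sleeps once more after the final failure before
raising). A minimal usage sketch, assuming the surrounding p-transfer code
supplies proc_dir, filename, ad_stream and md5sum:

try:
    ptransfer_put(proc_dir, filename, ad_stream, md5sum)
except PTransferFailure:
    # The caller decides how to recover, e.g. by returning the file to
    # its original stream directory (see ptransfer_poll below).
    logger.error('Transfer of %s failed after all retries', filename)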
Example #3
def get_omp_database(write_access=None):
    """Construct an OMP database access object.

    Write access can either be None (the default), "omp"
    or "jcmt".  Read-only and OMP credentials come from
    the OMP siteconfig file.  JCMT database write permissions
    come from the JSA Proc configuration system.
    """

    global omp_database_access

    if write_access not in omp_database_access:
        # Connect using the "hdr_database" set of credentials, which is
        # the "staff" user (supposed to be read only) at the time of
        # writing, unless the write_access option is specified.
        if write_access is None:
            config = get_omp_siteconfig()
            credentials = 'hdr_database'
        elif write_access == 'omp':
            config = get_omp_siteconfig()
            credentials = 'database'
        elif write_access == 'jcmt':
            config = get_config()
            credentials = 'database_jcmt'
        else:
            raise JSAProcError(
                'Unknown write_access request {0}'.format(write_access))

        omp_database_access[write_access] = OMPDB(
            server=config.get(credentials, 'server'),
            user=config.get(credentials, 'user'),
            password=config.get(credentials, 'password'),
            read_only=(write_access is None))

    return omp_database_access[write_access]
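
The connections are memoized in a module-level dictionary keyed by the
write_access value. A sketch of the assumed supporting definition and typical
calls (the initializer itself is outside this excerpt):

# Assumed module-level cache: write_access value -> OMPDB connection.
omp_database_access = {}

# Read-only connection using the OMP "hdr_database" credentials.
db_ro = get_omp_database()

# Writable connection to the JCMT database.
db_rw = get_omp_database(write_access='jcmt')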
Example #4
def run_job(job_id=None, db=None, force=False, task=None):
    """
    Run the JSA processing of the next job. This will select the highest
    priority job in state 'WAITING' with location 'JAC'.

    Optionally an integer job_id can be given instead to specify a specific job.

    By default it will look in the database determined by the JSA_proc config.
    Optionally a database object can be given for testing purposes.

    Any errors raised will be logged in the 'log' table for the job_id.

    If insufficient disk space is available (as configured by the disk_limit
    section of the configuration file) then this function returns without
    doing anything.
    """

    # Check we have sufficient disk space for running to occur.
    config = get_config()
    output_limit = float(config.get('disk_limit', 'run_min_output_space'))
    scratch_limit = float(config.get('disk_limit', 'run_min_scratch_space'))
    output_space = get_output_dir_space()
    scratch_space = get_scratch_dir_space()

    if output_space < output_limit:
        logger.warning('Insufficient output disk space: %f / %f GiB required',
                       output_space, output_limit)
        return

    if scratch_space < scratch_limit:
        logger.warning('Insufficient scratch disk space: %f / %f GiB required',
                       scratch_space, scratch_limit)
        return

    # Get a link to the database.
    if not db:
        db = get_database()

    # Get next job if a job id is not specified
    if not job_id:
        force = False

        logger.debug('Looking for a job to run')

        jobs = db.find_jobs(state=JSAProcState.WAITING,
                            location='JAC',
                            prioritize=True,
                            number=1,
                            sort=True,
                            task=task)

        if jobs:
            job_id = jobs[0].id

        else:
            logger.warning('Did not find a job to run!')
            return

    run_a_job(job_id, db=db, force=force)
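
The disk space limits are read with float(), so the configuration holds plain
numbers of GiB. A hypothetical configuration fragment covering the disk_limit
entries used in these examples (the values are illustrative only):

[disk_limit]
run_min_output_space = 100.0
run_min_scratch_space = 250.0
fetch_min_space = 50.0
etransfer_min_space = 10.0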
Example #5
    def test_database_config(self):
        config = get_config()

        self.assertTrue(config.has_section('database'))
        self.assertTrue(config.has_option('database', 'host'))
        self.assertTrue(config.has_option('database', 'database'))
        self.assertTrue(config.has_option('database', 'user'))
        self.assertTrue(config.has_option('database', 'password'))
Example #6
    def test_home_var(self):
        """Test that we get an error if the file doesn't exists.

        Also checks that the environment variable is being read.
        """

        os.environ['JSA_PROC_DIR'] = '/HORSEFEATHERS'
        with self.assertRaises(JSAProcError):
            config = get_config()
Example #7
def make_misc_scratch_dir(name):
    """Create a non-job-based scratch directory.

    A name should be given to set the top level directory within
    "scratch".
    """

    config = get_config()
    return _make_scratch_dir(
        os.path.join(config.get('directories', 'scratch'), name))
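
Several of these helpers resolve their base paths from the "directories"
section of the configuration. A hypothetical fragment (the scratch and log
entries appear in these examples; input and output are assumed from the
corresponding directory helpers, and all paths are illustrative):

[directories]
scratch = /export/data/jsaproc/scratch
log = /export/data/jsaproc/log
input = /export/data/jsaproc/input
output = /export/data/jsaproc/output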
Example #8
def _get_config_dir_size(type_):
    """
    Return size of the directory tree described in the config, in GiB.

    """
    config = get_config()
    dir_path = config.get('directories', type_)

    size = get_size(dir_path)

    return size
Example #9
def _etransfer_check_space():
    """Check that sufficient space is available for e-transfer.

    Raises a CommandError if a problem is detected.
    """

    config = get_config()
    required_space = float(config.get('disk_limit', 'etransfer_min_space'))
    etransfer_space = get_space(config.get('etransfer', 'transdir'))

    if etransfer_space < required_space:
        raise CommandError(
            'Insufficient disk space: {0} / {1} GiB required'.format(
                etransfer_space, required_space))
Example #10
def fetch(job_id=None, db=None, force=False, replaceparent=False, task=None):
    """
    Assemble the files required to process a job.

    If it is not given a job_id, it will take the next JAC job
    with the highest priority and a state of MISSING.

    Optionally allows a database object to be given for testing purposes.
    Otherwise uses usual database from config file.

    This will raise an error if the job is not initially in the MISSING state.
    This will advance the state of the job to WAITING on completion.
    Any errors raised in the process will be logged to the job log.

    """

    # Check we have sufficient disk space for fetching to occur.
    input_space = get_input_dir_space()
    required_space = float(get_config().get('disk_limit', 'fetch_min_space'))

    if input_space < required_space and not force:
        logger.warning('Insufficient disk space: %f / %f GiB required',
                       input_space, required_space)
        return

    # Get the database.
    if not db:
        db = get_database()

    # Get next job if a job_id is not specified.
    if not job_id:
        force = False

        logger.debug('Looking for a job for which to fetch data')

        jobs = db.find_jobs(state=JSAProcState.MISSING,
                            location='JAC',
                            prioritize=True,
                            number=1,
                            sort=True,
                            task=task)

        if jobs:
            job_id = jobs[0].id

        else:
            logger.warning('Did not find a job to fetch!')
            return

    fetch_a_job(job_id, db=db, force=force, replaceparent=replaceparent)
Example #11
def ssh_etransfer_send_output(job_id):
    """SSH to the e-transfer host to request the e-transfer of
    a job's output files."""

    config = get_config()

    logger.debug('Attempting e-transfer of job %i output', job_id)

    subprocess.check_call([
        '/usr/bin/ssh', '-x', '-i',
        config.get('etransfer', 'key'),
        '{0}@{1}'.format(config.get('etransfer', 'user'),
                         config.get('etransfer', 'machine')),
        str(job_id)],
        shell=False)
Example #12
def etransfer_check_config(any_user=False):
    """Check the configuration is good for for e-transfer.

    Raises a CommandError if a problem is detected.
    """

    config = get_config()
    etransfermachine = config.get('etransfer', 'machine')
    etransferuser = config.get('etransfer', 'user')

    if pwd.getpwuid(os.getuid()).pw_name != etransferuser and not any_user:
        raise CommandError(
            'etransfer should only be run as {0}'.format(etransferuser))
    if gethostname().partition('.')[0] != etransfermachine:
        raise CommandError(
            'etransfer should only be run on {0}'.format(etransfermachine))
Example #13
def _get_dir(type_, job_id):
    if not isinstance(job_id, int):
        raise JSAProcError('Cannot determine directory '
                           'for non-integer job identifier')

    config = get_config()
    basedir = config.get('directories', type_)

    # Turn the job ID into a decimal string of at least 9
    # digits, then create subdirectories by removing the last 6
    # and then the last 3 digits.  This means that we retain the
    # full length name in the final directory (unlike Git) to
    # try to prevent accidental collisions if the directories are
    # manipulated manually.  The digits are counted back from the
    # end of the decimal string so that any digits in excess of
    # the fixed 9 end up in the first component.
    decimal = '{0:09d}'.format(job_id)
    return os.path.join(basedir, decimal[:-6], decimal[:-3], decimal)
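
A quick worked example of the job-ID-to-path scheme, runnable as-is:

decimal = '{0:09d}'.format(1234567)
assert decimal == '001234567'
assert (decimal[:-6], decimal[:-3]) == ('001', '001234')
# so the job directory is <basedir>/001/001234/001234567

# Digits in excess of the fixed 9 end up in the first component:
decimal = '{0:09d}'.format(12345678901)
assert (decimal[:-6], decimal[:-3]) == ('12345', '12345678')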
Example #14
def etransfer_query_output(job_id):
    """Investigate the e-transfer status of the output of a job."""

    db = get_database()

    config = get_config()
    transdir = config.get('etransfer', 'transdir')

    files = db.get_output_files(job_id)

    problem_files = []

    print('{0:110} {1:5} {2:12} {3:5}'.format('File', 'ET', 'Directory', 'AD'))

    for file in zip(files,
                    etransfer_file_status(files),
                    check_cadc_files(files)):
        (filename, etransfer_status, ad_status) = file

        if etransfer_status is None:
            (ok, dir) = (None, '')
        else:
            (ok, dir) = etransfer_status

        print('{0:110} {1:5} {2:12} {3:5}'.format(
            filename, repr(ok), dir, repr(ad_status)))

        if ok is False:
            problem_files.append(
                os.path.join(transdir, 'reject', dir, filename))

    if problem_files:
        if yes_or_no_question(
                'Delete rejected files from e-transfer directories?'):
            for file in problem_files:
                logger.debug('Deleting file %s', file)
                os.unlink(file)

            if yes_or_no_question('Re-try e-transfer?'):
                # Clear cache before attempting to e-transfer since we just
                # removed the files from the e-transfer directories.
                _etransfer_clear_cache()

                etransfer_send_output(job_id, dry_run=False, force=True)
Example #15
def valid_fits(filename, allow_warnings=True):
    """Check whether a given file is a valid FITS file.

    This uses fitsverify with the -q option to determine the number
    of errors and warnings.  Returns True unless there are errors,
    or there are warnings and the allow_warnings option is not set.
    """

    fitsverify = get_config().get('utilities', 'fitsverify')

    # Fitsverify exits with bad status even if there are warnings, so we
    # can't just use subprocess.check_output.
    logger.debug("Running fitsverify on file %s", filename)
    p = subprocess.Popen([fitsverify, '-q', filename],
                         stdout=subprocess.PIPE,
                         preexec_fn=restore_signals)

    (out, _) = p.communicate()
    out = out.decode()
    logger.debug(out.rstrip())
    if out.startswith('verification OK'):
        return True

    elif not allow_warnings:
        return False

    m = fitsverify_output.search(out)

    if not m:
        logger.error("Fitsverify output did not match expected pattern")
        return False

    # warnings = int(m.group(1))
    errors = int(m.group(2))

    # Already know we are in "allow_warnings" mode, so just check the
    # number of actual errors.
    return not errors
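
The fitsverify_output pattern is defined elsewhere in the module; a plausible
definition, assuming it matches fitsverify's usual summary line, would be:

import re

# Assumed pattern for the summary line printed by fitsverify, e.g.
# "**** Verification found 3 warning(s) and 0 error(s). ****"
fitsverify_output = re.compile(
    r'Verification found (\d+) warning\(s\) and (\d+) error\(s\)')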
Example #16
def valid_hds(filepath):
    """
    Checks to see if a given file is a valid hds file.

    This uses hdstrace, and assumes if it can provide a return
    code of 0 then the file is valid.
    It runs hdstrace from the starlink build defined in the
    run_job.starpath section of the config file.

    parameter:
    filepath: string
    full filename including path and suffix.

    returns Boolean
    True: file is valid hds
    False: file is not valid hds.
    """

    # Path to hdstrace.
    config = get_config()
    starpath = config.get('job_run', 'starpath')
    com_path = os.path.join(starpath, 'bin', 'hdstrace')

    # Environmental variables.
    myenv = os.environ.copy()
    myenv['ADAM_NOPROMPT'] = '1'
    myenv['ADAM_EXIT'] = '1'
    myenv['LD_LIBRARY_PATH'] = os.path.join(starpath, 'lib')

    # Run hdstrace.
    returncode = subprocess.call([com_path, filepath, 'QUIET'],
                                 env=myenv,
                                 stderr=subprocess.STDOUT,
                                 shell=False)

    # Status is True for returncode=0, False otherwise.
    return returncode == 0
Example #17
def poll_raw_ingestion(date_start,
                       date_end,
                       quick=False,
                       no_transfer_check=False,
                       dry_run=False):
    ignore_instruments = [
        x.strip()
        for x in get_config().get('rawingest', 'ignore_instruments').split(',')
    ]

    logger.debug('Connecting to database with read-only access')
    db = get_omp_database()

    logger.info('Searching for observations to ingest')
    obsids = db.find_obs_for_ingestion(date_start,
                                       date_end,
                                       no_status_check=quick,
                                       no_transfer_check=no_transfer_check,
                                       ignore_instruments=ignore_instruments)
    logger.info('Found %i observations', len(obsids))

    if not dry_run:
        logger.debug('Re-connecting to database with write access')
        db = get_omp_database(write_access='jcmt')

    n_ok = n_err = 0
    for obsid in obsids:
        if _ingest_raw_observation(obsid, db=db, dry_run=dry_run):
            n_ok += 1
        else:
            n_err += 1

    logger.info('Ingestion complete: %i successful, %i errors', n_ok, n_err)

    if n_err:
        raise CommandError('Errors encountered during ingestion')
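
The ignore_instruments entry is parsed as a comma-separated list, so a
hypothetical configuration fragment might read (the instrument names here are
illustrative only):

[rawingest]
ignore_instruments = INSTRUMENT_A, INSTRUMENT_B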
Example #18
def _etransfer_send(job_id, dry_run, db, force):
    """Private function to copy job output into the e-transfer
    directories.

    Runs under the ErrorDecorator so that errors are captured.
    """

    config = get_config()
    scratchdir = config.get('etransfer', 'scratchdir')
    transdir = config.get('etransfer', 'transdir')
    group_id = grp.getgrnam(config.get('etransfer', 'group')).gr_gid

    logger.debug('Retrieving list of output files')
    try:
        file_info = db.get_output_files(job_id, with_info=True)
        files = [x.filename for x in file_info]

    except NoRowsError:
        raise CommandError('No output files found for job {0}'.format(job_id))

    logger.debug('Checking that the MD5 sum for each file is defined')
    for info in file_info:
        if info.md5 is None:
            raise CommandError('File {0} MD5 sum is missing from database'.
                               format(info.filename))

    logger.debug('Checking that all files are present')
    outdir = get_output_dir(job_id)
    for file in files:
        if not os.path.exists(os.path.join(outdir, file)):
            raise CommandError('File {0} not in directory {1}'.
                               format(file, outdir))

    logger.debug('Checking that files are not in the scratch directory')
    scratchfiles = os.listdir(scratchdir)
    for file in files:
        if file in scratchfiles:
            raise CommandError('File {0} is in e-transfer scratch directory'.
                               format(file))

    logger.debug('Checking whether the files are already in e-transfer')
    etransfer_status = etransfer_file_status(files)
    if any(etransfer_status):
        for (file, status) in zip(files, etransfer_status):
            if status is not None:
                (ok, dir) = status
                logger.error('File {0} already in e-transfer directory {1}'.
                             format(file, dir))
        raise CommandError('Some files are already in e-transfer directories')

    for info in file_info:
        file = info.filename
        cadc_file_info = fetch_cadc_file_info(file)

        if cadc_file_info is not None:
            # We need to check whether the file is not, in fact, different
            # from the current version, because in that case we are not
            # allowed to "replace" it.
            cadc_file_md5 = cadc_file_info['content-md5']

            if info.md5 == cadc_file_md5:
                logger.info('File %s is unchanged, skipping replacement',
                            file)
                continue

            target_type = 'replace'
        else:
            target_type = 'new'

        logger.info('Placing file %s in "%s" directory', file, target_type)

        source_file = os.path.join(outdir, file)
        scratch_file = os.path.join(scratchdir, file)
        target_file = os.path.join(transdir, target_type, file)

        if not dry_run:
            # Copy the file into the scratch directory and prepare its
            # file permissions.
            shutil.copyfile(source_file, scratch_file)
            os.chown(scratch_file, -1, group_id)
            os.chmod(scratch_file, 0o664)

            # Move the file to the target directory.  This is done so that
            # the file appears atomically in the target directory in order
            # to prevent the e-transfer system seeing only part of the file.
            os.rename(scratch_file, target_file)

        else:
            logger.debug('Skipping e-transfer (DRY RUN)')

    # Finally set the state of the job to TRANSFERRING
    if not dry_run:
        db.change_state(
            job_id, JSAProcState.TRANSFERRING,
            'Output files have been copied into the e-transfer directories',
            state_prev=(None if force else JSAProcState.PROCESSED))
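
The final os.rename is only atomic when scratchdir and transdir are on the
same filesystem. A sketch of a sanity check one could run ahead of time (not
part of the original code):

import os

def same_filesystem(path_a, path_b):
    """Return True if both paths live on the same device."""
    return os.stat(path_a).st_dev == os.stat(path_b).st_dev

# e.g.: assert same_filesystem(scratchdir, transdir)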
Example #19
def _get_config_dir_space(type_):
    """Return space in a configured directory tree (GiB)."""

    config = get_config()
    return get_space(config.get('directories', type_))
Example #20
def ptransfer_check(proc_dir, filename, stream, md5sum):
    """Check if a file is suitable for transfer to CADC.

    Given the directory, file name and stream ("new" or "replace"), determine
    if a file is acceptable.  This function aims to replicate the checks which
    would have been made by the CADC e-transfer process.  Checking for
    decompressibility is not implemented as it is not expected that we will be
    transferring compressed files.

    Raises a PTransferException (including a rejection code) if a problem
    is detected.  No changes to the filesystem should be made, so this
    function should be safe to call in dry run mode.

    Returns the CADC AD stream to be used for the file.  This is determined by
    a mapping from namecheck section to stream name in the configuration file
    entry etransfer.ad_stream.
    """

    config = get_config()

    ad_streams = dict(map(
        lambda x: x.split(':'),
        config.get('etransfer', 'ad_stream').split(' ')))

    proc_file = os.path.join(proc_dir, filename)

    # Check for permission to read the file.
    if not os.access(proc_file, os.R_OK):
        raise PTransferException('permission')

    # Check if file size is zero.
    if os.stat(proc_file).st_size == 0:
        raise PTransferException('empty')

    # Check extension and validity.
    (root, ext) = os.path.splitext(filename)
    if ext == '.sdf':
        if not valid_hds(proc_file):
            raise PTransferException('corrupt')

    elif ext == '.fits':
        if not valid_fits(proc_file):
            raise PTransferException('fitsverify')

    elif ext == '.png':
        if not valid_png(proc_file):
            raise PTransferException('corrupt')

    else:
        raise PTransferException('filetype')

    # Name-check.
    namecheck_section = check_file_name(filename, True)
    if namecheck_section is None:
        raise PTransferException('name')
    if namecheck_section in ad_streams:
        ad_stream = ad_streams[namecheck_section]
    else:
        raise PTransferException('stream')

    # Check correct new/replacement stream.
    try:
        cadc_file_info = fetch_cadc_file_info(filename)
    except JSAProcError:
        raise PTransferFailure('Unable to check CADC file info')

    if stream == 'new':
        if cadc_file_info is not None:
            raise PTransferException('not_new')

    elif stream == 'replace':
        if cadc_file_info is None:
            raise PTransferException('not_replace')
        elif md5sum == cadc_file_info['content-md5']:
            raise PTransferException('unchanged')

    else:
        raise Exception('unknown stream {0}'.format(stream))

    return ad_stream
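
The etransfer.ad_stream entry is a space-separated list of
namecheck-section:stream pairs. A runnable sketch of the parsing above, with a
hypothetical value:

raw = 'SECTION_A:stream-a SECTION_B:stream-b'  # hypothetical entry
ad_streams = dict(map(lambda x: x.split(':'), raw.split(' ')))
assert ad_streams == {'SECTION_A': 'stream-a', 'SECTION_B': 'stream-b'}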
Example #21
def jsawrapdr_run(job_id, input_file_list, mode, drparameters,
                  cleanup='cadc', location='JAC', persist=False,
                  jsawrapdr=None, starlink_dir=None,
                  version=None, command_run=None,
                  raw_output=None):
    """
    Execute jsawrapdr script from Python.

    This function calls jsawrapdr with the following options:

    jsawrapdr --outdir=configbase/scratch/$job_id
              --inputs=input_file_list
              --id = jac-$job_id
              --mode=$mode
              --drparameters=$drparameters
              --cleanup=$cleanup (cadc by default)
              --location=$location (JAC by default)
              --fileversion=$version (if not None)
              --drcommand=$command_run (if not None)

         if persist is True, then it adds the flag:
              -persist

         if raw_output is True, it adds the option:
              --rawoutput

    Args:

      job_id (int): Job identifier from jsaproc database.

      input_file_list (str): List of files (with extensions and full
        path).

      mode (str): Can be 'night', 'obs', 'public' or 'project'.

      drparameters (str):

      cleanup (str, optional): Type of cleanup. Can be one of
        'cadc'|'none'|'all', defaults to 'cadc'.

      persist (bool, optional): Defaults to False. If persist is turned
        on, then dpCapture will copy acceptable products to the
        default output directory. Otherwise it won't (used for
        debugging purposes). The output directory is determined by
        jsa_proc.admin.directories 'get_output_dir' for the given
        job_id.

      location (str, optional): One of |'cadc'|'JAC'| (NOT CURRENTLY
        IMPLEMENTED, default is 'JAC')


      jsawrapdr (str, optional): The path to jsawrapdr. If not given,
        the one in configured starlink will be used.

      starlink_dir (str, optional): The path of a starlink install to
        use. If not given, the one found in the configuration file will be
        used.

      version: CADC file name "version" or None to use default.

      command_run: custom "run" command to be passed to jsawrapdr.

    Returns:
      str: The filename (including path) of the logfile.

    """

    # Get log directory.  Note that opening a log file in this
    # directory using open_log_file will ensure that it exists.
    log_dir = get_log_dir(job_id)

    # Prepare scratch directory.
    scratch_dir = make_temp_scratch_dir(job_id)

    # Get output directory name.
    out_dir = get_output_dir(job_id)

    # If output dir currently exists, delete the directory.
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)

    # Make the "transfer" directory in advance.  (This saves dpCapture
    # or another copying routine from having to do so.)
    os.makedirs(out_dir)

    # Find paths to starlink, jsawrapdr and orac_dr.
    config = get_config()

    if starlink_dir is None:
        starpath = config.get('job_run', 'starpath')
    else:
        starpath = starlink_dir
    if not jsawrapdr:
        jsawrapdr = os.path.join(starpath, 'Perl', 'bin', 'jsawrapdr')
    orac_dir = os.path.join(starpath, 'bin', 'oracdr', 'src')

    # Set the jac recipe id.
    jacid = 'jac-'+str(job_id)

    # Collect the jsawrapdr arguments.
    jsawrapdrcom = [jsawrapdr,
                    '--debugxfer',
                    '--outdir='+scratch_dir,
                    '--inputs='+input_file_list,
                    '--id='+jacid,
                    '--mode='+mode,
                    '--cleanup='+cleanup,
                    '--drparameters='+drparameters]
    if persist:
        jsawrapdrcom.append('-persist')
        jsawrapdrcom.append('--transdir='+out_dir)

    if raw_output:
        jsawrapdrcom.append('--rawoutput')

    if version is not None:
        jsawrapdrcom.append('--fileversion={0}'.format(version))

    if command_run is not None:
        jsawrapdrcom.append('--drcommand={0}'.format(command_run))

    # Set up the environment for running jsawrapdr.
    jsa_env = os.environ.copy()
    jsa_env = setup_starlink(starpath, jsa_env)

    # Add in the LOGDIR
    jsa_env['ORAC_LOGDIR'] = log_dir

    # Ensure that we delete previous log.* files in the ORAC_LOGDIR
    # if they exist.
    if os.path.exists(log_dir):
        calculation_logs = glob.glob(os.path.join(log_dir, 'log.*'))
        for cl in calculation_logs:
            os.remove(cl)

    # Open a log file and run jsawrapdr while saving output to log.
    with open_log_file(job_id, 'jsawrapdr') as log:

        # Save the log file name.
        log_name = log.name

        # Run jsawrapdr.
        retcode = subprocess.call(jsawrapdrcom, env=jsa_env, bufsize=1,
                                  stdout=log, stderr=subprocess.STDOUT,
                                  preexec_fn=restore_signals)

    # Handle jsawrapdr errors.
    if retcode != 0:
        errormessage = 'jsawrapdr exited with Retcode %i ' % (retcode)

        # Find the first ORAC error message in the jsawrapdr log.
        jsalogfile = open(log_name, 'r')
        lines = jsalogfile.read()
        jsalogfile.close()
        result = re.search(r'.*(STDERR:\s*.*)$', lines, re.DOTALL)
        if result:
            firsterror = result.group(1).split('\n')[1]

            # Insert the ORAC error at the start of the error message.
            if firsterror:
                errormessage = 'ORAC ERROR: ' + firsterror + '.\n' + \
                               errormessage

        # Raise the error.
        raise JSAProcError(errormessage)

    return log_name
Example #22
def ptransfer_clean_up(dry_run=False):
    """Attempt to clean up orphaned p-tranfer "proc" directories.
    """

    if not dry_run:
        etransfer_check_config()

    config = get_config()

    trans_dir = config.get('etransfer', 'transdir')

    # Determine latest start time for which we will consider cleaning up
    # a proc directory.
    start_limit = datetime.utcnow() - timedelta(
        minutes=int(config.get('etransfer', 'cleanup_minutes')))

    start_limit_hard = datetime.utcnow() - timedelta(
        minutes=int(config.get('etransfer', 'cleanup_hard_minutes')))

    # Look for proc directories.
    proc_base_dir = os.path.join(trans_dir, 'proc')

    for dir_ in os.listdir(proc_base_dir):
        # Consider only directories with the expected name prefix.
        proc_dir = os.path.join(proc_base_dir, dir_)
        if not (dir_.startswith('proc') and os.path.isdir(proc_dir)):
            continue

        logger.debug('Directory %s found', dir_)

        # Check for and read the stamp file.
        stamp_file = os.path.join(proc_dir, 'ptransfer.ini')
        config = SafeConfigParser()
        config_files_read = config.read(stamp_file)
        if not config_files_read:
            logger.debug('Directory %s has no stamp file', dir_)
            continue

        # Check if the transfer started too recently to consider.
        start = datetime.strptime(config.get('ptransfer', 'start'),
                                  '%Y-%m-%d %H:%M:%S')

        if start > start_limit:
            logger.debug('Directory %s is too recent to clean up', dir_)
            continue

        # Check if the transfer process is still running (by PID).
        pid = int(config.get('ptransfer', 'pid'))
        is_running = True
        try:
            os.kill(pid, 0)
        except OSError:
            is_running = False

        if is_running:
            logger.debug('Directory %s corresponds to running process (%i)',
                         dir_, pid)

            if start > start_limit_hard:
                continue

            logger.debug(
                'Directory %s is older than hard limit, killing process %i',
                dir_, pid)

            if not dry_run:
                try:
                    os.kill(pid, 15)
                except OSError:
                    pass

                # Check whether the process did exit.
                sleep(5)
                try:
                    os.kill(pid, 0)
                except OSError:
                    is_running = False

                if is_running:
                    logger.warning('Could not kill process %i', pid)
                    continue

        # All checks are complete: move the files back to their initial
        # stream directories.
        n_moved = 0
        n_skipped = 0

        for stream in allowed_streams:
            stream_has_skipped_files = False

            proc_stream_dir = os.path.join(proc_dir, stream)
            if not os.path.exists(proc_stream_dir):
                continue

            orig_stream_dir = os.path.join(trans_dir, stream)
            if (not os.path.exists(orig_stream_dir)) and (not dry_run):
                os.mkdir(orig_stream_dir)

            for file_ in os.listdir(proc_stream_dir):
                logger.debug('Directory %s has file %s (%s)',
                             dir_, file_, stream)

                proc_file = os.path.join(proc_stream_dir, file_)
                orig_file = os.path.join(orig_stream_dir, file_)

                if os.path.exists(orig_file):
                    logger.warning(
                        'File %s present in %s and %s directories',
                        file_, dir_, stream)
                    n_skipped += 1
                    stream_has_skipped_files = True

                else:
                    if dry_run:
                        logger.info('Would move %s %s back to %s (DRY RUN)',
                                    dir_, file_, stream)
                    else:
                        os.rename(proc_file, orig_file)
                    n_moved += 1

            if (not stream_has_skipped_files) and (not dry_run):
                os.rmdir(proc_stream_dir)

        logger.info(
            'Proc directory %s: %i file(s) cleaned up, %i skipped',
            dir_, n_moved, n_skipped)

        # If we didn't skip any files, remove the stamp file and now-empty
        # proc directory.  (Unless in dry run mode.)
        if n_skipped or dry_run:
            continue

        os.unlink(stamp_file)
        os.rmdir(proc_dir)
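
The ptransfer.ini stamp file read here is written by ptransfer_poll (below);
its contents look like this (the values are illustrative):

[ptransfer]
pid = 12345
start = 2015-06-01 12:34:56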
Example #23
def ptransfer_poll(stream=None, dry_run=False):
    """Attempt to put files into the archive at CADC.

    This function is controlled by the configuration file
    entries etransfer.transdir and etransfer.max_files.
    It looks in the "new" and "replace" directories inside
    "transdir" for at most "max_files" files.  The files
    are moved to a temporary processing directory and then
    either moved to a reject directory or deleted on
    completion.  In the event of failure to transfer, the files
    are put back in either the "new" or "replace" directory.

    The stream argument can be given to select only files in the
    "new" or "replace" directory.  It must be given in the
    dry_run case since then no "proc" directory is created.
    """

    if not dry_run:
        etransfer_check_config()

    config = get_config()

    trans_dir = config.get('etransfer', 'transdir')
    max_files = int(config.get('etransfer', 'max_files'))

    files = []
    n_err = 0

    # Select transfer streams.
    streams = allowed_streams
    if stream is None:
        if dry_run:
            raise CommandError('Stream must be specified in dry run mode')
    else:
        if stream not in streams:
            raise CommandError('Unknown stream {0}'.format(stream))

        streams = (stream,)

    # Search for files to transfer.
    for stream in streams:
        for file in os.listdir(os.path.join(trans_dir, stream)):
            logger.debug('Found file %s (%s)', file, stream)
            files.append(FileInfo(file, stream))

    if not files:
        logger.info('No files found for p-transfer')
        return

    if dry_run:
        # Work in the stream directory.

        proc = files[:max_files]
        proc_dir = os.path.join(trans_dir, stream)
        use_sub_dir = False
        stamp_file = None

    else:
        # Create working directory.

        proc = []
        proc_dir = tempfile.mkdtemp(prefix='proc',
                                    dir=os.path.join(trans_dir, 'proc'))
        logger.info('Working directory: %s', proc_dir)

        # Create stream-based subdirectories.
        use_sub_dir = True
        for stream in streams:
            os.mkdir(os.path.join(proc_dir, stream))

        # Write stamp file to allow automatic clean-up.
        stamp_file = os.path.join(proc_dir, 'ptransfer.ini')

        config = SafeConfigParser()
        config.add_section('ptransfer')
        config.set('ptransfer', 'pid', str(os.getpid()))
        config.set('ptransfer', 'start',
                   datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'))

        with open(stamp_file, 'wb') as f:
            config.write(f)

        # Move some files into the working directory to prevent
        # multiple p-transfer processes trying to transfer them
        # simultaneously.
        for file in files:
            try:
                os.rename(
                    os.path.join(trans_dir, file.stream, file.name),
                    os.path.join(proc_dir, file.stream, file.name))
                proc.append(file)
                logger.debug('Processing file %s', file.name)

            except:
                # Another process may have started processing the file,
                # so skip it.
                logger.debug('Cannot move file %s, skipping', file.name)

            # Did we get enough files already?
            if len(proc) >= max_files:
                break

    # Attempt to process all the files in our working directory.
    for file in proc:
        # Determine path to the directory containing the file and the
        # file itself.
        if use_sub_dir:
            proc_sub_dir = os.path.join(proc_dir, file.stream)
        else:
            proc_sub_dir = proc_dir

        proc_file = os.path.join(proc_sub_dir, file.name)

        try:
            # Check the file.
            md5sum = get_md5sum(proc_file)
            ad_stream = ptransfer_check(
                proc_sub_dir, file.name, file.stream, md5sum)

            if dry_run:
                logger.info('Accepted file %s (%s) (DRY RUN)',
                            file.name, ad_stream)

            else:
                # Transfer the file.
                ptransfer_put(proc_sub_dir, file.name, ad_stream, md5sum)

                # Check it was transferred correctly.
                try:
                    cadc_file_info = fetch_cadc_file_info(file.name)
                except JSAProcError:
                    raise PTransferFailure('Unable to check CADC file info')

                if cadc_file_info is None:
                    # File doesn't seem to be there?
                    logger.error('File transferred but has no info')
                    raise PTransferFailure('No file info')

                elif md5sum != cadc_file_info['content-md5']:
                    # File corrupted on transfer?  Put it back but in
                    # the replace directory for later re-transfer.
                    logger.error('File transferred but MD5 sum wrong')
                    file = file._replace(stream='replace')
                    raise PTransferFailure('MD5 sum wrong')

                # On success, delete the file.
                logger.info('Transferred file %s (%s)', file.name, ad_stream)
                os.unlink(proc_file)

        except PTransferException as e:
            # In the event of an error generated by one of the pre-transfer
            # checks, move the file into a reject directory.
            n_err += 1
            code = e.reject_code
            logger.error('Rejecting file %s (%s)', file.name, code)

            if not dry_run:
                reject_dir = os.path.join(trans_dir, 'reject', code)
                if not os.path.exists(reject_dir):
                    logger.debug('Making reject directory: %s', reject_dir)
                    os.makedirs(reject_dir)

                logger.debug('Moving file to: %s', reject_dir)
                os.rename(proc_file, os.path.join(reject_dir, file.name))

        except PTransferFailure as e:
            # In the event of failure to transfer, put the file back into
            # its original stream directory.
            n_err += 1
            logger.error(
                'Failed to transfer file %s (%s)', file.name, e.message)

            if not dry_run:
                os.rename(
                    proc_file,
                    os.path.join(trans_dir, file.stream, file.name))

        except:
            # Catch any other exception and also put the file back.
            n_err += 1
            logger.exception('Error while transferring file %s', file.name)

            if not dry_run:
                os.rename(
                    proc_file,
                    os.path.join(trans_dir, file.stream, file.name))

    # Finally clean up the processing directory.  It should have nothing
    # left in it by this point other than the stream subdirectories and
    # stamp file.
    if not dry_run:
        os.unlink(stamp_file)

        for stream in streams:
            os.rmdir(os.path.join(proc_dir, stream))

        os.rmdir(proc_dir)

    # If errors occurred, exit with bad status.
    if n_err:
        raise CommandError('Errors occurred during p-transfer poll'
                           ' ({0} error(s))'.format(n_err))
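
FileInfo and allowed_streams are defined elsewhere in the module; plausible
definitions, consistent with the use of file.name, file.stream and
file._replace above, would be:

from collections import namedtuple

# The _replace call implies FileInfo is a namedtuple.
FileInfo = namedtuple('FileInfo', ['name', 'stream'])

# The two input stream directories used throughout: "new" and "replace".
allowed_streams = ('new', 'replace')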
Example #24
def get_misc_log_dir(name):
    """Get a miscellaneous log directory."""

    config = get_config()
    return os.path.join(config.get('directories', 'log'), name)
Example #25
def create_web_app():
    """Function to prepare the Flask web application."""

    home = get_home()
    db = get_database()
    database_name = get_config().get('database', 'database')

    app = Flask(
        'jsa_proc',
        static_folder=os.path.join(home, 'web', 'static'),
        template_folder=os.path.join(home, 'web', 'templates'),
    )

    app.secret_key = get_config().get('web', 'key')

    # Web authorization -- mostly taken from flask docs snippets 8
    # http://flask.pocoo.org/snippets/8
    def check_auth(password):
        """
        Check that the staff password has been used.

        (Note that we don't care what the username is).
        """
        return check_staff_password(password)

    def authenticate():
        """
        Send a 401 response so that we can log in.
        """

        return Response(render_template('logout.html',
                                        redirect=request.referrer),
                        401, {'WWW-Authenticate': loginstring})

    def requires_auth(f):
        """
        A decorator to wrap functions that require authorization.
        """

        @wraps(f)
        def decorated(*args, **kwargs):
            auth = request.authorization
            if not auth or not check_auth(auth.password):
                return authenticate()
            return f(*args, **kwargs)

        return decorated

    # Route Handlers.

    @app.route('/')
    def home_page():
        raise HTTPRedirect(url_for('task_summary'))

    @app.route('/job/')
    @templated('job_list.html')
    def job_list():
        # Prepare query arguments list: special parameters first.
        kwargs = {
            'state': request.args.getlist('state'),
            'mode': request.args.get('mode', 'JSAProc'),
            'tiles': request.args.get('tiles')
        }

        # Now add regular string parameters, including those from
        # jcmtobsinfo.
        params = [
            'location',
            'task',
            'date_min',
            'date_max',
            'qa_state',
            'sourcename',
            'obsnum',
            'project',
            'number',
            'tau_min',
            'tau_max',
        ]

        params.extend(ObsQueryDict.keys())

        for key in params:
            kwargs[key] = request.args.get(key, None)

        # Process empty strings used as null form parameters.
        for argname in kwargs:
            if kwargs[argname] == '':
                kwargs[argname] = None

        # Store the query in the session.
        session['job_query'] = kwargs

        # Finally prepare the template context.
        return prepare_job_list(
            db,
            page=request.args.get('page', None),
            **kwargs)

    @app.route('/image/<task>/piechart')
    def summary_piechart(task='None'):
        if task == 'None':
            task = None
        obsquerydict = {}
        for key in ObsQueryDict.keys():
            obsquerydict[key] = request.args.get(key, None)
        date_min = request.args.get('date_min', None)
        date_max = request.args.get('date_max', None)
        return prepare_summary_piechart(db, task=task,
                                        obsquerydict=obsquerydict,
                                        date_min=date_min, date_max=date_max)

    @app.route('/summary/')
    @templated('task_summary.html')
    def task_summary():
        return prepare_task_summary(db)

    @app.route('/qa')
    @templated('task_qa_summary.html')
    def task_qa_summary():
        return prepare_task_qa_summary(db)

    @app.route('/job_summary/')
    @templated('job_summary.html')
    def job_summary():
        task = request.args.get('task', None)
        date_min = request.args.get('date_min', None)
        date_max = request.args.get('date_max', None)
        return prepare_job_summary(db, task=task, date_min=date_min,
                                   date_max=date_max)

    @app.route('/error_summary/')
    @templated('error_summary.html')
    def error_summary():
        return prepare_error_summary(
            db,
            redirect_url=request.full_path,
            filtering=request.args.get('filtering', None),
            chosentask=request.args.get('chosentask', None),
            extrafilter=request.args.get('extrafilter', None),
            state_prev=request.args.get('state_prev', None),
            error_state=request.args.get('error_state', None),
            filter_done=('submit_filter' in request.args),
        )

    @app.route('/job/<int:job_id>', methods=['GET'])
    @templated('job_info.html')
    def job_info(job_id):
        return prepare_job_info(db, job_id, session.get('job_query'))

    @app.route('/job/<int:job_id>/qa', methods=['GET'])
    @templated('job_qa.html')
    def job_qa(job_id):
        return prepare_job_qa_info(db, job_id, session.get('job_query'))

    @app.route('/job/<int:job_id>/add_note', methods=['POST'])
    @requires_auth
    def job_add_note(job_id):
        message = request.form['message']
        username = request.authorization['username']

        try:
            # Add the note.
            prepare_add_note(db, job_id, message, username)

            # Redirect back to the job info page.
            flash('The note has been saved.')
            raise HTTPRedirect(url_for('job_info', job_id=job_id))

        except ErrorPage as err:
            return error_page_response(err)

    @app.route('/job_change_state', methods=['POST'])
    @requires_auth
    def job_change_state():

        # Get the variables from POST
        newstate = request.form['newstate']
        state_prev = request.form['state_prev']
        message = request.form['message']
        job_ids = request.form.getlist('job_id')
        url = request.form['url']
        username = request.authorization['username']

        try:
            # Change the state.
            prepare_change_state(db, job_ids,
                                 newstate,
                                 state_prev,
                                 message,
                                 username)

            # Redirect the page to the correct info.
            flash('The status has been changed to %s.' %
                  JSAProcState.get_name(newstate))
            raise HTTPRedirect(url)

        except ErrorPage as err:
            return error_page_response(err)

    @app.route('/job_change_qa', methods=['POST'])
    @requires_auth
    def job_change_qa():

        # Get the variables from POST
        if "action_next" in request.form:
            url = request.form['url-next']
        else:
            url = request.form['url']
        qa_state = request.form['qa_state']
        message = request.form['message']
        job_ids = request.form.getlist('job_id')
        username = request.authorization['username']

        try:
            # Change the state.
            prepare_change_qa(db, job_ids,
                              qa_state,
                              message,
                              username,
                              )
            # Redirect the page to the correct info.
            flash(
                'The QA status of job %s has been changed to %s.' %
                (str(' '.join(job_ids)), JSAQAState.get_name(qa_state))
            )
            raise HTTPRedirect(url)

        except ErrorPage as err:
            return error_page_response(err)

    # QA Nightly Summary pages
    @app.route('/qa-nightly')
    @templated('task_qa_summary_nightly.html')
    def qa_night_page():
        """
        By default, show the previous week.

        Note that prepare_task_qa_summary interprets dates as
        inclusive, so a 6-day time delta gives a full week.
        """
        date_min = request.args.get('date_min', None)
        if date_min is None or date_min == '':
            date_min = (
                datetime.date.today() - datetime.timedelta(days=6)
            ).strftime('%Y-%m-%d')

        date_max = request.args.get('date_max', None)
        if date_max is None or date_max == '':
            date_max = datetime.date.today().strftime('%Y-%m-%d')
        return prepare_task_qa_summary(db, date_min=date_min,
                                       date_max=date_max, task='jcmt-nightly',
                                       byDate=True)

    @app.route('/login')
    @requires_auth
    def login():
        raise HTTPRedirect(request.referrer)

    @app.route('/logout')
    def logout():
        return authenticate()

    # Image handling.
    @app.route('/job/<int:job_id>/preview/<preview>')
    def job_preview(job_id, preview):
        path = prepare_job_preview(job_id, preview)
        return send_file(path, mimetype='image/png')

    @app.route('/job/<int:job_id>/pdf/<preview>')
    def job_preview_pdf(job_id, preview):
        path = prepare_job_preview(job_id, preview, 'pdf')
        return send_file(path, mimetype='application/pdf')

    @app.route('/job/<int:job_id>/text/<text_file>')
    def job_text_file(job_id, text_file):
        path = prepare_job_preview(job_id, text_file, 'txt')
        return send_file(path, mimetype='text/plain')

    @app.route('/job/<int:job_id>/log/<log>')
    def job_log_html(job_id, log):
        path = prepare_job_log(job_id, log)
        return send_file(path, mimetype='text/html')

    @app.route('/job/<int:job_id>/log_text/<log>')
    def job_log_text(job_id, log):
        path = prepare_job_log(job_id, log)
        return send_file(path, mimetype='text/plain')

    @app.route('/fop_summary', methods=['GET'])
    @templated('fop_summary.html')
    def fop_summary():
        userid = request.args.get('userid', None)
        semester = request.args.get('semester', None)
        projdict = {}
        if userid and semester:
            ompdb = get_omp_database(write_access=None)
            projects = ompdb.get_support_projects(str(userid), str(semester))
            for p in projects:
                jobs = db.find_jobs(
                    obsquery={'project': p}, task='jcmt-nightly')
                projdict[p] = [
                    len(jobs),
                    sum(1 for j in jobs if j.state == JSAProcState.ERROR),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.BAD),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.QUESTIONABLE),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.UNKNOWN),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.GOOD)]
        else:
            projects = None

        return {'userid': userid, 'semester': semester, 'projects': projdict}

    # Filters and Tests.

    @app.route('/fop_summary_getres', methods=['POST'])
    def fop_summary_getres():
        userid = request.form['userid']
        semester = request.form['semester']
        raise HTTPRedirect(url_for(
            'fop_summary', userid=userid, semester=semester))

    @app.template_filter('state_name')
    def state_name_filter(state):
        return JSAProcState.get_name(state)

    @app.template_test('state_active')
    def state_active_test(state):
        return JSAProcState.get_info(state).active

    @app.template_filter('state_phase')
    def state_phase_filter(state):
        phase = JSAProcState.get_info(state).phase
        if phase == JSAProcState.PHASE_QUEUE:
            return 'queue'
        elif phase == JSAProcState.PHASE_FETCH:
            return 'fetch'
        elif phase == JSAProcState.PHASE_RUN:
            return 'run'
        elif phase == JSAProcState.PHASE_COMPLETE:
            return 'complete'
        elif phase == JSAProcState.PHASE_ERROR:
            return 'error'
        raise HTTPError('Unknown phase {0}'.format(phase))

    @app.template_filter('qa_state_name')
    def qa_state_name(qa_state):
        if qa_state.lower() != 'total':
            name = JSAQAState.get_name(qa_state)
        else:
            name = 'Total'
        return name

    @app.template_filter('omp_state_name')
    def omp_state_name_filter(ompstate):
        return OMPState.get_name(ompstate)

    @app.template_filter('uniq')
    def uniq_filter(xs):
        return set(xs)

    @app.template_filter('datetimeformat')
    def datetimeformat(value, format='%Y-%m-%d<br>%H:%M'):
        return value.strftime(format)

    @app.template_filter('replace0')
    def replace_zero(value):
        if value == 0:
            return '-'
        else:
            return value

    @app.context_processor
    def add_to_context():
        return {
            'url_for_omp': url_for_omp,
            'url_for_omp_comment': url_for_omp_comment,
            'database_name': database_name,
        }

    # Return the Application.
    return app
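
The route handlers above return plain context dictionaries and rely on a
templated decorator, defined elsewhere in the project, to render them. A
minimal sketch of that pattern, following the well-known Flask "templated
decorator" snippet; the pass-through behaviour for non-dict return values is
an assumption, not taken from the source:

from functools import wraps

from flask import render_template


def templated(template_name):
    """Minimal sketch of a templated decorator (assumed behaviour).

    Render the named template with the dict returned by the view
    function; pass any other return value through unchanged.
    """
    def decorator(f):
        @wraps(f)
        def decorated(*args, **kwargs):
            context = f(*args, **kwargs)
            if isinstance(context, dict):
                return render_template(template_name, **context)
            # Assumed: redirects and ready-made responses pass through.
            return context
        return decorated
    return decorator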
Example #38
0
def create_web_app():
    """Function to prepare the Flask web application."""

    home = get_home()
    db = get_database()
    database_name = get_config().get('database', 'database')

    app = Flask(
        'jsa_proc',
        static_folder=os.path.join(home, 'web', 'static'),
        template_folder=os.path.join(home, 'web', 'templates'),
    )

    app.secret_key = get_config().get('web', 'key')

    # Web authorization -- mostly taken from Flask docs snippet 8
    # http://flask.pocoo.org/snippets/8
    def check_auth(password):
        """
        Check that the staff password has been used.

        (Note that we don't care what the username is).
        """
        return check_staff_password(password)

    def authenticate():
        """
        Send a 401 response so that we can log in.
        """

        return Response(
            render_template('logout.html', redirect=request.referrer), 401,
            {'WWW-Authenticate': loginstring})

    def requires_auth(f):
        """
        A decorator to wrap functions that require authorization.
        """
        @wraps(f)
        def decorated(*args, **kwargs):
            auth = request.authorization
            if not auth or not check_auth(auth.password):
                return authenticate()
            return f(*args, **kwargs)

        return decorated

    # Route Handlers.

    @app.route('/')
    def home_page():
        raise HTTPRedirect(url_for('task_summary'))

    @app.route('/job/')
    @templated('job_list.html')
    def job_list():
        # Prepare query arguments list: special parameters first.
        kwargs = {
            'state': request.args.getlist('state'),
            'mode': request.args.get('mode', 'JSAProc'),
            'tiles': request.args.get('tiles')
        }

        # Now add regular string parameters, including those from
        # jcmtobsinfo.
        params = [
            'location',
            'task',
            'date_min',
            'date_max',
            'qa_state',
            'sourcename',
            'obsnum',
            'project',
            'number',
            'tau_min',
            'tau_max',
        ]

        params.extend(ObsQueryDict.keys())

        for key in params:
            kwargs[key] = request.args.get(key, None)

        # Process empty strings used as null form parameters.
        for argname in kwargs:
            if kwargs[argname] == '':
                kwargs[argname] = None

        # Store the query in the session.
        session['job_query'] = kwargs

        # Finally prepare the template context.
        return prepare_job_list(db,
                                page=request.args.get('page', None),
                                **kwargs)

    @app.route('/image/<task>/piechart')
    def summary_piechart(task='None'):
        if task == 'None':
            task = None
        obsquerydict = {}
        for key in ObsQueryDict.keys():
            obsquerydict[key] = request.args.get(key, None)
        date_min = request.args.get('date_min', None)
        date_max = request.args.get('date_max', None)
        return prepare_summary_piechart(db,
                                        task=task,
                                        obsquerydict=obsquerydict,
                                        date_min=date_min,
                                        date_max=date_max)

    @app.route('/summary/')
    @templated('task_summary.html')
    def task_summary():
        return prepare_task_summary(db)

    @app.route('/qa')
    @templated('task_qa_summary.html')
    def task_qa_summary():
        return prepare_task_qa_summary(db)

    @app.route('/job_summary/')
    @templated('job_summary.html')
    def job_summary():
        task = request.args.get('task', None)
        date_min = request.args.get('date_min', None)
        date_max = request.args.get('date_max', None)
        return prepare_job_summary(db,
                                   task=task,
                                   date_min=date_min,
                                   date_max=date_max)

    @app.route('/error_summary/')
    @templated('error_summary.html')
    def error_summary():
        return prepare_error_summary(
            db,
            redirect_url=request.full_path,
            filtering=request.args.get('filtering', None),
            chosentask=request.args.get('chosentask', None),
            extrafilter=request.args.get('extrafilter', None),
            state_prev=request.args.get('state_prev', None),
            error_state=request.args.get('error_state', None),
            filter_done=('submit_filter' in request.args),
        )

    @app.route('/job/<int:job_id>', methods=['GET'])
    @templated('job_info.html')
    def job_info(job_id):
        return prepare_job_info(db, job_id, session.get('job_query'))

    @app.route('/job/<int:job_id>/qa', methods=['GET'])
    @templated('job_qa.html')
    def job_qa(job_id):
        return prepare_job_qa_info(db, job_id, session.get('job_query'))

    @app.route('/job/<int:job_id>/add_note', methods=['POST'])
    @requires_auth
    def job_add_note(job_id):
        message = request.form['message']
        username = request.authorization['username']

        try:
            # Add the note.
            prepare_add_note(db, job_id, message, username)

            # Redirect back to the job info page.
            flash('The note has been saved.')
            raise HTTPRedirect(url_for('job_info', job_id=job_id))

        except ErrorPage as err:
            return error_page_response(err)

    @app.route('/job_change_state', methods=['POST'])
    @requires_auth
    def job_change_state():

        # Get the variables from POST
        newstate = request.form['newstate']
        state_prev = request.form['state_prev']
        message = request.form['message']
        job_ids = request.form.getlist('job_id')
        url = request.form['url']
        username = request.authorization['username']

        try:
            # Change the state.
            prepare_change_state(db, job_ids, newstate, state_prev, message,
                                 username)

            # Redirect the page to the correct info.
            flash('The status has been changed to %s.' %
                  JSAProcState.get_name(newstate))
            raise HTTPRedirect(url)

        except ErrorPage as err:
            return error_page_response(err)

    @app.route('/job_change_qa', methods=['POST'])
    @requires_auth
    def job_change_qa():

        # Get the variables from POST
        if "action_next" in request.form:
            url = request.form['url-next']
        else:
            url = request.form['url']
        qa_state = request.form['qa_state']
        message = request.form['message']
        job_ids = request.form.getlist('job_id')
        username = request.authorization['username']

        try:
            # Change the state.
            prepare_change_qa(
                db,
                job_ids,
                qa_state,
                message,
                username,
            )
            # Redirect the page to the correct info.
            flash('The QA status of job %s has been changed to %s.' %
                  (str(' '.join(job_ids)), JSAQAState.get_name(qa_state)))
            raise HTTPRedirect(url)

        except ErrorPage as err:
            return error_page_response(err)

    # QA Nightly Summary pages
    @app.route('/qa-nightly')
    @templated('task_qa_summary_nightly.html')
    def qa_night_page():
        """
        By default, show the previous week.

        Note that prepare_task_qa_summary interprets dates as
        inclusive, so a 6-day time delta gives a full week.
        """
        date_min = request.args.get('date_min', None)
        if date_min is None or date_min == '':
            date_min = (datetime.date.today() -
                        datetime.timedelta(days=6)).strftime('%Y-%m-%d')

        date_max = request.args.get('date_max', None)
        if date_max is None or date_max == '':
            date_max = datetime.date.today().strftime('%Y-%m-%d')
        return prepare_task_qa_summary(db,
                                       date_min=date_min,
                                       date_max=date_max,
                                       task='jcmt-nightly',
                                       byDate=True)

    @app.route('/login')
    @requires_auth
    def login():
        raise HTTPRedirect(request.referrer)

    @app.route('/logout')
    def logout():
        return authenticate()

    # Image handling.
    @app.route('/job/<int:job_id>/preview/<preview>')
    def job_preview(job_id, preview):
        path = prepare_job_preview(job_id, preview)
        return send_file(path, mimetype='image/png')

    @app.route('/job/<int:job_id>/pdf/<preview>')
    def job_preview_pdf(job_id, preview):
        path = prepare_job_preview(job_id, preview, 'pdf')
        return send_file(path, mimetype='application/pdf')

    @app.route('/job/<int:job_id>/text/<text_file>')
    def job_text_file(job_id, text_file):
        path = prepare_job_preview(job_id, text_file, 'txt')
        return send_file(path, mimetype='text/plain')

    @app.route('/job/<int:job_id>/log/<log>')
    def job_log_html(job_id, log):
        path = prepare_job_log(job_id, log)
        return send_file(path, mimetype='text/html')

    @app.route('/job/<int:job_id>/log_text/<log>')
    def job_log_text(job_id, log):
        path = prepare_job_log(job_id, log)
        return send_file(path, mimetype='text/plain')

    @app.route('/fop_summary', methods=['GET'])
    @templated('fop_summary.html')
    def fop_summary():
        userid = request.args.get('userid', None)
        semester = request.args.get('semester', None)
        projdict = {}
        if userid and semester:
            ompdb = get_omp_database(write_access=None)
            projects = ompdb.get_support_projects(str(userid), str(semester))
            for p in projects:
                jobs = db.find_jobs(obsquery={'project': str(p)},
                                    task='jcmt-nightly')
                projdict[str(p)] = [
                    len(jobs),
                    sum(1 for j in jobs if j.state == JSAProcState.ERROR),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.BAD),
                    sum(1 for j in jobs
                        if j.qa_state == JSAQAState.QUESTIONABLE),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.UNKNOWN),
                    sum(1 for j in jobs if j.qa_state == JSAQAState.GOOD)
                ]
        else:
            projects = None

        return {'userid': userid, 'semester': semester, 'projects': projdict}

    # Filters and Tests.

    @app.route('/fop_summary_getres', methods=['POST'])
    def fop_summary_getres():
        userid = request.form['userid']
        semester = request.form['semester']
        raise HTTPRedirect(
            url_for('fop_summary', userid=userid, semester=semester))

    @app.template_filter('state_name')
    def state_name_filter(state):
        return JSAProcState.get_name(state)

    @app.template_test('state_active')
    def state_active_test(state):
        return JSAProcState.get_info(state).active

    @app.template_filter('state_phase')
    def state_phase_filter(state):
        phase = JSAProcState.get_info(state).phase
        if phase == JSAProcState.PHASE_QUEUE:
            return 'queue'
        elif phase == JSAProcState.PHASE_FETCH:
            return 'fetch'
        elif phase == JSAProcState.PHASE_RUN:
            return 'run'
        elif phase == JSAProcState.PHASE_COMPLETE:
            return 'complete'
        elif phase == JSAProcState.PHASE_ERROR:
            return 'error'
        raise HTTPError('Unknown phase {0}'.format(phase))

    @app.template_filter('qa_state_name')
    def qa_state_name(qa_state):
        if qa_state.lower() != 'total':
            name = JSAQAState.get_name(qa_state)
        else:
            name = 'Total'
        return name

    @app.template_filter('omp_state_name')
    def omp_state_name_filter(ompstate):
        return OMPState.get_name(ompstate)

    @app.template_filter('uniq')
    def uniq_filter(xs):
        return set(xs)

    @app.template_filter('datetimeformat')
    def datetimeformat(value, format='%Y-%m-%d<br>%H:%M'):
        return value.strftime(format)

    @app.template_filter('replace0')
    def replace_zero(value):
        if value == 0:
            return '-'
        else:
            return value

    @app.context_processor
    def add_to_context():
        return {
            'url_for_omp': url_for_omp,
            'url_for_omp_comment': url_for_omp_comment,
            'database_name': database_name,
        }

    # Return the Application.
    return app
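
For local testing, the application object returned by create_web_app() can be
served with Flask's built-in development server. A minimal launcher sketch;
the module path in the import below is a hypothetical placeholder, not taken
from the source:

# Hypothetical launcher; the import path is an assumption.
from jsa_proc.web.flask_app import create_web_app

if __name__ == '__main__':
    app = create_web_app()
    # Flask's development server -- for local testing only.
    app.run(host='127.0.0.1', port=5000)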
Example #39
0
def get_misc_log_dir(name):
    """Get a miscellaneous log directory."""

    config = get_config()
    return os.path.join(config.get('directories', 'log'), name)
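
Usage simply joins the requested name onto the configured log directory. A
brief illustration with hypothetical configuration values:

# Assuming the configuration file contains, for example:
#
#     [directories]
#     log = /var/log/jsa_proc
#
# then:
log_dir = get_misc_log_dir('etransfer')
# -> '/var/log/jsa_proc/etransfer'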
Example #40
0
def _fetch_job_output(job_id, db, force=False, dry_run=False):
    """Private function to perform retrieval of job output files from CADC.
    """

    # Check we have sufficient disk space for fetching to occur.
    output_space = get_output_dir_space()
    required_space = float(get_config().get('disk_limit', 'fetch_min_space'))

    if output_space < required_space and not force:
        logger.warning('Insufficient disk space: %f / %f GiB required',
                       output_space, required_space)
        return

    logger.info('About to retrieve output data for job %i', job_id)

    # Change state from INGEST_QUEUE to INGEST_FETCH.
    if not dry_run:
        try:
            db.change_state(
                job_id,
                JSAProcState.INGEST_FETCH,
                'Output data are being retrieved',
                state_prev=(None if force else JSAProcState.INGEST_QUEUE))
        except NoRowsError:
            logger.error(
                'Job %i cannot have output data fetched'
                ' as it is not waiting for reingestion', job_id)
            return

    # Check state of output files.
    output_dir = get_output_dir(job_id)
    output_files = db.get_output_files(job_id, with_info=True)
    missing_files = []

    for file in output_files:
        filename = file.filename
        filepath = os.path.join(output_dir, filename)

        if os.path.exists(filepath):
            # If we still have the file, check its MD5 sum is correct.
            if file.md5 is None:
                logger.warning('PRESENT without MD5 sum: %s', filename)
            elif file.md5 == get_md5sum(filepath):
                logger.debug('PRESENT: %s', filename)
            else:
                raise JSAProcError(
                    'MD5 sum mismatch for existing file {0}'.format(filename))

        else:
            # Otherwise add it to the list of missing files.
            logger.debug('MISSING: %s', filename)
            missing_files.append(file)

    # Are there any files we need to retrieve?
    if missing_files:
        for file in missing_files:
            filename = file.filename
            filepath = os.path.join(output_dir, filename)

            if not dry_run:
                if os.path.exists(output_dir):
                    logger.debug('Directory %s already exists', output_dir)
                else:
                    logger.debug('Making directory %s', output_dir)
                    os.makedirs(output_dir)

                logger.info('Fetching file %s', filename)
                fetch_cadc_file(filename, output_dir, suffix='')

                if file.md5 is None:
                    logger.warning('MD5 sum missing: %s', filename)
                elif file.md5 == get_md5sum(filepath):
                    logger.debug('MD5 sum OK: %s', filename)
                else:
                    raise JSAProcError(
                        'MD5 sum mismatch for fetched file {0}'.format(
                            filename))
            else:
                logger.info('Skipping fetch of %s (DRY RUN)', filename)

    else:
        logger.info('All output files are already present')

    # Finally set the state to INGESTION.
    if not dry_run:
        db.change_state(job_id,
                        JSAProcState.INGESTION,
                        'Output data have been retrieved',
                        state_prev=JSAProcState.INGEST_FETCH)
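
The MD5 checks above rely on a get_md5sum helper defined elsewhere in the
project. A minimal sketch of such a helper, assuming it returns the
hexadecimal digest of the file contents, read in chunks to keep memory use
bounded:

import hashlib


def get_md5sum(filepath, blocksize=65536):
    """Hypothetical sketch: hex MD5 digest of a file, read in chunks."""
    md5 = hashlib.md5()
    with open(filepath, 'rb') as f:
        for block in iter(lambda: f.read(blocksize), b''):
            md5.update(block)
    return md5.hexdigest()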