def reset_jobs(task, date_start, date_end, instrument=None,
               state=None, force=False, dry_run=False):
    """Return the selected jobs to the "Unknown" state.

    Jobs at location "JAC" belonging to the given task are selected.
    The selection is narrowed by UT date range when both date_start and
    date_end are given, by instrument when one is given, and by current
    state when a state name is given.

    Jobs whose current state is flagged as active are skipped (and
    counted) unless force is set.  With dry_run the intended changes are
    logged but no state is changed.

    Raises a CommandError if exactly one of the two dates is given, or
    if any active jobs had to be skipped.
    """

    db = get_database()

    # The date range is all-or-nothing: reject a half-specified range.
    have_start = date_start is not None
    have_end = date_end is not None
    if have_start != have_end:
        raise CommandError('only one of start and end date specified')

    # Build the observation constraints for the job search.
    obs_constraints = {}
    if have_start:
        obs_constraints['utdate'] = Range(date_start, date_end)
    if instrument is not None:
        obs_constraints['instrument'] = instrument

    # Translate the state name (if any) into the state value to search for.
    if state is not None:
        state = JSAProcState.lookup_name(state)

    candidates = db.find_jobs(
        location='JAC', task=task, obsquery=obs_constraints, state=state)

    skipped = 0
    for job in candidates:
        current = JSAProcState.get_info(job.state)

        # Leave active jobs alone unless the caller insisted.
        must_skip = current.active and not force
        if must_skip:
            logger.warning('Skipping active job %i (%s)',
                           job.id, current.name)
            skipped += 1
        else:
            logger.info('Resetting status of job %i (was %s)',
                        job.id, current.name)

            if not dry_run:
                # state_prev is set to the state which was just read.
                db.change_state(job.id, JSAProcState.UNKNOWN,
                                'Resetting job', state_prev=job.state)

    if skipped:
        raise CommandError(
            'Could not reset {0} active jobs'.format(skipped))
def etransfer_check_config(any_user=False):
    """Verify that this process may perform e-transfer.

    The configured e-transfer user ("etransfer.user") is compared with
    the name of the user running the process, and the configured
    machine ("etransfer.machine") with the first component of the local
    host name.  The user comparison is ignored when any_user is set.

    Raises a CommandError if a problem is detected.
    """

    config = get_config()
    expected_host = config.get('etransfer', 'machine')
    expected_user = config.get('etransfer', 'user')

    # Name of the account this process is running as.
    current_user = pwd.getpwuid(os.getuid()).pw_name

    if not any_user and current_user != expected_user:
        raise CommandError(
            'etransfer should only be run as {0}'.format(expected_user))

    # Compare only the short host name (the part before the first dot).
    current_host = gethostname().split('.', 1)[0]

    if current_host != expected_host:
        raise CommandError(
            'etransfer should only be run on {0}'.format(expected_host))
def etransfer_send_output(job_id, dry_run=False, force=False):
    """Script-level entry point to e-transfer the output of one job.

    Outside dry run mode the e-transfer configuration and the available
    disk space are checked first.  Unless force is set, the job must be
    in the PROCESSED state; otherwise the problem is logged and a
    CommandError is raised.  The copying itself is delegated to the
    private function _etransfer_send.
    """

    logger.debug('Preparing to e-transfer output for job {0}'.format(job_id))

    # Environment checks (machine, user, disk space) only matter when
    # files will really be written.
    if not dry_run:
        etransfer_check_config()
        _etransfer_check_space()

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    if not force:
        job = db.get_job(id_=job_id)
        if job.state != JSAProcState.PROCESSED:
            state_name = JSAProcState.get_name(job.state)
            problem = (
                'Job {0} cannot be e-transferred as it is in '
                'state {1}').format(job_id, state_name)
            logger.error(problem)
            raise CommandError(problem)

    _etransfer_send(job_id, dry_run=dry_run, db=db, force=force)

    logger.debug('Done adding output for job {0} to e-transfer'.format(job_id))
def ingest_raw_observation(obsid, dry_run=False):
    """Perform raw ingestion of a single observation.

    An OMP database connection is opened (with 'jcmt' write access
    unless this is a dry run), the observation is confirmed to exist
    and the ingestion is then carried out by the private function
    _ingest_raw_observation.

    Raises a CommandError if the observation is not found or if the
    ingestion reports failure.
    """

    # Write access is only requested when changes will really be made.
    db = (get_omp_database() if dry_run
          else get_omp_database(write_access='jcmt'))

    if db.get_obsid_common(obsid) is None:
        raise CommandError('Observation {0} does not exist'.format(obsid))

    succeeded = _ingest_raw_observation(obsid, db=db, dry_run=dry_run)
    if not succeeded:
        raise CommandError("Ingestion failed")
def _etransfer_check_space():
    """Ensure the e-transfer directory has enough free space.

    The space reported by get_space for the configured
    "etransfer.transdir" is compared with the configured minimum,
    "disk_limit.etransfer_min_space".

    Raises a CommandError if a problem is detected.
    """

    config = get_config()

    minimum = float(config.get('disk_limit', 'etransfer_min_space'))
    available = get_space(config.get('etransfer', 'transdir'))

    if not (available < minimum):
        return

    raise CommandError(
        'Insufficient disk space: {0} / {1} GiB required'.format(
            available, minimum))
def poll_raw_ingestion(date_start, date_end,
                       quick=False, no_transfer_check=False,
                       dry_run=False):
    """Search for observations needing raw ingestion and ingest them.

    Observations are searched for between date_start and date_end using
    a read-only database connection.  Instruments listed (comma
    separated) in the configuration entry "rawingest.ignore_instruments"
    are passed to the search as instruments to ignore; quick is passed
    as no_status_check and no_transfer_check is passed through as is.

    Unless this is a dry run, a second connection with 'jcmt' write
    access is opened for the ingestion itself.  Each observation found
    is handed to _ingest_raw_observation.

    Raises a CommandError if any ingestion reported failure.
    """

    ignore_spec = get_config().get('rawingest', 'ignore_instruments')
    ignore_instruments = [name.strip() for name in ignore_spec.split(',')]

    logger.debug('Connecting to database with read-only access')
    db = get_omp_database()

    logger.info('Searching for observations to ingest')
    obsids = db.find_obs_for_ingestion(
        date_start, date_end,
        no_status_check=quick,
        no_transfer_check=no_transfer_check,
        ignore_instruments=ignore_instruments)
    logger.info('Found %i observations', len(obsids))

    if not dry_run:
        logger.debug('Re-connecting to database with write access')
        db = get_omp_database(write_access='jcmt')

    # Ingest every observation in turn, remembering each outcome.
    outcomes = [
        _ingest_raw_observation(obsid, db=db, dry_run=dry_run)
        for obsid in obsids]

    n_ok = sum(1 for outcome in outcomes if outcome)
    n_err = len(outcomes) - n_ok

    logger.info('Ingestion complete: %i successful, %i errors', n_ok, n_err)

    if n_err:
        raise CommandError('Errors encountered during ingestion')
def ptransfer_poll(stream=None, dry_run=False):
    """Attempt to put files into the archive at CADC.

    This function is controlled by the configuration file
    entries etransfer.transdir and etransfer.max_files.
    It looks in the "new" and "replace" directories inside
    "transdir" for at most "max_files" files.  The files
    are moved to a temporary processing directory and then
    either moved to a reject directory or deleted on
    completion.  In the event of failure to transfer, the files
    are put back in either the "new" or "replace" directory.

    The stream argument can be given to select only files in the
    "new" or "replace" directory.  It must be given in the dry_run
    case since then no "proc" directory is created.

    Raises a CommandError if the stream is unknown, if no stream is
    given in dry run mode, or (after all files have been handled) if
    any file was rejected or failed to transfer.
    """

    if not dry_run:
        etransfer_check_config()

    config = get_config()

    trans_dir = config.get('etransfer', 'transdir')
    max_files = int(config.get('etransfer', 'max_files'))

    files = []
    n_err = 0

    # Select transfer streams.
    streams = allowed_streams
    if stream is None:
        if dry_run:
            raise CommandError('Stream must be specified in dry run mode')

    else:
        if stream not in streams:
            raise CommandError('Unknown stream {0}'.format(stream))

        streams = (stream,)

    # Search for files to transfer.
    for stream in streams:
        for file in os.listdir(os.path.join(trans_dir, stream)):
            logger.debug('Found file %s (%s)', file, stream)
            files.append(FileInfo(file, stream))

    if not files:
        logger.info('No files found for p-transfer')
        return

    if dry_run:
        # Work in the stream directory.  A stream is always specified
        # in dry run mode, so "stream" still names that single stream
        # after the search loop above.
        proc = files[:max_files]
        proc_dir = os.path.join(trans_dir, stream)
        use_sub_dir = False
        stamp_file = None

    else:
        # Create working directory.
        proc = []
        proc_dir = tempfile.mkdtemp(prefix='proc',
                                    dir=os.path.join(trans_dir, 'proc'))
        logger.info('Working directory: %s', proc_dir)

        # Create stream-based subdirectories.
        use_sub_dir = True
        for stream in streams:
            os.mkdir(os.path.join(proc_dir, stream))

        # Write stamp file to allow automatic clean-up.  It records the
        # process ID and the UTC start time of this p-transfer run.
        stamp_file = os.path.join(proc_dir, 'ptransfer.ini')

        stamp = SafeConfigParser()
        stamp.add_section('ptransfer')
        stamp.set('ptransfer', 'pid', str(os.getpid()))
        stamp.set('ptransfer', 'start',
                  datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'))

        with open(stamp_file, 'wb') as f:
            stamp.write(f)

        # Move some files into the working directory to prevent
        # multiple p-transfer processes trying to transfer them
        # simultaneously.
        for file in files:
            try:
                os.rename(
                    os.path.join(trans_dir, file.stream, file.name),
                    os.path.join(proc_dir, file.stream, file.name))
                proc.append(file)
                logger.debug('Processing file %s', file.name)

            except OSError:
                # Another process may have started processing the file,
                # so skip it.  Only operating system errors from the
                # rename are treated this way, so that interrupts and
                # programming errors are not silently ignored.
                logger.debug('Cannot move file %s, skipping', file.name)

            # Did we get enough files already?
            if len(proc) >= max_files:
                break

    # Attempt to process all the files in our working directory.
    for file in proc:
        # Determine path to the directory containing the file and the
        # file itself.
        if use_sub_dir:
            proc_sub_dir = os.path.join(proc_dir, file.stream)
        else:
            proc_sub_dir = proc_dir

        proc_file = os.path.join(proc_sub_dir, file.name)

        try:
            # Check the file.
            md5sum = get_md5sum(proc_file)
            ad_stream = ptransfer_check(
                proc_sub_dir, file.name, file.stream, md5sum)

            if dry_run:
                logger.info('Accepted file %s (%s) (DRY RUN)',
                            file.name, ad_stream)

            else:
                # Transfer the file.
                ptransfer_put(proc_sub_dir, file.name, ad_stream, md5sum)

                # Check it was transferred correctly.
                try:
                    cadc_file_info = fetch_cadc_file_info(file.name)
                except JSAProcError:
                    raise PTransferFailure('Unable to check CADC file info')

                if cadc_file_info is None:
                    # File doesn't seem to be there?
                    logger.error('File transferred but has no info')
                    raise PTransferFailure('No file info')

                elif md5sum != cadc_file_info['content-md5']:
                    # File corrupted on transfer?  Put it back but in
                    # the replace directory for later re-transfer.
                    # (Rebinding "file" makes the failure handler below
                    # use the "replace" stream as its destination.)
                    logger.error('File transferred but MD5 sum wrong')
                    file = file._replace(stream='replace')
                    raise PTransferFailure('MD5 sum wrong')

                # On success, delete the file.
                logger.info('Transferred file %s (%s)', file.name, ad_stream)
                os.unlink(proc_file)

        except PTransferException as e:
            # In the event of an error generated by one of the pre-transfer
            # checks, move the file into a reject directory.
            n_err += 1
            code = e.reject_code
            logger.error('Rejecting file %s (%s)', file.name, code)

            if not dry_run:
                reject_dir = os.path.join(trans_dir, 'reject', code)
                if not os.path.exists(reject_dir):
                    logger.debug('Making reject directory: %s', reject_dir)
                    os.makedirs(reject_dir)

                logger.debug('Moving file to: %s', reject_dir)
                os.rename(proc_file, os.path.join(reject_dir, file.name))

        except PTransferFailure as e:
            # In the event of failure to transfer, put the file back into
            # its original stream directory.
            n_err += 1
            logger.error(
                'Failed to transfer file %s (%s)', file.name, e.message)

            if not dry_run:
                os.rename(
                    proc_file,
                    os.path.join(trans_dir, file.stream, file.name))

        except:
            # Catch any other exception and also put the file back.
            # NOTE(review): this is deliberately left as a catch-all so
            # that the file is always returned to its stream directory,
            # but it also means an interrupt only abandons the current
            # file rather than stopping the poll -- confirm this is the
            # intended behavior.
            n_err += 1
            logger.exception('Error while transferring file %s', file.name)

            if not dry_run:
                os.rename(
                    proc_file,
                    os.path.join(trans_dir, file.stream, file.name))

    # Finally clean up the processing directory.  It should have nothing
    # left in it by this point other than the stream subdirectories and
    # stamp file.
    if not dry_run:
        os.unlink(stamp_file)
        for stream in streams:
            os.rmdir(os.path.join(proc_dir, stream))
        os.rmdir(proc_dir)

    # If errors occurred, exit with bad status.
    if n_err:
        raise CommandError('Errors occurred during p-transfer poll'
                           ' ({0} error(s))'.format(n_err))
def etransfer_poll_output(dry_run):
    """High level polling function to use from scripts.

    Each job at location "JAC" in the TRANSFERRING state which belongs
    to an e-transfer task is examined:

    * If any of its output files was rejected by e-transfer, the job
      is placed in the ERROR state.
    * If any of its files are still in the e-transfer directories
      (without rejection), the job is left alone for a later poll.
    * Otherwise each file is looked up at CADC.  If every file is
      present with an MD5 sum matching the database, the job moves on
      to the INGESTION state; if any file is missing or has a different
      MD5 sum the job is placed in the ERROR state.

    In dry run mode nothing is changed in the database.

    Raises a CommandError (after all jobs have been examined) if the
    output file list of any job could not be retrieved.
    """

    logger.debug('Preparing to poll the e-transfer system for job output')

    # When not in dry run mode, check that etransfer is being
    # run on the correct machine.  (Any user is permitted to poll.)
    if not dry_run:
        etransfer_check_config(any_user=True)

    logger.debug('Connecting to JSA processing database')
    db = get_database()

    logger.debug('Retrieving task information from database')
    task_info = db.get_task_info()

    n_err = 0

    for job in db.find_jobs(location='JAC', state=JSAProcState.TRANSFERRING):
        # Retrieve this job's task information.
        job_task_info = task_info.get(job.task)

        if ((job_task_info is None)
                or (job_task_info.command_xfer is not None)
                or (not job_task_info.etransfer)):
            # Job not from an e-transfer task: skip it.
            continue

        job_id = job.id
        logger.debug('Checking state of job %i', job_id)

        logger.debug('Retrieving list of output files')
        try:
            file_info = db.get_output_files(job_id, with_info=True)
            files = [x.filename for x in file_info]

        except NoRowsError:
            logger.error('Did not find output files for job %i', job_id)
            n_err += 1
            continue

        try:
            logger.debug('Checking if files are in the e-transfer directories')
            etransfer_status = etransfer_file_status(files)
            if any(etransfer_status):
                rejection = []
                for (filename, status) in zip(files, etransfer_status):
                    if status is not None:
                        # Each status is an (ok, directory) pair; when not
                        # ok the directory is reported as the reason.
                        (ok, location) = status
                        if not ok:
                            logger.error(
                                'File {0} was rejected, reason: {1}'.format(
                                    filename, location))
                            rejection.append(
                                '{0} ({1})'.format(filename, location))

                if rejection:
                    raise ETransferError('files rejected: {0}'.format(
                        ', '.join(rejection)))

                # Otherwise we found files in the "in progress" directories
                # so proceed to the next job.
                continue

            logger.debug('Checking if all files are at CADC')
            lost = []
            for info in file_info:
                cadc_file_info = fetch_cadc_file_info(info.filename)

                if cadc_file_info is None:
                    logger.error(
                        'Job %i file %s gone from e-transfer '
                        'but not at CADC', job_id, info.filename)
                    lost.append(info.filename)

                # This must be "elif": the MD5 sum can only be compared
                # when CADC actually returned information for the file.
                elif cadc_file_info['content-md5'] != info.md5:
                    logger.error(
                        'Job %i file %s gone from e-transfer '
                        'but MD5 sum does not match', job_id, info.filename)
                    lost.append(info.filename)

            if lost:
                raise ETransferError('files lost or corrupt: {0}'.format(
                    ', '.join(lost)))

            else:
                # All files present and with correct MD5 sums.
                logger.info('Job %i appears to have all files at CADC',
                            job_id)
                if not dry_run:
                    db.change_state(job_id, JSAProcState.INGESTION,
                                    'Output files finished e-transfer',
                                    state_prev=JSAProcState.TRANSFERRING)

        except ETransferError as e:
            # An e-transfer problem puts the job into the ERROR state;
            # note that it is not counted in n_err.
            logger.error('Job %i failed e-transfer: %s', job_id, e.message)
            if not dry_run:
                db.change_state(
                    job_id, JSAProcState.ERROR,
                    'Job failed e-transfer: {0}'.format(e.message),
                    state_prev=JSAProcState.TRANSFERRING)

    logger.debug('Done polling the e-transfer system')

    if n_err:
        raise CommandError('Errors were encountered polling e-transfer')
def _etransfer_send(job_id, dry_run, db, force):
    """Copy the output files of a job into the e-transfer directories.

    Before anything is copied, the following are verified, with a
    CommandError raised on the first problem found: the job has output
    files, every file has an MD5 sum in the database, every file exists
    in the job's output directory, no file is already present in the
    e-transfer scratch directory and no file is already in one of the
    e-transfer directories.

    Each file is then compared with CADC: a file unknown to CADC goes
    to the "new" directory, a file known with a different MD5 sum goes
    to the "replace" directory and a file with an identical MD5 sum is
    skipped.  Finally the job state is set to TRANSFERRING, requiring
    the previous state to be PROCESSED unless force is set.

    In dry run mode no files are copied and the state is not changed.

    Expected to run under the ErrorDecorator so that errors are
    captured (the decoration is not visible in this definition --
    confirm).
    """

    config = get_config()
    scratchdir = config.get('etransfer', 'scratchdir')
    transdir = config.get('etransfer', 'transdir')
    group_name = config.get('etransfer', 'group')
    group_id = grp.getgrnam(group_name).gr_gid

    logger.debug('Retrieving list of output files')
    try:
        file_info = db.get_output_files(job_id, with_info=True)
        files = [entry.filename for entry in file_info]
    except NoRowsError:
        raise CommandError('No output files found for job {0}'.format(job_id))

    logger.debug('Checking that the MD5 sum for each file is defined')
    for entry in file_info:
        if entry.md5 is None:
            raise CommandError(
                'File {0} MD5 sum is missing from database'.format(
                    entry.filename))

    logger.debug('Checking that all files are present')
    outdir = get_output_dir(job_id)
    for name in files:
        if os.path.exists(os.path.join(outdir, name)):
            continue
        raise CommandError('File {0} not in directory {1}'.format(
            name, outdir))

    logger.debug('Checking that files are not in the scratch directory')
    in_scratch = os.listdir(scratchdir)
    for name in files:
        if name in in_scratch:
            raise CommandError(
                'File {0} is in e-transfer scratch directory'.format(name))

    logger.debug('Checking whether the files are already in e-transfer')
    etransfer_status = etransfer_file_status(files)
    if any(etransfer_status):
        # Report every offending file before giving up.
        for (name, status) in zip(files, etransfer_status):
            if status is None:
                continue
            (_ok, location) = status
            logger.error(
                'File {0} already in e-transfer directory {1}'.format(
                    name, location))
        raise CommandError('Some files are already in e-transfer directories')

    for entry in file_info:
        name = entry.filename
        cadc_file_info = fetch_cadc_file_info(name)

        if cadc_file_info is None:
            target_type = 'new'
        else:
            # We need to check whether the file is not, in fact, different
            # from the current version, because in that case we are not
            # allowed to "replace" it.
            if entry.md5 == cadc_file_info['content-md5']:
                logger.info('File %s in unchanged, skipping replacement',
                            name)
                continue
            target_type = 'replace'

        logger.info('Placing file %s in "%s" directory', name, target_type)

        source_file = os.path.join(outdir, name)
        scratch_file = os.path.join(scratchdir, name)
        target_file = os.path.join(transdir, target_type, name)

        if dry_run:
            logger.debug('Skipping e-transfer (DRY RUN)')
            continue

        # Copy the file into the scratch directory and prepare its
        # file permissions.
        shutil.copyfile(source_file, scratch_file)
        os.chown(scratch_file, -1, group_id)
        os.chmod(scratch_file, 0o664)

        # Move the file to the target directory.  This is done so that
        # the file appears atomically in the target directory in order
        # to prevent the e-transfer system seeing only part of the file.
        os.rename(scratch_file, target_file)

    # Finally set the state of the job to TRANSFERRING
    if not dry_run:
        required_prev = None if force else JSAProcState.PROCESSED
        db.change_state(
            job_id, JSAProcState.TRANSFERRING,
            'Output files have been copied into the e-transfer directories',
            state_prev=required_prev)