import os
import sys
import time

# JobInfoProcessor, FileInfoProcessor, OutputFileInfo, getWorkJobs, lmap,
# utils, Job and DataSplitter are helpers from the surrounding toolkit and
# are assumed to be imported elsewhere in this script.

def main():
	jip = JobInfoProcessor()
	fip = FileInfoProcessor()
	(workDir, nJobs, jobList) = getWorkJobs(sys.argv[1:])
	for jobNum in sorted(jobList):
		jobOutputDir = os.path.join(workDir, 'output', 'job_%d' % jobNum)
		# Only list output files of jobs that finished with exit code 0
		if jip.process(jobOutputDir)[1] == 0:
			for fileInfo in fip.process(jobOutputDir):
				# Strip the URL scheme to get the plain storage path
				pathSE = fileInfo[OutputFileInfo.Path].replace('file://', '').replace('dir://', '')
				print('%s %s/%s' % (fileInfo[OutputFileInfo.Hash], pathSE, fileInfo[OutputFileInfo.NameDest]))
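
# Hedged sketch (not part of the original source): since main() reads its
# arguments from sys.argv, it looks like the script's entry point, which
# would conventionally be wired up like this.
if __name__ == '__main__':
	main()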
def iter_jobs(opts, workDir, jobList, splitter):
	(splitInfo, fip) = ({}, FileInfoProcessor())
	for jobNum in jobList:
		activity = utils.ActivityLog('Reading job logs - [%d / %d]' % (jobNum, jobList[-1]))
		if opts.parameterized:
			# Derive the output name from the destination name of the job's first output file
			fi = fip.process(os.path.join(workDir, 'output', 'job_%d' % jobNum))
			outputName = fi[0][FileInfoProcessor.NameDest].split('.')[0]
			outputName = outputName.replace(opts.replace % jobNum, '_').replace('/', '_').replace(
				'__', '_').strip('_')
		else:
			# Fall back to the dataset nickname (or dataset ID) from the splitter
			if splitter:
				splitInfo = splitter.getSplitInfo(jobNum)
			outputName = splitInfo.get(DataSplitter.Nickname,
				splitInfo.get(DataSplitter.DatasetID, 0))
		yield (jobNum, outputName)
		activity.finish()
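
# Minimal consumption sketch (an assumption, not from the original source):
# iter_jobs() yields (jobNum, outputName) pairs, so a caller can bucket job
# numbers by their derived output name, e.g. to merge output files per sample.
# group_jobs_by_output is a hypothetical helper name.
def group_jobs_by_output(opts, workDir, jobList, splitter):
	jobsByName = {}
	for (jobNum, outputName) in iter_jobs(opts, workDir, jobList, splitter):
		jobsByName.setdefault(outputName, []).append(jobNum)
	return jobsByName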
def download_job_output(opts, incInfo, workDir, jobDB, token, jobNum, output):
	output.init(jobNum)
	job = jobDB.get(jobNum)
	# Only run over finished and not yet downloaded jobs
	if job.state != Job.SUCCESS:
		output.error('Job has not yet finished successfully!')
		return incInfo('Processing')
	if job.get('download') == 'True' and not opts.mark_ignore_dl:
		if not int(opts.threads):
			output.error('All files already downloaded!')
		return incInfo('Downloaded')
	retry = int(job.get('download attempt', 0))
	failJob = False

	if not token.canSubmit(20 * 60, True):
		sys.stderr.write('Please renew access token!\n')
		sys.exit(os.EX_UNAVAILABLE)

	# Read the file hash entries from job info file
	files = FileInfoProcessor().process(
		os.path.join(workDir, 'output', 'job_%d' % jobNum)) or []
	if files:
		files = lmap(lambda fi: (fi[FileInfoProcessor.Hash], fi[FileInfoProcessor.NameLocal],
			fi[FileInfoProcessor.NameDest], fi[FileInfoProcessor.Path]), files)
		output.update_files(files)
	if not files:
		if opts.mark_empty_fail:
			failJob = True
		else:
			return incInfo('Job without output files')

	# Download each output file - a single failed transfer fails the whole job
	for (fileIdx, fileInfo) in enumerate(files):
		failJob = failJob or not download_file(opts, output, jobNum, fileIdx, fileInfo)

	# Ignore the first opts.retry number of failed jobs
	if failJob and opts.retry and (retry < int(opts.retry)):
		output.error('Download attempt #%d failed!' % (retry + 1))
		job.set('download attempt', str(retry + 1))
		jobDB.commit(jobNum, job)
		return incInfo('Download attempts')

	cleanup_files(opts, files, failJob, output)

	if failJob:
		incInfo('Failed downloads')
		if opts.mark_fail:
			# Mark job as failed to trigger resubmission
			job.state = Job.FAILED
	else:
		incInfo('Successful download')
		if opts.mark_dl:
			# Mark as downloaded
			job.set('download', 'True')
	# Save new job status infos
	jobDB.commit(jobNum, job)
	output.finish()
	time.sleep(float(opts.slowdown))
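
# Hedged driver sketch (an assumption - the original script wires this up
# elsewhere, possibly with worker threads): incInfo behaves like a categorized
# outcome counter, and download_job_output() is called once per job, so a
# minimal sequential driver could look like this. process_all_jobs is a
# hypothetical helper name.
def process_all_jobs(opts, workDir, jobDB, token, jobList, output):
	infos = {}
	def incInfo(category):
		# Count how often each outcome (downloaded, failed, skipped, ...) occurred
		infos[category] = infos.get(category, 0) + 1
	for jobNum in jobList:
		download_job_output(opts, incInfo, workDir, jobDB, token, jobNum, output)
	return infos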