Example #1
import os
import sys

# Helper classes come from grid-control's script utilities (import path may vary by version)
from gcSupport import getWorkJobs, FileInfoProcessor, JobInfoProcessor, OutputFileInfo

def main():
	jip = JobInfoProcessor()
	fip = FileInfoProcessor()
	(workDir, nJobs, jobList) = getWorkJobs(sys.argv[1:])
	for jobNum in sorted(jobList):
		outputDir = os.path.join(workDir, 'output', 'job_%d' % jobNum)
		# Only list files of jobs that finished with exit code 0
		if jip.process(outputDir)[1] == 0:
			for fileInfo in fip.process(outputDir):
				# Strip local transport prefixes to get the plain storage element path
				pathSE = fileInfo[OutputFileInfo.Path].replace('file://', '').replace('dir://', '')
				print('%s  %s/%s' % (fileInfo[OutputFileInfo.Hash], pathSE, fileInfo[OutputFileInfo.NameDest]))
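A minimal entry point for running the snippet as a standalone script; the grid-control work directory is assumed to be passed on the command line (the script name here is illustrative):

if __name__ == '__main__':
	main()  # e.g. invoked as: python list_job_output.py <workDir>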
Example #2
import os

# Assumed imports from grid-control (exact module paths vary between versions)
from gcSupport import FileInfoProcessor, utils
from grid_control.datasets import DataSplitter

def iter_jobs(opts, workDir, jobList, splitter):
    (splitInfo, fip) = ({}, FileInfoProcessor())
    for jobNum in jobList:
        activity = utils.ActivityLog('Reading job logs - [%d / %d]' % (jobNum, jobList[-1]))

        if opts.parameterized:
            # Derive the output name from the first output file of the job
            fi = fip.process(os.path.join(workDir, 'output', 'job_%d' % jobNum))
            outputName = fi[0][FileInfoProcessor.NameDest].split('.')[0]
            # Drop the per-job replacement pattern and collapse path separators
            outputName = outputName.replace(opts.replace % jobNum, '_')
            outputName = outputName.replace('/', '_').replace('__', '_').strip('_')
        else:
            # Fall back to dataset information provided by the job splitter
            if splitter:
                splitInfo = splitter.getSplitInfo(jobNum)
            outputName = splitInfo.get(DataSplitter.Nickname,
                                       splitInfo.get(DataSplitter.DatasetID, 0))
        yield (jobNum, outputName)
        activity.finish()
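For context, a hedged sketch of how the generator might be consumed; opts, workDir, jobList and splitter are assumed to be prepared by the surrounding script:

for (jobNum, outputName) in iter_jobs(opts, workDir, jobList, splitter):
    print('Job %d -> output set %s' % (jobNum, outputName))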
Example #3
import os
import sys
import time

# Assumed imports from grid-control (exact module paths vary between versions)
from gcSupport import FileInfoProcessor, Job
from python_compat import lmap

def download_job_output(opts, incInfo, workDir, jobDB, token, jobNum, output):
    output.init(jobNum)
    job = jobDB.get(jobNum)
    # Only run over finished and not yet downloaded jobs
    if job.state != Job.SUCCESS:
        output.error('Job has not yet finished successfully!')
        return incInfo('Processing')
    # Skip jobs whose output was already downloaded (unless explicitly ignored)
    if job.get('download') == 'True' and not opts.mark_ignore_dl:
        if not int(opts.threads):
            output.error('All files already downloaded!')
        return incInfo('Downloaded')
    retry = int(job.get('download attempt', 0))
    failJob = False

    # Make sure the access token stays valid long enough for the transfers
    if not token.canSubmit(20 * 60, True):
        sys.stderr.write('Please renew access token!\n')
        sys.exit(os.EX_UNAVAILABLE)

    # Read the file hash entries from job info file
    files = FileInfoProcessor().process(
        os.path.join(workDir, 'output', 'job_%d' % jobNum)) or []
    if files:
        files = lmap(lambda fi: (fi[FileInfoProcessor.Hash], fi[FileInfoProcessor.NameLocal],
                                 fi[FileInfoProcessor.NameDest], fi[FileInfoProcessor.Path]), files)
    output.update_files(files)
    if not files:
        if opts.mark_empty_fail:
            failJob = True
        else:
            return incInfo('Job without output files')

    # Download each output file; a single failed transfer marks the whole job as failed
    for (fileIdx, fileInfo) in enumerate(files):
        failJob = failJob or not download_file(opts, output, jobNum, fileIdx, fileInfo)

    # Allow up to opts.retry failed download attempts before giving up on the job
    if failJob and opts.retry and (retry < int(opts.retry)):
        output.error('Download attempt #%d failed!' % (retry + 1))
        job.set('download attempt', str(retry + 1))
        jobDB.commit(jobNum, job)
        return incInfo('Download attempts')

    cleanup_files(opts, files, failJob, output)

    if failJob:
        incInfo('Failed downloads')
        if opts.mark_fail:
            # Mark job as failed to trigger resubmission
            job.state = Job.FAILED
    else:
        incInfo('Successful download')
        if opts.mark_dl:
            # Mark as downloaded
            job.set('download', 'True')

    # Save the new job status information
    jobDB.commit(jobNum, job)
    output.finish()
    time.sleep(float(opts.slowdown))
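The incInfo callback passed into download_job_output is not shown in the example. A minimal sketch of one possible implementation, assuming it only tallies jobs per result category (make_inc_info is a hypothetical helper, not part of grid-control):

def make_inc_info(counters):
    # Hypothetical helper: each call bumps the counter for a result category.
    # It returns None, so callers can write 'return incInfo(...)' to exit early.
    def incInfo(category):
        counters[category] = counters.get(category, 0) + 1
    return incInfo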