Example No. 1
def upload_output(task, cluster, job, *args, **kwargs):
    # Girder client
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    # Register generated file into girder
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        output_folder_id = output_folder_id[0].value
        task.taskflow.logger.info('Exporting to folder %s' % output_folder_id)
        job['output'] = [{'folderId': output_folder_id, 'path': '.'}]
    else:
        task.taskflow.logger.info('No output.folder.id')

    # Upload the job output files to the folder
    upload_job_output_to_folder(cluster,
                                job,
                                log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    # Done...
    task.taskflow.logger.info('Upload complete.')
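
The `parse('output.folder.id').find(kwargs)` lookup in this example is a JSONPath-style query; assuming it is `jsonpath_rw.parse` (the import is not shown above), a minimal sketch of how that lookup behaves:

# Minimal sketch, assuming `parse` comes from the jsonpath_rw package
# (an assumption; the import is not shown in these examples).
from jsonpath_rw import parse

kwargs = {'output': {'folder': {'id': 'abc123'}}}   # hypothetical kwargs payload

matches = parse('output.folder.id').find(kwargs)    # list of match objects
if matches:
    output_folder_id = matches[0].value              # 'abc123'
else:
    output_folder_id = None                          # path not present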
Example No. 2
def postprocess_job(task, _, input_, cluster, image, run_parameters,
                    root_folder, container_description, input_folder,
                    output_folder, scratch_folder, job):
    task.taskflow.logger.info('Processing the results of the job.')
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    upload_job_output_to_folder(cluster,
                                job,
                                girder_token=task.taskflow.girder_token)

    # Remove the temporary input folder; this data is attached to the calculation model
    client.delete('folder/%s' % input_folder['_id'])

    # clean up the scratch folder
    keep_scratch = run_parameters.get('keepScratch', False)
    if keep_scratch:
        scratch_folder_id = scratch_folder['_id']
    else:
        client.delete('folder/%s' % scratch_folder['_id'])
        scratch_folder_id = None

    # ingest the output of the calculation
    output_format = container_description['output']['format']
    output_file = None
    output_items = list(client.listItem(output_folder['_id']))
    for item in output_items:
        if item['name'] == 'output.%s' % output_format:
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' %
                    len(files))
            output_file = files[0]
            break

    if output_file is None:
        raise Exception('The calculation did not produce any output file.')

    # Now call endpoint to ingest result
    body = {
        'fileId': output_file['_id'],
        'format': output_format,
        'public': True,
        'image':
        image,  # image now also has a digest field, add it to the calculation
        'scratchFolderId': scratch_folder_id
    }
    client.put('calculations/%s' % input_['calculation']['_id'], json=body)
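
The "exactly one file under the item" check recurs throughout these examples; a hypothetical helper (not part of the original code) that factors out the same pattern, using the girder_client `listItem`/`listFile` calls seen above, might look like:

def find_single_output_file(client, folder_id, file_name):
    # Hypothetical helper: return the single file under the item named
    # `file_name` in `folder_id`, or None if no such item exists.
    for item in client.listItem(folder_id):
        if item['name'] == file_name:
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' % len(files))
            return files[0]
    return None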
Example No. 3
def postprocess_job(task, _, user, cluster, image, job, folder, container):
    task.taskflow.logger.info('Finished pulling the container')

    task.taskflow.logger.info('Processing the results of the pull.')
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    upload_job_output_to_folder(cluster,
                                job,
                                girder_token=task.taskflow.girder_token)

    output_items = list(client.listItem(folder['_id']))
    output_file = None
    for item in output_items:
        if item['name'] == OUTPUT_FILE:
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                log_and_raise(
                    task, 'Expecting a single file under item, found: %s' %
                    len(files))
            output_file = files[0]
            break

    if output_file is None:
        log_and_raise(task, 'Could not locate output file: %s' % OUTPUT_FILE)

    # Download the output file
    output_io = io.BytesIO()
    client.downloadFile(output_file['_id'], output_io)

    # Remove the folder
    client.delete('folder/%s' % folder['_id'])

    pull_json = json.loads(output_io.getvalue().decode('utf-8'))
    image_uri = pull_json.get('imageUri')
    # Convert size to GB
    size = round(pull_json.get('size', 0) / 1.e9, 2)

    _ensure_image_uri_is_valid(task, container, image_uri)

    repository = image.get('repository')
    tag = image.get('tag')
    digest = _extract_digest(container, image_uri)

    post_image_to_database(client, container, repository, tag, digest, cluster,
                           size)

    task.taskflow.logger.info('Success!')
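
The size conversion in this example treats the reported `size` as bytes and rounds to hundredths of a decimal gigabyte; a quick worked example:

size_bytes = 2147483648               # hypothetical size reported in pull.json (2 GiB)
size_gb = round(size_bytes / 1.e9, 2)
# size_gb == 2.15 (decimal gigabytes, not GiB)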
Example No. 4
def upload_output(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')

    # Refresh state of job
    client = create_girder_client(
            task.taskflow.girder_api_url, task.taskflow.girder_token)
    job = client.get('jobs/%s' % job['_id'])

    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        output_folder_id = output_folder_id[0].value
        job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

    upload_job_output_to_folder(cluster, job, log_write_url=None, job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload complete.')
Example No. 5
def upload_output(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')

    # Refresh state of job
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)
    job = client.get('jobs/%s' % job['_id'])

    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        output_folder_id = output_folder_id[0].value
        job['output'] = [{'folderId': output_folder_id, 'path': '.'}]

    upload_job_output_to_folder(cluster,
                                job,
                                log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload complete.')
Example No. 6
def upload_export_output(task, _, cluster, job, *args, **kwargs):
    output_folder_id = kwargs['output']['folder']['id']

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    for job_id in task.taskflow.get_metadata('export_jobs')['export_jobs']:
        # Get job
        export_job = client.get('jobs/%s' % job_id)
        export_job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

        upload_job_output_to_folder(cluster, export_job, log_write_url=None,
            job_dir=None, girder_token=task.taskflow.girder_token)

    # Upload the vtu files
    girder_token = task.taskflow.girder_token
    download_path_from_cluster(cluster, girder_token, output_folder_id, job['dir'],
                               include=['^.*\\.vtu$'])
Example No. 7
def upload_export_output(task, _, cluster, job, *args, **kwargs):
    output_folder_id = kwargs['output']['folder']['id']

    client = _create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    for job_id in task.taskflow.get_metadata('export_jobs')['export_jobs']:
        # Get job
        export_job = client.get('jobs/%s' % job_id)
        export_job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

        upload_job_output_to_folder(cluster, export_job, log_write_url=None,
            job_dir=None, girder_token=task.taskflow.girder_token)

    # Upload the vtu files
    girder_token = task.taskflow.girder_token
    download_path_from_cluster(cluster, girder_token, output_folder_id, job['dir'],
                               include=['^.*\\.vtu$'])
Example No. 8
def upload_output(task, upstream_result):
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = upstream_result['output']['folder']['id']
    cluster = upstream_result['cluster']
    job = upstream_result['job']

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{
        'folderId': output_folder_id,
        'path': '.'
    }]

    upload_job_output_to_folder(cluster, job, log_write_url=None, job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    return upstream_result
Example No. 9
def postprocess(task, _, run_folder, input_, cluster, job):
    task.taskflow.logger.info('Uploading results from cluster')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    output_folder = client.createFolder(run_folder['_id'], 'output')
    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{'folderId': output_folder['_id'], 'path': '.'}]

    upload_job_output_to_folder(cluster,
                                job,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    input_file_name = task.taskflow.get_metadata('inputFileName')
    # Call to ingest the files
    for item in client.listItem(output_folder['_id']):
        if item['name'].endswith('.json'):
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' %
                    len(files))

            json_output_file_id = files[0]['_id']
            # Now call endpoint to ingest result
            body = {
                'calculationId': input_['calculation']['_id'],
                'fileId': json_output_file_id,
                'public': True
            }

            client.post('molecules', json=body)
Example No. 10
def postprocess_description(task, _, input_, cluster, image, run_parameters,
                            root_folder, description_job, description_folder):
    task.taskflow.logger.info('Processing description job output.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    description_job = client.get('jobs/%s' % description_job['_id'])

    upload_job_output_to_folder(cluster,
                                description_job,
                                girder_token=task.taskflow.girder_token)

    description_items = list(client.listItem(description_folder['_id']))

    description_file = None
    pull_file = None
    for item in description_items:
        if item['name'] == 'description.json':
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' %
                    len(files))
            description_file = files[0]

        elif item['name'] == 'pull.json':
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s' %
                    len(files))
            pull_file = files[0]

    if pull_file is None:
        raise Exception(
            'There was an error trying to pull the requested container image')

    if description_file is None:
        raise Exception(
            'The container does not correctly implement the --description flag'
        )

    with tempfile.TemporaryFile() as tf:
        client.downloadFile(pull_file['_id'], tf)
        tf.seek(0)
        container_pull = json.loads(tf.read().decode())

    image = container_pull

    with tempfile.TemporaryFile() as tf:
        client.downloadFile(description_file['_id'], tf)
        tf.seek(0)
        container_description = json.loads(tf.read().decode())

    # remove temporary description folder
    client.delete('folder/%s' % description_folder['_id'])

    setup_input.delay(input_, cluster, image, run_parameters, root_folder,
                      container_description)
Example No. 11
def upload_output(task, _, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = kwargs['output']['folder']['id']

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{'folderId': output_folder_id, 'path': '.'}]

    upload_job_output_to_folder(cluster,
                                job,
                                log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    mesh_file_id = kwargs.pop('meshFileId')

    solution_files = list(_list_solution_files(client, output_folder_id))

    if len(solution_files) == 0:
        raise Exception(
            'No solution files were produced, please check output files for errors.'
        )

    # Generate and save the first vtu file that should be loaded for this
    # run. This can then be used to know which file to open as part of any viz
    # step.
    file_names = [f['name'] for f in solution_files]
    file_names.sort()
    if len(file_names) > 1:
        vtu_file_first = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
        # find where to put the ...*...
        head = tail = size = len(vtu_file_first)
        for fileName in file_names[1:]:
            vtu_name = '%s.vtu' % fileName.rsplit('.', 1)[0]
            for i in range(size):
                if vtu_file_first[i] != vtu_name[i] and head > i:
                    head = i  # not included in the cut
                if vtu_file_first[-i] != vtu_name[-i] and tail >= i:
                    tail = i - 1

        vtu_file = '%s*%s' % (vtu_file_first[:head], vtu_file_first[-tail:])
    else:
        vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    task.taskflow.set_metadata('vtuFile', vtu_file)

    number_files = len(solution_files)

    # By default export solution files to VTK format using a set of batch jobs
    if 'exportInTaskFlow' not in kwargs or not kwargs['exportInTaskFlow']:

        number_of_jobs = kwargs['numberOfProcs']
        task.logger.info('Generating %d export jobs' % number_of_jobs)

        sim_job_dir = job['dir']
        jobs = []
        job_index = 1
        for chunk in [
                solution_files[i::number_of_jobs]
                for i in xrange(number_of_jobs)
        ]:
            if chunk:
                name = 'pyfr_export_%d' % job_index
                mesh_filename = kwargs['meshFilename']
                export_job = create_export_job(task, name, chunk, sim_job_dir,
                                               mesh_filename)
                submit_job(cluster,
                           export_job,
                           log_write_url=None,
                           girder_token=task.taskflow.girder_token,
                           monitor=False)
                jobs.append(export_job)
                job_index += 1

        # Update the jobs list in the metadata
        task.taskflow.set_metadata('jobs', jobs + [job])
        # Also save just the export job ids
        task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs])

        monitor_jobs.apply_async(
            (cluster, jobs), {'girder_token': task.taskflow.girder_token},
            link=upload_export_output.s(cluster, job, *args, **kwargs))
    else:
        # The number 100 is pretty arbitrary!
        if number_files < 100:
            export_output.delay(output_folder_id, mesh_file_id, solution_files)
        # Break into chunks and run in parallel
        else:
            for chunk in [
                    solution_files[i::NUMBER__OF_EXPORT_TASKS]
                    for i in xrange(NUMBER__OF_EXPORT_TASKS)
            ]:
                export_output.delay(output_folder_id, mesh_file_id, chunk)
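
The `solution_files[i::number_of_jobs]` slicing above splits the solution files round-robin across the export jobs; a standalone illustration with hypothetical file names:

# Illustration only: strided slicing distributes files round-robin.
solution_files = ['sol-%02d.pyfrs' % i for i in range(7)]   # hypothetical names
number_of_jobs = 3

chunks = [solution_files[i::number_of_jobs] for i in range(number_of_jobs)]
# chunks[0] == ['sol-00.pyfrs', 'sol-03.pyfrs', 'sol-06.pyfrs']
# chunks[1] == ['sol-01.pyfrs', 'sol-04.pyfrs']
# chunks[2] == ['sol-02.pyfrs', 'sol-05.pyfrs']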
Example No. 12
def postprocess_job(task, _, input_, cluster, image, run_parameters,
                    root_folder, container_description, input_folder,
                    output_folder, scratch_folder, run_folder, job):
    task.taskflow.logger.info('Processing the results of the calculation.')
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    upload_job_output_to_folder(cluster,
                                job,
                                girder_token=task.taskflow.girder_token)

    # remove temporary input folder, this data is attached to the calculation model
    client.delete('folder/%s' % input_folder['_id'])

    # clean up the scratch folder
    keep_scratch = run_parameters.get('keepScratch', False)
    if keep_scratch:
        scratch_folder_id = scratch_folder['_id']
    else:
        client.delete('folder/%s' % scratch_folder['_id'])
        scratch_folder_id = None

    # ingest the output of the calculation
    output_format = container_description['output']['format']
    output_files = []
    output_items = list(client.listItem(output_folder['_id']))
    for i in range(len(input_['calculations'])):
        output_file = None
        for item in output_items:
            if item['name'] == 'output_%d.%s' % (i + 1, output_format):
                files = list(client.listFile(item['_id']))
                if len(files) != 1:
                    log_std_err(task, client, run_folder)
                    log_and_raise(
                        task, 'Expecting a single file under item, found: %s' %
                        len(files))
                output_file = files[0]
                break

        if output_file is None:
            # Log the job stderr
            log_std_err(task, client, run_folder)
            log_and_raise(task,
                          'The calculation did not produce any output file.')

        output_files.append(output_file)

    # Remove the run folder; it is only needed to access the stdout and stderr after the job is done
    client.delete('folder/%s' % run_folder['_id'])

    # Now call endpoint to ingest result
    params = {'detectBonds': True}

    task.taskflow.logger.info(
        'Uploading the results of the calculation to the database.')

    code = task.taskflow.get_metadata('code')
    if isinstance(code, dict):
        # Get the contents of "code" to set it below
        code = code.get('code')

    for i, output_file in enumerate(output_files):
        body = {
            'fileId': output_file['_id'],
            'format': output_format,
            'public': True,
            'image':
            image,  # image now also has a digest field, add it to the calculation
            'scratchFolderId': scratch_folder_id,
            'code': code
        }

        client.put('calculations/%s' % input_['calculations'][i],
                   parameters=params,
                   json=body)

    task.taskflow.logger.log(STATUS_LEVEL, 'Done!')
Example No. 13
def postprocess_description(task, _, input_, user, cluster, image,
                            run_parameters, root_folder, description_job,
                            description_folder):
    task.taskflow.logger.info(
        'Processing the output of the container description job.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    description_job = client.get('jobs/%s' % description_job['_id'])

    upload_job_output_to_folder(cluster,
                                description_job,
                                girder_token=task.taskflow.girder_token)

    description_items = list(client.listItem(description_folder['_id']))

    description_file = None
    for item in description_items:
        if item['name'] == 'description.json':
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                log_std_err(task, client, description_folder)
                log_and_raise(
                    task, 'Expecting a single file under item, found: %s' %
                    len(files))
            description_file = files[0]

    if description_file is None:
        log_std_err(task, client, description_folder)
        log_and_raise(
            task,
            'The container does not correctly implement the --description flag'
        )

    with client.session() as session:
        # If we have a NEWT session id we need to set it as a cookie so the
        # redirect to the NEWT API works (i.e. is authenticated).
        newt_session_id = parse('newt.sessionId').find(user)
        if newt_session_id:
            newt_session_id = newt_session_id[0].value
            session.cookies.set('newt_sessionid', newt_session_id)

        with tempfile.TemporaryFile() as tf:
            client.downloadFile(description_file['_id'], tf)
            tf.seek(0)
            container_description = json.loads(tf.read().decode())

    # Add code name and version to the taskflow metadata
    code = {
        'name': container_description.get('name'),
        'version': container_description.get('version')
    }
    task.taskflow.set_metadata('code', code)

    # remove temporary description folder
    client.delete('folder/%s' % description_folder['_id'])

    setup_input.delay(input_, cluster, image, run_parameters, root_folder,
                      container_description)
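
The `code` metadata in this example comes from the downloaded `description.json`; a small sketch of that step in isolation, assuming the file carries at least `name` and `version` keys (the exact schema is not shown here):

import json

# Hypothetical description.json contents; only 'name' and 'version' are assumed.
description_text = '{"name": "some-code", "version": "1.0"}'

container_description = json.loads(description_text)
code = {
    'name': container_description.get('name'),
    'version': container_description.get('version')
}
# code == {'name': 'some-code', 'version': '1.0'}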
Example No. 14
def upload_output(task, _, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = kwargs['output']['folder']['id']

    client = _create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{
        'folderId': output_folder_id,
        'path': '.'
    }]

    upload_job_output_to_folder(cluster, job, log_write_url=None, job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    mesh_file_id = kwargs.pop('meshFileId')

    solution_files = list(_list_solution_files(client, output_folder_id))

    if len(solution_files) == 0:
        raise Exception('No solution files were produced, please check output files for errors.')

    # Generate and save the first vtu file that should be loaded for this
    # run. This can then be used to know which file to open as part of any viz
    # step.
    file_names = [f['name'] for f in solution_files]
    file_names.sort()
    vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    task.taskflow.set_metadata('vtuFile', vtu_file)

    number_files = len(solution_files)

    # By default export solution files to VTK format using a set of batch jobs
    if 'exportInTaskFlow' not in kwargs or not kwargs['exportInTaskFlow']:

        number_of_jobs = kwargs['numberOfProcs']
        task.logger.info('Generating %d export jobs' % number_of_jobs)

        sim_job_dir = job['dir']
        jobs = []
        job_index = 1
        for chunk in [solution_files[i::number_of_jobs] for i in xrange(number_of_jobs)]:
            if chunk:
                name = 'pyfr_export_%d' % job_index
                mesh_filename = kwargs['meshFilename']
                export_job = create_export_job(
                    task, name, chunk, sim_job_dir, mesh_filename)
                submit_job(cluster, export_job, log_write_url=None,
                              girder_token=task.taskflow.girder_token, monitor=False)
                jobs.append(export_job)
                job_index += 1

        # Update the jobs list in the metadata
        task.taskflow.set_metadata('jobs', jobs + [job])
        # Also save just the export job ids
        task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs])

        monitor_jobs.apply_async(
            (cluster, jobs), {'girder_token': task.taskflow.girder_token},
            link=upload_export_output.s(cluster, job, *args, **kwargs))
    else:
        # The number 100 is pretty arbitrary!
        if number_files < 100:
            export_output.delay(
                output_folder_id, mesh_file_id, solution_files)
        # Break into chunks and run in parallel
        else:
            for chunk in [solution_files[i::NUMBER__OF_EXPORT_TASKS] for i in xrange(NUMBER__OF_EXPORT_TASKS)]:
                export_output.delay(output_folder_id, mesh_file_id, chunk)
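
The `rsplit('.', 1)[0]` expression used to derive the vtu file name replaces only the last extension; for example:

file_name = 'run-000.40.pyfrs'                      # hypothetical solution file name
vtu_file = '%s.vtu' % file_name.rsplit('.', 1)[0]
# vtu_file == 'run-000.40.vtu'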