Пример #1
def notebook_flow(postconf, jobfile) -> Flow:
    """Build a Prefect Flow that runs a Python script in a Flow context.

    Originally designed for exported Jupyter Notebook .py scripts.

    Not fully tested. Designed for post jobs that require an on-demand
    compute node.

    Parameters
    ----------
    postconf : str
        The JSON configuration file for the Cluster to create

    jobfile : str
        The JSON configuration file for the post Job

    Returns
    -------
    nb_flow : prefect.Flow
        The flow holding the cluster init, job init, storage init and
        fetch-and-run tasks.
    """
    with Flow('notebook flow') as nb_flow:
        # POST Processing

        postmach = ctasks.cluster_init(postconf)
        plotjob = tasks.job_init(postmach, jobfile)

        # `provider` is not a parameter of this function; it is looked up in
        # the enclosing (presumably module) scope.
        storage_service = tasks.storage_init(provider)

        # Optionally inject a user supplied python script.
        # NOTE(review): `notebook` is not assigned anywhere in this function
        # (both candidate assignments below are commented out), so it must be
        # defined at module scope or this line raises NameError when the flow
        # is built. Confirm which script path is intended.
        #notebook = 'specify'
        #notebook = 'cloudflow/inject/kenny/cloud_sandbot.py'
        injected = tasks.fetchpy_and_run(plotjob, storage_service, notebook)

    return nb_flow
Пример #2
def test_flow(fcstconf, fcstjobfile) -> Flow:
    """Build a Prefect Flow for testing purposes.

    The flow initializes the cluster and job objects, initializes the storage
    service, and saves files matching ``*.nc`` through ``tasks.save_history``.

    Parameters
    ----------
    fcstconf : str
        The JSON configuration file for the Cluster to create

    fcstjobfile : str
        The JSON configuration file for the Job

    Returns
    -------
    testflow : prefect.Flow
    """
    with Flow('test workflow') as testflow:

        # Create the cluster object
        cluster = ctasks.cluster_init(fcstconf)

        # Setup the job
        fcstjob = tasks.job_init(cluster, fcstjobfile)

        # Copy the results to S3 (optionally. currently only saves LiveOcean)
        # `provider` is resolved from the enclosing scope, not from a parameter.
        storage_service = tasks.storage_init(provider)

        # NOTE(review): the job is passed as the first argument so that this
        # call matches the other save_history call in this file
        # (job, storage service, file patterns). The listing this was taken
        # from had lost that argument, which shifted the storage service into
        # the job position. Confirm against tasks.save_history's signature.
        cp2cloud = tasks.save_history(
            fcstjob,
            storage_service, ['*.nc'],
            upstream_tasks=[storage_service, fcstjob])

    return testflow
Пример #3
def test_flow(fcstconf, fcstjobfile) -> Flow:
    """Build a Prefect Flow for testing purposes.

    The flow initializes the cluster and job objects and then runs the
    ``ptmp2com`` copy task on the job.

    Parameters
    ----------
    fcstconf : str
        The JSON configuration file for the Cluster to create

    fcstjobfile : str
        The JSON configuration file for the Job

    Returns
    -------
    testflow : prefect.Flow
    """
    with Flow('test workflow') as testflow:

        # Create the cluster object
        cluster = ctasks.cluster_init(fcstconf)

        # Setup the job
        fcstjob = tasks.job_init(cluster, fcstjobfile)

        # Copy the results to /com (liveocean)
        cp2com = jtasks.ptmp2com(fcstjob)

    return testflow
Пример #4
def debug_model(fcstconf, fcstjobfile, sshuser) -> Flow:
    """Build a Prefect Flow for debugging a forecast run.

    The flow initializes the cluster and job, retrieves forcing data, starts
    the cluster, runs the forecast and copies the results with ``ptmp2com``.
    Scratch-disk handling and cluster termination are left commented out, so
    the cluster is not terminated by this flow.

    Parameters
    ----------
    fcstconf : str
        The JSON configuration file for the Cluster to create

    fcstjobfile : str
        The JSON configuration file for the forecast Job

    sshuser : str
        Passed to ``jtasks.get_forcing`` together with the job.
        Presumably the user and host used to retrieve the forcing data;
        confirm against get_forcing.

    Returns
    -------
    debugflow : prefect.Flow
    """
    with Flow('debug workflow') as debugflow:
        # FORECAST

        # Create the cluster object
        cluster = ctasks.cluster_init(fcstconf)

        # Setup the job
        fcstjob = tasks.job_init(cluster, fcstjobfile)

        # Note: These do not run in parallel as hoped

        # Get forcing data
        forcing = jtasks.get_forcing(fcstjob, sshuser)

        # scratch disk
        # TODO: /ptmp should come from the fcstjob?
        #scratch = tasks.create_scratch(provider,fcstconf,'/ptmp', upstream_tasks=[forcing])

        # Start the cluster
        cluster_start = ctasks.cluster_start(cluster, upstream_tasks=[forcing])

        # Mount the scratch disk
        #scratch_mount = tasks.mount_scratch(scratch, cluster, upstream_tasks=[cluster_start])

        # Run the forecast
        # NOTE(review): this call was truncated after `cluster,` in the
        # listing. The job argument and the dependency on cluster_start are
        # reconstructed from the other forecast flow in this file
        # (forecast_run(cluster, fcstjob), run after the cluster start).
        fcst_run = tasks.forecast_run(cluster, fcstjob,
                                      upstream_tasks=[cluster_start])

        # Terminate the cluster nodes
        #cluster_stop = ctasks.cluster_terminate(cluster, upstream_tasks=[fcst_run])

        # Copy the results to /com (liveocean)
        cp2com = jtasks.ptmp2com(fcstjob, upstream_tasks=[fcst_run])

        # Delete the scratch disk
        #scratch_delete = tasks.delete_scratch(scratch, upstream_tasks=[cp2com])

        # If the fcst fails, then set the whole flow to fail
        # NOTE(review): the statement that followed this comment was missing
        # from the listing; restored to match the sibling forecast flow,
        # which uses the forecast run and the /com copy as reference tasks.
        debugflow.set_reference_tasks([fcst_run, cp2com])

    return debugflow
Пример #5
def fcst_flow(fcstconf, fcstjobfile) -> Flow:
    """Build the forecast workflow.

    Tasks are chained explicitly with ``add_edge`` in this order:
    cluster init -> job init -> cluster start -> forecast run ->
    cluster terminate.

    Parameters
    ----------
    fcstconf : str
        The cluster config file to use for this forecast.

    fcstjobfile : str
        The job config file to use for this forecast.

    Returns
    -------
    flow : prefect.Flow
    """
    with Flow('fcst workflow') as flow:
        # FORECAST

        # Create the cluster object
        cluster = ctasks.cluster_init(fcstconf)

        # Setup the job
        fcstjob = tasks.job_init(cluster, fcstjobfile)

        # Start the cluster
        cluster_start = ctasks.cluster_start(cluster)

        # Run the forecast
        fcst_run = tasks.forecast_run(cluster, fcstjob)

        # Terminate the cluster nodes
        cluster_stop = ctasks.cluster_terminate(cluster)

        # The calls above only create data dependencies on `cluster` and
        # `fcstjob`; these edges impose the sequential execution order.
        flow.add_edge(cluster, fcstjob)
        flow.add_edge(fcstjob, cluster_start)
        flow.add_edge(cluster_start, fcst_run)
        flow.add_edge(fcst_run, cluster_stop)

        # If the fcst fails, then set the whole flow to fail
        flow.set_reference_tasks([fcst_run, cluster_stop])

    return flow
Пример #6
def fcst_flow(fcstconf, fcstjobfile, sshuser) -> Flow:
    """Build a Prefect Flow for a forecast workflow.

    The flow initializes the cluster and job, creates and mounts a scratch
    disk, retrieves forcing data, starts the cluster, runs the forecast,
    terminates the cluster, copies the results with ``ptmp2com``, deletes the
    scratch disk and saves ``*.nc`` files through the storage service.

    Parameters
    ----------
    fcstconf : str
        The JSON configuration file for the Cluster to create

    fcstjobfile : str
        The JSON configuration file for the forecast Job

    sshuser : str
        The user and host to use for retrieving data from a remote server.

    Returns
    -------
    fcstflow : prefect.Flow
    """
    with Flow('fcst workflow') as fcstflow:
        # FORECAST

        # Create the cluster object
        cluster = ctasks.cluster_init(fcstconf)

        # Setup the job
        fcstjob = tasks.job_init(cluster, fcstjobfile)

        # scratch disk
        # TODO: /ptmp should come from the fcstjob?
        scratch = tasks.create_scratch('NFS', fcstconf, '/ptmp')
        # Use 'FSx' for FSx scratch disk
        # Use 'NFS' for an already mounted NFS scratch disk

        # Get forcing data
        forcing = jtasks.get_forcing(fcstjob, sshuser)

        # Start the cluster
        cluster_start = ctasks.cluster_start(cluster,
                                             upstream_tasks=[forcing, scratch])

        # Mount the scratch disk
        scratch_mount = tasks.mount_scratch(
            scratch, cluster, upstream_tasks=[scratch, cluster_start])

        # Run the forecast
        # NOTE(review): this call was truncated after `cluster,` in the
        # listing. The job argument follows the other forecast_run call in
        # this file; the dependency on scratch_mount is an assumption (the
        # mount task has no other consumer and itself waits on cluster_start).
        fcst_run = tasks.forecast_run(cluster, fcstjob,
                                      upstream_tasks=[scratch_mount])

        # Terminate the cluster nodes
        # NOTE(review): also truncated after `cluster,`; the dependency on
        # fcst_run is reconstructed from the forecast -> terminate ordering
        # used elsewhere in this file. Confirm.
        cluster_stop = ctasks.cluster_terminate(cluster,
                                                upstream_tasks=[fcst_run])

        # Copy the results to /com (liveocean)
        cp2com = jtasks.ptmp2com(fcstjob, upstream_tasks=[fcst_run])

        # Copy the results to S3 (optionally)
        #cp2s3 = jtasks.cp2s3(fcstjob, upstream_tasks=[fcst_run])
        #storage_service = tasks.storage_init(provider)
        #pngtocloud = tasks.save_to_cloud(plotjob, storage_service, ['*.png'], public=True)

        # Delete the scratch disk
        scratch_delete = tasks.delete_scratch(scratch, upstream_tasks=[cp2com])

        # Copy the results to S3 (optionally. currently only saves LiveOcean)
        # `provider` is resolved from the enclosing scope, not from a parameter.
        storage_service = tasks.storage_init(provider)
        cp2cloud = tasks.save_history(fcstjob,
                                      storage_service, ['*.nc'],
                                      upstream_tasks=[storage_service, cp2com])

        # If the fcst fails, then set the whole flow to fail
        fcstflow.set_reference_tasks([fcst_run, cp2com])

    return fcstflow
Пример #7
def diff_plot_flow(postconf, postjobfile, sshuser=None) -> Flow:
    """Build a Prefect Flow for a difference-plotting workflow.

    Plots the difference between the experimental or quasi-operational
    forecast and the official operational forecast. Also creates mpegs of
    sequential plots. Plots and mpegs are uploaded to the cloud.

    Several calls in this function were truncated in the listing it was
    recovered from. The arguments marked NOTE(review) below are reconstructed
    from parallel calls elsewhere in the file and must be confirmed against
    the task signatures.

    Parameters
    ----------
    postconf : str
        The JSON configuration file for the Cluster to create

    postjobfile : str
        The JSON configuration file for the plotting Job

    sshuser : str
        The optional user and host to use for retrieving data from a remote
        server.

    Returns
    -------
    diff_plotflow : prefect.Flow
    """
    with Flow('diff plotting') as diff_plotflow:
        # POST Processing

        # Read the post machine config
        postmach = ctasks.cluster_init(postconf)

        # Setup the post job
        plotjob = tasks.job_init(postmach, postjobfile)

        # Retrieve the baseline operational forecast data
        getbaseline = jtasks.get_baseline(plotjob, sshuser)

        # Start the machine
        # NOTE(review): truncated after `postmach,`. Waiting on getbaseline
        # mirrors the forecast flow, which starts its cluster only after the
        # forcing data has been retrieved. Confirm.
        pmStarted = ctasks.cluster_start(postmach,
                                         upstream_tasks=[getbaseline])

        # Push the env, install required libs on post machine
        # TODO: install all of the 3rd party dependencies on AMI
        pushPy = ctasks.push_pyEnv(postmach, upstream_tasks=[pmStarted])

        # Start a dask scheduler on the new post machine
        # NOTE(review): truncated after `postmach,`; the dependency on
        # pmStarted matches the complete start_dask call in the 'plot only'
        # flow in this file.
        daskclient: Client = ctasks.start_dask(postmach,
                                               upstream_tasks=[pmStarted])

        # Get list of files from job specified directory
        FILES = jtasks.ncfiles_from_Job(plotjob)
        # NOTE(review): truncated after `plotjob,`; assumed to wait for the
        # baseline retrieval before listing baseline files. Confirm.
        BASELINE = jtasks.baseline_from_Job(plotjob,
                                            upstream_tasks=[getbaseline])

        # Make plots
        # NOTE(review): truncated after `BASELINE,`; the trailing job argument
        # is assumed by analogy with daskmake_plots(daskclient, FILES, job).
        plots = jtasks.daskmake_diff_plots(daskclient, FILES, BASELINE,
                                           plotjob)
        plots.set_upstream([daskclient, getbaseline])

        # `provider` is resolved from the enclosing scope, not from a parameter.
        storage_service = tasks.storage_init(provider)
        #pngtocloud = tasks.save_to_cloud(plotjob, storage_service, ['*diff.png'], public=True)

        # Make movies
        # NOTE(review): truncated after `daskclient,`; the job argument and
        # the dependency on plots are assumptions. The upload below looks for
        # '*diff.mp4', so daskmake_mpegs may also need a diff-specific option
        # that was lost -- check its signature.
        mpegs = jtasks.daskmake_mpegs(daskclient,
                                      plotjob,
                                      upstream_tasks=[plots])
        # NOTE(review): truncated after the pattern list; `public=True`
        # follows the commented save_to_cloud call above, and the dependency
        # on mpegs is assumed so the upload runs after the movies exist.
        mp4tocloud = tasks.save_to_cloud(plotjob,
                                         storage_service, ['*diff.mp4'],
                                         public=True,
                                         upstream_tasks=[mpegs])

        # NOTE(review): truncated after `daskclient,`; closing after the last
        # dask consumer (mpegs) is assumed.
        closedask = ctasks.dask_client_close(daskclient,
                                             upstream_tasks=[mpegs])
        pmTerminated = ctasks.cluster_terminate(
            postmach, upstream_tasks=[mpegs, closedask])

        # This will add Kenny's script
        # https://ioos-cloud-sandbox.s3.amazonaws.com/cloudflow/inject/kenny/cloud_sandbot.py
        #notebook = 'cloudflow/inject/kenny/cloud_sandbot.py'
        notebook = 'cloudflow/inject/patrick/sandbot_current_fcst.py'
        injected = tasks.fetchpy_and_run(plotjob, storage_service, notebook)

    return diff_plotflow
Пример #8
def plot_flow(postconf, postjobfile) -> Flow:
    """Build a Prefect Flow for a plotting workflow.

    Also creates mpegs of sequential plots. Plots and mpegs are uploaded to
    the cloud.

    Several calls in this function were truncated in the listing it was
    recovered from. The arguments marked NOTE(review) below are reconstructed
    from parallel calls elsewhere in the file and must be confirmed against
    the task signatures.

    Parameters
    ----------
    postconf : str
        The JSON configuration file for the Cluster to create

    postjobfile : str
        The JSON configuration file for the plotting Job

    Returns
    -------
    plotflow : prefect.Flow
    """
    with Flow('plotting') as plotflow:
        # POST Processing

        # Start a machine
        postmach = ctasks.cluster_init(postconf)

        # Setup the post job
        plotjob = tasks.job_init(postmach, postjobfile)

        # Start the machine
        pmStarted = ctasks.cluster_start(postmach)

        # Push the env, install required libs on post machine
        # TODO: install all of the 3rd party dependencies on AMI
        pushPy = ctasks.push_pyEnv(postmach, upstream_tasks=[pmStarted])

        # Start a dask scheduler on the new post machine
        # NOTE(review): truncated after `postmach,`; the dependency on
        # pmStarted matches the complete start_dask call in the 'plot only'
        # flow in this file.
        daskclient: Client = ctasks.start_dask(postmach,
                                               upstream_tasks=[pmStarted])

        # Get list of files from job specified directory
        FILES = jtasks.ncfiles_from_Job(plotjob)

        # Make plots
        plots = jtasks.daskmake_plots(daskclient, FILES, plotjob)

        # NOTE(review): `storage_service` was used below without ever being
        # defined in this function. It is initialized here the same way the
        # other flows in this file do it; `provider` is resolved from the
        # enclosing scope.
        storage_service = tasks.storage_init(provider)

        # Make movies
        # NOTE(review): truncated after `daskclient,`; the job argument and
        # the dependency on plots are assumptions.
        mpegs = jtasks.daskmake_mpegs(daskclient,
                                      plotjob,
                                      upstream_tasks=[plots])
        # NOTE(review): truncated after the pattern list; `public=True`
        # follows the commented save_to_cloud calls elsewhere in this file,
        # and the dependency on mpegs is assumed so the upload runs after the
        # movies exist.
        mp4tocloud = tasks.save_to_cloud(plotjob,
                                         storage_service, ['*.mp4'],
                                         public=True,
                                         upstream_tasks=[mpegs])

        # NOTE(review): truncated after `daskclient,`; closing after the last
        # dask consumer (mpegs) is assumed.
        closedask = ctasks.dask_client_close(daskclient,
                                             upstream_tasks=[mpegs])
        pmTerminated = ctasks.cluster_terminate(
            postmach, upstream_tasks=[mpegs, closedask])

        # Inject notebook
        notebook = 'cloudflow/inject/patrick/sandbot_current_fcst.py'
        injected = tasks.fetchpy_and_run(plotjob, storage_service, notebook)

    return plotflow
Пример #9
# NOTE(review): the value below looks like a redacted placeholder; it is not
# referenced by the 'plot only' flow that follows.
sshuser = '******'

# Module-level flow that only makes plots: start a post-processing machine,
# push the Python environment, start dask, make the plots, then close the
# dask client and terminate the machine. `postconf` and `postjobfile` are
# expected to be defined earlier at module scope.
with Flow('plot only') as plotonly:
    # Start a machine
    postmach = ctasks.cluster_init(postconf)
    pmStarted = ctasks.cluster_start(postmach)

    # Push the env, install required libs on post machine
    # TODO: install all of the 3rd party dependencies on AMI
    pushPy = ctasks.push_pyEnv(postmach, upstream_tasks=[pmStarted])

    # Start a dask scheduler on the new post machine
    # (waits for the machine start only, not for pushPy)
    daskclient: Client = ctasks.start_dask(postmach, upstream_tasks=[pmStarted])

    # Setup the post job
    postjob = tasks.job_init(postmach, postjobfile, upstream_tasks=[pmStarted])

    # Get list of files from the post job
    FILES = jtasks.ncfiles_from_Job(postjob)

    # Make plots
    plots = jtasks.daskmake_plots(daskclient, FILES, postjob)

    # Close the dask client once plotting is done, then terminate the machine
    # after both the plots and the client shutdown.
    closedask = ctasks.dask_client_close(daskclient, upstream_tasks=[plots])
    pmTerminated = ctasks.cluster_terminate(postmach, upstream_tasks=[plots, closedask])


with Flow('ofs workflow') as flow: