Exemplo n.º 1
0
def runcmd(model_type, config_path, init_run, n_runs, lab_path, dir_path):
    """Submit a ``payu-collate`` job derived from the experiment configuration.

    The configuration obtained from ``cli.get_config(config_path)`` is
    modified in place so that the ``collate_*`` entries replace the model's
    own queue, CPU, walltime and memory requests, and is then handed to
    ``cli.submit_job``.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``cli.get_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.
        dir_path: Forwarded to ``cli.set_env_vars``; also supplies the
            default job name when the configuration has no ``jobname``.
    """
    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path, dir_path)

    pbs_config['queue'] = pbs_config.get('collate_queue', 'copyq')
    pbs_config['ncpus'] = pbs_config.get('collate_ncpus', 1)

    # Build the job name: at most 13 characters of the base name plus '_c'.
    if 'jobname' in pbs_config:
        base_name = pbs_config['jobname']
    elif dir_path:
        # Use the directory's own name. Slicing the raw path string (as was
        # done previously) produced names such as '/short/abc/us_c'.
        base_name = os.path.basename(os.path.normpath(dir_path))
    else:
        base_name = os.path.basename(os.getcwd())
    pbs_config['jobname'] = base_name[:13] + '_c'

    # Replace the model's walltime/memory request with the collate-specific
    # value, or drop it entirely when no collate value is configured.
    for resource in ('walltime', 'mem'):
        collate_value = pbs_config.get('collate_' + resource)
        if collate_value:
            pbs_config[resource] = collate_value
        else:
            pbs_config.pop(resource, None)

    # Disable hyperthreading by dropping any flag that mentions it.
    # `or ''` guards against a `qsub_flags` key that is present but null.
    flags = (pbs_config.get('qsub_flags') or '').split()
    pbs_config['qsub_flags'] = ' '.join(
        flag for flag in flags if 'hyperthread' not in flag
    )

    cli.submit_job('payu-collate', pbs_config, pbs_vars)
Exemplo n.º 2
0
def runcmd(model_type, config_path, init_run, n_runs, lab_path, dir_path):
    """Configure and submit the collation job (``payu-collate``).

    Reads the configuration with ``cli.get_config``, overrides the queue,
    CPU count, job name, walltime, memory request and qsub flags for
    collation, then calls ``cli.submit_job``.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``cli.get_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.
        dir_path: Forwarded to ``cli.set_env_vars``. When the configuration
            has no ``jobname``, the final component of this path names the
            job; if it is empty the current working directory is used.
    """
    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path, dir_path)

    collate_queue = pbs_config.get('collate_queue', 'copyq')
    pbs_config['queue'] = collate_queue

    n_cpus_request = pbs_config.get('collate_ncpus', 1)
    pbs_config['ncpus'] = n_cpus_request

    # Modify jobname
    if 'jobname' in pbs_config:
        jobname_root = pbs_config['jobname']
    else:
        # Only the last path component is a usable job name; truncating the
        # full path string would leave directory separators in the name.
        source_dir = dir_path if dir_path else os.getcwd()
        jobname_root = os.path.basename(os.path.normpath(source_dir))

    pbs_config['jobname'] = jobname_root[:13] + '_c'

    # Replace (or remove) walltime
    collate_walltime = pbs_config.get('collate_walltime')
    if collate_walltime:
        pbs_config['walltime'] = collate_walltime
    else:
        # Remove the model walltime if set
        pbs_config.pop('walltime', None)

    # Replace (or remove) memory request
    collate_mem = pbs_config.get('collate_mem')
    if collate_mem:
        pbs_config['mem'] = collate_mem
    else:
        # Remove the model memory request if set
        pbs_config.pop('mem', None)

    # Disable hyperthreading; a null `qsub_flags` entry is treated as empty
    raw_flags = pbs_config.get('qsub_flags') or ''
    qsub_flags = [flag for flag in raw_flags.split()
                  if 'hyperthread' not in flag]
    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-collate', pbs_config, pbs_vars)
Exemplo n.º 3
0
def runcmd(model_type, config_path, init_run, n_runs, lab_path):
    """Submit a ``payu-profile`` job derived from the experiment configuration.

    The configuration read by ``fsops.read_config(config_path)`` is modified
    in place: the ``profile_*`` entries replace the model's queue, walltime
    and memory request, the CPU count is forced to one, and hyperthreading
    flags are stripped. The result is passed to ``cli.submit_job``.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``fsops.read_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.
    """
    # Local import: only needed for the job-name fallback below.
    import os

    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run=init_run,
                                n_runs=n_runs,
                                lab_path=lab_path)

    pbs_config['queue'] = pbs_config.get('profile_queue', 'normal')

    # The profile job is submitted with a single CPU
    pbs_config['ncpus'] = 1

    # Modify jobname. Fall back to the working directory's name when the
    # configuration does not define one, instead of raising KeyError.
    base_name = pbs_config.get('jobname') or os.path.basename(os.getcwd())
    pbs_config['jobname'] = base_name[:13] + '_p'

    # Replace the model's walltime/memory request with the profile-specific
    # value, or drop it when no profile value is configured.
    for resource in ('walltime', 'mem'):
        profile_value = pbs_config.get('profile_' + resource)
        if profile_value:
            pbs_config[resource] = profile_value
        else:
            pbs_config.pop(resource, None)

    # Disable hyperthreading; a null `qsub_flags` entry is treated as empty
    flags = (pbs_config.get('qsub_flags') or '').split()
    pbs_config['qsub_flags'] = ' '.join(
        flag for flag in flags if 'hyperthread' not in flag
    )

    cli.submit_job('payu-profile', pbs_config, pbs_vars)
Exemplo n.º 4
0
def runcmd(model_type, config_path, init_run, n_runs, lab_path):
    """Configure and submit the profiling job (``payu-profile``).

    Loads the configuration with ``cli.get_config``, applies the
    ``profile_queue``, ``profile_walltime`` and ``profile_mem`` overrides,
    requests a single CPU, removes hyperthreading flags and submits the
    job through ``cli.submit_job``.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``cli.get_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.
    """
    # Local import: only needed for the job-name fallback below.
    import os

    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    pbs_config['queue'] = pbs_config.get('profile_queue', 'normal')

    # The profile job is submitted with a single CPU
    pbs_config['ncpus'] = 1

    # Modify jobname; without a configured name use the name of the current
    # working directory rather than failing with KeyError.
    jobname_root = pbs_config.get('jobname')
    if not jobname_root:
        jobname_root = os.path.basename(os.getcwd())
    pbs_config['jobname'] = jobname_root[:13] + '_p'

    # Replace (or remove) walltime
    profile_walltime = pbs_config.get('profile_walltime')
    if profile_walltime:
        pbs_config['walltime'] = profile_walltime
    else:
        # Remove the model walltime if set
        pbs_config.pop('walltime', None)

    # Replace (or remove) memory request
    profile_mem = pbs_config.get('profile_mem')
    if profile_mem:
        pbs_config['mem'] = profile_mem
    else:
        # Remove the model memory request if set
        pbs_config.pop('mem', None)

    # Disable hyperthreading; a null `qsub_flags` entry is treated as empty
    raw_flags = pbs_config.get('qsub_flags') or ''
    qsub_flags = [flag for flag in raw_flags.split()
                  if 'hyperthread' not in flag]
    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-profile', pbs_config, pbs_vars)
Exemplo n.º 5
0
def runcmd(model_type, config_path, init_run, n_runs, lab_path, reproduce):
    """Work out the CPU and memory request for a model run and submit it.

    The configuration read by ``fsops.read_config(config_path)`` is updated
    in place with a default queue, a CPU count rounded up to whole nodes
    when the request spans more than one node, and a default memory request,
    and is then submitted as ``payu-run`` via ``cli.submit_job``.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``fsops.read_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.
        reproduce: Not used by this function.

    Raises:
        AssertionError: If the configured ``npernode`` exceeds the platform
            node size.
    """
    # Get job submission configuration
    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    # Set the queue
    # NOTE: Maybe force all jobs on the normal queue
    if 'queue' not in pbs_config:
        pbs_config['queue'] = 'normal'

    # TODO: Create drivers for servers
    platform = pbs_config.get('platform', {})
    max_cpus_per_node = platform.get('nodesize', 16)
    max_ram_per_node = platform.get('nodemem', 32)

    # Adjust the CPUs for any model-specific settings
    # TODO: Incorporate this into the Model driver
    mask_table = pbs_config.get('mask_table', False)
    if mask_table:

        # Check if a mask table exists
        # TODO: Is control_path defined at this stage?
        mask_table_fname = None
        for fname in os.listdir(os.curdir):
            # Previously tested the undefined name `f`, which raised
            # NameError whenever `mask_table` was enabled.
            if fname.startswith('mask_table'):
                mask_table_fname = fname

        # TODO: `mask_table_fname` is located but not yet used.

    if 'ncpureq' in pbs_config:
        # Hard override of CPU request
        n_cpus_request = pbs_config.get('ncpureq')

    elif 'submodels' in pbs_config and 'ncpus' not in pbs_config:
        # Derive the request from the per-submodel CPU counts
        n_cpus_request = sum(model_config.get('ncpus', 0)
                             for model_config in pbs_config['submodels'])

    else:
        n_cpus_request = pbs_config.get('ncpus', 1)

    n_cpus = n_cpus_request
    n_cpus_per_node = pbs_config.get('npernode', max_cpus_per_node)

    assert n_cpus_per_node <= max_cpus_per_node, \
        'npernode exceeds the number of CPUs per node'

    node_misalignment = n_cpus % max_cpus_per_node != 0
    node_increase = n_cpus_per_node < max_cpus_per_node

    # Increase the CPUs to accommodate the cpu-per-node request
    if n_cpus > max_cpus_per_node and (node_increase or node_misalignment):

        # Number of requested nodes (ceiling division)
        n_nodes = 1 + (n_cpus - 1) // n_cpus_per_node
        n_cpus_padded = max_cpus_per_node * n_nodes
        n_inert_cpus = n_cpus_padded - n_cpus

        print('payu: warning: Job request includes {n} unused CPUs.'
              ''.format(n=n_inert_cpus))

        # Increase CPU request to match the effective node request
        n_cpus = n_cpus_padded

        # Report the change to the ncpus field
        if n_cpus != n_cpus_request:
            print('payu: warning: CPU request increased from {n_req} to {n}'
                  ''.format(n_req=n_cpus_request, n=n_cpus))

    # Update the (possibly unchanged) value of ncpus
    pbs_config['ncpus'] = n_cpus

    # Set memory to use the complete node if unspecified
    pbs_mem = pbs_config.get('mem')
    if not pbs_mem:
        if n_cpus > max_cpus_per_node:
            # Multi-node job: full memory of every whole node
            pbs_mem = (n_cpus // max_cpus_per_node) * max_ram_per_node
        else:
            # Single-node job: per-CPU share of the node memory
            pbs_mem = n_cpus * (max_ram_per_node // max_cpus_per_node)

        pbs_config['mem'] = '{0}GB'.format(pbs_mem)

    cli.submit_job('payu-run', pbs_config, pbs_vars)
Exemplo n.º 6
0
def runcmd(model_type, config_path, init_run, lab_path, dir_path):
    """Prepare the PBS settings for collation and submit ``payu-collate``.

    Settings come from the ``collate`` section of the configuration read by
    ``fsops.read_config``; missing entries fall back to defaults that depend
    on the ``mpi`` switch. The top-level configuration is edited in place
    and handed to ``cli.submit_job``.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``fsops.read_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.
        dir_path: Forwarded to ``cli.set_env_vars``; also a candidate source
            for the default job name.
    """
    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run=init_run,
                                lab_path=lab_path,
                                dir_path=dir_path)

    collate_config = pbs_config.get('collate', {})

    # The mpi flag implies using mppnccombine-fast
    if collate_config.get('mpi', False):
        fallback_queue, fallback_ncpus = 'express', 2
    else:
        fallback_queue, fallback_ncpus = 'copyq', 1

    pbs_config['queue'] = collate_config.get('queue', fallback_queue)
    pbs_config['ncpus'] = collate_config.get('ncpus', fallback_ncpus)

    # Job name precedence: collate section, then the top-level name, then
    # the name of `dir_path` (if it is a directory) or the working directory.
    job_label = collate_config.get('jobname')
    if not job_label:
        parent_label = pbs_config.get('jobname')
        if not parent_label:
            use_dir_path = dir_path and os.path.isdir(dir_path)
            source_dir = dir_path if use_dir_path else os.getcwd()
            parent_label = os.path.basename(source_dir)

        job_label = parent_label[:13] + '_c'

    # NOTE: Better to construct `collate_config` to pass to `submit_job`
    pbs_config['jobname'] = job_label[:15]

    # Use the collate walltime, otherwise discard any model walltime
    walltime = collate_config.get('walltime')
    if walltime:
        pbs_config['walltime'] = walltime
    elif 'walltime' in pbs_config:
        del pbs_config['walltime']

    # TODO: calculate default memory request based on ncpus and platform
    pbs_config['mem'] = collate_config.get('mem', '2GB')

    # Strip hyperthreading flags. A '-l' token is merged with the token
    # that follows it so that the pair is kept or dropped together.
    tokens = pbs_config.get('qsub_flags', '').split()
    kept = []
    pos = 0
    while pos < len(tokens):
        token = tokens[pos]
        pos += 1

        if token == '-l':
            if pos >= len(tokens):
                # Trailing '-l' without an argument: stop processing
                break
            token = token + ' ' + tokens[pos]
            pos += 1

        # TODO: Test the sequence, not just existence of characters in string
        if 'hyperthread' in token:
            continue
        kept.append(token)

    pbs_config['qsub_flags'] = ' '.join(kept)

    cli.submit_job('payu-collate', pbs_config, pbs_vars)
Exemplo n.º 7
0
def runcmd(model_type, config_path, init_run, n_runs, lab_path):
    """Compute the CPU and memory request for a model run and submit it.

    The configuration from ``cli.get_config`` is updated in place (queue,
    ``ncpus`` and, for multi-node jobs without an explicit request, ``mem``)
    and submitted as ``payu-run`` through ``cli.submit_job``. Nodes are
    taken to have 16 CPUs.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``cli.get_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.

    Raises:
        AssertionError: If ``npernode`` is larger than the node size.
    """
    # Job submission configuration and environment
    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    # Default queue
    # NOTE: Maybe force all jobs on the normal queue
    pbs_config.setdefault('queue', 'normal')

    # TODO: Create drivers for servers
    node_size = 16

    # Model-specific CPU adjustments
    # TODO: Incorporate this into the Model driver
    if pbs_config.get('mask_table', False):

        # Look for a mask table in the current directory
        # TODO: Is control_path defined at this stage?
        mask_table_fname = None
        for entry in os.listdir(os.curdir):
            if entry.startswith('mask_table'):
                mask_table_fname = entry

        # TODO TODO

    # Without an explicit `ncpus`, add up the submodel requests
    if 'submodels' in pbs_config and 'ncpus' not in pbs_config:
        submodels = pbs_config['submodels']
        requested = sum(submodels[name].get('ncpus', 0) for name in submodels)
    else:
        requested = pbs_config.get('ncpus', 1)

    total = requested
    per_node = pbs_config.get('npernode', node_size)

    assert per_node <= node_size

    partial_node = total % node_size != 0
    sparse_nodes = per_node < node_size

    # Pad the request to whole nodes when the job spans more than one node
    if total > node_size and (sparse_nodes or partial_node):

        # Number of requested nodes
        n_nodes = 1 + (total - 1) // per_node
        padded = node_size * n_nodes

        print('payu: warning: Job request includes {} unused CPUs.'
              ''.format(padded - total))

        # The effective request is the full set of nodes
        total = padded

        if total != requested:
            print('payu: warning: CPU request increased from {} to {}'
                  ''.format(requested, total))

    # Store the (possibly unchanged) CPU count
    pbs_config['ncpus'] = total

    # Default memory for multi-node jobs: 31 GB per whole node
    # TODO: Move RAM per node as variable
    if not pbs_config.get('mem') and total > node_size:
        pbs_config['mem'] = '{}GB'.format((total // node_size) * 31)

    cli.submit_job('payu-run', pbs_config, pbs_vars)
Exemplo n.º 8
0
def runcmd(model_type, config_path, init_run, n_runs, lab_path, dir_path):
    """Build the collation job request and submit it as ``payu-collate``.

    Values are taken from the ``collate`` section of the configuration read
    by ``fsops.read_config``, with defaults selected by its ``mpi`` entry.
    The top-level configuration is modified in place before being passed to
    ``cli.submit_job``.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``fsops.read_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.
        dir_path: Forwarded to ``cli.set_env_vars``; also a candidate source
            for the default job name.
    """
    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path, dir_path)

    collate_config = pbs_config.get('collate', {})

    # The mpi flag implies using mppnccombine-fast
    use_mpi = collate_config.get('mpi', False)
    queue_default = 'express' if use_mpi else 'copyq'
    ncpus_default = 2 if use_mpi else 1

    pbs_config['queue'] = collate_config.get('queue', queue_default)
    pbs_config['ncpus'] = collate_config.get('ncpus', ncpus_default)

    # Resolve the job name, preferring an explicit collate name
    name = collate_config.get('jobname')
    if not name:
        stem = pbs_config.get('jobname')
        if not stem:
            # Fall back to a directory name
            if dir_path and os.path.isdir(dir_path):
                stem = os.path.basename(dir_path)
            else:
                stem = os.path.basename(os.getcwd())
        name = stem[:13] + '_c'

    # NOTE: Better to construct `collate_config` to pass to `submit_job`
    pbs_config['jobname'] = name[:15]

    # Collate walltime replaces the model walltime; with none configured,
    # the model walltime is removed if present
    requested_walltime = collate_config.get('walltime')
    if requested_walltime:
        pbs_config['walltime'] = requested_walltime
    else:
        pbs_config.pop('walltime', None)

    # TODO: calculate default memory request based on ncpus and platform
    pbs_config['mem'] = collate_config.get('mem', '2GB')

    # Disable hyperthreading
    retained = []
    tokens = iter(pbs_config.get('qsub_flags', '').split())
    for token in tokens:
        if token == '-l':
            # Pair '-l' with its argument; a dangling '-l' ends the scan
            argument = next(tokens, None)
            if argument is None:
                break
            token = token + ' ' + argument

        # TODO: Test the sequence, not just existence of characters in string
        if 'hyperthread' in token:
            continue
        retained.append(token)

    pbs_config['qsub_flags'] = ' '.join(retained)

    cli.submit_job('payu-collate', pbs_config, pbs_vars)
Exemplo n.º 9
0
def runcmd(model_type, config_path, init_run, n_runs, lab_path):
    """Size the resource request of a model run and submit ``payu-run``.

    Starting from the configuration returned by ``cli.get_config``, this
    fills in a default queue, rounds multi-node CPU requests up to whole
    16-CPU nodes (printing a warning), sets a default memory request for
    multi-node jobs and calls ``cli.submit_job``.

    Args:
        model_type: Not used by this function.
        config_path: Forwarded to ``cli.get_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.

    Raises:
        AssertionError: If ``npernode`` is larger than the node size.
    """
    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    # NOTE: Maybe force all jobs on the normal queue
    if not ('queue' in pbs_config):
        pbs_config['queue'] = 'normal'

    # TODO: Create drivers for servers
    cpus_in_node = 16

    # TODO: Incorporate this into the Model driver
    wants_mask_table = pbs_config.get('mask_table', False)
    if wants_mask_table:
        # TODO: Is control_path defined at this stage?
        mask_table_fname = None
        for candidate in os.listdir(os.curdir):
            if candidate.startswith('mask_table'):
                mask_table_fname = candidate

        # TODO TODO

    # CPU request: explicit `ncpus`, else the sum over submodels
    sum_submodels = 'submodels' in pbs_config and 'ncpus' not in pbs_config
    if not sum_submodels:
        cpus_asked = pbs_config.get('ncpus', 1)
    else:
        per_model = pbs_config['submodels']
        cpus_asked = 0
        for key in per_model:
            cpus_asked = cpus_asked + per_model[key].get('ncpus', 0)

    cpus_final = cpus_asked
    cpus_used_per_node = pbs_config.get('npernode', cpus_in_node)

    assert cpus_used_per_node <= cpus_in_node

    misaligned = cpus_final % cpus_in_node != 0
    underfilled = cpus_used_per_node < cpus_in_node
    needs_padding = (
        cpus_final > cpus_in_node and (underfilled or misaligned)
    )

    if needs_padding:
        # Whole nodes needed at the requested CPUs-per-node density
        node_count = 1 + (cpus_final - 1) // cpus_used_per_node
        whole_node_cpus = cpus_in_node * node_count
        idle_cpus = whole_node_cpus - cpus_final

        print('payu: warning: Job request includes {} unused CPUs.'
              ''.format(idle_cpus))

        cpus_final = whole_node_cpus

        if cpus_final != cpus_asked:
            print('payu: warning: CPU request increased from {} to {}'
                  ''.format(cpus_asked, cpus_final))

    # Written back even when unchanged
    pbs_config['ncpus'] = cpus_final

    # Multi-node jobs without a memory request get 31 GB per whole node
    # TODO: Move RAM per node as variable
    mem_missing = not pbs_config.get('mem')
    if mem_missing and cpus_final > cpus_in_node:
        whole_nodes = cpus_final // cpus_in_node
        pbs_config['mem'] = '{}GB'.format(whole_nodes * 31)

    cli.submit_job('payu-run', pbs_config, pbs_vars)