示例#1
0
文件: um.py 项目: HoWol76/payu
    def archive(self):
        """Archive the UM restart dump for the completed run.

        Reads the model namelists to work out the run's end date, then
        copies the matching ``aiihca.da*`` restart dump into the
        restart directory.
        """
        mkdir_p(self.restart_path)

        # Need to figure out the end date of the model.
        nml_path = os.path.join(self.work_path, 'namelists')
        nml = f90nml.read(nml_path)

        # Run length comes from the UM resubmit interval
        resubmit_inc = nml['NLSTCALL']['RUN_RESUBMIT_INC']
        runtime = um_time_to_time(resubmit_inc)
        runtime = datetime.timedelta(seconds=runtime)

        basis_time = nml['NLSTCALL']['MODEL_BASIS_TIME']
        init_date = um_date_to_date(basis_time)

        # End date rendered in the UM dump filename convention
        end_date = date_to_um_dump_date(init_date + runtime)

        restart_dump = os.path.join(self.work_path,
                                    'aiihca.da{0}'.format(end_date))
        f_dst = os.path.join(self.restart_path, self.restart)
        if os.path.exists(restart_dump):
            shutil.copy(restart_dump, f_dst)
        else:
            # FIX: the original implicit string concatenation produced
            # "...restart dump file<path> does not exist." with no
            # separator; add punctuation and line breaks.
            print('payu: error: Model has not produced a restart dump file:\n'
                  '{} does not exist.\n'
                  'Check DUMPFREQim in namelists'.format(restart_dump))
示例#2
0
文件: oasis.py 项目: rmholmes/payu
    def setup(self):
        """Link OASIS configuration and input files into every submodel."""
        super(Oasis, self).setup()

        # Copy OASIS data to the other submodels

        # TODO: Parse namcouple to determine filelist
        # TODO: Let users map files to models
        input_files = []
        for entry in os.listdir(self.work_path):
            if entry not in self.config_files:
                input_files.append(entry)

        for model in self.expt.models:

            # Skip the oasis self-reference
            if model == self:
                continue

            # Skip models without a work_path (like access)
            if not hasattr(model, 'work_path'):
                continue

            mkdir_p(model.work_path)
            for fname in self.config_files + input_files:
                src_path = os.path.join(self.work_path, fname)
                dst_path = os.path.join(model.work_path, fname)
                make_symlink(src_path, dst_path)

        if self.expt.runtime:
            # TODO: Implement runtime patch to namcouple
            pass
示例#3
0
文件: mom.py 项目: nicjhan/payu
    def setup(self):
        """Prepare the MOM work directory and patch the run length."""
        # FMS initialisation
        super(Mom, self).setup()

        # Standalone MOM keeps its own log directory
        if not self.top_level_model:
            mkdir_p(os.path.join(self.work_path, 'log'))

        input_nml_path = os.path.join(self.work_path, 'input.nml')
        input_nml = f90nml.read(input_nml_path)

        if self.config.get('core2iaf'):
            self.core2iaf_setup()

        # Set the runtime in ocean_solo_nml
        if self.expt.runtime:
            solo_nml = input_nml['ocean_solo_nml']

            for unit in ('years', 'months', 'days'):
                solo_nml[unit] = self.expt.runtime[unit]
            solo_nml['seconds'] = self.expt.runtime.get('seconds', 0)

            input_nml.write(input_nml_path, force=True)

        # Construct the land CPU mask
        if self.expt.config.get('mask_table', False):
            self.create_mask_table(input_nml)
示例#4
0
    def setup(self, force_archive=False):
        """Create and populate the work directory for the next run.

        Parameters
        ----------
        force_archive : bool
            If True, also create the archive directory and its symlink
            up front.
        """
        # Confirm that no output path already exists
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        mkdir_p(self.work_path)

        if force_archive:
            mkdir_p(self.archive_path)
            make_symlink(self.archive_path, self.archive_sym_path)

        # Archive the payu config
        # TODO: This just copies the existing config.yaml file, but we should
        #       reconstruct a new file including default values
        config_src = os.path.join(self.control_path, 'config.yaml')
        config_dst = os.path.join(self.work_path)
        shutil.copy(config_src, config_dst)

        # Stripe directory in Lustre
        # TODO: Make this more configurable
        do_stripe = self.config.get('stripedio', False)
        if do_stripe:
            cmd = 'lfs setstripe -c 8 -s 8m {0}'.format(self.work_path)
            sp.check_call(shlex.split(cmd))

        make_symlink(self.work_path, self.work_sym_path)

        # Set up all file manifests
        self.manifest.setup()

        for model in self.models:
            model.setup()

        # Call the macro-model setup
        if len(self.models) > 1:
            self.model.setup()

        # Use manifest to populate work directory
        self.manifest.make_links()

        # Copy manifests to work directory so they are archived on completion
        # (PEP 8 fix: named path, space after comma)
        manifest_path = os.path.join(self.work_path, 'manifests')
        self.manifest.copy_manifests(manifest_path)

        setup_script = self.userscripts.get('setup')
        if setup_script:
            self.run_userscript(setup_script)

        # Profiler setup; a single profiler may be given as a scalar entry
        expt_profs = self.config.get('profilers', [])
        if not isinstance(expt_profs, list):
            expt_profs = [expt_profs]

        for prof_name in expt_profs:
            ProfType = payu.profilers.index[prof_name]
            prof = ProfType(self)
            self.profilers.append(prof)

            # Testing
            prof.setup()
示例#5
0
    def setup(self):
        """Symlink OASIS coupler files into each coupled submodel."""
        super(Oasis, self).setup()

        # Copy OASIS data to the other submodels

        # TODO: Parse namcouple to determine filelist
        # TODO: Let users map files to models
        work_files = os.listdir(self.work_path)
        input_files = [f for f in work_files if f not in self.config_files]

        for model in self.expt.models:

            # Skip the oasis self-reference
            if model == self:
                continue

            # Skip models without a work_path (like access)
            if not hasattr(model, 'work_path'):
                continue

            mkdir_p(model.work_path)
            for f_name in self.config_files + input_files:
                make_symlink(os.path.join(self.work_path, f_name),
                             os.path.join(model.work_path, f_name))

        if self.expt.runtime:
            # TODO: Implement runtime patch to namcouple
            pass
示例#6
0
    def archive(self):
        """Archive the completed run.

        Archives every submodel, moves the work directory to the output
        path, prunes outdated restart directories, and launches the
        collate/profile follow-up jobs and the user archive script.
        """
        mkdir_p(self.archive_path)
        make_symlink(self.archive_path, self.archive_sym_path)

        # Remove work symlink
        if os.path.islink(self.work_sym_path):
            os.remove(self.work_sym_path)

        mkdir_p(self.restart_path)

        for model in self.models:
            model.archive()

        # Postprocess the model suite
        if len(self.models) > 1:
            self.model.archive()

        # Double-check that the run path does not exist
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        # Relocate the work directory as the run's output
        cmd = 'mv {} {}'.format(self.work_path, self.output_path)
        sp.check_call(shlex.split(cmd))

        # Remove old restart files
        # TODO: Move to subroutine
        restart_freq = self.config.get('restart_freq', default_restart_freq)
        restart_history = self.config.get('restart_history',
                                          default_restart_history)

        # Remove any outdated restart files
        prior_restart_dirs = [
            d for d in os.listdir(self.archive_path) if d.startswith('restart')
        ]

        for res_dir in prior_restart_dirs:

            # NOTE: str.lstrip strips a *character set*, not a prefix;
            # safe here only because no digit appears in 'restart'.
            res_idx = int(res_dir.lstrip('restart'))
            # Prune when: this is a repeat run (keep no restarts), or the
            # index is not a restart_freq multiple AND is older than the
            # restart_history window.
            if (self.repeat_run
                    or (not res_idx % restart_freq == 0 and res_idx <=
                        (self.counter - restart_history))):

                res_path = os.path.join(self.archive_path, res_dir)

                # Only delete real directories; ignore symbolic restart links
                if os.path.isdir(res_path):
                    shutil.rmtree(res_path)

        if self.config.get('collate', True):
            cmd = 'payu collate -i {}'.format(self.counter)
            sp.check_call(shlex.split(cmd))

        if self.config.get('hpctoolkit', False):
            cmd = 'payu profile -i {}'.format(self.counter)
            sp.check_call(shlex.split(cmd))

        archive_script = self.userscripts.get('archive')
        if archive_script:
            self.run_userscript(archive_script)
示例#7
0
    def sweep(self, hard_sweep=False):
        """Clean up the run area.

        Removes the work directory and its symlink, relocates PBS log
        files into the archive's ``pbs_logs`` directory, and deletes
        stdout/stderr.  With ``hard_sweep`` the archive path and its
        symlink are deleted as well.
        """
        # TODO: Fix the IO race conditions!

        if hard_sweep:
            if os.path.isdir(self.archive_path):
                print('Removing archive path {0}'.format(self.archive_path))
                cmd = 'rm -rf {0}'.format(self.archive_path)
                cmd = shlex.split(cmd)
                rc = sp.call(cmd)
                assert rc == 0

            if os.path.islink(self.archive_sym_path):
                print('Removing symlink {0}'.format(self.archive_sym_path))
                os.remove(self.archive_sym_path)

        if os.path.isdir(self.work_path):
            print('Removing work path {0}'.format(self.work_path))
            cmd = 'rm -rf {0}'.format(self.work_path)
            cmd = shlex.split(cmd)
            rc = sp.call(cmd)
            assert rc == 0

        if os.path.islink(self.work_sym_path):
            print('Removing symlink {0}'.format(self.work_sym_path))
            os.remove(self.work_sym_path)

        # TODO: model outstreams and pbs logs need to be handled separately
        default_job_name = os.path.basename(os.getcwd())
        # PBS truncates job names to 15 characters
        short_job_name = str(self.config.get('jobname', default_job_name))[:15]

        # Match PBS stdout/stderr logs for the run job and its collate
        # ('_c') and profile ('_p') follow-up jobs
        logs = [
            f for f in os.listdir(os.curdir) if os.path.isfile(f) and (
                f.startswith(short_job_name + '.o') or
                f.startswith(short_job_name + '.e') or
                f.startswith(short_job_name[:13] + '_c.o') or
                f.startswith(short_job_name[:13] + '_c.e') or
                f.startswith(short_job_name[:13] + '_p.o') or
                f.startswith(short_job_name[:13] + '_p.e')
            )
        ]

        pbs_log_path = os.path.join(self.archive_path, 'pbs_logs')
        legacy_pbs_log_path = os.path.join(self.control_path, 'pbs_logs')

        # Migrate logs from the legacy control-path location if present
        if os.path.isdir(legacy_pbs_log_path):
            # TODO: New path may still exist!
            assert not os.path.isdir(pbs_log_path)
            print('payu: Moving pbs_logs to {0}'.format(pbs_log_path))
            shutil.move(legacy_pbs_log_path, pbs_log_path)
        else:
            mkdir_p(pbs_log_path)

        for f in logs:
            print('Moving log {0}'.format(f))
            shutil.move(f, os.path.join(pbs_log_path, f))

        # Remove stdout/err
        for f in (self.stdout_fname, self.stderr_fname):
            if os.path.isfile(f):
                os.remove(f)
示例#8
0
    def sweep(self, hard_sweep=False):
        """Clean up the run area.

        Moves PBS logs into the archive first, then (optionally, with
        ``hard_sweep``) deletes the archive, and finally removes output
        dumps, the work path, and its symlink.
        """
        # TODO: Fix the IO race conditions!

        # TODO: model outstreams and pbs logs need to be handled separately
        default_job_name = os.path.basename(os.getcwd())
        # PBS truncates job names to 15 characters
        short_job_name = str(self.config.get('jobname', default_job_name))[:15]

        # Match PBS stdout/stderr logs for the run job and its collate
        # ('_c') and profile ('_p') follow-up jobs
        logs = [
            f for f in os.listdir(os.curdir) if os.path.isfile(f) and (
                f.startswith(short_job_name + '.o') or
                f.startswith(short_job_name + '.e') or
                f.startswith(short_job_name[:13] + '_c.o') or
                f.startswith(short_job_name[:13] + '_c.e') or
                f.startswith(short_job_name[:13] + '_p.o') or
                f.startswith(short_job_name[:13] + '_p.e')
            )
        ]

        pbs_log_path = os.path.join(self.archive_path, 'pbs_logs')
        legacy_pbs_log_path = os.path.join(self.control_path, 'pbs_logs')

        # Migrate logs from the legacy control-path location if present
        if os.path.isdir(legacy_pbs_log_path):
            # TODO: New path may still exist!
            assert not os.path.isdir(pbs_log_path)
            print('payu: Moving pbs_logs to {0}'.format(pbs_log_path))
            shutil.move(legacy_pbs_log_path, pbs_log_path)
        else:
            mkdir_p(pbs_log_path)

        for f in logs:
            print('Moving log {0}'.format(f))
            shutil.move(f, os.path.join(pbs_log_path, f))

        # NOTE: logs were moved into the archive above, so a hard sweep
        # deletes them along with the archive path
        if hard_sweep:
            if os.path.isdir(self.archive_path):
                print('Removing archive path {0}'.format(self.archive_path))
                cmd = 'rm -rf {0}'.format(self.archive_path)
                cmd = shlex.split(cmd)
                rc = sp.call(cmd)
                assert rc == 0

            if os.path.islink(self.archive_sym_path):
                print('Removing symlink {0}'.format(self.archive_sym_path))
                os.remove(self.archive_sym_path)

        # Remove stdout/err and yaml dumps
        for f in self.output_fnames:
            if os.path.isfile(f):
                os.remove(f)

        if os.path.isdir(self.work_path):
            print('Removing work path {0}'.format(self.work_path))
            cmd = 'rm -rf {0}'.format(self.work_path)
            cmd = shlex.split(cmd)
            rc = sp.call(cmd)
            assert rc == 0

        if os.path.islink(self.work_sym_path):
            print('Removing symlink {0}'.format(self.work_sym_path))
            os.remove(self.work_sym_path)
示例#9
0
    def setup(self, force_archive=False):
        """Create and populate the work directory for the next run.

        Parameters
        ----------
        force_archive : bool
            If True, also create the archive directory and its symlink
            up front.
        """
        # Confirm that no output path already exists
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        mkdir_p(self.work_path)

        if force_archive:
            mkdir_p(self.archive_path)
            make_symlink(self.archive_path, self.archive_sym_path)

        # Archive the payu config
        # TODO: This just copies the existing config.yaml file, but we should
        #       reconstruct a new file including default values
        config_src = os.path.join(self.control_path, 'config.yaml')
        config_dst = os.path.join(self.work_path)
        shutil.copy(config_src, config_dst)

        # Stripe directory in Lustre
        # TODO: Make this more configurable
        do_stripe = self.config.get('stripedio', False)
        if do_stripe:
            cmd = 'lfs setstripe -c 8 -s 8m {0}'.format(self.work_path)
            sp.check_call(shlex.split(cmd))

        make_symlink(self.work_path, self.work_sym_path)

        # Set up all file manifests
        self.manifest.setup()

        for model in self.models:
            model.setup()

        # Call the macro-model setup
        if len(self.models) > 1:
            self.model.setup()

        # Verify manifests after all model setup has populated them
        self.manifest.check_manifests()

        # Copy manifests to work directory so they archived on completion
        manifest_path = os.path.join(self.work_path, 'manifests')
        self.manifest.copy_manifests(manifest_path)

        setup_script = self.userscripts.get('setup')
        if setup_script:
            self.run_userscript(setup_script)

        # Profiler setup; a single profiler may be given as a scalar entry
        expt_profs = self.config.get('profilers', [])
        if not isinstance(expt_profs, list):
            expt_profs = [expt_profs]

        for prof_name in expt_profs:
            ProfType = payu.profilers.index[prof_name]
            prof = ProfType(self)
            self.profilers.append(prof)

            # Testing
            prof.setup()
示例#10
0
文件: manifest.py 项目: rmholmes/payu
    def copy_manifests(self, path):
        """Best-effort copy of all manifest files into *path*.

        Creates *path* if necessary.  Copy failures are reported but
        not fatal.
        """
        mkdir_p(path)
        try:
            for mf in self.manifests:
                self.manifests[mf].copy(path)
        except IOError as exc:
            # FIX: previously errors were silently swallowed; keep the
            # best-effort behaviour but report the failure.
            print('payu: warning: could not copy manifests to '
                  '{0}: {1}'.format(path, exc))
示例#11
0
文件: mitgcm.py 项目: penguian/payu
    def archive(self):
        """Archive a completed MITgcm run.

        Records the model end time, flattens mnc_* output directories,
        compresses per-rank STDOUT logs, and moves pickup (restart)
        files into the restart path.
        """
        # Need to parse the data namelist file to access the
        # endTime
        data_path = os.path.join(self.work_path, 'data')
        data_nml = self.read_namelist(data_path)

        # Save model time to restart next run
        with open(os.path.join(self.restart_path, self.restart_calendar_file),
                  'w') as restart_file:
            restart = {'endtime': data_nml['parm03']['endTime']}
            restart_file.write(yaml.dump(restart, default_flow_style=False))

        # Remove symbolic links to input or pickup files:
        for f in os.listdir(self.work_path):
            f_path = os.path.join(self.work_path, f)
            if os.path.islink(f_path):
                os.remove(f_path)

        # Move files outside of mnc_* directories
        mnc_paths = [
            os.path.join(self.work_path, d) for d in os.listdir(self.work_path)
            if d.startswith('mnc_')
        ]

        # NOTE(review): `sh` is presumably shutil imported under an
        # alias -- confirm against the file's imports.
        for path in mnc_paths:
            for f in os.listdir(path):
                f_path = os.path.join(path, f)
                sh.move(f_path, self.work_path)
            os.rmdir(path)

        mkdir_p(self.restart_path)

        # Move pickups but don't include intermediate pickups ('ckpt's)
        restart_files = [
            f for f in os.listdir(self.work_path) if f.startswith('pickup')
            and not f.split('.')[1].startswith('ckpt')
        ]

        # Tar and compress the output files
        stdout_files = [
            f for f in os.listdir(self.work_path) if f.startswith('STDOUT.')
        ]

        if stdout_files:
            cmd = 'tar -C {0} -c -j -f {1}'.format(
                self.work_path, os.path.join(self.work_path, 'STDOUT.tar.bz2'))

            rc = sp.Popen(shlex.split(cmd) + stdout_files).wait()
            assert rc == 0

        # Originals are removed once the tarball has been written
        for f in stdout_files:
            os.remove(os.path.join(self.work_path, f))

        for f in restart_files:
            f_src = os.path.join(self.work_path, f)
            sh.move(f_src, self.restart_path)
示例#12
0
    def archive(self):
        """Archive the completed run.

        Archives every submodel, moves the work directory to the output
        path, prunes outdated restart directories, and launches the
        collate/profile follow-up jobs and the user archive script.
        """
        mkdir_p(self.archive_path)
        make_symlink(self.archive_path, self.archive_sym_path)

        # Remove work symlink
        if os.path.islink(self.work_sym_path):
            os.remove(self.work_sym_path)

        mkdir_p(self.restart_path)

        for model in self.models:
            model.archive()

        # Postprocess the model suite
        if len(self.models) > 1:
            self.model.archive()

        # Double-check that the run path does not exist
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        # Relocate the work directory as the run's output
        cmd = 'mv {} {}'.format(self.work_path, self.output_path)
        sp.check_call(shlex.split(cmd))

        # Remove old restart files
        # TODO: Move to subroutine
        restart_freq = self.config.get('restart_freq', default_restart_freq)
        restart_history = self.config.get('restart_history',
                                          default_restart_history)

        # Remove any outdated restart files
        prior_restart_dirs = [d for d in os.listdir(self.archive_path)
                              if d.startswith('restart')]

        for res_dir in prior_restart_dirs:

            # NOTE: str.lstrip strips a *character set*, not a prefix;
            # safe here only because no digit appears in 'restart'.
            res_idx = int(res_dir.lstrip('restart'))
            # Keep restart_freq multiples and anything inside the
            # restart_history window; prune the rest.
            if (not res_idx % restart_freq == 0 and
                    res_idx <= (self.counter - restart_history)):

                res_path = os.path.join(self.archive_path, res_dir)
                shutil.rmtree(res_path)

        if self.config.get('collate', True):
            cmd = 'payu collate -i {} -l {}'.format(self.counter,
                                                    self.lab.basepath)
            sp.check_call(shlex.split(cmd))

        if self.config.get('hpctoolkit', False):
            cmd = 'payu profile -i {}'.format(self.counter)
            sp.check_call(shlex.split(cmd))

        archive_script = self.userscripts.get('archive')
        if archive_script:
            self.run_userscript(archive_script)
示例#13
0
    def build_model(self):
        """Build the model executable from its checked-out codebase."""
        if not self.repo_url:
            return

        # Warn if the target executable is about to be replaced.
        if self.exec_path and os.path.isfile(self.exec_path):
            print('payu: warning: {0} will be overwritten.'
                  ''.format(self.exec_path))

        # Remember where we are; the build happens inside the codebase.
        curdir = os.getcwd()

        # Resolve the build directory: explicit config entry first,
        # then the model default, otherwise the codebase root.
        try:
            build_path = self.config['build']['path_to_build_command']
        except KeyError:
            build_path = self.build_path or './'

        os.chdir(os.path.join(self.codebase_path, build_path))

        # Resolve the build command the same way, defaulting to make.
        try:
            cmd = self.config['build']['command']
        except KeyError:
            cmd = self.build_command or 'make'

        print('Running command {0}'.format(cmd))
        sp.check_call(shlex.split(cmd))

        # Resolve where the build left the executable.
        try:
            build_exec_path = os.path.join(self.codebase_path,
                                           self.config['build']['exec_path'])
        except KeyError:
            build_exec_path = self.build_exec_path or self.codebase_path

        # Copy new executable to bin dir
        if self.exec_path:
            # Create the bin path if it doesn't exist
            mkdir_p(self.expt.lab.bin_path)

            build_exec_path = os.path.join(build_exec_path, self.exec_name)
            shutil.copy(build_exec_path, self.exec_path)

        os.chdir(curdir)
示例#14
0
    def build_model(self):
        """Build the model executable from its checked-out codebase.

        Each build setting (directory, command, executable location) is
        taken from the 'build' section of the config if present, then
        from the model default, then from a hard-coded fallback.
        """
        if not self.repo_url:
            return

        # Check to see if executable already exists.
        if self.exec_path and os.path.isfile(self.exec_path):
            print('payu: warning: {} will be overwritten.'
                  ''.format(self.exec_path))

        # First step is always to go to the codebase.
        curdir = os.getcwd()

        # Do the build. First check whether there is a build command in the
        # config. If not check for the model default, otherwise just run make.

        try:
            build_path = self.config['build']['path_to_build_command']
        except KeyError:
            if self.build_path:
                build_path = self.build_path
            else:
                build_path = './'

        os.chdir(os.path.join(self.codebase_path, build_path))

        try:
            cmd = self.config['build']['command']
        except KeyError:
            if self.build_command:
                cmd = self.build_command
            else:
                cmd = 'make'

        print('Running command {}'.format(cmd))
        sp.check_call(shlex.split(cmd))

        # Locate the freshly built executable
        try:
            build_exec_path = os.path.join(self.codebase_path,
                                           self.config['build']['exec_path'])
        except KeyError:
            if self.build_exec_path:
                build_exec_path = self.build_exec_path
            else:
                build_exec_path = self.codebase_path

        # Copy new executable to bin dir
        if self.exec_path:
            # Create the bin path if it doesn't exist
            mkdir_p(self.expt.lab.bin_path)

            build_exec_path = os.path.join(build_exec_path, self.exec_name)
            shutil.copy(build_exec_path, self.exec_path)

        # Restore the original working directory
        os.chdir(curdir)
示例#15
0
    def postprocess(self):
        """Collect gprof 'gmon.out*' profiles into a gmon subdirectory."""
        gmon_dir = os.path.join(self.expt.work_path, 'gmon')
        mkdir_p(gmon_dir)

        for entry in os.listdir(self.expt.work_path):
            if not entry.startswith('gmon.out'):
                continue
            shutil.move(os.path.join(self.expt.work_path, entry),
                        os.path.join(gmon_dir, entry))
示例#16
0
文件: mom.py 项目: penguian/payu
    def setup(self):
        """Prepare the MOM work directory, run length, and CPU mask."""
        # FMS initialisation
        super(Mom, self).setup()

        if not self.top_level_model:
            # Make log dir
            mkdir_p(os.path.join(self.work_path, 'log'))

        input_nml_path = os.path.join(self.work_path, 'input.nml')
        input_nml = f90nml.read(input_nml_path)

        # Set the runtime
        if self.expt.runtime:
            ocean_solo_nml = input_nml['ocean_solo_nml']

            ocean_solo_nml['years'] = self.expt.runtime['years']
            ocean_solo_nml['months'] = self.expt.runtime['months']
            ocean_solo_nml['days'] = self.expt.runtime['days']
            # 'seconds' is the only optional runtime component
            ocean_solo_nml['seconds'] = self.expt.runtime.get('seconds', 0)

            input_nml.write(input_nml_path, force=True)

        # Construct the land CPU mask
        if self.expt.config.get('mask_table', False):
            # NOTE: This function actually creates a mask table using the
            #       `check_mask` command line tool.  But it is not very usable
            #       since you need to know the number of masked CPUs to submit
            #       the job.  It needs a rethink of the submission process.
            self.create_mask_table(input_nml)

        # NOTE: Don't expect this to be here forever...
        # Attempt to set a mask table from the input
        if self.config.get('mask', False):
            mask_path = os.path.join(self.work_input_path, 'ocean_mask_table')

            # Remove any existing mask
            # (If no reference mask is available, then we will not use one)
            if os.path.isfile(mask_path):
                os.remove(mask_path)

            # Reference mask table
            assert ('layout' in input_nml['ocean_model_nml'])
            nx, ny = input_nml['ocean_model_nml'].get('layout')
            # Masked CPUs = total layout cells minus requested CPUs
            n_masked_cpus = nx * ny - self.config.get('ncpus')

            mask_table_fname = 'mask_table.{nmask}.{nx}x{ny}'.format(
                nmask=n_masked_cpus, nx=nx, ny=ny)

            ref_mask_path = os.path.join(self.work_input_path,
                                         mask_table_fname)

            # Set (or replace) mask table if reference is available
            if os.path.isfile(ref_mask_path):
                make_symlink(ref_mask_path, mask_path)
示例#17
0
    def archive(self):
        """Copy the OASIS exchange restart fields into the restart path."""
        # TODO: Determine the exchange files
        restart_files = ['a2i.nc', 'i2a.nc', 'i2o.nc', 'o2i.nc']

        mkdir_p(self.restart_path)
        for fname in restart_files:
            src = os.path.join(self.work_path, fname)
            if not os.path.exists(src):
                continue
            shutil.copy2(src, os.path.join(self.restart_path, fname))
示例#18
0
    def archive(self):
        """Move the OASIS exchange restart fields into the restart path."""
        # TODO: Determine the exchange files
        exchange_files = ('a2i.nc', 'i2a.nc', 'i2o.nc', 'o2i.nc')

        mkdir_p(self.restart_path)
        for fname in exchange_files:
            src = os.path.join(self.work_path, fname)
            dst = os.path.join(self.restart_path, fname)
            if os.path.exists(src):
                shutil.move(src, dst)
示例#19
0
    def postprocess(self):
        """Collect gprof 'gmon.out*' profiles into a gmon subdirectory."""
        gmon_dir = os.path.join(self.expt.work_path, 'gmon')
        mkdir_p(gmon_dir)

        # One gmon.out.* file per MPI rank
        gmon_fnames = [
            f for f in os.listdir(self.expt.work_path)
            if f.startswith('gmon.out')
        ]

        for gmon in gmon_fnames:
            f_src = os.path.join(self.expt.work_path, gmon)
            f_dst = os.path.join(gmon_dir, gmon)
            shutil.move(f_src, f_dst)
示例#20
0
    def sweep(self, hard_sweep=False):
        """Clean up the run area.

        Removes the work directory and its symlink, then moves PBS log
        files (including stdout/stderr) into a local ``pbs_logs``
        directory.  With ``hard_sweep`` the archive path and its
        symlink are deleted as well.
        """
        # TODO: Fix the IO race conditions!

        if hard_sweep:
            if os.path.isdir(self.archive_path):
                print('Removing archive path {}'.format(self.archive_path))
                cmd = 'rm -rf {}'.format(self.archive_path)
                cmd = shlex.split(cmd)
                rc = sp.call(cmd)
                assert rc == 0

            if os.path.islink(self.archive_sym_path):
                print('Removing symlink {}'.format(self.archive_sym_path))
                os.remove(self.archive_sym_path)

        if os.path.isdir(self.work_path):
            print('Removing work path {}'.format(self.work_path))
            cmd = 'rm -rf {}'.format(self.work_path)
            cmd = shlex.split(cmd)
            rc = sp.call(cmd)
            assert rc == 0

        if os.path.islink(self.work_sym_path):
            print('Removing symlink {}'.format(self.work_sym_path))
            os.remove(self.work_sym_path)

        # TODO: model outstreams and pbs logs need to be handled separately
        default_job_name = os.path.basename(os.getcwd())
        # PBS truncates job names to 15 characters
        short_job_name = self.config.get('jobname', default_job_name)[:15]

        # Match payu's stdout/stderr plus the PBS logs for the run job
        # and its collate ('_c') and profile ('_p') follow-up jobs
        logs = [
            f for f in os.listdir(os.curdir) if os.path.isfile(f) and (
                f == self.stdout_fname or
                f == self.stderr_fname or
                f.startswith(short_job_name + '.o') or
                f.startswith(short_job_name + '.e') or
                f.startswith(short_job_name[:13] + '_c.o') or
                f.startswith(short_job_name[:13] + '_c.e') or
                f.startswith(short_job_name[:13] + '_p.o') or
                f.startswith(short_job_name[:13] + '_p.e')
            )
        ]

        pbs_log_path = os.path.join(os.curdir, 'pbs_logs')
        mkdir_p(pbs_log_path)

        for f in logs:
            print('Moving log {}'.format(f))
            os.rename(f, os.path.join(pbs_log_path, f))
示例#21
0
    def initialize(self):
        """Create the laboratory directories."""
        lab_dirs = (self.archive_path, self.bin_path,
                    self.codebase_path, self.input_basepath)
        for lab_dir in lab_dirs:
            mkdir_p(lab_dir)
示例#22
0
    def initialize(self):
        """Create the laboratory directories.

        NOTE(review): mkdir_p presumably has ``mkdir -p`` semantics
        (no error if the directory already exists) -- confirm.
        """
        # Create laboratory directories
        mkdir_p(self.archive_path)
        mkdir_p(self.bin_path)
        mkdir_p(self.codebase_path)
        mkdir_p(self.input_basepath)
示例#23
0
    def sweep(self, hard_sweep=False):
        """Clean up the run area (SLURM variant).

        Removes the work directory and its symlink, then moves SLURM
        logs and payu stdout/stderr into a local ``slurm_logs``
        directory.  With ``hard_sweep`` the archive path and its
        symlink are deleted as well.
        """
        # TODO: Fix the IO race conditions!

        if hard_sweep:
            if os.path.isdir(self.archive_path):
                print('Removing archive path {}'.format(self.archive_path))
                cmd = 'rm -rf {}'.format(self.archive_path)
                cmd = shlex.split(cmd)
                rc = sp.call(cmd)
                assert rc == 0

            if os.path.islink(self.archive_sym_path):
                print('Removing symlink {}'.format(self.archive_sym_path))
                os.remove(self.archive_sym_path)

        if os.path.isdir(self.work_path):
            print('Removing work path {}'.format(self.work_path))
            cmd = 'rm -rf {}'.format(self.work_path)
            cmd = shlex.split(cmd)
            rc = sp.call(cmd)
            assert rc == 0

        if os.path.islink(self.work_sym_path):
            print('Removing symlink {}'.format(self.work_sym_path))
            os.remove(self.work_sym_path)

        # TODO: model outstreams and pbs logs need to be handled separately
        default_job_name = os.path.basename(os.getcwd())
        # NOTE: short_job_name is only used by the commented-out PBS
        # filters below; the active filter matches SLURM log names
        short_job_name = self.config.get('jobname', default_job_name)[:15]

        logs = [
            f for f in os.listdir(os.curdir) if os.path.isfile(f) and
            (f == self.stdout_fname or f == self.stderr_fname
             or f.startswith('slurm-')
             #f.startswith(short_job_name + '.o') or
             #f.startswith(short_job_name + '.e') or
             #f.startswith(short_job_name[:13] + '_c.o') or
             #f.startswith(short_job_name[:13] + '_c.e')
             )
        ]

        pbs_log_path = os.path.join(os.curdir, 'slurm_logs')
        mkdir_p(pbs_log_path)

        for f in logs:
            print('Moving log {}'.format(f))
            os.rename(f, os.path.join(pbs_log_path, f))
示例#24
0
文件: um.py 项目: marshallward/payu
    def archive(self):
        """Archive the UM restart dump and calendar state.

        Prunes per-PE stdout logs, computes the run end date from the
        namelists, records it for the next run, and copies the matching
        ``aiihca.da*`` restart dump into the restart path.
        """
        super(UnifiedModel, self).archive()

        # Delete all the stdout log files except the root PE
        # Sorts to ensure root PE is first entry
        files = sorted(glob.glob(
                       os.path.join(self.work_path, 'atm.fort6.pe*')),
                       key=lambda name: int(name.rpartition('.')[-1][2:]))
        if len(files) > 1:
            for f_path in files[1:]:
                os.remove(f_path)

        mkdir_p(self.restart_path)

        # Need to figure out the end date of the model.
        nml_path = os.path.join(self.work_path, 'namelists')
        nml = f90nml.read(nml_path)

        # Run length (seconds) from the UM resubmit interval
        resubmit_inc = nml['NLSTCALL']['RUN_RESUBMIT_INC']
        runtime = um_time_to_time(resubmit_inc)
        # runtime = datetime.timedelta(seconds=runtime)

        basis_time = nml['NLSTCALL']['MODEL_BASIS_TIME']
        init_date = um_date_to_date(basis_time)

        end_date = cal.date_plus_seconds(init_date,
                                         runtime,
                                         cal.GREGORIAN)

        # Save model time to restart next run
        with open(os.path.join(self.restart_path,
                  self.restart_calendar_file), 'w') as restart_file:
            restart_file.write(yaml.dump({'end_date': end_date},
                               default_flow_style=False))

        # Re-render the end date in the UM dump filename convention
        end_date = date_to_um_dump_date(end_date)

        restart_dump = os.path.join(self.work_path,
                                    'aiihca.da{0}'.format(end_date))
        f_dst = os.path.join(self.restart_path, self.restart)
        if os.path.exists(restart_dump):
            shutil.copy(restart_dump, f_dst)
        else:
            print('payu: error: Model has not produced a restart dump file:\n'
                  '{} does not exist.\n'
                  'Check DUMPFREQim in namelists'.format(restart_dump))
示例#25
0
    def archive(self, **kwargs):
        """Archive restart files and clean input symlinks from work."""

        # Drop symbolic links left in the work input directory.
        for entry in os.listdir(self.work_input_path):
            entry_path = os.path.join(self.work_input_path, entry)
            if os.path.islink(entry_path):
                os.remove(entry_path)

        # Move end-of-run ('lastday') files into the restart directory.
        mkdir_p(self.restart_path)
        for fname in os.listdir(self.work_path):
            if fname.endswith('lastday.nc'):
                shutil.move(os.path.join(self.work_path, fname),
                            self.restart_path)
示例#26
0
文件: mitgcm.py 项目: coecms/payu
    def archive(self):
        """Archive MITgcm output: flatten mnc_* tile directories, tar the
        STDOUT logs, and move final pickups into the restart directory."""

        # Remove symbolic links to input or pickup files.
        for entry in os.listdir(self.work_path):
            entry_path = os.path.join(self.work_path, entry)
            if os.path.islink(entry_path):
                os.remove(entry_path)

        # Lift files out of per-tile mnc_* directories into work.
        mnc_dirs = [d for d in os.listdir(self.work_path)
                    if d.startswith('mnc_')]
        for dirname in mnc_dirs:
            mnc_path = os.path.join(self.work_path, dirname)
            for fname in os.listdir(mnc_path):
                sh.move(os.path.join(mnc_path, fname), self.work_path)
            os.rmdir(mnc_path)

        mkdir_p(self.restart_path)

        # Keep final pickups; skip intermediate ('ckpt') checkpoints.
        restart_files = [fname for fname in os.listdir(self.work_path)
                         if fname.startswith('pickup')
                         and not fname.split('.')[1].startswith('ckpt')]

        # Bundle the per-rank STDOUT logs into one compressed tarball.
        stdout_files = [fname for fname in os.listdir(self.work_path)
                        if fname.startswith('STDOUT.')]
        if stdout_files:
            tar_path = os.path.join(self.work_path, 'STDOUT.tar.bz2')
            cmd = 'tar -C {0} -c -j -f {1}'.format(self.work_path, tar_path)
            rc = sp.Popen(shlex.split(cmd) + stdout_files).wait()
            assert rc == 0

        # Originals are only removed after the tarball is written.
        for fname in stdout_files:
            os.remove(os.path.join(self.work_path, fname))

        for fname in restart_files:
            sh.move(os.path.join(self.work_path, fname), self.restart_path)
示例#27
0
文件: qgcm.py 项目: strat123123/payu
    def archive(self, **kwargs):
        """Move model restart ('lastday') files into the restart path."""

        # Clear symlinked inputs from the work input directory.
        for link_name in os.listdir(self.work_input_path):
            link_path = os.path.join(self.work_input_path, link_name)
            if os.path.islink(link_path):
                os.remove(link_path)

        mkdir_p(self.restart_path)

        # Files ending in 'lastday.nc' hold the state needed to restart.
        lastday_files = [name for name in os.listdir(self.work_path)
                         if name.endswith('lastday.nc')]
        for name in lastday_files:
            shutil.move(os.path.join(self.work_path, name),
                        self.restart_path)
示例#28
0
文件: um.py 项目: rmholmes/payu
    def archive(self):
        """Archive a UM run: tidy PE logs, save the end date, and stash
        the final restart dump."""
        super(UnifiedModel, self).archive()

        # Delete every stdout log except the root PE's; sorting by PE
        # number puts the root PE at index 0.
        pattern = os.path.join(self.work_path, 'atm.fort6.pe*')
        pe_logs = sorted(glob.glob(pattern),
                         key=lambda name: int(name.rpartition('.')[-1][2:]))
        for extra_log in pe_logs[1:]:
            os.remove(extra_log)

        mkdir_p(self.restart_path)

        # Derive the run end date from the UM namelists.
        nml = f90nml.read(os.path.join(self.work_path, 'namelists'))
        runtime = um_time_to_time(nml['NLSTCALL']['RUN_RESUBMIT_INC'])
        init_date = um_date_to_date(nml['NLSTCALL']['MODEL_BASIS_TIME'])
        end_date = cal.date_plus_seconds(init_date, runtime, cal.GREGORIAN)

        # Record the end date so the next submission can pick it up.
        calendar_path = os.path.join(self.restart_path,
                                     self.restart_calendar_file)
        with open(calendar_path, 'w') as restart_file:
            restart_file.write(yaml.dump({'end_date': end_date},
                                         default_flow_style=False))

        dump_date = date_to_um_dump_date(end_date)
        restart_dump = os.path.join(self.work_path,
                                    'aiihca.da{0}'.format(dump_date))
        f_dst = os.path.join(self.restart_path, self.restart)
        if os.path.exists(restart_dump):
            shutil.copy(restart_dump, f_dst)
        else:
            print('payu: error: Model has not produced a restart dump file:\n'
                  '{} does not exist.\n'
                  'Check DUMPFREQim in namelists'.format(restart_dump))
示例#29
0
文件: mitgcm.py 项目: nicjhan/payu
    def archive(self):
        """Archive MITgcm output: remove input links, flatten tile
        directories, compress logs, and keep the final pickups."""

        # Symlinks in work point at inputs or prior pickups; drop them.
        for name in os.listdir(self.work_path):
            path = os.path.join(self.work_path, name)
            if os.path.islink(path):
                os.remove(path)

        # Lift the contents of each mnc_* tile directory up into work.
        tile_dirs = [os.path.join(self.work_path, name)
                     for name in os.listdir(self.work_path)
                     if name.startswith('mnc_')]
        for tile_dir in tile_dirs:
            for name in os.listdir(tile_dir):
                sh.move(os.path.join(tile_dir, name), self.work_path)
            os.rmdir(tile_dir)

        mkdir_p(self.restart_path)

        # Final pickups only; intermediate 'ckpt' checkpoints are skipped.
        pickups = [name for name in os.listdir(self.work_path)
                   if name.startswith('pickup')
                   and not name.split('.')[1].startswith('ckpt')]

        # Collect the per-rank STDOUT logs into a bzip2 tarball.
        logs = [name for name in os.listdir(self.work_path)
                if name.startswith('STDOUT.')]
        if logs:
            tarball = os.path.join(self.work_path, 'STDOUT.tar.bz2')
            tar_cmd = 'tar -C {0} -c -j -f {1}'.format(self.work_path,
                                                       tarball)
            rc = sp.Popen(shlex.split(tar_cmd) + logs).wait()
            assert rc == 0

        # Remove the originals only after the tarball was written.
        for name in logs:
            os.remove(os.path.join(self.work_path, name))

        for name in pickups:
            sh.move(os.path.join(self.work_path, name), self.restart_path)
示例#30
0
文件: um.py 项目: nicjhan/payu
    def archive(self):
        """Archive the UM restart dump for the completed run.

        Reads the run length and basis time from the UM namelists to
        compute the end date, then copies the matching 'aiihca.da*'
        restart dump into the restart directory.
        """
        mkdir_p(self.restart_path)

        # Need to figure out the end date of the model.
        nml_path = os.path.join(self.work_path, 'namelists')
        nml = f90nml.read(nml_path)

        resubmit_inc = nml['NLSTCALL']['RUN_RESUBMIT_INC']
        runtime = um_time_to_time(resubmit_inc)
        runtime = datetime.timedelta(seconds=runtime)

        basis_time = nml['NLSTCALL']['MODEL_BASIS_TIME']
        init_date = um_date_to_date(basis_time)

        end_date = date_to_um_dump_date(init_date + runtime)

        restart_dump = os.path.join(self.work_path,
                                    'aiihca.da{0}'.format(end_date))
        f_dst = os.path.join(self.restart_path, self.restart)

        # Guard against a missing dump: copying blindly raises an
        # uninformative IOError, while the usual cause is a DUMPFREQim
        # misconfiguration in the namelists.
        if os.path.exists(restart_dump):
            shutil.copy(restart_dump, f_dst)
        else:
            print('payu: error: Model has not produced a restart dump file:\n'
                  '{} does not exist.\n'
                  'Check DUMPFREQim in namelists'.format(restart_dump))
示例#31
0
    def archive(self):
        """Archive the UM restart dump for the completed run.

        Computes the model end date from the namelists and copies the
        corresponding 'aiihca.da*' dump into the restart directory.
        """
        mkdir_p(self.restart_path)

        # Need to figure out the end date of the model.
        nml_path = os.path.join(self.work_path, 'namelists')
        nml = f90nml.read(nml_path)

        resubmit_inc = nml['NLSTCALL']['RUN_RESUBMIT_INC']
        runtime = um_time_to_time(resubmit_inc)
        runtime = datetime.timedelta(seconds=runtime)

        basis_time = nml['NLSTCALL']['MODEL_BASIS_TIME']
        init_date = um_date_to_date(basis_time)

        end_date = date_to_um_dump_date(init_date + runtime)

        restart_dump = os.path.join(self.work_path,
                                    'aiihca.da{}'.format(end_date))
        f_dst = os.path.join(self.restart_path, self.restart)

        # A missing dump usually means DUMPFREQim is misconfigured;
        # report that instead of letting shutil.copy raise IOError.
        if os.path.exists(restart_dump):
            shutil.copy(restart_dump, f_dst)
        else:
            print('payu: error: Model has not produced a restart dump file:\n'
                  '{} does not exist.\n'
                  'Check DUMPFREQim in namelists'.format(restart_dump))
示例#32
0
    def __init__(self, config, reproduce):
        """Initialise manifest handling from the 'manifest' config section.

        config -- manifest control configuration (dict-like)
        reproduce -- default reproduce flag applied to each manifest
        """
        # Manifest control configuration
        self.manifest_config = config

        # Not currently supporting specifying hash functions
        self.fast_hashes = self.manifest_config.get('fasthash', fast_hashes)
        self.full_hashes = self.manifest_config.get('fullhash', full_hashes)

        # Normalise scalar config entries to single-element lists,
        # matching the handling of 'ignore' below.
        if isinstance(self.fast_hashes, str):
            self.fast_hashes = [self.fast_hashes]
        if isinstance(self.full_hashes, str):
            self.full_hashes = [self.full_hashes]

        self.ignore = self.manifest_config.get('ignore', ['.*'])
        if isinstance(self.ignore, str):
            self.ignore = [self.ignore]

        # Initialise manifests and reproduce flags
        self.manifests = {}
        self.have_manifest = {}
        reproduce_config = self.manifest_config.get('reproduce', {})
        self.reproduce = {}
        for mf in ['input', 'restart', 'exe']:
            self.init_mf(mf)
            self.reproduce[mf] = reproduce_config.get(mf, reproduce)

        # Make sure the manifests directory exists
        mkdir_p(os.path.dirname(self.manifests['exe'].path))

        # Set flag to auto-scan input directories
        self.scaninputs = self.manifest_config.get('scaninputs', True)

        # Reproducing inputs requires the input manifest to stay fixed,
        # so scanning is forced off in that case.
        if self.reproduce['input'] and self.scaninputs:
            print("scaninputs set to False when reproduce input is True")
            self.scaninputs = False
示例#33
0
文件: manifest.py 项目: rmholmes/payu
    def __init__(self, expt, reproduce):
        """Set up input/restart/exe manifests for an experiment."""
        # Inherit experiment configuration
        self.expt = expt

        # Manifest control configuration
        self.manifest_config = self.expt.config.get('manifest', {})

        # Not currently supporting specifying hash functions

        self.ignore = self.manifest_config.get('ignore', ['.*'])
        if isinstance(self.ignore, str):
            self.ignore = [self.ignore]

        # One manifest per category, stored under manifests/<name>.yaml,
        # each starting out unpopulated.
        self.manifests = {}
        self.have_manifest = {}
        for mf in ('input', 'restart', 'exe'):
            mf_path = os.path.join('manifests', '{}.yaml'.format(mf))
            self.manifests[mf] = PayuManifest(mf_path, ignore=self.ignore)
            self.have_manifest[mf] = False

        # Per-manifest reproduce flags, defaulting to the global flag.
        self.reproduce_config = self.manifest_config.get('reproduce', {})
        self.reproduce = {}
        for mf in self.manifests:
            self.reproduce[mf] = self.reproduce_config.get(mf, reproduce)

        # Make sure the manifests directory exists
        mkdir_p(os.path.dirname(self.manifests['exe'].path))

        self.scaninputs = self.manifest_config.get('scaninputs', True)
示例#34
0
    def setup(self):
        """Populate the work directory: config files, restart and input
        manifest entries, and the executable entry."""
        # Create experiment directory structure
        mkdir_p(self.work_input_path)
        mkdir_p(self.work_restart_path)
        mkdir_p(self.work_output_path)

        # Copy configuration files from control path
        for f_name in self.config_files:
            shutil.copy(os.path.join(self.control_path, f_name),
                        self.work_path)

        # Optional files may legitimately be absent; only ENOENT is
        # tolerated, anything else is re-raised.
        for f_name in self.optional_config_files:
            try:
                shutil.copy(os.path.join(self.control_path, f_name),
                            self.work_path)
            except IOError as exc:
                if exc.errno != errno.ENOENT:
                    raise

        # Register prior restart files unless a restart manifest exists.
        if not self.expt.manifest.have_manifest['restart'] \
                and self.prior_restart_path:
            for f_name in self.get_prior_restart_files():
                f_orig = os.path.join(self.prior_restart_path, f_name)
                f_link = os.path.join(self.work_init_path_local, f_name)
                self.expt.manifest.add_filepath('restart', f_link, f_orig,
                                                self.copy_inputs)

        # Register inputs when no populated input manifest exists or
        # scanning is enabled (the default).
        if not self.expt.manifest.have_manifest['input'] \
                or self.expt.manifest.scaninputs:
            for input_path in self.input_paths:
                for f_name in os.listdir(input_path):
                    f_orig = os.path.join(input_path, f_name)
                    f_link = os.path.join(self.work_input_path_local, f_name)
                    # Do not use input file if it is in RESTART
                    if not os.path.exists(f_link):
                        self.expt.manifest.add_filepath(
                            'input', f_link, f_orig, self.copy_inputs)

        # Register the executable so any change in its path is detected.
        if self.exec_path:
            self.expt.manifest.add_filepath('exe', self.exec_path_local,
                                            self.exec_path)

        timestep = self.config.get('timestep')
        if timestep:
            self.set_timestep(timestep)
示例#35
0
    def setup(self):
        """Prepare the work directory: copy configs, stage restart files
        and link (or copy) input data."""
        # Create experiment directory structure
        mkdir_p(self.work_input_path)
        mkdir_p(self.work_restart_path)
        mkdir_p(self.work_output_path)

        # Copy configuration files from control path
        for cfg_name in self.config_files:
            cfg_path = os.path.join(self.control_path, cfg_name)
            shutil.copy(cfg_path, self.work_path)

        # Optional config files may be missing; that is not an error.
        for cfg_name in self.optional_config_files:
            cfg_path = os.path.join(self.control_path, cfg_name)
            try:
                shutil.copy(cfg_path, self.work_path)
            except IOError as exc:
                if exc.errno != errno.ENOENT:
                    raise

        # Stage restart files from the prior run (copy or symlink).
        if self.prior_restart_path and not self.expt.repeat_run:
            for r_name in self.get_prior_restart_files():
                f_restart = os.path.join(self.prior_restart_path, r_name)
                f_input = os.path.join(self.work_init_path, r_name)
                if self.copy_restarts:
                    shutil.copy(f_restart, f_input)
                else:
                    make_symlink(f_restart, f_input)

        # Link input data
        for input_path in self.input_paths:
            for in_name in os.listdir(input_path):
                f_input = os.path.join(input_path, in_name)
                f_work_input = os.path.join(self.work_input_path, in_name)
                # Staged restart files take precedence over inputs with
                # the same name.
                if os.path.exists(f_work_input):
                    continue
                if self.copy_inputs:
                    shutil.copy(f_input, f_work_input)
                else:
                    make_symlink(f_input, f_work_input)

                # Some models overwrite their own input for restarts
                # (e.g. OASIS)
                if self.make_inputs_writeable:
                    os.chmod(f_work_input,
                             stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP)

        t_step = self.config.get('timestep')
        if t_step:
            self.set_timestep(t_step)
示例#36
0
    def setup(self):
        """Set up the work directory and register files in the manifests.

        Copies configuration files from the control path, adds prior
        restart files and input files to their manifests, and records
        the executable path.
        """
        # Create experiment directory structure
        mkdir_p(self.work_input_path)
        mkdir_p(self.work_restart_path)
        mkdir_p(self.work_output_path)

        # Copy configuration files from control path
        for f_name in self.config_files:
            f_path = os.path.join(self.control_path, f_name)
            shutil.copy(f_path, self.work_path)

        for f_name in self.optional_config_files:
            f_path = os.path.join(self.control_path, f_name)
            try:
                shutil.copy(f_path, self.work_path)
            except IOError as exc:
                # Optional files may be absent; re-raise anything else.
                if exc.errno != errno.ENOENT:
                    raise

        # Add restart files from prior run to restart manifest
        if (not self.expt.manifest.have_manifest['restart']
                and self.prior_restart_path):
            restart_files = self.get_prior_restart_files()
            for f_name in restart_files:
                f_orig = os.path.join(self.prior_restart_path, f_name)
                f_link = os.path.join(self.work_init_path_local, f_name)
                self.expt.manifest.add_filepath('restart', f_link, f_orig,
                                                self.copy_inputs)

        # Add input files to manifest if we don't already have a populated
        # input manifest, or we specify scan_inputs is True (default)
        if (not self.expt.manifest.have_manifest['input']
                or self.expt.manifest.scaninputs):
            # Add files to manifest
            for input_path in self.input_paths:
                input_files = os.listdir(input_path)
                for f_name in input_files:
                    f_orig = os.path.join(input_path, f_name)
                    f_link = os.path.join(self.work_input_path_local, f_name)
                    # Do not use input file if it is in RESTART
                    if not os.path.exists(f_link):
                        self.expt.manifest.add_filepath(
                            'input', f_link, f_orig, self.copy_inputs)

        # Make symlink to executable in work directory
        if self.exec_path:
            # Add to exe manifest (always done so any change in exe path
            # will be picked up)
            self.expt.manifest.add_filepath('exe', self.exec_path_local,
                                            self.exec_path)

        timestep = self.config.get('timestep')
        if timestep:
            self.set_timestep(timestep)
示例#37
0
    def setup(self):
        """Create the work area, copy configs, and stage restarts/inputs."""

        # Create experiment directory structure
        mkdir_p(self.work_input_path)
        mkdir_p(self.work_restart_path)
        mkdir_p(self.work_output_path)

        # Copy configuration files from control path
        for name in self.config_files:
            shutil.copy(os.path.join(self.control_path, name),
                        self.work_path)

        # Optional configs are copied when present; missing ones are fine.
        for name in self.optional_config_files:
            try:
                shutil.copy(os.path.join(self.control_path, name),
                            self.work_path)
            except IOError as exc:
                if exc.errno != errno.ENOENT:
                    raise

        # Stage restart files from the prior run (copy or symlink).
        if self.prior_restart_path and not self.expt.repeat_run:
            for name in self.get_prior_restart_files():
                f_restart = os.path.join(self.prior_restart_path, name)
                f_input = os.path.join(self.work_init_path, name)
                if self.copy_restarts:
                    shutil.copy(f_restart, f_input)
                else:
                    make_symlink(f_restart, f_input)

        # Link input data, never clobbering staged restart files.
        for input_path in self.input_paths:
            for name in os.listdir(input_path):
                f_input = os.path.join(input_path, name)
                f_work_input = os.path.join(self.work_input_path, name)
                if not os.path.exists(f_work_input):
                    if self.copy_inputs:
                        shutil.copy(f_input, f_work_input)
                    else:
                        make_symlink(f_input, f_work_input)

        timestep = self.config.get('timestep')
        if timestep:
            self.set_timestep(timestep)
示例#38
0
    def archive(self):
        """Create an empty restart directory and discard the staged
        work inputs."""
        mkdir_p(self.restart_path)
        shutil.rmtree(self.work_input_path)
示例#39
0
    def setup(self):
        """Prepare the work directory and register files in the manifests.

        Copies configuration files from the control path, records prior
        restart files, scans each input path (a single file or a whole
        directory tree) into the input manifest, and registers the
        executable path.
        """
        print("Setting up {model}".format(model=self.name))
        # Create experiment directory structure
        mkdir_p(self.work_init_path)
        mkdir_p(self.work_input_path)
        mkdir_p(self.work_restart_path)
        mkdir_p(self.work_output_path)

        # Copy configuration files from control path
        for f_name in self.config_files:
            f_path = os.path.join(self.control_path, f_name)
            shutil.copy(f_path, self.work_path)

        # Optional configuration files may legitimately be absent;
        # only a missing file (ENOENT) is tolerated.
        for f_name in self.optional_config_files:
            f_path = os.path.join(self.control_path, f_name)
            try:
                shutil.copy(f_path, self.work_path)
            except IOError as exc:
                if exc.errno == errno.ENOENT:
                    pass
                else:
                    raise

        # Add restart files from prior run to restart manifest
        if (not self.expt.manifest.have_manifest['restart']
                and self.prior_restart_path):
            restart_files = self.get_prior_restart_files()
            for f_name in restart_files:
                f_orig = os.path.join(self.prior_restart_path, f_name)
                f_link = os.path.join(self.work_init_path_local, f_name)
                self.expt.manifest.add_filepath('restart', f_link, f_orig,
                                                self.copy_restarts)

        # Add input files to manifest if we don't already have a populated
        # input manifest, or we specify scaninputs is True (default)
        if (not self.expt.manifest.have_manifest['input']
                or self.expt.manifest.scaninputs):
            for input_path in self.input_paths:
                if os.path.isfile(input_path):
                    # Build a mock walk iterator for a single file
                    fwalk = iter([(os.path.dirname(input_path), [],
                                   [os.path.basename(input_path)])])
                    # Overwrite the input_path as a directory so the
                    # relative-path computation below resolves against
                    # the file's parent directory
                    input_path = os.path.dirname(input_path)
                else:
                    fwalk = os.walk(input_path)

                for path, dirs, files in fwalk:
                    # Mirror the input subdirectory layout under the
                    # local work input path
                    workrelpath = os.path.relpath(path, input_path)
                    subdir = os.path.normpath(
                        os.path.join(self.work_input_path_local, workrelpath))

                    if not os.path.exists(subdir):
                        os.mkdir(subdir)

                    for f_name in files:
                        f_orig = os.path.join(path, f_name)
                        f_link = os.path.join(self.work_input_path_local,
                                              workrelpath, f_name)
                        # Do not use input file if it is in RESTART
                        if not os.path.exists(f_link):
                            self.expt.manifest.add_filepath(
                                'input', f_link, f_orig, self.copy_inputs)

        # Make symlink to executable in work directory
        if self.exec_path:
            # Add to exe manifest (this is always done so any change in exe
            # path will be picked up)
            self.expt.manifest.add_filepath('exe', self.exec_path_local,
                                            self.exec_path)

        timestep = self.config.get('timestep')
        if timestep:
            self.set_timestep(timestep)
示例#40
0
    def setup(self):
        """Run the base setup, then ensure the model log directory exists."""
        super(Yatm, self).setup()

        # Make log dir
        log_dir = os.path.join(self.work_path, 'log')
        mkdir_p(log_dir)
示例#41
0
文件: matm.py 项目: strat123123/payu
    def archive(self):
        """Ensure an (empty) restart directory exists for this run."""
        mkdir_p(self.restart_path)
示例#42
0
    def setup(self):
        """Build the work directory and populate the file manifests."""
        print("Setting up {model}".format(model=self.name))
        # Create experiment directory structure
        mkdir_p(self.work_init_path)
        mkdir_p(self.work_input_path)
        mkdir_p(self.work_restart_path)
        mkdir_p(self.work_output_path)

        # Copy configuration files from control path
        for f_name in self.config_files:
            shutil.copy(os.path.join(self.control_path, f_name),
                        self.work_path)

        # Optional configuration files may be absent (ENOENT is ignored).
        for f_name in self.optional_config_files:
            try:
                shutil.copy(os.path.join(self.control_path, f_name),
                            self.work_path)
            except IOError as exc:
                if exc.errno != errno.ENOENT:
                    raise

        manifest = self.expt.manifest

        # Register prior restart files in the restart manifest.
        if not manifest.have_manifest['restart'] and self.prior_restart_path:
            for f_name in self.get_prior_restart_files():
                f_orig = os.path.join(self.prior_restart_path, f_name)
                f_link = os.path.join(self.work_init_path_local, f_name)
                manifest.add_filepath('restart', f_link, f_orig,
                                      self.copy_restarts)

        # Register input files unless a populated input manifest exists
        # and scanning is disabled.
        if not manifest.have_manifest['input'] or manifest.scaninputs:
            for input_path in self.input_paths:
                if os.path.isfile(input_path):
                    # Treat a single file as a one-entry directory walk.
                    fwalk = iter([(os.path.dirname(input_path), [],
                                   [os.path.basename(input_path)])])
                    # Relative paths below resolve against the parent dir.
                    input_path = os.path.dirname(input_path)
                else:
                    fwalk = os.walk(input_path)

                for path, dirs, files in fwalk:
                    # Mirror the input subtree under the work input path.
                    workrelpath = os.path.relpath(path, input_path)
                    subdir = os.path.normpath(
                        os.path.join(self.work_input_path_local, workrelpath))
                    if not os.path.exists(subdir):
                        os.mkdir(subdir)

                    for f_name in files:
                        f_orig = os.path.join(path, f_name)
                        f_link = os.path.join(self.work_input_path_local,
                                              workrelpath, f_name)
                        # Do not use input file if it is in RESTART
                        if not os.path.exists(f_link):
                            manifest.add_filepath('input', f_link, f_orig,
                                                  self.copy_inputs)

        # Record the executable so any change in its path is picked up.
        if self.exec_path:
            manifest.add_filepath('exe', self.exec_path_local,
                                  self.exec_path)

        timestep = self.config.get('timestep')
        if timestep:
            self.set_timestep(timestep)
示例#43
0
    def archive(self):
        """Make an empty restart directory and remove the work inputs."""
        mkdir_p(self.restart_path)
        shutil.rmtree(self.work_input_path)
示例#44
0
    def run(self, *user_flags):

        # XXX: This was previously done in reversion
        envmod.setup()

        self.load_modules()

        f_out = open(self.stdout_fname, 'w')
        f_err = open(self.stderr_fname, 'w')

        # Set MPI environment variables
        env = self.config.get('env')

        # Explicitly check for `None`, in case of an empty `env:` entry
        if env is None:
            env = {}

        for var in env:

            if env[var] is None:
                env_value = ''
            else:
                env_value = str(env[var])

            os.environ[var] = env_value

        mpi_config = self.config.get('mpi', {})
        mpi_runcmd = mpi_config.get('runcmd', 'mpirun')

        if self.config.get('scalasca', False):
            mpi_runcmd = ' '.join(['scalasca -analyze', mpi_runcmd])

        # MPI runtime flags
        mpi_flags = mpi_config.get('flags', [])
        if not mpi_flags:
            mpi_flags = self.config.get('mpirun', [])
            # TODO: Legacy config removal warning

        if type(mpi_flags) != list:
            mpi_flags = [mpi_flags]

        # TODO: More uniform support needed here
        if self.config.get('scalasca', False):
            mpi_flags = ['\"{0}\"'.format(f) for f in mpi_flags]

        # XXX: I think this may be broken
        if user_flags:
            mpi_flags.extend(list(user_flags))

        if self.debug:
            mpi_flags.append('--debug')

        mpi_progs = []
        for model in self.models:

            # Skip models without executables (e.g. couplers)
            if not model.exec_path_local:
                continue

            mpi_config = self.config.get('mpi', {})
            mpi_module = mpi_config.get('module', None)

            # Update MPI library module (if not explicitly set)
            # TODO: Check for MPI library mismatch across multiple binaries
            if mpi_module is None:
                mpi_module = envmod.lib_update(
                    model.exec_path_local,
                    'libmpi.so'
                )

            model_prog = []

            # Our MPICH wrapper does not support a working directory flag
            if not mpi_module.startswith('mvapich'):
                model_prog.append('-wdir {0}'.format(model.work_path))

            # Append any model-specific MPI flags
            model_flags = model.config.get('mpiflags', [])
            if not isinstance(model_flags, list):
                model_prog.append(model_flags)
            else:
                model_prog.extend(model_flags)

            model_ncpus = model.config.get('ncpus')
            if model_ncpus:
                model_prog.append('-np {0}'.format(model_ncpus))

            model_npernode = model.config.get('npernode')
            # TODO: New Open MPI format?
            if model_npernode:
                if model_npernode % 2 == 0:
                    npernode_flag = ('-map-by ppr:{0}:socket'
                                     ''.format(model_npernode / 2))
                else:
                    npernode_flag = ('-map-by ppr:{0}:node'
                                     ''.format(model_npernode))

                if self.config.get('scalasca', False):
                    npernode_flag = '\"{0}\"'.format(npernode_flag)
                model_prog.append(npernode_flag)

            if self.config.get('hpctoolkit', False):
                os.environ['HPCRUN_EVENT_LIST'] = 'WALLCLOCK@5000'
                model_prog.append('hpcrun')

            for prof in self.profilers:
                if prof.runscript:
                    model_prog = model_prog.append(prof.runscript)

            model_prog.append(model.exec_prefix)

            # Use the full path to symlinked exec_name in work as some
            # older MPI libraries complained executable was not in PATH
            model_prog.append(os.path.join(model.work_path, model.exec_name))

            mpi_progs.append(' '.join(model_prog))

        cmd = '{runcmd} {flags} {exes}'.format(
            runcmd=mpi_runcmd,
            flags=' '.join(mpi_flags),
            exes=' : '.join(mpi_progs)
        )

        for prof in self.profilers:
            cmd = prof.wrapper(cmd)

        # Expand shell variables inside flags
        if self.expand_shell_vars:
            cmd = os.path.expandvars(cmd)

        # TODO: Consider making this default
        if self.config.get('coredump', False):
            enable_core_dump()

        # Our MVAPICH wrapper does not support working directories
        if mpi_module.startswith('mvapich'):
            curdir = os.getcwd()
            os.chdir(self.work_path)
        else:
            curdir = None

        # Dump out environment
        with open(self.env_fname, 'w') as file:
            file.write(yaml.dump(dict(os.environ), default_flow_style=False))

        self.runlog.create_manifest()
        if self.runlog.enabled:
            self.runlog.commit()

        # NOTE: This may not be necessary, since env seems to be getting
        # correctly updated.  Need to look into this.
        print(cmd)
        if env:
            # TODO: Replace with mpirun -x flag inputs
            proc = sp.Popen(shlex.split(cmd), stdout=f_out, stderr=f_err,
                            env=os.environ.copy())
            proc.wait()
            rc = proc.returncode
        else:
            rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err)

        # Return to control directory
        if curdir:
            os.chdir(curdir)

        f_out.close()
        f_err.close()

        self.finish_time = datetime.datetime.now()

        info = get_job_info()

        if info is None:
            # Not being run under PBS, reverse engineer environment
            info = {
                'PAYU_PATH': os.path.dirname(self.payu_path)
            }

        # Add extra information to save to jobinfo
        info.update(
            {
                'PAYU_CONTROL_DIR': self.control_path,
                'PAYU_RUN_ID': self.run_id,
                'PAYU_CURRENT_RUN': self.counter,
                'PAYU_N_RUNS':  self.n_runs,
                'PAYU_JOB_STATUS': rc,
                'PAYU_START_TIME': self.start_time.isoformat(),
                'PAYU_FINISH_TIME': self.finish_time.isoformat(),
                'PAYU_WALLTIME': "{0} s".format(
                    (self.finish_time - self.start_time).total_seconds()
                ),
            }
        )

        # Dump job info
        with open(self.job_fname, 'w') as file:
            file.write(yaml.dump(info, default_flow_style=False))

        # Remove any empty output files (e.g. logs)
        for fname in os.listdir(self.work_path):
            fpath = os.path.join(self.work_path, fname)
            if os.path.getsize(fpath) == 0:
                os.remove(fpath)

        # Clean up any profiling output
        # TODO: Move after `rc` code check?
        for prof in self.profilers:
            prof.postprocess()

        # TODO: Need a model-specific cleanup method call here
        # NOTE: This does not appear to catch hanging jobs killed by PBS
        if rc != 0:
            # Backup logs for failed runs
            error_log_dir = os.path.join(self.archive_path, 'error_logs')
            mkdir_p(error_log_dir)

            # NOTE: This is PBS-specific
            job_id = get_job_id(short=False)

            if job_id == '':
                job_id = self.run_id[:6]

            for fname in self.output_fnames:

                src = os.path.join(self.control_path, fname)

                stem, suffix = os.path.splitext(fname)
                dest = os.path.join(error_log_dir,
                                    ".".join((stem, job_id)) + suffix)

                print(src, dest)

                shutil.copyfile(src, dest)

            # Create the symlink to the logs if it does not exist
            make_symlink(self.archive_path, self.archive_sym_path)

            # Terminate payu
            sys.exit('payu: Model exited with error code {0}; aborting.'
                     ''.format(rc))

        # Decrement run counter on successful run
        stop_file_path = os.path.join(self.control_path, 'stop_run')
        if os.path.isfile(stop_file_path):
            assert os.stat(stop_file_path).st_size == 0
            os.remove(stop_file_path)
            print('payu: Stop file detected; terminating resubmission.')
            self.n_runs = 0
        else:
            self.n_runs -= 1

        # Move logs to archive (or delete if empty)
        for f in self.output_fnames:
            f_path = os.path.join(self.control_path, f)
            if os.path.getsize(f_path) == 0:
                os.remove(f_path)
            else:
                shutil.move(f_path, self.work_path)

        run_script = self.userscripts.get('run')
        if run_script:
            self.run_userscript(run_script)
示例#45
0
文件: mom.py 项目: marshallward/payu
    def setup(self):
        """Prepare the MOM work directory: initialise FMS, set the run
        length in ``input.nml``, and configure the land CPU mask table."""
        # FMS initialisation
        super(Mom, self).setup()

        if not self.top_level_model:
            # Make log dir
            mkdir_p(os.path.join(self.work_path, 'log'))

        nml_path = os.path.join(self.work_path, 'input.nml')
        nml = f90nml.read(nml_path)

        if self.config.get('core2iaf'):
            self.core2iaf_setup()

        # Set the runtime
        runtime = self.expt.runtime
        if runtime:
            solo_nml = nml['ocean_solo_nml']

            for unit in ('years', 'months', 'days'):
                solo_nml[unit] = runtime[unit]
            solo_nml['seconds'] = runtime.get('seconds', 0)

            nml.write(nml_path, force=True)

        # Construct the land CPU mask
        if self.expt.config.get('mask_table', False):
            # NOTE: This function actually creates a mask table using the
            #       `check_mask` command line tool.  But it is not very usable
            #       since you need to know the number of masked CPUs to submit
            #       the job.  It needs a rethink of the submission process.
            self.create_mask_table(nml)

        # NOTE: Don't expect this to be here forever...
        # Attempt to set a mask table from the input
        if self.config.get('mask', False):
            mask_path = os.path.join(self.work_input_path, 'ocean_mask_table')

            # Remove any existing mask
            # (If no reference mask is available, then we will not use one)
            if os.path.isfile(mask_path):
                os.remove(mask_path)

            # Reference mask table
            assert 'layout' in nml['ocean_model_nml']
            nx, ny = nml['ocean_model_nml'].get('layout')
            n_masked_cpus = nx * ny - self.config.get('ncpus')

            ref_mask_path = os.path.join(
                self.work_input_path,
                'mask_table.{nmask}.{nx}x{ny}'.format(
                    nmask=n_masked_cpus, nx=nx, ny=ny
                )
            )

            # Set (or replace) mask table if reference is available
            if os.path.isfile(ref_mask_path):
                make_symlink(ref_mask_path, mask_path)
示例#46
0
    def run(self, *user_flags):
        """Assemble and execute the MPMD `mpirun` command for the suite.

        Builds one colon-separated program segment per model, redirects
        stdout/stderr to the experiment log files, archives logs on
        failure, and decrements the resubmission counter on success.

        Parameters
        ----------
        *user_flags : str
            Extra MPI flags appended after the configured flags.
        """
        self.load_modules()

        f_out = open(self.stdout_fname, 'w')
        f_err = open(self.stderr_fname, 'w')

        # Set MPI environment variables
        env = self.config.get('env')

        # Explicitly check for `None`, in case of an empty `env:` entry
        if env is None:
            env = {}

        for var in env:
            # A bare `VAR:` entry in config.yaml parses to None; export it
            # as an empty string rather than the literal 'None'.
            if env[var] is None:
                env_value = ''
            else:
                env_value = str(env[var])

            os.environ[var] = env_value

        mpi_config = self.config.get('mpi', {})
        mpi_runcmd = mpi_config.get('runcmd', 'mpirun')

        if self.config.get('scalasca', False):
            mpi_runcmd = ' '.join(['scalasca -analyze', mpi_runcmd])

        # MPI runtime flags
        mpi_flags = mpi_config.get('flags', [])
        if not mpi_flags:
            mpi_flags = self.config.get('mpirun', [])
            # TODO: Legacy config removal warning

        if not isinstance(mpi_flags, list):
            mpi_flags = [mpi_flags]

        # TODO: More uniform support needed here
        if self.config.get('scalasca', False):
            mpi_flags = ['\"{}\"'.format(f) for f in mpi_flags]

        # XXX: I think this may be broken
        if user_flags:
            mpi_flags.extend(list(user_flags))

        if self.debug:
            mpi_flags.append('--debug')

        mpi_progs = []
        for model in self.models:

            # Skip models without executables (e.g. couplers)
            if not model.exec_path:
                continue

            mpi_config = self.config.get('mpi', {})
            mpi_module = mpi_config.get('module', None)

            # Update MPI library module (if not explicitly set)
            # TODO: Check for MPI library mismatch across multiple binaries
            if mpi_module is None:
                mpi_module = envmod.lib_update(model.exec_path, 'libmpi.so')

            model_prog = []

            # Our MPICH wrapper does not support a working directory flag
            if not mpi_module.startswith('mvapich'):
                model_prog.append('-wdir {}'.format(model.work_path))

            # Append any model-specific MPI flags
            model_flags = model.config.get('mpiflags', [])
            if not isinstance(model_flags, list):
                model_prog.append(model_flags)
            else:
                model_prog.extend(model_flags)

            model_ncpus = model.config.get('ncpus')
            if model_ncpus:
                model_prog.append('-np {}'.format(model_ncpus))

            model_npernode = model.config.get('npernode')
            # TODO: New Open MPI format?
            if model_npernode:
                if model_npernode % 2 == 0:
                    # Integer (floor) division: true division yields a
                    # float under Python 3, producing 'ppr:2.0:socket'.
                    npernode_flag = ('-map-by ppr:{}:socket'
                                     ''.format(model_npernode // 2))
                else:
                    npernode_flag = ('-map-by ppr:{}:node'
                                     ''.format(model_npernode))

                if self.config.get('scalasca', False):
                    npernode_flag = '\"{}\"'.format(npernode_flag)
                model_prog.append(npernode_flag)

            if self.config.get('hpctoolkit', False):
                os.environ['HPCRUN_EVENT_LIST'] = 'WALLCLOCK@5000'
                model_prog.append('hpcrun')

            for prof in self.profilers:
                if prof.runscript:
                    # NOTE: list.append returns None; rebinding model_prog
                    #       to its result would crash on the next append.
                    model_prog.append(prof.runscript)

            model_prog.append(model.exec_prefix)
            model_prog.append(model.exec_path)

            mpi_progs.append(' '.join(model_prog))

        cmd = '{} {} {}'.format(mpi_runcmd, ' '.join(mpi_flags),
                                ' : '.join(mpi_progs))

        for prof in self.profilers:
            cmd = prof.wrapper(cmd)

        # Expand shell variables inside flags
        if self.expand_shell_vars:
            cmd = os.path.expandvars(cmd)

        print(cmd)

        # Our MVAPICH wrapper does not support working directories
        if mpi_module.startswith('mvapich'):
            curdir = os.getcwd()
            os.chdir(self.work_path)
        else:
            curdir = None

        # NOTE: This may not be necessary, since env seems to be getting
        # correctly updated.  Need to look into this.
        if env:
            # TODO: Replace with mpirun -x flag inputs
            proc = sp.Popen(shlex.split(cmd),
                            stdout=f_out,
                            stderr=f_err,
                            env=os.environ.copy())
            proc.wait()
            rc = proc.returncode
        else:
            rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err)

        # Return to control directory
        if curdir:
            os.chdir(curdir)

        if self.runlog:
            self.runlog.commit()

        f_out.close()
        f_err.close()

        # Remove any empty output files (e.g. logs)
        for fname in os.listdir(self.work_path):
            fpath = os.path.join(self.work_path, fname)
            if os.path.getsize(fpath) == 0:
                os.remove(fpath)

        # Clean up any profiling output
        # TODO: Move after `rc` code check?
        for prof in self.profilers:
            prof.postprocess()

        # TODO: Need a model-specific cleanup method call here
        # NOTE: This does not appear to catch hanging jobs killed by PBS
        if rc != 0:
            # Backup logs for failed runs
            error_log_dir = os.path.join(self.archive_path, 'error_logs')
            mkdir_p(error_log_dir)

            # NOTE: This is PBS-specific
            job_id = os.environ.get('PBS_JOBID', '')

            for fname in (self.stdout_fname, self.stderr_fname):
                src = os.path.join(self.control_path, fname)

                # NOTE: This assumes standard .out/.err extensions
                dest = os.path.join(error_log_dir,
                                    fname[:-4] + '.' + job_id + fname[-4:])
                print(src, dest)

                shutil.copyfile(src, dest)

            # Create the symlink to the logs if it does not exist
            make_symlink(self.archive_path, self.archive_sym_path)

            # Terminate payu
            sys.exit('payu: Model exited with error code {}; aborting.'
                     ''.format(rc))

        # Decrement run counter on successful run
        stop_file_path = os.path.join(self.control_path, 'stop_run')
        if os.path.isfile(stop_file_path):
            assert os.stat(stop_file_path).st_size == 0
            os.remove(stop_file_path)
            print('payu: Stop file detected; terminating resubmission.')
            self.n_runs = 0
        else:
            self.n_runs -= 1

        # Move logs to archive (or delete if empty)
        for f in (self.stdout_fname, self.stderr_fname):
            f_path = os.path.join(self.control_path, f)
            if os.path.getsize(f_path) == 0:
                os.remove(f_path)
            else:
                shutil.move(f_path, self.work_path)

        run_script = self.userscripts.get('run')
        if run_script:
            self.run_userscript(run_script)
示例#47
0
    def archive(self):
        """Archive a completed run: move `work` to the output path, archive
        each model's restarts, prune outdated restart directories, and kick
        off collation/profiling post-processing jobs.

        Aborts payu (``sys.exit``) if there is no work directory or if the
        output path already exists.  Order matters here: model archives run
        against `work` before it is moved to `output`.
        """
        if not self.config.get('archive', True):
            print('payu: not archiving due to config.yaml setting.')
            return

        # Check there is a work directory, otherwise bail
        if not os.path.exists(self.work_sym_path):
            sys.exit('payu: error: No work directory to archive.')

        mkdir_p(self.archive_path)
        make_symlink(self.archive_path, self.archive_sym_path)

        # Remove work symlink
        if os.path.islink(self.work_sym_path):
            os.remove(self.work_sym_path)

        mkdir_p(self.restart_path)

        # Let each submodel archive its own restarts into restart_path
        for model in self.models:
            model.archive()

        # Postprocess the model suite
        # NOTE(review): for multi-model suites this calls self.model.archive()
        # after the per-model loop above — presumably self.model is a
        # suite-level/top-level model distinct from self.models; confirm it
        # is not archived twice.
        if len(self.models) > 1:
            self.model.archive()

        # Double-check that the run path does not exist
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        cmd = 'mv {work} {output}'.format(
            work=self.work_path,
            output=self.output_path
        )
        sp.check_call(shlex.split(cmd))

        # Remove old restart files
        # TODO: Move to subroutine
        restart_freq = self.config.get('restart_freq', default_restart_freq)
        restart_history = self.config.get('restart_history',
                                          default_restart_history)

        # Remove any outdated restart files
        prior_restart_dirs = [d for d in os.listdir(self.archive_path)
                              if d.startswith('restart')]

        for res_dir in prior_restart_dirs:

            # Restart dirs are named 'restartNNN'; keep every restart_freq-th
            # one, and keep the most recent restart_history entries.
            # NOTE(review): str.lstrip('restart') strips *characters*, not a
            # prefix — safe only while the run index contains no letters
            # from 'restart'; verify naming convention.
            res_idx = int(res_dir.lstrip('restart'))
            if (self.repeat_run or
                    (not res_idx % restart_freq == 0 and
                     res_idx <= (self.counter - restart_history))):

                res_path = os.path.join(self.archive_path, res_dir)

                # Only delete real directories; ignore symbolic restart links
                if os.path.isdir(res_path):
                    shutil.rmtree(res_path)

        # Re-invoke payu as a separate collation job (enabled by default)
        collate_config = self.config.get('collate', {})
        if collate_config.get('enable', True):
            cmd = '{python} {payu} collate -i {expt}'.format(
                python=sys.executable,
                payu=self.payu_path,
                expt=self.counter
            )
            sp.check_call(shlex.split(cmd))

        # Optional HPCToolkit profile post-processing job
        if self.config.get('hpctoolkit', False):
            cmd = '{python} {payu} profile -i {expt}'.format(
                python=sys.executable,
                payu=self.payu_path,
                expt=self.counter
            )
            sp.check_call(shlex.split(cmd))

        # User-supplied post-archive hook
        archive_script = self.userscripts.get('archive')
        if archive_script:
            self.run_userscript(archive_script)
示例#48
0
    def github_setup(self):
        """Set up authentication keys and API tokens.

        Creates (if needed) the remote GitHub repository, registers a
        git remote pointing at it, generates a payu-specific SSH key,
        and deploys the public key to the repository.
        """
        github_auth = self.authenticate()
        github_username = github_auth[0]

        expt_name = self.config.get('name', self.expt.name)
        expt_description = self.expt.config.get('description')
        if not expt_description:
            expt_description = input('Briefly describe the experiment: ')
            assert isinstance(expt_description, str)
        expt_private = self.config.get('private', False)

        # 1. Create the organisation if needed
        # NOTE: os.path.join is used to build URLs; this only produces '/'
        #       separators on POSIX platforms.
        github_api_url = 'https://api.github.com'
        org_name = self.config.get('organization')
        if org_name:
            repo_target = org_name

            # Check if org exists
            org_query_url = os.path.join(github_api_url, 'orgs', org_name)
            org_req = requests.get(org_query_url)

            if org_req.status_code == 404:
                # NOTE: Orgs cannot be created via the API
                print('payu: github organization {org} does not exist.'
                      ''.format(org=org_name))
                print('      You must first create this on the website.')

            elif org_req.status_code == 200:
                # TODO: Confirm that the user can interact with the repo
                pass

            else:
                # TODO: Exit with grace
                print('payu: abort!')
                sys.exit(-1)

            repo_query_url = os.path.join(github_api_url, 'orgs', org_name,
                                          'repos')
            repo_api_url = os.path.join(github_api_url, 'repos', org_name,
                                        expt_name)
        else:
            repo_target = github_username

            # Create repo in user account
            repo_query_url = os.path.join(github_api_url, 'user', 'repos')
            repo_api_url = os.path.join(github_api_url, 'repos',
                                        github_username, expt_name)

        # 2. Create the remote repository
        # Page through the full repo listing (100 per page) to see whether
        # the experiment repo already exists.
        user_repos = []
        page = 1
        while True:
            repo_params = {'page': page, 'per_page': 100}
            repo_query = requests.get(repo_query_url, auth=github_auth,
                                      params=repo_params)
            assert repo_query.status_code == 200
            if repo_query.json():
                user_repos.extend(list(r['name'] for r in repo_query.json()))
                page += 1
            else:
                break

        if expt_name not in user_repos:
            repo_config = {
                'name': expt_name,
                'description': expt_description,
                'private': expt_private,
                'has_issues': True,
                'has_downloads': True,
                'has_wiki': False
            }

            repo_gen = requests.post(repo_query_url, json.dumps(repo_config),
                                     auth=github_auth)

            # 201 Created
            assert repo_gen.status_code == 201

        # 3. Check if remote is set
        # check_output returns bytes under Python 3; decode before splitting
        # into lines, otherwise str.split('\n') raises TypeError.
        git_remote_out = sp.check_output(shlex.split('git remote -v'),
                                         cwd=self.expt.control_path).decode()

        git_remotes = dict([(r.split()[0], r.split()[1])
                            for r in git_remote_out.split('\n') if r])

        remote_name = self.config.get('remote', 'payu')
        remote_url = os.path.join('ssh://git@github.com', repo_target,
                                  self.expt.name + '.git')

        if remote_name not in git_remotes:
            cmd = ('git remote add {name} {url}'
                   ''.format(name=remote_name, url=remote_url))
            sp.check_call(shlex.split(cmd), cwd=self.expt.control_path)
        elif git_remotes[remote_name] != remote_url:
            print('payu: error: Existing remote URL does not match '
                  'the proposed URL.')
            print('payu: error: To delete the old remote, type '
                  '`git remote rm {name}`.'.format(name=remote_name))
            sys.exit(-1)

        # 4. Generate a payu-specific SSH key
        default_ssh_key = 'id_rsa_payu_' + expt_name
        ssh_key = self.config.get('sshid', default_ssh_key)
        ssh_dir = os.path.join(os.path.expanduser('~'), '.ssh', 'payu')
        mkdir_p(ssh_dir)

        ssh_keypath = os.path.join(ssh_dir, ssh_key)
        if not os.path.isfile(ssh_keypath):
            # Key path is relative to ssh_dir (cwd of the subprocess)
            cmd = 'ssh-keygen -t rsa -f {key} -q -P ""'.format(key=ssh_key)
            sp.check_call(shlex.split(cmd), cwd=ssh_dir)

        # 5. Deploy key to repo
        # Drop the trailing comment field of the public key; GitHub only
        # stores 'type base64data'.
        with open(ssh_keypath + '.pub') as keyfile:
            pubkey = ' '.join(keyfile.read().split()[:-1])

        # TODO: Get this from github?
        repo_keys_url = os.path.join(repo_api_url, 'keys')
        keys_req = requests.get(repo_keys_url, auth=github_auth)
        assert keys_req.status_code == 200

        if not any(k['key'] == pubkey for k in keys_req.json()):
            add_key_param = {'title': 'payu', 'key': pubkey}
            add_key_req = requests.post(repo_keys_url, auth=github_auth,
                                        json=add_key_param)
            assert add_key_req.status_code == 201
示例#49
0
    def run(self, *user_flags):
        """Assemble and execute the MPMD `mpirun` command for the suite.

        Sets up environment modules, builds one colon-separated program
        segment per model, redirects stdout/stderr to the experiment log
        files, archives logs on failure, and decrements the resubmission
        counter on success.

        Parameters
        ----------
        *user_flags : str
            Extra MPI flags appended after the configured flags.
        """
        # XXX: This was previously done in reversion
        envmod.setup()

        self.load_modules()

        f_out = open(self.stdout_fname, 'w')
        f_err = open(self.stderr_fname, 'w')

        # Set MPI environment variables
        env = self.config.get('env')

        # Explicitly check for `None`, in case of an empty `env:` entry
        if env is None:
            env = {}

        for var in env:
            # A bare `VAR:` entry in config.yaml parses to None; export it
            # as an empty string rather than the literal 'None'.
            if env[var] is None:
                env_value = ''
            else:
                env_value = str(env[var])

            os.environ[var] = env_value

        mpi_config = self.config.get('mpi', {})
        mpi_runcmd = mpi_config.get('runcmd', 'mpirun')

        if self.config.get('scalasca', False):
            mpi_runcmd = ' '.join(['scalasca -analyze', mpi_runcmd])

        # MPI runtime flags
        mpi_flags = mpi_config.get('flags', [])
        if not mpi_flags:
            mpi_flags = self.config.get('mpirun', [])
            # TODO: Legacy config removal warning

        if not isinstance(mpi_flags, list):
            mpi_flags = [mpi_flags]

        # TODO: More uniform support needed here
        if self.config.get('scalasca', False):
            mpi_flags = ['\"{0}\"'.format(f) for f in mpi_flags]

        # XXX: I think this may be broken
        if user_flags:
            mpi_flags.extend(list(user_flags))

        if self.debug:
            mpi_flags.append('--debug')

        mpi_progs = []
        for model in self.models:

            # Skip models without executables (e.g. couplers)
            if not model.exec_path:
                continue

            mpi_config = self.config.get('mpi', {})
            mpi_module = mpi_config.get('module', None)

            # Update MPI library module (if not explicitly set)
            # TODO: Check for MPI library mismatch across multiple binaries
            if mpi_module is None:
                mpi_module = envmod.lib_update(model.exec_path, 'libmpi.so')

            model_prog = []

            # Our MPICH wrapper does not support a working directory flag
            if not mpi_module.startswith('mvapich'):
                model_prog.append('-wdir {0}'.format(model.work_path))

            # Append any model-specific MPI flags
            model_flags = model.config.get('mpiflags', [])
            if not isinstance(model_flags, list):
                model_prog.append(model_flags)
            else:
                model_prog.extend(model_flags)

            model_ncpus = model.config.get('ncpus')
            if model_ncpus:
                model_prog.append('-np {0}'.format(model_ncpus))

            model_npernode = model.config.get('npernode')
            # TODO: New Open MPI format?
            if model_npernode:
                if model_npernode % 2 == 0:
                    # Integer (floor) division: true division yields a
                    # float under Python 3, producing 'ppr:2.0:socket'.
                    npernode_flag = ('-map-by ppr:{0}:socket'
                                     ''.format(model_npernode // 2))
                else:
                    npernode_flag = ('-map-by ppr:{0}:node'
                                     ''.format(model_npernode))

                if self.config.get('scalasca', False):
                    npernode_flag = '\"{0}\"'.format(npernode_flag)
                model_prog.append(npernode_flag)

            if self.config.get('hpctoolkit', False):
                os.environ['HPCRUN_EVENT_LIST'] = 'WALLCLOCK@5000'
                model_prog.append('hpcrun')

            for prof in self.profilers:
                if prof.runscript:
                    # NOTE: list.append returns None; rebinding model_prog
                    #       to its result would crash on the next append.
                    model_prog.append(prof.runscript)

            model_prog.append(model.exec_prefix)
            model_prog.append(model.exec_path)

            mpi_progs.append(' '.join(model_prog))

        cmd = '{runcmd} {flags} {exes}'.format(
            runcmd=mpi_runcmd,
            flags=' '.join(mpi_flags),
            exes=' : '.join(mpi_progs)
        )

        for prof in self.profilers:
            cmd = prof.wrapper(cmd)

        # Expand shell variables inside flags
        if self.expand_shell_vars:
            cmd = os.path.expandvars(cmd)

        print(cmd)

        # Our MVAPICH wrapper does not support working directories
        if mpi_module.startswith('mvapich'):
            curdir = os.getcwd()
            os.chdir(self.work_path)
        else:
            curdir = None

        # NOTE: This may not be necessary, since env seems to be getting
        # correctly updated.  Need to look into this.
        if env:
            # TODO: Replace with mpirun -x flag inputs
            proc = sp.Popen(shlex.split(cmd), stdout=f_out, stderr=f_err,
                            env=os.environ.copy())
            proc.wait()
            rc = proc.returncode
        else:
            rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err)

        # Return to control directory
        if curdir:
            os.chdir(curdir)

        if self.runlog:
            self.runlog.commit()

        f_out.close()
        f_err.close()

        # Remove any empty output files (e.g. logs)
        for fname in os.listdir(self.work_path):
            fpath = os.path.join(self.work_path, fname)
            if os.path.getsize(fpath) == 0:
                os.remove(fpath)

        # Clean up any profiling output
        # TODO: Move after `rc` code check?
        for prof in self.profilers:
            prof.postprocess()

        # TODO: Need a model-specific cleanup method call here
        # NOTE: This does not appear to catch hanging jobs killed by PBS
        if rc != 0:
            # Backup logs for failed runs
            error_log_dir = os.path.join(self.archive_path, 'error_logs')
            mkdir_p(error_log_dir)

            # NOTE: This is PBS-specific
            job_id = os.environ.get('PBS_JOBID', '')

            for fname in (self.stdout_fname, self.stderr_fname):
                src = os.path.join(self.control_path, fname)

                # NOTE: This assumes standard .out/.err extensions
                dest = os.path.join(error_log_dir,
                                    fname[:-4] + '.' + job_id + fname[-4:])
                print(src, dest)

                shutil.copyfile(src, dest)

            # Create the symlink to the logs if it does not exist
            make_symlink(self.archive_path, self.archive_sym_path)

            # Terminate payu
            sys.exit('payu: Model exited with error code {0}; aborting.'
                     ''.format(rc))

        # Decrement run counter on successful run
        stop_file_path = os.path.join(self.control_path, 'stop_run')
        if os.path.isfile(stop_file_path):
            assert os.stat(stop_file_path).st_size == 0
            os.remove(stop_file_path)
            print('payu: Stop file detected; terminating resubmission.')
            self.n_runs = 0
        else:
            self.n_runs -= 1

        # Move logs to archive (or delete if empty)
        for f in (self.stdout_fname, self.stderr_fname):
            f_path = os.path.join(self.control_path, f)
            if os.path.getsize(f_path) == 0:
                os.remove(f_path)
            else:
                shutil.move(f_path, self.work_path)

        run_script = self.userscripts.get('run')
        if run_script:
            self.run_userscript(run_script)
示例#50
0
    def github_setup(self):
        """Set up authentication keys and API tokens.

        Creates (if necessary) the GitHub repository for this
        experiment, registers it as a git remote in the control
        directory, generates a payu-specific SSH keypair, and uploads
        the public key as a deploy key on the repository.

        Aborts via ``sys.exit`` on unexpected GitHub API responses or
        when an existing git remote points at a different URL.
        """
        github_auth = self.authenticate()
        github_username = github_auth[0]

        expt_name = self.config.get('name', self.expt.name)
        expt_description = self.expt.config.get('description')
        if not expt_description:
            expt_description = input('Briefly describe the experiment: ')
            assert isinstance(expt_description, str)
        expt_private = self.config.get('private', False)

        # 1. Create the organisation if needed
        # NOTE(review): os.path.join builds these URLs with '/' only on
        # POSIX hosts; posixpath.join (or '/'.join) would be portable.
        github_api_url = 'https://api.github.com'
        org_name = self.config.get('organization')
        if org_name:
            repo_target = org_name

            # Check if org exists
            org_query_url = os.path.join(github_api_url, 'orgs', org_name)
            org_req = requests.get(org_query_url)

            if org_req.status_code == 404:
                # NOTE: Orgs cannot be created via the API
                print('payu: github organization {org} does not exist.'
                      ''.format(org=org_name))
                print('      You must first create this on the website.')

            elif org_req.status_code == 200:
                # TODO: Confirm that the user can interact with the repo
                pass

            else:
                # TODO: Exit with grace
                print('payu: abort!')
                sys.exit(-1)

            repo_query_url = os.path.join(github_api_url, 'orgs', org_name,
                                          'repos')
            repo_api_url = os.path.join(github_api_url, 'repos', org_name,
                                        expt_name)
        else:
            repo_target = github_username

            # Create repo in user account
            repo_query_url = os.path.join(github_api_url, 'user', 'repos')
            repo_api_url = os.path.join(github_api_url, 'repos',
                                        github_username, expt_name)

        # 2. Create the remote repository
        # Page through the full repository listing (100 per page) until
        # an empty page signals the end, to see if the repo exists.
        user_repos = []
        page = 1
        while True:
            repo_params = {'page': page, 'per_page': 100}
            repo_query = requests.get(repo_query_url,
                                      auth=github_auth,
                                      params=repo_params)
            assert repo_query.status_code == 200
            if repo_query.json():
                user_repos.extend(list(r['name'] for r in repo_query.json()))
                page += 1
            else:
                break

        if expt_name not in user_repos:
            repo_config = {
                'name': expt_name,
                'description': expt_description,
                'private': expt_private,
                'has_issues': True,
                'has_downloads': True,
                'has_wiki': False
            }

            repo_gen = requests.post(repo_query_url,
                                     json.dumps(repo_config),
                                     auth=github_auth)

            assert repo_gen.status_code == 201

        # 3. Check if remote is set
        # Decode the output: check_output returns bytes on Python 3,
        # which would make the str split below raise TypeError.
        git_remote_out = sp.check_output(shlex.split('git remote -v'),
                                         cwd=self.expt.control_path).decode()

        git_remotes = dict([(r.split()[0], r.split()[1])
                            for r in git_remote_out.split('\n') if r])

        remote_name = self.config.get('remote', 'payu')
        # NOTE(review): '[email protected]' looks like a redacted
        # placeholder for the SSH host (e.g. 'ssh://git@github.com');
        # confirm the intended URL.  Also note the repo was created as
        # `expt_name` (which honours a config override) while this URL
        # uses `self.expt.name` — verify these agree.
        remote_url = os.path.join('ssh://[email protected]', repo_target,
                                  self.expt.name + '.git')

        if remote_name not in git_remotes:
            cmd = ('git remote add {name} {url}'
                   ''.format(name=remote_name, url=remote_url))
            sp.check_call(shlex.split(cmd), cwd=self.expt.control_path)
        elif git_remotes[remote_name] != remote_url:
            print('payu: error: Existing remote URL does not match '
                  'the proposed URL.')
            print('payu: error: To delete the old remote, type '
                  '`git remote rm {name}`.'.format(name=remote_name))
            sys.exit(-1)

        # 4. Generate a payu-specific SSH key
        default_ssh_key = 'id_rsa_payu_' + expt_name
        ssh_key = self.config.get('sshid', default_ssh_key)
        ssh_dir = os.path.join(os.path.expanduser('~'), '.ssh', 'payu')
        mkdir_p(ssh_dir)

        ssh_keypath = os.path.join(ssh_dir, ssh_key)
        if not os.path.isfile(ssh_keypath):
            # -q quiet, -P "" empty passphrase; run in ssh_dir so the
            # relative key filename lands there
            cmd = 'ssh-keygen -t rsa -f {key} -q -P ""'.format(key=ssh_key)
            sp.check_call(shlex.split(cmd), cwd=ssh_dir)

        # 5. Deploy key to repo
        # Drop the trailing comment field of the .pub file; GitHub only
        # compares the algorithm + key material.
        with open(ssh_keypath + '.pub') as keyfile:
            pubkey = ' '.join(keyfile.read().split()[:-1])

        # TODO: Get this from github?
        repo_keys_url = os.path.join(repo_api_url, 'keys')
        keys_req = requests.get(repo_keys_url, auth=github_auth)
        assert keys_req.status_code == 200

        if not any(k['key'] == pubkey for k in keys_req.json()):
            add_key_param = {'title': 'payu', 'key': pubkey}
            add_key_req = requests.post(repo_keys_url,
                                        auth=github_auth,
                                        json=add_key_param)
            assert add_key_req.status_code == 201
示例#51
0
    def archive(self):
        """Archive the completed model run.

        Moves the work directory to the output path, archives each
        submodel, prunes restart directories that are no longer needed
        (per ``restart_freq`` / ``restart_history``), then launches
        collation/profiling jobs and any user 'archive' script.

        Exits via ``sys.exit`` when there is no work directory or the
        output path already exists.
        """
        if not self.config.get('archive', True):
            print('payu: not archiving due to config.yaml setting.')
            return

        # Check there is a work directory, otherwise bail
        if not os.path.exists(self.work_sym_path):
            sys.exit('payu: error: No work directory to archive.')

        mkdir_p(self.archive_path)
        make_symlink(self.archive_path, self.archive_sym_path)

        # Remove work symlink
        if os.path.islink(self.work_sym_path):
            os.remove(self.work_sym_path)

        mkdir_p(self.restart_path)

        for model in self.models:
            model.archive()

        # Postprocess the model suite
        # (self.model is the top-level driver when running coupled models)
        if len(self.models) > 1:
            self.model.archive()

        # Double-check that the run path does not exist
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        movetree(self.work_path, self.output_path)

        # Remove old restart files
        # TODO: Move to subroutine
        restart_freq = self.config.get('restart_freq', default_restart_freq)
        restart_history = self.config.get('restart_history',
                                          default_restart_history)

        # Remove any outdated restart files
        prior_restart_dirs = [d for d in os.listdir(self.archive_path)
                              if d.startswith('restart')]

        for res_dir in prior_restart_dirs:

            # Slice off the 'restart' prefix to get the numeric index.
            # (str.lstrip('restart') strips a character *set* and only
            # worked here by accident because digits are not in it.)
            res_idx = int(res_dir[len('restart'):])
            if (self.repeat_run or
                    (not res_idx % restart_freq == 0 and
                     res_idx <= (self.counter - restart_history))):

                res_path = os.path.join(self.archive_path, res_dir)

                # Only delete real directories; ignore symbolic restart links
                if (os.path.isdir(res_path) and not os.path.islink(res_path)):
                    shutil.rmtree(res_path)

        # Ensure dynamic library support for subsequent python calls.
        # Use .get() so an unset LD_LIBRARY_PATH does not raise KeyError.
        ld_libpaths = os.environ.get('LD_LIBRARY_PATH', '')
        py_libpath = sysconfig.get_config_var('LIBDIR')
        if py_libpath not in ld_libpaths.split(':'):
            # Filter empty components to avoid a dangling ':' when the
            # original path was empty.
            os.environ['LD_LIBRARY_PATH'] = ':'.join(
                p for p in (py_libpath, ld_libpaths) if p)

        collate_config = self.config.get('collate', {})
        if collate_config.get('enable', True):
            cmd = '{python} {payu} collate -i {expt}'.format(
                python=sys.executable,
                payu=self.payu_path,
                expt=self.counter
            )
            sp.check_call(shlex.split(cmd))

        if self.config.get('hpctoolkit', False):
            cmd = '{python} {payu} profile -i {expt}'.format(
                python=sys.executable,
                payu=self.payu_path,
                expt=self.counter
            )
            sp.check_call(shlex.split(cmd))

        archive_script = self.userscripts.get('archive')
        if archive_script:
            self.run_userscript(archive_script)
示例#52
0
    def run(self, *user_flags):
        """Run the model (or coupled model suite) under MPI.

        Assembles the mpirun command line from config.yaml settings and
        per-model options, executes it with stdout/stderr redirected to
        the job log files, records job metadata to ``self.job_fname``,
        and archives or removes the logs.  On a nonzero model exit code
        the logs are backed up to the archive and payu aborts.

        Parameters
        ----------
        *user_flags :
            Additional MPI runtime flags appended to the command.
        """

        # XXX: This was previously done in reversion
        envmod.setup()

        self.load_modules()

        f_out = open(self.stdout_fname, 'w')
        f_err = open(self.stderr_fname, 'w')

        # Set MPI environment variables
        env = self.config.get('env')

        # Explicitly check for `None`, in case of an empty `env:` entry
        if env is None:
            env = {}

        for var in env:

            # A bare `var:` YAML entry maps to an empty string
            if env[var] is None:
                env_value = ''
            else:
                env_value = str(env[var])

            os.environ[var] = env_value

        mpi_config = self.config.get('mpi', {})
        mpi_runcmd = mpi_config.get('runcmd', 'mpirun')

        if self.config.get('scalasca', False):
            mpi_runcmd = ' '.join(['scalasca -analyze', mpi_runcmd])

        # MPI runtime flags
        mpi_flags = mpi_config.get('flags', [])
        if not mpi_flags:
            mpi_flags = self.config.get('mpirun', [])
            # TODO: Legacy config removal warning

        if not isinstance(mpi_flags, list):
            mpi_flags = [mpi_flags]

        # TODO: More uniform support needed here
        if self.config.get('scalasca', False):
            mpi_flags = ['\"{0}\"'.format(f) for f in mpi_flags]

        # XXX: I think this may be broken
        if user_flags:
            mpi_flags.extend(list(user_flags))

        if self.debug:
            mpi_flags.append('--debug')

        # Holds the MPI module of the last executable model; initialised
        # here so the MVAPICH workaround below does not hit a NameError
        # when no model has an executable.
        mpi_module = None

        mpi_progs = []
        for model in self.models:

            # Skip models without executables (e.g. couplers)
            if not model.exec_path_local:
                continue

            # (mpi_config was redundantly re-fetched here; it is already
            # in scope and unchanged.)
            mpi_module = mpi_config.get('module', None)

            # Update MPI library module (if not explicitly set)
            # TODO: Check for MPI library mismatch across multiple binaries
            if mpi_module is None:
                mpi_module = envmod.lib_update(
                    model.exec_path_local,
                    'libmpi.so'
                )

            model_prog = []

            # Our MPICH wrapper does not support a working directory flag
            if not mpi_module.startswith('mvapich'):
                model_prog.append('-wdir {0}'.format(model.work_path))

            # Append any model-specific MPI flags
            model_flags = model.config.get('mpiflags', [])
            if not isinstance(model_flags, list):
                model_prog.append(model_flags)
            else:
                model_prog.extend(model_flags)

            model_ncpus = model.config.get('ncpus')
            if model_ncpus:
                model_prog.append('-np {0}'.format(model_ncpus))

            model_npernode = model.config.get('npernode')
            # TODO: New Open MPI format?
            if model_npernode:
                if model_npernode % 2 == 0:
                    # Integer division: `/` yields a float on Python 3,
                    # producing an invalid flag such as 'ppr:8.0:socket'
                    npernode_flag = ('-map-by ppr:{0}:socket'
                                     ''.format(model_npernode // 2))
                else:
                    npernode_flag = ('-map-by ppr:{0}:node'
                                     ''.format(model_npernode))

                if self.config.get('scalasca', False):
                    npernode_flag = '\"{0}\"'.format(npernode_flag)
                model_prog.append(npernode_flag)

            if self.config.get('hpctoolkit', False):
                os.environ['HPCRUN_EVENT_LIST'] = 'WALLCLOCK@5000'
                model_prog.append('hpcrun')

            for prof in self.profilers:
                if prof.runscript:
                    # FIX: previously `model_prog = model_prog.append(...)`,
                    # which rebound model_prog to None (list.append returns
                    # None) and crashed on the following append.
                    model_prog.append(prof.runscript)

            model_prog.append(model.exec_prefix)

            # Use the full path to symlinked exec_name in work as some
            # older MPI libraries complained executable was not in PATH
            model_prog.append(os.path.join(model.work_path, model.exec_name))

            mpi_progs.append(' '.join(model_prog))

        cmd = '{runcmd} {flags} {exes}'.format(
            runcmd=mpi_runcmd,
            flags=' '.join(mpi_flags),
            exes=' : '.join(mpi_progs)
        )

        for prof in self.profilers:
            cmd = prof.wrapper(cmd)

        # Expand shell variables inside flags
        if self.expand_shell_vars:
            cmd = os.path.expandvars(cmd)

        # TODO: Consider making this default
        if self.config.get('coredump', False):
            enable_core_dump()

        # Our MVAPICH wrapper does not support working directories, so
        # run from the work path and return afterwards.  Guard against
        # mpi_module being None (no executable models).
        if mpi_module and mpi_module.startswith('mvapich'):
            curdir = os.getcwd()
            os.chdir(self.work_path)
        else:
            curdir = None

        # Dump out environment
        with open(self.env_fname, 'w') as file:
            file.write(yaml.dump(dict(os.environ), default_flow_style=False))

        self.runlog.create_manifest()
        if self.runlog.enabled:
            self.runlog.commit()

        # NOTE: This may not be necessary, since env seems to be getting
        # correctly updated.  Need to look into this.
        print(cmd)
        if env:
            # TODO: Replace with mpirun -x flag inputs
            proc = sp.Popen(shlex.split(cmd), stdout=f_out, stderr=f_err,
                            env=os.environ.copy())
            proc.wait()
            rc = proc.returncode
        else:
            rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err)

        # Return to control directory
        if curdir:
            os.chdir(curdir)

        f_out.close()
        f_err.close()

        self.finish_time = datetime.datetime.now()

        info = get_job_info()

        if info is None:
            # Not being run under PBS, reverse engineer environment
            info = {
                'PAYU_PATH': os.path.dirname(self.payu_path)
            }

        # Add extra information to save to jobinfo
        info.update(
            {
                'PAYU_CONTROL_DIR': self.control_path,
                'PAYU_RUN_ID': self.run_id,
                'PAYU_CURRENT_RUN': self.counter,
                'PAYU_N_RUNS':  self.n_runs,
                'PAYU_JOB_STATUS': rc,
                'PAYU_START_TIME': self.start_time.isoformat(),
                'PAYU_FINISH_TIME': self.finish_time.isoformat(),
                'PAYU_WALLTIME': "{0} s".format(
                    (self.finish_time - self.start_time).total_seconds()
                ),
            }
        )

        # Dump job info
        with open(self.job_fname, 'w') as file:
            file.write(yaml.dump(info, default_flow_style=False))

        # Remove any empty output files (e.g. logs)
        for fname in os.listdir(self.work_path):
            fpath = os.path.join(self.work_path, fname)
            if os.path.getsize(fpath) == 0:
                os.remove(fpath)

        # Clean up any profiling output
        # TODO: Move after `rc` code check?
        for prof in self.profilers:
            prof.postprocess()

        # TODO: Need a model-specific cleanup method call here
        # NOTE: This does not appear to catch hanging jobs killed by PBS
        if rc != 0:
            # Backup logs for failed runs
            error_log_dir = os.path.join(self.archive_path, 'error_logs')
            mkdir_p(error_log_dir)

            # NOTE: This is PBS-specific
            job_id = get_job_id(short=False)

            if job_id == '':
                # Fall back to a short run-id prefix when not under PBS
                job_id = str(self.run_id)[:6]

            for fname in self.output_fnames:

                src = os.path.join(self.control_path, fname)

                stem, suffix = os.path.splitext(fname)
                dest = os.path.join(error_log_dir,
                                    ".".join((stem, job_id)) + suffix)

                print(src, dest)

                shutil.copyfile(src, dest)

            # Create the symlink to the logs if it does not exist
            make_symlink(self.archive_path, self.archive_sym_path)

            error_script = self.userscripts.get('error')
            if error_script:
                self.run_userscript(error_script)

            # Terminate payu
            sys.exit('payu: Model exited with error code {0}; aborting.'
                     ''.format(rc))

        # Decrement run counter on successful run
        stop_file_path = os.path.join(self.control_path, 'stop_run')
        if os.path.isfile(stop_file_path):
            # The stop file must be empty; a non-empty file suggests a
            # mistaken path or stray content.
            assert os.stat(stop_file_path).st_size == 0
            os.remove(stop_file_path)
            print('payu: Stop file detected; terminating resubmission.')
            self.n_runs = 0
        else:
            self.n_runs -= 1

        # Move logs to archive (or delete if empty)
        for f in self.output_fnames:
            f_path = os.path.join(self.control_path, f)
            if os.path.getsize(f_path) == 0:
                os.remove(f_path)
            else:
                shutil.move(f_path, self.work_path)

        run_script = self.userscripts.get('run')
        if run_script:
            self.run_userscript(run_script)
示例#53
0
文件: cice5.py 项目: coecms/payu
    def setup(self):
        super(Cice5, self).setup()

        # Make log dir
        mkdir_p(os.path.join(self.work_path, 'log'))