Exemplo n.º 1
0
def test_copy_back():
    """Check that Worker._copy_back() copies registered dirs back.

    A stand-in worker registers one location inside the ensemble directory
    ``./calc``; after the copy-back, an entry named ``file`` is expected in
    ``./calc_back``. Scratch directories are removed even when an assertion
    fails, so a failing run cannot leave state behind for later tests.
    """
    class FakeWorker:
        """Carries only the attributes that _copy_back() reads."""
        def __init__(self, libE_specs, prefix, startdir, loc_stack):
            self.libE_specs = libE_specs
            self.prefix = prefix
            self.startdir = startdir
            self.loc_stack = loc_stack

    inputdir = './calc'
    copybackdir = './calc_back'
    inputfile = './calc/file'
    scratch_dirs = [inputdir, copybackdir]

    try:
        for path in scratch_dirs:
            os.makedirs(path, exist_ok=True)

        libE_specs = {
            'sim_dirs_make': True,
            'ensemble_dir_path': inputdir,
            'ensemble_copy_back': True
        }

        ls = LocationStack()
        # NOTE(review): nothing here creates ./calc/file, so this presumably
        # relies on register_loc() creating it as a directory -- confirm.
        ls.register_loc('test', inputfile)
        fake_worker = FakeWorker(libE_specs, inputdir, '.', ls)
        Worker._copy_back(fake_worker)
        assert 'file' in os.listdir(copybackdir), \
            'File not copied back to starting dir'
    finally:
        # Clean up unconditionally; ignore paths that were never created.
        for path in scratch_dirs:
            shutil.rmtree(path, ignore_errors=True)
Exemplo n.º 2
0
def test_copy_back_exception():
    """Check how Worker._copy_back() reacts to FileExistsError.

    Two phases are exercised against the same scratch directories:

    1. ``sim_dirs_make`` False: copying back twice must not raise (the
       second copy hits an existing destination and is skipped).
    2. ``sim_dirs_make`` True: copying back onto the existing destination
       must raise ``FileExistsError``.

    Scratch directories are removed even when an assertion fails.
    """
    class FakeWorker:
        """Carries only the attributes that _copy_back() reads."""
        def __init__(self, libE_specs, prefix, startdir, loc_stack):
            self.libE_specs = libE_specs
            self.prefix = prefix
            self.startdir = startdir
            self.loc_stack = loc_stack

    inputdir = './calc'
    copybackdir = './calc_back'
    inputfile = './calc/file'
    scratch_dirs = [inputdir, copybackdir]

    try:
        for path in scratch_dirs:
            os.makedirs(path, exist_ok=True)

        libE_specs = {
            'sim_dirs_make': False,
            'ensemble_dir_path': inputdir,
            'ensemble_copy_back': True
        }

        ls = LocationStack()
        ls.register_loc('test', inputfile)
        fake_worker = FakeWorker(libE_specs, inputdir, '.', ls)

        # Testing catch and continue
        for _ in range(2):
            Worker._copy_back(fake_worker)
        assert 'file' in os.listdir(copybackdir), \
            'File not copied back to starting dir'

        libE_specs = {
            'sim_dirs_make': True,
            'ensemble_dir_path': inputdir,
            'ensemble_copy_back': True
        }
        fake_worker = FakeWorker(libE_specs, inputdir, '.', ls)

        # Testing catch and raise
        raised = False
        try:
            Worker._copy_back(fake_worker)
        except FileExistsError:
            raised = True
        assert raised, \
            'Expected FileExistsError when copying back over existing dir'
    finally:
        # Clean up unconditionally; ignore paths that were never created.
        for path in scratch_dirs:
            shutil.rmtree(path, ignore_errors=True)
Exemplo n.º 3
0
def test_worker_dirs_but_no_sim_dirs():
    """Test Worker._make_calc_dir() directory structure without sim_dirs.

    With ``use_worker_dirs`` on and ``sim_dirs_make`` off, repeated calls
    for worker 1 are expected to leave a ``worker1`` directory in the
    ensemble directory containing the entry copied from ``sim_input_dir``.
    Scratch directories are removed even when an assertion fails.
    """
    inputdir = './calc'
    inputfile = './calc/file'
    ensemble_dir = './test_ens'

    try:
        # 'file' is deliberately created as a directory entry of inputdir.
        for path in [inputdir, inputfile, ensemble_dir]:
            os.makedirs(path, exist_ok=True)

        libE_specs = {
            'sim_dirs_make': False,
            'ensemble_dir_path': ensemble_dir,
            'use_worker_dirs': True,
            'sim_input_dir': inputdir
        }

        ls = LocationStack()
        for _ in range(4):  # Should work at any range
            Worker._make_calc_dir(libE_specs, 1, 1, 'sim', ls)

        assert 'worker1' in os.listdir(ensemble_dir)
        assert 'file' in os.listdir(os.path.join(ensemble_dir, 'worker1'))
    finally:
        # inputfile lives inside inputdir, so removing inputdir covers it.
        for path in [inputdir, ensemble_dir]:
            shutil.rmtree(path, ignore_errors=True)
Exemplo n.º 4
0
    def _determine_dir_then_calc(self, Work, calc_type, calc_in, calc):
        """Pick the calc_dir for this work item, then run ``calc`` inside it.

        The location stack is created on first use. Sim calculations are
        labelled with the H-row ranges from ``Work``; every other calc type
        is labelled with this worker's iteration count for that type.
        Returns whatever ``calc`` returns.
        """
        if not self.loc_stack:
            self.loc_stack = LocationStack()

        is_sim = calc_type == EVAL_SIM_TAG
        row_label = (Worker._extract_H_ranges(Work) if is_sim
                     else str(self.calc_iter[calc_type]))

        type_label = calc_type_strings[calc_type]

        target = Worker._make_calc_dir(
            self.libE_specs,
            self.workerID,
            row_label,
            type_label,
            self.loc_stack,
        )

        # Run the user function while switched into the calc directory.
        with self.loc_stack.loc(target):
            result = calc(calc_in, Work['persis_info'], Work['libE_info'])
        return result
Exemplo n.º 5
0
    def _determine_dir_then_calc(self, Work, calc_type, calc_in, calc):
        """Run ``calc``, inside a calc directory when libE_specs asks for one.

        The location stack is created on first use. If none of the keys in
        ``libE_spec_calc_dir_keys`` appears in ``self.libE_specs``, the
        calculation runs in the current directory; otherwise a calc
        directory is made and the calculation runs inside it. Returns
        whatever ``calc`` returns.
        """
        if not self.loc_stack:
            self.loc_stack = LocationStack()

        row_label = Worker._extract_H_ranges(Work)
        type_label = calc_type_strings[calc_type]

        wants_calc_dir = any(key in self.libE_specs
                             for key in libE_spec_calc_dir_keys)

        # No directory-related setting given: stay where we are.
        if not wants_calc_dir:
            return calc(calc_in, Work['persis_info'], Work['libE_info'])

        target = Worker._make_calc_dir(
            self.libE_specs,
            self.workerID,
            row_label,
            type_label,
            self.loc_stack,
        )

        with self.loc_stack.loc(target):  # Switching to calc_dir
            return calc(calc_in, Work['persis_info'], Work['libE_info'])
Exemplo n.º 6
0
class Worker:
    """The worker class provides methods for controlling sim and gen funcs

    **Object Attributes:**

    These are public object attributes.

    :ivar comm comm:
        Comm object for manager communications

    :ivar dict dtypes:
        Dictionary containing type information for sim and gen inputs

    :ivar int workerID:
        The libensemble Worker ID

    :ivar dict sim_specs:
        Parameters/information for simulation calculations

    :ivar dict libE_specs:
        Run-time settings; the directory-related keys are read by
        ``_make_calc_dir`` and ``_copy_back``

    :ivar str startdir:
        Working directory at the time the worker was constructed

    :ivar str prefix:
        Ensemble directory path (``'./ensemble'`` unless overridden by
        ``libE_specs['ensemble_dir_path']``)

    :ivar dict calc_iter:
        Dictionary containing counts for each type of calc (e.g. sim or gen)

    :ivar LocationStack loc_stack:
        Stack holding directory structure of this Worker; ``None`` until
        the first calculation that needs a directory
    """
    def __init__(self, comm, dtypes, workerID, sim_specs, gen_specs,
                 libE_specs):
        """Initializes new worker object

        ``gen_specs`` is only used to build the gen runner; it is not kept
        as an attribute.
        """
        self.comm = comm
        self.dtypes = dtypes
        self.workerID = workerID
        self.sim_specs = sim_specs
        self.libE_specs = libE_specs
        self.startdir = os.getcwd()
        self.prefix = libE_specs.get('ensemble_dir_path', './ensemble')
        self.calc_iter = {EVAL_SIM_TAG: 0, EVAL_GEN_TAG: 0}
        self.loc_stack = None
        self._run_calc = Worker._make_runners(sim_specs, gen_specs)
        self._calc_id_counter = count()
        Worker._set_executor(self.workerID, self.comm)

    @staticmethod
    def _make_calc_dir(libE_specs, workerID, H_rows, calc_str, locs):
        """Create calc dirs and intermediate dirs, copy inputs, based on libE_specs

        Settings read from ``libE_specs`` (with defaults): ``sim_input_dir``
        (``''``), ``sim_dirs_make`` (True), ``ensemble_dir_path``
        (``'./ensemble'``), ``sim_dir_copy_files`` (``[]``),
        ``sim_dir_symlink_files`` (``[]``) and ``use_worker_dirs`` (False).

        Every directory is registered on ``locs``. Returns the key under
        which the directory to run in was registered: the worker ID or the
        ensemble path when no per-sim directories are made, otherwise the
        calc directory name.
        """

        sim_input_dir = libE_specs.get('sim_input_dir', '').rstrip('/')

        do_sim_dirs = libE_specs.get('sim_dirs_make', True)
        prefix = libE_specs.get('ensemble_dir_path', './ensemble')
        copy_files = libE_specs.get('sim_dir_copy_files', [])
        symlink_files = libE_specs.get('sim_dir_symlink_files', [])
        do_work_dirs = libE_specs.get('use_worker_dirs', False)

        # If using sim_input_dir, set of files to copy is contents of provided dir
        if sim_input_dir:
            # list(...) lets copy_files be any sequence, not only a list.
            copy_files = set(list(copy_files) + [
                os.path.join(sim_input_dir, i)
                for i in os.listdir(sim_input_dir)
            ])

        # If identical paths to copy and symlink, remove those paths from symlink_files
        if symlink_files:
            symlink_files = [i for i in symlink_files if i not in copy_files]

        # Cases where individual sim_dirs not created.
        if not do_sim_dirs:
            if do_work_dirs:  # Each worker does work in worker dirs
                key = workerID
                dirname = "worker" + str(workerID)
            else:  # Each worker does work in prefix (ensemble_dir)
                key = prefix
                dirname = prefix
                prefix = None

            # The same directory is registered again on every call, so an
            # already-existing target is expected here.
            locs.register_loc(key,
                              dirname,
                              prefix=prefix,
                              copy_files=copy_files,
                              symlink_files=symlink_files,
                              ignore_FileExists=True)
            return key

        # All cases now should involve sim_dirs
        # ensemble_dir/worker_dir registered here, set as parent dir for sim dirs
        if do_work_dirs:
            worker_dir = "worker" + str(workerID)
            worker_path = os.path.abspath(os.path.join(prefix, worker_dir))
            calc_dir = calc_str + str(H_rows)
            locs.register_loc(workerID, worker_dir, prefix=prefix)
            calc_prefix = worker_path

        # Otherwise, ensemble_dir set as parent dir for sim dirs
        else:
            calc_dir = "{}{}_worker{}".format(calc_str, H_rows, workerID)
            if not os.path.isdir(prefix):
                os.makedirs(prefix, exist_ok=True)
            calc_prefix = prefix

        # Register calc dir with adjusted parent dir and source-file location
        locs.register_loc(
            calc_dir,
            calc_dir,  # Dir name also label in loc stack dict
            prefix=calc_prefix,
            copy_files=copy_files,
            symlink_files=symlink_files)

        return calc_dir

    @staticmethod
    def _make_runners(sim_specs, gen_specs):
        """Creates functions to run a sim or gen

        Returns a dict mapping ``EVAL_SIM_TAG`` and ``EVAL_GEN_TAG`` to
        callables taking ``(calc_in, persis_info, libE_info)``. When
        ``gen_specs`` is falsy the gen entry is an empty list rather than a
        callable.
        """

        sim_f = sim_specs['sim_f']

        def run_sim(calc_in, persis_info, libE_info):
            "Calls the sim func."
            return sim_f(calc_in, persis_info, sim_specs, libE_info)

        if gen_specs:
            gen_f = gen_specs['gen_f']

            def run_gen(calc_in, persis_info, libE_info):
                "Calls the gen func."
                return gen_f(calc_in, persis_info, gen_specs, libE_info)
        else:
            run_gen = []

        return {EVAL_SIM_TAG: run_sim, EVAL_GEN_TAG: run_gen}

    @staticmethod
    def _set_executor(workerID, comm):
        """Optional - sets worker ID in the executor, return if set

        Returns True when ``Executor.executor`` is an ``Executor`` instance
        and was given the worker info, False otherwise.
        """
        exctr = Executor.executor
        if isinstance(exctr, Executor):
            exctr.set_worker_info(comm, workerID)
            return True
        else:
            logger.info("No executor set on worker {}".format(workerID))
            return False

    @staticmethod
    def _extract_H_ranges(Work):
        """ Convert received H_rows into ranges for logging, labeling

        ``Work['libE_info']['H_rows']`` may be a numpy array or any plain
        sequence of ints. Consecutive rows collapse to ``first-last`` and
        the pieces are joined with ``_``; e.g. ``[2, 3, 5, 6]`` gives
        ``'2-3_5-6'``. A single row is returned as its plain string.
        """
        work_H_rows = Work['libE_info']['H_rows']
        if len(work_H_rows) == 1:
            return str(work_H_rows[0])

        # numpy arrays are converted to Python ints; other sequences are
        # taken as they are.
        if hasattr(work_H_rows, 'tolist'):
            rows = work_H_rows.tolist()
        else:
            rows = list(work_H_rows)

        # From https://stackoverflow.com/a/30336492
        # Create groups by difference between row values and sequential enumerations:
        # e.g., [2, 3, 5, 6] -> [(0, 2), (1, 3), (2, 5), (3, 6)]
        #  -> diff=-2, group=[(0, 2), (1, 3)], diff=-3, group=[(2, 5), (3, 6)]
        ranges = []
        for _diff, group in groupby(enumerate(rows),
                                    lambda x: x[0] - x[1]):
            # Take second values (rows values) from each group element into lists:
            # group=[(0, 2), (1, 3)], group=[(2, 5), (3, 6)] -> group=[2, 3], group=[5, 6]
            group = list(map(itemgetter(1), group))
            if len(group) > 1:
                ranges.append(str(group[0]) + '-' + str(group[-1]))
            else:
                ranges.append(str(group[0]))
        return '_'.join(ranges)

    def _copy_back(self):
        """ Copy this worker's registered directories back, if specified

        Does nothing unless ``self.prefix`` is an existing directory and
        ``libE_specs['ensemble_copy_back']`` is truthy, or when no location
        stack has been created yet (nothing to copy).

        The destination is the basename of the ensemble directory relative
        to the current working directory, with ``_back`` appended when that
        would be the ensemble directory itself.

        Raises ``FileExistsError`` if a destination already exists and
        ``sim_dirs_make`` is true (the default); with ``sim_dirs_make``
        false the existing destination is skipped.
        """
        if not (os.path.isdir(self.prefix)
                and self.libE_specs.get('ensemble_copy_back', False)):
            return

        # A worker that never needed a calc directory has no location
        # stack; without this guard the loop below would fail on None.
        if self.loc_stack is None:
            return

        ensemble_dir_path = self.libE_specs.get('ensemble_dir_path',
                                                './ensemble')
        copybackdir = os.path.basename(ensemble_dir_path)
        if os.path.relpath(ensemble_dir_path) == os.path.relpath(
                copybackdir):
            copybackdir += '_back'

        for src in self.loc_stack.dirs.values():
            try:
                shutil.copytree(src,
                                os.path.join(copybackdir,
                                             os.path.basename(src)),
                                symlinks=True)
                if os.path.basename(src).startswith('worker'):
                    break  # Worker dir (with all sim_dirs) copied.
            except FileExistsError:
                if not self.libE_specs.get('sim_dirs_make', True):
                    continue
                else:
                    raise

    def _determine_dir_then_calc(self, Work, calc_type, calc_in, calc):
        """Determines choice for sim_dir structure, then performs calculation.

        If any key from ``libE_spec_calc_dir_keys`` is present in
        ``self.libE_specs`` the calculation runs inside a calc directory;
        otherwise it runs in the current directory. Returns whatever
        ``calc`` returns.
        """

        if not self.loc_stack:
            self.loc_stack = LocationStack()

        H_rows = Worker._extract_H_ranges(Work)
        calc_str = calc_type_strings[calc_type]

        if any(setting in self.libE_specs
               for setting in libE_spec_calc_dir_keys):
            calc_dir = Worker._make_calc_dir(self.libE_specs, self.workerID,
                                             H_rows, calc_str, self.loc_stack)

            with self.loc_stack.loc(calc_dir):  # Switching to calc_dir
                return calc(calc_in, Work['persis_info'], Work['libE_info'])

        return calc(calc_in, Work['persis_info'], Work['libE_info'])

    def _handle_calc(self, Work, calc_in):
        """Runs a calculation on this worker object.

        This routine calls the user calculations. Exceptions are caught,
        dumped to the summary file, and raised.

        Parameters
        ----------

        Work: :obj:`dict`
            :ref:`(example)<datastruct-work-dict>`

        calc_in: obj: numpy structured array
            Rows from the :ref:`history array<datastruct-history-array>`
            for processing

        Returns
        -------

        tuple
            ``(out[0], out[1], calc_status)`` where ``out`` is the user
            function's return tuple and ``calc_status`` is its third
            element if present, else ``UNSET_TAG``; it is replaced by
            ``MAN_SIGNAL_FINISH`` if the manager sent that signal meanwhile.
        """
        calc_type = Work['tag']
        self.calc_iter[calc_type] += 1

        # calc_stats stores timing and summary info for this Calc (sim or gen)
        calc_id = next(self._calc_id_counter)
        timer = Timer()

        # Pre-set so the stats line in ``finally`` can always be formatted,
        # even when something other than an Exception (e.g. a
        # KeyboardInterrupt) cuts the calculation short.
        calc_status = UNSET_TAG

        try:
            logger.debug("Running {}".format(calc_type_strings[calc_type]))
            calc = self._run_calc[calc_type]
            with timer:
                logger.debug("Calling calc {}".format(calc_type))

                if calc_type == EVAL_SIM_TAG:
                    out = self._determine_dir_then_calc(
                        Work, calc_type, calc_in, calc)
                else:
                    out = calc(calc_in, Work['persis_info'], Work['libE_info'])

                logger.debug("Return from calc call")

            assert isinstance(out, tuple), \
                "Calculation output must be a tuple."
            assert len(out) >= 2, \
                "Calculation output must be at least two elements."

            calc_status = out[2] if len(out) >= 3 else UNSET_TAG

            # Check for buffered receive
            if self.comm.recv_buffer:
                tag, message = self.comm.recv()
                if tag in [STOP_TAG, PERSIS_STOP]:
                    if message is MAN_SIGNAL_FINISH:
                        calc_status = MAN_SIGNAL_FINISH

            return out[0], out[1], calc_status
        except Exception:
            logger.debug("Re-raising exception from calc")
            calc_status = CALC_EXCEPTION
            raise
        finally:
            # This was meant to be handled by calc_stats module.
            # getattr: Executor.executor may not be a real executor (see
            # _set_executor), in which case there are no tasks to report.
            if task_timing and getattr(Executor.executor,
                                       'list_of_tasks', None):
                # Initially supporting one per calc. One line output.
                task = Executor.executor.list_of_tasks[-1]
                calc_msg = "Calc {:5d}: {} {} {} Status: {}".\
                    format(calc_id,
                           calc_type_strings[calc_type],
                           timer,
                           task.timer,
                           calc_status_strings.get(calc_status, "Not set"))
            else:
                calc_msg = "Calc {:5d}: {} {} Status: {}".\
                    format(calc_id,
                           calc_type_strings[calc_type],
                           timer,
                           calc_status_strings.get(calc_status, "Not set"))

            logging.getLogger(LogConfig.config.stats_name).info(calc_msg)

    def _recv_H_rows(self, Work):
        """Unpacks Work request and receives any history rows

        Returns ``(libE_info, calc_type, calc_in)``. When the request names
        no H rows, ``calc_in`` is an empty array of the dtype registered
        for the calc type; otherwise it is read from the comm.
        """

        libE_info = Work['libE_info']
        calc_type = Work['tag']
        if len(libE_info['H_rows']) > 0:
            _, calc_in = self.comm.recv()
        else:
            calc_in = np.zeros(0, dtype=self.dtypes[calc_type])

        logger.debug("Received calc_in ({}) of len {}".format(
            calc_type_strings[calc_type], np.size(calc_in)))
        assert calc_type in [EVAL_SIM_TAG, EVAL_GEN_TAG], \
            "calc_type must either be EVAL_SIM_TAG or EVAL_GEN_TAG"

        return libE_info, calc_type, calc_in

    def _handle(self, Work):
        """Handles a work request from the manager

        Returns the result dict to send back to the manager, or ``None``
        when the calculation ended with ``MAN_SIGNAL_FINISH``.
        """

        # Check work request and receive second message (if needed)
        libE_info, calc_type, calc_in = self._recv_H_rows(Work)

        # Call user function
        libE_info['comm'] = self.comm
        libE_info['workerID'] = self.workerID
        # libE_info['worker_team'] = [self.workerID] + libE_info.get('blocking', [])
        calc_out, persis_info, calc_status = self._handle_calc(Work, calc_in)
        # The comm was only lent to the user function; drop it again
        # before libE_info goes into the reply.
        del libE_info['comm']

        # If there was a finish signal, bail
        if calc_status == MAN_SIGNAL_FINISH:
            return None

        # Otherwise, send a calc result back to manager
        logger.debug("Sending to Manager with status {}".format(calc_status))
        return {
            'calc_out': calc_out,
            'persis_info': persis_info,
            'libE_info': libE_info,
            'calc_status': calc_status,
            'calc_type': calc_type
        }

    def run(self):
        """Runs the main worker loop.

        Receives work from the manager until a ``STOP_TAG`` arrives or a
        calculation reports a finish signal. Any exception is reported to
        the manager as a ``WorkerErrMsg`` instead of propagating. Results
        are copied back exactly once on every exit path.
        """

        try:
            logger.info("Worker {} initiated on node {}".format(
                self.workerID, socket.gethostname()))

            for worker_iter in count(start=1):
                logger.debug("Iteration {}".format(worker_iter))

                mtag, Work = self.comm.recv()

                if mtag == STOP_TAG:
                    break

                response = self._handle(Work)
                if response is None:
                    break
                self.comm.send(0, response)

        except Exception as e:
            # No copy-back here: ``finally`` below already runs on this
            # path, and copying twice would raise FileExistsError on the
            # second attempt whenever sim dirs are in use.
            self.comm.send(0, WorkerErrMsg(str(e), format_exc()))
        else:
            self.comm.kill_pending()
        finally:
            self._copy_back()
Exemplo n.º 7
0
def test_loc_stack_FileExists_exceptions():
    """Check FileExistsError handling when calc directories are re-used.

    Three phases share one location stack:

    1. ``sim_dirs_make`` False with worker dirs: repeated calls must not
       raise and must leave a single ``worker1`` directory holding the
       copied and symlinked entries.
    2. ``sim_dirs_make`` True with a copy file: making the same sim dir a
       second time must raise ``FileExistsError``.
    3. As phase 2, but with a symlink file.

    Scratch directories are removed even when an assertion fails.
    """
    inputdir = './calc'
    copyfile = './calc/copy'
    symlinkfile = './calc/symlink'
    ensemble_dir = './test_ens'

    try:
        # 'copy' and 'symlink' are deliberately created as directories.
        for path in [inputdir, copyfile, symlinkfile]:
            os.makedirs(path, exist_ok=True)

        # Testing loc_stack continuing on FileExistsError when not using sim_dirs
        libE_specs = {
            'sim_dirs_make': False,
            'ensemble_dir_path': ensemble_dir,
            'use_worker_dirs': True,
            'sim_dir_copy_files': [copyfile],
            'sim_dir_symlink_files': [symlinkfile]
        }

        ls = LocationStack()
        for _ in range(2):  # Should work at any range
            Worker._make_calc_dir(libE_specs, 1, '1', 'sim', ls)

        ensemble_entries = os.listdir(ensemble_dir)
        assert len(ensemble_entries) == 1, \
            'Should only be a single worker file in ensemble'
        assert 'worker1' in ensemble_entries, \
            'Only directory should be worker1'
        worker_entries = os.listdir(os.path.join(ensemble_dir, 'worker1'))
        assert all(name in worker_entries for name in ['copy', 'symlink']), \
            'Files to copy and symlink not found in worker directory.'

        def reuse_raises(specs, h_rows):
            """Make the same sim dir twice; report whether the second
            attempt raised FileExistsError."""
            Worker._make_calc_dir(specs, 1, h_rows, 'sim', ls)
            try:
                Worker._make_calc_dir(specs, 1, h_rows, 'sim', ls)
            except FileExistsError:
                return True
            return False

        # Testing loc_stack exception raising when sim_dir re-used - copy
        libE_specs = {
            'sim_dirs_make': True,
            'ensemble_dir_path': ensemble_dir,
            'use_worker_dirs': True,
            'sim_dir_copy_files': [copyfile]
        }
        assert reuse_raises(libE_specs, '1'), \
            'Expected FileExistsError when re-using sim dir with copy files'

        # Testing loc_stack exception raising when sim_dir re-used - symlink
        libE_specs = {
            'sim_dirs_make': True,
            'ensemble_dir_path': ensemble_dir,
            'use_worker_dirs': True,
            'sim_dir_symlink_files': [symlinkfile]
        }
        assert reuse_raises(libE_specs, '2'), \
            'Expected FileExistsError when re-using sim dir with symlink files'
    finally:
        # copyfile and symlinkfile live inside inputdir.
        for path in [inputdir, ensemble_dir]:
            shutil.rmtree(path, ignore_errors=True)
Exemplo n.º 8
0
def test_location_stack():
    """Test correctness of location stack (all in a temp dir).

    Walks a LocationStack through registering locations, pushing and
    popping them (directly and via the ``loc``/``dir`` context managers),
    and cleaning up, checking the stack contents and the current working
    directory after each step. The steps depend on each other's state and
    must stay in this order. The temporary directory is always removed.
    """

    tmp_dirname = tempfile.mkdtemp()
    assert os.path.isdir(tmp_dirname), \
        "Failed to create temporary directory {}.".format(tmp_dirname)

    try:
        # Record where we started
        start_dir = os.getcwd()

        # Set up directory for clone
        clone_dirname = os.path.join(tmp_dirname, "basedir")
        os.mkdir(clone_dirname)
        test_fname = os.path.join(clone_dirname, "test.txt")
        with open(test_fname, "w+") as f:
            f.write("This is a test file\n")

        s = LocationStack()

        # Register a valid location
        tname = s.register_loc(0,
                               "testdir",
                               prefix=tmp_dirname,
                               copy_files=[test_fname])
        assert os.path.isdir(tname), \
            "New directory {} was not created.".format(tname)
        assert os.path.isfile(os.path.join(tname, "test.txt")), \
            "New directory {} failed to copy test.txt from {}.". \
            format(tname, clone_dirname)

        # Register an empty location
        d = s.register_loc(1, None)
        assert d is None, \
            "Dir stack not correctly register None at location 1."

        # Register a dummy location (del should not work)
        d = s.register_loc(2, os.path.join(tmp_dirname, "dummy"))
        # NOTE(review): `~` is bitwise NOT, so `~True` is -2 and `~False`
        # is -1 -- both truthy. This assertion can never fail and does not
        # check what its message says. Whether `not os.path.isdir(d)` is
        # the intended check depends on whether register_loc() creates the
        # directory -- confirm against LocationStack before changing.
        assert ~os.path.isdir(d), \
            "Directory stack registration of dummy should not create dir."

        # Push unregistered location (we should not move)
        s.push_loc(3)
        assert s.stack == [None], \
            "Directory stack push_loc(missing) failed to put None on stack."
        assert os.path.samefile(os.getcwd(), start_dir), \
            "Directory stack push_loc failed to stay put with input None." \
            "Wanted {}, at {}".format(start_dir, os.getcwd())

        # Push registered location (we should move)
        s.push_loc(0)
        assert s.stack == [None, start_dir], \
            "Directory stack is incorrect." \
            "Wanted [None, {}], got {}.".format(start_dir, s.stack)
        assert os.path.samefile(os.getcwd(), tname), \
            "Directory stack push_loc failed to end up at desired dir." \
            "Wanted {}, at {}".format(tname, os.getcwd())

        # Pop the registered location (we should be back at the start)
        s.pop()
        assert s.stack == [None], \
            "Directory stack is incorrect after pop." \
            "Wanted [None], got {}.".format(s.stack)
        assert os.path.samefile(os.getcwd(), start_dir), \
            "Directory stack push_loc failed to stay put with input None." \
            "Wanted {}, at {}".format(start_dir, os.getcwd())

        # Context for moving again
        with s.loc(0):
            assert s.stack == [None, start_dir], \
                "Directory stack is incorrect." \
                "Wanted [None, {}], got {}.".format(start_dir, s.stack)
            assert os.path.samefile(os.getcwd(), tname), \
                "Directory stack push_loc failed to end up at desired dir." \
                "Wanted {}, at {}".format(tname, os.getcwd())

        # Check directory after context
        assert s.stack == [None], \
            "Directory stack is incorrect after ctx." \
            "Wanted [None], got {}.".format(s.stack)
        assert os.path.samefile(os.getcwd(), start_dir), \
            "Directory looks wrong after ctx." \
            "Wanted {}, at {}".format(start_dir, os.getcwd())

        # Context with no directory: a None entry is pushed for the
        # duration of the block and removed afterwards
        with s.dir(None):
            assert s.stack == [None, None], \
                "Directory stack is incorrect in ctx."
        assert s.stack == [None], \
            "Directory stack is incorrect after ctx."

        # Pop the unregistered location
        s.pop()
        assert not s.stack, \
            "Directory stack should be empty, actually {}.".format(s.stack)
        assert os.path.samefile(os.getcwd(), start_dir), \
            "Directory stack push_loc failed to stay put with input None." \
            "Wanted {}, at {}".format(start_dir, os.getcwd())

        # Clean up
        s.clean_locs()
        assert not os.path.isdir(tname), \
            "Directory {} should have been removed on cleanup.".format(tname)

    finally:
        # Remove the temp dir whether or not the assertions passed
        shutil.rmtree(tmp_dirname)