Example #1
    def add_realizations(self, number_of_trajectories=None, chunk_size=None, progress_bar=False):
        """ Add a number of realizations to the ensemble. """

        if number_of_trajectories is None:
            self.log.write_log("No number_of_trajectories specified", logging.ERROR)
            raise MolnsUtilException("No number_of_trajectories specified")
        if not isinstance(number_of_trajectories, int):
            self.log.write_log("number_of_trajectories must be an integer. Provided type: {0}"
                               .format(type(number_of_trajectories)), logging.ERROR)
            raise MolnsUtilException("number_of_trajectories must be an integer. Provided type: {0}".format(
                    type(number_of_trajectories)))

        if chunk_size is None:
            chunk_size = self._determine_chunk_size(number_of_trajectories)

        if not self.log.verbose:
            progress_bar = False
        else:
            if len(self.parameters) > 1:
                self.log.write_log(
                    "Generating {0} realizations of the model at {1} parameter points (chunk size={2})".format(
                        number_of_trajectories, len(self.parameters), chunk_size))
            else:
                self.log.write_log(
                    "Generating {0} realizations of the model (chunk size={1})".format(number_of_trajectories,
                                                                                       chunk_size))
        divid = None
        if progress_bar:
            divid = display_progressbar()

        self.number_of_trajectories += number_of_trajectories

        num_chunks = int(math.ceil(number_of_trajectories / float(chunk_size)))
        chunks = [chunk_size] * (num_chunks - 1)
        chunks.append(number_of_trajectories - chunk_size * (num_chunks - 1))
        # total chunks
        pchunks = chunks * len(self.parameters)
        num_pchunks = num_chunks * len(self.parameters)
        pparams = []
        param_set_ids = []
        self._set_pparams_paramsetids_presultlist(num_chunks, pparams, param_set_ids)
        seed_list = self._get_seed_list(len(self.parameters), number_of_trajectories, chunk_size)

        if self.qsub is False:
            return self._ipython_generate_and_store_realisations(num_pchunks, pparams, param_set_ids, seed_list,
                                                                 pchunks, divid, progress_bar=progress_bar)
        else:
            return self._qsub_generate_and_store_realizations(pparams, param_set_ids, seed_list, pchunks, divid,
                                                              progress_bar=progress_bar)
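
The chunk arithmetic above splits the requested trajectories into num_chunks pieces of size chunk_size plus one (possibly smaller) remainder chunk. A standalone sketch with illustrative numbers:

import math

number_of_trajectories = 10  # illustrative values, not from the library
chunk_size = 4

num_chunks = int(math.ceil(number_of_trajectories / float(chunk_size)))  # 3
chunks = [chunk_size] * (num_chunks - 1)                                 # [4, 4]
chunks.append(number_of_trajectories - chunk_size * (num_chunks - 1))    # remainder

print(chunks)  # [4, 4, 2] -- sums to number_of_trajectories
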
Example #2
def clean_up(dirs_to_delete=None, containers_to_delete=None):
    import shutil

    # Validate that any provided argument is a list; None is allowed and skipped.
    # (The original check rejected None even though both defaults are None.)
    for arg in (dirs_to_delete, containers_to_delete):
        if arg is not None and not isinstance(arg, list):
            raise MolnsUtilException("Unexpected type. Expecting {0}.".format(type([])))

    # Note: containers_to_delete is only validated here; this helper removes directories.
    if dirs_to_delete is not None:
        for directory in dirs_to_delete:
            shutil.rmtree(directory)
Example #3
    def _set_pparams_paramsetids_presultlist(self, num_chunks, pparams, param_set_ids, presult_list=None,
                                             chunk_size=None):
        if not isinstance(pparams, list) or not isinstance(param_set_ids, list) or \
                (presult_list is not None and not isinstance(presult_list, list)) or \
                (presult_list is not None and chunk_size is None):
            self.log.write_log("Unexpected arguments. Require pparams, param_set_ids (and presult_list) to be of type "
                               "list. chunk_size cannot be None if presult_list is not None.", logging.ERROR)
            raise MolnsUtilException("Unexpected arguments. Require pparams, param_set_ids (and presult_list) to be of "
                                     "type list. chunk_size cannot be None if presult_list is not None.")
        if self.parameters is None:
            raise MolnsUtilException("self.parameters is None. I don't know (yet) how to proceed.")

        # Repeat each parameter point once per chunk; when a result list is
        # requested, slice that point's stored results into per-chunk sublists.
        for ide, param in enumerate(self.parameters):
            param_set_ids.extend([ide] * num_chunks)
            pparams.extend([param] * num_chunks)
            if presult_list is not None:
                for i in range(num_chunks):
                    presult_list.append(self.result_list[ide][i * chunk_size:(i + 1) * chunk_size])
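
To make the expansion concrete, here is a standalone sketch of what the loop above produces for two parameter points and three chunks (values are illustrative):

parameters = [{'a': 1}, {'a': 2}]   # two illustrative parameter points
num_chunks = 3

pparams, param_set_ids = [], []
for ide, param in enumerate(parameters):
    param_set_ids.extend([ide] * num_chunks)
    pparams.extend([param] * num_chunks)

print(param_set_ids)  # [0, 0, 0, 1, 1, 1]
print(pparams)        # [{'a': 1}, {'a': 1}, {'a': 1}, {'a': 2}, {'a': 2}, {'a': 2}]
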
Example #4
def write_file(storage_mode, filename, result):

    if storage_mode == "Shared":
        storage = SharedStorage()
    elif storage_mode == "Persistent":
        storage = PersistentStorage()
    else:
        raise MolnsUtilException("Unknown storage type '{0}'".format(storage_mode))

    storage.put(filename, result)
Example #5
    def __init__(self, model_class=None, parameters=None, qsub=False, client=None, num_engines=None, storage_mode=None,
                 pickled_cluster_input_file=None, log_filename=None):
        """ Constructor.
        Args:
          model_class: a class object of the model for simulation, must be a sub-class of URDMEModel
          parameters:  either a dict or a list.
            If it is a dict, the keys are the arguments to the class constructions and the
              values are a list of values that argument should take.
              e.g.: {'arg1':[1,2,3],'arg2':[1,2,3]}  will produce 9 parameter points.
            If it is a list, where each element of the list is a dict
            """

        if qsub is True:
            DistributedEnsemble.__init__(self, model_class, parameters, qsub=True, storage_mode=storage_mode,
                                         pickled_cluster_input_file=pickled_cluster_input_file,
                                         log_filename=log_filename, num_engines=num_engines)

            self.log.write_log("Parameter sweep on cluster.", level=logging.INFO)

            if client is not None:
                self.log.write_log("Unexpected parameter \"client\".")

        else:
            if model_class is None:
                raise MolnsUtilException("Model class is None.")

            DistributedEnsemble.__init__(self, model_class, parameters, client=client, num_engines=num_engines,
                                         storage_mode=storage_mode, log_filename=log_filename)

        self.my_class_name = 'ParameterSweep'
        self.parameters = []

        # process the parameters
        if isinstance(parameters, dict):
            vals = []
            keys = []
            for key, value in parameters.items():
                keys.append(key)
                vals.append(value)
            pspace = itertools.product(*vals)

            paramsets = []

            for p in pspace:
                pset = {}
                for i, val in enumerate(p):
                    pset[keys[i]] = val
                paramsets.append(pset)

            self.parameters = paramsets
        elif isinstance(parameters, list):
            self.parameters = parameters
        else:
            #  TODO verify that this can be done safely.
            self.parameters = [None]
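
A standalone sketch of the dict-form expansion described in the docstring: itertools.product takes the Cartesian product of the value lists, so two arguments with three values each yield 9 parameter points.

import itertools

parameters = {'arg1': [1, 2, 3], 'arg2': [1, 2, 3]}  # the docstring's example

keys = list(parameters.keys())
vals = list(parameters.values())
paramsets = [dict(zip(keys, p)) for p in itertools.product(*vals)]

print(len(paramsets))  # 9
print(paramsets[0])    # one parameter point, e.g. {'arg1': 1, 'arg2': 1}
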
Example #6
    def load_state(self, name):
        """ Recover the state of an ensemble from a previous save. """
        # Pickle files must be opened in binary mode.
        with open('.molnsutil/{1}-{0}'.format(name, self.my_class_name), 'rb') as fd:
            state = pickle.load(fd)
        # model_class is stored pickled, so compare by value rather than identity.
        if state['model_class'] != self.model_class:
            self.log.write_log("Can only load state of a class that is identical to the original class", logging.ERROR)
            raise MolnsUtilException("Can only load state of a class that is identical to the original class")
        self.parameters = state['parameters']
        self.number_of_trajectories = state['number_of_trajectories']
        self.seed_base = state['seed_base']
        self.result_list = state['result_list']
        self.storage_mode = state['storage_mode']
Example #7
def create_model(model_class, parameters):
    try:
        model_class_cls = cloudpickle.loads(model_class)
        if parameters is not None:
            model = model_class_cls(**parameters)
        else:
            model = model_class_cls()
        return model
    except Exception as e:
        notes = "Error instantiation the model class, caught {0}: {1}\n".format(type(e), e)
        notes += "dir={0}\n".format(dir())
        raise MolnsUtilException(notes)
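
A minimal round-trip sketch of create_model; MyModel is a hypothetical stand-in for a real model class (the library expects a URDMEModel subclass):

import cloudpickle

class MyModel(object):  # hypothetical model class
    def __init__(self, rate=1.0):
        self.rate = rate

pickled = cloudpickle.dumps(MyModel)
model = create_model(pickled, {'rate': 0.5})  # instantiates MyModel(rate=0.5)
default_model = create_model(pickled, None)   # falls back to MyModel()
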
Example #8
    def __init__(self, model_class=None, parameters=None, qsub=False, client=None, num_engines=None, storage_mode=None,
                 pickled_cluster_input_file=None, log_filename=None):
        """ Constructor """

        self.my_class_name = 'DistributedEnsemble'
        self.log = Log(log_filename=log_filename)

        if model_class is None and pickled_cluster_input_file is None:
            self.log.write_log("Invalid configuration. Either provide a model class object or its pickled file.",
                               logging.ERROR)
            raise MolnsUtilException("Invalid configuration. Either provide a model class object or its pickled file.")

        if model_class is not None and pickled_cluster_input_file is not None:
            self.log.write_log("Invalid configuration. Both a model class and a pickled file are provided.",
                               logging.ERROR)
            raise MolnsUtilException("Invalid configuration. Both a model class and a pickled file are provided.")

        if model_class is not None:
            self.cluster_execution = False
            self.model_class = cloudpickle.dumps(model_class)
        else:
            self.cluster_execution = True
            self.pickled_cluster_input_file = pickled_cluster_input_file

        # Not checking here for parameters = None, as they could be present in the model class.
        self.parameters = [parameters]
        self.number_of_trajectories = 0
        self.seed_base = generate_seed_base()
        self.storage_mode = storage_mode
        # A chunk list
        self.result_list = {}
        self.qsub = qsub
        self.num_engines = num_engines

        if self.qsub is False:
            # Set the IPython.parallel client
            self._update_client(client)
Example #9
def copy_generated_realizations_to_job_directory(realizations_storage_directory, store_realizations_dir):
    import os
    import shutil

    if not os.access(store_realizations_dir, os.W_OK):
        raise MolnsUtilException(jsonify(logs="Cannot access provided storage directory: {0}"
                                         .format(store_realizations_dir)))

    for f in os.listdir(realizations_storage_directory):
        f_abs = os.path.join(realizations_storage_directory, f)
        if is_generated_realizations_file(f):
            shutil.copy(f_abs, store_realizations_dir)
            os.remove(f_abs)

    if len(os.listdir(realizations_storage_directory)) == 0:
        os.rmdir(realizations_storage_directory)

    return store_realizations_dir
Example #10
    def _ipython_generate_and_store_realisations(self, num_pchunks, pparams, param_set_ids, seed_list, pchunks,
                                                 divid=None, progress_bar=False):
        if self.storage_mode is None:
            raise MolnsUtilException("Storage mode is None. Cannot store realizations; aborting.")

        results = self.lv.map_async(run_ensemble, [self.model_class] * num_pchunks, pparams, param_set_ids, seed_list,
                                    pchunks, [self.storage_mode] * num_pchunks)

        # We process the results as they arrive.
        for i, ret in enumerate(results):
            r = ret['filenames']
            param_set_id = ret['param_set_id']
            if param_set_id not in self.result_list:
                self.result_list[param_set_id] = []
            self.result_list[param_set_id].extend(r)
            if divid is not None and progress_bar is not False:
                update_progressbar(divid, i, len(results))

        return {'wall_time': results.wall_time, 'serial_time': results.serial_time}
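
For reference, self.result_list ends up keyed by parameter-set id, with each value a flat list of realization filenames. A sketch of its shape (the filenames are hypothetical):

result_list = {
    0: ['realization_0_0.p', 'realization_0_1.p'],  # parameter point 0
    1: ['realization_1_0.p', 'realization_1_1.p'],  # parameter point 1
}
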
Example #11
    def histogram_density(self, g=None, number_of_trajectories=None):
        """ Estimate the probability density function of g(X) based on number_of_trajectories realizations
            in the ensemble. """
        raise MolnsUtilException('TODO')
Example #12
    def moment(self, g=None, order=1, number_of_trajectories=None):
        """ Compute the moment of order 'order' of g(X), using number_of_trajectories
            realizations in the ensemble. """
        raise MolnsUtilException('TODO')
Example #13
    def run(self, **kwargs):
        """ Main entry point """

        self.log.verbose = False
        divid = None
        # Default here so the variable is defined even when the cluster-execution
        # branch below is skipped.
        cache_results = False

        if self.cluster_execution is False:
            reducer = kwargs.get('reducer') or builtin_reducer_default
            self.log.verbose = kwargs.get('verbose', False)
            if kwargs.get('progress_bar') is True:
                divid = display_progressbar()
            cache_results = kwargs.get('cache_results', False)

        if not kwargs.get('number_of_trajectories', False) and self.number_of_trajectories == 0:
            raise MolnsUtilException("number_of_trajectories is zero")

        # Fall back to the ensemble's current count if the caller did not specify one,
        # instead of raising a KeyError on the missing kwarg.
        number_of_trajectories = kwargs.get('number_of_trajectories', self.number_of_trajectories)
        if number_of_trajectories is None:
            raise MolnsUtilException("Number of trajectories provided is None.")

        store_realizations = bool(kwargs.get('store_realizations', False))
        chunk_size = kwargs.get('chunk_size') or self._determine_chunk_size(self.number_of_trajectories)
        store_realizations_dir = kwargs.get('store_realizations_dir') or None

        if store_realizations:
            generated_realizations = None
            # Run simulations
            if self.number_of_trajectories < number_of_trajectories:
                generated_realizations = self.add_realizations(number_of_trajectories - self.number_of_trajectories,
                                                               chunk_size=chunk_size)

            if self.qsub is False:
                mapped_results = self._ipython_map_aggregate_stored_realizations(mapper=kwargs['mapper'], divid=divid,
                                                                                 aggregator=kwargs['aggregator'],
                                                                                 cache_results=cache_results,
                                                                                 chunk_size=chunk_size)
            else:
                import json
                realizations_storage_directory = json.loads(generated_realizations)['realizations_directory']
                if store_realizations_dir is not None:
                    # Copy realizations from temporary working directory to job directory.
                    realizations_storage_directory = utils.copy_generated_realizations_to_job_directory(
                        realizations_storage_directory=realizations_storage_directory,
                        store_realizations_dir=store_realizations_dir)

                if self.cluster_execution is False:
                    mapped_results = self.qsub_map_aggregate_stored_realizations(
                        mapper=kwargs['mapper'], aggregator=kwargs['aggregator'], chunk_size=chunk_size,
                        realizations_storage_directory=realizations_storage_directory)
                else:
                    mapped_results = self.qsub_map_aggregate_stored_realizations(
                        pickled_cluster_input_file=self.pickled_cluster_input_file, chunk_size=chunk_size,
                        realizations_storage_directory=realizations_storage_directory)
        else:
            if self.qsub is False:
                mapped_results = self._ipython_run_ensemble_map_aggregate(mapper=kwargs['mapper'],
                                                                          aggregator=kwargs['aggregator'],
                                                                          chunk_size=chunk_size,
                                                                          number_of_trajectories=number_of_trajectories,
                                                                          divid=divid)

            else:
                if self.cluster_execution is False:
                    mapped_results = self._qsub_run_ensemble_map_aggregate(
                        mapper=kwargs['mapper'], aggregator=kwargs['aggregator'], divid=divid,
                        number_of_trajectories=number_of_trajectories, chunk_size=chunk_size)
                else:
                    mapped_results = self._qsub_run_ensemble_map_aggregate(
                        pickled_cluster_input_file=self.pickled_cluster_input_file,
                        number_of_trajectories=number_of_trajectories, chunk_size=chunk_size)

        self.log.write_log("Running reducer on mapped and aggregated results (size={0})".format(len(mapped_results)))

        # Run reducer
        if self.cluster_execution is False:
            return self.run_reducer(reducer=reducer, mapped_results=mapped_results)
        else:
            return self.run_reducer(mapped_results=mapped_results,
                                    pickled_cluster_input_file=self.pickled_cluster_input_file)
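
A sketch of a typical call, assuming the kwargs read above; my_mapper and my_aggregator are hypothetical user-supplied callables, and MyModel stands in for a real model class:

def my_mapper(result):            # hypothetical: extract a statistic from one realization
    return result

def my_aggregator(mapped_chunk):  # hypothetical: combine mapped values for one chunk
    return mapped_chunk

sweep = ParameterSweep(model_class=MyModel, parameters={'rate': [0.1, 1.0]})
reduced = sweep.run(mapper=my_mapper, aggregator=my_aggregator,
                    number_of_trajectories=100, chunk_size=10,
                    progress_bar=False, store_realizations=False)
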
Example #14
    def _qsub_generate_and_store_realizations(self, pparams, param_set_ids, seed_list, pchunks, divid=None,
                                              progress_bar=False):
        counter = 0
        random_string = str(uuid.uuid4())

        base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "realizations_" + random_string)

        job_name_prefix = "ps_job_" + random_string[:8] + "_"
        dirs = []
        containers = []
        job_param_ids = {}

        if not os.path.exists(base_dir):
            os.makedirs(base_dir)

        if self.storage_mode != constants.local_storage:
            self.log.write_log("Storage mode must be local while using qsub.", logging.ERROR)
            raise MolnsUtilException("Storage mode must be local while using qsub.")

        for pndx, pset, seed, pchunk in zip(param_set_ids, pparams, seed_list, pchunks):
            if self.cluster_execution is True:
                unpickled_list = dict(pchunk=pchunk, seed=seed,
                                      pickled_cluster_input_file=self.pickled_cluster_input_file,
                                      pset=pset, pndx=pndx, storage_mode=constants.local_storage)
            else:
                unpickled_list = dict(pchunk=pchunk, seed=seed,
                                      model_class=self.model_class, pset=pset, pndx=pndx,
                                      storage_mode=constants.local_storage)

            job_name = job_name_prefix + str(counter)

            # create temp directory for this job.
            temp_job_directory = os.path.join(base_dir, job_name + "/")
            if not os.path.exists(temp_job_directory):
                os.makedirs(temp_job_directory)

            self._submit_qsub_job(constants.run_ensemble_job_file, job_name, unpickled_list, containers, dirs,
                                  temp_job_directory)

            job_param_ids[pndx] = temp_job_directory
            counter += 1

        keep_dirs = self._wait_for_all_results_to_return(wait_for_dirs=dirs, divid=divid, progress_bar=progress_bar)

        remove_dirs = [directory for directory in dirs if directory not in keep_dirs]
        for i, directory in enumerate(remove_dirs):
            unpickled_result = get_unpickled_result(directory)
            r = unpickled_result['filenames']
            param_set_id = unpickled_result['param_set_id']
            if param_set_id not in self.result_list:
                self.result_list[param_set_id] = []
            self.result_list[param_set_id].extend(r)

        self.log.write_log("Cleaning up. Job directory: {0}".format(base_dir))

        # Arrange for generated files to be available in a known location - base_dir.
        DistributedEnsemble.__post_process_generated_ensemble(remove_dirs, base_dir)

        # Delete job containers and directories. Preserve base_dir as it contains computed realizations.
        clean_up(dirs_to_delete=remove_dirs, containers_to_delete=containers)

        return jsonify(realizations_directory=base_dir, result_list=self.result_list)
Example #15
    def _wait_for_all_results_to_return(self, wait_for_dirs, progress_bar=False, divid=None):
        """ Wait for all jobs to complete. Return list of directories whose corresponding jobs finished
        unsuccessfully."""

        import time
        timer_start = time.time()
        dirs = wait_for_dirs[:]
        completed_jobs = 0
        successful_jobs = 0
        keep_dirs = []
        total_jobs = len(dirs)

        self.log.write_log("Awaiting all results. Job directories:\n{0}".format(wait_for_dirs))

        while len(dirs) > 0:
            # Iterate over a copy: removing entries from a list while iterating
            # over it would silently skip the element after each removal.
            for directory in dirs[:]:
                output_file = os.path.join(directory, constants.job_output_file_name)
                completed_file = os.path.join(directory, constants.job_complete_file_name)
                error_file_map_aggregate = os.path.join(directory, constants.job_error_file_name)
                error_file_run_ensemble = os.path.join(directory, constants.job_run_ensemble_error_file_name)
                error_file_reducer = os.path.join(directory, constants.job_reducer_error_file_name)

                if os.path.exists(output_file):
                    dirs.remove(directory)
                    successful_jobs += 1
                    completed_jobs += 1

                # Any error file aborts the wait with full diagnostic context.
                for error_file in (error_file_map_aggregate, error_file_reducer, error_file_run_ensemble):
                    if os.path.exists(error_file):
                        with open(error_file, 'r') as ef:
                            error_msg = ef.read()
                        self.log.write_log(error_msg, logging.ERROR)
                        raise MolnsUtilException(jsonify(completed_jobs=completed_jobs,
                                                         successful_jobs=successful_jobs,
                                                         total_jobs=total_jobs,
                                                         failed_job_working_directory=directory,
                                                         logs=error_msg, job_directories=wait_for_dirs))

                # A job that signalled completion without producing output failed; keep its
                # directory for inspection. Re-check the output file here, since it may have
                # appeared after the check above (there could be a race condition).
                if os.path.exists(completed_file) and not os.path.exists(output_file):
                    keep_dirs.append(directory)
                    dirs.remove(directory)
                    completed_jobs += 1

                if divid is not None and progress_bar is True:
                    update_progressbar(divid, completed_jobs, total_jobs)
            time.sleep(1)
            timer_current = time.time()
            if timer_current - timer_start > constants.MaxJobTimeInSeconds:
                self.log.write_log("Job timed out. Time out period is {0} seconds."
                                   .format(constants.MaxJobTimeInSeconds), logging.ERROR)

                raise MolnsUtilException(jsonify(completed_jobs=completed_jobs,
                                                 successful_jobs=successful_jobs,
                                                 total_jobs=total_jobs,
                                                 logs="Job timed out.", job_directories=wait_for_dirs))

        if completed_jobs > successful_jobs:
            self.log.write_log("Jobs did not complete successfully. Their working directories will not be deleted.",
                               logging.ERROR)

        return keep_dirs
Example #16
    def qsub_map_aggregate_stored_realizations(self, **kwargs):
        realizations_storage_directory = kwargs['realizations_storage_directory']
        self.result_list = kwargs.get("result_list", self.result_list)
        number_of_trajectories = self.number_of_trajectories if self.number_of_trajectories != 0 \
            else len(self.result_list)
        chunk_size = kwargs.get('chunk_size', self._determine_chunk_size(number_of_trajectories))

        if self.parameters is None:
            raise MolnsUtilException("self.parameters is None. I don't know how to proceed.")

        self.log.write_log("Running mapper & aggregator on the result objects (number of results={0}, chunk size={1})"
                           .format(number_of_trajectories * len(self.parameters), chunk_size))

        import shutil

        counter = 0
        random_string = str(uuid.uuid4())
        if not os.path.isdir(realizations_storage_directory):
            self.log.write_log("Directory {0} does not exist.".format(realizations_storage_directory), logging.ERROR)
            raise MolnsUtilException("Directory {0} does not exist.".format(realizations_storage_directory))

        base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp_" + random_string)
        job_name_prefix = "ps_job_" + random_string[:8] + "_"
        dirs = []
        containers = []

        # chunks per parameter TODO is number_of_trajectories correct here?
        self.log.write_log("Number of trajectories: {0}".format(number_of_trajectories))
        num_chunks = int(math.ceil(number_of_trajectories / float(chunk_size)))
        chunks = [chunk_size] * (num_chunks - 1)
        chunks.append(number_of_trajectories - chunk_size * (num_chunks - 1))
        # Expand the per-chunk lists across all parameter points.
        pparams = []
        param_set_ids = []
        presult_list = []
        self._set_pparams_paramsetids_presultlist(num_chunks, pparams, param_set_ids, presult_list, chunk_size)

        for result, pndx in zip(presult_list, param_set_ids):
            # create temp directory for this job.
            job_name = job_name_prefix + str(counter)
            temp_job_directory = os.path.join(base_dir, job_name + "/")
            if not os.path.exists(temp_job_directory):
                os.makedirs(temp_job_directory)

            # Copy pre-computed realizations into this job's working directory.
            for filename in result:
                shutil.copyfile(os.path.join(realizations_storage_directory, filename),
                                os.path.join(temp_job_directory, filename))

            if self.cluster_execution is False:
                unpickled_list = dict(result=result, pndx=pndx, mapper=kwargs['mapper'],
                                      aggregator=kwargs['aggregator'], cache_results=False)
            else:
                unpickled_list = dict(result=result, pndx=pndx, cache_results=False,
                                      pickled_cluster_input_file=kwargs['pickled_cluster_input_file'])

            self._submit_qsub_job(constants.map_and_aggregate_job_file, job_name, unpickled_list, containers, dirs,
                                  temp_job_directory)

            counter += 1

        keep_dirs = self._wait_for_all_results_to_return(wait_for_dirs=dirs, divid=kwargs.get('divid'))

        remove_dirs = [directory for directory in dirs if directory not in keep_dirs]
        mapped_results = {}
        self._set_qsub_mapped_results(remove_dirs, mapped_results)

        self.log.write_log("Cleaning up job directory {0}".format(base_dir))

        # remove temporary files and finished containers. Keep all files that record errors.
        dirs_to_delete = remove_dirs
        if len(keep_dirs) == 0:
            dirs_to_delete = [base_dir]

        clean_up(dirs_to_delete=dirs_to_delete, containers_to_delete=containers)

        return mapped_results