Example #1
class _algorithm(object):
    """
    Implements an algorithm.

    Input
    ----------
    spot_setup: class
        model: function
            Should be callable with a parameter combination of the parameter function
            and return a list of simulation results (of the same length as the evaluation list).
        parameter: function
            When called, it should return a random parameter combination, which can
            be drawn e.g. from a uniform or Gaussian distribution.
        objectivefunction: function
            Should return the objective function value for a given pair of model
            simulation and observation lists.
        evaluation: function
            Should return the observed (true) values in the same form as returned by the model.

    dbname: str
        Name of the database where parameters, objective function values and simulation
        results will be saved.
    dbformat: str
        ram: fast, suited for short sampling times. No file is created; results are kept in an array.
        csv: A csv file is created, which you can import afterwards.
    parallel: str
        seq: Sequential sampling (default): normal iterations on one core of your CPU.
        mpc: Multiprocessing: iterations on all available cores of your (single) PC.
        mpi: Message Passing Interface: parallel computing on high-performance computing clusters; mpi4py needs to be installed.
    save_threshold: float or list
        Compares the given value/list of values with the value/list of values returned by spot_setup.objectivefunction.
        If the objective function value is higher, the results are saved in the database; otherwise they are discarded (saves storage).
    db_precision: np.float type
        Set np.float16, np.float32 or np.float64 for rounding of floats in the output database.
        Default is np.float16.
    sim_timeout: float, int or None, default: None
        If not None, the model defined in the spot_setup class is aborted after 'sim_timeout' seconds.
        If the model run was aborted, simply '[nan]' is returned.
    random_state: int or None, default: None
        The algorithm uses random_state as the seed for numpy, so that stochastic processes can be reproduced.
    """

    _unaccepted_parameter_types = (parameter.List, )

    def __init__(self,
                 spot_setup,
                 dbname=None,
                 dbformat=None,
                 dbinit=True,
                 dbappend=False,
                 parallel='seq',
                 save_sim=True,
                 breakpoint=None,
                 backup_every_rep=100,
                 save_threshold=-np.inf,
                 db_precision=np.float16,
                 sim_timeout=None,
                 random_state=None,
                 optimization_direction='grid',
                 algorithm_name=''):

        # Initialize the user defined setup class
        self.setup = spot_setup
        param_info = parameter.get_parameters_array(
            self.setup,
            unaccepted_parameter_types=self._unaccepted_parameter_types)
        self.all_params = param_info['random']
        self.constant_positions = parameter.get_constant_indices(spot_setup)
        if self.constant_positions:
            self.non_constant_positions = []
            for i, val in enumerate(self.all_params):
                if self.all_params[i] not in self.constant_positions:
                    self.non_constant_positions.append(i)
        else:
            self.non_constant_positions = np.arange(0, len(self.all_params))
        self.parameter = self.get_parameters
        self.parnames = param_info['name']
        self.algorithm_name = algorithm_name
        # Create a type to hold the parameter values using a namedtuple
        self.partype = parameter.ParameterSet(param_info)

        self.evaluation = self.setup.evaluation()
        self.save_sim = save_sim
        self.optimization_direction = optimization_direction
        self.dbname = dbname or 'customDb'
        self.dbformat = dbformat or 'ram'
        self.db_precision = db_precision
        self.breakpoint = breakpoint
        self.backup_every_rep = backup_every_rep
        # Two parameters to control the data base handling
        # 'dbinit' triggers the initial creation of the data base file
        # 'dbappend' used to append to the existing data base, after restart
        self.dbinit = dbinit
        self.dbappend = dbappend

        # Set the random state
        if random_state is None:  # TODO: Discuss whether these three lines are necessary.
            random_state = np.random.randint(low=0, high=2**30)
        np.random.seed(random_state)

        # If sim_timeout is not None, a timeout is set so that the simulation is aborted after sim_timeout seconds without returning a value
        self.sim_timeout = sim_timeout
        self.save_threshold = save_threshold

        if breakpoint == 'read' or breakpoint == 'readandwrite':
            print('Reading backupfile')
            try:
                open(self.dbname + '.break')
            except FileNotFoundError:
                print('Backupfile not found')
            self.dbappend = True

        # Now a repeater (ForEach-object) is loaded
        # A repeater is a convenient wrapper to repeat tasks
        # We have the same interface for sequential and for parallel tasks
        if parallel == 'seq':
            from spotpy.parallel.sequential import ForEach
        elif parallel == 'mpi':
            from spotpy.parallel.mpi import ForEach

        # mpc is based on pathos multiprocessing and uses an ordered map, so results are
        # returned in the same order as the parameters
        elif parallel == 'mpc':
            from spotpy.parallel.mproc import ForEach

        # umpc is based on pathos multiprocessing and uses an unordered map, so results are
        # returned in the order in which the subprocesses finish. This may speed up the whole
        # simulation, but it is not recommended if the objective function depends on the order
        # of the data, because the order of the results is effectively random
        elif parallel == 'umpc':
            from spotpy.parallel.umproc import ForEach
        else:
            raise ValueError(
                "'%s' is not a valid keyword for parallel processing" %
                parallel)

        # This is the repeater for the model runs. The simulate method does the work
        # If you need different tasks, the repeater can be pushed into a "phase" using the
        # setphase function. The simulate method can check the current phase and dispatch work
        # to other functions. This is introduced for sceua to differentiate between burn in and
        # the normal work on the chains
        self.repeat = ForEach(self.simulate)

        # The save method needs to know whether the objective function result is a list or a float; default is float
        self.like_struct_typ = float

    def __str__(self):
        return '{type}({mtype}())->{dbname}'.format(type=type(self).__name__,
                                                    mtype=type(
                                                        self.setup).__name__,
                                                    dbname=self.dbname)

    def __repr__(self):
        return '{type}()'.format(type=type(self).__name__)

    def get_parameters(self):
        """
        Returns the parameter array from the setup
        """
        pars = parameter.get_parameters_array(self.setup)
        return pars[self.non_constant_positions]

    def set_repetiton(self, repetitions):
        self.status = _RunStatistic(repetitions, self.algorithm_name,
                                    self.optimization_direction, self.parnames)
        # In MPI, this command will do nothing on the master process
        # but the worker processes are going to wait for jobs.
        # Hence the workers will only receive parameters for the
        # simulate function, new calculation phases and the termination
        self.repeat.start()

    def final_call(self):
        self.repeat.terminate()
        try:
            self.datawriter.finalize()
        except AttributeError:  # Happens if no database was assigned
            pass
        self.status.print_status_final()

    def _init_database(self, like, randompar, simulations):
        if self.dbinit:
            print('Initialize database...')

            self.datawriter = database.get_datawriter(
                self.dbformat,
                self.dbname,
                self.parnames,
                like,
                randompar,
                simulations,
                save_sim=self.save_sim,
                dbappend=self.dbappend,
                dbinit=self.dbinit,
                db_precision=self.db_precision,
                setup=self.setup)

            self.dbinit = False

    def __is_list_type(self, data):
        if type(data) == type:
            return data == list or data == type(np.array([]))
        else:
            return type(data) == list or type(data) == type(np.array([]))

    def save(self, like, randompar, simulations, chains=1):
        # Initialize the database if no run was performed so far
        self._init_database(like, randompar, simulations)
        # Test if like and the save threshold are float/list and compare accordingly
        if self.__is_list_type(like) and self.__is_list_type(
                self.save_threshold):
            if all(i > j for i, j in zip(
                    like, self.save_threshold)):  #Compares list/list
                self.datawriter.save(like,
                                     randompar,
                                     simulations,
                                     chains=chains)
        if (not self.__is_list_type(like)) and (not self.__is_list_type(
                self.save_threshold)):
            if like > self.save_threshold:  #Compares float/float
                self.datawriter.save(like,
                                     randompar,
                                     simulations,
                                     chains=chains)
        if self.__is_list_type(like) and (not self.__is_list_type(
                self.save_threshold)):
            if like[0] > self.save_threshold:  #Compares list/float
                self.datawriter.save(like,
                                     randompar,
                                     simulations,
                                     chains=chains)
        if (not self.__is_list_type(like)) and self.__is_list_type(
                self.save_threshold):  #Compares float/list
            if all(like > j for j in self.save_threshold):
                self.datawriter.save(like,
                                     randompar,
                                     simulations,
                                     chains=chains)

    def read_breakdata(self, dbname):
        ''' Read data from a pickle file if a breakpoint is set.
            Reason: In case of incomplete optimizations, old data can be restored. '''
        import pickle
        with open(dbname + '.break', 'rb') as breakfile:
            work, backuptime, repos, obmin, obmax, pmin, pmax = pickle.load(
                breakfile)
            self.status.starttime = self.status.starttime - backuptime
            self.status.rep = repos
            self.status.objectivefunction_min = obmin
            self.status.objectivefunction_max = obmax
            self.status.params_min = pmin
            self.status.params_max = pmax
            return work

    def write_breakdata(self, dbname, work):
        ''' Write data to a pickle file if a breakpoint has been set.'''
        import pickle
        work = (work, self.status.last_print - self.status.starttime,
                self.status.rep, self.status.objectivefunction_min,
                self.status.objectivefunction_max, self.status.params_min,
                self.status.params_max)
        with open(str(dbname) + '.break', 'wb') as breakfile:
            pickle.dump(work, breakfile)

    def getdata(self):
        return self.datawriter.getdata()

    def update_params(self, params):
        #Add potential Constant parameters
        self.all_params[self.non_constant_positions] = params
        return self.all_params

    def postprocessing(self,
                       rep,
                       params,
                       simulation,
                       chains=1,
                       save_run=True,
                       negativlike=False,
                       block_print=False):  # TODO: rep is not necessary

        params = self.update_params(params)
        if negativlike is True:
            like = -self.getfitness(simulation=simulation, params=params)
        else:
            like = self.getfitness(simulation=simulation, params=params)

        # Save everything in the database, if save is True
        # This is needed as some algorithms just want to know the fitness,
        # before they actually save the run in a database (e.g. sce-ua)

        self.status(like, params, block_print=block_print)

        if save_run is True and simulation is not None:
            self.save(like, params, simulations=simulation, chains=chains)
        if type(like) == type([]):
            return like[0]
        else:
            return like

    def getfitness(self, simulation, params):
        """
        Calls the user defined spot_setup objectivefunction
        """
        try:
            #print('Using parameters in fitness function')
            return self.setup.objectivefunction(evaluation=self.evaluation,
                                                simulation=simulation,
                                                params=(params, self.parnames))

        except TypeError:  # Happens if the user's spot_setup.objectivefunction does not accept a params argument
            #print('Not using parameters in fitness function')
            return self.setup.objectivefunction(evaluation=self.evaluation,
                                                simulation=simulation)

    def simulate(self, id_params_tuple):
        """This is a simple wrapper of the model, returning the result together with
        the run id and the parameters. This is needed, because some parallel back ends
        can mix up the ordering of runs
        """
        id, params = id_params_tuple
        # TODO: List parameters are not updated if they are not accepted by the algorithm;
        # we may have to warn/error if a list is given
        self.all_params[self.non_constant_positions] = params
        all_params = self.all_params

        if self.sim_timeout:
            # we need a layer to fetch returned data from a threaded process into a queue.
            def model_layer(q, all_params):
                # Call self.model with a namedtuple instead of another sequence
                q.put(self.setup.simulation(self.partype(*all_params)))

            # Start a queue to fetch the result returned by the worker thread.
            # (In Python 2.7 this was a multiprocessing class that could cause errors due to
            # incompatibilities with the main thread, which required a workaround for older versions.)
            que = Queue()

            sim_thread = threading.Thread(target=model_layer,
                                          args=(que, all_params))
            sim_thread.daemon = True
            sim_thread.start()

            # If self.sim_timeout is not None, the model is aborted after self.sim_timeout
            # seconds; otherwise it runs as long as it needs
            sim_thread.join(self.sim_timeout)

            # If the thread did not deliver a result, i.e. it was stopped by the timeout,
            # model_result stays None and the run will not be saved. Otherwise fetch the
            # result from the queue
            model_result = None
            if not que.empty():
                model_result = que.get()

        else:
            model_result = self.setup.simulation(self.partype(*all_params))

        return id, params, model_result
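
Usage sketch (not part of the original spotpy source): a minimal, hypothetical spot_setup illustrating the interface described in the docstring of Example #1, i.e. a parameter definition, simulation(), evaluation() and objectivefunction(). The class name MySetup, the toy model and the commented sampler call are assumptions for illustration only.

import numpy as np

import spotpy
from spotpy import objectivefunctions
from spotpy.parameter import Uniform


class MySetup(object):
    # One calibration parameter, sampled from a uniform distribution
    x = Uniform(low=0.0, high=10.0)

    def simulation(self, vector):
        # Toy model: a straight line scaled by the sampled parameter
        return list(vector[0] * np.arange(10))

    def evaluation(self):
        # "Observed" data, same length as the simulation result
        return list(3.0 * np.arange(10))

    def objectivefunction(self, simulation, evaluation):
        # Higher is better for this setup, so negate the RMSE
        return -objectivefunctions.rmse(evaluation, simulation)


# Hypothetical run with a sampler built on _algorithm (e.g. Monte Carlo):
# sampler = spotpy.algorithms.mc(MySetup(), dbname='mc_demo', dbformat='ram',
#                                sim_timeout=5, random_state=42)
# sampler.sample(100)
# results = sampler.getdata()
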
Example #2
class _algorithm(object):
    """
    Implements an algorithm.

    Input
    ----------
    spot_setup: class
        model: function
            Should be callable with a parameter combination of the parameter function
            and return a list of simulation results (of the same length as the evaluation list).
        parameter: function
            When called, it should return a random parameter combination, which can
            be drawn e.g. from a uniform or Gaussian distribution.
        objectivefunction: function
            Should return the objective function value for a given pair of model
            simulation and observation lists.
        evaluation: function
            Should return the observed (true) values in the same form as returned by the model.

    dbname: str
        Name of the database where parameters, objective function values and simulation
        results will be saved.
    dbformat: str
        ram: fast, suited for short sampling times. No file is created; results are kept in an array.
        csv: A csv file is created, which you can import afterwards.
    parallel: str
        seq: Sequential sampling (default): normal iterations on one core of your CPU.
        mpc: Multiprocessing: iterations on all available cores of your (single) PC.
        mpi: Message Passing Interface: parallel computing on high-performance computing clusters; mpi4py needs to be installed.

    alt_objfun: str or None, default: None
        Alternative objective function to be used by the algorithm:
        * None: the objective function defined in spot_setup.objectivefunction is used
        * any str: if the name is found in spotpy.objectivefunctions,
            that objective function is used, otherwise it falls back to
            spot_setup.objectivefunction; e.g. 'log_p', 'rmse', 'bias', 'kge' etc.

    """

    def __init__(self, spot_setup, dbname=None, dbformat=None, dbinit=True,
                 parallel='seq', save_sim=True, alt_objfun=None, breakpoint=None, backup_every_rep=100):
        # Initialize the user defined setup class
        self.setup = spot_setup
        self.model = self.setup.simulation
        self.parameter = self.setup.parameters
        self.parnames = self.parameter()['name']
        # use alt_objfun if alt_objfun is defined in objectivefunctions,
        # else self.setup.objectivefunction
        self.objectivefunction = getattr(
            objectivefunctions, alt_objfun or '', None) or self.setup.objectivefunction
        self.evaluation = self.setup.evaluation()
        self.save_sim = save_sim
        self.dbname = dbname
        self.dbformat = dbformat
        self.breakpoint = breakpoint
        self.backup_every_rep = backup_every_rep
        self.dbinit = dbinit
        
        if breakpoint == 'read' or breakpoint == 'readandwrite':
            print('Reading backupfile')
            self.dbinit = False
            self.breakdata = self.read_breakdata(self.dbname)
        #self.initialize_database()

        # Now a repeater (ForEach-object) is loaded
        # A repeater is a convenient wrapper to repeat tasks
        # We have the same interface for sequential and for parallel tasks
        if parallel == 'seq':
            from spotpy.parallel.sequential import ForEach
        elif parallel == 'mpi':
            from spotpy.parallel.mpi import ForEach
        elif parallel == 'mpc':
            print('Multiprocessing is still in its testing phase and may result in errors')
            from spotpy.parallel.mproc import ForEach
            #raise NotImplementedError(
            #    'Sorry, mpc is not available by now. Please use seq or mpi')
        else:
            raise ValueError(
                "'%s' is not a valid keyword for parallel processing" % parallel)

        # This is the repeater for the model runs. The simulate method does the work
        # If you need different tasks, the repeater can be pushed into a "phase" using the
        # setphase function. The simulate method can check the current phase and dispatch work
        # to other functions. This is introduced for sceua to differentiate between burn in and
        # the normal work on the chains
        self.repeat = ForEach(self.simulate)

        # In MPI, this command will do nothing on the master process
        # but the worker processes are going to wait for jobs.
        # Hence the workers will only receive parameters for the
        # simulate function, new calculation phases and the termination
        self.repeat.start()
        self.status = _RunStatistic()

    def set_repetiton(self, repetitions):
        self.status.repetitions = repetitions
        
    def final_call(self):
        self.repeat.terminate()
        try:
            self.datawriter.finalize()
        except AttributeError:  # Happens if no database was assigned
            pass
        print('End of sampling')
        text = 'Best run at %i of %i (best like=%g) with parameter set:' % (
            self.status.bestrep, self.status.repetitions, self.status.objectivefunction)
        print(text)
        print(self.status.params)
        text = 'Duration:' + str(round((time.time() - self.status.starttime), 2)) + ' s'
        print(text)
    
    def save(self, like, randompar, simulations, chains=1):
        # Initialize the database if no run was performed so far
        if self.dbformat and self.status.rep == 0:
            print('Initialize database...')
            writerclass = getattr(database, self.dbformat)
            
            self.datawriter = writerclass(
                self.dbname, self.parnames, like, randompar, simulations, save_sim=self.save_sim, 
                dbinit=self.dbinit)
        else:
            self.datawriter.save(like, randompar, simulations, chains=chains)

    def read_breakdata(self, dbname):
        ''' Read data from a pickle file if a breakpoint is set.
            Reason: In case of incomplete optimizations, old data can be restored. 
        '''
        import pickle
        with open(dbname+'.break', 'rb') as csvfile:
            return pickle.load(csvfile)

    def write_breakdata(self, dbname, work):
        ''' Write data to a pickle file if a breakpoint has been set.
        '''
        import pickle
        with open(str(dbname)+'.break', 'wb') as csvfile:
            pickle.dump(work, csvfile)

    def getdata(self):
        if self.dbformat == 'ram':
            return self.datawriter.data
        if self.dbformat == 'csv':
            return np.genfromtxt(self.dbname + '.csv', delimiter=',', names=True)[1:]
        if self.dbformat == 'sql':
            return self.datawriter.getdata
        if self.dbformat == 'noData':
            return self.datawriter.getdata

    def postprocessing(self, rep, randompar, simulation, chains=1, save=True, negativlike=False):
        like = self.getfitness(simulation=simulation, params=randompar)
        # Save everything in the database, if save is True
        # This is needed as some algorithms just want to know the fitness,
        # before they actually save the run in a database (e.g. sce-ua)
        if save is True:
            if negativlike is True:
                self.save(-like, randompar, simulations=simulation, chains=chains)              
                self.status(rep, -like, randompar)
            else:
                self.save(like, randompar, simulations=simulation, chains=chains)
                self.status(rep, like, randompar)
        if type(like)==type([]):
            return like[0]
        else:        
            return like
    
    
    def getfitness(self, simulation, params):
        """
        Calls the user defined spot_setup objectivefunction
        """
        try:
            #print('Using parameters in fitness function')
            return self.objectivefunction(evaluation=self.evaluation, simulation=simulation, params = (params,self.parnames))

        except TypeError:  # Happens if the user's spot_setup.objectivefunction does not accept a params argument
            #print('Not using parameters in fitness function')            
            return self.objectivefunction(evaluation=self.evaluation, simulation=simulation)
    
    def simulate(self, id_params_tuple):
        """This is a simple wrapper of the model, returning the result together with
        the run id and the parameters. This is needed, because some parallel back ends
        can mix up the ordering of runs
        """
        id, params = id_params_tuple
        return id, params, self.model(params)
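
Sketch of the alt_objfun lookup used in __init__ of Example #2 (an illustration, not part of the original source): getattr(objectivefunctions, name or '', None) returns the function of that name from spotpy.objectivefunctions if it exists, otherwise None, and the trailing "or" then falls back to the setup's own objective function. The helper name resolve_objfun and the stand-in fallback are assumptions.

from spotpy import objectivefunctions


def resolve_objfun(alt_objfun, setup_objfun):
    # Mirrors the expression used in __init__ above
    return getattr(objectivefunctions, alt_objfun or '', None) or setup_objfun


def demo_setup_objfun(evaluation, simulation):
    # Stand-in for spot_setup.objectivefunction, used only for this demo
    return 0.0


print(resolve_objfun('rmse', demo_setup_objfun))        # spotpy's rmse function
print(resolve_objfun(None, demo_setup_objfun))          # falls back to the setup's function
print(resolve_objfun('no_such_fn', demo_setup_objfun))  # unknown names also fall back
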
Example #3
class _algorithm(object):
    """
    Implements an algorithm.

    Input
    ----------
    spot_setup: class
        model: function
            Should be callable with a parameter combination of the parameter function
            and return a list of simulation results (of the same length as the evaluation list).
        parameter: function
            When called, it should return a random parameter combination, which can
            be drawn e.g. from a uniform or Gaussian distribution.
        objectivefunction: function
            Should return the objective function value for a given pair of model
            simulation and observation lists.
        evaluation: function
            Should return the observed (true) values in the same form as returned by the model.

    dbname: str
        Name of the database where parameters, objective function values and simulation
        results will be saved.
    dbformat: str
        ram: fast, suited for short sampling times. No file is created; results are kept in an array.
        csv: A csv file is created, which you can import afterwards.
    parallel: str
        seq: Sequential sampling (default): normal iterations on one core of your CPU.
        mpc: Multiprocessing: iterations on all available cores of your (single) PC.
        mpi: Message Passing Interface: parallel computing on high-performance computing clusters; mpi4py needs to be installed.
    save_threshold: float or list
        Compares the given value/list of values with the value/list of values returned by spot_setup.objectivefunction.
        If the objective function value is higher, the results are saved in the database; otherwise they are discarded (saves storage).
    db_precision: np.float type
        Set np.float16, np.float32 or np.float64 for rounding of floats in the output database.
        Default is np.float16.
    alt_objfun: str or None, default: None
        Alternative objective function to be used by the algorithm:
        * None: the objective function defined in spot_setup.objectivefunction is used
        * any str: if the name is found in spotpy.objectivefunctions,
            that objective function is used, otherwise it falls back to
            spot_setup.objectivefunction; e.g. 'log_p', 'rmse', 'bias', 'kge' etc.

    """
    def __init__(self,
                 spot_setup,
                 dbname=None,
                 dbformat=None,
                 dbinit=True,
                 parallel='seq',
                 save_sim=True,
                 alt_objfun=None,
                 breakpoint=None,
                 backup_every_rep=100,
                 save_threshold=-np.inf,
                 db_precision=np.float16):
        # Initialize the user defined setup class
        self.setup = spot_setup
        self.model = self.setup.simulation
        # Philipp: Changed from Tobi's version; we now use both the new class-defined
        # parameters and the parameters function. The new method get_parameters
        # can deal with a missing parameters function.
        #
        # For me (Philipp) it is totally unclear why all the samplers should call this
        # function again and again instead of
        # TODO: just storing a definite list of parameter objects here
        self.parameter = self.get_parameters
        self.parnames = self.parameter()['name']

        # Create a type to hold the parameter values using a namedtuple
        self.partype = parameter.get_namedtuple_from_paramnames(
            self.setup, self.parnames)

        # use alt_objfun if alt_objfun is defined in objectivefunctions,
        # else self.setup.objectivefunction
        self.objectivefunction = getattr(objectivefunctions, alt_objfun or '',
                                         None) or self.setup.objectivefunction
        self.evaluation = self.setup.evaluation()
        self.save_sim = save_sim
        self.dbname = dbname
        self.dbformat = dbformat
        self.db_precision = db_precision
        self.breakpoint = breakpoint
        self.backup_every_rep = backup_every_rep
        self.dbinit = dbinit

        self.save_threshold = save_threshold

        if breakpoint == 'read' or breakpoint == 'readandwrite':
            print('Reading backupfile')
            self.dbinit = False
            self.breakdata = self.read_breakdata(self.dbname)

        # Now a repeater (ForEach-object) is loaded
        # A repeater is a convenient wrapper to repeat tasks
        # We have the same interface for sequential and for parallel tasks
        if parallel == 'seq':
            from spotpy.parallel.sequential import ForEach
        elif parallel == 'mpi':
            from spotpy.parallel.mpi import ForEach
        elif parallel == 'mpc':
            print(
                'Multiprocessing is still in its testing phase and may result in errors'
            )
            from spotpy.parallel.mproc import ForEach
        else:
            raise ValueError(
                "'%s' is not a valid keyword for parallel processing" %
                parallel)

        # This is the repeater for the model runs. The simulate method does the work
        # If you need different tasks, the repeater can be pushed into a "phase" using the
        # setphase function. The simulate method can check the current phase and dispatch work
        # to other functions. This is introduced for sceua to differentiate between burn in and
        # the normal work on the chains
        self.repeat = ForEach(self.simulate)

        # In MPI, this command will do nothing on the master process
        # but the worker processes are going to wait for jobs.
        # Hence the workers will only receive parameters for the
        # simulate function, new calculation phases and the termination
        self.repeat.start()
        self.status = _RunStatistic()

    def __str__(self):
        return '{type}({mtype}(), dbname={dbname})'.format(
            type=type(self).__name__,
            mtype=type(self.setup).__name__,
            dbname=self.dbname)

    def get_parameters(self):
        """
        Returns the parameter array from the setup
        """
        return parameter.get_parameters_array(self.setup)

    def set_repetiton(self, repetitions):
        self.status.repetitions = repetitions

    def final_call(self):
        self.repeat.terminate()
        try:
            self.datawriter.finalize()
        except AttributeError:  # Happens if no database was assigned
            pass
        print('End of sampling')
        text = 'Best run at %i of %i (best like=%g) with parameter set:' % (
            self.status.bestrep, self.status.repetitions,
            self.status.objectivefunction)
        print(text)
        print(self.status.params)
        text = 'Duration:' + str(
            round((time.time() - self.status.starttime), 2)) + ' s'
        print(text)

    def _init_database(self, like, randompar, simulations, chains=1):
        if self.dbinit:
            print('Initialize database...')
            writerclass = getattr(database, self.dbformat)

            self.datawriter = writerclass(self.dbname,
                                          self.parnames,
                                          like,
                                          randompar,
                                          simulations,
                                          save_sim=self.save_sim,
                                          dbinit=self.dbinit,
                                          db_precision=self.db_precision)
            self.dbinit = False

    def save(self, like, randompar, simulations, chains=1):

        # If like is a list of values, compare it with the save_threshold setting
        try:
            if all(i > j for i, j in zip(
                    like, self.save_threshold)):  #Compares list/list
                # Initialize the database if no run was performed so far
                self._init_database(like, randompar, simulations, chains=1)
                self.datawriter.save(like,
                                     randompar,
                                     simulations,
                                     chains=chains)
        # If like is not an iterable, it is assumed to be a float
        except TypeError:  # This branch is also taken if no list threshold was set
            try:
                if like > self.save_threshold:  #Compares float/float
                    # Initialize the database if no run was performed so far
                    self._init_database(like, randompar, simulations, chains=1)
                    self.datawriter.save(like,
                                         randompar,
                                         simulations,
                                         chains=chains)
            except TypeError:  # Comparing a list with a float raises an error, so compare the first value instead
                if like[0] > self.save_threshold:  #Compares list/float
                    # Initialize the database if no run was performed so far
                    self._init_database(like, randompar, simulations, chains=1)
                    self.datawriter.save(like,
                                         randompar,
                                         simulations,
                                         chains=chains)

    def read_breakdata(self, dbname):
        ''' Read data from a pickle file if a breakpoint is set.
            Reason: In case of incomplete optimizations, old data can be restored. '''
        import pickle
        with open(dbname + '.break', 'rb') as breakfile:
            return pickle.load(breakfile)

    def write_breakdata(self, dbname, work):
        ''' Write data to a pickle file if a breakpoint has been set.'''
        import pickle
        with open(str(dbname) + '.break', 'wb') as breakfile:
            pickle.dump(work, breakfile)

    def getdata(self):
        if self.dbformat == 'ram':
            return self.datawriter.data
        if self.dbformat == 'csv':
            return np.genfromtxt(self.dbname + '.csv',
                                 delimiter=',',
                                 names=True)  #[1:]
        if self.dbformat == 'sql':
            return self.datawriter.getdata
        if self.dbformat == 'noData':
            return self.datawriter.getdata

    def postprocessing(self,
                       rep,
                       randompar,
                       simulation,
                       chains=1,
                       save=True,
                       negativlike=False):
        like = self.getfitness(simulation=simulation, params=randompar)
        # Save everything in the database, if save is True
        # This is needed as some algorithms just want to know the fitness,
        # before they actually save the run in a database (e.g. sce-ua)
        if save is True:
            if negativlike is True:
                self.save(-like,
                          randompar,
                          simulations=simulation,
                          chains=chains)
                self.status(rep, -like, randompar)
            else:
                self.save(like,
                          randompar,
                          simulations=simulation,
                          chains=chains)
                self.status(rep, like, randompar)
        if type(like) == type([]):
            return like[0]
        else:
            return like

    def getfitness(self, simulation, params):
        """
        Calls the user defined spot_setup objectivefunction
        """
        try:
            #print('Using parameters in fitness function')
            return self.objectivefunction(evaluation=self.evaluation,
                                          simulation=simulation,
                                          params=(params, self.parnames))

        except TypeError:  # Happens if the user's spot_setup.objectivefunction does not accept a params argument
            #print('Not using parameters in fitness function')
            return self.objectivefunction(evaluation=self.evaluation,
                                          simulation=simulation)

    def simulate(self, id_params_tuple):
        """This is a simple wrapper of the model, returning the result together with
        the run id and the parameters. This is needed, because some parallel back ends
        can mix up the ordering of runs
        """
        id, params = id_params_tuple
        # Call self.model with a namedtuple instead of another sequence
        return id, params, self.model(self.partype(*params))
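
Sketch of the save_threshold decision implemented in save() of Example #3 (an illustration with a hypothetical helper name, not part of the original source). The real method additionally initializes the database and forwards accepted runs to the datawriter; here only the nested TypeError fallbacks for the list/list, float/float and list/float cases are reproduced.

import numpy as np


def passes_threshold(like, save_threshold):
    try:
        # list/list: every objective function value must exceed its threshold
        return all(i > j for i, j in zip(like, save_threshold))
    except TypeError:
        try:
            # float/float: plain comparison (the default threshold is -np.inf)
            return like > save_threshold
        except TypeError:
            # list/float: only the first objective function value is compared
            return like[0] > save_threshold


print(passes_threshold(0.8, -np.inf))            # True: the default threshold accepts everything
print(passes_threshold([0.8, 0.2], [0.5, 0.5]))  # False: the second value is below its threshold
print(passes_threshold([0.8, 0.2], 0.5))         # True: only like[0] is compared
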
Example #4
class _algorithm(object):
    """
    Implements an algorithm.

    Input
    ----------
    spot_setup: class
        model: function
            Should be callable with a parameter combination of the parameter function
            and return a list of simulation results (of the same length as the evaluation list).
        parameter: function
            When called, it should return a random parameter combination, which can
            be drawn e.g. from a uniform or Gaussian distribution.
        objectivefunction: function
            Should return the objective function value for a given pair of model
            simulation and observation lists.
        evaluation: function
            Should return the observed (true) values in the same form as returned by the model.

    dbname: str
        Name of the database where parameters, objective function values and simulation
        results will be saved.
    dbformat: str
        ram: fast, suited for short sampling times. No file is created; results are kept in an array.
        csv: A csv file is created, which you can import afterwards.
    parallel: str
        seq: Sequential sampling (default): normal iterations on one core of your CPU.
        mpc: Multiprocessing: iterations on all available cores of your (single) PC.
        mpi: Message Passing Interface: parallel computing on high-performance computing clusters; mpi4py needs to be installed.

    alt_objfun: str or None, default: None
        Alternative objective function to be used by the algorithm:
        * None: the objective function defined in spot_setup.objectivefunction is used
        * any str: if the name is found in spotpy.objectivefunctions,
            that objective function is used, otherwise it falls back to
            spot_setup.objectivefunction; e.g. 'log_p', 'rmse', 'bias', 'kge' etc.

    """
    def __init__(self,
                 spot_setup,
                 dbname=None,
                 dbformat=None,
                 dbinit=True,
                 parallel='seq',
                 save_sim=True,
                 alt_objfun=None,
                 breakpoint=None,
                 backup_every_rep=100):
        # Initialize the user defined setup class
        self.setup = spot_setup
        self.model = self.setup.simulation
        self.parameter = self.setup.parameters
        self.parnames = self.parameter()['name']
        # use alt_objfun if alt_objfun is defined in objectivefunctions,
        # else self.setup.objectivefunction
        self.objectivefunction = getattr(objectivefunctions, alt_objfun or '',
                                         None) or self.setup.objectivefunction
        self.evaluation = self.setup.evaluation()
        self.save_sim = save_sim
        self.dbname = dbname
        self.dbformat = dbformat
        self.breakpoint = breakpoint
        self.backup_every_rep = backup_every_rep
        self.dbinit = dbinit

        if breakpoint == 'read' or breakpoint == 'readandwrite':
            print('Reading backupfile')
            self.dbinit = False
            self.breakdata = self.read_breakdata(self.dbname)
        #self.initialize_database()

        # Now a repeater (ForEach-object) is loaded
        # A repeater is a convenient wrapper to repeat tasks
        # We have the same interface for sequential and for parallel tasks
        if parallel == 'seq':
            from spotpy.parallel.sequential import ForEach
        elif parallel == 'mpi':
            from spotpy.parallel.mpi import ForEach
        elif parallel == 'mpc':
            print(
                'Multiprocessing is still in its testing phase and may result in errors'
            )
            from spotpy.parallel.mproc import ForEach
            #raise NotImplementedError(
            #    'Sorry, mpc is not available by now. Please use seq or mpi')
        else:
            raise ValueError(
                "'%s' is not a valid keyword for parallel processing" %
                parallel)

        # This is the repeater for the model runs. The simulate method does the work
        # If you need different tasks, the repeater can be pushed into a "phase" using the
        # setphase function. The simulate method can check the current phase and dispatch work
        # to other functions. This is introduced for sceua to differentiate between burn in and
        # the normal work on the chains
        self.repeat = ForEach(self.simulate)

        # In MPI, this command will do nothing on the master process
        # but the worker processes are going to wait for jobs.
        # Hence the workers will only receive parameters for the
        # simulate function, new calculation phases and the termination
        self.repeat.start()
        self.status = _RunStatistic()

    def set_repetiton(self, repetitions):
        self.status.repetitions = repetitions

    def final_call(self):
        self.repeat.terminate()
        try:
            self.datawriter.finalize()
        except AttributeError:  # Happens if no database was assigned
            pass
        print('End of sampling')
        text = 'Best run at %i of %i (best like=%g) with parameter set:' % (
            self.status.bestrep, self.status.repetitions,
            self.status.objectivefunction)
        print(text)
        print(self.status.params)
        text = 'Duration:' + str(
            round((time.time() - self.status.starttime), 2)) + ' s'
        print(text)

    def save(self, like, randompar, simulations, chains=1):
        # Initialize the database if no run was performed so far
        if self.dbformat and self.status.rep == 0:
            print('Initialize database...')
            writerclass = getattr(database, self.dbformat)

            self.datawriter = writerclass(self.dbname,
                                          self.parnames,
                                          like,
                                          randompar,
                                          simulations,
                                          save_sim=self.save_sim,
                                          dbinit=self.dbinit)
        else:
            self.datawriter.save(like, randompar, simulations, chains=chains)

    def read_breakdata(self, dbname):
        ''' Read data from a pickle file if a breakpoint is set.
            Reason: In case of incomplete optimizations, old data can be restored. 
        '''
        import pickle
        with open(dbname + '.break', 'rb') as csvfile:
            return pickle.load(csvfile)

    def write_breakdata(self, dbname, work):
        ''' Write data to a pickle file if a breakpoint has been set.
        '''
        import pickle
        with open(str(dbname) + '.break', 'wb') as csvfile:
            pickle.dump(work, csvfile)

    def getdata(self):
        if self.dbformat == 'ram':
            return self.datawriter.data
        if self.dbformat == 'csv':
            return np.genfromtxt(self.dbname + '.csv',
                                 delimiter=',',
                                 names=True)[1:]
        if self.dbformat == 'sql':
            return self.datawriter.getdata
        if self.dbformat == 'noData':
            return self.datawriter.getdata

    def postprocessing(self,
                       rep,
                       randompar,
                       simulation,
                       chains=1,
                       save=True,
                       negativlike=False):
        like = self.getfitness(simulation=simulation, params=randompar)
        # Save everything in the database, if save is True
        # This is needed as some algorithms just want to know the fitness,
        # before they actually save the run in a database (e.g. sce-ua)
        if save is True:
            if negativlike is True:
                self.save(-like,
                          randompar,
                          simulations=simulation,
                          chains=chains)
                self.status(rep, -like, randompar)
            else:
                self.save(like,
                          randompar,
                          simulations=simulation,
                          chains=chains)
                self.status(rep, like, randompar)
        if type(like) == type([]):
            return like[0]
        else:
            return like

    def getfitness(self, simulation, params):
        """
        Calls the user defined spot_setup objectivefunction
        """
        try:
            #print('Using parameters in fitness function')
            return self.objectivefunction(evaluation=self.evaluation,
                                          simulation=simulation,
                                          params=(params, self.parnames))

        except TypeError:  # Happens if the user's spot_setup.objectivefunction does not accept a params argument
            #print('Not using parameters in fitness function')
            return self.objectivefunction(evaluation=self.evaluation,
                                          simulation=simulation)

    def simulate(self, id_params_tuple):
        """This is a simple wrapper of the model, returning the result together with
        the run id and the parameters. This is needed, because some parallel back ends
        can mix up the ordering of runs
        """
        id, params = id_params_tuple
        return id, params, self.model(params)
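
Sketch of the getfitness() fallback shared by all four variants above (an illustration, not part of the original source). Both objective functions below are hypothetical: the first accepts the params keyword, the second does not, so calling it with params raises a TypeError and the second call without params is used instead.

def objfun_with_params(evaluation, simulation, params):
    values, names = params  # parameter values and parameter names, unused in this toy example
    return sum(abs(e - s) for e, s in zip(evaluation, simulation))


def objfun_without_params(evaluation, simulation):
    return sum(abs(e - s) for e, s in zip(evaluation, simulation))


def getfitness(objectivefunction, evaluation, simulation, params, parnames):
    try:
        # Preferred call: hand the current parameter set to the objective function
        return objectivefunction(evaluation=evaluation, simulation=simulation,
                                 params=(params, parnames))
    except TypeError:
        # The objective function does not accept a params keyword
        return objectivefunction(evaluation=evaluation, simulation=simulation)


obs, sim = [1.0, 2.0, 3.0], [1.1, 1.9, 3.2]
print(getfitness(objfun_with_params, obs, sim, [0.5], ['x']))     # the params keyword is passed through
print(getfitness(objfun_without_params, obs, sim, [0.5], ['x']))  # falls back to the call without params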