Example #1
	def compute(cls,file_list,callback_loader=None,pool=None,index=None,assemble=np.array,**kwargs):
		
		"""
		Computes an ensemble; the calculations can be spread over multiple processors using an MPI pool

		:param file_list: list of files that will constitute the ensemble; the callback_loader is called on each of the files to produce the different realizations
		:type file_list: list. 

		:param callback_loader: function called on each file in the list to produce one realization of the ensemble; it must return a numpy array with the loaded data. This argument is required
		:type callback_loader: function

		:param pool: MPI pool for multiprocessing (imported from emcee https://github.com/dfm/emcee)
		:type pool: MPI pool object

		:param index: index of the Ensemble
		:type index: pandas Index

		:param assemble: called on the list of features (one feature per file) to assemble them into an array (defaults to np.array)
		:type assemble: callable

		:param kwargs: Any additional keyword arguments to be passed to callback_loader
		:type kwargs: dict.

		>>> from lenstools import Ensemble
		>>> from lenstools.statistics import default_callback_loader

		>>> map_list = ["conv1.fit","conv2.fit","conv3.fit"]
		>>> l_edges = np.arange(200.0,50000.0,200.0)

		>>> conv_ensemble = Ensemble.compute(map_list,callback_loader=default_callback_loader,pool=pool,l_edges=l_edges)

		"""

		#Safety checks
		assert callback_loader is not None, "You must specify a callback loader function that returns a numpy array!"
		if index is not None:
			assert len(index)==len(file_list),"The number of elements in the index should be the same as the number of files!"

		#Build a function wrapper of the callback loader, so it becomes pickleable
		_callback_wrapper = _function_wrapper(callback_loader,args=tuple(),kwargs=kwargs)

		#Execute the callback on each file in the list (spread calculations with MPI pool if it is not None)
		if pool is not None:
			M = pool.map
		else:
			M = map

		full_data = assemble([r for r in M(_callback_wrapper,file_list) if r is not None])

		#Check if user provided column labels
		if "columns" in kwargs.keys():
			columns = kwargs["columns"]
		else:
			columns = None

		#Return the created ensemble from the full_data array
		return cls(full_data,file_list,index=index,columns=columns)
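Because compute requires a callback_loader, a minimal sketch of one is shown below. The loader name, the .npy files and the histogram measurement are illustrative stand-ins for whatever per-file feature the ensemble should contain; only the signature (file name first, extra keyword arguments after) follows the interface described above.

import numpy as np
from lenstools import Ensemble

# Hypothetical loader: reads one realization from disk and returns its feature as a numpy array
def my_loader(filename,l_edges):
	data = np.load(filename)
	counts,_ = np.histogram(data.ravel(),bins=l_edges)
	return counts.astype(np.float64)

# Extra keyword arguments (here l_edges) are forwarded to the loader through **kwargs
ens = Ensemble.compute(["map1.npy","map2.npy"],callback_loader=my_loader,l_edges=np.arange(200.0,50000.0,200.0))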
Example #2
	def bootstrap(self,callback,bootstrap_size=10,resample=10,seed=None,assemble=np.array,pool=None,**kwargs):

		"""
		Computes a custom statistic on the Ensemble using the bootstrap method

		:param callback: statistic to compute on the ensemble; takes the resampled Ensemble data as an input
		:type callback: callable

		:param bootstrap_size: size of the resampled ensembles used in the bootstrapping; must be less than or equal to the number of realizations in the Ensemble
		:type bootstrap_size: int.

		:param resample: number of times the Ensemble is resampled
		:type resample: int.

		:param seed: if not None, this is the random seed used for the resampling
		:type seed: int.

		:param assemble: method that gets called on the resampled statistic list to make it into an Ensemble
		:type assemble: callable

		:param pool: MPI pool for multiprocessing (imported from emcee https://github.com/dfm/emcee)
		:type pool: MPI pool object

		:param kwargs: passed to the callback function
		:type kwargs: dict.

		:returns: the bootstrapped statistic
		:rtype: assemble return type

		"""

		#Safety check
		assert bootstrap_size<=self.nobs,"The size of the resampling cannot exceed the original number of realizations"

		#Set the random seed
		if seed is not None:
			np.random.seed(seed)

		#Build a function wrapper of the callback, so it becomes pickleable
		_callback_wrapper = _function_wrapper(callback,args=tuple(),kwargs=kwargs)

		#MPI Pool
		if pool is None:
			M = map
		else:
			M = pool.map

		#Construct the randomization matrix
		randomizer = np.random.randint(self.nobs,size=(resample,bootstrap_size))

		#Compute the statistic with the callback
		statistic = assemble(list(M(_callback_wrapper,[ self.reindex(r) for r in randomizer ])))

		#Return the bootstrapped statistic
		return statistic
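The bootstrap method has no usage example in its docstring, so a minimal sketch follows. It assumes an already built Ensemble instance named ens with at least 20 realizations (the variable names and the choice of statistic are illustrative only); the callback receives the resampled Ensemble data, as described above.

import numpy as np

# Statistic evaluated on each resampled ensemble: the mean over the resampled realizations
def mean_statistic(data):
	return data.mean(axis=0)

# 100 resamples of 20 realizations each; the seed makes the resampling reproducible
bootstrapped_means = ens.bootstrap(mean_statistic,bootstrap_size=20,resample=100,seed=0)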
Example #3
    def load(self, callback_loader=None, pool=None, from_old=False, **kwargs):
        """
		Loads the ensemble into memory; the calculations can be spread over multiple processors using an MPI pool

		:param callback_loader: function called on each file in the list to populate the ensemble. If None is provided, a plain numpy.load is performed on the specified files. Must return a numpy array with the loaded data
		:type callback_loader: function

		:param pool: MPI pool for multiprocessing (imported from emcee https://github.com/dfm/emcee)
		:type pool: MPI pool object

		:param from_old: If True, the loaded data are interpreted as an old, already existing ensemble: only one file (the one in which the old ensemble is saved) is loaded, and since the first dimension of the data is 1 it is discarded
		:type from_old: bool.

		:param kwargs: Any additional keyword arguments to be passed to callback_loader
		:type kwargs: dict.

		>>> from lenstools import Ensemble
		>>> from lenstools.statistics import default_callback_loader

		>>> map_list = ["conv1.fit","conv2.fit","conv3.fit"]
		>>> l_edges = np.arange(200.0,50000.0,200.0)

		>>> conv_ensemble = Ensemble.fromfilelist(map_list)
		>>> conv_ensemble.load(callback_loader=default_callback_loader,pool=pool,l_edges=l_edges)

		"""

        if callback_loader is None:
            #Default to a plain numpy.load (picklable, unlike a lambda)
            callback_loader = np.load

        self.pool = pool

        #Build a function wrapper of the callback loader, so it becomes pickleable
        _callback_wrapper = _function_wrapper(callback_loader,
                                              args=tuple(),
                                              kwargs=kwargs)

        #Execute the callback on each file in the list (spread calculations with MPI pool if it is not None)
        if pool is not None:
            M = pool.map
        else:
            M = map

        full_data = np.array(list(M(_callback_wrapper, self.file_list)))

        assert type(full_data) == np.ndarray
        assert full_data.shape[0] == self.num_realizations

        if from_old:
            full_data = full_data[0]

        self.num_realizations = full_data.shape[0]
        self.data = full_data
Example #4
	def train(self,use_parameters="all",method="Rbf",**kwargs):

		"""
		Builds the interpolators for each of the feature bins using a radial basis function approach

		:param use_parameters: which parameters actually vary in the supplied parameter set (it doesn't make sense to interpolate over the constant ones)
		:type use_parameters: list. or "all"

		:param method: interpolation method; can be 'Rbf' or a callable. If callable, it must take two arguments: a squared distance and a squared smoothing length
		:type method: str. or callable

		:param kwargs: keyword arguments to be passed to the interpolator constructor

		"""

		#input sanity check
		if use_parameters != "all":
			assert type(use_parameters) == list
			used_parameters = self.parameter_set[:,use_parameters]
		else:
			used_parameters = self.parameter_set

		#Compute total number of feature bins and reshape the training set accordingly
		if "_num_bins" not in self._metadata:
			self._metadata.append("_num_bins")
		self._num_bins = reduce(mul,self.feature_set.shape[1:])

		flattened_feature_set = self.feature_set.reshape((self.feature_set.shape[0],self._num_bins))

		#Build the interpolator
		if "_interpolator" not in self._metadata:
			self._metadata.append("_interpolator")

		if method=="Rbf":

			#Scipy Rbf method
			self._interpolator = list()

			for n in range(self._num_bins):
				self._interpolator.append(_interpolate_wrapper(interpolate.Rbf,args=(tuple(used_parameters.T) + (flattened_feature_set[:,n],)),kwargs=kwargs))

		else:

			#Compute pairwise square distance between points
			distances = ((used_parameters[None] - used_parameters[:,None])**2).sum(-1)
			epsilon = distances[np.triu_indices(len(distances),k=1)].mean()
			kernel = method(distances,epsilon)
			weights = np.linalg.solve(kernel,self.feature_set)

			#Wrap interpolator
			self._interpolator = _function_wrapper(_interpolate_fast,args=[],kwargs={"parameter_grid":used_parameters,"method":method,"weights":weights,"epsilon":epsilon})
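No usage example accompanies train, so a minimal sketch is given below. It assumes an object named analysis that exposes parameter_set, feature_set and this train method (for instance a lenstools likelihood analysis instance); the parameter indices, the Rbf keyword and the custom kernel are illustrative only.

import numpy as np

# Train Rbf interpolators over the parameters that actually vary (say, columns 0 and 2);
# extra keyword arguments are forwarded to scipy.interpolate.Rbf
analysis.train(use_parameters=[0,2],method="Rbf",function="multiquadric")

# Alternatively, supply a custom kernel: a callable of (squared distance, squared smoothing scale)
def gaussian_kernel(squared_distance,squared_scale):
	return np.exp(-0.5*squared_distance/squared_scale)

analysis.train(use_parameters=[0,2],method=gaussian_kernel)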
Example #5
	def load(self,callback_loader=None,pool=None,from_old=False,**kwargs):
		"""
		Loads the ensemble into memory; the calculations can be spread over multiple processors using an MPI pool

		:param callback_loader: function called on each file in the list to populate the ensemble. If None is provided, a plain numpy.load is performed on the specified files. Must return a numpy array with the loaded data
		:type callback_loader: function

		:param pool: MPI pool for multiprocessing (imported from emcee https://github.com/dfm/emcee)
		:type pool: MPI pool object

		:param from_old: If True, the loaded data are interpreted as an old, already existing ensemble: only one file (the one in which the old ensemble is saved) is loaded, and since the first dimension of the data is 1 it is discarded
		:type from_old: bool.

		:param kwargs: Any additional keyword arguments to be passed to callback_loader
		:type kwargs: dict.

		>>> from lenstools import Ensemble
		>>> from lenstools.statistics import default_callback_loader

		>>> map_list = ["conv1.fit","conv2.fit","conv3.fit"]
		>>> l_edges = np.arange(200.0,50000.0,200.0)

		>>> conv_ensemble = Ensemble.fromfilelist(map_list)
		>>> conv_ensemble.load(callback_loader=default_callback_loader,pool=pool,l_edges=l_edges)

		"""

		if callback_loader is None:
			callback_loader = _np_load

		self.pool = pool

		#Build a function wrapper of the callback loader, so it becomes pickleable
		_callback_wrapper = _function_wrapper(callback_loader,args=tuple(),kwargs=kwargs)

		#Execute the callback on each file in the list (spread calculations with MPI pool if it is not None)
		if pool is not None:
			M = pool.map
		else:
			M = map

		full_data = np.array(list(M(_callback_wrapper,self.file_list)))
		
		assert type(full_data) == np.ndarray
		assert full_data.shape[0] == self.num_realizations 

		if from_old:
			full_data = full_data[0]

		self.num_realizations = full_data.shape[0]
		self.data = full_data
Example #6
	def train(self,use_parameters="all",method="Rbf",**kwargs):

		"""
		Builds the interpolators for each of the feature bins using a radial basis function approach

		:param use_parameters: which parameters actually vary in the supplied parameter set (it doesn't make sense to interpolate over the constant ones)
		:type use_parameters: list. or "all"

		:param method: interpolation method; can be 'Rbf' or a callable. If callable, it must take two arguments: a squared distance and a squared smoothing length
		:type method: str. or callable

		:param kwargs: keyword arguments to be passed to the interpolator constructor

		"""

		#input sanity check
		if use_parameters != "all":
			assert type(use_parameters) == list
			used_parameters = self.parameter_set[:,use_parameters]
		else:
			used_parameters = self.parameter_set

		#Compute total number of feature bins and reshape the training set accordingly
		if "_num_bins" not in self._metadata:
			self._metadata.append("_num_bins")
		self._num_bins = reduce(mul,self.feature_set.shape[1:])

		flattened_feature_set = self.feature_set.reshape((self.feature_set.shape[0],self._num_bins))

		#Build the interpolator
		if "_interpolator" not in self._metadata:
			self._metadata.append("_interpolator")

		if method=="Rbf":

			#Scipy Rbf method
			self._interpolator = list()

			for n in range(self._num_bins):
				self._interpolator.append(_interpolate_wrapper(interpolate.Rbf,args=(tuple(used_parameters.T) + (flattened_feature_set[:,n],)),kwargs=kwargs))

		else:

			#Compute pairwise square distance between points
			distances = ((used_parameters[None] - used_parameters[:,None])**2).sum(-1)
			epsilon = distances[np.triu_indices(len(distances),k=1)].mean()
			kernel = method(distances,epsilon)
			weights = np.linalg.solve(kernel,self.feature_set)

			#Wrap interpolator
			self._interpolator = _function_wrapper(_interpolate_fast,args=[],kwargs={"parameter_grid":used_parameters,"method":method,"weights":weights,"epsilon":epsilon})
Example #7
    def chi2(self,
             parameters,
             observed_feature,
             features_covariance,
             split_chunks=None,
             pool=None):
        """
		Computes the chi2 part of the parameter likelihood via the usual sandwich product with the inverse covariance matrix; the model features are computed with the interpolators

		:param parameters: new points in parameter space on which to compute the chi2 statistic
		:type parameters: (N,p) array where N is the number of points and p the number of parameters

		:param observed_feature: observed feature on which to condition the parameter likelihood
		:type observed_feature: array

		:param features_covariance: covariance matrix of the features, must be supplied
		:type features_covariance: array

		:param split_chunks: if set to an integer bigger than 0, splits the calculation of the chi2 into chunks, each of which handles an equal number of points; each chunk can then be processed by a different processor
		:type split_chunks: int.

		:param pool: MPI pool for multiprocessing (imported from emcee https://github.com/dfm/emcee)
		:type pool: MPI pool object

		:returns: array with the chi2 values, one per point in the parameters input

		"""

        #Sanity checks
        assert observed_feature is not None
        assert features_covariance is not None, "No science without the covariance matrix, you must provide one!"
        assert observed_feature.shape == self.training_set.shape[1:]
        assert features_covariance.shape == observed_feature.shape * 2

        #If you didn't do training before, train now with the default settings
        if not hasattr(self, "_interpolator"):
            self.train()

        #Reformat the parameter input into a list of chunks
        if parameters.ndim == 1:
            num_points = 1
        else:
            num_points = parameters.shape[0]

        if split_chunks is None:

            parameter_chunks = [parameters]

        elif split_chunks > 0:

            assert num_points % split_chunks == 0, "split_chunks must divide exactly the number of points!!"
            chunk_length = num_points // split_chunks
            parameter_chunks = [
                parameters[n * chunk_length:(n + 1) * chunk_length]
                for n in range(split_chunks)
            ]

        else:

            raise ValueError("split_chunks must be >0!!")

        #Compute the inverse of the covariance matrix once and for all
        covinv = inv(features_covariance)

        #Build the keyword argument dictionary to be passed to the chi2 calculator
        kwargs = {
            "num_bins": self._num_bins,
            "interpolator": self._interpolator,
            "inverse_covariance": covinv,
            "observed_feature": observed_feature
        }

        #Hack to make the chi2 pickleable (from emcee)
        chi2_wrapper = _function_wrapper(chi2, tuple(), kwargs)

        #Finally map chi2 calculator on the list of chunks
        if pool is not None:
            M = pool.map
        else:
            M = map

        chi2_list = list(M(chi2_wrapper, parameter_chunks))

        return np.array(chi2_list).reshape(num_points)
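The chi2 method can be exercised with a minimal sketch like the one below. It assumes a trained analysis object exposing this method, an observed feature vector observed and its covariance matrix covariance with matching shapes; all variable names and parameter values are illustrative only.

import numpy as np

# Hypothetical grid of N=4 points in a p=2 parameter space
parameters = np.array([[0.26, 0.80],
                       [0.30, 0.82],
                       [0.34, 0.84],
                       [0.38, 0.86]])

# chi2 value for each point; split_chunks=2 lets an MPI pool process the two chunks in parallel
chi2_values = analysis.chi2(parameters, observed_feature=observed, features_covariance=covariance, split_chunks=2, pool=None)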
Example #8
	def chi2(self,parameters,observed_feature,features_covariance,split_chunks=None,pool=None):

		"""
		Computes the chi2 part of the parameter likelihood via the usual sandwich product with the inverse covariance matrix; the model features are computed with the interpolators

		:param parameters: new points in parameter space on which to compute the chi2 statistic
		:type parameters: (N,p) array where N is the number of points and p the number of parameters

		:param observed_feature: observed feature on which to condition the parameter likelihood
		:type observed_feature: array

		:param features_covariance: covariance matrix of the features, must be supplied
		:type features_covariance: array

		:param split_chunks: if set to an integer bigger than 0, splits the calculation of the chi2 into chunks, each of which handles an equal number of points; each chunk can then be processed by a different processor
		:type split_chunks: int.

		:param pool: MPI pool for multiprocessing (imported from emcee https://github.com/dfm/emcee)
		:type pool: MPI pool object

		:returns: array with the chi2 values, one per point in the parameters input

		"""

		#Sanity checks
		assert observed_feature is not None 
		assert features_covariance is not None,"No science without the covariance matrix, you must provide one!"
		assert observed_feature.shape == self.training_set.shape[1:]
		assert features_covariance.shape == observed_feature.shape * 2

		#If you didn't do training before, train now with the default settings
		if not hasattr(self,"_interpolator"):
			self.train()

		#Reformat the parameter input into a list of chunks
		if parameters.ndim==1:
			num_points = 1
		else:
			num_points = parameters.shape[0]

		if split_chunks is None:
			
			parameter_chunks = [parameters]
		
		elif split_chunks > 0:
			
			assert num_points%split_chunks == 0,"split_chunks must divide exactly the number of points!!"
			chunk_length = num_points//split_chunks
			parameter_chunks = [ parameters[n*chunk_length:(n+1)*chunk_length] for n in range(split_chunks) ]

		else:

			raise ValueError("split_chunks must be >0!!")

		#Compute the inverse of the covariance matrix once and for all
		covinv = inv(features_covariance)

		#Build the keyword argument dictionary to be passed to the chi2 calculator
		kwargs = {"num_bins":self._num_bins,"interpolator":self._interpolator,"inverse_covariance":covinv,"observed_feature":observed_feature}

		#Hack to make the chi2 pickleable (from emcee)
		chi2_wrapper = _function_wrapper(chi2,tuple(),kwargs)

		#Finally map chi2 calculator on the list of chunks
		if pool is not None:
			M = pool.map
		else:
			M = map
		
		chi2_list = list(M(chi2_wrapper,parameter_chunks))

		return np.array(chi2_list).reshape(num_points)
Example #9
    def bootstrap(self,
                  callback,
                  bootstrap_size=10,
                  resample=10,
                  seed=None,
                  assemble=np.array,
                  pool=None,
                  **kwargs):
        """
		Computes a custom statistic on the Ensemble using the bootstrap method

		:param callback: statistic to compute on the ensemble; takes the resampled Ensemble data as an input
		:type callback: callable

		:param bootstrap_size: size of the resampled ensembles used in the bootstrapping; must be less than or equal to the number of realizations in the Ensemble
		:type bootstrap_size: int.

		:param resample: number of times the Ensemble is resampled
		:type resample: int.

		:param seed: if not None, this is the random seed used for the resampling
		:type seed: int.

		:param assemble: method that gets called on the resampled statistic list to make it into an Ensemble
		:type assemble: callable

		:param pool: MPI pool for multiprocessing (imported from emcee https://github.com/dfm/emcee)
		:type pool: MPI pool object

		:param kwargs: passed to the callback function
		:type kwargs: dict.

		:returns: the bootstrapped statistic
		:rtype: assemble return type

		"""

        #Safety check
        assert bootstrap_size <= self.nobs, "The size of the resampling cannot exceed the original number of realizations"

        #Set the random seed
        if seed is not None:
            np.random.seed(seed)

        #Build a function wrapper of the callback, so it becomes pickleable
        _callback_wrapper = _function_wrapper(callback,
                                              args=tuple(),
                                              kwargs=kwargs)

        #MPI Pool
        if pool is None:
            M = map
        else:
            M = pool.map

        #Construct the randomization matrix
        randomizer = np.random.randint(self.nobs,
                                       size=(resample, bootstrap_size))

        #Compute the statistic with the callback
        statistic = assemble(
            list(M(_callback_wrapper, [self.reindex(r) for r in randomizer])))

        #Return the bootstrapped statistic
        return statistic
Example #10
    def compute(cls,
                file_list,
                callback_loader=None,
                pool=None,
                index=None,
                assemble=np.array,
                **kwargs):
        """
		Computes an ensemble; the calculations can be spread over multiple processors using an MPI pool

		:param file_list: list of files that will constitute the ensemble; the callback_loader is called on each of the files to produce the different realizations
		:type file_list: list. 

		:param callback_loader: function called on each file in the list to produce one realization of the ensemble; it must return a numpy array with the loaded data. This argument is required
		:type callback_loader: function

		:param pool: MPI pool for multiprocessing (imported from emcee https://github.com/dfm/emcee)
		:type pool: MPI pool object

		:param index: index of the Ensemble
		:type index: pandas Index

		:param assemble: called on the list of features (one feature per file) to assemble them into an array (defaults to np.array)
		:type assemble: callable

		:param kwargs: Any additional keyword arguments to be passed to callback_loader
		:type kwargs: dict.

		>>> from lenstools import Ensemble
		>>> from lenstools.statistics import default_callback_loader

		>>> map_list = ["conv1.fit","conv2.fit","conv3.fit"]
		>>> l_edges = np.arange(200.0,50000.0,200.0)

		>>> conv_ensemble = Ensemble.compute(map_list,callback_loader=default_callback_loader,pool=pool,l_edges=l_edges)

		"""

        #Safety checks
        assert callback_loader is not None, "You must specify a callback loader function that returns a numpy array!"
        if index is not None:
            assert len(index) == len(
                file_list
            ), "The number of elements in the index should be the same as the number of files!"

        #Build a function wrapper of the callback loader, so it becomes pickleable
        _callback_wrapper = _function_wrapper(callback_loader,
                                              args=tuple(),
                                              kwargs=kwargs)

        #Execute the callback on each file in the list (spread calculations with MPI pool if it is not None)
        if pool is not None:
            M = pool.map
        else:
            M = map

        full_data = assemble(
            [r for r in M(_callback_wrapper, file_list) if r is not None])

        #Check if user provided column labels
        if "columns" in kwargs.keys():
            columns = kwargs["columns"]
        else:
            columns = None

        #Return the created ensemble from the full_data array
        return cls(full_data, file_list, index=index, columns=columns)