Пример #1
0
    def objective_at_theta(self, theta_values):
        """
        Evaluate the objective at every row of a table of theta values

        Parameters
        ----------
        theta_values: DataFrame, columns=theta_names
            Theta values at which the objective is evaluated, one row per
            point

        Returns
        -------
        obj_at_theta: DataFrame
            One row per evaluated point: the theta values followed by an
            'obj' column. Points whose reported status is infeasible are
            left out.
        """
        # The parallel task manager splits a plain list, so the table is
        # converted to a list of {theta name: value} records first.
        records = theta_values.to_dict('records')
        task_mgr = mpiu.ParallelTaskManager(len(records))

        # Evaluate only the points assigned to this process.
        local_rows = []
        for point in task_mgr.global_to_local_data(records):
            obj, _, worststatus = self._Q_at_theta(point)
            if worststatus != pyo.TerminationCondition.infeasible:
                local_rows.append([*point.values(), obj])
            # DLW, Aug2018: should we also store the worst solver status?

        # Collect the rows from every process and label the columns.
        gathered_rows = task_mgr.allgather_global_data(local_rows)
        column_labels = [*theta_values.columns, 'obj']
        return pd.DataFrame(data=gathered_rows, columns=column_labels)
Пример #2
0
    def theta_est_leaveNout(self, lNo, lNo_samples=None, seed=None, 
                            return_samples=False):
        """
        Parameter estimation where N data points are left out of each sample

        Parameters
        ----------
        lNo: int
            Number of data points to leave out for parameter estimation
        lNo_samples: int
            Number of leave-N-out samples. If lNo_samples=None, the maximum 
            number of combinations will be used
        seed: int or None, optional
            Random seed (passed to np.random.seed when not None)
        return_samples: bool, optional
            Return a list of sample numbers that were left out
        
        Returns
        -------
        lNo_theta: DataFrame 
            Theta values for each sample and (if return_samples = True) 
            an 'lNo' column with the sorted sample numbers left out of each
            estimation

        Notes
        -----
        ``self._numbers_list`` is temporarily replaced while the estimations
        run. It is always reset to ``list(range(len(self.callback_data)))``
        before this method exits, even if an estimation raises.
        """
        assert isinstance(lNo, int)
        assert isinstance(lNo_samples, (type(None), int))
        assert isinstance(seed, (type(None), int))
        assert isinstance(return_samples, bool)
        
        samplesize = len(self._numbers_list)-lNo

        if seed is not None:
            np.random.seed(seed)
        
        global_list = self._get_sample_list(samplesize, lNo_samples, replacement=False)
            
        task_mgr = mpiu.ParallelTaskManager(len(global_list))
        local_list = task_mgr.global_to_local_data(global_list)
        
        # Every data index; the left-out points of a sample are its complement.
        all_indices = set(range(len(self.callback_data)))

        # Reset numbers_list for the duration of the estimations.
        # NOTE(review): presumably theta_est(bootlist=...) requires
        # _numbers_list to have samplesize entries -- confirm against theta_est.
        self._numbers_list = list(range(samplesize))
        
        lNo_theta = list()
        try:
            for _, sample in local_list:
                objval, thetavals = self.theta_est(bootlist=list(sample))
                thetavals['lNo'] = np.sort(list(all_indices - set(sample)))
                lNo_theta.append(thetavals)
        finally:
            # Reset numbers_list (back to original) even when theta_est
            # raises, so a failed run does not leave the estimator with a
            # truncated index list.
            self._numbers_list = list(range(len(self.callback_data)))
        
        global_lNo_theta = task_mgr.allgather_global_data(lNo_theta)
        lNo_theta = pd.DataFrame(global_lNo_theta)
        
        if not return_samples:
            del lNo_theta['lNo']
                    
        return lNo_theta
Пример #3
0
    def likelihood_ratio(self, search_ranges=None):
        """
        Evaluate the sum of squared errors on a full mesh of theta values

        Parameters
        ----------
        search_ranges: `dictionary` of lists indexed by theta.
            Mesh points for each theta. Although the default is None, a
            dictionary has to be supplied: the mesh is built from its
            keys() and values().

        Returns
        -------
        SSE: `DataFrame`
            One row per mesh point: the theta values followed by an "SSE"
            column. Mesh points reported as infeasible are omitted.
        """
        # The mesh is the Cartesian product of the per-theta lists; every
        # point is stored as a {theta name: value} dict so the parallel
        # task manager can split a plain list.
        theta_keys = search_ranges.keys()
        global_mesh = [
            dict(zip(theta_keys, combo))
            for combo in itertools.product(*search_ranges.values())
        ]

        task_mgr = mpiu.ParallelTaskManager(len(global_mesh))
        local_mesh = task_mgr.global_to_local_data(global_mesh)

        # Evaluate the squared error at the points assigned to this process.
        local_rows = []
        for point in local_mesh:
            sse, _, worststatus = self.Q_at_theta(point)
            if worststatus != pyo.TerminationCondition.infeasible:
                local_rows.append([*point.values(), sse])
            # DLW, Aug2018: should we also store the worst solver status?

        # Collect the rows from every process and label the columns.
        gathered_rows = task_mgr.allgather_global_data(local_rows)
        column_labels = [*search_ranges.keys(), "SSE"]
        return pd.DataFrame(data=gathered_rows, columns=column_labels)
Пример #4
0
    def bootstrap(self, N):
        """
        Run parameter estimation using N bootstrap samples

        Parameters
        ----------
        N: `int`
            Number of bootstrap samples to draw

        Returns
        -------
        bootstrap_theta_list: `DataFrame`
            Samples and theta values from the bootstrap

        Raises
        ------
        RuntimeError
            If, for some sample, none of the N + 1 draws contains more
            unique entries than ``len(self.thetalist)``.
        """

        bootstrap_theta = list()
        samplesize = len(self.numbers_list)
        # A usable sample needs strictly more unique entries than this.
        min_unique = len(self.thetalist)

        task_mgr = mpiu.ParallelTaskManager(N)
        global_bootlist = list()
        for i in range(N):
            # Redraw until the sample has enough unique entries. The limit
            # of N + 1 draws is an arbitrary timeout; the error is raised
            # only when every draw failed, so a valid final draw is kept.
            for _attempt in range(N + 1):
                bootlist = np.random.choice(self.numbers_list,
                                            samplesize,
                                            replace=True)
                if len(np.unique(bootlist)) > min_unique:
                    break
            else:
                raise RuntimeError("Internal error: timeout in bootstrap"
                                   " constructing a sample; possible hint:"
                                   " the dim of theta may be too close to N")
            global_bootlist.append((i, bootlist))

        local_bootlist = task_mgr.global_to_local_data(global_bootlist)

        # Estimate theta for the samples assigned to this process.
        for _, bootlist in local_bootlist:
            objval, thetavals = self.theta_est(bootlist=bootlist)
            thetavals['samples'] = bootlist
            bootstrap_theta.append(thetavals)

        global_bootstrap_theta = task_mgr.allgather_global_data(
            bootstrap_theta)
        bootstrap_theta = pd.DataFrame(global_bootstrap_theta)

        return bootstrap_theta
Пример #5
0
    def theta_est_bootstrap(self, N, samplesize=None, replacement=True, seed=None, return_samples=False):
        """
        Run parameter estimation using N bootstrap samples

        Parameters
        ----------
        N: int
            Number of bootstrap samples to draw from the data
        samplesize: int or None, optional
            Sample size, if None samplesize will be set to the number of experiments
        replacement: bool, optional
            Sample with or without replacement
        seed: int or None, optional
            Set the random seed (passed to np.random.seed when not None)
        return_samples: bool, optional
            Return a list of experiment numbers used in each bootstrap estimation
        
        Returns
        -------
        bootstrap_theta: DataFrame 
            Theta values for each bootstrap sample and (if return_samples = True) 
            the sample numbers used in each estimation

        Raises
        ------
        RuntimeError
            If, for some sample, none of the N + 1 draws contains more
            unique entries than ``len(self.theta_names)``.
        """
        bootstrap_theta = list()
        
        if samplesize is None:
            samplesize = len(self._numbers_list)
        if seed is not None:
            np.random.seed(seed)

        # A usable sample needs strictly more unique entries than this.
        min_unique = len(self.theta_names)
            
        task_mgr = mpiu.ParallelTaskManager(N)
        global_bootlist = list()
        for i in range(N):
            # Redraw until the sample has enough unique entries. The limit
            # of N + 1 draws is an arbitrary timeout; the error is raised
            # only when every draw failed, so a valid final draw is kept.
            for _attempt in range(N + 1):
                bootlist = np.random.choice(self._numbers_list,
                                            samplesize,
                                            replace=replacement)
                if len(np.unique(bootlist)) > min_unique:
                    break
            else:
                raise RuntimeError("Internal error: timeout in bootstrap"
                                   " constructing a sample; possible hint:"
                                   " the dim of theta may be too close to N")
            global_bootlist.append((i, bootlist))

        local_bootlist = task_mgr.global_to_local_data(global_bootlist)

        # Estimate theta for the samples assigned to this process.
        for _, bootlist in local_bootlist:
            objval, thetavals = self.theta_est(bootlist=bootlist)
            thetavals['samples'] = bootlist
            bootstrap_theta.append(thetavals)
        
        global_bootstrap_theta = task_mgr.allgather_global_data(bootstrap_theta)
        bootstrap_theta = pd.DataFrame(global_bootstrap_theta)

        if not return_samples:
            del bootstrap_theta['samples']
                    
        return bootstrap_theta
Пример #6
0
    def theta_est_bootstrap(self, bootstrap_samples, samplesize=None, 
                            replacement=True, seed=None, return_samples=False):
        """
        Parameter estimation using bootstrap resampling of the data

        Parameters
        ----------
        bootstrap_samples: int
            Number of bootstrap samples to draw from the data
        samplesize: int or None, optional
            Size of each bootstrap sample. If samplesize=None, samplesize will be 
            set to the number of samples in the data
        replacement: bool, optional
            Sample with or without replacement
        seed: int or None, optional
            Random seed (passed to np.random.seed when not None)
        return_samples: bool, optional
            Return a list of sample numbers used in each bootstrap estimation
        
        Returns
        -------
        bootstrap_theta: DataFrame 
            Theta values for each sample and (if return_samples = True) 
            the sample numbers used in each estimation

        Notes
        -----
        ``self._numbers_list`` is temporarily replaced while the estimations
        run. It is always reset to ``list(range(len(self.callback_data)))``
        before this method exits, even if an estimation raises.
        """
        assert isinstance(bootstrap_samples, int)
        assert isinstance(samplesize, (type(None), int))
        assert isinstance(replacement, bool)
        assert isinstance(seed, (type(None), int))
        assert isinstance(return_samples, bool)
        
        if samplesize is None:
            samplesize = len(self._numbers_list)
        
        if seed is not None:
            np.random.seed(seed)
        
        global_list = self._get_sample_list(samplesize, bootstrap_samples, 
                                            replacement)

        task_mgr = mpiu.ParallelTaskManager(bootstrap_samples)
        local_list = task_mgr.global_to_local_data(global_list)

        # Reset numbers_list for the duration of the estimations.
        # NOTE(review): presumably theta_est(bootlist=...) requires
        # _numbers_list to have samplesize entries -- confirm against theta_est.
        self._numbers_list = list(range(samplesize))
        
        bootstrap_theta = list()
        try:
            for _, sample in local_list:
                objval, thetavals = self.theta_est(bootlist=list(sample))
                thetavals['samples'] = sample
                bootstrap_theta.append(thetavals)
        finally:
            # Reset numbers_list (back to original) even when theta_est
            # raises, so a failed run does not leave the estimator with a
            # truncated index list.
            self._numbers_list = list(range(len(self.callback_data)))
        
        global_bootstrap_theta = task_mgr.allgather_global_data(bootstrap_theta)
        bootstrap_theta = pd.DataFrame(global_bootstrap_theta)

        if not return_samples:
            del bootstrap_theta['samples']
            
        return bootstrap_theta