Exemplo n.º 1
0
    def __init__(self, cluster_density, alpha=None, alpha_shape=None,
                 alpha_rate=None):
        """Build the base measure, both samplers and the concentration setup.

        Args:
            cluster_density: Passed to both the partition sampler and the
                atom sampler.
            alpha: Concentration value to use as given. When None, ``alpha``
                starts at 1, ``update_alpha`` is set to True and a
                GammaPriorConcentrationSampler is created.
            alpha_shape: First argument of GammaPriorConcentrationSampler;
                only read when ``alpha`` is None.
            alpha_rate: Second argument of GammaPriorConcentrationSampler;
                only read when ``alpha`` is None.
        """
        measure = PyCloneBaseMeasure()
        self.base_measure = measure

        self.partition_sampler = AuxillaryParameterPartitionSampler(
            measure, cluster_density)
        self.atom_sampler = BaseMeasureAtomSampler(measure, cluster_density)

        # update_alpha is True exactly when the caller supplied no alpha.
        self.update_alpha = alpha is None

        if self.update_alpha:
            self.alpha = 1
            self.concentration_sampler = GammaPriorConcentrationSampler(
                alpha_shape, alpha_rate)
        else:
            self.alpha = alpha

        self.num_iters = 0
Exemplo n.º 2
0
    def __init__(self,
                 atom_sampler,
                 partition_sampler,
                 alpha=1.0,
                 alpha_priors=None,
                 global_params_sampler=None):
        """Store the collaborating samplers and set the update flags.

        Args:
            atom_sampler: Stored as ``self.atom_sampler``.
            partition_sampler: Stored as ``self.partition_sampler``.
            alpha: Stored as ``self.alpha``.
            alpha_priors: Mapping with 'shape' and 'rate' entries. When given,
                ``update_alpha`` is True and a GammaPriorConcentrationSampler
                is built from those two entries; when None, ``update_alpha``
                is False and no concentration sampler is created.
            global_params_sampler: When given, it is stored and
                ``update_global_params`` is True; otherwise the flag is False
                and nothing is stored.
        """
        self.atom_sampler = atom_sampler
        self.partition_sampler = partition_sampler
        self.alpha = alpha

        # Each optional collaborator switches its own flag on by being present.
        self.update_alpha = alpha_priors is not None

        if self.update_alpha:
            shape, rate = alpha_priors['shape'], alpha_priors['rate']

            self.concentration_sampler = GammaPriorConcentrationSampler(
                shape, rate)

        self.update_global_params = global_params_sampler is not None

        if self.update_global_params:
            self.global_params_sampler = global_params_sampler

        self.num_iters = 0
Exemplo n.º 3
0
    def __init__(self, base_measure, cluster_density):
        """Keep the model pieces and build the three samplers.

        Args:
            base_measure: Stored as ``self.base_measure`` and passed to the
                partition and atom samplers.
            cluster_density: Stored as ``self.cluster_density`` and passed to
                the partition and atom samplers.
        """
        self.base_measure, self.cluster_density = base_measure, cluster_density

        self.concentration_sampler = GammaPriorConcentrationSampler(1, 1)

        # Only the disabled MarginalGibbsPartitionSampler variant would take
        # this as its third argument; nothing else in this method reads it.
        posterior_density = BetaBinomialDensity()

        sampler_args = (base_measure, cluster_density)

        self.partition_sampler = AuxillaryParameterPartitionSampler(
            *sampler_args)

        self.atom_sampler = BetaBinomialGibbsAtomSampler(*sampler_args)
Exemplo n.º 4
0
Arquivo: dp.py Projeto: Q-KIM/PyClone
    def __init__(self, atom_sampler, partition_sampler, alpha=1.0, alpha_priors=None, global_params_sampler=None):
        '''
        Args:
            atom_sampler : Stored as `self.atom_sampler`.
            partition_sampler : Stored as `self.partition_sampler`.

        Kwargs:
            alpha : Stored as `self.alpha`.
            alpha_priors : (dict) Entries 'shape' and 'rate' are passed to GammaPriorConcentrationSampler. If None,
                           `update_alpha` is False and no concentration sampler is created.
            global_params_sampler : Stored when given, with `update_global_params` set to True. If None, the flag
                                    is False and nothing is stored.
        '''
        self.atom_sampler = atom_sampler
        self.partition_sampler = partition_sampler
        self.alpha = alpha

        has_alpha_prior = alpha_priors is not None
        self.update_alpha = has_alpha_prior

        if has_alpha_prior:
            self.concentration_sampler = GammaPriorConcentrationSampler(
                alpha_priors['shape'],
                alpha_priors['rate'],
            )

        has_global_sampler = global_params_sampler is not None
        self.update_global_params = has_global_sampler

        if has_global_sampler:
            self.global_params_sampler = global_params_sampler

        self.num_iters = 0
Exemplo n.º 5
0
class Sampler(object):
    """Bundle a concentration, a partition and an atom sampler.

    One call to `sample` runs the three samplers in that order.
    """

    def __init__(self, base_measure, cluster_density):
        """Store the model pieces and construct the three samplers.

        Args:
            base_measure: Stored and passed to the partition and atom samplers.
            cluster_density: Stored and passed to the partition and atom
                samplers.
        """
        self.base_measure, self.cluster_density = base_measure, cluster_density

        self.concentration_sampler = GammaPriorConcentrationSampler(1, 1)

        # Only the disabled MarginalGibbsPartitionSampler variant would take
        # this as its third argument; nothing else in this method reads it.
        posterior_density = BetaBinomialDensity()

        sampler_args = (base_measure, cluster_density)

        self.partition_sampler = AuxillaryParameterPartitionSampler(
            *sampler_args)

        # BaseMeasureAtomSampler was a previously tried alternative here.
        self.atom_sampler = BetaBinomialGibbsAtomSampler(*sampler_args)

    def sample(self, alpha, partition, data):
        """Run one sweep and return the new concentration with the partition.

        Args:
            alpha: Current concentration value, passed to the concentration
                sampler.
            partition: Object exposing `number_of_cells` and
                `number_of_items`; handed to the partition and atom samplers.
            data: Data handed to the partition and atom samplers.

        Returns:
            Tuple `(alpha, partition)` where `alpha` is the value returned by
            the concentration sampler and `partition` is the object passed in.
        """
        new_alpha = self.concentration_sampler.sample(
            alpha, partition.number_of_cells, partition.number_of_items)

        # The freshly drawn alpha is used for the partition update.
        self.partition_sampler.sample(data, partition, new_alpha)
        self.atom_sampler.sample(data, partition)

        return new_alpha, partition
Exemplo n.º 6
0
Arquivo: dp.py Projeto: Q-KIM/PyClone
class DirichletProcessSampler(object):
    '''
    Drives repeated sampling sweeps over a partition of the data.

    Each sweep (see `interactive_sample`) optionally resamples the concentration `alpha`, then calls the partition
    sampler, the atom sampler and, optionally, the global parameter sampler.
    '''

    def __init__(self, atom_sampler, partition_sampler, alpha=1.0, alpha_priors=None, global_params_sampler=None):
        '''
        Args:
            atom_sampler : Object with `sample(data, partition)`; its `cluster_density.params` is reported in `state`.
            partition_sampler : Object with `sample(data, partition, alpha)` and a `base_measure` attribute.

        Kwargs:
            alpha : Initial concentration value; kept fixed unless `alpha_priors` is given.
            alpha_priors : (dict) With keys 'shape' and 'rate'. If given, alpha is resampled every sweep with a
                           GammaPriorConcentrationSampler. If None, alpha is never resampled.
            global_params_sampler : Object with `sample(data, partition)`. If None, no global parameter update is done.
        '''
        self.atom_sampler = atom_sampler

        self.partition_sampler = partition_sampler

        self.alpha = alpha

        if alpha_priors is None:
            self.update_alpha = False

        else:
            self.update_alpha = True

            self.concentration_sampler = GammaPriorConcentrationSampler(alpha_priors['shape'],
                                                                        alpha_priors['rate'])

        if global_params_sampler is None:
            self.update_global_params = False

        else:
            self.update_global_params = True

            self.global_params_sampler = global_params_sampler

        self.num_iters = 0

    @property
    def state(self):
        '''
        Snapshot of the current sampler state as a dict with keys 'alpha', 'labels', 'params' and 'global_params'.

        Requires `self.partition` to exist, i.e. `initialise_partition` must have been called.
        '''
        return {
                'alpha' : self.alpha,
                'labels' : self.partition.labels,
                'params' : list(self.partition.item_values),
                'global_params' : self.atom_sampler.cluster_density.params
                }

    def initialise_partition(self, data, init_method):
        '''
        Create `self.partition` and allocate every data point to a cell.

        Args:
            data : (list) Data points.
            init_method : (str) Initialisation method to use.
                           - 'separate' will allocate each data point to a separate partition.
                           - 'together' will allocate all data points to the same partition.

        Raises:
            ValueError : If `init_method` is not one of the two supported values.
        '''
        # Validate first so an unknown method fails loudly instead of leaving an empty partition behind.
        if init_method not in ('separate', 'together'):
            raise ValueError(
                "Unknown init_method {0!r}; expected 'separate' or 'together'".format(init_method))

        self.partition = Partition()

        base_measure = self.partition_sampler.base_measure

        if init_method == 'separate':
            for item, _ in enumerate(data):
                self.partition.add_cell(base_measure.random())

                self.partition.add_item(item, item)

        else:
            self.partition.add_cell(base_measure.random())

            for item, _ in enumerate(data):
                self.partition.add_item(item, 0)

    def sample(self, data, trace, num_iters, init_method='separate', print_freq=100):
        '''
        Initialise the partition, then run `num_iters` sweeps, recording the state after each one.

        Args:
            data : (list) Data points.
            trace : Object whose `update(state)` is called with `self.state` after every sweep.
            num_iters : (int) Number of sweeps to run.

        Kwargs:
            init_method : (str) Passed to `initialise_partition`.
            print_freq : (int) Print progress every `print_freq` sweeps. A falsy value (0 or None) disables printing.
        '''
        self.initialise_partition(data, init_method)

        for i in range(num_iters):
            # The truthiness guard avoids a ZeroDivisionError when print_freq is 0.
            if print_freq and i % print_freq == 0:
                self._print_progress()

            self.interactive_sample(data)

            trace.update(self.state)

            self.num_iters += 1

    def _print_progress(self):
        '''
        Print the sweep count, number of cells and alpha, plus the global parameters when those are being updated.
        '''
        # A single pre-joined string prints identically under Python 2 and Python 3.
        print(' '.join(str(x) for x in (self.num_iters, self.partition.number_of_cells, self.alpha)))

        if not self.update_global_params:
            return

        params = self.atom_sampler.cluster_density.params

        if isinstance(params, OrderedDict):
            print(','.join([str(x[0]) for x in params.values()]))

        elif isinstance(params, tuple):
            print(params[0])

        else:
            raise Exception('Object type {0} is not a valid cluster parameter'.format(type(params)))

    def interactive_sample(self, data):
        '''
        Run a single sweep on the existing partition: alpha (if enabled), partition, atoms, global params (if enabled).
        '''
        if self.update_alpha:
            self.alpha = self.concentration_sampler.sample(self.alpha,
                                                           self.partition.number_of_cells,
                                                           self.partition.number_of_items)

        self.partition_sampler.sample(data, self.partition, self.alpha)

        self.atom_sampler.sample(data, self.partition)

        if self.update_global_params:
            self.global_params_sampler.sample(data, self.partition)
Exemplo n.º 7
0
class DirichletProcessSampler(object):
    """Runs sampling sweeps over a partition using a PyCloneBaseMeasure.

    Each sweep (see `interactive_sample`) optionally resamples the
    concentration `alpha`, then calls the partition sampler and the atom
    sampler.
    """

    def __init__(self,
                 cluster_density,
                 alpha=None,
                 alpha_shape=None,
                 alpha_rate=None):
        """Build the base measure, both samplers and the concentration setup.

        Args:
            cluster_density: Passed to both the partition sampler and the
                atom sampler.
            alpha: Concentration value that is never resampled. When None,
                alpha starts at 1 and is resampled every sweep by a
                GammaPriorConcentrationSampler.
            alpha_shape: First argument of GammaPriorConcentrationSampler;
                only read when `alpha` is None.
            alpha_rate: Second argument of GammaPriorConcentrationSampler;
                only read when `alpha` is None.
        """
        self.base_measure = PyCloneBaseMeasure()

        self.partition_sampler = AuxillaryParameterPartitionSampler(
            self.base_measure, cluster_density)

        self.atom_sampler = BaseMeasureAtomSampler(self.base_measure,
                                                   cluster_density)

        if alpha is None:
            self.alpha = 1

            self.update_alpha = True

            self.concentration_sampler = GammaPriorConcentrationSampler(
                alpha_shape, alpha_rate)
        else:
            self.alpha = alpha

            self.update_alpha = False

        self.num_iters = 0

    @property
    def state(self):
        """Snapshot of the current state as a dict.

        Keys are 'alpha', 'cellular_frequencies' (the `phi` of every item
        value), 'labels' and 'phi' (the `phi` of every cell value). Requires
        `self.partition` to exist.
        """
        return {
            'alpha': self.alpha,
            'cellular_frequencies':
            [param.phi for param in self.partition.item_values],
            'labels': self.partition.labels,
            'phi': [param.phi for param in self.partition.cell_values],
        }

    def initialise_partition(self, data):
        """Create `self.partition` with one new cell per data point."""
        self.partition = Partition()

        for item, _ in enumerate(data):
            self.partition.add_cell(self.base_measure.random())
            self.partition.add_item(item, item)

    def sample(self, data, results_db, num_iters, print_freq=100):
        """Initialise the partition, then run and record `num_iters` sweeps.

        Args:
            data: Data points.
            results_db: Object whose `update_trace(state)` is called with
                `self.state` after every sweep.
            num_iters: Number of sweeps to run.
            print_freq: Print progress every `print_freq` sweeps. A falsy
                value (0 or None) disables printing.
        """
        self.initialise_partition(data)

        for i in range(num_iters):
            # The truthiness guard avoids a ZeroDivisionError when
            # print_freq is 0.
            if print_freq and i % print_freq == 0:
                # A single pre-joined string prints identically under
                # Python 2 and Python 3.
                print(' '.join(
                    str(value)
                    for value in (self.num_iters,
                                  self.partition.number_of_cells,
                                  self.alpha)))

            self.interactive_sample(data)

            results_db.update_trace(self.state)

            self.num_iters += 1

    def interactive_sample(self, data):
        """Run one sweep on the existing partition."""
        if self.update_alpha:
            self.alpha = self.concentration_sampler.sample(
                self.alpha, self.partition.number_of_cells,
                self.partition.number_of_items)

        self.partition_sampler.sample(data, self.partition, self.alpha)

        self.atom_sampler.sample(data, self.partition)

    def _init_partition(self, base_measure, data=None):
        """Create `self.partition` with one cell per data point.

        Args:
            base_measure: Object whose `random()` supplies each cell value.
            data: Data points to allocate. When None, `self.data` is used as
                before.
        """
        # NOTE(review): nothing in this class assigns `self.data`, so the
        # fallback only works if a caller or subclass sets it first.
        if data is None:
            data = self.data

        self.partition = Partition()

        for item, _ in enumerate(data):
            self.partition.add_cell(base_measure.random())

            self.partition.add_item(item, item)
Exemplo n.º 8
0
from math import log

from pydp.samplers.concentration import GammaPriorConcentrationSampler

# Script: repeatedly draw from a GammaPriorConcentrationSampler, feeding each
# halved draw back in as the next starting value, and print every value.
sampler = GammaPriorConcentrationSampler(0.01, 0.01)

x = 0

for i in range(1000000):
    x = sampler.sample(x, 1, 100)

    x /= 2

    # A parenthesised single-argument print behaves the same under Python 2
    # and Python 3; the bare `print x` statement is a SyntaxError on Python 3.
    print(x)

    # The result is discarded. math.log raises ValueError for x <= 0, so this
    # presumably acts as a check that the halved draw stays positive.
    log(x)
Exemplo n.º 9
0
    x = poisson_rvs(25)
    data.append(PoissonData(x))

# Starting concentration value; it is overwritten on every pass of the
# sampling loop at the bottom.
alpha = 1

base_measure = GammaBaseMeasure(1, 1)

cluster_density = PoissonDensity()

partition = Partition()

# Add one new cell per data point, each with a fresh draw from the base
# measure, and call add_item(i, i) for it (presumably item i goes to cell i).
# `data` is defined earlier in the script, outside this fragment.
for item, data_point in enumerate(data):
    partition.add_cell(base_measure.random())
    partition.add_item(item, item)

concentration_sampler = GammaPriorConcentrationSampler(1, 1)

# posterior_density is the third argument of the partition sampler below.
posterior_density = NegativeBinomialDensity()
partition_sampler = MarginalGibbsPartitionSampler(base_measure,
                                                  cluster_density,
                                                  posterior_density)

# Disabled alternative partition sampler:
#partition_sampler = AuxillaryParameterPartitionSampler(base_measure, cluster_density)

# NOTE(review): atom_sampler is constructed here but is not called in the
# visible part of the loop below - the loop may continue past this fragment.
atom_sampler = GammaPoissonGibbsAtomSampler(base_measure, cluster_density)

# Each iteration draws a new alpha from the current partition sizes and then
# runs the partition sampler with it. `num_iters` is defined outside this
# fragment.
for i in range(num_iters):
    alpha = concentration_sampler.sample(alpha, partition.number_of_cells,
                                         partition.number_of_items)

    partition_sampler.sample(data, partition, alpha)
Exemplo n.º 10
0
class DirichletProcessSampler(object):
    '''
    Drives repeated sampling sweeps over a partition of the data.

    Each sweep (see `interactive_sample`) optionally resamples the
    concentration `alpha`, then calls the partition sampler, the atom sampler
    and, optionally, the global parameter sampler.
    '''

    def __init__(self,
                 atom_sampler,
                 partition_sampler,
                 alpha=1.0,
                 alpha_priors=None,
                 global_params_sampler=None):
        '''
        Args:
            atom_sampler : Object with `sample(data, partition)`; its
                           `cluster_density.params` is reported in `state`.
            partition_sampler : Object with `sample(data, partition, alpha)`
                                and a `base_measure` attribute.

        Kwargs:
            alpha : Initial concentration value; kept fixed unless
                    `alpha_priors` is given.
            alpha_priors : (dict) With keys 'shape' and 'rate'. If given,
                           alpha is resampled every sweep with a
                           GammaPriorConcentrationSampler. If None, alpha is
                           never resampled.
            global_params_sampler : Object with `sample(data, partition)`.
                                    If None, no global parameter update is
                                    done.
        '''
        self.atom_sampler = atom_sampler

        self.partition_sampler = partition_sampler

        self.alpha = alpha

        if alpha_priors is None:
            self.update_alpha = False

        else:
            self.update_alpha = True

            self.concentration_sampler = GammaPriorConcentrationSampler(
                alpha_priors['shape'], alpha_priors['rate'])

        if global_params_sampler is None:
            self.update_global_params = False

        else:
            self.update_global_params = True

            self.global_params_sampler = global_params_sampler

        self.num_iters = 0

    @property
    def state(self):
        '''
        Snapshot of the current sampler state as a dict with keys 'alpha',
        'labels', 'params' and 'global_params'.

        Requires `self.partition` to exist, i.e. `initialise_partition` must
        have been called.
        '''
        return {
            'alpha': self.alpha,
            'labels': self.partition.labels,
            'params': list(self.partition.item_values),
            'global_params': self.atom_sampler.cluster_density.params
        }

    def initialise_partition(self, data, init_method):
        '''
        Create `self.partition` and allocate every data point to a cell.

        Args:
            data : (list) Data points.
            init_method : (str) Initialisation method to use.
                           - 'separate' will allocate each data point to a separate partition.
                           - 'together' will allocate all data points to the same partition.

        Raises:
            ValueError : If `init_method` is not one of the two supported
                         values.
        '''
        # Validate first so an unknown method fails loudly instead of leaving
        # an empty partition behind.
        if init_method not in ('separate', 'together'):
            raise ValueError(
                "Unknown init_method {0!r}; expected 'separate' or 'together'".
                format(init_method))

        self.partition = Partition()

        base_measure = self.partition_sampler.base_measure

        if init_method == 'separate':
            for item, _ in enumerate(data):
                self.partition.add_cell(base_measure.random())

                self.partition.add_item(item, item)

        else:
            self.partition.add_cell(base_measure.random())

            for item, _ in enumerate(data):
                self.partition.add_item(item, 0)

    def sample(self,
               data,
               trace,
               num_iters,
               init_method='separate',
               print_freq=100):
        '''
        Initialise the partition, then run `num_iters` sweeps, recording the
        state after each one.

        Args:
            data : (list) Data points.
            trace : Object whose `update(state)` is called with `self.state`
                    after every sweep.
            num_iters : (int) Number of sweeps to run.

        Kwargs:
            init_method : (str) Passed to `initialise_partition`.
            print_freq : (int) Print progress every `print_freq` sweeps. A
                         falsy value (0 or None) disables printing.
        '''
        self.initialise_partition(data, init_method)

        for i in range(num_iters):
            # The truthiness guard avoids a ZeroDivisionError when print_freq
            # is 0.
            if print_freq and i % print_freq == 0:
                self._print_progress()

            self.interactive_sample(data)

            trace.update(self.state)

            self.num_iters += 1

    def _print_progress(self):
        '''
        Print the sweep count, number of cells and alpha, plus the global
        parameters when those are being updated.
        '''
        # A single pre-joined string prints identically under Python 2 and
        # Python 3.
        print(' '.join(
            str(x) for x in (self.num_iters, self.partition.number_of_cells,
                             self.alpha)))

        if not self.update_global_params:
            return

        params = self.atom_sampler.cluster_density.params

        if isinstance(params, OrderedDict):
            print(','.join([str(x[0]) for x in params.values()]))

        elif isinstance(params, tuple):
            print(params[0])

        else:
            raise Exception(
                'Object type {0} is not a valid cluster parameter'.
                format(type(params)))

    def interactive_sample(self, data):
        '''
        Run a single sweep on the existing partition: alpha (if enabled),
        partition, atoms, global params (if enabled).
        '''
        if self.update_alpha:
            self.alpha = self.concentration_sampler.sample(
                self.alpha, self.partition.number_of_cells,
                self.partition.number_of_items)

        self.partition_sampler.sample(data, self.partition, self.alpha)

        self.atom_sampler.sample(data, self.partition)

        if self.update_global_params:
            self.global_params_sampler.sample(data, self.partition)