Example #1
def getBayesModel(G, p, mixPrior=None):
    """
    Constructs a PWM CSI BayesMixtureModel.

    @param G: number of components
    @param p: number of positions of the binding site
    @param mixPrior: optional MixtureModelPrior; if None, a default prior
        with near-uniform Dirichlet hyperparameters is constructed
    @return: BayesMixtureModel object
    """

    if mixPrior is None:
        piPrior = mixture.DirichletPrior(G, [1.0] * G)
        compPrior = []
        for i in range(p):
            compPrior.append(
                mixture.DirichletPrior(4, [1.02, 1.02, 1.02, 1.02]))

        # Arbitrary values for the structure and component prior weights; they
        # should be reset by the user via the structPriorHeuristic method.
        mixPrior = mixture.MixtureModelPrior(0.05, 0.05, piPrior, compPrior)

    DNA = mixture.Alphabet(['A', 'C', 'G', 'T'])
    comps = []
    for i in range(G):
        dlist = []
        for j in range(p):
            phi = mixture.random_vector(4)
            dlist.append(mixture.DiscreteDistribution(4, phi, DNA))
        comps.append(mixture.ProductDistribution(dlist))
    pi = mixture.random_vector(G)
    m = mixture.BayesMixtureModel(G, pi, comps, mixPrior, struct=1)
    return m
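A minimal usage sketch (the parameter values here are illustrative, and it assumes PyMix's mixture module and this function are importable):

import mixture

# 3-component PWM model over a binding site of width 8, default priors
m = getBayesModel(3, 8)
data = m.sampleDataSet(500)  # draw 500 sequences from the random model
m.mapEM(data, 40, 0.1)       # MAP-EM: at most 40 iterations, tolerance 0.1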
Example #2
def getRandomCSIMixture_conditionalDists(G,
                                         p,
                                         KL_lower,
                                         KL_upper,
                                         M=8,
                                         dtypes='discgauss',
                                         seed=None,
                                         fullstruct=False,
                                         disc_sampling_dist=None):

    # seed both the Python and the GSL random number generators so that runs
    # are reproducible when a seed is given
    if seed is not None:
        random.seed(seed)
        mixture._C_mixextend.set_gsl_rng_seed(seed)

    if disc_sampling_dist is None:
        discSamp = mixture.DirichletPrior(M, [1.0] * M)  # uniform sampling prior
    else:
        discSamp = disc_sampling_dist

    min_sigma = 0.3  # minimal std for Normal
    max_sigma = 5.0  # maximal std for Normal
    min_mu = -25.0  # minimal mean
    max_mu = 25.0  # maximal mean

    assert dtypes in ['disc', 'gauss', 'discgauss']

    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        # 0 discrete, 1 Normal
        featureTypes = [random.choice((0, 1)) for i in range(p)]
    else:
        raise TypeError('invalid dtypes: %s' % dtypes)


    # generate random CSI structures

    if G < 15:
        # XXX exhaustive enumeration of set partitions is too slow for large G
        P = setPartitions.generate_all_partitions(G)

    C = []

    leaders = []
    groups = []
    for j in range(p):
        c_j = {}

        leaders_j = []
        groups_j = {}

        if fullstruct:
            # fully resolved structure: every component forms its own group
            struct_j = [(i, ) for i in range(G)]
        elif G < 15:
            struct_j = random.choice(P)
        else:
            print 'WARNING: improper structure sampling!'
            struct_j = setPartitions.get_random_partition(G)

        for i, grp in enumerate(struct_j):
            # the first component in each group acts as the group leader
            lg = list(grp)
            lgj = lg.pop(0)
            leaders_j.append(lgj)
            groups_j[lgj] = lg

            max_tries = 100000
            tries = 0

            if featureTypes[j] == 0:
                # rejection sampling: draw candidate discrete distributions
                # until one lies within [KL_lower, KL_upper] symmetric KL
                # distance of all previously accepted group distributions
                acc = 0
                while acc == 0:
                    cand = discSamp.sample()
                    acc = 1
                    for d in c_j:
                        KL_dist = mixture.sym_kl_dist(c_j[d], cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break

                    if tries >= max_tries:
                        raise RuntimeError('Failed to find separated parameters!')

                for cind in grp:
                    c_j[cind] = cand

            elif featureTypes[j] == 1:
                # same rejection scheme for Normal distributions with
                # uniformly drawn mean and standard deviation
                acc = 0
                while acc == 0:
                    mu = random.uniform(min_mu, max_mu)
                    sigma = random.uniform(min_sigma, max_sigma)
                    cand = mixture.NormalDistribution(mu, sigma)
                    acc = 1

                    for d in c_j:
                        KL_dist = mixture.sym_kl_dist(c_j[d], cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break

                    if tries >= max_tries:
                        raise RuntimeError('Failed to find separated parameters!')

                for cind in grp:
                    c_j[cind] = cand

            else:
                raise RuntimeError('invalid feature type: %s' % featureTypes[j])

        leaders.append(leaders_j)
        groups.append(groups_j)

        C.append(c_j)

    comps = []
    for i in range(G):
        comps.append(mixture.ProductDistribution([C[j][i] for j in range(p)]))

    pi = get_random_pi(G, 0.3 / G)

    # create prior
    piprior = mixture.DirichletPrior(G, [2.0] * G)

    cprior = []
    for j in range(p):
        if featureTypes[j] == 0:
            cprior.append(mixture.DirichletPrior(M, [1.02] * M))
        elif featureTypes[j] == 1:
            # dummy hyperparameters, to be set later
            cprior.append(mixture.NormalGammaPrior(0, 0, 0, 0))
        else:
            raise RuntimeError('invalid feature type: %s' % featureTypes[j])

    mprior = mixture.MixtureModelPrior(0.1, 0.1, piprior, cprior)

    m = mixture.BayesMixtureModel(G, pi, comps, mprior, struct=1)
    m.leaders = leaders
    m.groups = groups

    m.identifiable()
    m.updateFreeParams()

    return m
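A hedged example call (all values illustrative): draw a random 3-component CSI mixture over p=4 features whose conditional distributions are pairwise separated by a symmetric KL distance in [0.1, 5.0]. Note the function also relies on the module-level helpers setPartitions and get_random_pi used in its body:

rm = getRandomCSIMixture_conditionalDists(3, 4, 0.1, 5.0, M=4,
                                          dtypes='discgauss', seed=17)
bench = rm.sampleDataSet(400)   # sample a benchmark data set
print rm.leaders, rm.groups     # inspect the generated CSI structure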
Example #3
piPr = mixture.DirichletPrior(2, [1.0, 1.0])  # uniform prior over the mixture coefficients

# conjugate priors over the atomic distributions: Normal-Gamma for the Normal
# features, Dirichlet for the discrete features
compPrior = [
    mixture.NormalGammaPrior(1.5, 0.1, 3.0, 1.0),
    mixture.NormalGammaPrior(-2.0, 0.1, 3.0, 1.0),
    mixture.DirichletPrior(4, [1.0, 1.0, 1.0, 1.0]),
    mixture.DirichletPrior(4, [1.0, 1.0, 1.0, 1.0])
]

# putting together the mixture prior
prior = mixture.MixtureModelPrior(0.03, 0.03, piPr, compPrior)

# initializing Bayesian mixture model
pi = [0.4, 0.6]
m = mixture.BayesMixtureModel(2, pi, [c1, c2], prior, struct=1)
print "Initial parameters"
print m
# Now that the model is complete we can start using it.

# sampling data
data = m.sampleDataSet(600)

# randomize model parameters
m.modelInitialization(data)
print "Randomized parameters"
print m

# parameter training
m.mapEM(data, 40, 0.1)
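After MAP-EM the fitted parameters can be printed as above; assigning each sample to its most probable component is a one-liner, assuming the standard PyMix classify method:

print "Trained parameters"
print m

labels = m.classify(data)  # hard assignment of samples to components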
Example #4
    mixture.DirichletPrior(4, [3.1, 1.2, 1.1, 1.0])
]
dmixPrior = mixture.DirichletMixturePrior(2, 4, [0.5, 0.5], dPrior)

# assembling the model prior
compPrior = [
    mixture.NormalGammaPrior(1.5, 0.1, 3.0, 1.0),
    mixture.NormalGammaPrior(-2.0, 0.1, 3.0, 1.0), dmixPrior, dmixPrior
]

# putting together the prior for the whole mixture
prior = mixture.MixtureModelPrior(0.03, 0.03, piPr, compPrior)

# initializing Bayesian mixture model
pi = [0.4, 0.6]
m = mixture.BayesMixtureModel(2, pi, [c1, c2], prior)
print "Initial parameters"
print m
# Now that the model is complete we can start using it.

# sampling data
data = m.sampleDataSet(600)

# randomize model parameters
m.modelInitialization(data)
print "Randomized parameters"
print m

# parameter training
m.mapEM(data, 40, 0.1)
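The trained model can be persisted and reloaded with the serialization helpers demonstrated in Example #5 below; a sketch with a hypothetical file name:

mixture.writeMixture(m, 'dmix_model.bmix')  # hypothetical file name
m_restored = mixture.readMixture('dmix_model.bmix')
print m_restored.prior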
Example #5
    compPrior.append(mixture.NormalGammaDistribution(1.0, 2.0, 3.0, 4.0))

mixPrior = mixture.MixturePrior(0.7, 0.7, piPrior, compPrior)

DNA = mixture.Alphabet(['A', 'C', 'G', 'T'])
comps = []
for i in range(G):
    dlist = []
    for j in range(2):
        phi = mixture.random_vector(4)
        dlist.append(mixture.DiscreteDistribution(4, phi, DNA))
    for j in range(2):
        mu = j + 1.0
        sigma = j + 0.5
        dlist.append(mixture.NormalDistribution(mu, sigma))
    comps.append(mixture.ProductDistribution(dlist))
pi = mixture.random_vector(G)

m = mixture.BayesMixtureModel(G, pi, comps, mixPrior, struct=1)

# serialize the model to disk and read it back
mixture.writeMixture(m, 'test.bmix')
m2 = mixture.readMixture('test.bmix')

print m2
print m2.prior
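A quick round-trip check (sketch): sampling from the reloaded model exercises both the parameters and the prior that were just read back:

data2 = m2.sampleDataSet(100)  # the restored model is fully functional
print data2.N                  # number of samples in the new data set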
Example #6
sp2.setParams(data.getInternalFeature(1), 5)
sp3 = mixture.NormalGammaPrior(1.0, 1.0, 1.0, 1.0)
sp3.setParams(data.getInternalFeature(2), 5)

sp4 = mixture.DirichletPrior(4, [1.02] * 4)
pipr = mixture.DirichletPrior(5, [1.0] * 5)

# the structure prior hyperparameter (alpha) is chosen by the heuristic below
delta = 0.1
structPrior = 1.0 / (1.0 + delta)**data.N
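# A worked check with a hypothetical sample size: for delta = 0.1 and
# data.N = 100, structPrior = 1.0 / 1.1**100, roughly 7.3e-05, i.e. the
# structure prior shrinks geometrically as the data set grows.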

# creating the model prior
prior = mixture.MixtureModelPrior(structPrior, 0.03, pipr,
                                  [sp1, sp2, sp3, sp4])

# creating the model
tm = mixture.BayesMixtureModel(5,
                               tpi, [tc1, tc2, tc3, tc4, tc5],
                               prior,
                               struct=1)

# call to the learning algorithm
tm.bayesStructureEM(data, 1, 5, 40, 0.1)

# printing out the result of the training. The model should have three components and
# parameters closely matching the generating model.
print "---------------------"
print tm
print tm.leaders
print tm.groups