def setup2():
    """
    Build a small two-component mixture model fixture.

    Each component is a ProductDistribution over two features: the first
    component combines two NormalDistribution objects, the second combines a
    NormalDistribution with a DiscreteDistribution. The
    NormalDistribution(1.0, 1.5) instance is shared by both components.

    @return: MixtureModel with two components and weights [0.4, 0.6]
    """
    shared_normal = mixture.NormalDistribution(1.0, 1.5)
    other_normal = mixture.NormalDistribution(2.0, 0.5)
    discrete = mixture.DiscreteDistribution(4, [0.1, 0.4, 0.4, 0.1])

    # Note that the two components do not have the same feature types in
    # their second position (Normal vs. discrete).
    components = [
        mixture.ProductDistribution([shared_normal, other_normal]),
        mixture.ProductDistribution([shared_normal, discrete]),
    ]

    return mixture.MixtureModel(2, [0.4, 0.6], components)
def testLymphData():
    """
    Run EM for a five-component mixture of dependence trees on a data file.

    Every component is a ProductDistribution wrapping a single
    DependenceTreeDistribution over 11 features whose three list arguments
    are all zeros. Component weights are uniform. The data set is read from
    'data/ltree2_2fold.txt' (relative to the working directory), the model is
    initialised from it and EM is called with arguments (100, 0.01) and
    silent=1. Nothing is returned.
    """
    k = 5
    d = 11

    models = []
    for _ in range(k):
        # Three separate zero lists per component, so no list is shared.
        tree_dist = mixture.DependenceTreeDistribution(
            d, [0] * d, [0] * d, [0] * d)
        models.append(mixture.ProductDistribution([tree_dist]))

    # Uniform mixture weights: 1/k for each component.
    weights = np.array([1.0] * k) / k
    train = mixture.MixtureModel(k, weights, models)

    data = mixture.DataSet()
    data.fromFiles(['data/ltree2_2fold.txt'])

    train.modelInitialization(data)
    train.EM(data, 100, 0.01, silent=1)
def getBayesModel(G, p, mixPrior=None):
    """
    Constructs a PWM CSI BayesMixtureModel.

    Component parameters and mixture weights are obtained from
    mixture.random_vector.

    @param G: number of components
    @param p: number of positions of the binding site
    @param mixPrior: prior handed to the BayesMixtureModel. If it is omitted
    (or is any other falsy value) a default MixtureModelPrior is built from
    Dirichlet priors.

    @return: BayesMixtureModel object
    """
    if not mixPrior:
        weight_prior = mixture.DirichletPrior(G, [1.0] * G)
        position_priors = [
            mixture.DirichletPrior(4, [1.02, 1.02, 1.02, 1.02])
            for _ in range(p)
        ]

        # The two leading 0.05 values (struct and comp parameters) are
        # arbitrary. They should be reset by the user using the
        # structPriorHeuristic method.
        mixPrior = mixture.MixtureModelPrior(
            0.05, 0.05, weight_prior, position_priors)

    alphabet = mixture.Alphabet(['A', 'C', 'G', 'T'])

    # One ProductDistribution per component, each holding p discrete
    # distributions over the DNA alphabet.
    components = [
        mixture.ProductDistribution([
            mixture.DiscreteDistribution(4, mixture.random_vector(4), alphabet)
            for _ in range(p)
        ])
        for _ in range(G)
    ]

    weights = mixture.random_vector(G)
    return mixture.BayesMixtureModel(
        G, weights, components, mixPrior, struct=1)
def setup():
    """
    Build a two-component mixture over four features.

    In both components the first two features are NormalDistribution objects
    and the last two are DiscreteDistribution objects created with 4 as
    their first argument.

    @return: MixtureModel with two components and weights [0.4, 0.6]
    """
    # first component
    first = mixture.ProductDistribution([
        mixture.NormalDistribution(1.0, 1.5),
        mixture.NormalDistribution(2.0, 0.5),
        mixture.DiscreteDistribution(4, [0.1, 0.4, 0.4, 0.1]),
        mixture.DiscreteDistribution(4, [0.25, 0.25, 0.25, 0.25]),
    ])

    # second component
    second = mixture.ProductDistribution([
        mixture.NormalDistribution(4.0, 0.5),
        mixture.NormalDistribution(-6.0, 0.5),
        mixture.DiscreteDistribution(4, [0.7, 0.1, 0.1, 0.1]),
        mixture.DiscreteDistribution(4, [0.1, 0.1, 0.2, 0.6]),
    ])

    weights = [0.4, 0.6]
    return mixture.MixtureModel(2, weights, [first, second])
def getModel(G, p):
    """
    Constructs a PWM MixtureModel.

    Component parameters and mixture weights are obtained from
    mixture.random_vector.

    @param G: number of components
    @param p: number of positions of the binding site

    @return: MixtureModel object
    """
    alphabet = mixture.Alphabet(['A', 'C', 'G', 'T'])

    # One ProductDistribution per component, each holding p discrete
    # distributions over the DNA alphabet.
    components = [
        mixture.ProductDistribution([
            mixture.DiscreteDistribution(4, mixture.random_vector(4), alphabet)
            for _ in range(p)
        ])
        for _ in range(G)
    ]

    weights = mixture.random_vector(G)
    return mixture.MixtureModel(G, weights, components)
def testdtree():
    """
    Sample from a two-component conditional Gaussian mixture and refit it.

    A generating MixtureModel is built from two ConditionalGaussDistribution
    components with different tree dictionaries, 1000 samples are drawn from
    it after seeding `random` with 1, and the sampled data set is printed. A
    second model using the same two tree dictionaries but different
    parameters is then initialised from the data and EM is called with
    arguments (100, 0.01) and silent=1. Nothing is returned.
    """
    # Tree dictionaries handed to ConditionalGaussDistribution.
    # presumably child index -> parent index with -1 marking the root;
    # verify against the mixture module.
    tree = {0: -1, 1: 0, 2: 1}
    tree2 = {0: -1, 1: 0, 2: 0}

    # Generating model.
    n1 = mixture.ProductDistribution([
        mixture.ConditionalGaussDistribution(
            3, [0, 1, 0], [0, -0.1, 0.1], [0.5, 0.5, 0.5], tree)])
    n2 = mixture.ProductDistribution([
        mixture.ConditionalGaussDistribution(
            3, [-1, 0, 1], [0, 0.1, -0.1], [0.5, 0.5, 0.5], tree2)])

    pi = [0.4, 0.6]
    gen = mixture.MixtureModel(2, pi, [n1, n2])

    random.seed(1)
    data = gen.sampleDataSet(1000)

    # print() with a single argument behaves the same as the old print
    # statement on Python 2 and is valid syntax on Python 3.
    print(data)

    # NOTE(review): these two DependenceTreeDistribution components are
    # replaced immediately below and never used; they are kept only in case
    # construction has side effects - confirm and remove.
    n1 = mixture.ProductDistribution([
        mixture.DependenceTreeDistribution(
            3, [0.1, 1.1, 0.1], [0, 0, 0], [1.0, 1.0, 1.0])])
    n2 = mixture.ProductDistribution([
        mixture.DependenceTreeDistribution(
            3, [-1, 0, -0.1], [0, 0, 0], [1.0, 1.0, 1.0])])

    # Model to be trained: same trees as the generating model, different
    # parameter values.
    n1 = mixture.ProductDistribution([
        mixture.ConditionalGaussDistribution(
            3, [0, 1, 0], [0.0, 0.1, 0.1], [0.1, 0.1, 0.1], tree)])
    n2 = mixture.ProductDistribution([
        mixture.ConditionalGaussDistribution(
            3, [-1, 0, 1], [0.0, 0.1, 0.1], [0.1, 0.1, 0.1], tree2)])

    train = mixture.MixtureModel(2, pi, [n1, n2])
    train.modelInitialization(data)
    train.EM(data, 100, 0.01, silent=1)
def getBackgroundModel(p, dist=None):
    """
    Construct background model

    @param p: number of positions of the binding site
    @param dist: background nucleotide frequencies, uniform is default

    @return: MixtureModel representing the background
    """
    DNA = mixture.Alphabet(['A', 'C', 'G', 'T'])

    # Identity test rather than `== None`: an equality comparison on an
    # array-like `dist` may be evaluated element-wise, which makes the
    # condition ambiguous instead of simply detecting the missing argument.
    if dist is None:
        phi = [0.25] * 4
    else:
        phi = dist

    # The same frequency vector is used at every one of the p positions.
    dlist = [mixture.DiscreteDistribution(4, phi, DNA) for _ in range(p)]

    # A background is a mixture with one component carrying all the weight.
    comps = [mixture.ProductDistribution(dlist)]
    return mixture.MixtureModel(1, [1.0], comps)
[dist_spelling, missing_spelling], compFix=[0, 2]) # diagnoses for cormobidit disorders #"ODD" "CONDUCT" "SOC PHO" "SEP ANX" "SPEC PHO" "ENUR NOC" "ENUR DIU" "ENCOPRES" "TOURET" "TIC CRON" "TIC TRAN" comor = [] for j in range(COMOR): p_comor = [0.0] + mixture.random_vector(3) comor_missing = mixture.MultinomialDistribution( 1, 4, [1.0, 0.0, 0.0, 0.0], DIAG) comor_mult = mixture.MultinomialDistribution(1, 4, p_comor, DIAG) comor_mix = mixture.MixtureModel(2, [0.999, 0.001], [comor_mult, comor_missing], compFix=[0, 2]) comor.append(comor_mix) pd_comor = mixture.ProductDistribution(comor) # the drd4 VNTR are represented as a discrete distribution over the observed lengths, # the specific repeat sequence tpyes are not considered at this time p_drd4_vntr_len = [0.0] + mixture.random_vector(10) dist_drd4_vntr_len = mixture.MultinomialDistribution( 1, 11, p_drd4_vntr_len, VNTR) vntr_missing = mixture.MultinomialDistribution(1, 11, [1.0] + [0.0] * 10, VNTR) mix_drd4_vntr_len = mixture.MixtureModel( 2, [0.999, 0.001], [dist_drd4_vntr_len, vntr_missing], compFix=[0, 2]) components.append( mixture.ProductDistribution([ mix_bd, mix_voc, mix_read, mix_math, mix_spelling, pd_comor,
1237], [27, 1250], [-35, 1213], [-36, 1176], [-38, 1151], [-38, 1044], [12, 1036], [-19, 995], [-37, 962], [-23, 925], [55, 950], [23, 931], [-20, 873], [20, 855], [23, 800], [-7, 781], [-28, 741], [30, 548], [-16, 482], [9, 289], [-31, 272], [-35, 167], [-19, 155], [30, 151], [62, 149], [-35, 110], [-24, 100], [38, 102], [-30, 73], [-24, 49]]) ndistribs = 50 distribs = [] for mu in pos: #mu = np.random.random(nd) #mu = mins + mu * ranges xd = pm.NormalDistribution(mu[0], 30) yd = pm.NormalDistribution(mu[1], 30) distrib = pm.ProductDistribution([xd, yd]) distribs.append(distrib) # add some extra random ones for i in range(len(pos), ndistribs): mu = np.random.random(nd) mu = mins + mu * ranges xd = pm.NormalDistribution(mu[0], 30) yd = pm.NormalDistribution(mu[1], 30) distrib = pm.ProductDistribution([xd, yd]) distribs.append(distrib) ''' # add background noise distrib, see 2006 Bar-Hillel #datamu = data.mean(axis=0) #datasigma = data.std(axis=0) #k = 2 #xd = pm.NormalDistribution(datamu[0], k*datasigma[0])