Пример #1
0
    def setUp(self):
        """Build the random variables of a small hidden-Markov-style model.

        Sets ``self.s_rng``, the state/observation counts, and four callables
        (``observation_model``, ``transition_model``, ``transition``,
        ``observation``) that create random variables from ``self.s_rng``.
        """
        s_rng = self.s_rng = RandomStreams(23424)

        self.nr_states = 5
        self.nr_obs = 3

        # Per-state parameter vectors, each a Dirichlet draw with all-ones
        # concentration. Wrapped in `memoized` -- presumably so that repeated
        # calls with the same state reuse one variable (confirm in for_theano).
        self.observation_model = memoized(lambda state: s_rng.dirichlet([1] * self.nr_obs))
        self.transition_model = memoized(lambda state: s_rng.dirichlet([1] * self.nr_states))

        # One multinomial draw each for the next state and the emitted
        # observation. The lookup must use `transition_model` exactly as
        # assigned above; a misspelled attribute name here only fails once
        # the lambda is actually called.
        self.transition = lambda state: s_rng.multinomial(1, self.transition_model(state))
        self.observation = lambda state: s_rng.multinomial(1, self.observation_model(state))

        def transition(obs, state):
            # Step function: new [observation, state], no updates, and a stop
            # condition that compares the state with the one-hot vector of
            # the last (fifth) state.
            return [self.observation(state), self.transition(state)], {}, until(state == numpy.asarray([0, 0, 0, 0, 1]))

        # NOTE(review): `obs` and `state` are not defined in this scope and
        # the step function `transition` above is never handed to `scan`;
        # this call looks unfinished -- confirm the intended arguments.
        [self.sampled_words, self.sampled_states], updates = scan([], [obs, state])
Пример #2
0
    def setUp(self):
        """Create the random streams and model callables used by the tests.

        Stores on ``self``: the random stream ``s_rng``, the number of hidden
        states and observation symbols, memoized per-state Dirichlet
        parameter factories, and lambdas that draw one multinomial sample
        for the next state / the emitted observation.
        """
        s_rng = self.s_rng = RandomStreams(23424)

        self.nr_states = 5
        self.nr_obs = 3

        # All-ones Dirichlet draw per state for the emission and transition
        # parameters.
        self.observation_model = memoized(
            lambda state: s_rng.dirichlet([1] * self.nr_obs))
        self.transition_model = memoized(
            lambda state: s_rng.dirichlet([1] * self.nr_states))

        # The attribute read here has to match `self.transition_model` as
        # assigned above; the lambda body is only evaluated on call, so a
        # wrong name would surface late as an AttributeError.
        self.transition = lambda state: s_rng.multinomial(
            1, self.transition_model(state))
        self.observation = lambda state: s_rng.multinomial(
            1, self.observation_model(state))

        def transition(obs, state):
            # Returns (outputs, updates, stop condition); the condition
            # compares the state with the one-hot vector [0, 0, 0, 0, 1].
            return [self.observation(state),
                    self.transition(state)
                    ], {}, until(state == numpy.asarray([0, 0, 0, 0, 1]))

        # NOTE(review): neither `obs` nor `state` exists in this scope, and
        # the local `transition` step function is not passed to `scan`;
        # the call appears incomplete -- confirm the intended arguments.
        [self.sampled_words, self.sampled_states], updates = scan([],
                                                                  [obs, state])
import theano
from theano import tensor
from rstreams import RandomStreams
import distributions
from sample import mh2_sample, mh_sample
from for_theano import memoized, evaluate

# Shared random stream from which every stochastic node below is drawn.
s_rng = RandomStreams(123)

# Vocabulary size, number of topics, and the totals that the symmetric
# Dirichlet concentrations below sum to.
nr_words, nr_topics = 4, 2
alpha, beta = 0.8, 1.


# Topic distribution per document
@memoized
def doc_mixture(doc_id):
    return s_rng.dirichlet([alpha / nr_topics] * nr_topics)


# Word distribution per topic
@memoized
def topic_mixture(top_id):
    return s_rng.dirichlet([beta / nr_words] * nr_words)


# For each word in the document, draw a topic according to multinomial with document specific prior
# TODO, see comment below: topics = memoized(lambda doc_id, nr: s_rng.multinomial(1, doc_mixture[doc_id], draw_shape=(nr,)))
@memoized
def topics(doc_id, nr):
    # Two-topic stand-in for the multinomial: `nr` binary draws whose
    # parameter is the first component of the document's topic mixture.
    return s_rng.binomial(1, doc_mixture(doc_id)[0], draw_shape=(nr,))


# Draw words for a specific topic
def word_topic(top_id):
    return s_rng.multinomial(1, topic_mixture(top_id))


# TODO: memoized only works on the pre-compiled graph. This makes it fail in the case where we have to map
# a vector of topics to individual multinomials with as priors the different topics. In the case of two topics
# we can hack around this by using a binomial topic distribution and using a switch statement here:
def word_topic_mapper(top_id):
    # A non-zero draw selects word_topic(0), a zero draw word_topic(1).
    return tensor.switch(top_id, word_topic(0), word_topic(1))
import theano
from theano import tensor
from rstreams import RandomStreams
import distributions
from sample import mh2_sample, mh_sample
from for_theano import memoized, evaluate

# Random stream shared by all stochastic nodes of the model.
s_rng = RandomStreams(123)

# Model sizes and Dirichlet hyper-parameters.
nr_words, nr_topics = 4, 2
alpha, beta = 0.8, 1.


@memoized
def doc_mixture(doc_id):
    """Topic distribution per document."""
    concentration = [alpha / nr_topics] * nr_topics
    return s_rng.dirichlet(concentration)


@memoized
def topic_mixture(top_id):
    """Word distribution per topic."""
    concentration = [beta / nr_words] * nr_words
    return s_rng.dirichlet(concentration)


# For each word in the document, draw a topic according to multinomial with document specific prior
# TODO, see comment below: topics = memoized(lambda doc_id, nr: s_rng.multinomial(1, doc_mixture[doc_id], draw_shape=(nr,)))
@memoized
def topics(doc_id, nr):
    """Draw `nr` binary topic indicators for document `doc_id`."""
    # Only the first mixture component is used as the binomial parameter.
    first_topic_weight = doc_mixture(doc_id)[0]
    return s_rng.binomial(1, first_topic_weight, draw_shape=(nr, ))


def word_topic(top_id):
    """Draw words for a specific topic."""
    return s_rng.multinomial(1, topic_mixture(top_id))

# TODO: memoized only works on the pre-compiled graph. This makes it fail in the case where we have to map
# a vector of topics to individual multinomials with as priors the different topics. In the case of two topics
import numpy, pylab
import theano
from rstreams import RandomStreams
import distributions
from sample import mh2_sample
from for_theano import memoized

# Random stream shared by every stochastic node in this script.
s_rng = RandomStreams(23424)

# Shared prototype for all bags: a Dirichlet draw over five colours scaled
# by a gamma-distributed factor. The dirichlet call stays ahead of the gamma
# call so the stream is consumed in the same order.
phi = s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1]))
alpha = s_rng.gamma(2., 2.)
prototype = phi * alpha


@memoized
def bag_prototype(bag):
    """Colour distribution of `bag`, a Dirichlet draw with `prototype` as parameter."""
    return s_rng.dirichlet(prototype)


def draw_marbles(bag, nr):
    """Draw `nr` marbles from `bag`, each as a multinomial(1, ...) sample."""
    return s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,))


def _single_colour_draws(colour, nr_draws=6, nr_colours=5):
    """Return `nr_draws` one-hot rows that all select column `colour`."""
    # Filled colour-major and then transposed, so the result is a transposed
    # view with the same shape (nr_draws, nr_colours) as a hand-written
    # literal followed by `.T`.
    by_colour = numpy.zeros((nr_colours, nr_draws), dtype=theano.config.floatX)
    by_colour[colour] = 1
    return by_colour.T


# Observed draws: six marbles per bag, all of a single colour.
marbles_bag_1 = _single_colour_draws(0)
marbles_bag_2 = _single_colour_draws(1)
marbles_bag_3 = _single_colour_draws(3)
Пример #6
0
def _single_colour_draws(colour, nr_draws, nr_colours=5):
    """Return `nr_draws` one-hot rows that all select column `colour`."""
    # Built colour-major and transposed, giving a (nr_draws, nr_colours)
    # transposed view just like a hand-written literal followed by `.T`.
    by_colour = numpy.zeros((nr_colours, nr_draws), dtype=theano.config.floatX)
    by_colour[colour] = 1
    return by_colour.T


# Observed draws: six marbles each for bags 2 and 3, a single marble for bag 4.
marbles_bag_2 = _single_colour_draws(1, 6)
marbles_bag_3 = _single_colour_draws(3, 6)
marbles_bag_4 = _single_colour_draws(4, 1)



# Define flat model
# Every bag gets its own colour distribution, a Dirichlet draw with
# concentration [5, 5, 5, 5, 5]. `memoized` presumably makes repeated calls
# with the same bag share one variable -- confirm in for_theano.
bag_prototype =  memoized(lambda bag: s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1])*5))
# `nr` multinomial(1, ...) draws from the given bag. This lambda is not
# memoized, so each call creates a new draw.
draw_marbles = lambda bag, nr: s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,))

# Generate samples from the model
# Condition on the observed marbles: six draws each from bags 1-3 and a
# single draw from bag 4.
# NOTE(review): marbles_bag_1 is not defined in this excerpt; it must come
# from earlier in the file.
givens = {draw_marbles(1,6): marbles_bag_1,
            draw_marbles(2,6): marbles_bag_2,
            draw_marbles(3,6): marbles_bag_3,
            draw_marbles(4,1): marbles_bag_4}

# The queried variable is a separate draw_marbles(4, 1) call, not the one
# used as a key in `givens`; both go through bag_prototype(4).
sampler = mh2_sample(s_rng, [draw_marbles(4,1)], givens)            

# NOTE(review): the meaning of the three positional arguments is defined by
# mh2_sample's sampler -- confirm against the `sample` module.
samples = sampler(200, 100, 100)
# First entry of the result; presumably the samples for the single queried
# variable -- verify against mh2_sample.
data = samples[0]

# Show histogram
# 211 selects the first panel of a 2-row, 1-column subplot grid.
pylab.subplot(211)