Exemplo n.º 1
0
    def new_model(self, emission_dists = None, dirichlet_strength = 10.0):
        """
        Create a new model: a background model plus a chain of binding site states on each strand.

        The model returned by self.background_model_creator is extended with one order-0 state per
        emission distribution (the positive strand) and one reverse complement state for each of
        those (the negative strand). The first self.num_background_mosaics states of the model are
        treated as the background states.

        @arg emission_dists: One emission distribution per binding site position. If None then
        self.emission_dists is used when it is set, otherwise self.K distributions are drawn from a
        dirichlet with parameter numpy.ones(4)*dirichlet_strength.
        @arg dirichlet_strength: The strength of the dirichlet. Only used when the distributions
        have to be drawn.
        @return: The new model.
        """
        model = self.background_model_creator(self.order, self.num_background_mosaics)

        # Have we been given any emission dist? Test identity with None: an equality test
        # against a numpy array is element-wise and cannot be used as a condition.
        if emission_dists is None:
            # no - were we initialised with some distribution?
            if self.emission_dists is None:
                # no - create a random one
                emission_dists = [ hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength) for k in xrange(self.K) ]
            else:
                # yes - use it
                emission_dists = self.emission_dists

        # add the states for the positive strand orientation
        positive_states = [
                self.model_builder.add_order_0_parameterised_state(
                        model,
                        pi=self.p_binding_site,
                        emission_dist=dist)
                for dist in emission_dists
        ]

        # add the states for the negative strand orientation
        negative_states = [
                self.model_builder.add_order_0_rev_comp_state(
                        model,
                        positive_state,
                        pi=self.p_binding_site)
                for positive_state in positive_states
        ]
        # the negative strand is traversed in the opposite direction
        negative_states.reverse()

        # connect the background to both first states with equal prob (one shared parameter)
        param = model.add_parameter(self.p_binding_site/2)
        for bg in xrange(self.num_background_mosaics):
            model.states[bg].add_successor(positive_states[0], param)
            model.states[bg].add_successor(negative_states[0], param)

        # connect the states in the pssm together
        one_param = model.add_parameter(1.0)
        for k in xrange(self.K-1):
            positive_states[k].add_successor(positive_states[k+1], one_param)
            negative_states[k].add_successor(negative_states[k+1], one_param)

        # connect the last states back to the background, spread evenly over the background states
        back_to_bg_param = model.add_parameter(1.0/self.num_background_mosaics)
        for bg in xrange(self.num_background_mosaics):
            positive_states[-1].add_successor(model.states[bg], back_to_bg_param)
            negative_states[-1].add_successor(model.states[bg], back_to_bg_param)

        return model
Exemplo n.º 2
0
    def emission_dist_including_n_mer(self, n_mer, strength=9.0, offset=0, dirichlet_strength = 10.0):
        '''
        Get an emission dist that incorporates the n-mer

        Draws self.K_prime distributions from a dirichlet of given strength (default 10) then adds
        the n-mer in the middle (default # copies of n-mer = 9). Position i of the n-mer is written
        to distribution 2*(i+offset); the distributions in between are left as drawn.

        @arg n_mer: The bases of the n-mer. Each base is used as an index into a 4-element
        distribution.
        @arg strength: The amount added to the entry for the n-mer's base at each of its positions
        before that distribution is renormalised.
        @arg offset: Ignored. Whatever is passed in is replaced by the offset that centres the
        n-mer, (self.K - len(n_mer))//2. The argument is kept so that existing callers still work.
        @arg dirichlet_strength: The strength of the dirichlet the distributions are drawn from.
        @return: A list of self.K_prime emission distributions.
        @raise ValueError: If the n-mer has more than self.K bases.
        '''
        n = len(n_mer)
        if n > self.K:
            # a negative offset would silently wrap around and modify the wrong states
            raise ValueError('n-mer of length %d does not fit into %d positions' % (n, self.K))
        emission_dists = [ hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength) for k in xrange(self.K_prime) ]
        # floor division keeps the offset usable as an index even under true division
        offset = (self.K - n)//2
        for i, base in enumerate(n_mer):
            idx = 2*(i+offset)
            # make sure the drawn array can be modified in place
            emission_dists[idx].setflags(write=True)
            emission_dists[idx][base] += strength
            emission_dists[idx] /= emission_dists[idx].sum()
        return emission_dists
Exemplo n.º 3
0
def create_mosaic_model(
  num_mosaics,
  p_transition,
  alphabet_size,
  order,
  dirichlet_prior_strength=None
):
    """
    Create a mosaic model.

    Each mosaic has an independent parameter that specifies the probability of transitioning to any other
    given mosaic (this effectively ties these transition probabilities together).

    NOTE(review): every other mosaic gets the full p_transition, so with more than 2 mosaics the
    outgoing parameters of a state do not sum to one unless the model normalises them - confirm.

    @arg num_mosaics: The number of mosaics.
    @arg p_transition: The value of the parameter for moving to each other mosaic. Staying in the
    same mosaic gets 1 - p_transition.
    @arg alphabet_size: The size of the output alphabet.
    @arg order: The Markov order of the output.
    @arg dirichlet_prior_strength: The strength of the uniform prior on the emission probabilities. If None
    then a uniform distribution is used.
    @return: The model.
    """

    builder = hmm.pssm.ModelBuilder(order, alphabet_size=alphabet_size)
    model = builder.new_model_by_states()

    for n in xrange(num_mosaics):
        # add the state
        if dirichlet_prior_strength is None:
            emission_dist = numpy.ones(model.M) / alphabet_size
        else:
            emission_dist = hmm.dirichlet_draw(numpy.ones(model.M)*dirichlet_prior_strength)
        builder.add_fully_parameterised_state(
          model,
          pi=1./num_mosaics,
          emission_dist=emission_dist
        )

    if 1 == num_mosaics:
        # a single mosaic can only ever transition to itself
        model.states[0].add_successor(model.states[0], model.add_parameter(1.))
    else:
        for s1 in model.states:
            # each state gets its own pair of parameters
            transition_param = model.add_parameter(p_transition)
            no_transition_param = model.add_parameter(1.0 - p_transition)
            for s2 in model.states:
                # an explicit branch: the "a and b or c" idiom picks c whenever b is falsy
                if s1 == s2:
                    s1.add_successor(s2, no_transition_param)
                else:
                    s1.add_successor(s2, transition_param)

    return model
Exemplo n.º 4
0
 def create_background_mosaic_model(self, num_mosaics, p_transition, dirichlet_prior_strength):
     """
     Create a fully connected mosaic model with randomly drawn emissions.

     Two parameters are shared by all the states: p_transition for moving to a different
     state and 1 - p_transition for staying in the same one.

     @arg num_mosaics: The number of states to add.
     @arg p_transition: The value of the parameter used between different states.
     @arg dirichlet_prior_strength: Scales numpy.ones(self.M) to give the argument of the
     dirichlet draw that provides each state's emission distribution.
     @return: The model.
     """
     mosaic_model = hmm.ModelByStates(self.M, self.order)

     # the shared parameters are added before any state is
     switch_param = mosaic_model.add_parameter(p_transition)
     stay_param = mosaic_model.add_parameter(1.0 - p_transition)

     # one state per mosaic, each with its own draw
     for _ in xrange(num_mosaics):
         drawn = hmm.dirichlet_draw(numpy.ones(self.M)*dirichlet_prior_strength)
         self.add_fully_parameterised_state(mosaic_model, emission_dist=drawn)

     # link every state to every state (including itself)
     for source in mosaic_model.states:
         for target in mosaic_model.states:
             if source == target:
                 chosen = stay_param
             else:
                 chosen = switch_param
             source.add_successor(target, chosen)

     return mosaic_model
Exemplo n.º 5
0
def create_mosaic_model(num_mosaics,
                        p_transition,
                        alphabet_size,
                        order,
                        dirichlet_prior_strength=None):
    """
    Create a mosaic model.

    Each mosaic has an independent parameter that specifies the probability of transitioning to any other
    given mosaic (this effectively ties these transition probabilities together).

    NOTE(review): every other mosaic gets the full p_transition, so with more than 2 mosaics the
    outgoing parameters of a state do not sum to one unless the model normalises them - confirm.

    @arg num_mosaics: The number of mosaics.
    @arg p_transition: The value of the parameter for moving to each other mosaic. Staying in the
    same mosaic gets 1 - p_transition.
    @arg alphabet_size: The size of the output alphabet.
    @arg order: The Markov order of the output.
    @arg dirichlet_prior_strength: The strength of the uniform prior on the emission probabilities. If None
    then a uniform distribution is used.
    @return: The model.
    """

    builder = hmm.pssm.ModelBuilder(order, alphabet_size=alphabet_size)
    model = builder.new_model_by_states()

    for n in xrange(num_mosaics):
        # add the state
        if dirichlet_prior_strength is None:
            emission_dist = numpy.ones(model.M) / alphabet_size
        else:
            emission_dist = hmm.dirichlet_draw(
                numpy.ones(model.M) * dirichlet_prior_strength)
        builder.add_fully_parameterised_state(
            model, pi=1. / num_mosaics, emission_dist=emission_dist)

    if 1 == num_mosaics:
        # a single mosaic can only ever transition to itself
        model.states[0].add_successor(model.states[0], model.add_parameter(1.))
    else:
        for s1 in model.states:
            # each state gets its own pair of parameters
            transition_param = model.add_parameter(p_transition)
            no_transition_param = model.add_parameter(1.0 - p_transition)
            for s2 in model.states:
                # an explicit branch: the "a and b or c" idiom picks c whenever b is falsy
                if s1 == s2:
                    s1.add_successor(s2, no_transition_param)
                else:
                    s1.add_successor(s2, transition_param)

    return model
Exemplo n.º 6
0
    def new_model(self, emission_dists = None, dirichlet_strength = 10.0, generate_uniform=False):
        """
        Create a new model: a background model plus a gapped chain of states on each strand.

        The model returned by self.background_model_creator is extended with self.K_prime order-0
        states (the positive strand) and a reverse complement state for each of them (the negative
        strand). Within a chain the state at index 2k moves to the state at 2k+1 with the gap
        parameter and skips straight to 2k+2 with the non-gap parameter; the state at 2k+1 always
        moves on to 2k+2. The first self.num_background_mosaics states of the model are treated as
        the background states.

        @arg emission_dists: One emission distribution per state in the chain. If None then
        self.emission_dists is used when it is set, otherwise self.K_prime distributions are drawn
        from a dirichlet with parameter numpy.ones(4)*dirichlet_strength.
        @arg dirichlet_strength: The strength of the dirichlet. Only used when the distributions
        have to be drawn.
        @arg generate_uniform: If true, every state gets a uniform emission distribution and
        emission_dists is not used.
        @return: The new model.
        @raise RuntimeError: If the number of emission distributions is not self.K_prime.
        """
        model = self.background_model_creator(self.order, self.num_background_mosaics)

        # Choose the emission distributions. None is tested by identity: an equality test
        # against a numpy array is element-wise and cannot be used as a condition.
        if generate_uniform:
            # a uniform emission model was asked for
            emission_dists = [ .25 * numpy.ones(4) for k in xrange(self.K_prime) ]
        elif emission_dists is None:
            # none given - were we initialised with some distribution?
            if self.emission_dists is None:
                # no - create a random one
                emission_dists = [ hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength) for k in xrange(self.K_prime) ]
            else:
                # yes - use it
                emission_dists = self.emission_dists
        if len(emission_dists) != self.K_prime:
            raise RuntimeError('Wrong number of emissions')

        # add the states for the positive strand orientation
        positive_states = [
                self.model_builder.add_order_0_parameterised_state(
                        model,
                        pi=self.p_binding_site,
                        emission_dist=dist)
                for dist in emission_dists
        ]

        # add the states for the negative strand orientation
        negative_states = [
                self.model_builder.add_order_0_rev_comp_state(
                        model,
                        positive_state,
                        pi=self.p_binding_site)
                for positive_state in positive_states
        ]
        # the negative strand is traversed in the opposite direction
        negative_states.reverse()

        # connect the background to both first states with equal prob (one shared parameter)
        param = model.add_parameter(self.p_binding_site/2)
        for bg in xrange(self.num_background_mosaics):
            model.states[bg].add_successor(positive_states[0], param)
            model.states[bg].add_successor(negative_states[0], param)

        # connect the states in the pssm together
        one_param = model.add_parameter(1.0)
        gap_params = [ model.add_parameter(self.p_gap) for i in xrange(self.K-1) ]
        non_gap_params = [ model.add_parameter(1.0-self.p_gap) for i in xrange(self.K-1) ]
        for k in xrange(self.K-1):
            positive_states[2*k+1].add_successor(positive_states[2*k+2], one_param)
            negative_states[2*k+1].add_successor(negative_states[2*k+2], one_param)
            # the negative states were reversed, so gap k on the positive strand is
            # gap K-2-k on the negative strand: both strands share the same parameter
            positive_states[2*k].add_successor(positive_states[2*k+1], gap_params[k])
            negative_states[2*k].add_successor(negative_states[2*k+1], gap_params[self.K-2-k])
            positive_states[2*k].add_successor(positive_states[2*k+2], non_gap_params[k])
            negative_states[2*k].add_successor(negative_states[2*k+2], non_gap_params[self.K-2-k])

        # connect the last states back to the background, spread evenly over the background states
        back_to_bg_param = model.add_parameter(1.0/self.num_background_mosaics)
        for bg in xrange(self.num_background_mosaics):
            positive_states[-1].add_successor(model.states[bg], back_to_bg_param)
            negative_states[-1].add_successor(model.states[bg], back_to_bg_param)

        return model
Exemplo n.º 7
0
# Script fragment: seeds the random number generators, draws emission distributions at
# several dirichlet strengths and builds one single gapped pssm for each strength.
# seed, K, p_gap, exp_sites_per_sequence and L are defined outside this fragment.
print "Seeding the random number generator with %d" % seed


# seed all the RNGs that we use
hmm.seed_rng(seed)
numpy.random.seed(seed)


# create something to build the gapped pssms
# NOTE(review): K / 2 is an integer only if K is an int and true division is not enabled - confirm
builder = single_gap.SingleGappedPssmBuilder(K=K, gap_position=K / 2, markov_order=0, M=4)


# create our emission distributions
# one array of builder.K draws, plus one separate draw for the gap, per strength
dirichlet_prior_strengths = [0.01, 0.1, 1.0]
emissions = [
    numpy.array([hmm.dirichlet_draw(numpy.ones(builder.M) * strength) for k in xrange(builder.K)])
    for strength in dirichlet_prior_strengths
]
gap_emissions = [hmm.dirichlet_draw(numpy.ones(builder.M) * strength) for strength in dirichlet_prior_strengths]


# create our single gapped pssms (one for each pair of non-gap and gap emissions)
pssms = [builder.create(p_gap, non_gap, gap) for non_gap, gap in zip(emissions, gap_emissions)]


# create our complete models (by adding a background model)
# NOTE(review): this truncates if both operands are ints under Python 2 division - confirm
p_binding_site = exp_sites_per_sequence / L
models = [
    hmm.as_model(
        single_gap.add_to_simple_background_model(
            model=pssm[0], in_states=pssm[1], out_states=pssm[2], p_binding_site=p_binding_site
Exemplo n.º 8
0
        complete_model.states[0].add_successor(
            complete_model.states[1 + in_state], binding_site_transition_param)
    for out_state in out_states:
        complete_model.states[1 + out_state].add_successor(
            complete_model.states[0], back_to_bg_transition_param)
    complete_model.states[0].pi = complete_model.add_parameter(1.)
    return complete_model


if __name__ == '__main__':
    import numpy

    # Demo: draw random emissions, build a single gapped pssm from them, attach it to a
    # simple background model and convert the result.
    builder = SingleGappedPssmBuilder(K=6, gap_index=1, markov_order=0, M=4)

    # one draw per position...
    draws = []
    for k in xrange(builder.K):
        draws.append(hmm.dirichlet_draw(numpy.ones(builder.M) * .1))
    emissions = numpy.array(draws)

    # ...then replace the row at the gap index with a draw of a different strength
    gap_draw = hmm.dirichlet_draw(numpy.ones(builder.M) * .3)
    emissions[builder.gap_index] = gap_draw

    model_by_states, in_states, out_states = builder.create(
        emissions=emissions, p_gap=.6)

    # add the pssm to a background model
    complete_model = add_to_simple_background_model(
        model_by_states, in_states, out_states, p_binding_site=.01)

    # convert to the other type of model
    model = hmm.as_model(complete_model)
Exemplo n.º 9
0
          back_to_bg_transition_param
        )
    complete_model.states[0].pi = complete_model.add_parameter(1.)
    return complete_model




# Demo run when the module is executed as a script: builds a single gapped pssm from
# random emissions and adds it to a simple background model.
if '__main__' == __name__:
    import numpy

    # build a single gapped pssm with some random emissions
    builder = SingleGappedPssmBuilder(K=6, gap_index=1, markov_order=0, M=4)
    # one draw per position, stacked into a single array
    emissions = numpy.array(
      [
        hmm.dirichlet_draw(numpy.ones(builder.M) * .1)
        for k in xrange(builder.K)
      ]
    )
    # the row at the gap index is replaced by a draw of a different strength
    emissions[builder.gap_index] = hmm.dirichlet_draw(numpy.ones(builder.M) * .3)
    # create() returns the model together with its in states and out states
    model_by_states, in_states, out_states = builder.create(
      p_gap=.6,
      emissions=emissions
    )

    # create a background model and add the single gapped pssm to it
    complete_model = add_to_simple_background_model(
      model_by_states,
      in_states,
      out_states,
      p_binding_site=.01)