from pomegranate import HiddenMarkovModel
import calculator
from converter_to import converter_to
from model_maker_utils import sequence_state_factory
from model_maker_utils import classify
from model_maker_utils import add_sequence
from model_maker_utils import equal_distribution
from matrix_from_aln import matrix_from_exa

# Build the 'intron_acceptor' HMM. Transitions declared in this script:
#   start -> intron (self-loop) -> acceptor0 states -> post (self-loop) -> end

# Acceptor training data: the matrix read from the .exa file is wrapped in a
# numpy array and passed to classify() with 2 as its second argument.
# NOTE(review): the variables are named "acceptor0" but the input file is
# 'new_acceptor1.exa' -- confirm this is the intended data set.
acceptor0_rows = matrix_from_exa('new_acceptor1.exa')
matrixAcceptor0 = numpy.array(acceptor0_rows)
acceptor0_data = classify(matrixAcceptor0, 2)

model = HiddenMarkovModel('intron_acceptor')

# Emitting states. The intron state uses the `.p` table produced by
# calculator.intron_calculator(); the trailing 'post' state uses
# equal_distribution from model_maker_utils.
intron_emissions = calculator.intron_calculator('cuts_intron.txt').p
intron = State(DiscreteDistribution(intron_emissions), name='in')
acceptor0_states = sequence_state_factory(acceptor0_data, 'acceptor0')
post_emissions = DiscreteDistribution(equal_distribution)
post = State(post_emissions, name='post')

# Register the states. add_sequence() receives the whole acceptor list
# (presumably it also links consecutive states -- see model_maker_utils).
model.add_state(intron)
add_sequence(model, acceptor0_states)
model.add_state(post)

# (source, target, probability) edges, applied in the listed order.
# The outgoing probabilities of 'in' (0.9 + 0.1) and 'post' (0.5 + 0.5)
# each sum to 1.
transitions = (
    (model.start, intron, 1),
    (intron, intron, 0.9),
    (intron, acceptor0_states[0], 0.1),
    (acceptor0_states[-1], post, 1),
    (post, post, 0.5),
    (post, model.end, 0.5),
)
for source, target, probability in transitions:
    model.add_transition(source, target, probability)
# Example #2
0
# State chains for the donor/acceptor signals used by this excerpt.
# Each matrix is passed to classify() with 2 as its second argument, and the
# result is turned into a list of named states by sequence_state_factory().
# NOTE(review): matrixDonor2 and matrixAcceptor0..2 are defined outside the
# lines visible here.
donor2_data = classify(matrixDonor2, 2)
donor2_states = sequence_state_factory(donor2_data, 'donor2')

acceptor0_data = classify(matrixAcceptor0, 2)
acceptor0_states = sequence_state_factory(acceptor0_data, 'acceptor0')

acceptor1_data = classify(matrixAcceptor1, 2)
acceptor1_states = sequence_state_factory(acceptor1_data, 'acceptor1')

acceptor2_data = classify(matrixAcceptor2, 2)
acceptor2_states = sequence_state_factory(acceptor2_data, 'acceptor2')

coding_model = HiddenMarkovModel()

# Parse 'cuts_intron.txt' once; every intron-like state below is built from
# the same `.p` emission table. The original re-ran intron_calculator() just
# for `back`, repeating the file parse for an identical table.
intron_distribution = calculator.intron_calculator('cuts_intron.txt')
back = State(DiscreteDistribution(intron_distribution.p), name='back')

# Second state with the same emission table, registered under the name 'back2'.
fake_back = State(DiscreteDistribution(intron_distribution.p), name='back2')

# One intron state per index 0..2, plus a run of spacer states for each.
# The first argument of spacer_states_maker() is presumably the number of
# spacer states to create -- confirm against its definition.
in0 = State(DiscreteDistribution(intron_distribution.p), name='in0')
in1 = State(DiscreteDistribution(intron_distribution.p), name='in1')
in2 = State(DiscreteDistribution(intron_distribution.p), name='in2')

in0_spacers = spacer_states_maker(64, intron_distribution.p, 'in0 spacer')
in1_spacers = spacer_states_maker(64, intron_distribution.p, 'in1 spacer')
in2_spacers = spacer_states_maker(64, intron_distribution.p, 'in2 spacer')

# `c0` is defined outside the visible lines. The state name is passed by
# keyword, matching every other State(...) call in this block.
coding_state0 = State(DiscreteDistribution(c0.p), name='coding state 0')
# Example #3
0
from pomegranate import DiscreteDistribution


# Load one matrix per input sequence file. Judging by the file and variable
# names these are presumably promoter motifs (TATA, GC, CCAAT, Inr) plus a
# 'no_inr' set -- confirm against the data files.
matrix_TATA = numpy.array(matrix_from_fasta('tata_-5_11_completa.seq'))
matrix_GC = numpy.array(matrix_from_fasta('gc_completo.seq'))
matrix_CCAAT = numpy.array(matrix_from_fasta('CCAAT_completa.seq'))
matrix_Inr = numpy.array(matrix_from_fasta('Inr_completo.seq'))
matrix_no_inr = numpy.array(matrix_from_fasta('no_inr.fa'))

# Each matrix is passed to classify() with 2 as its second argument; the
# results feed sequence_state_factory() further down.
gc_data = classify(matrix_GC, 2)
tata_data = classify(matrix_TATA, 2)
cat_data = classify(matrix_CCAAT, 2)
inr_data = classify(matrix_Inr, 2)
no_inr_data = classify(matrix_no_inr, 2)

# Parsed once; its `.p` table is reused as the emission table for the
# background state and for every spacer state below.
no_coding = calculator.intron_calculator('cuts_intron.txt')


# Model: the HMM is created with the name 'promoter'.
promoter_utr_model = HiddenMarkovModel('promoter')

# States: built here; none of them is added to the model within the lines
# visible in this excerpt.
back = State(DiscreteDistribution(no_coding.p), name='back')

gc_states = sequence_state_factory(gc_data, 'GC')
# Spacer runs after the GC states, one pair per label suffix ('tss', 'tata').
# The first argument of spacer_states_maker() is presumably the number of
# spacer states to create (151 / 38 / 18 here) -- confirm against its
# definition.
post_gc_var_spacers_tss = spacer_states_maker(151, no_coding.p, 'post gc var spacer tss')
post_gc_spacers_tss = spacer_states_maker(38, no_coding.p, 'post gc spacer tss')

post_gc_var_spacers_tata = spacer_states_maker(151, no_coding.p, 'post gc var spacer tata')
post_gc_spacers_tata = spacer_states_maker(18, no_coding.p, 'post gc spacer tata')
# Example #4
0
import calculator
from converter_to import converter_to
from model_maker_utils import sequence_state_factory
from model_maker_utils import classify
from model_maker_utils import add_sequence
from model_maker_utils import equal_distribution
from matrix_from_aln import matrix_from_exa

# Build the 'intron_acceptor' HMM from the files under '../data extractors/'.
# Transitions declared in this script:
#   start -> intron (self-loop) -> acceptor0 states -> post (self-loop) -> end

# Acceptor training data: matrix from the .exa file, wrapped in a numpy
# array, then passed to classify() with 2 as its second argument.
# NOTE(review): the variables are named "acceptor0" but the input file is
# 'new_acceptor1.exa' -- confirm this is the intended data set.
acceptor_matrix_raw = matrix_from_exa('../data extractors/new_acceptor1.exa')
matrixAcceptor0 = numpy.array(acceptor_matrix_raw)
acceptor0_data = classify(matrixAcceptor0, 2)

model = HiddenMarkovModel('intron_acceptor')

# Emitting states: 'in' uses the `.p` table from intron_calculator(),
# 'post' uses equal_distribution from model_maker_utils.
intron_table = calculator.intron_calculator(
    '../data extractors/new_cuts_intron.txt').p
intron = State(DiscreteDistribution(intron_table), name='in')
acceptor0_states = sequence_state_factory(acceptor0_data, 'acceptor0')
uniform_emissions = DiscreteDistribution(equal_distribution)
post = State(uniform_emissions, name='post')

# Register the states. add_sequence() is handed the whole acceptor list
# (presumably it also links consecutive states -- see model_maker_utils).
model.add_state(intron)
add_sequence(model, acceptor0_states)
model.add_state(post)

# Edge table: (from_state, to_state, probability), applied top to bottom.
# Outgoing probabilities of 'in' (0.9 + 0.1) and 'post' (0.5 + 0.5) sum to 1.
edge_table = [
    (model.start, intron, 1),
    (intron, intron, 0.9),
    (intron, acceptor0_states[0], 0.1),
    (acceptor0_states[-1], post, 1),
    (post, post, 0.5),
    (post, model.end, 0.5),
]
for from_state, to_state, weight in edge_table:
    model.add_transition(from_state, to_state, weight)
# Example #5
0
from pomegranate import State
from pomegranate import HiddenMarkovModel
from pomegranate import DiscreteDistribution
from matrix_from_aln import matrix_from_exa

# Restore the promoter model from its JSON dump on disk.
with open('promoter_utr_model_base.json') as base_model_file:
    promoter_model_json = base_model_file.read()

promoter_model = HiddenMarkovModel.from_json(promoter_model_json)

# Donor / acceptor matrices: read from the .exa files, then wrapped in
# numpy arrays (donor first, acceptor second, as in the original script).
donor0_rows = matrix_from_exa('new_donor0.exa')
matrixDonor0 = numpy.array(donor0_rows)
acceptor0_rows = matrix_from_exa('new_acceptor0.exa')
matrixAcceptor0 = numpy.array(acceptor0_rows)

# Both matrices are passed to classify() with 2 as the second argument.
donor0_data = classify(matrixDonor0, 2)
acceptor0_data = classify(matrixAcceptor0, 2)

# `.p` table from intron_calculator(); used for the spacer states and for
# the 'utr intron' state below.
intron_stats = calculator.intron_calculator('cuts_intron.txt')
no_coding_dist = intron_stats.p

donor_states = sequence_state_factory(donor0_data, 'donor0')
acceptor_states = sequence_state_factory(acceptor0_data, 'acceptor0')
# First argument is presumably the number of spacer states -- confirm
# against spacer_states_maker's definition.
intron_spacer_states = spacer_states_maker(10, no_coding_dist, 'intron spacer')

utr_model = HiddenMarkovModel('utr_model')

# States
exon_emissions = DiscreteDistribution(calculator.utr_exon_5('mcutsa.txt').p)
exon_state = State(exon_emissions, name='utr exon')
intron_emissions = DiscreteDistribution(no_coding_dist)
intron_state = State(intron_emissions, name='utr intron')

# Embed the loaded promoter model first, then register the two UTR states
# in the same order as before (exon, then intron).
utr_model.add_model(promoter_model)
for utr_state in (exon_state, intron_state):
    utr_model.add_state(utr_state)