def main(args):
    """
    Print one annotated line per observed position.

    Each line written to stdout is tab-separated and holds the fields of
    the observation, then the posterior probability of each of the four
    hidden states (recent, ancient, misaligned, garbage), then the value
    returned by get_maxpost for those four posteriors.
    @param args: an object providing out_forward, out_scaling,
    out_backward, misalignment_effect, region_size and obsfile;
    it is also handed as a whole to DGRP.Model.from_fieldstorage
    """
    # presumably the files in which the HMM keeps its forward, scaling
    # and backward arrays -- confirm against ExternalHMM.ExternalModel
    filenames = (args.out_forward, args.out_scaling, args.out_backward)
    # aggregate and validate the model parameters
    model = DGRP.Model()
    model.from_fieldstorage(args)
    # see how the states interact with the observations
    states = (
        model.get_recent_state(),
        model.get_ancient_state(),
        model.get_misaligned_state(args.misalignment_effect),
        model.get_garbage_state())
    # define the transition object;
    # the min() keeps the transition parameter from exceeding 1.0
    nstates = len(states)
    prandom = min(1.0, (nstates / (nstates - 1.0)) / args.region_size)
    T = TransitionMatrix.UniformTransitionObject(prandom, nstates)
    # make the hmm
    hmm = ExternalHMM.ExternalModel(T, states, filenames)
    converter = lineario.IntTupleConverter()
    o_stream = lineario.SequentialDiskIO(converter, args.obsfile)
    hmm.init_dp(o_stream)
    o_stream.open_read()
    try:
        # walk the posteriors and the observations in lockstep;
        # izip is lazy so the whole observation file is never held in memory
        positions = itertools.izip(hmm.posterior(), o_stream.read_forward())
        for p, obs in positions:
            p_recent, p_ancient, p_misaligned, p_garbage = p
            maxpost = get_maxpost(
                p_recent, p_ancient, p_misaligned, p_garbage)
            # show the annotation for this position
            annotation = list(obs) + list(p) + [maxpost]
            print('\t'.join(str(x) for x in annotation))
    finally:
        # release the observation stream even if a position fails
        o_stream.close()
def get_response_content(fs):
    """
    Build the text response for a request.

    One line is produced per observation; each line is the observation,
    the tuple of hidden state posteriors, and the max-posterior value,
    formatted by the print statement (space separated).
    @param fs: a FieldStorage object containing the cgi arguments;
    param_field, misalignment_effect, region_size and data_field are read
    @return: the response text as a single string
    """
    out = StringIO()
    lines = Util.get_stripped_lines(StringIO(fs.param_field))
    model = DGRP.Model()
    model.from_lines(lines)
    # see how the states interact with the observations
    states = (
        model.get_recent_state(),
        model.get_ancient_state(),
        model.get_misaligned_state(fs.misalignment_effect),
        model.get_garbage_state())
    # define the transition object;
    # the min() keeps the transition parameter from exceeding 1.0
    nstates = len(states)
    prandom = min(1.0, (nstates / (nstates - 1.0)) / fs.region_size)
    T = TransitionMatrix.UniformTransitionObject(prandom, nstates)
    # use StringIO objects for storage
    hmm = ExternalHMM.ExternalModel(T, states, (None, None, None))
    converter = lineario.IntTupleConverter()
    o_stream = lineario.SequentialStringIO(converter, fs.data_field)
    hmm.init_dp(o_stream)
    o_stream.open_read()
    try:
        # These log terms depend only on the model,
        # so compute them once instead of once per position.
        r = model.seqerr
        log_Pr = math.log(r / 4.0)
        log_PA = math.log(1 - 3 * r / 4.0)
        positions = itertools.izip(hmm.posterior(), o_stream.read_forward())
        for p, obs in positions:
            # only the recent and ancient posteriors are used below;
            # the full unpacking still checks that p has four entries
            p_recent, p_ancient, p_misaligned, p_garbage = p
            # for each of the first two states, take entry 2 of the
            # distribution the state reports for this observation
            # (the "AA" entry according to the variable names)
            p_recent_AA = states[0].get_posterior_distribution(obs)[2]
            p_ancient_AA = states[1].get_posterior_distribution(obs)[2]
            # compute the posterior probability of a polymorphism
            posterior_polymorphism = 0
            posterior_polymorphism += p_recent * p_recent_AA
            posterior_polymorphism += p_ancient * p_ancient_AA
            # Given that a polymorphism occurred,
            # get the probability distribution over the
            # three non-reference nucleotides.
            # Entry i weights obs[i+1] by log_PA and the other two
            # of obs[1..3] by log_Pr.
            logs = [
                obs[1] * log_PA + obs[2] * log_Pr + obs[3] * log_Pr,
                obs[1] * log_Pr + obs[2] * log_PA + obs[3] * log_Pr,
                obs[1] * log_Pr + obs[2] * log_Pr + obs[3] * log_PA,
                ]
            # normalized probability of the most likely of the three
            # NOTE(review): scipy.misc.logsumexp is deprecated in newer
            # SciPy releases in favour of scipy.special.logsumexp --
            # confirm the installed version before upgrading.
            condmaxpost = math.exp(max(logs) - scipy.misc.logsumexp(logs))
            # scale by the probability that a polymorphism occurred
            maxpost = posterior_polymorphism * condmaxpost
            # show the inference for this position
            print >> out, obs, p, maxpost
    finally:
        # release the observation stream even if a position fails
        o_stream.close()
    return out.getvalue()
def main(args):
    """
    Print one annotated line per observed position.

    Each line written to stdout is tab-separated and holds the fields of
    the observation, then the posterior probability of each of the four
    hidden states (recent, ancient, misaligned, garbage), then the
    max-posterior value computed for the position.
    @param args: an object providing out_forward, out_scaling,
    out_backward, misalignment_effect, region_size and obsfile;
    it is also handed as a whole to DGRP.Model.from_fieldstorage
    """
    # presumably the files in which the HMM keeps its forward, scaling
    # and backward arrays -- confirm against ExternalHMM.ExternalModel
    filenames = (args.out_forward, args.out_scaling, args.out_backward)
    # aggregate and validate the model parameters
    model = DGRP.Model()
    model.from_fieldstorage(args)
    # see how the states interact with the observations
    states = (
        model.get_recent_state(),
        model.get_ancient_state(),
        model.get_misaligned_state(args.misalignment_effect),
        model.get_garbage_state())
    # define the transition object;
    # the min() keeps the transition parameter from exceeding 1.0
    nstates = len(states)
    prandom = min(1.0, (nstates / (nstates - 1.0)) / args.region_size)
    T = TransitionMatrix.UniformTransitionObject(prandom, nstates)
    # make the hmm
    hmm = ExternalHMM.ExternalModel(T, states, filenames)
    converter = lineario.IntTupleConverter()
    o_stream = lineario.SequentialDiskIO(converter, args.obsfile)
    hmm.init_dp(o_stream)
    o_stream.open_read()
    try:
        # These log terms depend only on the model,
        # so compute them once instead of once per position.
        r = model.seqerr
        log_Pr = math.log(r / 4.0)
        log_PA = math.log(1 - 3 * r / 4.0)
        # walk the posteriors and the observations in lockstep;
        # izip is lazy so the whole observation file is never held in memory
        positions = itertools.izip(hmm.posterior(), o_stream.read_forward())
        for p, obs in positions:
            # only the recent and ancient posteriors are used below;
            # the full unpacking still checks that p has four entries
            p_recent, p_ancient, p_misaligned, p_garbage = p
            # for each of the first two states, take entry 2 of the
            # distribution the state reports for this observation
            # (the "AA" entry according to the variable names)
            p_recent_AA = states[0].get_posterior_distribution(obs)[2]
            p_ancient_AA = states[1].get_posterior_distribution(obs)[2]
            # compute the posterior probability of a polymorphism
            posterior_polymorphism = 0
            posterior_polymorphism += p_recent * p_recent_AA
            posterior_polymorphism += p_ancient * p_ancient_AA
            # Given that a polymorphism occurred,
            # get the probability distribution over the
            # three non-reference nucleotides.
            # Entry i weights obs[i+1] by log_PA and the other two
            # of obs[1..3] by log_Pr.
            logs = [
                obs[1] * log_PA + obs[2] * log_Pr + obs[3] * log_Pr,
                obs[1] * log_Pr + obs[2] * log_PA + obs[3] * log_Pr,
                obs[1] * log_Pr + obs[2] * log_Pr + obs[3] * log_PA,
                ]
            # normalized probability of the most likely of the three
            # NOTE(review): scipy.misc.logsumexp is deprecated in newer
            # SciPy releases in favour of scipy.special.logsumexp --
            # confirm the installed version before upgrading.
            condmaxpost = math.exp(max(logs) - scipy.misc.logsumexp(logs))
            # scale by the probability that a polymorphism occurred
            maxpost = posterior_polymorphism * condmaxpost
            # show the annotation for this position
            annotation = list(obs) + list(p) + [maxpost]
            print('\t'.join(str(x) for x in annotation))
    finally:
        # release the observation stream even if a position fails
        o_stream.close()