示例#1
0
def hmm_iterator(hmms, sequences, state_index=50):
    """
    Print the sensitivity, specificity and their average for each HMM.

    Every entry of ``hmms`` is parsed with ``tgf.parse``, all ``sequences``
    are aligned against it with ``viterbi.viterbi_all`` and one tab-separated
    summary line is printed per model.

    An alignment counts as a true positive when its ``state_path`` holds
    ``"*"`` at ``state_index``; every other alignment counts as a false
    negative.  The number of false positives is taken to be equal to the
    number of false negatives.

    Args:
        hmms: iterable of HMM file paths accepted by ``tgf.parse``.
        sequences: sequences passed unchanged to ``viterbi.viterbi_all``.
        state_index: position in each alignment's ``state_path`` that is
            compared with ``"*"``.  Defaults to 50, the value that was
            previously hard-coded.

    Returns:
        None.  The results are written to standard output.
    """

    def as_percent(fraction):
        # Render a 0..1 fraction as a percentage with four decimals.
        return "{:.4f} %".format(fraction * 100.0)

    def ratio(hits, total):
        # A model with no alignments reports 0 instead of dividing by zero.
        return float(hits) / total if total else 0.0

    for hmm_path in hmms:
        # Model name: last path component, cut at its first dot.
        hmm_name = hmm_path.split("/")[-1].split(".")[0]

        # Parse tgf and run viterbi algorithm
        model = tgf.parse(hmm_path)
        alignments = viterbi.viterbi_all(model, sequences)

        # Count hits and misses in a single pass; the two classes are
        # complementary, so the misses follow from the total.
        total = 0
        true_positives = 0
        for align in alignments:
            total += 1
            if align.state_path[state_index] == "*":
                true_positives += 1
        false_negatives = total - true_positives
        false_positives = false_negatives  # True only for this concrete project!

        # Calculate sensitivity, specificity and accuracy
        sensitivity = ratio(true_positives, true_positives + false_negatives)
        specificity = ratio(true_positives, true_positives + false_positives)
        accuracy = (specificity + sensitivity) / 2.0

        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print("{}\tSN: {}\tSP: {}\tavgSNSP: {}".format(
            hmm_name, as_percent(sensitivity), as_percent(specificity),
            as_percent(accuracy)))
示例#2
0
 def subject(self, filename="simple.tgf"):
     """Return the result of ``tgf.parse`` for ``filename`` under ``test/files/tgf/``."""
     fixture_path = "test/files/tgf/{}".format(filename)
     return tgf.parse(fixture_path)
示例#3
0
        else:
            if is_correct:
                fn += 1
            else:
                tn += 1
    return (tp, tn, fp, fn)


def print_roc_data_in_tsv(roc_data):
    """
    Print ROC data to standard output as tab-separated values.

    A header line naming the eight columns is printed first, followed by one
    line per entry of ``roc_data`` in ascending key order.  Every value is
    formatted with four decimals.

    Args:
        roc_data: mapping from a score to a sequence of metrics.  The row
            format has eight fields (the score plus seven metrics), in the
            order given by the header: tpr, fpr, ppv, tp, tn, fp, fn.

    Returns:
        None.
    """
    # Single-argument print(...) produces identical output on Python 2 and
    # Python 3, whereas the print statement is a syntax error on Python 3.
    print("score\ttpr\tfpr\tppv\ttp\ttn\tfp\tfn")

    row_format = "{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}"
    for score, metrics in sorted(roc_data.items()):
        print(row_format.format(score, *metrics))


# Load the model (first command-line argument) and the sequences (second).
hmm = tgf_parser.parse(sys.argv[1])
sequences = seq_parser.parse(sys.argv[2])

# Align every sequence against the model, then evaluate each alignment.
alignments = viterbi.viterbi_all(hmm, sequences)
evaluated_alignments = map(evaluate_alignment, alignments)

# Highest and lowest alignment score; they bound the range built below.
max_score = max(align.score for align in alignments)
min_score = min(align.score for align in alignments)

roc_data = {}

# Step width when the score span is divided by STEPS.
step_size = (max_score - min_score) / STEPS

scores_iterator = frange(
    float_floor(min_score),
    float_ceil(max_score),
    step_size,
)
示例#4
0
 def subject(self, hmm=None, observations=10):
     """Return ``sample.sample(hmm, observations)``.

     When ``hmm`` is None, the ``test/files/tgf/simple.tgf`` fixture is
     parsed with ``tgf.parse`` and used instead.
     """
     model = tgf.parse("test/files/tgf/simple.tgf") if hmm is None else hmm
     return sample.sample(model, observations)
示例#5
0
    return (action, args)


def get_output_filename_from_args(args, position):
    """Return ``args[position]``, or None when ``args`` is too short for it."""
    if len(args) <= position:
        return None
    return args[position]


if __name__ == '__main__':
    # Script entry point: run the action returned by parse_args() with the
    # remaining arguments.
    action, args = parse_args()

    if action == 'sample':
        # Arguments: <sample length> <hmm file> [output filename]
        # NOTE(review): args[1] is indexed right after this check, so
        # print_help_message(invalid=True) presumably ends the program --
        # confirm.
        if len(args) < 2: print_help_message(invalid=True)

        hmm = tgf.parse(args[1])
        length_of_sample = int(args[0])
        # None when no third argument was supplied.
        output_filename = get_output_filename_from_args(args, 2)

        # The single sample is wrapped in a list before being handed to
        # the writer.
        mallet_writer.write([sample.sample(hmm, length_of_sample)],
                            output_filename)

    elif action == 'viterbi':
        # Arguments: <hmm file> <sequences file> [output filename]
        # NOTE(review): same assumption as above about print_help_message
        # terminating the program -- confirm.
        if len(args) < 2: print_help_message(invalid=True)

        hmm = tgf.parse(args[0])
        sequences = seq_parser.parse(args[1])
        # None when no third argument was supplied.
        output_filename = get_output_filename_from_args(args, 2)

        # Write the alignments of all sequences against the model.
        mallet_writer.write(viterbi.viterbi_all(hmm, sequences),
                            output_filename)
示例#6
0
 def subject(self, hmm_filename, sequence):
     """Return ``viterbi.viterbi`` of ``sequence`` for the TGF fixture ``hmm_filename``.

     The model is parsed from ``test/files/tgf/<hmm_filename>``.
     """
     model = tgf.parse("test/files/tgf/{}".format(hmm_filename))
     return viterbi.viterbi(model, sequence)