示例#1
0
文件: usage.py 项目: miklou/pycogent
def goldman_q_dna_pair(seq1, seq2):
    """Returns the Goldman rate matrix for a pair of DNA sequences.

    seq1, seq2: sequences of equal length; each is wrapped in
    ModelDnaSequence before counting.

    The pair counts from Counts.fromPair (called with average=True) are
    divided by their row totals, then every diagonal entry is replaced by
    the negative of the sum of the other entries in its row.

    Raises ValueError if seq1 and seq2 differ in length.
    """
    if len(seq1) != len(seq2):
        # Call form of raise is valid on both Python 2 and Python 3.
        raise ValueError("seq1 and seq2 are not the same length!")

    seq1, seq2 = ModelDnaSequence(seq1), ModelDnaSequence(seq2)

    m = Counts.fromPair(seq1, seq2, DnaPairs, average=True)._data

    # Normalize each row by its total.
    # NOTE(review): a row whose counts are all zero divides by zero here --
    # confirm whether callers can produce such input.
    q = m / m.sum(axis=1)[:, NewAxis]

    # Row sum minus the diagonal is the off-diagonal total for that row.
    new_diag = -(q.sum(axis=1) - diag(q))

    for i, v in enumerate(new_diag):
        q[i, i] = v

    return q
示例#2
0
def freqs_from_aln_array(seqs):
    """Returns per-position freqs from arbitrary size alignment.

    Warning: fails if all seqs aren't the same length.
    written by Rob Knight

    seqs = list of lines from aligned fasta file

    Returns a Profile built from an integer count matrix of shape
    (alphabet size, alignment length) and the sequence alphabet.

    Raises ValueError if seqs yields no sequences.
    """
    result = None
    for label, seq in MinimalFastaParser(seqs):
        # Currently cogent does not support . characters for gaps, converting
        # to - characters for compatibility.
        seq = ModelDnaSequence(seq.replace('.', '-'))
        if result is None:
            # The first sequence fixes the matrix size and column indices.
            result = zeros((len(seq.Alphabet), len(seq)), dtype=int)
            indices = arange(len(seq), dtype=int)
        # presumably seq._data holds one alphabet index per position, so
        # this adds one count per column -- verify against ModelDnaSequence
        result[seq._data, indices] += 1
    if result is None:
        # No records were parsed; without this guard the return below would
        # fail with an UnboundLocalError on seq.
        raise ValueError("no sequences found in seqs")
    return Profile(result, seq.Alphabet)
示例#3
0
文件: usage.py 项目: miklou/pycogent
def goldman_q_dna_triple(seq1, seq2, outgroup):
    """Returns the Goldman rate matrix for seq1.

    seq1, seq2, outgroup: sequences of equal length; each is wrapped in
    ModelDnaSequence before counting.

    The counts from Counts.fromTriple are divided by their row totals, then
    every diagonal entry is replaced by the negative of the sum of the other
    entries in its row.

    Raises ValueError if the three sequences are not all the same length.
    """
    # A chained `a != b != c` only means (a != b) and (b != c), which misses
    # mismatches when two of the three lengths agree; test for all-equal.
    if not (len(seq1) == len(seq2) == len(outgroup)):
        raise ValueError("seq1,seq2 and outgroup are not the same length!")

    seq1 = ModelDnaSequence(seq1)
    seq2 = ModelDnaSequence(seq2)
    outgroup = ModelDnaSequence(outgroup)

    m = Counts.fromTriple(seq1, seq2, outgroup, DnaPairs)._data

    # Normalize each row by its total.
    # NOTE(review): a row whose counts are all zero divides by zero here --
    # confirm whether callers can produce such input.
    q = m / m.sum(axis=1)[:, NewAxis]

    # Row sum minus the diagonal is the off-diagonal total for that row.
    new_diag = -(q.sum(axis=1) - diag(q))

    for i, v in enumerate(new_diag):
        q[i, i] = v

    return q