示例#1
0
def test_align_subsets():
    """
    Exercise the column-filtering helpers of alignlib on a tiny alignment.

    The fixture has three sequences and five columns; column 3 is a gap in
    every sequence and column 2 is the only one without any gap.
    """
    aln = fasta.FastaDict()
    for name, seq in (("a", "AAA-A"), ("b", "-BD-C"), ("c", "A-D--")):
        aln[name] = seq

    # expected result once the all-gap column has been dropped
    without_empty_col = {'a': 'AAAA', 'c': 'A-D-', 'b': '-BDC'}

    no_empty = alignlib.remove_empty_columns(aln)
    assert no_empty == without_empty_col

    # only the single fully-populated column is expected to survive
    ungapped = alignlib.remove_gapped_columns(aln)
    assert ungapped == {'a': 'A', 'c': 'D', 'b': 'D'}

    # requiring two sequences per column is expected to drop the same
    # column as remove_empty_columns does for this fixture
    at_least_two = alignlib.require_nseqs(aln, 2)
    assert at_least_two == without_empty_col
示例#2
0
def find_parsimony(tree, align):
    """
    Run parsimony() on a tree and alignment and collect ancestral sequences.

    tree  -- tree object handed to make_ptree()
    align -- mapping of leaf name -> aligned sequence; must be non-empty and
             contain every leaf name produced by make_ptree()

    Returns a fasta.FastaDict keyed by the names of the non-leaf nodes (in
    the order make_ptree() lists them), holding whatever parsimony() left in
    the corresponding slot of the ancestral array.
    """
    ptree, nodes, nodelookup = make_ptree(tree)
    node_count = len(nodes)
    branch_lens = [node.dist for node in nodes]

    nseqs = len(align)
    seqlen = len(align.values()[0])

    # leaf sequences in the same order the leaves occur in `nodes`
    leaf_seqs = [align[node.name] for node in nodes if node.is_leaf()]
    calign = (c_char_p * nseqs)(*leaf_seqs)

    # One gap-filled placeholder per ancestral slot (nseqs - 1 of them).
    # Each placeholder is built separately rather than by list repetition.
    # NOTE(review): presumably parsimony() writes into these buffers, which
    # would make sharing one string object unsafe -- confirm.
    nancestors = nseqs - 1
    placeholders = ["-" * seqlen for _ in xrange(nancestors)]
    cancestral = (c_char_p * nancestors)(*placeholders)

    parsimony(node_count, ptree, nseqs, calign, branch_lens, True,
              cancestral)

    # pair each internal node with its slot in the ancestral array
    internal_names = [node.name for node in nodes if not node.is_leaf()]
    ancestral = fasta.FastaDict()
    for slot, name in enumerate(internal_names):
        ancestral[name] = cancestral[slot]

    return ancestral
示例#3
0
def make_alignment(arg, mutations, infsites=False):
    """
    Make FASTA alignment from ARG and sampled mutations.

    arg       -- ARG object; uses arg.start, arg.end, arg.leaf_names() and
                 arg.get_marginal_tree()
    mutations -- list of mutation records, sorted IN PLACE by record[2].
                 record[0] is the mutated node (its .name is used) and
                 int(record[2]) is the site the mutation falls on
    infsites  -- if true, every mutated site keeps exactly one randomly
                 chosen mutated branch, with a single substitution on it

    Each site gets a uniformly random root base from "ACGT"; bases are then
    propagated down the marginal tree, substituting on mutated branches.

    Returns a fasta.FastaDict mapping every leaf name to its sequence.

    NOTE(review): mutation positions are compared directly with the 0-based
    site index, which looks like it assumes arg.start == 0 -- confirm.
    """
    def substitute(base, nhits):
        # Apply `nhits` successive substitutions; every hit must change the
        # base, so redraw until the new base differs from the current one.
        for _ in xrange(nhits):
            replacement = base
            while replacement == base:
                replacement = "ACGT"[random.randint(0, 3)]
            base = replacement
        return base

    alnlen = int(arg.end - arg.start)
    leaves = list(arg.leaf_names())
    nleaves = len(leaves)

    # sort mutations by position
    mutations.sort(key=lambda x: x[2])
    nmuts = len(mutations)

    # one string per site, holding the base of every leaf in `leaves` order
    columns = []

    muti = 0
    for site in xrange(alnlen):
        root_base = "ACGT"[random.randint(0, 3)]

        mutated = muti < nmuts and site >= int(mutations[muti][2])
        if not mutated:
            # no mutation here: all leaves share the root base
            columns.append(root_base * nleaves)
            continue

        # count the mutations that fall on each branch at this site
        hits = defaultdict(int)
        while muti < nmuts and site == int(mutations[muti][2]):
            hits[mutations[muti][0].name] += 1
            muti += 1

        # enforce infinite sites
        if infsites:
            hits = {random.sample(hits.items(), 1)[0][0]: 1}

        tree = arg.get_marginal_tree(site - .5)
        bases = {tree.root.name: root_base}

        # preorder guarantees a parent's base is known before its children
        for node in tree.preorder():
            if not node.parents:
                continue

            inherited = bases[node.parents[0].name]
            if node.name in hits:
                bases[node.name] = substitute(inherited, hits[node.name])
            else:
                bases[node.name] = inherited

        columns.append("".join(bases[leaf] for leaf in leaves))

    # make fasta: transpose the per-site columns into per-leaf sequences
    aln = fasta.FastaDict()
    for idx, leaf in enumerate(leaves):
        aln[leaf] = "".join(column[idx] for column in columns)

    return aln
示例#4
0
 def __init__(self, genomes, chroms, regions, blocks, orths, **options):
     """
     Initialize the base visualization, then this object's own state.

     All arguments, including any extra keyword options, are forwarded
     unchanged to SyntenyVisBase.__init__.
     """
     SyntenyVisBase.__init__(
         self, genomes, chroms, regions, blocks, orths, **options)

     # initial state: "gene" click mode, no selected genes, no sequences
     self.click_mode = "gene"
     self.selgenes = []
     self.seqs = fasta.FastaDict()