def GroupFastaParser(data, label_to_name, group_key="Group", aligned=False,
                     moltype=ASCII, done_groups=None, DEBUG=False):
    """Yield related sequences as separate sequence collections.

    Records are read with MinimalFastaParser (using XmfaFinder) and gathered
    into a collection until a record with a not-yet-seen group key arrives;
    the gathered collection is then loaded with cogent.LoadSeqs and yielded.

    Arguments:
        - data: line iterable data source
        - label_to_name: LabelParser callback
        - group_key: name of group key in RichLabel.Info object
        - aligned: whether sequences are to be considered aligned
        - moltype: default is ASCII
        - done_groups: series of group keys to be excluded
        - DEBUG: when true, print every label and the keys of each
          collection as it is built

    Yields:
        One object returned by cogent.LoadSeqs per group, with its ``Info``
        attribute set to ``Info(Group=<group key>)``.  Groups listed in
        done_groups are skipped, including the last group in the data.
        Nothing is yielded when data contains no records.
    """
    done_groups = [] if done_groups is None else done_groups
    parser = MinimalFastaParser(data, label_to_name=label_to_name,
                                finder=XmfaFinder)

    def _collect(collection, group_id):
        # Turn the gathered {label: sequence} mapping into a loaded
        # collection tagged with the group it belongs to.
        if DEBUG:
            print("GroupParser collection keys %s" % (list(collection),))
        seqs = cogent.LoadSeqs(data=collection, moltype=moltype,
                               aligned=aligned)
        seqs.Info = Info(Group=group_id)
        return seqs

    group_ids = []
    current_collection = {}
    for label, seq in parser:
        seq = moltype.makeSequence(seq, Name=label, Info=label.Info)
        if DEBUG:
            print("str(label)  %s repr(label) %s" % (str(label), repr(label)))
        group_id = label.Info[group_key]
        # NOTE(review): membership is tested against every group seen so far,
        # not only the current one, so a record whose group appeared earlier
        # is merged into the collection being built -- confirm this is wanted.
        if not group_ids or group_id in group_ids:
            current_collection[label] = seq
            if not group_ids:
                group_ids.append(group_id)
        else:
            # we finish off check of current before creating a collection
            if group_ids[-1] not in done_groups:
                yield _collect(current_collection, group_ids[-1])
            current_collection = {label: seq}
            group_ids.append(group_id)

    if not group_ids:
        # No records at all: there is no final group to emit.
        return
    # The last group is subject to the same exclusion as every other group.
    if group_ids[-1] not in done_groups:
        yield _collect(current_collection, group_ids[-1])
def get_alignment_tree(fname):
    """Build a neighbour joining tree from a FASTA file and return it.

    The sequences in fname are loaded with cogent.LoadSeqs, pairwise
    distances are estimated under the F81 substitution model, a neighbour
    joining tree is built from those distances and then rebalanced with
    ``balanced()``.  The ASCII rendering of the tree is printed, followed by
    a snippet showing how the tree could be drawn with cogent.draw.

    Arguments:
        - fname: path of the FASTA file to load

    Returns:
        The balanced neighbour joining tree.
    """
    from cogent.phylo import distance, nj
    from cogent.evolve.models import F81

    al = cogent.LoadSeqs(fname, format='fasta')
    d = distance.EstimateDistances(al, submodel=F81())
    d.run(show_progress=False)
    mytree = nj.nj(d.getPairwiseDistances())
    mytree = mytree.balanced()
    print(mytree.asciiArt())
    # Hint for the user only: the drawing code below is printed, not run.
    print(
        "from cogent.draw import dendrogram "
        "p = dendrogram.SquareDendrogram(mytree) "
        "p.drawToPDF('tree-scaled.pdf', 500, 400, stroke_width=2.0, "
        "shade_param = 'r', max_value = 1.0,)"
    )
    # Hand the tree back so the caller can actually use what was built.
    return mytree
def main():
    """Build one Newick tree file per FASTA file found in a directory.

    Command-line arguments (read from sys.argv):
        1. directory searched for ``*.fasta`` files
        2. output directory, created with mkdir_p

    Each FASTA file is loaded as unaligned protein sequences, aligned with
    align_unaligned_seqs, turned into a tree with build_tree_from_alignment
    and written to ``<output dir>/<group name>.nwk`` with single quotes
    removed from the tree string.  The group name is the FASTA file name
    without its extension.

    Exits with a usage message when arguments are missing, and with status 1
    (after printing the error) when a FASTA file cannot be loaded.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: {0} <fasta_dir> <output_dir>".format(sys.argv[0]))
    fdir = sys.argv[1]
    odir = sys.argv[2]
    mkdir_p(odir)
    for fname in glob.iglob("{0}/*.fasta".format(fdir)):
        # splitext removes exactly the extension; str.rstrip(".fasta") would
        # strip any trailing run of the characters '.', 'f', 'a', 's', 't'
        # and mangle names such as "alpha.fasta" into "alph".
        groupName = os.path.splitext(os.path.basename(fname))[0]
        print("group {0}".format(groupName))
        try:
            seqs = cogent.LoadSeqs(fname, moltype=cogent.PROTEIN,
                                   aligned=False)
        except Exception as e:
            print(e)
            # A failed load is an error: report it through the exit status.
            sys.exit(1)
        aln = align_unaligned_seqs(seqs, cogent.PROTEIN)
        t = build_tree_from_alignment(aln, cogent.PROTEIN)
        print("tree for group {0}".format(str(t)))
        # The tree is written as text, so open the file in text mode.
        with open(os.path.join(odir, groupName + ".nwk"), 'w') as ofile:
            ofile.write(str(t).replace("'", ""))
def reconstruct(lf, aln, tree, locus=None):
    """Recursively score states on a tree and reconstruct the root state.

    Arguments:
        - lf: object providing getPsubForEdge(name, locus=...),
          getMotifProbs() and model.getAlphabet()
        - aln: object whose NamedSeqs maps tip names to sequences
        - tree: the node to process; needs Name, Children, isTip(), isRoot()
        - locus: passed through to lf.getPsubForEdge

    Side effects:
        Every non-root node gets an attribute ``C``: a callable taking the
        first psub index ``i`` and returning the best-scoring state for this
        node.  The root gets an attribute ``anc`` holding its chosen state.

    Returns:
        - for a non-root node, a callable ``L(i)`` giving the best score of
          the subtree below this node for first psub index ``i``
          (presumably the parent state -- confirm against the psub layout)
        - for the root, the result of cogent.LoadSeqs on the list of
          (name, state) pairs filled in by _get_anc for each child
    """
    if tree.isTip():
        tip_name = tree.Name
        psub = lf.getPsubForEdge(tip_name, locus=locus)
        observed = str(aln.NamedSeqs[tip_name])
        if 'N' not in observed:
            # Unambiguous tip: the observed value is the only candidate.
            def tip_score(i):
                return psub[i, observed]

            def tip_choice(i):
                return observed
        else:
            # Ambiguous tip: consider every state the ambiguity resolves to.
            candidates = lf.model.getAlphabet().resolveAmbiguity(observed)

            def tip_score(i):
                return max(psub[i, state] for state in candidates)

            def tip_choice(i):
                scores = [psub[i, state] for state in candidates]
                return candidates[argmax(scores)]
        tree.C = tip_choice
        return tip_score

    # Internal node: score the children first, then combine them per state.
    child_scores = []
    for child in tree.Children:
        child_scores.append(reconstruct(lf, aln, child, locus=locus))
    states = list(lf.model.getAlphabet())
    below = {}
    for state in states:
        below[state] = prod([score(state) for score in child_scores])

    if tree.isRoot():
        motif_probs = lf.getMotifProbs()
        weighted = [motif_probs[state] * below[state] for state in states]
        tree.anc = states[argmax(weighted)]
        result = [(tree.Name, tree.anc)]
        for child in tree.Children:
            _get_anc(child, result)
        return cogent.LoadSeqs(data=result)

    psub = lf.getPsubForEdge(tree.Name, locus=locus)

    def node_score(i):
        return max(psub[i, state] * below[state] for state in states)

    def node_choice(i):
        scores = [psub[i, state] * below[state] for state in states]
        return states[argmax(scores)]

    tree.C = node_choice
    return node_score
def main():
    """Build a single tree from a sequence file and write it to disk.

    Command-line arguments are parsed by docopt from the module docstring;
    the keys read are ``<network>``, ``<seqfile>`` and ``-o``.  The sequence
    file is loaded as unaligned protein sequences, aligned with
    align_unaligned_seqs and turned into a tree with
    build_tree_from_alignment.  The tree string, with single quotes removed,
    is written to the path given with ``-o``, or to a file named ``all`` in
    the current directory when no path is given.

    Exits with status 1 (after printing the error) when the sequence file
    cannot be loaded.
    """
    arguments = docopt(__doc__, version='BuildSingleTree v1.0')
    print(arguments)
    netname = arguments['<network>']
    seqname = arguments['<seqfile>']
    ofname = arguments['-o']
    print("creating tree for {0}".format(netname))
    print("using sequences from {0}".format(seqname))
    # NOTE(review): G is not used below; the read is kept so an unreadable
    # network file still fails here -- confirm whether it is needed at all.
    G = nx.read_adjlist(netname)
    try:
        seqs = cogent.LoadSeqs(seqname, moltype=cogent.PROTEIN, aligned=False)
    except Exception as e:
        print(e)
        # A failed load is an error: report it through the exit status.
        sys.exit(1)
    aln = align_unaligned_seqs(seqs, cogent.PROTEIN)
    t = build_tree_from_alignment(aln, cogent.PROTEIN)
    print("tree = {0}".format(str(t)))
    # Honour -o when it carries a path; docopt yields None/False when the
    # option is absent and True when it is declared as a bare flag.
    outpath = ofname if ofname and ofname is not True else "all"
    # The tree is written as text, so open the file in text mode.
    with open(outpath, 'w') as ofile:
        ofile.write(str(t).replace("'", ""))