def assign_amino_acid_muts_vcf(prots, path):
    """Store per-gene amino-acid mutations in the tree meta-data.

    For each gene in ``prots`` every listed position is rendered, per
    sequence, as ``[ancestral][position][mutation]`` (position is 1-based)
    or as an empty string when the sequence has no entry at that position.
    Positions whose rendering is identical for all sequences are dropped.
    The remaining mutations are comma-joined per sequence and stored in
    ``tree_meta[<sequence name>][<gene> + '_mutations']``.  Genes without
    any remaining position are left out and reported in one summary line.
    The updated meta-data is written back via ``write_tree_meta_data``.

    Args:
        prots: mapping gene name -> mapping with the keys ``'sequences'``
            (sequence name -> {0-based position: residue string}),
            ``'reference'`` (indexable by 0-based position) and
            ``'positions'`` (iterable of 0-based positions).
        path: forwarded unchanged to ``read_tree_meta_data`` and
            ``write_tree_meta_data``.

    Raises:
        KeyError: if a sequence name of a gene is missing from the tree
            meta-data.
    """
    tree_meta = read_tree_meta_data(path)
    excluded = []

    # go through every gene in the prots nested dict
    for fname, prot in prots.items():
        sequences = prot['sequences']
        ref = prot['reference']

        # Freeze this gene's own sequence order once.  Every pattern below is
        # built in exactly this order, so each entry stays tied to the name it
        # was computed for (previously the names came from the first gene's
        # dict, which need not iterate in the same order as this one).
        seq_names = list(sequences)
        muts_by_seq = {name: [] for name in seq_names}
        has_variable_position = False

        # for each position, get the mutation in the right format
        # [ancestral][position][mutation]
        for pi in prot['positions']:
            # convert to standard numbering for output (# starts at 1)
            prefix = ref[pi] + str(pi + 1)
            pattern = [
                prefix + sequences[name][pi] if pi in sequences[name] else ""
                for name in seq_names
            ]

            # if the exact same entry is in all sequences, don't include!
            # (only mutant against ref..)
            if len(set(pattern)) == 1:
                continue

            has_variable_position = True
            for name, mut in zip(seq_names, pattern):
                if mut != '':
                    muts_by_seq[name].append(mut)

        # don't include the gene if nothing differs across the sequences
        if not has_variable_position:
            excluded.append(fname)
            continue

        # for every sequence, assign the mutations in tree_meta
        meta_key = fname + '_mutations'
        for name in seq_names:
            tree_meta[name][meta_key] = ",".join(muts_by_seq[name])

    if excluded:
        # Single parenthesised argument: prints identically as a Python 2
        # print statement and as a Python 3 print() call.
        print("{} genes do not differ across the tree. They will not be added to tree meta-data or shown in auspice".format(len(excluded)))

    # write it out!
    write_tree_meta_data(path, tree_meta)
def add_drm_tree_meta(path, seqDRM):
    """Record drug-resistance mutations in the tree meta-data.

    For every sequence in ``seqDRM`` each mutation is appended
    (comma-joined) to the meta-data entry of every drug it is listed for,
    after the drug name has been passed through ``drugTranslate``.  The
    number of distinct translated drugs per sequence is stored as a string
    under ``"Drug_Resistance"``; entries that never received one get
    ``'0'``.  The meta-data is then written back with
    ``write_tree_meta_data``.

    Args:
        path: forwarded unchanged to ``read_tree_meta_data`` and
            ``write_tree_meta_data``.
        seqDRM: mapping sequence name -> {mutation: ';'-separated drug
            names}.

    Returns:
        dict mapping each translated drug name (plus ``"Drug_Resistance"``)
        to the list of distinct values that were stored for it, in order of
        first appearance.  Note that the running comma-joined value is
        registered after every single mutation, so partially built strings
        are part of these lists as well.
    """
    tree_meta = read_tree_meta_data(path)

    # add drug resistance to tree_meta, & make list for colouring
    drugMuts = {"Drug_Resistance": ['0']}

    for seq, drugs_by_mut in seqDRM.items():
        node_meta = tree_meta[seq]

        # in case re-running, don't add mutations to old ones!
        remove_old_DRM(node_meta)

        resisted = set()
        for mut, drug_field in drugs_by_mut.items():
            for drug_name in drug_field.split(';'):
                trDrug = drugTranslate(drug_name)

                # extend the mutation list stored for this drug
                if trDrug in node_meta:
                    node_meta[trDrug] = ",".join([node_meta[trDrug], mut])
                else:
                    node_meta[trDrug] = mut

                # remember every value this drug has taken so far
                seen_values = drugMuts.setdefault(trDrug, [])
                if node_meta[trDrug] not in seen_values:
                    seen_values.append(node_meta[trDrug])

                resisted.add(trDrug)

        numResist = str(len(resisted))
        node_meta["Drug_Resistance"] = numResist
        if numResist not in drugMuts["Drug_Resistance"]:
            drugMuts["Drug_Resistance"].append(numResist)

    # for any with no resistance, add a 0 to tree_meta
    for node_meta in tree_meta.values():
        if 'Drug_Resistance' not in node_meta:
            node_meta["Drug_Resistance"] = '0'

    write_tree_meta_data(path, tree_meta)

    return drugMuts
#T = tree_newick(path)
# NOTE(review): `T`, `args` and `path` are bound before this fragment; the
# nesting below is reconstructed from the statement order - confirm against
# the original layout.
meta = read_sequence_meta_data(path)
fields = ['branchlength', 'clade']

# Optionally refine the tree; both branches replace T with the tree of the
# inference object and extend the list of fields to export.
if args.timetree:
    tt = timetree(tree=T, aln=ref_alignment(path), confidence=args.confidence,
                  seq_meta=meta, reroot=None if args.keeproot else 'best',
                  Tc=args.Tc)
    T = tt.tree
    fields.extend(['mutations', 'mutation_length', 'num_date', 'clock_length'])
    if args.confidence:
        fields.append('num_date_confidence')
elif args.ancestral:
    tt = ancestral_sequence_inference(tree=T, aln=ref_alignment(path))
    T = tt.tree
    fields.extend(['mutations', 'mutation_length'])

# number the clades in preorder
clade_index = 0
for n in T.find_clades(order='preorder'):
    n.clade = clade_index
    clade_index += 1

Phylo.write(T, tree_newick(path), 'newick')
meta_dic = collect_tree_meta_data(T, fields)
write_tree_meta_data(path, meta_dic)

# Within this fragment `tt` is only bound in the two branches above, so the
# GTR model is written under the same condition as the sequence export;
# writing it unconditionally raised NameError when neither option was set.
if args.timetree or args.ancestral:
    with open(sequence_gtr_model(path), 'w') as ofile:
        ofile.write(str(tt.gtr))
    export_sequence_fasta(T, path)
seqs[seq.name] = seq muts = {} muts[T.root.name]='' for node in T.get_nonterminals(): pseq = seqs[node.name] for c in node: cseq = seqs[c.name] muts[c.name]=','.join([anc+str(pos+1)+der for pos, (anc, der) in enumerate(zip(pseq, cseq)) if anc!=der]) return muts if __name__ == '__main__': parser = generic_argparse("Assign amino acid mutations to the tree") args = parser.parse_args() path = args.path tree_meta = read_tree_meta_data(path) T = Phylo.read(tree_newick(path), 'newick') for gene, aln_fname in get_genes_and_alignments(path, tree=True): if gene!='nuc': muts = get_amino_acid_mutations(T, aln_fname) for node_name in tree_meta: tree_meta[node_name][gene+'_mutations'] = muts[node_name] write_tree_meta_data(path, tree_meta)