def runwindow(input, outgroup):
    """Build a RAxML phylogeny from sequence windows flanking each SNP.

    Reads ``<input>.tab`` (tab-separated; first line is a header, column 1 is
    the scaffold name, column 2 the SNP position), cuts ``args.window`` bp on
    each side of every SNP out of each sample's FASTA, concatenates the
    windows per sample, aligns them with mafft, trims with trimal, and runs
    RAxML with bootstrapping.  The resulting tree is written to
    ``<args.input>.windows.phylogeny.nwk`` and drawn to
    ``<args.input>.windows.phylogeny.pdf``.

    Relies on module-level ``args``, ``samples`` and ``FNULL``.

    Args:
        input: path prefix of the SNP table (``.tab`` is appended).
        outgroup: taxon name passed to RAxML ``-o``; falsy for no outgroup.
    """
    print("Running sliding-window method, using %i bp flanking each SNP." % args.window)

    # Always start from an empty working directory.
    tmpdir = 'window_tmp'
    if os.path.isdir(tmpdir):
        shutil.rmtree(tmpdir)
    os.makedirs(tmpdir)

    # snpdb maps 'snp<N>' -> [scaffold, slice_start, slice_end]
    snpdb = {}
    count = 0
    with open(input + '.tab', 'rU') as data:
        next(data)  # skip header
        for line in data:
            if not line.strip():
                # Blank rows (e.g. a trailing newline) carry no SNP.
                continue
            count += 1
            cols = line.split('\t')
            pos = int(cols[1])
            # Clamp at 0: a negative slice start would be interpreted by
            # Python as an offset from the END of the sequence and yield a
            # wrong (or empty) window for SNPs near the scaffold start.
            # NOTE(review): the "- 1" suggests positions are 1-based -- confirm.
            start = max(0, pos - args.window - 1)
            snpdb['snp' + str(count)] = [cols[0], start, pos + args.window]

    # Window order is the same for every sample, so sort once up front.
    windows = natsorted(snpdb.items())

    sequencedb = {}
    for num, sample in enumerate(samples):
        if sample == 'Reference':
            # The reference FASTA is read from the directory of the sample
            # that follows 'Reference' in the samples list.
            fastafile = os.path.join(samples[num + 1], 'reference', 'ref.fa')
        else:
            fastafile = os.path.join(sample, 'snps.consensus.subs.fa')
        with open(fastafile, 'rU') as fasta:
            sequence_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
        sequence = []
        for _name, (scaffold, start, end) in windows:
            rec = sequence_dict.get(scaffold)
            sequence.append(str(rec.seq[start:end]))
        # First occurrence wins if a sample name is listed more than once.
        if sample not in sequencedb:
            sequencedb[sample] = ''.join(sequence)

    windows_fa = os.path.join(tmpdir, 'windows.fa')
    mafft_fa = os.path.join(tmpdir, 'windows.mafft.fa')
    trimal_fa = os.path.join(tmpdir, 'windows.trimal.fa')

    with open(windows_fa, 'w') as output:
        for sample, seq in sequencedb.items():
            output.write('>%s\n%s\n' % (sample, seq))

    # run alignment on output (mafft writes the alignment to stdout)
    with open(mafft_fa, 'w') as output:
        subprocess.call(['mafft', '--thread', str(args.cpus), '--auto', windows_fa],
                        stdout=output, stderr=FNULL)

    # now trim alignment
    subprocess.call(['trimal', '-in', mafft_fa, '-out', trimal_fa, '-automated1'])

    # RAxML runs inside tmpdir, hence the bare alignment file name.
    raxml_cmd = ['raxmlHPC-PTHREADS', '-T', str(args.cpus), '-f', 'a',
                 '-m', 'GTRGAMMA', '-p', '12345', '-x', '12345',
                 '-#', str(args.bootstrap), '-s', 'windows.trimal.fa']
    if outgroup:
        raxml_cmd += ['-o', outgroup]
    raxml_cmd += ['-n', 'windows.nwk']
    subprocess.call(raxml_cmd, cwd=tmpdir, stdout=FNULL, stderr=FNULL)

    # parse with biopython and draw
    trees = list(Phylo.parse(os.path.join(tmpdir, 'RAxML_bootstrap.windows.nwk'), 'newick'))
    best = Phylo.read(os.path.join(tmpdir, 'RAxML_bestTree.windows.nwk'), 'newick')
    support_tree = get_support(best, trees)
    Phylo.write(support_tree, args.input + '.windows.phylogeny.nwk', 'newick')
    Phylo.draw(support_tree, do_show=False)
    pylab.axis('off')
    pylab.savefig(args.input + '.windows.phylogeny.pdf', format='pdf',
                  bbox_inches='tight', dpi=1000)
def _split_multiallelic(genotypes):
    """Recode one VCF row of genotype codes into presence/absence rows.

    If the row contains '3', one row is produced for each of alleles
    '1', '2' and '3'; if it contains '2' (but no '3'), one row each for
    '1' and '2'.  In each produced row the target allele becomes '1', the
    other listed alleles become '0', and every other value is kept as is.
    Rows containing neither '2' nor '3' are returned unchanged.
    """
    if '3' in genotypes:
        alleles = ('1', '2', '3')
    elif '2' in genotypes:
        alleles = ('1', '2')
    else:
        return [genotypes]

    rows = []
    for target in alleles:
        row = []
        for code in genotypes:
            if code == target:
                row.append('1')
            elif code in alleles:
                row.append('0')
            else:
                row.append(code)
        rows.append(row)
    return rows


def runbinary(input, outgroup):
    """Build a RAxML phylogeny from a binary (presence/absence) SNP matrix.

    Reads ``<input>.vcf``, skips ``##`` meta lines, and takes columns 10+
    of every remaining line (the ``#CHROM`` header line therefore supplies
    the sample names).  Multi-allelic rows are split into one binary row per
    alternate allele.  The transposed matrix is written as FASTA to
    ``<input>.binary.fa`` and passed to RAxML (BINGAMMA model).  The tree is
    written to ``<args.input>.binary.phylogeny.nwk`` and drawn to
    ``<args.input>.binary.phylogeny.pdf``.

    Relies on module-level ``args`` and ``FNULL``.

    Args:
        input: path prefix of the VCF (``.vcf`` is appended).
        outgroup: taxon name passed to RAxML ``-o``; falsy for no outgroup.
    """
    print("Runnning binary (presence/absence) method.")

    # Always start from an empty working directory.
    tmpdir = 'binary_tmp'
    if os.path.isdir(tmpdir):
        shutil.rmtree(tmpdir)
    os.makedirs(tmpdir)

    datadb = []
    with open(input + '.binary.fa', 'w') as output:
        with open(input + '.vcf', 'rU') as vcf:
            for line in vcf:
                line = line.replace('\n', '')
                if line.startswith('##'):
                    continue
                data = line.split('\t')[9:]
                if not data:
                    # A blank/short line would add an empty row, and the
                    # zip() below would then truncate the whole matrix.
                    continue
                # Previously the allele-'2' row of tri-allelic sites was
                # computed but never stored; every allele row is kept now.
                datadb.extend(_split_multiallelic(data))

        # Transpose: one entry per sample, sample name first.
        for column in zip(*datadb):
            output.write(">%s\n%s\n" % (column[0], ''.join(column[1:])))

    # RAxML runs inside tmpdir, so the alignment is given as an absolute path.
    raxml_cmd = ['raxmlHPC-PTHREADS', '-T', str(args.cpus), '-f', 'a',
                 '-m', 'BINGAMMA', '-p', '12345', '-x', '12345',
                 '-#', str(args.bootstrap),
                 '-s', os.path.abspath(input + '.binary.fa')]
    if outgroup:
        raxml_cmd += ['-o', outgroup]
    raxml_cmd += ['-n', 'binary.nwk']
    subprocess.call(raxml_cmd, cwd=tmpdir, stdout=FNULL, stderr=FNULL)

    # parse with biopython and draw
    trees = list(Phylo.parse(os.path.join(tmpdir, 'RAxML_bootstrap.binary.nwk'), 'newick'))
    best = Phylo.read(os.path.join(tmpdir, 'RAxML_bestTree.binary.nwk'), 'newick')
    support_tree = get_support(best, trees)
    Phylo.write(support_tree, args.input + '.binary.phylogeny.nwk', 'newick')
    Phylo.draw(support_tree, do_show=False)
    pylab.axis('off')
    pylab.savefig(args.input + '.binary.phylogeny.pdf', format='pdf',
                  bbox_inches='tight', dpi=1000)
def main():
    """Build a distance tree with bootstrap support from a numeric table.

    Parses the command line with the module-level ``parser``, loads
    ``args.table`` with pandas, uses the column named "sample id"
    (case-insensitive) as row labels, and converts every other column to
    numbers.  A main tree is either read from ``args.tree`` or built with
    UPGMA / neighbour joining; ``args.times`` bootstrap replicates are built
    the same way, and the tree returned by ``get_support`` is written to
    ``args.output`` in Newick format with inner clade labels cleared.
    """
    args = parser.parse_args()
    table = args.table
    if not table:
        print("No input table provided. This argument is required\n")
        parser.print_help()
        # NOTE(review): exits with status 0 although this is an error path.
        exit(0)

    df = pd.read_csv(table, sep=args.delim, quotechar='"', index_col=False)
    # Iterate over a snapshot of the columns because df is rebound below.
    for c in list(df.columns):
        if c.lower() == "sample id":
            # set_index returns a new frame; the result must be kept,
            # otherwise the IDs stay in the data and the row labels are
            # just 0..n-1.
            df = df.set_index(c)
        else:
            df[c] = pd.to_numeric(df[c], errors="coerce", downcast="integer")
    # Drop columns in which no value could be parsed as a number.
    df.dropna(axis=1, how="all", inplace=True)

    # NOTE(review): individual cells that failed coercion are still NaN
    # here; confirm the integer cast treats them as intended.
    data = df.values.astype(int)
    names = df.index.tolist()

    constructor = DistanceTreeConstructor()
    const = constructor.upgma if args.upgma else constructor.nj

    if args.tree:
        main_tree = Phylo.read(args.tree, "newick")
    else:
        main_tree = const(get_distance_matrix(data, names))

    trees = [
        const(get_distance_matrix(sub, names))
        for sub in bootstrap(data, args.times)
    ]
    supp_tree = get_support(main_tree, trees)

    # remove labels of inner clades
    for clade in supp_tree.get_nonterminals():
        clade.name = ""
    Phylo.write(supp_tree, args.output, "newick")
def runsnps(input, outgroup):
    """Build a RAxML phylogeny from the concatenated SNP alignment.

    Runs RAxML (GTRGAMMA, rapid bootstrap) on ``<input>.aln`` inside a fresh
    ``snps_tmp`` directory, then writes the tree returned by ``get_support``
    to ``<args.input>.snps.phylogeny.nwk`` and draws it to
    ``<args.input>.snps.phylogeny.pdf``.

    Relies on module-level ``args`` and ``FNULL``.

    Args:
        input: path prefix of the alignment (``.aln`` is appended).
        outgroup: taxon name passed to RAxML ``-o``; falsy for no outgroup.
    """
    print("Running concatenated SNP method.")
    alignment = input + '.aln'

    # Recreate the working directory from scratch.
    workdir = 'snps_tmp'
    if os.path.isdir(workdir):
        shutil.rmtree(workdir)
    os.makedirs(workdir)

    # RAxML runs inside workdir, so the alignment is given as an absolute path.
    raxml_cmd = [
        'raxmlHPC-PTHREADS', '-T', str(args.cpus),
        '-f', 'a', '-m', 'GTRGAMMA',
        '-p', '12345', '-x', '12345',
        '-#', str(args.bootstrap),
        '-s', os.path.abspath(alignment),
    ]
    if outgroup:
        raxml_cmd.extend(['-o', outgroup])
    raxml_cmd.extend(['-n', 'snps.nwk'])
    subprocess.call(raxml_cmd, cwd=workdir, stdout=FNULL, stderr=FNULL)

    # parse with biopython and draw
    bootstrap_file = os.path.join(workdir, 'RAxML_bootstrap.snps.nwk')
    best_file = os.path.join(workdir, 'RAxML_bestTree.snps.nwk')
    trees = list(Phylo.parse(bootstrap_file, 'newick'))
    best = Phylo.read(best_file, 'newick')
    support_tree = get_support(best, trees)

    out_prefix = args.input
    Phylo.write(support_tree, out_prefix + '.snps.phylogeny.nwk', 'newick')
    Phylo.draw(support_tree, do_show=False)
    pylab.axis('off')
    pylab.savefig(out_prefix + '.snps.phylogeny.pdf', format='pdf',
                  bbox_inches='tight', dpi=1000)
# Trim the mafft alignment with trimal and write it in PHYLIP format.
subprocess.call(['trimal', '-in', mafft_out, '-out', trimal_out, '-automated1', '-phylip'],
                stderr=FNULL, stdout=FNULL)

print("Running RAxML")
# Choose the substitution model from the requested sequence type.
if args.method == 'prot':
    method = 'PROTGAMMAAUTO'
elif args.method == 'nucl':
    method = 'GTRGAMMA'

# Assemble the RAxML command once; '-T' is only passed when more than one
# CPU is requested and '-o' only when an outgroup was given.
raxml_cmd = ['raxmlHPC-PTHREADS']
if args.cpus != 1:
    raxml_cmd += ['-T', str(args.cpus)]
raxml_cmd += ['-f', 'a', '-m', method]
if args.outgroup:
    raxml_cmd += ['-o', args.outgroup]
raxml_cmd += ['-p', '12345', '-x', '12345', '-#', str(args.bootstrap),
              '-s', trimal_out, '-n', 'nwk']
subprocess.call(raxml_cmd, cwd=tmp)

print("Drawing tree inferred from RAxML")
# parse with biopython and draw
trees = list(Phylo.parse(os.path.join(tmp, 'RAxML_bootstrap.nwk'), 'newick'))
best = Phylo.read(os.path.join(tmp, 'RAxML_bestTree.nwk'), 'newick')
support_tree = get_support(best, trees)
# Strip only the final extension of the output path.  Splitting on the first
# '.' broke paths such as './out.pdf' (-> '.nwk') or 'run.v2/tree.pdf'.
Phylo.write(support_tree, os.path.splitext(args.out)[0] + '.nwk', 'newick')
Phylo.draw(support_tree, do_show=False)
pylab.axis('off')
pylab.savefig(args.out, format='pdf', bbox_inches='tight', dpi=1000)

# Keep the temporary directory only when debugging.
if not args.debug:
    shutil.rmtree(tmp)