#!/usr/bin/env python
"""Print the FASTA records that contain exactly 121 uppercase A/C/G/T bases.

usage: <script> file.fa
"""
from __future__ import print_function
import sys
from parsers import parse_fa
import collections
import numpy as np


def print_seq(contig_header, seq, linelen=60):
    """Print ``contig_header`` followed by ``seq`` wrapped at ``linelen``.

    contig_header -- printed verbatim on its own line.
    seq           -- sequence to print; sliced into ``linelen`` sized pieces.
    linelen       -- maximum number of characters per output line (> 0).

    Raises ValueError if ``linelen`` is not positive (a non-positive width
    previously made the wrapping loop spin forever on a non-empty sequence).
    """
    if linelen <= 0:
        raise ValueError("linelen must be positive")
    print(contig_header)
    for start in range(0, len(seq), linelen):
        print(seq[start:start + linelen])


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("usage: {} file.fa".format(sys.argv[0]), file=sys.stderr)
        # sys.exit() rather than the interactive-only ``exit`` helper that
        # the ``site`` module injects (absent under ``python -S``).
        sys.exit(1)

    for label, seq in parse_fa(sys.argv[1]):
        counts = collections.Counter(seq)
        # Only uppercase bases are counted.
        # NOTE(review): lowercase presumably marks soft-masked sequence -- confirm.
        # Builtin sum() is used because np.sum() over a generator is
        # deprecated and merely falls back to the builtin anyway.
        unmasked = sum(counts[base] for base in "ACGT")
        # NOTE(review): 121 is hard-coded; presumably the expected length of a
        # fully unmasked record -- confirm against the data being filtered.
        if unmasked == 121:
            print_seq(label, seq)
"""Plot a histogram of the DIST= values found in sequence labels.

usage: <script> in.{fa,fq}

Each record label is expected to look like ``name KEY=VAL;KEY=VAL...``; every
``DIST=<int>`` entry in the second space-separated field is collected.
"""
from __future__ import print_function
import sys

import matplotlib
matplotlib.use('Agg')  # don't try to use $DISPLAY
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

import parsers


def _label_distances(label):
    """Return the integer DIST values in the second field of ``label``.

    Labels without a second space-separated field, and entries without an
    ``=``, contribute nothing instead of raising.  A non-integer DIST value
    still raises ValueError from ``int()``.
    """
    fields = label.split(" ")
    if len(fields) < 2:
        return []
    found = []
    for entry in fields[1].split(";"):
        key, sep, value = entry.partition("=")
        if sep and key == "DIST":
            found.append(int(value))
    return found


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("usage: {} in.{{fa,fq}}".format(sys.argv[0]), file=sys.stderr)
        # Without this exit the script fell through and crashed on
        # sys.argv[1] when called with no argument.
        sys.exit(1)

    dist = []
    for record in parsers.parse_fa(sys.argv[1]):
        # record[0] is the label; the remaining items are not used here.
        dist.extend(_label_distances(record[0]))

    if not dist:
        # max() of an empty list would raise; report the real problem.
        print("error: no DIST= annotations found in {}".format(sys.argv[1]),
              file=sys.stderr)
        sys.exit(1)

    pdf = PdfPages("plot_dist_hist.pdf")
    fig_w, fig_h = plt.figaspect(9.0 / 16.0)
    fig1 = plt.figure(figsize=(fig_w, fig_h))
    ax1 = fig1.add_subplot(111)
    # One unit-wide bin per integer distance.  The edges must run to
    # max + 1: the last histogram bin is closed on both sides, so stopping at
    # max would merge the two largest distances into one bar (and leave no
    # bin at all when every distance is 0).
    ax1.hist(dist, bins=range(0, max(dist) + 2))
    ax1.set_xlabel("Edit Distance")
    ax1.set_ylabel("Frequency")
s.append("C") else: s.append("T") continue return "".join(s) if __name__ == "__main__": if len(sys.argv) != 2: print("usage: {} file.fastq[.gz]".format(sys.argv[0]), file=sys.stderr) exit(1) filename = sys.argv[1] for lineno, line in enumerate(parsers.parse_fa(filename), 1): if len(line) == 3: # fastq, output single sequence label, seq, qual = line bs_seq = bstreat(seq) print("{}\n{}\n+\n{}".format(label, bs_seq, qual)) elif len(line) == 2: # fasta, output two sequences label, seq = line lfields = label.split() sname = lfields[0] if len(lfields) > 1: comment = " " + " ".join(lfields[1:]) else:
#!/usr/bin/env python
"""Count how often each quality character occurs in a FASTQ file.

usage: <script> in.fq

Prints one line per distinct quality character, ordered by code point:
the quoted character, its ordinal and its number of occurrences.
"""
from __future__ import print_function
import sys
import parsers
import collections

if __name__ == "__main__":
    if len(sys.argv) != 2:
        # Diagnostics belong on stderr with the usage text, so stdout stays
        # clean for anything consuming the counts.
        print(len(sys.argv), sys.argv, file=sys.stderr)
        print("usage: {} in.fq".format(sys.argv[0]), file=sys.stderr)
        # sys.exit() rather than the interactive-only ``exit`` helper that
        # the ``site`` module injects (absent under ``python -S``).
        sys.exit(1)

    quals = collections.Counter()
    # Each record must unpack into three items (label, sequence, quality);
    # only the quality string is tallied, one count per character.
    for _label, _seq, qual in parsers.parse_fa(sys.argv[1]):
        quals.update(qual)

    # Single characters sort by code point, i.e. the same order as key=ord.
    for char in sorted(quals):
        print("``{}''".format(char), ord(char), quals[char])
help="reads longer than this are ignored") parser.add_argument("in_fa", help="in.{fa,fq}") parser.add_argument("out_pdf", default="plot_fraglen_hist.pdf", help="out.pdf") return parser.parse_args() if __name__ == "__main__": args = parse_args() lengths = np.zeros(args.max_len) lmin = 1e6 lmax = 0 for line in parsers.parse_fa(args.in_fa): length = len(line[1]) if length > args.max_len: continue lengths[length] += 1 if length < lmin: lmin = length elif length > lmax: lmax = length pdf = PdfPages(args.out_pdf) #fig_w, fig_h = plt.figaspect(9.0/16.0) #fig_w, fig_h = plt.figaspect(3.0/4.0) fig_w = fig_h = 307.28987 / 72.27
yield last else: exclude = False last = [chrom, pos, ref, alt, dp] if not exclude: yield last if __name__ == "__main__": if len(sys.argv) != 3: print("usage: {} in.fa in.vcf".format(sys.argv[0]), file=sys.stderr) exit(1) fp = parse_fa(sys.argv[1]) snpdict = {} for label, seq in fp: chrom, extent = label[1:].split(":") start, end = map(int, extent.split("-")) pos = start + (end - start) / 2 if windowmasker(seq) > 0: continue snpdict[(chrom, pos)] = seq plen = 60 print("chrom", "pos",
# # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. import sys import random from parsers import parse_fa sequences = [] for line in parse_fa("/dev/stdin"): assert (len(line) == 2) _, seq = line sequences.append(seq) random.shuffle(sequences) def print_fa(label, seq, linelen=60): print(">{}".format(label)) while len(seq) > 0: print(seq[:linelen]) seq = seq[linelen:] for n, seq in enumerate(sequences, 1):