# NOTE(review): the next two statements are the tail of a helper whose `def`
# line (and any enclosing loop/branch) lies above this chunk. Their
# indentation is reconstructed here and must be checked against that header.
        out.append(copy.deepcopy(ranges[i]))
    return out

# Script body. Inputs are positional command-line arguments: argv[1] is fed to
# fastaIterator, argv[2] is read as NucSNPRecord lines and argv[3] as NucRecord
# lines. argv[6] is the prefix of both output files. argv[4] and argv[5] are
# not referenced in the visible part of the script.
rfh = open(sys.argv[1])
sfh = open(sys.argv[2])
afh = open(sys.argv[3])
pout = open(sys.argv[6] +".cor.pileup", "w")
corout = open(sys.argv[6] +".cor.fa", "w")

alignment_it = lineRecordIterator(afh, NucRecord, NucRecordTypes)
snp_it = lineRecordIterator(sfh, NucSNPRecord, NucSNPRecordTypes)

# Read name -> read sequence, both coerced to str.
reads = dict(map(lambda r : (str(r.name), str(r.seq)), fastaIterator(rfh)))
# sname -> list of alignment records with that sname. The tuple-parameter
# lambda is Python 2-only syntax.
# NOTE(review): if groupby is itertools.groupby it only merges *consecutive*
# records, so a name that reappears later in the file would overwrite its
# earlier group in this dict - presumably the input is grouped by sname; confirm.
alignments = dict(map(lambda (n,a): (n,list(a)), groupby(alignment_it, lambda x: x.sname)))

# Walk the SNP records one sname group at a time (same consecutive-grouping
# caveat as above).
for pbname, snp_entries in groupby(snp_it, lambda x: x.sname):
    warnings = []
    # Raises KeyError if the SNP file names a read missing from the FASTA input.
    pblen = len(reads[pbname])

    ## no alignments for this pb read: nothing to do for it
    if pbname not in alignments:
        continue

    ## create ranges of accepted alignments: one slot per position of the
    ## read, all initially None
    accept_alignment_ranges = [None] * pblen
    # Alternative sort key kept by the original author (span weighted by squared
    # percent identity), currently disabled:
    #alignments[pbname].sort(key=lambda a: (a.send-a.sstart) * pow(a.pctid/100.0,2))
    # Sort this read's alignments in place, ascending by send - sstart.
    alignments[pbname].sort(key=lambda a: (a.send-a.sstart))
    # NOTE(review): the body of this loop lies beyond the end of this chunk.
    for alignment in alignments[pbname]:
# NOTE(review): the two statements below are the body of a usage guard whose
# `if` header lies above this chunk. Indentation throughout this block is
# reconstructed from the statement order and should be confirmed.
    print "partition.py <reads_per_file (int)> <files_per_dir (int)> <input.fa>"
    sys.exit(1)


def pstr(num):
    """Return num formatted as a zero-padded decimal at least four digits wide."""
    return "%04d" % num


# Positional arguments, as named by the usage string: reads per output file,
# files per directory, and the input FASTA path.
rpf = int(sys.argv[1])
fpd = int(sys.argv[2])
fa_fh = open(sys.argv[3])

# NOTE(review): total_reads is never incremented in the visible code; the
# increment presumably follows beyond this chunk - confirm.
total_reads = 0
dnum = 0    # number of the current output directory (first one created is 1)
fnum = 0    # number of the current file inside that directory
fh = None   # currently open output file; None until the first record arrives
# Index written to the current working directory: one line per read, mapping
# the read name to the file it was written to.
readidx_fh = open("ReadIndex.txt", "w")

for record in fastaIterator(fa_fh):
    # Every rpf reads, roll over to a new output file.
    if total_reads % rpf == 0:
        # Every rpf * fpd reads, also start a new directory and restart the
        # file numbering. os.mkdir raises if the directory already exists.
        if total_reads % (rpf * fpd) == 0:
            dnum += 1
            fnum = 0
            os.mkdir(pstr(dnum))
        fnum += 1
        # Close the previous output file before opening the next one.
        if fh:
            fh.close()
        # Output path has the form "<dddd>/p<ffff>", e.g. "0001/p0001".
        current_file ="%s/p%s" % (pstr(dnum),pstr(fnum))
        fh = open(current_file, "w")
    # Record which file this read went to, then write the read as a two-line
    # FASTA entry (header line, then the whole sequence on one line).
    readidx_fh.write(str(record.name) +"\t" + current_file + "\n")
    fh.write(">"+str(record.name)+"\n")
    fh.write(str(record.seq)+"\n")
# Tuning constants. They are not used in the visible part of the script.
# NOTE(review): presumably a window length in bases, a GC-fraction cutoff and
# a minimum uncovered-gap length - confirm against the code that uses them.
GC_WINDOW_SIZE = 300
GC_THRESHOLD = 0.7
MIN_COV_GAP = 100

# Exactly three arguments are required; otherwise print usage (to stdout) and
# exit with status 1.
if not len(sys.argv) == 4:
    print "gc_count.py reads.fa alignments.sc outprefix"
    sys.exit(1)

rfh = open(sys.argv[1])
afh = open(sys.argv[2])
# Output file is "<outprefix>.uncov.gc.bases".
ofh = open(sys.argv[3]+".uncov.gc.bases","w")

# Hold every read in memory: read name -> sequence, both coerced to str.
reads = {}
for entry in fastaIterator(rfh):
    reads[str(entry.name)] = str(entry.seq)

sys.stderr.write("Loaded reads\n")

alignmentIt = getNucmerAlignmentIterator(afh)

# NOTE(review): this message is emitted right after the iterator is created;
# if the iterator is lazy, no alignment has actually been read yet - confirm.
sys.stderr.write("Loaded Alignments\n");

counter = 0
# Process alignments one sname group at a time.
# NOTE(review): if groupby is itertools.groupby, only consecutive records with
# the same sname form a group - presumably the input is grouped by sname; confirm.
for name,group in groupby(alignmentIt, lambda x: x.sname):
    #build coverage vector from this group's alignments
    cov = getCoverageFromNucAlignments(group)
    #mark the regions with 0 (no) coverage as 1 and change
    #everything else to 0
    # NOTE(review): the code this comment describes lies beyond the end of this chunk.
#!/usr/bin/env python
"""Print a constant-quality record for every read of a FASTA file.

Usage: qualgen.py read.fa

For each record, a ">name" header line is written to stdout, followed by one
line holding the score "60" repeated once per sequence position, separated
by single spaces.
"""
import sys
from seqio import fastaIterator

# Exactly one argument (the FASTA path) is expected; otherwise show the usage
# line and exit with status 1.
if len(sys.argv) != 2:
    print("qualgen.py read.fa")
    sys.exit(1)

reads = sys.argv[1]

with open(reads) as rfh:
    for record in fastaIterator(rfh):
        # Header first, then the score line, so output order per record is unchanged.
        print(">" + str(record.name))
        print(" ".join(["60"] * len(record.seq)))
#!/usr/bin/env python
# qualgen: for each read in a FASTA file, write a ">name" header line and a
# line of space-separated "60" scores (one per sequence position) to stdout.
import sys
from seqio import fastaIterator

# Wrong argument count: emit the usage line on stdout and stop with status 1.
if len(sys.argv) != 2:
    sys.stdout.write("qualgen.py read.fa" + "\n")
    sys.exit(1)

reads = sys.argv[1]

with open(reads) as rfh:
    for record in fastaIterator(rfh):
        # Header line is written before the scores are computed, matching the
        # original statement order.
        sys.stdout.write(">" + str(record.name) + "\n")
        sys.stdout.write(" ".join(["60"] * len(record.seq)) + "\n")
#read.lens is just a file with a list of read lengths if not len(sys.argv) == 5: print "pb_sim.py genome.fa read.lens error_rate out_prefix" sys.exit(1) Chromosome = namedtuple("Chromosome", ["name","seq"]) gfh = open(sys.argv[1]) lfh = open(sys.argv[2]) erate = float(sys.argv[3]) rout = open(sys.argv[4]+".sim.fa", "w") #read genome into mem chromosomes = map(lambda r: Chromosome._make((str(r.name),str(r.seq))), fastaIterator(gfh)) chrom_lengths = map(lambda c: len(c.seq), chromosomes) genome_length = sum(chrom_lengths) chrom_lengths_ivtf = map(misc.accumulator(0), map(lambda x: float(x)/genome_length , chrom_lengths)) count = 0 for l in lfh: #length of read to simulate readlen = int(l.strip()) while True: #choose a chromosome U_c = random.random() chr_idx = misc.first_idx(lambda x : x > U_c, chrom_lengths_ivtf) chromosome = chromosomes[chr_idx] chromosome_len = chrom_lengths[chr_idx]
# Tuning constants. They are not used in the visible part of the script.
# NOTE(review): presumably a window length in bases, a GC-fraction cutoff and
# a minimum uncovered-gap length - confirm against the code that uses them.
GC_WINDOW_SIZE = 300
GC_THRESHOLD = 0.7
MIN_COV_GAP = 100

# Exactly three arguments are required; otherwise print usage (to stdout) and
# exit with status 1.
if not len(sys.argv) == 4:
    print "gc_count.py reads.fa alignments.sc outprefix"
    sys.exit(1)

rfh = open(sys.argv[1])
afh = open(sys.argv[2])
# Output file is "<outprefix>.uncov.gc.bases".
ofh = open(sys.argv[3] + ".uncov.gc.bases", "w")

# Hold every read in memory: read name -> sequence, both coerced to str.
reads = {}
for entry in fastaIterator(rfh):
    reads[str(entry.name)] = str(entry.seq)

sys.stderr.write("Loaded reads\n")

alignmentIt = getNucmerAlignmentIterator(afh)

# NOTE(review): this message is emitted right after the iterator is created;
# if the iterator is lazy, no alignment has actually been read yet - confirm.
sys.stderr.write("Loaded Alignments\n")

counter = 0
# Process alignments one sname group at a time.
# NOTE(review): if groupby is itertools.groupby, only consecutive records with
# the same sname form a group - presumably the input is grouped by sname; confirm.
for name, group in groupby(alignmentIt, lambda x: x.sname):
    #build coverage vector from this group's alignments
    cov = getCoverageFromNucAlignments(group)
    #mark the regions with 0 (no) coverage as 1 and change
    #everything else to 0
    # NOTE(review): the code this comment describes lies beyond the end of this chunk.