Exemplo n.º 1
0
            out.append(copy.deepcopy(ranges[i]))
    return out


# Input handles, in command-line order: reads (parsed as FASTA below),
# SNP records, and alignment records.
rfh = open(sys.argv[1])
sfh = open(sys.argv[2])
afh = open(sys.argv[3])

# Both output files share the prefix given as the sixth argument.
pout = open("%s.cor.pileup" % sys.argv[6], "w")
corout = open("%s.cor.fa" % sys.argv[6], "w")

# Record iterators over the alignment file and the SNP file.
alignment_it = lineRecordIterator(afh, NucRecord, NucRecordTypes)
snp_it = lineRecordIterator(sfh, NucSNPRecord, NucSNPRecordTypes)


# Read name -> sequence string for every FASTA record.
reads = dict((str(rec.name), str(rec.seq)) for rec in fastaIterator(rfh))

# sname -> list of the alignment records in that group.  Every group is
# materialised with list() before groupby moves on, because advancing
# groupby invalidates the previous group iterator.  groupby only merges
# consecutive records, so a name that reappears later in the file replaces
# its earlier entry.
alignments = dict(
    (sname, list(group))
    for sname, group in groupby(alignment_it, lambda rec: rec.sname)
)

for pbname, snp_entries in groupby(snp_it, lambda x: x.sname):
    warnings = []
    pblen = len(reads[pbname])

    ##no alignments for this pb read
    if pbname not in alignments:
        continue
    ##create ranges of accepted alignments
    accept_alignment_ranges = [None] * pblen
    #alignments[pbname].sort(key=lambda a: (a.send-a.sstart) * pow(a.pctid/100.0,2))
    alignments[pbname].sort(key=lambda a: (a.send-a.sstart))
    for alignment in alignments[pbname]:
Exemplo n.º 2
0
            out.append(copy.deepcopy(ranges[i]))
    return out


# Input handles, in command-line order: reads (parsed as FASTA below),
# SNP records, and alignment records.
rfh = open(sys.argv[1])
sfh = open(sys.argv[2])
afh = open(sys.argv[3])

# Both output files share the prefix given as the sixth argument.
pout = open("%s.cor.pileup" % sys.argv[6], "w")
corout = open("%s.cor.fa" % sys.argv[6], "w")

# Record iterators over the alignment file and the SNP file.
alignment_it = lineRecordIterator(afh, NucRecord, NucRecordTypes)
snp_it = lineRecordIterator(sfh, NucSNPRecord, NucSNPRecordTypes)


# Read name -> sequence string for every FASTA record.
reads = dict((str(rec.name), str(rec.seq)) for rec in fastaIterator(rfh))

# sname -> list of the alignment records in that group.  Every group is
# materialised with list() before groupby moves on, because advancing
# groupby invalidates the previous group iterator.  groupby only merges
# consecutive records, so a name that reappears later in the file replaces
# its earlier entry.
alignments = dict(
    (sname, list(group))
    for sname, group in groupby(alignment_it, lambda rec: rec.sname)
)

for pbname, snp_entries in groupby(snp_it, lambda x: x.sname):
    warnings = []
    pblen = len(reads[pbname])

    ##no alignments for this pb read
    if pbname not in alignments:
        continue
    ##create ranges of accepted alignments
    accept_alignment_ranges = [None] * pblen
    #alignments[pbname].sort(key=lambda a: (a.send-a.sstart) * pow(a.pctid/100.0,2))
    alignments[pbname].sort(key=lambda a: (a.send-a.sstart))
    for alignment in alignments[pbname]:
Exemplo n.º 3
0
    print "partition.py <reads_per_file (int)> <files_per_dir (int)> <input.fa>"
    sys.exit(1)

def pstr(num):
    """Return *num* formatted as a decimal, zero-padded to at least 4 characters."""
    zero_padded = "%04d"
    return zero_padded % num

# Command-line arguments, in the order given by the usage string:
# reads per output file, files per directory, and the input FASTA path.
rpf = int(sys.argv[1])
fpd = int(sys.argv[2])
fa_fh = open(sys.argv[3])

# Running state: reads seen so far, current directory number, current file
# number within that directory, and the handle of the file being written.
total_reads = 0
dnum = 0
fnum = 0
fh = None
# Index file: one "<read name>\t<partition file>" line per read.
readidx_fh = open("ReadIndex.txt", "w")
for record in fastaIterator(fa_fh):
    # Every rpf reads, roll over to a new partition file.
    # NOTE(review): total_reads is not incremented in the lines shown here;
    # presumably that happens further down in this loop -- confirm, otherwise
    # every record would start a new directory.
    if total_reads % rpf == 0:
        # Every rpf * fpd reads, start a new zero-padded directory and
        # restart the file numbering inside it.
        if total_reads % (rpf * fpd) == 0:
            dnum += 1
            fnum = 0
            os.mkdir(pstr(dnum))
        fnum += 1
        # Close the previous partition file before opening the next one.
        if fh:
            fh.close()
        current_file ="%s/p%s" % (pstr(dnum),pstr(fnum))
        fh = open(current_file, "w") 

    # Record which partition file this read went to, then write the read
    # itself as a two-line FASTA entry (header line, sequence line).
    readidx_fh.write(str(record.name) +"\t" + current_file + "\n")
    fh.write(">"+str(record.name)+"\n")
    fh.write(str(record.seq)+"\n")
Exemplo n.º 4
0
# Analysis constants (window size, GC threshold, minimum coverage gap).
# NOTE(review): their units and exact use are not visible here -- confirm.
GC_WINDOW_SIZE = 300
GC_THRESHOLD = 0.7
MIN_COV_GAP = 100

# Exactly three arguments are required; otherwise print usage and quit.
if len(sys.argv) != 4:
    sys.stdout.write("gc_count.py reads.fa alignments.sc outprefix\n")
    sys.exit(1)


rfh = open(sys.argv[1])
afh = open(sys.argv[2])
ofh = open("%s.uncov.gc.bases" % sys.argv[3], "w")

# Read name -> sequence string for every FASTA record.
reads = dict((str(rec.name), str(rec.seq)) for rec in fastaIterator(rfh))
sys.stderr.write("Loaded reads\n")

alignmentIt = getNucmerAlignmentIterator(afh)

sys.stderr.write("Loaded Alignments\n")

counter = 0
for name,group in groupby(alignmentIt, lambda x: x.sname):

    #build coverage vector
    cov = getCoverageFromNucAlignments(group)
    
    #mark the regions with 0 (no) coverage as 1 and change
    #everything else to 0
Exemplo n.º 5
0
#!/usr/bin/env python

import sys

from seqio import fastaIterator

# Exactly one argument is required: the FASTA file of reads.
if len(sys.argv) != 2:
    sys.stdout.write("qualgen.py read.fa\n")
    sys.exit(1)

reads = sys.argv[1]

# For every read, emit a ">name" header followed by a quality line that
# repeats the constant score "60" once per base, space-separated.
with open(reads) as rfh:
    for record in fastaIterator(rfh):
        header = ">" + str(record.name)
        scores = " ".join(["60"] * len(record.seq))
        sys.stdout.write(header + "\n")
        sys.stdout.write(scores + "\n")
Exemplo n.º 6
0
#!/usr/bin/env python

import sys

from seqio import fastaIterator

# Exactly one argument is required: the FASTA file of reads.
if len(sys.argv) != 2:
    sys.stdout.write("qualgen.py read.fa\n")
    sys.exit(1)

reads = sys.argv[1]

# For every read, emit a ">name" header followed by a quality line that
# repeats the constant score "60" once per base, space-separated.
with open(reads) as rfh:
    for record in fastaIterator(rfh):
        header = ">" + str(record.name)
        scores = " ".join(["60"] * len(record.seq))
        sys.stdout.write(header + "\n")
        sys.stdout.write(scores + "\n")
Exemplo n.º 7
0
# read.lens is a plain file holding a list of read lengths.
if len(sys.argv) != 5:
    sys.stdout.write("pb_sim.py genome.fa read.lens error_rate out_prefix\n")
    sys.exit(1)


Chromosome = namedtuple("Chromosome", ["name", "seq"])

gfh = open(sys.argv[1])
lfh = open(sys.argv[2])
erate = float(sys.argv[3])
rout = open("%s.sim.fa" % sys.argv[4], "w")


# Load the whole genome into memory as (name, seq) records.
chromosomes = list(
    Chromosome(str(rec.name), str(rec.seq)) for rec in fastaIterator(gfh)
)
chrom_lengths = list(len(chrom.seq) for chrom in chromosomes)
genome_length = sum(chrom_lengths)
# Each chromosome's fraction of the genome, fed in order through
# misc.accumulator(0).
# NOTE(review): presumably this yields a cumulative distribution used to
# pick a chromosome at random -- confirm against misc.accumulator.
chrom_lengths_ivtf = list(map(
    misc.accumulator(0),
    (float(length) / genome_length for length in chrom_lengths)
))

count = 0
for l in lfh:
    #length of read to simulate
    readlen = int(l.strip())
    
    while True:
        #choose a chromosome
        U_c = random.random()
        chr_idx = misc.first_idx(lambda x : x > U_c, chrom_lengths_ivtf)
        chromosome = chromosomes[chr_idx]
        chromosome_len = chrom_lengths[chr_idx]
Exemplo n.º 8
0
# Analysis constants (window size, GC threshold, minimum coverage gap).
# NOTE(review): their units and exact use are not visible here -- confirm.
GC_WINDOW_SIZE = 300
GC_THRESHOLD = 0.7
MIN_COV_GAP = 100

# Exactly three arguments are required; otherwise print usage and quit.
if len(sys.argv) != 4:
    sys.stdout.write("gc_count.py reads.fa alignments.sc outprefix\n")
    sys.exit(1)

rfh = open(sys.argv[1])
afh = open(sys.argv[2])
ofh = open("%s.uncov.gc.bases" % sys.argv[3], "w")

# Read name -> sequence string for every FASTA record.
reads = dict((str(rec.name), str(rec.seq)) for rec in fastaIterator(rfh))
sys.stderr.write("Loaded reads\n")

alignmentIt = getNucmerAlignmentIterator(afh)

sys.stderr.write("Loaded Alignments\n")

counter = 0
for name, group in groupby(alignmentIt, lambda x: x.sname):

    #build coverage vector
    cov = getCoverageFromNucAlignments(group)

    #mark the regions with 0 (no) coverage as 1 and change
    #everything else to 0