def getTotalAlignments(f):
    """Sum the third column of ``samtools idxstats`` output for file *f*.

    The idxstats report is written to a scratch file named ``temp`` in the
    current working directory, read back, and then removed.  Per the samtools
    documentation the third column of idxstats is the mapped-read count, so
    the result is the total number of mapped reads.

    f -- path of the alignment file handed to ``samtools idxstats``
         (presumably an indexed BAM -- confirm against callers).

    Returns the integer sum of field 3 over every line of the report.
    Raises IndexError / ValueError if a line has fewer than three fields or
    a non-integer third field.
    """
    stats_out = open("temp", "w")
    try:
        # Close the write handle before reading the file back so that the
        # reader never races an open writer.
        with stats_out:
            runCommand("samtools idxstats " + f, stats_out)
        with open("temp") as stats_in:
            total = sum(int(line.split()[2]) for line in stats_in)
    finally:
        # Always remove the scratch file, even when parsing fails.
        runCommand("rm temp")
    return total
def findBamAlignments(bamfile,chrom,start,end):
    """Count alignment start positions per base for a region of a BAM file.

    The region is dumped with samtoolsViewRegion() into a scratch file named
    ``temp`` in the current working directory, which is scanned twice with
    efficientFileRead() and then removed (also when a scan raises).

    bamfile -- path of the BAM file to query.
    chrom   -- reference sequence name; only records whose third field
               equals it are counted.
    start   -- integer region start; index 0 of the result corresponds to it.
    end     -- integer region end.

    Returns a list of integer counts.  Its length is the span between the
    smallest and largest position seen (never narrower than start..end), and
    a record at position ``pos`` increments element ``pos - start``.

    NOTE(review): when a record begins before ``start`` the index
    ``pos - start`` is negative, so Python's negative indexing places that
    count at the tail of the list.  This layout is kept exactly as it was
    because callers may rely on it -- confirm whether it is intentional.
    """
    samtoolsViewRegion(bamfile, chrom, start, end, "temp")

    def parseAlignment(line):
        # Return (reference name, position) for a usable record, else None.
        if line[0] == "@":
            return None  # header line
        pieces = line.split()
        ch = pieces[2]
        pos = int(pieces[3])
        # Flag bit 0x4 marks an unmapped read in the SAM format; skip those.
        if int(pieces[1]) & 0x4 == 0x4:
            return None
        return ch, pos

    # Widened in place by rangeCheck to cover every position seen.
    alignRange = [start, end]

    def rangeCheck(line):
        record = parseAlignment(line)
        if record is None:
            return
        pos = record[1]
        # The range is widened by records on any reference name; only the
        # counting pass below filters on chrom.
        if pos < alignRange[0]:
            alignRange[0] = pos
        if pos > alignRange[1]:
            alignRange[1] = pos

    try:
        # Pass 1: find the extent of positions so the result can be sized.
        efficientFileRead("temp", rangeCheck)
        ret = [0] * (alignRange[1] - alignRange[0] + 1)

        def lineCheck(line):
            record = parseAlignment(line)
            if record is not None and record[0] == chrom:
                ret[record[1] - start] += 1

        # Pass 2: tally the records that lie on the requested reference.
        efficientFileRead("temp", lineCheck)
    finally:
        # Remove the scratch file even if one of the scans failed.
        runCommand("rm temp")
    return ret
def samtoolsViewRegion(bamfile,chrom,start,end,outfile):
    """Write ``samtools view`` output for one region of *bamfile* to *outfile*.

    bamfile -- path of the BAM file to query.
    chrom   -- reference sequence name (string).
    start   -- region start; converted with str().
    end     -- region end; converted with str().
    outfile -- path of the file to create or overwrite with the output.

    The command is run without ``-h``, so no header is requested.
    The output handle is closed even if runCommand raises.
    """
    region = chrom + ":" + str(start) + "-" + str(end)
    command = "samtools view " + bamfile + " " + region
    with open(outfile, "w") as outf:
        # Presumably the second argument receives the command's stdout and
        # the third is an optional stderr target -- confirm in runCommand.
        runCommand(command, outf, None)
def samtoolsViewRegions(bamfile,outfile,regions):
    """Write ``samtools view -h`` output for *regions* of *bamfile* to *outfile*.

    bamfile -- path of the BAM file to query.
    outfile -- path of the file to create or overwrite with the output.
    regions -- string appended verbatim to the command line (presumably one
               or more space-separated ``chrom:start-end`` specs -- confirm
               against callers).

    The ``-h`` flag asks samtools to include the header in the output.
    The output handle is closed even if runCommand raises.
    """
    command = "samtools view -h " + bamfile + " " + regions
    with open(outfile, "w") as outf:
        runCommand(command, outf, None)
def bedGraphToBigWig(align,chroms,out):
    """Run the ``bedGraphToBigWig`` command line tool.

    align  -- first argument passed to the tool (presumably the input
              bedGraph path -- confirm).
    chroms -- second argument (presumably the chromosome sizes file).
    out    -- third argument (presumably the bigWig path to write).
    """
    pieces = ("bedGraphToBigWig ", align, " ", chroms, " ", out)
    command = "".join(pieces)
    bash.runCommand(command)
def sortBedGraph(align,out):
    """Sort the file *align* with ``sort -k1,1 -k2,2n`` and write it to *out*.

    align -- path of the file to sort (a bedGraph, going by the function
             name).
    out   -- path of the file to create or overwrite with the sorted output.

    The sort keys are column 1 and then column 2 numerically.
    The output handle is closed when the command returns, including on error.
    """
    # Build the command before opening, so a bad argument cannot truncate out.
    command = "sort -k1,1 -k2,2n " + align
    with open(out, "w") as sorted_out:
        bash.runCommand(command, sorted_out)
def bamToBedGraph(align,chroms,out):
    """Run ``genomeCoverageBed -bg -ibam`` on *align* and write it to *out*.

    align  -- path passed to ``-ibam`` (the input BAM file).
    chroms -- path passed to ``-g`` (presumably the chromosome sizes file --
              confirm).
    out    -- path of the file to create or overwrite with the output.

    The output handle is closed when the command returns, including on error.
    """
    # Build the command before opening, so a bad argument cannot truncate out.
    command = "genomeCoverageBed -bg -ibam " + align + " -g " + chroms
    with open(out, "w") as coverage_out:
        bash.runCommand(command, coverage_out)
changeDir(tissueLocation+tissue+"/") errfile = open(tissue+".rpkmPipeline.err","a") totalAlignments = 1 commands1 = [ #Intersect tissue with uniqueome ("intersectBed -abam " + tissue+".sorted.bam -b " + uniqueomeBed + " -f 1.0", tissue+".unique.bam"), #Sort and Index Bam file ("samtools sort " + tissue+".unique.bam " + tissue + ".unique.sorted",None), ("samtools index " + tissue + ".unique.sorted.bam",None), ("samtools idxstats " + tissue + ".unique.sorted.bam",tissue+".unique.idxstats.txt") ] for command,outfile in commands1: errfile.write("\n\nRUNNING: " + command + "\n") if outfile == None: runCommand(command,None,errfile) else: runCommand(command,open(outfile,"w"),errfile) #runCommand(command,open(outfile,"w"),errfile) #print command + " > " + str(outfile) #Count total number of mapped reads totalAlignments = countReadsAligned(tissue+".unique.idxstats.txt") commands2 = [ #Collect reads in pseudogene locations ("intersectBed -abam " + tissue + ".unique.bam -b " + pseudogeneBed,tissue+".unique.pseudo.bam"), ("bamToBed -i " + tissue+".unique.pseudo.bam", tissue+".unique.pseudo.bed"), #Count unique regions ("intersectBed -c -a " + pseudogeneBed + " -b " + tissue+".unique.pseudo.bed",tissue+".pseudo.align.bed"),
of2.writelines(file2Buffer[:ind]) of1.close() of2.close() #erase used buffer lines if ind < len(file1Buffer): file1Buffer = file1Buffer[ind:] else: file1Buffer = [] if ind < len(file2Buffer): file2Buffer = file2Buffer[ind:] else: file2Buffer = [] #run diff bash.runCommand("diff temp1.txt temp2.txt",open("diff.txt","w"),None) #read result result.extend(open("diff.txt").readlines()) #refill buffers file1Buffer.extend(file1.readlines(readSize)) file2Buffer.extend(file2.readlines(readSize)) bash.runCommand("rm temp1.txt") bash.runCommand("rm temp2.txt") bash.runCommand("rm diff.txt") for line in result: print line.strip()