def callCuffquant(bam, gtf, cpu=3):
    """
    Run cuffquant on one BAM file, skipping samples that are already done.

    The output directory is named after the directory holding the BAM file,
    i.e. the second-to-last "/"-separated component of ``bam``, so ``bam``
    must contain at least one "/" (otherwise an IndexError is raised). The
    name is used as given, so it is resolved against the current working
    directory.

    bam: path to the BAM file, e.g. "mapping/sample1/accepted_hits.bam"
    gtf: annotation file handed to cuffquant
    cpu: number of threads passed to cuffquant through -p
    """
    fo = bam.split("/")[-2]
    if os.path.exists(fo):
        logger.info("%s has been generated to %s" % (bam, fo))
        # The result is already there: stop here instead of re-running
        # cuffquant over it, matching the skip logic of bdg2bw/bdg2washU.
        return
    cmd = "cuffquant -p {cpu} -o {fout} {gtf} {bam}".format(cpu=cpu,
                                                            fout=fo,
                                                            gtf=gtf,
                                                            bam=bam)
    callSys([cmd], logger)
def sam2bam(sam, bam):
    """
    Convert a SAM file to a sorted, indexed BAM file, then remove the SAM.

    Four shell commands are built and handed to callSys as one list:
    samtools view (SAM -> BAM), samtools sort (rewrites the BAM in place,
    2 extra threads), samtools index (index named after the BAM with ".bam"
    swapped for ".bai") and rm of the original SAM.

    sam: path of the input SAM file; it is deleted by the last command
    bam: path of the BAM file to create
    """
    # Every ".bam" occurrence in the path is replaced, not only the suffix.
    sort_prefix = bam.replace(".bam", "")
    index_path = bam.replace(".bam", ".bai")
    steps = []
    steps.append("samtools view -S %s -b -o %s" % (sam, bam))
    steps.append("samtools sort -@ 2 {bam} -T {pre} -o {bam}".format(
        bam=bam, pre=sort_prefix))
    steps.append("samtools index {bam} {bai}".format(bam=bam,
                                                     bai=index_path))
    steps.append("rm %s" % (sam))
    callSys(steps, logger)
def bdg2bw(f):
    """
    Build a bigWig (.bw) file from a bedGraph (.bdg) file.

    Output and temporary files are named after the input's file name with
    ".bdg" removed and carry no directory part, so they are created relative
    to the current working directory. Nothing is done if the .bw file
    already exists.

    f: path of the input .bdg file
    """
    stem = f.split("/")[-1].replace(".bdg", "")
    bigwig = stem + ".bw"
    if os.path.isfile(bigwig):
        return
    sorted_bdg = stem + ".bdg2"
    # Sort into a temporary copy so the input file is left untouched.
    callSys(["bedSort {bdg} {sbdg}".format(bdg=f, sbdg=sorted_bdg)], logger)
    # Filter the sorted copy with validateBdg before the conversion.
    validateBdg(sorted_bdg)
    convert = "bedGraphToBigWig {bdg} {chrom} {bw}".format(bdg=sorted_bdg,
                                                           chrom=CHROM,
                                                           bw=bigwig)
    cleanup = "rm %s.bdg2" % stem
    callSys([convert, cleanup], logger)
def bdg2washU(f):
    """
    Build a washU track (bgzip-compressed, tabix-indexed) from a .bdg file.

    Outputs are named after the input's file name with ".bdg" removed and
    carry no directory part, so they are created relative to the current
    working directory. Nothing is done when both "<name>_washU.txt.gz" and
    its ".tbi" index already exist.

    f: path of the input .bdg file
    """
    track = f.split("/")[-1].replace(".bdg", "") + "_washU.txt"
    packed = track + ".gz"
    if os.path.isfile(packed) and os.path.isfile(packed + ".tbi"):
        return
    steps = [
        # sort the .bdg file into the track file
        "bedSort {bdg} {sbdg}".format(bdg=f, sbdg=track),
        # compress the track with bgzip
        "bgzip %s" % (track),
        # index the compressed track with tabix
        "tabix -p bed %s" % (packed),
    ]
    callSys(steps, logger)
def validateBdg(bdg):
    """
    Filter a .bdg file in place against the known chromosome sizes.

    A line is kept only if it has at least 4 tab-separated fields, its first
    field is a key of the mapping returned by getChrSize(), its second field
    (as int) is below that chromosome's size and its third field (as int)
    does not exceed it. Kept lines are written to "<bdg>.2", which is then
    moved over the original file through an "mv" shell command.

    bdg: path of the .bdg file to clean; it is overwritten

    Raises ValueError if the second or third field of a line that reaches
    the range check is not an integer.
    """
    chrs = getChrSize()
    nbdg = bdg + ".2"
    # Both handles are managed by the same with-statement so the input is
    # closed deterministically (it used to be left to the garbage collector)
    # and the output is flushed before mv runs. The input is opened first so
    # a missing input file does not leave an empty ".2" file behind.
    with open(bdg) as fin, open(nbdg, "w") as fout:
        for line in fin:
            fields = line.split("\n")[0].split("\t")
            if len(fields) < 4:
                continue
            chrom = fields[0]
            if chrom not in chrs:
                continue
            size = chrs[chrom]
            # Drop records that start at/after or end past the chromosome end.
            if int(fields[1]) >= size or int(fields[2]) > size:
                continue
            fout.write("\t".join(fields) + "\n")
    cmd = "mv %s %s" % (nbdg, bdg)
    callSys([cmd], logger)
def callCuffnorm(fs, gtf, fo, cpu=40):
    """
    Run cuffnorm with --compatible-hits-norm over a set of input files.

    fs: iterable of input file paths; they are joined with single spaces
        on the command line
    gtf: annotation file handed to cuffnorm
    fo: output location passed through -o
    cpu: number of threads passed through -p
    """
    inputs = " ".join(fs)
    template = "cuffnorm --compatible-hits-norm {gtf} {fs} -o {fo} -p {cpu}"
    command = template.format(gtf=gtf, fs=inputs, fo=fo, cpu=cpu)
    callSys([command], logger)