def main(args):
    """Merge the read-pair and BED outputs of two earlier runs.

    Both input runs (``args.inDir1`` and ``args.inDir2``) are looked up under
    ``args.outDirName``.  Their ``.readpairs.txt`` and ``.reads.bed`` files are
    merged with ``mergeChrPosFiles`` into ``args.outDirName/args.outBaseName``
    and a combined config file is written next to them.

    Args:
        args: parsed command-line options.  The attributes read here are
            ``outDirName``, ``outBaseName``, ``inDir1``, ``inDir2``,
            ``configFileName`` and ``overwrite``.
    """
    outDir = args.outDirName + "/" + args.outBaseName
    inPath1 = args.outDirName + "/" + args.inDir1
    inPath2 = args.outDirName + "/" + args.inDir2

    # debugging info
    # NOTE(review): the log file lives inside the output directory, yet logging
    # is configured before prepOutDir() runs -- confirm the logs/ directory is
    # guaranteed to exist (and to survive prepOutDir) at this point.
    logfile = "%s/logs/%d.%s.mergepairs.log" % (outDir, os.getpid(), args.outBaseName)
    logging.basicConfig(format='%(asctime)s %(message)s',
                        filename=logfile,
                        level=logging.DEBUG)
    logging.info("\ninDir1=%s\ninDir2=%s\noutBaseName=%s\nconfigFileName=%s",
                 args.inDir1, args.inDir2, args.outBaseName, args.configFileName)

    # create output directory
    discordant.prepOutDir(args.outBaseName, args.outDirName, args.overwrite)

    # make sure input sources exist
    peakparser.checkOutDir(args.inDir1, args.outDirName)
    peakparser.checkOutDir(args.inDir2, args.outDirName)

    # make sure config files exist
    configPath1 = inPath1 + "/" + args.configFileName
    configPath2 = inPath2 + "/" + args.configFileName
    discordant.checkfile(configPath1)
    discordant.checkfile(configPath2)

    # read parameters for both inputs
    configDict1 = peakparser.readConfig(configPath1, args.inDir1, args.outDirName)
    configDict2 = peakparser.readConfig(configPath2, args.inDir2, args.outDirName)

    # Insert size, read length and element lengths are taken from the first
    # input only; the second input contributes just its BAM file name below.
    maxDist = int(configDict1['insertSize']) + 2 * int(configDict1['readLength'])
    eltLenDict = discordant.makeEltLenDict(configDict1['eltLenFileName'])

    # merge readfiles
    outReadFileName = outDir + "/" + args.outBaseName + ".readpairs.txt"
    readFileName1 = inPath1 + "/" + args.inDir1 + ".readpairs.txt"
    readFileName2 = inPath2 + "/" + args.inDir2 + ".readpairs.txt"
    logging.info("merging readfiles (%s, %s)", readFileName1, readFileName2)
    mergeChrPosFiles(readFileName1, readFileName2, outReadFileName, maxDist, eltLenDict)

    # merge bedfiles
    outBedFileName = outDir + "/" + args.outBaseName + ".reads.bed"
    bedFileName1 = inPath1 + "/" + args.inDir1 + ".reads.bed"
    bedFileName2 = inPath2 + "/" + args.inDir2 + ".reads.bed"
    logging.info("merging bedfiles (%s,%s)", bedFileName1, bedFileName2)
    mergeChrPosFiles(bedFileName1, bedFileName2, outBedFileName, maxDist, eltLenDict)

    # write new config file
    configPath = outDir + "/" + args.configFileName
    # The merged config starts from the first input's config (same dict object,
    # updated in place) and replaces the single BAM entry with one per input.
    configDict = configDict1
    configDict['bamFileName1'] = configDict1['bamFileName']
    configDict['bamFileName2'] = configDict2['bamFileName']
    configDict['merged'] = 'True'
    configDict['outBaseName'] = args.outBaseName
    configDict['outDirName'] = args.outDirName
    configDict['readFileName'] = outReadFileName
    del configDict['bamFileName']

    # 'with' guarantees the handle is closed even if a write fails.
    with open(configPath, 'w') as configFile:
        for key, value in configDict.items():
            configFile.write(key + "=" + value + "\n")
def main(args):
    """Merge the read-pair files of several earlier runs into one output.

    The samples come from ``args.sampleList`` when that attribute exists,
    otherwise from the lines of the file ``args.sampleListFile``.  Every
    non-comment line must hold four whitespace-separated fields, of which only
    the second (the sample's sub-directory under ``args.outDirName``) is used
    here.  Each sample's config file is read, all ``.readpairs.txt`` files are
    merged with ``mergeChrPosFiles`` and a combined config file is written to
    ``args.outDirName/args.outBaseName``.

    Args:
        args: parsed command-line options.  The attributes read here are
            ``outDirName``, ``outBaseName``, ``configFileName``, ``overwrite``
            and either ``sampleList`` or ``sampleListFile``.

    Raises:
        ValueError: if the sample list contains no samples, or a sample line
            does not have exactly four fields.
    """
    # create output directory
    discordant.prepOutDir(args.outBaseName, args.outDirName, args.overwrite)

    try:
        sampleLines = args.sampleList
    except AttributeError:
        discordant.checkfile(args.sampleListFile)
        # Read everything up front so the file handle is closed right away.
        with open(args.sampleListFile, 'r') as sampleFile:
            sampleLines = sampleFile.readlines()

    readFileNames = []
    bamFileNames = []
    sampleNames = []
    insertSizes = []
    readLengths = []
    eltLenDict = None
    lastConfig = None

    for sampleLine in sampleLines:
        # Lines starting with '#' are comments; blank lines carry no sample.
        if sampleLine.startswith("#") or not sampleLine.strip():
            continue

        # Unpacking enforces the four-column layout; only the sub-directory
        # column is needed in this function.
        _sampleBam, sampleSubDir, _refGenome, _groupName = sampleLine.strip().split()

        peakparser.checkOutDir(sampleSubDir, args.outDirName)
        sampleDir = args.outDirName + "/" + sampleSubDir
        configPath = sampleDir + "/" + args.configFileName
        discordant.checkfile(configPath)
        configDict = peakparser.readConfig(configPath, sampleSubDir, args.outDirName)

        # Rebuilt for every sample; the value from the last sample is the one
        # handed to the merge below.
        eltLenDict = discordant.makeEltLenDict(configDict['eltLenFileName'])
        lastConfig = configDict

        insertSizes.append(int(configDict['insertSize']))
        readLengths.append(int(configDict['readLength']))
        readFileNames.append(sampleDir + "/" + sampleSubDir + ".readpairs.txt")
        bamFileNames.append(configDict['bamFileName'])
        sampleNames.append(sampleSubDir)

    if not sampleNames:
        raise ValueError("sample list contains no samples to merge")

    maxDist = max(insertSizes) + 2 * max(readLengths)

    # merge readfiles
    outDir = args.outDirName + "/" + args.outBaseName
    outReadFileName = outDir + "/" + args.outBaseName + ".readpairs.txt"
    mergeChrPosFiles(readFileNames, outReadFileName, maxDist, eltLenDict)

    # write new config file
    configPath = outDir + "/" + args.configFileName
    # The merged config starts from the last sample's config (same dict object,
    # updated in place) and gains one numbered entry per sample.
    configDict = lastConfig
    for index, bamFileName in enumerate(bamFileNames):
        configDict["bamFileName" + str(index)] = bamFileName
    for index, sampleName in enumerate(sampleNames):
        configDict["sampleName" + str(index)] = sampleName

    configDict['merged'] = 'True'
    configDict['outBaseName'] = args.outBaseName
    configDict['outDirName'] = args.outDirName
    configDict['readFileName'] = outReadFileName
    del configDict['bamFileName']

    # 'with' guarantees the handle is closed even if a write fails.
    with open(configPath, 'w') as configFile:
        for key, value in configDict.items():
            configFile.write(key + "=" + value + "\n")