Exemplo n.º 1
0
def main(args):
    """Merge the read-pair and BED files of two input runs into one output.

    Reads the config file of each input directory, merges the two
    ``.readpairs.txt`` files and the two ``.reads.bed`` files via
    ``mergeChrPosFiles``, and writes a new ``key=value`` config file for
    the merged output.

    Args:
        args: object exposing the attributes ``outDirName``,
            ``outBaseName``, ``inDir1``, ``inDir2``, ``configFileName``
            and ``overwrite``.

    Raises:
        KeyError: if a config mapping lacks ``insertSize``, ``readLength``
            or ``bamFileName`` (assuming ``readConfig`` returns a
            dict-like object -- TODO confirm).
        ValueError: if ``insertSize`` or ``readLength`` is not an integer
            string.
    """
    outRoot = args.outDirName
    baseName = args.outBaseName
    mergedDir = outRoot + "/" + baseName
    inDirPath1 = outRoot + "/" + args.inDir1
    inDirPath2 = outRoot + "/" + args.inDir2

    # debugging info: one log file per process, keyed by PID
    # NOTE(review): the log file is opened here, before prepOutDir() runs
    # below; presumably the logs directory already exists at this point --
    # confirm.
    logfile = "%s/logs/%d.%s.mergepairs.log" % (mergedDir, os.getpid(), baseName)
    logging.basicConfig(format='%(asctime)s %(message)s',
                        filename=logfile,
                        level=logging.DEBUG)

    logging.info("\ninDir1=%s\ninDir2=%s\noutBaseName=%s\nconfigFileName=%s",
                 args.inDir1, args.inDir2, baseName, args.configFileName)

    # create output directory
    pickreads.prepOutDir(baseName, outRoot, args.overwrite)

    # make sure input sources exist
    peakparser.checkOutDir(args.inDir1, outRoot)
    peakparser.checkOutDir(args.inDir2, outRoot)

    # make sure config files exist
    configPath1 = inDirPath1 + "/" + args.configFileName
    configPath2 = inDirPath2 + "/" + args.configFileName
    pickreads.checkfile(configPath1)
    pickreads.checkfile(configPath2)

    # read parameters for both inputs
    configDict1 = peakparser.readConfig(configPath1, args.inDir1, outRoot)
    configDict2 = peakparser.readConfig(configPath2, args.inDir2, outRoot)

    # NOTE(review): only the first input's insert size / read length are
    # used here; the second input's values are ignored -- confirm intended.
    maxDist = int(configDict1['insertSize']) + 2 * int(configDict1['readLength'])

    # merge readfiles
    outReadFileName = mergedDir + "/" + baseName + ".readpairs.txt"
    readFileName1 = inDirPath1 + "/" + args.inDir1 + ".readpairs.txt"
    readFileName2 = inDirPath2 + "/" + args.inDir2 + ".readpairs.txt"
    logging.info("merging readfiles (%s, %s)", readFileName1, readFileName2)
    mergeChrPosFiles(readFileName1, readFileName2, outReadFileName, maxDist)

    # merge bedfiles
    outBedFileName = mergedDir + "/" + baseName + ".reads.bed"
    bedFileName1 = inDirPath1 + "/" + args.inDir1 + ".reads.bed"
    bedFileName2 = inDirPath2 + "/" + args.inDir2 + ".reads.bed"
    logging.info("merging bedfiles (%s,%s)", bedFileName1, bedFileName2)
    mergeChrPosFiles(bedFileName1, bedFileName2, outBedFileName, maxDist)

    # write new config file, based on the first input's settings
    # (configDict1 is updated in place; it is not used again afterwards)
    configPath = mergedDir + "/" + args.configFileName
    configDict = configDict1
    configDict['bamFileName1'] = configDict1['bamFileName']
    configDict['bamFileName2'] = configDict2['bamFileName']
    configDict['merged'] = 'True'
    configDict['outBaseName'] = baseName
    configDict['outDirName'] = outRoot
    configDict['readFileName'] = outReadFileName

    # the single-input key is replaced by the numbered keys above
    del configDict['bamFileName']

    # 'with' guarantees the handle is closed even if a write fails;
    # items() is available on both Python 2 and Python 3 mappings.
    with open(configPath, 'w') as f:
        for k, v in configDict.items():
            f.write(k + "=" + v + "\n")
Exemplo n.º 2
0
def main(args):
    """Merge the read-pair files of every sample in a sample list.

    Each non-comment line of ``args.sampleListFile`` must hold three
    whitespace-separated fields; the second one is the sample's
    sub-directory under ``args.outDirName``. The config file of each
    sample is read, all ``.readpairs.txt`` files are merged via
    ``mergeChrPosFiles``, and a new ``key=value`` config file is written
    for the merged output, based on the config of the last sample listed.

    Args:
        args: object exposing the attributes ``outDirName``,
            ``outBaseName``, ``sampleListFile``, ``configFileName`` and
            ``overwrite``.

    Raises:
        ValueError: if a sample line does not have exactly three fields,
            if the sample list contains no samples, or if ``insertSize`` /
            ``readLength`` is not an integer string.
        KeyError: if a config mapping lacks ``insertSize``, ``readLength``
            or ``bamFileName`` (assuming ``readConfig`` returns a
            dict-like object -- TODO confirm).
    """
    # create output directory
    pickreads.prepOutDir(args.outBaseName, args.outDirName, args.overwrite)

    pickreads.checkfile(args.sampleListFile)

    readFileNames = []
    bamFileNames = []
    insertSizes = []
    readLengths = []
    lastConfig = None

    # 'with' closes the sample list even if a sample fails validation
    with open(args.sampleListFile, 'r') as sampleList:
        for sampleLine in sampleList:
            # lines starting with '#' in column 0 are comments
            if sampleLine.startswith("#"):
                continue

            fields = sampleLine.split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            if len(fields) != 3:
                raise ValueError(
                    "expected 3 fields (bam, subdir, refgenome) in sample line: %r"
                    % sampleLine)
            sampleSubDir = fields[1]

            peakparser.checkOutDir(sampleSubDir, args.outDirName)
            sampleDir = args.outDirName + "/" + sampleSubDir
            configPath = sampleDir + "/" + args.configFileName

            pickreads.checkfile(configPath)
            configDict = peakparser.readConfig(configPath, sampleSubDir,
                                               args.outDirName)
            lastConfig = configDict

            insertSizes.append(int(configDict['insertSize']))
            readLengths.append(int(configDict['readLength']))

            readFileNames.append(sampleDir + "/" + sampleSubDir + ".readpairs.txt")
            bamFileNames.append(configDict['bamFileName'])

    if lastConfig is None:
        # previously surfaced as an opaque "max() arg is an empty sequence"
        raise ValueError("no samples found in %s" % args.sampleListFile)

    # use the largest values seen across all samples
    maxDist = max(insertSizes) + 2 * max(readLengths)

    # merge readfiles
    mergedDir = args.outDirName + "/" + args.outBaseName
    outReadFileName = mergedDir + "/" + args.outBaseName + ".readpairs.txt"
    mergeChrPosFiles(readFileNames, outReadFileName, maxDist)

    # write new config file, starting from the last sample's settings
    configPath = mergedDir + "/" + args.configFileName
    configDict = lastConfig

    # one numbered key per input BAM, numbered from 0 in sample-list order
    for bfnum, bamFileName in enumerate(bamFileNames):
        configDict["bamFileName" + str(bfnum)] = bamFileName

    configDict['merged'] = 'True'
    configDict['outBaseName'] = args.outBaseName
    configDict['outDirName'] = args.outDirName
    configDict['readFileName'] = outReadFileName

    # the single-input key is replaced by the numbered keys above
    del configDict['bamFileName']

    # 'with' guarantees the handle is closed even if a write fails;
    # items() is available on both Python 2 and Python 3 mappings.
    with open(configPath, 'w') as f:
        for k, v in configDict.items():
            f.write(k + "=" + v + "\n")
Exemplo n.º 3
0
def main(args):
    """Merge the read-pair and BED files of two input runs into one output.

    Reads the config file of each input directory, merges the two
    ``.readpairs.txt`` files and the two ``.reads.bed`` files via
    ``mergeChrPosFiles``, and writes a new ``key=value`` config file for
    the merged output.

    Args:
        args: object exposing the attributes ``outDirName``,
            ``outBaseName``, ``inDir1``, ``inDir2``, ``configFileName``
            and ``overwrite``.

    Raises:
        KeyError: if a config mapping lacks ``insertSize``, ``readLength``
            or ``bamFileName`` (assuming ``readConfig`` returns a
            dict-like object -- TODO confirm).
        ValueError: if ``insertSize`` or ``readLength`` is not an integer
            string.
    """
    outRoot = args.outDirName
    baseName = args.outBaseName
    mergedDir = outRoot + "/" + baseName
    inDirPath1 = outRoot + "/" + args.inDir1
    inDirPath2 = outRoot + "/" + args.inDir2

    # debugging info: one log file per process, keyed by PID
    # NOTE(review): the log file is opened here, before prepOutDir() runs
    # below; presumably the logs directory already exists at this point --
    # confirm.
    logfile = "%s/logs/%d.%s.mergepairs.log" % (mergedDir, os.getpid(), baseName)
    logging.basicConfig(format='%(asctime)s %(message)s',
                        filename=logfile,
                        level=logging.DEBUG)

    logging.info("\ninDir1=%s\ninDir2=%s\noutBaseName=%s\nconfigFileName=%s",
                 args.inDir1, args.inDir2, baseName, args.configFileName)

    # create output directory
    pickreads.prepOutDir(baseName, outRoot, args.overwrite)

    # make sure input sources exist
    peakparser.checkOutDir(args.inDir1, outRoot)
    peakparser.checkOutDir(args.inDir2, outRoot)

    # make sure config files exist
    configPath1 = inDirPath1 + "/" + args.configFileName
    configPath2 = inDirPath2 + "/" + args.configFileName
    pickreads.checkfile(configPath1)
    pickreads.checkfile(configPath2)

    # read parameters for both inputs
    configDict1 = peakparser.readConfig(configPath1, args.inDir1, outRoot)
    configDict2 = peakparser.readConfig(configPath2, args.inDir2, outRoot)

    # NOTE(review): only the first input's insert size / read length are
    # used here; the second input's values are ignored -- confirm intended.
    maxDist = int(configDict1['insertSize']) + 2 * int(configDict1['readLength'])

    # merge readfiles
    outReadFileName = mergedDir + "/" + baseName + ".readpairs.txt"
    readFileName1 = inDirPath1 + "/" + args.inDir1 + ".readpairs.txt"
    readFileName2 = inDirPath2 + "/" + args.inDir2 + ".readpairs.txt"
    logging.info("merging readfiles (%s, %s)", readFileName1, readFileName2)
    mergeChrPosFiles(readFileName1, readFileName2, outReadFileName, maxDist)

    # merge bedfiles
    outBedFileName = mergedDir + "/" + baseName + ".reads.bed"
    bedFileName1 = inDirPath1 + "/" + args.inDir1 + ".reads.bed"
    bedFileName2 = inDirPath2 + "/" + args.inDir2 + ".reads.bed"
    logging.info("merging bedfiles (%s,%s)", bedFileName1, bedFileName2)
    mergeChrPosFiles(bedFileName1, bedFileName2, outBedFileName, maxDist)

    # write new config file, based on the first input's settings
    # (configDict1 is updated in place; it is not used again afterwards)
    configPath = mergedDir + "/" + args.configFileName
    configDict = configDict1
    configDict['bamFileName1'] = configDict1['bamFileName']
    configDict['bamFileName2'] = configDict2['bamFileName']
    configDict['merged'] = 'True'
    configDict['outBaseName'] = baseName
    configDict['outDirName'] = outRoot
    configDict['readFileName'] = outReadFileName

    # the single-input key is replaced by the numbered keys above
    del configDict['bamFileName']

    # 'with' guarantees the handle is closed even if a write fails;
    # items() is available on both Python 2 and Python 3 mappings.
    with open(configPath, 'w') as f:
        for k, v in configDict.items():
            f.write(k + "=" + v + "\n")