    def __init__(self, varsP, doref=False, outputdir=None, bnxin=None):
        """Configure the molecule-alignment stage and create its output dir.

        varsP: pipeline variables object; outputContigFolder, contigFolder,
            alignMolvrefName and alignMolvrefMergeName are read from it, and
            alignMolDir is stored on it when doref is False.
        doref: determines parameter set from optargs; also selects the stage
            name and output location (False: 'alignmol' under
            outputContigFolder, True: varsP.alignMolvrefName under
            contigFolder).
        outputdir: not needed for Pipeline, but used in runAlignMol.py;
            when given it overrides the computed output dir.
        bnxin: if supplied, will run single job with it (see generateJobList).
        """
        self.varsP = varsP
        self.doref = doref
        self.bnxin = bnxin  #see generateJobList

        self.argStageName = 'alignmol'  #use arguments from alignmol (optArgs, not clusterArgs)
        if not doref:
            self.stageName = 'alignmol'  #also name of dir which is sub-dir of varsP.outputContigFolder
            self.alignTarget = os.path.join(varsP.outputContigFolder,
                                            self.stageName)  #output dir
            self.varsP.alignMolDir = self.alignTarget  #store in varsP for subsequent processing
        else:
            # The `else` was missing, so the doref=False settings above were
            # always overwritten by the vref ones.
            self.stageName = self.varsP.alignMolvrefName  #also name of dir which is sub-dir of localRoot
            self.alignTarget = os.path.join(self.varsP.contigFolder,
                                            self.stageName)  #output dir
        if outputdir:
            self.alignTarget = outputdir

        util.checkDir(self.alignTarget)  #will make if doesn't exist
        # NOTE(review): the original joined only the merge name, yielding a
        # bare relative path; the merge dir is assumed to live inside the
        # alignment output dir -- confirm against AlignRefModule.
        self.mergedir = os.path.join(
            self.alignTarget,
            self.varsP.alignMolvrefMergeName)  #copy from AlignRefModule

        # NOTE(review): the argument list of this call was lost. The
        # (varsP, stageName, clusterArgs=...) shape follows the other
        # jobWrapper subclasses in this code base; the cluster-args section
        # name is an assumption -- confirm.
        super(AlignModule, self).__init__(
            self.varsP,
            self.stageName,
            clusterArgs=self.varsP.getClusterArgs(self.stageName))

        self.outFileList = []
        # NOTE(review): runAlignMol.py states that this constructor calls
        # generateJobList; the call was not present here -- confirm.
        self.generateJobList()
    def generateJobListChar(self, noise_in, input_file, optSection) :
        """Queue one RefAligner job aligning input_file to the reference.

        noise_in: dict mapping RefAligner option names (without the leading
            '-') to values; each pair is appended to the command line as
            '-name value'.
        input_file: path handed to RefAligner as -i.
        optSection: written to the pipe report and used as the output file
            prefix inside <contigFolder>/auto_noise.

        Returns 1 (nothing queued) when varsP.executeCurrentStage is false,
        otherwise 0 after the job has been added.
        """
        if not self.varsP.executeCurrentStage:
            return 1 #tell self.__init__ not to continue processing
        self.varsP.updatePipeReport('%s\n' % (optSection))
        self.output_folder = os.path.join(self.varsP.contigFolder, "auto_noise")
        if not util.checkDir(self.output_folder) : #will make if not exist
            print("ERROR in autoNoise.varsPipeline.prepareContigIO: bad dir: %s" % self.output_folder)
        self.output_file = os.path.join(self.output_folder, optSection)
        # expectedResultFile was used below without ever being assigned
        # (NameError on every call).
        # NOTE(review): '.err' is assumed, matching SampleChar.generateJobList
        # and the '.err ... from autoNoise' wording in runAlignMol -- confirm.
        expectedResultFile = self.output_file + ".err"
        # We use assembly section here because the memory usage is higher than pairwise, while the jobs are quite short.
        super(autoNoise, self).__init__(self.varsP, self.stageName, clusterArgs=self.varsP.getClusterArgs("assembly"))

        # -maxthreads is deliberately omitted because this job always runs on its own
        cargs = [self.varsP.RefAlignerBin, '-f', '-i', input_file, "-ref", self.varsP.ref, "-o", self.output_file]
        if self.varsP.stdoutlog :
            cargs.extend( ['-stdout', '-stderr'] )
        for name in noise_in :
            cargs.extend(["-" + name, str(noise_in[name])])
        if self.varsP.bnxStatsFile is not None :
            cargs += ['-XmapStatWrite', self.varsP.bnxStatsFile]
        self.addJob(mthread.singleJob(cargs, self.stageName, expectedResultFile, self.stageName, clusterLogDir=self.varsP.clusterLogDir, expectedStdoutFile=self.output_file+".stdout"))

        return 0 #success
    def __init__(self, varsP):
        """Recover characterize results from disk without re-running the stage.

        varsP: pipeline variables object; outputContigFolder,
            characterizeDirName and latestMergedCmap are read, and
            totAssemblyLenMb is set on it when a result is found.

        Returns early, leaving curCharacterizeFileRoots empty, when the
        characterize dir does not exist or holds no .err file.
        """
        self.curCharacterizeFileRoots = []
        self.varsP = varsP  #bc Characterize uses this for totAssemblyLenMb
        # varsP.curCharacterizeCmaps is intentionally not consulted: on bypass
        # mergeIntoSingleCmap is not called, so it is not populated.

        outdir = os.path.join(varsP.outputContigFolder,
                              self.varsP.characterizeDirName)  #'alignref'
        if not util.checkDir(outdir, makeIfNotExist=False):
            return  #if this doesn't exist, we can't get what we need
        outfile = None
        for qfile in os.listdir(outdir):
            if qfile.endswith(".err"):  #just take first .err file
                outfile = qfile
                break  # without this the last match won, contradicting the intent above
        if not outfile:  #if no .err files found, give up
            return
        outfile = os.path.join(outdir, outfile.replace(".err", ""))
        # NOTE(review): the computed root was never used; storing it in
        # curCharacterizeFileRoots is assumed to be the lost statement --
        # confirm.
        self.curCharacterizeFileRoots.append(outfile)
        #also want to get varsP.totAssemblyLenMb
        self.varsP.totAssemblyLenMb = mapClasses.multiCmap(
            varsP.latestMergedCmap, lengthonly=True).totalLength / 1e6
    def checkResults(self):
        """Confirm the pairwise stage left .align files on disk and record them.

        Returns None on success (also when varsP.ngsBypass is set, in which
        case nothing is checked) and 1 when the align folder is unusable or
        contains no .align file. The list of align files is taken from the
        directory listing rather than from the singleJob instances.
        """
        if self.varsP.ngsBypass:
            # pairwise is skipped completely, so there is nothing to verify
            return None
        self.doAllPipeReport()  # loops over self.jobList and calls CheckIfFileFound

        pipe = self.varsP
        align_dir = pipe.alignFolder
        if not util.checkDir(align_dir, makeIfNotExist=False):
            pipe.updatePipeReport("ERROR: bad alignFolder:%s\n\n" % align_dir)
            return 1

        found = [os.path.join(align_dir, entry)
                 for entry in os.listdir(align_dir)
                 if entry.endswith(".align")]
        if not found:
            pipe.updatePipeReport("ERROR: no align files in alignFolder %s\n\n" % align_dir)
            return 1

        pipe.writeListToFile(found, pipe.alignTarget)
        pipe.stageComplete = 'Pairwise'
    def checkResults(self):
        """Check for pairwise .align output and write the file list.

        Nothing is checked when varsP.ngsBypass is set (returns None, which
        means success). Otherwise the job reports are produced, the align
        folder is scanned for *.align files, and either an error is logged
        (return 1) or the list is written to varsP.alignTarget and the stage
        is marked complete (return None).
        """
        if self.varsP.ngsBypass:
            return None
        self.doAllPipeReport()  # loops over self.jobList and calls CheckIfFileFound

        folder = self.varsP.alignFolder
        problem = None
        alignFiles = []
        if not util.checkDir(folder, makeIfNotExist=False):
            problem = "ERROR: bad alignFolder:%s\n\n" % folder
        else:
            # results are read back from disk instead of from the job objects
            for name in os.listdir(folder):
                if name.endswith(".align"):
                    alignFiles.append(os.path.join(folder, name))
            if not alignFiles:
                problem = "ERROR: no align files in alignFolder %s\n\n" % folder

        if problem is not None:
            self.varsP.updatePipeReport(problem)
            return 1

        self.varsP.writeListToFile(alignFiles, self.varsP.alignTarget)
        self.varsP.stageComplete = 'Pairwise'
def mergeRcmaps(outFileList, outdir, varsP=None, splitByContig=None, stageName="") :
    """Merge the "_r.cmap" outputs of several alignment jobs into outdir.

    outFileList: list of file prefixes ("_r.cmap" is appended) or of full
        paths already ending in "_r.cmap". The list is sorted in place.
    outdir: destination dir; created if necessary.
    varsP: if supplied, messages are reported through it, otherwise to stdout.
    splitByContig: if < 1 (or None), output each contig separately; if == 1,
        only output the single merged cmap; if > 1, do both.
    stageName: output file prefix. Always used if supplied; if not, varsP
        must be supplied (its outputContigPrefix is used), otherwise the
        prefix is empty.

    Returns None. Nothing is written when outdir cannot be made or no
    _r.cmap file is found.
    """
    if not util.checkDir(outdir) :
        err_msg = "Warning in AlignModule.mergeRcmaps: could not make outdir %s, skipping copy number" % outdir
        logOrPrintError(err_msg, varsP)
        return # "skipping": there is nowhere to write to

    if not outFileList : #just an argument check--check for presence on disk is below
        err_msg = "Warning in AlignModule.mergeRcmaps: no maps supplied"
        logOrPrintError(err_msg, varsP)
        # falls through on purpose: the empty list is also caught by the
        # 'no _r.cmaps found' check below, which returns

    outFileList.sort() #for reproducibility with runAlignMerge.py (different order when listing dir)
    rsuf = "_r.cmap"
    #even though outFileList should all be there, a job may have failed--check all, just existence
    present = []
    for outf in outFileList :
        target = (outf if outf.endswith(rsuf) else outf + rsuf) #support either form
        if not util.checkFile(target) :
            err_msg = "Warning in AlignModule.mergeRcmaps: missing _r.cmap %s" % target
            logOrPrintError(err_msg, varsP)
        else :
            present.append(target) # this statement was missing, leaving `present` always empty
    if not present : #no _r.cmaps found (this will also happen for empty outFileList)
        err_msg = "Warning in AlignModule.mergeRcmaps: no _r.cmaps found, skipping copy number"
        logOrPrintError(err_msg, varsP)
        return # otherwise outFileList[0] below raises IndexError
    outFileList = present #yes, it's redundant, but now have rsuf appended

    mrgstr = (varsP.alignMolvrefMergeName if varsP else "merge")

    mergedmap = mc.multiCmap(outFileList[0]) #open original, edit in memory
    #now add other maps
    for rmap in outFileList[1:] : #don't add map 0 to itself
        if mergedmap.addCovOcc( mc.multiCmap(rmap) ) : #when calling addCovOcc, check return, warn if True
            err_msg = "Warning in AlignModule.mergeRcmaps: addCovOcc call failed for map %s" % rmap
            logOrPrintError(err_msg, varsP)
    #now it's merged, but the resulting map need to be written back to disk
    filepref = (varsP.outputContigPrefix if varsP and stageName == "" else stageName) #see split_XMapQcmap_byContig
    # None is handled explicitly: Python 2 evaluates `None < 1` as True, so
    # the default has always meant "per-contig output only".
    per_contig = splitByContig is None or splitByContig < 1 or splitByContig > 1
    merged_too = splitByContig is not None and splitByContig > 0
    if per_contig :
        mergedmap.writeAllMapsToDisk( os.path.join(outdir, filepref+'_contig'), outsuf="_r" )
        report = "mergeRcmaps: wrote %i cmaps" % len(mergedmap.cmapdict)
    if merged_too :
        mergedmap.writeToFile( os.path.join(outdir, filepref+"_"+mrgstr+rsuf) )
        report = "mergeRcmaps: wrote merged cmap with %i contigs" % len(mergedmap.cmapdict)
    #report result
    logOrPrintError(report, varsP, warn=False)
def mergeMap(varsP, outFileList, mergepath):
    """Merge per-job .map files into one .map file in dir mergepath.

    varsP: pipeline variables object or None. When supplied, warnings go to
        its pipe report and its alignMolvrefMergeName names the output;
        otherwise warnings are printed and "merge" is used.
    outFileList: list of path+prefixes; each should have a .map file. The
        list is sorted in place to ensure a reproducible entry order.
    mergepath: dir that receives the merged file (created if necessary).

    The header (lines starting with "#", "S" or "M") is copied from the first
    map only; data lines are renumbered consecutively in their first column.
    Returns None; nothing is written when there is nothing to merge or
    mergepath is unusable.
    """

    def warn(msg):
        # varsP is optional (runAlignMerge passes None), so never assume it
        if varsP:
            varsP.updatePipeReport(msg)
        else:
            print(msg)

    outFileList.sort()  #sort to ensure reproducibility (order of entries)
    maplist = []
    for outpath in outFileList:  #these are file prefixes
        if util.checkFile(outpath + ".map"):
            maplist.append(outpath + ".map")
        else:
            warn("Warning in AlignModule.mergeMap: missing map: " + outpath +
                 ".map" + "\n")

    if not len(maplist):  #nothing to merge
        return

    if not util.checkDir(mergepath):
        warn("Warning in AlignModule.mergeMap: merge path invalid: " +
             mergepath + "\n")
        return

    headstart = [
        "#", "S", "M"
    ]  #last two lines of header start with "Software" and "MappedMoleculeId"
    headerdone = False
    lineno = 1  #can't just append: need to change index in first column
    sep = "\t"
    mappref = getMergeFilename(outFileList[0])  #also in getAlignStats
    mrgstr = (varsP.alignMolvrefMergeName if varsP else "merge"
              )  #same for vref and not
    outpath = os.path.join(mergepath, mappref + mrgstr + ".map")
    # `with` closes both handles even if a map is malformed; the original
    # never closed them.
    with open(outpath, 'w') as merged:
        for path in maplist:
            with open(path) as single:
                for line in single:
                    if not line.strip():
                        continue  # blank line: neither header nor data
                    if line[0] in headstart:
                        if not headerdone:
                            merged.write(line)  # header comes from the first map only
                    else:
                        tokens = line.split()
                        tokens[0] = str(lineno)
                        merged.write(sep.join(tokens) + "\n")  #newline was stripped by split
                        lineno += 1
            headerdone = True

 def generateJobList(self):
     """Queue one RefAligner sample-characterization job per bnx file.

     If varsP.bnxTarget exists (image processing was run) the bnx files are
     taken from it and each result is written next to its bnx file with a
     "_sampleChar" suffix. Otherwise varsP.bnxFile is the only input and
     results go to <localRoot>/sampleChar, which is emptied first when
     varsP.wipe is set. Every job expects a '.err' result file.
     """
     curArgs = self.varsP.argsListed('noise0') + self.varsP.argsListed('sampleChar')
     if util.checkFile(self.varsP.bnxTarget) : #file exists only if image processing was run
         bnxFiles = parseExperimentFile(self.varsP.bnxTarget)
         if not bnxFiles : #check that you got at least one
             errstr = "ERROR in SampleChar.generateJobList: no bnx files found in: "+self.varsP.bnxTarget
             print(errstr)
             return #nothing to queue
         basepath = "" #don't use basepath for this case
     else : #otherwise, assume this is the only bnx file
         bnxFiles = [self.varsP.bnxFile]
         #here, make a dir for the results
         basepath = os.path.join(self.varsP.localRoot, "sampleChar")
         if self.varsP.wipe and os.path.isdir(basepath) :
             # NOTE(review): this branch had no statement at all. Removing the
             # old dir matches the described intent ("remove and re-make");
             # confirm this is what was here.
             import shutil #local import: the file's import list is not visible from here
             shutil.rmtree(basepath)
         util.checkDir(basepath) #will make if not exist, but won't remove anything
     for bnxFile in bnxFiles :
         cargs = [self.varsP.RefAlignerBin, '-i', bnxFile]
         bnxname = os.path.split(bnxFile)[1].replace(".bnx","")
         jobname = 'Sample_Char_' + bnxname
         if basepath : #bnx input
             outputTarget = os.path.join(basepath, bnxname)
         else : #image processing
             outputTarget = bnxFile.replace(".bnx","") + "_sampleChar"
         expectedResultFile = outputTarget + '.err' #this is used in checkResults
         currentArgs = cargs + ['-ref', self.varsP.ref, '-o' , outputTarget, '-f']
         if self.varsP.stdoutlog :
             currentArgs.extend( ['-stdout', '-stderr'] )
         currentArgs += ['-maxthreads', str(self.varsP.maxthreads)] + curArgs
         sJob = mthread.singleJob(currentArgs, jobname, expectedResultFile, jobname, clusterLogDir=self.varsP.clusterLogDir) # peStr is deprecated in favor of clusterargs
         # NOTE(review): the job was built and then dropped; addJob is assumed
         # to be the lost statement (it is what autoNoise uses) -- confirm.
         self.addJob(sJob)
 def getTargetJobs(self, dormdir=False):
     """Collect the detect jobs of every scan of this device.

     dormdir: when true, the returned list starts with an 'rm -f -r' job and
         a 'mkdir' job for <localRoot>/<expTag>/, and the mkdir job is passed
         to each scan's getDetectJobs. When false, the dir is created
         immediately via util.checkDir and None is passed instead.

     Scans whose getDetectJobs returns nothing are reported as skipped.
     Returns the list of jobs.
     """
     dataDir = os.path.join(self.varsP.localRoot, self.expTag + '/')
     if not dormdir :
         util.checkDir(dataDir) #will make dir dataDir
         jobs = []
         prerequisite = None
     else :
         rmJob = mthread.singleJob(['rm', '-f', '-r', dataDir], 'Pre-Remove Folder: ' + shorten(dataDir), '', 'rmDir')
         mkdirJob = mthread.singleJob(['mkdir', dataDir], 'Make Folder: ' + shorten(dataDir), dataDir, 'mkDir')
         jobs = [rmJob, mkdirJob]
         prerequisite = mkdirJob
     for scan in self.scans:
         detectJobs = scan.getDetectJobs(prerequisite)
         if detectJobs :
             jobs.extend(detectJobs)
             continue
         #no scan jobs means the scan has already been processed
         self.varsP.updatePipeReport("Device.getTargetJobs: skipping path "+scan.nameStr()+"\n")
     return jobs
    def __init__(self, varsP) :
        """Run RefAligner once to produce the de-resolved reference for SV detect.

        varsP: pipeline variables object. The 'referenceSvdetect' optArguments
            section supplies the options; it must contain '-mres', otherwise
            '-mres 2.9' alone is used and a warning is reported. On success
            varsP.refDeresed is set to the resulting .cmap.

        The job is run synchronously from this constructor. If
        varsP.refFolder cannot be made the constructor returns before any job
        is set up; if the job fails an error is logged and varsP.refDeresed
        is left untouched.
        """
        jobName = "reference_process"
        opta_section = "referenceSvdetect"
        default_mres = "2.9"
        mres = "-mres"
        self.varsP = varsP
        usedefault = False
        if opta_section in self.varsP.argData : #check if in optargs
            opta = self.varsP.argsListed(opta_section)
            if mres not in opta : #must have mres
                self.varsP.updatePipeReport("Warning in referenceProcess: "+mres+" missing in optArguments section "+opta_section+"\n")
                usedefault = True
        else :
            self.varsP.updatePipeReport("Warning in referenceProcess: optArguments section "+opta_section+" missing\n")
            usedefault = True
        if usedefault :
            opta = [mres, default_mres]

        mresstr = opta[opta.index(mres)+1] #get string for mres value for output name
        mresstr = mresstr.replace(".","")

        if not util.checkDir(self.varsP.refFolder) :
            self.varsP.updatePipeReport( "ERROR in referenceProcess: could not make output dir %s\n" % self.varsP.refFolder )
            return None
        # splitext on the basename: the former ref[:ref.rfind(".")] chopped the
        # last character when the path had no '.', and cut inside the
        # directory part when only a directory name contained one.
        refpref = os.path.splitext(os.path.basename(self.varsP.ref))[0] + "_res" + mresstr
        outarg = os.path.join(self.varsP.refFolder, refpref) #refFolder is new output folder for this job
        expectedResultFile = outarg+".cmap" #if ref is spots, is this spots?
        args = [self.varsP.RefAlignerBin, '-o', outarg, '-i', self.varsP.ref, '-f', '-merge'] + opta
        stdoutf = None
        if self.varsP.stdoutlog :
            args.extend( ['-stdout', '-stderr'] )
            stdoutf = outarg+".stdout"
        args += ['-maxthreads', str(self.varsP.nThreads)]

        super(referenceProcess, self).__init__(self.varsP, jobName, clusterArgs=self.varsP.getClusterArgs("assembly"))

        job = mthread.singleJob(args, jobName, expectedResultFile, jobName, maxThreads=self.varsP.nThreads, clusterLogDir=self.varsP.clusterLogDir, expectedStdoutFile=stdoutf)
        # The job was built but never queued, so runJobs below had nothing to
        # run. NOTE(review): addJob is the registration call used by autoNoise
        # -- confirm it is the one intended here.
        self.addJob(job)

        util.LogStatus("progress", "stage_start", jobName)
        self.varsP.runJobs(self, "referenceProcess")
        if not self.allResultsFound() : #this is an error, but we'll continue processing without SV detect
            err = "ERROR in referenceProcess: job failed, disabling SV detect"
            self.varsP.updatePipeReport( err+"\n" )
            util.LogError("error", err)
        else :
            self.varsP.refDeresed = expectedResultFile #store good result for SV detect
            self.varsP.updatePipeReport( "referenceProcess: using reference %s for svdetect\n" % self.varsP.refDeresed )
        util.LogStatus("progress", "stage_complete", jobName)
def runAlignMol() :
    """Command-line entry point: align molecules to a cmap, or merge an alignmol dir.

    Arguments are read from the command line (see the add_argument help
    strings). If -q is a directory, no alignments are run and its existing
    results are only merged; if -q is a cmap, the molecules given with -b are
    aligned to it using the optArguments file, optionally with noise
    parameters taken from an .err (-e) or .errbin (-E) file.

    Any invalid argument prints a message and terminates with exit status 1.
    """
    # Imported locally because the file's import list is not visible from
    # this block; needed for the sys.exit calls restored below.
    import sys

    parser = argparse.ArgumentParser(description=description)

    parser.add_argument('-q', dest='queryDir', help='Path to merged cmap to align molecules (-b) to OR alignmol dir from Pipeline for merge (if latter, no alignments are performed), required', type=str)
    parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file, required if aligning molecules', type=str)
    parser.add_argument('-a', dest='optArguments', help='Path to optArguments.xml (optional, default optArguments_human.xml in Pipeline dir if found, otherwise required)', default="", type=str)
    parser.add_argument('-r', help='If this flag is used, alignmolvref arguments are used, otherwise alignmol arguments are used (default alignmol; optional)', dest='ref', action='store_true')
    parser.add_argument('-o', dest='outputDir', help='output dir (optional, defaults to sub-dir of input map dir called "alignmol")', default="", type=str)
    parser.add_argument('-t', dest='RefAligner', help='Path to RefAligner or dir containing it (required)', type=str)
    parser.add_argument('-T', dest='numThreads', help='Total number of threads (cores) to use (optional, default 4)', default=4, type=int)
    parser.add_argument('-j', dest='maxthreads', help='Threads per Job, -maxthreads (non-cluster only;optional, default 4)', default=4, type=int)
    parser.add_argument('-e', dest='errFile', help='.err file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise', default="", type=str)
    parser.add_argument('-E', dest='errbinFile', help='.errbin file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise', default="", type=str)
    parser.add_argument('-p', dest='pipelineDir', help='Pipeline dir (optional, defaults to script dir, or current directory)', default="", type=str)
    parser.add_argument('-v', dest='pvalue', help='Alignment pvalue', default="1e-12")
    result = parser.parse_args()

    outprefix = "exp_refineFinal1" #this is the default; assume for now

    #check all Pipeline dependencies
    if result.pipelineDir :
        cwd = result.pipelineDir
    else :
        cwd = os.path.split(os.path.realpath(__file__))[0] #this is path of this script
        if not os.path.isfile(os.path.join(cwd,"utilities.py")) : #if still not here, last try is actual cwd
            cwd = os.getcwd() #still check this below

    # Every fatal message below is followed by sys.exit(1): without it
    # execution ran on into the code that depends on the failed check.

    #this is the only one imported here and in runCharacterize
    if not os.path.isfile(os.path.join(cwd,"utilities.py")):
        print "ERROR: utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import utilities as util

    if not os.path.isfile(os.path.join(cwd,"AlignModule.py")):
        print "ERROR: AlignModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import AlignModule as alignmod

    if not util.checkFile(os.path.join(cwd,"Pipeline.py")):
        print "ERROR: Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import Pipeline

    #input dir
    if not result.queryDir :
        print "ERROR: Query (-q) argument not supplied."
        sys.exit(1)
    qrypath = os.path.realpath(result.queryDir)
    if util.checkDir(qrypath, checkWritable=False, makeIfNotExist=False) : #output elsewhere so not writeable is ok
        runaligns = False
    elif util.checkCmap(qrypath) :
        runaligns = True
    else :
        print "ERROR: Query argument ("+qrypath+") not found or not a dir or cmap. Check -q argument."
        sys.exit(1)

    #RefAligner -- check for either path to RefAligner, or dir containing it, depending on cluster args
    rabin = "" #need empty string for generateJobList even though no jobs are run
    if runaligns :
        rabin = result.RefAligner
        #replicate Pipeline behavior: RefAligner is always required
        if os.path.isdir(rabin) :
            rabin = os.path.join(rabin, "RefAligner")
        if not util.checkExecutable(rabin):
            print "ERROR: RefAligner not found or not executable at", rabin, "\nPlease supply RefAligner dir or full path as -t arg."
            sys.exit(1)

    #optargs file
    optargs = None
    if runaligns and result.optArguments : #supplied on command line
        optargs = result.optArguments
        if not util.checkFile(optargs, ".xml") :
            print "optArguments path is supplied ("+optargs+") but not found or doesn't end in .xml, check -a argument."
            sys.exit(1)
    elif runaligns : #load from Pipeline dir if running alignments
        optargs = os.path.join(cwd,"optArguments_human.xml")
        if not util.checkFile(optargs):
            print "optArguments.xml missing in Pipeline directory ("+cwd+"). Try supplying path explicitly using -a."
            sys.exit(1)

    #output dir
    if not result.outputDir :
        outdir = os.path.join(qrypath, "merge") #should be same as in AlignModule
    else :
        outdir = os.path.realpath(result.outputDir)
    if os.path.isdir(outdir) :
        if not util.checkDir(outdir) : #check writeable
            print "\nERROR: Output dir is not writeable:\n", outdir, "\n"
            sys.exit(1)
        #output dir == input dir is ok here
        print "\nWARNING: Output dir already exists, results will be overwritten:\n", outdir, "\n"
    elif not util.checkDir(outdir) : #does not exist, make, if False, can't make or not writeable
        print "\nERROR: Output dir cannot be created or is not writeable:\n", outdir, "\n"
        sys.exit(1)

    #bnx file
    bnxfile = result.bnx
    if bnxfile : #must check for empty string BEFORE you do realpath, or it returns cwd
        bnxfile = os.path.realpath(bnxfile)
        if not util.checkFile(bnxfile, ".bnx") :
            print "ERROR: bnx file supplied but not found or incorrect suffix:", bnxfile
            sys.exit(1)
    elif runaligns :
        print "ERROR: bnx file not supplied but running alignments; please supply bnx file as -b argument"
        sys.exit(1)

    nthreads = result.numThreads
    if nthreads <= 0 :
        print "ERROR: Number of threads value invalid (must be > 0): %i" % nthreads
        sys.exit(1)

    maxthreads = result.maxthreads
    if maxthreads <= 0 :
        print "ERROR: Max threads value invalid (must be > 0): %i" % maxthreads
        sys.exit(1)
    elif nthreads < maxthreads :
        print "Warning: num threads (-T: %i) < max threads (-j: %i): increasing num threads to equal max threads\n" % (nthreads, maxthreads)
        nthreads = maxthreads

    if result.pvalue : #supplied on command line
        pvalue = result.pvalue
    else :
        pvalue = "1e-12"

    #.errbin file
    errbinfile = result.errbinFile
    if errbinfile :
        errbinfile = os.path.realpath(result.errbinFile)
        if not util.checkFile(errbinfile, ".errbin") :
            print "ERROR: errbin file supplied but not found or incorrect suffix:", errbinfile
            sys.exit(1)

    #.err file
    errfile = result.errFile
    if errfile and errbinfile :
        print "Warning: .err and .errbin arguments supplied; ignoring .err file"
        errfile = ""
    elif errfile :
        errfile = os.path.realpath(result.errFile)
        if not util.checkFile(errfile, ".err") :
            print "err file supplied but not found or incorrect suffix:", errfile
            sys.exit(1)

    if errfile and not util.checkFile(os.path.join(cwd,"SampleCharModule.py")):
        print "SampleCharModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    elif errfile :
        import SampleCharModule as scm

    doref = result.ref

    #DONE checking arguments

    print "Using output dir", outdir
    if runaligns :
        print "Aligning", bnxfile, "\nTo", qrypath, "\n"
    else :
        print "Merging", qrypath, "\n"

    startTime = time.time() #time since Epoch
    memory_log = os.path.join(outdir, "memory_log.txt")

    varsP = Pipeline.varsPipeline()
    varsP.RefAlignerBin        = rabin
    varsP.contigFolder         = "" #not used but needs to be an attr
    varsP.outputContigFolder   = "" #not used but needs to be a string attr
    varsP.pipeReportFile = os.path.join(outdir, "alignmol_jobs_log.txt")
    varsP.infoReportFile = os.path.join(outdir, "alignmol_log.txt")
    util.InitStatus( os.path.join(outdir, "status.xml") )

    if runaligns :
        varsP.optArgumentsFileIn   = optargs
        varsP.latestMergedCmap     = qrypath #if !doref, need this one
        varsP.ref                  = qrypath #and if doref, need this one
        varsP.nThreads             = nthreads #necessary otherwise job won't start -- max threads per node
        varsP.maxthreads           = maxthreads #threads per job
        p = os.path.split(qrypath)[1]
        varsP.outputContigPrefix   = p[:p.rfind(".")] #filename prefix
        varsP.stdoutlog    = True #use -stdout -stderr

        varsP.memoryLogpath  = os.path.join(outdir, "memory_log.txt")
        varsP.parseArguments() #parses optArgumentsFile
        varsP.replaceParam("alignmol", "-T", pvalue)
        varsP.RefAlignerBinOrig = rabin
        varsP.prerunLog() #general information in log -- needed for refaligner_version

        noisep = {}
        if errbinfile :
            noisep = {"readparameters": errbinfile}
        elif errfile :
            noisep = scm.readNoiseParameters(errfile.replace(".err",""))
            if noisep.has_key('readparameters') : #remove this because it's redundant, and it can cause problems with RefAligner compatibility
                del noisep['readparameters']
            if not noisep : #readNoiseParameters returns empty dict on failure
                print "ERROR reading noise parameters, check .err file:", errfile
                sys.exit(1)
            #redundant with below?
            print "Using noise parameters from "+errfile+":\n" + " ".join(["-"+str(k)+" "+str(v) for k,v in noisep.iteritems()])+"\n"

        #some code from SampleCharModule to load args into noise0
        infoReport="Loaded noise parameters:\n"
        klist = ["FP", "FN", "sf", "sd", "sr", "bpp", "readparameters"] #hardcoding parameters is kind of bad, but it fixes the order without using OrderedDict.
        for v in klist :
            if not noisep.has_key(v) :
                continue #parameter not supplied: keep the optArguments value
            # NOTE(review): `param` was used but never assigned, and the
            # report header was never extended. The assignment and the report
            # line format below are reconstructions -- confirm against
            # SampleCharModule.
            param = str(noisep[v])
            util.LogStatus("parameter", "auto_"+v, param)
            infoReport += v + ":" + param + "\n"
            varsP.replaceParam("noise0", "-"+v, param)
        varsP.updateInfoReport(infoReport + '\n', printalso=True)

    else :
        print "Getting file list from", qrypath
        outFileList = getOutFileList(util, qrypath)
        if not outFileList :
            print "ERROR: Query dir ("+qrypath+") does not contain alignmol data. Check -q argument."
            sys.exit(1)
        else :
            print "Found", len(outFileList), "alignment results"
    #end if runaligns

    amod = alignmod.AlignModule(varsP, doref, outdir, bnxfile) #constructor will call generateJobList

    if runaligns :
        # NOTE(review): the statements of this branch were lost. Running the
        # queued jobs through varsP.runJobs mirrors referenceProcess, and
        # checkResults is the usual follow-up; both calls are assumptions --
        # confirm the names exist on varsPipeline / AlignModule.
        varsP.runJobs(amod, "alignmol")
        amod.checkResults()
    else :
        amod.outFileList = outFileList
        p = os.path.split(outFileList[0])[1]
        if p.count("_") > 1 : #expect something like "EXP_REFINEFINAL1_4"
            p = p[:p.rfind("_")] #remove integer suffix (and underscore)
        varsP.outputContigPrefix = p

    if not runaligns or len(amod.jobList) > 0 :
        # NOTE(review): the statement of this branch was lost. A stats/merge
        # method on the module is assumed (a standalone getAlignStats "for
        # both AlignModule and AlignRefModule" exists in AlignModule.py) --
        # confirm the method name.
        amod.getAlignStats()

    if runaligns :
        #copy from Pipeline.py
        if util.SummarizeErrors(varsP=varsP)==0:
            varsP.updatePipeReport("Pipeline has successfully completed\n")
            util.LogStatus("progress", "pipeline", "success")
        else :
            # this `else` was missing, so a failure was reported after every run
            varsP.updatePipeReport("Pipeline has completed with errors\n")
            util.LogStatus("progress", "pipeline", "failure")

    # NOTE(review): the statements that follow this function body in the file
    # use `outputdir` and `self`, neither of which is defined here, and only
    # have `outFileList` in merge mode. They look like the body of a separate
    # merge script; confirm whether they belong to this function.



    #in Pipeline, this is called first
    #print "Calling getAlignStats:" #but it won't work without varsP atm; skip it
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)

    print "Calling mergeMap"
    print outFileList[0] #, "\n", outputdir #moved above
    util.logMemory(memory_log, startTime, "mergeMap_start")
    #mergeMap(self.varsP, self.outFileList, mergepath=self.outputdir) #varsP is optional
    alignmod.mergeMap(None, outFileList, outputdir) 
    util.logMemory(memory_log, startTime, "mergeMap_end")

    print "Calling mergeRcmaps"
    util.logMemory(memory_log, startTime, "mergeRcmaps_start")
    #mergeRcmaps(outFileList, outdir, varsP=None, splitByContig=None, stageName="alignmol") :
    alignmod.mergeRcmaps(outFileList, outputdir, splitByContig=True, stageName=outprefix) 
    util.logMemory(memory_log, startTime, "mergeRcmaps_end")

    print "Calling split_XMap_byContig" #split_XMapQcmap_byContig"
    util.logMemory(memory_log, startTime, "split_XMap_byContig_start")
    #xmapdict = alignmod.split_XMap_byContig(outFileList, outputdir, stageName=outprefix) #old
    xmapdict = alignmod.split_XMap_byContig_new(outFileList, outputdir, stageName=outprefix)
    util.logMemory(memory_log, startTime, "split_XMap_byContig_end")

    print "Calling split_Qcmap_byContig" 
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_start")
    #alignmod.split_Qcmap_byContig(outFileList, outputdir, xmapdict) #old
    alignmod.split_Qcmap_byContig_new(outFileList, outputdir, xmapdict, stageName=outprefix) #new: better performance
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_end")

    print "AlignMerge successfully completed"
def getAlignStats(varsP,
                  outFileList,
                  reflen=0,
                  isref=False,
                  mergepath="",
                  bnxpath=None):
    '''Standalone fn for alignment statistics for both AlignModule and AlignRefModule.

    Reports molecule (bnx) statistics and, unless bnxpath is supplied, alignment
    statistics collected from the .xmap and .err file of every alignment job.

    varsP       -- pipeline variables object; sorted_file, argData, argsListed,
                   updatePipeReport, updateInfoReport and alignMolvrefMergeName
                   are read from it.
    outFileList -- list of alignment output prefixes (".xmap" / ".err" appended).
    reflen      -- reference length, should be in Mb; coverage is reported if non-zero.
    isref       -- selects the "Ref"/"Reference" wording instead of
                   "Contig"/"Assembly", and forces the molecule stats to be printed.
    mergepath   -- if supplied, the averaged .err is put in this dir.
    bnxpath     -- if None, assume varsP.sorted_file + ".bnx"; otherwise just
                   report stats of this file and ignore outFileList.

    Returns None; all results are reported through varsP.
    '''

    statonly = False  #bnx stats only
    skipbnx = False  #.err file processing only
    if bnxpath is None:
        if not varsP.sorted_file:  #for runAlignMol, this is empty: nothing to do in this case
            skipbnx = True
        else:
            bnxpath = varsP.sorted_file + ".bnx"  #set in PairwiseModule.sort_BNX even if bypassed, but needs suffix
    else:
        statonly = True
    if not skipbnx and not util.checkFile(bnxpath):
        varsP.updatePipeReport(
            "Warning in AlignModule.getAlignStats: bnxpath supplied but not found: %s\n"
            % bnxpath)

    #find the minlen used for bnx_sort, which is a required arg set
    sortargs = []
    if varsP.argData.has_key('bnx_sort'):  #for runAlignMol.py
        sortargs = varsP.argsListed('bnx_sort')
    minlen = 0
    validminlen = False
    if "-minlen" in sortargs:
        #next ele should be the len, if next ele isn't in list, the sort job will fail
        minlen = sortargs[sortargs.index("-minlen") + 1]
        minlen = util.getIntFromString(minlen)  #returns None if can't cast to int
        if minlen:
            validminlen = True
        else:
            minlen = 0  #really apply the default promised by the warning below

    if not validminlen and bnxpath is None and sortargs:
        varsP.updatePipeReport(
            "Warning in AlignModule.getAlignStats: unable to obtain minlen from bnx_sort arguments; defaulting to 0\n"
        )
    if bnxpath is not None:  #if bnxpath, ignore minlen
        minlen = 0

    nmol = 0  #total n mol above minlen
    totlen = 0  #total mol len above minlen
    if not skipbnx and util.checkFile(bnxpath):
        outstr = "Reading molecule stats from %s:\n" % bnxpath
        outstr += "Molecule Stats:\n"
        moldict = util.simpleBnxStats(bnxpath, minlen)
        nmol = moldict["nmol"]
        totlen = moldict["totlen"]
        #this is the same for isref or not, but just print twice bc no easy way to tell if was printed previously
        outstr += "N mols: %i\n" % nmol
        outstr += ("Total len (Mb): %10.3f\n") % totlen
        outstr += ("Avg len (kb)  : %10.3f\n") % moldict["avglen"]
        outstr += ("Mol N50 (kb)  : %10.3f\n") % moldict["n50"]
        outstr += ("Lab (/100kb)  : %10.3f\n") % moldict["labdensity"]

        if reflen:
            cov = totlen / reflen  #totlen is in Mb
            outstr += ("%-6s Cov (x): %10.3f\n") % ("Ref" if isref else
                                                    "Contig", cov)
        if isref or reflen or statonly:  #if neither, nothing to print
            varsP.updateInfoReport(outstr + "\n", printalso=True)
    elif not skipbnx:
        varsP.updatePipeReport(
            "Warning in AlignModule.getAlignStats: missing bnx path:" +
            bnxpath + "\n")

    if statonly:
        return  #only bnx stats were requested; outFileList is ignored

    #lastly, load .xmaps and .errs from alignmol jobs and report on stats
    totmaplen = 0  #sum of lengths of mapped portions of all molecules, on reference
    totmapqrylen = 0  #sum of lengths of mapped portions of all molecules, on query
    totconf = 0  #sum of confidence of all alignments
    nalign = 0  #total number of alignments
    #alignParams attributes which are averaged over all .err files
    avgattrs = ("fp", "fprate", "fn", "bpp", "res", "llrm", "llrgm", "bppsd",
                "sf", "sd", "sr", "ressd")
    #alignParams attributes which are summed over all .err files
    sumattrs = ("nmaps", "goodmaps")
    errvals = dict((attr, []) for attr in avgattrs + sumattrs)
    header = ""
    err = None  #will be the alignParams object if any .err files are found
    mappref = ""
    if len(outFileList) > 0:
        mappref = getMergeFilename(
            outFileList[0]
        )  #make function to unify with same convention in mergeMap
    for outpath in outFileList:  #these are file prefixes
        if util.checkFile(outpath + ".xmap"):
            xmap = mc.xmap(outpath + ".xmap")
            nalign += len(xmap.xmapLookup)
            totmaplen += xmap.getSumMappedRefLen()  #in kb
            totmapqrylen += xmap.getSumMappedQryLen()  #in kb
            totconf += sum([x.Confidence for x in xmap.xmapLookup.values()])
        else:
            varsP.updatePipeReport(
                "Warning in AlignModule.getAlignStats: missing xmap:" +
                outpath + ".xmap" + "\n")
        if util.checkFile(outpath + ".err"):
            err = mc.alignParams(outpath + ".err")
            if not header:
                header = err.header
            #collect this job's values; without this every average below is 0
            for attr in errvals:
                errvals[attr].append(getattr(err, attr))

    #nalign from xmap should be the same as goodmaps from .err
    sumgoodmaps = sum(errvals["goodmaps"])
    if sumgoodmaps != nalign:
        varsP.updateInfoReport(
            "Warning in getAlignStats: n mol align differ in .err files (%i) and .xmaps (%i)\n"
            % (sumgoodmaps, nalign),
            printalso=True)
    if totmaplen or totconf or nalign:
        outstr = "Molecules Aligned to %s:\n" % ("Reference"
                                                 if isref else "Assembly")
        outstr += "N mol align       : %9i\n" % nalign
        outstr += "Mol fraction align: %13.3f\n" % (float(nalign) /
                                                    nmol if nmol else 0)
        outstr += "Tot align len (Mb): %11.1f\n" % (totmapqrylen / 1e3)  #Mb
        if reflen > 0:
            outstr += ("Effective Cov (x) : %13.3f\n") % (
                totmaplen / 1e3 / reflen)  #totmaplen is in kb
        outstr += "Avg align len (kb): %11.1f\n" % (totmapqrylen /
                                                    nalign if nalign else 0)
        outstr += "Fraction align len: %13.3f\n" % (
            totmapqrylen / 1e3 / totlen if totlen else 0
        )  #totmapqrylen is in kb, totlen is in mb
        outstr += "Tot confidence    : %11.1f\n" % totconf
        outstr += "Avg confidence    : %11.1f\n" % (totconf /
                                                    nalign if nalign else 0)
        varsP.updateInfoReport(outstr, printalso=True)

    avg = {}
    for attr in avgattrs:
        vals = errvals[attr]
        avg[attr] = (sum(vals) / len(vals) if vals else 0)
    if avg["fp"] or avg["fn"] or avg["bpp"]:
        outstr = "Avg FP(/100kb)    : %12.2f\n" % avg["fp"]
        outstr += "Avg FP ratio      : %13.3f\n" % avg["fprate"]
        outstr += "Avg FN ratio      : %13.3f\n" % avg["fn"]
        outstr += "Avg bpp           : %11.1f\n" % avg["bpp"]
        outstr += "Avg sf            : %13.3f\n" % avg["sf"]
        outstr += "Avg sd            : %13.3f\n" % avg["sd"]
        outstr += "Avg sr            : %13.3f\n" % avg["sr"]
        varsP.updateInfoReport(outstr + "\n", printalso=True)
    if err and mergepath:  #have an error file (alignParams) object
        mrgstr = (varsP.alignMolvrefMergeName if varsP else "merge")
        outpath = os.path.join(mergepath, mappref + mrgstr + ".err")
        #reuse the last alignParams object loaded as container for the merged values
        for attr in avgattrs:
            setattr(err, attr, avg[attr])
        err.nmaps = sum(errvals["nmaps"])
        err.goodmaps = sumgoodmaps
        #NOTE(review): outpath was computed but never used; writing the merged
        #.err is what the docstring promises -- confirm alignParams.writeToFile
        err.writeToFile(outpath)
    def __init__(self, varsP):
        """Set up and run a RefAligner job on varsP.ref for the SV detect stage.

        The RefAligner arguments come from the optArguments section
        'referenceSvdetect'; if that section or its '-mres' argument is
        missing, '-mres 2.9' is used instead. The output prefix is built from
        the reference file name plus '_res' and the mres value without its
        decimal point. The expected '.cmap' result path is stored in
        self.varsP.refDeresed.

        NOTE(review): several statements in this method are visibly truncated
        (see the notes below); the code does not parse as it stands.
        """
        jobName = "reference_process"
        opta_section = "referenceSvdetect"
        default_mres = "2.9"
        mres = "-mres"
        self.varsP = varsP
        usedefault = False
        if self.varsP.argData.has_key(opta_section):  #check if in optargs
            opta = self.varsP.argsListed(opta_section)
            if not mres in opta:  #must have mres
                # NOTE(review): the opening of the call which should wrap this
                # message is missing -- presumably self.varsP.updatePipeReport(
                # as used for the job-failed error further down; confirm
                    "Warning in referenceProcess: " + mres +
                    " missing in optArguments section " + opta_section + "\n")
                usedefault = True
                # NOTE(review): the next message reports the whole section as
                # missing, so it presumably belongs to a lost "else:" branch of
                # the has_key test above, again without its reporting call
                "Warning in referenceProcess: optArguments section " +
                opta_section + " missing\n")
            usedefault = True
        if usedefault:
            opta = [mres, default_mres]

        mresstr = opta[opta.index(mres) +
                       1]  #get string for mres value for output name
        mresstr = mresstr.replace(".", "")

        if not util.checkDir(self.varsP.refFolder):
                # NOTE(review): both the reporting call and the value for %s
                # (presumably self.varsP.refFolder) are missing here
                "ERROR in referenceProcess: could not make output dir %s\n" %
            return None
        refpref = os.path.basename(
            self.varsP.ref[:self.varsP.ref.rfind(".")]) + "_res" + mresstr
        # NOTE(review): the trailing comment says the output goes to refFolder,
        # but only refpref is joined -- a first argument appears to be missing
        outarg = os.path.join(
            refpref)  #refFolder is new output folder for this job
        expectedResultFile = outarg + ".cmap"  #if ref is spots, is this spots?
        args = [
            self.varsP.RefAlignerBin, '-f', '-o', outarg, '-i', self.varsP.ref,
        ] + opta
        stdoutf = None
        if self.varsP.stdoutlog:
            args.extend(['-stdout', '-stderr'])
            stdoutf = outarg + ".stdout"
        args += ['-maxthreads', str(self.varsP.nThreads)]


        job = mthread.singleJob(args,
        # NOTE(review): the singleJob call above is cut off after its first
        # argument; nothing visible here uses jobName/expectedResultFile/stdoutf
        # for the job, registers the job with this object, or initializes the
        # parent class -- restore from the original source

        util.LogStatus("progress", "stage_start", jobName)
        self.varsP.runJobs(self, "referenceProcess")
        if not self.allResultsFound(
        ):  #this is an error, but we'll continue processing without SV detect
            err = "ERROR in referenceProcess: job failed, disabling SV detect"
            self.varsP.updatePipeReport(err + "\n")
            util.LogError("error", err)
            #self.varsP.runSV = False #no need since this class is used in SVModule
            # NOTE(review): the two lines below describe the success case
            # ("store good result"), so an "else:" presumably preceded them; the
            # message is also missing its reporting call and its %s value
            self.varsP.refDeresed = expectedResultFile  #store good result for SV detect
                "referenceProcess: using reference %s for svdetect\n" %
        util.LogStatus("progress", "stage_complete", jobName)
def mergeRcmaps(outFileList,
                outdir,
                varsP=None,
                splitByContig=None,
                stageName=""):
    """Given a list of file prefixes (outFileList), append "_r.cmap" to them, and merge them
    to outdir. Report to varsP if supplied, stdout if not.

    Also support outFileList is full paths (including "_r.cmap").
    If splitByContig < 1 (or None), output each contig separately, if == 1, only
    output single merged cmap, and if > 1, do both.
    Always use stageName if supplied; if not, must supply varsP otherwise prefix is empty.

    outFileList is sorted in place. Returns None; on any failure a warning is
    reported through logOrPrintError and nothing is written.

    NOTE(review): the parameter list was truncated in the source; the names are
    the ones the body uses, and the stageName default of "" follows the
    sentence above and the `stageName == ""` test below -- confirm.
    """
    if not util.checkDir(outdir):
        err_msg = "Warning in AlignModule.mergeRcmaps: could not make outdir %s, skipping copy number" % outdir
        logOrPrintError(err_msg, varsP)
        return

    if not outFileList:  #just an argument check--check for presence on disk is below
        err_msg = "Warning in AlignModule.mergeRcmaps: no maps supplied"
        logOrPrintError(err_msg, varsP)
        return

    outFileList.sort(
    )  #for reproducibility with runAlignMerge.py (different order when listing dir)
    rsuf = "_r.cmap"
    #even though outFileList should all be there, a job may have failed--check all, just existence
    present = []
    for outf in outFileList:
        target = (outf + rsuf if not outf.endswith(rsuf) else outf
                  )  #now support either
        if not util.checkFile(target):
            err_msg = "Warning in AlignModule.mergeRcmaps: missing _r.cmap %s" % target
            logOrPrintError(err_msg, varsP)
        else:
            present.append(target)
    if not present:  #no _r.cmaps found
        err_msg = "Warning in AlignModule.mergeRcmaps: no _r.cmaps found, skipping copy number"
        logOrPrintError(err_msg, varsP)
        return
    outFileList = present  #yes, it's redundant, but now have rsuf appended

    mrgstr = (varsP.alignMolvrefMergeName if varsP else "merge")

    mergedmap = mc.multiCmap(outFileList[0])  #open original, edit in memory
    #now add other maps
    for rmap in outFileList[1:]:  #don't add map 0 to itself
        if mergedmap.addCovOcc(mc.multiCmap(
                rmap)):  #when calling addCovOcc, check return, warn if True
            err_msg = "Warning in AlignModule.mergeRcmaps: addCovOcc call failed for map %s" % rmap
            logOrPrintError(err_msg, varsP)
    #now it's merged, but the resulting map need to be written back to disk
    filepref = (
        varsP.outputContigPrefix if varsP and stageName == "" else stageName
    )  #see split_XMapQcmap_byContig
    #explicit None tests: `None < 1` is only valid (and True) in Python 2
    percontig = (splitByContig is None or splitByContig < 1 or
                 splitByContig > 1)
    singlemerged = (splitByContig is not None and splitByContig > 0)
    if percontig:
        #NOTE(review): only the path argument of this call survived in the
        #source; method name and outsuf value are reconstructed -- confirm
        mergedmap.writeAllMapsToDisk(os.path.join(outdir,
                                                  filepref + '_contig'),
                                     outsuf="_r")
        report = "mergeRcmaps: wrote %i cmaps" % len(mergedmap.cmapdict)
    if singlemerged:
        #NOTE(review): method name reconstructed as well -- confirm
        mergedmap.writeToFile(
            os.path.join(outdir,
                         filepref + "_" + mrgstr + rsuf))  #was mergedmappath
        report = "mergeRcmaps: wrote merged cmap with %i contigs" % len(
            mergedmap.cmapdict)
    #report result
    logOrPrintError(report, varsP, warn=False)
def getAlignStats(varsP, outFileList, reflen=0, isref=False, mergepath="", bnxpath=None) :
    '''Standalone fn for alignment statistics for both AlignModule and AlignRefModule.

    Reports molecule (bnx) statistics and, unless bnxpath is supplied, alignment
    statistics collected from the .xmap and .err file of every alignment job.

    varsP       -- pipeline variables object; sorted_file, argData, argsListed,
                   updatePipeReport, updateInfoReport and alignMolvrefMergeName
                   are read from it.
    outFileList -- list of alignment output prefixes (".xmap" / ".err" appended).
    reflen      -- reference length, should be in Mb; coverage is reported if non-zero.
    isref       -- selects the "Ref"/"Reference" wording instead of
                   "Contig"/"Assembly", and forces the molecule stats to be printed.
    mergepath   -- if supplied, the averaged .err is put in this dir.
    bnxpath     -- if None, assume varsP.sorted_file + ".bnx"; otherwise just
                   report stats of this file and ignore outFileList.

    Returns None; all results are reported through varsP.
    '''

    statonly = False #bnx stats only
    skipbnx = False #.err file processing only
    if bnxpath is None :
        if not varsP.sorted_file : #for runAlignMol, this is empty: nothing to do in this case
            skipbnx = True
        else :
            bnxpath = varsP.sorted_file+".bnx" #set in PairwiseModule.sort_BNX even if bypassed, but needs suffix
    else : #if bnxpath != None :
        statonly = True
    if not skipbnx and not util.checkFile(bnxpath) :
        varsP.updatePipeReport("Warning in AlignModule.getAlignStats: bnxpath supplied but not found: %s\n" % bnxpath)

    #find the minlen used for bnx_sort, which is a required arg set
    sortargs = []
    if varsP.argData.has_key('bnx_sort') : #for runAlignMol.py
        sortargs = varsP.argsListed('bnx_sort')
    minlen = 0
    validminlen = False
    if "-minlen" in sortargs :
        minlen = sortargs[sortargs.index("-minlen")+1] #next ele should be the len, if next ele isn't in list, the sort job will fail
        minlen = util.getIntFromString(minlen) #returns None if can't cast to int
        if minlen :
            validminlen = True
        else :
            minlen = 0 #really apply the default promised by the warning below

    if not validminlen and bnxpath is None and sortargs :
        varsP.updatePipeReport("Warning in AlignModule.getAlignStats: unable to obtain minlen from bnx_sort arguments; defaulting to 0\n")
    if bnxpath is not None : #if bnxpath, ignore minlen
        minlen = 0

    nmol = 0 #total n mol above minlen
    totlen = 0 #total mol len above minlen
    if not skipbnx and util.checkFile(bnxpath) :
        outstr = "Reading molecule stats from %s:\n" % bnxpath
        outstr += "Molecule Stats:\n"
        moldict = util.simpleBnxStats(bnxpath, minlen)
        nmol = moldict["nmol"]
        totlen = moldict["totlen"]
        #this is the same for isref or not, but just print twice bc no easy way to tell if was printed previously
        outstr += "N mols: %i\n" % nmol
        outstr += ("Total len (Mb): %10.3f\n") % totlen
        outstr += ("Avg len (kb)  : %10.3f\n") % moldict["avglen"]
        outstr += ("Mol N50 (kb)  : %10.3f\n") % moldict["n50"]
        outstr += ("Lab (/100kb)  : %10.3f\n") % moldict["labdensity"]

        if reflen :
            cov = totlen / reflen #totlen is in Mb
            outstr += ("%-6s Cov (x): %10.3f\n") % ("Ref" if isref else "Contig", cov)
        if isref or reflen or statonly : #if neither, nothing to print
            varsP.updateInfoReport(outstr + "\n", printalso=True)
    elif not skipbnx :
        varsP.updatePipeReport("Warning in AlignModule.getAlignStats: missing bnx path:"+bnxpath+"\n")

    if statonly :
        return #only bnx stats were requested; outFileList is ignored

    #lastly, load .xmaps and .errs from alignmol jobs and report on stats
    totmaplen = 0 #sum of lengths of mapped portions of all molecules, on reference
    totmapqrylen = 0 #sum of lengths of mapped portions of all molecules, on query
    totconf = 0 #sum of confidence of all alignments
    nalign = 0 #total number of alignments
    #alignParams attributes which are averaged over all .err files
    avgattrs = ("fp", "fprate", "fn", "bpp", "res", "llrm", "llrgm", "bppsd", "sf", "sd", "sr", "ressd")
    #alignParams attributes which are summed over all .err files
    sumattrs = ("nmaps", "goodmaps")
    errvals = dict((attr, []) for attr in avgattrs + sumattrs)
    header = ""
    err = None #will be the alignParams object if any .err files are found
    mappref = ""
    if len(outFileList) > 0 :
        mappref = getMergeFilename(outFileList[0]) #make function to unify with same convention in mergeMap
    for outpath in outFileList : #these are file prefixes
        if util.checkFile(outpath+".xmap") :
            xmap = mc.xmap(outpath+".xmap")
            nalign += len(xmap.xmapLookup)
            totmaplen += xmap.getSumMappedRefLen() #in kb
            totmapqrylen += xmap.getSumMappedQryLen() #in kb
            totconf += sum([x.Confidence for x in xmap.xmapLookup.values()])
        else :
            varsP.updatePipeReport("Warning in AlignModule.getAlignStats: missing xmap:"+outpath+".xmap"+"\n")
        if util.checkFile(outpath+".err") :
            err = mc.alignParams(outpath+".err")
            if not header :
                header = err.header
            #collect this job's values; without this every average below is 0
            for attr in errvals :
                errvals[attr].append(getattr(err, attr))

    #nalign from xmap should be the same as goodmaps from .err
    sumgoodmaps = sum(errvals["goodmaps"])
    if sumgoodmaps != nalign :
        varsP.updateInfoReport("Warning in getAlignStats: n mol align differ in .err files (%i) and .xmaps (%i)\n" % (sumgoodmaps, nalign), printalso=True)
    if totmaplen or totconf or nalign :
        outstr =  "Molecules Aligned to %s:\n" % ("Reference" if isref else "Assembly")
        outstr += "N mol align       : %9i\n" % nalign
        outstr += "Mol fraction align: %13.3f\n" % (float(nalign)/nmol if nmol else 0)
        outstr += "Tot align len (Mb): %11.1f\n" % (totmapqrylen / 1e3) #Mb
        if reflen > 0 :
            outstr += ("Effective Cov (x) : %13.3f\n") % (totmaplen / 1e3 / reflen) #totmaplen is in kb
        outstr += "Avg align len (kb): %11.1f\n" % (totmapqrylen/nalign if nalign else 0)
        outstr += "Fraction align len: %13.3f\n" % (totmapqrylen/1e3/totlen if totlen else 0) #totmapqrylen is in kb, totlen is in mb
        outstr += "Tot confidence    : %11.1f\n" % totconf
        outstr += "Avg confidence    : %11.1f\n" % (totconf/nalign if nalign else 0)
        varsP.updateInfoReport(outstr, printalso=True)

    avg = {}
    for attr in avgattrs :
        vals = errvals[attr]
        avg[attr] = (sum(vals)/len(vals) if vals else 0)
    if avg["fp"] or avg["fn"] or avg["bpp"] :
        outstr =  "Avg FP(/100kb)    : %12.2f\n" % avg["fp"]
        outstr += "Avg FP ratio      : %13.3f\n" % avg["fprate"]
        outstr += "Avg FN ratio      : %13.3f\n" % avg["fn"]
        outstr += "Avg bpp           : %11.1f\n" % avg["bpp"]
        outstr += "Avg sf            : %13.3f\n" % avg["sf"]
        outstr += "Avg sd            : %13.3f\n" % avg["sd"]
        outstr += "Avg sr            : %13.3f\n" % avg["sr"]
        varsP.updateInfoReport(outstr + "\n", printalso=True)
    if err and mergepath : #have an error file (alignParams) object
        mrgstr = (varsP.alignMolvrefMergeName if varsP else "merge")
        outpath = os.path.join(mergepath, mappref+mrgstr+".err")
        #reuse the last alignParams object loaded as container for the merged values
        for attr in avgattrs :
            setattr(err, attr, avg[attr])
        err.nmaps = sum(errvals["nmaps"])
        err.goodmaps = sumgoodmaps
        #NOTE(review): outpath was computed but never used; writing the merged
        #.err is what the docstring promises -- confirm alignParams.writeToFile
        err.writeToFile(outpath)
def runSV(cwd, rabin, refcmap, contigdir, contigbase, runaligns, xmappath,
          optargs, nthreads, maxthreads, bedfile, errfile, outdir, errbinfile,
          clustargs, groupsv):
    '''Load Pipeline modules from cwd, configure a Pipeline.varsPipeline object and,
    if runaligns, set up SV detection through SVModule.SVdetect.

    cwd        -- dir expected to contain utilities.py, Pipeline.py, SVModule.py
                  (and SampleCharModule.py when errfile is used)
    rabin      -- RefAligner binary, stored as varsP.RefAlignerBin
    refcmap    -- reference map, stored as varsP.ref
    contigdir  -- dir with the query cmaps; contigbase + ".cmap" inside it is
                  the initial varsP.latestMergedCmap
    runaligns  -- if false, nothing visible here is run besides the setup
    xmappath   -- not referenced in the visible body
    optargs    -- optArguments file, stored as varsP.optArgumentsFileIn
    nthreads, maxthreads -- stored as varsP.nThreads / varsP.maxthreads
    bedfile    -- if set, stored as varsP.bedFile
    errfile, errbinfile -- sources of noise parameters; errbinfile takes precedence
    outdir     -- output dir; defaults to contigdir + "_sv"
    clustargs  -- if set, cluster submission is enabled with this arguments file
    groupsv    -- stored as varsP.groupSV

    NOTE(review): the closing quotes of this docstring were missing and have
    been restored. Many statements below are visibly truncated (each is
    flagged); the function does not parse as it stands.
    '''

    printargs = True

    # NOTE(review): none of the "missing"/"ERROR" checks in this function stop
    # execution after printing; presumably an exit or return was lost after
    # each of them -- confirm against the original source
    if not os.path.isfile(os.path.join(cwd, "utilities.py")):
        print "utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    import utilities as util

    if not util.checkFile(os.path.join(cwd, "Pipeline.py")):
        print "Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    import Pipeline

    if not util.checkFile(os.path.join(cwd, "SVModule.py")):
        print "SVModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    import SVModule as svm

    # NOTE(review): the condition below is cut off inside os.path.join(; by
    # analogy with the checks above it presumably tested "SampleCharModule.py"
    if errfile and not util.checkFile(os.path.join(cwd,
        print "SampleCharModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    elif errfile:
        import SampleCharModule as scm

    #use Pipeline objects

    varsP = Pipeline.varsPipeline()

    varsP.optArgumentsFileIn = optargs
    varsP.RefAlignerBin = rabin
    varsP.latestMergedCmap = os.path.join(
        contigdir, contigbase + ".cmap")  #file suffix required to be .cmap
    varsP.contigFolder = os.path.split(contigdir)[0]
    varsP.nThreads = nthreads  #necessary otherwise job won't start -- max threads per node
    varsP.maxthreads = maxthreads  #threads per job
    varsP.ref = refcmap
    varsP.stdoutlog = True  #enable -stdout -stderr args to RefAligner
    varsP.curCharacterizeCmaps = [varsP.latestMergedCmap]
    varsP.contigSubDirectories = True  #needed for prepareContigIO
    varsP.doAlignMolvRef = False  #do not look for copy number
    varsP.groupSV = groupsv  #mimic Pipeline behavior: group or not

    if runaligns:
        #varsP.contigAlignTarget = outdir
        varsP.runSV = False
        varsP.groupContigs = False
        varsP.stdoutlog = True  #use -stdout -stderr
        varsP.stageComplete = contigbase
        varsP.outputContigPrefix = getContigPrefix(
            util, contigdir
        )  #if outdir is not supplied, this is used as dir prefix; also used as file pref for -o arg
        varsP.outputContigFolder = contigdir  #cmaps are copied from here

        if not outdir:
            outdir = contigdir + "_sv"  #this will be outdir of sv jobs
        if os.path.isdir(outdir):
            if not util.checkDir(outdir):  #check writeable
                print "\nERROR: Output dir is not writeable:\n", outdir, "\n"
            elif outdir == contigdir:
                print "\nERROR: Output dir cannot be same as input dir:\n", outdir, "\n"
            # NOTE(review): as written this warning is printed even after one
            # of the two errors above
            print "\nWARNING: Output dir already exists, results will be overwritten:\n", outdir, "\n"
        # NOTE(review): checkDir is called without an argument below;
        # presumably outdir, as in the check above and the message that follows
        elif not util.checkDir(
        ):  #does not exist, make, if False, can't make or not writeable
            print "\nERROR: Output dir cannot be created or is not writeable:\n", outdir, "\n"

        if clustargs:
            #os.putenv('SGE_ROOT', '/var/lib/gridengine') #do I want this??? NO! It could very well be wrong.
            varsP.onCluster = True
            varsP.checkCluster()  #call varsPipeline method to check SGE_ROOT
            #note: before, above default is wrong. Now, there is no default--user is required to set environment variable; but this is consistent with the Pipeline
            varsP.clusterLogDir = os.path.join(outdir, 'ClusterLogs')
            util.checkDir(varsP.clusterLogDir)  #make it
            varsP.clusterArgumentsFileIn = clustargs  #required for parseArguments
            if varsP.error:
                print varsP.message
            varsP.RefAlignerBin += "${BINARY_SUFFIX:=}"  #copy from varsPipeline, handled by external script on phi host

        varsP.pipeReportFile = os.path.join(outdir, "sv_jobs_log.txt")
        varsP.infoReportFile = os.path.join(outdir, "sv_log.txt")
        varsP.memoryLogpath = os.path.join(outdir, "memory_log.txt")
        if bedfile:
            varsP.bedFile = bedfile
        util.InitStatus(os.path.join(outdir, "status.xml"))
        varsP.parseArguments()  #parses optArgumentsFile
        varsP.RefAlignerBinOrig = rabin
        # NOTE(review): the ")" below closes a call whose opening line is
        # missing; per its comment it wrote general information to the log
        )  #general information in log -- needed for refaligner_version
        if printargs:
            print "\nRunning SV detection with arguments (" + os.path.split(
                optargs)[1] + "):\n" + " ".join(
                    varsP.argsListed('svdetect')) + '\n'

        noisep = {}
        if errbinfile:
            noisep = {"readparameters": errbinfile}
            print "Using noise parameters from " + errbinfile + "\n"
        elif errfile:
            noisep = scm.readNoiseParameters(errfile.replace(".err", ""))
            # NOTE(review): has_key( has lost its argument; the deletion two
            # lines down suggests it was 'readparameters'
            if noisep.has_key(
            ):  #remove this because it's redundant, and it can cause problems with RefAligner compatibility
                del noisep['readparameters']
            if not noisep:  #readNoiseParameters returns empty dict on failure
                print "ERROR reading noise parameters, check .err file:", errfile
            print "Using noise parameters from " + errfile + ":\n" + " ".join(
                ["-" + str(k) + " " + str(v)
                 for k, v in noisep.iteritems()]) + "\n"

        varsP.outputContigFolder = contigdir  #cmaps are copied from here

        #make merged cmap to replace merged _q.cmap if not produced by RefAligner
        # NOTE(review): the call below is cut off after its first argument
        cmaps = util.getListOfFilesFromDir(varsP.outputContigFolder,
        if len(cmaps) > 1:
            # NOTE(review): os.path.join receives only the file name here; a
            # directory argument appears to be missing
            varsP.contigPathTxtFile = os.path.join(
                "contig_list.txt")  #mergeIntoSingleCmap creates this file
            print "Creating merged cmap"
            # NOTE(review): no call that creates the merged cmap is visible
            # between these two messages (the comment above names
            # mergeIntoSingleCmap) -- restore from the original source
            print "Merged cmap created:", varsP.latestMergedCmap, "\n"
            if varsP.groupSV == 0:  #if it is a single job, use merged map just created
                varsP.outputContigFolder = outdir  #input == output
                #print "varsP.outputContigFolder =", varsP.outputContigFolder #debug
        elif len(cmaps) == 1:
            varsP.latestMergedCmap = cmaps[0]
        else:  #this is already checked in getContigPrefix (redundant)
            print "No cmaps found in input dir; check dir %s\n" % varsP.outputContigFolder

        # NOTE(review): svmodule is not used again in the visible code
        svmodule = svm.SVdetect(varsP, noisep, outdir, skipderes=True)
        #this got duplicated above
        #if hasattr(util, "InitStatus") : #if old version, skip -- do this after SVdetect.__init__ bc makes outdir
        #    util.InitStatus(os.path.join(outdir, "status.xml")) #needed otherwise call to status_log fails

        varsP.contigAlignTarget = contigdir  #this is dir in which _q and _r cmaps must be located
        # NOTE(review): as written this error is printed at the end of every
        # runaligns run; presumably it belongs to a lost "else:" branch of
        # "if runaligns:" -- confirm
        print "ERROR: feature not supported"  #not implemented to not run jobs
def getArgs():
    # Parse the command line and validate each supplied value, then return the
    # validated values as one 16-element tuple (see the return statement at the
    # bottom of this function). `argparse`, `os` and `description` are expected
    # to come from module scope; `utilities` is imported locally as `util`.
    #
    # NOTE(review): this function is not valid Python as it stands -- several
    # statements look truncated (option definitions, else-branches, exits).
    # Each suspicious spot is flagged below; confirm against version control.
    parser = argparse.ArgumentParser(description=description)

    # NOTE(review): only the help strings of the option definitions remain
    # below; the enclosing parser.add_argument(...) calls (flag, dest, type,
    # default) are not visible. The attributes read from `result` further down
    # are: RefAligner, referenceMap, queryDir, outputDir, pipelineDir,
    # optArguments, numThreads, maxthreads, bedFile, errFile, errbinFile, cxml,
    # groupsv.
        help='Path to RefAligner or dir containing it (required)',
        help='Path to reference maps (.cmap), 1 file only (required)',
        help='Path to dir containing query maps (.cmaps) (required)',
    #parser.add_argument('-x', dest='xmap', help='Path to .xmap, 1 file only (optional, if specified, no alignment is done, if not specified, -t, -r, and -q must be specified)') #not supported
        'output dir (optional, defaults to input map dir with suffix "_sv")',
        'Pipeline dir (optional, defaults to script dir, or current directory)',
        'Path to optArguments.xml (optional, default optArguments_human.xml in Pipeline dir if found, otherwise required)',
        help='Total number of threads (cores) to use (optional, default 4)',
        'Threads per Job, -maxthreads (non-cluster only;optional, default 4)',
        '.bed file with gaps in reference for flagging SVs which overlap N-base gaps (optional)',
        '.err file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)',
        '.errbin file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)',
        'Run on cluster, read XML file for submission arguments (optional--will not use cluster submission if absent)',
        'SV jobs configuration: 0 = single job (required for correct haplotype calls), 1 = single job per contig (not recommended), 2 = grouped (default 0; optional)',
    #parser.add_argument('-s', help='Disable grouping of SV jobs (default grouped; optional)', dest='groupsv', action='store_false') #old one
    result = parser.parse_args()

    #check all Pipeline dependencies
    # Resolve the Pipeline directory (`cwd`): the pipelineDir argument first,
    # then the directory holding this script, then the process working dir.
    # NOTE(review): an `else:` appears to be missing after the first
    # assignment -- as written the script-dir lookup sits inside the `if` and
    # overwrites the user-supplied value, and `cwd` is unset when pipelineDir
    # is empty. The os.path.join(...) in the inner test also seems to have lost
    # its first argument (the later check uses os.path.join(cwd, ...)).
    if result.pipelineDir:
        cwd = result.pipelineDir
        cwd = os.path.split(
            os.path.realpath(__file__))[0]  #this is path of this script
        if not os.path.isfile(os.path.join(
                "utilities.py")):  #if still not here, last try is actual cwd
            cwd = os.getcwd()  #still check this below

    #this is the only one imported here and in runCharacterize
    # NOTE(review): here and in the checks below, a failure only prints a
    # message and execution continues; the commented-out xmap check further
    # down calls sys.exit(1) after its message -- confirm whether exits were
    # lost from these branches.
    if not os.path.isfile(os.path.join(cwd, "utilities.py")):
        print "utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    import utilities as util

    #xmap -- don't use this
    # With the xmap option disabled, runaligns is always True and xmappath is
    # always None in the visible code.
    runaligns = True  #default is to run the alignment
    xmappath = None
    #if result.xmap :
    #    xmappath = result.xmap
    #    if not util.checkFile(xmappath, ".xmap") :
    #        print "Xmap path is supplied ("+xmappath+") but not found or doesn't end in .xmap."
    #        sys.exit(1)
    #    runaligns = False

    #RefAligner -- check for either path to RefAligner, or dir containing it, depending on cluster args
    rabin = result.RefAligner
    #replicate Pipeline behavior: RefAligner is always required
    # A directory argument is expanded to <dir>/RefAligner before the check.
    if os.path.isdir(rabin):
        rabin = os.path.join(rabin, "RefAligner")
    if not util.checkExecutable(rabin):
        print "RefAligner not found or not executable at", rabin, "\nPlease supply RefAligner dir or full path as -t arg."

    #reference maps -- only required if xmap not specified
    # NOTE(review): the message mentions ".spots" but only ".cmap" is passed to
    # util.checkFile (the .spots test is commented out).
    refcmap = os.path.realpath(result.referenceMap)
    if runaligns and not util.checkFile(
            refcmap, ".cmap"):  #and not util.checkFile(refcmap, ".spots") :
        print "Reference map file (" + refcmap + ") not found or does not end in .cmap or .spots. Check -r argument."

    #query maps
    qrypath = os.path.realpath(result.queryDir)
    #if runaligns and not util.checkFile(qrypath, ".cmap") :
    #    print "Query map file ("+qrypath+") not found or does not end in .cmap or .spots. Check -q argument."
    #    sys.exit(1)
    if not util.checkDir(qrypath, checkWritable=False,
                         makeIfNotExist=False):  #does NOT have to be writeable
        print "Query dir (" + qrypath + ") not found or not a dir. Check -q argument."
    # contigdir/contigbase are derived from the query path when aligning.
    # NOTE(review): an `else:` looks lost before the xmappath pair -- as
    # written both pairs run when runaligns is True, and xmappath is None.
    if runaligns:
        contigdir = qrypath  #os.path.split(qrypath)[0] #dir of query maps
        contigbase = os.path.split(qrypath)[1]  #filename
        contigdir = os.path.split(xmappath)[0]
        contigbase = os.path.split(xmappath)[1]  #filename
    #contigbase = contigbase[:contigbase.find(".")] #remove suffix

    #optargs file
    # An explicit optArguments value wins; otherwise fall back to
    # optArguments_human.xml inside the Pipeline directory.
    optargs = None
    if result.optArguments:  #supplied on command line
        optargs = result.optArguments
        if not util.checkFile(optargs, ".xml"):
            print "optArguments path is supplied (" + optargs + ") but not found or doesn't end in .xml, check -a argument."
    elif runaligns:  #load from Pipeline dir if running alignments
        optargs = os.path.join(cwd, "optArguments_human.xml")
        if not util.checkFile(optargs):
            print "optArguments.xml missing in Pipeline directory (" + cwd + "). Try supplying path explicitly using -a."

    #cluster args
    # clustargs stays None when no cluster XML was supplied.
    clustargs = None
    if result.cxml:
        clustargs = os.path.realpath(result.cxml)
        if not util.checkFile(clustargs, ".xml"):
            print "clusterArguments path is supplied (" + clustargs + ") but not found or doesn't end in .xml, check -C argument."

    # Both thread counts must be strictly positive; the %i format assumes the
    # parsed values are integers (the option definitions are not visible).
    nthreads = result.numThreads
    if nthreads <= 0:
        print "Number of threads value invalid (must be > 0): %i" % nthreads

    maxthreads = result.maxthreads
    if maxthreads <= 0:
        print "Max threads value invalid (must be > 0): %i" % maxthreads

    #bed file
    bedfile = result.bedFile  #must make local for return statement below
    if bedfile:  #must check for empty string BEFORE you do realpath, or it returns cwd
        bedfile = os.path.realpath(result.bedFile)
        if not util.checkFile(bedfile, ".bed"):
            print "bed file supplied but not found or incorrect suffix:", bedfile

    #.errbin file
    errbinfile = result.errbinFile
    if errbinfile:
        errbinfile = os.path.realpath(result.errbinFile)
        if not util.checkFile(errbinfile, ".errbin"):
            print "errbin file supplied but not found or incorrect suffix:", errbinfile

    #.err file
    # When both noise files are given, .errbin takes precedence and errfile is
    # reset to the empty string.
    errfile = result.errFile
    if errfile and errbinfile:
        print "Warning: .err and .errbin arguments supplied; ignoring .err file"
        errfile = ""
    elif errfile:
        errfile = os.path.realpath(result.errFile)
        if not util.checkFile(errfile, ".err"):
            print "err file supplied but not found or incorrect suffix:", errfile

    # NOTE(review): the help text above promises a default of the input map dir
    # with an "_sv" suffix, but no such fallback is visible here; realpath is
    # applied to result.outputDir unconditionally -- confirm what happens when
    # the option is omitted.
    outdir = os.path.realpath(result.outputDir)

    # Only 0, 1 and 2 are accepted job-grouping modes.
    groupsv = result.groupsv
    if groupsv < 0 or groupsv > 2:
        print 'ERROR: -s (grouped SV) must be 0, 1, or 2\n'

    #yes, this is messy...but I don't want another class (besides varsPipeline) and they just go to runCharacterize
    # The tuple order is positional: callers unpack it in exactly this order.
    return cwd, rabin, refcmap, contigdir, contigbase, runaligns, xmappath, optargs, nthreads, maxthreads, bedfile, errfile, outdir, errbinfile, clustargs, groupsv
def runAlignMol():
    # Command-line entry point with two modes, chosen by the query argument:
    #   * query is a cmap file  -> runaligns = True, align the bnx molecules
    #     to it;
    #   * query is a directory  -> runaligns = False, treat it as an existing
    #     alignmol dir and only merge its results.
    # Overall flow visible here: parse/validate arguments, import the Pipeline
    # modules from `cwd`, populate a Pipeline.varsPipeline object, construct
    # alignmod.AlignModule, then call the merge/split helpers at the bottom.
    #
    # NOTE(review): this function is not valid Python as it stands -- a number
    # of statements look truncated (option definitions, else-branches, call
    # arguments, if-bodies). Each spot is flagged below; confirm against
    # version control before relying on any of it.
    parser = argparse.ArgumentParser(description=description)

    # NOTE(review): only help strings remain of the option definitions; the
    # parser.add_argument(...) calls themselves are not visible. Attributes
    # read from `result` below: pipelineDir, queryDir, RefAligner,
    # optArguments, outputDir, bnx, numThreads, maxthreads, errbinFile,
    # errFile, ref.
        'Path to merged cmap to align molecules (-b) to OR alignmol dir from Pipeline for merge (if latter, no alignments are performed), required',
        help='Input molecule (.bnx) file, required if aligning molecules',
    #parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file OR path to dir containing split bnx pieces, required if aligning molecules', type=str) #I should add the split feature; for now, just do single bnx
        'Path to optArguments.xml (optional, default optArguments_human.xml in Pipeline dir if found, otherwise required)',
        'If this flag is used, alignmolvref arguments are used, otherwise alignmol arguments are used (default alignmol; optional)',
        'output dir (optional, defaults to sub-dir of input map dir called "alignmol")',
        help='Path to RefAligner or dir containing it (required)',
        help='Total number of threads (cores) to use (optional, default 4)',
        'Threads per Job, -maxthreads (non-cluster only;optional, default 4)',
        '.err file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise',
        '.errbin file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise',
        'Pipeline dir (optional, defaults to script dir, or current directory)',
    result = parser.parse_args()

    # Passed as stageName to the split_* helpers near the end.
    outprefix = "exp_refineFinal1"  #this is the default; assume for now

    #check all Pipeline dependencies
    # Resolve the Pipeline directory (`cwd`): pipelineDir argument, then this
    # script's directory, then the process working directory.
    # NOTE(review): an `else:` appears to be missing after the first
    # assignment, and the inner os.path.join(...) seems to have lost its `cwd`
    # argument (compare the check a few lines below).
    if result.pipelineDir:
        cwd = result.pipelineDir
        cwd = os.path.split(
            os.path.realpath(__file__))[0]  #this is path of this script
        if not os.path.isfile(os.path.join(
                "utilities.py")):  #if still not here, last try is actual cwd
            cwd = os.getcwd()  #still check this below

    #this is the only one imported here and in runCharacterize
    # NOTE(review): throughout this function an "ERROR" message is printed and
    # execution simply continues; commented-out code below uses sys.exit(1)
    # after such messages -- confirm whether exits were lost.
    if not os.path.isfile(os.path.join(cwd, "utilities.py")):
        print "ERROR: utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    import utilities as util

    if not os.path.isfile(os.path.join(cwd, "AlignModule.py")):
        print "ERROR: AlignModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    import AlignModule as alignmod

    if not util.checkFile(os.path.join(cwd, "Pipeline.py")):
        print "ERROR: Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    import Pipeline

    if not util.checkFile(os.path.join(cwd, "mapClasses.py")):
        print "ERROR: mapClasses.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    import mapClasses as mc

    #input dir
    # The kind of query path selects the mode: directory -> merge only,
    # cmap file -> run alignments.
    # NOTE(review): the final error message sits inside the cmap branch; an
    # `else:` before it looks lost (it describes the neither-dir-nor-cmap case).
    if not result.queryDir:
        print "ERROR: Query (-q) argument not supplied."
    qrypath = os.path.realpath(result.queryDir)
    if util.checkDir(
            qrypath, checkWritable=False,
            makeIfNotExist=False):  #output elsewhere so not writeable is ok
        runaligns = False
    elif util.checkCmap(qrypath):
        runaligns = True
        print "ERROR: Query argument (" + qrypath + ") not found or not a dir or cmap. Check -q argument."

    #this check isn't really necessary...make it a warning -- left over from runAlignMerge.py
    #if not os.path.split(qrypath)[1].endswith("alignmol") :
    #    print "Warning: Query dir ("+qrypath+") does not end with 'alignmol'; please be sure this is a Pipeline alignmol dir\n"

    #RefAligner -- check for either path to RefAligner, or dir containing it, depending on cluster args
    rabin = ""  #need empty string for generateJobList even though no jobs are run
    if runaligns:
        rabin = result.RefAligner
        #replicate Pipeline behavior: RefAligner is always required
        # A directory argument is expanded to <dir>/RefAligner before the check.
        if os.path.isdir(rabin):
            rabin = os.path.join(rabin, "RefAligner")
        if not util.checkExecutable(rabin):
            print "ERROR: RefAligner not found or not executable at", rabin, "\nPlease supply RefAligner dir or full path as -t arg."

    #optargs file
    # Only needed when aligning: an explicit value wins, otherwise fall back to
    # optArguments_human.xml in the Pipeline directory.
    optargs = None
    if runaligns and result.optArguments:  #supplied on command line
        optargs = result.optArguments
        if not util.checkFile(optargs, ".xml"):
            print "optArguments path is supplied (" + optargs + ") but not found or doesn't end in .xml, check -a argument."
    elif runaligns:  #load from Pipeline dir if running alignments
        optargs = os.path.join(cwd, "optArguments_human.xml")
        if not util.checkFile(optargs):
            print "optArguments.xml missing in Pipeline directory (" + cwd + "). Try supplying path explicitly using -a."

    #output dir
    # NOTE(review): several things look off in this section and need checking:
    #  - the default sub-dir used here is "merge" while the help text above
    #    says "alignmol";
    #  - an `else:` looks lost between the two outdir assignments (as written
    #    realpath is applied to an empty outputDir);
    #  - the "already exists" warning is printed unconditionally inside the
    #    isdir branch, so it follows the not-writeable error too;
    #  - the util.checkDir( ) call in the elif has no argument (presumably
    #    `outdir`).
    if not result.outputDir:
        outdir = os.path.join(qrypath,
                              "merge")  #should be same as in AlignModule
        outdir = os.path.realpath(result.outputDir)
    if os.path.isdir(outdir):
        if not util.checkDir(outdir):  #check writeable
            print "\nERROR: Output dir is not writeable:\n", outdir, "\n"
        #this is ok here
        #elif outdir == contigdir :
        #    print "\nERROR: Output dir cannot be same as input dir:\n", outdir, "\n"
        #    sys.exit(1)
        print "\nWARNING: Output dir already exists, results will be overwritten:\n", outdir, "\n"
    elif not util.checkDir(
    ):  #does not exist, make, if False, can't make or not writeable
        print "\nERROR: Output dir cannot be created or is not writeable:\n", outdir, "\n"

    #bnx file
    # A bnx file is mandatory only when alignments are going to be run.
    bnxfile = result.bnx
    if bnxfile:  #must check for empty string BEFORE you do realpath, or it returns cwd
        bnxfile = os.path.realpath(bnxfile)
        if not util.checkFile(bnxfile, ".bnx"):
            print "ERROR: bnx file supplied but not found or incorrect suffix:", bnxfile
    elif runaligns:
        print "ERROR: bnx file not supplied but running alignments; please supply bnx file as -b argument"

    nthreads = result.numThreads
    if nthreads <= 0:
        print "ERROR: Number of threads value invalid (must be > 0): %i" % nthreads

    # The total thread count is raised to the per-job count when it is smaller.
    maxthreads = result.maxthreads
    if maxthreads <= 0:
        print "ERROR: Max threads value invalid (must be > 0): %i" % maxthreads
    elif nthreads < maxthreads:
        print "Warning: num threads (-T: %i) < max threads (-j: %i): increasing num threads to equal max threads\n" % (
            nthreads, maxthreads)
        nthreads = maxthreads

    #.errbin file
    errbinfile = result.errbinFile
    if errbinfile:
        errbinfile = os.path.realpath(result.errbinFile)
        if not util.checkFile(errbinfile, ".errbin"):
            print "ERROR: errbin file supplied but not found or incorrect suffix:", errbinfile

    #.err file
    # When both noise files are given, .errbin takes precedence and errfile is
    # reset to the empty string.
    errfile = result.errFile
    if errfile and errbinfile:
        print "Warning: .err and .errbin arguments supplied; ignoring .err file"
        errfile = ""
    elif errfile:
        errfile = os.path.realpath(result.errFile)
        if not util.checkFile(errfile, ".err"):
            print "err file supplied but not found or incorrect suffix:", errfile

    # SampleCharModule is imported (as scm) only when a .err file is in use;
    # scm.readNoiseParameters is called on that path further down.
    # NOTE(review): the checkFile(os.path.join(cwd, ...)) call below is cut off
    # mid-expression; judging by the message it tested for SampleCharModule.py.
    if errfile and not util.checkFile(os.path.join(cwd,
        print "SampleCharModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
    elif errfile:
        import SampleCharModule as scm

    # Forwarded as the `doref` argument of AlignModule below.
    doref = result.ref

    #DONE checking arguments

    print "Using output dir", outdir
    # NOTE(review): both messages print when runaligns is True; an `else:`
    # before the "Merging" line looks lost.
    if runaligns:
        print "Aligning", bnxfile, "\nTo", qrypath, "\n"
        print "Merging", qrypath, "\n"

    startTime = time.time()  #time since Epoch
    memory_log = os.path.join(outdir, "memory_log.txt")

    # Minimal varsPipeline set-up so AlignModule can run outside the Pipeline;
    # both log files and status.xml are placed in outdir.
    varsP = Pipeline.varsPipeline()
    varsP.RefAlignerBin = rabin
    varsP.contigFolder = ""  #not used but needs to be an attr
    varsP.outputContigFolder = ""  #not used but needs to be a string attr
    varsP.pipeReportFile = os.path.join(outdir, "alignmol_jobs_log.txt")
    varsP.infoReportFile = os.path.join(outdir, "alignmol_log.txt")
    util.InitStatus(os.path.join(outdir, "status.xml"))

    if runaligns:
        varsP.optArgumentsFileIn = optargs
        varsP.latestMergedCmap = qrypath  #if !doref, need this one
        varsP.ref = qrypath  #and if doref, need this one
        varsP.nThreads = nthreads  #necessary otherwise job won't start -- max threads per node
        varsP.maxthreads = maxthreads  #threads per job
        p = os.path.split(qrypath)[1]
        varsP.outputContigPrefix = p[:p.rfind(".")]  #filename prefix
        varsP.stdoutlog = True  #use -stdout -stderr
        varsP.sorted_file = bnxfile[:bnxfile.rfind(
            ".")]  #enables the mol fraction align in AlignModule.getAlignStats
        if qrypath.endswith(".cmap"):  #enable the mol stats
            varsP.totAssemblyLenMb = mc.multiCmap(
                qrypath, lengthonly=True).totalLength / 1e6

        varsP.memoryLogpath = os.path.join(outdir, "memory_log.txt")
        varsP.parseArguments()  #parses optArgumentsFile
        varsP.RefAlignerBinOrig = rabin
        # NOTE(review): the line below is the closing parenthesis of a call
        # whose opening line is not visible.
        )  #general information in log -- needed for refaligner_version

        # Noise parameters: a .errbin file is handed over as a single
        # "readparameters" entry; a .err file is parsed with
        # scm.readNoiseParameters (path passed with ".err" removed).
        noisep = {}
        if errbinfile:
            noisep = {"readparameters": errbinfile}
            #print "Using noise parameters from "+errbinfile+"\n" #move below
        elif errfile:
            noisep = scm.readNoiseParameters(errfile.replace(".err", ""))
            # NOTE(review): has_key( ) has lost its argument; the deletion on
            # the next line implies the key tested was 'readparameters'.
            if noisep.has_key(
            ):  #remove this because it's redundant, and it can cause problems with RefAligner compatibility
                del noisep['readparameters']
            if not noisep:  #readNoiseParameters returns empty dict on failure
                print "ERROR reading noise parameters, check .err file:", errfile
            #redundant with below?
            print "Using noise parameters from " + errfile + ":\n" + " ".join(
                ["-" + str(k) + " " + str(v)
                 for k, v in noisep.iteritems()]) + "\n"

        #some code from SampleCharModule to load args into noise0
        # Each known key found in noisep is logged via util.LogStatus and
        # written into the "noise0" section through varsP.replaceParam.
        infoReport = "Loaded noise parameters:\n"
        klist = [
            "FP", "FN", "sf", "sd", "sr", "bpp", "readparameters"
        ]  #hardcoding parameters is kind of bad, but it fixes the order without using OrderedDict.
        #noiseargs = self.varsP.argsListed('noise0') #not necessary
        for v in klist:
            # NOTE(review): the body of this `if` is missing (presumably a
            # `continue` to skip absent keys -- confirm).
            if not noisep.has_key(v):
            param = str(noisep[v])
            util.LogStatus("parameter", "auto_" + v, param)
            infoReport += v + ":" + param + "\n"
            varsP.replaceParam("noise0", "-" + v, param)
        varsP.updateInfoReport(infoReport + '\n', printalso=True)

        # NOTE(review): the file-list lookup below reads like the merge-only
        # path (it treats qrypath as a dir of alignmol data), yet it is
        # indented under `if runaligns:`; an `else:` before it looks lost, and
        # another before the "Found" message. getOutFileList is defined outside
        # this function.
        print "Getting file list from", qrypath
        outFileList = getOutFileList(util, qrypath)
        if not outFileList:
            print "ERROR: Query dir (" + qrypath + ") does not contain alignmol data. Check -q argument."
            print "Found", len(outFileList), "alignment results"
    #end if runaligns

    amod = alignmod.AlignModule(
        varsP, doref, outdir, bnxfile)  #constructor will call generateJobList

    # Hand the file list to the module and derive the output prefix from the
    # first file name, dropping a trailing "_<n>" part when there is more than
    # one underscore.
    # NOTE(review): confirm the condition -- lines may be missing around here.
    if runaligns:
        amod.outFileList = outFileList
        p = os.path.split(outFileList[0])[1]
        if p.count("_") > 1:  #expect something like "EXP_REFINEFINAL1_4"
            #p = p[:p.rfind("_")+1] #remove integer suffix
            p = p[:p.rfind("_")]  #remove integer suffix (and underscore)
        #else :
        #    p += "_" #because mrgstr is appended
        varsP.outputContigPrefix = p

    # NOTE(review): this `if` has no body in the visible code.
    if not runaligns or len(amod.jobList) > 0:

    if runaligns:
        #copy from Pipeline.py
        # util.SummarizeErrors returning 0 is treated as success.
        # NOTE(review): success and failure are both reported in the same
        # branch; an `else:` before the "completed with errors" pair looks lost.
        if util.SummarizeErrors(varsP=varsP) == 0:
            varsP.updatePipeReport("Pipeline has successfully completed\n")
            util.LogStatus("progress", "pipeline", "success")
            varsP.updatePipeReport("Pipeline has completed with errors\n")
            util.LogStatus("progress", "pipeline", "failure")



    #in Pipeline, this is called first
    #print "Calling getAlignStats:" #but it won't work without varsP atm; skip it
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)

    # Merge phase: each step is bracketed by util.logMemory calls that write
    # to memory_log with a "<step>_start" / "<step>_end" label.
    # NOTE(review): `outputdir` is used below but never assigned in the visible
    # part of this function (only `outdir` is) -- confirm where it comes from.
    print "Calling mergeMap"
    print outFileList[0]  #, "\n", outputdir #moved above
    util.logMemory(memory_log, startTime, "mergeMap_start")
    #mergeMap(self.varsP, self.outFileList, mergepath=self.outputdir) #varsP is optional
    alignmod.mergeMap(None, outFileList, outputdir)
    util.logMemory(memory_log, startTime, "mergeMap_end")

    # NOTE(review): no mergeRcmaps call is visible between the two log lines;
    # only the commented-out signature remains.
    print "Calling mergeRcmaps"
    util.logMemory(memory_log, startTime, "mergeRcmaps_start")
    #mergeRcmaps(outFileList, outdir, varsP=None, splitByContig=None, stageName="alignmol") :
    util.logMemory(memory_log, startTime, "mergeRcmaps_end")

    print "Calling split_XMap_byContig"  #split_XMapQcmap_byContig"
    util.logMemory(memory_log, startTime, "split_XMap_byContig_start")
    #xmapdict = alignmod.split_XMap_byContig(outFileList, outputdir, stageName=outprefix) #old
    # NOTE(review): the call below is cut off after its first argument.
    xmapdict = alignmod.split_XMap_byContig_new(outFileList,
    util.logMemory(memory_log, startTime, "split_XMap_byContig_end")

    print "Calling split_Qcmap_byContig"
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_start")
    #alignmod.split_Qcmap_byContig(outFileList, outputdir, xmapdict) #old
    # NOTE(review): the two lines below are the argument tail of a call whose
    # opening line (function name) is not visible.
        outFileList, outputdir, xmapdict,
        stageName=outprefix)  #new: better performance
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_end")

    print "AlignMerge successfully completed"
