Exemplo n.º 1
0
    def RegisterExistingTmpFile(self, thisPath, own=False, isDir=False):
        """Register an existing temporary file/directory if it exists.

           The path is user-expanded and made absolute before any check.

           Input:
                thisPath: path of the temporary file/directory to register.
                own     : Whether this object owns this file.
                isDir   : True = directory, False = file.
           Output:
                whatever self._RegisterTmpFile returns for the new TempFile
                (described by the original author as the absolute expanded
                path of the input).
           Raises:
                IOError: if the path does not exist, has already been
                         registered, or its kind (file vs. directory) does
                         not match isDir.
        """
        thisPath = path.abspath(path.expanduser(thisPath))
        fileOrDir = "directory" if isDir else "file"

        # Priority of the reported reason: missing path, then duplicate
        # registration, then a file/directory kind mismatch.
        errMsg = ""
        if not isExist(thisPath):
            errMsg = "Failed to register {0} {1} as it does not exist.".\
                format(fileOrDir, thisPath)
        elif self._isRegistered(thisPath):
            errMsg = "Failed to register {0} {1} as it has been registered.".\
                format(fileOrDir, thisPath)
        elif isDir and not path.isdir(thisPath):
            errMsg = "Failied to register a file as a directory."
        elif not isDir and path.isdir(thisPath):
            # The path exists but is a directory while a file was requested.
            errMsg = "Failed to register a directory as a file."

        if errMsg != "":
            logging.error(errMsg)
            raise IOError(errMsg)

        return self._RegisterTmpFile(TempFile(thisPath,
                                              own=own, isDir=isDir))
Exemplo n.º 2
0
    def RegisterExistingTmpFile(self, thisPath, own=False, isDir=False):
        """Register an existing temporary file/directory if it exists.

           The path is user-expanded and made absolute before any check.

           Input:
                thisPath: path of the temporary file/directory to register.
                own     : Whether this object owns this file.
                isDir   : True = directory, False = file.
           Output:
                whatever self._RegisterTmpFile returns for the new TempFile
                (described by the original author as the absolute expanded
                path of the input).
           Raises:
                IOError: if the path does not exist, has already been
                         registered, or its kind (file vs. directory) does
                         not match isDir.
        """
        thisPath = path.abspath(path.expanduser(thisPath))
        fileOrDir = "directory" if isDir else "file"

        # Priority of the reported reason: missing path, then duplicate
        # registration, then a file/directory kind mismatch.
        errMsg = ""
        if not isExist(thisPath):
            errMsg = "Failed to register {0} {1} as it does not exist.".\
                format(fileOrDir, thisPath)
        elif self._isRegistered(thisPath):
            errMsg = "Failed to register {0} {1} as it has been registered.".\
                format(fileOrDir, thisPath)
        elif isDir and not path.isdir(thisPath):
            errMsg = "Failied to register a file as a directory."
        elif not isDir and path.isdir(thisPath):
            # The path exists but is a directory while a file was requested.
            errMsg = "Failed to register a directory as a file."

        if errMsg != "":
            logging.error(errMsg)
            raise IOError(errMsg)

        return self._RegisterTmpFile(TempFile(thisPath,
                                              own=own, isDir=isDir))
Exemplo n.º 3
0
 def test_isExist(self):
     """Test isExist(ff).

     Covers None, a missing path, an existing file, an existing file while
     PATH is empty, and the same path after it has been removed.
     """
     self.assertFalse(isExist(None))
     foo = os.path.join(self.outDir, 'foo')
     self.assertFalse(isExist(foo))
     # Close the handle deterministically so the content is flushed before
     # the existence checks below.
     with open(foo, 'w') as handle:
         handle.write('hello')
     self.assertTrue(os.path.exists(foo))
     self.assertTrue(isExist(foo))
     PATH = os.environ['PATH']
     os.environ['PATH'] = ''
     try:
         self.assertTrue(isExist(foo))  # test regression for missing 'ls'
         os.unlink(foo)
         self.assertFalse(isExist(foo))
     finally:
         # Always restore PATH, even when an assertion above fails, so the
         # rest of the test run is not affected.
         os.environ['PATH'] = PATH
Exemplo n.º 4
0
 def test_isExist(self):
     """Test isExist(ff).

     Covers None, a missing path, an existing file, an existing file while
     PATH is empty, and the same path after it has been removed.
     """
     self.assertFalse(isExist(None))
     foo = os.path.join(self.outDir, 'foo')
     self.assertFalse(isExist(foo))
     # Close the handle deterministically so the content is flushed before
     # the existence checks below.
     with open(foo, 'w') as handle:
         handle.write('hello')
     self.assertTrue(os.path.exists(foo))
     self.assertTrue(isExist(foo))
     PATH = os.environ['PATH']
     os.environ['PATH'] = ''
     try:
         self.assertTrue(isExist(foo))  # test regression for missing 'ls'
         os.unlink(foo)
         self.assertFalse(isExist(foo))
     finally:
         # Always restore PATH, even when an assertion above fails, so the
         # rest of the test run is not affected.
         os.environ['PATH'] = PATH
Exemplo n.º 5
0
    def SetRootDir(self, rootDir):
        """Set the default root directory for temporary files.

        A non-empty rootDir is user-expanded and made absolute first:
          * if it is an existing directory, a fresh sub-directory is
            created inside it and used as the real root;
          * if nothing exists at that path, the directory is created.
        In every other case (empty rootDir, path exists but is not a
        directory, or creation failed) a directory from tempfile.mkdtemp()
        is used, and if that fails too the root becomes "".

        Every directory created here is appended to self.dirDB as an owned
        TempFile; the chosen root is stored in self.defaultRootDir.
        """
        needFallback = True

        if rootDir != "":
            rootDir = path.abspath(path.expanduser(rootDir))
            if path.isdir(rootDir):
                # A shared dir (such as /scratch) was given: add another
                # layer of sub-dir and use that as the real root.
                rootDir = tempfile.mkdtemp(dir=rootDir)
                self.dirDB.append(TempFile(rootDir, own=True, isDir=True))
                needFallback = False
            elif not isExist(rootDir):
                # Nothing there yet: make the user-specified directory.
                try:
                    makedirs(rootDir)
                    self.dirDB.append(TempFile(rootDir, own=True, isDir=True))
                except (IOError, OSError):
                    # Could not create it; the generic fallback below
                    # takes over.
                    needFallback = True
                else:
                    needFallback = False

        if needFallback:
            try:
                rootDir = tempfile.mkdtemp()
                self.dirDB.append(TempFile(rootDir, own=True, isDir=True))
            except (IOError, OSError):
                # No temporary directory could be made at all.
                rootDir = ""

        self.defaultRootDir = rootDir
Exemplo n.º 6
0
    def SetRootDir(self, rootDir):
        """Set the default root directory for temporary files.

        A non-empty rootDir is user-expanded and made absolute first:
          * if it is an existing directory, a fresh sub-directory is
            created inside it and used as the real root;
          * if nothing exists at that path, the directory is created.
        In every other case (empty rootDir, path exists but is not a
        directory, or creation failed) a directory from tempfile.mkdtemp()
        is used, and if that fails too the root becomes "".

        Every directory created here is appended to self.dirDB as an owned
        TempFile; the chosen root is stored in self.defaultRootDir.
        """
        needFallback = True

        if rootDir != "":
            rootDir = path.abspath(path.expanduser(rootDir))
            if path.isdir(rootDir):
                # A shared dir (such as /scratch) was given: add another
                # layer of sub-dir and use that as the real root.
                rootDir = tempfile.mkdtemp(dir=rootDir)
                self.dirDB.append(TempFile(rootDir, own=True, isDir=True))
                needFallback = False
            elif not isExist(rootDir):
                # Nothing there yet: make the user-specified directory.
                try:
                    makedirs(rootDir)
                    self.dirDB.append(TempFile(rootDir, own=True, isDir=True))
                except (IOError, OSError):
                    # Could not create it; the generic fallback below
                    # takes over.
                    needFallback = True
                else:
                    needFallback = False

        if needFallback:
            try:
                rootDir = tempfile.mkdtemp()
                self.dirDB.append(TempFile(rootDir, own=True, isDir=True))
            except (IOError, OSError):
                # No temporary directory could be made at all.
                rootDir = ""

        self.defaultRootDir = rootDir
Exemplo n.º 7
0
    def CleanUp(self, realDelete=True):
        """Deregister all temporary files and directories, and delete them from
        the file system if realDelete is True.

        Only entries whose `own` flag is set and which still exist are
        deleted. self.fileDB and self.dirDB are emptied either way and
        self.defaultRootDir is reset to "".

        A directory that cannot be removed after several attempts is
        reported with a warning; no exception is raised for it.
        """
        # Always clean up temp files first.
        while self.fileDB:
            obj = self.fileDB.pop()
            if realDelete and obj.own and isExist(obj.name):
                logging.debug("Remove a temporary file {0}".format(obj.name))
                remove(obj.name)

        # Then clean up temp dirs
        maxTry = 5
        retryDelay = 3  # seconds to wait between attempts
        while self.dirDB:
            obj = self.dirDB.pop()
            if realDelete and obj.own and isExist(obj.name):
                logging.debug("Remove a temporary dir {0}".format(obj.name))
                # bug 25074, in some systems occasionally there might be a NFS
                # lock error: "Device or resource busy, unable to delete
                # .nfsxxxxxx".
                # This is because although all temp files have been deleted,
                # nfs still takes a while to send back an ack for the rpc call.
                # In that case, wait a few seconds before deleting the temp
                # directory, and try this several times.
                # If the temporary dir could not be deleted anyway, print a
                # warning instead of exiting with an error.
                for attempt in range(1, maxTry + 1):
                    try:
                        shutil.rmtree(obj.name)
                        break
                    except (IOError, OSError):
                        # No point in waiting after the last attempt.
                        if attempt < maxTry:
                            time.sleep(retryDelay)
                else:
                    # Every attempt failed.
                    logging.warning("Unable to remove a temporary dir {0}".
                                    format(obj.name))

        self.defaultRootDir = ""
Exemplo n.º 8
0
    def CleanUp(self, realDelete=True):
        """Deregister all temporary files and directories, and delete them from
        the file system if realDelete is True.

        Only entries whose `own` flag is set and which still exist are
        deleted. self.fileDB and self.dirDB are emptied either way and
        self.defaultRootDir is reset to "".

        A directory that cannot be removed after several attempts is
        reported with a warning; no exception is raised for it.
        """
        # Always clean up temp files first.
        while self.fileDB:
            obj = self.fileDB.pop()
            if realDelete and obj.own and isExist(obj.name):
                logging.debug("Remove a temporary file {0}".format(obj.name))
                remove(obj.name)

        # Then clean up temp dirs
        maxTry = 5
        retryDelay = 3  # seconds to wait between attempts
        while self.dirDB:
            obj = self.dirDB.pop()
            if realDelete and obj.own and isExist(obj.name):
                logging.debug("Remove a temporary dir {0}".format(obj.name))
                # bug 25074, in some systems occasionally there might be a NFS
                # lock error: "Device or resource busy, unable to delete
                # .nfsxxxxxx".
                # This is because although all temp files have been deleted,
                # nfs still takes a while to send back an ack for the rpc call.
                # In that case, wait a few seconds before deleting the temp
                # directory, and try this several times.
                # If the temporary dir could not be deleted anyway, print a
                # warning instead of exiting with an error.
                for attempt in range(1, maxTry + 1):
                    try:
                        shutil.rmtree(obj.name)
                        break
                    except (IOError, OSError):
                        # No point in waiting after the last attempt.
                        if attempt < maxTry:
                            time.sleep(retryDelay)
                else:
                    # Every attempt failed.
                    logging.warning("Unable to remove a temporary dir {0}".
                                    format(obj.name))

        self.defaultRootDir = ""
Exemplo n.º 9
0
    def _toCmd(self, inSamFile, refFile, outSamFile,
               alnServiceName, scoreSign, options, adapterGffFile):
        """ Generate a samFilter command line from options.
            Input:
                inSamFile : the input SAM file
                refFile   : the reference FASTA file
                outSamFile: the output SAM file
                alnServiceName: aligner service name
                scoreSign : score sign, can be -1 or 1
                options   : argument options
            Output:
                a command-line string
        """
        cmdStr = self.progName + \
            " {inSamFile} {refFile} {outSamFile} ".format(
                inSamFile=inSamFile,
                refFile=refFile,
                outSamFile=outSamFile)

        if options.maxDivergence is not None:
            maxDivergence = int(options.maxDivergence if options.maxDivergence
                                > 1.0 else (options.maxDivergence * 100))
            cmdStr += " -minPctSimilarity {0}".format(100 - maxDivergence)

        if options.minAccuracy is not None:
            minAccuracy = int(options.minAccuracy if options.minAccuracy > 1.0
                              else (options.minAccuracy * 100))
            cmdStr += " -minAccuracy {0}".format(minAccuracy)

        if options.minLength is not None:
            cmdStr += " -minLength {0}".format(options.minLength)

        if options.seed is not None:
            cmdStr += " -seed {0}".format(options.seed)

        if scoreSign in [1, -1]:
            cmdStr += " -scoreSign {0}".format(scoreSign)
        else:
            logging.error("{0}'s score sign is neither 1 nor -1.".format(
                alnServiceName))

        if options.scoreCutoff is not None:
            cmdStr += " -scoreCutoff {0}".format(options.scoreCutoff)

        if options.hitPolicy is not None:
            cmdStr += " -hitPolicy {0}".format(options.hitPolicy)

        if options.filterAdapterOnly is True and \
            isExist(adapterGffFile):
            cmdStr += " -filterAdapterOnly {gffFile}".format(
                    gffFile=adapterGffFile)
        return cmdStr
Exemplo n.º 10
0
    def _gmapCreateDB(self, referenceFile, isWithinRepository, tempRootDir):
        """
        Create gmap database for reference sequences if no DB exists.
        Wait for gmap DB to be created if gmap_db.lock exists.
        return (gmap_DB_root_path, gmap_DB_name).

        Input:
            referenceFile     : the reference sequence file given to
                                gmap_build.
            isWithinRepository: True if referenceFile lives inside a
                                reference repository.
            tempRootDir       : directory for the DB when the reference is
                                not within a repository.
        Raises:
            RuntimeError: if the lock file cannot be created or gmap_build
                          exits with a non-zero code.
        """
        # Determine dbRoot according to whether the reference file is within
        # a reference repository.
        if isWithinRepository:
            # If the reference file is within a reference repository, create
            # gmap_db under the root of the repository, then the gmap DB root
            # is the repo root, and gmap DB name is 'gmap_db', e.g.,
            # refrepo/
            # --------sequence/
            # --------gmap_db/
            # --------reference.info.xml
            dbRoot = path.split(path.dirname(referenceFile))[0]
            dbName = "gmap_db"
        else:  # Otherwise, create gmap_db under the tempRootDir, and give the
            # gmap DB a random name
            dbRoot = tempRootDir
            dbName = "gmap_db_{sfx}".format(sfx=randint(100000, 1000000))

        dbPath = path.join(dbRoot, dbName)
        dbLock = dbPath + ".lock"
        # Check if DB already exists
        if isExist(dbPath) and not isExist(dbLock):
            # gmap_db already exists
            logging.info(self.name + ": GMAP database {dbPath} found".format(dbPath=dbPath))
            return (dbRoot, dbName)

        # Check if DB is being created by other pbalign calls
        while isExist(dbLock):
            logging.info(
                self.name + ": Waiting for GMAP database to be " + "created for {inFa}".format(inFa=referenceFile)
            )
            sleep(10)

        # Create DB if it does not exist
        if not isExist(dbPath):
            try:
                # Touch the lock file
                _output, errCode, errMsg = backticks("touch {dbLock}".format(dbLock=dbLock))
                logging.debug(self.name + ": Create a lock when GMAP DB is " + "being built.")
                if errCode != 0:
                    # Only the literal is formatted; errMsg is appended
                    # verbatim because it may itself contain braces.
                    logging.error(self.name + ": Failed to create {dbLock}.\n".format(dbLock=dbLock) + errMsg)
                    raise RuntimeError(errMsg)

                logging.info(self.name + ": Create GMAP DB for {inFa}.".format(inFa=referenceFile))
                cmdStr = "gmap_build -k 12 --db={dbName} --dir={dbRoot} {inFa}".format(
                    dbName=dbName, dbRoot=dbRoot, inFa=referenceFile
                )
                _output, errCode, errMsg = backticks(cmdStr)
                logging.debug(self.name + ": Call {cmdStr}".format(cmdStr=cmdStr))
                if errCode != 0:
                    logging.error(self.name + ": Failed to build GMAP db.\n" + errMsg)
                    raise RuntimeError(errMsg)
            finally:
                # Release the lock on success and on every failure path
                # (including unexpected exceptions) so that other pbalign
                # calls waiting for this DB are not blocked forever.
                self._releaseLock(dbLock)

        return (dbRoot, dbName)
Exemplo n.º 11
0
    def _gmapCreateDB(self, referenceFile, isWithinRepository, tempRootDir):
        """
        Create gmap database for reference sequences if no DB exists.
        Wait for gmap DB to be created if gmap_db.lock exists.
        return (gmap_DB_root_path, gmap_DB_name).

        Input:
            referenceFile     : the reference sequence file given to
                                gmap_build.
            isWithinRepository: True if referenceFile lives inside a
                                reference repository.
            tempRootDir       : directory for the DB when the reference is
                                not within a repository.
        Raises:
            RuntimeError: if the lock file cannot be created or gmap_build
                          exits with a non-zero code.
        """
        # Determine dbRoot according to whether the reference file is within
        # a reference repository.
        if isWithinRepository:
            # If the reference file is within a reference repository, create
            # gmap_db under the root of the repository, then the gmap DB root
            # is the repo root, and gmap DB name is 'gmap_db', e.g.,
            # refrepo/
            # --------sequence/
            # --------gmap_db/
            # --------reference.info.xml
            dbRoot = path.split(path.dirname(referenceFile))[0]
            dbName = "gmap_db"
        else:  # Otherwise, create gmap_db under the tempRootDir, and give the
            # gmap DB a random name
            dbRoot = tempRootDir
            dbName = "gmap_db_{sfx}".format(sfx=randint(100000, 1000000))

        dbPath = path.join(dbRoot, dbName)
        dbLock = dbPath + ".lock"
        # Check if DB already exists
        if isExist(dbPath) and not isExist(dbLock):
            # gmap_db already exists
            logging.info(self.name + ": GMAP database {dbPath} found".format(
                dbPath=dbPath))
            return (dbRoot, dbName)

        # Check if DB is being created by other pbalign calls
        while isExist(dbLock):
            logging.info(self.name + ": Waiting for GMAP database to be " +
                         "created for {inFa}".format(inFa=referenceFile))
            sleep(10)

        # Create DB if it does not exist
        if not isExist(dbPath):
            try:
                # Touch the lock file
                _output, errCode, errMsg = backticks(
                    "touch {dbLock}".format(dbLock=dbLock))
                logging.debug(self.name + ": Create a lock when GMAP DB is " +
                              "being built.")
                if errCode != 0:
                    # Only the literal is formatted; errMsg is appended
                    # verbatim because it may itself contain braces.
                    logging.error(self.name +
                                  ": Failed to create {dbLock}.\n".format(
                                      dbLock=dbLock) +
                                  errMsg)
                    raise RuntimeError(errMsg)

                logging.info(self.name + ": Create GMAP DB for {inFa}.".format(
                    inFa=referenceFile))
                cmdStr = "gmap_build -k 12 --db={dbName} --dir={dbRoot} {inFa}".\
                    format(dbName=dbName, dbRoot=dbRoot, inFa=referenceFile)
                _output, errCode, errMsg = backticks(cmdStr)
                logging.debug(self.name + ": Call {cmdStr}".format(
                    cmdStr=cmdStr))
                if errCode != 0:
                    logging.error(self.name + ": Failed to build GMAP db.\n" +
                                  errMsg)
                    raise RuntimeError(errMsg)
            finally:
                # Release the lock on success and on every failure path
                # (including unexpected exceptions) so that other pbalign
                # calls waiting for this DB are not blocked forever.
                self._releaseLock(dbLock)

        return (dbRoot, dbName)
Exemplo n.º 12
0
 def test_isExist(self):
     """isExist(None) must evaluate as false."""
     outcome = isExist(None)
     self.assertFalse(outcome)
Exemplo n.º 13
0
 def test_isExist(self):
     """isExist(None) must evaluate as false."""
     outcome = isExist(None)
     self.assertFalse(outcome)
Exemplo n.º 14
0
    def _toCmd(self, inSamFile, refFile, outSamFile,
            alignerName, scoreSign, options, adapterGffFile):
        """ Generate a samFilter command line from options.
            Input:
                inSamFile : the input SAM file
                refFile   : the reference FASTA file
                outSamFile: the output SAM file
                alignerName: aligner service name
                scoreSign : score sign, can be -1 or 1
                options   : argument options
            Output:
                a command-line string
        """
        # blasr supports in-line alignment filteration,
        # no need to call samFilter at all.
        if alignerName == "blasr" and \
            not self.options.filterAdapterOnly:
            cmdStr = "rm -f {outFile} && ln -s {inFile} {outFile}".format(
                    inFile=inSamFile, outFile=outSamFile)
            return cmdStr

        # if aligner is not blasr, call samFilter instead
        cmdStr = self.progName + \
            " {inSamFile} {refFile} {outSamFile} ".format(
                inSamFile=inSamFile,
                refFile=refFile,
                outSamFile=outSamFile)

        if options.maxDivergence is not None:
            maxDivergence = int(options.maxDivergence if options.maxDivergence
                                > 1.0 else (options.maxDivergence * 100))
            cmdStr += " -minPctSimilarity {0}".format(100 - maxDivergence)

        if options.minAccuracy is not None:
            minAccuracy = int(options.minAccuracy if options.minAccuracy > 1.0
                              else (options.minAccuracy * 100))
            cmdStr += " -minAccuracy {0}".format(minAccuracy)

        if options.minLength is not None:
            cmdStr += " -minLength {0}".format(options.minLength)

        if options.seed is not None:
            cmdStr += " -seed {0}".format(options.seed)

        if scoreSign in [1, -1]:
            cmdStr += " -scoreSign {0}".format(scoreSign)
        else:
            logging.error("{0}'s score sign is neither 1 nor -1.".format(
                alignerName))

        if options.scoreCutoff is not None:
            cmdStr += " -scoreCutoff {0}".format(options.scoreCutoff)

        if options.hitPolicy is not None:
            cmdStr += " -hitPolicy {0}".format(options.hitPolicy)

        if options.filterAdapterOnly is True and \
            isExist(adapterGffFile):
            cmdStr += " -filterAdapterOnly {gffFile}".format(
                    gffFile=adapterGffFile)

        return cmdStr