Пример #1
0
    def test_run(self):
        """FilterService.run() should finish with a zero exit code.

        Builds a FilterService from the fixture paths stored on the test
        case (alignedSam, targetFileName, filteredSam), runs it, and checks
        only the error code of the returned (output, errCode, errMsg) triple.
        """
        # NOTE(review): the meaning of each positional Opt field is not
        # visible here -- confirm against the Opt definition.
        filterOptions = Opt(30, 70, 50, 1, None, "random")

        service = FilterService(
            self.alignedSam,
            self.targetFileName,
            self.filteredSam,
            "BlasrService",
            -1,
            filterOptions,
        )

        # Only the exit code matters; output and message are ignored.
        _, exitCode, _ = service.run()

        self.assertEqual(exitCode, 0)
Пример #2
0
    def test_run_without_scoreCutoff(self):
        """FilterService without a score cutoff builds and runs cleanly.

        With the third and fourth Opt fields set to None, the generated
        command line must not mention "-seed" or "-scoreCutoff", must carry
        "-scoreSign 1", and running it must return a zero exit code.
        """
        # NOTE(review): the meaning of each positional Opt field is not
        # visible here -- confirm against the Opt definition.
        filterOptions = Opt(40, 50, None, None, None, "allbest")
        service = FilterService(
            self.alignedSam,
            self.targetFileName,
            self.filteredSam,
            "BowtieService",
            1,
            filterOptions,
        )

        # Flags that must be absent from the command line.
        for absentFlag in ("-seed", "-scoreCutoff"):
            self.assertNotIn(absentFlag, service.cmd)
        # The score sign given to the constructor must show up verbatim.
        self.assertIn("-scoreSign 1", service.cmd)

        # Only the exit code matters; output and message are ignored.
        _, exitCode, _ = service.run()

        self.assertEqual(exitCode, 0)
Пример #3
0
class PBAlign(PBToolRunner):
    """PBAlign tool runner.

    Drives the alignment pipeline implemented in run(): align reads with the
    selected aligner, filter the resulting SAM file, write the final SAM or
    CMP.H5 output, and optionally post-process the output for Quiver.
    """

    def __init__(self, argumentList):
        """Initialize a PBAlign object.
           argumentList is a list of arguments, such as:
           ['--debug', '--maxHits', '10', 'in.fasta', 'ref.fasta', 'out.sam']
        """
        desc = "Utilities for aligning PacBio reads to reference sequences."
        super(PBAlign, self).__init__(desc)
        self._argumentList = argumentList
        self._alnService = None
        self._filterService = None
        self.fileNames = PBAlignFiles()
        self._tempFileManager = TempFileManager()

        self.parser, self.args, _infoMsg = parseOptions(
            argumentList=self._argumentList, parser=self.parser)
        # args.verbosity is computed by counting # of 'v's in '-vv...'.
        # However in parseOptions, arguments are parsed twice to import config
        # options and then overwrite them with argumentList (e.g. command-line)
        # options, so the raw count is halved here.
        # Floor division keeps the result an int on both Python 2 and 3.
        if self.args.verbosity is None:
            self.args.verbosity = 0
        else:
            self.args.verbosity = int(self.args.verbosity) // 2

    def getVersion(self):
        """Return PBAlign version."""
        return __VERSION__

    def _createAlignService(self, name, args, fileNames, tempFileManager):
        """
        Create and return an AlignService by algorithm name.
        Input:
            name           : an algorithm name such as blasr
            fileNames      : an PBAlignFiles object
            args           : pbalign options
            tempFileManager: a temporary file manager
        Output:
            an object of AlignService subclass (such as BlasrService).
        Raises:
            ValueError if name is not in ALGORITHM_CANDIDATES, or if it is a
            candidate for which no service is wired up below.
        """
        if name not in ALGORITHM_CANDIDATES:
            errMsg = "ERROR: unrecognized algorithm {algo}".format(algo=name)
            logging.error(errMsg)
            raise ValueError(errMsg)

        service = None
        if name == "blasr":
            service = BlasrService(args, fileNames, tempFileManager)
        elif name == "bowtie":
            service = BowtieService(args, fileNames, tempFileManager)
        elif name == "gmap":
            service = GMAPService(args, fileNames, tempFileManager)
        else:
            # A candidate algorithm that has no service implementation yet.
            errMsg = "Service for {algo} is not implemented.".\
                     format(algo=name)
            logging.error(errMsg)
            raise ValueError(errMsg)

        # Let the service verify its own prerequisites before it is used.
        service.checkAvailability()
        return service

    def _makeSane(self, args, fileNames):
        """
        Check whether the input arguments make sense or not.

        Forces args.readType to "CCS" when args.useccs is "useccsdenovo" or
        when the input file format is CCS.
        Raises:
            ValueError if --forQuiver is set but no pulse file is known
            and/or the output file is not in cmp.h5 format. All detected
            problems are reported in a single message.
        """
        if args.useccs == "useccsdenovo":
            args.readType = "CCS"

        if fileNames.inputFileFormat == FILE_FORMATS.CCS:
            args.readType = "CCS"

        if args.forQuiver:
            # Collect every problem so that one does not hide the other.
            problems = []
            if fileNames.pulseFileName is None:
                problems.append("Neither the input file is in " +
                                "bas/pls/ccs.h5 format, nor --pulseFile " +
                                "is specified")
            if getFileFormat(fileNames.outputFileName) != FILE_FORMATS.CMP:
                problems.append("The output file is not in cmp.h5 format")
            if problems:
                errMsg = "; ".join(problems) + \
                         ", while --forQuiver is true."
                logging.error(errMsg)
                raise ValueError(errMsg)

    def _parseArgs(self):
        """Overwrite ToolRunner.parseArgs(self).
        Parse PBAlign arguments considering both args in argumentList and
        args in a config file (specified by --configFile).

        This method is deliberately empty: __init__ already obtains the
        parsed arguments from parseOptions().
        """
        pass

    def _output(self, inSam, refFile, outFile, readType=None, smrtTitle=False):
        """Generate a sam or a cmp.h5 file.
        Input:
            inSam    : an input SAM file. (e.g. fileName.filteredSam)
            refFile  : the reference file. (e.g. fileName.targetFileName)
            outFile  : the output SAM or CMP.H5 file.
                       (i.e. fileName.outputFileName)
            readType : standard or cDNA or CCS (can be None if not specified)
            smrtTitle: if true, pass -smrtTitle to samtoh5 (CMP.H5 only)
        Output:
            output, errCode, errMsg
        Raises:
            RuntimeError if moving the SAM file fails or samtoh5 returns a
            non-zero exit status.
        """
        output, errCode, errMsg = "", 0, ""
        # Name of the step reported in the error message; set per branch so
        # that it is always defined when an error is reported.
        prog = ""

        outFormat = getFileFormat(outFile)
        if outFormat == FILE_FORMATS.SAM:
            #`mv inSam outFile`
            prog = "shutil.move"
            logging.info("OutputService: Genearte the output SAM file.")
            logging.debug("OutputService: Move {src} as {dst}".format(
                src=inSam, dst=outFile))
            try:
                shutil.move(inSam, outFile)
            except (shutil.Error, IOError, OSError) as e:
                # Any failure to move the file is reported the same way as a
                # failing external command.
                output, errCode, errMsg = "", 1, str(e)
        elif outFormat == FILE_FORMATS.CMP:
            #`samtoh5 inSam outFile -readType readType
            logging.info("OutputService: Genearte the output CMP.H5 " +
                         "file using samtoh5.")
            prog = "samtoh5"
            cmd = "samtoh5 {samFile} {refFile} {outFile}".format(
                samFile=inSam, refFile=refFile, outFile=outFile)
            if readType is not None:
                cmd += " -readType {0} ".format(readType)
            if smrtTitle:
                cmd += " -smrtTitle "
            # Execute the command line
            logging.debug("OutputService: Call \"{0}\"".format(cmd))
            output, errCode, errMsg = backticks(cmd)
        # Any other output format is left untouched and reported as success.

        if errCode != 0:
            errMsg = prog + " returned a non-zero exit status." + errMsg
            logging.error(errMsg)
            raise RuntimeError(errMsg)
        return output, errCode, errMsg

    def _cleanUp(self, realDelete=False):
        """ Clean up temporary files and intermediate results.

        realDelete is forwarded unchanged to the temporary file manager.
        """
        logging.debug("Clean up temporary files and directories.")
        self._tempFileManager.CleanUp(realDelete)

    def run(self):
        """
        The main function, it is called by PBToolRunner.start().

        Runs the align, filter, output and (optional) Quiver post-processing
        stages in order.
        Output:
            0 on success, 1 as soon as one of the stages raises RuntimeError.
        Note that temporary files are only cleaned up when every stage
        succeeds; an early return leaves them in place.
        """
        startTime = time.time()
        logging.info("pbalign version: {version}".format(version=__VERSION__))
        logging.debug("Original arguments: " + str(self._argumentList))

        # Create an AlignService by algorithm name.
        self._alnService = self._createAlignService(self.args.algorithm,
                                                    self.args,
                                                    self.fileNames,
                                                    self._tempFileManager)

        # Make sane.
        self._makeSane(self.args, self.fileNames)

        # Run align service.
        try:
            self._alnService.run()
        except RuntimeError:
            return 1

        # Create a temporary filtered SAM file as output for FilterService.
        self.fileNames.filteredSam = self._tempFileManager.\
            RegisterNewTmpFile(suffix=".sam")

        # Call filter service.
        self._filterService = FilterService(self.fileNames.alignerSamOut,
                                            self.fileNames.targetFileName,
                                            self.fileNames.filteredSam,
                                            self._alnService.name,
                                            self._alnService.scoreSign,
                                            self.args,
                                            self.fileNames.adapterGffFileName)
        try:
            self._filterService.run()
        except RuntimeError:
            return 1

        # Output all hits either in SAM or CMP.H5.
        try:
            # -smrtTitle is requested for every aligner other than blasr and
            # whenever the input is a FASTA file.
            useSmrtTitle = False
            if (self.args.algorithm != "blasr" or
                    self.fileNames.inputFileFormat == FILE_FORMATS.FASTA):
                useSmrtTitle = True

            self._output(
                self.fileNames.filteredSam,
                self.fileNames.targetFileName,
                self.fileNames.outputFileName,
                self.args.readType,
                useSmrtTitle)
        except RuntimeError:
            return 1

        # Call post service for quiver.
        if self.args.forQuiver:
            postService = ForQuiverService(self.fileNames,
                                           self.args)
            try:
                postService.run()
            except RuntimeError:
                return 1

        # Really delete temporary files unless args.keepTmpFiles is
        # explicitly True.
        keepTmpFiles = (hasattr(self.args, "keepTmpFiles") and
                        self.args.keepTmpFiles is True)
        self._cleanUp(not keepTmpFiles)

        endTime = time.time()
        logging.info("Total time: {:.2f} s.".format(float(endTime - startTime)))
        return 0
Пример #4
0
    def run(self):
        """
        Entry point of the tool; invoked by PBToolRunner.start().

        Executes the pipeline stages in order: create and run the align
        service, run the filter service on the aligner's SAM output, write
        the final output file and, when args.forQuiver is set, run the
        Quiver post-processing service.
        Output:
            0 when every stage succeeds, 1 as soon as a stage raises
            RuntimeError (temporary files are not cleaned up in that case).
        """
        beganAt = time.time()
        logging.info("pbalign version: {version}".format(version=__VERSION__))
        logging.debug("Original arguments: " + str(self._argumentList))

        options = self.args
        files = self.fileNames

        # Stage 1: pick the aligner by name, sanity-check the arguments,
        # then align.
        self._alnService = self._createAlignService(
            options.algorithm, options, files, self._tempFileManager)
        self._makeSane(options, files)
        try:
            self._alnService.run()
        except RuntimeError:
            return 1

        # Stage 2: filter the aligner output into a fresh temporary SAM file.
        files.filteredSam = self._tempFileManager.RegisterNewTmpFile(
            suffix=".sam")
        self._filterService = FilterService(
            files.alignerSamOut,
            files.targetFileName,
            files.filteredSam,
            self._alnService.name,
            self._alnService.scoreSign,
            options,
            files.adapterGffFileName)
        try:
            self._filterService.run()
        except RuntimeError:
            return 1

        # Stage 3: write all hits, either as SAM or as CMP.H5.
        try:
            # The SMRT title flag is set for every non-blasr aligner and for
            # FASTA input.
            if options.algorithm != "blasr":
                useSmrtTitle = True
            elif files.inputFileFormat == FILE_FORMATS.FASTA:
                useSmrtTitle = True
            else:
                useSmrtTitle = False

            self._output(files.filteredSam,
                         files.targetFileName,
                         files.outputFileName,
                         options.readType,
                         useSmrtTitle)
        except RuntimeError:
            return 1

        # Stage 4 (optional): post-process the output for Quiver.
        if options.forQuiver:
            quiverService = ForQuiverService(files, options)
            try:
                quiverService.run()
            except RuntimeError:
                return 1

        # Really delete temporary files unless keepTmpFiles is explicitly
        # True.
        if hasattr(options, "keepTmpFiles") and options.keepTmpFiles is True:
            reallyDelete = False
        else:
            reallyDelete = True
        self._cleanUp(reallyDelete)

        elapsed = time.time() - beganAt
        logging.info("Total time: {:.2f} s.".format(float(elapsed)))
        return 0