示例#1
0
 def test_isValidOutputFormat(self):
     """Check isValidOutputFormat() on the format detected for sample names.

     Each sample file name is first mapped to a format by getFileFormat().
     Only the ".sam" and ".cmp.h5" names are expected to be accepted as
     output formats; every other sample name must be rejected.
     """
     # (sample file name, whether its format must be a valid output format)
     expectations = (
         ("ab.fasta", False),
         ("ab.fa", False),
         ("ab.pls.h5", False),
         ("ab.plx.h5", False),
         ("ab.bas.h5", False),
         ("ab.bax.h5", False),
         ("ab.fofn", False),
         ("ab.sam", True),
         ("ab.cmp.h5", True),
         ("ab.xyz", False),
     )
     for sampleName, shouldBeValid in expectations:
         # Pick the assertion matching the expected outcome so that the
         # truthiness check is the same one a hand-written assert would do.
         verify = self.assertTrue if shouldBeValid else self.assertFalse
         verify(isValidOutputFormat(getFileFormat(sampleName)))
示例#2
0
    def _output(self, inSam, refFile, outFile, readType=None, smrtTitle=False):
        """Generate a sam or a cmp.h5 file.

        When outFile is a SAM file, inSam is simply moved to outFile. When
        outFile is a CMP.H5 file, the external program samtoh5 is called to
        convert inSam. For any other output format nothing is done and the
        initial (successful) result is returned.

        Input:
            inSam    : an input SAM file. (e.g. fileName.filteredSam)
            refFile  : the reference file. (e.g. fileName.targetFileName)
            outFile  : the output SAM or CMP.H5 file.
                       (i.e. fileName.outputFileName)
            readType : standard or cDNA or CCS (can be None if not
                       specified); passed to samtoh5 via -readType.
            smrtTitle: if true, -smrtTitle is appended to the samtoh5
                       command line.
        Output:
            output, errCode, errMsg
        Raises:
            RuntimeError: if moving the SAM file raised shutil.Error, or if
                          samtoh5 reported a non-zero error code.
        """
        output, errCode, errMsg = "", 0, ""
        # Name of the step that produced errCode. It has to be bound on every
        # path: the failure handler below uses it to build its message, and
        # leaving it unset on the SAM path turned a failed move into an
        # UnboundLocalError instead of the intended RuntimeError.
        prog = ""
        outFormat = getFileFormat(outFile)

        if outFormat == FILE_FORMATS.SAM:
            #`mv inSam outFile`
            prog = "mv"
            logging.info("OutputService: Genearte the output SAM file.")
            logging.debug("OutputService: Move {src} as {dst}".format(
                src=inSam, dst=outFile))
            try:
                shutil.move(inSam, outFile)
            except shutil.Error as e:
                # Report the failed move the same way as a failed command.
                output, errCode, errMsg = "", 1, str(e)
        elif outFormat == FILE_FORMATS.CMP:
            #`samtoh5 inSam refFile outFile -readType readType`
            prog = "samtoh5"
            logging.info("OutputService: Genearte the output CMP.H5 " +
                         "file using samtoh5.")
            cmd = "samtoh5 {samFile} {refFile} {outFile}".format(
                samFile=inSam, refFile=refFile, outFile=outFile)
            if readType is not None:
                cmd += " -readType {0} ".format(readType)
            if smrtTitle:
                cmd += " -smrtTitle "
            # Execute the command line
            logging.debug("OutputService: Call \"{0}\"".format(cmd))
            output, errCode, errMsg = backticks(cmd)
        # NOTE(review): other output formats fall through silently here;
        # presumably they are rejected before this method is called - confirm.

        if errCode != 0:
            errMsg = prog + " returned a non-zero exit status." + errMsg
            logging.error(errMsg)
            raise RuntimeError(errMsg)
        return output, errCode, errMsg
示例#3
0
    def _makeSane(self, args, fileNames):
        """
        Check whether the input arguments make sense or not.

        Input:
            args     : parsed options; useccs and forQuiver are read, and
                       readType is set to "CCS" when args.useccs is
                       "useccsdenovo" or the input file format is CCS.
            fileNames: provides inputFileFormat, pulseFileName and
                       outputFileName.
        Raises:
            ValueError: if args.forQuiver is true and no pulse file is
                        available and/or the output file is not in cmp.h5
                        format. All detected problems are reported in one
                        message.
        """
        if args.useccs == "useccsdenovo":
            args.readType = "CCS"

        if fileNames.inputFileFormat == FILE_FORMATS.CCS:
            args.readType = "CCS"

        if not args.forQuiver:
            return

        # Collect every problem instead of overwriting one message with the
        # next, so that a run failing both checks reports both of them.
        problems = []
        if fileNames.pulseFileName is None:
            problems.append("Neither the input file is in bas/pls/ccs.h5 " +
                            "format, nor --pulseFile is specified")
        if getFileFormat(fileNames.outputFileName) != FILE_FORMATS.CMP:
            problems.append("The output file is not in cmp.h5 format")

        if problems:
            # The individual messages carry no trailing separator, so the
            # shared suffix is attached with exactly one comma.
            errMsg = "; ".join(problems) + ", while --forQuiver is true."
            logging.error(errMsg)
            raise ValueError(errMsg)
示例#4
0
    def _pls2fasta(self, inputFileName, regionTable, noSplitSubreads):
        """ Convert a PacBio BASE/PULSE/FOFN file to FASTA using pls2fasta.
            Input:
                inputFileName  : a PacBio BASE/PULSE/FOFN file, or a FASTA
                                 file (returned unchanged).
                regionTable    : a region table RGN.H5/FOFN file; ignored
                                 when None or an empty string.
                noSplitSubreads: if true, -noSplitSubreads is passed to
                                 pls2fasta.
            Output:
                a FASTA file which can be used as an input by an aligner.
            Raises:
                RuntimeError if pls2fasta reports a non-zero error code.
        """
        # A FASTA input needs no conversion at all.
        if getFileFormat(inputFileName) == FILE_FORMATS.FASTA:
            return inputFileName

        # The conversion result goes to a newly registered temporary file.
        fastaPath = self._tempFileManager.RegisterNewTmpFile(suffix=".fasta")

        # Assemble the command line piece by piece, then glue it together.
        pieces = ["pls2fasta {plsFile} {fastaFile} ".format(
            plsFile=inputFileName, fastaFile=fastaPath)]
        if regionTable is not None and regionTable != "":
            pieces.append(" -regionTable {rt} ".format(rt=regionTable))
        if noSplitSubreads:
            pieces.append(" -noSplitSubreads ")
        command = "".join(pieces)

        convertNote = ": Convert {inFile} to FASTA format.".format(
            inFile=inputFileName)
        logging.info(self.name + convertNote)
        callNote = ": Call \"{cmd}\"".format(cmd=command)
        logging.debug(self.name + callNote)

        _unused, status, detail = backticks(command)
        if status == 0:
            # Hand back the converted FASTA file for use by an aligner.
            return fastaPath

        failure = detail + "Failed to convert {i} to {o}.".format(
            i=inputFileName, o=fastaPath)
        logging.error(failure)
        raise RuntimeError(failure)