Exemplo n.º 1
0
    def parseHmmerResults(self, fileName, resultsManager, bSkipAdjCorrection):
        """Load the hits in a HMMER output file into a results manager.

        Every hit produced by the parser is handed to resultsManager.addHit().
        The accumulated marker hits are then filtered through the PFAM clan
        file and, unless bSkipAdjCorrection is true, adjacent marker genes
        are identified. An IOError raised anywhere in this process is written
        to stderr and not propagated.
        """
        try:
            with open(fileName, 'r') as resultsFile:
                try:
                    parser = HMMERParser(resultsFile)
                except:
                    # report which file was involved, then let the error propagate
                    print("Error opening HMM file: ", fileName)
                    raise

                # consume hits until the parser hands back None
                nextHit = parser.next()
                while nextHit is not None:
                    resultsManager.addHit(nextHit)
                    nextHit = parser.next()

            # keep only the best hit to each PFAM clan
            clans = PFAM(DefaultValues.PFAM_CLAN_FILE)
            filteredHits = clans.filterHitsFromSameClan(resultsManager.markerHits)
            resultsManager.markerHits = filteredHits

            # the adjacency step corrects for errors in ORF calling
            if bSkipAdjCorrection:
                return
            resultsManager.identifyAdjacentMarkerGenes()
        except IOError as ioError:
            sys.stderr.write(str(ioError) + "\n")
Exemplo n.º 2
0
    def __createMarkerHMMs(self, binMarkerSet, outputFile, bReportProgress=True):
        """Create an indexed HMM file for the markers of a bin marker set.

        The models fetched are the marker genes reported by binMarkerSet plus
        every gene the PFAM clan file places in the same clan as one of them.
        Their accession numbers are written to a temporary key file, which is
        passed to the HMMER 'fetch' runner; the resulting outputFile is then
        indexed.

        Parameters:
          binMarkerSet    - object providing getMarkerGenes()
          outputFile      - path of the HMM file to create
          bReportProgress - if true, log the number of genes being fetched

        The temporary key file is removed even if writing, fetching, or
        indexing raises.
        """

        # get list of marker genes
        markerGenes = binMarkerSet.getMarkerGenes()

        # get all genes from the same clan as any marker gene
        pfam = PFAM(DefaultValues.PFAM_CLAN_FILE)
        genesInSameClan = pfam.genesInSameClan(markerGenes)

        # extract marker genes along with all genes from the same clan
        # (presumably both are sets, making this a union -- confirm with PFAM)
        allMarkers = markerGenes | genesInSameClan

        if bReportProgress:
            self.logger.info("  There are %d genes in the marker set and %d genes from the same PFAM clan." % (len(markerGenes), len(genesInSameClan)))

        # uniquely named key file in the system temporary directory
        keyFile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            # create file with all model accession numbers, one per line
            with open(keyFile, 'w') as fout:
                for modelAcc in allMarkers:
                    fout.write(modelAcc + '\n')

            # fetch specified models
            HF = HMMERRunner(mode='fetch')
            HF.fetch(DefaultValues.HMM_MODELS, keyFile, outputFile, bKeyFile=True)

            # index the HMM file, discarding any index left from a previous run
            if os.path.exists(outputFile + '.ssi'):
                os.remove(outputFile + '.ssi')
            HF.index(outputFile)
        finally:
            # remove key file; the existence check keeps a failed open() from
            # being masked by a second error raised during cleanup
            if os.path.exists(keyFile):
                os.remove(keyFile)
Exemplo n.º 3
0
    def parseHmmerResults(self, fileName, resultsManager, bSkipAdjCorrection):
        """Load the hits in a HMMER output file into a results manager.

        Every hit produced by the parser is handed to resultsManager.addHit().
        The accumulated marker hits are then filtered through the PFAM clan
        file and, unless bSkipAdjCorrection is true, adjacent marker genes
        are identified. An IOError raised anywhere in this process is written
        to stderr and not propagated.
        """
        try:
            with open(fileName, 'r') as resultsFile:
                try:
                    parser = HMMERParser(resultsFile)
                except:
                    # report which file was involved, then let the error propagate
                    print("Error opening HMM file: ", fileName)
                    raise

                # consume hits until the parser hands back None
                nextHit = parser.next()
                while nextHit is not None:
                    resultsManager.addHit(nextHit)
                    nextHit = parser.next()

            # keep only the best hit to each PFAM clan
            clans = PFAM(DefaultValues.PFAM_CLAN_FILE)
            filteredHits = clans.filterHitsFromSameClan(resultsManager.markerHits)
            resultsManager.markerHits = filteredHits

            # the adjacency step corrects for errors in ORF calling
            if bSkipAdjCorrection:
                return
            resultsManager.identifyAdjacentMarkerGenes()
        except IOError as ioError:
            sys.stderr.write(str(ioError) + "\n")
Exemplo n.º 4
0
    def __createMarkerHMMs(self, binMarkerSet, outputFile, bReportProgress=True):
        """Create an indexed HMM file for the markers of a bin marker set.

        The models fetched are the marker genes reported by binMarkerSet plus
        every gene the PFAM clan file places in the same clan as one of them.
        Their accession numbers are written to a temporary key file, which is
        passed to the HMMER 'fetch' runner; the resulting outputFile is then
        indexed.

        Parameters:
          binMarkerSet    - object providing getMarkerGenes()
          outputFile      - path of the HMM file to create
          bReportProgress - if true, log the number of genes being fetched

        The temporary key file is removed even if writing, fetching, or
        indexing raises.
        """

        # get list of marker genes
        markerGenes = binMarkerSet.getMarkerGenes()

        # get all genes from the same clan as any marker gene
        pfam = PFAM(DefaultValues.PFAM_CLAN_FILE)
        genesInSameClan = pfam.genesInSameClan(markerGenes)

        # extract marker genes along with all genes from the same clan
        # (presumably both are sets, making this a union -- confirm with PFAM)
        allMarkers = markerGenes | genesInSameClan

        if bReportProgress:
            self.logger.info("  There are %d genes in the marker set and %d genes from the same PFAM clan." % (len(markerGenes), len(genesInSameClan)))

        # uniquely named key file in the system temporary directory
        keyFile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            # create file with all model accession numbers, one per line
            with open(keyFile, 'w') as fout:
                for modelAcc in allMarkers:
                    fout.write(modelAcc + '\n')

            # fetch specified models
            HF = HMMERRunner(mode='fetch')
            HF.fetch(DefaultValues.HMM_MODELS, keyFile, outputFile, bKeyFile=True)

            # index the HMM file, discarding any index left from a previous run
            if os.path.exists(outputFile + '.ssi'):
                os.remove(outputFile + '.ssi')
            HF.index(outputFile)
        finally:
            # remove key file; the existence check keeps a failed open() from
            # being masked by a second error raised during cleanup
            if os.path.exists(keyFile):
                os.remove(keyFile)