Python Gff3Record示例，pbcore.io.Gff3Record Python示例

示例#1

0

显示文件

文件： writeSummaryToCmp.py 项目： jgurtowski/kineticsTools

    def _mainLoop(self):
        """
        Copy the alignment summary GFF to the output file, adding per-strand
        modification counts to every 'region' record.

        Inputs are taken from self.args:
          - modifications:    GFF of modification calls (read via GffReader)
          - alignmentSummary: GFF whose 'region' records get annotated
          - outfile:          path of the annotated GFF to write

        Lines starting with '#' are passed through (stripped of surrounding
        whitespace).  The tool-specific headers are written once, immediately
        before the first non-header line.  'sequence-header' entries are
        recorded in self.seqMap as internal tag -> external tag.  Only records
        whose type is 'region' are written to the output.
        """
        # Read in the existing modifications.gff
        modReader = GffReader(self.args.modifications)

        # Set up some additional headers to be injected
        headers = [
            ('source', 'kineticModificationCaller 1.3.1'),
            ('source-commandline', " ".join(sys.argv)),
            ('attribute-description', 'modsfwd - count of detected DNA modifications on forward strand'),
            ('attribute-description', 'modsrev - count of detected DNA modifications on reverse strand')
        ]

        # Get modification calls
        # NOTE(review): hits are matched to regions by position only; the
        # sequence id is not compared -- confirm this is intended when the
        # inputs cover more than one reference sequence.
        hits = [{"pos": x.start, "strand": x.strand} for x in modReader if x.type == 'modified_base']

        self.seqMap = {}
        inHeader = True

        # Context managers guarantee both handles are flushed and closed, even
        # if parsing a record raises part-way through the file.
        with open(self.args.alignmentSummary) as summaryFile:
            with open(self.args.outfile, "w") as summaryWriter:
                for line in summaryFile:
                    # Pass any metadata line straight through
                    if line[0] == "#":

                        # Parse headers
                        splitFields = line.replace('#', '').split(' ')
                        field = splitFields[0]
                        value = " ".join(splitFields[1:])
                        if field == 'sequence-header':
                            internalTag, _, externalTag = value.strip().partition(' ')
                            self.seqMap[internalTag] = externalTag
                        summaryWriter.write(line.strip() + "\n")
                        continue

                    if inHeader:
                        # We are at the end of the header -- write the tool-specific headers
                        for header in headers:
                            summaryWriter.write("##%s %s\n" % header)
                        inHeader = False

                    # Parse the line
                    rec = Gff3Record.fromString(line)

                    if rec.type == 'region':
                        # Get the hits in this interval, add them to the gff record
                        intervalHits = [h for h in hits if rec.start <= h['pos'] <= rec.end]
                        strand0Hits = len([h for h in intervalHits if h['strand'] == '+'])
                        strand1Hits = len([h for h in intervalHits if h['strand'] == '-'])

                        rec.modsfwd = strand0Hits
                        rec.modsrev = strand1Hits

                        summaryWriter.write(str(rec) + "\n")

示例#2

0

显示文件

文件： test_pbcore_io_GffIO.py 项目： dyermd/pbcore

 def test_fromString(self):
     """Parsing a record's string form must yield a record that prints identically."""
     serialized = str(self.record)
     reparsed = Gff3Record.fromString(serialized)
     assert_equal(str(self.record), str(reparsed))

示例#3

0

显示文件

文件： summarizeModifications.py 项目： PacificBiosciences/kineticsTools

    def _mainLoop(self):
        """
        Copy the alignment summary GFF to the output file, adding per-strand,
        per-event-type modification counts to every 'region' record.

        Reads self.modifications (GFF of modification calls, via GffReader) and
        self.alignmentSummary, and writes self.outfile.  Only calls whose type
        is listed in self.knownModificationEvents are counted, and a call is
        assigned to a region when its position lies inside the region and the
        sequence ids match.

        Lines starting with '#' are passed through (stripped of surrounding
        whitespace).  The tool-specific headers are written once, immediately
        before the first non-header line.  'sequence-header' entries are
        recorded in self.seqMap as internal tag -> external tag.  Only records
        whose type is 'region' are written to the output.

        Returns 0.
        """
        # Read in the existing modifications.gff
        modReader = GffReader(self.modifications)

        headerString = ",".join(['"' + x + '"' for x in self.knownModificationEvents])

        # Set up some additional headers to be injected
        headers = [
            ("source", "kineticModificationCaller 1.3.3"),
            ("source-commandline", " ".join(sys.argv)),
            (
                "attribute-description",
                "modsfwd - count of detected DNA modifications on forward strand by modification event type",
            ),
            (
                "attribute-description",
                "modsrev - count of detected DNA modifications on reverse strand by modification event type",
            ),
            ("region-modsfwd", headerString),
            # Column legend for the reverse-strand attribute; this entry was
            # previously a second, duplicated "region-modsfwd" header.
            ("region-modsrev", headerString),
        ]

        # Get modification calls
        hits = [
            {"pos": x.start, "strand": x.strand, "seqid": x.seqid, "type": x.type}
            for x in modReader
            if x.type in self.knownModificationEvents
        ]

        self.seqMap = {}
        inHeader = True

        # Context managers guarantee both handles are flushed and closed, even
        # if parsing a record raises part-way through the file.
        with open(self.alignmentSummary) as summaryFile:
            with open(self.outfile, "w") as summaryWriter:
                for line in summaryFile:
                    # Pass any metadata line straight through
                    if line[0] == "#":

                        # Parse headers
                        splitFields = line.replace("#", "").split(" ")
                        field = splitFields[0]
                        value = " ".join(splitFields[1:])
                        if field == "sequence-header":
                            internalTag, _, externalTag = value.strip().partition(" ")
                            self.seqMap[internalTag] = externalTag
                        summaryWriter.write(line.strip() + "\n")
                        continue

                    if inHeader:
                        # We are at the end of the header -- write the tool-specific headers
                        for header in headers:
                            summaryWriter.write("##%s %s\n" % header)
                        inHeader = False

                    # Parse the line
                    rec = Gff3Record.fromString(line)

                    if rec.type == "region":
                        # Get the hits in this interval, add them to the gff record
                        intervalHits = [
                            h for h in hits if rec.start <= h["pos"] <= rec.end and rec.seqid == h["seqid"]
                        ]

                        cFwd = self.countModificationTypes([h for h in intervalHits if h["strand"] == "+"])
                        cRev = self.countModificationTypes([h for h in intervalHits if h["strand"] == "-"])

                        # One comma-separated count per known event type, in
                        # the same order as the region-mods* header legend.
                        rec.modsfwd = ",".join([str(cFwd[x]) for x in self.knownModificationEvents])
                        rec.modsrev = ",".join([str(cRev[x]) for x in self.knownModificationEvents])

                        summaryWriter.write(str(rec) + "\n")
        return 0

示例#4

0

显示文件

文件： summarizeModifications.py 项目： smoe/kineticsTools

    def _mainLoop(self):
        """
        Copy the alignment summary GFF to the output file, adding per-strand,
        per-event-type modification counts to every 'region' record.

        Reads self.modifications (GFF of modification calls, via GffReader) and
        self.alignmentSummary, and writes self.outfile.  Only calls whose type
        is listed in self.knownModificationEvents are counted, and a call is
        assigned to a region when its position lies inside the region and the
        sequence ids match.

        Lines starting with '#' are passed through (stripped of surrounding
        whitespace).  The tool-specific headers are written once, immediately
        before the first non-header line.  'sequence-header' entries are
        recorded in self.seqMap as internal tag -> external tag.  Only records
        whose type is 'region' are written to the output.

        Returns 0.
        """
        # Read in the existing modifications.gff
        modReader = GffReader(self.modifications)

        headerString = ",".join(
            ['"' + x + '"' for x in self.knownModificationEvents])

        # Set up some additional headers to be injected
        headers = [
            ('source', 'kineticModificationCaller 1.3.3'),
            ('source-commandline', " ".join(sys.argv)),
            ('attribute-description',
             'modsfwd - count of detected DNA modifications on forward strand by modification event type'
             ),
            ('attribute-description',
             'modsrev - count of detected DNA modifications on reverse strand by modification event type'
             ),
            ('region-modsfwd', headerString),
            # Column legend for the reverse-strand attribute; this entry was
            # previously a second, duplicated 'region-modsfwd' header.
            ('region-modsrev', headerString)
        ]

        # Get modification calls
        hits = [{
            "pos": x.start,
            "strand": x.strand,
            "seqid": x.seqid,
            "type": x.type
        } for x in modReader if x.type in self.knownModificationEvents]

        self.seqMap = {}
        inHeader = True

        # Loop through
        with open(self.alignmentSummary) as summaryFile:
            with open(self.outfile, "w") as summaryWriter:
                for line in summaryFile:
                    # Pass any metadata line straight through
                    if line[0] == "#":

                        # Parse headers
                        splitFields = line.replace('#', '').split(' ')
                        field = splitFields[0]
                        value = " ".join(splitFields[1:])
                        if field == 'sequence-header':
                            internalTag, _, externalTag = \
                                value.strip().partition(' ')
                            self.seqMap[internalTag] = externalTag
                        print(line.strip(), file=summaryWriter)
                        continue

                    if inHeader:
                        # We are at the end of the header -- write the
                        # tool-specific headers
                        for header in headers:
                            print(("##%s %s" % header), file=summaryWriter)
                        inHeader = False

                    # Parse the line
                    rec = Gff3Record.fromString(line)

                    if rec.type == 'region':
                        # Get the hits in this interval, add them to the gff
                        # record
                        intervalHits = [
                            h for h in hits if rec.start <= h['pos'] <= rec.end
                            and rec.seqid == h['seqid']
                        ]

                        cFwd = self.countModificationTypes(
                            [h for h in intervalHits if h['strand'] == '+'])
                        cRev = self.countModificationTypes(
                            [h for h in intervalHits if h['strand'] == '-'])

                        # One comma-separated count per known event type, in
                        # the same order as the region-mods* header legend.
                        rec.modsfwd = ",".join([
                            str(cFwd[x]) for x in self.knownModificationEvents
                        ])  # pylint: disable=assigning-non-slot
                        rec.modsrev = ",".join([
                            str(cRev[x]) for x in self.knownModificationEvents
                        ])  # pylint: disable=assigning-non-slot

                        print(str(rec), file=summaryWriter)
        return 0

示例#5

0

显示文件

    def makeGffRecord(self, siteObs):
        """
        Convert the internal site observation object into a GFF entry.

        siteObs is indexed by key (presumably a dict -- confirm against the
        caller).  The keys 'tpl', 'strand', 'coverage', 'ipdRatio', 'refName'
        and 'score' are always read.  'motif', 'id', 'modificationScore',
        'modification' and the methyl-fraction keys (FRAC, FRAClow, FRACup)
        are used only when present; the fraction attributes additionally
        require self.options.methylFraction to be set.

        Returns a Gff3Record with source 'kinModCall' whose start and end are
        both the 1-based template position.
        """
        # Some useful attributes about the observation
        # - cognate base
        # - context snippet
        # - ipd ratio
        # - coverage
        snippet = self.snippetFunc(siteObs['tpl'], siteObs['strand'])
        attributes = [('coverage', siteObs['coverage']), ('context', snippet),
                      ('IPDRatio', siteObs['ipdRatio'])]

        # Base of detected mod -- single position, closed,open
        # interval.
        # Note -- internally the tool uses 0-based reference
        # coordinates, however in gff the template indices are
        # 1-based.  Make that adjustment here.
        # On start vs. end: My reading of the gff spec
        # (http://www.sequenceontology.org/resources/gff3.html) says
        # to me that 1-base long feature (e.g. a modified base) should
        # have start + 1 == end, and 0-base long features
        # (e.g. insertions) should have start == end. This is not the
        # convention that Marco has adopted in SMRTView, or the
        # convention that EviCons originally used.  We will adopt
        # their convention here, for now.
        start = siteObs['tpl'] + 1
        end = siteObs['tpl'] + 1

        # Membership is tested with `in` rather than dict.has_key(), which no
        # longer exists on Python 3 dicts.
        if 'motif' in siteObs:
            attributes.append(('motif', "%s" % siteObs['motif']))

        if 'id' in siteObs:
            attributes.append(('id', "%s" % siteObs['id']))

        if self.options.methylFraction and FRAC in siteObs:
            attributes.append(('frac', "%.3f" % siteObs[FRAC]))
            attributes.append(('fracLow', "%.3f" % siteObs[FRAClow]))
            attributes.append(('fracUp', "%.3f" % siteObs[FRACup]))

        if 'modificationScore' in siteObs:
            # Report the QV from the modification identification module as a special tag
            attributes.append(
                ('identificationQv',
                 "%d" % int(round(siteObs['modificationScore']))))

        if 'modification' in siteObs:

            if siteObs['modification'] == '.':
                recordType = 'modified_base'

            elif siteObs['modification'] == 'nMd':
                recordType = '.'

            else:
                # if we have an identified mod, use it; otherwise use the old generic term
                recordType = siteObs['modification']

        else:
            recordType = 'modified_base'

        refName = siteObs['refName']
        score = int(round(siteObs['score']))
        # Strand 0 is reported as '+', any other value as '-'.
        strand = '+' if siteObs['strand'] == 0 else '-'

        return Gff3Record(refName,
                          start,
                          end,
                          type=recordType,
                          score=score,
                          strand=strand,
                          source='kinModCall',
                          attributes=attributes)

示例#6

0

显示文件

文件： writeSummaryToCmp.py 项目： smoe/kineticsTools

    def _mainLoop(self):
        """
        Copy the alignment summary GFF to the output file, adding per-strand
        modification counts to every 'region' record.

        Inputs are taken from self.args:
          - modifications:    GFF of modification calls (read via GffReader)
          - alignmentSummary: GFF whose 'region' records get annotated
          - outfile:          path of the annotated GFF to write

        Lines starting with '#' are passed through (stripped of surrounding
        whitespace).  The tool-specific headers are written once, immediately
        before the first non-header line.  'sequence-header' entries are
        recorded in self.seqMap as internal tag -> external tag.  Only records
        whose type is 'region' are written to the output.
        """
        # Read in the existing modifications.gff
        modReader = GffReader(self.args.modifications)

        # Set up some additional headers to be injected
        headers = [
            ('source', 'kineticModificationCaller 1.3.1'),
            ('source-commandline', " ".join(sys.argv)),
            ('attribute-description',
             'modsfwd - count of detected DNA modifications on forward strand'
             ),
            ('attribute-description',
             'modsrev - count of detected DNA modifications on reverse strand')
        ]

        # Get modification calls
        # NOTE(review): hits are matched to regions by position only; the
        # sequence id is not compared -- confirm this is intended when the
        # inputs cover more than one reference sequence.
        hits = [{
            "pos": x.start,
            "strand": x.strand
        } for x in modReader if x.type == 'modified_base']

        self.seqMap = {}
        inHeader = True

        # open() replaces the file() builtin, which does not exist on
        # Python 3; the context managers also guarantee both handles are
        # flushed and closed if a record fails to parse.
        with open(self.args.alignmentSummary) as summaryFile:
            with open(self.args.outfile, "w") as summaryWriter:
                for line in summaryFile:
                    # Pass any metadata line straight through
                    if line[0] == "#":

                        # Parse headers
                        splitFields = line.replace('#', '').split(' ')
                        field = splitFields[0]
                        value = " ".join(splitFields[1:])
                        if field == 'sequence-header':
                            internalTag, _, externalTag = \
                                value.strip().partition(' ')
                            self.seqMap[internalTag] = externalTag
                        print(line.strip(), file=summaryWriter)
                        continue

                    if inHeader:
                        # We are at the end of the header -- write the
                        # tool-specific headers
                        for header in headers:
                            print(("##%s %s" % header), file=summaryWriter)
                        inHeader = False

                    # Parse the line
                    rec = Gff3Record.fromString(line)

                    if rec.type == 'region':
                        # Get the hits in this interval, add them to the gff
                        # record
                        intervalHits = [
                            h for h in hits
                            if rec.start <= h['pos'] <= rec.end
                        ]
                        strand0Hits = len(
                            [h for h in intervalHits if h['strand'] == '+'])
                        strand1Hits = len(
                            [h for h in intervalHits if h['strand'] == '-'])

                        rec.modsfwd = strand0Hits
                        rec.modsrev = strand1Hits

                        print(str(rec), file=summaryWriter)