def _mainLoop(self):
    """
    Copy the alignment summary GFF to the output file, adding per-region
    counts of detected modifications.

    Reads all 'modified_base' records from ``self.args.modifications``,
    then streams ``self.args.alignmentSummary`` into ``self.args.outfile``:

    - lines starting with '#' are passed through (stripped); any
      'sequence-header' line is also recorded in ``self.seqMap`` as
      {first token: remainder of the value}
    - the tool-specific headers are written once, just before the first
      non-header line
    - every 'region' record gets ``modsfwd`` / ``modsrev`` set to the
      number of hits on the '+' / '-' strand whose position lies inside
      [rec.start, rec.end]

    Both files are opened with context managers so they are closed (and
    the output flushed) even if parsing raises part-way through.
    """
    # Read in the existing modifications.gff
    modReader = GffReader(self.args.modifications)

    # Set up some additional headers to be injected
    headers = [
        ('source', 'kineticModificationCaller 1.3.1'),
        ('source-commandline', " ".join(sys.argv)),
        ('attribute-description',
         'modsfwd - count of detected DNA modifications on forward strand'),
        ('attribute-description',
         'modsrev - count of detected DNA modifications on reverse strand')
    ]

    # Get modification calls
    # NOTE(review): hits carry position and strand only, so the interval
    # test below does not distinguish between reference sequences --
    # confirm this is only used with single-reference inputs.
    hits = [{"pos": x.start, "strand": x.strand}
            for x in modReader if x.type == 'modified_base']

    self.seqMap = {}
    inHeader = True

    # open() + write() behave the same on Python 2 and Python 3, unlike
    # the file() builtin and the ``print >>`` statement.
    with open(self.args.alignmentSummary) as summaryFile:
        with open(self.args.outfile, "w") as summaryWriter:
            for line in summaryFile:
                # Pass any metadata line straight through
                if line[0] == "#":
                    # Parse headers
                    splitFields = line.replace('#', '').split(' ')
                    field = splitFields[0]
                    value = " ".join(splitFields[1:])
                    if field == 'sequence-header':
                        internalTag, _, externalTag = value.strip().partition(' ')
                        self.seqMap[internalTag] = externalTag
                    summaryWriter.write(line.strip() + "\n")
                    continue

                if inHeader:
                    # We are at the end of the header -- write the
                    # tool-specific headers
                    for field in headers:
                        summaryWriter.write(("##%s %s" % field) + "\n")
                    inHeader = False

                # Parse the line
                rec = Gff3Record.fromString(line)

                if rec.type == 'region':
                    # Get the hits in this interval, add them to the
                    # gff record
                    intervalHits = [h for h in hits
                                    if rec.start <= h['pos'] <= rec.end]
                    rec.modsfwd = len(
                        [h for h in intervalHits if h['strand'] == '+'])
                    rec.modsrev = len(
                        [h for h in intervalHits if h['strand'] == '-'])

                summaryWriter.write(str(rec) + "\n")
def test_fromString(self):
    """
    Round-trip check: a record rebuilt from its own string form must
    serialize back to the same string.
    """
    serialized = str(self.record)
    reparsed = Gff3Record.fromString(serialized)
    assert_equal(str(self.record), str(reparsed))
def _mainLoop(self):
    """
    Copy the alignment summary GFF to the output file, adding per-region
    modification counts broken down by modification event type.

    Reads every record from ``self.modifications`` whose type is listed in
    ``self.knownModificationEvents``, then streams
    ``self.alignmentSummary`` into ``self.outfile``:

    - lines starting with '#' are passed through (stripped); any
      'sequence-header' line is also recorded in ``self.seqMap`` as
      {first token: remainder of the value}
    - the tool-specific headers are written once, just before the first
      non-header line
    - every 'region' record gets ``modsfwd`` / ``modsrev`` set to a
      comma-separated list of counts, one per entry of
      ``self.knownModificationEvents`` (in that order), for the hits on
      the same seqid that fall inside [rec.start, rec.end]

    Both files are opened with context managers so they are closed (and
    the output flushed) even if parsing raises part-way through.

    Returns 0.
    """
    # Read in the existing modifications.gff
    modReader = GffReader(self.modifications)

    headerString = ",".join(['"' + x + '"' for x in self.knownModificationEvents])

    # Set up some additional headers to be injected
    headers = [
        ("source", "kineticModificationCaller 1.3.3"),
        ("source-commandline", " ".join(sys.argv)),
        (
            "attribute-description",
            "modsfwd - count of detected DNA modifications on forward strand by modification event type",
        ),
        (
            "attribute-description",
            "modsrev - count of detected DNA modifications on reverse strand by modification event type",
        ),
        # One column-order header per attribute. The second entry used to
        # repeat 'region-modsfwd', leaving modsrev without a header.
        ("region-modsfwd", headerString),
        ("region-modsrev", headerString),
    ]

    # Get modification calls
    hits = [
        {"pos": x.start, "strand": x.strand, "seqid": x.seqid, "type": x.type}
        for x in modReader
        if x.type in self.knownModificationEvents
    ]

    self.seqMap = {}
    inHeader = True

    # open() + write() behave the same on Python 2 and Python 3, unlike
    # the file() builtin and the ``print >>`` statement.
    with open(self.alignmentSummary) as summaryFile:
        with open(self.outfile, "w") as summaryWriter:
            for line in summaryFile:
                # Pass any metadata line straight through
                if line[0] == "#":
                    # Parse headers
                    splitFields = line.replace("#", "").split(" ")
                    field = splitFields[0]
                    value = " ".join(splitFields[1:])
                    if field == "sequence-header":
                        internalTag, _, externalTag = value.strip().partition(" ")
                        self.seqMap[internalTag] = externalTag
                    summaryWriter.write(line.strip() + "\n")
                    continue

                if inHeader:
                    # We are at the end of the header -- write the
                    # tool-specific headers
                    for field in headers:
                        summaryWriter.write(("##%s %s" % field) + "\n")
                    inHeader = False

                # Parse the line
                rec = Gff3Record.fromString(line)

                if rec.type == "region":
                    # Get the hits in this interval, add them to the
                    # gff record
                    intervalHits = [
                        h for h in hits
                        if rec.start <= h["pos"] <= rec.end and rec.seqid == h["seqid"]
                    ]

                    cFwd = self.countModificationTypes(
                        [h for h in intervalHits if h["strand"] == "+"])
                    cRev = self.countModificationTypes(
                        [h for h in intervalHits if h["strand"] == "-"])

                    rec.modsfwd = ",".join(
                        [str(cFwd[x]) for x in self.knownModificationEvents])
                    rec.modsrev = ",".join(
                        [str(cRev[x]) for x in self.knownModificationEvents])

                summaryWriter.write(str(rec) + "\n")

    return 0
def _mainLoop(self):
    """
    Copy the alignment summary GFF to the output file, adding per-region
    modification counts broken down by modification event type.

    Reads every record from ``self.modifications`` whose type is listed in
    ``self.knownModificationEvents``, then streams
    ``self.alignmentSummary`` into ``self.outfile``:

    - lines starting with '#' are passed through (stripped); any
      'sequence-header' line is also recorded in ``self.seqMap`` as
      {first token: remainder of the value}
    - the tool-specific headers are written once, just before the first
      non-header line
    - every 'region' record gets ``modsfwd`` / ``modsrev`` set to a
      comma-separated list of counts, one per entry of
      ``self.knownModificationEvents`` (in that order), for the hits on
      the same seqid that fall inside [rec.start, rec.end]

    Returns 0.
    """
    # Read in the existing modifications.gff
    modReader = GffReader(self.modifications)

    headerString = ",".join(
        ['"' + x + '"' for x in self.knownModificationEvents])

    # Set up some additional headers to be injected
    headers = [
        ('source', 'kineticModificationCaller 1.3.3'),
        ('source-commandline', " ".join(sys.argv)),
        ('attribute-description',
         'modsfwd - count of detected DNA modifications on forward strand by modification event type'
         ),
        ('attribute-description',
         'modsrev - count of detected DNA modifications on reverse strand by modification event type'
         ),
        # One column-order header per attribute. The second entry used to
        # repeat 'region-modsfwd', leaving modsrev without a header.
        ('region-modsfwd', headerString),
        ('region-modsrev', headerString)
    ]

    # Get modification calls
    hits = [{
        "pos": x.start,
        "strand": x.strand,
        "seqid": x.seqid,
        "type": x.type
    } for x in modReader if x.type in self.knownModificationEvents]

    self.seqMap = {}
    inHeader = True

    # Loop through
    with open(self.alignmentSummary) as summaryFile, \
            open(self.outfile, "w") as summaryWriter:
        for line in summaryFile:
            # Pass any metadata line straight through
            if line[0] == "#":
                # Parse headers
                splitFields = line.replace('#', '').split(' ')
                field = splitFields[0]
                value = " ".join(splitFields[1:])
                if field == 'sequence-header':
                    internalTag, _, externalTag = value.strip().partition(' ')
                    self.seqMap[internalTag] = externalTag
                print(line.strip(), file=summaryWriter)
                continue

            if inHeader:
                # We are at the end of the header -- write the
                # tool-specific headers
                for field in headers:
                    print(("##%s %s" % field), file=summaryWriter)
                inHeader = False

            # Parse the line
            rec = Gff3Record.fromString(line)

            if rec.type == 'region':
                # Get the hits in this interval, add them to the gff
                # record
                intervalHits = [
                    h for h in hits
                    if rec.start <= h['pos'] <= rec.end
                    and rec.seqid == h['seqid']
                ]

                cFwd = self.countModificationTypes(
                    [h for h in intervalHits if h['strand'] == '+'])
                cRev = self.countModificationTypes(
                    [h for h in intervalHits if h['strand'] == '-'])

                rec.modsfwd = ",".join([
                    str(cFwd[x]) for x in self.knownModificationEvents
                ])  # pylint: disable=assigning-non-slot
                rec.modsrev = ",".join([
                    str(cRev[x]) for x in self.knownModificationEvents
                ])  # pylint: disable=assigning-non-slot

            print(str(rec), file=summaryWriter)

    return 0
def makeGffRecord(self, siteObs):
    """
    Convert the internal site observation object into a GFF entry

    siteObs is read by key. Always used: 'tpl', 'strand', 'coverage',
    'ipdRatio', 'refName', 'score'. Used when present: 'motif', 'id',
    'modificationScore', 'modification', and -- only if
    self.options.methylFraction is set -- the FRAC / FRAClow / FRACup
    entries.

    Returns a Gff3Record with source 'kinModCall', a 1-based start == end
    position, strand '+' when siteObs['strand'] == 0 and '-' otherwise,
    and the score rounded to an int.
    """
    # Some useful attributes about the observation
    # - cognate base
    # - context snippet
    # - ipd ratio
    # - coverage
    snippet = self.snippetFunc(siteObs['tpl'], siteObs['strand'])
    attributes = [('coverage', siteObs['coverage']),
                  ('context', snippet),
                  ('IPDRatio', siteObs['ipdRatio'])]

    # Base of detected mod -- single position, closed,open
    # interval.
    # Note -- internally the tool uses 0-based reference
    # coordinates, however in gff the template indices are
    # 1-based. Make that adjustment here.
    # On start vs. end: My reading of the gff spec
    # (http://www.sequenceontology.org/resources/gff3.html) says
    # to me that 1-base long feature (e.g. a modified base) should
    # have start + 1 == end, and 0-base long features
    # (e.g. insertions) should have start == end. This is not the
    # convention that Marco has adopted in SMRTView, or the
    # convention that EviCons originally used. We will adopt
    # their convention here, for now.
    start = siteObs['tpl'] + 1
    end = siteObs['tpl'] + 1

    # Membership is tested with ``in`` rather than dict.has_key(), which
    # no longer exists in Python 3.
    if 'motif' in siteObs:
        attributes.append(('motif', "%s" % siteObs['motif']))

    if 'id' in siteObs:
        attributes.append(('id', "%s" % siteObs['id']))

    if self.options.methylFraction and FRAC in siteObs:
        attributes.append(('frac', "%.3f" % siteObs[FRAC]))
        attributes.append(('fracLow', "%.3f" % siteObs[FRAClow]))
        attributes.append(('fracUp', "%.3f" % siteObs[FRACup]))

    if 'modificationScore' in siteObs:
        # Report the QV from the modification identification module as
        # a special tag
        attributes.append(
            ('identificationQv',
             "%d" % int(round(siteObs['modificationScore']))))

    if 'modification' in siteObs:
        if siteObs['modification'] == '.':
            recordType = 'modified_base'
        elif siteObs['modification'] == 'nMd':
            recordType = '.'
        else:
            # if we have an identified mod, use it; otherwise use the
            # old generic term
            recordType = siteObs['modification']
    else:
        recordType = 'modified_base'

    refName = siteObs['refName']
    score = int(round(siteObs['score']))
    strand = '+' if siteObs['strand'] == 0 else '-'

    return Gff3Record(refName, start, end,
                      type=recordType,
                      score=score,
                      strand=strand,
                      source='kinModCall',
                      attributes=attributes)
def _mainLoop(self):
    """
    Copy the alignment summary GFF to the output file, adding per-region
    counts of detected modifications.

    Reads all 'modified_base' records from ``self.args.modifications``,
    then streams ``self.args.alignmentSummary`` into ``self.args.outfile``:

    - lines starting with '#' are passed through (stripped); any
      'sequence-header' line is also recorded in ``self.seqMap`` as
      {first token: remainder of the value}
    - the tool-specific headers are written once, just before the first
      non-header line
    - every 'region' record gets ``modsfwd`` / ``modsrev`` set to the
      number of hits on the '+' / '-' strand whose position lies inside
      [rec.start, rec.end]

    The files are opened with open() inside a ``with`` statement: the
    file() builtin is not available on Python 3, and the context manager
    guarantees both handles are closed even if parsing raises.
    """
    # Read in the existing modifications.gff
    modReader = GffReader(self.args.modifications)

    # Set up some additional headers to be injected
    headers = [
        ('source', 'kineticModificationCaller 1.3.1'),
        ('source-commandline', " ".join(sys.argv)),
        ('attribute-description',
         'modsfwd - count of detected DNA modifications on forward strand'
         ),
        ('attribute-description',
         'modsrev - count of detected DNA modifications on reverse strand')
    ]

    # Get modification calls
    # NOTE(review): hits carry position and strand only, so the interval
    # test below does not distinguish between reference sequences --
    # confirm this is only used with single-reference inputs.
    hits = [{
        "pos": x.start,
        "strand": x.strand
    } for x in modReader if x.type == 'modified_base']

    self.seqMap = {}
    inHeader = True

    # Loop through
    with open(self.args.alignmentSummary) as summaryFile, \
            open(self.args.outfile, "w") as summaryWriter:
        for line in summaryFile:
            # Pass any metadata line straight through
            if line[0] == "#":
                # Parse headers
                splitFields = line.replace('#', '').split(' ')
                field = splitFields[0]
                value = " ".join(splitFields[1:])
                if field == 'sequence-header':
                    internalTag, _, externalTag = value.strip().partition(' ')
                    self.seqMap[internalTag] = externalTag
                print(line.strip(), file=summaryWriter)
                continue

            if inHeader:
                # We are at the end of the header -- write the
                # tool-specific headers
                for field in headers:
                    print(("##%s %s" % field), file=summaryWriter)
                inHeader = False

            # Parse the line
            rec = Gff3Record.fromString(line)

            if rec.type == 'region':
                # Get the hits in this interval, add them to the gff record
                intervalHits = [
                    h for h in hits if rec.start <= h['pos'] <= rec.end
                ]
                rec.modsfwd = len(
                    [h for h in intervalHits if h['strand'] == '+'])
                rec.modsrev = len(
                    [h for h in intervalHits if h['strand'] == '-'])

            print(str(rec), file=summaryWriter)