示例#1
0
 def analyseRecombindationResults(self,oligoReport,featureList,outputFile="oligo_recomb_analysis.txt"):
     '''
     Write a text summary of the features selected for each oligo and
     return a report extended with the query value of every match.

     @var oligoReport: report providing "genomic_start", "genomic_end"
         and "best" columns for each of its rows
     @var featureList: features handed to self._selectFeatures together
         with the start / end of each oligo
     @var outputFile: path of the text file that receives the summary
     @type outputFile: String
     @return: Report holding the contents of oligoReport plus one
         "match_<n>" column per selected feature
     '''
     result = Report()
     result.extend(oligoReport)

     # The summary used to be written to an undefined name while outputFile
     # was ignored; open the requested file and let "with" close it on every
     # exit path.
     with open(outputFile, "w") as output:
         for rName in oligoReport.returnRowNames():
             output.write(rName + "\n")

             start = float(oligoReport["genomic_start"][rName])
             end = float(oligoReport["genomic_end"][rName])
             oligo = oligoReport["best"][rName]
             output.write("\n(%s,%s) %s\n\n" % (start,end,oligo))

             subFeatures = self._selectFeatures(featureList,start,end)
             for index, feature in enumerate(subFeatures):
                 qValue = feature.qualifiers["query"]
                 matchString = feature.qualifiers["alignment"]
                 output.write(feature.id + "\n")
                 output.write(matchString + "\n\n")
                 result.add(rName,"match_" + str(index),qValue)
     return result
 def geneControlReport(self,model,controlMap,outputName):
     '''
     Write a report that marks, for every target in model.controlMap,
     each of its controls with the value "control".

     @var model: object exposing a controlMap attribute; it is treated
         here as a mapping of target -> iterable of controls
     @var controlMap: not read by this method, the mapping is taken from
         model.controlMap (NOTE(review): confirm this is intended)
     @var outputName: file name handed to ReportWriter.setFile
     @return: None
     '''
     report = Report()
     # Iterate (target, controls) pairs; looping over keys() alone would try
     # to unpack every key into two names.
     for (target,controls) in model.controlMap.items():
         for control in controls:
             report.add(target,control,"control")
     
     writer = ReportWriter()
     writer.setFile(outputName)
     try:
         writer.write(report)
     finally:
         # release the output file even when writing fails
         writer.closeFile()
 
     return None
示例#3
0
 def parseGenericReport(self, fileName, keyTag=None, header = None, unique = True):
     '''
     @var fileName: name of flat (delimited) in text format to be parsed
     @type fileName: String
     @var keyTag: ID of column to be used for report key row
     @type keyTag: String
     @var header: column names used when no header line is parsed; replaced
         by the parsed header once line self.headerLine is reached
     @var unique: passed through to self.parseHeader
     @summary: 
     Primary Function
     Parses flat file returns report object.
     Rows are keyed by the keyTag column when the header contains it,
     otherwise by the line index. Comment lines are skipped but still
     advance the line index.
     '''
     
     result = Report()
     kIndex = None
     index = 0
     
     # "with" closes the file even when a parsing step raises
     with open( fileName, 'r' ) as lines:
         for line in lines:
             
             if self.isComment( line ):
                 pass
             
             # NOTE(review): this condition only holds when endLine < startLine,
             # and the branch has the same effect as falling through. It is
             # kept as is; presumably "outside the data range" was meant - confirm.
             elif self.endLine < index < self.startLine:
                 index += 1
                 continue
             
             elif index == self.headerLine:
                 header = self.parseHeader( line, unique )
                 if keyTag in header:
                     kIndex = header.index(keyTag)
                     
             elif self.endLine > index > self.startLine:
                 sLine = self._safeSplit(line.replace('\n',''))
                 if kIndex is not None:
                     rName = sLine[kIndex]
                 else:
                     rName = str(index)
                 for i, value in enumerate(sLine):
                     # the key column names the row, it is not stored as a value
                     if i != kIndex:
                         result.add(rName,header[i],value)
                     
             index += 1
         
     return result
def enzymeBoundaryControl(model,targets,bounds,objectiveName,productionName,searchSize):
    '''
    Evaluate every distinct combination of searchSize controls and report
    the objective and production flux found for each of them.

    @var model: provides getControlsForNames, getEnzymeControlMap and
        annotateGeneList
    @var targets: not read; the names in bounds are used instead
    @var bounds: mapping of target name -> bound
    @var objectiveName: passed to findBoundaryProduction
    @var productionName: passed to findBoundaryProduction
    @var searchSize: number of controls per combination
    @return: Report keyed by a "(name)(name)..." tag of the sorted annotated
        control names, with "natural" and "production" columns
    '''
    boundNames = bounds.keys()
    controlNames = model.getControlsForNames(boundNames)
    enzymeControls = model.getEnzymeControlMap()

    result = Report()
    for subset in set(combinations(controlNames,searchSize)):
        enzymeNames = model.annotateGeneList(list(subset))
        enzymeNames.sort()
        controlTag = ''.join(["(%s)" % (name) for name in enzymeNames])

        # gather the bounds of every target reachable from this combination
        subsetBounds = {}
        for controlName in subset:
            for target in enzymeControls[controlName]:
                if target in boundNames:
                    subsetBounds[target] = bounds[target]

        # combinations that touch no bounded target are not evaluated
        if not subsetBounds:
            continue

        (fluxMap,oflux,pflux) = findBoundaryProduction(model, subsetBounds, None, objectiveName, productionName)
        print("[%s] objective %s => production %s" % (controlTag, oflux, pflux))
        result.add(controlTag,"natural",oflux)
        result.add(controlTag,"production",pflux)

    return result
示例#5
0
    def parseAlignments(self,records,featureLocations):
        '''
        Build a report describing where each record aligns, using the first
        feature found for it, and log the alignments to "oligoLog.txt".

        @var records: sequence records; the id and seq of each are read
        @var featureLocations: indexed by record position, each entry is a
            non-empty list of features locating that record
        @return: Report with "original", "hits", "genomic_start",
            "genomic_end", "genomic_strand" and "match" columns per record id
        '''
        result = Report()
        
        # "with" closes the log, which was previously left open
        with open("oligoLog.txt","w") as logFile:
            # The record position selects the matching feature list; the
            # counter was never advanced before, so every record was
            # described by the features of the first one.
            for index, r in enumerate(records):
                recordId = r.id
                
                features = featureLocations[index]
                primary = features[0]
                hits = len(features)
                genomicStart = primary.location.start.position
                genomicEnd = primary.location.end.position
                genomicStrand = primary.strand
                
                if "alignment" in primary.qualifiers:
                    aMatch = primary.qualifiers["alignment"]
                else:
                    aMatch = ''
                
                logFile.write(recordId+"\n")
                logFile.write(aMatch+"\n")
                
                result.add(recordId,"original", str(r.seq))
                result.add(recordId,"hits", hits)
                result.add(recordId,"genomic_start", genomicStart)
                result.add(recordId,"genomic_end", genomicEnd)
                result.add(recordId,"genomic_strand", genomicStrand)
                result.add(recordId,"match", aMatch)
        
        return result
示例#6
0
 def getControlReport(self,genes,targetRecord,sequence,boundary=0,range=1000):
     '''
     Create report of control regions for listed genes

     @var genes: features to report on; each needs "gene" and "locus_tag"
         qualifiers, a location and a strand
     @var targetRecord: passed to self.localFeatures
     @var sequence: sequence the regions are cut from (lower-cased first)
     @var boundary: passed to self.getSequenceRegion
     @var range: passed to self.localFeatures
     @return: Report with one row per gene name holding the gene position,
         the rbs region and one "promoter_<n>" column per local feature
     '''
     result = Report()
     seqProp = RecombinationOligoFactory()
     
     sdata = str(sequence).lower() 
     arrow = ["<-","-","->"]
     
     if self.verbose: print("finding local features")
     
     localFeatures = self.localFeatures(genes,targetRecord,range=range)
 
     if self.verbose: print("features found, creating report")
     
     for feature in genes:
         name = feature.qualifiers["gene"][0]
         locTag = feature.qualifiers["locus_tag"][0]
         start = feature.location.start.position
         end = feature.location.end.position
         strand = feature.strand
         
         result.add(name,"locus_tag",locTag)
         result.add(name,"gene_start",start)
         result.add(name,"gene_end",end)
         result.add(name,"gene_strand",strand)
         
         # The rbs anchor is the gene end on the reverse strand and the gene
         # start otherwise. Any strand other than 1 / -1 used to leave loc
         # unbound (or carrying the previous gene's value).
         if strand == -1:
             loc = end
         else:
             loc = start
             
         rbsSeq = self.getSequenceRegion(sdata, loc, 3, boundary, strand)
         oligoStrandRbs = seqProp.strandChooser(feature)
             
         result.add(name,"rbs_start",loc)
         result.add(name,"rbs_region",rbsSeq)
         result.add(name,"rbs_Oligo_Strand",str(oligoStrandRbs))
         
         for count, tFeature in enumerate(localFeatures[name], 1):
             tName = tFeature.qualifiers["gene"][0]
             tStart = tFeature.location.start.position
             tEnd = tFeature.location.end.position
             tStrand = tFeature.strand
             oligoStrand = seqProp.strandChooser(tFeature)
             iArrow = arrow[tStrand + 1]
             
             # A region is only cut for features on the gene's own strand.
             # Reset the value for every feature so that a feature on the
             # other strand no longer repeats the previous feature's sequence
             # (or fails on an unbound name).
             pSeq = ""
             if tStrand == strand:
                 tSize = -1*tStrand
                 pSeq = self.getSequenceRegion(sdata, tStart, tSize, boundary, strand)
             tag = "%s:[%s %s %s] t[%s] = %s" % (tName,tStart,iArrow,tEnd,oligoStrand,pSeq)
             colName = "promoter_%s" % (count)
             result.add(name,colName,tag)
         
     return result
示例#7
0
    def findPrimers(self,seq,targetMap,boundary,oligoSize,searchSize,targetTm):
        '''
        Find a forward and a reverse sequencing primer around each target
        location.
        
        @var seq: sequence sliced around each target; its slices must
            provide reverse_complement()
        @var targetMap: mapping of sequence name -> genomic location
        @var boundary: distance between the target and the position each
            primer search is centred on
        @var oligoSize: primer size handed to self.scanOligoTm
        @var searchSize: distance searched on either side of the centre
        @var targetTm: melting temperature handed to self.scanOligoTm
        @return: Report with one row per sequence name holding the target
            location and the primer, adjust, location and TM of both primers
        
        usage:
        set boundary and oligo size
        read in report, list of locations
        run find sequence primers.
        write report of result
        '''
        result = Report()
 
        for k in targetMap.keys():
            targetLocation = targetMap[k]
            
            # NOTE(review): the slice bounds are not clamped, so a target
            # closer to the start of seq than boundary + searchSize +
            # oligoSize yields a negative slice index - confirm callers
            # avoid this.
            upLocation = targetLocation - boundary
            upStart = upLocation - searchSize - oligoSize
            upEnd = upLocation + searchSize
            upSeq = seq[upStart:upEnd]
            
            downLocation = targetLocation + boundary
            downStart = downLocation - searchSize
            downEnd = downLocation + searchSize + oligoSize
            downSeq = seq[downStart:downEnd].reverse_complement()
            
            start = searchSize
            (uTm,uAdjust,oUpSeq) = self.scanOligoTm(upSeq,start,oligoSize,searchSize,targetTm)
            (dTm,dAdjust,oDownSeq) = self.scanOligoTm(downSeq,start,oligoSize,searchSize,targetTm)
            # the reverse primer was scanned on the reverse complement, so
            # its adjustment points the other way in genomic coordinates
            dAdjust = -dAdjust
            
            ucLocation = upLocation + uAdjust
            dcLocation = downLocation + dAdjust
            
            result.add(k,"sequencing location",targetLocation)
            
            result.add(k,"foward primer",oUpSeq)
            result.add(k,"foward adjust",uAdjust)
            result.add(k,"foward location",ucLocation)
            result.add(k,"foward TM",uTm)
            
            result.add(k,"reverse primer",oDownSeq)
            result.add(k,"reverse adjust",dAdjust)
            # dcLocation already contains the adjustment; it used to be
            # added a second time here, unlike for the forward primer
            result.add(k,"reverse location",dcLocation)
            result.add(k,"reverse TM",dTm)
            
        return result        
示例#8
0
    def generateTargetingOligos(self, records, featureLocations, tagRE, boundary, searchSize, cutOff):
        '''
        Generate a list of oligos for recombination in target locations
        and return report with the targets and sequencing oligos
        
        All oligos printed 5' -> 3'
        Control upstream will be to the left if strands are preserved and to the right 
        if strands are switched when matching lagging complement
        
        Oligos are selected as an optimized subsection of the presented sequences.
        The alignment of every record is logged to "oligoLog.txt".
        
        @records: sequences from which to select oligos; the seq of every
            record is replaced by the selected region
        @featureLocations: a list of locations that place the features in a genome
        @tagRE: regular expression for finding tagged sequence within feature sequences.
        @boundary: oligo flanking region size
        @searchSize: distance in base pairs to search for optimal oligo
        @cutOff: limit of viable fold change energy for chosen oligos.
        @return: tuple (targetMap, result, sRegions): record id -> genomic
            midpoint, the Report of chosen oligos, and the list of records
            trimmed to their target region
        '''
        
        result = Report()
        targetMap = {}
        sRegions = []
        
        # "with" closes the log even when a record fails part way through
        with open("oligoLog.txt","w") as logFile:
            for index, r in enumerate(records):
                recordId = r.id
                
                features = featureLocations[index]
                hits = len(features)
                genomicStart = features[0].location.start.position
                genomicEnd = features[0].location.end.position
                genomicStrand = features[0].strand
                laggingComplementStrand = self.strandChooser(features[0])
                
                if "alignment" in features[0].qualifiers.keys():
                    aMatch = features[0].qualifiers["alignment"]
                else: 
                    aMatch = ''
                
                targetMap[recordId] = (genomicStart + genomicEnd)/2
                
                logFile.write(recordId+"\n")
                logFile.write(aMatch+"\n")
                
                seqString = str(r.seq)
                seqLength = len(seqString)
                
                # Find target section using special targeting tag; without a
                # tag the middle of the sequence is targeted.
                matchTag = re.search(tagRE,seqString)
                if matchTag is None:
                    # bind the name so the verbose message below cannot fail
                    # or show the tag of a previous record
                    targetTag = None
                    tagLoc = seqLength/2
                else:
                    targetTag = matchTag.group(0)
                    tagLoc = seqString.index(targetTag) + len(targetTag)/2
                    
                if self.verbose: print("Target [%s] location [%s]" % (targetTag,tagLoc))
                
                start = int(tagLoc - boundary)
                end = int(tagLoc + boundary)
                
                #!May need a little touch up
                if start < 0:
                    start = 0
                    end = int(boundary*2)
                if end > seqLength:
                    end = seqLength
                
                if self.verbose: print("region %s -> %s of %s" % (start,end,seqLength))
                
                # widen the region by the search distance plus 5 bases on
                # both sides, clamped to the sequence
                lowSearchBound = max(start - searchSize - 5, 0)
                highSearchBound = min(end + searchSize + 5, seqLength)
                    
                if genomicStrand == laggingComplementStrand:
                    searchSeq = r.seq[lowSearchBound:highSearchBound]
                    sx = r.seq[start:end]
                else:
                    searchSeq = r.seq[lowSearchBound:highSearchBound].reverse_complement()
                    sx = r.seq[start:end].reverse_complement()
                
                searchEnd = (end-start+searchSize+5)
                searchStart = searchSize + 5
                
                # Best effort: when the structure search fails the unoptimized
                # region is reported instead. Exception (rather than a bare
                # except) keeps KeyboardInterrupt / SystemExit propagating.
                try:
                    testSeqs = self.optimizeSecondaryStructure(searchSeq, searchStart, searchEnd, searchSize, cutOff, ratio = 2)
                    testSeqs.sort()
                    if len(testSeqs) == 0:
                        (score,adjust,foldScore,bestSeq) = ("na","na","na","na")
                    else:
                        (score,adjust,foldScore,bestSeq) = testSeqs.pop(0)
                except Exception:
                    testSeqs = []
                    (score,adjust,foldScore,bestSeq) = ("na","na","na",sx)
                    print("failed to exicute secondary structure test")
                
                if len(bestSeq) < boundary*2:
                    print("Short Sequence")
                if self.verbose: print("%s best %s S:%s [%s] (%s)" % (len(testSeqs), adjust, score, foldScore, len(bestSeq)))
                if self.verbose: print("[%s]" % (bestSeq))
                
                result.add(recordId,"original", seqString)
                result.add(recordId,"hits", hits)
                result.add(recordId,"genomic_start", genomicStart)
                result.add(recordId,"genomic_end", genomicEnd)
                result.add(recordId,"genomic_strand", genomicStrand)
                result.add(recordId,"lagging_complement_strand", laggingComplementStrand)
                result.add(recordId,"best", bestSeq)
                result.add(recordId,"fold score", foldScore)
                result.add(recordId,"off center", adjust)
                
                #append to list of sequence regions
                # (the input record itself is trimmed, not a copy)
                sRegion = r
                sRegion.seq = sx
                sRegions.append(sRegion)
        
        return (targetMap,result,sRegions)