Пример #1
0
def updateAvgNumSS(oFN):

    bn = os.path.basename(oFN)
    print 'basename', bn

    oID_numSS = {}

    numSims = 100

    print 'getting avg for %s simulations' % numSims
    for i in range(0, numSims):

        #simFN = '/home/chrisgre/scripts/simulations/simsk50FilteredMasked/simulation.%s/%s' % (i, bn)
        #simFN = '/home/chrisgre/scripts/simulations/simsk50Fix/simulation.%s/%s' % (i, bn)
        #simFN = '/home/chrisgre/scripts/simulations/mm9/simulation.%s/%s' % (i, bn)
        #simFN = '/home/chrisgre/scripts/simulations/hg19.hela/simulation.%s/%s' % (i, bn)
        simFN = '/home/chrisgre/scripts/simulations/hg19.U87/simulation.%s/%s' % (
            i, bn)
        osNX = cgNexusFlat.Nexus(simFN, cgOriginRNAFlat.OriginRNA)
        osNX.load(['numSignificantSequences'])
        for oID in osNX.numSignificantSequences:
            oID_numSS[oID] = oID_numSS.get(
                oID, 0) + osNX.numSignificantSequences[oID]

    #now save it
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['avgNumSS'])

    for oID in oNX.avgNumSS:
        totalNum = oID_numSS.get(oID, 0)
        avgNum = float(totalNum) / float(numSims)
        oNX.avgNumSS[oID] = avgNum
    oNX.save()
Пример #2
0
def updateTargetIDsFiltered(oFN, aFN, rn=None, tn=None):
    '''Rebuild the filteredTargets lists of oFN from the sID column of aFN.

    CAUTION: no selection is made when loading the alignments -- the sID of
    every alignment in aFN is loaded.

    Each loaded filteredTargets list is emptied, then every alignment ID is
    appended to the list of the ID its sID refers to.  Alignments whose sID
    is not a key of the loaded filteredTargets are skipped.  [rn, tn] is
    passed through to the load of oFN.  The result is saved to oFN.
    '''
    originNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    originNX.load(['filteredTargets'], [rn, tn])

    # restricting the alignment load to the needed sIDs is disabled here
    alignNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    alignNX.load(['sID'])

    # start every target list from scratch
    for oID in originNX.filteredTargets:
        originNX.filteredTargets[oID] = []

    # hand each alignment to the ID it originated from
    for aID in alignNX.sID:
        sourceID = alignNX.sID[aID]
        try:
            targetList = originNX.filteredTargets[sourceID]
        except KeyError:
            # another process is taking care of this one
            continue
        targetList.append(aID)

    originNX.save()
Пример #3
0
def appendTInfoFlat(aFN, dFN, rn = None, tn = None):
    '''Copy per-target properties from dFN onto the alignments in aFN.

    Alignments are grouped by their tID.  For every ID in dFN's tcc column,
    each alignment whose tID equals that ID receives the ID's tcc, eLevel,
    tOverlap, context, repeatStatus, gScore and sequence values (stored as
    tTcc, tELevel, transcriptOverlap, context, repeat, gScore and
    targetSequence).  [rn, tn] is passed through to the load of aFN, which
    is saved at the end.
    '''
    alignColumns = ['tID', 'tTcc', 'transcriptOverlap', 'tELevel',
                    'context', 'repeat', 'targetSequence', 'gScore']
    peakColumns = ['tOverlap', 'eLevel', 'tcc', 'context', 'repeatStatus',
                   'sequence', 'gScore']

    aNX = cgNexusFlat.Nexus(aFN, cgAlignment)
    aNX.load(alignColumns, [rn, tn])

    dNX = cgNexusFlat.Nexus(dFN, cgDegPeak.Peak)
    dNX.load(peakColumns)

    # target ID -> alignment IDs pointing at it
    alignsByTarget = {}
    for aID in aNX.tID:
        targetID = aNX.tID[aID]
        if targetID not in alignsByTarget:
            alignsByTarget[targetID] = []
        alignsByTarget[targetID].append(aID)

    for dID in dNX.tcc:
        if dID not in alignsByTarget:
            continue
        for aID in alignsByTarget[dID]:
            aNX.tTcc[aID] = dNX.tcc[dID]
            aNX.tELevel[aID] = dNX.eLevel[dID]
            aNX.transcriptOverlap[aID] = dNX.tOverlap[dID]
            aNX.context[aID] = dNX.context[dID]
            aNX.repeat[aID] = dNX.repeatStatus[dID]
            aNX.gScore[aID] = dNX.gScore[dID]
            aNX.targetSequence[aID] = dNX.sequence[dID]
            # repeatCount / totalContig are intentionally not copied

    aNX.save()
Пример #4
0
def updateAvgNumTargets(oFN):

    bn = os.path.basename(oFN)
    print 'basename', bn

    oID_numTargets = {}

    for i in range(0, 10):

        #simFN = '/home/chrisgre/scripts/simulations/simsk50FilteredMasked/simulation.%s/%s' % (i, bn)
        simFN = '/home/chrisgre/scripts/simulations/simsk50/simulation.%s/%s' % (
            i, bn)
        print simFN
        osNX = cgNexusFlat.Nexus(simFN, cgOriginRNAFlat.OriginRNA)
        osNX.load(['filteredTargets'])
        for oID in osNX.filteredTargets:
            currTargets = oID_numTargets.get(oID, 0)
            oID_numTargets[oID] = currTargets + len(osNX.filteredTargets[oID])

    #now save it
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['avgNumSimulationTargets'])

    for oID in oNX.avgNumSimulationTargets:
        totalNum = oID_numTargets.get(oID, 0)
        avgNum = float(totalNum) / float(10.0)
        oNX.avgNumSimulationTargets[oID] = avgNum

    oNX.save()
Пример #5
0
def eLevelHistogram(oFN, aFN, oRNA=True):
    '''Show a histogram of log10 expression levels.

    oRNA -- when true, the eLevel of every ID in oFN is plotted; otherwise
            the tELevel of every alignment listed in the IDs' filteredTargets
            is plotted.  May be a bool or a string; a string counts as true
            when it contains 'True'.

    The histogram uses 50 bins and is displayed with plt.show().
    '''
    # The original evaluated "'True' in oRNA" unconditionally, which raises
    # TypeError for the boolean default; only non-bool values are parsed.
    if not isinstance(oRNA, bool):
        oRNA = 'True' in oRNA

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['filteredTargets', 'eLevel'])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['tELevel'])

    histValues = []
    for oID in oNX.eLevel:
        if oRNA:
            histValues.append(oNX.eLevel[oID])
        else:
            for aID in oNX.filteredTargets[oID]:
                histValues.append(aNX.tELevel[aID])

    # NOTE(review): math.log raises ValueError for levels <= 0 -- confirm the
    # data never contains them.
    histVals = [math.log(x, 10) for x in histValues]
    plt.hist(histVals, 50)

    # named plotLabel rather than 'type' to avoid shadowing the builtin
    plotLabel = 'oRNA' if oRNA else 'Targets (degradome)'
    plt.title('Expression Level for %s' % plotLabel)
    plt.xlabel('log(Expression Level)')
    plt.ylabel('Number of %s' % plotLabel)

    plt.show()
Пример #6
0
def countRepeatStatusTargets(oFN, aFN, oContext=None, oType=None):
    if oContext == 'None': oContext = None
    if oType == 'None': oType = None

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(
        ['snrSS', 'context', 'transcriptType', 'filteredTargets', 'gScore'],
        [rn, tn])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['context', 'gScore', 'repeat'], [rn, tn])

    context_rStatuss = {}

    for oID in oNX.context:

        if oNX.snrSS[oID] < 2.00: continue
        #gather targets' context info if oRNA is okay
        for aID in oNX.filteredTargets[oID]:
            aCon = aNX.context[aID]
            rStatus = aNX.repeat[aID]
            context_rStatuss.setdefault(aCon, []).append(rStatus)

    #plot
    for context in context_rStatuss:
        print context, context_rStatuss[context].count(
            True), context_rStatuss[context].count(False)
Пример #7
0
def gZipContextECDF(oFN,
                    aFN,
                    imgName,
                    oContext=None,
                    oType=None,
                    rn=None,
                    tn=None):
    '''Save cumulative step histograms of target gScore, one per target context.

    Only IDs of oFN whose snrSS is at least 2.00 are considered.  The gScore
    of each of their filteredTargets is grouped by the alignment's context;
    every group is drawn as a normalised cumulative step histogram with
    10000 bins and the figure is written to imgName.

    oContext, oType -- the string 'None' is normalised to None; the values
                       are not otherwise used by this function.
    rn, tn          -- passed through as [rn, tn] to both loads.  They were
                       referenced without being defined before, which raised
                       NameError; they are now optional parameters.
    '''
    if oContext == 'None': oContext = None
    if oType == 'None': oType = None

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(
        ['snrSS', 'context', 'transcriptType', 'filteredTargets', 'gScore'],
        [rn, tn])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['context', 'gScore'], [rn, tn])

    context_gzips = {}

    for oID in oNX.context:

        if oNX.snrSS[oID] < 2.00: continue
        #gather targets' context info if oRNA is okay
        for aID in oNX.filteredTargets[oID]:
            aCon = aNX.context[aID]
            gScore = aNX.gScore[aID]
            context_gzips.setdefault(aCon, []).append(gScore)

    #plot
    # NOTE(review): 'normed' was removed from newer matplotlib releases in
    # favour of 'density' -- confirm against the installed version.
    for context in context_gzips:
        plt.hist(context_gzips[context],
                 bins=10000,
                 cumulative=True,
                 histtype='step',
                 normed=True,
                 label='%s' % context)

    plt.legend()
    plt.savefig(imgName, bbox_inches='tight', pad_inches=1)
Пример #8
0
def updateTargetIDs(oFN, aFN, rn=None, tn=None):
    '''Rebuild the targets lists of oFN from the sID column of aFN.

    Only alignments whose sID is one of the IDs loaded from oFN (with
    [rn, tn] passed to that load) are loaded.  Each targets list is emptied
    and every loaded alignment ID is appended to the list of the ID named by
    its sID.  The result is saved to oFN.
    '''
    originNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    originNX.load(['targets'], [rn, tn])

    # IDs handled by this call; used to restrict the alignment load
    wantedIDs = set(originNX.targets)

    def isWanted(sID):
        return sID in wantedIDs

    alignNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    alignNX.load(['sID'], conditions={'sID': isWanted})

    # drop whatever targets were recorded before
    for oID in originNX.targets:
        originNX.targets[oID] = []

    # attach each alignment to the ID it came from
    for aID in alignNX.sID:
        sourceID = alignNX.sID[aID]
        originNX.targets[sourceID].append(aID)

    originNX.save()
Пример #9
0
def updatePolySeqs(mFN, readsFN, alignFN):
    '''Print reads that look like a micro sequence followed by a single-letter tail.

    mFN     -- file loaded with the miR type ('sequence', 'polySeqs')
    readsFN -- file loaded as a one-column table named 'read'
    alignFN -- alignment file; sID is used to look up the read and tID to
               look up the micro sequence

    For every alignment, the read is compared with the micro sequence and,
    when the read contains the sequence (or the sequence with its last 1-7
    characters removed) followed by a run of 1-19 identical A/G/T/C letters,
    a line built by tabIt() is printed.  Nothing is written back to any of
    the files in this function.
    '''

    # timer is started but its splits are only used by the commented-out prints
    tim = bioLibCG.cgTimer()
    tim.start()
    # homopolymer runs of length 1..19 for each letter
    variousAs = ["A" * x for x in range(1,20)]
    variousGs = ["G" * x for x in range(1,20)]
    variousTs = ["T" * x for x in range(1,20)]
    variousCs = ["C" * x for x in range(1,20)]

    letter_variousLetters = [ ("A", variousAs),
                            ("G", variousGs),
                            ("T", variousTs),
                            ("C", variousCs)]


    # number of trailing characters to try removing from the micro sequence
    checkRange = range(1,8)

    NX = cgNexusFlat.Nexus(mFN, miR)
    NX.load(['sequence', 'polySeqs'])
    #print 'load micro', tim.split()

    reads = cgNexusFlat.quickTable(('read','string', '.', 1))
    rNX = cgNexusFlat.Nexus(readsFN, reads)
    rNX.load(['read'])
    #print 'load reads', tim.split()

    aNX = cgNexusFlat.Nexus(alignFN, cgAlignment)
    aNX.load(['sID', 'tID'])
    #print 'load alignments', tim.split()

    for id in aNX.ids:

        theRead = rNX.read[aNX.sID[id]]
        mID = aNX.tID[id]
        microSeq = NX.sequence[mID]

        #may be a read for expression, but wont count...
        if theRead in microSeq: continue

        #just for expression
        # NOTE(review): assuming both values are strings, equality implies
        # containment, so the guard above already skipped this case and this
        # branch looks unreachable -- confirm which behaviour is intended.
        if microSeq == theRead: 
            print tabIt(microSeq, theRead, 0, 0, "N")

        #first check full
        elif microSeq in theRead and (len(theRead) != len(microSeq)):
            # text following the first occurrence of the micro sequence
            tail = theRead.split(microSeq)[1]
            for let, variousLetters in letter_variousLetters:
                if tail in variousLetters:
                    print tabIt(microSeq, theRead, 0, len(tail), let)

        #now check trimmed (cant do [:-0])
        else:
            for i in checkRange:
                if microSeq[:-i] in theRead and (len(theRead) != len(microSeq[:-i])):
                    tail = theRead.split(microSeq[:-i])[1]
                    for let, variousLetters in letter_variousLetters:
                        if tail in variousLetters:
                            print tabIt(microSeq, theRead, i, len(tail), let)
                            print "TRIMMED"
                    break #stop at the shortest trim contained in the read, whether or not its tail matched
Пример #10
0
def targetContextPercentageVsExpression(oFN,
                                        aFN,
                                        oContext=None,
                                        oType=None,
                                        rn=None,
                                        tn=None):
    '''Plot, per target context, the number of targets at or above each expression cutoff.

    Cutoffs run from 50 to 450 in steps of 50.  IDs of oFN are optionally
    restricted to those whose context equals oContext and whose
    transcriptType equals oType.  For every remaining ID, each of its
    filteredTargets whose tELevel is not below the cutoff is counted under
    the alignment's context.  One line per context is drawn and the figure
    is shown.  rn and tn are accepted but not used.
    '''
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['context', 'transcriptType', 'filteredTargets'])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['context', 'tELevel'])

    # context -> {cutoff -> number of targets}
    countsByContext = {}

    for cutoff in range(50, 500, 50):
        for oID in oNX.context:

            originContext = oNX.context[oID]
            originType = oNX.transcriptType[oID]

            # optional restriction of the origin IDs
            if oContext and oContext != originContext:
                continue
            if oType and oType != originType:
                continue

            for aID in oNX.filteredTargets[oID]:
                if aNX.tELevel[aID] < cutoff:
                    continue
                perCutoff = countsByContext.setdefault(aNX.context[aID], {})
                perCutoff[cutoff] = perCutoff.get(cutoff, 0) + 1

    # one line per context, cutoffs in ascending order
    handles = []
    names = []
    for con in countsByContext:
        perCutoff = countsByContext[con]
        xs = sorted(perCutoff.keys())
        ys = [perCutoff[cutoff] for cutoff in xs]
        handles.append(plt.plot(xs, ys))
        names.append(con)

    plt.legend(handles, names)
    plt.title(
        'oRNA Targets\' Context Proportion Stability w/ Expression Increase')
    plt.xlabel('Degradome Expression Cutoff')
    plt.ylabel('Number of oRNA Targets')
    plt.show()
Пример #11
0
def phastScoreByNT(oFN, aFN, oIDFilter=None):
    '''Scatter-plot phastScores by position, separating mismatched positions.

    IDs of oFN are kept when the mean of their phastScores is at least .90
    and their snrSS is at least 2; if oIDFilter is given, only the ID equal
    to int(oIDFilter) is kept.  For each kept ID the mismatchPositions of all
    its filteredTargets are pooled; scores at pooled positions are drawn in
    red at x = index + 1.2, the others in blue at x = index + 1.
    '''
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['phastScores', 'snrSS', 'filteredTargets'])

    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['mismatchPositions'])

    matchedX, matchedY = [], []
    mismatchedX, mismatchedY = [], []

    for oID in oNX.phastScores:

        ntScores = oNX.phastScores[oID]
        meanScore = sum(ntScores) / float(len(ntScores))

        if meanScore < .90 or oNX.snrSS[oID] < 2:
            continue

        if oIDFilter and oID != int(oIDFilter):
            continue

        # union of mismatch positions over all of this ID's targets
        mismatched = set()
        for aID in oNX.filteredTargets[oID]:
            mismatched.update(aNX.mismatchPositions[aID])

        for pos, score in enumerate(ntScores):
            if pos in mismatched:
                # +1 converts from 0-based, +.2 offsets mismatches from regular points
                mismatchedX.append(pos + 1.2)
                mismatchedY.append(score)
            else:
                matchedX.append(pos + 1)
                matchedY.append(score)

    # shaded band behind every nucleotide position
    for nt in range(1, max(matchedX) + 1):
        plt.axvspan(nt - .15, nt + .35, facecolor='g', alpha=.25)

    plt.plot(matchedX, matchedY, 'bo')
    plt.plot(mismatchedX, mismatchedY, 'ro')
    plt.ylim(0, 1.1)
    plt.xlim(0, 24)
    plt.title('Conservation by Position of Conserved oRNA')
    plt.ylabel('PhastCons Score')
    plt.xlabel('Nucleotide Position')

    plt.show()
Пример #12
0
def filterTargets(oFN,
                  aFN,
                  inTranscript,
                  misLevel,
                  centerLevel,
                  minCenterLevel,
                  rn=None,
                  tn=None):
    if inTranscript == 'True': inTranscript = True
    if inTranscript == 'False': inTranscript = False
    misLevel, centerLevel, minCenterLevel = int(misLevel), int(
        centerLevel), float(minCenterLevel)

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['filteredTargets', 'targets'], [rn, tn])

    print inTranscript, misLevel, centerLevel, minCenterLevel, oFN, aFN, rn, tn

    #make selection set
    targets = set()
    for oID in oNX.targets:
        for target in oNX.targets[oID]:
            targets.add(target)

    c = {'ID': lambda x: x in targets}
    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['transcriptOverlap', 'mismatchStatus', 'centerExpression'],
             conditions=c)

    for oID in oNX.filteredTargets:
        oNX.filteredTargets[oID] = []
        for aID in oNX.targets[oID]:

            #transcriptOverlap
            if inTranscript:
                if not aNX.transcriptOverlap[aID]:
                    #print 'tOverlap Fail', cgAlignment.pretty#print(alignment)
                    continue

            #misLevel
            if aNX.mismatchStatus[aID][misLevel]:
                #print 'mismatch Fail', cgAlignment.pretty#print(alignment)
                continue

            #centerLevel
            if aNX.centerExpression[aID][centerLevel] < minCenterLevel:
                #print 'expression Fail', cgAlignment.pretty#print(alignment)
                continue

            oNX.filteredTargets[oID].append(aID)

    oNX.save()
Пример #13
0
def conservedHisto(oFN):

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['phastScores'])

    scores = []
    for oID in oNX.phastScores:

        avgScore = sum(oNX.phastScores[oID]) / float(len(oNX.phastScores[oID]))

        #scores.extend(oNX.phastScores[oID])
        scores.append(avgScore)

    print len(scores)
    plt.title('PhastCons Scores By Nucleotide')
    plt.title('PhastCons Scores By oRNA')

    plt.ylabel('Number of Nucleotides')
    plt.ylabel('Number of oRNA')

    plt.xlabel('PhastCons Score')
    plt.xlabel('PhastCons Score Average')

    plt.hist(scores, 50)
    plt.show()
Пример #14
0
def updateTranscriptOverlap(oFN, wigDir, chrom, strand, rn=None, tn=None):
    '''Set tOverlap for the entries of oFN lying on the given chromosome/strand.

    The strand argument and the strand of each entry's tcc are both flipped
    ('1' becomes '-1', anything else becomes '1') before use.  The
    'transcript' wig for chrom and the flipped strand is loaded; an entry on
    that chromosome/strand gets tOverlap True when any coordinate from its
    start to its end (inclusive) is present in the wig, else False.  Other
    entries are left unchanged.  [rn, tn] is passed to the load of oFN,
    which is saved at the end.
    '''

    def flipped(value):
        # the wig of the opposite strand is the one consulted
        if value == '1':
            return '-1'
        return '1'

    oNX = cgNexusFlat.Nexus(oFN, cgDegPeak.Peak)
    oNX.load(['tOverlap', 'tcc'], [rn, tn])

    strand = flipped(strand)
    coord_transcripts = cgWig.loadSingleWigTranscript(wigDir, chrom, strand,
                                                      'transcript')

    for oID in oNX.tOverlap:

        tChrom, tStrand, start, end = bioLibCG.tccSplit(oNX.tcc[oID])
        tStrand = flipped(tStrand)

        if not (tChrom == chrom and tStrand == strand):
            continue

        oNX.tOverlap[oID] = any(
            coord in coord_transcripts for coord in xrange(start, end + 1))

    oNX.save()
Пример #15
0
def markMismatchedPairs(aFN, rn=None, tn=None):
    '''Flag, per alignment, whether a mismatch falls in each of three position windows.

    mismatchStatus of every alignment becomes a three-element list of
    booleans: entry 0 is True when any of positions 8-11 is among the
    alignment's mismatchPositions, entry 1 for positions 7-12 and entry 2
    for positions 6-13.  [rn, tn] is passed to the load; the file is saved.
    '''
    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['mismatchStatus', 'mismatchPositions'], [rn, tn])

    # remember the positions are 0-based, so 10 is 9; narrowest window first
    windows = (range(8, 12), range(7, 13), range(6, 14))

    for aID in aNX.mismatchStatus:
        positions = aNX.mismatchPositions[aID]
        aNX.mismatchStatus[aID] = [
            any(pos in positions for pos in window) for window in windows
        ]

    aNX.save()
Пример #16
0
def getGeneGC(genePropFN, rn=None, tn=None):
    '''Compute the G+C fraction of every gene in genePropFN and store it in geneGCContent.

    For each ID, the geneStarts/geneEnds pairs are turned into
    'chrom:strand:start:end' strings, the sequence of each is fetched from
    an 'hg19' GenomeFetch, and the number of 'G' and 'C' characters over all
    spans is divided by the total fetched length.  [rn, tn] is passed to the
    load; the file is saved at the end.
    '''
    fetcher = gf.GenomeFetch('hg19')

    NX = cgNexusFlat.Nexus(genePropFN, geneProperty)
    columns = ['geneName', 'geneChrom', 'geneStrand', 'geneStarts',
               'geneEnds', 'geneGCContent']
    NX.load(columns, [rn, tn])

    for geneID in NX.ids:

        basesSeen = 0
        gcSeen = 0
        for start, end in zip(NX.geneStarts[geneID], NX.geneEnds[geneID]):
            span = '%s:%s:%s:%s' % (NX.geneChrom[geneID],
                                    NX.geneStrand[geneID], start, end)
            seq = fetcher.getSequence(span)
            basesSeen += len(seq)
            gcSeen += seq.count('G') + seq.count('C')

        NX.geneGCContent[geneID] = float(gcSeen) / basesSeen

    NX.save()
Пример #17
0
def updateGeneData(geneRanges, genePropFN):
    '''Fill chromosome, strand, starts and ends of the genes in genePropFN from a ranges file.

    geneRanges -- tab-separated text file read as: column 0 gene name,
                  column 1 chromosome, column 2 strand, columns 3 and 4
                  comma-separated integer starts and ends
    genePropFN -- gene property file; rows are matched by geneName

    Lines naming a gene that is not in genePropFN are ignored.  The file is
    saved at the end.
    '''
    NX = cgNexusFlat.Nexus(genePropFN, geneProperty)
    NX.load(['geneName', 'geneChrom', 'geneStrand', 'geneStarts', 'geneEnds'])

    #make inverse dictionary
    gName_nID = {}
    for geneID in NX.ids:
        gName_nID[NX.geneName[geneID]] = geneID

    # 'with' closes the file even when a line fails to parse
    with open(geneRanges, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')

            geneName = ls[0]
            sChrom, sStrand = ls[1], ls[2]
            geneStarts = [int(x) for x in ls[3].split(',')]
            geneEnds = [int(x) for x in ls[4].split(',')]

            nID = gName_nID.get(geneName)

            # compare against None: a plain truth test would skip an ID of 0
            if nID is None:
                continue

            NX.geneChrom[nID] = sChrom
            NX.geneStrand[nID] = sStrand
            NX.geneStarts[nID] = geneStarts
            NX.geneEnds[nID] = geneEnds

    NX.save()
Пример #18
0
def filterOrigin(oFN, rn=None, tn=None):
    '''Set passedFilter for every ID in oFN.

    An ID passes only when its entropy is not below 1.15, neither its
    endContigLength nor its totalContigLength exceeds 6, its
    sequenceDuplicate is false and its filteredTargets is non-empty.
    [rn, tn] is passed to the load; the file is saved at the end.
    '''
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    columns = ['filteredTargets', 'endContigLength', 'totalContigLength',
               'sequenceDuplicate', 'passedFilter', 'entropy']
    oNX.load(columns, [rn, tn])

    for oID in oNX.entropy:

        # fail by default; flipped below only when no rejection rule fires
        oNX.passedFilter[oID] = False

        rejected = (oNX.entropy[oID] < 1.15
                    or oNX.endContigLength[oID] > 6
                    or oNX.totalContigLength[oID] > 6
                    or oNX.sequenceDuplicate[oID]
                    or not oNX.filteredTargets[oID])

        if not rejected:
            oNX.passedFilter[oID] = True

    oNX.save()
Пример #19
0
def getGeneLength(geneRanges, genePropFN):
    '''Store the summed span length of each gene in genePropFN's geneLength column.

    geneRanges -- tab-separated text file read as: column 0 gene name,
                  columns 3 and 4 comma-separated integer starts and ends
    genePropFN -- gene property file; rows are matched by geneName

    The length of a gene is the sum of (end - start) over its start/end
    pairs.  Lines naming a gene that is not in genePropFN are ignored.  The
    file is saved at the end.
    '''
    NX = cgNexusFlat.Nexus(genePropFN, geneProperty)
    NX.load(['geneName', 'geneLength'])

    #make inverse dictionary
    gName_nID = {}
    for geneID in NX.ids:
        gName_nID[NX.geneName[geneID]] = geneID

    # 'with' closes the file even when a line fails to parse
    with open(geneRanges, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')

            geneName = ls[0]
            geneStarts = [int(x) for x in ls[3].split(',')]
            geneEnds = [int(x) for x in ls[4].split(',')]
            totalLength = sum(
                end - start for start, end in zip(geneStarts, geneEnds))

            nID = gName_nID.get(geneName)

            # compare against None: a plain truth test would skip an ID of 0
            if nID is not None:
                NX.geneLength[nID] = totalLength

    NX.save()
Пример #20
0
def totalSNRSS(oFN, SNRToggle=False):

    if SNRToggle == 'True':
        SNRToggle = True
    else:
        SNRToggle = False

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['snrSS', 'numSignificantSequences', 'avgNumSS'])

    highSNRs = []
    for oID in oNX.snrSS:

        snrSS = oNX.snrSS[oID]
        if snrSS > 2:
            highSNRs.append(snrSS)

    if SNRToggle:
        try:
            avgSNR = sum(highSNRs) / len(highSNRs)
            print avgSNR,
        except:
            print '0.0',
    else:
        print len(highSNRs),
Пример #21
0
def totalSNR(oFN):

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(
        ['snr', 'filteredTargets', 'avgNumSimulationTargets', 'passedFilter'])

    totalRun = 0
    totalSim = 0
    simsTotals = []
    n = 0
    highSNR = 0
    for oID in oNX.avgNumSimulationTargets:

        #filter out
        if not oNX.passedFilter[oID]:
            continue

        #collect stats
        totalRun += len(oNX.filteredTargets[oID])
        simsTotal = oNX.avgNumSimulationTargets[oID] * 10
        simsTotals.append(simsTotal)
        totalSim += oNX.avgNumSimulationTargets[oID]

        if oNX.snr[oID] > 2:
            highSNR += 1
        n += 1

    print oFN
    print 'Total Number Targets for my run:', totalRun
    print 'Total Number Targets for Simulations:', totalSim
    print 'SNR', float(totalRun) / float(totalSim)
    print 'Total oRNA:', n, 'Total oRNA w/ SNR > 2:', highSNR
    print '\n'
Пример #22
0
def updateFiltered(oFN):
    '''Set passedFilter for every ID in oFN.

    An ID passes only when it has at least one filteredTarget, neither its
    endContigLength nor its totalContigLength exceeds 6, its entropy is not
    below 1.2 and its sequenceDuplicate is false.  The file is saved at the
    end.
    '''
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    columns = ['filteredTargets', 'endContigLength', 'totalContigLength',
               'entropy', 'sequenceDuplicate', 'passedFilter']
    oNX.load(columns)

    for oID in oNX.passedFilter:

        # fail by default; flipped below only when no rejection rule fires
        oNX.passedFilter[oID] = False

        rejected = (len(oNX.filteredTargets[oID]) == 0
                    or oNX.endContigLength[oID] > 6
                    or oNX.totalContigLength[oID] > 6
                    or oNX.entropy[oID] < 1.2
                    or oNX.sequenceDuplicate[oID])

        if not rejected:
            oNX.passedFilter[oID] = True

    oNX.save()
Пример #23
0
def correlationSNR(oFN):

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['phastScores', 'snrSS'])

    snrX = []
    scoreY = []
    for oID in oNX.phastScores:

        snr = oNX.snrSS[oID]
        avgScore = sum(oNX.phastScores[oID]) / float(len(oNX.phastScores[oID]))

        snrX.append(snr)
        scoreY.append(avgScore)
        '''                
                for pScore in oNX.phastScores[oID]:
                        snrX.append(snr)
                        scoreY.append(pScore)
                '''

    conserved = [True if x > .8 else False for x in scoreY]
    print conserved.count(True)
    print len(conserved)
    plt.title('SNR vs PhastCons Score')

    plt.ylabel('Avg PhastCons Score of oRNA')

    plt.xlabel('SNR')

    plt.plot(snrX, scoreY, 'ro')
    plt.show()
Пример #24
0
def oRNAContextPie(oFN, imgName):
    '''REMEMBER!!! Have to do with grouped results...'''
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['context', 'snrSS'])

    context_count = {}

    for oID in oNX.context:

        if oNX.snrSS[oID] < 2.00: continue
        con = oNX.context[oID]
        print con
        context_count[con] = context_count.get(con, 0) + 1

    labels = sorted(context_count.keys())
    counts = [context_count[x] for x in labels]
    fracs = pieFractions(counts)

    #add numbers to labels
    labels = ['%s (%s)' % (x, context_count[x]) for x in labels]

    #plot
    plt.title('Context of oRNA (results > 2.00 SNR)')
    plt.pie(fracs, labels=labels, shadow=True)
    plt.savefig(imgName, bbox_inches='tight', pad_inches=1)
Пример #25
0
def updateEndContig(oFN, rn = None, tn = None):
    '''Store in endContigLength the longer of the repeated-letter runs at the two ends of each sequence.

    The run at the start of the sequence and the run at the start of the
    reversed sequence are measured; the larger length is stored.  [rn, tn]
    is passed to the load; the file is saved at the end.
    '''

    def leadingRun(chars):
        # number of leading elements equal to their predecessor, counting
        # the first element itself (so the result is at least 1)
        run = 1
        for idx in range(1, len(chars)):
            if chars[idx] != chars[idx - 1]:
                break
            run += 1
        return run

    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['sequence', 'endContigLength'], [rn, tn])

    for oID in oNX.sequence:
        seq = oNX.sequence[oID]

        run5 = leadingRun(seq)                     # 5'
        run3 = leadingRun(list(reversed(seq)))     # 3'

        oNX.endContigLength[oID] = max(run5, run3)

    oNX.save()
Пример #26
0
def loadSeqs(seqFN):

    seqNX = cgNexusFlat.Nexus(seqFN, Seq)
    seqNX.load(['length', 'sequence'])

    print seqNX.length[100000], seqNX.sequence[100000]
    print 'done loading'
def markMismatchedPairs(aFN, rn=None, tn=None):
    '''Flag, per alignment, whether a mismatch falls in each of three position windows.

    mismatchStatus of every alignment becomes a three-element list of
    booleans: entry 0 is True when any of positions 9-12 is among the
    alignment's mismatchPositions, entry 1 for positions 8-13 and entry 2
    for positions 7-14.  [rn, tn] is passed to the load; the file is saved.
    '''
    aNX = cgNexusFlat.Nexus(aFN, cgAlignmentFlat.cgAlignment)
    aNX.load(['mismatchStatus', 'mismatchPositions'], [rn, tn])

    # the windows do not depend on the alignment, so build them once
    lowRange = range(9, 13)
    midRange = range(8, 14)
    highRange = range(7, 15)

    for aID in aNX.mismatchStatus:

        positions = aNX.mismatchPositions[aID]

        # The high window used to be tested against aNX.mismatchPositions
        # itself (the whole container) rather than this alignment's
        # positions; all three windows now use the same per-alignment value.
        aNX.mismatchStatus[aID] = [
            any(i in positions for i in lowRange),
            any(i in positions for i in midRange),
            any(i in positions for i in highRange),
        ]

    aNX.save()
def updateSharedTargets(oFN, rn=None, tn=None):
    '''Give every ID the union of the filteredTargets of all IDs sharing its sequence.

    Just because there are duplicate sequences does not mean that the
    genomic position of each result is the correct one, so the targets for
    each genomic position should be the same as the targets for each
    duplicate sequence.

    The targets of all IDs with the same sequence are pooled into a set and
    each ID's filteredTargets is replaced by a list of that set.  [rn, tn]
    is passed to the load; the file is saved at the end.
    '''
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['sequence', 'filteredTargets'], [rn, tn])

    # sequence -> every target seen for any ID with that sequence
    pooledTargets = {}
    for oID in oNX.sequence:
        pool = pooledTargets.setdefault(oNX.sequence[oID], set())
        for tID in oNX.filteredTargets[oID]:
            pool.add(tID)

    # hand the pooled targets back to each ID
    for oID in oNX.sequence:
        oNX.filteredTargets[oID] = list(pooledTargets[oNX.sequence[oID]])

    oNX.save()
Пример #29
0
def oRNATypePie(oFN, imgName, rn=None, tn=None):
    '''Save a pie chart of the transcriptTypes of the IDs in oFN whose snrSS is at least 2.00.

    Every entry of an ID's transcriptTypes is counted.  Slices are ordered
    by sorted type name, labelled 'name (count)', sized by pieFractions()
    and the figure is written to imgName.

    rn, tn -- passed through as [rn, tn] to the load.  They were referenced
              without being defined before, which raised NameError; they are
              now optional parameters.
    '''
    oNX = cgNexusFlat.Nexus(oFN, cgOriginRNAFlat.OriginRNA)
    oNX.load(['snrSS', 'transcriptType', 'transcriptTypes'], [rn, tn])

    context_count = {}

    for oID in oNX.transcriptType:

        if oNX.snrSS[oID] < 2.00: continue
        # count every type of the ID, not only its single transcriptType
        for con in oNX.transcriptTypes[oID]:
            context_count[con] = context_count.get(con, 0) + 1

    labels = sorted(context_count.keys())
    counts = [context_count[x] for x in labels]
    fracs = pieFractions(counts)

    #add numbers to labels
    labels = ['%s (%s)' % (x, context_count[x]) for x in labels]

    #plot
    plt.pie(fracs, labels=labels, shadow=True)
    plt.savefig(imgName, bbox_inches='tight', pad_inches=1)
Пример #30
0
def updateContext(oFN, wigDir, chrom, strand, rn=None, tn=None):

    oNX = cgNexusFlat.Nexus(oFN, degPeak.degPeak)
    oNX.load(['context', 'tcc'], [rn, tn])

    print 'loading wig'
    coord_contexts = cgWig.loadSingleWigContext(wigDir, chrom, strand,
                                                'context')
    print 'done loading'

    ds = bioLibCG.dominantSpotter([
        'C_EXON', 'C_3UTR', 'C_5UTR', 'NC_EXON', 'NC_3UTR', 'NC_5UTR',
        'C_INTRON', 'NC_INTRON', 'INTER'
    ])

    for oID in oNX.tcc:

        oChrom, oStrand, start, end = bioLibCG.tccSplit(oNX.tcc[oID])

        #deg wigs is AS to actual clipping site
        if oStrand == '1':
            oStrand = '-1'
        else:
            oStrand = '1'

        if oChrom == chrom and oStrand == strand:

            contexts = coord_contexts.get(start, 'INTER').split(',')
            oNX.context[oID] = ds.spotItem(contexts)

    oNX.save()