def transcriptSetOverlap(aDir, AS):
    """Flag every origin RNA stored in ``aDir`` with a ``transcriptOverlap`` boolean.

    The OriginRNA objects are loaded through ``cgNexusFlat.dataController``,
    their coordinates (``tcc``) are checked against the transcripts of the
    Ensembl gene set file, and the objects are committed back after the
    ``transcriptOverlap`` attribute has been set on each of them.

    aDir -- directory handed to the data controller.
    AS   -- truthy to test the antisense version of each coordinate
            (``cg.convertToAS``) instead of the coordinate itself. The
            antisense peak is the location of the targeted transcript.
    """
    useAntisense = bool(AS)

    transcriptFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    geneSet = cgGenes.createGeneSetFromFile(transcriptFN)

    controller = cgNexusFlat.dataController(aDir, cgOriginRNA.OriginRNA)
    id_oRNA = controller.load()

    def queryTcc(oRNA):
        # Coordinate that is actually tested for this origin RNA.
        if useAntisense:
            return cg.convertToAS(oRNA.tcc)
        return oRNA.tcc

    # Degradome coordinates, one per loaded origin RNA.
    degTccs = [queryTcc(oRNA) for oRNA in id_oRNA.values()]

    # Transcripts hit by any coordinate, then the coordinates that hit them.
    # NOTE(review): the trailing 1 presumably asks compareTwoTcc for the
    # members of the first list -- confirm against compare.compareTwoTcc.
    hitTccs = [exon.tcc for exon in geneSet.transcriptOverlaps(degTccs)]
    overlappingDegTccs = compare.compareTwoTcc(degTccs, hitTccs, 1)

    # Record the result on every object and write the data set back.
    for oRNA in id_oRNA.values():
        oRNA.transcriptOverlap = queryTcc(oRNA) in overlappingDegTccs

    controller.commit(id_oRNA)
def _alignmentTargetTcc(alignment):
    """Return the tcc of the target window described by ``alignment``.

    The window is derived from the alignment's target coordinate (``tTcc``),
    its ``tStart`` offset and its ``sLength``. The strand decides which end
    of the target coordinate the window is anchored to.
    """
    chrom, strand, start, end = cg.tccSplit(alignment.tTcc)
    offset = alignment.tStart
    sLen = alignment.sLength

    # NOTE(review): 19 looks like a fixed padding that was added around the
    # target when tTcc was built -- confirm where that value comes from.
    if strand == '1':
        start = start - 19 + offset
        end = start + sLen
    else:
        end = end + 19 - offset
        start = end - sLen

    return cg.makeTcc(chrom, strand, start, end)


def transcriptSetOverlapTargets(aDir):
    """Flag every alignment stored in ``aDir`` with a ``transcriptOverlap`` boolean.

    Alignments are loaded through ``cgNexusFlat.dataController``. For each one
    the target window is computed, converted to its antisense coordinate (the
    location of the targeted transcript) and tested against the transcripts of
    the Ensembl gene set file. The result is stored on the alignment and the
    whole set is committed back.

    aDir -- directory handed to the data controller.
    """
    geneSetFN = '/home/chrisgre/dataSources/known/Human/geneSets/ensemblAllTranscripts.tsv'
    allExons = cgGenes.createGeneSetFromFile(geneSetFN)

    aDC = cgNexusFlat.dataController(aDir, cgAlignment.cgAlignment)
    id_alignment = aDC.load()

    # Unique target windows in first-seen order. The set only speeds up the
    # membership test; tccs are treated as plain strings elsewhere in this
    # file, so they are hashable.
    uniqTccs = []
    seenTccs = set()
    for alignment in id_alignment.values():
        tcc = _alignmentTargetTcc(alignment)
        if tcc not in seenTccs:
            seenTccs.add(tcc)
            uniqTccs.append(tcc)

    degTccs = [cg.convertToAS(x) for x in uniqTccs]

    # Find all overlapping transcripts, then the coordinates that overlap them.
    overlappingExons = allExons.transcriptOverlaps(degTccs)
    overlappingExonTccs = [x.tcc for x in overlappingExons]
    overlappingDegTccs = compare.compareTwoTcc(degTccs, overlappingExonTccs, 1)

    # Update each alignment from ITS OWN coordinates. The previous version
    # read the loop variable left over from the loop above, so every
    # alignment received the flag of the last one.
    for alignment in id_alignment.values():
        degTcc = cg.convertToAS(_alignmentTargetTcc(alignment))
        alignment.transcriptOverlap = degTcc in overlappingDegTccs

    aDC.commit(id_alignment)
def updateMicroRNAOverlap(aDir, microFN):
    """Flag every origin RNA stored in ``aDir`` with a ``microOverlap`` boolean.

    aDir    -- directory handed to ``cgNexusFlat.dataController``.
    microFN -- text file holding one microRNA coordinate (tcc) per line.

    The origin RNA coordinates are compared against the microRNA coordinates
    with ``compare.compareTwoTcc``. Each object's ``microOverlap`` is set to
    whether its ``tcc`` is among the returned overlaps, and the objects are
    committed back.
    """
    oRNA_DC = cgNexusFlat.dataController(aDir, cgOriginRNA.OriginRNA)
    id_oRNA = oRNA_DC.load()

    # The with-block closes the file even if reading raises.
    with open(microFN, 'r') as f:
        microCoords = [x.strip() for x in f]

    smallCoords = [x.tcc for x in id_oRNA.values()]

    # NOTE(review): the trailing 2 presumably asks compareTwoTcc for the
    # members of the second list (the small RNA coordinates) -- confirm.
    smallOverlaps = compare.compareTwoTcc(microCoords, smallCoords, 2)

    # For each sRNA, save the overlap value.
    for oRNA in id_oRNA.values():
        oRNA.microOverlap = oRNA.tcc in smallOverlaps

    oRNA_DC.commit(id_oRNA)
# NOTE(review): countForError is defined a second time further down in this
# file; the later definition replaces this one when the module is loaded.
def countForError(oDir, filteredFile):
    """Print the filtered-target total of the selected sRNAs for simulations 0-9.

    oDir         -- unused; kept so existing callers keep working.
    filteredFile -- text file with one integer sRNA id per line. Blank lines
                    are skipped.

    For each simulation index the index itself is printed, followed by the
    sum of ``len(sRNA.filteredTargets)`` over the loaded sRNAs whose id is
    listed in ``filteredFile``.
    """
    # A set gives constant-time membership tests in the loop below, and the
    # with-block guarantees the file is closed.
    with open(filteredFile, 'r') as f:
        keepIDs = set(int(line.strip()) for line in f if line.strip())

    for i in range(0, 10):
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(i)
        simDirRNA = '/home/chrisgre/scripts/simulations/simsk50Filtered/simulation.%s/oRNA' % i
        oDC = cgNexusFlat.dataController(simDirRNA, cgOriginRNA.OriginRNA)
        id_sRNA = oDC.load()

        groupTotal = 0
        for sID, sRNA in id_sRNA.items():
            if sID in keepIDs:
                groupTotal += len(sRNA.filteredTargets)

        print(groupTotal)
# NOTE(review): this is the second definition of countForError in this file;
# being the later one, it is the definition that stays bound to the name.
def countForError(oDir, filteredFile):
    """Print the filtered-target total of the selected sRNAs for simulations 0-9.

    oDir         -- unused; kept so existing callers keep working.
    filteredFile -- text file with one integer sRNA id per line. Blank lines
                    are skipped.

    For each simulation index the index itself is printed, followed by the
    sum of ``len(sRNA.filteredTargets)`` over the loaded sRNAs whose id is
    listed in ``filteredFile``.
    """
    # A set gives constant-time membership tests in the loop below, and the
    # with-block guarantees the file is closed.
    with open(filteredFile, 'r') as f:
        keepIDs = set(int(line.strip()) for line in f if line.strip())

    for i in range(0, 10):
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(i)
        simDirRNA = '/home/chrisgre/scripts/simulations/simsk50Filtered/simulation.%s/oRNA' % i
        oDC = cgNexusFlat.dataController(simDirRNA, cgOriginRNA.OriginRNA)
        id_sRNA = oDC.load()

        groupTotal = 0
        for sID, sRNA in id_sRNA.items():
            if sID in keepIDs:
                groupTotal += len(sRNA.filteredTargets)

        print(groupTotal)