def sortClass2sBySpcsPair(line,genomeTokens):
    """
    Group the gene pairs held in the last field of `line` by species pair and
    write the re-ordered record to the module-level `outFile`.

    genomeTokens -> ['SpcA','SpcB','SpcC']

    The species pairs are the sorted 2-combinations of genomeTokens; only the
    first three are used.  A gene pair goes to the first species pair for
    which both tokens occur in the concatenated gene names; gene pairs that
    match none of the three are dropped.

    `line` is changed in place: the three per-species-pair counts are inserted
    just before its last field, and the last field is replaced by the gene
    pairs re-ordered by species pair.  Nothing is returned.
    """
    spcsPairs = sorted(tuple(sorted(combo)) for combo in xuniqueCombinations(genomeTokens,2))
    pairA, pairB, pairC = spcsPairs[0], spcsPairs[1], spcsPairs[2]
    buckets = {pairA: [], pairB: [], pairC: []}

    # NOTE(review): eval() executes whatever text sits in line[-1]; only safe
    # while the input file is trusted.
    for genePair in eval(line[-1]):
        joinedNames = ''.join(genePair)
        # First matching species pair wins, in sorted species-pair order.
        for spcsPair in (pairA, pairB, pairC):
            if (spcsPair[0] in joinedNames) and (spcsPair[1] in joinedNames):
                buckets[spcsPair].append(genePair)
                break

    grouped = [buckets[pairA], buckets[pairB], buckets[pairC]]

    # Insert the counts ahead of the last field, then swap that field for the
    # grouped gene pairs.
    line[-1:-1] = [str(len(group)) for group in grouped]
    line[-1] = grouped[0] + grouped[1] + grouped[2]

    # Write the counts to each pair
    outFile.write('%s\t%s\n' % ('\t'.join(line[:-1]),line[-1]))
# Scan the rows of miR for the "allPassedSeedsFor_<N>" entries and record,
# per orthoType N, the genes returned by getAGAP(line[5]) and how many there are.
# `data`, `miR`, `mirDict` and `getAGAP` are created outside this fragment.
for line in miR:
    if line[1].startswith("allPassedSeedsFor_"):
        # The orthoType is taken from the final character of the row label.
        orthoType = int(line[1][-1])
        data.orthoTypes.append(orthoType)
        data.numTot[orthoType] = len(getAGAP(line[5]))
        data.AGAPgenes[orthoType] = getAGAP(line[5])
        #for i in miR:
            #if i[2].startswith('orthoType_%s' % (orthoType)):
                #if i[1]+'_fdr' in data:
                    #data[i[1]+'_fdr'][orthoType] = i[4]
# NOTE(review): this presumably runs once per miRNA inside an enclosing loop
# that is not visible here -- confirm the nesting against the full file.
mirDict[data.name] = data

# ---- Work out combinations ----
# Every unordered pair of miRNA names, each pair sorted and the list of pairs sorted.
mirCombos = sorted([sorted(x) for x in xuniqueCombinations(mirDict.keys(),2)])
l = len(mirCombos)
outTmp = []
for mCombo in mirCombos:
    # i takes the values 2 and 3 (used below as the "Class" number).
    for i in range(2,4):
        # Skip the pair for this class when either miRNA has no total recorded.
        if (mirDict[mCombo[0]].numTot[i] == None) or (mirDict[mCombo[1]].numTot[i] == None):
            pass
        else:
            cmbo = ':'.join(mCombo)
            clas = 'Class %s' % (i)
            # "total1:total2" for the two miRNAs of the pair.
            eatot = '%s:%s' % (mirDict[mCombo[0]].numTot[i],mirDict[mCombo[1]].numTot[i])
            # The AGAPgenes entries must support .intersection(); presumably sets.
            c1Set = mirDict[mCombo[0]].AGAPgenes[i]
            c2Set = mirDict[mCombo[1]].AGAPgenes[i]
            # Size and sorted membership of the shared gene set, both as strings.
            inTot = '%s' % (len(c1Set.intersection(c2Set)))
            inter = '%s' % (sorted(list(c1Set.intersection(c2Set))))
def countHitsInOrthos4(self,genomeToken,returnGenes=True):
    """
    Uses results of miRNA.tallyHits() and self.orthos to collect which genes the
    miRNA seed hits in at least one genome, in at least two orthologs, and in
    all three orthologs.  Only ortholog sets whose hit gene names contain
    genomeToken contribute.

    genomeToken -- substring searched for in the hit gene names.
    returnGenes -- if true, the collected genes/pairs/triples are stored on
                   self.matchEvents and self.ctrlEvents and returned; if false
                   only the counters are updated and None is returned.

    If returnGenes: returns tuple of two dicts:
        matchDict(keys=seedType : vals=[None,genesWithMatch,genePairsWithMatch,geneTriplesWithMatch])
        ctrlDict(keys=seedType : vals=one [None,genes,genePairs,geneTriples] entry per control,
                 as built by JamesDefs.initList -- assumed to give every control
                 its own copy of the template; TODO confirm)

    Side effects: self.matchCounts[seedType] and self.ctrlCounts[seedType] are
    reset to zeros for every seed type.  The only counter updated afterwards is
    self.matchCounts[seedType][1], incremented for each ortholog set with
    exactly one hit gene whose name contains genomeToken.

    Raises AssertionError if the hits have not been tallied, if an ortholog set
    does not hold exactly three genes, or if a collected match list contains
    duplicates.
    """
    # make sure we have tallied the hits already.
    assert self.matchData and self.ctrlData, \
           'ERROR: It looks like we have not tallied the hits yet. Call miRNA.tallyHits() first.'

    def recordEvents(hitGenes, events):
        """Add the genes/pairs/triple of one ortholog set to events[1..3]."""
        numHit = len(hitGenes)
        if numHit < 1 or numHit > 3:
            return
        # Ignore ortholog sets with no hit in the genome of interest.
        if ''.join(hitGenes).find(genomeToken) == -1:
            return
        events[1].extend([x for x in hitGenes if x.find(genomeToken) != -1])
        if numHit == 2:
            events[2].append(tuple(sorted(hitGenes)))
        elif numHit == 3:
            type2 = [tuple(sorted(x)) for x in xpermutations.xuniqueCombinations(hitGenes,2)
                     if ''.join(x).find(genomeToken) != -1]
            events[2].extend(type2)
            events[3].append(tuple(sorted(hitGenes)))

    if returnGenes:
        rGeneNames = {}
        rCtrlNames = {}
        for seedType in _seedModels:
            rGeneNames[seedType] = [None,[],[],[]]
            rCtrlNames[seedType] = JamesDefs.initList(len(self.matchVersions[seedType][1]),[None,[],[],[]])

    # Initialize self.matchCounts/self.ctrlCounts
    for seedType in _seedModels:
        self.matchCounts[seedType] = [0,0,0,0]
        self.ctrlCounts[seedType] = [[0]*4 for i in range(len(self.matchVersions[seedType][1]))]

    # Cycle through self.orthos
    for orthoSet in self.orthos:
        assert len(orthoSet) == 3,\
               'ERROR: It seems len(%s) != 3.' % (orthoSet,)

        # Query the matchData and ctrlData for hits in orthoSet
        for seedType in _seedModels:
            # Hit genes are gathered whether or not returnGenes is set: the
            # genomeToken test below needs their names either way.
            geneNames = [gene for gene in orthoSet if gene in self.matchData[seedType]]

            if len(geneNames) == 1 and ''.join(geneNames).find(genomeToken) != -1:
                self.matchCounts[seedType][1] += 1

            if returnGenes:
                recordEvents(geneNames, rGeneNames[seedType])
                # Same bookkeeping for each control of this seedType.
                for i in range(len(self.ctrlData[seedType])):
                    ctrlHits = [gene for gene in orthoSet if gene in self.ctrlData[seedType][i]]
                    recordEvents(ctrlHits, rCtrlNames[seedType][i])

    if returnGenes:
        # The lists only ever grow, so one redundancy check after the scan
        # catches anything a check made during the scan would have caught.
        for seedType in _seedModels:
            for i in range(1,4):
                assert len(rGeneNames[seedType][i]) == len(set(rGeneNames[seedType][i])),\
                       "ERROR: rGeneNames[%s] in miRNA(%s) has redundancy." % (i, self.name)

        # store and return rGeneNames
        self.matchEvents = rGeneNames
        self.ctrlEvents = rCtrlNames
        return (rGeneNames,rCtrlNames)
def findBestPairAlignments(listOfMotifObjs, minoverlap=6, verbose=None):
    """
    Takes: list of TAMO motif objects.  Finds best pairwise alignments among list members,
    trying both orientations.  Motifs in list are numbered by original index in results.

    minoverlap is passed straight through to getMinDiffOri().

    Returns: 2D list of results for each combination of motifs with the matrix coords corresponding
    to motif index in original list (exp: dist of motif0 and motif4 == 2dList[0][4]; BUT 2dList[4][0] == None).
    Always put lower index first or you will get 'None'.  Same index twice also gives 'None'.
    An empty input list gives [] and a single motif gives [[None]].

    Each value at the 2D coords contains a tuple: (alignOri,distScore,alignment,offset).
    alignOri = 1 == both motifs in original ori.
    alignOri = -1 == motif with higher index was revComped to get best score.

    verbose == True prints the scores, orientations and alignments for each motif pair.
    """
    # rename listOfMotifObjs for brevity
    motifs = listOfMotifObjs
    numMotifs = len(motifs)

    # Initialize empty return-matrix
    rMat = [[None]*numMotifs for i in range(numMotifs)]

    # Non-redundant index combos, lower index first, in lexicographic order
    toCompare = [(i, j) for i in range(numMotifs) for j in range(i+1, numMotifs)]

    for (lowIdx, highIdx) in toCompare:
        minDiffOri = getMinDiffOri(motifs[lowIdx], motifs[highIdx], minoverlap=minoverlap, getOffset=1)
        # If pos ori, then motif obj returned will be ref to original motifs[highIdx]
        # else: newly constructed revComp is returned
        if motifs[highIdx] is minDiffOri[0]:
            alignOri = 1
        else:
            alignOri = -1
        distScore = minDiffOri[1]
        offset = minDiffOri[2]
        alignment = alignPairWithOffSet(motifs[lowIdx], minDiffOri[0], offset)
        # Assign tuple to matrix coords:
        rMat[lowIdx][highIdx] = (alignOri, distScore, alignment, offset)

    # Write out the results if verbose
    if verbose:
        for (lowIdx, highIdx) in toCompare:
            result = rMat[lowIdx][highIdx]
            tmp = '%s:%s\t%s\t%.3G\t%s' \
                  % (str(lowIdx)+'_'+motifs[lowIdx].oneletter,
                     str(highIdx)+'_'+motifs[highIdx].oneletter,
                     result[0],
                     result[1],
                     result[2])
            # Futz with formating to allow alignments to match when pasted in an exclFile
            tmp = tmp.split('\n')
            # Only pad when there is a second row; a one-row alignment is
            # printed as-is instead of failing on the missing row.
            if len(tmp) > 1:
                spc = ' '*3
                add = spc.join(['\t']*tmp[0].count('\t'))
                tmp[1] = '%s%s%s\n' % (spc,add,tmp[1])
            # Parenthesised single-argument form prints identically under the
            # print statement and the print function.
            print('\n'.join(tmp))
    return rMat