def __findTs(edges, ciss, proms):
    """
    findTs builds the transaction lists indexed by TF name plus site length.

    Parameters
    ----------
    edges : dict
        TF name -> list of cis-element ids.
    ciss : dict
        cis-element id -> Cis object (provides .promname and .slen).
    proms : dict
        transformed dictionary of promoters: promoter name -> transaction.

    Returns
    -------
    dict
        '<TF name><site length>' -> list of promoter transactions, with
        at most one entry per distinct promoter of that TF.
    """
    result = dict()
    for key in edges.keys():
        # set (was a list) for O(1) membership; promoter names seen for this TF
        seen = set()
        for cis in edges[key]:
            pname = ciss[cis].promname
            if pname in seen:
                continue
            seen.add(pname)
            # group key combines the TF name with the binding-site length
            nkey = key + str(ciss[cis].slen)
            dictInsert(result, nkey, proms[pname])
    return result
def getEntropyWeights(e, ciss):
    """
    getEntropyWeights calculates entropy-based position weights for every
    cis-element, derived from groups of elements (partial edges ``e``)
    that share the same TF and site length.

    Parameters
    ----------
    e : dict
        TF name -> list of cis-element ids.
    ciss : dict
        cis-element id -> Cis object (provides .slen and .seq).

    Returns
    -------
    dict
        cis-element id -> list of weights, one per sequence position,
        e.g. {2019: [0.1, 0.4, 0.9, ...]}. Elements not covered by any
        group keep the default weight 1.0 at every position.
    """
    # default: uniform weight 1.0 for every position of every element
    weights = dict()
    for key in ciss.keys():
        weights[key] = [1.0] * ciss[key].slen
    # Bucket the cis-elements of each TF by site length.
    # BUG FIX: the original used dict.fromkeys(e.keys()), which left a
    # None placeholder for TFs whose edge list was empty; the second
    # pass then crashed on groups[key].keys(). Only TFs with elements
    # get an entry now.
    groups = dict()
    for key in e.keys():
        all_cis = e[key]
        if not all_cis:
            continue
        groups[key] = dict()
        for cis in all_cis:
            cc = ciss[cis]
            dictInsert(groups[key], cc.slen, cis)
    # one PWM per (TF, length) bucket; its entropy weights are shared by
    # every element in the bucket
    for key in groups.keys():
        for l in groups[key].keys():
            pwm = PWM()
            for cisId in groups[key][l]:
                pwm.addSeq(ciss[cisId].seq)
            tmp_weights = pwm.getEntropyWeights()
            for cisId in groups[key][l]:
                weights[cisId] = tmp_weights
    return weights
def listToDict(lst):
    """
    Restore a dictionary from a list of (key, value) tuples.
    See ref of dictToList().
    """
    result = dict()
    for pair in lst:
        dictInsert(result, pair[0], pair[1])
    return result
def summarizePtns(pPtns, nPtns):
    """
    summarizePtns will summarize the patterns into succinct rules.

    Per key: a positive pattern is kept when it has no proper superset
    among that key's patterns, or when every such superset is negative;
    a negative pattern is kept when it has no proper subset, or when
    every such subset is positive.

    Parameters
    ----------
    pPtns : dict
        key -> list of positive patterns (hashable set-like objects).
    nPtns : dict
        key -> list of negative patterns (hashable set-like objects).

    Returns
    -------
    tuple of (dict, dict)
        (ppatterns, npatterns), the summarized rules per key.
    """
    ppatterns = dict()
    npatterns = dict()
    for key in pPtns:
        # sbNet: pattern -> its proper supersets (edges subset -> superset)
        # spNet: pattern -> its proper subsets  (edges superset -> subset)
        sbNet = dict()
        spNet = dict()
        A = pPtns[key]
        B = nPtns[key]
        C = list(set(A + B))
        tempP = []
        tempN = []
        for i in C:
            for j in C:
                if i == j:
                    continue
                if i.issubset(j):
                    dictInsert(sbNet, i, j)
                elif i.issuperset(j):
                    dictInsert(spNet, i, j)
        for x in A:
            if x not in sbNet:
                tempP.append(x)
                continue
            # BUG FIX: removed leftover debug `print "#####", x, plst`
            # (Python-2-only statement that spammed stdout)
            if all([y in B for y in sbNet[x]]):
                tempP.append(x)
        for x in B:
            if x not in spNet:
                tempN.append(x)
                continue
            if all([y in A for y in spNet[x]]):
                tempN.append(x)
        ppatterns[key] = tempP
        npatterns[key] = tempN
    return (ppatterns, npatterns)
def getCis(path):
    """
    getCis takes 1 argument as the file path to 'BindingSiteSet.txt'
    and returns the result as a tuple of three dictionaries:

    1. edges: key=TF name, data=keys of attrs (row numbers)
    2. attrs: key=key of attrs (row number), data=Cis of detailed attributes
    3. proms: key=promoter name, data=keys of attrs (row numbers)

    Expected tab-separated columns:
    TFId, TFName, TFBSID, lend, rend, strand, interactionID,
    TU, type, promoter, pos, BSSeq, evidence
    """
    edges = dict()
    attrs = dict()
    proms = dict()
    result = (edges, attrs, proms)
    eindx = 0
    # 'with' closes the handle; the original's explicit file.close()
    # inside the block was redundant (and shadowed the builtin `file`)
    with open(path, 'rb') as fh:
        for line in fh:
            # skip comment lines
            if line.startswith('#'):
                continue
            tokens = line.split('\t')
            # keep the raw binding-site sequence before cleaning
            aseq = tokens[-3]
            attrCleaner(tokens)
            # skip rows whose cleaned sequence is empty
            if len(tokens[-3]) <= 0:
                continue
            attrs[eindx] = Cis(tokens[0], tokens[2],
                               tokens[1], tokens[9],
                               tokens[3], tokens[4],
                               tokens[10], tokens[11],
                               tokens[8], tokens[13],
                               tokens[5], aseq)
            dictInsert(edges, tokens[1], eindx)
            dictInsert(proms, tokens[9], eindx)
            eindx += 1
    return result
def constPatternReal(ciss, proms, degree=2):
    """
    constPatternReal creates groups of patterns based on their real
    labelings: each pattern is re-indexed under the concatenation of the
    distinct TF names of its cis-elements.
    """
    result = dict()
    for key, plst in constPattern(ciss, proms, degree).items():
        for p in plst:
            label = ''
            seen = []
            for cis in p.ciss:
                tf = cis.tfname
                if tf in seen:
                    continue
                seen.append(tf)
                label += tf
            # require at least `degree` distinct TFs in the pattern
            if len(seen) < degree:
                continue
            dictInsert(result, label, p)
    return result
def constPattern(ciss, proms, degree=2):
    """
    constPattern creates new data points that are tuples of cis
    elements, ordered by their left-end position.
    """
    result = dict()
    for key, prom in proms.items():
        # need at least `degree` elements to form a pattern
        if len(prom) < degree:
            continue
        clst = sorted((ciss[c] for c in prom),
                      key=lambda x: float(x.lend))
        for elem in __getSubset(clst, degree):
            dictInsert(result, key, Pattern(elem, key))
    return result
def evalCisCollection(rules, weighted=False):
    """
    evalCisCollection stores the (length, support, confidence) triples
    in a dictionary list, and tracks per key the rule with the largest
    support set.
    """
    results = dict()
    maks = dict()
    for key in rules.keys():
        if not rules[key]:
            continue
        best_len = 0
        for elem in rules[key]:
            # only singleton antecedents that contain the key itself
            if len(elem[0]) > 1 or not elem[0].issuperset({key}):
                continue
            dictInsert(results, key, (len(elem[1]), elem[2], elem[3]))
            if len(elem[1]) > best_len:
                maks[key] = (elem[1], elem[2], elem[3])
                best_len = len(elem[1])
    perfs = dict()
    for key in results.keys():
        for ln, sup, conf in results[key]:
            if weighted:
                # repeat the confidence `sup` times to weight by support
                for _ in range(sup):
                    dictInsert(perfs, ln, conf)
            else:
                dictInsert(perfs, ln, conf)
    return (perfs, maks)
netpath = 'netdata.p'
(edges, ciss, proms) = exp.getPickle(netpath)
#sims=exp.getPickle('dmSimsL.p')
patterns = exp.constPatternReal(ciss, proms, degree=4)
sorted_sums = vs.sortSum(vs.summ(patterns, 4, oU=True))
groups = exp.__regroup(ciss)
# per group: collect the distinct cis sequences and the ids carrying them
max_group = dict()
unique = dict()
for key in groups.keys():
    grp = groups[key]
    # BUG FIX: the duplicate check read unique[key] while the inserts
    # used unique[groups[key]], so no duplicate was ever skipped; both
    # sides now use the group id.
    if grp in unique and ciss[key].seq in unique[grp]:
        continue
    dictInsert(unique, grp, ciss[key].seq)
    dictInsert(max_group, grp, key)
# pick the group with the most distinct members
max_len = 0
result = 0
for key in max_group.keys():
    temp = len(max_group[key])
    if temp > max_len:
        max_len = temp
        result = key