def computeParametersU(self, P, PD, k1, k2):
    """
    Utility function to compute parameters for a potential candidate
    constraint pair when the input graph is undirected.

    The information content, description length and interestingness are
    written onto ``P``. A candidate with positive interestingness is tagged
    as a ``'merge'`` and stored in ``self.Data`` under the key ``(k1, k2)``.

    Parameters
    ----------
    P : Pattern
        Input pattern obtained by merging two constraints
    PD : PDClass
        Background distribution
    k1 : int
        identifier of first constraint
    k2 : int
        identifier of second constraint

    Raises
    ------
    Exception
        If the candidate has positive interestingness while both ``int(k1)``
        and ``int(k2)`` are present in ``self.curAdds``.
    """
    # Value returned by the background distribution for the merged pattern
    # with both constraint identifiers listed in dropLidx.
    nlambda = PD.updateDistribution(
        P.G, idx=None, val_return='return', case=3, dropLidx=[k1, k2]
    )

    priorLength = getCodeLengthParallel(
        P.G, PD, gtype=self.gtype, case=2, NL=P.NL, isSimple=self.isSimple
    )
    posteriorLength = getCodeLengthParallel(
        P.G, PD, gtype=self.gtype, case=5, NL=P.NL, isSimple=self.isSimple,
        dropLidx=[k1, k2], nlambda=nlambda
    )
    candidate = {
        'Pat': P,
        'codeLengthC': priorLength,
        'codeLengthCprime': posteriorLength,
    }

    P.setIC_dssg(priorLength - posteriorLength)
    descriptionLength = computeDescriptionLength(
        dlmode=8, excActionType=False, l=6, gtype=self.gtype,
        W=P.NCount, kw=P.ECount, C=len(PD.lprevUpdate),
        kws=P.kws, isSimple=self.isSimple
    )
    P.setDL(descriptionLength)
    P.setI(computeInterestingness(P.IC_dssg, P.DL, mode=self.imode))

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the sanity check and the store both belong under I > 0.
    if not P.I > 0:
        return
    P.setPrevOrder((int(k1), int(k2)))
    P.setPatType('merge')
    P.setLambda(nlambda)
    if int(k1) in self.curAdds and int(k2) in self.curAdds:
        raise Exception('ADD ADD MERGE EVALUATE HUA')
    self.Data[(k1, k2)] = candidate
    return
def computeCodeLengthSplitU(self, G, PD, condition, Params, Lidx=None):
    """
    Function to compute the codelength of a split candidate, if the input
    graph is undirected.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    condition : int
        condition to compute codelength
        1: Codelength of initial pattern or a single component
        2: Intra-component codelength of every component, plus the
           inter-component and excluded-node terms
        3: Condition 2 but with a new lambda for each component
    Params : dict
        value of parameters corresponding to the current split of the
        candidate constraint; reads ``'Pat'`` (condition 1) or ``'compos'``
        and ``'excludedNL'`` (conditions 2 and 3)
    Lidx : int, optional
        identifier of the constraint which is evaluated and dropped in some
        cases, by default None

    Returns
    -------
    float
        computed codelength; ``0.0`` when ``condition`` is not 1, 2 or 3
    """
    if condition == 1:
        return getCodeLengthParallel(
            G, PD, gtype=self.gtype, case=2, isSimple=self.isSimple,
            NL=Params['Pat'].NL
        )

    codelength = 0.0
    if condition not in (2, 3):
        return codelength

    compos = Params['compos']
    excludedNL = Params['excludedNL']

    # intra-component codelength computation
    for comp in compos.values():
        if condition == 2:
            codelength += getCodeLengthParallel(
                G, PD, gtype=self.gtype, case=2, isSimple=self.isSimple,
                NL=comp.NL
            )
        else:
            # condition 3: each component is evaluated with its own lambda
            codelength += getCodeLengthParallel(
                G, PD, gtype=self.gtype, case=5, isSimple=self.isSimple,
                NL=comp.NL, dropLidx=[Lidx], nlambda=comp.la
            )

    # inter-component codelength computation, one term per unordered pair of
    # components. The pairs are built from the actual dictionary keys (the
    # previous positional indexing raised KeyError unless the keys were
    # exactly 0..n-1); sorting keeps the pair order identical for such keys.
    keys = sorted(compos)
    for pos, first in enumerate(keys):
        for second in keys[pos + 1:]:
            codelength += getCodeLengthParallel(
                G, PD, gtype='D', case=4, isSimple=self.isSimple,
                inNL=compos[first].NL, outNL=compos[second].NL,
                dropLidx=[Lidx]
            )

    # compute for excluded nodes: among themselves, then against each component
    if len(excludedNL) > 0:
        codelength += getCodeLengthParallel(
            G, PD, gtype=self.gtype, case=4, isSimple=self.isSimple,
            NL=excludedNL, dropLidx=[Lidx]
        )
        for comp in compos.values():
            codelength += getCodeLengthParallel(
                G, PD, gtype='D', case=4, isSimple=self.isSimple,
                inNL=comp.NL, outNL=excludedNL, dropLidx=[Lidx]
            )
    return codelength
def processAsU(self, G, PD, id):
    """
    Utility function for the split action when the input graph is undirected.
    This function identifies the final components from each possible
    candidate split and computes the corresponding measures.

    The constraint's subgraph is split into connected components; components
    with more than ``self.minsize`` nodes are kept. When more than one
    component is kept, the split is scored and stored in ``self.Data[id]``.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    id : int
        identifier of a constraint to be evaluated

    Returns
    -------
    dict
        ``self.Data``
    """
    NL = PD.lprevUpdate[id][1]
    H = G.subgraph(NL)
    # nx.connected_component_subgraphs was removed in networkx 2.4; this is
    # the documented replacement and yields independent copies as before.
    components = (H.subgraph(nodes).copy() for nodes in nx.connected_components(H))

    # Kept components are numbered consecutively from 0.
    fcomponents = dict()
    for comp in components:
        if comp.number_of_nodes() > self.minsize:
            fcomponents[len(fcomponents)] = comp

    if len(fcomponents) > 1:
        # If components are more than one then only we can split this pattern
        baseParams = dict()
        baseParams['Pat'] = Pattern(H)
        baseParams['NodesInc'] = 0
        compPats = dict()
        nodes_union = set()
        for k, v in fcomponents.items():
            compPats[k] = Pattern(v)
            baseParams['NodesInc'] += v.number_of_nodes()
            nodes_union = nodes_union.union(set(compPats[k].NL))
        baseParams['compos'] = compPats
        # Nodes of the constraint that belong to no kept component.
        baseParams['excludedNL'] = list(set(baseParams['Pat'].NL) - nodes_union)

        baseParams['codeLengthC'] = getCodeLengthParallel(
            H, PD, gtype=self.gtype, case=2, isSimple=self.isSimple,
            NL=baseParams['Pat'].NL
        )
        baseParams['codeLengthCprime'] = self.computeCodeLengthSplitU(
            G, PD, 2, baseParams, id
        )
        baseParams['Pat'].setIC_dssg(
            baseParams['codeLengthC'] - baseParams['codeLengthCprime']
        )
        baseParams['Pat'].setDL(
            computeDescriptionLength(
                dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype,
                WS=baseParams['Pat'].NCount, compos=baseParams['compos'],
                isSimple=self.isSimple
            )
        )
        baseParams['Pat'].setI(
            computeInterestingness(
                baseParams['Pat'].IC_dssg, baseParams['Pat'].DL, mode=2
            )
        )
        baseParams['Pat'].setPatType('split')
        baseParams['Pat'].setPrevOrder(id)

        # now try reducing each component
        FinalParams = baseParams
        for k in baseParams['compos'].keys():
            FinalParams = self.getReducedComponentU(G, PD, FinalParams, id, k)

        # compute new lambdas for each new pattern/component
        for k, v in FinalParams['compos'].items():
            v.setLambda(
                PD.updateDistribution(
                    pat=v.G, idx=None, val_return='return', case=3,
                    dropLidx=[id]
                )
            )

        FinalParams['codeLengthCprime'] = self.computeCodeLengthSplitU(
            G, PD, 3, FinalParams, id
        )
        FinalParams['Pat'].setIC_dssg(
            FinalParams['codeLengthC'] - FinalParams['codeLengthCprime']
        )
        FinalParams['Pat'].setDL(
            computeDescriptionLength(
                dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype,
                WS=FinalParams['Pat'].NCount, compos=FinalParams['compos'],
                excActionType=False, l=self.l, isSimple=self.isSimple
            )
        )
        FinalParams['Pat'].setI(
            computeInterestingness(
                FinalParams['Pat'].IC_dssg, FinalParams['Pat'].DL, mode=2
            )
        )
        FinalParams['Pat'].setPatType('split')
        FinalParams['Pat'].setPrevOrder(id)

        # Now set these values to all component patterns
        for k, v in FinalParams['compos'].items():
            v.setIC_dssg(FinalParams['Pat'].IC_dssg)
            v.setDL(FinalParams['Pat'].DL)
            v.setI(FinalParams['Pat'].I)
            v.setPrevOrder(id)
            v.setPatType('split')
        self.Data[id] = FinalParams
    return self.Data
def processAsU(self, G, PD, id):
    """
    Utility function for the shrink action when the input graph is
    undirected. This function identifies the final subgraph from a possible
    candidate shrink and computes the corresponding measures.

    The constraint is only considered when its subgraph has exactly one
    connected component with more than ``self.minsize`` nodes; with several
    such components a split shall be performed instead. Previously the
    component counter was never advanced, so several valid components were
    silently collapsed into one and the check could not reject them.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    id : int
        identifier of a constraint to be evaluated
    """
    NL = PD.lprevUpdate[id][1]
    H = G.subgraph(NL)
    # nx.connected_component_subgraphs was removed in networkx 2.4; this is
    # the documented replacement and yields independent copies as before.
    components = (H.subgraph(nodes).copy() for nodes in nx.connected_components(H))
    fcomponents = [
        comp for comp in components if comp.number_of_nodes() > self.minsize
    ]
    if len(fcomponents) != 1:
        return

    # Baseline: the constraint as it is now, so its information content is 0.
    baseParams = dict()
    baseParams['Pat'] = Pattern(H)
    baseParams['codeLengthC'] = getCodeLengthParallel(
        H, PD, gtype=self.gtype, case=2, isSimple=self.isSimple,
        NL=baseParams['Pat'].NL
    )
    baseParams['codeLengthCprime'] = baseParams['codeLengthC']
    baseParams['Pat'].setIC_dssg(
        baseParams['codeLengthC'] - baseParams['codeLengthCprime']
    )
    baseParams['Pat'].setDL(
        computeDescriptionLength(
            dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype,
            WS=baseParams['Pat'].NCount, W=baseParams['Pat'].NCount,
            kw=baseParams['Pat'].ECount, isSimple=self.isSimple,
            kws=baseParams['Pat'].kws
        )
    )
    baseParams['Pat'].setI(
        computeInterestingness(
            baseParams['Pat'].IC_dssg, baseParams['Pat'].DL, mode=self.imode
        )
    )

    curPat = fcomponents[0]
    if curPat.number_of_nodes() < baseParams['Pat'].NCount:
        # The single valid component is already smaller than the constraint:
        # score it as the starting point of the reduction.
        bestParams = dict()
        bestParams['Pat'] = Pattern(curPat)
        bestParams['codeLengthCprime'] = self.computeCodeLengthShrinkU(
            G, PD, 2, baseParams, bestParams, id
        )
        bestParams['Pat'].setIC_dssg(
            baseParams['codeLengthC'] - bestParams['codeLengthCprime']
        )
        bestParams['Pat'].setDL(
            computeDescriptionLength(
                dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype,
                WS=baseParams['Pat'].NCount, W=bestParams['Pat'].NCount,
                kw=bestParams['Pat'].ECount, isSimple=self.isSimple,
                kws=bestParams['Pat'].kws
            )
        )
        bestParams['Pat'].setI(
            computeInterestingness(
                bestParams['Pat'].IC_dssg, bestParams['Pat'].DL,
                mode=self.imode
            )
        )
    else:
        bestParams = baseParams

    # Now reduce the only component in fcomponents
    FinalParams = self.getReducedSubgraphU(G, PD, baseParams, bestParams, id)
    # 'SPat' keeps the shrunk pattern, 'Pat' a copy of the original constraint.
    FinalParams['SPat'] = FinalParams['Pat'].copy()
    FinalParams['Pat'] = baseParams['Pat'].copy()
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm which of the statements below belong under this comparison.
    if bestParams['Pat'].I > FinalParams['SPat'].I:
        FinalParams['Pat'].setPrevOrder(id)
        FinalParams['Pat'].setPatType('shrink')
        FinalParams['SPat'].setPrevOrder(id)
        FinalParams['SPat'].setPatType('shrink')
        self.Data[id] = FinalParams
    return
def computeCodeLengthShrinkD(self, G, PD, condition, baseParams, curParams=None, Lidx=None, nlambda=None):
    """
    Function to compute the codelength of a shrink candidate, if the input
    graph is directed.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    condition : int
        condition to compute codelength
        1: Codelength of initial pattern or a single component
        2: Condition 1 + removing some nodes for codelength computation
        3: Condition 2 but with a new lambda for reduced pattern/constraint
    baseParams : dict
        value of parameters corresponding to the initial pattern before shrink
    curParams : dict, optional
        value of parameters corresponding to the pattern after shrink,
        by default None
    Lidx : int, optional
        identifier of the constraint which is evaluated and dropped in some
        cases, by default None
    nlambda : float, optional
        new lambda if condition is 3, by default None

    Returns
    -------
    float
        computed codelength; ``0.0`` when ``condition`` is not 1, 2 or 3
    """
    if condition == 1:
        return getCodeLengthParallel(
            G, PD, gtype=self.gtype, case=2, isSimple=self.isSimple,
            inNL=baseParams['Pat'].inNL, outNL=baseParams['Pat'].outNL
        )

    codelength = 0.0
    if condition != 2 and condition != 3:
        return codelength

    # Term for the reduced pattern itself.
    if condition == 2:
        codelength += getCodeLengthParallel(
            G, PD, gtype=self.gtype, case=2, isSimple=self.isSimple,
            inNL=curParams['Pat'].inNL, outNL=curParams['Pat'].outNL
        )
    else:
        codelength += getCodeLengthParallel(
            G, PD, gtype=self.gtype, case=5, isSimple=self.isSimple,
            inNL=curParams['Pat'].inNL, outNL=curParams['Pat'].outNL,
            dropLidx=[Lidx], nlambda=nlambda
        )

    # Nodes present in the initial pattern but absent from the reduced one.
    inNodesDropped = list(set(baseParams['Pat'].inNL) - set(curParams['Pat'].inNL))
    outNodesDropped = list(set(baseParams['Pat'].outNL) - set(curParams['Pat'].outNL))

    # Remaining terms: dropped -> dropped, kept -> dropped, dropped -> kept.
    crossTerms = (
        (inNodesDropped, outNodesDropped),
        (curParams['Pat'].inNL, outNodesDropped),
        (inNodesDropped, curParams['Pat'].outNL),
    )
    for sources, targets in crossTerms:
        codelength += getCodeLengthParallel(
            G, PD, gtype=self.gtype, case=4, isSimple=self.isSimple,
            inNL=sources, outNL=targets, dropLidx=[Lidx]
        )
    return codelength
def getBestOption(self, G, PD):
    """
    Function to return the best candidate to add.

    The entry of ``self.Data`` with the largest ``I`` is selected, its
    information content, description length and interestingness are
    recomputed and written onto it, and it is tagged as an ``'add'``.

    Parameters
    ----------
    G : networkx graph
        input graph
    PD : PDClass
        Input background distribution

    Returns
    -------
    dict or None
        dictionary containing a Pattern to add (``'Pat'``) and the
        corresponding prior and posterior codelengths (``'codeLengthC'``,
        ``'codeLengthCprime'``); ``None`` when ``self.Data`` is empty
    """
    if len(self.Data) == 0:
        return None

    bestPattern = max(self.Data, key=lambda candidate: candidate.I)
    dlmode = 3

    # Both graph types start with the same two calls.
    nlambda = PD.updateDistribution(bestPattern.G, None, 'return', 2, None)
    # NOTE(review): for directed graphs this still passes NL while the
    # posterior below passes inNL/outNL -- confirm this is intended.
    codeLengthC = getCodeLengthParallel(
        G, PD, NL=bestPattern.NL, case=2, gtype=self.gtype,
        isSimple=self.isSimple
    )

    if self.gtype == 'U':
        nodeArgs = dict(NL=bestPattern.NL)
        sizeArgs = dict(W=bestPattern.NCount)
    else:
        nodeArgs = dict(inNL=bestPattern.inNL, outNL=bestPattern.outNL)
        sizeArgs = dict(WI=bestPattern.inNL, WO=bestPattern.outNL)

    codeLengthCprime = getCodeLengthParallel(
        G, PD, case=3, gtype=self.gtype, isSimple=self.isSimple,
        nlambda=nlambda, **nodeArgs
    )
    DL = computeDescriptionLength(
        dlmode=dlmode, V=G.number_of_nodes(), kw=bestPattern.ECount,
        q=self.q, isSimple=self.isSimple, kws=bestPattern.kws,
        excActionType=False, l=self.l, **sizeArgs
    )

    bestPattern.setIC_dssg(codeLengthC - codeLengthCprime)
    bestPattern.setDL(DL)
    bestPattern.setI(
        computeInterestingness(
            bestPattern.IC_dssg, bestPattern.DL, mode=self.imode
        )
    )
    bestPattern.setPatType('add')

    return {
        'Pat': bestPattern,
        'codeLengthC': codeLengthC,
        'codeLengthCprime': codeLengthCprime,
    }
def evaluateConstraint(self, G, PD, id):
    """
    Function to evaluate if a constraint is a feasible candidate for remove.

    The constraint's pattern is scored; when its interestingness is positive
    it is tagged as a ``'remove'``, given the lambda stored in
    ``PD.lprevUpdate[id][0]`` and stored in ``self.Data[id]``.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    id : int
        identifier of a constraint to be evaluated
    """
    if self.gtype == 'U':
        NL = PD.lprevUpdate[id][1]
        pat = Pattern(G.subgraph(NL))
        # Prior codelength: case 2, evaluated on the pattern's own graph.
        priorLength = getCodeLengthParallel(
            pat.G, PD, NL=pat.NL, case=2, isSimple=self.isSimple,
            gtype=self.gtype
        )
        # Case 4: one lambda is dropped to compute the new codelength.
        posteriorLength = getCodeLengthParallel(
            G, PD, NL=NL, case=4, dropLidx=[id], isSimple=self.isSimple,
            gtype=self.gtype
        )
        dlExtra = {}
    else:
        inNL = PD.lprevUpdate[id][1]
        outNL = PD.lprevUpdate[id][2]
        pat = Pattern(getDirectedSubgraph(G, inNL, outNL, self.isSimple))
        # Case 1: none of the lambdas is removed.
        priorLength = getCodeLengthParallel(
            G, PD, inNL=inNL, outNL=outNL, case=1, isSimple=self.isSimple,
            gtype=self.gtype
        )
        # Case 4: one lambda is dropped to compute the new codelength.
        posteriorLength = getCodeLengthParallel(
            G, PD, inNL=inNL, outNL=outNL, case=4, dropLidx=[id],
            isSimple=self.isSimple, gtype=self.gtype
        )
        # Only the directed variant passes excActionType explicitly.
        dlExtra = {'excActionType': False}

    pat.setIC_dssg(priorLength - posteriorLength)
    pat.setDL(
        computeDescriptionLength(
            dlmode=4, gtype=self.gtype, C=len(PD.lprevUpdate), l=self.l,
            **dlExtra
        )
    )
    pat.setI(computeInterestingness(pat.IC_dssg, pat.DL, mode=self.imode))

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the store belongs under I > 0.
    if pat.I > 0:
        pat.setPrevOrder(id)
        pat.setPatType('remove')
        pat.setLambda(PD.lprevUpdate[id][0])
        self.Data[id] = {
            'Pat': pat,
            'codeLengthC': priorLength,
            'codeLengthCprime': posteriorLength,
        }