示例#1
0
    def computeParametersU(self, P, PD, k1, k2):
        """
        Score the merge of two constraints on an undirected graph and record it when it is interesting.

        The merged pattern's information content, description length and
        interestingness are computed and stored on ``P`` itself. If the
        interestingness is positive, the parameter dictionary is saved in
        ``self.Data`` under the key ``(k1, k2)``.

        Parameters
        ----------
        P : Pattern
            Input pattern obtained by merging two constraints
        PD : PDClass
            Background Distribution
        k1 : int
            identifier of first constraint
        k2 : int
            identifier of second constraint

        Raises
        ------
        Exception
            if the merge is interesting and both constraints are listed in ``self.curAdds``
        """
        pat = P
        Params = {'Pat': pat}

        # Lambda for the merged pattern, obtained with both source constraints dropped.
        mergedLambda = PD.updateDistribution(pat.G, idx=None, val_return='return', case=3, dropLidx=[k1, k2])

        Params['codeLengthC'] = getCodeLengthParallel(
            pat.G, PD, gtype=self.gtype, case=2, NL=pat.NL, isSimple=self.isSimple)
        Params['codeLengthCprime'] = getCodeLengthParallel(
            pat.G, PD, gtype=self.gtype, case=5, NL=pat.NL, isSimple=self.isSimple,
            dropLidx=[k1, k2], nlambda=mergedLambda)

        pat.setIC_dssg(Params['codeLengthC'] - Params['codeLengthCprime'])
        # NOTE(review): l is fixed to 6 here while other actions in this file pass self.l -- confirm.
        pat.setDL(computeDescriptionLength(
            dlmode=8, excActionType=False, l=6, gtype=self.gtype, W=pat.NCount, kw=pat.ECount,
            C=len(PD.lprevUpdate), kws=pat.kws, isSimple=self.isSimple))
        pat.setI(computeInterestingness(pat.IC_dssg, pat.DL, mode=self.imode))

        # Only merges with positive interestingness are kept as candidates.
        if not pat.I > 0:
            return

        first, second = int(k1), int(k2)
        pat.setPrevOrder((first, second))
        pat.setPatType('merge')
        pat.setLambda(mergedLambda)
        if first in self.curAdds and second in self.curAdds:
            raise Exception('ADD ADD MERGE EVALUATE HUA')
        self.Data[(k1, k2)] = Params
        return
示例#2
0
    def computeCodeLengthSplitU(self, G, PD, condition, Params, Lidx=None):
        """
        Compute the codelength of a (candidate) split when the input graph is undirected.

        Parameters
        ----------
        G : Networkx Graph
            Input Graph
        PD : PDClass
            Background Distribution
        condition : int
            condition to compute codelength
            1: Codelength of initial pattern or a single component
            2: Condition 1 + intra-component codelength computation
            3: Condition 2 but with a new lambda for each component
        Params : dict
            parameters of the current split of the candidate constraint; reads
            ``Params['Pat']`` for condition 1 and ``Params['compos']`` /
            ``Params['excludedNL']`` for conditions 2 and 3
        Lidx : int, optional
            identifier of the constraint which is evaluated and dropped in some cases, by default None

        Returns
        -------
        float
            computed codelength; 0.0 when ``condition`` is none of 1, 2, 3
        """
        if condition == 1:
            return getCodeLengthParallel(G, PD, gtype=self.gtype, case=2, isSimple=self.isSimple, NL=Params['Pat'].NL)
        if condition not in (2, 3):
            return 0.0

        compos = Params['compos']
        codelength = 0.0

        # Intra-component codelength; condition 3 additionally drops Lidx and
        # uses the lambda stored on each component.
        for comp in compos.values():
            if condition == 2:
                codelength += getCodeLengthParallel(
                    G, PD, gtype=self.gtype, case=2, isSimple=self.isSimple, NL=comp.NL)
            else:
                codelength += getCodeLengthParallel(
                    G, PD, gtype=self.gtype, case=5, isSimple=self.isSimple, NL=comp.NL,
                    dropLidx=[Lidx], nlambda=comp.la)

        # Inter-component codelength over every unordered pair of components.
        # Pair the actual dictionary keys (sorted, so the smaller key is always
        # the inNL side) rather than positions 0..n-1, which raised KeyError
        # whenever the keys were not a contiguous 0-based range.
        keys = sorted(compos)
        for pos, keyA in enumerate(keys):
            for keyB in keys[pos + 1:]:
                codelength += getCodeLengthParallel(
                    G, PD, gtype='D', case=4, isSimple=self.isSimple,
                    inNL=compos[keyA].NL, outNL=compos[keyB].NL, dropLidx=[Lidx])

        # Nodes excluded from every component: among themselves, then against each component.
        excluded = Params['excludedNL']
        if len(excluded) > 0:
            codelength += getCodeLengthParallel(
                G, PD, gtype=self.gtype, case=4, isSimple=self.isSimple, NL=excluded, dropLidx=[Lidx])
            for comp in compos.values():
                codelength += getCodeLengthParallel(
                    G, PD, gtype='D', case=4, isSimple=self.isSimple,
                    inNL=comp.NL, outNL=excluded, dropLidx=[Lidx])
        return codelength
示例#3
0
    def processAsU(self, G, PD, id):
        """
        Utility function for split action when the input graph is undirected.
        This function identifies the final components from each possible candidate split and computes the corresponding measures.

        The constraint is a split candidate only if its induced subgraph has more
        than one connected component with more than ``self.minsize`` nodes. In that
        case the resulting parameters are stored in ``self.Data[id]``.

        Parameters
        ----------
        G : Networkx Graph
            Input Graph
        PD : PDClass
            Background Distribution
        id : int
            identifier of a constraint to be evaluated

        Returns
        -------
        dict
            ``self.Data``, updated with an entry for ``id`` when a split is possible
        """
        NL = PD.lprevUpdate[id][1]
        H = G.subgraph(NL)
        # nx.connected_component_subgraphs was removed in NetworkX 2.4; build the
        # independent component copies from connected_components instead.
        components = (H.subgraph(nodes).copy() for nodes in nx.connected_components(H))
        # Keep only sufficiently large components, keyed 0..n-1.
        fcomponents = dict(enumerate(
            comp for comp in components if comp.number_of_nodes() > self.minsize))

        # A pattern can only be split if more than one valid component exists.
        if len(fcomponents) <= 1:
            return self.Data

        baseParams = dict()
        baseParams['Pat'] = Pattern(H)
        baseParams['NodesInc'] = 0
        compPats = dict()
        nodes_union = set()
        for k, v in fcomponents.items():
            compPats[k] = Pattern(v)
            baseParams['NodesInc'] += v.number_of_nodes()
            nodes_union.update(compPats[k].NL)
        baseParams['compos'] = compPats
        # Nodes of the original pattern that ended up in no retained component.
        baseParams['excludedNL'] = list(set(baseParams['Pat'].NL) - nodes_union)
        baseParams['codeLengthC'] = getCodeLengthParallel(
            H, PD, gtype=self.gtype, case=2, isSimple=self.isSimple, NL=baseParams['Pat'].NL)
        baseParams['codeLengthCprime'] = self.computeCodeLengthSplitU(G, PD, 2, baseParams, id)
        baseParams['Pat'].setIC_dssg(baseParams['codeLengthC'] - baseParams['codeLengthCprime'])
        baseParams['Pat'].setDL(computeDescriptionLength(
            dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype, WS=baseParams['Pat'].NCount,
            compos=baseParams['compos'], isSimple=self.isSimple))
        # NOTE(review): mode is fixed to 2 here, whereas other actions in this file pass self.imode -- confirm.
        baseParams['Pat'].setI(computeInterestingness(baseParams['Pat'].IC_dssg, baseParams['Pat'].DL, mode=2))
        baseParams['Pat'].setPatType('split')
        baseParams['Pat'].setPrevOrder(id)

        # Now try reducing each component in turn, threading the result through.
        FinalParams = baseParams
        for k in baseParams['compos'].keys():
            FinalParams = self.getReducedComponentU(G, PD, FinalParams, id, k)

        # Compute new lambdas for each new pattern/component.
        for k, v in FinalParams['compos'].items():
            v.setLambda(PD.updateDistribution(pat=v.G, idx=None, val_return='return', case=3, dropLidx=[id]))
        FinalParams['codeLengthCprime'] = self.computeCodeLengthSplitU(G, PD, 3, FinalParams, id)
        FinalParams['Pat'].setIC_dssg(FinalParams['codeLengthC'] - FinalParams['codeLengthCprime'])
        FinalParams['Pat'].setDL(computeDescriptionLength(
            dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype, WS=FinalParams['Pat'].NCount,
            compos=FinalParams['compos'], excActionType=False, l=self.l, isSimple=self.isSimple))
        FinalParams['Pat'].setI(computeInterestingness(FinalParams['Pat'].IC_dssg, FinalParams['Pat'].DL, mode=2))
        FinalParams['Pat'].setPatType('split')
        FinalParams['Pat'].setPrevOrder(id)

        # Propagate the measures of the whole split to every component pattern.
        for k, v in FinalParams['compos'].items():
            v.setIC_dssg(FinalParams['Pat'].IC_dssg)
            v.setDL(FinalParams['Pat'].DL)
            v.setI(FinalParams['Pat'].I)
            v.setPrevOrder(id)
            v.setPatType('split')
        self.Data[id] = FinalParams
        return self.Data
示例#4
0
    def processAsU(self, G, PD, id):
        """
        Utility function for shrink action when the input graph is undirected.
        This function identifies the final subgraph from a possible candidate shrink and computes the corresponding measures.

        The constraint is a shrink candidate only if its induced subgraph has
        exactly one connected component with more than ``self.minsize`` nodes;
        with more than one valid component a split shall be performed instead.
        A successful candidate is stored in ``self.Data[id]``.

        Parameters
        ----------
        G : Networkx Graph
            Input Graph
        PD : PDClass
            Background Distribution
        id : int
            identifier of a constraint to be evaluated
        """
        NL = PD.lprevUpdate[id][1]
        H = G.subgraph(NL)
        # nx.connected_component_subgraphs was removed in NetworkX 2.4; build the
        # independent component copies from connected_components instead.
        components = (H.subgraph(nodes).copy() for nodes in nx.connected_components(H))
        # Collect every valid component. The previous dict-based version never
        # advanced its key, so all valid components overwrote slot 0 and the
        # "exactly one component" test below passed even when several existed.
        fcomponents = [comp for comp in components
                       if comp.number_of_nodes() > self.minsize]

        if len(fcomponents) != 1:
            return

        baseParams = dict()
        baseParams['Pat'] = Pattern(H)
        baseParams['codeLengthC'] = getCodeLengthParallel(
            H,
            PD,
            gtype=self.gtype,
            case=2,
            isSimple=self.isSimple,
            NL=baseParams['Pat'].NL)
        # Before any shrink the prior and posterior codelengths coincide.
        baseParams['codeLengthCprime'] = baseParams['codeLengthC']
        baseParams['Pat'].setIC_dssg(baseParams['codeLengthC'] -
                                     baseParams['codeLengthCprime'])
        baseParams['Pat'].setDL(
            computeDescriptionLength(dlmode=6,
                                     C=len(PD.lprevUpdate),
                                     gtype=self.gtype,
                                     WS=baseParams['Pat'].NCount,
                                     W=baseParams['Pat'].NCount,
                                     kw=baseParams['Pat'].ECount,
                                     isSimple=self.isSimple,
                                     kws=baseParams['Pat'].kws))
        baseParams['Pat'].setI(
            computeInterestingness(baseParams['Pat'].IC_dssg,
                                   baseParams['Pat'].DL,
                                   mode=self.imode))

        curPat = fcomponents[0]

        bestParams = None
        if curPat.number_of_nodes() < baseParams['Pat'].NCount:
            # The single valid component is already smaller than the original
            # pattern, so start the reduction from that component.
            bestParams = dict()
            bestParams['Pat'] = Pattern(curPat)
            bestParams['codeLengthCprime'] = self.computeCodeLengthShrinkU(
                G, PD, 2, baseParams, bestParams, id)
            bestParams['Pat'].setIC_dssg(baseParams['codeLengthC'] -
                                         bestParams['codeLengthCprime'])
            bestParams['Pat'].setDL(
                computeDescriptionLength(dlmode=6,
                                         C=len(PD.lprevUpdate),
                                         gtype=self.gtype,
                                         WS=baseParams['Pat'].NCount,
                                         W=bestParams['Pat'].NCount,
                                         kw=bestParams['Pat'].ECount,
                                         isSimple=self.isSimple,
                                         kws=bestParams['Pat'].kws))
            bestParams['Pat'].setI(
                computeInterestingness(bestParams['Pat'].IC_dssg,
                                       bestParams['Pat'].DL,
                                       mode=self.imode))
        else:
            bestParams = baseParams

        # * Now reduce the only valid component
        FinalParams = self.getReducedSubgraphU(G, PD, baseParams,
                                               bestParams, id)
        # 'SPat' keeps the shrunk pattern, 'Pat' a copy of the original one.
        FinalParams['SPat'] = FinalParams['Pat'].copy()
        FinalParams['Pat'] = baseParams['Pat'].copy()
        # NOTE(review): the candidate is recorded only when bestParams' pattern
        # scores strictly higher than the reduced one -- confirm this comparison
        # direction is intended.
        if bestParams['Pat'].I > FinalParams['SPat'].I:
            FinalParams['Pat'].setPrevOrder(id)
            FinalParams['Pat'].setPatType('shrink')
            FinalParams['SPat'].setPrevOrder(id)
            FinalParams['SPat'].setPatType('shrink')
            self.Data[id] = FinalParams
        return
示例#5
0
    def computeCodeLengthShrinkD(self,
                                 G,
                                 PD,
                                 condition,
                                 baseParams,
                                 curParams=None,
                                 Lidx=None,
                                 nlambda=None):
        """
        Compute the codelength of a (candidate) shrink when the input graph is directed.

        Parameters
        ----------
        G : Networkx Graph
            Input Graph
        PD : PDClass
            Background Distribution
        condition : int
            condition to compute codelength
            1: Codelength of initial pattern or a single component
            2: Condition 1 + removing some nodes for codelength computation
            3: Condition 2 but with a new lambda for reduced pattern/constraint
        baseParams : dict
            parameters corresponding to the initial pattern before shrink
        curParams : dict, optional
            parameters corresponding to the pattern after shrink, by default None
            (read for conditions 2 and 3)
        Lidx : int, optional
            identifier of the constraint which is evaluated and dropped in some cases, by default None
        nlambda : float, optional
            new lambda if condition is 3, by default None

        Returns
        -------
        float
            computed codelength; 0.0 when ``condition`` is none of 1, 2, 3
        """
        if condition == 1:
            return getCodeLengthParallel(G,
                                         PD,
                                         gtype=self.gtype,
                                         case=2,
                                         isSimple=self.isSimple,
                                         inNL=baseParams['Pat'].inNL,
                                         outNL=baseParams['Pat'].outNL)
        if condition != 2 and condition != 3:
            return 0.0

        total = 0.0
        kept = curParams['Pat']

        # Codelength of the retained (shrunk) pattern itself.
        if condition == 2:
            total += getCodeLengthParallel(G,
                                           PD,
                                           gtype=self.gtype,
                                           case=2,
                                           isSimple=self.isSimple,
                                           inNL=kept.inNL,
                                           outNL=kept.outNL)
        else:
            total += getCodeLengthParallel(G,
                                           PD,
                                           gtype=self.gtype,
                                           case=5,
                                           isSimple=self.isSimple,
                                           inNL=kept.inNL,
                                           outNL=kept.outNL,
                                           dropLidx=[Lidx],
                                           nlambda=nlambda)

        # Nodes present in the original pattern but removed by the shrink.
        droppedIn = list(set(baseParams['Pat'].inNL) - set(kept.inNL))
        droppedOut = list(set(baseParams['Pat'].outNL) - set(kept.outNL))

        # Remaining blocks, each evaluated with constraint Lidx dropped:
        # dropped->dropped, kept->dropped and dropped->kept.
        blocks = (
            (droppedIn, droppedOut),
            (kept.inNL, droppedOut),
            (droppedIn, kept.outNL),
        )
        for sources, targets in blocks:
            total += getCodeLengthParallel(G,
                                           PD,
                                           gtype=self.gtype,
                                           case=4,
                                           isSimple=self.isSimple,
                                           inNL=sources,
                                           outNL=targets,
                                           dropLidx=[Lidx])
        return total
    def getBestOption(self, G, PD):
        """
        Return the best candidate to add.

        The candidate in ``self.Data`` with the highest ``I`` is selected, its
        information content, description length and interestingness are
        recomputed and stored on the pattern, and its type is set to 'add'.

        Parameters
        ----------
        G : networkx graph
            input graph
        PD : PDClass
            Input background distribution

        Returns
        -------
        dict or None
            dictionary containing the Pattern to add ('Pat') and the corresponding
            prior and posterior codelengths ('codeLengthC', 'codeLengthCprime');
            None when ``self.Data`` is empty
        """
        if len(self.Data) == 0:
            return None

        bestPattern = max(self.Data, key=lambda x: x.I)
        # The lambda update is requested identically for both graph types.
        nlambda = PD.updateDistribution(bestPattern.G, None, 'return', 2, None)

        if self.gtype == 'U':
            nodeLists = {'NL': bestPattern.NL}
            sizeArgs = {'W': bestPattern.NCount}
        else:
            # Directed patterns are described by separate in/out node lists.
            # Both codelengths now use them; previously the prior codelength
            # was requested with NL= while the posterior used inNL=/outNL=,
            # unlike every other directed call in this file.
            nodeLists = {'inNL': bestPattern.inNL, 'outNL': bestPattern.outNL}
            sizeArgs = {'WI': bestPattern.inNL, 'WO': bestPattern.outNL}

        codeLengthC = getCodeLengthParallel(G,
                                            PD,
                                            case=2,
                                            gtype=self.gtype,
                                            isSimple=self.isSimple,
                                            **nodeLists)
        codeLengthCprime = getCodeLengthParallel(G,
                                                 PD,
                                                 case=3,
                                                 gtype=self.gtype,
                                                 isSimple=self.isSimple,
                                                 nlambda=nlambda,
                                                 **nodeLists)
        DL = computeDescriptionLength(dlmode=3,
                                      V=G.number_of_nodes(),
                                      kw=bestPattern.ECount,
                                      q=self.q,
                                      isSimple=self.isSimple,
                                      kws=bestPattern.kws,
                                      excActionType=False,
                                      l=self.l,
                                      **sizeArgs)

        bestPattern.setIC_dssg(codeLengthC - codeLengthCprime)
        bestPattern.setDL(DL)
        bestPattern.setI(
            computeInterestingness(bestPattern.IC_dssg,
                                   bestPattern.DL,
                                   mode=self.imode))
        bestPattern.setPatType('add')

        Params = dict()
        Params['Pat'] = bestPattern
        Params['codeLengthC'] = codeLengthC
        Params['codeLengthCprime'] = codeLengthCprime
        return Params
    def evaluateConstraint(self, G, PD, id):
        """
        Evaluate whether a constraint is a feasible candidate for remove.

        The constraint's pattern is rebuilt from ``PD.lprevUpdate[id]``, scored
        with and without the constraint, and stored in ``self.Data[id]`` when the
        resulting interestingness is positive.

        Parameters
        ----------
        G : Networkx Graph
            Input Graph
        PD : PDClass
            Background Distribution
        id : int
            identifier of a constraint to be evaluated
        """
        undirected = self.gtype == 'U'
        Params = dict()

        if undirected:
            NL = PD.lprevUpdate[id][1]
            Params['Pat'] = Pattern(G.subgraph(NL))
            # Codelength with all constraints in place.
            # NOTE(review): this branch uses case=2 on the induced subgraph while
            # the directed branch uses case=1 on G -- confirm both are intended.
            Params['codeLengthC'] = getCodeLengthParallel(
                Params['Pat'].G,
                PD,
                NL=Params['Pat'].NL,
                case=2,
                isSimple=self.isSimple,
                gtype=self.gtype)
            # Codelength with this constraint's lambda dropped (case 4).
            Params['codeLengthCprime'] = getCodeLengthParallel(
                G,
                PD,
                NL=NL,
                case=4,
                dropLidx=[id],
                isSimple=self.isSimple,
                gtype=self.gtype)
        else:
            inNL = PD.lprevUpdate[id][1]
            outNL = PD.lprevUpdate[id][2]
            Params['Pat'] = Pattern(
                getDirectedSubgraph(G, inNL, outNL, self.isSimple))
            # Codelength with all constraints in place (case 1).
            Params['codeLengthC'] = getCodeLengthParallel(
                G,
                PD,
                inNL=inNL,
                outNL=outNL,
                case=1,
                isSimple=self.isSimple,
                gtype=self.gtype)
            # Codelength with this constraint's lambda dropped (case 4).
            Params['codeLengthCprime'] = getCodeLengthParallel(
                G,
                PD,
                inNL=inNL,
                outNL=outNL,
                case=4,
                dropLidx=[id],
                isSimple=self.isSimple,
                gtype=self.gtype)

        pat = Params['Pat']
        pat.setIC_dssg(Params['codeLengthC'] - Params['codeLengthCprime'])

        # Only the directed branch passes excActionType explicitly.
        dlArgs = dict(dlmode=4,
                      gtype=self.gtype,
                      C=len(PD.lprevUpdate),
                      l=self.l)
        if not undirected:
            dlArgs['excActionType'] = False
        pat.setDL(computeDescriptionLength(**dlArgs))
        pat.setI(computeInterestingness(pat.IC_dssg, pat.DL, mode=self.imode))

        # Keep the constraint as a removal candidate only when removing it pays off.
        if pat.I > 0:
            pat.setPrevOrder(id)
            pat.setPatType('remove')
            pat.setLambda(PD.lprevUpdate[id][0])
            self.Data[id] = Params