Example No. 1
    def __init__(self,
                 formula,
                 solver='g3',
                 adapt=False,
                 exhaust=False,
                 minz=False,
                 trim=False,
                 verbose=0):
        """
            Constructor.
        """

        # verbosity level
        self.verbose = verbose

        # constructing a local copy of the formula
        self.formula = WCNFPlus()
        self.formula.hard = formula.hard[:]
        self.formula.wght = formula.wght[:]
        self.formula.topw = formula.topw
        self.formula.nv = formula.nv

        # top variable identifier
        self.topv = formula.nv

        # processing soft clauses
        self._process_soft(formula)
        self.formula.nv = self.topv

        # creating an unweighted copy
        unweighted = self.formula.copy()
        unweighted.wght = [1 for w in unweighted.wght]

        # enumerating disjoint MCSes (including unit-size MCSes)
        to_hit, self.units = self._disjoint(unweighted, solver, adapt, exhaust,
                                            minz, trim)

        if self.verbose > 2:
            print('c mcses: {0} unit, {1} disj'.format(
                len(self.units),
                len(to_hit) + len(self.units)))

        # hitting set enumerator
        self.hitman = Hitman(bootstrap_with=to_hit,
                             weights=self.weights,
                             solver=solver,
                             htype='sorted',
                             mxs_adapt=adapt,
                             mxs_exhaust=exhaust,
                             mxs_minz=minz,
                             mxs_trim=trim)

        # SAT oracle bootstrapped with the hard clauses; note that
        # clauses of the unit-size MCSes are enforced to be enabled
        self.oracle = Solver(name=solver,
                             bootstrap_with=unweighted.hard +
                             [[mcs] for mcs in self.units])
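
The constructor above matches the OptUx optimal MUS extractor shown in full in Example No. 7. A minimal usage sketch, assuming the class is the one shipped with PySAT as pysat.examples.optux.OptUx and using a tiny made-up formula:

from pysat.examples.optux import OptUx   # assumed location of the class above
from pysat.formula import WCNF

wcnf = WCNF()
wcnf.append([-1, -2])         # hard clause: x1 and x2 cannot both hold
wcnf.append([1], weight=1)    # soft clause 1
wcnf.append([2], weight=1)    # soft clause 2

# enumerate optimal MUSes; each MUS is a list of 1-based soft-clause indices
with OptUx(wcnf, solver='g3', verbose=0) as optux:
    for mus in optux.enumerate():
        print('mus:', mus, 'cost:', optux.cost)
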
Example No. 2
    def prepare_formula(self):
        """
            Prepare a MaxSAT formula for rule enumeration.
        """

        # creating a formula
        self.formula = WCNFPlus()

        # formula's variables
        self.orig_vars = max(self.data.fvmap.opp.keys())
        self.formula.nv = self.orig_vars * 2

        # creating soft clauses and hard p-clauses
        # as well as a mapping between dual-rail variables and input variables
        self.drvmap = {}
        for v in range(1, self.orig_vars + 1):
            if v not in self.data.deleted:
                self.formula.soft.append([-v])
                self.formula.soft.append([-v - self.orig_vars])

                self.formula.hard.append([-v,
                                          -v - self.orig_vars])  # p clauses

                self.drvmap[v] = v
                self.drvmap[v + self.orig_vars] = -v

        self.formula.wght = [1 for cl in self.formula.soft]
        self.formula.topw = len(self.formula.soft) + 1

        # hard clauses, discrimination constraints
        self.discrimination()

        # hard clauses, coverage constraints
        self.coverage()

        if self.options.pdump:
            fname = 'rules.{0}@{1}.wcnf'.format(os.getpid(),
                                                socket.gethostname())
            self.formula.to_file(fname)

        if self.options.verb:
            print('c1 formula: {0}v, {1}c ({2}h+{3}s)'.format(
                self.formula.nv,
                len(self.formula.hard) + len(self.formula.soft),
                len(self.formula.hard), len(self.formula.soft)))
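
prepare_formula() above builds a dual-rail encoding: each original variable v gets two rails, v and v + orig_vars, a soft clause preferring each rail to be off, and a hard p-clause forbidding both rails from being on at once. A standalone sketch of that skeleton, assuming a hypothetical problem with three original variables and no deleted features:

from pysat.formula import WCNFPlus

orig_vars = 3                 # hypothetical number of original variables
formula = WCNFPlus()
formula.nv = orig_vars * 2    # one positive and one negative rail per variable

drvmap = {}
for v in range(1, orig_vars + 1):
    formula.soft.append([-v])                  # prefer the positive rail off
    formula.soft.append([-v - orig_vars])      # prefer the negative rail off
    formula.hard.append([-v, -v - orig_vars])  # p-clause: rails cannot both be true
    drvmap[v] = v                              # rail v stands for literal v
    drvmap[v + orig_vars] = -v                 # rail v + orig_vars stands for -v

formula.wght = [1 for _ in formula.soft]
formula.topw = len(formula.soft) + 1
print(formula.nv, len(formula.hard), len(formula.soft))
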
Example No. 3
    def __createWCNFWithMinimization(self, cnf):
        '''
        This function creates the WCNF object and solves it.
        Variables are weighted, it's a MaxSAT problem.
        :param cnf (list)
        :return:
        '''
        # thanks to pysat library
        self.__wcnf = WCNFPlus()
        self.__wcnf.extend(cnf)
        # most of the traces should be clustered
        self.__minimizingUnclusteredTraces()
        # minimizing BOOLEAN_VAR_COMMON_T variables
        self.__minimizingCommonTransitions()
        # minimizing BOOLEAN_VAR_diff_TRACE_CENTROIDS variables
        self.__minimizingDiff()
        # at most max_t transitions per cluster (native AtMost constraint)
        self.__maxTransitionsPerClusterAtMost(
            self.__vars.getFunction(BOOLEAN_VAR_K_CONTAINS_T))
        # RC2 is a MaxSAT algorithm
        solver = RC2(self.__wcnf, solver="mc")
        solver.compute()
        self.__endComputationTime = time.time()
        self.__model = solver.model
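
The method above hands the weighted formula to RC2 on top of Minicard ('mc'), a solver with native cardinality support, which is needed because __maxTransitionsPerClusterAtMost() appends native AtMost constraints. A minimal sketch of that solving pattern on a made-up WCNFPlus formula:

from pysat.formula import WCNFPlus
from pysat.examples.rc2 import RC2

wcnf = WCNFPlus()
wcnf.append([1, 2, 3])                        # hard clause
wcnf.append([[1, 2, 3], 1], is_atmost=True)   # native AtMost: at most one of x1..x3
wcnf.append([1], weight=2)                    # soft clauses
wcnf.append([2], weight=2)

with RC2(wcnf, solver='mc') as rc2:           # 'mc' (Minicard) handles AtMost natively
    model = rc2.compute()
    print(model, rc2.cost)
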
Example No. 4
        # reading standard CNF or WCNF
        if re.search(r'cnf(\.(gz|bz2|lzma|xz))?$', files[0]):
            if re.search(r'\.wcnf(\.(gz|bz2|lzma|xz))?$', files[0]):
                formula = WCNF(from_file=files[0])
            else:  # expecting '*.cnf'
                formula = CNF(from_file=files[0]).weighted()

            lsu = LSU(formula,
                      solver=solver,
                      pb_enc_type=pb_enc,
                      expect_interrupt=(timeout is not None),
                      verbose=verbose)

        # reading WCNF+
        elif re.search(r'\.wcnf[p+](\.(gz|bz2|lzma|xz))?$', files[0]):
            formula = WCNFPlus(from_file=files[0])
            lsu = LSUPlus(formula,
                          pb_enc_type=pb_enc,
                          expect_interrupt=(timeout is not None),
                          verbose=verbose)

        # setting a timer if necessary
        if timeout is not None:
            if verbose > 1:
                print('c timeout: {0}'.format(timeout))

            timer = Timer(timeout, lambda s: s.interrupt(), [lsu])
            timer.start()

        if lsu.solve():
            if print_model:
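
A minimal in-memory sketch of the LSU workflow dispatched above, skipping the file parsing and using a made-up formula (LSU is assumed to be pysat.examples.lsu.LSU):

from pysat.formula import WCNF
from pysat.examples.lsu import LSU

wcnf = WCNF()
wcnf.append([-1, -2])        # hard clause
wcnf.append([1], weight=1)   # soft clauses
wcnf.append([2], weight=1)

lsu = LSU(wcnf, solver='g4', verbose=0)
if lsu.solve():              # True when a model has been found
    print('o', lsu.cost)
    print('v', lsu.get_model())
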
Example No. 5
class Amstc:
    '''
    Alignment and Model Subnet-based Trace Clustering
    Creates clusters based on subnets
    '''
    def __init__(self,
                 pn,
                 m0,
                 mf,
                 traces_xes,
                 size_of_run,
                 max_d,
                 max_t,
                 nb_clusters,
                 silent_label="tau",
                 nbTraces=20):
        '''
        Initialization of the object that directly launches the clustering
        :param pn (Petrinet)
        :param m0 (Marking) : initial marking
        :param mf (Marking) : final marking
        :param traces_xes (Log) : data
        :param size_of_run (int) : maximal studied size of run
        :param max_d (int) : maximal distance of the traces to their subnet centroids
        :param max_t (int) : maximal number of transitions in a subnet centroid
        :param nb_clusters (int) : maximal number of clusters
        :param silent_label (string) : label of transitions that incur no cost (silent transitions)
        :param nbTraces (int) : maximal number of traces taken from the log
        '''
        self.__max_d = max_d
        self.__max_t = max_t
        self.__size_of_run = size_of_run
        self.__copy_net(pn, m0, mf)
        self.__nb_clusters = nb_clusters
        self.__silent_transititons = [
            t for t in self.__transitions
            if t.label is None or silent_label in t.label
        ]
        # add wait transitions that represent log and model moves for alignment
        self.__addWaitTransitions(self.__pn, self.__mf)
        self.__start = time.time()
        self.__createSATformula(self.__pn, self.__m0, self.__mf, max_d, max_t,
                                traces_xes, nbTraces)

    def __copy_net(self, pn, m0, mf):
        self.__pn = deepcopy(pn)
        self.__transitions = list(self.__pn.transitions)
        self.__places = list(self.__pn.places)
        self.__arcs = list(self.__pn.arcs)
        self.__m0 = Marking()
        self.__mf = Marking()
        for p in self.__pn.places:
            for n in m0.keys():
                if n.name == p.name:
                    self.__m0[p] = 1
            for n in mf.keys():
                if n.name == p.name:
                    self.__mf[p] = 1

    def __addWaitTransitions(self, pn, mf):
        '''
        This function adds two types of wait transitions:
            - log moves
            - model moves
        These transitions have a cost.
        :param pn (Petrinet)
        :param mf (Marking)
        '''
        # creates the transitions with labels WAIT_LABEL_TRACE for log moves
        # and WAIT_LABEL_MODEL for model moves
        self.__wait_transition_trace = PetriNet.Transition(
            WAIT_LABEL_TRACE, WAIT_LABEL_TRACE)
        self.__wait_transition_model = PetriNet.Transition(
            WAIT_LABEL_MODEL, WAIT_LABEL_MODEL)
        final_places = [p for p in pn.places if p in mf]
        # WAIT_LABEL_MODEL is added in the Petrinet at the end of the
        # TODO I'm not sure here
        #petri.utils.add_arc_from_to(self.__wait_transition_model, final_places[0], pn)
        #petri.utils.add_arc_from_to(final_places[0],self.__wait_transition_model, pn)
        # add both transitions but notice that WAIT_LABEL_TRACE will be forbidden for the centroids
        self.__transitions.append(self.__wait_transition_trace)
        self.__transitions.append(self.__wait_transition_model)

    def __createSATformula(self, pn, m0, mf, max_d, max_t, traces_xes,
                           nbTraces):
        '''
        This function creates and solves the SAT formula of the clustering problem.
        :param pn (Petrinet)
        :param m0 (Marking)
        :param mf (Marking)
        :param max_d (int)
        :param max_t (int)
        :param traces_xes (Log)
        '''
        # this object creates variable numbers of the SAT formula
        self.__vars = VariablesGenerator()
        # formula version of data event log
        log_to_PN_w_formula, self.__traces = log_to_Petri_with_w(
            traces_xes,
            self.__transitions,
            self.__vars,
            self.__size_of_run,
            self.__wait_transition_trace,
            self.__wait_transition_model,
            label_l=BOOLEAN_VAR_TRACES_ACTIONS,
            max_nbTraces=nbTraces)
        # creates the boolean variables for the next formulas
        self.__createBooleanVariables()
        # formula of centroids
        centroidsFormulasList = self.__createCentroids(m0, mf)
        # formula that describes maximal distance
        diffTracesCentroids = self.__getDiffTracesCentroids(
            self.__vars.getFunction(BOOLEAN_VAR_CHI_TRANSITIONS),
            self.__vars.getFunction(BOOLEAN_VAR_DIFF_l),
            self.__vars.getFunction(BOOLEAN_VAR_DIFF_m),
            self.__vars.getFunction(BOOLEAN_VAR_TRACES_ACTIONS))

        # formula that creates BOOLEAN_VAR_COMMON_T variables
        listOfCommonTransitions = self.__commonTransitions(
            self.__vars.getFunction(BOOLEAN_VAR_COMMON_T),
            self.__vars.getFunction(BOOLEAN_VAR_K_CONTAINS_T))
        # formula that describes that a trace belongs to at most one cluster
        aClusterMax = self.__tracesInAClusterOnly(
            self.__vars.getFunction(BOOLEAN_VAR_J_CLUSTERISED),
            self.__vars.getFunction(BOOLEAN_VAR_J_IN_K))
        # concat the formula
        full_formula = And([], [], log_to_PN_w_formula +
                           centroidsFormulasList + diffTracesCentroids +
                           listOfCommonTransitions + aClusterMax)
        # formula to cnf
        cnf = full_formula.operatorToCnf(self.__vars.iterator)

        # CNF is completed with minimisation and solved
        self.__createWCNFWithMinimization(cnf)

    def __createBooleanVariables(self):
        '''
        This function creates the boolean variables needed in this class.
        '''
        self.__vars.add(BOOLEAN_VAR_DIFF_m, [(0, len(self.__traces)),
                                             (1, self.__size_of_run + 1)])
        self.__vars.add(BOOLEAN_VAR_DIFF_l, [(0, len(self.__traces)),
                                             (1, self.__size_of_run + 1)])

        self.__vars.add(BOOLEAN_VAR_J_IN_K, [(0, len(self.__traces)),
                                             (0, self.__nb_clusters)])
        self.__vars.add(BOOLEAN_VAR_J_CLUSTERISED, [(0, len(self.__traces))])
        self.__vars.add(BOOLEAN_VAR_CHI_MARKINGS, [(0, len(self.__traces)),
                                                   (0, self.__size_of_run + 1),
                                                   (0, len(self.__places))])
        self.__vars.add(BOOLEAN_VAR_CHI_TRANSITIONS,
                        [(0, len(self.__traces)), (1, self.__size_of_run + 1),
                         (0, len(self.__transitions))])
        self.__vars.add(BOOLEAN_VAR_K_CONTAINS_T,
                        [(0, self.__nb_clusters),
                         (0, len(self.__transitions))])
        self.__vars.add(BOOLEAN_VAR_COMMON_T, [(0, self.__nb_clusters),
                                               (0, self.__nb_clusters),
                                               (0, len(self.__transitions))])

    def __createWCNFWithMinimization(self, cnf):
        '''
        This function creates the WCNF object and solves it.
        Variables are weighted, it's a MaxSAT problem.
        :param cnf (list)
        :return:
        '''
        # thanks to pysat library
        self.__wcnf = WCNFPlus()
        self.__wcnf.extend(cnf)
        # most of the traces should be clustered
        self.__minimizingUnclusteredTraces()
        # minimizing BOOLEAN_VAR_COMMON_T variables
        self.__minimizingCommonTransitions()
        # minimizing BOOLEAN_VAR_diff_TRACE_CENTROIDS variables
        self.__minimizingDiff()
        # at most max_t transitions per cluster (native AtMost constraint)
        self.__maxTransitionsPerClusterAtMost(
            self.__vars.getFunction(BOOLEAN_VAR_K_CONTAINS_T))
        # RC2 is a MaxSAT algorithm
        solver = RC2(self.__wcnf, solver="mc")
        solver.compute()
        self.__endComputationTime = time.time()
        self.__model = solver.model

    def __createCentroids(self, m0, mf):
        '''
        Creates the formulas of the subnet centroids. There is one centroid per trace, and transitions are assigned to clusters.
        As the number of clusters is limited, centroids are naturally merged: this is how traces get clustered.
        Centroids are ultimately runs of the model that allow alignments. When alignments are found, their transitions go into
        clusters.
        Creates BOOLEAN_VAR_CHI_MARKINGS, BOOLEAN_VAR_CHI_TRANSITIONS, BOOLEAN_VAR_K_CONTAINS_T, BOOLEAN_VAR_J_IN_K
        boolean variables.
        This function has subfunctions because the formulas differ from those of normal Petri nets (@see pnToFormula).
        :param m0 (Marking)
        :param mf (Marking)
        '''
        def in_cluster_of_j(tr, c_kt, j, chi_jk, nb_clusters):
            '''
            Subfunction of __createCentroids. in_cluster_of_j assigns a transition to the cluster of the trace.
            :param tr (Transition)
            :param c_kt (function) : @see variablesGenerator.py, function c_kt(k,t) gets BOOLEAN_VAR_K_CONTAINS_T
            variables
            :param j (int) : index of the current trace
            :param chi_jk (function) : @see variablesGenerator.py, function chi_jk gets BOOLEAN_VAR_J_IN_K variables
            :param nb_clusters (int) : number of clusters
            :return:
            '''
            # BOOLEAN_VAR_J_IN_K => BOOLEAN_VAR_K_CONTAINS_T
            return And([], [], [
                Or([c_kt([k, tr])], [chi_jk([j, k])], [])
                for k in range(0, nb_clusters)
            ])

        def is_transition_centroid(j, places, tr, i, m_ip):
            '''
            This function checks the marking required to fire a transition of a centroid.
            :param j (int) : index of centroid
            :param places (list) : list of places
            :param tr (Transition) : transition that wants to fire
            :param i (int) : instant of firing
            :param m_ip (function) : @see @variablesGenerator.py function to get BOOLEAN_VAR_CHI_MARKINGS variables
            '''
            formulas = []
            prePlaces = [a.source for a in tr.in_arcs]
            postPlaces = [a.target for a in tr.out_arcs]
            # token game
            for p in places:
                if p in prePlaces and p in postPlaces:
                    formulas.append(
                        And([
                            m_ip([j, i, places.index(p)]),
                            m_ip([j, i - 1, places.index(p)])
                        ], [], []))
                elif p in prePlaces and p not in postPlaces:
                    formulas.append(
                        And([m_ip([j, i - 1, places.index(p)])],
                            [m_ip([j, i, places.index(p)])], []))
                elif p not in prePlaces and p in postPlaces:
                    formulas.append(
                        And([m_ip([j, i, places.index(p)])],
                            [m_ip([j, i - 1, places.index(p)])], []))
                elif p not in prePlaces and p not in postPlaces:
                    formulas.append(
                        Or([], [], [
                            And([
                                m_ip([j, i, places.index(p)]),
                                m_ip([j, i - 1, places.index(p)])
                            ], [], []),
                            And([], [
                                m_ip([j, i, places.index(p)]),
                                m_ip([j, i - 1, places.index(p)])
                            ], [])
                        ]))
            return And([], [], formulas)

        def is_action_centroid(j, places, transitions, i, m_jip, tau_jip, c_kt,
                               chi_jk, nb_clusters):
            '''
            @see pnToFormula.py, is_action_centroid says whether a transition is fired.
            :param j (int) : index of the trace
            :param places (list) : places of the Petri net, indexes are important
            :param transitions (list) : transitions of the Petri net, indexes are important
            :param i (int) : instant in the run of the centroid
            :param m_jip (function) : @see variablesGenerator.py, function m_jip gets BOOLEAN_VAR_CHI_MARKINGS variables
            :param tau_jip (function) : @see variablesGenerator.py, function tau_jip gets BOOLEAN_VAR_CHI_TRANSITIONS
             variables
            :param c_kt (function) : @see variablesGenerator.py, function c_kt gets BOOLEAN_VAR_K_CONTAINS_T variables
            :param chi_jk (function) : @see variablesGenerator.py, function chi_jk gets BOOLEAN_VAR_J_IN_K variables
            :param nb_clusters (int) : number of clusters
            :return:
            '''
            # exactly one transition fires at each instant
            aTransitionPerInstant = [
                And([tau_jip([j, i, t])], [
                    tau_jip([j, i, t2])
                    for t2 in range(len(transitions)) if t != t2
                ], []) for t in range(len(transitions))
            ]
            formulas = [Or([], [], aTransitionPerInstant)]

            # runs is_transition for the fired transition
            indexOfTraceWait = transitions.index(self.__wait_transition_trace)
            for t in range(len(transitions)):
                # WAIT_TRANSITION_TRACE is forbidden for centroid
                if t == indexOfTraceWait:
                    formulas.append(And([], [tau_jip([j, i, t])], []))
                else:
                    formulas.append(
                        Or([], [tau_jip([j, i, t])], [
                            And([], [], [
                                is_transition_centroid(
                                    j, places, transitions[t], i, m_jip),
                                in_cluster_of_j(t, c_kt, j, chi_jk,
                                                nb_clusters)
                            ])
                        ]))
            return And([], [], formulas)

        def is_run_centroid(j, size_of_run, m0, mf, m_jip, tau_jip, c_kt,
                            chi_jk, nb_clusters, transitions, places):
            '''
            Initialization of centroids. There is one run per trace. This run represents the alignment of the trace to the
            model. If the trace is clustered, then the transitions of its run are contained in the centroid of its cluster.
            :param j (int) : index of the trace
            :param size_of_run (int) : maximal size of the run (prefix)
            :param m0 (Marking) : initial marking
            :param mf (Marking) : final marking
            :param m_jip (function) : @see variablesGenerator.py, function m_jip gets BOOLEAN_VAR_CHI_MARKINGS variables
            :param tau_jip (function) : @see variablesGenerator.py, function tau_jip gets BOOLEAN_VAR_CHI_TRANSITIONS
             variables
            :param c_kt (function) : @see variablesGenerator.py, function c_kt gets BOOLEAN_VAR_K_CONTAINS_T variables
            :param chi_jk (function) : @see variablesGenerator.py, function chi_jk gets BOOLEAN_VAR_J_IN_K variables
            :param nb_clusters (int) : number of clusters
            :param transitions (list) : list of Transitions
            :param places (list) : list of Places
            :return:
            '''
            positives = [m_jip([j, 0, places.index(m)]) for m in m0]
            for m in mf:
                positives.append(m_jip([j, size_of_run, places.index(m)]))
            negatives = [
                m_jip([j, 0, places.index(m)]) for m in places if m not in m0
            ]
            formulas = [
                is_action_centroid(j, places, transitions, i, m_jip, tau_jip,
                                   c_kt, chi_jk, nb_clusters)
                for i in range(1, size_of_run + 1)
            ]
            run_of_pn = And(positives, negatives, formulas)
            return run_of_pn

        # .....................................................................................
        # here starts __createCentroids function
        centroidsFormulas = []
        for j in range(0, len(self.__traces)):
            centroidOfJ = is_run_centroid(
                j, self.__size_of_run, m0, mf,
                self.__vars.getFunction(BOOLEAN_VAR_CHI_MARKINGS),
                self.__vars.getFunction(BOOLEAN_VAR_CHI_TRANSITIONS),
                self.__vars.getFunction(BOOLEAN_VAR_K_CONTAINS_T),
                self.__vars.getFunction(BOOLEAN_VAR_J_IN_K),
                self.__nb_clusters, self.__transitions, self.__places)
            centroidIfClusterised = Or(
                [], [self.__vars.get(BOOLEAN_VAR_J_CLUSTERISED, [j])],
                [centroidOfJ])
            centroidsFormulas.append(centroidIfClusterised)
        return centroidsFormulas

    def __getDiffTracesCentroids(self, chi_jia, diffl_ji, diffm_ji,
                                 lambda_jia):
        '''
        This function defines the difference between a trace and its centroid and calls
        __maxDiffTracesCentroids().
        :param chi_jia (function)
        :param diffl_ji (function)
        :param diffm_ji (function)
        :param lambda_jia (function)
        :return: list of formula
        '''
        formulas = []
        for j in range(0, len(self.__traces)):
            aDiffPerInstant = []
            for i in range(1, self.__size_of_run + 1):
                # for each instant, a transition is true and there is or not a diff
                for t in range(0, len(self.__transitions)):
                    # if silent transition : diffjit is false
                    if self.__transitions[t] in self.__silent_transititons:
                        indexOfWaitModel = self.__transitions.index(
                            self.__wait_transition_model)
                        indexOfWaitTrace = self.__transitions.index(
                            self.__wait_transition_trace)
                        diffjit = Or([
                            diffl_ji([j, i]),
                            lambda_jia([j, i, indexOfWaitModel]),
                            lambda_jia([j, i, indexOfWaitTrace])
                        ], [chi_jia([j, i, t])], [])
                    # chi_jia => lambda_jia or diff_ji
                    elif self.__transitions[t] == self.__wait_transition_model:
                        diffjit = Or([diffl_ji([j, i]),
                                      lambda_jia([j, i, t])],
                                     [chi_jia([j, i, t])], [])
                    elif self.__transitions[t] == self.__wait_transition_trace:
                        diffjit = Or([diffm_ji([j, i])], [chi_jia([j, i, t])],
                                     [])
                    else:
                        indexOfWaitTrace = self.__transitions.index(
                            self.__wait_transition_trace)
                        diffjit = Or([], [], [
                            Or([lambda_jia([j, i, t])], [chi_jia([j, i, t])], [
                                And([diffl_ji([j, i]),
                                     diffm_ji([j, i])], [], [])
                            ]),
                            And([
                                diffm_ji([j, i]),
                                lambda_jia([j, i, indexOfWaitTrace]),
                                chi_jia([j, i, t])
                            ], [], [])
                        ])
                    aDiffPerInstant.append(diffjit)
            diffPerJ = And([], [], aDiffPerInstant)
            formulas.append(diffPerJ)

        # then the maximal number of diffs is enforced:
        self.__maxDiffTracesCentroids(formulas)
        return formulas

    def __maxDiffTracesCentroids(self, formulas):
        '''
        This function uses self.__max_d, which determines the maximal distance of a trace to its centroid.
        Idea of the threshold: at least N diff variables must be false per trace.
        :param formulas (list of formula to fill)
        :return: void
        '''
        # this function uses combinations of itertools to get all the combinations : this is better than parameter
        # at_most of pysat library
        list_to_size_of_run = list(range(1, (self.__size_of_run * 2) + 1))
        max_distance = (self.__size_of_run * 2) - self.__max_d
        # IDEA : there are at least max_distance number of false variables
        combinaisons_of_instants = list(
            itertools.combinations(list_to_size_of_run, max_distance))
        for j in range(0, len(self.__traces)):
            distFalseVariables = []
            for instants in combinaisons_of_instants:
                list_distances = []
                for i in instants:
                    if i <= self.__size_of_run:
                        list_distances.append(
                            self.__vars.get(BOOLEAN_VAR_DIFF_l, [j, i]))
                    else:
                        list_distances.append(
                            self.__vars.get(BOOLEAN_VAR_DIFF_m,
                                            [j, (i - self.__size_of_run)]))
                distFalseVariables.append(And([], list_distances, []))
            formulas.append(Or([], [], distFalseVariables))

    def __commonTransitions(self, common_kkt, ckt):
        '''
        When two clusters share a transition, the corresponding BOOLEAN_VAR_COMMON_T variable is True.
        :param common_kkt (function)
        :param ckt (function)
        :return: list of formulas
        '''
        listOfCommunTransitionsFormulas = []
        for k1 in range(0, self.__nb_clusters):
            for k2 in range(k1 + 1, self.__nb_clusters):
                for t in range(len(self.__transitions)):
                    # (c_k1t and c_k2t) => common_k1k2t
                    haveATransitionInCommon = Or(
                        [common_kkt([k1, k2, t])],
                        [ckt([k1, t]), ckt([k2, t])], [])
                    listOfCommunTransitionsFormulas.append(
                        haveATransitionInCommon)
        return listOfCommunTransitionsFormulas

    def __maxTransitionsPerClusterAtMost(self, c_kt):
        '''
        This function uses self.__max_t, which determines the maximal number of transitions per centroid.
        :return: void
        '''
        for k1 in range(0, self.__nb_clusters):
            self.__wcnf.append([[
                c_kt([k1, t]) for t in range(0, len(self.__transitions))
                if self.__transitions[t] != self.__wait_transition_model
            ], self.__max_t],
                               is_atmost=True)

    def __tracesInAClusterOnly(self, inC_j, chi_jk):
        '''
        Verifies that trace j is either in a unique cluster or in none.
        :param inC_j (function)
        :param chi_jk (function)
        '''
        formulas = []
        for j in range(0, len(self.__traces)):
            inCorNot = []
            for k1 in range(0, self.__nb_clusters):
                # if in k1 then not in other k
                inCorNot.append(
                    And([inC_j([j]), chi_jk([j, k1])], [
                        chi_jk([j, k])
                        for k in range(0, self.__nb_clusters) if k != k1
                    ], []))
            # if in no cluster k, then not clustered (inC is false)
            allKNot = [chi_jk([j, k]) for k in range(0, self.__nb_clusters)]
            allKNot.append(inC_j([j]))
            inCorNot.append(And([], allKNot, []))
            formulas.append(Or([], [], inCorNot))
        return formulas

    def __minimizingDiff(self):
        '''
        Fills WCNF formula with weights on BOOLEAN_VAR_DIFF variables
        '''
        for j in range(0, len(self.__traces)):
            for i in range(1, self.__size_of_run + 1):
                self.__wcnf.append(
                    [-1 * self.__vars.get(BOOLEAN_VAR_DIFF_l, [j, i])], 2)
                self.__wcnf.append(
                    [-1 * self.__vars.get(BOOLEAN_VAR_DIFF_m, [j, i])], 2)

    def __minimizingCommonTransitions(self):
        '''
        Fills WCNF formula with weights on BOOLEAN_VAR_COMMON_T variables
        '''
        for k1 in range(0, self.__nb_clusters):
            for transition in self.__transitions:
                t = self.__transitions.index(transition)
                for k2 in range(k1 + 1, self.__nb_clusters):
                    self.__wcnf.append([
                        -1 * self.__vars.get(BOOLEAN_VAR_COMMON_T, [k1, k2, t])
                    ], 2)
                self.__wcnf.append(
                    [-1 * self.__vars.get(BOOLEAN_VAR_K_CONTAINS_T, [k1, t])],
                    1)

    def __minimizingUnclusteredTraces(self):
        '''
        Fills WCNF formula with weights on BOOLEAN_VAR_J_IN_K variables
        '''
        for j in range(0, len(self.__traces)):
            for k in range(0, self.__nb_clusters):
                self.__wcnf.append(
                    [self.__vars.get(BOOLEAN_VAR_J_IN_K, [j, k])], 100)

    def getClustering(self):
        '''
        This function reads the result of the SAT problem. Very dirty function... sorry.
        From a Boolean solution of variables, find the informative ones and extract results.
        :return: a simple dictionary of list of letter and Petri nets (centroids)
        '''
        clusters = {}
        traces = {}
        trs = {}
        clusterized = []
        for var in self.__model:
            if self.__vars.getVarName(var) != None and self.__vars.getVarName(
                    var).startswith("diff"):
                print(self.__vars.getVarName(var))
            if self.__vars.getVarName(var) != None and self.__vars.getVarName(
                    var).startswith(BOOLEAN_VAR_K_CONTAINS_T):
                k = self.__vars.getVarName(var).split("[")[1].split(",")[0]
                t = self.__transitions[int(
                    self.__vars.getVarName(var).split("]")[0].split(",")[1])]
                if int(k) not in clusters.keys():
                    clusters[int(k)] = []
                clusters[int(k)].append(t)
            elif self.__vars.getVarName(
                    var) != None and self.__vars.getVarName(var).startswith(
                        BOOLEAN_VAR_J_IN_K):
                j = self.__vars.getVarName(var).split("[")[1].split(",")[0]
                clusterized.append(int(j))
                k = (self.__vars.getVarName(var).split("]")[0].split(",")[1])
                if int(k) not in traces.keys():
                    traces[int(k)] = []
                traces[int(k)].append(j)
            elif self.__vars.getVarName(
                    var) != None and self.__vars.getVarName(var).startswith(
                        BOOLEAN_VAR_TRACES_ACTIONS):
                j = self.__vars.getVarName(var).split("[")[1].split(",")[0]
                i = (self.__vars.getVarName(var).split("]")[0].split(",")[1])
                a = (self.__vars.getVarName(var).split("]")[0].split(",")[2])
                if int(j) not in trs.keys():
                    trs[int(j)] = []
                trs[int(j)].append(str(self.__transitions[int(a)]))

        clustering = []
        for i in clusters:
            pn_i = PetriNet()
            for a in self.__arcs:
                if type(a.source
                        ) is PetriNet.Transition and a.source in clusters[i]:
                    p_i = a.target
                    t_i = a.source
                    if t_i not in pn_i.transitions:
                        pn_i.transitions.add(t_i)
                    if p_i not in pn_i.places:
                        pn_i.places.add(p_i)
                    a = petri.petrinet.PetriNet.Arc(t_i, p_i, 1)
                    pn_i.arcs.add(a)
                elif type(a.target
                          ) is PetriNet.Transition and a.target in clusters[i]:
                    p_i = a.source
                    t_i = a.target
                    if t_i not in pn_i.transitions:
                        pn_i.transitions.add(t_i)
                    if p_i not in pn_i.places:
                        pn_i.places.add(p_i)
                    a = petri.petrinet.PetriNet.Arc(p_i, t_i, 1)
                    pn_i.arcs.add(a)
            pn_i_f = deepcopy(pn_i)
            m_i_0 = Marking()
            m_i_f = Marking()
            for p in pn_i_f.places:
                for n in self.__m0.keys():
                    if n.name == p.name:
                        m_i_0[p] = 1
                for n in self.__mf.keys():
                    if n.name == p.name:
                        m_i_f[p] = 1
            cluster = ((pn_i_f, m_i_0, m_i_f), [])
            if i in traces:
                for j in traces[i]:
                    cluster[1].append([a for a in trs[int(j)]])
            clustering.append(cluster)
        unclusterized = [
            t for (i, t) in enumerate(self.__traces) if i not in clusterized
        ]
        clustering.append(({"Unclusterized"}, unclusterized))
        return clustering

    def getTime(self):
        return self.__endComputationTime - self.__start
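
The three __minimizing*() methods above only add weighted soft clauses to the WCNFPlus object. A standalone sketch of that weighting scheme with made-up variable identifiers (100 to encourage clustering a trace, 2 to penalise differences and shared transitions, 1 to keep centroids small):

from pysat.formula import WCNFPlus

wcnf = WCNFPlus()
j_in_k, diff_l, common_t, k_contains_t = 1, 2, 3, 4   # made-up variable ids

wcnf.append([j_in_k], weight=100)        # strongly prefer clustering the trace
wcnf.append([-diff_l], weight=2)         # penalise trace/centroid differences
wcnf.append([-common_t], weight=2)       # penalise transitions shared by clusters
wcnf.append([-k_contains_t], weight=1)   # prefer small centroids
print(wcnf.soft, wcnf.wght)
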
Example No. 6
class Ruler(object):
    """
        MaxSAT/MCS-based rule enumerator.
    """
    def __init__(self, clusters, target, data, options):
        """
            Constructor.
        """

        self.init_stime = resource.getrusage(resource.RUSAGE_SELF).ru_utime
        self.init_ctime = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime

        # sample clusters for each class
        self.clusters = clusters

        # target class
        self.target = target

        # saving data
        self.data = data

        # saving options
        self.options = options

        # create a MaxSAT formula for rule enumeration
        self.prepare_formula()

        # create and initialize primer
        self.init_solver()

    def prepare_formula(self):
        """
            Prepare a MaxSAT formula for rule enumeration.
        """

        # creating a formula
        self.formula = WCNFPlus()

        # formula's variables
        self.orig_vars = max(self.data.fvmap.opp.keys())
        self.formula.nv = self.orig_vars * 2

        # creating soft clauses and hard p-clauses
        # as well as a mapping between dual-rail variables and input variables
        self.drvmap = {}
        for v in range(1, self.orig_vars + 1):
            if v not in self.data.deleted:
                self.formula.soft.append([-v])
                self.formula.soft.append([-v - self.orig_vars])

                self.formula.hard.append([-v,
                                          -v - self.orig_vars])  # p clauses

                self.drvmap[v] = v
                self.drvmap[v + self.orig_vars] = -v

        self.formula.wght = [1 for cl in self.formula.soft]
        self.formula.topw = len(self.formula.soft) + 1

        # hard clauses, discrimination constraints
        self.discrimination()

        # hard clauses, coverage constraints
        self.coverage()

        if self.options.pdump:
            fname = 'rules.{0}@{1}.wcnf'.format(os.getpid(),
                                                socket.gethostname())
            self.formula.to_file(fname)

        if self.options.verb:
            print('c1 formula: {0}v, {1}c ({2}h+{3}s)'.format(
                self.formula.nv,
                len(self.formula.hard) + len(self.formula.soft),
                len(self.formula.hard), len(self.formula.soft)))

    def discrimination(self):
        """
            Add hard clauses enforcing the discrimination constraints,
            i.e. each rule must discriminate all the instances of the wrong classes.
        """

        ncls = len(self.formula.hard)

        for label, instances in self.clusters.items():
            if label != self.target:
                for i in instances:
                    cl = list(
                        map(lambda l: -l if l < 0 else l + self.orig_vars,
                            self.data.samps[i][:-1]))
                    self.formula.hard.append(cl)

        if self.options.verb:
            print('c1 discrimination constraints: {0}h'.format(
                len(self.formula.hard) - ncls))

    def coverage(self):
        """
            Add hard clauses enforcing the coverage constraints such that
            each rule covers at least one instance of the target class.
        """

        topv = self.formula.nv
        ncls = len(self.formula.hard)
        self.tvars = []  # auxiliary variables

        allv = []
        for v in range(1, self.data.fvars + 1):
            allv.append(v)
            allv.append(v + self.orig_vars)
        allv = set(allv)

        # traversing instances of the target class
        for i in self.clusters[self.target]:
            sample = self.data.samps[i]

            # magic to get the set of literals in the sample
            s = set([l if l > 0 else -l + self.orig_vars for l in sample[:-1]])

            # computing the complement of the sample
            compl = allv.difference(s)

            # encoding the complement (as a term) into a set of clauses
            if compl:
                topv += 1
                self.tvars.append(topv)

                compl = sorted(compl)
                for l in compl:
                    self.formula.hard.append([-l, -topv])

                self.formula.hard.append(compl + [topv])

        # add final clause forcing to cover at least one sample
        self.formula.hard.append(self.tvars[:])

        if self.options.plimit:
            self.nof_p = {t: 0 for t in self.tvars}

        if self.options.verb:
            print('c1 coverage constraints: {0}v+{1}h'.format(
                topv - self.formula.nv,
                len(self.formula.hard) - ncls))

        self.formula.nv = topv

    def init_solver(self):
        """
            Create and initialize a solver for rule enumeration.
        """

        # initializing rule enumerator
        if self.options.primer == 'lbx':
            self.mcsls = LBXPlus(self.formula,
                                 use_cld=self.options.use_cld,
                                 solver_name=self.options.solver,
                                 get_model=True,
                                 use_timer=False)
        elif self.options.primer == 'mcsls':
            self.mcsls = MCSlsPlus(self.formula,
                                   use_cld=self.options.use_cld,
                                   solver_name=self.options.solver,
                                   get_model=True,
                                   use_timer=False)
        else:  # sorted or maxsat
            MaxSAT = RC2Stratified if self.options.blo else RC2
            self.rc2 = MaxSAT(self.formula,
                              solver=self.options.solver,
                              adapt=self.options.am1,
                              exhaust=self.options.exhaust,
                              trim=self.options.trim,
                              minz=self.options.minz)

            # disabling soft clause hardening
            if type(self.rc2) == RC2Stratified:
                self.rc2.hard = True

    def enumerate(self):
        """
            Enumerate all the rules.
        """

        if self.options.primer in ('lbx', 'mcsls'):
            return self.enumerate_mcsls()
        else:  # sorted or maxsat
            return self.enumerate_sorted()

    def enumerate_mcsls(self):
        """
            MCS-based rule enumeration.
        """

        if self.options.verb:
            print('c1 enumerating rules (mcs-based)')

        self.rules = []

        for mcs in self.mcsls.enumerate():
            mod = self.mcsls.get_model()
            mcs = list(
                filter(lambda l: l > 0 and abs(l) <= 2 * self.orig_vars, mod))

            rule = self.process_mcs(mcs)

            # recording rule
            self.rules.append(rule)

            # block
            self.mcsls.add_clause([-l for l in mcs])

            if self.options.bsymm:
                # breaking symmetric solutions
                symmpr = sorted(set(self.tvars).difference(set(mod)))
                self.mcsls.add_clause(symmpr)

            # check if there are enough MCSes
            if self.options.plimit:
                model = self.mcsls.get_model()

                i, reduced = 0, False
                while i < len(self.tvars):
                    t = self.tvars[i]
                    if model[t - 1] > 0:
                        self.nof_p[t] += 1

                    if self.nof_p[t] < self.options.plimit:
                        i += 1
                    else:
                        self.tvars[i] = self.tvars[-1]
                        self.tvars.pop()
                        reduced = True

                if reduced:
                    self.mcsls.oracle.add_clause(self.tvars)

                    if not self.tvars:
                        break

        self.mcsls.delete()

        # recording time
        self.stime = resource.getrusage(
            resource.RUSAGE_SELF).ru_utime - self.init_stime
        self.ctime = resource.getrusage(
            resource.RUSAGE_CHILDREN).ru_utime - self.init_ctime
        self.time = self.stime + self.ctime

        return self.rules

    def enumerate_sorted(self):
        """
            MaxSAT-based rule enumeration.
        """

        if self.options.verb:
            print('c1 enumerating rules (maxsat-based)')

        self.rules = []
        self.mcses = []

        for mod in self.rc2.enumerate():
            mcs = list(
                filter(lambda l: l > 0 and abs(l) <= 2 * self.orig_vars, mod))

            # blocking the mcs properly
            self.rc2.add_clause([-l for l in mcs])

            # processing it
            rule = self.process_mcs(mcs)

            # recording the mcs for future blocking
            self.mcses.append(mcs)

            # recording rule
            self.rules.append(rule)

            if self.options.bsymm:
                # breaking symmetric solutions
                symmpr = sorted(set(self.tvars).difference(set(mod)))
                self.rc2.add_clause(symmpr)

            # check if there are enough MCSes
            if self.options.plimit:
                model = self.rc2.model

                i, reduced = 0, False
                while i < len(self.tvars):
                    t = self.tvars[i]
                    if model[t - 1] > 0:
                        self.nof_p[t] += 1

                    if self.nof_p[t] < self.options.plimit:
                        i += 1
                    else:
                        self.tvars[i] = self.tvars[-1]
                        self.tvars.pop()
                        reduced = True

                if reduced:
                    self.rc2.add_clause(self.tvars)

                    if not self.tvars:
                        break

        self.rc2.delete()

        # recording time
        self.stime = resource.getrusage(
            resource.RUSAGE_SELF).ru_utime - self.init_stime
        self.ctime = resource.getrusage(
            resource.RUSAGE_CHILDREN).ru_utime - self.init_ctime
        self.time = self.stime + self.ctime

        return self.rules

    def process_mcs(self, mcs):
        """
            Extract a rule from MCS.
        """

        # getting the corresponding variables
        rule = Rule(fvars=[self.drvmap[i] for i in mcs],
                    label=self.target,
                    mapping=self.data.fvmap)

        # printing rule
        if self.options.verb > 1:
            if self.options.verb > 2:
                print('c1 mcs: {0}'.format(' '.join([str(l) for l in mcs])))

        return rule
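
A toy, self-contained illustration of the literal filtering done in enumerate_mcsls()/enumerate_sorted() and of the drvmap lookup performed by process_mcs() above (all values are made up):

orig_vars = 3
drvmap = {1: 1, 4: -1, 2: 2, 5: -2, 3: 3, 6: -3}   # dual-rail literal -> original literal

model = [1, -2, -3, -4, 5, -6, 7]                  # hypothetical solver model
mcs = [l for l in model if l > 0 and abs(l) <= 2 * orig_vars]
rule_literals = [drvmap[l] for l in mcs]
print(mcs, rule_literals)                          # [1, 5] -> [1, -2]
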
Example No. 7
class OptUx(object):
    """
        A simple Python version of the implicit hitting set based optimal MUS
        extractor and enumerator. Given a (weighted) (partial) CNF formula,
        i.e. formula in the :class:`.WCNF` format, this class can be used to
        compute a given number of optimal MUS (starting from the *best* one)
        of the input formula. :class:`OptUx` roughly follows the
        implementation of Forqes [1]_ but lacks a few additional heuristics,
        which however aren't applied in Forqes by default.

        As a result, OptUx applies exhaustive *disjoint* minimal correction
        subset (MCS) enumeration [1]_, [2]_, [3]_, [4]_ with the incremental
        use of RC2 [5]_ as an underlying MaxSAT solver. Once disjoint MCSes
        are enumerated, they are used to bootstrap a hitting set solver. This
        implementation uses :class:`.Hitman` as a hitting set solver, which is
        again based on RC2.

        Note that in the main implicit hitting set enumeration loop of the
        algorithm, OptUx follows Forqes in that it does not reduce correction
        subsets detected to minimal correction subsets. As a result,
        correction subsets computed in the main loop are added to
        :class:`Hitman` *unreduced*.

        :class:`OptUx` can use any SAT solver available in PySAT. The default
        SAT solver to use is ``g3``, which stands for Glucose 3 [6]_ (see
        :class:`.SolverNames`). Boolean parameters ``adapt``, ``exhaust``, and
        ``minz`` control whether or not the underlying :class:`.RC2` oracles
        should apply detection and adaptation of intrinsic AtMost1
        constraints, core exhaustion, and core reduction. Also, unsatisfiable
        cores can be trimmed if the ``trim`` parameter is set to a non-zero
        integer. Finally, verbosity level can be set using the ``verbose``
        parameter.

        .. [5] Alexey Ignatiev, Antonio Morgado, Joao Marques-Silva. *RC2: an
            Efficient MaxSAT Solver*. J. Satisf. Boolean Model. Comput. 11(1).
            2019. pp. 53-64

        .. [6] Gilles Audemard, Jean-Marie Lagniez, Laurent Simon.
            *Improving Glucose for Incremental SAT Solving with
            Assumptions: Application to MUS Extraction*. SAT 2013.
            pp. 309-317

        :param formula: (weighted) (partial) CNF formula
        :param solver: SAT oracle name
        :param adapt: detect and adapt intrinsic AtMost1 constraints
        :param exhaust: do core exhaustion
        :param minz: do heuristic core reduction
        :param trim: do core trimming at most this number of times
        :param verbose: verbosity level

        :type formula: :class:`.WCNF`
        :type solver: str
        :type adapt: bool
        :type exhaust: bool
        :type minz: bool
        :type trim: int
        :type verbose: int
    """
    def __init__(self,
                 formula,
                 solver='g3',
                 adapt=False,
                 exhaust=False,
                 minz=False,
                 trim=False,
                 verbose=0):
        """
            Constructor.
        """

        # verbosity level
        self.verbose = verbose

        # constructing a local copy of the formula
        self.formula = WCNFPlus()
        self.formula.hard = formula.hard[:]
        self.formula.wght = formula.wght[:]
        self.formula.topw = formula.topw
        self.formula.nv = formula.nv

        # copying atmost constraints, if any
        if isinstance(formula, WCNFPlus) and formula.atms:
            self.formula.atms = formula.atms[:]

        # top variable identifier
        self.topv = formula.nv

        # processing soft clauses
        self._process_soft(formula)
        self.formula.nv = self.topv

        # creating an unweighted copy
        unweighted = self.formula.copy()
        unweighted.wght = [1 for w in unweighted.wght]

        # enumerating disjoint MCSes (including unit-size MCSes)
        to_hit, self.units = self._disjoint(unweighted, solver, adapt, exhaust,
                                            minz, trim)

        if self.verbose > 2:
            print('c mcses: {0} unit, {1} disj'.format(
                len(self.units),
                len(to_hit) + len(self.units)))

        # hitting set enumerator
        self.hitman = Hitman(bootstrap_with=to_hit,
                             weights=self.weights,
                             solver=solver,
                             htype='sorted',
                             mxs_adapt=adapt,
                             mxs_exhaust=exhaust,
                             mxs_minz=minz,
                             mxs_trim=trim)

        # SAT oracle bootstrapped with the hard clauses; note that
        # clauses of the unit-size MCSes are enforced to be enabled
        self.oracle = Solver(name=solver,
                             bootstrap_with=unweighted.hard +
                             [[mcs] for mcs in self.units])

        if unweighted.atms:
            assert self.oracle.supports_atmost(), \
                    '{0} does not support native cardinality constraints. Make sure you use the right type of formula.'.format(solver)

            for atm in unweighted.atms:
                self.oracle.add_atmost(*atm)

    def __del__(self):
        """
            Destructor.
        """

        self.delete()

    def __enter__(self):
        """
            'with' constructor.
        """

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
            'with' destructor.
        """

        self.delete()

    def delete(self):
        """
            Explicit destructor of the internal hitting set and SAT oracles.
        """

        if self.hitman:
            self.hitman.delete()
            self.hitman = None

        if self.oracle:
            self.oracle.delete()
            self.oracle = None

    def _process_soft(self, formula):
        """
            The method is for processing the soft clauses of the input
            formula. Concretely, it checks which soft clauses must be relaxed
            by a unique selector literal and applies the relaxation.

            :param formula: input formula
            :type formula: :class:`.WCNF`
        """

        # list of selectors
        self.sels = []

        # mapping from selectors to clause ids
        self.smap = {}

        # duplicate unit clauses
        processed_dups = set()

        # processing the soft clauses
        for cl in formula.soft:
            # if the clause is unit-size, its sole literal acts as a selector
            selv = cl[0]

            # if clause is not unit, we relax it
            if len(cl) > 1:
                self.topv += 1
                selv = self.topv
                self.formula.hard.append(cl + [-selv])
            elif selv in self.smap:
                # the clause is unit but there is a previously seen
                # duplicate of this clause; this means we have to
                # reprocess the previous clause and relax it
                if selv not in processed_dups:
                    self.topv += 1
                    nsel = self.topv
                    self.sels[self.smap[selv] - 1] = nsel
                    self.formula.hard.append(
                        self.formula.soft[self.smap[selv] - 1] + [-nsel])
                    self.formula.soft[self.smap[selv] - 1] = [nsel]
                    self.smap[nsel] = self.smap[selv]
                    processed_dups.add(selv)

                # processing the current clause
                self.topv += 1
                selv = self.topv
                self.formula.hard.append(cl + [-selv])

            self.sels.append(selv)
            self.formula.soft.append([selv])
            self.smap[selv] = len(self.sels)

        # garbage-collecting the duplicates
        for selv in processed_dups:
            del self.smap[selv]

        # these numbers should be equal after the processing
        assert len(self.sels) == len(self.smap) == len(self.formula.wght)

        # creating a dictionary of weights
        self.weights = {l: w for l, w in zip(self.sels, self.formula.wght)}

    def _disjoint(self, formula, solver, adapt, exhaust, minz, trim):
        """
            This method constitutes the preliminary step of the implicit
            hitting set paradigm of Forqes. Namely, it enumerates all the
            disjoint *minimal correction subsets* (MCSes) of the formula,
            which will be later used to bootstrap the hitting set solver.

            Note that the MaxSAT solver in use is :class:`.RC2`. As a result,
            all the input parameters of the method, namely, ``formula``,
            ``solver``, ``adapt``, ``exhaust``, ``minz``, and ``trim`` -
            represent the input and the options for the RC2 solver.

            :param formula: input formula
            :param solver: SAT solver name
            :param adapt: detect and adapt AtMost1 constraints
            :param exhaust: exhaust unsatisfiable cores
            :param minz: apply heuristic core minimization
            :param trim: trim unsatisfiable cores at most this number of times

            :type formula: :class:`.WCNF`
            :type solver: str
            :type adapt: bool
            :type exhaust: bool
            :type minz: bool
            :type trim: int
        """

        # these will store disjoint MCSes
        # (unit-size MCSes are stored separately)
        to_hit, units = [], []

        with RC2(formula,
                 solver=solver,
                 adapt=adapt,
                 exhaust=exhaust,
                 minz=minz,
                 trim=trim,
                 verbose=0) as oracle:

            # iterating over MaxSAT solutions
            while True:
                # a new MaxSAT model
                model = oracle.compute()

                if model is None:
                    # no model => no more disjoint MCSes
                    break

                # extracting the MCS corresponding to the model
                falsified = list(
                    filter(lambda l: model[abs(l) - 1] == -l, self.sels))

                # unit size or not?
                if len(falsified) > 1:
                    to_hit.append(falsified)
                else:
                    units.append(falsified[0])

                # blocking the MCS;
                # next time, all these clauses will be satisfied
                for l in falsified:
                    oracle.add_clause([l])

                # reporting the MCS
                if self.verbose > 3:
                    print('c mcs: {0} 0'.format(' '.join(
                        [str(self.smap[s]) for s in falsified])))

            # RC2 will be destroyed next; let's keep the oracle time
            self.disj_time = oracle.oracle_time()

        return to_hit, units

    def compute(self):
        """
            This method implements the main loop of the implicit hitting set
            paradigm of Forqes to compute a best-cost MUS. The resulting MUS is
            returned as a list of integers, each representing a soft clause
            index.

            :rtype: list(int)
        """

        # correctly computed cost of the unit-mcs component
        units_cost = sum(
            map(lambda l: self.weights[l], (l for l in self.units)))

        while True:
            # computing a new optimal hitting set
            hs = self.hitman.get()

            if hs is None:
                # no more hitting sets exist
                break

            # setting all the selector polarities to true
            self.oracle.set_phases(self.sels)

            # testing satisfiability of the {self.units + hs} subset
            res = self.oracle.solve(assumptions=hs)

            if res == False:
                # the candidate subset of clauses is unsatisfiable,
                # i.e. it is an optimal MUS we are searching for;
                # therefore, blocking it and returning
                self.hitman.block(hs)
                self.cost = self.hitman.oracle.cost + units_cost
                return sorted(map(lambda s: self.smap[s], self.units + hs))
            else:
                # the candidate subset is satisfiable,
                # thus extracting a correction subset
                model = self.oracle.get_model()
                cs = list(filter(lambda l: model[abs(l) - 1] == -l, self.sels))

                # hitting the new correction subset
                self.hitman.hit(cs, weights=self.weights)
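
# A minimal sketch of this implicit-hitting-set loop on a toy selector
# encoding (the selectors, hard clauses, and bootstrapping MCS below are
# illustrative assumptions only):
from pysat.examples.hitman import Hitman
from pysat.solvers import Solver

sels = [1, 2, 3]                           # selectors of three soft constraints
hard = [[-1, -2], [-2, -3]]                # constraint 2 clashes with 1 and with 3
oracle = Solver(name='g3', bootstrap_with=hard)
hitman = Hitman(bootstrap_with=[[1, 3]], htype='sorted')   # one MCS known upfront

while True:
    hs = hitman.get()                      # candidate minimum-cost hitting set
    if hs is None:
        break
    oracle.set_phases(sels)                # bias the model towards enabling all selectors
    if not oracle.solve(assumptions=hs):
        print('optimal MUS:', sorted(hs))  # e.g. [1, 2] or [2, 3]
        break
    # satisfiable: the selectors falsified by the model form a correction subset
    model = oracle.get_model()
    cs = [l for l in sels if model[abs(l) - 1] == -l]
    hitman.hit(cs)                         # future hitting sets must also hit it

oracle.delete()
hitman.delete()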

    def enumerate(self):
        """
            This is a generator method iterating through MUSes and
            enumerating them until the formula has no more MUSes, or the user
            decides to stop the process.

            :rtype: list(int)
        """

        done = False

        while not done:
            mus = self.compute()

            if mus is not None:
                yield mus
            else:
                done = True
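
# A hedged usage sketch for this generator; `extractor` stands for an
# instance of the surrounding class, built from some WCNF formula as in the
# constructor shown earlier (both names are assumptions, not part of the
# example):
#
#     for i, mus in enumerate(extractor.enumerate(), 1):
#         print('c mus {0}: {1}'.format(i, mus))   # each MUS is a list of soft-clause indices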

    def oracle_time(self):
        """
            This method computes and returns the total SAT solving time
            involved.

            :rtype: float
        """

        return self.disj_time + self.hitman.oracle_time() + \
            self.oracle.time_accum()
Exemplo n.º 8
0
    def init(self, bootstrap_with, weights=None, subject_to=[]):
        """
            This method initializes the hitting set solver with a given list
            of sets to hit. Concretely, the hitting set problem is encoded
            into partial MaxSAT as outlined above, which is then fed either
            to a MaxSAT solver or an MCS enumerator.

            An additional optional parameter is ``weights``, which can be used
            to specify non-unit weights for the target objects in the sets to
            hit. This only works if ``'sorted'`` enumeration of hitting sets
            is applied.

            Another optional parameter is available, namely, ``subject_to``.
            It can be used to specify arbitrary hard constraints that must be
            respected when computing hitting sets of the given sets. Note that
            ``subject_to`` should be an iterable containing pure clauses
            and/or native AtMostK constraints. Finally, note that these hard
            constraints must be defined over the set of signed atomic objects,
            i.e. instances of class :class:`.Atom`.

            :param bootstrap_with: input set of sets to hit
            :param weights: weights of the objects in case the problem is weighted
            :param subject_to: hard constraints (either clauses or native AtMostK constraints)
            :type bootstrap_with: iterable(iterable(obj))
            :type weights: dict(obj)
            :type subject_to: iterable(iterable(Atom))
        """

        # formula encoding the sets to hit
        formula = WCNFPlus()

        # hard clauses
        for to_hit in bootstrap_with:
            to_hit = list(map(lambda obj: self.idpool.id(obj), to_hit))

            formula.append(to_hit)

        # additional hard constraints
        for cl in subject_to:
            if len(cl) != 2 or not isinstance(cl[0], (list, tuple, set)):
                # this is a pure clause
                formula.append(list(map(lambda a: self.idpool.id(a.obj) * (2 * a.sign - 1), cl)))
            else:
                # this is a native AtMostK constraint
                formula.append([list(map(lambda a: self.idpool.id(a.obj) * (2 * a.sign - 1), cl[0])), cl[1]], is_atmost=True)

        # soft clauses
        for obj_id in six.iterkeys(self.idpool.id2obj):
            formula.append([-obj_id],
                    weight=1 if not weights else weights[self.idpool.obj(obj_id)])

        if self.htype == 'rc2':
            if not weights or min(weights.values()) == max(weights.values()):
                self.oracle = RC2(formula, solver=self.solver, adapt=self.adapt,
                        exhaust=self.exhaust, minz=self.minz, trim=self.trim)
            else:
                self.oracle = RC2Stratified(formula, solver=self.solver,
                        adapt=self.adapt, exhaust=self.exhaust, minz=self.minz,
                        nohard=True, trim=self.trim)
        elif self.htype == 'lbx':
            self.oracle = LBX(formula, solver_name=self.solver,
                    use_cld=self.usecld)
        else:
            self.oracle = MCSls(formula, solver_name=self.solver,
                    use_cld=self.usecld)
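
# A hedged usage sketch for this initialization, assuming a PySAT version
# whose Hitman constructor accepts and forwards these same arguments to
# init() (the objects, weights, and hard clause below are illustrative only):
from pysat.examples.hitman import Atom, Hitman

h = Hitman(bootstrap_with=[['a', 'b'], ['a', 'c']],   # sets to hit
           weights={'a': 3, 'b': 1, 'c': 1},          # non-unit weights ('sorted' mode only)
           subject_to=[[Atom('b', sign=False), Atom('c', sign=False)]],  # hard: not both b and c
           htype='sorted')

print(h.get())   # expected ['a']: the cheap set {'b', 'c'} violates the hard clause
h.delete()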
Exemplo n.º 9
0
    def compute_mxsat(self):
        """
            Cover samples for all labels using MaxSAT or MCS enumeration.
        """

        if self.options.verb:
            print('c2 (using rc2)')

        # we model a set cover problem with MaxSAT
        formula = WCNFPlus()

        # hard part of the formula
        if self.options.accuracy == 100.0:
            for sid in self.cluster:  # for every sample in the cluster
                to_hit = []

                for rid, rule in enumerate(self.rules):
                    if rule.issubset(self.samps[sid]):
                        to_hit.append(rid + 1)

                formula.append(to_hit)
        else:
            topv = len(self.rules)
            allvars = []

            # hard clauses first
            for sid in self.cluster:  # for every sample in cluster
                to_hit = []

                for rid, rule in enumerate(self.rules):
                    if rule.issubset(self.samps[sid]):
                        to_hit.append(rid + 1)

                topv += 1
                allvars.append(topv)
                formula.append([-topv] + to_hit)
                for rid in to_hit:
                    formula.append([topv, -rid])

            # forcing at least the given percentage of samples to be covered
            cnum = int(math.ceil(self.options.accuracy * len(allvars) / 100.0))
            al = CardEnc.atleast(allvars,
                                 bound=cnum,
                                 top_id=topv,
                                 encoding=self.options.enc)
            if al:
                for cl in al.clauses:
                    formula.append(cl)

        # soft clauses
        for rid in range(len(self.rules)):
            formula.append([-rid - 1], weight=1)

        if self.options.weighted and not self.options.approx:
            # it is safe to add weights for all rules
            # because each rule covers at least one sample

            formula.wght = [len(rule) + 1 for rule in self.rules]

        if self.options.pdump:
            fname = 'cover{0}.{1}@{2}.wcnf'.format(self.target, os.getpid(),
                                                   socket.gethostname())
            formula.to_file(fname)

        # choosing the right solver
        if not self.options.approx:
            MaxSAT = RC2Stratified if self.options.blo else RC2
            hitman = MaxSAT(formula,
                            solver=self.options.solver,
                            adapt=self.options.am1,
                            exhaust=self.options.exhaust,
                            trim=self.options.trim,
                            minz=self.options.minz)
        else:
            hitman = LBX(formula,
                         use_cld=self.options.use_cld,
                         solver_name=self.options.solver,
                         use_timer=False)

        # and the cover is...
        if not self.options.approx:
            self.cover = list(
                filter(lambda l: 0 < l <= len(self.rules),
                       hitman.compute()))
            self.cost += hitman.cost

            if self.options.weighted:
                # each rule was given weight len(rule) + 1; drop the extra +1's
                self.cost -= len(self.cover)
        else:
            # approximating by computing a number of MCSes
            covers = []
            for i, cover in enumerate(hitman.enumerate()):
                hitman.block(cover)
                if self.options.weighted:
                    cost = sum([len(self.rules[rid - 1]) for rid in cover])
                else:
                    cost = len(cover)

                covers.append([cover, cost])

                if i + 1 == self.options.approx:
                    break

            self.cover, cost = min(covers, key=lambda x: x[1])
            self.cost += cost

        hitman.delete()
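
# A standalone sketch of the partial-coverage encoding built above: one
# indicator variable per sample, plus an AtLeast-k constraint over the
# indicators (the rules, samples, and accuracy threshold are made-up
# assumptions):
import math

from pysat.card import CardEnc, EncType
from pysat.examples.rc2 import RC2
from pysat.formula import WCNFPlus

rules = [{1}, {2}, {3}]                               # rule i is encoded as variable i
samples = [{1, 4}, {1, 5}, {2, 6}, {3, 7}]            # each sample is a set of features
accuracy = 75.0                                       # cover at least 75% of the samples

formula = WCNFPlus()
topv = len(rules)
indicators = []

for samp in samples:
    to_hit = [rid + 1 for rid, rule in enumerate(rules) if rule.issubset(samp)]
    topv += 1
    indicators.append(topv)
    formula.append([-topv] + to_hit)                  # indicator -> some covering rule is used
    for r in to_hit:
        formula.append([topv, -r])                    # a covering rule is used -> indicator

bound = int(math.ceil(accuracy * len(indicators) / 100.0))
card = CardEnc.atleast(indicators, bound=bound, top_id=topv,
                       encoding=EncType.seqcounter)
for cl in card.clauses:
    formula.append(cl)

for rid in range(len(rules)):
    formula.append([-rid - 1], weight=1)              # soft: prefer using fewer rules

with RC2(formula) as rc2:
    model = rc2.compute()
    cover = [l for l in model if 0 < l <= len(rules)]
    print('cover:', cover, 'cost:', rc2.cost)         # e.g. cover: [1, 2] cost: 2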
Exemplo n.º 10
0
    def __init__(self,
                 formula,
                 solver='g3',
                 adapt=False,
                 cover=None,
                 dcalls=False,
                 exhaust=False,
                 minz=False,
                 unsorted=False,
                 trim=False,
                 verbose=0):
        """
            Constructor.
        """

        # verbosity level
        self.verbose = verbose

        # constructing a local copy of the formula
        self.formula = WCNFPlus()
        self.formula.hard = formula.hard[:]
        self.formula.wght = formula.wght[:]
        self.formula.topw = formula.topw
        self.formula.nv = formula.nv

        # copying atmost constraints, if any
        if isinstance(formula, WCNFPlus) and formula.atms:
            self.formula.atms = formula.atms[:]

        # top variable identifier
        self.topv = formula.nv

        # processing soft clauses
        self._process_soft(formula)
        self.formula.nv = self.topv

        # creating an unweighted copy
        unweighted = self.formula.copy()
        unweighted.wght = [1 for w in unweighted.wght]

        # enumerating disjoint MCSes (including unit-size MCSes)
        to_hit, self.units = self._disjoint(unweighted, solver, adapt, exhaust,
                                            minz, trim)

        if self.verbose > 2:
            print('c mcses: {0} unit, {1} disj'.format(
                len(self.units),
                len(to_hit) + len(self.units)))

        if not unsorted:
            # MaxSAT-based hitting set enumerator
            self.hitman = Hitman(bootstrap_with=to_hit,
                                 weights=self.weights,
                                 solver=solver,
                                 htype='sorted',
                                 mxs_adapt=adapt,
                                 mxs_exhaust=exhaust,
                                 mxs_minz=minz,
                                 mxs_trim=trim)
        else:
            # MCS-based hitting set enumerator
            self.hitman = Hitman(bootstrap_with=to_hit,
                                 weights=self.weights,
                                 solver=solver,
                                 htype='lbx',
                                 mcs_usecld=dcalls)

        # adding the formula to cover to the hitting set enumerator
        self.cover = cover is not None
        if cover:
            # mapping literals to Hitman's atoms
            m = lambda l: Atom(l, sign=True) if -l not in self.weights \
                    else Atom(-l, sign=False)

            for cl in cover:
                if len(cl) != 2 or not isinstance(cl[0], (list, tuple, set)):
                    cl = [m(l) for l in cl]
                else:
                    cl = [[m(l) for l in cl[0]], cl[1]]

                self.hitman.add_hard(cl, weights=self.weights)

        # SAT oracle bootstrapped with the hard clauses; note that
        # clauses of the unit-size MCSes are enforced to be enabled
        self.oracle = Solver(name=solver,
                             bootstrap_with=unweighted.hard +
                             [[mcs] for mcs in self.units])

        if unweighted.atms:
            assert self.oracle.supports_atmost(), \
                    '{0} does not support native cardinality constraints. Make sure you use the right type of formula.'.format(solver)

            for atm in unweighted.atms:
                self.oracle.add_atmost(*atm)
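
# A short standalone sketch of the native-cardinality path above: the solver
# must support AtMostK constraints before they can be loaded (the solver
# name and clauses here are illustrative assumptions):
from pysat.solvers import Solver

atms = [([1, 2, 3], 1)]                          # native constraint: at most one of x1..x3
with Solver(name='minicard', bootstrap_with=[[1, 2], [2, 3]]) as oracle:
    assert oracle.supports_atmost(), \
        'minicard is expected to handle native AtMostK constraints'
    for lits, bound in atms:
        oracle.add_atmost(lits, bound)
    print(oracle.solve())                        # True: setting x2 alone satisfies everything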