示例#1
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Build one consensus ranking for ``dataset`` by delegating to ``self.kwik_sort``.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: part of the common interface; not read here, a single ranking
            is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: part of the common interface; not read by this implementation
        :type bench_mode: bool
        :return: a Consensus whose list of consensus rankings holds exactly one ranking
        :raise ScoringSchemeNotHandledException: documented by the common interface for scoring schemes an
            implementation cannot handle; this method body performs no such check itself
        """
        penalties = asarray(scoring_scheme.penalty_vectors)

        # Both lists are handed over empty; presumably the helpers fill them in place
        # (the ranking list is what ends up in the returned Consensus).
        ranking = []
        translated_elements = []
        internal_vars = self.prepare_internal_vars(translated_elements, dataset.rankings)
        self.kwik_sort(ranking, translated_elements, internal_vars, penalties)

        algorithm_info = {ConsensusFeature.AssociatedAlgorithm: self.get_full_name()}
        return Consensus(consensus_rankings=[ranking],
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att=algorithm_info)
示例#2
0
 def _run_final_data(self, raw_data: str) -> str:
     """
     Turn the raw experiment output into a ';'-separated summary table.

     The first line of ``raw_data`` is skipped, as are lines of length <= 1. In every remaining line the
     first ';'-separated column is read as the b5 value, the fourth column as a collection of integers
     (its first and last characters are dropped, the rest is split on ", "), and the last column as a
     ranking with ties.

     Each row is parsed only once and then evaluated for every k in ``self.__top_k_to_test``.

     :param raw_data: the raw result text, one row per line
     :type raw_data: str
     :return: a header line ``k;b5-b4=...`` followed by one line per k holding, for each scoring scheme,
         the sum of ``evaluate_topk_ranking`` over the rows whose b5 equals that of the scheme
     :rtype: str
     """
     top_k_all = self.__top_k_to_test
     scoring_schemes = self.__scoring_schemes

     header = "k" + "".join(";b5-b4=" + str(sc.b5 - sc.b4) for sc in scoring_schemes)

     # Parsing does not depend on k, so do it once instead of once per tested k.
     parsed_rows = []
     for line in raw_data.split("\n")[1:]:
         if len(line) > 1:
             cols = line.split(";")
             b5 = float(cols[0])
             consensus = Consensus([parse_ranking_with_ties_of_int(cols[-1])])
             gold_standard = {int(elem) for elem in cols[3][1:-1].split(", ")}
             parsed_rows.append((b5, consensus, gold_standard))

     # h_res[k][b5] collects the evaluation of every row recorded with that b5.
     h_res = {top_k: {sc.b5: [] for sc in scoring_schemes} for top_k in top_k_all}
     for top_k in top_k_all:
         h_res_topk = h_res[top_k]
         for b5, consensus, gold_standard in parsed_rows:
             h_res_topk[b5].append(consensus.evaluate_topk_ranking(gold_standard, top_k=top_k))

     table = [header]
     for top_k in top_k_all:
         h_topk = h_res[top_k]
         table.append(str(top_k) + "".join(";" + str(np.sum(np.asarray(h_topk[sc.b5])))
                                           for sc in scoring_schemes))
     # Every line, including the last one, is newline-terminated.
     return "\n".join(table) + "\n"
示例#3
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Order the elements by comparing their bucket positions across the randomly shuffled input rankings.

        Every element is associated with a vector holding, for each (shuffled) ranking, the index of the
        bucket containing it, or -1 when the ranking does not contain it. The elements are then sorted with
        ``RepeatChoice.__compare`` and each one is placed alone in its own bucket.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: part of the common interface; not read here, a single ranking
            is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: part of the common interface; not read by this implementation
        :type bench_mode: bool
        :return: a Consensus whose list of consensus rankings holds exactly one ranking without ties
        :raise ScoringSchemeNotHandledException: when the dataset is incomplete and
            ``is_scoring_scheme_relevant_when_incomplete_rankings`` rejects the scoring scheme
        """
        if not (dataset.is_complete
                or self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme)):
            raise ScoringSchemeNotHandledException

        # With a scoring scheme equivalent to these vectors the unified rankings are aggregated.
        reference_vectors = [[0, 1, 1, 0, 1, 1],
                             [1, 1, 0, 1, 1, 0]]
        rankings_to_use = (dataset.unified_rankings()
                           if scoring_scheme.is_equivalent_to(reference_vectors)
                           else dataset.rankings)

        nb_rankings = len(rankings_to_use)
        shuffled_rankings = list(rankings_to_use)
        shuffle(shuffled_rankings)

        bucket_ids = {}
        for id_ranking, ranking in enumerate(shuffled_rankings):
            for id_bucket, bucket in enumerate(ranking):
                for element in bucket:
                    if element not in bucket_ids:
                        # -1 marks rankings in which the element does not appear
                        bucket_ids[element] = zeros(nb_rankings, dtype=int) - 1
                    bucket_ids[element][id_ranking] = id_bucket

        ordered = sorted(bucket_ids.items(), key=cmp_to_key(RepeatChoice.__compare))
        res = [[element] for element, _ in ordered]

        return Consensus(
            consensus_rankings=[res],
            dataset=dataset,
            scoring_scheme=scoring_scheme,
            att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()})
示例#4
0
    def compute_consensus_rankings(
            self,
            dataset: Dataset,
            scoring_scheme: ScoringScheme,
            return_at_most_one_ranking=False,
            bench_mode=False
    ) -> Consensus:
        """
        Return the input ranking(s) with the lowest Kemeny score with respect to the dataset.

        The candidates are the rankings of the dataset (their unified version when the dataset is not
        complete). Each candidate is scored against ``dataset.rankings`` and those reaching the minimal
        score are kept, in their order of appearance.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: if True, only the first candidate reaching the minimal score
            is returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: part of the common interface; not read by this implementation
        :type bench_mode: bool
        :return: a Consensus holding the best candidate(s) and their score; when there is no candidate the
            list of consensus rankings is ``[[]]`` and the score is infinite
        :raise InompleteRankingsIncompatibleWithScoringSchemeException: when the dataset is not complete and,
            in one of the two penalty vectors, one of the first three penalties is greater than the penalty
            three positions further
        """
        sc = scoring_scheme.penalty_vectors
        if dataset.is_complete:
            rankings_to_use = dataset.rankings
        else:
            if any(sc[0][i] > sc[0][i + 3] or sc[1][i] > sc[1][i + 3] for i in range(3)):
                raise InompleteRankingsIncompatibleWithScoringSchemeException
            rankings_to_use = dataset.unified_rankings()

        kemeny = KemenyComputingFactory(scoring_scheme)
        best_score = float('inf')
        best_rankings = [[]]
        for candidate in rankings_to_use:
            score = kemeny.get_kemeny_score(candidate, dataset.rankings)
            if score < best_score:
                # strictly better: forget everything collected so far
                best_score = score
                best_rankings = [candidate]
            elif score == best_score and not return_at_most_one_ranking:
                best_rankings.append(candidate)

        features = {
            ConsensusFeature.KemenyScore: best_score,
            ConsensusFeature.AssociatedAlgorithm: self.get_full_name(),
        }
        return Consensus(consensus_rankings=best_rankings,
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att=features)
示例#5
0
    def compute_consensus_rankings(
            self,
            dataset: Dataset,
            scoring_scheme: ScoringScheme,
            return_at_most_one_ranking=False,
            bench_mode=False
    ) -> Consensus:
        """
        Prepare the sub-problems of an exact consensus computation and return the resulting Consensus.

        NOTE(review): the visible body only builds the cost data and the list of sub-problems; the part that
        actually solves them (and defines ``medianes`` and ``my_prob`` used in the return statement) is not
        present in this excerpt.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: the algorithm should not return more than one ranking
        :type return_at_most_one_ranking: bool
        :param bench_mode: is bench mode activated. If False, the algorithm may return more information
        :type bench_mode: bool
        :return one or more rankings if the underlying algorithm can find several equivalent consensus rankings
        If the algorithm is not able to provide multiple consensus, or if return_at_most_one_ranking is True then, it
        should return a list made of the only / the first consensus found.
        In all scenario, the algorithm returns a list of consensus rankings
        :raise ScoringSchemeNotHandledException when the algorithm cannot compute the consensus because the
        implementation of the algorithm does not fit with the scoring scheme
        """

        rankings = dataset.rankings
        # elem_id maps each element to a dense integer id assigned in order of first appearance;
        # id_elements is the reverse mapping.
        elem_id = {}
        id_elements = {}
        id_elem = 0
        for ranking in rankings:
            for bucket in ranking:
                for element in bucket:
                    if element not in elem_id:
                        elem_id[element] = id_elem
                        id_elements[id_elem] = element
                        id_elem += 1
        nb_elem = len(elem_id)

        positions = ExactAlgorithmCplex.__positions(rankings, elem_id)

        sc = asarray(scoring_scheme.penalty_vectors)
        # This empty graph is replaced below when preprocessing is enabled; it is not used otherwise in the
        # visible code.
        graph_elements = Graph()
        # Each sub-problem is a list of element ids.
        sub_problems = []
        if self.__preprocess:
            # One sub-problem per component of the graph of elements.
            graph_elements, mat_score = self.__graph_of_elements(positions, asarray(sc))
            scc_s = graph_elements.components()
            for scc in scc_s:
                sub_problem = []
                for elem in scc:
                    sub_problem.append(elem)
                sub_problems.append(sub_problem)
        else:
            # No preprocessing: a single sub-problem containing every element id.
            mat_score = self.__cost_matrix(positions, asarray(sc))
            sub_problems.append(list(range(nb_elem)))


        # NOTE(review): `medianes` and `my_prob` are not defined anywhere in the visible body, so this
        # statement raises NameError as written - the solving step appears to have been elided from this
        # excerpt. TODO confirm against the complete source.
        return Consensus(consensus_rankings=medianes,
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att={ConsensusFeature.IsNecessarilyOptimal: True,
                              ConsensusFeature.KemenyScore: my_prob.solution.get_objective_value(),
                              ConsensusFeature.AssociatedAlgorithm: self.get_full_name()
                              })
示例#6
0
    def compute_consensus_rankings(
            self,
            dataset: Dataset,
            scoring_scheme: ScoringScheme,
            return_at_most_one_ranking=False,
            bench_mode=False
    ) -> Consensus:
        """
        Split the elements into the components of a graph of elements and aggregate each one separately.

        The components returned by the graph are handled in order and their results concatenated:
        a component of one element becomes a bucket; a component in which, for no pair of elements, the
        third cost of ``mat_score`` exceeds the first or the second one becomes a single tied bucket; any
        other component is solved on the rankings projected onto its elements, with ``self.auxiliary_alg``
        when it has more than ``self.bound_for_exact`` elements and with ``ExactAlgorithm`` otherwise.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: part of the common interface; not read here, a single ranking
            is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: when False, the partition of the elements into components is added to the
            features of the returned Consensus
        :type bench_mode: bool
        :return: a Consensus holding one ranking; its ``IsNecessarilyOptimal`` feature is False as soon as
            the auxiliary algorithm has been used for a component
        :raise ScoringSchemeNotHandledException: documented by the common interface; not raised directly by
            this method body
        """
        # Imported lazily, and only when the exact algorithm can actually be selected below.
        if self.bound_for_exact > 0:
            from corankco.algorithms.exact.exactalgorithm import ExactAlgorithm

        penalties = asarray(scoring_scheme.penalty_vectors)
        rankings = dataset.rankings

        # Dense integer ids in order of first appearance, and the reverse mapping.
        elem_id = {}
        id_elements = {}
        for ranking in rankings:
            for bucket in ranking:
                for element in bucket:
                    if element not in elem_id:
                        new_id = len(elem_id)
                        elem_id[element] = new_id
                        id_elements[new_id] = element

        positions = dataset.get_positions(elem_id)
        graph, mat_score = self.__graph_of_elements(positions, penalties)
        components = graph.components()

        res = []
        optimal = True
        for component in components:
            if len(component) == 1:
                res.append([id_elements.get(component[0])])
                continue

            must_be_solved = any(
                mat_score[e1][e2][2] > mat_score[e1][e2][0] or mat_score[e1][e2][2] > mat_score[e1][e2][1]
                for e1, e2 in combinations(component, 2))
            if not must_be_solved:
                # every pair can be tied: the whole component forms one bucket
                res.append([id_elements.get(el) for el in component])
                continue

            # Project the input rankings onto the elements of this component, dropping what becomes empty.
            members = set(component)
            projected_rankings = []
            for ranking in rankings:
                projected = []
                for bucket in ranking:
                    kept = [elem for elem in bucket if elem_id.get(elem) in members]
                    if kept:
                        projected.append(kept)
                if projected:
                    projected_rankings.append(projected)

            if len(component) > self.bound_for_exact:
                solver = self.auxiliary_alg
                optimal = False
            else:
                solver = ExactAlgorithm(preprocess=False)
            sub_consensus = solver.compute_consensus_rankings(Dataset(projected_rankings),
                                                              scoring_scheme,
                                                              True)
            res.extend(sub_consensus.consensus_rankings[0])

        hash_information = {ConsensusFeature.IsNecessarilyOptimal: optimal,
                            ConsensusFeature.AssociatedAlgorithm: self.get_full_name()
                            }
        if not bench_mode:
            hash_information[ConsensusFeature.WeakPartitioning] = [
                {id_elements.get(elem) for elem in component} for component in components
            ]

        return Consensus(consensus_rankings=[res],
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att=hash_information)
示例#7
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Rank the elements by increasing average position over the rankings that contain them.

        The position of a bucket starts at 1 and, after each bucket, grows by 1 when
        ``self.useBucketIdAndNotBucketSize`` is set and by the size of the bucket otherwise. Elements with
        the same average position are tied in the consensus.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: part of the common interface; not read here, a single ranking
            is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: part of the common interface; not read by this implementation
        :type bench_mode: bool
        :return: a Consensus whose list of consensus rankings holds exactly one ranking
        :raise ScoringSchemeNotHandledException: when the dataset is incomplete and
            ``is_scoring_scheme_relevant_when_incomplete_rankings`` rejects the scoring scheme
        """
        if not (dataset.is_complete
                or self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme)):
            raise ScoringSchemeNotHandledException

        # The unified rankings are used for scoring schemes equivalent to one of the two unifying schemes.
        use_unified = scoring_scheme.is_equivalent_to(
            ScoringScheme.get_unifying_scoring_scheme().penalty_vectors) or \
            scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme_p(0.5).penalty_vectors)
        rankings_to_use = dataset.unified_rankings() if use_unified else dataset.rankings

        # element -> [sum of its positions, number of rankings containing it]
        totals = {}
        for ranking in rankings_to_use:
            position = 1
            for bucket in ranking:
                for elem in bucket:
                    stats = totals.setdefault(elem, [0, 0])
                    stats[0] += position
                    stats[1] += 1
                position += 1 if self.useBucketIdAndNotBucketSize else len(bucket)

        averages = [(elem, stats[0] * 1.0 / stats[1]) for elem, stats in totals.items()]
        by_average = sorted(averages, key=lambda pair: pair[1])

        # Consecutive elements sharing the same average end up in the same bucket.
        consensus = []
        current_bucket = []
        previous_average = -1
        for elem, average in by_average:
            if average != previous_average:
                previous_average = average
                current_bucket = []
                consensus.append(current_bucket)
            current_bucket.append(elem)

        return Consensus(
            consensus_rankings=[consensus],
            dataset=dataset,
            scoring_scheme=scoring_scheme,
            att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()})
    def compute_consensus_rankings(
            self,
            dataset: Dataset,
            scoring_scheme: ScoringScheme,
            return_at_most_one_ranking=False,
            bench_mode=False
    ) -> Consensus:
        """
        Compute a consensus ranking by solving a 0/1 linear program built with PuLP.

        For every ordered pair of distinct elements (i, j) a binary variable ``x_i_j`` is created with cost
        ``mat_score[i][j][0]`` and, for i < j, a binary variable ``t_i_j`` with cost ``mat_score[i][j][2]``.
        The program is solved with CPLEX; if that attempt raises an ordinary exception it is solved with
        ``pulp.PULP_CBC_CMD`` instead. The ranking is rebuilt by sorting the elements by the number of
        ``x`` variables equal to 1 in which they are the second index; equal counts share a bucket.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: part of the common interface; not read here, a single ranking
            is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: part of the common interface; not read by this implementation
        :type bench_mode: bool
        :return: a Consensus holding one ranking, the value of the objective function as Kemeny score, and
            the ``IsNecessarilyOptimal`` feature set to True
        :raise ScoringSchemeNotHandledException: documented by the common interface; not raised directly by
            this method body
        """
        rankings = dataset.rankings
        # Dense integer ids in order of first appearance, and the reverse mapping.
        elem_id = {}
        id_elements = {}
        id_elem = 0
        for ranking in rankings:
            for bucket in ranking:
                for element in bucket:
                    if element not in elem_id:
                        elem_id[element] = id_elem
                        id_elements[id_elem] = element
                        id_elem += 1
        nb_elem = len(elem_id)

        positions = ExactAlgorithmGeneric.__positions(rankings, elem_id)

        sc = asarray(scoring_scheme.penalty_vectors)

        graph, mat_score, ties_must_be_checked = self.__graph_of_elements(positions, sc)

        # my_vars[k] is a decision variable and my_values[k] its cost in the objective;
        # h_vars maps a variable name to its index k in both lists.
        my_values = []
        my_vars = []

        h_vars = {}
        cpt = 0

        for i in range(nb_elem):
            for j in range(nb_elem):
                if not i == j:
                    name_var = "x_%s_%s" % (i, j)
                    my_values.append(mat_score[i][j][0])
                    my_vars.append(pulp.LpVariable(name_var, 0, 1, cat="Binary"))
                    h_vars[name_var] = cpt
                    cpt += 1
                    if i < j:
                        name_var = "t_%s_%s" % (i, j)
                        my_values.append(mat_score[i][j][2])
                        my_vars.append(pulp.LpVariable(name_var, 0, 1, cat="Binary"))
                        h_vars[name_var] = cpt
                        cpt += 1

        prob = pulp.LpProblem("myProblem", pulp.LpMinimize)

        # add the binary order constraints: exactly one of x_i_j, x_j_i, t_i_j holds for each pair
        for i in range(0, nb_elem - 1):
            for j in range(i + 1, nb_elem):
                prob += my_vars[h_vars["x_%s_%s" % (i, j)]] \
                        + my_vars[h_vars["x_%s_%s" % (j, i)]] \
                        + my_vars[h_vars["t_%s_%s" % (i, j)]] == 1

        # add the transitivity constraints
        for i in range(0, nb_elem):
            for j in range(nb_elem):
                if j != i:
                    i_bef_j = "x_%s_%s" % (i, j)
                    # t variables only exist with the smaller index first
                    if i < j:
                        i_tie_j = "t_%s_%s" % (i, j)
                    else:
                        i_tie_j = "t_%s_%s" % (j, i)
                    for k in range(nb_elem):
                        if k != i and k != j:
                            j_bef_k = "x_%s_%s" % (j, k)
                            i_bef_k = "x_%s_%s" % (i, k)
                            if j < k:
                                j_tie_k = "t_%s_%s" % (j, k)
                            else:
                                j_tie_k = "t_%s_%s" % (k, j)

                            if i < k:
                                i_tie_k = "t_%s_%s" % (i, k)
                            else:
                                i_tie_k = "t_%s_%s" % (k, i)

                            prob += my_vars[h_vars[i_bef_j]] +\
                                my_vars[h_vars[j_bef_k]] \
                                + my_vars[h_vars[j_tie_k]] \
                                - my_vars[h_vars[i_bef_k]] <= 1

                            prob += my_vars[h_vars[i_bef_j]] + \
                                my_vars[h_vars[i_tie_j]] \
                                + my_vars[h_vars[j_bef_k]] - my_vars[h_vars[i_bef_k]] <= 1

                            prob += 2 * my_vars[h_vars[i_tie_j]] \
                                + 2 * my_vars[h_vars[j_tie_k]] \
                                - my_vars[h_vars[i_tie_k]] <= 3

        # optimization: when ties do not have to be considered, force every t variable to 0
        if not ties_must_be_checked:
            for i in range(0, nb_elem - 1):
                for j in range(i + 1, nb_elem):
                    prob += my_vars[h_vars["t_%s_%s" % (i, j)]] == 0

        cfc = graph.components()

        # Fix the relative order of elements of different components: the elements of a component are
        # placed before those of every component that follows it in the list.
        for i in range(len(cfc)):
            group_i = cfc[i]
            for j in range(i+1, len(cfc)):
                for elem_i in group_i:
                    for elem_j in cfc[j]:
                        prob += my_vars[h_vars["x_%s_%s" % (elem_i, elem_j)]] == 1
                        prob += my_vars[h_vars["x_%s_%s" % (elem_j, elem_i)]] == 0
                        if elem_i < elem_j:
                            prob += my_vars[h_vars["t_%s_%s" % (elem_i, elem_j)]] == 0
                        else:
                            prob += my_vars[h_vars["t_%s_%s" % (elem_j, elem_i)]] == 0

        # objective function
        prob += pulp.lpSum(my_vars[cpt] * my_values[cpt] for cpt in range(len(my_vars)))

        # Fall back to CBC when the CPLEX attempt fails. Only ordinary exceptions are caught, so that
        # KeyboardInterrupt and SystemExit still interrupt the computation.
        try:
            prob.solve(pulp.CPLEX(msg=False))
        except Exception:
            prob.solve(pulp.PULP_CBC_CMD(msg=False))

        # h_def[e] = number of x variables set to 1 whose second index is e
        h_def = {i: 0 for i in range(nb_elem)}

        for var in my_vars:
            if abs(var.value() - 1) < 0.01 and var.name[0] == "x":
                h_def[int(var.name.split("_")[2])] += 1

        ranking = []
        current_nb_def = 0
        bucket = []

        # Elements by increasing count; a new bucket starts each time the count changes.
        for elem, nb_defeats in (sorted(h_def.items(), key=itemgetter(1))):
            if nb_defeats == current_nb_def:
                bucket.append(id_elements[elem])
            else:
                ranking.append(bucket)
                bucket = [id_elements[elem]]
                current_nb_def = nb_defeats
        ranking.append(bucket)
        return Consensus(consensus_rankings=[ranking],
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att={ConsensusFeature.IsNecessarilyOptimal: True,
                              ConsensusFeature.KemenyScore: prob.objective.value(),
                              ConsensusFeature.AssociatedAlgorithm: self.get_full_name()
                              })
示例#9
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Run the ``bioconsertinc`` routine on a set of departure rankings and return the best result(s).

        The departure rankings and the array receiving their distances come from
        ``self.__departure_rankings``. After the call, the rankings whose distance equals the smallest one
        are decoded (one bucket id per element) into rankings with ties, duplicates being dropped.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: if True, only the last of the rankings reaching the smallest
            distance is returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: part of the common interface; not read by this implementation
        :type bench_mode: bool
        :return: a Consensus holding the distinct best rankings and the smallest distance as Kemeny score
        :raise ScoringSchemeNotHandledException: documented by the common interface; not raised directly by
            this method body
        """

        penalties = asarray(scoring_scheme.penalty_vectors)
        rankings = dataset.rankings

        # Dense integer ids in order of first appearance, and the reverse mapping.
        elem_id = {}
        id_elements = {}
        for ranking in rankings:
            for bucket in ranking:
                for element in bucket:
                    if element not in elem_id:
                        new_id = len(elem_id)
                        elem_id[element] = new_id
                        id_elements[new_id] = element
        nb_rankings = len(rankings)
        nb_elements = len(elem_id)

        positions = BioConsert.__get_positions(rankings, elem_id)
        departure, dst_res = self.__departure_rankings(dataset, positions, elem_id, scoring_scheme)

        # The results are read back from departure_c and dst_res after the call, so the routine is
        # expected to write into both arrays.
        departure_c = array(departure.flatten(), dtype=int32)
        bioconsertinc.bioconsertinc(
            array(positions.flatten(), dtype=int32),
            departure_c,
            array(penalties[0], dtype=float64),
            array(penalties[1], dtype=float64),
            int32(nb_elements),
            int32(nb_rankings),
            int32(len(departure)),
            dst_res,
        )
        departure = departure_c.reshape(-1, nb_elements)

        lowest_distance = amin(dst_res)
        best_rows = where(dst_res == lowest_distance)[0]
        best_rankings = departure[best_rows].tolist()
        if return_at_most_one_ranking:
            best_rankings = [best_rankings[-1]]

        res = []
        already_seen = set()
        for bucket_of_element in best_rankings:
            signature = str(bucket_of_element)
            if signature in already_seen:
                continue
            already_seen.add(signature)

            # bucket_of_element[e] is the id of the bucket holding element id e
            buckets = {}
            for el, id_bucket in enumerate(bucket_of_element):
                buckets.setdefault(id_bucket, []).append(id_elements.get(el))
            res.append([buckets.get(id_bucket) for id_bucket in range(len(buckets))])

        features = {
            ConsensusFeature.KemenyScore: lowest_distance,
            ConsensusFeature.AssociatedAlgorithm: self.get_full_name(),
        }
        return Consensus(consensus_rankings=res,
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att=features)
示例#10
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Build one consensus ranking by sweeping the input rankings level by level.

        Every element gets a threshold equal to ``self.__h`` added once per occurrence of the element in the
        rankings used. The rankings are then read in parallel, one bucket position at a time; each time an
        element is met its counter grows by one, and the element is emitted as soon as its counter reaches its
        threshold. All the elements emitted while reading the same bucket position are tied in one bucket.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: not read by this implementation, which always yields one ranking
        :type return_at_most_one_ranking: bool
        :param bench_mode: not read by this implementation
        :type bench_mode: bool
        :return a Consensus holding a list with exactly one ranking (``[[]]`` when no element was emitted)
        :raise ScoringSchemeNotHandledException when the dataset is not complete and the scoring scheme is not
        declared relevant for incomplete rankings
        """
        # Precondition: a complete dataset, or a scoring scheme that copes with incomplete rankings.
        if not (dataset.is_complete
                or self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme)):
            raise ScoringSchemeNotHandledException

        # Work on the unified rankings when the scoring scheme matches one of the two unifying schemes.
        unifying_vectors = ScoringScheme.get_unifying_scoring_scheme().penalty_vectors
        if scoring_scheme.is_equivalent_to(unifying_vectors):
            use_unified = True
        else:
            unifying_p_vectors = ScoringScheme.get_unifying_scoring_scheme_p(0.5).penalty_vectors
            use_unified = scoring_scheme.is_equivalent_to(unifying_p_vectors)
        rankings_to_use = dataset.unified_rankings() if use_unified else dataset.rankings

        # Threshold of each element: self.__h accumulated once per occurrence.
        step = self.__h
        threshold = {}
        for ranking in rankings_to_use:
            for bucket in ranking:
                for element in bucket:
                    threshold[element] = threshold.get(element, 0) + step

        seen_count = {}
        placed = set()
        ranking_res = []

        # zip_longest yields, for each bucket position, the bucket of every ranking (None once a ranking is over).
        for level in zip_longest(*rankings_to_use):
            level_bucket = []
            for bucket in level:
                if bucket is None:
                    continue
                for element in bucket:
                    if element in placed:
                        continue
                    occurrences = seen_count.get(element, 0) + 1
                    seen_count[element] = occurrences
                    if occurrences >= threshold[element]:
                        level_bucket.append(element)
                        placed.add(element)
            if level_bucket:
                ranking_res.append(level_bucket)

        rankings_consensus = [ranking_res] if ranking_res else [[]]
        return Consensus(
            consensus_rankings=rankings_consensus,
            dataset=dataset,
            scoring_scheme=scoring_scheme,
            att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()})
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Compute the consensus ranking(s) of a dataset by solving a binary linear program with CPLEX.

        Model: for every ordered pair of distinct elements (i, j) a binary variable ``x_i_j`` is created, and for
        every pair i < j a binary variable ``t_i_j``. Exactly one of ``x_i_j``, ``x_j_i``, ``t_i_j`` must be 1,
        and transitivity constraints link the triples, so ``x`` reads as "strictly before" and ``t`` as "tied
        with". The objective minimises the sum of the pairwise costs taken from the cost matrix.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: if True a single solve is run and one ranking is returned; otherwise
        the CPLEX solution pool is populated and every solution of the pool is returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: is bench mode activated (not read by this implementation)
        :type bench_mode: bool
        :return a Consensus whose rankings are the solutions found, with the objective value of the solver stored
        as the Kemeny score and the result flagged as necessarily optimal
        :raise ScoringSchemeNotHandledException when the algorithm cannot compute the consensus because the
        implementation of the algorithm does not fit with the scoring scheme (not raised directly in this body)
        """

        rankings = dataset.rankings

        # Give every distinct element a dense integer id, in order of first appearance.
        elem_id = {}
        id_elements = {}
        id_elem = 0
        for ranking in rankings:
            for bucket in ranking:
                for element in bucket:
                    if element not in elem_id:
                        elem_id[element] = id_elem
                        id_elements[id_elem] = element
                        id_elem += 1
        nb_elem = len(elem_id)
        # Number of unordered pairs of elements; the product is always even, so the division is exact.
        nb_pairs = nb_elem * (nb_elem - 1) // 2

        positions = ExactAlgorithmCplex.__positions(rankings, elem_id)

        sc = asarray(scoring_scheme.penalty_vectors)
        # The graph of elements is only built (and only read) when preprocessing is enabled.
        graph_elements = None
        if self.__preprocess:
            graph_elements, mat_score = self.__graph_of_elements(positions, sc)
        else:
            mat_score = self.__cost_matrix(positions, sc)
        # NOTE(review): mat_score[i][j] presumably holds the costs of (i before j, j before i, i tied with j);
        # only indices 0 and 2 are used as objective coefficients below - confirm against __cost_matrix.

        map_elements_cplex = {}
        my_prob = cplex.Cplex()  # initiate
        my_prob.set_results_stream(None)  # mute
        my_prob.parameters.mip.tolerances.mipgap.set(0.000001)
        my_prob.parameters.mip.pool.absgap.set(0.000001)

        my_prob.objective.set_sense(
            my_prob.objective.sense.minimize
        )  # we want to minimize the objective function
        if not return_at_most_one_ranking:
            # Several consensus are wanted: tune the solution pool before populating it.
            my_prob.parameters.mip.pool.intensity.set(4)
            my_prob.parameters.mip.limits.populate.set(10000000)
        my_obj = []
        my_ub = []
        my_lb = []
        my_names = []

        # Variables x_i_j, one per ordered pair of distinct elements.
        cpt = 0
        should_consider_ties = False
        for i in range(nb_elem):
            for j in range(nb_elem):
                if not i == j:
                    if not should_consider_ties:
                        # Ties are kept in the model as soon as one pair has a positive value here, or a value
                        # close to zero when several consensus rankings are requested.
                        calc = mat_score[i][j][0] + mat_score[i][j][
                            1] - 2 * mat_score[i][j][2]
                        if (-0.00001 <= calc <= 0.00001 and
                                not return_at_most_one_ranking) or calc > 0:
                            should_consider_ties = True
                    s = "x_%s_%s" % (i, j)
                    my_obj.append(mat_score[i][j][0])
                    my_ub.append(1.0)
                    my_lb.append(0.0)
                    my_names.append(s)
                    map_elements_cplex[cpt] = ("x", i, j)
                    cpt += 1

        # Variables t_i_j, one per unordered pair i < j.
        for i in range(nb_elem):
            for j in range(i + 1, nb_elem):
                s = "t_%s_%s" % (i, j)
                my_obj.append(mat_score[i][j][2])
                my_ub.append(1.0)
                my_lb.append(0.0)
                my_names.append(s)
                map_elements_cplex[cpt] = ("t", i, j)
                cpt += 1
        my_prob.variables.add(obj=my_obj,
                              lb=my_lb,
                              ub=my_ub,
                              types="B" * cpt,
                              names=my_names)

        # rhs = right hand side
        my_rhs = []
        my_rownames = []

        # inequations : E for Equality, G for >=  and L for <=
        # One equality per unordered pair, then three "<=" rows per ordered triple of distinct elements.
        my_sense = "E" * nb_pairs + "L" * (3 * nb_elem * (nb_elem - 1) *
                                           (nb_elem - 2))

        rows = []

        # add the binary order constraints: x_i_j + x_j_i + t_i_j = 1
        count = 0
        for i in range(0, nb_elem - 1):
            for j in range(i + 1, nb_elem):
                s = "c%s" % count
                count += 1
                my_rhs.append(1)
                my_rownames.append(s)
                first_var = "x_%s_%s" % (i, j)
                second_var = "x_%s_%s" % (j, i)
                third_var = "t_%s_%s" % (i, j)

                row = [[first_var, second_var, third_var], [1.0, 1.0, 1.0]]
                rows.append(row)
        # add the transitivity constraints
        for i in range(0, nb_elem):
            for j in range(nb_elem):
                if j != i:
                    i_bef_j = "x_%s_%s" % (i, j)
                    # Tie variables only exist with the smaller id first.
                    if i < j:
                        i_tie_j = "t_%s_%s" % (i, j)
                    else:
                        i_tie_j = "t_%s_%s" % (j, i)
                    for k in range(nb_elem):
                        if k != i and k != j:
                            # x_i_j + x_j_k + t_j_k - x_i_k <= 1
                            my_rownames.append("c%s" % count)
                            my_rhs.append(1)
                            count += 1
                            if j < k:
                                j_tie_k = "t_%s_%s" % (j, k)
                            else:
                                j_tie_k = "t_%s_%s" % (k, j)
                            rows.append([[
                                i_bef_j,
                                "x_%s_%s" % (j, k), j_tie_k,
                                "x_%s_%s" % (i, k)
                            ], [1., 1., 1., -1.]])

                            # x_i_j + t_i_j + x_j_k - x_i_k <= 1
                            my_rownames.append("c%s" % count)
                            my_rhs.append(1)
                            count += 1
                            rows.append([[
                                i_bef_j, i_tie_j,
                                "x_%s_%s" % (j, k),
                                "x_%s_%s" % (i, k)
                            ], [1., 1., 1., -1.]])

                            if i < k:
                                i_tie_k = "t_%s_%s" % (i, k)
                            else:
                                i_tie_k = "t_%s_%s" % (k, i)

                            # 2 * t_i_j + 2 * t_j_k - t_i_k <= 3
                            my_rownames.append("c%s" % count)
                            my_rhs.append(3)
                            count += 1
                            rows.append([[i_tie_j, j_tie_k, i_tie_k],
                                         [2.0, 2.0, -1.0]])

        if self.__optimize and not should_consider_ties:
            # No pair made ties worth considering: force every tie variable to 0.
            my_sense += "E" * nb_pairs
            for i in range(0, nb_elem - 1):
                for j in range(i + 1, nb_elem):
                    my_rownames.append("c%s" % count)
                    my_rhs.append(0)
                    count += 1
                    i_tie_j = "t_%s_%s" % (i, j)
                    rows.append([[i_tie_j], [1.]])

        if self.__preprocess:
            # Force every element of a component to be before every element of each later component.
            # NOTE(review): assumes the vertex ids of the graph are the internal element ids and that
            # components() lists the components in precedence order - confirm against __graph_of_elements.
            nb_forced = 0
            scc = graph_elements.components()
            for i in range(len(scc) - 1):
                elems_scc_i = scc[i]
                for j in range(i + 1, len(scc)):
                    elems_scc_j = scc[j]
                    nb_forced += len(elems_scc_i) * len(elems_scc_j)
                    for elem1 in elems_scc_i:
                        for elem2 in elems_scc_j:
                            my_rownames.append("c%s" % count)
                            my_rhs.append(1)
                            count += 1
                            # The variable must be named after the two elements, not after the indices of
                            # their components.
                            elem1_bef_elem2 = "x_%s_%s" % (elem1, elem2)
                            rows.append([[elem1_bef_elem2], [1.]])
            my_sense += "E" * nb_forced

        my_prob.linear_constraints.add(lin_expr=rows,
                                       senses=my_sense,
                                       rhs=my_rhs,
                                       names=my_rownames)
        medianes = []

        if not return_at_most_one_ranking:
            # Collect every solution kept in the pool.
            my_prob.populate_solution_pool()

            nb_optimal_solutions = my_prob.solution.pool.get_num()
            for i in range(nb_optimal_solutions):
                names = my_prob.solution.pool.get_values(i)
                medianes.append(
                    ExactAlgorithmCplex.__create_consensus(
                        nb_elem, names, map_elements_cplex, id_elements))
        else:
            my_prob.solve()
            x = my_prob.solution.get_values()
            medianes.append(
                ExactAlgorithmCplex.__create_consensus(nb_elem, x,
                                                       map_elements_cplex,
                                                       id_elements))

        return Consensus(consensus_rankings=medianes,
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att={
                             ConsensusFeature.IsNecessarilyOptimal:
                             True,
                             ConsensusFeature.KemenyScore:
                             my_prob.solution.get_objective_value(),
                             ConsensusFeature.AssociatedAlgorithm:
                             self.get_full_name()
                         })