Пример #1
0
    def test_all_formulas(self):
        """Check the formulas ``get_all_formulas`` enumerates for two tiny samples."""
        # Sample with two plain columns: the expected result holds the empty
        # formula, every single attribute/value pair, and each complete row.
        plain_formulas = get_all_formulas(read_sample("x,y\n1,2\n3,4"))
        plain_expected = {
            frozendict({}),
            frozendict({"x": "1"}),
            frozendict({"x": "3"}),
            frozendict({"y": "2"}),
            frozendict({"y": "4"}),
            frozendict({"x": "1", "y": "2"}),
            frozendict({"x": "3", "y": "4"}),
        }
        self.assertEqual(plain_formulas, plain_expected)

        # Sample whose single column is written as ``{1 2}`` / ``{3 4}``
        # (presumably set-valued cells -- confirm against ``read_sample``):
        # the expected values are frozensets, one per element plus one per
        # complete cell, alongside the empty formula.
        set_formulas = get_all_formulas(read_sample("x\n{1 2}\n{3 4}"))
        set_expected = {
            frozendict({}),
            frozendict({"x": frozenset(["1"])}),
            frozendict({"x": frozenset(["2"])}),
            frozendict({"x": frozenset(["3"])}),
            frozendict({"x": frozenset(["4"])}),
            frozendict({"x": frozenset(["1", "2"])}),
            frozendict({"x": frozenset(["3", "4"])}),
        }
        self.assertEqual(set_formulas, set_expected)
Пример #2
0
    def test_all_formulas(self):
        """Check the cell sets ``get_all_formulas`` enumerates for two tiny samples."""

        def cell(attribute, value):
            # Every expected cell in this test carries sequence 0.
            return Cell(attribute=attribute, sequence=0, value=value)

        def formula(*cells):
            # A formula is compared as a frozenset of its cells.
            return frozenset(cells)

        # Two plain columns: each single cell plus each complete row.
        plain_formulas = get_all_formulas(read_sample('x,y\n1,2\n3,4'))
        plain_expected = {
            formula(cell('x', '1')),
            formula(cell('y', '2')),
            formula(cell('x', '3')),
            formula(cell('y', '4')),
            formula(cell('x', '1'), cell('y', '2')),
            formula(cell('x', '3'), cell('y', '4')),
        }
        self.assertEqual(plain_formulas, plain_expected)

        # One column written as ``{1 2}`` / ``{3 4}``: each value on its own
        # plus the two values that share a row, all under attribute ``x``.
        set_formulas = get_all_formulas(read_sample('x\n{1 2}\n{3 4}'))
        set_expected = {
            formula(cell('x', '1')),
            formula(cell('x', '2')),
            formula(cell('x', '3')),
            formula(cell('x', '4')),
            formula(cell('x', '1'), cell('x', '2')),
            formula(cell('x', '3'), cell('x', '4')),
        }
        self.assertEqual(set_formulas, set_expected)
Пример #3
0
def find_optimum(relation, k):
    """Find the cheapest subsets of formulas for ``relation`` by exhaustive search.

    All formulas from ``get_all_formulas(relation, True)`` are generated, every
    subset produced by ``subsets(formulas, k)`` is costed with
    ``cost(subset, relation)``, and the subsets tied for the lowest cost are
    returned. Because every candidate is materialised and costed, this can be
    very slow.

    Args:
        relation: The relation to analyse; passed through to the helpers.
        k: Forwarded to ``subsets`` (presumably a bound on the subset size --
            confirm against ``subsets``).

    Returns:
        A tuple ``(best_cost, best)`` where ``best`` is a list of
        ``(cost, subset)`` pairs whose cost equals ``best_cost``, in the order
        in which ``subsets`` produced them.

    Raises:
        IndexError: If ``subsets`` yields no candidates at all.
    """
    formulas = get_all_formulas(relation, True)
    logger.info("# formulas: %s", len(formulas))
    logger.debug("possible formulas: %s", relation_rep(formulas))

    all_subsets = list(subsets(formulas, k))
    logger.info("# subsets: %s", len(all_subsets))

    costed = [(cost(subset, relation), subset) for subset in all_subsets]
    # Sort on the cost alone so ties never fall back to comparing the subsets
    # themselves; the sort is stable, so ties keep their generation order.
    costed.sort(key=lambda pair: pair[0])

    best_cost = costed[0][0]
    # Return a real list: a lazy ``filter`` object can only be consumed once
    # and supports neither ``len()`` nor indexing.
    best = [pair for pair in costed if pair[0] == best_cost]

    return best_cost, best
Пример #4
0
def find_optimum(relation, k):
    """Find the optimum set of formulas and its cost.

    This generates all formulas, all subsets of formulas, and then calculates
    the cost for every one of them. This can be very slow.

    Args:
        relation: The relation to analyse; handed to ``get_all_formulas`` and
            to ``cost``.
        k: Forwarded to ``subsets`` (presumably limits the subset size --
            confirm against ``subsets``).

    Returns:
        A tuple ``(best_cost, best)``. ``best`` is a list holding every
        ``(cost, subset)`` pair whose cost equals ``best_cost``.

    Raises:
        IndexError: If no subsets are generated, so there is no minimum.
    """
    formulas = get_all_formulas(relation, True)

    logger.info('# formulas: %s', len(formulas))
    logger.debug('possible formulas: %s', relation_rep(formulas))

    all_subsets = list(subsets(formulas, k))

    logger.info('# subsets: %s', len(all_subsets))

    subset_costs = (cost(candidate, relation) for candidate in all_subsets)

    # ``sorted`` already returns a new list, so no extra copy is needed. The
    # key restricts comparison to the cost so subsets are never compared.
    ordered = sorted(zip(subset_costs, all_subsets), key=lambda entry: entry[0])

    best_cost = ordered[0][0]
    # Materialise the winners: on Python 3 ``filter`` would hand callers a
    # one-shot iterator that cannot be indexed, measured or re-iterated.
    best = [entry for entry in ordered if entry[0] == best_cost]

    return best_cost, best