def test_all_formulas(self):
    """Check get_all_formulas against hand-written expectations.

    Two samples are read with read_sample: one with two columns of
    plain values and one with a single column whose cells are written
    in braces. In both cases the expected result includes the empty
    frozendict.
    """
    # Sample 1: columns x and y, plain string values.
    plain_actual = get_all_formulas(read_sample("x,y\n1,2\n3,4"))
    plain_expected = {
        frozendict(entry)
        for entry in (
            {},
            {"x": "1"},
            {"y": "2"},
            {"x": "3"},
            {"y": "4"},
            {"y": "2", "x": "1"},
            {"y": "4", "x": "3"},
        )
    }
    self.assertEqual(plain_actual, plain_expected)

    # Sample 2: column x only; values are expected as frozensets.
    braced_actual = get_all_formulas(read_sample("x\n{1 2}\n{3 4}"))
    value_groups = (["1"], ["4"], ["3"], ["2"], ["1", "2"], ["3", "4"])
    braced_expected = {
        frozendict({"x": frozenset(group)}) for group in value_groups
    }
    braced_expected.add(frozendict({}))
    self.assertEqual(braced_actual, braced_expected)
def test_all_formulas(self):
    """Check get_all_formulas against hand-written expectations.

    Each expected formula is a frozenset of Cell objects, all with
    sequence 0. Two samples are read with read_sample: one with columns
    x and y, and one with a single column x whose cells are written in
    braces. The empty frozenset is not part of either expectation.
    """

    def formula(*pairs):
        # Build one expected formula from (attribute, value) pairs.
        return frozenset(
            Cell(attribute=name, sequence=0, value=val) for name, val in pairs
        )

    # Sample 1: columns x and y.
    two_column_actual = get_all_formulas(read_sample('x,y\n1,2\n3,4'))
    two_column_expected = {
        formula(('x', '1')),
        formula(('x', '3')),
        formula(('y', '4')),
        formula(('x', '1'), ('y', '2')),
        formula(('y', '2')),
        formula(('y', '4'), ('x', '3')),
    }
    self.assertEqual(two_column_actual, two_column_expected)

    # Sample 2: column x only, several values per cell.
    one_column_actual = get_all_formulas(read_sample('x\n{1 2}\n{3 4}'))
    one_column_expected = {
        formula(('x', '3'), ('x', '4')),
        formula(('x', '4')),
        formula(('x', '1')),
        formula(('x', '1'), ('x', '2')),
        formula(('x', '3')),
        formula(('x', '2')),
    }
    self.assertEqual(one_column_actual, one_column_expected)
def find_optimum(relation, k):
    """Find the cheapest candidate formula sets by exhaustive search.

    All formulas are generated with ``get_all_formulas(relation, True)``,
    every candidate produced by ``subsets(formulas, k)`` is scored with
    ``cost(candidate, relation)``, and the candidates sharing the lowest
    cost are returned. Every candidate is materialized and scored, so
    this can be very slow.

    Args:
        relation: The relation to search; passed to ``get_all_formulas``
            and ``cost``.
        k: Passed straight through to ``subsets`` (presumably a bound on
            the subset size -- confirm against ``subsets``).

    Returns:
        A tuple ``(best_cost, best)``. ``best`` is a list of
        ``(cost, subset)`` pairs whose cost equals ``best_cost``, in the
        order ``subsets`` produced them.

    Raises:
        IndexError: If ``subsets`` produces no candidates.
    """
    formulas = get_all_formulas(relation, True)
    logger.info("# formulas: %s", len(formulas))
    logger.debug("possible formulas: %s", relation_rep(formulas))

    all_subsets = list(subsets(formulas, k))
    logger.info("# subsets: %s", len(all_subsets))

    scored = [(cost(subset, relation), subset) for subset in all_subsets]
    # Sort on the cost alone so the subsets themselves are never compared;
    # the sort is stable, so equally cheap candidates keep generation order.
    ordered = sorted(scored, key=lambda pair: pair[0])
    best_cost = ordered[0][0]
    # Return a real list: a lazy filter object could be consumed only once
    # and would not support len() or indexing.
    best = [pair for pair in ordered if pair[0] == best_cost]
    return best_cost, best
def find_optimum(relation, k):
    """Find the optimum sets of formulas and their cost.

    This generates all formulas with ``get_all_formulas(relation, True)``,
    all candidates with ``subsets(formulas, k)``, and then calculates
    ``cost(candidate, relation)`` for every one of them. This can be very
    slow.

    Args:
        relation: The relation to search; passed to ``get_all_formulas``
            and ``cost``.
        k: Passed straight through to ``subsets`` (presumably a bound on
            the subset size -- confirm against ``subsets``).

    Returns:
        A tuple ``(best_cost, best)``. ``best`` is a list of
        ``(cost, subset)`` pairs whose cost equals ``best_cost``, in the
        order ``subsets`` produced them.

    Raises:
        IndexError: If ``subsets`` produces no candidates.
    """
    formulas = get_all_formulas(relation, True)
    logger.info('# formulas: %s', len(formulas))
    logger.debug('possible formulas: %s', relation_rep(formulas))

    all_subsets = list(subsets(formulas, k))
    logger.info('# subsets: %s', len(all_subsets))

    scored = [(cost(subset, relation), subset) for subset in all_subsets]
    # Sort on the cost alone so the subsets themselves are never compared;
    # the sort is stable, so equally cheap candidates keep generation order.
    ordered = sorted(scored, key=lambda pair: pair[0])
    best_cost = ordered[0][0]
    # Return a real list: a lazy filter object could be consumed only once
    # and would not support len() or indexing.
    best = [pair for pair in ordered if pair[0] == best_cost]
    return best_cost, best