def test_partition(self):
    """Verify ``partition`` and ``make_labeled_partition`` against ``test_set1``.

    Covers a boolean key (parity), an integer key (remainder modulo 3), the labeled
    variant of the parity partition, and a negative case in which the key function
    returns ``Undef()`` for some elements and a ``TypeError`` is expected.
    """

    def is_even(elem):
        return elem.left.value % 2 == 0

    def remainder_mod_3(elem):
        return elem.left.value % 3

    def even_or_undef(elem):
        # Negative-test key: returns something that can not be put inside an atom.
        return "even" if elem.left.value % 2 == 0 else Undef()

    fixture = OperationsTests.test_set1

    # Partition by parity.
    actual_even = partition.partition(fixture["input"], is_even)
    self.assertEqual(actual_even, fixture["even_part"])
    if OperationsTests._print_examples:
        print("Even partition Actual: ", actual_even)
        print("Even partition Expected:", fixture["even_part"])

    # Partition by remainder modulo 3.
    actual_thirds = partition.partition(fixture["input2"], remainder_mod_3)
    self.assertEqual(actual_thirds, fixture["thirds_part"])
    if OperationsTests._print_examples:
        print("Thirds partition Actual: ", actual_thirds)
        print("Thirds partition Expected:", fixture["thirds_part"])

    # Labeled partition by parity.
    actual_labeled = partition.make_labeled_partition(fixture["input"], is_even)
    self.assertEqual(actual_labeled, fixture["even_part_equiv"])
    if OperationsTests._print_examples:
        print(actual_labeled)

    # Both entry points must reject the key function that may return Undef().
    with self.assertRaises(TypeError):
        partition.partition(fixture["input2"], even_or_undef)
    with self.assertRaises(TypeError):
        partition.make_labeled_partition(fixture["input2"], even_or_undef)
def test_partition_multiset(self):
    """Verify ``partition`` and ``make_labeled_partition`` against ``test_set2``.

    Same sequence of checks as the set-based test, run on the ``test_set2`` fixtures:
    a parity key, a remainder-modulo-3 key, the labeled parity partition, and a
    negative case where the key function may return ``Undef()`` and a ``TypeError``
    is expected.
    """

    def is_even(elem):
        return elem.left.value % 2 == 0

    def remainder_mod_3(elem):
        return elem.left.value % 3

    def even_or_undef(elem):
        # Negative-test key: returns something that can not be put inside an atom.
        return "even" if elem.left.value % 2 == 0 else Undef()

    fixture = OperationsTests.test_set2

    # Partition by parity.
    actual_even = partition.partition(fixture["input"], is_even)
    self.assertEqual(actual_even, fixture["even_part"])
    if OperationsTests._print_examples:
        print("Even partition (msets) Actual: ", actual_even)
        print("Even partition (msets) Expected:", fixture["even_part"])

    # Partition by remainder modulo 3.
    actual_thirds = partition.partition(fixture["input2"], remainder_mod_3)
    self.assertEqual(actual_thirds, fixture["thirds_part"])
    if OperationsTests._print_examples:
        print("Thirds partition (msets) Actual: ", actual_thirds)
        print("Thirds partition (msets) Expected:", fixture["thirds_part"])

    # Labeled partition by parity.
    actual_labeled = partition.make_labeled_partition(fixture["input"], is_even)
    self.assertEqual(actual_labeled, fixture["even_part_equiv"])
    if OperationsTests._print_examples:
        print(actual_labeled)

    # Both entry points must reject the key function that may return Undef().
    with self.assertRaises(TypeError):
        partition.partition(fixture["input2"], even_or_undef)
    with self.assertRaises(TypeError):
        partition.make_labeled_partition(fixture["input2"], even_or_undef)
def check_blocks(_board):
    """Try to place values by looking at one block (band/stack group) at a time.

    The filled cells are grouped by their 'band' and 'stack' keys. For every group,
    ``get_block_candidates`` supplies the candidate placements and the conflicts
    (as used here: a pair ``(candidates, conflicts)``). Depending on how many
    candidates remain and how many cells the block already has, zero or more cells
    are added through ``get_new_board``.

    :param _board: The current board.
    :return: The board after all placements found in this pass.
    """
    if VERBOSE:
        print("* check_blocks")

    filled = get_filled_cells(_board)
    grouped = partition.partition(filled, partial(by_keys, 'band', 'stack'))
    for block_clan in _SORT(grouped, key=partial(by_clan_keys, 'band', 'stack')):
        options, conflict = get_block_candidates(block_clan, filled)

        if options.is_empty:
            continue

        if options.cardinality == 1:
            # Exactly one candidate placement: take it.
            _board = get_new_board(_board, options)
        elif block_clan.cardinality == GRID_SIZE - 2:
            # The value in conflict can't go in the conflict cell, so it must go in the other.
            forced = clans.superstrict(options, project(conflict, 'value'))
            if forced.cardinality == 2:
                # Both values are determined.
                placements = forced
            else:
                # Drop the forced placement, then drop whatever else targets the same
                # row/col; what is left is the placement for the second cell.
                leftover = sets.minus(options, forced)
                forced_rowcol = project(forced, 'row', 'col')
                second = sets.minus(leftover, clans.superstrict(leftover, forced_rowcol))
                placements = sets.union(forced, second)
            _board = get_new_board(_board, placements)
        else:
            # Group the candidates by value; a value with a single candidate is placed.
            per_value = partition.partition(options, partial(by_key, 'value'))
            for candidate in _SORT(per_value, key=partial(by_clan_key, 'value')):
                if candidate.cardinality == 1:
                    # Keep only row/col/value (drops band/stack).
                    _board = get_new_board(_board, project(candidate, 'row', 'col', 'value'))
    return _board
def check_values(_board):
    """Place every value that is missing from exactly one position.

    The filled cells are grouped by 'value'. When a group holds ``GRID_SIZE - 1``
    cells, the rows and columns it occupies are removed from the board; what remains
    is combined with the value and collected. All collected cells are added to the
    board in a single ``get_new_board`` call.

    :param _board: The current board.
    :return: The result of ``get_new_board`` if any cell was deduced, else ``_board``.
    """
    if verbose:
        print("* check_values")

    filled = get_filled_cells(_board)
    deduced = Set()
    by_value = partition.partition(filled, partial(by_key, 'value'))
    for value_clan in _sort(by_value, key=partial(by_clan_key, 'value')):
        if value_clan.cardinality != GRID_SIZE - 1:
            continue  # Only values with exactly one missing occurrence are handled.

        # Rows and columns that already contain this value.
        taken_lines = sets.union(project(value_clan, 'row'), project(value_clan, 'col'))
        # Removing everything in those rows/columns leaves the single candidate row/col.
        free_cell = sets.minus(_board, clans.superstrict(_board, taken_lines))
        deduced = sets.union(
            deduced, clans.cross_union(free_cell, project(value_clan, 'value')))

    return get_new_board(_board, deduced) if deduced else _board
def check_values(_board):
    """Place every value that is missing from exactly one position.

    The filled cells are grouped by 'value'. When a group holds ``GRID_SIZE - 1``
    cells, the rows and columns it occupies are removed from the board; what remains
    is combined with the value and collected. All collected cells are added to the
    board in a single ``get_new_board`` call.

    :param _board: The current board.
    :return: The result of ``get_new_board`` if any cell was deduced, else ``_board``.
    """
    if VERBOSE:
        print("* check_values")

    filled = get_filled_cells(_board)
    deduced = Set()
    by_value = partition.partition(filled, partial(by_key, 'value'))
    for value_clan in _SORT(by_value, key=partial(by_clan_key, 'value')):
        if value_clan.cardinality != GRID_SIZE - 1:
            continue  # Only values with exactly one missing occurrence are handled.

        # Rows and columns that already contain this value.
        taken_lines = sets.union(project(value_clan, 'row'), project(value_clan, 'col'))
        # Removing everything in those rows/columns leaves the single candidate row/col.
        free_cell = sets.minus(_board, clans.superstrict(_board, taken_lines))
        deduced = sets.union(
            deduced, clans.cross_union(free_cell, project(value_clan, 'value')))

    return get_new_board(_board, deduced) if deduced else _board
def check_blocks(_board):
    """Check each block and place the values that can be deduced from it.

    The filled cells are grouped by their 'band' and 'stack' keys. For each group:

    - with ``GRID_SIZE-1`` cells, the one missing value is placed directly;
    - otherwise candidate value/row/col combinations are built, the ones that clash
      with an occupied row/value or col/value pair are removed, and the survivors
      are placed when they are unambiguous.

    :param _board: The current board.
    :return: The board after all placements found in this pass.
    """
    if verbose:
        print("* check_blocks")

    # NOTE: 'board' is computed once; placements made below update '_board' only.
    board = get_filled_cells(_board)
    blocks = partition.partition(board, partial(by_keys, 'band', 'stack'))
    for block_clan in _sort(blocks, key=partial(by_clan_keys, 'band', 'stack')):
        # Get the set of missing values...see if any can be placed due to row/col information
        values_clan = get_missing_values(block_clan)

        # Get the row/col targets for this block (presumably its empty cells; see
        # get_missing_rowcols)
        target_rowcols = get_missing_rowcols(block_clan)

        if block_clan.cardinality == GRID_SIZE-1:
            # Only one value is missing: combine it with the target row/col and place it
            new_cells = clans.cross_union(target_rowcols, values_clan)
            _board = get_new_board(_board, new_cells)
            continue

        # Need cross union values with rows (and, likewise, with cols)
        rows_clan = project(target_rowcols, 'row')
        cols_clan = project(target_rowcols, 'col')
        possible_rows_values = clans.cross_union(values_clan, rows_clan)
        possible_cols_values = clans.cross_union(values_clan, cols_clan)
        possible_rows_cols_values = sets.union(possible_rows_values, possible_cols_values)

        # The occupied_clan is the row/col/value set that is a conflict for values
        occupied_clan = project(clans.superstrict(board, possible_rows_cols_values),
                                'value', 'row', 'col')

        # If there are no conflicts then no cells can be placed
        if occupied_clan.is_empty:
            continue

        # Every missing value combined with every target row/col. The left-functional
        # flag is cached as True on the clan and on each of its relations.
        all_possible = clans.cross_union(values_clan,
                                         target_rowcols).cache_is_left_functional(True)
        for rel in all_possible:
            rel.cache_is_left_functional(True)

        # Get the set of conflicts...conflicting row/value + col/value
        conflict = sets.union(
            clans.superstrict(all_possible, project(occupied_clan, 'value', 'col')),
            clans.superstrict(all_possible, project(occupied_clan, 'value', 'row')))

        # Remove the conflicts from all_possible
        new_possible = sets.minus(all_possible, conflict)

        if block_clan.cardinality == GRID_SIZE-2:
            # Knowing that the value in conflict can't be placed in the conflict cell
            # ..it must go in the other...
            first_choice = clans.superstrict(new_possible, project(conflict, 'value'))
            if first_choice.cardinality == 2:
                # place both values
                _board = get_new_board(_board, first_choice)
                continue

            # Remove the first choice for all_possible
            remaining_possible = sets.minus(new_possible, first_choice)

            # Knowing that first_choice goes in a row/col, remove other value from that cell
            first_rowcol = project(first_choice, 'row', 'col')

            # The remaining cell is the second choice
            second_choice = sets.minus(remaining_possible,
                                       clans.superstrict(remaining_possible, first_rowcol))

            new_cells = sets.union(first_choice, second_choice)
            _board = get_new_board(_board, new_cells)
            continue

        # Partition by value
        candidates = partition.partition(new_possible, partial(by_key, 'value'))
        for candidate in _sort(candidates, key=partial(by_clan_key, 'value')):
            # If any value fits in only 1 cell, place it
            if candidate.cardinality == 1:
                # Remove band/stack
                new_cell = project(candidate, 'row', 'col', 'value')
                _board = get_new_board(_board, new_cell)
    return _board
def check_rows(_board):
    """Place values that can be deduced by examining one row at a time.

    Look for rows where there is only one missing value. If any are found fill in the
    missing value.

    Look for rows where there are two missing values. If either missing value is blocked
    by the same value in the candidate row, col, or block then the other value can be
    placed in the blocked cell. The other value can be placed in the other cell.

    Look for rows with more than two missing values. Check each empty cell to see only
    one of the missing values can be placed in it. Check each value to see if there is
    only one cell where it can be placed.

    :param _board: The current board.
    :return: The board after all placements found in this pass.
    """
    if verbose:
        print("* check_rows")

    # NOTE: 'board' is computed once; placements made below update '_board' only.
    board = get_filled_cells(_board)
    all_rows_clans = partition.partition(board, partial(by_key, 'row'))
    for row_clan in _sort(all_rows_clans, key=partial(by_clan_key, 'row')):
        row = project(row_clan, 'row')
        board_row = clans.superstrict(_board, row)
        values_clan = get_missing_values(row_clan)

        if row_clan.cardinality == GRID_SIZE-1:
            # Row is missing only 1 value, remove row_clan from the board leaving target row_col
            row_col = sets.minus(board_row, row_clan)
            new_cells = clans.cross_union(row_col, values_clan)
            _board = get_new_board(_board, new_cells)
            continue

        # Get the set of candidate col/value pairs
        row_possible = clans.cross_union(values_clan,
                                         project(sets.minus(board_row, row_clan), 'col'))

        if row_clan.cardinality == GRID_SIZE-2:
            # The occupied_clan is the col/value pair that is a conflict for each col/value
            occupied_clan = project(clans.superstrict(board, row_possible), 'col', 'value')

            # If there are no conflicts neither value can be placed without checking entire
            # board (control then falls through to the general analysis below)
            if not occupied_clan.is_empty:
                # ..remove occupied_clan col/value pairs from all possible
                new_possible = sets.minus(row_possible, occupied_clan)

                if new_possible.cardinality == 2:
                    # Of the 4 possibilities (2 values * 2 cols), 2 were removed, place remaining
                    new_cells = clans.cross_union(row, new_possible)
                    _board = get_new_board(_board, new_cells)
                    continue

                # 3 of the possibilities remain...
                occupied_col = project(occupied_clan, 'col')

                # Remove the occupied_col choices to get the first col/value pair
                col_value1 = clans.superstrict(new_possible, occupied_col)

                occupied_val = project(col_value1, 'value')

                # Remove the occupied_val choices to get the second col/value pair
                col_value2 = sets.minus(new_possible,
                                        clans.superstrict(new_possible, occupied_val))

                new_cells = clans.cross_union(row, col_value1)
                new_cells = sets.union(new_cells, clans.cross_union(row, col_value2))
                _board = get_new_board(_board, new_cells)
                continue

        # The occupied_clan is the row/col/value set that could be a conflict for values
        occupied_clan = clans.superstrict(board, values_clan)

        # If there are no conflicts then no cells can be placed
        if occupied_clan.is_empty:
            continue

        # Add row to row_possible for remaining checks
        all_possible = clans.cross_union(row_possible, row)

        # Get the set of conflicts...conflicting row/value + col/value
        conflict = sets.union(
            clans.superstrict(all_possible, project(occupied_clan, 'value', 'col')),
            clans.superstrict(all_possible, project(occupied_clan, 'value', 'row')))

        # Remove the conflicts from all_possible
        new_possible = sets.minus(all_possible, conflict)

        if new_possible.is_empty:
            continue  # All possible may have been excluded due to row/col conflicts

        # Otherwise...need to check for block (band+stack) conflicts too!!
        # ...if value exists in same block as element of all_possible

        # Add band/stack
        new_targets = clans.superstrict(BANDS_STACKS, project(new_possible, 'row', 'col'))
        new_possible3 = clans.functional_cross_union(new_targets, new_possible)
        # Alias only; occupied_clan itself is not modified.
        occupied_clan2 = occupied_clan

        # Remove block (band+stack) conflicts
        new_possible4a = sets.minus(project(new_possible3, 'value', 'band', 'stack'),
                                    project(occupied_clan2, 'value', 'band', 'stack'))
        new_possible4 = clans.superstrict(new_possible3, new_possible4a)

        # Partition by row/col
        placed = 0
        candidates = partition.partition(new_possible4, partial(by_keys, 'row', 'col'))
        for candidate in _sort(candidates, key=partial(by_clan_key, 'col')):
            # If any row/col has only 1 candidate, place it
            if candidate.cardinality == 1:
                # Remove band/stack
                cell = project(candidate, 'row', 'col', 'value')
                _board = get_new_board(_board, cell)
                placed += 1
        # When at least one cell was placed, skip the per-value pass for this row
        if placed:
            continue

        # Partition by value
        candidates = partition.partition(new_possible4, partial(by_key, 'value'))
        for candidate in _sort(candidates, key=partial(by_clan_key, 'value')):
            # If any value fits in only 1 cell, place it
            if candidate.cardinality == 1:
                # Remove band/stack
                cell = project(candidate, 'row', 'col', 'value')
                _board = get_new_board(_board, cell)
    return _board
def query5():
    """Compute the summed revenue per nation.

    The joins and the final aggregation are timed with ``FunctionTimer``. The region
    and the date range are read from ``region_name``, ``start_date`` and ``end_date``
    in the enclosing scope; the literals in the SQL below are illustrative.

    :return: A ``Set`` with one ``Set`` per nation, each holding a 'nationname' couplet
        and a 'revenue' couplet.
    """
    # select
    #     nationname,
    #     sum(lineitem.extendedprice * (1 - lineitem.discount)) as revenue
    # from
    #     customer, orders, lineitem,  -- Loaded from CSV
    #     nation, region               -- Loaded from XML
    # where
    #     customer.custkey = orders.custkey
    #     and lineitem.orderkey = orders.orderkey
    #     and customer.nationkey = nation.nationkey
    #     and supplier.nationkey = nation.nationkey
    #     and nation.regionkey = region.regionkey
    #     and region.name = 'AMERICA'
    #     and orders.orderdate >= date '1996-01-01'
    #     and orders.orderdate < date '1996-01-01' + interval '1' year
    # group by
    #     n_name
    timer = FunctionTimer()
    brief = True

    # Join supplier_solutions and customers_nations_projected on 'nationkey'.
    suppliers = get_supplier_solutions()
    customer_nations = get_customers_nations_projected(get_nations(region_name))
    by_nation = clans.cross_functional_union(suppliers, customer_nations)
    timer.lap('result1', short=brief)

    # Join with orders_restricted_projected on 'custkey'.
    orders = get_orders_restricted_projected(start_date, end_date)
    with_orders = clans.cross_functional_union(by_nation, orders)
    timer.lap('result2', short=brief)

    # Join with lineitem on 'orderkey' and 'suppkey'.
    lineitem_types = {
        'orderkey': int,
        'suppkey': int,
        'extendedprice': float,
        'discount': float,
        'partkey': int,
        'linenumber': int,
        'quantity': int,
        'tax': float,
    }
    lineitems = csv.import_csv('lineitem.csv', lineitem_types)
    with_lineitems = clans.cross_functional_union(with_orders, lineitems)
    timer.lap('result3', short=brief)

    # Add the 'revenue' column: extendedprice * (1 - discount).
    with_revenue = Set(
        relations.functional_add(
            row,
            Couplet('revenue', row('extendedprice').value * (1 - row('discount').value)))
        for row in with_lineitems)
    timer.lap('result4', short=brief)

    # Keep only the columns needed for the aggregation.
    revenue_by_nations = clans.project(with_revenue, 'revenue', 'nationname')

    # Partition the result on 'nationname'.
    revenue_grouped_by_nations = partition.partition(
        revenue_by_nations, lambda rel: rel('nationname'))
    timer.lap('revenue_grouped_by_nations', short=brief)

    # Almost generic aggregation function. (Handles 'normal' cases, but not all edge cases.)
    def aggregate(horde, group_left, aggregation_left, aggregate_func):
        totals = {}
        for clan in horde:
            running = aggregate_func.identity
            for relation in clan:
                running = aggregate_func(running, relation(aggregation_left).value)
            # All relations of a clan share the group value; read it from any one of them.
            group_key = next(iter(clan))(group_left)
            totals[group_key] = running
        return Set(
            Set(Couplet(group_left, group_key), Couplet(aggregation_left, total))
            for group_key, total in totals.items())

    # Our aggregation function (adding two numbers, identity is 0).
    def add_values(accumulated, term):
        return accumulated + term

    add_values.identity = 0

    # Calculate the aggregation result.
    # noinspection PyTypeChecker
    query5_result = aggregate(revenue_grouped_by_nations, 'nationname', 'revenue', add_values)
    timer.end('query5_result')

    return query5_result
def check_rows(_board, try_harder=0):
    """Place values that can be deduced by examining one row at a time.

    Look for rows where there is only one missing value. If any are found fill in the
    missing value.

    Look for rows where there are two missing values. If either missing value is blocked
    by the same value in the candidate row, col, or block then the other value can be
    placed in the blocked cell. The other value can be placed in the other cell.

    Look for rows with more than two missing values. Check each empty cell to see only
    one of the missing values can be placed in it. Check each value to see if there is
    only one cell where it can be placed.

    :param _board: The current board.
    :param try_harder: When truthy, candidates of a value are additionally pruned using
        the candidates of the blocks they fall into. It is reset to 0 inside this call as
        soon as any cell is placed.
    :return: The board after all placements found in this pass.
    """
    if VERBOSE:
        print("* check_rows")

    # NOTE: 'board' is computed once; placements made below update '_board' only.
    board = get_filled_cells(_board)
    all_rows_clans = partition.partition(board, partial(by_key, 'row'))
    for row_clan in _SORT(all_rows_clans, key=partial(by_clan_key, 'row')):
        row = project(row_clan, 'row')
        board_row = clans.superstrict(_board, row)
        values_clan = get_missing_values(row_clan)

        if row_clan.cardinality == GRID_SIZE - 1:
            # Row is missing only 1 value, remove row_clan from the board leaving target row_col
            row_col = sets.minus(board_row, row_clan)
            new_cells = clans.cross_union(row_col, values_clan)
            _board = get_new_board(_board, new_cells)
            try_harder = 0
            continue

        # Get the set of candidate col/value pairs
        row_possible = clans.cross_union(values_clan,
                                         project(sets.minus(board_row, row_clan), 'col'))

        if row_clan.cardinality == GRID_SIZE - 2:
            # The occupied_clan is the col/value pair that is a conflict for each col/value
            occupied_clan = project(clans.superstrict(board, row_possible), 'col', 'value')

            # If there are no conflicts neither value can be placed without checking entire
            # board (control then falls through to the general analysis below)
            if not occupied_clan.is_empty:
                # ..remove occupied_clan col/value pairs from all possible
                new_possible = sets.minus(row_possible, occupied_clan)

                if new_possible.cardinality == 2:
                    # Of the 4 possibilities (2 values * 2 cols), 2 were removed, place remaining
                    new_cells = clans.cross_union(row, new_possible)
                    _board = get_new_board(_board, new_cells)
                    try_harder = 0
                    continue

                # 3 of the possibilities remain...
                occupied_col = project(occupied_clan, 'col')

                # Remove the occupied_col choices to get the first col/value pair
                col_value1 = clans.superstrict(new_possible, occupied_col)

                occupied_val = project(col_value1, 'value')

                # Remove the occupied_val choices to get the second col/value pair
                col_value2 = sets.minus(new_possible,
                                        clans.superstrict(new_possible, occupied_val))

                new_cells = clans.cross_union(row, col_value1)
                new_cells = sets.union(new_cells, clans.cross_union(row, col_value2))
                _board = get_new_board(_board, new_cells)
                try_harder = 0
                continue

        # The occupied_clan is the row/col/value set that could be a conflict for values
        occupied_clan = clans.superstrict(board, values_clan)

        # If there are no conflicts then no cells can be placed
        if occupied_clan.is_empty:
            continue

        # Add row to row_possible for remaining checks
        all_possible = clans.cross_union(row_possible, row)

        # Get the set of conflicts...conflicting row/value + col/value
        conflict = sets.union(
            clans.superstrict(all_possible, project(occupied_clan, 'value', 'col')),
            clans.superstrict(all_possible, project(occupied_clan, 'value', 'row')))

        # Remove the conflicts from all_possible
        new_possible = sets.minus(all_possible, conflict)

        if new_possible.is_empty:
            continue  # All possible may have been excluded due to row/col conflicts

        # Otherwise...need to check for block (band+stack) conflicts too!!
        # ...if value exists in same block as element of all_possible

        # Add band/stack
        new_targets = clans.superstrict(BANDS_STACKS, project(new_possible, 'row', 'col'))
        new_possible3 = clans.cross_functional_union(new_targets, new_possible)
        # Alias only; occupied_clan itself is not modified.
        occupied_clan2 = occupied_clan

        # Remove block (band+stack) conflicts
        new_possible4a = sets.minus(project(new_possible3, 'value', 'band', 'stack'),
                                    project(occupied_clan2, 'value', 'band', 'stack'))
        new_possible4 = clans.superstrict(new_possible3, new_possible4a)

        # Refinement loop: repeats only while try_harder is truthy and the last pass
        # removed candidates from new_possible4.
        while True:
            candidates_updated = False

            # Partition by row/col
            placed = 0
            candidates = partition.partition(new_possible4, partial(by_keys, 'row', 'col'))
            for candidate in _SORT(candidates, key=partial(by_clan_key, 'col')):
                # If any row/col has only 1 candidate, place it
                if candidate.cardinality == 1:
                    # The candidate is passed on as is (band/stack are not projected away here)
                    _board = get_new_board(_board, candidate)
                    try_harder = 0
                    placed += 1
            # When at least one cell was placed, this row is done
            if placed:
                break

            # Partition by value
            candidates = partition.partition(new_possible4, partial(by_key, 'value'))
            for candidate in _SORT(candidates, key=partial(by_clan_key, 'value')):
                # If any value fits in only 1 cell, place it
                if candidate.cardinality == 1:
                    # The candidate is passed on as is (band/stack are not projected away here)
                    _board = get_new_board(_board, candidate)
                    try_harder = 0
                else:
                    # If any value must be placed elsewhere, remove as candidate for this cell
                    if try_harder:
                        value = project(candidate, 'value')
                        # If this row of a sibling block must contain this value...
                        blocks = partition.partition(candidate,
                                                     partial(by_keys, 'band', 'stack'))
                        if blocks.cardinality > 1:
                            for block_clan in _SORT(blocks,
                                                    key=partial(by_clan_keys, 'band', 'stack')):
                                block = project(block_clan, 'band', 'stack')
                                board_block = clans.superstrict(board, block)
                                if board_block.is_empty:
                                    continue

                                # NOTE: rebinds new_possible and conflict from above; neither
                                # is read again for this row after this point.
                                new_possible, conflict = get_block_candidates(board_block, board)
                                new_possible_value = clans.superstrict(new_possible, value)
                                if new_possible_value['row'].cardinality == 1:
                                    # Value must be placed in this block
                                    # ...other block candidates can be removed
                                    remove = sets.minus(candidate, block_clan)
                                    new_possible4 = sets.minus(new_possible4, remove)
                                    candidates_updated = True

            # Stop unless candidates were pruned and try_harder is still set
            if not candidates_updated or not try_harder:
                break
    return _board
def query5():
    """Compute the summed revenue per nation.

    The joins and the final aggregation are timed with ``FunctionTimer``. The region
    and the date range are read from ``region_name``, ``start_date`` and ``end_date``
    in the enclosing scope; the literals in the SQL below are illustrative.

    :return: A ``Set`` with one ``Set`` per nation, each holding a 'nationname' couplet
        and a 'revenue' couplet.
    """
    # select
    #     nationname,
    #     sum(lineitem.extendedprice * (1 - lineitem.discount)) as revenue
    # from
    #     customer, orders, lineitem,  -- Loaded from CSV
    #     nation, region               -- Loaded from XML
    # where
    #     customer.custkey = orders.custkey
    #     and lineitem.orderkey = orders.orderkey
    #     and customer.nationkey = nation.nationkey
    #     and supplier.nationkey = nation.nationkey
    #     and nation.regionkey = region.regionkey
    #     and region.name = 'AMERICA'
    #     and orders.orderdate >= date '1996-01-01'
    #     and orders.orderdate < date '1996-01-01' + interval '1' year
    # group by
    #     n_name
    timer = FunctionTimer()
    brief = True

    # Join supplier_solutions and customers_nations_projected on 'nationkey'.
    suppliers = get_supplier_solutions()
    customer_nations = get_customers_nations_projected(get_nations(region_name))
    by_nation = clans.functional_cross_union(suppliers, customer_nations)
    timer.lap("result1", short=brief)

    # Join with orders_restricted_projected on 'custkey'.
    orders = get_orders_restricted_projected(start_date, end_date)
    with_orders = clans.functional_cross_union(by_nation, orders)
    timer.lap("result2", short=brief)

    # Join with lineitem on 'orderkey' and 'suppkey'.
    lineitem_types = {
        "orderkey": int,
        "suppkey": int,
        "extendedprice": float,
        "discount": float,
        "partkey": int,
        "linenumber": int,
        "quantity": int,
        "tax": float,
    }
    lineitems = csv.import_csv("lineitem.csv", lineitem_types)
    with_lineitems = clans.functional_cross_union(with_orders, lineitems)
    timer.lap("result3", short=brief)

    # Add the 'revenue' column: extendedprice * (1 - discount).
    with_revenue = Set(
        relations.functional_add(
            row,
            Couplet("revenue", row("extendedprice").value * (1 - row("discount").value)))
        for row in with_lineitems)
    timer.lap("result4", short=brief)

    # Keep only the columns needed for the aggregation.
    revenue_by_nations = clans.project(with_revenue, "revenue", "nationname")

    # Partition the result on 'nationname'.
    revenue_grouped_by_nations = partition.partition(
        revenue_by_nations, lambda rel: rel("nationname"))
    timer.lap("revenue_grouped_by_nations", short=brief)

    # Almost generic aggregation function. (Handles 'normal' cases, but not all edge cases.)
    def aggregate(horde, group_left, aggregation_left, aggregate_func):
        totals = {}
        for clan in horde:
            running = aggregate_func.identity
            for relation in clan:
                running = aggregate_func(running, relation(aggregation_left).value)
            # All relations of a clan share the group value; read it from any one of them.
            group_key = next(iter(clan))(group_left)
            totals[group_key] = running
        return Set(
            Set(Couplet(group_left, group_key), Couplet(aggregation_left, total))
            for group_key, total in totals.items())

    # Our aggregation function (adding two numbers, identity is 0).
    def add_values(accumulated, term):
        return accumulated + term

    add_values.identity = 0

    # Calculate the aggregation result.
    # noinspection PyTypeChecker
    query5_result = aggregate(revenue_grouped_by_nations, "nationname", "revenue", add_values)
    timer.end("query5_result")

    return query5_result