Exemplo n.º 1
0
def join(rdf_solution1, rdf_solution2, *rdf_solutions):
    """Fold all arguments into a single :term:`cross-functional union`.

    The arguments are combined pairwise, from left to right, by repeatedly calling
    :func:`.clans.cross_functional_union`. At least two arguments are required.
    """
    joined = rdf_solution1
    # Treat the second mandatory argument exactly like the optional trailing ones.
    for operand in (rdf_solution2,) + rdf_solutions:
        joined = _clans.cross_functional_union(joined, operand)
    return joined
Exemplo n.º 2
0
def join(rdf_solution1, rdf_solution2, *rdf_solutions):
    """Fold all arguments into a single :term:`cross-functional union`.

    The arguments are combined pairwise, from left to right, by repeatedly calling
    :func:`.clans.cross_functional_union`. At least two arguments are required.
    """
    joined = rdf_solution1
    # Treat the second mandatory argument exactly like the optional trailing ones.
    for operand in (rdf_solution2,) + rdf_solutions:
        joined = _clans.cross_functional_union(joined, operand)
    return joined
Exemplo n.º 3
0
def get_customers_nations_projected(nations):
    """Join the customers read from customer.csv with ``nations`` and keep three columns.

    Equivalent SQL:
        SELECT
            custkey, nationkey, nationname
        FROM
            customer
        JOIN
            nations
        ON
            customer.nationkey = nations.nationkey

    Every stage is reported through a ``FunctionTimer``.
    """
    stopwatch = FunctionTimer()
    brief = True

    # Column types handed to the CSV importer.
    column_types = dict(custkey=int, nationkey=int, acctbal=float)
    customer_clan = csv.import_csv('customer.csv', column_types)
    stopwatch.lap('customers', short=brief)

    # Combine the imported customers with the nations passed in by the caller.
    joined = clans.cross_functional_union(customer_clan, nations)
    stopwatch.lap('customers_nations', short=brief)

    # Keep only the columns named in the SELECT clause above.
    wanted_columns = ('custkey', 'nationkey', 'nationname')
    projected = clans.project(joined, *wanted_columns)
    stopwatch.end('customers_nations_projected', short=brief)

    return projected
Exemplo n.º 4
0
def get_customers_nations_projected(nations):
    """Join the customers read from customer.csv with ``nations`` and keep three columns.

    Equivalent SQL:
        SELECT
            custkey, nationkey, nationname
        FROM
            customer
        JOIN
            nations
        ON
            customer.nationkey = nations.nationkey

    Every stage is reported through a ``FunctionTimer``.
    """
    stopwatch = FunctionTimer()
    brief = True

    # Column types handed to the CSV importer.
    column_types = dict(custkey=int, nationkey=int, acctbal=float)
    customer_clan = csv.import_csv('customer.csv', column_types)
    stopwatch.lap('customers', short=brief)

    # Combine the imported customers with the nations passed in by the caller.
    joined = clans.cross_functional_union(customer_clan, nations)
    stopwatch.lap('customers_nations', short=brief)

    # Keep only the columns named in the SELECT clause above.
    wanted_columns = ('custkey', 'nationkey', 'nationname')
    projected = clans.project(joined, *wanted_columns)
    stopwatch.end('customers_nations_projected', short=brief)

    return projected
Exemplo n.º 5
0
def get_supplier_solutions():
    """Extract the suppkey/nationkey solutions from the Turtle file supplier.ttl.

    Equivalent SPARQL:
        SELECT
            ?suppkey, ?nationkey
        FROM
            supplier
        WHERE {
            ?supplier <tpch:suppkey> ?suppkey .
            ?supplier <tpch:nationkey> ?nationkey .
        }

    The result is the union of both graph patterns, projected onto 'nationkey' and 'suppkey'.
    Every stage is reported through a ``FunctionTimer``.
    """
    stopwatch = FunctionTimer()
    brief = True

    graph = rdf.import_graph('supplier.ttl')
    stopwatch.lap('suppliers', short=brief)

    def bind_predicate(predicate, object_name):
        # Keep the triples that carry the given predicate ...
        matches = clans.superstrict(
            graph, clans.from_dict({'p': rdflib.URIRef(predicate)}))
        # ... then name the subject 'supplier' (used for the later join) and give the object
        # the column name needed in the output.
        renaming = clans.from_dict({'supplier': 's', object_name: 'o'})
        return clans.compose(matches, renaming)

    by_suppkey = bind_predicate('tpch:suppkey', 'suppkey')
    by_nationkey = bind_predicate('tpch:nationkey', 'nationkey')

    # Join both pattern results on 'supplier' and keep only the requested columns.
    joined = clans.cross_functional_union(by_suppkey, by_nationkey)
    solutions = clans.project(joined, 'nationkey', 'suppkey')
    stopwatch.end('supplier_solutions', short=brief)

    return solutions
Exemplo n.º 6
0
def get_new_board(board, new_cells):
    """Return a copy of ``board`` in which the cells addressed by ``new_cells`` are replaced.

    The cells of ``board`` that match the row/col of ``new_cells`` are removed and
    ``new_cells`` are added instead. When the 'band' lookup on ``new_cells`` is falsy, the
    new cells are first combined with the matching entries of ``BANDS_STACKS``. The result
    is asserted to contain exactly ``GRID_SIZE * GRID_SIZE`` cells.
    """
    if VERBOSE:
        for placed in new_cells:
            row, col, value = (placed(name).value for name in ('row', 'col', 'value'))
            print("*** value %d goes in Row %d, Col %d" % (value, row, col))

    # Drop the cells currently occupying the target positions.
    positions = project(new_cells, 'row', 'col')
    replaced = clans.superstrict(board, positions)
    remaining = sets.minus(board, replaced)

    if not new_cells['band']:
        # The new cells carry no band information: add band/stack from the lookup clan.
        block_info = clans.superstrict(BANDS_STACKS, positions)
        new_cells = clans.cross_functional_union(new_cells, block_info)

    updated = sets.union(remaining, new_cells)
    assert len(updated) == GRID_SIZE * GRID_SIZE
    return updated
Exemplo n.º 7
0
def get_new_board(board, new_cells):
    """Return a copy of ``board`` in which the cells addressed by ``new_cells`` are replaced.

    The cells of ``board`` that match the row/col of ``new_cells`` are removed and
    ``new_cells`` are added instead. When the 'band' lookup on ``new_cells`` is falsy, the
    new cells are first combined with the matching entries of ``BANDS_STACKS``. The result
    is asserted to contain exactly ``GRID_SIZE * GRID_SIZE`` cells.
    """
    if VERBOSE:
        for placed in new_cells:
            row, col, value = (placed(name).value for name in ('row', 'col', 'value'))
            print("*** value %d goes in Row %d, Col %d" % (value, row, col))

    # Drop the cells currently occupying the target positions.
    positions = project(new_cells, 'row', 'col')
    replaced = clans.superstrict(board, positions)
    remaining = sets.minus(board, replaced)

    if not new_cells['band']:
        # The new cells carry no band information: add band/stack from the lookup clan.
        block_info = clans.superstrict(BANDS_STACKS, positions)
        new_cells = clans.cross_functional_union(new_cells, block_info)

    updated = sets.union(remaining, new_cells)
    assert len(updated) == GRID_SIZE * GRID_SIZE
    return updated
Exemplo n.º 8
0
def get_supplier_solutions():
    """Extract the suppkey/nationkey solutions from the Turtle file supplier.ttl.

    Equivalent SPARQL:
        SELECT
            ?suppkey, ?nationkey
        FROM
            supplier
        WHERE {
            ?supplier <tpch:suppkey> ?suppkey .
            ?supplier <tpch:nationkey> ?nationkey .
        }

    The result is the union of both graph patterns, projected onto 'nationkey' and 'suppkey'.
    Every stage is reported through a ``FunctionTimer``.
    """
    stopwatch = FunctionTimer()
    brief = True

    graph = rdf.import_graph('supplier.ttl')
    stopwatch.lap('suppliers', short=brief)

    def bind_predicate(predicate, object_name):
        # Keep the triples that carry the given predicate ...
        matches = clans.superstrict(
            graph, clans.from_dict({'p': rdflib.URIRef(predicate)}))
        # ... then name the subject 'supplier' (used for the later join) and give the object
        # the column name needed in the output.
        renaming = clans.from_dict({'supplier': 's', object_name: 'o'})
        return clans.compose(matches, renaming)

    by_suppkey = bind_predicate('tpch:suppkey', 'suppkey')
    by_nationkey = bind_predicate('tpch:nationkey', 'nationkey')

    # Join both pattern results on 'supplier' and keep only the requested columns.
    joined = clans.cross_functional_union(by_suppkey, by_nationkey)
    solutions = clans.project(joined, 'nationkey', 'suppkey')
    stopwatch.end('supplier_solutions', short=brief)

    return solutions
Exemplo n.º 9
0
def query5():
    # Computes the revenue per nation; the SQL below sketches the query being modelled.
    # The region name and the order date range are not hard-coded here: they are read from
    # the names region_name, start_date and end_date of the enclosing scope.
    #
    # select
    #     nationname,
    #     sum(lineitem.extendedprice * (1 - lineitem.discount)) as revenue
    # from
    #     customer, orders, lineitem,   -- Loaded from CSV
    #     nation, region                -- Loaded from XML
    # where
    #     customer.custkey = orders.custkey
    #     and lineitem.orderkey = orders.orderkey
    #     and customer.nationkey = nation.nationkey
    #     and supplier.nationkey = nation.nationkey
    #     and nation.regionkey = region.regionkey
    #     and region.name = 'AMERICA'
    #     and orders.orderdate >= date '1996-01-01'
    #     and orders.orderdate < date '1996-01-01' + interval '1' year
    # group by
    #     n_name
    stopwatch = FunctionTimer()
    brief = True

    # Step 1: join supplier solutions and projected customers/nations on 'nationkey'.
    supplier_side = get_supplier_solutions()
    customer_side = get_customers_nations_projected(get_nations(region_name))
    joined_on_nation = clans.cross_functional_union(supplier_side, customer_side)
    stopwatch.lap('result1', short=brief)

    # Step 2: join with the restricted, projected orders on 'custkey'.
    orders = get_orders_restricted_projected(start_date, end_date)
    joined_on_customer = clans.cross_functional_union(joined_on_nation, orders)
    stopwatch.lap('result2', short=brief)

    # Step 3: join with the line items on 'orderkey' and 'suppkey'.
    lineitem_types = dict(
        orderkey=int,
        suppkey=int,
        extendedprice=float,
        discount=float,
        partkey=int,
        linenumber=int,
        quantity=int,
        tax=float,
    )
    lineitems = csv.import_csv('lineitem.csv', lineitem_types)
    joined_on_order = clans.cross_functional_union(joined_on_customer, lineitems)
    stopwatch.lap('result3', short=brief)

    # Step 4: extend every relation with its 'revenue' column.
    def with_revenue(rel):
        revenue = rel('extendedprice').value * (1 - rel('discount').value)
        return relations.functional_add(rel, Couplet('revenue', revenue))

    priced = Set(with_revenue(rel) for rel in joined_on_order)
    stopwatch.lap('result4', short=brief)
    # Only the revenue and the grouping column are needed from here on.
    revenue_by_nations = clans.project(priced, 'revenue', 'nationname')

    # Step 5: group the relations by their 'nationname'.
    def nation_of(rel):
        return rel('nationname')

    grouped = partition.partition(revenue_by_nations, nation_of)
    stopwatch.lap('revenue_grouped_by_nations', short=brief)

    # Almost generic aggregation helper. (Handles 'normal' cases, but not all edge cases.)
    def aggregate(horde, group_left, aggregation_left, aggregate_func):
        totals = {}
        for clan in horde:
            running = aggregate_func.identity
            for relation in clan:
                running = aggregate_func(running, relation(aggregation_left).value)
            # Any member of the clan can supply the group key; take the first one.
            group_key = next(iter(clan))(group_left)
            totals[group_key] = running
        return Set([
            Set(Couplet(group_left, group_key), Couplet(aggregation_left, total))
            for group_key, total in totals.items()
        ])

    # The aggregation used here: plain addition, starting from the identity 0.
    def add(accumulated, value):
        return accumulated + value

    add.identity = 0

    # Step 6: sum the revenue of every group.
    # noinspection PyTypeChecker
    result = aggregate(grouped, 'nationname', 'revenue', add)
    stopwatch.end('query5_result')

    return result
Exemplo n.º 10
0
# Import the sample graph (Turtle format) from an in-memory string and, if requested, print
# both the raw source and the imported representation.
graph_algebra = import_graph(io.StringIO(sample_graph), rdf_format='turtle')
if print_examples:
    print('Input graph:', sample_graph)
    print('Input graph (as MathObject):', graph_algebra)

# Query the imported graph using general pattern matching APIs.
# 'names': match triples whose predicate is rdf:name; the second dict presumably maps the
# subject to '?eng' and the object to '?name' in the output (match_and_project is defined
# elsewhere -- confirm).
names = match_and_project(graph_algebra, {'p': rdflib.URIRef('rdf:name')}, {
    's': '?eng',
    'o': '?name'
})
# 'engineers': match triples with predicate rdf:type and object cat:engineer, keeping only
# the subject as '?eng'.
engineers = match_and_project(graph_algebra, {
    'p': rdflib.URIRef('rdf:type'),
    'o': rdflib.URIRef('cat:engineer')
}, {'s': '?eng'})
# Combine both results; they share the '?eng' name.
engs_and_names = clans.cross_functional_union(names, engineers)

if print_examples:
    print('Engineers and their names:', engs_and_names)

# Present results: render the input graph and the query result as an HTML page and open it.
if show_results_as_webpage:
    descriptors = [
        HtmlDesc('Input Graph', graph_algebra,
                 'The RDF graph source:' + sample_graph),
        HtmlDesc('Result', engs_and_names, 'Only engineers with names.')
    ]
    html = math_object_as_html('Simple Pattern Match Example', descriptors)
    open_webpage_from_html_str(html)
Exemplo n.º 11
0
# Our next task is to relate these clans to each other in a way that preserves the functional
# characteristic of every relation. We can define a partial binary operation
# functional_union(A, B) on relations to be union(A, B) if union(A, B) is left functional,
# and undefined otherwise.
# The two calls below differ only in the 'language' value; judging by the _pos/_neg names,
# the first is presumably defined and the second undefined (depends on hello_relation, which
# is defined outside this excerpt).
func_union_pos = relations.functional_union(
    hello_relation, Set(Couplet('language', 'English'),
                        Couplet('more', 'info')))
func_union_neg = relations.functional_union(
    hello_relation, Set(Couplet('language', 'Spanish'),
                        Couplet('more', 'info')))
print(func_union_pos)
print(func_union_neg)

# Lifting this operation to clans models natural join-like behavior.
salutations_words_langs_clan = clans.cross_functional_union(
    salutations_n_langs_clan, earths_n_langs_clan)
print("salutations_words_langs_clan:", salutations_words_langs_clan)

# Now that the clans have been related to each other through their language attributes, we can
# do another projection. Notice how the "renaming" of 'word' to 'salutation' and 'earth' allows
# us to distinguish the meaning of each word after joining the clans.
salutations_n_words_clan = clans.compose(salutations_words_langs_clan,
                                         clans.diag('salutation', 'earth'))
print("result")
print("salutations_n_words_clan:", salutations_n_words_clan)

# Finally, we distill this data down to a single relation describing "Hello, World" phrases:
# one couplet per relation of the clan, built from its 'earth' and 'salutation' values.
greeting_relation = Set(
    Couplet(rel('earth'), rel('salutation'))
    for rel in salutations_n_words_clan)
print("Greetings:", greeting_relation)
Exemplo n.º 12
0
def check_rows(_board, try_harder=0):
    """Fill in board cells that can be deduced by examining one row at a time.

    The rows are obtained by partitioning the result of ``get_filled_cells(_board)`` by 'row'.
    For each of them:

    * Look for rows where there is only one missing value. If any are found, fill in the
      missing value.
    * Look for rows where there are two missing values. If either missing value is blocked by
      the same value in the candidate row, col, or block, then the other value can be placed
      in the blocked cell and the blocked value in the other cell.
    * Look for rows with more than two missing values. Check each empty cell to see whether
      only one of the missing values can be placed in it, and check each value to see whether
      there is only one cell where it can be placed.

    :param _board: The current board. It is rebound to the result of ``get_new_board`` every
        time cells are placed.
    :param try_harder: When truthy, values with more than one candidate cell are additionally
        checked against their blocks (via ``get_block_candidates``) so that candidates can be
        eliminated and the placement loop repeated. It is reset to 0 locally whenever a cell
        is placed.
    :return: The (possibly updated) board.
    """
    if VERBOSE:
        print("* check_rows")
    # NOTE: 'board' (the filled cells) is computed once here and is not refreshed when '_board'
    # is updated by the placements below.
    board = get_filled_cells(_board)

    all_rows_clans = partition.partition(board, partial(by_key, 'row'))
    for row_clan in _SORT(all_rows_clans, key=partial(by_clan_key, 'row')):
        row = project(row_clan, 'row')
        board_row = clans.superstrict(_board, row)
        values_clan = get_missing_values(row_clan)

        if row_clan.cardinality == GRID_SIZE - 1:
            # Row is missing only 1 value, remove row_clan from the board leaving target row_col
            row_col = sets.minus(board_row, row_clan)
            new_cells = clans.cross_union(row_col, values_clan)
            _board = get_new_board(_board, new_cells)
            try_harder = 0
            continue

        # Get the set of candidate col/value pairs
        row_possible = clans.cross_union(values_clan,
                                         project(sets.minus(board_row, row_clan), 'col'))

        if row_clan.cardinality == GRID_SIZE - 2:

            # The occupied_clan is the col/value pair that is a conflict for each col/value
            occupied_clan = project(clans.superstrict(board, row_possible), 'col', 'value')

            # If there are no conflicts neither value can be placed without checking entire board
            if not occupied_clan.is_empty:
                # ..remove occupied_clan col/value pairs from all possible
                new_possible = sets.minus(row_possible, occupied_clan)

                if new_possible.cardinality == 2:
                    # Of the 4 possibilities (2 values * 2 cols), 2 were removed, place remaining
                    new_cells = clans.cross_union(row, new_possible)
                    _board = get_new_board(_board, new_cells)
                    try_harder = 0
                    continue

                # 3 of the possibilities remain...
                occupied_col = project(occupied_clan, 'col')

                # Remove the occupied_col choices to get the first col/value pair
                col_value1 = clans.superstrict(new_possible, occupied_col)

                occupied_val = project(col_value1, 'value')

                # Remove the occupied_val choices to get the second col/value pair
                col_value2 = sets.minus(new_possible, clans.superstrict(new_possible, occupied_val))

                new_cells = clans.cross_union(row, col_value1)
                new_cells = sets.union(new_cells, clans.cross_union(row, col_value2))
                _board = get_new_board(_board, new_cells)
                try_harder = 0
                continue

        # The occupied_clan is the row/col/value set that could be a conflict for values
        occupied_clan = clans.superstrict(board, values_clan)

        # If there are no conflicts then no cells can be placed
        if occupied_clan.is_empty:
            continue

        # Add row to row_possible for remaining checks
        all_possible = clans.cross_union(row_possible, row)

        # Get the set of conflicts...conflicting row/value + col/value
        conflict = sets.union(
            clans.superstrict(all_possible,
                              project(occupied_clan, 'value', 'col')),
            clans.superstrict(all_possible,
                              project(occupied_clan, 'value', 'row')))

        # Remove the conflicts from all_possible
        new_possible = sets.minus(all_possible, conflict)

        if new_possible.is_empty:
            continue  # All possible may have been excluded due to row/col conflicts

        # Otherwise...need to check for block (band+stack) conflicts too!!
        # ...if value exists in same block as element of all_possible

        # Add band/stack
        new_targets = clans.superstrict(BANDS_STACKS, project(new_possible, 'row', 'col'))
        new_possible3 = clans.cross_functional_union(new_targets, new_possible)
        occupied_clan2 = occupied_clan

        # Remove block (band+stack) conflicts
        new_possible4a = sets.minus(project(new_possible3, 'value', 'band', 'stack'),
                                    project(occupied_clan2, 'value', 'band', 'stack'))
        new_possible4 = clans.superstrict(new_possible3, new_possible4a)

        # Placement loop for this row; it repeats only when the try_harder branch below has
        # eliminated candidates from new_possible4.
        while True:
            candidates_updated = False
            # Partition by row/col
            placed = 0
            candidates = partition.partition(new_possible4, partial(by_keys, 'row', 'col'))
            for candidate in _SORT(candidates, key=partial(by_clan_key, 'col')):
                # If any row/col has only 1 candidate, place it
                if candidate.cardinality == 1:
                    # Remove band/stack
                    _board = get_new_board(_board, candidate)
                    try_harder = 0
                    placed += 1
            # Stop working on this row as soon as at least one cell was placed.
            if placed:
                break

            # Partition by value
            candidates = partition.partition(new_possible4, partial(by_key, 'value'))
            for candidate in _SORT(candidates, key=partial(by_clan_key, 'value')):
                # If any value fits in only 1 cell, place it
                if candidate.cardinality == 1:
                    # Remove band/stack
                    _board = get_new_board(_board, candidate)
                    try_harder = 0
                else:  # If any value must be placed elsewhere, remove as candidate for this cell
                    if try_harder:
                        value = project(candidate, 'value')
                        # If this row of a sibling block must contain this value...
                        blocks = partition.partition(candidate, partial(by_keys, 'band', 'stack'))
                        if blocks.cardinality > 1:
                            for block_clan in _SORT(blocks,
                                                    key=partial(by_clan_keys, 'band', 'stack')):
                                block = project(block_clan, 'band', 'stack')
                                board_block = clans.superstrict(board, block)
                                if board_block.is_empty:
                                    continue

                                # NOTE: this rebinds the outer 'new_possible' and 'conflict'
                                # names; their previous values are not read again afterwards.
                                new_possible, conflict = get_block_candidates(board_block, board)
                                new_possible_value = clans.superstrict(new_possible, value)

                                if new_possible_value['row'].cardinality == 1:
                                    # Value must be placed in this block
                                    # ...other block candidates can be removed
                                    remove = sets.minus(candidate, block_clan)
                                    new_possible4 = sets.minus(new_possible4, remove)
                                    candidates_updated = True
            # Loop again only when candidates were eliminated and try_harder is still set.
            if not candidates_updated or not try_harder:
                break
    return _board
Exemplo n.º 13
0
def check_rows(_board, try_harder=0):
    """Fill in board cells that can be deduced by examining one row at a time.

    The rows are obtained by partitioning the result of ``get_filled_cells(_board)`` by 'row'.
    For each of them:

    * Look for rows where there is only one missing value. If any are found, fill in the
      missing value.
    * Look for rows where there are two missing values. If either missing value is blocked by
      the same value in the candidate row, col, or block, then the other value can be placed
      in the blocked cell and the blocked value in the other cell.
    * Look for rows with more than two missing values. Check each empty cell to see whether
      only one of the missing values can be placed in it, and check each value to see whether
      there is only one cell where it can be placed.

    :param _board: The current board. It is rebound to the result of ``get_new_board`` every
        time cells are placed.
    :param try_harder: When truthy, values with more than one candidate cell are additionally
        checked against their blocks (via ``get_block_candidates``) so that candidates can be
        eliminated and the placement loop repeated. It is reset to 0 locally whenever a cell
        is placed.
    :return: The (possibly updated) board.
    """
    if VERBOSE:
        print("* check_rows")
    # NOTE: 'board' (the filled cells) is computed once here and is not refreshed when '_board'
    # is updated by the placements below.
    board = get_filled_cells(_board)

    all_rows_clans = partition.partition(board, partial(by_key, 'row'))
    for row_clan in _SORT(all_rows_clans, key=partial(by_clan_key, 'row')):
        row = project(row_clan, 'row')
        board_row = clans.superstrict(_board, row)
        values_clan = get_missing_values(row_clan)

        if row_clan.cardinality == GRID_SIZE - 1:
            # Row is missing only 1 value, remove row_clan from the board leaving target row_col
            row_col = sets.minus(board_row, row_clan)
            new_cells = clans.cross_union(row_col, values_clan)
            _board = get_new_board(_board, new_cells)
            try_harder = 0
            continue

        # Get the set of candidate col/value pairs
        row_possible = clans.cross_union(values_clan,
                                         project(sets.minus(board_row, row_clan), 'col'))

        if row_clan.cardinality == GRID_SIZE - 2:

            # The occupied_clan is the col/value pair that is a conflict for each col/value
            occupied_clan = project(clans.superstrict(board, row_possible), 'col', 'value')

            # If there are no conflicts neither value can be placed without checking entire board
            if not occupied_clan.is_empty:
                # ..remove occupied_clan col/value pairs from all possible
                new_possible = sets.minus(row_possible, occupied_clan)

                if new_possible.cardinality == 2:
                    # Of the 4 possibilities (2 values * 2 cols), 2 were removed, place remaining
                    new_cells = clans.cross_union(row, new_possible)
                    _board = get_new_board(_board, new_cells)
                    try_harder = 0
                    continue

                # 3 of the possibilities remain...
                occupied_col = project(occupied_clan, 'col')

                # Remove the occupied_col choices to get the first col/value pair
                col_value1 = clans.superstrict(new_possible, occupied_col)

                occupied_val = project(col_value1, 'value')

                # Remove the occupied_val choices to get the second col/value pair
                col_value2 = sets.minus(new_possible, clans.superstrict(new_possible, occupied_val))

                new_cells = clans.cross_union(row, col_value1)
                new_cells = sets.union(new_cells, clans.cross_union(row, col_value2))
                _board = get_new_board(_board, new_cells)
                try_harder = 0
                continue

        # The occupied_clan is the row/col/value set that could be a conflict for values
        occupied_clan = clans.superstrict(board, values_clan)

        # If there are no conflicts then no cells can be placed
        if occupied_clan.is_empty:
            continue

        # Add row to row_possible for remaining checks
        all_possible = clans.cross_union(row_possible, row)

        # Get the set of conflicts...conflicting row/value + col/value
        conflict = sets.union(
            clans.superstrict(all_possible,
                              project(occupied_clan, 'value', 'col')),
            clans.superstrict(all_possible,
                              project(occupied_clan, 'value', 'row')))

        # Remove the conflicts from all_possible
        new_possible = sets.minus(all_possible, conflict)

        if new_possible.is_empty:
            continue  # All possible may have been excluded due to row/col conflicts

        # Otherwise...need to check for block (band+stack) conflicts too!!
        # ...if value exists in same block as element of all_possible

        # Add band/stack
        new_targets = clans.superstrict(BANDS_STACKS, project(new_possible, 'row', 'col'))
        new_possible3 = clans.cross_functional_union(new_targets, new_possible)
        occupied_clan2 = occupied_clan

        # Remove block (band+stack) conflicts
        new_possible4a = sets.minus(project(new_possible3, 'value', 'band', 'stack'),
                                    project(occupied_clan2, 'value', 'band', 'stack'))
        new_possible4 = clans.superstrict(new_possible3, new_possible4a)

        # Placement loop for this row; it repeats only when the try_harder branch below has
        # eliminated candidates from new_possible4.
        while True:
            candidates_updated = False
            # Partition by row/col
            placed = 0
            candidates = partition.partition(new_possible4, partial(by_keys, 'row', 'col'))
            for candidate in _SORT(candidates, key=partial(by_clan_key, 'col')):
                # If any row/col has only 1 candidate, place it
                if candidate.cardinality == 1:
                    # Remove band/stack
                    _board = get_new_board(_board, candidate)
                    try_harder = 0
                    placed += 1
            # Stop working on this row as soon as at least one cell was placed.
            if placed:
                break

            # Partition by value
            candidates = partition.partition(new_possible4, partial(by_key, 'value'))
            for candidate in _SORT(candidates, key=partial(by_clan_key, 'value')):
                # If any value fits in only 1 cell, place it
                if candidate.cardinality == 1:
                    # Remove band/stack
                    _board = get_new_board(_board, candidate)
                    try_harder = 0
                else:  # If any value must be placed elsewhere, remove as candidate for this cell
                    if try_harder:
                        value = project(candidate, 'value')
                        # If this row of a sibling block must contain this value...
                        blocks = partition.partition(candidate, partial(by_keys, 'band', 'stack'))
                        if blocks.cardinality > 1:
                            for block_clan in _SORT(blocks,
                                                    key=partial(by_clan_keys, 'band', 'stack')):
                                block = project(block_clan, 'band', 'stack')
                                board_block = clans.superstrict(board, block)
                                if board_block.is_empty:
                                    continue

                                # NOTE: this rebinds the outer 'new_possible' and 'conflict'
                                # names; their previous values are not read again afterwards.
                                new_possible, conflict = get_block_candidates(board_block, board)
                                new_possible_value = clans.superstrict(new_possible, value)

                                if new_possible_value['row'].cardinality == 1:
                                    # Value must be placed in this block
                                    # ...other block candidates can be removed
                                    remove = sets.minus(candidate, block_clan)
                                    new_possible4 = sets.minus(new_possible4, remove)
                                    candidates_updated = True
            # Loop again only when candidates were eliminated and try_harder is still set.
            if not candidates_updated or not try_harder:
                break
    return _board
Exemplo n.º 14
0
# If requested, print both the raw graph source and its imported representation.
if print_examples:
    print('Input graph:', sample_graph)
    print('Input graph (as MathObject):', graph_algebra)


# Query the imported graph using general pattern matching APIs.
# 'names': match triples whose predicate is rdf:name; the second dict presumably maps the
# subject to '?eng' and the object to '?name' in the output (match_and_project is defined
# elsewhere -- confirm).
names = match_and_project(
    graph_algebra,
    {'p': rdflib.URIRef('rdf:name')},
    {'s': '?eng', 'o': '?name'}
)
# 'engineers': match triples with predicate rdf:type and object cat:engineer, keeping only
# the subject as '?eng'.
engineers = match_and_project(
    graph_algebra,
    {'p': rdflib.URIRef('rdf:type'), 'o': rdflib.URIRef('cat:engineer')},
    {'s': '?eng'}
)
# Combine both results; they share the '?eng' name.
engs_and_names = clans.cross_functional_union(names, engineers)

if print_examples:
    print('Engineers and their names:', engs_and_names)


# Present results: render the input graph and the query result as an HTML page and open it.
if show_results_as_webpage:
    descriptors = [
        HtmlDesc('Input Graph', graph_algebra, 'The RDF graph source:' + sample_graph),
        HtmlDesc('Result', engs_and_names, 'Only engineers with names.')
    ]
    html = math_object_as_html('Simple Pattern Match Example', descriptors)
    open_webpage_from_html_str(html)
Exemplo n.º 15
0
def query5():
    # Computes the revenue per nation; the SQL below sketches the query being modelled.
    # The region name and the order date range are not hard-coded here: they are read from
    # the names region_name, start_date and end_date of the enclosing scope.
    #
    # select
    #     nationname,
    #     sum(lineitem.extendedprice * (1 - lineitem.discount)) as revenue
    # from
    #     customer, orders, lineitem,   -- Loaded from CSV
    #     nation, region                -- Loaded from XML
    # where
    #     customer.custkey = orders.custkey
    #     and lineitem.orderkey = orders.orderkey
    #     and customer.nationkey = nation.nationkey
    #     and supplier.nationkey = nation.nationkey
    #     and nation.regionkey = region.regionkey
    #     and region.name = 'AMERICA'
    #     and orders.orderdate >= date '1996-01-01'
    #     and orders.orderdate < date '1996-01-01' + interval '1' year
    # group by
    #     n_name
    stopwatch = FunctionTimer()
    brief = True

    # Step 1: join supplier solutions and projected customers/nations on 'nationkey'.
    supplier_side = get_supplier_solutions()
    customer_side = get_customers_nations_projected(get_nations(region_name))
    joined_on_nation = clans.cross_functional_union(supplier_side, customer_side)
    stopwatch.lap('result1', short=brief)

    # Step 2: join with the restricted, projected orders on 'custkey'.
    orders = get_orders_restricted_projected(start_date, end_date)
    joined_on_customer = clans.cross_functional_union(joined_on_nation, orders)
    stopwatch.lap('result2', short=brief)

    # Step 3: join with the line items on 'orderkey' and 'suppkey'.
    lineitem_types = dict(
        orderkey=int,
        suppkey=int,
        extendedprice=float,
        discount=float,
        partkey=int,
        linenumber=int,
        quantity=int,
        tax=float,
    )
    lineitems = csv.import_csv('lineitem.csv', lineitem_types)
    joined_on_order = clans.cross_functional_union(joined_on_customer, lineitems)
    stopwatch.lap('result3', short=brief)

    # Step 4: extend every relation with its 'revenue' column.
    def with_revenue(rel):
        revenue = rel('extendedprice').value * (1 - rel('discount').value)
        return relations.functional_add(rel, Couplet('revenue', revenue))

    priced = Set(with_revenue(rel) for rel in joined_on_order)
    stopwatch.lap('result4', short=brief)
    # Only the revenue and the grouping column are needed from here on.
    revenue_by_nations = clans.project(priced, 'revenue', 'nationname')

    # Step 5: group the relations by their 'nationname'.
    def nation_of(rel):
        return rel('nationname')

    grouped = partition.partition(revenue_by_nations, nation_of)
    stopwatch.lap('revenue_grouped_by_nations', short=brief)

    # Almost generic aggregation helper. (Handles 'normal' cases, but not all edge cases.)
    def aggregate(horde, group_left, aggregation_left, aggregate_func):
        totals = {}
        for clan in horde:
            running = aggregate_func.identity
            for relation in clan:
                running = aggregate_func(running, relation(aggregation_left).value)
            # Any member of the clan can supply the group key; take the first one.
            group_key = next(iter(clan))(group_left)
            totals[group_key] = running
        return Set([
            Set(Couplet(group_left, group_key), Couplet(aggregation_left, total))
            for group_key, total in totals.items()
        ])

    # The aggregation used here: plain addition, starting from the identity 0.
    def add(accumulated, value):
        return accumulated + value

    add.identity = 0

    # Step 6: sum the revenue of every group.
    # noinspection PyTypeChecker
    result = aggregate(grouped, 'nationname', 'revenue', add)
    stopwatch.end('query5_result')

    return result
Exemplo n.º 16
0
                                            Couplet("language", "language"))))
print("earths_n_langs_clan:", earths_n_langs_clan)

# Our next task is to relate these clans to each other in a way that preserves the functional
# characteristic of every relation. We can define a partial binary operation
# functional_union(A, B) on relations to be union(A, B) if union(A, B) is left functional,
# and undefined otherwise.
# The two calls below differ only in the 'language' value; judging by the _pos/_neg names,
# the first is presumably defined and the second undefined (depends on hello_relation, which
# is defined outside this excerpt).
func_union_pos = relations.functional_union(hello_relation,
    Set(Couplet('language', 'English'), Couplet('more', 'info')))
func_union_neg = relations.functional_union(hello_relation,
    Set(Couplet('language', 'Spanish'), Couplet('more', 'info')))
print(func_union_pos)
print(func_union_neg)

# Lifting this operation to clans models natural join-like behavior.
salutations_words_langs_clan = clans.cross_functional_union(salutations_n_langs_clan,
    earths_n_langs_clan)
print("salutations_words_langs_clan:", salutations_words_langs_clan)

# Now that the clans have been related to each other through their language attributes, we can
# do another projection. Notice how the "renaming" of 'word' to 'salutation' and 'earth' allows
# us to distinguish the meaning of each word after joining the clans.
salutations_n_words_clan = clans.compose(salutations_words_langs_clan,
                                         clans.diag('salutation', 'earth'))
print("result")
print("salutations_n_words_clan:", salutations_n_words_clan)

# Finally, we distill this data down to a single relation describing "Hello, World" phrases:
# one couplet per relation of the clan, built from its 'earth' and 'salutation' values.
greeting_relation = Set(Couplet(rel('earth'), rel('salutation'))
                        for rel in salutations_n_words_clan)
print("Greetings:", greeting_relation)