Exemplos de import_csv em Python, exemplos de algebraixlib.io.csv.import_csv em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_algebras_clans.py Projeto: Libardo1/algebraixlib

    def test_lhs_cross_functional_union(self):
        """Test for functional_cross_union."""
        table_a = import_csv(self._get_table_a())
        table_b = import_csv(self._get_table_b())

        self.assertTrue(is_functional(table_a))
        self.assertTrue(is_functional(table_b))

        # Calculate left join.
        result = lhs_cross_functional_union(table_a, table_b)

        # Test result set properties
        self.assertEqual(result.cached_functional, CacheStatus.IS)
        self.assertFalse(result.is_empty)
        self.assertEqual(result.cardinality, 8)
        expected = import_csv(self._get_result_cross_functional_union())
        self.assertEqual(result, expected)

        if self.print_examples:
            print('Expected set: {0}'.format(expected))
            print('Result set: {0}'.format(result))

        import algebraixlib.algebras.sets as sets
        table_aa = sets.union(table_a, Set(Set(Couplet('PK', '-1'), Couplet('PK', '-2'))))
        self.assertFalse(is_functional(table_aa))
        result = lhs_cross_functional_union(table_aa, table_b)
        self.assertNotEqual(result.cached_functional, CacheStatus.IS)

        table_bb = sets.union(table_b, Set(Set(Couplet('PK', '-1'), Couplet('PK', '-2'))))
        self.assertFalse(is_functional(table_bb))
        result = lhs_cross_functional_union(table_a, table_bb)
        self.assertEqual(result.cached_functional, CacheStatus.IS)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: test_algebras_clans.py Projeto: jmcgonnell/algebraixlib

    def test_lhs_cross_functional_union(self):
        """Test for functional_cross_union."""
        table_a = import_csv(self._get_table_a())
        table_b = import_csv(self._get_table_b())

        self.assertTrue(is_functional(table_a))
        self.assertTrue(is_functional(table_b))

        # Calculate left join.
        result = lhs_cross_functional_union(table_a, table_b)

        # Test result set properties
        self.assertEqual(result.cached_functional, CacheStatus.IS)
        self.assertFalse(result.is_empty)
        self.assertEqual(result.cardinality, 8)
        expected = import_csv(self._get_result_cross_functional_union())
        self.assertEqual(result, expected)

        import algebraixlib.algebras.sets as sets
        table_aa = sets.union(
            table_a, Set(Set(Couplet('PK', '-1'), Couplet('PK', '-2'))))
        self.assertFalse(is_functional(table_aa))
        result = lhs_cross_functional_union(table_aa, table_b)
        self.assertNotEqual(result.cached_functional, CacheStatus.IS)

        table_bb = sets.union(
            table_b, Set(Set(Couplet('PK', '-1'), Couplet('PK', '-2'))))
        self.assertFalse(is_functional(table_bb))
        result = lhs_cross_functional_union(table_a, table_bb)
        self.assertEqual(result.cached_functional, CacheStatus.IS)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_io_csv.py Projeto: jdcasi/algebraixlib

    def test_csv(self):
        """Test loading clan from csv."""
        clan = Set({Set({Couplet('a', '1'), Couplet('b', '2')})})
        st1 = import_csv(IoCsvTests.path('set1.csv'))
        self.assertEqual(clan, st1)
        self.assertTrue(st1.cached_is_clan)
        self.assertTrue(st1.cached_is_left_functional)

        clan = Set({Set({Couplet('a', '1'), Couplet('b', '2')})})
        st1a = import_csv(IoCsvTests.path('set1a.csv'))
        # NOTE: duplicate row is removed
        self.assertEqual(clan, st1a)

        clan = Set(Set({Couplet('a', '1'), Couplet('b', '2'), Couplet('row', 0)}),
                   Set({Couplet('a', '1'), Couplet('b', '2'), Couplet('row', 1)}))
        st1a = import_csv(IoCsvTests.path('set1a.csv'), index_column='row')
        # NOTE: duplicate row is NOT removed
        self.assertEqual(clan, st1a)

        clan = Set({Set({Couplet('a', '1'), Couplet('b', '2')}), Set({Couplet('a', '3'),
                                                                      Couplet('b', '4')})})
        st2 = import_csv(IoCsvTests.path('set2.csv'))
        self.assertEqual(clan, st2)

        clan = Set(Set([Couplet(s, c) for s, c in zip('abcd', [1, 2, 3, 4])]),
                   Set([Couplet(s, c) for s, c in zip('abc', [5, 6, 7])]),
                   Set([Couplet(s, c) for s, c in zip('bd', [8, 9])]))
        types = {'a': int, 'b': int, 'c': int, 'd': int}
        st3 = import_csv(IoCsvTests.path('set3.csv'), types)
        # print("expected", clan)
        # print("actual", st3)
        self.assertEqual(clan, st3)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: TPC-H_Query5.py Projeto: jdcasi/algebraixlib

def get_orders_restricted_projected(startdate, enddate):
    """Execute the equivalent of the following SQL query, querying the CSV file orders.csv:
        SELECT
            orderkey, custkey
        FROM
            orders
        WHERE
            startdate <= orders.orderdate and orders.orderdate < enddate

    :param startdate: The lower boundary (inclusive) of the date range for the column 'orderdate'.
    :param enddate: The upper boundary (exclusive) of the date range for the column 'orderdate'.
    """
    timer = FunctionTimer()
    short_prints = True

    def read_date(date_str: str) -> datetime:
        return datetime.strptime(date_str, "%Y-%m-%d").date()

    orders_types = {"orderkey": int, "custkey": int, "orderdate": read_date, "totalprice": float, "shippriority": int}
    orders = csv.import_csv("orders.csv", orders_types)
    timer.lap("orders", short=short_prints)

    def select_dates(rel) -> bool:
        orderdate = rel("orderdate").value
        return (startdate <= orderdate) and (orderdate < enddate)

    orders_restricted = sets.restrict(orders, select_dates)
    timer.lap("orders_restricted", short=short_prints)

    orders_restricted_projected = clans.project(orders_restricted, "orderkey", "custkey")
    timer.end("orders_restricted_projected", short=short_prints)

    return orders_restricted_projected

Exemplo n.º 5

0

Exibir arquivo

Arquivo: TPC-H_Query5.py Projeto: jdcasi/algebraixlib

def get_customers_nations_projected(nations):
    """Execute the equivalent of the following SQL query, querying the CSV file customer.csv:
        SELECT
            custkey, nationkey, nationname
        FROM
            customer
        JOIN
            nations
        ON
            customer.nationkey = nations.nationkey
    """
    timer = FunctionTimer()
    short_prints = True

    customer_types = {"custkey": int, "nationkey": int, "acctbal": float}
    customers = csv.import_csv("customer.csv", customer_types)
    timer.lap("customers", short=short_prints)

    customers_nations = clans.functional_cross_union(customers, nations)
    timer.lap("customers_nations", short=short_prints)

    customers_nations_projected = clans.project(customers_nations, "custkey", "nationkey", "nationname")
    timer.end("customers_nations_projected", short=short_prints)

    return customers_nations_projected

Exemplo n.º 6

0

Exibir arquivo

Arquivo: TPC-H_Query5.py Projeto: jmcgonnell/algebraixlib

def get_customers_nations_projected(nations):
    """Execute the equivalent of the following SQL query, querying the CSV file customer.csv:
        SELECT
            custkey, nationkey, nationname
        FROM
            customer
        JOIN
            nations
        ON
            customer.nationkey = nations.nationkey
    """
    timer = FunctionTimer()
    short_prints = True

    customer_types = {'custkey': int, 'nationkey': int, 'acctbal': float}
    customers = csv.import_csv('customer.csv', customer_types)
    timer.lap('customers', short=short_prints)

    customers_nations = clans.cross_functional_union(customers, nations)
    timer.lap('customers_nations', short=short_prints)

    customers_nations_projected = clans.project(customers_nations,
                                                'custkey', 'nationkey', 'nationname')
    timer.end('customers_nations_projected', short=short_prints)

    return customers_nations_projected

Exemplo n.º 7

0

Exibir arquivo

Arquivo: test_algebras_clans.py Projeto: jdcasi/algebraixlib

    def test_left_functional_cross_union(self):
        """Test for left_functional_cross_union."""
        table_a = import_csv(self._get_table_a())
        table_b = import_csv(self._get_table_b())

        # Calculate left join.
        result = lhs_functional_cross_union(table_a, table_b)

        # Test result set properties
        self.assertEqual(result.is_empty, False)
        self.assertEqual(result.cardinality, 8)
        expected = import_csv(self._get_result_left_functional_cross_union())
        self.assertEqual(result, expected)

        if self.print_examples:
            print('Expected set: {0}'.format(expected))
            print('Result set: {0}'.format(result))

Exemplo n.º 8

0

Exibir arquivo

Arquivo: TPC-H_Query5.py Projeto: jmcgonnell/algebraixlib

def get_orders_restricted_projected(startdate, enddate):
    """Execute the equivalent of the following SQL query, querying the CSV file orders.csv:
        SELECT
            orderkey, custkey
        FROM
            orders
        WHERE
            startdate <= orders.orderdate and orders.orderdate < enddate

    :param startdate: The lower boundary (inclusive) of the date range for the column 'orderdate'.
    :param enddate: The upper boundary (exclusive) of the date range for the column 'orderdate'.
    """
    timer = FunctionTimer()
    short_prints = True

    def read_date(date_str: str) -> datetime:
        return datetime.strptime(date_str, '%Y-%m-%d').date()

    orders_types = {
        'orderkey': int, 'custkey': int, 'orderdate': read_date,
        'totalprice': float, 'shippriority': int
    }
    orders = csv.import_csv('orders.csv', orders_types)
    timer.lap('orders', short=short_prints)

    def select_dates(rel) -> bool:
        orderdate = rel('orderdate').value
        return (startdate <= orderdate) and (orderdate < enddate)

    orders_restricted = sets.restrict(orders, select_dates)
    timer.lap('orders_restricted', short=short_prints)

    orders_restricted_projected = clans.project(orders_restricted, 'orderkey', 'custkey')
    timer.end('orders_restricted_projected', short=short_prints)

    return orders_restricted_projected

Exemplo n.º 9

0

Exibir arquivo

Arquivo: test_io_csv.py Projeto: edwardt/algebraixlib

    def test_csv(self):
        """Test loading clan from csv."""
        clan = Set({Set({Couplet('a', '1'), Couplet('b', '2')})})
        st1 = import_csv(IoCsvTests.path('set1.csv'))
        self.assertEqual(clan, st1)
        self.assertEqual(st1.cached_clan, CacheStatus.IS)
        self.assertEqual(st1.cached_functional, CacheStatus.IS)
        self.assertEqual(st1.cached_regular, CacheStatus.IS)

        clan = Set({Set({Couplet('a', '1'), Couplet('b', '2')})})
        st1a = import_csv(IoCsvTests.path('set1a.csv'))
        # NOTE: duplicate row is removed
        self.assertEqual(clan, st1a)
        self.assertEqual(st1a.cached_clan, CacheStatus.IS)
        self.assertEqual(st1a.cached_functional, CacheStatus.IS)
        self.assertEqual(st1a.cached_regular, CacheStatus.IS)

        clan = Set(Set({Couplet('a', '1'), Couplet('b', '2'), Couplet('row', 0)}),
                   Set({Couplet('a', '1'), Couplet('b', '2'), Couplet('row', 1)}))
        st1a = import_csv(IoCsvTests.path('set1a.csv'), index_column='row')
        # NOTE: duplicate row is NOT removed
        self.assertEqual(clan, st1a)
        self.assertEqual(st1a.cached_clan, CacheStatus.IS)
        self.assertEqual(st1a.cached_functional, CacheStatus.IS)
        self.assertEqual(st1a.cached_regular, CacheStatus.IS)

        expected = Multiset({Set(Couplet('a', '1'), Couplet('b', '2')): 2})
        actual = import_csv(IoCsvTests.path('set1a.csv'), has_dup_rows=True)
        # NOTE: duplicate row is not removed
        self.assertEqual(actual, expected)

        self.assertEqual(actual.cached_multiclan, CacheStatus.IS)
        self.assertEqual(actual.cached_functional, CacheStatus.IS)
        self.assertEqual(actual.cached_regular, CacheStatus.IS)

        clan = Set({Set({Couplet('a', '1'), Couplet('b', '2')}),
                    Set({Couplet('a', '3'), Couplet('b', '4')})})
        st2 = import_csv(IoCsvTests.path('set2.csv'))
        self.assertEqual(clan, st2)
        self.assertEqual(st2.cached_clan, CacheStatus.IS)
        self.assertEqual(st2.cached_functional, CacheStatus.IS)
        self.assertEqual(st2.cached_regular, CacheStatus.IS)

        clan = Set(Set([Couplet(s, c) for s, c in zip('abcd', [1, 2, 3, 4])]),
                   Set([Couplet(s, c) for s, c in zip('abc', [5, 6, 7])]),
                   Set([Couplet(s, c) for s, c in zip('bd', [8, 9])]))
        types = {'a': int, 'b': int, 'c': int, 'd': int}
        st3 = import_csv(IoCsvTests.path('set3.csv'), types)
        # print("expected", clan)
        # print("actual", st3)
        self.assertEqual(clan, st3)
        self.assertEqual(st3.cached_clan, CacheStatus.IS)
        self.assertEqual(st3.cached_functional, CacheStatus.IS)
        self.assertEqual(st3.cached_regular, CacheStatus.IS_NOT)

        expected = Multiset({Set(Couplet('a', '1'), Couplet('b', '2'), Couplet('c', '3'),
                                 Couplet('d', '4')): 1,
                             Set(Couplet('a', '5'), Couplet('b', '6'), Couplet('c', '7')): 1,
                             Set(Couplet('b', '8'), Couplet('d', '9')): 1})
        actual = import_csv(IoCsvTests.path('set3.csv'), has_dup_rows=True)
        self.assertEqual(actual, expected)

        self.assertEqual(actual.cached_multiclan, CacheStatus.IS)
        self.assertEqual(actual.cached_functional, CacheStatus.IS)
        self.assertEqual(actual.cached_regular, CacheStatus.IS_NOT)

        expected = Multiset({Set(Couplet('a', '5'), Couplet('b', '6'), Couplet('c', '7')): 1,
                             Set(Couplet('b', '8'), Couplet('d', '9')): 1,
                             Set(Couplet('a', '1'), Couplet('b', '2'), Couplet('c', '3'),
                                 Couplet('d', '4')): 1})
        export_path = IoCsvTests.path('export.csv')
        self.assertFalse(export_csv(expected, export_path))
        self.assertTrue(export_csv(expected, export_path, ordered_lefts='abcd'))
        actual = import_csv(export_path, has_dup_rows=True)
        self.assertEqual(actual, expected)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: hello_world.py Projeto: jdcasi/algebraixlib

gallo,Spanish,rooster
Duniy?,Hindi,earth
Kon'nichiwa,Japanese,salutation
hallo,German,salutation
nuqneH,Klingon,salutation
sekai,Japanese,earth
schmetterling,German,butterfly
mariposa,Spanish,butterfly
"""

# Tables can be modeled as sets of binary relations, which we call clans.
from io import StringIO
from algebraixlib.io.csv import import_csv

file = StringIO(vocab_csv)
vocab_clan = import_csv(file)
print("vocab_clan:", vocab_clan)

# Superstriction(A, B) is a partial binary operation on sets. It is defined as A if A is a superset
# of B, otherwise it is undefined.
hello_relation = Set(Couplet('word', 'hello'), Couplet('language', 'English'),
                     Couplet('meaning', 'salutation'))
super_pos = sets.superstrict(hello_relation, Set(Couplet('language', 'English')))
super_neg = sets.superstrict(hello_relation, Set(Couplet('language', 'Mandarin')))
print(super_pos)
print(super_neg)

# By extending superstriction to clans, which are sets of sets (of Couplets), we can define a helpful
# mechanism to restrict vocab_clan to only those relations that contain particular values.
import algebraixlib.algebras.clans as clans
salutation_records_clan = clans.superstrict(vocab_clan, Set(Set(Couplet('meaning', 'salutation'))))

Exemplo n.º 11

0

Exibir arquivo

Arquivo: multi_example.py Projeto: jdcasi/algebraixlib

peach,doug
rice,frank
rice,frank
banana,doug
apple,doug
rice,jane
apple,jane
"""

# Tables can be modeled as multisets of binary relations, which we call clans.
from io import StringIO
from algebraixlib.io.csv import import_csv
from algebraixlib.io.csv import export_csv

file = StringIO(sales_csv)
sales_multiclan = import_csv(file, has_dup_rows=True)  # note the use of flag to return a multiclan
print(sales_multiclan)

# lets see how our cashiers are doing
cashier_diagonal = _mo.Multiset({_mo.Set(_mo.Couplet('cashier', 'cashier')): 1})
cashier_sales = _multiclans.compose(sales_multiclan, cashier_diagonal)
print("To find out how many sales each cashier has had, we can do the following:")
print(str("sales_multiclan") + ' COMPOSE ' + str(cashier_diagonal))
print('=> EVALUATES TO ' + str(cashier_sales) + "\n")

# lets see what products we sold for the day
product_diagonal = _mo.Multiset({_mo.Set(_mo.Couplet('product', 'product')): 1})
product_sales = _multiclans.compose(sales_multiclan, product_diagonal)
print("To find out how many products we sold, we can do the following:")
print(str("sales_multiclan") + ' COMPOSE ' + str(product_diagonal))
print('=> EVALUATES TO ' + str(product_sales) + "\n")

Exemplo n.º 12

0

Exibir arquivo

gallo,Spanish,rooster
Duniy?,Hindi,earth
Kon'nichiwa,Japanese,salutation
hallo,German,salutation
nuqneH,Klingon,salutation
sekai,Japanese,earth
schmetterling,German,butterfly
mariposa,Spanish,butterfly
"""

# Tables can be modeled as sets of binary relations, which we call clans.
from io import StringIO
from algebraixlib.io.csv import import_csv

file = StringIO(vocab_csv)
vocab_clan = import_csv(file)
print("vocab_clan:", vocab_clan)

# Superstriction(A, B) is a partial binary operation on sets. It is defined as A if A is a superset
# of B, otherwise it is undefined.
hello_relation = Set(Couplet('word', 'hello'), Couplet('language', 'English'),
                     Couplet('meaning', 'salutation'))
super_pos = sets.superstrict(hello_relation,
                             Set(Couplet('language', 'English')))
super_neg = sets.superstrict(hello_relation,
                             Set(Couplet('language', 'Mandarin')))
print(super_pos)
print(super_neg)

# By extending superstriction to clans, which are sets of sets (of Couplets), we can define a
# helpful mechanism to restrict vocab_clan to only those relations that contain particular values.

Exemplo n.º 13

0

Exibir arquivo

Arquivo: multi_example.py Projeto: jmcgonnell/algebraixlib

peach,doug
rice,frank
rice,frank
banana,doug
apple,doug
rice,jane
apple,jane
"""

# Tables can be modeled as multisets of binary relations, which we call clans.
from io import StringIO
from algebraixlib.io.csv import import_csv
from algebraixlib.io.csv import export_csv

file = StringIO(sales_csv)
sales_multiclan = import_csv(
    file, has_dup_rows=True)  # note the use of flag to return a multiclan
print(sales_multiclan)

# lets see how our cashiers are doing
cashier_diagonal = _mo.Multiset(
    {_mo.Set(_mo.Couplet('cashier', 'cashier')): 1})
cashier_sales = _multiclans.compose(sales_multiclan, cashier_diagonal)
print(
    "To find out how many sales each cashier has had, we can do the following:"
)
print(str("sales_multiclan") + ' COMPOSE ' + str(cashier_diagonal))
print('=> EVALUATES TO ' + str(cashier_sales) + "\n")

# lets see what products we sold for the day
product_diagonal = _mo.Multiset(
    {_mo.Set(_mo.Couplet('product', 'product')): 1})

Exemplo n.º 14

0

Exibir arquivo

Arquivo: TPC-H_Query5.py Projeto: jdcasi/algebraixlib

def query5():
    # select
    #     nationname,
    #     sum(lineitem.extendedprice * (1 - lineitem.discount)) as revenue
    # from
    #     customer, orders, lineitem,   -- Loaded from CSV
    #     nation, region                -- Loaded from XML
    # where
    #     customer.custkey = orders.custkey
    #     and lineitem.orderkey = orders.orderkey
    #     and customer.nationkey = nation.nationkey
    #     and supplier.nationkey = nation.nationkey
    #     and nation.regionkey = region.regionkey
    #     and region.name = 'AMERICA'
    #     and orders.orderdate >= date '1996-01-01'
    #     and orders.orderdate < date '1996-01-01' + interval '1' year
    # group by
    #     n_name
    timer = FunctionTimer()
    short_prints = True

    # Join supplier_solutions and customers_nations_projected on 'nationkey'.
    result1 = clans.functional_cross_union(
        get_supplier_solutions(), get_customers_nations_projected(get_nations(region_name))
    )
    timer.lap("result1", short=short_prints)

    # Join result1 with orders_restricted_projected on 'custkey'.
    result2 = clans.functional_cross_union(result1, get_orders_restricted_projected(start_date, end_date))
    timer.lap("result2", short=short_prints)

    # Join result with lineitem on 'orderkey' and 'suppkey'.
    lineitem_types = {
        "orderkey": int,
        "suppkey": int,
        "extendedprice": float,
        "discount": float,
        "partkey": int,
        "linenumber": int,
        "quantity": int,
        "tax": float,
    }
    result3 = clans.functional_cross_union(result2, csv.import_csv("lineitem.csv", lineitem_types))
    timer.lap("result3", short=short_prints)

    # Add the 'revenue' column.
    def calc_revenue(rel):
        return Couplet("revenue", rel("extendedprice").value * (1 - rel("discount").value))

    result4 = Set(relations.functional_add(rel, calc_revenue(rel)) for rel in result3)
    timer.lap("result4", short=short_prints)
    # Remove unnecessary columns.
    revenue_by_nations = clans.project(result4, "revenue", "nationname")

    # Partition the result on 'nationname'.
    revenue_grouped_by_nations = partition.partition(revenue_by_nations, lambda rel: rel("nationname"))
    timer.lap("revenue_grouped_by_nations", short=short_prints)

    # Almost generic aggregation function. (Handles 'normal' cases, but not all edge cases.)
    def aggregate(horde, group_left, aggregation_left, aggregate_func):
        aggregation = {}
        for clan in horde:
            aggregation_value = aggregate_func.identity
            for relation in clan:
                aggregation_value = aggregate_func(aggregation_value, relation(aggregation_left).value)
            first_relation = next(iter(clan))
            aggregation[first_relation(group_left)] = aggregation_value
        return Set([Set(Couplet(group_left, key), Couplet(aggregation_left, aggregation[key])) for key in aggregation])

    # Our aggregation function (adding two numbers, identity is 0).
    def aggregate_sum(arg1, arg2):
        return arg1 + arg2

    aggregate_sum.identity = 0

    # Calculate the aggregation result.
    # noinspection PyTypeChecker
    query5_result = aggregate(revenue_grouped_by_nations, "nationname", "revenue", aggregate_sum)
    timer.end("query5_result")

    return query5_result

Exemplo n.º 15

0

Exibir arquivo

Arquivo: TPC-H_Query5.py Projeto: jmcgonnell/algebraixlib

def query5():
    # select
    #     nationname,
    #     sum(lineitem.extendedprice * (1 - lineitem.discount)) as revenue
    # from
    #     customer, orders, lineitem,   -- Loaded from CSV
    #     nation, region                -- Loaded from XML
    # where
    #     customer.custkey = orders.custkey
    #     and lineitem.orderkey = orders.orderkey
    #     and customer.nationkey = nation.nationkey
    #     and supplier.nationkey = nation.nationkey
    #     and nation.regionkey = region.regionkey
    #     and region.name = 'AMERICA'
    #     and orders.orderdate >= date '1996-01-01'
    #     and orders.orderdate < date '1996-01-01' + interval '1' year
    # group by
    #     n_name
    timer = FunctionTimer()
    short_prints = True

    # Join supplier_solutions and customers_nations_projected on 'nationkey'.
    result1 = clans.cross_functional_union(
        get_supplier_solutions(), get_customers_nations_projected(get_nations(region_name)))
    timer.lap('result1', short=short_prints)

    # Join result1 with orders_restricted_projected on 'custkey'.
    result2 = clans.cross_functional_union(
        result1, get_orders_restricted_projected(start_date, end_date))
    timer.lap('result2', short=short_prints)

    # Join result with lineitem on 'orderkey' and 'suppkey'.
    lineitem_types = {
        'orderkey': int, 'suppkey': int, 'extendedprice': float, 'discount': float,
        'partkey': int, 'linenumber': int, 'quantity': int, 'tax': float,
    }
    result3 = clans.cross_functional_union(result2, csv.import_csv('lineitem.csv', lineitem_types))
    timer.lap('result3', short=short_prints)

    # Add the 'revenue' column.
    def calc_revenue(rel):
        return Couplet('revenue', rel('extendedprice').value * (1 - rel('discount').value))
    result4 = Set(relations.functional_add(rel, calc_revenue(rel)) for rel in result3)
    timer.lap('result4', short=short_prints)
    # Remove unnecessary columns.
    revenue_by_nations = clans.project(result4, 'revenue', 'nationname')

    # Partition the result on 'nationname'.
    revenue_grouped_by_nations = partition.partition(
        revenue_by_nations, lambda rel: rel('nationname'))
    timer.lap('revenue_grouped_by_nations', short=short_prints)

    # Almost generic aggregation function. (Handles 'normal' cases, but not all edge cases.)
    def aggregate(horde, group_left, aggregation_left, aggregate_func):
        aggregation = {}
        for clan in horde:
            aggregation_value = aggregate_func.identity
            for relation in clan:
                aggregation_value = aggregate_func(aggregation_value,
                                                   relation(aggregation_left).value)
            first_relation = next(iter(clan))
            aggregation[first_relation(group_left)] = aggregation_value
        return Set([Set(Couplet(group_left, key),
                        Couplet(aggregation_left, aggregation[key])) for key in aggregation])

    # Our aggregation function (adding two numbers, identity is 0).
    def aggregate_sum(arg1, arg2):
        return arg1 + arg2
    aggregate_sum.identity = 0

    # Calculate the aggregation result.
    # noinspection PyTypeChecker
    query5_result = aggregate(
        revenue_grouped_by_nations, 'nationname', 'revenue', aggregate_sum)
    timer.end('query5_result')

    return query5_result

Exemplo n.º 16

0

Exibir arquivo

    def test_csv(self):
        """Test loading clan from csv."""
        clan = Set({Set({Couplet('a', '1'), Couplet('b', '2')})})
        st1 = import_csv(IoCsvTests.path('set1.csv'))
        self.assertEqual(clan, st1)
        self.assertEqual(st1.cached_clan, CacheStatus.IS)
        self.assertEqual(st1.cached_functional, CacheStatus.IS)
        self.assertEqual(st1.cached_regular, CacheStatus.IS)

        clan = Set({Set({Couplet('a', '1'), Couplet('b', '2')})})
        st1a = import_csv(IoCsvTests.path('set1a.csv'))
        # NOTE: duplicate row is removed
        self.assertEqual(clan, st1a)
        self.assertEqual(st1a.cached_clan, CacheStatus.IS)
        self.assertEqual(st1a.cached_functional, CacheStatus.IS)
        self.assertEqual(st1a.cached_regular, CacheStatus.IS)

        clan = Set(
            Set({Couplet('a', '1'),
                 Couplet('b', '2'),
                 Couplet('row', 0)}),
            Set({Couplet('a', '1'),
                 Couplet('b', '2'),
                 Couplet('row', 1)}))
        st1a = import_csv(IoCsvTests.path('set1a.csv'), index_column='row')
        # NOTE: duplicate row is NOT removed
        self.assertEqual(clan, st1a)
        self.assertEqual(st1a.cached_clan, CacheStatus.IS)
        self.assertEqual(st1a.cached_functional, CacheStatus.IS)
        self.assertEqual(st1a.cached_regular, CacheStatus.IS)

        expected = Multiset({Set(Couplet('a', '1'), Couplet('b', '2')): 2})
        actual = import_csv(IoCsvTests.path('set1a.csv'), has_dup_rows=True)
        # NOTE: duplicate row is not removed
        self.assertEqual(actual, expected)

        self.assertEqual(actual.cached_multiclan, CacheStatus.IS)
        self.assertEqual(actual.cached_functional, CacheStatus.IS)
        self.assertEqual(actual.cached_regular, CacheStatus.IS)

        clan = Set({
            Set({Couplet('a', '1'), Couplet('b', '2')}),
            Set({Couplet('a', '3'), Couplet('b', '4')})
        })
        st2 = import_csv(IoCsvTests.path('set2.csv'))
        self.assertEqual(clan, st2)
        self.assertEqual(st2.cached_clan, CacheStatus.IS)
        self.assertEqual(st2.cached_functional, CacheStatus.IS)
        self.assertEqual(st2.cached_regular, CacheStatus.IS)

        clan = Set(Set([Couplet(s, c) for s, c in zip('abcd', [1, 2, 3, 4])]),
                   Set([Couplet(s, c) for s, c in zip('abc', [5, 6, 7])]),
                   Set([Couplet(s, c) for s, c in zip('bd', [8, 9])]))
        types = {'a': int, 'b': int, 'c': int, 'd': int}
        st3 = import_csv(IoCsvTests.path('set3.csv'), types)
        # print("expected", clan)
        # print("actual", st3)
        self.assertEqual(clan, st3)
        self.assertEqual(st3.cached_clan, CacheStatus.IS)
        self.assertEqual(st3.cached_functional, CacheStatus.IS)
        self.assertEqual(st3.cached_regular, CacheStatus.IS_NOT)

        expected = Multiset({
            Set(Couplet('a', '1'), Couplet('b', '2'), Couplet('c', '3'),
                Couplet('d', '4')):
            1,
            Set(Couplet('a', '5'), Couplet('b', '6'), Couplet('c', '7')):
            1,
            Set(Couplet('b', '8'), Couplet('d', '9')):
            1
        })
        actual = import_csv(IoCsvTests.path('set3.csv'), has_dup_rows=True)
        self.assertEqual(actual, expected)

        self.assertEqual(actual.cached_multiclan, CacheStatus.IS)
        self.assertEqual(actual.cached_functional, CacheStatus.IS)
        self.assertEqual(actual.cached_regular, CacheStatus.IS_NOT)

        expected = Multiset({
            Set(Couplet('a', '5'), Couplet('b', '6'), Couplet('c', '7')):
            1,
            Set(Couplet('b', '8'), Couplet('d', '9')):
            1,
            Set(Couplet('a', '1'), Couplet('b', '2'), Couplet('c', '3'),
                Couplet('d', '4')):
            1
        })
        export_path = IoCsvTests.path('export.csv')
        self.assertFalse(export_csv(expected, export_path))
        self.assertTrue(export_csv(expected, export_path,
                                   ordered_lefts='abcd'))
        actual = import_csv(export_path, has_dup_rows=True)
        self.assertEqual(actual, expected)