Example 1
from bubbles import Pipeline, FieldList, open_store

# Sample order data with fields: id, customer_id, year, amount
fields = FieldList(
        ["id", "integer"],
        ["customer_id", "integer"],
        ["year", "integer"],
        ["amount", "integer"]
        )

data = [
    [1, 1, 2009, 10],
    [2, 1, 2010, 20],
    [3, 1, 2011, 20],
    [4, 1, 2012, 50],
    [5, 2, 2010, 50],
    [6, 2, 2012, 40],
    [7, 3, 2011, 100],
    [8, 3, 2012, 150],
    [9, 3, 2013, 120]
]

# Stores for SQL alternative, if enabled (see below)
stores = {"default": open_store("sql", "sqlite:///")}

#
# Create the pipeline
#

p = Pipeline(stores=stores)
p.source_object("iterable_data_source", iterable=data, fields=fields)

# Creating the target table makes the operations below run as SQL
# instead of a Python iterator; comment this line out to compare
p.create("default", "data")

# Find last purchase date
last_purchase = p.fork()
last_purchase.aggregate(["customer_id"],
                        [["year", "max"]],
Example 2
import bubbles

# Follow the comments – there is a line to be uncommented

URL = "https://raw.github.com/Stiivi/cubes/master/examples/hello_world/data.csv"

# Prepare the list of stores; we only need one temporary SQL store

stores = {
    "target": bubbles.open_store("sql", "sqlite:///")
}


p = bubbles.Pipeline(stores=stores)
p.source_object("csv_source", resource=URL, infer_fields=True)

# We create a table
# Uncomment this line and see the difference in debug messages
# p.create("target", "data")

p.distinct("Category")
p.pretty_print()
p.run()
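
For orientation, here is a rough plain-Python equivalent of the distinct step above: it reads the same CSV and collects the unique values of the Category column. This is only an illustration, not what bubbles does internally, and it assumes the file has a Category header, as the pipeline code implies.

import csv
import urllib.request

# Illustration only: collect the distinct Category values from the same CSV
URL = "https://raw.github.com/Stiivi/cubes/master/examples/hello_world/data.csv"

with urllib.request.urlopen(URL) as response:
    lines = response.read().decode("utf8").splitlines()

categories = set()
for row in csv.DictReader(lines):
    categories.add(row["Category"])    # assumes a "Category" column exists

for category in sorted(categories):
    print(category)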

Example 3
import bubbles

# Follow the comments; there is a line that can be commented out

URL = "https://raw.github.com/Stiivi/cubes/master/examples/hello_world/data.csv"

# Prepare the list of stores; we only need one temporary SQL store

stores = {"target": bubbles.open_store("sql", "sqlite:///")}

p = bubbles.Pipeline(stores=stores)
p.source_object("csv_source", resource=URL, encoding="utf8")
p.retype({"Amount (US$, Millions)": "integer"})

# We create a table in the target store; comment this line out and see
# the difference in the debug messages
p.create("target", "data")

p.aggregate("Category", "Amount (US$, Millions)")
p.pretty_print()
p.run()
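
A similar plain-Python sketch makes the retype and aggregate steps concrete: the amount column is cast to int and summed per category. Again this is only an illustration with the field names taken from the pipeline above, not the SQL that bubbles generates.

import csv
import urllib.request

# Illustration only: total amount per category, mirroring retype() + aggregate()
URL = "https://raw.github.com/Stiivi/cubes/master/examples/hello_world/data.csv"

with urllib.request.urlopen(URL) as response:
    reader = csv.DictReader(response.read().decode("utf8").splitlines())

totals = {}
for row in reader:
    amount = int(row["Amount (US$, Millions)"])    # the cast the retype() step declares
    totals[row["Category"]] = totals.get(row["Category"], 0) + amount

for category, total in totals.items():
    print(category, total)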
Example 4
from bubbles import Pipeline, FieldList, data_object, open_store

# Sample order data with fields:
fields = FieldList(["id", "integer"], ["customer_id", "integer"],
                   ["year", "integer"], ["amount", "integer"])

data = [[1, 1, 2009, 10], [2, 1, 2010, 20], [3, 1, 2011, 20], [4, 1, 2012, 50],
        [5, 2, 2010, 50], [6, 2, 2012, 40], [7, 3, 2011, 100],
        [8, 3, 2012, 150], [9, 3, 2013, 120]]

# Stores for SQL alternative, if enabled (see below)
stores = {"default": open_store("sql", "sqlite:///")}

#
# Create the pipeline
#

p = Pipeline(stores=stores)
p.source_object("iterable", iterable=data, fields=fields)

# Creating the target table makes the operations below run as SQL
# instead of a Python iterator; comment this line out to compare
p.create("default", "data")

# Find last purchase date
last_purchase = p.fork()
last_purchase.aggregate(["customer_id"], [["year", "max"]],
                        include_count=False)
last_purchase.rename_fields({"year_max": "last_purchase_year"})
p.join_details(last_purchase, "customer_id", "customer_id")

p.pretty_print()
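
The fork creates a second branch that aggregates the maximum year per customer; after renaming, that branch is joined back to the main branch as detail. A minimal plain-Python sketch of the same result, reusing the data list defined above (illustration only, not the bubbles API):

# Illustration only: last purchase year per customer, joined back onto each order
last_year = {}
for order_id, customer_id, year, amount in data:
    last_year[customer_id] = max(year, last_year.get(customer_id, year))

for order_id, customer_id, year, amount in data:
    print(order_id, customer_id, year, amount, last_year[customer_id])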
Example 5
import time

from bubbles import Field, open_store

# Connection settings (sqlite_path, mysql_user, mysql_pw, mysql_host and
# mysql_db are assumed to be defined elsewhere in the script)

csv_dir = '../data'

# the tables to migrate
tables = [
    'departments',
    'dept_emp',
    'dept_manager',
    'employees',
    'salaries',
    'titles',
]

start_time = time.time()

# connect to databases
sqlite_store = open_store('sql', 'sqlite:////' + sqlite_path)
mysql_store = open_store(
    'sql', 'mysql+mysqldb://' + mysql_user + ':' + mysql_pw + '@' +
    mysql_host + '/' + mysql_db)
csv_store = open_store('csv', csv_dir)

# migrate tables
for table in tables:
    print(table)
    sqlite_obj = sqlite_store.get_object(table)
    fields = sqlite_obj.fields
    # add email column
    if table == 'employees':
        email_field = Field(name='email', storage_type='string', size=255)
        fields.append(email_field)
    mysql_obj = mysql_store.create(table, fields)
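    # The listing stops here; the copy step itself is not shown (and the
    # csv_store target created above is never used in the visible part).
    # A possible continuation, assuming the bubbles objects expose rows()
    # and append(), would copy the rows, fill the added email column with a
    # placeholder, and report the elapsed time:
    for row in sqlite_obj.rows():
        if table == 'employees':
            row = list(row) + [None]    # placeholder for the new email field
        mysql_obj.append(row)

print('migrated %d tables in %.1f seconds' % (len(tables), time.time() - start_time))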