Пример #1
0
def setup_scd(dbname, number):
    open(os.path.expanduser(dbname), 'w')

    output_conn = sqlite3.connect(dbname)
    output_cur = output_conn.cursor()

    output_cur.execute("CREATE TABLE scd "
                       "(key INTEGER PRIMARY KEY, attr1 INTEGER, "
                       "attr2 INTEGER, version INTEGER)")

    output_conn.commit()
    output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn)

    scd = SlowlyChangingDimension(
        name='scd',
        key='key',
        attributes=['attr1', 'attr2', 'version'],
        lookupatts=['attr2'],
        versionatt='version'
    )

    data = []
    total_elapsed = 0.
    print('Generating scd data')
    start = time.monotonic()
    for i in range(1, number + 1):
        data.append({'attr1': i, 'attr2': number + 1 - i})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into scd')
    start = time.monotonic()
    for row in data:
        scd.scdensure(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('DW populated')
    print('Total time: {}{}\n'.format(round(total_elapsed, 3), 's'))
    output_conn.commit()
Пример #2
0
    AGE            INT    NOT NULL,
    ADDRESS        CHAR(50),
    VERSION         INT);''')

company_info = [('Anders', 43, 'Denmark', 1.0), ('Charles', 50, 'Texas', 1),
                ('Wolf', 28, 'Sweden', 1), ('Hannibal', 45, 'America', 1),
                ('Anders', 43, 'Denmark', 2.0)]

# ... and inserting the necessary data.
c.executemany(
    "INSERT INTO COMPANY (NAME,AGE,ADDRESS,VERSION) VALUES (?,?,?,?)",
    company_info)
conn.commit()

ConnectionWrapper(conn)

s = SlowlyChangingDimension('COMPANY', 'ID',
                            ['NAME', 'AGE', 'ADDRESS', 'VERSION'],
                            ['NAME', 'AGE'], 'VERSION')

dim = SCDType2DimRepresentation(s, conn)

dw = DWRepresentation([dim], conn)

a = SCDVersionPredicate('COMPANY', {'NAME': 'Anders', 'AGE': 43}, 2)
b = SCDVersionPredicate('COMPANY', {'NAME': 'Anders', 'AGE': 43}, 3)
print(a.run(dw))
print(b.run(dw))

conn.close()
Пример #3
0
book_conn = sqlite3.connect(book_path)
country_handle = open(country_path, "r")

wrapper = pygrametl.ConnectionWrapper(dw_conn)

# Sources
author_src = SQLSource(connection=author_conn, query="SELECT * FROM author")
book_src = SQLSource(connection=book_conn, query="SELECT * FROM book")
country_src = CSVSource(f=country_handle, delimiter=",")


# Tables
author_dim = Dimension(name="authordim", key="aid", attributes=["name", "city", "cid"])

book_dim = SlowlyChangingDimension(
    name="bookdim", key="bid", attributes=["title", "year", "version"], lookupatts=["title"], versionatt="version"
)

country_dim = Dimension(name="countrydim", key="cid", attributes=["country"], lookupatts=["country"])

fact_table = FactTable(name="facttable", keyrefs=["aid", "bid"])

snowflake = SnowflakedDimension([(author_dim, country_dim)])

# We map cities to countries and populate the countrydim
cid_map = {}
for row in country_src:
    cid = country_dim.ensure(row)
    cid_map[row["city"]] = cid

# We populate the authordim and the fact table
Пример #4
0
    row['is_fall_semester'] = month < 2 or month >= 6
    row['is_holiday'] = datetime.date(year, month, day) in dk_holidays
    row['t_timestamp'] = timestamp
    return row


### Dimensions and Facts ###

product_dimension = SlowlyChangingDimension(name="dim.product",
                                            key="product_id",
                                            attributes=[
                                                "name", "price",
                                                "alcohol_content_ml",
                                                "activate_date",
                                                "deactivate_date", "version",
                                                "valid_from", "valid_to"
                                            ],
                                            lookupatts=["name"],
                                            versionatt="version",
                                            fromatt="valid_from",
                                            toatt="valid_to",
                                            srcdateatt="lastmoddate",
                                            cachesize=-1)

time_dimension = CachedDimension(
    name='dim.time',
    key='time_id',
    attributes=[
        't_year', 't_month', 't_day', 't_hour', 'day_of_week',
        'is_fall_semester', 'is_holiday', 't_timestamp'
    ],
Пример #5
0
serverdim = CachedDimension(name='server',
                            key='serverid',
                            attributes=['server'])

serverversiondim = CachedDimension(name='serverversion',
                                   key='serverversionid',
                                   attributes=['serverversion', 'serverid'])

pagedim = SlowlyChangingDimension(
    name='page',
    key='pageid',
    attributes=[
        'url', 'size', 'validfrom', 'validto', 'version', 'domainid',
        'serverversionid'
    ],
    lookupatts=['url'],
    versionatt='version',
    fromatt='validfrom',
    toatt='validto',
    #fromfinder=pygrametl.datereader('lastmoddate'),# API updated
    srcdateatt='lastmoddate',  # this is new
    cachesize=-1)

pagesf = SnowflakedDimension([(pagedim, (serverversiondim, domaindim)),
                              (serverversiondim, serverdim),
                              (domaindim, topleveldim)])

testdim = CachedDimension(name='test',
                          key='testid',
                          attributes=['testname', 'testauthor'],
                          lookupatts=['testname'],
Пример #6
0
                            lookupatts=['domain'])

serverdim = CachedDimension(name='server',
                            key='serverid',
                            attributes=['server'])

serverversiondim = CachedDimension(name='serverversion',
                                   key='serverversionid',
                                   attributes=['serverversion', 'serverid'])

pagedim = SlowlyChangingDimension(name='page',
                                  key='pageid',
                                  attributes=[
                                      'url', 'size', 'validfrom', 'validto',
                                      'version', 'domainid', 'serverversionid'
                                  ],
                                  lookupatts=['url'],
                                  versionatt='version',
                                  fromatt='validfrom',
                                  toatt='validto',
                                  srcdateatt='lastmoddate',
                                  cachesize=-1)

pagesf = SnowflakedDimension([(pagedim, (serverversiondim, domaindim)),
                              (serverversiondim, serverdim),
                              (domaindim, topleveldim)])

testdim = CachedDimension(name='test',
                          key='testid',
                          attributes=['testname', 'testauthor'],
                          lookupatts=['testname'],
                          prefill=True,