Пример #1
0
    def load_cdr(self):
        call_dimension = Dimension(
            name='"DimCall"',
            key='uuid',
            attributes=[
                "SipFromUser", 'SipToUser', 'HangupCause', 'Direction',
                'SwitchName', 'CallerContext', 'IsAnswered', 'Duration',
                'ObjClass', 'ObjType', 'ObjCategory', 'CompanyId', 'TenantId',
                'AppId', 'AgentSkill', 'OriginatedLegs', 'DVPCallDirection',
                'HangupDisposition', 'AgentAnswered', 'IsQueued',
                'SipResource', 'BusinessUnit', 'CampaignId', 'CampaignName',
                'ExtraData'
            ])

        call_fact_table = FactTable(
            name='"FactCall"',
            keyrefs=[
                'uuid', 'CallUuid', 'BridgeUuid', 'CreatedDateDimId',
                'AnsweredDateDimId', 'BridgedDateDimId', 'HangupDateDimId',
                'CreatedTimeDimId', 'AnsweredTimeDimId', 'BridgedTimeDimId',
                'HangupTimeDimId'
            ],
            measures=[
                'CreatedTime', 'AnsweredTime', 'BridgedTime', 'HangupTime',
                'BillSec', 'HoldSec', 'ProgressSec', 'QueueSec', 'AnswerSec',
                'WaitSec', 'ProgressMediaSec', 'FlowBillSec'
            ])

        self.row['uuid'] = call_dimension.ensure(self.row)
        call_fact_table.insert(self.row)
Пример #2
0
    def load_tickets(self):
        ticket_dimension = Dimension(
            name='"DimTicket"',
            key='ticket_id',
            attributes=[
                'active', 'tid', 'is_sub_ticket', 'subject', 'reference',
                'description', 'priority', 'requester', 'submitter',
                'collaborators', 'watchers', 'attachments', 'sub_tickets',
                'related_tickets', 'merged_tickets', 'isolated_tags', 'sla',
                'comments', 'tags', 'ticket_type', 'status_type', 'channel',
                'company', 'tenant', 'assignee', 'assignee_group',
                'security_level', 'event_author'
            ])

        ticket_fact_table = FactTable(
            name='"FactTicket"',
            keyrefs=[
                'engagement_session_id', 'ticket_id', 'created_at_dim_id',
                'updated_at_dim_id', 'due_at_dim_id', 'created_time_dim_id',
                'updated_time_dim_id', 'due_time_dim_id'
            ],
            measures=[
                'created_at', 'updated_at', 'due_at', 'time_estimation',
                're_opens', 'waited_time', 'worked_time', 'resolution_time'
            ])

        # Updates the row with the primary keys of each dimension while at the same time inserting new data into
        # each dimension

        self.row['ticket_id'] = ticket_dimension.ensure(self.row)
        ticket_fact_table.insert(self.row)
Пример #3
0
    def load_engagement(self):
        engagement_dimension = Dimension(
            name='"DimEngagement"',
            key='engagement_id',
            attributes=['profile', 'channel_from', 'channel_to', 'company', 'has_profile', 'tenant',  'notes', 'channel'])

        engagement_fact_table = FactTable(
            name='"FactEngagement"',
            keyrefs=['engagement_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'],
            measures=['updated_at', 'created_at'])

        self.row['eng_dim_id'] = engagement_dimension.ensure(self.row)
        engagement_fact_table.insert(self.row)
        return True
Пример #4
0
    def load_ticket_events(self):
        ticket_event_dimension = Dimension(
            name='"DimTicketEvent"',
            key='ticket_event_id',
            attributes=['event_type', 'event_author', 'event_body'])

        ticket_event_fact_table = FactTable(
            name='"FactTicketEvent"',
            keyrefs=['ticket_id', 'ticket_event_id'],
            measures=['event_created_at_dim_id', 'event_created_at'])

        # Update the row with the primary keys of each dimension while at the same time inserting new data into
        # each dimension

        ticket_event_dimension.ensure(self.row)
        ticket_event_fact_table.insert(self.row)
Пример #5
0
    def load_external_users(self):

        external_users_dimension = Dimension(
            name='"DimExternalUsers"',
            key='external_user_id',
            attributes=['title', 'name', 'avatar', 'birthday', 'gender', 'first_name', 'last_name',
                        'locale', 'ssn', 'password',  'phone', 'email', 'landnumber', 'facebook', 'twitter', 'linkedin',
                        'googleplus', 'skype', 'thirdpartyreference', 'company', 'tenant', 'custom_fields', 'tags',
                        'contacts', 'zipcode', 'address_number', 'city', 'province', 'country'])

        external_users_fact_table = FactTable(
            name='"FactExternalUsers"',
            keyrefs=['external_user_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'],
            measures=['created_at', 'updated_at'])

        self.row['external_user_id'] = external_users_dimension.ensure(self.row)
        external_users_fact_table.insert(self.row)
Пример #6
0
    # We do not use ensure() for the location dimension, as city
    # and region information of all stores has already been loaded into
    # the table, and having a sale from a store we do not know about is
    # probably either an error in the sales or region data. We use lookup
    # instead which does not insert data and returns None, if no row with
    # the requested data is available, allowing for simple implementation
    # of error handling in ETL flow, which is shown here as an exception
    row['locationid'] = location_dimension.lookup(row)

    # A simple example of how to check if a lookup was successful, so
    # errors can be handled in some way. Here we just give up, and throw
    # an error.
    if not row['locationid']:
       raise ValueError("City was not present in the location dimension")

    # As the number of sales was already conveniently aggregated in the
    # sales table, the row can now be inserted into the data warehouse as
    # we have all the IDs we need. If aggregations, or other more advanced
    # manipulation is required, the full power Python is available as shown
    # with the call to the split_timestamp(row) function.
    fact_table.insert(row)

# After all the data is inserted, we close the connection in order to
# ensure that all data is committed to the database and that the
# connection is correctly released
dw_conn_wrapper.commit()
dw_conn_wrapper.close()

# Finally, the connection to the sales database is closed
sales_pgconn.close()
Пример #7
0
def setup(dbname, number):

    open(os.path.expanduser(dbname), 'w')

    output_conn = sqlite3.connect(dbname)
    output_cur = output_conn.cursor()

    output_cur.execute("CREATE TABLE dim1 "
                       "(key1 INTEGER PRIMARY KEY, attr1 INTEGER, "
                       "attr2 INTEGER)")

    output_cur.execute("CREATE TABLE dim2 "
                       "(key2 INTEGER PRIMARY KEY, attr3 INTEGER, "
                       "attr4 INTEGER)")

    output_cur.execute("CREATE TABLE ft1 "
                       "(measure INTEGER, key1 INTEGER, key2 INTEGER, "
                       "PRIMARY KEY(key1, key2))")

    output_conn.commit()
    output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn)

    dim1 = Dimension(
        name='dim1',
        key='key1',
        attributes=['attr1', 'attr2']
    )

    dim2 = Dimension(
        name='dim2',
        key='key2',
        attributes=['attr3', 'attr4'],
    )

    ft1 = FactTable(
        name='ft1',
        keyrefs=['key1', 'key2'],
        measures=['measure']
    )

    facts = []
    dim1_data = []
    dim2_data = []

    total_elapsed = 0.
    print('Generating ft1 data')
    start = time.monotonic()
    counter = 0
    for i in range(1, number + 1):
        for j in range(1, number + 1):
            counter += 10
            facts.append({'key1': i, 'key2': j, 'measure': counter})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Generating dim1 data')
    start = time.monotonic()
    for i in range(1, number + 1):
        dim1_data.append({'attr1': i, 'attr2': number + 1 - i})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Generating dim2 data')
    start = time.monotonic()
    for i in range(1, number + 1):
        dim2_data.append({'attr3': i, 'attr4': number + 1 - i})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into ft1')
    start = time.monotonic()
    for row in facts:
        ft1.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into dim1')
    start = time.monotonic()
    for row in dim1_data:
        dim1.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into dim2')
    start = time.monotonic()
    for row in dim2_data:
        dim2.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('DW populated')
    print('Total time: {}{}\n'.format(round(total_elapsed, 3), 's'))
    output_conn.commit()
Пример #8
0
# We put the regions into our location_dimension
[location_dimension.insert(row) for row in region_source]
csv_file_handle.close()

# Each row in the sales database is iterated through and inserted
for row in sales_source:

    # Each row is passed to the timestamp split function for splitting
    split_timestamp(row)

    # Lookups are performed to find the key in each dimension for the fact
    # and if the data is not there, it is inserted from the sales row
    row['bookid'] = book_dimension.ensure(row)
    row['timeid'] = time_dimension.ensure(row)

    # For the location dimension, all the data is already present, so a
    # missing row must be an error
    row['locationid'] = location_dimension.lookup(row)
    if not row['locationid']:
       raise ValueError("city was not present in the location dimension")

    # The row can then be inserted into the fact table
    fact_table.insert(row)

# The data warehouse connection is then ordered to commit and close
dw_conn_wrapper.commit()
dw_conn_wrapper.close()

sales_conn.close()