def load_cdr(self):
    """Load ``self.row`` into the call dimension and the call fact table.

    The row is passed to ``ensure`` on the ``"DimCall"`` dimension, the value
    returned is written back to ``self.row['uuid']``, and the row is then
    inserted into the ``"FactCall"`` fact table.
    """
    # Attribute columns handed to the "DimCall" dimension.
    dimension_attributes = [
        'SipFromUser', 'SipToUser', 'HangupCause', 'Direction', 'SwitchName',
        'CallerContext', 'IsAnswered', 'Duration', 'ObjClass', 'ObjType',
        'ObjCategory', 'CompanyId', 'TenantId', 'AppId', 'AgentSkill',
        'OriginatedLegs', 'DVPCallDirection', 'HangupDisposition',
        'AgentAnswered', 'IsQueued', 'SipResource', 'BusinessUnit',
        'CampaignId', 'CampaignName', 'ExtraData',
    ]
    # Key-reference columns handed to the "FactCall" fact table.
    fact_keyrefs = [
        'uuid', 'CallUuid', 'BridgeUuid',
        'CreatedDateDimId', 'AnsweredDateDimId', 'BridgedDateDimId',
        'HangupDateDimId',
        'CreatedTimeDimId', 'AnsweredTimeDimId', 'BridgedTimeDimId',
        'HangupTimeDimId',
    ]
    # Measure columns handed to the "FactCall" fact table.
    fact_measures = [
        'CreatedTime', 'AnsweredTime', 'BridgedTime', 'HangupTime',
        'BillSec', 'HoldSec', 'ProgressSec', 'QueueSec', 'AnswerSec',
        'WaitSec', 'ProgressMediaSec', 'FlowBillSec',
    ]

    call_dimension = Dimension(
        name='"DimCall"',
        key='uuid',
        attributes=dimension_attributes)
    call_fact_table = FactTable(
        name='"FactCall"',
        keyrefs=fact_keyrefs,
        measures=fact_measures)

    # Store the key reported by the dimension before writing the fact row.
    call_key = call_dimension.ensure(self.row)
    self.row['uuid'] = call_key
    call_fact_table.insert(self.row)
def load_tickets(self):
    """Load ``self.row`` into the ticket dimension and the ticket fact table.

    The row is passed to ``ensure`` on the ``"DimTicket"`` dimension, the
    value returned is written back to ``self.row['ticket_id']``, and the row
    is then inserted into the ``"FactTicket"`` fact table.
    """
    dimension_spec = {
        'name': '"DimTicket"',
        'key': 'ticket_id',
        'attributes': [
            'active', 'tid', 'is_sub_ticket', 'subject', 'reference',
            'description', 'priority', 'requester', 'submitter',
            'collaborators', 'watchers', 'attachments', 'sub_tickets',
            'related_tickets', 'merged_tickets', 'isolated_tags', 'sla',
            'comments', 'tags', 'ticket_type', 'status_type', 'channel',
            'company', 'tenant', 'assignee', 'assignee_group',
            'security_level', 'event_author',
        ],
    }
    fact_spec = {
        'name': '"FactTicket"',
        'keyrefs': [
            'engagement_session_id', 'ticket_id',
            'created_at_dim_id', 'updated_at_dim_id', 'due_at_dim_id',
            'created_time_dim_id', 'updated_time_dim_id', 'due_time_dim_id',
        ],
        'measures': [
            'created_at', 'updated_at', 'due_at', 'time_estimation',
            're_opens', 'waited_time', 'worked_time', 'resolution_time',
        ],
    }

    ticket_dimension = Dimension(**dimension_spec)
    ticket_fact_table = FactTable(**fact_spec)

    # ensure() hands back the dimension key; it is written into the row
    # before the row is inserted into the fact table.
    ticket_key = ticket_dimension.ensure(self.row)
    self.row['ticket_id'] = ticket_key
    ticket_fact_table.insert(self.row)
def load_engagement(self):
    """Load ``self.row`` into the engagement dimension and fact table.

    The row is passed to ``ensure`` on the ``"DimEngagement"`` dimension, the
    value returned is stored in ``self.row['eng_dim_id']``, and the row is
    then inserted into the ``"FactEngagement"`` fact table.

    Returns:
        Always ``True``.
    """
    dimension_attributes = [
        'profile', 'channel_from', 'channel_to', 'company',
        'has_profile', 'tenant', 'notes', 'channel',
    ]
    fact_keyrefs = [
        'engagement_id',
        'created_at_dim_id', 'updated_at_dim_id',
        'created_time_dim_id', 'updated_time_dim_id',
    ]
    fact_measures = ['updated_at', 'created_at']

    engagement_dimension = Dimension(
        name='"DimEngagement"',
        key='engagement_id',
        attributes=dimension_attributes)
    engagement_fact_table = FactTable(
        name='"FactEngagement"',
        keyrefs=fact_keyrefs,
        measures=fact_measures)

    # NOTE(review): the key is stored under 'eng_dim_id', while the fact
    # table references 'engagement_id' - presumably the row already carries
    # 'engagement_id'; confirm against the caller.
    dimension_key = engagement_dimension.ensure(self.row)
    self.row['eng_dim_id'] = dimension_key
    engagement_fact_table.insert(self.row)
    return True
def load_ticket_events(self):
    """Load ``self.row`` into the ticket-event dimension and fact table.

    The row is passed to ``ensure`` on the ``"DimTicketEvent"`` dimension and
    the key it returns is written back to ``self.row['ticket_event_id']`` so
    that the row inserted into ``"FactTicketEvent"`` references the dimension
    member that ``ensure`` found or created.
    """
    ticket_event_dimension = Dimension(
        name='"DimTicketEvent"',
        key='ticket_event_id',
        attributes=['event_type', 'event_author', 'event_body'])

    # NOTE(review): 'event_created_at_dim_id' is listed as a measure here,
    # whereas similarly named *_dim_id columns are key references in the
    # other loaders - confirm against the table schema.
    ticket_event_fact_table = FactTable(
        name='"FactTicketEvent"',
        keyrefs=['ticket_id', 'ticket_event_id'],
        measures=['event_created_at_dim_id', 'event_created_at'])

    # Update the row with the primary key of the dimension while at the same
    # time inserting new data into the dimension. The returned key used to be
    # discarded, leaving the fact row without the key ensure() reported.
    self.row['ticket_event_id'] = ticket_event_dimension.ensure(self.row)
    ticket_event_fact_table.insert(self.row)
def load_external_users(self):
    """Load ``self.row`` into the external-users dimension and fact table.

    The row is passed to ``ensure`` on the ``"DimExternalUsers"`` dimension,
    the value returned is written back to ``self.row['external_user_id']``,
    and the row is then inserted into the ``"FactExternalUsers"`` fact table.
    """
    # Attribute columns handed to the "DimExternalUsers" dimension.
    user_attributes = [
        'title', 'name', 'avatar', 'birthday', 'gender',
        'first_name', 'last_name', 'locale', 'ssn', 'password',
        'phone', 'email', 'landnumber',
        'facebook', 'twitter', 'linkedin', 'googleplus', 'skype',
        'thirdpartyreference', 'company', 'tenant', 'custom_fields',
        'tags', 'contacts',
        'zipcode', 'address_number', 'city', 'province', 'country',
    ]
    # Key-reference and measure columns handed to the fact table.
    fact_keyrefs = [
        'external_user_id',
        'created_at_dim_id', 'updated_at_dim_id',
        'created_time_dim_id', 'updated_time_dim_id',
    ]
    fact_measures = ['created_at', 'updated_at']

    external_users_dimension = Dimension(
        name='"DimExternalUsers"',
        key='external_user_id',
        attributes=user_attributes)
    external_users_fact_table = FactTable(
        name='"FactExternalUsers"',
        keyrefs=fact_keyrefs,
        measures=fact_measures)

    # Write the key reported by the dimension into the row, then store the
    # row in the fact table.
    user_key = external_users_dimension.ensure(self.row)
    self.row['external_user_id'] = user_key
    external_users_fact_table.insert(self.row)
# NOTE(review): this fragment is flattened onto a single line that starts with '#', so Python reads all of it - including the embedded statements - as one comment; the loop that would supply `row` is not visible here. Restore the original line breaks before relying on it.
# We do not use ensure() for the location dimension, as city # and region information of all stores has already been loaded into # the table, and having a sale from a store we do not know about is # probably either an error in the sales or region data. We use lookup # instead which does not insert data and returns None, if no row with # the requested data is available, allowing for simple implementation # of error handling in ETL flow, which is shown here as an exception row['locationid'] = location_dimension.lookup(row) # A simple example of how to check if a lookup was successful, so # errors can be handled in some way. Here we just give up, and throw # an error. if not row['locationid']: raise ValueError("City was not present in the location dimension") # As the number of sales was already conveniently aggregated in the # sales table, the row can now be inserted into the data warehouse as # we have all the IDs we need. If aggregations, or other more advanced # manipulation is required, the full power of Python is available as shown # with the call to the split_timestamp(row) function. fact_table.insert(row) # After all the data is inserted, we close the connection in order to # ensure that all data is committed to the database and that the # connection is correctly released dw_conn_wrapper.commit() dw_conn_wrapper.close() # Finally, the connection to the sales database is closed sales_pgconn.close()
def setup(dbname, number):
    """Create a small SQLite data warehouse and populate it with test data.

    Any existing file at the path is truncated. The tables ``dim1``, ``dim2``
    and ``ft1`` are created, ``number`` rows are generated for each dimension
    and ``number * number`` rows for the fact table, and all rows are
    inserted through pygrametl. The time taken by each step and the total
    are printed.

    Args:
        dbname: Path of the SQLite database file; a leading ``~`` is expanded.
        number: Number of rows to generate per dimension.
    """
    # Expand '~' once so the file that is truncated and the file SQLite opens
    # are the same one.
    db_path = os.path.expanduser(dbname)

    # Start from an empty file; the context manager closes the handle right
    # away instead of leaking it.
    with open(db_path, 'w'):
        pass

    output_conn = sqlite3.connect(db_path)
    output_cur = output_conn.cursor()

    output_cur.execute("CREATE TABLE dim1 "
                       "(key1 INTEGER PRIMARY KEY, attr1 INTEGER, "
                       "attr2 INTEGER)")

    output_cur.execute("CREATE TABLE dim2 "
                       "(key2 INTEGER PRIMARY KEY, attr3 INTEGER, "
                       "attr4 INTEGER)")

    output_cur.execute("CREATE TABLE ft1 "
                       "(measure INTEGER, key1 INTEGER, key2 INTEGER, "
                       "PRIMARY KEY(key1, key2))")

    output_conn.commit()
    output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn)

    # Bind every table to this wrapper explicitly rather than relying on the
    # implicit default target connection, so the rows always end up in the
    # database created above.
    dim1 = Dimension(
        name='dim1',
        key='key1',
        attributes=['attr1', 'attr2'],
        targetconnection=output_wrapper,
    )

    dim2 = Dimension(
        name='dim2',
        key='key2',
        attributes=['attr3', 'attr4'],
        targetconnection=output_wrapper,
    )

    ft1 = FactTable(
        name='ft1',
        keyrefs=['key1', 'key2'],
        measures=['measure'],
        targetconnection=output_wrapper,
    )

    total_elapsed = 0.

    print('Generating ft1 data')
    start = time.monotonic()
    # The measure grows by 10 for every (key1, key2) pair in row-major
    # order, i.e. 10 for (1, 1), 20 for (1, 2), and so on.
    facts = [
        {'key1': i, 'key2': j, 'measure': 10 * ((i - 1) * number + j)}
        for i in range(1, number + 1)
        for j in range(1, number + 1)
    ]
    elapsed = time.monotonic() - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Generating dim1 data')
    start = time.monotonic()
    dim1_data = [
        {'attr1': i, 'attr2': number + 1 - i}
        for i in range(1, number + 1)
    ]
    elapsed = time.monotonic() - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Generating dim2 data')
    start = time.monotonic()
    dim2_data = [
        {'attr3': i, 'attr4': number + 1 - i}
        for i in range(1, number + 1)
    ]
    elapsed = time.monotonic() - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into ft1')
    start = time.monotonic()
    for row in facts:
        ft1.insert(row)
    elapsed = time.monotonic() - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into dim1')
    start = time.monotonic()
    for row in dim1_data:
        dim1.insert(row)
    elapsed = time.monotonic() - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into dim2')
    start = time.monotonic()
    for row in dim2_data:
        dim2.insert(row)
    elapsed = time.monotonic() - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('DW populated')
    print('Total time: {}{}\n'.format(round(total_elapsed, 3), 's'))
    output_conn.commit()
# We put the regions into our location_dimension. A plain loop is used
# because the inserts are performed only for their side effect.
for row in region_source:
    location_dimension.insert(row)
csv_file_handle.close()

# Each row in the sales database is iterated through and inserted
for row in sales_source:

    # Each row is passed to the timestamp split function for splitting
    split_timestamp(row)

    # Lookups are performed to find the key in each dimension for the fact
    # and if the data is not there, it is inserted from the sales row
    row['bookid'] = book_dimension.ensure(row)
    row['timeid'] = time_dimension.ensure(row)

    # For the location dimension, all the data is already present, so a
    # missing row must be an error. The check is against None explicitly so
    # that a valid key that happens to be falsy (such as 0) is not rejected.
    row['locationid'] = location_dimension.lookup(row)
    if row['locationid'] is None:
        raise ValueError("city was not present in the location dimension")

    # The row can then be inserted into the fact table
    fact_table.insert(row)

# The data warehouse connection is then ordered to commit and close
dw_conn_wrapper.commit()
dw_conn_wrapper.close()

sales_conn.close()