def getSessionDetails(self, session):
    '''Gets the session and its visits from the table.

    The ``GSI2`` index is queried for the session's key. DynamoDB limits a
    single query response to 1MB, so the query is repeated from the
    ``LastEvaluatedKey`` until no further pages are reported.

    Parameters
    ----------
    session : Session
        The session requested from the table.

    Returns
    -------
    data : dict
        The result of getting the session from the table. This contains
        either the error that occurred (under ``'error'``) or the session
        (``'session'``) and its visits (``'visits'``).
    '''
    try:
        # The same arguments are used for every page of the query.
        query_kwargs = {
            'TableName': self.tableName,
            'IndexName': 'GSI2',
            'KeyConditionExpression': '#gsi2 = :gsi2',
            'ExpressionAttributeNames': {'#gsi2': 'GSI2PK'},
            'ExpressionAttributeValues': {':gsi2': session.gsi2pk()},
            'ScanIndexForward': True,
        }
        result = self.client.query(**query_kwargs)
        # Only the first page decides whether the session exists.
        if not result['Items']:
            return {'error': 'Session not in table'}
        data = {'visits': []}
        while True:
            for item in result['Items']:
                item_type = item['Type']['S']
                if item_type == 'visit':
                    data['visits'].append(itemToVisit(item))
                elif item_type == 'session':
                    data['session'] = itemToSession(item)
            # No 'LastEvaluatedKey' means the last page has been read.
            if 'LastEvaluatedKey' not in result:
                break
            # Continue with the same query (not a scan) from where the
            # previous page stopped.
            result = self.client.query(
                ExclusiveStartKey=result['LastEvaluatedKey'],
                **query_kwargs
            )
        return data
    except ClientError as e:
        print(f'ERROR getSessionDetails: { e }')
        return {'error': 'Could not get session from table'}
def test_processPages(table_name):
    '''Checks that processPages reports one new page per unique slug.

    The visitor, visits, browsers and sessions found in a generated event
    are written to the table first; processPages is then expected to add a
    page for every distinct slug and to update none.
    '''
    ip = randomIP()
    this_event = event(ip, table_name)

    def images_of(item_type):
        # New images of the event's records with the requested type.
        return [
            record['dynamodb']['NewImage']
            for record in this_event['Records']
            if record['dynamodb']['NewImage']['Type']['S'] == item_type
        ]

    visits = [itemToVisit(image) for image in images_of('visit')]

    client = DynamoClient(table_name)
    client.addVisitor(Visitor(ip))
    client.addVisits(visits)

    browsers = [itemToBrowser(image) for image in images_of('browser')]
    client.addBrowsers(browsers)

    sessions = [itemToSession(image) for image in images_of('session')]
    for session in sessions:
        client.addSession(session)

    outcome = processPages(client, this_event)
    unique_slugs = {visit.slug for visit in visits}
    expected = (
        'Successfully added '
        f'{ len( unique_slugs ) } pages and updated 0 '
        f'from { len( visits ) } records.'
    )
    assert outcome == expected
def _parsePageDetails( data, result ): '''Parses the DynamoDB items to their respective objects. Parameters ---------- data : dict The parsed data as a dictionary. result : dict The result of the DynamoDB query. Returns data : dict The original parsed data combined with the new parsed data. ''' for item in result['Items']: if item['Type']['S'] == 'visit': data['visits'].append( itemToVisit( item ) ) elif item['Type']['S'] == 'page': data['page'] = itemToPage( item ) elif item['Type']['S'] == 'day': data['days'].append( itemToDay( item ) ) elif item['Type']['S'] == 'week': data['weeks'].append( itemToWeek( item ) ) elif item['Type']['S'] == 'month': data['months'].append( itemToMonth( item ) ) elif item['Type']['S'] == 'year': data['years'].append( itemToYear( item ) ) return data
def _parseVisitorDetails(data, result): '''Parses the DynamoDB items to their respective objects. Parameters ---------- data : dict The parsed data as a dictionary. result : dict The result of the DynamoDB query. Returns data : dict The original parsed data combined with the new parsed data. ''' for item in result['Items']: if item['Type']['S'] == 'visitor': data['visitor'] = itemToVisitor(item) elif item['Type']['S'] == 'visit': data['visits'].append(itemToVisit(item)) elif item['Type']['S'] == 'session': data['sessions'].append(itemToSession(item)) elif item['Type']['S'] == 'location': data['location'] = itemToLocation(item) elif item['Type']['S'] == 'browser': data['browsers'].append(itemToBrowser(item)) return data
def processPages(dynamo_client, event):
    '''Creates the page and day/week/month/year from a DynamoDB event.

    Parameters
    ----------
    dynamo_client : DynamoClient
        The DynamoDB client used to access the table.
    event : dict
        The DynamoDB PUT event.

    Returns
    -------
    result : str
        The result of the number of pages processed.

    Raises
    ------
    Exception
        When querying the page details or updating the page reports an
        error.
    '''
    # Parse the visits from the event.
    visits = [
        itemToVisit(record['dynamodb']['NewImage'])
        for record in event['Records']
        if record['dynamodb']['NewImage']['Type']['S'] == 'visit'
    ]
    if not visits:
        return 'No pages to process'
    # There may be multiple visits to the same slug, so only the first visit
    # seen for each slug is kept. A dict does this in a single pass and
    # processes the slugs in the order they appear in the event.
    first_visit_by_slug = {}
    for visit in visits:
        first_visit_by_slug.setdefault(visit.slug, visit)
    # Store the number of new and updated pages
    new_pages = 0
    update_pages = 0
    for visit in first_visit_by_slug.values():
        # Query the page and its details
        page_details = dynamo_client.getPageDetails(visit)
        # Raise an exception when an error occurs.
        if 'error' in page_details:
            raise Exception(page_details['error'])
        # Update the page with new results
        update_result = dynamo_client.updatePage(page_details['visits'])
        if 'error' in update_result:
            raise Exception(update_result['error'])
        # The page counts as updated when the query already returned one,
        # and as new otherwise.
        if 'page' in page_details:
            update_pages += 1
        else:
            new_pages += 1
    # Return what was done during execution
    return (
        f'Successfully added { new_pages } pages and updated '
        f'{ update_pages } from { len( visits ) } records.'
    )
def test_itemToVisit():
    '''Checks that a Visit survives a round trip through its item form.

    The visit is converted with ``toItem`` and parsed back with
    ``itemToVisit``; every attribute of the parsed visit must equal the
    attribute of the same name on the original.
    '''
    visit = Visit(
        visitor_id, visit_date, user_number, page_title, page_slug,
        session_start, scroll_events
    )
    item = visit.toItem()
    newVisit = itemToVisit(item)
    assert newVisit.id == visit.id
    assert newVisit.date == visit.date
    assert newVisit.user == visit.user
    assert newVisit.title == visit.title
    assert newVisit.slug == visit.slug
    assert newVisit.sessionStart == visit.sessionStart
    assert newVisit.prevTitle == visit.prevTitle
    # Compare the slug with the slug (previously compared with prevTitle).
    assert newVisit.prevSlug == visit.prevSlug
    assert newVisit.nextTitle == visit.nextTitle
    assert newVisit.nextSlug == visit.nextSlug
    assert newVisit.timeOnPage == visit.timeOnPage
def test_itemToVisit():
    '''Checks that a Visit survives a round trip through its item form.

    The visit is converted with ``toItem`` and parsed back with
    ``itemToVisit``; every attribute of the parsed visit must equal the
    attribute of the same name on the original.
    '''
    visit = Visit(
        '2020-12-23T20:32:26.000Z', '0.0.0.0', '0', 'Tyler Norlund', '/',
        '2020-12-23T20:32:26.000Z'
    )
    item = visit.toItem()
    newVisit = itemToVisit(item)
    assert newVisit.date == visit.date
    assert newVisit.ip == visit.ip
    assert newVisit.user == visit.user
    assert newVisit.title == visit.title
    assert newVisit.slug == visit.slug
    assert newVisit.sessionStart == visit.sessionStart
    assert newVisit.prevTitle == visit.prevTitle
    # Compare the slug with the slug (previously compared with prevTitle).
    assert newVisit.prevSlug == visit.prevSlug
    assert newVisit.nextTitle == visit.nextTitle
    assert newVisit.nextSlug == visit.nextSlug
    assert newVisit.timeOnPage == visit.timeOnPage
def test_itemToVisit_exception():
    '''Checks that parsing an empty item raises the expected error message.'''
    expected_message = "Could not parse visit"
    with pytest.raises(Exception) as raised:
        assert itemToVisit({})
    # The captured exception is only available once the block has exited.
    assert str(raised.value) == expected_message