示例#1
0
def month_visits():
  '''Build four home-page visits whose visit dates fall in January 2020.

  All four visits share the same session start, page title, and page slug.
  The arguments after the scroll events are, in order, the time on page, the
  previous title and slug, and the next title and slug.
  '''
  returning_visitor = '171a0329-f8b2-499c-867d-1942384ddd5a'
  session = '2020-01-01T00:00:00.000Z'
  title, slug = 'Tyler Norlund', '/'
  # First visit of the month, continuing on to the blog.
  first = Visit(
    visitor_id, session, '0', title, slug, session, scroll_events,
    '60', None, None, 'Blog', '/blog'
  )
  # A visit with a previous page but no time on page or next page.
  second = Visit(
    returning_visitor, '2020-01-03T00:00:01.000Z', '0', title, slug,
    session, scroll_events, None, title, slug, None, None
  )
  third = Visit(
    returning_visitor, '2020-01-25T00:00:00.000Z', '0', title, slug,
    session, scroll_events, '120', None, None, 'Resume', '/resume'
  )
  # This visitor ID differs from the returning visitor's only in its last
  # character.
  fourth = Visit(
    '171a0329-f8b2-499c-867d-1942384ddd5s', '2020-01-30T00:00:00.000Z', '0',
    title, slug, session, scroll_events, '120', None, None, 'Resume',
    '/resume'
  )
  return [ first, second, third, fourth ]
示例#2
0
def visits():
    '''Build two visits from 0.0.0.0 that share one session start.

    The first visit is to the home page and leads to the blog; the second is
    the blog visit that came from the home page.
    '''
    session_began = '2020-01-01T00:00:00.000Z'
    address = '0.0.0.0'
    home_visit = Visit(session_began, address, '0', 'Tyler Norlund', '/',
                       session_began, '60', None, None, 'Blog', '/blog')
    blog_visit = Visit('2020-01-01T00:01:00.000Z', address, '0', 'Blog',
                       '/blog', session_began, None, 'Tyler Norlund', '/',
                       None, None)
    return [home_visit, blog_visit]
示例#3
0
def visits():
    '''Return a home-page visit followed by a blog visit, both on 2020-01-03.

    Both visits come from 0.0.0.0 and share the same session start.
    '''
    start = '2020-01-03T00:00:00.000Z'
    home = ('Tyler Norlund', '/')
    blog = ('Blog', '/blog')
    result = []
    # Home page: 60 seconds on the page, no previous page, next page is blog.
    result.append(
        Visit(start, '0.0.0.0', '0', home[0], home[1], start, '60', None,
              None, blog[0], blog[1]))
    # Blog: no time on page, previous page is home, no next page.
    result.append(
        Visit('2020-01-03T00:01:00.000Z', '0.0.0.0', '0', blog[0], blog[1],
              start, None, home[0], home[1], None, None))
    return result
示例#4
0
def day_visits():
    '''Build three home-page visits dated 2020-01-03 from two IP addresses.'''
    midnight = '2020-01-03T00:00:00.000Z'
    title, slug = 'Tyler Norlund', '/'
    to_blog = Visit(midnight, '0.0.0.0', '0', title, slug, midnight, '60',
                    None, None, 'Blog', '/blog')
    # One second later, from a second address, with only a previous page set.
    from_home = Visit('2020-01-03T00:00:01.000Z', '0.0.0.1', '0', title, slug,
                      midnight, None, title, slug, None, None)
    to_resume = Visit(midnight, '0.0.0.1', '0', title, slug, midnight, '120',
                      None, None, 'Resume', '/resume')
    return [to_blog, from_home, to_resume]
示例#5
0
def test_key():
    '''key() returns the visitor partition key and the visit sort key.'''
    subject = Visit(visitor_id, visit_date, user_number, page_title,
                    page_slug, session_start, scroll_events)
    actual = subject.key()
    expected = {
        'PK': {'S': f'VISITOR#{ visitor_id }'},
        'SK': {'S': f'VISIT#{ visit_date }#{ page_slug }'},
    }
    assert actual == expected
示例#6
0
def test_gsi1():
    '''gsi1() keys the visit by its page slug and then by its visit date.'''
    when = '2020-12-23T20:32:26.000Z'
    subject = Visit(when, '0.0.0.0', '0', 'Tyler Norlund', '/', when)
    actual = subject.gsi1()
    expected = {
        'GSI1PK': {'S': 'PAGE#/'},
        'GSI1SK': {'S': 'VISIT#2020-12-23T20:32:26.000Z'},
    }
    assert actual == expected
示例#7
0
def test_gsi1():
    '''gsi1() keys the visit by its page slug and then by its visit date.'''
    subject = Visit(visitor_id, visit_date, user_number, page_title,
                    page_slug, session_start, scroll_events)
    actual = subject.gsi1()
    partition = {'S': f'PAGE#{ page_slug }'}
    sort = {'S': f'VISIT#{ visit_date }'}
    assert actual == {'GSI1PK': partition, 'GSI1SK': sort}
示例#8
0
def test_gsi2():
    '''gsi2() keys the visit by visitor and session start, then by date.'''
    subject = Visit(visitor_id, visit_date, user_number, page_title,
                    page_slug, session_start, scroll_events)
    actual = subject.gsi2()
    partition = {'S': f'SESSION#{ visitor_id }#{ session_start }'}
    sort = {'S': f'VISIT#{ visit_date }'}
    assert actual == {'GSI2PK': partition, 'GSI2SK': sort}
示例#9
0
def test_gsi2():
    '''gsi2() keys the visit by IP address and session start, then by date.'''
    when = '2020-12-23T20:32:26.000Z'
    subject = Visit(when, '0.0.0.0', '0', 'Tyler Norlund', '/', when)
    actual = subject.gsi2()
    expected = {
        'GSI2PK': {'S': 'SESSION#0.0.0.0#2020-12-23T20:32:26.000Z'},
        'GSI2SK': {'S': 'VISIT#2020-12-23T20:32:26.000Z'},
    }
    assert actual == expected
示例#10
0
def test_key():
    '''key() returns the IP-based partition key and the visit sort key.'''
    when = '2020-12-23T20:32:26.000Z'
    subject = Visit(when, '0.0.0.0', '0', 'Tyler Norlund', '/', when)
    actual = subject.key()
    expected = {
        'PK': {'S': 'VISITOR#0.0.0.0'},
        'SK': {'S': 'VISIT#2020-12-23T20:32:26.000Z#/'},
    }
    assert actual == expected
示例#11
0
def day_visits():
    '''Return three home-page Visit objects that all fall on 2020-01-03.'''
    day_start = '2020-01-03T00:00:00.000Z'
    home = ('Tyler Norlund', '/')
    # Each entry holds the visit date, the IP address, and the trailing
    # (time on page, previous title/slug, next title/slug) arguments.
    specs = [
        (day_start, '0.0.0.0', ('60', None, None, 'Blog', '/blog')),
        ('2020-01-03T00:00:01.000Z', '0.0.0.1',
         (None, 'Tyler Norlund', '/', None, None)),
        (day_start, '0.0.0.1', ('120', None, None, 'Resume', '/resume')),
    ]
    return [
        Visit(date, ip, '0', home[0], home[1], day_start, *navigation)
        for date, ip, navigation in specs
    ]
示例#12
0
def test_toItem():
    '''toItem() serializes a Visit with all keys and unset fields as NULL.'''
    subject = Visit(visitor_id, visit_date, user_number, page_title,
                    page_slug, session_start, scroll_events)
    actual = subject.toItem()
    expected = {
        # Primary key and the two secondary-index keys.
        'PK': {'S': f'VISITOR#{ visitor_id }'},
        'SK': {'S': f'VISIT#{ visit_date }#{ page_slug }'},
        'GSI1PK': {'S': f'PAGE#{ page_slug }'},
        'GSI1SK': {'S': f'VISIT#{ visit_date }'},
        'GSI2PK': {'S': f'SESSION#{ visitor_id }#{ session_start }'},
        'GSI2SK': {'S': f'VISIT#{ visit_date }'},
        # Attributes that were given to the constructor.
        'Type': {'S': 'visit'},
        'User': {'N': '0'},
        'ScrollEvents': objectToItemAtr(scroll_events),
        'Title': {'S': page_title},
        'Slug': {'S': page_slug},
    }
    # Attributes that were not given to the constructor are written as NULL.
    for unset in ('PreviousTitle', 'PreviousSlug', 'NextTitle', 'NextSlug',
                  'TimeOnPage'):
        expected[unset] = {'NULL': True}
    assert actual == expected
示例#13
0
def test_toItem():
    '''toItem() serializes a Visit with all keys and unset fields as NULL.'''
    when = '2020-12-23T20:32:26.000Z'
    subject = Visit(when, '0.0.0.0', '0', 'Tyler Norlund', '/', when)
    actual = subject.toItem()
    # Primary key and the two secondary-index keys.
    keys = {
        'PK': {'S': 'VISITOR#0.0.0.0'},
        'SK': {'S': 'VISIT#2020-12-23T20:32:26.000Z#/'},
        'GSI1PK': {'S': 'PAGE#/'},
        'GSI1SK': {'S': 'VISIT#2020-12-23T20:32:26.000Z'},
        'GSI2PK': {'S': 'SESSION#0.0.0.0#2020-12-23T20:32:26.000Z'},
        'GSI2SK': {'S': 'VISIT#2020-12-23T20:32:26.000Z'},
    }
    # Attributes that were given to the constructor.
    provided = {
        'Type': {'S': 'visit'},
        'User': {'N': '0'},
        'Title': {'S': 'Tyler Norlund'},
        'Slug': {'S': '/'},
    }
    # Attributes that were not given to the constructor are written as NULL.
    missing = {
        name: {'NULL': True}
        for name in ('PreviousTitle', 'PreviousSlug', 'NextTitle',
                     'NextSlug', 'TimeOnPage')
    }
    assert actual == {**keys, **provided, **missing}
示例#14
0
def test_itemToVisit():
    '''A Visit survives a round trip through toItem() and itemToVisit().'''
    visit = Visit(visitor_id, visit_date, user_number, page_title, page_slug,
                  session_start, scroll_events)
    item = visit.toItem()
    newVisit = itemToVisit(item)
    assert newVisit.id == visit.id
    assert newVisit.date == visit.date
    assert newVisit.user == visit.user
    assert newVisit.title == visit.title
    assert newVisit.slug == visit.slug
    assert newVisit.sessionStart == visit.sessionStart
    assert newVisit.prevTitle == visit.prevTitle
    # Compare slug with slug. This line used to compare against
    # visit.prevTitle, so it checked the wrong attribute.
    assert newVisit.prevSlug == visit.prevSlug
    assert newVisit.nextTitle == visit.nextTitle
    assert newVisit.nextSlug == visit.nextSlug
    assert newVisit.timeOnPage == visit.timeOnPage
示例#15
0
def test_itemToVisit():
    '''A Visit survives a round trip through toItem() and itemToVisit().'''
    visit = Visit('2020-12-23T20:32:26.000Z', '0.0.0.0', '0', 'Tyler Norlund',
                  '/', '2020-12-23T20:32:26.000Z')
    item = visit.toItem()
    newVisit = itemToVisit(item)
    assert newVisit.date == visit.date
    assert newVisit.ip == visit.ip
    assert newVisit.user == visit.user
    assert newVisit.title == visit.title
    assert newVisit.slug == visit.slug
    assert newVisit.sessionStart == visit.sessionStart
    assert newVisit.prevTitle == visit.prevTitle
    # Compare slug with slug. This line used to compare against
    # visit.prevTitle, so it checked the wrong attribute.
    assert newVisit.prevSlug == visit.prevSlug
    assert newVisit.nextTitle == visit.nextTitle
    assert newVisit.nextSlug == visit.nextSlug
    assert newVisit.timeOnPage == visit.timeOnPage
示例#16
0
def test_parameter_title_addPage(dynamo_client, table_init, table_name,
                                 year_visits):
    '''addPage() raises ValueError when the visits do not share one title.

    A visit titled 'Resume' is appended to the ``year_visits`` fixture to
    trigger the error.
    '''
    # Built outside the ``raises`` block so that only addPage() is under test.
    mismatched_visit = Visit('2020-12-23T20:32:26.000Z', '0.0.0.0', '0',
                             'Resume', '/', '2020-12-23T20:32:26.000Z')
    with pytest.raises(ValueError) as e:
        # No ``assert`` here: the call is expected to raise, so its return
        # value is never available to assert on.
        DynamoClient(table_name).addPage(year_visits + [mismatched_visit])
    assert str(e.value) == 'List of visits must have the same title'
示例#17
0
def test_parameter_year_addWeek(dynamo_client, table_init, table_name,
                                week_visits):
    '''addWeek() raises ValueError when the visits span different years.

    A visit dated in 2021 is appended to the ``week_visits`` fixture to
    trigger the error.
    '''
    # Built outside the ``raises`` block so that only addWeek() is under test.
    mismatched_visit = Visit('2021-12-23T20:32:26.000Z', '0.0.0.0', '0',
                             'Tyler Norlund', '/', '2020-12-23T20:32:26.000Z')
    with pytest.raises(ValueError) as e:
        # No ``assert`` here: the call is expected to raise, so its return
        # value is never available to assert on.
        DynamoClient(table_name).addWeek(week_visits + [mismatched_visit])
    assert str(e.value) == 'List of visits must be from the same year and week'
示例#18
0
def test_parameter_title_addDay(
  dynamo_client, table_init, table_name, day_visits
):
  '''addDay() raises ValueError when the visits do not share one title.

  A visit titled 'Resume' is appended to the ``day_visits`` fixture to trigger
  the error.
  '''
  # Built outside the ``raises`` block so that only addDay() is under test.
  mismatched_visit = Visit(
    visitor_id, '2020-12-23T20:32:26.000Z', '0', 'Resume', '/',
    '2020-12-23T20:32:26.000Z', scroll_events
  )
  with pytest.raises( ValueError ) as e:
    # No ``assert`` here: the call is expected to raise, so its return value
    # is never available to assert on.
    DynamoClient( table_name ).addDay( day_visits + [ mismatched_visit ] )
  assert str( e.value ) == 'List of visits must have the same title'
示例#19
0
def test_parameter_year_addMonth(
  dynamo_client, table_init, table_name, month_visits
):
  '''addMonth() raises ValueError when the visits span different years.

  A visit dated in 2021 is appended to the ``month_visits`` fixture to trigger
  the error.
  '''
  # Built outside the ``raises`` block so that only addMonth() is under test.
  mismatched_visit = Visit(
    visitor_id, '2021-12-23T20:32:26.000Z', '0', 'Tyler Norlund', '/',
    '2020-12-23T20:32:26.000Z', scroll_events
  )
  with pytest.raises( ValueError ) as e:
    # No ``assert`` here: the call is expected to raise, so its return value
    # is never available to assert on.
    DynamoClient( table_name ).addMonth( month_visits + [ mismatched_visit ] )
  assert str( e.value ) == 'List of visits must be from the same year and month'
示例#20
0
def test_dict():
    '''dict(Visit) exposes the parsed date, the given fields, and None.'''
    when = '2020-12-23T20:32:26.000Z'
    as_dict = dict(Visit(when, '0.0.0.0', '0', 'Tyler Norlund', '/', when))
    assert as_dict['date'] == datetime.datetime(2020, 12, 23, 20, 32, 26)
    # The user number is given as the string '0' and comes back as integer 0.
    given = (('ip', '0.0.0.0'), ('user', 0), ('title', 'Tyler Norlund'),
             ('slug', '/'))
    for field, value in given:
        assert as_dict[field] == value
    # Nothing was passed for navigation or timing, so these stay None.
    for field in ('prevTitle', 'prevSlug', 'nextTitle', 'nextSlug',
                  'timeOnPage'):
        assert as_dict[field] is None
示例#21
0
def test_no_user_init():
    '''A Visit constructed with user None reports user number 0.'''
    when = '2020-12-23T20:32:26.000Z'
    parsed = datetime.datetime(2020, 12, 23, 20, 32, 26)
    subject = Visit(when, '0.0.0.0', None, 'Tyler Norlund', '/', when)
    assert subject.date == parsed
    assert subject.ip == '0.0.0.0'
    assert subject.user == 0
    assert subject.title == 'Tyler Norlund'
    assert subject.slug == '/'
    assert subject.sessionStart == parsed
    # Nothing was passed for navigation or timing, so these stay None.
    for attribute in ('prevTitle', 'prevSlug', 'nextTitle', 'nextSlug',
                      'timeOnPage'):
        assert getattr(subject, attribute) is None
示例#22
0
def visits():
  '''Build three visits of one session: home page, resume, then a blog post.

  Every visit uses the module's visitor ID, session start, and scroll events.
  '''
  home_title, home_slug = 'Tyler Norlund', '/'
  resume_title, resume_slug = 'Resume', '/resume'
  post_title = 'Continuous Integration and Continuous Delivery'
  post_slug = '/blog/cicd'
  # The first visit happens at the session start and has no previous page.
  home_visit = Visit(
    visitor_id, session_start, '0', home_title, home_slug,
    session_start, scroll_events, '1.647',
    nextTitle=resume_title, nextSlug=resume_slug
  )
  resume_visit = Visit(
    visitor_id, '2021-02-10T11:27:51.216Z', '0', resume_title, resume_slug,
    session_start, scroll_events, '3.084',
    prevTitle=home_title, prevSlug=home_slug,
    nextTitle=post_title, nextSlug=post_slug
  )
  # NOTE(review): the previous page of this visit is the blog post itself
  # rather than the resume page that precedes it -- confirm this is intended.
  post_visit = Visit(
    visitor_id, '2021-02-10T11:27:57.886Z', '0', post_title, post_slug,
    session_start, scroll_events,
    timeOnPage='3.747',
    prevTitle=post_title, prevSlug=post_slug
  )
  return [ home_visit, resume_visit, post_visit ]
示例#23
0
def test_dict():
    '''dict(Visit) exposes the parsed date, the given fields, and None.'''
    as_dict = dict(
        Visit(visitor_id, visit_date, user_number, page_title, page_slug,
              session_start, scroll_events))
    parsed_date = datetime.datetime.strptime(visit_date,
                                             '%Y-%m-%dT%H:%M:%S.%fZ')
    assert as_dict['date'] == parsed_date
    given = (('id', visitor_id), ('user', 0), ('title', page_title),
             ('slug', page_slug))
    for field, value in given:
        assert as_dict[field] == value
    # Nothing was passed for navigation or timing, so these stay None.
    for field in ('prevTitle', 'prevSlug', 'nextTitle', 'nextSlug',
                  'timeOnPage'):
        assert as_dict[field] is None
示例#24
0
def test_no_user_init():
    '''A Visit constructed with user None reports user number 0.'''
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    subject = Visit(visitor_id, visit_date, None, page_title, page_slug,
                    session_start, scroll_events)
    assert subject.id == visitor_id
    assert subject.date == datetime.datetime.strptime(visit_date,
                                                      timestamp_format)
    assert subject.user == 0
    assert subject.title == page_title
    assert subject.slug == page_slug
    assert subject.sessionStart == datetime.datetime.strptime(
        session_start, timestamp_format)
    # Nothing was passed for navigation or timing, so these stay None.
    for attribute in ('prevTitle', 'prevSlug', 'nextTitle', 'nextSlug',
                      'timeOnPage'):
        assert getattr(subject, attribute) is None
示例#25
0
def processVisits(visits):
    '''Formats a list of visits to have the proper attributes.

    The visits are de-duplicated and ordered by date. Each one is then rebuilt
    with the earliest visit's date as the session start, the seconds until the
    following visit as the time on page, and the neighbouring visits' titles
    and slugs as the previous and next pages.

    Parameters
    ----------
    visits : list[ Visit ]
        The list of visits to be modified to fit the session's attributes.

    Returns
    -------
    visits : list[ Visit ]
        The list of visits that have the corrected attributes. An empty input
        yields an empty list.
    '''
    # Nothing to order or link; the datetime accessor below would fail on an
    # empty column.
    if not visits:
        return []
    # The 'id' column holds each visit's date, which is what the visits are
    # ordered by.
    v_df = pd.DataFrame({
        'id': [visit.date for visit in visits],
        'title': [visit.title for visit in visits],
        'slug': [visit.slug for visit in visits],
        'ip': [visit.ip for visit in visits],
        'user': [visit.user for visit in visits],
    })
    v_df = v_df.drop_duplicates().sort_values(by='id').reset_index(drop=True)
    # diff() gives the seconds elapsed since the previous visit; shifting the
    # result up by one row turns that into the seconds until the next visit,
    # leaving the last visit without a value. (Series.append, used here
    # before, no longer exists in pandas 2.) The cast to object lets the
    # missing value be replaced with None below.
    v_df['seconds'] = (
        v_df['id'].diff().dt.total_seconds().shift(-1).astype(object))
    # Shift the slugs and title up and down in order to associate the
    # previous and next slugs and titles per each visit.
    v_df['prevSlug'] = v_df['slug'].shift(1)
    v_df['prevTitle'] = v_df['title'].shift(1)
    v_df['nextSlug'] = v_df['slug'].shift(-1)
    v_df['nextTitle'] = v_df['title'].shift(-1)
    # Replace the NaN's with the None type for the entities
    v_df = v_df.replace({np.nan: None})
    # The earliest visit's date is the start of the session for every visit.
    session_start = v_df.iloc[0]['id']
    return [
        Visit(row['id'], row['ip'], row['user'], row['title'], row['slug'],
              session_start, row['seconds'], row['prevTitle'],
              row['prevSlug'], row['nextTitle'], row['nextSlug'])
        for _, row in v_df.iterrows()
    ]
示例#26
0
def visit():
  '''Build a Visit from the module-level sample values.

  Only the required arguments are given, so no timing or navigation details
  are passed.
  '''
  sample = Visit(
    visitor_id, visit_date, user_number, page_title, page_slug,
    session_start, scroll_events
  )
  return sample
示例#27
0
def test_repr():
    '''repr() of a Visit is the visitor ID and the visit date.'''
    subject = Visit(visitor_id, visit_date, user_number, page_title,
                    page_slug, session_start, scroll_events)
    actual = repr(subject)
    expected = f'{ visitor_id } - { visit_date}'
    assert actual == expected
示例#28
0
def s3_processor(event, context):
  """Adds the record named by an S3 event to the DynamoDB table.

  The object referenced by the first record of the event is parsed into a
  session, its visits, and its browsers. Depending on what the table already
  holds for the visitor, one of three things happens:

  * The visitor is unknown: the visitor, session, visits, and browsers are
    added ("new").
  * The record starts less than 30 minutes after the last visit of the
    visitor's last session: the record's visits are merged into that session
    and the session is updated ("updated").
  * Otherwise: the record is added as another session of the visitor
    ("additional").

  Args:
      event (dict): The S3 event. Only ``event['Records'][0]`` is read: its
        object key, bucket name, and AWS region.
      context: Unused.

  Returns:
      dict: ``statusCode`` 200 and a JSON-encoded ``body`` reporting the
        number of updated, new, and additional sessions.
  """
  new = 0
  updated = 0
  additional = 0
  # Get the necessary data from the S3 event.
  s3_record = event['Records'][0]
  key = urllib.parse.unquote_plus(
    s3_record['s3']['object']['key'], encoding='utf-8'
  )
  aws_region = s3_record['awsRegion']
  bucket_name = s3_record['s3']['bucket']['name']
  # Create the necessary clients
  dynamo_client = DynamoClient( os.environ['TABLE_NAME'], aws_region )
  s3_client = S3Client( bucket_name, aws_region )
  # Parse the record to get the browsers, visits, and session.
  record = processDF( key, s3_client )
  # Get the visitor from the table
  visitor_details = dynamo_client.getVisitorDetails(
    Visitor( record['session'].id )
  )
  # Add the visitor, visits, session, and browsers if the visitor is not in
  # the table.
  if 'visitor' not in visitor_details:
    dynamo_client.addVisitor( Visitor( record['session'].id ) )
    dynamo_client.addSession( record['session'] )
    dynamo_client.addVisits( record['visits'] )
    dynamo_client.addBrowsers( record['browsers'] )
    new += 1
  # Check to see if the last session can be combined with the one in this
  # record.
  else:
    last_session = visitor_details['sessions'][-1]
    last_sessions_visits = [
      visit for visit in visitor_details['visits']
      if visit.sessionStart == last_session.sessionStart
    ]
    last_visit = last_sessions_visits[-1]
    first_visit = record['visits'][0]
    # Combine the visits and update the session when the last session was
    # less than 30 minutes from this record. The gap is measured from the
    # last stored visit to the record's first visit; the operands used to be
    # reversed, which made the gap negative for a newer record and therefore
    # always below the threshold.
    if ( first_visit.date - last_visit.date ).total_seconds() < 60 * 30:
      # Update all of the record's with the previous session start
      for visit in record['visits']:
        visit.sessionStart = last_session.sessionStart
      # Update the last visit of the last session when the first visit of
      # the record is the last page visited in the previous session.
      if last_visit.title == first_visit.title:
        # The total time on the updated page is the last scroll event on
        # the record's first visit minus the first scroll event of the last
        # visit of the session to update. The scroll event keys are parsed
        # as timestamps.
        time_on_page = (
          datetime.datetime.strptime(
            list( first_visit.scrollEvents.keys() )[-1],
            '%Y-%m-%dT%H:%M:%S.%fZ'
          ) - datetime.datetime.strptime(
            list( last_visit.scrollEvents.keys() )[0],
            '%Y-%m-%dT%H:%M:%S.%fZ'
          )
        ).total_seconds()
        updated_visit = Visit(
          last_visit.id, # visitor_id
          last_visit.date, # date
          last_visit.user, # user
          last_visit.title, # title
          last_visit.slug, # slug
          last_visit.sessionStart, # sessionStart
          {
            **last_visit.scrollEvents,
            **first_visit.scrollEvents
          }, # scrollEvents
          time_on_page, # timeOnPage
          last_visit.prevTitle, # prevTitle
          last_visit.prevSlug, # prevSlug
          first_visit.nextTitle, # nextTitle
          first_visit.nextSlug # nextSlug
        )
        visits_to_update = [ updated_visit ] + record['visits'][1:] + \
          last_sessions_visits[:-1]
      else:
        visits_to_update = record['visits'] + last_sessions_visits
      # Update all of the visits in the record to have the session
      dynamo_client.updateVisits( visits_to_update )
      dynamo_client.addBrowsers( record['browsers'] )
      dynamo_client.updateSession(
        Session(
          last_session.sessionStart, # Start date-time
          last_session.id, # Visitor ID
          np.mean( [
            visit.timeOnPage for visit in visits_to_update
          ] ), # avgTime
          np.sum( [
            visit.timeOnPage for visit in visits_to_update
          ] ) # totalTime
        ),
        []
      )
      updated += 1
    # Add a the new session, visits, and browsers when the last session was
    # more than 30 minutes from this record.
    else:
      dynamo_client.addSession( record['session'] )
      dynamo_client.addVisits( record['visits'] )
      dynamo_client.addBrowsers( record['browsers'] )
      additional += 1

  return {
    'statusCode': 200,
    'body': json.dumps(f'updated { updated }\nnew { new }\nadditional {additional}')
  }
示例#29
0
def test_gsi1pk():
    '''gsi1pk() is the page slug prefixed with PAGE#.'''
    subject = Visit(visitor_id, visit_date, user_number, page_title,
                    page_slug, session_start, scroll_events)
    actual = subject.gsi1pk()
    expected = {'S': f'PAGE#{ page_slug }'}
    assert actual == expected
示例#30
0
def test_pk():
    '''pk() is the visitor ID prefixed with VISITOR#.'''
    subject = Visit(visitor_id, visit_date, user_number, page_title,
                    page_slug, session_start, scroll_events)
    actual = subject.pk()
    expected = {'S': f'VISITOR#{ visitor_id }'}
    assert actual == expected