示例#1
0
def test_toItem(ip, pixel_app):
    '''Check the item produced by `toItem` for the `pixel_app` fixture.'''
    added = datetime.datetime.now()
    # The expected 'DateAdded' string keeps only the first three digits of
    # the microsecond field.
    added_text = (
        added.strftime('%Y-%m-%dT%H:%M:%S.')
        + added.strftime('%f')[:3]
        + 'Z'
    )
    expected_item = {
        'PK': {'S': f'VISITOR#{ ip }'},
        'SK': {'S': 'BROWSER#2020-01-01T00:00:00.000Z'},
        'Type': {'S': 'browser'},
        'App': {'S': pixel_app},
        'Width': {'N': '100'},
        'Height': {'N': '200'},
        'DateVisited': {'S': '2020-01-01T00:00:00.000Z'},
        'Device': {'S': 'Pixel 4 XL'},
        'DeviceType': {'S': 'mobile'},
        'Browser': {'S': 'chrome'},
        'OS': {'S': '11'},
        'Webkit': {'S': '537.36'},
        'Version': {'S': '86.0.4240.198'},
        'DateAdded': {'S': added_text},
    }
    subject = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    assert subject.toItem() == expected_item
示例#2
0
def test_pk(ip, pixel_app):
    '''Check that `pk` returns the `VISITOR#<ip>` string attribute.'''
    added = datetime.datetime.now()
    subject = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    expected_key = {'S': f'VISITOR#{ ip }'}
    assert subject.pk() == expected_key
示例#3
0
def browsers():
    '''Build two sample `Browser` objects that share an IP and viewport size.

    The first uses an iPhone Safari user agent, the second a Pixel 4 XL
    Chrome user agent visited one minute later.
    '''
    iphone_agent = (
        'Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like Mac OS X) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.2 '
        'Mobile/15E148 Safari/604.1'
    )
    pixel_agent = (
        'Mozilla/5.0 (Linux; Android 11; Pixel 4 XL) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36'
    )
    iphone_browser = Browser(
        iphone_agent, '0.0.0.0', 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded='2020-01-01T00:00:00.000Z'
    )
    pixel_browser = Browser(
        pixel_agent, '0.0.0.0', 100, 200, '2020-01-01T00:01:00.000Z',
        dateAdded='2020-01-01T00:00:00.000Z'
    )
    return [iphone_browser, pixel_browser]
示例#4
0
def test_key(ip, pixel_app):
    '''Check that `key` returns both the 'PK' and 'SK' string attributes.'''
    added = datetime.datetime.now()
    subject = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    expected_key = {
        'PK': {'S': f'VISITOR#{ ip }'},
        'SK': {'S': 'BROWSER#2020-01-01T00:00:00.000Z'},
    }
    assert subject.key() == expected_key
示例#5
0
def browser():
    '''Build a sample `Browser` from an iPhone Safari user agent string.'''
    iphone_agent = (
        'Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like Mac OS X) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.2 '
        'Mobile/15E148 Safari/604.1'
    )
    return Browser(
        iphone_agent, '0.0.0.0', 100, 200, '2020-01-01T00:00:00.000Z'
    )
示例#6
0
def test_itemToBrowser(ip, pixel_app):
    '''Check that `toItem` followed by `itemToBrowser` keeps every attribute.'''
    now = datetime.datetime.now()
    # The date added is passed as a string with the microseconds cut to
    # their first three digits.
    added_text = (
        now.strftime('%Y-%m-%dT%H:%M:%S.')
        + now.strftime('%f')[:3]
        + 'Z'
    )
    original = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added_text
    )
    restored = itemToBrowser(original.toItem())
    # Compared in this order so the first mismatch reported stays the same.
    compared_attributes = (
        'app', 'ip', 'width', 'height', 'dateVisited', 'dateAdded',
        'device', 'deviceType', 'browser', 'os', 'webkit', 'version',
    )
    for attribute in compared_attributes:
        assert getattr(original, attribute) == getattr(restored, attribute)
示例#7
0
def test_default_mac_safari_init(ip, mac_safari_app):
    '''Check the attributes of a `Browser` built from `mac_safari_app`.'''
    added = datetime.datetime.now()
    parsed = Browser(
        mac_safari_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == mac_safari_app
    assert parsed.ip == ip
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Values expected for this app fixture.
    assert parsed.device == 'mac'
    assert parsed.deviceType == 'desktop'
    assert parsed.browser == 'safari'
    assert parsed.os == '10.15.6'
    assert parsed.webkit == '605.1.15'
    assert parsed.version == '14.0.2'
示例#8
0
def test_default_mac_chrome_init(ip, mac_chrome_app):
    '''Check the attributes of a `Browser` built from `mac_chrome_app`.'''
    added = datetime.datetime.now()
    parsed = Browser(
        mac_chrome_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == mac_chrome_app
    assert parsed.ip == ip
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Values expected for this app fixture.
    assert parsed.device == 'mac'
    assert parsed.deviceType == 'desktop'
    assert parsed.browser == 'chrome'
    assert parsed.os == '11.1.0'
    assert parsed.webkit == '537.36'
    assert parsed.version == '87.0.4280.88'
示例#9
0
def test_default_pixel_init(ip, pixel_app):
    '''Check the attributes of a `Browser` built from `pixel_app`.'''
    added = datetime.datetime.now()
    parsed = Browser(
        pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == pixel_app
    assert parsed.ip == ip
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Values expected for this app fixture.
    assert parsed.device == 'Pixel 4 XL'
    assert parsed.deviceType == 'mobile'
    assert parsed.browser == 'chrome'
    assert parsed.os == '11'
    assert parsed.webkit == '537.36'
    assert parsed.version == '86.0.4240.198'
示例#10
0
def test_default_samsung_G981U1_init(ip, samsung_G981U1_app):
    '''Check the attributes of a `Browser` built from `samsung_G981U1_app`.'''
    added = datetime.datetime.now()
    parsed = Browser(
        samsung_G981U1_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == samsung_G981U1_app
    assert parsed.ip == ip
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Values expected for this app fixture.
    assert parsed.device == 'SAMSUNG SM-G981U1'
    assert parsed.deviceType == 'mobile'
    assert parsed.browser == 'samsung'
    assert parsed.os == '10'
    assert parsed.webkit == '537.36'
    assert parsed.version == '13.0'
示例#11
0
def test_default_samsung_G950U_init(ip, samsung_G950U_app):
    '''Check the attributes of a `Browser` built from `samsung_G950U_app`.'''
    added = datetime.datetime.now()
    parsed = Browser(
        samsung_G950U_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == samsung_G950U_app
    assert parsed.ip == ip
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Values expected for this app fixture.
    assert parsed.device == 'SM-G950U'
    assert parsed.deviceType == 'mobile'
    assert parsed.browser == 'chrome'
    assert parsed.os == '9'
    assert parsed.webkit == '537.36'
    assert parsed.version == '87.0.4280.101'
示例#12
0
def test_default_windows_chrome_init(windows_chrome_app):
    '''Check the attributes of a `Browser` built from `windows_chrome_app`.

    `visitor_id` is not a parameter here; it is read from the enclosing
    scope.
    '''
    added = datetime.datetime.now()
    parsed = Browser(
        visitor_id, windows_chrome_app, 100, 200,
        '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == windows_chrome_app
    assert parsed.id == visitor_id
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Values expected for this app fixture.
    assert parsed.device == 'windows'
    assert parsed.deviceType == 'desktop'
    assert parsed.browser == 'chrome'
    assert parsed.os == '10.0'
    assert parsed.webkit == '537.36'
    assert parsed.version == '87.0.4280.88'
示例#13
0
def test_default_unknown_init(ip):
    '''Check that the app string 'unknown' leaves every derived field `None`.'''
    added = datetime.datetime.now()
    parsed = Browser(
        'unknown', ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == 'unknown'
    assert parsed.ip == ip
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Nothing is expected to be derived from this app string.
    assert parsed.device is None
    assert parsed.deviceType is None
    assert parsed.browser is None
    assert parsed.os is None
    assert parsed.webkit is None
    assert parsed.version is None
示例#14
0
def test_default_iphone_linkedin_init(ip, iphone_linkedin_app):
    '''Check the attributes of a `Browser` built from `iphone_linkedin_app`.'''
    added = datetime.datetime.now()
    parsed = Browser(
        iphone_linkedin_app, ip, 100, 200, '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == iphone_linkedin_app
    assert parsed.ip == ip
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Values expected for this app fixture; no version is expected.
    assert parsed.device == 'iphone'
    assert parsed.deviceType == 'mobile'
    assert parsed.browser == '[LinkedInApp]'
    assert parsed.os == '14.2'
    assert parsed.webkit == '605.1.15'
    assert parsed.version is None
示例#15
0
def test_default_iphone_safari_init(iphone_safari_app):
    '''Check the attributes of a `Browser` built from `iphone_safari_app`.

    `visitor_id` is not a parameter here; it is read from the enclosing
    scope.
    '''
    added = datetime.datetime.now()
    parsed = Browser(
        visitor_id, iphone_safari_app, 100, 200,
        '2020-01-01T00:00:00.000Z',
        dateAdded=added
    )
    # Values handed to the constructor.
    assert parsed.app == iphone_safari_app
    assert parsed.id == visitor_id
    assert parsed.width == 100
    assert parsed.height == 200
    # The visit date string is expected back as a datetime.
    assert parsed.dateVisited == datetime.datetime(2020, 1, 1, 0, 0, 0)
    assert parsed.dateAdded == added
    # Values expected for this app fixture.
    assert parsed.device == 'iphone'
    assert parsed.deviceType == 'mobile'
    assert parsed.browser == 'safari'
    assert parsed.os == '14.3'
    assert parsed.webkit == '605.1.15'
    assert parsed.version == '14.0.2'
示例#16
0
def test_repr(ip, pixel_app):
    '''Check that `repr` of a `pixel_app` browser reads '<ip> - chrome'.'''
    subject = Browser(pixel_app, ip, 100, 200, '2020-01-01T00:00:00.000Z')
    expected_text = f'{ ip } - chrome'
    assert repr(subject) == expected_text
示例#17
0
def processDF( key, s3_client ):
  '''Reads a raw delimited file from S3 and parses its browsers, visits, and
  session.

  Parameters
  ----------
  key : str
    The key of the raw file in the S3 bucket.
  s3_client : S3Client
    The S3 client used to get the file from.

  Returns
  -------
  result : dict
    The parsed data under the keys 'visits' (a list of Visit), 'session' (a
    single Session), and 'browsers' (a list of Browser).

  Notes
  -----
  A file without any rows is not handled here and ends in an exception.
  '''
  request = s3_client.getObject( key )
  # Read the ',\t'-separated file as a pandas DF indexed by its 'time' column.
  # The 'process' column is named so the columns line up, but it is not
  # loaded.
  df = pd.read_csv(
    io.BytesIO( request['Body'].read() ),
    sep = ',\t', engine = 'python',
    names = [
      'process', 'id', 'time', 'title', 'slug', 'userAgent', 'width',
      'height', 'x', 'y'
    ],
    usecols = [
      'id', 'time', 'title', 'slug', 'userAgent', 'width', 'height', 'x', 'y'
    ],
    index_col = 'time'
  )
  df = df.drop_duplicates().sort_index()
  # Index labels of the rows whose title differs from the row before. The
  # first row always counts because it is compared against a missing value.
  title_changed = df.title.ne( df.title.shift() )
  index_change = df.index[ title_changed.to_numpy() ].tolist()
  # Each visit runs from one title change up to the label just before the
  # next one; the last visit runs to the end of the file.
  # NOTE(review): the '- 1' assumes numeric index labels (they are also
  # divided by 1000 below) — confirm the unit with the producer of the file.
  stops = [ start - 1 for start in index_change[1:] ] + [ df.index[-1] ]
  visits = []
  for start, stop in zip( index_change, stops ):
    temp = df.loc[ start: stop ]
    first_time = temp.index[0]
    last_time = temp.index[-1]
    visits.append(
      Visit(
        temp.id.unique()[0],
        formatEpoch( first_time ),
        # Placeholder; the real session start is assigned to every visit
        # once all of them are known.
        '0',
        temp.title.unique()[0],
        temp.slug.unique()[0],
        formatEpoch( first_time ),
        {
          formatEpoch( index ): { 'x': row.x, 'y': row.y }
          for index, row in temp.iterrows()
        },
        ( last_time - first_time ) / 1000
      )
    )
  # Every visit belongs to the session that starts with the first visit.
  session_start = visits[0].date
  for visit in visits:
    visit.sessionStart = session_start
  # Link each visit to its neighbours in both directions.
  for earlier, later in zip( visits, visits[1:] ):
    earlier.nextTitle = later.title
    earlier.nextSlug = later.slug
    later.prevTitle = earlier.title
    later.prevSlug = earlier.slug
  first_id = df.id.unique()[0]
  times_on_page = [ visit.timeOnPage for visit in visits ]
  session = Session(
    visits[0].sessionStart,
    first_id,
    np.mean( times_on_page ),
    np.sum( times_on_page )
  )
  # One Browser per distinct ( userAgent, height, width ) combination.
  combinations = df.groupby(
    ['userAgent','height','width']
  ).size().reset_index()
  browsers = [
    Browser(
      first_id,
      row.userAgent,
      row.width,
      row.height,
      # First time this exact combination was seen. The user agent is part
      # of the filter so that two agents sharing a viewport size do not
      # share a timestamp.
      formatEpoch(
        df.loc[
          ( df['userAgent'] == row.userAgent )
          & ( df['height'] == row.height )
          & ( df['width'] == row.width )
        ].index[0]
      )
    )
    for _, row in combinations.iterrows()
  ]
  return{ 'visits': visits, 'session': session, 'browsers': browsers }
示例#18
0
def test_repr(pixel_app):
    '''Check that `repr` of a `pixel_app` browser reads '<visitor_id> - chrome'.

    `visitor_id` is not a parameter here; it is read from the enclosing
    scope.
    '''
    subject = Browser(
        visitor_id, pixel_app, 100, 200, '2020-01-01T00:00:00.000Z'
    )
    expected_text = f'{ visitor_id } - chrome'
    assert repr(subject) == expected_text