def test_api_stats_query_colls(self):
    """Query the collection table and seven collection time series for today.

    Expects one result per target (eight in total) and a single table row
    describing the admin user's default collection.
    """
    today = today_str()
    series_names = (
        COLL_COUNT,
        COLL_COUNT_PUBLIC,
        COLL_COUNT_PUBLIC_W_LISTS,
        COLL_SIZES_CREATED,
        COLL_SIZES_UPDATED,
        COLL_SIZES_PUBLIC,
        COLL_SIZES_PUBLIC_W_LISTS,
    )

    # The table target comes first, followed by the time series targets.
    targets = [{'target': COLL_TABLE, 'type': 'table'}]
    targets.extend({'target': name, 'type': 'timeserie'} for name in series_names)

    params = {'range': {'from': today, 'to': today}, 'targets': targets}
    res = self.testapp.post_json('/api/v1/stats/query', params=params)

    results = res.json
    assert isinstance(results, list)
    assert len(results) == 8

    # one public coll
    rows = results[0]['rows']
    assert len(rows) == 1

    for row in rows:
        assert row[0] == 'default-collection'
        assert row[1] == 'Default Collection'
        assert row[3] == 'adminuser'
def test_api_stats_query_custom_timeseries(self):
    """Query three custom time series for today and check their first datapoint."""
    today = today_str()
    params = {
        'range': {'from': today, 'to': today},
        'targets': [
            {'target': name, 'type': 'timeserie'}
            for name in (USER_LOGINS, USER_LOGINS_100, ACTIVE_SESSIONS)
        ],
    }

    res = self.testapp.post_json('/api/v1/stats/query', params=params)
    results = res.json

    assert isinstance(results, list)
    assert len(results) == 3

    def first_value(index):
        # Value of the first datapoint of the result at ``index``.
        return results[index]['datapoints'][0][0]

    # 3 user logins (for 3 users!)
    assert first_value(0) == 3

    # 0 user logins for users with >100MB
    assert first_value(1) == 0

    # 1 active session
    assert first_value(2) == 1
def test_stats(self):
    """Check today's download/upload counters and the per-user upload count."""
    today = today_str()
    user_info_key = User.INFO_KEY.format(user='******')

    # hget results are compared against string values.
    assert self.redis.hget(Stats.DOWNLOADS_USER_COUNT_KEY, today) == '1'
    assert self.redis.hget(Stats.UPLOADS_COUNT_KEY, today) == '4'
    assert self.redis.hget(user_info_key, Stats.UPLOADS_PROP) == '4'
def test_api_stats_query_temps(self):
    """Query the temp-user table plus an unknown table target for today.

    The unknown target must yield an empty dict; the temp table must list
    exactly the anonymous user of this test session.
    """
    today = today_str()
    targets = [
        {'target': table_name, 'type': 'table'}
        for table_name in ('Temp Table', 'not found')
    ]
    params = {'range': {'from': today, 'to': today}, 'targets': targets}

    res = self.testapp.post_json('/api/v1/stats/query', params=params)
    results = res.json

    assert isinstance(results, list)
    assert len(results) == 2
    assert results[1] == {}

    temp_table = results[0]
    assert len(temp_table['rows']) == 1

    # First column of each row is compared against the anon user name.
    first_column = {row[0] for row in temp_table['rows']}
    assert first_column == {self.anon_user}
def test_api_stats_query_users(self):
    """Query the user table and the total-users series for today."""
    # user table cached
    assert self.redis.get(AdminController.CACHE_USER_TABLE)

    today = today_str()
    date_range = {'from': today, 'to': today}
    targets = [
        {'target': USER_TABLE, 'type': 'table'},
        {'target': TOTAL_USERS, 'type': 'timeserie'},
    ]

    res = self.testapp.post_json(
        '/api/v1/stats/query',
        params={'range': date_range, 'targets': targets},
    )
    results = res.json

    assert isinstance(results, list)
    assert len(results) == 2

    user_rows = results[0]['rows']
    assert len(user_rows) == 3

    usernames = {row[0] for row in user_rows}
    assert usernames == {'test', 'another', 'adminuser'}

    # total query
    assert results[1]['datapoints'][0][0] == 3
def test_api_stats_query_custom_timeseries(self):
    """Query four custom user time series for today and check their first datapoint."""
    today = today_str()
    series_names = (USER_CREATED, USER_LOGINS, USER_LOGINS_100, ACTIVE_SESSIONS)
    params = {
        'range': {'from': today, 'to': today},
        'targets': [{'target': name, 'type': 'timeserie'} for name in series_names],
    }

    res = self.testapp.post_json('/api/v1/stats/query', params=params)
    results = res.json

    assert isinstance(results, list)
    assert len(results) == 4

    def first_value(index):
        # Value of the first datapoint of the result at ``index``.
        return results[index]['datapoints'][0][0]

    # 3 users created
    assert first_value(0) == 3

    # 3 user logins (for 3 users!)
    assert first_value(1) == 3

    # 0 user logins for users with >100MB
    assert first_value(2) == 0

    # 1 active session
    assert first_value(3) == 1
def test_api_stats_query_timeseries(self):
    """Query two capture series and one unknown series; expect three results."""
    today = today_str()
    series_names = ('All Capture Logged In', 'All Capture Temp', 'not_found')

    params = {
        'range': {'from': today, 'to': today},
        'targets': [{'target': name, 'type': 'timeserie'} for name in series_names],
    }
    res = self.testapp.post_json('/api/v1/stats/query', params=params)

    results = res.json
    assert isinstance(results, list)
    assert len(results) == 3
def test_api_stats_query_colls(self):
    """Post a stats query for the collection table and collection series.

    Eight targets are sent, so eight results are expected. The table result
    must contain one row: the default collection owned by 'adminuser'.
    """
    def target(name, kind='timeserie'):
        # Build one query target entry.
        return {'target': name, 'type': kind}

    today = today_str()
    params = {
        'range': {'from': today, 'to': today},
        'targets': [
            target(COLL_TABLE, 'table'),
            target(COLL_COUNT),
            target(COLL_COUNT_PUBLIC),
            target(COLL_COUNT_PUBLIC_W_LISTS),
            target(COLL_SIZES_CREATED),
            target(COLL_SIZES_UPDATED),
            target(COLL_SIZES_PUBLIC),
            target(COLL_SIZES_PUBLIC_W_LISTS),
        ],
    }

    res = self.testapp.post_json('/api/v1/stats/query', params=params)
    payload = res.json

    assert isinstance(payload, list)
    assert len(payload) == 8

    # one public coll
    coll_rows = payload[0]['rows']
    assert len(coll_rows) == 1

    for coll_row in coll_rows:
        assert coll_row[0] == 'default-collection'
        assert coll_row[1] == 'Default Collection'
        assert coll_row[3] == 'adminuser'
def test_api_stats_query_timeseries(self):
    """Post a three-target time series query for today and check the result count."""
    today = today_str()
    targets = []
    for name in ('All Capture Logged In', 'All Capture Temp', 'not_found'):
        targets.append({'target': name, 'type': 'timeserie'})

    res = self.testapp.post_json(
        '/api/v1/stats/query',
        params={'range': {'from': today, 'to': today}, 'targets': targets},
    )

    payload = res.json
    assert isinstance(payload, list)
    assert len(payload) == 3
def incr_replay(self, size, username):
    """Add ``size`` to today's replay counter.

    The temp-user hash is used when ``username`` starts with
    ``self.TEMP_PREFIX``; otherwise the regular-user hash is used.
    """
    is_temp = username.startswith(self.TEMP_PREFIX)
    replay_key = self.REPLAY_TEMP_KEY if is_temp else self.REPLAY_USER_KEY
    self.redis.hincrby(replay_key, today_str(), size)
def check():
    """Assert today's storage dir for the collection holds one WARC and one index.

    Reads ``self`` and ``coll_id`` from the enclosing scope.
    """
    coll_dir = os.path.join(self.storage_dir, today_str(), coll_id)

    # moved to store dir
    assert set(os.listdir(coll_dir)) == {'warcs', 'indexes'}

    # Each subdirectory must contain exactly one entry.
    for subdir in ('warcs', 'indexes'):
        assert len(os.listdir(os.path.join(coll_dir, subdir))) == 1
def test_init_coll_and_user(self):
    """Create a temp collection and a registered user, then record the store dir.

    Sets ``TestDatShare.coll_store_dir`` to ``<today>/<COLL_ID>``.
    """
    url = '/api/v1/collections?user={user}'.format(user=self.anon_user)
    res = self.testapp.post_json(url, params={'title': 'temp'})
    assert res.json['collection']

    self.manager.create_user('*****@*****.**', 'test', 'TestTest123', 'archivist', 'Test')

    TestDatShare.coll_store_dir = today_str() + '/' + self.COLL_ID
def move_temp_to_user_usage(self, collection):
    """Record that ``collection`` moved from temp usage to user usage.

    Under today's date: bumps the temp-move count by one and the temp-move
    size by the collection size. Under the collection's creation date: adds
    the size to the user capture hash and subtracts it from the temp one.
    """
    moved_on = today_str()
    created_on = collection.get_created_iso_date()
    num_bytes = collection.size

    # (hash key, field, amount) for each counter to adjust
    increments = (
        (self.TEMP_MOVE_COUNT_KEY, moved_on, 1),
        (self.TEMP_MOVE_SIZE_KEY, moved_on, num_bytes),
        (self.ALL_CAPTURE_USER_KEY, created_on, num_bytes),
        (self.ALL_CAPTURE_TEMP_KEY, created_on, -num_bytes),
    )

    with redis_pipeline(self.redis) as pi:
        for key, field, amount in increments:
            pi.hincrby(key, field, amount)
def incr_behavior_stat(self, stat, behavior, browser):
    """Increment today's counter for a behavior 'start' or 'done' event.

    Does nothing when ``stat`` is not 'start' or 'done', or when ``behavior``
    is falsy. ``browser`` is accepted but not used here.
    """
    if stat in ('start', 'done') and behavior:
        behavior_key = self.BEHAVIOR_KEY.format(stat=stat, name=behavior)
        self.redis.hincrby(behavior_key, today_str(), 1)
def test_api_stats_query_temps(self):
    """Post a two-table query: the temp table and a table that does not exist."""
    today = today_str()
    params = {
        'range': {'from': today, 'to': today},
        'targets': [
            {'target': 'Temp Table', 'type': 'table'},
            {'target': 'not found', 'type': 'table'},
        ],
    }

    res = self.testapp.post_json('/api/v1/stats/query', params=params)
    payload = res.json

    assert isinstance(payload, list)
    assert len(payload) == 2

    # The unknown table comes back as an empty dict.
    found, missing = payload
    assert missing == {}

    rows = found['rows']
    assert len(rows) == 1
    assert {row[0] for row in rows} == {self.anon_user}
def test_stats(self):
    """Check today's temp-move counters and the exact set of 'st:*' keys."""
    today = today_str()

    move_count = int(self.redis.hget(Stats.TEMP_MOVE_COUNT_KEY, today))
    assert move_count == 1

    move_size = int(self.redis.hget(Stats.TEMP_MOVE_SIZE_KEY, today))
    assert move_size > 0

    expected_keys = {
        Stats.TEMP_MOVE_COUNT_KEY,
        Stats.TEMP_MOVE_SIZE_KEY,
        Stats.ALL_CAPTURE_TEMP_KEY,
        Stats.ALL_CAPTURE_USER_KEY,
        Stats.REPLAY_USER_KEY,
        Stats.DOWNLOADS_USER_COUNT_KEY,
        Stats.DOWNLOADS_USER_SIZE_KEY,
        Stats.DELETE_USER_KEY,
    }
    assert set(self.redis.keys('st:*')) == expected_keys
def check():
    """Assert that exactly two keys are listed: a .cdxj then a .warc.gz, both dated today.

    Reads ``self`` from the enclosing scope.
    """
    today = today_str()

    # Not used below; the lookup still raises KeyError if S3_ROOT is unset.
    storage_dir = os.environ['S3_ROOT'] + today

    listed = self._list_keys()
    assert len(listed) == 2

    # Order matters: the index key is expected first, then the WARC key.
    for key, suffix in zip(listed, ('.cdxj', '.warc.gz')):
        assert today in key
        assert key.endswith(suffix)
def incr_download(self, collection):
    """Count one download of ``collection``.

    Increments the collection's own download property, then today's download
    count (by one) and download size (by ``collection.size``) in the temp or
    regular-user hashes, depending on the owner's name prefix.
    """
    owner = collection.get_owner()

    if owner.name.startswith(self.TEMP_PREFIX):
        count_key, size_key = (self.DOWNLOADS_TEMP_COUNT_KEY,
                               self.DOWNLOADS_TEMP_SIZE_KEY)
    else:
        count_key, size_key = (self.DOWNLOADS_USER_COUNT_KEY,
                               self.DOWNLOADS_USER_SIZE_KEY)

    collection.incr_key(self.DOWNLOADS_PROP, 1)

    day = today_str()
    self.redis.hincrby(count_key, day, 1)
    self.redis.hincrby(size_key, day, collection.size)
def incr_delete(self, recording):
    """Add the size of a deleted ``recording`` to today's delete counters.

    The owner is resolved via ``recording.get_owner().get_owner()``. Any
    exception is caught and printed rather than raised.
    """
    try:
        owner = recording.get_owner().get_owner()

        is_temp = owner.name.startswith(self.TEMP_PREFIX)
        delete_key = self.DELETE_TEMP_KEY if is_temp else self.DELETE_USER_KEY

        self.redis.hincrby(delete_key, today_str(), recording.size)
        owner.incr_key(self.DELETE_PROP, recording.size)
    except Exception as err:
        # Failures are reported on stdout and otherwise ignored.
        print('Error Counting Delete: ' + str(err))
def test_api_stats_query_users(self):
    """Post a query for the user table and total user count for today."""
    # user table cached
    assert self.redis.get(AdminController.CACHE_USER_TABLE)

    today = today_str()
    params = {
        'range': {'from': today, 'to': today},
        'targets': [
            {'target': USER_TABLE, 'type': 'table'},
            {'target': TOTAL_USERS, 'type': 'timeserie'},
        ],
    }
    res = self.testapp.post_json('/api/v1/stats/query', params=params)

    payload = res.json
    assert isinstance(payload, list)
    assert len(payload) == 2

    table, totals = payload

    assert len(table['rows']) == 3
    names = {row[0] for row in table['rows']}
    assert names == {'test', 'another', 'adminuser'}

    # total query
    assert totals['datapoints'][0][0] == 3
# incr_record(self, params, size, cdx_list)
#
# Records capture statistics, keyed by today's date string.
#
# - Returns immediately when params has no 'param.user' value.
# - In a first redis pipeline: if self.get_rate_limit_key(params) yields a
#   key, adds `size` to it and sets its expiry to self.RATE_LIMIT_TTL; then
#   adds `size` to the temp or user "all capture" hash, chosen by whether the
#   username starts with self.TEMP_PREFIX.
# - If params has a 'sources' value (extract) or a 'param.recorder.rec' value
#   (patch), a second pipeline walks cdx_list, wraps each entry in CDXObject
#   and adds its 'length' to the per-source hash for its 'orig_source_id'.
#   Exceptions raised while handling an entry are silently ignored.
# - For patch requests the temp or user patch hash is also incremented.
#
# NOTE(review): this definition has lost its line breaks and indentation, so
# it is not clear from here whether the trailing `if is_patch:` section runs
# once after the cdx loop or once per cdx entry - confirm against the
# original file before reformatting.
# NOTE(review): `size` is rebound inside the cdx loop, so the patch increment
# appears to use a cdx 'length' rather than the `size` argument whenever at
# least one entry parsed - confirm this is intended.
# NOTE(review): `!= None` would conventionally be `is not None`.
def incr_record(self, params, size, cdx_list): username = params.get('param.user') if not username: return today = today_str() with redis_pipeline(self.redis) as pi: # rate limiting rate_limit_key = self.get_rate_limit_key(params) if rate_limit_key: pi.incrby(rate_limit_key, size) pi.expire(rate_limit_key, self.RATE_LIMIT_TTL) # write size to usage hashes if username.startswith(self.TEMP_PREFIX): key = self.ALL_CAPTURE_TEMP_KEY else: key = self.ALL_CAPTURE_USER_KEY if key: pi.hincrby(key, today, size) is_extract = params.get('sources') != None is_patch = params.get('param.recorder.rec') != None if is_extract or is_patch: with redis_pipeline(self.redis) as pi: for cdx in cdx_list: try: cdx = CDXObject(cdx) source_id = cdx['orig_source_id'] size = int(cdx['length']) if source_id and size: pi.hincrby(self.SOURCES_KEY.format(source_id), today, size) except Exception as e: pass if is_patch: if username.startswith(self.TEMP_PREFIX): key = self.PATCH_TEMP_KEY else: key = self.PATCH_USER_KEY pi.hincrby(key, today, size)
def test_dat_share(self):
    """Share the default collection over dat and verify the files written.

    The two dat HTTP endpoints are stubbed with ``responses``. After the
    share call, the test checks the API response, the two outgoing requests,
    and the contents of ``dat.json`` and ``metadata/metadata.yaml`` under
    today's storage directory for the collection.
    """
    responses.add(responses.POST, 'http://dat:3000/init', status=200, json=self.dat_info)
    responses.add(responses.POST, 'http://dat:3000/share', status=200, json=self.dat_info)

    params = {'collDir': self.coll_store_dir}

    res = self.testapp.post_json('/api/v1/collection/default-collection/dat/share?user=test', params=params)

    assert res.json['dat_key'] == self.dat_info['datKey']
    # ISO-format strings are compared lexicographically here.
    assert res.json['dat_updated_at'] <= datetime.utcnow().isoformat()
    assert res.json['dat_share'] == True

    assert len(responses.calls) == 2
    assert responses.calls[0].request.url == 'http://dat:3000/init'
    assert responses.calls[1].request.url == 'http://dat:3000/share'

    coll_dir = os.path.join(self.storage_dir, today_str(), self.COLL_ID)

    # test dat.json
    with open(os.path.join(coll_dir, 'dat.json'), 'rt') as fh:
        datjson = json.load(fh)

    assert datjson['url'] == 'dat://' + self.dat_info['datKey']
    assert datjson['author'] == 'Test'
    assert datjson['title'] == 'Default Collection'
    assert datjson['desc'].startswith('*This is your first collection')

    # test metadata.yaml
    # safe_load: yaml.load() without an explicit Loader can construct
    # arbitrary objects and is rejected by newer PyYAML releases.
    with open(os.path.join(coll_dir, 'metadata', 'metadata.yaml'), 'rt') as fh:
        metadata = yaml.safe_load(fh)

    assert metadata['collection']

    # pages in recordings
    assert 'pages' not in metadata['collection']

    assert 'recordings' in metadata['collection']
    for recording in metadata['collection']['recordings']:
        assert 'pages' in recording

    assert 'lists' in metadata['collection']
def incr_upload(self, user, size):
    """Count one upload of ``size`` for ``user``.

    Bumps the user's upload property by one, then today's upload count by
    one and today's upload size by ``size``.
    """
    user.incr_key(self.UPLOADS_PROP, 1)

    day = today_str()
    for key, amount in ((self.UPLOADS_COUNT_KEY, 1), (self.UPLOADS_SIZE_KEY, size)):
        self.redis.hincrby(key, day, amount)
def incr_bookmark_mod(self, num=1):
    """Add ``num`` (default 1) to today's bookmark-modified counter."""
    day = today_str()
    self.redis.hincrby(self.BOOKMARK_MOD_KEY, day, num)
def incr_bookmark_del(self, num=1):
    """Add ``num`` (default 1) to today's bookmark-deleted counter."""
    day = today_str()
    self.redis.hincrby(self.BOOKMARK_DEL_KEY, day, num)
def test_stats(self):
    """Check that today's bookmark-added counter reads '8'."""
    added_today = self.redis.hget(Stats.BOOKMARK_ADD_KEY, today_str())
    assert added_today == '8'
def assert_deleted(self):
    """Assert that today's directory under ``self.storage_dir`` does not exist."""
    todays_dir = os.path.join(self.storage_dir, today_str())
    still_there = os.path.isdir(todays_dir)
    assert not still_there
def incr_bookmark_add(self, num=1):
    """Add ``num`` (default 1) to today's bookmark-added counter."""
    day = today_str()
    self.redis.hincrby(self.BOOKMARK_ADD_KEY, day, num)
def incr_browser(self, browser_id):
    """Increment by one today's counter in the hash for ``browser_id``."""
    self.redis.hincrby(self.BROWSERS_KEY.format(browser_id), today_str(), 1)
def test_stats(self):
    """Check today's bookmark added / modified / deleted counters."""
    today = today_str()

    def counter(key):
        # Today's value stored in the given stats hash.
        return self.redis.hget(key, today)

    assert counter(Stats.BOOKMARK_ADD_KEY) == '11'
    assert counter(Stats.BOOKMARK_MOD_KEY) == '1'

    # only includes explicit deletions or from list deletion
    assert counter(Stats.BOOKMARK_DEL_KEY) == '3'
def incr_bookmark_mod(self, num=1):
    """Add ``num`` to today's bookmark-modified counter.

    :param num: amount to add; defaults to 1, so calls without arguments
        increment the counter by one.
    """
    self.redis.hincrby(self.BOOKMARK_MOD_KEY, today_str(), num)
def incr_bookmark_del(self, num=1):
    """Add ``num`` to today's bookmark-deleted counter.

    :param num: amount to add; defaults to 1, so calls without arguments
        increment the counter by one.
    """
    self.redis.hincrby(self.BOOKMARK_DEL_KEY, today_str(), num)
def test_stats(self):
    """Check that one temp move with a positive size was counted today."""
    today = today_str()

    move_count = int(self.redis.hget(Stats.TEMP_MOVE_COUNT_KEY, today))
    assert move_count == 1

    move_size = int(self.redis.hget(Stats.TEMP_MOVE_SIZE_KEY, today))
    assert move_size > 0
# setup_class(cls, extra_config_file='test_no_invites_config.yaml', init_anon=True, **kwargs)
#
# Class-level test fixture. From what is visible it:
# - calls the parent setup_class, derives cls.warcs_dir and cls.storage_dir
#   from cls.root_dir, creates the warcs dir, and exports RECORD_ROOT and
#   STORAGE_ROOT; cls.storage_today is storage_dir joined with today's date;
# - sets WR_CONFIG, and WR_USER_CONFIG when extra_config_file is truthy;
# - replaces webrecorder.maincontroller.load_wr_config with a local loader
#   that adds 'dyn_stats_key_templ' and 'dyn_ref_templ' entries to the config;
# - creates cls.redis and cls.sesh_redis as FakeStrictRedis instances with
#   decode_responses=True, then calls cls.custom_init(kwargs);
# - returns early when kwargs has a truthy 'no_app'; otherwise builds
#   MainController and a webtest.TestApp around its app;
# - when init_anon is true, posts to /api/v1/auth/anon_user and stores the
#   returned username in cls.anon_user (else cls.anon_user is None).
#
# NOTE(review): the REDIS_BASE_URL assignment below ends in an unbalanced ')'
# and contains masked text, and REDIS_SESSION_URL is read later without a
# visible assignment - several statements appear to be missing here. Restore
# them from the original file before reformatting or running this code.
def setup_class(cls, extra_config_file='test_no_invites_config.yaml', init_anon=True, **kwargs): super(BaseWRTests, cls).setup_class() cls.warcs_dir = to_path(cls.root_dir + '/warcs/') cls.storage_dir = os.path.join(to_path(cls.root_dir + '/storage/')) os.makedirs(cls.warcs_dir) os.environ['RECORD_ROOT'] = cls.warcs_dir os.environ['STORAGE_ROOT'] = cls.storage_dir cls.storage_today = os.path.join(cls.storage_dir, today_str()) os.environ['WR_CONFIG'] = 'pkg://webrecorder/config/wr.yaml' if extra_config_file: os.environ['WR_USER_CONFIG'] = os.path.join(cls.get_curr_dir(), extra_config_file) os.environ['REDIS_BASE_URL'] = 'redis://*****:*****@localhost') cls.set_nx_env('EMAIL_SMTP_URL', 'smtp://[email protected]:test@localhost:25') cls.set_nx_env('NO_REMOTE_BROWSERS', '1') def load_wr_config(): config = load_overlay_config('WR_CONFIG', 'pkg://webrecorder/config/wr.yaml', 'WR_USER_CONFIG', '') config['dyn_stats_key_templ'] = { 'rec': 'r:{rec}:<sesh_id>:stats:', 'coll': 'c:{coll}:<sesh_id>:stats:' } config['dyn_ref_templ'] = { 'rec': 'r:{rec}:<sesh_id>:ref:', 'coll': 'c:{coll}:<sesh_id>:ref:', } return config import webrecorder.maincontroller webrecorder.maincontroller.load_wr_config = load_wr_config cls.redis = FakeStrictRedis.from_url(os.environ['REDIS_BASE_URL'], decode_responses=True) cls.sesh_redis = FakeStrictRedis.from_url(os.environ['REDIS_SESSION_URL'], decode_responses=True) cls.custom_init(kwargs) if kwargs.get('no_app'): return cls.maincont = MainController() cls.testapp = webtest.TestApp(cls.maincont.app) if init_anon: res = cls.testapp.post('/api/v1/auth/anon_user') cls.anon_user = res.json['user']['username'] cls.assert_temp_user_sesh(cls.anon_user) else: cls.anon_user = None
def test_stats(self):
    """Check that the 'ia' source hash exists and today's source/patch sizes are positive."""
    ia_key = Stats.SOURCES_KEY.format('ia')
    assert self.redis.exists(ia_key)

    source_size = int(self.redis.hget(ia_key, today_str()))
    assert source_size > 0

    patch_size = int(self.redis.hget(Stats.PATCH_TEMP_KEY, today_str()))
    assert patch_size > 0
def test_browser_stats(self):
    """Check that 'chrome:60' is the only browser hash and reads '4' for today."""
    chrome_key = Stats.BROWSERS_KEY.format('chrome:60')
    all_browser_keys = self.redis.keys(Stats.BROWSERS_KEY.format('*'))

    assert all_browser_keys == [chrome_key]
    assert self.redis.hget(chrome_key, today_str()) == '4'
def test_browser_stats(self):
    """Check the browser stats keys: only 'chrome:60', with a count of '4' today."""
    key_pattern = Stats.BROWSERS_KEY.format('*')
    expected_key = Stats.BROWSERS_KEY.format('chrome:60')

    found_keys = self.redis.keys(key_pattern)
    assert found_keys == [expected_key]

    todays_count = self.redis.hget(expected_key, today_str())
    assert todays_count == '4'