def init_props(config):
    """Hand ``config`` to every component that exposes ``init_props``.

    The model classes ``User``, ``Collection``, ``Recording`` and ``Stats``
    are initialised first, in that order, followed by the ``storagepaths``
    module.

    :param config: configuration object passed through unchanged to each
        ``init_props`` call.
    """
    # Imports are performed at call time rather than at module import time
    # (presumably to avoid import cycles -- confirm before hoisting them).
    from webrecorder.models import User, Collection, Recording, Stats

    for model_cls in (User, Collection, Recording, Stats):
        model_cls.init_props(config)

    # storagepaths is only imported once the models have been initialised.
    import webrecorder.rec.storage.storagepaths as storagepaths

    storagepaths.init_props(config)
def init_props(config):
    """Hand ``config`` to every component that exposes ``init_props``.

    The model classes ``User``, ``Collection``, ``Recording``, ``Stats`` and
    ``Auto`` are initialised first, in that order, followed by the
    ``storagepaths`` module.

    :param config: configuration object passed through unchanged to each
        ``init_props`` call.
    """
    # Imports are performed at call time rather than at module import time
    # (presumably to avoid import cycles -- confirm before hoisting them).
    from webrecorder.models import User, Collection, Recording, Stats, Auto

    for model_cls in (User, Collection, Recording, Stats, Auto):
        model_cls.init_props(config)

    # storagepaths is only imported once the models have been initialised.
    import webrecorder.rec.storage.storagepaths as storagepaths

    storagepaths.init_props(config)
def __init__(self, *args, **kwargs):
    """Set up the indexer and its Webrecorder-specific bookkeeping state.

    All positional and keyword arguments are forwarded to the parent
    initialiser. Keyword arguments read here:

    * ``info_keys`` -- optional, defaults to an empty list.
    * ``rec_info_key_templ`` -- optional, defaults to ``None``.
    * ``config`` -- required; a ``KeyError`` is raised when it is absent.
    """
    super(WebRecRedisIndexer, self).__init__(*args, **kwargs)

    self.info_keys = kwargs.get('info_keys', [])
    self.rec_info_key_templ = kwargs.get('rec_info_key_templ')

    # The value is not used below, but the lookup is kept so that a missing
    # 'config' keyword still fails with KeyError at this point.
    config = kwargs['config']

    # Redis key templates taken from the model classes.
    self.coll_cdxj_key = Collection.COLL_CDXJ_KEY
    self.rec_file_key_template = Recording.REC_WARC_KEY

    # One loader instance is stored on this indexer and also published on the
    # CDXJIndexer class itself, so CDXJIndexer index writers share it.
    shared_loader = WAMLoader()
    self.wam_loader = shared_loader
    CDXJIndexer.wam_loader = shared_loader

    self.stats = Stats(self.redis)
class WebRecRedisIndexer(WritableRedisIndexer):
    """Writable redis indexer with extra Webrecorder bookkeeping.

    On top of the parent indexing behaviour this class registers WARC files
    per recording, mirrors new CDXJ entries into an existing collection-level
    index, updates size/timestamp fields on the configured info keys and
    reports each write to ``Stats``.
    """

    def __init__(self, *args, **kwargs):
        """Set up the indexer.

        All arguments are forwarded to the parent initialiser. Keyword
        arguments read here:

        * ``info_keys`` -- optional, defaults to an empty list.
        * ``rec_info_key_templ`` -- optional, defaults to ``None``.
        * ``config`` -- required; a ``KeyError`` is raised when it is absent.
        """
        super().__init__(*args, **kwargs)

        self.info_keys = kwargs.get('info_keys', [])
        self.rec_info_key_templ = kwargs.get('rec_info_key_templ')

        # The value is not used below, but the lookup is kept so that a
        # missing 'config' keyword still fails with KeyError at this point.
        config = kwargs['config']

        # Redis key templates taken from the model classes.
        self.coll_cdxj_key = Collection.COLL_CDXJ_KEY
        self.rec_file_key_template = Recording.REC_WARC_KEY

        # One loader instance is kept on this indexer and also published on
        # the CDXJIndexer class, so CDXJIndexer index writers share it.
        shared_loader = WAMLoader()
        self.wam_loader = shared_loader
        CDXJIndexer.wam_loader = shared_loader

        self.stats = Stats(self.redis)

    def add_warc_file(self, full_filename, params):
        """Register a WARC file in redis.

        The file's relative/base name is mapped to its load path in the hash
        named by ``self.file_key_template`` (not set in this class; assumed
        to come from the parent), and the same name is added to the set named
        by ``self.rec_file_key_template``.

        :param full_filename: path of the WARC file being registered.
        :param params: values used to resolve the key templates.
        """
        warc_name = self._get_rel_or_base_name(full_filename, params)

        files_hash_key = res_template(self.file_key_template, params)
        rec_set_key = res_template(self.rec_file_key_template, params)

        load_path = storagepaths.add_local_store_prefix(full_filename)

        self.redis.hset(files_hash_key, warc_name, load_path)
        self.redis.sadd(rec_set_key, warc_name)

    def add_urls_to_index(self, stream, params, filename, length):
        """Index ``stream`` and update the related redis bookkeeping.

        :param stream: readable stream handed to the parent indexer.
        :param params: request parameters; ``'writer_cls'`` is overwritten
            with ``CDXJIndexer`` and ``'param.upid'`` is read when present.
        :param filename: forwarded unchanged to the parent indexer.
        :param length: byte count added to each info key's ``size`` field
            and reported to ``Stats``.
        :return: the cdx list produced by the parent ``add_urls_to_index``.
        """
        upid = params.get('param.upid')
        if upid:
            # Wrap the stream so progress of the upload identified by
            # 'param.upid' is tracked through redis while it is read.
            stream = SizeTrackingReader(stream, length, self.redis, upid)

        params['writer_cls'] = CDXJIndexer

        cdx_entries = super().add_urls_to_index(stream, params,
                                                filename, length)

        # When a collection-level (replay) index already exists, append the
        # new non-empty entries to it as well.
        replay_key = res_template(self.coll_cdxj_key, params)
        if self.redis.exists(replay_key):
            for entry in filter(None, cdx_entries):
                self.redis.zadd(replay_key, 0, entry)

        # NOTE(review): utcnow() is naive and timestamp() treats naive values
        # as local time, so this equals epoch seconds only when the process
        # timezone is UTC -- confirm readers of these fields expect that.
        now_secs = int(datetime.utcnow().timestamp())

        with redis_pipeline(self.redis) as pipe:
            for templ in self.info_keys:
                info_key = res_template(templ, params)
                pipe.hincrby(info_key, 'size', length)

                # Timestamps are only touched when something was indexed.
                if cdx_entries:
                    pipe.hset(info_key, 'updated_at', now_secs)
                    if templ == self.rec_info_key_templ:
                        pipe.hset(info_key, 'recorded_at', now_secs)

        self.stats.incr_record(params, length, cdx_entries)

        return cdx_entries