Exemplo n.º 1
0
def init_props(config):
    """Hand ``config`` to every component that exposes ``init_props``.

    ``init_props(config)`` is called on the ``User``, ``Collection``,
    ``Recording`` and ``Stats`` models, in that order, and finally on the
    ``storagepaths`` module.

    All imports are function-local, so the modules are only loaded when
    this function actually runs.

    :param config: configuration object, passed through unchanged to each
        ``init_props`` call.
    """
    from webrecorder.models import User, Collection, Recording, Stats

    for model in (User, Collection, Recording, Stats):
        model.init_props(config)

    # Imported only after the model classes above have been initialised.
    import webrecorder.rec.storage.storagepaths as storagepaths
    storagepaths.init_props(config)
Exemplo n.º 2
0
def init_props(config):
    """Hand ``config`` to every component that exposes ``init_props``.

    ``init_props(config)`` is called on the ``User``, ``Collection``,
    ``Recording``, ``Stats`` and ``Auto`` models, in that order, and
    finally on the ``storagepaths`` module.

    All imports are function-local, so the modules are only loaded when
    this function actually runs.

    :param config: configuration object, passed through unchanged to each
        ``init_props`` call.
    """
    from webrecorder.models import User, Collection, Recording, Stats, Auto

    for model in (User, Collection, Recording, Stats, Auto):
        model.init_props(config)

    # Imported only after the model classes above have been initialised.
    import webrecorder.rec.storage.storagepaths as storagepaths
    storagepaths.init_props(config)
Exemplo n.º 3
0
    def __init__(self, *args, **kwargs):
        """Initialise the indexer on top of the parent class set-up.

        All positional and keyword arguments are forwarded unchanged to
        the parent ``__init__``. These keyword arguments are also read
        here:

        * ``info_keys`` -- optional; defaults to an empty list.
        * ``rec_info_key_templ`` -- optional; defaults to ``None``.
        * ``config`` -- required; ``KeyError`` is raised when it is absent.
        """
        super(WebRecRedisIndexer, self).__init__(*args, **kwargs)

        self.rec_info_key_templ = kwargs.get('rec_info_key_templ')
        self.info_keys = kwargs.get('info_keys', [])

        # 'config' is mandatory: this lookup raises KeyError when it is
        # missing, even though the value is not used further in this method.
        config = kwargs['config']

        self.rec_file_key_template = Recording.REC_WARC_KEY
        self.coll_cdxj_key = Collection.COLL_CDXJ_KEY

        # A single WAMLoader is stored on this instance and also assigned to
        # the CDXJIndexer class attribute, so CDXJIndexer index writers
        # share the same loader.
        shared_loader = WAMLoader()
        self.wam_loader = shared_loader
        CDXJIndexer.wam_loader = shared_loader

        # NOTE(review): self.redis is not assigned in this method; presumably
        # the parent initialiser provides it -- confirm.
        self.stats = Stats(self.redis)
Exemplo n.º 4
0
class WebRecRedisIndexer(WritableRedisIndexer):
    """``WritableRedisIndexer`` subclass with extra Redis bookkeeping.

    On top of the parent indexing behaviour this class:

    * registers WARC filenames under a file hash and a per-recording set,
    * copies newly indexed entries into a collection-level sorted set when
      that key already exists,
    * updates ``size`` / ``updated_at`` / ``recorded_at`` fields on the
      configured info hashes, and
    * reports every index write to ``Stats``.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the indexer on top of the parent class set-up.

        All positional and keyword arguments are forwarded unchanged to
        the parent ``__init__``. These keyword arguments are also read
        here:

        * ``info_keys`` -- optional; defaults to an empty list.
        * ``rec_info_key_templ`` -- optional; defaults to ``None``.
        * ``config`` -- required; ``KeyError`` is raised when it is absent.
        """
        super().__init__(*args, **kwargs)

        self.rec_info_key_templ = kwargs.get('rec_info_key_templ')
        self.info_keys = kwargs.get('info_keys', [])

        # 'config' is mandatory: this lookup raises KeyError when it is
        # missing, even though the value is not used further in this method.
        config = kwargs['config']

        self.rec_file_key_template = Recording.REC_WARC_KEY
        self.coll_cdxj_key = Collection.COLL_CDXJ_KEY

        # A single WAMLoader is stored on this instance and also assigned to
        # the CDXJIndexer class attribute, so CDXJIndexer index writers
        # share the same loader.
        shared_loader = WAMLoader()
        self.wam_loader = shared_loader
        CDXJIndexer.wam_loader = shared_loader

        # NOTE(review): self.redis is not assigned in this class; presumably
        # the parent initialiser provides it -- confirm.
        self.stats = Stats(self.redis)

    def add_warc_file(self, full_filename, params):
        """Register a WARC file in the file hash and the recording's set.

        The name returned by ``_get_rel_or_base_name`` is stored as a field
        of the hash resolved from ``file_key_template``, with the result of
        ``storagepaths.add_local_store_prefix(full_filename)`` as its value,
        and is added to the set resolved from ``rec_file_key_template``.

        :param full_filename: path of the WARC file as supplied by the caller.
        :param params: values used to resolve the key templates.
        """
        warc_name = self._get_rel_or_base_name(full_filename, params)
        files_hash_key = res_template(self.file_key_template, params)
        rec_set_key = res_template(self.rec_file_key_template, params)
        load_path = storagepaths.add_local_store_prefix(full_filename)

        self.redis.hset(files_hash_key, warc_name, load_path)
        self.redis.sadd(rec_set_key, warc_name)

    def add_urls_to_index(self, stream, params, filename, length):
        """Index ``stream`` through the parent class and update related keys.

        In order, this method:

        1. wraps ``stream`` in a ``SizeTrackingReader`` when ``params`` has a
           truthy ``'param.upid'`` entry;
        2. sets ``params['writer_cls']`` to ``CDXJIndexer`` (the caller's
           ``params`` mapping is modified) and delegates to the parent
           ``add_urls_to_index``;
        3. if the collection-level key already exists, adds every truthy
           entry of the returned list to it with score ``0``;
        4. inside one ``redis_pipeline`` block, for each template in
           ``info_keys``: increments ``size`` by ``length`` and, when the
           returned list is truthy, sets ``updated_at`` (plus ``recorded_at``
           for the template equal to ``rec_info_key_templ``);
        5. calls ``self.stats.incr_record``.

        :param stream: stream handed to the parent indexer.
        :param params: template parameters; updated in place (see step 2).
        :param filename: forwarded unchanged to the parent indexer.
        :param length: value added to each ``size`` field and passed to the
            size-tracking reader and to the stats call.
        :returns: the list returned by the parent ``add_urls_to_index``.
        """
        upload_id = params.get('param.upid')
        if upload_id:
            stream = SizeTrackingReader(stream, length, self.redis, upload_id)

        params['writer_cls'] = CDXJIndexer

        cdx_list = super().add_urls_to_index(stream, params, filename, length)

        # Only extend the collection-level index when that key is already
        # present; it is never created from here.
        collection_key = res_template(self.coll_cdxj_key, params)
        if self.redis.exists(collection_key):
            for entry in filter(None, cdx_list):
                self.redis.zadd(collection_key, 0, entry)

        # NOTE(review): utcnow() returns a naive datetime and timestamp()
        # interprets naive values as local time, so on a host whose timezone
        # is not UTC this differs from the real epoch seconds. Left as is;
        # confirm how the readers of these fields convert the value back.
        ts_sec = int(datetime.utcnow().timestamp())
        has_entries = bool(cdx_list)

        with redis_pipeline(self.redis) as pi:
            for key_templ in self.info_keys:
                info_key = res_template(key_templ, params)
                pi.hincrby(info_key, 'size', length)

                if not has_entries:
                    continue

                pi.hset(info_key, 'updated_at', ts_sec)
                if key_templ == self.rec_info_key_templ:
                    pi.hset(info_key, 'recorded_at', ts_sec)

        self.stats.incr_record(params, length, cdx_list)

        return cdx_list