Пример #1
0
    def add_urls_to_index(self, stream, params, filename, length):
        """Index ``stream`` through the parent class, then account for its size.

        When ``params`` carries a ``param.upid`` value, the stream is first
        wrapped in a ``SizeTrackingReader`` bound to that upload key.

        After the parent call, ``length`` is added to the ``size`` field of
        every hash named by ``self.size_keys`` and to the per-day usage hash
        (temp or regular user). For temp users a rate-limit counter is
        incremented as well and its expiry is set to ``self.rate_limit_ttl``.

        :param stream: stream handed on to the parent indexer
        :param params: mapping used to resolve the key templates
        :param filename: passed through unchanged to the parent indexer
        :param length: amount added to every size/usage counter
        :return: whatever the parent ``add_urls_to_index`` returned
        """
        upload_id = params.get('param.upid')
        if upload_id:
            stream = SizeTrackingReader(stream, length, self.redis, upload_id)

        base = super(WebRecRedisIndexer, self)
        cdx_list = base.add_urls_to_index(stream, params, filename, length)

        # NOTE(review): the queued commands are presumably sent when the
        # pipeline context exits -- confirm against redis_pipeline.
        with redis_pipeline(self.redis) as pi:
            for size_templ in self.size_keys:
                size_key = res_template(size_templ, params)
                pi.hincrby(size_key, 'size', length)

                # only the recording info hash gets a fresh timestamp, and
                # only when the parent call returned a non-empty result
                if size_templ == self.rec_info_key_templ:
                    if cdx_list:
                        now_secs = int(time.time())
                        pi.hset(size_key, 'updated_at', str(now_secs))

            # usage hashes are bucketed by today's ISO date
            today = datetime.now().date().isoformat()

            if 'param.user' in params:
                is_temp = params['param.user'].startswith(self.temp_prefix)
                usage_key = (self.temp_usage_key if is_temp
                             else self.user_usage_key)

                if is_temp:
                    limit_key = self.get_rate_limit_key(params)
                    if limit_key:
                        pi.incrby(limit_key, length)
                        pi.expire(limit_key, self.rate_limit_ttl)

                if usage_key:
                    pi.hincrby(usage_key, today, length)

        return cdx_list
Пример #2
0
    def add_urls_to_index(self, stream, params, filename, length):
        """Index ``stream`` through the parent class, then account for its size.

        ``length`` is added to the ``size`` field of every hash named by
        ``self.size_keys`` and, when ``params`` contains ``param.user``, to
        the per-day field of the matching usage hash (temp or regular user).

        :param stream: stream handed on to the parent indexer
        :param params: mapping used to resolve the key templates
        :param filename: passed through unchanged to the parent indexer
        :param length: amount added to every size/usage counter
        :return: whatever the parent ``add_urls_to_index`` returned
        """
        base = super(WebRecRedisIndexer, self)
        cdx_list = base.add_urls_to_index(stream, params, filename, length)

        with redis.utils.pipeline(self.redis) as pi:
            for size_templ in self.size_keys:
                size_key = res_template(size_templ, params)
                pi.hincrby(size_key, 'size', length)

                # only the recording info hash gets a fresh timestamp, and
                # only when the parent call returned a non-empty result
                if size_templ == self.rec_info_key_templ:
                    if cdx_list:
                        now_secs = int(time.time())
                        pi.hset(size_key, 'updated_at', str(now_secs))

            # usage hashes are bucketed by today's ISO date
            today = datetime.now().date().isoformat()

            if 'param.user' in params:
                is_temp = params['param.user'].startswith(self.temp_prefix)
                usage_key = (self.temp_usage_key if is_temp
                             else self.user_usage_key)

                if usage_key:
                    pi.hincrby(usage_key, today, length)

        return cdx_list
Пример #3
0
    def allow_new_file(self, filename, params):
        """Tell whether a new file may be opened for ``filename``.

        The key built from ``self.info_key`` and ``params`` must already
        exist in redis; otherwise a notice is printed and opening the new
        warc file is refused.

        :param filename: name used only in the printed notice
        :param params: mapping used to resolve the info key template
        :return: ``True`` if the key exists, ``False`` otherwise
        """
        rec_key = res_template(self.info_key, params)

        # the recording has to exist before anything is written for it
        if self.redis.exists(rec_key):
            return True

        print('Writing skipped, recording does not exist for ' + filename)
        return False
Пример #4
0
    def _is_write_req(self, req, params):
        """Decide whether ``req`` should be written.

        Returns ``False`` when there is no request, the request has no
        ``rec_headers``, no skip key template is configured, or the resolved
        skip key currently holds ``b'1'`` in redis (a notice is printed in
        that last case). Returns ``True`` otherwise.

        :param req: request object; only its truthiness and ``rec_headers``
            are inspected here
        :param params: mapping used to resolve the skip key template
        """
        if not (req and req.rec_headers and self.skip_key_template):
            return False

        skip_flag = self.redis.get(
            res_template(self.skip_key_template, params))

        if skip_flag != b'1':
            return True

        print('SKIPPING REQ', params.get('url'))
        return False
Пример #5
0
    def _iter_sources(self, params):
        """Yield ``(key, source)`` pairs for the keys matching ``params``.

        The key pattern is resolved from ``self.redis_key_template``. A
        pattern without ``*`` is used as a single utf-8 encoded key;
        otherwise the keys come from ``self.scan_keys``.

        For each key, a source is built from its mount data when present.
        Keys without mount data are skipped if ``self.mounts_only`` is set,
        and otherwise resolved through ``self._get_source_for_key``.

        :param params: mapping used to resolve the key template
        """
        pattern = res_template(self.redis_key_template, params)

        if '*' in pattern:
            keys = self.scan_keys(pattern, params)
        else:
            keys = [pattern.encode('utf-8')]

        mounts = self._get_mounts(keys)

        for raw_key, mount in zip(keys, mounts):
            name = raw_key.decode('utf-8')

            if not mount:
                if self.mounts_only:
                    continue

                yield name, self._get_source_for_key(name)
                continue

            yield name, init_index_source(mount.decode('utf-8'),
                                          source_list=self.SUPPORTED_SOURCES)
Пример #6
0
 def create_buffer(self, params, name):
     """Build a ``TempWriteBuffer`` for ``name``.

     The buffer receives the redis client, the key resolved from the
     ``'rec'`` info key template, ``name`` and ``params['url']``.

     :param params: mapping used to resolve the template; must hold ``'url'``
     :param name: passed through unchanged to ``TempWriteBuffer``
     :return: the newly constructed ``TempWriteBuffer``
     """
     rec_templ = self.info_keys['rec']
     rec_info_key = res_template(rec_templ, params)
     return TempWriteBuffer(
         self.redis,
         rec_info_key,
         name,
         params['url'],
     )