Пример #1
0
    def __init__(self, pcap_collection_data, max_queue_size, work_queue,
                 interface, custom_tag, return_deep_info, http_filter_json,
                 cache_size, session_size, bpf_filter, timeout, debug):
        """
        Store the capture settings, open the sniffer and build the caches.

        :param pcap_collection_data: stored unchanged on the instance
        :param max_queue_size: maximum length of the asset queue
        :param work_queue: queue that captured asset messages are sent to
        :param interface: name of the network interface to capture from
        :param custom_tag: data tag used to tell collection engines apart
        :param return_deep_info: whether to process extra details, including
            the raw request, the response headers and the body
        :param http_filter_json: HTTP filter configuration; supports
            filtering by status and content type
        :param cache_size: number of processed records to remember (120 s
            TTL) so repeated data is not collected again; the two
            de-duplication caches are only created when this is truthy
        :param session_size: number of HTTP/TCP sessions to cache; entries
            use a 30 s TTL
        :param bpf_filter: low-level packet filter applied to the sniffer
        :param timeout: described by the original author as the collector's
            run timeout (exit one hour after start by default).
            NOTE(review): the value is forwarded to ``pcap.pcap`` as
            ``timeout_ms`` -- confirm the intended meaning and unit.
        :param debug: debug switch
        """
        # Plain configuration, kept in signature order.
        self.pcap_collection_data = pcap_collection_data
        self.max_queue_size = max_queue_size
        self.work_queue = work_queue
        self.interface = interface
        self.custom_tag = custom_tag
        self.return_deep_info = return_deep_info
        self.http_filter_json = http_filter_json
        self.cache_size = cache_size
        self.session_size = session_size
        self.bpf_filter = bpf_filter
        self.timeout = timeout
        self.debug = debug
        # Message counter starts at zero.
        self.total_msg_num = 0

        # Open the capture handle first, then install the BPF filter on it.
        sniffer_options = {
            'snaplen': 65535,
            'promisc': True,
            'timeout_ms': timeout,
            'immediate': False,
        }
        self.sniffer = pcap.pcap(interface, **sniffer_options)
        self.sniffer.setfilter(bpf_filter)

        # TCP stream cache is always created, whatever session_size is.
        self.tcp_stream_cache = Cache(
            maxsize=session_size, ttl=30, timer=time.time, default=None)

        # De-duplication caches exist only when a cache size was requested.
        if cache_size:
            dedup_options = {
                'maxsize': cache_size,
                'ttl': 120,
                'timer': time.time,
                'default': None,
            }
            self.tcp_cache = LRUCache(**dedup_options)
            self.http_cache = LRUCache(**dedup_options)

        # Regular expressions used for HTTP data analysis.
        request_pattern = (
            r'^([A-Z]+) +([^ \r\n]+) +HTTP/\d+(?:\.\d+)?[^\r\n]*(.*?)$')
        response_pattern = r'^HTTP/(\d+(?:\.\d+)?) (\d+)[^\r\n]*(.*?)$'
        charset_pattern = rb'<meta[^>]+?charset=[\'"]?([a-z\d\-]+)[\'"]?'
        self.decode_request_regex = re.compile(request_pattern, re.DOTALL)
        self.decode_response_regex = re.compile(response_pattern, re.DOTALL)
        self.decode_body_regex = re.compile(charset_pattern, re.IGNORECASE)
Пример #2
0
    def __init__(self, work_queue, interface, custom_tag, return_deep_info,
                 http_filter_json, cache_size, session_size, bpf_filter,
                 timeout, debug):
        """
        Store the capture settings, create the live capture and the caches.

        :param work_queue: queue that captured asset messages are sent to
        :param interface: name of the network interface to capture from
        :param custom_tag: data tag used to tell collection engines apart
        :param return_deep_info: whether to process extra details, including
            the raw request, the response headers and the body
        :param http_filter_json: HTTP filter configuration; supports
            filtering by status and content type
        :param cache_size: number of processed records to remember (120 s
            TTL) so repeated data is not sent to Syslog again; the two
            de-duplication caches are only created when this is truthy
        :param session_size: number of HTTP/TCP sessions to cache (16 s
            TTL); the two stream caches are only created when this is truthy
        :param bpf_filter: low-level packet filter handed to the capture
        :param timeout: run timeout of the collector; per the original
            notes it exits one hour after start by default. It is only
            stored here.
        :param debug: debug switch, also forwarded to the capture object
        """
        # Plain configuration, kept in signature order.
        self.work_queue = work_queue
        self.interface = interface
        self.custom_tag = custom_tag
        self.return_deep_info = return_deep_info
        self.http_filter_json = http_filter_json
        self.cache_size = cache_size
        self.session_size = session_size
        self.bpf_filter = bpf_filter
        self.timeout = timeout
        self.debug = debug

        capture_options = {
            'interface': interface,
            'bpf_filter': bpf_filter,
            'use_json': False,
            'debug': debug,
        }
        self.pktcap = pyshark.LiveCapture(**capture_options)

        # Session caches exist only when a session size was requested.
        if session_size:
            stream_options = {
                'maxsize': session_size,
                'ttl': 16,
                'timer': time.time,
                'default': None,
            }
            self.http_stream_cache = Cache(**stream_options)
            self.tcp_stream_cache = Cache(**stream_options)

        # De-duplication caches exist only when a cache size was requested.
        if cache_size:
            dedup_options = {
                'maxsize': cache_size,
                'ttl': 120,
                'timer': time.time,
                'default': None,
            }
            self.http_cache = LRUCache(**dedup_options)
            self.tcp_cache = LRUCache(**dedup_options)

        # Regular expression that detects the page encoding.
        charset_pattern = rb'<meta [^>]*?charset=["\']?([a-z\-\d]+)["\'>]?'
        self.encode_regex = re.compile(charset_pattern, re.IGNORECASE)
Пример #3
0
 def __init__(self):
     """Start with no messages and six empty LRU caches.

     Every cache is bounded by the private ``__CACHE_MAXSIZE`` attribute.
     """
     capacity = self.__CACHE_MAXSIZE
     self.messages = []
     # TODO: implement in-memory data handling to avoid unnecessary
     # database access.
     self.words_detail_cache = LRUCache(maxsize=capacity)
     self.words_name_cache = LRUCache(maxsize=capacity)
     self.usage_cache = LRUCache(maxsize=capacity)
     self.article_detail_cache = LRUCache(maxsize=capacity)
     self.article_name_cache = LRUCache(maxsize=capacity)
     self.reference_cache = LRUCache(maxsize=capacity)
Пример #4
0
    def __init__(self, maxsize=0xff, ttl=None):
        """Create the backing ``LRUCache`` held in ``self._cache``.

        :param maxsize: first positional argument passed to ``LRUCache``;
            defaults to ``0xff`` (255)
        :param ttl: second positional argument passed to ``LRUCache``;
            defaults to ``None``
        """
        # Arguments stay positional, exactly as the constructor is called.
        backing_store = LRUCache(maxsize, ttl)
        self._cache = backing_store
Пример #5
0
def cache():
    """Return an ``LRUCache`` of size 10 that has been filled to capacity.

    Keys ``0`` to ``maxsize - 1`` are stored, each mapped to itself, and the
    cache is asserted to report itself as full before it is returned.
    """
    filled = LRUCache(maxsize=10)
    capacity = filled.maxsize
    for key in range(capacity):
        filled.set(key, key)
    assert filled.full()
    return filled
Пример #6
0
# -*- coding: utf-8 -*-
# __file__  : lru_cache.py
# __time__  : 2020/6/30 2:00 PM

from cacheout import LRUCache

# Module-level LRU cache created with cacheout's default constructor arguments.
lru = LRUCache()
'''
class LRUCache(Cache):
    """
    Like :class:`.Cache` but uses a least-recently-used eviction policy.

    The primary difference with :class:`.Cache` is that cache entries are moved to the
    end of the eviction queue when both :meth:`get` and :meth:`set` are called (as
    opposed to :class:`.Cache` that only moves entries on ``set()``.
    """

    def _get(self, key, default=None):
        value = super()._get(key, default=default)

        if key in self._cache:
            self._cache.move_to_end(key)

        return value
        
    def move_to_end(self, key, last=True):
        """Move an existing element to the end (or beginning if last is false).

        Raise KeyError if the element does not exist.
        """
        link = self.__map[key]