Example #1
File: cache.py Project: morepath/reg
 def __init__(self, key_lookup, component_cache_size, all_cache_size,
              fallback_cache_size):
     self.key_lookup = key_lookup
     # Each lookup method gets its own LRU cache; all() returns a
     # generator, so it is materialized into a list before caching.
     self.component = lru_cache(component_cache_size)(key_lookup.component)
     self.fallback = lru_cache(fallback_cache_size)(key_lookup.fallback)
     self.all = lru_cache(all_cache_size)(
         lambda key: list(key_lookup.all(key)))
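
A minimal sketch of how such a wrapper might be exercised, assuming a hypothetical FakeKeyLookup collaborator with the component/all methods used above (not part of morepath/reg):

from repoze.lru import lru_cache

class FakeKeyLookup:
    # Hypothetical stand-in for the key_lookup collaborator above.
    def component(self, key):
        return "component-for-%s" % (key,)
    def all(self, key):
        yield "first-%s" % (key,)
        yield "second-%s" % (key,)

# Mirrors the __init__ above: each lookup method gets its own LRU cache,
# and the generator from all() is materialized so the result can be reused.
lookup = FakeKeyLookup()
component = lru_cache(100)(lookup.component)
all_ = lru_cache(100)(lambda key: list(lookup.all(key)))
print(component("x"))  # computed once, then served from the cache
print(all_("x"))       # ['first-x', 'second-x']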
Example #2
File: tests.py Project: marcosptf/fedora
 def test_partial(self):
     # The lru_cache decorator must not crash on functools.partial instances.
     def add(a, b):
         return a + b
     from functools import partial
     from repoze.lru import lru_cache
     add_five = partial(add, 5)
     decorated = lru_cache(20)(add_five)
     self.assertEqual(decorated(3), 8)
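
The test above guards the case where the wrapped callable is a functools.partial, which lacks attributes such as __name__ that plain functions carry. For comparison, the more common decorator form (fib is illustrative, not from the test suite):

from repoze.lru import lru_cache

@lru_cache(500)  # maxsize is the first positional argument
def fib(n):
    # Recursive calls go through the cached wrapper bound to the name fib.
    return n if n < 2 else fib(n - 1) + fib(n - 2)

print(fib(100))  # fast: every intermediate result is memoized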
Example #3
 def __init__(self, host, port=3772, timeout=None, cache_size=50, reconnect=False):
     self.host = host
     self.port = port
     self.timeout = timeout
     # Wrap execute() in a per-instance LRU cache sized by cache_size.
     self.cache = lru_cache(maxsize=cache_size)
     self.execute = self.cache(self.execute)
     self.reconnect = reconnect
     if not reconnect:
         self.connect()
Example #4
 def __init__(self, host, port=3772, timeout=None, cache_size=50, reconnect=False, use_plain_sasl=False):
     self.host = host
     self.port = port
     self.timeout = timeout
     # Wrap execute() in a per-instance LRU cache sized by cache_size.
     self.cache = lru_cache(maxsize=cache_size)
     self.execute = self.cache(self.execute)
     self.reconnect = reconnect
     self.use_plain_sasl = use_plain_sasl
     if not reconnect:
         self.connect()
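
Because self.execute is a bound method when it is wrapped, each instance gets its own private cache. A condensed, self-contained sketch of that pattern (the Client class and its execute body are illustrative):

from repoze.lru import lru_cache

class Client(object):
    def __init__(self, cache_size=50):
        # Wrapping the bound method gives this instance a private cache;
        # other instances do not share its entries.
        self.execute = lru_cache(maxsize=cache_size)(self.execute)

    def execute(self, query):
        print("executing %s" % query)  # runs once per distinct query
        return "result-of-" + query

c = Client()
c.execute("SELECT 1")  # computes and prints
c.execute("SELECT 1")  # served from this instance's cache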
Example #5
File: __init__.py Project: jul/yahi
def shoot(context, group_by):
    """Produce a dict of the data found in each line,
    grouping it with group_by according to the options (which may
    include a data_filter).
    * group_by : a lambda returning a Hankyu (dict),
        used to extract the valid information
    * option.cache : cache strategy (beaker, repoze, dict, fixed, no_cache)
    * option.data_filter : f(data) => bool;
        applied to the data dict;
        if it returns True the current data is kept
    * option.diagnose : array of strings:
      * match : reports on stderr which lines were not matched
      * rejected : reports on stderr which data were filtered out
    * option.skill : enables costly extraction of:
        * geo_ip : GeoIP information
        * user_agent : user-agent parsing
    * option.log_format : apache_log_combined or lighttpd;
        selects the regexp used to extract the fields from the line,
        and also selects the datetime parser
    """
    context.log = dict(error=[], warning=[])
    aggregator = Hankyu({})
    if 'user_agent' in context.skill:
        import httpagentparser
    
    look_for = log_pattern[context.log_format].search
    match = None
    dt_format = dt_formater_from_format(date_pattern[context.log_format])
    parse_user_agent = lru_cache(context.cache_size)(normalize_user_agent)
    if "geo_ip" in context.skill:
        from pygeoip import GeoIP
        gi = GeoIP(context.geoip)
        country_by_ip = lru_cache(context.cache_size)(gi.country_code_by_addr)
    _input = fileinput.input(context.files)
    if not context.silent:
        sys.stderr.write("parsing:\n %s\n" % "\n-" . join(context.files))
    try:
        for line in _input:
            match = look_for(line)
            if not context.silent and not _input.lineno() % 10000:
                sys.stderr.write("*")
                
            if match:
                data = match.groupdict()
                if data.get("datetime"):
                    data['_datetime'] = dt_format(data["datetime"])

                if 'geo_ip' in context.skill:
                    data.update({"_country": country_by_ip(data["ip"])})
                if 'user_agent' in context.skill:
                    data.update(
                        parse_user_agent(data["agent"])
                    )
                if context.data_filter and not context.data_filter(data):
                    if "rejected" in context.diagnose:
                        if context.silent:
                            context.log["warning"]+=[ "REJECTED:at %s:%s:%s"%(
                                _input.lineno(),_input.filename(),data) ]
                        else:
                            sys.stderr.write("at %s:%s:" % (
                                _input.lineno(),_input.filename()) )
                            sys.stderr.write("REJECTED:{0}\n".format(data))
                else:
                    aggregator += group_by(data)
            elif "match" in context.diagnose:
                if context.silent:
                    context.log["warning"]+=[
                        "NOTMATCH:at %s:%s:\%s not match" % ( 
                        _input.lineno(),_input.filename(), line)]
                else:
                    sys.stderr.write("at %s:%s:" % ( 
                        _input.lineno(),_input.filename()) )
                    sys.stderr.write("NOT MATCHED:«{0}»\n".format(line))
    except Exception as e:
        if context.silent:
            context.log["error"] += ["ARRG(%s):at %s:%s" % (e, _input.lineno(), _input.filename())]
        else:
            sys.stderr.write("ARRG:at %s:%s\n" % (
                _input.lineno(), _input.filename()))
            sys.stderr.write("CONTEXT:match %s:data : %s\n" % (
                match and match.groupdict() or "no_match", data))
            # Re-raise the original exception so its traceback is preserved.
            raise
    finally:
        ## causes a problem with stdin/stderr
        #_input.close()
        if not context.silent:
            sys.stderr.write("\n%s lines parsed\n" % _input.lineno())
    return aggregator
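
The memoization used twice above (user-agent parsing and GeoIP lookups) pays off because log lines repeat the same values; the trick in isolation, with a stand-in body for the expensive parser:

from repoze.lru import lru_cache

def normalize_user_agent(agent):
    # Stand-in for a costly parse (imagine httpagentparser here).
    return {"_browser": agent.split("/")[0]}

parse_user_agent = lru_cache(10000)(normalize_user_agent)

for agent in ["Mozilla/5.0", "curl/7.64", "Mozilla/5.0"]:
    data = parse_user_agent(agent)  # the third call is a cache hit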
Example #6
    def __init__(cls, name, bases, namespace):
        # Replace every plain function defined in the class body with an
        # LRU-cached wrapper before finalizing the class.
        for key, value in namespace.items():
            if isinstance(value, FunctionType):
                setattr(cls, key, lru_cache(maxsize=500)(value))

        return super(Cache, cls).__init__(name, bases, namespace)
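
A self-contained sketch of how a class might opt in; the metaclass body is repeated so the snippet runs on its own, and Expensive is illustrative (Python 3 metaclass syntax):

from types import FunctionType
from repoze.lru import lru_cache

class Cache(type):
    def __init__(cls, name, bases, namespace):
        # Same idea as above: wrap every function from the class body.
        for key, value in namespace.items():
            if isinstance(value, FunctionType):
                setattr(cls, key, lru_cache(maxsize=500)(value))
        super(Cache, cls).__init__(name, bases, namespace)

class Expensive(metaclass=Cache):
    def compute(self, n):
        print("computing %s" % n)
        return n * n

e = Expensive()
e.compute(3)  # computes and prints
e.compute(3)  # cache hit; note that self is part of the cache key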
Example #7
import math
import os
import re
import string

from stemming.porter2 import stem
from repoze.lru import lru_cache


SPLIT_RE = re.compile(r'[\n\r\s\t' + string.punctuation + ']')
EMAIL_RE = re.compile(r"<?(mailto:)?[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>?")
URL_RE = re.compile(r'https?://[^ \t\r\n\<]+')

DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')


# cached version of stem
#   maxsize 4096 will use about 128 kb of memory
stem = lru_cache(maxsize=4096)(stem)

class Etiquetador(object):
    def __init__(self, word_min_size=2, min_occurrences=0,
                 max_tags=None, weight_range=None, stopwords=None):

        self.word_min_size = word_min_size
        self.min_occurrences = min_occurrences
        self.max_tags = max_tags
        self.weight_range = weight_range

        self.init_tags()
        self.init_stopwords(stopwords)

    def init_stopwords(self, stopwords):
        stopwords_file_path = os.path.join(DATA_DIR, 'stop_ptbr.txt')