Example #1
class Metrics(object):
    def __init__(self, app=None):
        self.app = app
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        self.t = Timeseries(
            app.mxcache.redis_conn(),
            type='series',
            read_func=float,
            intervals={
                'minute': {
                    'step': 60,         # 60 seconds
                    'steps': 12 * 60,   # last 12 hours
                }
            })
        app.metrics = self

    def update(self, metrics):
        for f in MX_FIELDS:
            try:
                self.t.insert(f, metrics[f])
            except AttributeError:
                # default cache does not allow append
                pass

    def get_latest(self, minutes=5):
        now = float(time.time())
        joined_data = {}
        for f in MX_FIELDS:
            series_data = self.t.series(f, 'minute', start=now - minutes * 60, end=now)
            joined_data[f] = series_data
        return joined_data
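
A minimal wiring sketch for the extension above, assuming a Flask-style app whose `mxcache` attribute exposes `redis_conn()`; the app object and `MX_FIELDS` are external to the snippet, so the names here are illustrative only:

# Hypothetical setup; app.mxcache must be configured by another extension.
from flask import Flask

app = Flask(__name__)
metrics = Metrics()        # deferred init, the usual Flask extension pattern
metrics.init_app(app)      # binds the Timeseries and sets app.metrics

metrics.update({f: 0.0 for f in MX_FIELDS})   # one sample per tracked field
latest = metrics.get_latest(minutes=5)        # {field: series data per bucket}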
Example #2
    def __init__(self, name, config):
        self._count = 0
        self._name = name
        self._host = config.pop('host', 'sqlite:///:memory:')
        self._rolling = config.pop('rolling', 0)
        self._generator = config.pop('generator', None)

        config.setdefault('type', 'count')
        config.setdefault('write_func', long_or_float)
        config.setdefault('read_func', long_or_float)
        self._transform = config.get('transform')

        # parse the patterns and bind the Schema.match function
        # TODO: optimize this binding even further to reduce lookups at runtime
        self._patterns = config.pop('match', [])
        if isinstance(self._patterns, (tuple, list)):
            if len(self._patterns) != 1:
                self._patterns = [re.compile(x) for x in self._patterns]
                self.match = self._match_list
            else:
                self._patterns = re.compile(self._patterns[0])
                self.match = self._match_single
        else:
            self._patterns = re.compile(self._patterns)
            self.match = self._match_single

        self.config = config
        self.timeseries = Timeseries(self._host, **config)

        # Bind some of the timeseries methods to this for convenience
        self.list = self.timeseries.list
        self.properties = self.timeseries.properties
        self.iterate = self.timeseries.iterate
Example #3
    def __init__(self, **kwargs):
        self.client = None
        try:
            self.client = redis.StrictRedis(host=kwargs['host'],
                                            port=kwargs['port'],
                                            db=kwargs['db'])
            self.client.ping()
            logging.debug('Redis host=%s,port=%s,db=%d - Connected!',
                          kwargs['host'], kwargs['port'], kwargs['db'])
        except Exception as ex:
            self.client = None
            logging.error('Redis host=%s,port=%s,db=%d - Error: %s',
                          kwargs['host'], kwargs['port'], kwargs['db'], ex)

        self.ts = None
        if self.client is not None:
            logging.debug('Timeseries - Create')
            if 'timeseries' in kwargs:
                self.ts = Timeseries(self.client,
                                     type='gauge',
                                     intervals=kwargs['timeseries'])
            else:
                self.ts = Timeseries(
                    self.client,
                    type='gauge',
                    intervals={
                        'seconds': {
                            'step': 5,  # 5 seconds
                            'steps': 120,  # last 10 minutes
                            'read_cast': float,
                        }
                    })
Example #4
 def init_app(self, app):
     self.t = Timeseries(
         app.mxcache.redis_conn(),
         type='series',
         read_func=float,
         intervals={
             'minute': {
                 'step': 60,  # 60 seconds
                 'steps': 12 * 60,  # last 12 hours
             }
         })
     app.metrics = self
Example #5
 def __init__(self, redis_client, key_prefix):
     """Create our counter structure."""
     if not key_prefix:
         raise ValueError('key_prefix must be provided and length must be >0')
     
     self.counters = Timeseries(redis_client, {
         'minute': {
             'step': 60,            # 60 seconds
             'steps': 60,           # last hour
             'count_only': True,    # store counts only
         },
         'hour': {
             'step': 3600,          # hourly
             'steps': 24,           # last day
             'count_only': True,    # store counts only
         },
         'daily': {
             'step': 86400,         # daily
             'steps': 30,           # last 30 days
             'count_only': True,    # store counts only
         },
     }, key_prefix=key_prefix)
     self.key_prefix = key_prefix
     self.redis_client = redis_client
Example #6
  def __init__(self, name, config):
    self._count = 0
    self._name = name
    self._host = config.pop('host', 'sqlite:///:memory:')
    self._rolling = config.pop('rolling', 0)
    self._generator = config.pop('generator',None)

    config.setdefault('type', 'count')
    config.setdefault('write_func', long_or_float)
    config.setdefault('read_func', long_or_float)
    self._transform = config.get('transform')

    # parse the patterns and bind the Schema.match function
    # TODO: optimize this binding even further to reduce lookups at runtime
    self._patterns = config.pop('match', [])
    if isinstance(self._patterns, (tuple,list)):
      if len(self._patterns) != 1:
        self._patterns = [ re.compile(x) for x in self._patterns ]
        self.match = self._match_list
      else:
        self._patterns = re.compile(self._patterns[0])
        self.match = self._match_single
    else:
      self._patterns = re.compile(self._patterns)
      self.match = self._match_single

    self.config = config
    self.timeseries = Timeseries(self._host, **config)

    # Bind some of the timeseries methods to this for convenience
    self.list = self.timeseries.list
    self.properties = self.timeseries.properties
    self.iterate = self.timeseries.iterate
Example #7
  def __init__(self, name, config):
    self._name = name
    self._host = config.pop('host', 'redis://localhost:6379/0')

    config.setdefault('type', 'count')
    config.setdefault('write_func', long_or_float)
    config.setdefault('read_func', long_or_float)
    self._transform = config.get('transform')

    # parse the patterns and bind the Schema.match function
    # TODO: optimize this binding even further to reduce lookups at runtime
    self._patterns = config.pop('match', [])
    if isinstance(self._patterns, (tuple,list)):
      if len(self._patterns) != 1:
        self._patterns = [ re.compile(x) for x in self._patterns ]
        self.match = self._match_list
      else:
        self._patterns = re.compile(self._patterns[0])
        self.match = self._match_single
    else:
      self._patterns = re.compile(self._patterns)
      self.match = self._match_single

    self._client = self._init_client()

    # TODO: Remove the need for this and add accessors to Schema or Kairos
    self.config = config
    self.timeseries = Timeseries(self._client, **config)
Example #8
class EventCounter(object):
    """Manage event counters."""
    def __init__(self, redis_client, key_prefix):
        """Create our counter structure."""
        if not key_prefix:
            raise ValueError('key_prefix must be provided and length must be >0')
        
        self.counters = Timeseries(redis_client, {
            'minute': {
                'step': 60,            # 60 seconds
                'steps': 60,           # last hour
                'count_only': True,    # store counts only
            },
            'hour': {
                'step': 3600,          # hourly
                'steps': 24,           # last day
                'count_only': True,    # store counts only
            },
            'daily': {
                'step': 86400,         # daily
                'steps': 30,           # last 30 days
                'count_only': True,    # store counts only
            },
        }, key_prefix=key_prefix)
        self.key_prefix = key_prefix
        self.redis_client = redis_client

    def record_hit(self, item_id):
        """Record a hit.  All this does is increments the current time bucket."""
        self.counters.insert(item_id, 1)
    
    def get_counts(self, interval_name, item_id, n_recent_periods):
        """
        Return the n most recent periods for a given interval and item_id as a list.
        Items are returned oldest first; most recent entry is last in list.
        """
        series = self.counters.series(item_id, interval_name, steps=n_recent_periods, condensed=False)
        return series.values()
    
    def get_sum(self, interval_name, item_id, n_recent_periods):
        """Return the sum of the counts for the most recent periods."""
        return sum(self.get_counts(interval_name, item_id, n_recent_periods))
    
    def delete_keys(self):
        """Clean up (delete) all keys used by this instance."""
        raise NotImplementedError() 
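
A short usage sketch for EventCounter, assuming a Redis server on localhost (the positional intervals dict and the key_prefix argument follow the older kairos API used above):

import redis

client = redis.StrictRedis(host='localhost', port=6379, db=0)
counter = EventCounter(client, key_prefix='hits')

counter.record_hit('article:42')                        # bumps the current buckets
recent = counter.get_counts('minute', 'article:42', 5)  # last five 1-minute counts
daily = counter.get_sum('hour', 'article:42', 24)       # total over the last day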
Example #9
 def init_app(self, app):
     self.t = Timeseries(
         app.mxcache.redis_conn(),
         type='series',
         read_func=float,
         intervals={
             'minute': {
                 'step': 60,         # 60 seconds
                 'steps': 12 * 60,   # last 12 hours
             }
         })
     app.metrics = self
Example #10
class Metrics(object):
    def __init__(self, app=None):
        self.app = app
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        self.t = Timeseries(
            app.mxcache.redis_conn(),
            type='series',
            read_func=float,
            intervals={
                'minute': {
                    'step': 60,  # 60 seconds
                    'steps': 12 * 60,  # last 12 hours
                }
            })
        app.metrics = self

    def update(self, metrics):
        for f in MX_FIELDS:
            try:
                self.t.insert(f, metrics[f])
            except AttributeError:
                # default cache does not allow append
                pass

    def get_latest(self, minutes=5):
        now = float(time.time())
        joined_data = {}
        for f in MX_FIELDS:
            series_data = self.t.series(f,
                                        'minute',
                                        start=now - minutes * 60,
                                        end=now)
            joined_data[f] = series_data
        return joined_data
Example #11
class TimeseriesManager(object):
    client = REDIS_CLIENT

    INTERVALS = {
        'hour': {
            'step': 60 * 60,  # 60 minutes
            'steps': 24, # last 24 hours
        },
        'day': {
            'step': 60 * 60 * 24, # 1 Day
            'steps': 7 # Last 1 week
        },
        'week': {
            'step': 60 * 60 * 24 * 7, # 1 Week
            'steps': 4 # Last 1 Month (~4 weeks)
        },
        'month': {
            'step': 60 * 60 * 24 * 7 * 4, # 1 Month (~4 weeks)
            'steps': 3 # Last 1 'Quarter'
        }
    }

    def __init__(self, team_id, series_type):
        self.team_id = team_id
        self.series = Timeseries(self.client, type=series_type, intervals=self.INTERVALS)

    def gen_cache_key(self, stat_name):
        return "%s~%s" % (self.team_id, stat_name)

    def insert(self, stat_name, value):
        cache_key = self.gen_cache_key(stat_name)
        self.series.insert(cache_key, value, timestamp=time.time())

    def query(self, stat_name, interval):
        cache_key = self.gen_cache_key(stat_name)
        return self.series.series(cache_key, interval)
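
An illustrative call sequence, assuming REDIS_CLIENT is a configured redis connection; the cache key combines the team id and stat name via gen_cache_key:

# Sketch only: REDIS_CLIENT and the Timeseries wiring come from the class above.
manager = TimeseriesManager(team_id=7, series_type='count')
manager.insert('logins', 1)               # stored under the key '7~logins'
hourly = manager.query('logins', 'hour')  # per-bucket values for the last 24 hours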
Example #12
def start(ctx, argv):
    """
        Called once on script startup, before any other events.
    """
    global client
    global reqTable
    global db
    global respTable
    global timeSeries
    client = MongoClient()
    db = client.mydb
    reqTable = db.reqTable
    respTable = db.respTable
    timeSeries = Timeseries(client,
                            type='histogram',
                            read_func=float,
                            intervals={'minute': {
                                'step': 60,
                                'steps': 120,
                            }})
    ctx.log("start")
Example #13
class RedisTimeSeries():
    def __init__(self, **kwargs):
        self.client = None
        try:
            self.client = redis.StrictRedis(host=kwargs['host'],
                                            port=kwargs['port'],
                                            db=kwargs['db'])
            self.client.ping()
            logging.debug('Redis host=%s,port=%s,db=%d - Connected!',
                          kwargs['host'], kwargs['port'], kwargs['db'])
        except Exception as ex:
            self.client = None
            logging.error('Redis host=%s,port=%s,db=%d - Error: %s',
                          kwargs['host'], kwargs['port'], kwargs['db'], ex)

        self.ts = None
        if self.client is not None:
            logging.debug('Timeseries - Create')
            if 'timeseries' in kwargs:
                self.ts = Timeseries(self.client,
                                     type='gauge',
                                     intervals=kwargs['timeseries'])
            else:
                self.ts = Timeseries(
                    self.client,
                    type='gauge',
                    intervals={
                        'seconds': {
                            'step': 5,  # 5 seconds
                            'steps': 120,  # last 10 minutes
                            'read_cast': float,
                        }
                    })

    def record_hit(self, key, measurement):
        if self.client:
            self.ts.insert(str(key), float(measurement))

    def record_response_time(self, content_id, measurement):
        self.record_hit(str(content_id) + ':rt', float(measurement))

    def record_status(self, content_id, measurement):
        self.record_hit(str(content_id) + ':status', float(measurement))

    def get_timeseries(self, key):
        if self.client is not None:
            #logging.info("properties: %s", str(self.ts.properties(str(content_id) + ':' + str(monitor_id))) )
            return self.ts.series(str(key), 'seconds')

        return None

    def get_response_time_timeseries(self, content_id):
        #logging.info("properties: %s", str(self.ts.properties(str(content_id) + ':' + str(monitor_id))) )
        return self.get_timeseries(str(content_id) + ':rt')

    def get_status_timeseries(self, content_id):
        #logging.info("properties: %s", str(self.ts.properties(str(content_id) + ':' + str(monitor_id))) )
        return self.get_timeseries(str(content_id) + ':status')

    def get_timeseries_avg(self, key):
        #logging.info("properties: %s", str(self.ts.properties(str(content_id) + ':' + str(monitor_id))) )
        series = []
        avg = 0.0
        if self.client is not None:
            series = self.ts.series(str(key), 'seconds')

            total = 0.0
            count = 0.0
            for _, value in series.items():
                if value:
                    total += float(value)
                    count += 1.0

            if count > 0.0:
                avg = total / count

        logging.debug('series avg: %f', avg)
        return avg

    def get_response_time_avg(self, content_id):
        return self.get_timeseries_avg(str(content_id) + ':rt')
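
A usage sketch, assuming a local Redis server; keys are derived from the content id plus a ':rt' or ':status' suffix as in the methods above:

rts = RedisTimeSeries(host='localhost', port=6379, db=0)

rts.record_response_time('content-1', 0.25)    # stored under 'content-1:rt'
rts.record_status('content-1', 200)            # stored under 'content-1:status'
print(rts.get_response_time_avg('content-1'))  # mean over the 10-minute window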
Example #14
 def __init__(self, team_id, series_type):
     self.team_id = team_id
     self.series = Timeseries(self.client, type=series_type, intervals=self.INTERVALS)
Example #15
class Schema(object):
    '''
    Implements the schema and associated data processing for data points.
    '''
    def __init__(self, name, config):
        self._count = 0
        self._name = name
        self._host = config.pop('host', 'sqlite:///:memory:')
        self._rolling = config.pop('rolling', 0)
        self._generator = config.pop('generator', None)

        config.setdefault('type', 'count')
        config.setdefault('write_func', long_or_float)
        config.setdefault('read_func', long_or_float)
        self._transform = config.get('transform')

        # parse the patterns and bind the Schema.match function
        # TODO: optimize this binding even further to reduce lookups at runtime
        self._patterns = config.pop('match', [])
        if isinstance(self._patterns, (tuple, list)):
            if len(self._patterns) != 1:
                self._patterns = [re.compile(x) for x in self._patterns]
                self.match = self._match_list
            else:
                self._patterns = re.compile(self._patterns[0])
                self.match = self._match_single
        else:
            self._patterns = re.compile(self._patterns)
            self.match = self._match_single

        self.config = config
        self.timeseries = Timeseries(self._host, **config)

        # Bind some of the timeseries methods to this for convenience
        self.list = self.timeseries.list
        self.properties = self.timeseries.properties
        self.iterate = self.timeseries.iterate

    @property
    def name(self):
        return self._name

    @property
    def host(self):
        return self._host

    @property
    def count(self):
        return self._count

    def generate(self):
        if self._generator:
            stat, value = self._generator()
            return stat, value, time.time()
        return None

    def store(self, stat, val, timestamp=None):
        '''
        Store a value in this schema.
        '''
        if self.match(stat):
            if self._transform:
                stat, val = self._transform(stat, val)
                if stat is None:
                    return False
            self._count += 1
            self.timeseries.insert(stat,
                                   val,
                                   timestamp,
                                   intervals=self._rolling)
            return True
        return False

    def _match_single(self, stat):
        '''
        Used when the schema implements a single regular expression; returns
        True if the stat matches this schema, False otherwise.
        '''
        if isinstance(stat, (list, tuple)):
            matches = list(filter(None, [self._patterns.search(s) for s in stat]))
            return len(matches) == len(stat)
        return self._patterns.search(stat) is not None

    def _match_list(self, stat):
        '''
        Used when the schema implements several regular expressions; returns
        True if the stat matches this schema, False otherwise.
        '''
        matches = set()
        for pattern in self._patterns:
            if isinstance(stat, (list, tuple)):
                for s in stat:
                    if pattern.search(s):
                        matches.add(s)
                if len(matches) == len(stat):
                    return True
            elif pattern.search(stat):
                return True
        return False
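
A configuration sketch for this Schema, assuming the in-memory sqlite default host and the long_or_float helper from the same module; the two patterns below force the _match_list binding:

# Hypothetical config; 'match' decides which stats this schema accepts.
config = {
    'match': [r'^web\.', r'^api\.'],
    'intervals': {
        'minute': {'step': 60, 'steps': 60},
    },
}
schema = Schema('requests', config)

schema.store('web.requests', 1)   # matches -> inserted, returns True
schema.store('db.queries', 1)     # no match -> returns False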
Example #16
class AnalyticsManager:
    def __init__(self,
                 client,
                 loadFromDB=LOAD_DB,
                 bulk_insert=BULK_INSERT,
                 track_users=T_USER,
                 track_words=T_WORDS):
        """
        TODO: config persistence
        TODO: populate memory from database
        """

        # Redis server to send data to
        self._client = client
        self._log = setup_log('manager')

        # database
        if loadFromDB:
            try:
                self.dbclient = MongoClient(MONGO_URL)
                self._DB = self.dbclient[MONGO_DB]
                self.load()
            except Exception:
                self._log.error('could not connect to MongoDB.')

        self._intervals = {
            'second': {
                'step': 1,  # one second
                'steps': 60 * 60 * 24,  # keep for 1 day
            },
            'minute': {
                'step': 60,  # 60 seconds
                'steps': 60 * 24 * 3,  # keep for 3 days
            },
            'hour': {
                'step': '1h',  # one hour
                'steps': 30 * 24  # keep for 1 month
            },
            'day': {
                'step': '1d',  # one day
                'steps': 90  # keep for 3 months
            }
        }

        self._inserts = defaultdict(Counter)
        self._insert_lock = threading.Lock()
        self._n_inserts = 0
        self.bulk_insert = bulk_insert
        self._bulk_size = 10000
        self._mini_batch_size = 16
        self._pipelined = 0
        self.track_users = track_users
        self.track_words = track_words

        # Series
        self.events = Timeseries(self._client,
                                 type='count',
                                 intervals=self._intervals)

        # Online users (1-second steps over the last 20 seconds)
        self.users = Timeseries(
            self._client,
            type='count',
            intervals={'second2': {
                'step': 1,
                'steps': 20
            }})

        # Effective words, keep for 1 month
        self.words = Timeseries(
            self._client,
            type='histogram',
            intervals={'month': {
                'step': '30d',
                'steps': 3
            }})

    def store_event(self, event):
        """
        parameters:
            event: {
            campID: (int)
            etype: event type (str). pview, imp, or click.
            timestamp: time of event (int), (default) current time.
            words: list of words related to this event, (default) [].
            }

        returns:
            0 if success, otherwise -1 and { error: "message" }
        """
        try:
            campID, etype = event['campID'], event['etype']
            timestamp = event.get('timestamp', time.time())
            words = event.get('words', [])
            eventKey = '%d:%s' % (campID, etype)
            userKey = '%d:online' % (campID)
        except (KeyError, TypeError):
            msg = 'wrong event structure. type: %s, event: %s' % (type(event),
                                                                  event)
            self._log.error(msg)
            return (-1, {'error': msg})

        if etype not in ['pview', 'imp', 'click']:
            msg = 'wrong event type "%s", valid values: pview, imp, click.' % etype
            self._log.error(msg)
            return (-1, {'error': msg})

        # Bulk insert
        if self.bulk_insert:
            temp_inserts = None
            with self._insert_lock:
                self._inserts[timestamp][eventKey] += 1
                self._n_inserts += 1

                if self._n_inserts >= self._bulk_size:
                    try:
                        self.events.bulk_insert({
                            t: {k: [v2]
                                for k, v2 in v.items()}
                            for t, v in self._inserts.items()
                        })
                        self._inserts = defaultdict(Counter)
                        self._n_inserts = 0
                    except Exception as e:
                        msg = '%s: %s' % (type(e), e)
                        self._log.error(msg)
                        return (-1, {'error': msg})

        # Single insert
        else:
            try:
                self.events.insert(eventKey, timestamp=timestamp)
                '''self._pipelined += 1
                if self._pipelined >= self._bulk_size:
                    self.events.execute()
                    self._pipelined = 0'''

                # only count the user if the event is page view
                if self.track_users and etype == 'pview':
                    self.users.insert(userKey, timestamp=timestamp)

                # only count the word if it led to impression or click
                if self.track_words and etype in ['imp', 'click']:
                    self.words.insert(campID, words, timestamp=timestamp)

            except Exception as e:
                msg = '%s: %s' % (type(e), e)
                self._log.error(msg)
                return (-1, {'error': msg})

        return (0, {'status': 'SUCCESS'})

    def get_camp_data(self, event):
        """
        parameters:
            event: {
            campID: (int)
            etype: event type (str). pview, imp, or click,
            interval: (str) second, minute, or hour. (default) minute,
            start: (int) timestamp of the beginning of interval, (default) None,
            end: (int) timestamp of the end of interval, (default) None,
            get_users: (bool) get online users. (default) True,
            get_camp_words: (bool) effective words for this campaign. (default) False,
            get_all_words: (bool) effective words for all campaigns. (default) False
            }

        returns:
            code (int) 0 for success, -1 for failure.
            if success, result:
                {
                  data: (list) (timestamp, event_count) tuples,
                  users: (int) count of online users,
                  camp_words: (list) most effective words for this campaign,
                  all_words: (list) most effective words of all campaigns
                }
            if failure: { error: "error message" }

        TODO: check if campID exists
        """
        try:
            campID, etype = event['campID'], event['etype']
            interval = event.get('interval', 'minute')
            start = event.get('start', 0)
            end = event.get('end', time.time())
            get_users = event.get('get_users', True)
            get_camp_words = event.get('get_camp_words', False)
            get_all_words = event.get('get_all_words', False)
            eventKey = '%d:%s' % (campID, etype)
            userKey = '%d:online' % (campID)

        except Exception as e:
            msg = 'wrong event structure. %s' % e
            self._log.error(msg)
            return (-1, {'error': msg})

        data = []

        if etype not in ['pview', 'imp', 'click']:
            msg = ('wrong event type "%s", '
                   'valid values: pview, imp, click.' % etype)
            self._log.error(msg)
            return (-1, {'error': msg})

        result = {}

        try:
            data = self.events.series(eventKey,
                                      interval=interval,
                                      start=start,
                                      end=end)
            result['data'] = list(data.items())

            if self.track_users and get_users:
                users = self.users.series(userKey, interval='second2')
                users = sum(users.values())
                result['users'] = users

            if self.track_words and get_camp_words:
                camp_words = self.words.series(campID, 'month')
                sorted_words = sorted([(w, c) for w, c in camp_words.items()],
                                      key=lambda t: t[1])
                result['camp_words'] = sorted_words

            if self.track_words and get_all_words:
                all_camps = self.words.list()
                all_camps_words = self.words.series(all_camps, 'month')
                sorted_words = sorted([(w, c)
                                       for w, c in all_camps_words.items()],
                                      key=lambda t: t[1])
                result['all_words'] = sorted_words

        except UnknownInterval as e:
            msg = ('wrong interval type "%s", '
                   'valid values: second, minute, hour.' % str(interval))
            self._log.error(msg)
            return (-1, {'error': msg})

        except Exception as e:
            msg = '%s: %s.' % (type(e), e)
            self._log.error(msg)
            return (-1, {'error': msg})

        return (0, result)

    def flush(self):
        """
        Deletes all data stored in redis
        """
        self._log.info('Removing all data from database.')
        self.events.delete_all()
        self.users.delete_all()
        self.words.delete_all()

    def get_lost_data(self):
        """
        Reloads the lost data from the main database if Redis crashed.
        """
        # get the latest timestamp in Redis
        props = self.events.properties()
        latest_timestamp = props['second']['last']

        mongo_cl = MongoClient(MONGO_URL)
        db = mongo_cl[MONGO_DB]
        coll = db[MONGO_COLLECTION]

        # get data more recent than latest_timestamp
        data = coll.find({'timestamp': {'$gt': latest_timestamp}})

        #mapping event types from numbers to string
        etype_str = {0: '', 1: '', 2: ''}

        done = False
        for event in data:
            timestamp, etype = event['timestamp'], etype_str[event['type']]
            campID = event['campaign']
            eventKey = '%d:%s' % (campID, etype)

            self._inserts[timestamp][eventKey] += 1
            self._n_inserts += 1

            if data.count() - data.retrieved <= 0:
                done = True

            if self._n_inserts >= self._bulk_size or done:
                try:
                    self.events.bulk_insert({
                        t: {k: [v2]
                            for k, v2 in v.items()}
                        for t, v in self._inserts.items()
                    })
                    self._inserts = defaultdict(Counter)
                    self._n_inserts = 0
                except Exception as e:
                    msg = '%s: %s' % (type(e), e)
                    self._log.error(msg)
                    return -1
        return 0

    def load(self):
        """
        load data from the database when the cache manager is up and running
        """
        pass
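
An end-to-end sketch of the manager, assuming a local Redis client and the module-level constants (setup_log, BULK_INSERT, etc.); loadFromDB is disabled so MongoDB is never touched:

import time
import redis

manager = AnalyticsManager(redis.StrictRedis(), loadFromDB=False)

code, result = manager.store_event({
    'campID': 1,
    'etype': 'click',
    'timestamp': time.time(),
    'words': ['discount'],
})

code, data = manager.get_camp_data({'campID': 1, 'etype': 'click',
                                    'interval': 'minute'})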
Example #17
class Schema(object):
  '''
  Implements the schema and associated data processing for data points.
  '''

  def __init__(self, name, config):
    self._count = 0
    self._name = name
    self._host = config.pop('host', 'sqlite:///:memory:')
    self._rolling = config.pop('rolling', 0)
    self._generator = config.pop('generator',None)

    config.setdefault('type', 'count')
    config.setdefault('write_func', long_or_float)
    config.setdefault('read_func', long_or_float)
    self._transform = config.get('transform')

    # parse the patterns and bind the Schema.match function
    # TODO: optimize this binding even further to reduce lookups at runtime
    self._patterns = config.pop('match', [])
    if isinstance(self._patterns, (tuple,list)):
      if len(self._patterns) != 1:
        self._patterns = [ re.compile(x) for x in self._patterns ]
        self.match = self._match_list
      else:
        self._patterns = re.compile(self._patterns[0])
        self.match = self._match_single
    else:
      self._patterns = re.compile(self._patterns)
      self.match = self._match_single

    self.config = config
    self.timeseries = Timeseries(self._host, **config)

    # Bind some of the timeseries methods to this for convenience
    self.list = self.timeseries.list
    self.properties = self.timeseries.properties
    self.iterate = self.timeseries.iterate

  @property
  def name(self):
    return self._name

  @property
  def host(self):
    return self._host

  @property
  def count(self):
    return self._count

  def generate(self):
    if self._generator:
      stat,value = self._generator()
      return stat,value,time.time()
    return None

  def store(self, stat, val, timestamp=None):
    '''
    Store a value in this schema.
    '''
    if self.match(stat):
      if self._transform:
        stat,val = self._transform(stat,val)
        if stat is None:
          return False
      self._count += 1
      self.timeseries.insert(stat, val, timestamp, intervals=self._rolling)
      return True
    return False

  def _match_single(self, stat):
    '''
    Used when the schema implements a single regular expression; returns
    True if the stat matches this schema, False otherwise.
    '''
    if isinstance(stat,(list,tuple)):
      matches = list(filter(None, [self._patterns.search(s) for s in stat]))
      return len(matches)==len(stat)
    return self._patterns.search(stat) is not None

  def _match_list(self, stat):
    '''
    Used when the schema implements several regular expressions; returns
    True if the stat matches this schema, False otherwise.
    '''
    matches = set()
    for pattern in self._patterns:
      if isinstance(stat,(list,tuple)):
        for s in stat:
          if pattern.search(s):
            matches.add(s)
        if len(matches)==len(stat):
          return True
      elif pattern.search(stat):
        return True
    return False
Example #18
    def __init__(self,
                 client,
                 loadFromDB=LOAD_DB,
                 bulk_insert=BULK_INSERT,
                 track_users=T_USER,
                 track_words=T_WORDS):
        """
        TODO: config persistence
        TODO: populate memory from database
        """

        # Redis server to send data to
        self._client = client
        self._log = setup_log('manager')

        # database
        if loadFromDB:
            try:
                self.dbclient = MongoClient(MONGO_URL)
                self._DB = self.dbclient[MONGO_DB]
                self.load()
            except Exception:
                self._log.error('could not connect to MongoDB.')

        self._intervals = {
            'second': {
                'step': 1,  # one second
                'steps': 60 * 60 * 24,  # keep for 1 day
            },
            'minute': {
                'step': 60,  # 60 seconds
                'steps': 60 * 24 * 3,  # keep for 3 days
            },
            'hour': {
                'step': '1h',  # one hour
                'steps': 30 * 24  # keep for 1 month
            },
            'day': {
                'step': '1d',  # one day
                'steps': 90  # keep for 3 months
            }
        }

        self._inserts = defaultdict(Counter)
        self._insert_lock = threading.Lock()
        self._n_inserts = 0
        self.bulk_insert = bulk_insert
        self._bulk_size = 10000
        self._mini_batch_size = 16
        self._pipelined = 0
        self.track_users = track_users
        self.track_words = track_words

        # Series
        self.events = Timeseries(self._client,
                                 type='count',
                                 intervals=self._intervals)

        # Online users (1-second steps over the last 20 seconds)
        self.users = Timeseries(
            self._client,
            type='count',
            intervals={'second2': {
                'step': 1,
                'steps': 20
            }})

        # Effective words, keep for 1 month
        self.words = Timeseries(
            self._client,
            type='histogram',
            intervals={'month': {
                'step': '30d',
                'steps': 3
            }})
Example #19
from kairos import Timeseries
import pymongo

client = pymongo.MongoClient('localhost')
t = Timeseries(client, type='histogram', read_func=float, intervals={
  'minute':{
    'step':60,            # 60 seconds
    'steps':120,          # last 2 hours
  }
})

# t.insert('example', 3.14159)
# t.insert('example', 2.71828)
# t.insert('example', 2.71828)
# t.insert('example', 3.71828)
# t.insert('example', 4.71828)
# t.insert('example', 5.71828)
t.insert('example', 6.71828)
t.insert('example', 7.71828)
print(t.get('example', 'minute'))
Example #20
from kairos import Timeseries
import pymongo

client = pymongo.MongoClient('localhost')
t = Timeseries(
    client,
    type='histogram',
    read_func=float,
    intervals={
        'minute': {
            'step': 60,  # 60 seconds
            'steps': 120,  # last 2 hours
        }
    })

# t.insert('example', 3.14159)
# t.insert('example', 2.71828)
# t.insert('example', 2.71828)
# t.insert('example', 3.71828)
# t.insert('example', 4.71828)
# t.insert('example', 5.71828)
t.insert('example', 6.71828)
t.insert('example', 7.71828)
print(t.get('example', 'minute'))
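
For a histogram series each bucket maps observed values to their counts, so aggregates are computed client-side; below is a sketch of a weighted mean, where the return shape is an assumption based on the histogram type used above:

data = t.get('example', 'minute')   # assumed shape: {bucket_ts: {value: count}}
for bucket_ts, histogram in data.items():
    n = sum(histogram.values())
    total = sum(value * count for value, count in histogram.items())
    print(bucket_ts, total / n if n else 0.0)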
Example #21
class Schema(object):
  '''
  Implements the schema and associated data processing for data points.
  '''

  def __init__(self, name, config):
    self._name = name
    self._host = config.pop('host', 'redis://localhost:6379/0')

    config.setdefault('type', 'count')
    config.setdefault('write_func', long_or_float)
    config.setdefault('read_func', long_or_float)
    self._transform = config.get('transform')

    # parse the patterns and bind the Schema.match function
    # TODO: optimize this binding even further to reduce lookups at runtime
    self._patterns = config.pop('match', [])
    if isinstance(self._patterns, (tuple,list)):
      if len(self._patterns) != 1:
        self._patterns = [ re.compile(x) for x in self._patterns ]
        self.match = self._match_list
      else:
        self._patterns = re.compile(self._patterns[0])
        self.match = self._match_single
    else:
      self._patterns = re.compile(self._patterns)
      self.match = self._match_single

    self._client = self._init_client()

    # TODO: Remove the need for this and add accessors to Schema or Kairos
    self.config = config
    self.timeseries = Timeseries(self._client, **config)

  @property
  def name(self):
    return self._name

  def store(self, stat, val, timestamp=None):
    '''
    Store a value in this schema.
    '''
    if self.match(stat):
      if self._transform:
        stat,val = self._transform(stat,val)
        if stat is None:
          return False
      self.timeseries.insert(stat, val, timestamp)
      return True
    return False

  def _match_single(self, stat):
    '''
    Used when the schema implements a single regular expression; returns
    True if the stat matches this schema, False otherwise.
    '''
    return self._patterns.search(stat) is not None

  def _match_list(self, stat):
    '''
    Used when the schema implements several regular expressions; returns
    True if the stat matches this schema, False otherwise.
    '''
    for pattern in self._patterns:
      if pattern.search(stat):
        return True
    return False

  def _init_client(self):
    '''
    Parse the host URL and initialize a client connection.
    '''
    if not isinstance(self._host, (str,unicode)):
      return self._host


    # To force scheme and netloc behavior. Easily a bug here but let it 
    # go for now
    if '//' not in self._host:
      self._host = '//'+self._host
    location = urlparse(self._host)

    if location.scheme in ('','redis'):
      if ':' in location.netloc:
        host,port = location.netloc.split(':')
      else:
        host,port = location.netloc,6379

      # TODO: better matching here
      if location.path in ('', '/'):
        db = 0
      else:
        db = location.path[1:]

      return Redis(host=host, port=int(port), db=int(db))

    raise ValueError("unsupported scheme", location.scheme)