class Metrics(object):

    def __init__(self, app=None):
        self.app = app
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        self.t = Timeseries(
            app.mxcache.redis_conn(),
            type='series',
            read_func=float,
            intervals={
                'minute': {
                    'step': 60,        # 60 seconds
                    'steps': 12 * 60,  # last 12 hours
                }
            })
        app.metrics = self

    def update(self, metrics):
        for f in MX_FIELDS:
            try:
                self.t.insert(f, metrics[f])
            except AttributeError:
                # default cache does not allow append
                pass

    def get_latest(self, minutes=5):
        now = float(time.time())
        joined_data = {}
        for f in MX_FIELDS:
            series_data = self.t.series(f, 'minute', start=now - minutes * 60, end=now)
            joined_data[f] = series_data
        return joined_data
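A minimal usage sketch for the Metrics extension above, assuming a Flask-style app object that already exposes mxcache.redis_conn() and that MX_FIELDS lists the metric names; the wiring shown here is illustrative, not part of the original code.

# Illustrative only: app, MX_FIELDS and the redis connection are assumed to
# exist as in the snippet above.
metrics = Metrics()
metrics.init_app(app)                        # binds app.metrics = metrics

# record one sample per field, then read back the last 5 minutes
metrics.update({f: 0.0 for f in MX_FIELDS})
latest = metrics.get_latest(minutes=5)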
def __init__(self, name, config):
    self._count = 0
    self._name = name
    self._host = config.pop('host', 'sqlite:///:memory:')
    self._rolling = config.pop('rolling', 0)
    self._generator = config.pop('generator', None)
    config.setdefault('type', 'count')
    config.setdefault('write_func', long_or_float)
    config.setdefault('read_func', long_or_float)
    self._transform = config.get('transform')

    # parse the patterns and bind the Schema.match function
    # TODO: optimize this binding even further to reduce lookups at runtime
    self._patterns = config.pop('match', [])
    if isinstance(self._patterns, (tuple, list)):
        if len(self._patterns) != 1:
            self._patterns = [re.compile(x) for x in self._patterns]
            self.match = self._match_list
        else:
            self._patterns = re.compile(self._patterns[0])
            self.match = self._match_single
    else:
        self._patterns = re.compile(self._patterns)
        self.match = self._match_single

    self.config = config
    self.timeseries = Timeseries(self._host, **config)

    # Bind some of the timeseries methods to this for convenience
    self.list = self.timeseries.list
    self.properties = self.timeseries.properties
    self.iterate = self.timeseries.iterate
def __init__(self, **kwargs):
    self.client = None
    try:
        self.client = redis.StrictRedis(host=kwargs['host'], port=kwargs['port'], db=kwargs['db'])
        self.client.ping()
        logging.debug('Redis host=%s,port=%s,db=%d- Connected!',
                      kwargs['host'], kwargs['port'], kwargs['db'])
    except Exception as ex:
        self.client = None
        logging.error("Redis host=%s,port=%s,db=%d- Error %s",
                      kwargs['host'], kwargs['port'], kwargs['db'], ex)

    self.ts = None
    if self.client is not None:
        logging.debug('Timeseries - Create')
        if 'timeseries' in kwargs:
            self.ts = Timeseries(self.client, type='gauge', intervals=kwargs['timeseries'])
        else:
            self.ts = Timeseries(
                self.client,
                type='gauge',
                intervals={
                    'seconds': {
                        'step': 5,           # 5 seconds
                        'steps': 120,        # last 10 minutes
                        'read_cast': float,
                    }
                })
def init_app(self, app):
    self.t = Timeseries(
        app.mxcache.redis_conn(),
        type='series',
        read_func=float,
        intervals={
            'minute': {
                'step': 60,        # 60 seconds
                'steps': 12 * 60,  # last 12 hours
            }
        })
    app.metrics = self
def __init__(self, redis_client, key_prefix):
    """Create our counter structure."""
    if not key_prefix:
        raise ValueError('key_prefix must be provided and length must be >0')

    self.counters = Timeseries(redis_client, {
        'minute': {
            'step': 60,          # 60 seconds
            'steps': 60,         # last hour
            'count_only': True,  # store counts only.
        },
        'hour': {
            'step': 3600,        # Hourly
            'steps': 24,         # Last day
            'count_only': True,  # Store counts only.
        },
        'daily': {
            'step': 86400,       # Daily
            'steps': 30,         # Last 30 days
            'count_only': True,  # Store counts only.
        },
    }, key_prefix=key_prefix)
    self.key_prefix = key_prefix
    self.redis_client = redis_client
def __init__(self, name, config):
    self._name = name
    self._host = config.pop('host', 'redis://localhost:6379/0')
    config.setdefault('type', 'count')
    config.setdefault('write_func', long_or_float)
    config.setdefault('read_func', long_or_float)
    self._transform = config.get('transform')

    # parse the patterns and bind the Schema.match function
    # TODO: optimize this binding even further to reduce lookups at runtime
    self._patterns = config.pop('match', [])
    if isinstance(self._patterns, (tuple, list)):
        if len(self._patterns) != 1:
            self._patterns = [re.compile(x) for x in self._patterns]
            self.match = self._match_list
        else:
            self._patterns = re.compile(self._patterns[0])
            self.match = self._match_single
    else:
        self._patterns = re.compile(self._patterns)
        self.match = self._match_single

    self._client = self._init_client()

    # TODO: Remove the need for this and add accessors to Schema or Kairos
    self.config = config
    self.timeseries = Timeseries(self._client, **config)
class EventCounter(object):
    """Manage event counters."""

    def __init__(self, redis_client, key_prefix):
        """Create our counter structure."""
        if not key_prefix:
            raise ValueError('key_prefix must be provided and length must be >0')

        self.counters = Timeseries(redis_client, {
            'minute': {
                'step': 60,          # 60 seconds
                'steps': 60,         # last hour
                'count_only': True,  # store counts only.
            },
            'hour': {
                'step': 3600,        # Hourly
                'steps': 24,         # Last day
                'count_only': True,  # Store counts only.
            },
            'daily': {
                'step': 86400,       # Daily
                'steps': 30,         # Last 30 days
                'count_only': True,  # Store counts only.
            },
        }, key_prefix=key_prefix)
        self.key_prefix = key_prefix
        self.redis_client = redis_client

    def record_hit(self, item_id):
        """Record a hit. All this does is increment the current time bucket."""
        self.counters.insert(item_id, 1)

    def get_counts(self, interval_name, item_id, n_recent_periods):
        """
        Return the n most recent periods for a given interval and item_id as a list.

        Items are returned oldest first; the most recent entry is last in the list.
        """
        series = self.counters.series(item_id, interval_name,
                                      steps=n_recent_periods, condensed=False)
        return series.values()

    def get_sum(self, interval_name, item_id, n_recent_periods):
        """Return the sum of the counts for the most recent periods."""
        return sum(self.get_counts(interval_name, item_id, n_recent_periods))

    def delete_keys(self):
        """Clean up (delete) all keys used by this instance."""
        raise NotImplementedError()
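A short usage sketch for EventCounter, assuming a local Redis instance reachable through redis-py; the key prefix and item id below are illustrative.

# Illustrative only: assumes redis-py is installed and Redis runs locally.
import redis

counter = EventCounter(redis.StrictRedis(host='localhost', port=6379, db=0),
                       key_prefix='events')
counter.record_hit('page:home')                        # bump the current buckets
last_hour = counter.get_counts('minute', 'page:home', 60)
total = counter.get_sum('hour', 'page:home', 24)       # hits over the last day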
class TimeseriesManager(object):
    client = REDIS_CLIENT

    INTERVALS = {
        'hour': {
            'step': 60 * 60,   # 60 minutes
            'steps': 24,       # last 24 hours
        },
        'day': {
            'step': 60 * 60 * 24,  # 1 Day
            'steps': 7             # Last 1 week
        },
        'week': {
            'step': 60 * 60 * 24 * 7,  # 1 Week
            'steps': 4                 # Last 1 Month (~4 weeks)
        },
        'month': {
            'step': 60 * 60 * 24 * 7 * 4,  # 1 Month (~4 weeks)
            'steps': 3                     # Last 1 'Quarter'
        }
    }

    def __init__(self, team_id, series_type):
        self.team_id = team_id
        self.series = Timeseries(self.client, type=series_type, intervals=self.INTERVALS)

    def gen_cache_key(self, stat_name):
        return "%s~%s" % (self.team_id, stat_name)

    def insert(self, stat_name, value):
        cache_key = self.gen_cache_key(stat_name)
        self.series.insert(cache_key, value, timestamp=time.time())

    def query(self, stat_name, interval):
        cache_key = self.gen_cache_key(stat_name)
        return self.series.series(cache_key, interval)
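A brief usage sketch for TimeseriesManager, assuming REDIS_CLIENT is a configured redis-py client; the team id and stat name are made up for illustration.

# Illustrative only: team id, stat name and series type are assumptions.
manager = TimeseriesManager(team_id=42, series_type='count')
manager.insert('tickets_closed', 1)                  # stored under key "42~tickets_closed"
per_hour = manager.query('tickets_closed', 'hour')   # dict of bucket -> count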
def start(ctx, argv):
    """
    Called once on script startup, before any other events.
    """
    global client
    global reqTable
    global db
    global respTable
    global timeSeries
    client = MongoClient()
    db = client.mydb
    reqTable = db.reqTable
    respTable = db.respTable
    timeSeries = Timeseries(client, type='histogram', read_func=float,
                            intervals={'minute': {
                                'step': 60,
                                'steps': 120,
                            }})
    ctx.log("start")
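A possible companion hook for the script above, sketching how a per-response latency could be pushed into timeSeries; the hook name and flow attributes follow mitmproxy's old inline-script API and may differ across versions, so treat this as an assumption rather than the original author's code.

def response(ctx, flow):
    # Illustrative sketch: record response latency (seconds) into the
    # 'minute' histogram created in start(). Attribute names are assumptions.
    try:
        latency = flow.response.timestamp_end - flow.request.timestamp_start
        timeSeries.insert('latency', float(latency))
    except AttributeError:
        ctx.log("response without timing info")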
class RedisTimeSeries():

    def __init__(self, **kwargs):
        self.client = None
        try:
            self.client = redis.StrictRedis(host=kwargs['host'], port=kwargs['port'], db=kwargs['db'])
            self.client.ping()
            logging.debug('Redis host=%s,port=%s,db=%d- Connected!',
                          kwargs['host'], kwargs['port'], kwargs['db'])
        except Exception as ex:
            self.client = None
            logging.error("Redis host=%s,port=%s,db=%d- Error %s",
                          kwargs['host'], kwargs['port'], kwargs['db'], ex)

        self.ts = None
        if self.client is not None:
            logging.debug('Timeseries - Create')
            if 'timeseries' in kwargs:
                self.ts = Timeseries(self.client, type='gauge', intervals=kwargs['timeseries'])
            else:
                self.ts = Timeseries(
                    self.client,
                    type='gauge',
                    intervals={
                        'seconds': {
                            'step': 5,           # 5 seconds
                            'steps': 120,        # last 10 minutes
                            'read_cast': float,
                        }
                    })

    def record_hit(self, key, measurement):
        if self.client:
            self.ts.insert(str(key), float(measurement))

    def record_response_time(self, content_id, measurement):
        self.record_hit(str(content_id) + ':rt', float(measurement))

    def record_status(self, content_id, measurement):
        self.record_hit(str(content_id) + ':status', float(measurement))

    def get_timeseries(self, key):
        if self.client is not None:
            return self.ts.series(str(key), 'seconds')
        return None

    def get_response_time_timeseries(self, content_id):
        return self.get_timeseries(str(content_id) + ':rt')

    def get_status_timeseries(self, content_id):
        return self.get_timeseries(str(content_id) + ':status')

    def get_timeseries_avg(self, key):
        avg = 0.0
        if self.client is not None:
            series = self.ts.series(str(key), 'seconds')
            total = 0.0
            count = 0.0
            for bucket, value in series.items():
                if value:
                    total += float(value)
                    count += 1.0
            if count > 0.0:
                avg = total / count
        logging.debug('series avg: %f', avg)
        return avg

    def get_response_time_avg(self, content_id):
        return self.get_timeseries_avg(str(content_id) + ':rt')
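A small usage sketch for RedisTimeSeries, assuming a local Redis server; the keyword arguments mirror the constructor above and the content id is illustrative.

# Illustrative only: host/port/db point at a local Redis instance.
rts = RedisTimeSeries(host='localhost', port=6379, db=0)
rts.record_response_time('page-42', 0.137)       # seconds
rts.record_status('page-42', 200.0)
print(rts.get_response_time_avg('page-42'))      # mean over the 'seconds' interval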
def __init__(self, team_id, series_type):
    self.team_id = team_id
    self.series = Timeseries(self.client, type=series_type, intervals=self.INTERVALS)
class Schema(object):
    '''
    Implements the schema and associated data processing for data points.
    '''

    def __init__(self, name, config):
        self._count = 0
        self._name = name
        self._host = config.pop('host', 'sqlite:///:memory:')
        self._rolling = config.pop('rolling', 0)
        self._generator = config.pop('generator', None)
        config.setdefault('type', 'count')
        config.setdefault('write_func', long_or_float)
        config.setdefault('read_func', long_or_float)
        self._transform = config.get('transform')

        # parse the patterns and bind the Schema.match function
        # TODO: optimize this binding even further to reduce lookups at runtime
        self._patterns = config.pop('match', [])
        if isinstance(self._patterns, (tuple, list)):
            if len(self._patterns) != 1:
                self._patterns = [re.compile(x) for x in self._patterns]
                self.match = self._match_list
            else:
                self._patterns = re.compile(self._patterns[0])
                self.match = self._match_single
        else:
            self._patterns = re.compile(self._patterns)
            self.match = self._match_single

        self.config = config
        self.timeseries = Timeseries(self._host, **config)

        # Bind some of the timeseries methods to this for convenience
        self.list = self.timeseries.list
        self.properties = self.timeseries.properties
        self.iterate = self.timeseries.iterate

    @property
    def name(self):
        return self._name

    @property
    def host(self):
        return self._host

    @property
    def count(self):
        return self._count

    def generate(self):
        if self._generator:
            stat, value = self._generator()
            return stat, value, time.time()
        return None

    def store(self, stat, val, timestamp=None):
        '''
        Store a value in this schema.
        '''
        if self.match(stat):
            if self._transform:
                stat, val = self._transform(stat, val)
                if stat is None:
                    return False
            self._count += 1
            self.timeseries.insert(stat, val, timestamp, intervals=self._rolling)
            return True
        return False

    def _match_single(self, stat):
        '''
        Used for when schema implements a single regular expression, returns
        True if the stat matches this schema, False otherwise.
        '''
        if isinstance(stat, (list, tuple)):
            matches = filter(None, [self._patterns.search(s) for s in stat])
            return len(matches) == len(stat)
        return self._patterns.search(stat) is not None

    def _match_list(self, stat):
        '''
        Used for when schema implements several regular expressions, returns
        True if the stat matches this schema, False otherwise.
        '''
        matches = set()
        for pattern in self._patterns:
            if isinstance(stat, (list, tuple)):
                for s in stat:
                    if pattern.search(s):
                        matches.add(s)
                if len(matches) == len(stat):
                    return True
            elif pattern.search(stat):
                return True
        return False
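A hedged construction sketch for the Schema class above, assuming long_or_float and the kairos sqlite backend are importable; the schema name, match pattern and stat names are illustrative.

# Illustrative only: configuration keys mirror the constructor's pop/setdefault calls.
config = {
    'host': 'sqlite:///:memory:',
    'type': 'count',
    'match': r'app\..*',                # single pattern -> _match_single is bound
    'intervals': {
        'minute': {'step': 60, 'steps': 60},
    },
}
schema = Schema('app_counts', config)
schema.store('app.logins', 1)           # True: matches the pattern
schema.store('db.queries', 1)           # False: filtered out by match()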
class AnalyticsManager:

    def __init__(self, client, loadFromDB=LOAD_DB, bulk_insert=BULK_INSERT,
                 track_users=T_USER, track_words=T_WORDS):
        """
        TODO: config persistence
        TODO: populate memory from database
        """
        # Redis server to send data to
        self._client = client
        self._log = setup_log('manager')

        # database
        if loadFromDB:
            try:
                self.dbclient = MongoClient(MONGO_URL)
                self._DB = self.dbclient[MONGO_DB]
                self.load()
            except:
                self._log.error('could not connect to MongoDB.')

        self._intervals = {
            'second': {
                'step': 1,              # one second
                'steps': 60 * 60 * 24,  # keep for 1 day
            },
            'minute': {
                'step': 60,             # 60 seconds
                'steps': 60 * 24 * 3,   # keep for 3 days
            },
            'hour': {
                'step': '1h',           # one hour
                'steps': 30 * 24        # keep for 1 month
            },
            'day': {
                'step': '1d',           # one day
                'steps': 90             # keep for 3 months
            }
        }

        self._inserts = defaultdict(Counter)
        self._insert_lock = threading.Lock()
        self._n_inserts = 0
        self.bulk_insert = bulk_insert
        self._bulk_size = 10000
        self._mini_batch_size = 16
        self._pipelined = 0
        self.track_users = track_users
        self.track_words = track_words

        # Series
        self.events = Timeseries(self._client, type='count', intervals=self._intervals)
        # Online users, consider offline after 2 minutes
        self.users = Timeseries(
            self._client,
            type='count',
            intervals={'second2': {
                'step': 1,
                'steps': 20
            }})
        # Effective words, keep for 1 month
        self.words = Timeseries(
            self._client,
            type='histogram',
            intervals={'month': {
                'step': '30d',
                'steps': 3
            }})

    def store_event(self, event):
        """
        parameters:
            event: {
                campID: (int)
                etype: event type (str). pview, imp, or click.
                timestamp: time of event (int), (default) current time.
                words: list of words related to this event, (default) [].
            }
        returns:
            0 if success, otherwise -1 and { error: "message" }
        """
        try:
            campID, etype = event['campID'], event['etype']
            timestamp = event.get('timestamp', time.time())
            words = event.get('words', [])
            eventKey = '%d:%s' % (campID, etype)
            userKey = '%d:online' % (campID)
        except:
            msg = 'wrong event structure. type: %s, event: %s' % (type(event), event)
            self._log.error(msg)
            return (-1, {'error': msg})

        if etype not in ['pview', 'imp', 'click']:
            msg = 'wrong event type "%s", valid values: pview, imp, click.' % etype
            self._log.error(msg)
            return (-1, {'error': msg})

        # Bulk insert
        if self.bulk_insert:
            temp_inserts = None
            with self._insert_lock:
                self._inserts[timestamp][eventKey] += 1
                self._n_inserts += 1
                if self._n_inserts >= self._bulk_size:
                    try:
                        self.events.bulk_insert({
                            t: {k: [v2] for k, v2 in v.items()}
                            for t, v in self._inserts.items()
                        })
                        self._inserts = defaultdict(Counter)
                        self._n_inserts = 0
                    except Exception as e:
                        msg = '%s: %s' % (type(e), e)
                        self._log.error(msg)
                        return (-1, {'error': msg})
        # Single insert
        else:
            try:
                self.events.insert(eventKey, timestamp=timestamp)
                '''self._pipelined += 1
                if self._pipelined >= self._bulk_size:
                    self.events.execute()
                    self._pipelined = 0'''
                # only count the user if the event is page view
                if self.track_users and etype == 'pview':
                    self.users.insert(userKey, timestamp=timestamp)
                # only count the word if it led to impression or click
                if self.track_words and etype in ['imp', 'click']:
                    self.words.insert(campID, words, timestamp=timestamp)
            except Exception as e:
                msg = '%s: %s' % (type(e), e)
                self._log.error(msg)
                return (-1, {'error': msg})

        return (0, {'status': 'SUCCESS'})

    def get_camp_data(self, event):
        """
        parameters:
            event: {
                campID: (int)
                etype: event type (str). pview, imp, or click,
                interval: (str) second, minute, or hour. (default) minute,
                start: (int) timestamp of the beginning of interval, (default) None,
                end: (int) timestamp of the end of interval, (default) None,
                get_users: (bool) get online users. (default) True,
                get_camp_words: (bool) effective words for this campaign. (default) False,
                get_all_words: (bool) effective words for all campaigns. (default) False
            }
        returns:
            code (int) 0 for success, -1 for failure.
            if success, result: {
                data: (list) (timestamp, event_count) tuples,
                users: (int) count of online users,
                camp_words: (list) most effective words for this campaign,
                all_words: (list) most effective words of all campaigns
            }
            if failure: { error: "error message" }
        TODO: check if campID exists
        """
        try:
            campID, etype = event['campID'], event['etype']
            interval = event.get('interval', 'minute')
            start = event.get('start', 0)
            end = event.get('end', time.time())
            get_users = event.get('get_users', True)
            get_camp_words = event.get('get_camp_words', False)
            get_all_words = event.get('get_all_words', False)
            eventKey = '%d:%s' % (campID, etype)
            userKey = '%d:online' % (campID)
        except Exception as e:
            msg = 'wrong event structure. %s' % e
            self._log.error(msg)
            return (-1, {'error': msg})

        data = []
        if etype not in ['pview', 'imp', 'click']:
            msg = 'wrong event type "%s", valid values: pview, imp, click.' % etype
            self._log.error(msg)
            return (-1, {'error': msg})

        result = {}
        try:
            data = self.events.series(eventKey, interval=interval, start=start, end=end)
            result['data'] = list(data.items())

            if self.track_users and get_users:
                users = self.users.series(userKey, interval='second2')
                users = sum(users.values())
                result['users'] = users

            if self.track_words and get_camp_words:
                camp_words = self.words.series(campID, 'month')
                sorted_words = sorted([(w, c) for w, c in camp_words.items()],
                                      key=lambda t: t[1])
                result['camp_words'] = sorted_words

            if self.track_words and get_all_words:
                all_camps = self.words.list()
                all_camps_words = self.words.series(all_camps, 'month')
                sorted_words = sorted([(w, c) for w, c in all_camps_words.items()],
                                      key=lambda t: t[1])
                result['all_words'] = sorted_words
        except UnknownInterval as e:
            msg = 'wrong interval type "%s", valid values: second, minute, hour.' % str(interval)
            self._log.error(msg)
            return (-1, {'error': msg})
        except Exception as e:
            msg = '%s: %s.' % (type(e), e)
            self._log.error(msg)
            return (-1, {'error': msg})

        return (0, result)

    def flush(self):
        """
        Deletes all data stored in redis
        """
        self._log.info('Removing all data from database.')
        self.events.delete_all()
        self.users.delete_all()
        self.words.delete_all()

    def get_lost_data(self):
        """
        Reloads the lost data from the main database if Redis crashed.
        """
        # get the latest timestamp in Redis
        props = self.events.properties()
        latest_timestamp = props['second']['last']

        mongo_cl = MongoClient(MONGO_URL)
        db = mongo_cl[MONGO_DB]
        coll = db[MONGO_COLLECTION]
        # get data more recent than latest_timestamp
        data = coll.find({'timestamp': {'$gt': latest_timestamp}})

        # mapping event types from numbers to string
        etype_str = {0: '', 1: '', 2: ''}

        done = False
        for event in data:
            timestamp, etype = event['timestamp'], etype_str[event['type']]
            campID = event['campaign']
            eventKey = '%d:%s' % (campID, etype)
            self._inserts[timestamp][eventKey] += 1
            self._n_inserts += 1
            if data.count() - data.retrieved <= 0:
                done = True
            if self._n_inserts >= self._bulk_size or done:
                try:
                    self.events.bulk_insert({
                        t: {k: [v2] for k, v2 in v.items()}
                        for t, v in self._inserts.items()
                    })
                    self._inserts = defaultdict(Counter)
                    self._n_inserts = 0
                except Exception as e:
                    msg = '%s: %s' % (type(e), e)
                    self._log.error(msg)
                    return -1
        return 0

    def load(self):
        """
        load data from the database when the cache manager is up and running
        """
        pass
def __init__(self, client, loadFromDB=LOAD_DB, bulk_insert=BULK_INSERT,
             track_users=T_USER, track_words=T_WORDS):
    """
    TODO: config persistence
    TODO: populate memory from database
    """
    # Redis server to send data to
    self._client = client
    self._log = setup_log('manager')

    # database
    if loadFromDB:
        try:
            self.dbclient = MongoClient(MONGO_URL)
            self._DB = self.dbclient[MONGO_DB]
            self.load()
        except:
            self._log.error('could not connect to MongoDB.')

    self._intervals = {
        'second': {
            'step': 1,              # one second
            'steps': 60 * 60 * 24,  # keep for 1 day
        },
        'minute': {
            'step': 60,             # 60 seconds
            'steps': 60 * 24 * 3,   # keep for 3 days
        },
        'hour': {
            'step': '1h',           # one hour
            'steps': 30 * 24        # keep for 1 month
        },
        'day': {
            'step': '1d',           # one day
            'steps': 90             # keep for 3 months
        }
    }

    self._inserts = defaultdict(Counter)
    self._insert_lock = threading.Lock()
    self._n_inserts = 0
    self.bulk_insert = bulk_insert
    self._bulk_size = 10000
    self._mini_batch_size = 16
    self._pipelined = 0
    self.track_users = track_users
    self.track_words = track_words

    # Series
    self.events = Timeseries(self._client, type='count', intervals=self._intervals)
    # Online users, consider offline after 2 minutes
    self.users = Timeseries(
        self._client,
        type='count',
        intervals={'second2': {
            'step': 1,
            'steps': 20
        }})
    # Effective words, keep for 1 month
    self.words = Timeseries(
        self._client,
        type='histogram',
        intervals={'month': {
            'step': '30d',
            'steps': 3
        }})
from kairos import Timeseries
import pymongo

client = pymongo.MongoClient('localhost')
t = Timeseries(client, type='histogram', read_func=float, intervals={
    'minute': {
        'step': 60,    # 60 seconds
        'steps': 120,  # last 2 hours
    }
})

# t.insert('example', 3.14159)
# t.insert('example', 2.71828)
# t.insert('example', 2.71828)
# t.insert('example', 3.71828)
# t.insert('example', 4.71828)
# t.insert('example', 5.71828)
t.insert('example', 6.71828)
t.insert('example', 7.71828)
print t.get('example', 'minute')
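A possible follow-on to the Mongo-backed example above, sketching how a window of buckets could be read back; the start/end keywords mirror the series() calls used elsewhere in this listing, and the time window is illustrative.

import time

# Illustrative only: read the last 10 minutes of 'example' buckets.
now = time.time()
recent = t.series('example', 'minute', start=now - 10 * 60, end=now)
for bucket, histogram in recent.items():
    print bucket, histogram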
class Schema(object):
    '''
    Implements the schema and associated data processing for data points.
    '''

    def __init__(self, name, config):
        self._name = name
        self._host = config.pop('host', 'redis://localhost:6379/0')
        config.setdefault('type', 'count')
        config.setdefault('write_func', long_or_float)
        config.setdefault('read_func', long_or_float)
        self._transform = config.get('transform')

        # parse the patterns and bind the Schema.match function
        # TODO: optimize this binding even further to reduce lookups at runtime
        self._patterns = config.pop('match', [])
        if isinstance(self._patterns, (tuple, list)):
            if len(self._patterns) != 1:
                self._patterns = [re.compile(x) for x in self._patterns]
                self.match = self._match_list
            else:
                self._patterns = re.compile(self._patterns[0])
                self.match = self._match_single
        else:
            self._patterns = re.compile(self._patterns)
            self.match = self._match_single

        self._client = self._init_client()

        # TODO: Remove the need for this and add accessors to Schema or Kairos
        self.config = config
        self.timeseries = Timeseries(self._client, **config)

    @property
    def name(self):
        return self._name

    def store(self, stat, val, timestamp=None):
        '''
        Store a value in this schema.
        '''
        if self.match(stat):
            if self._transform:
                stat, val = self._transform(stat, val)
                if stat is None:
                    return False
            self.timeseries.insert(stat, val, timestamp)
            return True
        return False

    def _match_single(self, stat):
        '''
        Used for when schema implements a single regular expression, returns
        True if the stat matches this schema, False otherwise.
        '''
        return self._patterns.search(stat) is not None

    def _match_list(self, stat):
        '''
        Used for when schema implements several regular expressions, returns
        True if the stat matches this schema, False otherwise.
        '''
        for pattern in self._patterns:
            if pattern.search(stat):
                return True
        return False

    def _init_client(self):
        '''
        Parse the host URL and initialize a client connection.
        '''
        if not isinstance(self._host, (str, unicode)):
            return self._host

        # To force scheme and netloc behavior. Easily a bug here but let it
        # go for now
        if '//' not in self._host:
            self._host = '//' + self._host
        location = urlparse(self._host)

        if location.scheme in ('', 'redis'):
            if ':' in location.netloc:
                host, port = location.netloc.split(':')
            else:
                host, port = location.netloc, 6379

            # TODO: better matching here
            if location.path in ('', '/'):
                db = 0
            else:
                db = location.path[1:]

            return Redis(host=host, port=int(port), db=int(db))

        raise ValueError("unsupported scheme", location.scheme)
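A construction sketch for the Redis-backed Schema variant above, assuming a Redis server at the default URL; the schema name, pattern list and stats are illustrative.

# Illustrative only: two patterns, so _match_list is bound as match().
config = {
    'host': 'redis://localhost:6379/0',
    'type': 'count',
    'match': [r'web\..*', r'api\..*'],
    'intervals': {
        'minute': {'step': 60, 'steps': 120},
    },
}
schema = Schema('request_counts', config)
schema.store('web.requests', 1)     # True: matched and inserted
schema.store('worker.jobs', 1)      # False: no pattern matched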