def push_key(self, k, v, ttl=0):
    T0 = time.time()
    STATS.incr('ts.graphite.push-key', 1)
    v64 = base64.b64encode(v)
    logger.debug("PUSH KEY", k, "and value", len(v64))
    #self.clust.put_key(k, v64, allow_udp=True, ttl=ttl)
    self.clust.stack_put_key(k, v64, ttl=ttl)
    STATS.timer('ts.graphite.push-key', (time.time() - T0) * 1000)
    return
def grok_graphite_data(self, data):
    STATS.incr('ts.graphite.grok.data', 1)
    forwards = {}
    for line in data.splitlines():
        elts = line.split(' ')
        elts = [s.strip() for s in elts if s.strip()]
        if len(elts) != 3:
            return
        mname, value, timestamp = elts[0], elts[1], elts[2]
        hkey = hashlib.sha1(mname).hexdigest()
        ts_node_manager = self.clust.find_ts_node(hkey)
        # if it's me that manages this key, I add it in my backend
        if ts_node_manager == self.clust.uuid:
            logger.debug("I am the TS node manager")
            try:
                timestamp = int(timestamp)
            except ValueError:
                return
            value = to_best_int_float(value)
            if value is None:
                continue
            self.tsb.add_value(timestamp, mname, value)
        # not me? stack a forwarder
        else:
            logger.debug("The node manager for this TS is ", ts_node_manager)
            l = forwards.get(ts_node_manager, [])
            l.append(line)
            forwards[ts_node_manager] = l

    for (uuid, lst) in forwards.iteritems():
        node = self.clust.nodes.get(uuid, None)
        # maybe the node disappeared? bail out, we are not lucky
        if node is None:
            continue
        packets = []
        # first compute the packets
        buf = ''
        for line in lst:
            buf += line + '\n'
            if len(buf) > 1024:
                packets.append(buf)
                buf = ''
        if buf != '':
            packets.append(buf)

        # UDP
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        for packet in packets:
            # do NOT use the node['port'], it's the internal communication port, not the graphite one!
            sock.sendto(packet, (node['addr'], self.graphite_port))
        sock.close()
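
# A minimal sketch of what grok_graphite_data expects on the wire: the Graphite
# plaintext protocol, one "<metric.path> <value> <unix-timestamp>" triple per
# line. The host/port defaults below are hypothetical placeholders; in this
# module the listeners bind to self.addr / self.graphite_port.
def _example_send_graphite_line(host='127.0.0.1', port=2003):
    import socket
    import time
    line = 'servers.web1.cpu.load 0.42 %d\n' % int(time.time())
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.sendto(line, (host, port))
    sock.close()
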
def add_value(self, t, key, v):
    # be sure to work with int time
    t = int(t)
    T0 = time.time()
    STATS.incr('ts-add-value', 1)

    # Try to get the minute memory element. If not available, create one and
    # set its creation time so the ts-reaper thread can grok it and archive it if too old
    e = self.data.get('min::%s' % key, None)
    if e is None:
        now = NOW.now  # int(time.time())
        e = {'cur_min': 0, 'sum': 0, 'min': None, 'max': None,
             'values': [None] * 60, 'nb': 0, 'ctime': now}
        self.data['min::%s' % key] = e

    # Maybe we did not know about this metric yet; register its name in any case
    self.set_name_if_unset(key)

    # Compute the minute start and the second idx inside the
    # minute (0-->59)
    _div = divmod(t, 60)
    t_minu = _div[0] * 60
    t_second = _div[1]

    # If we just changed the minute
    if t_minu != e['cur_min']:
        # we don't save the first def_e
        if e['cur_min'] != 0:
            self.archive_minute(e, key)
        now = NOW.now  # int(time.time())
        e = {'cur_min': t_minu, 'sum': 0, 'min': None, 'max': None,
             'values': [None] * 60, 'nb': 0, 'ctime': now}
        self.data['min::%s' % key] = e

    # We will insert the value at the t_second position, we are sure this place is
    # available as the structure is already filled when the dict is created
    e['values'][t_second] = v

    # Check if the new value changes the min/max entry
    e_min = e['min']
    e_max = e['max']
    if not e_min or v < e_min:
        e['min'] = v
    if not e_max or v > e_max:
        e['max'] = v

    # And sum up the result so we will be able to compute the
    # avg entry
    e['sum'] += v
    e['nb'] += 1

    STATS.timer('ts.add_value', (time.time() - T0) * 1000)
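
# A standalone sketch (not part of the class) of the minute bucket that
# add_value maintains: 60 slots for the seconds of the minute plus running
# sum/min/max/nb, from which archive_minute later derives the avg as sum / nb.
def _example_minute_bucket(samples):
    # samples: iterable of (unix_timestamp, value) pairs falling inside one minute
    e = {'cur_min': 0, 'sum': 0, 'min': None, 'max': None,
         'values': [None] * 60, 'nb': 0}
    for (t, v) in samples:
        t = int(t)
        t_minu, t_second = divmod(t, 60)
        e['cur_min'] = t_minu * 60
        e['values'][t_second] = v
        if e['min'] is None or v < e['min']:
            e['min'] = v
        if e['max'] is None or v > e['max']:
            e['max'] = v
        e['sum'] += v
        e['nb'] += 1
    e['avg'] = e['sum'] / float(e['nb']) if e['nb'] else None
    return e
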
def launch_graphite_udp_listener(self):
    self.graphite_udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)  # UDP
    self.graphite_udp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    self.graphite_udp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 1048576)
    self.log(self.graphite_udp_sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF))
    self.graphite_udp_sock.bind((self.addr, self.graphite_port))
    self.log("TS Graphite UDP port open", self.graphite_port)
    self.log("UDP RCVBUF", self.graphite_udp_sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF))
    while True:  # not self.interrupted:
        try:
            data, addr = self.graphite_udp_sock.recvfrom(65535)
        except socket.timeout:
            # loop until we got some data
            continue
        self.log("UDP Graphite: received message:", len(data), addr)
        STATS.incr('ts.graphite.udp.receive', 1)
        self.graphite_queue.append(data)
def do_reaper_thread(self):
    while True:
        now = int(time.time())
        m = divmod(now, 60)[0] * 60  # current minute
        all_names = []
        with self.data_lock:
            all_names = self.data.keys()
        logger.info("DOING reaper thread on %d elements" % len(all_names))
        for name in all_names:
            # Grok all minute entries
            if name.startswith('min::'):
                e = self.data.get(name, None)
                # maybe someone deleted the entry? should not be possible
                if e is None:
                    continue
                ctime = e['ctime']
                logger.debug("REAPER old data for ", name)
                # if the creation time of this structure is too old and it
                # really holds data, force saving the entry in the KV store
                if ctime < now - self.max_data_age and e['nb'] > 0:
                    STATS.incr('reaper-old-data', 1)
                    logger.debug("REAPER TOO OLD DATA FOR", name)
                    # get the raw metric name
                    _id = name[5:]
                    self.archive_minute(e, _id)
                    # the element was too old, so we can assume it won't be
                    # updated again. Delete its entry
                    try:
                        del self.data[name]
                    except KeyError:
                        pass
                    '''
                    # and set a new minute, the next one
                    n_minute = e['cur_min'] + 60
                    e = {'cur_min': n_minute, 'sum': 0, 'min': None, 'max': None,
                         'values': [None for _ in xrange(60)], 'nb': 0, 'ctime': now}
                    self.data[name] = e
                    '''
        time.sleep(10)
def launch_graphite_tcp_listener(self):
    self.graphite_tcp_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.graphite_tcp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    self.graphite_tcp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 1048576)
    self.graphite_tcp_sock.bind((self.addr, self.graphite_port))
    self.graphite_tcp_sock.listen(5)
    self.log("TS Graphite TCP port open", self.graphite_port)
    while True:
        try:
            conn, addr = self.graphite_tcp_sock.accept()
        except socket.timeout:
            # loop until we get a connection
            continue
        conn.settimeout(5.0)
        logger.debug('TCP Graphite Connection address:', addr)
        data = ''
        while True:
            try:
                ldata = conn.recv(1024)
            except Exception, exp:
                print "TIMEOUT", exp
                break
            if not ldata:
                break
            # Only take full lines, not the last partial one.
            # So we look at the position of the last \n
            lst_nidx = ldata.rfind('\n')
            # take all finished lines
            data += ldata[:lst_nidx + 1]
            STATS.incr('ts.graphite.tcp.receive', 1)
            self.graphite_queue.append(data)
            # keep the trailing partial line so we can complete it
            # on the next turn
            data = ldata[lst_nidx + 1:]
        conn.close()
        # Also stack whatever was left over from the last recv
        self.graphite_queue.append(data)
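
# A hedged usage sketch for the TCP listener above: pushing a few Graphite
# plaintext lines over a stream connection. Host/port are hypothetical
# placeholders for self.addr / self.graphite_port. Lines may be split across
# recv() calls; the listener reassembles them on '\n'.
def _example_send_graphite_tcp(host='127.0.0.1', port=2003):
    import socket
    import time
    lines = ''.join('servers.web1.metric%d 1 %d\n' % (i, int(time.time()))
                    for i in xrange(3))
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((host, port))
    sock.sendall(lines)
    sock.close()
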
def get_ttl_db(self, h):
    cdb = self.dbs.get(h, None)
    # If missing, look to load it, but with a lock to be sure we load it only once
    if cdb is None:
        STATS.incr('ttl-db-cache-miss', 1)
        with self.lock:
            # Maybe during the lock another thread succeeded in getting the cdb
            if not h in self.dbs:
                # Ok really load it, but keep no more than self.db_cache_size
                # databases open (the number of open files can increase quickly)
                if len(self.dbs) > self.db_cache_size:
                    ttodrop = self.dbs.keys()[0]
                    del self.dbs[ttodrop]
                _t = time.time()
                cdb = leveldb.LevelDB(os.path.join(self.ttldb_dir, '%d' % h))
                STATS.incr('ttl-db-open', time.time() - _t)
                self.dbs[h] = cdb
            # Ok another thread got there just before us, good :)
            else:
                cdb = self.dbs[h]
    # We already got it, thanks cache
    else:
        STATS.incr('ttl-db-cache-hit', 1)
    return cdb
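
# get_ttl_db above relies on a double-checked locking cache: a lock-free read
# on the hot path, then a second check under the lock so the LevelDB handle is
# opened only once even if several threads miss at the same time. A minimal
# generic sketch of that pattern (the names below are hypothetical, not part
# of this module):
import threading

_example_cache = {}
_example_cache_lock = threading.Lock()


def _example_get_or_open(key, opener):
    obj = _example_cache.get(key, None)
    if obj is None:
        with _example_cache_lock:
            if key not in _example_cache:
                # only one thread pays the open cost
                _example_cache[key] = opener(key)
            obj = _example_cache[key]
    return obj
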
def archive_minute(self, e, ID):
    STATS.incr('ts-archive-minute', 1)
    T0 = time.time()

    cur_min = e['cur_min']
    name = ID
    values = e['values']
    e['avg'] = None
    if e['nb'] != 0:
        e['avg'] = e['sum'] / float(e['nb'])

    # the main key we use to save the minute entry in the DB
    key = '%s::m%d' % (name, cur_min)

    # Serialize and put the value
    _t = time.time()
    ser = SERIALIZER.dumps(e, 2)
    STATS.incr('serializer', time.time() - _t)

    # We keep minutes for 1 day
    _t = time.time()
    self.usender.push_key(key, ser, ttl=86400)
    STATS.incr('put-key', time.time() - _t)

    # Also insert the key in a time switching database
    # (one database by hour)
    #_t = time.time()
    #self.its.assume_key(key, cur_min)
    #STATS.incr('its-assume-key', time.time() - _t)

    ### Hour now
    # Now look at whether we just switched hour
    hour = divmod(cur_min, 3600)[0] * 3600
    #CUR_H_KEY = ALL[ID]['CUR_H_KEY']
    hour_e = self.data.get('hour::%s' % name, None)
    if hour_e is None:
        hour_e = {'hour': 0, 'sum': 0, 'min': None, 'max': None, 'values': [None] * 60, 'nb': 0}
        self.data['hour::%s' % name] = hour_e
    old_hour = hour_e['hour']
    # If we switch to a new hour and we are not the first def_hour value
    # we must save the hour entry in the database
    if hour != old_hour:
        if hour_e['hour'] != 0:
            _t = time.time()
            ser = SERIALIZER.dumps(hour_e)
            STATS.incr('serializer', time.time() - _t)

            # the main key we use to save the hour entry in the DB
            hkey = '%s::h%d' % (name, old_hour)

            # Keep hour entries for 1 month
            _t = time.time()
            self.usender.push_key(hkey, ser, ttl=86400 * 31)
            STATS.incr('put-hour', time.time() - _t)
        # Now a new one with the good hour of t :)
        hour_e = {'hour': 0, 'sum': 0, 'min': None, 'max': None, 'values': [None] * 60, 'nb': 0}
        hour_e['hour'] = hour
        self.data['hour::%s' % name] = hour_e

    _t = time.time()
    # Now compute the hour object update
    h_min = hour_e['min']
    h_max = hour_e['max']
    if h_min is None or e['min'] < h_min:
        hour_e['min'] = e['min']
    if h_max is None or e['max'] > h_max:
        hour_e['max'] = e['max']
    if e['avg'] is not None:
        hour_e['nb'] += 1
        hour_e['sum'] += e['avg']
        # Look at which minute of the hour we are in
        minute_hour_idx = (cur_min - hour) / 60
        hour_e['values'][minute_hour_idx] = e['avg']
        hour_e['avg'] = hour_e['sum'] / float(hour_e['nb'])
    STATS.incr('hour-compute', time.time() - _t)

    ### Day now
    # Now look at whether we just switched day
    day = divmod(cur_min, 86400)[0] * 86400

    # Get the in-memory entry, and if none a default one
    day_e = self.data.get('day::%s' % name, None)
    if day_e is None:
        day_e = {'day': 0, 'sum': 0, 'min': None, 'max': None, 'values': [None] * 1440, 'nb': 0}
        self.data['day::%s' % name] = day_e
    old_day = day_e['day']
    # If we switch to a new day and we are not the first def_day value
    # we must save the day entry in the database
    if day != old_day and day_e['day'] != 0:
        _t = time.time()
        ser = SERIALIZER.dumps(day_e)
        STATS.incr('serializer', time.time() - _t)

        # the main key we use to save the day entry in the DB
        hkey = '%s::d%d' % (name, old_day)

        _t = time.time()
        # And keep day objects for 1 year
        self.usender.push_key(hkey, ser, ttl=86400 * 366)
        STATS.incr('put-day', time.time() - _t)

        # Now a new one :)
        day_e = {'day': day, 'sum': 0, 'min': None, 'max': None, 'values': [None] * 1440, 'nb': 0}
        self.data['day::%s' % name] = day_e

    _t = time.time()
    # Now compute the day object update
    d_min = day_e['min']
    d_max = day_e['max']
    if d_min is None or e['min'] < d_min:
        day_e['min'] = e['min']
    if d_max is None or e['max'] > d_max:
        day_e['max'] = e['max']
    if e['avg'] is not None:
        day_e['nb'] += 1
        day_e['sum'] += e['avg']
        # Look at which minute of the day we are in
        minute_day_idx = (cur_min - day) / 60
        day_e['values'][minute_day_idx] = e['avg']
        day_e['avg'] = day_e['sum'] / float(day_e['nb'])
    STATS.incr('day-compute', time.time() - _t)

    STATS.timer('ts.archive-minute', (time.time() - T0) * 1000)
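
# A small sketch of the KV key layout archive_minute writes: minute entries
# under '<name>::m<minute>' (TTL 1 day), hour entries under '<name>::h<hour>'
# (TTL 31 days) and day entries under '<name>::d<day>' (TTL 366 days). The
# helper below only computes the keys and TTLs for a given metric name and time.
def _example_archive_keys(name, t):
    t = int(t)
    minute = divmod(t, 60)[0] * 60
    hour = divmod(t, 3600)[0] * 3600
    day = divmod(t, 86400)[0] * 86400
    return [('%s::m%d' % (name, minute), 86400),
            ('%s::h%d' % (name, hour), 86400 * 31),
            ('%s::d%d' % (name, day), 86400 * 366)]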