def test_histograms(self):
    """Query back raw histogram test data and verify it round-tripped.

    For each of three series (TTL, one-way-delay per-minute, one-way-delay
    per-day) checks the row count and the first/last (ts, val) pairs
    against expected values held on self.tr.
    """
    config = get_config(get_config_path())
    db = CASSANDRA_DB(config)
    # TTL histogram series.
    ret = db.query_raw_data(path=self.tr.h_ttl_path, ts_min=self.tr.q_start, ts_max=self.tr.q_end)
    self.assertEqual(len(ret), self.tr.h_ttl_len)
    self.assertEqual(ret[0]['ts'], self.tr.h_ttl_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.h_ttl_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.h_ttl_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.h_ttl_end_val)
    # One-way-delay histograms, minute resolution.
    ret = db.query_raw_data(path=self.tr.h_owd_min_path, ts_min=self.tr.q_start, ts_max=self.tr.q_end)
    self.assertEqual(len(ret), self.tr.h_owd_min_len)
    self.assertEqual(ret[0]['ts'], self.tr.h_owd_min_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.h_owd_min_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.h_owd_min_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.h_owd_min_end_val)
    # One-way-delay histograms, day resolution - passes an explicit freq.
    ret = db.query_raw_data(path=self.tr.h_owd_day_path, ts_min=self.tr.q_start, ts_max=self.tr.q_end, freq=self.tr.h_owd_day_freq)
    self.assertEqual(len(ret), self.tr.h_owd_day_len)
    self.assertEqual(ret[0]['ts'], self.tr.h_owd_day_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.h_owd_day_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.h_owd_day_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.h_owd_day_end_val)
def test_histograms(self):
    """Query back raw histogram test data and verify it round-tripped.

    Same checks as the sibling version of this test, with the query
    calls written in multi-line form: row count and first/last (ts, val)
    pairs for the TTL and one-way-delay (minute and day) series.
    """
    config = get_config(get_config_path())
    db = CASSANDRA_DB(config)
    # TTL histogram series.
    ret = db.query_raw_data(
        path=self.tr.h_ttl_path,
        ts_min=self.tr.q_start,
        ts_max=self.tr.q_end
    )
    self.assertEqual(len(ret), self.tr.h_ttl_len)
    self.assertEqual(ret[0]['ts'], self.tr.h_ttl_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.h_ttl_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.h_ttl_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.h_ttl_end_val)
    # One-way-delay histograms, minute resolution.
    ret = db.query_raw_data(
        path=self.tr.h_owd_min_path,
        ts_min=self.tr.q_start,
        ts_max=self.tr.q_end
    )
    self.assertEqual(len(ret), self.tr.h_owd_min_len)
    self.assertEqual(ret[0]['ts'], self.tr.h_owd_min_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.h_owd_min_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.h_owd_min_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.h_owd_min_end_val)
    # One-way-delay histograms, day resolution - passes an explicit freq.
    ret = db.query_raw_data(
        path=self.tr.h_owd_day_path,
        ts_min=self.tr.q_start,
        ts_max=self.tr.q_end,
        freq=self.tr.h_owd_day_freq
    )
    self.assertEqual(len(ret), self.tr.h_owd_day_len)
    self.assertEqual(ret[0]['ts'], self.tr.h_owd_day_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.h_owd_day_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.h_owd_day_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.h_owd_day_end_val)
def test_a_config_cassandra(self):
    '''
    Clear database before starting a test. Takes too long to do before
    an individual test.

    (The "a" in the name makes this sort first so it runs before the
    other tests in the class under alphabetical test ordering.)
    '''
    config = get_config(get_config_path())
    # db_clear_on_testing makes CASSANDRA_DB drop/recreate the keyspace
    # on connect.
    config.db_clear_on_testing = True
    db = CASSANDRA_DB(config)
def test_a_load_data(self):
    """Load the long rtr_d ifHCInOctets sample data into Cassandra.

    Pushes the JSON fixture through a CassandraPollPersister fed by a
    TestPersistQueue, then flushes and closes so later tests can query
    what was written.  The "a" in the name sorts this before the query
    tests.
    """
    config = get_config(get_config_path())
    # Start from an empty keyspace so results are deterministic.
    config.db_clear_on_testing = True
    test_data = load_test_data("rtr_d_ifhcin_long.json")
    q = TestPersistQueue(test_data)
    p = CassandraPollPersister(config, "test", persistq=q)
    p.run()
    p.db.flush()
    p.db.close()
def test_persister(self):
    """This is a very basic smoke test for a cassandra persister.

    Feeds the small timeseries fixture through a CassandraPollPersister
    and only checks that the run completes; report('all') prints the
    db profiling stats.
    """
    config = get_config(get_config_path())
    test_data = json.loads(timeseries_test_data)
    q = TestPersistQueue(test_data)
    p = CassandraPollPersister(config, "test", persistq=q)
    p.run()
    p.db.close()
    p.db.stats.report('all')
def test_a_load_data(self):
    """Load the long rtr_d ifHCInOctets sample data into Cassandra.

    Pushes the JSON fixture through a CassandraPollPersister fed by a
    TestPersistQueue, then flushes and closes so later tests can query
    what was written.
    """
    config = get_config(get_config_path())
    # Start from an empty keyspace so results are deterministic.
    config.db_clear_on_testing = True
    test_data = load_test_data("rtr_d_ifhcin_long.json")
    q = TestPersistQueue(test_data)
    p = CassandraPollPersister(config, "test", persistq=q)
    p.run()
    p.db.flush()
    p.db.close()
def espolld():
    """Entry point for espolld.

    Parses command-line options and loads the config file; exits with
    status 1 on a configuration error.
    """
    argv = sys.argv
    oparse = get_opt_parser(default_config_file=get_config_path())
    (opts, args) = oparse.parse_args(args=argv)
    try:
        config = get_config(opts.config_file, opts)
    except ConfigError, e:
        print e
        sys.exit(1)
def gen_ma_storefile():
    """Translated from the original Perl by jdugan.

    Parses command-line options and loads the config file; errors go to
    stderr and the process exits with status 1 on a configuration error.
    """
    argv = sys.argv
    oparse = get_opt_parser(default_config_file=get_config_path())
    (opts, args) = oparse.parse_args(args=argv)
    try:
        config = get_config(opts.config_file, opts)
    except ConfigError, e:
        print >> sys.stderr, e
        sys.exit(1)
def espolld():
    """Entry point for espolld.

    Variant that initializes Django before reading the config; exits
    with status 1 on a configuration error.
    """
    argv = sys.argv
    oparse = get_opt_parser(default_config_file=get_config_path())
    (opts, args) = oparse.parse_args(args=argv)
    # Django app registry must be ready before config/models are touched.
    django.setup()
    try:
        config = get_config(opts.config_file, opts)
    except ConfigError, e:
        print e
        sys.exit(1)
def test_persister_backwards_counters(self):
    """Test for counters going backwards.

    Although this isn't supposed to happen, sometimes it does. The
    example data is real data from conf-rtr.sc13.org.
    """
    test_data = json.loads(backwards_counters_test_data)
    config = get_config(get_config_path())
    # Use a dedicated test keyspace and clear it before the run.
    config.cassandra_keyspace = 'test_%s' % config.cassandra_keyspace
    config.db_clear_on_testing = True
    q = TestPersistQueue(test_data)
    p = CassandraPollPersister(config, "test", persistq=q)
    p.run()
    p.db.flush()
    p.db.close()
    p.db.stats.report('all')
    config.db_clear_on_testing = False
    # Time window covered by the fixture, snapped to 30s bin boundaries
    # and converted to ms (the storage resolution).
    t0 = 1384371885
    t1 = 1384372034
    freq = 30
    b0 = t0 - (t0 % freq)
    b1 = t1 - (t1 % freq)
    b0 *= 1000
    b1 *= 1000
    # Row key format: ns:device:oidset:oid:iface:freq_ms:year
    key = '%s:%s:%s:%s:%s:%s:%s' % (
        SNMP_NAMESPACE, 'rtr_d', 'FastPollHC', 'ifHCOutOctets',
        'GigabitEthernet0/1', freq*1000,
        datetime.datetime.utcfromtimestamp(t0).year
    )
    db = CASSANDRA_DB(config)
    rates = ColumnFamily(db.pool, db.rate_cf)
    data = rates.get(key, column_start=b0, column_finish=b1)
    self.assertEqual(len(data), 6)
    for k, v in data.iteritems():
        # due to the bad data only two datapoints have full data, eg is_valid == 2
        if k in (1384371900000, 1384371990000):
            self.assertEqual(v['is_valid'], 2)
        else:
            self.assertEqual(v['is_valid'], 1)
def test_a_load_data(self):
    """Load histogram and rate fixtures directly into Cassandra.

    Writes raw rows via set_raw_data and rate rows via update_rate_bin,
    then flushes so later tests can query them.
    """
    config = get_config(get_config_path())
    # Start from an empty keyspace.
    config.db_clear_on_testing = True
    db = CASSANDRA_DB(config)
    for dat in hist_data:
        for row in load_test_data(dat):
            db.set_raw_data(RawRateData(**row))
    for dat in rate_data:
        for row in load_test_data(dat):
            db.update_rate_bin(BaseRateBin(**row))
    db.flush()
def espoll():
    """Entry point for espoll.

    Expects exactly two positional arguments (router and oidset); exits
    with status 1 on a configuration error.
    """
    argv = sys.argv
    oparse = get_opt_parser(default_config_file=get_config_path())
    oparse.usage = "%prog [options] router oidset"
    (opts, args) = oparse.parse_args(args=argv)
    # args[0] is the program name; exactly two real arguments required.
    if len(args[1:]) != 2:
        oparse.error("requires router and oidset arguments")
    device_name, oidset_name = args[1:]
    try:
        config = get_config(opts.config_file, opts)
    except ConfigError, e:
        print e
        sys.exit(1)
def espersistq():
    """Entry point for espersistq.

    Normalizes the config file path to an absolute path, then loads the
    config; errors go to stderr and exit with status 1.
    """
    argv = sys.argv
    oparse = get_opt_parser(default_config_file=get_config_path())
    (opts, args) = oparse.parse_args(args=argv)
    opts.config_file = os.path.abspath(opts.config_file)
    try:
        config = get_config(opts.config_file, opts)
    except ConfigError, e:
        print >>sys.stderr, e
        sys.exit(1)
def check_connection():
    """Lazily (re)establish the module-level Cassandra connection.

    If the module global ``db`` is unset (e.g. Cassandra was not up when
    the module was imported), connect now and rebuild the event-type ->
    column-family map.  No-op when a connection already exists.
    """
    global db
    global EVENT_TYPE_CF_MAP  # fixed: stray trailing semicolon removed
    if not db:
        db = CASSANDRA_DB(get_config(get_config_path()))
        #
        # Column families
        #
        EVENT_TYPE_CF_MAP = {
            'histogram': db.raw_cf,
            'integer': db.rate_cf,
            'json': db.raw_cf,
            'percentage': db.agg_cf,
            'subinterval': db.raw_cf,
            'float': db.agg_cf
        }
def main(argv=sys.argv):
    """Parse options, output config.

    Builds the time range from -b/-e (both required together) or from
    -l/--last relative to now, then hands off to process_devices.
    Returns 1 on bad usage, otherwise process_devices' return value.
    """
    global OPTS
    prog = os.path.basename(argv[0])
    usage = 'usage: %prog device [device]'
    parser = optparse.OptionParser(usage=usage, version=VERSION)
    parser.add_option('-D', None, action='store_true', dest='Debug', default=False, help='interactive debugging')
    parser.add_option('-n', None, action='store_true', dest='dry_run', default=False, help='''dry run: don't do anything just print what would be done''')
    parser.add_option('-b', '--begin', action='store', type='int', default=None, dest='begin', help="begin time (seconds since the epoch)")
    parser.add_option('-e', '--end', action='store', type='int', default=None, dest='end', help="end time (seconds since the epoch)")
    parser.add_option('-l', '--last', dest='last', action='store', type='int', default=3600, help="set time range to last n seconds")
    (opts, args) = parser.parse_args(args=argv[1:])
    # -b and -e must be given together.
    if (opts.begin and not opts.end) or (not opts.begin and opts.end):
        print "must specify both -b and -e"
        return 1
    # Neither given: default to the trailing --last window.
    if not opts.begin and not opts.end:
        opts.end = int(time.time())
        opts.begin = opts.end - opts.last
    # Interactive debugging
    if opts.Debug:
        import pdb
        pdb.set_trace()
    config = get_config(get_config_path())
    # db = CASSANDRA_DB(config)
    db = None
    return process_devices(opts, args, db)
def test_sys_uptime(self):
    """Persist the sysUpTime fixture and read the first raw row back."""
    config = get_config(get_config_path())
    q = TestPersistQueue(json.loads(sys_uptime_test_data))
    p = CassandraPollPersister(config, "test", persistq=q)
    p.run()
    p.db.flush()
    p.db.close()
    db = CASSANDRA_DB(config)
    # Timestamps are stored in ms, hence the *1000.
    ret = db.query_raw_data(
        path=[SNMP_NAMESPACE, 'rtr_d', 'FastPollHC', 'sysUpTime'],
        freq=30*1000,
        ts_min=self.ctr.begin*1000,
        ts_max=self.ctr.end*1000)
    ret = ret[0]
    self.assertEqual(ret['ts'], self.ctr.begin * 1000)
    self.assertEqual(ret['val'], 100)
def espoll():
    """Entry point for espoll.

    Variant that initializes Django after argument validation.  Expects
    exactly two positional arguments (router and oidset); exits with
    status 1 on a configuration error.
    """
    argv = sys.argv
    oparse = get_opt_parser(default_config_file=get_config_path())
    oparse.usage = "%prog [options] router oidset"
    (opts, args) = oparse.parse_args(args=argv)
    # args[0] is the program name; exactly two real arguments required.
    if len(args[1:]) != 2:
        oparse.error("requires router and oidset arguments")
    device_name, oidset_name = args[1:]
    django.setup()
    try:
        config = get_config(opts.config_file, opts)
    except ConfigError, e:
        print e
        sys.exit(1)
def test_values(self):
    """Query base-rate series and verify counts and endpoint values.

    Checks throughput, packet-duplicate, packet-sent and packet-lost
    series: row count plus first/last (ts, val) pairs against expected
    values held on self.tr.
    """
    config = get_config(get_config_path())
    db = CASSANDRA_DB(config)
    # Throughput series.
    ret = db.query_baserate_timerange(path=self.tr.throughput_path, ts_min=self.tr.q_start, ts_max=self.tr.q_end)
    self.assertEqual(len(ret), self.tr.throughput_len)
    self.assertEqual(ret[0]['ts'], self.tr.throughput_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.throughput_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.throughput_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.throughput_end_val)
    # Duplicated-packet series.
    ret = db.query_baserate_timerange(path=self.tr.packet_dup_path, ts_min=self.tr.q_start, ts_max=self.tr.q_end)
    self.assertEqual(len(ret), self.tr.packet_dup_len)
    self.assertEqual(ret[0]['ts'], self.tr.packet_dup_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.packet_dup_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.packet_dup_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.packet_dup_end_val)
    # Sent-packet series.
    ret = db.query_baserate_timerange(path=self.tr.packet_sent_path, ts_min=self.tr.q_start, ts_max=self.tr.q_end)
    self.assertEqual(len(ret), self.tr.packet_sent_len)
    self.assertEqual(ret[0]['ts'], self.tr.packet_sent_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.packet_sent_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.packet_sent_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.packet_sent_end_val)
    # Lost-packet series.
    ret = db.query_baserate_timerange(path=self.tr.packet_lost_path, ts_min=self.tr.q_start, ts_max=self.tr.q_end)
    self.assertEqual(len(ret), self.tr.packet_lost_len)
    self.assertEqual(ret[0]['ts'], self.tr.packet_lost_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.packet_lost_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.packet_lost_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.packet_lost_end_val)
def main():
    """Dump up to --limit row keys from a chosen Cassandra column family.

    Keys may be filtered with a substring --pattern.  Returns -1 when an
    unknown column family name is given.
    """
    usage = '%prog [ -c col_family | -p pattern_to_find (optional) ]'
    parser = OptionParser(usage=usage)
    parser.add_option('-c', '--column', metavar='COLUMN_FAMILY', type='string', dest='column_family', default='raw', help='Column family to dump [raw|rate|aggs|stat] (default=%default).')
    parser.add_option('-p', '--pattern', metavar='PATTERN', type='string', dest='pattern', default="", help='Optional pattern to look for in keys (uses python string.find()).')
    parser.add_option('-l', '--limit', metavar='LIMIT', type='int', dest='limit', default=25, help='Limit number of keys dumped since a few generally makes the point (default=%default).')
    options, args = parser.parse_args()
    config = get_config(get_config_path())
    # config.cassandra_keyspace = 'test_esmond'
    db = CASSANDRA_DB(config)
    # CLI name -> column family wrapper on the db object.
    col_fams = {
        'raw': db.raw_data,
        'rate': db.rates,
        'aggs': db.aggs,
        'stat': db.stat_agg
    }
    if options.column_family not in col_fams.keys():
        print '{0} is not a valid column family selection'.format(options.column_family)
        parser.print_help()
        return -1
    count = 0
    # column_count=0 fetches keys without column data.
    for k in col_fams[options.column_family]._column_family.get_range(
            column_count=0, filter_empty=False):
        if count >= options.limit:
            break
        # k is a (key, columns) tuple; filter on the key string.
        if k[0].find(options.pattern) == -1:
            continue
        print k[0]
        count += 1
    return
def test_values(self):
    """Query base-rate series and verify counts and endpoint values.

    Same checks as the sibling version of this test, with the query
    calls written in multi-line form: throughput, packet-duplicate,
    packet-sent and packet-lost series against expectations on self.tr.
    """
    config = get_config(get_config_path())
    db = CASSANDRA_DB(config)
    # Throughput series.
    ret = db.query_baserate_timerange(
        path=self.tr.throughput_path,
        ts_min=self.tr.q_start,
        ts_max=self.tr.q_end
    )
    self.assertEqual(len(ret), self.tr.throughput_len)
    self.assertEqual(ret[0]['ts'], self.tr.throughput_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.throughput_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.throughput_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.throughput_end_val)
    # Duplicated-packet series.
    ret = db.query_baserate_timerange(
        path=self.tr.packet_dup_path,
        ts_min=self.tr.q_start,
        ts_max=self.tr.q_end
    )
    self.assertEqual(len(ret), self.tr.packet_dup_len)
    self.assertEqual(ret[0]['ts'], self.tr.packet_dup_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.packet_dup_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.packet_dup_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.packet_dup_end_val)
    # Sent-packet series.
    ret = db.query_baserate_timerange(
        path=self.tr.packet_sent_path,
        ts_min=self.tr.q_start,
        ts_max=self.tr.q_end
    )
    self.assertEqual(len(ret), self.tr.packet_sent_len)
    self.assertEqual(ret[0]['ts'], self.tr.packet_sent_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.packet_sent_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.packet_sent_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.packet_sent_end_val)
    # Lost-packet series.
    ret = db.query_baserate_timerange(
        path=self.tr.packet_lost_path,
        ts_min=self.tr.q_start,
        ts_max=self.tr.q_end
    )
    self.assertEqual(len(ret), self.tr.packet_lost_len)
    self.assertEqual(ret[0]['ts'], self.tr.packet_lost_start_ts)
    self.assertEqual(ret[0]['val'], self.tr.packet_lost_start_val)
    self.assertEqual(ret[-1]['ts'], self.tr.packet_lost_end_ts)
    self.assertEqual(ret[-1]['val'], self.tr.packet_lost_end_val)
def test_persister(self):
    """This is a very basic smoke test for a TSDB persister.

    Runs the timeseries fixture through TSDBPollPersister, then reads
    every datapoint back from the on-disk TSDB and compares values.
    """
    config = get_config(get_config_path())
    test_data = json.loads(timeseries_test_data)
    q = TestPersistQueue(test_data)
    p = TSDBPollPersister(config, "test", persistq=q)
    p.run()
    # Re-parse the fixture: the persister consumed the first copy.
    test_data = json.loads(timeseries_test_data)
    db = tsdb.TSDB(config.tsdb_root)
    for pr in test_data:
        for oid, val in pr['data']:
            # Interface name is the last component of the oid path.
            iface = oid.split('/')[-1]
            path = "%s/%s/%s/%s/" % (pr['device_name'], pr['oidset_name'],
                                     pr['oid_name'], iface)
            v = db.get_var(path)
            d = v.get(pr['timestamp'])
            self.assertEqual(val, d.value)
def test_persister_long(self):
    """Use actual data to test persister.

    Persists the long rtr_d fixture via TSDBPollPersister, verifies
    every raw datapoint on disk, then checks the 30-second aggregates
    against the precomputed expectations fixture.
    """
    config = get_config(get_config_path())
    # load example data
    test_data = load_test_data("rtr_d_ifhcin_long.json")
    q = TestPersistQueue(test_data)
    p = TSDBPollPersister(config, "test", persistq=q)
    p.run()
    # Reload: the persister consumed the first copy.
    test_data = load_test_data("rtr_d_ifhcin_long.json")
    ts0 = test_data[0]['timestamp']
    tsn = test_data[-1]['timestamp']
    # make sure it got written to disk as expected
    db = tsdb.TSDB(config.tsdb_root)
    paths = []
    for pr in test_data:
        for oid, val in pr['data']:
            iface = oid.split('/')[-1]
            path = "%s/%s/%s/%s/" % (pr['device_name'], pr['oidset_name'],
                                     pr['oid_name'], iface)
            if path not in paths:
                paths.append(path)
            v = db.get_var(path)
            d = v.get(pr['timestamp'])
            self.assertEqual(val, d.value)
    # check that aggregates were calculated as expected
    db = tsdb.TSDB(config.tsdb_root)
    aggs = load_test_data("rtr_d_ifhcin_long_agg.json")
    for path in paths:
        p = path + "TSDBAggregates/30"
        v = db.get_var(p)
        for d in v.select(begin=ts0, end=tsn):
            average, delta = aggs[p][str(d.timestamp)]
            self.assertEqual(d.average, average)
            self.assertEqual(d.delta, delta)
        v.close()
def main():
    """Exercise check() with bogus and valid key paths and time ranges.

    Prints the result of each probe; purely a manual diagnostic tool.
    """
    config = get_config(get_config_path())
    db = CASSANDRA_DB(config)
    # Extra trailing path element makes the key bogus.
    print 'bogus key, valid time range:',
    path = ['snmp','rtr_d','FastPollHC','ifHCInOctets','fxp0.0', 'bogus']
    print check(db, path, begin, end)
    print 'valid key, valid time range:',
    path = ['snmp','rtr_d','FastPollHC','ifHCInOctets','fxp0.0']
    print check(db, path, begin, end)
    # Extend the end by ~one year (in ms) to cross into a nonexistent
    # year-partitioned row key.
    print 'valid key path, valid AND invalid range keys:',
    print check(db, path, begin, end+31557600000)
    # print check(db, path, begin-31557600000, end)
    pass
def espersistd():
    """Entry point for espersistd.

    espersistd consists of one PersistenceManager thread and multiple
    worker sub-processes.  Adds role/queue/number options on top of the
    common option parser; errors go to stderr and exit with status 1.
    """
    django.setup()
    argv = sys.argv
    oparse = get_opt_parser(default_config_file=get_config_path())
    oparse.add_option("-r", "--role", dest="role", default="manager")
    oparse.add_option("-q", "--queue", dest="qname", default="")
    oparse.add_option("-n", "--number", dest="number", default="")
    (opts, args) = oparse.parse_args(args=argv)
    opts.config_file = os.path.abspath(opts.config_file)
    try:
        config = get_config(opts.config_file, opts)
    except ConfigError, e:
        print >>sys.stderr, e
        sys.exit(1)
def main():
    """Exercise check() with bogus and valid key paths and time ranges.

    Reformatted variant of the sibling diagnostic; prints the result of
    each probe.
    """
    config = get_config(get_config_path())
    db = CASSANDRA_DB(config)
    # Extra trailing path element makes the key bogus.
    print 'bogus key, valid time range:',
    path = ['snmp', 'rtr_d', 'FastPollHC', 'ifHCInOctets', 'fxp0.0', 'bogus']
    print check(db, path, begin, end)
    print 'valid key, valid time range:',
    path = ['snmp', 'rtr_d', 'FastPollHC', 'ifHCInOctets', 'fxp0.0']
    print check(db, path, begin, end)
    # Extend the end by ~one year (in ms) to cross into a nonexistent
    # year-partitioned row key.
    print 'valid key path, valid AND invalid range keys:',
    print check(db, path, begin, end + 31557600000)
    # print check(db, path, begin-31557600000, end)
    pass
def __init__(self, keyspace_name, savedb=True):
    """Open a Cassandra connection from the standard config.

    When savedb is false the config is flagged so the keyspace is
    cleared on connect (test mode).
    """
    cfg = get_config(get_config_path())
    if not savedb:
        cfg.db_clear_on_testing = True
    self.db = CASSANDRA_DB(cfg)
def main():
    """Generate fake router/interface poll data and queue it for testing.

    Builds fake device names, picks real oidsets/oids from the ORM, then
    loops emitting PollResult objects into the test queues.
    NOTE(review): original source had collapsed whitespace; loop nesting
    below is reconstructed - confirm against version control.
    """
    usage = '%prog [ -r NUM | -i NUM | -o NUM | -l NUM | -v ]'
    usage += '\n\tAmount of data generated ~= r * i * (o * 2) * l'
    parser = OptionParser(usage=usage)
    parser.add_option('-r', '--routers', metavar='NUM_ROUTERS', type='int', dest='routers', default=1, help='Number of test "routers" to generate (default=%default).')
    parser.add_option('-i', '--interfaces', metavar='NUM_INTERFACES', type='int', dest='interfaces', default=2, help='Number of test interfaces to generate on each test router (default=%default).')
    parser.add_option('-o', '--oidsets', metavar='NUM_OIDSETS', type='int', dest='oidsets', default=2, help='Number of oidsets to assign to each fake device/router (default=%default).')
    parser.add_option('-l', '--loop', metavar='NUM_LOOPS', type='int', dest='loop', default=1, help='Number of times to send data for each "device (default=%default)."')
    parser.add_option('-p', '--prefix', metavar='PREFIX', type='string', dest='prefix', default='fake', help='Device name prefix - make new names (default=%default).')
    parser.add_option('-W', '--write', dest='write', action='store_true', default=False, help='Actually write the data to the memcache queue.')
    parser.add_option('-v', '--verbose', dest='verbose', action='count', default=False, help='Verbose output - -v, -vv, etc.')
    options, args = parser.parse_args()
    # Names: a..z, aa..zz, aaa..zzz, aaaa..zzzz -> 26*4 candidates.
    router_names = []
    for i in range(1,5):
        for c in string.lowercase:
            router_names.append(c*i)
    if options.routers > 26*4:
        print 'There is an upper bound of {0} fake routers.'.format(26*4)
        return -1
    config = get_config(get_config_path())
    qs = TestQueues(config, options.write, options.verbose)
    # Map oidset name -> list of oid names (sysUpTime excluded).
    oidset_oid = {}
    oid_count = 0
    for oidset in OIDSet.objects.filter(frequency=30)[0:options.oidsets]:
        if not oidset_oid.has_key(oidset.name):
            oidset_oid[oidset.name] = []
        for oid in oidset.oids.exclude(name='sysUpTime'):
            oidset_oid[oidset.name].append(oid.name)
            oid_count += 1
    if options.verbose:
        print 'Using following oidsets/oids for fake devices:'
        pp.pprint(oidset_oid)
    loopcount = 0
    ts = int(time.time())
    val = 100
    # 43200 - 12 hrs. 1440 loops - 1/2 day of data
    print 'Generating {0} data points.'.format(
        options.loop*options.routers*options.interfaces*oid_count)
    for iteration in xrange(options.loop):
        if options.verbose:
            print 'Loop {0}/{1}'.format(iteration, options.loop)
        for dn in router_names[0:options.routers]:
            device_name = '{0}_rtr_{1}'.format(options.prefix, dn)
            for oidset in oidset_oid.keys():
                data = []
                for oid in oidset_oid[oidset]:
                    # One datum per fake interface for this oid.
                    for i in xrange(options.interfaces):
                        interface_name = 'fake_iface_{0}'.format(i)
                        datum = [[oid, interface_name], val]
                        data.append(datum)
                    pr = PollResult(
                        oidset_name=oidset,
                        device_name=device_name,
                        oid_name=oid,
                        timestamp=ts,
                        data=data,
                        metadata={'tsdb_flags': 1}
                    )
                    if options.verbose > 1:
                        print pr.json()
                    qs.put(pr)
        # Advance the fake clock/counter once per outer loop.
        ts += 30
        val += 50
        loopcount += 1
    pass
def test_range_baserate_query(self):
    """
    Presumed using test data loaded in previous test method.

    Shows the three query methods that return json formatted data.
    """
    config = get_config(get_config_path())
    db = CASSANDRA_DB(config)
    # Storage timestamps are in ms.
    start_time = self.ctr.begin*1000
    end_time = self.ctr.end*1000
    # 1) Base rates.
    ret = db.query_baserate_timerange(
        path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
        freq=30*1000,
        ts_min=start_time,
        ts_max=end_time
    )
    self.assertEqual(len(ret), self.ctr.expected_results)
    self.assertEqual(ret[0]['ts'], start_time)
    self.assertEqual(ret[0]['val'], self.ctr.base_rate_val_first)
    self.assertEqual(ret[self.ctr.expected_results-1]['ts'], end_time)
    self.assertEqual(ret[self.ctr.expected_results-1]['val'],
                     self.ctr.base_rate_val_last)
    # 2) Raw data - one fewer row than the base rates.
    ret = db.query_raw_data(
        path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
        freq=30*1000,
        ts_min=start_time,
        ts_max=end_time
    )
    self.assertEqual(len(ret), self.ctr.expected_results - 1)
    self.assertEqual(ret[0]['ts'], self.ctr.raw_ts_first*1000)
    self.assertEqual(ret[0]['val'], self.ctr.raw_val_first)
    self.assertEqual(ret[len(ret)-1]['ts'], self.ctr.raw_ts_last*1000)
    self.assertEqual(ret[len(ret)-1]['val'], self.ctr.raw_val_last)
    # 3) Aggregations - ts_min backed up an hour to include the agg bin.
    ret = db.query_aggregation_timerange(
        path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
        ts_min=start_time - 3600*1000,
        ts_max=end_time,
        freq=self.ctr.agg_freq*1000, # required!
        cf='average', # min | max | average - also required!
    )
    self.assertEqual(ret[0]['cf'], 'average')
    self.assertEqual(ret[0]['val'], self.ctr.agg_avg)
    self.assertEqual(ret[0]['ts'], self.ctr.agg_ts*1000)
    ret = db.query_aggregation_timerange(
        path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
        ts_min=start_time - 3600*1000,
        ts_max=end_time,
        freq=self.ctr.agg_freq*1000, # required!
        cf='raw', # raw - rarely used
    )
    self.assertEqual(ret[0]['cf'], 'raw')
    self.assertEqual(ret[0]['val'], self.ctr.agg_raw)
    self.assertEqual(ret[0]['ts'], self.ctr.agg_ts*1000)
    # NOTE: the early return below disables the min/max checks and the
    # db.close() that follow - presumably deliberate; confirm.
    return
    ret = db.query_aggregation_timerange(
        path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
        ts_min=start_time - 3600*1000,
        ts_max=end_time,
        freq=self.ctr.agg_freq*1000, # required!
        cf='min', # min | max | average - also required!
    )
    self.assertEqual(ret[0]['cf'], 'min')
    self.assertEqual(ret[0]['val'], self.ctr.agg_min)
    self.assertEqual(ret[0]['ts'], self.ctr.agg_ts*1000)
    ret = db.query_aggregation_timerange(
        path=[SNMP_NAMESPACE,'rtr_d','FastPollHC','ifHCInOctets','fxp0.0'],
        ts_min=start_time - 3600*1000,
        ts_max=end_time,
        freq=self.ctr.agg_freq*1000, # required!
        cf='max', # min | max | average - also required!
    )
    self.assertEqual(ret[0]['cf'], 'max')
    self.assertEqual(ret[0]['val'], self.ctr.agg_max)
    self.assertEqual(ret[0]['ts'], self.ctr.agg_ts*1000)
    db.close()
def test_persister_heartbeat(self):
    """Test the heartbeat code.

    Persists three datapoints: two within normal spacing (rates should
    be backfilled between them) and a third beyond SEEK_BACK_THRESHOLD
    (no backfill should occur), then inspects the rate column family.
    """
    config = get_config(get_config_path())
    freq = 30
    iface = 'GigabitEthernet0/1'
    # Raw timestamps (s) and their bin boundaries, converted to ms.
    t0 = 1343953700
    t1 = t0 + (4*freq)
    b0 = t0 - (t0 % freq)
    b1 = t1 - (t1 % freq)
    # t2 is far enough past t1 to exceed the seek-back threshold.
    t2 = t1 + 2*freq + (SEEK_BACK_THRESHOLD/1000)
    b2 = t2 - (t2 % freq)
    b0 *= 1000
    b1 *= 1000
    b2 *= 1000
    data_template = {
        'oidset_name': 'FastPollHC',
        'device_name': 'rtr_d',
        'oid_name': 'ifHCInOctets',
    }
    # with backfill
    test_data = []
    d0 = data_template.copy()
    d0['timestamp'] = t0
    d0['data'] = [[["ifHCInOctets", iface], 0]]
    test_data.append(d0)
    d1 = data_template.copy()
    d1['timestamp'] = t1
    d1['data'] = [[["ifHCInOctets", iface], 1000]]
    test_data.append(d1)
    # no backfill
    d2 = data_template.copy()
    d2['timestamp'] = t2
    d2['data'] = [[["ifHCInOctets", iface], 865000]]
    test_data.append(d2)
    q = TestPersistQueue(test_data)
    p = CassandraPollPersister(config, "test", persistq=q)
    p.run()
    p.db.flush()
    p.db.close()
    p.db.stats.report('all')
    # Row key format: ns:device:oidset:oid:iface:freq_ms:year
    key = '%s:%s:%s:%s:%s:%s:%s' % (
        SNMP_NAMESPACE,
        data_template['device_name'],
        data_template['oidset_name'],
        data_template['oid_name'],
        iface,
        freq*1000,
        datetime.datetime.utcfromtimestamp(t0).year
    )
    db = CASSANDRA_DB(config)
    rates = ColumnFamily(db.pool, db.rate_cf)
    backfill = rates.get(key, column_start=b0, column_finish=b1)
    self.assertEqual(len(backfill), 5)
    last = backfill[b1]
    self.assertEqual(last['val'], 166)
    self.assertEqual(last['is_valid'], 1)
    nobackfill = rates.get(key, column_start=b1, column_finish=b2)
    # test no backfill, make sure we don't insert a month of zeros...
    self.assertEqual(len(nobackfill), 2)
    self.assertEqual(nobackfill[b1]['is_valid'], 1)
    self.assertEqual(nobackfill[b1]['val'], 166)
    self.assertEqual(nobackfill[b2]['is_valid'], 1)
    self.assertEqual(nobackfill[b2]['val'], 6)
def test_persister_long(self):
    """Make sure the tsdb and cassandra data match.

    Persists the long fixture to Cassandra, then (currently disabled by
    an early return) re-persists it to TSDB and cross-checks every
    aggregate bin between the two stores.
    """
    config = get_config(get_config_path())
    test_data = load_test_data("rtr_d_ifhcin_long.json")
    # return
    config.db_clear_on_testing = True
    config.db_profile_on_testing = True
    q = TestPersistQueue(test_data)
    p = CassandraPollPersister(config, "test", persistq=q)
    p.run()
    p.db.flush()
    p.db.close()
    p.db.stats.report('all')
    # NOTE: this early return disables the TSDB/Cassandra comparison
    # below - presumably deliberate; confirm.
    return
    test_data = load_test_data("rtr_d_ifhcin_long.json")
    q = TestPersistQueue(test_data)
    p = TSDBPollPersister(config, "test", persistq=q)
    p.run()
    # Walk the on-disk TSDB tree to discover oidset/oid/path levels.
    path_levels = []
    rtr_d_path = os.path.join(settings.ESMOND_ROOT, "tsdb-data", "rtr_d")
    for (path, dirs, files) in os.walk(rtr_d_path):
        if dirs[0] == 'TSDBAggregates':
            break
        path_levels.append(dirs)
    oidsets = path_levels[0]
    oids = path_levels[1]
    paths = path_levels[2]
    full_paths = {}
    for oidset in oidsets:
        for oid in oids:
            for path in paths:
                full_path = 'rtr_d/%s/%s/%s/TSDBAggregates/30' % \
                    (oidset, oid, path)
                if not full_paths.has_key(full_path):
                    full_paths[full_path] = 1
    ts_db = tsdb.TSDB(config.tsdb_root)
    config.db_clear_on_testing = False
    db = CASSANDRA_DB(config)
    rates = ColumnFamily(db.pool, db.rate_cf)
    count_bad = 0
    tsdb_aggs = 0
    for p in full_paths.keys():
        v = ts_db.get_var(p)
        device,oidset,oid,path,tmp1,tmp2 = p.split('/')
        # TSDB path components use "_" where the cassandra key uses "/".
        path = path.replace("_", "/")
        for d in v.select():
            tsdb_aggs += 1
            # Row key format: ns:device:oidset:oid:iface:freq_ms:year
            key = '%s:%s:%s:%s:%s:%s:%s' % \
                (SNMP_NAMESPACE, device,oidset,oid,path,int(tmp2)*1000,
                 datetime.datetime.utcfromtimestamp(d.timestamp).year)
            val = rates.get(key, [d.timestamp*1000])[d.timestamp*1000]
            if d.flags != ROW_VALID:
                self.assertLess(val['is_valid'], 2)
            else:
                # Allow a rounding difference of at most 1.0.
                self.assertLessEqual(abs(val['val'] - d.delta), 1.0)
                self.assertGreater(val['is_valid'], 0)
    db.close()
def main(argv=sys.argv):
    """Parse options, output config.

    Reformatted variant of the sibling tool: builds the time range from
    -b/-e (both required together) or from -l/--last relative to now,
    then hands off to process_devices.
    """
    global OPTS
    prog = os.path.basename(argv[0])
    usage = 'usage: %prog device [device]'
    parser = optparse.OptionParser(usage=usage, version=VERSION)
    parser.add_option('-D', None, action='store_true', dest='Debug', default=False, help='interactive debugging')
    parser.add_option(
        '-n', None, action='store_true', dest='dry_run', default=False,
        help='''dry run: don't do anything just print what would be done''')
    parser.add_option('-b', '--begin', action='store', type='int', default=None, dest='begin', help="begin time (seconds since the epoch)")
    parser.add_option('-e', '--end', action='store', type='int', default=None, dest='end', help="end time (seconds since the epoch)")
    parser.add_option('-l', '--last', dest='last', action='store', type='int', default=3600, help="set time range to last n seconds")
    (opts, args) = parser.parse_args(args=argv[1:])
    # -b and -e must be given together.
    if (opts.begin and not opts.end) or (not opts.begin and opts.end):
        print "must specify both -b and -e"
        return 1
    # Neither given: default to the trailing --last window.
    if not opts.begin and not opts.end:
        opts.end = int(time.time())
        opts.begin = opts.end - opts.last
    # Interactive debugging
    if opts.Debug:
        import pdb
        pdb.set_trace()
    config = get_config(get_config_path())
    # db = CASSANDRA_DB(config)
    db = None
    return process_devices(opts, args, db)
# Module-level setup: imports, logger, and a best-effort Cassandra
# connection established at import time.
from esmond.cassandra import KEY_DELIMITER, CASSANDRA_DB, AGG_TYPES, ConnectionException, RawRateData, BaseRateBin, RawData, AggregationBin
from esmond.config import get_config_path, get_config
from esmond.util import get_logger

#
# Logger
#
log = get_logger(__name__)

#
# Cassandra db connection
#
try:
    db = CASSANDRA_DB(get_config(get_config_path()))
    # Map REST event types to the column family that stores them.
    EVENT_TYPE_CF_MAP = {
        'histogram': db.raw_cf,
        'integer': db.rate_cf,
        'json': db.raw_cf,
        'percentage': db.agg_cf,
        'subinterval': db.raw_cf,
        'float': db.agg_cf
    }
except ConnectionException, e:
    #try to get a cassandra connection but don't sweat if cant get one now
    #corrects race condition with cassandra boot and esmond boot
    db = None

# NOTE(review): check_connection is truncated in this chunk; the full
# body reconnects lazily when db is None.
def check_connection():
    global db
"""
Things used by the REST API (api.py) that are also imported by other
modules. Reduces the overhead/etc of importing api.py itself.
"""
from esmond.config import get_config_path, get_config

# Prefix used in all the snmp data cassandra keys
SNMP_NAMESPACE = 'snmp'

# Anon limit configurable in conf/sane default if unset.
alim = lambda x: x.api_anon_limit if x.api_anon_limit else 30
ANON_LIMIT = alim(get_config(get_config_path()))

# Set up data structure mapping oidsets/oids to REST uri endpoints.
class EndpointMap(object):
    """
    The dynamic endpoint map generation has been moved into this class
    to avoid the map being generated on module import. That could cause
    conflicts with the test suite loading fixtures and allows getting
    rid of the old "failover" static dict. Burying execution of the map
    generation until after the tests have set up the in-memory db makes
    things happy.
    """
    def __init__(self):
        # Lazily-built map; None until generate_endpoint_map runs.
        self.mapping = None

    def generate_endpoint_map(self):
        # NOTE(review): method body is truncated in this chunk; the
        # import is deferred to avoid a module-load cycle with models.
        payload = {}
        from esmond.api.models import OIDSet
def test_z_throttle(self):
    """Exercise API request throttling.

    First verifies the bulk endpoint rejects an oversized anonymous
    request (401) but accepts it with an API key (201), then hammers a
    single-series endpoint until a 429 throttle response is seen.
    The "z" in the name sorts this after the data-loading tests.
    """
    ifaces = [
        'xe-7/0/0.0', 'ge-9/1/0', 'xe-1/3/0.911', 'ge-9/1/1.337',
        'ge-9/1/2.0', 'xe-1/1/0.65', 'ge-9/0/8', 'xe-0/1/0.0',
        'ge-9/1/0.909', 'ge-9/0/5', 'lo0.0', 'ge-9/1/9', 'ge-9/0/2.0',
        'ge-9/1/3.0', 'xe-1/2/0', 'xe-0/1/0', 'ge-9/0/2', 'xe-1/3/0',
        'ge-9/1/5.0', 'ge-9/1/9.0', 'irb.0', 'ge-9/0/9.1116',
        'ge-9/0/7.0', 'ge-9/0/5.0', 'ge-9/0/4.0', 'xe-9/3/0.912',
        'ge-9/0/8.0', 'ge-9/0/9.1114', 'xe-0/2/0.16', 'ge-9/1/6',
        'ge-9/0/1.0', 'xe-1/1/0', 'ge-9/0/0.66', 'ge-9/1/5', 'ge-9/0/1',
        'xe-7/1/0', 'ge-9/1/2', 'xe-0/0/0', 'ge-9/1/1.3003', 'fxp0.0',
        'ge-9/0/0', 'lo0', 'ge-9/0/0.44', 'xe-1/2/0.41', 'ge-9/1/1.332',
        'ge-9/1/8', 'xe-1/0/0.0', 'xe-9/3/0.916', 'ge-9/1/6.0',
        'ge-9/1/4.0', 'ge-9/0/3', 'ge-9/1/1.336', 'ge-9/0/4',
        'ge-9/1/1.333', 'xe-1/0/0', 'xe-1/3/0.915', 'xe-8/0/0',
        'ge-9/1/0.913', 'ge-9/1/3', 'ge-9/0/6.0', 'ge-9/0/3.0',
        'ge-9/1/8.0', 'xe-0/2/0', 'xe-8/0/0.0', 'xe-7/0/0', 'ge-9/0/9',
        'ge-9/0/6', 'xe-0/0/0.0', 'ge-9/0/7', 'ge-9/1/1', 'xe-1/1/0.45',
        'xe-9/3/0', 'ge-9/1/4',
    ]
    devs = []
    for i in ifaces:
        devs.append({'device': 'rtr_d', 'iface': i})
    payload = {
        'interfaces': devs,
        'endpoint': ['in', 'out'],
        'cf': 'average',
        'begin': self.ctr.begin,
        'end': self.ctr.end
    }
    config = get_config(get_config_path())
    # This assertion will trigger if the api_anon_limit is set
    # higher than the number of requests that are about to be
    # generated. The default is usually around 30 and this will
    # generate somewhere in the neighborhood of 150 different
    # queries and should trigger the throttling.
    self.assertLessEqual(ANON_LIMIT, len(ifaces)*len(payload['endpoint']))
    # Make a request the bulk endpoint will throttle for too many
    # queries w/out auth.
    response = self.api_client.post('/v1/bulk/interface/', data=payload,
                                    format='json')
    self.assertEquals(response.status_code, 401)
    # Make the same request with authentication.
    authn = self.create_apikey(self.td.user_admin.username,
                               self.td.user_admin_apikey.key)
    response = self.api_client.post('/v1/bulk/interface/', data=payload,
                                    format='json', authentication=authn)
    self.assertEquals(response.status_code, 201)  # not 200!
    # Make a bunch of requests to make sure that the throttling
    # code kicks in.
    params = {
        'begin': self.ctr.begin-3600,  # back an hour to get agg bin.
        'end': self.ctr.end,
        'agg': self.ctr.agg_freq
    }
    url = '/v1/device/rtr_d/interface/fxp0.0/in'
    response = self.client.get(url, params)
    loops = 5  # leave a little overhead
    if not config.api_throttle_at:
        loops += 150  # tastypie default
    else:
        loops += config.api_throttle_at
    # Make looping requests looking for the 429 throttle return code.
    # Leave a couple of extra loops as margin of error, but break
    # out if no 429 received so it doesn't go into the loop of death.
    rcount = 1
    got_429 = False
    while rcount < loops:
        response = self.client.get(url, params)
        if response.status_code == 429:
            got_429 = True
            break
        rcount += 1
    self.assertEqual(got_429, True)
    pass
def main(): #Parse command-line opts parser = argparse.ArgumentParser(description="Remove old data and metadata based on a configuration file") parser.add_argument('-c', '--config', metavar='CONFIG', nargs=1, dest='config', default=DEFAULT_CONFIG_FILE, help='Configuration file location(default=%default).') parser.add_argument('-s', '--start', metavar='START', nargs=1, dest='start', default=None, help='Start looking for expired record at given time as unix timestamp. Default is current time.') parser.add_argument('-t', '--time-chunk', metavar='TIME_CHUNK', nargs=1, dest='time_chunk', default=[DEFAULT_MAX_TIME_CHUNK], type=int, help='The amount of data to look at each query in seconds. Defaults to {0}'.format(DEFAULT_MAX_TIME_CHUNK)) parser.add_argument('-m', '--max-misses', metavar='MAX_MISSES', nargs=1, dest='max_misses', default=[DEFAULT_MAX_MISSES], type=int, help='The maximum number of time chunks with no data before giving up. Defaults to {0}'.format(DEFAULT_MAX_MISSES)) args = parser.parse_args() #parse args expire_start = None if args.start: expire_start = dateutil.parser.parse(args.start[0]) #init django django.setup() #Connect to DB db = CASSANDRA_DB(get_config(get_config_path()), timeout=60) #read config file policies = {} json_file_data = open(args.config[0]) config = json.load(json_file_data) if 'policies' not in config: raise RuntimeError("Invalid JSON config. Missing required top-level 'policies' object") for p in config['policies']: i = 0 policies_builder = policies for req in POLICY_MATCH_FIELD_DEFS: i += 1 if req['name'] not in p: raise RuntimeError("Invalid policy in polcies list at position %d. Missing required field %s." % (i,req['name'])) val = p[req['name']] if val not in req['special_vals']: req['type'](val) if (req['valid_vals'] is not None) and (val not in req['valid_vals']): raise RuntimeError("Invalid policy in polcies list at position %d. Invalid value %s for %s. Allowed values are %s." 
% (i,val, req['name'], req['valid_vals'])) if val not in policies_builder: policies_builder[val] = {} policies_builder = policies_builder[val] build_policy_action(p, policies_builder) #Clean out data from cassandra metadata_counts = {} for et in PSEventTypes.objects.all(): #determine policy policy = get_policy(et, policies, [v['name'] for v in POLICY_MATCH_FIELD_DEFS], 0) if policy is None: print "Unable to find matching policy for %s:%s\n" % (et.metadata, et) continue #determine expire time if str(policy['expire']).lower() == 'never': continue expire_time = datetime_to_ts(datetime.utcnow() - timedelta(days=int(policy['expire']))) #handle command-line option if expire_start is not None: expire_start_ts = datetime_to_ts(expire_start) if expire_start_ts <= expire_time: expire_time =expire_start_ts else: #non-binding expire so skip continue #check metadata md_key = et.metadata.metadata_key if md_key not in metadata_counts: metadata_counts[md_key] = {"expired": 0, "total":0, "obj": et.metadata} metadata_counts[md_key]['total'] += 1 if et.time_updated is None: metadata_counts[md_key]['expired'] += 1 elif datetime_to_ts(et.time_updated) <= expire_time: metadata_counts[md_key]['expired'] += 1 expire_time = datetime_to_ts(et.time_updated) #Some datasets timeout if dataset is too large. 
in this case grab chunks begin_time = expire_time - args.time_chunk[0] end_time = expire_time misses = 0 while misses < args.max_misses[0]: if begin_time == 0: #only run one time after seeing begin_time of 0 misses = args.max_misses[0] elif begin_time < 0: #make sure begin_time is not below 0 begin_time = 0 misses = args.max_misses[0] #query data to delete try: (expired_data, cf, datapath) = query_data(db, et.metadata.metadata_key, et.event_type, et.summary_type, et.summary_window, begin_time, end_time) except Exception as e: print "Query error for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s, begin_time=%s, end_time=%s, error=%s" % (md_key, et.event_type, et.summary_type, et.summary_window, begin_time, end_time, e) break #adjust begin_time end_time = begin_time begin_time = begin_time - args.time_chunk[0] #check if we got any data if len(expired_data) == 0: misses += 1 continue #delete data for expired_col in expired_data: year = datetime.utcfromtimestamp(float(expired_col['ts'])/1000.0).year row_key = get_rowkey(datapath, et.summary_window, year) try: cf.remove(row_key, [expired_col['ts']]) except Exception as e: sys.stderr.write("Error deleting {0}: {1}\n".format(row_key, e)) print "Sending request to delete %d rows for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s" % (len(expired_data), md_key, et.event_type, et.summary_type, et.summary_window) try: cf.send() except Exception as e: sys.stderr.write("Error sending delete: {0}".format(e)) print "Deleted %d rows for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s" % (len(expired_data), md_key, et.event_type, et.summary_type, et.summary_window) #Clean out metadata from relational database for md_key in metadata_counts: if metadata_counts[md_key]['total'] == metadata_counts[md_key]['expired']: metadata_counts[md_key]['obj'].delete() print "Deleted metadata %s" % md_key
""" Things used by the REST API (api.py) that are also imported by other modules. Reduces the overhead/etc of importing api.py itself. """ from esmond.config import get_config_path, get_config # Prefix used in all the snmp data cassandra keys SNMP_NAMESPACE = 'snmp' # Anon limit configurable in conf/sane default if unset. alim = lambda x: x.api_anon_limit if x.api_anon_limit else 30 ANON_LIMIT = alim(get_config(get_config_path())) # Set up data structure mapping oidsets/oids to REST uri endpoints. class EndpointMap(object): """ The dynamic endpoint map generation has been moved into this class to avoid the map being generated on module import. That could cause conflicts with the test suite loading fixtures and allows getting rid of the old "failover" static dict. Burying execution of the map generation until after the tests have set up the in-memory db makes things happy. """ def __init__(self): self.mapping = None def generate_endpoint_map(self): payload = {} from esmond.api.models import OIDSet for oidset in OIDSet.objects.all().order_by('name'):
def main(): #Parse command-line opts parser = argparse.ArgumentParser( description="Remove old data and metadata based on a configuration file" ) parser.add_argument('-c', '--config', metavar='CONFIG', nargs=1, dest='config', default=DEFAULT_CONFIG_FILE, help='Configuration file location(default=%default).') parser.add_argument( '-s', '--start', metavar='START', nargs=1, dest='start', default=None, help= 'Start looking for expired record at given time as unix timestamp. Default is current time.' ) parser.add_argument( '-t', '--time-chunk', metavar='TIME_CHUNK', nargs=1, dest='time_chunk', default=[DEFAULT_MAX_TIME_CHUNK], type=int, help= 'The amount of data to look at each query in seconds. Defaults to {0}'. format(DEFAULT_MAX_TIME_CHUNK)) parser.add_argument( '-m', '--max-misses', metavar='MAX_MISSES', nargs=1, dest='max_misses', default=[DEFAULT_MAX_MISSES], type=int, help= 'The maximum number of time chunks with no data before giving up. Defaults to {0}' .format(DEFAULT_MAX_MISSES)) args = parser.parse_args() #parse args expire_start = None if args.start: expire_start = dateutil.parser.parse(args.start[0]) #init django django.setup() #Connect to DB db = CASSANDRA_DB(get_config(get_config_path()), timeout=60) #read config file policies = {} json_file_data = open(args.config[0]) config = json.load(json_file_data) if 'policies' not in config: raise RuntimeError( "Invalid JSON config. Missing required top-level 'policies' object" ) for p in config['policies']: i = 0 policies_builder = policies for req in POLICY_MATCH_FIELD_DEFS: i += 1 if req['name'] not in p: raise RuntimeError( "Invalid policy in polcies list at position %d. Missing required field %s." % (i, req['name'])) val = p[req['name']] if val not in req['special_vals']: req['type'](val) if (req['valid_vals'] is not None) and (val not in req['valid_vals']): raise RuntimeError( "Invalid policy in polcies list at position %d. Invalid value %s for %s. Allowed values are %s." 
% (i, val, req['name'], req['valid_vals'])) if val not in policies_builder: policies_builder[val] = {} policies_builder = policies_builder[val] build_policy_action(p, policies_builder) #Clean out data from cassandra metadata_counts = {} for et in PSEventTypes.objects.all(): #determine policy policy = get_policy(et, policies, [v['name'] for v in POLICY_MATCH_FIELD_DEFS], 0) if policy is None: print "Unable to find matching policy for %s:%s\n" % (et.metadata, et) continue #determine expire time if str(policy['expire']).lower() == 'never': continue expire_time = datetime_to_ts(datetime.utcnow() - timedelta(days=int(policy['expire']))) #handle command-line option if expire_start is not None: expire_start_ts = datetime_to_ts(expire_start) if expire_start_ts <= expire_time: expire_time = expire_start_ts else: #non-binding expire so skip continue #check metadata md_key = et.metadata.metadata_key if md_key not in metadata_counts: metadata_counts[md_key] = { "expired": 0, "total": 0, "obj": et.metadata } metadata_counts[md_key]['total'] += 1 if et.time_updated is None: metadata_counts[md_key]['expired'] += 1 elif datetime_to_ts(et.time_updated) <= expire_time: metadata_counts[md_key]['expired'] += 1 expire_time = datetime_to_ts(et.time_updated) #Some datasets timeout if dataset is too large. 
in this case grab chunks begin_time = expire_time - args.time_chunk[0] end_time = expire_time misses = 0 while misses < args.max_misses[0]: if begin_time == 0: #only run one time after seeing begin_time of 0 misses = args.max_misses[0] elif begin_time < 0: #make sure begin_time is not below 0 begin_time = 0 misses = args.max_misses[0] #query data to delete try: (expired_data, cf, datapath) = query_data( db, et.metadata.metadata_key, et.event_type, et.summary_type, et.summary_window, begin_time, end_time) except Exception as e: print "Query error for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s, begin_time=%s, end_time=%s, error=%s" % ( md_key, et.event_type, et.summary_type, et.summary_window, begin_time, end_time, e) break #adjust begin_time end_time = begin_time begin_time = begin_time - args.time_chunk[0] #check if we got any data if len(expired_data) == 0: misses += 1 continue #delete data for expired_col in expired_data: year = datetime.utcfromtimestamp( float(expired_col['ts']) / 1000.0).year row_key = get_rowkey(datapath, et.summary_window, year) try: cf.remove(row_key, [expired_col['ts']]) except Exception as e: sys.stderr.write("Error deleting {0}: {1}\n".format( row_key, e)) print "Sending request to delete %d rows for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s" % ( len(expired_data), md_key, et.event_type, et.summary_type, et.summary_window) try: cf.send() except Exception as e: sys.stderr.write("Error sending delete: {0}".format(e)) print "Deleted %d rows for metadata_key=%s, event_type=%s, summary_type=%s, summary_window=%s" % ( len(expired_data), md_key, et.event_type, et.summary_type, et.summary_window) #Clean out metadata from relational database for md_key in metadata_counts: if metadata_counts[md_key]['total'] == metadata_counts[md_key][ 'expired']: metadata_counts[md_key]['obj'].delete() print "Deleted metadata %s" % md_key
def handle(self, *args, **options): print 'Initializing cassandra esmond keyspace' config = get_config(get_config_path()) db = CASSANDRA_DB(config)
def handle(self, *args, **options): print 'Dropping and re-initializing cassandra esmond keyspace' config = get_config(get_config_path()) config.db_clear_on_testing = True db = CASSANDRA_DB(config)
def generate_or_update_gap_inventory(limit=0, threshold=0, verbose=False): db = CASSANDRA_DB(get_config(get_config_path())) gap_duration_lower_bound = datetime.timedelta(seconds=threshold) if limit: row_inventory = Inventory.objects.filter( scan_complete=False).order_by('row_key')[:limit] else: row_inventory = Inventory.objects.filter( scan_complete=False).order_by('row_key') count = 1 inv_count = len(row_inventory) for entry in row_inventory: print entry if verbose: print ' *', entry.start_time, ts_epoch(entry.start_time) print ' *', entry.end_time, ts_epoch(entry.end_time) print ' * inventory item # {0}/{1}'.format(count, inv_count) count += 1 # Check for valid timestamps if entry.start_time > entry.end_time: print ' * Bad start/end times!' entry.issues = 'ifref start_time/end_time mismatch' entry.save() continue ts_start = ts_epoch(entry.start_time) ts_end = ts_epoch(entry.end_time) # If end_time of current row is in the # future (ie: probably when run on the row of the # current year), adjust end time arg to an hour ago. # Use this in both the query and when setting up fill # boundaries. # # Will also be setting last_scan_point to that # value in the main inventory table. future_end_time = False if ts_end > int(time.time()): future_end_time = True # fit it to a bin ts_end = (int(time.time() - 3600) / entry.frequency) * entry.frequency # if last scan point is set, adjust the start time to that if entry.last_scan_point != None: print ' * setting start to last scan point' ts_start = ts_epoch(entry.last_scan_point) path = _split_rowkey(entry.row_key)[0:5] if sig_handler.interrupted: print 'shutting down' break if entry.get_column_family_display() == 'base_rates': data = db.query_baserate_timerange(path=path, freq=entry.frequency * 1000, ts_min=ts_start * 1000, ts_max=ts_end * 1000) else: # XXX(mmg): figure out what data is being stored # in the raw data cf and process accordingly. 
print ' * not processing' continue if data: entry.data_found = True print ' * data found' # Format the data payload (transform ms timestamps back # to seconds and set is_valid = 0 values to None) and # build a filled series over the query range out of # the returned data. formatted_data = QueryUtil.format_data_payload(data) filled_data = Fill.verify_fill(ts_start, ts_end, entry.frequency, formatted_data) gaps = find_gaps_in_series(filled_data) # Lots of data being passed around, so explicitly clear # the lists. del filled_data[:] del formatted_data[:] del data[:] if sig_handler.interrupted: print 'shutting down' break for gap in gaps: g_start = make_aware(datetime.datetime.utcfromtimestamp(gap[0]), utc) g_end = make_aware(datetime.datetime.utcfromtimestamp(gap[1]), utc) # Skip gaps too small to be considered gaps if g_end - g_start < gap_duration_lower_bound: continue if verbose: print ' * gap' print ' *', g_start print ' *', g_end print ' * dur: ', g_end - g_start # See if there is already an existing gap ending on the # current last_scan_point. If so just "extend" the existing # gap (as long as it hasn't been processed) with up to date # information rather than creating a new gap entry. # # This prevents subsequent scans during the current year # from creating a bunch of gap_inventory entries for # a prolonged gap/inactive interface. 
g = None try: g = GapInventory.objects.get(row=entry, end_time=entry.last_scan_point, processed=False) except ObjectDoesNotExist: pass if g: if verbose: print ' * update gap' g.end_time = g_end else: if verbose: print ' * new gap' g = GapInventory(row=entry, start_time=g_start, end_time=g_end) g.save() if verbose: print ' * +++' if future_end_time: # Current year, keep our spot entry.last_scan_point = make_aware( datetime.datetime.utcfromtimestamp(ts_end), utc) else: # Previous year, mark the row as processed entry.last_scan_point = entry.end_time entry.scan_complete = True entry.save() # explicitly clear gaps list just in case and issue # the djanjo reset so as to no leak memory in the # form of saved queries. del gaps[:] django_db.reset_queries() if verbose: print '=======' if sig_handler.interrupted: print 'shutting down' break pass
def generate_or_update_gap_inventory(limit=0, threshold=0, verbose=False): db = CASSANDRA_DB(get_config(get_config_path())) gap_duration_lower_bound = datetime.timedelta(seconds=threshold) if limit: row_inventory = Inventory.objects.filter(scan_complete=False).order_by('row_key')[:limit] else: row_inventory = Inventory.objects.filter(scan_complete=False).order_by('row_key') count = 1 inv_count = len(row_inventory) for entry in row_inventory: print entry if verbose: print ' *', entry.start_time, ts_epoch(entry.start_time) print ' *', entry.end_time, ts_epoch(entry.end_time) print ' * inventory item # {0}/{1}'.format(count, inv_count) count += 1 # Check for valid timestamps if entry.start_time > entry.end_time: print ' * Bad start/end times!' entry.issues = 'ifref start_time/end_time mismatch' entry.save() continue ts_start = ts_epoch(entry.start_time) ts_end = ts_epoch(entry.end_time) # If end_time of current row is in the # future (ie: probably when run on the row of the # current year), adjust end time arg to an hour ago. # Use this in both the query and when setting up fill # boundaries. # # Will also be setting last_scan_point to that # value in the main inventory table. future_end_time = False if ts_end > int(time.time()): future_end_time = True # fit it to a bin ts_end = (int(time.time()-3600)/entry.frequency)*entry.frequency # if last scan point is set, adjust the start time to that if entry.last_scan_point != None: print ' * setting start to last scan point' ts_start = ts_epoch(entry.last_scan_point) path = _split_rowkey(entry.row_key)[0:5] if sig_handler.interrupted: print 'shutting down' break if entry.get_column_family_display() == 'base_rates': data = db.query_baserate_timerange(path=path, freq=entry.frequency*1000, ts_min=ts_start*1000, ts_max=ts_end*1000) else: # XXX(mmg): figure out what data is being stored # in the raw data cf and process accordingly. 
print ' * not processing' continue if data: entry.data_found = True print ' * data found' # Format the data payload (transform ms timestamps back # to seconds and set is_valid = 0 values to None) and # build a filled series over the query range out of # the returned data. formatted_data = QueryUtil.format_data_payload(data) filled_data = Fill.verify_fill(ts_start, ts_end, entry.frequency, formatted_data) gaps = find_gaps_in_series(filled_data) # Lots of data being passed around, so explicitly clear # the lists. del filled_data[:] del formatted_data[:] del data[:] if sig_handler.interrupted: print 'shutting down' break for gap in gaps: g_start = make_aware(datetime.datetime.utcfromtimestamp(gap[0]), utc) g_end = make_aware(datetime.datetime.utcfromtimestamp(gap[1]), utc) # Skip gaps too small to be considered gaps if g_end - g_start < gap_duration_lower_bound: continue if verbose: print ' * gap' print ' *', g_start print ' *', g_end print ' * dur: ', g_end - g_start # See if there is already an existing gap ending on the # current last_scan_point. If so just "extend" the existing # gap (as long as it hasn't been processed) with up to date # information rather than creating a new gap entry. # # This prevents subsequent scans during the current year # from creating a bunch of gap_inventory entries for # a prolonged gap/inactive interface. 
g = None try: g = GapInventory.objects.get(row=entry, end_time=entry.last_scan_point, processed=False) except ObjectDoesNotExist: pass if g: if verbose: print ' * update gap' g.end_time = g_end else: if verbose: print ' * new gap' g = GapInventory(row=entry, start_time=g_start, end_time=g_end) g.save() if verbose: print ' * +++' if future_end_time: # Current year, keep our spot entry.last_scan_point = make_aware(datetime.datetime.utcfromtimestamp(ts_end), utc) else: # Previous year, mark the row as processed entry.last_scan_point = entry.end_time entry.scan_complete = True entry.save() # explicitly clear gaps list just in case and issue # the djanjo reset so as to no leak memory in the # form of saved queries. del gaps[:] django_db.reset_queries() if verbose: print '=======' if sig_handler.interrupted: print 'shutting down' break pass