def __verify(self, lvl, _id, failover=False, empty=False):
    """Check that ldmsd (lvl, _id) lists exactly the expected set names.

    Connects to localhost on the port returned by ``LVX_port(lvl, _id)``,
    fetches the directory listing and compares it, as a set, against the
    expected instance names ``LVX_prdcr(0, i) + "/" + schema`` for
    ``schema`` in ``meminfo`` and ``vmstat``.

    Args:
        lvl: the block of level-0 indices covered is ``2**lvl`` wide.
        _id: selects the block ``[2**lvl * _id, 2**lvl * (_id + 1))``.
        failover: when true, the block selected by ``_id ^ 1`` is expected
            in addition.
        empty: when true, the daemon's own block is not expected (the
            failover block, if requested, still is).

    Raises:
        AssertionError: if the connection fails or the listing differs.
    """
    prdcr = LVX_prdcr(lvl, _id)
    log.info("Verifying %s" % prdcr)
    port = LVX_port(lvl, _id)
    x = ldms.LDMS_xprt_new(XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", str(port))
    # Keep the handles around for post-mortem inspection.
    DEBUG.x = x
    DEBUG.rc = rc
    # unittest assertion instead of a bare `assert`: it is not stripped by
    # `python -O` and reports what failed.
    self.assertEqual(rc, 0, "cannot connect to %s on port %s (rc: %s)" %
                     (prdcr, port, rc))

    N = 2**lvl

    def expected_names(idx):
        # Names of both schemas for every level-0 producer in block `idx`.
        off = N * idx
        return set(LVX_prdcr(0, i) + "/" + s
                   for i in range(off, off + N)
                   for s in ["meminfo", "vmstat"])

    s0 = set() if empty else expected_names(_id)
    if failover:
        s0.update(expected_names(_id ^ 1))
    s1 = set(ldms.LDMS_xprt_dir(x))
    DEBUG.s0 = s0
    DEBUG.s1 = s1
    msg = "ldmsd (%d, %d) verification failed, expecting %s, but got %s" % (
        lvl, _id, str(s0), str(s1))
    self.assertEqual(s0, s1, msg)
def test_01_verify(self):
    """Verify data in the storage

    Samples the single set published on ``self.SMP_PORT`` ten times, one
    second apart, and asserts that every sampled tuple is among the rows
    read back from ``test_store_csv/csv``.
    """
    x = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", self.SMP_PORT)
    if rc:
        log.error("rc: %d" % rc)
    # unittest assertion rather than a bare `assert`, so the check survives
    # `python -O` and the failure carries the return code.
    self.assertEqual(rc, 0, "connect to localhost:%s failed (rc: %s)" %
                     (self.SMP_PORT, rc))
    dlist = ldms.LDMS_xprt_dir(x)
    log.info("Looking up sets")
    self.assertEqual(len(dlist), 1)
    s = ldms.LDMS_xprt_lookup(x, dlist[0], 0)
    log.info("Collecting data from LDMS for comparison")
    data = set()
    for _ in range(10):
        # update first
        s.update()
        data.add(ldms_set_as_tuple(s))
        time.sleep(1)
    time.sleep(1)  # to make sure that the last data point has been stored
    log.info("Verifying...")
    csv_data = set(LdmsCsv("test_store_csv/csv"))
    # Every sampled data point must have been stored (subset check).
    self.assertLessEqual(data, csv_data)
def test_01_verify(self):
    """Verify data in the storage

    Samples the single set published on ``self.SMP_PORT`` ten times, one
    second apart, checks that every sample and every record in
    ``self.amqp_sink.data`` has the same keys as the first sample, and
    asserts that the sampled tuples are a subset of the sink's tuples.
    """
    x = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", self.SMP_PORT)
    if rc:
        log.error("rc: %d" % rc)
    # unittest assertions rather than bare `assert`s, so the checks survive
    # `python -O` and failures say what went wrong.
    self.assertEqual(rc, 0, "connect to localhost:%s failed (rc: %s)" %
                     (self.SMP_PORT, rc))
    dlist = ldms.LDMS_xprt_dir(x)
    self.assertEqual(len(dlist), 1)
    log.info("Looking up sets")
    _set = ldms.LDMS_xprt_lookup(x, dlist[0], 0)
    self.assertTrue(_set, "lookup of set %s failed" % (dlist[0],))
    log.info("Collecting data from LDMS for comparison")
    data = []
    for _ in range(10):
        # update first
        _set.update()
        data.append(ldms_set_as_dict(_set))
        time.sleep(1)
    time.sleep(1)  # to make sure that the last data point has been stored
    log.info("Verifying...")
    keys = data[0].keys()
    key_set = set(keys)  # built once; compared against every record
    for d in data:
        self.assertEqual(key_set, set(d.keys()))
    for d in self.amqp_sink.data:
        self.assertEqual(key_set, set(d.keys()))
    # Project every record onto the same key order so tuples are comparable.
    data = set(tuple_from_dict(d, keys) for d in data)
    amqp_data = set(tuple_from_dict(d, keys) for d in self.amqp_sink.data)
    self.assertGreater(len(data), 0)
    self.assertLessEqual(data, amqp_data)
def __init__(self, host, port, xprt, max_recv_len = MAX_RECV_LEN, auth=None, auth_opt=None):
    """Create an LDMS transport and connect it to ``host:port``.

    Args:
        host: host name passed to ``LDMS_xprt_connect_by_name``.
        port: port of the daemon; converted with ``str()`` before use.
        xprt: transport name passed to ``LDMS_xprt_new``; must not be None.
        max_recv_len: accepted for interface compatibility but not used;
            ``self.max_recv_len`` is taken from the transport's
            ``msg_max_get()`` instead.
        auth: if truthy, the transport is created with
            ``LDMS_xprt_new_with_auth(xprt, auth, auth_opt)``.
        auth_opt: options forwarded together with ``auth``.

    Raises:
        ImportError: if ``ovis_ldms.ldms`` cannot be imported.
        ValueError: if the transport cannot be created.
        RuntimeError: if the connection attempt returns non-zero.
    """
    try:
        from ovis_ldms import ldms
    except ImportError:
        # Only import failures are translated; anything else (including
        # KeyboardInterrupt) propagates unchanged.
        raise ImportError("Failed to import ovis_ldms.ldms. "
                          "Please make sure that ldms is built with --enable-swig")
    self.ldms_module = ldms

    if xprt is None:
        # NOTE(review): ArgumentError is not a builtin; confirm it is
        # defined or imported elsewhere in this module.
        raise ArgumentError("xprt is required to create an LDMS transport")

    self.socket = None
    self.host = host
    self.port = port
    self.xprt = xprt
    self.state = "INIT"

    if auth:
        self.ldms = ldms.LDMS_xprt_new_with_auth(self.xprt, auth, auth_opt)
    else:
        self.ldms = ldms.LDMS_xprt_new(self.xprt)
    if not self.ldms:
        raise ValueError("Failed to create LDMS transport")
    self.state = "NEW"

    self.max_recv_len = self.ldms.msg_max_get()
    self.rc = ldms.LDMS_xprt_connect_by_name(self.ldms, self.host, str(self.port))
    if self.rc != 0:
        raise RuntimeError("Failed to connect to ldmsd. %s" %
                           (self.ldms.event_errcode2str(self.rc)))
    self.type = "inband"
    self.state = "CONNECTED"
def test_01_verify(self):
    """Verify data in the storage

    Samples every set published on ``self.SMP_PORT`` ten times, one second
    apart (with timestamps), runs ``ResultFile.verify()`` on
    ``self.STORE_PATH``, and asserts that the sampled tuples are a subset
    of the rows stored in the result file.
    """
    x = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", self.SMP_PORT)
    if rc:
        log.error("rc: %d" % rc)
    # unittest assertions rather than bare `assert`s, so the checks survive
    # `python -O` and failures say what went wrong.
    self.assertEqual(rc, 0, "connect to localhost:%s failed (rc: %s)" %
                     (self.SMP_PORT, rc))
    dlist = ldms.LDMS_xprt_dir(x)
    _sets = []
    log.info("Looking up sets")
    for name in dlist:
        s = ldms.LDMS_xprt_lookup(x, name, 0)
        self.assertTrue(s, "lookup of set %s failed" % (name,))
        _sets.append(s)
    # The column names below are taken from one of the sets, so at least
    # one must exist; fail clearly instead of hitting a NameError later.
    self.assertTrue(_sets, "no sets found on localhost:%s" % (self.SMP_PORT,))
    log.info("Collecting data from LDMS for comparison")
    data = set()
    for _ in range(10):
        # update first
        for s in _sets:
            s.update()
        for s in _sets:
            data.add(ldms_set_as_tuple(s, with_ts=True))
        time.sleep(1)
    time.sleep(1)  # to make sure that the last data point has been stored
    log.info("Verifying...")
    rf = ResultFile(self.STORE_PATH)
    # Verify the computed results
    rf.verify()
    # Now, verify that the stored raw is good
    last = _sets[-1]  # same set the original read via the leaked loop variable
    names = ["#Time"] + [last.metric_name_get(k) for k, _v in last.iter_items()]
    csv_data = set(r.as_tuple(names) for r in rf)
    self.assertLessEqual(data, csv_data)
def test_00_verify_cfg(self):
    """Verify sampler config, cmd-expand only env command

    Connects to localhost port 10001 and expects the directory listing to
    contain exactly one entry: ``<this host's name>/$(whoami)/meminfo``,
    with ``$(whoami)`` left as literal text.
    """
    expected = [socket.gethostname() + "/$(whoami)/meminfo"]
    conn = ldms.LDMS_xprt_new(self.XPRT)
    status = ldms.LDMS_xprt_connect_by_name(conn, "localhost", "10001")
    self.assertEqual(status, 0)
    listing = ldms.LDMS_xprt_dir(conn)
    self.assertEqual(listing, expected)
def __init__(self, port, xprt="sock", hostname="localhost"):
    """Connect to ``hostname:port`` and look up every set it lists.

    After construction ``self.sets`` holds the looked-up sets in directory
    order and ``self._dict`` maps each directory entry to its set.

    Args:
        port: port passed to ``LDMS_xprt_connect_by_name``.
        xprt: transport name passed to ``LDMS_xprt_new``.
        hostname: host to connect to.

    Raises:
        AssertionError: if the connection attempt returns non-zero.
    """
    self.xprt = ldms.LDMS_xprt_new(xprt)
    status = ldms.LDMS_xprt_connect_by_name(self.xprt, hostname, port)
    # NOTE(review): this check disappears under `python -O`.
    assert status == 0
    self.sets = []
    self._dict = {}
    for name in ldms.LDMS_xprt_dir(self.xprt):
        handle = ldms.LDMS_xprt_lookup(self.xprt, name,
                                       ldms.LDMS_LOOKUP_BY_INSTANCE)
        self.sets.append(handle)
        self._dict[name] = handle
def ldms_try_connect(host, port, xprt="sock", timeout=1):
    """Keep trying to connect to ``host:port`` until it works or time runs out.

    A fresh transport is created for every attempt. Attempts are made back
    to back while less than ``timeout`` seconds have elapsed since the
    call started.

    Args:
        host: host to connect to; converted with ``str()``.
        port: port to connect to; converted with ``str()``.
        xprt: transport name passed to ``LDMS_xprt_new``.
        timeout: time budget in seconds; with a non-positive value no
            attempt is made at all.

    Returns:
        The connected transport object.

    Raises:
        RuntimeError: if no attempt succeeded within ``timeout`` seconds.
    """
    t0 = t = time.time()
    while t - t0 < timeout:
        x = ldms.LDMS_xprt_new(xprt)
        try:
            x.connectByName(str(host), str(port))
        except Exception:
            # Deliberate best-effort retry. Only ordinary errors are
            # swallowed, so Ctrl-C / SystemExit still abort the loop.
            pass
        else:
            # connect success
            return x
        t = time.time()
    raise RuntimeError("ldms_try_connect timeout (%d sec)" % timeout)
def setUpClass(self): self.expt = None self.samp = None self.slurm_set = None # the `slurm` set self.mem_set = None # the `mem` (meminfo) set self.conn = None # ldms connection ldms.ldms_init(512*1024*1024) # 512MB should suffice try: cfg = """\ load name=slurm plugin=slurm_sampler config name=slurm instance=slurm stream=slurm \ job_count=%(JOB_SIZE)d task_count=%(TASK_SIZE)d load name=mem plugin=meminfo config name=mem job_set=slurm instance=mem smplr_add name=mem_smplr instance=mem interval=1000000 offset=0 smplr_start name=mem_smplr """ % { k: getattr(self, k) for k in dir(self) } self.samp = LDMSD(port=self.SAMP_PORT, cfg = cfg, logfile = self.SAMP_LOG, gdb_port = self.SAMP_GDB_PORT) D.samp = self.samp log.info("Starting sampler") self.samp.run() self.conn = ldms.LDMS_xprt_new(self.XPRT) D.conn = self.conn self.conn.connectByName("localhost", self.SAMP_PORT) self.slurm_set = self.conn.lookupSet("slurm", 0) D.slurm_set = self.slurm_set self.mem_set = self.conn.lookupSet("mem", 0) D.mem_set = self.mem_set expt = { "component_id" : [0L] * self.JOB_SIZE, "job_id" : [0L] * self.JOB_SIZE, "app_id" : [0L] * self.JOB_SIZE, "current_slot" : 0L, "job_state" : [0L] * self.JOB_SIZE, "job_tstamp" : [0L] * self.JOB_SIZE, "job_size" : [0L] * self.JOB_SIZE, "job_uid" : [0L] * self.JOB_SIZE, "job_gid" : [0L] * self.JOB_SIZE, "job_start" : [0L] * self.JOB_SIZE, "job_end" : [0L] * self.JOB_SIZE, "node_count" : [0L] * self.JOB_SIZE, "task_count" : [0L] * self.JOB_SIZE, } task_keys = ["task_pid", "task_rank", "task_exit_status"]
def test_tsampler(self):
    """Check that ``clock`` and ``clock_timeval`` of ``smp/hfclock`` agree.

    ``clock_timeval`` is read as a flat sequence of pairs; each pair is
    combined as ``first + second * 1e-6`` (presumably seconds and
    microseconds -- confirm against the sampler) and must be within 1 ms
    of the corresponding positive ``clock`` value.
    """
    time.sleep(1)  # wait for sampler to populate the set
    x = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", self.SMP_PORT)
    self.assertEqual(rc, 0)
    s = ldms.LDMS_xprt_lookup(x, "smp/hfclock", 0)
    s.update()
    clk = s["clock"]
    DEBUG.clk = clk
    tmp = s["clock_timeval"]
    # Floor division: `/` yields a float on Python 3, which range() rejects.
    clk_tv = [tmp[2*i] + tmp[2*i+1]*1e-6 for i in range(len(tmp) // 2)]
    DEBUG.clk_tv = clk_tv
    for (a, b) in zip(clk, clk_tv):
        self.assertGreater(a, 0)
        self.assertLess(abs(a-b), 0.001)
def test_000(self):
    """Verify that the agg collects from both smp0 and smp1

    Connects to the aggregator on ``self.AGG_PORT_BASE``, expects its
    directory to be exactly ``smp<i>/meminfo`` for ``i`` in
    ``range(self.SMP_NUM)``, then updates each set and checks that its
    ``component_id`` equals ``i`` and that its timestamp is non-zero.
    """
    x = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost",
                                        str(self.AGG_PORT_BASE))
    self.assertEqual(rc, 0)
    dirs = ldms.LDMS_xprt_dir(x)
    self.assertEqual(
        set(dirs),
        set(["smp%d/meminfo" % i for i in range(0, self.SMP_NUM)]))
    sets = {d: ldms.LDMS_xprt_lookup(x, d, 0) for d in dirs}
    # .items() rather than .iteritems(): works on both Python 2 and 3.
    for k, s in sets.items():
        s.update()
        # The directory check above guarantees the name matches.
        grp = re.match(r"smp(\d+)/meminfo", k).groups()
        comp_id = int(grp[0])
        self.assertEqual(s['component_id'], comp_id)
        ts = s.ts_get()
        self.assertGreater(ts.sec, 0)
def test_01_req_noexp(self):
    """Request over xprt shall not be command-expanded

    Sends ``env``/``load``/``config``/``start`` commands for a ``vmstat``
    sampler through an ``LDMSD_Controller`` session, then lists the
    daemon's directory and expects the new instance to be named literally
    ``$(hostname)/vmstat`` next to the existing
    ``<host>/$(whoami)/meminfo``.
    """
    ctrl = LDMSD_Controller(port=self.SMP_PORT, xprt=self.XPRT)
    ctrl.run()
    ctrl.read_pty()
    # X is set to the literal text `$(hostname)`; the expectation below is
    # that it is NOT replaced by the command's output.
    ctrl.write_pty("env X=$(hostname)\n")
    ctrl.write_pty("load name=vmstat\n")
    # NOTE(review): the literal is continued with backslash-newline, so the
    # leading whitespace of each continuation line is part of the string;
    # confirm the amount against the original formatting.
    ctrl.write_pty("config name=vmstat producer=${X} \
                   instance=${X}/vmstat\
                   schema=vmstat\n")
    ctrl.write_pty("start name=vmstat interval=1000000 offset=0\n")
    # Brief pause before listing the directory.
    time.sleep(0.2)
    host = socket.gethostname()
    xprt = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", "10001")
    self.assertEqual(rc, 0)
    dir_resp = ldms.LDMS_xprt_dir(xprt)
    # Sort both sides so the comparison does not depend on listing order.
    dir_resp.sort()
    expected = [host + "/$(whoami)/meminfo", "$(hostname)/vmstat"]
    expected.sort()
    self.assertEqual(dir_resp, expected)
def test_update(self):
    """Check that two consecutive updates yield evenly spaced timestamps.

    Looks up ``self.SET_NAME``, waits ``self.UPD_INT * uS`` before each of
    two ``self.__update()`` calls, concatenates the returned timestamp
    lists and asserts that consecutive timestamps are strictly increasing
    and spaced ``self.SMP_INT * uS`` apart, within 0.001.
    """
    conn = ldms.LDMS_xprt_new(self.XPRT)
    status = ldms.LDMS_xprt_connect_by_name(conn, "localhost", self.SMP_PORT)
    self.assertEqual(status, 0)
    ldms_set = ldms.LDMS_xprt_lookup(conn, self.SET_NAME, 0)

    time.sleep(self.UPD_INT * uS)
    log.info("First update ...")
    first = self.__update(ldms_set)

    time.sleep(self.UPD_INT * uS)
    log.info("Second update ...")
    second = self.__update(ldms_set)

    stamps = first + second
    # Keep everything reachable for post-mortem inspection.
    DEBUG.ts_list0 = first
    DEBUG.ts_list1 = second
    DEBUG.ts_list = stamps

    log.info("Verifying data ...")
    for prev, cur in zip(stamps, stamps[1:]):
        DEBUG.a = prev
        DEBUG.b = cur
        self.assertLess(prev, cur)
        gap = cur - prev
        self.assertLess(abs(gap - self.SMP_INT * uS), 0.001)
    log.info("%d data timestamps verified" % len(stamps))
def _get_sets(self, port):
    """Connect to localhost on ``port`` and fetch its directory listing.

    NOTE(review): as visible here, nothing is looked up, stored or
    returned -- the loop body is a no-op and ``rc`` is never checked.
    The function looks unfinished; confirm the intended behaviour.
    """
    x = ldms.LDMS_xprt_new(self.XPRT)
    # The return code is captured but not inspected.
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", port)
    _dirs = ldms.LDMS_xprt_dir(x)
    for d in _dirs:
        pass