def __init__(self, settings):
    self.session = requests.Session()
    self.cloud = settings.cloud
    self.cloud_enabled = self.cloud['enabled']
    if self.cloud_enabled:
        self.session = self.cloud["cloud_rest"]
    self.interval = settings.interval
    self.cluster = settings.cluster
    self.master_node = settings.master_node
    self.auth = (settings.rest_username, settings.rest_password)
    self.buckets = settings.buckets
    self.indexes = settings.indexes
    self.collections = settings.collections
    self.hostnames = settings.hostnames
    self.workers = settings.workers
    self.n2n_enabled = settings.is_n2n
    self.nodes = list(self.get_nodes())
    self.ssh_username = getattr(settings, 'ssh_username', None)
    self.ssh_password = getattr(settings, 'ssh_password', None)
    self.store = PerfStore(settings.cbmonitor_host)
    self.mc = MetadataClient(settings)
    self.metrics = set()
    self.updater = None
def __init__(self, settings, cluster_spec: ClusterSpec, test_config: TestConfig):
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    self.mc = MetadataClient(settings)
    self.store = PerfStore(settings.cbmonitor_host)
    self.workload_setting = PhaseSettings
    self.interval = self.MAX_SAMPLING_INTERVAL
    self.cluster = settings.cluster
    self.clients = []
    self.cb_host = self.cluster_spec.servers[int(self.test_config.nodes)]
    self.sg_host = next(self.cluster_spec.masters)
    src_client = new_client(host=self.cb_host,
                            bucket='bucket-1',
                            password='******',
                            timeout=self.TIMEOUT)
    self.clients.append(('bucket-1', src_client))
    self.new_docs = Document(1024)
def add_snapshot(self, phase, ts_from, ts_to):
    for i, cluster in enumerate(self.clusters, start=1):
        snapshot = '{}_{}'.format(cluster, phase)
        self.settings.cluster = cluster
        md_client = MetadataClient(self.settings)
        md_client.add_snapshot(snapshot, ts_from, ts_to)
        self.snapshots.append(snapshot)
        self.trigger_reports(snapshot)
def add_snapshots(self):
    self.test.cbmonitor_snapshots = []
    for cluster_id in self.test.cbmonitor_clusters:
        self.settings.cluster = cluster_id
        md_client = MetadataClient(self.settings)
        md_client.add_snapshot(cluster_id)
        self.trigger_report(cluster_id)
        self.test.cbmonitor_snapshots.append(cluster_id)
def __init__(self, settings):
    self.interval = settings.interval
    self.cluster = settings.cluster
    self.store = SerieslyStore(settings.seriesly_host)
    self.mc = MetadataClient(settings)
    self.nodes = settings.nodes
    self.stats_api = "http://{}:4985/_stats"
    self.prev_pause_total = None
def cluster_stats(self, cluster: str) -> Iterator[dict]:
    m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
    for metric in m.get_metrics():
        db = self.ps.build_dbname(cluster=cluster, collector=metric['collector'])
        summary = self.get_summary(db=db, metric=metric['name'])
        if summary:
            yield {
                'metric': metric['name'],
                'summary': summary,
            }
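# A minimal usage sketch of the generator above. The names `reporter` (an
# instance of the owning class, which is not shown here) and 'test_cluster'
# are illustrative assumptions, not part of the original code.
for stat in reporter.cluster_stats(cluster='test_cluster'):
    print(stat['metric'], stat['summary'])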
def __init__(self, settings):
    self.cluster = settings.cluster
    self.master_node = settings.master_node
    self.auth = (settings.rest_username, settings.rest_password)
    self.nodes = list(self._get_nodes())
    self.store = SerieslyStore(settings.seriesly_host)
    self.mc = MetadataClient(settings)
def __init__(self, settings):
    self.session = requests.Session()
    self.interval = settings.interval
    self.cluster = settings.cluster
    self.master_node = settings.master_node
    self.index_node = settings.index_node
    self.auth = (settings.rest_username, settings.rest_password)
    self.buckets = settings.buckets
    self.hostnames = settings.hostnames
    self.nodes = list(self.get_nodes())
    self.ssh_username = getattr(settings, 'ssh_username', None)
    self.ssh_password = getattr(settings, 'ssh_password', None)
    self.secondary_statsfile = settings.secondary_statsfile
    self.store = SerieslyStore(settings.seriesly_host)
    self.mc = MetadataClient(settings)
    self.metrics = set()
    self.updater = None
def __init__(self, settings):
    self.session = requests.Session()
    self.interval = settings.interval
    self.cluster = settings.cluster
    self.master_node = settings.master_node
    self.auth = (settings.rest_username, settings.rest_password)
    self.buckets = settings.buckets
    self.hostnames = settings.hostnames
    self.nodes = list(self.get_nodes())
    self.store = SerieslyStore(settings.seriesly_host)
    self.mc = MetadataClient(settings)
    self.metrics = set()
    self.updater = None
class SyncGateway(Collector):

    COLLECTOR = "sync_gateway"

    def __init__(self, settings):
        self.interval = settings.interval
        self.cluster = settings.cluster
        self.store = SerieslyStore(settings.seriesly_host)
        self.mc = MetadataClient(settings)
        self.nodes = settings.nodes
        self.stats_api = "http://{}:4985/_stats"
        self.prev_pause_total = None

    def _fetch_stats(self, node):
        stats_api = "http://{}:4985/_stats".format(node)
        for _, stats in requests.get(url=stats_api).json().items():
            for metric, value in stats.items():
                if type(value) == int:  # can't use isinstance because of bool
                    yield metric, value
                if metric == "PauseNs":
                    # last non-zero GC pause sample
                    yield metric, [v for v in value if v][-1]
                if metric == "PauseTotalNs":
                    pause = value - (self.prev_pause_total or value)
                    pause_pct = 100.0 * pause / 10 ** 9 / self.interval
                    self.prev_pause_total = value
                    yield "PausesPct", pause_pct

    def update_metadata(self):
        self.mc.add_cluster()
        for node in self.nodes:
            self.mc.add_server(node)
            for metric, _ in self._fetch_stats(node):
                self.mc.add_metric(metric, server=node, collector=self.COLLECTOR)

    def sample(self):
        for node in self.nodes:
            samples = dict(stats for stats in self._fetch_stats(node))
            self.store.append(samples,
                              cluster=self.cluster,
                              server=node,
                              collector=self.COLLECTOR)
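# Hedged usage sketch for the SyncGateway collector above. The settings object
# is hypothetical: only the attributes read in __init__ are filled in, and a
# real settings object may need additional fields (whatever MetadataClient
# expects). update_metadata() registers cluster/server/metric metadata once,
# then collect() (inherited from Collector) keeps calling sample().
from types import SimpleNamespace

sg_settings = SimpleNamespace(interval=10,
                              cluster='sg_cluster',
                              seriesly_host='127.0.0.1',
                              nodes=['172.23.100.10'])
collector = SyncGateway(sg_settings)
collector.update_metadata()
collector.collect()  # blocks; samples every `interval` seconds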
class Collector(object):

    COLLECTOR = None

    def __init__(self, settings):
        self.session = requests.Session()
        self.interval = settings.interval
        self.cluster = settings.cluster
        self.master_node = settings.master_node
        self.auth = (settings.rest_username, settings.rest_password)
        self.buckets = settings.buckets
        self.hostnames = settings.hostnames
        self.nodes = list(self.get_nodes())
        self.ssh_username = getattr(settings, 'ssh_username', None)
        self.ssh_password = getattr(settings, 'ssh_password', None)
        self.store = SerieslyStore(settings.seriesly_host)
        self.mc = MetadataClient(settings)
        self.metrics = set()
        self.updater = None

    def get_http(self, path, server=None, port=8091):
        server = server or self.master_node
        url = "http://{}:{}{}".format(server, port, path)
        try:
            r = self.session.get(url=url, auth=self.auth)
            if r.status_code in (200, 201, 202):
                return r.json()
            else:
                logger.warn("Bad response: {}".format(url))
                return self.retry(path, server, port)
        except requests.ConnectionError:
            logger.warn("Connection error: {}".format(url))
            return self.retry(path, server, port)

    def retry(self, path, server=None, port=8091):
        time.sleep(self.interval)
        for node in self.nodes:
            if self._check_node(node):
                self.master_node = node
                self.nodes = list(self.get_nodes())
                break
        else:
            logger.interrupt("Failed to find at least one node")
        if server not in self.nodes:
            raise RuntimeError("Bad node {}".format(server or ""))
        else:
            return self.get_http(path, server, port)

    def _check_node(self, node):
        try:
            s = socket.socket()
            s.connect((node, 8091))
        except socket.error:
            return False
        else:
            if not self.get_http(path="/pools", server=node).get("pools"):
                return False
        return True

    def get_buckets(self, with_stats=False):
        buckets = self.get_http(path="/pools/default/buckets")
        if not buckets:
            buckets = self.retry(path="/pools/default/buckets")
        for bucket in buckets:
            if self.buckets is not None and bucket["name"] not in self.buckets:
                continue
            if with_stats:
                yield bucket["name"], bucket["stats"]
            else:
                yield bucket["name"]

    def get_nodes(self):
        pool = self.get_http(path="/pools/default")
        for node in pool["nodes"]:
            hostname = node["hostname"].split(":")[0]
            if self.hostnames is not None and hostname not in self.hostnames:
                continue
            yield hostname

    def _update_metric_metadata(self, metrics, bucket=None, server=None):
        for metric in metrics:
            metric = metric.replace('/', '_')
            metric_hash = hash((metric, bucket, server))
            if metric_hash not in self.metrics:
                self.metrics.add(metric_hash)
                self.mc.add_metric(metric, bucket, server, self.COLLECTOR)

    def update_metric_metadata(self, *args, **kwargs):
        if self.updater is None or not self.updater.is_alive():
            self.updater = Thread(
                target=self._update_metric_metadata, args=args, kwargs=kwargs
            )
            self.updater.daemon = True
            self.updater.start()

    def sample(self):
        raise NotImplementedError

    def collect(self):
        while True:
            try:
                self.sample()
                time.sleep(self.interval)
            except KeyboardInterrupt:
                sys.exit()
            except Exception as e:
                logger.warn(e)
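# Illustrative sketch only, not part of the original code: a minimal concrete
# collector built on the base class above, showing the intended pattern of
# implementing sample(), reusing get_http() for data, update_metric_metadata()
# for metadata, and store.append() for samples. The "systemStats" field of the
# /pools/default payload is an assumption about the REST response shape.
class NodeStatsCollector(Collector):  # hypothetical subclass

    COLLECTOR = "node_stats"

    def sample(self):
        pool = self.get_http(path="/pools/default")
        for node in pool["nodes"]:
            hostname = node["hostname"].split(":")[0]
            samples = {k: v for k, v in node.get("systemStats", {}).items()
                       if isinstance(v, (int, float))}
            self.update_metric_metadata(samples.keys(), server=hostname)
            self.store.append(samples,
                              cluster=self.cluster,
                              server=hostname,
                              collector=self.COLLECTOR)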
class Collector:

    COLLECTOR = None

    def __init__(self, settings):
        self.session = requests.Session()
        self.interval = settings.interval
        self.cluster = settings.cluster
        self.master_node = settings.master_node
        self.auth = (settings.rest_username, settings.rest_password)
        self.buckets = settings.buckets
        self.indexes = settings.indexes
        self.hostnames = settings.hostnames
        self.workers = settings.workers
        self.nodes = list(self.get_nodes())
        self.ssh_username = getattr(settings, 'ssh_username', None)
        self.ssh_password = getattr(settings, 'ssh_password', None)
        self.secondary_statsfile = settings.secondary_statsfile
        self.store = PerfStore(settings.cbmonitor_host)
        self.mc = MetadataClient(settings)
        self.metrics = set()
        self.updater = None

    def get_http(self, path, server=None, port=8091, json=True):
        server = server or self.master_node
        url = "http://{}:{}{}".format(server, port, path)
        try:
            r = self.session.get(url=url, auth=self.auth)
            if r.status_code in (200, 201, 202):
                return r.json() if json else r.text
            else:
                logger.warn("Bad response: {}".format(url))
                return self.refresh_nodes_and_retry(path, server, port, json)
        except requests.ConnectionError:
            logger.warn("Connection error: {}".format(url))
            return self.refresh_nodes_and_retry(path, server, port, json)

    def refresh_nodes_and_retry(self, path, server=None, port=8091, json=True):
        time.sleep(self.interval)
        for node in self.nodes:
            if self._check_node(node):
                self.master_node = node
                self.nodes = list(self.get_nodes())
                break
        else:
            raise RuntimeError("Failed to find at least one node")
        if server not in self.nodes:
            raise RuntimeError("Bad node {}".format(server or ""))
        return self.get_http(path, server, port, json)

    def _check_node(self, node):
        try:
            s = socket.socket()
            s.connect((node, 8091))
        except socket.error:
            return False
        else:
            if not self.get_http(path="/pools", server=node).get("pools"):
                return False
        return True

    def get_buckets(self, with_stats=False):
        buckets = self.get_http(path="/pools/default/buckets")
        if not buckets:
            buckets = self.refresh_nodes_and_retry(
                path="/pools/default/buckets")
        for bucket in buckets:
            if self.buckets is not None and bucket["name"] not in self.buckets:
                continue
            if with_stats:
                yield bucket["name"], bucket["stats"]
            else:
                yield bucket["name"]

    def get_nodes(self):
        pool = self.get_http(path="/pools/default")
        for node in pool["nodes"]:
            hostname = node["hostname"].split(":")[0]
            if self.hostnames is not None and hostname not in self.hostnames:
                continue
            yield hostname

    def get_indexes(self):
        pool = self.get_http(path="/indexStatus")
        for index in pool["indexes"]:
            if self.indexes and index["index"] not in self.indexes:
                continue
            index_name = index["index"]
            bucket_name = index["bucket"]
            yield index_name, bucket_name

    def _update_metric_metadata(self, metrics, bucket=None, index=None, server=None):
        for metric in metrics:
            metric = metric.replace('/', '_')
            metric_hash = hash((metric, bucket, index, server))
            if metric_hash not in self.metrics:
                self.metrics.add(metric_hash)
                self.mc.add_metric(metric, bucket, index, server, self.COLLECTOR)

    def update_metric_metadata(self, *args, **kwargs):
        if self.updater is None or not self.updater.is_alive():
            self.updater = Thread(target=self._update_metric_metadata,
                                  args=args, kwargs=kwargs)
            self.updater.daemon = True
            self.updater.start()

    def sample(self):
        raise NotImplementedError

    def collect(self):
        while True:
            try:
                t0 = time.time()
                self.sample()
                delta = time.time() - t0
                if delta >= self.interval:
                    continue
                time.sleep(self.interval - delta)
            except KeyboardInterrupt:
                sys.exit()
            except Exception as e:
                logger.warn("Unexpected exception in {}: {}".format(
                    self.__class__.__name__, e))
class SGImport_latency(Collector):

    COLLECTOR = "sgimport_latency"

    METRICS = "sgimport_latency"

    INITIAL_POLLING_INTERVAL = 0.001  # 1 ms

    TIMEOUT = 3600  # 1 hour

    MAX_SAMPLING_INTERVAL = 10  # seconds

    def __init__(self, settings, cluster_spec: ClusterSpec, test_config: TestConfig):
        self.cluster_spec = cluster_spec
        self.test_config = test_config
        self.mc = MetadataClient(settings)
        self.store = PerfStore(settings.cbmonitor_host)
        self.workload_setting = PhaseSettings
        self.interval = self.MAX_SAMPLING_INTERVAL
        self.cluster = settings.cluster
        self.clients = []
        self.cb_host = self.cluster_spec.servers[int(self.test_config.nodes)]
        self.sg_host = next(self.cluster_spec.masters)
        src_client = new_client(host=self.cb_host,
                                bucket='bucket-1',
                                password='******',
                                timeout=self.TIMEOUT)
        self.clients.append(('bucket-1', src_client))
        self.new_docs = Document(1024)

    def check_longpoll_changefeed(self, host: str, key: str, last_sequence: str):
        sg_db = 'db'
        api = 'http://{}:4985/{}/_changes'.format(host, sg_db)
        last_sequence_str = "{}".format(last_sequence)
        data = {
            'filter': 'sync_gateway/bychannel',
            'feed': 'longpoll',
            "channels": "123",
            "since": last_sequence_str,
            "heartbeat": 3600000,
        }
        response = requests.post(url=api, data=json.dumps(data))
        t1 = time()
        record_found = 0
        if response.status_code == 200:
            for record in response.json()['results']:
                if record['id'] == key:
                    record_found = 1
                    break
        if record_found != 1:
            # Keep polling until the document shows up in the change feed and
            # return the timestamp of the request that finally saw it.
            return self.check_longpoll_changefeed(host=host,
                                                  key=key,
                                                  last_sequence=last_sequence)
        return t1

    def insert_doc(self, src_client, key: str, doc):
        src_client.upsert(key, doc)
        return time()

    def get_lastsequence(self, host: str):
        sg_db = 'db'
        api = 'http://{}:4985/{}/_changes'.format(host, sg_db)
        data = {
            'filter': 'sync_gateway/bychannel',
            'feed': 'normal',
            "channels": "123",
            "since": "0",
        }
        response = requests.post(url=api, data=json.dumps(data))
        last_sequence = response.json()['last_seq']
        return last_sequence

    def measure(self, src_client):
        key = "sgimport_{}".format(uhex())
        doc = self.new_docs.next(key)
        last_sequence = self.get_lastsequence(host=self.sg_host)
        executor = ThreadPoolExecutor(max_workers=2)
        future1 = executor.submit(self.check_longpoll_changefeed,
                                  host=self.sg_host,
                                  key=key,
                                  last_sequence=last_sequence)
        future2 = executor.submit(self.insert_doc,
                                  src_client=src_client,
                                  key=key,
                                  doc=doc)
        t1, t0 = future1.result(), future2.result()
        print('import latency t1, t0', t1, t0, (t1 - t0) * 1000)
        return {'sgimport_latency': (t1 - t0) * 1000}  # s -> ms

    def sample(self):
        for bucket, src_client in self.clients:
            lags = self.measure(src_client)
            self.store.append(lags,
                              cluster=self.cluster,
                              collector=self.COLLECTOR)

    def update_metadata(self):
        self.mc.add_cluster()
        self.mc.add_metric(self.METRICS, collector=self.COLLECTOR)
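# Hedged usage sketch for SGImport_latency: update_metadata() is called once,
# then the inherited collect() loop from Collector drives sample() at
# MAX_SAMPLING_INTERVAL. The `settings`, `cluster_spec`, and `test_config`
# objects are assumed to be provided by the surrounding test framework and are
# not constructed here.
latency_collector = SGImport_latency(settings, cluster_spec, test_config)
latency_collector.update_metadata()
latency_collector.collect()  # inherited sampling loop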