# jmx_test.py
# Forked from thepaul/cassandra-dtest.
import re
import sys
import unittest
import time
from dtest import Tester, debug
from jmxutils import JolokiaAgent, make_mbean, remove_perf_disable_shared_mem
from tools import since
class TestJMX(Tester):
    """
    Tests that exercise nodetool commands and JMX MBeans against the
    cluster exposed as ``self.cluster``.
    """

    @staticmethod
    def _output_text(result):
        """
        Flatten the value returned by ``node.nodetool(..., capture_output=True)``
        into a single string so that substring checks are meaningful.

        The original code indexed the result with ``[1]``, so it is presumably
        a sequence of output streams (e.g. stdout, stderr) -- TODO confirm
        against the ccm version in use. A plain string is passed through.
        """
        if isinstance(result, (tuple, list)):
            return "\n".join(str(part) for part in result if part is not None)
        return str(result)

    @staticmethod
    def _version_tuple(version):
        """
        Return ``(major, minor)`` as ints parsed from the start of *version*,
        or None when *version* does not start with ``<digits>.<digits>``.

        Comparing these tuples avoids the lexicographic pitfalls of comparing
        version strings directly (e.g. "10.0" sorting before "2.1").
        """
        match = re.match(r'\s*(\d+)\.(\d+)', str(version))
        if match is None:
            return None
        return (int(match.group(1)), int(match.group(2)))

    def _run_cfhistograms(self, node, target):
        """
        Run ``nodetool cfhistograms <target>`` on *node* and return its
        captured output as one string. Fails the test if the command raises.
        """
        # Only the command itself is guarded, so that assertion failures made
        # by the caller are not re-labelled as command failures.
        try:
            result = node.nodetool("cfhistograms " + target, capture_output=True)
        except Exception as e:
            self.fail("Cfhistograms command failed: " + str(e))
        text = self._output_text(result)
        debug(text)
        return text

    def _compaction_progress(self, summary_line):
        """
        Extract the integer that follows ``standard1, `` and precedes ``/``
        in one CompactionSummary entry. Fails the test if the entry does not
        have that shape.
        """
        match = re.search(r'standard1, (\d+)\/', summary_line)
        self.assertIsNotNone(
            match,
            "Could not find standard1 progress in compaction summary: " + str(summary_line))
        return int(match.group(1))

    @since('2.1')
    @unittest.skipIf(sys.platform == "win32", 'Skip long tests on Windows')
    def cfhistograms_test(self):
        """
        Test cfhistograms on large and small datasets
        @jira_ticket CASSANDRA-8028
        """
        cluster = self.cluster
        cluster.populate(3).start(wait_for_binary_proto=True)
        node1 = cluster.nodelist()[0]

        overflow_msg = "Unable to compute when histogram overflowed"

        # issue large stress write to load data into cluster
        node1.stress(['write', 'n=15M', '-schema', 'replication(factor=3)', '-rate', 'threads=50'])
        node1.flush()

        # TODO the keyspace and table name are capitalized in 2.0
        histogram = self._run_cfhistograms(node1, "keyspace1 standard1")
        self.assertNotIn(overflow_msg, histogram)
        self.assertNotIn("NaN", histogram)

        # An empty table: cfhistograms should report that there are no
        # SSTables rather than an overflow.
        session = self.patient_cql_connection(node1)
        session.execute("CREATE KEYSPACE test WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':3}")
        session.execute("CREATE TABLE test.tab(key int primary key, val int);")

        finalhistogram = self._run_cfhistograms(node1, "test tab")
        self.assertNotIn(overflow_msg, finalhistogram)
        correct_error_msg = "No SSTables exists, unable to calculate 'Partition Size' and 'Cell Count' percentiles"
        # NOTE(review): the original looked for this message only in element
        # [1] of the nodetool result; it is now searched for in all captured
        # output.
        self.assertIn(correct_error_msg, finalhistogram)

    @since('2.1')
    def netstats_test(self):
        """
        Check functioning of nodetool netstats, especially with restarts.
        @jira_ticket CASSANDRA-8122, CASSANDRA-6577
        """
        cluster = self.cluster
        cluster.populate(3).start(wait_for_binary_proto=True)
        node1 = cluster.nodelist()[0]

        node1.stress(['write', 'n=5M', '-schema', 'replication(factor=3)'])
        node1.flush()
        node1.stop(gently=False)

        # With the node stopped, netstats is allowed to fail; only the
        # ConcurrentModificationException from CASSANDRA-6577 is an error.
        try:
            node1.nodetool("netstats")
        except Exception as e:
            if "ConcurrentModificationException" in str(e):
                self.fail("Netstats failed due to CASSANDRA-6577")
            debug(str(e))

        node1.start(wait_for_binary_proto=True)

        # After the restart, any netstats failure fails the test.
        try:
            node1.nodetool("netstats")
        except Exception as e:
            if 'java.lang.reflect.UndeclaredThrowableException' in str(e):
                debug(str(e))
                self.fail("Netstats failed with UndeclaredThrowableException (CASSANDRA-8122)")
            self.fail(str(e))

    @since('2.1')
    def table_metric_mbeans_test(self):
        """
        Test some basic table metric mbeans with simple writes.
        """
        cluster = self.cluster
        cluster.populate(3)
        node1 = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node1)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        # Compare numerically instead of lexicographically. A version that
        # cannot be parsed is treated as a current one (new stress syntax,
        # 'Table' mbean type).
        parsed_version = self._version_tuple(version)

        if parsed_version is not None and parsed_version < (2, 1):
            node1.stress(['-o', 'insert', '--num-keys=10000', '--replication-factor=3'])
        else:
            node1.stress(['write', 'n=10000', '-schema', 'replication(factor=3)'])

        # Versions up to and including 2.2.x use 'ColumnFamily' as the metric
        # type; later ones use 'Table'.
        if parsed_version is not None and parsed_version <= (2, 2):
            type_name = "ColumnFamily"
        else:
            type_name = 'Table'
        debug('Version {} typeName {}'.format(version, type_name))

        # TODO the keyspace and table name are capitalized in 2.0
        memtable_size = make_mbean('metrics', type=type_name, keyspace='keyspace1', scope='standard1', name='AllMemtablesHeapSize')
        disk_size = make_mbean('metrics', type=type_name, keyspace='keyspace1', scope='standard1', name='LiveDiskSpaceUsed')
        sstable_count = make_mbean('metrics', type=type_name, keyspace='keyspace1', scope='standard1', name='LiveSSTableCount')

        with JolokiaAgent(node1) as jmx:
            mem_size = jmx.read_attribute(memtable_size, "Value")
            self.assertGreater(int(mem_size), 10000)

            # Nothing has been flushed yet, so no disk space should be used.
            on_disk_size = jmx.read_attribute(disk_size, "Count")
            self.assertEqual(int(on_disk_size), 0)

            node1.flush()

            on_disk_size = jmx.read_attribute(disk_size, "Count")
            self.assertGreater(int(on_disk_size), 10000)

            sstables = jmx.read_attribute(sstable_count, "Value")
            self.assertGreaterEqual(int(sstables), 1)

    def test_compactionstats(self):
        """
        @jira_ticket CASSANDRA-10504
        @jira_ticket CASSANDRA-10427
        Test that jmx MBean used by nodetool compactionstats
        properly updates the progress of a compaction
        """
        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        cluster.set_configuration_options({'concurrent_compactors': 1, 'memtable_cleanup_threshold': 0.01})
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        # Run a quick stress command to create the keyspace and table
        node.stress(['write', 'n=1'])
        # Disable compaction on the table
        node.nodetool('disableautocompaction keyspace1 standard1')
        node.stress(['write', 'n=750K'])
        # Run a major compaction. This will be the compaction whose
        # progress we track.
        node.nodetool('compact', capture_output=False, wait=False)
        # We need to sleep here to give compaction time to start
        # Why not do something smarter? Because if the bug regresses,
        # we can't rely on jmx to tell us that compaction started.
        time.sleep(5)

        compaction_manager = make_mbean('db', type='CompactionManager')
        with JolokiaAgent(node) as jmx:
            first_summary = jmx.read_attribute(compaction_manager, 'CompactionSummary')
            self.assertTrue(first_summary, "No compaction reported in CompactionSummary")
            progress_string = first_summary[0]

            # Pause in between reads
            # to allow compaction to move forward
            time.sleep(2)

            second_summary = jmx.read_attribute(compaction_manager, 'CompactionSummary')
            self.assertTrue(second_summary, "No compaction reported in CompactionSummary on second read")
            updated_progress_string = second_summary[0]

            # Log both entries before parsing so they are visible even when
            # parsing fails.
            debug(progress_string)
            debug(updated_progress_string)

            progress = self._compaction_progress(progress_string)
            updated_progress = self._compaction_progress(updated_progress_string)

            # We want to make sure that the progress is increasing,
            # and that values other than zero are displayed.
            self.assertGreater(updated_progress, progress)
            self.assertGreater(progress, 0)
            self.assertGreater(updated_progress, 0)

            # Block until the major compaction is complete
            # Otherwise nodetool will throw an exception
            # Give a timeout, in case compaction is broken
            # and never ends.
            start = time.time()
            max_query_timeout = 600
            debug("Waiting for compaction to finish:")
            while True:
                # Read once per iteration so the value that is logged is the
                # same one that was tested.
                remaining = jmx.read_attribute(compaction_manager, 'CompactionSummary')
                if not len(remaining) > 0:
                    break
                if not (time.time() - start < max_query_timeout):
                    debug("Timed out waiting for compaction to finish")
                    break
                debug(remaining)
                time.sleep(2)