def connect_to_minority():
    """Issue 50 increments through a new client of 127.0.0.1:14000.

    The counter value is printed once all requests have been answered.

    Returns:
        True, once every request has completed.
    """
    minority_client = Counter('127.0.0.1:14000')
    pending = 50
    while pending:
        minority_client.increment()
        pending -= 1
    # Original author's note: the call is expected to time out before this
    # point is reached.
    print("Counter value: %d" % minority_client.getvalue())
    return True
def test_failure(numacceptors):
    """Check that the counter keeps working after some acceptors are killed.

    Starts replica 0 on 127.0.0.1:14000 together with ``numacceptors``
    acceptors bootstrapped from it, performs 100 increments, kills the first
    ``(numacceptors - 1) // 2`` acceptors, and performs 100 more increments.

    Args:
        numacceptors: number of acceptor processes to spawn.

    Returns:
        True, after all spawned processes have been sent a kill.
    """
    replica_cmd = [
        'concoord', 'replica',
        '-o', 'concoord.object.counter.Counter',
        '-a', '127.0.0.1',
        '-p', '14000',
    ]
    acceptor_cmd = ['concoord', 'acceptor', '-b', '127.0.0.1:14000']

    print("Running replica 0")
    replica = subprocess.Popen(replica_cmd)

    acceptors = []
    for index in range(numacceptors):
        print("Running acceptor %d" % index)
        acceptors.append(subprocess.Popen(acceptor_cmd))

    # Give system time to stabilize
    time.sleep(10)

    c = Counter("127.0.0.1:14000", debug=True)
    for _ in range(100):
        c.increment()
    print("Counter value after 100 increments: %d" % c.getvalue())

    # Integer division: identical to the Python 2 ``/`` on ints.
    victims = (numacceptors - 1) // 2
    for index in range(victims):
        print("Killing acceptor %d" % index)
        acceptors[index].kill()

    for _ in range(100):
        c.increment()
    print("Counter value after 100 more increments: %d" % c.getvalue())

    replica.kill()
    for acceptor in acceptors:
        acceptor.kill()
    return True
def connect_to_leader():
    """Issue 100 increments through a new client of 127.0.0.1:14000.

    Prints the counter value once all requests have been answered.

    Returns:
        True, once every request has completed.
    """
    c_leader = Counter('127.0.0.1:14000')
    print("Connecting to old leader")
    for _ in range(100):
        c_leader.increment()
    # Original author's note: this method is expected to time out before it
    # reaches here.
    # Report through this function's own client; the previous code read an
    # undefined name (c_minority) and raised NameError instead of reporting.
    print("Client Made Progress: Counter value: %d" % c_leader.getvalue())
    return True
def test_failure(numreplicas):
    """Check that a multi-replica client survives the loss of replicas.

    Starts replica 0 on port 14000, three acceptors, and replicas
    1..numreplicas-1 on ports built as ``'1400%d' % i``. A client connected
    to all replica addresses performs 100 increments, the first
    ``numreplicas - 1`` replicas are killed, and the same client performs
    100 more increments.

    Args:
        numreplicas: total number of replicas, including replica 0.

    Returns:
        True, after all spawned processes have been sent a kill.
    """
    bootstrap = '127.0.0.1:14000'
    replica_base = [
        'concoord', 'replica',
        '-o', 'concoord.object.counter.Counter',
        '-a', '127.0.0.1',
    ]

    print("Running replica 0")
    replicas = [subprocess.Popen(replica_base + ['-p', '14000'])]
    replicanames = ["127.0.0.1:14000"]

    acceptors = []
    for index in range(3):
        print("Running acceptor %d" % index)
        acceptors.append(
            subprocess.Popen(['concoord', 'acceptor', '-b', bootstrap]))

    for index in range(1, numreplicas):
        print("Running replica %d" % index)
        extra_args = ['-p', '1400%d' % index, '-b', bootstrap]
        replicas.append(subprocess.Popen(replica_base + extra_args))
        replicanames.append("127.0.0.1:1400%d" % index)

    # Give the system some time to initialize
    time.sleep(10)

    # Test Clientproxy operations
    c = Counter(','.join(replicanames))
    for _ in range(100):
        c.increment()
    print("Counter value after 100 increments: %d" % c.getvalue())

    # Start killing replicas
    for index in range(numreplicas - 1):
        print("Killing replica %d" % index)
        replicas[index].kill()

    # Clientproxy operations should still work
    for _ in range(100):
        c.increment()
    print("Counter value after 100 more increments: %d" % c.getvalue())

    for proc in replicas + acceptors:
        proc.kill()
    return True
def main():
    """Run 1000 increments after one of three acceptors has been killed.

    Starts replica 0 on 127.0.0.1:14000 and three acceptors bootstrapped
    from it (three seconds apart), kills acceptor 0, then connects a client
    and performs 1000 increments before printing the counter value.

    Every spawned process that is still running is killed on exit, including
    when a client operation raises.
    """
    acceptor_cmd = ['concoord', 'acceptor', '-b', '127.0.0.1:14000']
    processes = []
    try:
        print("Running replica 0")
        processes.append(subprocess.Popen([
            'concoord', 'replica',
            '-o', 'concoord.object.counter.Counter',
            '-a', '127.0.0.1',
            '-p', '14000',
        ]))

        print("Running acceptor 0")
        acceptor0 = subprocess.Popen(acceptor_cmd)
        processes.append(acceptor0)
        time.sleep(3)

        print("Running acceptor1 ")
        processes.append(subprocess.Popen(acceptor_cmd))
        time.sleep(3)

        print("Running acceptor2 ")
        processes.append(subprocess.Popen(acceptor_cmd))
        time.sleep(3)

        # Kill one acceptor out of three; client/proxy operations are
        # expected to keep working with the remaining two.
        print("Killing acceptor 0")
        acceptor0.kill()

        c = Counter("127.0.0.1:14000, 127.0.0.1:14001, 127.0.0.1:14002",
                    debug=True)
        for _ in range(1000):
            c.increment()
        # Single pre-formatted argument: prints the same text as the old
        # Python 2 ``print a, b`` statement and is valid call syntax too.
        print("The value after 1000 increments: %s" % (c.getvalue(),))
    finally:
        # Clean up; poll() skips (and reaps) processes that already exited.
        for proc in processes:
            if proc.poll() is None:
                proc.kill()
def test_timeout():
    """Check that clients move on when traffic to replica 0 is dropped.

    Starts three replicas (ports 14000-14002) and three acceptors, performs
    100 increments through a client of 127.0.0.1:14000, then inserts an
    iptables rule dropping TCP traffic to port 14000. The test passes when
    ``connect_to_leader()`` returns a false value and a client of the other
    two replicas completes 100 increments.

    The iptables state is snapshotted to ``test.iptables.rules`` before the
    DROP rule is added. It is restored, and all spawned processes are
    killed, even if the test raises part-way through.

    Returns:
        True, whatever the printed verdict is.

    Raises:
        subprocess.CalledProcessError: if ``iptables-save`` fails; the
            firewall is not modified in that case.
    """
    numreplicas = 3
    numacceptors = 3
    rules_path = 'test.iptables.rules'
    processes = []
    rules_saved = False
    try:
        print("Running replica 0")
        processes.append(subprocess.Popen([
            'concoord', 'replica',
            '-o', 'concoord.object.counter.Counter',
            '-a', '127.0.0.1', '-p', '14000']))
        for i in range(numacceptors):
            print("Running acceptor %d" % i)
            processes.append(subprocess.Popen(
                ['concoord', 'acceptor', '-b', '127.0.0.1:14000']))
        for i in range(1, numreplicas):
            print("Running replica %d" % i)
            processes.append(subprocess.Popen([
                'concoord', 'replica',
                '-o', 'concoord.object.counter.Counter',
                '-a', '127.0.0.1', '-p', '1400%d' % i,
                '-b', '127.0.0.1:14000']))

        # Give the system some time to initialize
        time.sleep(10)

        # One client per side of the upcoming cut
        c_P1 = Counter('127.0.0.1:14000', debug=True)
        c_P2 = Counter('127.0.0.1:14001, 127.0.0.1:14002')

        # The client should work
        print("Sending requests to the leader")
        for _ in range(100):
            c_P1.increment()
        print("Counter value after 100 increments: %d" % c_P1.getvalue())

        # Save iptables settings for later recovery. Wait for the snapshot
        # to finish: a fire-and-forget Popen could still be running when the
        # DROP rule below is inserted and capture that rule in the snapshot.
        with open(rules_path, 'w') as rules_out:
            subprocess.check_call(['sudo', 'iptables-save'], stdout=rules_out)
        rules_saved = True

        # Block all incoming traffic to leader
        subprocess.call(['sudo', 'iptables', '-I', 'INPUT', '-p', 'tcp',
                         '--dport', '14000', '-j', 'DROP'])
        print("Cutting the connections to the leader. Waiting for system to stabilize.")
        time.sleep(10)

        print("Connecting to old leader, which should not make progress.")
        if connect_to_leader():
            print("===== TEST FAILED =====")
        else:
            # c_P2 should make progress
            print("Connecting to other nodes, which should have a new leader.")
            for _ in range(100):
                c_P2.increment()
            print("Counter value after 100 increments: %d" % c_P2.getvalue())
            print("===== TEST PASSED =====")
    finally:
        print("Fixing the connections and cleaning up.")
        if rules_saved:
            with open(rules_path, 'r') as rules_in:
                subprocess.call(['sudo', 'iptables-restore'], stdin=rules_in)
            subprocess.call(['sudo', 'rm', rules_path])
        # poll() skips (and reaps) processes that already exited.
        for proc in processes:
            if proc.poll() is None:
                proc.kill()
    return True
def test_failure(numreplicas):
    """Check that replicas can be killed and then brought back one by one.

    Starts replica 0 on port 14000, three acceptors, and replicas
    1..numreplicas-1 (ports built as ``'1400%d' % i``, so the scheme only
    yields distinct valid ports for fewer than 10 replicas). After 50
    increments the first ``numreplicas - 1`` replicas are killed and a
    client of the surviving replica performs 50 more increments. The killed
    replicas are then restarted from the highest index down, each
    bootstrapped from its higher-numbered neighbour; after each restart a
    client of the restarted replica performs 50 increments.

    All spawned processes that are still running are killed on exit,
    including when the test raises.

    Args:
        numreplicas: total number of replicas, including replica 0.

    Returns:
        True when the whole scenario ran to completion.
    """
    replicas = []
    acceptors = []
    replicanames = []
    try:
        print("Running replica 0")
        replicas.append(subprocess.Popen([
            'concoord', 'replica',
            '-o', 'concoord.object.counter.Counter',
            '-a', '127.0.0.1', '-p', '14000']))
        replicanames.append("127.0.0.1:14000")
        for index in range(3):
            print("Running acceptor %d" % index)
            acceptors.append(subprocess.Popen(
                ['concoord', 'acceptor', '-b', '127.0.0.1:14000']))
        for index in range(1, numreplicas):
            print("Running replica %d" % index)
            replicas.append(subprocess.Popen([
                'concoord', 'replica',
                '-o', 'concoord.object.counter.Counter',
                '-a', '127.0.0.1', '-p', '1400%d' % index,
                '-b', '127.0.0.1:14000']))
            replicanames.append("127.0.0.1:1400%d" % index)

        # Give the system some time to initialize
        time.sleep(10)

        # Test Clientproxy operations
        c = Counter(','.join(replicanames))
        for _ in range(50):
            c.increment()
        print("Counter value after 50 increments: %d" % c.getvalue())

        # Start killing replicas
        print("Killing replicas one by one.")
        for index in range(numreplicas - 1):
            print("Killing replica %d" % index)
            replicas[index].kill()

        # Clientproxy operations should still work. The survivor is the
        # highest-numbered replica; compute its index explicitly instead of
        # reusing a leaked loop variable, which held a stale value (49) when
        # the kill loop did not run.
        survivor = max(numreplicas - 1, 0)
        c = Counter('127.0.0.1:1400%d' % survivor)
        for _ in range(50):
            c.increment()
        print("Counter value after 50 more increments: %d" % c.getvalue())

        # Start bringing replicas back
        for index in reversed(range(numreplicas - 1)):
            print("Running replica %d" % index)
            replicas.append(subprocess.Popen([
                'concoord', 'replica',
                '-o', 'concoord.object.counter.Counter',
                '-a', '127.0.0.1', '-p', '1400%d' % index,
                '-b', '127.0.0.1:1400%d' % (index + 1)]))
            time.sleep(10)

            # Clientproxy operations should still work. Keep retrying until
            # the restarted replica accepts a client, pausing between
            # attempts; only Exception is caught so Ctrl-C still aborts.
            c = None
            while c is None:
                try:
                    c = Counter('127.0.0.1:1400%d' % index)
                except Exception:
                    time.sleep(1)
            for _ in range(50):
                c.increment()
            print("Counter value after 50 more increments: %d" % c.getvalue())
    finally:
        # poll() skips (and reaps) processes that already exited.
        for proc in replicas + acceptors:
            if proc.poll() is None:
                proc.kill()
    return True
def main():
    """Exercise a counter while a replica and an acceptor come and go.

    Starts replica 0 (port 14000) and acceptor 0, performs one increment and
    one read, then starts replica 1 (port 14001) and acceptors 1 and 2. After
    killing replica 1 and acceptor 1 it performs another increment and read,
    and finally kills the remaining processes.
    """
    replica_base = ['concoord', 'replica', '-o', 'concoord.object.counter.Counter']
    acceptor_cmd = ['concoord', 'acceptor', '-b', '127.0.0.1:14000']

    print("Running replica 0")
    replica0 = subprocess.Popen(replica_base + ['-a', '127.0.0.1', '-p', '14000'])
    print("Running acceptor 0")
    acceptor0 = subprocess.Popen(acceptor_cmd)
    time.sleep(3)

    c = Counter("127.0.0.1:14000, 127.0.0.1:14001, 127.0.0.1:14002", debug=True)
    incremented = c.increment()
    print("increment of counter returned", incremented)
    current = c.getvalue()
    print("getvalue of counter returned", current)
    time.sleep(1)

    # concoord replica -o concoord.object.counter.Counter -b 127.0.0.1:14000 -a 127.0.0.1 -p 14001
    print("Running replica 1")
    replica1 = subprocess.Popen(
        replica_base + ['-b', '127.0.0.1:14000', '-a', '127.0.0.1', '-p', '14001'])
    time.sleep(1)

    print("Running acceptor 1 ")
    acceptor1 = subprocess.Popen(acceptor_cmd)
    time.sleep(1)

    print("Running acceptor 2 ")
    acceptor2 = subprocess.Popen(acceptor_cmd)
    time.sleep(1)

    # Take down replica 1 and acceptor 1, one second apart.
    for label, victim in (("Killing replica 1", replica1),
                          ("Killing acceptor 1", acceptor1)):
        print(label)
        victim.kill()
        time.sleep(1)

    # Client/proxy operations are issued again on the reduced system.
    incremented = c.increment()
    print("increment of counter returned", incremented)
    current = c.getvalue()
    print("getvalue of counter returned", current)

    # Clean up
    for survivor in (replica0, acceptor0, acceptor2):
        survivor.kill()
    print("All done,quiting.")
def test_partition():
    """Partition replica 0 from replicas 1 and 2 and check both sides.

    Scenario:
      1. Start replicas on ports 14000 (P1) and 14001/14002 (P2) and perform
         50 increments through a client of 127.0.0.1:14000.
      2. Snapshot iptables to ``test.iptables.rules`` and insert DROP rules
         for TCP traffic between port 14000 and the P2 replicas.
      3. Expect a client of P2 to complete 50 more increments, and
         ``connect_to_minority()`` to return a false value.
      4. Restore iptables, wait, and expect ``connect_to_minority()`` to
         succeed with the counter at 150.

    The firewall snapshot is restored and every spawned process is killed on
    all exit paths, including the ``sys.exit`` taken when the minority makes
    progress.

    Returns:
        True, whatever the printed verdict is.

    Raises:
        SystemExit: if the minority side made progress during the partition.
        subprocess.CalledProcessError: if ``iptables-save`` fails; the
            firewall is not modified in that case.
    """
    rules_path = 'test.iptables.rules'
    processes = []
    p2_pids = []
    try:
        # P1 Nodes
        print("Running replica 0")
        p = subprocess.Popen([
            'concoord', 'replica',
            '-o', 'concoord.object.counter.Counter',
            '-a', '127.0.0.1', '-p', '14000'])
        processes.append(p)

        # P2 Nodes
        for index, port in ((1, '14001'), (2, '14002')):
            print("Running replica %d" % index)
            p = subprocess.Popen([
                'concoord', 'replica',
                '-o', 'concoord.object.counter.Counter',
                '-a', '127.0.0.1', '-p', port,
                '-b', '127.0.0.1:14000'])
            processes.append(p)
            p2_pids.append(p.pid)

        # Give the system some time to initialize
        time.sleep(10)

        # One client per side of the upcoming partition
        c_P1 = Counter('127.0.0.1:14000')
        c_P2 = Counter('127.0.0.1:14001,127.0.0.1:14002')

        # The client should work
        print("Sending requests to the leader")
        for _ in range(50):
            c_P1.increment()
        print("Counter value after 50 increments: %d" % c_P1.getvalue())

        # Save iptables settings for later recovery. Wait for the snapshot
        # to finish so it cannot capture the DROP rules added below.
        with open(rules_path, 'w') as rules_out:
            subprocess.check_call(['sudo', 'iptables-save'], stdout=rules_out)

        try:
            # Start partition
            p1_ports = [14000]
            p2_ports = [14001, 14002]
            connectedports = []
            # Find the local ports the P2 replicas use for their
            # connections to the P1 port(s).
            for port in p1_ports:
                for pid in p2_pids:
                    lsof = subprocess.Popen(
                        ['lsof', '-w', '-a', '-p%d' % pid, '-i4'],
                        stdout=subprocess.PIPE)
                    # universal_newlines gives text output on Python 3 too.
                    grep = subprocess.Popen(
                        ['grep', ':%d' % port],
                        stdin=lsof.stdout, stdout=subprocess.PIPE,
                        universal_newlines=True)
                    # Drop our copy of the pipe so lsof sees SIGPIPE if
                    # grep exits first.
                    lsof.stdout.close()
                    matched = grep.communicate()[0]
                    lsof.wait()
                    if matched:
                        # Only the first matching line is used: take the
                        # local port from its "host:port->host:port" field.
                        connectedports.append(
                            matched.split()[8].split('->')[0].split(':')[1])

            # Block traffic from the P2 connections into port 14000 ...
            for porttoblock in connectedports:
                subprocess.call([
                    'sudo', 'iptables', '-I', 'INPUT', '-p', 'tcp',
                    '--dport', '14000', '--sport', porttoblock,
                    '-j', 'DROP'])
            # ... and from port 14000 into the P2 replica ports.
            for porttoblock in p2_ports:
                subprocess.call([
                    'sudo', 'iptables', '-I', 'INPUT', '-p', 'tcp',
                    '--dport', '%d' % porttoblock, '--sport', '14000',
                    '-j', 'DROP'])

            print("Created the partition. Waiting for system to stabilize.")
            time.sleep(20)

            # c_P2 should make progress
            print("Connecting to the majority, which should have a new leader.")
            for _ in range(50):
                c_P2.increment()
            majority_value = c_P2.getvalue()
            print("Counter value after 50 more increments: %d" % majority_value)
            if majority_value == 100:
                print("SUCCESS: Majority made progress.")

            print("Connecting to the minority, which should not make progress.")
            if connect_to_minority():
                print("===== TEST FAILED =====")
                sys.exit('Minority made progress.')
            print("SUCCESS: Minority did not make progress.")
        finally:
            # End partition (also on failure, so no DROP rule outlives the
            # test run).
            print("Ending partition.")
            with open(rules_path, 'r') as rules_in:
                subprocess.call(['sudo', 'iptables-restore'], stdin=rules_in)
            subprocess.call(['sudo', 'rm', rules_path])

        time.sleep(40)

        # c_P1 should make progress
        print("Connecting to the old leader.")
        if not connect_to_minority():
            print("===== TEST FAILED =====")
            print("Old leader could not recover after partition.")
            print(get_replica_status('127.0.0.1:14000'))
            return True
        if c_P1.getvalue() == 150:
            print("SUCCESS: Old leader recovered.")
            print("===== TEST PASSED =====")
        else:
            print("FAILURE: Old leader lost some client commands.")
            print("===== TEST FAILED =====")
        print(get_replica_status('127.0.0.1:14000'))
    finally:
        # poll() skips (and reaps) processes that already exited.
        for proc in processes:
            if proc.poll() is None:
                proc.kill()
    return True
def main():
    """Check the counter survives replacing replica 0 and acceptor 0.

    Starts three replicas (ports 14000-14002) and three acceptors, performing
    one increment and one read once the first pair is up. Replica 0 and
    acceptor 0 are then killed, a new replica on port 14000 and a new
    acceptor are started bootstrapped from 127.0.0.1:14001, and a second
    increment is performed. The counter must then read 2.

    Every spawned process that is still running is killed on exit, including
    when the assertion or a client operation fails.

    Raises:
        AssertionError: if the counter does not read 2 after two increments.
    """
    replica_base = ['concoord', 'replica', '-o', 'concoord.object.counter.Counter']
    processes = []

    def spawn(command):
        # Start a child process and remember it for cleanup.
        proc = subprocess.Popen(command)
        processes.append(proc)
        return proc

    try:
        print("Running replica 0")
        replica0 = spawn(replica_base + ['-a', '127.0.0.1', '-p', '14000'])
        print("Running acceptor 0")
        acceptor0 = spawn(['concoord', 'acceptor', '-b', '127.0.0.1:14000'])
        time.sleep(3)

        c = Counter("127.0.0.1:14000, 127.0.0.1:14001, 127.0.0.1:14002",
                    debug=True)
        print("increment of counter returned", c.increment())
        print("getvalue of counter returned", c.getvalue())
        time.sleep(1)

        # concoord replica -o concoord.object.counter.Counter -b 127.0.0.1:14000 -a 127.0.0.1 -p 14001
        print("Running replica 1")
        spawn(replica_base + ['-b', '127.0.0.1:14000', '-a', '127.0.0.1', '-p', '14001'])
        print("Running acceptor 1")
        spawn(['concoord', 'acceptor', '-b', '127.0.0.1:14000'])
        time.sleep(1)

        # concoord replica -o concoord.object.counter.Counter -b 127.0.0.1:14000 -a 127.0.0.1 -p 14002
        print("Running replica 2")
        spawn(replica_base + ['-b', '127.0.0.1:14000', '-a', '127.0.0.1', '-p', '14002'])
        print("Running acceptor 2")
        spawn(['concoord', 'acceptor', '-b', '127.0.0.1:14000'])
        time.sleep(1)

        print("Killing replica 0")
        replica0.kill()
        time.sleep(1)
        print("Killing acceptor 0")
        acceptor0.kill()
        time.sleep(11)

        # Now create another replica and acceptor, bootstrapped from
        # 127.0.0.1:14001 instead of the killed replica 0.
        print("Running replica 0 once again")
        spawn(replica_base + ['-b', '127.0.0.1:14001', '-a', '127.0.0.1', '-p', '14000'])
        print("Running acceptor 3")
        spawn(['concoord', 'acceptor', '-b', '127.0.0.1:14001'])

        print("increment of counter returned", c.increment())
        value_after_2_incs = c.getvalue()
        # Print the value that is asserted on rather than reading it again.
        print("getvalue of counter returned", value_after_2_incs)
        assert value_after_2_incs == 2, "Counter object returned wrong value after two increments: %r" % (
            value_after_2_incs, )
    finally:
        # Clean up; poll() skips (and reaps) processes that already exited,
        # such as the original replica 0 and acceptor 0.
        for proc in processes:
            if proc.poll() is None:
                proc.kill()
    print("All done,quiting.")