def get_local_addresses(ssh_runner, remote_dir):
    """Report the public-IP -> local-IP mapping via the checked-in ip-detect script

    Also doubles as a check that the cluster is up and reachable over SSH.

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
        remote_dir (str): path on hosts where ip-detect is copied and executed

    Returns:
        dict[public_IP] = local_IP
    """
    remote_script = remote_dir + '/' + 'ip-detect.sh'
    detect_script = pkg_resources.resource_filename('gen', 'ip-detect/aws.sh')
    chain = CommandChain('ip_map')
    chain.add_copy(detect_script, remote_script)
    chain.add_execute(['bash', remote_script])

    result = run_loop(ssh_runner, chain)
    # Raise if any host could not run the script; deepcopy so the original
    # result structure stays intact for the popitem() loop below
    check_results(copy.deepcopy(result))

    mapping = {}
    for host_result in result:
        # The last entry in the chain is the command that ran the script
        host, data = host_result[-1].popitem()
        local_ip = data['stdout'][0].rstrip()
        assert local_ip != '', "Didn't get a valid IP for host {}:\n{}".format(host, data)
        mapping[host.split(":")[0]] = local_ip
    return mapping
def test_ssh_command_terminate_async(sshd_manager, loop):
    """A command running longer than process_timeout is terminated, and the
    state json reports the host as terminated (neither failed nor success).
    """
    with sshd_manager.run(1) as sshd_ports:
        workspace = str(sshd_manager.tmpdir)
        # Fixed: keyword names updated to ssh_user=/ssh_key_path= for
        # consistency with every other MultiRunner call in this file
        # (was user=/key_path=).
        runner = MultiRunner(['127.0.0.1:{}'.format(port) for port in sshd_ports],
                             ssh_user=getpass.getuser(),
                             ssh_key_path=sshd_manager.key_path,
                             process_timeout=0.05)
        chain = CommandChain('test')
        chain.add_execute(['sleep', '20'])
        start_time = time.time()
        try:
            results = loop.run_until_complete(
                runner.run_commands_chain_async([chain], block=True, state_json_dir=workspace))
        finally:
            loop.close()
        elapsed_time = time.time() - start_time
        # Termination must kick in long before the 20s sleep would finish
        assert elapsed_time < 5
        assert os.path.isfile(workspace + '/test.json')

        with open(workspace + '/test.json') as fh:
            result_json = json.load(fh)
        assert result_json['total_hosts'] == 1
        assert 'hosts_failed' not in result_json
        assert 'hosts_success' not in result_json

        for host_result in results:
            for command_result in host_result:
                for host, process_result in command_result.items():
                    assert result_json['hosts'][host]['host_status'] == 'terminated'
                    assert process_result['stdout'] == ['']
                    assert process_result['stderr'] == ['']
                    # returncode is None for a terminated process
                    assert process_result['returncode'] is None
def test_ssh_async(sshd_manager, loop):
    """uname -a succeeds on all 20 hosts; no state json without state_json_dir."""
    class DummyAsyncDelegate(AbstractSSHLibDelegate):
        # Minimal delegate: immediately acknowledge updates, ignore the rest.
        def on_update(self, future, callback):
            callback.set_result(True)

        def on_done(self, *args, **kwargs):
            pass

        def prepare_status(self, name, nodes):
            pass

    with sshd_manager.run(20) as sshd_ports:
        expected_hosts = ['127.0.0.1:{}'.format(port) for port in sshd_ports]
        runner = MultiRunner(expected_hosts,
                             ssh_user=getpass.getuser(),
                             ssh_key_path=sshd_manager.key_path,
                             async_delegate=DummyAsyncDelegate())
        chain = CommandChain('test')
        chain.add_execute(['uname', '-a'])
        try:
            results = loop.run_until_complete(runner.run_commands_chain_async([chain], block=True))
        finally:
            loop.close()

        # state_json_dir was not passed, so nothing should be written to disk
        assert not os.path.isfile('test.json')
        assert len(results) == 20
        for host_result in results:
            for command_result in host_result:
                for host, process_result in command_result.items():
                    assert process_result['returncode'] == 0, process_result['stderr']
                    assert host in expected_hosts
                    assert '/usr/bin/ssh' in process_result['cmd']
                    assert 'uname' in process_result['cmd']
                    assert '-tt' in process_result['cmd']
                    assert len(process_result['cmd']) == 13
def test_scp_recursive_async(sshd_manager, loop):
    """scp with recursive=True copies the file and preserves its content.

    Fixed: local variable renamed from `id` (shadowed the builtin) to file_content.
    """
    with sshd_manager.run(1) as sshd_ports:
        workspace = str(sshd_manager.tmpdir)
        file_content = uuid.uuid4().hex
        pkgpanda.util.write_string(workspace + '/recursive_pilot.txt', file_content)
        runner = MultiRunner(['127.0.0.1:{}'.format(port) for port in sshd_ports],
                             ssh_user=getpass.getuser(),
                             ssh_key_path=sshd_manager.key_path)
        host_port = ['127.0.0.1:{}'.format(port) for port in sshd_ports]
        chain = CommandChain('test')
        chain.add_copy(workspace + '/recursive_pilot.txt',
                       workspace + '/recursive_pilot.txt.copied',
                       recursive=True)
        try:
            copy_results = loop.run_until_complete(
                runner.run_commands_chain_async([chain], block=True, state_json_dir=workspace))
        finally:
            loop.close()
        dest_path = workspace + '/recursive_pilot.txt.copied'
        assert os.path.exists(dest_path)
        assert os.path.isfile(dest_path)
        assert len(copy_results) == 1
        assert pkgpanda.util.load_string(dest_path) == file_content
        for host_result in copy_results:
            for command_result in host_result:
                for host, process_result in command_result.items():
                    assert process_result['returncode'] == 0, process_result['stderr']
                    assert host in host_port
                    assert '/usr/bin/scp' in process_result['cmd']
                    # recursive=True must add scp's -r flag
                    assert '-r' in process_result['cmd']
                    assert workspace + '/recursive_pilot.txt' in process_result['cmd']
def test_ssh_async(sshd_manager, loop):
    """uname -a succeeds on all 20 hosts; no state json without state_json_dir.

    Fixed: MultiRunner keyword names updated to ssh_user=/ssh_key_path= for
    consistency with every other MultiRunner call in this file (was user=/key_path=).
    """
    class DummyAsyncDelegate(AbstractSSHLibDelegate):
        # Minimal delegate: immediately acknowledge updates, ignore the rest.
        def on_update(self, future, callback):
            callback.set_result(True)

        def on_done(self, *args, **kwargs):
            pass

        def prepare_status(self, name, nodes):
            pass

    with sshd_manager.run(20) as sshd_ports:
        runner = MultiRunner(['127.0.0.1:{}'.format(port) for port in sshd_ports],
                             ssh_user=getpass.getuser(),
                             ssh_key_path=sshd_manager.key_path,
                             async_delegate=DummyAsyncDelegate())
        host_port = ['127.0.0.1:{}'.format(port) for port in sshd_ports]
        chain = CommandChain('test')
        chain.add_execute(['uname', '-a'])
        try:
            results = loop.run_until_complete(runner.run_commands_chain_async([chain], block=True))
        finally:
            loop.close()
        # state_json_dir was not passed, so nothing should be written to disk
        assert not os.path.isfile('test.json')
        assert len(results) == 20
        for host_result in results:
            for command_result in host_result:
                for host, process_result in command_result.items():
                    assert process_result['returncode'] == 0, process_result['stderr']
                    assert host in host_port
                    assert '/usr/bin/ssh' in process_result['cmd']
                    assert 'uname' in process_result['cmd']
                    assert '-tt' in process_result['cmd']
                    assert len(process_result['cmd']) == 13
def test_ssh_command_terminate_async(sshd_manager, loop):
    """A command exceeding process_timeout is killed and recorded as terminated."""
    with sshd_manager.run(1) as sshd_ports:
        workspace = str(sshd_manager.tmpdir)
        runner = MultiRunner(['127.0.0.1:{}'.format(port) for port in sshd_ports],
                             ssh_user=getpass.getuser(),
                             ssh_key_path=sshd_manager.key_path,
                             process_timeout=2)
        chain = CommandChain('test')
        chain.add_execute(['sleep', '20'])
        started = time.time()
        try:
            results = loop.run_until_complete(
                runner.run_commands_chain_async([chain], block=True, state_json_dir=workspace))
        finally:
            loop.close()
        # Far less than the 20s sleep: proves the process was terminated early
        assert time.time() - started < 5

        state_file = workspace + '/test.json'
        assert os.path.isfile(state_file)
        with open(state_file) as fh:
            result_json = json.load(fh)
        assert result_json['total_hosts'] == 1
        assert 'hosts_failed' not in result_json
        assert 'hosts_success' not in result_json

        for host_result in results:
            for command_result in host_result:
                for host, process_result in command_result.items():
                    assert result_json['hosts'][host]['host_status'] == 'terminated'
                    assert process_result['stdout'] == ['']
                    assert process_result['stderr'] == ['']
                    assert process_result['returncode'] is None
def test_scp_async(sshd_manager, loop):
    """A non-recursive scp copy round-trips file content byte-for-byte.

    Fixed: local variable renamed from `id` (shadowed the builtin) to file_content.
    """
    with sshd_manager.run(1) as sshd_ports:
        workspace = str(sshd_manager.tmpdir)
        file_content = uuid.uuid4().hex
        pkgpanda.util.write_string(workspace + '/pilot.txt', file_content)
        runner = MultiRunner(['127.0.0.1:{}'.format(port) for port in sshd_ports],
                             ssh_user=getpass.getuser(),
                             ssh_key_path=sshd_manager.key_path)
        host_port = ['127.0.0.1:{}'.format(port) for port in sshd_ports]
        chain = CommandChain('test')
        chain.add_copy(workspace + '/pilot.txt', workspace + '/pilot.txt.copied')
        try:
            copy_results = loop.run_until_complete(
                runner.run_commands_chain_async([chain], block=True, state_json_dir=workspace))
        finally:
            loop.close()
        assert len(copy_results) == 1
        assert os.path.isfile(workspace + '/pilot.txt.copied')
        assert pkgpanda.util.load_string(workspace + '/pilot.txt.copied') == file_content
        for host_result in copy_results:
            for command_result in host_result:
                for host, process_result in command_result.items():
                    assert process_result['returncode'] == 0, process_result['stderr']
                    assert host in host_port
                    assert '/usr/bin/scp' in process_result['cmd']
                    assert workspace + '/pilot.txt' in process_result['cmd']
def break_prereqs(ssh_runner):
    """Deliberately break a prepared node so preflight checks will fail

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
    """
    chain = CommandChain('break_prereqs')
    # Removing 'nogroup' is enough to make preflight report an error
    chain.add_execute(['sudo', 'groupdel', 'nogroup'])
    check_results(run_loop(ssh_runner, chain))
def test_command_chain():
    """prepend_command must place its command ahead of previously added ones."""
    chain = CommandChain('test')
    chain.add_execute(['cmd2'])
    chain.add_copy('/local', '/remote')
    chain.prepend_command(['cmd1'])
    chain.add_execute(['cmd3'])
    expected = [
        ('execute', ['cmd1'], None, None),
        ('execute', ['cmd2'], None, None),
        ('copy', '/local', '/remote', False, False, None),
        ('execute', ['cmd3'], None, None),
    ]
    assert chain.get_commands() == expected
def test_tags_async(sshd_manager, loop):
    """Node tags must appear in the state json along with the exact ssh cmd."""
    with sshd_manager.run(1) as sshd_ports:
        workspace = str(sshd_manager.tmpdir)
        host_ports = ['127.0.0.1:{}'.format(port) for port in sshd_ports]
        targets = [Node('127.0.0.1:{}'.format(port), {'tag1': 'test1', 'tag2': 'test2'})
                   for port in sshd_ports]
        runner = MultiRunner(targets,
                             ssh_user=getpass.getuser(),
                             ssh_key_path=workspace + '/host_key')
        chain = CommandChain('test')
        chain.add_execute(['sleep', '1'])
        try:
            loop.run_until_complete(
                runner.run_commands_chain_async([chain], block=True, state_json_dir=workspace))
        finally:
            loop.close()

        with open(workspace + '/test.json') as fh:
            result_json = json.load(fh)

        expected_cmd = [
            "/usr/bin/ssh",
            "-oConnectTimeout=10",
            "-oStrictHostKeyChecking=no",
            "-oUserKnownHostsFile=/dev/null",
            "-oBatchMode=yes",
            "-oPasswordAuthentication=no",
            "-p{}".format(sshd_ports[0]),
            "-i",
            "{}/host_key".format(workspace),
            "-tt",
            "{}@127.0.0.1".format(getpass.getuser()),
            "sleep",
            "1"
        ]
        for host_port in host_ports:
            host_entry = result_json['hosts'][host_port]
            assert 'tags' in host_entry
            assert len(host_entry['tags']) == 2
            assert host_entry['tags']['tag1'] == 'test1'
            assert host_entry['tags']['tag2'] == 'test2'
            assert host_entry['commands'][0]['cmd'] == expected_cmd
def integration_test(
        ssh_runner,
        dcos_dns,
        master_list,
        agent_list,
        region,
        registry_host,
        test_minuteman,
        test_dns_search,
        ci_flags):
    """Runs integration test on host

    Note: check_results() will raise AssertionError if test fails

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
        dcos_dns: string representing IP of DC/OS DNS host
        master_list: list of master address strings (joined with ',' below)
        agent_list: list of agent address strings (joined with ',' below)
        region: string indicating AWS region in which cluster is running
        registry_host: string for address where marathon can pull test app
        test_minuteman: if set to True then test for minuteman service
        test_dns_search: if set to True, test for deployed mesos DNS app
        ci_flags: optional additional string to be passed to test
    """
    # Skip minuteman-marked tests unless minuteman testing was requested
    marker_args = '-m "not minuteman"'
    if test_minuteman:
        marker_args = ''
    run_test_chain = CommandChain('run_test')
    dns_search = 'true' if test_dns_search else 'false'
    test_cmd = [
        'docker', 'run', '-v', '/home/centos/integration_test.py:/integration_test.py',
        '-e', 'DCOS_DNS_ADDRESS=http://'+dcos_dns,
        '-e', 'MASTER_HOSTS='+','.join(master_list),
        '-e', 'PUBLIC_MASTER_HOSTS='+','.join(master_list),
        '-e', 'SLAVE_HOSTS='+','.join(agent_list),
        '-e', 'REGISTRY_HOST='+registry_host,
        '-e', 'DCOS_VARIANT=default',
        '-e', 'DNS_SEARCH='+dns_search,
        '-e', 'AWS_ACCESS_KEY_ID='+AWS_ACCESS_KEY_ID,
        '-e', 'AWS_SECRET_ACCESS_KEY='+AWS_SECRET_ACCESS_KEY,
        '-e', 'AWS_REGION='+region,
        '--net=host',
        # First 'py.test' is the docker image name (built by test_setup),
        # second is the command run inside the container
        'py.test', 'py.test', '-vv', ci_flags, marker_args, '/integration_test.py']
    print("To run this test again, ssh to test node and run:\n{}".format(' '.join(test_cmd)))
    run_test_chain.add_execute(test_cmd)
    # force_print=True so the py.test output is always shown
    check_results(run_loop(ssh_runner, run_test_chain), force_print=True)
def test_tags_async(sshd_manager, loop):
    """Tags attached to a Node are recorded in the state json, as is the full cmd."""
    with sshd_manager.run(1) as sshd_ports:
        workspace = str(sshd_manager.tmpdir)
        host_ports = ['127.0.0.1:{}'.format(port) for port in sshd_ports]
        targets = []
        for port in sshd_ports:
            targets.append(Node('127.0.0.1:{}'.format(port), {'tag1': 'test1', 'tag2': 'test2'}))
        runner = MultiRunner(targets, ssh_user=getpass.getuser(), ssh_key_path=workspace + '/host_key')
        chain = CommandChain('test')
        chain.add_execute(['sleep', '1'])
        try:
            loop.run_until_complete(
                runner.run_commands_chain_async([chain], block=True, state_json_dir=workspace))
        finally:
            loop.close()

        with open(workspace + '/test.json') as fh:
            state = json.load(fh)
        for host_port in host_ports:
            entry = state['hosts'][host_port]
            assert 'tags' in entry
            tags = entry['tags']
            assert len(tags) == 2
            assert tags['tag1'] == 'test1'
            assert tags['tag2'] == 'test2'
            assert entry['commands'][0]['cmd'] == [
                "/usr/bin/ssh",
                "-oConnectTimeout=10",
                "-oStrictHostKeyChecking=no",
                "-oUserKnownHostsFile=/dev/null",
                "-oBatchMode=yes",
                "-oPasswordAuthentication=no",
                "-p{}".format(sshd_ports[0]),
                "-i",
                "{}/host_key".format(workspace),
                "-tt",
                "{}@127.0.0.1".format(getpass.getuser()),
                "sleep",
                "1"
            ]
def integration_test(ssh_runner, dcos_dns, master_list, agent_list, region,
                     registry_host, test_minuteman, test_dns_search, ci_flags):
    """Runs integration test on host

    Note: check_results() will raise AssertionError if test fails

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
        dcos_dns: string representing IP of DC/OS DNS host
        master_list: list of master address strings (joined with ',' below)
        agent_list: list of agent address strings (joined with ',' below)
        region: string indicating AWS region in which cluster is running
        registry_host: string for address where marathon can pull test app
        test_minuteman: if set to True then test for minuteman service
        test_dns_search: if set to True, test for deployed mesos DNS app
        ci_flags: optional additional string to be passed to test
    """
    # Skip minuteman-marked tests unless minuteman testing was requested
    marker_args = '-m "not minuteman"'
    if test_minuteman:
        marker_args = ''
    run_test_chain = CommandChain('run_test')
    dns_search = 'true' if test_dns_search else 'false'
    test_cmd = [
        'docker', 'run', '-v', '/home/centos/integration_test.py:/integration_test.py',
        '-e', 'DCOS_DNS_ADDRESS=http://' + dcos_dns,
        '-e', 'MASTER_HOSTS=' + ','.join(master_list),
        '-e', 'PUBLIC_MASTER_HOSTS=' + ','.join(master_list),
        '-e', 'SLAVE_HOSTS=' + ','.join(agent_list),
        '-e', 'REGISTRY_HOST=' + registry_host,
        '-e', 'DCOS_VARIANT=default',
        '-e', 'DNS_SEARCH=' + dns_search,
        '-e', 'AWS_ACCESS_KEY_ID=' + AWS_ACCESS_KEY_ID,
        '-e', 'AWS_SECRET_ACCESS_KEY=' + AWS_SECRET_ACCESS_KEY,
        '-e', 'AWS_REGION=' + region,
        '--net=host',
        # First 'py.test' is the docker image name (built by test_setup),
        # second is the command run inside the container
        'py.test', 'py.test', '-vv', ci_flags, marker_args,
        '/integration_test.py'
    ]
    print("To run this test again, ssh to test node and run:\n{}".format(
        ' '.join(test_cmd)))
    run_test_chain.add_execute(test_cmd)
    # force_print=True so the py.test output is always shown
    check_results(run_loop(ssh_runner, run_test_chain), force_print=True)
def get_local_addresses(ssh_runner, remote_dir):
    """Uses checked-in IP detect script to report local IP mapping

    Also functions as a test to verify cluster is up and accessible

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
        remote_dir (str): path on hosts for ip-detect to be copied and run in

    Returns:
        dict[public_IP] = local_IP
    """
    def remote(path):
        # Build an absolute path under the remote working directory
        return remote_dir + '/' + path

    ip_detect_script = pkg_resources.resource_filename('gen', 'ip-detect/aws.sh')
    ip_map_chain = CommandChain('ip_map')
    ip_map_chain.add_copy(ip_detect_script, remote('ip-detect.sh'))
    ip_map_chain.add_execute(['bash', remote('ip-detect.sh')])
    mapping = {}
    result = run_loop(ssh_runner, ip_map_chain)
    # Check the running was successful
    # (deepcopy: presumably protects `result` from mutation so the
    # popitem() loop below still sees it intact — TODO confirm)
    check_results(copy.deepcopy(result))
    # Gather the local IP addresses
    for host_result in result:
        host, data = host_result[-1].popitem()  # Grab the last command trigging the script
        local_ip = data['stdout'][0].rstrip()
        assert local_ip != '', "Didn't get a valid IP for host {}:\n{}".format(host, data)
        # Key by public IP only (strip the ':port' suffix)
        mapping[host.split(":")[0]] = local_ip
    return mapping
def test_command_chain():
    # Verify command ordering: prepend_command inserts at the front, and
    # get_commands serializes execute/copy entries as tuples.
    chain = CommandChain('test')
    chain.add_execute(['cmd2'])
    chain.add_copy('/local', '/remote')
    chain.prepend_command(['cmd1'])
    chain.add_execute(['cmd3'])
    assert chain.get_commands() == [('execute', ['cmd1'], None, None),
                                    ('execute', ['cmd2'], None, None),
                                    ('copy', '/local', '/remote', False, False, None),
                                    ('execute', ['cmd3'], None, None)]
def test_setup(ssh_runner, registry, remote_dir, use_zk_backend):
    """Transfer resources and issues commands on host to build test app,
    host it on a docker registry, and prepare the integration_test container

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
        registry (str): address of registry host that is visible to test nodes
        remote_dir (str): path to be used for setup and file transfer on host
        use_zk_backend: if truthy, also start a standalone ZooKeeper container

    Raises:
        AssertionError (via check_results) if any setup command failed
    """
    test_server_docker = pkg_filename('docker/test_server/Dockerfile')
    test_server_script = pkg_filename('docker/test_server/test_server.py')
    pytest_docker = pkg_filename('docker/py.test/Dockerfile')
    test_script = pkg_filename('integration_test.py')
    test_setup_chain = CommandChain('test_setup')
    if use_zk_backend:
        test_setup_chain.add_execute([
            'sudo', 'docker', 'run', '-d', '-p', '2181:2181', '-p',
            '2888:2888', '-p', '3888:3888', 'jplock/zookeeper'])

    def remote(path):
        # Build an absolute path under the remote working directory
        return remote_dir + '/' + path

    # Create test application
    test_setup_chain.add_execute(['mkdir', '-p', remote('test_server')])
    test_setup_chain.add_copy(test_server_docker, remote('test_server/Dockerfile'))
    test_setup_chain.add_copy(test_server_script, remote('test_server/test_server.py'))
    # Local docker registry the built image will be pushed to
    test_setup_chain.add_execute([
        'docker', 'run', '-d', '-p', '5000:5000', '--restart=always',
        '--name', 'registry', 'registry:2'])
    test_setup_chain.add_execute([
        'cd', remote('test_server'), '&&', 'docker', 'build', '-t',
        '{}:5000/test_server'.format(registry), '.'])
    test_setup_chain.add_execute(['docker', 'push', "{}:5000/test_server".format(registry)])
    test_setup_chain.add_execute(['rm', '-rf', remote('test_server')])
    # Create pytest/integration test instance on remote
    test_setup_chain.add_execute(['mkdir', '-p', remote('py.test')])
    test_setup_chain.add_copy(pytest_docker, remote('py.test/Dockerfile'))
    test_setup_chain.add_copy(test_script, remote('integration_test.py'))
    test_setup_chain.add_execute([
        'cd', remote('py.test'), '&&', 'docker', 'build', '-t', 'py.test', '.'])
    test_setup_chain.add_execute(['rm', '-rf', remote('py.test')])
    check_results(run_loop(ssh_runner, test_setup_chain))
def _run_chain_command(self, chain: CommandChain, host, chain_result):
    """Run every command of `chain` against a single host, appending each
    command's per-host result dict to `chain_result`.

    Generator-based coroutine (uses `yield from`). Stops at the first command
    that does not end with status 'success'; reports progress through
    self.async_delegate when one is configured.
    """
    # Prepare status json
    if self.async_delegate is not None:
        log.debug('Preparing a status json')
        self.async_delegate.prepare_status(chain.namespace, self.__targets)

    host_status = 'hosts_success'
    host_port = '{}:{}'.format(host.ip, host.port)

    # Dispatch table: command-type flag -> coroutine that executes it
    command_map = {
        CommandChain.execute_flag: self.run_async,
        CommandChain.copy_flag: self.copy_async
    }
    # Map process returncode to host status labels; returncode None means the
    # process was terminated; any other non-zero code falls back to 'failed'.
    process_exit_code_map = {
        None: {
            'host_status': 'terminated',
            'host_status_count': 'hosts_terminated'
        },
        0: {
            'host_status': 'success',
            'host_status_count': 'hosts_success'
        },
        'failed': {
            'host_status': 'failed',
            'host_status_count': 'hosts_failed'
        }
    }
    for command in chain.get_commands():
        stage = command[-1]
        if stage is not None:
            # a stage can be a function which takes a Node() object and does evaluation
            if callable(stage):
                stage = stage(host)
            log.debug('{}: {}'.format(host_port, stage))

        future = asyncio.Future()

        if self.async_delegate is not None:
            log.debug('Using async_delegate with callback')
            # callback_called is resolved by the delegate's on_update; it is
            # awaited below so the delegate sees every result before we move on
            callback_called = asyncio.Future()
            future.add_done_callback(
                lambda future: self.async_delegate.on_update(future, callback_called))

        # command[0] is a type of a command, could be CommandChain.execute_flag, CommandChain.copy_flag
        result = yield from command_map.get(command[0], None)(host, command, chain.namespace, future, stage)
        status = process_exit_code_map.get(result[host_port]['returncode'],
                                           process_exit_code_map['failed'])
        host_status = status['host_status']
        if self.async_delegate is not None:
            # We need to make sure the callback was executed before we can proceed further
            # 5 seconds should be enough for a callback.
            try:
                yield from asyncio.wait_for(callback_called, 5)
            except asyncio.TimeoutError:
                log.error('Callback did not execute within 5 sec')
                host_status = 'terminated'
                break

        _, result, host_object = future.result()
        chain_result.append(result)
        # Any non-success (failed/terminated) aborts the rest of the chain
        if host_status != 'success':
            break

    if self.async_delegate is not None:
        # Update chain status.
        self.async_delegate.on_done(chain.namespace, result, host_status=host_status)
def prep_hosts(ssh_runner, registry, minuteman_enabled=False):
    """Runs steps so that nodes can pass preflight checks. Nodes are expected
    to either use the custom AMI or have install-prereqs run on them.
    Additionally, Note: break_prereqs is run before this always

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
        registry: string to configure hosts with trusted registry for app deployment
        minuteman_enabled: if True, minuteman will be available after DC/OS install
    """
    host_prep_chain = CommandChain('host_prep')
    # Allow docker on the nodes to pull from the plain-HTTP test registry
    host_prep_chain.add_execute([
        'sudo', 'sed', '-i',
        "'/ExecStart=\/usr\/bin\/docker/ !b; s/$/ --insecure-registry={}:5000/'".format(registry),
        '/etc/systemd/system/docker.service.d/execstart.conf'])
    host_prep_chain.add_execute(['sudo', 'systemctl', 'daemon-reload'])
    host_prep_chain.add_execute(['sudo', 'systemctl', 'restart', 'docker'])
    # Recreate the 'nogroup' group deleted by break_prereqs so preflight passes
    host_prep_chain.add_execute(['sudo', 'groupadd', '-g', '65500', 'nogroup'])
    host_prep_chain.add_execute(['sudo', 'usermod', '-aG', 'docker', 'centos'])
    if minuteman_enabled:
        host_prep_chain.add_execute(['sudo', 'mkdir', '-p', '/etc/mesosphere/roles'])
        host_prep_chain.add_execute(['sudo', 'touch', '/etc/mesosphere/roles/minuteman'])
    check_results(run_loop(ssh_runner, host_prep_chain))
def main():
    """End-to-end installer test: provision (or reuse) hosts, configure and
    deploy DC/OS, then run the integration test inside the cluster.

    Fixed: user-facing message typo "Test successsful!" -> "Test successful!".
    """
    logging.basicConfig(level=logging.DEBUG)
    options = check_environment()

    host_list = None
    vpc = None  # Set if the test owns the VPC
    if options.host_list is None:
        vpc = make_vpc(use_bare_os=options.test_install_prereqs)
        host_list = vpc.hosts()
    else:
        host_list = options.host_list

    assert os.path.exists('ssh_key'), 'Valid SSH key for hosts must be in working dir!'
    # key must be chmod 600 for SSH lib to use
    os.chmod('ssh_key', stat.S_IREAD | stat.S_IWRITE)
    # Create custom SSH Runner to help orchestrate the test
    ssh_user = '******'
    ssh_key_path = 'ssh_key'
    remote_dir = '/home/centos'

    def make_runner(host_list):
        """process_timeout must be large enough for integration_test.py to run
        """
        return MultiRunner(
            host_list, ssh_user=ssh_user, ssh_key_path=ssh_key_path,
            process_timeout=1200, async_delegate=SyncCmdDelegate())

    all_host_runner = make_runner(host_list)
    test_host_runner = make_runner([host_list[0]])
    dcos_host_runner = make_runner(host_list[1:])

    print('Checking that hosts are accessible')
    local_ip = get_local_addresses(all_host_runner, remote_dir)
    print("VPC hosts: {}".format(host_list))

    # use first node as bootstrap node, second node as master, all others as agents
    registry_host = local_ip[host_list[0]]
    master_list = [local_ip[_] for _ in host_list[1:2]]
    agent_list = [local_ip[_] for _ in host_list[2:]]

    if options.use_api:
        installer = test_util.installer_api_test.DcosApiInstaller()
        if not options.test_install_prereqs:
            # If we dont want to test the prereq install, use offline mode to avoid it
            installer.offline_mode = True
    else:
        installer = test_util.installer_api_test.DcosCliInstaller()

    # If installer_url is not set, then no downloading occurs
    installer.setup_remote(
        host_list[0], ssh_user, ssh_key_path,
        remote_dir + '/dcos_generate_config.sh',
        download_url=options.installer_url)
    if options.do_setup:
        # Let docker on the bootstrap node pull from the plain-HTTP test registry
        host_prep_chain = CommandChain('host_prep')
        host_prep_chain.add_execute([
            'sudo', 'sed', '-i',
            "'/ExecStart=\/usr\/bin\/docker/ !b; s/$/ --insecure-registry={}:5000/'".format(registry_host),
            '/etc/systemd/system/docker.service.d/execstart.conf'])
        host_prep_chain.add_execute(['sudo', 'systemctl', 'daemon-reload'])
        host_prep_chain.add_execute(['sudo', 'systemctl', 'restart', 'docker'])
        host_prep_chain.add_execute(['sudo', 'usermod', '-aG', 'docker', 'centos'])
        check_results(run_loop(test_host_runner, host_prep_chain))

    # Retrieve and test the password hash before starting web server
    test_pass = '******'
    hash_passwd = installer.get_hashed_password(test_pass)
    assert passlib.hash.sha512_crypt.verify(test_pass, hash_passwd), 'Hash does not match password'

    if options.do_setup and options.use_api:
        installer.start_web_server()

    print("Configuring install...")
    with open(pkg_resources.resource_filename("gen", "ip-detect/aws.sh")) as ip_detect_fh:
        ip_detect_script = ip_detect_fh.read()
    with open('ssh_key', 'r') as key_fh:
        ssh_key = key_fh.read()
    # Using static exhibitor is the only option in the GUI installer
    if options.use_api:
        zk_host = None  # causes genconf to use static exhibitor backend
    else:
        zk_host = registry_host + ':2181'

    # use first node as independent test/bootstrap node, second node as master, all others as slaves
    installer.genconf(
        zk_host=zk_host, master_list=master_list, agent_list=agent_list,
        ip_detect_script=ip_detect_script, ssh_user=ssh_user, ssh_key=ssh_key)

    # Test install-prereqs. This may take up 15 minutes...
    if options.test_install_prereqs:
        installer.install_prereqs()
        if options.test_install_prereqs_only:
            if vpc:
                vpc.delete()
            sys.exit(0)

    test_setup_handler = None
    if options.do_setup:
        print("Making sure prereqs are broken...")
        break_prereqs(all_host_runner)
        print('Check that --preflight gives an error')
        installer.preflight(expect_errors=True)
        print("Prepping all hosts...")
        prep_hosts(dcos_host_runner, registry=registry_host,
                   minuteman_enabled=options.minuteman_enabled)
        # This will setup the integration test and its resources
        print('Setting up test node while deploy runs...')
        # TODO: remove calls to both multiprocessing and asyncio
        # at time of writing block=False only supported for JSON delegates
        test_setup_handler = multiprocessing.Process(
            target=test_setup,
            args=(test_host_runner, registry_host, remote_dir, not options.use_api))
        # Wait for this to finish later as it is not required for deploy and preflight
        test_setup_handler.start()

    if not options.test_install_prereqs:
        # If we ran the prereq install test, then we already used preflight
        # Avoid running preflight twice in this case
        print("Running Preflight...")
        installer.preflight()

    print("Running Deploy...")
    installer.deploy()

    # If we needed setup, wait for it to finish
    if test_setup_handler:
        test_setup_handler.join()

    print("Running Postflight")
    installer.postflight()

    # Runs dcos-image/integration_test.py inside the cluster
    print("Test host: {}@{}:22".format(ssh_user, host_list[0]))
    integration_test(
        test_host_runner,
        region=vpc.get_region() if vpc else DEFAULT_AWS_REGION,
        dcos_dns=master_list[0],
        master_list=master_list,
        agent_list=agent_list,
        registry_host=registry_host,
        # Setting dns_search: mesos not currently supported in API
        test_dns_search=not options.use_api,
        test_minuteman=options.minuteman_enabled,
        ci_flags=options.ci_flags)

    # TODO(cmaloney): add a `--healthcheck` option which runs dcos-diagnostics
    # on every host to see if they are working.
    print("Test successful!")
    # Delete the cluster if all was successful to minimize potential costs.
    # Failed clusters the hosts will continue running
    if vpc is not None:
        vpc.delete()
def _run_chain_command(self, chain: CommandChain, host, chain_result):
    """Run every command of `chain` against a single host, appending each
    command's per-host result dict to `chain_result`.

    Generator-based coroutine (uses `yield from`). Stops at the first command
    that does not end with status 'success'; reports progress through
    self.async_delegate when one is configured.
    """
    # Prepare status json
    if self.async_delegate is not None:
        log.debug('Preparing a status json')
        self.async_delegate.prepare_status(chain.namespace, self.__targets)

    host_status = 'hosts_success'
    host_port = '{}:{}'.format(host.ip, host.port)

    # Dispatch table: command-type flag -> coroutine that executes it
    command_map = {
        CommandChain.execute_flag: self.run_async,
        CommandChain.copy_flag: self.copy_async
    }
    # Map process returncode to host status labels; returncode None means the
    # process was terminated; any other non-zero code falls back to 'failed'.
    process_exit_code_map = {
        None: {
            'host_status': 'terminated',
            'host_status_count': 'hosts_terminated'
        },
        0: {
            'host_status': 'success',
            'host_status_count': 'hosts_success'
        },
        'failed': {
            'host_status': 'failed',
            'host_status_count': 'hosts_failed'
        }
    }
    for command in chain.get_commands():
        stage = command[-1]
        if stage is not None:
            # a stage can be a function which takes a Node() object and does evaluation
            if callable(stage):
                stage = stage(host)
            log.debug('{}: {}'.format(host_port, stage))

        future = asyncio.Future()

        if self.async_delegate is not None:
            log.debug('Using async_delegate with callback')
            # callback_called is resolved by the delegate's on_update; it is
            # awaited below so the delegate sees every result before we move on
            callback_called = asyncio.Future()
            future.add_done_callback(lambda future: self.async_delegate.
                                     on_update(future, callback_called))

        # command[0] is a type of a command, could be CommandChain.execute_flag, CommandChain.copy_flag
        result = yield from command_map.get(command[0], None)(host, command, chain.namespace, future, stage)
        status = process_exit_code_map.get(result[host_port]['returncode'],
                                           process_exit_code_map['failed'])
        host_status = status['host_status']
        if self.async_delegate is not None:
            # We need to make sure the callback was executed before we can proceed further
            # 5 seconds should be enough for a callback.
            try:
                yield from asyncio.wait_for(callback_called, 5)
            except asyncio.TimeoutError:
                log.error('Callback did not execute within 5 sec')
                host_status = 'terminated'
                break

        _, result, host_object = future.result()
        chain_result.append(result)
        # Any non-success (failed/terminated) aborts the rest of the chain
        if host_status != 'success':
            break

    if self.async_delegate is not None:
        # Update chain status.
        self.async_delegate.on_done(chain.namespace, result, host_status=host_status)
def main():
    """Entry point: provision hosts, install DC/OS, and run the integration test.

    Flow: acquire hosts (create a VPC unless --host-list given), verify SSH
    access, optionally prep hosts and genconf, run preflight/deploy/postflight
    through the chosen installer (API or CLI), then run integration_test.py
    from the first host. The VPC is deleted only on full success.
    """
    logging.basicConfig(level=logging.DEBUG)
    options = check_environment()

    host_list = None
    vpc = None  # Set if the test owns the VPC
    if options.host_list is None:
        vpc = make_vpc(use_bare_os=options.test_install_prereqs)
        host_list = vpc.hosts()
    else:
        host_list = options.host_list

    assert os.path.exists('ssh_key'), 'Valid SSH key for hosts must be in working dir!'
    # key must be chmod 600 for SSH lib to use
    os.chmod('ssh_key', stat.S_IREAD | stat.S_IWRITE)

    # Create custom SSH Runnner to help orchestrate the test
    ssh_user = '******'
    ssh_key_path = 'ssh_key'
    remote_dir = '/home/centos'

    def make_runner(host_list):
        """process_timeout must be large enough for integration_test.py to run
        """
        return MultiRunner(
            host_list,
            ssh_user=ssh_user,
            ssh_key_path=ssh_key_path,
            process_timeout=1200,
            async_delegate=SyncCmdDelegate())

    # Three runners: all hosts, the test/bootstrap host only, and everything
    # except the test host (the actual DC/OS nodes).
    all_host_runner = make_runner(host_list)
    test_host_runner = make_runner([host_list[0]])
    dcos_host_runner = make_runner(host_list[1:])

    print('Checking that hosts are accessible')
    # Also serves as the connectivity smoke test for every host.
    local_ip = get_local_addresses(all_host_runner, remote_dir)

    print("VPC hosts: {}".format(host_list))

    # use first node as bootstrap node, second node as master, all others as agents
    registry_host = local_ip[host_list[0]]
    master_list = [local_ip[_] for _ in host_list[1:2]]
    agent_list = [local_ip[_] for _ in host_list[2:]]

    if options.use_api:
        installer = test_util.installer_api_test.DcosApiInstaller()
        if not options.test_install_prereqs:
            # If we dont want to test the prereq install, use offline mode to avoid it
            installer.offline_mode = True
    else:
        installer = test_util.installer_api_test.DcosCliInstaller()

    # If installer_url is not set, then no downloading occurs
    installer.setup_remote(
        host_list[0], ssh_user, ssh_key_path,
        remote_dir + '/dcos_generate_config.sh',
        download_url=options.installer_url)

    if options.do_setup:
        # Configure the bootstrap host's docker to trust the local registry
        # it will serve on port 5000.
        host_prep_chain = CommandChain('host_prep')
        host_prep_chain.add_execute([
            'sudo', 'sed', '-i',
            "'/ExecStart=\/usr\/bin\/docker/ !b; s/$/ --insecure-registry={}:5000/'".format(registry_host),
            '/etc/systemd/system/docker.service.d/execstart.conf'])
        host_prep_chain.add_execute(['sudo', 'systemctl', 'daemon-reload'])
        host_prep_chain.add_execute(['sudo', 'systemctl', 'restart', 'docker'])
        host_prep_chain.add_execute(['sudo', 'usermod', '-aG', 'docker', 'centos'])
        check_results(run_loop(test_host_runner, host_prep_chain))

    # Retrieve and test the password hash before starting web server
    test_pass = '******'
    hash_passwd = installer.get_hashed_password(test_pass)
    assert passlib.hash.sha512_crypt.verify(test_pass, hash_passwd), 'Hash does not match password'

    if options.do_setup and options.use_api:
        installer.start_web_server()

    print("Configuring install...")
    with open(pkg_resources.resource_filename("gen", "ip-detect/aws.sh")) as ip_detect_fh:
        ip_detect_script = ip_detect_fh.read()
    with open('ssh_key', 'r') as key_fh:
        ssh_key = key_fh.read()

    # Using static exhibitor is the only option in the GUI installer
    if options.use_api:
        zk_host = None  # causes genconf to use static exhibitor backend
    else:
        zk_host = registry_host + ':2181'

    # use first node as independent test/bootstrap node, second node as master, all others as slaves
    installer.genconf(
        zk_host=zk_host,
        master_list=master_list,
        agent_list=agent_list,
        ip_detect_script=ip_detect_script,
        ssh_user=ssh_user,
        ssh_key=ssh_key)

    # Test install-prereqs. This may take up 15 minutes...
    if options.test_install_prereqs:
        installer.install_prereqs()
        if options.test_install_prereqs_only:
            if vpc:
                vpc.delete()
            sys.exit(0)

    test_setup_handler = None
    if options.do_setup:
        print("Making sure prereqs are broken...")
        break_prereqs(all_host_runner)
        print('Check that --preflight gives an error')
        installer.preflight(expect_errors=True)
        print("Prepping all hosts...")
        prep_hosts(dcos_host_runner, registry=registry_host,
                   minuteman_enabled=options.minuteman_enabled)
        # This will setup the integration test and its resources
        print('Setting up test node while deploy runs...')
        # TODO: remove calls to both multiprocessing and asyncio
        # at time of writing block=False only supported for JSON delegates
        test_setup_handler = multiprocessing.Process(
            target=test_setup,
            args=(test_host_runner, registry_host, remote_dir, not options.use_api))
        # Wait for this to finish later as it is not required for deploy and preflight
        test_setup_handler.start()

    if not options.test_install_prereqs:
        # If we ran the prereq install test, then we already used preflight
        # Avoid running preflight twice in this case
        print("Running Preflight...")
        installer.preflight()

    print("Running Deploy...")
    installer.deploy()

    # If we needed setup, wait for it to finish
    if test_setup_handler:
        test_setup_handler.join()

    print("Running Postflight")
    installer.postflight()

    # Runs dcos-image/integration_test.py inside the cluster
    print("Test host: {}@{}:22".format(ssh_user, host_list[0]))
    integration_test(
        test_host_runner,
        region=vpc.get_region() if vpc else DEFAULT_AWS_REGION,
        dcos_dns=master_list[0],
        master_list=master_list,
        agent_list=agent_list,
        registry_host=registry_host,
        # Setting dns_search: mesos not currently supported in API
        test_dns_search=not options.use_api,
        test_minuteman=options.minuteman_enabled,
        ci_flags=options.ci_flags)

    # TODO(cmaloney): add a `--healthcheck` option which runs dcos-diagnostics
    # on every host to see if they are working.
    # NOTE(review): "successsful" typo in the output string below — left
    # unchanged because CI logs/scripts may grep for it; confirm before fixing.
    print("Test successsful!")
    # Delete the cluster if all was successful to minimize potential costs.
    # Failed clusters the hosts will continue running
    if vpc is not None:
        vpc.delete()
def prep_hosts(ssh_runner, registry, minuteman_enabled=False):
    """Runs steps so that nodes can pass preflight checks. Nodes are expected
    to either use the custom AMI or have install-prereqs run on them.
    Additionally, Note: break_prereqs is run before this always

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
        registry: string to configure hosts with trusted registry for app deployment
        minuteman_enabled: if True, minuteman will be available after DC/OS install
    """
    # Point docker at the insecure test registry, then reload/restart it and
    # restore the group/user state that preflight expects.
    sed_expr = "'/ExecStart=\/usr\/bin\/docker/ !b; s/$/ --insecure-registry={}:5000/'".format(registry)
    commands = [
        ['sudo', 'sed', '-i', sed_expr,
         '/etc/systemd/system/docker.service.d/execstart.conf'],
        ['sudo', 'systemctl', 'daemon-reload'],
        ['sudo', 'systemctl', 'restart', 'docker'],
        ['sudo', 'groupadd', '-g', '65500', 'nogroup'],
        ['sudo', 'usermod', '-aG', 'docker', 'centos'],
    ]
    if minuteman_enabled:
        # Tagging the role file enables minuteman after DC/OS install.
        commands.append(['sudo', 'mkdir', '-p', '/etc/mesosphere/roles'])
        commands.append(['sudo', 'touch', '/etc/mesosphere/roles/minuteman'])

    prep_chain = CommandChain('host_prep')
    for cmd in commands:
        prep_chain.add_execute(cmd)
    check_results(run_loop(ssh_runner, prep_chain))
def test_setup(ssh_runner, registry, remote_dir, use_zk_backend):
    """Transfer resources and issue commands on the host to build the test
    app, host it on a docker registry, and prepare the integration_test
    container. Raises via check_results if any remote command fails.

    Args:
        ssh_runner: instance of ssh.ssh_runner.MultiRunner
        registry (str): address of registry host that is visible to test nodes
        remote_dir (str): path to be used for setup and file transfer on host
        use_zk_backend: when True, start a ZooKeeper container on the host first
    """
    def remote(path):
        return remote_dir + '/' + path

    # Local resources to be copied onto the host.
    server_dockerfile = pkg_filename('docker/test_server/Dockerfile')
    server_script = pkg_filename('docker/test_server/test_server.py')
    pytest_dockerfile = pkg_filename('docker/py.test/Dockerfile')
    integration_script = pkg_filename('integration_test.py')

    chain = CommandChain('test_setup')
    if use_zk_backend:
        chain.add_execute([
            'sudo', 'docker', 'run', '-d', '-p', '2181:2181',
            '-p', '2888:2888', '-p', '3888:3888',
            'jplock/zookeeper'])

    # Build the test application image and publish it through a local
    # registry container so the cluster nodes can pull it.
    chain.add_execute(['mkdir', '-p', remote('test_server')])
    chain.add_copy(server_dockerfile, remote('test_server/Dockerfile'))
    chain.add_copy(server_script, remote('test_server/test_server.py'))
    chain.add_execute([
        'docker', 'run', '-d', '-p', '5000:5000', '--restart=always',
        '--name', 'registry', 'registry:2'])
    chain.add_execute([
        'cd', remote('test_server'), '&&',
        'docker', 'build', '-t', '{}:5000/test_server'.format(registry), '.'])
    chain.add_execute(['docker', 'push', "{}:5000/test_server".format(registry)])
    chain.add_execute(['rm', '-rf', remote('test_server')])

    # Build the py.test image that will run integration_test.py.
    chain.add_execute(['mkdir', '-p', remote('py.test')])
    chain.add_copy(pytest_dockerfile, remote('py.test/Dockerfile'))
    chain.add_copy(integration_script, remote('integration_test.py'))
    chain.add_execute([
        'cd', remote('py.test'), '&&',
        'docker', 'build', '-t', 'py.test', '.'])
    chain.add_execute(['rm', '-rf', remote('py.test')])

    check_results(run_loop(ssh_runner, chain))