def sh(self):
    """Drop into an interactive bash shell on the seed container."""
    # we need verbose to see the prompt after running shell command
    Config.set('verbose', True)
    print('Seed bash')
    cmd = f'{engine_compose()} -f {Config.get("docker_yaml")} exec seed bash'
    run_shell_command(cmd)
def down(self):
    """Tear down all box containers for the active container engine."""
    # pick the compose file matching the engine in use
    if engine() == 'podman':
        run_shell_command(
            f'{engine_compose()} -f {Config.get("podman_yaml")} down')
    else:
        run_shell_command(
            f'{engine_compose()} -f {Config.get("docker_yaml")} down')
    print('Successfully killed all boxes')
def get_box_image():
    """Build the cephadm-box image from the engine-specific Dockerfile."""
    print('Getting box image')
    dockerfile = ('DockerfileDocker'
                  if engine() == 'docker' else 'DockerfilePodman')
    run_shell_command(f'{engine()} build -t cephadm-box -f {dockerfile} .')
    print('Box image added')
def setup(self):
    """Prepare the host to run the box: install podman-compose when using
    podman, run host sanity checks, and fetch/build the required images."""
    if engine() == 'podman':
        # install podman-compose straight from its devel branch tarball
        run_shell_command(
            'pip3 install https://github.com/containers/podman-compose/archive/devel.tar.gz'
        )
    check_cgroups()
    check_selinux()
    get_ceph_image()
    get_box_image()
def _create_py_venv(self, env_name):
    """Create a python virtualenv named *env_name* under self.venv_root and
    pip-install every module required by the pods assigned to that env."""
    # union of required python modules across all pods using this env
    py_pkgs = set()
    for pod in self.pods:
        if pod.env == env_name:
            py_pkgs.update(set(pod.required_python_modules))
    env_path = os.path.join(self.venv_root, env_name)
    if not os.path.isdir(env_path):
        os.makedirs(env_path)  # recursive mkdir if needed
    # NOTE(review): if each command runs in its own shell, the
    # 'source .../activate' does not affect the following 'pip install' —
    # confirm util.run_shell_command persists shell state between calls.
    # NOTE(review): an empty py_pkgs produces a bare 'pip install' that fails.
    for cmd in [
        'python -m virtualenv {}'.format(env_path),
        'source {}/bin/activate'.format(env_path),
        'pip install {}'.format(' '.join(py_pkgs)),
        'deactivate'
    ]:
        util.run_shell_command(cmd)
def deploy(self):
    """Deploy OSDs: one per logical volume of the configured vg, or a
    single OSD on the configured 'data' device."""
    device = Config.get('data')
    host = Config.get('hostname')
    volume_group = Config.get('vg')
    if not host:
        # no hostname configured: assume the current host
        host = run_shell_command('hostname')
    if not volume_group:
        deploy_osd(device, host)
        return
    # deploy one osd per lv belonging to the requested vg
    report = json.loads(run_shell_command('lvs --reportformat json'))
    for lv in report['report'][0]['lv']:
        if lv['vg_name'] == volume_group:
            deploy_osd(f'{volume_group}/{lv["lv_name"]}', host)
def deploy_osds_in_vg(vg: str):
    """Deploy one osd per logical volume of *vg*, rotating the target host
    for each osd so they spread across the cluster.

    Must run inside the seed container; when called outside, the command is
    re-executed inside it.  Deploying osds does not succeed while services
    are still starting, hence the retry loop below.
    """
    if inside_container():
        lvs = json.loads(run_shell_command('lvs --reportformat json'))
        # distribute osds per host
        hosts = get_orch_hosts()
        host_index = 0
        for lv in lvs['report'][0]['lv']:
            if lv['vg_name'] == vg:
                deployed = False
                # retry on the next host until deploy_osd reports success.
                # NOTE(review): loops forever if every host keeps failing —
                # confirm deploy_osd eventually succeeds or cap the retries.
                while not deployed:
                    deployed = deploy_osd(
                        f'{vg}/{lv["lv_name"]}', hosts[host_index]['hostname']
                    )
                    host_index = (host_index + 1) % len(hosts)
    else:
        verbose = '-v' if Config.get('verbose') else ''
        print('Redirecting deploy osd in vg to inside container')
        run_dc_shell_command(
            f'/cephadm/box/box.py {verbose} osd deploy --vg {vg}', 1, 'seed'
        )
def nc_dump_axis(cls, ax_name, in_file=None, cwd=None, dry_run=False):
    """Return the coordinate values of axis *ax_name* in *in_file* as a
    list of floats, obtained by dumping the variable with ncks."""
    # OK for 4.7.6, works on 4.5.4 if "--trd" flag removed
    command = "ncks --trd -H -V -v '{}' {}".format(ax_name, in_file)
    raw_vals = util.run_shell_command(command, cwd=cwd, dry_run=dry_run)
    return [float(entry) for entry in raw_vals if entry]
def check_selinux():
    """Warn when SELinux is not disabled on the host."""
    status = run_shell_command('getenforce')
    if 'Disabled' in status:
        return
    print(
        colored(
            'selinux should be disabled, please disable it if you '
            'don\'t want unexpected behaviour.', Colors.WARNING))
def create_all_environments(self):
    """Create every conda environment by delegating to conda_env_setup.sh
    with the --all flag."""
    # the original wrapped this in 'try: ... except: raise', which is a
    # no-op (and a bare except) — let failures propagate naturally instead
    _ = util.run_shell_command(
        '{}/conda_env_setup.sh -c "{}" -d "{}" --all'.format(
            self.conda_dir, self.conda_exe, self.conda_env_root))
def _make_tar_file(self, tar_dest_dir):
    """Make tar file of web/bitmap output.

    Returns the path of the tar file that was written.
    """
    out_file = os.path.join(tar_dest_dir, self.MODEL_WK_DIR + '.tar')
    if not self.file_overwrite:
        out_file, _ = util_mdtf.bump_version(out_file)
        print("Creating {}.".format(out_file))
    elif os.path.exists(out_file):
        print('Overwriting {}.'.format(out_file))
    # skip intermediate/plot files by extension
    excluded = ['netCDF', 'nc', 'ps', 'PS', 'eps']
    tar_flags = ' '.join('--exclude=.{}'.format(ext) for ext in excluded)
    util.run_shell_command(
        'tar {} -czf {} -C {} .'.format(tar_flags, out_file,
                                        self.MODEL_WK_DIR),
        dry_run=self.dry_run
    )
    return out_file
def start(self):
    """Bring up the docker-compose cluster, prepare loop devices and ssh,
    then run the cluster bootstrap inside the seed container."""
    osds = Config.get('osds')
    hosts = Config.get('hosts')
    # ensure boxes don't exist
    run_shell_command('docker-compose down')
    print('Checking docker images')
    if not image_exists(CEPH_IMAGE):
        get_ceph_image()
    if not image_exists(BOX_IMAGE):
        get_box_image()
    if not Config.get('skip_create_loop'):
        print('Adding logical volumes (block devices) in loopback device...')
        osd.create_loopback_devices(osds)
        print(f'Added {osds} logical volumes in a loopback device')
    print('Starting containers')
    dcflags = '-f docker-compose.yml'
    if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
        # cgroups-v1 host needs the extra compose overlay
        dcflags += ' -f docker-compose.cgroup1.yml'
    run_shell_command(
        f'docker-compose {dcflags} up --scale hosts={hosts} -d')
    run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1')
    run_shell_command('sudo iptables -P FORWARD ACCEPT')
    print('Seting up host ssh servers')
    ips = get_host_ips()
    print(ips)
    for h in range(hosts):
        host._setup_ssh(h + 1)
    verbose = '-v' if Config.get('verbose') else ''
    skip_deploy = '--skip_deploy_osds' if Config.get(
        'skip_deploy_osds') else ''
    skip_monitoring_stack = '--skip_monitoring_stack' if Config.get(
        'skip_monitoring_stack') else ''
    skip_dashboard = '--skip_dashboard' if Config.get(
        'skip_dashboard') else ''
    # BUG FIX: the original built this command with plain-string '{osds}' /
    # '{hosts}' placeholders (never interpolated), then ignored the variable
    # and ran an inline command that dropped the dashboard/monitoring flags.
    # Build the command once, correctly, and actually use it.
    box_bootstrap_command = (
        f'/cephadm/box/box.py {verbose} cluster bootstrap '
        f'--osds {osds} '
        f'--hosts {hosts} '
        f'{skip_deploy} '
        f'{skip_dashboard} '
        f'{skip_monitoring_stack} ')
    run_dc_shell_command(box_bootstrap_command, 1, 'seed')
    host._copy_cluster_ssh_key(ips)
    print('Bootstrap finished successfully')
def _copy_cluster_ssh_key(ips: List[str]):
    """Copy the cluster's ssh public key to every other host in *ips*.

    Inside a container this runs ssh-copy-id directly; outside, the call
    is redirected into the seed container.
    """
    if inside_container():
        local_ip = run_shell_command('hostname -i')
        for ip in ips:
            if ip != local_ip:  # don't copy the key to ourselves
                run_shell_command(
                    ('sshpass -p "root" ssh-copy-id -f '
                     f'-o StrictHostKeyChecking=no -i /etc/ceph/ceph.pub "root@{ip}"'))
    else:
        print('Redirecting to _copy_cluster_ssh to container')
        verbose = '-v' if Config.get('verbose') else ''
        print(ips)
        # FIX: dropped the original no-op reassignment `ips = f"{ips}"`
        ips = ' '.join(ips)
        # assume we only have one seed
        run_dc_shell_command(
            f'/cephadm/box/box.py {verbose} host copy_cluster_ssh_key 1 --ips {ips}',
            1, 'seed')
def cleanup() -> None:
    """Remove vg1, its backing loop device, and the loop image file."""
    vg = 'vg1'
    pv_report = json.loads(run_shell_command('sudo pvs --reportformat json'))
    for pv in pv_report['report'][0]['pv']:
        if pv['vg_name'] != vg:
            continue
        # tear down the vg and the loop device that backs it
        device = pv['pv_name']
        run_shell_command(f'sudo vgremove -f --yes {vg}')
        run_shell_command(f'sudo losetup -d {device}')
        run_shell_command(f'sudo wipefs -af {device}')
        # FIX: this can fail with excluded filter
        run_shell_command(f'sudo pvremove -f --yes {device}', expect_error=True)
        break
    remove_loop_img()
def create_environment(self, env_name):
    """Check whether conda env *env_name* exists; report when it is missing."""
    # check to see if conda env exists, and if not, try to create it
    conda_prefix = os.path.join(self.conda_env_root, env_name)
    try:
        _ = util.run_shell_command('{} env list | grep -qF "{}"'.format(
            self.conda_exe, conda_prefix))
    except Exception:
        # narrowed from a bare 'except:', which would also swallow
        # KeyboardInterrupt / SystemExit
        print('Conda env {} not found (grepped for {})'.format(
            env_name, conda_prefix))
def get_ceph_image():
    """Pull/refresh the ceph image and export it as a tar for the boxes."""
    print('Getting ceph image')
    run_shell_command(f'{engine()} pull {CEPH_IMAGE}')
    # update
    run_shell_command(f'{engine()} build -t {CEPH_IMAGE} docker/ceph')
    image_dir = 'docker/ceph/image'
    if not os.path.exists(image_dir):
        os.mkdir(image_dir)
    remove_ceph_image_tar()
    run_shell_command(f'{engine()} save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}')
    run_shell_command(f'chmod 777 {CEPH_IMAGE_TAR}')
    print('Ceph image added')
def _create_r_venv(self, env_name):
    """Install the R packages required by the pods assigned to *env_name*,
    into a per-env library under self.r_lib_root when one is configured."""
    # union of required R packages across all pods using this env
    r_pkgs = set()
    for pod in self.pods:
        if pod.env == env_name:
            r_pkgs.update(set(pod.required_r_packages))
    r_pkg_str = ', '.join(['"' + x + '"' for x in r_pkgs])
    if self.r_lib_root != '':
        env_path = os.path.join(self.r_lib_root, env_name)
        if not os.path.isdir(env_path):
            os.makedirs(env_path)  # recursive mkdir if needed
        # BUG FIX: the export string had no '{}' placeholder, so
        # .format(env_path) was a no-op and R_LIBS_USER was never set.
        # NOTE(review): if each command runs in a separate shell, the export
        # is not visible to the Rscript call — confirm run_shell_command's
        # semantics.
        cmds = [
            'export R_LIBS_USER="{}"'.format(env_path),
            'Rscript -e \'install.packages(c({}), '.format(r_pkg_str)
            + 'lib=Sys.getenv("R_LIBS_USER"))\''
        ]
    else:
        cmds = ['Rscript -e \'install.packages(c({}))\''.format(r_pkg_str)]
    for cmd in cmds:
        util.run_shell_command(cmd)
def deploy(self):
    """Deploy osds across the configured vg, or a single data device."""
    device = Config.get('data')
    host = Config.get('hostname')
    volume_group = Config.get('vg')
    if not host:
        # assume this host
        host = run_shell_command('hostname')
    if volume_group:
        deploy_osds_in_vg(volume_group)
    else:
        deploy_osd(device, host)
def image_exists(image_name: str):
    """Return True if *image_name* ('name:tag') appears in the container
    engine's local image list."""
    # BUG FIX: the original `assert image_name.find(':')` was meaningless —
    # find() returns -1 (truthy) when ':' is absent and 0 (falsy) when it is
    # the first character.
    assert ':' in image_name
    image_name, tag = image_name.split(':')
    images = run_shell_command(f'{engine()} image ls').split('\n')
    IMAGE_NAME = 0
    TAG = 1
    for image in images:
        fields = image.split()
        # BUG FIX: skip blank/short rows — the original indexed into an
        # empty list for the trailing blank line, raising IndexError.
        # Also removed leftover debug prints.
        if len(fields) <= TAG:
            continue
        if fields[IMAGE_NAME] == image_name and fields[TAG] == tag:
            return True
    return False
def shell_command_wrapper(cmd, **kwargs):
    """Run *cmd* via util.run_shell_command, echoing the command and its
    stdout for debugging.  Returns the captured stdout."""
    print('SHELL RUN:')
    print(' ', cmd)
    # the original wrapped this in 'try: ... except: raise' — a no-op with a
    # bare except; let exceptions propagate naturally
    stdout = util.run_shell_command(cmd, **kwargs)
    if stdout:
        print('SHELL STDOUT:')
        for line in stdout:
            print(' ', line)
    else:
        print('SHELL STDOUT: (no output returned)')
    return stdout
def get_ceph_image():
    """Pull/refresh the ceph image and save it to a tar for later loading."""
    print('Getting ceph image')
    run_shell_command(f'docker pull {CEPH_IMAGE}')
    # update
    run_shell_command(f'docker build -t {CEPH_IMAGE} docker/ceph')
    if not os.path.exists('docker/ceph/image'):
        os.mkdir('docker/ceph/image')
    image_tar = 'docker/ceph/image/quay.ceph.image.tar'
    # BUG FIX: the condition was inverted ('if not os.path.exists'), which
    # raised FileNotFoundError when the tar was absent and never removed a
    # stale tar when it was present.
    if os.path.exists(image_tar):
        os.remove(image_tar)
    run_shell_command(f'docker save {CEPH_IMAGE} -o {image_tar}')
    print('Ceph image added')
def _setup_ssh(container_index):
    """Configure and start sshd with root password login inside a container;
    when run outside a container, re-invoke this command inside it."""
    if not inside_container():
        print('Redirecting to _setup_ssh to container')
        verbose = '-v' if Config.get('verbose') else ''
        run_dc_shell_command(
            f'/cephadm/box/box.py {verbose} host setup_ssh {container_index}',
            container_index, 'hosts')
        return
    if not os.path.exists('/root/.ssh/known_hosts'):
        run_shell_command('ssh-keygen -A')
    run_shell_command('echo "root:root" | chpasswd')
    with open('/etc/ssh/sshd_config', 'a+') as sshd_config:
        sshd_config.write('PermitRootLogin yes\n')
        sshd_config.write('PasswordAuthentication yes\n')
        sshd_config.flush()
    run_shell_command('/usr/sbin/sshd')
def _call_conda_create(self, env_name):
    """Create conda environment *env_name* from its env_*.yml spec file."""
    # strip the project prefix (plus separator) to locate the yml file
    if env_name.startswith(self.env_name_prefix):
        short_name = env_name[(len(self.env_name_prefix) + 1):]
    else:
        short_name = env_name
    path = '{}/env_{}.yml'.format(self.conda_dir, short_name)
    if not os.path.exists(path):
        print("Can't find {}".format(path))
    else:
        conda_prefix = os.path.join(self.conda_env_root, env_name)
        print('Creating conda env {} in {}'.format(env_name, conda_prefix))
        # conda must be initialized in the same shell invocation, hence
        # sourcing conda_init.sh before 'env create'
        command = (
            'source {}/conda_init.sh {} && '.format(
                self.conda_dir, self.conda_root
            ) + '{} env create --force -q -p "{}" -f "{}"'.format(
                self.conda_exe, conda_prefix, path
            ))
        # the original 'try: ... except: raise' was a no-op bare-except
        # wrapper — removed
        _ = util.run_shell_command(command)
def __init__(self, verbose=0):
    """Locate the conda installation and enumerate the shipped env specs.

    Sets self.conda_dir, self.env_list, self.conda_exe, self.conda_root and
    self.conda_env_root; raises if conda cannot be located.
    """
    super(CondaEnvironmentManager, self).__init__(verbose)
    config = util_mdtf.ConfigManager()
    self.code_root = config.paths.CODE_ROOT
    self.conda_dir = os.path.join(self.code_root, 'src', 'conda')
    # env names are taken from the env_*.yml files in conda_dir
    self.env_list = []
    for file_ in os.listdir(self.conda_dir):
        if file_.endswith('.yml'):
            name, _ = os.path.splitext(file_)
            self.env_list.append(name.split('env_')[-1])
    # find conda executable
    # conda_init for bash defines conda as a shell function; will get error
    # if we try to call the conda executable directly
    try:
        conda_info = util.run_shell_command(
            '{}/conda_init.sh {}'.format(
                self.conda_dir, config.paths.get('conda_root', '')
            ))
        # conda_init.sh prints KEY=VALUE lines; pick out the two we need
        for line in conda_info:
            key, val = line.split('=')
            if key == '_CONDA_EXE':
                self.conda_exe = val
                assert os.path.exists(self.conda_exe)
            elif key == '_CONDA_ROOT':
                self.conda_root = val
    except:
        # NOTE(review): bare except also catches KeyboardInterrupt; it does
        # re-raise, but narrowing to Exception would be safer.
        print("Error: can't find conda.")
        raise
    # find where environments are installed
    if 'conda_env_root' in config.paths:
        self.conda_env_root = config.paths.conda_env_root
        if not os.path.isdir(self.conda_env_root):
            os.makedirs(self.conda_env_root)  # recursive mkdir if needed
    else:
        # only true in default anaconda install, may need to fix
        self.conda_env_root = os.path.join(self.conda_root, 'envs')
def _setup_ssh(container_type, container_index):
    """Configure root-password ssh inside the given container; when run
    outside a container, re-invoke this command inside it."""
    if not inside_container():
        print('Redirecting to _setup_ssh to container')
        verbose = '-v' if Config.get('verbose') else ''
        run_dc_shell_command(
            f'/cephadm/box/box.py {verbose} --engine {engine()} host setup_ssh {container_type} {container_index}',
            container_index,
            container_type,
        )
        return
    if not os.path.exists('/root/.ssh/known_hosts'):
        run_shell_command(
            'ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N ""')
    run_shell_command('echo "root:root" | chpasswd')
    with open('/etc/ssh/sshd_config', 'a+') as sshd_config:
        sshd_config.write('PermitRootLogin yes\n')
        sshd_config.write('PasswordAuthentication yes\n')
        sshd_config.flush()
    run_shell_command('systemctl restart sshd')
def get_box_image():
    """Build the cephadm-box image from the local Dockerfile."""
    print('Getting box image')
    run_shell_command('docker build -t cephadm-box -f Dockerfile .')
    print('Box image added')
def create_loopback_devices(osds: int) -> None:
    """Create a file-backed loopback device and carve it into *osds*
    logical volumes (vg1/lv0..lvN) of roughly 5GB each.

    NOTE(review): annotated as returning None, yet elsewhere callers assign
    the result and test its truthiness — confirm whether the loop device
    path should be returned.
    """
    assert osds  # need at least one osd
    size = (5 * osds) + 1  # GB: 5 per osd plus 1 spare
    print(f'Using {size}GB of data to store osds')
    avail_loop = run_shell_command('sudo losetup -f')
    base_name = os.path.basename(avail_loop)  # NOTE(review): unused
    # create loop if we cannot find it
    if not os.path.exists(avail_loop):
        num_loops = int(
            run_shell_command('lsmod | grep loop | awk \'{print $3}\''))
        num_loops += 1
        run_shell_command(f'mknod {avail_loop} b 7 {num_loops}')
    if os.path.ismount(avail_loop):
        os.umount(avail_loop)
    # detach the device if it is already attached to an image
    loop_devices = json.loads(
        run_shell_command(f'losetup -l -J', expect_error=True))
    for dev in loop_devices['loopdevices']:
        if dev['name'] == avail_loop:
            run_shell_command(f'sudo losetup -d {avail_loop}')
    if not os.path.exists('./loop-images'):
        os.mkdir('loop-images')
    remove_loop_img()
    loop_image = Config.get('loop_img')
    # sparse image file of the required size (seek, zero count)
    run_shell_command(
        f'sudo dd if=/dev/zero of={loop_image} bs=1 count=0 seek={size}G')
    run_shell_command(f'sudo losetup {avail_loop} {loop_image}')
    # cleanup last call
    cleanup()
    run_shell_command(f'sudo pvcreate {avail_loop} ')
    run_shell_command(f'sudo vgcreate vg1 {avail_loop}')
    # split the vg evenly: one lv per osd
    p = int(100 / osds)
    for i in range(osds):
        run_shell_command('sudo vgchange --refresh')
        run_shell_command(f'sudo lvcreate -l {p}%VG --name lv{i} vg1')
def bootstrap(self):
    """Bootstrap a ceph cluster on the seed container with cephadm, then
    optionally deploy the configured number of osds."""
    print('Running bootstrap on seed')
    cephadm_path = os.environ.get('CEPHADM_PATH')
    os.symlink('/cephadm/cephadm', cephadm_path)
    # restart to ensure docker is using daemon.json
    run_shell_command('systemctl restart docker')
    st = os.stat(cephadm_path)
    os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC)

    run_shell_command('docker load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar')
    # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph:<none>
    # instead of master's tag
    # NOTE(review): if each run_shell_command spawns its own shell, these
    # exports do not persist for later commands — confirm intent.
    run_shell_command('export CEPH_SOURCE_FOLDER=/ceph')
    run_shell_command('export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:master')
    run_shell_command('echo "export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:master" >> ~/.bashrc')

    extra_args = []
    shared_ceph_folder = os.environ.get('SHARED_CEPH_FOLDER')
    if shared_ceph_folder:
        extra_args.extend(['--shared_ceph_folder', shared_ceph_folder])
    extra_args.append('--skip-pull')
    # cephadm prints in warning, let's redirect it to the output so shell_command doesn't
    # complain
    extra_args.append('2>&0')
    extra_args = ' '.join(extra_args)

    skip_monitoring_stack = ('--skip_monitoring_stack'
                             if Config.get('skip_monitoring_stack') else '')
    skip_dashboard = '--skip_dashboard' if Config.get('skip_dashboard') else ''

    fsid = Config.get('fsid')
    config_folder = Config.get('config_folder')
    config = Config.get('config')
    mon_config = Config.get('mon_config')  # NOTE(review): read but unused
    keyring = Config.get('keyring')
    if not os.path.exists(config_folder):
        os.mkdir(config_folder)

    # BUG FIX: '--output-config {config}' was passed twice; the duplicate
    # has been dropped.
    cephadm_bootstrap_command = (
        '$CEPHADM_PATH --verbose bootstrap '
        '--mon-ip "$(hostname -i)" '
        '--allow-fqdn-hostname '
        '--initial-dashboard-password admin '
        '--dashboard-password-noupdate '
        '--shared_ceph_folder /ceph '
        '--allow-overwrite '
        f'--output-config {config} '
        f'--output-keyring {keyring} '
        f'--fsid "{fsid}" '
        '--log-to-file '
        f'{skip_dashboard} '
        f'{skip_monitoring_stack} '
        f'{extra_args} '
    )

    print('Running cephadm bootstrap...')
    run_shell_command(cephadm_bootstrap_command)
    print('Cephadm bootstrap complete')
    run_shell_command('sudo vgchange --refresh')
    run_shell_command('cephadm ls')
    run_shell_command('ln -s /ceph/src/cephadm/box/box.py /usr/bin/box')
    hostname = run_shell_command('hostname')
    # NOTE: sometimes cephadm in the box takes a while to update the containers
    # running in the cluster and it cannot deploy the osds. In this case
    # run: box -v osd deploy --vg vg1 to deploy osds again.
    if not Config.get('skip_deploy_osds'):
        print('Deploying osds...')
        osds = Config.get('osds')
        for o in range(osds):
            osd.deploy_osd(f'vg1/lv{o}', hostname)
        print('Osds deployed')
    run_cephadm_shell_command('ceph -s')
    print('Bootstrap completed!')
def bootstrap(self):
    """Bootstrap a ceph cluster on the seed container with cephadm."""
    print('Running bootstrap on seed')
    cephadm_path = os.environ.get('CEPHADM_PATH')
    os.symlink('/cephadm/cephadm', cephadm_path)
    if engine() == 'docker':
        # restart to ensure docker is using daemon.json
        run_shell_command('systemctl restart docker')

    st = os.stat(cephadm_path)
    os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC)

    run_shell_command(
        f'{engine()} load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar'
    )
    # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph:<none>
    # instead of main branch's tag
    # NOTE(review): if each run_shell_command spawns its own shell, these
    # exports do not persist for later commands — confirm intent.
    run_shell_command('export CEPH_SOURCE_FOLDER=/ceph')
    run_shell_command(
        'export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main')
    run_shell_command(
        'echo "export CEPHADM_IMAGE=quay.ceph.io/ceph-ci/ceph:main" >> ~/.bashrc'
    )

    extra_args = []
    extra_args.append('--skip-pull')
    # cephadm prints in warning, let's redirect it to the output so shell_command doesn't
    # complain
    extra_args.append('2>&0')
    extra_args = ' '.join(extra_args)

    skip_monitoring_stack = ('--skip-monitoring-stack'
                             if Config.get('skip-monitoring-stack') else '')
    skip_dashboard = '--skip-dashboard' if Config.get(
        'skip-dashboard') else ''

    fsid = Config.get('fsid')
    config_folder = Config.get('config_folder')
    config = Config.get('config')
    keyring = Config.get('keyring')
    if not os.path.exists(config_folder):
        os.mkdir(config_folder)

    # BUG FIX: '--output-config {config}' appeared twice; the duplicate has
    # been dropped.
    cephadm_bootstrap_command = ('$CEPHADM_PATH --verbose bootstrap '
                                 '--mon-ip "$(hostname -i)" '
                                 '--allow-fqdn-hostname '
                                 '--initial-dashboard-password admin '
                                 '--dashboard-password-noupdate '
                                 '--shared_ceph_folder /ceph '
                                 '--allow-overwrite '
                                 f'--output-config {config} '
                                 f'--output-keyring {keyring} '
                                 f'--fsid "{fsid}" '
                                 '--log-to-file '
                                 f'{skip_dashboard} '
                                 f'{skip_monitoring_stack} '
                                 f'{extra_args} ')

    print('Running cephadm bootstrap...')
    run_shell_command(cephadm_bootstrap_command)
    print('Cephadm bootstrap complete')

    run_shell_command('sudo vgchange --refresh')
    run_shell_command('cephadm ls')
    run_shell_command('ln -s /ceph/src/cephadm/box/box.py /usr/bin/box')
    run_cephadm_shell_command('ceph -s')
    print('Bootstrap completed!')
def start(self):
    """Bring up the whole box cluster: sanity checks, images, loop devices,
    containers, chrony/ssh setup, then cephadm bootstrap on the seed."""
    check_cgroups()
    check_selinux()
    osds = Config.get('osds')
    hosts = Config.get('hosts')
    # ensure boxes don't exist
    self.down()
    # podman is ran without sudo
    if engine() == 'podman':
        I_am = run_shell_command('whoami')
        if 'root' in I_am:
            print(root_error_msg)
            sys.exit(1)
    print('Checking docker images')
    if not image_exists(CEPH_IMAGE):
        get_ceph_image()
    if not image_exists(BOX_IMAGE):
        get_box_image()
    used_loop = ""
    if not Config.get('skip_create_loop'):
        print(
            'Adding logical volumes (block devices) in loopback device...')
        used_loop = osd.create_loopback_devices(osds)
        print(f'Added {osds} logical volumes in a loopback device')
    # podman needs the loop device and each dm volume passed explicitly
    loop_device_arg = ""
    if used_loop:
        loop_device_arg = f'--device {used_loop} -v /dev/vg1:/dev/vg1:Z'
        for o in range(osds):
            loop_device_arg += f' --device /dev/dm-{o}'
    print('Starting containers')
    if engine() == 'docker':
        dcflags = f'-f {Config.get("docker_yaml")}'
        if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
            # cgroups-v1 host needs the extra compose overlay
            dcflags += f' -f {Config.get("docker_v1_yaml")}'
        run_shell_command(
            f'{engine_compose()} {dcflags} up --scale hosts={hosts} -d')
    else:
        run_shell_command(
            f'{engine_compose()} -f {Config.get("podman_yaml")} --podman-run-args "--group-add keep-groups --network=host --device /dev/fuse -it {loop_device_arg}" up --scale hosts={hosts} -d'
        )
    run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1')
    run_shell_command('sudo iptables -P FORWARD ACCEPT')
    # don't update clock with chronyd / setup chronyd on all boxes
    chronyd_setup = """
    sed 's/$OPTIONS/-x/g' /usr/lib/systemd/system/chronyd.service -i
    systemctl daemon-reload
    systemctl start chronyd
    systemctl status --no-pager chronyd
    """
    for h in range(hosts):
        run_dc_shell_commands(h + 1, 'hosts', chronyd_setup)
    run_dc_shell_commands(1, 'seed', chronyd_setup)
    print('Seting up host ssh servers')
    for h in range(hosts):
        host._setup_ssh('hosts', h + 1)
    host._setup_ssh('seed', 1)
    verbose = '-v' if Config.get('verbose') else ''
    skip_deploy = '--skip-deploy-osds' if Config.get(
        'skip-deploy-osds') else ''
    skip_monitoring_stack = ('--skip-monitoring-stack'
                             if Config.get('skip-monitoring-stack') else '')
    skip_dashboard = '--skip-dashboard' if Config.get(
        'skip-dashboard') else ''
    box_bootstrap_command = (
        f'/cephadm/box/box.py {verbose} --engine {engine()} cluster bootstrap '
        f'--osds {osds} '
        f'--hosts {hosts} '
        f'{skip_deploy} '
        f'{skip_dashboard} '
        f'{skip_monitoring_stack} ')
    run_dc_shell_command(box_bootstrap_command, 1, 'seed')
    info = get_boxes_container_info()
    ips = info['ips']
    hostnames = info['hostnames']
    print(ips)
    host._copy_cluster_ssh_key(ips)
    expanded = Config.get('expanded')
    if expanded:
        host._add_hosts(ips, hostnames)
    # TODO: add osds
    if expanded and not Config.get('skip-deploy-osds'):
        if engine() == 'podman':
            print('osd deployment not supported in podman')
        else:
            print('Deploying osds... This could take up to minutes')
            osd.deploy_osds_in_vg('vg1')
            print('Osds deployed')
    # locate the seed container's ip for the dashboard URL (docker only)
    dashboard_ip = 'localhost'
    info = get_boxes_container_info(with_seed=True)
    if engine() == 'docker':
        for i in range(info['size']):
            if 'seed' in info['container_names'][i]:
                dashboard_ip = info["ips"][i]
    print(
        colored(f'dashboard available at https://{dashboard_ip}:8443',
                Colors.OKGREEN))
    print('Bootstrap finished successfully')