def multi(install_lists, task_id):
    """Install a batch of machines in parallel and record the outcome.

    All machines of one batch are expected to share the same type and
    version; both values are read from the first entry only.

    Args:
        install_lists: Non-empty list of per-machine dicts. Each entry is
            handed to ``single``; the first one supplies ``"type"`` and
            ``"version"``.
        task_id: Hash key under which the input list and the results are
            stored via ``client.hset``.

    Returns:
        The list returned by ``pool.map(single, install_lists)``.

    Raises:
        Whatever ``single`` (through ``pool.map``) or the helpers raise. The
        queue marker and the shared default config are cleaned up even then.
    """
    client.hset(task_id, "install_lists", install_lists)

    # Take type and version from the first entry; a batch is homogeneous.
    _type = install_lists[0]["type"]
    version = install_lists[0]["version"]

    # Manual installation boots from one shared default config file. When
    # several batches install at the same time the file may only be deleted
    # after the last one has finished, so every running batch keeps a marker
    # in a list that acts as a queue.
    default_key = "default:%s:%s" % (_type, version)
    client.lpush(default_key, "")

    try:
        # Copy the default config file.
        # Two machine types cannot be installed at the same time, and only
        # one version at a time is possible.
        cmd = r"sudo /bin/cp -f %s /var/lib/tftpboot/pxelinux.cfg/default" % \
            PXELINUX_CFGS[_type][version]
        utils.shell(cmd)

        # Run the installation tasks.
        pool = ThreadPool(MAX_THREAD_NUM)
        try:
            install_results = pool.map(single, install_lists)
        finally:
            # Always release the worker threads, even when a task raised.
            pool.close()
            pool.join()
    finally:
        # Installation finished (or failed): leave the queue. Without this a
        # failed batch would keep its marker forever and the default config
        # would never be removed again.
        client.rpop(default_key)

        # An empty queue means no batch is running any more: delete the
        # shared config file.
        if not client.lrange(default_key, 0, -1):
            cmd = r"sudo /bin/rm -f /var/lib/tftpboot/pxelinux.cfg/default"
            utils.shell(cmd)

    client.hset(task_id, "install_results", install_results)
    return install_results
def del_tftp(self, mac):
    """Delete the pxelinux.cfg entry named after ``mac``.

    Returns True when the shell command exits with status 0, False otherwise.
    """
    command = r"sudo /bin/rm -f /var/lib/tftpboot/pxelinux.cfg/%s" % (mac)
    exit_code, _stdout, _stderr = utils.shell(command)
    return exit_code == 0
def get_mac(self, nic):
    """Return the PXE config file name (``01-<mac>``) for one NIC.

    Runs ``racadm getsysinfo -s`` through ``self.ssh_cmd`` and, for every
    output line that contains ``nic``, takes the fourth whitespace-separated
    field as the MAC address. Colons become dashes, the text is lower-cased
    and prefixed with ``01-``.

    The output is parsed in Python instead of being pasted into a local
    ``echo "..." | grep | awk`` pipeline: text coming back from the remote
    side may contain quotes or ``$(...)``, which would break (or be executed
    by) the local shell.

    Args:
        nic: NIC identifier to look for. It is matched as a plain substring
            (the former ``grep`` treated it as a regular expression).

    Returns:
        The formatted name, e.g. ``01-d4-ae-52-aa-bb-cc``; ``"01-"`` when no
        line matches.
    """
    # Same string conversion the old "%s" interpolation applied to the result.
    sysinfo = "%s" % (self.ssh_cmd("racadm getsysinfo -s"),)

    macs = []
    for line in sysinfo.splitlines():
        if nic not in line:
            continue
        fields = line.split()
        # Like awk's $4: empty when the line has fewer than four fields.
        macs.append(fields[3] if len(fields) > 3 else "")

    format_mac = "\n".join(macs).replace(":", "-").lower()
    return ("01-%s" % format_mac).strip()
def ip(idc, sn):
    """Resolve the iDRAC address of serial number ``sn`` in data centre ``idc``.

    Runs nslookup against the per-IDC DDNS server and returns the text after
    the last ``:`` on the last output line; returns False when nslookup exits
    with a non-zero status.
    """
    command = '''nslookup idrac-%s.ilo.nosa.me. ddns0.%s01.nosa.me ''' % (
        sn, idc)
    status, output, _stderr = utils.shell(command)
    if status != 0:
        return False

    last_line = output.strip().splitlines()[-1]
    return last_line.split(":")[-1].strip()
def path_type(volume_path):
    """Tell the image format of a volume from its ``virsh vol-info`` type.

    A "file" volume is reported as "qcow2" and a "block" volume as "raw";
    any other value is returned unchanged.
    """
    cmd = """virsh vol-info %s |grep "Type" |awk '{print $NF}' """ % volume_path
    reported = utils.shell(cmd, strip=True)

    if reported == "file":
        return "qcow2"
    if reported == "block":
        return "raw"
    return reported
def constract_tftp(self, _type, version, mac):
    """Copy the pxelinux.cfg template for (_type, version) to the file named ``mac``.

    Returns True when the copy command exits with status 0, False otherwise.
    """
    template_path = PXELINUX_CFGS[_type][version]
    command = r"sudo /bin/cp -f %s /var/lib/tftpboot/pxelinux.cfg/%s" % (
        template_path, mac)
    status, _stdout, _stderr = utils.shell(command)
    return status == 0
def _shell(cmd, _logger=logger):
    """Run ``cmd`` and log it.

    A zero exit status is logged at info level; any other status raises an
    Exception carrying the command and its stderr.
    """
    status, _stdout, stderr = utils.shell(cmd)
    if status != 0:
        raise Exception("cmd:%s, error:%s" % (cmd, stderr))

    _logger.info("cmd:%s" % cmd)
def get(self):
    """Write the remote Nginx config branches as a JSON list.

    Fetches (with pruning) in NGINX_CONF_DIR, lists remote branches, drops
    the ``origin/HEAD`` line and strips the ``origin/`` prefix.
    """
    command = "cd %s &&git fetch origin -p &&git branch -r" % NGINX_CONF_DIR
    _status, listing, _stderr = utils.shell(command)

    branches = []
    for line in listing.strip().splitlines():
        if "origin/HEAD" in line:
            continue
        branches.append(line.strip().replace("origin/", ""))

    self.write(json.dumps(branches))
def resize(volume_name, volume_size):
    """Change the capacity of a volume in the storage pool (grow only).

    For qcow2 the command looks like:
        virsh vol-resize vm1_data --capacity 30G --pool vm_storage_pool
    Shrinking is not possible; it fails with an error similar to:
        /usr/bin/qemu-img resize /vm_storage/vm1_data 32212254720)
        unexpected exit status 1: This image format does not support resize

    Raw volumes cannot be resized with ``virsh vol-resize``; growing them
    would need:
        lvextend -L +10G /dev/vm_storage_pool_vg/vm3
        virsh pool-refresh --pool vm_storage_pool
    Raw volumes are not handled here.
    """
    template = " virsh vol-resize {volume_name} --capacity {volume_size} --pool {vm_storage_pool}"
    command = template.format(
        volume_name=volume_name,
        volume_size=volume_size,
        vm_storage_pool=STORAGE_POOL)
    utils.shell(command)
def restore_rawos_from_qcow2_image(dst_os, wmi_data):
    """ Restore a system disk image taken from a qcow2 volume onto a raw
    system disk.

    Args:
        dst_os: Path of the destination system disk.
        wmi_data: Image description; the keys read here are
            ["os"]["tar"], ["partition"]["mbr"|"boot"|"lvmheader"]["url"]
            and ["partition"]["swap"]["uuid"].

    NOTE(review): the visible body stops after importing the LVM volume
    group and never uses src_os_url; the rest of the function is presumably
    outside this excerpt -- confirm.

    """
    check_mfs()

    # Collect the source file paths that are needed.
    src_os_url = wmi_data["os"]["tar"]  # Use the tar archive of the system disk.
    src_mbr = get_path_from_filename(
        get_filename_from_url(wmi_data["partition"]["mbr"]["url"]))
    src_boot = get_path_from_filename(
        get_filename_from_url(wmi_data["partition"]["boot"]["url"]))
    src_lvmheader = get_path_from_filename(
        get_filename_from_url(wmi_data["partition"]["lvmheader"]["url"]))
    src_swap_uuid = wmi_data["partition"]["swap"]["uuid"]

    # Restore the mbr.
    utils.restore_mbr(src_mbr, dst_os)

    # Map the devices inside the system disk to get the boot, swap and lvm
    # device paths.
    x = utils.kpartx_av(dst_os)
    dst_boot, dst_swap, dst_lvm = x[0], x[1], x[2]  # kpartx yields them in this order.

    # Restore the boot partition.
    utils.restore_boot(src_boot, dst_boot)

    # Restore the swap partition.
    utils.make_swap(dst_swap, src_swap_uuid)

    # Restore the lvm header.
    utils.restore_lvmheader(src_lvmheader, dst_lvm)

    # Import the lvm of the target virtual machine to obtain the vg name.
    # vgimportclone may fail with "fatal: not in vg"; upgrading qemu-kvm
    # resolves that, so upgrade and retry once.
    try:
        utils.vgimportclone(dst_lvm)
    except Exception, e:
        utils.shell("yum -y update qemu-kvm")
        utils.vgimportclone(dst_lvm)
def post(file_path):
    """Upload ``file_path`` with the wrapped ``wcdn`` command and return its download URL.

    Raises an Exception carrying stderr when the upload exits non-zero.
    """
    command = (" wcdn cp -f %s /cdn.internal.nosa.me/nginx_conf_deploy "
               "--no-verbose --md5" % file_path)
    status, _stdout, stderr = utils.shell(command)
    if status != 0:
        raise Exception(stderr)

    uploaded_name = os.path.basename(file_path)
    return "http://cdn.internal.nosa.me/nginx_conf_deploy/%s" % uploaded_name
def upload_os_tar(volume_path, file_name):
    """ Upload the system disk data packed as a tar archive.

    The tar file created here is used to create virtual machines on hosts
    that use raw storage.

    NOTE(review): the visible body ends after listing the filesystems and
    never uses file_name or fs_text; the rest of the function is presumably
    outside this excerpt. Nesting was reconstructed from whitespace-collapsed
    source -- confirm that the try block sits at function level.

    """
    check_mfs()
    check_nbd()

    from libs import volume
    volume_path_type = volume.path_type(volume_path)

    # A qcow2 volume is attached to a free nbd device and accessed through
    # that device from here on.
    if volume_path_type == "qcow2":
        nbd_device = utils.get_available_nbd_device()
        utils.connect_nbd_device(nbd_device, volume_path)
        volume_path = nbd_device

    # On failure, update qemu-kvm and retry once.
    try:
        cmd = "guestfish -a {path} run : list-filesystems".format(path=volume_path)
        fs_text = utils.shell(cmd)
    except Exception, e:
        utils.shell("yum -y update qemu-kvm")
        fs_text = utils.shell(cmd)
def create(volume_name, volume_size):
    """ Create a volume.

    The old storage layout uses a command like:
        virsh vol-create-as --pool vm_storage_pool --name vm1.img --capacity 20G

    The new storage layout uses:
        virsh vol-create-as --pool vm_storage_pool --name vm1.img --capacity 20G --allocation 1G --format qcow2

    There is no elegant way to tell the two layouts apart, so for now this
    command is used:
        df |grep /vm_storage
    If vm_storage exists it is the new layout, otherwise the old one.

    NOTE(review): the visible body only builds the command and never runs
    it; the rest of the function is presumably outside this excerpt.

    """
    cmd = "df |grep /vm_storage"
    try:
        # utils.shell is expected to raise when grep finds nothing, which
        # selects the old-layout command in the except branch.
        utils.shell(cmd)
        cmd = """ virsh vol-create-as --pool {storage_pool} --name {volume_name} """\
            """--capacity {volume_size} --allocation 1G --format qcow2""".format(
                storage_pool=STORAGE_POOL, volume_name=volume_name, volume_size=volume_size)
    except Exception, e:
        cmd = """ virsh vol-create-as --pool {storage_pool} --name {volume_name} """\
            """--capacity {volume_size} """.format(storage_pool=STORAGE_POOL,
                                                  volume_name=volume_name, volume_size=volume_size)
def check_mfs():
    """Make sure /mfs is mounted, because uploads go through the local mfs mount.

    When the mount check fails, the mfs client install script is downloaded
    and run, then the check is repeated (a second failure propagates).
    """
    mount_check = "df -h |grep /mfs"
    try:
        # First see whether mfs is already mounted.
        utils.shell(mount_check)
    except Exception:
        utils.shell(
            "curl http://download.hy01.nosa.me/download/install_mfs_client.sh |bash")
        utils.shell(mount_check)
def path_size(volume_path):
    """Return the capacity of a volume in whole gigabytes.

    The value is read with:
        virsh vol-info /vm_storage/vm1 |grep Capacity |awk '{print $2,$3}'
    which yields text such as ``20.00 GiB``. Only the integer part of the
    number is used.

    Args:
        volume_path: Path of the volume as known to libvirt.

    Returns:
        The size as a plain integer, without the ``G`` unit suffix. Sizes
        given in MiB/MB are divided by 1024 (rounding down), sizes in TiB/TB
        are multiplied by 1024.

    Raises:
        ValueError: The output names none of the supported units.
    """
    cmd = "virsh vol-info %s |grep Capacity |awk '{print $2,$3}' " % volume_path
    return_out = utils.shell(cmd)

    is_mb = "MiB" in return_out or "MB" in return_out
    is_gb = "GiB" in return_out or "GB" in return_out
    is_tb = "TiB" in return_out or "TB" in return_out
    if not (is_mb or is_gb or is_tb):
        # Previously this surfaced as an UnboundLocalError on the return.
        raise ValueError("unrecognized capacity %r for volume %s"
                         % (return_out, volume_path))

    number = int(return_out.split()[0].split(".")[0])
    if is_mb:
        # The result used to be stored under a different name, so the
        # return statement failed for every MiB-sized volume.
        return number // 1024
    if is_gb:
        return number
    # Same binary factor as the MiB branch (was 1000).
    return number * 1024
def lips(lb, ip, lips):
    """Configure the lips of load balancer ``lb``.

    Steps: empty the local known_hosts file, mail the network engineers,
    wait until ``ip`` answers ping, then run the two remote lip configuration
    commands. Returns True on success and False as soon as any step fails.
    """

    def _run_remote(remote_cmd):
        # Run one command on the lb; log and report failure.
        status, _stdout, stderr = utils.remote_cmd(ip, remote_cmd)
        if status == 0:
            return True
        logger.error("Cfg lips failed, lb:%s, error:%s" % (lb, stderr))
        return False

    # DNS was changed, so clear the local known_hosts file first.
    status, _stdout, _stderr = utils.shell(
        "cat /dev/null >%s" % LOCAL_SSH_KNOWN_HOSTS)
    if status != 0:
        logger.error("clean %s failed." % LOCAL_SSH_KNOWN_HOSTS)
        return False

    logger.info(
        "Waiting for network administor to change network conf for %s" % lb)

    # Mail the network engineers so they change the lb network settings.
    subject = u"[接入集群]请在一小时之内修改%s的网络设置, 否则集群会建立失败" % lb
    mail.mail(None, subject, "")

    # Check whether the address can be pinged.
    time.sleep(120)
    reachable = utils.check_wait_null(
        "ping -c 3 %s &>/dev/null" % ip,
        timeinit=0, interval=5, timeout=3600)
    if not reachable:
        logger.error("Ping failed, lb:%s, ip:%s" % (lb, ip))
        return False

    # Configure the lip.
    if not _run_remote("sudo -i wdconfig lvsfullnat_lip"):
        return False
    return _run_remote(
        "cd /tmp/post_config/lvsfullnat_lip_config && "
        "sudo sh lvsfullnat_lip_config.sh %s " % " ".join(lips))
def check_nbd():
    """Make sure the nbd kernel module is loaded.

    When the check fails, the nbd install script (which also installs qemu)
    is downloaded and run, then the check is repeated.
    """
    module_check = "/sbin/lsmod | grep nbd"
    try:
        utils.shell(module_check)
    except Exception:
        utils.shell(
            "curl http://download.hy01.nosa.me/download/install_nbd.sh |bash")
        utils.shell(module_check)
    return
def constract_tftp(self, _type, version, mac):
    """Download the pxelinux config for (_type, version) into PXELINUX_DIR/<mac>."""
    substitutions = {
        "url": PXELINUX_CFGS[_type][version],
        "pxelinux_dir": PXELINUX_DIR,
        "mac": mac,
    }
    command = "sudo wget {url} -O {pxelinux_dir}/{mac}".format(**substitutions)
    utils.shell(command, logger=logger)
def _umount(base_dir):
    """Unmount every mount point that ``df`` lists for ``base_dir``.

    Mount points are processed in reverse listing order, so whatever was
    mounted first is unmounted last.
    """
    cmd = "df | grep %s | awk '{print $NF}'" % base_dir.rstrip("/")
    mount_points = utils.shell(cmd, strip=True).splitlines()
    for mount_point in reversed(mount_points):
        utils.umount(mount_point)
因为没有优雅的方法区分两种结构, 临时用下面命令来区分: df |grep /vm_storage 存在 vm_storage 则是新结构, 否则是老结构. """ cmd = "df |grep /vm_storage" try: utils.shell(cmd) cmd = """ virsh vol-create-as --pool {storage_pool} --name {volume_name} """\ """--capacity {volume_size} --allocation 1G --format qcow2""".format( storage_pool=STORAGE_POOL, volume_name=volume_name, volume_size=volume_size) except Exception, e: cmd = """ virsh vol-create-as --pool {storage_pool} --name {volume_name} """\ """--capacity {volume_size} """.format(storage_pool=STORAGE_POOL, volume_name=volume_name, volume_size=volume_size) utils.shell(cmd) cmd = "virsh vol-path --pool {storage_pool} {volume_name}".format( storage_pool=STORAGE_POOL, volume_name=volume_name) volume_path = utils.shell(cmd, strip=True) return volume_path # 这里返回 volume 的路径. def delete(volume_name): """ 删除 volume. """ cmd = ''' virsh vol-delete --pool {storage_pool} {volume_name} '''.format( storage_pool=STORAGE_POOL, volume_name=volume_name) utils.shell(cmd)
dir = "/" if dir == "/": mount_root_cmds = "mount {path} {dir}".format(path=block_path, dir=dir) if dir != "/": mount_cmds += "mount {path} /{dir} ".format(path=block_path, dir=dir) mount_cmds += ": " # / 最先被挂载. mount_cmds = mount_root_cmds + " : " + mount_cmds cmd = "guestfish -a {block_path} run : {mount_cmds} tar-out / {file_path}".format( block_path=volume_path, mount_cmds=mount_cmds, file_path=get_path_from_filename(file_name)) utils.shell(cmd) if volume_path_type == "qcow2": utils.disconnect_nbd_device(nbd_device) return get_url_from_filename(file_name) def _aggregate_log_dir(files): """ 聚合日志. 检查名为 log 或 logs 的目录, 并聚合. """ log_dirs = list()
def vmlist():
    """Return the names of all domains whose state line matches running/stop/shut."""
    listing = utils.shell(
        ''' virsh list --all |egrep -i "running|stop|shut" |awk '{print $2}' ''')
    return listing.splitlines()
if volume.path_type(os_volume_path) == "raw": storage.restore_rawos_from_qcow2_image(os_volume_path, wmi_data) # 数据盘初始化. init_data_volume_cmd = """virt-format -a {data_volume_path} --lvm=/dev/datavg/home --filesystem=ext4""".format( data_volume_path=data_volume_path) try: utils.shell(init_data_volume_cmd) except Exception, e: utils.shell("yum -y update qemu-kvm") utils.shell(init_data_volume_cmd) # tar-in 数据到数据盘. tar_in_cmd = """curl {data_url} | guestfish add {data_volume_path} : run : mount /dev/datavg/home / : tar-in - / compress:gzip""".format( data_url=wmi_data["data"]["url"], data_volume_path=data_volume_path) utils.shell(tar_in_cmd) # volumes 用于创建配置文件. if volume.path_type(os_volume_path) == "qcow2": disk_type = "file" driver_type = "qcow2" source_type = "file" else: disk_type = "block" driver_type = "raw" source_type = "dev" volumes = [{ "file": os_volume_path, "dev": "vda", "disk_type": disk_type, "driver_type": driver_type,
def del_tftp(self, mac):
    """Remove the file named ``mac`` from PXELINUX_DIR."""
    substitutions = {"pxelinux_dir": PXELINUX_DIR, "mac": mac}
    command = r"sudo /bin/rm -f {pxelinux_dir}/{mac}".format(**substitutions)
    utils.shell(command, logger=logger)
def shutdown(name):
    """Stop the instance ``name`` with ``virsh destroy``."""
    command = ''' virsh destroy {name} '''.format(name=name)
    utils.shell(command)
def create(wmi_id, wmi_name, name, excludes):
    """Create a wmi (image) from an existing virtual machine instance.

    The block list of the instance is read with:
        virsh domblklist --domain vm1 |awk 'NR>2'
    where the first column is the device and the second the volume path.
    The size of a volume is read with:
        virsh vol-info /vm_storage/vm1 |grep Capacity |awk '{print $2,$3}'

    Two storage formats are supported. When ``virsh vol-info`` reports the
    type "block" the volume is raw; it is converted to qcow2 before being
    uploaded, and the download address is returned after the upload.

    Device vda or hda is the system disk, everything else is a data disk.
    Only the first system disk and the first data disk are uploaded.

    Args:
        wmi_id: Identifier used to build the uploaded file names.
        wmi_name: Not used by this function.
        name: Name of the source instance.
        excludes: Passed through to ``storage.upload_data``.

    Returns:
        A tuple ``(_os, _data, _partition)``: two dicts with the keys
        "device", "size" and "url" (the system one also has "tar"), and the
        partition information from ``storage.get_partition``.

    Raises:
        Exception: The instance is running while it has more than two
            volumes, or it has no system or no data volume.
    """
    cmd = "virsh domblklist --domain {name} |awk 'NR>2' ".format(name=name)
    return_out = utils.shell(cmd, strip=True)

    device_info = []
    for line in return_out.splitlines():
        fields = line.split()
        # Skip malformed lines and devices without a source ("-").
        if len(fields) < 2 or fields[1] == "-":
            continue
        device_info.append((fields[0], fields[1]))

    # With more than two volumes the data may span two volumes; in that case
    # "guestfish add data_volume" would miss data, so "guestfish -d name" is
    # needed, and -d requires the instance to be shut down.
    def _is_running(_name):
        cmd = """virsh list |grep running |awk '{print $2}'"""
        runnings = [
            i.strip() for i in utils.shell(cmd, strip=True).splitlines()
        ]
        return _name in runnings

    is_running = _is_running(name)
    if len(device_info) > 2 and is_running:
        raise Exception("{name} is running".format(name=name))

    _os = None
    _data = None
    os_volume_path = None
    for device, volume_path in device_info:
        if device == "vda" or device == "hda":
            if _os is None:
                os_file_name = "{wmi_id}_os".format(wmi_id=wmi_id)
                url = storage.upload_os(volume_path, os_file_name)
                _os = {
                    "device": device,
                    "size": "{size}G".format(size=volume.path_size(volume_path)),
                    "url": url
                }

                # Pack the system disk data as a tar; it is only used on
                # hosts with raw storage.
                os_tar_file_name = "{wmi_id}_os_tar".format(wmi_id=wmi_id)
                _os["tar"] = storage.upload_os_tar(volume_path, os_tar_file_name)

                # Needed below for the partition information.
                os_volume_path = volume_path
        elif _data is None:
            data_file_name = "{wmi_id}_data".format(wmi_id=wmi_id)
            url = storage.upload_data(name, is_running, volume_path,
                                      data_file_name, excludes)
            _data = {
                "device": device,
                "size": "{size}G".format(size=volume.path_size(volume_path)),
                "url": url
            }

    # These cases used to fail with a NameError on the log lines below.
    if _os is None:
        raise Exception(
            "{name} has no system volume (vda/hda)".format(name=name))
    if _data is None:
        raise Exception("{name} has no data volume".format(name=name))

    logger.info("os volume:{os}".format(os=_os))
    logger.info("data volume:{data}".format(data=_data))

    # To copy as little data as possible when a virtual machine is created
    # from the image on a raw-storage host, the partition table information
    # of the image is collected and the mbr, the boot partition and the lvm
    # header are backed up. Hosts with qcow2 storage do not use it.
    _partition = storage.get_partition(os_volume_path, wmi_id)
    logger.info("partition info:{partition}".format(partition=_partition))

    return _os, _data, _partition
def del_tftp(self, mac):
    """Remove the pxelinux.cfg file named after ``mac``."""
    command = r"sudo /bin/rm -f /var/lib/tftpboot/pxelinux.cfg/{mac}".format(
        mac=mac)
    utils.shell(command, logger=logger)
def constract_tftp(self, _type, version, mac):
    """Copy the pxelinux config for (_type, version) to the file named ``mac``."""
    substitutions = {"path": PXELINUX_CFGS[_type][version], "mac": mac}
    command = r"sudo /bin/cp -f {path} /var/lib/tftpboot/pxelinux.cfg/{mac}".format(
        **substitutions)
    utils.shell(command, logger=logger)
def create_origin(name, uuid, version, vcpu, mem, os_size, data_size, ip,
                  location, netmask, gateway, ks, bridge):
    """ Create an instance the traditional way (network install with
    virt-install and a kickstart file).

    The steps run strictly in this order: validate the IP, fetch the install
    image if missing, create the system volume, start the installation,
    enable autostart, create the data volume, add a second NIC, clean the
    domain XML, wait for the post-install start, attach the data volume.

    Raises:
        Exception: ip is not valid, or the post-install start times out.

    """

    # Make sure the IP is valid.
    if not utils.is_valid_ip(ip):
        message = "ip:{ip} is illegal".format(ip=ip)
        logger.warning(message)
        raise Exception(message)

    # Download the install image when location does not exist yet.
    if not os.path.isfile(location):
        cmd = ''' wget {url} -O {location}'''.format(url=ISO_URLS[version],
                                                     location=location)
        utils.shell(cmd)

    # Create the system disk.
    os_volume_path = volume.create(name, os_size)

    # Run the installation.
    cmd = """virt-install --name {name} --uuid {uuid} --vcpus={vcpu} --ram {mem} """\
        """--disk path={os_volume_path} -localtime --accelerate """\
        """--location={location} -x "ip={ip} netmask={netmask} gateway={gateway} """\
        """dns={dns} dnsdomain={dnsdomain} ks={ks} console=tty0 console=ttyS0,115200n8" """\
        """--nographics --network bridge={bridge} --noautoconsole &>/dev/null""".format(
            name=name, uuid=uuid, vcpu=vcpu, mem=mem,
            os_volume_path=os_volume_path, location=location, ip=ip,
            netmask=netmask, gateway=gateway, dns=NAMESERVER,
            dnsdomain=DOMAIN, ks=ks, bridge=bridge)
    utils.shell(cmd)

    # Enable autostart.
    cmd = "virsh autostart {name}".format(name=name)
    utils.shell(cmd)

    # Create the data disk; it is identified as ${name}_data.
    data_volume = name + "_data"
    data_volume_path = volume.create(data_volume, data_size)

    # A second network interface is added by default as well.
    interface.add(name, "br1")

    # Remove some useless settings; without this the instance does not come
    # up (a problem with the second storage layout, not with the first).
    cmd = """sed -i '/<kernel>/d' /etc/libvirt/qemu/{name}.xml; sed -i '/<initrd>/d' /etc/libvirt/qemu/{name}.xml; sed -i '/<cmdline>/d' /etc/libvirt/qemu/{name}.xml; virsh define /etc/libvirt/qemu/{name}.xml """.format(name=name)
    utils.shell(cmd)

    # This is the automatic restart after the installation has finished.
    check_cmd = "virsh list | grep -q {name} ".format(name=name)
    start_cmd = "sleep 1 && sh libs/guestfish_origin.sh {name} {uuid} && virsh start {name} && sleep 1 ".format(
        name=name, uuid=uuid)
    if utils.check_wait(check_cmd, start_cmd):
        logger.info("post start {name} success".format(name=name))
    else:
        message = "post start {name} timeout".format(name=name)
        logger.warning(message)
        raise Exception(message)

    # Attach the data volume as vdb of the instance. This happens after the
    # installation because attaching it at the start turned vdb into swap,
    # which then made pvcreate fail.
    volume.attach(name, data_volume_path, "vdb")
def _is_running(_name):
    """Return True when ``_name`` is among the domains ``virsh list`` reports as running."""
    listing = utils.shell(
        """virsh list |grep running |awk '{print $2}'""", strip=True)
    running_names = []
    for line in listing.splitlines():
        running_names.append(line.strip())
    return _name in running_names
def create_wmi(name, uuid, vcpu, mem, os_size, data_size, ip, hostname, wmi_id, netmask, gateway, bridge): """ 根据 wmi 创建 instance. 大概步骤是这样: 1. 根据 wmi_id 获取到 wmi 的信息, 数据结构类似. 2. 创建相应的系统盘和数据盘. 3. 下载对应的镜像, 覆盖掉上一步创建的盘. 4. 如果要求的 size 比 镜像中的大, 则增大空间. 5. 根据模板文件生成虚拟机配置文件, 需修改: 1). 硬盘信息; # 最麻烦 2). 网卡信息; 3). name; 4). uuid; 5). vcpu; 6). mem; 6. 定义配置文件, 修改系统镜像. 7. 启动系统. 8. 增加虚拟机机器的 DNS 记录. """ # 确认 IP 是否合法. if not utils.is_valid_ip(ip): message = "ip:{ip} is illegal".format(ip=ip) logger.warning(message) raise Exception(message) # 获取 wmi 数据. wmi_data = wmi.get(wmi_id) os_name = name os_volume_path = volume.create(os_name, os_size) data_name = name + "_data" data_volume_path = volume.create(data_name, data_size) os_url = wmi_data["os"]["url"] # 对于 qcow2 格式的系统盘, 直接 wget, 并重置大小. if volume.path_type(os_volume_path) == "qcow2": utils.wget(os_url, os_volume_path) if int(os_size.strip("G")) > int(wmi_data["os"]["size"].strip("G")): volume.resize(os_name, os_size) if int(data_size.strip("G")) > int( wmi_data["data"]["size"].strip("G")): volume.resize(data_name, data_size) # 对于 raw 格式的系统盘, 不能使用 wget. # 一种选择是使用 qemu-img convert -O raw 命令, # 但是会有系统盘大小的数据写入, 给 IO 造成很大压力. # 这里我使用 分区表 的方式来减少 IO. if volume.path_type(os_volume_path) == "raw": storage.restore_rawos_from_qcow2_image(os_volume_path, wmi_data) # 数据盘初始化. init_data_volume_cmd = """virt-format -a {data_volume_path} --lvm=/dev/datavg/home --filesystem=ext4""".format( data_volume_path=data_volume_path) try: utils.shell(init_data_volume_cmd) except Exception, e: utils.shell("yum -y update qemu-kvm") utils.shell(init_data_volume_cmd)