def copy_ceph_config_from_mon():
    """Copy ceph config and keyring files from a remote monitor to this node.

    Pulls the admin keyring, ``<cluster>.conf`` and the bootstrap-osd keyring
    from the first remote management node via ssh.

    Returns:
        StatusReport: ``success`` is False if any copy fails.
    """
    cluster_config = configuration()
    cluster_name = cluster_config.get_cluster_name()
    ceph_client_admin_keyring = ConfigAPI().get_ceph_keyring_path(cluster_name)
    # First remote management ip is used as the source monitor.
    remote_mon_ip = cluster_config.get_remote_ips(
        cluster_config.get_node_info().name)[0]
    status = StatusReport()
    ssh_obj = ssh()
    config_api = ConfigAPI()

    # exist_ok makes a prior os.path.exists() check redundant.
    os.makedirs(config_api.get_cluster_ceph_dir_path(), exist_ok=True)
    os.makedirs("/var/lib/ceph/bootstrap-osd/", exist_ok=True)

    if not ssh_obj.copy_file_from_host(remote_mon_ip,
                                       "{}".format(ceph_client_admin_keyring)):
        logger.error("Cannot copy {} from {}".format(ceph_client_admin_keyring,
                                                     remote_mon_ip))
        status.success = False
    elif not ssh_obj.copy_file_from_host(
            remote_mon_ip, "/etc/ceph/{}.conf".format(cluster_name)):
        logger.error("Cannot copy ceph.conf from {}".format(remote_mon_ip))
        status.success = False
    elif not ssh_obj.copy_file_from_host(
            remote_mon_ip,
            "/var/lib/ceph/bootstrap-osd/{}.keyring".format(cluster_name)):
        logger.error("Cannot copy ceph.keyring from {}".format(remote_mon_ip))
        status.success = False
    return status
def __test_leaders():
    """Verify every management node has joined consul and is alive.

    Polls each cluster member via ``_leader_status_check_`` with a bounded
    retry/backoff schedule.

    Returns:
        StatusReport: ``success`` True only when all members report alive;
        one failed-task entry is appended per unreachable host.
    """
    # Popped from the end, so actual waits are 5, 5, 10, 10, 15 seconds.
    backoff_schedule = [15, 15, 10, 10, 5, 5]
    max_tries = 5
    leaders_in_cluster = []
    cluster_members = []
    cluster_conf = configuration()
    current_cluster_info = cluster_conf.get_cluster_info()
    current_node_info = cluster_conf.get_node_info()

    cluster_members.append(current_node_info.name)
    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        cluster_members.append(node_info.name)

    status_report = StatusReport()
    for host in cluster_members:
        # BUG FIX: the retry budget and backoff list were shared across all
        # hosts, so one slow host exhausted the attempts of every host after
        # it (``while 0`` skipped the loop entirely). Reset them per host.
        tries = max_tries
        sleeps = list(backoff_schedule)
        while tries:
            status = None
            try:
                status = _leader_status_check_(host)
            except Exception:
                logger.error("Error Connecting to consul for leader check.")
            if not status:
                tries -= 1
                sleep_seconds = sleeps.pop()
                logger.warning('waiting %s seconds before retrying',
                               sleep_seconds)
                sleep(sleep_seconds)
                status_report.success = False
            else:
                leaders_in_cluster.append(host)
                # BUG FIX: the original concatenated the host onto a message
                # still containing a '{}' placeholder; use lazy %-formatting.
                logger.info('Cluster Node %s joined the cluster and is alive',
                            host)
                status_report.success = True
                break
        if status_report.success is False:
            status_report.failed_tasks.append(
                'core_consul_deploy_build_node_fail_join_cluster_not_alive'
                + "%" + str(host))

    if leaders_in_cluster == cluster_members:
        logger.info("Consul leaders are ready")
        status_report.success = True
        return status_report
    logger.error("Consul leaders are not ready")
    return status_report
def create_osds_local():
    """Run the local create-osd script and return its parsed StatusReport.

    The script prints a report after a ``/report/`` marker; everything after
    the marker is fed to ``StatusReport.load_json``. The pre-config disks
    file is removed afterwards if present.
    """
    config_api = ConfigAPI()
    status = StatusReport()
    out, err = exec_command(" python {} ".format(
        config_api.get_node_create_osd_script_path()))
    status.load_json(str(out.split("/report/")[1]))
    pre_config_disks = config_api.get_node_pre_config_disks()
    if os.path.exists(pre_config_disks):
        os.remove(pre_config_disks)
    return status
def build_consul_client():
    """Create the consul client configuration and start the client.

    Returns:
        StatusReport: ``success`` True only when both steps succeed;
        otherwise a task name describing the failed step is appended.
    """
    status_report = StatusReport()
    status_report.success = False
    # Guard clauses: bail out with the matching failure task name.
    if not create_consul_client_config():
        status_report.failed_tasks.append(
            'core_consul_deploy_build_cluster_node_not_alive_cant_create_conf_file'
        )
        return status_report
    if not start_client():
        status_report.failed_tasks.append(
            'core_consul_deploy_build_cluster_node_failed_start_cluster')
        return status_report
    status_report.success = True
    return status_report
def replace_consul_leader():
    """Recreate the consul leader configuration on this node and restart it.

    Returns:
        StatusReport: result of ``__test_leaders()``, or a failure report
        when the cluster security key cannot be retrieved.
    """
    key_gen = get_security_key_()
    if key_gen is None:
        failure = StatusReport()
        failure.failed_tasks.append(
            "core_consul_deploy_build_get_security_key_replace_consul_node")
        return failure
    create_conf_cmd = ('python ' + ConfigAPI().get_consul_create_conf_script()
                       + ' -key="' + key_gen + '"')
    PetaSAN.core.common.cmd.exec_command(create_conf_cmd)
    __start_leader_locally()
    return __test_leaders()
def build_consul():
    """Build the consul cluster: generate a key, write configs, start leaders.

    Returns:
        StatusReport: result of ``__test_leaders()``, or a failure report
        when any build step raises or the remote leader configs cannot be
        created.
    """
    try:
        # Generate a security key shared by all consul members.
        keygen = PetaSAN.core.common.cmd.exec_command('consul keygen')[0]
        keygen = str(keygen).splitlines()[0]
        logger.debug('keygen: ' + keygen)

        conf = configuration()
        cluster_info = conf.get_cluster_info()
        cluster_name = cluster_info.name
        logger.info('cluster_name: ' + cluster_name)
        local_node_info = conf.get_node_info()
        logger.info("local_node_info.name: " + local_node_info.name)

        __create_leader_conf_locally(keygen)
        continue_building_cluster = __create_leader_conf_remotely(
            keygen, cluster_info, local_node_info)
        if continue_building_cluster is True:
            __start_leader_remotely(cluster_info, local_node_info)
            __start_leader_locally()
        else:
            logger.error('Error building Consul cluster')
            consul_status_report = StatusReport()
            consul_status_report.success = False
            consul_status_report.failed_tasks.append(
                'core_consul_deploy_build_error_build_consul_cluster')
            return consul_status_report

        consul_status_report = __test_leaders()
        logger.debug(consul_status_report)
        return consul_status_report
    except Exception as ex:
        # BUG FIX: Exception.message does not exist on Python 3 and would
        # raise AttributeError inside this handler; log the exception itself.
        logger.exception(ex)
        consul_status_report = StatusReport()
        consul_status_report.success = False
        consul_status_report.failed_tasks.append(
            'core_consul_deploy_build_error_build_consul_cluster')
        return consul_status_report
def __get_pre_config_disks(): disks = PreConfigStorageDisks() try: with open(ConfigAPI().get_node_pre_config_disks(), 'r') as f: data = json.load(f) disks.load_json(json.dumps(data)) return disks except: return disks # print subprocess.call("ceph-disk prepare --cluster ceph --zap-disk --fs-type xfs /dev/sdj /dev/sdh",shell=True) cluster_name = configuration().get_cluster_name() status = StatusReport() status.success = False try: cm = CacheManager() node_name = configuration().get_node_info().name storage_engine = configuration().get_cluster_info().storage_engine if configuration().get_node_info().is_storage: disks = __get_pre_config_disks() if len(disks.journals) > 0: for d in disks.journals: ceph_disk_lib.clean_disk(d) add_journal(d)
def build_monitors():
    """Bootstrap the first ceph monitor locally, then deploy remote monitors.

    Writes ``<cluster>.conf``, creates the mon and admin keyrings and the
    monmap, starts the local mon (plus a local mgr), then copies the config
    and keyrings to each remote management node and runs its create-mon
    script over ssh.

    Returns:
        StatusReport: ``success`` False with descriptive ``failed_tasks``
        entries on any error.
    """
    cluster_name = configuration().get_cluster_name()
    ceph_mon_keyring = ConfigAPI().get_ceph_mon_keyring(cluster_name)
    ceph_client_admin_keyring = ConfigAPI().get_ceph_keyring_path(cluster_name)
    status = StatusReport()
    try:
        _fsid = uuid.uuid4()
        # Template for the initial ceph.conf written locally.
        content = ("[global]\n"
                   "fsid = {fsid}\n"
                   "mon_host = {mon_host}\n"
                   "\n"
                   "public_network = {public_network}\n"
                   "cluster_network = {cluster_network}\n"
                   "\n")

        cluster_config = configuration()
        current_node_info = cluster_config.get_node_info()
        current_node_name = current_node_info.name
        current_cluster_info = cluster_config.get_cluster_info()
        config_api = ConfigAPI()

        mon_hosts_backend_ip = []
        remote_mons_management_ips = []
        for i in current_cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            mon_hosts_backend_ip.append(node_info.backend_1_ip)
            if current_node_name != node_info.name:
                remote_mons_management_ips.append(node_info.management_ip)

        if not os.path.exists(config_api.get_cluster_ceph_dir_path()):
            # NOTE(review): dirname() only creates the full directory when
            # the path ends with '/'; copy_ceph_config_from_mon creates the
            # path directly — confirm which is intended.
            os.makedirs(os.path.dirname(
                config_api.get_cluster_ceph_dir_path()))

        with open(
                config_api.get_cluster_ceph_dir_path() +
                "{}.conf".format(cluster_name), 'w') as f:
            # The original also passed an unused ``mon_initial=`` kwarg with
            # no matching placeholder; it has been dropped (no-op).
            f.write(content.format(
                fsid=_fsid,
                public_network=str(current_cluster_info.backend_1_base_ip) +
                "/" + __get_net_size(str(current_cluster_info.backend_1_mask)),
                cluster_network=str(current_cluster_info.backend_2_base_ip) +
                "/" + __get_net_size(str(current_cluster_info.backend_2_mask)),
                mon_host=cluster_config.get_node_info().backend_1_ip + ',' +
                ','.join(mon_hosts_backend_ip)) +
                cluster_config.get_ceph_tunings() + "\n")

        if not call_cmd(
                "ceph-authtool --create-keyring /tmp/{} --gen-key -n mon. "
                "--cap mon 'allow *'".format(ceph_mon_keyring)):
            logger.error(
                "ceph-authtool --create-keyring for mon returned error")
            status.success = False
        # Nautilus: client.admin keyring no longer uses --set-uid=0.
        elif not call_cmd("".join([
                "ceph-authtool --create-keyring {}".format(
                    ceph_client_admin_keyring),
                " --gen-key -n client.admin --cap mon 'allow *'"
                " --cap osd 'allow *' --cap mds 'allow'"])):
            logger.error(
                "ceph-authtool --create-keyring for admin returned error")
            status.success = False
        elif not call_cmd("ceph-authtool /tmp/{} --import-keyring {}".format(
                ceph_mon_keyring, ceph_client_admin_keyring)):
            logger.error("ceph-authtool --import-keyring returned error")
            status.success = False
        elif not call_cmd(
                "monmaptool --create --add {} {} --fsid {} /tmp/monmap".format(
                    cluster_config.get_node_name(),
                    cluster_config.get_node_info().backend_1_ip, _fsid)):
            logger.error("monmaptool --create --add returned error")
            status.success = False

        if not os.path.exists("/var/lib/ceph/mon/{}-{}".format(
                cluster_name, current_node_name)):
            os.makedirs("/var/lib/ceph/mon/{}-{}".format(
                cluster_name, current_node_name))
        if not status.success or not call_cmd(
                "ceph-mon --cluster {} --mkfs -i {} --monmap /tmp/monmap "
                "--keyring /tmp/{}".format(cluster_name, current_node_name,
                                           ceph_mon_keyring)):
            logger.error("ceph-mon --mkfs --add returned error")
            status.success = False

        # Marker files telling ceph the mon is bootstrapped / systemd-managed.
        open("/var/lib/ceph/mon/{}-{}/done".format(cluster_name,
                                                   current_node_name),
             'w+').close()
        open("/var/lib/ceph/mon/{}-{}/systemd".format(cluster_name,
                                                      current_node_name),
             'w+').close()
        call_cmd("chown -R ceph:ceph /var/lib/ceph/mon")
        call_cmd("systemctl enable ceph.target ")
        call_cmd("systemctl enable ceph-mon.target ")
        call_cmd("systemctl enable ceph-mon@{} ".format(current_node_name))
        if not status.success or not call_cmd(
                "systemctl start ceph-mon@{} ".format(current_node_name)):
            status.success = False
        if not status.success:
            status.failed_tasks.append(
                "Create ceph mon on {} returned error.".format(
                    current_node_name))
            return status

        logger.info("First monitor started successfully")
        # Create the local ceph manager daemon.
        call_cmd('/opt/petasan/scripts/create_mgr.py')
        logger.info("Starting to deploy remote monitors")

        # Nautilus: copy the bootstrap-osd keyring ourselves.
        if not os.path.exists("/var/lib/ceph/bootstrap-osd/"):
            os.makedirs("/var/lib/ceph/bootstrap-osd/")
        call_cmd('ceph auth get client.bootstrap-osd > '
                 '/var/lib/ceph/bootstrap-osd/ceph.keyring')

        for remote_mon in remote_mons_management_ips:
            ssh_obj = ssh()
            if not ssh_obj.copy_file_to_host(
                    remote_mon, "{}".format(ceph_client_admin_keyring)):
                logger.error("Cannot copy {} to {}".format(
                    ceph_client_admin_keyring, remote_mon))
                status.success = False
            elif not ssh_obj.copy_file_to_host(
                    remote_mon, "/etc/ceph/{}.conf".format(cluster_name)):
                logger.error("Cannot copy ceph.conf to {}".format(remote_mon))
                status.success = False
            elif not ssh_obj.call_command(
                    remote_mon, " python {} ".format(
                        config_api.get_node_create_mon_script_path())):
                logger.error("Cannot create monitor on remote node {}".format(
                    remote_mon))
                status.success = False
            # Nautilus: provision bootstrap-osd keyring on the remote node.
            elif not ssh_obj.call_command(
                    remote_mon, 'mkdir -p /var/lib/ceph/bootstrap-osd'):
                logger.error(
                    "Cannot create bootstrap-osd dir on remote node {}".format(
                        remote_mon))
                status.success = False
            elif not ssh_obj.copy_file_to_host(
                    remote_mon, '/var/lib/ceph/bootstrap-osd/ceph.keyring'):
                logger.error("Cannot copy bootstrap-osd keyring to {}".format(
                    remote_mon))
                status.success = False
            if not status.success:
                status.failed_tasks.append(
                    "core_cluster_deploy_monitor_create_err" + "%" +
                    remote_mon)
                return status

        if not __test_mons():
            status.success = False
            status.failed_tasks.append(
                "core_cluster_deploy_monitors_down_err")
            return status
        # Nautilus: enable the v2 messenger protocol.
        call_cmd('ceph mon enable-msgr2')
    except Exception as ex:
        status.success = False
        # BUG FIX: Exception.message does not exist on Python 3; log the
        # exception object itself.
        logger.exception(ex)
        # NOTE(review): current_node_name is unbound if the exception fires
        # before it is assigned inside the try block — confirm and hoist it
        # above the try if that path is reachable.
        status.failed_tasks.append(
            "core_cluster_deploy_monitor_exception_occurred" + "%" +
            current_node_name)
        return status
    status.success = True
    return status
def create_osds_remote(remote_mons_ips_ls):
    """Run the create-osd script on each remote monitor node via ssh.

    Args:
        remote_mons_ips_ls: iterable of remote monitor management ips.

    Returns:
        StatusReport: aggregated result; returns early (success False) on
        the first node whose osd creation fails.
    """
    config_api = ConfigAPI()
    remote_status = StatusReport()
    for remot_mon in remote_mons_ips_ls:
        ssh_obj = ssh()
        status = StatusReport()
        out, err = ssh_obj.exec_command(
            remot_mon,
            " python {} ".format(config_api.get_node_create_osd_script_path()))
        logger.info(" ".join([remot_mon, out]))
        if "/report/" in out:
            # Marker present: parse the report that follows it. The guard
            # avoids an IndexError when the marker is absent.
            status.load_json(str(out.split("/report/")[1]))
        elif err:
            status.load_json("Status Report Error , error : {}".format(
                str(err)))
        else:
            status.load_json("Connection Error.")
        remote_status.failed_tasks.extend(status.failed_tasks)
        if not status.success:
            logger.error(
                "Cannot create osd for remote node {}".format(remot_mon))
            remote_status.success = False
            return remote_status
    return remote_status