Example #1
def copy_ceph_config_from_mon():
    cluster_config = configuration()
    cluster_name = cluster_config.get_cluster_name()
    ceph_mon_keyring = ConfigAPI().get_ceph_mon_keyring(cluster_name)
    ceph_client_admin_keyring = ConfigAPI().get_ceph_keyring_path(cluster_name)
    remote_mon_ip = cluster_config.get_remote_ips(
        cluster_config.get_node_info().name)[0]
    status = StatusReport()
    ssh_obj = ssh()
    config_api = ConfigAPI()
    os.makedirs(config_api.get_cluster_ceph_dir_path(), exist_ok=True)

    os.makedirs("/var/lib/ceph/bootstrap-osd/", exist_ok=True)

    if not ssh_obj.copy_file_from_host(remote_mon_ip,
                                       ceph_client_admin_keyring):
        logger.error("Cannot copy {} from {}".format(ceph_client_admin_keyring,
                                                     remote_mon_ip))
        status.success = False
    elif not ssh_obj.copy_file_from_host(
            remote_mon_ip, "/etc/ceph/{}.conf".format(cluster_name)):
        logger.error("Cannot copy ceph.conf from {}".format(remote_mon_ip))
        status.success = False
    elif not ssh_obj.copy_file_from_host(
            remote_mon_ip,
            "/var/lib/ceph/bootstrap-osd/{}.keyring".format(cluster_name)):
        logger.error("Cannot copy ceph.keyring from {}".format(remote_mon_ip))
        status.success = False
    return status
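
Every example on this page passes results around in a StatusReport. A minimal sketch of the interface these snippets rely on (an assumption; the real PetaSAN class may differ):

# Minimal sketch of the assumed StatusReport interface: a success flag plus a
# list of failed-task tokens, with JSON round-tripping for the "/report/"
# convention used in Examples 3 and 9. The real PetaSAN class may differ.
import json

class StatusReport:
    def __init__(self):
        self.success = True
        self.failed_tasks = []

    def load_json(self, text):
        try:
            data = json.loads(text)
        except ValueError:
            # Example 9 also feeds plain error strings in here, so tolerate
            # non-JSON input by recording a failure.
            self.success = False
            return
        self.success = data.get('success', False)
        self.failed_tasks = data.get('failed_tasks', [])

    def write_json(self):
        return json.dumps({'success': self.success,
                           'failed_tasks': self.failed_tasks})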
Example #2
def __test_leaders():
    sleeps = [15, 15, 10, 10, 5, 5]
    tries = 5

    leaders_in_cluster = []
    cluster_members = []

    cluster_conf = configuration()
    current_cluster_info = cluster_conf.get_cluster_info()

    current_node_info = cluster_conf.get_node_info()
    cluster_members.append(current_node_info.name)

    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        cluster_members.append(node_info.name)

    status_report = StatusReport()

    for host in cluster_members:
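        # Note: tries and the sleeps back-off schedule are shared across all
        # hosts, so retries spent on one host reduce the budget left for the
        # rest.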
        while tries:
            status = None
            try:
                status = _leader_status_check_(host)
            except Exception as exc:
                logger.error("Error connecting to Consul for leader check: %s",
                             exc)
            # if not has_reached_quorum:
            if not status:
                tries -= 1
                sleep_seconds = sleeps.pop()
                logger.warning('waiting %s seconds before retrying',
                               sleep_seconds)
                # time.sleep(sleep_seconds)
                sleep(sleep_seconds)
                status_report.success = False
            else:
                leaders_in_cluster.append(host)
                logger.info('Cluster Node {} joined the cluster and is alive'
                            .format(host))
                status_report.success = True
                break
        if status_report.success is False:
            status_report.failed_tasks.append(
                'core_consul_deploy_build_node_fail_join_cluster_not_alive' +
                "%" + str(host))
    if leaders_in_cluster == cluster_members:
        logger.info("Consul leaders are ready")
        status_report.success = True
        return status_report

    else:
        logger.error("Consul leaders are not ready")
        return status_report
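
_leader_status_check_ itself is not shown in these examples. A hypothetical sketch of what it might do, using Consul's HTTP status endpoint (the real PetaSAN implementation may differ):

# Hypothetical sketch of _leader_status_check_ (not shown in these examples):
# ask the Consul agent on the given host whether a raft leader is known.
# Consul's /v1/status/leader returns the leader address, or "" if none.
import urllib.request

def _leader_status_check_(host):
    url = 'http://{}:8500/v1/status/leader'.format(host)
    with urllib.request.urlopen(url, timeout=5) as response:
        leader = response.read().decode().strip().strip('"')
    return bool(leader)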
Example #3
def create_osds_local():
    config_api = ConfigAPI()
    status = StatusReport()
    out, err = exec_command(" python {} ".format(
        config_api.get_node_create_osd_script_path()))
    # The create-OSD script prints its StatusReport after a "/report/" marker
    # (see Example 9, which guards against the marker being absent).
    status.load_json(str(out.split("/report/")[1]))

    if os.path.exists(config_api.get_node_pre_config_disks()):
        os.remove(config_api.get_node_pre_config_disks())

    return status
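
Both this example and Example 9 split the child script's stdout on a "/report/" marker. A hypothetical sketch of the emitting side (the actual create-OSD script is not shown here):

# Hypothetical sketch of the reporting convention assumed by Examples 3 and 9:
# the create-OSD script prints its StatusReport as JSON after a "/report/"
# marker, and the caller parses everything that follows the marker.
status = StatusReport()
# ... create the OSDs here, flipping status.success and appending
# failed_tasks as needed ...
print("/report/" + status.write_json())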
Example #4
def build_consul_client():
    status_report = StatusReport()
    status_report.success = False
    if create_consul_client_config():
        if start_client():
            status_report.success = True
            return status_report
        else:
            status_report.failed_tasks.append(
                'core_consul_deploy_build_cluster_node_failed_start_cluster')
    else:
        status_report.failed_tasks.append(
            'core_consul_deploy_build_cluster_node_not_alive_cant_create_conf_file'
        )

    return status_report
Example #5
def replace_consul_leader():
    key_gen = get_security_key_()
    if key_gen is None:
        status_report = StatusReport()
        status_report.failed_tasks.append(
            "core_consul_deploy_build_get_security_key_replace_consul_node")
        return status_report
    PetaSAN.core.common.cmd.exec_command(
        'python ' + ConfigAPI().get_consul_create_conf_script() + ' -key="' +
        key_gen + '"')
    __start_leader_locally()
    return __test_leaders()
Example #6
def build_consul():
    try:
        # Generate a Security Key
        keygen = PetaSAN.core.common.cmd.exec_command('consul keygen')[0]
        keygen = str(keygen).splitlines()[0]
        logger.debug('keygen: ' + keygen)

        conf = configuration()
        cluster_info = conf.get_cluster_info()
        cluster_name = cluster_info.name
        logger.info('cluster_name: ' + cluster_name)

        local_node_info = conf.get_node_info()
        logger.info("local_node_info.name: " + local_node_info.name)

        __create_leader_conf_locally(keygen)
        continue_building_cluster = __create_leader_conf_remotely(
            keygen, cluster_info, local_node_info)

        if continue_building_cluster is True:
            __start_leader_remotely(cluster_info, local_node_info)
            __start_leader_locally()
        else:
            logger.error('Error building Consul cluster')
            consul_status_report = StatusReport()
            consul_status_report.success = False
            consul_status_report.failed_tasks.append(
                'core_consul_deploy_build_error_build_consul_cluster')
            return consul_status_report

        # sleep(5)
        consul_status_report = __test_leaders()
        logger.debug(consul_status_report)
        return consul_status_report
    except Exception as ex:
        logger.exception(ex)
        consul_status_report = StatusReport()
        consul_status_report.success = False
        consul_status_report.failed_tasks.append(
            'core_consul_deploy_build_error_build_consul_cluster')
        return consul_status_report
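
exec_command (used here for 'consul keygen' and again in Examples 3 and 5) is a PetaSAN wrapper whose body is not shown. A minimal sketch under the assumption that it returns the command's stdout and stderr:

# Minimal sketch of the exec_command wrapper from PetaSAN.core.common.cmd
# (an assumption; the real implementation may differ). It runs a shell
# command and returns its stdout and stderr.
import subprocess

def exec_command(cmd):
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    return out.decode(), err.decode()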
Example #7
def __get_pre_config_disks():
    disks = PreConfigStorageDisks()

    try:
        with open(ConfigAPI().get_node_pre_config_disks(), 'r') as f:
            data = json.load(f)
            disks.load_json(json.dumps(data))
            return disks
    except Exception:
        # Fall back to an empty disk list if the pre-config file is missing
        # or unreadable.
        return disks


# print subprocess.call("ceph-disk prepare --cluster ceph --zap-disk --fs-type xfs /dev/sdj /dev/sdh",shell=True)
cluster_name = configuration().get_cluster_name()
status = StatusReport()

status.success = False

try:
    cm = CacheManager()
    node_name = configuration().get_node_info().name
    storage_engine = configuration().get_cluster_info().storage_engine
    if configuration().get_node_info().is_storage:
        disks = __get_pre_config_disks()

        if len(disks.journals) > 0:
            for d in disks.journals:
                ceph_disk_lib.clean_disk(d)
                add_journal(d)
Example #8
def build_monitors():
    cluster_name = configuration().get_cluster_name()
    ceph_mon_keyring = ConfigAPI().get_ceph_mon_keyring(cluster_name)
    ceph_client_admin_keyring = ConfigAPI().get_ceph_keyring_path(cluster_name)
    status = StatusReport()

    try:
        _fsid = uuid.uuid4()

        content = "[global]\n\
fsid = {fsid}\n\
mon_host = {mon_host}\n\
\n\
public_network = {public_network}\n\
cluster_network = {cluster_network}\n\
\n"

        cluster_config = configuration()
        current_node_info = cluster_config.get_node_info()

        current_node_name = current_node_info.name
        current_cluster_info = cluster_config.get_cluster_info()

        config_api = ConfigAPI()
        mon_hosts_backend_ip = []
        remote_mons_management_ips = []

        for i in current_cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            mon_hosts_backend_ip.append(node_info.backend_1_ip)
            if current_node_name != node_info.name:
                remote_mons_management_ips.append(node_info.management_ip)

        if not os.path.exists(config_api.get_cluster_ceph_dir_path()):
            os.makedirs(os.path.dirname(
                config_api.get_cluster_ceph_dir_path()))

        with open(
                config_api.get_cluster_ceph_dir_path() +
                "{}.conf".format(cluster_name),
                'w',
        ) as f:
            f.write(
                content.format(
                    fsid=_fsid,
                    public_network=str(
                        current_cluster_info.backend_1_base_ip) + "/" +
                    __get_net_size(str(current_cluster_info.backend_1_mask)),
                    cluster_network=str(
                        current_cluster_info.backend_2_base_ip) + "/" +
                    __get_net_size(str(current_cluster_info.backend_2_mask)),
                    mon_initial=cluster_config.get_node_name(),
                    mon_host=cluster_config.get_node_info().backend_1_ip +
                    ',' + ','.join(mon_hosts_backend_ip)) +
                cluster_config.get_ceph_tunings() + "\n")

        if not call_cmd(
                "ceph-authtool --create-keyring /tmp/{} --gen-key -n mon. --cap mon 'allow *'"
                .format(ceph_mon_keyring)):
            logger.error(
                "ceph-authtool --create-keyring for mon returned error")
            status.success = False

        # elif not call_cmd("".join(["ceph-authtool --create-keyring {}".format(ceph_client_admin_keyring),
        #                    " --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow'"])) :
        # Nautilius remove --set-uid=0

        elif not call_cmd("".join([
                "ceph-authtool --create-keyring {}".format(
                    ceph_client_admin_keyring),
                " --gen-key -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow'"
        ])):
            logger.error(
                "ceph-authtool --create-keyring for admin returned error")
            status.success = False

        elif not call_cmd("ceph-authtool /tmp/{} --import-keyring {}".format(
                ceph_mon_keyring, ceph_client_admin_keyring)):
            logger.error("ceph-authtool --import-keyring returned error")
            status.success = False

        elif not call_cmd(
                "monmaptool --create --add {} {} --fsid {} /tmp/monmap".format(
                    cluster_config.get_node_name(),
                    cluster_config.get_node_info().backend_1_ip, _fsid)):
            logger.error("monmaptool --create --add returned error")
            status.success = False

        if not os.path.exists("/var/lib/ceph/mon/{}-{}".format(
                cluster_name, current_node_name)):
            os.makedirs("/var/lib/ceph/mon/{}-{}".format(
                cluster_name, current_node_name))

        if not status.success or not call_cmd(
                "ceph-mon --cluster {} --mkfs -i {} --monmap /tmp/monmap --keyring /tmp/{}"
                .format(cluster_name, current_node_name, ceph_mon_keyring)):
            logger.error("ceph-mon --mkfs --add returned error")
            status.success = False

        open(
            "/var/lib/ceph/mon/{}-{}/done".format(cluster_name,
                                                  current_node_name),
            'w+').close()
        open(
            "/var/lib/ceph/mon/{}-{}/systemd".format(cluster_name,
                                                     current_node_name),
            'w+').close()

        call_cmd("chown -R ceph:ceph /var/lib/ceph/mon")

        call_cmd("systemctl enable ceph.target ")
        call_cmd("systemctl enable ceph-mon.target ")
        call_cmd("systemctl enable ceph-mon@{} ".format(current_node_name))
        if not status.success or not call_cmd(
                "systemctl start ceph-mon@{}  ".format(current_node_name)):
            status.success = False

        if not status.success:
            status.failed_tasks.append(
                "Create ceph mon on {} returned error.".format(
                    current_node_name))
            return status

        logger.info("First monitor started successfully")

        # create local manager :
        call_cmd('/opt/petasan/scripts/create_mgr.py')

        logger.info("Starting to deploy remote monitors")

        # call_cmd("ceph-create-keys --cluster {} -i {}  ".format(cluster_name,current_node_name))
        # Nautilius copy bootstrap-osd ourselves
        if not os.path.exists("/var/lib/ceph/bootstrap-osd/"):
            os.makedirs("/var/lib/ceph/bootstrap-osd/")
            call_cmd(
                'ceph auth get client.bootstrap-osd > /var/lib/ceph/bootstrap-osd/ceph.keyring'
            )

        for remote_mon in remote_mons_management_ips:
            ssh_obj = ssh()
            if not ssh_obj.copy_file_to_host(
                    remote_mon, "{}".format(ceph_client_admin_keyring)):
                logger.error("Cannot copy {} to {}".format(
                    ceph_client_admin_keyring, remote_mon))
                status.success = False
            elif not ssh_obj.copy_file_to_host(
                    remote_mon, "/etc/ceph/{}.conf".format(cluster_name)):
                logger.error("Cannot copy ceph.conf to {}".format(remote_mon))
                status.success = False
            elif not ssh_obj.call_command(
                    remote_mon, " python {} ".format(
                        config_api.get_node_create_mon_script_path())):
                logger.error("Cannot create monitor on remote node {}".format(
                    remote_mon))
                status.success = False

            # Nautilius copy bootstrap-osd ourselves :
            elif not ssh_obj.call_command(
                    remote_mon, 'mkdir -p /var/lib/ceph/bootstrap-osd'):
                logger.error(
                    "Cannot create bootstrap-osd dir on remote node {}".format(
                        remote_mon))
                status.success = False
            elif not ssh_obj.copy_file_to_host(
                    remote_mon, '/var/lib/ceph/bootstrap-osd/ceph.keyring'):
                logger.error("Cannot copy bootstrap-osd keyring to {}".format(
                    remote_mon))
                status.success = False

            if not status.success:
                status.failed_tasks.append(
                    "core_cluster_deploy_monitor_create_err" + "%" +
                    remote_mon)
                return status
        if not __test_mons():
            status.success = False
            status.failed_tasks.append("core_cluster_deploy_monitors_down_err")
            return status

        # Nautilius enable msgr2 :
        call_cmd('ceph mon enable-msgr2')

    except Exception as ex:
        status.success = False
        logger.exception(ex)
        status.failed_tasks.append(
            "core_cluster_deploy_monitor_exception_occurred" + "%" +
            current_node_name)
        return status

    status.success = True
    return status
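
__get_net_size is called above but not defined in these examples; judging from its use, it turns a dotted-decimal netmask into a CIDR prefix length. A minimal sketch under that assumption:

# Minimal sketch of the __get_net_size helper used in build_monitors() (an
# assumption; the real PetaSAN implementation may differ). It converts a
# dotted netmask such as "255.255.255.0" into its prefix length, "24".
def __get_net_size(netmask):
    bits = ''.join('{0:08b}'.format(int(octet)) for octet in netmask.split('.'))
    return str(bits.count('1'))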
Example #9
def create_osds_remote(remote_mons_ips_ls):
    config_api = ConfigAPI()
    remote_status = StatusReport()
    for remote_mon in remote_mons_ips_ls:
        ssh_obj = ssh()
        status = StatusReport()

        out, err = ssh_obj.exec_command(
            remote_mon,
            " python {} ".format(config_api.get_node_create_osd_script_path()))

        logger.info(" ".join([remote_mon, out]))

        if "/report/" in out:  # To avoid -- IndexError: list index out of range
            status.load_json(str(out.split("/report/")[1]))
        else:
            if err:
                status.load_json("Status Report Error , error : {}".format(
                    str(err)))
            else:
                status.load_json("Connection Error.")

        remote_status.failed_tasks.extend(status.failed_tasks)

        if not status.success:
            logger.error(
                "Cannot create osd for remote node {}".format(remote_mon))
            remote_status.success = False
            return remote_status

    return remote_status
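
A hypothetical usage, reusing the remote management IPs collected in Example 8:

# Hypothetical usage of create_osds_remote with a list of management IPs such
# as the remote_mons_management_ips built in Example 8.
report = create_osds_remote(remote_mons_management_ips)
if not report.success:
    for task in report.failed_tasks:
        logger.error(task)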