Пример #1
0
def walg_ssh_archiving_init(nodes):
    """Run the ``ssh_archiving_init`` action of the wal-g script on ``nodes``.

    Logs the hostnames being processed, calls ``patt.host_id`` on the nodes
    and executes ``./dscripts/d27.walg.sh`` with sudo. The script results
    are handed to ``log_results``; nothing is returned.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    # NOTE: the duplicate-id check (patt.check_dup_id) is disabled for this step.

    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d27.walg.sh",
        args=['ssh_archiving_init'],
        sudo=True,
    )
    log_results(outcome)
Пример #2
0
def floating_ip_enable(nodes, floating_ips):
    """Run the ``enable`` action of the floating-ip script on ``nodes``.

    ``floating_ips`` is joined with single spaces and passed to the script
    as one argument. The nodes go through ``patt.host_id`` and
    ``patt.check_dup_id`` first; the script results are passed to
    ``log_results`` and nothing is returned.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    joined_ips = " ".join(floating_ips)
    script_args = ['enable', joined_ips]
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d25.floating_ip.sh",
        args=script_args,
        sudo=True,
    )
    log_results(outcome)
Пример #3
0
def haproxy_enable(nodes):
    """Run the ``enable`` action of the haproxy script on ``nodes``.

    Calls ``patt.host_id`` and ``patt.check_dup_id`` on the nodes, executes
    ``./dscripts/d40.haproxy.sh`` with sudo and passes the results to
    ``log_results``. Nothing is returned.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script = "./dscripts/d40.haproxy.sh"
    outcome = patt.exec_script(nodes=nodes, src=script, args=['enable'], sudo=True)
    log_results(outcome)
Пример #4
0
def walg_init(walg_version, nodes):
    """Run the ``init`` action of the wal-g script on ``nodes``.

    The script ``./dscripts/d27.walg.sh`` is executed without sudo and
    receives ``walg_version`` as its second argument.

    Returns:
        True when every result has a truthy ``out`` attribute (also True
        when there are no results), False otherwise.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d27.walg.sh",
        args=['init', walg_version],
        sudo=False,
    )
    log_results(outcome)

    # Collect every flag first so that each result's output is inspected.
    produced_output = [bool(entry.out) for entry in outcome]
    return all(produced_output)
Пример #5
0
def walg_archiving_add(cluster_name, nodes):
    """Run the ``ssh_archiving_add`` action of the wal-g script on ``nodes``.

    The script is executed with sudo and receives ``cluster_name`` as its
    second argument.

    Returns:
        True when the ``out`` attribute of every result equals the string
        ``"drwx--x--x <cluster_name>.walg /var/lib/walg/<cluster_name>"``
        (also True when there are no results), False otherwise.
    """
    patt.host_id(nodes)
    # NOTE: the duplicate-id check (patt.check_dup_id) is disabled for this step.

    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d27.walg.sh",
        args=['ssh_archiving_add', cluster_name],
        sudo=True,
    )
    log_results(outcome)

    matches = []
    for entry in outcome:
        archive_dir = "/var/lib/walg/" + cluster_name
        expected = "drwx--x--x {}.{} {}".format(cluster_name, "walg", archive_dir)
        matches.append(bool(entry.out == expected))
    return all(matches)
Пример #6
0
def floating_ip_init(nodes, ip_takeover_version="0.9"):
    """Run the ``init`` action of the floating-ip script on ``nodes``.

    ``ip_takeover_version`` is passed to the script as its second argument.
    The script runs with sudo and with ``timeout=1440`` forwarded to
    ``patt.exec_script``. Results go to ``log_results``; nothing is returned.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script_args = ['init', ip_takeover_version]
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d25.floating_ip.sh",
        args=script_args,
        sudo=True,
        timeout=1440,
    )
    log_results(outcome)
Пример #7
0
def postgres_ssl_cert_init(nodes):
    """Run the ``init`` action of the postgres SSL certificate script.

    Logs the hostnames being processed, calls ``patt.host_id`` and
    ``patt.check_dup_id`` on the nodes, then executes
    ``dscripts/ssl_cert_postgres.sh`` with sudo. Results are passed to
    ``log_results``; nothing is returned.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script = "dscripts/ssl_cert_postgres.sh"
    outcome = patt.exec_script(nodes=nodes, src=script, args=['init'], sudo=True)
    log_results(outcome)
Пример #8
0
def postgres_init(postgres_version, nodes):
    """Run the ``init`` action of the postgres script on ``nodes``.

    ``postgres_version`` is passed to ``./dscripts/d20.postgres.sh`` as its
    second argument; the script runs with sudo. Results are passed to
    ``log_results`` and nothing is returned.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script_args = ['init', postgres_version]
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d20.postgres.sh",
        args=script_args,
        sudo=True,
    )
    log_results(outcome)
Пример #9
0
def floating_ip_build(nodes, ip_takeover_version="0.9"):
    """Run the ``build`` action of the floating-ip script on ``nodes``.

    ``./ip_takeover.py`` and ``./ip_takeover.make`` are supplied as payload
    and ``ip_takeover_version`` is the second script argument. The script
    runs without sudo. Results go to ``log_results``; nothing is returned.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    build_payload = ["./ip_takeover.py", "./ip_takeover.make"]
    script_args = ['build', ip_takeover_version]
    outcome = patt.exec_script(nodes=nodes,
                               src="./dscripts/d25.floating_ip.sh",
                               payload=build_payload,
                               args=script_args,
                               sudo=False)
    log_results(outcome)
Пример #10
0
def walg_ssh_known_hosts(cluster_name, nodes, archiving_server):
    """Run the ``ssh_known_hosts`` action of the wal-g script on ``nodes``.

    The script receives ``cluster_name`` and the hostname of the first
    entry of ``archiving_server`` as arguments and runs with sudo.

    Returns:
        False when at least one result carries a truthy ``error``
        attribute, True otherwise. Results without an ``error`` attribute
        are ignored.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script_args = ['ssh_known_hosts', cluster_name, archiving_server[0].hostname]
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d27.walg.sh",
        args=script_args,
        sudo=True,
    )
    log_results(outcome)

    error_flags = [bool(entry.error) for entry in outcome if hasattr(entry, 'error')]
    return not any(error_flags)
Пример #11
0
def add_repo(repo_url, nodes):
    """Run the ``add`` action of the repository script on ``nodes``.

    Nodes sharing a hostname are collapsed to a single entry (the last one
    seen wins). The items of ``repo_url`` are joined with single spaces and
    passed to ``./dscripts/d03.repo.sh`` as one argument; the script runs
    with sudo. Results go to ``log_results``; nothing is returned.
    """
    by_hostname = {}
    for node in nodes:
        by_hostname[node.hostname] = node
    nodes = list(by_hostname.values())

    logger.debug("add repo url {}".format(nodes))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    joined_urls = " ".join(repo_url)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d03.repo.sh",
        args=['add', joined_urls],
        sudo=True,
    )
    log_results(outcome)
Пример #12
0
def tuned_postgresql(nodes):
    """Run the ``enable`` action of the tuned script on ``nodes``.

    Nodes sharing a hostname are collapsed to a single entry (the last one
    seen wins) before ``./dscripts/d22_tuned.sh`` is executed with sudo.
    Results go to ``log_results``; nothing is returned.
    """
    by_hostname = {}
    for node in nodes:
        by_hostname[node.hostname] = node
    nodes = list(by_hostname.values())

    logger.debug("tuned postgresql {}".format(nodes))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script = "./dscripts/d22_tuned.sh"
    outcome = patt.exec_script(nodes=nodes, src=script, args=['enable'], sudo=True)
    log_results(outcome)
Пример #13
0
def nft_init(nodes):
    """Run the ``init`` action of the nft script on ``nodes``.

    Nodes sharing a hostname are collapsed to a single entry (the last one
    seen wins) before ``./dscripts/d01.nft.sh`` is executed with sudo.
    Results go to ``log_results``; nothing is returned.
    """
    by_hostname = {}
    for node in nodes:
        by_hostname[node.hostname] = node
    nodes = list(by_hostname.values())

    hostnames = [node.hostname for node in nodes]
    logger.debug("nft_init {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script = "./dscripts/d01.nft.sh"
    outcome = patt.exec_script(nodes=nodes, src=script, args=['init'], sudo=True)
    log_results(outcome)
Пример #14
0
def patroni_init(postgres_version, patroni_version, nodes):
    """Run the ``init`` action of the patroni script on ``nodes``.

    ``config/patroni.te`` is supplied as payload. The script receives
    ``postgres_version``, ``patroni_version`` and the payload file name
    ``patroni.te`` as arguments and runs with sudo. Results go to
    ``log_results``; nothing is returned.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script_args = ['init', postgres_version, patroni_version, 'patroni.te']
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d30.patroni.sh",
        payload='config/patroni.te',
        args=script_args,
        sudo=True,
    )
    log_results(outcome)
Пример #15
0
def walg_ssh_gen(cluster_name, nodes, postgres_user='******'):
    """Run the ``ssh_archive_keygen`` action of the wal-g script on ``nodes``.

    The script receives ``cluster_name`` and ``postgres_user`` as arguments
    and runs with sudo and ``log_call=True``. The results are logged with
    ``hide_stdout=True``.

    Returns:
        The list of ``out`` attributes, one per result.

    Raises:
        AssertionError: when any result has a falsy ``out`` attribute
            (only while assertions are enabled).
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    script_args = ['ssh_archive_keygen', cluster_name, postgres_user]
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d27.walg.sh",
        args=script_args,
        sudo=True,
        log_call=True,
    )
    log_results(outcome, hide_stdout=True)

    outputs = [entry.out for entry in outcome]
    assert all([bool(text) for text in outputs])
    return outputs
Пример #16
0
def postgres_wait_ready(postgres_peers, postgres_version, timeout=120):
    """Run the ``wait_pg_isready`` action on ``postgres_peers``.

    ``./dscripts/pg_wait_ready.sh`` is executed with sudo and receives
    ``postgres_version`` and ``timeout`` as arguments.

    Returns:
        True when at least one result has a truthy ``out`` attribute,
        False otherwise (including when there are no results).
    """
    hostnames = [peer.hostname for peer in postgres_peers]
    logger.info("processing {}".format(hostnames))
    patt.host_id(postgres_peers)
    patt.check_dup_id(postgres_peers)

    script_args = ['wait_pg_isready', postgres_version, timeout]
    outcome = patt.exec_script(
        nodes=postgres_peers,
        src="./dscripts/pg_wait_ready.sh",
        args=script_args,
        sudo=True,
    )
    log_results(outcome)

    produced_output = [bool(entry.out) for entry in outcome]
    return any(produced_output)
Пример #17
0
def walg_authorize_keys(cluster_name, nodes, keys=[]):
    """Run the ``ssh_authorize_keys`` action of the wal-g script on ``nodes``.

    Every entry of ``keys`` is written on its own line, followed by one
    empty line, to an ASCII-encoded temporary file. That file is supplied
    as payload and its base name is passed to the script after
    ``cluster_name``. The script runs with sudo.

    Returns:
        False when at least one result carries a truthy ``error``
        attribute, True otherwise. Results without an ``error`` attribute
        are ignored.
    """
    patt.host_id(nodes)
    # NOTE: the duplicate-id check (patt.check_dup_id) is disabled for this step.
    with tempfile.NamedTemporaryFile(mode='w+', encoding='ascii') as key_file:
        for key in keys + [""]:
            key_file.write("{}\n".format(key))
        # Make sure the content is on disk before the file is used as payload.
        key_file.flush()

        payload_name = os.path.basename(key_file.name)
        outcome = patt.exec_script(
            nodes=nodes,
            src="./dscripts/d27.walg.sh",
            payload=key_file.name,
            args=['ssh_authorize_keys', cluster_name, payload_name],
            sudo=True,
        )
        log_results(outcome)

    error_flags = [bool(entry.error) for entry in outcome if hasattr(entry, 'error')]
    return not any(error_flags)
Пример #18
0
def disk_init(nodes, vol_size, mnt=None, user=None):
    """Run the data-volume script on ``nodes`` for a mount point or a user.

    Nodes sharing a hostname are collapsed to a single entry (the last one
    seen wins). After ``patt.host_id``, ``patt.check_dup_id`` and
    ``util_init`` have been called on them, ``./dscripts/data_vol.py`` is
    executed with sudo using either ``-m <mnt>`` or ``-u <user>`` followed
    by ``-s <vol_size>``. Results go to ``log_results``; nothing is
    returned.

    Args:
        nodes: target nodes.
        vol_size: value passed to the script's ``-s`` option.
        mnt: value for the ``-m`` option; takes precedence over ``user``
            when both are given.
        user: value for the ``-u`` option, used only when ``mnt`` is falsy.

    Raises:
        ValueError: when neither ``mnt`` nor ``user`` is given. This is
            checked before any node is touched; previously this case
            failed with an unbound ``result`` after the preparatory calls
            had already run.
    """
    if mnt:
        target_args = ['-m', mnt]
    elif user:
        target_args = ['-u', user]
    else:
        raise ValueError("disk_init requires either 'mnt' or 'user'")

    nodes = list({n.hostname: n for n in nodes}.values())
    logger.debug("disk init {}".format(nodes))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    util_init(nodes)

    result = patt.exec_script(nodes=nodes,
                              src="./dscripts/data_vol.py",
                              args=target_args + ['-s', vol_size],
                              sudo=True)
    log_results(result)
Пример #19
0
def patroni_enable(postgres_version, patroni_version, nodes):
    """Enable patroni on ``nodes`` and return the check output of one node.

    First runs the ``enable`` action of ``./dscripts/d30.patroni.sh`` with
    sudo on all nodes, passing ``postgres_version`` and ``patroni_version``,
    and logs the results. Then runs the ``check`` action on one randomly
    chosen node.

    Returns:
        ``"\\n"`` followed by the ``out`` attribute of the first check
        result, or None when the check produced no result.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    result = patt.exec_script(nodes=nodes,
                              src="./dscripts/d30.patroni.sh",
                              args=['enable'] + [postgres_version] +
                              [patroni_version],
                              sudo=True)
    log_results(result)

    random_node = [random.choice(nodes)]
    result = patt.exec_script(nodes=random_node,
                              src="./dscripts/d30.patroni.sh",
                              args=['check'],
                              sudo=True)
    for r in result:
        logger.warning("hostname: {}".format(r.hostname))
        # The error used to be logged after the return statement and was
        # therefore never reported; log it before returning instead.
        error = getattr(r, 'error', None)
        if error:
            logger.warning("error: {}".format(error))
        return "\n{}".format(r.out)
    return None
Пример #20
0
def postgres_gc_cron(nodes, vaccum_full_df_percent, target, postgres_version):
    """Render the postgres-gc template to ``target`` on ``nodes``.

    ``./config/postgres-gc.sh.tmpl`` is supplied as payload to
    ``./dscripts/tmpl2file.py``, which runs with sudo and is asked to write
    ``target`` with ``--chmod 755``. Three ``--dictionary_key_val`` entries
    are passed: ``pc`` (``vaccum_full_df_percent``), ``vacuumdb_option``
    (``--skip-locked`` when ``postgres_version >= 12``, otherwise empty)
    and ``postgres_version``. Results go to ``log_results``; nothing is
    returned.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    tmpl = "./config/postgres-gc.sh.tmpl"
    vacuumdb_option = "--skip-locked" if postgres_version >= 12 else ""

    script_args = [
        '-t', os.path.basename(tmpl),
        '-o', target,
        '--chmod', '755',
        '--dictionary_key_val', "pc={}".format(vaccum_full_df_percent),
        '--dictionary_key_val', "vacuumdb_option={}".format(vacuumdb_option),
        '--dictionary_key_val', "postgres_version={}".format(postgres_version),
    ]
    outcome = patt.exec_script(nodes=nodes,
                               src="./dscripts/tmpl2file.py",
                               payload=tmpl,
                               args=script_args,
                               sudo=True)
    log_results(outcome)
Пример #21
0
def postgres_ssl_cert(cluster_name,
                      postgres_user='******',
                      nodes=[],
                      keep_ca=True):
    """Distribute the cluster CA to ``nodes`` and run the certificate script.

    For each of ``root.key`` and ``root.crt`` the content is fetched with
    ``postgres_get_cert``. When that fails or does not return ``str`` /
    ``bytes``, the CA is generated on the node with the smallest hostname
    and fetched from that node; this is attempted up to five times with a
    three second pause between attempts. The content is then written to a
    temporary file and pushed to every node with the ``copy_ca`` action of
    ``dscripts/ssl_cert_postgres.sh``. Finally
    ``dscripts/ssl_cert_postgres.py`` is run on all nodes.

    Args:
        cluster_name: name passed to the scripts and used in the CA common
            name and in the local CA copy's file name.
        postgres_user: user passed to the scripts and to
            ``postgres_get_cert``.
        nodes: target nodes. The default list is only read in this
            function.
        keep_ca: when true and ``source.whoami(nodes)`` is falsy, a copy of
            the CA files is kept under ``~/.patt/ca`` (existing copies are
            not overwritten).

    Raises:
        AssertionError: when a CA file could not be retrieved after all
            attempts.
    """
    ssl_script = "misc/self_signed_certificate.py"
    source = patt.Source()
    logger.info("processing {}".format([n.hostname for n in nodes]))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    # if run via puppet it will install the cert on the running agent but
    # not the other nodes before running the installer.
    # Retrieve and distribute the cert to all nodes
    running_node = source.whoami(nodes)
    ca_provider = nodes
    # Always bind self_ca_dir: it used to stay undefined when not installing
    # from a peer with keep_ca disabled, which broke the check further down.
    self_ca_dir = None
    if not running_node and keep_ca:
        # not installing from peer: keep a local copy of the CA
        self_home = os.path.expanduser("~")
        self_ca_dir = self_home + '/' + '.patt/ca'
        Path(self_ca_dir).mkdir(parents=True, exist_ok=True, mode=0o700)

    for i in ['root.key', 'root.crt']:
        tmp = None
        for k in range(5):
            # Explicit type check instead of an assert inside the try block,
            # so the retry path also works when assertions are disabled, and
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            try:
                tmp = postgres_get_cert(q=i,
                                        postgres_user=postgres_user,
                                        nodes=ca_provider)
            except Exception:
                tmp = None
            if isinstance(tmp, (str, bytes)):
                break

            # generate CA on first node and retry
            first_node = sorted(nodes, key=lambda n: n.hostname)[0]
            result = patt.exec_script(
                nodes=[first_node],
                src="dscripts/ssl_cert_postgres.py",
                payload=ssl_script,
                args=['-c'] + [cluster_name] + ['-s'] +
                [os.path.basename(ssl_script)] + ['-u'] + [postgres_user] +
                ['--ca_country_name', "'UK'"] +
                ['--ca_state_or_province_name', "'United Kingdom'"] +
                ['--ca_locality_name', "'Cambridge'"] +
                ['--ca_organization_name', "'Patroni Postgres Cluster'"] +
                ['--ca_common_name', "'CA {}'".format(cluster_name)] +
                ['--ca_not_valid_after', "'3650'"] + ['-p'] +
                [p.hostname for p in nodes] +
                [" ".join(p.ip_aliases) for p in nodes],
                sudo=True)
            log_results(result)
            tmp = postgres_get_cert(q=i,
                                    postgres_user=postgres_user,
                                    nodes=[first_node])
            if isinstance(tmp, (str, bytes)):
                break
            time.sleep(3)

        if not isinstance(tmp, (str, bytes)):
            raise AssertionError("unable to retrieve {}".format(i))
        if isinstance(tmp, bytes):
            # The file below is opened in text mode, which rejects bytes.
            tmp = tmp.decode()

        with tempfile.TemporaryDirectory() as tmp_dir:
            cert_path = tmp_dir + '/' + i
            with open(cert_path, "w") as cf:
                cf.write(tmp)
                cf.write('\n')
            os.chmod(cert_path, 0o640)
            if self_ca_dir and os.path.isdir(self_ca_dir):
                t = self_ca_dir + '/' + cluster_name + '-' + os.path.basename(
                    cert_path)
                # never overwrite a CA file that was kept earlier
                if not os.path.isfile(t):
                    shutil.copy2(cert_path, t)
            result = patt.exec_script(
                nodes=nodes,
                src="dscripts/ssl_cert_postgres.sh",
                payload=cert_path,
                args=['copy_ca', os.path.basename(cert_path), i],
                sudo=True)
            log_results(result, hide_stdout=True)

    result = patt.exec_script(
        nodes=nodes,
        src="dscripts/ssl_cert_postgres.py",
        payload=ssl_script,
        args=['-c'] + [cluster_name] + ['-s'] +
        [os.path.basename(ssl_script)] + ['-u'] + [postgres_user] + ['-p'] +
        [p.hostname for p in nodes] +
        [" ".join(p.ip_aliases) for p in nodes],
        sudo=True)
    log_results(result)
Пример #22
0
        # Use the configured haproxy peers when present, otherwise fall back
        # to the cluster nodes.
        if cfg.haproxy_peers:
            haproxy_peers = patt.to_nodes (cfg.haproxy_peers, ssh_login, cfg.ssh_keyfile)
        else:
            haproxy_peers=nodes

        # Optional wal-g ssh destination; stays None when not configured.
        walg_ssh_destination = None
        if cfg.walg_ssh_destination:
            walg_ssh_destination =  patt.to_nodes ([cfg.walg_ssh_destination], ssh_login, cfg.ssh_keyfile)

        progress_bar (1, 14)

        # Peer check
        # Every node returned by patt.check_priv must report sudo.
        # NOTE(review): host_id / host_ip_aliases are called on the whole
        # peer group once per checked node rather than once per group --
        # confirm the repetition is intended.
        for p in [etcd_peers, postgres_peers, haproxy_peers]:
            for n in patt.check_priv(p):
                assert (n.sudo == True)
                patt.host_id(p)
                patt.host_ip_aliases(p)
        patt.check_dup_id ([p for p in etcd_peers])
        patt.check_dup_id ([p for p in postgres_peers])
        patt.check_dup_id ([p for p in haproxy_peers])

        # Log a summary of the cluster layout.
        logger.info ("cluster name   : {}".format(cfg.cluster_name))
        logger.info ("cluster nodes  : {}".format([(n.hostname, n.ip_aliases) for n in nodes]))
        logger.info ("etcd_peers     : {}".format([(n.hostname, n.id, n.ip_aliases) for n in etcd_peers]))
        logger.info ("postgres_peers : {}".format(
            [(n.hostname, n.id, n.ip_aliases) for n in postgres_peers]))
        # haproxy peers are only logged when a haproxy template file is set.
        if cfg.haproxy_template_file:
            logger.info ("haproxy_peers  : {}".format([(n.hostname, n.id) for n in haproxy_peers]))

        progress_bar (2, 14)
Пример #23
0
def etcd_init(cluster_name, nodes):
    """Initialise or extend the etcd cluster ``cluster_name`` on ``nodes``.

    Steps, all driven through ``./dscripts/d10.etcd.sh`` with sudo:

    1. Run ``init`` on every node.
    2. When ``get_members`` reports neither ok nor bad members, configure
       and enable a new cluster on the node chosen by ``pick_init_node``.
    3. Remove bad members that are not part of ``nodes``.
    4. Add the remaining nodes one at a time through the first healthy
       node, then configure and enable the grown member set.
    5. Run ``check`` on a random healthy node.

    Returns:
        ``"\\n"`` followed by the ``out`` attribute of the first check
        result, or None when the check produced no result.

    Raises:
        EtcdError: when the init node is not a healthy member after a new
            cluster has been configured.
        AssertionError: when no controller node is usable, or when no
            healthy member or still a bad member is left at the end.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    id_hosts = [n.id + '_' + n.hostname for n in nodes]
    result = patt.exec_script(nodes=nodes, src="./dscripts/d10.etcd.sh",
                              args=['init'] + [cluster_name] + id_hosts, sudo=True)
    log_results(result)

    good_members = get_members(nodes, cluster_name, 'ok')
    bad_members = get_members(nodes, cluster_name, 'bad')

    # The cluster counts as initialised as soon as any member is reported.
    initialized = bool(good_members or bad_members)
    logger.info("initialized cluster: {}".format(initialized))
    logger.info("member ok {}".format(good_members))
    logger.info("member ko {}".format(bad_members))

    init_node = pick_init_node(nodes)

    if not initialized:
        init_id_host = "{}_{}".format(init_node.id, init_node.hostname)

        result = patt.exec_script(nodes=[init_node], src="./dscripts/d10.etcd.sh",
                                  args=['config'] + ['new'] + [cluster_name] +
                                  [init_id_host], sudo=True)
        log_results(result)

        result = patt.exec_script(nodes=[init_node], src="./dscripts/d10.etcd.sh",
                                  args=['enable'] + [cluster_name] + [init_id_host], sudo=True)
        log_results(result)

        bad_members = get_members([init_node], cluster_name, 'bad')
        for _ in range(3):
            good_members = get_members([init_node], cluster_name, 'ok')
            if good_members:
                break
            time.sleep(11)  # > than dscripts/d10.etcd.sh file locks wait

        logger.info("member ok {}".format(good_members))
        logger.info("member ko {}".format(bad_members))

        if init_node.hostname not in good_members:
            # roll back the half-initialised node before reporting the failure
            result = patt.exec_script(nodes=[init_node], src="./dscripts/d10.etcd.sh",
                                      args=['disable'] + [cluster_name] + [init_id_host], sudo=True)
            raise EtcdError('cluster init error', "error initialising new cluster {}".format(cluster_name))

    # process any remaining members one by one using one of the healthy nodes as a controller
    good_members = get_members([init_node], cluster_name, 'ok')
    bad_members = get_members([init_node], cluster_name, 'bad')

    ctrl = [n for n in nodes if n.hostname in good_members]
    # NOTE(review): members is the same list object as ctrl, so nodes
    # appended to members below also show up in ctrl -- confirm intended.
    members = ctrl

    known_hostnames = [l.hostname for l in nodes]
    nodes_to_remove = [n for n in bad_members if n not in known_hostnames]
    logger.info("to remove: {}".format(nodes_to_remove))
    if nodes_to_remove:
        result = patt.exec_script(nodes=[ctrl[0]], src="./dscripts/d10.etcd.sh",
                                  args=['member_remove'] + [cluster_name] + nodes_to_remove, sudo=True)
        log_results(result)

    good_members = get_members([init_node], cluster_name, 'ok')
    bad_members = get_members([init_node], cluster_name, 'bad')

    nodes_to_process = [n for n in nodes if n.hostname not in good_members and n.hostname not in bad_members]
    logger.info("to process: {}".format([n.hostname for n in nodes_to_process]))
    for m in nodes_to_process:
        logger.info("process etcd member {}".format(m.hostname))
        if m.hostname not in [n.hostname for n in members]:
            members.append(m)
        id_hosts = [n.id + '_' + n.hostname for n in members]

        assert ctrl, "no usable controller node"
        # only the first control node is used to add member

        # retry the member_add until no result reports an error (10 tries)
        for _ in range(10):
            result = patt.exec_script(nodes=[ctrl[0]], src="./dscripts/d10.etcd.sh",
                                      args=['member_add'] + [cluster_name] + id_hosts, sudo=True)
            log_results(result)
            if not any([bool(n.error) for n in result]):
                break
            time.sleep(3.0)

        result = patt.exec_script(nodes=members, src="./dscripts/d10.etcd.sh",
                                  args=['config'] + ['existing'] + [cluster_name] +
                                  id_hosts, sudo=True)
        log_results(result)

        result = patt.exec_script(nodes=members, src="./dscripts/d10.etcd.sh",
                                  args=['enable'] + [cluster_name] + id_hosts, sudo=True)
        log_results(result)

    good_members = get_members([init_node], cluster_name, 'ok')
    bad_members = get_members([init_node], cluster_name, 'bad')
    logger.warning("member ok {}".format(good_members))
    logger.warning("member ko {}".format(bad_members))
    assert good_members
    ok_nodes = [n for n in nodes if n.hostname in good_members and n.hostname not in bad_members]
    assert ok_nodes
    if bad_members:
        # give the cluster a moment, then re-check through the healthy nodes
        time.sleep(3)
        bad_members = get_members(ok_nodes, cluster_name, 'bad')
        assert not bad_members

    random_node = [random.choice(ok_nodes)]
    result = patt.exec_script(nodes=random_node, src="./dscripts/d10.etcd.sh",
                              args=['check'], sudo=True)
    for r in result:
        logger.warning("hostname: {}".format(r.hostname))
        # The error used to be logged after the return statement and was
        # therefore never reported; log it before returning instead.
        error = getattr(r, 'error', None)
        if error:
            logger.warning("error: {}".format(error))
        return "\n{}".format(r.out)
    return None