Exemplo n.º 1
0
def engine_replica_mismatch(em_client, engine_rev_counter_disabled):  # NOQA
    """
    Check how the engine reports replicas whose revision counter setting
    does not match its own.

    Two replicas are created: the first uses the same
    `disable_revision_counter` value as the engine, the second uses the
    opposite value. After the volume is started with both replicas, the
    first replica must be in mode `RW` and the second in mode `ERR`.

    :param em_client: client handed to `create_engine_process` and
        cleaned up at the end.
    :param engine_rev_counter_disabled: value passed to the engine as
        `disable_revision_counter`.
    """
    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    replica_dirs = [tempfile.mkdtemp(), tempfile.mkdtemp()]

    # (process name, data directory, disable_revision_counter) per replica.
    replica_specs = (
        (REPLICA_NAME, replica_dirs[0], engine_rev_counter_disabled),
        (REPLICA_2_NAME, replica_dirs[1], not engine_rev_counter_disabled),
    )
    replica_clients = []
    for name, directory, rev_counter_disabled in replica_specs:
        process = create_replica_process(
            rm_client,
            name,
            replica_dir=directory,
            disable_revision_counter=rev_counter_disabled)
        client = get_replica_client_with_delay(
            ReplicaClient(get_process_address(process)))
        client.replica_create(size=SIZE_STR)
        replica_clients.append(client)

    engine = create_engine_process(
        em_client, disable_revision_counter=engine_rev_counter_disabled)
    controller = ControllerClient(get_process_address(engine))
    r1_url = replica_clients[0].url
    r2_url = replica_clients[1].url
    volume = controller.volume_start(replicas=[r1_url, r2_url])
    assert volume.replicaCount == 2

    # The replica that matches the engine (replica1) must be `RW`; the
    # mismatched one (replica2) must be reported as `ERR`.
    replicas = controller.replica_list()
    assert len(replicas) == 2
    expected_modes = ((r1_url, 'RW'), (r2_url, 'ERR'))
    verified = [False] * len(expected_modes)
    for replica in replicas:
        for idx, (url, mode) in enumerate(expected_modes):
            if replica.address == url:
                assert replica.mode == mode
                verified[idx] = True
    # Both replicas must actually have been found in the list.
    assert verified[0]
    assert verified[1]

    cleanup_process(em_client)
    cleanup_process(rm_client)
Exemplo n.º 2
0
def test_expand_multiple_times():
    """
    Run the create / start / expand / tear-down cycle 30 times in a row.

    Each iteration creates a fresh engine process and a fresh replica
    process, starts the volume with that single replica, expands it to
    `EXPANDED_SIZE`, waits for the expansion to be confirmed and finally
    cleans up both process managers.
    """
    for _ in range(30):
        em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
        controller = ControllerClient(
            get_process_address(create_engine_process(em_client)))
        rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
        replica = ReplicaClient(
            get_process_address(
                create_replica_process(rm_client, REPLICA_NAME)))
        # Fixed pause before the first replica call
        # (presumably to let the replica server come up - TODO confirm).
        time.sleep(3)
        cleanup_replica(replica)

        open_replica(replica)
        volume = controller.volume_start(replicas=[replica.url])
        assert volume.replicaCount == 1

        # Trigger the expansion, then wait until it is reported as done.
        for step in (expand_volume_with_frontend,
                     wait_and_check_volume_expansion):
            step(controller, EXPANDED_SIZE)

        for manager in (em_client, rm_client):
            cleanup_process(manager)
def test_multiple_volumes(pm_client, em_client):  # NOQA
    """
    Create five replica/engine pairs one by one, then delete the engines.

    For every index a replica process is started in a fresh temporary
    directory and checked through the create response, `process_get`
    and `process_list`. An engine with its own volume is then created
    on top of that single replica. Finally each engine is deleted and
    the remaining engine count is verified after every deletion.
    """
    def assert_running(proc, expected_name):
        # Shared check for a process entry: expected name, running state.
        assert proc.spec.name == expected_name
        assert proc.status.state == PROC_STATE_RUNNING

    # The replica process manager must start out empty.
    assert len(pm_client.process_list()) == 0

    cnt = 5

    for idx in range(cnt):
        suffix = str(idx)
        replica_name = REPLICA_NAME_BASE + suffix
        replica = create_replica_process(pm_client,
                                         name=replica_name,
                                         replica_dir=tempfile.mkdtemp())
        assert_running(replica, replica_name)
        assert_running(pm_client.process_get(name=replica_name),
                       replica_name)

        replicas = pm_client.process_list()
        assert len(replicas) == idx + 1
        assert replica_name in replicas
        replica = replicas[replica_name]
        assert_running(replica, replica_name)

        engine_name = ENGINE_NAME_BASE + suffix
        volume_name = VOLUME_NAME_BASE + suffix
        # Each engine is backed by exactly the replica created above.
        engine = create_engine_process(
            em_client,
            name=engine_name,
            volume_name=volume_name,
            replicas=["tcp://localhost:" + str(replica.status.port_start)])

        assert engine.spec.name == engine_name
        check_dev_existence(volume_name)

        engines = em_client.process_list()
        assert len(engines) == idx + 1
        assert engine_name in engines
        assert_running(engines[engine_name], engine_name)

        assert len(pm_client.process_list()) == idx + 1

    for idx in range(cnt):
        suffix = str(idx)
        engine_name = ENGINE_NAME_BASE + suffix
        volume_name = VOLUME_NAME_BASE + suffix
        delete_process(em_client, engine_name)
        wait_for_process_deletion(em_client, engine_name)
        wait_for_dev_deletion(volume_name)

        engines = em_client.process_list()
        assert len(engines) == (cnt - 1 - idx)
        assert engine_name not in engines
Exemplo n.º 4
0
def grpc_replica_client2(process_manager_client):
    """
    Create the second replica process and return a client for it.

    Sleeps for 3 seconds, starts a replica process named
    `REPLICA_2_NAME`, wraps its address in a `ReplicaClient` and returns
    whatever `cleanup_replica` returns for that client.
    """
    time.sleep(3)
    process = create_replica_process(process_manager_client, REPLICA_2_NAME)
    client = ReplicaClient(get_process_address(process))
    return cleanup_replica(client)
Exemplo n.º 5
0
    def generate_grpc_replica_client(replica_name, args=None):
        """
        Create a replica process and return a cleaned-up client for it.

        :param replica_name: name of the replica process to create.
        :param args: optional list of extra arguments forwarded to
            `create_replica_process`; defaults to an empty list.
        :return: the result of `cleanup_replica` for the new client.
        """
        # Build a fresh list per call instead of sharing one mutable
        # default object between all invocations.
        if args is None:
            args = []

        r = create_replica_process(process_manager_client,
                                   replica_name,
                                   args=args)

        listen = get_process_address(r)

        c = ReplicaClient(listen)
        # The client is also stored as an attribute on
        # `grpc_replica_client` (presumably so the enclosing scope can
        # reach it later - TODO confirm against the enclosing fixture).
        grpc_replica_client.replica_client = cleanup_replica(c)
        return grpc_replica_client.replica_client
def test_start_stop_replicas(pm_client):  # NOQA
    """
    Start ten replica processes, then delete them one by one.

    Every created process is verified three ways: through the create
    response, through `process_get` and through its entry in
    `process_list`. During deletion the process count is checked before
    and after each removal, and the list must be empty at the end.
    """
    cnt = 10

    rs = pm_client.process_list()
    assert len(rs) == 0

    for i in range(cnt):
        tmp_dir = tempfile.mkdtemp()
        name = REPLICA_NAME_BASE + str(i)
        r = create_replica_process(pm_client, name=name, replica_dir=tmp_dir)

        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

        r = pm_client.process_get(name=name)
        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

        rs = pm_client.process_list()
        assert len(rs) == (i + 1)
        assert name in rs
        # Check the entry returned by process_list() itself; without this
        # lookup the asserts below would only re-check the process_get()
        # result a second time.
        r = rs[name]
        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

    for i in range(cnt):
        rs = pm_client.process_list()
        assert len(rs) == (cnt - i)

        name = REPLICA_NAME_BASE + str(i)
        r = pm_client.process_delete(name=name)
        assert r.spec.name == name
        # Deletion is asynchronous: the response may show either state.
        assert r.status.state in (PROC_STATE_STOPPING, PROC_STATE_STOPPED)
        wait_for_process_deletion(pm_client, name)

        rs = pm_client.process_list()
        assert len(rs) == (cnt - 1 - i)

    rs = pm_client.process_list()
    assert len(rs) == 0
Exemplo n.º 7
0
def test_engine_restart_after_sigkill(bin):  # NOQA
    """
    Verify that an engine can be brought back after being SIGKILLed.

    Steps:
    1. Create and initialize one engine and two replicas.
    2. Start the volume on the engine with both replicas.
    3. Take two snapshots.
    4. Kill the engine process with SIGKILL.
    5. Wait until the engine process is reported as errored.
    6. Simulate a volume detach by cleaning up the engine and
       replica processes.
    7. Simulate a volume reattach by recreating the processes and
       starting the volume again.
    8. Check the engine is up with two replicas.
    9. Check the engine still works by creating, removing and purging
       snapshots, then comparing the snapshot list.
    """
    def launch_replica(name, directory):
        # Start a replica process on `directory` and hand back a client
        # for it after the fixed 3 second pause.
        process = create_replica_process(rm_client, name,
                                         replica_dir=directory)
        client = ReplicaClient(get_process_address(process))
        time.sleep(3)
        return client

    def snapshot_cmd(*args, debug=False):
        # CLI invocation of a `snapshot` subcommand against whichever
        # controller is current at call time.
        head = [bin, '--debug'] if debug else [bin]
        return head + ['--url', controller.address, 'snapshot'] + list(args)

    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    controller = ControllerClient(
        get_process_address(create_engine_process(em_client)))

    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    replica_dir1 = tempfile.mkdtemp()
    replica_dir2 = tempfile.mkdtemp()
    replica1 = launch_replica(REPLICA_NAME, replica_dir1)
    cleanup_replica(replica1)
    replica2 = launch_replica(REPLICA_2_NAME, replica_dir2)
    cleanup_replica(replica2)

    open_replica(replica1)
    open_replica(replica2)
    volume = controller.volume_start(replicas=[
        replica1.url, replica2.url,
    ])
    assert volume.replicaCount == 2

    snap0 = subprocess.check_output(
        snapshot_cmd('create'), encoding='utf-8').strip()
    # The second chain entry of replica1 must be the new snapshot file.
    chain_entry = replica1.replica_get().chain[1]
    assert chain_entry == 'volume-snap-{}.img'.format(snap0)

    snap1 = subprocess.check_output(
        snapshot_cmd('create',
                     '--label', 'name=snap1', '--label', 'key=value'),
        encoding='utf-8').strip()

    # SIGKILL every process whose `ps aux` line mentions the volume name.
    kill_script = (
        "kill -9 $(ps aux | grep %s | grep -v grep | awk '{print $2}')" %
        VOLUME_NAME)
    subprocess.check_call(["bash", "-c", kill_script])
    wait_for_process_error(em_client, ENGINE_NAME)

    # Mock detach: drop every engine and replica process.
    cleanup_process(em_client)
    cleanup_process(rm_client)

    # Mock reattach:
    #   1. Recreate the replica processes on the same directories.
    #   2. Call replica_create() to initialize each replica server.
    #   3. Create a new engine process and start it with both replicas.
    replica1 = launch_replica(REPLICA_NAME, replica_dir1)
    replica1.replica_create(size=SIZE_STR)
    replica2 = launch_replica(REPLICA_2_NAME, replica_dir2)
    replica2.replica_create(size=SIZE_STR)

    controller = ControllerClient(
        get_process_address(create_engine_process(em_client)))
    volume = controller.volume_start(replicas=[
        replica1.url, replica2.url,
    ])
    assert volume.replicaCount == 2

    # The restarted engine must still handle snapshot operations.
    snap2 = subprocess.check_output(
        snapshot_cmd('create'), encoding='utf-8').strip()
    subprocess.check_call(snapshot_cmd('rm', snap1))
    subprocess.check_call(snapshot_cmd('purge'))
    wait_for_purge_completion(controller.address)
    ls_output = subprocess.check_output(
        snapshot_cmd('ls', debug=True), encoding='utf-8')

    # Only snap2 and snap0 remain, newest first; snap1 was removed.
    assert ls_output == 'ID\n{}\n{}\n'.format(snap2, snap0)

    cleanup_process(em_client)
    cleanup_process(rm_client)
Exemplo n.º 8
0
def test_single_replica_failure_during_engine_start(bin):  # NOQA
    """
    Verify the engine keeps working when one replica is invalid at
    start time.

    Steps:
    1. Create and initialize one engine and two replicas.
    2. Start the volume on the engine with both replicas.
    3. Take two snapshots.
    4. Corrupt replica1 by rewriting the `Parent` field of its second
       snapshot's meta file.
    5. Simulate a volume detach by cleaning up the engine and
       replica processes.
    6. Simulate a volume reattach by recreating the processes and
       starting the volume again.
    7. Check the engine is up and reports replica1 as `ERR` and
       replica2 as `RW`.
    8. Check the engine still works by taking one more snapshot.
    9. Remove the `ERR` replica from the engine, then check that
       snapshot removal and purge succeed.
    10. Check the snapshot list is correct.
    """
    def launch_replica(name, directory):
        # Start a replica process on `directory` and hand back a client
        # for it after the fixed 3 second pause.
        process = create_replica_process(rm_client, name,
                                         replica_dir=directory)
        client = ReplicaClient(get_process_address(process))
        time.sleep(3)
        return client

    def snapshot_cmd(*args, debug=False):
        # CLI invocation of a `snapshot` subcommand against whichever
        # controller is current at call time.
        head = [bin, '--debug'] if debug else [bin]
        return head + ['--url', controller.address, 'snapshot'] + list(args)

    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    controller = ControllerClient(
        get_process_address(create_engine_process(em_client)))

    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    replica_dir1 = tempfile.mkdtemp()
    replica_dir2 = tempfile.mkdtemp()
    replica1 = launch_replica(REPLICA_NAME, replica_dir1)
    cleanup_replica(replica1)
    replica2 = launch_replica(REPLICA_2_NAME, replica_dir2)
    cleanup_replica(replica2)

    open_replica(replica1)
    open_replica(replica2)
    r1_url = replica1.url
    r2_url = replica2.url
    volume = controller.volume_start(replicas=[r1_url, r2_url])
    assert volume.replicaCount == 2

    snap0 = subprocess.check_output(
        snapshot_cmd('create'), encoding='utf-8').strip()
    # The second chain entry of replica1 must be the new snapshot file.
    chain_entry = replica1.replica_get().chain[1]
    assert chain_entry == 'volume-snap-{}.img'.format(snap0)

    snap1 = subprocess.check_output(
        snapshot_cmd('create',
                     '--label', 'name=snap1', '--label', 'key=value'),
        encoding='utf-8').strip()

    # Break replica1: point snap1's meta file at a parent that does
    # not exist.
    meta_path = os.path.join(replica_dir1,
                             'volume-snap-{}.img.meta'.format(snap1))
    with open(meta_path, 'r') as meta_file:
        meta = json.load(meta_file)
    with open(meta_path, 'w') as meta_file:
        meta["Parent"] = "invalid-parent.img"
        json.dump(meta, meta_file)

    # Mock detach: drop every engine and replica process.
    cleanup_process(em_client)
    cleanup_process(rm_client)

    # Mock reattach:
    #   1. Recreate the replica processes on the same directories.
    #   2. Call replica_create() to initialize each replica server.
    #   3. Create a new engine process and start it with both replicas.
    replica1 = launch_replica(REPLICA_NAME, replica_dir1)
    replica1.replica_create(size=SIZE_STR)
    replica2 = launch_replica(REPLICA_2_NAME, replica_dir2)
    replica2.replica_create(size=SIZE_STR)

    controller = ControllerClient(
        get_process_address(create_engine_process(em_client)))
    r1_url = replica1.url
    r2_url = replica2.url
    volume = controller.volume_start(replicas=[r1_url, r2_url])
    assert volume.replicaCount == 2

    # The corrupted replica1 must be `ERR`, the intact replica2 `RW`.
    replicas = controller.replica_list()
    assert len(replicas) == 2
    expected_modes = ((r1_url, 'ERR'), (r2_url, 'RW'))
    verified = [False] * len(expected_modes)
    for replica in replicas:
        for idx, (url, mode) in enumerate(expected_modes):
            if replica.address == url:
                assert replica.mode == mode
                verified[idx] = True
    # Both replicas must actually have been found in the list.
    assert verified[0]
    assert verified[1]

    # The engine still accepts new snapshots.
    snap2 = subprocess.check_output(
        snapshot_cmd('create'), encoding='utf-8').strip()

    # Drop the ERR replica before removing snapshots.
    controller.replica_delete(r1_url)
    replicas = controller.replica_list()
    assert len(replicas) == 1
    remaining = replicas[0]
    assert remaining.address == r2_url
    assert remaining.mode == "RW"

    subprocess.check_call(snapshot_cmd('rm', snap1))
    subprocess.check_call(snapshot_cmd('purge'))
    wait_for_purge_completion(controller.address)

    ls_output = subprocess.check_output(
        snapshot_cmd('ls', debug=True), encoding='utf-8')

    # Only snap2 and snap0 remain, newest first; snap1 was removed.
    assert ls_output == 'ID\n{}\n{}\n'.format(snap2, snap0)

    cleanup_process(em_client)
    cleanup_process(rm_client)
def test_engine_upgrade(pm_client, em_client):  # NOQA
    """
    Upgrade the engine of one volume while other volumes keep running.

    Three replica/engine pairs are created on `/tmp/replica<i>`. For
    volume 0 a second replica process is then started on the same
    directory with `LONGHORN_UPGRADE_BINARY`, the engine is upgraded to
    that binary and pointed at the new replica, and the old replica
    process is deleted. The block device must exist throughout and the
    engine must end up running; finally the engine is deleted.
    """
    def assert_running(proc, expected_name):
        # Shared check for a process entry: expected name, running state.
        assert proc.spec.name == expected_name
        assert proc.status.state == PROC_STATE_RUNNING

    # The replica process manager must start out empty.
    assert len(pm_client.process_list()) == 0

    dir_base = "/tmp/replica"
    cnt = 3

    for idx in range(cnt):
        suffix = str(idx)
        replica_dir = dir_base + suffix
        replica_name = REPLICA_NAME_BASE + suffix
        replica = create_replica_process(pm_client,
                                         name=replica_name,
                                         replica_dir=replica_dir)
        assert_running(replica, replica_name)
        assert_running(pm_client.process_get(name=replica_name),
                       replica_name)

        replicas = pm_client.process_list()
        assert len(replicas) == idx + 1
        assert replica_name in replicas
        replica = replicas[replica_name]
        assert_running(replica, replica_name)

        engine_name = ENGINE_NAME_BASE + suffix
        volume_name = VOLUME_NAME_BASE + suffix
        # Each engine is backed by exactly the replica created above.
        engine = create_engine_process(
            em_client,
            name=engine_name,
            volume_name=volume_name,
            replicas=["tcp://localhost:" + str(replica.status.port_start)])

        assert engine.spec.name == engine_name
        check_dev_existence(volume_name)

        engines = em_client.process_list()
        assert len(engines) == idx + 1
        assert engine_name in engines
        assert_running(engines[engine_name], engine_name)

    # Upgrade volume 0: start a replica from the upgrade binary on the
    # directory already used by the original replica 0.
    replica_dir = dir_base + "0"
    engine_name = ENGINE_NAME_BASE + "0"
    replica_name = REPLICA_NAME_BASE + "0"
    volume_name = VOLUME_NAME_BASE + "0"
    replica_name_upgrade = REPLICA_NAME_BASE + "0-upgrade"
    upgraded_replica = create_replica_process(
        pm_client,
        name=replica_name_upgrade,
        binary=LONGHORN_UPGRADE_BINARY,
        replica_dir=replica_dir)
    assert_running(upgraded_replica, replica_name_upgrade)

    upgraded_engine = upgrade_engine(
        em_client, LONGHORN_UPGRADE_BINARY, engine_name, volume_name,
        ["tcp://localhost:" + str(upgraded_replica.status.port_start)])
    assert upgraded_engine.spec.name == engine_name
    check_dev_existence(volume_name)

    # The old replica process is no longer needed once the engine has
    # been switched over.
    deleted = pm_client.process_delete(name=replica_name)
    assert deleted.spec.name == replica_name
    assert deleted.status.state in (PROC_STATE_STOPPING, PROC_STATE_STOPPED)

    wait_for_process_deletion(pm_client, replica_name)

    # Removing the old replica must not take the device away.
    check_dev_existence(volume_name)

    wait_for_process_running(em_client, engine_name)
    engines = em_client.process_list()
    assert engine_name in engines
    assert_running(engines[engine_name], engine_name)

    delete_process(em_client, engine_name)
    wait_for_process_deletion(em_client, engine_name)
    wait_for_dev_deletion(volume_name)
Exemplo n.º 10
0
def test_one_volume(pm_client, em_client):  # NOQA
    """
    Create one volume backed by three replicas, then tear it down.

    Three replica processes are started and each is verified through
    the create response, `process_get` and its `process_list` entry.
    One engine is created on top of all three replicas. The engine is
    then deleted, with `delete_process` called again both before and
    after the deletion completes to exercise duplicate calls, and the
    replicas are deleted last.
    """
    rs = pm_client.process_list()
    assert len(rs) == 0

    replica_args = []

    for i in range(3):
        tmp_dir = tempfile.mkdtemp()
        name = REPLICA_NAME_BASE + str(i)
        r = create_replica_process(pm_client, name=name, replica_dir=tmp_dir)

        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

        r = pm_client.process_get(name=name)
        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

        rs = pm_client.process_list()
        assert len(rs) == (i + 1)
        assert name in rs
        # Check the entry returned by process_list() itself; without this
        # lookup the asserts below would only re-check the process_get()
        # result a second time.
        r = rs[name]
        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

        replica_args.append("tcp://localhost:" + str(r.status.port_start))

    engine_name = ENGINE_NAME_BASE + "0"
    volume_name = VOLUME_NAME_BASE + "0"
    e = create_engine_process(em_client,
                              name=engine_name,
                              volume_name=volume_name,
                              replicas=replica_args)

    assert e.spec.name == engine_name

    check_dev_existence(volume_name)

    es = em_client.process_list()
    assert len(es) == 1
    assert engine_name in es
    e = es[engine_name]
    assert e.spec.name == engine_name
    assert e.status.state == PROC_STATE_RUNNING

    ps = pm_client.process_list()
    assert len(ps) == 3

    delete_process(em_client, engine_name)
    # A duplicate call while the deletion is in flight must not fail.
    delete_process(em_client, engine_name)
    wait_for_process_deletion(em_client, engine_name)
    # A duplicate call after the deletion has finished must not fail.
    delete_process(em_client, engine_name)

    # Deleting the engine must leave the replica processes untouched.
    ps = pm_client.process_list()
    assert len(ps) == 3

    for i in range(3):
        name = REPLICA_NAME_BASE + str(i)
        r = pm_client.process_delete(name=name)
        assert r.spec.name == name
        # Deletion is asynchronous: the response may show either state.
        assert r.status.state in (PROC_STATE_STOPPING, PROC_STATE_STOPPED)

        wait_for_process_deletion(pm_client, name)

    ps = pm_client.process_list()
    assert len(ps) == 0