Exemplo n.º 1
0
def become_leader(connection):
    """Take over as collective leader and try to recruit the other members.

    Any active follower task is killed and a pending retry is cancelled.
    The local address of ``connection`` is recorded as the current leader,
    then ``try_assimilate`` is spawned for every collective member whose
    address is not in the skip set (entries of ``cfm.cfgstreams``, our own
    address and the peer address of ``connection``) and whose name is not
    ours.  A rebalance is scheduled at the end.

    :param connection: object providing ``getsockname()`` and
        ``getpeername()``; only element ``[0]`` of each result is used.
    """
    global currentleader
    global follower
    global retrythread
    log.log({'info': 'Becoming leader of collective',
             'subsystem': 'collective'})
    if follower:
        # We can not follow someone else while leading
        follower.kill()
        cfm.stop_following()
        follower = None
    if retrythread:
        retrythread.cancel()
        retrythread = None
    currentleader = connection.getsockname()[0]
    peeraddr = connection.getpeername()[0]
    myname = get_myname()
    excluded = set(cfm.cfgstreams) | {currentleader, peeraddr}
    for member in cfm.list_collective():
        candidate = cfm.get_collective_member(member)['address']
        if candidate not in excluded and member != myname:
            eventlet.spawn_n(try_assimilate, candidate)
    schedule_rebalance()
Exemplo n.º 2
0
def follow_leader(remote, leader):
    """Follow ``remote`` until the channel ends, then restart membership.

    If the follow is interrupted by ``greenlet.GreenletExit`` this is
    treated as a deliberate shutdown: it is logged and nothing else
    happens.  In every other case (normal return or any other exception
    from ``cfm.follow_channel``) the leader is considered lost, following
    is stopped, ``currentleader`` is cleared and ``start_collective`` is
    spawned.  Exceptions other than ``GreenletExit`` still propagate after
    that cleanup.

    :param remote: channel handed to ``cfm.follow_channel``
    :param leader: leader identifier, used only in the log message
    """
    global currentleader
    killed = False
    try:
        cfm.follow_channel(remote)
    except greenlet.GreenletExit:
        killed = True
    finally:
        if killed:
            log.log({'info': 'Previous following cleanly closed',
                     'subsystem': 'collective'})
        else:
            lostmsg = ('Current leader ({0}) has disappeared, restarting '
                       'collective membership'.format(leader))
            log.log({'info': lostmsg, 'subsystem': 'collective'})
            # The leader is gone, so begin the startup procedure again
            cfm.stop_following()
            currentleader = None
            eventlet.spawn_n(start_collective)
Exemplo n.º 3
0
def follow_leader(remote):
    """Follow ``remote`` and restart collective membership once it returns.

    ``cfm.follow_channel`` is called first; the remaining statements only
    run after it returns normally.  If it raises, the exception propagates
    and none of the restart steps below are executed.

    :param remote: channel handed to ``cfm.follow_channel``
    """
    global currentleader
    cfm.follow_channel(remote)
    # The leader has folded, time to startup again...
    cfm.stop_following()
    currentleader = None
    # Run the startup procedure in its own greenthread
    eventlet.spawn_n(start_collective)
Exemplo n.º 4
0
def start_collective():
    """Try to join the collective by contacting each other member in turn.

    Any existing follower task is killed first.  Nothing further is done
    when ``cfm.cfgstreams`` is non-empty and ``cfm.check_quorum()`` passes,
    or when ``leader_init.active`` is set.  Otherwise every member other
    than ourselves is tried in sorted order via ``connect_to_leader`` until
    one attempt returns a true value.  If none does, another run of this
    function is scheduled roughly 30 seconds later.
    """
    global follower
    global retrythread
    if follower:
        follower.kill()
        cfm.stop_following()
        follower = None
    try:
        if cfm.cfgstreams:
            # With quorum and connected streams we are a working leader
            cfm.check_quorum()
            return
    except exc.DegradedCollective:
        pass
    if leader_init.active:
        # Currently transmitting data to a follower, do not connect out
        return
    myname = get_myname()
    joined = False
    for member in sorted(cfm.list_collective()):
        if member == myname:
            continue
        if cfm.cfgleader is None:
            cfm.stop_following(True)
        address = cfm.get_collective_member(member)['address']
        log.log({
            'info': 'Performing startup attempt to {0}'.format(address),
            'subsystem': 'collective',
        })
        if connect_to_leader(name=myname, leader=address):
            joined = True
            break
    if not joined:
        retrythread = eventlet.spawn_after(30 + random.random(),
                                           start_collective)
Exemplo n.º 5
0
def start_collective():
    """Probe the other collective members and follow one that accepts us.

    ``initting`` is set for the duration of the call.  Nothing is attempted
    when a follower already exists, when ``cfm.cfgstreams`` is non-empty
    and ``cfm.check_quorum()`` passes, or when ``leader_init.active`` is
    set.  Otherwise a connection to every other member is opened through a
    green pool and ``connect_to_leader`` is tried on each usable connection
    for as long as no follower has been established; connections that are
    not used, or whose attempt fails, are closed.

    Any exception raised along the way is logged rather than propagated.
    On exit, a retry is scheduled about 5 seconds later whenever no retry
    is pending and there is still no follower.
    """
    global follower
    global retrythread
    global initting
    initting = True
    retrythread = None
    try:
        cfm.membership_callback = schedule_rebalance
        if follower is not None:
            # Already following someone; the finally clause resets initting
            return
        try:
            if cfm.cfgstreams:
                cfm.check_quorum()
                # Do not start if we have quorum and are leader
                return
        except exc.DegradedCollective:
            pass
        if leader_init.active:  # do not start trying to connect if we are
            # xmitting data to a follower
            return
        myname = get_myname()
        connecto = []
        for member in sorted(cfm.list_collective()):
            if member == myname:
                continue
            if cfm.cfgleader is None:
                cfm.stop_following(True)
            ldrcandidate = cfm.get_collective_member(member)['address']
            connecto.append(ldrcandidate)
        conpool = greenpool.GreenPool(64)
        connections = conpool.imap(create_connection, connecto)
        for ent in connections:
            member, remote = ent
            if isinstance(remote, Exception):
                # create_connection reported a failure for this member
                continue
            if follower is None:
                log.log({
                    'info':
                    'Performing startup attempt to {0}'.format(member),
                    'subsystem':
                    'collective'
                })
                if not connect_to_leader(
                        name=myname, leader=member, remote=remote):
                    remote.close()
            else:
                # A leader was already found, this connection is surplus
                remote.close()
    except Exception as e:
        # Startup stays best-effort (the finally clause schedules a retry),
        # but the reason for the failure must not vanish silently.
        log.log({
            'error':
            'Collective startup attempt failed: {0}'.format(str(e)),
            'subsystem':
            'collective'
        })
    finally:
        if retrythread is None and follower is None:
            retrythread = eventlet.spawn_after(5 + random.random(),
                                               start_collective)
        initting = False
Exemplo n.º 6
0
def start_collective():
    """Try to join the collective by contacting each other member in turn.

    Any existing follower task is killed first.  Nothing further is done
    while ``leader_init.active`` is set.  Otherwise every member other than
    ourselves is tried in sorted order via ``connect_to_leader`` until one
    attempt returns a true value.  If none does, another run of this
    function is scheduled roughly 30 seconds later.
    """
    global follower
    global retrythread
    if follower:
        follower.kill()
        follower = None
    if leader_init.active:
        # Currently transmitting data to a follower, do not connect out
        return
    myname = get_myname()
    joined = False
    for member in sorted(cfm.list_collective()):
        if member == myname:
            continue
        if cfm.cfgleader is None:
            cfm.stop_following(True)
        address = cfm.get_collective_member(member)['address']
        if connect_to_leader(name=myname, leader=address):
            joined = True
            break
    if not joined:
        retrythread = eventlet.spawn_after(30 + random.random(),
                                           start_collective)
Exemplo n.º 7
0
def follow_leader(remote, leader):
    """Follow ``remote`` until the channel ends, then recover membership.

    Three outcomes are handled once ``cfm.follow_channel`` stops:

    * ``greenlet.GreenletExit``: treated as a deliberate shutdown; it is
      logged and nothing else happens.
    * The exit cause names a ``newleader``: a connection to that leader is
      attempted, and if it succeeds nothing else happens.
    * Anything else (including a failed or raising redirect attempt): the
      leader is considered lost, following is stopped, ``follower`` and
      ``currentleader`` are cleared and, unless a retry is already
      pending, ``start_collective`` is scheduled.

    :param remote: channel handed to ``cfm.follow_channel``
    :param leader: leader identifier, used only in the log message
    """
    global currentleader
    global retrythread
    global follower
    cleanexit = False
    newleader = None
    try:
        exitcause = cfm.follow_channel(remote)
        # Tolerate an empty/None exit cause instead of raising on .get
        if exitcause:
            newleader = exitcause.get('newleader', None)
    except greenlet.GreenletExit:
        cleanexit = True
    finally:
        if cleanexit:
            log.log({
                'info': 'Previous following cleanly closed',
                'subsystem': 'collective'
            })
            return
        if newleader:
            log.log({
                'info':
                'Previous leader directed us to join new leader {}'.format(
                    newleader),
                'subsystem':
                'collective'
            })
            try:
                if connect_to_leader(None, get_myname(), newleader):
                    return
            except Exception as e:
                # A failed redirect must not skip the recovery below,
                # otherwise no retry would ever be scheduled.
                log.log({
                    'error':
                    'Unable to join new leader {0}: {1}'.format(
                        newleader, str(e)),
                    'subsystem':
                    'collective'
                })
        log.log({
            'info':
            'Current leader ({0}) has disappeared, restarting '
            'collective membership'.format(leader),
            'subsystem':
            'collective'
        })
        # The leader has folded, time to startup again...
        follower = None
        cfm.stop_following()
        currentleader = None
        if retrythread is None:  # start a recovery
            retrythread = eventlet.spawn_after(random.random(),
                                               start_collective)
Exemplo n.º 8
0
def become_leader(connection):
    """Take over as collective leader and start assimilating members.

    Any active follower task is killed and a pending retry is cancelled.
    The local address of ``connection`` is recorded as the current leader.
    When ``_assimilate_missing`` (given the peer address of ``connection``)
    returns a true value, a rebalance is scheduled and the background
    ``reassimilate_missing`` task is (re)started, replacing any previous
    one.

    :param connection: object providing ``getsockname()`` and
        ``getpeername()``; only element ``[0]`` of each result is used.
    """
    global currentleader
    global follower
    global retrythread
    global reassimilate
    log.log({'info': 'Becoming leader of collective',
             'subsystem': 'collective'})
    if follower is not None:
        # We can not follow someone else while leading
        follower.kill()
        cfm.stop_following()
        follower = None
    if retrythread is not None:
        retrythread.cancel()
        retrythread = None
    currentleader = connection.getsockname()[0]
    peeraddr = connection.getpeername()[0]
    if not _assimilate_missing(peeraddr):
        return
    schedule_rebalance()
    if reassimilate is not None:
        # Only one reassimilation task should be running at a time
        reassimilate.kill()
    reassimilate = eventlet.spawn(reassimilate_missing)
Exemplo n.º 9
0
def connect_to_leader(cert=None, name=None, leader=None):
    """Attempt to join the collective as a follower of ``leader``.

    After connecting, the banner is checked for protocol ``v2`` and a
    ``connect`` request carrying our name and transaction count is sent.
    The reply decides what happens next:

    * empty reply, ``backoff`` or an unrecognized error: give up;
    * ``leader`` in the error: retry against the indicated leader;
    * ``txcount`` in the error: call ``become_leader`` instead;
    * otherwise: receive the collective data and database, replace the
      local configuration with it and spawn ``follow_leader``.

    The connection is closed on the paths where it is not handed on to
    ``become_leader`` or ``follow_leader``, except when an exception
    escapes from receiving data or from restoring the configuration.

    :param cert: passed through to ``connect_to_collective``
    :param name: our member name; ``get_myname()`` is used when None
    :param leader: address to contact; ``currentleader`` when None
    :returns: True once following has started, False when the attempt
        failed, or whatever ``become_leader`` or the redirected attempt
        returns on those paths.
    :raises Exception: on a protocol version mismatch, on being redirected
        to ourselves, or on a truncated database transfer.  Errors raised
        while restoring the configuration are re-raised after rollback.
    """
    global currentleader
    global follower

    def _discard_remote():
        # Best-effort close of a connection that will not be used further
        try:
            remote.close()
        except Exception:
            pass

    if leader is None:
        leader = currentleader
    log.log({
        'info': 'Attempting connection to leader {0}'.format(leader),
        'subsystem': 'collective'
    })
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error as e:
        log.log({
            'error':
            'Collective connection attempt to {0} failed: {1}'
            ''.format(leader, str(e)),
            'subsystem':
            'collective'
        })
        return False
    with connecting:
        with cfm._initlock:
            banner = tlvdata.recv(remote)  # the banner
            vers = banner.split()[2]
            if vers != b'v2':
                _discard_remote()
                raise Exception(
                    'This instance only supports protocol 2, synchronize '
                    'versions between collective members'
                )
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(
                remote, {
                    'collective': {
                        'operation': 'connect',
                        'name': name,
                        'txcount': cfm._txcount
                    }
                })
            keydata = tlvdata.recv(remote)
            if not keydata:
                _discard_remote()
                return False
            if 'error' in keydata:
                if 'backoff' in keydata:
                    log.log({
                        'info':
                        'Collective initialization in progress on '
                        '{0}'.format(leader),
                        'subsystem':
                        'collective'
                    })
                    _discard_remote()
                    return False
                if 'leader' in keydata:
                    log.log({
                        'info':
                        'Prospective leader {0} has redirected this '
                        'member to {1}'.format(leader, keydata['leader']),
                        'subsystem':
                        'collective'
                    })
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    # The redirecting peer has nothing more to tell us
                    _discard_remote()
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    log.log({
                        'info':
                        'Prospective leader {0} has inferior '
                        'transaction count, becoming leader'
                        ''.format(leader),
                        'subsystem':
                        'collective'
                    })
                    # become_leader reads addresses from the connection,
                    # so it is handed over still open
                    return become_leader(remote)
                _discard_remote()
                return False
            if follower:
                follower.kill()
                cfm.stop_following()
                follower = None
            log.log({
                'info': 'Following leader {0}'.format(leader),
                'subsystem': 'collective'
            })
            colldata = tlvdata.recv(remote)
            # the protocol transmits global data, but for now we ignore it;
            # the message still has to be consumed from the stream
            tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            dbjson = b''
            while (len(dbjson) < dbsize):
                ndata = remote.recv(dbsize - len(dbjson))
                if not ndata:
                    # Peer closed before sending the announced amount
                    _discard_remote()
                    raise Exception("Error doing initial DB transfer")
                dbjson += ndata
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c,
                                                    colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                # Undo the clear so the previous configuration survives
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
        #spawn this as a thread...
        follower = eventlet.spawn(follow_leader, remote, leader)
    return True
Exemplo n.º 10
0
def connect_to_leader(cert=None, name=None, leader=None):
    """Attempt to join the collective as a follower of ``leader``.

    After connecting, a ``connect`` request carrying our name and
    transaction count is sent.  The reply decides what happens next:

    * empty reply or an unrecognized error: give up;
    * ``backoff`` in the error: schedule another attempt shortly;
    * ``leader`` in the error: retry against the indicated leader;
    * ``txcount`` in the error: call ``become_leader`` instead;
    * otherwise: receive the collective data, globals and database,
      replace the local configuration with it and spawn ``follow_leader``.

    The connection is closed on the paths where it is not handed on to
    ``become_leader`` or ``follow_leader``, except when an exception
    escapes from receiving data or from restoring the configuration.

    :param cert: passed through to ``connect_to_collective``
    :param name: our member name; ``get_myname()`` is used when None
    :param leader: address to contact; ``currentleader`` when None
    :returns: True once following has started or a backoff retry has been
        scheduled, False when the attempt failed, or whatever
        ``become_leader`` or the redirected attempt returns on those paths.
    :raises Exception: on being redirected to ourselves or on a truncated
        database transfer.  Errors raised while restoring the
        configuration are re-raised after rollback.
    """
    global currentleader
    global cfginitlock
    global follower

    def _discard_remote():
        # Best-effort close of a connection that will not be used further
        try:
            remote.close()
        except Exception:
            pass

    if cfginitlock is None:
        cfginitlock = threading.RLock()
    if leader is None:
        leader = currentleader
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error:
        return False
    with connecting:
        with cfginitlock:
            tlvdata.recv(remote)  # the banner
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(
                remote, {
                    'collective': {
                        'operation': 'connect',
                        'name': name,
                        'txcount': cfm._txcount
                    }
                })
            keydata = tlvdata.recv(remote)
            if not keydata:
                _discard_remote()
                return False
            if 'error' in keydata:
                if 'backoff' in keydata:
                    # The retry opens its own connection
                    _discard_remote()
                    eventlet.spawn_after(random.random(), connect_to_leader,
                                         cert, name, leader)
                    return True
                if 'leader' in keydata:
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    # The redirecting peer has nothing more to tell us
                    _discard_remote()
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    # become_leader receives the connection still open
                    return become_leader(remote)
                print(keydata['error'])
                _discard_remote()
                return False
            if follower is not None:
                follower.kill()
                cfm.stop_following()
                follower = None
            colldata = tlvdata.recv(remote)
            globaldata = tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            # recv() yields bytes, so the accumulator must be bytes as well
            # (identical to '' on Python 2, required on Python 3)
            dbjson = b''
            while (len(dbjson) < dbsize):
                ndata = remote.recv(dbsize - len(dbjson))
                if not ndata:
                    # Peer closed before sending the announced amount
                    _discard_remote()
                    raise Exception("Error doing initial DB transfer")
                dbjson += ndata
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c,
                                                    colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                for globvar in globaldata:
                    cfm.set_global(globvar, globaldata[globvar], False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                # Undo the clear so the previous configuration survives
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
        #spawn this as a thread...
        follower = eventlet.spawn(follow_leader, remote)
    return True