示例#1
0
def become_leader(connection):
    """Take over as collective leader and try to assimilate other members.

    Kills the current follower (if any) and stops following, cancels a
    pending retry, records the local address of ``connection`` as the
    current leader, then spawns ``try_assimilate`` for each collective
    member whose address is not excluded.  Excluded are: entries of
    ``cfm.cfgstreams``, the new leader address, the peer address of
    ``connection``, and this node itself.  A rebalance is scheduled last.
    """
    global currentleader, follower, retrythread
    log.log({
        'info': 'Becoming leader of collective',
        'subsystem': 'collective'
    })
    if follower:
        follower.kill()
        cfm.stop_following()
        follower = None
    if retrythread:
        retrythread.cancel()
        retrythread = None
    currentleader = connection.getsockname()[0]
    peeraddr = connection.getpeername()[0]
    myname = get_myname()
    excluded = set(cfm.cfgstreams)
    excluded.update((currentleader, peeraddr))
    for member in cfm.list_collective():
        address = cfm.get_collective_member(member)['address']
        if address not in excluded and member != myname:
            eventlet.spawn_n(try_assimilate, address)
    schedule_rebalance()
示例#2
0
def start_collective():
    """Try each other collective member in sorted order as a leader.

    Any existing follower is killed first.  Nothing further happens when
    ``cfm.cfgstreams`` is non-empty and the quorum check passes, or when
    ``leader_init`` is active.  If no candidate accepts the connection, a
    retry of this function is scheduled roughly 30 seconds later.
    """
    global follower, retrythread
    if follower:
        follower.kill()
        cfm.stop_following()
        follower = None
    try:
        if cfm.cfgstreams:
            cfm.check_quorum()
            # Quorum holds and we have config streams: we are a working
            # leader, so there is nothing to start.
            return
    except exc.DegradedCollective:
        pass
    if leader_init.active:
        # Currently transmitting data to a follower; do not try to
        # connect elsewhere in the meantime.
        return
    myname = get_myname()
    connected = False
    for member in sorted(cfm.list_collective()):
        if member == myname:
            continue
        if cfm.cfgleader is None:
            cfm.stop_following(True)
        candidate = cfm.get_collective_member(member)['address']
        log.log({
            'info': 'Performing startup attempt to {0}'.format(candidate),
            'subsystem': 'collective',
        })
        if connect_to_leader(name=myname, leader=candidate):
            connected = True
            break
    if not connected:
        # Every candidate failed (or there were none): try again later.
        retrythread = eventlet.spawn_after(30 + random.random(),
                                           start_collective)
示例#3
0
def handle_dispatch(connection, cert, dispatch, peername):
    """Run a plugin request dispatched by a collective peer.

    The peer certificate is checked against the fingerprint stored for
    ``peername``; the connection is closed without a reply when the peer is
    unknown, the certificate does not match, or the payload does not start
    with the msgpack magic prefix.  Otherwise the request is decoded, nodes
    are grouped by the plugin handler that serves them, each handler is
    invoked, and every result (or the raised exception) is forwarded to the
    peer via ``_forward_rsp``.  Eight NUL bytes are sent last.

    :param connection: socket-like object connected to the peer
    :param cert: peer certificate object accepted by crypto.dump_certificate
    :param dispatch: raw request bytes, magic prefix followed by msgpack data
    :param peername: collective member name the peer claims to be
    """
    cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    peerinfo = cfm.get_collective_member(peername)
    # An unknown peer name yields no member record; reject it rather than
    # failing on the fingerprint lookup.
    if not (peerinfo and util.cert_matches(peerinfo['fingerprint'], cert)):
        connection.close()
        return
    if dispatch[0:2] != b'\x01\x03':  # magic value to indicate msgpack
        # We only support msgpack now
        # The magic should preclude any pickle, as the first byte can never be
        # under 0x20 or so.
        connection.close()
        return
    dispatch = msgpack.unpackb(dispatch[2:], raw=False)
    configmanager = cfm.ConfigManager(dispatch['tenant'])
    nodes = dispatch['nodes']
    inputdata = dispatch['inputdata']
    operation = dispatch['operation']
    pathcomponents = dispatch['path']
    routespec = nested_lookup(noderesources, pathcomponents)
    inputdata = msg.get_input_message(pathcomponents, operation, inputdata,
                                      nodes, dispatch['isnoderange'],
                                      configmanager)
    plugroute = routespec.routeinfo
    # NOTE(review): plugpath is not reset per node, so a node with no
    # matching attribute and no route default reuses the previous node's
    # plugin -- confirm whether that is intended.
    plugpath = None
    nodesbyhandler = {}
    passvalues = []
    nodeattr = configmanager.get_node_attributes(nodes,
                                                 plugroute['pluginattrs'])
    for node in nodes:
        for attrname in plugroute['pluginattrs']:
            if attrname in nodeattr[node]:
                plugpath = nodeattr[node][attrname]['value']
            elif 'default' in plugroute:
                plugpath = plugroute['default']
        if plugpath:
            try:
                hfunc = getattr(pluginmap[plugpath], operation)
            except KeyError:
                # No such plugin: report the problem for this node only.
                nodesbyhandler[BadPlugin(node, plugpath).error] = [node]
                continue
            nodesbyhandler.setdefault(hfunc, []).append(node)
    try:
        for hfunc in nodesbyhandler:
            passvalues.append(
                hfunc(nodes=nodesbyhandler[hfunc],
                      element=pathcomponents,
                      configmanager=configmanager,
                      inputdata=inputdata))
        for res in itertools.chain(*passvalues):
            _forward_rsp(connection, res)
    except Exception as res:
        # Forward the failure to the peer instead of dropping it.
        _forward_rsp(connection, res)
    # Must be bytes: sockets reject str under Python 3 (bytes and str are
    # the same type under Python 2, so this is compatible with both).
    connection.sendall(b'\x00\x00\x00\x00\x00\x00\x00\x00')
示例#4
0
def start_collective():
    """Attempt to join the collective by contacting all peers in parallel.

    Connections to every other member are opened concurrently through a
    green pool.  While no follower exists, each successful connection is
    offered to ``connect_to_leader``; connections that are not used, or
    that ``connect_to_leader`` rejects, are closed.  Whatever happens, if
    there is still no follower and no retry pending on exit, another
    attempt is scheduled roughly five seconds later.
    """
    global follower
    global retrythread
    global initting
    initting = True
    retrythread = None
    try:
        cfm.membership_callback = schedule_rebalance
        if follower is not None:
            initting = False
            return
        try:
            if cfm.cfgstreams:
                cfm.check_quorum()
                # Do not start if we have quorum and are leader
                return
        except exc.DegradedCollective:
            pass
        if leader_init.active:
            # Do not start trying to connect if we are transmitting data
            # to a follower.
            return
        myname = get_myname()
        candidates = []
        for member in sorted(cfm.list_collective()):
            if member == myname:
                continue
            if cfm.cfgleader is None:
                cfm.stop_following(True)
            candidates.append(cfm.get_collective_member(member)['address'])
        pool = greenpool.GreenPool(64)
        for address, remote in pool.imap(create_connection, candidates):
            if isinstance(remote, Exception):
                # This peer could not be reached; move on to the next one.
                continue
            if follower is not None:
                # Already following someone; this connection is surplus.
                remote.close()
                continue
            log.log({
                'info': 'Performing startup attempt to {0}'.format(address),
                'subsystem': 'collective',
            })
            if not connect_to_leader(
                    name=myname, leader=address, remote=remote):
                remote.close()
    except Exception:
        # Best effort: any failure simply leads to the retry below.
        pass
    finally:
        if retrythread is None and follower is None:
            retrythread = eventlet.spawn_after(5 + random.random(),
                                               start_collective)
        initting = False
示例#5
0
def connect_node(node, configmanager, username=None):
    """Return the console object to use for ``node``.

    If the node's ``collective.manager`` attribute names a collective
    member other than this one, a ``ProxyConsole`` toward that member is
    returned.  Otherwise the ``ConsoleHandler`` cached for this node and
    tenant is returned, being created on first use.

    :param node: name of the node whose console is wanted
    :param configmanager: configuration manager used for the attribute
        lookup; its ``tenant`` forms part of the handler cache key
    :param username: passed through to ``ProxyConsole`` only
    """
    attrval = configmanager.get_node_attributes(node, 'collective.manager')
    myc = attrval.get(node, {}).get('collective.manager', {}).get(
        'value', None)
    myname = collective.get_myname()
    # Reuse the name fetched above instead of asking for it a second time.
    if myc and myc != myname:
        minfo = configmodule.get_collective_member(myc)
        return ProxyConsole(node, minfo, myname, configmanager, username)
    consk = (node, configmanager.tenant)
    if consk not in _handled_consoles:
        _handled_consoles[consk] = ConsoleHandler(node, configmanager)
    return _handled_consoles[consk]
示例#6
0
def start_proxy_term(connection, cert, request):
    """Serve a proxied console session requested by a collective member.

    The peer certificate must match the fingerprint stored for
    ``request['name']``; when the member is unknown or the certificate does
    not match, the connection is closed and nothing else happens.
    Otherwise a ``ConsoleSession`` for ``request['node']`` is created with
    its output sent to the peer, and control passes to ``term_interact``.

    :param connection: socket-like object connected to the requesting peer
    :param cert: peer certificate object accepted by crypto.dump_certificate
    :param request: dict with 'name', 'tenant', 'node', 'user' and
        'skipreplay' keys
    """
    cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    droneinfo = configmanager.get_collective_member(request['name'])
    # An unknown member name yields no record; reject it rather than
    # failing on the fingerprint lookup.
    if not (droneinfo and util.cert_matches(droneinfo['fingerprint'], cert)):
        connection.close()
        return
    cfm = configmanager.ConfigManager(request['tenant'])
    ccons = ClientConsole(connection)
    consession = consoleserver.ConsoleSession(node=request['node'],
                                              configmanager=cfm,
                                              username=request['user'],
                                              datacallback=ccons.sendall,
                                              skipreplay=request['skipreplay'])
    term_interact(None, None, ccons, None, connection, consession, None)
示例#7
0
文件: manager.py 项目: jxdn/confluent
def become_leader(connection):
    """Take over as collective leader and try to assimilate other members.

    Kills the current follower (if any), records the local address of
    ``connection`` as the current leader, then spawns ``try_assimilate``
    for every collective member except this node, the new leader address,
    and the peer on ``connection``.
    """
    global currentleader, follower
    if follower:
        follower.kill()
        follower = None
    currentleader = connection.getsockname()[0]
    peeraddr = connection.getpeername()[0]
    myname = get_myname()
    excluded = (currentleader, peeraddr)
    for member in cfm.list_collective():
        address = cfm.get_collective_member(member)['address']
        if address not in excluded and member != myname:
            eventlet.spawn_n(try_assimilate, address)
示例#8
0
def handle_dispatch(connection, cert, dispatch, peername):
    """Run a plugin request dispatched by a collective peer (pickle format).

    The peer certificate is checked against the fingerprint stored for
    ``peername``; the connection is closed without a reply when the peer is
    unknown or the certificate does not match.  Otherwise the request is
    unpickled, nodes are grouped by the plugin handler that serves them,
    each handler is invoked, and every result (or the raised exception) is
    forwarded to the peer via ``_forward_rsp`` together with the pickle
    protocol version read from the request.  Eight NUL bytes are sent last.

    :param connection: socket-like object connected to the peer
    :param cert: peer certificate object accepted by crypto.dump_certificate
    :param dispatch: raw pickled request
    :param peername: collective member name the peer claims to be
    """
    cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    peerinfo = cfm.get_collective_member(peername)
    # An unknown peer name yields no member record; reject it rather than
    # failing on the fingerprint lookup.
    if not (peerinfo and util.cert_matches(peerinfo['fingerprint'], cert)):
        connection.close()
        return
    pversion = 0
    header = bytearray(dispatch)
    if header[0] == 0x80:
        # A leading 0x80 opcode is followed by the pickle protocol number;
        # remember it so replies can use the same protocol.
        pversion = header[1]
    # NOTE(security): unpickling data received over the network can execute
    # arbitrary code.  The peer certificate was verified above, but a
    # compromised collective member could still abuse this; a data-only
    # format would be safer.
    dispatch = pickle.loads(dispatch, **pargs)
    configmanager = cfm.ConfigManager(dispatch['tenant'])
    nodes = dispatch['nodes']
    inputdata = dispatch['inputdata']
    operation = dispatch['operation']
    pathcomponents = dispatch['path']
    routespec = nested_lookup(noderesources, pathcomponents)
    plugroute = routespec.routeinfo
    # NOTE(review): plugpath is not reset per node, so a node with no
    # matching attribute and no route default reuses the previous node's
    # plugin -- confirm whether that is intended.
    plugpath = None
    nodesbyhandler = {}
    passvalues = []
    nodeattr = configmanager.get_node_attributes(nodes,
                                                 plugroute['pluginattrs'])
    for node in nodes:
        for attrname in plugroute['pluginattrs']:
            if attrname in nodeattr[node]:
                plugpath = nodeattr[node][attrname]['value']
            elif 'default' in plugroute:
                plugpath = plugroute['default']
        if plugpath:
            try:
                hfunc = getattr(pluginmap[plugpath], operation)
            except KeyError:
                # No such plugin: report the problem for this node only.
                nodesbyhandler[BadPlugin(node, plugpath).error] = [node]
                continue
            nodesbyhandler.setdefault(hfunc, []).append(node)
    try:
        for hfunc in nodesbyhandler:
            passvalues.append(
                hfunc(nodes=nodesbyhandler[hfunc],
                      element=pathcomponents,
                      configmanager=configmanager,
                      inputdata=inputdata))
        for res in itertools.chain(*passvalues):
            _forward_rsp(connection, res, pversion)
    except Exception as res:
        # Forward the failure to the peer instead of dropping it.
        _forward_rsp(connection, res, pversion)
    # Must be bytes: sockets reject str under Python 3 (bytes and str are
    # the same type under Python 2, so this is compatible with both).
    connection.sendall(b'\x00\x00\x00\x00\x00\x00\x00\x00')
示例#9
0
文件: manager.py 项目: jxdn/confluent
def start_collective():
    """Try each other collective member in sorted order as a leader.

    Any existing follower is killed first.  Nothing further happens while
    ``leader_init`` is active.  If no candidate accepts the connection, a
    retry of this function is scheduled roughly 30 seconds later.
    """
    global follower, retrythread
    if follower:
        follower.kill()
        follower = None
    if leader_init.active:
        # Currently transmitting data to a follower; do not try to
        # connect elsewhere in the meantime.
        return
    myname = get_myname()
    connected = False
    for member in sorted(cfm.list_collective()):
        if member == myname:
            continue
        if cfm.cfgleader is None:
            cfm.stop_following(True)
        candidate = cfm.get_collective_member(member)['address']
        if connect_to_leader(name=myname, leader=candidate):
            connected = True
            break
    if not connected:
        # Every candidate failed (or there were none): try again later.
        retrythread = eventlet.spawn_after(30 + random.random(),
                                           start_collective)
示例#10
0
def _assimilate_missing(skipaddr=None):
    """Try to assimilate collective members not yet accounted for.

    Members are skipped when their address or name appears in
    ``cfm.cfgstreams``, equals the current leader or ``skipaddr``, or when
    the member is this node.  Connections to the rest are opened
    concurrently; unreachable members are ignored.

    :param skipaddr: optional extra address to leave out
    :returns: False as soon as one ``try_assimilate`` call fails, True
        otherwise (including when there was nobody to contact)
    """
    myname = get_myname()
    excluded = set(cfm.cfgstreams)
    # Count taken before the leader/skip addresses are added to the set.
    numfollowers = len(excluded)
    excluded.add(currentleader)
    if skipaddr is not None:
        excluded.add(skipaddr)
    targets = []
    for member in cfm.list_collective():
        address = cfm.get_collective_member(member)['address']
        if (address not in excluded and member != myname
                and member not in excluded):
            targets.append(address)
    if not targets:
        return True
    pool = greenpool.GreenPool(64)
    # all() stops at the first failed assimilation, like an early return.
    return all(
        try_assimilate(address, numfollowers, remote)
        for address, remote in pool.imap(create_connection, targets)
        if not isinstance(remote, Exception))
示例#11
0
def handle_connection(connection, cert, request, local=False):
    """Service one collective-management request received on ``connection``.

    ``request['operation']`` selects the action.  Without a peer
    certificate the request is ignored unless ``local`` is true; in that
    branch the ``show``, ``delete``, ``invite`` and ``join`` operations are
    handled.  The ``enroll``, ``assimilate``, ``getinfo`` and ``connect``
    checks follow for any request that has not returned by then;
    ``assimilate``, ``getinfo`` and ``connect`` validate ``cert`` against
    the fingerprint stored for ``request['name']``, while ``enroll``
    validates an invitation proof.

    :param connection: socket-like object the request arrived on
    :param cert: peer certificate object, or a false value when there is none
    :param request: dict carrying 'operation' plus operation-specific keys
    :param local: whether a certificate-less request may be honored
    """
    global currentleader
    global retrythread
    operation = request['operation']
    if cert:
        cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    else:
        if not local:
            return
        if operation in ('show', 'delete'):
            if not list(cfm.list_collective()):
                tlvdata.send(
                    connection, {
                        'collective': {
                            'error':
                            'Collective mode not '
                            'enabled on this '
                            'system'
                        }
                    })
                return
            if follower:
                # We are a follower: ask the leader for the collective
                # information instead of answering from local state.
                linfo = cfm.get_collective_member_by_address(currentleader)
                remote = socket.create_connection((currentleader, 13001))
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
                cert = remote.getpeercert(binary_form=True)
                if not (linfo
                        and util.cert_matches(linfo['fingerprint'], cert)):
                    remote.close()
                    tlvdata.send(connection, {
                        'error':
                        'Invalid certificate, '
                        'redo invitation process'
                    })
                    connection.close()
                    return
                try:
                    tlvdata.recv(remote)  # ignore banner
                    tlvdata.recv(remote)  # ignore authpassed: 0
                    tlvdata.send(remote, {
                        'collective': {
                            'operation': 'getinfo',
                            'name': get_myname()
                        }
                    })
                    collinfo = tlvdata.recv(remote)
                finally:
                    # The leader connection is only needed for this one
                    # exchange; do not leak the socket.
                    remote.close()
            else:
                collinfo = {}
                populate_collinfo(collinfo)
            try:
                cfm.check_quorum()
                collinfo['quorum'] = True
            except exc.DegradedCollective:
                collinfo['quorum'] = False
            if operation == 'show':
                tlvdata.send(connection, {'collective': collinfo})
            elif operation == 'delete':
                todelete = request['member']
                if (todelete == collinfo['leader']
                        or todelete in collinfo['active']):
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is still active, stop the confluent service to remove it'
                                .format(todelete)
                            }
                        })
                    return
                if todelete not in collinfo['offline']:
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is not a recognized collective member'.
                                format(todelete)
                            }
                        })
                    return
                cfm.del_collective_member(todelete)
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status':
                            'Successfully deleted {0}'.format(todelete)
                        }
                    })
                connection.close()
            return
        if 'invite' == operation:
            try:
                cfm.check_quorum()
            except exc.DegradedCollective:
                tlvdata.send(connection, {
                    'collective': {
                        'error': 'Collective does not have quorum'
                    }
                })
                return
            # TODO(jjohnson2): Cannot do the invitation if not the head
            # node, the certificate hand-carrying can't work in such a case.
            name = request['name']
            invitation = invites.create_server_invitation(name)
            tlvdata.send(connection,
                         {'collective': {
                             'invitation': invitation
                         }})
            connection.close()
        if 'join' == operation:
            invitation = request['invitation']
            try:
                invitation = base64.b64decode(invitation)
                name, invitation = invitation.split(b'@', 1)
                name = util.stringify(name)
            except Exception:
                tlvdata.send(
                    connection,
                    {'collective': {
                        'status': 'Invalid token format'
                    }})
                connection.close()
                return
            host = request['server']
            try:
                remote = socket.create_connection((host, 13001))
                # This isn't what it looks like.  We do CERT_NONE to disable
                # openssl verification, but then use the invitation as a
                # shared secret to validate the certs as part of the join
                # operation
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
            except Exception:
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status': 'Failed to connect to {0}'.format(host)
                        }
                    })
                connection.close()
                return
            mycert = util.get_certificate_from_file(
                '/etc/confluent/srvcert.pem')
            cert = remote.getpeercert(binary_form=True)
            proof = base64.b64encode(
                invites.create_client_proof(invitation, mycert, cert))
            tlvdata.recv(remote)  # ignore banner
            tlvdata.recv(remote)  # ignore authpassed: 0
            tlvdata.send(remote, {
                'collective': {
                    'operation': 'enroll',
                    'name': name,
                    'hmac': proof
                }
            })
            rsp = tlvdata.recv(remote)
            if 'error' in rsp:
                remote.close()
                tlvdata.send(connection,
                             {'collective': {
                                 'status': rsp['error']
                             }})
                connection.close()
                return
            proof = rsp['collective']['approval']
            proof = base64.b64decode(proof)
            j = invites.check_server_proof(invitation, mycert, cert, proof)
            if not j:
                remote.close()
                tlvdata.send(connection,
                             {'collective': {
                                 'status': 'Bad server token'
                             }})
                connection.close()
                return
            # The enrollment exchange is complete; this socket is not used
            # again, so release it.
            remote.close()
            tlvdata.send(connection, {'collective': {'status': 'Success'}})
            connection.close()
            currentleader = rsp['collective']['leader']
            with open('/etc/confluent/cfg/myname', 'w') as f:
                f.write(name)
            log.log({
                'info': 'Connecting to collective due to join',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader,
                             rsp['collective']['fingerprint'], name)
    if 'enroll' == operation:
        # TODO(jjohnson2): error appropriately when asked to enroll, but the
        # master is elsewhere
        mycert = util.get_certificate_from_file('/etc/confluent/srvcert.pem')
        proof = base64.b64decode(request['hmac'])
        myrsp = invites.check_client_proof(request['name'], mycert, cert,
                                           proof)
        if not myrsp:
            tlvdata.send(connection, {'error': 'Invalid token'})
            connection.close()
            return
        myrsp = base64.b64encode(myrsp)
        fprint = util.get_fingerprint(cert)
        myfprint = util.get_fingerprint(mycert)
        # Record both ends of this connection as collective members.
        cfm.add_collective_member(get_myname(),
                                  connection.getsockname()[0], myfprint)
        cfm.add_collective_member(request['name'],
                                  connection.getpeername()[0], fprint)
        myleader = get_leader(connection)
        ldrfprint = cfm.get_collective_member_by_address(
            myleader)['fingerprint']
        tlvdata.send(
            connection, {
                'collective': {
                    'approval': myrsp,
                    'fingerprint': ldrfprint,
                    'leader': get_leader(connection)
                }
            })
    if 'assimilate' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not droneinfo:
            tlvdata.send(
                connection,
                {'error': 'Unrecognized leader, '
                 'redo invitation process'})
            return
        if not util.cert_matches(droneinfo['fingerprint'], cert):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            return
        if request['txcount'] < cfm._txcount:
            tlvdata.send(
                connection, {
                    'error': 'Refusing to be assimilated by inferior '
                    'transaction count',
                    'txcount': cfm._txcount,
                })
            return
        if connecting.active:
            # don't try to connect while actively already trying to connect
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        if (currentleader == connection.getpeername()[0] and follower
                and not follower.dead):
            # if we are happily following this leader already, don't stir
            # the pot
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        log.log({
            'info': 'Connecting in response to assimilation',
            'subsystem': 'collective'
        })
        eventlet.spawn_n(connect_to_leader,
                         None,
                         None,
                         leader=connection.getpeername()[0])
        tlvdata.send(connection, {'status': 0})
        connection.close()
    if 'getinfo' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        collinfo = {}
        populate_collinfo(collinfo)
        tlvdata.send(connection, collinfo)
    if 'connect' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        myself = connection.getsockname()[0]
        if connecting.active:
            tlvdata.send(connection, {
                'error': 'Connecting right now',
                'backoff': True
            })
            connection.close()
            return
        if myself != get_leader(connection):
            tlvdata.send(
                connection, {
                    'error': 'Cannot assimilate, our leader is '
                    'in another castle',
                    'leader': currentleader
                })
            connection.close()
            return
        if request['txcount'] > cfm._txcount:
            # The peer has seen more transactions than we have: step down
            # and follow it instead of serving it stale data.
            retire_as_leader()
            tlvdata.send(
                connection, {
                    'error': 'Client has higher transaction count, '
                    'should assimilate me, connecting..',
                    'txcount': cfm._txcount
                })
            log.log({
                'info': 'Connecting to leader due to superior '
                'transaction count',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader, None, None,
                             connection.getpeername()[0])
            connection.close()
            return
        if retrythread:
            retrythread.cancel()
            retrythread = None
        with leader_init:
            # Send the connecting member our keys, collective membership
            # and full configuration dump.
            cfm.update_collective_address(request['name'],
                                          connection.getpeername()[0])
            tlvdata.send(connection, cfm._dump_keys(None, False))
            tlvdata.send(connection, cfm._cfgstore['collective'])
            tlvdata.send(connection, {})  # cfm.get_globals())
            cfgdata = cfm.ConfigManager(None)._dump_to_json()
            tlvdata.send(connection, {
                'txcount': cfm._txcount,
                'dbsize': len(cfgdata)
            })
            connection.sendall(cfgdata)
        # tlvdata.send(connection, {'tenants': 0}) # skip the tenants for
        # now, so far unused anyway
        if not cfm.relay_slaved_requests(drone, connection):
            if not retrythread:  # start a recovery if everyone else seems
                # to have disappeared
                retrythread = eventlet.spawn_after(30 + random.random(),
                                                   start_collective)