示例#1
0
def aggmon_data_store(argv):
    """Run the data store daemon until it is told to quit.

    Parses ``argv``, starts a ``DataStore`` for the selected backend(s),
    binds a zmq PULL socket, subscribes that socket at every configured
    message bus and then forwards each received JSON message to
    ``store.queue``.

    Args:
        argv: list of command line arguments (without the program name).

    The process is terminated on invalid arguments, when the DataStore can
    not be created, when the PULL socket can not be bound, and on a "quit"
    command. The receive loop is left after the first exception raised
    while handling a message.
    """
    global component

    ap = argparse.ArgumentParser()
    ap.add_argument('-g', '--group', default="universe", action="store", help="group/cluster served by this daemon instance")
    ap.add_argument('-C', '--cmd-port', default="tcp://0.0.0.0:5511", action="store", help="RPC command port")
    ap.add_argument('-D', '--dispatcher', default="", action="store", help="agg_control dispatcher RPC command port")
    # type=int: without it a value given on the command line stays a string
    # and the TTL multiplication below would repeat the string instead.
    ap.add_argument('-e', '--expire', type=int, default=180, action="store", help="days for expiring value metrics")
    ap.add_argument('-b', '--backend', default="mongodb", action="store", help="database backend(s), comma separated. Default is 'mongodb'.")
    ap.add_argument('-H', '--host', default="localhost", action="store", help="data store host")
    ap.add_argument('-n', '--port', default=None, action="store", help="data store port")
    ap.add_argument('-d', '--dbname', default="metricdb", action="store", help="database name")
    ap.add_argument('-P', '--prefix', default="gmetric", action="store", help="collections prefix")
    ap.add_argument('-u', '--user', default="", action="store", help="user name")
    ap.add_argument('-p', '--passwd', default="", action="store", help="password")
    ap.add_argument('-l', '--log', default="info", action="store", help="logging: info, debug, ...")
    ap.add_argument('-L', '--listen', default="tcp://0.0.0.0:5550", action="store", help="zmq pull port to listen on")
    ap.add_argument('-M', '--msgbus', default=[], action="append",
                    help="subscription port(s) for message bus. can be used multiple times.")
    ap.add_argument('-s', '--stats', default=False, action="store_true", help="print statistics info")
    ap.add_argument('-v', '--verbose', type=int, default=0, action="store", help="verbosity")
    pargs = ap.parse_args(argv)

    # The level name comes from the command line; look it up with getattr()
    # instead of eval() so arbitrary expressions are never executed.
    log_level = getattr(logging, pargs.log.upper(), None)
    if not isinstance(log_level, int):
        ap.error("invalid log level: %r" % pargs.log)
    FMT = "%(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s"
    logging.basicConfig(stream=sys.stderr, level=log_level, format=FMT)

    # The receive loop tests "component is not None"; make sure the global
    # exists even when no dispatcher is configured.
    component = None

    pargs.backend = pargs.backend.split(",")
    if pargs.port:
        pargs.port = pargs.port.split(",")      # TODO: get rid of this and move it into (shared) config
    else:
        pargs.port = [None, None]

    # open DB
    try:
        store = DataStore(pargs.backend, pargs.host, pargs.port, pargs.dbname, pargs.user, pargs.passwd,
                          pargs.group, coll_prefix=pargs.prefix, value_metrics_ttl=pargs.expire * 24 * 3600)
    except Exception as e:
        log.error("Failed to create DataStore: %r" % e)
        sys.exit(1)
    store.start()

    context = zmq.Context()

    # Socket to receive messages on
    receiver = context.socket(zmq.PULL)
    receiver.setsockopt(zmq.RCVHWM, 40000)
    recv_port = zmq_socket_bind_range(receiver, pargs.listen)
    # Explicit check instead of assert: asserts are removed under "python -O".
    if recv_port is None:
        log.error("Failed to bind PULL socket on %s" % pargs.listen)
        sys.exit(1)

    def subscribe_collectors(__msg):
        # Ask every message bus to push all its messages to our PULL port.
        for msgb in pargs.msgbus:
            log.info("subscribing to all msgs at '%s'" % msgb)
            me_addr = zmq_own_addr_for_uri(msgb)
            send_rpc(context, msgb, "subscribe", TARGET="tcp://%s:%d" % (me_addr, recv_port))

    def unsubscribe_and_quit(__msg):
        # Remove our subscription from every message bus, then exit hard.
        for msgb in pargs.msgbus:
            log.info("unsubscribing from '%s'" % msgb)
            me_addr = zmq_own_addr_for_uri(msgb)
            send_rpc(context, msgb, "unsubscribe", TARGET="tcp://%s:%d" % (me_addr, recv_port))
        os._exit(0)

    rpc = RPCThread(context, listen=pargs.cmd_port)
    rpc.start()
    rpc.register_rpc("quit", unsubscribe_and_quit, early_reply=True)
    rpc.register_rpc("resubscribe", subscribe_collectors)

    if len(pargs.dispatcher) > 0:
        # Publish our own RPC and listen addresses to the dispatcher.
        me_addr = zmq_own_addr_for_uri(pargs.dispatcher)
        me_listen = "tcp://%s:%d" % (me_addr, recv_port)
        me_rpc = "tcp://%s:%d" % (me_addr, rpc.port)
        state = get_kwds(component="data_store", cmd_port=me_rpc, listen=me_listen, group=pargs.group)
        component = ComponentState(context, pargs.dispatcher, state=state)
        rpc.register_rpc("resend_state", component.reset_timer)

    # subscribe to message bus
    subscribe_collectors(None)

    tstart = None
    log.info("Started msg receiver on %s" % pargs.listen)
    count = 0
    while True:
        try:
            s = receiver.recv()
            log.debug("received msg on PULL port: %r" % s)
            msg = json.loads(s)

            cmd = None
            if "_COMMAND_" in msg:
                log.info("_COMMAND_ received: msg = %r" % msg)
                cmd = msg["_COMMAND_"]
            cmd_name = cmd["cmd"] if cmd is not None else None

            if cmd_name == "quit":
                log.info("Stopping data_store on 'quit' command.")
                # raw exit!!! (nothing after this call is executed)
                os._exit(0)
            elif cmd_name == "resend_state":
                log.info("State resend requested.")
                if component is not None:
                    component.reset_timer()
                continue

            store.queue.put(msg)
            if count == 0 or cmd_name == "reset-stats":
                tstart = time.time()
                count = 0
            count += 1
            if component is not None:
                component.update({"stats.msgs_recvd": count})
            if (pargs.stats and count % 10000 == 0) or cmd_name == "show-stats":
                elapsed = time.time() - tstart
                # elapsed can be 0 right after the counters were (re)started;
                # do not let the rate computation abort the receive loop.
                rate = float(count) / elapsed if elapsed > 0 else 0.0
                sys.stdout.write("%d msgs in %f seconds, %f msg/s\n" %
                                 (count, elapsed, rate))
                sys.stdout.flush()
        except Exception as e:
            print("Exception in msg receiver: %r" % e)
            break
    log.info("THE END")
示例#2
0
def aggmon_jobagg(argv):
    """Run a job aggregator instance for a single job id.

    Parses ``argv``, starts a ``JobAggregator`` for ``--jobid``, binds a zmq
    PULL socket, subscribes it at every configured message bus for that job
    and then forwards each received JSON message to ``jagg.queue``.

    Args:
        argv: list of command line arguments (without the program name).

    The process is terminated on invalid arguments, on an empty job id, when
    the JobAggregator can not be created and when the PULL socket can not be
    bound. The receive loop ends on a "quit" command or on the first
    exception raised while handling a message; the number of received
    messages is printed afterwards.
    """
    global component

    ap = argparse.ArgumentParser()
    ap.add_argument('-C', '--cmd-port', default="tcp://0.0.0.0:5501", action="store", help="RPC command port")
    ap.add_argument('-D', '--dispatcher', default="", action="store", help="agg_control dispatcher RPC command port")
    ap.add_argument('-j', '--jobid', default="", action="store", help="jobid for which this instance does aggregation")
    ap.add_argument('-l', '--log', default="info", action="store", help="logging: info, debug, ...")
    ap.add_argument('-L', '--listen', default="tcp://127.0.0.1:5560", action="store", help="zmq pull port to listen on")
    ap.add_argument('-M', '--msgbus', default=[], action="append",
                    help="subscription port(s) for message bus. can be used multiple times.")
    ap.add_argument('-s', '--stats', default=False, action="store_true", help="print statistics info")
    ap.add_argument('-v', '--verbose', type=int, default=0, action="store", help="verbosity")
    pargs = ap.parse_args(argv)

    # The level name comes from the command line; look it up with getattr()
    # instead of eval() so arbitrary expressions are never executed.
    log_level = getattr(logging, pargs.log.upper(), None)
    if not isinstance(log_level, int):
        ap.error("invalid log level: %r" % pargs.log)
    FMT = "%(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s"
    logging.basicConfig(stream=sys.stderr, level=log_level, format=FMT)
    component = None

    if len(pargs.jobid) == 0:
        log.error("jobid argument can not be empty!")
        sys.exit(1)

    context = zmq.Context()
    try:
        jagg = JobAggregator(pargs.jobid, context)
    except Exception as e:
        log.error("Failed to create JobAggregator: %r" % e)
        sys.exit(1)
    jagg.start()

    # Socket to receive messages on
    receiver = context.socket(zmq.PULL)
    receiver.setsockopt(zmq.RCVHWM, 40000)
    recv_port = zmq_socket_bind_range(receiver, pargs.listen)
    # Explicit check instead of assert: asserts are removed under "python -O".
    if recv_port is None:
        log.error("Failed to bind PULL socket on %s" % pargs.listen)
        sys.exit(1)

    def aggregate_rpc(msg):
        # Trigger an aggregation; the statistics are only tracked when a
        # ComponentState exists (i.e. a dispatcher was configured).
        num_sent = jagg.do_aggregate_and_send(msg)
        if component is None:
            return
        agg_rpcs = component.state.get("stats.agg_rpcs", 0) + 1
        aggs_sent = component.state.get("stats.aggs_sent", 0) + num_sent
        component.update({"stats.agg_rpcs": agg_rpcs, "stats.aggs_sent": aggs_sent})

    def show_mcache(msg):
        return jagg.metric_caches

    def subscribe_collectors(__msg):
        # Ask every message bus to push this job's messages to our PULL port.
        for msgb in pargs.msgbus:
            log.info("subscribing to msgs of job %s at %s" % (pargs.jobid, msgb))
            me_addr = zmq_own_addr_for_uri(msgb)
            send_rpc(context, msgb, "subscribe", TARGET="tcp://%s:%d" % (me_addr, recv_port),
                     J=pargs.jobid)

    def unsubscribe_and_quit(__msg):
        # Remove our subscription from every message bus, then exit hard.
        # NOTE(review): "subscribe" passes J=<jobid> but "unsubscribe" does
        # not - confirm the message bus does not need it.
        for msgb in pargs.msgbus:
            log.info("unsubscribing jobid %s from %s" % (pargs.jobid, msgb))
            me_addr = zmq_own_addr_for_uri(msgb)
            send_rpc(context, msgb, "unsubscribe", TARGET="tcp://%s:%d" % (me_addr, recv_port))
        os._exit(0)

    rpc = RPCThread(context, listen=pargs.cmd_port)
    rpc.start()
    rpc.register_rpc("agg", aggregate_rpc)
    rpc.register_rpc("quit", unsubscribe_and_quit, early_reply=True)
    rpc.register_rpc("resubscribe", subscribe_collectors)
    rpc.register_rpc("show_mcache", show_mcache)

    # subscribe to message bus
    subscribe_collectors(None)

    if len(pargs.dispatcher) > 0:
        # Publish our own RPC and listen addresses to the dispatcher.
        me_addr = zmq_own_addr_for_uri(pargs.dispatcher)
        me_listen = "tcp://%s:%d" % (me_addr, recv_port)
        me_rpc = "tcp://%s:%d" % (me_addr, rpc.port)
        state = get_kwds(component="job_agg", cmd_port=me_rpc, listen=me_listen, jobid=pargs.jobid)
        component = ComponentState(context, pargs.dispatcher, state=state)
        rpc.register_rpc("resend_state", component.reset_timer)

    tstart = None
    log.info("Started msg receiver on %s" % pargs.listen)
    count = 0
    while True:
        try:
            s = receiver.recv()
            msg = json.loads(s)

            cmd = None
            if "_COMMAND_" in msg:
                cmd = msg["_COMMAND_"]
            cmd_name = cmd["cmd"] if cmd is not None else None

            if cmd_name == "quit":
                log.info("Stopping job aggregator for jobid %s on 'quit' command." % pargs.jobid)
                break
            elif cmd_name == "resend_state":
                log.info("State resend requested.")
                if component is not None:
                    component.reset_timer()
                continue

            jagg.queue.put(msg)
            if count == 0 or cmd_name == "reset-stats":
                tstart = time.time()
                count = 0
            count += 1
            # component stays None when no dispatcher was given; calling
            # update() unconditionally would raise and end the loop after
            # the very first message.
            if component is not None:
                component.update({"stats.val_msgs_recvd": count})
            if (pargs.stats and count % 10000 == 0) or cmd_name == "show-stats":
                elapsed = time.time() - tstart
                # elapsed can be 0 right after the counters were (re)started;
                # do not let the rate computation abort the receive loop.
                rate = float(count) / elapsed if elapsed > 0 else 0.0
                sys.stderr.write("%d msgs in %f seconds, %f msg/s\n" %
                                 (count, elapsed, rate))
                sys.stderr.flush()
        except Exception as e:
            print("Exception in msg receiver: %r" % e)
            jagg.stopping = True
            break

    time.sleep(0.1)
    print("%d messages received" % count)
示例#3
0
def aggmon_collector(argv):
    """Run the collector (message bus) daemon.

    Parses ``argv``, creates a ``SubscriberQueue`` on ``--listen`` whose
    messages are pre-processed by ``spoofed_host`` and
    ``convert_str_int_float``, an ``AggPubThread`` that publishes from that
    queue using a ``MsgTagger``, and an ``RPCThread`` exposing the
    subscription and tagging commands. Afterwards the main thread keeps
    joining the subscriber queue until an exception occurs.

    Args:
        argv: list of command line arguments (without the program name).

    The process is terminated on invalid arguments, when one of the
    essential objects can not be created, and on the "quit" RPC.
    """
    global component

    ap = argparse.ArgumentParser()
    ap.add_argument('-C', '--cmd-port', default="tcp://127.0.0.1:5556", action="store", help="RPC command port")
    ap.add_argument('-D', '--dispatcher', default="", action="store", help="agg_control dispatcher RPC command port")
    ap.add_argument('-g', '--group', default="universe", action="store", help="group for this message bus. Default: /universe")
    ap.add_argument('-l', '--log', default="info", action="store", help="logging: info, debug, ...")
    ap.add_argument('-L', '--listen', default="tcp://127.0.0.1:5555", action="store", help="zmq pull port to listen on")
    ap.add_argument('-M', '--msgbus', default="", action="store", help="subscription port for other message bus")
    ap.add_argument('-s', '--stats', default=False, action="store_true", help="print statistics info")
    ap.add_argument('-S', '--state-file', default="agg_collector.state", action="store", help="file to store tagger rules and subscriptions")
    ap.add_argument('-v', '--verbose', type=int, default=0, action="store", help="verbosity")
    pargs = ap.parse_args(argv)

    # The level name comes from the command line; look it up with getattr()
    # instead of eval() so arbitrary expressions are never executed.
    log_level = getattr(logging, pargs.log.upper(), None)
    if not isinstance(log_level, int):
        ap.error("invalid log level: %r" % pargs.log)
    FMT = "%(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s"
    logging.basicConfig(stream=sys.stderr, level=log_level, format=FMT)

    state = []
    subs = {}
    tags = {}

    # EF 6.7.16 disabled state loading
    #if len(pargs.state_file) > 0:
    #    state = load_state(pargs.state_file)
    if len(state) >= 2:
        subs = state[0]
        tags = state[1]

    def spoofed_host(msg):
        # treat spoofed hosts already here: keep only the part after the
        # first ":" of the host field
        if "H" in msg and ":" in msg["H"]:
            msg["H"] = msg["H"].split(":")[1]
        elif "HOST" in msg and ":" in msg["HOST"]:
            msg["HOST"] = msg["HOST"].split(":")[1]
        ##
        ## For debugging duplicates...
        ##
        #if "N" in msg and msg["N"] == "cpu_user":
        #    log.info("cpu_user val: %r" % msg)
        return msg

    def convert_str_int_float(msg):
        # convert string values to int or float; strings that are neither
        # are left untouched
        if "V" in msg:
            val = msg["V"]
            if isinstance(val, basestring):
                if val.isdigit():
                    msg["V"] = int(val)
                else:
                    try:
                        msg["V"] = float(val)
                    except ValueError:
                        pass
        return msg

    def save_subs_tags(msg):
        # EF 6.7.16 disabled state saving
        #save_state(pargs.state_file, [pubsub.subs, tagger.tags])
        pass

    def quit_rpc(msg):
        # Handler of the "quit" RPC (named quit_rpc to avoid shadowing the
        # builtin quit).
        subq.stopping = True
        # raw exit for now
        os._exit(0)

    try:
        context = zmq.Context()
        subq = SubscriberQueue(context, pargs.listen, pre=[spoofed_host, convert_str_int_float])

        tagger = MsgTagger(tags=tags)
        pubsub = AggPubThread(context, subq.queue, subs=subs, tagger=tagger.do_tag)

        rpc = RPCThread(context, listen=pargs.cmd_port)
        rpc.start()
    except Exception:
        log.error(traceback.format_exc())
        log.error("Failed to initialize something essential. Exiting.")
        os._exit(1)

    rpc.register_rpc("subscribe", pubsub.subscribe, post=save_subs_tags)
    rpc.register_rpc("unsubscribe", pubsub.unsubscribe, post=save_subs_tags)
    rpc.register_rpc("show_subs", pubsub.show_subscriptions)
    rpc.register_rpc("reset_subs", pubsub.reset_subscriptions, post=save_subs_tags)
    rpc.register_rpc("add_tag", tagger.add_tag, post=save_subs_tags)
    rpc.register_rpc("remove_tag", tagger.remove_tag, post=save_subs_tags)
    rpc.register_rpc("reset_tags", tagger.reset_tags, post=save_subs_tags)
    rpc.register_rpc("show_tags", tagger.show_tags)
    rpc.register_rpc("quit", quit_rpc, early_reply=True)

    pubsub.start()
    subq.start()

    if len(pargs.dispatcher) > 0:
        # Publish our own RPC and listen addresses to the dispatcher.
        me_addr = zmq_own_addr_for_uri(pargs.dispatcher)
        me_listen = "tcp://%s:%d" % (me_addr, subq.port)
        me_rpc = "tcp://%s:%d" % (me_addr, rpc.port)
        state = get_kwds(component="collector", cmd_port=me_rpc, listen=me_listen, group=pargs.group)
        component = ComponentState(context, pargs.dispatcher, state=state)
        rpc.register_rpc("resend_state", component.reset_timer)

    if len(pargs.msgbus) > 0:
        print("subscribing to all msgs from %s" % pargs.msgbus)
        # NOTE(review): TARGET is the configured --listen URI, not the
        # address/port that was actually bound (subq.port) - confirm this
        # is intended.
        msg = {"TARGET": pargs.listen}
        send_rpc(context, pargs.msgbus, "subscribe", **msg)

    while True:
        try:
            subq.join(0.1)
        except Exception as e:
            print("main thread exception: %r" % e)
            break