Exemplo n.º 1
0
	def init(self):
		"""Open a PUB socket on a random port and advertise it on the name server.

		Reads self.nameserver, self.ip, self.cluster and self.publishTimeout.
		Sets self.ns and self.socket, and sets self.resettime only when the
		service was published successfully. A name-server failure is reported
		through collectd's error log instead of being raised.
		"""
		self.ns = nnslib.NameServer(self.nameserver)
		self.socket = zmq.Context().socket(zmq.PUB)
		# NOTE(review): HIGHWATER is not defined in this block; presumably a
		# module-level alias for the zmq high-water-mark option -- confirm.
		self.socket.setsockopt(HIGHWATER, 20000000)
		port = self.socket.bind_to_random_port("tcp://%s" % self.ip)
		try:
			self.ns.publishService(
				self.cluster + ".allpoints",
				"tcp://%s:%s" % (self.ip, port),
				self.publishTimeout,
				"pub/sub",
				"Point",
			)
			self.resettime = time.time() + 300
		except nnslib.NameServerException as e:
			# collectd.error() takes exactly one message string; passing the
			# exception as a second argument raised TypeError inside the handler.
			collectd.error("Error: %s" % e)
Exemplo n.º 2
0
def main():
    """Call one name-server method chosen from the command line.

    Expected arguments: ``<server> <service> [params...]``. A server given
    without a ":" is expanded to ``tcp://<server>:6967``. The service name
    must be a key of the module-level ``options`` mapping, whose value holds
    the minimum and maximum parameter counts at indexes 0 and 1.

    NOTE(review): this code relies on usage() terminating the process (the
    original did too); usage() is defined outside this block -- confirm.
    """
    try:
        server = sys.argv[1]
    except IndexError:
        # Only a missing argument is a usage error; anything else should
        # surface instead of being hidden by a bare except.
        usage()
    if ":" not in server:
        server = "tcp://%s:6967" % server
    ns = nnslib.NameServer(server)

    # A missing service name used to escape as an uncaught IndexError.
    if len(sys.argv) < 3:
        usage()
    service = sys.argv[2]
    if service not in options:
        usage("Unknown service: %s" % service)

    params = sys.argv[3:]
    limits = options[service]
    if not limits[0] <= len(params) <= limits[1]:
        usage("Wrong parameter count for service %s" % service)

    try:
        ret = getattr(ns, service)(*params)
    except nnslib.NameServerException as e:
        print("Server Error: %s" % e)
        sys.exit()
Exemplo n.º 3
0
	def run(self):
		"""Receive points from the subscribed service and store them forever.

		Connects a SUB socket to self.service through the name server at
		self.ns, then loops: each received two-part message is JSON-decoded
		and saved via self.pointStore.savePoint(). When a 1000 ms poll returns
		nothing, the store is flushed and the name server is asked to refresh
		the socket; if that fails, the socket is replaced and the connection
		is retried once per second until it succeeds. Never returns.
		"""
		ns = nnslib.NameServer(self.ns)
		ctx = zmq.Context()
		count = 0
		sock = ctx.socket(zmq.SUB)
		sock.setsockopt(zmq.SUBSCRIBE, "")
		ns.connectService(sock, self.service)
		# zmq.Poller is the public name; zmq.core.poll is a private module path
		# that later pyzmq releases dropped.
		poll = zmq.Poller()
		poll.register(sock, zmq.POLLIN)
		while True:
			res = poll.poll(1000)
			if res:
				count += 1
				# Force a flush under sustained load, when the idle branch
				# below would never get a chance to run.
				if count > 1000000:
					print("emergency flush %s" % time.ctime())
					count = 0
					self.pointStore.flush()
				(_header, payload) = sock.recv_multipart()
				payload = json.loads(payload)
				# As far as I know, slurm only reports the first part of the hostname so we should only
				# store the first entry in the name
				payload["host"] = payload["host"].split(".")[0]
				self.pointStore.savePoint(payload["host"], payload["pointname"], payload["time"], payload["val"])
			else:
				count = 0
				self.pointStore.flush()
				try:
					ns.updateSocket(sock)
				except Exception:
					# Release the stale socket before replacing it; it was
					# previously leaked on every reconnect.
					sock.close()
					sock = ctx.socket(zmq.SUB)
					sock.setsockopt(zmq.SUBSCRIBE, "")
					connected = False
					while not connected:
						try:
							ns.connectService(sock, self.service)
						except Exception:
							print("Error connecting to %s. Sleeping" % self.service)
							time.sleep(1)
						else:
							# Only a successful connect ends the retry loop. The
							# old `finally` marked it connected after a failure too.
							connected = True
					poll = zmq.Poller()
					poll.register(sock, zmq.POLLIN)
Exemplo n.º 4
0
def main(nsurl, cname, myip, statefile, infoport=None, nport=None, eip=None):
    """Serve job information over a REP socket and publish job notifications.

    Parameters:
        nsurl     -- address passed to nnslib.NameServer.
        cname     -- prefix for the published service names
                     (``<cname>.slurm.jobinfo`` and ``<cname>.slurm.jobnotify``).
        myip      -- local address both sockets bind to.
        statefile -- passed through to JobStatus.
        infoport  -- fixed port for the REP socket; a random port when falsy.
        nport     -- fixed port for the PUB socket; a random port when falsy.
        eip       -- address advertised to the name server; defaults to myip.

    Runs until SIGTERM or SIGINT clears the module-level ``running`` flag.
    Requests are multipart messages whose first frame is "jobinfo" (second
    frame is passed to JobStatus.jobInfo), "running" or "completed". Every
    request is answered with JSON; unknown or failing requests get ``{}``.
    """
    global running
    timeout = 600
    if eip is None:
        eip = myip
    print("%s %s %s %s %s %s" % (cname, myip, statefile, infoport, nport, eip))
    ctx = zmq.Context()

    sock = ctx.socket(zmq.REP)
    if infoport:
        sock.bind("tcp://%s:%d" % (myip, infoport))
        port = infoport
    else:
        port = sock.bind_to_random_port("tcp://%s" % (myip))

    notifysock = ctx.socket(zmq.PUB)
    if nport:
        notifysock.bind("tcp://%s:%d" % (myip, nport))
        notifyport = nport
    else:
        notifyport = notifysock.bind_to_random_port("tcp://%s" % (myip))
    print("%s %s" % (port, notifyport))
    ns = nnslib.NameServer(nsurl)
    # Register the data type only when the name server does not know it yet.
    try:
        ns.getDataType("JobDictionary")
    except nnslib.NameServerException:
        ns.addDataType("JobDictionary", "JSON", ','.join(notifyfields), "")

    # Both services share one timeout value instead of repeating the literal.
    ns.publishService(cname + ".slurm.jobinfo", "tcp://%s:%s" % (eip, port),
                      timeout, "req/rep", "JobDictionary")
    ns.publishService(cname + ".slurm.jobnotify",
                      "tcp://%s:%s" % (eip, notifyport), timeout, "pub/sub",
                      "JobDictionary")

    z_poll = zmq.Poller()
    z_poll.register(sock, zmq.POLLIN)
    # NOTE(review): a PUB socket is send-only, so this POLLIN registration
    # should never fire; kept to leave the polling set unchanged.
    z_poll.register(notifysock, zmq.POLLIN)

    status = JobStatus(notifysock, statefile)

    running = True

    def doStop(num=None, frame=None):
        """Signal handler: stop the main loop and withdraw published services."""
        global running
        running = False
        try:
            ns.removeServices()
        except zmq.ZMQError:
            pass

    signal.signal(signal.SIGTERM, doStop)
    signal.signal(signal.SIGINT, doStop)

    while running:
        try:
            ready = z_poll.poll(60)
        except zmq.ZMQError:
            ready = []
        if ready and ready[0][0] == sock:
            request = sock.recv_multipart()
            try:
                print(request)
                command = request[0]
                if command == "jobinfo":
                    info = status.jobInfo(request[1])
                elif command == "running":
                    info = status.runningJobs()
                elif command == "completed":
                    info = status.completedJobs()
                else:
                    # A REP socket must answer every request before it can
                    # receive again; an unknown command used to get no reply
                    # and left the socket unusable.
                    info = {}
                sock.send_json(info)
            except Exception:
                sock.send_json({})
            print("done")
        else:
            # Idle tick: refresh the name-server registrations and job state.
            ns.updateServices()
            status.checkUpdate()
Exemplo n.º 5
0
def main():
	"""Archive finished jobs: queue "JobEnd" notifications for worker processes.

	Parses the server options (name server, point service, job service and
	cluster are all required), starts eight JobArchiveGenerator workers and
	one PointStoreProcess, then loops forever:

	* each "JobEnd" message is JSON-decoded, gets a "Nodes" list expanded
	  from its "NodeList" field, and is put on the shared queue;
	* dead workers and a dead point-store process are replaced;
	* the name server refreshes the subscription, and if that fails the
	  socket is replaced and reconnected with a one-second retry.
	"""
	parser = nwperf.defaultServerOptionParser()
	parser.add_option("-c", "--cluster", action="store", type="string", dest="cluster",
		help="name of cluster to generate graphs for")
	parser.add_option("-S", "--name-server", action="store", type="string", dest="nameserver",
		help="The ZMQ URL of the nameserver")
	parser.add_option("-j", "--job-service", action="store", type="string", dest="jobservice",
		help="The service name that provides job information")
	parser.add_option("-P", "--point-service", action="store", type="string", dest="pointservice",
		help="The service name that provides point data")

	(options, args) = nwperf.parseServerOptions()

	if not options.nameserver:
		parser.error("No name server specified")

	if not options.pointservice:
		parser.error("No point service specified")

	if not options.jobservice:
		parser.error("No job service specified")

	if not options.cluster:
		parser.error("No cluster specified")

	pointStore = MongoPointStore.MongoPointStore()
	jobStore = MongoJobStore.MongoJobStore()

	ns = nnslib.NameServer(options.nameserver)
	ctx = zmq.Context()
	q = multiprocessing.Queue()

	def newArchiver():
		"""Build (but do not start) one archive worker bound to the shared queue."""
		return JobArchiveGenerator(pointStore, q, jobStore, extraFields={"cluster": options.cluster})

	children = [newArchiver() for _ in range(8)]
	for child in children:
		child.start()
	psp = PointStoreProcess(pointStore, options.nameserver, options.pointservice)
	psp.start()

	sock = ctx.socket(zmq.SUB)
	sock.setsockopt(zmq.SUBSCRIBE, "JobEnd")
	ns.connectService(sock, options.jobservice)
	# One poller serves the whole loop; it is rebuilt only when the socket is
	# replaced, instead of on every iteration.
	poll = zmq.Poller()
	poll.register(sock, zmq.POLLIN)
	while True:
		res = poll.poll(1000)
		if res:
			job = sock.recv_multipart()[1]
			job = json.loads(job)
			job["Nodes"] = hostlist.expand_hostlist(job["NodeList"])
			q.put(job)
		# Replace dead workers in place; deleting and appending while
		# iterating by index skipped the entry after each removed one.
		for i, child in enumerate(children):
			if not child.is_alive():
				child.join()
				children[i] = newArchiver()
				children[i].start()
		try:
			ns.updateSocket(sock)
		except Exception:
			sock.close()
			sock = ctx.socket(zmq.SUB)
			sock.setsockopt(zmq.SUBSCRIBE, "JobEnd")
			connected = False
			while not connected:
				try:
					ns.connectService(sock, options.jobservice)
				except Exception:
					print("Error connecting to %s. Sleeping" % options.jobservice)
					time.sleep(1)
				else:
					# Only a successful connect ends the retry loop. The old
					# `finally` marked it connected after a failure too.
					connected = True
			poll = zmq.Poller()
			poll.register(sock, zmq.POLLIN)
		if q.qsize() > 0:
			print("Queue size: %d" % q.qsize())
		if not psp.is_alive():
			psp.join()
			psp = PointStoreProcess(pointStore, options.nameserver, options.pointservice)
			# The replacement must be started: an unstarted process is never
			# alive, and join() on it fails on the next pass.
			psp.start()
Exemplo n.º 6
0
 def __init__(self, ns, prefix, ip=None):
     """Create the name-server client and set the initial instance state.

     ns is passed to nnslib.NameServer; prefix and ip are stored as given.
     """
     self.ns = nnslib.NameServer(ns)
     self.prefix, self.ip = prefix, ip
     # No streams are known at construction time.
     self.streams = dict()
     # Fixed timeout value kept on the instance; its unit is not shown here.
     self.publishTimeout = 600