class Initiator(Monitor):

    def __init__(self, rootdir):
        Monitor.__init__(self, rootdir)
        self.remaproot = rootdir
        self.broker_address = "unknown"
        self.brokerChanged = False
        self.bsub = None
        self.bpub = None
        self.bonjour = BonjourResolver("_remap._tcp", self.cb_broker_changed)
        self.bonjour.start()
        self.jobid = None
        self.refreshed = 0
        self.job_status = "waiting"
        self.rejectedtasks = {}
        self.completedtasks = {}
        self.tasks = {}
        self.allocatedtasks = {}
        self.jobtype = "not_started"
        self.priority = 0
        self.parallellism = 1
        self.manager = None
        self.last_check = time.time()

    def load_plugin(self, name):
        try:
            mod = __import__("module_%s" % name)
            return mod
        except ImportError as ie:
            raise RemapException("No such worker type: %s" % (name))

    # -------
    # Broker handling
    # -------
    def setup_broker(self):
        self.brokerChanged = False
        if self.bsub != None:
            self.bsub.close()
            self.bsub = None
            self.apply_timeouts()
        if self.broker_address == "unknown":
            logger.error("Deferring broker setup as address is still unknown.")
            return
        self.bsub = nn.Socket(nn.SUB)
        self.bsub.connect("tcp://%s:8687" % (self.broker_address))
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "global")
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "local")
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "notlocal")
        if self.jobid != None:
            self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, self.jobid)
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "tracker")
        self.apply_timeouts()
        self.bpub = nn.Socket(nn.PUB)
        self.bpub.connect("tcp://%s:8686" % (self.broker_address))
        logger.info("Broker setup complete")

    def apply_timeouts(self):
        if self.bsub != None:
            rcv_timeout = 100
            self.bsub.set_int_option(nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout)

    def cb_broker_changed(self, broker_address):
        logger.info("Received new broker address: %s" % (broker_address))
        self.broker_address = broker_address
        self.brokerChanged = True

    def forward_to_broker(self, msg):
        if self.bpub != None:
            try:
                self.bpub.send(msg)
            except nn.NanoMsgAPIError as e:
                pass

    def process_broker_messages(self):
        if self.bsub == None:
            # No broker is known yet.
            if self.brokerChanged:
                logger.info("The broker configuration changed.")
                self.setup_broker()
                if self.bsub == None:
                    logger.info("Failed broker setup.")
                    return False
            else:
                return False
        try:
            # Grab next msg from broker if any
            msg = self.bsub.recv()
            if msg != None and len(msg) > 0:
                msgprefix, data = remap_utils.unpack_msg(msg)
                recipientid, msgtype, senderid = remap_utils.split_prefix(msgprefix)
                if msgtype == "complete":
                    self.update_corecomplete(recipientid, senderid, data)
                if msgtype == "corestatus":
                    self.update_corestatus(recipientid, senderid, data)
                if msgtype == "raisehand":
                    self.update_hands(recipientid, senderid, data)
                return True
            else:
                return False
        except nn.NanoMsgAPIError as e:
            return False

    # -------
    # Messaging handling
    # -------
    def update_corestatus(self, recipientid, senderid, data):
        if self.manager != None:
            key = self.manager.get_work_key(data)
            if key in self.allocatedtasks:
                job = self.allocatedtasks[key]
                job["ts_finish"] = time.time() + 7

    def update_corecomplete(self, recipientid, senderid, data):
        if self.manager != None:
            key = self.manager.get_work_key(data)
            logger.info("Job %s completed." % (key))
            if key in self.allocatedtasks:
                job = self.allocatedtasks[key]
                task = self.tasks[key]
                self.completedtasks[key] = task
                del self.tasks[key]
                del self.allocatedtasks[key]
                logger.info("%d tasks left, %d tasks committed, %d tasks complete, %d tasks failed." %
                            (len(self.tasks), len(self.allocatedtasks),
                             len(self.completedtasks), len(self.rejectedtasks)))

    def update_hands(self, recipientid, senderid, data):
        # "%s.raisehand.%s"%( senderid, self.nodeid ), {"cores":3,"interruptable":0}
        if senderid in self.nodes:
            self.nodes[senderid]["avail"] = data
        else:
            self.nodes[senderid] = {}
            self.nodes[senderid]["avail"] = data

    # -------
    # Job management
    # -------
    def start_job(self, jobdata):
        if self.job_status != "waiting":
            raise RemapException("A job is currently in progress on this monitor")
        if "type" not in jobdata:
            raise RemapException("Must have job type specified")
        if "priority" not in jobdata:
            raise RemapException("Must have priority specified")
        if "parallellism" not in jobdata:
            raise RemapException("Must have parallellism specified")
        self.job_status = "preparing"
        self.prepare_start = time.time()
        self.jobtype = jobdata["type"]
        self.priority = jobdata["priority"]
        self.parallellism = jobdata["parallellism"]
        plugin = self.load_plugin(self.jobtype)
        self.rejectedtasks = {}
        self.completedtasks = {}
        if self.jobid != None:
            self.bsub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, self.jobid)
        if "jobid" in jobdata:
            self.jobid = jobdata["jobid"]
            del jobdata["jobid"]
        else:
            self.jobid = remap_utils.unique_id()
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, self.jobid)
        if "app" not in jobdata:
            raise RemapException("The name of the app must be provided")
        if jobdata["app"] not in self.list_apps():
            raise RemapException("No such application: %s" % (jobdata["app"]))
        config = {"jobid": self.jobid, "remaproot": self.remaproot}
        logger.info("Started a new job: %s" % (self.jobid))
        self.manager = plugin.create_manager(jobdata, config)
        if (time.time() - self.refreshed) > 60:
            # Not refreshed > 60s
            self.refresh_nodes(self.priority)
            # Wait for a bunch of nodes to advertise themselves
            r = Timer(1.0, self.resume, ())
            r.start()
        else:
            self.resume()
        return {"jobid": self.jobid}

    def resume(self):
        self.manager.prepare()
        logger.info("Starting a %s job" % (self.jobtype))
        self.planner = JobPlanner(self.manager.config_file)
        self.tasks = self.manager.plan_jobs(self.planner)
        logger.info("Found %d tasks to execute" % (len(self.tasks)))
        numnodes, self.allocatedtasks = self.planner.distribute_jobs_over_nodes(
            self.tasks, {}, self.nodes, self.parallellism)
        if len(self.allocatedtasks) == 0:
            logger.error("No nodes found to distribute the tasks.")
            self.job_status = "waiting"
            return
        if self.manager.all_hands_on_deck():
            if len(self.allocatedtasks) != len(self.tasks):
                raise RemapException("Not enough cores available. Have %d, need %d." %
                                     (len(self.allocatedtasks), len(self.tasks)))
        logger.info("%d new tasks distributed over %d nodes." %
                    (len(self.allocatedtasks), numnodes))
        self.job_status = "executing"
        self.outbound_work(self.allocatedtasks)

    # In outbound_work we update our local "jobs" data with timestamps
    # for when they were sent out, and send the task data to the nodes.
    def outbound_work(self, jobs):
        nodes = {}
        for key, job in jobs.items():
            nodeid = job["nodeid"]
            # Stamp every outgoing job; check_progress() relies on ts_finish.
            job["ts_start"] = time.time()
            job["ts_finish"] = time.time() + 7
            if nodeid in nodes:
                nodes[nodeid]["cores"].append(job["jobdata"])
            else:
                tasks = {}
                tasklist = []
                tasklist.append(job["jobdata"])
                tasks["cores"] = tasklist
                tasks["priority"] = self.priority
                nodes[nodeid] = tasks
        for nodeid, tasks in nodes.items():
            msg = remap_utils.pack_msg("%s.jobstart.%s" % (nodeid, self.jobid), tasks)
            self.forward_to_broker(msg)

    def check_progress(self):
        if self.manager != None:
            if self.manager.module_tracks_progress():
                if not self.manager.check_progress(len(self.tasks)):
                    self.manager.finish()
                    self.manager = None
                    self.job_status = "waiting"
                    logger.info("Vertex job complete")
            else:
                if time.time() - self.last_check <= 4:
                    return
                newtime = time.time()
                kill_list = []
                for key, job in self.allocatedtasks.items():
                    if newtime > job["ts_finish"]:
                        # This job hasn't been updated, probably dead.
                        jobdata = job["jobdata"]
                        # Update the task with an attempt + 1
                        task = self.tasks[key]
                        task["attempts"] = task["attempts"] + 1
                        nodeid = job["nodeid"]
                        logger.info("Task %s failed on node %s. Reattempting elsewhere" % (key, nodeid))
                        if task["attempts"] > 4:
                            # 5 attempts so far. Let's cancel it.
                            logger.warn("Task %s failed 5 attempts. Cancelling and rejecting it." % (key))
                            del self.tasks[key]
                            kill_list.append(key)
                            self.rejectedtasks[key] = task
                for key in kill_list:
                    del self.allocatedtasks[key]
                # Now also check if there are jobs that can be started.
                if len(self.tasks) > 0:
                    numnodes, new_allocations = self.planner.distribute_jobs_over_nodes(
                        self.tasks, self.allocatedtasks, self.nodes, self.parallellism)
                    if numnodes > 0:
                        logger.info("%d new tasks distributed over %d nodes" %
                                    (len(new_allocations), numnodes))
                        self.outbound_work(new_allocations)
                        self.allocatedtasks.update(new_allocations)
                if self.job_status == "executing" and len(self.tasks) == 0 and len(self.allocatedtasks) == 0:
                    # Finished all work.
                    self.job_status = "waiting"
                    self.manager.finish()
                    self.manager = None
                logger.info("%d jobs left, %d jobs committed, %d jobs complete, %d jobs failed." %
                            (len(self.tasks), len(self.allocatedtasks),
                             len(self.completedtasks), len(self.rejectedtasks)))
                if self.job_status == "preparing" and time.time() - self.prepare_start > 5:
                    # Still preparing after 5 seconds? Quit it.
                    self.job_status = "waiting"
                    if self.manager != None:
                        self.manager.finish()
                        self.manager = None
                    logger.info("Cancelled job in progress.")
        else:
            # No manager.
            if self.job_status != "waiting":
                self.job_status = "waiting"
                logger.info("Resolving inconsistent state.")
        self.last_check = time.time()

    # -------
    # Node management
    # -------
    def refresh_nodes(self, priority):
        self.nodes = {}
        self.priority = priority
        self.refreshed = time.time()
        msg = remap_utils.pack_msg("local.showhands.%s" % (self.jobid),
                                   {"priority": self.priority})
        self.forward_to_broker(msg)
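
# --- Illustrative sketch, not part of the original source ---
# A minimal example of the jobdata dict that Initiator.start_job() expects,
# derived only from the keys validated above ("type", "priority",
# "parallellism", "app", optional "jobid"). The concrete values, the
# "wordcount" app name and the "/home/remap" root are assumptions.
if __name__ == "__main__":
    example_jobdata = {
        "type": "mapreduce",       # load_plugin() resolves this to module_mapreduce
        "priority": 1,
        "parallellism": 4,
        "app": "wordcount",        # must be one of self.list_apps()
        # "jobid": "job-0001",     # optional; otherwise remap_utils.unique_id() is used
    }
    # Requires a running broker and node daemons; shown here for shape only.
    # initiator = Initiator("/home/remap")
    # print(initiator.start_job(example_jobdata))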
class Vertex(WorkerBase):

    def __init__(self, app, appconfig, workdata):
        WorkerBase.__init__(self, app, appconfig, workdata)
        self.surveyorname = workdata["hostname"]
        self.vsub = nn.Socket(nn.SUB, domain=nn.AF_SP)
        self.vpub = nn.Socket(nn.PUB, domain=nn.AF_SP)
        self.broker_address = None
        self.bonjour = BonjourResolver("_vertexremap._tcp", self.cb_broker_changed)
        self.bonjour.start()
        inputfile = os.path.join(self.remaproot, "data", self.workdata["inputfile"])
        outputdir = os.path.join(self.remaproot, "job", self.jobid, "part")
        self.input = self.app.create_vertex_reader(inputfile)
        self.outputdir = outputdir
        self.partitions = {}
        self.mode = MODE_IDLE
        self.surveyor = nn.Socket(nn.RESPONDENT)
        self.surveyor.connect("tcp://%s:8688" % (self.surveyorname))
        # 50 ms receive timeout on the surveyor socket
        self.surveyor.set_int_option(nn.SOL_SOCKET, nn.RCVTIMEO, 50)
        self.vertices = {}
        logger.info("Waiting to get vertex broker host from bonjour")
        self.ready = False

    def cb_broker_changed(self, broker_address):
        logger.info("Received vertex broker address: %s" % (broker_address))
        if self.broker_address != None:
            return
        self.broker_address = broker_address
        # Vertex broker pub and sub
        self.vpubc = self.vpub.connect("tcp://%s:8689" % (self.broker_address))
        self.vsubc = self.vsub.connect("tcp://%s:8690" % (self.broker_address))
        # 2 seconds max
        self.vsub.set_int_option(nn.SOL_SOCKET, nn.RCVTIMEO, 2000)
        logger.info("Vertex broker setup complete")
        for value in self.input.read():
            key, vertex = self.app.prepare(value)
            if key == None or vertex == None:
                continue
            # Store the vertex by id in a dict with 2 lists for messages
            self.vertices[key] = (vertex, [], [])
            self.vsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, key)
        logger.info("Ready for processing")
        self.ready = True

    def module_manages_progress(self):
        return True

    def result(self):
        return "complete", {"inputfile": self.workdata["inputfile"]}

    def forward(self, id, msg):
        # Forward to the vertex broker
        self.vpub.send(id + " " + msg)

    def subscribe(self, topic):
        self.vsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, topic)

    def unsubscribe(self, topic):
        self.vsub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, topic)

    # This function performs the actual work. The *state* is kept in the initiator
    # daemon only, so a worker directly responds to whatever the surveyor tells it to do.
    def work(self):
        if not self.ready:
            return True
        surveyormsg = None
        try:
            surveyormsg = remap_utils.decode(self.surveyor.recv())
        except nn.NanoMsgAPIError as e:
            return True
        if surveyormsg[0] == 'S':
            # Shift messages
            if self.mode != MODE_MSGS:
                self.mode = MODE_MSGS
                # We haven't done this in a previous step. Due to recovery, the
                # initiator may use this to get others up to speed.
                for key, (vertex, messages, messagesNext) in self.vertices.items():
                    self.vertices[key] = (vertex, messagesNext, [])
            self.surveyor.send("D")
            return True
        if surveyormsg[0] == 'H':
            self.mode = MODE_HALT
            logger.info("Halting core.")
            self.surveyor.close()
            return False
        if surveyormsg[0] == 'P':
            if self.mode != MODE_PROCESS:
                # First time in this state; grab all messages and
                # allocate them to the vertex queues.
                self.mode = MODE_PROCESS
                logger.info("Processing messages 1")
                while True:
                    try:
                        msg = self.vsub.recv()
                        prefix, data = remap_utils.unpack_msg(msg)
                        if prefix in self.vertices:
                            # This vertex is indeed on this host. Add the message
                            # to its new msg list for the next iteration.
                            vertex, messages, messagesNext = self.vertices[prefix]
                            messagesNext.append(data)
                    except nn.NanoMsgAPIError as e:
                        logger.error("No more messages available.")
                        break
            else:
                logger.info("Processing messages 2")
                # Doing this twice makes no difference. The second time around,
                # just throw away all messages.
                while True:
                    try:
                        msg = self.vsub.recv()
                        print("Received and thrown away: ", msg)
                    except nn.NanoMsgAPIError as e:
                        logger.error("No more messages available.")
                        break
            self.surveyor.send("D")
            return True
        self.mode = MODE_RUN
        self.superstep = int(surveyormsg)
        mainHalt = True
        for key, (vertex, messages, messagesNext) in self.vertices.items():
            vertex, halt = self.app.compute(self.forward, self.subscribe, self.unsubscribe,
                                            self.superstep, vertex, messages)
            if vertex != None:
                # Store the new vertex object in its place, maintaining the
                # messagesNext list as we know it.
                self.vertices[key] = (vertex, [], messagesNext)
            if not halt:
                mainHalt = False
        if mainHalt:
            self.surveyor.send("H")
        else:
            self.surveyor.send("D")
        return True
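
# --- Illustrative sketch, not part of the original source ---
# work() above drives a Pregel-style superstep: for each local vertex it calls
# self.app.compute(forward, subscribe, unsubscribe, superstep, vertex, messages)
# and expects (new_vertex, halt) back. Below is a minimal compute function for
# max-value propagation; the vertex layout ({"value", "edges"}) and the string
# message payloads are assumptions, not the project's actual app API.
def compute(forward, subscribe, unsubscribe, superstep, vertex, messages):
    # Adopt the largest value seen so far: our own, or anything a neighbour
    # sent us during the previous superstep.
    new_value = max([vertex["value"]] + [int(m) for m in messages])
    changed = new_value != vertex["value"]
    vertex["value"] = new_value
    if changed or superstep == 0:
        # forward() publishes "<vertex id> <payload>" to the vertex broker,
        # so every vertex subscribed to its own id picks the message up.
        for neighbour in vertex["edges"]:
            forward(neighbour, str(new_value))
        return vertex, False   # keep running
    return vertex, True        # vote to halt; work() sends "H" when all halt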
class NodeDaemon(object):

    def __init__(self, remaproot):
        self.remaproot = remaproot
        self.cores = {}
        self.broker_address = "unknown"
        self.brokerChanged = False
        self.bsub = None
        self.bpub = None
        self.tot_m_rcv = 0
        self.hw = NodeHardware()
        self.nodeid = remap_utils.node_id()
        self.bonjour = BonjourResolver("_remap._tcp", self.cb_broker_changed)
        self.bonjour.start()
        self.coresChanged = False

    # Create a bi-directional communication channel, where the node daemon
    # 'shouts' into the room even when contacting a single core, but the cores
    # only send written messages back to the shouter with the megaphone
    # (embarrassing protocol).
    def setup_bus(self):
        self.lsub = nn.Socket(nn.SUB)
        self.lsub.bind("ipc:///tmp/node_pub.ipc")
        self.lsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "")
        self.lpub = nn.Socket(nn.PUB)
        self.lpub.bind("ipc:///tmp/node_sub.ipc")

    def apply_timeouts(self):
        if self.bsub == None:
            rcv_timeout = 100
            self.lsub.set_int_option(nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout)
        else:
            rcv_timeout = 100
            self.bsub.set_int_option(nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout)
            rcv_timeout = 0
            self.lsub.set_int_option(nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout)

    def cb_broker_changed(self, broker_address):
        logger.info("Received new broker address: %s" % (broker_address))
        self.broker_address = broker_address
        self.brokerChanged = True

    def setup_broker(self):
        self.brokerChanged = False
        if self.bsub != None:
            self.bsub.close()
            self.bsub = None
            self.apply_timeouts()
        if self.broker_address == "unknown":
            logger.error("Deferring broker setup as address is still unknown.")
            return
        self.bsub = nn.Socket(nn.SUB)
        self.bsub.connect("tcp://%s:8687" % (self.broker_address))
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "global")
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "local")
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "notlocal")
        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, self.nodeid)
        self.apply_timeouts()
        self.bpub = nn.Socket(nn.PUB)
        self.bpub.connect("tcp://%s:8686" % (self.broker_address))
        logger.info("Broker setup complete")

    def process_bus_messages(self):
        try:
            msg = self.lsub.recv()
            msgprefix, data = remap_utils.unpack_msg(msg)
            if len(msgprefix) == 0:
                return True
            recipientid, msgtype, senderid = remap_utils.split_prefix(msgprefix)
            if msgtype[0] == '_':
                # Node message
                self.process_core_message(msgtype, senderid, data)
            elif msgtype == "corestatus":
                if senderid in self.cores:
                    coredata = self.cores[senderid]
                    coredata["ts_last_seen"] = time.time()
                    coredata["progress"] = data["progress"]
                    logger.info("Core %s progressed %d" % (senderid, coredata["progress"]))
                    self.forward_to_broker(msg)
            elif msgtype == "complete":
                if senderid in self.cores:
                    coredata = self.cores[senderid]
                    logger.info("Core %s completed the job" % (senderid))
                    self.forward_to_broker(msg)
                    del self.cores[senderid]
                    self.coresChanged = True
            else:
                # Forward to the broker instead
                self.forward_to_broker(msg)
            return True
        except nn.NanoMsgAPIError as e:
            return False

    def process_core_message(self, msgtype, senderid, data):
        if msgtype == "_hello":
            self.process_hello(data)
        if msgtype == "_todo":
            self.process_todo(senderid, data)
        if msgtype == "_status":
            self.process_status(senderid, data)
        if msgtype == "_sub":
            self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, data["prefix"])
        if msgtype == "_unsub":
            self.bsub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, data["prefix"])

    def forward_to_broker(self, msg):
        if self.bpub != None:
            try:
                self.bpub.send(msg)
            except nn.NanoMsgAPIError as e:
                pass

    # This processes a message where a core announces itself and wants to
    # get a core id to start existing on the network.
    def process_hello(self, data):
        msgid = remap_utils.safe_get(data, "msgid")
        pid = remap_utils.safe_get(data, "pid")
        priority = remap_utils.safe_get(data, "priority")
        coreid = remap_utils.core_id(self.nodeid, pid)
        self.cores[coreid] = {
            "coreid": coreid,
            "ts_last_seen": time.time(),
            "progress": -1,
            "pid": pid,
            "priority": priority
        }
        msg = remap_utils.pack_msg("%s._hey.%s" % (coreid, self.nodeid),
                                   {"msgid": msgid, "coreid": coreid})
        logger.info("A core registered %s" % (coreid))
        self.lpub.send(msg)

    def process_todo(self, senderid, data):
        coredata = self.cores[senderid]
        work = self.hw.grab_work_item()
        if work != None:
            msg = remap_utils.pack_msg("%s._work.%s" % (senderid, self.nodeid), work)
            logger.info("A core was given some work to do: %s" % (senderid))
            self.lpub.send(msg)

    def process_status(self, senderid, data):
        coredata = self.cores[senderid]
        coredata["ts_last_seen"] = time.time()

    def process_broker_messages(self):
        if self.bsub == None:
            # No broker is known yet.
            if self.brokerChanged:
                logger.info("The broker configuration changed.")
                self.setup_broker()
                if self.bsub == None:
                    logger.info("Failed broker setup.")
                    return False
            else:
                return False
        try:
            # Grab the next msg from the broker, if any
            msg = self.bsub.recv()
            self.tot_m_rcv = self.tot_m_rcv + 1
            if msg == None or len(msg) == 0:
                return False
            msgprefix, data = remap_utils.unpack_msg(msg)
            recipientid, msgtype, senderid = remap_utils.split_prefix(msgprefix)
            if msgtype == "showhands":
                self.handle_showhands(recipientid, senderid, data)
            elif msgtype == "jobstart":
                # if recipientid == self.nodeid:
                self.handle_jobstart(recipientid, senderid, data)
            else:
                # Forward to all cores for their processing.
                self.lpub.send(msg)
            return True
        except nn.NanoMsgAPIError as e:
            return False

    def purge_inactive_cores(self, new_ts):
        kill_list = []
        for key, coredata in self.cores.items():
            last_ts = coredata["ts_last_seen"]
            if (new_ts - last_ts) > remap_constants.THR_STATUS_DELAY:
                logger.info("Core %s missed a status report." % (key))
            if (new_ts - last_ts) > remap_constants.MAX_STATUS_DELAY:
                logger.info("Core %s is considered dead." % (key))
                kill_list.append(key)
                # Add code here to kill the core process, just in case.
        for key in kill_list:
            del self.cores[key]

    def maybe_send_status(self):
        if self.coresChanged:
            self.handle_showhands("tracker", "unknown", {"priority": 0})
            self.coresChanged = False

    # Request re-registration of the core processes currently on the bus;
    # this allows a failover restart of this node daemon.
    def req_registration(self):
        msg = remap_utils.pack_msg("node._plzreg.%s" % (self.nodeid), {})
        self.lpub.send(msg)

    # Some app initiator requests processing capacity
    def handle_showhands(self, recipientid, senderid, data):
        avail, interruptable = self.hw.available_cpus(
            remap_utils.safe_get(data, "priority"), self.cores)
        if avail > 0 or interruptable > 0:
            logger.info("Volunteering with %d cores, %d interruptable" % (avail, interruptable))
            msg = remap_utils.pack_msg("tracker.raisehand.%s" % (self.nodeid),
                                       {"free": avail, "interruptable": interruptable})
            self.forward_to_broker(msg)

    # Some app initiator wants this node to start work
    def handle_jobstart(self, recipientid, senderid, data):
        avail, interruptable = self.hw.available_cpus(
            remap_utils.safe_get(data, "priority"), self.cores)
        numcores = len(remap_utils.safe_get(data, "cores"))
        if (avail + interruptable) >= numcores:
            logger.info("Starting job with %d cores" % (numcores))
            if not self.hw.start_job(self.remaproot, senderid, numcores, data):
                logger.error("Error starting job")
        else:
            # Something changed in the meantime. Reject.
            logger.info("Initiator requested %d cores, %d can be committed. Rejecting" %
                        (numcores, avail + interruptable))
            msg = remap_utils.pack_msg("%s.rejectjob.%s" % (senderid, self.nodeid), {})
            self.forward_to_broker(msg)
        self.coresChanged = True
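
# --- Illustrative sketch, not part of the original source ---
# The core-side half of the registration handshake served by process_hello()
# above. The ipc endpoints mirror setup_bus() (crossed over from the core's
# point of view) and the helpers are the same remap_utils calls used above,
# but the "node._hello.unknown" prefix and the overall flow are assumptions.
import os
import time

def register_core():
    cpub = nn.Socket(nn.PUB)
    cpub.connect("ipc:///tmp/node_pub.ipc")     # the daemon listens here (lsub)
    csub = nn.Socket(nn.SUB)
    csub.connect("ipc:///tmp/node_sub.ipc")     # the daemon publishes here (lpub)
    csub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, "")
    csub.set_int_option(nn.SOL_SOCKET, nn.RCVTIMEO, 1000)
    time.sleep(0.1)  # give the PUB/SUB pair a moment to wire up

    msgid = remap_utils.unique_id()
    hello = remap_utils.pack_msg("node._hello.unknown",
                                 {"msgid": msgid, "pid": os.getpid(), "priority": 0})
    cpub.send(hello)

    # The daemon replies with "<coreid>._hey.<nodeid>" carrying our msgid;
    # a receive timeout raises nn.NanoMsgAPIError, left unhandled in this sketch.
    while True:
        prefix, data = remap_utils.unpack_msg(csub.recv())
        if data.get("msgid") == msgid:
            return data["coreid"]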