def agent(num, input):
    """
    Worker loop that checks resource availability and access.

    Runs forever: takes a resource hostname from ``input``, looks the node
    up with ``Query('Nodes')``, attempts a setup on it and stores the
    outcome through ``s.resource``.

    :param num: identifier of this agent; only used in the start-up log line
    :param input: queue-like object whose ``get()`` returns a resource
        hostname (the name shadows the builtin but is kept so existing
        callers are unaffected)
    """
    logger.info("Agent %s starting" % num)

    while True:
        resource = input.get()  # resource hostname

        # Load node information from the testbed/facility DB (e.g. with SFA
        # or local API). We get the node status from there.
        node = Query('Nodes').hostname(resource).execute().first()
        if not node:
            # The lookup returned nothing, so there is no node object to
            # read attributes from: log the hostname we asked for and move
            # on to the next resource instead of crashing the worker.
            logger.info("Node disappeared : %s", resource)
            continue

        # NOTE(review): availability/status are computed here but are not
        # part of the record stored below (which reports node.boot_state).
        # Kept so the node.is_running() check still runs - confirm whether
        # they should be reported or dropped.
        if not node.enabled:
            availability = 0
            status = "disabled"
        elif not node.is_running():
            availability = 0
            status = "down"
        else:
            availability = 1
            status = "up"

        # Node access status: e.g. ssh, we try to do a setup on the node and
        # report the result. We also try with nodes that are marked as
        # disabled or not working anyway.
        #
        # `r` is used as the RethinkDB driver elsewhere in this file
        # (r.connect / r.table), while the setup helper is called as
        # `remote.setup`; use the latter here for consistency.
        result = remote.setup(resource)
        print(result)

        if not result['status']:
            logger.info("%s : Failed SSH access (%s)" % (resource, result['message']))
        else:
            logger.info("%s : Setup complete" % (resource))

        s.resource({
            "hostname": node.hostname,
            "state": node.boot_state,
            "access": result
        })
def process_job(num, input): """ This worker will try to check for resource availability """ logger.info("Agent %s starting" % num) try : c = r.connect(host=Config.rethinkdb["host"], port=Config.rethinkdb["port"], db=Config.rethinkdb['db']) except r.RqlDriverError : logger.error("Can't connect to RethinkDB") raise SystemExit("Can't connect to RethinkDB") while True: job = input.get() logger.info("Agent %s processing job %s" % (num, job)) j = r.table('jobs').get(job).run(c) logger.info("Job: %s" % (j,)) r.table('jobs').get(job).update({ 'started': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'jobstatus': 'running', 'message': 'executing job' }).run(c) result = remote.setup(j['node']) if not result['status'] : logger.info("%s : Failed SSH access (%s)" % (j['node'], result['message'])) upd = { 'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'jobstatus': 'error', 'message': 'node not reachable', 'returnstatus': 1, 'stdout': '', 'stderr': result['message'] } else : if not 'arg' in j['parameters']: j['parameters']['arg'] = "" if j['command'] == 'ping': command = 'ping' remote_command = '%s.py %s %s' % (command, j['parameters']['arg'], j['parameters']['dst']) try: ret = remote_worker(j['node'], remote_command) except Exception, msg: logger.error("EXEC error: %s" % (msg,)) upd = { 'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'jobstatus': 'error', 'message': 'job error', 'returnstatus': 1, 'stdout': '', 'stderr': "execution error %s" % (msg) } logger.error("execution error %s" % (msg)) else: upd = { 'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'jobstatus': ret['jobstatus'], 'message': ret['message'], 'returnstatus': ret['returnstatus'], 'stdout': ret['stdout'], 'stderr': ret['stderr'] } logger.info("Command executed, result: %s" % (upd)) elif j['command'] == 'traceroute': command = 'traceroute' remote_command = '%s.py %s %s' % (command, 
j['parameters']['arg'], j['parameters']['dst']) try: ret = remote_worker(j['node'], remote_command) except Exception, msg: logger.error("EXEC error: %s" % (msg,)) upd = { 'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'jobstatus': 'error', 'message': 'job error', 'returnstatus': 1, 'stdout': '', 'stderr': "execution error %s" % (msg) } logger.error("execution error %s" % (msg)) else: upd = { 'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'jobstatus': ret['jobstatus'], 'message': ret['message'], 'returnstatus': ret['returnstatus'], 'stdout': ret['stdout'], 'stderr': ret['stderr'] } logger.info("Command executed, result: %s" % (upd))
else: upd = { 'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'jobstatus': ret['jobstatus'], 'message': ret['message'], 'returnstatus': ret['returnstatus'], 'stdout': ret['stdout'], 'stderr': ret['stderr'] } logger.info("Command executed, result: %s" % (upd)) elif j['command'] == 'iperf': ## # setup second node result_dst = remote.setup(j['parameters']['dst']) if not result_dst['status']: logger.error("%s : Failed SSH access (%s)" % (j['parameters']['dst'], result_dst['message'])) upd = { 'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), 'jobstatus': 'error', 'message': 'job error', 'returnstatus': 1, 'stdout': '', 'stderr': "Node %s not responding" % (j['parameters']['dst']) } logger.error("Node %s not responding" % (j['parameters']['dst'])) else: