def print_VOViewLocal(cp):
    """
    Print out the VOView objects for the SGE batch system.

    One VOView is printed for every (VO, queue) pair returned by getVoQueues.
    Running and waiting counts are looked up in the getJobsInfo result by
    queue and then by VO; a missing entry counts as zero jobs.  The free slot
    count and both response times are fixed placeholder values.

    @param cp: The GIP configuration object
    """
    site_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    mapper = VoMapper(cp)
    jobs_by_queue = getJobsInfo(mapper, cp)
    pairs = getVoQueues(cp)
    template = getTemplate("GlueCE", "GlueVOViewLocalID")
    for vo, queue in pairs:
        unique_id = buildCEUniqueID(cp, site_name, 'sge', queue)
        # Job counts for this VO in this queue; empty when nothing is known.
        vo_jobs = jobs_by_queue.get(queue, {}).get(vo, {})
        running = vo_jobs.get('running', 0)
        waiting = vo_jobs.get('waiting', 0)
        info = dict(
            ceUniqueID=unique_id,
            voLocalID=vo,
            acbr='VO:%s' % vo,
            running=running,
            waiting=waiting,
            free_slots=0,  # TODO: fix; no per-VO free slot figure is used
            ert=3600,
            wrt=3600,
            default_se=getDefaultSE(cp),
            app=cp_get(cp, "osg_dirs", "app", "/OSG_APP_UNKNOWN"),
            data=cp_get(cp, "osg_dirs", "data", "/OSG_DATA_UNKNOWN"),
            total=waiting + running
        )
        printTemplate(template, info)
def print_VOViewLocal(queue_info, cp):
    """
    Print out the VOView objects for the LSF batch system.

    One VOView per VO per queue, for each VO which has access to the queue.

    Side effect: every queue named by getVoQueues gets an entry in
    queue_info, and that entry's "max_wall" is set from the lsf.max_wall
    option when present, or defaulted to 1440 when the entry has none.

    @param queue_info: Dictionary of per-queue information, keyed by queue
        name; updated in place as described above.
    @param cp: The GIP configuration object
    """
    ce_name = cp.get(ce, "name")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    vo_queues = getVoQueues(queue_info, cp)
    for vo, queue in vo_queues:
        vo = vo.lower()
        # Job counts for this VO in this queue; empty when nothing is known.
        vo_jobs = queue_jobs.get(queue, {}).get(vo, {})
        running = vo_jobs.get('running', 0)
        waiting = vo_jobs.get('waiting', 0)
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'lsf', queue)

        my_queue_info = queue_info.setdefault(queue, {})
        if cp.has_option("lsf", "max_wall"):
            # An explicit configuration value overrides the queue's own.
            my_queue_info["max_wall"] = cp_getInt(cp, "lsf", "max_wall", 1440)
        else:
            my_queue_info.setdefault("max_wall", 1440)

        ert, wrt = responseTimes(cp, running, waiting,
            max_job_time=my_queue_info["max_wall"])

        free_slots = my_queue_info.get('free_slots', 0)
        # The default is an integer so the comparison below is always
        # int-to-int, even if cp_getInt hands the default back unchanged.
        if waiting > cp_getInt(cp, 'lsf', 'idle_slack', 10):
            free_slots = 0

        info = {
            'ceUniqueID': ce_unique_id,
            'job_slots': my_queue_info.get('job_slots', 0),
            'free_slots': free_slots,
            'ce_name': ce_name,
            'queue': queue,
            'vo': vo,
            'voLocalID': vo,
            'job_manager': 'lsf',
            'running': running,
            'max_running': vo_jobs.get('max_running', 0),
            'priority': my_queue_info.get('priority', 0),
            'waiting': waiting,
            'data': cp.get("osg_dirs", "data"),
            'app': cp.get("osg_dirs", "app"),
            'default_se': getDefaultSE(cp),
            'ert': ert,
            'wrt': wrt,
            'acbr': 'VO:%s' % vo
        }
        info['total'] = info['waiting'] + info['running']
        printTemplate(VOView, info)
def print_VOViewLocal(queue_info, cp):
    """
    Print out the VOView objects for the LSF batch system.

    One VOView per VO per queue, for each VO which has access to the queue.

    Side effect: every queue named by getVoQueues gets an entry in
    queue_info, and that entry's "max_wall" is set from the lsf.max_wall
    option when present, or defaulted to 1440 when the entry has none.

    @param queue_info: Dictionary of per-queue information, keyed by queue
        name; updated in place as described above.
    @param cp: The GIP configuration object
    """
    ce_name = cp.get(ce, "name")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    vo_queues = getVoQueues(queue_info, cp)
    for vo, queue in vo_queues:
        vo = vo.lower()
        # Job counts for this VO in this queue; empty when nothing is known.
        vo_jobs = queue_jobs.get(queue, {}).get(vo, {})
        running = vo_jobs.get('running', 0)
        waiting = vo_jobs.get('waiting', 0)
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'lsf', queue)

        my_queue_info = queue_info.setdefault(queue, {})
        if cp.has_option("lsf", "max_wall"):
            # An explicit configuration value overrides the queue's own.
            my_queue_info["max_wall"] = cp_getInt(cp, "lsf", "max_wall", 1440)
        else:
            my_queue_info.setdefault("max_wall", 1440)

        ert, wrt = responseTimes(cp, running, waiting,
            max_job_time=my_queue_info["max_wall"])

        free_slots = my_queue_info.get('free_slots', 0)
        # The default is an integer so the comparison below is always
        # int-to-int, even if cp_getInt hands the default back unchanged.
        if waiting > cp_getInt(cp, 'lsf', 'idle_slack', 10):
            free_slots = 0

        info = {
            'ceUniqueID'  : ce_unique_id,
            'job_slots'   : my_queue_info.get('job_slots', 0),
            'free_slots'  : free_slots,
            'ce_name'     : ce_name,
            'queue'       : queue,
            'vo'          : vo,
            'voLocalID'   : vo,
            'job_manager' : 'lsf',
            'running'     : running,
            'max_running' : vo_jobs.get('max_running', 0),
            'priority'    : my_queue_info.get('priority', 0),
            'waiting'     : waiting,
            'data'        : cp.get("osg_dirs", "data"),
            'app'         : cp.get("osg_dirs", "app"),
            'default_se'  : getDefaultSE(cp),
            'ert'         : ert,
            'wrt'         : wrt,
            'acbr'        : 'VO:%s' % vo
        }
        info['total'] = info['waiting'] + info['running']
        printTemplate(VOView, info)
def print_VOViewLocal(queue_info, cp):
    """
    Print out the VOView objects for the PBS batch system.

    One VOView is printed for every (VO, queue) pair returned by getVoQueues.
    The estimated and worst response times published are the ones computed
    by responseTimes from the VO's running and waiting job counts.

    Side effect: every queue named by getVoQueues gets an entry (possibly
    empty) in queue_info.

    @param queue_info: Dictionary of per-queue information, keyed by queue
        name.
    @param cp: The GIP configuration object
    """
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    vo_queues = getVoQueues(cp)
    for vo, queue in vo_queues:
        # Job counts for this VO in this queue; empty when nothing is known.
        vo_jobs = queue_jobs.get(queue, {}).get(vo, {})
        running = vo_jobs.get('running', 0)
        waiting = vo_jobs.get('wait', 0)
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'pbs', queue)

        my_queue_info = queue_info.setdefault(queue, {})
        max_job_time = my_queue_info.get("max_wall", 0)
        if cp.has_option("pbs", "max_wall"):
            # An explicit configuration value overrides the queue's own.
            max_job_time = cp_getInt(cp, "pbs", "max_wall", 1440)

        ert, wrt = responseTimes(cp, running, waiting, max_job_time)

        free_slots = my_queue_info.get('free_slots', 0)
        # The default is an integer so the comparison below is always
        # int-to-int, even if cp_getInt hands the default back unchanged.
        if waiting > cp_getInt(cp, 'pbs', 'idle_slack', 10):
            free_slots = 0

        info = {
            'ceUniqueID'  : ce_unique_id,
            'job_slots'   : my_queue_info.get('job_slots', 0),
            'free_slots'  : free_slots,
            'ce_name'     : ce_name,
            'queue'       : queue,
            'vo'          : vo,
            'voLocalID'   : vo,
            'job_manager' : 'pbs',
            'running'     : running,
            'max_running' : vo_jobs.get('max_running', 0),
            'priority'    : my_queue_info.get('priority', 0),
            'waiting'     : waiting,
            'data'        : cp_get(cp, "osg_dirs", "data", "UNKNOWN_DATA"),
            'app'         : cp_get(cp, "osg_dirs", "app", "UNKNOWN_APP"),
            'default_se'  : getDefaultSE(cp),
            # Publish the computed response times; they were previously
            # calculated and then replaced by a constant 3600.
            'ert'         : ert,
            'wrt'         : wrt,
            'acbr'        : 'VO:%s' % vo
        }
        info['total'] = info['waiting'] + info['running']
        printTemplate(VOView, info)
def print_VOViewLocal(queue_info, cp):
    """
    Print out the VOView objects for the PBS batch system.

    One VOView is printed for every (VO, queue) pair returned by getVoQueues.
    The estimated and worst response times published are the ones computed
    by responseTimes from the VO's running and waiting job counts.

    Side effect: every queue named by getVoQueues gets an entry (possibly
    empty) in queue_info.

    @param queue_info: Dictionary of per-queue information, keyed by queue
        name.
    @param cp: The GIP configuration object
    """
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    vo_queues = getVoQueues(cp)
    for vo, queue in vo_queues:
        # Job counts for this VO in this queue; empty when nothing is known.
        vo_jobs = queue_jobs.get(queue, {}).get(vo, {})
        running = vo_jobs.get("running", 0)
        waiting = vo_jobs.get("wait", 0)
        ce_unique_id = buildCEUniqueID(cp, ce_name, "pbs", queue)

        my_queue_info = queue_info.setdefault(queue, {})
        max_job_time = my_queue_info.get("max_wall", 0)
        if cp.has_option("pbs", "max_wall"):
            # An explicit configuration value overrides the queue's own.
            max_job_time = cp_getInt(cp, "pbs", "max_wall", 1440)

        ert, wrt = responseTimes(cp, running, waiting, max_job_time)

        free_slots = my_queue_info.get("free_slots", 0)
        # The default is an integer so the comparison below is always
        # int-to-int, even if cp_getInt hands the default back unchanged.
        if waiting > cp_getInt(cp, "pbs", "idle_slack", 10):
            free_slots = 0

        info = {
            "ceUniqueID": ce_unique_id,
            "job_slots": my_queue_info.get("job_slots", 0),
            "free_slots": free_slots,
            "ce_name": ce_name,
            "queue": queue,
            "vo": vo,
            "voLocalID": vo,
            "job_manager": "pbs",
            "running": running,
            "max_running": vo_jobs.get("max_running", 0),
            "priority": my_queue_info.get("priority", 0),
            "waiting": waiting,
            "data": cp_get(cp, "osg_dirs", "data", "UNKNOWN_DATA"),
            "app": cp_get(cp, "osg_dirs", "app", "UNKNOWN_APP"),
            "default_se": getDefaultSE(cp),
            # Publish the computed response times; they were previously
            # calculated and then replaced by a constant 3600.
            "ert": ert,
            "wrt": wrt,
            "acbr": "VO:%s" % vo,
        }
        info["total"] = info["waiting"] + info["running"]
        printTemplate(VOView, info)
def print_VOViewLocal(queue_info, cp):
    """
    Print out the VOView objects for the PBS batch system.

    One VOView is printed for every (VO, queue) pair returned by getVoQueues.
    Free slots are reported as zero whenever the VO has any waiting job in
    the queue.  The estimated and worst response times published are the
    ones computed by responseTimes from the VO's running and waiting counts.

    Side effect: every queue named by getVoQueues gets an entry (possibly
    empty) in queue_info.

    @param queue_info: Dictionary of per-queue information, keyed by queue
        name.
    @param cp: The GIP configuration object
    """
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    vo_queues = getVoQueues(cp)
    for vo, queue in vo_queues:
        # Job counts for this VO in this queue; empty when nothing is known.
        vo_jobs = queue_jobs.get(queue, {}).get(vo, {})
        running = vo_jobs.get('running', 0)
        waiting = vo_jobs.get('wait', 0)
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'pbs', queue)

        my_queue_info = queue_info.setdefault(queue, {})
        ert, wrt = responseTimes(cp, running, waiting,
            max_job_time=my_queue_info.get("max_wall", 0))

        free_slots = my_queue_info.get('free_slots', 0)
        if waiting > 0:
            free_slots = 0

        info = {
            'ceUniqueID'  : ce_unique_id,
            'job_slots'   : my_queue_info.get('job_slots', 0),
            'free_slots'  : free_slots,
            'ce_name'     : ce_name,
            'queue'       : queue,
            'vo'          : vo,
            'voLocalID'   : vo,
            'job_manager' : 'pbs',
            'running'     : running,
            'max_running' : vo_jobs.get('max_running', 0),
            'priority'    : my_queue_info.get('priority', 0),
            'waiting'     : waiting,
            'data'        : cp_get(cp, "osg_dirs", "data", "UNKNOWN_DATA"),
            'app'         : cp_get(cp, "osg_dirs", "app", "UNKNOWN_APP"),
            'default_se'  : getDefaultSE(cp),
            # Publish the computed response times; they were previously
            # calculated and then replaced by a constant 3600.
            'ert'         : ert,
            'wrt'         : wrt,
            'acbr'        : 'VO:%s' % vo
        }
        info['total'] = info['waiting'] + info['running']
        printTemplate(VOView, info)
def print_CE(cp):
    """
    Print out the GlueCE objects for SLURM; one GlueCE per queue.

    Queues listed in the slurm.queue_exclude option are skipped, and nothing
    is printed for a queue that no VO is mapped to.  A queue whose
    information lacks "job_slots" or "max_running" is reported through
    log.error and skipped, since the slot arithmetic below cannot be done
    without them (it used to abort the whole run with a KeyError).

    @param cp: The GIP configuration object
    @return: The dictionary returned by getQueueInfo; the entries of the
        queues that were processed have been filled in, in place.
    """
    slurmVersion = getLrmsInfo(cp)
    queueInfo = getQueueInfo(cp)
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    CE = getTemplate("GlueCE", "GlueCEUniqueID")
    try:
        excludeQueues = [i.strip() for i in cp_get(cp, "slurm",
            "queue_exclude", "").split(",")]
    except Exception:
        # Best effort: an unusable exclude list means nothing is excluded.
        excludeQueues = []
    vo_queues = getVoQueues(cp)
    for queue, info in queueInfo.items():
        if queue in excludeQueues:
            continue
        info["lrmsVersion"] = slurmVersion
        info["job_manager"] = "slurm"
        info["queue"] = queue
        info["ceName"] = ce_name
        unique_id = buildCEUniqueID(cp, ce_name, "slurm", queue)
        ceImpl, ceImplVersion = getCEImpl(cp)
        port = getPort(cp)
        info["ceUniqueID"] = unique_id

        incomplete = False
        if "job_slots" not in info:
            log.error("no job_slots found for %s!" % queue)
            incomplete = True
        if "priority" not in info:
            info["priority"] = 0
        if "max_running" not in info:
            log.error("no max_running found for %s!" % queue)
            incomplete = True
        if "max_wall" not in info:
            info["max_wall"] = 1440
        if incomplete:
            continue

        # Missing counts are treated as zero everywhere in this function.
        running = info.get("running", 0)
        waiting = info.get("wait", 0)

        # If no jobs are waiting in the queue, the number of free slots is
        # (job_slots - running) when that is positive; otherwise it is zero.
        info["free_slots"] = 0
        if waiting == 0:
            freeSlots = info["job_slots"] - running
            if freeSlots > 0:
                info["free_slots"] = freeSlots

        ert, wrt = responseTimes(cp, running, waiting,
            max_job_time=info["max_wall"])
        info["ert"] = ert
        info["wrt"] = wrt
        info["hostingCluster"] = cp_get(cp, ce, "hosting_cluster", ce_name)
        info["hostName"] = cp_get(cp, ce, "host_name", ce_name)
        info["ceImpl"] = ceImpl
        info["ceImplVersion"] = ceImplVersion
        contact_string = buildContactString(cp, "slurm", queue, unique_id, log)
        info["contact_string"] = contact_string
        info["app_dir"] = cp_get(cp, "osg_dirs", "app", "/UNKNOWN_APP")
        info["data_dir"] = cp_get(cp, "osg_dirs", "data", "/UNKNOWN_DATA")
        info["default_se"] = getDefaultSE(cp)

        if "max_waiting" not in info:
            info["max_waiting"] = 999999
        if "max_queuable" in info:
            info["max_total"] = info["max_queuable"]
            info["free_slots"] = min(info["free_slots"], info["max_queuable"])
        else:
            info["max_total"] = info["max_waiting"] + info["max_running"]
            info["free_slots"] = min(info["free_slots"], info["max_total"])

        # Enforce invariants:
        # max_total <= max_running
        # free_slots <= max_running
        info["max_total"] = min(info["max_total"], info["max_running"])
        info["free_slots"] = min(info["free_slots"], info["max_running"])

        # Enforce invariants:
        # assigned <= max_running
        info["assigned"] = min(info["job_slots"], info["max_running"])

        info["lrmsType"] = "slurm"
        info["preemption"] = cp_get(cp, "slurm", "preemption", "0")

        # One access rule per VO mapped to this queue; a queue without any
        # VO is not published.
        rules = ["GlueCEAccessControlBaseRule: VO:%s" % vo
                 for vo, queue2 in vo_queues if queue == queue2]
        if not rules:
            continue
        info["acbr"] = "\n".join(rules)

        info["bdii"] = cp.get("bdii", "endpoint")
        gramVersion = getGramVersion(cp)
        info["gramVersion"] = gramVersion
        info["port"] = port
        info["waiting"] = waiting
        info["referenceSI00"] = gip_cluster.getReferenceSI00(cp)
        info["clusterUniqueID"] = getClusterID(cp)

        extraCapabilities = ""
        if cp_getBoolean(cp, "site", "glexec_enabled", False):
            extraCapabilities = extraCapabilities + "\n" + "GlueCECapability: glexec"
        htpcRSL, maxSlots = getHTPCInfo(cp, "slurm", queue, log)
        info["max_slots"] = maxSlots
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + "\n" + "GlueCECapability: htpc"
        info["extraCapabilities"] = extraCapabilities
        info["htpc"] = htpcRSL

        printTemplate(CE, info)
    return queueInfo
def print_CE(cp):
    """
    Print out the GlueCE objects for LSF; one GlueCE per grid queue.

    Queues listed in the lsf.queue_exclude option are skipped, and nothing is
    printed for a queue that no VO is mapped to.  Failures while determining
    the LSF version, the node counts or the exclude list are tolerated and
    replaced by fallback values.

    @param cp: The GIP configuration object
    @return: Tuple (queueInfo, totalCpu, freeCpu, queueCpus); queueInfo is
        the dictionary returned by getQueueInfo with the processed queues'
        entries filled in, in place.
    """
    try:
        lsfVersion = getLrmsInfo(cp)
    except Exception:
        # Best effort: publish a placeholder version rather than fail.
        lsfVersion = 'Unknown'
    log.debug('Using LSF version %s' % lsfVersion)
    queueInfo = getQueueInfo(cp)
    try:
        totalCpu, freeCpu, queueCpus = parseNodes(queueInfo, cp)
    except Exception:
        # Best effort: carry on with no CPU information at all.
        totalCpu, freeCpu, queueCpus = 0, 0, {}
    log.debug('Total, Free CPU: (%s, %s)' % (totalCpu, freeCpu))
    ce_name = cp.get(ce, "name")
    CE = getTemplate("GlueCE", "GlueCEUniqueID")
    try:
        excludeQueues = [i.strip() for i in cp.get("lsf",
            "queue_exclude").split(',')]
    except Exception:
        # The option is optional; without it no queue is excluded.
        excludeQueues = []
    vo_queues = getVoQueues(queueInfo, cp)
    for queue, info in queueInfo.items():
        if queue in excludeQueues:
            continue
        log.debug('Processing queue %s' % queue)
        if 'running' not in info:
            info['running'] = 0
        if 'status' not in info:
            # There really should be an unknown status...
            info['status'] = 'Closed'
        if 'total' not in info:
            info['total'] = 0
        info["lrmsVersion"] = lsfVersion
        info["job_manager"] = "lsf"

        # A missing waiting count is treated as zero everywhere in this
        # function (the responseTimes call used to index it directly).
        waiting = info.get("wait", 0)
        per_queue = queueCpus.get(queue, {})
        if int(waiting) > 0:
            info["free_slots"] = 0
        elif 'max' in per_queue and 'njobs' in per_queue:
            info["free_slots"] = per_queue['max'] - per_queue['njobs']
        else:
            info["free_slots"] = freeCpu

        info["queue"] = queue
        info["ceName"] = ce_name
        unique_id = buildCEUniqueID(cp, ce_name, 'lsf', queue)
        info['ceUniqueID'] = unique_id
        if "job_slots" not in info:
            if 'max' in per_queue:
                log.debug('queue %s, info is %s' % (queue, per_queue))
                info['job_slots'] = per_queue['max']
            else:
                info["job_slots"] = totalCpu
        if "priority" not in info:
            info["priority"] = 0
        if "max_running" not in info:
            info["max_running"] = info["job_slots"]
        elif not info['max_running'] or info['max_running'] == '-':
            info['max_running'] = 999999
        if cp.has_option("lsf", "max_wall"):
            info["max_wall"] = cp_getInt(cp, "lsf", "max_wall", 1440)
        elif "max_wall" not in info:
            info["max_wall"] = 1440
        info["max_wall"] = int(info["max_wall"])  # glue proscribes ints
        info["job_slots"] = min(totalCpu, info["job_slots"])

        ert, wrt = responseTimes(cp, info["running"], waiting,
            max_job_time=info["max_wall"])
        contact_string = buildContactString(cp, 'lsf', queue, unique_id, log)
        ceImpl, ceImplVersion = getCEImpl(cp)
        info['ert'] = ert
        info['wrt'] = wrt
        info['hostingCluster'] = cp_get(cp, ce, 'hosting_cluster', ce_name)
        info['hostName'] = cp_get(cp, ce, 'host_name', ce_name)
        info['ceImpl'] = ceImpl
        info['ceImplVersion'] = ceImplVersion
        info['contact_string'] = contact_string
        info['app_dir'] = cp.get('osg_dirs', 'app')
        info['data_dir'] = cp.get('osg_dirs', 'data')
        info['default_se'] = getDefaultSE(cp)
        info['max_waiting'] = 999999
        info['max_total'] = info['max_waiting'] + info['max_running']
        info['assigned'] = info['job_slots']
        info['lrmsType'] = 'lsf'
        info['preemption'] = str(cp_getInt(cp, 'lsf', 'preemption', '0'))

        # One access rule per VO mapped to this queue; a queue without any
        # VO is not published.
        rules = ['GlueCEAccessControlBaseRule: VO:%s' % vo.lower()
                 for vo, queue2 in vo_queues if queue == queue2]
        if not rules:
            continue
        info['acbr'] = '\n'.join(rules)

        info['bdii'] = cp.get('bdii', 'endpoint')
        gramVersion = getGramVersion(cp)
        port = getPort(cp)
        info['gramVersion'] = gramVersion
        info['port'] = port
        info['waiting'] = waiting
        info['referenceSI00'] = gip_cluster.getReferenceSI00(cp)
        info['clusterUniqueID'] = getClusterID(cp)

        extraCapabilities = ''
        if cp_getBoolean(cp, 'site', 'glexec_enabled', False):
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: glexec'
        htpcRSL, maxSlots = getHTPCInfo(cp, 'lsf', queue, log)
        info['max_slots'] = maxSlots
        info['htpc'] = htpcRSL
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: htpc'
        info['extraCapabilities'] = extraCapabilities

        printTemplate(CE, info)
    return queueInfo, totalCpu, freeCpu, queueCpus
def print_VOViewLocal(cp):
    """
    Print the GLUE VOView entity; shows the VO's view of the condor batch
    system.

    Config options used:
        * ce.name.  The human-readable name of the ce.
        * condor.max_wall.  Maximum wall time; defaults to 1440.
        * condor.idle_slack.  Number of waiting jobs above which a VO is
          reported as having no free slots; defaults to 10.
        * osg_dirs.app.  The $OSG_APP directory; defaults to "/Unknown"
        * osg_dirs.data.  The $OSG_DATA directory; defaults to "/Unknown"

    One VOView is printed per VO per group.  VOs that belong to no group are
    collected in a "default" group, which is dropped again when
    defaultGroupIsExcluded says so.  The order in which the VOs of a group
    are printed is not defined.

    @param cp: The GIP configuration object
    @type cp: ConfigParser.ConfigParser
    """
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    ce_name = cp_get(cp, "ce", "name", "")

    total_nodes, _, unclaimed = parseNodes(cp)

    vo_map = VoMapper(cp)
    jobs_info = getJobsInfo(vo_map, cp)
    groupInfo = getGroupInfo(vo_map, cp)

    # Add in the default group
    all_group_vos = []
    total_assigned = 0
    for key, val in groupInfo.items():
        if key == 'default':
            continue
        all_group_vos.extend(val['vos'])
        total_assigned += val.get('quota', 0)
    # The builtin set replaces the deprecated sets.Set.
    all_vos = set(voList(cp))
    defaultVoList = [i for i in all_vos if i not in all_group_vos]
    if 'default' not in groupInfo:
        groupInfo['default'] = {}
        groupInfo['default']['vos'] = defaultVoList
    if total_nodes > total_assigned:
        log.info("There are %i assigned job slots out of %i total; assigning" \
            " the rest to the default group." % (total_assigned, total_nodes))
        groupInfo['default']['quota'] = total_nodes - total_assigned
    elif total_assigned > total_nodes:
        # Only warn on a real over-assignment; an exact match is fine.
        log.warning("More assigned nodes (%i) than actual nodes (%i)!" % \
            (total_assigned, total_nodes))

    if defaultGroupIsExcluded(cp):
        if 'default' in groupInfo:
            del groupInfo['default']

    for group in groupInfo:
        jinfo = jobs_info.get(group, {})
        ginfo = groupInfo[group]
        # VOs configured for the group plus those with jobs in it, limited
        # to the VOs known to voList.
        vos = set(ginfo.get('vos', [group]))
        vos.update(jinfo.keys())
        vos.intersection_update(all_vos)

        # Enforce invariants
        # VO_FREE_SLOTS <= CE_FREE_SLOTS
        # VO_FREE_SLOTS <= CE_ASSIGNED - VO_RUNNING
        # This code determines CE_ASSIGNED
        if ginfo.get("quota", 0) > 0:
            assigned = ginfo.get("quota", 0)
        else:
            assigned = total_nodes

        log.debug("All VOs for %s: %s" % (group, ", ".join(vos)))
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'condor', group)
        max_wall = cp_getInt(cp, "condor", "max_wall", 1440)

        myrunning = sum(i.get('running', 0) for i in jinfo.values())
        assigned = max(assigned, myrunning)

        for vo in vos:
            acbr = 'VO:%s' % vo
            jobs = jinfo.get(vo.lower(), {"running": 0, "idle": 0, "held": 0})
            ert, wrt = responseTimes(cp, jobs["running"], jobs["idle"] + \
                jobs["held"], max_job_time=max_wall*60)
            free = min(unclaimed, assigned - myrunning,
                assigned - int(jobs['running']))
            free = int(free)

            waiting = int(jobs["idle"]) + int(jobs["held"])
            # The default is an integer so the comparison below is always
            # int-to-int, even if cp_getInt hands the default back unchanged.
            if waiting > cp_getInt(cp, 'condor', 'idle_slack', 10):
                free = 0

            info = {
                "vo"          : vo,
                "acbr"        : acbr,
                "ceUniqueID"  : ce_unique_id,
                "voLocalID"   : vo,
                "ce_name"     : ce_name,
                "job_manager" : 'condor',
                "queue"       : vo,
                "running"     : jobs["running"],
                # Held jobs are included as "waiting" since the definition is:
                #    Number of jobs that are in a state different than running
                "waiting"     : waiting,
                "total"       : jobs["running"] + jobs["idle"] + jobs["held"],
                "free_slots"  : free,
                "job_slots"   : int(total_nodes),
                "ert"         : ert,
                "wrt"         : wrt,
                "default_se"  : getDefaultSE(cp),
                'app'         : cp_get(cp, 'osg_dirs', 'app', '/Unknown'),
                "data"        : cp_get(cp, "osg_dirs", "data", "/Unknown"),
            }
            printTemplate(VOView, info)
"waiting" : myidle + myheld, "running" : myrunning, "total" : myrunning + myidle + myheld, "priority" : ginfo.get('prio', 0), "assigned" : assigned, "max_slots" : maxSlots, "preemption" : str(int(cp_getBoolean(cp, "condor", \ "preemption", False))), "max_running" : max_running, "max_waiting" : 99999, "max_total" : 99999, "max_wall" : cp_getInt(cp, "condor", "max_wall", 1440), "status" : status, 'app_dir' : cp_get(cp, 'osg_dirs', 'app', '/Unknown'), "data_dir" : cp_get(cp, "osg_dirs", "data", "/Unknown"), "default_se" : getDefaultSE(cp), "acbr" : ginfo['acbr'], "referenceSI00" : referenceSI00, "clusterUniqueID": getClusterID(cp), "bdii" : cp_get(cp, "bdii", "endpoint", "Unknown"), 'extraCapabilities' : extraCapabilities, "htpc" : htpcRSL } printTemplate(ce_template, info) return total_nodes, claimed, unclaimed def print_VOViewLocal(cp): """ Print the GLUE VOView entity; shows the VO's view of the condor batch system.
def print_CE(cp):
    """
    Print out the GlueCE objects for PBS; one GlueCE per queue.

    Queues listed in the pbs.queue_exclude option are skipped, and nothing is
    printed for a queue that no VO is mapped to.

    @param cp: The GIP configuration object
    @return: Tuple (queueInfo, totalCpu, freeCpu, queueCpus); queueInfo is
        the dictionary returned by getQueueInfo with the processed queues'
        entries filled in, in place.
    """
    pbsVersion = getLrmsInfo(cp)
    queueInfo = getQueueInfo(cp)
    totalCpu, freeCpu, queueCpus = parseNodes(cp, pbsVersion)
    log.debug("totalCpu, freeCpu, queueCPus: %s %s %s" % (totalCpu, freeCpu,
        queueCpus))
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    CE = getTemplate("GlueCE", "GlueCEUniqueID")
    try:
        excludeQueues = [i.strip() for i in cp_get(cp, "pbs",
            "queue_exclude", "").split(',')]
    except Exception:
        # Best effort: an unusable exclude list means nothing is excluded.
        excludeQueues = []
    vo_queues = getVoQueues(cp)
    for queue, info in queueInfo.items():
        if queue in excludeQueues:
            continue
        info["lrmsVersion"] = pbsVersion
        info["job_manager"] = "pbs"
        info["queue"] = queue
        info["ceName"] = ce_name
        unique_id = buildCEUniqueID(cp, ce_name, 'pbs', queue)
        ceImpl, ceImplVersion = getCEImpl(cp)
        port = getPort(cp)
        info['ceUniqueID'] = unique_id
        if "job_slots" not in info:
            info["job_slots"] = totalCpu
        if "priority" not in info:
            info["priority"] = 0
        if "max_running" not in info:
            info["max_running"] = info["job_slots"]
        if "max_wall" not in info:
            info["max_wall"] = 1440

        # Missing counts are treated as zero everywhere in this function.
        running = info.get("running", 0)
        waiting = info.get("wait", 0)

        # If no jobs are waiting in the queue, set the number of free slots
        # to (job_slots - running), or the total number of free slots on the
        # cluster, whichever is less.
        info["free_slots"] = 0
        if waiting == 0:
            freeSlots = info["job_slots"] - running
            if freeSlots > 0:
                info["free_slots"] = min(freeSlots, freeCpu)

        log.debug("queue info: %s %s" % (queue, info))

        ert, wrt = responseTimes(cp, running, waiting,
            max_job_time=info["max_wall"])

        info["job_slots"] = min(totalCpu, info["job_slots"])
        info['ert'] = ert
        info['wrt'] = wrt
        info['hostingCluster'] = cp_get(cp, ce, 'hosting_cluster', ce_name)
        info['hostName'] = cp_get(cp, ce, 'host_name', ce_name)
        info['ceImpl'] = ceImpl
        info['ceImplVersion'] = ceImplVersion
        contact_string = buildContactString(cp, 'pbs', queue, unique_id, log)
        info['contact_string'] = contact_string
        info['app_dir'] = cp_get(cp, 'osg_dirs', 'app', "/UNKNOWN_APP")
        info['data_dir'] = cp_get(cp, 'osg_dirs', 'data', "/UNKNOWN_DATA")
        info['default_se'] = getDefaultSE(cp)

        if 'max_waiting' not in info:
            info['max_waiting'] = 999999
        if 'max_queuable' in info:
            info['max_total'] = info['max_queuable']
            info['free_slots'] = min(info['free_slots'], info['max_queuable'])
        else:
            info['max_total'] = info['max_waiting'] + info['max_running']
            info['free_slots'] = min(info['free_slots'], info['max_total'])

        # Enforce invariants:
        # max_total <= max_running
        # free_slots <= max_running
        info['max_total'] = min(info['max_total'], info['max_running'])
        info['free_slots'] = min(info['free_slots'], info['max_running'])

        # Enforce invariants:
        # assigned <= max_running
        info['assigned'] = min(info['job_slots'], info['max_running'])

        info['lrmsType'] = 'pbs'
        info['preemption'] = cp_get(cp, 'pbs', 'preemption', '0')

        # One access rule per VO mapped to this queue; a queue without any
        # VO is not published.
        rules = ['GlueCEAccessControlBaseRule: VO:%s' % vo
                 for vo, queue2 in vo_queues if queue == queue2]
        if not rules:
            continue
        info['acbr'] = '\n'.join(rules)

        info['bdii'] = cp.get('bdii', 'endpoint')
        gramVersion = getGramVersion(cp)
        info['gramVersion'] = gramVersion
        info['port'] = port
        info['waiting'] = waiting
        info['referenceSI00'] = gip_cluster.getReferenceSI00(cp)
        info['clusterUniqueID'] = getClusterID(cp)

        extraCapabilities = ''
        if cp_getBoolean(cp, 'site', 'glexec_enabled', False):
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: glexec'
        htpcRSL, maxSlots = getHTPCInfo(cp, 'pbs', queue, log)
        info['max_slots'] = maxSlots
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: htpc'
        info['extraCapabilities'] = extraCapabilities
        info['htpc'] = htpcRSL

        # Parenthesised so the line is valid both as a Python 2 print
        # statement and as a Python 3 print() call; the output is unchanged.
        print(CE % info)
    return queueInfo, totalCpu, freeCpu, queueCpus
def print_CE(cp):
    """
    Print out the GlueCE objects for SGE; one GlueCE per queue.

    Only queues whose name appears in getQueueList are published, and the
    entry named 'waiting' is always skipped.  Response times are fixed
    placeholder values.

    @param cp: The GIP configuration object
    @return: The queue information dictionary returned (as the first
        element) by getQueueInfo.
    """
    SGEVersion = getLrmsInfo(cp)
    queueInfo, _ = getQueueInfo(cp)
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    ce_template = getTemplate("GlueCE", "GlueCEUniqueID")
    queueList = getQueueList(cp)

    vo_queues = getVoQueues(cp)

    default_max_waiting = 999999
    for queue in queueInfo.values():
        if 'name' not in queue or queue['name'] not in queueList:
            continue
        if queue['name'] == 'waiting':
            continue
        queue_name = queue['name']

        unique_id = buildCEUniqueID(cp, ce_name, 'sge', queue_name)

        # One access rule per VO mapped to this queue.
        acbr = '\n'.join(['GlueCEAccessControlBaseRule: VO:%s' % vo
                          for vo, queue2 in vo_queues if queue_name == queue2])

        referenceSI00 = gip_cluster.getReferenceSI00(cp)
        contact_string = buildContactString(cp, 'sge', queue_name, unique_id,
            log)

        extraCapabilities = ''
        if cp_getBoolean(cp, 'site', 'glexec_enabled', False):
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: glexec'

        # Pass the queue's name, as buildContactString above and the other
        # batch systems do; the whole queue dictionary used to be passed.
        htpcRSL, maxSlots = getHTPCInfo(cp, 'sge', queue_name, log)
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: htpc'

        gramVersion = getGramVersion(cp)
        port = getPort(cp)
        ceImpl, ceImplVersion = getCEImpl(cp)

        max_wall = queue["max_wall"]
        if cp.has_option("sge", "max_wall"):
            # An explicit configuration value overrides the queue's own.
            max_wall = cp_getInt(cp, "sge", "max_wall", 1440)

        info = {
            "ceUniqueID"      : unique_id,
            "ceName"          : ce_name,
            "ceImpl"          : ceImpl,
            "ceImplVersion"   : ceImplVersion,
            "clusterUniqueID" : getClusterID(cp),
            "queue"           : queue_name,
            "priority"        : queue['priority'],
            "lrmsType"        : 'sge',
            "lrmsVersion"     : SGEVersion,
            "job_manager"     : "sge",
            "job_slots"       : queue["slots_total"],
            "free_slots"      : queue["slots_free"],
            "running"         : queue["slots_used"],
            "status"          : queue['status'],
            "total"           : queue['slots_used'] + queue['waiting'],
            "ert"             : 3600,
            "wrt"             : 3600,
            "hostingCluster"  : cp_get(cp, ce, 'hosting_cluster', ce_name),
            "hostName"        : cp_get(cp, ce, 'host_name', ce_name),
            "contact_string"  : contact_string,
            "app_dir"         : cp_get(cp, 'osg_dirs', 'app', "/OSG_APP_UNKNOWN"),
            "data_dir"        : cp_get(cp, 'osg_dirs', 'data', "/OSG_DATA_UNKNOWN"),
            "default_se"      : getDefaultSE(cp),
            "max_running"     : queue["slots_total"],
            "max_wall"        : max_wall,
            "max_waiting"     : default_max_waiting,
            "max_slots"       : maxSlots,
            "max_total"       : default_max_waiting + queue["slots_total"],
            "assigned"        : queue["slots_used"],
            "preemption"      : cp_get(cp, 'sge', 'preemption', '0'),
            "acbr"            : acbr,
            "bdii"            : cp.get('bdii', 'endpoint'),
            "gramVersion"     : gramVersion,
            "port"            : port,
            "waiting"         : queue['waiting'],
            "referenceSI00"   : referenceSI00,
            'extraCapabilities' : extraCapabilities,
            "htpc"            : htpcRSL
        }
        printTemplate(ce_template, info)
    return queueInfo
def print_CE(cp):
    """
    Print out the GlueCE objects for LSF; one GlueCE per grid queue.

    Queues listed in the lsf.queue_exclude option are skipped, and nothing is
    printed for a queue that no VO is mapped to.  Failures while determining
    the LSF version, the node counts or the exclude list are tolerated and
    replaced by fallback values.

    @param cp: The GIP configuration object
    @return: Tuple (queueInfo, totalCpu, freeCpu, queueCpus); queueInfo is
        the dictionary returned by getQueueInfo with the processed queues'
        entries filled in, in place.
    """
    try:
        lsfVersion = getLrmsInfo(cp)
    except Exception:
        # Best effort: publish a placeholder version rather than fail.
        lsfVersion = 'Unknown'
    log.debug('Using LSF version %s' % lsfVersion)
    queueInfo = getQueueInfo(cp)
    try:
        totalCpu, freeCpu, queueCpus = parseNodes(queueInfo, cp)
    except Exception:
        # Best effort: carry on with no CPU information at all.
        totalCpu, freeCpu, queueCpus = 0, 0, {}
    log.debug('Total, Free CPU: (%s, %s)' % (totalCpu, freeCpu))
    ce_name = cp.get(ce, "name")
    CE = getTemplate("GlueCE", "GlueCEUniqueID")
    try:
        excludeQueues = [i.strip() for i in cp.get("lsf",
            "queue_exclude").split(',')]
    except Exception:
        # The option is optional; without it no queue is excluded.
        excludeQueues = []
    vo_queues = getVoQueues(queueInfo, cp)
    for queue, info in queueInfo.items():
        if queue in excludeQueues:
            continue
        log.debug('Processing queue %s' % queue)
        if 'running' not in info:
            info['running'] = 0
        if 'status' not in info:
            # There really should be an unknown status...
            info['status'] = 'Closed'
        if 'total' not in info:
            info['total'] = 0
        info["lrmsVersion"] = lsfVersion
        info["job_manager"] = "lsf"

        # A missing waiting count is treated as zero everywhere in this
        # function (the responseTimes call used to index it directly).
        waiting = info.get("wait", 0)
        per_queue = queueCpus.get(queue, {})
        if int(waiting) > 0:
            info["free_slots"] = 0
        elif 'max' in per_queue and 'njobs' in per_queue:
            info["free_slots"] = per_queue['max'] - per_queue['njobs']
        else:
            info["free_slots"] = freeCpu

        info["queue"] = queue
        info["ceName"] = ce_name
        unique_id = buildCEUniqueID(cp, ce_name, 'lsf', queue)
        info['ceUniqueID'] = unique_id
        if "job_slots" not in info:
            if 'max' in per_queue:
                log.debug('queue %s, info is %s' % (queue, per_queue))
                info['job_slots'] = per_queue['max']
            else:
                info["job_slots"] = totalCpu
        if "priority" not in info:
            info["priority"] = 0
        if "max_running" not in info:
            info["max_running"] = info["job_slots"]
        elif not info['max_running'] or info['max_running'] == '-':
            info['max_running'] = 999999
        if cp.has_option("lsf", "max_wall"):
            info["max_wall"] = cp_getInt(cp, "lsf", "max_wall", 1440)
        elif "max_wall" not in info:
            info["max_wall"] = 1440
        info["max_wall"] = int(info["max_wall"])  # glue proscribes ints
        info["job_slots"] = min(totalCpu, info["job_slots"])

        ert, wrt = responseTimes(cp, info["running"], waiting,
            max_job_time=info["max_wall"])
        contact_string = buildContactString(cp, 'lsf', queue, unique_id, log)
        ceImpl, ceImplVersion = getCEImpl(cp)
        info['ert'] = ert
        info['wrt'] = wrt
        info['hostingCluster'] = cp_get(cp, ce, 'hosting_cluster', ce_name)
        info['hostName'] = cp_get(cp, ce, 'host_name', ce_name)
        info['ceImpl'] = ceImpl
        info['ceImplVersion'] = ceImplVersion
        info['contact_string'] = contact_string
        info['app_dir'] = cp.get('osg_dirs', 'app')
        info['data_dir'] = cp.get('osg_dirs', 'data')
        info['default_se'] = getDefaultSE(cp)
        info['max_waiting'] = 999999
        info['max_total'] = info['max_waiting'] + info['max_running']
        info['assigned'] = info['job_slots']
        info['lrmsType'] = 'lsf'
        info['preemption'] = str(cp_getInt(cp, 'lsf', 'preemption', '0'))

        # One access rule per VO mapped to this queue; a queue without any
        # VO is not published.
        rules = ['GlueCEAccessControlBaseRule: VO:%s' % vo.lower()
                 for vo, queue2 in vo_queues if queue == queue2]
        if not rules:
            continue
        info['acbr'] = '\n'.join(rules)

        info['bdii'] = cp.get('bdii', 'endpoint')
        gramVersion = getGramVersion(cp)
        port = getPort(cp)
        info['gramVersion'] = gramVersion
        info['port'] = port
        info['waiting'] = waiting
        info['referenceSI00'] = gip_cluster.getReferenceSI00(cp)
        info['clusterUniqueID'] = getClusterID(cp)

        extraCapabilities = ''
        if cp_getBoolean(cp, 'site', 'glexec_enabled', False):
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: glexec'
        htpcRSL, maxSlots = getHTPCInfo(cp, 'lsf', queue, log)
        info['max_slots'] = maxSlots
        info['htpc'] = htpcRSL
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: htpc'
        info['extraCapabilities'] = extraCapabilities

        printTemplate(CE, info)
    return queueInfo, totalCpu, freeCpu, queueCpus
def print_CE(cp):
    """
    Print out the GlueCE objects for SLURM; one GlueCE per queue.

    Queues listed in the slurm.queue_exclude option are skipped, and nothing
    is printed for a queue that no VO is mapped to.  A queue whose
    information lacks "job_slots" or "max_running" is reported through
    log.error and skipped, since the slot arithmetic below cannot be done
    without them (it used to abort the whole run with a KeyError).

    @param cp: The GIP configuration object
    @return: The dictionary returned by getQueueInfo; the entries of the
        queues that were processed have been filled in, in place.
    """
    slurmVersion = getLrmsInfo(cp)
    queueInfo = getQueueInfo(cp)
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    CE = getTemplate("GlueCE", "GlueCEUniqueID")
    try:
        excludeQueues = [i.strip() for i in cp_get(cp, "slurm",
            "queue_exclude", "").split(',')]
    except Exception:
        # Best effort: an unusable exclude list means nothing is excluded.
        excludeQueues = []
    vo_queues = getVoQueues(cp)
    for queue, info in queueInfo.items():
        if queue in excludeQueues:
            continue
        info["lrmsVersion"] = slurmVersion
        info["job_manager"] = "slurm"
        info["queue"] = queue
        info["ceName"] = ce_name
        unique_id = buildCEUniqueID(cp, ce_name, 'slurm', queue)
        ceImpl, ceImplVersion = getCEImpl(cp)
        port = getPort(cp)
        info['ceUniqueID'] = unique_id

        incomplete = False
        if "job_slots" not in info:
            log.error("no job_slots found for %s!" % queue)
            incomplete = True
        if "priority" not in info:
            info["priority"] = 0
        if "max_running" not in info:
            log.error("no max_running found for %s!" % queue)
            incomplete = True
        if "max_wall" not in info:
            info["max_wall"] = 1440
        if incomplete:
            continue

        # Missing counts are treated as zero everywhere in this function.
        running = info.get("running", 0)
        waiting = info.get("wait", 0)

        # If no jobs are waiting in the queue, the number of free slots is
        # (job_slots - running) when that is positive; otherwise it is zero.
        info["free_slots"] = 0
        if waiting == 0:
            freeSlots = info["job_slots"] - running
            if freeSlots > 0:
                info["free_slots"] = freeSlots

        ert, wrt = responseTimes(cp, running, waiting,
            max_job_time=info["max_wall"])
        info['ert'] = ert
        info['wrt'] = wrt
        info['hostingCluster'] = cp_get(cp, ce, 'hosting_cluster', ce_name)
        info['hostName'] = cp_get(cp, ce, 'host_name', ce_name)
        info['ceImpl'] = ceImpl
        info['ceImplVersion'] = ceImplVersion
        contact_string = buildContactString(cp, 'slurm', queue, unique_id, log)
        info['contact_string'] = contact_string
        info['app_dir'] = cp_get(cp, 'osg_dirs', 'app', "/UNKNOWN_APP")
        info['data_dir'] = cp_get(cp, 'osg_dirs', 'data', "/UNKNOWN_DATA")
        info['default_se'] = getDefaultSE(cp)

        if 'max_waiting' not in info:
            info['max_waiting'] = 999999
        if 'max_queuable' in info:
            info['max_total'] = info['max_queuable']
            info['free_slots'] = min(info['free_slots'], info['max_queuable'])
        else:
            info['max_total'] = info['max_waiting'] + info['max_running']
            info['free_slots'] = min(info['free_slots'], info['max_total'])

        # Enforce invariants:
        # max_total <= max_running
        # free_slots <= max_running
        info['max_total'] = min(info['max_total'], info['max_running'])
        info['free_slots'] = min(info['free_slots'], info['max_running'])

        # Enforce invariants:
        # assigned <= max_running
        info['assigned'] = min(info['job_slots'], info['max_running'])

        info['lrmsType'] = 'slurm'
        info['preemption'] = cp_get(cp, 'slurm', 'preemption', '0')

        # One access rule per VO mapped to this queue; a queue without any
        # VO is not published.
        rules = ['GlueCEAccessControlBaseRule: VO:%s' % vo
                 for vo, queue2 in vo_queues if queue == queue2]
        if not rules:
            continue
        info['acbr'] = '\n'.join(rules)

        info['bdii'] = cp.get('bdii', 'endpoint')
        gramVersion = getGramVersion(cp)
        info['gramVersion'] = gramVersion
        info['port'] = port
        info['waiting'] = waiting
        info['referenceSI00'] = gip_cluster.getReferenceSI00(cp)
        info['clusterUniqueID'] = getClusterID(cp)

        extraCapabilities = ''
        if cp_getBoolean(cp, 'site', 'glexec_enabled', False):
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: glexec'
        htpcRSL, maxSlots = getHTPCInfo(cp, 'slurm', queue, log)
        info['max_slots'] = maxSlots
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + '\n' + 'GlueCECapability: htpc'
        info['extraCapabilities'] = extraCapabilities
        info['htpc'] = htpcRSL

        printTemplate(CE, info)
    return queueInfo
def print_VOViewLocal(cp):
    """
    Print the GLUE VOView entities; each one shows a VO's view of one group
    of the condor batch system.

    One VOView is printed for every (group, VO) pair, where the VOs of a
    group are the ones configured for it plus the ones with jobs in it,
    restricted to the VOs returned by voList(cp).  VOs not claimed by any
    named group are placed in a 'default' group, which also receives the
    job slots not covered by the other groups' quotas.  The 'default' group
    is dropped when defaultGroupIsExcluded(cp) is true.

    Config options used directly by this function:
       * ce.name.  The human-readable name of the ce; defaults to "".
       * condor.max_wall.  Multiplied by 60 and passed to responseTimes as
         the maximum job time; defaults to 1440.
       * condor.idle_slack.  If a VO has more waiting (idle + held) jobs
         than this, its free slots are reported as 0; defaults to 10.
       * osg_dirs.app.  The $OSG_APP directory; defaults to "/Unknown"
       * osg_dirs.data.  The $OSG_DATA directory; defaults to "/Unknown"

    The default SE is obtained from getDefaultSE(cp).

    @param cp: The GIP configuration object
    @type cp: ConfigParser.ConfigParser
    """
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    ce_name = cp_get(cp, "ce", "name", "")

    total_nodes, _, unclaimed = parseNodes(cp)

    vo_map = VoMapper(cp)
    jobs_info = getJobsInfo(vo_map, cp)
    groupInfo = getGroupInfo(vo_map, cp)

    # These settings do not change between groups or VOs, so read them once.
    # The idle_slack default is an int so the comparison below is always
    # int-to-int, whatever cp_getInt does with its default.
    max_wall = cp_getInt(cp, "condor", "max_wall", 1440)
    idle_slack = cp_getInt(cp, 'condor', 'idle_slack', 10)

    # Add in the default group: collect the VOs and quota already claimed
    # by the explicitly configured groups.
    all_group_vos = []
    total_assigned = 0
    for key, val in groupInfo.items():
        if key == 'default':
            continue
        # A group without a VO list is treated as a VO named after the
        # group, the same fallback the per-group loop below uses.
        all_group_vos.extend(val.get('vos', [key]))
        total_assigned += val.get('quota', 0)
    all_vos = sets.Set(voList(cp))
    defaultVoList = [i for i in all_vos if i not in all_group_vos]
    if 'default' not in groupInfo:
        groupInfo['default'] = {}
        groupInfo['default']['vos'] = defaultVoList
    if total_nodes > total_assigned:
        log.info("There are %i assigned job slots out of %i total; assigning" \
            " the rest to the default group." % (total_assigned, total_nodes))
        groupInfo['default']['quota'] = total_nodes - total_assigned
    elif total_assigned > total_nodes:
        # Only warn when the quotas really exceed the node count; an exact
        # match is a valid configuration with nothing left for 'default'.
        log.warning("More assigned nodes (%i) than actual nodes (%i)!" % \
            (total_assigned, total_nodes))

    if defaultGroupIsExcluded(cp) and 'default' in groupInfo:
        del groupInfo['default']

    for group in groupInfo:
        ginfo = groupInfo[group]
        jinfo = jobs_info.get(group, {})
        vos = sets.Set(ginfo.get('vos', [group]))
        vos.update(jinfo.keys())
        vos.intersection_update(all_vos)

        # Enforce invariants
        # VO_FREE_SLOTS <= CE_FREE_SLOTS
        # VO_FREE_SLOTS <= CE_ASSIGNED - VO_RUNNING
        # This code determines CE_ASSIGNED
        quota = ginfo.get("quota", 0)
        if quota > 0:
            assigned = quota
        else:
            assigned = total_nodes

        log.debug("All VOs for %s: %s" % (group, ", ".join(vos)))
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'condor', group)

        # Never report fewer assigned slots than are currently running in
        # the group; this keeps the free-slot figures below non-negative
        # on the "assigned" side.
        myrunning = sum([i.get('running', 0) for i in jinfo.values()], 0)
        assigned = max(assigned, myrunning)

        for vo in vos:
            # Job counts for this VO in this group; zeros if it has no jobs.
            job_counts = jinfo.get(vo.lower(),
                {"running": 0, "idle": 0, "held": 0})
            ert, wrt = responseTimes(cp, job_counts["running"],
                job_counts["idle"] + job_counts["held"],
                max_job_time=max_wall*60)

            free = int(min(unclaimed, assigned - myrunning,
                assigned - int(job_counts['running'])))

            # Held jobs are included as "waiting" since the definition is:
            # Number of jobs that are in a state different than running
            waiting = int(job_counts["idle"]) + int(job_counts["held"])
            if waiting > idle_slack:
                free = 0

            info = {
                "vo"         : vo,
                "acbr"       : 'VO:%s' % vo,
                "ceUniqueID" : ce_unique_id,
                "voLocalID"  : vo,
                "ce_name"    : ce_name,
                "job_manager": 'condor',
                "queue"      : vo,
                "running"    : job_counts["running"],
                "waiting"    : waiting,
                "total"      : job_counts["running"] + job_counts["idle"] + \
                    job_counts["held"],
                "free_slots" : free,
                "job_slots"  : int(total_nodes),
                "ert"        : ert,
                "wrt"        : wrt,
                "default_se" : getDefaultSE(cp),
                'app'        : cp_get(cp, 'osg_dirs', 'app', '/Unknown'),
                "data"       : cp_get(cp, "osg_dirs", "data", "/Unknown"),
            }
            printTemplate(VOView, info)