def __init__(self, cms_name):
    """Initialise per-site FTS transfer bookkeeping for the given CMS site name."""
    self.site_name = cms_name
    self.endpoints = self.getEndpoints()
    self.tier = sites.getTier(self.site_name)
    # In progress -> secondary
    self.fts_server = "https://testurl.cern.ch/"
    # Statistical attributes
    self.totalrecords = 0
    self.successful = 0
    self.totalfailed = 0
    # Number of failed FTS files categorized by type
    # ('undecided' is not counted in the final rate).
    self.failed = {
        'undecided': 0,
        'quota': 0,
        'permissions': 0,
        'unreachable': 0,
        'file': 0,
        'other': 0,
    }
    # Success rate before categorizing error messages
    self.status = ""
    self.quality = 0.0
def __init__(self, cms_name):
    """Set up site identity, endpoints, tier and zeroed FTS failure counters."""
    self.site_name = cms_name
    self.endpoints = self.getEndpoints()
    self.tier = sites.getTier(self.site_name)
    # In progress -> secondary
    self.fts_server = "https://testurl.cern.ch/"
    # Statistical attributes
    self.totalrecords = 0
    self.successful = 0
    self.totalfailed = 0
    # Failed FTS files per category; 'undecided' is excluded from the final rate.
    self.failed = dict.fromkeys(
        ['undecided', 'quota', 'permissions', 'unreachable', 'file', 'other'], 0)
    # Success rate before categorizing error messages
    self.status = ""
    self.quality = 0.0
def main(): parser = OptionParser(usage="usage: %prog [options] filename", version="%prog 1.0") parser.add_option( "-d", "--date", dest="inputDate", help= "date from which to fetch the results for HC in format %Y-%m-%dT%H:%M:%SZ " ) parser.add_option("-o", "--outputDir", dest="outputDir", help="Directory in which to save the output") (options, args) = parser.parse_args() if options.inputDate is None: print "Please input a date with the --date option" exit(-1) else: try: datetmp = dateutil.parser.parse(options.inputDate, ignoretz=True) except: print "I couldn't recognize the date, please give me one like 2015-12-31T23:59:59Z" exit(-1) if options.outputDir is None: print "Please add a directory with option --outputDir" exit(-1) else: if os.path.isdir(options.outputDir) == False: print options.outputDir + " is not a valid directory or you don't have read permissions" exit(-1) # Constants interval = 1439 dateFrom = datetmp - timedelta(minutes=datetmp.minute % interval, seconds=datetmp.second, microseconds=datetmp.microsecond) dateTo = dateFrom + timedelta(minutes=interval) dateFormat = "%Y-%m-%dT%H:%M:%SZ" dateFromStr = datetime.strftime(dateFrom, dateFormat) print dateFromStr dateToStr = datetime.strftime(dateTo, dateFormat) OUTPUT_FILE_NAME = os.path.join(options.outputDir, "site_avail_sum.txt") OUTPUT_FILE_CORRECTIONS = os.path.join(options.outputDir, "site_avail_sum_POST_REQUEST.txt") SAM_COLUMN_NUMBER = "126" print "Getting SAM Score from " + str(dateFrom) + " to " + str(dateTo) samUrl = "http://wlcg-sam-cms.cern.ch/dashboard/request.py/getstatsresultsmin?profile_name=CMS_CRITICAL_FULL&plot_type=quality&start_time=%s&end_time=%s&granularity=single&view=siteavl" % ( dateFromStr, dateToStr) print samUrl # Download the url or die try: print "Fetching url : " + samUrl jsonStr = url.read(samUrl) samInfo = json.loads(jsonStr) except: exit(100) print "Data retrieved!" 
sitesfromDashboard = [] for samSummary in samInfo['data']: sitesfromDashboard.append(samSummary['name']) print sitesfromDashboard samScoreSites = [] print "Getting SAM for all sites" for site in sitesfromDashboard: for samSummary in samInfo['data']: if samSummary['name'] == site: try: siteOK = float(samSummary['data'][0]['OK']) siteCritical = float(samSummary['data'][0]['CRIT']) siteSched = float(samSummary['data'][0]['SCHED']) if (siteOK + siteCritical + siteSched) > 0.0: siteAvailabilityNum = ( float(siteOK) / (float(siteOK + siteCritical + siteSched))) * 100.0 siteAvailability = int(siteAvailabilityNum) if siteAvailabilityNum > 89.9: siteColor = "green" elif (sites.getTier(site) == 2 or sites.getTier(site) == 3) and siteAvailabilityNum > 79.9: siteColor = "green" else: siteColor = "red" else: siteAvailability = "n/a" siteAvailabilityNum = None siteColor = "white" except: siteAvailability = "Error" siteAvailabilityNum = None siteColor = "white" print site + " OK " + str(siteOK) + " CRIT " + str( siteCritical) + " SCHED " + str( siteSched) + " SCORE : " + str(siteAvailability) samScoreSites.append( dashboard.entry(date=dateTo.strftime("%Y-%m-%d %H:%M:%S"), name=site, value=siteAvailability, color=siteColor, url=getSuccessrateUrl( site, dateFrom, dateTo), nvalue=siteAvailabilityNum)) print str(samScoreSites) if len(samScoreSites) > 1: OutputFile = open(OUTPUT_FILE_NAME, 'w') correctionOutputFile = open(OUTPUT_FILE_CORRECTIONS, 'a') startDateStr = (dateFrom + timedelta(days=1)).replace( hour=0, minute=0, second=1, microsecond=0).strftime("%Y-%m-%d %H:%M:%S") endDateStr = (dateFrom + timedelta(days=1)).replace( hour=23, minute=59, second=59, microsecond=0).strftime("%Y-%m-%d %H:%M:%S") for site in samScoreSites: if site.name != "unknown": OutputFile.write(str(site) + '\n') correctionOutputFile.write(("\t".join([ startDateStr, endDateStr, str(SAM_COLUMN_NUMBER), site.name, str(site.value), site.color, site.url, "nvalue=0" ])) + "\n") print "\n--SAM Score output 
written to %s" % OUTPUT_FILE_NAME OutputFile.close() correctionOutputFile.close() else: print "There's no data, I quit!"
# NOTE(review): fragment — this chunk starts mid-function (lfStart is used but
# not defined here) and is truncated inside the downtime loop; indentation is
# reconstructed from the near-duplicate variant elsewhere in this file.

# LifeStatus metric (dashboard column 235) for the window [lfStart, now].
lfEnd = datetime.utcnow()
lfStatus = getJSONMetricforAllSitesForDate(235, formatDate(lfStart), formatDate(lfEnd))
print 'Got LifeStatus'
# Current value of prod_status (dashboard column 237) over the last day.
pdStart = datetime.utcnow() - timedelta(days=1)
pdEnd = datetime.utcnow()
pdStatus = getJSONMetricforAllSitesForDate(237, formatDate(pdStart), formatDate(pdEnd))
# Union of every site seen in readiness, life status or downtimes.
allsites = set(srStatus.getSites()).union(set(lfStatus.getSites())).union(
    set(downtimes.getSites()))
allsitesMetric = []
for site in allsites:
    tier = sites.getTier(site)
    siteCurrentLifeStatus = lfStatus.getLatestEntry(site)
    flagLifeStatus = False
    # Flag sites whose latest LifeStatus value is one of the "bad" states.
    if siteCurrentLifeStatus is not None and (siteCurrentLifeStatus.value in BAD_LIFESTATUS):
        flagLifeStatus = True
    siteSiteReadiness = srStatus.getSiteEntries(site)
    siteCurrentProd_Status = pdStatus.getLatestEntry(site)
    siteDowntimes = downtimes.getSiteEntries(site)
    if tier == 2 or tier == 1:
        # Check if the site will be on downtime in 24 hours or is on downtime
        flagDowntime = False
        for key, value in siteDowntimes.iteritems():
            # 'saddlebrown' presumably marks a scheduled downtime entry —
            # TODO confirm against the downtimes metric producer.
            if value.color == 'saddlebrown':
                dateEnd = datetime.utcfromtimestamp(key)
                dateStart = datetime.utcfromtimestamp(value.date)
# NOTE(review): fragment — the first three statements are the tail of an
# enclosing function (its def line is not visible here); indentation is
# reconstructed.

    # Any green slot means the history is not bad.
    if slot.color == dashboard.green:
        return False
return True
# merge sites that are not in the vo-feed (sites.getSites)
# --look at the loop, we are getting some sites from the manual changes
# metric. reason: some sites are not listed in the sites.getSites
siteList = sites.getSites()
for site in usableSitesMC.getSites():
    if not site in siteList:
        siteList[site] = {}
for i in siteList:
    badSiteFlag = False
    ## detect bad sites!
    # site has bad hammercloud history
    if sites.getTier(i) == 2 and hasBadHistory(i):
        print i + " hasBadHistory"
        badSiteFlag = True
    # site is in the morgue
    elif morgue.hasSite(i) and morgue.getSiteEntry(i).value == 'Morgue':
        print i + " is in the Morgue"
        badSiteFlag = True
    # site has been blocked
    elif usableSitesMC.hasSite(i) and usableSitesMC.getSiteEntry(i).color == dashboard.red:
        print i + " usableSitesMC.getSiteEntry color is red"
        badSiteFlag = True
    # Emit exactly one usable/not_usable entry per site.
    if badSiteFlag:
        metric.append(dashboard.entry(None, i, 'not_usable', dashboard.red, urlStamp))
    else:
        metric.append(dashboard.entry(None, i, 'usable', dashboard.green, urlStamp))
def main(): parser = OptionParser(usage="usage: %prog [options] filename", version="%prog 1.0") parser.add_option("-d", "--date", dest="inputDate", help="date from which to fetch the results for HC in format %Y-%m-%dT%H:%M:%SZ ") parser.add_option("-o", "--outputDir", dest="outputDir", help="Directory in which to save the output") (options, args) = parser.parse_args() if options.inputDate is None: print "Please input a date with the --date option" exit(-1) else: try: datetmp = dateutil.parser.parse(options.inputDate, ignoretz=True) except: print "I couldn't recognize the date, please give me one like 2015-12-31T23:59:59Z" exit(-1) if options.outputDir is None: print "Please add a directory with option --outputDir" exit(-1) else: if os.path.isdir(options.outputDir) == False: print options.outputDir + " is not a valid directory or you don't have read permissions" exit(-1) # Constants interval = 30 dateFrom = datetmp- timedelta(minutes=datetmp.minute % interval, seconds=datetmp.second, microseconds=datetmp.microsecond) dateTo = dateFrom + timedelta(minutes=interval) dateFormat = "%Y-%m-%dT%H:%M:%SZ" dateFromStr = datetime.strftime(dateFrom, dateFormat) print dateFromStr dateToStr = datetime.strftime(dateTo, dateFormat) OUTPUT_FILE_NAME = os.path.join(options.outputDir,"sam.txt") print "Getting SAM Score from " + str(dateFrom) + " to " + str(dateTo) samUrl = "http://wlcg-sam-cms.cern.ch/dashboard/request.py/getstatsresultsmin?profile_name=CMS_CRITICAL_FULL&plot_type=quality&start_time=%s&end_time=%s&granularity=single&view=siteavl" % (dateFromStr, dateToStr) print samUrl # Download the url or die try: print "Fetching url : " + samUrl jsonStr = url.read(samUrl) samInfo = json.loads(jsonStr) except: exit(100) print "Data retrieved!" 
sitesfromDashboard = [] for samSummary in samInfo['data']: sitesfromDashboard.append(samSummary['name']) print sitesfromDashboard samScoreSites = [] print"Getting SAM for all sites" for site in sitesfromDashboard: for samSummary in samInfo['data']: if samSummary['name'] == site: try: siteOK = float(samSummary['data'][0]['OK']) siteCritical = float(samSummary['data'][0]['CRIT']) siteSched = float(samSummary['data'][0]['SCHED']) if (siteOK + siteCritical + siteSched) > 0.0: siteAvailabilityNum = (float(siteOK) / (float(siteOK + siteCritical + siteSched)))*100.0 siteAvailability = int(siteAvailabilityNum) if siteAvailabilityNum > 89.9: siteColor = "c*k" elif (sites.getTier(site) == 2 or sites.getTier(site) == 3)and siteAvailabilityNum > 79.9: siteColor = "c*k" else: siteColor = "cNotOk" else: siteAvailability = "n/a" siteAvailabilityNum = None siteColor = "cNA" except: siteAvailability = "Error" siteAvailabilityNum = None siteColor = "cError" print site + " OK " + str(siteOK) + " CRIT " + str(siteCritical) + " SCHED " + str(siteSched) + " SCORE : " + str(siteAvailability) samScoreSites.append(dashboard.entry(date = dateFrom.strftime("%Y-%m-%d %H:%M:%S"), name = site, value = siteAvailability, color = siteColor, url = getSuccessrateUrl (site, dateFrom, dateTo), nvalue=siteAvailabilityNum)) print str(samScoreSites) if len(samScoreSites) > 1 : OutputFile = open(OUTPUT_FILE_NAME, 'w') for site in samScoreSites: if site.name != "unknown": OutputFile.write(str(site) + '\n') print "\n--SAM Score output written to %s" % OUTPUT_FILE_NAME OutputFile.close() else: print "There's no data, I quit!"
# NOTE(review): fragment — this chunk starts mid-function (srStatus/downtimes
# come from code not visible here) and is truncated right after
# 'intersection = 0'; indentation is reconstructed.

print "Got Readiness"
# LifeStatus metric (dashboard column 235) over the last 2 days.
lfStart = datetime.utcnow() - timedelta(days=2)
lfEnd = datetime.utcnow()
lfStatus = getJSONMetricforAllSitesForDate(235, formatDate(lfStart), formatDate(lfEnd))
print "Got LifeStatus"
# Current value of prod_status (dashboard column 237) over the last day.
pdStart = datetime.utcnow() - timedelta(days=1)
pdEnd = datetime.utcnow()
pdStatus = getJSONMetricforAllSitesForDate(237, formatDate(pdStart), formatDate(pdEnd))
# Union of every site seen in readiness, life status or downtimes.
allsites = set(srStatus.getSites()).union(set(lfStatus.getSites())).union(set(downtimes.getSites()))
allsitesMetric = []
for site in allsites:
    tier = sites.getTier(site)
    siteCurrentLifeStatus = lfStatus.getLatestEntry(site)
    flagLifeStatus = False
    # Flag sites whose latest LifeStatus value is one of the "bad" states.
    if siteCurrentLifeStatus is not None and (siteCurrentLifeStatus.value in BAD_LIFESTATUS):
        flagLifeStatus = True
    siteSiteReadiness = srStatus.getSiteEntries(site)
    siteCurrentProd_Status = pdStatus.getLatestEntry(site)
    siteDowntimes = downtimes.getSiteEntries(site)
    if tier == 2 or tier == 1:
        # Check if the site will be on downtime in 24 hours or is on downtime
        flagDowntime = False
        for key, value in siteDowntimes.iteritems():
            # 'saddlebrown' presumably marks a scheduled downtime entry —
            # TODO confirm against the downtimes metric producer.
            if value.color == "saddlebrown":
                dateEnd = datetime.utcfromtimestamp(key)
                dateStart = datetime.utcfromtimestamp(value.date)
                intersection = 0
def main(): parser = OptionParser(usage="usage: %prog [options] filename", version="%prog 1.0") parser.add_option( "-d", "--date", dest="inputDate", help= "date from which to fetch the results for HC in format %Y-%m-%dT%H:%M:%SZ " ) parser.add_option("-o", "--outputDir", dest="outputDir", help="Directory in which to save the output") (options, args) = parser.parse_args() if options.inputDate is None: print "Please input a date with the --date option" exit(-1) else: try: datetmp = dateutil.parser.parse(options.inputDate, ignoretz=True) except: print "I couldn't recognize the date, please give me one like 2015-12-31T23:59:59Z" exit(-1) if options.outputDir is None: print "Please add a directory with option --outputDir" exit(-1) else: if os.path.isdir(options.outputDir) == False: print options.outputDir + " is not a valid directory or you don't have read permissions" exit(-1) # Constants: # Dashboard API for Hammercloud # replace (site, startTimeStamp, endTimeStamp) interval = 15 dateFrom = datetmp - timedelta(minutes=datetmp.minute % interval, seconds=datetmp.second, microseconds=datetmp.microsecond) dateTo = dateFrom + timedelta(minutes=interval) dateFormat = "%Y-%m-%d+%H%%3A%M" dateFromStr = datetime.strftime(dateFrom, dateFormat) dateToStr = datetime.strftime(dateTo, dateFormat) OUTPUT_FILE_NAME = os.path.join(options.outputDir, "hammercloud.txt") print "Calcuating Hammercloud Score from " + str(dateFrom) + " to " + str( dateTo) urlHC = "http://dashb-cms-job.cern.ch/dashboard/request.py/jobsummary-plot-or-table2?user=&site=&submissiontool=&application=&activity=hctest&status=&check=terminated&tier=&sortby=site&ce=&rb=&grid=&jobtype=&submissionui=&dataset=&submissiontype=&task=&subtoolver=&genactivity=&outputse=&appexitcode=&accesstype=&inputse=&cores=&date1=%s&date2=%s&prettyprint" % ( dateFromStr, dateToStr) # Download the url or die try: print "Fetching url : " + urlHC jsonStr = url.read(urlHC) hcInfo = json.loads(jsonStr) except: exit(100) print "Data 
retrieved!" sitesfromDashboard = [] for hcSummary in hcInfo['summaries']: sitesfromDashboard.append(hcSummary['name']) hcScoreSites = [] noNa = 0 print "Calculating HammerCloud scores" for site in sitesfromDashboard: for hcSummary in hcInfo['summaries']: if hcSummary['name'] == site and site != "unknown": siteTerminated = hcSummary['terminated'] siteSuccesful = hcSummary['app-succeeded'] siteUnsuccesful = hcSummary['unsuccess'] siteCancelled = hcSummary['cancelled'] siteUnk = hcSummary['allunk'] siteScore = None if (siteTerminated - siteCancelled - siteUnk) > 0: siteScore = (float(siteSuccesful - siteUnsuccesful) / float(siteTerminated - siteCancelled - siteUnk)) * 100.0 siteColor = "cNotOk" if (sites.getTier(site) == 2 or sites.getTier(site) == 3) and siteScore > 79.9: siteColor = "c*k" if sites.getTier(site) == 1 and siteScore > 89.9: siteColor = "c*k" print site + " (" + str(siteSuccesful) + " - " + str( siteUnsuccesful) + ")/(" + str( siteTerminated) + " - " + str( siteCancelled) + " - " + str( siteUnk) + ") =" + str(siteScore) elif siteTerminated > 0 or siteCancelled > 0 or siteUnk > 0 or siteUnsuccesful > 0 or siteSuccesful > 0: siteScore = "Error" noNa += 1 siteColor = "cError" if siteScore is not None: hcScoreSites.append( dashboard.entry( date=dateFrom.strftime("%Y-%m-%d %H:%M:%S"), name=site, value=siteScore, color=siteColor, url=getSuccessrateUrl(site, dateFromStr, dateToStr))) #print str(hcScoreSites) if len(hcScoreSites) > noNa: OutputFile = open(OUTPUT_FILE_NAME, 'w') for site in hcScoreSites: if site.name != "unknown": OutputFile.write(str(site) + '\n') print "\n--HC Score output written to %s" % OUTPUT_FILE_NAME OutputFile.close() else: print "There's no data, I quit!"
# NOTE(review): fragment — siteList/samAccess/hammerCloud/ggus/siteDownTimes
# and reportURL come from code not visible here, and the chunk is truncated
# after the downtime entry; indentation (incl. the if/if nesting at the end)
# is reconstructed from the near-duplicate variant elsewhere in this file.
# NOTE(review): siteDownTimes[site] below can KeyError for sites missing from
# that dict — the duplicate variant guards with 'site in siteDownTimes'.

for site in siteList:
    badSiteFlag = False
    errMsg = "bad"
    siteDownTimeFlag = False
    downTimeColors = ["grey", "yellow", "saddlebrown"]
    # conditions to mark a site as bad
    # SAM availability below 50% (or unavailable) marks the site bad.
    if samAccess[site] < 50.0 or samAccess[site] == "n/a":
        badSiteFlag = badSiteFlag | True
        if samAccess[site] == "n/a":
            val = samAccess[site]
        else:
            val = round(samAccess[site], 2)
        errMsg = errMsg + "_SAM(%s)" % val
    # HammerCloud below 70% (or unavailable) marks the site bad — T3 exempt.
    if (hammerCloud[site] < 70.0 or hammerCloud[site] == "n/a") and sites.getTier(site) != 3:
        badSiteFlag = badSiteFlag | True
        if hammerCloud[site] == "n/a":
            val = hammerCloud[site]
        else:
            val = round(hammerCloud[site], 2)
        errMsg = errMsg + "_HC(%s)" % val
    # Any open GGUS ticket also marks the site bad.
    if site in ggus.keys() and len(ggus[site]):
        badSiteFlag = badSiteFlag | True
        errMsg = errMsg + "_GGUS(%s)" % str(ggus[site])
    if siteDownTimes[site] in downTimeColors:
        siteDownTimeFlag = True
    if badSiteFlag:
        entry = dashboard.entry(None, site, errMsg, dashboard.red, reportURL % site)
        # Downtime overrides the generic "bad" entry.
        if siteDownTimeFlag:
            entry = dashboard.entry(None, site, "site is on downtime", siteDownTimes[site], reportURL % site)
# NOTE(review): fragment — siteList/samAccess/hammerCloud/ggus/siteDownTimes
# and reportURL come from code not visible here, and the chunk is truncated
# after the 'on' entry; indentation (incl. the if/else nesting at the end) is
# reconstructed.

production = dashboard.metric()
transitional = dashboard.metric()
for site in siteList:
    badSiteFlag = False
    errMsg = 'bad'
    siteDownTimeFlag = False
    downTimeColors = ['grey', 'yellow', 'saddlebrown']
    # conditions to mark a site as bad
    # SAM availability below 50% (or unavailable) marks the site bad.
    if samAccess[site] < 50.0 or samAccess[site] == 'n/a':
        badSiteFlag = badSiteFlag | True
        if samAccess[site] == 'n/a':
            val = samAccess[site]
        else:
            val = round(samAccess[site], 2)
        errMsg = errMsg + '_SAM(%s)' % val
    # HammerCloud below 70% (or unavailable) marks the site bad — T3 exempt.
    if (hammerCloud[site] < 70.0 or hammerCloud[site] == 'n/a') and sites.getTier(site) != 3:
        badSiteFlag = badSiteFlag | True
        if hammerCloud[site] == 'n/a':
            val = hammerCloud[site]
        else:
            val = round(hammerCloud[site], 2)
        errMsg = errMsg + '_HC(%s)' % val
    # Any open GGUS ticket also marks the site bad.
    if site in ggus.keys() and len(ggus[site]):
        badSiteFlag = badSiteFlag | True
        errMsg = errMsg + '_GGUS(%s)' % str(ggus[site])
    # Membership guard avoids the KeyError present in the older variant.
    if site in siteDownTimes and siteDownTimes[site] in downTimeColors:
        siteDownTimeFlag = True
    if badSiteFlag:
        entry = dashboard.entry(None, site, errMsg, dashboard.red, reportURL % site)
        # Downtime overrides the generic "bad" entry.
        if siteDownTimeFlag:
            entry = dashboard.entry(None, site, 'site is on downtime', siteDownTimes[site], reportURL % site)
    else:
        entry = dashboard.entry(None, site, 'on', dashboard.green, reportURL % site)
def main(): parser = OptionParser(usage="usage: %prog [options] filename", version="%prog 1.0") parser.add_option("-d", "--date", dest="inputDate", help="date from which to fetch the results for HC in format %Y-%m-%dT%H:%M:%SZ ") parser.add_option("-o", "--outputDir", dest="outputDir", help="Directory in which to save the output") (options, args) = parser.parse_args() if options.inputDate is None: print "Please input a date with the --date option" exit(-1) else: try: datetmp = dateutil.parser.parse(options.inputDate, ignoretz=True) except: print "I couldn't recognize the date, please give me one like 2015-12-31T23:59:59Z" exit(-1) if options.outputDir is None: print "Please add a directory with option --outputDir" exit(-1) else: if os.path.isdir(options.outputDir) == False: print options.outputDir + " is not a valid directory or you don't have read permissions" exit(-1) # Constants: # Dashboard API for Hammercloud # replace (site, startTimeStamp, endTimeStamp) interval = 30 dateFrom = datetmp- timedelta(minutes=datetmp.minute % interval, seconds=datetmp.second, microseconds=datetmp.microsecond) dateTo = dateFrom + timedelta(minutes=interval) dateFormat = "%Y-%m-%d+%H%%3A%M" dateFromStr = datetime.strftime(dateFrom, dateFormat) dateToStr = datetime.strftime(dateTo, dateFormat) OUTPUT_FILE_NAME = os.path.join(options.outputDir,"hammercloud.txt") print "Calcuating Hammercloud Score from " + str(dateFrom) + " to " + str(dateTo) urlHC = "http://dashb-cms-job.cern.ch/dashboard/request.py/jobsummary-plot-or-table2?user=&site=&submissiontool=&application=&activity=hctest&status=&check=terminated&tier=&sortby=site&ce=&rb=&grid=&jobtype=&submissionui=&dataset=&submissiontype=&task=&subtoolver=&genactivity=&outputse=&appexitcode=&accesstype=&inputse=&cores=&date1=%s&date2=%s&prettyprint" % (dateFromStr, dateToStr) # Download the url or die try: print "Fetching url : " + urlHC jsonStr = url.read(urlHC) hcInfo = json.loads(jsonStr) except: exit(100) print "Data retrieved!" 
print json.dumps(hcInfo, sort_keys=True, indent=1, separators=(',', ': ')) sitesfromDashboard = [] for hcSummary in hcInfo['summaries']: sitesfromDashboard.append(hcSummary['name']) hcScoreSites = [] noNa = 0 print"Calculating HammerCloud scores" for site in sitesfromDashboard: for hcSummary in hcInfo['summaries']: if hcSummary['name'] == site and site != "unknown": siteTerminated = hcSummary['terminated'] siteSuccesful = hcSummary['app-succeeded'] siteUnsuccesful = hcSummary['unsuccess'] siteCancelled = hcSummary['cancelled'] siteUnk = hcSummary['allunk'] siteScore = -1.0 siteColor = "white" if (siteTerminated - siteCancelled - siteUnk) > 0: siteScore = (float(siteSuccesful - siteUnsuccesful) / float(siteTerminated - siteCancelled - siteUnk)) * 100.0 siteColor = "red" if (sites.getTier(site) > 1) and siteScore > 79.9: siteColor = "yellow" if siteScore > 89.9: siteColor = "green" #print site + " (" + str(siteSuccesful) + " - " + str(siteUnsuccesful) + ")/(" +str(siteTerminated)+" - "+str(siteCancelled)+" - "+str(siteUnk)+") =" + str(siteScore) if siteScore is not None: print site + "\t" + str(siteScore) + "\t" + siteColor hcScoreSites.append(dashboard.entry(date = dateFrom.strftime("%Y-%m-%d %H:%M:%S"), name = site, value = '%.1f' % siteScore, color = siteColor, url = getSuccessrateUrl (site, dateFromStr, dateToStr))) #print str(hcScoreSites) if len(hcScoreSites) > noNa: OutputFile = open(OUTPUT_FILE_NAME, 'w') for site in hcScoreSites: if site.name != "unknown": OutputFile.write(str(site) + '\n') print "\n--HC Score output written to %s" % OUTPUT_FILE_NAME OutputFile.close() else: print "There's no data, I quit!"