def _requestMetrics(url, name, requestType):
    # Collect the per-request metrics tuple appended by classifyRequests.
    EffectiveLumi = getEffectiveLumiSections(url, name, requestType)
    if EffectiveLumi <= 0:
        # tiny epsilon guards the division below against zero
        EffectiveLumi = 0.0000001
    TimeEvent = getTimeEventRequest(url, name)
    priority = getPriorityWorkflow(url, name)
    numevents = dbsTest.getInputEvents(url, name)
    # BUG FIX: the original computed float(numevents/EffectiveLumi), which
    # truncates under Python 2 integer division before the cast; divide as
    # floats instead.  Flags requests with >400 events per effective lumi.
    checkLumi = numevents / float(EffectiveLumi) > 400
    maxEvents = maxEventsFileDataset(url, name)
    return (name, priority, numevents, TimeEvent, EffectiveLumi, checkLumi,
            maxEvents)


def classifyRequests(url, requests, historic, noNameSites, requestType):
    """Bucket requests of type `requestType` by site and request status.

    For every known site whose name appears in the request name, the metrics
    tuple is appended to historic[site][status]; when no site matches, it goes
    to noNameSites[status].  Both dicts are mutated in place; only statuses
    already present as keys in noNameSites are considered at all.
    """
    for request in requests:
        name = request['request_name']
        reqType = request.get('type', 'NoType')
        if reqType != requestType:
            continue
        status = request.get('status', 'NoStatus')
        if status not in noNameSites:
            continue
        namefound = False
        for site in historic.keys():
            if name.find(site) >= 0:
                namefound = True
                if status in historic[site]:
                    historic[site][status].append(
                        _requestMetrics(url, name, requestType))
        if not namefound:
            # status is known to be a key of noNameSites (checked above)
            noNameSites[status].append(_requestMetrics(url, name, requestType))
def classifyRequests(url, requests): print '-----------------------------------------------------------------------------------------------------------------------------------------------------------' print '| Request |req Type |Status Req | Dataset |Status Dataset | Percentage|FilterEfficiency| ' print '-----------------------------------------------------------------------------------------------------------------------------------------------------------' classifiedRequests = {} for request in requests: if 'type' in request: name = request['request_name'] if request['type'] == 'MonteCarloFromGEN' or request[ 'type'] == 'MonteCarlo': datasetWorkflow = phedexSubscription.outputdatasetsWorkflow( url, name) problem = False percentage = 0 if len(datasetWorkflow) < 1: continue dataset = datasetWorkflow[0] inputEvents = 0.0001 inputEvents = inputEvents + int( dbsTest.getInputEvents(url, name)) outputEvents = dbsTest.getEventCountDataSet(dataset) percentage = outputEvents / float(inputEvents) duplicate = duplicateLumi(dataset) problem = False if duplicate: problem = True if problem: FilterEfficiency = getFilterEfficiency(url, name) datasetStatus = getDatasetStatus(dataset) print '| %20s | %8s| %8s | %20s | %10s| %10s | %10s| ' % ( name, request['type'], request['status'], dataset, datasetStatus, str(percentage * 100), FilterEfficiency) print '---------------------------------------------------------------------------------------------------------------------------'
def getDbsProgress(options,wfs_dict,wfs_dict_skipped): print cya+"Getting progress from dbs..."+dfa url = "cmsweb.cern.ch" for wf in wfs_dict.keys(): wfs_dict[wf]['dbsProgress'] = [] try: outputDataSets = dbsTest.phedexSubscription.outputdatasetsWorkflow(url, wf) except: print "\t%s'%s' cannot be looked up in dbs, skipping.%s" % (red,wf,dfa) wfs_dict_skipped[wf] = wfs_dict[wf] del wfs_dict[wf] continue try: inputEvents = dbsTest.getInputEvents(url, wf) except: print "\t%s'%s' cannot be looked up in dbs, skipping.%s" % (red,wf,dfa) wfs_dict_skipped[wf] = wfs_dict[wf] del wfs_dict[wf] continue for dataset in outputDataSets: outputEvents = dbsTest.getEventCountDataSet(dataset) wfs_dict[wf]['dbsProgress'].append({"dataset":dataset,"progress":str(outputEvents/float(inputEvents)*100)}) if options.verbose: print cya+"Added dbs progress info to workflow dictionary."+dfa appendFile(log_cmst1,"== "+str(datetime.datetime.now())+" == progress queried from dbs ==\n") return wfs_dict,wfs_dict_skipped
def retrieveSchema(url, workflowName, user, group):
    """Rebuild a request schema from the workflow spec stored in couch.

    Loads the spec of `workflowName` from reqmgrCouchURL, remaps legacy keys
    and refreshes the resubmission-specific fields (requestor/group ownership,
    remaining events, lumi/event offsets, 'ACDC_' request string).  Keys whose
    value is None are dropped.  Returns the schema dict.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        if key == 'ProcConfigCacheID':
            # legacy name for the config cache id
            schema['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            schema['RequestSizeEvents'] = value
        elif key == 'Requestor':
            schema['Requestor'] = user
        elif key == 'Group':
            schema['Group'] = group
        elif key == 'RequestNumEvents':
            # only request the events that are still missing
            schema['RequestNumEvents'] = getFinalRequestedNumEvents(url, workflowName)
        elif key == 'FirstLumi':
            # offset lumi/event numbering so the clone cannot collide with
            # the original production
            schema['FirstLumi'] = getMaxLumi(url, workflowName) * 2
        elif key == 'FirstEvent':
            schema['FirstEvent'] = dbsTest.getInputEvents(url, workflowName) * 2
        elif key == 'RequestString':
            schema['RequestString'] = 'ACDC_' + value
        elif value is not None:  # was `value != None`; identity is the idiom
            schema[key] = value
    return schema
def classifyRequests(url, requests): print '-----------------------------------------------------------------------------------------------------------------------------------------------------------' print '| Request |req Type |Status Req | Dataset |Status Dataset | Percentage|FilterEfficiency| ' print '-----------------------------------------------------------------------------------------------------------------------------------------------------------' classifiedRequests={} for request in requests: if 'type' in request: name=request['request_name'] if request['type']=='MonteCarloFromGEN' or request['type']=='MonteCarlo': datasetWorkflow=phedexSubscription.outputdatasetsWorkflow(url, name) problem=False percentage=0 if len(datasetWorkflow)<1: continue dataset=datasetWorkflow[0] inputEvents=0.0001 inputEvents=inputEvents+int(dbsTest.getInputEvents(url, name)) outputEvents=dbsTest.getEventCountDataSet(dataset) percentage=outputEvents/float(inputEvents) duplicate=duplicateLumi(dataset) problem=False if duplicate: problem=True if problem: FilterEfficiency=getFilterEfficiency(url, name) datasetStatus=getDatasetStatus(dataset) print '| %20s | %8s| %8s | %20s | %10s| %10s | %10s| ' % (name, request['type'], request['status'], dataset,datasetStatus, str(percentage*100), FilterEfficiency) print '---------------------------------------------------------------------------------------------------------------------------'
def main(): url = 'cmsweb.cern.ch' args = sys.argv[1:] if not len(args) == 1: print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile = args[0] f = open(inputFile, 'r') print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? | Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' closedOut = [] nonClosedOut = [] running = [] for line in f: workflow = line.rstrip('\n') outputDatasets = phedexSubscription.outputdatasetsWorkflow( url, workflow) inputEvents = dbsTest.getInputEvents(url, workflow) closeOut = True for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents = dbsTest.getOutputEvents(url, workflow, dataset) print "dataset = " + dataset print "inputEvents = " + str(inputEvents) print "outputEvents = " + str(outputEvents) if inputEvents != 0: if (outputEvents / float(inputEvents) * 100) >= 100.0: pass #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" else: #print dataset + " it is less than 99.9999% completed, keeping it in the current status" closeOut = False break else: print "Input Events 0" if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) for workflow in closedOut: status = getStatus(url, workflow) #if status == 'completed': # closeOutTaskChainWorkflows(url, workflow) #else: # pass print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: status = getStatus(url, workflow) print "%90s\tNO\t\t%s" % (workflow, status) f.close print '-------------------------------------------------------------------------------------------------------------------------------------' sys.exit(0)
def classifyRequests(url, requests, historic, noNameSites, requestType):
    """Classify requests of type `requestType` by site and request status.

    Appends a (name, priority, numevents, TimeEvent, EffectiveLumi, checkLumi,
    maxEvents) tuple to historic[site][status] for every known site whose name
    appears in the request name, or to noNameSites[status] when no site
    matches.  Both dicts are mutated in place.
    """
    for request in requests:
        name = request['request_name']
        reqType = request.get('type', 'NoType')
        if reqType != requestType:
            continue
        status = request.get('status', 'NoStatus')
        if status not in noNameSites:
            continue
        matchedSite = False
        for site in historic.keys():
            if name.find(site) < 0:
                continue
            matchedSite = True
            if status in historic[site]:
                EffectiveLumi = getEffectiveLumiSections(url, name, requestType)
                if EffectiveLumi <= 0:
                    EffectiveLumi = 0.0000001  # guard the division below
                TimeEvent = getTimeEventRequest(url, name)
                priority = getPriorityWorkflow(url, name)
                numevents = dbsTest.getInputEvents(url, name)
                # BUG FIX: float(numevents/EffectiveLumi) truncated under
                # Python 2 integer division; divide as floats instead
                checkLumi = numevents / float(EffectiveLumi) > 400
                maxEvents = maxEventsFileDataset(url, name)
                historic[site][status].append((name, priority, numevents, TimeEvent, EffectiveLumi, checkLumi, maxEvents))
        if not matchedSite:
            # status is guaranteed to be a key of noNameSites (checked above)
            EffectiveLumi = getEffectiveLumiSections(url, name, requestType)
            if EffectiveLumi <= 0:
                EffectiveLumi = 0.0000001  # guard the division below
            TimeEvent = getTimeEventRequest(url, name)
            priority = getPriorityWorkflow(url, name)
            numevents = dbsTest.getInputEvents(url, name)
            # BUG FIX: same float-division correction as above
            checkLumi = numevents / float(EffectiveLumi) > 400
            maxEvents = maxEventsFileDataset(url, name)
            noNameSites[status].append((name, priority, numevents, TimeEvent, EffectiveLumi, checkLumi, maxEvents))
def main(): url='cmsweb.cern.ch' args=sys.argv[1:] if not len(args)==1: print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile=args[0] f = open(inputFile, 'r') print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? | Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' closedOut = [] nonClosedOut = [] running = [] for line in f: workflow = line.rstrip('\n') outputDatasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) inputEvents=dbsTest.getInputEvents(url, workflow) closeOut = True for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents=dbsTest.getOutputEvents(url, workflow, dataset) print "dataset = " + dataset print "inputEvents = " + str(inputEvents) print "outputEvents = " + str(outputEvents) if inputEvents!=0: if (outputEvents/float(inputEvents)*100) >= 100.0: pass #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" else: #print dataset + " it is less than 99.9999% completed, keeping it in the current status" closeOut = False break else: print "Input Events 0" if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) for workflow in closedOut: status = getStatus(url, workflow) #if status == 'completed': # closeOutTaskChainWorkflows(url, workflow) #else: # pass print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: status = getStatus(url, workflow) print "%90s\tNO\t\t%s" % (workflow, status) f.close print '-------------------------------------------------------------------------------------------------------------------------------------' sys.exit(0)
def PercentageCompletion(url, workflow, dataset):
    """Return the fraction outputEvents/inputEvents for `dataset` of
    `workflow`, or 0 when no input events are reported."""
    total_input = int(dbsTest.getInputEvents(url, workflow))
    produced = dbsTest.getOutputEvents(url, workflow, dataset)
    if not total_input:
        return 0
    return produced / float(total_input)
def PercentageCompletion(url, workflow, dataset):
    # Fraction of the expected (input) events present in the output dataset;
    # 0 when the workflow reports no input events at all.
    expected = int(dbsTest.getInputEvents(url, workflow))
    found = dbsTest.getOutputEvents(url, workflow, dataset)
    return found / float(expected) if expected != 0 else 0
def testEventCountWorkflow(url, workflow): inputEvents=0 inputEvents=inputEvents+dbsTest.getInputEvents(url, workflow) datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) for dataset in datasets: outputEvents=dbsTest.getEventCountDataSet(dataset) percentage=outputEvents/float(inputEvents) if float(percentage)>float(1): print "Workflow: " + workflow+" duplicate events in outputdataset: "+dataset +" percentage: "+str(outputEvents/float(inputEvents)*100) +"%" return 1
def testEventCountWorkflow(url, workflow): inputEvents = 0 inputEvents = inputEvents + dbsTest.getInputEvents(url, workflow) datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow) for dataset in datasets: outputEvents = dbsTest.getEventCountDataSet(dataset) percentage = outputEvents / float(inputEvents) if float(percentage) > float(1): print "Workflow: " + workflow + " duplicate events in outputdataset: " + dataset + " percentage: " + str( outputEvents / float(inputEvents) * 100) + "%" return 1
def getDbsProgress(options, wfs_dict, wfs_dict_skipped): print cya + "Getting progress from dbs..." + dfa url = "cmsweb.cern.ch" for wf in wfs_dict.keys(): wfs_dict[wf]['dbsProgress'] = [] try: outputDataSets = dbsTest.phedexSubscription.outputdatasetsWorkflow( url, wf) except: print "\t%s'%s' cannot be looked up in dbs, skipping.%s" % ( red, wf, dfa) wfs_dict_skipped[wf] = wfs_dict[wf] del wfs_dict[wf] continue try: inputEvents = dbsTest.getInputEvents(url, wf) except: print "\t%s'%s' cannot be looked up in dbs, skipping.%s" % ( red, wf, dfa) wfs_dict_skipped[wf] = wfs_dict[wf] del wfs_dict[wf] continue for dataset in outputDataSets: outputEvents = dbsTest.getEventCountDataSet(dataset) wfs_dict[wf]['dbsProgress'].append({ "dataset": dataset, "progress": str(outputEvents / float(inputEvents) * 100) }) if options.verbose: print cya + "Added dbs progress info to workflow dictionary." + dfa appendFile( log_cmst1, "== " + str(datetime.datetime.now()) + " == progress queried from dbs ==\n") return wfs_dict, wfs_dict_skipped
def main(): url='cmsweb.cern.ch' args=sys.argv[1:] if not len(args)==1: print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile=args[0] f = open(inputFile, 'r') print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? | Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' closedOut = [] nonClosedOut = [] tooManyEvents = [] running = [] for line in f: workflow = line.rstrip('\n') outputDatasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) inputEvents=dbsTest.getInputEvents(url, workflow) closeOut = True tooMany = False for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents=dbsTest.getOutputEvents(url, workflow, dataset) if inputEvents!=0: if outputEvents == inputEvents: pass #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" elif outputEvents < inputEvents : #print dataset + " it is less than 99.9999% completed, keeping it in the current status" closeOut = False elif outputEvents > inputEvents : closeOut = False tooMany = True break else: print "Input Events 0" if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) if tooMany: tooManyEvents.append(workflow) for workflow in closedOut: status = getStatus(url, workflow) if status == 'completed': closeOutTaskChainWorkflows(url, workflow) else: pass print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: status = getStatus(url, workflow) print "%90s\tNO\t\t%s" % (workflow, status) print '-------------------------------------------------------------------------------------------------------------------------------------' for workflow in tooManyEvents: 
outputDatasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) inputEvents=dbsTest.getInputEvents(url, workflow) for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents=dbsTest.getOutputEvents(url, workflow, dataset) if inputEvents!=0: if outputEvents > inputEvents: print "WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str(inputEvents) + " events were expected and " + str(outputEvents) + " were found." f.close sys.exit(0)
def getFinalRequestedNumEvents(url, workflow):
    # Events still to be produced for `workflow`: the requested (input) total
    # minus what the first output dataset already contains.
    first_dataset = phedexSubscription.outputdatasetsWorkflow(url, workflow)[0]
    already_produced = dbsTest.getOutputEvents(url, workflow, first_dataset)
    requested = dbsTest.getInputEvents(url, workflow)
    return requested - already_produced
def main(): url = 'cmsweb.cern.ch' args = sys.argv[1:] if not len(args) == 1: print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile = args[0] f = open(inputFile, 'r') print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? | Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' closedOut = [] nonClosedOut = [] tooManyEvents = [] running = [] for line in f: workflow = line.rstrip('\n') outputDatasets = phedexSubscription.outputdatasetsWorkflow( url, workflow) inputEvents = dbsTest.getInputEvents(url, workflow) closeOut = True tooMany = False for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents = dbsTest.getOutputEvents(url, workflow, dataset) if inputEvents != 0: if outputEvents == inputEvents: pass #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" elif outputEvents < inputEvents: #print dataset + " it is less than 99.9999% completed, keeping it in the current status" closeOut = False elif outputEvents > inputEvents: closeOut = False tooMany = True break else: print "Input Events 0" if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) if tooMany: tooManyEvents.append(workflow) for workflow in closedOut: status = getStatus(url, workflow) if status == 'completed': closeOutTaskChainWorkflows(url, workflow) else: pass print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: status = getStatus(url, workflow) print "%90s\tNO\t\t%s" % (workflow, status) print '-------------------------------------------------------------------------------------------------------------------------------------' for workflow in tooManyEvents: outputDatasets = 
phedexSubscription.outputdatasetsWorkflow( url, workflow) inputEvents = dbsTest.getInputEvents(url, workflow) for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents = dbsTest.getOutputEvents(url, workflow, dataset) if inputEvents != 0: if outputEvents > inputEvents: print "WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str( inputEvents) + " events were expected and " + str( outputEvents) + " were found." f.close sys.exit(0)