def main(): usage = "usage: %prog [options] workflow" parser = OptionParser(usage=usage) parser.add_option("-f","--file", dest="fileName", default=None, help="Input file") parser.add_option("-v","--verbose",action="store_true", dest="verbose", default=False, help="Show detailed info") parser.add_option("--test",action="store_true", dest="test", default=False, help="Only test and console output (doesn't make the actual calls)") (options, args) = parser.parse_args() if len(args) != 1 and options.fileName is None: parser.error("Provide the workflow name or a file") sys.exit(1) if options.fileName is None: workflows = [args[0]] else: workflows = [l.strip() for l in open(options.fileName) if l.strip()] datasets = [] i = 0 print "Getting output from workflows" for wf in workflows: if options.verbose: print wf try: ds = rqmgr.outputdatasetsWorkflow(url, wf) datasets += ds except: print wf, "skipped" reqs = makeDeletionRequests(url, datasets, options.verbose, options.test) print "Deletion request made:" print '\n'.join(reqs)
def closeOutRedigiWorkflows(url, workflows):
    """
    Closes out a list of redigi workflows.

    For every output dataset of every workflow it checks completion
    percentage, custodial PhEDEx subscription and run/lumi duplicates,
    prints one table row per dataset, and close-out cascades the workflow
    only when every dataset passed.  Workflows that are complete (>= 95%)
    but have no custodial subscription are collected and returned.
    :param url: ReqMgr host (e.g. 'cmsweb.cern.ch')
    :param workflows: iterable of workflow names
    :returns: list of workflows missing a PhEDEx site subscription
    """
    noSiteWorkflows = []
    for workflow in workflows:
        # assume closeable until a dataset fails a check
        closeOutWorkflow = True
        inputDataset = reqMgrClient.getInputDataSet(url, workflow)  # fetched but unused below — TODO confirm still needed
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        for dataset in datasets:
            closeOutDataset = False
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.hasCustodialSubscription(dataset)
            duplicate = None
            # if dataset has subscription and more than 95% events we check
            # duplicates
            if phedexSubscription and percentage >= float(0.95):
                duplicate = dbs3Client.duplicateRunLumi(dataset)
                #if not duplicate events, dataset is ready
                if not duplicate:
                    closeOutDataset = True
                else:
                    closeOutDataset = False
            #validate when percentage is ok but has not phedex subscription
            if percentage >= float(0.95) and not phedexSubscription:
                noSiteWorkflows.append(workflow)
            #if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            # one status row per dataset (the literal 100 fills the transfer column)
            print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (workflow, dataset,str(int(percentage*100)), str(phedexSubscription), 100, duplicate, closeOutDataset)
        #workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    # visual separator after the table
    print '-'*180
    return noSiteWorkflows
def main(): url='cmsweb.cern.ch' #Create option parser usage = "\n python %prog [-f FILE_NAME | WORKFLOW_NAME ...]\n" parser = OptionParser(usage=usage) parser.add_option('-f', '--file', help='Text file with a list of workflows', dest='file') parser.add_option('-i', '--invalidate', action='store_true', default=False, help='Also invalidate output datasets on DBS', dest='invalidate') (options, args) = parser.parse_args() if options.file: wfs = [l.strip() for l in open(options.file) if l.strip()] elif args: wfs = args else: parser.error("Provide the workflow of a file of workflows") sys.exit(1) for wf in wfs: print "Rejecting workflow: " + wf reqMgrClient.rejectWorkflow(url, wf) print "Rejected" if options.invalidate: print "Invalidating datasets" datasets = reqMgrClient.outputdatasetsWorkflow(url, wf) for ds in datasets: print ds dbs3.setDatasetStatus(ds, 'INVALID', files=True)
def classifyCompletedRequests(url, requests): """ Sorts completed requests using the type. returns a dic cointaining a list for each type of workflows. """ workflows={'ReDigi':[],'MonteCarloFromGEN':[],'MonteCarlo':[] , 'ReReco':[], 'LHEStepZero':[]} for request in requests: name=request['id'] #if a wrong or weird name if len(request['key'])<3: print request continue status=request['key'][1] #only completed requests if status=='completed': requestType=request['key'][2] #sort by type if requestType=='MonteCarlo': #MonteCarlo's which datasets end with /GEN #are Step0 datasets = reqMgrClient.outputdatasetsWorkflow(url, name) m = re.search('.*/GEN$', datasets[0]) if m: workflows['LHEStepZero'].append(name) else: workflows[requestType].append(name) elif requestType in ['MonteCarloFromGEN', 'LHEStepZero', 'ReDigi', 'ReReco']: workflows[requestType].append(name) return workflows
def main(): """ Read the text file, for each workflow try: First abort it, then clone it. """ args=sys.argv[1:] if not len(args)==3: print "usage:abortAndClone file.txt user group" sys.exit(0) filename = args[0] user = args[1] group = args[2] #reading workflow list workflows = [wf.strip() for wf in open(filename).readlines() if wf.strip()] for workflow in workflows: #abort workflow print "Aborting workflow: " + workflow reqMgrClient.abortWorkflow(url, workflow) print "Aborted. Now cloning workflow..." #invalidates datasets print "Invalidating datasets" datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow) for dataset in datasets: print dataset dbs3.setStatusDBS3(dbs3_url, dataset, 'INVALID', None) #clone workflow clone = resubmit.cloneWorkflow(workflow, user, group) print "Cloned workflow: ", clone sys.exit(0);
def getMissingEvents(workflow):
    """Return how many requested input events the workflow has not yet produced."""
    expected = reqMgrClient.getInputEvents(url, workflow)
    # measure production against the last output dataset of the workflow
    dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
    produced = reqMgrClient.getOutputEvents(url, workflow, dataset)
    return int(expected) - int(produced)
def main():
    """
    Reject and clone workflows.

    Reads workflows either from a text file (-f) or a single name on the
    command line; USER/GROUP may follow as positional arguments, otherwise
    the OS user and 'DATAOPS' are used.  Each workflow is rejected in
    ReqMgr, its output datasets are invalidated in DBS, and a clone is
    submitted.
    """
    usage = "\n python %prog [options] [WORKFLOW_NAME] [USER GROUP]\n"\
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"\
            "USER: the user for creating the clone, if empty it will\n"\
            "      use the OS user running the script\n"\
            "GROUP: the group for creating the clone, if empty it will\n"\
            "      use 'DATAOPS' by default"
    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file of workflows to Reject and Clone', dest='file')
    (options, args) = parser.parse_args()
    # Check the arguments, get info from them
    if options.file:
        wfs = [l.strip() for l in open(options.file) if l.strip()]
        if len(args) == 2:
            user = args[0]
            group = args[1]
        elif len(args) == 0:
            #get os username by default
            uinfo = pwd.getpwuid(os.getuid())
            user = uinfo.pw_name
            #group by default DATAOPS
            group = 'DATAOPS'
        # NOTE(review): with -f and any other arg count, user/group stay
        # undefined and the loop below raises NameError — TODO confirm/fix
    else:
        if len(args) == 3:
            user = args[1]
            group = args[2]
        elif len(args) == 1:
            #get os username by default
            uinfo = pwd.getpwuid(os.getuid())
            user = uinfo.pw_name
            #group by default DATAOPS
            group = 'DATAOPS'
        else:
            parser.error("Provide the workflow of a file of workflows")
            sys.exit(1)
        #name of workflow (first positional argument)
        wfs = [args[0]]
    for wf in wfs:
        #reject workflow in ReqMgr
        print "Rejecting workflow: " + wf
        reqMgrClient.rejectWorkflow(url, wf)
        #invalidates datasets
        print "Invalidating datasets"
        datasets = reqMgrClient.outputdatasetsWorkflow(url, wf)
        for ds in datasets:
            print ds
            dbs3.setDatasetStatus(ds, 'INVALID', files=True)
        #clone workflow
        clone = resubmit.cloneWorkflow(wf, user, group)
    sys.exit(0);
def main():
    """
    Abort and clone workflows.

    Reads workflows either from a text file (-f) or a single name on the
    command line; USER/GROUP may follow as positional arguments, otherwise
    the OS user and 'DATAOPS' are used.  Each workflow is aborted in
    ReqMgr, its output datasets are invalidated in DBS, and a clone is
    submitted.
    """
    usage = "\n python %prog [options] [WORKFLOW_NAME] [USER GROUP]\n"\
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"\
            "USER: the user for creating the clone, if empty it will\n"\
            "      use the OS user running the script\n"\
            "GROUP: the group for creating the clone, if empty it will\n"\
            "      use 'DATAOPS' by default"
    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file of workflows to Abort and Clone', dest='file')
    (options, args) = parser.parse_args()
    # Check the arguments, get info from them
    if options.file:
        wfs = [l.strip() for l in open(options.file) if l.strip()]
        if len(args) == 2:
            user = args[0]
            group = args[1]
        elif len(args) == 0:
            #get os username by default
            uinfo = pwd.getpwuid(os.getuid())
            user = uinfo.pw_name
            #group by default DATAOPS
            group = 'DATAOPS'
        # NOTE(review): with -f and any other arg count, user/group stay
        # undefined and the loop below raises NameError — TODO confirm/fix
    else:
        if len(args) == 3:
            user = args[1]
            group = args[2]
        elif len(args) == 1:
            #get os username by default
            uinfo = pwd.getpwuid(os.getuid())
            user = uinfo.pw_name
            #group by default DATAOPS
            group = 'DATAOPS'
        else:
            parser.error("Provide the workflow of a file of workflows")
            sys.exit(1)
        #name of workflow (first positional argument)
        wfs = [args[0]]
    for wf in wfs:
        #abort workflow in ReqMgr
        print "Aborting workflow: " + wf
        reqMgrClient.abortWorkflow(url, wf)
        #invalidates datasets
        print "Invalidating datasets"
        datasets = reqMgrClient.outputdatasetsWorkflow(url, wf)
        for ds in datasets:
            print ds
            dbs3.setDatasetStatus(ds, 'INVALID', files=True)
        #clone workflow
        clone = resubmit.cloneWorkflow(wf, user, group)
    sys.exit(0);
def main():
    """
    Invalidate a list of workflows; optionally invalidate their output
    datasets in DBS (-i) and submit clones (-c).

    Workflows come from a text file (-f) or a single positional name.
    The clone requester defaults to the OS user (-u overrides) and the
    group to 'DATAOPS' (-g overrides).
    """
    usage = "\n python %prog [options] [WORKFLOW_NAME]\n" \
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"
    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file of workflows to Reject and Clone', dest='file')
    parser.add_option('-c', '--clone', help='Are the workflows going to be cloned? The default value is False', action="store_true", dest='clone', default=False)
    parser.add_option('-i', '--invalidate', help='Invalidate datasets? The default value is False', action="store_true", dest='invalidate', default=False)
    parser.add_option("-u", "--user", dest="user", help="The user for creating the clone, if empty it will use the OS user running the script")
    parser.add_option("-g", "--group", dest="group", default='DATAOPS', help="The group for creating the clone, if empty it will, use 'DATAOPS' by default")
    (options, args) = parser.parse_args()
    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except:
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) > 0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)
    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user
    for workflow in workflows:
        # a bad name makes the Workflow constructor fail; skip it
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except:
            print("The workflow name: "+ workflow+" is not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: "+ workflow)
        reqMgrClient.invalidateWorkflow(url,workflow,workflowInfo.status)
        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, 'INVALID', files=True)
        # clones workflow
        if options.clone:
            print("Cloning workflow: "+ workflow)
            cloned = resubmit.cloneWorkflow(workflow, user, options.group)
    sys.exit(0);
def classifyCompletedRequests(url, requests):
    """
    Sorts completed requests by request type.

    :param url: ReqMgr host
    :param requests: iterable of request view rows; each has 'id' (workflow
        name) and 'key' == [?, status, requestType]
    :returns: dict mapping each known type to a list of workflow names.
        Completed MonteCarlo requests whose first output dataset ends in
        /GEN are filed under 'LHEStepZero'; TaskChains are kept only when
        their subtype is MC or ReDigi; RelVal workflows are discarded.
    """
    workflows={'ReDigi':[],'MonteCarloFromGEN':[],'MonteCarlo':[] , 'ReReco':[], 'LHEStepZero':[], 'StoreResults':[], 'TaskChain':[]}
    for request in requests:
        name=request['id']
        #if a wrong or weird name
        if len(request['key'])<3:
            print request
            continue
        #discard RelVals
        if 'RVCMSSW' in name:
            continue
        status=request['key'][1]
        #only completed requests
        if status=='completed':
            requestType=request['key'][2]
            #sort by type
            if requestType=='MonteCarlo':
                #MonteCarlo's which datasets end with /GEN
                #are Step0
                try:
                    datasets = reqMgrClient.outputdatasetsWorkflow(url, name)
                    m = re.search('.*/GEN$', datasets[0])
                    if m:
                        workflows['LHEStepZero'].append(name)
                    else:
                        workflows[requestType].append(name)
                    #TODO identify MonteCarlo with two output
                except Exception as e:
                    # outputs unavailable (or empty): report and skip
                    print "Error on wf", name
                    continue
            elif requestType=='TaskChain':
                #only taskchains with MC or ReDigi subType
                subType = reqMgrClient.getWorkflowSubType(url, name)
                if subType in ['MC','ReDigi']:
                    workflows[requestType].append(name)
            elif requestType in ['MonteCarloFromGEN', 'LHEStepZero', 'ReDigi', 'ReReco', 'StoreResults']:
                workflows[requestType].append(name)
    return workflows
def extendWorkflow(workflow, user, group, verbose=False, events=None, firstlumi=None):
    """
    Submit an extension clone of a MonteCarlo workflow.

    :param workflow: name of the workflow to extend
    :param user: requester for the clone
    :param group: group for the clone
    :param verbose: print the generated schema and raw responses
    :param events: events to produce; defaults to the missing events of
        the original workflow
    :param firstlumi: first lumi of the extension; defaults to the last
        lumi already present in the output dataset (so ranges don't clash)
    """
    if events is None:
        events = getMissingEvents(workflow)
    events = int(events)
    if firstlumi is None:
        #get the last lumi of the dataset
        dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
        lastLumi = dbs3Client.getMaxLumi(dataset)
        firstlumi = lastLumi
    firstlumi = int(firstlumi)
    # Get info about the workflow to be cloned
    cache = reqMgrClient.getWorkflowInfo(url, workflow)
    schema = modifySchema(cache, workflow, user, group, events, firstlumi, None)
    if verbose:
        pprint(schema)
    print 'Submitting workflow'
    # Submit cloned workflow to ReqMgr
    response = reqMgrClient.submitWorkflow(url, schema)
    if verbose:
        print "RESPONSE", response
    #find the workflow name in response (ReqMgr replies with an HTML details link)
    m = re.search("details\/(.*)\'", response)
    if m:
        newWorkflow = m.group(1)
        print 'Cloned workflow: ' + newWorkflow
        print 'Extended with', events, 'events'
        print response
        # Move the request to Assignment-approved
        print 'Approve request response:'
        data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
        print data
    else:
        # submission failed: dump the raw response for debugging
        print response
def closeOutStep0Requests(url, workflows):
    """
    Closes out MonteCarlo step0 (LHE) requests.

    For each completed workflow (skipping the 'analysis' team queue) it
    checks, per output dataset: completion >= 95%, custodial move
    subscription, lumi duplicates and GEN lumi/event correctness, prints a
    table row, and close-out cascades the workflow only when every dataset
    passed.  Complete datasets without a site subscription flag the
    workflow into the returned list.
    :returns: list of workflows missing a PhEDEx site subscription
    """
    noSiteWorkflows = []
    for workflow in workflows:
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        status = reqMgrClient.getWorkflowStatus(url, workflow)
        #if not completed skip
        if status != 'completed':
            continue
        closeOutWorkflow = True
        #skip montecarlos on a special queue
        if reqMgrClient.getRequestTeam(url, workflow) == 'analysis':
            continue
        for dataset in datasets:
            closeOutDataset = False
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.getCustodialMoveSubscriptionSite(dataset)
            transPerc = 0
            closedBlocks = None
            duplicate = None
            correctLumis = None
            # if dataset has subscription and enough events we check
            # duplicates, transfer percentage, closed blocks and lumis
            if phedexSubscription and percentage >= float(0.95):
                transPerc = phedexClient.getTransferPercentage(url, dataset, phedexSubscription)
                duplicate = dbs3Client.duplicateLumi(dataset)
                correctLumis = checkCorrectLumisEventGEN(dataset)
                #TODO validate closed blocks
                if not duplicate and correctLumis:
                    closeOutDataset = True
                else:
                    closeOutDataset = False
            #validate when percentage is ok but has not phedex subscription
            if percentage >= float(0.95) and not phedexSubscription:
                noSiteWorkflows.append(workflow)
            #if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            # one status row per dataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s| %5s| ' % (workflow, dataset,str(int(percentage*100)), str(phedexSubscription), str(correctLumis), duplicate, closeOutDataset)
        #workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    # visual separator after the table
    print '-'*180
    return noSiteWorkflows
def closeOutMonterCarloRequests(url, workflows):
    """
    Closes out MonteCarlo / MonteCarloFromGEN workflows.

    Per output dataset it requires completion >= 95% (100% for SMS
    datasets), a custodial move subscription and no lumi duplicates; it
    prints a table row per dataset and close-out cascades the workflow
    only when all its datasets are ready.  Complete datasets without a
    site subscription flag the workflow into the returned list.
    :returns: list of workflows missing a PhEDEx site subscription
    """
    noSiteWorkflows = []
    for workflow in workflows:
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        closeOutWorkflow = True
        #skip montecarlos on a special queue
        if reqMgrClient.getRequestTeam(url, workflow) == 'analysis':
            continue
        for dataset in datasets:
            closePercentage = 0.95
            # validation for SMS montecarlos: require full statistics
            if 'SMS' in dataset:
                closePercentage= 1.00
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.getCustodialMoveSubscriptionSite(dataset)
            transPerc = 0
            closedBlocks = None
            duplicate = None
            # if dataset has subscription and enough events we check
            # duplicates, transfer percentage and closed blocks
            if phedexSubscription and percentage >= float(closePercentage):
                transPerc = phedexClient.getTransferPercentage(url, dataset, phedexSubscription)
                duplicate = dbs3Client.duplicateLumi(dataset)
                if not duplicate:
                    closeOutDataset = True
                else:
                    closeOutDataset = False
            else:
                closeOutDataset = False
            #validate when percentage is ok but has not phedex subscription
            if percentage >= float(closePercentage) and not phedexSubscription:
                noSiteWorkflows.append(workflow)
            #if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            # one status row per dataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s| %5s|' % (workflow, dataset,str(int(percentage*100)), str(phedexSubscription), str(int(transPerc*100)), duplicate, closeOutDataset)
        #workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    #separation line
    print '-'*180
    return noSiteWorkflows
def getDatasetVersion(url, workflow, era, procstring):
    """
    Compute the next free processing version for a workflow's outputs.

    Looks up every existing dataset matching the workflow's output primary
    datasets with the given acquisition era and processing string, extracts
    the '-vN' suffix of each, and returns one more than the highest version
    found (1 when none exists).

    :param url: ReqMgr host
    :param workflow: workflow whose output datasets seed the search
    :param era: acquisition era to match
    :param procstring: processing string to match
    :returns: int, the next unused version number
    """
    versionNum = 1
    outputs = reqMgrClient.outputdatasetsWorkflow(url, workflow)
    for output in outputs:
        bits = output.split('/')
        # /<primary>/<era>-<procstring>*/<tier> — wildcard over the version
        outputCheck = '/' + bits[1] + '/' + era + '-' + procstring + '*/' + bits[len(bits) - 1]
        datasets = getDatasets(outputCheck)
        for dataset in datasets:
            datasetName = dataset['dataset']
            matchObj = re.match(r".*-v(\d+)/.*", datasetName)
            if matchObj:
                currentVersionNum = int(matchObj.group(1))
                # BUG FIX: the old "+1 per match" bump depended on the order
                # datasets came back in (e.g. seeing v2 before v1 yielded 2,
                # colliding with the existing v2). The next free version is
                # simply one past the highest existing one.
                versionNum = max(versionNum, currentVersionNum + 1)
    return versionNum
def main(): args=sys.argv[1:] if not len(args)==1: print "usage:WorkflowPercentage.py workflowname" sys.exit(0) workflow=args[0] url='cmsweb.cern.ch' #retrieve the output datasets outputDataSets=reqMgrClient.outputdatasetsWorkflow(url, workflow) for dataset in outputDataSets: perc = percentageCompletion(url, workflow, dataset, verbose=True) print dataset,"match:",perc,"%" sys.exit(0);
def main(): usage = "usage: %prog [options] workflow" parser = OptionParser(usage=usage) parser.add_option("-f", "--file", dest="fileName", default=None, help="Input file") parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Show detailed info") parser.add_option( "--test", action="store_true", dest="test", default=False, help="Only test and console output (doesn't make the actual calls)") (options, args) = parser.parse_args() if len(args) != 1 and options.fileName is None: parser.error("Provide the workflow name or a file") sys.exit(1) if options.fileName is None: workflows = [args[0]] else: workflows = [l.strip() for l in open(options.fileName) if l.strip()] datasets = [] i = 0 print "Getting output from workflows" for wf in workflows: if options.verbose: print wf try: ds = rqmgr.outputdatasetsWorkflow(url, wf) datasets += ds except: print wf, "skipped" reqs = makeDeletionRequests(url, datasets, options.verbose, options.test) print "Deletion request made:" print '\n'.join(reqs)
def duplicateLumisWorkflow(url, workflow, verbose=False): """ Shows where the workflow hs duplicate events """ datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow) duplicate = False print 'workflow:',workflow #check e for dataset in datasets: print 'dataset :', dataset #if dbs3Client.duplicateLumi(dataset, verbose): if dbs3Client.duplicateRunLumi(dataset, verbose, skipInvalid=True): duplicate = True #fast check, one dataset duplicated if not verbose: print 'Has duplicated lumis' return True if not duplicate: print "No duplicate found" return duplicate
def duplicateLumisWorkflow(url, workflow, verbose=False):
    """
    Shows whether the workflow has duplicate run/lumis in its outputs.

    :param url: ReqMgr host
    :param workflow: workflow name
    :param verbose: when False, returns as soon as one duplicated dataset
        is found; when True, checks (and prints) every dataset
    :returns: True if any output dataset has duplicated run/lumis
    """
    datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
    duplicate = False
    print 'workflow:', workflow
    #check every output dataset
    for dataset in datasets:
        print 'dataset :', dataset
        #if dbs3Client.duplicateLumi(dataset, verbose):
        if dbs3Client.duplicateRunLumi(dataset, verbose, skipInvalid=True):
            duplicate = True
            #fast check, one dataset duplicated is enough
            if not verbose:
                print 'Has duplicated lumis'
                return True
    if not duplicate:
        print "No duplicate found"
    return duplicate
def extendWorkflow(workflow, user, group, verbose=False, events=None, firstlumi=None):
    """
    Submit an extension clone of a MonteCarlo workflow (legacy schema path).

    :param workflow: name of the workflow to extend
    :param user: requester for the clone
    :param group: group for the clone
    :param verbose: print the generated schema and raw responses
    :param events: events to produce; defaults to the missing events of
        the original workflow
    :param firstlumi: first lumi of the extension; defaults to the last
        lumi already present in the output dataset
    """
    if events is None:
        events = getMissingEvents(workflow)
    events = int(events)
    if firstlumi is None:
        #get the last lumi of the dataset
        dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
        lastLumi = dbs3Client.getMaxLumi(dataset)
        firstlumi = lastLumi
    firstlumi = int(firstlumi)
    # Get info about the workflow to be cloned
    helper = reqMgrClient.retrieveSchema(workflow)
    schema = modifySchema(helper, workflow, user, group, events, firstlumi)
    # record provenance of the extension
    schema['OriginalRequestName'] = workflow
    if verbose:
        pprint(schema)
    print 'Submitting workflow'
    # Sumbit cloned workflow to ReqMgr
    response = reqMgrClient.submitWorkflow(url,schema)
    if verbose:
        print "RESPONSE", response
    #find the workflow name in response (ReqMgr replies with an HTML details link)
    m = re.search("details\/(.*)\'",response)
    if m:
        newWorkflow = m.group(1)
        print 'Cloned workflow: '+newWorkflow
        print 'Extended with', events, 'events'
        print response
        # Move the request to Assignment-approved
        print 'Approve request response:'
        data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
        print data
    else:
        # submission failed: dump the raw response for debugging
        print response
    pass
def closeOutReRecoWorkflows(url, workflows):
    """
    Closeout ReReco workflows.

    RelVal and TEST workflows are skipped.  A dataset is closeable only at
    100% completion with a custodial subscription; a workflow is close-out
    cascaded when all its datasets are closeable.  Complete datasets
    without a subscription flag the workflow into the returned list.
    :returns: list of workflows missing a PhEDEx site subscription
    """
    noSiteWorkflows = []
    for workflow in workflows:
        if 'RelVal' in workflow:
            continue
        if 'TEST' in workflow:
            continue
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        inputDataset = reqMgrClient.getInputDataSet(url, workflow)  # fetched but unused below — TODO confirm still needed
        closeOutWorkflow = True
        #check if dataset is ready
        for dataset in datasets:
            # NOTE(review): duplicate is hard-coded False here — the
            # duplicate-lumi check appears disabled for ReReco; confirm intended
            duplicate = False
            closeOutDataset = True  # NOTE(review): dead assignment, overwritten below
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.hasCustodialSubscription(dataset)
            closeOutDataset = False
            #dataset can be closed out only with 100% of events
            if percentage == 1 and phedexSubscription and not duplicate:
                closeOutDataset = True
            else:
                closeOutDataset = False
            #validate when percentage is ok but has not phedex subscription
            if percentage == 1 and not phedexSubscription:
                noSiteWorkflows.append(workflow)
            #if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            # one status row per dataset (the literal 100 fills the transfer column)
            print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (workflow, dataset,str(int(percentage*100)), str(phedexSubscription), 100, duplicate, closeOutDataset)
        #workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    # visual separator after the table
    print '-'*180
    return noSiteWorkflows
def main(): url = 'cmsweb.cern.ch' #Create option parser usage = "\n python %prog [-f FILE_NAME | WORKFLOW_NAME ...]\n" parser = OptionParser(usage=usage) parser.add_option('-f', '--file', help='Text file with a list of workflows', dest='file') parser.add_option('-i', '--invalidate', action='store_true', default=False, help='Also invalidate output datasets on DBS', dest='invalidate') (options, args) = parser.parse_args() if options.file: wfs = [l.strip() for l in open(options.file) if l.strip()] elif args: wfs = args else: parser.error("Provide the workflow of a file of workflows") sys.exit(1) for wf in wfs: print "Aborting workflow: " + wf reqMgrClient.abortWorkflow(url, wf) print "Aborted" if options.invalidate: print "Invalidating datasets" datasets = reqMgrClient.outputdatasetsWorkflow(url, wf) for ds in datasets: print ds dbs3.setDatasetStatus(ds, 'INVALID', files=True) sys.exit(0)
def modifySchema(helper, workflow, user, group, events):
    """
    Adapts the original request schema into an extension-request schema.

    Copies the schema key by key (renaming legacy keys, replacing the
    requestor/group, prefixing the RequestString with EXT_), then shifts
    FirstEvent and FirstLumi past the original production boundaries so
    the extension cannot collide with already-produced events/lumis, and
    finally sets RequestNumEvents to the extension size.

    :param helper: request helper holding the original schema
    :param workflow: original workflow name (used to find the output dataset)
    :param user: requestor of the extension
    :param group: group of the extension
    :param events: number of events the extension should produce
    :returns: dict, the new request schema
    """
    result = {}
    # Add AcquisitionEra, ProcessingString and ProcessingVersion
    result["ProcessingString"] = helper.getProcessingString()
    result["ProcessingVersion"] = helper.getProcessingVersion()
    result["AcquisitionEra"] = helper.getAcquisitionEra()
    for key, value in helper.data.request.schema.dictionary_().items():
        #previous versions of tags
        if key == 'ProcConfigCacheID':
            result['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            result['RequestSizeEvents'] = value
        #requestor info
        elif key == 'Requestor':
            result['Requestor'] = user
        elif key == 'Group':
            result['Group'] = group
        #preppend EXT to recognize as an extension
        elif key == 'RequestString':
            result['RequestString'] = 'EXT_'+str(value)
        #if emtpy list keys, keep them as empty lists
        elif key in ["RunWhitelist", "RunBlacklist", "BlockWhitelist", "BlockBlacklist"] and not value:
            result[key]=[]
        #skip empty entries
        elif not value:
            continue
        elif value != None:
            result[key] = value
    #extend workflow so it will safely start outside of the boundary
    RequestNumEvents = int(result['RequestNumEvents'])
    FirstEvent = int(result['FirstEvent'])
    FirstLumi = int(result['FirstLumi'])
    EventsPerLumi = int(result['EventsPerLumi'])
    FilterEfficiency = float(result['FilterEfficiency'])
    #FirstEvent_NEW > FirstEvent + RequestNumEvents
    #the fist event needs to be oustide the range
    result['FirstEvent'] = FirstEvent + RequestNumEvents + DELTA_EVENTS
    #FirstLumi_NEW > FirstLumi + RequestNumEvents/events_per_job/filterEff
    # same for the first lumi, needs to be after the last lumi
    # (the estimate below is disabled in favour of the actual max lumi from DBS)
    """
    result['FirstLumi'] = int(FirstLumi + math.ceil( RequestNumEvents / float(EventsPerLumi) / FilterEfficiency ) + DELTA_LUMIS / FilterEfficiency )
    """
    #get the last lumi of the dataset
    dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
    LastLumi = dbs3Client.getMaxLumi(dataset)
    result['FirstLumi'] = LastLumi + DELTA_LUMIS
    #only the desired events
    result['RequestNumEvents'] = events
    if 'LumisPerJob' not in result and result['RequestType']=='MonteCarlo':
        #seek for lumis per job on helper, fall back to 300
        splitting = helper.listJobSplittingParametersByTask()
        lumisPerJob = 300
        for k, v in splitting.items():
            if k.endswith('/Production'):
                if 'lumis_per_job' in v:
                    lumisPerJob = v['lumis_per_job']
        result['LumisPerJob'] = lumisPerJob
    #TODO do this always?
    if 'EventsPerJob' not in result and result['RequestType']=='MonteCarlo':
        #seek for events per job on helper, fall back to 120000
        splitting = helper.listJobSplittingParametersByTask()
        eventsPerJob = 120000
        for k, v in splitting.items():
            if k.endswith('/Production'):
                if 'events_per_job' in v:
                    eventsPerJob = v['events_per_job']
        result['EventsPerJob'] = eventsPerJob
    if 'MergedLFNBase' not in result:
        result['MergedLFNBase'] = helper.getMergedLFNBase()
    return result
def main():
    """
    Invalidate a list of workflows; optionally invalidate their output
    datasets in DBS (-i) and submit clones (-c) with an optional memory
    override (-m).

    Workflows come from a text file (-f) or a single positional name.
    The clone requester defaults to the OS user (-u overrides) and the
    group to 'DATAOPS' (-g overrides).
    """
    usage = (
        "\n python %prog [options] [WORKFLOW_NAME]\n"
        "WORKFLOW_NAME: if the list file is provided this should be empty\n"
    )
    parser = OptionParser(usage=usage)
    parser.add_option("-f", "--file", help="Text file of workflows to Reject and Clone", dest="file")
    parser.add_option(
        "-c",
        "--clone",
        help="Are the workflows going to be cloned? The default value is False",
        action="store_true",
        dest="clone",
        default=False,
    )
    parser.add_option(
        "-i",
        "--invalidate",
        help="Invalidate datasets? The default value is False",
        action="store_true",
        dest="invalidate",
        default=False,
    )
    parser.add_option(
        "-u",
        "--user",
        dest="user",
        help="The user for creating the clone, if empty it will use the OS user running the script",
    )
    parser.add_option(
        "-g",
        "--group",
        dest="group",
        default="DATAOPS",
        help="The group for creating the clone, if empty it will, use 'DATAOPS' by default",
    )
    parser.add_option(
        "-m",
        "--memory",
        dest="memory",
        help="Set max memory for the clone. At assignment, this will be used to calculate maxRSS = memory*1024",
    )
    (options, args) = parser.parse_args()
    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except:
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) > 0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)
    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user
    for workflow in workflows:
        # a bad name makes the Workflow constructor fail; skip it
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except:
            print("The workflow name: " + workflow + " is not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: " + workflow)
        reqMgrClient.invalidateWorkflow(url, workflow, workflowInfo.status)
        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, "INVALID", files=True)
        # clones workflow
        if options.clone:
            print("Cloning workflow: " + workflow)
            # -m overrides the memory recorded on the original request
            if options.memory:
                mem = float(options.memory)
            else:
                mem = workflowInfo.info["Memory"]
            cloned = resubmit.cloneWorkflow(workflow, user, options.group, memory=mem)
    sys.exit(0)
def main():
    """
    Find the smallest set of files to invalidate so that a dataset has no
    duplicated lumis.

    Input is either the text output of duplicateEvents.py, or (with
    --doall) a list of workflows whose output datasets are analyzed from
    scratch.  For each dataset a conflict graph of files sharing lumis is
    built and a minimal file set is chosen (bipartite coloring first,
    greedy fallback), then the invalidation impact is printed.
    """
    usage = "python %prog [OPTIONS]"
    parser = OptionParser(usage)
    parser.add_option("-a", "--doall",dest="doall", action="store_true" , default=False,
                      help="It will analyze all datasets of the workflow from the beginning. If this option is true,"\
                      " you should provide a workflow name or a list of them in the --file option.")
    parser.add_option("-f", "--file",dest="file",
                      help="Input file with the contents of duplicateEvents.py (a list of lumis and files)."\
                      " If you are using the --doall option, it should contain a list of workflows instead")
    options, args = parser.parse_args()
    workflows = None
    #if we not doing all, input should be treated as list of lumis an files
    if not options.doall and options.file:
        lines = [l.strip() for l in open(options.file)]
        graphs = buildGraphs(lines)
    # if do all and input file
    elif options.doall and options.file:
        workflows = [l.strip() for l in open(options.file)]
    elif options.doall and not options.file:
        workflows = args
    else:
        parser.error("You should provide an input file with the output of duplicateEvents")
        # NOTE(review): if parser.error does not exit here, 'graphs' is
        # unbound below — TODO confirm
    # get the output datasets of the workflos and create the graph
    if workflows:
        datasets = []
        for wf in workflows:
            datasets += reqMgrClient.outputdatasetsWorkflow(url, wf);
        graphs = {}
        #analyze each dataset
        for dataset in datasets:
            dup, lumis = dbs.duplicateRunLumi(dataset, verbose="dict", skipInvalid=True)
            #print lumis
            graphs[dataset] = buildGraph(lumis)
    for dataset, graph in graphs.items():
        #look for datasetname
        print "Getting events per file"
        events = getFileEvents(dataset, graph.keys())
        try:
            #first algorithm that assumes bipartition
            files = colorBipartiteGraph(graph, events)
        except Exception as e:
            #second, algorithm (greedy fallback when the graph is not bipartite)
            #files = deleteMaxDegreeFirst(graph, events)
            files = deleteSmallestVertexFirst(graph, events)
        total = dbs.getEventCountDataSet(dataset)
        invalid = dbs.getEventCountDataSetFileList(dataset, files)
        print 'total events %s'%total
        print 'invalidated files %s'%len(files)
        print 'invalidated events %s'%invalid
        if total:
            # percentage of events surviving the invalidation
            print '%s%%'%(float(total-invalid)/total*100.0)
        for f in sorted(files):
            print f
def main():
    """
    Invalidate a list of workflows; optionally invalidate their output
    datasets in DBS (-i) and submit clones (-c) with an optional memory
    override (-m).

    Workflows come from a text file (-f) or a single positional name.
    The clone requester defaults to the OS user (-u overrides) and the
    group to 'DATAOPS' (-g overrides).
    """
    usage = "\n python %prog [options] [WORKFLOW_NAME]\n" \
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"
    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file of workflows to Reject and Clone', dest='file')
    parser.add_option(
        '-c',
        '--clone',
        help='Are the workflows going to be cloned? The default value is False',
        action="store_true",
        dest='clone',
        default=False)
    parser.add_option('-i', '--invalidate',
                      help='Invalidate datasets? The default value is False',
                      action="store_true", dest='invalidate', default=False)
    parser.add_option(
        "-u", "--user", dest="user",
        help="The user for creating the clone, if empty it will use the OS user running the script")
    parser.add_option(
        "-g", "--group", dest="group", default='DATAOPS',
        help="The group for creating the clone, if empty it will, use 'DATAOPS' by default")
    parser.add_option(
        "-m", "--memory", dest="memory",
        help="Set max memory for the clone. At assignment, this will be used to calculate maxRSS = memory*1024")
    (options, args) = parser.parse_args()
    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except IOError:  # narrowed from bare except: only file errors expected here
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) > 0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)
    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user
    for workflow in workflows:
        # a bad name makes the Workflow constructor fail; skip it
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except Exception:
            print("The workflow name: " + workflow + " is not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: " + workflow)
        reqMgrClient.invalidateWorkflow(url, workflow, workflowInfo.status)
        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, 'INVALID', files=True)
        # clones workflow
        if options.clone:
            print("Cloning workflow: " + workflow)
            # BUG FIX: optparse yields option values as strings; convert the
            # -m override to a number (as the sibling version of this script
            # does) so cloneWorkflow receives a numeric memory value, later
            # used to compute maxRSS = memory * 1024.
            if options.memory:
                mem = float(options.memory)
            else:
                mem = workflowInfo.info["Memory"]
            cloned = resubmit.cloneWorkflow(workflow, user, options.group, memory=mem)
    sys.exit(0)
def modifySchema(helper, workflow, user, group, events):
    """
    Adapts the original request schema into an extension-request schema.

    Copies the schema key by key (renaming legacy keys, replacing the
    requestor/group, migrating the old DBS2 URL), shifts FirstEvent and
    FirstLumi past the original production boundaries so the extension
    cannot collide with already-produced events/lumis, prefixes the
    RequestString with EXT_, and refreshes era/processing metadata.

    :param helper: request helper holding the original schema
    :param workflow: original workflow name (used to find the output dataset)
    :param user: requestor of the extension
    :param group: group of the extension
    :param events: number of events the extension should produce
    :returns: dict, the new request schema
    """
    result = {}
    #pprint.pprint(helper.data.request.schema.dictionary_())
    for key, value in helper.data.request.schema.dictionary_().items():
        #previous versions of tags
        if key == 'ProcConfigCacheID':
            result['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            result['RequestSizeEvents'] = value
        #requestor info
        elif key == 'Requestor':
            result['Requestor'] = user
        elif key == 'Group':
            result['Group'] = group
        #if emtpy list keys, keep them as empty lists
        elif key in ["RunWhitelist", "RunBlacklist", "BlockWhitelist", "BlockBlacklist"] and not value:
            result[key]=[]
        #replace old DBS2 URL
        elif value == "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet":
            result[key] = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        #copy the right LFN base
        elif key == 'MergedLFNBase':
            result['MergedLFNBase'] = helper.getMergedLFNBase()
        #TODO deleting timeout so they will move to running-close as soon as they can
        #elif key == 'OpenRunningTimeout':
            #delete entry
        #    continue
        #skip empty entries
        elif not value:
            continue
        elif value != None:
            result[key] = value
    #extend workflow so it will safely start outside of the boundary
    RequestNumEvents = int(result['RequestNumEvents'])
    FirstEvent = int(result['FirstEvent'])
    FirstLumi = int(result['FirstLumi'])
    EventsPerLumi = int(result['EventsPerLumi'])
    FilterEfficiency = float(result['FilterEfficiency'])
    #FirstEvent_NEW > FirstEvent + RequestNumEvents
    #the fist event needs to be oustide the range
    result['FirstEvent'] = FirstEvent + RequestNumEvents + DELTA_EVENTS
    #FirstLumi_NEW > FirstLumi + RequestNumEvents/events_per_job/filterEff
    # same for the first lumi, needs to be after the last lumi
    #get the last lumi of the dataset
    dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
    LastLumi = dbs3Client.getMaxLumi(dataset)
    result['FirstLumi'] = LastLumi + DELTA_LUMIS
    #only the desired events
    result['RequestNumEvents'] = events
    #prepend EXT_ to recognize as extension
    result["RequestString"] = 'EXT_'+result["RequestString"]
    #check MonteCarlo
    if result['RequestType']=='MonteCarlo':
        #check assigning parameters
        #seek for events per job on helper, with hard-coded fallbacks
        splitting = helper.listJobSplittingParametersByTask()
        eventsPerJob = 120000
        eventsPerLumi = 100000
        for k, v in splitting.items():
            print k,":",v
            if k.endswith('/Production'):
                if 'events_per_job' in v:
                    eventsPerJob = v['events_per_job']
                elif 'events_per_lumi' in v:
                    eventsPerLumi = v['events_per_lumi']
        result['EventsPerJob'] = eventsPerJob
        #result['EventsPerLumi'] = eventsPerLumi
    #Merged LFN
    if 'MergedLFNBase' not in result:
        result['MergedLFNBase'] = helper.getMergedLFNBase()
    #update information from reqMgr
    # Add AcquisitionEra, ProcessingString and ProcessingVersion
    result["ProcessingString"] = helper.getProcessingString()
    result["AcquisitionEra"] = helper.getAcquisitionEra()
    #try to parse processing version as an integer, if don't, assign 1
    try:
        result["ProcessingVersion"] = int(helper.getProcessingVersion())
    except ValueError:
        result["ProcessingVersion"] = 1
    return result