def setUp(self):
    """
    _setUp_

    Setup couchdb and the test environment
    """
    super(ResubmitBlockTest, self).setUp()
    self.group = 'unknown'
    self.user = '******'

    # Set external test helpers
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")

    # Define test environment
    self.couchUrl = os.environ["COUCHURL"]
    self.acdcDBName = 'resubmitblock_t'
    self.validLocations = ['T2_US_Nebraska', 'T1_US_FNAL_Disk', 'T1_UK_RAL_Disk']
    self.siteWhitelist = ['T2_XX_SiteA']
    siteDB = SiteDB()
    # Convert PhEDEx node names to valid processing site names
    self.PSNs = siteDB.PNNstoPSNs(self.validLocations)
    self.workflowName = 'dballest_ReReco_workflow'

    couchServer = CouchServer(dburl=self.couchUrl)
    self.acdcDB = couchServer.connectDatabase(self.acdcDBName, create=False)
    user = makeUser(self.group, '*****@*****.**', self.couchUrl, self.acdcDBName)
    user.create()

    return
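
# The setUp above relies on SiteDB's PNNstoPSNs to turn PhEDEx node names
# (storage endpoints such as T1_US_FNAL_Disk) into processing site names
# (T1_US_FNAL). A minimal, self-contained sketch of that mapping is shown
# below; the suffix list mirrors the phEDExNodetocmsName helper further down
# in this section, but it is an illustration only, not the real SiteDB
# lookup, which is driven by service data.

def pnns_to_psns_sketch(pnns):
    """Strip common storage suffixes from PhEDEx node names (illustrative only)."""
    psns = []
    for pnn in pnns:
        psn = pnn
        for suffix in ('_Disk', '_Buffer', '_MSS', '_Export'):
            if psn.endswith(suffix):
                psn = psn[:-len(suffix)]
                break
        if psn not in psns:
            psns.append(psn)
    return psns

# Example: the test's validLocations would map roughly as
# pnns_to_psns_sketch(['T2_US_Nebraska', 'T1_US_FNAL_Disk', 'T1_UK_RAL_Disk'])
# -> ['T2_US_Nebraska', 'T1_US_FNAL', 'T1_UK_RAL']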
class ResubmitBlock(StartPolicyInterface):
    """Split elements into blocks"""

    def __init__(self, **args):
        StartPolicyInterface.__init__(self, **args)
        self.args.setdefault('SliceType', 'NumberOfFiles')
        self.args.setdefault('SliceSize', 1)
        self.args.setdefault('SplittingAlgo', 'LumiBased')
        self.lumiType = "NumberOfLumis"

        # Define how to handle the different splitting algorithms
        self.algoMapping = {'Harvest': self.singleChunk,
                            'ParentlessMergeBySize': self.singleChunk,
                            'MinFileBased': self.singleChunk,
                            'LumiBased': self.singleChunk,
                            'EventAwareLumiBased': self.singleChunk,
                            'EventBased': self.singleChunk}
        self.unsupportedAlgos = ['WMBSMergeBySize', 'SiblingProcessingBased']
        self.defaultAlgo = self.fixedSizeChunk
        self.sites = []
        self.siteDB = SiteDB()

    def split(self):
        """Apply policy to spec"""
        # Prepare a site list in case we need it
        siteWhitelist = self.initialTask.siteWhitelist()
        siteBlacklist = self.initialTask.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for block in self.validBlocks(self.initialTask):
            parentList = {}
            parentFlag = False
            if self.initialTask.parentProcessingFlag():
                parentFlag = True
                parentList[block["Name"]] = block['Sites']

            self.newQueueElement(Inputs={block['Name']: block['Sites']},
                                 ParentFlag=parentFlag,
                                 ParentData=parentList,
                                 NumberOfLumis=block[self.lumiType],
                                 NumberOfFiles=block['NumberOfFiles'],
                                 NumberOfEvents=block['NumberOfEvents'],
                                 Jobs=ceil(float(block[self.args['SliceType']]) /
                                           float(self.args['SliceSize'])),
                                 ACDC=block['ACDC'],
                                 NoInputUpdate=self.initialTask.getTrustSitelists().get('trustlists'),
                                 NoPileupUpdate=self.initialTask.getTrustSitelists().get('trustPUlists'))

    def validate(self):
        """Check args and spec work with block splitting"""
        StartPolicyInterface.validateCommon(self)

    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according to the splitting algorithm"""
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
        else:
            # if self.data is not passed, assume the data is the input dataset from the spec
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = self.data.keys()[0]
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            if task.getTrustSitelists().get('trustlists'):
                dbsBlock["Sites"] = self.sites
            else:
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(self.wmspec,
                                           'ACDC is not supported for %s' % self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]

            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks

    def fixedSizeChunk(self, acdc, acdcInfo, task):
        """Return a set of blocks with a fixed number of ACDC records"""
        fixedSizeBlocks = []
        chunkSize = 250
        acdcBlocks = acdc.chunkFileset(acdcInfo['collection'], acdcInfo['fileset'], chunkSize)
        for block in acdcBlocks:
            dbsBlock = {}
            dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(), acdcInfo["fileset"],
                                              block['offset'], block['files'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            if task.getTrustSitelists().get('trustlists'):
                dbsBlock["Sites"] = self.sites
            else:
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            dbsBlock['ACDC'] = acdcInfo
            if dbsBlock['NumberOfFiles']:
                fixedSizeBlocks.append(dbsBlock)
        return fixedSizeBlocks

    def singleChunk(self, acdc, acdcInfo, task):
        """Return a single block (inside a list) with all associated ACDC records"""
        result = []
        acdcBlock = acdc.singleChunkFileset(acdcInfo['collection'], acdcInfo['fileset'])
        dbsBlock = {}
        dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(), acdcInfo["fileset"],
                                          acdcBlock['offset'], acdcBlock['files'])
        dbsBlock['NumberOfFiles'] = acdcBlock['files']
        dbsBlock['NumberOfEvents'] = acdcBlock['events']
        dbsBlock['NumberOfLumis'] = acdcBlock['lumis']
        if task.getTrustSitelists().get('trustlists'):
            dbsBlock["Sites"] = self.sites
        else:
            dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(acdcBlock["locations"])
        dbsBlock['ACDC'] = acdcInfo
        if dbsBlock['NumberOfFiles']:
            result.append(dbsBlock)
        return result
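
# The Jobs argument in newQueueElement above is the element's job estimate:
# the slice counter (e.g. NumberOfFiles) divided by SliceSize, rounded up.
# A minimal check of that arithmetic, using math.ceil exactly as the policy
# classes in this section do:

from math import ceil

def estimated_jobs(slice_count, slice_size):
    """Round up slice_count / slice_size, mirroring the Jobs= computation."""
    return ceil(float(slice_count) / float(slice_size))

# e.g. a chunk with 251 files and SliceSize=1 yields 251 jobs;
# with SliceSize=100 it yields 3 jobs.
assert estimated_jobs(251, 100) == 3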
class RequestQuery:

    def __init__(self, config):
        self.br = Browser()
        self.config = config

        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.phedex = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url=dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url=dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url=dbs_base_url + "phys03/DBSReader/")

    def __del__(self):
        self.br.close()

    def login2Savannah(self):
        """
        login2Savannah

        Log into Savannah with the parameters given in the config
        (username and password). The user must have admin privileges
        for store results requests.
        """
        login_page = 'https://savannah.cern.ch/account/login.php?uri=%2F'
        savannah_page = 'https://savannah.cern.ch/task/?group=cms-storeresults'

        self.br.open(login_page)

        ## 'Search' form is form 0
        ## login form is form 1
        self.br.select_form(nr=1)

        username = self.config["SavannahUser"]
        self.br['form_loginname'] = username
        self.br['form_pw'] = self.config["SavannahPasswd"]

        self.br.submit()
        response = self.br.open(savannah_page)

        # Check to see if login was successful
        if not re.search('Logged in as ' + username, response.read()):
            print('login unsuccessful, please check your username and password')
            return False
        else:
            return True

    def selectQueryForm(self, **kargs):
        """
        selectQueryForm

        Create the browser view to get all the store results tickets from Savannah.
        """
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")

            ## Use the right query form, labelled Test
            control = self.br.find_control("report_id", type="select")
            for item in control.items:
                if item.attrs['label'] == "Test":
                    control.value = [item.attrs['value']]

            ## Select the number of entries displayed per page
            control = self.br.find_control("chunksz", type="text")
            control.value = "150"

            ## Check additional search parameters
            for arg in kargs:
                if arg == "approval_status":
                    control = self.br.find_control("resolution_id", type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]
                elif arg == "task_status":
                    control = self.br.find_control("status_id", type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]
                elif arg == "team":
                    control = self.br.find_control("custom_sb5", type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]

            response = self.br.submit()
            response.read()

        return

    def getScramArchByCMSSW(self):
        """
        Get the list of available CMSSW releases and
        return a dictionary of ScramArchitecture by CMSSW.
        """
        # Set a temporary connection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response = br.open(url)
        soup = BeautifulSoup(response.read())

        # Dictionary form
        # {'CMSSW_X_X_X': ['slc5_amd64_gcc472'], ...}
        archByCmssw = {}

        # Fill the dictionary
        for arch in soup.find_all('architecture'):
            for cmssw in arch.find_all('project'):
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel] = []
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)

        return archByCmssw

    def createValueDicts(self):
        """
        Init dictionaries by value/label:
        - Releases by value
        - Physics group by value
        - DBS url by value
        - DBS url by label
        - Status of savannah request by value
        - Status of savannah ticket by value (Open/Closed/Any)
        """
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")

            control = self.br.find_control("custom_sb2", type="select")
            self.ReleaseByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb3", type="select")
            self.GroupByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb4", type="select")
            self.DBSByValueDict = self.getLabelByValueDict(control)
            self.DBSByLabelDict = self.getValueByLabelDict(control)

            control = self.br.find_control("resolution_id", type="select")
            self.StatusByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("status_id", type="select")
            self.TicketStatusByLabelDict = self.getValueByLabelDict(control)

        return

    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list of block origin sites.
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True, dataset=data)

        pnnList = set()
        for block in response:
            pnnList.add(block['origin_site_name'])
        psnList = self.mySiteDB.PNNstoPSNs(pnnList)

        return psnList, list(pnnList)

    def phEDExNodetocmsName(self, nodeList):
        """
        Convert a list of PhEDEx node names to a list of CMS names.
        """
        names = []
        for node in nodeList:
            name = node.replace('_MSS', '').replace('_Disk', '').replace('_Buffer', '').replace('_Export', '')
            if name not in names:
                names.append(name)
        return names

    def setGlobalTagFromOrigin(self, dbs_url, input_dataset):
        """
        Get the global tag of the dataset from the source dbs url.
        If it is not set, then set the global tag to 'UNKNOWN'.
        """
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)

        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'

        return globalTag

    def isDataAtUrl(self, dbs_url, input_dataset):
        """
        Returns True if the dataset is at the dbs url, if not returns False.
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)

        # This means that the dataset is not at the url
        if not response:
            return False
        else:
            return True

    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary keyed by value.
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
        return d

    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary keyed by label.
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value
        return d

    def getRequests(self, **kargs):
        """
        getRequests

        Actually goes through all the Savannah requests and creates JSON
        files if the ticket is not Closed and the status of the item is
        Done. It also reports back a summary of the requests in Savannah.
        """
        requests = []

        # Open browser and login into Savannah
        self.br = Browser()
        self.isLoggedIn = self.login2Savannah()

        if self.isLoggedIn:
            if not kargs:
                self.selectQueryForm(approval_status='1', task_status='0')
            else:
                self.selectQueryForm(**kargs)
            self.createValueDicts()

            self.br.select_form(name="bug_form")
            response = self.br.submit()
            html_output = response.read()

            scramArchByCMSSW = self.getScramArchByCMSSW()
            self.nodeMappings = self.phedex.getNodeMap()

            for link in self.br.links(text_regex="#[0-9]+"):
                response = self.br.follow_link(link)

                try:
                    ## Get Information
                    self.br.select_form(name="item_form")

                    ## remove leading whitespace and '#' from the task number
                    task = link.text.replace('#', '').decode('utf-8').strip()
                    print("Processing ticket: %s" % task)

                    ## Get input dataset name
                    control = self.br.find_control("custom_tf1", type="text")
                    input_dataset = control.value
                    input_primary_dataset = input_dataset.split('/')[1].replace(' ', '')
                    input_processed_dataset = input_dataset.split('/')[2].replace(' ', '')
                    data_tier = input_dataset.split('/')[3].replace(' ', '')

                    ## Get DBS URL from the drop-down
                    control = self.br.find_control("custom_sb4", type="select")
                    dbs_url = self.DBSByValueDict[control.value[0]]

                    ## Get DBS URL from the text field (for old entries)
                    if dbs_url == 'None':
                        control = self.br.find_control("custom_tf4", type="text")
                        dbs_url = control.value.replace(' ', '')
                    else:
                        # Transform input value to a valid DBS url
                        # dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
                        dbs_url = dbs_base_url + dbs_url + "/DBSReader"

                    ## Get Release
                    control = self.br.find_control("custom_sb2", type="select")
                    release_id = control.value

                    ## Get current request status
                    control = self.br.find_control("status_id", type="select")
                    request_status_id = control.value
                    RequestStatusByValueDict = self.getLabelByValueDict(control)

                    # close the request if a deprecated release was used
                    try:
                        release = self.ReleaseByValueDict[release_id[0]]
                    except:
                        if len(self.ReleaseByValueDict) > 0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid anymore, since the given CMSSW release is deprecated. If your request should still be processed, please reopen the request and update the CMSSW release to a more recent *working* release.\n"
                            msg += "\n"
                            msg += "Thanks,\n"
                            msg += "Your StoreResults team"
                            self.closeRequest(task, msg)
                            self.br.back()
                            print("I tried to close ticket %s because its CMSSW release is not valid" % task)
                            continue

                    # close the request if the release has no ScramArch match
                    if release not in scramArchByCMSSW:
                        if len(self.ReleaseByValueDict) > 0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid, there is no ScramArch match for the given CMSSW release.\n"
                            msg += "If your request should still be processed, please reopen the request and update the CMSSW release according to: https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML \n"
                            msg += "\n"
                            msg += "Thanks,\n"
                            msg += "Your StoreResults team"
                            self.closeRequest(task, msg)
                            self.br.back()
                            print("I tried to close ticket %s due to a ScramArch mismatch" % task)
                            continue
                    else:
                        index = len(scramArchByCMSSW[release])
                        scram_arch = scramArchByCMSSW[release][index - 1]

                    # close the request if the dataset is not at the dbs url
                    try:
                        data_at_url = self.isDataAtUrl(dbs_url, input_dataset)
                    except:
                        print('I got an error trying to look for dataset %s at %s, please look at this ticket: %s' % (input_dataset, dbs_url, task))
                        continue

                    if not data_at_url:
                        msg = "Your request is not valid, I could not find the given dataset at %s\n" % dbs_url
                        msg += "If your request should still be processed, please reopen the request and set the DBS url properly \n"
                        msg += "\n"
                        msg += "Thanks,\n"
                        msg += "Your StoreResults team"
                        self.closeRequest(task, msg)
                        self.br.back()
                        print("I tried to close ticket %s, the dataset is not at the DBS url" % task)
                        continue

                    # Avoid tickets that are not approved
                    # if not RequestStatusByValueDict[request_status_id[0]] == "Done":
                    #     continue

                    ## Get Physics Group
                    control = self.br.find_control("custom_sb3", type="select")
                    group_id = control.value[0]
                    group_squad = 'cms-storeresults-' + self.GroupByValueDict[group_id].replace("-", "_").lower()

                    ## Get Dataset Version
                    control = self.br.find_control("custom_tf3", type="text")
                    dataset_version = control.value.replace(' ', '')
                    if dataset_version == "":
                        dataset_version = '1'

                    ## Get current status
                    control = self.br.find_control("resolution_id", type="select")
                    status_id = control.value

                    ## Get assigned to
                    control = self.br.find_control("assigned_to", type="select")
                    AssignedToByValueDict = self.getLabelByValueDict(control)
                    assignedTo_id = control.value

                    ## Assign task to the physics group squad
                    if AssignedToByValueDict[assignedTo_id[0]] != group_squad:
                        assignedTo_id = [self.getValueByLabelDict(control)[group_squad]]
                        control.value = assignedTo_id
                        self.br.submit()

                    # Set the default Acquisition Era for StoreResults
                    acquisitionEra = "StoreResults"

                    ## Construction of the new dataset name (ProcessingString)
                    ## remove leading hypernews or physics group name and StoreResults+Version
                    if input_processed_dataset.find(self.GroupByValueDict[group_id]) == 0:
                        new_dataset = input_processed_dataset.replace(self.GroupByValueDict[group_id], "", 1)
                    else:
                        stripped_dataset = input_processed_dataset.split("-")[1:]
                        new_dataset = '_'.join(stripped_dataset)

                except Exception as ex:
                    self.br.back()
                    print("There is a problem with ticket %s, please have a look at the error:" % task)
                    print(str(ex))
                    print(traceback.format_exc())
                    continue

                self.br.back()

                # Get dataset site info:
                psnList, pnnList = self.getDatasetOriginSites(dbs_url, input_dataset)

                infoDict = {}
                # Build the StoreResults JSON
                # First add all the default values
                infoDict["RequestType"] = "StoreResults"
                infoDict["UnmergedLFNBase"] = "/store/unmerged"
                infoDict["MergedLFNBase"] = "/store/results/" + self.GroupByValueDict[group_id].replace("-", "_").lower()
                infoDict["MinMergeSize"] = 1500000000
                infoDict["MaxMergeSize"] = 5000000000
                infoDict["MaxMergeEvents"] = 100000
                infoDict["TimePerEvent"] = 40
                infoDict["SizePerEvent"] = 512.0
                infoDict["Memory"] = 2394
                infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"
                infoDict["Group"] = "DATAOPS"
                infoDict["DbsUrl"] = dbs_url

                # Add all the information pulled from Savannah
                infoDict["AcquisitionEra"] = acquisitionEra
                infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset)
                infoDict["DataTier"] = data_tier
                infoDict["InputDataset"] = input_dataset
                infoDict["ProcessingString"] = new_dataset
                infoDict["CMSSWVersion"] = release
                infoDict["ScramArch"] = scram_arch
                infoDict["ProcessingVersion"] = dataset_version
                infoDict["SiteWhitelist"] = psnList

                # Create report for Migration2Global
                report = {}

                # Fill the json file, if status is Done
                if self.StatusByValueDict[status_id[0]] == 'Done' and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                    self.writeJSONFile(task, infoDict)
                    report["json"] = 'y'
                else:
                    report["json"] = 'n'

                report["task"] = int(task)
                report["InputDataset"] = input_dataset
                report["ProcessingString"] = new_dataset
                report["ticketStatus"] = self.StatusByValueDict[status_id[0]]
                report["assignedTo"] = AssignedToByValueDict[assignedTo_id[0]]
                report["localUrl"] = dbs_url
                report["sites"] = psnList
                report["pnns"] = pnnList

                # if the request is closed, change the item status in the report to Closed
                if report["ticketStatus"] == "Done" and RequestStatusByValueDict[request_status_id[0]] == "Closed":
                    report["ticketStatus"] = "Closed"

                requests.append(report)

            # Print out report
            self.printReport(requests)

        # Close connections
        self.br.close()

        return requests

    def closeRequest(self, task, msg):
        """
        This closes a specific Savannah ticket and
        inserts a message in the ticket.
        """
        if self.isLoggedIn:
            # self.createValueDicts()

            response = self.br.open('https://savannah.cern.ch/task/?' + str(task))
            html = response.read()

            self.br.select_form(name="item_form")

            control = self.br.find_control("status_id", type="select")
            control.value = [self.TicketStatusByLabelDict["Closed"]]

            # Put the reason in the comment field
            control = self.br.find_control("comment", type="textarea")
            control.value = msg

            # The DBS drop-down is a mandatory field; if it is set to None
            # (for old requests), it is not possible to close the request
            self.setDBSDropDown()

            self.br.submit()

            # remove the JSON ticket
            self.removeJSONFile(task)

            self.br.back()
        return

    def setDBSDropDown(self):
        ## Get DBS URL from the drop-down
        control = self.br.find_control("custom_sb4", type="select")
        dbs_url = self.DBSByValueDict[control.value[0]]

        ## Get DBS URL from the text field (for old entries)
        if dbs_url == 'None':
            tmp = self.br.find_control("custom_tf4", type="text")
            dbs_url = tmp.value.replace(' ', '')

            if dbs_url.find("phys01") != -1:
                control.value = [self.DBSByLabelDict["phys01"]]
            elif dbs_url.find("phys02") != -1:
                control.value = [self.DBSByLabelDict["phys02"]]
            elif dbs_url.find("phys03") != -1:
                control.value = [self.DBSByLabelDict["phys03"]]
            else:
                msg = 'DBS URL of the old request is neither phys01, phys02 nor phys03. Please check!'
                print(msg)
                raise RuntimeError(msg)

        return

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ## check if the file already exists
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if not os.access(filename, os.F_OK):
            jsonfile = open(filename, 'w')
            request = {'createRequest': infoDict}  ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request, sort_keys=True, indent=4))
            jsonfile.close()
        return

    def removeJSONFile(self, task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if os.access(filename, os.F_OK):
            os.remove(filename)
        return

    def printReport(self, requests):
        """
        Print out a report
        """
        print("%20s %10s %5s %35s %10s %50s %50s" % ('Savannah Ticket', 'Status', 'json', 'Assigned to', 'local DBS', 'Sites', 'pnns'))
        print("%20s %10s %5s %35s %10s %50s %50s" % ('-' * 20, '-' * 10, '-' * 5, '-' * 35, '-' * 10, '-' * 50, '-' * 50))

        for report in requests:
            json = report["json"]
            ticket = report["task"]
            status = report["ticketStatus"]
            assigned = report["assignedTo"]
            localUrl = report["localUrl"].split('/')[5]
            site = ', '.join(report["sites"])
            pnns = ', '.join(report["pnns"])
            print("%20s %10s %5s %35s %10s %50s %50s" % (ticket, status, json, assigned, localUrl, site, pnns))
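
# getLabelByValueDict/getValueByLabelDict above just invert a mechanize
# select control's (value, label) pairs. A small stand-alone illustration
# with stub item/control objects (the real objects come from mechanize;
# the stub classes and values here are made up for the example):

class _StubItem(object):
    def __init__(self, value, label):
        self.attrs = {'value': value, 'label': label}

class _StubControl(object):
    def __init__(self, pairs):
        self.items = [_StubItem(value, label) for value, label in pairs]

control = _StubControl([('100', 'phys01'), ('101', 'phys02')])
byValue = dict((item.attrs['value'], item.attrs['label']) for item in control.items)
byLabel = dict((item.attrs['label'], item.attrs['value']) for item in control.items)
# byValue == {'100': 'phys01', '101': 'phys02'}
# byLabel == {'phys01': '100', 'phys02': '101'}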
class Block(StartPolicyInterface):
    """Split elements into blocks"""

    def __init__(self, **args):
        StartPolicyInterface.__init__(self, **args)
        self.args.setdefault('SliceType', 'NumberOfFiles')
        self.args.setdefault('SliceSize', 1)
        self.lumiType = "NumberOfLumis"

        # Initialize a list of sites where the data is
        self.sites = []

        # Initialize modifiers of the policy
        self.blockBlackListModifier = []

        self.siteDB = SiteDB()

    def split(self):
        """Apply policy to spec"""
        dbs = self.dbs()
        for block in self.validBlocks(self.initialTask, dbs):
            # set the parent flag for processing only for clarity on the couch doc
            parentList = {}
            parentFlag = False
            # TODO this is a slow process, needs to change in DBS3
            if self.initialTask.parentProcessingFlag():
                parentFlag = True
                for dbsBlock in dbs.listBlockParents(block["block"]):
                    if self.initialTask.getTrustSitelists().get('trustlists'):
                        parentList[dbsBlock["Name"]] = self.sites
                    else:
                        parentList[dbsBlock["Name"]] = self.siteDB.PNNstoPSNs(dbsBlock['PhEDExNodeList'])

            self.newQueueElement(Inputs={block['block']: self.data.get(block['block'], [])},
                                 ParentFlag=parentFlag,
                                 ParentData=parentList,
                                 NumberOfLumis=int(block[self.lumiType]),
                                 NumberOfFiles=int(block['NumberOfFiles']),
                                 NumberOfEvents=int(block['NumberOfEvents']),
                                 Jobs=ceil(float(block[self.args['SliceType']]) /
                                           float(self.args['SliceSize'])),
                                 OpenForNewData=True if str(block.get('OpenForWriting')) == '1' else False,
                                 NoInputUpdate=self.initialTask.getTrustSitelists().get('trustlists'),
                                 NoPileupUpdate=self.initialTask.getTrustSitelists().get('trustPUlists'))

    def validate(self):
        """Check args and spec work with block splitting"""
        StartPolicyInterface.validateCommon(self)
        if not self.initialTask.inputDataset():
            raise WorkQueueWMSpecError(self.wmspec, 'No input dataset')

    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {datasetPath: []}  # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data)  # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data)  # check dataset name
                for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                    blocks.append(str(block))

        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if blockName in self.blockBlackListModifier:
                # Don't duplicate blocks rejected before, or blocks that were
                # included and therefore are now in the blacklist
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                self.rejectedWork.append(blockName)
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted, but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                self.rejectedWork.append(blockName)
                continue

            # check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum([len(maskedBlocks[blockName][lfn].getLumis()) for lfn in maskedBlocks[blockName]])
                # use the information given by getMaskedBlocks to compute the size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio = lumis which are ok in the block / total num lumis
                ratioAccepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
                block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If there is more than one run in the block,
                    # we must calculate the lumi counts after filtering the run list.
                    # This has to be done rarely and requires calling DBS file information.
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist, only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on; if none, ignore block
                if not runs:
                    self.rejectedWork.append(blockName)
                    continue

                if len(runs) == len(runLumis):
                    # If there is no change in the runs, we can skip recalculating the lumi counts
                    recalculateLumiCounts = False

                if recalculateLumiCounts:
                    # Recalculate the effective size of the block.
                    # We pull out file info, since we don't do this often.
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                                acceptedLumiCount += len(lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] \
                                                      / len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']

                    block[self.lumiType] = acceptedLumiCount
                    block['NumberOfFiles'] = acceptedFileCount
                    block['NumberOfEvents'] = acceptedEventCount

            # save locations
            if task.getTrustSitelists().get('trustlists'):
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = self.siteDB.PNNstoPSNs(dbs.listFileBlockLocation(block['block']))

            # TODO: need to decide what to do when no location is found.
            # There could be a network problem (no connection to dbs, phedex)
            # or the DBS SE is not recorded (this will be retried anyway by the location mapper)
            if not self.data[block['block']]:
                self.data[block['block']] = ["NoInitialSite"]
            # # No sites for this block, move it to rejected
            #     self.rejectedWork.append(blockName)
            #     continue

            validBlocks.append(block)
        return validBlocks

    def getMaskedBlocks(self, task, dbs, datasetPath):
        """
        Get the blocks which pass the lumi mask restrictions. For each block
        return the list of lumis which were ok (given the lumi mask). The data
        structure returned is the following:

        {
            "block1": {"file1": LumiList(), "file5": LumiList(), ...}
            "block2": {"file2": LumiList(), "file7": LumiList(), ...}
        }
        """
        # Get the task mask as a LumiList object to make operations easier
        maskedBlocks = {}
        taskMask = task.getLumiMask()

        # for performance reasons, we first get all the block names
        blocks = [x['block_name'] for x in dbs.dbs.listBlocks(dataset=datasetPath)]

        for block in blocks:
            fileLumis = dbs.dbs.listFileLumis(block_name=block, validFileOnly=1)
            for fileLumi in fileLumis:
                lfn = fileLumi['logical_file_name']
                runNumber = str(fileLumi['run_num'])
                lumis = fileLumi['lumi_section_num']
                fileMask = LumiList(runsAndLumis={runNumber: lumis})
                commonMask = taskMask & fileMask
                if commonMask:
                    maskedBlocks.setdefault(block, {})
                    maskedBlocks[block].setdefault(lfn, LumiList())
                    maskedBlocks[block][lfn] += commonMask

        return maskedBlocks

    def modifyPolicyForWorkAddition(self, inboxElement):
        """
        A block blacklist modifier will be created; this policy object will
        split excluding the blocks in both the spec blacklist and the
        blacklist modifier.
        """
        # Get the already processed input blocks from the inbox element
        existingBlocks = inboxElement.get('ProcessedInputs', [])
        self.blockBlackListModifier = existingBlocks
        self.blockBlackListModifier.extend(inboxElement.get('RejectedInputs', []))
        return

    def newDataAvailable(self, task, inbound):
        """
        In the case of the block policy, newDataAvailable returns True
        if it finds at least one open block.
        """
        self.initialTask = task
        dbs = self.dbs()
        openBlocks = dbs.listOpenFileBlocks(task.getInputDatasetPath())
        if openBlocks:
            return True
        return False

    @staticmethod
    def supportsWorkAddition():
        """
        Block start policy supports continuous addition of work
        """
        return True
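
# getMaskedBlocks above intersects each file's run/lumi content with the
# task's lumi mask. A minimal sketch of that intersection using plain dicts
# of run -> lumi collections instead of WMCore's LumiList, just to show the
# shape of the computation (function and variable names here are illustrative):

def intersect_lumis(task_mask, file_lumis):
    """Return the run -> lumis overlap between a task mask and one file."""
    common = {}
    for run, lumis in file_lumis.items():
        overlap = set(lumis) & set(task_mask.get(run, []))
        if overlap:
            common[run] = overlap
    return common

# A file holding run '1', lumis 1-10 against a mask keeping run '1', lumis 8-12:
# only lumis 8, 9 and 10 survive, so the file contributes 3 accepted lumis.
print(intersect_lumis({'1': range(8, 13)}, {'1': range(1, 11)}))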
class Dataset(StartPolicyInterface):
    """Split elements into datasets"""

    def __init__(self, **args):
        StartPolicyInterface.__init__(self, **args)
        self.args.setdefault('SliceType', 'NumberOfRuns')
        self.args.setdefault('SliceSize', 1)
        self.lumiType = "NumberOfLumis"
        self.sites = []
        if os.getenv("WMAGENT_USE_CRIC", False) or os.getenv("WMCORE_USE_CRIC", False):
            self.cric = CRIC()
        else:
            self.cric = None
            self.siteDB = SiteDB()

    def split(self):
        """Apply policy to spec"""
        work = set() if self.args['SliceType'] == 'NumberOfRuns' else 0
        numFiles = 0
        numEvents = 0
        numLumis = 0
        datasetPath = self.initialTask.getInputDatasetPath()

        # dataset splitting can't have its data selection overridden
        if self.data and self.data.keys() != [datasetPath]:
            raise RuntimeError("Can't provide different data to split with")

        blocks = self.validBlocks(self.initialTask, self.dbs())
        if not blocks:
            return

        for block in blocks:
            if self.args['SliceType'] == 'NumberOfRuns':
                work = work.union(block[self.args['SliceType']])
            else:
                work += float(block[self.args['SliceType']])
            numLumis += int(block[self.lumiType])
            numFiles += int(block['NumberOfFiles'])
            numEvents += int(block['NumberOfEvents'])

        if self.args['SliceType'] == 'NumberOfRuns':
            numJobs = ceil(len(work) / float(self.args['SliceSize']))
        else:
            numJobs = ceil(float(work) / float(self.args['SliceSize']))

        # parentage
        parentFlag = True if self.initialTask.parentProcessingFlag() else False

        self.newQueueElement(Inputs={datasetPath: self.data.get(datasetPath, [])},
                             ParentFlag=parentFlag,
                             NumberOfLumis=numLumis,
                             NumberOfFiles=numFiles,
                             NumberOfEvents=numEvents,
                             Jobs=numJobs,
                             NoInputUpdate=self.initialTask.getTrustSitelists().get('trustlists'),
                             NoPileupUpdate=self.initialTask.getTrustSitelists().get('trustPUlists'))

    def validate(self):
        """Check args and spec work with block splitting"""
        StartPolicyInterface.validateCommon(self)
        if not self.initialTask.inputDataset():
            raise WorkQueueWMSpecError(self.wmspec, 'No input dataset')

    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        lumiMask = task.getLumiMask()
        if lumiMask:
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

        for blockName in dbs.listFileBlocks(datasetPath):
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue

            blockSummary = dbs.getDBSSummaryInfo(block=blockName)
            if int(blockSummary.get('NumberOfFiles', 0)) == 0:
                logging.warning("Block %s being rejected for lack of valid files to process", blockName)
                self.badWork.append(blockName)
                continue

            if self.args['SliceType'] == 'NumberOfRuns':
                blockSummary['NumberOfRuns'] = dbs.listRuns(block=blockName)

            # check lumi restrictions
            if lumiMask:
                if blockName not in maskedBlocks:
                    logging.warning("Block %s doesn't pass the lumi mask constraints", blockName)
                    self.rejectedWork.append(blockName)
                    continue

                acceptedLumiCount = sum([len(maskedBlocks[blockName][lfn].getLumis()) for lfn in maskedBlocks[blockName]])
                ratioAccepted = 1. * acceptedLumiCount / float(blockSummary['NumberOfLumis'])
                maskedRuns = [maskedBlocks[blockName][lfn].getRuns() for lfn in maskedBlocks[blockName]]
                acceptedRuns = set(lumiMask.getRuns()).intersection(set().union(*maskedRuns))

                blockSummary['NumberOfFiles'] = len(maskedBlocks[blockName])
                blockSummary['NumberOfEvents'] = float(blockSummary['NumberOfEvents']) * ratioAccepted
                blockSummary[self.lumiType] = acceptedLumiCount
                blockSummary['NumberOfRuns'] = acceptedRuns
            # check run restrictions
            elif runWhiteList or runBlackList:
                runs = set(dbs.listRuns(block=blockName))

                # multi-run blocks need special accounting, which requires more DBS calls
                recalculateLumiCounts = True if len(runs) > 1 else False

                # apply blacklist and whitelist
                runs = runs.difference(runBlackList)
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on; if none, ignore block
                if not runs:
                    logging.warning("Block %s doesn't pass the runs constraints", blockName)
                    self.rejectedWork.append(blockName)
                    continue

                if recalculateLumiCounts:
                    # Recalculate the number of files, lumis and ~events accepted
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=blockName)
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        for lumiInfo in fileEntry['LumiList']:
                            if lumiInfo['RunNumber'] in runs:
                                acceptedFile = True
                                acceptedLumiCount += len(lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedEventCount += fileEntry['NumberOfEvents']
                else:
                    acceptedLumiCount = blockSummary["NumberOfLumis"]
                    acceptedFileCount = blockSummary['NumberOfFiles']
                    acceptedEventCount = blockSummary['NumberOfEvents']

                blockSummary[self.lumiType] = acceptedLumiCount
                blockSummary['NumberOfFiles'] = acceptedFileCount
                blockSummary['NumberOfEvents'] = acceptedEventCount
                blockSummary['NumberOfRuns'] = runs

            validBlocks.append(blockSummary)

            if locations is None:
                locations = set(dbs.listFileBlockLocation(blockName))
            else:
                locations = locations.intersection(dbs.listFileBlockLocation(blockName))

        # all needed blocks present at these sites
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)
            self.data[datasetPath] = self.sites
        elif locations:
            if self.cric:
                self.data[datasetPath] = list(set(self.cric.PNNstoPSNs(locations)))
            else:
                self.data[datasetPath] = list(set(self.siteDB.PNNstoPSNs(locations)))

        return validBlocks
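
# The lumi-mask branch above estimates the surviving events by scaling with
# the fraction of accepted lumis (ratioAccepted) rather than re-reading
# per-file event counts. A quick worked example of that estimate, with
# made-up numbers:

accepted_lumis = 150
total_lumis = 600
total_events = 2000000
ratio_accepted = 1. * accepted_lumis / float(total_lumis)  # 0.25
estimated_events = float(total_events) * ratio_accepted    # 500000.0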
class Dataset(StartPolicyInterface):
    """Split elements into datasets"""

    def __init__(self, **args):
        StartPolicyInterface.__init__(self, **args)
        self.args.setdefault('SliceType', 'NumberOfFiles')
        self.args.setdefault('SliceSize', 1)
        self.lumiType = "NumberOfLumis"
        self.sites = []
        self.siteDB = SiteDB()

    def split(self):
        """Apply policy to spec"""
        dbs = self.dbs()
        work = 0
        numFiles = 0
        numEvents = 0
        numLumis = 0
        inputDataset = self.initialTask.inputDataset()
        datasetPath = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)

        # dataset splitting can't have its data selection overridden
        if self.data and self.data.keys() != [datasetPath]:
            raise RuntimeError("Can't provide different data to split with")

        blocks = self.validBlocks(self.initialTask, self.dbs())
        if not blocks:
            return

        for block in blocks:
            work += float(block[self.args['SliceType']])
            numLumis += int(block[self.lumiType])
            numFiles += int(block['NumberOfFiles'])
            numEvents += int(block['NumberOfEvents'])

        dataset = dbs.getDBSSummaryInfo(dataset=datasetPath)
        # If a dataset which is not in dbs is passed, just return.
        # The exception will be raised at the upper level when no work is created.
        if not dataset:
            return

        # parentage
        parentFlag = True if self.initialTask.parentProcessingFlag() else False

        if not work:
            work = dataset[self.args['SliceType']]

        self.newQueueElement(Inputs={dataset['path']: self.data.get(dataset['path'], [])},
                             ParentFlag=parentFlag,
                             NumberOfLumis=numLumis,
                             NumberOfFiles=numFiles,
                             NumberOfEvents=numEvents,
                             Jobs=ceil(float(work) / float(self.args['SliceSize'])),
                             NoLocationUpdate=self.initialTask.getTrustSitelists())

    def validate(self):
        """Check args and spec work with block splitting"""
        StartPolicyInterface.validateCommon(self)
        if not self.initialTask.inputDataset():
            raise WorkQueueWMSpecError(self.wmspec, 'No input dataset')

    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        if task.getTrustSitelists():
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for blockName in dbs.listFileBlocks(datasetPath):
            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)

            # check block restrictions
            if blockWhiteList and block['block'] not in blockWhiteList:
                continue
            if block['block'] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If there is more than one run in the block,
                    # we must calculate the lumi counts after filtering the run list.
                    # This has to be done rarely and requires calling DBS file information.
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist, only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on; if none, ignore block
                if not runs:
                    continue

                if recalculateLumiCounts:
                    # get the correct lumi count
                    # Recalculate the effective size of the block.
                    # We pull out file info, since we don't do this often.
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedLumiCount += acceptedFileLumiCount
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']
                else:
                    acceptedLumiCount = block["NumberOfLumis"]
                    acceptedFileCount = block['NumberOfFiles']
                    acceptedEventCount = block['NumberOfEvents']

                # recalculate the effective size of the block:
                # make a guess for new event/file numbers from the ratio
                # of accepted lumi sections (otherwise we would have to pull file info)
                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

            validBlocks.append(block)
            if locations is None:
                locations = set(dbs.listFileBlockLocation(block['block']))
            else:
                locations = locations.intersection(dbs.listFileBlockLocation(block['block']))

        # all needed blocks present at these sites
        if self.wmspec.getTrustLocationFlag():
            self.data[datasetPath] = self.sites
        elif locations:
            self.data[datasetPath] = list(set(self.siteDB.PNNstoPSNs(locations)))

        return validBlocks
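
# The dataset policies above place the whole dataset only where *every*
# block is present, by intersecting the per-block location sets exactly as
# the locations/intersection logic does. A self-contained sketch with
# made-up block locations:

locations = None
for block_locations in [{'T1_US_FNAL_Disk', 'T2_US_Purdue'},
                        {'T1_US_FNAL_Disk', 'T2_DE_DESY'}]:
    if locations is None:
        locations = set(block_locations)
    else:
        locations = locations.intersection(block_locations)
# locations == {'T1_US_FNAL_Disk'}: the only PNN hosting all blocks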
class RequestQuery:

    def __init__(self, config):
        self.br = Browser()
        self.config = config

        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        # PhEDEx connection is needed by createRequestJSON (getNodeMap);
        # initialised here as in the Savannah-based variant of this class
        self.phedex = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url=dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url=dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url=dbs_base_url + "phys03/DBSReader/")

    def __del__(self):
        self.br.close()

    def getScramArchByCMSSW(self):
        """
        Get the list of available CMSSW releases and
        return a dictionary of ScramArchitecture by CMSSW.
        """
        # Set a temporary connection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response = br.open(url)
        soup = BeautifulSoup(response.read())

        # Dictionary form
        # {'CMSSW_X_X_X': ['slc5_amd64_gcc472'], ...}
        archByCmssw = {}

        # Fill the dictionary
        for arch in soup.find_all('architecture'):
            for cmssw in arch.find_all('project'):
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel] = []
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)

        return archByCmssw

    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list of block origin sites.
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True, dataset=data)

        pnnList = set()
        for block in response:
            pnnList.add(block['origin_site_name'])
        psnList = self.mySiteDB.PNNstoPSNs(pnnList)

        return psnList, list(pnnList)

    def setGlobalTagFromOrigin(self, dbs_url, input_dataset):
        """
        Get the global tag of the dataset from the source dbs url.
        If it is not set, then set the global tag to 'UNKNOWN'.
        """
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)

        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'

        return globalTag

    def isDataAtUrl(self, dbs_url, input_dataset):
        """
        Returns True if the dataset is at the dbs url, if not returns False.
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)

        # This means that the dataset is not at the url
        if not response:
            return False
        else:
            return True

    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary keyed by value.
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
        return d

    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary keyed by label.
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value
        return d

    def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version=1):
        """
        Creates a JSON file 'Ticket_#TICKET.json' with the needed information
        for creating a request on ReqMgr.
        Input:
            - ticket: the ticket #, for instance 110773 on https://ggus.eu/?mode=ticket_info&ticket_id=110773
            - input_dataset
            - dbs_url: only the instance name, for example: "phys01" for
              https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader
            - cmssw_release
            - group_name: the physics group name
            - version: the dataset version, 1 by default.
        It returns a dictionary that contains the request information.
        """
        scramArchByCMSSW = self.getScramArchByCMSSW()
        self.nodeMappings = self.phedex.getNodeMap()
        task = ticket
        print("Processing ticket: %s" % task)

        # splitting input dataset
        input_primary_dataset = input_dataset.split('/')[1].replace(' ', '')
        input_processed_dataset = input_dataset.split('/')[2].replace(' ', '')
        data_tier = input_dataset.split('/')[3].replace(' ', '')

        # Transform input value to a valid DBS url
        # dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
        dbs_url = dbs_base_url + dbs_url + "/DBSReader"
        release_id = cmssw_release

        # check if a deprecated release was used
        release = cmssw_release
        # check if the release has no ScramArch match
        if release not in scramArchByCMSSW:
            raise Exception("Error on ticket %s due to ScramArch mismatch" % task)
        else:
            scram_arch = scramArchByCMSSW[release][-1]

        # check if the dataset is not at the dbs url
        try:
            data_at_url = self.isDataAtUrl(dbs_url, input_dataset)
        except:
            raise Exception('Error on ticket %s, dataset %s not available at %s' % (task, input_dataset, dbs_url))

        if not data_at_url:
            raise Exception('Error on ticket %s, dataset %s not available at %s' % (task, input_dataset, dbs_url))

        ## Get Physics Group
        group_squad = 'cms-storeresults-' + group_name.replace("-", "_").lower()

        ## Get Dataset Version
        dataset_version = str(version)

        # Set the default Acquisition Era for StoreResults
        acquisitionEra = "StoreResults"

        ## Construction of the new dataset name (ProcessingString)
        ## remove leading hypernews or physics group name and StoreResults+Version
        if input_processed_dataset.find(group_name) == 0:
            new_dataset = input_processed_dataset.replace(group_name, "", 1)
        else:
            stripped_dataset = input_processed_dataset.split("-")[1:]
            new_dataset = '_'.join(stripped_dataset)

        # Get dataset site info:
        psnList, pnnList = self.getDatasetOriginSites(dbs_url, input_dataset)

        infoDict = {}
        # Build the StoreResults JSON
        # First add all the default values
        infoDict["RequestType"] = "StoreResults"
        infoDict["UnmergedLFNBase"] = "/store/unmerged"
        infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-", "_").lower()
        infoDict["MinMergeSize"] = 1500000000
        infoDict["MaxMergeSize"] = 5000000000
        infoDict["MaxMergeEvents"] = 100000
        infoDict["TimePerEvent"] = 40
        infoDict["SizePerEvent"] = 512.0
        infoDict["Memory"] = 2394
        infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"
        infoDict["Group"] = "DATAOPS"
        infoDict["DbsUrl"] = dbs_url

        # Add all the information pulled from the ticket
        infoDict["AcquisitionEra"] = acquisitionEra
        infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset)
        infoDict["DataTier"] = data_tier
        infoDict["InputDataset"] = input_dataset
        infoDict["ProcessingString"] = new_dataset
        infoDict["CMSSWVersion"] = release
        infoDict["ScramArch"] = scram_arch
        infoDict["ProcessingVersion"] = dataset_version
        infoDict["SiteWhitelist"] = psnList

        # Create report for Migration2Global
        report = {}

        # Fill the json file, if status is done
        self.writeJSONFile(task, infoDict)
        report["json"] = 'y'
        report["task"] = int(task)
        report["InputDataset"] = input_dataset
        report["ProcessingString"] = new_dataset
        report["localUrl"] = dbs_url
        report["sites"] = psnList
        report["pnns"] = pnnList

        return report

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ## check if the file already exists
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if not os.access(filename, os.F_OK):
            jsonfile = open(filename, 'w')
            request = {'createRequest': infoDict}  ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request, sort_keys=True, indent=4))
            jsonfile.close()
        return

    def removeJSONFile(self, task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if os.access(filename, os.F_OK):
            os.remove(filename)
        return

    def printReport(self, report):
        """
        Print out a report
        """
        print("%20s %5s %10s %50s %50s" % ('Ticket', 'json', 'local DBS', 'Sites', 'pnns'))
        print("%20s %5s %10s %50s %50s" % ('-' * 20, '-' * 5, '-' * 10, '-' * 50, '-' * 50))

        json = report["json"]
        ticket = report["task"]
        # status = report["ticketStatus"]
        localUrl = report["localUrl"].split('/')[5]
        site = ', '.join(report["sites"])
        pnns = ', '.join(report["pnns"])
        print("%20s %5s %10s %50s %50s" % (ticket, json, localUrl, site, pnns))
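
# A hedged usage sketch for the ticket-based RequestQuery above. The config
# key comes from the class itself, but the ticket number, dataset, release
# and group values are made up for illustration, and running this for real
# would contact DBS, PhEDEx and SiteDB, so it is left commented out. The
# resulting file is ComponentDir/Ticket_<ticket>.json with the request
# payload under the 'createRequest' key.
#
# rq = RequestQuery({"ComponentDir": "/tmp/storeresults"})
# report = rq.createRequestJSON(110773,
#                               "/SinglePhoton/StoreResults-test-v1/USER",
#                               "phys03", "CMSSW_5_3_32", "egamma")
# rq.printReport(report)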
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """

    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        EmulatorHelper.setEmulators(phedex=False, dbs=False, siteDB=False, requestMgr=True)
        self.mySiteDB = SiteDBJSON()

    def tearDown(self):
        """
        _tearDown_
        """
        super(SiteDBTest, self).tearDown()
        EmulatorHelper.resetEmulators()
        return

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        target = [u'T1_US_FNAL', u'T1_US_FNAL_Disk']
        results = self.mySiteDB.seToCMSName("cmsdcadisk01.fnal.gov")
        self.assertTrue(results == target)
        target = sorted([u'T2_CH_CERN', u'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T0_CH_CERN', u'T1_CH_CERN'])
        results = sorted(self.mySiteDB.seToCMSName("srm-cms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T2_CH_CERN_AI'])
        results = sorted(self.mySiteDB.seToCMSName("eoscmsftp.cern.ch"))
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmsdcadisk01.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertTrue(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertTrue(result == ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test CMS name to storage element list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testCheckAndConvertSENameToPNN(self):
        """
        Test the conversion of SE name to PNN for single and multiple
        sites/PNNs using checkAndConvertSENameToPNN
        """
        fnalSE = u'cmsdcadisk01.fnal.gov'
        purdueSE = u'srm.rcac.purdue.edu'
        fnalPNNs = [u'T1_US_FNAL_Buffer', u'T1_US_FNAL_MSS', u'T1_US_FNAL_Disk']
        purduePNN = [u'T2_US_Purdue']

        pnnList = fnalPNNs + purduePNN
        seList = [fnalSE, purdueSE]

        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(fnalSE), fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN([fnalSE]), fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(purdueSE), purduePNN)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN([purdueSE]), purduePNN)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(seList), purduePNN + fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(pnnList), pnnList)
        return

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """
        result = self.mySiteDB.PNNstoPSNs(['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return
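
# checkAndConvertSENameToPNN, exercised above, accepts either a single SE
# hostname or a list of them and returns PhEDEx node names, passing through
# anything that already looks like a PNN. A minimal sketch inferred from the
# test expectations; the real lookup goes through SiteDB service data, and
# se_to_pnn here is a hypothetical stand-in for that mapping:

def check_and_convert_sketch(se_or_pnn, se_to_pnn):
    names = se_or_pnn if isinstance(se_or_pnn, list) else [se_or_pnn]
    pnns = []
    for name in names:
        if '.' in name:               # hostnames contain dots, PNNs don't
            pnns.extend(se_to_pnn.get(name, []))
        else:
            pnns.append(name)         # already a PNN, pass it through
    return pnns

# check_and_convert_sketch('srm.rcac.purdue.edu',
#                          {'srm.rcac.purdue.edu': ['T2_US_Purdue']})
# -> ['T2_US_Purdue']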
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """

    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests CMS Name to PhEDEx Node Name
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmsdcadisk01.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertTrue(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertTrue(result == ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test CMS name to storage element list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """
        result = self.mySiteDB.PNNstoPSNs(['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return

    def testPSNtoPNNMap(self):
        """
        _PSNtoPNNMap_

        Test API to get a map of PSNs and PNNs
        """
        result = self.mySiteDB.PSNtoPNNMap()
        self.assertTrue([psn for psn in result.keys() if psn.startswith('T1_')])
        self.assertTrue([psn for psn in result.keys() if psn.startswith('T2_')])
        self.assertTrue([psn for psn in result.keys() if psn.startswith('T3_')])
        self.assertTrue(len(result) > 50)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T1.*')
        self.assertFalse([psn for psn in result.keys() if not psn.startswith('T1_')])
        self.assertTrue(len(result) < 10)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T2.*')
        self.assertFalse([psn for psn in result.keys() if not psn.startswith('T2_')])
        self.assertTrue(len(result) > 10)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T3.*')
        self.assertFalse([psn for psn in result.keys() if not psn.startswith('T3_')])
        self.assertTrue(len(result) > 10)
        return

    def testGetAllPhEDExNodeNames(self):
        """
        _testGetAllPhEDExNodeNames_

        Test API to get all PhEDEx Node Names
        """
        result = self.mySiteDB.getAllPhEDExNodeNames(excludeBuffer=True)
        self.assertFalse([pnn for pnn in result if pnn.endswith('_Buffer')])

        result = self.mySiteDB.getAllPhEDExNodeNames(excludeBuffer=False)
        self.assertTrue(len([pnn for pnn in result if pnn.endswith('_Buffer')]) > 5)

        result = self.mySiteDB.getAllPhEDExNodeNames(pattern='T1.*', excludeBuffer=True)
        self.assertFalse([pnn for pnn in result if not pnn.startswith('T1_')])
        self.assertTrue(len(result) > 10)

        result = self.mySiteDB.getAllPhEDExNodeNames(pattern='.*', excludeBuffer=True)
        self.assertTrue([pnn for pnn in result if pnn.startswith('T1_')])
        self.assertTrue([pnn for pnn in result if pnn.startswith('T2_')])
        self.assertTrue([pnn for pnn in result if pnn.startswith('T3_')])
        self.assertTrue(len(result) > 60)
        return
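
# PSNtoPNNMap, tested above, filters its PSN keys with a regex pattern such
# as 'T1.*'. A self-contained sketch of that filtering over an assumed,
# made-up map (the real map comes from SiteDB service data):

import re

def filter_psn_map(psn_to_pnn, psn_pattern='.*'):
    """Keep only the PSN entries whose name matches psn_pattern."""
    matcher = re.compile(psn_pattern)
    return dict((psn, pnns) for psn, pnns in psn_to_pnn.items()
                if matcher.match(psn))

sample = {'T1_US_FNAL': ['T1_US_FNAL_Disk'], 'T2_US_Purdue': ['T2_US_Purdue']}
# filter_psn_map(sample, 'T1.*') -> {'T1_US_FNAL': ['T1_US_FNAL_Disk']}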